(* $Id: unidata.mli,v 1.14 2006/08/06 19:48:55 yori Exp $ *)
(* Copyright 2002, 2003 Yamagata Yoriyuki *)

module type Type = sig
val read_data : ?datadir:string -> string -> 'a

type general_category_type = 
  [ `Lu		(* Letter, Uppercase *)
  | `Ll		(* Letter, Lowercase *)
  | `Lt		(* Letter, Titlecase *)
  | `Mn		(* Mark, Non-Spacing *)
  | `Mc		(* Mark, Spacing Combining *)
  | `Me		(* Mark, Enclosing *)
  | `Nd		(* Number, Decimal Digit *)
  | `Nl		(* Number, Letter *)
  | `No		(* Number, Other *)
  | `Zs		(* Separator, Space *)
  | `Zl		(* Separator, Line *)
  | `Zp		(* Separator, Paragraph *)
  | `Cc		(* Other, Control *)
  | `Cf		(* Other, Format *)
  | `Cs		(* Other, Surrogate *)
  | `Co		(* Other, Private Use *)
  | `Cn		(* Other, Not Assigned *)
  | `Lm		(* Letter, Modifier *)
  | `Lo		(* Letter, Other *)
  | `Pc		(* Punctuation, Connector *)
  | `Pd		(* Punctuation, Dash *)
  | `Ps		(* Punctuation, Open *)
  | `Pe		(* Punctuation, Close *)
  | `Pi		(* Punctuation, Initial quote  *)
  | `Pf		(* Punctuation, Final quote  *)
  | `Po		(* Punctuation, Other *)
  | `Sm		(* Symbol, Math *)
  | `Sc		(* Symbol, Currency *)
  | `Sk		(* Symbol, Modifier *)
  | `So ]	(* Symbol, Other *)

val cat_of_name : string -> general_category_type
val num_of_cat : general_category_type -> int
val cat_of_num : int -> general_category_type

type script_type =
  [ `Common
  | `Inherited
  | `Latin
  | `Greek
  | `Cyrillic
  | `Armenian
  | `Hebrew
  | `Arabic
  | `Syriac
  | `Thaana
  | `Devanagari
  | `Bengali
  | `Gurmukhi
  | `Gujarati
  | `Oriya
  | `Tamil
  | `Telugu
  | `Kannada
  | `Malayalam
  | `Sinhala
  | `Thai
  | `Lao
  | `Tibetan
  | `Myanmar
  | `Georgian
  | `Hangul
  | `Ethiopic
  | `Cherokee
  | `Canadian_Aboriginal
  | `Ogham
  | `Runic
  | `Khmer
  | `Mongolian
  | `Hiragana
  | `Katakana
  | `Bopomofo
  | `Han
  | `Yi
  | `Old_Italic
  | `Gothic
  | `Deseret
  | `Tagalog
  | `Hanunoo
  | `Buhid
  | `Tagbanwa ]

val script_of_name : string -> script_type
val script_of_num : int -> script_type
val num_of_script : script_type -> int

type decomposition_type =
    [ `Canon | `Font | `NoBreak | `Initial | `Medial | `Final |
    `Isolated | `Circle | `Super | `Sub | `Vertical | `Wide | `Narrow |
    `Small | `Square | `Fraction | `Compat ]

type decomposition_info =
    (* Already in the canonical form *)
    [ `Canonform
    (* `Composite (dtype, text) :
     * means the given character is decomposed into text by dtype
     * decomposition. *)
  | `HangulSyllable
  | `Composite of decomposition_type * UChar.t list ]

(* Collation *)

type ce_type = int		(*collation element*)

val primary : ce_type -> int
val secondary : ce_type -> int
val tertiary : ce_type -> int
val compose_ce : int -> int -> int -> ce_type
val complete_ignorable : ce_type

type ce_tbl = (UChar.t list * ce_type list) list UCharTbl.t

type variable_option =   
  [ `Blanked 
  | `Non_ignorable 
  | `Shifted
  | `Shift_Trimmed ]

type col_info =
    {variable_top : int;
     variable_option : variable_option;
     french_accent : bool;		
     hiraganaQ : bool;
     hiraganaQ_weight : int;
     tbl : ce_tbl}

val get_col_info : ?locale:string -> unit -> col_info

(* If the returned list contains ([u1; u2; ... ;un], [ce1; ce2; ... ;cem]),
   for the given character u, the sequence u u1 u2 ... un corresponds
   sequence of collation elements ce1 ce2 ... cem. the list is in 
   decreasing order respect to n. *)
val ce : ce_tbl -> UChar.t -> (UChar.t list * ce_type list) list

type localedata =
    {col_info : col_info option}
end

module Make (Config : ConfigInt.Type) : Type