(* $Id: uCharInfo.mli,v 1.20 2006/08/06 19:48:55 yori Exp $ *)
(* Copyright 2002, 2003 Yamagata Yoriyuki. distributed with LGPL *)

module type Type = sig

(** Character Information *)

(** Type of Unicode general character categories.  
   Each variant specifies
 - [`Lu] : Letter, Uppercase 
 - [`Ll] : Letter, Lowercase 
 - [`Lt] : Letter, Titlecase 
 - [`Mn] : Mark, Non-Spacing 
 - [`Mc] : Mark, Spacing Combining 
 - [`Me] : Mark, Enclosing 
 - [`Nd] : Number, Decimal Digit 
 - [`Nl] : Number, Letter 
 - [`No] : Number, Other 
 - [`Zs] : Separator, Space 
 - [`Zl] : Separator, Line 
 - [`Zp] : Separator, Paragraph 
 - [`Cc] : Other, Control 
 - [`Cf] : Other, Format 
 - [`Cs] : Other, Surrogate 
 - [`Co] : Other, Private Use 
 - [`Cn] : Other, Not Assigned 
 - [`Lm] : Letter, Modifier 
 - [`Lo] : Letter, Other 
 - [`Pc] : Punctuation, Connector 
 - [`Pd] : Punctuation, Dash 
 - [`Ps] : Punctuation, Open 
 - [`Pe] : Punctuation, Close 
 - [`Pi] : Punctuation, Initial
 - [`Pf] : Punctuation, Final
 - [`Po] : Punctuation, Other 
 - [`Sm] : Symbol, Math 
 - [`Sc] : Symbol, Currency 
 - [`Sk] : Symbol, Modifier 
 - [`So] : Symbol, Other  *)
type general_category_type = 
  [ `Lu		(** Letter, Uppercase *)
  | `Ll		(** Letter, Lowercase *)
  | `Lt		(** Letter, Titlecase *)
  | `Mn		(** Mark, Non-Spacing *)
  | `Mc		(** Mark, Spacing Combining *)
  | `Me		(** Mark, Enclosing *)
  | `Nd		(** Number, Decimal Digit *)
  | `Nl		(** Number, Letter *)
  | `No		(** Number, Other *)
  | `Zs		(** Separator, Space *)
  | `Zl		(** Separator, Line *)
  | `Zp		(** Separator, Paragraph *)
  | `Cc		(** Other, Control *)
  | `Cf		(** Other, Format *)
  | `Cs		(** Other, Surrogate *)
  | `Co		(** Other, Private Use *)
  | `Cn		(** Other, Not Assigned *)
  | `Lm		(** Letter, Modifier *)
  | `Lo		(** Letter, Other *)
  | `Pc		(** Punctuation, Connector *)
  | `Pd		(** Punctuation, Dash *)
  | `Ps		(** Punctuation, Open *)
  | `Pe		(** Punctuation, Close *)
  | `Pi		(** Punctuation, Initial quote  *)
  | `Pf		(** Punctuation, Final quote  *)
  | `Po		(** Punctuation, Other *)
  | `Sm		(** Symbol, Math *)
  | `Sc		(** Symbol, Currency *)
  | `Sk		(** Symbol, Modifier *)
  | `So	        (** Symbol, Other *) ]

val general_category : UChar.t -> general_category_type
val load_general_category_map : unit -> general_category_type UMap.t

(** Type of character properties *)
type character_property_type = [

(**Derived Core Properties*)

    `Math				
  | `Alphabetic
  | `Lowercase
  | `Uppercase
  | `ID_Start
  | `ID_Continue
  | `XID_Start
  | `XID_Continue
  | `Default_Ignorable_Code_Point
  | `Grapheme_Extend
  | `Grapheme_Base

(**Extended Properties*)

  | `Bidi_Control			
  | `White_Space
  | `Hyphen
  | `Quotation_Mark
  | `Terminal_Punctuation
  | `Other_Math
  | `Hex_Digit
  | `Ascii_Hex_Digit
  | `Other_Alphabetic
  | `Ideographic
  | `Diacritic
  | `Extender
  | `Other_Lowercase
  | `Other_Uppercase
  | `Noncharacter_Code_Point
  | `Other_Grapheme_Extend
  | `Grapheme_Link
  | `IDS_Binary_Operator
  | `IDS_Trinary_Operator
  | `Radical
  | `Unified_Ideograph
  | `Other_default_Ignorable_Code_Point
  | `Deprecated
  | `Soft_Dotted
  | `Logical_Order_Exception ]

(** Load the table for the given character type. *)
val load_property_tbl : character_property_type -> UCharTbl.Bool.t

(** Load the table for the given name of the character type.
   The name can be obtained by removing ` from its name of 
   the polymorphic variant tag. *)
val load_property_tbl_by_name : string -> UCharTbl.Bool.t

(** Load the set of characters of the given character type. *)
val load_property_set : character_property_type -> USet.t

(** Load the set of characters of the given name of the character type.
   The name can be obtained by removing ` from its name of 
   the polymorphic variant tag. *)
val load_property_set_by_name : string -> USet.t

(** Type for script type *)
type script_type =
  [ `Common
  | `Inherited
  | `Latin
  | `Greek
  | `Cyrillic
  | `Armenian
  | `Hebrew
  | `Arabic
  | `Syriac
  | `Thaana
  | `Devanagari
  | `Bengali
  | `Gurmukhi
  | `Gujarati
  | `Oriya
  | `Tamil
  | `Telugu
  | `Kannada
  | `Malayalam
  | `Sinhala
  | `Thai
  | `Lao
  | `Tibetan
  | `Myanmar
  | `Georgian
  | `Hangul
  | `Ethiopic
  | `Cherokee
  | `Canadian_Aboriginal
  | `Ogham
  | `Runic
  | `Khmer
  | `Mongolian
  | `Hiragana
  | `Katakana
  | `Bopomofo
  | `Han
  | `Yi
  | `Old_Italic
  | `Gothic
  | `Deseret
  | `Tagalog
  | `Hanunoo
  | `Buhid
  | `Tagbanwa ]

val script : UChar.t -> script_type
val load_script_map : unit -> script_type UMap.t

(** casing *)

val load_to_lower1_tbl : unit -> UChar.t UCharTbl.t
val load_to_upper1_tbl : unit -> UChar.t UCharTbl.t
val load_to_title1_tbl : unit -> UChar.t UCharTbl.t

type casemap_condition =
  [ `Locale of string
  | `FinalSigma
  | `AfterSoftDotted
  | `MoreAbove
  | `BeforeDot
  | `Not of casemap_condition ]

type special_casing_property =
  {lower : UChar.t list;
  title : UChar.t list;
  upper : UChar.t list;
  condition : casemap_condition list;} 

val load_conditional_casing_tbl : 
    unit -> special_casing_property list UCharTbl.t

val load_casefolding_tbl : unit -> UChar.t list UCharTbl.t

(** Combined class
   A combined class is an integer of 0 -- 255, showing how this character
   interacts to other combined characters.  *)
val combined_class : UChar.t -> int

(** Decomposition *)

(** Types of decomposition. *)
type decomposition_type =
    [ `Canon | `Font | `NoBreak | `Initial | `Medial | `Final |
    `Isolated | `Circle | `Super | `Sub | `Vertical | `Wide | `Narrow |
    `Small | `Square | `Fraction | `Compat ]

type decomposition_info =
(** Already in the canonical form *)
    [ `Canonform
(** Hangul is treated algotighmically.*)
  | `HangulSyllable
(** [`Composite (dtype, text)]
   means the given character is decomposed into text by dtype
   decomposition. *)
  | `Composite of decomposition_type * UChar.t list ]

val load_decomposition_tbl : unit -> decomposition_info UCharTbl.t

(** Canonical Composition *)

(** The return value [[(u_1, u'_1); ... (u_n, u'_1)]] means
   for the given character [u], [u u_i] forms 
   the canonical composition [u'_i].
   If u is a Hangul jamo, composition returns []. *)
val load_composition_tbl : unit -> (UChar.t * UChar.t) list UCharTbl.t

(** Whether the given composed character is used in NFC or NFKC *)
val load_composition_exclusion_tbl : unit -> UCharTbl.Bool.t

end

module Make (Config : ConfigInt.Type) : Type