/* $Id: unidata.h,v 1.9 2006/04/09 07:37:05 hurtta Exp $ */

/******************************************************************************
 *  The Elm (ME+) Mail System  -  $Revision: 1.9 $   $State: Exp $
 *
 *  Author: Kari Hurtta <hurtta+elm@posti.FMI.FI> (was hurtta+elm@ozone.FMI.FI)
 *****************************************************************************/


/* unidata.c --------------------------------------------------- */


/* struct unidata_mapped_data is opaque to users of this header: it is only
 * forward-declared here and every function below handles it through a
 * pointer.  The forward declaration is emitted only when ANSI_C is defined,
 * so that the tag is known before it appears inside the prototypes.
 */
#ifdef ANSI_C
struct unidata_mapped_data;                  /* Needed for prototype */
#endif

/* NOTE: P_(), CONST and FILE are used by these prototypes but are not
 * defined in this header; whatever defines them has to be included first.
 *
 * NOTE(review): the meaning of the int return values is not documented
 * here -- see the definitions in unidata.c.
 */

/* Takes a pointer-to-pointer (ptr) and a file name.
 * Presumably stores the obtained mapping through *ptr -- confirm.
 */
extern int get_unidata P_((struct unidata_mapped_data ** ptr, CONST char *filename));

/* Text variant: same (ptr, filename) pair plus a log stream and a
 * silent_gaps flag.
 * NOTE(review): semantics of logfile and silent_gaps are not visible here.
 */
extern int read_unidata_text P_((struct unidata_mapped_data **ptr, 
				 CONST char *filename, FILE *logfile,
				 int silent_gaps));

/* Takes the mapping pointer itself; presumably releases it -- confirm. */
extern void free_unidata P_((struct unidata_mapped_data * v));

/* Output counterparts: one takes a file name, the other an already open
 * FILE stream.
 */
extern int write_unidata P_((struct unidata_mapped_data *v,CONST char *filename));
extern int write_unidata_text P_((struct unidata_mapped_data *v, FILE *f));

/* UNICODE character types
 *
 * Encoding of the CHARTYPE_* values:
 *   - the high byte identifies the major category; each major category
 *     uses its own bit (0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40), which
 *     makes it possible to test against a set of categories with one mask;
 *   - the low byte numbers the subtype inside that category; a low byte
 *     of 0 is the bare major category.
 *
 * CHARTYPE_major(x) clears the low byte, so for example
 *   CHARTYPE_major(CHARTYPE_Lu) == CHARTYPE_Letter.
 */

#define CHARTYPE_major(x)    ((x) & 0xFF00)

#define CHARTYPE_Mark        0x0100             /* Mark                    */
#define CHARTYPE_Mn          0x0101             /* Mark, Non-Spacing       */
#define CHARTYPE_Mc          0x0102             /* Mark, Spacing Combining */
#define CHARTYPE_Me          0x0103             /* Mark, Enclosing         */

#define CHARTYPE_Number      0x0200             /* Number                  */
#define CHARTYPE_Nd          0x0201             /* Number, Decimal Digit   */
#define CHARTYPE_Nl          0x0202             /* Number, Letter          */
#define CHARTYPE_No          0x0203             /* Number, Other           */

#define CHARTYPE_Separator   0x0400             /* Separator               */
#define CHARTYPE_Zs          0x0401             /* Separator, Space        */
#define CHARTYPE_Zl          0x0402             /* Separator, Line         */
#define CHARTYPE_Zp          0x0403             /* Separator, Paragraph    */

#define CHARTYPE_Other       0x0800             /* Other                   */
#define CHARTYPE_Cc          0x0801             /* Other, Control          */
#define CHARTYPE_Cf          0x0802             /* Other, Format           */
#define CHARTYPE_Cs          0x0803             /* Other, Surrogate        */
#define CHARTYPE_Co          0x0804             /* Other, Private Use      */
#define CHARTYPE_Cn          0x0805             /* Other, Not Assigned     */

#define CHARTYPE_Letter      0x1000             /* Letter                  */
#define CHARTYPE_Lu          0x1001             /* Letter, Uppercase       */
#define CHARTYPE_Ll          0x1002             /* Letter, Lowercase       */
#define CHARTYPE_Lt          0x1003             /* Letter, Titlecase       */
#define CHARTYPE_Lm          0x1004             /* Letter, Modifier        */
#define CHARTYPE_Lo          0x1005             /* Letter, Other           */

#define CHARTYPE_Punctuation 0x2000             /* Punctuation             */
#define CHARTYPE_Pc          0x2001             /* Punctuation, Connector  */
#define CHARTYPE_Pd          0x2002             /* Punctuation, Dash       */
#define CHARTYPE_Ps          0x2003             /* Punctuation, Open       */
#define CHARTYPE_Pe          0x2004             /* Punctuation, Close      */
#define CHARTYPE_Po          0x2005             /* Punctuation, Other      */

#define CHARTYPE_Symbol      0x4000             /* Symbol                  */
#define CHARTYPE_Sm          0x4001             /* Symbol, Math            */
#define CHARTYPE_Sc          0x4002             /* Symbol, Currency        */
#define CHARTYPE_Sk          0x4003             /* Symbol, Modifier        */
#define CHARTYPE_So          0x4004             /* Symbol, Other           */

/* UNICODE direction information
 *
 * Layout of the DIRECTION_* values as defined below:
 *   - the strong types occupy the high byte (0x01 = L, 0x02 = R);
 *   - the weak types EN/ES/ET carry the same high byte as DIRECTION_L and
 *     AN the same high byte as DIRECTION_R, distinguished by a non-zero
 *     low byte; CS has no high byte at all;
 *   - separators and neutrals are grouped in the low byte under
 *     0x10 (DIRECTION_Separator) and 0x20 (DIRECTION_Neutral);
 *   - DIRECTION_mirrored (0x1000) shares no bit with any other value
 *     defined here.
 *
 * NOTE(review): presumably DIRECTION_mirrored is OR'ed onto one of the
 * other values as a flag -- confirm against the users of direction_data.
 */

/* Strong types */
#define DIRECTION_L          0x0100            /* Left-Right                 */
#define DIRECTION_R          0x0200            /* Right-Left                 */

/* Weak types   */
#define DIRECTION_EN         0x0101            /* European Number            */
#define DIRECTION_ES         0x0102            /* European Number Separator  */
#define DIRECTION_ET         0x0103            /* European Number Terminator */
#define DIRECTION_AN         0x0201            /* Arabic Number              */
#define DIRECTION_CS         0x0001            /* Common Number Separator    */

/* Separator group (low byte 0x1n) */
#define DIRECTION_Separator  0x0010            /* Separators                 */
#define DIRECTION_B          0x0011            /* Block Separator            */
#define DIRECTION_S          0x0012            /* Segment Separator          */

/* Neutral group (low byte 0x2n) */
#define DIRECTION_Neutral    0x0020            /* Neutrals                   */
#define DIRECTION_WS         0x0021            /* Whitespace                 */
#define DIRECTION_ON         0x0022            /* Other Neutrals             */

#define DIRECTION_mirrored   0x1000            /* "mirrored" character       */

/* Per-character properties.  A pointer to this structure is passed as the
 * `info` argument of unicode_lookup_character(), which presumably fills it
 * in -- confirm against unidata.c.
 *
 * All members are uint16 (a type not defined in this header), so the case
 * conversions can only hold values that fit that type.
 */
struct character_information {
    uint16       character_type;      /* one of the CHARTYPE_* values  */
    uint16       direction_data;      /* DIRECTION_* value             */
    uint16       mirror_code;         /* DIRECTION_mirrored
					 NOTE(review): presumably the code of
					 the mirrored counterpart when the
					 DIRECTION_mirrored flag applies --
					 confirm */
    uint16       upper,lower,title;   /* conversion or character itself */
    uint16       decomp_len;          /* decomposition length */
};             

/* Look up one character (unicode) in the mapped data (v).
 *
 * Note:            buffer_len == 0 if not unicode decompression wanted
 *                  buffer_len : number of unicode characters
 *
 * NOTE(review): no parameter of this prototype is named buffer_len; it
 * presumably refers to `size`, i.e. the capacity of unicode_out[] counted
 * in unicode characters.  "decompression" presumably means decomposition
 * (cf. decomp_len in struct character_information).  Confirm both against
 * the definition in unidata.c.
 *
 * Returns          0 = failure (not valid character)
 *                  1 = succeed
 *                 -1 = database bad
 */
extern int unicode_lookup_character P_((struct unidata_mapped_data *v, 
					unsigned int unicode,
					struct character_information *info,
					uint16 unicode_out[],
					int size));


/* Values for *comp_type (the comp_type argument of
 * unicode_compress_input()).  The values run consecutively from 0x00 to
 * 0x10.  Each entry gives the decomposition tag followed by its
 * description.
 */
#define DECOMP_canonical 0x00 /* Not compatibility character                 */
#define DECOMP_font     0x01 /* <font>
				A font variant (e.g. a blackletter form).    */
#define DECOMP_noBreak  0x02 /* <noBreak>
				A no-break version of a space or hyphen.     */
#define DECOMP_initial  0x03 /* <initial>
				An initial presentation form (Arabic).       */
#define DECOMP_medial   0x04 /* <medial>
				A medial presentation form (Arabic).         */
#define DECOMP_final    0x05 /* <final>
				A final presentation form (Arabic).          */
#define DECOMP_isolated 0x06 /* <isolated>
				An isolated presentation form (Arabic).      */
#define DECOMP_circle   0x07 /* <circle>
				An encircled form.                           */
#define DECOMP_super    0x08 /* <super>
				A superscript form.                          */
#define DECOMP_sub      0x09 /* <sub>
				A subscript form.                            */
#define DECOMP_vertical 0x0A /* <vertical>
				A vertical layout presentation form.         */
#define DECOMP_wide     0x0B /* <wide>
				A wide (or zenkaku) compatibility character. */
#define DECOMP_narrow   0x0C /* <narrow>
				A narrow (or hankaku) compatibility
				character.                                   */
#define DECOMP_small    0x0D /* <small>
				A small variant form (CNS compatibility).    */
#define DECOMP_square   0x0E /* <square>
				A CJK squared font variant.                  */
#define DECOMP_compat   0x0F /* <compat>
				Otherwise unspecified compatibility
				character.                                   */

#define DECOMP_fraction 0x10 /* <fraction>
				????  (left undescribed by the original
				author)                                      */

/* unicode_compress_input
 *
 * Parameters, as declared (the roles given are presumed from the names
 * and types -- confirm against the definition in unidata.c):
 *   v            mapping data to consult
 *   unicode_res  where the resulting character is stored
 *   unicode_inp  input characters
 *   len          number of characters in unicode_inp[]
 *   comp_type    receives one of the DECOMP_* values
 *
 * Returns          = 0    failure (not compressible)
 *                  = 1    (one-to-one mapping)
 *                  > 0    compressed input returned
 *                  = -1   incomplete input
 *
 * NOTE(review): "= 1" and "> 0" overlap as written; presumably "> 1" is
 * meant for the compressed case -- confirm.
 *
 * Declared extern like every other prototype in this header.
 */

extern int unicode_compress_input P_((struct unidata_mapped_data *v,
				      uint16 *unicode_res,
				      uint16 unicode_inp[],
				      int len,
				      int *comp_type));

/* unicode.c --------------------------------------------------- */

/* Takes no arguments and returns a pointer to a unidata_mapped_data
 * object.
 * NOTE(review): presumably a shared default instance that the caller must
 * not pass to free_unidata() -- ownership is not visible here; confirm in
 * unicode.c.
 */
extern struct unidata_mapped_data * default_unidata P_((void));

/*
 * Local Variables:
 *  mode:c
 *  c-basic-offset:4
 *  buffer-file-coding-system: iso-8859-1
 * End:
 */


/* syntax highlighted by Code2HTML, v. 0.9.1 */