/* $Id: unidata.h,v 1.9 2006/04/09 07:37:05 hurtta Exp $ */
/******************************************************************************
* The Elm (ME+) Mail System - $Revision: 1.9 $ $State: Exp $
*
* Author: Kari Hurtta <hurtta+elm@posti.FMI.FI> (was hurtta+elm@ozone.FMI.FI)
*****************************************************************************/
/* unidata.c --------------------------------------------------- */
/* Interface to the unidata database handle.
 *
 * struct unidata_mapped_data is only forward-declared in this header, so
 * callers handle it purely through pointers.
 *
 * This header uses P_(), CONST, FILE and uint16 without defining them;
 * they must be provided by headers included before this one.
 */
#ifdef ANSI_C
struct unidata_mapped_data; /* Needed for prototype */
#endif
/* NOTE(review): ptr is a pointer to the handle pointer, presumably so the
 * handle can be created or replaced by the call -- confirm in unidata.c.
 * The meaning of the int result is not documented in this header.
 */
extern int get_unidata P_((struct unidata_mapped_data ** ptr, CONST char *filename));
/* Same handle convention as get_unidata(), with an extra logfile stream and
 * a silent_gaps flag.
 * NOTE(review): judging by the name, this reads the text form of the
 * database; semantics of logfile and silent_gaps must be checked in
 * unidata.c.
 */
extern int read_unidata_text P_((struct unidata_mapped_data **ptr,
CONST char *filename, FILE *logfile,
int silent_gaps));
/* Takes the handle by value and returns nothing. */
extern void free_unidata P_((struct unidata_mapped_data * v));
/* Writers: the first takes a file name, the second an already opened
 * stream.  The meaning of the int result is not documented here.
 */
extern int write_unidata P_((struct unidata_mapped_data *v,CONST char *filename));
extern int write_unidata_text P_((struct unidata_mapped_data *v, FILE *f));
/* UNICODE character types
 *
 * Every value carries its major class in the high byte and a subtype
 * number in the low byte.  CHARTYPE_major() masks the subtype away, so
 * CHARTYPE_major(CHARTYPE_Lu) == CHARTYPE_Letter.
 */
#define CHARTYPE_major(x)     ((x) & 0xFF00)

/* Mark */
#define CHARTYPE_Mark         0x0100
#define CHARTYPE_Mn           (CHARTYPE_Mark | 0x0001)        /* Mark, Non-Spacing */
#define CHARTYPE_Mc           (CHARTYPE_Mark | 0x0002)        /* Mark, Spacing Combining */
#define CHARTYPE_Me           (CHARTYPE_Mark | 0x0003)        /* Mark, Enclosing */

/* Number */
#define CHARTYPE_Number       0x0200
#define CHARTYPE_Nd           (CHARTYPE_Number | 0x0001)      /* Number, Decimal Digit */
#define CHARTYPE_Nl           (CHARTYPE_Number | 0x0002)      /* Number, Letter */
#define CHARTYPE_No           (CHARTYPE_Number | 0x0003)      /* Number, Other */

/* Separator */
#define CHARTYPE_Separator    0x0400
#define CHARTYPE_Zs           (CHARTYPE_Separator | 0x0001)   /* Separator, Space */
#define CHARTYPE_Zl           (CHARTYPE_Separator | 0x0002)   /* Separator, Line */
#define CHARTYPE_Zp           (CHARTYPE_Separator | 0x0003)   /* Separator, Paragraph */

/* Other */
#define CHARTYPE_Other        0x0800
#define CHARTYPE_Cc           (CHARTYPE_Other | 0x0001)       /* Other, Control */
#define CHARTYPE_Cf           (CHARTYPE_Other | 0x0002)       /* Other, Format */
#define CHARTYPE_Cs           (CHARTYPE_Other | 0x0003)       /* Other, Surrogate */
#define CHARTYPE_Co           (CHARTYPE_Other | 0x0004)       /* Other, Private Use */
#define CHARTYPE_Cn           (CHARTYPE_Other | 0x0005)       /* Other, Not Assigned */

/* Letter */
#define CHARTYPE_Letter       0x1000
#define CHARTYPE_Lu           (CHARTYPE_Letter | 0x0001)      /* Letter, Uppercase */
#define CHARTYPE_Ll           (CHARTYPE_Letter | 0x0002)      /* Letter, Lowercase */
#define CHARTYPE_Lt           (CHARTYPE_Letter | 0x0003)      /* Letter, Titlecase */
#define CHARTYPE_Lm           (CHARTYPE_Letter | 0x0004)      /* Letter, Modifier */
#define CHARTYPE_Lo           (CHARTYPE_Letter | 0x0005)      /* Letter, Other */

/* Punctuation */
#define CHARTYPE_Punctuation  0x2000
#define CHARTYPE_Pc           (CHARTYPE_Punctuation | 0x0001) /* Punctuation, Connector */
#define CHARTYPE_Pd           (CHARTYPE_Punctuation | 0x0002) /* Punctuation, Dash */
#define CHARTYPE_Ps           (CHARTYPE_Punctuation | 0x0003) /* Punctuation, Open */
#define CHARTYPE_Pe           (CHARTYPE_Punctuation | 0x0004) /* Punctuation, Close */
#define CHARTYPE_Po           (CHARTYPE_Punctuation | 0x0005) /* Punctuation, Other */

/* Symbol */
#define CHARTYPE_Symbol       0x4000
#define CHARTYPE_Sm           (CHARTYPE_Symbol | 0x0001)      /* Symbol, Math */
#define CHARTYPE_Sc           (CHARTYPE_Symbol | 0x0002)      /* Symbol, Currency */
#define CHARTYPE_Sk           (CHARTYPE_Symbol | 0x0003)      /* Symbol, Modifier */
#define CHARTYPE_So           (CHARTYPE_Symbol | 0x0004)      /* Symbol, Other */
/* UNICODE direction information
 *
 * Members of a group are written as the group's base value plus a small
 * index; the resulting numbers are the same as the plain hex constants
 * they replace.
 */

/* Strong types */
#define DIRECTION_L          0x0100                         /* Left-Right */
#define DIRECTION_R          0x0200                         /* Right-Left */

/* Weak types (the number types share the high byte of L or R) */
#define DIRECTION_EN         (DIRECTION_L | 0x0001)         /* European Number */
#define DIRECTION_ES         (DIRECTION_L | 0x0002)         /* European Number Separator */
#define DIRECTION_ET         (DIRECTION_L | 0x0003)         /* European Number Terminator */
#define DIRECTION_AN         (DIRECTION_R | 0x0001)         /* Arabic Number */
#define DIRECTION_CS         0x0001                         /* Common Number Separator */

/* Separators */
#define DIRECTION_Separator  0x0010
#define DIRECTION_B          (DIRECTION_Separator | 0x0001) /* Block Separator */
#define DIRECTION_S          (DIRECTION_Separator | 0x0002) /* Segment Separator */

/* Neutrals */
#define DIRECTION_Neutral    0x0020
#define DIRECTION_WS         (DIRECTION_Neutral | 0x0001)   /* Whitespace */
#define DIRECTION_ON         (DIRECTION_Neutral | 0x0002)   /* Other Neutrals */

#define DIRECTION_mirrored   0x1000                         /* "mirrored" character */
/* Per-character property record; this is the type of the info argument
 * of unicode_lookup_character().
 */
struct character_information {
    /* Classification */
    uint16 character_type;  /* CHARTYPE_* */
    uint16 direction_data;  /* DIRECTION_* */
    uint16 mirror_code;     /* DIRECTION_mirrored
                               NOTE(review): exact content not visible
                               in this header -- confirm in unidata.c */

    /* Case conversions: conversion or character itself */
    uint16 upper;
    uint16 lower;
    uint16 title;

    uint16 decomp_len;      /* decomposition length */
};
/* Look up one Unicode character in the database v.

   v           : database handle
   unicode     : character to look up
   info        : character_information record for this character
   unicode_out : buffer of unicode characters
   size        : number of unicode characters in the buffer;
                 0 if no Unicode decomposition is wanted

   NOTE(review): earlier wording of this comment called the buffer length
   "buffer_len" and spoke of "decompression"; the prototype names the
   argument size, and struct character_information has decomp_len
   ("decomposition length"), so both are presumed to mean the above --
   confirm against unidata.c.

   Returns 0 = failure (not valid character)
           1 = succeed
          -1 = database bad
*/
extern int unicode_lookup_character P_((struct unidata_mapped_data *v,
unsigned int unicode,
struct character_information *info,
uint16 unicode_out[],
int size));
/* Values for *comp_type ----------------------------------------------------*/
/* Each constant other than DECOMP_canonical corresponds to the
   compatibility formatting tag (shown in <...>) used in the decomposition
   field of the Unicode character data.
*/
#define DECOMP_canonical 0x00 /* Not compatibility character */
#define DECOMP_font 0x01 /* <font>
                            A font variant (e.g. a blackletter form). */
#define DECOMP_noBreak 0x02 /* <noBreak>
                               A no-break version of a space or hyphen. */
#define DECOMP_initial 0x03 /* <initial>
                               An initial presentation form (Arabic). */
#define DECOMP_medial 0x04 /* <medial>
                              A medial presentation form (Arabic). */
#define DECOMP_final 0x05 /* <final>
                             A final presentation form (Arabic). */
#define DECOMP_isolated 0x06 /* <isolated>
                                An isolated presentation form (Arabic). */
#define DECOMP_circle 0x07 /* <circle>
                              An encircled form. */
#define DECOMP_super 0x08 /* <super> A superscript form. */
#define DECOMP_sub 0x09 /* <sub> A subscript form. */
#define DECOMP_vertical 0x0A /* <vertical>
                                A vertical layout presentation form. */
#define DECOMP_wide 0x0B /* <wide>
                            A wide (or zenkaku) compatibility character. */
#define DECOMP_narrow 0x0C /* <narrow>
                              A narrow (or hankaku) compatibility character. */
#define DECOMP_small 0x0D /* <small>
                             A small variant form (CNS compatibility). */
#define DECOMP_square 0x0E /* <square>
                              A CJK squared font variant. */
#define DECOMP_compat 0x0F /* <compat>
                              Otherwise unspecified compatibility character. */
#define DECOMP_fraction 0x10 /* <fraction>
                                A vulgar fraction form. */
/* Compress the unicode characters unicode_inp[0 .. len-1] using the
   database v.

   v           : database handle
   unicode_res : where the resulting character is stored
   unicode_inp : input characters
   len         : number of characters in unicode_inp
   comp_type   : receives one of the DECOMP_* values

   Returns = 0  failure (not compressible)
           = 1  (one-to-one mapping)
           > 0  compressed input returned
           = -1 incomplete input

   NOTE(review): the "= 1" and "> 0" cases overlap as written; presumably
   a value greater than 1 is the number of input characters that were
   compressed -- confirm against unidata.c.
*/
extern int unicode_compress_input P_((struct unidata_mapped_data *v,
uint16 *unicode_res,
uint16 unicode_inp[],
int len,
int *comp_type));
/* unicode.c --------------------------------------------------- */
/* Takes no arguments and returns a database handle pointer.
   NOTE(review): judging by the name this is the default database used when
   no other is selected; who owns the returned handle is not visible in
   this header -- confirm in unicode.c before passing it to free_unidata().
*/
extern struct unidata_mapped_data * default_unidata P_((void));
/*
* Local Variables:
* mode:c
* c-basic-offset:4
* buffer-file-coding-system: iso-8859-1
* End:
*/
/* syntax highlighted by Code2HTML, v. 0.9.1 */