/* slwclut.c: wide character lookup tables */
/*
Copyright (C) 2004, 2005, 2006, 2007 John E. Davis

This file is part of the S-Lang Library.

The S-Lang Library is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.

The S-Lang Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/

#include "slinclud.h"
#include <string.h>

#include "slang.h"
#include "_slang.h"
#include "slischar.h"

#define IS_ASCII256(x) ((x) < 256)

/* A set of wide characters.
 *
 *   lut           membership flags for characters < 256
 *   utf8_mode     value of _pSLinterp_UTF8_Mode when the table was created
 *   chmin, chmax  parallel arrays of inclusive range bounds for ranges
 *                 that extend to characters >= 256
 *   table_len     number of ranges stored in chmin/chmax
 *   malloced_len  number of slots allocated for chmin/chmax
 *   char_class    OR of the character classes added via add_char_class
 */
struct SLwchar_Lut_Type
{
   unsigned char lut[256];             /* for chars < 256 */
   int utf8_mode;
   SLwchar_Type *chmin, *chmax;
   unsigned int table_len;
   unsigned int malloced_len;
   unsigned char char_class;
};

/* Release a lookup table and its range arrays.  NULL is accepted. */
void SLwchar_free_lut (SLwchar_Lut_Type *r)
{
   if (r == NULL)
     return;

   SLfree ((char *) r->chmin);
   SLfree ((char *) r->chmax);
   SLfree ((char *) r);
}

/* Create an empty lookup table with room for num_entries wide ranges.
 * Returns NULL if any allocation fails.
 */
SLwchar_Lut_Type *SLwchar_create_lut (unsigned int num_entries)
{
   SLwchar_Lut_Type *r;

   r = (SLwchar_Lut_Type *) SLcalloc (sizeof (SLwchar_Lut_Type), 1);
   if (r == NULL)
     return NULL;

   r->chmin = (SLwchar_Type *) SLmalloc (num_entries * sizeof (SLwchar_Type));
   r->chmax = (SLwchar_Type *) SLmalloc (num_entries * sizeof (SLwchar_Type));
   if ((r->chmin == NULL) || (r->chmax == NULL))
     {
        SLwchar_free_lut (r);
        return NULL;
     }

   r->malloced_len = num_entries;
   r->utf8_mode = _pSLinterp_UTF8_Mode;
   return r;
}

/* Add the inclusive range [a,b] to the table; the bounds may be given in
 * either order.  The part of the range below 256 is recorded in the byte
 * table, the remainder is appended to the chmin/chmax arrays, which grow
 * by 5 slots when full.  Returns 0 upon success, -1 if growing fails.
 */
int SLwchar_add_range_to_lut (SLwchar_Lut_Type *r, SLwchar_Type a, SLwchar_Type b)
{
   if (b < a)
     {
        SLwchar_Type tmp = a;
        a = b;
        b = tmp;
     }

   if (b < 256)
     {
        unsigned char *lut = r->lut;

        while (a <= b)
          {
             lut[a] = 1;
             a++;
          }
        return 0;
     }

   if (a < 256)
     {
        /* The range straddles 256: split off the byte-table part. */
        if (-1 == SLwchar_add_range_to_lut (r, a, 255))
          return -1;
        a = 256;
     }

   if (r->table_len == r->malloced_len)
     {
        SLwchar_Type *chmin, *chmax;
        unsigned int malloced_len = r->malloced_len + 5;

        chmin = (SLwchar_Type *) SLrealloc ((char *) r->chmin,
                                            malloced_len * sizeof (SLwchar_Type));
        if (chmin == NULL)
          return -1;
        r->chmin = chmin;

        chmax = (SLwchar_Type *) SLrealloc ((char *) r->chmax,
                                            malloced_len * sizeof (SLwchar_Type));
        if (chmax == NULL)
          return -1;
        r->chmax = chmax;

        r->malloced_len = malloced_len;
     }

   r->chmin[r->table_len] = a;
   r->chmax[r->table_len] = b;
   r->table_len += 1;
   return 0;
}

/* Add a character class to the table: the class bits are remembered for
 * the wide-character test in wch_in_lut, and every character < 256 whose
 * classification has one of the bits set is flagged in the byte table.
 */
static void add_char_class (SLwchar_Lut_Type *r, unsigned char char_class)
{
   unsigned int i;
   unsigned char *lut;

   r->char_class |= char_class;
   lut = r->lut;
   for (i = 0; i < 256; i++)
     {
        if (SL_CLASSIFICATION_LOOKUP(i) & char_class)
          lut[i] = 1;
     }
}

/* Return non-zero if wch is in the table: via the byte table for
 * characters < 256, otherwise via the class bits or the stored ranges.
 */
static int wch_in_lut (SLwchar_Lut_Type *r, SLwchar_Type wch)
{
   unsigned int i, table_len;
   SLwchar_Type *chmin, *chmax;

   if (wch < 256)
     return r->lut[wch];

   if (r->char_class
       && (SL_CLASSIFICATION_LOOKUP(wch) & r->char_class))
     return 1;

   /* FIXME.  I should use a binary search for this... */
   table_len = r->table_len;
   chmin = r->chmin;
   chmax = r->chmax;

   for (i = 0; i < table_len; i++)
     {
        if ((wch <= chmax[i]) && (wch >= chmin[i]))
          return 1;
     }
   return 0;
}

/* Public membership test.  Returns -1 if r is NULL, otherwise the result
 * of wch_in_lut.
 */
int SLwchar_in_lut (SLwchar_Lut_Type *r, SLwchar_Type wch)
{
   if (r == NULL)
     return -1;

   return wch_in_lut (r, wch);
}

/* Scan forward from p (never beyond pmax) and return a pointer to the
 * first character at which the scan stops.  With invert == 0 the scan
 * skips characters that are in the table; with invert != 0 it skips
 * characters that are not.  If ignore_combining is non-zero, decoded
 * characters of width 0 are stepped over without being tested.  A byte
 * sequence that cannot be decoded is treated as a character that is not
 * in the table.  Returns NULL if any pointer argument is NULL.
 */
SLuchar_Type *SLwchar_skip_range (SLwchar_Lut_Type *r, SLuchar_Type *p,
                                  SLuchar_Type *pmax, int ignore_combining,
                                  int invert)
{
   unsigned char *lut;
   int utf8_mode;

   if ((r == NULL) || (p == NULL) || (pmax == NULL))
     return NULL;

   lut = r->lut;
   invert = (invert != 0);
   utf8_mode = r->utf8_mode;

   while (p < pmax)
     {
        SLwchar_Type wch;
        unsigned int dn;

        if ((*p < 0x80) || (utf8_mode == 0))
          {
             if ((int) lut[*p] == invert)
               return p;
             p++;
             continue;
          }

        if (NULL == SLutf8_decode (p, pmax, &wch, &dn))
          {
             if (invert == 0)
               return p;
             p++;
             continue;
          }

        if ((ignore_combining) && (0 == SLwchar_wcwidth (wch)))
          {
             p += dn;
             continue;
          }

        if (invert == wch_in_lut (r, wch))
          return p;

        p += dn;
     }
   return p;
}

/* Backward counterpart of SLwchar_skip_range: scan from p towards pmin
 * and return a pointer just past the character at which the scan stops
 * (pmin if everything was skipped).  Returns NULL if any pointer argument
 * is NULL.
 */
SLuchar_Type *SLwchar_bskip_range (SLwchar_Lut_Type *r, SLuchar_Type *pmin,
                                   SLuchar_Type *p, int ignore_combining,
                                   int invert)
{
   unsigned char *lut;
   SLuchar_Type *pmax;
   int utf8_mode;

   if ((r == NULL) || (p == NULL) || (pmin == NULL))
     return NULL;

   lut = r->lut;
   pmax = p;
   invert = (invert != 0);
   utf8_mode = r->utf8_mode;

   while (p > pmin)
     {
        SLuchar_Type *p0;
        SLwchar_Type wch;
        unsigned int dn;

        p0 = p - 1;
        if ((*p0 < 0x80) || (utf8_mode == 0))
          {
             if ((int) lut[*p0] == invert)
               return p;
             p = p0;
             continue;
          }

        p0 = SLutf8_bskip_char (pmin, p);
        if (NULL == SLutf8_decode (p0, pmax, &wch, &dn))
          {
             /* NOTE(review): SLwchar_skip_range stops on an undecodable
              * sequence when invert == 0, whereas this stops when invert
              * is set.  One of the two is probably unintended; confirm
              * before relying on the behavior for invalid input.
              */
             if (invert)
               return p;
             p = p0;
             continue;
          }

        if ((ignore_combining) && (0 == SLwchar_wcwidth (wch)))
          {
             p = p0;
             continue;
          }

        if (invert == wch_in_lut (r, wch))
          return p;

        p = p0;
     }
   return p;
}

/*
 * Special Range characters:
 *
 * \w matches a unicode "word" character, taken to be alphanumeric.
 * \a alpha character, excluding digits
 * \s matches whitespace
 * \l matches lowercase
 * \u matches uppercase
 * \d matches a digit
 */

/* QUESTION: What is the encoding of the range?  Is it utf-8?  I suspect
 * it ought to be.  For example, a jed .sl file may use:
 *
 *    skip_chars ("\\w\u{ADFF}-\u{AFFF}");
 *
 * to skip words chars and chars in the range 0xADFF-0xAFFF.  By the time it
 * gets here, the parser will have converted the wchars \u{ADFF} and \u{AFFF}
 * to their UTF-8 equivalents.  Hence the function needs to use SLutf8_decode
 * to get characters.
 */

/* Maps a POSIX class name, as written in [:name:], to the character used
 * for the equivalent backslash escape.
 */
typedef struct
{
   const char *name;
   char escaped_form;
}
Posix_Char_Class_Type;

static const Posix_Char_Class_Type Posix_Char_Class_Table [] =
{
   {"alnum", 'w'},
   {"alpha", 'a'},
   {"blank", 'b'},
   {"cntrl", 'c'},
   {"digit", 'd'},
   {"graph", 'g'},
   {"lower", 'l'},
   {"print", 'p'},
   {"punct", ','},
   {"space", 's'},
   {"upper", 'u'},
   {"xdigit", 'x'},
   {NULL, 0}
};

/* Try to parse ":name:]" at *up, i.e. the text following a '['.
 * Returns:
 *    1  a known class was found; *char_classp receives its escape
 *       character and *up is advanced past the closing "]"
 *    0  the text does not have the form of a POSIX class
 *   -1  the text has the right form but the name is unknown (an error is
 *       reported)
 */
static int is_posix_charclass (SLuchar_Type **up, SLuchar_Type *umax,
                               SLwchar_Type *char_classp)
{
   SLuchar_Type *u, *u1;
   unsigned int len;
   const Posix_Char_Class_Type *p;

   u = *up;
   if ((u >= umax) || (*u != ':'))
     return 0;
   u++;

   u1 = u;
   while ((u1 < umax) && (*u1 >= 'a') && (*u1 <= 'z'))
     u1++;

   if (((u1 + 1) >= umax) || (u1[0] != ':') || (u1[1] != ']'))
     return 0;

   len = (unsigned int) (u1 - u);
   p = Posix_Char_Class_Table;
   while (p->name != NULL)
     {
        if ((0 == strncmp (p->name, (char *) u, len))
            && (p->name[len] == 0))
          {
             *char_classp = p->escaped_form;
             *up = u1 + 2;
             return 1;
          }
        p++;
     }

   SLang_verror (SL_NotImplemented_Error,
                 "Character class in range specification is unknown or unsupported");
   return -1;
}

/* Read one lexical character from *up.
 *
 * On success *chp receives the character and *char_classp is either 0
 * (plain character) or the class escape character (from "\x" or
 * "[:name:]").  At the end of input both are set to 0.  Classes are only
 * recognized when allow_charclass is non-zero.
 *
 * Returns -1 upon error; otherwise *up is advanced past what was read
 * (the return value is 1 when a POSIX class was read and 0 otherwise).
 */
static int get_lex_char (SLuchar_Type **up, SLuchar_Type *umax,
                         int allow_charclass,
                         SLwchar_Type *chp, SLwchar_Type *char_classp)
{
   SLuchar_Type *u;
   SLwchar_Type ch;

   u = *up;
   if (u == umax)
     {
        *chp = 0;
        *char_classp = 0;
        return 0;
     }

   if (NULL == (u = _pSLinterp_decode_wchar (u, umax, &ch)))
     return -1;

   if ((ch == '[') && allow_charclass)
     {
        int status = is_posix_charclass (&u, umax, &ch);
        if (status != 0)
          {
             if (status == 1)
               {
                  *chp = *char_classp = ch;
                  *up = u;
               }
             return status;
          }
     }

   if ((ch != '\\') || (allow_charclass == 0)
       || (u == umax))                 /* Permit a single backslash as the last character */
     {
        *char_classp = 0;
        *chp = ch;
        *up = u;
        return 0;
     }

   /* Here, ch=='\\' and *u represents the next character. */

   /* Allow \\ and \^ to represent \ and ^, resp.  Supporting \^ is useful
    * in constructs such as "\\^x" since "^x" may mean anything but x, and not
    * '^' or 'x'.
    */
   ch = *u;
   if ((ch == '\\') || (ch == '^'))
     {
        *char_classp = 0;
        *chp = ch;
        *up = u + 1;
        return 0;
     }

   if (NULL == (u = _pSLinterp_decode_wchar (u, umax, &ch)))
     return -1;

   *chp = *char_classp = ch;
   *up = u;
   return 0;
}

/* One construct of a range specification: a single character, an
 * inclusive range, or a character class.
 */
typedef struct
{
#define LEXICAL_CHAR_TYPE	1
#define LEXICAL_RANGE_TYPE	2
#define LEXICAL_CLASS_TYPE	3
   int lexical_type;
   union
     {
        SLwchar_Type range[2];
        SLwchar_Type wch;
        int char_class;
     }
   e;
}
Lexical_Element_Type;

/* Parse the next construct starting at u into *lex and return a pointer
 * to the text following it.  Returns NULL at the end of input or upon
 * error.
 *
 * A '-' introduces a range only when allow_range is non-zero and
 * something follows it.  A '-' that is the last character of the
 * specification is left in place so that the next call returns it as an
 * ordinary character; e.g. "a-" yields 'a' and then '-'.
 */
static SLuchar_Type *get_lexical_element (SLuchar_Type *u, SLuchar_Type *umax,
                                          int allow_range, int allow_charclass,
                                          Lexical_Element_Type *lex)
{
   SLwchar_Type r0, r1;
   SLwchar_Type char_class;

   if (u == umax)
     return NULL;

   if (-1 == get_lex_char (&u, umax, allow_charclass, &r0, &char_class))
     return NULL;

   if (char_class)
     {
        lex->lexical_type = LEXICAL_CLASS_TYPE;
        switch (char_class)
          {
           case 'a':                   /* alpha */
             lex->e.char_class = SLCHARCLASS_ALPHA;
             break;
           case 'b':                   /* blank */
             lex->e.char_class = SLCHARCLASS_BLANK;
             break;
           case 'c':                   /* control */
             lex->e.char_class = SLCHARCLASS_CNTRL;
             break;
           case 'd':                   /* digit: expressed as the range 0-9 */
             lex->lexical_type = LEXICAL_RANGE_TYPE;
             lex->e.range[0] = '0';
             lex->e.range[1] = '9';
             break;
           case 'g':                   /* graph */
             lex->e.char_class = SLCHARCLASS_GRAPH;
             break;
           case 'l':                   /* lowercase */
             lex->e.char_class = SLCHARCLASS_LOWER;
             break;
           case 'p':                   /* printable */
             lex->e.char_class = SLCHARCLASS_PRINT;
             break;
           case ',':                   /* punctuation */
             lex->e.char_class = SLCHARCLASS_PUNCT;
             break;
           case 's':                   /* whitespace */
             lex->e.char_class = SLCHARCLASS_SPACE;
             break;
           case 'u':                   /* uppercase */
             lex->e.char_class = SLCHARCLASS_UPPER;
             break;
           case 'x':                   /* hex digit */
             lex->e.char_class = SLCHARCLASS_XDIGIT;
             break;
           case 'w':                   /* alphanumeric */
             lex->e.char_class = SLCHARCLASS_ALPHA|SLCHARCLASS_XDIGIT;
             break;
           default:
             SLang_verror (SL_INVALID_PARM, "Invalid character class '%c'.",
                           (int) char_class);
             return NULL;
          }
        return u;
     }

   if ((allow_range == 0) || (u == umax) || (*u != '-')
       || (u + 1 == umax))             /* trailing '-': not a range */
     {
        lex->lexical_type = LEXICAL_CHAR_TYPE;
        lex->e.wch = r0;
        return u;
     }

   u++;                                /* skip the '-' */

   if (-1 == get_lex_char (&u, umax, allow_charclass, &r1, &char_class))
     return NULL;

   if (char_class)
     {
        SLang_verror (SL_INVALID_PARM, "Character class not allowed in a range");
        return NULL;
     }

   if (r1 == 0)
     {
        SLang_verror (SL_INVALID_PARM, "Unfinished range specification");
        return NULL;
     }

   lex->lexical_type = LEXICAL_RANGE_TYPE;
   lex->e.range[0] = r0;
   lex->e.range[1] = r1;
   return u;
}

/* Build a lookup table from the NUL-terminated specification u.
 * allow_range enables "a-z" ranges and allow_charclass enables "\x" and
 * "[:name:]" classes.  Returns NULL upon error; the result must be freed
 * with SLwchar_free_lut.
 */
SLwchar_Lut_Type *SLwchar_strtolut (SLuchar_Type *u,
                                    int allow_range, int allow_charclass)
{
   SLuchar_Type *umax;
   SLwchar_Lut_Type *r;
   Lexical_Element_Type lex;

   if (u == NULL)
     return NULL;

   r = SLwchar_create_lut (32);
   if (r == NULL)
     return NULL;

   umax = u + strlen ((char *) u);

   while (u < umax)
     {
        if (NULL == (u = get_lexical_element (u, umax, allow_range,
                                              allow_charclass, &lex)))
          goto return_error;

        switch (lex.lexical_type)
          {
           case LEXICAL_CHAR_TYPE:
             if (-1 == SLwchar_add_range_to_lut (r, lex.e.wch, lex.e.wch))
               goto return_error;
             break;

           case LEXICAL_RANGE_TYPE:
             if (-1 == SLwchar_add_range_to_lut (r, lex.e.range[0], lex.e.range[1]))
               goto return_error;
             break;

           case LEXICAL_CLASS_TYPE:
             add_char_class (r, lex.e.char_class);
             break;
          }
     }
   return r;

return_error:
   SLwchar_free_lut (r);
   return NULL;
}

/* This structure is used for mapping 1 character to another, and is used
 * by, e.g., strtrans.
 *
 * The most efficient implementation that I have come up with requires a
 * many-1 mapping between _constructs_ in the "from" list and the "to" list.
 * Here a _construct_ is a single character, range, or a character class.
 * The following mappings are legal:
 *
 *    Character --> Character
 *    Range     --> Character
 *    Range     --> Equal length range
 *    Range     --> Class (upper or lower)
 *    Class     --> Character
 *    Class     --> Compatible Class
 *
 * For inversion, the only mapping that makes sense is a many to one mapping.
 * For example, strtrans(str, "^A-Za-z", "x"), should replace any character
 * that is not one of the ranges A-Z and a-z by x.
 */
typedef struct Char_Map_Type
{
   /* Returns 1 and sets the output if the input character is mapped by
    * this construct, 0 otherwise.
    */
   int (*map_function)(Lexical_Element_Type *, Lexical_Element_Type *,
                       int, SLwchar_Type, SLwchar_Type *);

   Lexical_Element_Type from;
   Lexical_Element_Type to;

   struct Char_Map_Type *next;
}
Char_Map_Type;

struct SLwchar_Map_Type
{
   /* for chars < 256. */
   SLwchar_Type chmap[256];

   int invert;
   Char_Map_Type *list;
};

/* Character --> Character */
static int map_char_to_char_method (Lexical_Element_Type *from, Lexical_Element_Type *to,
                                    int invert, SLwchar_Type in, SLwchar_Type *out)
{
   int ok = (in == from->e.wch);

   if (0 == (ok ^ invert))
     return 0;

   *out = to->e.wch;
   return 1;
}

/* Range --> Character */
static int map_range_to_char_method (Lexical_Element_Type *from, Lexical_Element_Type *to,
                                     int invert, SLwchar_Type in, SLwchar_Type *out)
{
   int ok = ((in >= from->e.range[0]) && (in <= from->e.range[1]));

   if (0 == (ok ^ invert))
     return 0;

   *out = to->e.wch;
   return 1;
}

/* Range --> Equal length range.  The "from" range is stored in ascending
 * order.  check_char_mapping stores the "to" range so that range[0] is
 * the image of the smallest "from" character; a descending "to" range
 * therefore maps in the opposite direction.
 */
static int map_range_to_range_method (Lexical_Element_Type *from, Lexical_Element_Type *to,
                                      int invert, SLwchar_Type in, SLwchar_Type *out)
{
   SLwchar_Type offset;
   int ok = ((in >= from->e.range[0]) && (in <= from->e.range[1]));

   if (0 == (ok ^ invert))
     return 0;

   offset = in - from->e.range[0];
   if (to->e.range[0] <= to->e.range[1])
     *out = to->e.range[0] + offset;
   else
     *out = to->e.range[0] - offset;
   return 1;
}

/* Range --> Class (upper or lower) */
static int map_range_to_class_method (Lexical_Element_Type *from, Lexical_Element_Type *to,
                                      int invert, SLwchar_Type in, SLwchar_Type *out)
{
   int ok = ((in >= from->e.range[0]) && (in <= from->e.range[1]));

   if (0 == (ok ^ invert))
     return 0;

   if (to->e.char_class == SLCHARCLASS_UPPER)
     *out = SLwchar_toupper (in);
   else if (to->e.char_class == SLCHARCLASS_LOWER)
     *out = SLwchar_tolower (in);
   else
     return 0;

   return 1;
}

/* Test w against one of the classes supported as the source of a
 * character map.  Any other class yields 0.
 */
static int is_of_class (int char_class, SLwchar_Type w)
{
   switch (char_class)
     {
      case SLCHARCLASS_ALPHA:
        return SLwchar_isalpha (w);

      case SLCHARCLASS_ALPHA|SLCHARCLASS_XDIGIT:
        return SLwchar_isalnum (w);

      case SLCHARCLASS_UPPER:
        return SLwchar_isupper (w);

      case SLCHARCLASS_LOWER:
        return SLwchar_islower (w);

      case SLCHARCLASS_SPACE:
        return SLwchar_isspace (w);
     }
   return 0;
}

/* Class --> Character */
static int map_class_to_char_method (Lexical_Element_Type *from, Lexical_Element_Type *to,
                                     int invert, SLwchar_Type in, SLwchar_Type *out)
{
   /* Normalize to 0/1 so that the XOR with invert is well defined
    * whatever non-zero value the predicate returns.
    */
   int ok = (0 != is_of_class (from->e.char_class, in));

   if (0 == (ok ^ invert))
     return 0;

   *out = to->e.wch;
   return 1;
}

/* Class --> Class (upper or lower) */
static int map_class_to_class_method (Lexical_Element_Type *from, Lexical_Element_Type *to,
                                      int invert, SLwchar_Type in, SLwchar_Type *out)
{
   /* Normalized to 0/1; see map_class_to_char_method. */
   int ok = (0 != is_of_class (from->e.char_class, in));

   if (0 == (ok ^ invert))
     return 0;

   if (to->e.char_class == SLCHARCLASS_UPPER)
     *out = SLwchar_toupper (in);
   else if (to->e.char_class == SLCHARCLASS_LOWER)
     *out = SLwchar_tolower (in);
   else
     return 0;

   return 1;
}

/* Initialize the byte map for an inverted mapping: entry 0 maps to 0 and
 * every other entry maps to wch, or to (*to_func)(i) when to_func is not
 * NULL.
 */
static void init_chmap (SLwchar_Type *chmap, SLwchar_Type wch,
                        SLwchar_Type (*to_func)(SLwchar_Type))
{
   unsigned int i;

   chmap[0] = 0;
   if (to_func == NULL)
     {
        for (i = 1; i < 256; i++)
          chmap[i] = wch;
     }
   else
     {
        for (i = 1; i < 256; i++)
          chmap[i] = (*to_func) (i);
     }
}

/* Return the bounds of the range in *lex in ascending order via *chminp
 * and *chmaxp.  *range_dirp is set to 1 if the range was written in
 * ascending order and to -1 if it was written reversed; a reversed range
 * is rewritten in *lex in ascending order so that the range-based map
 * methods can test membership with range[0] <= in <= range[1].
 */
static void get_range_values (Lexical_Element_Type *lex,
                              SLwchar_Type *chminp, SLwchar_Type *chmaxp,
                              int *range_dirp)
{
   SLwchar_Type chmin = lex->e.range[0];
   SLwchar_Type chmax = lex->e.range[1];

   *range_dirp = 1;
   if (chmin > chmax)
     {
        SLwchar_Type tmp = chmin;
        chmin = chmax;
        chmax = tmp;

        lex->e.range[0] = chmin;
        lex->e.range[1] = chmax;
        *range_dirp = -1;
     }
   *chminp = chmin;
   *chmaxp = chmax;
}

/* Validate the from --> to pair of one list node, enter the part of the
 * mapping that concerns characters < 256 into map->chmap, and select the
 * node's map_function for wide characters.  map_function is set to NULL
 * when the byte map fully describes the node.  first_time is non-zero for
 * the first node; for an inverted map that is when the byte map gets its
 * default contents.  Returns 0 upon success and -1 if the pair is not one
 * of the legal combinations.
 */
static int check_char_mapping (SLwchar_Map_Type *map, Char_Map_Type *list, int first_time)
{
   Lexical_Element_Type *lex_from, *lex_to;
   SLwchar_Type chmin, chmax, wch, wch1;
   SLwchar_Type (*to_func) (SLwchar_Type);
   int (*is_func) (SLwchar_Type);
   SLwchar_Type *chmap;
   int invert, from_range_dir, to_range_dir;

   lex_to = &list->to;
   lex_from = &list->from;
   chmap = map->chmap;
   invert = map->invert;

   switch (lex_from->lexical_type)
     {
      default:
        return -1;

      case LEXICAL_CHAR_TYPE:
        if (lex_to->lexical_type != LEXICAL_CHAR_TYPE)
          return -1;

        wch = lex_to->e.wch;
        if (invert && first_time)
          init_chmap (chmap, wch, NULL);

        list->map_function = map_char_to_char_method;
        if (0 == IS_ASCII256(lex_from->e.wch))
          break;

        if (invert)
          map->chmap[lex_from->e.wch] = lex_from->e.wch;
        else
          {
             map->chmap[lex_from->e.wch] = wch;
             list->map_function = NULL;
          }
        break;

      case LEXICAL_RANGE_TYPE:
        get_range_values (lex_from, &chmin, &chmax, &from_range_dir);

        switch (lex_to->lexical_type)
          {
           case LEXICAL_CHAR_TYPE:
             wch = lex_to->e.wch;
             if (invert && first_time)
               init_chmap (chmap, wch, NULL);

             while ((chmin < 256) && (chmin <= chmax))
               {
                  chmap[chmin] = (invert ? chmin : wch);
                  chmin++;
               }
             list->map_function = map_range_to_char_method;
             break;

           case LEXICAL_CLASS_TYPE:
             if (lex_to->e.char_class == SLCHARCLASS_UPPER)
               to_func = SLwchar_toupper;
             else if (lex_to->e.char_class == SLCHARCLASS_LOWER)
               to_func = SLwchar_tolower;
             else
               return -1;

             if (invert && first_time)
               init_chmap (chmap, 0, to_func);

             while ((chmin < 256) && (chmin <= chmax))
               {
                  chmap[chmin] = (invert ? chmin : (*to_func) (chmin));
                  chmin++;
               }
             list->map_function = map_range_to_class_method;
             break;

           case LEXICAL_RANGE_TYPE:
             if (invert)
               {
                  SLang_verror (SL_INVALID_PARM,
                                "Inversion from a range to a range not permitted");
                  return -1;
               }

             get_range_values (lex_to, &wch, &wch1, &to_range_dir);
             if ((chmax - chmin) != (wch1 - wch))
               {
                  SLang_verror (SL_INVALID_PARM,
                                "Character mapping of unequal ranges is forbidden");
                  return -1;
               }

             if (from_range_dir != to_range_dir)
               {
                  /* The ranges run in opposite directions: the smallest
                   * "from" character maps to the largest "to" character.
                   * Store the "to" range descending so that
                   * map_range_to_range_method does the same for wide
                   * characters.
                   */
                  lex_to->e.range[0] = wch1;
                  lex_to->e.range[1] = wch;
                  wch = wch1;
                  to_range_dir = -1;
               }
             else
               to_range_dir = 1;

             while ((chmin < 256) && (chmin <= chmax))
               {
                  chmap[chmin] = wch;
                  chmin++;
                  wch += to_range_dir;
               }
             list->map_function = map_range_to_range_method;
             break;

           default:
             return -1;
          }

        /* A non-inverted range entirely below 256 is fully handled by
         * the byte map.
         */
        if ((chmax < 256) && (invert == 0))
          list->map_function = NULL;
        break;

      case LEXICAL_CLASS_TYPE:
        switch (lex_from->e.char_class)
          {
           case SLCHARCLASS_ALPHA:
             is_func = SLwchar_isalpha;
             break;
           case SLCHARCLASS_ALPHA|SLCHARCLASS_XDIGIT:
             is_func = SLwchar_isalnum;
             break;
           case SLCHARCLASS_UPPER:
             is_func = SLwchar_isupper;
             break;
           case SLCHARCLASS_LOWER:
             is_func = SLwchar_islower;
             break;
           case SLCHARCLASS_SPACE:
             is_func = SLwchar_isspace;
             break;
           default:
             SLang_verror (SL_INVALID_PARM,
                           "Invalid character class in character map");
             return -1;
          }

        switch (lex_to->lexical_type)
          {
           case LEXICAL_CHAR_TYPE:
             wch = lex_to->e.wch;
             if (first_time && invert)
               init_chmap (chmap, wch, NULL);

             for (chmin = 0; chmin < 256; chmin++)
               {
                  if ((*is_func)(chmin))
                    chmap[chmin] = (invert ? chmin : wch);
               }
             list->map_function = map_class_to_char_method;
             break;

           case LEXICAL_CLASS_TYPE:
             switch (lex_to->e.char_class)
               {
                case SLCHARCLASS_LOWER:
                  to_func = SLwchar_tolower;
                  break;
                case SLCHARCLASS_UPPER:
                  to_func = SLwchar_toupper;
                  break;
                default:
                  return -1;
               }

             if (invert && first_time)
               init_chmap (chmap, 0, to_func);

             for (chmin = 0; chmin < 256; chmin++)
               {
                  if ((*is_func)(chmin))
                    chmap[chmin] = (invert ? chmin : (*to_func)(chmin));
               }
             /* Set here, inside the case, so that the Class --> Character
              * method chosen above is not overwritten.
              */
             list->map_function = map_class_to_class_method;
             break;

           default:
             return -1;
          }
        break;
     }
   return 0;
}

/* Free a single list node. */
static void free_char_map_type (Char_Map_Type *m)
{
   SLfree ((char *) m);
}

/* Free a character map and all of its list nodes.  NULL is accepted. */
void SLwchar_free_char_map (SLwchar_Map_Type *map)
{
   Char_Map_Type *list;

   if (map == NULL)
     return;

   list = map->list;
   while (list != NULL)
     {
        Char_Map_Type *next = list->next;
        free_char_map_type (list);
        list = next;
     }
   SLfree ((char *) map);
}

/* Create a character map from the NUL-terminated specifications from and
 * to.  A leading '^' in from requests an inverted map.  Returns NULL upon
 * error; the result must be freed with SLwchar_free_char_map.
 */
SLwchar_Map_Type *SLwchar_allocate_char_map (SLuchar_Type *from, SLuchar_Type *to)
{
   SLwchar_Map_Type *map;
   Char_Map_Type *list, *prev;
   SLuchar_Type *from_max, *to_max;
   unsigned int i;
   int invert = 0, first_time;

   if ((from == NULL) || (to == NULL))
     return NULL;

   if (*from == '^')
     {
        invert = 1;
        from++;
     }

#if 0
   if (*from == 0)
     {
        SLang_verror (SL_INVALID_PARM, "Illegal empty string in character map specification");
        return NULL;
     }
#endif

   map = (SLwchar_Map_Type *) SLcalloc (1, sizeof (SLwchar_Map_Type));
   if (map == NULL)
     return NULL;

   map->invert = invert;

   /* Start from the identity mapping for characters < 256. */
   for (i = 0; i < 256; i++)
     map->chmap[i] = i;

   from_max = from + strlen ((char *) from);
   to_max = to + strlen ((char *) to);

   list = NULL;
   while (from < from_max)
     {
        Char_Map_Type *next;
        SLuchar_Type *next_to;

        if (NULL == (next = (Char_Map_Type *) SLcalloc (1, sizeof (Char_Map_Type))))
          goto return_error;

        if (list == NULL)
          map->list = next;
        else
          list->next = next;
        list = next;

        if (NULL == (from = get_lexical_element (from, from_max, 1, 1, &list->from)))
          goto return_error;

        if (NULL == (next_to = get_lexical_element (to, to_max, 1, 1, &list->to)))
          goto return_error;

        /* If the mapping is not 1-1, then the last "to" object applies to the
         * remaining "from" objects.  This will permit, e.g.,
         *    A-Za-z --> X
         */
        if (next_to != to_max)
          {
             if (invert)
               {
                  SLang_verror (SL_INVALID_PARM,
                                "Character map inversion must specify a many-to-one mapping");
                  goto return_error;
               }
             to = next_to;
          }
     }

   /* Validate each node and drop those that the byte map fully covers. */
   list = map->list;
   prev = NULL;
   first_time = 1;
   while (list != NULL)
     {
        Char_Map_Type *next = list->next;

        if (-1 == check_char_mapping (map, list, first_time))
          {
             SLang_verror (SL_INVALID_PARM, "Specified character mapping is invalid");
             goto return_error;
          }
        first_time = 0;

        if (list->map_function == NULL)
          {
             if (prev == NULL)
               map->list = next;
             else
               prev->next = next;

             free_char_map_type (list);
          }
        else
          prev = list;

        list = next;
     }
   return map;

return_error:
   SLwchar_free_char_map (map);
   return NULL;
}

/* Apply the list-based part of the map to wc_in.  Returns 1 and sets
 * *wc_out if the character is mapped, 0 if it is to be left unchanged.
 *
 * Non-inverted: the first construct that contains wc_in maps it.
 * Inverted: wc_in is mapped only if no construct contains it, so every
 * node has to be consulted; a node that contains it returns 0 and ends
 * the search.
 */
static int apply_lexical_map (SLwchar_Map_Type *map, SLwchar_Type wc_in, SLwchar_Type *wc_out)
{
   Char_Map_Type *list = map->list;
   int invert = map->invert;
   int mapped = 0;

   while (list != NULL)
     {
        if (list->map_function != NULL)
          {
             int status = (*list->map_function)(&list->from, &list->to,
                                                invert, wc_in, wc_out);
             if (invert ^ status)
               return status;

             /* Here status == invert.  For an inverted map this means the
              * node did not contain wc_in and has stored the target in
              * *wc_out; remember that, in case no later node objects.
              */
             mapped = status;
          }
        list = list->next;
     }
   return mapped;
}

/* Map num wide characters from input to output.  Characters < 0x100 go
 * through the byte map, all others through the list of constructs;
 * unmapped characters are copied unchanged.  Returns -1 if any argument
 * is NULL, else 0.
 */
int SLwchar_apply_char_map (SLwchar_Map_Type *map, SLwchar_Type *input,
                            SLwchar_Type *output, unsigned int num)
{
   unsigned int i;
   SLwchar_Type *chmap;

   if ((map == NULL) || (input == NULL) || (output == NULL))
     return -1;

   chmap = map->chmap;

   for (i = 0; i < num; i++)
     {
        SLwchar_Type wc_in;

        if ((wc_in = input[i]) < 0x100)
          {
             output[i] = chmap[wc_in];
             continue;
          }

        if (0 == apply_lexical_map (map, wc_in, output + i))
          output[i] = wc_in;
     }
   return 0;
}

/* Apply the map to the NUL-terminated string str.
 * This function returns a malloced string, or NULL upon error.
 */
SLuchar_Type *SLuchar_apply_char_map (SLwchar_Map_Type *map, SLuchar_Type *str)
{
   SLuchar_Type *str_max;
   SLuchar_Type *output, *output_max, *outptr;
   int use_chmap;
   unsigned int len;
   SLwchar_Type *chmap;

   if ((map == NULL) || (str == NULL))
     return NULL;

   chmap = map->chmap;

   /* The byte map can be applied directly when every input character and
    * every mapped character occupies a single byte.  In UTF-8 mode that
    * requires both to be below 0x80.
    */
   use_chmap = 1;
   if (_pSLinterp_UTF8_Mode == 0)
     str_max = str + strlen ((char *) str);
   else
     {
        str_max = str;
        while (*str_max)
          {
             if ((*str_max & 0x80) || (chmap[*str_max] >= 0x80))
               use_chmap = 0;
             str_max++;
          }
     }

   len = str_max - str;

   if (use_chmap)
     {
        unsigned int i;

        output = (SLuchar_Type *) SLmalloc (len + 1);
        if (output == NULL)
          return NULL;

        for (i = 0; i < len; i++)
          output[i] = chmap[str[i]];

        output[len] = 0;
        return output;
     }

   /* Hard way */

   /* The buffer always holds len+1 bytes, so output_max itself is a valid
    * position for the terminating NUL.
    */
   len += SLUTF8_MAX_MBLEN;
   if (NULL == (output = (SLuchar_Type *) SLmalloc (len + 1)))
     return NULL;
   output_max = output + len;
   outptr = output;

   while (str < str_max)
     {
        SLwchar_Type w_out, w_in;
        unsigned int encoded_len;

        w_in = (SLwchar_Type) *str;
        if (w_in < 0x80)
          {
             /* Single-byte input character. */
             str++;
             w_out = chmap[w_in];
             if ((w_out < 0x80) && (outptr < output_max))
               {
                  *outptr++ = (SLuchar_Type) w_out;
                  continue;
               }
          }
        else
          {
             /* Lead byte of a multibyte character: decode it before
              * applying the map.
              */
             if (NULL == (str = _pSLinterp_decode_wchar (str, str_max, &w_in)))
               goto return_error;

             if (-1 == SLwchar_apply_char_map (map, &w_in, &w_out, 1))
               goto return_error;
          }

        if (outptr + SLUTF8_MAX_MBLEN >= output_max)
          {
             SLuchar_Type *tmp;

             len += 32 * SLUTF8_MAX_MBLEN;
             if (NULL == (tmp = (SLuchar_Type *) SLrealloc ((char *) output, len + 1)))
               goto return_error;

             outptr = tmp + (outptr - output);
             output = tmp;
             output_max = output + len;
          }

        if (NULL == (outptr = _pSLinterp_encode_wchar (w_out, outptr, &encoded_len)))
          goto return_error;
     }

   *outptr = 0;
   return output;

return_error:
   SLfree ((char *) output);
   return NULL;
}