static char rcsid[] = "@(#)$Id: cs_utf.c,v 1.33 2006/04/09 07:37:07 hurtta Exp $"; /****************************************************************************** * The Elm (ME+) Mail System - $Revision: 1.33 $ $State: Exp $ * * Author: Kari Hurtta (hurtta+elm@ozone.FMI.FI) *****************************************************************************/ #include "headers.h" #include "s_me.h" #include "cs_imp.h" DEBUG_VAR(Debug,__FILE__,"charset"); char base64chars[64] = { 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 43, 47 }; static int index_imap[128] = { -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, 63,-1,-1,-1, 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1, -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1 }; static char imapchars[64] = { 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 43, 44 }; #define imap(c) ((((c) > 0) && ((c) < 127)) ? index_imap[ (c) ] : -1) #define toimap(c) (((c) >= 0) && ((c) < 64)) ? 
imapchars[(c)] : -1 /* 'Map' for UTF-8 and UTF-7 ---------------------------------------------- */ static struct utfelem { uint16 start; uint16 end; uint16 unicodeval; /* Unicode value correspond with 'start' */ } utfXelems[] = { /* Identity map excluding: 0x0000 -- 0x007F US-ASCII -- by default identity on type=utf-8 0xD800 -- 0xDFFF Surrogates Area 0xE000 -- 0xF8FF Private Use Area */ { 0x0080, 0xD7FF, 0x0080 }, { 0xF900, 0xFFFF, 0xF900 }, }; /* Mappping to unicode */ static uint16 map_utf P_((struct map_info *map, unsigned int ch)); static uint16 map_utf(map,ch) struct map_info *map; unsigned int ch; { if (ch < 0x0080) return ch; /* Identity */ if (map) { int i; if (!map->map_initialized) map->map_init_it(map); for (i = 0; i < map->b.utfmap->elem_count; i++) { if (ch >= map->b.utfmap->elems[i].start && ch <= map->b.utfmap->elems[i].end) { return ( ch - map->b.utfmap->elems[i].start ) + map->b.utfmap->elems[i].unicodeval; } } } return MAPPING_NONE; } /* Mappping from unicode */ static uint16 map_utf_rev P_((struct map_info *map, unsigned int val, int *found)); static uint16 map_utf_rev(map,val,found) struct map_info *map; unsigned int val; int * found; { if (val < 0x0080) { *found = 1; return val; /* Identity */ } if (map) { int i; if (!map->map_initialized) map->map_init_it(map); for (i = 0; i < map->b.utfmap->elem_count; i++) { int n = map->b.utfmap->elems[i].end - map->b.utfmap->elems[i].start; if (val >= map->b.utfmap->elems[i].unicodeval && val <= map->b.utfmap->elems[i].unicodeval+n) { *found = 1; return ( val - map->b.utfmap->elems[i].unicodeval ) + map->b.utfmap->elems[i].start; } } } *found = 0; return 0x003F; /* '?' 
*/ } static void map_init_utfX P_((struct map_info *map)); static void map_init_utfX(map) struct map_info *map; { map->b.utfmap = safe_malloc(sizeof (* (map -> b.utfmap))); map->b.utfmap->elem_count = sizeof (utfXelems) / sizeof (utfXelems[0]); map->b.utfmap->elems = utfXelems; map->map_initialized = 1; DPRINT(Debug,5,(&Debug, "Map %s initialized\n",map->map_name)); } struct map_info map_utf8 = { &cs_utf8, "UNICODE", 0, map_init_utfX, 0 }; struct map_info map_utf7 = { &cs_utf7, "UNICODE", 0, map_init_utfX, 0 }; static struct map_info map_imap = { &cs_imap, "UNICODE", 0, map_init_utfX, 0 }; /* 'imap' charset -- used for IMAP_name_convention */ static struct charcode_info imap_encoding = { CS_charset_magic, &cs_imap, &map_imap, SET_valid, "*IMAP*", NULL, NULL, 0, NULL }; CONST charset_t IMAP_ENCODING = &imap_encoding; static struct charcode_info utf8_encoding = { CS_charset_magic, &cs_utf8, &map_utf8, SET_valid, "UTF-8", NULL, &set_utf8, 106, NULL }; CONST charset_t UTF8_ENCODING = &utf8_encoding; static int read_elemlist P_((const char *name, struct utfelem ** elems, int *elemcount)); static int read_elemlist(name,elems,elemcount) CONST char *name; struct utfelem ** elems; int *elemcount; { static struct elemlist { char * name; struct utfelem * elems; int elemcount; } * ELIST = NULL; static int ECOUNT = 0; FILE *F; int i; char buffer[STRING]; int l; char *fn = NULL; for (i = 0; i < ECOUNT; i++) { if (0 == strcmp(ELIST[i].name,name)) { *elems = ELIST[i].elems; *elemcount = ELIST[i].elemcount; return 1; } } F = open_mapname(name,&fn); if (!F) { if (fn) free(fn); return 0; } ELIST = safe_realloc(ELIST,(ECOUNT+1)*sizeof (struct elemlist)); ELIST[ECOUNT].name = safe_strdup(name); ELIST[ECOUNT].elems = NULL; ELIST[ECOUNT].elemcount = 0; while (0 < (l = mail_gets(buffer,sizeof buffer,F))) { long s1, e1, s2, e2; char *p; if (buffer[l-1] != '\n') { lib_error(CATGETS(elm_msg_cat, MeSet,MeMapTooLongLine, "Map %s: %s: Too long line: %s"), name,fn,buffer); } if (buffer[0] == 
'#') continue; s1 = strtol(buffer,&p,16); e1 = s1; if (*p == '-') e1 = strtol(p+1,&p,16); if (*p == ' ' || *p == '\t') { while (*p == ' ' || *p == '\t') p++; s2 = strtol(p,&p,16); e2 = s2; if (*p == '-') e2 = strtol(p+1,&p,16); } else { lib_error(CATGETS(elm_msg_cat, MeSet,MeMapBadValue, "Map %s: %s: Bad value: %s"), name,fn,buffer); continue; } if (*p != '\n' || e1 - s1 < 0 || e1 - s1 != e2 - s2 || s1 < 0x0000 || e1 > 0xFFFF || s2 < 0x0000 || e1 > 0xFFFF) { lib_error(CATGETS(elm_msg_cat, MeSet,MeMapBadValue, "Map %s: %s: Bad value: %s"), name,fn,buffer); continue; } ELIST[ECOUNT].elems = safe_realloc(ELIST[ECOUNT].elems, (ELIST[ECOUNT].elemcount+1) * sizeof (struct utfelem)); ELIST[ECOUNT].elems[ELIST[ECOUNT].elemcount].start = s1; ELIST[ECOUNT].elems[ELIST[ECOUNT].elemcount].end = e1; ELIST[ECOUNT].elems[ELIST[ECOUNT].elemcount].unicodeval = s2; ELIST[ECOUNT].elemcount++; } *elems = ELIST[ECOUNT].elems; *elemcount = ELIST[ECOUNT].elemcount; ECOUNT++; if (fn) free(fn); return 1; } static void map_init_bad P_((struct map_info *map)); static void map_init_bad(map) struct map_info *map; { panic("STRING PANIC",__FILE__,__LINE__,"map_init_bad", "map_init_bad called",0); } static struct map_info * open_utf8_map P_((const char * map_name)); static struct map_info * open_utf8_map(map_name) CONST char * map_name; { struct map_info *ret = NULL; struct utfelem * elems; int elemcount; if (read_elemlist(map_name,&elems,&elemcount)) { ret = safe_malloc(sizeof (struct map_info)); ret -> map_type = &cs_utf8; ret -> map_name = safe_strdup(map_name); ret -> b.utfmap = safe_malloc(sizeof (* (ret -> b.utfmap))); ret -> map_initialized = 1; ret -> map_init_it = map_init_bad; ret -> b.utfmap->elem_count = elemcount; ret -> b.utfmap->elems = elems; } return ret; } static struct map_info * open_utf7_map P_((const char * map_name)); static struct map_info * open_utf7_map(map_name) CONST char * map_name; { struct map_info *ret = NULL; struct utfelem * elems; int elemcount; if 
(read_elemlist(map_name,&elems,&elemcount)) { ret = safe_malloc(sizeof (struct map_info)); ret -> map_type = &cs_utf7; ret -> map_name = safe_strdup(map_name); ret -> b.utfmap = safe_malloc(sizeof (* (ret -> b.utfmap))); ret -> map_initialized = 1; ret -> map_init_it = map_init_bad; ret -> b.utfmap->elem_count = elemcount; ret -> b.utfmap->elems = elems; } return ret; } static struct map_info * open_imap_map P_((const char * map_name)); static struct map_info * open_imap_map(map_name) CONST char * map_name; { struct map_info *ret = NULL; struct utfelem * elems; int elemcount; if (read_elemlist(map_name,&elems,&elemcount)) { ret = safe_malloc(sizeof (struct map_info)); ret -> map_type = &cs_imap; ret -> map_name = safe_strdup(map_name); ret -> b.utfmap = safe_malloc(sizeof (* (ret -> b.utfmap))); ret -> map_initialized = 1; ret -> map_init_it = map_init_bad; ret -> b.utfmap->elem_count = elemcount; ret -> b.utfmap->elems = elems; } return ret; } /* ------------------------------------------------------------------------ */ static void cs_init_utf P_((struct string *str)); static void cs_init_utf(str) struct string *str; { str->p->len = 0; str->p->a.words = 0; } static void cs_free_utf P_((struct string *str)); static void cs_free_utf(str) struct string *str; { if (str->p->a.words) { free(str->p->a.words); str->p->a.words = NULL; } str->p->len = 0; } static int cs_add_streambyte_to_utf8 P_((struct string *str,int ch)); static int cs_add_streambyte_to_s_utf8 P_((struct charset_state *str, int ch)); static void cs_soft_reset_s_utf8 P_((struct charset_state *str)); static int cs_add_streambyte_to_utf8(str,ch) struct string *str; int ch; { if (!str->p->state) str->p->state = new_state_1(str->string_type); if (str->p->state->charset->charset_type != &cs_utf8) panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambyte_to_utf8", "Bad state",0); if (!cs_add_streambyte_to_s_utf8(str->p->state,ch)) { DPRINT(Debug,10,(&Debug, "cs_add_streambyte_to_utf8: Failed to add byte\n")); 
return 0; } if (str->p->state->p->ready) { if (str->p->state->p->a.utf8.value > 0xFFFF) return 0; /* NOTE: str->p->a.words is not NUL terminated */ str->p->a.words = safe_realloc(str->p->a.words, (str->p->len+1)* sizeof (uint16)); str->p->a.words[str->p->len++] = str->p->state->p->a.utf8.value; cs_soft_reset_s_utf8(str->p->state); } return 1; } static int cs_add_streambyte_to_utf7 P_((struct string *str,int ch)); static int cs_add_streambyte_to_s_utf7 P_((struct charset_state *str, int ch)); static void cs_soft_reset_s_utf7 P_((struct charset_state *str)); static int cs_add_streambyte_to_utf7(str,ch) struct string *str; int ch; { if (!str->p->state) str->p->state = new_state_1(str->string_type); if (str->p->state->charset->charset_type != &cs_utf7) panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambyte_to_utf7", "Bad state",0); if (!cs_add_streambyte_to_s_utf7(str->p->state,ch)) { DPRINT(Debug,10,(&Debug, "cs_add_streambyte_to_utf7: Failied to add byte\n")); return 0; } if (str->p->state->p->ready) { if (str->p->state->p->a.utf7.value > 0xFFFF) return 0; /* NOTE: str->p->a.words is not NUL terminated */ str->p->a.words = safe_realloc(str->p->a.words, (str->p->len+1)* sizeof (uint16)); str->p->a.words[str->p->len++] = str->p->state->p->a.utf7.value; cs_soft_reset_s_utf7(str->p->state); } return 1; } static int cs_add_streambyte_to_imap P_((struct string *str,int ch)); static int cs_add_streambyte_to_s_imap P_((struct charset_state *str, int ch)); static int cs_add_streambyte_to_imap(str,ch) struct string *str; int ch; { if (!str->p->state) str->p->state = new_state_1(str->string_type); if (str->p->state->charset->charset_type != &cs_imap) panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambyte_to_imap", "Bad state",0); if (!cs_add_streambyte_to_s_imap(str->p->state,ch)) { DPRINT(Debug,10,(&Debug, "cs_add_streambyte_to_imap: Failied to add byte\n")); return 0; } if (str->p->state->p->ready) { if (str->p->state->p->a.utf7.value > 0xFFFF) return 0; /* NOTE: 
str->p->a.words is not NUL terminated */ str->p->a.words = safe_realloc(str->p->a.words, (str->p->len+1)* sizeof (uint16)); str->p->a.words[str->p->len++] = str->p->state->p->a.utf7.value; cs_soft_reset_s_utf7(str->p->state); } return 1; } static void cs_add_intdata_to_utf P_((struct string *str, const struct string *data)); static void cs_add_intdata_to_utf(str,data) struct string *str; CONST struct string *data; { if (data->p->len > 0) { int i; /* realloc with size 0 is equivalent of free and may corrupt memory ... */ /* NOTE: str->p->a.words is not NUL terminated */ str->p->a.words = safe_realloc(str->p->a.words, (str->p->len+data->p->len)* sizeof (uint16)); for (i = 0; i < data->p->len; i++) { str->p->a.words[str->p->len++] = data->p->a.words[i]; } } } /* Does compression, recalculation and so on ... so 'const' is not very const */ static void cs_check_length_utf P_((const struct string *str)); static void cs_check_length_utf(str) CONST struct string *str; { if (str->p->len > 0) { int L = str->p->len; int ok = 1; int i; uint16 *words = safe_malloc(L * sizeof (uint)); /* Convert to UNICODE first */ for (i = 0; i < L; i++) { words[i] = map_utf(str->string_type->map_info, str->p->a.words[i]); if (words[i] == MAPPING_NONE) { DPRINT(Debug,61,(&Debug, "%s: WARNING: Mapping %04X for compression failed\n", str->string_type->MIME_name ? str->string_type->MIME_name : "", str->p->a.words[i])); ok = 0; } } if (ok) ok = compress_unicode(words,&L); if (ok) { for (i = 0; i < L; i++) { int found; str->p->a.words[i] = map_utf_rev(str->string_type->map_info,words[i],&found); if (!found) { DPRINT(Debug,61,(&Debug, "%s: WARNING: Mapping back %04X after compression failed\n", str->string_type->MIME_name ? str->string_type->MIME_name : "", words[i])); } } DPRINT(Debug,61,(&Debug, "%s: String compressed len=%d => %d\n", str->string_type->MIME_name ? 
str->string_type->MIME_name : "", str->p->len,L)); str->p->len = L; } free(words); } } static uint16 cs_give_unicode_from_utf P_((const struct string *str, int pos, int *found)); static uint16 cs_give_unicode_from_utf(str,pos,found) CONST struct string *str; int pos; int *found; { uint16 res; if (pos < 0 || pos >= str->p->len) panic("STRING PANIC",__FILE__,__LINE__,"cs_give_unicode_from_utf", "Index out of array",0); res = map_utf(str->string_type->map_info,str->p->a.words[pos]); if (res != MAPPING_NONE) { *found = 1; return res; } *found = 0; return 0x003F; /* '?' */ } static void cs_add_unicodedata_to_utf P_((struct string *str, int len, const uint16 *data)); static void cs_add_unicodedata_to_utf(str,len,data) struct string *str; int len; CONST uint16 *data; { if (len > 0) { int i; /* realloc with size 0 is equivalent of free and may corrupt memory ... */ /* NOTE: str->p->a.words is not NUL terminated */ str->p->a.words = safe_realloc(str->p->a.words, (str->p->len+len)* sizeof (uint16)); for (i = 0; i < len; i++) { int found; str->p->a.words[str->p->len++] = map_utf_rev(str->string_type->map_info,data[i],&found); } } } static int cs_cmp_utf P_((const struct string *str1,const struct string *str2)); static int cs_cmp_utf(str1,str2) CONST struct string *str1; CONST struct string *str2; { int i; if (str1->string_type->charset_type != str2->string_type->charset_type) panic("STRING PANIC",__FILE__,__LINE__,"cs_cmp_utf", "String type mismatch",0); for (i = 0; i < str1->p->len && i < str2->p->len; i++) { if (str1->p->a.words[i] < str2->p->a.words[i]) return -1; if (str1->p->a.words[i] > str2->p->a.words[i]) return 1; } if (i < str1->p->len) return 1; if (i < str2->p->len) return -1; return 0; } typedef unsigned char utf8_buffer[4]; static int gen_utf8_char P_((utf8_buffer buffer, unsigned int val)); static int gen_utf8_char(buffer,val) utf8_buffer buffer; unsigned int val; { if (val < 0x0080) { buffer[0] = val; return 1; } else { int l,x; uint16 bits; if (val < 0x7FF) { 
bits = 192; l = x = 2; } else { bits = 224; l = x = 3; } while (x > 0) { buffer[--x] = 128 | (val & 63); val >>= 6; } if (val) { panic("STRING PANIC",__FILE__,__LINE__,"gen_utf8_char", "Value overflow?",0); } buffer[0] |= bits; return l; } } static unsigned char *cs_stream_from_utf8 P_((const struct string *str, int printable, screen_info_p terminal, int *reslen)); static unsigned char *cs_stream_from_utf8(str,printable,terminal,reslen) CONST struct string *str; int printable; screen_info_p terminal; int *reslen; { unsigned char * res = safe_malloc(str->p->len*3+1); int i; int x = 0; for (i = 0; i < str->p->len; i++) { uint16 val = str->p->a.words[i]; if (printable) { uint16 unicode = map_utf(str->string_type->map_info,val); if (MAPPING_NONE == unicode || !unicode_ch(unicode,UOP_printable)) val = 0x003F; /* '?' */ } x += gen_utf8_char(res+x,val); } res[x] = '\0'; *reslen = x; return res; } static int need_utf7_encode P_((unsigned int unicode)); static int need_utf7_encode(unicode) unsigned int unicode; { /* directly encoded characters */ if (0x0041 /* 'A' */ <= unicode && 0x005A /* 'Z' */ >= unicode) return 0; if (0x0061 /* 'a' */ <= unicode && 0x007A /* 'z' */ >= unicode) return 0; if (0x0030 /* '0' */ <= unicode && 0x0039 /* '9' */ >= unicode) return 0; switch (unicode) { /* directly encoded characters */ /* ' */ case 39: return 0; /* ( */ case 40: return 0; /* ) */ case 41: return 0; /* , */ case 44: return 0; /* - */ case 45: return 0; /* . */ case 46: return 0; /* / */ case 47: return 0; /* : */ case 58: return 0; /* ? */ case 63: return 0; /* optional direct characters */ /* ! 
*/ case 33: return utf7_encode_optional; /* " */ case 34: return utf7_encode_optional; /* # */ case 35: return utf7_encode_optional; /* $ */ case 36: return utf7_encode_optional; /* % */ case 37: return utf7_encode_optional; /* & */ case 38: return utf7_encode_optional; /* * */ case 42: return utf7_encode_optional; /* ; */ case 59: return utf7_encode_optional; /* < */ case 60: return utf7_encode_optional; /* = */ case 61: return utf7_encode_optional; /* > */ case 62: return utf7_encode_optional; /* @ */ case 64: return utf7_encode_optional; /* [ */ case 91: return utf7_encode_optional; /* ] */ case 93: return utf7_encode_optional; /* ^ */ case 94: return utf7_encode_optional; /* _ */ case 95: return utf7_encode_optional; /* ' */ case 96: return utf7_encode_optional; /* { */ case 123: return utf7_encode_optional; /* | */ case 124: return utf7_encode_optional; /* } */ case 125: return utf7_encode_optional; /* space */ case 32: return 0; /* tab */ case 9: return 0; /* cr */ case 13: return 0; /* lf */ case 10: return 0; } return 1; } static int need_imap_encode P_((unsigned int unicode)); static int need_imap_encode(unicode) unsigned int unicode; { if (unicode >= 0x20 && unicode <= 0x25) return 0; if (unicode >= 0x27 && unicode <= 0x7e) return 0; return 1; } typedef unsigned char utf7_buffer[4]; static int gen_utf7_char P_((int *encoded, int *bits, unsigned long *bitval, utf7_buffer buffer, unsigned int val)); static int gen_utf7_char(encoded,bits,bitval,buffer,val) int *encoded; int *bits; unsigned long *bitval; utf7_buffer buffer; unsigned int val; { int x = 0; if (0x002B /* '+' */ == val && !*encoded) { /* Special case */ buffer[x++] = 0x2B; /* '+' */ buffer[x++] = 0x2D; /* '-' */ } else if (need_utf7_encode(val)) { if (!*encoded) { buffer[x++] = 0x2B; /* '+' */ (*encoded)++; *bits = 0; *bitval = 0; } if (val > 0xFFFF) { panic("STRING PANIC",__FILE__,__LINE__,"gen_utf7_char", "Value overflow",0); } *bitval <<= 16; *bitval |= val; *bits += 16; while (*bits >= 6) { 
long code = *bitval >> (*bits-6); int c = to64(code); if (-1 == c) { panic("STRING PANIC",__FILE__,__LINE__, "gen_utf7_char", "Encode error",0); } buffer[x++] = c; *bitval -= code << (*bits-6); *bits -= 6; } } else { if (*encoded) { if (*bits > 6) panic("STRING PANIC",__FILE__,__LINE__, "gen_utf7_char", "Encode error",0); if (*bits > 0) { /* Put code to most signifact bits, and fill leftover bits with 0 */ long code = *bitval << (6-*bits); int c = to64(code); if (-1 == c) { panic("STRING PANIC",__FILE__,__LINE__, "cs_stream_from_utf7", "Encode error",0); } buffer[x++] = c; *bitval = 0; *bits = 0; } buffer[x++] = 0x2D; /* '-' */ *encoded = 0; } if (val > 0x7f) panic("STRING PANIC",__FILE__,__LINE__, "cs_stream_from_utf7", "Value error",0); buffer[x++] = val; } buffer[x] = '\0'; return x; } static int gen_imap_char P_((int *encoded, int *bits, unsigned long *bitval, utf7_buffer buffer, unsigned int val)); static int gen_imap_char(encoded,bits,bitval,buffer,val) int *encoded; int *bits; unsigned long *bitval; utf7_buffer buffer; unsigned int val; { int x = 0; if (0x0026 /* '&' */ == val && !*encoded) { /* Special case */ buffer[x++] = 0x26; /* '&' */ buffer[x++] = 0x2D; /* '-' */ } else if (need_imap_encode(val)) { if (!*encoded) { buffer[x++] = 0x26; /* '&' */ (*encoded)++; *bits = 0; *bitval = 0; } if (val > 0xFFFF) { panic("STRING PANIC",__FILE__,__LINE__,"gen_imap_char", "Value overflow",0); } *bitval <<= 16; *bitval |= val; *bits += 16; while (*bits >= 6) { long code = *bitval >> (*bits-6); int c = toimap(code); if (-1 == c) { panic("STRING PANIC",__FILE__,__LINE__, "gen_imap_char", "Encode error",0); } buffer[x++] = c; *bitval -= code << (*bits-6); *bits -= 6; } } else { if (*encoded) { if (*bits > 6) panic("STRING PANIC",__FILE__,__LINE__, "gen_imap_char", "Encode error",0); if (*bits > 0) { /* But code to most signifact bits, and fill leftover bits with 0 */ long code = *bitval << (6-*bits); int c = toimap(code); if (-1 == c) { panic("STRING 
PANIC",__FILE__,__LINE__, "cs_stream_from_imap", "Encode error",0); } buffer[x++] = c; *bitval = 0; *bits = 0; } buffer[x++] = 0x2D; /* '-' */ *encoded = 0; } if (val > 0x7f) panic("STRING PANIC",__FILE__,__LINE__, "cs_stream_from_imap", "Value error",0); buffer[x++] = val; } buffer[x] = '\0'; return x; } static int end_utf7_char P_((int *encoded, int *bits, unsigned long *bitval, utf7_buffer buffer)); static int end_utf7_char(encoded,bits,bitval,buffer) int *encoded; int *bits; unsigned long *bitval; utf7_buffer buffer; { int x = 0; if (*encoded) { if (*bits > 0) { /* But code to most signifact bits, and fill leftover bits with 0 */ long code = *bitval << (6-*bits); int c = to64(code); if (-1 == c) { panic("STRING PANIC",__FILE__,__LINE__, "cs_stream_from_utf7", "Encode error",0); } buffer[x++] = c; *bitval = 0; *bits = 0; } buffer[x++] = 0x2D; /* '-' */ *encoded = 0; } return x; } static int end_imap_char P_((int *encoded, int *bits, unsigned long *bitval, utf7_buffer buffer)); static int end_imap_char(encoded,bits,bitval,buffer) int *encoded; int *bits; unsigned long *bitval; utf7_buffer buffer; { int x = 0; if (*encoded) { if (*bits > 0) { /* But code to most signifact bits, and fill leftover bits with 0 */ long code = *bitval << (6-*bits); int c = toimap(code); if (-1 == c) { panic("STRING PANIC",__FILE__,__LINE__, "cs_stream_from_imap", "Encode error",0); } buffer[x++] = c; *bitval = 0; *bits = 0; } buffer[x++] = 0x2D; /* '-' */ *encoded = 0; } return x; } static unsigned char *cs_stream_from_utf7 P_((const struct string *str, int printable, screen_info_p terminal, int *reslen)); static unsigned char *cs_stream_from_utf7(str,printable,terminal,reslen) CONST struct string *str; int printable; screen_info_p terminal; int *reslen; { unsigned char * res = safe_malloc(str->p->len*5+1); int i; int x = 0; int encoded = 0; int bits = 0; unsigned long bitval = 0; for (i = 0; i < str->p->len; i++) { uint16 val = str->p->a.words[i]; if (printable) { uint16 unicode = 
map_utf(str->string_type->map_info,val); if (MAPPING_NONE == unicode || !unicode_ch(unicode,UOP_printable)) val = 0x003F; /* '?' */ } x += gen_utf7_char(&encoded,&bits,&bitval,res+x,val); } x += end_utf7_char(&encoded,&bits,&bitval,res+x); res[x] = '\0'; *reslen = x; return res; } static unsigned char *cs_stream_from_imap P_((const struct string *str, int printable, screen_info_p terminal, int *reslen)); static unsigned char *cs_stream_from_imap(str,printable,terminal,reslen) CONST struct string *str; int printable; screen_info_p terminal; int *reslen; { unsigned char * res = safe_malloc(str->p->len*5+1); int i; int x = 0; int encoded = 0; int bits = 0; unsigned long bitval = 0; for (i = 0; i < str->p->len; i++) { uint16 val = str->p->a.words[i]; if (printable) { uint16 unicode = map_utf(str->string_type->map_info,val); if (MAPPING_NONE == unicode || !unicode_ch(unicode,UOP_printable)) val = 0x003F; /* '?' */ } x += gen_imap_char(&encoded,&bits,&bitval,res+x,val); } x += end_imap_char(&encoded,&bits,&bitval,res+x); res[x] = '\0'; *reslen = x; return res; } static int cs_can_ascii_utf P_((const struct string *str)); static int cs_can_ascii_utf(str) CONST struct string *str; { int i; for (i = 0; i < str->p->len; i++) { /* str->p->a.words is unsigned */ if (str->p->a.words[i] > 127) return 0; } return 1; } static unsigned char *cs_streamclip_from_utf8 P_((const struct string *str, int *pos, int len, screen_info_p terminal, struct cs_printable_len *printable_len)); static unsigned char *cs_streamclip_from_utf8(str,pos,len,terminal,printable_len) CONST struct string *str; int *pos; int len; screen_info_p terminal; /* NOT USED */ struct cs_printable_len *printable_len; /* NOT USED */ { unsigned char * ret; int l = 0,i; if (*pos < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_utf8", "Index out of array",0); if (len < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_utf8", "Negative size",0); ret = safe_malloc(len*3+1); for (i = 0; i < len && 
*pos < str->p->len; i++, (*pos)++) { uint16 val = str->p->a.words[*pos]; uint16 unicode = map_utf(str->string_type->map_info,val); if (MAPPING_NONE == unicode || !unicode_ch(unicode,UOP_printable)) val = 0x003F; /* '?' */ l += gen_utf8_char(ret+l,val); } ret[l] = '\0'; return ret; } static unsigned char *cs_streamclip_from_utf7 P_((const struct string *str, int *pos, int len, screen_info_p terminal, struct cs_printable_len *printable_len)); static unsigned char *cs_streamclip_from_utf7(str,pos,len,terminal,printable_len) CONST struct string *str; int *pos; int len; screen_info_p terminal; /* NOT USED */ struct cs_printable_len *printable_len; /* NOT USED */ { unsigned char * ret; int l = 0,i; int encoded = 0; int bits = 0; unsigned long bitval = 0; if (*pos < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_utf7", "Index out of array",0); if (len < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_utf7", "Negative size",0); ret = safe_malloc(len*5+1); for (i = 0; i < len && *pos < str->p->len; i++, (*pos)++) { uint16 val = str->p->a.words[*pos]; uint16 unicode = map_utf(str->string_type->map_info,val); if (MAPPING_NONE == unicode || !unicode_ch(unicode,UOP_printable)) val = 0x003F; /* '?' 
*/ l += gen_utf7_char(&encoded,&bits,&bitval,ret+l,val); } l += end_utf7_char(&encoded,&bits,&bitval,ret+l); ret[l] = '\0'; return ret; } static unsigned char *cs_streamclip_from_imap P_((const struct string *str, int *pos, int len, screen_info_p terminal, struct cs_printable_len *printable_len)); static unsigned char *cs_streamclip_from_imap(str,pos,len,terminal,printable_len) CONST struct string *str; int *pos; int len; screen_info_p terminal; /* NOT USED */ struct cs_printable_len *printable_len; /* NOT USED */ { unsigned char * ret; int l = 0,i; int encoded = 0; int bits = 0; unsigned long bitval = 0; if (*pos < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_imap", "Index out of array",0); if (len < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_imap", "Negative size",0); ret = safe_malloc(len*5+1); for (i = 0; i < len && *pos < str->p->len; i++, (*pos)++) { uint16 val = str->p->a.words[*pos]; uint16 unicode = map_utf(str->string_type->map_info,val); if (MAPPING_NONE == unicode || !unicode_ch(unicode,UOP_printable)) val = 0x003F; /* '?' */ l += gen_imap_char(&encoded,&bits,&bitval,ret+l,val); } l += end_imap_char(&encoded,&bits,&bitval,ret+l); ret[l] = '\0'; return ret; } static void cs_clip_from_utf P_((struct string *ret, const struct string *str, int *pos, int len)); static void cs_clip_from_utf(ret,str,pos,len) struct string *ret; CONST struct string *str; int *pos; int len; { int i; if (ret->string_type->charset_type != str->string_type->charset_type) panic("STRING PANIC",__FILE__,__LINE__,"cs_clip_from_utf", "String type mismatch",0); if (*pos < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_clip_from_utf", "Index out of array",0); if (len < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_clip_from_utf", "Negative size",0); if (len > 0) { /* realloc with len == 0 is equivalent of freeing and may result corruption of memory ... 
*/ /* NOTE: str->p->a.words is not NUL terminated */ ret->p->a.words = safe_realloc(ret->p->a.words, len* sizeof (uint16)); ret->p->len = 0; for (i = 0; i < len && *pos < str->p->len; i++, (*pos)++) { ret->p->a.words[ret->p->len++] = str->p->a.words[*pos]; } } else { /* NULL (empty) string */ if (ret->p->a.words) free(ret->p->a.words); ret->p->a.words = NULL; ret->p->len = 0; } } static int cs_find_pattern_from_utf P_((const struct string *str, const struct string *pattern, int ignore_case)); static int cs_find_pattern_from_utf(str,pattern,ignore_case) CONST struct string *str; CONST struct string *pattern; int ignore_case; { int ret = 0; int i, j; if (pattern->string_type->charset_type != str->string_type->charset_type) panic("STRING PANIC",__FILE__,__LINE__,"cs_find_pattern_from_utf", "String type mismatch",0); if (ignore_case) { DPRINT(Debug,63,(&Debug, "cs_find_pattern_from_utf=-1\n")); return -1; /* Use UNICODE comparision on upper level instead */ } for (i = 0; i < str->p->len; ) { CONST int s = i + 1; for (j = 0; j < pattern->p->len && i < str->p->len; j++,i++) { uint16 c2 = pattern->p->a.words[j]; uint16 c1 = str->p->a.words[i]; if (c1 != c2) break; } if (j >= pattern->p->len) { DPRINT(Debug,63,(&Debug, "cs_find_pattern_from_utf=1 MATCH\n")); ret = 1; break; } i = s; } if (!ret) { DPRINT(Debug,63,(&Debug, "cs_find_pattern_from_utf=0 NO MATCH\n")); } return ret; } /* Returns number of bytes added */ static int cs_add_streambytes_to_utf8 P_((struct string *str, int count, const unsigned char *data, int *errors)); static int cs_add_streambytes_to_utf8(str,count,data, errors) struct string *str; int count; CONST unsigned char *data; int *errors; { *errors = 0; if (count > 0) { int i; /* realloc with size 0 is equivalent of free and may corrupt memory ... 
*/ if (!str->p->state) str->p->state = new_state_1(str->string_type); if (str->p->state->charset->charset_type != &cs_utf8) panic("STRING PANIC",__FILE__,__LINE__, "cs_add_streambytes_to_utf8", "Bad state",0); /* NOTE: str->p->a.words is not NUL terminated */ str->p->a.words = safe_realloc(str->p->a.words, (str->p->len+count)* sizeof (uint16)); for (i = 0; i < count; i++) { if (!cs_add_streambyte_to_s_utf8(str->p->state,data[i])) { DPRINT(Debug,10,(&Debug, "cs_add_streambytes_to_utf8: Failed to add byte, idx=%d\n", i)); handle_error: (*errors)++; str->p->a.words[str->p->len++] = MAPPING_NONE; cs_soft_reset_s_utf8(str->p->state); continue; } if (str->p->state->p->ready) { if (str->p->state->p->a.utf8.value > 0xFFFF) { DPRINT(Debug,10,(&Debug, "cs_add_streambytes_to_utf8: Value (%X) out of range\n", str->p->state->p->a.utf8.value)); goto handle_error; } str->p->a.words[str->p->len++] = str->p->state->p->a.utf8.value; cs_soft_reset_s_utf8(str->p->state); } } return i; } return 0; } static int cs_add_streambytes_to_utf7 P_((struct string *str, int count, const unsigned char *data, int *errors)); static int cs_add_streambytes_to_utf7(str,count,data,errors) struct string *str; int count; CONST unsigned char *data; int *errors; { *errors = 0; if (count > 0) { int i; /* realloc with size 0 is equivalent of free and may corrupt memory ... 
*/ if (!str->p->state) str->p->state = new_state_1(str->string_type); if (str->p->state->charset->charset_type != &cs_utf7) panic("STRING PANIC",__FILE__,__LINE__, "cs_add_streambytes_to_utf7", "Bad state",0); /* NOTE: str->p->a.words is not NUL terminated */ str->p->a.words = safe_realloc(str->p->a.words, (str->p->len+count)* sizeof (uint16)); for (i = 0; i < count; i++) { if (!cs_add_streambyte_to_s_utf7(str->p->state,data[i])) { DPRINT(Debug,10,(&Debug, "cs_add_streambytes_to_utf7: Failed to add byte, idx=%d\n", i)); handle_error: (*errors) ++; str->p->a.words[str->p->len++] = MAPPING_NONE; cs_soft_reset_s_utf7(str->p->state); continue; } if (str->p->state->p->ready) { if (str->p->state->p->a.utf7.value > 0xFFFF) { DPRINT(Debug,60,(&Debug, "cs_add_streambytes_to_utf7: Value (%X) out of range\n", str->p->state->p->a.utf7.value)); goto handle_error; } str->p->a.words[str->p->len++] = str->p->state->p->a.utf7.value; cs_soft_reset_s_utf7(str->p->state); } } return i; } return 0; } static int cs_add_streambytes_to_imap P_((struct string *str, int count, const unsigned char *data, int *errors)); static int cs_add_streambytes_to_imap(str,count,data,errors) struct string *str; int count; CONST unsigned char *data; int *errors; { *errors = 0; if (count > 0) { int i; /* realloc with size 0 is equivalent of free and may corrupt memory ... 
*/ if (!str->p->state) str->p->state = new_state_1(str->string_type); if (str->p->state->charset->charset_type != &cs_imap) panic("STRING PANIC",__FILE__,__LINE__, "cs_add_streambytes_to_imap", "Bad state",0); /* NOTE: str->p->a.words is not NUL terminated */ str->p->a.words = safe_realloc(str->p->a.words, (str->p->len+count)* sizeof (uint16)); for (i = 0; i < count; i++) { if (!cs_add_streambyte_to_s_imap(str->p->state,data[i])) { DPRINT(Debug,10,(&Debug, "cs_add_streambytes_to_imap: Failed to add byte, idx=%d\n", i)); handle_error: (*errors)++; str->p->a.words[str->p->len++] = MAPPING_NONE; continue; } if (str->p->state->p->ready) { if (str->p->state->p->a.utf7.value > 0xFFFF) { DPRINT(Debug,60,(&Debug, "cs_add_streambytes_to_imap: Value (%X) out of range\n", str->p->state->p->a.utf7.value)); goto handle_error; } str->p->a.words[str->p->len++] = str->p->state->p->a.utf7.value; cs_soft_reset_s_utf7(str->p->state); } } return i; } return 0; } static struct map_info * cs_find_utf8 P_((const char * map_name)); static struct map_info * cs_find_utf8(map_name) CONST char * map_name; { int i; struct map_info *ret; static struct map_info ** dyn_maps = NULL; static int dyn_map_count = 0; if (0 == istrcmp(map_name,map_utf8.map_name)) return &map_utf8; for (i =0; i < dyn_map_count; i++) if (0 == strcmp(dyn_maps[i]->map_name,map_name)) return dyn_maps[i]; ret = open_utf8_map(map_name); if (ret) { dyn_maps = safe_realloc(dyn_maps, (dyn_map_count + 1) * sizeof (struct map_info *)); dyn_maps[dyn_map_count++] = ret; } return ret; } static struct map_info * cs_find_utf7 P_((const char * map_name)); static struct map_info * cs_find_utf7(map_name) CONST char * map_name; { int i; struct map_info *ret; static struct map_info ** dyn_maps = NULL; static int dyn_map_count = 0; if (0 == istrcmp(map_name,map_utf7.map_name)) return &map_utf7; for (i =0; i < dyn_map_count; i++) if (0 == strcmp(dyn_maps[i]->map_name,map_name)) return dyn_maps[i]; ret = open_utf7_map(map_name); if (ret) { 
dyn_maps = safe_realloc(dyn_maps, (dyn_map_count + 1) * sizeof (struct map_info *)); dyn_maps[dyn_map_count++] = ret; } return ret; } static struct map_info * cs_find_imap P_((const char * map_name)); static struct map_info * cs_find_imap(map_name) CONST char * map_name; { int i; struct map_info *ret; static struct map_info ** dyn_maps = NULL; static int dyn_map_count = 0; if (0 == istrcmp(map_name,map_imap.map_name)) return &map_imap; for (i =0; i < dyn_map_count; i++) if (0 == strcmp(dyn_maps[i]->map_name,map_name)) return dyn_maps[i]; ret = open_imap_map(map_name); if (ret) { dyn_maps = safe_realloc(dyn_maps, (dyn_map_count + 1) * sizeof (struct map_info *)); dyn_maps[dyn_map_count++] = ret; } return ret; } static void cs_remove_control_utf P_((const struct string *str)); static void cs_remove_control_utf(str) CONST struct string *str; { int i; for (i = 0; i < str->p->len; i++) { if (str->p->a.words[i] < 32 || str->p->a.words[i] == 127) str->p->a.words[i] = 32; else { uint16 unicode = map_utf(str->string_type->map_info, str->p->a.words[i]); if (MAPPING_NONE == unicode || !unicode_ch(unicode,UOP_noctrl)) str->p->a.words[i] = 32; } } } static void cs_add_state_to_utf8 P_((struct string *str, struct charset_state *ch)); static void cs_add_state_to_utf8(str,ch) struct string *str; struct charset_state *ch; { if (ch->p->a.utf8.value > 0xFFFF) return; /* NOTE: str->p->a.words is not NUL terminated */ str->p->a.words = safe_realloc(str->p->a.words, (str->p->len+1)* sizeof (uint16)); str->p->a.words[str->p->len++] = ch->p->a.utf8.value; } static void cs_add_state_to_utf7 P_((struct string *str, struct charset_state *ch)); static void cs_add_state_to_utf7(str,ch) struct string *str; struct charset_state *ch; { if (ch->p->a.utf7.value > 0xFFFF) return; /* NOTE: str->p->a.words is not NUL terminated */ str->p->a.words = safe_realloc(str->p->a.words, (str->p->len+1)* sizeof (uint16)); str->p->a.words[str->p->len++] = ch->p->a.utf7.value; } static void cs_init_s_utf8 
P_((struct charset_state *str));

/* Initialize utf-8 decoder state: no sequence in progress, no value,
 * not ready.
 */
static void cs_init_s_utf8(str)
     struct charset_state *str;
{
    str->p->a.utf8.bytes = 0;
    str->p->a.utf8.idx   = 0;
    str->p->a.utf8.value = 0;
    str->p->ready        = 0;
}

/* Initialize utf-7 / imap decoder state: direct (not encoded) mode,
 * no pending bits, no value, not ready.
 */
static void cs_init_s_utf7 P_((struct charset_state *str));
static void cs_init_s_utf7(str)
     struct charset_state *str;
{
    str->p->a.utf7.encoded  = 0;
    str->p->a.utf7.bitcount = 0;
    str->p->a.utf7.bitval   = 0;
    str->p->a.utf7.value    = 0;
    str->p->ready           = 0;
}

/* Release utf-8 decoder state.  Nothing is allocated for the state
 * by the routines here, so this only zeroes the same fields as
 * cs_init_s_utf8().
 */
static void cs_free_s_utf8 P_((struct charset_state *str));
static void cs_free_s_utf8(str)
     struct charset_state *str;
{
    str->p->a.utf8.bytes = 0;
    str->p->a.utf8.idx   = 0;
    str->p->a.utf8.value = 0;
    str->p->ready        = 0;
}

/* Release utf-7 / imap decoder state; only zeroes the same fields as
 * cs_init_s_utf7().
 */
static void cs_free_s_utf7 P_((struct charset_state *str));
static void cs_free_s_utf7(str)
     struct charset_state *str;
{
    str->p->a.utf7.encoded  = 0;
    str->p->a.utf7.bitcount = 0;
    str->p->a.utf7.bitval   = 0;
    str->p->a.utf7.value    = 0;
    str->p->ready           = 0;
}

/* Split one byte of UTF-8 stream to prefix length and payload.
 *
 *   c      - byte, must be on range 0-255 (panics otherwise)
 *   count  - receives number of leading 1 bits of the byte:
 *            0 = plain 7-bit character, 1 = continuation byte,
 *            2 or more = first byte of sequence of that many bytes
 *   val    - receives the byte with the leading 1 bits cleared
 *
 * Returns 0 only if all eight bits are set (0xFF), otherwise 1.
 * Note that prefixes up to seven 1 bits (0xFE) are accepted here.
 */
static int split_utf8_byte P_((int c, unsigned char *count,
                               unsigned char *val));
static int split_utf8_byte(c,count,val)
     int c;
     unsigned char *count;
     unsigned char *val;
{
    if (c < 0 || c > 0xFF)
        panic("STRING PANIC",__FILE__,__LINE__,"split_utf8_byte",
              "Value not in range 0-255",0);

    if (c < 0x80) {
        *count = 0;
        *val = c;
        return 1;
    } else {
        int counter = 0;
        int mask = 0x80;

        /* Walk from the top bit down while bits are set, clearing
         * them from c.  mask becomes 0 only if every bit was set.
         */
        for (mask=0x80, counter = 0;
             (mask & c) != 0 && mask != 0;
             mask >>= 1, counter++) {
            c &= ~mask;
        }
        *count = counter;
        *val   = c;
        return (mask != 0);
    }
}

/* Feed one byte of UTF-8 stream to decoder state.
 *
 * On first byte (idx == 0) the expected length and initial payload are
 * stored; on following bytes six payload bits are shifted to the value.
 * str->p->ready is set when a 7-bit character was given or when the
 * announced number of bytes has been collected.  The ready flag is not
 * cleared here on success; that is left to the soft reset routine.
 *
 * Returns 1 if byte was accepted.  Returns 0 (and resets the state)
 * if the byte is 0xFF, a continuation byte arrives when none is
 * expected, or a non-continuation byte arrives in the middle of a
 * sequence.  The offending byte is consumed in every case.
 *
 * Overlong forms and the range of the decoded value are not checked
 * here.
 */
static int cs_add_streambyte_to_s_utf8(str,ch)
     struct charset_state *str;
     int ch;
{
    unsigned char count;
    unsigned char val;

    if (!split_utf8_byte(ch,&count,&val)) {
        /* Also entered via goto from the checks below */
    failure:
        DPRINT(Debug,10,(&Debug,
                         "cs_add_streambyte_to_s_utf8: Bad byte 0x%02X\n",
                         ch));
        str->p->a.utf8.bytes = 0;
        str->p->a.utf8.idx   = 0;
        str->p->a.utf8.value = 0;
        str->p->ready        = 0;
        return 0;
    }

    if (str->p->a.utf8.idx == 0) {
        /* Start of character; continuation byte is not allowed */
        if (count == 1)
            goto failure;
        str->p->a.utf8.bytes = count;
        str->p->a.utf8.idx   = 1;
        str->p->a.utf8.value = val;
        if (0 == count)            /* Ascii character? */
            str->p->ready = 1;
    } else if (count != 1) {
        /* Sequence is not complete, only continuation byte allowed */
        goto failure;
    } else {
        str->p->a.utf8.idx++;
        str->p->a.utf8.value <<= 6;
        str->p->a.utf8.value |= val;
    }

    /* All announced bytes seen?  (bytes is 0 for 7-bit characters,
     * so this does not match them; they were marked ready above.)
     */
    if (str->p->a.utf8.idx == str->p->a.utf8.bytes)
        str->p->ready = 1;
    return 1;
}

/* Feed one byte of UTF-7 stream to decoder state.
 *
 * State field 'encoded' is
 *    0   outside of encoded run (characters are taken as is)
 *    2   '+' was just seen, no base64 character yet
 *    1   inside of encoded run, at least one base64 character seen
 *
 * Inside of encoded run every base64 character adds six bits to
 * bitval; when 16 or more bits are collected the top 16 bits are moved
 * to 'value' and str->p->ready is set.  The ready flag is not cleared
 * here on success; that is left to cs_soft_reset_s_utf7(), which keeps
 * the pending bits.
 *
 * Returns 1 if byte was accepted (it does not necessarily produce
 * a character).  Returns 0 and resets the whole state if the byte is
 * over 0x7F, or if '+' is followed by a character which is neither
 * base64 nor '-'.  Panics if ch is not on range 0-255.
 */
static int cs_add_streambyte_to_s_utf7(str,ch)
     struct charset_state *str;
     int ch;
{
    if (ch < 0 || ch > 0xFF)
        panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambyte_to_s_utf7",
              "Value not in range 0-255",0);

    if (ch > 0x7F) {
        /* Also entered via goto from the encoded branch below */
    failure:
        DPRINT(Debug,10,(&Debug,
                         "cs_add_streambyte_to_s_utf7: Bad byte 0x%02X\n",
                         ch));
        str->p->a.utf7.encoded  = 0;
        str->p->a.utf7.bitcount = 0;
        str->p->a.utf7.bitval   = 0;
        str->p->a.utf7.value    = 0;
        str->p->ready           = 0;
        return 0;
    }

    if (str->p->a.utf7.encoded) {
        /* Negative result is treated as "not a base64 character" */
        int a = base64(ch);

        if (a < 0) {   /* Encoding ends */
            if (0x2D /* '-' */ == ch) {
                if (2 == str->p->a.utf7.encoded) {   /* Hack: Is starting */
                    /* "+-" gives literal '+' */
                    ch = 0x2B; /* '+' */
                    str->p->a.utf7.encoded = 0;
                    goto normal_char;
                }
                str->p->a.utf7.encoded = 0;
                return 1;      /* Eat end char */
            }
            if (2 == str->p->a.utf7.encoded)
                goto failure;  /* Encoding ends right after start character */
            /* Run ended without '-'; deliver this character as is */
            str->p->a.utf7.encoded = 0;
            goto normal_char;
        }
        str->p->a.utf7.encoded = 1;   /* Reset check: no longer at start of run */

        str->p->a.utf7.bitval <<= 6;
        str->p->a.utf7.bitval |= a;
        str->p->a.utf7.bitcount += 6;

        if (str->p->a.utf7.bitcount >= 16) {
            /* Take top 16 bits as value and leave the rest pending */
            str->p->a.utf7.value =
                str->p->a.utf7.bitval >> (str->p->a.utf7.bitcount -16);
            str->p->a.utf7.bitval -=
                str->p->a.utf7.value << (str->p->a.utf7.bitcount -16);
            str->p->a.utf7.bitcount -= 16;
            str->p->ready            = 1;
        }

    } else if (0x2B /* '+' */ == ch) {
        /* Start of encoded string */
        str->p->a.utf7.encoded  = 2;  /* Hack: Is start of encoded string */
        str->p->a.utf7.bitcount = 0;
        str->p->a.utf7.bitval   = 0;
    } else {
        /* Also entered via goto from the encoded branch above */
    normal_char:
        str->p->a.utf7.value = ch;
        str->p->ready        = 1;
    }
    return 1;
}

/* Feed one byte of imap (modified UTF-7) stream to decoder state.
 *
 * Works as cs_add_streambyte_to_s_utf7() and uses the same state
 * fields, with these differences visible here:
 *   - encoded run starts with '&' and "&-" gives literal '&'
 *   - characters are looked up with imap() instead of base64()
 *   - inside of encoded run any character which is neither on the
 *     imap alphabet nor '-' is a failure (the run must end with '-')
 *
 * Returns 1 if byte was accepted, 0 (with whole state reset) on
 * failure.  Panics if ch is not on range 0-255.
 */
static int cs_add_streambyte_to_s_imap(str,ch)
     struct charset_state *str;
     int ch;
{
    if (ch < 0 || ch > 0xFF)
        panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambyte_to_s_imap",
              "Value not in range 0-255",0);

    if (ch > 0x7F) {
        /* Also entered via goto from the encoded branch below */
    failure:
        DPRINT(Debug,10,(&Debug,
                         "cs_add_streambyte_to_s_imap: Bad byte 0x%02X\n",
                         ch));
        str->p->a.utf7.encoded  = 0;
        str->p->a.utf7.bitcount = 0;
        str->p->a.utf7.bitval   = 0;
        str->p->a.utf7.value    = 0;
        str->p->ready           = 0;
        return 0;
    }

    if (str->p->a.utf7.encoded) {
        int a = imap(ch);

        if (a < 0) {   /* Encoding ends */
            if (0x2D /* '-' */ == ch) {
                if (2 == str->p->a.utf7.encoded) {   /* Hack: Is starting */
                    /* "&-" gives literal '&' */
                    ch = 0x26; /* '&' */
                    str->p->a.utf7.encoded = 0;
                    goto normal_char;
                }
                str->p->a.utf7.encoded = 0;
                return 1;      /* Eat end char */
            }

            /* imap encoding requires ending '-'
               utf-7 encoding does not require ending
            */
            goto failure;
        }
        str->p->a.utf7.encoded = 1;   /* Reset check: no longer at start of run */

        str->p->a.utf7.bitval <<= 6;
        str->p->a.utf7.bitval |= a;
        str->p->a.utf7.bitcount += 6;

        if (str->p->a.utf7.bitcount >= 16) {
            /* Take top 16 bits as value and leave the rest pending */
            str->p->a.utf7.value =
                str->p->a.utf7.bitval >> (str->p->a.utf7.bitcount -16);
            str->p->a.utf7.bitval -=
                str->p->a.utf7.value << (str->p->a.utf7.bitcount -16);
            str->p->a.utf7.bitcount -= 16;
            str->p->ready            = 1;
        }

    } else if (0x26 /* '&' */ == ch) {
        /* Start of encoded string */
        str->p->a.utf7.encoded  = 2;  /* Hack: Is start of encoded string */
        str->p->a.utf7.bitcount = 0;
        str->p->a.utf7.bitval   = 0;
    } else {
        /* Also entered via goto from the encoded branch above */
    normal_char:
        str->p->a.utf7.value = ch;
        str->p->ready        = 1;
    }
    return 1;
}

/* Forget current character of utf-8 state.  All fields are zeroed,
 * so this does the same as cs_init_s_utf8().
 */
static void cs_soft_reset_s_utf8(str)
     struct charset_state *str;
{
    str->p->a.utf8.bytes = 0;
    str->p->a.utf8.idx   = 0;
    str->p->a.utf8.value = 0;
    str->p->ready        = 0;
}

/* Forget current character of utf-7 / imap state.  Only value and
 * ready flag are cleared; encoded, bitcount and bitval are kept so
 * that decoding of an encoded run continues with the next byte.
 */
static void cs_soft_reset_s_utf7(str)
     struct charset_state *str;
{
    str->p->a.utf7.value = 0;
    str->p->ready        = 0;
}

/* Give unicode value of character on utf-8 state.
 * Value is mapped with map_utf().  If there is no mapping, 0x003F
 * ('?') is returned and *found is 0; otherwise *found is 1.
 */
static uint16 cs_give_unicode_from_s_utf8 P_((struct charset_state *st,
                                              int *found));
static uint16 cs_give_unicode_from_s_utf8(st,found)
     struct charset_state *st;
     int *found;
{
    uint16 val;

    *found = 0;

    val = map_utf(st->charset->map_info,st->p->a.utf8.value);
    if (val == MAPPING_NONE)
        val = 0x003F;   /* '?' */
    else
        *found = 1;

    return val;
}

/* Give unicode value of character on utf-7 / imap state.
 * Value is mapped with map_utf().  If there is no mapping, 0x003F
 * ('?') is returned and *found is 0; otherwise *found is 1.
 */
static uint16 cs_give_unicode_from_s_utf7 P_((struct charset_state *st,
                                              int *found));
static uint16 cs_give_unicode_from_s_utf7(st,found)
     struct charset_state *st;
     int *found;
{
    uint16 val;

    *found = 0;

    val = map_utf(st->charset->map_info,st->p->a.utf7.value);
    if (val == MAPPING_NONE)
        val = 0x003F;   /* '?' */
    else
        *found = 1;

    return val;
}

/* Compare characters of two utf-8 states.
 * Returns  1 if the stored values are equal,
 *         -1 if they differ and ignore_case is set (comparison is
 *            not decided here),
 *          0 if they differ and ignore_case is not set.
 */
static int cs_s_utf8_same_char P_((struct charset_state *A,
                                   struct charset_state *B,
                                   int ignore_case));
static int cs_s_utf8_same_char(A,B,ignore_case)
     struct charset_state *A;
     struct charset_state *B;
     int ignore_case;
{
    if (A->p->a.utf8.value == B->p->a.utf8.value)
        return 1;

    if (ignore_case)
        return -1;    /* Use UNICODE values for comparison */

    return 0;
}

/* Compare characters of two utf-7 / imap states.
 * Returns  1 if the stored values are equal,
 *         -1 if they differ and ignore_case is set (comparison is
 *            not decided here),
 *          0 if they differ and ignore_case is not set.
 */
static int cs_s_utf7_same_char P_((struct charset_state *A,
                                   struct charset_state *B,
                                   int ignore_case));
static int cs_s_utf7_same_char(A,B,ignore_case)
     struct charset_state *A;
     struct charset_state *B;
     int ignore_case;
{
    if (A->p->a.utf7.value == B->p->a.utf7.value)
        return 1;

    if (ignore_case)
        return -1;    /* Use UNICODE values for comparison */

    return 0;
}

/* Returns 1 if unicode_ch(...,UOP_printable) accepts the mapped value
 * of the character on utf-8 state, otherwise 0.  Result of map_utf()
 * is passed on without testing it against MAPPING_NONE.
 */
static int cs_s_utf8_printable P_((struct charset_state *st));
static int cs_s_utf8_printable(st)
     struct charset_state *st;
{
    uint16 val;

    val = map_utf(st->charset->map_info,st->p->a.utf8.value);

    return unicode_ch(val,UOP_printable) != 0;
}

/* Returns 1 if unicode_ch(...,UOP_printable) accepts the mapped value
 * of the character on utf-7 / imap state, otherwise 0.  Result of
 * map_utf() is passed on without testing it against MAPPING_NONE.
 */
static int cs_s_utf7_printable P_((struct charset_state *st));
static int cs_s_utf7_printable(st)
     struct charset_state *st;
{
    uint16 val;

    val = map_utf(st->charset->map_info,st->p->a.utf7.value);

    return unicode_ch(val,UOP_printable) != 0;
}

/* If character corresponds one byte on stream, returns it.
 * Otherwise returns 0.  This is used implement ReadCh().
* It is assumed that returned character corresponds to * code character set (and perhaps also US-ASCII) */ static int cs_s_utf8_is_onebyte P_((struct charset_state *st)); static int cs_s_utf8_is_onebyte(st) struct charset_state *st; { if (st->p->a.utf8.bytes == 0) return st->p->a.utf8.value; return 0; } static int cs_s_utf7_is_onebyte P_((struct charset_state *st)); static int cs_s_utf7_is_onebyte(st) struct charset_state *st; { if (st->p->a.utf7.encoded == 0) return st->p->a.utf7.value; return 0; } static int cs_utf_properties P_((charset_t st)); static int cs_utf_properties(st) charset_t st; { int prop = 0; /* We know at least ascii part of charset */ prop |= CS_printable | CS_mapping; /* We know all (possible) charcters if mapping is available */ if (st->map_info) prop |= CS_universal_set; return prop; } #if ANSI_C #define S_(x) static x; #else #define S_(x) #endif S_(cs_estimate_clip_string cs_estimate_clip_unsupported) static int cs_estimate_clip_unsupported(str,pos,len,terminal,printable_len) CONST struct string *str; int pos; int len; /* UPPER LIMIT */ screen_info_p terminal; struct cs_printable_len *printable_len; { panic("STRING PANIC",__FILE__,__LINE__,"cs_estimate_clip_unsupported", "cs_estimate_clip_unsupported() called",0); return -1; } static int cs_iso2022_info_set_utf P_((struct charcode_info *new_vals, struct setlist *new_setlist, int setcount)); static int cs_iso2022_info_set_utf(new_vals, new_setlist,setcount) struct charcode_info *new_vals; struct setlist *new_setlist; int setcount; { int i; if (setcount != 1 || iso2022_other != new_setlist->sets[0]->type) { lib_error(CATGETS(elm_msg_cat, MeSet, MeIso2022OtherOnly, "Charset type %s allows only type other-set specifications"), new_vals->charset_type->type_name); return 0; /* Discard bank defination */ } for (i = setcount; i < sizeof (new_setlist->sets) / sizeof (new_setlist->sets[0]); i++) new_setlist->sets[i] = NULL; new_vals->iso2022_info = loc_setlist(*new_setlist); new_vals->flags &= 
~SET_nodata; return 1; } struct charset_type cs_imap = { "imap", cs_init_utf, cs_free_utf, cs_add_streambyte_to_imap, cs_add_intdata_to_utf, cs_check_length_utf, cs_give_unicode_from_utf, cs_add_unicodedata_to_utf, cs_cmp_utf, cs_stream_from_imap, cs_can_ascii_utf, cs_streamclip_from_imap, cs_clip_from_utf, cs_find_pattern_from_utf, cs_add_streambytes_to_imap, cs_find_imap, cs_remove_control_utf, cs_add_state_to_utf7, cs_init_s_utf7, cs_free_s_utf7, cs_add_streambyte_to_s_imap, cs_soft_reset_s_utf7, cs_give_unicode_from_s_utf7, cs_s_utf7_same_char, cs_s_utf7_printable, cs_s_utf7_is_onebyte, cs_utf_properties, cs_estimate_clip_unsupported, cs_iso2022_info_set_utf, &cs_iso2022 }; struct charset_type cs_utf7 = { "utf-7", cs_init_utf, cs_free_utf, cs_add_streambyte_to_utf7, cs_add_intdata_to_utf, cs_check_length_utf, cs_give_unicode_from_utf, cs_add_unicodedata_to_utf, cs_cmp_utf, cs_stream_from_utf7, cs_can_ascii_utf, cs_streamclip_from_utf7, cs_clip_from_utf, cs_find_pattern_from_utf, cs_add_streambytes_to_utf7, cs_find_utf7, cs_remove_control_utf, cs_add_state_to_utf7, cs_init_s_utf7, cs_free_s_utf7, cs_add_streambyte_to_s_utf7, cs_soft_reset_s_utf7, cs_give_unicode_from_s_utf7, cs_s_utf7_same_char, cs_s_utf7_printable, cs_s_utf7_is_onebyte, cs_utf_properties, cs_estimate_clip_unsupported, cs_iso2022_info_set_utf, &cs_imap }; struct charset_type cs_utf8 = { "utf-8", cs_init_utf, cs_free_utf, cs_add_streambyte_to_utf8, cs_add_intdata_to_utf, cs_check_length_utf, cs_give_unicode_from_utf, cs_add_unicodedata_to_utf, cs_cmp_utf, cs_stream_from_utf8, cs_can_ascii_utf, cs_streamclip_from_utf8, cs_clip_from_utf, cs_find_pattern_from_utf, cs_add_streambytes_to_utf8, cs_find_utf8, cs_remove_control_utf, cs_add_state_to_utf8, cs_init_s_utf8, cs_free_s_utf8, cs_add_streambyte_to_s_utf8, cs_soft_reset_s_utf8, cs_give_unicode_from_s_utf8, cs_s_utf8_same_char, cs_s_utf8_printable, cs_s_utf8_is_onebyte, cs_utf_properties, cs_estimate_clip_unsupported, cs_iso2022_info_set_utf, 
&cs_utf7 }; /* * Local Variables: * mode:c * c-basic-offset:4 * buffer-file-coding-system: iso-8859-1 * End: */