static char rcsid[] = "@(#)$Id: cs_unknown.c,v 1.17 2006/10/28 11:33:03 hurtta Exp $"; /****************************************************************************** * The Elm (ME+) Mail System - $Revision: 1.17 $ $State: Exp $ * * Author: Kari Hurtta *****************************************************************************/ #include "headers.h" #include "s_me.h" #include "cs_imp.h" #include "cs_terminal.h" #include #ifndef ANSI_C extern int errno; #endif #ifdef WCWIDTH int wcwidth(wchar_t c); #endif DEBUG_VAR(Debug,__FILE__,"charset"); #if ANSI_C #define S_(x) static x; #else #define S_(x) #endif static unsigned char *s2us P_((char *str)); static unsigned char *s2us(str) char *str; { return (unsigned char *)str; } static char *us2s P_((unsigned char *str)); static char *us2s(str) unsigned char *str; { return(char *)str; } /* ---------------------------------------------------------------- */ #ifdef WCHAR static void convert_to_wchar P_((const struct string *str)); static void convert_to_wchar(str) const struct string *str; { unsigned char *bytes; int b_len; if (str->p->private_flag) panic("STRING PANIC",__FILE__,__LINE__,"convert_to_wchar", "Not in raw form",0); if (str->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"convert_to_wchar", "not a system charset",0); bytes = str->p->a.bytes; b_len = str->p->len; str->p->a.bytes = NULL; str->p->private_flag = 1; /* Indicate wchars */ str->p->a.wchars = NULL; str->p->len = 0; if (bytes) { int x; mbstate_t State; wchar_t * dest = NULL; int alloced; int l = 0; const char * src, * cbytes; DPRINT(Debug,61,(&Debug, "convert_to_wchar: raw=")); for (x = 0; x < b_len; x++) { DPRINT(Debug,61,(&Debug," %02X", bytes[x])); } DPRINT(Debug,61,(&Debug," (len=%d)\n",b_len)); memset(&State, 0, sizeof(State)); /* NOTE: str->p->a.bytes is not NUL terminated * * ! ! ! NOTE! this do not convert strings including \0 * characters */ /* Add terminating \0 */ bytes = safe_realloc(bytes,b_len+1); bytes[b_len] = '\0'; cbytes = (const char *) bytes; alloced = b_len+1; dest = safe_malloc(alloced * sizeof (dest[0])); for (src = cbytes; src; ) { int left = alloced-l; size_t r = mbsrtowcs(dest+l,&src,left,&State); if ((size_t)(-1) == r) { int pos; if ('?' == *src) panic("STRING PANIC",__FILE__,__LINE__, "convert_to_wchar", "? not convertible",0); if ('\0' == *src) panic("STRING PANIC",__FILE__,__LINE__, "convert_to_wchar", "Ooops",0); pos = src-cbytes; DPRINT(Debug,61,(&Debug, "convert_to_wchar: Failed to convert 0x%02x at %d: errno = %d\n", *src,pos,errno)); bytes[pos] = '?'; } else { if (l >= 0) l += r; else panic("STRING PANIC",__FILE__,__LINE__, "convert_to_wchar", "Unexpected return from mbsrtowcs()",0); if (src) { int x; DPRINT(Debug,4,(&Debug, "convert_to_wchar: wide character string longer than original???\n")); x = b_len - (src-cbytes); alloced += x + 10; dest = safe_realloc(dest,alloced * sizeof (dest[0])); } } } if ('\0' != dest[l]) panic("STRING PANIC",__FILE__,__LINE__, "convert_to_wchar", "\\0 not written",0); free(bytes); bytes = NULL; str->p->a.wchars = dest; str->p->len = l; } } #endif /* ---------------------------------------------------------------- */ S_(cs_init_string cs_init_unknown) static void cs_init_unknown P_((struct string *str)); static void cs_init_unknown(str) struct string *str; { str->p->len = 0; str->p->private_flag = 0; str->p->a.bytes = 0; /* private_flag = 0 : Use bytes (raw form) private_flag = 1 : Use wchars (locale) */ } S_(cs_free_string cs_free_unknown) static void cs_free_unknown P_((struct string *str)); static void cs_free_unknown(str) struct string *str; { if (str->p->private_flag) { /* Use wchars (locale) */ /* If str->p->private_flag is set, charset is * locale charset */ if (str->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_free_unknown", "private_flag set for non-system charset",0); #ifdef WCHAR if (str->p->a.wchars) { free(str->p->a.wchars); str->p->a.wchars = NULL; } #else panic("STRING PANIC",__FILE__,__LINE__,"cs_free_unknown", "No wchar_t support",0); #endif } else { /* Use bytes (raw form) */ if (str->p->a.bytes) { free(str->p->a.bytes); str->p->a.bytes = NULL; } } str->p->len = 0; str->p->private_flag = 0; } S_(cs_init_state cs_init_s_unknown) static void cs_init_s_unknown P_((struct charset_state *st)); static void cs_init_s_unknown(st) struct charset_state *st; { st->p->ready = 0; #ifdef WCHAR if (st->charset == system_charset) { st->p->a.wchar.magic = STATE_WCHAR_magic; memset(&(st->p->a.wchar.s), 0, sizeof(st->p->a.wchar.s)); st->p->a.wchar.value = L'\0'; } else #endif { st->p->a.byte = 0; } } S_(cs_free_state cs_free_s_unknown) static void cs_free_s_unknown P_((struct charset_state *st)); static void cs_free_s_unknown(st) struct charset_state *st; { st->p->ready = 0; #ifdef WCHAR if (st->charset == system_charset) { if (STATE_WCHAR_magic != st->p->a.wchar.magic) panic("STRING PANIC",__FILE__,__LINE__,"cs_free_s_unknown", "Bad magic number",0); memset(&(st->p->a.wchar.s), 0, sizeof(st->p->a.wchar.s)); st->p->a.wchar.value = L'\0'; st->p->a.wchar.magic = 0; /* Invalidate magic */ } else #endif { st->p->a.byte = 0; } } S_(cs_soft_reset_state cs_soft_reset_s_unknown) static void cs_soft_reset_s_unknown P_((struct charset_state *st)); static void cs_soft_reset_s_unknown(st) struct charset_state *st; { st->p->ready = 0; /* soft reset should reset only partially state, so ... */ #ifdef WCHAR if (st->charset == system_charset) { static int warned = 0; if (STATE_WCHAR_magic != st->p->a.wchar.magic) panic("STRING PANIC",__FILE__,__LINE__,"cs_soft_reset_s_unknown", "Bad magic number",0); if (!warned && !mbsinit(&(st->p->a.wchar.s))) { warned = 1; lib_error(CATGETS(elm_msg_cat, MeSet,MeUnknownUnsupportedKB, "Unknown charsets are unsupported as keyboard input!")); } memset(&(st->p->a.wchar.s), 0, sizeof(st->p->a.wchar.s)); st->p->a.wchar.value = L'\0'; } else #endif { st->p->a.byte = 0; } } S_(cs_add_streambyte_to_state cs_add_streambyte_to_s_unknown) static int cs_add_streambyte_to_s_unknown P_((struct charset_state *st, int ch)); static int cs_add_streambyte_to_s_unknown(st,ch) struct charset_state *st; int ch; { #ifdef WCHAR if (st->charset == system_charset) { size_t x; CONST char s = ch; if (STATE_WCHAR_magic != st->p->a.wchar.magic) panic("STRING PANIC",__FILE__,__LINE__, "cs_add_streambyte_to_s_unknown", "Bad magic number",0); x = mbrtowc(& st->p->a.wchar.value, &s, 1, &(st->p->a.wchar.s)); if ((size_t)(-2) == x) { DPRINT(Debug,62,(&Debug, "cs_add_streambyte_to_s_unknown: Incomplete sequence -- OK\n")); return 1; } if ((size_t)(-1) == x) { DPRINT(Debug,4,(&Debug, "cs_add_streambyte_to_s_unknown: mbrtowc failed: errno = %d\n", errno)); memset(&(st->p->a.wchar.s), 0, sizeof(st->p->a.wchar.s)); st->p->a.wchar.value = L'\0'; return 0; } st->p->ready = 1; } else #endif { st->p->ready = 1; st->p->a.byte = ch; } return 1; } S_(cs_add_streambyte_to_string cs_add_streambyte_to_unknown) static int cs_add_streambyte_to_unknown P_((struct string *str,int ch)); static int cs_add_streambyte_to_unknown(str,ch) struct string *str; int ch; { #ifdef WCHAR if (!str->p->private_flag && str->string_type == system_charset) convert_to_wchar(str); #endif if (str->p->private_flag) { /* Use wchars (locale) */ /* If str->p->private_flag is set, charset is * locale charset */ if (str->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambyte_to_unknown", "private_flag set for non-system charset",0); #ifdef WCHAR if (!str->p->state) str->p->state = new_state_1(str->string_type); if (str->p->state->charset->charset_type != &cs_unknown) panic("STRING PANIC",__FILE__,__LINE__, "cs_add_streambyte_to_unknown", "Bad state",0); if (!cs_add_streambyte_to_s_unknown(str->p->state,ch)) { DPRINT(Debug,10,(&Debug, "cs_add_streambyte_to_unknown: Failed to add byte\n")); return 0; } if (str->p->state->p->ready) { /* NOTE: str->p->a.wchars is not NUL terminated */ str->p->a.wchars = safe_realloc(str->p->a.wchars, (str->p->len+1)* sizeof (wchar_t)); str->p->a.wchars[str->p->len++] = str->p->state->p->a.wchar.value; str->p->state->p->ready = 0; } return 1; #else panic("STRING PANIC",__FILE__,__LINE__,"cs_free_unknown", "No wchar_t support",0); #endif } else { /* Use bytes (raw form) */ /* NOTE: str->p->a.bytes is not NUL terminated */ str->p->a.bytes = safe_realloc(str->p->a.bytes,str->p->len+1); str->p->a.bytes[str->p->len++] = ch; } return 1; } S_(cs_add_state_to_string cs_add_state_to_unknown) static void cs_add_state_to_unknown P_((struct string *str, struct charset_state *ch)); static void cs_add_state_to_unknown(str,ch) struct string *str; struct charset_state *ch; { if (str->string_type->charset_type != ch->charset->charset_type) panic("STRING PANIC",__FILE__,__LINE__,"cs_add_state_to_unknown", "String/state type mismatch",0); #ifdef WCHAR if (!str->p->private_flag && str->string_type == system_charset) convert_to_wchar(str); #endif if (str->p->private_flag) { /* Use wchars (locale) */ /* If str->p->private_flag is set, charset is * locale charset */ if (str->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_add_state_to_unknown", "private_flag set for non-system charset",0); #ifdef WCHAR if (ch->charset != system_charset) panic("STRING PANIC",__FILE__,__LINE__, "cs_add_state_to_unknown", "Oops",0); if (STATE_WCHAR_magic != ch->p->a.wchar.magic) panic("STRING PANIC",__FILE__,__LINE__, "cs_add_state_to_unknown", "Bad magic number",0); /* NOTE: str->p->a.wchars is not NUL terminated */ str->p->a.wchars = safe_realloc(str->p->a.wchars, (str->p->len+1)* sizeof (wchar_t)); str->p->a.wchars[str->p->len++] = ch->p->a.wchar.value; #else panic("STRING PANIC",__FILE__,__LINE__,"cs_add_state_to_unknown", "No wchar_t support",0); #endif } else { /* Use bytes (raw form) */ #ifdef WCHAR if (ch->charset == system_charset) panic("STRING PANIC",__FILE__,__LINE__, "cs_add_state_to_unknown", "Oops",0); #endif /* NOTE: str->p->a.bytes is not NUL terminated */ str->p->a.bytes = safe_realloc(str->p->a.bytes,str->p->len+1); str->p->a.bytes[str->p->len++] = ch->p->a.byte; } } S_(cs_state_same_char cs_s_unknown_same_char) static int cs_s_unknown_same_char P_((struct charset_state *A, struct charset_state *B, int ignore_case)); static int cs_s_unknown_same_char(A,B,ignore_case) struct charset_state *A; struct charset_state *B; int ignore_case; { if (A->charset != B->charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_s_unknown_same_char", "Oops",0); #ifdef WCHAR if (A->charset == system_charset) { wchar_t c2; wchar_t c1; if (STATE_WCHAR_magic != A->p->a.wchar.magic) panic("STRING PANIC",__FILE__,__LINE__, "cs_s_unknown_same_char", "Bad magic number",0); if (STATE_WCHAR_magic != B->p->a.wchar.magic) panic("STRING PANIC",__FILE__,__LINE__, "cs_s_unknown_same_char", "Bad magic number",0); c1 = A->p->a.wchar.value ; c2 = B->p->a.wchar.value ; if (c1 == c2) return 1; if (ignore_case && towlower(c1) == towlower(c2)) return 1; return 0; } else #endif { unsigned char c2 = B->p->a.byte; unsigned char c1 = A->p->a.byte; if (c1 == c2) return 1; if (!ignore_case) return 0; return -1; /* Use UNICODE comparision on upper level instead */ } } S_(cs_add_streambytes_to_string cs_add_streambytes_to_unknown) /* Returns number of bytes added */ static int cs_add_streambytes_to_unknown P_((struct string *str, int count, const unsigned char *data, int *errors)); static int cs_add_streambytes_to_unknown(str,count,data,errors) struct string *str; int count; CONST unsigned char *data; int *errors; { int i; if (count < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambytes_to_unknown", "Negative length",0); *errors = 0; #ifdef WCHAR if (!str->p->private_flag && str->string_type == system_charset) convert_to_wchar(str); #endif if (count > 0) { /* realloc with size 0 is equivalent of free and may corrupt memory ... */ if (str->p->private_flag) { /* Use wchars (locale) */ /* If str->p->private_flag is set, charset is * locale charset */ if (str->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambytes_to_unknown", "private_flag set for non-system charset",0); #ifdef WCHAR if (!str->p->state) str->p->state = new_state_1(str->string_type); if (str->p->state->charset->charset_type != &cs_unknown) panic("STRING PANIC",__FILE__,__LINE__, "cs_add_streambytes_to_unknown", "Bad state",0); /* NOTE: str->p->a.wchars is not NUL terminated */ str->p->a.wchars = safe_realloc(str->p->a.wchars, (str->p->len+count)* sizeof (wchar_t)); for (i = 0; i < count; i++) { if (!cs_add_streambyte_to_s_unknown(str->p->state,data[i])) { DPRINT(Debug,10,(&Debug, "cs_add_streambytes_to_unknown: Failed to add byte, idx=%d\n", i)); (*errors)++; str->p->a.wchars[str->p->len++] = L'?'; continue; } if (str->p->state->p->ready) { str->p->a.wchars[str->p->len++] = str->p->state->p->a.wchar.value; str->p->state->p->ready = 0; } } #else panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambytes_to_binary", "No wchar_t support",0); #endif } else { /* Use bytes (raw form) */ /* NOTE: str->p->a.bytes is not NUL terminated */ str->p->a.bytes = safe_realloc(str->p->a.bytes,str->p->len+count); for (i = 0; i < count; i++) str->p->a.bytes[str->p->len++] = data[i]; return count; } } return 0; } S_(cs_add_intdata_to_string cs_add_intdata_to_unknown) static void cs_add_intdata_to_unknown P_((struct string *str, const struct string *data)); static void cs_add_intdata_to_unknown(str,data) struct string *str; CONST struct string *data; { int i; if (str->string_type->charset_type != data->string_type->charset_type) panic("STRING PANIC",__FILE__,__LINE__,"cs_add_intdata_to_unknown", "String type mismatch",0); if (data->p->len > 0) { /* realloc with size 0 is equivalent of free and may corrupt memory ... */ #ifdef WCHAR if (!str->p->private_flag && data->p->private_flag && str->string_type == system_charset) convert_to_wchar(str); if (!data->p->private_flag && str->p->private_flag && data->string_type == system_charset) convert_to_wchar(data); #endif if (str->p->private_flag && data->p->private_flag) { /* Use wchars (locale) */ /* If str->p->private_flag is set, charset is * locale charset */ if (str->string_type != system_charset || data->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_add_intdata_to_unknown", "private_flag set for non-system charset",0); #ifdef WCHAR /* NOTE: str->p->a.wchars is not NUL terminated */ str->p->a.wchars = safe_realloc(str->p->a.wchars, (str->p->len+data->p->len)* sizeof (wchar_t)); for (i = 0; i < data->p->len; i++) str->p->a.wchars[str->p->len++] = data->p->a.wchars[i]; #else panic("STRING PANIC",__FILE__,__LINE__,"cs_add_intdata_to_unknown", "No wchar_t support",0); #endif } else { if (str->p->private_flag || data->p->private_flag) panic("STRING PANIC",__FILE__,__LINE__,"cs_add_intdata_to_unknown", "Ooops -- MIME subtypes of unknown charset is not supported",0); /* NOTE: str->p->a.bytes is not NUL terminated */ str->p->a.bytes = safe_realloc(str->p->a.bytes, data->p->len+str->p->len); for (i = 0; i < data->p->len; i++) str->p->a.bytes[str->p->len++] = data->p->a.bytes[i]; } } } static uint16 cs_unicode_unknown_helper P_((unsigned int ch, charset_t set, int *found)); static uint16 cs_unicode_unknown_helper(ch,set, found) unsigned int ch; charset_t set; int *found; { *found = 0; /* We assume that values < 32 are same control characters * on all sets -- after all MIME requires that on all * character sets characters CR and LF are on same position */ /* If charset is system character set then assume that * invariant part is on same position than on character * set used by code (ie compiler) */ if (set == system_charset) { uint16 val = map_fallback(ch); if (val != MAPPING_NONE) { *found = 1; return val; } } if (ch < 32) { *found = 1; return ch; } return 0x003F; /* '?' */ } #ifdef WCHAR static uint16 cs_unicode_unknown_whelper P_((wint_t ch, charset_t set, int *found)); static uint16 cs_unicode_unknown_whelper(ch,set, found) wint_t ch; charset_t set; int *found; { uint16 val; int c1; *found = 0; if (set != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_unicode_unknown_whelper", "Not a system charset",0); /* This function is used only for system charset * * Assume that * invariant part is on same position than on character * set used by code (ie compiler) */ val = map_wfallback(ch); if (val != MAPPING_NONE) { *found = 1; return val; } /* returns EOF if character is not exactly one byte long */ c1 = wctob(ch); /* We assume that values < 32 are same control characters * on all sets -- after all MIME requires that on all * character sets characters CR and LF are on same position * */ if (c1 < 32 && c1 >= 0) { *found = 1; return c1; } return 0x003F; /* '?' */ } #endif /* Does compression, recalculation and so on ... so 'const' is not very const */ S_(cs_check_length_string cs_check_length_unknown) static void cs_check_length_unknown P_((const struct string *str)); static void cs_check_length_unknown(str) CONST struct string *str; { #ifdef WCHAR if (!str->p->private_flag && str->string_type == system_charset) convert_to_wchar(str); #endif /* No compression */ } S_(cs_give_unicode_from_string cs_give_unicode_from_unknown) static uint16 cs_give_unicode_from_unknown P_((const struct string *str, int pos, int *found)); static uint16 cs_give_unicode_from_unknown(str,pos,found) CONST struct string *str; int pos; int *found; { if (pos < 0 || pos >= str->p->len) panic("STRING PANIC",__FILE__,__LINE__,"cs_give_unicode_from_unknown", "Index out of array",0); if (str->p->private_flag) { /* Use wchars (locale) */ #ifdef WCHAR wchar_t ch; /* If str->p->private_flag is set, charset is * locale charset */ if (str->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_give_unicode_from_unknown", "private_flag set for non-system charset",0); ch = str->p->a.wchars[pos]; return cs_unicode_unknown_whelper(ch,str->string_type,found); #else panic("STRING PANIC",__FILE__,__LINE__,"cs_give_unicode_from_unknown", "No wchar_t support",0); return MAPPING_NONE; /* Not reached */ #endif } else { unsigned char ch; /* Use bytes (raw form) */ ch = str->p->a.bytes[pos]; return cs_unicode_unknown_helper(ch,str->string_type,found); } } S_(cs_give_unicode_from_state cs_give_unicode_from_s_unknown) static uint16 cs_give_unicode_from_s_unknown P_((struct charset_state *st, int *found)); static uint16 cs_give_unicode_from_s_unknown(st,found) struct charset_state *st; int *found; { #ifdef WCHAR if (st->charset == system_charset) { wchar_t ch; if (STATE_WCHAR_magic != st->p->a.wchar.magic) panic("STRING PANIC",__FILE__,__LINE__, "cs_give_unicode_from_s_unknown", "Bad magic number",0); ch = st->p->a.wchar.value; return cs_unicode_unknown_whelper(ch,st->charset,found); } else #endif { unsigned char ch; ch = st->p->a.byte; return cs_unicode_unknown_helper(ch,st->charset,found); } } S_(cs_add_unicodedata_to_string cs_add_unicodedata_to_unknown) static void cs_add_unicodedata_to_unknown P_((struct string *str, int len, const uint16 *data)); static void cs_add_unicodedata_to_unknown(str,len,data) struct string *str; int len; CONST uint16 *data; { int i; if (len > 0) { /* realloc with size 0 is equivalent of free and may corrupt memory ... */ #ifdef WCHAR if (!str->p->private_flag && str->string_type == system_charset) convert_to_wchar(str); #endif if (str->p->private_flag) { /* Use wchars (locale) */ /* If str->p->private_flag is set, charset is * locale charset */ if (str->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_add_unicodedata_to_unknown", "private_flag set for non-system charset",0); #ifdef WCHAR /* NOTE: str->p->a.wchars is not NUL terminated */ str->p->a.wchars = safe_realloc(str->p->a.wchars, (str->p->len+len)* sizeof (wchar_t)); for (i = 0; i < len; i++) { wint_t ch = 0; int found = 0; /* str->p->private_flag is set only if charset * is system character set. * * Assume that * invariant part is on same position than on character * set used by code (ie compiler) */ ch = map_wfallback_rev(data[i],&found); if (found) str->p->a.wchars[str->p->len++] = ch; /* We assume that values < 32 are same control characters * on all sets -- after all MIME requires that on all * character sets characters CR and LF are on same position */ if (!found && data[i] < 32) str->p->a.wchars[str->p->len++] = btowc(data[i]); /* * If map_wfallback_rev() is given replacement character * we use it. */ else if (ch && !found) str->p->a.wchars[str->p->len++] = ch; } #else panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambytes_to_binary", "No wchar_t support",0); #endif } else { /* Use bytes (raw form) */ /* NOTE: str->p->a.bytes is not NUL terminated */ str->p->a.bytes = safe_realloc(str->p->a.bytes,len+str->p->len); for (i = 0; i < len; i++) { unsigned char ch = 0; int found = 0; /* If charset is system character set assume that * invariant part is on same position than on character * set used by code (ie compiler) */ if (str->string_type == system_charset) { ch = map_fallback_rev(data[i],&found); if (found) str->p->a.bytes[str->p->len++] = ch; } /* We assume that values < 32 are same control characters * on all sets -- after all MIME requires that on all * character sets characters CR and LF are on same position */ if (!found && data[i] < 32) str->p->a.bytes[str->p->len++] = data[i]; /* Because character set is unknow we do not know any * replacement character what we can use, therefore * we add nothing. * * If however map_fallback_rev() is given replacement character * we use it. */ else if (ch && !found) str->p->a.bytes[str->p->len++] = ch; } } } } S_(cs_cmp_string cs_cmp_unknown) static int cs_cmp_unknown P_((const struct string *str1,const struct string *str2)); static int cs_cmp_unknown(str1,str2) CONST struct string *str1; CONST struct string *str2; { int i; if (str1->string_type->charset_type != str2->string_type->charset_type) panic("STRING PANIC",__FILE__,__LINE__,"cs_cmp_unknown", "String type mismatch",0); #ifdef WCHAR if (!str1->p->private_flag && str2->p->private_flag) convert_to_wchar(str1); if (!str2->p->private_flag && str1->p->private_flag) convert_to_wchar(str2); #endif if (str1->p->private_flag && str2->p->private_flag) { /* Use wchars (locale) */ /* If str1->p->private_flag is set, charset is * locale charset */ if (str1->string_type != system_charset || str2->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_cmp_unknown", "private_flag set for non-system charset",0); #ifdef WCHAR for (i = 0; i < str1->p->len && i < str2->p->len; i++) { if (str1->p->a.wchars[i] < str2->p->a.wchars[i]) return -1; if (str1->p->a.wchars[i] > str2->p->a.wchars[i]) return 1; } #else panic("STRING PANIC",__FILE__,__LINE__,"cs_cmp_unknown", "No wchar_t support",0); #endif } else { /* Use bytes (raw form) */ if (str1->p->private_flag || str2->p->private_flag) panic("STRING PANIC",__FILE__,__LINE__,"cs_cmp_unknown", "Ooops",0); for (i = 0; i < str1->p->len && i < str2->p->len; i++) { if (str1->p->a.bytes[i] < str2->p->a.bytes[i]) return -1; if (str1->p->a.bytes[i] > str2->p->a.bytes[i]) return 1; } } if (i < str1->p->len) return 1; if (i < str2->p->len) return -1; return 0; } S_(cs_state_printable cs_s_unknown_printable) static int cs_s_unknown_printable P_((struct charset_state *st)); static int cs_s_unknown_printable(st) struct charset_state *st; { #ifdef WCHAR if (st->charset == system_charset) { wint_t ch; if (STATE_WCHAR_magic != st->p->a.wchar.magic) panic("STRING PANIC",__FILE__,__LINE__, "cs_s_unknown_printable", "Bad magic number",0); ch = st->p->a.wchar.value; return iswprint(ch); } else #endif { #ifndef ASCII_CTYPE if (st->charset == system_charset) return cs_s_locale_printable(st); #endif } return -1; /* Use unicode values */ } /* If character corresponds one byte on stream, returns it. * Otherwise returns 0. This is used implement ReadCh(). * It is assumed that returned character corresponds to * code character set (and perhaps also US-ASCII) */ S_(cs_state_is_onebyte cs_s_unknown_is_onebyte) static int cs_s_unknown_is_onebyte P_((struct charset_state *st)); static int cs_s_unknown_is_onebyte(st) struct charset_state *st; { #ifdef WCHAR if (st->charset == system_charset) { wint_t ch; int c1; if (STATE_WCHAR_magic != st->p->a.wchar.magic) panic("STRING PANIC",__FILE__,__LINE__, "cs_s_unknown_printable", "Bad magic number",0); ch = st->p->a.wchar.value; /* returns EOF if character is not exactly one byte long */ c1 = wctob(ch); if (EOF == c1) return 0; return c1; } else #endif { unsigned char c1 = st->p->a.byte; return c1; } } S_(cs_stream_from_string cs_stream_from_unknown) static unsigned char *cs_stream_from_unknown P_((const struct string *str, int printable, screen_info_p terminal, int *reslen)); static unsigned char *cs_stream_from_unknown(str,printable,terminal,reslen) CONST struct string *str; int printable; screen_info_p terminal; /* NOT USED */ int *reslen; { unsigned char * ret = NULL; if (str->p->private_flag) { /* Use wchars (locale) */ #ifdef WCHAR int alloced; int l; mbstate_t State; wchar_t * tmp; const wchar_t *src, *ctmp; int i; /* If str->p->private_flag is set, charset is * locale charset */ if (str->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_stream_from_unknown", "private_flag set for non-system charset",0); /* NOTE: str->p->a.wchars is not NUL terminated * * ! ! ! NOTE! this do not convert strings including \0 * characters */ tmp = safe_malloc((str->p->len + 1) * sizeof (tmp[0])); for (i = 0; i < str->p->len; i++) { wchar_t c = str->p->a.wchars[i]; if (printable && !iswprint(c)) c = L'?'; tmp[i] = c; } tmp[i] = L'\0'; ctmp = (const wchar_t *) tmp; memset(&State, 0, sizeof(State)); alloced = str->p->len+1; ret = safe_malloc(alloced); for (src = ctmp, l = 0; src; ) { int left = alloced - l; size_t r = wcsrtombs(us2s(ret+l),&src,left,&State); if ((size_t)(-1) == r) { int pos = src-ctmp; if (L'?' == *src) panic("STRING PANIC",__FILE__,__LINE__, "cs_stream_from_unknown", "? not convertible",0); if (L'\0' == *src) panic("STRING PANIC",__FILE__,__LINE__, "cs_stream_from_unknown", "Ooops",0); DPRINT(Debug,61,(&Debug, "cs_stream_from_unknown: Failed to convert 0x%04x at %d: errno = %d\n", (int)*src,pos,errno)); tmp[pos] = L'?'; } else { if (l >= 0) l += r; else panic("STRING PANIC",__FILE__,__LINE__, "cs_stream_from_unknown", "Unexpected return from wcsrtombs()",0); if (src) { int x; x = str->p->len - (src-tmp); alloced += x + MB_CUR_MAX; ret = safe_realloc(ret,alloced); } } } if ('\0' != ret[l]) panic("STRING PANIC",__FILE__,__LINE__, "cs_stream_from_unknown", "\\0 not written",0); free(tmp); tmp = NULL; *reslen = l; #else panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambytes_to_binary", "No wchar_t support",0); #endif } else { /* Use bytes (raw form) */ #ifndef ASCII_CTYPE if (printable && str->string_type == system_charset) return cs_stream_from_locale(str,reslen); #endif if (!printable) { int l = 0,i; ret = safe_malloc(str->p->len+1); for (i = 0; i < str->p->len; i++) ret[l++] = str->p->a.bytes[i]; ret[l] = '\0'; *reslen = l; } else if (str->string_type == system_charset) { int l = 0,i; ret = safe_malloc(str->p->len+1); /* If charset is system character set assume that * invariant part is on same position than on character * set used by code (ie compiler) */ for (i = 0; i < str->p->len; i++) { uint16 val = map_fallback(str->p->a.bytes[i]); if (val != MAPPING_NONE && val >= 0x0020) ret[l++] = str->p->a.bytes[i]; else ret[l++] = '?'; /* See above ... */ } ret[l] = '\0'; *reslen = l; } else { /* Because charset is unknown we do not have able to print * printable characters -- also we do not know replacement * characters... */ ret = s2us(safe_strdup("")); *reslen = 0; } } return ret; } S_(cs_streamclip_from_string cs_streamclip_from_unknown) static unsigned char *cs_streamclip_from_unknown P_((const struct string *str, int *pos, int len, screen_info_p terminal, struct cs_printable_len *printable_len)); static unsigned char *cs_streamclip_from_unknown(str,pos,len,terminal,printable_len) CONST struct string *str; int *pos; int len; screen_info_p terminal; struct cs_printable_len *printable_len; { unsigned char * ret = NULL; if (*pos < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_unknown", "Index out of array",0); if (len < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_unknown", "Negative size",0); if (str->p->private_flag) { /* Use wchars (locale) */ #ifdef WCHAR int alloced; int l; mbstate_t State; wchar_t * tmp; const wchar_t *ctmp, *src; int i; /* If str->p->private_flag is set, charset is * locale charset */ if (str->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_unknown", "private_flag set for non-system charset",0); if (printable_len) printable_len->ret_len = 0; /* NOTE: str->p->a.wchars is not NUL terminated * * ! ! ! NOTE! this do not convert strings including \0 * characters */ if (len > str->p->len) len = str->p->len; if (len <= 0) goto done; tmp = safe_malloc((len + 1) * sizeof (tmp[0])); for (i = 0; i < len && *pos < str->p->len; i++, (*pos)++) { wchar_t c = str->p->a.wchars[*pos]; if (!iswprint(c)) c = L'?'; if (printable_len) { #ifdef WCWIDTH int w = 1; if (terminal->wcwidth) w = wcwidth(c); if (printable_len->max_len < printable_len->ret_len + w) break; printable_len->ret_len += w; #else panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_unknown", "printable_len not supported",0); #endif } tmp[i] = c; } tmp[i] = L'\0'; ctmp = (const wchar_t *) tmp; memset(&State, 0, sizeof(State)); alloced = len+1; ret = safe_malloc(alloced); for (src = ctmp, l = 0; src; ) { int left = alloced - l; size_t r = wcsrtombs(us2s(ret+l),&src,left,&State); if ((size_t)(-1) == r) { int pos = src-ctmp; if (L'?' == *src) panic("STRING PANIC",__FILE__,__LINE__, "cs_streamclip_from_unknown", "? not convertible",0); if (L'\0' == *src) panic("STRING PANIC",__FILE__,__LINE__, "cs_streamclip_from_unknown", "Ooops",0); DPRINT(Debug,61,(&Debug, "convert_to_wchar: Failed to convert 0x%04x at %d: errno = %d\n", (int)*src,pos,errno)); tmp[pos] = L'?'; } else { if (l >= 0) l += r; else panic("STRING PANIC",__FILE__,__LINE__, "cs_stream_from_unknown", "Unexpected return from wcsrtombs()",0); if (src) { int x; x = len - (src-ctmp); alloced += x + MB_CUR_MAX; ret = safe_realloc(ret,alloced); } } } if ('\0' != ret[l]) panic("STRING PANIC",__FILE__,__LINE__, "cs_stream_from_unknown", "\\0 not written",0); free(tmp); tmp = NULL; #else panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambytes_to_binary", "No wchar_t support",0); #endif } else { /* Use bytes (raw form) */ if (printable_len) panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_unknown", "printable_len not supported",0); #ifndef ASCII_CTYPE if (str->string_type == system_charset) return cs_streamclip_from_locale(str,pos,len); #endif if (str->string_type == system_charset) { int l = 0,i; ret = safe_malloc(len+1); /* If charset is system character set then assume that * invariant part is on same position than on character * set used by code (ie compiler) */ for (i = 0; i < len && *pos < str->p->len; i++, (*pos)++) { uint16 val = map_fallback(str->p->a.bytes[*pos]); if (val != MAPPING_NONE && val >= 0x0020) ret[l++] = str->p->a.bytes[*pos]; else ret[l++] = '?'; /* See above ... */ } ret[l] = '\0'; } else { done: ret = s2us(safe_strdup("")); /* Because charset is unknown we do not have able to print * printable characters -- also we not know replacement * characters... */ /* Indicate that we are 'clipped' whole string */ if (*pos < str->p->len) *pos = str->p->len; } } return ret; } S_(cs_clip_from_string cs_clip_from_unknown) static void cs_clip_from_unknown P_((struct string *ret, const struct string *str, int *pos, int len)); static void cs_clip_from_unknown(ret,str,pos,len) struct string *ret; CONST struct string *str; int *pos; int len; { int i; if (ret->string_type->charset_type != str->string_type->charset_type) panic("STRING PANIC",__FILE__,__LINE__,"cs_clip_from_binary", "String type mismatch",0); if (*pos < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_clip_from_binary", "Index out of array",0); if (len < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_clip_from_binary", "Negative size",0); cs_free_unknown(ret); if (len > str->p->len) len = str->p->len; if (len <= 0) return; /* realloc with len == 0 is equivalent of freeing and may result corruption of memory ... */ if (str->p->private_flag) { /* Use wchars (locale) */ /* If str->p->private_flag is set, charset is * locale charset */ if (str->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_clip_from_unknown", "private_flag set for non-system charset",0); #ifdef WCHAR ret->p->private_flag = 1; /* NOTE: str->p->a.wchars is not NUL terminated */ ret->p->a.wchars = safe_malloc(len * sizeof(ret->p->a.wchars[0])); ret->p->len = 0; for (i = 0; i < len && *pos < str->p->len; i++, (*pos)++) { ret->p->a.wchars[ret->p->len++] = str->p->a.wchars[*pos]; } #else panic("STRING PANIC",__FILE__,__LINE__,"cs_clip_from_unknown", "No wchar_t support",0); #endif } else { /* Use bytes (raw form) */ ret->p->private_flag = 0; /* NOTE: str->p->a.bytes is not NUL terminated */ ret->p->a.bytes = safe_malloc(len); ret->p->len = 0; for (i = 0; i < len && *pos < str->p->len; i++, (*pos)++) { ret->p->a.bytes[ret->p->len++] = str->p->a.bytes[*pos]; } } } S_(cs_can_ascii_string cs_can_ascii_unknown) static int cs_can_ascii_unknown P_((const struct string *str)); static int cs_can_ascii_unknown(str) CONST struct string *str; { if (str->p->private_flag) { /* Use wchars (locale) */ #ifdef WCHAR int i; /* If str->p->private_flag is set, charset is * locale charset */ if (str->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_can_ascii_unknown", "private_flag set for non-system charset",0); for (i = 0; i < str->p->len; i++) { wchar_t c = str->p->a.wchars[i]; int c1 = wctob(c); uint16 val; /* wctob returns unsigned char if character is representable * as byte or EOF otehrwise */ if (EOF == c1) return 0; /* Assume that * invariant part is on same position than on character * set used by code (ie compiler) */ val = map_wfallback(c); /* MAPPING_NONE is > 127 */ if (val > 127) return 0; } return 1; #else panic("STRING PANIC",__FILE__,__LINE__,"cs_can_ascii_unknown", "No wchar_t support",0); return 0; /* Not reached */ #endif } else { /* Use bytes (raw form) */ if (str->string_type == system_charset) { int i; /* If charset is system character set then assume that * invariant part is on same position than on character * set used by code (ie compiler) */ for (i = 0; i < str->p->len; i++) { uint16 val = map_fallback(str->p->a.bytes[i]); /* MAPPING_NONE is > 127 */ if (val > 127) return 0; } return 1; } else return 0; } } S_(cs_find_pattern_from_string cs_find_pattern_from_unknown) static int cs_find_pattern_from_unknown P_((const struct string *str, const struct string *pattern, int ignore_case)); static int cs_find_pattern_from_unknown(str,pattern,ignore_case) CONST struct string *str; CONST struct string *pattern; int ignore_case; { int ret = 0; int i, j; if (pattern->string_type->charset_type != str->string_type->charset_type) panic("STRING PANIC",__FILE__,__LINE__,"cs_find_pattern_from_unknown", "String type mismatch",0); if (str->p->private_flag) { /* Use wchars (locale) */ /* If str->p->private_flag is set, charset is * locale charset */ if (str->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_find_pattern_from_unknown", "private_flag set for non-system charset",0); #ifdef WCHAR for (i = 0; i < str->p->len; ) { CONST int s = i + 1; for (j = 0; j < pattern->p->len && i < str->p->len; j++,i++) { wint_t c2 = pattern->p->a.wchars[j]; wint_t c1 = str->p->a.wchars[i]; if (ignore_case) { c2 = tolower(c2); c1 = tolower(c1); } if (c1 != c2) break; } if (j >= pattern->p->len) { ret = 1; break; } i = s; } #else panic("STRING PANIC",__FILE__,__LINE__,"cs_find_pattern_from_unknown", "No wchar_t support",0); #endif } else { /* Use bytes (raw form) */ #ifndef ASCII_CTYPE if (ignore_case && str->string_type == system_charset) return cs_find_pattern_from_locale(str,pattern); #endif for (i = 0; i < str->p->len; ) { CONST int s = i + 1; for (j = 0; j < pattern->p->len && i < str->p->len; j++,i++) { unsigned char c2 = pattern->p->a.bytes[j]; unsigned char c1 = str->p->a.bytes[i]; /* We can not convert characters lowercase because * charset is unknown */ if (c1 != c2) break; } if (j >= pattern->p->len) { ret = 1; break; } i = s; } } return ret; } S_(cs_remove_control cs_remove_control_unknown) static void cs_remove_control_unknown P_((const struct string *str)); static void cs_remove_control_unknown(str) CONST struct string *str; { int i; if (str->p->private_flag) { /* Use wchars (locale) */ /* If str->p->private_flag is set, charset is * locale charset */ if (str->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_remove_control_unknown", "private_flag set for non-system charset",0); #ifdef WCHAR for (i = 0; i < str->p->len; i++) { wchar_t c1 = str->p->a.wchars[i]; if (!iswprint(c1)) str->p->a.wchars[i] = L' '; } #else panic("STRING PANIC",__FILE__,__LINE__,"cs_remove_control_unknown", "No wchar_t support",0); #endif } else { /* Use bytes (raw form) */ #ifndef ASCII_CTYPE if (str->string_type == system_charset) { cs_remove_control_locale(str,' '); return; } #endif for (i = 0; i < str->p->len; i++) { /* We assume that values < 32 are same control characters * on all sets -- after all MIME requires that on all * character sets characters CR and LF are on same position */ if (str->p->a.bytes[i] < 32) { if (str->string_type == system_charset) /* If charset is system character set then assume that * invariant part is on same position than on character * set used by code (ie compiler) */ str->p->a.bytes[i] = ' '; else /* We HOPE that 32 is printable character... */ str->p->a.bytes[i] = 32; } } } } S_(cs_find_map_type cs_find_unknown) static struct map_info * cs_find_unknown P_((const char * map_name)); static struct map_info * cs_find_unknown(map_name) CONST char * map_name; { return NULL; } S_(cs_set_properties cs_unknown_properties) static int cs_unknown_properties P_((charset_t st)); static int cs_unknown_properties(st) charset_t st; { int prop = 0; if (st->map_info) prop |= CS_mapping | CS_printable; #ifdef WCHAR /* We know printable characters from locale */ if (system_charset == st) { prop |= CS_printable; #ifdef WCWIDTH prop |= CS_printable_len; #endif #ifdef __STDC_ISO_10646__ prop |= CS_mapping; if (st->MIME_name && 0 == istrcmp(st->MIME_name,"UTF-8")) prop |= CS_universal_set; #endif } #endif #ifndef ASCII_CTYPE /* We know printable characters from locale */ if (system_charset == st) prop |= CS_printable; #endif return prop; } S_(cs_iso2022_info_set cs_iso2022_info_set_unknown) static int cs_iso2022_info_set_unknown P_((struct charcode_info *new_vals, struct setlist *new_setlist, int setcount)); static int cs_iso2022_info_set_unknown(new_vals, new_setlist,setcount) struct charcode_info *new_vals; struct setlist *new_setlist; int setcount; { int ptr_94 = -1; int ptr_96 = -1; int banks[ISO2022_BANK_NUM]; if (!cs_info_set_scan(new_vals,new_setlist,setcount,&ptr_94,&ptr_96, banks)) { return 0; } new_vals->iso2022_info = loc_setlist(*new_setlist); new_vals->flags &= ~SET_nodata; return 1; } /* Return number of characters consumed */ S_(cs_estimate_clip_string cs_estimate_clip_unknown) static int cs_estimate_clip_unknown(str,pos,len,terminal,printable_len) CONST struct string *str; int pos; int len; /* UPPER LIMIT */ screen_info_p terminal; struct cs_printable_len *printable_len; { if (pos < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_estimate_clip_unknown", "Index out of array",0); if (len < 0) panic("STRING PANIC",__FILE__,__LINE__,"cs_estimate_clip_unknown", "Negative size",0); printable_len->ret_len = 0; if (0 == str->p->len) { DPRINT(Debug,49,(&Debug, "cs_estimate_clip_unknown: Empty string.\n")); return 0; } if (str->p->private_flag) { /* Use wchars (locale) */ #ifdef WCHAR int counter = 0; int i; /* If str->p->private_flag is set, charset is * locale charset */ if (str->string_type != system_charset) panic("STRING PANIC",__FILE__,__LINE__,"cs_estimate_clip_unknown", "private_flag set for non-system charset",0); while (pos < str->p->len && counter < len) { wchar_t c = str->p->a.wchars[pos]; int w = 1; if (!iswprint(c)) goto bail_out; if (terminal->wcwidth) w = wcwidth(c); if (printable_len->max_len < printable_len->ret_len + w) break; printable_len->ret_len += w; counter++; pos++; } return counter; bail_out: DPRINT(Debug,49,(&Debug, "cs_estimate_clip_unknown: bailing out at %d, counter=%d\n", pos,counter)); if (counter > 0) return counter; return -1; #else panic("STRING PANIC",__FILE__,__LINE__,"cs_estimate_clip_unknown", "No wchar_t support",0); #endif } else { /* Use bytes (raw form) */ panic("STRING PANIC",__FILE__,__LINE__,"cs_estimate_clip_unknown", "printable_len not supported",0); } return -1; } struct charset_type cs_unknown = { "unknown-charset", cs_init_unknown, cs_free_unknown, cs_add_streambyte_to_unknown, cs_add_intdata_to_unknown, cs_check_length_unknown, cs_give_unicode_from_unknown, cs_add_unicodedata_to_unknown, cs_cmp_unknown, cs_stream_from_unknown, cs_can_ascii_unknown, cs_streamclip_from_unknown, cs_clip_from_unknown, cs_find_pattern_from_unknown, cs_add_streambytes_to_unknown, cs_find_unknown, cs_remove_control_unknown, cs_add_state_to_unknown, cs_init_s_unknown, cs_free_s_unknown, cs_add_streambyte_to_s_unknown, cs_soft_reset_s_unknown, cs_give_unicode_from_s_unknown, cs_s_unknown_same_char, cs_s_unknown_printable, cs_s_unknown_is_onebyte, cs_unknown_properties, cs_estimate_clip_unknown, cs_iso2022_info_set_unknown, &cs_ascii }; /* * Local Variables: * mode:c * c-basic-offset:4 * buffer-file-coding-system: iso-8859-1 * End: */