static char rcsid[] = "@(#)$Id: cs_utf.c,v 1.33 2006/04/09 07:37:07 hurtta Exp $";
/******************************************************************************
* The Elm (ME+) Mail System - $Revision: 1.33 $ $State: Exp $
*
* Author: Kari Hurtta <hurtta+elm@posti.FMI.FI> (hurtta+elm@ozone.FMI.FI)
*****************************************************************************/
#include "headers.h"
#include "s_me.h"
#include "cs_imp.h"
/* Debug handle used by every DPRINT() call in this file; the third
   argument presumably names the debug class ("charset") -- confirm
   against the DEBUG_VAR definition. */
DEBUG_VAR(Debug,__FILE__,"charset");
/* Base64 alphabet: index is the 6-bit value, entry is the output byte.
   Entries are numeric codes (the ASCII values of A-Z, a-z, 0-9, '+', '/')
   rather than character literals. */
char base64chars[64] = {
    /*  0 .. 25 : 'A' .. 'Z' */
    65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
    78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
    /* 26 .. 51 : 'a' .. 'z' */
    97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
    110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
    /* 52 .. 61 : '0' .. '9' */
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
    /* 62, 63 : '+' and '/' */
    43, 47
};
/* Reverse lookup for the modified base64 alphabet in imapchars[]:
   index is a 7-bit character code, entry is its 6-bit value, or -1 when
   the character is not part of the alphabet.  Each row covers 16 codes.
   Note that code 43 ('+') maps to 62 and code 44 (',') maps to 63. */
static int index_imap[128] = {
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, 63,-1,-1,-1,
52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
/* Modified base64 alphabet used by the 'imap' encoding: identical to
   base64chars[] except that value 63 is ',' (44) instead of '/'. */
static char imapchars[64] = {
    /*  0 .. 25 : 'A' .. 'Z' */
    65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
    78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
    /* 26 .. 51 : 'a' .. 'z' */
    97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
    110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
    /* 52 .. 61 : '0' .. '9' */
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
    /* 62, 63 : '+' and ',' */
    43, 44
};

/* imap(c):   character code -> 6-bit value, -1 if not in the alphabet.
   toimap(c): 6-bit value -> character code, -1 if out of range.

   Both expansions are fully parenthesized so that the macros can be
   used safely inside larger expressions (the conditional operator has
   very low precedence).  Arguments are evaluated more than once; do not
   pass expressions with side effects. */
#define imap(c) ((((c) > 0) && ((c) < 127)) ? index_imap[ (c) ] : -1)
#define toimap(c) ((((c) >= 0) && ((c) < 64)) ? imapchars[(c)] : -1)
/* 'Map' for UTF-8 and UTF-7 ---------------------------------------------- */
/* One contiguous range of charset code values, 'start' .. 'end'
   (both inclusive), mapped linearly onto Unicode: code 'start' maps to
   'unicodeval', 'start'+1 to 'unicodeval'+1, and so on (see map_utf()
   and map_utf_rev()).  utfXelems[] is the built-in table installed by
   map_init_utfX(). */
static struct utfelem {
uint16 start;
uint16 end;
uint16 unicodeval; /* Unicode value corresponding to 'start' */
} utfXelems[] = {
/* Identity map excluding:
0x0000 -- 0x007F US-ASCII -- by default identity on type=utf-8
0xD800 -- 0xDFFF Surrogates Area
0xE000 -- 0xF8FF Private Use Area
*/
{ 0x0080, 0xD7FF, 0x0080 },
{ 0xF900, 0xFFFF, 0xF900 },
};
/* Mappping to unicode */
static uint16 map_utf P_((struct map_info *map,
unsigned int ch));
static uint16 map_utf(map,ch)
struct map_info *map;
unsigned int ch;
{
if (ch < 0x0080)
return ch; /* Identity */
if (map) {
int i;
if (!map->map_initialized)
map->map_init_it(map);
for (i = 0; i < map->b.utfmap->elem_count; i++) {
if (ch >= map->b.utfmap->elems[i].start &&
ch <= map->b.utfmap->elems[i].end) {
return ( ch - map->b.utfmap->elems[i].start ) +
map->b.utfmap->elems[i].unicodeval;
}
}
}
return MAPPING_NONE;
}
/* Mappping from unicode */
static uint16 map_utf_rev P_((struct map_info *map,
unsigned int val, int *found));
static uint16 map_utf_rev(map,val,found)
struct map_info *map;
unsigned int val;
int * found;
{
if (val < 0x0080) {
*found = 1;
return val; /* Identity */
}
if (map) {
int i;
if (!map->map_initialized)
map->map_init_it(map);
for (i = 0; i < map->b.utfmap->elem_count; i++) {
int n = map->b.utfmap->elems[i].end -
map->b.utfmap->elems[i].start;
if (val >= map->b.utfmap->elems[i].unicodeval &&
val <= map->b.utfmap->elems[i].unicodeval+n) {
*found = 1;
return ( val - map->b.utfmap->elems[i].unicodeval ) +
map->b.utfmap->elems[i].start;
}
}
}
*found = 0;
return 0x003F; /* '?' */
}
/* Lazy initializer of the built-in maps: allocates the range-table
 * header of 'map', points it at the static utfXelems[] table and marks
 * the map as initialized.
 */
static void map_init_utfX P_((struct map_info *map));
static void map_init_utfX(map)
     struct map_info *map;
{
    map->b.utfmap = safe_malloc(sizeof (* (map -> b.utfmap)));

    map->b.utfmap->elems      = utfXelems;
    map->b.utfmap->elem_count = sizeof (utfXelems) / sizeof (utfXelems[0]);

    map->map_initialized = 1;

    DPRINT(Debug,5,(&Debug,
		    "Map %s initialized\n",map->map_name));
}
/* Built-in "UNICODE" maps, one per charset type handled in this file.
   All three name map_init_utfX as their initializer, which map_utf() /
   map_utf_rev() call on first use when the map is not yet initialized.
   NOTE(review): the zero in third position is presumably the
   'map_initialized' flag -- confirm against struct map_info. */
struct map_info map_utf8 = { &cs_utf8, "UNICODE", 0, map_init_utfX, 0 };
struct map_info map_utf7 = { &cs_utf7, "UNICODE", 0, map_init_utfX, 0 };
static struct map_info map_imap = { &cs_imap, "UNICODE", 0, map_init_utfX, 0 };
/* 'imap' charset -- used for IMAP_name_convention */
/* Internal charset named "*IMAP*", bound to the cs_imap type and the
   built-in map_imap map; exported through IMAP_ENCODING. */
static struct charcode_info imap_encoding = {
CS_charset_magic, &cs_imap, &map_imap, SET_valid, "*IMAP*", NULL, NULL, 0, NULL
};
CONST charset_t IMAP_ENCODING = &imap_encoding;
/* Built-in "UTF-8" charset, bound to the cs_utf8 type and the built-in
   map_utf8 map; exported through UTF8_ENCODING.
   NOTE(review): 106 is presumably the IANA MIBenum of UTF-8 -- confirm
   against the struct charcode_info field order. */
static struct charcode_info utf8_encoding = {
CS_charset_magic, &cs_utf8, &map_utf8, SET_valid, "UTF-8", NULL, &set_utf8, 106, NULL
};
CONST charset_t UTF8_ENCODING = &utf8_encoding;
/* Reads the range table of map 'name' from the map file located by
 * open_mapname() and caches it, so that each map file is parsed only
 * once per process.
 *
 * Accepted line format (all numbers hexadecimal):
 *
 *      START[-END] <blanks> UNICODE[-UNICODEEND]
 *
 * Lines beginning with '#' are comments.  Both ranges must have the
 * same length and all four values must fit into 0x0000 .. 0xFFFF;
 * offending lines are reported with lib_error() and skipped.
 *
 * On success returns 1 and stores the table and its element count to
 * '*elems' and '*elemcount'.  The table is owned by the cache; the
 * caller must not free it.  Returns 0 if the map file can not be
 * opened.
 */
static int read_elemlist P_((const char *name,
			     struct utfelem ** elems,
			     int *elemcount));
static int read_elemlist(name,elems,elemcount)
     CONST char *name;
     struct utfelem ** elems;
     int *elemcount;
{
    static struct elemlist {
	char            * name;
	struct utfelem  * elems;
	int               elemcount;
    } * ELIST = NULL;
    static int ECOUNT = 0;

    FILE *F;
    int i;
    char buffer[STRING];
    int l;
    char *fn = NULL;

    /* Already parsed? */
    for (i = 0; i < ECOUNT; i++) {
	if (0 == strcmp(ELIST[i].name,name)) {
	    *elems     = ELIST[i].elems;
	    *elemcount = ELIST[i].elemcount;
	    return 1;
	}
    }

    F = open_mapname(name,&fn);
    if (!F) {
	if (fn)
	    free(fn);
	return 0;
    }

    /* New cache slot; it becomes visible (ECOUNT++) after parsing */
    ELIST = safe_realloc(ELIST,(ECOUNT+1)*sizeof (struct elemlist));
    ELIST[ECOUNT].name      = safe_strdup(name);
    ELIST[ECOUNT].elems     = NULL;
    ELIST[ECOUNT].elemcount = 0;

    while (0 < (l = mail_gets(buffer,sizeof buffer,F))) {
	long s1, e1, s2, e2;
	char *p;

	/* An over-long line is reported here; parsing still goes on
	   and normally rejects the line below because it does not
	   end with a newline.
	*/
	if (buffer[l-1] != '\n') {
	    lib_error(CATGETS(elm_msg_cat, MeSet,MeMapTooLongLine,
			      "Map %s: %s: Too long line: %s"),
		      name,fn,buffer);
	}

	if (buffer[0] == '#')
	    continue;

	s1 = strtol(buffer,&p,16);
	e1 = s1;
	if (*p == '-')
	    e1 = strtol(p+1,&p,16);

	if (*p == ' ' || *p == '\t') {
	    while (*p == ' ' || *p == '\t')
		p++;
	    s2 = strtol(p,&p,16);
	    e2 = s2;
	    if (*p == '-')
		e2 = strtol(p+1,&p,16);
	} else {
	    lib_error(CATGETS(elm_msg_cat, MeSet,MeMapBadValue,
			      "Map %s: %s: Bad value: %s"),
		      name,fn,buffer);
	    continue;
	}

	/* Both ends of both ranges must be valid 16-bit values.
	   The upper bound of the unicode side (e2) is checked too,
	   otherwise the mapped values could exceed 0xFFFF.
	*/
	if (*p != '\n' || e1 - s1 < 0 || e1 - s1 != e2 - s2 ||
	    s1 < 0x0000 || e1 > 0xFFFF || s2 < 0x0000 || e2 > 0xFFFF) {
	    lib_error(CATGETS(elm_msg_cat, MeSet,MeMapBadValue,
			      "Map %s: %s: Bad value: %s"),
		      name,fn,buffer);
	    continue;
	}

	ELIST[ECOUNT].elems = safe_realloc(ELIST[ECOUNT].elems,
					   (ELIST[ECOUNT].elemcount+1) *
					   sizeof (struct utfelem));
	ELIST[ECOUNT].elems[ELIST[ECOUNT].elemcount].start      = s1;
	ELIST[ECOUNT].elems[ELIST[ECOUNT].elemcount].end        = e1;
	ELIST[ECOUNT].elems[ELIST[ECOUNT].elemcount].unicodeval = s2;
	ELIST[ECOUNT].elemcount++;
    }

    /* The stream is not needed after parsing; do not leak it */
    fclose(F);

    *elems     = ELIST[ECOUNT].elems;
    *elemcount = ELIST[ECOUNT].elemcount;
    ECOUNT++;

    if (fn)
	free(fn);

    return 1;
}
/* Initializer installed on maps built by the open_*_map() functions.
   Those maps are created with map_initialized already set, so map_utf()
   and map_utf_rev() never call it; reaching it is treated as an
   internal error and panics. */
static void map_init_bad P_((struct map_info *map));
static void map_init_bad(map)
struct map_info *map;
{
panic("STRING PANIC",__FILE__,__LINE__,"map_init_bad",
"map_init_bad called",0);
}
/* Builds a new, already initialized cs_utf8 map from the map file
 * 'map_name'.  Returns NULL when the range table can not be read.
 */
static struct map_info * open_utf8_map P_((const char * map_name));
static struct map_info * open_utf8_map(map_name)
     CONST char * map_name;
{
    struct map_info *result;
    struct utfelem  *table;
    int              table_len;

    if (!read_elemlist(map_name,&table,&table_len))
	return NULL;

    result = safe_malloc(sizeof (struct map_info));
    result->map_type  = &cs_utf8;
    result->map_name  = safe_strdup(map_name);
    result->b.utfmap  = safe_malloc(sizeof (* (result->b.utfmap)));

    /* Table is complete; the init hook must never run */
    result->map_initialized = 1;
    result->map_init_it     = map_init_bad;

    result->b.utfmap->elem_count = table_len;
    result->b.utfmap->elems      = table;

    return result;
}
/* Builds a new, already initialized cs_utf7 map from the map file
 * 'map_name'.  Returns NULL when the range table can not be read.
 */
static struct map_info * open_utf7_map P_((const char * map_name));
static struct map_info * open_utf7_map(map_name)
     CONST char * map_name;
{
    struct map_info *result;
    struct utfelem  *table;
    int              table_len;

    if (!read_elemlist(map_name,&table,&table_len))
	return NULL;

    result = safe_malloc(sizeof (struct map_info));
    result->map_type  = &cs_utf7;
    result->map_name  = safe_strdup(map_name);
    result->b.utfmap  = safe_malloc(sizeof (* (result->b.utfmap)));

    /* Table is complete; the init hook must never run */
    result->map_initialized = 1;
    result->map_init_it     = map_init_bad;

    result->b.utfmap->elem_count = table_len;
    result->b.utfmap->elems      = table;

    return result;
}
/* Builds a new, already initialized cs_imap map from the map file
 * 'map_name'.  Returns NULL when the range table can not be read.
 */
static struct map_info * open_imap_map P_((const char * map_name));
static struct map_info * open_imap_map(map_name)
     CONST char * map_name;
{
    struct map_info *result;
    struct utfelem  *table;
    int              table_len;

    if (!read_elemlist(map_name,&table,&table_len))
	return NULL;

    result = safe_malloc(sizeof (struct map_info));
    result->map_type  = &cs_imap;
    result->map_name  = safe_strdup(map_name);
    result->b.utfmap  = safe_malloc(sizeof (* (result->b.utfmap)));

    /* Table is complete; the init hook must never run */
    result->map_initialized = 1;
    result->map_init_it     = map_init_bad;

    result->b.utfmap->elem_count = table_len;
    result->b.utfmap->elems      = table;

    return result;
}
/* ------------------------------------------------------------------------ */
static void cs_init_utf P_((struct string *str));
static void cs_init_utf(str)
struct string *str;
{
str->p->len = 0;
str->p->a.words = 0;
}
/* Releases the word array of 'str' (if any) and resets the length. */
static void cs_free_utf P_((struct string *str));
static void cs_free_utf(str)
     struct string *str;
{
    if (NULL != str->p->a.words) {
	free(str->p->a.words);
	str->p->a.words = NULL;
    }

    str->p->len = 0;
}
static int cs_add_streambyte_to_utf8 P_((struct string *str,int ch));
static int cs_add_streambyte_to_s_utf8 P_((struct charset_state *str, int ch));
static void cs_soft_reset_s_utf8 P_((struct charset_state *str));

/* Feeds one byte of an UTF-8 stream to the decoder state attached to
 * 'str' (created on first use).  When the state reports a complete
 * character, the value is appended to the string and the state is
 * soft reset.
 *
 * Returns 0 when the byte is rejected or when the decoded value does
 * not fit into 16 bits, 1 otherwise.
 */
static int cs_add_streambyte_to_utf8(str,ch)
     struct string *str;
     int ch;
{
    if (!str->p->state)
	str->p->state = new_state_1(str->string_type);

    if (&cs_utf8 != str->p->state->charset->charset_type)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambyte_to_utf8",
	      "Bad state",0);

    if (!cs_add_streambyte_to_s_utf8(str->p->state,ch)) {
	DPRINT(Debug,10,(&Debug,
			 "cs_add_streambyte_to_utf8: Failed to add byte\n"));
	return 0;
    }

    /* Character still incomplete */
    if (!str->p->state->p->ready)
	return 1;

    if (str->p->state->p->a.utf8.value > 0xFFFF)
	return 0;

    /* NOTE:  str->p->a.words is not NUL terminated */
    str->p->a.words = safe_realloc(str->p->a.words,
				   (str->p->len+1)* sizeof (uint16));
    str->p->a.words[str->p->len] = str->p->state->p->a.utf8.value;
    str->p->len++;

    cs_soft_reset_s_utf8(str->p->state);

    return 1;
}
static int cs_add_streambyte_to_utf7 P_((struct string *str,int ch));
static int cs_add_streambyte_to_s_utf7 P_((struct charset_state *str, int ch));
static void cs_soft_reset_s_utf7 P_((struct charset_state *str));

/* Feeds one byte of an UTF-7 stream to the decoder state attached to
 * 'str' (created on first use).  When the state reports a complete
 * character, the value is appended to the string and the state is
 * soft reset.
 *
 * Returns 0 when the byte is rejected or when the decoded value does
 * not fit into 16 bits, 1 otherwise.
 */
static int cs_add_streambyte_to_utf7(str,ch)
     struct string *str;
     int ch;
{
    if (!str->p->state)
	str->p->state = new_state_1(str->string_type);

    if (&cs_utf7 != str->p->state->charset->charset_type)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambyte_to_utf7",
	      "Bad state",0);

    if (!cs_add_streambyte_to_s_utf7(str->p->state,ch)) {
	DPRINT(Debug,10,(&Debug,
			 "cs_add_streambyte_to_utf7: Failied to add byte\n"));
	return 0;
    }

    /* Character still incomplete */
    if (!str->p->state->p->ready)
	return 1;

    if (str->p->state->p->a.utf7.value > 0xFFFF)
	return 0;

    /* NOTE:  str->p->a.words is not NUL terminated */
    str->p->a.words = safe_realloc(str->p->a.words,
				   (str->p->len+1)* sizeof (uint16));
    str->p->a.words[str->p->len] = str->p->state->p->a.utf7.value;
    str->p->len++;

    cs_soft_reset_s_utf7(str->p->state);

    return 1;
}
static int cs_add_streambyte_to_imap P_((struct string *str,int ch));
static int cs_add_streambyte_to_s_imap P_((struct charset_state *str, int ch));

/* Feeds one byte of an 'imap' encoded stream to the decoder state
 * attached to 'str' (created on first use).  The imap decoder uses the
 * utf7 state fields and the utf7 soft reset.  When the state reports a
 * complete character, the value is appended to the string and the
 * state is soft reset.
 *
 * Returns 0 when the byte is rejected or when the decoded value does
 * not fit into 16 bits, 1 otherwise.
 */
static int cs_add_streambyte_to_imap(str,ch)
     struct string *str;
     int ch;
{
    if (!str->p->state)
	str->p->state = new_state_1(str->string_type);

    if (&cs_imap != str->p->state->charset->charset_type)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambyte_to_imap",
	      "Bad state",0);

    if (!cs_add_streambyte_to_s_imap(str->p->state,ch)) {
	DPRINT(Debug,10,(&Debug,
			 "cs_add_streambyte_to_imap: Failied to add byte\n"));
	return 0;
    }

    /* Character still incomplete */
    if (!str->p->state->p->ready)
	return 1;

    if (str->p->state->p->a.utf7.value > 0xFFFF)
	return 0;

    /* NOTE:  str->p->a.words is not NUL terminated */
    str->p->a.words = safe_realloc(str->p->a.words,
				   (str->p->len+1)* sizeof (uint16));
    str->p->a.words[str->p->len] = str->p->state->p->a.utf7.value;
    str->p->len++;

    cs_soft_reset_s_utf7(str->p->state);

    return 1;
}
/* Appends the words of 'data' to 'str'.  Nothing is done (and nothing
 * is reallocated) when 'data' is empty.
 */
static void cs_add_intdata_to_utf P_((struct string *str,
				      const struct string *data));
static void cs_add_intdata_to_utf(str,data)
     struct string *str;
     CONST struct string *data;
{
    int src;

    /* realloc with size 0 is equivalent of free and may
       corrupt memory ...
    */
    if (data->p->len <= 0)
	return;

    /* NOTE:  str->p->a.words is not NUL terminated */
    str->p->a.words = safe_realloc(str->p->a.words,
				   (str->p->len+data->p->len)*
				   sizeof (uint16));

    for (src = 0; src < data->p->len; src++) {
	str->p->a.words[str->p->len] = data->p->a.words[src];
	str->p->len++;
    }
}
/* Does compression, recalculation and so on ...
   so 'const' is not very const

   The words of 'str' are mapped to unicode into a scratch array and
   handed to compress_unicode().  If every word could be mapped and
   the compression succeeds, the (possibly shorter) result is mapped
   back and stored to 'str' in place, and the length of 'str' is
   updated.  Otherwise 'str' is left untouched.
*/
static void cs_check_length_utf P_((const struct string *str));
static void cs_check_length_utf(str)
     CONST struct string *str;
{
    if (str->p->len > 0) {
	int L = str->p->len;
	int ok = 1;
	int i;
	/* Scratch array of 16-bit words; size it by its own element
	   type instead of the unrelated (and non-portable) 'uint'.
	*/
	uint16 *words = safe_malloc(L * sizeof (words[0]));

	/* Convert to UNICODE first */
	for (i = 0; i < L; i++) {
	    words[i] = map_utf(str->string_type->map_info,
			       str->p->a.words[i]);
	    if (words[i] == MAPPING_NONE) {
		DPRINT(Debug,61,(&Debug,
				 "%s: WARNING: Mapping %04X for compression failed\n",
				 str->string_type->MIME_name ?
				 str->string_type->MIME_name :
				 "<no MIME name>",
				 str->p->a.words[i]));
		ok = 0;
	    }
	}

	if (ok)
	    ok = compress_unicode(words,&L);

	if (ok) {
	    /* L is now the length after compression */
	    for (i = 0; i < L; i++) {
		int found;
		str->p->a.words[i] =
		    map_utf_rev(str->string_type->map_info,words[i],&found);
		if (!found) {
		    DPRINT(Debug,61,(&Debug,
				     "%s: WARNING: Mapping back %04X after compression failed\n",
				     str->string_type->MIME_name ?
				     str->string_type->MIME_name :
				     "<no MIME name>",
				     words[i]));
		}
	    }
	    DPRINT(Debug,61,(&Debug,
			     "%s: String compressed len=%d => %d\n",
			     str->string_type->MIME_name ?
			     str->string_type->MIME_name :
			     "<no MIME name>",
			     str->p->len,L));
	    str->p->len = L;
	}

	free(words);
    }
}
/* Returns the unicode value of the word at index 'pos' of 'str'.
 * '*found' tells whether the mapping succeeded; when it did not,
 * 0x003F ('?') is returned.  Panics if 'pos' is outside the string.
 */
static uint16 cs_give_unicode_from_utf P_((const struct string *str,
					   int pos, int *found));
static uint16 cs_give_unicode_from_utf(str,pos,found)
     CONST struct string *str;
     int pos;
     int *found;
{
    uint16 unicode;

    if (pos < 0 || pos >= str->p->len)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_give_unicode_from_utf",
	      "Index out of array",0);

    unicode = map_utf(str->string_type->map_info,str->p->a.words[pos]);

    if (MAPPING_NONE == unicode) {
	*found = 0;
	return 0x003F;  /* '?' */
    }

    *found = 1;
    return unicode;
}
/* Appends 'len' unicode values from 'data' to 'str', mapping each one
 * through map_utf_rev() (values without mapping are stored as the
 * replacement returned by map_utf_rev()).  Does nothing when 'len' is
 * not positive.
 */
static void cs_add_unicodedata_to_utf P_((struct string *str,
					  int len, const uint16 *data));
static void cs_add_unicodedata_to_utf(str,len,data)
     struct string *str;
     int len;
     CONST uint16 *data;
{
    int src;

    /* realloc with size 0 is equivalent of free and may
       corrupt memory ...
    */
    if (len <= 0)
	return;

    /* NOTE:  str->p->a.words is not NUL terminated */
    str->p->a.words = safe_realloc(str->p->a.words,
				   (str->p->len+len)* sizeof (uint16));

    for (src = 0; src < len; src++) {
	int mapped_ok;     /* result is not examined */

	str->p->a.words[str->p->len] =
	    map_utf_rev(str->string_type->map_info,data[src],&mapped_ok);
	str->p->len++;
    }
}
/* Word by word comparison of two strings of the same charset type.
 * Returns -1, 0 or 1; when one string is a prefix of the other, the
 * shorter one sorts first.  Panics on charset type mismatch.
 */
static int cs_cmp_utf P_((const struct string *str1,const struct string *str2));
static int cs_cmp_utf(str1,str2)
     CONST struct string *str1;
     CONST struct string *str2;
{
    int idx = 0;

    if (str1->string_type->charset_type !=
	str2->string_type->charset_type)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_cmp_utf",
	      "String type mismatch",0);

    while (idx < str1->p->len && idx < str2->p->len) {
	if (str1->p->a.words[idx] < str2->p->a.words[idx])
	    return -1;
	if (str1->p->a.words[idx] > str2->p->a.words[idx])
	    return 1;
	idx++;
    }

    /* Common prefix is equal; whoever has words left is greater */
    if (idx < str1->p->len)
	return 1;
    if (idx < str2->p->len)
	return -1;
    return 0;
}
typedef unsigned char utf8_buffer[4];

/* Encodes the 16-bit value 'val' as UTF-8 to 'buffer' and returns the
 * number of bytes written (1 .. 3).  No terminating NUL is written.
 *
 *      0x0000 .. 0x007F    1 byte    0xxxxxxx
 *      0x0080 .. 0x07FF    2 bytes   110xxxxx 10xxxxxx
 *      0x0800 .. 0xFFFF    3 bytes   1110xxxx 10xxxxxx 10xxxxxx
 *
 * Values above 0xFFFF are not supported and cause panic.
 */
static int gen_utf8_char P_((utf8_buffer buffer, unsigned int val));
static int gen_utf8_char(buffer,val)
     utf8_buffer buffer;
     unsigned int val;
{
    int l,x;
    unsigned char lead;

    if (val < 0x0080) {
	buffer[0] = val;
	return 1;
    }

    if (val > 0xFFFF) {
	panic("STRING PANIC",__FILE__,__LINE__,"gen_utf8_char",
	      "Value overflow?",0);
    }

    /* 0x07FF is the last value of the two byte form; encoding it
       with three bytes would produce an overlong sequence.
    */
    if (val < 0x0800) {
	lead = 192;
	l = x = 2;
    } else {
	lead = 224;
	l = x = 3;
    }

    /* Fill from the last byte backwards, six bits per byte */
    while (x > 0) {
	buffer[--x] = 128 | (val & 63);
	val >>= 6;
    }

    /* Turn the first byte into the lead byte of the sequence */
    buffer[0] |= lead;

    return l;
}
/* Returns the words of 'str' encoded with gen_utf8_char() as a newly
 * allocated, NUL terminated buffer; '*reslen' gets the length without
 * the NUL.  With 'printable' set, words which can not be mapped to a
 * printable unicode character are replaced with '?'.  'terminal' is
 * not used.
 */
static unsigned char *cs_stream_from_utf8 P_((const struct string *str,
					      int printable,
					      screen_info_p terminal,
					      int *reslen));
static unsigned char *cs_stream_from_utf8(str,printable,terminal,reslen)
     CONST struct string *str;
     int printable;
     screen_info_p terminal;
     int *reslen;
{
    /* At most three bytes per word, plus NUL */
    unsigned char * out = safe_malloc(str->p->len*3+1);
    int used = 0;
    int idx;

    for (idx = 0; idx < str->p->len; idx++) {
	uint16 word = str->p->a.words[idx];

	if (printable) {
	    uint16 unicode = map_utf(str->string_type->map_info,word);

	    if (MAPPING_NONE == unicode ||
		!unicode_ch(unicode,UOP_printable))
		word = 0x003F;  /* '?' */
	}

	used += gen_utf8_char(out+used,word);
    }

    out[used] = '\0';
    *reslen = used;

    return out;
}
/* Tells whether 'unicode' must be base64 encoded on UTF-7 output.
 *
 *   0                      -- character is written directly
 *   utf7_encode_optional   -- character belongs to the optional direct
 *                             set; the setting decides
 *   1                      -- everything else
 */
static int need_utf7_encode P_((unsigned int unicode));
static int need_utf7_encode(unicode)
     unsigned int unicode;
{
    /* directly encoded characters: letters and digits */
    if ((0x0041 /* 'A' */ <= unicode && unicode <= 0x005A /* 'Z' */) ||
	(0x0061 /* 'a' */ <= unicode && unicode <= 0x007A /* 'z' */) ||
	(0x0030 /* '0' */ <= unicode && unicode <= 0x0039 /* '9' */))
	return 0;

    switch (unicode) {

	/* directly encoded characters */
    case 39:   /* ' */
    case 40:   /* ( */
    case 41:   /* ) */
    case 44:   /* , */
    case 45:   /* - */
    case 46:   /* . */
    case 47:   /* / */
    case 58:   /* : */
    case 63:   /* ? */
	/* white space */
    case 32:   /* space */
    case 9:    /* tab   */
    case 13:   /* cr    */
    case 10:   /* lf    */
	return 0;

	/* optional direct characters */
    case 33:   /* ! */
    case 34:   /* " */
    case 35:   /* # */
    case 36:   /* $ */
    case 37:   /* % */
    case 38:   /* & */
    case 42:   /* * */
    case 59:   /* ; */
    case 60:   /* < */
    case 61:   /* = */
    case 62:   /* > */
    case 64:   /* @ */
    case 91:   /* [ */
    case 93:   /* ] */
    case 94:   /* ^ */
    case 95:   /* _ */
    case 96:   /* ` */
    case 123:  /* { */
    case 124:  /* | */
    case 125:  /* } */
	return utf7_encode_optional;
    }

    return 1;
}
/* Tells whether 'unicode' must be base64 encoded on 'imap' output:
 * returns 0 for 0x20 .. 0x7e except 0x26 ('&'), 1 for everything else.
 */
static int need_imap_encode P_((unsigned int unicode));
static int need_imap_encode(unicode)
     unsigned int unicode;
{
    /* Outside of the directly representable range */
    if (unicode < 0x20 || unicode > 0x7e)
	return 1;

    /* '&' is the shift character */
    if (0x26 == unicode)
	return 1;

    return 0;
}
typedef unsigned char utf7_buffer[4];
/* Emits the UTF-7 bytes for one value 'val' to 'buffer' and returns the
   number of bytes written.  A NUL is stored after them (not counted),
   so up to four bytes of 'buffer' are touched.

   The encoder state is kept by the caller between calls:
   *encoded -- non-zero while inside a base64 section ("+...")
   *bits    -- number of pending bits not yet written (less than 6
               after each call)
   *bitval  -- the pending bits themselves

   Three cases:
   - '+' outside of a base64 section is written as "+-"
   - a value for which need_utf7_encode() is non-zero is added to the
     bit accumulator (opening the section with '+' if needed) and all
     complete 6-bit groups are written
   - any other value first closes an open section (flushing the pending
     bits, then '-') and is then written as itself

   Panics on values above 0xFFFF in a base64 section, on values above
   0x7f written directly, and on internal inconsistencies. */
static int gen_utf7_char P_((int *encoded, int *bits, unsigned long *bitval,
utf7_buffer buffer, unsigned int val));
static int gen_utf7_char(encoded,bits,bitval,buffer,val)
int *encoded;
int *bits;
unsigned long *bitval;
utf7_buffer buffer;
unsigned int val;
{
int x = 0;
if (0x002B /* '+' */ == val && !*encoded) {
/* Special case */
buffer[x++] = 0x2B; /* '+' */
buffer[x++] = 0x2D; /* '-' */
} else if (need_utf7_encode(val)) {
if (!*encoded) {
/* Open the base64 section with an empty accumulator */
buffer[x++] = 0x2B; /* '+' */
(*encoded)++;
*bits = 0;
*bitval = 0;
}
if (val > 0xFFFF) {
panic("STRING PANIC",__FILE__,__LINE__,"gen_utf7_char",
"Value overflow",0);
}
/* Append the 16 bits of val below the pending bits */
*bitval <<= 16;
*bitval |= val;
*bits += 16;
/* Write complete 6-bit groups, most significant first */
while (*bits >= 6) {
long code = *bitval >> (*bits-6);
int c = to64(code);
if (-1 == c) {
panic("STRING PANIC",__FILE__,__LINE__,
"gen_utf7_char",
"Encode error",0);
}
buffer[x++] = c;
/* Remove the written group from the accumulator */
*bitval -= code << (*bits-6);
*bits -= 6;
}
} else {
if (*encoded) {
if (*bits > 6)
panic("STRING PANIC",__FILE__,__LINE__,
"gen_utf7_char",
"Encode error",0);
if (*bits > 0) {
/* Put code to most significant bits,
and fill leftover bits with 0
*/
long code = *bitval << (6-*bits);
int c = to64(code);
if (-1 == c) {
panic("STRING PANIC",__FILE__,__LINE__,
"cs_stream_from_utf7",
"Encode error",0);
}
buffer[x++] = c;
*bitval = 0;
*bits = 0;
}
/* Close the base64 section */
buffer[x++] = 0x2D; /* '-' */
*encoded = 0;
}
if (val > 0x7f)
panic("STRING PANIC",__FILE__,__LINE__,
"cs_stream_from_utf7",
"Value error",0);
buffer[x++] = val;
}
buffer[x] = '\0';
return x;
}
/* Emits the 'imap' encoded bytes for one value 'val' to 'buffer' and
   returns the number of bytes written.  A NUL is stored after them
   (not counted), so up to four bytes of 'buffer' are touched.

   Works like gen_utf7_char() with these differences: the shift
   character is '&' instead of '+', need_imap_encode() selects the
   values to be encoded, and 6-bit groups are written with toimap().

   The encoder state is kept by the caller between calls:
   *encoded -- non-zero while inside a base64 section ("&...")
   *bits    -- number of pending bits not yet written (less than 6
               after each call)
   *bitval  -- the pending bits themselves */
static int gen_imap_char P_((int *encoded, int *bits, unsigned long *bitval,
utf7_buffer buffer, unsigned int val));
static int gen_imap_char(encoded,bits,bitval,buffer,val)
int *encoded;
int *bits;
unsigned long *bitval;
utf7_buffer buffer;
unsigned int val;
{
int x = 0;
if (0x0026 /* '&' */ == val && !*encoded) {
/* Special case */
buffer[x++] = 0x26; /* '&' */
buffer[x++] = 0x2D; /* '-' */
} else if (need_imap_encode(val)) {
if (!*encoded) {
/* Open the base64 section with an empty accumulator */
buffer[x++] = 0x26; /* '&' */
(*encoded)++;
*bits = 0;
*bitval = 0;
}
if (val > 0xFFFF) {
panic("STRING PANIC",__FILE__,__LINE__,"gen_imap_char",
"Value overflow",0);
}
/* Append the 16 bits of val below the pending bits */
*bitval <<= 16;
*bitval |= val;
*bits += 16;
/* Write complete 6-bit groups, most significant first */
while (*bits >= 6) {
long code = *bitval >> (*bits-6);
int c = toimap(code);
if (-1 == c) {
panic("STRING PANIC",__FILE__,__LINE__,
"gen_imap_char",
"Encode error",0);
}
buffer[x++] = c;
/* Remove the written group from the accumulator */
*bitval -= code << (*bits-6);
*bits -= 6;
}
} else {
if (*encoded) {
if (*bits > 6)
panic("STRING PANIC",__FILE__,__LINE__,
"gen_imap_char",
"Encode error",0);
if (*bits > 0) {
/* Put code to most significant bits,
and fill leftover bits with 0
*/
long code = *bitval << (6-*bits);
int c = toimap(code);
if (-1 == c) {
panic("STRING PANIC",__FILE__,__LINE__,
"cs_stream_from_imap",
"Encode error",0);
}
buffer[x++] = c;
*bitval = 0;
*bits = 0;
}
/* Close the base64 section */
buffer[x++] = 0x2D; /* '-' */
*encoded = 0;
}
if (val > 0x7f)
panic("STRING PANIC",__FILE__,__LINE__,
"cs_stream_from_imap",
"Value error",0);
buffer[x++] = val;
}
buffer[x] = '\0';
return x;
}
/* Closes an open UTF-7 base64 section at the end of output: writes the
 * pending bits (padded with zero bits to a full group) followed by '-'
 * and clears the encoder state.  Returns the number of bytes written
 * (0 when no section is open).  No NUL is written.
 */
static int end_utf7_char P_((int *encoded, int *bits, unsigned long *bitval,
			     utf7_buffer buffer));
static int end_utf7_char(encoded,bits,bitval,buffer)
     int *encoded;
     int *bits;
     unsigned long *bitval;
     utf7_buffer buffer;
{
    int written = 0;

    if (!*encoded)
	return 0;

    if (*bits > 0) {
	/* Put code to most significant bits,
	   and fill leftover bits with 0
	*/
	long code = *bitval << (6-*bits);
	int c = to64(code);

	if (-1 == c) {
	    panic("STRING PANIC",__FILE__,__LINE__,
		  "cs_stream_from_utf7",
		  "Encode error",0);
	}

	buffer[written++] = c;
	*bitval = 0;
	*bits   = 0;
    }

    buffer[written++] = 0x2D; /* '-' */
    *encoded = 0;

    return written;
}
/* Closes an open 'imap' base64 section at the end of output: writes
 * the pending bits (padded with zero bits to a full group) followed by
 * '-' and clears the encoder state.  Returns the number of bytes
 * written (0 when no section is open).  No NUL is written.
 */
static int end_imap_char P_((int *encoded, int *bits, unsigned long *bitval,
			     utf7_buffer buffer));
static int end_imap_char(encoded,bits,bitval,buffer)
     int *encoded;
     int *bits;
     unsigned long *bitval;
     utf7_buffer buffer;
{
    int written = 0;

    if (!*encoded)
	return 0;

    if (*bits > 0) {
	/* Put code to most significant bits,
	   and fill leftover bits with 0
	*/
	long code = *bitval << (6-*bits);
	int c = toimap(code);

	if (-1 == c) {
	    panic("STRING PANIC",__FILE__,__LINE__,
		  "cs_stream_from_imap",
		  "Encode error",0);
	}

	buffer[written++] = c;
	*bitval = 0;
	*bits   = 0;
    }

    buffer[written++] = 0x2D; /* '-' */
    *encoded = 0;

    return written;
}
/* Returns the words of 'str' encoded as UTF-7 in a newly allocated,
 * NUL terminated buffer; '*reslen' gets the length without the NUL.
 * With 'printable' set, words which can not be mapped to a printable
 * unicode character are replaced with '?'.  'terminal' is not used.
 */
static unsigned char *cs_stream_from_utf7 P_((const struct string *str,
					      int printable,
					      screen_info_p terminal,
					      int *reslen));
static unsigned char *cs_stream_from_utf7(str,printable,terminal,reslen)
     CONST struct string *str;
     int printable;
     screen_info_p terminal;
     int *reslen;
{
    unsigned char * out = safe_malloc(str->p->len*5+1);
    int used = 0;
    int idx;

    /* Encoder state shared by gen_utf7_char() / end_utf7_char() */
    int           encoded = 0;
    int           bits    = 0;
    unsigned long bitval  = 0;

    for (idx = 0; idx < str->p->len; idx++) {
	uint16 word = str->p->a.words[idx];

	if (printable) {
	    uint16 unicode = map_utf(str->string_type->map_info,word);

	    if (MAPPING_NONE == unicode ||
		!unicode_ch(unicode,UOP_printable))
		word = 0x003F;  /* '?' */
	}

	used += gen_utf7_char(&encoded,&bits,&bitval,out+used,word);
    }

    /* Close possible open base64 section */
    used += end_utf7_char(&encoded,&bits,&bitval,out+used);

    out[used] = '\0';
    *reslen = used;

    return out;
}
/* Returns the words of 'str' in 'imap' encoding in a newly allocated,
 * NUL terminated buffer; '*reslen' gets the length without the NUL.
 * With 'printable' set, words which can not be mapped to a printable
 * unicode character are replaced with '?'.  'terminal' is not used.
 */
static unsigned char *cs_stream_from_imap P_((const struct string *str,
					      int printable,
					      screen_info_p terminal,
					      int *reslen));
static unsigned char *cs_stream_from_imap(str,printable,terminal,reslen)
     CONST struct string *str;
     int printable;
     screen_info_p terminal;
     int *reslen;
{
    unsigned char * out = safe_malloc(str->p->len*5+1);
    int used = 0;
    int idx;

    /* Encoder state shared by gen_imap_char() / end_imap_char() */
    int           encoded = 0;
    int           bits    = 0;
    unsigned long bitval  = 0;

    for (idx = 0; idx < str->p->len; idx++) {
	uint16 word = str->p->a.words[idx];

	if (printable) {
	    uint16 unicode = map_utf(str->string_type->map_info,word);

	    if (MAPPING_NONE == unicode ||
		!unicode_ch(unicode,UOP_printable))
		word = 0x003F;  /* '?' */
	}

	used += gen_imap_char(&encoded,&bits,&bitval,out+used,word);
    }

    /* Close possible open base64 section */
    used += end_imap_char(&encoded,&bits,&bitval,out+used);

    out[used] = '\0';
    *reslen = used;

    return out;
}
/* Returns 1 if every word of 'str' is at most 127, otherwise 0.
 * An empty string gives 1.
 */
static int cs_can_ascii_utf P_((const struct string *str));
static int cs_can_ascii_utf(str)
     CONST struct string *str;
{
    int idx = 0;

    while (idx < str->p->len) {
	/* str->p->a.words is unsigned */
	if (str->p->a.words[idx] > 127)
	    return 0;
	idx++;
    }

    return 1;
}
/* Encodes with gen_utf8_char() at most 'len' words of 'str', starting
 * from '*pos', into a newly allocated NUL terminated buffer.  '*pos'
 * is advanced past the words consumed.  Words which can not be mapped
 * to a printable unicode character are replaced with '?'.  Panics on
 * negative '*pos' or 'len'.
 */
static unsigned char *cs_streamclip_from_utf8 P_((const struct string *str,
						  int *pos, int len,
						  screen_info_p terminal,
						  struct cs_printable_len *printable_len));
static unsigned char *cs_streamclip_from_utf8(str,pos,len,terminal,printable_len)
     CONST struct string *str;
     int *pos;
     int len;
     screen_info_p terminal;                   /* NOT USED */
     struct cs_printable_len *printable_len;   /* NOT USED */
{
    unsigned char * out;
    int used  = 0;
    int taken = 0;

    if (*pos < 0)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_utf8",
	      "Index out of array",0);

    if (len < 0)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_utf8",
	      "Negative size",0);

    out = safe_malloc(len*3+1);

    while (taken < len && *pos < str->p->len) {
	uint16 word    = str->p->a.words[*pos];
	uint16 unicode = map_utf(str->string_type->map_info,word);

	if (MAPPING_NONE == unicode ||
	    !unicode_ch(unicode,UOP_printable))
	    word = 0x003F;  /* '?' */

	used += gen_utf8_char(out+used,word);

	taken++;
	(*pos)++;
    }

    out[used] = '\0';

    return out;
}
/* Encodes as UTF-7 at most 'len' words of 'str', starting from '*pos',
 * into a newly allocated NUL terminated buffer.  '*pos' is advanced
 * past the words consumed.  Words which can not be mapped to a
 * printable unicode character are replaced with '?'.  Panics on
 * negative '*pos' or 'len'.
 */
static unsigned char *cs_streamclip_from_utf7 P_((const struct string *str,
						  int *pos, int len,
						  screen_info_p terminal,
						  struct cs_printable_len *printable_len));
static unsigned char *cs_streamclip_from_utf7(str,pos,len,terminal,printable_len)
     CONST struct string *str;
     int *pos;
     int len;
     screen_info_p terminal;                   /* NOT USED */
     struct cs_printable_len *printable_len;   /* NOT USED */
{
    unsigned char * out;
    int used  = 0;
    int taken = 0;

    /* Encoder state shared by gen_utf7_char() / end_utf7_char() */
    int           encoded = 0;
    int           bits    = 0;
    unsigned long bitval  = 0;

    if (*pos < 0)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_utf7",
	      "Index out of array",0);

    if (len < 0)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_utf7",
	      "Negative size",0);

    out = safe_malloc(len*5+1);

    while (taken < len && *pos < str->p->len) {
	uint16 word    = str->p->a.words[*pos];
	uint16 unicode = map_utf(str->string_type->map_info,word);

	if (MAPPING_NONE == unicode ||
	    !unicode_ch(unicode,UOP_printable))
	    word = 0x003F;  /* '?' */

	used += gen_utf7_char(&encoded,&bits,&bitval,out+used,word);

	taken++;
	(*pos)++;
    }

    /* Close possible open base64 section */
    used += end_utf7_char(&encoded,&bits,&bitval,out+used);

    out[used] = '\0';

    return out;
}
/* Encodes in 'imap' encoding at most 'len' words of 'str', starting
 * from '*pos', into a newly allocated NUL terminated buffer.  '*pos'
 * is advanced past the words consumed.  Words which can not be mapped
 * to a printable unicode character are replaced with '?'.  Panics on
 * negative '*pos' or 'len'.
 */
static unsigned char *cs_streamclip_from_imap P_((const struct string *str,
						  int *pos, int len,
						  screen_info_p terminal,
						  struct cs_printable_len *printable_len));
static unsigned char *cs_streamclip_from_imap(str,pos,len,terminal,printable_len)
     CONST struct string *str;
     int *pos;
     int len;
     screen_info_p terminal;                   /* NOT USED */
     struct cs_printable_len *printable_len;   /* NOT USED */
{
    unsigned char * out;
    int used  = 0;
    int taken = 0;

    /* Encoder state shared by gen_imap_char() / end_imap_char() */
    int           encoded = 0;
    int           bits    = 0;
    unsigned long bitval  = 0;

    if (*pos < 0)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_imap",
	      "Index out of array",0);

    if (len < 0)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_streamclip_from_imap",
	      "Negative size",0);

    out = safe_malloc(len*5+1);

    while (taken < len && *pos < str->p->len) {
	uint16 word    = str->p->a.words[*pos];
	uint16 unicode = map_utf(str->string_type->map_info,word);

	if (MAPPING_NONE == unicode ||
	    !unicode_ch(unicode,UOP_printable))
	    word = 0x003F;  /* '?' */

	used += gen_imap_char(&encoded,&bits,&bitval,out+used,word);

	taken++;
	(*pos)++;
    }

    /* Close possible open base64 section */
    used += end_imap_char(&encoded,&bits,&bitval,out+used);

    out[used] = '\0';

    return out;
}
/* Replaces the content of 'ret' with at most 'len' words of 'str',
 * starting from '*pos'; '*pos' is advanced past the words copied.
 * With 'len' zero 'ret' becomes the empty string.  Panics on charset
 * type mismatch and on negative '*pos' or 'len'.
 */
static void cs_clip_from_utf P_((struct string *ret,
				 const struct string *str,
				 int *pos, int len));
static void cs_clip_from_utf(ret,str,pos,len)
     struct string *ret;
     CONST struct string *str;
     int *pos;
     int len;
{
    int copied;

    if (ret->string_type->charset_type !=
	str->string_type->charset_type)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_clip_from_utf",
	      "String type mismatch",0);

    if (*pos < 0)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_clip_from_utf",
	      "Index out of array",0);

    if (len < 0)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_clip_from_utf",
	      "Negative size",0);

    if (!(len > 0)) {
	/* NULL (empty) string */
	if (ret->p->a.words)
	    free(ret->p->a.words);
	ret->p->a.words = NULL;
	ret->p->len     = 0;
	return;
    }

    /* realloc with len == 0 is equivalent of freeing and
       may result corruption of memory ...
    */

    /* NOTE:  str->p->a.words is not NUL terminated */
    ret->p->a.words = safe_realloc(ret->p->a.words,
				   len* sizeof (uint16));
    ret->p->len = 0;

    for (copied = 0; copied < len && *pos < str->p->len; copied++) {
	ret->p->a.words[ret->p->len] = str->p->a.words[*pos];
	ret->p->len++;
	(*pos)++;
    }
}
/* Searches 'pattern' from 'str' by exact word comparison.
 *
 * Returns 1 on match and 0 on no match.  Case-insensitive search is
 * not done here: with 'ignore_case' set, -1 is returned so that the
 * caller falls back to UNICODE comparison.  Panics on charset type
 * mismatch.
 */
static int cs_find_pattern_from_utf P_((const struct string *str,
					const struct string *pattern,
					int ignore_case));
static int cs_find_pattern_from_utf(str,pattern,ignore_case)
     CONST struct string *str;
     CONST struct string *pattern;
     int ignore_case;
{
    int result = 0;
    int start;

    if (pattern->string_type->charset_type !=
	str->string_type->charset_type)
	panic("STRING PANIC",__FILE__,__LINE__,"cs_find_pattern_from_utf",
	      "String type mismatch",0);

    if (ignore_case) {
	DPRINT(Debug,63,(&Debug,
			 "cs_find_pattern_from_utf=-1\n"));
	return -1;   /* Use UNICODE comparision on upper level instead */
    }

    /* Try every start position of str in turn */
    for (start = 0; start < str->p->len; start++) {
	int matched = 0;   /* words of pattern matched so far */

	while (matched < pattern->p->len &&
	       start + matched < str->p->len &&
	       str->p->a.words[start + matched] ==
	       pattern->p->a.words[matched])
	    matched++;

	if (matched >= pattern->p->len) {
	    DPRINT(Debug,63,(&Debug,
			     "cs_find_pattern_from_utf=1 MATCH\n"));
	    result = 1;
	    break;
	}
    }

    if (!result) {
	DPRINT(Debug,63,(&Debug,
			 "cs_find_pattern_from_utf=0 NO MATCH\n"));
    }

    return result;
}
/* Returns number of bytes added */
/* Feeds 'count' bytes of an UTF-8 stream from 'data' to the decoder
 * state of 'str' (created on first use) and appends every completed
 * character to 'str'.  A rejected byte, or a decoded value which does
 * not fit into 16 bits, increments '*errors', appends MAPPING_NONE and
 * soft resets the decoder.  All 'count' bytes are always consumed.
 */
static int cs_add_streambytes_to_utf8 P_((struct string *str,
					  int count,
					  const unsigned char *data,
					  int *errors));
static int cs_add_streambytes_to_utf8(str,count,data, errors)
     struct string *str;
     int count;
     CONST unsigned char *data;
     int *errors;
{
    int i;

    *errors = 0;

    /* realloc with size 0 is equivalent of free and may
       corrupt memory ...
    */
    if (count <= 0)
	return 0;

    if (!str->p->state)
	str->p->state = new_state_1(str->string_type);

    if (str->p->state->charset->charset_type != &cs_utf8)
	panic("STRING PANIC",__FILE__,__LINE__,
	      "cs_add_streambytes_to_utf8",
	      "Bad state",0);

    /* Each byte produces at most one word */
    /* NOTE:  str->p->a.words is not NUL terminated */
    str->p->a.words = safe_realloc(str->p->a.words,
				   (str->p->len+count)* sizeof (uint16));

    for (i = 0; i < count; i++) {
	int failed = 0;

	if (!cs_add_streambyte_to_s_utf8(str->p->state,data[i])) {
	    DPRINT(Debug,10,(&Debug,
			     "cs_add_streambytes_to_utf8: Failed to add byte, idx=%d\n",
			     i));
	    failed = 1;
	} else if (str->p->state->p->ready) {
	    if (str->p->state->p->a.utf8.value > 0xFFFF) {
		DPRINT(Debug,10,(&Debug,
				 "cs_add_streambytes_to_utf8: Value (%X) out of range\n",
				 str->p->state->p->a.utf8.value));
		failed = 1;
	    } else {
		str->p->a.words[str->p->len++] =
		    str->p->state->p->a.utf8.value;
		cs_soft_reset_s_utf8(str->p->state);
	    }
	}

	if (failed) {
	    (*errors)++;
	    str->p->a.words[str->p->len++] = MAPPING_NONE;
	    cs_soft_reset_s_utf8(str->p->state);
	}
    }

    return i;
}
/* Feeds 'count' bytes of an UTF-7 stream from 'data' to the decoder
 * state of 'str' (created on first use) and appends every completed
 * character to 'str'.  A rejected byte, or a decoded value which does
 * not fit into 16 bits, increments '*errors', appends MAPPING_NONE and
 * soft resets the decoder.  All 'count' bytes are always consumed;
 * the return value is the number of bytes consumed.
 */
static int cs_add_streambytes_to_utf7 P_((struct string *str,
					  int count,
					  const unsigned char *data,
					  int *errors));
static int cs_add_streambytes_to_utf7(str,count,data,errors)
     struct string *str;
     int count;
     CONST unsigned char *data;
     int *errors;
{
    int i;

    *errors = 0;

    /* realloc with size 0 is equivalent of free and may
       corrupt memory ...
    */
    if (count <= 0)
	return 0;

    if (!str->p->state)
	str->p->state = new_state_1(str->string_type);

    if (str->p->state->charset->charset_type != &cs_utf7)
	panic("STRING PANIC",__FILE__,__LINE__,
	      "cs_add_streambytes_to_utf7",
	      "Bad state",0);

    /* Each byte produces at most one word */
    /* NOTE:  str->p->a.words is not NUL terminated */
    str->p->a.words = safe_realloc(str->p->a.words,
				   (str->p->len+count)* sizeof (uint16));

    for (i = 0; i < count; i++) {
	int failed = 0;

	if (!cs_add_streambyte_to_s_utf7(str->p->state,data[i])) {
	    DPRINT(Debug,10,(&Debug,
			     "cs_add_streambytes_to_utf7: Failed to add byte, idx=%d\n",
			     i));
	    failed = 1;
	} else if (str->p->state->p->ready) {
	    if (str->p->state->p->a.utf7.value > 0xFFFF) {
		DPRINT(Debug,60,(&Debug,
				 "cs_add_streambytes_to_utf7: Value (%X) out of range\n",
				 str->p->state->p->a.utf7.value));
		failed = 1;
	    } else {
		str->p->a.words[str->p->len++] =
		    str->p->state->p->a.utf7.value;
		cs_soft_reset_s_utf7(str->p->state);
	    }
	}

	if (failed) {
	    (*errors) ++;
	    str->p->a.words[str->p->len++] = MAPPING_NONE;
	    cs_soft_reset_s_utf7(str->p->state);
	}
    }

    return i;
}
static int cs_add_streambytes_to_imap P_((struct string *str,
					  int count,
					  const unsigned char *data,
					  int *errors));
/* Feed 'count' bytes of IMAP (modified UTF-7) stream data through the
 * string's decoder state and append every completed 16-bit value to
 * str->p->a.words.
 *
 * Each byte that the decoder rejects, and each completed value above
 * 0xFFFF, is stored as MAPPING_NONE and counted in *errors.
 *
 * Returns the number of bytes consumed (count), or 0 when count <= 0.
 */
static int cs_add_streambytes_to_imap(str,count,data,errors)
     struct string *str;
     int count;
     CONST unsigned char *data;
     int *errors;
{
    int i;

    *errors = 0;

    /* realloc with size 0 is equivalent of free and may
       corrupt memory, so empty input is handled up front
    */
    if (count <= 0)
	return 0;

    if (!str->p->state)
	str->p->state = new_state_1(str->string_type);

    if (str->p->state->charset->charset_type != &cs_imap)
	panic("STRING PANIC",__FILE__,__LINE__,
	      "cs_add_streambytes_to_imap",
	      "Bad state",0);

    /* NOTE: str->p->a.words is not NUL terminated.
       At most one word is appended per input byte. */
    str->p->a.words = safe_realloc(str->p->a.words,
				   (str->p->len+count)* sizeof (uint16));

    for (i = 0; i < count; i++) {
	int failed = 0;

	if (!cs_add_streambyte_to_s_imap(str->p->state,data[i])) {
	    DPRINT(Debug,10,(&Debug,
			     "cs_add_streambytes_to_imap: Failed to add byte, idx=%d\n",
			     i));
	    failed = 1;
	} else if (str->p->state->p->ready) {
	    if (str->p->state->p->a.utf7.value > 0xFFFF) {
		DPRINT(Debug,60,(&Debug,
				 "cs_add_streambytes_to_imap: Value (%X) out of range\n",
				 str->p->state->p->a.utf7.value));
		failed = 1;
	    } else {
		str->p->a.words[str->p->len++] =
		    str->p->state->p->a.utf7.value;
		cs_soft_reset_s_utf7(str->p->state);
	    }
	}

	if (failed) {
	    (*errors)++;
	    str->p->a.words[str->p->len++] = MAPPING_NONE;
	    /* Clear value/ready like the utf-7 and utf-8 variants do.
	       Without this, an out-of-range value would leave 'ready'
	       set and be picked up again on the following byte. */
	    cs_soft_reset_s_utf7(str->p->state);
	}
    }

    return count;
}
static struct map_info * cs_find_utf8 P_((const char * map_name));
/* Look up the UTF-8 map called 'map_name'.
 *
 * The built-in map is matched with istrcmp(); maps opened earlier are
 * remembered in a function-local table and matched with strcmp().
 * An unknown name is passed to open_utf8_map() and, on success, the
 * new map is remembered.  Returns NULL when no map could be opened.
 */
static struct map_info * cs_find_utf8(map_name)
     CONST char * map_name;
{
    static struct map_info ** loaded = NULL;
    static int loaded_count = 0;

    struct map_info *map;
    int n = 0;

    if (istrcmp(map_name,map_utf8.map_name) == 0)
	return &map_utf8;

    while (n < loaded_count) {
	if (strcmp(loaded[n]->map_name,map_name) == 0)
	    return loaded[n];
	n++;
    }

    map = open_utf8_map(map_name);
    if (!map)
	return NULL;

    /* Remember the freshly opened map for later lookups */
    loaded = safe_realloc(loaded,
			  (loaded_count + 1) *
			  sizeof (struct map_info *));
    loaded[loaded_count] = map;
    loaded_count++;

    return map;
}
static struct map_info * cs_find_utf7 P_((const char * map_name));
/* Look up the UTF-7 map called 'map_name'.
 *
 * The built-in map is matched with istrcmp(); maps opened earlier are
 * remembered in a function-local table and matched with strcmp().
 * An unknown name is passed to open_utf7_map() and, on success, the
 * new map is remembered.  Returns NULL when no map could be opened.
 */
static struct map_info * cs_find_utf7(map_name)
     CONST char * map_name;
{
    static struct map_info ** loaded = NULL;
    static int loaded_count = 0;

    struct map_info *map;
    int n = 0;

    if (istrcmp(map_name,map_utf7.map_name) == 0)
	return &map_utf7;

    while (n < loaded_count) {
	if (strcmp(loaded[n]->map_name,map_name) == 0)
	    return loaded[n];
	n++;
    }

    map = open_utf7_map(map_name);
    if (!map)
	return NULL;

    /* Remember the freshly opened map for later lookups */
    loaded = safe_realloc(loaded,
			  (loaded_count + 1) *
			  sizeof (struct map_info *));
    loaded[loaded_count] = map;
    loaded_count++;

    return map;
}
static struct map_info * cs_find_imap P_((const char * map_name));
/* Look up the IMAP map called 'map_name'.
 *
 * The built-in map is matched with istrcmp(); maps opened earlier are
 * remembered in a function-local table and matched with strcmp().
 * An unknown name is passed to open_imap_map() and, on success, the
 * new map is remembered.  Returns NULL when no map could be opened.
 */
static struct map_info * cs_find_imap(map_name)
     CONST char * map_name;
{
    static struct map_info ** loaded = NULL;
    static int loaded_count = 0;

    struct map_info *map;
    int n = 0;

    if (istrcmp(map_name,map_imap.map_name) == 0)
	return &map_imap;

    while (n < loaded_count) {
	if (strcmp(loaded[n]->map_name,map_name) == 0)
	    return loaded[n];
	n++;
    }

    map = open_imap_map(map_name);
    if (!map)
	return NULL;

    /* Remember the freshly opened map for later lookups */
    loaded = safe_realloc(loaded,
			  (loaded_count + 1) *
			  sizeof (struct map_info *));
    loaded[loaded_count] = map;
    loaded_count++;

    return map;
}
static void cs_remove_control_utf P_((const struct string *str));
/* Overwrite, in place, every word of the string that is a control
 * character with a space (32).
 *
 * A word is replaced when it is below 32 or equal to 127, when
 * map_utf() gives MAPPING_NONE for it, or when unicode_ch() with
 * UOP_noctrl returns zero for the mapped value.
 */
static void cs_remove_control_utf(str)
     CONST struct string *str;
{
    int pos;

    for (pos = 0; pos < str->p->len; pos++) {
	uint16 code = str->p->a.words[pos];
	int replace = (code < 32 || code == 127);

	if (!replace) {
	    uint16 unicode = map_utf(str->string_type->map_info,
				     code);

	    replace = (MAPPING_NONE == unicode ||
		       !unicode_ch(unicode,UOP_noctrl));
	}

	if (replace)
	    str->p->a.words[pos] = 32;
    }
}
static void cs_add_state_to_utf8 P_((struct string *str,
				     struct charset_state *ch));
/* Append the value held in the UTF-8 decoder state 'ch' to the string.
 * Values above 0xFFFF do not fit a 16-bit word and are silently
 * skipped.
 */
static void cs_add_state_to_utf8(str,ch)
     struct string *str;
     struct charset_state *ch;
{
    if (ch->p->a.utf8.value <= 0xFFFF) {
	/* Grow by one word.
	   NOTE: str->p->a.words is not NUL terminated */
	str->p->a.words = safe_realloc(str->p->a.words,
				       (str->p->len+1)* sizeof (uint16));

	str->p->a.words[str->p->len] = ch->p->a.utf8.value;
	str->p->len++;
    }
}
static void cs_add_state_to_utf7 P_((struct string *str,
				     struct charset_state *ch));
/* Append the value held in the UTF-7 decoder state 'ch' to the string.
 * Values above 0xFFFF do not fit a 16-bit word and are silently
 * skipped.
 */
static void cs_add_state_to_utf7(str,ch)
     struct string *str;
     struct charset_state *ch;
{
    if (ch->p->a.utf7.value <= 0xFFFF) {
	/* Grow by one word.
	   NOTE: str->p->a.words is not NUL terminated */
	str->p->a.words = safe_realloc(str->p->a.words,
				       (str->p->len+1)* sizeof (uint16));

	str->p->a.words[str->p->len] = ch->p->a.utf7.value;
	str->p->len++;
    }
}
static void cs_init_s_utf8 P_((struct charset_state *str));
/* Put a UTF-8 decoder state into its initial condition:
 * no character available and no sequence in progress.
 */
static void cs_init_s_utf8(str)
     struct charset_state *str;
{
    /* Nothing decoded yet */
    str->p->ready        = 0;
    str->p->a.utf8.value = 0;

    /* No multi-byte sequence in progress */
    str->p->a.utf8.idx   = 0;
    str->p->a.utf8.bytes = 0;
}
static void cs_init_s_utf7 P_((struct charset_state *str));
/* Put a UTF-7 decoder state into its initial condition:
 * no character available, not inside an encoded run and
 * an empty bit accumulator.
 */
static void cs_init_s_utf7(str)
     struct charset_state *str;
{
    /* Nothing decoded yet */
    str->p->ready           = 0;
    str->p->a.utf7.value    = 0;

    /* Not inside an encoded run, bit accumulator empty */
    str->p->a.utf7.bitval   = 0;
    str->p->a.utf7.bitcount = 0;
    str->p->a.utf7.encoded  = 0;
}
static void cs_free_s_utf8 P_((struct charset_state *str));
/* Release a UTF-8 decoder state.  Nothing is deallocated here;
 * all fields are simply zeroed.
 */
static void cs_free_s_utf8(str)
     struct charset_state *str;
{
    str->p->a.utf8.value = 0;
    str->p->a.utf8.idx   = 0;
    str->p->a.utf8.bytes = 0;

    str->p->ready = 0;
}
static void cs_free_s_utf7 P_((struct charset_state *str));
/* Release a UTF-7 decoder state.  Nothing is deallocated here;
 * all fields are simply zeroed.
 */
static void cs_free_s_utf7(str)
     struct charset_state *str;
{
    str->p->a.utf7.value    = 0;
    str->p->a.utf7.bitval   = 0;
    str->p->a.utf7.bitcount = 0;
    str->p->a.utf7.encoded  = 0;

    str->p->ready = 0;
}
static int split_utf8_byte P_((int c, unsigned char *count,
			       unsigned char *val));
/* Split one UTF-8 byte into its length prefix and its payload bits.
 *
 *   c      byte value, must be 0-255 (panics otherwise)
 *   count  receives the number of leading 1 bits:
 *            0     US-ASCII byte
 *            1     continuation byte (10xxxxxx)
 *            2-6   lead byte of a sequence of that many bytes
 *   val    receives the byte with the leading 1 bits cleared
 *
 * Returns 1 if the byte can occur in UTF-8, 0 otherwise.  Bytes 0xFE
 * and 0xFF never occur in UTF-8: the longest form ever defined has a
 * six byte sequence (lead byte 1111110x).  *count and *val are
 * written even when 0 is returned.
 */
static int split_utf8_byte(c,count,val)
     int c;
     unsigned char *count;
     unsigned char *val;
{
    int prefix_len = 0;
    int mask;

    if (c < 0 || c > 0xFF)
	panic("STRING PANIC",__FILE__,__LINE__,"split_utf8_byte",
	      "Value not in range 0-255",0);

    if (c < 0x80) {
	*count = 0;
	*val   = c;
	return 1;
    }

    /* Strip leading 1 bits from the top, counting them */
    for (mask = 0x80;
	 mask != 0 && (mask & c) != 0;
	 mask >>= 1) {
	c &= ~mask;
	prefix_len++;
    }

    *count = prefix_len;
    *val   = c;

    /* mask == 0        : all eight bits were set (0xFF)
       prefix_len == 7  : 0xFE, would announce a 7 byte sequence */
    return (mask != 0 && prefix_len <= 6);
}
/* Feed one stream byte to a UTF-8 decoder state.
 *
 * With no sequence in progress (idx == 0) the byte must be an ASCII
 * byte or a lead byte; otherwise it must be a continuation byte whose
 * six payload bits are shifted into the value.  'ready' is set once
 * the character is complete.
 *
 * Returns 1 if the byte was accepted.  On a bad byte the whole state
 * is cleared and 0 is returned.
 */
static int cs_add_streambyte_to_s_utf8(str,ch)
     struct charset_state *str;
     int ch;
{
    unsigned char count;
    unsigned char val;
    int starting;
    int accepted;

    accepted = split_utf8_byte(ch,&count,&val);
    starting = (str->p->a.utf8.idx == 0);

    if (accepted) {
	/* A continuation byte (count == 1) is required inside a
	   sequence and forbidden at the start of one */
	if (starting)
	    accepted = (count != 1);
	else
	    accepted = (count == 1);
    }

    if (!accepted) {
	DPRINT(Debug,10,(&Debug,
			 "cs_add_streambyte_to_s_utf8: Bad byte 0x%02X\n",
			 ch));
	str->p->a.utf8.bytes = 0;
	str->p->a.utf8.idx   = 0;
	str->p->a.utf8.value = 0;
	str->p->ready        = 0;
	return 0;
    }

    if (starting) {
	str->p->a.utf8.bytes = count;
	str->p->a.utf8.idx   = 1;
	str->p->a.utf8.value = val;

	if (0 == count)      /* Ascii character? */
	    str->p->ready = 1;
    } else {
	str->p->a.utf8.idx++;
	str->p->a.utf8.value <<= 6;
	str->p->a.utf8.value |= val;
    }

    /* Last byte of a multi-byte sequence seen? */
    if (str->p->a.utf8.idx == str->p->a.utf8.bytes)
	str->p->ready = 1;

    return 1;
}
/* Feed one stream byte to a UTF-7 decoder state.
 *
 * The 'encoded' field tracks the position relative to a '+...-' run:
 *   0  outside an encoded run
 *   2  the starting '+' was just seen, nothing else yet
 *   1  inside a run, at least one encoded character seen
 *
 * Outside a run every byte except '+' is delivered as is.  Inside a
 * run each encoded character contributes 6 bits; whenever 16 or more
 * bits have been collected the top 16 become the value and 'ready'
 * is set, while the remaining bits stay in the accumulator.
 *
 * Returns 1 if the byte was accepted (a character is available only
 * when 'ready' is set).  Returns 0, with the whole state cleared, for
 * a byte above 0x7F or for a run that ends directly after its '+'
 * with something other than '-'.  Panics if ch is not in 0-255.
 */
static int cs_add_streambyte_to_s_utf7(str,ch)
struct charset_state *str;
int ch;
{
if (ch < 0 || ch > 0xFF)
panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambyte_to_s_utf7",
"Value not in range 0-255",0);
/* Only 7-bit bytes are accepted */
if (ch > 0x7F) {
failure:
DPRINT(Debug,10,(&Debug,
"cs_add_streambyte_to_s_utf7: Bad byte 0x%02X\n",
ch));
str->p->a.utf7.encoded = 0;
str->p->a.utf7.bitcount = 0;
str->p->a.utf7.bitval = 0;
str->p->a.utf7.value = 0;
str->p->ready = 0;
return 0;
}
if (str->p->a.utf7.encoded) {
int a = base64(ch);
/* A negative result is treated as "not an encoded character" */
if (a < 0) { /* Encoding ends */
if (0x2D /* '-' */ == ch) {
if (2 == str->p->a.utf7.encoded) {
/* "+-" directly after the start: deliver a literal '+' */
ch = 0x2B; /* '+' */
str->p->a.utf7.encoded = 0;
goto normal_char;
}
str->p->a.utf7.encoded = 0;
return 1; /* Consume the terminating '-' without output */
}
if (2 == str->p->a.utf7.encoded)
goto failure; /* Run ends right after its starting '+' */
/* Any other character ends the run and is delivered itself */
str->p->a.utf7.encoded = 0;
goto normal_char;
}
str->p->a.utf7.encoded = 1; /* No longer directly after '+' */
str->p->a.utf7.bitval <<= 6;
str->p->a.utf7.bitval |= a;
str->p->a.utf7.bitcount += 6;
if (str->p->a.utf7.bitcount >= 16) {
/* Take the top 16 collected bits as the value ... */
str->p->a.utf7.value =
str->p->a.utf7.bitval >> (str->p->a.utf7.bitcount -16);
/* ... and keep only the leftover low bits in the accumulator */
str->p->a.utf7.bitval -=
str->p->a.utf7.value << (str->p->a.utf7.bitcount -16);
str->p->a.utf7.bitcount -= 16;
str->p->ready = 1;
}
} else if (0x2B /* '+' */ == ch) {
/* Start of encoded string */
str->p->a.utf7.encoded = 2; /* Marks "just after the starting '+'" */
str->p->a.utf7.bitcount = 0;
str->p->a.utf7.bitval = 0;
} else {
normal_char:
/* Directly represented character */
str->p->a.utf7.value = ch;
str->p->ready = 1;
}
return 1;
}
/* Feed one stream byte to an IMAP (modified UTF-7) decoder state.
 * The state uses the same utf7 fields as the UTF-7 decoder.
 *
 * The 'encoded' field tracks the position relative to a '&...-' run:
 *   0  outside an encoded run
 *   2  the starting '&' was just seen, nothing else yet
 *   1  inside a run, at least one encoded character seen
 *
 * Outside a run every byte except '&' is delivered as is.  Inside a
 * run each encoded character contributes 6 bits; whenever 16 or more
 * bits have been collected the top 16 become the value and 'ready'
 * is set, while the remaining bits stay in the accumulator.
 *
 * Returns 1 if the byte was accepted (a character is available only
 * when 'ready' is set).  Returns 0, with the whole state cleared, for
 * a byte above 0x7F or when a run is ended by anything other than
 * '-'.  Panics if ch is not in 0-255.
 */
static int cs_add_streambyte_to_s_imap(str,ch)
struct charset_state *str;
int ch;
{
if (ch < 0 || ch > 0xFF)
panic("STRING PANIC",__FILE__,__LINE__,"cs_add_streambyte_to_s_imap",
"Value not in range 0-255",0);
/* Only 7-bit bytes are accepted */
if (ch > 0x7F) {
failure:
DPRINT(Debug,10,(&Debug,
"cs_add_streambyte_to_s_imap: Bad byte 0x%02X\n",
ch));
str->p->a.utf7.encoded = 0;
str->p->a.utf7.bitcount = 0;
str->p->a.utf7.bitval = 0;
str->p->a.utf7.value = 0;
str->p->ready = 0;
return 0;
}
if (str->p->a.utf7.encoded) {
int a = imap(ch);
/* imap() gives -1 for characters outside the encoding alphabet */
if (a < 0) { /* Encoding ends */
if (0x2D /* '-' */ == ch) {
if (2 == str->p->a.utf7.encoded) {
/* "&-" directly after the start: deliver a literal '&' */
ch = 0x26; /* '&' */
str->p->a.utf7.encoded = 0;
goto normal_char;
}
str->p->a.utf7.encoded = 0;
return 1; /* Consume the terminating '-' without output */
}
/* The imap encoding requires a terminating '-';
the utf-7 encoding does not require one
*/
goto failure;
}
str->p->a.utf7.encoded = 1; /* No longer directly after '&' */
str->p->a.utf7.bitval <<= 6;
str->p->a.utf7.bitval |= a;
str->p->a.utf7.bitcount += 6;
if (str->p->a.utf7.bitcount >= 16) {
/* Take the top 16 collected bits as the value ... */
str->p->a.utf7.value =
str->p->a.utf7.bitval >> (str->p->a.utf7.bitcount -16);
/* ... and keep only the leftover low bits in the accumulator */
str->p->a.utf7.bitval -=
str->p->a.utf7.value << (str->p->a.utf7.bitcount -16);
str->p->a.utf7.bitcount -= 16;
str->p->ready = 1;
}
} else if (0x26 /* '&' */ == ch) {
/* Start of encoded string */
str->p->a.utf7.encoded = 2; /* Marks "just after the starting '&'" */
str->p->a.utf7.bitcount = 0;
str->p->a.utf7.bitval = 0;
} else {
normal_char:
/* Directly represented character */
str->p->a.utf7.value = ch;
str->p->ready = 1;
}
return 1;
}
/* Clear a UTF-8 decoder state after a character has been consumed
 * (or discarded): no character available, no sequence in progress.
 */
static void cs_soft_reset_s_utf8(str)
     struct charset_state *str;
{
    str->p->ready        = 0;
    str->p->a.utf8.value = 0;
    str->p->a.utf8.idx   = 0;
    str->p->a.utf8.bytes = 0;
}
/* Clear the decoded character of a UTF-7 / IMAP decoder state.
 * Only 'value' and 'ready' are touched; 'encoded', 'bitcount' and
 * 'bitval' are left as they are.
 */
static void cs_soft_reset_s_utf7(str)
     struct charset_state *str;
{
    str->p->ready        = 0;
    str->p->a.utf7.value = 0;
}
static uint16 cs_give_unicode_from_s_utf8 P_((struct charset_state *st,
					      int *found));
/* Give the Unicode value for the character held in a UTF-8 state.
 *
 * *found is set to 1 when map_utf() knows the value.  Otherwise
 * *found is 0 and '?' (0x003F) is returned instead.
 */
static uint16 cs_give_unicode_from_s_utf8(st,found)
     struct charset_state *st;
     int *found;
{
    uint16 unicode;

    *found = 0;

    unicode = map_utf(st->charset->map_info,st->p->a.utf8.value);
    if (unicode != MAPPING_NONE) {
	*found = 1;
	return unicode;
    }

    return 0x003F;  /* '?' */
}
static uint16 cs_give_unicode_from_s_utf7 P_((struct charset_state *st,
					      int *found));
/* Give the Unicode value for the character held in a UTF-7 state.
 *
 * *found is set to 1 when map_utf() knows the value.  Otherwise
 * *found is 0 and '?' (0x003F) is returned instead.
 */
static uint16 cs_give_unicode_from_s_utf7(st,found)
     struct charset_state *st;
     int *found;
{
    uint16 unicode;

    *found = 0;

    unicode = map_utf(st->charset->map_info,st->p->a.utf7.value);
    if (unicode != MAPPING_NONE) {
	*found = 1;
	return unicode;
    }

    return 0x003F;  /* '?' */
}
static int cs_s_utf8_same_char P_((struct charset_state *A,
				   struct charset_state *B,
				   int ignore_case));
/* Compare the characters held in two UTF-8 states.
 *
 * Returns  1  the raw values are equal
 *          0  they differ and case matters
 *         -1  they differ but ignore_case is set; the comparison
 *             is then to be done with UNICODE values
 */
static int cs_s_utf8_same_char(A,B,ignore_case)
     struct charset_state *A;
     struct charset_state *B;
     int ignore_case;
{
    if (A->p->a.utf8.value != B->p->a.utf8.value)
	return ignore_case ? -1 : 0;

    return 1;
}
static int cs_s_utf7_same_char P_((struct charset_state *A,
				   struct charset_state *B,
				   int ignore_case));
/* Compare the characters held in two UTF-7 states.
 *
 * Returns  1  the raw values are equal
 *          0  they differ and case matters
 *         -1  they differ but ignore_case is set; the comparison
 *             is then to be done with UNICODE values
 */
static int cs_s_utf7_same_char(A,B,ignore_case)
     struct charset_state *A;
     struct charset_state *B;
     int ignore_case;
{
    if (A->p->a.utf7.value != B->p->a.utf7.value)
	return ignore_case ? -1 : 0;

    return 1;
}
static int cs_s_utf8_printable P_((struct charset_state *st));
/* Returns 1 if unicode_ch() reports the mapped value of the
 * character held in the UTF-8 state as printable, 0 otherwise.
 */
static int cs_s_utf8_printable(st)
     struct charset_state *st;
{
    uint16 unicode = map_utf(st->charset->map_info,st->p->a.utf8.value);

    if (unicode_ch(unicode,UOP_printable))
	return 1;

    return 0;
}
static int cs_s_utf7_printable P_((struct charset_state *st));
/* Returns 1 if unicode_ch() reports the mapped value of the
 * character held in the UTF-7 state as printable, 0 otherwise.
 */
static int cs_s_utf7_printable(st)
     struct charset_state *st;
{
    uint16 unicode = map_utf(st->charset->map_info,st->p->a.utf7.value);

    if (unicode_ch(unicode,UOP_printable))
	return 1;

    return 0;
}
/* If the character corresponds to one byte on the stream, return it.
 * Otherwise return 0.  This is used to implement ReadCh().
 * It is assumed that the returned character belongs to the
 * code character set (and perhaps also to US-ASCII).
 */
static int cs_s_utf8_is_onebyte P_((struct charset_state *st));
static int cs_s_utf8_is_onebyte(st)
     struct charset_state *st;
{
    /* A non-zero 'bytes' means the character came from a
       multi-byte sequence */
    if (st->p->a.utf8.bytes != 0)
	return 0;

    return st->p->a.utf8.value;
}
static int cs_s_utf7_is_onebyte P_((struct charset_state *st));
/* Return the character held in the UTF-7 state when the state is not
 * inside an encoded run (encoded == 0); otherwise return 0.
 */
static int cs_s_utf7_is_onebyte(st)
     struct charset_state *st;
{
    if (st->p->a.utf7.encoded != 0)
	return 0;

    return st->p->a.utf7.value;
}
static int cs_utf_properties P_((charset_t st));
/* Give the CS_* property flags of a UTF charset.
 *
 * CS_printable and CS_mapping are always set because at least the
 * ascii part of the charset is known.  CS_universal_set is added
 * when the charset has a mapping (map_info is set).
 */
static int cs_utf_properties(st)
     charset_t st;
{
    /* We know at least ascii part of charset */
    int flags = CS_printable | CS_mapping;

    /* We know all (possible) characters if mapping is available */
    if (st->map_info)
	flags |= CS_universal_set;

    return flags;
}
/* S_(x) emits the forward declaration "static x;" on ANSI C compilers
 * and nothing otherwise.
 */
#if ANSI_C
#define S_(x) static x;
#else
#define S_(x)
#endif
/* Placeholder for the "estimate clip" slot of the charset types in this
 * file.  It must never be called: it panics, and returns -1 only if
 * panic() returns.  None of the arguments are used.
 * NOTE(review): cs_estimate_clip_string is presumably a function
 * typedef declared in a project header -- confirm.
 */
S_(cs_estimate_clip_string cs_estimate_clip_unsupported)
static int cs_estimate_clip_unsupported(str,pos,len,terminal,printable_len)
CONST struct string *str;
int pos;
int len; /* UPPER LIMIT */
screen_info_p terminal;
struct cs_printable_len *printable_len;
{
panic("STRING PANIC",__FILE__,__LINE__,"cs_estimate_clip_unsupported",
"cs_estimate_clip_unsupported() called",0);
return -1;
}
static int cs_iso2022_info_set_utf P_((struct charcode_info *new_vals,
				       struct setlist *new_setlist,
				       int setcount));
/* Install the ISO 2022 set list for a UTF charset definition.
 *
 * Exactly one set, of type iso2022_other, is accepted.  In that case
 * the unused slots of new_setlist are cleared, the list is stored via
 * loc_setlist() into new_vals->iso2022_info, SET_nodata is removed
 * from new_vals->flags and 1 is returned.
 *
 * Otherwise an error is reported with lib_error() and 0 is returned.
 */
static int cs_iso2022_info_set_utf(new_vals, new_setlist,setcount)
     struct charcode_info *new_vals;
     struct setlist *new_setlist;
     int setcount;
{
    int slot;

    if (1 == setcount &&
	iso2022_other == new_setlist->sets[0]->type) {

	/* Clear every slot after the supplied one */
	slot = setcount;
	while (slot < sizeof (new_setlist->sets) / sizeof (new_setlist->sets[0])) {
	    new_setlist->sets[slot] = NULL;
	    slot++;
	}

	new_vals->iso2022_info = loc_setlist(*new_setlist);
	new_vals->flags &= ~SET_nodata;

	return 1;
    }

    lib_error(CATGETS(elm_msg_cat, MeSet, MeIso2022OtherOnly,
		      "Charset type %s allows only type other-set specifications"),
	      new_vals->charset_type->type_name);

    return 0;  /* Discard bank definition */
}
/* Handler table for the "imap" charset type.
 *
 * The initializer is positional, so the entries must stay in the
 * member order of struct charset_type (declared elsewhere).
 * The string level handlers are the common *_utf ones except for the
 * imap specific stream conversions; the per-character state handlers
 * are those of utf-7 except for cs_add_streambyte_to_s_imap.
 * NOTE(review): the last entry (&cs_iso2022) presumably links to the
 * next charset type in a chain -- confirm against struct charset_type.
 */
struct charset_type cs_imap = { "imap",
cs_init_utf,
cs_free_utf,
cs_add_streambyte_to_imap,
cs_add_intdata_to_utf,
cs_check_length_utf,
cs_give_unicode_from_utf,
cs_add_unicodedata_to_utf,
cs_cmp_utf,
cs_stream_from_imap,
cs_can_ascii_utf,
cs_streamclip_from_imap,
cs_clip_from_utf,
cs_find_pattern_from_utf,
cs_add_streambytes_to_imap,
cs_find_imap,
cs_remove_control_utf,
cs_add_state_to_utf7,
cs_init_s_utf7,
cs_free_s_utf7,
cs_add_streambyte_to_s_imap,
cs_soft_reset_s_utf7,
cs_give_unicode_from_s_utf7,
cs_s_utf7_same_char,
cs_s_utf7_printable,
cs_s_utf7_is_onebyte,
cs_utf_properties,
cs_estimate_clip_unsupported,
cs_iso2022_info_set_utf,
&cs_iso2022
};
/* Handler table for the "utf-7" charset type.
 *
 * The initializer is positional, so the entries must stay in the
 * member order of struct charset_type (declared elsewhere).
 * The string level handlers are the common *_utf ones except for the
 * utf-7 specific stream conversions; the per-character state handlers
 * are the *_s_utf7 ones.
 * NOTE(review): the last entry (&cs_imap) presumably links to the
 * next charset type in a chain -- confirm against struct charset_type.
 */
struct charset_type cs_utf7 = { "utf-7",
cs_init_utf,
cs_free_utf,
cs_add_streambyte_to_utf7,
cs_add_intdata_to_utf,
cs_check_length_utf,
cs_give_unicode_from_utf,
cs_add_unicodedata_to_utf,
cs_cmp_utf,
cs_stream_from_utf7,
cs_can_ascii_utf,
cs_streamclip_from_utf7,
cs_clip_from_utf,
cs_find_pattern_from_utf,
cs_add_streambytes_to_utf7,
cs_find_utf7,
cs_remove_control_utf,
cs_add_state_to_utf7,
cs_init_s_utf7,
cs_free_s_utf7,
cs_add_streambyte_to_s_utf7,
cs_soft_reset_s_utf7,
cs_give_unicode_from_s_utf7,
cs_s_utf7_same_char,
cs_s_utf7_printable,
cs_s_utf7_is_onebyte,
cs_utf_properties,
cs_estimate_clip_unsupported,
cs_iso2022_info_set_utf,
&cs_imap
};
/* Handler table for the "utf-8" charset type.
 *
 * The initializer is positional, so the entries must stay in the
 * member order of struct charset_type (declared elsewhere).
 * The string level handlers are the common *_utf ones except for the
 * utf-8 specific stream conversions; the per-character state handlers
 * are the *_s_utf8 ones.
 * NOTE(review): the last entry (&cs_utf7) presumably links to the
 * next charset type in a chain -- confirm against struct charset_type.
 */
struct charset_type cs_utf8 = { "utf-8",
cs_init_utf,
cs_free_utf,
cs_add_streambyte_to_utf8,
cs_add_intdata_to_utf,
cs_check_length_utf,
cs_give_unicode_from_utf,
cs_add_unicodedata_to_utf,
cs_cmp_utf,
cs_stream_from_utf8,
cs_can_ascii_utf,
cs_streamclip_from_utf8,
cs_clip_from_utf,
cs_find_pattern_from_utf,
cs_add_streambytes_to_utf8,
cs_find_utf8,
cs_remove_control_utf,
cs_add_state_to_utf8,
cs_init_s_utf8,
cs_free_s_utf8,
cs_add_streambyte_to_s_utf8,
cs_soft_reset_s_utf8,
cs_give_unicode_from_s_utf8,
cs_s_utf8_same_char,
cs_s_utf8_printable,
cs_s_utf8_is_onebyte,
cs_utf_properties,
cs_estimate_clip_unsupported,
cs_iso2022_info_set_utf,
&cs_utf7
};
/*
* Local Variables:
* mode:c
* c-basic-offset:4
* buffer-file-coding-system: iso-8859-1
* End:
*/
/* syntax highlighted by Code2HTML, v. 0.9.1 */