static char rcsid[] = "@(#)$Id: stringtok.c,v 1.7 2006/04/09 07:37:08 hurtta Exp $"; /****************************************************************************** * The Elm (ME+) Mail System - $Revision: 1.7 $ $State: Exp $ * * Author: Kari Hurtta (was hurtta+elm@ozone.FMI.FI) *****************************************************************************/ #include "headers.h" /* string_tokenize() should do for 'struct string' same than rfc822_tokenize() does for 'char *' (getaddr.c) however do not produce exactly same result */ DEBUG_VAR(Debug,__FILE__,"addr"); #define unicode_backlash 0x005C /* '\\' */ static struct pairs { uint16 start; uint16 end; } pairs[] = { { 0x0022 /* " */, 0x0022 /* " */ }, { 0x0028 /* ( */, 0x0029 /* ) */ }, { 0x005B /* [ */, 0x005D /* ] */ } }; static int is_special P_((int x, int flags)); static int is_special(x,flags) int x; int flags; { switch (x) { case 0x0028 /* '(' */ : case 0x0029 /* ')' */ : case 0x002C /* ',' */ : case 0x003A /* ':' */ : case 0x003B /* ';' */ : case 0x005C /* '\\'*/ : case 0x0022 /* '"' */ : case 0x005B /* '[' */ : case 0x005D /* ']' */ : return x; } if (flags & TOK_mail) { switch (x) { case 0x003C /* '<' */ : case 0x003E /* '>' */ : case 0x002E /* '.' */ : case 0x0040 /* '@' */ : return x; } } if (flags & TOK_mime) { switch (x) { case 0x003D /* = */ : case 0x002F /* / */ : return x; } } return 0; } static int is_space P_((int x)); static int is_space(x) int x; { switch (x) { case 0x0020 /* SPACE */ : case 0x0009 /* HT '\t' */: case 0x000A /* LF '\n' */: case 0x000D /* CR '\r' */: /* Unicoe EOLN ??? */ return x; } return 0; } #define NUM_pair ( sizeof pairs / sizeof (pairs[0])) struct string_token * string_tokenize(line,flags) CONST struct string *line; int flags; { struct string_token * result = NULL; int result_len = 0; CONST int linelen = string_len(line); int i; DPRINT(Debug,25,(&Debug, "string_tokenize: line=%S\n", line)); for (i = 0; i < linelen; ) { CONST uint16 code = give_unicode_from_string(line,i); struct pairs * mode = NULL; struct string * token = NULL; int special = 0; int j; for (j = 0; j < NUM_pair; j++) { if (code == pairs[j].start) mode = &pairs[j]; } /* Look quoted string */ if (mode) { int start = i; int len; int depth = 1; special = mode->start; for (i++; i < linelen; i++) { CONST uint16 code = give_unicode_from_string(line,i); if (unicode_backlash == code) i++; /* skip next */ else if (mode->end == code) depth--; else if (mode->start == code) depth++; if (0 == depth) { i++; break; } } len = i - start; token = clip_from_string(line,&start,len); goto add_token; } if (is_space(code)) { int start = i; int len; special = 0x0020 /* SPACE */; for (i++; i < linelen; i++) { CONST uint16 code = give_unicode_from_string(line,i); if (!is_space(code)) break; } len = i - start; token = clip_from_string(line,&start,len); goto add_token; } special = is_special(code,flags); if (!special) { int start = i; int len; for (i++; i < linelen; i++) { CONST uint16 code = give_unicode_from_string(line,i); if (is_space(code)) break; if (is_special(code,flags)) break; } len = i - start; token = clip_from_string(line,&start,len); goto add_token; } /* Is special */ if (unicode_backlash == code) { token = clip_from_string(line,&i,2); } else token = clip_from_string(line,&i,1); add_token: DPRINT(Debug,25,(&Debug, "string_tokenize: [%d] special=%04d token=%S\n", result_len,special,token)); result = safe_realloc(result, sizeof (result[0]) * (result_len+2)); result[result_len].special = special; result[result_len].token = token; result[result_len+1].special = 0; result[result_len+1].token = NULL; result_len++; } if (!result) { result = safe_realloc(result, sizeof (result[0])); result[0].special = 0; result[0].token = NULL; } DPRINT(Debug,25,(&Debug, "string_tokenize=%p result len=%d\n", result,result_len)); return result; } void free_string_tokenized(ptr) struct string_token **ptr; { struct string_token *res = *ptr; int i; for (i = 0; res[i].token; i++) { free_string(& res[i].token); } free(res); res = NULL; *ptr = res; } /* * Local Variables: * mode:c * c-basic-offset:4 * buffer-file-coding-system: iso-8859-1 * End: */