static char rcsid[] = "@(#)$Id: stringtok.c,v 1.7 2006/04/09 07:37:08 hurtta Exp $";
/******************************************************************************
* The Elm (ME+) Mail System - $Revision: 1.7 $ $State: Exp $
*
* Author: Kari Hurtta <hurtta+elm@posti.FMI.FI> (was hurtta+elm@ozone.FMI.FI)
*****************************************************************************/
#include "headers.h"
/*
string_tokenize()
should do for 'struct string' the same as
rfc822_tokenize()
does for 'char *' (getaddr.c);
however, it does not produce exactly the same result
*/
/* Declares the 'Debug' object that the DPRINT() calls in this file pass as &Debug. */
DEBUG_VAR(Debug,__FILE__,"addr");
/* Code point of the backslash; string_tokenize() uses it as an escape
 * that makes the following character part of the current token.
 */
#define unicode_backlash 0x005C /* '\\' */
/* Delimiter pairs recognised by string_tokenize(): a token that begins
 * with 'start' runs until the matching 'end' code point.  For the
 * double quote the opening and closing code points are the same.
 */
static struct pairs {
uint16 start;   /* opening code point */
uint16 end;     /* closing code point */
} pairs[] = {
{ 0x0022 /* " */, 0x0022 /* " */ },
{ 0x0028 /* ( */, 0x0029 /* ) */ },
{ 0x005B /* [ */, 0x005D /* ] */ }
};
static int is_special P_((int x, int flags));
/* Classify code point x as a "special" (delimiter) character.
 *
 *   x      code point to test
 *   flags  bit mask; TOK_mail enables the mail-only specials
 *          ('<' '>' '.' '@'), TOK_mime enables the MIME-only
 *          specials ('=' '/')
 *
 * Returns x itself when it is special under the given flags,
 * otherwise 0.
 */
static int is_special(x,flags)
     int x;
     int flags;
{
    int matched = 0;

    /* The three groups are disjoint, so one switch covers them all. */
    switch (x) {

        /* Always special */
    case 0x0022 /* '"' */ :
    case 0x0028 /* '(' */ :
    case 0x0029 /* ')' */ :
    case 0x002C /* ',' */ :
    case 0x003A /* ':' */ :
    case 0x003B /* ';' */ :
    case 0x005B /* '[' */ :
    case 0x005C /* '\\'*/ :
    case 0x005D /* ']' */ :
        matched = 1;
        break;

        /* Special only when tokenizing for mail */
    case 0x002E /* '.' */ :
    case 0x003C /* '<' */ :
    case 0x003E /* '>' */ :
    case 0x0040 /* '@' */ :
        if (flags & TOK_mail)
            matched = 1;
        break;

        /* Special only when tokenizing for MIME */
    case 0x002F /* '/' */ :
    case 0x003D /* '=' */ :
        if (flags & TOK_mime)
            matched = 1;
        break;

    default:
        break;
    }

    if (matched)
        return x;

    return 0;
}
static int is_space P_((int x));
/* Test whether code point x is whitespace: SPACE, HT, LF or CR.
 *
 * Returns x itself for a whitespace code point, otherwise 0.
 */
static int is_space(x)
     int x;
{
    /* Unicode EOLN ???  -- only the four ASCII code points are checked */
    if (0x0020 /* SPACE     */ == x ||
        0x0009 /* HT  '\t'  */ == x ||
        0x000A /* LF  '\n'  */ == x ||
        0x000D /* CR  '\r'  */ == x)
        return x;

    return 0;
}
#define NUM_pair ( sizeof pairs / sizeof (pairs[0]))
struct string_token * string_tokenize(line,flags)
CONST struct string *line;
int flags;
{
struct string_token * result = NULL;
int result_len = 0;
CONST int linelen = string_len(line);
int i;
DPRINT(Debug,25,(&Debug,
"string_tokenize: line=%S\n",
line));
for (i = 0; i < linelen; ) {
CONST uint16 code = give_unicode_from_string(line,i);
struct pairs * mode = NULL;
struct string * token = NULL;
int special = 0;
int j;
for (j = 0; j < NUM_pair; j++) {
if (code == pairs[j].start)
mode = &pairs[j];
}
/* Look quoted string */
if (mode) {
int start = i;
int len;
int depth = 1;
special = mode->start;
for (i++; i < linelen; i++) {
CONST uint16 code = give_unicode_from_string(line,i);
if (unicode_backlash == code)
i++; /* skip next */
else if (mode->end == code)
depth--;
else if (mode->start == code)
depth++;
if (0 == depth) {
i++;
break;
}
}
len = i - start;
token = clip_from_string(line,&start,len);
goto add_token;
}
if (is_space(code)) {
int start = i;
int len;
special = 0x0020 /* SPACE */;
for (i++; i < linelen; i++) {
CONST uint16 code = give_unicode_from_string(line,i);
if (!is_space(code))
break;
}
len = i - start;
token = clip_from_string(line,&start,len);
goto add_token;
}
special = is_special(code,flags);
if (!special) {
int start = i;
int len;
for (i++; i < linelen; i++) {
CONST uint16 code = give_unicode_from_string(line,i);
if (is_space(code))
break;
if (is_special(code,flags))
break;
}
len = i - start;
token = clip_from_string(line,&start,len);
goto add_token;
}
/* Is special */
if (unicode_backlash == code) {
token = clip_from_string(line,&i,2);
} else
token = clip_from_string(line,&i,1);
add_token:
DPRINT(Debug,25,(&Debug,
"string_tokenize: [%d] special=%04d token=%S\n",
result_len,special,token));
result = safe_realloc(result, sizeof (result[0]) * (result_len+2));
result[result_len].special = special;
result[result_len].token = token;
result[result_len+1].special = 0;
result[result_len+1].token = NULL;
result_len++;
}
if (!result) {
result = safe_realloc(result, sizeof (result[0]));
result[0].special = 0;
result[0].token = NULL;
}
DPRINT(Debug,25,(&Debug,
"string_tokenize=%p result len=%d\n",
result,result_len));
return result;
}
/* Release a token array returned by string_tokenize().
 *
 *   ptr   address of the array pointer; *ptr is set to NULL on
 *         return
 *
 * Frees every token up to the terminating element (.token == NULL)
 * and then the array itself.  Does nothing when ptr or *ptr is
 * NULL, so calling it again on an already released array is safe.
 */
void free_string_tokenized(ptr)
     struct string_token **ptr;
{
    struct string_token *res;
    int i;

    if (!ptr || !*ptr)
        return;

    res = *ptr;

    for (i = 0; res[i].token; i++) {
        free_string(& res[i].token);
    }

    free(res);
    *ptr = NULL;
}
/*
* Local Variables:
* mode:c
* c-basic-offset:4
* buffer-file-coding-system: iso-8859-1
* End:
*/
/* syntax highlighted by Code2HTML, v. 0.9.1 */