/* ports//mail/elm+ME/work/elm2.4.ME+.124/lib/stringtok.c */

static char rcsid[] = "@(#)$Id: stringtok.c,v 1.7 2006/04/09 07:37:08 hurtta Exp $";

/******************************************************************************
 *  The Elm (ME+) Mail System  -  $Revision: 1.7 $   $State: Exp $
 *
 *  Author: Kari Hurtta <hurtta+elm@posti.FMI.FI> (was hurtta+elm@ozone.FMI.FI)
 *****************************************************************************/

#include "headers.h"

/* 
   string_tokenize()
        should do for 'struct string' the same as
   rfc822_tokenize()
        does for 'char *' (getaddr.c)

   however, it does not produce exactly the same result
*/

/* Debug handle named by the DPRINT() calls in this file.  DEBUG_VAR is
 * defined outside this file (presumably via headers.h); the "addr"
 * argument looks like a debug class name -- TODO confirm there.
 */
DEBUG_VAR(Debug,__FILE__,"addr");

/* Code point of the backslash (U+005C).  string_tokenize() treats it as
 * an escape: inside a delimited token the following character is skipped,
 * and outside one it is clipped together with the following character.
 * (The identifier is spelled "backlash"; it is used under that name below.)
 */
#define  unicode_backlash    0x005C  /* '\\' */

/* Delimiter pairs recognised by string_tokenize().  A character equal to
 * 'start' opens a delimited token which runs up to the matching 'end'.
 * Values are numeric code points, compared against what
 * give_unicode_from_string() returns.  For the double quote, 'start' and
 * 'end' are the same code point.
 */
static struct pairs {
    uint16 start;    /* opening delimiter */
    uint16 end;      /* closing delimiter */
} pairs[] = {
  { 0x0022  /* " */,  0x0022  /* " */ },
  { 0x0028  /* ( */,  0x0029  /* ) */ },
  { 0x005B  /* [ */,  0x005D  /* ] */ }
};

/*
 * is_special()
 *    Returns x itself when the code point x counts as a tokenizer
 *    special character, and 0 otherwise.
 *
 *      always special:              ( ) , : ; \ " [ ]
 *      only when TOK_mail is set:   < > . @
 *      only when TOK_mime is set:   = /
 *
 *    Code points are written as numeric values rather than as character
 *    constants.
 */
static int is_special P_((int x, int flags));
static int is_special(x,flags)
     int x;
     int flags;
{
    int hit = 0;

    switch (x) {

	/* special regardless of flags */
    case 0x0022    /* '"' */ :
    case 0x0028    /* '(' */ :
    case 0x0029    /* ')' */ :
    case 0x002C    /* ',' */ :
    case 0x003A    /* ':' */ :
    case 0x003B    /* ';' */ :
    case 0x005B    /* '[' */ :
    case 0x005C    /* '\\' */ :
    case 0x005D    /* ']' */ :
	hit = 1;
	break;

	/* special only for mail address tokenizing */
    case 0x002E    /* '.' */ :
    case 0x003C    /* '<' */ :
    case 0x003E    /* '>' */ :
    case 0x0040    /* '@' */ :
	hit = (flags & TOK_mail) != 0;
	break;

	/* special only for MIME tokenizing */
    case 0x002F    /* '/' */ :
    case 0x003D    /* '=' */ :
	hit = (flags & TOK_mime) != 0;
	break;

    default:
	break;
    }

    return hit ? x : 0;
}

/*
 * is_space()
 *    Returns x itself when the code point x is SPACE, HT, LF or CR,
 *    and 0 otherwise.
 *
 *    Only these four code points are recognised; whether Unicode line
 *    separators should also count was left as an open question by the
 *    original author.
 */
static int is_space P_((int x));
static int is_space(x)
     int x;
{
    if (0x0020 == x        /* SPACE */
	|| 0x0009 == x     /* HT  '\t' */
	|| 0x000A == x     /* LF  '\n' */
	|| 0x000D == x     /* CR  '\r' */)
	return x;

    return 0;
}

/* Number of entries in the pairs[] table (element count, not bytes). */
#define NUM_pair ( sizeof pairs / sizeof (pairs[0]))


struct string_token * string_tokenize(line,flags)
     CONST struct string *line;
     int flags;
{
    struct string_token * result = NULL;
    int    result_len = 0;

    CONST int linelen = string_len(line);

    int i;

    DPRINT(Debug,25,(&Debug, 
		     "string_tokenize: line=%S\n",
		     line));

    for (i = 0; i < linelen; ) {

	CONST uint16 code = give_unicode_from_string(line,i);

	struct pairs   * mode  = NULL;
	struct string  * token = NULL;
	int              special = 0;
	int j;

	for (j = 0; j < NUM_pair; j++) {
	    if (code == pairs[j].start)
		mode = &pairs[j];
	}

	/* Look quoted string */
	if (mode) {
	    int start = i;
	    int len;
	    int depth = 1;

	    special = mode->start;

	    for (i++; i < linelen; i++) {
		CONST uint16 code = give_unicode_from_string(line,i);

		if (unicode_backlash == code)
		    i++;  /* skip next */
		else if (mode->end == code)
		    depth--;
		else if (mode->start == code)
		    depth++;
		if (0 == depth) {
		    i++;
		    break;
		}
	    }
	    len = i - start;

	    token = clip_from_string(line,&start,len);

	    goto add_token;
	}

	if (is_space(code)) {
	    int start = i;
	    int len;
	    special = 0x0020 /* SPACE  */;

	    for (i++; i < linelen; i++) {
		CONST uint16 code = give_unicode_from_string(line,i);

		if (!is_space(code))
		    break;
	    }
	    len = i - start;
	    
	    token = clip_from_string(line,&start,len);
	    
	    goto add_token;       
	}

	special = is_special(code,flags);
	if (!special) {
	    int start = i;
	    int len;

	    for (i++; i < linelen; i++) {
		CONST uint16 code = give_unicode_from_string(line,i);

		if (is_space(code))
		    break;
		if (is_special(code,flags))
		    break;
	    }
	    len = i - start;

	    token = clip_from_string(line,&start,len);

	    goto add_token;
	}
	         
	/* Is special */
	if (unicode_backlash == code) {
	    token = clip_from_string(line,&i,2);
	} else
	    token = clip_from_string(line,&i,1);

    add_token:

	DPRINT(Debug,25,(&Debug, 
			 "string_tokenize: [%d] special=%04d token=%S\n",
			 result_len,special,token));

	result = safe_realloc(result, sizeof (result[0]) * (result_len+2));
	result[result_len].special = special;
	result[result_len].token   = token;
	result[result_len+1].special = 0;
	result[result_len+1].token   = NULL;
	result_len++;
    }

    if (!result) {
	result = safe_realloc(result, sizeof (result[0]));
	result[0].special = 0;
	result[0].token   = NULL;
    }

    DPRINT(Debug,25,(&Debug, 
		     "string_tokenize=%p  result len=%d\n",
		     result,result_len));

    return result;
}

/*
 * free_string_tokenized()
 *    Releases a token array of the kind string_tokenize() returns:
 *    every token up to the terminating entry (token == NULL) is handed
 *    to free_string(), then the array itself is freed and *ptr is set
 *    to NULL.
 *
 *    Because *ptr is left NULL afterwards, a repeated call (or a call on
 *    a pointer that was never filled in) arrives with *ptr == NULL.  That
 *    is treated as "nothing to free" instead of dereferencing NULL.
 */
void free_string_tokenized(ptr)
     struct string_token **ptr;
{
    struct string_token *res = *ptr;
    int i;

    if (!res)
	return;

    for (i = 0; res[i].token; i++) {
	free_string(& res[i].token);
    }

    free(res);

    *ptr = NULL;
}

/*
 * Local Variables:
 *  mode:c
 *  c-basic-offset:4
 *  buffer-file-coding-system: iso-8859-1
 * End:
 */
/* syntax highlighted by Code2HTML, v. 0.9.1 */