/*
* Copyright (c) 2002-2005 Sendmail, Inc. and its suppliers.
* All rights reserved.
*
* By using this file, you agree to the terms and conditions set
* forth in the LICENSE file which can be found at the top level of
* the sendmail distribution.
*/
#include "sm/generic.h"
SM_RCSID("@(#)$Id: rfc2822.c,v 1.26 2006/12/27 03:41:09 ca Exp $")
#include "sm/assert.h"
#include "sm/error.h"
#include "sm/rfc2821.h"
#include "sm/rfc2822.h"
#include "sm/heap.h"
/*
**  Implementation of RFC 2822 address parsing (more or less...).
*/
/*
**  T2822_READY -- make sure a token array has room for at least n tokens
**
**	If the array does not exist yet it is created (and its length is
**	reset to 0); otherwise it is grown when n exceeds the current
**	capacity.  Growing requests some slack (30 + n + n/8 slots).
**
**	Parameters:
**		x -- token array descriptor
**		n -- number of token slots required in total
**
**	Returns:
**		1 if the array can hold n tokens
**		0 if memory could not be obtained; an already existing array
**			(pointer, capacity and length) is left unchanged.
*/

static int
t2822_ready(sm_2822_a_T *x, uint n)
{
	uint newsize;
	sm_2822_P newtoks;

	if (x->sm_2822a_t)
	{
		if (n <= x->sm_2822a_a)
			return 1;

		/*
		**  Keep the result in a temporary: storing it directly
		**  in x->sm_2822a_t would lose (and leak) the existing
		**  tokens when the request fails.
		**  NOTE(review): assumes sm_realloc() leaves the old block
		**  valid on failure, like realloc() -- confirm.
		*/

		newsize = 30 + n + (n >> 3);
		newtoks = (sm_2822_P) sm_realloc(x->sm_2822a_t,
					newsize * sizeof(sm_2822_T));
		if (newtoks == NULL)
			return 0;
		x->sm_2822a_t = newtoks;
		x->sm_2822a_a = newsize;
		return 1;
	}

	/* first use: nothing allocated so far */
	x->sm_2822a_len = 0;
	x->sm_2822a_a = 0;

	/*
	**  Never ask for 0 bytes: an allocator may answer that with NULL,
	**  which would be mistaken for an out-of-memory condition.
	*/

	newsize = (n > 0) ? n : 1;
	newtoks = (sm_2822_P) sm_malloc(newsize * sizeof(sm_2822_T));
	if (newtoks == NULL)
		return 0;
	x->sm_2822a_t = newtoks;
	x->sm_2822a_a = newsize;
	return 1;
}
/*
**  T2822_READYPLUS -- make sure a token array has room for n more tokens
**
**	For an existing array the requirement is the current length plus n;
**	growing requests some slack (30 + total + total/8 slots).  If the
**	array does not exist yet it is created with room for n tokens and
**	its length is reset to 0.
**
**	Parameters:
**		x -- token array descriptor
**		n -- number of additional token slots required
**
**	Returns:
**		1 if the array can hold n more tokens
**		0 if memory could not be obtained; an already existing array
**			(pointer, capacity and length) is left unchanged.
*/

static int
t2822_readyplus(sm_2822_a_T *x, uint n)
{
	uint newsize;
	sm_2822_P newtoks;

	if (x->sm_2822a_t)
	{
		n += x->sm_2822a_len;
		if (n <= x->sm_2822a_a)
			return 1;

		/*
		**  Keep the result in a temporary: storing it directly
		**  in x->sm_2822a_t would lose (and leak) the existing
		**  tokens when the request fails.
		**  NOTE(review): assumes sm_realloc() leaves the old block
		**  valid on failure, like realloc() -- confirm.
		*/

		newsize = 30 + n + (n >> 3);
		newtoks = (sm_2822_P) sm_realloc(x->sm_2822a_t,
					newsize * sizeof(sm_2822_T));
		if (newtoks == NULL)
			return 0;
		x->sm_2822a_t = newtoks;
		x->sm_2822a_a = newsize;
		return 1;
	}

	/* first use: nothing allocated so far */
	x->sm_2822a_len = 0;
	x->sm_2822a_a = 0;

	/*
	**  Never ask for 0 bytes: an allocator may answer that with NULL,
	**  which would be mistaken for an out-of-memory condition.
	*/

	newsize = (n > 0) ? n : 1;
	newtoks = (sm_2822_P) sm_malloc(newsize * sizeof(sm_2822_T));
	if (newtoks == NULL)
		return 0;
	x->sm_2822a_t = newtoks;
	x->sm_2822a_a = newsize;
	return 1;
}
/*
**  T2822_APPEND -- add a copy of one token to the end of a token array
**
**	Parameters:
**		x -- token array to extend
**		i -- token to copy (structure copy; the value is shared)
**
**	Returns:
**		1 on success, 0 if the array could not be grown
*/

static int
t2822_append(sm_2822_a_T *x, sm_2822_P i)
{
	if (t2822_readyplus(x, 1) == 0)
		return 0;

	x->sm_2822a_t[x->sm_2822a_len] = *i;
	x->sm_2822a_len++;
	return 1;
}
/*
**  Synthetic comma token: t2822_addrlist() appends a copy of it (through
**  its FLUSHCOMMA macro) to separate addresses in its output.
**  Only the type is set; the remaining members are NULL.
*/
static sm_2822_T comma = { T2822_COMMA, NULL, NULL, NULL };
/*
**  T2822_REVERSE -- reverse the order of the tokens in an array (in place)
**
**	Parameters:
**		ta -- token array
**
**	Returns:
**		nothing
*/

static void
t2822_reverse(sm_2822_a_P ta)
{
	int lo, hi;
	sm_2822_T saved;

	/* swap the outermost pair and move inwards until the ends meet */
	lo = 0;
	hi = ta->sm_2822a_len - 1;
	while (lo < hi)
	{
		saved = ta->sm_2822a_t[lo];
		ta->sm_2822a_t[lo] = ta->sm_2822a_t[hi];
		ta->sm_2822a_t[hi] = saved;
		++lo;
		--hi;
	}
}
/*
**  NEEDSPACE -- does a blank belong between two adjacent tokens?
**
**	Parameters:
**		t1 -- type of the previous token (0: there is none)
**		t2 -- type of the current token
**
**	Returns:
**		true iff t2822_unparse() should emit a space before the
**		current token: after a colon or comma, before a '<', or
**		between two tokens whose types are both >= T2822_ATOM.
*/

static bool
needspace(int t1, int t2)
{
	/* nothing has been written yet */
	if (t1 == 0)
		return false;

	/* fixme: put a macro in .h for the ">= T2822_ATOM" test?? */
	return (t1 == T2822_COLON || t1 == T2822_COMMA || t2 == T2822_LEFT) ||
		(t1 >= T2822_ATOM && t2 >= T2822_ATOM);
}
/*
**  Characters that cannot be part of an atom: atomtype() returns -1 for a
**  character found in this list.  (Control characters are classified by
**  atomtype() before the list is consulted.)
*/
static const char delim[] = " \t\r\n([\"<>;:@,.";
#if 0
static const char t2822_opchar[] = "|\"(),.:;<>@[]%!";
#endif /* 0 */
/*
**  ATOMTYPE -- classify a character that follows the start of an atom
**
**	Parameters:
**		ch -- character to classify
**
**	Returns:
**		T2822_QUOTED if ch is outside the range 32..126 or is one of
**			')', ']', '\\'
**		-1 if ch is listed in delim[]
**		T2822_ATOM otherwise
*/

int
atomtype(uchar ch)
{
	bool special;

	special = ch < 32 || ch > 126 ||
		  ch == ')' || ch == ']' || ch == '\\';
	if (special)
		return T2822_QUOTED;
	return (strchr(delim, (int) ch) == NULL) ? T2822_ATOM : -1;
}
/*
**  T2822_UNPARSE -- append the textual form of a token array to a string
**
**	Tokens are separated by a blank where needspace() asks for one.
**	Quoted strings, domain literals and comments get their delimiters
**	back ("...", [...], (...)), and inside every atom, quoted string,
**	literal and comment the characters " [ ] ( ) \ CR LF are preceded
**	by a backslash.
**
**	Parameters:
**		sa -- string to append to
**		ta -- tokens to write
**
**	Returns:
**		1 on success
**		-1 if a character could not be appended; sa may then hold
**			partial output, the caller cleans up.
*/

int
t2822_unparse(sm_str_P sa, sm_2822_a_P ta)
{
	uint tokidx, pos;
	int ch, prevtype, curtype;
	int opener, closer;
	sm_2822_P tok;

	prevtype = 0;
	for (tokidx = 0; tokidx < ta->sm_2822a_len; ++tokidx)
	{
		tok = &ta->sm_2822a_t[tokidx];
		curtype = tok->sm_2822_type;

		if (needspace(prevtype, curtype) &&
		    sm_str_put(sa, (uchar) ' '))
			return -1;
		prevtype = curtype;

		switch (curtype)
		{
		  case T2822_COMMA:
			if (sm_str_put(sa, (uchar) ','))
				return -1;
			break;

		  case T2822_AT:
		  case T2822_DOT:
		  case T2822_LEFT:
		  case T2822_RIGHT:
		  case T2822_SEMI:
		  case T2822_COLON:
			/* the type is written out as the character itself */
			if (sm_str_put(sa, (uchar) curtype))
				return -1;
			break;

		  case T2822_ATOM:
		  case T2822_QUOTED:
		  case T2822_LITERAL:
		  case T2822_COMMENT:
			/* 0: no delimiter (plain atom) */
			opener = 0;
			closer = 0;
			if (curtype == T2822_QUOTED)
			{
				opener = '"';
				closer = '"';
			}
			else if (curtype == T2822_LITERAL)
			{
				opener = '[';
				closer = ']';
			}
			else if (curtype == T2822_COMMENT)
			{
				opener = '(';
				closer = ')';
			}

			if (opener != 0 && sm_str_put(sa, (uchar) opener))
				return -1;

			for (pos = 0; pos < sm_str_getlen(tok->sm_2822_val); ++pos)
			{
				ch = sm_str_rd_elem(tok->sm_2822_val, pos);
				if (ch == '"' || ch == '[' || ch == ']' ||
				    ch == '(' || ch == ')' || ch == '\\' ||
				    ch == '\r' || ch == '\n')
				{
					/* escape it */
					if (sm_str_put(sa, (uchar) '\\'))
						return -1;
				}
				if (sm_str_put(sa, (uchar) ch))
					return -1;
			}

			if (closer != 0 && sm_str_put(sa, (uchar) closer))
				return -1;
			break;
		}
	}
	return 1;
}
/*
**  T2822_UNQUOTE -- append the unquoted text of a token array to a string
**
**	Unlike t2822_unparse() no blanks, quotes or backslashes are
**	added: punctuation tokens are written as their character, atoms
**	and quoted strings as their raw value, domain literals as their raw
**	value enclosed in [ ].  Comments are dropped.
**
**	Parameters:
**		sa -- string to append to
**		ta -- tokens to write
**
**	Returns:
**		1 on success
**		-1 if a character could not be appended (sa may then hold
**			partial output).
*/

int t2822_unquote(sm_str_P sa, sm_2822_a_P ta)
{
	uint tokidx, pos;
	bool bracketed;
	sm_2822_P tok;

	for (tokidx = 0; tokidx < ta->sm_2822a_len; ++tokidx)
	{
		tok = &ta->sm_2822a_t[tokidx];
		switch (tok->sm_2822_type)
		{
		  case T2822_COMMA:
		  case T2822_AT:
		  case T2822_DOT:
		  case T2822_LEFT:
		  case T2822_RIGHT:
		  case T2822_SEMI:
		  case T2822_COLON:
			/* the type is written out as the character itself */
			if (sm_str_put(sa, (uchar) (tok->sm_2822_type)))
				return -1;
			break;

		  case T2822_ATOM:
		  case T2822_QUOTED:
		  case T2822_LITERAL:
			bracketed = (tok->sm_2822_type == T2822_LITERAL);
			if (bracketed && sm_str_put(sa, (uchar) '['))
				return -1;
			for (pos = 0; pos < sm_str_getlen(tok->sm_2822_val); ++pos)
			{
				if (sm_str_put(sa,
					sm_str_rd_elem(tok->sm_2822_val, pos)))
					return -1;
			}
			if (bracketed && sm_str_put(sa, (uchar) ']'))
				return -1;
			break;

		  case T2822_COMMENT:
			/* comments do not appear in the output */
			break;
		}
	}
	return 1;
}
/*
**  T2822_PARSE -- split an address string into RFC 2822 tokens
**
**	Works in two passes over sa: pass 1 checks the syntax and counts
**	the tokens, pass 2 fills the token array.  Both passes must
**	recognize exactly the same token boundaries; pass 2 relies on
**	pass 1 having verified that every comment, quoted string, domain
**	literal and backslash escape inside them is terminated.
**
**	Parameters:
**		ta -- token array to fill; grown with t2822_ready() as needed
**		sa -- string to tokenize
**
**	Returns:
**		1 on success; ta holds the tokens
**		0 on a syntax error found in pass 1 (stray ')' or ']', or an
**			unterminated comment, quoted string, literal or
**			escape); ta is not modified
**		-1 if memory could not be obtained.  When this happens in
**			pass 2, ta is partially filled: slots that were not
**			reached have type 0 and a NULL value, so the caller
**			never sees uninitialized pointers.  Values created
**			so far are not released here.
*/

sm_ret_T
t2822_parse(sm_2822_a_T *ta, sm_str_P sa)
{
	int i, salen, level, r;
	int numtoks, numchars;
	sm_2822_P t;
#if 0
	uchar *cbuf;
#endif /* 0 */

	salen = sm_str_getlen(sa);

	/*
	**  Pass 1: figure out how many tokens (numtoks) and how much space
	**  (numchars) it takes to store this address; also do the syntax
	**  check.
	*/

	numchars = 0;
	numtoks = 0;
	for (i = 0; i < salen; ++i)
	{
		switch (sm_str_rd_elem(sa, i))
		{
		  case '.': case ',': case '@': case '<':
		  case '>': case ':': case ';':
			++numtoks;
			break;

		  case ' ': case '\t': case '\r': case '\n':
			break;

		  case ')': case ']':
			/* closing delimiter without an opening one */
			return 0;

		/* other control chars and non-ASCII chars are also bad, in theory */

		  case '(':
			/* comment; may be nested */
			level = 1;
			while (level)
			{
				if (++i >= salen)
					return 0;
				switch (sm_str_rd_elem(sa, i))
				{
				  case '(':
					++level;
					break;
				  case ')':
					--level;
					break;
				  case '\\':
					if (++i >= salen)
						return 0;
					/* FALLTHROUGH */
				  default:
					++numchars;
				}
			}
			++numtoks;
			break;

		  case '"':
			/* same code as above, but without nesting */
			level = 1;
			while (level)
			{
				if (++i >= salen)
					return 0;
				switch (sm_str_rd_elem(sa, i))
				{
				  case '"':
					--level;
					break;
				  case '\\':
					if (++i >= salen)
						return 0;
					/* FALLTHROUGH */
				  default:
					++numchars;
				}
			}
			++numtoks;
			break;

		  case '[':
			/*
			**  same code as above; [...] is taken as one
			**  token, not broken apart
			*/

			level = 1;
			while (level)
			{
				if (++i >= salen)
					return 0;
				switch (sm_str_rd_elem(sa, i))
				{
				  case ']':
					--level;
					break;
				  case '\\':
					if (++i >= salen)
						return 0;
					/* FALLTHROUGH */
				  default:
					++numchars;
				}
			}
			++numtoks;
			break;

		  default:
			/* atom: runs until atomtype() reports a delimiter */
			do
			{
				if (sm_str_rd_elem(sa, i) == '\\')
					if (++i >= salen)
						break;
				++numchars;
				if (++i >= salen)
					break;
			} while (atomtype(sm_str_rd_elem(sa, i)) > 0);

			/* the outer loop advances i again */
			--i;
			++numtoks;
		}
	}

	/* allocate space for tokens and chars */
	if (!t2822_ready(ta, numtoks))
		return -1;
#if 0
	buf = (uchar *) sm_malloc(sizeof(*buf) * numchars);
	if (buf == NULL)
		return -1;
	/*
	note: cbuf is used to store the strings, i.e., no further allocs
	are required below. nice trick.
	*/
	cbuf = buf;
#endif /* 0 */

	ta->sm_2822a_len = numtoks;
	t = ta->sm_2822a_t;

	/*
	**  The length already covers all slots, so clear them first: if
	**  pass 2 fails half way, the slots it did not reach must not
	**  expose whatever the allocator left in them.
	*/

	for (i = 0; i < numtoks; ++i)
	{
		t[i].sm_2822_type = 0;
		t[i].sm_2822_val = NULL;
	}

	/* Pass 2: create the tokens */
	for (i = 0; i < salen; ++i)
	{
		switch (sm_str_rd_elem(sa, i))
		{
		  case '.':
		  case ',':
		  case '@':
		  case '<':
		  case '>':
		  case ':':
		  case ';':
			/* punctuation: the character itself is the type */
			t->sm_2822_type = sm_str_rd_elem(sa, i);
			t->sm_2822_val = NULL;
			++t;
			break;

		  case ' ': case '\t': case '\r': case '\n':
			break;

		  case ')':
		  case ']':
			/* error; pass 1 has already rejected this */
			goto error;

		  case '(':
			t->sm_2822_type = T2822_COMMENT;
			t->sm_2822_val = sm_str_new(NULL, 10, 1024);
			if (t->sm_2822_val == NULL)
				goto error;
			level = 1;
			while (level)
			{
				++i;	/* assert: < salen */
				switch (sm_str_rd_elem(sa, i))
				{
				  case '(':
					++level;
					break;
				  case ')':
					--level;
					break;
				  case '\\':
					++i;	/* assert: < salen */
					/* FALLTHROUGH */
				  default:
					if (sm_str_put(t->sm_2822_val,
						sm_str_rd_elem(sa, i)))
						goto error;
				}
			}
			++t;
			break;

		  case '"':
			t->sm_2822_type = T2822_QUOTED;
			t->sm_2822_val = sm_str_new(NULL, 10, 1024);
			if (t->sm_2822_val == NULL)
				goto error;
			level = 1;
			while (level)
			{
				++i;	/* assert: < salen */
				switch (sm_str_rd_elem(sa, i))
				{
				  case '"':
					--level;
					break;
				  case '\\':
					++i;	/* assert: < salen */
					/* FALLTHROUGH */
				  default:
					if (sm_str_put(t->sm_2822_val,
						sm_str_rd_elem(sa, i)))
						goto error;
				}
			}
			++t;
			break;

		  case '[':
			t->sm_2822_type = T2822_LITERAL;
			t->sm_2822_val = sm_str_new(NULL, 10, 1024);
			if (t->sm_2822_val == NULL)
				goto error;
			level = 1;
			while (level)
			{
				++i;	/* assert: < salen */
				switch (sm_str_rd_elem(sa, i))
				{
				  case ']':
					--level;
					break;
				  case '\\':
					++i;	/* assert: < salen */
					/* FALLTHROUGH */
				  default:
					if (sm_str_put(t->sm_2822_val,
						sm_str_rd_elem(sa, i)))
						goto error;
				}
			}
			++t;
			break;

		  default:
			t->sm_2822_type = T2822_ATOM;
			t->sm_2822_val = sm_str_new(NULL, 10, 1024);
			if (t->sm_2822_val == NULL)
				goto error;
			do
			{
				if (sm_str_rd_elem(sa, i) == '\\')
					if (++i >= salen)
						break;
				if (sm_str_put(t->sm_2822_val,
					sm_str_rd_elem(sa, i)))
					goto error;
				if (++i >= salen)
					break;

				/*
				**  A following character that needs quoting
				**  turns the whole token into a quoted one.
				*/

				r = atomtype(sm_str_rd_elem(sa, i));
				if (r == T2822_QUOTED)
					t->sm_2822_type = r;
			} while (r > 0);

			/* the outer loop advances i again */
			--i;
			++t;
		}
	}
	return 1;

  error:
	/* should cleanup ta, need to remember old value, free new ones */
	return -1;
}
/*
**  GOTADDR -- move the collected address tokens to the output array
**
**	Parameters:
**		taout -- array the tokens are appended to
**		taaddr -- tokens of the current address; emptied on success
**
**	Returns:
**		1 on success, 0 if taout could not be grown (taaddr is then
**		left as it was)
*/

static int
gotaddr(sm_2822_a_P taout, sm_2822_a_P taaddr)
{
	uint src;

	if (t2822_readyplus(taout, taaddr->sm_2822a_len) == 0)
		return 0;

	src = 0;
	while (src < taaddr->sm_2822a_len)
	{
		taout->sm_2822a_t[taout->sm_2822a_len] = taaddr->sm_2822a_t[src];
		taout->sm_2822a_len++;
		++src;
	}
	taaddr->sm_2822a_len = 0;
	return 1;
}
/*
**  T2822_ADDRLIST -- rewrite a parsed token list as a regular address list
**
**	Scans ta from its last token towards the front, stopping at the
**	third token.  The tokens of the address currently being read are
**	collected in taaddr and moved to taout when the address is complete;
**	a comma token is put between addresses.  The first two tokens of ta
**	are copied to taout unchanged at the end.  Everything is appended in
**	scan order (i.e., backwards), so taout is reversed before returning.
**	NOTE(review): the first two tokens are presumably a header field name
**	and its colon -- confirm against the callers.
**
**	Parameters:
**		taout -- result; its length is reset to 0 first
**		taaddr -- scratch array for the current address; its length
**			is reset to 0 first
**		ta -- tokens to read (not modified)
**
**	Returns:
**		1 on success
**		0 on a syntax error: ';' inside a group, ':' outside of a
**			group, or '>' without a matching '<'
**		-1 if a token array could not be grown
*/
int
t2822_addrlist(sm_2822_a_P taout, sm_2822_a_P taaddr, sm_2822_a_P ta)
{
bool ingroup, wordok;
sm_2822_P t;
sm_2822_P beginning;
taout->sm_2822a_len = 0;
taaddr->sm_2822a_len = 0;
/* make sure both arrays exist before tokens are appended to them */
if (!t2822_readyplus(taout, 1))
return -1;
if (!t2822_readyplus(taaddr, 1))
return -1;
/* ingroup: a ';' has been seen (scanning backwards) but not yet its ':' */
ingroup = false;
/* wordok: the next word may be added to the current address */
wordok = true;
/* the scan stops here; tokens 0 and 1 are copied after the loop */
beginning = ta->sm_2822a_t + 2;
t = ta->sm_2822a_t + ta->sm_2822a_len - 1;
/* rfc 822 address lists are easy to parse from right to left */
/*
**  Helper macros; each may return -1 from this function.
**	FLUSH: move a non-empty taaddr to taout.
**	FLUSHCOMMA: same, followed by a comma token.
**	ADDRLEFT: append *t to taaddr and step t one token to the left.
**	OUTLEFT: append *t to taout and step t one token to the left.
*/
#define FLUSH if (taaddr->sm_2822a_len) if (!gotaddr(taout, taaddr)) return -1;
#define FLUSHCOMMA if (taaddr->sm_2822a_len) { \
if (!gotaddr(taout, taaddr)) return -1; \
if (!t2822_append(taout, &comma)) return -1; }
#define ADDRLEFT if (!t2822_append(taaddr, t--)) return -1;
#define OUTLEFT if (!t2822_append(taout, t--)) return -1;
/*
**  Cases ending in "break" fall to the OUTLEFT at the bottom of the loop,
**  which copies the current token to taout; cases ending in "continue"
**  have already consumed their tokens.
*/
while (t >= beginning)
{
switch(t->sm_2822_type)
{
case T2822_SEMI:
/* end of a group (seen first, since the scan runs backwards) */
FLUSHCOMMA
if (ingroup)
return 0;
ingroup = true;
wordok = true;
break;
case T2822_COLON:
/* start of the group: copy ':' and everything up to and including the preceding comma */
FLUSH
if (!ingroup)
return 0;
ingroup = false;
while ((t >= beginning) && (t->sm_2822_type != T2822_COMMA))
OUTLEFT
if (t >= beginning)
OUTLEFT
wordok = true;
continue;
case T2822_RIGHT:
/* <...>: copy '>', collect the tokens up to '<' as the address */
FLUSHCOMMA
OUTLEFT
while ((t >= beginning) && (t->sm_2822_type != T2822_LEFT))
ADDRLEFT
/* important to use address here even if it's empty: <> */
if (!gotaddr(taout, taaddr))
return -1;
if (t < beginning)
return 0;
OUTLEFT
/* whatever precedes the '<' (comments, words, '@', '.') is copied as is */
while ((t >= beginning) && ((t->sm_2822_type == T2822_COMMENT) || (t->sm_2822_type == T2822_ATOM) || (t->sm_2822_type == T2822_QUOTED) || (t->sm_2822_type == T2822_AT) || (t->sm_2822_type == T2822_DOT)))
OUTLEFT
wordok = false;
continue;
case T2822_ATOM: case T2822_QUOTED: case T2822_LITERAL:
/* two words in a row: the previous address is complete */
if (!wordok)
FLUSHCOMMA
wordok = false;
ADDRLEFT
continue;
case T2822_COMMENT:
/* comment is lexically a space; shouldn't affect wordok */
break;
case T2822_COMMA:
FLUSH
wordok = true;
break;
default:
/* any other token becomes part of the address; a word may follow it */
wordok = true;
ADDRLEFT
continue;
}
OUTLEFT
}
FLUSH
/* copy the tokens in front of "beginning" that the loop did not consume */
++t;
while (t > ta->sm_2822a_t)
{
if (!t2822_append(taout, --t))
return -1;
}
/* taout was built back to front */
t2822_reverse(taout);
return 1;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */