/*
 * Copyright (c) 2002-2005 Sendmail, Inc. and its suppliers.
 *	All rights reserved.
 *
 * By using this file, you agree to the terms and conditions set
 * forth in the LICENSE file which can be found at the top level of
 * the sendmail distribution.
 */

#include "sm/generic.h"
SM_RCSID("@(#)$Id: rfc2822.c,v 1.26 2006/12/27 03:41:09 ca Exp $")
#include "sm/assert.h"
#include "sm/error.h"
#include "sm/rfc2821.h"
#include "sm/rfc2822.h"
#include "sm/heap.h"

/* implement RFC 2822 address parsing. more or less... */

/*
 * t2822_reserve -- make sure a token array can hold at least n tokens.
 *
 * If the array has no table yet, one is allocated (at least one element,
 * so that no zero-byte allocation is requested) and the length is reset
 * to 0.  Otherwise the table is grown with some slack when n exceeds the
 * current capacity.
 *
 * The result of sm_realloc() is stored in a temporary first: overwriting
 * sm_2822a_t directly would lose the only reference to the old table on
 * failure.  NOTE(review): this assumes sm_realloc() leaves the old block
 * valid on failure, like realloc() -- confirm against sm/heap.h.
 *
 * Returns 1 on success, 0 on allocation failure (array left unchanged
 * if it already had a table).
 */
static int
t2822_reserve(sm_2822_a_T *x, uint n)
{
	uint newcap;
	sm_2822_P newtab;

	if (x->sm_2822a_t == NULL)
	{
		newcap = (n > 0) ? n : 1;
		x->sm_2822a_len = 0;
		x->sm_2822a_a = 0;
		newtab = (sm_2822_P) sm_malloc(newcap * sizeof(sm_2822_T));
		if (newtab == NULL)
			return 0;
		x->sm_2822a_t = newtab;
		x->sm_2822a_a = newcap;
		return 1;
	}

	if (n <= x->sm_2822a_a)
		return 1;

	/* grow with slack so repeated appends do not realloc every time */
	newcap = 30 + n + (n >> 3);
	newtab = (sm_2822_P) sm_realloc(x->sm_2822a_t,
					newcap * sizeof(sm_2822_T));
	if (newtab == NULL)
		return 0;
	x->sm_2822a_t = newtab;
	x->sm_2822a_a = newcap;
	return 1;
}

/*
 * t2822_ready -- ensure capacity for n tokens in total.
 * Returns 1 on success, 0 on allocation failure.
 */
static int
t2822_ready(sm_2822_a_T *x, uint n)
{
	return t2822_reserve(x, n);
}

/*
 * t2822_readyplus -- ensure capacity for n tokens beyond the current length.
 * For an array without a table the current length is ignored (it is reset
 * to 0 by the allocation).
 * Returns 1 on success, 0 on allocation failure.
 */
static int
t2822_readyplus(sm_2822_a_T *x, uint n)
{
	if (x->sm_2822a_t != NULL)
		n += x->sm_2822a_len;
	return t2822_reserve(x, n);
}

/*
 * t2822_append -- append a (shallow) copy of token *i to array x.
 * Returns 1 on success, 0 on allocation failure.
 */
static int
t2822_append(sm_2822_a_T *x, sm_2822_P i)
{
	if (!t2822_readyplus(x, 1))
		return 0;
	x->sm_2822a_t[x->sm_2822a_len++] = *i;
	return 1;
}

/* separator token inserted between addresses by t2822_addrlist() */
static sm_2822_T comma = { T2822_COMMA, NULL, NULL, NULL };

/*
 * t2822_reverse -- reverse the order of the tokens in ta in place.
 */
static void
t2822_reverse(sm_2822_a_P ta)
{
	int i, n;
	sm_2822_T temp;

	n = ta->sm_2822a_len - 1;
	for (i = 0; i + i < n; ++i)
	{
		temp = ta->sm_2822a_t[i];
		ta->sm_2822a_t[i] = ta->sm_2822a_t[n - i];
		ta->sm_2822a_t[n - i] = temp;
	}
}

/*
 * needspace -- does t2822_unparse() have to emit a blank between a token
 * of type t1 and a following token of type t2?
 *
 * t1 == 0 means "no previous token".
 */
static bool
needspace(int t1, int t2)
{
	if (t1 == 0)
		return false;
	if (t1 == T2822_COLON || t1 == T2822_COMMA || t2 == T2822_LEFT)
		return true;

	/*
	 * fixme: put a macro in .h??
	 * Relies on the value-carrying token types comparing >= T2822_ATOM
	 * (definitions are in the header, not visible here).
	 */
	if (t1 >= T2822_ATOM && t2 >= T2822_ATOM)
		return true;
	return false;
}

/* characters that terminate an atom */
static const char delim[] = " \t\r\n([\"<>;:@,.";

/*
 * atomtype -- classify a character with respect to atoms.
 *
 * Returns:
 *	-1		ch is a delimiter and ends the current atom
 *	T2822_QUOTED	ch may be part of a word, but the word then has to
 *			be written as a quoted string
 *	T2822_ATOM	ch is a plain atom character
 *
 * The delimiter test has to come before the control-character test,
 * otherwise TAB, CR and LF (all < 32) would never be recognized as
 * delimiters.  NUL is excluded explicitly because strchr() would match
 * the terminating '\0' of delim.
 */
int
atomtype(uchar ch)
{
	if (ch != '\0' && strchr(delim, (int) ch) != NULL)
		return -1;
	if ((ch < 32) || (ch > 126) || (ch == ')') || (ch == ']')
	    || (ch == '\\'))
		return T2822_QUOTED;
	return T2822_ATOM;
}

/*
 * t2822_unparse -- append the textual form of token array ta to sa.
 *
 * Quoted strings, domain literals and comments get their enclosing
 * delimiters back, and the characters " [ ] ( ) \ CR LF inside any
 * value are preceded by a backslash.  Blanks are inserted where
 * needspace() asks for them.
 *
 * Returns 1 on success, -1 if appending to sa failed (sa may then hold
 * partial output; the caller cleans up).
 */
int
t2822_unparse(sm_str_P sa, sm_2822_a_P ta)
{
	uint i, j;
	int ch, prevtype, curtype;
	int opench, closech;
	sm_2822_P t;

	prevtype = 0;
	for (i = 0; i < ta->sm_2822a_len; ++i)
	{
		t = ta->sm_2822a_t + i;
		curtype = t->sm_2822_type;
		if (needspace(prevtype, curtype)
		    && sm_str_put(sa, (uchar) ' '))
			goto error;
		prevtype = curtype;

		switch (curtype)
		{
		case T2822_COMMA:
			if (sm_str_put(sa, (uchar) ','))
				goto error;
			break;

		case T2822_AT:
		case T2822_DOT:
		case T2822_LEFT:
		case T2822_RIGHT:
		case T2822_SEMI:
		case T2822_COLON:
			/* the type of these tokens is the character itself */
			if (sm_str_put(sa, (uchar) curtype))
				goto error;
			break;

		case T2822_ATOM:
		case T2822_QUOTED:
		case T2822_LITERAL:
		case T2822_COMMENT:
			/* 0 means: no enclosing delimiters (plain atom) */
			opench = closech = 0;
			if (curtype == T2822_QUOTED)
				opench = closech = '"';
			else if (curtype == T2822_LITERAL)
			{
				opench = '[';
				closech = ']';
			}
			else if (curtype == T2822_COMMENT)
			{
				opench = '(';
				closech = ')';
			}

			if (opench != 0 && sm_str_put(sa, (uchar) opench))
				goto error;
			for (j = 0; j < sm_str_getlen(t->sm_2822_val); ++j)
			{
				ch = sm_str_rd_elem(t->sm_2822_val, j);
				switch (ch)
				{
				case '"':
				case '[':
				case ']':
				case '(':
				case ')':
				case '\\':
				case '\r':
				case '\n':
					if (sm_str_put(sa, (uchar) '\\'))
						goto error;
					/* FALLTHROUGH */
				default:
					if (sm_str_put(sa, (uchar) ch))
						goto error;
					break;
				}
			}
			if (closech != 0 && sm_str_put(sa, (uchar) closech))
				goto error;
			break;

		default:
			/* unknown token type: nothing to write */
			break;
		}
	}
	return 1;

  error:
	/* caller cleans up */
	return -1;
}

/*
 * t2822_unquote -- append the unquoted form of token array ta to sa.
 *
 * Values are copied without any escaping and without the quotes of
 * quoted strings; domain literals keep their brackets; comments are
 * dropped; no blanks are inserted.
 *
 * Returns 1 on success, -1 if appending to sa failed.
 */
int
t2822_unquote(sm_str_P sa, sm_2822_a_P ta)
{
	uint i, j;
	sm_2822_P t;

	for (i = 0; i < ta->sm_2822a_len; ++i)
	{
		t = ta->sm_2822a_t + i;
		switch (t->sm_2822_type)
		{
		case T2822_COMMA:
		case T2822_AT:
		case T2822_DOT:
		case T2822_LEFT:
		case T2822_RIGHT:
		case T2822_SEMI:
		case T2822_COLON:
			if (sm_str_put(sa, (uchar) (t->sm_2822_type)))
				goto error;
			break;

		case T2822_ATOM:
		case T2822_QUOTED:
		case T2822_LITERAL:
			if (t->sm_2822_type == T2822_LITERAL
			    && sm_str_put(sa, (uchar) '['))
				goto error;
			for (j = 0; j < sm_str_getlen(t->sm_2822_val); ++j)
			{
				if (sm_str_put(sa,
					sm_str_rd_elem(t->sm_2822_val, j)))
					goto error;
			}
			if (t->sm_2822_type == T2822_LITERAL
			    && sm_str_put(sa, (uchar) ']'))
				goto error;
			break;

		case T2822_COMMENT:
			break;

		default:
			break;
		}
	}
	return 1;

  error:
	return -1;
}

/*
 * t2822_enclosed -- scan a comment, quoted string or domain literal.
 *
 * Parameters:
 *	sa	string being parsed
 *	salen	length of sa
 *	pi	in: index of the opening delimiter;
 *		out (on success): index of the matching closing delimiter
 *	endch	closing delimiter
 *	nestch	delimiter that opens a nested level (comments); pass the
 *		same value as endch if the construct does not nest
 *	val	if not NULL, the content is appended to it: delimiters
 *		(including nested ones) are dropped and a backslash is
 *		replaced by the character it escapes
 *
 * Returns 1 on success, 0 if the input ends before the construct is
 * closed, -1 if appending to val failed.
 */
static int
t2822_enclosed(sm_str_P sa, int salen, int *pi, int endch, int nestch,
	       sm_str_P val)
{
	int i, ch, level;

	i = *pi;
	level = 1;
	while (level > 0)
	{
		if (++i >= salen)
			return 0;
		ch = sm_str_rd_elem(sa, i);
		if (ch == endch)
			--level;
		else if (ch == nestch)
			++level;
		else
		{
			if (ch == '\\' && ++i >= salen)
				return 0;
			if (val != NULL
			    && sm_str_put(val, sm_str_rd_elem(sa, i)))
				return -1;
		}
	}
	*pi = i;
	return 1;
}

/*
 * t2822_atom -- scan an atom.
 *
 * Parameters:
 *	sa	string being parsed
 *	salen	length of sa
 *	pi	in: index of the first character of the atom;
 *		out (on success): index of its last character
 *	val	if not NULL, the atom is appended to it, with a backslash
 *		replaced by the character it escapes
 *	ptype	if not NULL, receives T2822_QUOTED if any character seen
 *		by the scanner (including the first one) requires
 *		quoting, else T2822_ATOM
 *
 * Returns 1 on success, -1 if appending to val failed.
 */
static int
t2822_atom(sm_str_P sa, int salen, int *pi, sm_str_P val, int *ptype)
{
	int i, r, type;

	i = *pi;

	/*
	 * The caller dispatches on the first character, so it is known not
	 * to be a delimiter, but it still may require quoting.
	 */
	type = T2822_ATOM;
	if (atomtype(sm_str_rd_elem(sa, i)) == T2822_QUOTED)
		type = T2822_QUOTED;

	do
	{
		/* a trailing backslash ends the atom without a character */
		if (sm_str_rd_elem(sa, i) == '\\' && ++i >= salen)
			break;
		if (val != NULL && sm_str_put(val, sm_str_rd_elem(sa, i)))
			return -1;
		if (++i >= salen)
			break;
		r = atomtype(sm_str_rd_elem(sa, i));
		if (r == T2822_QUOTED)
			type = r;
	} while (r > 0);

	/* i is one past the atom; the caller's loop increments again */
	*pi = i - 1;
	if (ptype != NULL)
		*ptype = type;
	return 1;
}

/*
 * t2822_parse -- split string sa into RFC 2822 tokens stored in ta.
 *
 * Pass 1 checks the syntax and counts the tokens, pass 2 fills the
 * token table.  Punctuation tokens use the character as type and have
 * no value; atoms, quoted strings, domain literals and comments get a
 * newly created string as value.
 *
 * Returns:
 *	1	success
 *	0	syntax error (unbalanced ')' or ']', unterminated comment,
 *		quoted string or literal); ta is not modified
 *	-1	allocation failure or a token value could not be stored.
 *		ta->sm_2822a_len then counts only the tokens that have
 *		been initialized, so their values are still reachable.
 *		They are not released here, and neither are the values of
 *		tokens that were in ta before the call.
 */
sm_ret_T
t2822_parse(sm_2822_a_T *ta, sm_str_P sa)
{
	int i, salen, ch, numtoks, type, endch;
	sm_2822_P t;

	salen = sm_str_getlen(sa);

	/* pass 1: syntax check, and count the tokens */
	numtoks = 0;
	for (i = 0; i < salen; ++i)
	{
		ch = sm_str_rd_elem(sa, i);
		switch (ch)
		{
		case '.':
		case ',':
		case '@':
		case '<':
		case '>':
		case ':':
		case ';':
			++numtoks;
			break;

		case ' ':
		case '\t':
		case '\r':
		case '\n':
			break;

		case ')':
		case ']':
			/*
			 * other control chars and non-ASCII chars are also
			 * bad, in theory
			 */
			return 0;

		case '(':
			if (t2822_enclosed(sa, salen, &i, ')', '(', NULL) <= 0)
				return 0;
			++numtoks;
			break;

		case '"':
			if (t2822_enclosed(sa, salen, &i, '"', '"', NULL) <= 0)
				return 0;
			++numtoks;
			break;

		case '[':
			/* [...] is taken as one token, not broken apart */
			if (t2822_enclosed(sa, salen, &i, ']', ']', NULL) <= 0)
				return 0;
			++numtoks;
			break;

		default:
			/* cannot fail without a destination string */
			(void) t2822_atom(sa, salen, &i, NULL, NULL);
			++numtoks;
			break;
		}
	}

	/* allocate space for the tokens */
	if (!t2822_ready(ta, numtoks))
		return -1;

	/*
	 * pass 2: create the tokens.  The length is advanced as soon as a
	 * token is initialized, so that it never covers garbage entries
	 * when we have to bail out.
	 */
	ta->sm_2822a_len = 0;
	t = ta->sm_2822a_t;
	for (i = 0; i < salen; ++i)
	{
		ch = sm_str_rd_elem(sa, i);
		switch (ch)
		{
		case '.':
		case ',':
		case '@':
		case '<':
		case '>':
		case ':':
		case ';':
			t->sm_2822_type = ch;
			t->sm_2822_val = NULL;
			++ta->sm_2822a_len;
			++t;
			break;

		case ' ':
		case '\t':
		case '\r':
		case '\n':
			break;

		case ')':
		case ']':
			/* already rejected by pass 1 */
			goto error;

		case '(':
		case '"':
		case '[':
			if (ch == '(')
			{
				type = T2822_COMMENT;
				endch = ')';
			}
			else if (ch == '"')
			{
				type = T2822_QUOTED;
				endch = '"';
			}
			else
			{
				type = T2822_LITERAL;
				endch = ']';
			}
			t->sm_2822_type = type;
			t->sm_2822_val = sm_str_new(NULL, 10, 1024);
			if (t->sm_2822_val == NULL)
				goto error;
			++ta->sm_2822a_len;

			/* only comments nest */
			if (t2822_enclosed(sa, salen, &i, endch,
					   (ch == '(') ? '(' : endch,
					   t->sm_2822_val) <= 0)
				goto error;
			++t;
			break;

		default:
			t->sm_2822_type = T2822_ATOM;
			t->sm_2822_val = sm_str_new(NULL, 10, 1024);
			if (t->sm_2822_val == NULL)
				goto error;
			++ta->sm_2822a_len;
			if (t2822_atom(sa, salen, &i, t->sm_2822_val,
				       &type) <= 0)
				goto error;
			t->sm_2822_type = type;
			++t;
			break;
		}
	}
	return 1;

  error:
	/*
	 * should cleanup ta: need to remember old value, free new ones.
	 * Until then the values created so far stay in ta (see above).
	 */
	return -1;
}

/*
 * gotaddr -- move the tokens collected in taaddr to the end of taout.
 *
 * taaddr is empty afterwards.
 * Returns 1 on success, 0 on allocation failure.
 */
static int
gotaddr(sm_2822_a_P taout, sm_2822_a_P taaddr)
{
	uint i;

	if (!t2822_readyplus(taout, taaddr->sm_2822a_len))
		return 0;
	for (i = 0; i < taaddr->sm_2822a_len; ++i)
		taout->sm_2822a_t[taout->sm_2822a_len++] = taaddr->sm_2822a_t[i];
	taaddr->sm_2822a_len = 0;
	return 1;
}

/*
 * t2822_addrlist -- rewrite the tokens of an address list header.
 *
 * Parameters:
 *	taout	receives the result (previous content is discarded)
 *	taaddr	scratch array for the address currently being collected
 *	ta	tokens of the header; the first two tokens are not
 *		interpreted but copied as they are
 *
 * The list is scanned from right to left, the output is collected in
 * reverse order and reversed at the end.  Tokens are copied shallowly,
 * i.e., taout shares the values with ta.
 *
 * Returns 1 on success, 0 on a syntax error (';' or ':' of a group
 * without its counterpart, '>' without '<'), -1 on allocation failure.
 */
int
t2822_addrlist(sm_2822_a_P taout, sm_2822_a_P taaddr, sm_2822_a_P ta)
{
	bool ingroup, wordok;
	sm_2822_P t;
	sm_2822_P beginning;

	taout->sm_2822a_len = 0;
	taaddr->sm_2822a_len = 0;
	if (!t2822_readyplus(taout, 1))
		return -1;
	if (!t2822_readyplus(taaddr, 1))
		return -1;

	/*
	 * Nothing to do for an empty list; this also avoids computing a
	 * pointer in front of the (possibly missing) token table below.
	 */
	if (ta->sm_2822a_len == 0)
		return 1;

	ingroup = false;
	wordok = true;
	beginning = ta->sm_2822a_t + 2;
	t = ta->sm_2822a_t + ta->sm_2822a_len - 1;

	/* rfc 822 address lists are easy to parse from right to left */

	/*
	 * Note: these macros are complete statements (do not add ';') and
	 * return from the function on failure.
	 *
	 *	FLUSH		move a pending address to the output
	 *	FLUSHCOMMA	same, followed by a comma token
	 *	ADDRLEFT	add *t to the pending address, step left
	 *	OUTLEFT		add *t to the output, step left
	 */
#define FLUSH if (taaddr->sm_2822a_len) if (!gotaddr(taout, taaddr)) return -1;
#define FLUSHCOMMA if (taaddr->sm_2822a_len) { \
	if (!gotaddr(taout, taaddr)) return -1; \
	if (!t2822_append(taout, &comma)) return -1; }
#define ADDRLEFT if (!t2822_append(taaddr, t--)) return -1;
#define OUTLEFT if (!t2822_append(taout, t--)) return -1;

	while (t >= beginning)
	{
		switch (t->sm_2822_type)
		{
		case T2822_SEMI:
			/* end of a group (seen first, coming from the right) */
			FLUSHCOMMA
			if (ingroup)
				return 0;
			ingroup = true;
			wordok = true;
			break;

		case T2822_COLON:
			/* start of a group: copy its name up to the comma */
			FLUSH
			if (!ingroup)
				return 0;
			ingroup = false;
			while ((t >= beginning)
			       && (t->sm_2822_type != T2822_COMMA))
				OUTLEFT
			if (t >= beginning)
				OUTLEFT
			wordok = true;
			continue;

		case T2822_RIGHT:
			FLUSHCOMMA
			OUTLEFT
			while ((t >= beginning)
			       && (t->sm_2822_type != T2822_LEFT))
				ADDRLEFT

			/* important to use address here even if it's empty: <> */
			if (!gotaddr(taout, taaddr))
				return -1;
			if (t < beginning)
				return 0;
			OUTLEFT

			/* whatever precedes '<' is copied unchanged */
			while ((t >= beginning)
			       && ((t->sm_2822_type == T2822_COMMENT)
				   || (t->sm_2822_type == T2822_ATOM)
				   || (t->sm_2822_type == T2822_QUOTED)
				   || (t->sm_2822_type == T2822_AT)
				   || (t->sm_2822_type == T2822_DOT)))
				OUTLEFT
			wordok = false;
			continue;

		case T2822_ATOM:
		case T2822_QUOTED:
		case T2822_LITERAL:
			/* two adjacent words: a new address starts here */
			if (!wordok)
				FLUSHCOMMA
			wordok = false;
			ADDRLEFT
			continue;

		case T2822_COMMENT:
			/* comment is lexically a space; shouldn't affect wordok */
			break;

		case T2822_COMMA:
			FLUSH
			wordok = true;
			break;

		default:
			wordok = true;
			ADDRLEFT
			continue;
		}
		OUTLEFT
	}
	FLUSH

	/* copy the uninterpreted tokens at the start of the header */
	++t;
	while (t > ta->sm_2822a_t)
	{
		if (!t2822_append(taout, --t))
			return -1;
	}

	t2822_reverse(taout);
	return 1;
}