/*
 * Copyright (c) 2002-2005 Sendmail, Inc. and its suppliers.
 *	All rights reserved.
 *
 * By using this file, you agree to the terms and conditions set
 * forth in the LICENSE file which can be found at the top level of
 * the sendmail distribution.
 */

#include "sm/generic.h"
SM_RCSID("@(#)$Id: rfc2821.c,v 1.58 2006/12/27 03:40:21 ca Exp $")
#include "sm/assert.h"
#include "sm/error.h"
#include "sm/memops.h"
#include "sm/ctype.h"
#include "sm/rpool.h"
#include "sm/str.h"
#include "sm/rdstr.h"
#include "sm/rfc2821.h"
#include "sm/rfc2822.h"
#include "sm/net.h"

/*
implement RFC 2821 address parsing.

** put this into the sm-9 docs (implementation)?

question: which structure should we use to store the address?
just one string or a graph?
a graph makes address manipulation simpler since components can
be easily replaced. a string would need to consist at least of
16bit elements to store meta data or another pointer structure would
be needed to "address/identify" the various parts.
-> graph seems better

should we use the same tree structure as for RFC 2822 addresses?
that would make it simpler to apply address rewriting etc to both types...
we could even go so far as postfix and use only an RFC 2822 address
parser (see libpfxsm/ *822*). however, that seems to be too permissive...
it should be an option whether envelope addresses can violate RFC 2821
syntax and use 2822 syntax. hence the APIs for both should be the
same -> easy to replace (just set a function pointer).

Notice:
4.1.1.3 RECIPIENT (RCPT)
"RCPT TO:" ("<Postmaster@" domain ">" / "<Postmaster>" / Forward-Path)
	[SP Rcpt-parameters] CRLF
need to allow this (and maybe some other addresses)

We could use the qmail trick to count the required memory space first
and then allocate everything at once. This simplifies error handling
but it makes the allocated space "one chunk" that can't be free()d
individually. However, if it is an rpool, that doesn't matter anyway.

fixme: This code currently takes domain literals as one token.
This probably needs to be changed. */ /* ** T2821_FREE -- free an sm_t2821_T token ** ** Parameters: ** rpool -- rpool that has been used ** tok -- token to free ** ** Returns: ** nothing. */ void t2821_free(sm_rpool_P rpool, sm_t2821_P tok) { if (NULL == tok) return; SM_REQUIRE(tok->sm_t2821_type != T2821_ENDTOK); if (tok->sm_t2821_val != NULL) { SM_ASSERT(tok->sm_t2821_type >= T2821_MINTOK && tok->sm_t2821_type < T2821_ENDTOK); sm_str_free(tok->sm_t2821_val); tok->sm_t2821_val = NULL; } sm_rpool_free(rpool, tok); } /* ** T2821_FREE -- free an sm_a2821_T address ** ** Parameters: ** addr -- address to free ** ** Returns: ** nothing. */ void a2821_free(sm_a2821_T *addr) { sm_t2821_P tok, nxt; if (NULL == addr) return; for (tok = A2821_FIRST(addr); tok != A2821_END(addr); tok = nxt) { nxt = T2821_NEXT(tok); t2821_free(addr->sm_a2821_rpool, tok); } /* clean out addr itself? */ A2821_INIT(addr); } /* ** ATEXT -- is a char an "atext"? (see grammar in sm/rfc2821.h) ** ** Parameters: ** ch -- character to test ** ** Returns: ** ch is atext? */ static bool atext(uchar ch) { if (ISALPHA(ch) || ISDIGIT(ch)) return true; if (strchr("!#$%&'*+-/=?^_`{|}~", ch) != NULL) return true; return false; } /* ** MUSTQUOTE -- does char require quoting? (see grammar in sm/rfc2821.h) ** ** Parameters: ** ch -- character to test ** ** Returns: ** ch requires quoting? 
*/ static bool mustquote(uchar ch) { return (ch < 32) || (ch > 126); } #if 0 static const char t2821_delim[] = ".<>@[]%!"; static const char t2821_special[] = "|\"(),.:;<>@[]%!"; #endif /* 0 */ #if 0 static bool is2821delim(uchar ch) { if (strchr(t2821_delim, (int) ch) != NULL) return true; return false; } #endif /* 0 */ /* ** T2821_STR -- convert an addr into external format ** ** Parameters: ** addr -- address to convert ** str -- string which will contain address representation (output) ** flags -- flags to control output ** ** Returns: ** usual error code */ sm_ret_T t2821_str(sm_a2821_T *addr, sm_str_P str, uint flags) { int ch, curtype; uint j; sm_t2821_P tok; #if 0 ///* //** This part calculates the space required to store the address, //** but it isn't activated because str is "selfsizing" (within limits). //*/ // int len; // // len = 0; // prevtype = 0; // // /* calculate length */ // for (tok = A2821_FIRST(addr); tok != A2821_END(addr); // tok = T2821_NEXT(tok)) // { // curtype = tok->sm_t2821_type; // if (needspace(prevtype, curtype)) // ++len; // prevtype = curtype; // switch (curtype) // { // case T2821_COMMA: // len += 2; /* too much? 
*/ // break; // case T2821_AT: // case T2821_DOT: // case T2821_LEFT: // case T2821_RIGHT: // case T2821_SEMI: // case T2821_COLON: // ++len; // break; // case T2821_ATOM: // case T2821_QUOTED: // case T2821_LITERAL: // case T2821_COMMENT: // if (tok->sm_t2821_type != T2821_ATOM) // len += 2; // for (j = 0; j < sm_str_getlen(tok->sm_t2821_val); ++j) // { // switch (ch = sm_str_rd_elem(tok->sm_t2821_val, j)) // { // case '"': case '[': case ']': case '(': case ')': // case '\\': case '\r': case '\n': ++len; // default: ++len; // } // } // break; // } // } // len += 2; // if (sm_str_space(str, len) == 0) // return sm_error_temp(SM_EM_ADDR, ENOMEM); #endif /* 0 */ for (tok = A2821_FIRST(addr); tok != A2821_END(addr); tok = T2821_NEXT(tok)) { curtype = tok->sm_t2821_type; switch (curtype) { case T2821_COMMA: if (sm_str_put(str, (uchar) ',')) goto error; break; case T2821_LEFT: if ((!SM_IS_FLAG(flags, T2821_FL_NOANGLE) || tok != A2821_FIRST(addr)) && sm_str_put(str, (uchar) curtype)) goto error; break; case T2821_RIGHT: if ((!SM_IS_FLAG(flags, T2821_FL_NOANGLE) || T2821_NEXT(tok) != A2821_END(addr)) && sm_str_put(str, (uchar) curtype)) goto error; break; case T2821_AT: case T2821_DOT: case T2821_SEMI: case T2821_COLON: if (sm_str_put(str, (uchar) curtype)) goto error; break; case T2821_ATOM: case T2821_QUOTED: if (T2821_QUOTED == tok->sm_t2821_type) if (sm_str_put(str, (uchar) '"')) goto error; #if 0 // if (T2821_COMMENT == tok->sm_t2821_type) // if (sm_str_put(str, (uchar) '(')) // goto error; #endif /* 0 */ for (j = 0; j < sm_str_getlen(tok->sm_t2821_val); ++j) { ch = sm_str_rd_elem(tok->sm_t2821_val, j); if (ch == '"' || (curtype != T2821_QUOTED && (mustquote(ch) || !atext(ch)) )) { if (sm_str_put(str, (uchar) '\\')) goto error; } if (sm_str_put(str, (uchar) ch)) goto error; } if (T2821_QUOTED == tok->sm_t2821_type) { if (sm_str_put(str, (uchar) '"')) goto error; } #if 0 // else if (T2821_COMMENT == tok->sm_t2821_type) // { // if (sm_str_put(str, (uchar) ')')) // 
goto error; // } #endif /* 0 */ break; case T2821_LITERAL: if (sm_str_put(str, (uchar) '[')) goto error; for (j = 0; j < sm_str_getlen(tok->sm_t2821_val); ++j) { ch = sm_str_rd_elem(tok->sm_t2821_val, j); if (ch == '"' || (curtype != T2821_QUOTED && (mustquote(ch) || !atext(ch)) && (curtype != T2821_LITERAL && ch != '.') )) { if (sm_str_put(str, (uchar) '\\')) goto error; } if (sm_str_put(str, (uchar) ch)) goto error; } if (sm_str_put(str, (uchar) ']')) goto error; break; #if 0 // case T2821_COMMENT: // if (sm_str_put(str, (uchar) '(')) // goto error; // for (j = 0; j < sm_str_getlen(tok->sm_t2821_val); ++j) // { // ch = sm_str_rd_elem(tok->sm_t2821_val, j); // if (ch == '"' || // (curtype != T2821_QUOTED && // (mustquote(ch) || !atext(ch)) && // (curtype != T2821_LITERAL && ch != '.') // )) // { // if (sm_str_put(str, (uchar) '\\')) // goto error; // } // if (sm_str_put(str, (uchar) ch)) // goto error; // } // if (sm_str_put(str, (uchar) ')')) // goto error; // break; #endif /* 0 */ } } return SM_SUCCESS; error: /* caller cleans up */ return sm_error_temp(SM_EM_ADDR, ENOMEM); } /* ** T2821_SCAN -- scan address from str into addr starting at off ** ** Parameters: ** str -- string to read from (readonly) ** addr -- scanned address (output) ** off -- offset in string ** ** Returns: ** >=0: offset in string for next char that doesn't belong ** to the scanned address ** <0: usual error code ** ** Note: addr MUST be initialized by caller, sm_a2821_rpool must be ** set by caller if an rpool is supposed to be used. 
*/

sm_ret_T
t2821_scan(sm_rdstr_P str, sm_a2821_T *addr, int off)
{
	uchar ch, och;
	uint i, strl;
	int level;
	bool mbq, dot;
	sm_ret_T ret;
	sm_t2821_P t;
	sm_rpool_P rpool;

	/* some local macros */

	/*
	**  SC_NEWTOK_T: allocate a new token t from rpool and set its type.
	**  On allocation failure: set ret to a temporary ENOMEM error and
	**  jump to the error label.
	*/

#define SC_NEWTOK_T(type)	\
	do {	\
		t = (sm_t2821_P) sm_rpool_zalloc(rpool, sizeof(*t));	\
		if (NULL == t)	\
		{	\
			ret = sm_error_temp(SM_EM_ADDR, ENOMEM);	\
			goto error;	\
		}	\
		t->sm_t2821_type = type;	\
	} while (0)

	/*
	**  SC_NEWTOK_S: like SC_NEWTOK_T but also create the string that
	**  holds the token value.
	**  NOTE(review): 10 and 1024 are presumably initial and maximum
	**  size of that string -- confirm against sm_str_new().
	*/

/* XXX introduce constants for these values */
#define SC_NEWTOK_S(type)	\
	do {	\
		SC_NEWTOK_T(type);	\
		t->sm_t2821_val = sm_str_new(rpool, 10, 1024);	\
		if (NULL == t->sm_t2821_val)	\
		{	\
			ret = sm_error_temp(SM_EM_ADDR, ENOMEM);	\
			goto error;	\
		}	\
	} while (0)

	/*
	**  SC_SEQ: starting behind the opening delimiter at index i, append
	**  characters to t->sm_t2821_val until the unescaped closing
	**  character endch is read; i is then the index of endch.
	**  A backslash causes the following character to be stored as is.
	**  Running off the end of the input is an error
	**  (R2821_ERR_TOOLONG, or R2821_ERR_TRAILBACKSL after a backslash),
	**  as is a failing sm_str_put() (R2821_ERR_TOOLONG).
	**  Side effects:
	**	mbq ("must be quoted") is set if a character is seen that
	**		requires quoting or is not atext; a '.' only counts
	**		if it is the first character or follows another '.'
	**		(och starts out as '.'). It is also set if the
	**		collected value is empty.
	**	dot is set if the value contains a '.'.
	**  NOTE(review): after the loop ch holds the closing character,
	**  hence the "ch == '.'" test after the loop seems to be without
	**  effect; maybe a check for a trailing dot was intended -- confirm.
	*/

/*
**  Notice: we could set mbq also to false if it contains dots between
**  other characters
*/

#define SC_SEQ(endch /*, INCR */)	\
	do {	\
		level = 1;	\
		mbq = dot = false;	\
		och = '.';	\
		while (level > 0)	\
		{	\
			if (++i >= strl)	\
			{	\
				ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_TOOLONG); \
				goto error;	\
			}	\
			ch = SM_RDSTR_RD_ELEM(str, i);	\
			switch (ch)	\
			{	\
			/* INCR */	\
			  case endch:	\
				--level;	\
				break;	\
			  case '\\':	\
				if (++i >= strl)	\
				{	\
					ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_TRAILBACKSL);\
					goto error;	\
				}	\
				ch = SM_RDSTR_RD_ELEM(str, i);	\
				/* FALLTHROUGH */	\
			  default:	\
				if (!mbq &&	\
				    (mustquote(ch) || !atext(ch)) &&	\
				    (ch != '.' || och == '.'))	\
					mbq = true;	\
				if (!dot && ch == '.')	\
					dot = true;	\
				if (sm_str_put(t->sm_t2821_val, (uchar) ch))	\
				{	\
					ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_TOOLONG);\
					goto error;	\
				}	\
			}	\
			och = ch;	\
		}	\
		if (ch == '.' || sm_str_getlen(t->sm_t2821_val) == 0)	\
			mbq = true;	\
	} while (0)

	/* Must be set by caller */
	rpool = addr->sm_a2821_rpool;
	strl = SM_RDSTR_GETLEN(str);
	ret = -1;

	/* one token per iteration; t is the token under construction */
	for (i = off; i < strl; ++i)
	{
		ch = SM_RDSTR_RD_ELEM(str, i);

		/*
		**  Don't use switch but a different classification, e.g. strchr()
		**  for simpler changeability
		*/

		/* t stays NULL until a token has been allocated (see error:) */
		t = NULL;
		switch (ch)
		{
		  /* single character tokens: the character is the type */
		  case '.':
		  case ',':
		  case '@':
		  case '<':
		  case '>':
		  case ':':
		  case ';':
			SC_NEWTOK_T(ch);
			t->sm_t2821_val = NULL;
			break;

		  /* delimiter: stop scanning */
		  case ' ':
		  case '\t':
		  case '\r':
		  case '\n':
			return i;

		  /* closing character without an opening one */
		  case ')':
			ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_CLS_PARENTH);
			goto error;
		  case ']':
			ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_CLS_BRACKET);
			goto error;

		  /* no comments in RFC 2821 addresses */
		  case '(':
			ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_PARENTHESIS);
			goto error;
#if 0
			SC_NEWTOK_S(T2821_COMMENT);
			SC_SEQ(')', case '(': ++level; break;);
			break;
#endif /* 0 */

		  /* quoted string; stored without quotes and backslashes */
		  case '"':
			SC_NEWTOK_S(T2821_QUOTED);
			SC_SEQ('"' /*, none */);

			/*
			**  XXX If it has a dot but it doesn't need quoting
			**  then it must be split into atoms (dot-string)
			**  Should probably done via an unquote function.
			*/

			/* quoting was not necessary: treat it as an atom */
			if (!mbq && !dot)
				t->sm_t2821_type = T2821_ATOM;
			break;

		  /* domain literal, taken as one token */
		  case '[':
		  {
			uint b;

			SC_NEWTOK_S(T2821_LITERAL);

			/* b: index of the opening '[' */
			b = i;
			SC_SEQ(']' /*, none */);

			/*
			**  Let sm_inet_a2ipv4() check the text beginning at
			**  the '['; an error rejects the address.
			*/

			ret = sm_inet_a2ipv4((const char *)SM_RDSTR_DATA(str) + b,
					NULL, NULL);
			if (sm_is_err(ret))
				goto error;
			break;
		  }

		  /* everything else starts an atom */
		  default:
			if (mustquote(ch))
			{
				ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_MUSTQUOTE);
				goto error;
			}
			SC_NEWTOK_S(T2821_ATOM);
			do
			{
				/* RFC2821 doesn't allow this but 821 does */
				if (ch == '\\')
				{
					/* store the escaped character as is */
					if (++i >= strl)
					{
						ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_TOOLONG);
						goto error;
					}
					ch = SM_RDSTR_RD_ELEM(str, i);
				}
				else if (mustquote(ch))
				{
					ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_MUSTQUOTE);
					goto error;
				}
				if (sm_str_put(t->sm_t2821_val, (uchar) ch))
				{
					ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_TOOLONG);
					goto error;
				}
				if (++i >= strl)
					break;
				ch = SM_RDSTR_RD_ELEM(str, i);
			} while (atext(ch) || ch == '\\');

			/* i is one too far; the for loop increments it again */
			--i;
		}

		/* append the finished token to the address */
		if (t != NULL)
			A2821_INSERT_TAIL(addr, t);
	}
	return i;

  error:
	/* remove unconnected token */
	if (t != NULL)
		t2821_free(rpool, t);

	/* should cleanup addr, need to remember old value, free new ones */
	return ret;
}

/*
**  VALIDSUBDOMAIN -- check whether a domain component is
syntactically valid ** ** Parameters: ** str -- string that stores domain ** flags -- flags to influence checks ** ** Returns: ** true iff domain is syntactically valid */ static bool validsubdomain(sm_str_P str, uint flags) { size_t j, l; uchar ch; if (NULL == str) return false; l = sm_str_getlen(str); if (0 == l) return false; ch = sm_str_rd_elem(str, 0); if (!(ISALPHA(ch) || ISDIGIT(ch))) return false; for (j = 1; j < l - 1; ++j) { ch = sm_str_rd_elem(str, j); if (!(ISALPHA(ch) || ISDIGIT(ch) || ch == '-' || (HAS_R2821(flags, R2821__) && ch == '_'))) return false; } ch = sm_str_rd_elem(str, l - 1); if (!(ISALPHA(ch) || ISDIGIT(ch))) return false; return true; } /* ** T2821_DOMAIN -- parse a domain ** ** Parameters: ** addr -- address containing domain. ** ptok -- token where domain ends (output) ** flags -- flags to use for checking domain, see sm/rfc2821.h ** ** Returns: ** usual error code */ sm_ret_T t2821_domain(sm_a2821_T *addr, sm_t2821_P *ptok, uint flags) { int n; sm_t2821_P tok, cur_tok; tok = *ptok; if (NULL == tok) return sm_error_perm(SM_EM_ADDR, R2821_ERR_DOMAIN); cur_tok = NULL; /* address literal? XXX check whether flags allow it? */ if (T2821_LITERAL == tok->sm_t2821_type) { *ptok = T2821_NEXT(tok); if (HAS_R2821(flags, R2821_FREE)) A2821_REMOVE_FREE(addr, tok); return SM_SUCCESS; } /* must start with an atom */ if (tok->sm_t2821_type != T2821_ATOM) return sm_error_perm(SM_EM_ADDR, R2821_ERR_HOST); if (HAS_R2821(flags, R2821_DOMAIN) && !validsubdomain(tok->sm_t2821_val, flags)) return sm_error_perm(SM_EM_ADDR, R2821_ERR_DOMSYNTAX); /* put a limit on the number? */ for (n = 1; ; ++n) { if (T2821_ATOM == tok->sm_t2821_type) { cur_tok = tok; tok = T2821_NEXT(tok); if (HAS_R2821(flags, R2821_FREE)) A2821_REMOVE_FREE(addr, cur_tok); } /* check for delimiter */ if (NULL == tok || T2821_RIGHT == tok->sm_t2821_type || T2821_COMMA == tok->sm_t2821_type || T2821_COLON == tok->sm_t2821_type ) { *ptok = tok; /* must have FQDN, i.e., more than 1 sub-domain? 
*/ if (HAS_R2821(flags, R2821_FQDN) && n == 1) return sm_error_perm(SM_EM_ADDR, R2821_ERR_FQDN); return SM_SUCCESS; } /* simply ending without delimiter? */ if (T2821_ENDTOK == tok->sm_t2821_type && !HAS_R2821(flags, R2821_ANGLE)) { *ptok = tok; /* must have FQDN, i.e., more than 1 sub-domain? */ if (HAS_R2821(flags, R2821_FQDN) && n == 1) return sm_error_perm(SM_EM_ADDR, R2821_ERR_FQDN); return SM_SUCCESS; } /* no delimiter, must be dot */ if (tok->sm_t2821_type != T2821_DOT) { *ptok = tok; return sm_error_perm(SM_EM_ADDR, R2821_ERR_DEL_MISS); } cur_tok = tok; tok = T2821_NEXT(tok); if (HAS_R2821(flags, R2821_FREE)) A2821_REMOVE_FREE(addr, cur_tok); /* another sub-domain after dot */ if (NULL == tok || tok->sm_t2821_type != T2821_ATOM) { if (HAS_R2821(flags, R2821_NO_TRLDOT)) { /* no trailing dot? */ *ptok = tok; return sm_error_perm(SM_EM_ADDR, R2821_ERR_TRAILDOT); } } if (T2821_ATOM == tok->sm_t2821_type && HAS_R2821(flags, R2821_DOMAIN) && !validsubdomain(tok->sm_t2821_val, flags)) return sm_error_perm(SM_EM_ADDR, R2821_ERR_DOMSYNTAX); } } /* ** T2821_PARSE -- parse address, extract base address ** ** Parameters: ** addr -- scanned address (in/output) ** flags -- flags to use for checking, see sm/rfc2821.h ** ** Returns: ** usual error code ** ** Side Effects: ** addr might be modified, it will reflect the non-routed addr */ sm_ret_T t2821_parse(sm_a2821_T *addr, uint flags) { int has; sm_ret_T ret; sm_t2821_P tok, nxt_tok; sm_t2821_T sm_t2821_end = { T2821_ENDTOK, (sm_str_P) 0, {0, 0} }; SM_REQUIRE(addr != NULL); ret = SM_SUCCESS; /* add an "end" element to address to make checks simpler? T2821_END type? 
*/ has = 0; A2821_INSERT_TAIL(addr, &sm_t2821_end); tok = A2821_FIRST(addr); #if 0 if (NULL == tok) { ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_EMPTY); goto error; } #endif /* 0 */ if (T2821_LEFT == tok->sm_t2821_type) { SET_R2821(has, R2821_ANGLE); tok = T2821_NEXT(tok); } else if (HAS_R2821(flags, R2821_ANGLE)) { ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_LEFT); goto error; } if (T2821_ENDTOK == tok->sm_t2821_type) { ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_MBOX); goto error; } /* source route address? */ if (T2821_AT == tok->sm_t2821_type) { SET_R2821(has, R2821_ROUTE); nxt_tok = T2821_NEXT(tok); A2821_REMOVE_FREE(addr, tok); flags |= R2821_FREE; for (;;) { tok = nxt_tok; ret = t2821_domain(addr, &tok, flags); if (sm_is_err(ret)) goto error; if (tok->sm_t2821_type != T2821_ENDTOK) nxt_tok = T2821_NEXT(tok); if (T2821_COLON == tok->sm_t2821_type) { A2821_REMOVE_FREE(addr, tok); break; } if (tok->sm_t2821_type != T2821_COMMA) { ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_COMMA); goto error; } A2821_REMOVE_FREE(addr, tok); tok = nxt_tok; if (tok->sm_t2821_type != T2821_AT) { ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_ATDOMAIN); goto error; } nxt_tok = T2821_NEXT(tok); A2821_REMOVE_FREE(addr, tok); } flags &= ~R2821_FREE; tok = nxt_tok; } /* Local-part = Dot-string / Quoted-string */ if (T2821_ATOM == tok->sm_t2821_type) { /* Dot-string = Atom *("." Atom) */ for (;;) { tok = T2821_NEXT(tok); if (NULL == tok || tok->sm_t2821_type != T2821_DOT) break; tok = T2821_NEXT(tok); if (tok->sm_t2821_type != T2821_ATOM) { ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_DOTSTR); goto error; } } } else if (T2821_QUOTED == tok->sm_t2821_type) tok = T2821_NEXT(tok); else if (T2821_RIGHT == tok->sm_t2821_type && !HAS_R2821(has, R2821_ROUTE)) { /* empty address <> allowed? 
*/ if (!HAS_R2821(flags, R2821_EMPTY)) { ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_LOCAL); goto error; } goto last; } else { ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_LOCAL); goto error; } /* @ */ if (T2821_AT == tok->sm_t2821_type) { SET_R2821(has, R2821_AT); tok = T2821_NEXT(tok); } else if (HAS_R2821(flags, R2821_AT)) { ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_AT); goto error; } else goto last; /* check for domain */ ret = t2821_domain(addr, &tok, flags); if (sm_is_err(ret) && HAS_R2821(flags, R2821_DOMAIN)) goto error; last: if (tok->sm_t2821_type != T2821_RIGHT && (HAS_R2821(flags, R2821_ANGLE) || HAS_R2821(has, R2821_ANGLE))) { ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_RIGHT); goto error; } if (tok->sm_t2821_type != T2821_ENDTOK) tok = T2821_NEXT(tok); if (tok->sm_t2821_type != T2821_ENDTOK) { ret = sm_error_perm(SM_EM_ADDR, R2821_ERR_MORE); goto error; } #if 0 /* requirements fullfilled? */ if (has != set) return sm_error_perm(SM_EM_ADDR, 1); /* XXX real error ... */ #endif error: /* remove endtoken */ tok = A2821_LAST(addr); SM_ASSERT(tok != NULL); SM_ASSERT(tok == &sm_t2821_end); A2821_REMOVE(addr, tok); return ret; }