// nullmailer -- a simple relay-only MTA
// Copyright (C) 1999-2003 Bruce Guenter <bruce@untroubled.org>
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// You can contact me at <bruce@untroubled.org>. There is also a mailing list
// available to discuss this package. To subscribe, send an email to
// <nullmailer-subscribe@lists.untroubled.org>.
#include "config.h"
#include <ctype.h>
#include "canonicalize.h"
#include "mystring/mystring.h"
#include "list.h"
#define LSQBRACKET '['
#define RSQBRACKET ']'
#define QUOTE '"'
#define CR '\n'
#define LPAREN '('
#define RPAREN ')'
enum node_type {
EMPTY = 0,
// Full tokens, with string content:
ATOM = 'A',
QUOTED_STRING = 'Q',
DOMAIN_LITERAL = 'D',
COMMENT = 'C',
// Special characters, no content:
LABRACKET = '<',
RABRACKET = '>',
AT = '@',
COMMA = ',',
SEMICOLON = ';',
COLON = ':',
ESCAPE = '\\',
PERIOD = '.',
// End of tokens
EOT = '$',
};
struct token
{
const node_type type;
const mystring str;
token(node_type);
token(node_type, mystring);
};
token::token(node_type t)
: type(t)
{
}
token::token(node_type t, mystring s)
: type(t), str(s)
{
}
struct anode : public token
{
anode* next;
anode(node_type);
anode(node_type, const char*, const char*);
anode(node_type, mystring);
};
anode::anode(node_type t)
: token(t), next(0)
{
}
anode::anode(node_type t, const char* start, const char* end)
: token(t, mystring(start, end-start)), next(0)
{
}
anode::anode(node_type t, mystring s)
: token(t, s), next(0)
{
}
struct result
{
anode* next;
bool good;
mystring str;
mystring comment;
mystring addr;
result();
result(const result&);
result(anode*);
result(anode*, const mystring&, const mystring&, const mystring&);
bool operator!() const
{
return !good;
}
operator bool() const
{
return good;
}
};
result::result()
: next(0), good(0)
{
}
result::result(anode* n)
: next(n), good(1)
{
}
result::result(anode* n, const mystring& s,
const mystring& c, const mystring& l)
: next(n), good(1), str(s), comment(c), addr(l)
{
}
result::result(const result& r)
: next(r.next), good(r.good), str(r.str), comment(r.comment), addr(r.addr)
{
}
#ifndef TRACE
#define ENTER(R)
#define FAIL(MSG) return result()
#define RETURNR(R) return R
#define RETURN(N,S,C,L) return result(N,S,C,L)
#else
#include "fdbuf/fdbuf.h"
static const char indentstr[] = " ";
static const char* indent = indentstr + sizeof indentstr - 1;
#define ENTER(R) do{ fout << indent-- << __FUNCTION__ << ": " << node->str << ": " << R << endl; }while(0)
#define FAIL(MSG) do{ fout << ++indent << __FUNCTION__ << ": failed: " << MSG << endl; return result(); }while(0)
#define RETURNR(R) do{ fout << ++indent << __FUNCTION__ << ": succeded str=" << R.str << " comment=" << R.comment << " addr=" << R.addr << endl; return (R); }while(0)
#define RETURN(N,S,C,L) do{ result r(N,S,C,L); RETURNR(r); }while(0)
#endif
#define RULE(X) static result match_##X(anode* node)
#define MATCHTOKEN(X) do{ if(node->type != X) FAIL("node is not type " #X); else node = node->next; }while(0)
#define MATCHRULE(V,R) result V = match_##R(node); if(!V) FAIL("did not match " #R);
#define OR_RULE(ALT1,ALT2) { result r=match_##ALT1(node); if(r) RETURNR(r); }{ result r=match_##ALT2(node); if(r) RETURNR(r); } FAIL("did not match " #ALT1 " OR " #ALT2);
static bool issymbol(char c)
{
switch(c) {
case LPAREN: case RPAREN:
case LABRACKET: case RABRACKET:
case LSQBRACKET: case RSQBRACKET:
case AT: case COMMA:
case SEMICOLON: case COLON:
case ESCAPE: case QUOTE: case PERIOD:
return true;
default:
return false;
}
}
static bool isctl(char c)
{
return (c >= 0 && c <= 31) || (c == 127);
}
static bool isqtext(char c)
{
return c && c != QUOTE && c != ESCAPE && c != CR;
}
static bool isdtext(char c)
{
return c && c != LSQBRACKET && c != RSQBRACKET &&
c != ESCAPE && c != CR;
}
// quoted-pair = ESCAPE CHAR
static bool isqpair(const char* ptr)
{
return *ptr && *ptr == ESCAPE &&
*(ptr+1);
}
static bool isatom(char ch)
{
return !(isspace(ch) || issymbol(ch) || isctl(ch));
}
static anode* tokenize_atom(const char* &ptr)
{
if(!isatom(*ptr)) return 0;
const char* start = ptr;
do {
++ptr;
} while(isatom(*ptr));
return new anode(ATOM, start, ptr);
}
static anode* tokenize_comment(const char* &ptr)
{
if(*ptr != LPAREN) return 0;
unsigned count = 0;
const char* start = ptr;
char ch = *ptr;
while(ch) {
if(isqpair(ptr))
++ptr;
else if(ch == LPAREN)
++count;
else if(ch == RPAREN) {
--count;
if(!count)
return new anode(COMMENT, start, ++ptr);
}
else if(ch == CR)
return 0; // ERROR
++ptr;
ch = *ptr;
}
return 0; // ERROR
}
static anode* tokenize_domain_literal(const char* &ptr)
{
if(*ptr != LSQBRACKET) return 0;
const char* start = ptr;
++ptr;
while(isspace(*ptr)) ++ptr;
for(; *ptr; ++ptr) {
if(isdtext(*ptr))
continue;
else if(isqpair(ptr))
++ptr;
else
break;
}
while(isspace(*ptr)) ++ptr;
if(*ptr != RSQBRACKET)
return 0; // ERROR
return new anode(DOMAIN_LITERAL, start, ptr);
}
static anode* tokenize_quoted_string(const char* &ptr)
{
if(*ptr != QUOTE) return 0;
const char* start = ptr;
for(++ptr; *ptr; ++ptr) {
if(isqtext(*ptr))
continue;
else if(isqpair(ptr))
++ptr;
else
break;
}
if(*ptr != QUOTE) return 0;
++ptr;
return new anode(QUOTED_STRING, start, ptr);
}
static anode* tokenize(const char* &ptr)
{
while(isspace(*ptr)) ++ptr;
char ch = *ptr;
switch(ch) {
case 0:
return new anode(EOT);
case LABRACKET:
case RABRACKET:
case AT:
case COMMA:
case SEMICOLON:
case COLON:
case ESCAPE:
case PERIOD:
++ptr;
return new anode((node_type)ch);
case LPAREN:
return tokenize_comment(ptr);
case LSQBRACKET:
return tokenize_domain_literal(ptr);
case QUOTE:
return tokenize_quoted_string(ptr);
default:
return tokenize_atom(ptr);
}
}
anode* tokenize(const mystring str)
{
anode* head = new anode(EMPTY);
anode* tail = head;
anode* tmp;
const char* ptr = str.c_str();
while((tmp = tokenize(ptr)) != 0) {
tail = tail->next = tmp;
if(tmp->type == EOT) {
tail = head->next;
delete head;
return tail;
}
}
return 0;
}
static mystring quote(const mystring& in)
{
unsigned length = in.length();
// The result will never be more than double the length of the input plus 2
char out[length*2 + 2 + 1];
char* ptrout = out;
const char* ptrin = in.c_str();
bool quoted = false;
for(; length; ++ptrin, ++ptrout, --length) {
if(issymbol(*ptrin)) {
*ptrout++ = ESCAPE;
quoted = true;
}
*ptrout = *ptrin;
}
*ptrout = 0;
if(quoted)
return mystringjoin("\"") + out + "\"";
else
return in;
}
static mystring unquote(const mystring& in)
{
unsigned length = in.length();
// The result will never be more than the length of the input
char out[length+1];
bool modified = false;
const char* ptrin = in.c_str();
char* ptrout = out;
if(in[0] == QUOTE && in[length-1] == QUOTE) {
length -= 2;
ptrin++;
modified = true;
}
for(; length; ++ptrin, ++ptrout, --length) {
if(isqpair(ptrin)) {
++ptrin;
--length;
modified = true;
}
*ptrout = *ptrin;
}
*ptrout = 0;
if(modified)
return out;
else
return in;
}
anode* skipcomment(anode* node, mystring& comment)
{
while(node->type == COMMENT) {
comment = comment + " " + node->str;
node = node->next;
}
return node;
}
RULE(sub_domain)
{
// Note atom <= domain-ref
ENTER("atom / domain-literal");
mystring comment;
node = skipcomment(node, comment);
if(node->type == ATOM || node->type == DOMAIN_LITERAL)
RETURN(node->next, node->str, comment, node->str);
FAIL("did not match ATOM or DOMAIN-LITERAL");
}
RULE(domain)
{
ENTER("sub-domain *(PERIOD sub-domain) [PERIOD]");
MATCHRULE(r, sub_domain);
if(!r) FAIL("did not match sub-domain");
mystring comment;
for(;;) {
node = r.next = skipcomment(r.next, comment);
if(node->type != PERIOD)
break;
r.str += PERIOD;
r.addr += PERIOD;
node = node->next;
result r1 = match_sub_domain(node);
if(r1) {
r.next = r1.next;
r.str += r1.str;
comment += r1.comment;
r.addr += r1.addr;
}
else {
r.next = node;
node = r.next = skipcomment(r.next, comment);
}
}
r.comment += comment;
RETURNR(r);
}
RULE(route)
{
ENTER("1#(AT domain) COLON");
unsigned count=0;
mystring str;
mystring comment;
for(;;) {
if(node->type != AT) break;
node = node->next;
MATCHRULE(r, domain);
str += "@";
str += r.str;
comment += r.comment;
++count;
node = r.next;
}
if(count == 0)
FAIL("matched no domains");
node = skipcomment(node, comment);
MATCHTOKEN(COLON);
RETURN(node, str, comment, "");
}
RULE(word)
{
ENTER("atom / quoted-string");
mystring comment;
node = skipcomment(node, comment);
if(node->type == ATOM)
RETURN(node->next, node->str, comment, node->str);
else if(node->type == QUOTED_STRING) {
mystring addr = unquote(node->str);
RETURN(node->next, quote(addr), comment, addr);
}
FAIL("did not match ATOM or QUOTED-STRING");
}
RULE(local_part)
{
ENTER("word *(PERIOD word)");
MATCHRULE(r, word);
for(;;) {
node = r.next = skipcomment(r.next, r.comment);
if(node->type != PERIOD)
break;
node = node->next;
result r1 = match_word(node);
if(!r1)
break;
r.next = r1.next;
r.str += ".";
r.str += r1.str;
r.comment += r1.comment;
r.addr += ".";
r.addr += r1.addr;
}
RETURNR(r);
}
RULE(addr_spec)
{
ENTER("local-part *( AT domain )");
MATCHRULE(r, local_part);
mystring domain;
for(;;) {
node = r.next = skipcomment(r.next, r.comment);
if(node->type != AT)
break;
node = node->next;
result r2 = match_domain(node);
if(!r2) break;
if(!!domain) {
r.str += "@";
r.str += domain;
r.addr += "@";
r.addr += domain;
}
domain = r2.addr;
r.comment += r2.comment;
r.next = r2.next;
}
canonicalize(domain);
RETURN(r.next, r.str + "@" + domain, r.comment,
r.addr + "@" + domain + "\n");
}
RULE(route_addr)
{
ENTER("LABRACKET [route] addr-spec RABRACKET");
mystring comment;
node = skipcomment(node, comment);
MATCHTOKEN(LABRACKET);
result r1 = match_route(node);
if(r1) node = r1.next;
comment += r1.comment;
MATCHRULE(r2, addr_spec);
node = r2.next;
comment += r2.comment;
node = skipcomment(node, comment);
MATCHTOKEN(RABRACKET);
RETURN(node, "<" + r2.str + ">" + comment, "", r2.addr);
}
RULE(phrase)
{
ENTER("word *word");
MATCHRULE(r1, word);
for(;;) {
result r2 = match_word(r1.next);
if(!r2)
break;
r1.str += " ";
r1.str += r2.str;
r1.comment += r2.comment;
r1.next = r2.next;
}
RETURNR(r1);
}
RULE(route_spec)
{
ENTER("[phrase] route-addr");
result r1 = match_phrase(node);
if(r1)
node = r1.next;
MATCHRULE(r2, route_addr);
if(!r1)
RETURNR(r2);
r2.str = r1.str + r1.comment + " " + r2.str + r2.comment;
RETURNR(r2);
}
RULE(mailbox)
{
ENTER("route-spec / addr-spec");
OR_RULE(route_spec, addr_spec);
}
RULE(mailboxes)
{
ENTER("mailbox *(*(COMMA) mailbox)");
MATCHRULE(r1, mailbox);
r1.str += r1.comment;
r1.comment = "";
for(;;) {
node = r1.next;
for(;;) {
node = skipcomment(node, r1.str);
if(node->type == COMMA) node = node->next;
else break;
}
if(node->type == EOT)
break;
result r2 = match_mailbox(node);
if(!r2) break;
r1.next = r2.next;
r1.str = r1.str + ", " + r2.str + r2.comment;
r1.addr += r2.addr;
}
node = skipcomment(node, r1.str);
r1.next = node;
RETURNR(r1);
}
RULE(group)
{
ENTER("phrase COLON [#mailboxes] SEMICOLON");
MATCHRULE(r1, phrase);
node = r1.next;
MATCHTOKEN(COLON);
result r2 = match_mailboxes(node);
if(r2) node = r2.next;
mystring comment;
node = skipcomment(node, comment);
MATCHTOKEN(SEMICOLON);
RETURN(node, r1.str + ": " + r2.str + r2.comment + comment + ";",
"", r2.addr);
}
RULE(address)
{
ENTER("group / mailbox");
OR_RULE(group, mailbox);
}
RULE(addresses)
{
ENTER("[address *(*(COMMA) address)] EOF");
// Special-case handling for empty address lists
if(node->type == EOT) RETURN(0, "", "", "");
if(node->type == COMMENT && node->next->type == EOT)
RETURN(0, node->str, "", "");
MATCHRULE(r1, address);
r1.str += r1.comment;
r1.comment = "";
for(;;) {
node = r1.next;
for(;;) {
node = skipcomment(node, r1.str);
if(node->type == COMMA) node = node->next;
else break;
}
if(node->type == EOT)
break;
result r2 = match_address(node);
if(!r2) break;
r1.next = r2.next;
r1.str = r1.str + ", " + r2.str + r2.comment;
r1.addr += r2.addr;
}
node = skipcomment(node, r1.str);
if(node->next) FAIL("Rule ended before EOF");
RETURNR(r1);
}
static void del_tokens(anode* node)
{
while(node) {
anode* tmp = node->next;
delete node;
node = tmp;
}
}
bool parse_addresses(mystring& line, mystring& list)
{
anode* tokenlist = tokenize(line.c_str());
if(!tokenlist)
return false;
result r = match_addresses(tokenlist);
del_tokens(tokenlist);
if(r) {
line = r.str;
list = r.addr;
return true;
}
else
return false;
}
syntax highlighted by Code2HTML, v. 0.9.1