/*
 *  Copyright 1990 by Rayan S. Zachariassen, all rights reserved.
 *  This will be free software, but only when it is finished.
 *
 *  Fixes done by Matti Aarnio , and at least
 *  Zack at .
 */

#include "hostenv.h"
#include "mailer.h"
#include "libz.h"

/* Start of the scanner */

/* scanner tables: definition of character classes according to RFC822 */

#define _h      01      /* header field */
#define _w      02      /* linear white-space character (space / htab) */
#define _d      04      /* digit */
#define _c      010     /* control */
#define _a      020     /* alphabetic */
#define _l      040     /* line feed */
#define _r      0100    /* carriage return */
#define _s      0200    /* specials */
#define _8      0400    /* 8th bit on -- illegal on Headers! */
                        /* ISO Latin 1 (8859) */

/*
 * Character class table, indexed by (character & 0xFF).  Each entry is
 * an OR of the class bits defined above.  The table is writable on
 * purpose: scan822() temporarily adds _s to the entries of its caller
 * supplied "temporary specials".
 */
#if defined(__alpha)||defined(__alpha__)
/* On Alpha ``short'' is slow to access! (this array is modified!) */
int
#else
/* All other systems are assumed to contain short-load/store instructions */
short
#endif
rfc_ctype[256] = {
/* octalcode */
    _c,    _c,    _c,    _c,    _c,    _c,    _c,    _c,    /*   0 -   7 */
    _c,    _c|_w, _l|_c, _c,    _c,    _r|_c, _c,    _c,    /*  10 -  17 */
    _c,    _c,    _c,    _c,    _c,    _c,    _c,    _c,    /*  20 -  27 */
    _c,    _c,    _c,    _c,    _c,    _c,    _c,    _c,    /*  30 -  37 */
    _w,    _h,    _s|_h, _h,    _h,    _h,    _h,    _h,    /*  40 -  47 */
    _s|_h, _s|_h, _h,    _h,    _s|_h, _h,    _s|_h, _h,    /*  50 -  57 */
    _d|_h, _d|_h, _d|_h, _d|_h, _d|_h, _d|_h, _d|_h, _d|_h, /* '0' - '7' */
    _d|_h, _d|_h, _s,    _s|_h, _s|_h, _h,    _s|_h, _h,    /* '8' -  77 */
    _s|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, /* '@' - 'G' */
    _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, /* 'H' - 'O' */
    _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, /* 'P' - 'W' */
    _a|_h, _a|_h, _a|_h, _s|_h, _s|_h, _s|_h, _h,    _h,    /* 'X' - '_' */
    _h,    _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, /* '`' - 'g' */
    _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, /* 'h' - 'o' */
    _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, _a|_h, /* 'p' - 'w' */
    _a|_h, _a|_h, _a|_h, _h,    _h,    _h,    _h,    _c,    /* 'x' - 177 */
/* The class assignments of the second half are all ILLEGAL */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 200 - 207 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 210 - 217 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 220 - 227 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 230 - 237 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 240 - 247 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 250 - 257 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 260 - 267 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 270 - 277 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 300 - 307 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 310 - 317 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 320 - 327 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 330 - 337 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 340 - 347 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 350 - 357 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8,    /* 360 - 367 */
    _8,    _8,    _8,    _8,    _8,    _8,    _8,    _8     /* 370 - 377 */
};

/*
 * Support function for Router's  $(condquote ...)  a.k.a  $(dequote ...)
 * function.
 *
 * Walks the NUL terminated string `s' and reports, as a bit mask:
 *
 *   1 - the string contains at least one double-quote character
 *   2 - the double-quotes are unbalanced (the string ends inside a quote)
 *   4 - the string holds a character that requires quoting
 *
 * `spc' is the character the caller considers substituting for SPACE;
 * when that character is '.' or carries none of the white-space, control,
 * 8-bit or special class bits, a SPACE in `s' is not counted as a reason
 * to quote.
 */
int
rfc822_mustquote(s, spc)
    register const char *s;
    const int spc;
{
    const int unsafe = _w|_c|_8|_s;
    int in_quotes   = 0;
    int need_quotes = 0;
    int saw_quotes  = 0;
    int space_ok;
    int result;
    int ch;

    space_ok = (spc == '.' || (rfc_ctype[spc & 0xFF] & unsafe) == 0);

    for (; *s != '\0'; ++s) {
        ch = *(const unsigned char *)s;

        if (ch == '"') {
            saw_quotes = 1;
            in_quotes = !in_quotes;
        } else if (ch == '\\') {
            /* Quoted pair: when a character follows the backslash,
               step over it without classifying it. */
            if (s[1] != '\0')
                ++s;
        } else if (ch == '@' && !in_quotes) {
            ;   /* This special outside a quote is ok */
        } else if (ch == ':') {
            /* Special, but "HOST::USER"@gwhost syntax is even more
               special: a double colon inside a quote forces quoting. */
            if (in_quotes && s[1] == ':')
                need_quotes = 1;
        } else if (ch == '.' || ch == ',' || ch == '[' || ch == ']') {
            ;   /* specials that are accepted as they are */
        } else if (ch == ' ' && space_ok) {
            ;   /* SPACE would be replaced by a harmless character */
        } else if (ch == '|' || (rfc_ctype[ch] & unsafe) != 0) {
            need_quotes = 1;
        }
    }

    result = saw_quotes;
    if (in_quotes)
        result |= 2;
    if (need_quotes)
        result |= 4;
    return result;
}

/*
 * Classify the line that starts at `cp' within the buffer `lbuf' of `n'
 * characters:
 *
 *  - a line starting with SPACE or TAB is a continuation (folded header)
 *    line, and 0 is returned;
 *  - otherwise header-field characters are skipped; when they are
 *    followed by ':' the offset of that colon from `lbuf' is returned
 *    (the length of the field name when cp == lbuf);
 *  - otherwise the line is malformed and the negated offset of the first
 *    offending character from `lbuf' is returned.
 */
#if 1
/* NOTE: The 'octo' variable is from long ago, dead code.. */
int
hdr_status(cp, lbuf, n, octo)
    register const char *cp, *lbuf;
    int n, octo;
{
    const char *limit = lbuf + n;

    if (*cp == ' ' || *cp == '\t') {
        for (; cp < limit; ++cp)
            if (!(rfc_ctype[(*cp) & 0xFF] & (_w|_l)))
                break;
#if 0 /* [mea] Lets consider all-white-space line a header continuation...
         We process per RFC 822 rules, not BITNET 80char fixed width.. */
        if (cp == lbuf + n) /* a line containing only whitespace is EOH */
            return -1;
#endif
        /* a continuation line (folded header) */
        return 0;
    }

    for (; cp < limit; ++cp)
        if (!(rfc_ctype[(*cp) & 0xFF] & _h))
            break;

    if (cp < limit && *cp == ':' /*&& (cp > cpin)*/) {
        /* header line */
        return cp - lbuf;
    }

    /* if we get to here, we have a malformed header line */
    /* if (*cp == ':' && cp == cpin) return -1; */
    return lbuf - cp;
}
#else
/* DEAD CODE: From time when router did magic things to implement
   a semi-optimized alias database compilation...
*/ int hdr_status(cp, lbuf, n, octo) register const char *cp, *lbuf; int n, octo; { if (*cp == ' ' || *cp == '\t') { while ((cp < lbuf + n) && (rfc_ctype[(*cp) & 0xFF] & (_w|_l))) ++cp; if (cp == lbuf + n) /* a line containing only whitespace is EOH */ return -1; /* a continuation line (folded header) */ return 0; } if (!octo) { while ((cp < lbuf + n) && (rfc_ctype[(*cp) & 0xFF] & _h)) ++cp; if ((cp < lbuf + n) && (*cp == ':') /*&& (cp > cpin)*/) /* header line */ return cp - lbuf; /* if we get to here, we have a malformed header line */ /* if (*cp == ':' && cp == cpin) return -1; */ return lbuf - cp; } else { /* complex calling relations -- we are parsing alias database, and we want to have spaces allowed in the left-hand side.. */ char quote = 0; while (cp < lbuf + n) { char c = *cp; if (c == '\\') { ++cp; if (cp >= (lbuf + n)) break; } if (c == quote) quote = 0; else if (c == '"') quote = '"'; else if (!quote && !(rfc_ctype[c & 0xFF] & _h)) break; ++cp; } if (cp < lbuf + n && *cp == ':') return cp - lbuf; return lbuf - cp; } } #endif /* ... dead code */ #if 0 #define MKERROR(msg,prevp) tn = makeToken((msg), strlen(msg)); \ tn->t_type = Error; \ tn->t_next = *(prevp); \ *(prevp) = tn; #else static void MKERROR(msg, prevp) const char *msg; token822 **prevp; { token822 *tn = makeToken((msg), strlen(msg)); tn->t_type = Error; tn->t_next = *(prevp); *(prevp) = tn; } #endif /* * Recognize a compound token, or rather, a token which is defined by * matching start and end delimiters. A comment or quoted string is * the typical example. Comments may be recursive. 
*/

/*
 * _hdr_compound() scans from the opening delimiter at `cp' (which must
 * equal `cstart', otherwise abort() is called) for the matching `cend'.
 *
 *  cp      - points at the opening delimiter
 *  np      - in: characters available from cp; out: characters left over
 *  cstart  - opening delimiter
 *  cend    - closing delimiter
 *  cpp     - out: where the scan stopped (the closing delimiter when one
 *            was found; it is NOT stepped over here)
 *  type    - token type stored into tp->t_type; only Comment nests
 *  tp      - token being filled in (t_type, t_len; t_pname is set to
 *            NULL when no closing delimiter was found)
 *  tlist   - head of the chain that receives Error tokens via MKERROR()
 *  tlistp  - optional cursor into the continuation-line tokens; it is
 *            advanced each time the scan moves on to a following line
 *
 * Returns the accumulated length `len' (also stored in tp->t_len).
 */
static u_long _hdr_compound __((const char *cp, int *np, int cstart, int cend, const char **cpp, TokenType type, token822 *tp, token822 **tlist, token822 **tlistp));

static u_long
_hdr_compound(cp, np, cstart, cend, cpp, type, tp, tlist, tlistp)
    register const char *cp;
    int *np;
    int cstart, cend;
    const char **cpp;
    TokenType type;
    token822 *tp, **tlist, **tlistp;
{
    int nest = 1;   /* open delimiters not yet closed */
    int len = 1;    /* starts at one: the opening delimiter is stepped over below */
    int n = *np;

    if (*cp != cstart) abort(); /* Sanity check!  Call fault! */

    ++cp, --n;

    /* Re-entered through goto each time the scan continues on the
       next continuation-line token; nest and len carry over. */
 nextline:
    for (; n > 0; ++cp, --n, ++len) {
        if (*cp == cend) {
            /* Tested before cstart, so a token whose delimiters are
               the same character (a quoted string) ends here. */
            if (--nest <= 0) {
                break;
            }
        } else if (*cp == cstart) {
            if (type == Comment)
                ++nest;
            else {
                /* flagged, but the scan goes on */
                MKERROR("illegal char in compound", tlist);
            }
        } else if (*cp == '\\') {
            if (n == 1) {
                MKERROR("missing character after backslash", tlist);
                /* Continue with next line, if existing! */
                n = 0;
                break;
            }
            /* step over the backslash; the loop increment then
               steps over the escaped character */
            ++cp; --n; ++len;
        } else if (*cp == '\r') {
            /* type = Error; */
            MKERROR("illegal CR in token", tlist);
        }
    }
    /* we either found cend, or ran off the end, either may be within a recursion */
    if (n == 0) {
        /* we ran off the end */
        char msgbuf[50];
        if (tlistp != NULL && *tlistp != NULL && (*tlistp)->t_next != NULL) {
            /* compound token is continued on next line */
            *tlistp = (*tlistp)->t_next;
            n = TOKENLEN(*tlistp);
            cp = (*tlistp)->t_pname;
            ++len;  /* one extra unit of length per line crossed */
            goto nextline;
        }
        /* type=Error; */ /* hey, no reason to refuse a message*/
        sprintf(msgbuf, "missing closing '%c' in token", cend);
        MKERROR(msgbuf, tlist);
        tp->t_pname = NULL; /* ugly way of signalling scanner */
    } else if (*cp == cend) {
        /* we found matching terminator */
        ++len;
        --n; /* move past terminator */
        /* NOTE: only the counters move; cp still points at the terminator */
    } else {
        /* there was an error */
        abort() ; /* ??? some sort of sanity check ? */
    }
    tp->t_type = type;
    tp->t_len = len;
    *np = n;
    *cpp = (char*)cp;
    return len;
}

/* Unfold (see RFC822) the contents of a compound token */

/*
 * _unfold() copies a compound token that spans several lines into one
 * freshly tmalloc()ed, NUL terminated string.
 *
 *  type   - token type; for Comment trailing spaces are stripped
 *  len    - upper bound on the characters to process (the buffer is
 *           len+1 bytes)
 *  start  - first character to copy
 *  cpp    - in: address at which copying must stop;
 *           out: one past the address where copying did stop
 *  t      - token whose t_next chain supplies the following lines; a
 *           NUL byte at *start means "go on with t->t_next->t_pname"
 *
 * Newline characters are dropped; each line transition is replaced by a
 * single space after skipping the white-space that leads the next line.
 * NOTE(review): t->t_next is dereferenced without a NULL check --
 * presumably the caller guarantees enough continuation tokens; confirm.
 */
static const char * _unfold __((TokenType, int, const char *, const char **, token822*));

static const char *
_unfold(type, len, start, cpp, t)
    TokenType type;
    int len;    /* Total length to unfold */
    const char *start;
    const char **cpp;
    token822 *t;
{
    char *s, *cp;               /* s: write cursor, cp: start of the result */
    const char *cpe = *cpp;     /* address at which to stop */
    /* Start and End may be at different tmalloc()ed objects! */

    s = cp = (char *)tmalloc(len +1);
    while (len > 0 && start != cpe) {
        if (*start != 0) {
            if (*start == '\n') {
                /* newlines are never copied */
                ++start;
                --len;
                continue;
            }
            --len;
            *s++ = *start++;
        } else {
            /* end of this line's text: move on to the next token */
            t = t->t_next;
            start = t->t_pname;
#if 0 /* zero: unfold.. */
            *s++ = '\n';
#else
            /* Skip all folding white-space */
            while (len > 0 && start != cpe &&
                   (*start == ' ' || *start == '\t' ||
                    *start == '\n' || *start == '\r'))
                ++start, --len;
            /* And replace it with *one* space */
            *s++ = ' ';
#endif
            --len;
        }
    }
    *cpp = start +1;
    if (type == Comment) {
        /* Strip trailing spaces at comments */
        while (s > cp && s[-1] == ' ')
            --s;
    }
    *s = '\0';
    return cp;
}

/*
 * The Scanner.
 *
 * cpp - pointer to pointer to string.
 * n - number of characters left in string.
 * c1, c2 - if non-NUL, these characters should be considered Special.
 *
 * The scanner will return a token list corresponding to the n next characters
 * in the string. Originally only a single token was returned per call, but
 * for efficiency this was changed to avoid function call overhead. The tokens
 * returned are classified by type (TokenType enum class).
*/

/*
 * scan822() in more detail:
 *
 *  - Returns NULL when there is nothing to scan.  Otherwise returns the
 *    tokens in input order; Error tokens produced by MKERROR() precede
 *    the token during whose scan they were raised.
 *  - *cpp is advanced past everything that was consumed.
 *  - c1 and c2 get the _s (special) class bit in rfc_ctype[] for the
 *    duration of the call; the previous table entries are put back
 *    before returning.
 *  - With `allowcomments' set, a token starting with '#' ends the scan:
 *    the token is dropped and the rest of the input is skipped.
 *  - `tlistp', when non-NULL, is handed to _hdr_compound() so that a
 *    quoted string, comment or domain literal may continue on following
 *    line tokens; when that happens its text is rebuilt by _unfold().
 *  - The scratch token `t' has static storage; every finished token is
 *    handed to copyToken().
 */
token822 *
scan822(cpp, nn, c1, c2, allowcomments, tlistp)
    const char **cpp;       /* pointer to pointer to text */
    size_t nn;              /* number of characters to scan */
    int c1, c2;             /* temporary specials */
    int allowcomments;      /* #prefix tokens are comments to EOT */
    token822 **tlistp;      /* continuation line tokens if any */
{
    register const char *cp;
    static token822 t;
    token822 *tlist, *tp, *tn, *ot;
    char msgbuf[50];
    short ct, sc1, sc2;
    int n = (int) nn;

    /* Nothing to do; this also rejects a size that does not fit an int
       and came out negative from the cast above. */
    if (n <= 0)
        return NULL;

    /* Save the table entries of the temporary specials and mark them.
       Every index is masked: an unmasked negative c1 (a sign-extended
       char) used to write outside of rfc_ctype[]. */
    sc1 = sc2 = '\0';
    if (c1 != '\0') {
        sc1 = rfc_ctype[c1 & 0xFF];
        rfc_ctype[c1 & 0xFF] |= _s;
    }
    if (c2 != '\0') {
        sc2 = rfc_ctype[c2 & 0xFF];
        rfc_ctype[c2 & 0xFF] |= _s;
    }

    tlist = NULL;
    do {
        cp = *cpp;
        ct = rfc_ctype[(*cp) & 0xFF];
        /* t_len holds the count at token start; the number left over
           is subtracted further down to obtain the token length. */
        t.t_len = n;
        t.t_pname = cp;
        if (ct & _w) {
            /* LWSP without the CR LF part */
            while (--n > 0 && (rfc_ctype[(*++cp) & 0xFF] & _w))
                continue;
            t.t_type = Space;
        } else if (ct & _r) {
            /* >= 1 CR followed by LFs is a fold */
            while (--n > 0 && (rfc_ctype[(*++cp) & 0xFF] & _r))
                continue;
            if (n == 0 || !(rfc_ctype[(*cp) & 0xFF] & _l)) {
                strcpy(msgbuf, "CR without LF (newline)");
                MKERROR(msgbuf, &tlist);
            } else if (n > 1 && (rfc_ctype[(*cp) & 0xFF] & _l)) {
                while (--n > 0 && (rfc_ctype[(*++cp) & 0xFF] & _l))
                    continue;
                strcpy(msgbuf,"too many newlines (LFs) in field[1]");
                MKERROR(msgbuf, &tlist);
            }
            t.t_type = Fold;
        } else if (ct & _l) {
            /* >= 1 LFs without CR is a fold too */
            while (--n > 0 && (rfc_ctype[(*++cp) & 0xFF] & _l))
                continue;
            strcpy(msgbuf,"too many newlines (LFs) in field[2]");
            MKERROR(msgbuf, &tlist);
            t.t_type = Fold;
        } else if ((ct & _s) && (*cp=='(' || *cp=='"' || *cp=='[')) {
            /* compound token: comment, quoted string, domain literal */
            TokenType type;
            char cend;
            int len;

            if (*cp == '"') {
                cend = '"';
                type = String;
            } else if (*cp == '[') {
                cend = ']';
                type = DomainLiteral;
            } else {
                cend = ')';
                type = Comment;
            }
            /* Remember the continuation cursor so that we can tell
               afterwards whether _hdr_compound() moved it. */
            ot = (tlistp == NULL ? NULL : *tlistp);
            len = _hdr_compound(cp, &n, *cp, cend, cpp,
                                type, &t, &tlist, tlistp);
            if (ot != NULL && tlistp != NULL && ot != *tlistp) {
                /* a compound token crossed line boundary */
                /* copy from ++cp for len chars */
                t.t_pname = _unfold(type, len, ++cp, cpp, ot);
                t.t_len = strlen(t.t_pname);
            } else {
                if (t.t_pname != NULL) /* magic sign; NULL: no ending char */
                    --t.t_len, ++(*cpp); /* past first bracketing char */
                --t.t_len;
                /* ++(*cpp); */
                t.t_pname = ++cp;
            }
            /* compensate for calculations below */
            (*cpp) -= t.t_len;
            t.t_len += n;
        } else if (ct & _s) {
            /* specials */
            /* Double-colons as with DECNET */
            if (n > 1 && *cp == ':' && cp[1] == ':')
                --n;
            /* Backslash + special:  \@ \! \: ... */
            if (n > 1 && *cp == '\\' && cp[1] != 0 &&
                (rfc_ctype[cp[1] & 0xFF] & _s))
                --n;
            --n;
            t.t_type = Special;
        } else if (!(ct & (_c|_8))) {
            /* atom */
            while (--n > 0 &&
                   !(rfc_ctype[(*++cp) & 0xFF] & (_w|_s|_c|_l|_r)))
                continue;
            t.t_type = Atom;
        } else {
            /* Illegal (control or 8-bit) characters.  The leading
               character counts too: a lone 8-bit byte used to be
               reported as a mere "control character". */
            int bit8 = (ct & _8) != 0;

            while (--n > 0 && (rfc_ctype[(*++cp) & 0xFF] & (_c|_8)))
                if (rfc_ctype[(*cp) & 0xFF] & _8) {
                    bit8 = 1;
                    break;
                }
            if (bit8)
                strcpy(msgbuf, "illegal 8-bit/control character");
            else
                strcpy(msgbuf, "illegal control character");
            if (t.t_len > n+1)
                strcat(msgbuf, "s");
            MKERROR(msgbuf, &tlist);
            t.t_type = Atom;
        }
        t.t_len -= n;
        /* return two values */
        *cpp += t.t_len;
        if (allowcomments && t.t_len >= 1 && t.t_pname[0] == '#') {
            /* '#' comment: skip whatever is left, drop the token */
            *cpp += n;
            break;
        }
        t.t_next = tlist;
        if (t.t_len > 0)
            tlist = copyToken(&t);
        else {
            t.t_pname = "";
            t.t_len = 0;
            tlist = copyToken(&t);
        }
    } while (n > 0);

    /* Reverse the token822 chain */
    tp = tn = NULL;
    for (tp = NULL; tlist != NULL; tlist = tn) {
        tn = tlist->t_next;
        tlist->t_next = tp;
        tp = tlist;
    }

    /* Put the table back in the reverse order of saving: sc2 was taken
       after c1 had been marked, so when c1 == c2 restoring c1 last is
       what removes the temporary _s bit again. */
    if (c2 != '\0')
        rfc_ctype[c2 & 0xFF] = sc2;
    if (c1 != '\0')
        rfc_ctype[c1 & 0xFF] = sc1;

    return tp;
}

/*
 * The UTEXT Scanner.
 *
 * cpp - pointer to pointer to string.
 * n - number of characters left in string.
 *
 * The scanner will return a token list corresponding to the n next characters
 * in the string.
Originally only a single token was returned per call, but
 * for efficiency this was changed to avoid function call overhead. The tokens
 * returned are classified by type (TokenType enum class).
 */

/*
 * scan822utext() splits unstructured text into Space, Fold and Atom
 * tokens only: runs of space/tab become Space, CR and/or LF runs become
 * Fold (with an Error token describing the irregularity), and any other
 * run of characters up to the next white-space, CR or LF is one Atom.
 *
 * Returns NULL when `nn' is zero, otherwise the tokens in input order.
 * *cpp is advanced past the scanned text.  `tlistp' is not referenced
 * by this function.
 */
token822 *
scan822utext(cpp, nn, tlistp)
    const char **cpp;       /* pointer to pointer to text */
    size_t nn;              /* number of characters to scan */
    token822 **tlistp;      /* continuation line tokens if any */
{
    static token822 t;
    register const char *p;
    token822 *chain = NULL;         /* built newest-first */
    token822 *reversed, *following;
    char msgbuf[50];
    short cls;
    int n = (int) nn;

    if (n == 0)
        return NULL;

    do {
        p = *cpp;
        cls = rfc_ctype[(*p) & 0xFF];
        /* count at token start; what is left gets subtracted below */
        t.t_len = n;
        t.t_pname = p;

        if (cls & _w) {
            /* LWSP without the CR LF part */
            for (--n; n > 0; --n) {
                ++p;
                if (!(rfc_ctype[(*p) & 0xFF] & _w))
                    break;
            }
            t.t_type = Space;
        } else if (cls & _r) {
            /* >= 1 CR followed by LFs is a fold */
            for (--n; n > 0; --n) {
                ++p;
                if (!(rfc_ctype[(*p) & 0xFF] & _r))
                    break;
            }
            if (n == 0 || !(rfc_ctype[(*p) & 0xFF] & _l)) {
                strcpy(msgbuf, "CR without LF (newline)");
                MKERROR(msgbuf, &chain);
            } else if (n > 1) {
                /* here p is known to point at a LF */
                for (--n; n > 0; --n) {
                    ++p;
                    if (!(rfc_ctype[(*p) & 0xFF] & _l))
                        break;
                }
                strcpy(msgbuf,"too many newlines (LFs) in field[1]");
                MKERROR(msgbuf, &chain);
            }
            t.t_type = Fold;
        } else if (cls & _l) {
            /* >= 1 LFs without CR is a fold too */
            for (--n; n > 0; --n) {
                ++p;
                if (!(rfc_ctype[(*p) & 0xFF] & _l))
                    break;
            }
            strcpy(msgbuf,"too many newlines (LFs) in field[2]");
            MKERROR(msgbuf, &chain);
            t.t_type = Fold;
        } else {
            /* Anything else is unstructured foldable Atom */
            for (--n; n > 0; --n) {
                ++p;
                if (rfc_ctype[(*p) & 0xFF] & (_w|_l|_r))
                    break;
            }
            t.t_type = Atom;
        }

        t.t_len -= n;
        /* return two values */
        *cpp += t.t_len;

        t.t_next = chain;
        if (!(t.t_len > 0)) {
            /* store an empty token rather than a bogus length */
            t.t_pname = "";
            t.t_len = 0;
        }
        chain = copyToken(&t);
    } while (n > 0);

    /* Reverse the token822 chain */
    reversed = NULL;
    while (chain != NULL) {
        following = chain->t_next;
        chain->t_next = reversed;
        reversed = chain;
        chain = following;
    }
    return reversed;
}