/*
** Copyright (c) 2005, 2007 Sendmail, Inc. and its suppliers.
** All rights reserved.
*/
#ifndef lint
static char rfc2822_c_id[] = "@(#)$Id: rfc2822.c,v 1.3 2007/11/12 22:08:18 msk Exp $";
#endif /* !lint */
/* system inludes */
#include <ctype.h>
#include <string.h>
#include <limits.h>
#include <stdio.h>
/* types */
typedef unsigned long cmap_elem_type;
/* symbolic names */
#define RFC2822_OK 0 /* success */
#define RFC2822_ERR_PUNBALANCED 1 /* unbalanced parentheses */
#define RFC2822_ERR_QUNBALANCED 2 /* unbalanced quotes */
#define RFC2822_ERR_SUNBALANCED 3 /* unbalanced sq. brackets */
/* a bitmap for the "specials" character class */
#define CMAP_NBITS (sizeof(cmap_elem_type) * CHAR_BIT)
#define CMAP_NELEMS ((1 + UCHAR_MAX) / CMAP_NBITS)
#define CMAP_INDEX(i) ((unsigned char)(i) / CMAP_NBITS)
#define CMAP_BIT(i) (1 << (unsigned char)(i) % CMAP_NBITS)
#define CMAP_TST(ar, c) ((ar)[CMAP_INDEX(c)] & CMAP_BIT(c))
#define CMAP_SET(ar, c) ((ar)[CMAP_INDEX(c)] |= CMAP_BIT(c))
static unsigned char const SPECIALS[] = "<>@,;:\\\"/[]?=";
/*
** UNESCAPE -- remove escape characters from a string
**
** Parameters:
** s -- the string to be unescaped
**
** Return value:
** s.
*/
static char *
unescape(char *s)
{
char *w;
char const *r, *p, *e;
if (s == NULL)
return NULL;
r = w = s;
e = s + strlen(s);
while ((p = memchr(r, '\\', e - s)) != NULL)
{
if (p > s)
{
if (r != w)
memmove(w, r, p - r);
w += p - r;
}
if (p[1] == '\0')
{
r = p + 1;
}
else
{
*w++ = p[1];
r = p + 2;
}
}
if (r > w)
{
if (e > r)
{
memmove(w, r, e - r);
w += e - r;
}
*w = '\0';
}
return s;
}
/*
** MATCHING_PAREN -- return the location past matching opposite parentheses
**
** Parameters:
** s -- start of string to be processed
** e -- end of string to be processed
** open_paren -- open parenthesis character
** close_paren -- close parenthesis character
**
** Return value:
** Location of the final close parenthesis character in the string.
** For example, given "xxx((yyyy)zz)aaaa", would return the location
** of the second ")". There may be more beyond that, but at that point
** everything is balanced.
*/
static char *
matching_paren(char *s, char *e, int open_paren, int close_paren)
{
int paren = 1;
for (; s < e; s++)
{
if (*s == close_paren)
{
if (--paren == 0)
break;
}
else if (*s == open_paren)
{
paren++;
}
else if (*s == '\\')
{
if (s[1] != '\0')
s++;
}
}
return s;
}
/*
** RFC2822_FIRST_SPECIAL -- find the first RFC2822 "special" character
**
** Parameters:
** p -- input string
** e -- end of input string
** special_out -- pointer to the first special character found
**
** Return value:
** 0 on success, or an RFC2822_ERR_* on failure.
*/
static int
rfc2822_first_special(char *p, char *e, char **special_out)
{
size_t i;
cmap_elem_type is_special[CMAP_NELEMS] = { 0 };
char *at_ptr = NULL;
/* set up special finder */
for (i = 0; SPECIALS[i] != '\0'; i++)
CMAP_SET(is_special, SPECIALS[i]);
for (; p < e && *p != '\0'; p++)
{
/* skip white space between tokens */
while (p < e && (*p == '(' ||
(isascii((unsigned char) *p) &&
isspace((unsigned char) *p))))
{
if (*p != '(')
{
p++;
}
else
{
p = matching_paren(p + 1, e, '(', ')');
if (*p == '\0')
return RFC2822_ERR_PUNBALANCED;
else
p++;
}
}
if (*p == '\0')
break;
if (*p == '"')
{
p = matching_paren(p + 1, e, '\0', '"');
if (*p == '\0')
return RFC2822_ERR_QUNBALANCED;
}
else if (*p == '[')
{
p = matching_paren(p + 1, e, '\0', ']');
if (*p == '\0')
return RFC2822_ERR_SUNBALANCED;
}
else if (CMAP_TST(is_special, *p))
{
if (*p == '<')
{
*special_out = p;
return 0;
}
else if (*p == ':' || *p == ';' || *p == ',')
{
if (at_ptr != NULL)
*special_out = at_ptr;
else
*special_out = p;
return 0;
}
else if (*p == '@')
{
at_ptr = p;
}
}
else
{
while (*p != '\0' &&
!CMAP_TST(is_special, *p) &&
(!isascii(*p) ||
!isspace((unsigned char) *p)) &&
*p != '(')
p++;
p--;
}
}
*special_out = p;
return 0;
}
/*
** RFC2822_TOKEN -- find the next token
**
** Parameters:
** s -- start of input string
** e -- end of input string
** type_out -- type of token (returned)
** start_out -- start of token (returned)
** end_out -- start of token (returned)
** uncommented_whitespace -- set to TRUE if uncommented whitespace is
** discovered (returned)
**
** Return value:
** 0 on success, or an RFC2822_ERR_* on failure.
*/
static int
rfc2822_token(char *s, char *e, int *type_out, char **start_out,
char **end_out, int *uncommented_whitespace)
{
char *p;
int err = 0;
size_t i;
int token_type;
cmap_elem_type is_special[CMAP_NELEMS] = { 0 };
char *token_start, *token_end;
*start_out = NULL;
*end_out = NULL;
*type_out = 0;
err = 0;
/* set up special finder */
for (i = 0; SPECIALS[i] != '\0'; i++)
CMAP_SET(is_special, SPECIALS[i]);
p = s;
/* skip white space between tokens */
while (p < e && (*p == '(' ||
(isascii((unsigned char) *p) &&
isspace((unsigned char) *p))))
{
if (*p != '(')
{
*uncommented_whitespace = 1;
p++;
}
else
{
p = matching_paren(p + 1, e, '(', ')');
if (*p == '\0')
return RFC2822_ERR_PUNBALANCED;
else
p++;
}
}
if (p >= e || *p == '\0')
return 0;
/* our new token starts here */
token_start = p;
/* fill in the token contents and type */
if (*p == '"')
{
token_end = matching_paren(p + 1, e, '\0', '"');
token_type = '"';
if (*token_end != '\0')
token_end++;
else
err = RFC2822_ERR_QUNBALANCED;
}
else if (*p == '[')
{
token_end = p = matching_paren(p + 1, e, '\0', ']');
token_type = '[';
if (*token_end != '\0')
token_end++;
else
err = RFC2822_ERR_SUNBALANCED;
}
else if (CMAP_TST(is_special, *p))
{
token_end = p + 1;
token_type = *p;
}
else
{
while (p < e && *p != '\0' && !CMAP_TST(is_special, *p) &&
(!isascii(*p) || !isspace((unsigned char) *p)) &&
*p != '(')
p++;
token_end = p;
token_type = 'x';
}
*start_out = token_start;
*end_out = token_end;
*type_out = token_type;
return err;
}
/*
** RFC2822_MAILBOX_SPLIT -- extract the local-part and hostname from an
** RFC2822 header, e.g. "From:"
**
** Parameters:
** line -- input line
** user_out -- pointer to "local-part" (returned)
** domain_out -- pointer to hostname (returned)
**
** Return value:
** 0 on success, or an RFC2822_ERR_* on failure.
**
** Notes:
** Input string is modified.
*/
int
rfc2822_mailbox_split(char *line, char **user_out, char **domain_out)
{
int type;
int ws;
int err;
char *e, *special;
char *tok_s, *tok_e;
char *w;
*user_out = NULL;
*domain_out = NULL;
err = 0;
w = line;
e = line + strlen(line);
ws = 0;
for (;;)
{
err = rfc2822_first_special(line, e, &special);
if (err != 0)
return err;
/* given the construct we're looking at, do the right thing */
switch (*special)
{
case '<':
/* display name <address> */
line = special + 1;
for (;;)
{
err = rfc2822_token(line, e, &type, &tok_s,
&tok_e, &ws);
if (err != 0)
return err;
if (type == '>' || type == '\0')
{
*w = '\0';
return 0;
}
else if (type == '@')
{
*w++ = '\0';
*domain_out = w;
}
else if (type == ',' || type == ':')
{
/* source route punctuation */
*user_out = NULL;
*domain_out = NULL;
}
else
{
if (*user_out == NULL)
*user_out = w;
memmove(w, tok_s, tok_e - tok_s);
w += tok_e - tok_s;
}
line = tok_e;
}
return 0;
case ';':
case ':':
case ',':
/* skip a group name or result */
line = special + 1;
break;
default:
/* (display name) addr(display name)ess */
ws = 0;
for (;;)
{
err = rfc2822_token(line, e, &type, &tok_s,
&tok_e, &ws);
if (err != 0)
return err;
if (type == '\0' || type == ',' || type == ';')
{
*w = '\0';
break;
}
else if (type == '@')
{
*w++ = '\0';
*domain_out = w;
ws = 0;
}
else
{
if (*user_out == NULL)
*user_out = w;
else if (type == 'x' && ws == 1)
*w++ = ' ';
memmove(w, tok_s, tok_e - tok_s);
w += tok_e - tok_s;
ws = 0;
}
line = tok_e;
}
return 0;
}
}
return err;
}
#ifdef RFC2822_TEST
int
main(int argc, char **argv)
{
int err;
char *domain, *user;
if (argc != 2)
{
fprintf(stderr, "Usage: %s mailheader\n", argv[0]);
exit(64);
}
err = rfc2822_mailbox_split(argv[1], &user, &domain);
if (err)
{
printf("error %d\n", err);
}
else
{
printf("user: '%s'\ndomain: '%s'\n",
user ? unescape(user) : "null",
domain ? unescape(domain) : "null");
}
return 0;
}
#endif /* RFC2822_TEST */
syntax highlighted by Code2HTML, v. 0.9.1