/*
 * Copyright (c) 2005 Sendmail, Inc. and its suppliers.
 *	All rights reserved.
 *
 * By using this file, you agree to the terms and conditions set
 * forth in the LICENSE file which can be found at the top level of
 * the sendmail distribution.
 */

#include "sm/generic.h"
SM_RCSID("@(#)$Id: sm_extracthdr.c,v 1.7 2006/05/02 17:13:40 ca Exp $")

#include "sm/assert.h"
#include "sm/error.h"
#include "sm/str.h"
#include "sm/sm_extracthdr.h"

/*
**  possible states (use enum?)
**
**  These are the values kept in sm_xhdr_state by sm_xhdr().
**  SM_XHDR_ST_G_NAME is only a case label that the header name state
**  falls through to once ':' has been read; sm_xhdr() never stores it.
*/

#define SM_XHDR_ST_INIT		0x0000 /* initialized */
#define SM_XHDR_ST_R_NAME	0x0001 /* reading a header name */
#define SM_XHDR_ST_G_NAME	0x0002 /* got header name */
#define SM_XHDR_ST_B_VAL	0x0004 /* begin of a header value */
#define SM_XHDR_ST_R_VAL	0x0008 /* reading a header value */
#define SM_XHDR_ST_G_VAL	0x0010 /* got header value */

/*
**  Set when the line terminating the header section has been seen;
**  sm_xhdr() has no case for this state, hence a subsequent call ends
**  in its default branch (SM_E_UNEXPECTED).
*/

#define SM_XHDR_ST_EOHDR	0x1000 /* reached end of header */

/* values for sm_xhdr_crlf (0: no line ending pending) */
#define SM_XHDR_R_CR	1 /* got CR */
#define SM_XHDR_R_CRLF	2 /* got CRLF */

/*
**  Is ch an RFC 2822 "ftext" character (%d33-57 / %d59-126), i.e.,
**  allowed in a field name?  Code 58 (':') is excluded on purpose.
**  Note: ch is evaluated more than once; do not pass an expression
**  with side effects.
*/

#define SM_RFC2822_IS_FTEXT(ch)	\
	(((ch) >= 33 && (ch) <= 57) || ((ch) >= 59 && (ch) <= 126))

/*
**  SM_XHDR_END -- release an XHDR context and the strings it holds
**
**	Parameters:
**		sm_xhdr_ctx -- XHDR context (NULL is accepted: nothing to do)
**
**	Returns:
**		SM_SUCCESS (always)
**
**	Last code review:
**	Last code change:
*/

sm_ret_T
sm_xhdr_end(sm_xhdr_ctx_P sm_xhdr_ctx)
{
	if (sm_xhdr_ctx != NULL)
	{
		/* first the name/value buffers, then the context itself */
		SM_STR_FREE(sm_xhdr_ctx->sm_xhdr_name);
		SM_STR_FREE(sm_xhdr_ctx->sm_xhdr_value);
		sm_free_size(sm_xhdr_ctx, sizeof(*sm_xhdr_ctx));
	}
	return SM_SUCCESS;
}

/*
**  SM_XHDR_INIT -- initialize XHDR context
**
**	Allocates a zeroed context together with the string buffers for
**	the header name and the header value, and stores the parsing flags.
**
**	Parameters:
**		flags -- flags for parsing (saved in the context)
**		psm_xhdr_ctx -- (pointer to) XHDR context (output);
**			must not be NULL; *psm_xhdr_ctx is NULL on error
**
**	Returns:
**		usual sm_error code; SM_SUCCESS or (temporary) ENOMEM
**
**	Side Effects:
**		on success the caller owns the context and must release
**		it with sm_xhdr_end()
**
**	Last code review:
**	Last code change:
*/

sm_ret_T
sm_xhdr_init(uint flags, sm_xhdr_ctx_P *psm_xhdr_ctx)
{
	sm_xhdr_ctx_P sm_xhdr_ctx;

	SM_ASSERT(psm_xhdr_ctx != NULL);
	*psm_xhdr_ctx = NULL;
	sm_xhdr_ctx = (sm_xhdr_ctx_P) sm_zalloc(sizeof(*sm_xhdr_ctx));
	if (sm_xhdr_ctx == NULL)
		return sm_err_temp(ENOMEM);

	/*
	**  NOTE(review): the numeric arguments are presumably the initial
	**  and the maximum size of the string -- confirm in sm/str.h.
	*/

	sm_xhdr_ctx->sm_xhdr_name = sm_str_new(NULL, 32, 256);

	/* check the string just allocated, not the context again */
	if (sm_xhdr_ctx->sm_xhdr_name == NULL)
		goto error;
	sm_xhdr_ctx->sm_xhdr_value = sm_str_new(NULL, 256, 4096);
	if (sm_xhdr_ctx->sm_xhdr_value == NULL)
		goto error;
	sm_xhdr_ctx->sm_xhdr_flags = flags;
	*psm_xhdr_ctx = sm_xhdr_ctx;
	return SM_SUCCESS;

  error:
	/* releases whatever has been allocated so far */
	sm_xhdr_end(sm_xhdr_ctx);
	return sm_err_temp(ENOMEM);
}

/*
**  SM_XHDR -- Extract headers out of a stream
**
**	Byte-wise state machine: consumes chunk starting at the offset
**	stored in the context and collects one header (name and value)
**	in sm_xhdr_name/sm_xhdr_value of the context.  It returns as soon
**	as a header is complete, the chunk is used up, or an error occurs;
**	call it again (newchunk = false) to continue in the same chunk.
**
**	Parameters:
**		chunk -- chunk of mail
**		len -- len of chunk
**		newchunk -- is this a different chunk than last time?
**			(if so: start reading at offset 0)
**		sm_xhdr_ctx -- context
**
**	Returns:
**		>=0: see sm/sm_extracthdr.h: SM_XHDR_*
**			SM_XHDR_CONT: chunk used up, header not yet complete
**			SM_XHDR_GOTA: got a header; there are bytes in chunk
**				beyond the first byte of the next line
**			SM_XHDR_GOT1: got a header; only the first byte of
**				the next line is left in chunk
**			SM_XHDR_GOTL: CR after CRLF: end of header section;
**				the last header is in the context
**		<0: usual sm_error code
**			SM_E_SYNTAX: invalid character in header name,
**				LF without CR, CR CR
**			SM_E_UNEXPECTED: called in a state without handler
**				(e.g., after SM_XHDR_GOTL)
**			or an error from sm_str_put()
**
**	Side Effects:
**		name/value of the previously returned header are cleared
**		when parsing continues after SM_XHDR_GOTA/SM_XHDR_GOT1.
**
**	Notes:
**		The value keeps the CRLF of folded (continuation) lines
**		but not the CRLF terminating the header.
**		NOTE(review): a CR followed by something other than CR/LF
**		is silently dropped and stays "pending" -- intended?
*/

sm_ret_T
sm_xhdr(const uchar *chunk, size_t len, bool newchunk, sm_xhdr_ctx_P sm_xhdr_ctx)
{
	sm_ret_T ret;
	uchar ch;

	SM_REQUIRE(chunk != NULL);
	SM_REQUIRE(sm_xhdr_ctx != NULL);

	ret = SM_SUCCESS;
	if (newchunk)
		sm_xhdr_ctx->sm_xhdr_chunk_off = 0;
	do
	{
		if (sm_xhdr_ctx->sm_xhdr_chunk_off >= len)
			return SM_XHDR_CONT;
		ch = chunk[sm_xhdr_ctx->sm_xhdr_chunk_off++];
		switch (sm_xhdr_ctx->sm_xhdr_state)
		{
		  case SM_XHDR_ST_G_VAL:
			/* previous header has been returned: start over */
			sm_str_clr(sm_xhdr_ctx->sm_xhdr_value);
			sm_str_clr(sm_xhdr_ctx->sm_xhdr_name);
			/* FALLTHROUGH */
		  case SM_XHDR_ST_INIT:
			sm_xhdr_ctx->sm_xhdr_state = SM_XHDR_ST_R_NAME;
			/* FALLTHROUGH */
		  case SM_XHDR_ST_R_NAME:
			if (SM_RFC2822_IS_FTEXT(ch))
			{
				ret = sm_str_put(sm_xhdr_ctx->sm_xhdr_name,
						(uchar) ch);
				break;
			}
			else if (ch != ':')
			{
				ret = sm_err_perm(SM_E_SYNTAX);
				break;
			}

			/* ch == ':': end of header name */
			/* FALLTHROUGH */
		  case SM_XHDR_ST_G_NAME:
			sm_xhdr_ctx->sm_xhdr_state =
				SM_IS_FLAG(sm_xhdr_ctx->sm_xhdr_flags,
					SM_XHDR_FL_SKIP_FIRST_BLANK|
					SM_XHDR_FL_SKIP_LEADING_BLANKS)
				? SM_XHDR_ST_B_VAL : SM_XHDR_ST_R_VAL;
			break;
		  case SM_XHDR_ST_B_VAL:
			/* skip only one blank? then this state is done */
			if (SM_IS_FLAG(sm_xhdr_ctx->sm_xhdr_flags,
					SM_XHDR_FL_SKIP_FIRST_BLANK))
				sm_xhdr_ctx->sm_xhdr_state = SM_XHDR_ST_R_VAL;

			if (ch == ' ')
				break;

			/*
			**  First non-blank byte: the value begins here.
			**  Without this transition the parser would stay
			**  in this state if only SKIP_LEADING_BLANKS is set
			**  and hence drop every blank inside the value,
			**  including the one that marks a continuation line.
			*/

			sm_xhdr_ctx->sm_xhdr_state = SM_XHDR_ST_R_VAL;
			/* FALLTHROUGH */
		  case SM_XHDR_ST_R_VAL:
			if (ch == '\r')
			{
				if (sm_xhdr_ctx->sm_xhdr_crlf == SM_XHDR_R_CRLF)
				{
					/*
					**  this isn't entirely correct:
					**  need CRLF, not just CR
					*/

					sm_xhdr_ctx->sm_xhdr_state =
						SM_XHDR_ST_EOHDR;
					ret = SM_XHDR_GOTL;
				}
				else if (sm_xhdr_ctx->sm_xhdr_crlf == 0)
					sm_xhdr_ctx->sm_xhdr_crlf =
						SM_XHDR_R_CR;
				else
					ret = sm_err_perm(SM_E_SYNTAX);
				break;
			}
			if (ch == '\n')
			{
				/* LF is only valid directly after CR */
				if (sm_xhdr_ctx->sm_xhdr_crlf == SM_XHDR_R_CR)
					sm_xhdr_ctx->sm_xhdr_crlf =
						SM_XHDR_R_CRLF;
				else
					ret = sm_err_perm(SM_E_SYNTAX);
				break;
			}
			if (sm_xhdr_ctx->sm_xhdr_crlf == SM_XHDR_R_CRLF)
			{
				/* first byte of a new line */
				sm_xhdr_ctx->sm_xhdr_crlf = 0;

				/* continuation line? */
				if (!((ch) == ' ' || ch == '\t'))
				{
					bool more;

					/*
					**  No: header is complete.  ch is
					**  part of the next header: put it
					**  back so the next call reads it.
					*/

					sm_xhdr_ctx->sm_xhdr_state =
						SM_XHDR_ST_G_VAL;
					more = sm_xhdr_ctx->sm_xhdr_chunk_off <
						len;
					SM_ASSERT(sm_xhdr_ctx->sm_xhdr_chunk_off > 0);
					--sm_xhdr_ctx->sm_xhdr_chunk_off;
					return more ? SM_XHDR_GOTA
						: SM_XHDR_GOT1;
				}

				/* keep the line break of a folded header */
				ret = sm_str_put(sm_xhdr_ctx->sm_xhdr_value,
					(uchar) '\r');
				if (sm_is_err(ret))
					break;
				ret = sm_str_put(sm_xhdr_ctx->sm_xhdr_value,
					(uchar) '\n');
				if (sm_is_err(ret))
					break;
			}
			ret = sm_str_put(sm_xhdr_ctx->sm_xhdr_value,
					(uchar) ch);
			break;
		  default:
			ret = sm_err_perm(SM_E_UNEXPECTED);
			break;
		}
	} while (ret == SM_SUCCESS);
	return ret;
}

/*
RFC 2822:
FWS             =       ([*WSP CRLF] 1*WSP) /   ; Folding white space
                        obs-FWS
ctext           =       NO-WS-CTL /     ; Non white space controls
                        %d33-39 /       ; The rest of the US-ASCII
                        %d42-91 /       ;  characters not including "(",
                        %d93-126        ;  ")", or "\"
ccontent        =       ctext / quoted-pair / comment
comment         =       "(" *([FWS] ccontent) [FWS] ")"
CFWS            =       *([FWS] comment) (([FWS] comment) / FWS)

atext           =       ALPHA / DIGIT / ; Any character except controls,
                        "!" / "#" /     ;  SP, and specials.
                        "$" / "%" /     ;  Used for atoms
                        "&" / "'" /
                        "*" / "+" /
                        "-" / "/" /
                        "=" / "?" /
                        "^" / "_" /
                        "`" / "{" /
                        "|" / "}" /
                        "~"
atom            =       [CFWS] 1*atext [CFWS]
dot-atom        =       [CFWS] dot-atom-text [CFWS]
dot-atom-text   =       1*atext *("." 1*atext)

qtext           =       NO-WS-CTL /     ; Non white space controls
                        %d33 /          ; The rest of the US-ASCII
                        %d35-91 /       ;  characters not including "\"
                        %d93-126        ;  or the quote character
qcontent        =       qtext / quoted-pair
quoted-string   =       [CFWS]
                        DQUOTE *([FWS] qcontent) [FWS] DQUOTE
                        [CFWS]

word            =       atom / quoted-string
phrase          =       1*word / obs-phrase
utext           =       NO-WS-CTL /     ; Non white space controls
                        %d33-126 /      ; The rest of US-ASCII
                        obs-utext
unstructured    =       *([FWS] utext) [FWS]
optional-field  =       field-name ":" unstructured CRLF
field-name      =       1*ftext
ftext           =       %d33-57 /               ; Any character except
                        %d59-126                ;  controls, SP, and
                                                ;  ":".
NO-WS-CTL       =       %d1-8 /         ; US-ASCII control characters
                        %d11 /          ;  that do not include the
                        %d12 /          ;  carriage return, line feed,
                        %d14-31 /       ;  and white space characters
                        %d127

text            =       %d1-9 /         ; Characters excluding CR and LF
                        %d11 /
                        %d12 /
                        %d14-127 /
                        obs-text

specials        =       "(" / ")" /     ; Special characters used in
                        "<" / ">" /     ;  other parts of the syntax
                        "[" / "]" /
                        ":" / ";" /
                        "@" / "\" /
                        "," / "." /
                        DQUOTE

obs-qp          =       "\" (%d0-127)
obs-text        =       *LF *CR *(obs-char *LF *CR)
obs-char        =       %d0-9 / %d11 /          ; %d0-127 except CR and
                        %d12 / %d14-127         ;  LF
obs-utext       =       obs-text
obs-phrase      =       word *(word / "." / CFWS)
obs-phrase-list =       phrase / 1*([phrase] [CFWS] "," [CFWS]) [phrase]

*/


/* syntax highlighted by Code2HTML, v. 0.9.1 */