/*
* Program: Synonym
* File: body_parser.c
* Author: Cristian Draghici
* Date: 11 Nov 2003
*
* $Id: body_parser.c,v 1.3.2.2 2004/01/30 08:12:11 diciu Exp $
*
* Licensed under the Modulo Consulting Software License
* (see file license.txt)
*
*/
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <unistd.h>
#include <sys/stat.h>
#include <syslog.h>
#include <assert.h>
#include <errno.h>
#include <sys/types.h>
#include <signal.h>
#include "body_parser.h"
#include "c-client/misc.h"
/* All diagnostics in this file are emitted through syslog(priority, format, ...). */
#define print_debug syslog
/* Recursion limit passed to the c-client parser for nested MIME parts.
 * Deeper nesting is not parsed, because it is treated as an attack. */
#define MIME_RECURSION_DEPTH 10
/* Size in bytes of the buffer that receives the local host name. */
#define MAX_HOSTNAME_LENGTH 255
/* Forward declarations of the helpers defined further down in this file. */
unsigned char * read_part(FILE *stream, long offset, long size);
long fputs_wrapper(void *stream, char *string);
int body_parser_parse(BODY *body, char *pfx, long i, body_parser_state *current_state);
int process_current_part(BODY *body, body_parser_state *current_state);
char * read_and_decode_part(body_parser_state *current_state,
long part_offset, long part_size, short part_encoding, long *p_decoded_part_size);
int body_parser_fill_attachment_structure(BODY *body, char *pfx, long i, body_parser_state *current_state);
int body_parser_destroy(body_parser_state *current_state);
PARAMETER * copy_parameter(BODY *body);
void free_attachment_rule_list(attachment_handling_rule * att_list);
/* INTERNAL USE FUNCTION.
 * Ask gethostname() for the name of the host we are running on
 * (handed to c-client when it parses the e-mail headers).
 *
 * hostname: caller-owned buffer of at least MAX_HOSTNAME_LENGTH bytes.
 * Returns:  `hostname`, always NUL-terminated. The string is empty when the
 *           name could not be obtained and possibly truncated when it does
 *           not fit into the buffer.
 */
char *get_server_hostname(char *hostname)
{
    int saved_errno;

    hostname[0] = '\0';
    print_debug(LOG_DEBUG, "get_server_hostname() called.");

    /* gethostname() signals failure with -1 and errno; it never returns an
     * errno value itself, so the return code must not be compared to E*. */
    if(gethostname(hostname, MAX_HOSTNAME_LENGTH) != 0)
    {
        saved_errno = errno;
        if(saved_errno == EINVAL || saved_errno == ENAMETOOLONG)
        {
            print_debug(LOG_ERR, "Hostname is too big for our buffer. Since the buffer is #defined as MAX_HOSTNAME_LENGTH (255) in body_parser.c, change and recompile.");
        }
        else
        {
            print_debug(LOG_ERR, "get_server_hostname(): gethostname() failed (errno %d).", saved_errno);
            /* buffer content is unspecified after a hard failure */
            hostname[0] = '\0';
        }
    }

    /* POSIX does not guarantee termination when the name was truncated. */
    hostname[MAX_HOSTNAME_LENGTH - 1] = '\0';
    return hostname;
}
/* First stage parse: let c-client parse the message structure.
 * Third parties may alter the state between this call and
 * body_parser_finalize().
 *
 * header_content: NUL-terminated message header.
 * body_stream:    stream positioned on a file holding the message body.
 * body_size:      size of the body in bytes.
 * current_state:  parser state; d_state must already be filled in and
 *                 d_state->disclaimer_html must hold the base64 encoded HTML
 *                 disclaimer.
 *
 * On success the envelope, body, input stream and the decoded HTML
 * disclaimer are stored in current_state and BODY_PARSER_SUCCESS is
 * returned. BODY_PARSER_FAILURE is returned on bad arguments or when the
 * disclaimer cannot be decoded; current_state is left unmodified then.
 */
int body_parser_init(char *header_content, FILE *body_stream,
    long body_size, body_parser_state *current_state)
{
    /* c-client structures */
    ENVELOPE *env = NULL;
    BODY *bdy = NULL;
    STRING body;
    disclaimer_state *dstate;
    /* the current host name */
    char hostname[MAX_HOSTNAME_LENGTH];
    char *decoded_html_disclaimer = NULL;
    unsigned long decoded_html_size = 0;

    if(body_stream == NULL || header_content == NULL)
        return BODY_PARSER_FAILURE;
    if(current_state == NULL || current_state->d_state == NULL)
        return BODY_PARSER_FAILURE;

    dstate = current_state->d_state;
    if(dstate->disclaimer_html == NULL)
        return BODY_PARSER_FAILURE;

    /* decode the HTML disclaimer from base64; the decoder returns NULL on
     * malformed input, so check before touching the result */
    decoded_html_disclaimer = (char *)rfc822_base64(
        (unsigned char *)dstate->disclaimer_html,
        (unsigned long)strlen(dstate->disclaimer_html), &decoded_html_size);
    if(decoded_html_disclaimer == NULL)
    {
        print_debug(LOG_ERR, "body_parser_init: HTML disclaimer is not valid base64");
        return BODY_PARSER_FAILURE;
    }
    /* terminate right after the last decoded byte (index size, not size+1) */
    decoded_html_disclaimer[decoded_html_size] = '\0';

    /* The previous pointer cannot be freed here: it is allocated on the
     * stack in synonym.c. The replacement is released by
     * body_parser_destroy(). */
    dstate->disclaimer_html = decoded_html_disclaimer;

    /* initialise C-Client string driver */
    INIT(&body, file_string, (void *)body_stream, body_size);
    print_debug(LOG_DEBUG, "body_parser_init: Parsing mail message");
    current_state->in_stream = body_stream;

    /* C-Client says we should use parse_msg and not directly parse_msg_full
     * but that issues a compiler warning */
    rfc822_parse_msg_full(&env, &bdy, header_content, strlen(header_content),
        &body, get_server_hostname(hostname), (long)MIME_RECURSION_DEPTH, (long)0);

    current_state->envelope = env;
    current_state->body = bdy;
    current_state->body_flag = 0;
    current_state->attachment_rule = NULL;

    if(bdy == NULL)
    {
        print_debug(LOG_ERR, "body_parser_init: c-client returned no body");
        return BODY_PARSER_FAILURE;
    }
    if(body_parser_fill_attachment_structure(current_state->body, NIL, (long)0,
        current_state) == BODY_PARSER_FAILURE)
        return BODY_PARSER_FAILURE;

    return BODY_PARSER_SUCCESS;
}
/* Second stage parse: process every MIME part, write the resulting body to
 * current_state->out_stream, close that stream and release the parser state.
 *
 * Returns BODY_PARSER_SUCCESS when the new body was written and the stream
 * closed without error, BODY_PARSER_FAILURE otherwise. The stream is closed
 * and the state destroyed in both cases.
 */
int body_parser_finalize(body_parser_state *current_state)
{
    int result = BODY_PARSER_SUCCESS;

    print_debug(LOG_DEBUG, "body_parser_finalize: Calling recursive parser");
    body_parser_parse(current_state->body, NIL, (long)0, current_state);

    print_debug(LOG_DEBUG, "body_parser_finalize: Writing new body");
    /* rfc822_output_body() yields NIL as soon as the output routine fails */
    if(!rfc822_output_body(current_state->body,
        fputs_wrapper, (void *)current_state->out_stream))
    {
        print_debug(LOG_ERR, "body_parser_finalize: writing the new body failed");
        result = BODY_PARSER_FAILURE;
    }

    /* fclose() flushes buffered data, so a failure here means lost output */
    if(fclose(current_state->out_stream) != 0)
    {
        print_debug(LOG_ERR, "body_parser_finalize: closing the output stream failed");
        result = BODY_PARSER_FAILURE;
    }

    print_debug(LOG_DEBUG, "body_parser_finalize: Cleaning up");
    body_parser_destroy(current_state);
    return result;
}
/* Walk the MIME tree rooted at `body` and hand every non-multipart part to
 * process_current_part().
 *
 * body:          current node of the tree.
 * pfx:           section prefix built so far ("1.2."), or NULL at top level.
 * i:             zero-based index of this node among its siblings.
 * current_state: parser state passed through to process_current_part().
 *
 * A one-line descriptor of every leaf is written to the debug log. The
 * descriptor is built from message-supplied strings of arbitrary length, so
 * every write into the fixed-size buffer is bounded; overlong descriptors
 * are truncated.
 *
 * Returns BODY_PARSER_FAILURE when `body` is NULL, BODY_PARSER_SUCCESS
 * otherwise (a failing part is logged but does not stop the walk).
 */
int body_parser_parse(BODY *body, char *pfx, long i, body_parser_state *current_state)
{
    char tmp[MAILTMPLEN];
    size_t used;
    PARAMETER *par;
    PART *part;
    int retval = 0;

    if(body == NULL)
        return BODY_PARSER_FAILURE;

    if(body->type == TYPEMULTIPART)
    {
        /* if not first time, extend prefix */
        if(pfx)
            snprintf(tmp, sizeof(tmp), "%s%ld.", pfx, ++i);
        else
            tmp[0] = '\0';
        for(i = 0, part = body->nested.part; part; part = part->next)
            body_parser_parse(&part->body, tmp, i++, current_state);
        return BODY_PARSER_SUCCESS;
    }

    /* non-multipart, output oneline descriptor */
    if(!pfx)
        pfx = ""; /* dummy prefix if top level */
    snprintf(tmp, sizeof(tmp), ">>> %s%ld %s", pfx, ++i, body_types[body->type]);
    if(body->subtype)
    {
        used = strlen(tmp);
        snprintf(tmp + used, sizeof(tmp) - used, "/%s", body->subtype);
    }
    if(body->description)
    {
        used = strlen(tmp);
        snprintf(tmp + used, sizeof(tmp) - used, " (%s)", body->description);
    }
    for(par = body->parameter; par; par = par->next)
    {
        used = strlen(tmp);
        snprintf(tmp + used, sizeof(tmp) - used, ";%s=%s",
            par->attribute ? par->attribute : "",
            par->value ? par->value : "");
    }
    if(body->id)
    {
        used = strlen(tmp);
        snprintf(tmp + used, sizeof(tmp) - used, ", id = %s", body->id);
    }
    print_debug(LOG_DEBUG, "body_parser_parse: %s", tmp);

    /* apply the processing (disclaimer or plain copy) to this part */
    retval = process_current_part(body, current_state);
    if(retval == BODY_PARSER_FAILURE)
    {
        print_debug(LOG_ERR, "process_current_part returned error.");
    }

    /* encapsulated message? */
    if((body->type == TYPEMESSAGE) && body->subtype &&
        !strcmp(body->subtype, "RFC822") && body->nested.msg &&
        (body = body->nested.msg->body))
    {
        if(body->type == TYPEMULTIPART)
        {
            print_debug(LOG_DEBUG, "Multipart within Message");
            body_parser_parse(body, pfx, i - 1, current_state);
        }
        else
        {
            /* build encapsulation prefix */
            snprintf(tmp, sizeof(tmp), "%s%ld.", pfx, i);
            body_parser_parse(body, tmp, (long)0, current_state);
        }
    }
    return BODY_PARSER_SUCCESS;
}
/* process_current_part is fed each MIME part from the message.
* it applies processing and adds the result to the new body of the message */
int process_current_part(BODY *body, body_parser_state *current_state)
{
unsigned char * part_content;
long part_size = 0;
char * processed_part_content;
long processed_part_size;
//PARAMETER *par = NULL;
//attachment_handling_rule *handler = NULL;
void * old_data;
/* rename .xls to .xlsrename,
* drop .doc
* add text disclaimer to text parts
* add html disclaimer to html parts */
if(body->type == TYPETEXT &&
(!strcasecmp(body->subtype, "plain")) &&
(current_state->d_state->operation_mode == SUBTYPE_PLAIN
|| current_state->d_state->operation_mode == SUBTYPE_BOTH) &&
(current_state->d_state->disclaimer_text_processed == 0))
{
print_debug(LOG_DEBUG, "Got text/plain part.");
/* decode the current part and apply disclaimer */
part_content = read_and_decode_part(current_state,
(body->contents).offset, ((body->contents).text).size,
body->encoding, &part_size);
/* add text disclaimer */
processed_part_content = add_text_disclaimer(part_content, part_size,
&processed_part_size, current_state->d_state);
fs_give((void **) &part_content);
old_data = body->contents.text.data;
fs_give(&old_data);
/* replace body content */
print_debug(LOG_DEBUG, "old size: %ld, new size %ld", body->contents.text.size, processed_part_size);
/* TODO: WIP: now reencode the part to the encoding defined in the params section */
switch(body->encoding)
{
case ENCQUOTEDPRINTABLE:
print_debug(LOG_DEBUG, "Reconverting: to quoted printable.");
old_data = processed_part_content;
processed_part_content = rfc822_8bit(processed_part_content,
processed_part_size, &processed_part_size);
fs_give(&old_data);
break;
case ENCBASE64:
print_debug(LOG_DEBUG, "Reconverting: to base 64.");
old_data = processed_part_content;
processed_part_content = rfc822_binary(processed_part_content,
processed_part_size, &processed_part_size);
fs_give(&old_data);
break;
default:
print_debug(LOG_DEBUG, "Reconverting: no conversion required.");
break;
}
body->contents.text.data = processed_part_content;
body->contents.text.size = processed_part_size;
return BODY_PARSER_SUCCESS;
}
if(body->type == TYPETEXT &&
!strcasecmp(body->subtype, "html") &&
(current_state->d_state->operation_mode == SUBTYPE_HTML
|| current_state->d_state->operation_mode == SUBTYPE_BOTH) &&
(current_state->d_state->disclaimer_html_processed == 0))
{
print_debug(LOG_DEBUG, "Got text/html part.");
/* decode the current part and apply disclaimer */
/* decode the current part and apply disclaimer */
part_content = read_and_decode_part(current_state,
(body->contents).offset, ((body->contents).text).size,
body->encoding, &part_size);
/* add text disclaimer */
processed_part_content = add_html_disclaimer(part_content, part_size,
&processed_part_size, current_state->d_state);
fs_give((void **) &part_content);
old_data = body->contents.text.data;
fs_give(&old_data);
switch(body->encoding)
{
case ENCQUOTEDPRINTABLE:
print_debug(LOG_DEBUG, "Reconverting: to quoted printable.");
old_data = processed_part_content;
processed_part_content = rfc822_8bit(processed_part_content,
processed_part_size, &processed_part_size);
fs_give(&old_data);
break;
case ENCBASE64:
print_debug(LOG_DEBUG, "Reconverting: to base 64.");
old_data = processed_part_content;
processed_part_content = rfc822_binary(processed_part_content,
processed_part_size, &processed_part_size);
fs_give(&old_data);
break;
default:
print_debug(LOG_DEBUG, "Reconverting: no conversion required.");
break;
}
body->contents.text.data = processed_part_content;
body->contents.text.size = processed_part_size;
/* add the result to the resulting body */
return BODY_PARSER_SUCCESS;
}
else
{
print_debug(LOG_ERR, "Managing unknown content");
/* check attachment name */
/*if((par = body->parameter))
do
{
if((handler = current_state->attachment_rule))
do
{
if(!strcasecmp(par->attribute, "name") &&
!strcmp(par->value, handler->src))
{
if(handler->dst == NULL)
{
syslog(LOG_DEBUG,"Skipping part.");
return BODY_PARSER_SUCCESS;
}
else
{
if(par->value != NULL)
fs_give((void **)&par->value);
par->value=cpystr(handler->dst);
syslog(LOG_DEBUG,"Renaming part.");
}
}
}
while((handler = handler->next));
syslog(LOG_DEBUG,"%s=%s", par->attribute, par->value);
}
while ((par = par->next));*/
/* perform copy */
part_content = read_part(current_state->in_stream,
(body->contents).offset, ((body->contents).text).size);
body->contents.text.data = part_content;
//body->contents.text.size = processed_part_size;
//part_size = ((body->contents).text).size;
/* decode the current part */
//part_content = read_and_decode_part(current_state,
// (body->contents).offset, ((body->contents).text).size,
// body->encoding, &part_size);
//if(part_content == NULL)
// return BODY_PARSER_FAILURE;
//part->body.parameter = copy_parameter(body);
//print_debug(LOG_DEBUG, "Current part is %ld bytes long.", part_size);
/* could scan the attachment for viruses */
//request_scan(part_content, part_size);
//part->body.contents.text.data = part_content;
//part->body.contents.text.size = part_size;
//part_content[part_size] = '\0';
//part->next = NULL;
/* if none of the rules were hit just copy the part to the resulting message */
return BODY_PARSER_SUCCESS;
}
return BODY_PARSER_SUCCESS;
}
/* Duplicate the parameter list of `body`.
 * Every attribute/value pair is copied with cpystr() into a freshly
 * allocated PARAMETER node; the copies are chained in the original order.
 * When `body` has no parameters a single new, unfilled node is returned. */
PARAMETER * copy_parameter(BODY *body)
{
    PARAMETER *head = NULL;
    PARAMETER *tail = NULL;
    PARAMETER *source;
    PARAMETER *duplicate;

    if(body->parameter == NULL)
        return mail_newbody_parameter();

    for(source = body->parameter; source != NULL; source = source->next)
    {
        duplicate = mail_newbody_parameter();
        duplicate->next = NULL;
        duplicate->attribute = cpystr(source->attribute);
        duplicate->value = cpystr(source->value);

        /* append at the end so the copy keeps the original order */
        if(tail == NULL)
            head = duplicate;
        else
            tail->next = duplicate;
        tail = duplicate;
    }
    return head;
}
/* Read a MIME part: `size` bytes starting at byte `offset` of `stream`.
 *
 * Returns a block obtained from fs_get() holding the bytes followed by a
 * '\0' terminator (fputs_wrapper relies on fputs, and c-client does not pass
 * the body length, so the data must be usable as a C string).
 * The caller owns the block and must release it with fs_give().
 * Returns NULL on invalid arguments, seek failure or a short read; nothing
 * is leaked in that case.
 */
unsigned char * read_part(FILE *stream, long offset, long size)
{
    unsigned char *buffer = NULL;
    size_t wanted;
    size_t total = 0;
    size_t got;

    if(stream == NULL || offset < 0 || size < 0)
    {
        print_debug(LOG_ERR, "read_part(): invalid arguments!");
        return NULL;
    }
    wanted = (size_t)size;

    buffer = (unsigned char *)fs_get(wanted + 1); /* last char for the string terminator */
    if(buffer == NULL)
    {
        print_debug(LOG_ERR, "read_part(): Fatal error(memory allocation): fs_get failed!");
        return NULL;
    }

    if(fseek(stream, offset, SEEK_SET) != 0)
    {
        print_debug(LOG_ERR, "read_part(): fseek() failed!");
        fs_give((void **)&buffer);
        return NULL;
    }

    /* fread() may deliver less than requested; only ever ask for what is
     * still missing so a later iteration cannot write past the buffer */
    while(total < wanted)
    {
        got = fread(buffer + total, 1, wanted - total, stream);
        if(got == 0)
            break;
        total += got;
    }
    if(total != wanted)
    {
        print_debug(LOG_ERR, "read_part(): fread failed!");
        fs_give((void **)&buffer);
        return NULL;
    }

    buffer[wanted] = '\0';
    /* now buffer contains the current part */
    return buffer;
}
/* Return the decoded content of a part.
 *
 * current_state:       supplies the input stream (in_stream).
 * part_offset:         byte offset of the part in the input stream.
 * part_size:           encoded size of the part in bytes.
 * part_encoding:       c-client transfer encoding (ENC7BIT, ENC8BIT,
 *                      ENCBINARY, ENCBASE64 or ENCQUOTEDPRINTABLE).
 * p_decoded_part_size: receives the decoded size on success.
 *
 * Returns a block the caller must release with fs_give(), or NULL when the
 * part cannot be read, the encoding is not supported or the data cannot be
 * decoded.
 */
char * read_and_decode_part(body_parser_state *current_state,
    long part_offset, long part_size, short part_encoding, long *p_decoded_part_size)
{
    unsigned char *encoded_part_content;
    char *decoded_part_content = NULL;
    unsigned long decoded_part_size = 0;

    encoded_part_content = read_part(current_state->in_stream,
        part_offset, part_size);
    if(encoded_part_content == NULL)
        return NULL;

    /* now decode the content we've just read */
    switch(part_encoding)
    {
        case ENCBASE64:
            decoded_part_content = (char *)rfc822_base64(encoded_part_content,
                (unsigned long)part_size, &decoded_part_size);
            fs_give((void **)&encoded_part_content);
            break;
        case ENCQUOTEDPRINTABLE:
            decoded_part_content = (char *)rfc822_qprint(encoded_part_content,
                (unsigned long)part_size, &decoded_part_size);
            fs_give((void **)&encoded_part_content);
            break;
        case ENC7BIT:
        case ENC8BIT:
        case ENCBINARY:
            /* identity encodings: hand the raw block to the caller */
            decoded_part_content = (char *)encoded_part_content;
            decoded_part_size = (unsigned long)part_size;
            break;
        default:
            print_debug(LOG_ERR, "Called with bad encoding %d", part_encoding);
            /* the block comes from fs_get(); release the block itself,
             * never the address of the local pointer variable */
            fs_give((void **)&encoded_part_content);
            return NULL;
    }

    if(decoded_part_content == NULL)
    {
        /* the c-client decoders return NIL on malformed input */
        print_debug(LOG_ERR, "read_and_decode_part: part could not be decoded");
        return NULL;
    }
    *p_decoded_part_size = (long)decoded_part_size;
    return decoded_part_content;
}
/* Free the memory owned by the parser state: the c-client envelope and
 * body, the attachment rule list and the decoded HTML disclaimer.
 * All released pointers in current_state are reset.
 *
 * NOTE: d_state->disclaimer_html is released with fs_give(), which is only
 * valid after body_parser_init() has replaced it with the decoded c-client
 * block.
 *
 * Always returns BODY_PARSER_SUCCESS.
 */
int body_parser_destroy(body_parser_state *current_state)
{
    print_debug(LOG_DEBUG, "body_parser_destroy: Freeing envelope and old body");
    mail_free_envelope(&current_state->envelope);
    mail_free_body(&current_state->body);
    current_state->envelope = NULL;
    current_state->body = NULL;

    print_debug(LOG_DEBUG, "body_parser_destroy: Freeing attachment list");
    free_attachment_rule_list(current_state->attachment_rule);
    /* the list is gone; do not leave a dangling pointer behind */
    current_state->attachment_rule = NULL;

    /* free the HTML disclaimer - c-client block replaced the stack char array in init */
    fs_give((void **)&(current_state->d_state->disclaimer_html));
    return BODY_PARSER_SUCCESS;
}
/* Release every node of an attachment rule list.
 * The src and dst strings of each node are given back with fs_give(), the
 * node itself with free(). A NULL list is accepted.
 * The caller's own pointer to the list is not changed and must not be used
 * afterwards. */
void free_attachment_rule_list(attachment_handling_rule * att_list)
{
    attachment_handling_rule *current_handler = att_list;
    attachment_handling_rule *next_handler;

    while(current_handler != NULL)
    {
        /* remember the successor before the node is released */
        next_handler = current_handler->next;
        if(current_handler->src != NULL)
            fs_give((void **)&current_handler->src);
        if(current_handler->dst != NULL)
            fs_give((void **)&current_handler->dst);
        free(current_handler);
        current_handler = next_handler;
    }
}
/* Fill in the attachment structure.
 *
 * Walks the MIME tree rooted at `body` the same way body_parser_parse()
 * does. Collecting the attachment names into current_state->attachment_rule
 * is NOT IMPLEMENTED IN CONFIG yet, so the walk currently has no effect on
 * current_state. When it gets implemented, each leaf should look up its
 * "name" parameter and append an attachment_handling_rule (src = name,
 * dst = NULL) to current_state->attachment_rule.
 *
 * The debug descriptor that used to be assembled here with unbounded
 * sprintf() calls was never used and has been removed.
 *
 * body:          current node of the tree.
 * pfx:           section prefix built so far, or NULL at top level.
 * i:             zero-based index of this node among its siblings.
 * current_state: parser state, passed down the recursion.
 *
 * Returns BODY_PARSER_FAILURE when `body` is NULL, BODY_PARSER_SUCCESS
 * otherwise.
 */
int body_parser_fill_attachment_structure(BODY *body, char *pfx, long i, body_parser_state *current_state)
{
    char tmp[MAILTMPLEN];
    PART *part;

    if(body == NULL)
        return BODY_PARSER_FAILURE;

    if(body->type == TYPEMULTIPART)
    {
        /* if not first time, extend prefix */
        if(pfx)
            snprintf(tmp, sizeof(tmp), "%s%ld.", pfx, ++i);
        else
            tmp[0] = '\0';
        for(i = 0, part = body->nested.part; part; part = part->next)
            body_parser_fill_attachment_structure(&part->body, tmp, i++, current_state);
        return BODY_PARSER_SUCCESS;
    }

    /* non-multipart */
    if(!pfx)
        pfx = ""; /* dummy prefix if top level */
    ++i; /* one-based section number of this part */

    /* encapsulated message? */
    if((body->type == TYPEMESSAGE) && body->subtype &&
        !strcmp(body->subtype, "RFC822") && body->nested.msg &&
        (body = body->nested.msg->body))
    {
        if(body->type == TYPEMULTIPART)
            body_parser_fill_attachment_structure(body, pfx, i - 1, current_state);
        else
        {
            /* build encapsulation prefix */
            snprintf(tmp, sizeof(tmp), "%s%ld.", pfx, i);
            body_parser_fill_attachment_structure(body, tmp, (long)0, current_state);
        }
    }
    return BODY_PARSER_SUCCESS;
}
/* Dummy attachment processor.
 * Gives every rule in the list a destination name: "<src>.dat" when src is
 * set and shorter than 495 characters, "Unknown.dat" otherwise. The new
 * name is a cpystr() copy stored in handler->dst; a previous dst value is
 * overwritten without being released. */
void process_attachment_names(attachment_handling_rule * att_rule)
{
    attachment_handling_rule *handler;
    char temp[500];

    syslog(LOG_ERR, "Attachment names");
    for(handler = att_rule; handler != NULL; handler = handler->next)
    {
        /* 494 name characters + ".dat" + terminator always fit into temp */
        if(handler->src != NULL && strlen(handler->src) < 495)
            snprintf(temp, sizeof(temp), "%s.%s", handler->src, "dat");
        else
            strcpy(temp, "Unknown.dat");
        handler->dst = cpystr(temp);
        /* passing NULL for %s is undefined behaviour, so substitute a marker */
        syslog(LOG_DEBUG, "Att name: %s=%s",
            handler->src ? handler->src : "(null)",
            handler->dst ? handler->dst : "(null)");
    }
}
/* c-client callbacks
We are only using a couple of these (mm_log while parsing) but c-client.a needs to link against these symbols
*/
/* c-client callback, unused here: intentionally does nothing. */
void mm_searched(MAILSTREAM *stream,unsigned long number)
{
    (void)stream;
    (void)number;
}
/* c-client callback, unused here: intentionally does nothing. */
void mm_exists(MAILSTREAM *stream,unsigned long number)
{
    (void)stream;
    (void)number;
}
/* c-client callback, unused here: intentionally does nothing. */
void mm_expunged(MAILSTREAM *stream,unsigned long number)
{
    (void)stream;
    (void)number;
}
/* c-client callback, unused here: intentionally does nothing. */
void mm_flags(MAILSTREAM *stream,unsigned long number)
{
    (void)stream;
    (void)number;
}
/* c-client callback: forward the notification text and its error flag to
 * mm_log(). The stream argument is not used. */
void mm_notify(MAILSTREAM *stream,char *string,long errflg)
{
    (void)stream;
    mm_log(string, errflg);
}
/* c-client callback, unused here: intentionally does nothing. */
void mm_list(MAILSTREAM *stream,int delimiter,char *mailbox,long attributes)
{
    (void)stream;
    (void)delimiter;
    (void)mailbox;
    (void)attributes;
}
/* c-client callback, unused here: intentionally does nothing. */
void mm_lsub(MAILSTREAM *stream,int delimiter,char *mailbox,long attributes)
{
    (void)stream;
    (void)delimiter;
    (void)mailbox;
    (void)attributes;
}
/* c-client callback, unused here: intentionally does nothing. */
void mm_status(MAILSTREAM *stream,char *mailbox,MAILSTATUS *status)
{
    (void)stream;
    (void)mailbox;
    (void)status;
}
void mm_log(char *string,long errflg)
{
switch((short) errflg)
{
case NIL:
print_debug(LOG_DEBUG, "body_parser: mm_log: [%s]",string);
break;
case PARSE:
case WARN:
print_debug(LOG_DEBUG, "body_parser: mm_log: %%%s",string);
break;
case ERROR:
print_debug(LOG_ERR, "body_parser: mm_log: ?%s",string);
break;
}
}
/* c-client debug-log callback: report `string` through syslog at LOG_ERR. */
void mm_dlog(char *string)
{
    print_debug(
        LOG_ERR,
        "body_parser: mm_dlog: %s",
        string);
}
/* c-client callback, unused here: intentionally does nothing
 * (neither user nor pwd is filled in). */
void mm_login(NETMBX *mb,char *user,char *pwd,long trial)
{
    (void)mb;
    (void)user;
    (void)pwd;
    (void)trial;
}
/* c-client callback, unused here: intentionally does nothing. */
void mm_critical(MAILSTREAM *stream)
{
    (void)stream;
}
/* c-client callback, unused here: intentionally does nothing. */
void mm_nocritical(MAILSTREAM *stream)
{
    (void)stream;
}
/* c-client disk error callback: terminate the current process by sending
 * it signal 9 (SIGKILL). The NIL return is only reached if the signal could
 * not be delivered. */
long mm_diskerror(MAILSTREAM *stream,long errcode,long serious)
{
    const pid_t self = getpid();

    (void)stream;
    (void)errcode;
    (void)serious;
    kill(self, SIGKILL);
    return NIL;
}
/* c-client fatal-error callback: report `string` through syslog at LOG_ERR. */
void mm_fatal(char *string)
{
    print_debug(
        LOG_ERR,
        "body_parser: mm_fatal: ?%s",
        string);
}
/* Output routine handed to rfc822_output_body(): write the NUL-terminated
 * `string` to the FILE passed as `stream`.
 * Returns T on success and NIL when fputs() reports EOF. */
long fputs_wrapper(void *stream, char *string)
{
    FILE *out = (FILE *)stream;

    return (fputs((char *)string, out) == EOF) ? NIL : T;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */