/*
 * Program: Synonym
 * File: body_parser.c
 * Author: Cristian Draghici
 * Date: 11 Nov 2003
 *
 * $Id: body_parser.c,v 1.3.2.2 2004/01/30 08:12:11 diciu Exp $
 *
 * Licensed under the Modulo Consulting Software License
 * (see file license.txt)
 * 
 */
 
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>

#include <unistd.h>
#include <sys/stat.h>
#include <syslog.h>

#include <assert.h>
#include <errno.h>

#include <sys/types.h>
#include <signal.h>

#include "body_parser.h"
#include "c-client/misc.h" 

#define print_debug syslog
/* recursion level while parsing MIME parts. After this level we stop parsing considering attack. */
#define MIME_RECURSION_DEPTH	10
#define MAX_HOSTNAME_LENGTH		255

/* Forward declarations for the helpers defined further down in this file. */
/* Read `size` bytes at `offset` of `stream` into a freshly allocated, NUL-terminated buffer. */
unsigned char * read_part(FILE *stream, long offset, long size);
/* Output callback passed to rfc822_output_body(); writes `string` to `stream` with fputs(). */
long fputs_wrapper(void *stream, char *string);
/* Recursive walk over the MIME tree; hands every non-multipart part to process_current_part(). */
int body_parser_parse(BODY *body, char *pfx, long i, body_parser_state *current_state);
/* Apply the text/html disclaimer to one part, or copy the part unchanged. */
int process_current_part(BODY *body, body_parser_state *current_state);
/* Read a part and undo its transfer encoding; the decoded length is stored in *p_decoded_part_size. */
char * read_and_decode_part(body_parser_state *current_state,
	long part_offset, long part_size, short part_encoding, long *p_decoded_part_size);
/* Recursive walk over the MIME tree meant to collect attachment names (collection is currently commented out). */
int body_parser_fill_attachment_structure(BODY *body, char *pfx, long i, body_parser_state *current_state);
/* Release the envelope, body, attachment rule list and decoded HTML disclaimer held by the state. */
int body_parser_destroy(body_parser_state *current_state);
/* Duplicate the parameter list of `body`. */
PARAMETER * copy_parameter(BODY *body);
/* Free every node of an attachment rule list together with its strings. */
void free_attachment_rule_list(attachment_handling_rule * att_list);

/* INTERNAL USE FUNCTION.
 * Use gethostname() to find out the name of the host we are running on
 * (used by C-Client when parsing email headers).
 *
 * hostname: caller-supplied buffer of at least MAX_HOSTNAME_LENGTH bytes.
 *
 * Returns hostname, which always holds a NUL-terminated string (possibly
 * empty or truncated when the lookup fails), or NULL if hostname is NULL.
 */
char *get_server_hostname(char *hostname)
{
	if(hostname == NULL)
		return NULL;
	hostname[0] = '\0';

	print_debug(LOG_DEBUG, "get_server_hostname() called.");

	/* gethostname() signals failure by returning -1 and setting errno;
	 * it never returns an errno value directly. */
	errno = 0;
	if(gethostname(hostname, MAX_HOSTNAME_LENGTH) != 0)
	{
		int saved_errno = errno;

		/* EFAULT can't happen: the caller hands us a real buffer */
		assert(saved_errno != EFAULT);

		if(saved_errno == EINVAL || saved_errno == ENAMETOOLONG)
			print_debug(LOG_ERR, "Hostname is too big for our buffer. Since the buffer is #defined as MAX_HOSTNAME_LENGTH (255) in body_parser.c, change and recompile.");
		else
			print_debug(LOG_ERR, "get_server_hostname(): gethostname() failed (errno %d).", saved_errno);
	}

	/* a truncated name is not guaranteed to be terminated by gethostname() */
	hostname[MAX_HOSTNAME_LENGTH - 1] = '\0';

	return hostname;
}

/* Call c-client to parse the body (first stage parse).
 * Third parties are expected to alter the state where needed between this
 * call and body_parser_finalize().
 *
 * header_content: NUL-terminated message header text.
 * body_stream:    stream holding the message body; stored as in_stream.
 * body_size:      size of the body handed to the c-client string driver.
 * current_state:  parser state; its d_state->disclaimer_html must hold the
 *                 base64 encoded HTML disclaimer and is replaced by the
 *                 decoded text (a c-client block that body_parser_destroy()
 *                 releases).
 *
 * Returns BODY_PARSER_SUCCESS, or BODY_PARSER_FAILURE when an argument is
 * missing or the HTML disclaimer cannot be decoded. Nothing is modified on
 * failure.
 */
int body_parser_init(char *header_content, FILE *body_stream, 
	long body_size, body_parser_state *current_state)
{
	/* c-client structures */
	ENVELOPE *env = NULL;
	BODY *bdy = NULL;
	STRING body;
	disclaimer_state * dstate;

	/* the current host name */
	char hostname[MAX_HOSTNAME_LENGTH];
	char * decoded_html_disclaimer = NULL;
	unsigned long decoded_html_size = 0;

	if(body_stream == NULL)
		return BODY_PARSER_FAILURE;
	if(header_content == NULL)
		return BODY_PARSER_FAILURE;
	if(current_state == NULL || current_state->d_state == NULL)
		return BODY_PARSER_FAILURE;

	dstate = current_state->d_state;
	if(dstate->disclaimer_html == NULL)
		return BODY_PARSER_FAILURE;

	/* decode the HTML disclaimer from base64; c-client returns NIL when
	 * the input is not valid base64 */
	decoded_html_disclaimer = (char *)rfc822_base64(
		(unsigned char *)dstate->disclaimer_html,
		(unsigned long)strlen(dstate->disclaimer_html), &decoded_html_size);
	if(decoded_html_disclaimer == NULL)
	{
		print_debug(LOG_ERR, "body_parser_init: HTML disclaimer is not valid base64.");
		return BODY_PARSER_FAILURE;
	}
	/* terminate right after the decoded data (index size, not size+1);
	 * NOTE(review): c-client's decoder appears to reserve this byte - confirm */
	decoded_html_disclaimer[decoded_html_size] = '\0';

	/* cannot free the previous pointer: it is allocated on the stack in synonym.c */
	dstate->disclaimer_html = decoded_html_disclaimer;

	/* initialise C-Client string driver */
	INIT(&body,file_string,(void *)body_stream, body_size);
	print_debug(LOG_DEBUG, "body_parser_init: Parsing mail message");

	current_state->in_stream = body_stream;

	/* C-Client says we should use parse_msg and not directly parse_msg_full but that issues a compiler warning */
	rfc822_parse_msg_full(&env, &bdy, header_content, strlen(header_content), &body, get_server_hostname(hostname), (long)MIME_RECURSION_DEPTH, (long)0);

	current_state->envelope = env;
	current_state->body = bdy;
	current_state->body_flag = 0;

	current_state->attachment_rule = NULL;

	return body_parser_fill_attachment_structure(current_state->body, NIL, (long)0, current_state);
}

/* Second stage parse: process every MIME part, write the resulting body to
 * current_state->out_stream, close that stream and release the parser state.
 *
 * Returns BODY_PARSER_SUCCESS when the new body was written and the stream
 * closed without error, BODY_PARSER_FAILURE otherwise. The state is cleaned
 * up in both cases (unless current_state itself is NULL).
 */
int body_parser_finalize(body_parser_state *current_state)
{
	int result = BODY_PARSER_SUCCESS;

	if(current_state == NULL)
		return BODY_PARSER_FAILURE;

	if(current_state->body != NULL && current_state->out_stream != NULL)
	{
		print_debug(LOG_DEBUG, "body_parser_finalize: Calling recursive parser");
		body_parser_parse(current_state->body, NIL, (long)0, current_state);

		print_debug(LOG_DEBUG, "body_parser_finalize: Writing new body");
		if(!rfc822_output_body(current_state->body,
			fputs_wrapper, (void *)current_state->out_stream))
		{
			print_debug(LOG_ERR, "body_parser_finalize: Writing new body failed");
			result = BODY_PARSER_FAILURE;
		}
	}
	else
	{
		print_debug(LOG_ERR, "body_parser_finalize: No parsed body or no output stream");
		result = BODY_PARSER_FAILURE;
	}

	/* fclose() flushes buffered output, so a failure here means lost data */
	if(current_state->out_stream != NULL && fclose(current_state->out_stream) == EOF)
	{
		print_debug(LOG_ERR, "body_parser_finalize: Closing output stream failed");
		result = BODY_PARSER_FAILURE;
	}

	print_debug(LOG_DEBUG, "body_parser_finalize: Cleaning up");
	body_parser_destroy(current_state);

	return result;
}

/* Recursively walk the MIME structure rooted at body.
 * Multipart bodies are descended into; every other part is logged with a
 * one line descriptor and handed to process_current_part(). An
 * encapsulated MESSAGE/RFC822 part is additionally descended into.
 *
 * body:          part to examine.
 * pfx:           section number prefix of the parent, NIL at top level.
 * i:             index of this part within its parent.
 * current_state: parser state passed on to process_current_part().
 *
 * Returns BODY_PARSER_SUCCESS (a failing part is logged, not propagated),
 * or BODY_PARSER_FAILURE when body is NULL.
 */
int body_parser_parse(BODY *body, char *pfx, long i, body_parser_state *current_state)
{
	/* Descriptor / prefix buffer. The MIME fields come from the message and
	 * are therefore untrusted: every write is bounded by sizeof tmp and a
	 * descriptor that does not fit is simply truncated. */
	char tmp[MAILTMPLEN];
	PARAMETER *par;
	PART *part;
	int retval = 0;
	
	if(body == NULL)
		return BODY_PARSER_FAILURE;

	if(body->type == TYPEMULTIPART)
	{
		/* if not first time, extend prefix */
		if(pfx) 
			snprintf(tmp, sizeof tmp, "%s%ld.", pfx, ++i);
		else
			tmp[0] = '\0';
		/*if((par = body->parameter)) 
		do
		{
			if(!strcasecmp(par->attribute, "BOUNDARY"))
				boundary = cpystr(par->value);
			syslog(LOG_DEBUG,"%s=%s", par->attribute, par->value);
		}
		while ((par = par->next));*/
		
		for(i = 0, part = body->nested.part; part; part = part->next)
			body_parser_parse(&part->body, tmp, i++, current_state);
	}
	else 
	{
		/* non-multipart, output oneline descriptor */
		if(!pfx)
			pfx = "";         /* dummy prefix if top level */
		/* snprintf always terminates, so strlen(tmp) < sizeof tmp holds
		 * before each append and the remaining size is never zero */
		snprintf(tmp, sizeof tmp, ">>> %s%ld %s", pfx, ++i, body_types[body->type]);
		if(body->subtype) 
			snprintf(tmp + strlen(tmp), sizeof tmp - strlen(tmp), "/%s", body->subtype);
		if(body->description) 
			snprintf(tmp + strlen(tmp), sizeof tmp - strlen(tmp), " (%s)", body->description);
		for(par = body->parameter; par != NULL; par = par->next)
			snprintf(tmp + strlen(tmp), sizeof tmp - strlen(tmp), ";%s=%s",
				par->attribute ? par->attribute : "",
				par->value ? par->value : "");
		if(body->id)
			snprintf(tmp + strlen(tmp), sizeof tmp - strlen(tmp), ", id = %s", body->id);

		/* determine the part we are looking for and set the offset and size accordingly */
		print_debug(LOG_DEBUG, "body_parser_parse: %s", tmp);
		
		retval = process_current_part(body, current_state);
		if(retval == BODY_PARSER_FAILURE)
		{
			print_debug(LOG_ERR, "process_current_part returned error.");
		}
		
		/* encapsulated message? */
		if((body->type == TYPEMESSAGE) && body->subtype &&
			!strcmp(body->subtype, "RFC822") && body->nested.msg &&
			(body = body->nested.msg->body)) 
		{
			if(body->type == TYPEMULTIPART)
			{
				print_debug(LOG_DEBUG, "Multipart within Message");
				body_parser_parse(body, pfx, i-1, current_state);
			}
			else 
			{
				/* build encapsulation prefix (pfx never points into tmp) */
				snprintf(tmp, sizeof tmp, "%s%ld.", pfx, i);
				body_parser_parse(body, tmp, (long) 0, current_state);
			}
		}
	}
	return BODY_PARSER_SUCCESS;
}


/* process_current_part is fed each MIME part from the message.
 * it applies processing and adds the result to the new body of the message */
int process_current_part(BODY *body, body_parser_state *current_state)
{
	unsigned char * part_content;
	long part_size = 0;
	char * processed_part_content;
	long processed_part_size;
	//PARAMETER *par = NULL;
	//attachment_handling_rule *handler = NULL;
	void * old_data;
	
	/* rename .xls to .xlsrename,
	 * drop .doc
	 * add text disclaimer to text parts
	 * add html disclaimer to html parts */

	if(body->type == TYPETEXT && 
		(!strcasecmp(body->subtype, "plain")) && 
		(current_state->d_state->operation_mode == SUBTYPE_PLAIN 
			|| current_state->d_state->operation_mode == SUBTYPE_BOTH) && 
			(current_state->d_state->disclaimer_text_processed == 0))
	{
		print_debug(LOG_DEBUG, "Got text/plain part.");
		
		/* decode the current part and apply disclaimer */
		part_content = read_and_decode_part(current_state, 
			(body->contents).offset, ((body->contents).text).size, 
			body->encoding, &part_size);
		
		/* add text disclaimer */
		processed_part_content = add_text_disclaimer(part_content, part_size, 
			&processed_part_size, current_state->d_state);
		
		fs_give((void **) &part_content);
		
			
		old_data = body->contents.text.data;
		fs_give(&old_data);
		/* replace body content */
		print_debug(LOG_DEBUG, "old size: %ld, new size %ld", body->contents.text.size, processed_part_size);
		
		/* TODO: WIP: now reencode the part to the encoding defined in the params section */
		switch(body->encoding)
		{
			case ENCQUOTEDPRINTABLE:
				print_debug(LOG_DEBUG, "Reconverting: to quoted printable.");
				old_data = processed_part_content;
				processed_part_content = rfc822_8bit(processed_part_content,
						processed_part_size, &processed_part_size);
				fs_give(&old_data);
				break;
			case ENCBASE64:
				print_debug(LOG_DEBUG, "Reconverting: to base 64.");
				old_data = processed_part_content;
				processed_part_content = rfc822_binary(processed_part_content,
						processed_part_size, &processed_part_size);
				fs_give(&old_data);
				break;
			default:
				print_debug(LOG_DEBUG, "Reconverting: no conversion required.");
				break;
		}
		body->contents.text.data = processed_part_content;
		body->contents.text.size = processed_part_size;
		
		return BODY_PARSER_SUCCESS;
	}
	if(body->type == TYPETEXT && 
		!strcasecmp(body->subtype, "html") &&
		(current_state->d_state->operation_mode == SUBTYPE_HTML 
			|| current_state->d_state->operation_mode == SUBTYPE_BOTH) && 
		(current_state->d_state->disclaimer_html_processed == 0))

	{
		print_debug(LOG_DEBUG, "Got text/html part.");
		/* decode the current part and apply disclaimer */

		/* decode the current part and apply disclaimer */
		part_content = read_and_decode_part(current_state, 
			(body->contents).offset, ((body->contents).text).size, 
			body->encoding, &part_size);
		
		/* add text disclaimer */
		processed_part_content = add_html_disclaimer(part_content, part_size, 
			&processed_part_size, current_state->d_state);
		
		fs_give((void **) &part_content);
		
		old_data = body->contents.text.data;
		fs_give(&old_data);

		switch(body->encoding)
		{
			case ENCQUOTEDPRINTABLE:
				print_debug(LOG_DEBUG, "Reconverting: to quoted printable.");
				old_data = processed_part_content;
				processed_part_content = rfc822_8bit(processed_part_content,
						processed_part_size, &processed_part_size);
				fs_give(&old_data);
				break;
			case ENCBASE64:
				print_debug(LOG_DEBUG, "Reconverting: to base 64.");
				old_data = processed_part_content;
				processed_part_content = rfc822_binary(processed_part_content,
						processed_part_size, &processed_part_size);
				fs_give(&old_data);
				break;
			default:
				print_debug(LOG_DEBUG, "Reconverting: no conversion required.");
				break;
		}
		
		body->contents.text.data = processed_part_content;
		body->contents.text.size = processed_part_size;
		
		/* add the result to the resulting body */
		return BODY_PARSER_SUCCESS;
	}
	else
	{
		print_debug(LOG_ERR, "Managing unknown content");
		
		/* check attachment name */
		/*if((par = body->parameter)) 
		do
		{
			if((handler = current_state->attachment_rule))
				do
				{
					if(!strcasecmp(par->attribute, "name") && 
						!strcmp(par->value, handler->src))
					{
						if(handler->dst == NULL)
						{
							syslog(LOG_DEBUG,"Skipping part.");
							return BODY_PARSER_SUCCESS;
						}
						else
						{
							if(par->value != NULL)
								fs_give((void **)&par->value);
							par->value=cpystr(handler->dst);
							syslog(LOG_DEBUG,"Renaming part.");
						}							
					}
				}
				while((handler = handler->next));
			syslog(LOG_DEBUG,"%s=%s", par->attribute, par->value);
		}
		while ((par = par->next));*/
		
		/* perform copy */
		part_content = read_part(current_state->in_stream, 
			(body->contents).offset, ((body->contents).text).size);
		
		body->contents.text.data = part_content;
		
		//body->contents.text.size = processed_part_size;
		
		//part_size = ((body->contents).text).size;
		/* decode the current part */
		//part_content = read_and_decode_part(current_state, 
		//	(body->contents).offset, ((body->contents).text).size, 
		//	body->encoding, &part_size);
		
		//if(part_content == NULL)
		//	return BODY_PARSER_FAILURE;
			
			
		//part->body.parameter = copy_parameter(body);
		
		
		//print_debug(LOG_DEBUG, "Current part is %ld bytes long.", part_size);
		
		/* could scan the attachment for viruses */
		//request_scan(part_content, part_size);
		
		//part->body.contents.text.data = part_content;
		//part->body.contents.text.size = part_size;
		//part_content[part_size] = '\0';
		
		//part->next = NULL;	
		
		/* if none of the rules were hit just copy the part to the resulting message */
		return BODY_PARSER_SUCCESS;
	}
	
	return BODY_PARSER_SUCCESS;
}

/* Duplicate the parameter list of body.
 * Every attribute/value pair is copied with cpystr() into a new list in the
 * same order. The append uses a tail link, so copying is linear in the
 * number of parameters.
 *
 * Returns the head of the new list; when body is NULL or has no parameters
 * a single fresh parameter from mail_newbody_parameter() is returned.
 * The caller owns the result.
 */
PARAMETER * copy_parameter(BODY *body)
{
	PARAMETER *head = NULL;
	PARAMETER **link = &head;	/* where the next copy gets attached */
	PARAMETER *par;

	/* copy the parameters from the old body to the new one */
	for(par = body ? body->parameter : NULL; par != NULL; par = par->next)
	{
		PARAMETER *copy = mail_newbody_parameter();

		copy->next = NULL;
		copy->attribute = cpystr(par->attribute);
		copy->value = cpystr(par->value);

		*link = copy;
		link = &copy->next;
	}

	if(head != NULL)
		return head;
	return mail_newbody_parameter();
}

/* Read a MIME part.
 *
 * stream: stream holding the message body.
 * offset: position of the part, measured from the start of stream.
 * size:   number of bytes to read.
 *
 * Returns a block obtained from fs_get() holding exactly size bytes followed
 * by a NUL terminator, or NULL on bad arguments, seek failure or short read
 * (the block is released in those cases).
 * The caller must release the result with fs_give().
 */
unsigned char * read_part(FILE *stream, long offset, long size)
{
	unsigned char * buffer = NULL;
	size_t wanted;
	size_t total = 0;

	if(stream == NULL || offset < 0 || size < 0)
	{
		print_debug(LOG_ERR, "read_part(): invalid arguments!");
		return NULL;
	}
	wanted = (size_t)size;

	buffer = (unsigned char *)fs_get(wanted + 1); /* last char for the string terminator */
	if(buffer == NULL)
	{
		print_debug(LOG_ERR, "read_part(): Fatal error(memory allocation): fs_get failed!");
		return NULL;
	}

	if(fseek(stream, offset, SEEK_SET) != 0)
	{
		print_debug(LOG_ERR, "read_part(): fseek() failed!");
		fs_give((void **)&buffer);
		return NULL;
	}

	/* after a short read only ask for what is still missing, otherwise
	 * fread() could write past the end of the buffer */
	while(total < wanted)
	{
		size_t bytes_read = fread(buffer + total, 1, wanted - total, stream);

		if(bytes_read == 0)
			break;
		total += bytes_read;
	}
	if(total != wanted)
	{
		print_debug(LOG_ERR, "read_part(): fread failed!");
		fs_give((void **)&buffer);
		return NULL;
	}
	
	/* Since our fputs_wrapper uses fputs we need to mark the end of the string.
	Since c-client does not send the length of the body we need to use strlen based stuff.
	*/
	buffer[wanted] = '\0';
	/* now buffer contains the current part */
	return buffer;
}

/* Return the decoded content of a part.
 *
 * current_state:       parser state; its in_stream is read.
 * part_offset:         offset of the part in the input stream.
 * part_size:           encoded size of the part.
 * part_encoding:       c-client transfer encoding (ENC7BIT, ENC8BIT,
 *                      ENCBINARY, ENCBASE64 or ENCQUOTEDPRINTABLE).
 * p_decoded_part_size: receives the decoded length (0 on failure).
 *
 * Returns a c-client block the caller must fs_give(), or NULL when the part
 * cannot be read, the encoding is not supported, or the data cannot be
 * decoded.
 */
char * read_and_decode_part(body_parser_state *current_state, 
	long part_offset, long part_size, short part_encoding, long *p_decoded_part_size)
{
	char *encoded_part_content;
	char *decoded_part_content = NULL;
	unsigned long decoded_part_size = 0;
	
	if(current_state == NULL || p_decoded_part_size == NULL)
		return NULL;
	*p_decoded_part_size = 0;

	encoded_part_content = (char *)read_part(current_state->in_stream, 
		part_offset, part_size);
	if(encoded_part_content == NULL)
		return NULL;
	
	/* now decode the content we've just read */
	switch(part_encoding)
	{
		case ENCBASE64:
			decoded_part_content = (char *)rfc822_base64(
				(unsigned char *)encoded_part_content, 
				(unsigned long)part_size, &decoded_part_size);
			fs_give((void **)&encoded_part_content);
			break;
		case ENCQUOTEDPRINTABLE:
			decoded_part_content = (char *)rfc822_qprint(
				(unsigned char *)encoded_part_content, 
				(unsigned long)part_size, &decoded_part_size);
			fs_give((void **)&encoded_part_content);
			break;
		case ENC7BIT:
		case ENC8BIT:
		case ENCBINARY:
			/* identity encodings: hand the raw block to the caller */
			decoded_part_content = encoded_part_content;
			decoded_part_size = (unsigned long)part_size;
			break;
		default:
			print_debug(LOG_ERR, "Called with bad encoding %d", part_encoding);
			/* release the block itself (not the address of the local pointer) */
			fs_give((void **)&encoded_part_content);
			return NULL;
	}
	
	/* the c-client decoders return NIL on malformed input */
	if(decoded_part_content == NULL)
	{
		print_debug(LOG_ERR, "read_and_decode_part(): part could not be decoded.");
		return NULL;
	}

	*p_decoded_part_size = (long)decoded_part_size;
	return decoded_part_content;	
}

/* Free the memory owned by the parser state: the c-client envelope and
 * body, the attachment rule list and the decoded HTML disclaimer.
 * All released pointers are reset so that calling this twice is harmless.
 *
 * Returns BODY_PARSER_SUCCESS, or BODY_PARSER_FAILURE if current_state is NULL.
 */
int body_parser_destroy(body_parser_state *current_state)
{
	if(current_state == NULL)
		return BODY_PARSER_FAILURE;

	print_debug(LOG_DEBUG, "body_parser_destroy: Freeing envelope and old body");
	mail_free_envelope(&current_state->envelope);
	mail_free_body(&current_state->body);
	
	current_state->envelope = NULL;
	current_state->body = NULL;

	print_debug(LOG_DEBUG, "body_parser_destroy: Freeing attachment list");
	free_attachment_rule_list(current_state->attachment_rule);
	/* the callee only sees a copy of the pointer, so clear ours here */
	current_state->attachment_rule = NULL;
	
	/* free the HTML disclaimer - c-client block replaced the stack char array in init */
	if(current_state->d_state != NULL)
		fs_give((void **)&(current_state->d_state->disclaimer_html));
	
	/* the body was already released above; no second mail_free_body() needed */
	return BODY_PARSER_SUCCESS;
}

/* Free every node of an attachment rule list together with its src and dst
 * strings. The strings are released with fs_give(); the nodes themselves
 * with free() (the commented-out allocation code in this file creates them
 * with malloc()).
 * The caller's own pointer is not modified and must not be used afterwards.
 */
void free_attachment_rule_list(attachment_handling_rule * att_list)
{
	attachment_handling_rule *next_handler;

	while(att_list != NULL)
	{
		/* remember the successor before the node goes away */
		next_handler = att_list->next;

		if(att_list->src != NULL)
			fs_give((void **)&att_list->src);
		if(att_list->dst != NULL)
			fs_give((void **)&att_list->dst);

		free(att_list);
		att_list = next_handler;
	}
}

/* Fill in the attachment structure.
 * Recursively walks the MIME structure rooted at body in the same order as
 * body_parser_parse(). Collecting the attachment names is NOT IMPLEMENTED
 * IN CONFIG and therefore commented out; the walk currently only builds the
 * one line descriptor of each part.
 *
 * body:          part to examine.
 * pfx:           section number prefix of the parent, NIL at top level.
 * i:             index of this part within its parent.
 * current_state: parser state (unused while the collection is disabled).
 *
 * Returns BODY_PARSER_SUCCESS, or BODY_PARSER_FAILURE when body is NULL.
 */
int body_parser_fill_attachment_structure(BODY *body, char *pfx, long i, body_parser_state *current_state)
{
	/* Descriptor / prefix buffer. MIME fields are untrusted input: every
	 * write is bounded by sizeof tmp, longer descriptors are truncated. */
	char tmp[MAILTMPLEN];
	PARAMETER *par;
	PART *part;
	//attachment_handling_rule *att_handler;
	//attachment_handling_rule *cursor;
	
	if(body == NULL)
		return BODY_PARSER_FAILURE;

	if(body->type == TYPEMULTIPART)
	{
		/* if not first time, extend prefix */
		if(pfx) 
			snprintf(tmp, sizeof tmp, "%s%ld.", pfx, ++i);
		else
			tmp[0] = '\0';
		for(i = 0, part = body->nested.part; part; part = part->next)
			body_parser_fill_attachment_structure(&part->body, tmp, i++, current_state);
	}
	else 
	{
		/* non-multipart, output oneline descriptor */
		if(!pfx)
			pfx = "";         /* dummy prefix if top level */
		/* snprintf always terminates, so strlen(tmp) < sizeof tmp holds
		 * before each append and the remaining size is never zero */
		snprintf(tmp, sizeof tmp, ">>> %s%ld %s", pfx, ++i, body_types[body->type]);
		if(body->subtype) 
			snprintf(tmp + strlen(tmp), sizeof tmp - strlen(tmp), "/%s", body->subtype);
		if(body->description) 
			snprintf(tmp + strlen(tmp), sizeof tmp - strlen(tmp), " (%s)", body->description);
		for(par = body->parameter; par != NULL; par = par->next)
			snprintf(tmp + strlen(tmp), sizeof tmp - strlen(tmp), ";%s=%s",
				par->attribute ? par->attribute : "",
				par->value ? par->value : "");
		if(body->id)
			snprintf(tmp + strlen(tmp), sizeof tmp - strlen(tmp), ", id = %s", body->id);

		/**** NOT IMPLEMENTED IN CONFIG *********/
		//print_debug(LOG_DEBUG, "body_parser_fill_attachment_structure: %s", tmp);
		
		/* look for the name parameter */
		//if((par = body->parameter)) 
		//	do
		//	{
		//		if(!strcasecmp(par->attribute, "name"))
		//		{
		//			/* fill in the attachment handling structure with all the attachment names */
		//			att_handler = malloc(sizeof(attachment_handling_rule));
		//			if(att_handler == NULL)
		//			{
		//				print_debug(LOG_ERR, "Malloc failed on attachment handling structure.");
		//				return BODY_PARSER_FAILURE;
		//			}
		//			att_handler->src = cpystr(par->value);
		//			att_handler->dst = NULL;
		//			att_handler->next = NULL;
		//							
		//			/* add the new attachment name */
		//			if(current_state->attachment_rule != NULL)
		//			{
		//				for(cursor=current_state->attachment_rule; cursor->next!=NULL;cursor=cursor->next)
		//					;	
		//				cursor->next = att_handler;
		//			}
		//			else
		//				current_state->attachment_rule = att_handler;
		//		}
		//		syslog(LOG_DEBUG,"%s=%s", par->attribute, par->value);
		//	}
		//	while ((par = par->next));
		
		/* encapsulated message? */
		if((body->type == TYPEMESSAGE) && body->subtype &&
			!strcmp(body->subtype, "RFC822") && body->nested.msg &&
			(body = body->nested.msg->body)) 
		{
			if(body->type == TYPEMULTIPART)
				body_parser_fill_attachment_structure(body, pfx, i-1, current_state);
			else 
			{
				/* build encapsulation prefix (pfx never points into tmp) */
				snprintf(tmp, sizeof tmp, "%s%ld.", pfx, i);
				body_parser_fill_attachment_structure(body, tmp, (long) 0, current_state);
			}
		}
	}
	return BODY_PARSER_SUCCESS;
}


/* Dummy attachment processor.
 * Gives every rule in the list a destination name: "<src>.dat", or
 * "Unknown.dat" when the rule has no source name or the name is too long
 * for the scratch buffer. A destination that is already set is released
 * before it is replaced.
 */
void process_attachment_names(attachment_handling_rule * att_rule)
{
	attachment_handling_rule *handler;
	char temp[500];
	
	syslog(LOG_ERR, "Attachment names");
	for(handler = att_rule; handler != NULL; handler = handler->next)
	{
		/* keep room for ".dat" and the terminating NUL */
		if(handler->src != NULL && strlen(handler->src) < sizeof temp - 5)
			snprintf(temp, sizeof temp, "%s.%s", handler->src, "dat");
		else
			snprintf(temp, sizeof temp, "%s", "Unknown.dat");
		
		if(handler->dst != NULL)
			fs_give((void **)&handler->dst);
		handler->dst = cpystr(temp);

		/* src may be NULL here; never hand NULL to %s */
		syslog(LOG_DEBUG, "Att name: %s=%s",
			handler->src ? handler->src : "(null)", handler->dst);
	}
}


/* c-client callbacks.
 * Only a couple of them do real work (mm_log while parsing), but c-client.a
 * needs every one of these symbols to link.
 */

/* Search-result callback: intentionally does nothing. */
void mm_searched(MAILSTREAM *stream,unsigned long number)
{
	(void)stream;
	(void)number;
}


/* Message-count callback required by c-client: intentionally does nothing. */
void mm_exists(MAILSTREAM *stream,unsigned long number)
{
	(void)stream;
	(void)number;
}


/* Expunge callback required by c-client: intentionally does nothing. */
void mm_expunged(MAILSTREAM *stream,unsigned long number)
{
	(void)stream;
	(void)number;
}


/* Flag-change callback required by c-client: intentionally does nothing. */
void mm_flags(MAILSTREAM *stream,unsigned long number)
{
	(void)stream;
	(void)number;
}


/* Notification callback: the stream is ignored and the message is passed
 * on to mm_log() with its error flag. */
void mm_notify(MAILSTREAM *stream,char *string,long errflg)
{
	(void)stream;
	mm_log(string, errflg);
}


/* Mailbox-list callback required by c-client: intentionally does nothing. */
void mm_list(MAILSTREAM *stream,int delimiter,char *mailbox,long attributes)
{
	(void)stream;
	(void)delimiter;
	(void)mailbox;
	(void)attributes;
}

/* Subscription-list callback required by c-client: intentionally does nothing. */
void mm_lsub(MAILSTREAM *stream,int delimiter,char *mailbox,long attributes)
{
	(void)stream;
	(void)delimiter;
	(void)mailbox;
	(void)attributes;
}


/* Mailbox-status callback required by c-client: intentionally does nothing. */
void mm_status(MAILSTREAM *stream,char *mailbox,MAILSTATUS *status)
{
	(void)stream;
	(void)mailbox;
	(void)status;
}

/* Logging callback used by c-client (e.g. while parsing a message).
 * The message is forwarded to syslog:
 *   NIL           -> LOG_DEBUG, message in brackets
 *   PARSE, WARN   -> LOG_DEBUG, prefixed with '%'
 *   ERROR         -> LOG_ERR,   prefixed with '?'
 *   anything else -> LOG_DEBUG together with the numeric flag, so that
 *                    messages with other flags are no longer dropped.
 */
void mm_log(char *string,long errflg)
{
	switch((short) errflg)
	{
		case NIL:
			print_debug(LOG_DEBUG, "body_parser: mm_log: [%s]",string);
			break;
		case PARSE:
		case WARN:
			print_debug(LOG_DEBUG, "body_parser: mm_log: %%%s",string);
			break;
		case ERROR:
			print_debug(LOG_ERR, "body_parser: mm_log: ?%s",string);
			break;
		default:
			print_debug(LOG_DEBUG, "body_parser: mm_log: (flag %ld) %s",errflg,string);
			break;
	}
}


/* Debug-log callback of c-client: the message is written to the log at
 * LOG_ERR priority. */
void mm_dlog(char *string)
{
	const int priority = LOG_ERR;

	print_debug(priority, "body_parser: mm_dlog: %s", string);
}

/* Login callback required by c-client: intentionally does nothing, user and
 * pwd are left untouched. */
void mm_login(NETMBX *mb,char *user,char *pwd,long trial)
{
	(void)mb;
	(void)user;
	(void)pwd;
	(void)trial;
}


/* Critical-section entry callback required by c-client: intentionally does nothing. */
void mm_critical(MAILSTREAM *stream)
{
	(void)stream;
}


/* Critical-section exit callback required by c-client: intentionally does nothing. */
void mm_nocritical(MAILSTREAM *stream)
{
	(void)stream;
}


/* Disk-error callback of c-client.
 * The error is logged and the process then kills itself with SIGKILL.
 * The return statement is only reached if the signal could not be
 * delivered; NIL is returned in that case.
 */
long mm_diskerror(MAILSTREAM *stream,long errcode,long serious)
{
	(void)stream;

	/* leave a trace before the process disappears */
	print_debug(LOG_ERR, "body_parser: mm_diskerror: disk error %ld (serious=%ld), terminating.",
		errcode, serious);

	kill(getpid(), SIGKILL);
	return NIL;
}


/* Fatal-error callback of c-client: the message is written to the log at
 * LOG_ERR priority; nothing else is done here. */
void mm_fatal(char *string)
{
	const int priority = LOG_ERR;

	print_debug(priority, "body_parser: mm_fatal: ?%s", string);
}

/* Output callback handed to rfc822_output_body().
 * Writes the NUL-terminated string to the FILE behind stream.
 * Returns T when fputs() succeeded and NIL when it reported EOF. */
long fputs_wrapper(void *stream, char *string)
{
	FILE *out = (FILE *)stream;
	int status = fputs((char *)string, out);

	return (status == EOF) ? NIL : T;
}



/* syntax highlighted by Code2HTML, v. 0.9.1 */