/* * Program: Synonym * File: body_parser.c * Author: Cristian Draghici * Date: 11 Nov 2003 * * $Id: body_parser.c,v 1.3.2.2 2004/01/30 08:12:11 diciu Exp $ * * Licensed under the Modulo Consulting Software License * (see file license.txt) * */ #include #include #include #include #include #include #include #include #include #include #include "body_parser.h" #include "c-client/misc.h" #define print_debug syslog /* recursion level while parsing MIME parts. After this level we stop parsing considering attack. */ #define MIME_RECURSION_DEPTH 10 #define MAX_HOSTNAME_LENGTH 255 unsigned char * read_part(FILE *stream, long offset, long size); long fputs_wrapper(void *stream, char *string); int body_parser_parse(BODY *body, char *pfx, long i, body_parser_state *current_state); int process_current_part(BODY *body, body_parser_state *current_state); char * read_and_decode_part(body_parser_state *current_state, long part_offset, long part_size, short part_encoding, long *p_decoded_part_size); int body_parser_fill_attachment_structure(BODY *body, char *pfx, long i, body_parser_state *current_state); int body_parser_destroy(body_parser_state *current_state); PARAMETER * copy_parameter(BODY *body); void free_attachment_rule_list(attachment_handling_rule * att_list); /* INTERNAL USE FUNCTION. use gethostname to find out the current running host. (used by C-Client when parsing email headers to figure out stuff) */ char *get_server_hostname(char *hostname) { int retval; hostname[0] = '\0'; print_debug(LOG_DEBUG, "get_server_hostname() called."); retval = gethostname(hostname, MAX_HOSTNAME_LENGTH); /* EFAULT can't happen. hostname is allocated on the stack */ assert(retval != EFAULT); if(retval == EINVAL) print_debug(LOG_ERR, "Hostname is too big for our buffer. Since the buffer is #defined as MAX_HOSTNAME_LENGTH (255)in disclaimer.c, change and recompile."); return hostname; } /* call c-client to parse the body * first stage parse * require 3rd parties to alter where needed * */ int body_parser_init(char *header_content, FILE *body_stream, long body_size, body_parser_state *current_state) { /* c-client structures */ ENVELOPE *env; BODY *bdy; STRING body; disclaimer_state * dstate; /* the current host name */ char hostname[MAX_HOSTNAME_LENGTH]; char * decoded_html_disclaimer = NULL; long decoded_html_size; if(body_stream == NULL) return BODY_PARSER_FAILURE; if(header_content == NULL) return BODY_PARSER_FAILURE; /* initialise C-Client string driver */ INIT(&body,file_string,(void *)body_stream, body_size); print_debug(LOG_DEBUG, "body_parser_init: Parsing mail message"); dstate = current_state->d_state; /* decode the HTML disclaimer from base64 */ decoded_html_disclaimer = rfc822_base64(dstate->disclaimer_html, strlen(dstate->disclaimer_html), &decoded_html_size); decoded_html_disclaimer[decoded_html_size+1] = '\0'; /* connot free the existing pointer, it's allocated on the stack in synonym.c bugger!*/ //fs_give((void **)&(dstate->disclaimer_html)); dstate->disclaimer_html = decoded_html_disclaimer; current_state->in_stream = body_stream; /* C-Client says we should use parse_msg and not directly parse_msg_full but that issues compiler warning */ //rfc822_parse_msg(&env, &bdy, header_content, strlen(header_content), &body, get_server_hostname(), scratch_buffer); rfc822_parse_msg_full(&env, &bdy, header_content, strlen(header_content), &body, get_server_hostname(hostname), (long)MIME_RECURSION_DEPTH, (long)0); current_state->envelope = env; current_state->body = bdy; current_state->body_flag = 0; current_state->attachment_rule = NULL; body_parser_fill_attachment_structure(current_state->body, NIL, (long)0, current_state); return BODY_PARSER_SUCCESS; } /* second stage parse */ int body_parser_finalize(body_parser_state *current_state) { print_debug(LOG_DEBUG, "body_parser_finalize: Calling recursive parser"); body_parser_parse(current_state->body, NIL, (long)0, current_state); print_debug(LOG_DEBUG, "body_parser_finalize: Writing new body"); rfc822_output_body (current_state->body, fputs_wrapper, (void *)current_state->out_stream); fclose(current_state->out_stream); print_debug(LOG_DEBUG, "body_parser_finalize: Cleaning up"); body_parser_destroy(current_state); return BODY_PARSER_SUCCESS; } int body_parser_parse(BODY *body, char *pfx, long i, body_parser_state *current_state) { char tmp[MAILTMPLEN]; char *s = tmp; PARAMETER *par; PART *part; int retval = 0; /*char *boundary;*/ if(body->type == TYPEMULTIPART) { /* if not first time, extend prefix */ if(pfx) sprintf(tmp,"%s%ld.",pfx,++i); else tmp[0] = '\0'; /*if((par = body->parameter)) do { if(!strcasecmp(par->attribute, "BOUNDARY")) boundary = cpystr(par->value); syslog(LOG_DEBUG,"%s=%s", par->attribute, par->value); } while ((par = par->next));*/ for(i = 0, part = body->nested.part; part; part = part->next) body_parser_parse(&part->body,tmp,i++, current_state); } else { /* non-multipart, output oneline descriptor */ if(!pfx) pfx = ""; /* dummy prefix if top level */ sprintf (s,">>> %s%ld %s",pfx,++i,body_types[body->type]); if(body->subtype) sprintf(s += strlen (s),"/%s",body->subtype); if(body->description) sprintf(s += strlen (s)," (%s)",body->description); if((par = body->parameter)) do sprintf(s += strlen (s),";%s=%s",par->attribute,par->value); while ((par = par->next)); if(body->id) sprintf (s += strlen (s),", id = %s",body->id); /* determine the part we are looking for and set the offset and size accordingly */ print_debug(LOG_DEBUG, "body_parser_parse: %s", tmp); retval = process_current_part(body, current_state); if(retval == BODY_PARSER_FAILURE) { print_debug(LOG_ERR, "process_current_part returned error."); } /* encapsulated message? */ if((body->type == TYPEMESSAGE) && !strcmp (body->subtype,"RFC822") && (body = body->nested.msg->body)) { if (body->type == TYPEMULTIPART) { print_debug(LOG_DEBUG, "Multipart within Message"); body_parser_parse(body,pfx,i-1, current_state); } else { /* build encapsulation prefix */ sprintf (tmp,"%s%ld.",pfx,i); body_parser_parse(body,tmp,(long) 0, current_state); } } } return BODY_PARSER_SUCCESS; } /* process_current_part is fed each MIME part from the message. * it applies processing and adds the result to the new body of the message */ int process_current_part(BODY *body, body_parser_state *current_state) { unsigned char * part_content; long part_size = 0; char * processed_part_content; long processed_part_size; //PARAMETER *par = NULL; //attachment_handling_rule *handler = NULL; void * old_data; /* rename .xls to .xlsrename, * drop .doc * add text disclaimer to text parts * add html disclaimer to html parts */ if(body->type == TYPETEXT && (!strcasecmp(body->subtype, "plain")) && (current_state->d_state->operation_mode == SUBTYPE_PLAIN || current_state->d_state->operation_mode == SUBTYPE_BOTH) && (current_state->d_state->disclaimer_text_processed == 0)) { print_debug(LOG_DEBUG, "Got text/plain part."); /* decode the current part and apply disclaimer */ part_content = read_and_decode_part(current_state, (body->contents).offset, ((body->contents).text).size, body->encoding, &part_size); /* add text disclaimer */ processed_part_content = add_text_disclaimer(part_content, part_size, &processed_part_size, current_state->d_state); fs_give((void **) &part_content); old_data = body->contents.text.data; fs_give(&old_data); /* replace body content */ print_debug(LOG_DEBUG, "old size: %ld, new size %ld", body->contents.text.size, processed_part_size); /* TODO: WIP: now reencode the part to the encoding defined in the params section */ switch(body->encoding) { case ENCQUOTEDPRINTABLE: print_debug(LOG_DEBUG, "Reconverting: to quoted printable."); old_data = processed_part_content; processed_part_content = rfc822_8bit(processed_part_content, processed_part_size, &processed_part_size); fs_give(&old_data); break; case ENCBASE64: print_debug(LOG_DEBUG, "Reconverting: to base 64."); old_data = processed_part_content; processed_part_content = rfc822_binary(processed_part_content, processed_part_size, &processed_part_size); fs_give(&old_data); break; default: print_debug(LOG_DEBUG, "Reconverting: no conversion required."); break; } body->contents.text.data = processed_part_content; body->contents.text.size = processed_part_size; return BODY_PARSER_SUCCESS; } if(body->type == TYPETEXT && !strcasecmp(body->subtype, "html") && (current_state->d_state->operation_mode == SUBTYPE_HTML || current_state->d_state->operation_mode == SUBTYPE_BOTH) && (current_state->d_state->disclaimer_html_processed == 0)) { print_debug(LOG_DEBUG, "Got text/html part."); /* decode the current part and apply disclaimer */ /* decode the current part and apply disclaimer */ part_content = read_and_decode_part(current_state, (body->contents).offset, ((body->contents).text).size, body->encoding, &part_size); /* add text disclaimer */ processed_part_content = add_html_disclaimer(part_content, part_size, &processed_part_size, current_state->d_state); fs_give((void **) &part_content); old_data = body->contents.text.data; fs_give(&old_data); switch(body->encoding) { case ENCQUOTEDPRINTABLE: print_debug(LOG_DEBUG, "Reconverting: to quoted printable."); old_data = processed_part_content; processed_part_content = rfc822_8bit(processed_part_content, processed_part_size, &processed_part_size); fs_give(&old_data); break; case ENCBASE64: print_debug(LOG_DEBUG, "Reconverting: to base 64."); old_data = processed_part_content; processed_part_content = rfc822_binary(processed_part_content, processed_part_size, &processed_part_size); fs_give(&old_data); break; default: print_debug(LOG_DEBUG, "Reconverting: no conversion required."); break; } body->contents.text.data = processed_part_content; body->contents.text.size = processed_part_size; /* add the result to the resulting body */ return BODY_PARSER_SUCCESS; } else { print_debug(LOG_ERR, "Managing unknown content"); /* check attachment name */ /*if((par = body->parameter)) do { if((handler = current_state->attachment_rule)) do { if(!strcasecmp(par->attribute, "name") && !strcmp(par->value, handler->src)) { if(handler->dst == NULL) { syslog(LOG_DEBUG,"Skipping part."); return BODY_PARSER_SUCCESS; } else { if(par->value != NULL) fs_give((void **)&par->value); par->value=cpystr(handler->dst); syslog(LOG_DEBUG,"Renaming part."); } } } while((handler = handler->next)); syslog(LOG_DEBUG,"%s=%s", par->attribute, par->value); } while ((par = par->next));*/ /* perform copy */ part_content = read_part(current_state->in_stream, (body->contents).offset, ((body->contents).text).size); body->contents.text.data = part_content; //body->contents.text.size = processed_part_size; //part_size = ((body->contents).text).size; /* decode the current part */ //part_content = read_and_decode_part(current_state, // (body->contents).offset, ((body->contents).text).size, // body->encoding, &part_size); //if(part_content == NULL) // return BODY_PARSER_FAILURE; //part->body.parameter = copy_parameter(body); //print_debug(LOG_DEBUG, "Current part is %ld bytes long.", part_size); /* could scan the attachment for viruses */ //request_scan(part_content, part_size); //part->body.contents.text.data = part_content; //part->body.contents.text.size = part_size; //part_content[part_size] = '\0'; //part->next = NULL; /* if none of the rules were hit just copy the part to the resulting message */ return BODY_PARSER_SUCCESS; } return BODY_PARSER_SUCCESS; } PARAMETER * copy_parameter(BODY *body) { PARAMETER * cursor = NULL; PARAMETER * temp_parameter = NULL; PARAMETER * new_parameter = NULL; PARAMETER * par; /*copy the parameters from the old body to the new one */ if((par = body->parameter)) { do { new_parameter = mail_newbody_parameter(); new_parameter->next = NULL; new_parameter->attribute = cpystr(par->attribute); new_parameter->value = cpystr(par->value); if(cursor != NULL) { for(temp_parameter=cursor; temp_parameter->next!=NULL; temp_parameter=temp_parameter->next) ; temp_parameter->next=new_parameter; } else cursor = new_parameter; } while ((par = par->next)); return cursor; } return mail_newbody_parameter(); //return NULL; } /* read a MIME part * TODO: someone needs to fs_give the result!*/ unsigned char * read_part(FILE *stream, long offset, long size) { unsigned char * buffer = NULL; char * pos = NULL; int retval; long bytes_read = 0; buffer = (unsigned char *)fs_get(size + 1); //last char for the string terminator if(buffer == NULL) { print_debug(LOG_ERR, "read_part(): Fatal error(memory allocation): fs_get failed!"); return NULL; } pos = buffer; retval = fseek(stream, offset, SEEK_SET); if(retval == -1) { print_debug(LOG_ERR, "read_part(): fseek() failed!"); return NULL; } while((pos - (char *)buffer) < size) { bytes_read = fread(pos, 1, size, stream); pos += bytes_read; if(bytes_read == 0) break; } if((pos - (char *)buffer) != size) { print_debug(LOG_ERR, "read_part(): fread failed!"); return NULL; } /* Since our fputs_wrapper uses fputs we need to mark the end of the string. Since c-client does not send the length of the body we need to use strlen based stuff. */ buffer[size] = '\0'; /* now buffer contains the current part */ return buffer; } /* return the decoded content of a part */ char * read_and_decode_part(body_parser_state *current_state, long part_offset, long part_size, short part_encoding, long *p_decoded_part_size) { char *encoded_part_content; char *decoded_part_content; long decoded_part_size = 0; encoded_part_content = read_part(current_state->in_stream, part_offset, part_size); if(encoded_part_content == NULL) return NULL; /* now decode the content we've just read */ switch(part_encoding) { case ENCBASE64: decoded_part_content = rfc822_base64(encoded_part_content, part_size, &decoded_part_size); fs_give((void **)&encoded_part_content); break; case ENCQUOTEDPRINTABLE: decoded_part_content = rfc822_qprint(encoded_part_content, part_size, &decoded_part_size); fs_give((void **)&encoded_part_content); break; case ENC7BIT: decoded_part_content = encoded_part_content; decoded_part_size = part_size; break; case ENC8BIT: decoded_part_content = encoded_part_content; decoded_part_size = part_size; break; case ENCBINARY: decoded_part_content = encoded_part_content; decoded_part_size = part_size; break; default: print_debug(LOG_ERR, "Called with bad encoding %d", part_encoding); //free((void **)&decoded_part_content); free((void **)&encoded_part_content); return NULL; } *p_decoded_part_size = decoded_part_size; return decoded_part_content; } /* free memory */ int body_parser_destroy(body_parser_state *current_state) { print_debug(LOG_DEBUG, "body_parser_destroy: Freeing envelope and old body"); mail_free_envelope(¤t_state->envelope); mail_free_body(¤t_state->body); current_state->envelope = NULL; current_state->body = NULL; /* TODO: free attachment handler list */ print_debug(LOG_DEBUG, "body_parser_destroy: Freeing attachment list"); free_attachment_rule_list(current_state->attachment_rule); /* free the HTML disclaimer - c-client block replaced the stack char array in init */ fs_give((void **)&(current_state->d_state->disclaimer_html)); print_debug(LOG_DEBUG, "body_parser_destroy: body"); mail_free_body(¤t_state->body); return BODY_PARSER_SUCCESS; } void free_attachment_rule_list(attachment_handling_rule * att_list) { attachment_handling_rule *current_handler, *previous_handler; if(att_list == NULL) return; current_handler = att_list; previous_handler = current_handler; while(current_handler != NULL) { if(current_handler->src != NULL) fs_give((void **)¤t_handler->src); if(current_handler->dst != NULL) fs_give((void **)¤t_handler->src); previous_handler = current_handler; current_handler = current_handler->next; free(previous_handler); } att_list = NULL; } /* fill in the attachment structure */ int body_parser_fill_attachment_structure(BODY *body, char *pfx, long i, body_parser_state *current_state) { char tmp[MAILTMPLEN]; char *s = tmp; PARAMETER *par; PART *part; //attachment_handling_rule *att_handler; //attachment_handling_rule *cursor; if(body->type == TYPEMULTIPART) { /* if not first time, extend prefix */ if(pfx) sprintf(tmp,"%s%ld.",pfx,++i); else tmp[0] = '\0'; for(i = 0, part = body->nested.part; part; part = part->next) body_parser_fill_attachment_structure(&part->body,tmp,i++, current_state); } else { /* non-multipart, output oneline descriptor */ if(!pfx) pfx = ""; /* dummy prefix if top level */ sprintf (s,">>> %s%ld %s",pfx,++i,body_types[body->type]); if(body->subtype) sprintf(s += strlen (s),"/%s",body->subtype); if(body->description) sprintf(s += strlen (s)," (%s)",body->description); if((par = body->parameter)) do sprintf(s += strlen (s),";%s=%s",par->attribute,par->value); while ((par = par->next)); if(body->id) sprintf (s += strlen (s),", id = %s",body->id); /**** NOT IMPLEMENTED IN CONFIG *********/ //print_debug(LOG_DEBUG, "body_parser_fill_attachment_structure: %s", tmp); /* look for the name parameter */ //if((par = body->parameter)) // do // { // if(!strcasecmp(par->attribute, "name")) // { // /* fill in the attachment handling structure with all the attachment names */ // att_handler = malloc(sizeof(attachment_handling_rule)); // if(att_handler == NULL) // { // print_debug(LOG_ERR, "Malloc failed on attachment handling structure."); // return BODY_PARSER_FAILURE; // } // att_handler->src = cpystr(par->value); // att_handler->dst = NULL; // att_handler->next = NULL; // // /* add the new attachment name */ // if(current_state->attachment_rule != NULL) // { // for(cursor=current_state->attachment_rule; cursor->next!=NULL;cursor=cursor->next) // ; // cursor->next = att_handler; // } // else // current_state->attachment_rule = att_handler; // } // syslog(LOG_DEBUG,"%s=%s", par->attribute, par->value); // } // while ((par = par->next)); /* encapsulated message? */ if((body->type == TYPEMESSAGE) && !strcmp (body->subtype,"RFC822") && (body = body->nested.msg->body)) { if (body->type == TYPEMULTIPART) body_parser_fill_attachment_structure(body,pfx,i-1, current_state); else { /* build encapsulation prefix */ sprintf (tmp,"%s%ld.",pfx,i); body_parser_fill_attachment_structure(body,tmp,(long) 0, current_state); } } } return BODY_PARSER_SUCCESS; } /* dummy attachment processor */ void process_attachment_names(attachment_handling_rule * att_rule) { attachment_handling_rule *handler; char temp[500]; syslog(LOG_ERR, "Attachment names"); if((handler = att_rule)) do { if(handler->src != NULL && strlen(handler->src) < 495) sprintf(temp, "%s.%s", handler->src, "dat"); else strcpy(temp, "Unknown.dat"); handler->dst = cpystr(temp); syslog(LOG_DEBUG, "Att name: %s=%s",handler->src, handler->dst); } while ((handler = handler->next)); } /* c-client callbacks We are only using a couple of these (mm_log while parsing) but c-client.a needs to link against these symbols */ void mm_searched(MAILSTREAM *stream,unsigned long number) { } void mm_exists(MAILSTREAM *stream,unsigned long number) { } void mm_expunged(MAILSTREAM *stream,unsigned long number) { } void mm_flags(MAILSTREAM *stream,unsigned long number) { } void mm_notify(MAILSTREAM *stream,char *string,long errflg) { mm_log (string,errflg); } void mm_list(MAILSTREAM *stream,int delimiter,char *mailbox,long attributes) { } void mm_lsub(MAILSTREAM *stream,int delimiter,char *mailbox,long attributes) { } void mm_status(MAILSTREAM *stream,char *mailbox,MAILSTATUS *status) { } void mm_log(char *string,long errflg) { switch((short) errflg) { case NIL: print_debug(LOG_DEBUG, "body_parser: mm_log: [%s]",string); break; case PARSE: case WARN: print_debug(LOG_DEBUG, "body_parser: mm_log: %%%s",string); break; case ERROR: print_debug(LOG_ERR, "body_parser: mm_log: ?%s",string); break; } } void mm_dlog(char *string) { print_debug(LOG_ERR, "body_parser: mm_dlog: %s", string); } void mm_login(NETMBX *mb,char *user,char *pwd,long trial) { } void mm_critical(MAILSTREAM *stream) { } void mm_nocritical(MAILSTREAM *stream) { } long mm_diskerror(MAILSTREAM *stream,long errcode,long serious) { kill(getpid (), 9); return NIL; } void mm_fatal(char *string) { print_debug(LOG_ERR, "body_parser: mm_fatal: ?%s", string); } long fputs_wrapper(void *stream, char *string) { if(fputs((char *)string, (FILE *)stream)==EOF) return NIL; else return T; }