# File: message.py # Purpose: a message instance import string import os import os.path import time import cStringIO import binascii import tempfile import pynei18n import msgeditbox import rfc822 import copy import utils import base64 import mimify import mimetypes import mimetools from pyneheaders import * def mime_decode(headerdict, headers): """ Decode these headers :-( """ for i in headers: if headerdict.has_key(i): headerdict[i] = mimify.mime_decode_header(headerdict[i]) def parse_2_body_and_headers(head_and_body): """ Parse a message header+body to headers (as a dictionary) and body text """ # Headers are terminated by a double newline. endhead = string.find(head_and_body, "\n\n") # Header and upper case version header_text = head_and_body[0:endhead+1] body_text = head_and_body[endhead+2:] # Parse headers to a dictionary header_dict = headers_2_dict( cStringIO.StringIO(header_text) ) return (header_dict, body_text) def headers_2_dict(f): """ Takes an open file object of message headers and parses the headers to a dictionary. """ keys = {} lastkey = None while 1: s = f.readline() if s == "": break s = string.replace(s, "\r", "") s = string.replace(s, "\n", "") s = string.replace(s, "\t", " ") keyname = string.lower( string.split(s, ":")[0] ) # if we found if len(string.split(s, ":")) > 1 and s[:1] != " ": keys[keyname] = string.join(string.split(s,":")[1:], ":") # remove leading space if keys[keyname][:1] == " ": keys[keyname] = keys[keyname][1:] lastkey = keyname elif lastkey != None: keys[lastkey] = keys[lastkey] + s # Evil white space for i in keys.keys(): keys[i] = string.strip(keys[i]) return keys class pynemsg: """ An email or news article """ def __init__(self): self.headers = {} # Format (seconds since epoch) self.date = 0 # to decide when to expire news articles self.date_received = 0 # Message text (inc headers) self.body = "" self.opts = 0 # messages waiting in the outbox will additionally have the # uid of the box they came from in: self.senduid = None # Message parts. This will contain strings of encoded attachments # including the attachment header thingy self.parts_text = [] self.parts_header = [] def make_source(self, presend=0): """ Take a message and it's contents (attachments and such like) and create a single body with all the headers required to post. Set presend=1 before actual posting to include full attachments. """ import pyne # for pyne.ver_string mimenc = mimify.mime_encode_header num_parts = len(self.parts_text) body = "" ############# HEADERS if self.headers.has_key("from"): body = mimenc("From: "+self.headers["from"]+"\n") if self.headers.has_key("reply-to"): body = body + mimenc("Reply-To: "+self.headers["reply-to"]+"\n") if self.headers.has_key("organization"): body = body + mimenc("Organization: "+self.headers["organization"]+"\n") if self.headers.has_key("to"): body = body + mimenc("To: "+self.headers["to"]+"\n") if self.headers.has_key("subject"): body = body + mimenc("Subject: "+self.headers["subject"]+"\n") # date stamp of when last edited body = body + time.strftime("Date: %a, %d %b %Y %H:%M:%S +0000\n", time.localtime(self.date)) if self.headers.has_key("references"): body = body + "References: "+self.headers["references"]+"\n" if self.headers.has_key("newsgroups"): body = body + "X-Newsreader: "+pyne.ver_string+"\n" else: body = body + "X-Mailer: "+pyne.ver_string+"\n" # Content type if num_parts == 1: body = body + "Content-Type: text/plain\n" else: body = body + "Content-Type: multipart/mixed; boundary=\""+multi_part_boundary+"\"\n" body = body + "MIME-Version: 1.0\n" # Confirm delivery if self.headers.has_key("return-receipt-to"): body = body + "Return-Receipt-To: "+self.headers["from"]+"\n" # Optional Cc if self.headers.has_key("cc"): if self.headers["cc"] != "": body = body + mimenc("Cc: "+self.headers["cc"]+"\n") if self.headers.has_key("bcc"): if self.headers["bcc"] != "": body = body + mimenc("Bcc: "+self.headers["bcc"]+"\n") if presend == 0: body = body + "Message-ID: "+self.headers["message-id"]+"\n" # news stuff: if self.headers.has_key("newsgroups"): body = body + "Lines: "+str(len(string.split(self.parts_text[0], "\n")))+"\n" body = body + "Newsgroups: "+self.headers["newsgroups"]+"\n" # simple single part messages may simply have the body grafted # on and that's all. if num_parts == 1: if presend: body = body + "\n" + utils.string_line_wrap (self.parts_text[0], 76) else: body = body + "\n" + self.parts_text[0] self.body = body return else: # It's a multi-part message. # Add the plain text bit first. body = body + "\nThis is a multi-part message in MIME format.\n" body = body + "\n--" + multi_part_boundary + "\n" body = body + "Content-Type: text/plain\n" body = body + "Content-Transfer-Encoding: 8bit\n\n" if presend: body = body + utils.string_line_wrap (self.parts_text[0], 76) + "\n" else: body = body + self.parts_text[0] + "\n" # Then add the other parts for x in range(1, num_parts): body = body + "--" + multi_part_boundary + "\n" body = body + "Content-Type: "+self.parts_header[x]["content-type"]+"\n" body = body + "Content-Transfer-Encoding: "+self.parts_header[x]["content-transfer-encoding"]+"\n" body = body + "Content-Disposition: "+self.parts_header[x]["content-disposition"]+"\n\n" body = body + self.parts_text[x] # And terminate body = body + "--" + multi_part_boundary + "--\n" self.body = body return def edit(self, folder, user, is_new_msg=0): """ Create a message composing window and let it do the hard work. """ # If the message has already been sent then # we want to make a copy of it in the outbox # and edit that. if self.opts & MSG_SENT: outbox = user.get_folder_by_uid("outbox") # Copy message, but change message-id msg = self msg.headers = copy.copy(self.headers) del msg.headers["message-id"] msg.opts = msg.opts & ~(MSG_SENT) # This should give it a message id outbox.save_new_article(msg) outbox.changed = 1 user.update() msgeditbox.msgeditbox(msg, outbox, user, 1) else: msgeditbox.msgeditbox(self, folder, user, is_new_msg) def parseheaders(self, user, headers_only=0): """ The body now contains headers and body text. Extract sender, subject and date and split up if it's multi-part. Pass headers_only=1 if you don't need bodies to be parsed (multipart decoded, etc..) """ # Convert and "\r\n"s to "\n" self.body = string.replace(self.body, "\r\n", "\n") # Wipe the contents. We are going to get that self.parts_text = [] self.parts_header = [] part_header, part_text = parse_2_body_and_headers(self.body) self.parts_text.append(part_text) self.parts_header.append(part_header) self.headers = part_header # Too long and flatfile boxformat craps itself if self.headers.has_key("message-id"): self.headers["message-id"] = self.headers["message-id"][:186] # We *need* some header fields if not self.headers.has_key("subject"): self.headers["subject"] = "" if not self.headers.has_key("from"): self.headers["from"] = "" # Mime decode some headers mime_decode(self.headers, [ "subject", "from", "to", "cc", "bcc", "reply-to", "organization" ]) # Clean up dodgy references if self.headers.has_key("references"): s = self.headers["references"] l = [] while string.find(s, "<") != -1: x = string.find(s, "<") y = string.find(s, ">") l.append(s[x:y+1]) s = s[y+1:] if y == -1: break self.headers["references"] = string.join(l, " ") # Get a nice gmtime format time from the one in the header if self.headers.has_key("date"): self.date = rfc822.parsedate(self.headers["date"]) # rfc822.parsedate is really anal and fails on small # padding errors in the date line. XXX XXX if self.date == None: self.date = int(time.time()) # for mentally retarded mailers, 2 digit years elif self.date[0] < 1000: # date[0] is year year = 2000 + (self.date[0] % 100) try: self.date = int(time.mktime((year,) + self.date[1:])) except OverflowError: # fuck it... self.date = int(time.time ()) else: self.date = int(time.mktime(self.date)) else: # Just set current time... self.date = int(time.time()) # We are parsing a message with headers only if headers_only == 1: self.opts = self.opts | MSG_NO_BODY return # we do have a body >:-( self.opts = self.opts & (~MSG_NO_BODY) # Break multipart messages up i = 0 while i < len(self.parts_text): headers = self.parts_header[i] body = self.parts_text[i] if not headers.has_key("content-type"): # not a multi-part section i = i+1 continue content_type = string.lower(headers["content-type"]) if string.find(content_type, "boundary=") == -1: # not a multi-part section i = i+1 continue else: # get boundary string start_boundary = 9 + string.find(content_type, "boundary=") #end_boundary = start_boundary + string.find(content_type[start_boundary:], "\"") boundary = headers["content-type"][start_boundary:]#end_boundary] if boundary[0] == "\"": boundary = boundary[1:] if boundary[-1] == "\"": boundary = boundary[:-1] # get chunks of body between this boundary offset = b = 0 subtexts = [] subheads = [] while 1: # find start of a boundary b = string.find(body[offset:], "--"+boundary) if b == -1: # no attachment break if b == string.find(body[offset:], "--"+boundary+"--"): # end of attachments break # line after start boundary b = offset + b + len(boundary) + 3 # 3==len("--"+"\n") # stupid messages with no terminating boundary if string.find(body[b:], "--"+boundary) == -1: offset = len(body) else: # end boundary offset = string.find(body[b:], "--"+boundary) + b # append section head, text = parse_2_body_and_headers(body[b:offset]) subheads.append(head) subtexts.append(text) # dump new bits on # remove this part, since it isn't a single part if len(subheads): del self.parts_header[i] del self.parts_text[i] # now add the seperate parts it was composed of for x in range(0, len(subheads)): self.parts_header.insert(i+x, subheads[x]) self.parts_text.insert(i+x, subtexts[x]) else: i = i+1 # Some very strange messages don't have text bodies # If they are binary we really don't want to stick them in a GtkText... if self.parts_header[0].has_key("content-type"): if string.lower(self.parts_header[0]["content-type"])[:4] != "text": self.parts_header.insert(0, {}) self.parts_text.insert(0, "\n") # Check text part 0 for yenc stuff inline # Ignore that shit-arsed subject line yenc stuff not_yenc = "" pos = 0 endpos = 0 bod = self.parts_text[0] found_yenc = 0 while 1: npos = string.find(bod[pos:], "=ybegin") if npos == -1: break if (npos == 0) or (bod[pos+npos-1] == '\n'): pass else: # Not at start of a line. Reject. pos = pos + npos + 1 continue # Multi part. skip next '=ypart' line pos = pos + npos # Add bit before this to not uuenc not_yenc = not_yenc + bod[endpos:pos] endheadpos = pos+string.find(bod[pos:], '\n') hdr = bod[pos:endheadpos] print hdr # find body span endpos = string.find(bod[pos:], "\n=yend") if endpos == -1: break # skip 2nd header line '=ypart' if it exists temp = string.find(bod[pos:], "=ypart") if temp != -1: endheadpos = pos+temp+string.find(bod[pos+temp:], '\n') found_yenc = 1 yenc_part = bod[endheadpos+1:endpos+pos]+"\n" print "CUNT: "+str(hdr) # get headers yenc_head = {} hdrsplit = hdr.split() # ignoring '=ybegin' line last_key = None for h in hdrsplit[1:]: try: head, data = h.split("=", 1) except ValueError: if last_key: yenc_head["yenc_"+last_key] = yenc_head["yenc_"+last_key] + " " + h continue else: print "Strange yenc header: '%s'" % h continue last_key = head yenc_head["yenc_"+head] = data if yenc_head.has_key("yenc_name"): filename = yenc_head["yenc_name"] mimetype = mimetypes.guess_type(filename)[0] else: filename = "noname" mimetype = "application/octet-stream" yenc_head["content-transfer-encoding"] = "yenc" yenc_head["content-type"] = "%s; name=\"%s\"" % (mimetype, filename) self.parts_text.insert(1, yenc_part) self.parts_header.insert(1, yenc_head) pos = endpos if found_yenc: # Set original part 0 to minus uuenc bit self.parts_text[0] = not_yenc # Check text part 0 for uuencoded stuff inline not_uuenc = "" pos = 0 endpos = 0 bod = self.parts_text[0] found_uuenc = 0 while 1: npos = string.find(bod[pos:], "begin") if npos == -1: if found_uuenc == 1: break # None found yet. But maybe it is that retarded # kind with no begin header at all # (part of a huge split up file) x = string.find(bod, "\nM") y = string.find(bod[x+1:], "\nM") z = string.find(bod[x+y+2:], "\nM") #print "dodgy uuenc",x,y,z # find marked end (if there is one) endpos = string.find(bod, "\n`\nend") if endpos == -1: endpos = string.find(bod, "\nend") if endpos == -1: endpos = len(bod) # Three lines in a row starting with 'M', # 61 words long each. Looks like a uuencoded thingy if x != -1 and y == 61 and y == 61: uuenc_part = bod[:endpos].strip()+"\n" mimetype = "application/octet-stream" uuenc_head = { "content-transfer-encoding": "uuencode", "content-type": "%s; name=\"%s\"" % (mimetype, "noname") } self.parts_text.insert(1, uuenc_part) self.parts_header.insert(1, uuenc_head) found_uuenc = 1 break if (npos == 0) or (bod[pos+npos-1] == '\n'): pass else: pos = pos + npos + 1 continue pos = pos + npos # Add bit before this to not uuenc not_uuenc = not_uuenc + bod[endpos:pos] endheadpos = pos+string.find(bod[pos:], '\n') hdr = bod[pos:endheadpos] #print hdr hdrfields = string.split(hdr, " ", 2) # Should be: ("begin", "644 (or some mode)", "filename") if len(hdrfields) == 3 and hdrfields[0] == 'begin': try: int(hdrfields[1], 8) except ValueError: break else: break found_uuenc = 1 #print hdrfields # filename hdrfields[2] = string.strip(hdrfields[2]) # find body span endpos = string.find(bod[pos:], "\n`\nend") if endpos == -1: endpos = string.find(bod[pos:], "\nend") if endpos == -1: endpos = len(bod) else: endpos = endpos + pos uuenc_part = bod[endheadpos+1:endpos].strip()+"\n" mimetype = mimetypes.guess_type(hdrfields[2])[0] if mimetype == None: mimetype = "application/octet-stream" uuenc_head = { "content-transfer-encoding": "uuencode", "content-type": "%s; name=\"%s\"" % (mimetype, hdrfields[2]) } self.parts_text.insert(1, uuenc_part) self.parts_header.insert(1, uuenc_head) pos = endpos if found_uuenc: # Set original part 0 to minus uuenc bit self.parts_text[0] = not_uuenc # We don't like empty first parts if there are text parts after it while len(self.parts_header) >= 2: if string.strip(self.parts_text[0]) == "" and self.parts_header[1].has_key("content-type"): if self.parts_header[1]["content-type"][:4] == "text": del self.parts_text[0] del self.parts_header[0] continue else: break else: break def external_parser(self, user, index): # Parse html bodies to plain text if self.get_attachment_info(index)[0] == "text/html": # Get a temporary filename tempfilename = tempfile.mktemp(".html") # Open it and write the html to it temp = open(tempfilename, "w") temp.write(self.decode_attachment(index)) temp.close() # get output from user's html parsy proggy f = os.popen(user.html_parser+" %s" % tempfilename) parsed = f.read() os.remove(tempfilename) f.close() if parsed == "": # um. failed. maybe the prog doesn't exist parsed = self.parts_text[index] return parsed else: return self.decode_attachment(index) def decode_attachment(self, index): """ Decode attachment. Return as string. Big fucking string. May return None if we fail to decode. Stuff that is unencoded should pass through unchanged. """ content_type, filename, size, content_enc = self.get_attachment_info(index) if content_enc == "base64": f = cStringIO.StringIO(self.parts_text[index]) o = cStringIO.StringIO() utils.line_decoder(f, o, binascii.a2b_base64) o.seek(0) decoded = o.read() f.close() o.close() elif content_enc == "yenc": # A C version would be so much faster i = self.parts_text[index] o = cStringIO.StringIO() # Try for nice fast C yenc decoder # Stick the shit in a tempfile tempfilename = tempfile.mktemp(".yenc") temp = open(tempfilename, "w") temp.write(self.parts_text[index]) temp.close() f = os.popen("yencdec %s" % tempfilename) decoded = f.read() os.remove(tempfilename) f.close() # Damn. Probably yencdec not found. Default to slow python decoder if decoded == "": print "yencdec not found. using slower python decoder" # Iterate through characters # XXX nukes on escape on last char.. fix pos = 0 while pos < len(i): c = i[pos] pos += 1 if c == '\n': continue elif c == '=': # Escape character if pos >= len(i)-1: # End of input. can't grab escaped char break c = i[pos] pos += 1 # I wish they looped like real 8 bitty things c = chr((ord(c)-106)%256) else: # Normal character c = chr((ord(c)-42)%256) o.write(c) o.seek(0) decoded = o.read() elif content_enc == "uuencode": f = cStringIO.StringIO(self.parts_text[index]) o = cStringIO.StringIO() utils.line_decoder(f, o, binascii.a2b_uu) o.seek(0) decoded = o.read() f.close() o.close() elif content_enc == "quoted-printable": f = cStringIO.StringIO(self.parts_text[index]) o = cStringIO.StringIO() try: mimetools.decode(f, o, "quoted-printable") except Exception, e: print "Error decoding attachment: %s" % str(e) return None else: o.seek(0) decoded = o.read() f.close() o.close() else: if not content_enc in ["unknown", "7bit", "8bit"]: print "Unknown content encoding:", content_enc # pass through undecoded # line wrap if the lines are huge though decoded = self.parts_text[index] return decoded def save_attachment(self, index, filename): """ Save attachment parts[index] to file 'filename'. """ f = open(filename, "w") f.write(self.decode_attachment(index)) f.close() def add_attachment(self, filename): """ Add a base64 encoded attachment of file 'filename' onto the message. """ # Read the file to be attached try: f = open(filename, "r") except IOError: return rawfile = f.read() f.close() # get mime type mimetype = mimetypes.guess_type(filename)[0] if mimetype == None: mimetype = "application/octet-stream" # Truncate filenames like '/home/yourname/file.txt' # to 'file.txt' smallname = os.path.basename(filename) part_header = {} part_text = base64.encodestring(rawfile) # Create headers part_header["content-type"] = "%s; name=\"%s\"" % (mimetype, smallname) part_header["content-transfer-encoding"] = "base64" part_header["content-disposition"] = "attachment; filename=\"%s\"" % smallname # Add to message self.parts_text.append(part_text) self.parts_header.append(part_header) def get_attachment_info(self, index): """ Returns list of attachment data in the form: [ content-type, filename, size, content-transfer-encoding ] """ part_header = self.parts_header[index] part_text = self.parts_text[index] # Get content-type if part_header.has_key("content-type"): content_type = part_header["content-type"] x = string.find(content_type, ";") if x != -1: content_type = content_type[:x] else: content_type = "unknown" # Get filename if part_header.has_key("content-id"): filename = os.path.basename( part_header["content-id"] ) elif part_header.has_key("content-disposition"): content_disposition = part_header["content-disposition"] x = string.find(content_disposition, "filename=\"") if x==-1: filename = "noname" else: y = string.find(content_disposition[x+10:], "\"") filename = content_disposition[x+10:x+10+y] elif part_header.has_key("content-type"): content_disposition = part_header["content-type"] x = string.find(content_disposition, "name=\"") if x==-1: filename = "noname" else: y = string.find(content_disposition[x+6:], "\"") filename = content_disposition[x+6:x+6+y] else: filename = "noname" # Get content-transfer-type if part_header.has_key("content-transfer-encoding"): content_transfer_encoding = string.lower(part_header["content-transfer-encoding"]) else: content_transfer_encoding = "unknown" # Ignores if we have been decoded yet length = str(len(part_text)) return [ content_type, filename, length, content_transfer_encoding ]