# ports//news/pyne/work/pyne-1.1.0/src/pynemsg.py

# File: pynemsg.py
# Purpose: a message instance (an email or news article) and the
#          helpers used to parse its headers

import string
import os
import os.path
import time
import cStringIO
import binascii
import tempfile
import pynei18n
import msgeditbox
import rfc822
import copy
import utils
import base64
import mimify
import mimetypes
import mimetools
from pyneheaders import *

def mime_decode(headerdict, headers):
	"""
	MIME-decode a selection of header fields in place.

	headerdict -- dictionary of header name -> header value; modified
	              in place.
	headers    -- sequence of header names to decode. Names that are
	              not present in headerdict are skipped.
	"""
	for field in headers:
		if not headerdict.has_key(field):
			continue
		raw_value = headerdict[field]
		headerdict[field] = mimify.mime_decode_header(raw_value)

def parse_2_body_and_headers(head_and_body):
	"""
	Parse a message header+body to headers (as a
	dictionary) and body text.

	Returns a tuple (header_dict, body_text).

	Headers are terminated by the first blank line. Three shapes of
	input have no usable header block and are handled explicitly:

	- text starting with a newline has no headers at all; everything
	  after that newline is body
	- text with no blank line whose first line looks like a header
	  field ("Name: value") is treated as headers only, with an empty
	  body
	- any other text with no blank line is treated as body only
	"""
	# A leading blank line means the header block is empty.
	if head_and_body[:1] == "\n":
		return ({}, head_and_body[1:])

	# Headers are terminated by a double newline.
	endhead = head_and_body.find("\n\n")

	if endhead == -1:
		# No separator at all. Decide from the first line whether this
		# is a bare header block or a bare body.
		first_line = head_and_body.split("\n", 1)[0]
		colon = first_line.find(":")
		if colon > 0 and first_line[:colon].find(" ") == -1:
			header_text = head_and_body
			body_text = ""
		else:
			return ({}, head_and_body)
	else:
		# Keep the newline that ends the last header line
		header_text = head_and_body[0:endhead+1]
		body_text   = head_and_body[endhead+2:]

	# Parse headers to a dictionary
	header_dict = headers_2_dict( cStringIO.StringIO(header_text) )

	return (header_dict, body_text)

def headers_2_dict(f):
	"""
	Read header lines from the open file object 'f' until end of file
	and return them as a dictionary.

	Keys are the lower-cased header names; values are the header
	text with surrounding white space removed. Tabs are turned into
	spaces. A line that has no colon, or that starts with a space,
	is appended to the most recently seen header (folded headers).
	If a header name occurs more than once the last one wins.
	"""
	parsed = {}
	current = None

	line = f.readline()
	while line != "":
		line = line.replace("\r", "").replace("\n", "").replace("\t", " ")

		colon = line.find(":")
		if colon != -1 and line[:1] != " ":
			# A new "Name: value" field
			current = line[:colon].lower()
			value = line[colon+1:]
			# remove a single leading space
			if value[:1] == " ":
				value = value[1:]
			parsed[current] = value
		elif current is not None:
			# Continuation of the previous field
			parsed[current] = parsed[current] + line

		line = f.readline()

	# Trim stray white space from every value
	for name in parsed.keys():
		parsed[name] = parsed[name].strip()
	return parsed

class pynemsg:
	"""
	An email or news article
	"""
	def __init__(self):
		self.headers = {}
		# Format (seconds since epoch)
		self.date = 0
		# to decide when to expire news articles
		self.date_received = 0
		# Message text (inc headers)
		self.body = ""
		self.opts = 0
		# messages waiting in the outbox will additionally have the
		# uid of the box they came from in:
		self.senduid = None
		# Message parts. This will contain strings of encoded attachments
		# including the attachment header thingy
		self.parts_text = []
		self.parts_header = []

	def make_source(self, presend=0):
		"""
		Take a message and its contents (attachments and such like)
		and create a single body with all the headers required to
		post. The result is stored in self.body; nothing is returned.

		Set presend=1 before actual posting: the text part is then
		line wrapped at 76 columns and no Message-ID header is
		written.

		Headers that are not present in self.headers (or, for
		attachments, in the part's own header dictionary) are simply
		left out. A message without any parts is written as a single
		empty text part.
		"""
		import pyne # for pyne.ver_string

		mimenc = mimify.mime_encode_header
		hdrs = self.headers
		boundary = multi_part_boundary

		num_parts = len(self.parts_text)
		if num_parts > 0:
			main_text = self.parts_text[0]
		else:
			main_text = ""
		if presend:
			main_text = utils.string_line_wrap (main_text, 76)

		# Collect the pieces and join them once at the end
		out = []

		############# HEADERS
		if hdrs.has_key("from"):
			out.append(mimenc("From: "+hdrs["from"]+"\n"))
		if hdrs.has_key("reply-to"):
			out.append(mimenc("Reply-To: "+hdrs["reply-to"]+"\n"))
		if hdrs.has_key("organization"):
			out.append(mimenc("Organization: "+hdrs["organization"]+"\n"))
		if hdrs.has_key("to"):
			out.append(mimenc("To: "+hdrs["to"]+"\n"))
		if hdrs.has_key("subject"):
			out.append(mimenc("Subject: "+hdrs["subject"]+"\n"))
		# date stamp of when last edited
		# NOTE(review): this prints local time but labels it +0000.
		# parseheaders() reads the stamp back as local time too, so the
		# pair round-trips; fix both together if the zone is corrected.
		out.append(time.strftime("Date: %a, %d %b %Y %H:%M:%S +0000\n", time.localtime(self.date)))
		if hdrs.has_key("references"):
			out.append("References: "+hdrs["references"]+"\n")
		if hdrs.has_key("newsgroups"):
			out.append("X-Newsreader: "+pyne.ver_string+"\n")
		else:
			out.append("X-Mailer: "+pyne.ver_string+"\n")
		# Content type
		if num_parts <= 1:
			out.append("Content-Type: text/plain\n")
		else:
			out.append("Content-Type: multipart/mixed; boundary=\""+boundary+"\"\n")
		out.append("MIME-Version: 1.0\n")
		# Confirm delivery (receipts go to the From address)
		if hdrs.has_key("return-receipt-to") and hdrs.has_key("from"):
			out.append("Return-Receipt-To: "+hdrs["from"]+"\n")
		# Optional Cc
		if hdrs.has_key("cc"):
			if hdrs["cc"] != "":
				out.append(mimenc("Cc: "+hdrs["cc"]+"\n"))
		# NOTE(review): Bcc is written into the source even when
		# presend=1. Confirm the sending code strips it before
		# transmission, otherwise blind recipients are disclosed.
		if hdrs.has_key("bcc"):
			if hdrs["bcc"] != "":
				out.append(mimenc("Bcc: "+hdrs["bcc"]+"\n"))
		if presend == 0 and hdrs.has_key("message-id"):
			out.append("Message-ID: "+hdrs["message-id"]+"\n")
		# news stuff:
		if hdrs.has_key("newsgroups"):
			# count the text lines as they are actually written
			out.append("Lines: "+str(len(string.split(main_text, "\n")))+"\n")
			out.append("Newsgroups: "+hdrs["newsgroups"]+"\n")

		if num_parts <= 1:
			# simple single part messages may simply have the body
			# grafted on and that's all.
			out.append("\n" + main_text)
		else:
			# It's a multi-part message.
			# Add the plain text bit first.
			out.append("\nThis is a multi-part message in MIME format.\n")
			out.append("\n--" + boundary + "\n")
			out.append("Content-Type: text/plain\n")
			out.append("Content-Transfer-Encoding: 8bit\n\n")
			out.append(main_text + "\n")

			# Then add the other parts
			part_fields = ( ("Content-Type", "content-type"),
			                ("Content-Transfer-Encoding", "content-transfer-encoding"),
			                ("Content-Disposition", "content-disposition") )
			for x in range(1, num_parts):
				part_head = self.parts_header[x]
				part_text = self.parts_text[x]
				out.append("--" + boundary + "\n")
				# Parts produced by parseheaders (inline yenc/uuencode,
				# sparse MIME parts) may lack some of these fields
				for field, key in part_fields:
					if part_head.has_key(key):
						out.append(field + ": " + part_head[key] + "\n")
				out.append("\n")
				out.append(part_text)
				# the next boundary must start on a line of its own
				if part_text[-1:] != "\n":
					out.append("\n")

			# And terminate
			out.append("--" + boundary + "--\n")

		self.body = string.join(out, "")
		return

	def edit(self, folder, user, is_new_msg=0):
		"""
		Create a message composing window and let it do the hard work.

		folder     -- folder the message lives in; handed to the
		              composer for messages that have not been sent.
		user       -- provides get_folder_by_uid() and update().
		is_new_msg -- passed through to the composer.

		A message that has already been sent (MSG_SENT set in
		self.opts) is not edited directly. A copy without Message-ID
		and without the MSG_SENT flag is saved to the outbox and that
		copy is edited, so the sent message itself stays untouched.
		"""
		if self.opts & MSG_SENT:
			outbox = user.get_folder_by_uid("outbox")
			# Copy message, but change message-id. The containers are
			# copied as well so edits to the copy can't leak back into
			# the sent message.
			msg = copy.copy(self)
			msg.headers = copy.copy(self.headers)
			msg.parts_text = self.parts_text[:]
			msg.parts_header = [copy.copy(h) for h in self.parts_header]
			if msg.headers.has_key("message-id"):
				del msg.headers["message-id"]
			msg.opts = msg.opts & ~(MSG_SENT)
			# This should give it a message id
			outbox.save_new_article(msg)
			outbox.changed = 1

			user.update()
			msgeditbox.msgeditbox(msg, outbox, user, 1)
		else:
			msgeditbox.msgeditbox(self, folder, user, is_new_msg)

	def parseheaders(self, user, headers_only=0):
		"""
		self.body contains headers and body text. Parse it into
		self.headers, self.date, self.parts_header and self.parts_text.

		Multipart bodies are split into their parts (nested ones too),
		and yEnc / uuencoded blocks found inline in the first text
		part are moved into attachment parts of their own. Text that
		surrounds such a block stays in part 0.

		user         -- not used by this method.
		headers_only -- pass 1 if you don't need bodies to be parsed
		                (multipart decoded, etc..). MSG_NO_BODY is then
		                set in self.opts and parsing stops after the
		                headers.
		"""
		# Convert any "\r\n"s to "\n"
		self.body = string.replace(self.body, "\r\n", "\n")

		# Wipe the contents. We are going to rebuild them
		self.parts_text = []
		self.parts_header = []

		part_header, part_text = parse_2_body_and_headers(self.body)
		self.parts_text.append(part_text)
		self.parts_header.append(part_header)
		self.headers = part_header

		# Too long and the flatfile box format breaks
		if self.headers.has_key("message-id"):
			self.headers["message-id"] = self.headers["message-id"][:186]

		# We *need* some header fields
		if not self.headers.has_key("subject"):
			self.headers["subject"] = ""
		if not self.headers.has_key("from"):
			self.headers["from"] = ""

		# Mime decode some headers
		mime_decode(self.headers, [ "subject", "from", "to", "cc", "bcc", "reply-to", "organization" ])

		# Clean up dodgy references: keep only complete "<...>" ids
		if self.headers.has_key("references"):
			s = self.headers["references"]
			l = []
			while 1:
				x = string.find(s, "<")
				if x == -1:
					break
				y = string.find(s, ">", x)
				if y == -1:
					# unterminated id at the end, drop it
					break
				l.append(s[x:y+1])
				s = s[y+1:]
			self.headers["references"] = string.join(l, " ")

		# Get the time in seconds since the epoch from the header.
		# NOTE(review): rfc822.parsedate ignores the time zone and
		# mktime treats the fields as local time. make_source() writes
		# its Date header the same way, so change both together.
		if self.headers.has_key("date"):
			parsed_date = rfc822.parsedate(self.headers["date"])
			# rfc822.parsedate is picky and fails on small
			# padding errors in the date line.
			if parsed_date == None:
				self.date = int(time.time())
			else:
				# mailers that send 2 digit years
				if parsed_date[0] < 1000: # [0] is year
					parsed_date = (2000 + (parsed_date[0] % 100),) + parsed_date[1:]
				try:
					self.date = int(time.mktime(parsed_date))
				except (OverflowError, ValueError):
					# date outside what mktime can represent
					self.date = int(time.time())
		else:
			# Just set current time...
			self.date = int(time.time())

		# We are parsing a message with headers only
		if headers_only == 1:
			self.opts = self.opts | MSG_NO_BODY
			return
		# we do have a body
		self.opts = self.opts & (~MSG_NO_BODY)

		# Break multipart messages up. A part that gets split is
		# replaced by its sub-parts and 'i' is not advanced, so nested
		# multiparts are split as well.
		i = 0
		while i < len(self.parts_text):
			headers = self.parts_header[i]
			body = self.parts_text[i]
			if not headers.has_key("content-type"):
				# not a multi-part section
				i = i+1
				continue
			content_type = string.lower(headers["content-type"])
			start_boundary = string.find(content_type, "boundary=")
			if start_boundary == -1:
				# not a multi-part section
				i = i+1
				continue
			# get boundary string (9 == len("boundary=")), taken from
			# the original header because boundaries are case sensitive
			boundary = headers["content-type"][start_boundary+9:]
			if boundary[:1] == "\"":
				# quoted: runs up to the closing quote
				end_boundary = string.find(boundary, "\"", 1)
				if end_boundary == -1:
					boundary = boundary[1:]
				else:
					boundary = boundary[1:end_boundary]
			else:
				# bare token: runs up to the next parameter
				end_boundary = string.find(boundary, ";")
				if end_boundary != -1:
					boundary = boundary[:end_boundary]
				boundary = string.strip(boundary)
				if boundary[-1:] == "\"":
					boundary = boundary[:-1]
			if boundary == "":
				# unusable, leave the part as it is
				i = i+1
				continue
			# get chunks of body between this boundary
			offset = b = 0
			subtexts = []
			subheads = []
			while 1:
				# find start of a boundary
				b = string.find(body[offset:], "--"+boundary)
				if b == -1:
					# no attachment
					break
				if b == string.find(body[offset:], "--"+boundary+"--"):
					# end of attachments
					break
				# line after start boundary
				b = offset + b + len(boundary) + 3 # 3==len("--"+"\n")
				# messages with no terminating boundary
				if string.find(body[b:], "--"+boundary) == -1:
					offset = len(body)
				else:
					# end boundary
					offset = string.find(body[b:], "--"+boundary) + b
				# append section
				head, text = parse_2_body_and_headers(body[b:offset])
				subheads.append(head)
				subtexts.append(text)
			# dump new bits on
			# remove this part, since it isn't a single part
			if len(subheads):
				del self.parts_header[i]
				del self.parts_text[i]
				# now add the separate parts it was composed of
				for x in range(0, len(subheads)):
					self.parts_header.insert(i+x, subheads[x])
					self.parts_text.insert(i+x, subtexts[x])
			else:
				i = i+1
		# Some very strange messages don't have text bodies
		# If they are binary we really don't want to stick them in a GtkText...
		if self.parts_header[0].has_key("content-type"):
			if string.lower(self.parts_header[0]["content-type"])[:4] != "text":
				self.parts_header.insert(0, {})
				self.parts_text.insert(0, "\n")

		# Check text part 0 for yenc stuff inline
		# (yenc markers in the subject line are ignored)
		# All offsets below are absolute positions in 'bod'.
		not_yenc = ""
		pos = 0
		resume = 0	# where plain text continues after the last block
		insert_at = 1	# keeps attachments in their original order
		bod = self.parts_text[0]
		found_yenc = 0
		while 1:
			pos = string.find(bod, "=ybegin", pos)
			if pos == -1:
				break
			if pos != 0 and bod[pos-1] != '\n':
				# Not at start of a line. Reject.
				pos = pos + 1
				continue

			endheadpos = string.find(bod, '\n', pos)
			if endheadpos == -1:
				# header is the very last line, there is no data
				break
			hdr = bod[pos:endheadpos]

			# find body span
			endpos = string.find(bod, "\n=yend", endheadpos)
			if endpos == -1:
				# truncated block, leave it in the text
				break

			# Multi part. skip 2nd header line '=ypart' if it exists
			datapos = endheadpos + 1
			if bod[datapos:datapos+6] == "=ypart":
				datapos = string.find(bod, '\n', datapos) + 1

			found_yenc = 1
			yenc_part = bod[datapos:endpos]+"\n"
			# Add bit before this block to the not yenc text
			not_yenc = not_yenc + bod[resume:pos]

			# get headers
			yenc_head = {}
			hdrsplit = hdr.split()

			# ignoring '=ybegin' word
			last_key = None
			for h in hdrsplit[1:]:
				try:
					head, data = h.split("=", 1)
				except ValueError:
					# no '=': continuation of the previous value
					# (e.g. a file name containing spaces)
					if last_key:
						yenc_head["yenc_"+last_key] = yenc_head["yenc_"+last_key] + " " + h
						continue
					else:
						print("Strange yenc header: '%s'" % h)
						continue
				last_key = head
				yenc_head["yenc_"+head] = data

			if yenc_head.has_key("yenc_name"):
				filename = yenc_head["yenc_name"]
				mimetype = mimetypes.guess_type(filename)[0]
			else:
				filename = "noname"
				mimetype = None
			if mimetype == None:
				mimetype = "application/octet-stream"

			yenc_head["content-transfer-encoding"] = "yenc"
			yenc_head["content-type"] = "%s; name=\"%s\"" % (mimetype, filename)
			self.parts_text.insert(insert_at, yenc_part)
			self.parts_header.insert(insert_at, yenc_head)
			insert_at = insert_at + 1

			# carry on after the '=yend' line
			eol = string.find(bod, '\n', endpos+1)
			if eol == -1:
				resume = len(bod)
			else:
				resume = eol + 1
			pos = resume
		if found_yenc:
			# Set original part 0 to minus the yenc bits
			self.parts_text[0] = not_yenc + bod[resume:]

		# Check text part 0 for uuencoded stuff inline
		not_uuenc = ""
		pos = 0
		resume = 0
		insert_at = 1
		bod = self.parts_text[0]
		found_uuenc = 0
		while 1:
			npos = string.find(bod, "begin", pos)
			if npos == -1:
				if found_uuenc == 1:
					break
				# None found yet. But maybe it is the
				# kind with no begin header at all
				# (part of a huge split up file)
				x = string.find(bod, "\nM")
				y = string.find(bod[x+1:], "\nM")
				z = string.find(bod[x+y+2:], "\nM")
				# find marked end (if there is one)
				endpos = string.find(bod, "\n`\nend")
				if endpos == -1:
					endpos = string.find(bod, "\nend")
				if endpos == -1:
					endpos = len(bod)
				# Three lines in a row starting with 'M',
				# 61 characters long each. Looks like a uuencoded thingy
				if x != -1 and y == 61 and z == 61:
					uuenc_part = bod[:endpos].strip()+"\n"

					mimetype = "application/octet-stream"
					uuenc_head = { "content-transfer-encoding": "uuencode",
					               "content-type": "%s; name=\"%s\"" % (mimetype, "noname") }
					self.parts_text.insert(insert_at, uuenc_part)
					self.parts_header.insert(insert_at, uuenc_head)
					found_uuenc = 1
					# the whole text was the attachment
					resume = len(bod)
				break
			if npos != 0 and bod[npos-1] != '\n':
				# Not at start of a line. Reject.
				pos = npos + 1
				continue
			pos = npos

			endheadpos = string.find(bod, '\n', pos)
			if endheadpos == -1:
				# 'begin' on the very last line, there is no data
				break
			hdr = bod[pos:endheadpos]
			hdrfields = string.split(hdr, " ", 2)
			# Should be: ("begin", "644 (or some mode)", "filename")
			valid = 0
			if len(hdrfields) == 3 and hdrfields[0] == 'begin':
				try:
					int(hdrfields[1], 8)
					valid = 1
				except ValueError:
					pass
			if not valid:
				# ordinary text that happens to start with "begin";
				# keep looking further down
				pos = endheadpos + 1
				continue
			found_uuenc = 1
			# filename
			hdrfields[2] = string.strip(hdrfields[2])

			# Add bit before this block to the not uuenc text
			not_uuenc = not_uuenc + bod[resume:pos]

			# find body span: the first "end" line after the header,
			# optionally preceded by a "`" line
			endmark = string.find(bod, "\nend", pos)
			if endmark == -1:
				endpos = len(bod)
				resume = len(bod)
			else:
				if bod[endmark-2:endmark] == "\n`":
					endpos = endmark - 2
				else:
					endpos = endmark
				# carry on after the 'end' line
				eol = string.find(bod, '\n', endmark+1)
				if eol == -1:
					resume = len(bod)
				else:
					resume = eol + 1
			uuenc_part = bod[endheadpos+1:endpos].strip()+"\n"

			mimetype = mimetypes.guess_type(hdrfields[2])[0]
			if mimetype == None:
				mimetype = "application/octet-stream"
			uuenc_head = { "content-transfer-encoding": "uuencode",
			               "content-type": "%s; name=\"%s\"" % (mimetype, hdrfields[2]) }
			self.parts_text.insert(insert_at, uuenc_part)
			self.parts_header.insert(insert_at, uuenc_head)
			insert_at = insert_at + 1

			pos = resume
		if found_uuenc:
			# Set original part 0 to minus the uuenc bits
			self.parts_text[0] = not_uuenc + bod[resume:]

		# We don't like empty first parts if there are text parts after it
		while len(self.parts_header) >= 2:
			if string.strip(self.parts_text[0]) != "":
				break
			if not self.parts_header[1].has_key("content-type"):
				break
			if string.lower(self.parts_header[1]["content-type"])[:4] != "text":
				break
			del self.parts_text[0]
			del self.parts_header[0]
	
	def external_parser(self, user, index):
		# Parse html bodies to plain text
		if self.get_attachment_info(index)[0] == "text/html":
			# Get a temporary filename
			tempfilename = tempfile.mktemp(".html")
			# Open it and write the html to it
			temp = open(tempfilename, "w")
			temp.write(self.decode_attachment(index))
			temp.close()
			# get output from user's html parsy proggy
			f = os.popen(user.html_parser+" %s" % tempfilename)
			parsed = f.read()
			os.remove(tempfilename)
			f.close()
			if parsed == "":
				# um. failed. maybe the prog doesn't exist
				parsed = self.parts_text[index]
			return parsed
		else:
			return self.decode_attachment(index)

	def decode_attachment(self, index):
		"""
		Decode attachment. Return as string. Big fucking string.
		May return None if we fail to decode.
		Stuff that is unencoded should pass through unchanged.
		"""
		content_type, filename, size, content_enc = self.get_attachment_info(index)
		
		if content_enc == "base64":
			f = cStringIO.StringIO(self.parts_text[index])
			o = cStringIO.StringIO()
			utils.line_decoder(f, o, binascii.a2b_base64)
			o.seek(0)
			decoded = o.read()
			f.close()
			o.close()

		elif content_enc == "yenc":
			# A C version would be so much faster
			i = self.parts_text[index]
			o = cStringIO.StringIO()
			
			# Try for nice fast C yenc decoder
			# Stick the shit in a tempfile
			tempfilename = tempfile.mktemp(".yenc")
			temp = open(tempfilename, "w")
			temp.write(self.parts_text[index])
			temp.close()
			
			f = os.popen("yencdec %s" % tempfilename)
			decoded = f.read()
			os.remove(tempfilename)
			f.close()

			# Damn. Probably yencdec not found. Default to slow python decoder
			if decoded == "":
				print "yencdec not found. using slower python decoder"
				# Iterate through characters
				# XXX nukes on escape on last char.. fix 
				pos = 0
				while pos < len(i):
					c = i[pos]
					pos += 1

					if c == '\n':
						continue
					elif c == '=':
						# Escape character
						if pos >= len(i)-1:
							# End of input. can't grab escaped char
							break
						c = i[pos]
						pos += 1
						# I wish they looped like real 8 bitty things
						c = chr((ord(c)-106)%256)
					else:
						# Normal character
						c = chr((ord(c)-42)%256)
					o.write(c)
				o.seek(0)
				decoded = o.read()

		elif content_enc == "uuencode":
			f = cStringIO.StringIO(self.parts_text[index])
			o = cStringIO.StringIO()
			utils.line_decoder(f, o, binascii.a2b_uu)
			o.seek(0)
			decoded = o.read()
			f.close()
			o.close()

		elif content_enc == "quoted-printable":
			f = cStringIO.StringIO(self.parts_text[index])
			o = cStringIO.StringIO()
			try:
				mimetools.decode(f, o, "quoted-printable")
			except Exception, e:
				print "Error decoding attachment: %s" % str(e)
				return None
			else:
				o.seek(0)
				decoded = o.read()
			f.close()
			o.close()
		else:
			if not content_enc in ["unknown", "7bit", "8bit"]:
				print "Unknown content encoding:", content_enc
			# pass through undecoded
			# line wrap if the lines are huge though
			decoded = self.parts_text[index]
		
		return decoded

	def save_attachment(self, index, filename):
		"""
		Save attachment parts[index] to file 'filename'.
		"""
		f = open(filename, "w")
		f.write(self.decode_attachment(index))
		f.close()

	def add_attachment(self, filename):
		"""
		Add a base64 encoded attachment of file 'filename'
		onto the message.
		"""
		# Read the file to be attached
		try:
			f = open(filename, "r")
		except IOError:
			return
		rawfile = f.read()
		f.close()

		# get mime type
		mimetype = mimetypes.guess_type(filename)[0]

		if mimetype == None:
			mimetype = "application/octet-stream"

		# Truncate filenames like '/home/yourname/file.txt'
		# to 'file.txt'
		smallname = os.path.basename(filename)

		part_header = {}
		part_text = base64.encodestring(rawfile)
		# Create headers
		part_header["content-type"] = "%s; name=\"%s\"" % (mimetype, smallname)
		part_header["content-transfer-encoding"] = "base64"
		part_header["content-disposition"] = "attachment; filename=\"%s\"" % smallname

		# Add to message
		self.parts_text.append(part_text)
		self.parts_header.append(part_header)

	def get_attachment_info(self, index):
		"""
		Returns list of attachment data in the form:
		[ content-type, filename, size, content-transfer-encoding ]
		"""
		part_header = self.parts_header[index]
		part_text = self.parts_text[index]
		# Get content-type
		if part_header.has_key("content-type"):
			content_type = part_header["content-type"]
			x = string.find(content_type, ";")
			if x != -1:
				content_type = content_type[:x]
		else:
			content_type = "unknown"
		# Get filename
		if part_header.has_key("content-id"):
			filename = os.path.basename( part_header["content-id"] )
		elif part_header.has_key("content-disposition"):
			content_disposition = part_header["content-disposition"]
			x = string.find(content_disposition, "filename=\"")
			if x==-1:
				filename = "noname"
			else:
				y = string.find(content_disposition[x+10:], "\"")
				filename = content_disposition[x+10:x+10+y]
		elif part_header.has_key("content-type"):
			content_disposition = part_header["content-type"]
			x = string.find(content_disposition, "name=\"")
			if x==-1:
				filename = "noname"
			else:
				y = string.find(content_disposition[x+6:], "\"")
				filename = content_disposition[x+6:x+6+y]
		else:
			filename = "noname"
		# Get content-transfer-type
		if part_header.has_key("content-transfer-encoding"):
			content_transfer_encoding = string.lower(part_header["content-transfer-encoding"])
		else:
			content_transfer_encoding = "unknown"
		# Ignores if we have been decoded yet
		length = str(len(part_text))
	
		return [ content_type, filename, length, content_transfer_encoding ]
# syntax highlighted by Code2HTML, v. 0.9.1