## $Id: pygmymultifile.py,v 1.5 2001/08/08 10:46:33 kjetilja Exp $

## System modules
import string

class Error(Exception):
    """Raised by MultiFile on bad seek()/push()/pop() calls and when an
    outer boundary is seen while an inner multipart is still open."""


# This is basically the same class as the multifile distributed with
# Python, and is used when parsing multipart mime documents.  The
# difference is that EOF is treated as an EOM delimiter instead of
# throwing an exception.  Such behaviour should probably not be
# expected in a regular multifile environment as premature EOF is bad,
# but is necessary in the case where we have to deal with spam
# attachments and similar bogus stuff from crappy clients disregarding
# boundary rules.

class MultiFile:
    """File-like reader that splits a stream into parts at boundary lines.

    Boundaries are registered with push() and removed with pop().  A line
    equal to "--" + boundary ends the current part; "--" + boundary + "--"
    ends the current part and marks it as the last one.  Reaching end of
    file while a boundary is pushed is treated like an end marker rather
    than an error.

    Attributes:
        fp       -- the underlying file object
        stop     -- file offset limit checked after each read; 0 disables
        stack    -- pushed boundary strings, most recent at index 0
        level    -- 0 while inside a part; otherwise 1 + the stack index
                    of the boundary that ended it (len(stack) on EOF)
        last     -- true when the part was ended by an end marker, by
                    EOF inside a multipart, or by running past `stop`
        seekable -- true when position tracking (start/posstack/lastpos)
                    is maintained
    """

    seekable = 0

    def __init__(self, fp, stop, seekable=1):
        """Wrap `fp`.

        fp       -- file object providing readline() (and tell()/seek()
                    when `seekable` is true or `stop` is non-zero)
        stop     -- offset compared against fp.tell() after every line
                    read; pass 0 to disable the check
        seekable -- when true, remember positions so tell()/seek() work
        """
        self.fp = fp
        self.stop = stop
        self.stack = []  # Grows down
        self.level = 0
        self.last = 0
        if seekable:
            self.seekable = 1
            self.start = self.fp.tell()
            self.posstack = []  # Grows down

    def tell(self):
        """Return the position relative to the start of the current part.

        After a boundary has been hit this is the position where the
        part's data ended.
        """
        if self.level > 0:
            return self.lastpos
        return self.fp.tell() - self.start

    def seek(self, pos, whence=0):
        """Reposition within the part of the stream read so far.

        whence=0 is relative to the start of the part, 1 to the current
        position, 2 to the end of the part (only once the end has been
        reached).  Raises Error if the target lies outside the range
        already read, or if whence=2 is used before the end is known.
        """
        here = self.tell()
        if whence == 1:
            pos = pos + here
        elif whence == 2:
            if self.level > 0:
                pos = pos + self.lastpos
            else:
                raise Error("can't use whence=2 yet")
        outside_read_range = not 0 <= pos <= here
        beyond_part_end = self.level > 0 and pos > self.lastpos
        if outside_read_range or beyond_part_end:
            raise Error('bad MultiFile.seek() call')
        self.fp.seek(pos + self.start)
        self.level = 0
        self.last = 0

    def readline(self):
        """Return the next line of the current part, or '' at its end.

        The end of a part is a section divider, an end marker, end of
        file, or the file position moving past `stop`.  Raises Error if
        the line matches a boundary other than the most recently pushed
        one.
        """
        if self.level > 0:
            return ''
        line = self.fp.readline()
        # If the delimiters lack we may end up here, and it's not pretty
        if (self.stop != 0) and (self.fp.tell() > self.stop):
            self.last = 1
            self.lastpos = self.stop
            return ''
        # Real EOF?
        if not line:
            self.level = len(self.stack)
            self.last = (self.level > 0)
            # posstack only exists in seekable mode; without this guard a
            # non-seekable reader would crash on a missing EOM delimiter.
            if self.last and self.seekable:
                # Multipart lacks EOM delimiter -- ignoring.
                # Set the last position to the last position in the
                # position stack (should cover our tracks, right?)
                self.lastpos = self.posstack[-1]
            return ''
        # Fast check to see if this is just data
        if self.is_data(line):
            return line
        # Ignore trailing whitespace on marker lines.  The line starts
        # with '--', so stripping can never leave it empty.
        marker = line.rstrip(string.whitespace)
        # Try to match a boundary.
        # Return the line (unstripped) if we don't.
        for depth, sep in enumerate(self.stack):
            if marker == self.section_divider(sep):
                self.last = 0
                break
            if marker == self.end_marker(sep):
                self.last = 1
                break
        else:
            return line
        # We only get here if we see a section divider or EOM line
        if self.seekable:
            self.lastpos = self.tell() - len(line)
        self.level = depth + 1
        if self.level > 1:
            raise Error('Missing endmarker in MultiFile.readline()')
        return ''

    def readlines(self):
        """Return the remaining lines of the current part as a list."""
        lines = []
        while 1:
            line = self.readline()
            if not line:
                break
            lines.append(line)
        return lines

    def read(self):  # Note: no size argument -- read until EOF only!
        """Return the rest of the current part as a single string."""
        return ''.join(self.readlines())

    def next(self):
        """Skip to the next part.

        Returns 1 if positioned at the start of another part, 0 if the
        part just consumed was the last one.
        """
        while self.readline():
            pass
        if self.level > 1 or self.last:
            return 0
        self.level = 0
        self.last = 0
        if self.seekable:
            self.start = self.fp.tell()
        return 1

    def push(self, sep):
        """Start recognising boundary `sep`.

        Raises Error if called while positioned at the end of a part.
        """
        if self.level > 0:
            raise Error('bad MultiFile.push() call')
        self.stack.insert(0, sep)
        if self.seekable:
            self.posstack.insert(0, self.start)
            self.start = self.fp.tell()

    def pop(self):
        """Stop recognising the most recently pushed boundary.

        Raises Error if no boundary is pushed.
        """
        if not self.stack:
            raise Error('bad MultiFile.pop() call')
        if self.level <= 1:
            self.last = 0
        else:
            # Remember the absolute end position so it can be re-based
            # on the enclosing part's start below.
            abslastpos = self.lastpos + self.start
        self.level = max(0, self.level - 1)
        del self.stack[0]
        if self.seekable:
            self.start = self.posstack[0]
            del self.posstack[0]
            if self.level > 0:
                self.lastpos = abslastpos - self.start

    def is_data(self, line):
        """Return true if `line` cannot be a boundary (no leading '--')."""
        return line[:2] != '--'

    def section_divider(self, str):
        """Return the line that separates parts for boundary `str`."""
        return "--" + str

    def end_marker(self, str):
        """Return the line that closes the multipart for boundary `str`."""
        return "--" + str + "--"


# syntax highlighted by Code2HTML, v. 0.9.1