#!/usr/bin/python # mlpatch.py: Run with no arguments for usage import sys, os import sgmllib from htmlentitydefs import entitydefs import fileinput from urllib2 import urlopen CHUNKSIZE = 8 * 1024 class MyParser(sgmllib.SGMLParser): def __init__(self): self.baseclass = sgmllib.SGMLParser self.baseclass.__init__(self) self.entitydefs = entitydefs self.entitydefs["nbsp"] = " " self.inbody = False self.complete_line = False self.discard_gathered() def discard_gathered(self): self.gather_data = False self.gathered_data = "" def noop(self): pass def out(self, data): sys.stdout.write(data) def handle_starttag(self, tag, method, attrs): if not self.inbody: return self.baseclass.handle_starttag(self, tag, method, attrs) def handle_endtag(self, tag, method): if not self.inbody: return self.baseclass.handle_endtag(self, tag, method) def handle_data(self, data): if not self.inbody: return data = data.replace('\n','') if len(data) == 0: return if self.gather_data: self.gathered_data += data else: if self.complete_line: if data[0] in ('+', '-', ' ', '#') \ or data.startswith("Index:") \ or data.startswith("@@ ") \ or data.startswith("======"): # Real new line self.out('\n') else: # Presume that we are wrapped self.out(' ') self.complete_line = False self.out(data) def handle_charref(self, ref): if not self.inbody: return self.baseclass.handle_charref(self, ref) def handle_entityref(self, ref): if not self.inbody: return self.baseclass.handle_entityref(self, ref) def handle_comment(self, comment): if comment == ' body="start" ': self.inbody = True elif comment == ' body="end" ': self.inbody = False def handle_decl(self, data): if not self.inbody: return print "DECL: " + data def unknown_starttag(self, tag, attrs): if not self.inbody: return print "UNKTAG: %s %s" % (tag, attrs) def unknown_endtag(self, tag): if not self.inbody: return print "UNKTAG: /%s" % (tag) def do_br(self, attrs): self.complete_line = True def do_p(self, attrs): if self.complete_line: self.out('\n') self.out(' ') self.complete_line = True def start_a(self, attrs): self.gather_data = True def end_a(self): self.out(self.gathered_data.replace('_at_', '@')) self.discard_gathered() def close(self): if self.complete_line: self.out('\n') self.baseclass.close(self) def main(): if len(sys.argv) == 1: sys.stderr.write( "usage: mlpatch.py dev|users year month msgno > foobar.patch\n" + "example: mlpatch.py dev 2005 01 0001 > issue-XXXX.patch\n" + """ Very annoyingly, the http://svn.haxx.se/ subversion mailing list archives mangle inline patches, and provide no raw message download facility (other than for an entire month's email as an mbox). So, I wrote this script, to demangle them. It's not perfect, as it has to guess about whitespace, but it does an acceptable job.\n""") sys.exit(0) elif len(sys.argv) != 5: sys.stderr.write("error: mlpatch.py: Bad parameters - run with no " + "parameters for usage\n") sys.exit(1) else: list, year, month, msgno = sys.argv[1:] url = "http://svn.haxx.se/" \ + "%(list)s/archive-%(year)s-%(month)s/%(msgno)s.shtml" % locals() print "MsgUrl: " + url msgfile = urlopen(url) p = MyParser() buffer = msgfile.read(CHUNKSIZE) while buffer: p.feed(buffer) buffer = msgfile.read(CHUNKSIZE) p.close() msgfile.close() if __name__ == '__main__': main()