#!/usr/bin/env python
# $Id: SourceReader.py,v 1.15 2007/04/03 01:57:42 tavis_rudd Exp $
"""SourceReader class for Cheetah's Parser and CodeGenerator
Meta-Data
================================================================================
Author: Tavis Rudd <tavis@damnsimple.com>
License: This software is released for unlimited distribution under the
terms of the MIT license. See the LICENSE file.
Version: $Revision: 1.15 $
Start Date: 2001/09/19
Last Revision Date: $Date: 2007/04/03 01:57:42 $
"""
__author__ = "Tavis Rudd <tavis@damnsimple.com>"
__revision__ = "$Revision: 1.15 $"[11:-2]
import re
import sys
EOLre = re.compile(r'[ \f\t]*(?:\r\n|\r|\n)')
EOLZre = re.compile(r'(?:\r\n|\r|\n|\Z)')
ENCODINGsearch = re.compile("coding[=:]\s*([-\w.]+)").search
class Error(Exception):
pass
class SourceReader:
def __init__(self, src, filename=None, breakPoint=None, encoding=None):
## @@TR 2005-01-17: the following comes from a patch Terrel Shumway
## contributed to add unicode support to the reading of Cheetah source
## files with dynamically compiled templates. All the existing unit
## tests pass but, it needs more testing and some test cases of its
## own. My instinct is to move this up into the code that passes in the
## src string rather than leaving it here. As implemented here it
## forces all src strings to unicode, which IMO is not what we want.
# if encoding is None:
# # peek at the encoding in the first two lines
# m = EOLZre.search(src)
# pos = m.end()
# if pos<len(src):
# m = EOLZre.search(src,pos)
# pos = m.end()
# m = ENCODINGsearch(src,0,pos)
# if m:
# encoding = m.group(1)
# else:
# encoding = sys.getfilesystemencoding()
# self._encoding = encoding
# if type(src) is not unicode:
# src = src.decode(encoding)
## end of Terrel's patch
self._src = src
self._filename = filename
self._srcLen = len(src)
if breakPoint == None:
self._breakPoint = self._srcLen
else:
self.setBreakPoint(breakPoint)
self._pos = 0
self._bookmarks = {}
self._posTobookmarkMap = {}
## collect some meta-information
self._EOLs = []
pos = 0
while pos < len(self):
EOLmatch = EOLZre.search(src, pos)
self._EOLs.append(EOLmatch.start())
pos = EOLmatch.end()
self._BOLs = []
for pos in self._EOLs:
BOLpos = self.findBOL(pos)
self._BOLs.append(BOLpos)
def src(self):
return self._src
def filename(self):
return self._filename
def __len__(self):
return self._breakPoint
def __getitem__(self, i):
self.checkPos(i)
return self._src[i]
def __getslice__(self, i, j):
i = max(i, 0); j = max(j, 0)
return self._src[i:j]
def splitlines(self):
if not hasattr(self, '_srcLines'):
self._srcLines = self._src.splitlines()
return self._srcLines
def lineNum(self, pos=None):
if pos == None:
pos = self._pos
for i in range(len(self._BOLs)):
if pos >= self._BOLs[i] and pos <= self._EOLs[i]:
return i
def getRowCol(self, pos=None):
if pos == None:
pos = self._pos
lineNum = self.lineNum(pos)
BOL, EOL = self._BOLs[lineNum], self._EOLs[lineNum]
return lineNum+1, pos-BOL+1
def getRowColLine(self, pos=None):
if pos == None:
pos = self._pos
row, col = self.getRowCol(pos)
return row, col, self.splitlines()[row-1]
def getLine(self, pos):
if pos == None:
pos = self._pos
lineNum = self.lineNum(pos)
return self.splitlines()[lineNum]
def pos(self):
return self._pos
def setPos(self, pos):
self.checkPos(pos)
self._pos = pos
def validPos(self, pos):
return pos <= self._breakPoint and pos >=0
def checkPos(self, pos):
if not pos <= self._breakPoint:
raise Error("pos (" + str(pos) + ") is invalid: beyond the stream's end (" +
str(self._breakPoint-1) + ")" )
elif not pos >=0:
raise Error("pos (" + str(pos) + ") is invalid: less than 0" )
def breakPoint(self):
return self._breakPoint
def setBreakPoint(self, pos):
if pos > self._srcLen:
raise Error("New breakpoint (" + str(pos) +
") is invalid: beyond the end of stream's source string (" +
str(self._srcLen) + ")" )
elif not pos >= 0:
raise Error("New breakpoint (" + str(pos) + ") is invalid: less than 0" )
self._breakPoint = pos
def setBookmark(self, name):
self._bookmarks[name] = self._pos
self._posTobookmarkMap[self._pos] = name
def hasBookmark(self, name):
return self._bookmarks.has_key(name)
def gotoBookmark(self, name):
if not self.hasBookmark(name):
raise Error("Invalid bookmark (" + name + ") is invalid: does not exist")
pos = self._bookmarks[name]
if not self.validPos(pos):
raise Error("Invalid bookmark (" + name + ', '+
str(pos) + ") is invalid: pos is out of range" )
self._pos = pos
def atEnd(self):
return self._pos >= self._breakPoint
def atStart(self):
return self._pos == 0
def peek(self, offset=0):
self.checkPos(self._pos+offset)
pos = self._pos + offset
return self._src[pos]
def getc(self):
pos = self._pos
if self.validPos(pos+1):
self._pos += 1
return self._src[pos]
def ungetc(self, c=None):
if not self.atStart():
raise Error('Already at beginning of stream')
self._pos -= 1
if not c==None:
self._src[self._pos] = c
def advance(self, offset=1):
self.checkPos(self._pos + offset)
self._pos += offset
def rev(self, offset=1):
self.checkPos(self._pos - offset)
self._pos -= offset
def read(self, offset):
self.checkPos(self._pos + offset)
start = self._pos
self._pos += offset
return self._src[start:self._pos]
def readTo(self, to, start=None):
self.checkPos(to)
if start == None:
start = self._pos
self._pos = to
return self._src[start:to]
def readToEOL(self, start=None, gobble=True):
EOLmatch = EOLZre.search(self.src(), self.pos())
if gobble:
pos = EOLmatch.end()
else:
pos = EOLmatch.start()
return self.readTo(to=pos, start=start)
def find(self, it, pos=None):
if pos == None:
pos = self._pos
return self._src.find(it, pos )
def startswith(self, it, pos=None):
if self.find(it, pos) == self.pos():
return True
else:
return False
def rfind(self, it, pos):
if pos == None:
pos = self._pos
return self._src.rfind(it, pos)
def findBOL(self, pos=None):
if pos == None:
pos = self._pos
src = self.src()
return max(src.rfind('\n',0,pos)+1, src.rfind('\r',0,pos)+1, 0)
def findEOL(self, pos=None, gobble=False):
if pos == None:
pos = self._pos
match = EOLZre.search(self.src(), pos)
if gobble:
return match.end()
else:
return match.start()
def isLineClearToPos(self, pos=None):
if pos == None:
pos = self.pos()
self.checkPos(pos)
src = self.src()
BOL = self.findBOL()
return BOL == pos or src[BOL:pos].isspace()
def matches(self, strOrRE):
if isinstance(strOrRE, (str, unicode)):
return self.startswith(strOrRE, pos=self.pos())
else: # assume an re object
return strOrRE.match(self.src(), self.pos())
def matchWhiteSpace(self, WSchars=' \f\t'):
return (not self.atEnd()) and self.peek() in WSchars
def getWhiteSpace(self, max=None, WSchars=' \f\t'):
if not self.matchWhiteSpace(WSchars):
return ''
start = self.pos()
breakPoint = self.breakPoint()
if max is not None:
breakPoint = min(breakPoint, self.pos()+max)
while self.pos() < breakPoint:
self.advance()
if not self.matchWhiteSpace(WSchars):
break
return self.src()[start:self.pos()]
def matchNonWhiteSpace(self, WSchars=' \f\t\n\r'):
return self.atEnd() or not self.peek() in WSchars
def getNonWhiteSpace(self, WSchars=' \f\t\n\r'):
if not self.matchNonWhiteSpace(WSchars):
return ''
start = self.pos()
while self.pos() < self.breakPoint():
self.advance()
if not self.matchNonWhiteSpace(WSchars):
break
return self.src()[start:self.pos()]
syntax highlighted by Code2HTML, v. 0.9.1