# -*-python-*-
#
# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewVC
# distribution or at http://viewvc.org/license-1.html.
#
# For more information, visit http://viewvc.org/
#
# -----------------------------------------------------------------------
#
# This file was originally based on portions of the blame.py script by
# Curt Hagenlocher.
#
# -----------------------------------------------------------------------
import string
import common
class _TokenStream:
token_term = string.whitespace + ';'
# the algorithm is about the same speed for any CHUNK_SIZE chosen.
# grab a good-sized chunk, but not too large to overwhelm memory.
# note: we use a multiple of a standard block size
CHUNK_SIZE = 192 * 512 # about 100k
# CHUNK_SIZE = 5 # for debugging, make the function grind...
def __init__(self, file):
self.rcsfile = file
self.idx = 0
self.buf = self.rcsfile.read(self.CHUNK_SIZE)
if self.buf == '':
raise RuntimeError, 'EOF'
def get(self):
"Get the next token from the RCS file."
# Note: we can afford to loop within Python, examining individual
# characters. For the whitespace and tokens, the number of iterations
# is typically quite small. Thus, a simple iterative loop will beat
# out more complex solutions.
buf = self.buf
idx = self.idx
while 1:
if idx == len(buf):
buf = self.rcsfile.read(self.CHUNK_SIZE)
if buf == '':
# signal EOF by returning None as the token
del self.buf # so we fail if get() is called again
return None
idx = 0
if buf[idx] not in string.whitespace:
break
idx = idx + 1
if buf[idx] == ';':
self.buf = buf
self.idx = idx + 1
return ';'
if buf[idx] != '@':
end = idx + 1
token = ''
while 1:
# find token characters in the current buffer
while end < len(buf) and buf[end] not in self.token_term:
end = end + 1
token = token + buf[idx:end]
if end < len(buf):
# we stopped before the end, so we have a full token
idx = end
break
# we stopped at the end of the buffer, so we may have a partial token
buf = self.rcsfile.read(self.CHUNK_SIZE)
idx = end = 0
self.buf = buf
self.idx = idx
return token
# a "string" which starts with the "@" character. we'll skip it when we
# search for content.
idx = idx + 1
chunks = [ ]
while 1:
if idx == len(buf):
idx = 0
buf = self.rcsfile.read(self.CHUNK_SIZE)
if buf == '':
raise RuntimeError, 'EOF'
i = string.find(buf, '@', idx)
if i == -1:
chunks.append(buf[idx:])
idx = len(buf)
continue
if i == len(buf) - 1:
chunks.append(buf[idx:i])
idx = 0
buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
if buf == '@':
raise RuntimeError, 'EOF'
continue
if buf[i + 1] == '@':
chunks.append(buf[idx:i+1])
idx = i + 2
continue
chunks.append(buf[idx:i])
self.buf = buf
self.idx = i + 1
return string.join(chunks, '')
# _get = get
# def get(self):
token = self._get()
print 'T:', `token`
return token
def match(self, match):
"Try to match the next token from the input buffer."
token = self.get()
if token != match:
raise RuntimeError, ('Unexpected parsing error in RCS file.\n' +
'Expected token: %s, but saw: %s' % (match, token))
def unget(self, token):
"Put this token back, for the next get() to return."
# Override the class' .get method with a function which clears the
# overridden method then returns the pushed token. Since this function
# will not be looked up via the class mechanism, it should be a "normal"
# function, meaning it won't have "self" automatically inserted.
# Therefore, we need to pass both self and the token thru via defaults.
# note: we don't put this into the input buffer because it may have been
# @-unescaped already.
def give_it_back(self=self, token=token):
del self.get
return token
self.get = give_it_back
def mget(self, count):
"Return multiple tokens. 'next' is at the end."
result = [ ]
for i in range(count):
result.append(self.get())
result.reverse()
return result
class Parser(common._Parser):
stream_class = _TokenStream
def parse_rcs_admin(self):
while 1:
# Read initial token at beginning of line
token = self.ts.get()
# We're done once we reach the description of the RCS tree
if token[0] in string.digits:
self.ts.unget(token)
return
if token == "head":
semi, rev = self.ts.mget(2)
self.sink.set_head_revision(rev)
if semi != ';':
raise common.RCSExpected(semi, ';')
elif token == "branch":
semi, branch = self.ts.mget(2)
if semi == ';':
self.sink.set_principal_branch(branch)
else:
if branch == ';':
self.ts.unget(semi);
else:
raise common.RCSExpected(semi, ';')
elif token == "symbols":
while 1:
tag = self.ts.get()
if tag == ';':
break
(tag_name, tag_rev) = string.split(tag, ':')
self.sink.define_tag(tag_name, tag_rev)
elif token == "comment":
semi, comment = self.ts.mget(2)
self.sink.set_comment(comment)
if semi != ';':
raise common.RCSExpected(semi, ';')
elif token == "expand":
semi, expand_mode = self.ts.mget(2)
self.sink.set_expansion(expand_mode)
if semi != ';':
raise RCSExpected(semi, ';')
elif token == "locks":
while 1:
tag = self.ts.get()
if tag == ';':
break
(locker, rev) = string.split(tag,':')
self.sink.set_locker(rev, locker)
tag = self.ts.get()
if tag == "strict":
self.sink.set_locking("strict")
self.ts.match(';')
else:
self.ts.unget(tag)
elif token == "access":
accessors = []
while 1:
tag = self.ts.get()
if tag == ';':
if accessors != []:
self.sink.set_access(accessors)
break
accessors = accessors + [ tag ]
# Chew up "newphrase".
else:
pass
# warn("Unexpected RCS token: $token\n")
raise RuntimeError, "Unexpected EOF"
syntax highlighted by Code2HTML, v. 0.9.1