# -*-python-*-
#
# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewVC
# distribution or at http://viewvc.org/license-1.html.
#
# For more information, visit http://viewvc.org/
#
# -----------------------------------------------------------------------
#
# This file was originally based on portions of the blame.py script by
# Curt Hagenlocher.
#
# -----------------------------------------------------------------------

import string
import common


class _TokenStream:
  """Split an RCS file into tokens, reading it in fixed-size chunks.

  A token is one of:
    * the single character ';'
    * a run of characters delimited by whitespace or ';'
    * the contents of an '@'-quoted string, with each doubled '@@'
      collapsed to a single '@'

  The constructor takes a file-like object whose read(size) method returns
  '' at end of file.
  """

  # characters which terminate a plain (non-'@') token
  token_term = string.whitespace + ';'

  # the algorithm is about the same speed for any CHUNK_SIZE chosen.
  # grab a good-sized chunk, but not too large to overwhelm memory.
  # note: we use a multiple of a standard block size
  CHUNK_SIZE = 192 * 512  # about 100k

  # CHUNK_SIZE = 5   # for debugging, make the function grind...

  def __init__(self, file):
    """Start tokenizing FILE; raise RuntimeError('EOF') if it is empty."""
    self.rcsfile = file
    self.idx = 0
    self.buf = self.rcsfile.read(self.CHUNK_SIZE)
    if self.buf == '':
      raise RuntimeError('EOF')

  def get(self):
    """Get the next token from the RCS file.

    Returns the token as a string, or None once the end of the file has
    been reached.  After None has been returned the internal buffer is
    deleted, so a further call fails instead of silently returning None
    again.

    Raises RuntimeError('EOF') if the file ends inside an '@' string.
    """

    # Note: we can afford to loop within Python, examining individual
    # characters. For the whitespace and tokens, the number of iterations
    # is typically quite small. Thus, a simple iterative loop will beat
    # out more complex solutions.

    buf = self.buf
    idx = self.idx

    # skip leading whitespace, refilling the buffer as needed
    while 1:
      if idx == len(buf):
        buf = self.rcsfile.read(self.CHUNK_SIZE)
        if buf == '':
          # signal EOF by returning None as the token
          del self.buf   # so we fail if get() is called again
          return None
        idx = 0

      if buf[idx] not in string.whitespace:
        break

      idx = idx + 1

    if buf[idx] == ';':
      self.buf = buf
      self.idx = idx + 1
      return ';'

    if buf[idx] != '@':
      end = idx + 1
      token = ''
      while 1:
        # find token characters in the current buffer
        while end < len(buf) and buf[end] not in self.token_term:
          end = end + 1
        token = token + buf[idx:end]

        if end < len(buf):
          # we stopped before the end, so we have a full token
          idx = end
          break

        # we stopped at the end of the buffer, so we may have a partial token
        buf = self.rcsfile.read(self.CHUNK_SIZE)
        idx = end = 0
        if buf == '':
          # The file ended in the middle of (or right after) this token.
          # Without this check the loop would re-read the exhausted file
          # forever.  Return what we have; the next get() reports EOF.
          break

      self.buf = buf
      self.idx = idx
      return token

    # a "string" which starts with the "@" character. we'll skip it when we
    # search for content.
    idx = idx + 1

    chunks = [ ]

    while 1:
      if idx == len(buf):
        idx = 0
        buf = self.rcsfile.read(self.CHUNK_SIZE)
        if buf == '':
          raise RuntimeError('EOF')

      i = buf.find('@', idx)
      if i == -1:
        # no '@' in the rest of this buffer: it is all string content
        chunks.append(buf[idx:])
        idx = len(buf)
        continue

      if i == len(buf) - 1:
        # The '@' is the last character in the buffer, so we cannot yet tell
        # whether it ends the string or begins an '@@' escape.  Carry it
        # over to the front of the next chunk and look again.
        chunks.append(buf[idx:i])
        idx = 0
        buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
        if buf == '@':
          raise RuntimeError('EOF')
        continue

      if buf[i + 1] == '@':
        # '@@' is an escaped '@': keep one of them and continue scanning
        chunks.append(buf[idx:i+1])
        idx = i + 2
        continue

      # a lone '@' terminates the string
      chunks.append(buf[idx:i])

      self.buf = buf
      self.idx = i + 1

      return ''.join(chunks)

  # Debugging aid: uncomment all of the following to trace every token
  # returned by get().
  #
  #  _get = get
  #  def get(self):
  #    token = self._get()
  #    print 'T:', `token`
  #    return token

  def match(self, match):
    """Try to match the next token from the input buffer.

    Raises RuntimeError if the next token is not equal to MATCH.
    """

    token = self.get()
    if token != match:
      raise RuntimeError('Unexpected parsing error in RCS file.\n' +
                         'Expected token: %s, but saw: %s' % (match, token))

  def unget(self, token):
    """Put this token back, for the next get() to return."""

    # Override the class' .get method with a function which clears the
    # overridden method then returns the pushed token. Since this function
    # will not be looked up via the class mechanism, it should be a "normal"
    # function, meaning it won't have "self" automatically inserted.
    # Therefore, we need to pass both self and the token thru via defaults.

    # note: we don't put this into the input buffer because it may have been
    # @-unescaped already.

    def give_it_back(self=self, token=token):
      del self.get
      return token

    self.get = give_it_back

  def mget(self, count):
    """Return multiple tokens. 'next' is at the end.

    Reads COUNT tokens with get() and returns them in reverse order, so
    the first token read is the last element of the returned list.
    """
    result = [ ]
    for i in range(count):
      result.append(self.get())
    result.reverse()
    return result


class Parser(common._Parser):
  """RCS parser built on the pure-Python _TokenStream tokenizer."""

  stream_class = _TokenStream

  def parse_rcs_admin(self):
    """Parse the admin section of the RCS file, reporting to self.sink.

    Handles the "head", "branch", "symbols", "comment", "expand", "locks"
    and "access" phrases.  Returns when a token starting with a digit is
    seen; that token is pushed back onto the token stream first.

    Raises RuntimeError('Unexpected EOF') if the file ends before that
    point, and common.RCSExpected when an expected ';' is missing.
    """
    while 1:
      # Read initial token at beginning of line
      token = self.ts.get()

      # get() returns None at end of file; indexing it below would raise
      # a confusing TypeError, so report the truncated file explicitly.
      if token is None:
        raise RuntimeError('Unexpected EOF')

      # We're done once we reach the description of the RCS tree
      if token[0] in string.digits:
        self.ts.unget(token)
        return

      if token == "head":
        semi, rev = self.ts.mget(2)
        self.sink.set_head_revision(rev)
        if semi != ';':
          raise common.RCSExpected(semi, ';')
      elif token == "branch":
        semi, branch = self.ts.mget(2)
        if semi == ';':
          self.sink.set_principal_branch(branch)
        else:
          if branch == ';':
            # "branch ;" with no value: the second token read belongs to
            # the next phrase, so hand it back.
            self.ts.unget(semi)
          else:
            raise common.RCSExpected(semi, ';')
      elif token == "symbols":
        while 1:
          tag = self.ts.get()
          if tag == ';':
            break
          (tag_name, tag_rev) = tag.split(':')
          self.sink.define_tag(tag_name, tag_rev)
      elif token == "comment":
        semi, comment = self.ts.mget(2)
        self.sink.set_comment(comment)
        if semi != ';':
          raise common.RCSExpected(semi, ';')
      elif token == "expand":
        semi, expand_mode = self.ts.mget(2)
        self.sink.set_expansion(expand_mode)
        if semi != ';':
          # was a bare RCSExpected, which is not defined in this module
          raise common.RCSExpected(semi, ';')
      elif token == "locks":
        while 1:
          tag = self.ts.get()
          if tag == ';':
            break
          (locker, rev) = tag.split(':')
          self.sink.set_locker(rev, locker)

        # an optional "strict;" may follow the list of locks
        tag = self.ts.get()
        if tag == "strict":
          self.sink.set_locking("strict")
          self.ts.match(';')
        else:
          self.ts.unget(tag)
      elif token == "access":
        accessors = []
        while 1:
          tag = self.ts.get()
          if tag == ';':
            if accessors != []:
              self.sink.set_access(accessors)
            break
          accessors = accessors + [ tag ]

      # Chew up "newphrase".
      else:
        pass
        # warn("Unexpected RCS token: $token\n")