"""expat-based reader that loads a BisonGen XML description.

Each XML element is collected as a list ``[(name, attrs, info), child, ...]``
where ``info`` is ``(line number, file URL)``.  When an element closes, a
``handle_<element-name>`` method (dashes mapped to underscores) may turn that
list into a component or store data directly on the root object kept at
``stack[0]``.
"""

import os
import re
import stat
import sys
import types
import urllib
from xml.parsers import expat

import Components
from Errors import *


def stripWhitespace(node):
    """Remove whitespace-only text children from *node*, in place.

    ``node[0]`` (the element header) is never examined; only the entries
    from index 1 onward are candidates for removal.
    """
    node[1:] = [child for child in node[1:]
                if not (isinstance(child, (str, unicode))
                        and not child.strip())]


def _childElements(node):
    """Return the children of *node* that are raw element nodes (lists).

    The header tuple, text strings and objects produced by handlers are
    all skipped.
    """
    return [child for child in node if isinstance(child, list)]


def _requireAttribute(attrs, key, name, info):
    """Return ``attrs[key]`` or raise MissingAttribute for element *name*."""
    try:
        return attrs[key]
    except KeyError:
        raise MissingAttribute(name, key, info)


class BisonGenReader:
    """Parse one file and accumulate the result on a shared node stack.

    ``stack[0]`` is the root object (a ``Components.Parser`` unless a stack
    is passed in); entries above it are the elements currently open.
    """

    def __init__(self, stack=None):
        """Create the expat parser and wire its callbacks to this reader.

        stack -- an existing node stack to continue working on (used for
                 included files); a fresh one is created when omitted.
        """
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.startElement
        parser.EndElementHandler = self.endElement
        # StartCdataSectionHandler is intentionally not set: expat calls it
        # with no arguments, so binding it to text() raised TypeError on any
        # CDATA section.  CDATA content still arrives through
        # CharacterDataHandler.
        parser.ProcessingInstructionHandler = self.command
        parser.CharacterDataHandler = self.text

        self.parser = parser
        self.stack = stack or [Components.Parser()]

    def parse(self, filename):
        """Parse *filename* and return the node left on top of the stack.

        The root object's ``last_modified`` is raised to the file's mtime
        when the file is newer.  An expat error is reported on stdout and
        converted into SystemExit carrying the error text.
        """
        mtime = os.stat(filename)[stat.ST_MTIME]
        root = self.stack[0]
        if mtime > root.last_modified:
            root.last_modified = mtime

        # Remembered so that includes resolve relative to this file and so
        # that every node can record which file it came from.
        self.base = os.path.dirname(filename)
        self.filename = urllib.pathname2url(os.path.normpath(filename))

        stream = open(filename)
        try:
            data = stream.read()
        finally:
            stream.close()

        try:
            # isfinal=True tells expat this is the whole document, so that
            # truncated or unclosed input is reported as an error.
            self.parser.Parse(data, True)
        except expat.ExpatError:
            # sys.exc_info() instead of "except X, exc" keeps this file
            # parseable by both Python 2 and Python 3 tooling.
            exc = sys.exc_info()[1]
            print('Exception during parse of %s' % filename)
            raise SystemExit(str(exc))
        return self.stack[-1]

    def command(self, target, data):
        """Processing-instruction callback; handles ``<?include path?>``.

        The included file is parsed by a new reader sharing this stack, so
        its elements are attached to whatever node is currently open.
        """
        if target == 'include':
            filename = os.path.join(self.base, data)
            BisonGenReader(self.stack).parse(filename)

    def text(self, data):
        """Character-data callback: append *data* to the open node.

        Consecutive chunks are merged into a single string child.
        """
        current = self.stack[-1]
        if len(current):
            existing = current[-1]
            if isinstance(existing, (str, unicode)):
                # Extend the previous text chunk rather than adding a new one
                current[-1] = existing + data
                return
        # No previous text, just add it
        current.append(data)

    def startElement(self, name, attribs):
        """Start-tag callback: push a new working node onto the stack.

        ``fragment`` elements are transparent; they create no node.
        """
        if name != 'fragment':
            info = (self.parser.ErrorLineNumber, self.filename)
            self.stack.append([(name, attribs, info)])

    def endElement(self, name):
        """End-tag callback: finish the working node and attach the result.

        The node is passed to ``handle_<name>`` when such a method exists.
        A false result (None, empty list, ...) means the handler consumed
        the node and nothing is attached to the parent.
        """
        if name == 'fragment':
            # Skip the fragments, just for include wrapping anyway
            return

        component = self.stack.pop()
        stripWhitespace(component)

        handler = getattr(self, 'handle_' + name.replace('-', '_'), None)
        if handler:
            component = handler(component)
            if not component:
                return
        self.stack[-1].append(component)

    def handle_bisongen(self, node):
        """Store the required ``name`` and ``project`` attributes on the root.

        Raises MissingAttribute when either attribute is absent.
        """
        (name, attrs, info) = node[0]
        root = self.stack[0]
        root.name = _requireAttribute(attrs, 'name', name, info)
        root.project = _requireAttribute(attrs, 'project', name, info)

    def handle_options(self, node):
        """Merge the element's attributes into the root's ``options``.

        A ``debug`` attribute is converted to an int; a value that cannot
        be converted becomes 0.
        """
        (name, attrs, info) = node[0]
        if 'debug' in attrs:
            try:
                debug = int(attrs['debug'])
            except (TypeError, ValueError):
                debug = 0
            attrs['debug'] = debug
        self.stack[0].options.update(attrs)

    def handle_modules(self, node):
        """Record each ``import`` child as a (package, module) pair.

        ``package`` is the child's ``from`` attribute (None when absent)
        and ``module`` its stripped text.  Other children are ignored.
        """
        for child in _childElements(node):
            (name, attrs, info) = child[0]
            if name != 'import':
                # skip any child that are not import
                continue
            package = attrs.get('from')
            module = child[1].strip()
            self.stack[0].imports.append((package, module))

    def handle_tokens(self, node):
        """Append the stripped text of every ``token`` child to the root."""
        for child in _childElements(node):
            (name, attrs, info) = child[0]
            if name == 'token':
                self.stack[0].tokens.append(child[1].strip())

    def handle_grammar(self, node):
        """Build ``Components.Grammar`` from the collected rule lists.

        Every list child (as returned by handle_production) is flattened
        into one rule sequence; ``start`` comes from the optional ``start``
        attribute.  The grammar is stored on the root, not on the stack.
        """
        start = node[0][1].get('start')
        rules = []
        for child in _childElements(node):
            rules.extend(child)
        self.stack[0].grammar = Components.Grammar(start, rules)

    def handle_production(self, node):
        """Tag the production's rules with its left-hand side and name.

        The left-hand side is the text of the first ``non-terminal`` child
        ('' when there is none); the production name is the optional
        ``name`` attribute ('' when absent).  Returns the list of rule
        instances found among the children.
        """
        prod_num = node[0][1].get('name', '')

        lhs = ''
        for child in _childElements(node):
            if child[0][0] == 'non-terminal':
                lhs = child[1].strip()
                break

        rules = [x for x in node if isinstance(x, types.InstanceType)]
        for rule in rules:
            rule.lhs = lhs
            rule.production = prod_num
        return rules

    def handle_rule(self, node):
        """Build a ``Components.Rule`` from ``symbol`` and ``code`` children.

        ``symbol`` texts become ``rule.rhs``; each ``code`` child is stored
        in ``rule.action`` under its lower-cased ``language`` attribute.
        Raises MissingAttribute when a ``code`` child has no ``language``.
        """
        (lineno, filename) = node[0][2]
        rule = Components.Rule(lineno, filename)

        symbols = []
        for child in _childElements(node):
            (name, attrs, info) = child[0]
            if name == 'symbol':
                symbols.append(child[1].strip())
            elif name == 'code':
                language = _requireAttribute(
                    attrs, 'language', name, info).lower()
                # Exactly header + one text child means there is a body;
                # anything else is treated as an empty action.
                if len(child) == 2:
                    code = child[1]
                else:
                    code = ''
                rule.action[language] = code
        rule.rhs = symbols
        return rule

    def handle_lexer(self, node):
        """Assemble ``Components.Lexer`` from the already-built children.

        Defines and States instances are merged; any other instance is
        treated as a pattern for the 'INITIAL' state; dict children (as
        returned by handle_scope) contribute patterns for further states.
        """
        defines = Components.Defines()
        states = Components.States()
        states.addState('exclusive', 'INITIAL')
        patterns = {'INITIAL': []}

        # node[0] is the header; we don't need the information
        for child in node[1:]:
            if isinstance(child, types.InstanceType):
                if isinstance(child, Components.Defines):
                    defines.merge(child)
                elif isinstance(child, Components.States):
                    states.merge(child)
                else:
                    # It is a pattern
                    patterns['INITIAL'].append(child)
            elif isinstance(child, dict):
                patterns.update(child)

        # Don't store on the stack, instead directly on the Parser
        self.stack[0].lexer = Components.Lexer(defines, states, patterns)

    # NOTE(review): used with match(), so only the start of the name is
    # anchored; trailing characters are not validated.
    _defining_name_patt = re.compile(r'[a-zA-Z_][\w_-]*')

    def handle_defines(self, node):
        """Build ``Components.Defines`` from the ``define`` children.

        Raises UnknownElement for any other child element,
        MissingAttribute when ``name`` is absent and GeneralError when the
        name does not start like an identifier.
        """
        defines = Components.Defines()
        for child in _childElements(node):
            (name, attrs, info) = child[0]
            if name != 'define':
                raise UnknownElement('defines', name, info)
            defining = _requireAttribute(attrs, 'name', name, info)
            if not self._defining_name_patt.match(defining):
                raise GeneralError("bad defining name '%s'" % defining, info)
            defines.addDefine(defining, child[1].strip())
        return defines

    def handle_states(self, node):
        """Build ``Components.States``; the child tag name is the state kind.

        Raises UnknownElement when ``addState`` returns a true value.
        """
        states = Components.States()
        for child in _childElements(node):
            (name, attrs, info) = child[0]
            if states.addState(name, child[1].strip()):
                raise UnknownElement('states', name, info)
        return states

    def handle_scope(self, node):
        """Return ``{state: patterns}`` for the patterns inside a scope.

        Raises MissingAttribute when the ``state`` attribute is absent.
        """
        (name, attrs, info) = node[0]
        state = _requireAttribute(attrs, 'state', name, info)
        # Patterns are constructed already
        patterns = [x for x in node[1:] if isinstance(x, types.InstanceType)]
        return {state: patterns}

    def handle_pattern(self, node):
        """Build a ``Components.Pattern`` from the ``expression`` attribute.

        ``begin``, ``value`` and ``token`` children set the attribute of
        the same name to their stripped text.  Raises MissingAttribute
        when ``expression`` is absent and UnknownElement for any other
        child element.
        """
        (name, attrs, info) = node[0]
        expression = _requireAttribute(attrs, 'expression', name, info)
        (lineno, filename) = info
        pattern = Components.Pattern(lineno, filename, expression)

        for child in _childElements(node):
            (name, attrs, info) = child[0]
            # 'code' children were handled by a branch that the original
            # disabled with "if 0"; they are still rejected below.
            if name in ('begin', 'value', 'token'):
                setattr(pattern, name, child[1].strip())
            else:
                raise UnknownElement('pattern', name, info)
        return pattern