import sys, os import Warshall import SymbolTable import State DEBUG = 0 # From bison-1.28 class Bison: def __init__(self, options=None): self.program_name = sys.argv[0] self.set_options(options or {}) return def set_options(self, options): self.options = {'verbose' : 0, # -v, --verbose 'debug' : 0, # -t, --debug 'stats' : 0, 'input' : '', } self.options.update(options) return # main.c def generate(self, parser): self.failure = 0 self.openfiles(parser) # Convert the raw grammar into useful tables self.reader(parser.tokens, parser.grammar) # Find useless nonterminals and productions and reduce the grammar. self.reduce_grammar() # Record other info about the grammar self.set_derives() self.set_nullable() # Convert to nondeterministic state machine. self.generate_states() # Make it deterministic self.lalr() # Find and record any conflicts: # Places where one token of lookahead is not enough to disambiguate # the parsing. Also resolve any s/r conflicts based on precedence. has_conflicts = self.initialize_conflicts() # Print information about the results, if requested. if self.options['verbose']: if has_conflicts: self.verbose_conflict_log() self.print_grammar() for i in range(self.nstates): self.print_state(i) elif has_conflicts: self.conflict_log() self.closefiles() return def openfiles(self, parser): if self.options['verbose']: filename = '%s.output' % parser.name base_dir = os.path.dirname(self.options['input']) output_dir = self.options.get('output', base_dir) self.foutput = open(os.path.join(output_dir, filename), 'w') else: self.foutput = None return def closefiles(self): self.foutput and self.foutput.close() def fatal(self, msg): banner = self.options.get('input', self.program_name) raise SystemExit('%s: fatal error: %s\n' % (banner, msg)) def warn(self, msg): banner = self.options.get('input', self.program_name) sys.stderr.write('%s: %s\n' % (banner, msg)) return # ------------------------------------------------------ # reader.c def reader(self, tokens, grammar): symtab = SymbolTable.SymbolTable() # construct the error token errtoken = symtab.generate('error') errtoken.ident = SymbolTable.TOKEN errtoken.user_token_number = 256 # construct a token that represents all undefined literal tokens. # it is always token number 2 undeftoken = symtab.generate('$undefined.') undeftoken.ident = SymbolTable.TOKEN undeftoken.user_token_number = 2 # construct symbols for defined tokens for token in tokens: symbol = symtab.generate(token) symbol.ident = SymbolTable.TOKEN if grammar.start: self.startval = symtab.generate(grammar.start) else: self.startval = None self.build_grammar(grammar.rules, symtab) self.pack_symbols(symtab) self.pack_grammar() self.error_token_number = errtoken.value if self.startval.ident == SymbolTable.UNKNOWN: self.fatal('the start symbol %s is undefined' % startval.tag) elif self.startval.ident == SymbolTable.TOKEN: self.fatal('the start symbol %s is a token' % startval.tag) self.start_symbol = self.startval.value return def build_grammar(self, rules, symtab): self.rline = [('', 0)] self.rules = rules grammar = [] nvars = 0 nitems = 0 if not rules: self.fatal('no rules in the input grammar') for rule in rules: # start a new rule and record its line number self.rline.append((rule.filename, rule.lineno)) lhs = symtab.generate(rule.lhs) if not self.startval: self.startval = lhs nitems = nitems + 1 grammar.append(lhs) # Mark the rule's lhs as a nonterminal if not already so if lhs.ident == SymbolTable.UNKNOWN: lhs.ident = SymbolTable.NTERM lhs.value = nvars nvars = nvars + 1 elif lhs.ident == SymbolTable.TOKEN: self.warn('rule given for %s, which is a token' % lhs.tag) # Now check the rhs of the rule for name in rule.rhs: symval = symtab.generate(name) nitems = nitems + 1 grammar.append(symval) grammar.append(None) # Report any undefined symbols and consider them nonterminals for symbol in symtab.symbols: if symbol.ident == SymbolTable.UNKNOWN: self.warn('symbol %s is used, but is not defined' % symbol.tag) symbol.ident = SymbolTable.NTERM symbol.value = nvars nvars = nvars + 1 self.grammar = grammar self.nvars = nvars self.nsyms = len(symtab.symbols) + 1 self.ntokens = self.nsyms - nvars self.nrules = len(rules) self.nitems = nitems return def pack_symbols(self, symtab): tags = [0]*(self.nsyms+1) tags[0] = '' user_toknums = [0]*(self.nsyms+1) sprec = [0]*(self.nsyms) sassoc = [0]*(self.nsyms) max_user_token = last_user_token = 256 tokno = 1 for symbol in symtab.symbols: if symbol.ident == SymbolTable.NTERM: symbol.value = symbol.value + self.ntokens elif symbol.alias: # Used for precedence and association #FIXME: implement pass else: # symbol.ident == TOKEN symbol.value = tokno tokno = tokno + 1 if symbol.ident == SymbolTable.TOKEN: if not symbol.user_token_number: last_user_token = last_user_token + 1 symbol.user_token_number = last_user_token if max_user_token < last_user_token: max_user_token = last_user_token tags[symbol.value] = symbol.tag user_toknums[symbol.value] = symbol.user_token_number sprec[symbol.value] = symbol.prec sassoc[symbol.value] = symbol.assoc # initialize all entries for literal tokens to 2, the internal # token number for $undefined, which represents all invalid inputs. token_translations = [2]*(max_user_token+1) for symbol in symtab.symbols: if symbol.value >= self.ntokens: continue if symbol.user_token_number == SymbolTable.ALIAS: continue if token_translations[symbol.user_token_number] != 2: self.warn('tokens %s and %s both assigned number %d' % ( tags[token_translations[symbol.user_token_number]], symbol.tag, symbol.user_token_number)) token_translations[symbol.user_token_number] = symbol.value self.tags = tags self.user_toknums = user_toknums self.sprec = sprec self.sassoc = sassoc self.token_translations = token_translations self.max_user_token = max_user_token return def pack_grammar(self): ritem = [0]*(self.nitems+1) rlhs = [0]*(self.nrules+1) rrhs = [0]*(self.nrules+1) rprec = [0]*(self.nrules+1) rassoc = [0]*(self.nrules+1) itemno = 0 ruleno = 1 pos = 0 while pos < len(self.grammar) and self.grammar[pos]: rlhs[ruleno] = self.grammar[pos].value rrhs[ruleno] = itemno pos = pos + 1 while self.grammar[pos]: symbol = self.grammar[pos] ritem[itemno] = symbol.value itemno = itemno + 1 # A rule gets by default the precedence and associativity # of the last token in it. if symbol.ident == SymbolTable.TOKEN: rprec[ruleno] = symbol.prec rassoc[ruleno] = symbol.assoc pos = pos + 1 ritem[itemno] = -ruleno itemno = itemno + 1 ruleno = ruleno + 1 pos = pos + 1 ritem[itemno] = 0 self.ritem = ritem self.rlhs = rlhs self.rrhs = rrhs self.rprec = rprec self.rassoc = rassoc return # ------------------------------------------------------ # reduce.c def reduce_grammar(self): N = [0]*self.nvars P = [0]*(self.nrules+1) V = [0]*(self.nsyms) V1 = [0]*(self.nsyms) # P is modified in place N = self.useless_nonterminals(N, P) # N is modified in place (P, V) = self.inaccessable_symbols(N, P, V) if self.options['verbose']: self.print_results(P, V, V1) if (self.nuseless_productions + self.nuseless_nonterminals) > 0: self.print_notices() if not N[self.start_symbol - self.ntokens]: self.fatal('Start symbol %s does not derive any sentence' % self.tags[self.start_symbol]) self.reduce_grammar_tables(P, V) if self.options['verbose']: self.foutput.write('REDUCED GRAMMAR\n\n') self.dump_grammar() if self.options['stats']: sys.stderr.write('reduced %s defines %d terminal%s, ' % ( self.options['input'], self.ntokens, self.ntokens > 1 and 's' or '')) sys.stderr.write('%d nonterminal%s, ' % ( self.nvars, self.nvars > 1 and 's' or '')) sys.stderr.write('and %d production%s.\n' % ( self.nrules, self.nrules > 1 and 's' or '')) return def useful_production(self, i, N): """Helper for useless_nonterminals()""" n = self.rrhs[i] while self.ritem[n] > 0: r = self.ritem[n] if r >= self.ntokens: # ISVAR(r) if not N[r - self.ntokens]: return 0 n = n + 1 return 1 def useless_nonterminals(self, N, P): while 1: Np = N[:] for i in range(1, self.nrules+1): if not P[i]: if self.useful_production(i, N): Np[self.rlhs[i] - self.ntokens] = 1 P[i] = 1 if Np == N: break swap = Np Np = N N = swap return Np def inaccessable_symbols(self, N, P, V): Vp = [0]*(self.nsyms) Pp = [0]*(self.nrules+1) # If the start symbol isn't useful, then nothing will be useful if N[self.start_symbol - self.ntokens]: V[self.start_symbol] = 1 while 1: Vp = V[:] for i in range(1, self.nrules+1): if not Pp[i] and P[i] and V[self.rlhs[i]]: n = self.rrhs[i] while self.ritem[n] >= 0: t = self.ritem[n] if t < self.ntokens or N[t - self.ntokens]: Vp[t] = 1 n = n + 1 Pp[i] = 1 if Vp == V: break swap = Vp Vp = V V = swap V = Vp # Tokens 0, 1, 2 are internal. Consider them useful. V[0] = 1 # end-of-input V[1] = 1 # error token V[2] = 1 # some undefined token P = Pp nuseful_productions = reduce(lambda a, b: a + b, P, 0) self.nuseless_productions = self.nrules - nuseful_productions nuseful_nonterminals = 0 for i in range(self.ntokens, self.nsyms): if V[i]: nuseful_nonterminals = nuseful_nonterminals + 1 self.nuseless_nonterminals = self.nvars - nuseful_nonterminals return (P, V) def print_results(self, P, V, V1): if self.nuseless_nonterminals > 0: self.foutput.write('Useless nonterminals:\n\n') for i in range(self.ntokens, self.nsyms): if not V[i]: self.foutput.write(' %s\n' % self.tags[i]) title = 0 for i in range(self.ntokens): if not (V[i] or V1[i]): if not title: self.foutput.write('\n\nTerminals which are not used:\n\n') title = 1 self.foutput.write(' %s\n' % self.tags[i]) if self.nuseless_productions > 0: self.foutput.write('\n\nUseless rules:\n\n') for i in range(1,self.nrules+1): if not P[i]: self.foutput.write('#%-4d %s :\t' % ( i, self.tags[self.rlhs[i]])) n = self.rlhs[i] while self.ritem[n]: self.foutput.write(' %s' % self.tags[self.ritem[n]]) n = n + 1 self.foutput.write(';\n') if self.nuseless_nonterminals or self.nuseless_productions or title: self.foutput.write('\n\n') return def reduce_grammar_tables(self, P, V): # Disable useless productions, since they may contain useless # nonterms that would get mapped below to -1 and confuse everyone if self.nuseless_productions > 0: for i in range(1, self.nrules+1): if not P[i]: self.rlhs[i] = -1 # remove useless symbols if self.nuseless_nonterminals > 0: # create a map of nonterminal number to new nonterminal # number. -1 in the map means it was useless and is being # eliminated. nontermmap = [0]*(self.nsyms) n = self.ntokens for i in range(self.ntokens, self.nsyms): if V[i]: nontermmap[i] = n n = n + 1 else: nontermmap[i] = -1 # Shuffle elements of tables indexed by symbol number for i in range(self.ntokens, self.nsyms): n = nontermmap[i] if n >= 0: self.sassoc[n] = self.sassoc[i] self.sprec[n] = self.sprec[i] self.tags[n] = self.tags[i] else: self.tags[i] = None # Replace all symbol numbers in valid data structures for i in range(1, self.nrules+1): # Ignore the rules disabled above if self.rlhs[i] >= 0: self.rlhs[i] = nontermmap[self.rlhs[i]] for i in range(self.nitems): if self.ritem[i] >= self.ntokens: # ISVAR(r) self.ritem[i] = nontermmap[self.ritem[i]] self.start_symbol = nontermmap[self.start_symbol] self.nsyms = self.nsyms - self.nuseless_nonterminals self.nvars = self.nvars - self.nuseless_nonterminals return def dump_grammar(self): write = self.foutput.write write('ntokens=%d, nvars=%d, nsyms=%d, nrules=%d, nitems=%d\n\n' % ( self.ntokens, self.nvars, self.nsyms, self.nrules, self.nitems)) write('Variables\n---------\n\n') write('Value Sprec Sassoc Tag\n') for i in range(self.ntokens, self.nsyms): write('%5d %5d %5d %s\n' % ( i, self.sprec[i], self.sassoc[i], self.tags[i])) write('\n\n') write('Rules\n-----\n\n') for i in range(1, self.nrules+1): write('%-5d(%5d%5d)%5d : (@%-5d)' % ( i, self.rprec[i], self.rassoc[i], self.rlhs[i], self.rrhs[i])) n = self.rrhs[i] while self.ritem[n] > 0: write('%5d' % self.ritem[n]) n = n + 1 write(' [%d]\n' % -self.ritem[n]) write('\n\n') write('Rules interpreted\n-----------------\n\n') for i in range(1, self.nrules+1): write('%-5d %s :' % (i, self.tags[self.rlhs[i]])) n = self.rrhs[i] while self.ritem[n] > 0: write(' %s' % self.tags[self.ritem[n]]) n = n + 1 write('\n') write('\n\n') return def print_notices(self): write = sys.stderr.write if self.nuseless_productions: write('%d rules never reduced\n' % self.nuseless_productions) write('%s contains ' % self.options['input']) if self.nuseless_nonterminals: write('%d useless nonterminal%s' % ( self.nuseless_nonterminals, self.nuseless_nonterminals > 1 and 's' or '')) if self.nuseless_nonterminals and self.nuseless_productions: write(' and ') if self.nuseless_productions: write('%d useless rules%s' % ( self.nuseless_productions, self.nuseless_productions > 1 and 's' or '')) write('\n') return # ------------------------------------------------------ # derives.c def set_derives(self): self.derives = [] for i in range(self.nsyms): self.derives.append([]) last_lhs = 0 for i in range(1, self.nrules+1): lhs = self.rlhs[i] if lhs >= 0: self.derives[lhs].append(i) if DEBUG: print print '-'*40 print 'DERIVES' for i in range(self.ntokens, self.nsyms): print "\n%s derives" % self.tags[i], for sp in self.derives[i]: print sp, print return # ------------------------------------------------------ # nullable.c def set_nullable(self): rcount = [0]*(self.nrules+1) rsets = [None]*(self.nsyms) squeue = [] self.nullable = [0]*(self.nsyms) itemno = 0 while self.ritem[itemno]: r = self.ritem[itemno] if r < 0: symbol = self.rlhs[-r] itemno = itemno + 1 if symbol >= 0 and not self.nullable[symbol]: self.nullable[symbol] = 1 squeue.append(symbol) else: any_tokens = 0 old_item = itemno symbol = self.ritem[itemno] itemno = itemno + 1 while symbol > 0: if symbol < self.ntokens: any_tokens = 1 symbol = self.ritem[itemno] itemno = itemno + 1 if not any_tokens: ruleno = -symbol itemno = old_item symbol = self.ritem[itemno] itemno = itemno + 1 while symbol > 0: if not rsets[symbol]: rsets[symbol] = [ruleno] else: rsets[symbol].append(ruleno) rcount[ruleno] = rcount[ruleno] + 1 symbol = self.ritem[itemno] itemno = itemno + 1 for symbol in squeue: rulesets = rsets[symbol] or [] for ruleno in rulesets: rcount[ruleno] = rcount[ruleno] - 1 if rcount[ruleno] == 0: symbol = self.rlhs[ruleno] if symbol >= 0 and not self.nullable[symbol]: self.nullable[symbol] = 1 squeue.append(symbol) return ISVAR = lambda self, symbol: symbol >= self.ntokens ISTOKEN = lambda self, symbol: symbol < self.ntokens # ------------------------------------------------------ # closure.c def initialize_closure(self): self.set_firsts() self.set_fderives() return def set_firsts(self): """ set firsts to be an nvars by nvars matrix indicating which items can represent the beginning of input corresponding to which other items. """ self.firsts = [None]*self.nvars for i in range(self.nvars): self.firsts[i] = [0]*self.nvars for i in range(self.ntokens, self.nsyms): for sp in self.derives[i]: symbol = self.ritem[self.rrhs[sp]] if self.ISVAR(symbol): symbol = symbol - self.ntokens self.firsts[i - self.ntokens][symbol] = 1 Warshall.RTC(self.firsts, self.nvars) if DEBUG: print print '-'*40 print 'FIRSTS' for i in range(self.nvars): print "\n%s firsts\n" % self.tags[i + self.ntokens] for j in range(self.nvars): if self.firsts[i][j]: print " %s" % self.tags[j + self.ntokens] return def set_fderives(self): """ set fderives to a nvars by nrules matrix indicating which rules can help derive the beginning of data for each nonterminal. """ self.fderives = [None]*self.nvars for i in range(self.nvars): self.fderives[i] = [0]*(self.nrules+1) for i in range(self.nvars): for j in range(self.nvars): if self.firsts[i][j]: for ruleno in self.derives[j + self.ntokens]: self.fderives[i][ruleno] = 1 if DEBUG: print print '-'*40 print "FDERIVES" for i in range(self.nvars): print "\n%s derives\n" % self.tags[i+self.ntokens] for j in range(self.nrules+1): if self.fderives[i][j]: print " %d" % j return def closure(self, items): """ Given a vector of item numbers items, set up ruleset and itemset to indicate what rules could be run and which items could be accepted when those items are the active ones. """ if len(items) == 0: ruleset = self.fderives[self.start_symbol - self.ntokens][:] else: ruleset = [0]*(self.nrules+1) for itemno in items: symbol = self.ritem[itemno] if self.ISVAR(symbol): ruleset = map(lambda a, b: a or b, ruleset, self.fderives[symbol - self.ntokens]) pos = 0 itemset = [] for ruleno in range(self.nrules+1): if ruleset[ruleno]: itemno = self.rrhs[ruleno] while pos < len(items) and items[pos] < itemno: itemset.append(items[pos]) pos = pos + 1 itemset.append(itemno) while pos < len(items): itemset.append(items[pos]) pos = pos + 1 return itemset # ------------------------------------------------------ # LR0.c def generate_states(self): self.allocate_storage() self.initialize_closure() self.states = [State.Core(0,0,[])] for this_state in self.states: # Set up ruleset and itemset for the transitions out of this state. # ruleset gets 1 bit for each rule that could reduce now. # itemset gets a vector of all the items that could be accepted next. itemset = self.closure(this_state.items) # record the reductions allowed out of this state self.save_reductions(this_state, itemset) # find the itemsets of the states that shifts can reach shift_symbol = self.new_itemsets(itemset) # find or create the core structures for those states shiftset = self.append_states(shift_symbol) # create the shifts structures for the shifts to those states, # now that the state numbers transitioning to are known. if len(shiftset): self.shifts.append(State.Shifts(this_state.number, shiftset)) # set up initial and final states as parser wants them self.augment_automaton() self.nstates = len(self.states) return def allocate_storage(self): self.kernel_items = [] for i in range(self.nsyms): self.kernel_items.append([]) self.state_table = {} self.shifts = [] self.reductions = [] return def save_reductions(self, state, itemset): """ find which rules can be used for reduction transitions from the current state and make a reductions structure for the state to record their rule numbers. """ # find and count the active items that represent ends of rules redset = [] for itemno in itemset: item = self.ritem[itemno] if item < 0: redset.append(-item) # make a reductions structure and copy the data into it if redset: self.reductions.append(State.Reductions(state.number, redset)) return def new_itemsets(self, itemset): """ find which symbols can be shifted in the current state, and for each one record which items would be active after the shift. """ shift_symbol = [] for itemno in itemset: symbol = self.ritem[itemno] if symbol > 0: if symbol not in shift_symbol: shift_symbol.append(symbol) self.kernel_items[symbol] = [] self.kernel_items[symbol].append(itemno + 1) return shift_symbol def append_states(self, shift_symbol): """ Use the information computed by new_itemsets to find the state numbers reached by each shift transition from the current state. """ # first sort shift_symbol into increasing order shift_symbol.sort() shiftset = [] for symbol in shift_symbol: # find the state number for the state we would get to # (from the current state) by shifting symbol key = tuple(self.kernel_items[symbol]) state = self.state_table.get(key) if not state: state = State.Core(len(self.states), symbol, self.kernel_items[symbol]) self.state_table[key] = state self.states.append(state) shiftset.append(state.number) return shiftset def augment_automaton(self): if self.shifts: shift = self.shifts[0] if shift.number == 0: # The states reached by shifts from the first state are # numbered 1...K. Look for one reached # by the start_symbol. k = len(shift.shifts) for state in self.states[1:k]: if state.symbol == self.start_symbol: # We already have a next-to-final state. # Make sure it has a shift to what will be the final state. k = state.number pos = 0 # shift is already the first one while pos < len(self.shifts) and shift.number < k: pos = pos + 1 shift = self.shifts[pos] if shift.number == k: shift.shifts.insert(0, len(self.states)) else: self.shifts.insert(pos, State.Shifts(k, [len(self.states)])) break else: # There is no next-to-final state as yet. # Add one more shift in first_shift, going to be # next-to-final state (to be made) pos = 0 for state in self.states[1:k]: if state.symbol > self.start_symbol: break pos = pos + 1 stateno = len(self.states) shift.shifts.insert(pos, stateno) # Create the next-to-final state, with shift to # what will be the final state. state = State.Core(stateno, self.start_symbol, []) self.states.append(state) shift = State.Shifts(stateno, [len(self.states)]) self.shifts.append(shift) else: # The initial state didn't even have any shifts. # Give it one shift, to the next-to-final state. shift = State.Shifts(0, [len(self.states)]) self.shifts.insert(0, shift) # Create the next-to-final state, with shift to # what will be the final state. stateno = len(self.states) state = State.Core(stateno, self.start_symbol, []) self.states.append(state) shift = State.Shifts(stateno, [len(self.states)]) self.shifts.append(shift) else: # There are no shifts for any state. Make one shift, # from the initial state to the next-to-final state. shift = State.Shifts(0, [len(self.states)]) self.shifts.append(shift) # Create the next-to-final state, with shift to # what will be the final state. stateno = len(self.states) state = State.Core(stateno, self.start_symbol, []) self.states.append(state) shift = State.Shifts(stateno, [len(self.states)]) self.shifts.append(shift) # Make the final state -- the one that follows a shift from # the next-to-final state. The symbol is 0 end-(of-file). stateno = len(self.states) state = State.Core(stateno, 0, []) self.states.append(state) # Make the shift from the final state to the termination state. shift = State.Shifts(stateno, [len(self.states)]) self.shifts.append(shift) # Note: the variable `final_state' refers to what we sometimes # call the termination state. self.final_state = len(self.states) # Make the termination state. state = State.Core(len(self.states), 0, []) self.states.append(state) return # ------------------------------------------------------ # lalr.c def lalr(self): # set_state_table() self.state_table = [0]*self.nstates self.accessing_symbol = [0]*self.nstates for state in self.states: self.state_table[state.number] = state self.accessing_symbol[state.number] = state.symbol # set_shift_table() self.shift_table = [0]*self.nstates for shift in self.shifts: self.shift_table[shift.number] = shift # set_reduction_table() self.reduction_table = [0]*self.nstates for reduction in self.reductions: self.reduction_table[reduction.number] = reduction self.initialize_LA() self.set_goto_map() self.initialize_F() self.build_relations() # compute_lookaheads() for i in range(self.lookaheads[self.nstates]): for j in self.lookback[i]: self.LA[i] = map(lambda a, b: a or b, self.LA[i], self.F[j]) del self.lookback del self.F return def initialize_LA(self): consistent = [0]*self.nstates lookaheads = [0]*(self.nstates+1) count = 0 for i in range(self.nstates): lookaheads[i] = count rp = self.reduction_table[i] sp = self.shift_table[i] if rp and (len(rp.rules) > 1 or (sp and self.accessing_symbol[sp.shifts[0]] < self.ntokens)): count = count + len(rp.rules) else: consistent[i] = 1 if sp: for k in range(len(sp.shifts)): if self.accessing_symbol[sp.shifts[k]] == self.error_token_number: consistent[i] = 0 break lookaheads[self.nstates] = count if count == 0: self.lookback = [None] self.LA = [0]*self.ntokens else: self.lookback = [None]*count self.LA = [] for i in range(count): self.LA.append([0]*self.ntokens) self.LAruleno = [] for i in range(self.nstates): if not consistent[i]: if self.reduction_table[i]: self.LAruleno.extend(self.reduction_table[i].rules) self.lookaheads = lookaheads self.consistent = consistent return def set_goto_map(self): goto_map = [0]*(self.nsyms+1) temp_map = [0]*(self.nsyms+1) self.ngotos = 0 for sp in self.shifts: for i in range(1, len(sp.shifts)+1): state = sp.shifts[-i] symbol = self.accessing_symbol[state] if symbol < self.ntokens: break self.ngotos = self.ngotos + 1 goto_map[symbol] = goto_map[symbol] + 1 k = 0 for i in range(self.ntokens, self.nsyms): temp_map[i] = k k = k + goto_map[i] temp_map[self.nsyms] = self.ngotos self.goto_map = temp_map[:] self.from_state = [0]*self.ngotos self.to_state = [0]*self.ngotos for sp in self.shifts: state1 = sp.number for i in range(1, len(sp.shifts)+1): state2 = sp.shifts[-i] symbol = self.accessing_symbol[state2] if symbol < self.ntokens: break k = temp_map[symbol] temp_map[symbol] = k + 1 self.from_state[k] = state1 self.to_state[k] = state2 return def map_goto(self, state, symbol): """ maps a state/symbol pair into its numeric representation. """ low = self.goto_map[symbol] high = self.goto_map[symbol + 1] - 1 while low <= high: middle = (low + high) / 2 s = self.from_state[middle] if s == state: return middle elif s < state: low = middle + 1 else: high = middle - 1 self.fatal('map_goto') return 0 def initialize_F(self): self.F = [None]*self.ngotos reads = [None]*self.ngotos for i in range(self.ngotos): self.F[i] = [0]*self.ntokens reads[i] = [] stateno = self.to_state[i] sp = self.shift_table[stateno] if sp: k = len(sp.shifts) for j in range(k): symbol = self.accessing_symbol[sp.shifts[j]] if symbol >= self.ntokens: break self.F[i][symbol] = 1 else: continue while j < k: symbol = self.accessing_symbol[sp.shifts[j]] if self.nullable[symbol]: reads[i].append(self.map_goto(stateno, symbol)) j = j + 1 self.digraph(reads) return def digraph(self, relation): self.infinity = self.ngotos + 2 self.INDEX = [0]*(self.ngotos+1) self.VERTICES = [0]*(self.ngotos+1) self.top = 0 self.R = relation for i in range(self.ngotos): if self.INDEX[i] == 0 and self.R[i]: self.traverse(i) del self.INDEX del self.VERTICES del self.top del self.R return def traverse(self, i): self.top = self.top + 1 self.VERTICES[self.top] = i self.INDEX[i] = height = self.top for j in self.R[i]: if self.INDEX[j] == 0: self.traverse(j) if self.INDEX[i] > self.INDEX[j]: self.INDEX[i] = self.INDEX[j] self.F[i] = map(lambda a, b: a or b, self.F[i], self.F[j]) if self.INDEX[i] == height: while 1: j = self.VERTICES[self.top] self.top = self.top - 1 self.INDEX[j] = self.ngotos + 2 if i == j: break self.F[j] = self.F[i][:] return def get_maxrhs(self): length = 0 maxrhs = 0 for itemno in self.ritem: if itemno > 0: length = length + 1 else: if length > maxrhs: maxrhs = length length = 0 return maxrhs def build_relations(self): maxrhs = self.get_maxrhs() includes = [] states = [0]*(maxrhs+1) for i in range(self.ngotos): includes.append([]) state1 = self.from_state[i] symbol1 = self.accessing_symbol[self.to_state[i]] for rule in self.derives[symbol1]: length = 1 states[0] = state1 stateno = state1 rp = self.rrhs[rule] while self.ritem[rp] > 0: symbol2 = self.ritem[rp] sp = self.shift_table[stateno] for stateno in sp.shifts: if self.accessing_symbol[stateno] == symbol2: break states[length] = stateno length = length + 1 rp = rp + 1 if not self.consistent[stateno]: self.add_lookback_edge(stateno, rule, i) length = length - 1 done = 0 while not done: done = 1 rp = rp - 1 if (rp >= 0) and (self.ritem[rp] >= self.ntokens): sym = self.ritem[rp] length = length - 1 stateno = states[length] includes[i].append(self.map_goto(stateno, sym)) done = not self.nullable[sym] relation = [] for i in range(self.ngotos): relation.append([]) for i in range(self.ngotos): for j in includes[i]: relation[j].append(i) self.digraph(relation) return def add_lookback_edge(self, stateno, ruleno, gotono): i = self.lookaheads[stateno] k = self.lookaheads[stateno + 1] found = 0 while not found and (i < k): if self.LAruleno[i] == ruleno: found = 1 else: i = i + 1 if not found: self.fatal('add_lookback_edge') if self.lookback[i]: self.lookback[i].insert(0, gotono) else: self.lookback[i] = [gotono] return # ------------------------------------------------------ # conflicts.c def initialize_conflicts(self): self.conflicts = [0]*len(self.states) self.shiftset = [0]*self.ntokens self.err_table = [None]*len(self.states) has_conflicts = 0 for i in range(len(self.states)): if self.consistent[i]: has_conflicts = self.set_conflicts(i) return has_conflicts def set_conflicts(self, state): lookaheadset = [0]*self.ntokens any_conflicts = 0 shift = self.shift_table[state] if shift: for stateno in shift.shifts: symbol = self.accessing_symbol[stateno] if symbol >= self.ntokens: break lookaheadset[symbol] = 1 k = self.lookaheads[state + 1] # loop over all rules which require lookahead in this state. # first check for shift-reduce conflict, and try to resolve # using precedence. for i in range(self.lookaheads[state], k): if self.rprec[self.LAruleno[i]]: for j in range(self.ntokens): if self.LA[i][j] and lookaheadset[j]: self.resolve_sr_conflict(state, i) break # loop over all rules which require lookahead in this state. # Check for conflicts not resolved above. for i in range(self.lookaheads[state], k): for j in range(self.ntokens): if self.LA[i][j] and lookaheadset[j]: self.conflicts[state] = 1 any_conflicts = 1 break lookaheadset = map(lambda a, b: a | b, lookaheadset, self.LA[i]) return any_conflicts def resolve_sr_conflict(self, state, LAnum): """ Attempt to resolve shift-reduce conflicts for one rule by means of precedence declarations. A conflict is resolved by modifying the shift or reduce tables so that there is no longer a conflict. """ self.err_table[state] = [] #FIXME: implement precedence return def conflict_log(self): sr_total = 0 rr_total = 0 for i in range(self.nstates): if self.conflicts[i]: sr_total = sr_total + self.count_sr_conflicts(i) rr_total = rr_total + self.count_rr_conflicts(i) self.total_conflicts(sr_total, rr_total) return def verbose_conflict_log(self): write = self.foutput.write sr_total = 0 rr_total = 0 for i in range(self.nstates): if self.conflicts[i]: sr_total = sr_total + self.count_sr_conflicts(i) rr_total = rr_total + self.count_rr_conflicts(i) write('State %d contains ' % i) if sr_total: write('%d shift/reduce conflict%s' % ( sr_total, sr_total > 1 and 's' or '')) if sr_total and rr_total: write(' and ') if rr_total: write('%d reduce/reduce conflict%s' % ( rr_total, rr_total > 1 and 's' or '')) write('.\n') self.total_conflicts(sr_total, rr_total) return def total_conflicts(self, sr_total, rr_total): write = sys.stderr.write write('%s contains ' % self.options['input']) if sr_total: write('%d shift/reduce conflict%s' % ( sr_total, sr_total > 1 and 's' or '')) if sr_total and rr_total: write(' and ') if rr_total: write('%d reduce/reduce conflict%s' % ( rr_total, rr_total > 1 and 's' or '')) write('.\n') return def count_sr_conflicts(self, state): shift = self.shift_table[state] if not shift: return 0 self.shiftset = [0]*self.ntokens lookaheadset = [0]*self.ntokens for stateno in shift.shifts: if not stateno: continue symbol = self.accessing_symbol[stateno] if symbol >= self.ntokens: break self.shiftset[symbol] = 1 k = self.lookaheads[state + 1] for i in range(self.lookaheads[state], k): lookaheadset = map(lambda a, b: a or b, lookaheadset, self.LA[i]) sr_count = 0 for i in range(self.ntokens): if lookaheadset[i] and self.shiftset[i]: sr_count = sr_count + 1 return sr_count def count_rr_conflicts(self, state): m = self.lookaheads[state] n = self.lookaheads[state + 1] if (n - m) < 2: return 0 rr_count = 0 for i in range(self.ntokens): count = 0 for j in range(m, n): if self.LA[j][i]: count = count + 1 if count >= 2: rr_count = rr_count + 1 return rr_count # ------------------------------------------------------ # print.c def print_grammar(self): write = self.foutput.write write('\nGrammar\n') # rule # : LHS -> RHS for i in range(1, self.nrules+1): # Don't print rules disabled in reduce_grammar_tables if self.rlhs[i] >= 0: write('rule %-4d %s ->' % (i, self.tags[self.rlhs[i]])) rule = self.rrhs[i] if self.ritem[rule] > 0: while self.ritem[rule] > 0: write(' %s' % self.tags[self.ritem[rule]]) rule = rule + 1 else: write(" /* empty */") write('\n') def end_test(write, column, buffer, end): if (column + len(buffer)) > end: write('%s\n ' % buffer) column = 3 buffer = '' return (column, buffer) # TERMINAL (type #) : rule #s terminal is on RHS write('\nTerminals, with rules where they appear\n\n') write('%s (-1)\n' % self.tags[0]) for i in range(self.max_user_token+1): if self.token_translations[i] != 2: tag = self.tags[self.token_translations[i]] column = len(tag) write('%s' % tag) column, buffer = end_test(write, column, '', 50) buffer = buffer + ' (%d)' % i for j in range(1, self.nrules+1): ruleno = self.rrhs[j] while self.ritem[ruleno] > 0: if self.ritem[ruleno] == self.token_translations[i]: column, buffer = end_test(write, column, buffer, 65) buffer = buffer + ' %d' % j break ruleno = ruleno + 1 write('%s\n' % buffer) write('\nNonterminals, with rules where they appear\n\n') for i in range(self.ntokens, self.nsyms): left_count = right_count = 0 for j in range(1, self.nrules+1): if self.rlhs[j] == i: left_count = left_count + 1 ruleno = self.rrhs[j] while self.ritem[ruleno] > 0: if self.ritem[ruleno] == i: right_count = right_count + 1 break ruleno = ruleno + 1 write('%s (%d)\n ' % (self.tags[i], i)) column = 3 buffer = '' if left_count: column, buffer = end_test(write, column, buffer, 50) buffer = buffer + ' on left:' for j in range(1, self.nrules+1): column, buffer = end_test(write, column, buffer, 65) if self.rlhs[j] == i: buffer = buffer + ' %d' % j if right_count: if left_count: buffer = buffer + ',' column, buffer = end_test(write, column, buffer, 50) buffer = buffer + ' on right:' for j in range(1, self.nrules+1): ruleno = self.rrhs[j] while self.ritem[ruleno] > 0: if self.ritem[ruleno] == i: column, buffer = end_test(write, column, buffer, 65) buffer = buffer + ' %d' % j break ruleno = ruleno + 1 write('%s\n' % buffer) return def print_state(self, state): self.foutput.write('\n\nstate %d\n\n' % state) self.print_core(state) self.print_actions(state) return def print_core(self, stateno): state = self.state_table[stateno] k = len(state.items) if not k: return write = self.foutput.write for i in range(k): curr = ruleno = state.items[i] while self.ritem[ruleno] > 0: ruleno = ruleno + 1 rule = -self.ritem[ruleno] write(' %s -> ' % self.tags[self.rlhs[rule]]) ruleno = state.items[i] for symbol in self.ritem[self.rrhs[rule]:ruleno]: write('%s ' % self.tags[symbol]) write('.') while self.ritem[ruleno] > 0: write(' %s' % self.tags[self.ritem[ruleno]]) ruleno = ruleno + 1 write(' (rule %d)\n' % rule) write('\n') return def print_actions(self, stateno): write = self.foutput.write shift = self.shift_table[stateno] reduction = self.reduction_table[stateno] if not (shift or reduction): if self.final_state == stateno: write(' $default\taccept\n') else: write(' NO ACTIONS\n') return if shift: k = len(shift.shifts) for i in range(k): if not shift.shifts[i]: continue state1 = shift.shifts[i] symbol = self.accessing_symbol[state1] if symbol >= self.ntokens: break if symbol == 0: write(' $ \tgo to state %d\n' % state1) else: write(' %-4s\tshift, and go to state %d\n' % ( self.tags[symbol], state1)) else: i = i + 1 if i > 0: write('\n') else: i = k = 0 if self.consistent[stateno] and reduction: rule = reduction.rules[0] symbol = self.rlhs[rule] write(' $default\treduce using rule %d (%s)\n\n' % ( rule, self.tags[symbol])) elif reduction: self.print_reductions(stateno) for j in range(i, k): state1 = shift.shifts[j] if not state1: continue symbol = self.accessing_symbol[state1] write(' %-4s\tgo to state %d\n' % ( self.tags[symbol], state1)) if i < k: write('\n') return # ------------------------------------------------------ # conflicts.c def print_reductions(self, state): write = self.foutput.write shiftset = [0]*self.ntokens default = 1 shift = self.shift_table[state] if shift: for state1 in shift.shifts: if not state1: continue symbol = self.accessing_symbol[state1] if symbol >= self.ntokens: break # if this state has a shift for the error token, # don't use a default rule. if symbol == self.error_token_number: default = 0 shiftset[symbol] = 1 m = self.lookaheads[state] n = self.lookaheads[state + 1] if (n - m) == 1 and default: default_rule = self.LAruleno[m] for i in range(self.ntokens): if self.LA[m][i] and shiftset[i]: write(' %-4s\t[reduce using rule %d (%s)]\n' % ( self.tags[i], default_rule, self.tags[self.rlhs[default_rule]])) write(' $default\treduce using rule %d (%s)\n' % ( default_rule, self.tags[self.rlhs[default_rule]])) elif (n - m) >= 1: default_LA = -1 if default: cmax = 0 saved = shiftset for i in range(m, n): lookaheadset = map(lambda a, b: a and not b, self.LA[i], shiftset) count = reduce(lambda a,b: a + b, lookaheadset, 0) if count > cmax: cmax = count default_LA = i default_rule = self.LAruleno[i] shiftset = map(lambda a, b: a or b, shiftset, lookaheadset) shiftset = saved for i in range(self.ntokens): defaulted = 0 count = shiftset[i] for j in range(m, n): if self.LA[j][i]: if count == 0: if j != default_LA: rule = self.LAruleno[j] write(' %-4s\treduce using rule %d (%s)\n' % ( self.tags[i], rule, self.tags[self.rlhs[rule]])) else: defaulted = 1 count = 1 else: if defaulted: rule = self.LAruleno[default_LA] write(' %-4s\treduce using rule %d (%s)\n' % ( self.tags[i], rule, self.tags[self.rlhs[rule]])) defaulted = 0 rule = self.LAruleno[j] write(' %-4s\t[reduce using rule %d (%s)]\n' % ( self.tags[i], rule, self.tags[self.rlhs[rule]])) if default_LA >= 0: write(' $default\treduce using rule %d (%s)\n' % ( default_rule, self.tags[self.rlhs[default_rule]])) write('\n') return