from BisonGen.Lexer import Constants


def OutputTables(lexer, outfile):
    """Emit the compiled lexer tables for 'lexer' as C source on 'outfile'.

    Writes, in order: #define constants for the opcode and charset codes
    and the start-condition states; the 32-byte character-set bitmaps;
    the block maps; the compiled pattern programs (one NULL-terminated
    table per state); and the per-state action-index tables.
    """
    write = outfile.write
    write('\n')

    # write out the compiled expression constants, ordered by numeric
    # value.  sorted()+key replaces the Python-2-only .sort(cmp) idiom
    # and also works when items() is a view (Python 3).
    for name, value in sorted(Constants.OP_CODES.items(),
                              key=lambda pair: pair[1]):
        name = 'LEXER_OP_%s' % name.replace(' ', '_').upper()
        write('#define %s %s\n' % (name, value))
    write('\n')

    for name, value in sorted(Constants.CHARSET_CODES.items(),
                              key=lambda pair: pair[1]):
        name = 'LEXER_CHARSET_%s' % name.replace(' ', '_').upper()
        write('#define %s %s\n' % (name, value))
    write('\n')

    # write out the state defines, ordered by state index
    for define in sorted(lexer.states.items(), key=lambda pair: pair[1]):
        write("#define LEXER_%s %d\n" % define)
    write("\n")

    # character-set bitmaps: one 32-byte row per charset
    count = len(lexer.charsets)
    write('static unsigned char lexer_charsets[%d][32] = {\n' % count)
    for charset in lexer.charsets:
        width = 0
        write(' {')
        for item in charset[:-1]:
            if width > 70:  # wrap long initializer rows
                write('\n ')
                width = 4
            write(' 0x%02X,' % item)
            width = width + 6
        if width > 70:
            write('\n ')
        write(' 0x%02X },\n' % charset[-1])
    write('};\n\n')

    # optimize storage: pick the narrowest C type that can hold any
    # charset index, with a matching hex field width for the output
    if count < 256:
        datatype = 'unsigned char'
        format_width = 2
    elif count < 65536:
        datatype = 'unsigned short'
        format_width = 4
    else:
        datatype = 'Py_UCS4'
        format_width = 8

    count = len(lexer.blockmaps)
    write('static %s lexer_blockmaps[%d][256] = {\n' % (datatype, count))
    for block in lexer.blockmaps:
        width = 0
        write(' {')
        for item in block[:-1]:
            if width > 70:
                write('\n ')
                width = 4
            # '%0*X' takes the field width from format_width
            write(' 0x%0*X,' % (format_width, item))
            width = width + 6
        if width > 70:
            write('\n ')
        write(' 0x%0*X },\n' % (format_width, block[-1]))
    write('};\n\n')

    # reverse map: state index -> state name
    state_lookup = {}
    for name, index in lexer.states.items():
        state_lookup[index] = name
    max_state = max(lexer.states.values()) + 1

    # compiled pattern programs; one NULL-terminated table per state
    table_names = ['NULL'] * max_state
    for state_idx, patterns in lexer.patterns.items():
        pattern_names = []
        # '%%d' survives the first substitution as a '%d' placeholder
        table_base = 'lexer_%s_pattern_%%d' % state_lookup[state_idx]
        for pattern in patterns:
            name = table_base % len(pattern_names)
            pattern_names.append(name)
            header = 'static const Py_UCS4 %s[] = { ' % name
            write(header)
            width = len(header)
            for code in pattern.compiled[:-1]:
                if width > 70:
                    write('\n ')
                    width = 2
                out = '%d, ' % code
                write(out)
                width += len(out)
            if width > 70:
                write('\n ')
            write('%d };\n\n' % pattern.compiled[-1])
        name = 'lexer_%s_patterns' % state_lookup[state_idx]
        table_names[state_idx] = name
        write('static const Py_UCS4 *%s[] = {\n' % name)
        for name in pattern_names:
            write(' %s,\n' % name)
        write(' NULL\n};\n\n')

    write('static const Py_UCS4 **lexer_patterns[] = {\n')
    for name in table_names[:-1]:
        write(' %s,\n' % name)
    write(' %s\n' % table_names[-1])
    write('};\n\n')

    # write the action tables
    table_names = ['NULL'] * max_state
    for state_idx, actions in lexer.action_index.items():
        table = 'lexer_%s_actions' % state_lookup[state_idx]
        table_names[state_idx] = table
        header = 'static const int %s[] = { ' % table
        write(header)
        width = len(header)
        for index in actions[:-1]:
            if width > 70:
                write('\n ')
                width = 2
            out = '%d, ' % index
            write(out)
            width += len(out)
        if width > 70:
            write('\n ')
        write('%d };\n\n' % actions[-1])

    write('static const int *lexer_actions[] = {\n')
    for name in table_names[:-1]:
        write(' %s,\n' % name)
    write(' %s\n' % table_names[-1])
    write('};\n\n')
    return


def OutputActions(parser, lexer, outfile):
    """Emit one C 'case N: { ... }' block per lexer action on 'outfile'.

    'parser' is unused here but kept for interface compatibility with
    the other Output* entry points.
    """
    for num in range(len(lexer.actions)):
        outfile.write('case %d: {\n' % num)
        pattern = lexer.actions[num]
        outfile.write(' /* from %s, line %d */\n' % (pattern.filename,
                                                     pattern.lineno))
        outfile.write(' TRACE("using rule from %s[%d], ");\n' %
                      (pattern.filename, pattern.lineno))
        ActionBody(outfile.write, pattern)
        outfile.write(' break;\n')
        outfile.write('}\n')
    return


def ActionBody(write, pattern):
    """Emit the C statements for one matched-pattern action via 'write'.

    A pattern with neither a 'begin' state nor a 'token' just skips the
    matched text.  Otherwise the body optionally switches start condition
    and/or builds the Python token object and sets yychar.
    """
    if not (pattern.begin or pattern.token):
        write(' /* skip over the matched text */\n')
        write(' yytext = lexer->position;\n')
        return

    if pattern.begin:
        # switch the lexer into the named start condition
        state = pattern.begin
        write(' lexer->state = LEXER_%s;\n' % state)
        write(' TRACE("switching to start condition %s, ");\n' % state)

    if pattern.token:
        write(' /* create the Python object for the matched text */\n')
        write(' *yylval = PyUnicode_FromUnicode(yytext, yylen);\n')
        write(' if (*yylval == NULL) {\n')
        write(' PyErr_NoMemory();\n')
        write(' return -1;\n')
        write(' }\n')
        write('\n')
        if pattern.token == '@ASCII@':
            # the token code is the first matched character itself
            write(' yychar = (int)*yytext;\n')
        else:
            write(' yychar = %s;\n' % pattern.token)
        write('\n')
        write(' if (self->verbose) {\n')
        write(' char *repr = unicode_escape(yytext, yylen);\n')
        write(' TRACE("accepting \'%s\' (%d)\\n", repr, yychar);\n')
        write(' PyMem_Del(repr);\n')
        write(' }\n')
        write('\n')

    # NOTE(review): emitted for every begin/token action so begin-only
    # rules also advance past the matched text -- confirm against the
    # generated scanner's expectations.
    write(' /* update the saved position */\n')
    write(' yytext = lexer->position;\n')
    return