#
# Example - Python scanner
#
import exceptions
from Plex import *
class NaughtyNaughty(Exception):
    """Raised by the scanner when the input's indentation is invalid.

    Used for mixed tab/space indentation and for a dedent that does not
    return to any enclosing indentation level.
    """
    # Derives from the built-in Exception rather than exceptions.Exception:
    # they are the same class, but the `exceptions` module is Python-2-only.
class PythonScanner(Scanner):
    """Plex scanner that splits Python source text into tokens.

    Besides names, numbers, strings, punctuation and brackets, it reports
    line structure through 'newline', 'INDENT' and 'DEDENT' tokens.

    Instance state (set up in __init__):
      indentation_stack     -- widths of the currently open indentation
                               levels; always starts with 0.
      bracket_nesting_level -- brackets opened minus brackets closed so far.
      indentation_char      -- first indentation character seen (space or
                               tab), or None until a line is indented.
    """

    def open_bracket_action(self, text):
        """Count an opening bracket and return its text as the token."""
        self.bracket_nesting_level += 1
        return text

    def close_bracket_action(self, text):
        """Count a closing bracket and return its text as the token."""
        self.bracket_nesting_level -= 1
        return text

    def current_level(self):
        """Return the width of the innermost open indentation level."""
        return self.indentation_stack[-1]

    def newline_action(self, text):
        """Handle a line terminator.

        Outside brackets, switch to the 'indent' state so that the next
        line's leading whitespace is measured, and return a 'newline' token.
        """
        # NOTE(review): the original layout was lost; the return is assumed
        # to belong to the `if`, so a line break inside brackets yields no
        # token (the action returns None) -- confirm against the Plex example.
        if self.bracket_nesting_level == 0:
            self.begin('indent')
            return 'newline'

    def indentation_action(self, text):
        """Turn a line's leading whitespace into INDENT/DEDENT tokens.

        text is the whitespace matched at the start of the line (possibly
        empty).  Raises NaughtyNaughty if the indentation character differs
        from the one first seen, or if a dedent matches no open level.
        """
        # Check that tabs and spaces are being used consistently.
        if text:
            c = text[0]
            if self.indentation_char is None:
                # The first indented line decides the indentation character.
                self.indentation_char = c
            elif self.indentation_char != c:
                raise NaughtyNaughty("Mixed up tabs and spaces!")
        # Figure out how many indents/dedents to do
        current_level = self.current_level()
        new_level = len(text)
        if new_level > current_level:
            self.indent_to(new_level)
        elif new_level < current_level:
            self.dedent_to(new_level)
        # Change back to default state
        self.begin('')

    def indent_to(self, new_level):
        """Open a new indentation level and produce one INDENT token."""
        self.indentation_stack.append(new_level)
        self.produce('INDENT', '')

    def dedent_to(self, new_level):
        """Close levels deeper than new_level, one DEDENT token per level.

        Raises NaughtyNaughty if new_level is not the width of a level that
        is still open once the deeper ones have been closed.
        """
        while new_level < self.current_level():
            del self.indentation_stack[-1]
            self.produce('DEDENT', '')
        if new_level != self.current_level():
            raise NaughtyNaughty("Indentation booboo!")

    def eof(self):
        """Close every indentation level that is still open."""
        self.dedent_to(0)

    # ---- Patterns ----

    letter = Range("AZaz") | Any("_")
    digit = Range("09")
    hexdigit = Range("09AFaf")

    name = letter + Rep(letter | digit)
    number = Rep1(digit) | (Str("0x") + Rep1(hexdigit))

    # Single- and double-quoted strings: no raw newline inside; a backslash
    # escapes whatever character follows it.
    sq_string = (
        Str("'") +
        Rep(AnyBut("\\\n'") | (Str("\\") + AnyChar)) +
        Str("'"))

    dq_string = (
        Str('"') +
        Rep(AnyBut('\\\n"') | (Str("\\") + AnyChar)) +
        Str('"'))

    # Triple-double-quoted string: the body may contain runs of one or two
    # double quotes as long as they are followed by something else.
    non_dq = AnyBut('"') | (Str('\\') + AnyChar)
    tq_string = (
        Str('"""') +
        Rep(
            non_dq |
            (Str('"') + non_dq) |
            (Str('""') + non_dq)) + Str('"""'))

    stringlit = sq_string | dq_string | tq_string

    opening_bracket = Any("([{")
    closing_bracket = Any(")]}")

    punct1 = Any(":,;+-*/|&<>=.%`~^")
    # ">=" is listed alongside "<=" so that it is scanned as one token
    # instead of falling apart into ">" and "=".
    punct2 = Str("==", "<>", "!=", "<=", ">=", "<<", ">>", "**")
    punctuation = punct1 | punct2

    spaces = Rep1(Any(" \t"))
    indentation = Rep(Str(" ")) | Rep(Str("\t"))
    lineterm = Str("\n") | Eof
    escaped_newline = Str("\\\n")
    comment = Str("#") + Rep(AnyBut("\n"))
    # A line holding only whitespace and/or a comment.
    blank_line = indentation + Opt(comment) + lineterm

    # ---- Lexicon ----

    lexicon = Lexicon([
        (name, 'name'),
        (number, 'number'),
        (stringlit, 'string'),
        (punctuation, TEXT),
        (opening_bracket, open_bracket_action),
        (closing_bracket, close_bracket_action),
        (lineterm, newline_action),
        (comment, IGNORE),
        (spaces, IGNORE),
        (escaped_newline, IGNORE),
        # Entered at the start of every logical line (see newline_action).
        State('indent', [
            (blank_line, IGNORE),
            (indentation, indentation_action),
        ]),
    ])

    def __init__(self, file):
        """Create a scanner reading from the open file object `file`."""
        Scanner.__init__(self, self.lexicon, file)
        self.indentation_stack = [0]
        self.bracket_nesting_level = 0
        self.indentation_char = None
        # The first line of the input may itself be indented or blank.
        self.begin('indent')
# Driver: scan "python.in" and print one token per line, indented by four
# spaces for every INDENT that has not yet been closed by a DEDENT.
f = open("python.in", "r")
try:
    scanner = PythonScanner(f)
    level = 0
    while True:
        token, text = scanner.read()
        if token is None:
            # A None token ends the loop (Plex reports end of input this way).
            break
        if token == 'INDENT':
            level += 1
        elif token == 'DEDENT':
            level -= 1
        indent = ' ' * (level * 4)
        if not text or token == text:
            # Nothing extra to show when the text is empty or equals the token.
            value = token
        else:
            value = "%s(%s)" % (token, repr(text))
        # Parenthesised single argument: valid as both the Python 2 print
        # statement and the Python 3 print function.
        print(indent + value)
finally:
    # Close the input file even if scanning raises.
    f.close()
# Syntax highlighted by Code2HTML, v. 0.9.1