#
#   Example - Python scanner
#

import exceptions
from Plex import *

class NaughtyNaughty(Exception):
  """Raised by PythonScanner when the input's indentation is malformed.

  Used for two conditions: indentation that mixes tabs and spaces, and a
  dedent that does not return to any enclosing indentation level.

  Derives from the builtin ``Exception`` (the same class the ``exceptions``
  module exposes on Python 2), so the class does not depend on that module.
  """

class PythonScanner(Scanner):
  """Plex scanner that splits Python-like source text into tokens.

  Token values produced:
    'name', 'number', 'string'  -- identifiers, numeric and string literals
    punctuation and brackets    -- the matched text itself
    'newline'                   -- a line end outside any brackets
    'INDENT' / 'DEDENT'         -- changes of indentation level

  Instance state (set up in __init__):
    indentation_stack      -- indentation widths of the enclosing blocks,
                              innermost last; starts as [0]
    bracket_nesting_level  -- number of brackets opened and not yet closed
    indentation_char       -- first indentation character seen, or None
                              while no indented line has been scanned

  The scanner uses two states: the default state for ordinary tokens and
  'indent', entered at the start of each logical line to measure its
  leading whitespace.
  """

  def open_bracket_action(self, text):
    """Count an opening bracket and return its text as the token."""
    self.bracket_nesting_level = self.bracket_nesting_level + 1
    return text

  def close_bracket_action(self, text):
    """Count a closing bracket and return its text as the token.

    No check is made for an unmatched closing bracket; the nesting level
    simply goes negative in that case.
    """
    self.bracket_nesting_level = self.bracket_nesting_level - 1
    return text

  def current_level(self):
    """Return the indentation width of the innermost open block."""
    return self.indentation_stack[-1]

  def newline_action(self, text):
    """Handle a line terminator.

    Outside brackets, switch to the 'indent' state so the next line's
    indentation is examined, and return the 'newline' token.  Inside
    brackets nothing is returned (None), so the line break is not
    reported as a token.
    """
    if self.bracket_nesting_level == 0:
      self.begin('indent')
      return 'newline'

  def indentation_action(self, text):
    """Process the leading whitespace of a line and return to default state.

    text is the matched indentation (possibly empty).  Emits INDENT or
    DEDENT tokens, via indent_to/dedent_to, when its length differs from
    the current level.

    Raises NaughtyNaughty if the indentation character differs from the
    first one seen, or if a dedent matches no enclosing level.
    """
    # Check that tabs and spaces are being used consistently.
    if text:
      c = text[0]
      if self.indentation_char is None:
        self.indentation_char = c
      else:
        # '!=' is the portable spelling of the obsolete '<>' operator.
        if self.indentation_char != c:
          raise NaughtyNaughty("Mixed up tabs and spaces!")
    # Figure out how many indents/dedents to do
    current_level = self.current_level()
    new_level = len(text)
    if new_level > current_level:
      self.indent_to(new_level)
    elif new_level < current_level:
      self.dedent_to(new_level)
    # Change back to default state
    self.begin('')

  def indent_to(self, new_level):
    """Open a new block at width new_level and emit one INDENT token."""
    self.indentation_stack.append(new_level)
    self.produce('INDENT', '')

  def dedent_to(self, new_level):
    """Close blocks until the current level is no deeper than new_level.

    Emits one DEDENT token per block closed.  Raises NaughtyNaughty if
    new_level does not equal the level reached, i.e. the line is indented
    to a width that no enclosing block uses.
    """
    while new_level < self.current_level():
      del self.indentation_stack[-1]
      self.produce('DEDENT', '')
    if new_level != self.current_level():
      raise NaughtyNaughty("Indentation booboo!")

  def eof(self):
    """At end of input, close every block still open."""
    self.dedent_to(0)

  # ---- Character classes ----
  letter = Range("AZaz") | Any("_")
  digit = Range("09")
  hexdigit = Range("09AFaf")

  # ---- Identifiers and numbers ----
  name = letter + Rep(letter | digit)
  number = Rep1(digit) | (Str("0x") + Rep1(hexdigit))

  # ---- String literals ----
  # Single- and double-quoted strings: any character except backslash,
  # newline and the delimiter, or a backslash followed by any character.
  sq_string = (
    Str("'") +
    Rep(AnyBut("\\\n'") | (Str("\\") + AnyChar)) +
    Str("'"))

  dq_string = (
    Str('"') +
    Rep(AnyBut('"\\\n'[1:] + '"') | (Str("\\") + AnyChar)) +
    Str('"'))

  # Triple-double-quoted strings: the body is built from non-quote items,
  # each optionally preceded by one or two double quotes.
  non_dq = AnyBut('"') | (Str('\\') + AnyChar)
  tq_string = (
    Str('"""') +
    Rep(
      non_dq |
      (Str('"') + non_dq) |
      (Str('""') + non_dq)) + Str('"""'))

  stringlit = sq_string | dq_string | tq_string

  # ---- Brackets and punctuation ----
  opening_bracket = Any("([{")
  closing_bracket = Any(")]}")
  punct1 = Any(":,;+-*/|&<>=.%`~^")                          # one-character
  punct2 = Str("==", "<>", "!=", "<=", "<<", ">>", "**")     # two-character
  punctuation = punct1 | punct2

  # ---- Whitespace, comments and line structure ----
  spaces = Rep1(Any(" \t"))
  # Indentation is a run of spaces or a run of tabs, never a mixture.
  indentation = Rep(Str(" ")) | Rep(Str("\t"))
  lineterm = Str("\n") | Eof
  escaped_newline = Str("\\\n")
  comment = Str("#") + Rep(AnyBut("\n"))
  # A line holding only indentation and, optionally, a comment.
  blank_line = indentation + Opt(comment) + lineterm

  # The action methods above are referenced here as plain functions; each
  # takes the scanner and the matched text.
  lexicon = Lexicon([
    (name,            'name'),
    (number,          'number'),
    (stringlit,       'string'),
    (punctuation,     TEXT),
    (opening_bracket, open_bracket_action),
    (closing_bracket, close_bracket_action),
    (lineterm,        newline_action),
    (comment,         IGNORE),
    (spaces,          IGNORE),
    (escaped_newline, IGNORE),
      # State entered at the start of each logical line.
      State('indent', [
      (blank_line,    IGNORE),
      (indentation,   indentation_action),
    ]),
  ])

  def __init__(self, file):
    """Create a scanner reading from the open file object `file`.

    Scanning starts in the 'indent' state so that the indentation of the
    very first line is examined as well.
    """
    Scanner.__init__(self, self.lexicon, file)
    self.indentation_stack = [0]
    self.bracket_nesting_level = 0
    self.indentation_char = None
    self.begin('indent')

# Demo driver: scan "python.in" and print one token per line, indented by
# four spaces for each INDENT not yet matched by a DEDENT.
f = open("python.in", "r")
try:
    scanner = PythonScanner(f)
    level = 0
    while 1:
        token, text = scanner.read()
        if token is None:
            # A None token marks the end of the input.
            break
        if token == 'INDENT':
            level = level + 1
        elif token == 'DEDENT':
            level = level - 1
        indent = ' ' * (level * 4)
        # Tokens with no text, or whose value is their own text, are shown
        # bare; all others are shown as token(repr-of-text).
        if not text or token == text:
            value = token
        else:
            value = "%s(%s)" % (token, repr(text))
        print(indent + value)
finally:
    # Close the input file even if scanning raises (e.g. NaughtyNaughty).
    f.close()




# syntax highlighted by Code2HTML, v. 0.9.1