"""$URL: svn+ssh://svn/repos/trunk/grouch/lib/type_parser.py $
$Id: type_parser.py 24750 2004-07-21 15:26:51Z dbinger $

SPARK-based scanner and parser for Grouch's type language.
"""

from types import StringType

from grouch.spark import GenericScanner, GenericParser

#
# Grammar for types:
# type : NAME                     # atomic, alias, or instance type
#      | container_type           # list, tuple, dictionary
#      | NAME container_type      # instance-container type
#      | union_type
#
# container_type : list_type
#                | tuple_type
#                | dictionary_type
#                | set_type
# list_type      : "[" type "]"
# tuple_type     : "(" (type ",")* type "*"? ","? ")"
# dictionary_type: "{" type ":" type "}"
# set_type       : "{" type "}"
#
# union_type : type ("|" type)+
#
# Terminals (tokens):
#   NAME : [a-zA-Z0-9_]+(\.[a-zA-Z_][a-zA-Z0-9_]*)*
#   "[" "]"
#   "(" ")" "," "*"
#   "{" "}" ":"
#   "|"


class Name:
    """A name token, as created by TypeScanner.t_name().

    Wraps the matched text in 'name'.  An instance compares equal to
    the string "NAME" so that it can stand in for the NAME terminal of
    the parser's grammar; it compares unequal to every other string
    (punctuation tokens are plain strings).  Two Name instances compare
    by their 'name' text.
    """

    def __init__ (self, name):
        self.name = name

    def __cmp__ (self, other):
        # Return 0 for "NAME", 1 for any other str, and the usual
        # -1/0/1 ordering of the two 'name' strings for another Name.
        # Raise TypeError for anything else.
        #
        # The Name-vs-Name test has to come first: 'other == "NAME"' is
        # itself answered by other.__cmp__("NAME") when 'other' is a
        # Name, which returns 0 -- so testing the string first would
        # make every Name compare equal to every other Name.
        if isinstance(other, Name):     # for comparing names
            # same value cmp(self.name, other.name) would give
            return (self.name > other.name) - (self.name < other.name)
        if other == "NAME":             # for the parser below
            return 0
        if type(other) is str:          # any other grammar symbol
            return 1
        raise TypeError("can't compare Name instance to %s" % repr(other))

    def __repr__ (self):
        return "<Name at %x: %s>" % (id(self), self.name)

    def __str__ (self):
        return self.name


class TypeScanner (GenericScanner):
    """Scanner for the type language: turns a string into a token list.

    Names become Name instances; each punctuation character becomes a
    one-character string; whitespace is dropped.

    The docstrings of the t_*() methods hold the token regexes (SPARK's
    convention), so they are not ordinary documentation: editing one
    changes what the scanner accepts.
    """

    def tokenize (self, input):
        # Start a fresh token list, let the t_*() handlers fill it while
        # GenericScanner walks the input, and return it.
        self.tokens = []
        GenericScanner.tokenize(self, input)
        return self.tokens

    def t_whitespace (self, s):
        r'\s+'
        pass

    # The NAME terminal as documented in the grammar at the top of this
    # file: a first component of letters, digits and underscores, then
    # zero or more dotted identifier components.  No other characters
    # (in particular no '-') are part of a name.
    def t_name (self, s):
        r'[a-zA-Z0-9_]+(\.[a-zA-Z_][a-zA-Z0-9_]*)*'
        self.tokens.append(Name(s))

    def t_punct (self, s):
        r'[\[\]\(\)\{\}\,\:\|\*]'
        self.tokens.append(s)


    # Looks like errors can be handled two ways:
    #   * have a 't_default()' that recognizes nothing, and an
    #     'error()' that raises an exception with 'pos'
    #   * have a 't_default()' that accepts anything and then
    #     raises an exception
    # The first gives you the position where the error occurs,
    # the second gives you the text that causes the error.
    # The first approach is the one in use; the second would be:

    #def t_default (self, s):
    #    r'.'
    #    raise ValueError, "invalid token %s" % `s`

    # Deliberately has no docstring (i.e. no regex of its own).
    def t_default (self, s):
        pass

    def error (self, s, pos):
        # Report the offset of the bad input plus everything from
        # there to the end of the string.
        raise ValueError("invalid token at position %d: '%s'" % (pos, s[pos:]))


class TypeParser (GenericParser):
    """Parser for the type language; builds type objects via a schema.

    Takes the token list produced by TypeScanner.  Every type object is
    created by calling a method of 'self.schema' (get_type(),
    make_list_type(), make_tuple_type(), ...), so a schema must be set
    (constructor argument or set_schema()) before parse() is called.

    The docstrings of the p_*() methods are the grammar productions
    (SPARK's convention), not ordinary documentation; each method gets
    the list of values matched by the right-hand side of its rule.
    """

    def __init__ (self, start="type", schema=None):
        GenericParser.__init__(self, start)
        self.schema = schema

    def set_schema (self, schema):
        self.schema = schema

    def parse (self, tokens):
        # The rule methods all call into self.schema, so fail early
        # with a clear message rather than midway through a parse.
        if self.schema is None:
            raise RuntimeError("schema not set")
        return GenericParser.parse(self, tokens)

    def error (self, token):
        raise ValueError("Syntax error at or near '%s'" % token)

    def p_type_1 (self, args):
        'type ::= NAME'
        (name,) = args
        return self.schema.get_type(str(name))

    def p_type_2 (self, args):
        'type ::= container_type'
        return args[0]

    def p_type_3 (self, args):
        'type ::= NAME container_type'
        (name, ctype) = args
        # Hand the schema the name's text, not the Name token object,
        # just as p_type_1() does.
        return self.schema.make_instance_container_type(str(name), ctype)

    def p_type_4 (self, args):
        'type ::= union_type'
        typelist = args[0]
        return self.schema.make_union_type(typelist)


    # Container types are about the only interesting syntax in the whole
    # grammar.  Of those, list, set and dictionary types are pretty
    # simple:
    #   list_type      : "[" type "]"
    #   set_type       : "{" type "}"
    #   dictionary_type: "{" type ":" type "}"
    # So they are implemented directly as alternate productions
    # for the 'container_type' nonterminal.
    #
    # Tuple types are a tad more interesting.  In extended BNF:
    #   tuple_type     : "(" (type ",")* type "*"? ","? ")"
    # which refactors into vanilla BNF as:
    #   tuple_type : "(" type_list tl_tail ")"
    #   type_list  : type "," type_list
    #              | type
    #   tl_tail    : ","
    #              | "*"
    #              | "*" ","
    #              | null

    def p_container_type_1 (self, args): # list type
        'container_type ::= [ type ]'
        (_, eltype, _) = args
        return self.schema.make_list_type(eltype)

    def p_container_type_2 (self, args): # tuple type
        'container_type ::= ( type_list tl_tail )'
        # 'extended' is the 0/1 flag returned by the tl_tail rules.
        (_, eltypes, extended, _) = args
        return self.schema.make_tuple_type(eltypes, extended)

    def p_container_type_3 (self, args): # set type
        'container_type ::= { type }'
        (_, eltype, _) = args
        return self.schema.make_set_type(eltype)

    def p_container_type_4 (self, args): # dictionary type
        'container_type ::= { type : type }'
        (_, keytype, _, valtype, _) = args
        return self.schema.make_dictionary_type(keytype, valtype)


    # Grunt-work productions for parsing tuples

    def p_type_list_1 (self, args):
        'type_list ::= type , type_list'
        # The rule is right-recursive, so the tail list already exists;
        # put this element in front of it.
        (head, _, typelist) = args
        typelist.insert(0, head)
        return typelist

    def p_type_list_2 (self, args):
        'type_list ::= type'
        return [args[0]]

    def p_tl_tail_1 (self, args):
        'tl_tail ::= ,'
        return 0                        # not an extended tuple

    def p_tl_tail_2 (self, args):
        'tl_tail ::= *'
        return 1                        # extended tuple

    def p_tl_tail_3 (self, args):
        'tl_tail ::= * ,'
        return 1                        # ditto

    def p_tl_tail_4 (self, args):
        'tl_tail ::='
        return 0                        # not extended


    # Finally, union types.  In EBNF:
    #   union_type : type ("|" type)+
    # which refactors to
    #   union_type     : type "|" union_type
    #   union_type     : type "|" type
    # Both rules return a plain list of the member types.

    def p_union_type_1 (self, args):
        'union_type ::= type | union_type'
        (head, _, typelist) = args
        typelist.insert(0, head)
        return typelist

    def p_union_type_2 (self, args):
        'union_type ::= type | type'
        (type1, _, type2) = args
        return [type1, type2]


def get_class_def (schema, name):
    """Return the class definition 'schema' holds for 'name'.

    Calls schema.get_class_definition(name) and raises ValueError if
    the result is false (e.g. None), i.e. the schema has no definition
    for that class.
    """
    klass_def = schema.get_class_definition(name)
    if not klass_def:
        raise ValueError(
            "invalid class %s: no class definition in schema" % repr(name))
    return klass_def