"""$URL: svn+ssh://svn/repos/trunk/grouch/lib/type_parser.py $
$Id: type_parser.py 24750 2004-07-21 15:26:51Z dbinger $

SPARK-based scanner and parser for Grouch's type language.
"""

from types import StringType

from grouch.spark import GenericScanner, GenericParser

#
# Grammar for types:
#   type           : NAME                 # atomic, alias, or instance type
#                  | container_type       # list, tuple, dictionary, set
#                  | NAME container_type  # instance-container type
#                  | union_type
#
#   container_type : list_type
#                  | tuple_type
#                  | dictionary_type
#                  | set_type
#   list_type      : "[" type "]"
#   tuple_type     : "(" (type ",")* type "*"? ","? ")"
#   dictionary_type: "{" type ":" type "}"
#   set_type       : "{" type "}"
#
#   union_type     : type ("|" type)+
#
# Terminals (tokens):
#   NAME : [a-zA-Z0-9_-]+(\.[a-zA-Z_][a-zA-Z0-9_]*)*
#   "[" "]"
#   "(" ")" "," "*"
#   "{" "}" ":"
#   "|"


class Name:
    """A NAME token emitted by TypeScanner; wraps the matched text.

    Instances compare equal to the string "NAME" (the grammar symbol used
    in TypeParser's rules) and greater than any other plain string.  Two
    Name instances are ordered by their 'name' attributes.
    """

    def __init__(self, name):
        self.name = name

    def __cmp__(self, other):
        # The Name-vs-Name test must come first: 'other == "NAME"' is true
        # for *every* Name instance (it dispatches to other.__cmp__), so
        # testing it first would make all names compare equal and leave
        # this branch unreachable.
        if isinstance(other, Name):         # for comparing names
            return cmp(self.name, other.name)
        if other == "NAME":                 # for the parser below
            return 0
        if type(other) is StringType:
            return 1
        raise TypeError("can't compare Name instance to %s" % repr(other))

    def __repr__(self):
        return "<Name at %x: %s>" % (id(self), self.name)

    def __str__(self):
        return self.name


class TypeScanner(GenericScanner):
    """Scanner turning a type-language string into a list of tokens.

    Names become Name instances; punctuation is kept as one-character
    strings; whitespace is dropped.

    NOTE(review): the docstrings of the t_* methods are the token regular
    expressions (they are data, not documentation) -- do not edit them
    casually.  Confirm details against grouch.spark.
    """

    def tokenize(self, input):
        """Scan 'input' and return the list of tokens found in it."""
        self.tokens = []
        GenericScanner.tokenize(self, input)
        return self.tokens

    def t_whitespace(self, s):
        r'\s+'
        # Whitespace only separates tokens; nothing is recorded.
        pass

    def t_name(self, s):
        r'[a-zA-Z0-9_-]+(\.[a-zA-Z_][a-zA-Z0-9_]*)*'
        self.tokens.append(Name(s))

    def t_punct(self, s):
        r'[\[\]\(\)\{\}\,\:\|\*]'
        self.tokens.append(s)

    # Looks like errors can be handled two ways:
    #   * have a 't_default()' that recognizes nothing, and an
    #     'error()' that raises an exception with 'pos'
    #   * have a 't_default()' that accepts anything and then
    #     raises an exception
    # The first gives you the position where the error occurs,
    # the second gives you the text that causes the error.
    #
    # The second approach would look like this:
    #def t_default (self, s):
    #    r'.'
    #    raise ValueError, "invalid token %s" % `s`
    #
    # The first approach is the one used here, which is why 't_default()'
    # intentionally has no docstring (i.e. no pattern).

    def t_default(self, s):
        pass

    def error(self, s, pos):
        """Raise ValueError reporting the position and the unscanned text."""
        raise ValueError(
            "invalid token at position %d: '%s'" % (pos, s[pos:]))


class TypeParser(GenericParser):
    """Parser that builds type objects by calling methods on a schema.

    A schema must be supplied (to the constructor or via 'set_schema()')
    before 'parse()' is called.

    NOTE(review): the docstrings of the p_* methods are the grammar rules
    (they are data, not documentation) -- do not edit them casually.
    Confirm details against grouch.spark.
    """

    def __init__(self, start="type", schema=None):
        GenericParser.__init__(self, start)
        self.schema = schema

    def set_schema(self, schema):
        """Set the schema used to construct types while parsing."""
        self.schema = schema

    def parse(self, tokens):
        """Parse 'tokens' and return the result of GenericParser.parse().

        Raises RuntimeError if no schema has been set.
        """
        if self.schema is None:
            raise RuntimeError("schema not set")
        return GenericParser.parse(self, tokens)

    def error(self, token):
        """Raise ValueError naming the token at which parsing failed."""
        raise ValueError("Syntax error at or near '%s'" % (token,))

    def p_type_1(self, args):
        'type ::= NAME'
        (name,) = args
        return self.schema.get_type(str(name))

    def p_type_2(self, args):
        'type ::= container_type'
        # The container type was already built by a p_container_type_*
        # method; just pass it through.
        return args[0]

    def p_type_3(self, args):
        'type ::= NAME container_type'
        (name, ctype) = args
        return self.schema.make_instance_container_type(name, ctype)

    def p_type_4(self, args):
        'type ::= union_type'
        typelist = args[0]
        return self.schema.make_union_type(typelist)

    # Container types are about the only interesting syntax in the whole
    # grammar.  Of those, list, set and dictionary types are pretty simple:
    #   list_type      : "[" type "]"
    #   set_type       : "{" type "}"
    #   dictionary_type: "{" type ":" type "}"
    # So they are implemented directly as alternate productions
    # for the 'container_type' nonterminal.
    #
    # Tuple types are a tad more interesting.  In extended BNF:
    #   tuple_type : "(" (type ",")* type "*"? ","? ")"
    # which refactors into vanilla BNF as:
    #   tuple_type : "(" type_list tl_tail ")"
    #   type_list  : type "," type_list
    #              | type
    #   tl_tail    : ","
    #              | "*"
    #              | "*" ","
    #              | null

    def p_container_type_1(self, args):     # list type
        'container_type ::= [ type ]'
        (_, eltype, _) = args
        return self.schema.make_list_type(eltype)

    def p_container_type_2(self, args):     # tuple type
        'container_type ::= ( type_list tl_tail )'
        (_, eltypes, extended, _) = args
        return self.schema.make_tuple_type(eltypes, extended)

    def p_container_type_3(self, args):     # set type
        'container_type ::= { type }'
        (_, eltype, _) = args
        return self.schema.make_set_type(eltype)

    def p_container_type_4(self, args):     # dictionary type
        'container_type ::= { type : type }'
        (_, keytype, _, valtype, _) = args
        return self.schema.make_dictionary_type(keytype, valtype)

    # Grunt-work productions for parsing tuples

    def p_type_list_1(self, args):
        'type_list ::= type , type_list'
        (first, _, rest) = args
        return [first] + rest

    def p_type_list_2(self, args):
        'type_list ::= type'
        return [args[0]]

    def p_tl_tail_1(self, args):
        'tl_tail ::= ,'
        return 0                            # not an extended tuple

    def p_tl_tail_2(self, args):
        'tl_tail ::= *'
        return 1                            # extended tuple

    def p_tl_tail_3(self, args):
        'tl_tail ::= * ,'
        return 1                            # ditto

    def p_tl_tail_4(self, args):
        'tl_tail ::='
        return 0                            # not extended

    # Finally, union types.  In EBNF:
    #   union_type : type ("|" type)+
    # which refactors to
    #   union_type : type "|" union_type
    #   union_type : type "|" type

    def p_union_type_1(self, args):
        'union_type ::= type | union_type'
        (first, _, rest) = args
        return [first] + rest

    def p_union_type_2(self, args):
        'union_type ::= type | type'
        (type1, _, type2) = args
        return [type1, type2]


def get_class_def(schema, name):
    """Return schema.get_class_definition(name).

    Raises ValueError if the schema returns a false value for 'name'.
    """
    klass_def = schema.get_class_definition(name)
    if not klass_def:
        raise ValueError(
            "invalid class %s: no class definition in schema" % repr(name))
    return klass_def