%{
/*
** This is a bare-bones prototype for an ANSI C parser.
**
** It is based on _The C Programming Language,
** Second Edition_, Kernighan and Ritchie, Prentice Hall, 1988.
*/

/* NOTE(review): the first #include below carries no header name in this
 * copy of the file -- presumably a <...> system header was lost when the
 * text was extracted.  Confirm against the original source.
 */
#include
#include "ansi.h"
#include "host.h"
#include "hash.h"
#include "files.h"
#include "il.h"
#include "nodeop.h"
#include "types.h"
#include "type_util.h"

#define NO_LNINFO

/* Symbol of the function whose head was most recently reduced.  It is
 * assigned in the function_head actions and read by the
 * "RETURN expression" action.
 */
static symbol_t *cur_func;
%}

/* Semantic value carried by tokens and nonterminals. */
%union {
    int val;
    node_t *nod;
    symbol_t *sym;
    typeinfo_t *typ;
}

/* NOTE(review): none of the %token / %type declarations below carries a
 * <member> tag, although the rule actions assign through $$ and $n and
 * a %union is declared.  The tags were presumably lost together with
 * the header name above -- confirm against the original source.
 */
%token BAD_TOKEN
%token INTEGER_CONSTANT
%token CHARACTER_CONSTANT
%token FLOATING_CONSTANT
%token ENUMERATION_CONSTANT
%token IDENTIFIER
%token STRING
%token TYPEDEF_NAME
%token SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN
%token TYPEDEF EXTERN STATIC AUTO REGISTER
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token STRUCT UNION ENUM ELIPSIS DOTDOT
%token CASE DEFAULT IF SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN

/* Dangling-else resolution.  THEN is referenced in this file only via
 * "%prec THEN" on the else-less IF rule.  ELSE is declared after THEN
 * and so has the higher precedence, which makes the parser shift ELSE
 * rather than reduce the shorter IF rule.
 */
%left THEN
%left ELSE

%type type_adjective
%type type_qualifier
%type type_qualifier_list
%type storage_class_specifier
%type unary_operator
%type assignment_operator
%type struct_or_union
%type type_specifier
%type type_name
%type actual_type_specifier
%type specifier_qualifier_list
%type declaration_specifiers
%type declaration
%type declaration_list
%type enumerator
%type enumerator_list
%type enum_specifier
%type struct_or_union_specifier
%type struct_declaration_list
%type struct_declaration
%type parameter_declaration
%type parameter_list
%type parameter_type_list
%type function_definition
%type function_head
%type constant
%type identifier
%type identifier_list
%type constant_expression
%type primary_expression
%type assignment_expression
%type expression
%type postfix_expression
%type unary_expression
%type cast_expression
%type multiplicative_expression
%type additive_expression
%type shift_expression
%type relational_expression
%type equality_expression
%type and_expression
%type exclusive_or_expression
%type inclusive_or_expression
%type logical_and_expression
%type logical_or_expression
%type conditional_expression
%type argument_expression_list
%type initializer
%type initializer_list
%type declarator
%type direct_declarator
%type init_declarator
%type init_declarator_list
%type struct_declarator
%type struct_declarator_list
%type pointer
%type abstract_declarator
%type direct_abstract_declarator
%type function_declarator
%type direct_function_declarator

%start translation_unit

%%

/****************************************************************
 ********* Name-Space and scanner-feedback productions **********
 ****************************************************************/

/* All NS_* nonterminals below are empty productions used as markers
 * inside other rules.  In this version only NS_scope_push and
 * NS_scope_pop have live actions; the actions of the others are
 * commented out, so they reduce without doing anything.
 */

/* The occurrence of a type_specifier in the input turns off
 * scanner-recognition of typedef-names as such, so that they can
 * be re-defined within a declarator-list.  The switch is called
 * "name_space_types".
 *
 * The call to lex_sync() ensures that the switch gets toggled after
 * the next token is pre-fetched as a lookahead.
 */
NS_ntd
    : /* { lex_sync(); ntd(); } */
    ;

/* Once the declarators (if any) are parsed, the scanner is returned
 * to the state where typedef-names are recognized.
 */
NS_td
    : /* { lex_sync(); td(); } */
    ;

/* NS_scope_push creates a new scope in the id/typedef/enum-const
 * name-space.  New levels are created by function-declarators
 * and are created and destroyed by compound-statements.
 * Thus, every occurrence of a function-declarator must be
 * followed, at the end of the scope of that declarator,
 * by an NS_scope_pop.
 */
NS_scope_push
    : { scope_push(); td(); }
    ;

NS_scope_pop
    : { scope_pop(); }
    ;

/* NS_struct_push creates a new name-space for a struct or union
 * NS_struct_pop finishes one.
 */
NS_struct_push
    : /* { struct_push(); td(); } */
    ;

NS_struct_pop
    : /* { struct_pop(); } */
    ;

NS_id
    : /* { new_declaration(name_space_decl); } */
    ;

/* Begin a new declaration of a parameter */
NS_new_parm
    : /* { new_declaration(name_space_decl); } */
    ;

/* Remember that declarators will define typedef-names. */
NS_is_typedef
    : /* { set_typedef(); } */
    ;

/* Finish a direct-declarator */
NS_direct_decl
    : /* { direct_declarator(); } */
    ;

/* Finish a pointer-declarator */
NS_ptr_decl
    : /* { pointer_declarator(); } */
    ;

/* The scanner must be aware of the name-space which
 * differentiates typedef-names from identifiers.  But the
 * distinction is only useful within limited scopes.  In other
 * scopes the distinction may be invalid, or in cases where
 * typedef-names are not legal, the semantic-analysis phase
 * may be able to generate a better error message if the parser
 * does not flag a syntax error.  We therefore use the following
 * production...
 */
identifier
    : NS_ntd TYPEDEF_NAME NS_td
        {$$ = id_from_typedef($2);}
    | IDENTIFIER
    | ENUMERATION_CONSTANT
    ;

/************************************************************
 ***************** The C grammar per se. *******************
 ************************************************************/

/*
 * What follows is based on the grammar in _The C Programming Language_,
 * Kernighan & Ritchie, Prentice Hall 1988.  See the README file.
 */

translation_unit
    : /* empty */
    | translation_unit external_declaration
    ;

/* The third alternative (a declaration with no declaration-specifiers)
 * is accepted but has no action.
 */
external_declaration
    : NS_id function_definition
        {function_def($2);}
    | NS_id declaration
        {typed_external_decl($2);}
    | NS_id untyped_declaration
    ;

/* The second alternative is the old-style (K&R) form: the parameter
 * declarations are handed to KnR_params() in a mid-rule action, i.e.
 * before the function body is parsed.
 */
function_definition
    : function_head function_body
    | function_head declaration_list
        {KnR_params($1, $2);}
      function_body
    ;

/* Both alternatives record the resulting symbol in cur_func.  When no
 * declaration-specifiers are present, function_spec() receives 0 in
 * their place and the symbol's _assume_int flag is set.
 */
function_head
    : function_declarator NS_td
        { $$ = function_spec(0, $1);
          cur_func = $$;
          cur_func->_assume_int = 1;
        }
    | declaration_specifiers function_declarator NS_td
        { $$ = function_spec($1, $2);
          cur_func = $$;
        }
    ;

/* The NS_scope_pop here pairs with the NS_scope_push inside
 * direct_function_declarator, which (unlike direct_declarator) does not
 * pop the scope it opened.
 */
function_body
    : compound_statement NS_scope_pop
    ;

declaration
    : declaration_specifiers NS_td ';'
        {$$ = novar_declaration($1);}
    | declaration_specifiers init_declarator_list NS_td ';'
        {$$ = var_declaration($1, $2);}
    ;

untyped_declaration
    : init_declarator_list ';'
    ;

declaration_list
    : declaration
    | declaration_list declaration
        {$$ = concat_symbols($1,$2);}
    ;

/* Storage classes and qualifiers are converted with typeof_typemod()
 * before being combined with the rest via concat_types().
 */
declaration_specifiers
    : storage_class_specifier
        {$$ = typeof_typemod($1);}
    | storage_class_specifier declaration_specifiers
        {$$ = concat_types(typeof_typemod($1), $2);}
    | type_specifier
    | type_specifier declaration_specifiers
        {$$ = concat_types($1, $2);}
    | type_qualifier
        {$$ = typeof_typemod($1);}
    | type_qualifier declaration_specifiers
        {$$ = concat_types(typeof_typemod($1), $2);}
    ;

storage_class_specifier
    : NS_is_typedef TYPEDEF
        {$$ = TYPEMOD_TYPEDEF;}
    | EXTERN
        {$$ = TYPEMOD_EXTERN;}
    | STATIC
        {$$ = TYPEMOD_STATIC;}
    | AUTO
        {$$ = TYPEMOD_AUTO;}
    | REGISTER
        {$$ = TYPEMOD_REGISTER;}
    ;

/* Once an actual type-specifier is seen, it acts as a "trigger" to
 * turn typedef-recognition off while scanning declarators, etc.
 */
type_specifier
    : NS_ntd actual_type_specifier
        {$$ = $2;}
    | type_adjective
        {$$ = typeof_typemod($1);}
    ;

actual_type_specifier
    : VOID
        {$$ = typeof_void();}
    | CHAR
        {$$ = typeof_char();}
    | INT
        {$$ = typeof_int(0);}
    | FLOAT
        {$$ = typeof_float();}
    | DOUBLE
        {$$ = typeof_double();}
    | TYPEDEF_NAME
    | struct_or_union_specifier
        {$$ = typeof_specifier($1);}
    | enum_specifier
        {$$ = typeof_specifier($1);}
    ;

type_adjective
    : SHORT
        {$$ = TYPEMOD_SHORT;}
    | LONG
        {$$ = TYPEMOD_LONG;}
    | SIGNED
        {$$ = TYPEMOD_SIGNED;}
    | UNSIGNED
        {$$ = TYPEMOD_UNSIGNED;}
    ;

type_qualifier
    : CONST
        {$$ = TYPEMOD_CONST;}
    | VOLATILE
        {$$ = TYPEMOD_VOLATILE;}
    ;

struct_or_union_specifier
    : struct_or_union NS_struct_push '{' struct_declaration_list NS_struct_pop '}'
        { $$ = anonymous_rec($1, $4);}
    | struct_or_union identifier NS_struct_push '{' struct_declaration_list NS_struct_pop '}'
        { $$ = named_rec($1, $2, $5);}
    | struct_or_union identifier
        { $$ = rec_reference($1, $2);}
    ;

/* Value is 0 for STRUCT and 1 for UNION; it is passed as the first
 * argument to anonymous_rec(), named_rec() and rec_reference().
 */
struct_or_union
    : STRUCT
        {$$ = 0;}
    | UNION
        {$$ = 1;}
    ;

struct_declaration_list
    : struct_declaration
    | struct_declaration_list struct_declaration
        {$$ = concat_symbols($1,$2);}
    ;

init_declarator_list
    : init_declarator
    | init_declarator_list ',' init_declarator
        {$$ = new_node(_List, $1, $3);}
    ;

init_declarator
    : declarator
    | declarator NS_td '=' initializer NS_ntd
        {$$ = new_node(_Assign, $1, $4);}
    ;

struct_declaration
    : /* { new_declaration(struct_decl); } */
      specifier_qualifier_list struct_declarator_list NS_td ';'
        {$$ = field_declaration($1, $2);}
    ;

specifier_qualifier_list
    : type_specifier
        {$$ = typeof_typespec($1);}
    | type_specifier specifier_qualifier_list
        {$$ = typeof_typespec(concat_types($1, $2));}
    | type_qualifier
        {$$ = typeof_typespec(typeof_typemod($1));}
    | type_qualifier specifier_qualifier_list
        {$$ = typeof_typespec(concat_types(typeof_typemod($1),$2));}
    ;

struct_declarator_list
    : struct_declarator
    | struct_declarator_list ',' struct_declarator
        {$$ = new_node(_List, $1, $3);}
    ;

/* An unnamed bit-field is built with 0 in place of the declarator. */
struct_declarator
    : declarator
    | ':' constant_expression
        {$$ = new_node(_Bit_Field, 0, $2);}
    | declarator ':' constant_expression
        {$$ = new_node(_Bit_Field, $1, $3);}
    ;

enum_specifier
    : ENUM '{' enumerator_list '}'
        {$$ = anonymous_enum($3);}
    | ENUM identifier '{' enumerator_list '}'
        {$$ = named_enum($2, $4);}
    | ENUM identifier
        {$$ = enum_reference($2);}
    ;

enumerator_list
    : enumerator
    | enumerator_list ',' enumerator
        {$$ = concat_symbols($1,$3);}
    ;

/* An enumerator without an explicit value passes 0 as the second
 * argument of grok_enumerator().
 */
enumerator
    : IDENTIFIER
        {$$ = grok_enumerator($1,0);}
    | IDENTIFIER '=' constant_expression
        {$$ = grok_enumerator($1,$3);}
    ;

declarator
    : direct_declarator NS_direct_decl
    | pointer direct_declarator NS_ptr_decl
        {$$ = access_to($1, $2);}
    ;

/* The three function forms open a scope before '(' and close it again
 * after ')'.  A typed parameter list is wrapped in a _Sym node; an
 * empty list is represented by 0.
 */
direct_declarator
    : IDENTIFIER
    | '(' declarator ')'
        {$$ = $2;}
    | direct_declarator '[' ']'
        {$$ = new_node(_Array_Index, $1, 0);}
    | direct_declarator '[' constant_expression ']'
        {$$ = new_node(_Array_Index, $1, $3);}
    | direct_declarator NS_scope_push '(' parameter_type_list ')' NS_scope_pop
        {$$ = new_node(_Func_Call, $1, new_node(_Sym,$4));}
    | direct_declarator NS_scope_push '(' ')' NS_scope_pop
        {$$ = new_node(_Func_Call, $1, 0);}
    | direct_declarator NS_scope_push '(' identifier_list ')' NS_scope_pop
        {$$ = new_node(_Func_Call, $1, $4);}
    ;

function_declarator
    : direct_function_declarator NS_direct_decl
    | pointer direct_function_declarator NS_ptr_decl
        {$$ = access_to($1, $2);}
    ;

/* Same function forms and actions as in direct_declarator, but without
 * the trailing NS_scope_pop: the scope stays open until function_body
 * pops it.
 */
direct_function_declarator
    : direct_declarator NS_scope_push '(' parameter_type_list ')'
        {$$ = new_node(_Func_Call, $1, new_node(_Sym,$4));}
    | direct_declarator NS_scope_push '(' ')'
        {$$ = new_node(_Func_Call, $1, 0);}
    | direct_declarator NS_scope_push '(' identifier_list ')'
        {$$ = new_node(_Func_Call, $1, $4);}
    ;

/* Note that the type_qualifier_list value is parsed but not passed to
 * new_node() in either alternative that contains it.
 */
pointer
    : '*'
        {$$ = new_node(_Indirect, 0);}
    | '*' type_qualifier_list
        {$$ = new_node(_Indirect, 0);}
    | '*' pointer
        {$$ = new_node(_Indirect, $2);}
    | '*' type_qualifier_list pointer
        {$$ = new_node(_Indirect, $3);}
    ;

/* Qualifier values are combined with bitwise OR. */
type_qualifier_list
    : type_qualifier
    | type_qualifier_list type_qualifier
        {$$ = $1 | $2;}
    ;

/* A trailing ", ..." appends the result of elipsis_arg() to the
 * parameter list.
 */
parameter_type_list
    : parameter_list
    | parameter_list ',' ELIPSIS
        { $$ = concat_symbols($1, elipsis_arg()); }
    ;

parameter_list
    : parameter_declaration
    | parameter_list ',' parameter_declaration
        {$$ = concat_symbols($1,$3);}
    ;

parameter_declaration
    : NS_new_parm declaration_specifiers declarator NS_td
        {$$ = named_abstract_param($2, $3);}
    | NS_new_parm declaration_specifiers NS_td
        {$$ = noname_simple_param($2);}
    | NS_new_parm declaration_specifiers abstract_declarator NS_td
        {$$ = noname_abstract_param($2, $3);}
    ;

identifier_list
    : IDENTIFIER
    | identifier_list ',' IDENTIFIER
        {$$ = new_node(_List, $1, $3);}
    ;

/* A brace-enclosed list with a trailing comma is wrapped in an extra
 * _List node whose second operand is 0.
 */
initializer
    : assignment_expression
    | '{' initializer_list '}'
        {$$ = new_node(_Aggregate, $2);}
    | '{' initializer_list ',' '}'
        {$$ = new_node(_Aggregate, new_node(_List, $2, 0));}
    ;

initializer_list
    : initializer
    | initializer_list ',' initializer
        {$$ = new_node(_List, $1, $3);}
    ;

type_name
    : specifier_qualifier_list NS_td
    | specifier_qualifier_list NS_td abstract_declarator
        { $$ = noname_type($1,$3); }
    ;

abstract_declarator
    : pointer
    | direct_abstract_declarator
    | pointer direct_abstract_declarator
        {$$ = access_to($1, $2);}
    ;

/* Mirrors direct_declarator with 0 standing in for the missing name.
 * No NS_scope_push / NS_scope_pop markers are used here.
 */
direct_abstract_declarator
    : '(' abstract_declarator ')'
        {$$ = $2;}
    | '[' ']'
        {$$ = new_node(_Array_Index, 0, 0);}
    | '[' constant_expression ']'
        {$$ = new_node(_Array_Index, 0, $2);}
    | direct_abstract_declarator '[' ']'
        {$$ = new_node(_Array_Index, $1, 0);}
    | direct_abstract_declarator '[' constant_expression ']'
        {$$ = new_node(_Array_Index, $1, $3);}
    | '(' ')'
        {$$ = new_node(_Func_Call, 0, 0);}
    | '(' parameter_type_list ')'
        {$$ = new_node(_Func_Call, 0, new_node(_Sym,$2));}
    | direct_abstract_declarator '(' ')'
        {$$ = new_node(_Func_Call, $1, 0);}
    | direct_abstract_declarator '(' parameter_type_list ')'
        {$$ = new_node(_Func_Call, $1, new_node(_Sym,$3));}
    ;

/* The statement rules below have no semantic actions, apart from the
 * scope markers in compound_statement and the "RETURN expression"
 * action in jump_statement.
 */
statement
    : labeled_statement
    | compound_statement
    | expression_statement
    | selection_statement
    | iteration_statement
    | jump_statement
    ;

labeled_statement
    : identifier ':' statement
    | CASE constant_expression ':' statement
    | DEFAULT ':' statement
    ;

expression_statement
    : ';'
    | expression ';'
    ;

/* A scope is pushed before '{' and popped just before the closing '}'. */
compound_statement
    : NS_scope_push '{' NS_scope_pop '}'
    | NS_scope_push '{' statement_list NS_scope_pop '}'
    | NS_scope_push '{' declaration_list NS_scope_pop '}'
    | NS_scope_push '{' declaration_list statement_list NS_scope_pop '}'
    ;

/* For now we're skipping function bodies */
/* mjs@12/ 5/94
compound_statement
    : '{' {yyskip();} '}'
    ;
*/

statement_list
    : statement
    | statement_list statement
    ;

/* "%prec THEN" gives the else-less IF rule the precedence of THEN,
 * which is lower than that of ELSE (see the %left declarations).
 */
selection_statement
    : IF '(' expression ')' statement %prec THEN
    | IF '(' expression ')' statement ELSE statement
    | SWITCH '(' expression ')' statement
    ;

/* The eight FOR alternatives enumerate every combination of present
 * and absent expressions in the three header positions.
 */
iteration_statement
    : WHILE '(' expression ')' statement
    | DO statement WHILE '(' expression ')' ';'
    | FOR '(' ';' ';' ')' statement
    | FOR '(' ';' ';' expression ')' statement
    | FOR '(' ';' expression ';' ')' statement
    | FOR '(' ';' expression ';' expression ')' statement
    | FOR '(' expression ';' ';' ')' statement
    | FOR '(' expression ';' ';' expression ')' statement
    | FOR '(' expression ';' expression ';' ')' statement
    | FOR '(' expression ';' expression ';' expression ')' statement
    ;

/* A return statement with an expression sets the _eret flag on the
 * current function, if there is one.
 */
jump_statement
    : GOTO identifier ';'
    | CONTINUE ';'
    | BREAK ';'
    | RETURN ';'
    | RETURN expression ';'
        { if (cur_func) cur_func->_eret = 1; }
    ;

/* Expression rules: each operator application builds a node with
 * new_node(); alternatives without an action rely on the default
 * $$ = $1.
 */
expression
    : assignment_expression
    | expression ',' assignment_expression
        {$$ = new_node(_List, $1, $3);}
    ;

/* The node kind is the value produced by assignment_operator. */
assignment_expression
    : conditional_expression
    | unary_expression assignment_operator assignment_expression
        {$$ = new_node($2, $1, $3);}
    ;

assignment_operator
    : '='
        {$$ = _Assign;}
    | MUL_ASSIGN
        {$$ = _Mul_Assign;}
    | DIV_ASSIGN
        {$$ = _Div_Assign;}
    | MOD_ASSIGN
        {$$ = _Mod_Assign;}
    | ADD_ASSIGN
        {$$ = _Add_Assign;}
    | SUB_ASSIGN
        {$$ = _Sub_Assign;}
    | LEFT_ASSIGN
        {$$ = _Shl_Assign;}
    | RIGHT_ASSIGN
        {$$ = _Shr_Assign;}
    | AND_ASSIGN
        {$$ = _Band_Assign;}
    | XOR_ASSIGN
        {$$ = _Xor_Assign;}
    | OR_ASSIGN
        {$$ = _Bor_Assign;}
    ;

conditional_expression
    : logical_or_expression
    | logical_or_expression '?' expression ':' conditional_expression
        {$$ = new_node(_Cond, $1, $3, $5);}
    ;

constant_expression
    : conditional_expression
    ;

logical_or_expression
    : logical_and_expression
    | logical_or_expression OR_OP logical_and_expression
        {$$ = new_node(_Lor, $1, $3);}
    ;

logical_and_expression
    : inclusive_or_expression
    | logical_and_expression AND_OP inclusive_or_expression
        {$$ = new_node(_Land, $1, $3);}
    ;

inclusive_or_expression
    : exclusive_or_expression
    | inclusive_or_expression '|' exclusive_or_expression
        {$$ = new_node(_Bor, $1, $3);}
    ;

exclusive_or_expression
    : and_expression
    | exclusive_or_expression '^' and_expression
        {$$ = new_node(_Xor, $1, $3);}
    ;

and_expression
    : equality_expression
    | and_expression '&' equality_expression
        {$$ = new_node(_Band, $1, $3);}
    ;

equality_expression
    : relational_expression
    | equality_expression EQ_OP relational_expression
        {$$ = new_node(_Eq, $1, $3);}
    | equality_expression NE_OP relational_expression
        {$$ = new_node(_Ne, $1, $3);}
    ;

relational_expression
    : shift_expression
    | relational_expression '<' shift_expression
        {$$ = new_node(_Lt, $1, $3);}
    | relational_expression '>' shift_expression
        {$$ = new_node(_Gt, $1, $3);}
    | relational_expression LE_OP shift_expression
        {$$ = new_node(_Le, $1, $3);}
    | relational_expression GE_OP shift_expression
        {$$ = new_node(_Ge, $1, $3);}
    ;

shift_expression
    : additive_expression
    | shift_expression LEFT_OP additive_expression
        {$$ = new_node(_Shl, $1, $3);}
    | shift_expression RIGHT_OP additive_expression
        {$$ = new_node(_Shr, $1, $3);}
    ;

additive_expression
    : multiplicative_expression
    | additive_expression '+' multiplicative_expression
        {$$ = new_node(_Add, $1, $3);}
    | additive_expression '-' multiplicative_expression
        {$$ = new_node(_Sub, $1, $3);}
    ;

multiplicative_expression
    : cast_expression
    | multiplicative_expression '*' cast_expression
        {$$ = new_node(_Mul, $1, $3);}
    | multiplicative_expression '/' cast_expression
        {$$ = new_node(_Div, $1, $3);}
    | multiplicative_expression '%' cast_expression
        {$$ = new_node(_Rem, $1, $3);}
    ;

/* The type_name operand is wrapped in a _Type node. */
cast_expression
    : unary_expression
    | '(' type_name ')' cast_expression
        {$$ = new_node(_Type_Cast, new_node(_Type, $2), $4);}
    ;

unary_expression
    : postfix_expression
    | INC_OP unary_expression
        {$$ = new_node(_Pre_Inc, $2);}
    | DEC_OP unary_expression
        {$$ = new_node(_Pre_Dec, $2);}
    | unary_operator cast_expression
        {$$ = new_node($1, $2);}
    | SIZEOF unary_expression
        {$$ = new_node(_Sizeof, $2);}
    | SIZEOF '(' type_name ')'
        {$$ = new_node(_Sizeof, new_node(_Type, $3));}
    ;

/* Value is the node kind used by the unary_expression action. */
unary_operator
    : '&'
        {$$ = _Addrof;}
    | '*'
        {$$ = _Indirect;}
    | '+'
        {$$ = _Unary_Plus;}
    | '-'
        {$$ = _Unary_Minus;}
    | '~'
        {$$ = _Ones_Complement;}
    | '!'
        {$$ = _Not;}
    ;

postfix_expression
    : primary_expression
    | postfix_expression '[' expression ']'
        {$$ = new_node(_Array_Index, $1, $3);}
    | postfix_expression '(' ')'
        {$$ = new_node(_Func_Call, $1, 0);}
    | postfix_expression '(' argument_expression_list ')'
        {$$ = new_node(_Func_Call, $1, $3);}
    | postfix_expression '.' identifier
        {$$ = new_node(_Dot_Selected, $1, $3);}
    | postfix_expression PTR_OP identifier
        {$$ = new_node(_Arrow_Selected, $1, $3);}
    | postfix_expression INC_OP
        {$$ = new_node(_Post_Inc, $1);}
    | postfix_expression DEC_OP
        {$$ = new_node(_Post_Dec, $1);}
    ;

primary_expression
    : IDENTIFIER
    | constant
    | STRING
    | '(' expression ')'
        {$$ = $2;}
    ;

argument_expression_list
    : assignment_expression
    | argument_expression_list ',' assignment_expression
        {$$ = new_node(_List, $1, $3);}
    ;

constant
    : INTEGER_CONSTANT
    | CHARACTER_CONSTANT
    | FLOATING_CONSTANT
    | ENUMERATION_CONSTANT
    ;

%%