# A C++ Preprocessor -*- coding: iso-8859-1 -*-
# Copyright 2002 Martin v. Löwis
#
# This is an implementation of ISO/IEC 14882:1998,
# sections 2.1/{1-6} [lex.phases], and 16 [cpp]
# It deviates from this standard in the following aspects:
# - Universal character names (2.2/2) are not supported
# - Computations in #if directives are performed in the Python integer type
# TODO:
# - predefined macro names (16.8)
# - add file/line information into SyntaxError exceptions
# - implement #line directives
# - implement #error directives

"""A C++ Preprocessor

This module has three modes of operation:

1. As a script, it implements the cpp(1) executable, with support for
   -I and -D command line options.  This has the same effect as invoking
   cpp.run().
2. With cpp.process_command_line, a Preprocessor object can be created
   for a command line.
3. Creating a cpp.Preprocessor object directly allows the include
   directories and the input file to be specified from other sources.

A Preprocessor object then primarily supports the get_token method,
which returns cpp.Token objects."""

import re, os

trigraphs = {
    '??=': '#',  '??(': '[',  '??<': '{',
    '??/': '\\', '??)': ']',  '??>': '}',
    "??'": '^',  '??!': '|',  '??-': '~'
    }

whitespace = " \t\f"

# token classes
IDENTIFIER = 'identifier'
HEADERNAME = 'headername'
NUMBER = 'number'
CHARLITERAL = 'charliteral'
STRINGLITERAL = 'stringliteral'
PREPROCESSING_OP_OR_PUNC = 'preprocessing-op-or-punc'
PRAGMA = 'pragma'
INCLUDE_START = 'include_start'
INCLUDE_END = 'include_end'
OTHER = 'other'

# [lex.operators], except for identifier-like punctuators
preprocessing_op_or_punc = [
    '{', '}', '[', ']', '#', '##', '(', ')',
    '<:', ':>', '<%', '%>', '%:', '%:%:',
    ';', ':', '...', '?', '::', '.', '.*',
    '+', '-', '*', '/', '%', '^', '&', '|', '~',
    '!', '=', '<', '>', '+=', '-=', '*=', '/=', '%=',
    '^=', '&=', '|=', '<<', '>>', '>>=', '<<=', '==', '!=',
    '<=', '>=', '&&', '||', '++', '--', ',', '->*', '->'
    ]

# [lex.digraph]
alternative_tokens = {
    '<%' : '{',  '%>' : '}',  '<:' : '[',  ':>' : ']',
    '%:' : '#',  '%:%:' : '##',
    'and'   : '&&', 'bitor' : '|',  'or'    : '||', 'xor'   : '^',
    'compl' : '~',  'bitand': '&',  'and_eq': '&=', 'or_eq' : '|=',
    'xor_eq': '^=', 'not'   : '!',  'not_eq': '!='
    }

# Punctuator lookup tables, longest punctuators first
punctuators = (({}, 4), ({}, 3), ({}, 2), ({}, 1))
for p in preprocessing_op_or_punc:
    punctuators[4-len(p)][0][p] = 1

# [lex.pptoken], except for preprocessing_op_or_punc and "other whitespace"
# XXX for some reason, headername never matches (probably because the escaped
# newline at the start of the pattern makes the first alternative require a
# literal newline before the header name)
pp_tokens = re.compile(r"""\
(?P<headername>(<[^>]+>|"[^"]+"))|
(?P<identifier>[_a-zA-Z][_a-zA-Z0-9]*)|
(?P<number>\.?[0-9]([_a-df-zA-DF-Z0-9.]|[eE][-+]?)*)|
(?P<charliteral>L?'([^\\']|\\['"?\\abfnrtv]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+)*')|
(?P<stringliteral>L?"([^\\"]|\\['"?\\abfnrtv]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+)*")
""", re.VERBOSE) #' to make emacs mode happy
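
# A small illustration (not part of the original module) of how the tables
# above are used by the tokenizer: pp_tokens.match classifies identifiers,
# numbers and literals via its named groups, and `punctuators` is consulted
# longest-first so that e.g. ">>=" wins over ">>" and ">".  The sample text
# is made up.
#
#   m = pp_tokens.match("foo42 + 17", 0)
#   m.lastgroup, m.group()                          # -> ('identifier', 'foo42')
#   [l for d, l in punctuators if d.has_key(">>=")] # -> [3]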

def find_path(path, file):
    """Return the first existing file named `file` in one of the
    directories listed in `path`, or None."""
    for p in path:
        fn = os.path.join(p, file)
        if os.path.exists(fn):
            return fn
    return None

class SyntaxError(Exception):
    pass

class Token:
    """Class representing a preprocessor token.

    The following attributes are available:
    type - indicates the token class, one of IDENTIFIER, HEADERNAME,
        NUMBER, CHARLITERAL, STRINGLITERAL, PREPROCESSING_OP_OR_PUNC,
        PRAGMA, INCLUDE_START, INCLUDE_END, OTHER
    text - indicates the token text
    file, line - source code reference
    bol - true if token was at the beginning of a line
    space_follows - true if the token had immediate subsequent white space
    replacable - true if the token is available for further macro replacement
    """
    def __init__(self, type, text, file, line):
        self.type = type
        self.text = text
        self.file = file
        self.line = line
        self.bol = 0
        self.space_follows = 0
        self.replacable = 1

def copy_tokens(tokens):
    """Return a list of copies of the given tokens."""
    result = []
    for t in tokens:
        t1 = Token(t.type, t.text, t.file, t.line)
        t1.bol = t.bol
        t1.space_follows = t.space_follows
        t1.replacable = t.replacable
        result.append(t1)
    return result

class InputFile:
    """Class representing a preprocessor source file."""
    def __init__(self, name, path):
        """Construct the file, given the file name (as it appeared in the
        include directive or on the command line), and its path."""
        self.name = name
        self.path = path
        self.line = 0
        file = open(path, "rb")
        data = file.read()
        self.pos = 0
        self.include = None
        self.comment = 0
        self.ifstack = []
        self.bol = 1
        self.backlog = []
        self.included = 0
        # Replace trigraphs, [lex.trigraph], [lex.phases]/1
        pos = 0
        while 1:
            pos = data.find("??", pos)
            if pos == -1:
                break
            tg = data[pos:pos+3]
            if tg in trigraphs:
                data = data[:pos] + trigraphs[tg] + data[pos+3:]
            pos += 1
        # Universal Character Names, [lex.charset]/2
        for esc, hlen in (('\\u', 4), ('\\U', 8)):
            pos = data.find(esc)
            if pos != -1:
                for i in range(pos+2, pos+2+hlen):
                    if data[i] not in '0123456789ABCDEFabcdef':
                        break
                else:
                    raise NotImplementedError, "universal character names"
        # Split input into physical source lines
        if data.find("\n") != -1:
            data = data.split("\n")
            # strip any \r originating from \r\n
            for i in range(len(data)):
                if data[i] and data[i][-1] == '\r':
                    data[i] = data[i][:-1]
        else:
            data = data.split("\r")
        # Add line numbers
        for i in range(len(data)):
            data[i] = i+1, data[i]
        # Form logical source lines, [lex.phases]/2
        # XXX may want to preserve the original line break positions
        i = 0
        while i < len(data):
            if data[i][1] and data[i][1][-1] == '\\':
                data[i] = data[i][0], data[i][1][:-1]+data[i+1][1]
                del data[i+1]
            else:
                i += 1
        self.lines = data
        self.curline = data[0][1]
        self.lineno = 1

    def unget_token(self, t):
        """Put a token into the backlog."""
        self.backlog.insert(0, t)

    def skip_whitespace(self):
        """Advance until the file is consumed, or a non-whitespace token
        is encountered."""
        while self.pos < len(self.curline):
            if self.curline[self.pos] in whitespace:
                self.pos += 1
                continue
            next = self.curline[self.pos:self.pos+2]
            if next == '//':
                # C++ comment
                self.pos = len(self.curline)
                return
            if next == '/*':
                # C comment
                pos = self.curline.find('*/', self.pos+2)
                if pos != -1:
                    # closed within the same line
                    self.pos = pos + 2
                else:
                    # not closed in the same line
                    self.pos += 2
                    self.comment = 1
            else:
                return
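
    # Rough sketch (not in the original source) of the token stream that
    # get_token() below produces for the made-up logical line
    #
    #     #include <stdio.h>
    #
    # Because the headername branch of pp_tokens never matches (see the XXX
    # comment above), the line is delivered as the tokens
    # '#', 'include', '<', 'stdio', '.', 'h', '>', with bol set only on '#'.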

    def get_token(self):
        """Return a token from the file."""
        if self.backlog:
            result = self.backlog[0]
            del self.backlog[0]
            return result
        # skip empty lines, open comments
        self.skip_whitespace()
        while self.pos >= len(self.curline) or self.comment:
            self.pos = 0
            self.line += 1
            if self.line >= len(self.lines):
                # End of file
                if self.comment:
                    raise SyntaxError, "incomplete comment"
                return None
            self.lineno = self.lines[self.line][0]
            self.curline = self.lines[self.line][1]
            self.bol = 1
            if self.comment:
                pos = self.curline.find("*/")
                if pos != -1:
                    self.pos = pos + 2
                    self.comment = 0
            # skip whitespace at the beginning of the new line
            self.skip_whitespace()
        self.skip_whitespace()
        bol = self.bol
        self.bol = 0
        # match token classes
        m = pp_tokens.match(self.curline, self.pos)
        if m:
            text = m.group()
            self.pos += len(text)
            kind = m.lastgroup
            # XXX: new, delete should be preprocessing_op_or_punc???
        else:
            # longest-match lookup of punctuators
            for d, l in punctuators:
                text = self.curline[self.pos:self.pos+l]
                if d.has_key(text):
                    self.pos += l
                    kind = PREPROCESSING_OP_OR_PUNC
                    break
            else:
                text = self.curline[self.pos]
                self.pos += 1
                kind = OTHER
        result = Token(kind, text, self.name, self.line)
        result.bol = bol
        try:
            result.space_follows = self.curline[self.pos] in whitespace
        except IndexError:
            pass
        return result

class TokenList:
    """Class representing a stand-alone list of tokens, such as a macro
    replacement list."""
    def __init__(self, tokens):
        self.tokens = tokens
        self.pos = 0

    def get_token(self):
        """Get a token from the list."""
        pos = self.pos
        self.pos = pos + 1
        try:
            return self.tokens[pos]
        except IndexError:
            return None

    def unget_token(self, t):
        """Put a token back into the list. This currently assumes that this
        very token has been taken from the list immediately before."""
        assert self.tokens[self.pos - 1] is t
        self.pos -= 1

    def unget_tokens(self, tokens):
        """Put a list of tokens into this list at the current position."""
        self.tokens[self.pos:self.pos] = tokens

################## The preprocessor proper #############################

class Preprocessor:
    """A C++ preprocessor. The major method is get_token."""
    def __init__(self, topfile, incdirs = ['.'], sys_incdirs = [], digraphs = 1):
        """Initialize a preprocessor for a source file.
        Optional arguments are:
        incdirs: list of include directories
        sys_incdirs: list of include directories for system headers
        digraphs: true if the preprocessor should replace digraphs."""
        self.defines = {}
        self.incdirs = incdirs
        self.sys_incdirs = sys_incdirs
        self.files = [InputFile(topfile, topfile)]
        self.backlog = []
        self.expanded_macros = []
        self.pending_pop = 0
        self.digraphs = digraphs

    def _pop_file(self):
        """Internal. Remove a file from the stack."""
        f = self.files.pop()
        if f.ifstack:
            raise SyntaxError, "unterminated conditional"
        if f.included:
            self._unget_file_token(Token(INCLUDE_END, f.path, None, None))

    def _get_file_token(self, pop = 1):
        """Internal. Get a token from the innermost file."""
        if self.pending_pop:
            self._pop_file()
            self.pending_pop = 0
        while 1:
            if len(self.files) == 0:
                return None
            t = self.files[-1].get_token()
            if t is None:
                if pop:
                    self._pop_file()
                    return self._get_file_token()
                else:
                    self.pending_pop = 1
                    return None
            elif self.digraphs and \
                 t.type in [IDENTIFIER, PREPROCESSING_OP_OR_PUNC]:
                try:
                    t1 = alternative_tokens[t.text]
                except KeyError:
                    return t
                else:
                    # replace the alternative spelling with the primary one
                    (t,) = copy_tokens([t])
                    t.type = PREPROCESSING_OP_OR_PUNC
                    t.text = t1
                    return t
            else:
                return t
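
    # Illustration (not in the original source): with digraphs enabled,
    # _get_file_token above rewrites alternative tokens on the fly, so the
    # made-up source line
    #
    #     x <% y and z %>
    #
    # is delivered as 'x', '{', 'y', '&&', 'z', '}', each rewritten token
    # carrying the PREPROCESSING_OP_OR_PUNC type.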

    def _unget_file_token(self, t):
        """Internal. Put back a token to the innermost file."""
        self.files[-1].unget_token(t)

    def skipped_section(self):
        """Return true if we are in a skipped section."""
        stack = self.files and self.files[-1].ifstack
        if stack:
            return stack[-1] <= 0
        return 0

    def get_token(self):
        """Return the next token, or None at the end of the input."""
        if self.backlog:
            t = self.backlog[0]
            del self.backlog[0]
            return t
        token = self._get_file_token()
        if token is None:
            return token
        while self.skipped_section():
            if token.bol and token.text in ['#', '%:']:
                self.preprocess()
            token = self._get_file_token()
            if token is None:
                return None
        if token.bol and token.text in ['#', '%:']:
            self.preprocess()
            return self.get_token()
        if token.type == IDENTIFIER and token.replacable:
            try:
                macro = self.defines[token.text]
            except KeyError:
                pass
            else:
                body = self.expand_macro(token, self)
                token = body[0]
                self.backlog[0:0] = body[1:]
        return token

    def unget_token(self, t):
        """Put back a token to the preprocessor."""
        self._unget_file_token(t)

    def preprocess(self):
        """Implement # directives."""
        tokens = []
        while 1:
            # If we reach EOF, we shall not pop to the previous file,
            # since a preprocessor instruction must not span files
            t = self._get_file_token(pop = 0)
            if t is None:
                break
            if t.bol:
                self._unget_file_token(t)
                break
            else:
                tokens.append(t)
        if not tokens:
            # empty preprocessing directive
            return
        t = tokens[0]
        del tokens[0]
        if self.skipped_section() and \
           t.text not in ['if', 'ifdef', 'ifndef', 'elif', 'else', 'endif']:
            return
        getattr(self, 'do_'+t.text)(tokens)
        return

    def do_line(self, tokens):
        """Implement #line directives."""
        # Line directive, ignore for now
        return

    def do_pragma(self, tokens):
        """Implement #pragma directives, by creating a new token for the
        pragma."""
        # Join fragments to a single text
        text = []
        for t in tokens:
            text.append(t.text)
        text = " ".join(text)
        # Create a new token
        t = Token(PRAGMA, text, tokens[0].file, tokens[0].line)
        # preserve original tokens
        t.pragma = tokens
        self._unget_file_token(t)

    def do_include(self, tokens):
        """Process an #include directive. On return, a new file is on the
        stack."""
        # [cpp.include]
        # parse the directive
        if len(tokens) == 1:
            # [cpp.include]/2,3
            if tokens[0].type == HEADERNAME:
                filename = tokens[0].text
            elif tokens[0].type == STRINGLITERAL and tokens[0].text[0] != 'L':
                filename = tokens[0].text
            else:
                raise SyntaxError, "Invalid include directive "+tokens[0].text
        else:
            # [cpp.include]/4
            tokens = self.expand_macros(tokens)
            if len(tokens) < 3:
                raise SyntaxError, "invalid include directive"
            ftext = tokens[0].text
            ltext = tokens[-1].text
            if ftext == '<' and ltext == '>' \
               or ftext == ltext == '"':
                texts = []
                for t in tokens:
                    texts.append(t.text)
                filename = "".join(texts)
            else:
                raise SyntaxError, "invalid include directive"
        # find the file
        if filename[0] == '<':
            # [cpp.include]/2
            file = find_path(self.incdirs, filename[1:-1])
            if not file:
                file = find_path(self.sys_incdirs, filename[1:-1])
        else:
            # [cpp.include]/3
            file = find_path(self.incdirs, filename[1:-1])
        if not file:
            raise SyntaxError, "include file "+filename+" not found"
        # open the file
        f = InputFile(filename[1:-1], file)
        f.included = 1
        self.files.append(f)
        # Add the INCLUDE_START token
        self._unget_file_token(Token(INCLUDE_START, file, # XXX filename?
                                     tokens[0].file, tokens[0].line))
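
    # Sketch (not in the original source) of the entry that do_define below
    # records in self.defines for the made-up directive
    #
    #     #define MAX(a, b) ((a) > (b) ? (a) : (b))
    #
    #   self.defines['MAX'] == (1, ['a', 'b'], <replacement token list>)
    #
    # i.e. a (function_like, parameter names, replacement tokens) triple;
    # object-like macros are stored with function_like 0 and an empty
    # parameter list.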

    def do_define(self, tokens):
        """Process a #define."""
        macro = tokens[0]
        if macro.type != IDENTIFIER:
            raise SyntaxError, "invalid define directive"
        function_like = 0
        body_start = 1
        args = []
        if not macro.space_follows and len(tokens) > 1 and tokens[1].text == '(':
            function_like = 1
            body_start += 1
            try:
                if tokens[body_start].text != ')':
                    while 1:
                        arg = tokens[body_start]
                        if arg.type != IDENTIFIER:
                            raise SyntaxError, "Error in argument list"
                        # [cpp.replace]/4
                        for a in args:
                            if a == arg.text:
                                raise SyntaxError, "Duplicate macro argument"
                        args.append(arg.text)
                        if tokens[body_start+1].text == ')':
                            body_start += 2
                            break
                        if tokens[body_start+1].text != ',':
                            raise SyntaxError, "Error in argument list"
                        body_start += 2
                else:
                    # empty parameter list: skip the closing parenthesis
                    body_start += 1
            except IndexError:
                raise SyntaxError, "missing ) in function-like macro"
        body = tokens[body_start:]
        old_macro = self.defines.get(macro.text)
        if old_macro:
            self.compare_macros(macro.text, old_macro,
                                (function_like, args, body))
        self.defines[macro.text] = (function_like, args, body)

    def compare_macros(self, name, (oldf, olda, oldb), (newf, newa, newb)):
        """Return if two macro definitions are equivalent. Otherwise,
        raise a SyntaxError."""
        # [cpp.replace]/{1-3}
        if oldf != newf or len(olda) != len(newa) or len(oldb) != len(newb):
            raise SyntaxError, "Invalid redefinition of "+name
        # [cpp.replace]/1
        for i in range(len(oldb)):
            if oldb[i].text != newb[i].text:
                raise SyntaxError, "Invalid redefinition of "+name
        # [cpp.replace]/3
        for i in range(len(olda)):
            if olda[i].text != newa[i].text:
                raise SyntaxError, "Invalid redefinition of "+name

    def expand_macro(self, macro, token_list):
        """Return the macro replacement list for macro. This may consume
        further tokens from token_list, if the macro is function-like."""
        if macro.text in self.expanded_macros:
            # recursive use of the macro name: stop further replacement
            macro.replacable = 0
            return [macro]
        self.expanded_macros.append(macro.text)
        function_like, params, body = self.defines[macro.text]
        if function_like:
            # look for opening (
            t = token_list.get_token()
            if t is None or t.text != '(':
                # the macro name is not followed by an argument list
                self.expanded_macros.pop()
                if t is None:
                    return [macro]
                return [macro, t]
            arguments = self.get_arguments(token_list)
            if len(arguments) != len(params):
                raise SyntaxError, "Incorrect number of macro arguments"
            body = self.replace_arguments_in_body(params, arguments, body)
            body = self.replace_hash_param(params, arguments, body)
        else:
            arguments = []
        body = self.replace_hashhash_body(params, arguments, body)
        self.expanded_macros.pop()
        # adjust line numbers to line of macro application
        for t in body:
            t.file = macro.file
            t.line = macro.line
        return body

    def get_arguments(self, token_list):
        """Return the arguments for a macro from token_list."""
        arguments = []
        arg = []
        parens = 1
        while 1:
            t = token_list.get_token()
            if t is None:
                raise SyntaxError, "Missing )"
            if t.text == '(':
                arg.append(t)
                parens += 1
            elif t.text == ')':
                if parens == 1:
                    if arg:
                        arguments.append(arg)
                    break
                arg.append(t)
                parens -= 1
            elif t.text == ',' and parens == 1:
                arguments.append(arg)
                arg = []
            else:
                arg.append(t)
        return arguments
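
    # Illustration (not in the original source): for the made-up invocation
    # MAX(x + 1, MIN(y, z)), get_arguments above returns two token lists,
    # ['x', '+', '1'] and ['MIN', '(', 'y', ',', 'z', ')'] -- a comma inside
    # nested parentheses does not start a new argument.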

    def replace_arguments_in_body(self, params, arguments, body):
        """Find all occurrences of macro parameters in the macro body, and
        return a new token list with the parameters replaced by the
        macro-expanded arguments."""
        final_body = []
        for i in range(len(body)):
            t = body[i]
            if t.type != IDENTIFIER:
                final_body.append(t)
                continue
            try:
                pos = params.index(t.text)
            except ValueError:
                final_body.append(t)
                continue
            # Don't replace parameters preceded or followed
            # by ## or preceded by #
            if i > 0 and body[i-1].text in ['#', '##', '%:', '%:%:']:
                final_body.append(t)
                continue
            if i < len(body)-1 and body[i+1].text in ['##', '%:%:']:
                final_body.append(t)
                continue
            new_tokens = self.expand_macros(arguments[pos])
            final_body.extend(copy_tokens(new_tokens))
        return final_body

    def replace_hash_param(self, params, arguments, body):
        """Return a macro body with all occurrences of the # stringification
        operator replaced."""
        # Replace # param
        final_body = []
        i = 0
        while i < len(body):
            t = body[i]
            if t.text not in ['#', '%:']:
                final_body.append(t)
                i += 1
                continue
            pos = params.index(body[i+1].text)
            arg = [a.text for a in arguments[pos]]
            arg = ' '.join(arg)
            # XXX: stringifying strings?
            assert '"' not in arg
            final_body.append(Token(STRINGLITERAL, '"'+arg+'"', None, None))
            # skip the parameter that has just been stringified
            i += 2
        return final_body

    def replace_hashhash_body(self, params, arguments, body):
        """Return a token list with all occurrences of the ## token pasting
        operator replaced."""
        i = 1
        replaced = 0
        # Replace parameters first
        while i < len(body):
            if body[i].text not in ['##', '%:%:']:
                i += 1
                continue
            replaced = 1
            try:
                pos = params.index(body[i-1].text)
            except ValueError:
                pass
            else:
                new_tokens = arguments[pos]
                body[i-1:i] = copy_tokens(new_tokens)
                i += len(new_tokens)-1
            try:
                pos = params.index(body[i+1].text)
            except ValueError:
                pass
            else:
                new_tokens = arguments[pos]
                body[i+1:i+2] = copy_tokens(new_tokens)
                i += len(new_tokens)-1
            i += 3
        if not replaced:
            return body
        # perform the token pasting
        i = 1
        while i < len(body):
            if body[i].text not in ['##', '%:%:']:
                i += 1
                continue
            if body[i-1].type == IDENTIFIER and body[i+1].type == IDENTIFIER:
                new_type = IDENTIFIER
            else:
                new_type = 'uncertain'
            new_token = Token(new_type, body[i-1].text+body[i+1].text,
                              None, None)
            body[i-1:i+2] = [new_token]
            # no need to adjust i; it is already advanced
        # reclassify uncertain tokens
        for t in body:
            if t.type == 'uncertain':
                raise NotImplementedError, "uncertain token after pasting"
        return body

    def expand_macros(self, tokens):
        """Return a token list with all macros replaced in the tokens
        token list."""
        result = []
        tokens = TokenList(tokens)
        while 1:
            t = tokens.get_token()
            if t is None:
                break
            if t.type == IDENTIFIER and t.replacable:
                try:
                    macro = self.defines[t.text]
                except KeyError:
                    pass
                else:
                    body = self.expand_macro(t, tokens)
                    t = body[0]
                    tokens.unget_tokens(body[1:])
            result.append(t)
        return result

    def do_undef(self, tokens):
        """Process an #undef directive."""
        if len(tokens) != 1:
            raise SyntaxError, "extra tokens in #undef"
        try:
            del self.defines[tokens[0].text]
        except KeyError:
            pass

    def do_ifdef(self, tokens):
        """Process an #ifdef directive."""
        if len(tokens) != 1:
            raise SyntaxError, "extra tokens in #ifdef"
        is_defined = self.defines.has_key(tokens[0].text)
        self.push_condition(is_defined)

    def do_ifndef(self, tokens):
        """Process an #ifndef directive."""
        if len(tokens) != 1:
            raise SyntaxError, "extra tokens in #ifndef"
        is_defined = self.defines.has_key(tokens[0].text)
        self.push_condition(not is_defined)

    def do_endif(self, tokens):
        """Process an #endif directive."""
        if len(tokens) != 0:
            raise SyntaxError, "extra tokens in #endif"
        self.pop_condition()

    def do_else(self, tokens):
        """Process an #else directive."""
        if len(tokens) != 0:
            raise SyntaxError, "extra tokens in #else"
        self.toggle_condition(1)

    def do_if(self, tokens):
        """Process an #if directive."""
        val = self.eval_if(tokens)
        self.push_condition(val)

    def do_elif(self, tokens):
        """Process an #elif directive."""
        val = self.eval_if(tokens)
        self.toggle_condition(val)
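
    # Sketch (not in the original source) of how the per-file ifstack drives
    # skipped_section(): push_condition records a true value (include) or 0
    # (skip) for each #if/#ifdef/#ifndef, toggle_condition turns a pending 0
    # into the new value on #elif/#else and parks an already-taken branch at
    # -1, and pop_condition removes the entry on #endif.  A made-up sequence:
    #
    #     #if 0      ->  ifstack == [0]    (skipping)
    #     #elif 1    ->  ifstack == [1]    (emitting)
    #     #else      ->  ifstack == [-1]   (skipping again)
    #     #endif     ->  ifstack == []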

    def eval_if(self, tokens):
        """Return the value of the expression in a #if or #elif."""
        # Replace defined(ident) with 0 or 1
        self.replace_defined(tokens)
        # Expand all macros
        tokens = self.expand_macros(tokens)
        # Replace all remaining identifiers with 0.  Alternative tokens are
        # not replaced.  Processing of new and delete is uncertain.
        for i in range(len(tokens)):
            t = tokens[i]
            if t.type == IDENTIFIER and \
               not alternative_tokens.has_key(t.text):
                tokens[i] = Token(NUMBER, "0", t.file, t.line)
        tokens = TokenList(tokens)
        return constant_expression(tokens)

    def replace_defined(self, tokens):
        """Replace all occurrences of the defined construct in tokens."""
        pos = 0
        while pos < len(tokens):
            if tokens[pos].text != 'defined':
                pos += 1
                continue
            try:
                next = tokens[pos+1]
                if next.text == '(':
                    # defined ( ident )
                    ident = tokens[pos+2]
                    if tokens[pos+3].text != ')':
                        raise SyntaxError, "Invalid defined()"
                    end = pos + 4
                else:
                    # defined ident
                    ident = tokens[pos+1]
                    end = pos + 2
            except IndexError:
                raise SyntaxError, "Invalid defined()"
            if ident.type != IDENTIFIER:
                raise SyntaxError, "invalid defined()"
            if self.defines.has_key(ident.text):
                val = "1"
            else:
                val = "0"
            tokens[pos:end] = [Token(NUMBER, val, ident.file, ident.line)]

    def push_condition(self, value):
        """Put a new condition with the initial value on the conditional
        inclusion stack."""
        if self.skipped_section():
            # groups nested inside a skipped section are skipped as well
            self.files[-1].ifstack.append(-1)
        else:
            self.files[-1].ifstack.append(value)

    def toggle_condition(self, value):
        """Toggle the topmost value of the inclusion stack to value.
        If a previous section was already included, this does nothing."""
        stack = self.files[-1].ifstack
        if not stack:
            raise SyntaxError, "unexpected #else"
        # If the previous alternative was false, the current alternative
        # decides. Otherwise, a previous alternative was taken, and
        # the value stays at -1
        if stack[-1] == 0:
            stack[-1] = value
        else:
            stack[-1] = -1

    def pop_condition(self):
        """Remove the topmost value from the inclusion stack."""
        stack = self.files[-1].ifstack
        if not stack:
            raise SyntaxError, "unexpected #endif"
        stack.pop()

def process_command_line(args):
    """Process the cpp(1) command line arguments. Return a Preprocessor."""
    incdirs = ['.']
    defines = []
    while args[0][0] == '-':
        arg = args[0]
        del args[0]
        if arg == '-I':
            incdirs.append(args[0])
            del args[0]
        elif arg.startswith('-I'):
            incdirs.append(arg[2:])
        elif arg.startswith('-D'):
            if '=' in arg[2:]:
                key, value = arg[2:].split('=', 1)
                value = [value]
            else:
                key = arg[2:]
                value = []
            # XXX -D definitions are parsed, but never added to `defines`;
            # Preprocessor.add_define is not implemented yet
        else:
            raise NotImplementedError, "Argument "+arg
    if len(args) != 1:
        raise NotImplementedError, "Remaining arguments %s" % (args,)
    result = Preprocessor(args[0], incdirs)
    for k, v in defines:
        result.add_define(k, v)
    return result

########### Evaluation of expressions in #if/elif ####################

def expecting(tokens, literals):
    """Return the next token if its text is in literals, else raise a
    SyntaxError."""
    t = tokens.get_token()
    if t and t.text in literals:
        return t
    if t:
        tokens.unget_token(t)
    raise SyntaxError, "error in expression, expecting " + repr(literals)

def peek(tokens, literals):
    """Return the next token if its text is in literals, else return None."""
    t = tokens.get_token()
    if t and t.text in literals:
        return t
    if t:
        tokens.unget_token(t)
    return None

def constant_expression(tokens):
    """Evaluate a constant expression, as it may appear in #if."""
    val = conditional_expression(tokens)
    t = tokens.get_token()
    if t is not None:
        raise SyntaxError, "error at "+t.text
    return val
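
# Worked example (not in the original source): for the made-up directive
#
#     #if 2 + 3 * 4 == 14 && !0
#
# eval_if has already replaced any surviving identifiers with 0, so
# constant_expression sees only numbers and operators; the recursive-descent
# functions below give multiplication precedence over addition and equality
# precedence over &&, and the whole expression evaluates to a true value.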

def conditional_expression(tokens):
    """conditional-expression:
        logical-or-expression
        logical-or-expression ? expression : assignment-expression"""
    # C++ disallows comma expressions and assignment expressions
    # in constant expressions, so this reduces to conditional-expression
    # in each alternative
    cond = logical_or_expression(tokens)
    t = peek(tokens, ['?'])
    if t is None:
        return cond
    answer1 = conditional_expression(tokens)
    t = expecting(tokens, [':'])
    answer2 = conditional_expression(tokens)
    if cond:
        return answer1
    else:
        return answer2

def logical_or_expression(tokens):
    """logical-or-expression:
        logical-and-expression
        logical-or-expression || logical-and-expression"""
    val1 = logical_and_expression(tokens)
    while 1:
        t = peek(tokens, ['or', '||'])
        if t is None:
            return val1
        val2 = logical_and_expression(tokens)
        val1 = val1 or val2

def logical_and_expression(tokens):
    """logical-and-expression:
        inclusive-or-expression
        logical-and-expression && inclusive-or-expression"""
    val1 = inclusive_or_expression(tokens)
    while 1:
        t = peek(tokens, ['and', '&&'])
        if t is None:
            return val1
        val2 = inclusive_or_expression(tokens)
        val1 = val1 and val2

def inclusive_or_expression(tokens):
    """inclusive-or-expression:
        exclusive-or-expression
        inclusive-or-expression | exclusive-or-expression"""
    val1 = exclusive_or_expression(tokens)
    while 1:
        t = peek(tokens, ['bitor', '|'])
        if t is None:
            return val1
        val2 = exclusive_or_expression(tokens)
        val1 = val1 | val2

def exclusive_or_expression(tokens):
    """exclusive-or-expression:
        and-expression
        exclusive-or-expression ^ and-expression"""
    val1 = and_expression(tokens)
    while 1:
        t = peek(tokens, ['xor', '^'])
        if t is None:
            return val1
        val2 = and_expression(tokens)
        val1 = val1 ^ val2

def and_expression(tokens):
    """and-expression:
        equality-expression
        and-expression & equality-expression"""
    val1 = equality_expression(tokens)
    while 1:
        t = peek(tokens, ['bitand', '&'])
        if t is None:
            return val1
        val2 = equality_expression(tokens)
        val1 = val1 & val2

def equality_expression(tokens):
    """equality-expression:
        relational-expression
        equality-expression == relational-expression
        equality-expression != relational-expression"""
    val1 = relational_expression(tokens)
    while 1:
        t = peek(tokens, ['==', '!='])
        if t is None:
            return val1
        val2 = relational_expression(tokens)
        if t.text == '==':
            val1 = val1 == val2
        else:
            val1 = val1 != val2

def relational_expression(tokens):
    """relational-expression:
        shift-expression
        relational-expression < shift-expression
        relational-expression > shift-expression
        relational-expression <= shift-expression
        relational-expression >= shift-expression"""
    val1 = shift_expression(tokens)
    while 1:
        t = peek(tokens, ['<', '>', '<=', '>='])
        if t is None:
            return val1
        val2 = shift_expression(tokens)
        if t.text == '<':
            val1 = val1 < val2
        elif t.text == '>':
            val1 = val1 > val2
        elif t.text == '<=':
            val1 = val1 <= val2
        elif t.text == '>=':
            val1 = val1 >= val2
        else:
            assert 0

def shift_expression(tokens):
    """shift-expression:
        additive-expression
        shift-expression << additive-expression
        shift-expression >> additive-expression"""
    val1 = additive_expression(tokens)
    while 1:
        t = peek(tokens, ['<<', '>>'])
        if t is None:
            return val1
        val2 = additive_expression(tokens)
        if t.text == '<<':
            val1 = val1 << val2
        else:
            val1 = val1 >> val2
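
# Note (not in the original source): the peek() calls above accept the
# alternative spellings and, or, bitand, bitor and xor alongside the primary
# operators, and eval_if deliberately leaves those identifiers alone instead
# of replacing them with 0, so a made-up directive such as "#if 1 bitor 2"
# evaluates just like "#if 1 | 2" (to 3, i.e. true), whether or not digraph
# replacement already rewrote the keyword.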

def additive_expression(tokens):
    """additive-expression:
        multiplicative-expression
        additive-expression + multiplicative-expression
        additive-expression - multiplicative-expression"""
    val1 = multiplicative_expression(tokens)
    while 1:
        t = peek(tokens, ['+', '-'])
        if t is None:
            return val1
        val2 = multiplicative_expression(tokens)
        if t.text == '+':
            val1 = val1 + val2
        else:
            val1 = val1 - val2

def multiplicative_expression(tokens):
    """multiplicative-expression:
        pm-expression
        multiplicative-expression * pm-expression
        multiplicative-expression / pm-expression
        multiplicative-expression % pm-expression"""
    val1 = pm_expression(tokens)
    while 1:
        t = peek(tokens, ['*', '/', '%'])
        if t is None:
            return val1
        val2 = pm_expression(tokens)
        if t.text == '*':
            val1 = val1 * val2
        elif t.text == '/':
            val1 = val1 / val2
        elif t.text == '%':
            val1 = val1 % val2

def pm_expression(tokens):
    """pm-expression:
        cast-expression
        pm-expression .* cast-expression
        pm-expression ->* cast-expression"""
    # In the preprocessor, pointer-to-member cannot occur
    return cast_expression(tokens)

def cast_expression(tokens):
    """cast-expression:
        unary-expression
        ( type-id ) cast-expression"""
    # In the preprocessor, casts cannot occur, since type-id is
    # replaced with 0
    return unary_expression(tokens)

def unary_expression(tokens):
    """unary-expression:
        postfix-expression
        ++ cast-expression
        -- cast-expression
        unary-operator cast-expression
        sizeof unary-expression
        sizeof ( type-id )
        new-expression
        delete-expression"""
    # In a constant expression, increment and decrement are banned.
    # In the preprocessor, sizeof, new, and delete cannot occur, since
    # the keyword is replaced by 0
    t = peek(tokens, ['*', '&', '+', '-', '!', '~'])
    if t is not None:
        val = cast_expression(tokens)
        if t.text in ['*', '&']:
            raise SyntaxError, "Invalid operator "+t.text
        if t.text == '+':
            return val
        elif t.text == '-':
            return -val
        elif t.text == '!':
            return not val
        elif t.text == '~':
            return ~val
        assert 0
    return postfix_expression(tokens)

def postfix_expression(tokens):
    """postfix-expression:
        primary-expression
        postfix-expression [ expression ]
        postfix-expression ( expression-list-opt )
        simple-type-specifier ( expression-list-opt )
        typename ::opt nested-name-specifier identifier ( expression-list-opt )
        typename ::opt nested-name-specifier template-opt template-id ( expression-list-opt )
        postfix-expression . template-opt id-expression
        postfix-expression -> template-opt id-expression
        postfix-expression . pseudo-destructor-name
        postfix-expression -> pseudo-destructor-name
        postfix-expression ++
        postfix-expression --
        dynamic_cast < type-id > ( expression )
        static_cast < type-id > ( expression )
        reinterpret_cast < type-id > ( expression )
        const_cast < type-id > ( expression )
        typeid ( expression )
        typeid ( type-id )"""
    # In the preprocessor, anything involving template, typename, *_cast,
    # or typeid is not available, since the keyword is replaced by 0.
    # Objects and increment, decrement are banned from constant expressions;
    # likewise function calls. XXX: What about "foo"[0]?
    return primary_expression(tokens)
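
# Illustration (not in the original source): by the time primary_expression
# below runs, a made-up directive such as "#if defined(FOO) && FOO > 1"
# (with FOO defined as 2) has already been reduced to the token stream
# 1 && 2 > 1, so the only primaries left to handle are integer literals,
# true/false, and parenthesised subexpressions.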
raise SyntaxError, "Floating-point literals not allowed" return int(t.text) if t.type == IDENTIFIER: if t.text == 'true': return 1 elif t.text == 'false': return 0 assert 0 raise SyntaxError, "Unexpected token "+t.text ################## Filter mode: cpp command line ####################### def run(): """Implement the cpp(1) command.""" import sys p = process_command_line(sys.argv[1:]) file = line = None bol = 1 while 1: try: t = p.get_token() except SyntaxError, s: import sys print >>sys.stderr, "syntax error:", s.args[0] raise SystemExit if t is None: return if t.type in [INCLUDE_START, INCLUDE_END]: continue if t.file != file: if not bol: print print '# %d "%s"' % (t.line, t.file) file = t.file line = t.line bol = 1 elif t.line == line+1: print line += 1 bol = 1 elif line != t.line: if not bol: print print '# %d "%s"' % (t.line, t.file) line = t.line bol = 1 print t.text, bol = 0 if __name__ == '__main__': run()