#
# Lexer for C code
#

#
# The types value keeps track of type definitions
#
public.types. =
    extends $(Map)

    # GCC extension: the variadic-argument list type.  It carries the plain
    # `type` class rather than `type_builtin`.
    $|__builtin_va_list| = type

    # Base types of the C language; each one lexes as `type_builtin`.
    $|double| = type_builtin
    $|float|  = type_builtin
    $|int|    = type_builtin
    $|char|   = type_builtin
    $|void|   = type_builtin

# Register the name v as a type name.
#
# A new map is built from `types` with v bound to the token class `type`,
# and it replaces the `types` variable.  The trailing `export` makes that
# rebinding visible to the caller instead of being local to this function.
public.add-type-name(v) =
    types = $(types.add $(v), type)
    export

#
# Keywords
#
public.keywords. =
    extends $(Map)

    # The map goes from a reserved word to the token class the lexer emits
    # for it.  Entries are grouped by token class; several words may share
    # one class.

    #
    # Class `tyclass`
    #
    # NOTE(review): `volatile` is filed here while `const` is a `tyqual`;
    # confirm against the grammar that this is intended.
    $|auto|          = tyclass
    $|extern|        = tyclass
    $|inline|        = tyclass
    $|register|      = tyclass
    $|static|        = tyclass
    $|volatile|      = tyclass
    # GCC spellings
    $|__extension__| = tyclass
    $|__inline|      = tyclass
    # Project extension
    $|__dll_hidden|  = tyclass

    #
    # Class `tyqual`
    #
    # NOTE(review): `signed`/`unsigned` are `tyqual` whereas `long`/`short`
    # are `tymod`; confirm against the grammar that this is intended.
    $|const|      = tyqual
    $|signed|     = tyqual
    $|unsigned|   = tyqual
    # GCC spellings
    $|__const|    = tyqual
    $|__restrict| = tyqual

    #
    # Class `tymod`
    #
    $|long|  = tymod
    $|short| = tymod

    #
    # Aggregate and type-introducing words.  `union` shares the `struct` class.
    #
    $|struct|  = struct
    $|union|   = struct
    $|enum|    = enum
    $|typedef| = typedef

    #
    # Statement words.  `continue` shares the `break` class.
    #
    $|if|       = if
    $|else|     = else
    $|switch|   = switch
    $|case|     = case
    $|default|  = default
    $|for|      = for
    $|while|    = while
    $|do|       = do
    $|break|    = break
    $|continue| = break
    $|goto|     = goto
    $|return|   = return

    #
    # Expression words
    #
    $|sizeof| = sizeof

    #
    # Extension words that have token classes of their own:
    # one GCC extension followed by two project extensions.
    #
    $|__attribute__|  = __attribute__
    $|__dll_callback| = callback
    $|__tagged_union| = __tagged_union

#
# Operators
#
public.operators. =
    extends $(Map)

    # The map goes from the text of an operator or punctuator to the token
    # class the lexer emits for it.  Operators whose text contains `|`
    # cannot be written with the $|...| key syntax used here, so this
    # table has no entry for them.

    #
    # Punctuation
    #
    $|,|   = comma
    $|;|   = semi
    $|...| = elide
    $|?|   = quest
    $|:|   = colon

    #
    # Brackets
    #
    $|{| = lbrace
    $|}| = rbrace
    $|[| = lbrack
    $|]| = rbrack
    $|(| = lparen
    $|)| = rparen

    #
    # Assignment: plain `=` has its own class, compound forms share eqop13
    #
    $|=|   = eq
    $|>>=| = eqop13
    $|<<=| = eqop13
    $|%=|  = eqop13
    $|^=|  = eqop13
    $|&=|  = eqop13
    $|/=|  = eqop13
    $|*=|  = eqop13
    $|-=|  = eqop13
    $|+=|  = eqop13

    #
    # Prefix/postfix operators
    #
    $|++| = incop1
    $|--| = incop1
    $|~|  = unop1
    $|!|  = unop1

    #
    # Symbols with a dedicated token class instead of a binopN class.
    # The original notes place them at: `*` binop2, `+` and `-` binop3,
    # `&` binop7.
    #
    $|*| = star
    $|+| = plus
    $|-| = minus
    $|&| = amp

    #
    # Binary operators, listed by the number in their binopN class
    #
    $|.|  = binop0
    $|->| = binop0

    $|%| = binop2
    $|/| = binop2

    $|>>| = binop4
    $|<<| = binop4

    $|>|  = binop5
    $|>=| = binop5
    $|<=| = binop5
    $|<|  = binop5

    $|!=| = binop6
    $|==| = binop6

    $|^|  = binop8

    $|&&| = binop10

# The three operators whose text contains `|` are added here with
# explicit add calls: `|` as binop9, `||` as binop11, and `|=` as eqop13
# (the class shared by the other compound assignments).
operators = $(operators.add |, binop9)
operators = $(operators.add ||, binop11)
operators = $(operators.add $'|=', eqop13)

########################################################################
# The lexer
#
public.lexer. =
    extends $(Lexer)

    #
    # Fallback clause: any single character that no other clause accepts
    # is reported as illegal and skipped.
    #
    other: .
        eprintln(Illegal character: $0)
        lex()

    #
    # Numbers
    #
    # Integers: decimal/octal digits or a hex literal (0x or 0X followed by
    # at least one hex digit), with up to three suffix letters in either
    # case so that forms such as 10u, 10UL and 10ULL are single tokens.
    #
    int: $'(0[xX][[:xdigit:]]+|[[:digit:]]+)[uUlL]?[uUlL]?[uUlL]?'
        Token.pair($(loc), int, $0)

    #
    # Floats: "digits . [digits] [exponent]", ". digits [exponent]", or
    # "digits exponent" (no decimal point, e.g. 1e10).  An exponent must
    # contain at least one digit.  An optional f/F/l/L suffix is accepted.
    #
    float: $'([[:digit:]]+[.][[:digit:]]*([eE][+-]?[[:digit:]]+)?|[.][[:digit:]]+([eE][+-]?[[:digit:]]+)?|[[:digit:]]+[eE][+-]?[[:digit:]]+)[fFlL]?'
        Token.pair($(loc), float, $0)

    #
    # Strings and chars
    #
    # A backslash is only ever consumed together with the character it
    # escapes.  It is excluded from the plain-character class so that the
    # match cannot run past the real closing quote (e.g. "a\\" "b" is two
    # strings, not one).
    #
    string: $'"(\\.|[^"\\])*"'
        Token.pair($(loc), string, $0)

    char: $''['](\\.|[^'\\])*[']''
        Token.pair($(loc), char, $0)

    #
    # Names: keywords take priority over known type names; anything else
    # is a plain identifier.
    #
    id: $'[[:alpha:]_][[:alnum:]_]*'
        id = $0
        if $(keywords.mem $(id))
            Token.pair($(loc), $(keywords.find $(id)), $(id))
        elseif $(types.mem $(id))
            Token.pair($(loc), $(types.find $(id)), $(id))
        else
            Token.pair($(loc), id, $(id))

    #
    # Operators.  The token class is looked up in the operators table.
    # `++` is spelled [+][+] because a bare + is a regex quantifier.
    #
    op: $'[-+*/%^|&!~)(}{.<>?:=,;]|\[|\]|[|][|]|&&|<<|>>|->|[+][+]|--|[-+*/&^%<>=!|]=|<<=|>>=|[.][.][.]'
        Token.pair($(loc), $(operators.find $0), $0)

    #
    # CPP directive: skipped up to the end of the (possibly
    # backslash-continued) line.
    #
    cpp: $'#[[:space:]]*[[:alpha:]](\\\n|[^\n])*'
        lex()

    #
    # Line marker of the form: # <number> "<file>" ...
    # $1 is the line number and $2 the file name.  As for strings, a
    # backslash in the file name is only consumed as part of an escape.
    #
    line: $'#[[:space:]]*\([[:digit:]]+\)[[:space:]]*"\((\\.|[^"\\\n])*\)"(\\\n|[^\n])*'
        set-line($2, $1)
        lex()

    #
    # Ignore comments and whitespace
    #
    white: $'[[:space:]]+'
        lex()

    #
    # Block comment.  Inside the body a run of stars must be followed by a
    # character that is neither * nor /, so the body can never consume a
    # closing "*/"; the terminator accepts any number of stars before the
    # slash, which makes /***/ and "/* text **/" close correctly.
    #
    normal-comment: $'/[*]([^*]|[*]+[^*/])*[*]+/'
        lex()

    #
    # Line comment.  The newline is not part of the match (it is consumed
    # by the whitespace clause), so a comment on the last line of a file
    # without a trailing newline is still recognized.
    #
    line-comment: $'//[^\n]*'
        lex()

    #
    # End of file
    #
    eof: $"\'"
        Token.unit($(loc), eof)