/* * GAS-compatible re2c lexer * * Copyright (C) 2005-2007 Peter Johnson * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of other contributors * may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include RCSID("$Id: gas-token.re 1978 2007-09-21 16:59:14Z peter $"); #include #include "modules/parsers/gas/gas-parser.h" #define BSIZE 8192 #define YYCURSOR cursor #define YYLIMIT (s->lim) #define YYMARKER (s->ptr) #define YYFILL(n) {cursor = fill(parser_gas, cursor);} #define RETURN(i) do {s->cur = cursor; parser_gas->tokch = s->tok[0]; \ return i;} while (0) #define SCANINIT() {s->tok = cursor;} #define TOK ((char *)s->tok) #define TOKLEN (size_t)(cursor-s->tok) static size_t rept_input(yasm_parser_gas *parser_gas, /*@out@*/ YYCTYPE *buf, size_t max_size) { gas_rept *rept = parser_gas->rept; size_t numleft = max_size; YYCTYPE *bufp = buf; /* If numrept is 0, copy out just the line end characters */ if (rept->numrept == 0) { /* Skip first line, which contains .line */ rept->line = STAILQ_NEXT(rept->line, link); if (!rept->line) { rept->numrept = 1; rept->numdone = 1; } while (rept->numrept == 0 && numleft > 0) { *bufp++ = rept->line->data[rept->line->len-1]; rept->line = STAILQ_NEXT(rept->line, link); if (!rept->line) { rept->numrept = 1; rept->numdone = 1; } } } /* Copy out the previous fill buffer until we're *really* done */ if (rept->numdone == rept->numrept) { size_t numcopy = rept->oldbuflen - rept->oldbufpos; if (numcopy > numleft) numcopy = numleft; memcpy(bufp, &rept->oldbuf[rept->oldbufpos], numcopy); numleft -= numcopy; bufp += numcopy; rept->oldbufpos += numcopy; if (rept->oldbufpos == rept->oldbuflen) { /* Delete lines, then delete rept and clear rept state */ gas_rept_line *cur, *next; cur = STAILQ_FIRST(&rept->lines); while (cur) { next = STAILQ_NEXT(cur, link); yasm_xfree(cur->data); yasm_xfree(cur); cur = next; } yasm_xfree(rept->oldbuf); yasm_xfree(rept); parser_gas->rept = NULL; } } while (numleft > 0 && rept->numdone < rept->numrept) { /* Copy from line data to buf */ size_t numcopy = rept->line->len - rept->linepos; if (numcopy > numleft) numcopy = numleft; memcpy(bufp, &rept->line->data[rept->linepos], numcopy); numleft -= numcopy; bufp += numcopy; rept->linepos += numcopy; /* Update locations if needed */ if (rept->linepos == rept->line->len) { rept->line = STAILQ_NEXT(rept->line, link); rept->linepos = 0; } if (rept->line == NULL) { rept->numdone++; rept->line = STAILQ_FIRST(&rept->lines); } } return (max_size-numleft); } #if 0 static size_t fill_input(void *d, unsigned char *buf, size_t max) { return yasm_preproc_input((yasm_preproc *)d, (char *)buf, max); } #endif static YYCTYPE * fill(yasm_parser_gas *parser_gas, YYCTYPE *cursor) { yasm_scanner *s = &parser_gas->s; int first = 0; if(!s->eof){ size_t cnt = s->tok - s->bot; if(cnt){ memmove(s->bot, s->tok, (size_t)(s->lim - s->tok)); s->tok = s->bot; s->ptr -= cnt; cursor -= cnt; s->lim -= cnt; } if (!s->bot) first = 1; if((s->top - s->lim) < BSIZE){ YYCTYPE *buf = yasm_xmalloc((size_t)(s->lim - s->bot) + BSIZE); memcpy(buf, s->tok, (size_t)(s->lim - s->tok)); s->tok = buf; s->ptr = &buf[s->ptr - s->bot]; cursor = &buf[cursor - s->bot]; s->lim = &buf[s->lim - s->bot]; s->top = &s->lim[BSIZE]; if (s->bot) yasm_xfree(s->bot); s->bot = buf; } if (parser_gas->rept && parser_gas->rept->ended) { /* Pull from rept lines instead of preproc */ cnt = rept_input(parser_gas, s->lim, BSIZE); } else if((cnt = yasm_preproc_input(parser_gas->preproc, (char *)s->lim, BSIZE)) == 0) { s->eof = &s->lim[cnt]; *s->eof++ = '\n'; } s->lim += cnt; if (first && parser_gas->save_input) { int i; YYCTYPE *saveline; parser_gas->save_last ^= 1; saveline = parser_gas->save_line[parser_gas->save_last]; /* save next line into cur_line */ for (i=0; i<79 && &s->tok[i] < s->lim && s->tok[i] != '\n'; i++) saveline[i] = s->tok[i]; saveline[i] = '\0'; } } return cursor; } static YYCTYPE * save_line(yasm_parser_gas *parser_gas, YYCTYPE *cursor) { yasm_scanner *s = &parser_gas->s; int i = 0; YYCTYPE *saveline; parser_gas->save_last ^= 1; saveline = parser_gas->save_line[parser_gas->save_last]; /* save next line into cur_line */ if ((YYLIMIT - YYCURSOR) < 80) YYFILL(80); for (i=0; i<79 && &cursor[i] < s->lim && cursor[i] != '\n'; i++) saveline[i] = cursor[i]; saveline[i] = '\0'; return cursor; } /* starting size of string buffer */ #define STRBUF_ALLOC_SIZE 128 /* string buffer used when parsing strings/character constants */ static YYCTYPE *strbuf = NULL; /* length of strbuf (including terminating NULL character) */ static size_t strbuf_size = 0; static void strbuf_append(size_t count, YYCTYPE *cursor, yasm_scanner *s, int ch) { if (count >= strbuf_size) { strbuf = yasm_xrealloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE); strbuf_size += STRBUF_ALLOC_SIZE; } strbuf[count] = ch; } /*!re2c any = [\000-\377]; digit = [0-9]; iletter = [a-zA-Z]; bindigit = [01]; octdigit = [0-7]; hexdigit = [0-9a-fA-F]; ws = [ \t\r]; dquot = ["]; */ int gas_parser_lex(YYSTYPE *lvalp, yasm_parser_gas *parser_gas) { /*@null@*/ gas_rept *rept = parser_gas->rept; yasm_scanner *s = &parser_gas->s; YYCTYPE *cursor = s->cur; size_t count; YYCTYPE savech; int linestart; gas_rept_line *new_line; /* Handle one token of lookahead */ if (parser_gas->peek_token != NONE) { int tok = parser_gas->peek_token; *lvalp = parser_gas->peek_tokval; /* structure copy */ parser_gas->tokch = parser_gas->peek_tokch; parser_gas->peek_token = NONE; return tok; } /* Catch EOF */ if (s->eof && cursor == s->eof) return 0; /* Handle rept */ if (rept && !rept->ended) goto rept_directive; /* Jump to proper "exclusive" states */ switch (parser_gas->state) { case COMMENT: goto comment; case SECTION_DIRECTIVE: goto section_directive; default: break; } scan: SCANINIT(); /*!re2c /* standard decimal integer */ ([1-9] digit*) | "0" { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; lvalp->intn = yasm_intnum_create_dec(TOK); s->tok[TOKLEN] = savech; RETURN(INTNUM); } /* 0b10010011 - binary number */ '0b' bindigit+ { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; lvalp->intn = yasm_intnum_create_bin(TOK+2); s->tok[TOKLEN] = savech; RETURN(INTNUM); } /* 0777 - octal number */ "0" octdigit+ { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; lvalp->intn = yasm_intnum_create_oct(TOK); s->tok[TOKLEN] = savech; RETURN(INTNUM); } /* 0xAA - hexidecimal number */ '0x' hexdigit+ { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; /* skip 0 and x */ lvalp->intn = yasm_intnum_create_hex(TOK+2); s->tok[TOKLEN] = savech; RETURN(INTNUM); } /* floating point value */ "0" [DdEeFfTt] [-+]? (digit+)? ("." digit*)? ('e' [-+]? digit+)? { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; lvalp->flt = yasm_floatnum_create(TOK+2); s->tok[TOKLEN] = savech; RETURN(FLTNUM); } /* character constant values */ ['] { goto charconst; } /* string constant values */ dquot { goto stringconst; } /* operators */ "<<" { RETURN(LEFT_OP); } ">>" { RETURN(RIGHT_OP); } "<" { RETURN(LEFT_OP); } ">" { RETURN(RIGHT_OP); } [-+|^!*&/~$():@=,] { RETURN(s->tok[0]); } ";" { parser_gas->state = INITIAL; RETURN(s->tok[0]); } /* label or maybe directive */ [_.][a-zA-Z0-9_$.]* { lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); RETURN(ID); } /* register or segment register */ [%][a-zA-Z0-9]+ { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; switch (yasm_arch_parse_check_regtmod (p_object->arch, TOK+1, TOKLEN-1, &lvalp->arch_data)) { case YASM_ARCH_REG: s->tok[TOKLEN] = savech; RETURN(REG); case YASM_ARCH_REGGROUP: s->tok[TOKLEN] = savech; RETURN(REGGROUP); case YASM_ARCH_SEGREG: s->tok[TOKLEN] = savech; RETURN(SEGREG); default: break; } yasm_error_set(YASM_ERROR_GENERAL, N_("Unrecognized register name `%s'"), s->tok); s->tok[TOKLEN] = savech; lvalp->arch_data = 0; RETURN(REG); } /* label */ [a-zA-Z][a-zA-Z0-9_$.]* ws* ':' { /* strip off colon and any whitespace */ count = TOKLEN-1; while (s->tok[count] == ' ' || s->tok[count] == '\t' || s->tok[count] == '\r') count--; /* Just an identifier, return as such. */ lvalp->str_val = yasm__xstrndup(TOK, count); RETURN(LABEL); } /* local label */ [0-9] ':' { /* increment label index */ parser_gas->local[s->tok[0]-'0']++; /* build local label name */ lvalp->str_val = yasm_xmalloc(30); sprintf(lvalp->str_val, "L%c\001%lu", s->tok[0], parser_gas->local[s->tok[0]-'0']); RETURN(LABEL); } /* local label forward reference */ [0-9] 'f' { /* build local label name */ lvalp->str_val = yasm_xmalloc(30); sprintf(lvalp->str_val, "L%c\001%lu", s->tok[0], parser_gas->local[s->tok[0]-'0']+1); RETURN(ID); } /* local label backward reference */ [0-9] 'b' { /* build local label name */ lvalp->str_val = yasm_xmalloc(30); sprintf(lvalp->str_val, "L%c\001%lu", s->tok[0], parser_gas->local[s->tok[0]-'0']); RETURN(ID); } /* identifier that may be an instruction, etc. */ [a-zA-Z][a-zA-Z0-9_$.]* { /* Can only be an instruction/prefix when not inside an * instruction or directive. */ if (parser_gas->state != INSTDIR) { uintptr_t prefix; savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; switch (yasm_arch_parse_check_insnprefix (p_object->arch, TOK, TOKLEN, cur_line, &lvalp->bc, &prefix)) { case YASM_ARCH_INSN: s->tok[TOKLEN] = savech; parser_gas->state = INSTDIR; RETURN(INSN); case YASM_ARCH_PREFIX: lvalp->arch_data = prefix; s->tok[TOKLEN] = savech; RETURN(PREFIX); default: s->tok[TOKLEN] = savech; } } /* Propagate errors in case we got a warning from the arch */ yasm_errwarn_propagate(parser_gas->errwarns, cur_line); /* Just an identifier, return as such. */ lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); RETURN(ID); } "/*" { parser_gas->state = COMMENT; goto comment; } "#" { if (strcmp(((yasm_preproc_base*)parser_gas->preproc)->module->keyword, "cpp") == 0) { RETURN(LINE_MARKER); } else goto line_comment; } ws+ { goto scan; } "\n" { if (parser_gas->save_input) cursor = save_line(parser_gas, cursor); parser_gas->state = INITIAL; RETURN(s->tok[0]); } any { yasm_warn_set(YASM_WARN_UNREC_CHAR, N_("ignoring unrecognized character `%s'"), yasm__conv_unprint(s->tok[0])); goto scan; } */ /* C-style comment; nesting not supported */ comment: SCANINIT(); /*!re2c /* End of comment */ "*/" { parser_gas->state = INITIAL; goto scan; } "\n" { if (parser_gas->save_input) cursor = save_line(parser_gas, cursor); RETURN(s->tok[0]); } any { if (cursor == s->eof) return 0; goto comment; } */ /* Single line comment. */ line_comment: /*!re2c (any \ [\n])* { goto scan; } */ /* .section directive (the section name portion thereof) */ section_directive: SCANINIT(); /*!re2c [a-zA-Z0-9_$.-]+ { lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); parser_gas->state = INITIAL; RETURN(ID); } dquot { goto stringconst; } ws+ { goto section_directive; } "," { parser_gas->state = INITIAL; RETURN(s->tok[0]); } "\n" { if (parser_gas->save_input) cursor = save_line(parser_gas, cursor); parser_gas->state = INITIAL; RETURN(s->tok[0]); } any { yasm_warn_set(YASM_WARN_UNREC_CHAR, N_("ignoring unrecognized character `%s'"), yasm__conv_unprint(s->tok[0])); goto section_directive; } */ /* character constant values */ charconst: /*TODO*/ /* string constant values */ stringconst: strbuf = yasm_xmalloc(STRBUF_ALLOC_SIZE); strbuf_size = STRBUF_ALLOC_SIZE; count = 0; stringconst_scan: SCANINIT(); /*!re2c /* Handle escaped double-quote by copying and continuing */ "\\\"" { if (cursor == s->eof) { yasm_error_set(YASM_ERROR_SYNTAX, N_("unexpected end of file in string")); lvalp->str.contents = (char *)strbuf; lvalp->str.len = count; RETURN(STRING); } strbuf_append(count++, cursor, s, '"'); goto stringconst_scan; } dquot { strbuf_append(count, cursor, s, '\0'); yasm_unescape_cstring(strbuf, &count); lvalp->str.contents = (char *)strbuf; lvalp->str.len = count; RETURN(STRING); } any { if (cursor == s->eof) { yasm_error_set(YASM_ERROR_SYNTAX, N_("unexpected end of file in string")); lvalp->str.contents = (char *)strbuf; lvalp->str.len = count; RETURN(STRING); } strbuf_append(count++, cursor, s, s->tok[0]); goto stringconst_scan; } */ rept_directive: strbuf = yasm_xmalloc(STRBUF_ALLOC_SIZE); strbuf_size = STRBUF_ALLOC_SIZE; count = 0; linestart = 1; rept_scan: SCANINIT(); /*!re2c [\n;] { /* Line ending, save in lines */ new_line = yasm_xmalloc(sizeof(gas_rept_line)); if (cursor == s->eof) { yasm_xfree(strbuf); return 0; } strbuf_append(count++, cursor, s, s->tok[0]); new_line->data = strbuf; new_line->len = count; STAILQ_INSERT_TAIL(&rept->lines, new_line, link); /* Allocate new strbuf */ strbuf = yasm_xmalloc(STRBUF_ALLOC_SIZE); strbuf_size = STRBUF_ALLOC_SIZE; count = 0; /* Mark start of line */ linestart = 1; goto rept_scan; } '.rept' { int i; if (linestart) { /* We don't support nested right now, error */ yasm_error_set(YASM_ERROR_GENERAL, N_("nested rept not supported")); yasm_errwarn_propagate(parser_gas->errwarns, cur_line); } for (i=0; i<6; i++) strbuf_append(count++, cursor, s, s->tok[i]); goto rept_scan; } '.endr' { if (linestart) { /* We're done, kick off the main lexer */ rept->line = STAILQ_FIRST(&rept->lines); if (!rept->line) { /* Didn't get any intervening data? Empty repeat, so * don't even bother. */ yasm_xfree(strbuf); yasm_xfree(rept); parser_gas->rept = NULL; } else { rept->ended = 1; /* Add .line as first line to get line numbers correct */ new_line = yasm_xmalloc(sizeof(gas_rept_line)); new_line->data = yasm_xmalloc(40); sprintf((char *)new_line->data, ".line %lu;", rept->startline+1); new_line->len = strlen((char *)new_line->data); STAILQ_INSERT_HEAD(&rept->lines, new_line, link); /* Save previous fill buffer */ rept->oldbuf = parser_gas->s.bot; rept->oldbuflen = s->lim - s->bot; rept->oldbufpos = cursor - s->bot; /* Reset fill */ s->bot = NULL; s->tok = NULL; s->ptr = NULL; s->cur = NULL; s->lim = NULL; s->top = NULL; s->eof = NULL; cursor = NULL; YYFILL(1); } goto scan; } else { int i; for (i=0; i<6; i++) strbuf_append(count++, cursor, s, s->tok[i]); goto rept_scan; } } any { if (cursor == s->eof) { yasm_xfree(strbuf); return 0; } strbuf_append(count++, cursor, s, s->tok[0]); linestart = 0; goto rept_scan; } */ }