/* * picasm -- token.c * * Copyright 1995-2004 Timo Rossi, * See the file LICENSE for license terms. * * Include handling, macro expansion, lexical analysis * */ #include #include #include #include #include #include #include "picasm.h" #include "util.h" #include "token.h" #include "symtab.h" /* * keyword table for tokenizer * * this must be in sync with the token definitions in picasm.h */ static char Keyword_Table[] = { "include\0" "macro\0" "endm\0" "exitm\0" "if\0" "else\0" "endif\0" "equ\0" "set\0" "end\0" "org\0" "ds\0" "edata\0" "dt\0" "data\0" "cblock\0" "endc\0" "config\0" "picid\0" "device\0" "defined\0" "hibyte\0" "streq\0" "streqcase\0" "isstr\0" "chrval\0" "opt\0" "local\0" "endlocal\0" "error\0" /* 12/14-bit PIC instruction mnemonics */ "addlw\0" "addwf\0" "andlw\0" "andwf\0" "bcf\0" "bsf\0" "btfsc\0" "btfss\0" "call\0" "clrf\0" "clrw\0" "clrwdt\0" "comf\0" "decf\0" "decfsz\0" "goto\0" "incf\0" "incfsz\0" "iorlw\0" "iorwf\0" "movlw\0" "movf\0" "movwf\0" "nop\0" "option\0" "retfie\0" "retlw\0" "return\0" "rlf\0" "rrf\0" "sleep\0" "sublw\0" "subwf\0" "swapf\0" "tris\0" "xorlw\0" "xorwf\0" /* 16-bit PIC instruction mnemonics */ "addwfc\0" "btg\0" "cpfseq\0" "cpfsgt\0" "cpfslt\0" "daw\0" "dcfsnz\0" "infsnz\0" "lcall\0" "movfp\0" "movpf\0" "movlb\0" "movlr\0" "mullw\0" "mulwf\0" "negw\0" "rlcf\0" "rlncf\0" "rrcf\0" "rrncf\0" "setf\0" "subwfb\0" "tablrd\0" "tablwt\0" "tlrd\0" "tlwt\0" "tstfsz\0" "set_pic_type\0" "\0" }; /* tokenizer definitions & variables */ int tok_char; int token_type, line_buf_off; char token_string[TOKSIZE]; long token_int_val; int ifskip_mode; /* TRUE when skipping code inside if..endif */ struct incpath_entry { struct incpath_entry *next; char dirname[1]; /*struct hack*/ }; static struct incpath_entry *include_path, *inc_path_next; void init_includes(void) { include_path = NULL; inc_path_next = NULL; } /* * include file handling */ void add_include_path(char *dirname) { int addslash = 0; char *p; struct incpath_entry *ent; if(dirname == NULL || *dirname == '\0') fatal_error("Empty directory path"); p = dirname + (strlen(dirname) - 1); #if defined(MSDOS) || defined(_WIN32) || defined(AMIGA) #ifdef AMIGA if(!(*p == ':' || *p == '/')) addslash = 1; #else if(!(*p == ':' || *p == '/' || *p == '\\')) addslash = 1; #endif #else /* defined(MSDOS) ... */ /* Unix etc. */ if(!(*p == '/')) addslash = 1; #endif ent = mem_alloc(sizeof(struct incpath_entry) + strlen(dirname) + addslash); ent->next = NULL; strcpy(ent->dirname, dirname); if(addslash) strcat(ent->dirname, "/"); if(include_path == NULL) { include_path = ent; inc_path_next = ent; } else { inc_path_next->next = ent; inc_path_next = ent; } } void begin_include(char *fname, int dopath, int err_fatal) { struct inc_file *p; struct incpath_entry *ent; FILE *fp; char fnamebuf[512]; int err; strcpy(fnamebuf, fname); fp = fopen(fnamebuf, "r"); err = errno; if(fp == NULL && dopath) { for(ent = include_path; ent != NULL; ent = ent->next) { int elen = strlen(ent->dirname); if(elen + strlen(fname) >= sizeof(fnamebuf)-1) { warning("Include directory/file name too long"); continue; } strcpy(fnamebuf, ent->dirname); strcpy(fnamebuf+elen, fname); if((fp = fopen(fnamebuf, "r")) != NULL) break; err = errno; } /*for*/ } /*if*/ if(fp == NULL) { if(err_fatal) { fatal_error("Can't open '%s': %s", fnamebuf, strerror(err)); } else { error(0, "Can't open include file '%s': %s", fnamebuf, strerror(err)); } line_buf_ptr = NULL; tok_char = ' '; return; } p = mem_alloc(sizeof(struct inc_file)); p->type = INC_FILE; p->v.f.fname = mem_alloc(strlen(fnamebuf)+1); strcpy(p->v.f.fname, fnamebuf); p->linenum = 0; p->cond_nest_count = cond_nest_count; p->config_flag = 0; p->v.f.fp = fp; p->next = current_file; current_file = p; line_buf_ptr = NULL; tok_char = ' '; } /* * Move to previous level of include/macro */ void end_include(void) { struct inc_file *p; struct macro_arg *arg1, *arg2; int prev_config_flag; if(current_file != NULL) { if(cond_nest_count != current_file->cond_nest_count) { error(0, "conditional assembly not terminated by ENDIF"); cond_nest_count = current_file->cond_nest_count; } p = current_file->next; prev_config_flag = current_file->config_flag; if(current_file->type == INC_FILE) { fclose(current_file->v.f.fp); mem_free(current_file->v.f.fname); } else { /* free macro arguments */ arg1 = current_file->v.m.args; while(arg1 != NULL) { arg2 = arg1->next; mem_free(arg1); arg1 = arg2; } } mem_free(current_file); current_file = p; if(current_file != NULL && current_file->config_flag == 0 && prev_config_flag != 0) { config_done(); } } } /* * Expand a macro */ void expand_macro(struct symbol *sym) { struct inc_file *minc; struct macro_arg *arg; char *cp; int narg; int parcnt, d_char; int nch; write_listing_line(0); /* list the macro call line */ minc = mem_alloc(sizeof(struct inc_file)); minc->type = INC_MACRO; minc->v.m.sym = sym; minc->v.m.ml = sym->v.text; minc->linenum = 0; minc->cond_nest_count = cond_nest_count; minc->config_flag = 0; minc->v.m.args = NULL; minc->v.m.uniq_id = unique_id_count++; arg = NULL; for(narg = 1;;narg++) { while(isspace(tok_char)) /* skip whitespace */ read_src_char(); if(tok_char == '\0' || tok_char == ';' || tok_char == EOF) break; cp = line_buf_ptr-1; nch = 0; /* * Macro parameters are separated by commas. However, strings and * character constants (using double and single quotes) * can be used even if they contain commas. Also commas * inside parenthesis (such as function parameter delimiters) * don't count as macro parameter separators. * */ parcnt = 0; /* parenthesis nesting count */ while(!isspace(tok_char) && tok_char != '\0' && tok_char != ';' && tok_char != EOF) { if(parcnt == 0 && tok_char == ',') break; if(tok_char == '(') { parcnt++; } else if(tok_char == ')') { parcnt--; } else if(tok_char == '"' || tok_char == '\'') { /* quoted string or character constant */ d_char = tok_char; do { read_src_char(); nch++; } while(tok_char != d_char && tok_char != '\0' && tok_char != EOF); if(tok_char != d_char) break; } read_src_char(); nch++; } if(narg >= 10) warning("Too many macro arguments (max. 9)"); if(arg == NULL) { arg = mem_alloc(sizeof(struct macro_arg) + nch); minc->v.m.args = arg; } else { arg->next = mem_alloc(sizeof(struct macro_arg) + nch); arg = arg->next; } strncpy(arg->text, cp, nch); arg->text[nch] = '\0'; arg->next = NULL; /* skip whitespace */ while(isspace(tok_char)) read_src_char(); if(tok_char != ',') break; read_src_char(); } if(tok_char != ';' && tok_char != '\0' && tok_char != EOF) error(0, "Extraneous characters after a valid source line"); minc->next = current_file; current_file = minc; line_buf_ptr = NULL; tok_char = ' '; get_token(); } /* * This is used by the new config system * * This must not have the new token stuff in the end. * (see 3 last lines of expand_macro()) * */ void expand_macro_with_args(struct symbol *sym, char *args[], int nargs, int config_flag) { struct inc_file *minc; struct macro_arg *arg; int i; minc = mem_alloc(sizeof(struct inc_file)); minc->type = INC_MACRO; minc->v.m.sym = sym; minc->v.m.ml = sym->v.text; minc->linenum = 0; minc->cond_nest_count = cond_nest_count; minc->config_flag = config_flag; minc->v.m.args = NULL; minc->v.m.uniq_id = unique_id_count++; arg = NULL; for(i = 0; i < nargs; i++) { if(arg == NULL) { arg = mem_alloc(sizeof(struct macro_arg) + strlen(args[i])); minc->v.m.args = arg; } else { arg->next = mem_alloc(sizeof(struct macro_arg) + strlen(args[i])); arg = arg->next; } strcpy(arg->text, args[i]); arg->next = NULL; } minc->next = current_file; current_file = minc; } /* * Read a character from source file. * Handles includes and macros. */ void read_src_char(void) { char *scp, *pcp, *dcp; int parm; struct macro_arg *arg; static char tmpbuf[12]; if(line_buf_ptr != NULL) { tok_char = (unsigned char)(*line_buf_ptr++); if(tok_char == '\0') line_buf_ptr = NULL; return; } if(current_file == NULL) { tok_char = EOF; return; } getc1: if(current_file->type == INC_MACRO) { if(current_file->v.m.ml == NULL) { end_include(); goto getc1; } scp = current_file->v.m.ml->text; dcp = line_buffer; while(*scp != '\0' && dcp < &line_buffer[sizeof(line_buffer)]) { if(*scp == '\\') { scp++; if(*scp >= '1' && *scp <= '9') { /* macro arg */ parm = *scp - '1'; /* macro arg #, starting from 0 */ for(arg = current_file->v.m.args; arg != NULL && parm > 0; arg = arg->next, parm--); if(arg != NULL) { for(pcp = arg->text; *pcp != '\0' && dcp < &line_buffer[sizeof(line_buffer)];) *dcp++ = *pcp++; } scp++; } else if(*scp == '0' || *scp == '@') { p_snprintf(tmpbuf, sizeof tmpbuf, "%03d", current_file->v.m.uniq_id); for(pcp = tmpbuf; *pcp != '\0' && dcp < &line_buffer[sizeof(line_buffer)];) *dcp++ = *pcp++; scp++; } else if(*scp == '#') { /* number of arguments */ for(parm = 0, arg = current_file->v.m.args; arg != NULL; arg = arg->next, parm++); p_snprintf(tmpbuf, sizeof tmpbuf, "%d", parm); for(pcp = tmpbuf; *pcp != '\0' && dcp < &line_buffer[sizeof(line_buffer)];) *dcp++ = *pcp++; scp++; } else { *dcp++ = *scp; } } else { *dcp++ = *scp++; } } if(dcp == &line_buffer[sizeof(line_buffer)]) { error(0, "Line buffer overflow"); dcp--; } *dcp = '\0'; /* NUL-terminate the line */ current_file->v.m.ml = current_file->v.m.ml->next; } else { if(fgets(line_buffer, sizeof(line_buffer)-1, current_file->v.f.fp) == NULL) { if(current_file->next != NULL) { end_include(); goto getc1; } tok_char = EOF; return; } if(line_buffer[0] != '\0') { char *p; p = line_buffer + strlen(line_buffer) - 1; if(*p == '\n') *p = '\0'; } } current_file->linenum++; line_buf_ptr = line_buffer; tok_char = ((unsigned char)(*line_buf_ptr++)); if(tok_char == '\0') line_buf_ptr = NULL; } /* * Lexical analyzer * Returns the next token from the source file */ void get_token(void) { int tp, base; char *cp; /* * skip spaces */ while(isspace(tok_char)) read_src_char(); if(tok_char == EOF) { token_type = TOK_EOF; token_string[0] = '\0'; return; } if(tok_char == ';') { /* comment */ skip_eol(); token_type = TOK_NEWLINE; strcpy(token_string, "\\n"); return; } /* for(;;) */ /* * character constant (integer) * (does not currently handle the quote character) */ if(tok_char == '\'') { read_src_char(); token_string[0] = tok_char; read_src_char(); if(tok_char != '\'') goto invalid_token; read_src_char(); token_string[1] = '\0'; token_int_val = (long)((unsigned char)token_string[0]); token_type = TOK_INTCONST; return; } if(tok_char == '"') { /* string constant (include filename) */ read_src_char(); tp = 0; while(tp < TOKSIZE-1 && tok_char != '"' && tok_char != EOF) { token_string[tp++] = tok_char; read_src_char(); } if(tok_char != '\"' && !ifskip_mode) error(0, "String not terminated"); token_string[tp] = '\0'; read_src_char(); token_type = TOK_STRCONST; return; } /* * integer number */ if(isdigit(tok_char)) { token_type = TOK_INTCONST; token_string[0] = tok_char; tp = 1; read_src_char(); if(token_string[0] == '0') { if(tok_char == 'x' || tok_char == 'X') { /* hex number */ token_string[tp++] = tok_char; read_src_char(); while(tp < TOKSIZE-1 && isxdigit(tok_char)) { token_string[tp++] = tok_char; read_src_char(); } token_string[tp] = '\0'; token_int_val = strtoul(&token_string[2], NULL, 16); /* should put range check here */ return; } } while(tp < TOKSIZE-2 && isxdigit(tok_char)) { token_string[tp++] = tok_char; read_src_char(); } base = default_radix; switch(tok_char) { case 'H': /* hex */ case 'h': base = 16; /* hex */ token_string[tp++] = tok_char; read_src_char(); break; case 'O': /* octal */ case 'o': base = 8; /* octal */ token_string[tp++] = tok_char; read_src_char(); break; default: if(token_string[0] == '0' && (token_string[1] == 'b' || token_string[1] == 'B')) { token_string[tp] = '\0'; token_int_val = strtoul(&token_string[2], &cp, 2); if(cp != &token_string[tp] && !ifskip_mode) error(0, "Invalid digit in a number"); /* should put range check here */ return; } else if(token_string[tp-1] == 'B' || token_string[tp-1] == 'b') { base = 2; } else { if(token_string[tp-1] != 'D' && token_string[tp-1] != 'd') token_string[tp++] = '\0'; } break; } token_string[tp] = '\0'; token_int_val = strtoul(token_string, &cp, base); if(cp != &token_string[tp-1] && !ifskip_mode) error(0, "Invalid digit in a number"); /* should put range check here */ return; } /* * Handle B'10010100' binary etc. */ if((tok_char == 'b' || tok_char == 'B' || tok_char == 'd' || tok_char == 'D' || tok_char == 'h' || tok_char == 'H' || tok_char == 'o' || tok_char == 'O') && line_buf_ptr != NULL && *line_buf_ptr == '\'') { token_string[0] = tok_char; read_src_char(); token_string[1] = tok_char; read_src_char(); tp = 2; while(tp < TOKSIZE-1 && isxdigit(tok_char)) { token_string[tp++] = tok_char; read_src_char(); } if(tok_char != '\'') goto invalid_token; token_string[tp++] = tok_char; read_src_char(); token_string[tp] = '\0'; switch(token_string[0]) { case 'b': case 'B': base = 2; break; case 'o': case 'O': base = 8; break; case 'h': case 'H': base = 16; break; case 'd': case 'D': default: base = 10; break; } token_int_val = strtoul(&token_string[2], &cp, base); if(cp != &token_string[tp-1] && !ifskip_mode) error(0, "Invalid digit in a number"); /* should put range check here */ token_type = TOK_INTCONST; return; } /* * keyword or identifier */ if(tok_char == '_' || tok_char == '.' || isalpha(tok_char)) { line_buf_off = (line_buf_ptr - &line_buffer[1]); token_string[0] = tok_char; tp = 1; read_src_char(); if(token_string[0] == '.' && tok_char != '_' && !isalnum(tok_char)) { token_string[1] = '\0'; token_type = TOK_PERIOD; return; } while(tp < TOKSIZE-1 && (tok_char == '_' || tok_char == '.' || isalnum(tok_char))) { token_string[tp++] = tok_char; read_src_char(); } token_string[tp] = '\0'; token_type = FIRST_KW; cp = Keyword_Table; while(*cp) { if(strcasecmp(token_string, cp) == 0) return; while(*cp++) ; token_type++; } token_type = TOK_IDENTIFIER; return; } /* * non-numeric & non-alpha tokens */ switch(tok_char) { case '\0': token_type = TOK_NEWLINE; strcpy(token_string, "\\n"); skip_eol(); return; case '<': token_string[0] = tok_char; read_src_char(); if(tok_char == '<') { token_string[1] = tok_char; token_string[2] = '\0'; token_type = TOK_LSHIFT; read_src_char(); return; } if(tok_char == '=') { token_string[1] = tok_char; token_string[2] = '\0'; token_type = TOK_LESS_EQ; read_src_char(); return; } if(tok_char == '>') { token_string[1] = tok_char; token_string[2] = '\0'; token_type = TOK_NOT_EQ; read_src_char(); return; } token_type = TOK_LESS; token_string[1] = '\0'; return; case '>': token_string[0] = tok_char; read_src_char(); if(tok_char == '>') { token_string[1] = tok_char; token_string[2] = '\0'; token_type = TOK_RSHIFT; read_src_char(); return; } if(tok_char == '=') { token_string[1] = tok_char; token_string[2] = '\0'; token_type = TOK_GT_EQ; read_src_char(); return; } token_string[1] = '\0'; token_type = TOK_GREATER; return; case '!': token_string[0] = tok_char; read_src_char(); if(tok_char != '=') goto invalid_token; token_string[1] = tok_char; token_string[2] = '\0'; read_src_char(); token_type = TOK_NOT_EQ; return; case '=': token_string[0] = tok_char; read_src_char(); if(tok_char == '=') { token_string[1] = tok_char; read_src_char(); token_string[2] = '\0'; token_type = TOK_EQ; return; } if(tok_char == '<') { token_string[1] = tok_char; read_src_char(); token_string[2] = '\0'; token_type = TOK_LESS_EQ; return; } if(tok_char == '>') { token_string[1] = tok_char; read_src_char(); token_string[2] = '\0'; token_type = TOK_GT_EQ; return; } if(tok_char == '_' || tok_char == '.' || isalpha(tok_char)) { /* local symbol */ line_buf_off = (line_buf_ptr - &line_buffer[2]); token_string[0] = tok_char; tp = 1; read_src_char(); while(tp < TOKSIZE-1 && (tok_char == '_' || tok_char == '.' || isalnum(tok_char))) { token_string[tp++] = tok_char; read_src_char(); } token_string[tp] = '\0'; token_type = TOK_LOCAL_ID; return; } token_string[1] = '\0'; token_type = TOK_EQUAL; return; case '$': read_src_char(); if(!isxdigit(tok_char)) { token_string[0] = '$'; token_string[1] = '\0'; token_type = TOK_DOLLAR; return; } tp = 0; do { token_string[tp++] = tok_char; read_src_char(); } while(tp < TOKSIZE-1 && isxdigit(tok_char)); token_string[tp] = '\0'; token_int_val = strtoul(&token_string[0], NULL, 16); token_type = TOK_INTCONST; /* should put range check here */ return; /* * "#include" and "include" both generate KW_INCLUDE */ case '#': if(strncasecmp(line_buf_ptr, "include", 7) != 0) goto invalid_token; strcpy(token_string, "#include"); line_buf_ptr += 7; tok_char = (unsigned char)(*line_buf_ptr); if(*line_buf_ptr == '\0') { line_buf_ptr = NULL; tok_char = ' '; } token_type = KW_INCLUDE; return; case '\\': token_type = TOK_BACKSLASH; break; case ',': token_type = TOK_COMMA; break; case '(': token_type = TOK_LEFTPAR; break; case ')': token_type = TOK_RIGHTPAR; break; case '+': token_type = TOK_PLUS; break; case '-': token_type = TOK_MINUS; break; case '&': token_type = TOK_BITAND; break; case '|': token_type = TOK_BITOR; break; case '^': token_type = TOK_BITXOR; break; case '~': token_type = TOK_BITNOT; break; case '*': token_type = TOK_ASTERISK; break; case '/': token_type = TOK_SLASH; break; case '%': token_type = TOK_PERCENT; break; case ':': token_type = TOK_COLON; break; case '[': token_type = TOK_LEFTBRAK; break; case ']': token_type = TOK_RIGHTBRAK; break; default: goto invalid_token; } token_string[0] = tok_char; token_string[1] = '\0'; read_src_char(); return; invalid_token: if(!ifskip_mode) error(0, "Invalid token"); token_string[0] = '\0'; token_type = TOK_INVALID; } /* skip to the next line */ void skip_eol(void) { line_buf_ptr = NULL; tok_char = ' '; }