/* $Id: parse_lang.c 1555 2006-08-17 20:10:28Z mipsator $ */
/*
* Copyright (c) 2005 Damien Couderc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
* - Neither the name of the copyright holder(s) nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#include "compat/pmk_stdio.h"
#include "compat/pmk_string.h"
#include "common.h"
#include "parse_lang.h"
/*#define DEBUG_PRSC 1*/
/*************
* variables *
***********************************************************************/
/* table of preprocessor keyword strings (RKW_PP_* macros) *************/

char	*pp_keywords[] = {
	RKW_PP_DEF,  RKW_PP_ELIF, RKW_PP_ELSE, RKW_PP_ENDF,
	RKW_PP_ERR,  RKW_PP_IF,   RKW_PP_IFDF, RKW_PP_IFND,
	RKW_PP_INCL, RKW_PP_LINE, RKW_PP_PRGM, RKW_PP_UDEF
};
/* table of assembler directive strings and its entry count ************/

char	*as_keywords[] = { RKW_AS_TSEG, RKW_AS_DSEG };

size_t	 nb_as_keywords = sizeof(as_keywords) / sizeof(as_keywords[0]);
/* C language reserved keywords ****************************************/
/* NOTE: this table also holds the type keywords that are listed again
	in c_type_keywords */

char	*c_keywords[] = {
	RKW_C_BOOL,
	RKW_C_CMPLX,
	RKW_C_IMGNR,
	RKW_C_AUTO,
	RKW_C_BREAK,
	RKW_C_CASE,
	RKW_C_CHAR,
	RKW_C_CONST,
	RKW_C_CONTN,
	RKW_C_DFLT,
	RKW_C_DO,
	RKW_C_DBL,
	RKW_C_ELSE,
	RKW_C_ENUM,
	RKW_C_EXTRN,
	RKW_C_FLOAT,
	RKW_C_FOR,
	RKW_C_GOTO,
	RKW_C_IF,
	RKW_C_INLN,
	RKW_C_INT,
	RKW_C_LONG,
	RKW_C_RGSTR,
	RKW_C_RSTCT,
	RKW_C_RTRN,
	RKW_C_SHORT,
	RKW_C_SGND,
	RKW_C_SIZOF,
	RKW_C_STTC,
	RKW_C_STRCT,
	RKW_C_SWTCH,
	RKW_C_TPDEF,
	RKW_C_UNION,
	RKW_C_USGND,
	RKW_C_VOID,
	RKW_C_VLTL,
	RKW_C_WHILE
};
/* C language keywords that name a type ********************************/

char	*c_type_keywords[] = {
	RKW_C_BOOL,  RKW_C_CMPLX, RKW_C_IMGNR, RKW_C_CHAR, RKW_C_DBL,
	RKW_C_FLOAT, RKW_C_INT,   RKW_C_LONG,  RKW_C_SHORT, RKW_C_VOID
};
/* C++ language reserved keywords and entry count **********************/
/* NOTE: this table also holds the type keywords that are listed again
	in cxx_type_keywords */

char	*cxx_keywords[] = {
	/* a - c */
	RKW_CXX_AND, RKW_CXX_ANDEQ, RKW_CXX_ASM, RKW_CXX_AUTO,
	RKW_CXX_BITAND, RKW_CXX_BITOR, RKW_CXX_BOOL, RKW_CXX_BREAK,
	RKW_CXX_CASE, RKW_CXX_CATCH, RKW_CXX_CHAR, RKW_CXX_CLASS,
	RKW_CXX_COMPL, RKW_CXX_CONST, RKW_CXX_CNSTCST, RKW_CXX_CONTN,
	/* d - i */
	RKW_CXX_DFLT, RKW_CXX_DELETE, RKW_CXX_DO, RKW_CXX_DBL,
	RKW_CXX_DYNCAST,
	RKW_CXX_ELSE, RKW_CXX_ENUM, RKW_CXX_EXPLI, RKW_CXX_EXPORT,
	RKW_CXX_EXTRN,
	RKW_CXX_FALSE, RKW_CXX_FLOAT, RKW_CXX_FOR, RKW_CXX_FRIEND,
	RKW_CXX_GOTO,
	RKW_CXX_IF, RKW_CXX_INLN, RKW_CXX_INT,
	/* l - r */
	RKW_CXX_LONG,
	RKW_CXX_MUTABL,
	RKW_CXX_NSPC, RKW_CXX_NEW, RKW_CXX_NOT, RKW_CXX_NOTEQ,
	RKW_CXX_OPER, RKW_CXX_OR, RKW_CXX_OREQ,
	RKW_CXX_PRIV, RKW_CXX_PROT, RKW_CXX_PUBLIC,
	RKW_CXX_RGSTR, RKW_CXX_RINTCST, RKW_CXX_RTRN,
	/* s - x */
	RKW_CXX_SHORT, RKW_CXX_SGND, RKW_CXX_SIZOF, RKW_CXX_STTC,
	RKW_CXX_STCCST, RKW_CXX_STRCT, RKW_CXX_SWTCH,
	RKW_CXX_TMPLT, RKW_CXX_THIS, RKW_CXX_THROW, RKW_CXX_TRUE,
	RKW_CXX_TRY, RKW_CXX_TYPEDEF, RKW_CXX_TYPEID, RKW_CXX_TYPENAM,
	RKW_CXX_UNION, RKW_CXX_USGND, RKW_CXX_USING,
	RKW_CXX_VIRT, RKW_CXX_VOID, RKW_CXX_VLTL,
	RKW_CXX_WCHART, RKW_CXX_WHILE,
	RKW_CXX_XOR, RKW_CXX_XOREQ
};

size_t	 nb_cxx_keywords = sizeof(cxx_keywords) / sizeof(cxx_keywords[0]);
/* C++ language keywords that name a type, and entry count *************/

char	*cxx_type_keywords[] = {
	RKW_CXX_BOOL, RKW_CXX_CHAR, RKW_CXX_DBL,
	RKW_CXX_FLOAT, RKW_CXX_INT, RKW_CXX_LONG,
	RKW_CXX_SHORT, RKW_CXX_VOID, RKW_CXX_WCHART
};

size_t	 nb_cxx_type_keywords =
			sizeof(cxx_type_keywords) / sizeof(cxx_type_keywords[0]);
/******************************
* assembly parsing functions *
**********************************************************************/
/******************
 * prs_asm_file() *
 ***********************************************************************
 DESCR
	assembly file parsing main function

	For each line of the stream:
	- a line starting with '#' is handed to prs_c_prepro()
	- an identifier preceded by '.' is compared with RKW_AS_TSEG and
	  RKW_AS_DSEG to track the current segment
	- any other identifier directly followed by ':' while in the text
	  segment is reported through pcmn->func_decl (when not NULL)
	The remainder of the line is then skipped.

 IN
	pcmn :	common parsing structure
	fp :	file to parse

 OUT
	boolean : false if the parsing engine could not be initialised or
		if the func_decl callback returned false, else true
 ***********************************************************************/

bool prs_asm_file(prs_cmn_t *pcmn, FILE *fp) {
	bool			 askw_flag = false,
					 rslt = true;
	char			 idtf[MAX_IDTF_LEN];
	unsigned int	 segtype = SEG_TYPE_UNKNW;
	prseng_t		*ppe;

#ifdef DEBUG_PRSC
	debugf("prs_asm_file() BEGIN");
#endif

	/* init prseng */
	ppe = prseng_init(fp, NULL);
	if (ppe == NULL) {
		return(false);
	}

	/* while end of file is not reached */
	while (prseng_eof(ppe) == false) {
#ifdef DEBUG_PRSC
		debugf("cursor: '%.16s'", ppe->cur);
#endif

		prs_c_skip(ppe);

#ifdef DEBUG_PRSC
		debugf("cursor after skipping useless : '%.16s'", ppe->cur);
#endif

		if (prseng_test_char(ppe, '#') == true) {
			/* parse preprocessing directive */
#ifdef DEBUG_PRSC
			debugf("found preprocessor directive: '%.16s'", ppe->cur);
#endif
			prs_c_prepro(pcmn, ppe);
			continue;
		}
#ifdef DEBUG_PRSC
		debugf("cursor is not '#'");
#endif

		/* assembler keyword */
		if (prseng_test_char(ppe, '.') == true) {
			/* mark as assembler directive */
			askw_flag = true;
#ifdef DEBUG_PRSC
			debugf("found assembler directive '%.16s'.", ppe->cur);
#endif
			prseng_next_char(ppe);
		}

		prseng_get_idtf(ppe, idtf, sizeof(idtf), PRS_C_IDTF_STR);
#ifdef DEBUG_PRSC
		debugf("found identifier : '%.16s'", idtf);
#endif

		if (askw_flag == true) {
			/* directive : only the segment switches are of interest,
				the length used includes the terminating NUL so only an
				exact match succeeds */
			if (strncmp(idtf, RKW_AS_TSEG, strlen(idtf) + 1) == 0) {
				/* start of text segment */
				segtype = SEG_TYPE_TEXT;
#ifdef DEBUG_PRSC
				debugf("entered text section.");
#endif
			}

			if (strncmp(idtf, RKW_AS_DSEG, strlen(idtf) + 1) == 0) {
				/* start of data segment */
				segtype = SEG_TYPE_DATA;
#ifdef DEBUG_PRSC
				debugf("entered data section.");
#endif
			}

			askw_flag = false;
		} else {
			/* a label in the text segment is reported as a function
				declaration */
			if ((segtype == SEG_TYPE_TEXT) &&
						(prseng_test_char(ppe, ':') == true)) {
#ifdef DEBUG_PRSC
				debugf("possible function declaration of '%s'", idtf);
#endif
				if ((pcmn->func_decl != NULL) &&
						(pcmn->func_decl(pcmn->data, idtf, ppe) == false)) {
					/* leave the loop so that the engine is released */
					rslt = false;
					break;
				}
			}
		}

		/* skip until end of line */
		prs_c_skip_to_char(ppe, '\n');
	}

	/* release the parsing engine on every exit path, as done at the
		end of prs_c_common() */
	prseng_destroy(ppe);

#ifdef DEBUG_PRSC
	debugf("prs_asm_file() END");
#endif

	return(rslt);
}
/********************************
* C language parsing functions *
**********************************************************************/
/************************
 * prs_c_skip_to_char() *
 ***********************************************************************
 DESCR
	skip until a given char, the delimiter itself is also skipped

	A delimiter directly preceded by a '\' character is ignored.
	If the cursor is already on the delimiter then only this character
	is skipped.

 IN
	ppe :	parsing engine structure
	c :		delimiter character

 OUT
	boolean : false if prseng_next_char() failed, else true (also when
		the end of file is reached before the delimiter)
 ***********************************************************************/

bool prs_c_skip_to_char(prseng_t *ppe, char c) {
	/* true when the previous character was '\'. Nothing has been read
		yet at entry so a delimiter under the cursor must stop the scan
		at once instead of being swallowed with the following text. */
	bool	escaped = false;

	while (prseng_eof(ppe) == false) {
		if ((escaped == false) && (prseng_test_char(ppe, c) == true)) {
			/* unescaped delimiter found */
			break;
		}

		/* remember if the character to skip is a backslash */
		if (prseng_test_char(ppe, '\\') == true) {
			escaped = true;
		} else {
			escaped = false;
		}

		/* skip character */
		if (prseng_next_char(ppe) == false) {
			return(false);
		}
	}

	if (prseng_test_char(ppe, c) == true) {
		/* skip given character */
		if (prseng_next_char(ppe) == false) {
			return(false);
		}
	}

	return(true);
}
/*********************
 * prs_c_line_skip() *
 ***********************************************************************
 DESCR
	skip until end of line, the newline itself is also skipped

	A newline directly preceded by a '\' character does not end the
	line. If the cursor is already on a newline then only this
	character is skipped.

 IN
	ppe :	parsing engine structure

 OUT
	boolean : false if prseng_next_char() failed, else true (also when
		the end of file is reached before a newline)
 ***********************************************************************/

bool prs_c_line_skip(prseng_t *ppe) {
	/* true when the previous character was '\'. Nothing has been read
		yet at entry so a newline under the cursor must stop the scan
		at once instead of dragging the next line along. */
	bool	escaped = false;

	while (prseng_eof(ppe) == false) {
		if ((escaped == false) && (prseng_test_char(ppe, '\n') == true)) {
			/* real end of line found */
			break;
		}

		/* remember if the character to skip is a backslash */
		if (prseng_test_char(ppe, '\\') == true) {
			escaped = true;
		} else {
			escaped = false;
		}

		/* skip character */
		if (prseng_next_char(ppe) == false) {
			return(false);
		}
	}

	if (prseng_test_char(ppe, '\n') == true) {
		/* skip newline */
		if (prseng_next_char(ppe) == false) {
			return(false);
		}
	}

	return(true);
}
/************************
 * prs_c_comment_skip() *
 ***********************************************************************
 DESCR
	skip C style comments

	Must be called with the cursor on a '/' character, which is always
	skipped. If it opens a block comment or a line comment then the
	whole comment is skipped too, else nothing more is consumed.

 IN
	ppe :	parsing engine structure

 OUT
	boolean : false if the end of file directly follows the leading
		'/', if a block comment is not terminated or if
		prs_c_line_skip() failed, else true
 ***********************************************************************/

bool prs_c_comment_skip(prseng_t *ppe) {
	bool	star = false,	/* previous character was '*' */
			closed = false;	/* end of block comment found */

	/* skip '/' character */
	prseng_next_char(ppe);

	if (prseng_eof(ppe) == true) {
		/* XXX err msg unexpected eof */
		return(false);
	}

	switch(prseng_get_char(ppe)) {
		case '*' :
			/* skip '*' character */
			prseng_next_char(ppe);

			/* loop until end of comment is found */
			while (closed == false) {
				/* the end of file is only an error while the comment
					is still open, a comment closed by the very last
					characters of the input is valid */
				if (prseng_eof(ppe) == true) {
					/* XXX err msg unexpected eof */
					return(false);
				}

				if ((star == true) &&
							(prseng_test_char(ppe, '/') == true)) {
					/* can exit from the loop after skipping this char */
					closed = true;
				} else {
					/* a '*' could start the comment terminator */
					if (prseng_test_char(ppe, '*') == true) {
						star = true;
					} else {
						star = false;
					}
				}

				/* going to next char */
				prseng_next_char(ppe);
			}
			break;

		case '/' :
			/* skip second '/' character */
			prseng_next_char(ppe);

			if (prs_c_line_skip(ppe) == false) {
				return(false);
			}
			break;

		default :
			/* lone '/' (not a comment), already skipped */
			break;
	}

	return(true);
}
/***********************
 * prs_c_squote_skip() *
 ***********************************************************************
 DESCR
	process simple quotes

	Must be called with the cursor on the opening quote. Skips the
	character constant up to and including its closing quote. Escape
	sequences made of several characters (for example '\x41' or '\377')
	are accepted.

 IN
	ppe :	parsing engine structure

 OUT
	boolean : false if the end of file or the end of the line is found
		before the closing quote, else true
 ***********************************************************************/

bool prs_c_squote_skip(prseng_t *ppe) {
	/* skip starting quote */
	prseng_next_char(ppe);

	if (prseng_eof(ppe) == true) {
		/* XXX err msg unexpected eof */
		return(false);
	}

	/* if it's an escape character ... */
	if (prseng_test_char(ppe, '\\') == true) {
		/* ... then skip it */
		prseng_next_char(ppe);
	}

	if (prseng_eof(ppe) == true) {
		/* XXX err msg unexpected eof */
		return(false);
	}

	/* skip quoted character (or first character of the escape) */
	prseng_next_char(ppe);

	/* skip what remains of a longer constant until the closing quote */
	while (prseng_test_char(ppe, '\'') == false) {
		if ((prseng_eof(ppe) == true) ||
					(prseng_test_char(ppe, '\n') == true)) {
			/* XXX msg cannot found ending quote ! */
			return(false);
		}

		if (prseng_test_char(ppe, '\\') == true) {
			/* skip the backslash so that an escaped quote is not taken
				for the closing one */
			prseng_next_char(ppe);

			if (prseng_eof(ppe) == true) {
				/* XXX err msg unexpected eof */
				return(false);
			}
		}

		prseng_next_char(ppe);
	}

	/* skip ending quote */
	prseng_next_char(ppe);

	return(true);
}
/***********************
 * prs_c_dquote_skip() *
 ***********************************************************************
 DESCR
	process double quotes

	Must be called with the cursor on the opening double quote. Skips
	the string up to and including its closing double quote. A
	character preceded by an unescaped '\' never closes the string.

 IN
	ppe :	parsing engine structure

 OUT
	boolean : false if the end of file is found before the closing
		double quote, else true
 ***********************************************************************/

bool prs_c_dquote_skip(prseng_t *ppe) {
	bool	escape = false;

	/* skip starting double quote */
	prseng_next_char(ppe);

	while ((escape == true) || (prseng_test_char(ppe, '"') == false)) {
#ifdef DEBUG_PRSC
		debugf("current char = %c", prseng_get_char(ppe));
#endif

		if (prseng_eof(ppe) == true) {
			/* XXX msg cannot find ending double quote ! */
			return(false);
		}

		if (escape == true) {
			/* escaped character : consumed as is. An escaped backslash
				must not escape the following character, else the
				closing quote of "\\" would be missed. */
			escape = false;
		} else if (prseng_test_char(ppe, '\\') == true) {
			escape = true;
		}

		prseng_next_char(ppe);
	}

	/* skip ending double quote */
	prseng_next_char(ppe);

	return(true);
}
/****************
 * prs_c_skip() *
 ***********************************************************************
 DESCR
	move the cursor over blanks (space, tab, newline, form feed and
	character code 13) and over anything consumed by
	prs_c_comment_skip() when a '/' is met

 IN
	ppe :	parsing engine structure

 OUT
	boolean : false as soon as prs_c_comment_skip() fails, else true
 ***********************************************************************/

bool prs_c_skip(prseng_t *ppe) {
	for (;;) {
		switch(prseng_get_char(ppe)) {
			case ' ' :
			case '\t' :
			case '\n' :
			case '\f' :
			case 13 :
				/* blank character, step over it */
				prseng_next_char(ppe);
				continue;

			case '/' :
				/* may be the start of a comment */
				if (prs_c_comment_skip(ppe) == false) {
					return(false);
				}
				continue;

			default :
				/* anything else stops the skipping */
				return(true);
		}
	}
}
/******************
 * prs_c_prepro() *
 ***********************************************************************
 DESCR
	handle preprocessor directives

	Must be called with the cursor on the '#' character. When
	pcmn->func_ppro is set, the directive name is read and passed to it
	(its result is not looked at), else the whole line is skipped.

 IN
	pcmn :	common parsing structure
	ppe :	parsing engine structure

 OUT
	boolean : false if the end of file follows the '#' or if
		prs_c_line_skip() failed, else true
 ***********************************************************************/

bool prs_c_prepro(prs_cmn_t *pcmn, prseng_t *ppe) {
	char	directive[MAX_IDTF_LEN];

	/* step over the '#' */
	prseng_next_char(ppe);

	if (prseng_eof(ppe) == true) {
		/* XXX err msg unexpected eof */
		return(false);
	}

	if (pcmn->func_ppro == NULL) {
		/* nobody is interested, drop the directive */
		return(prs_c_line_skip(ppe));
	}

	/* get the directive name and give it to the callback */
	prseng_get_idtf(ppe, directive, sizeof(directive), PRS_C_IDTF_STR);
	pcmn->func_ppro(pcmn->data, directive, ppe);

	return(true);
}
/*****************
 * prs_c_is_kw() *
 ***********************************************************************
 DESCR
	look for an exact match of an identifier in a keyword table

 IN
	idtf :	identifier to look for
	kw :	keyword table
	nbkw :	number of entries of the table to test

 OUT
	boolean : true if one of the nbkw first entries is equal to idtf
 ***********************************************************************/

bool prs_c_is_kw(char *idtf, char **kw, size_t nbkw) {
	char	**pkw,
			**pend;

	pend = kw + nbkw;

	/* walk through the table until an entry is the same string */
	for (pkw = kw ; pkw != pend ; pkw++) {
		if (strcmp(idtf, *pkw) == 0) {
			return(true);
		}
	}

	/* no entry matched */
	return(false);
}
/****************
 * prs_c_file() *
 ***********************************************************************
 DESCR
	C file parsing entry point, runs prs_c_common() with the C keyword
	tables (c_keywords and c_type_keywords)

 IN
	pcmn :	common parsing structure
	fp :	file to parse

 OUT
	boolean : result of prs_c_common()
 ***********************************************************************/

bool prs_c_file(prs_cmn_t *pcmn, FILE *fp) {
	bool	rslt;

	rslt = prs_c_common(pcmn, fp, c_keywords, c_type_keywords);

	return(rslt);
}
/******************
 * prs_cxx_file() *
 ***********************************************************************
 DESCR
	C++ file parsing entry point, runs prs_c_common() with the C++
	keyword tables (cxx_keywords and cxx_type_keywords)

 IN
	pcmn :	common parsing structure
	fp :	file to parse

 OUT
	boolean : result of prs_c_common()
 ***********************************************************************/

bool prs_cxx_file(prs_cmn_t *pcmn, FILE *fp) {
	bool	rslt;

	rslt = prs_c_common(pcmn, fp, cxx_keywords, cxx_type_keywords);

	return(rslt);
}
/******************
* prs_c_common() *
***********************************************************************
DESCR
C file parsing main function
IN
pcmn : common parsing structure
fp : file to parse
lkw : language keywords
nb_lkw : number of language keywords
OUT
boolean
***********************************************************************/
bool prs_c_common(prs_cmn_t *pcmn, FILE *fp, char **lkw, char **tkw) {
bool idtf_flag = false,
type_flag = false;
char idtf[MAX_IDTF_LEN],
type[MAX_IDTF_LEN];
prseng_t *ppe;
size_t nb_lkw,
nb_tkw;
/* compute keyword array size */
nb_lkw = sizeof(lkw) / sizeof(char *);
nb_tkw = sizeof(tkw) / sizeof(char *);
/* init prseng */
ppe = prseng_init(fp, NULL);
if (ppe == NULL) {
errorf("parse engine init failed.");
return(false);
}
/* while end of file is not reached */
while (prseng_eof(ppe) == false) {
#ifdef DEBUG_PRSC
/*debugf("cursor: '%.16s'", ppe->cur);*/
#endif
prs_c_skip(ppe);
#ifdef DEBUG_PRSC
debugf("cursor after skipping useless : '%.16s'", ppe->cur);
#endif
if (prseng_test_char(ppe, '#') == true) {
/* parse preprocessing directive */
prs_c_prepro(pcmn, ppe);
continue;
#ifdef DEBUG_PRSC
} else {
debugf("cursor is not '#'");
#endif
}
if (prseng_test_char(ppe, '(') == true) {
if (idtf_flag == true) {
if (type_flag == true) {
#ifdef DEBUG_PRSC
debugf("possible function declaration of '%s'", idtf);
#endif
/* if an identifier flag is on => function call */
if (pcmn->func_decl != NULL) {
if (pcmn->func_decl(pcmn->data, idtf, ppe) == false) {
errorf("error in processing of function declarator.");
return(false);
}
}
} else {
#ifdef DEBUG_PRSC
debugf("possible function call of '%s'", idtf);
#endif
/* if an identifier flag is on => function call */
if (pcmn->func_proc != NULL) {
if (pcmn->func_proc(pcmn->data, idtf, ppe) == false) {
errorf("error in processing of function call.");
return(false);
}
}
}
idtf_flag = false;
}
/* skip character */
prseng_next_char(ppe);
continue;
#ifdef DEBUG_PRSC
} else {
debugf("cursor is not '('");
#endif
}
if (prseng_test_char(ppe, '*') == true) {
if (idtf_flag == true) {
/* if the idtf flag is on => type identifier */
#ifdef DEBUG_PRSC
debugf("possible type identifier '%s'", idtf);
#endif
/* processing, call to pcmn->func_type */
if (pcmn->func_type(pcmn->data, idtf, ppe) == false) {
errorf("error in processing of type.");
return(false);
}
idtf_flag = false;
type_flag = true;
}
/* skip character */
prseng_next_char(ppe);
continue;
#ifdef DEBUG_PRSC
} else {
debugf("cursor is not '*'");
#endif
}
if (prseng_test_char(ppe, '[') == true) {
if (type_flag == true) {
/* if a type flag is on => type identifier */
#ifdef DEBUG_PRSC
debugf("possible type identifier '%s'", type);
#endif
/* processing, call to pcmn->func_type */
if (pcmn->func_type(pcmn->data, idtf, ppe) == false) {
errorf("error in processing of type.");
return(false);
}
type_flag = false;
}
/* skip character */
prseng_next_char(ppe);
continue;
#ifdef DEBUG_PRSC
} else {
debugf("cursor is not '['");
#endif
}
if (prseng_test_char(ppe, '"') == true) {
if (prs_c_dquote_skip(ppe) == false) {
return(false);
}
continue;
#ifdef DEBUG_PRSC
} else {
debugf("cursor is not '\"'");
#endif
}
if (prseng_test_char(ppe, '\'') == true) {
if (prs_c_squote_skip(ppe) == false) {
return(false);
}
continue;
#ifdef DEBUG_PRSC
} else {
debugf("cursor is not '\''");
#endif
}
/* if it's a misc char ... */
if (prseng_test_idtf_char(PRS_C_MISC_STR,
prseng_get_char(ppe)) == true) {
#ifdef DEBUG_PRSC
debugf("cursor '%.16s is a misc char", ppe->cur);
#endif
/* clear flags */
idtf_flag = false; /* XXX here idtf could contain a constant */
type_flag = false;
/* skip character */
prseng_next_char(ppe);
continue;
#ifdef DEBUG_PRSC
} else {
debugf("cursor is not a misc char");
#endif
}
if (idtf_flag == true) {
/* save previous idtf as it could be a type */
strlcpy(type, idtf, sizeof(type)); /* no check needed */
#ifdef DEBUG_PRSC
debugf("save type identifier '%s'", type);
#endif
idtf_flag = false;
type_flag = true;
}
prseng_get_idtf(ppe, idtf, sizeof(idtf), PRS_C_IDTF_STR);
#ifdef DEBUG_PRSC
debugf("found identifier '%s'", idtf);
#endif
if (idtf[0] == '\0') {
if (prseng_eof(ppe) == true) {
#ifdef DEBUG_PRSC
debugf("normal end of parsing");
#endif
return(true);
} else {
#ifdef DEBUG_PRSC
debugf("problem with '%d'", prseng_get_char(ppe));
#endif
return(false);
}
}
/* check if the identifier is a type keyword */
if (prs_c_is_kw(idtf, tkw, nb_tkw) == true) {
/* if yes then we have to mark this identifier */
type_flag = true;
#ifdef DEBUG_PRSC
debugf("skipped type keyword '%s'", idtf);
#endif
continue;
}
/* check if the identifier is a keyword */
if (prs_c_is_kw(idtf, lkw, nb_lkw) == false) {
/* if not then we have to mark this identifier */
idtf_flag = true;
#ifdef DEBUG_PRSC
} else {
debugf("skipped keyword '%s'", idtf);
#endif
}
}
prseng_destroy(ppe);
return(true);
}
/* vim: set noexpandtab tabstop=4 softtabstop=4 shiftwidth=4: */
/* syntax highlighted by Code2HTML, v. 0.9.1 */