/*
* $Id: defLex.c,v 4.21 2007/07/04 20:51:11 bkorb Exp $
*
* Time-stamp: "2007-07-04 11:16:53 bkorb"
* Last Committed: $Date: 2007/07/04 20:51:11 $
*
* This module scans the template variable declarations and passes
* tokens back to the parser.
*
* This file is part of AutoGen.
* AutoGen copyright (c) 1992-2007 by Bruce Korb - all rights reserved
*
* AutoGen is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* AutoGen is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program. If not, see .
*/
tSCC zErrMsg[] = "%s Error: %s in %s on line %d\n";
/*
* This keyword table must match those found in agParse.y.
* You will find them in a %token statement that follows
* a comment "Keywords"
*/
#define KEYWORD_TABLE \
_KW_( AUTOGEN ) \
_KW_( DEFINITIONS )
#define _KW_(w) tSCC z ## w [] = #w;
KEYWORD_TABLE
#undef _KW_
#define _KW_(w) z ## w,
tSCC* apzKeywords[] = { KEYWORD_TABLE };
#undef _KW_
#define _KW_(w) DP_EV_ ## w,
te_dp_event aKeywordTkn[] = { KEYWORD_TABLE };
#undef _KW_
#define KEYWORD_CT (sizeof( apzKeywords ) / sizeof( apzKeywords[0] ))
#define ERROR (-1)
#define FINISH (-1)
#define SET_LIT_TKN(t) lastToken = DP_EV_LIT_ ## t; *(pCurCtx->pzScan++) = NUL;
/* = = = START-STATIC-FORWARD = = = */
/* static forward declarations maintained by :mkfwd */
static void
loadScheme( void );
static void
alist_to_autogen_def( void );
static char*
assembleName( char* pzScan, te_dp_event* pRetVal );
static char*
assembleHereString( char* pzScan );
/* = = = END-STATIC-FORWARD = = = */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*
* LEXICAL SCANNER
*/
LOCAL te_dp_event
yylex( void )
{
lastToken = DP_EV_INVALID;
scanAgain:
/*
* Start the process of locating a token.
* We branch here after skipping over a comment
* or processing a directive (which may change our context).
*/
if (isspace( *pCurCtx->pzScan )) {
char* pz = pCurCtx->pzScan;
if (*pz == '\n')
pCurCtx->lineNo++;
*(pz++) = NUL;
/*
* This ensures that any names found previously
* are NUL terminated.
*/
while (isspace(*pz)) {
if (*pz == '\n')
pCurCtx->lineNo++;
pz++;
}
pCurCtx->pzScan = pz;
}
switch (*pCurCtx->pzScan) {
case NUL:
/*
* IF we are not inside an include context,
* THEN go finish.
*/
if (pCurCtx->pCtx == NULL)
goto lex_done;
/*
* Pop off an include context and resume
* from the including file.
*/
{
tScanCtx* pCX = pCurCtx;
pCurCtx = pCurCtx->pCtx;
pCX->pCtx = pDoneCtx;
pDoneCtx = pCX;
}
goto scanAgain;
case '#':
{
extern char* processDirective( char* );
char* pz = processDirective( pCurCtx->pzScan+1 );
/*
* Ensure that the compiler doesn't try to save a copy of
* "pCurCtx" in a register. It must be reloaded from memory.
*/
pCurCtx->pzScan = pz;
goto scanAgain;
}
case '{': SET_LIT_TKN( O_BRACE ); break;
case '=': SET_LIT_TKN( EQ ); break;
case '}': SET_LIT_TKN( C_BRACE ); break;
case '[': SET_LIT_TKN( OPEN_BKT ); break;
case ']': SET_LIT_TKN( CLOSE_BKT ); break;
case ';': SET_LIT_TKN( SEMI ); break;
case ',': SET_LIT_TKN( COMMA ); break;
case '\'':
case '"':
{
char* pz = ao_string_cook( pCurCtx->pzScan, &(pCurCtx->lineNo));
if (pz == NULL)
goto NUL_error;
pz_token = pCurCtx->pzScan;
lastToken = DP_EV_STRING;
pCurCtx->pzScan = pz;
break;
}
case '<':
{
char* pz;
if (pCurCtx->pzScan[1] != '<')
goto BrokenToken;
pz = assembleHereString( pCurCtx->pzScan + 2 );
if (pz == NULL) {
lastToken = DP_EV_INVALID;
return DP_EV_INVALID;
}
lastToken = DP_EV_HERE_STRING;
pCurCtx->pzScan = pz;
break;
}
case '(':
loadScheme();
break;
case '\\':
if (strncmp( pCurCtx->pzScan+1, "'(", (size_t)2) == 0) {
alist_to_autogen_def();
goto scanAgain;
} else {
char* pz = strchr( pCurCtx->pzScan, ';' );
for (;;) {
if (pz == NULL) {
pz = pCurCtx->pzScan + strlen( pCurCtx->pzScan );
break;
}
if (isspace( pz[1] )) {
*pz = NUL;
pz[1] = ';';
break;
}
pz = strchr( pz+1, ';' );
}
lastToken = DP_EV_STRING;
pz_token = pz;
break;
}
case '`':
{
char* pz = ao_string_cook( pCurCtx->pzScan, &(pCurCtx->lineNo));
if (pz == NULL)
goto NUL_error;
pz_token = pCurCtx->pzScan;
pCurCtx->pzScan = pz;
if (pzShellProgram == NULL)
pzShellProgram = getDefine( zShellEnv, AG_TRUE );
lastToken = DP_EV_STRING;
pz = runShell( (char const*)pz_token );
if (pz == NULL)
goto scanAgain;
TAGMEM( pz, "shell definition string" );
pz_token = pz;
manageAllocatedData( pz );
break;
}
case '/':
/*
* Allow for a comment, C or C++ style
*/
switch (pCurCtx->pzScan[1]) {
case '*':
{
char* pz = strstr( pCurCtx->pzScan+2, "*/" );
if (pz != NULL) {
char* p = pCurCtx->pzScan+1;
for (;;) {
p = strchr( p+1, '\n' );
if ((p == NULL) || (p > pz))
break;
pCurCtx->lineNo++;
}
pCurCtx->pzScan = pz+2;
goto scanAgain;
}
break;
}
case '/':
{
char* pz = strchr( pCurCtx->pzScan+2, '\n' );
if (pz != NULL) {
pCurCtx->pzScan = pz+1;
pCurCtx->lineNo++;
goto scanAgain;
}
break;
}
}
/* FALLTHROUGH */ /* to Invalid input char */
default:
BrokenToken:
pCurCtx->pzScan = assembleName( pCurCtx->pzScan, &lastToken );
break;
} /* switch (*pCurCtx->pzScan) */
return lastToken;
NUL_error:
AG_ABEND( aprf( zErrMsg, pzProg, "unterminated quote in definition",
pCurCtx->pzCtxFname, pCurCtx->lineNo ));
return DP_EV_INVALID;
lex_done:
/*
* First time through, return the DP_EV_END token.
* Second time through, we really finish.
*/
if (pCurCtx->pzScan == zNil) {
pCurCtx->pCtx = pDoneCtx;
pDoneCtx = pCurCtx;
return DP_EV_INVALID;
}
pCurCtx->pzScan = (char*)zNil;
return DP_EV_END;
}
LOCAL void
yyerror( char* s )
{
tSCC zErrTkn[] = "%s: ``%s''\n";
tSCC zDf[] = "`%s'\n";
char* pz;
if (strlen( pCurCtx->pzScan ) > 64 )
pCurCtx->pzScan[64] = NUL;
switch (lastToken) {
case DP_EV_VAR_NAME:
case DP_EV_OTHER_NAME:
case DP_EV_STRING:
case DP_EV_NUMBER:
if (strlen( pz_token ) > 64 )
pz_token[64] = NUL;
pz = aprf( zErrTkn, DP_EVT_NAME( lastToken ), pz_token );
break;
default:
pz = aprf( zDf, DP_EVT_NAME( lastToken ));
}
AG_ABEND( aprf( "%s: in %s on line %d\n"
"\ttoken in error: %s\n"
"\t[[...]] %s\n\n"
"Likely causes: a mismatched quote, a value that needs "
"quoting,\n\t\tor a missing semi-colon\n",
s, pCurCtx->pzCtxFname, pCurCtx->lineNo, pz,
pCurCtx->pzScan ));
}
static void
loadScheme( void )
{
char* pzText = pCurCtx->pzScan;
char* pzEnd = (char*)skipScheme( pzText, pzText + strlen( pzText ));
char endCh = *pzEnd;
int schemeLen = (pzEnd - pzText);
SCM res;
/*
* NUL terminate the Scheme expression, run it, then restore
* the NUL-ed character.
*/
*pzEnd = NUL;
procState = PROC_STATE_GUILE_PRELOAD;
res = ag_scm_c_eval_string_from_file_line(
pzText, pCurCtx->pzCtxFname, pCurCtx->lineNo );
procState = PROC_STATE_LOAD_DEFS;
*pzEnd = endCh;
pCurCtx->pzScan = pzEnd;
pzEnd = (char*)resolveSCM( res ); /* ignore const-ness */
if (strlen( pzEnd ) >= schemeLen) {
AGDUPSTR( pzEnd, pzEnd, "SCM Result" );
pz_token = pzEnd;
manageAllocatedData( pz_token );
}
else {
/*
* We know the result is smaller than the source. Copy in place.
*/
strcpy( pzText, pzEnd );
pz_token = pzText;
}
lastToken = DP_EV_STRING;
}
/*
* process a single scheme expression, yielding text that gets processed
* into AutoGen definitions.
*/
static void
alist_to_autogen_def( void )
{
tSCC zSchemeText[] = "Scheme Computed Definitions";
tSCC zWrap[] = "(alist->autogen-def %s)";
char* pzText = ++(pCurCtx->pzScan);
char* pzEnd = (char*)skipScheme( pzText, pzText + strlen( pzText ));
SCM res;
size_t res_len;
tScanCtx* pCtx;
/*
* Wrap the scheme expression with the `alist->autogen-def' function
*/
{
char endCh = *pzEnd;
*pzEnd = NUL;
pzText = aprf( zWrap, pzText );
*pzEnd = endCh;
}
/*
* NUL terminate the Scheme expression, run it, then restore
* the NUL-ed character.
*/
procState = PROC_STATE_GUILE_PRELOAD;
res = ag_scm_c_eval_string_from_file_line(
pzText, pCurCtx->pzCtxFname, pCurCtx->lineNo );
/*
* The result *must* be a string, or we choke.
*/
if (! AG_SCM_STRING_P( res )) {
tSCC zEr[] = "Scheme definition expression does not yield string:\n";
AG_ABEND( zEr );
}
res_len = AG_SCM_STRLEN( res );
procState = PROC_STATE_LOAD_DEFS;
pCurCtx->pzScan = pzEnd;
AGFREE( (void*)pzText );
/*
* Now, push the resulting string onto the input stack
* and link the new scan data into the context stack
*/
pCtx = (tScanCtx*)AGALOC( sizeof(tScanCtx) + 4 + res_len, "lex scan ctx" );
pCtx->pCtx = pCurCtx;
pCurCtx = pCtx;
/*
* Set up the rest of the context structure
*/
AGDUPSTR( pCtx->pzCtxFname, zSchemeText, "scheme text" );
pCtx->pzScan = \
pCtx->pzData = (char*)(pCtx+1);
pCtx->lineNo = 0;
memcpy( (void*)(pCtx->pzScan), (void*)AG_SCM_CHARS( res ), res_len );
pCtx->pzScan[ res_len ] = NUL;
/*
* At this point, the next token will be obtained
* from the newly allocated context structure.
* When empty, input will resume from the '}' that we
* left as the next input token in the old context.
*/
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*
* It may be a number, a name, a keyword or garbage.
* Figure out which.
*/
static char*
assembleName( char* pzScan, te_dp_event* pRetVal )
{
/*
* Check for a number.
* Scan it in and advance "pzScan".
*/
if ( isdigit( *pzScan )
|| ( (*pzScan == '-')
&& isdigit( pzScan[1] )
) ) {
pz_token = pzScan;
(void)strtol( pzScan, &pzScan, 0 );
*pRetVal = DP_EV_NUMBER;
return pzScan;
}
{
static unsigned char zNameChars[ 256 ];
unsigned char* pz = (unsigned char*)pzScan;
if (zNameChars[ (unsigned)'a' ] == 0) {
/*
* Default to accepting as "OTHER_NAME" all characters
*/
u_int idx = ((unsigned)' ') + 1;
do {
zNameChars[ idx ] = ISNAMECHAR(idx) ? 1 : 2;
} while (++idx <= (unsigned)'~');
/*
* Now disallow entirely characters we use specially
*/
zNameChars[ (unsigned)'"' ] = 0;
zNameChars[ (unsigned)'#' ] = 0;
zNameChars[ (unsigned)'(' ] = 0;
zNameChars[ (unsigned)')' ] = 0;
zNameChars[ (unsigned)',' ] = 0;
zNameChars[ (unsigned)';' ] = 0;
zNameChars[ (unsigned)'<' ] = 0;
zNameChars[ (unsigned)'=' ] = 0;
zNameChars[ (unsigned)'>' ] = 0;
zNameChars[ (unsigned)'[' ] = 0;
zNameChars[ (unsigned)'\''] = 0;
zNameChars[ (unsigned)']' ] = 0;
zNameChars[ (unsigned)'`' ] = 0;
zNameChars[ (unsigned)'{' ] = 0;
zNameChars[ (unsigned)'}' ] = 0;
}
/*
* Skip over VAR_NAME characters
*/
while (zNameChars[ *pz ] == 1) pz++;
/*
* IF the next character terminates the token,
* THEN see if we got any characters at all
* ELSE skip over the rest of the OTHER_NAME
*/
if (zNameChars[ *pz ] == 0) {
if (pz == (unsigned char*)pzScan)
AG_ABEND( aprf( "%s Error: Invalid input char '%c' "
"in %s on line %d\n", pzProg, *pzScan,
pCurCtx->pzCtxFname, pCurCtx->lineNo ));
*pRetVal = DP_EV_VAR_NAME;
} else {
*pRetVal = DP_EV_OTHER_NAME;
while (zNameChars[ *pz ] != 0) pz++;
}
/*
* Return a NAME token, maybe.
* If the name is actually a keyword,
* we will return that token code instead.
*/
pz_token = pzScan;
pzScan = (char*)pz;
}
/*
* Now scan the keyword table.
*/
if (*pRetVal == DP_EV_VAR_NAME) {
char sv_ch = *pzScan; /* preserve the following character */
int kw_ix = 0;
*pzScan = NUL; /* NUL terminate the name */
do {
if (streqvcmp( apzKeywords[ kw_ix ], (char*)pz_token ) == 0) {
/*
* Return the keyword token code instead of DP_EV_NAME
*/
*pRetVal = aKeywordTkn[ kw_ix ];
break;
}
} while (++kw_ix < KEYWORD_CT);
*pzScan = sv_ch; /* restore the following character */
}
return pzScan;
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*
* A quoted string has been found.
* Find the end of it and compress any escape sequences.
*/
static char*
assembleHereString( char* pzScan )
{
ag_bool trimTabs = AG_FALSE;
char zMark[ MAX_HEREMARK_LEN ];
size_t markLen = 0;
char* pzDest;
/*
* See if we are to strip leading tab chars
*/
if (*pzScan == '-') {
trimTabs = AG_TRUE;
pzScan++;
}
/*
* Skip white space up to the marker or EOL
*/
while (isspace( *pzScan )) {
if (*pzScan++ == '\n')
AG_ABEND( aprf( zErrMsg, pzProg, "HereString missing the mark",
pCurCtx->pzCtxFname, pCurCtx->lineNo ));
}
/*
* Copy the marker, noting its length
*/
{
char* pz = zMark;
while (ISNAMECHAR( *pzScan )) {
if (++markLen >= sizeof(zMark))
AG_ABEND( aprf( zErrMsg, pzProg, "HereString mark "
STR( MAX_HEREMARK_LEN ) " or more chars",
pCurCtx->pzCtxFname, pCurCtx->lineNo ));
*(pz++) = *(pzScan++);
}
if (markLen == 0)
AG_ABEND( aprf( zErrMsg, pzProg, "HereString missing the mark",
pCurCtx->pzCtxFname, pCurCtx->lineNo ));
*pz = NUL;
}
pzDest = pzScan;
pz_token = pzDest;
/*
* Skip forward to the EOL after the marker.
*/
pzScan = strchr( pzScan, '\n' );
if (pzScan == NULL)
AG_ABEND( aprf( zErrMsg, pzProg, "Unterminated HereString",
pCurCtx->pzCtxFname, pCurCtx->lineNo ));
/*
* And skip the first new line + conditionally skip tabs
*/
pCurCtx->lineNo++;
pzScan++;
if (trimTabs)
while (*pzScan == '\t') ++pzScan;
/*
* FOR as long as the text does not match the mark
* OR it matches but is a substring
* DO copy characters
*/
while ( (strncmp( pzScan, zMark, markLen ) != 0)
|| ISNAMECHAR( pzScan[ markLen ]) ) {
for (;;) {
switch (*(pzDest++) = *(pzScan++)) {
case '\n':
pCurCtx->lineNo++;
goto lineDone;
case NUL:
AG_ABEND( aprf( zErrMsg, pzProg, "Unterminated HereString",
pCurCtx->pzCtxFname, pCurCtx->lineNo ));
}
} lineDone:;
if (trimTabs)
while (*pzScan == '\t') ++pzScan;
} /* while strncmp ... */
/*
* pzDest may still equal pz_token, if no data were copied
*/
if (pzDest > (char*)pz_token)
pzDest[-1] = NUL;
else pzDest[0] = NUL;
return pzScan + markLen;
}
/*
* Local Variables:
* mode: C
* c-file-style: "stroustrup"
* indent-tabs-mode: nil
* End:
* end of agen5/defLex.c */