%{
/* $Id: lex.l,v 2.0.1.32 2000/02/25 01:41:14 greyham Exp greyham $
*
* C manual page generator
* Lexical analyzer specification
*/
#include <ctype.h>
/* set from yylval.boolean by the line-directive rules whenever
 * process_line_directive() reports a change of file
 * (Steven Haehn Mar 19, 1996)
 */
extern boolean inbasefile; /* Steven Haehn Mar 19, 1996 */
static char *cur_file; /* current file name (malloced) */
int line_num = 1; /* current line number in file */
static int curly = 0; /* number of curly brace nesting levels */
static int square = 0; /* number of square bracket nesting levels */
static int ly_count = 0; /* number of occurrences of %% */
static int embedded = 0; /* flag for embedded compiler directives */
/* temporary string buffer */
static char buf[MAX_TEXT_LENGTH];
/* Growable character buffer used to collect comment and bracket text;
 * see new_dynbuf(), add_dynbuf() and return_dynbuf().
 */
#define DYNBUF_ALLOC 240 /* size of increment of dynamic buf */
static char *dynbuf; /* start of dynamic buf */
static int dynbuf_size; /* number of bytes allocated */
static int dynbuf_current; /* current end of buffer */
static boolean comment_ateol; /* does comment start & end at end of a line? */
static boolean comment_remember;/* remember contents of current comment? */
static boolean comment_caller; /* start state to return to when the comment ends */
static boolean body_start = FALSE; /* At the start of a function body */
/* What do_include() saves about the including file, and yywrap()
 * restores, while an included file is being scanned.
 */
typedef struct {
#ifdef FLEX_SCANNER
YY_BUFFER_STATE buffer; /* scanner buffer of the including file */
#else
FILE *fp; /* input stream of the including file */
#endif
char *file; /* name of the including file */
int line_num; /* line number reached in the including file */
} IncludeStack;
static int inc_depth = 0; /* include nesting level */
static IncludeStack inc_stack[MAX_INC_DEPTH]; /* stack of included files */
/* helpers defined in the user code section at the end of this file */
static void update_line_num _((void));
static void do_include _((char *filename, int sysinc));
static void new_dynbuf();
static void add_dynbuf _((int c));
static char *return_dynbuf();
static void get_cpp_directive();
static boolean process_line_directive _((const char *new_file));
/*
* The initial comment processing is done primarily by the rather complex lex
* rules in the various comment start states, the main functions being removal
* of leading *'s, /'s and whitespace on a line, the removal of trailing
* whitespace on a line, and the coalescing of separate comments on adjacent
* lines. The remaining bits of textual content are collected by the following
* functions, which simply strip leading and trailing blank lines.
*/
void start_comment _((boolean ateol));
int end_comment _((boolean ateol));
void add_comment _((const char *s));
void newline_comment _((void));
static int comment_newlines; /* number of newlines hit in comment */
static boolean comment_started; /* have preceding empty lines been skipped */
#ifdef FLEX_SCANNER /* flex uses YY_START instead of YYSTATE */
#define YYSTATE YY_START
#ifndef YY_START /* flex 2.3.8 & before didn't support it at all */
#define YY_START ((yy_start - 1) / 2)
#endif
#endif
#undef yywrap /* for flex */
/* SKIP skipping value assignment in an enum */
/*
 * Named patterns defined after this block:
 *   WS      one space or tab
 *   CWS     one space only
 *   WLF     any run (possibly empty) of space, tab, newline, form feed
 *   LETTER, DIGIT, ID   the characters and shape of a C identifier
 *   STRING  a double-quoted string; backslash escapes, including
 *           backslash-newline, are allowed inside it
 *   QUOTED  a STRING, a single-quoted character constant, or a
 *           backslash followed by one character
 *
 * Start states, as the rules use them:
 *   CPP1        after the # that opens a preprocessor directive
 *   INIT1       after = in a declaration, before any opening brace
 *   INIT2       inside a brace-enclosed initializer
 *   CURLY       inside a brace-enclosed body, returned as T_BRACES
 *   SQUARE      inside square brackets, returned as T_BRACKETS
 *   LEXYACC     entered when a line starts with percent close-brace;
 *               text is discarded until C code resumes
 *   COMMENT     inside a C comment
 *   COMMLINE    at the start of a continuation line of a C comment
 *   CPPCOMMENT  inside a // comment
 *   EMBEDDED    inside the parenthesized argument of __attribute__
 *
 * The p and e percent-directives that follow set lex internal table sizes.
 */
%}
WS [ \t]
CWS [ ]
WLF [ \t\n\f]*
LETTER [A-Za-z_]
DIGIT [0-9]
ID {LETTER}({LETTER}|{DIGIT})*
STRING \"(\\.|\\\n|[^"\\])*\"
QUOTED ({STRING}|\'(\\\'|[^'\n])*\'|\\.)
%p 5000
%e 2000
%s CPP1 INIT1 INIT2 CURLY SQUARE LEXYACC SKIP COMMENT COMMLINE CPPCOMMENT EMBEDDED
%%
<LEXYACC>^"%%" {
/* Count section separators; from the second one onwards the rest of
 * the file is scanned as C again.
 */
if (++ly_count >= 2)
BEGIN INITIAL;
}
<LEXYACC>^"%{" BEGIN INITIAL; /* a literal C block opens: scan it as C */
<LEXYACC>{QUOTED} update_line_num(); /* skip quoted text but count its newlines */
<LEXYACC>. ; /* discard every other character */
<INITIAL>^"%}" BEGIN LEXYACC; /* the literal C block has ended */
<INITIAL>^{WS}*#{WS}* BEGIN CPP1; /* a # at the start of a line opens a directive */
<CPP1>define{WS}+{ID} {
/* Macro definition: copy the macro name into buf, consume the rest of
 * the directive, then hand the name to new_symbol with typedef_names.
 * NOTE(review): the %s conversion is unbounded, so a name longer than
 * buf would overflow it; confirm MAX_TEXT_LENGTH is a safe limit.
 */
sscanf(yytext, "define %s", buf);
get_cpp_directive();
new_symbol(typedef_names, buf, DS_EXTERN);
}
<CPP1>include{WS}*\"[^"]+\" {
/* Include with a quoted name: sysinc is FALSE, so do_include starts
 * its search at the first include directory.
 * NOTE(review): the %[ conversion into buf is unbounded as well.
 */
sscanf(yytext, "include \"%[^\"]\"", buf);
get_cpp_directive();
do_include(buf, FALSE);
}
<CPP1>include{WS}*\<[^>]+\> {
/* Include with an angle-bracket name: sysinc is TRUE, so do_include
 * skips the first include directory.
 */
sscanf(yytext, "include <%[^>]>", buf);
get_cpp_directive();
do_include(buf, TRUE);
}
<CPP1>line{WS}+[0-9]+{WS}+\".*$ {
/* Line directive with a file name. line_num is set one too low
 * because the newline ending the directive has not been scanned yet
 * and will be counted by the newline rule.
 */
sscanf(yytext, "line %d \"%[^\"]\"",
&line_num, buf);
--line_num;
BEGIN INITIAL;
/* on a change of file, process_line_directive leaves the new
 * base-file status in yylval.boolean
 */
if (process_line_directive(buf))
inbasefile = yylval.boolean;
}
<CPP1>[0-9]+{WS}+\".*$ {
/* Same as the rule above, for the short form without the word line. */
sscanf(yytext, "%d \"%[^\"]\"", &line_num, buf);
--line_num;
BEGIN INITIAL;
if (process_line_directive(buf))
inbasefile = yylval.boolean;
}
<CPP1>[0-9]+.*$ {
/* Line number without a file name: only line_num changes. */
sscanf(yytext, "%d ", &line_num);
--line_num;
BEGIN INITIAL;
}
<CPP1>. get_cpp_directive(); /* any other directive: skip to its end */
<INITIAL>"(" return '('; /* punctuation is returned as its own character code */
<INITIAL>")" return ')';
<INITIAL>"*" return '*';
<INITIAL,SKIP>"," {
BEGIN INITIAL; /* stop skipping */
return ',';
}
<INITIAL>";" return ';';
<INITIAL>"..." return T_ELLIPSIS;
<INITIAL>{STRING} { update_line_num(); return T_STRING_LITERAL; }
<INITIAL>auto return T_AUTO;
<INITIAL>extern return T_EXTERN;
<INITIAL>register return T_REGISTER;
<INITIAL>static return T_STATIC;
<INITIAL>typedef return T_TYPEDEF;
<INITIAL>char return T_CHAR;
<INITIAL>double return T_DOUBLE;
<INITIAL>float return T_FLOAT;
<INITIAL>int return T_INT;
<INITIAL>void return T_VOID;
<INITIAL>long return T_LONG;
<INITIAL>short return T_SHORT;
<INITIAL>signed return T_SIGNED;
<INITIAL>__signed__ return T_SIGNED;
<INITIAL>__signed return T_SIGNED;
<INITIAL>unsigned return T_UNSIGNED;
<INITIAL>enum { enum_state = KEYWORD; return T_ENUM; }
<INITIAL>struct return T_STRUCT;
<INITIAL>union return T_UNION;
<INITIAL>const return T_CONST;
<INITIAL>__const__ return T_CONST;
<INITIAL>__const return T_CONST;
<INITIAL>volatile return T_VOLATILE;
<INITIAL>__volatile__ return T_VOLATILE;
<INITIAL>__volatile return T_VOLATILE;
<INITIAL>inline return T_INLINE;
<INITIAL>__inline__ return T_INLINE;
<INITIAL>__inline return T_INLINE;
<INITIAL>cdecl return T_CDECL;
<INITIAL>far return T_FAR;
<INITIAL>huge return T_HUGE;
<INITIAL>interrupt return T_INTERRUPT;
<INITIAL>near return T_NEAR;
<INITIAL>pascal return T_PASCAL;
<INITIAL>__extension__ ; /* ignored: no token is returned */
<INITIAL>__attribute__ {
/* The keyword and its parenthesized argument produce no token;
 * EMBEDDED discards the argument.
 */
BEGIN EMBEDDED;
}
<EMBEDDED>"(" ++embedded; /* count nesting of parentheses */
<EMBEDDED>")" {
/* back to INITIAL once the outermost parenthesis closes */
if (--embedded == 0)
BEGIN INITIAL;
}
<EMBEDDED>{ID}|","|{DIGIT}+|{WS} ; /* argument text is discarded */
<EMBEDDED>{QUOTED} update_line_num(); /* discarded, but newlines are counted */
<INITIAL>{ID} {
/* Any other identifier. Inside the braces of an enum the scanner
 * moves to SKIP so that what follows the name is skipped up to the
 * next comma or closing brace. The token carries the result of
 * strduplicate(yytext) in yylval.text.
 */
if (enum_state == BRACES) BEGIN SKIP;
yylval.text = strduplicate(yytext);
if (is_typedef_name(yytext))
return T_TYPEDEF_NAME;
else
return T_IDENTIFIER;
}
<INITIAL>"=" BEGIN INIT1; /* an initializer follows; its text is skipped */
<INIT1>"{" { curly = 1; BEGIN INIT2; }
<INIT1>[,;] {
/* End of an initializer without braces. The terminating character
 * is pushed back so that INITIAL returns it as its own token after
 * T_INITIALIZER.
 */
unput(yytext[yyleng-1]);
BEGIN INITIAL;
return T_INITIALIZER;
}
<INIT1>{QUOTED} update_line_num(); /* skip quoted text but count its newlines */
<INIT1>. ; /* discard initializer text */
<INIT2>"{" ++curly; /* nested brace inside the initializer */
<INIT2>"}" {
/* the brace matching the first one ends the initializer */
if (--curly == 0) {
BEGIN INITIAL;
return T_INITIALIZER;
}
}
<INIT2>{QUOTED} update_line_num(); /* skip quoted text but count its newlines */
<INIT2>. ; /* discard initializer text */
<INITIAL,SKIP>"{" {
/* Directly after the enum keyword the brace is a real token and the
 * enumerators are scanned normally. Any other opening brace starts
 * a body that is swallowed whole in the CURLY state.
 */
if (enum_state == KEYWORD)
{
enum_state = BRACES;
return '{';
}
else
{
curly = 1;
BEGIN CURLY;
body_start = TRUE; /* Look for first comment
* in the func body.
*/
/* forget the body comment kept from an earlier body */
safe_free(body_comment);
body_comment = NULL;
}
}
<INITIAL,SKIP>"}" {
BEGIN INITIAL; /* stop skipping */
return '}';
}
<CURLY>"{" ++curly; /* nested brace inside the body */
<CURLY>"}" {
/* the brace matching the first one ends the body */
if (--curly == 0) {
BEGIN INITIAL;
return T_BRACES;
}
}
<CURLY,SKIP>{QUOTED} update_line_num(); /* skip quoted text but count its newlines */
<CURLY,SKIP>. body_start = FALSE; /* other text seen: no longer at the start of the body */
<INITIAL>"[" {
/* Start collecting the bracketed text, brackets included, in a
 * fresh dynamic buffer.
 */
new_dynbuf(); add_dynbuf(yytext[0]);
square = 1; BEGIN SQUARE;
}
<SQUARE>"[" { ++square; add_dynbuf(yytext[0]); }
<SQUARE>"]" {
add_dynbuf(yytext[0]);
/* when the outermost bracket closes, the collected text becomes
 * the value of a T_BRACKETS token
 */
if (--square == 0) {
BEGIN INITIAL;
yylval.text = return_dynbuf();
return T_BRACKETS;
}
}
<SQUARE>{QUOTED}|. {
/* copy everything else into the buffer, counting any newlines */
int i;
for (i = 0; i < yyleng; ++i)
{
if (yytext[i] == '\n') ++line_num;
add_dynbuf(yytext[i]);
}
}
<INITIAL,INIT1,INIT2,CURLY,SQUARE,LEXYACC,SKIP,EMBEDDED>^{WS}*"/*"[*=-]*{CWS}+ {
/* A C comment opening at the start of a line; the decoration
 * characters and spaces after the opener are consumed here.
 * The current state is saved so the comment rules can return to it.
 */
comment_caller = YYSTATE;
start_comment(FALSE);
BEGIN COMMENT; }
<INITIAL,INIT1,INIT2,CURLY,SQUARE,LEXYACC,SKIP,EMBEDDED>^{WS}*"/*"[*=-]*[^/] {
/* Same, with no space after the opener. The last matched character
 * is only lookahead and is pushed back for the COMMENT rules.
 */
yyless(yyleng-1);
comment_caller = YYSTATE;
start_comment(FALSE);
BEGIN COMMENT; }
<INITIAL,INIT1,INIT2,CURLY,SQUARE,LEXYACC,SKIP,EMBEDDED>"/*"[*=-]*{CWS}+ {
/* A C comment opening after other text on the line, so
 * start_comment is told it begins at the end of an existing line.
 */
comment_caller = YYSTATE;
start_comment(TRUE);
BEGIN COMMENT; }
<INITIAL,INIT1,INIT2,CURLY,SQUARE,LEXYACC,SKIP,EMBEDDED>"/*"[*=-]*[^/] {
/* As above, with the lookahead character pushed back. */
yyless(yyleng-1);
comment_caller = YYSTATE;
start_comment(TRUE);
BEGIN COMMENT; }
<COMMLINE>^{CWS}*"/"+{CWS}* |
<COMMLINE>^{CWS}*"/"*"*"*{CWS}+ BEGIN COMMENT; /* leading decoration of a comment line is dropped */
<COMMLINE>^{WS}*"/"*"*"*[^/] { /* push back the lookahead character */ yyless(yyleng-1); BEGIN COMMENT; }
<COMMLINE>. { /* no decoration: rescan the character as comment text */ yyless(0); BEGIN COMMENT; }
<COMMLINE>\n newline_comment(); /* empty comment line */
<COMMENT>{WS}*"*"+"/"{WS}*\n{WS}*"/*""*"*{CWS}+ newline_comment(); /* comment closes and another opens on the next line: join them */
<COMMENT>{WS}*"*"+"/"{WS}*\n{WS}*"/*""*"*[^/] {
/* same as the previous rule, with the lookahead character pushed back */
yyless(yyleng-1); newline_comment(); }
<COMMENT>{WS}*[*=-]*"*/"{WS}*$ { int ret = end_comment(TRUE);
/* The comment closes at the end of its line. Return to the state the
 * comment interrupted; a token is returned only if end_comment
 * produced one.
 */
BEGIN comment_caller;
if (ret) return ret; }
<COMMENT>{WS}*[*=-]*"*/" { int ret = end_comment(FALSE);
/* the comment closes with more text following on the line */
BEGIN comment_caller;
if (ret) return ret; }
<COMMENT>[^*\n ]* |
<COMMENT>{WS}* |
<COMMENT>"*"+[^*/\n]* add_comment(yytext); /* ordinary comment text */
<COMMENT>{WS}*\n { /* trailing whitespace on the line is dropped */ newline_comment(); BEGIN COMMLINE; }
<INITIAL,INIT1,INIT2,CURLY,SQUARE,LEXYACC,SKIP,EMBEDDED>^{WS}*"//"[/*=-]*{WS}* {
/* A double-slash comment opening at the start of a line; decoration
 * characters and whitespace after the opener are consumed here.
 */
comment_caller = YYSTATE;
start_comment(FALSE);
BEGIN CPPCOMMENT; }
<INITIAL,INIT1,INIT2,CURLY,SQUARE,LEXYACC,SKIP,EMBEDDED>"//"[/*=-]*{WS}* {
/* A double-slash comment opening after other text on the line. */
comment_caller = YYSTATE;
start_comment(TRUE);
BEGIN CPPCOMMENT; }
<CPPCOMMENT>.* add_comment(yytext); /* the rest of the line is the comment text */
<CPPCOMMENT>\n{WS}*"//"[/*=-]*{WS}* newline_comment(); /* another such comment on the next line continues this one */
<CPPCOMMENT>\n { int ret = end_comment(TRUE);
/* The newline ends the comment. It is counted here because
 * end_comment does not touch line_num.
 */
++line_num;
BEGIN comment_caller;
if (ret) return ret; }
[ \t\f]+ ; /* whitespace not claimed by an earlier rule is ignored */
\n ++line_num; /* count lines */
. {
/* Any character no earlier rule accepted in the current state is
 * reported on stderr after the file and line prefix.
 */
output_error();
fprintf(stderr, "bad character '%c'\n", yytext[0]);
}
%%
/* Bump line_num once for every newline character found in yytext, the
 * text matched by the rule that is currently executing.
 */
static void
update_line_num ()
{
    const char *cursor;

    for (cursor = (const char *)yytext; *cursor != '\0'; ++cursor) {
        if (*cursor == '\n')
            ++line_num;
    }
}
/* Called when a comment opens. Decides whether its text is worth keeping
 * and, if so, resets the collection state and starts a new dynamic buffer.
 *
 * A comment is kept when it may be the first comment of a function body
 * (look_at_body_start and body_start both set), or when it was opened from
 * the INITIAL or SKIP state while in the base file or inside enum braces.
 * comment_caller must already hold the state the comment was opened from.
 */
void start_comment(ateol)
boolean ateol; /* does comment start at end of an existing line? */
{
    boolean first_in_body = look_at_body_start && body_start;
    boolean from_decl = comment_caller == INITIAL || comment_caller == SKIP;
    boolean of_interest = inbasefile || enum_state == BRACES;

    comment_remember = first_in_body || (from_decl && of_interest);
    if (!comment_remember)
        return;

    comment_ateol = ateol;
    comment_newlines = 0;
    comment_started = FALSE;
    new_dynbuf();
}
/* Called when a comment closes. Returns the token the scanner should hand
 * to the parser (T_EOLCOMMENT or T_COMMENT, with the text in yylval.text),
 * or 0 when no token is to be returned.
 *
 * No token results when the comment was not being remembered, when it is
 * empty or a lint directive (the text is freed), or when it is the first
 * comment of a function body (the text is kept in body_comment instead).
 */
int end_comment(ateol)
boolean ateol; /* does comment end at end of line? */
{
    /* lint directives that are ignored entirely; a prefix of 0 means the
     * whole comment must equal the word, otherwise only that many leading
     * characters are compared
     */
    static const struct {
        const char *word;
        int prefix;
    } lint_words[] = {
        { "EMPTY", 0 },
        { "FALLTHROUGH", 0 },
        { "FALLTHRU", 0 },
        { "LINTED", 0 },
        { "LINTLIBRARY", 0 },
        { "LINTSTDLIB", 0 },
        { "NOTDEFINED", 0 },
        { "NOTREACHED", 0 },
        { "NOTUSED", 0 },
        { "ARGSUSED", 8 },
        { "PRINTFLIKE", 10 },
        { "SCANFLIKE", 9 },
        { "VARARGS", 7 }
    };
    const int nwords = (int)(sizeof lint_words / sizeof lint_words[0]);
    int k;
    int discard;

    if (!comment_remember)
        return 0;

    /* an end-of-line comment must both start and end at the end of a line */
    if (!ateol)
        comment_ateol = FALSE;

    yylval.text = return_dynbuf();

    discard = (yylval.text[0] == '\0');
    for (k = 0; !discard && k < nwords; ++k) {
        if (lint_words[k].prefix == 0)
            discard = (strcmp(lint_words[k].word, yylval.text) == 0);
        else
            discard = (strncmp(lint_words[k].word, yylval.text,
                               lint_words[k].prefix) == 0);
    }
    if (discard) {
        free(yylval.text);
        return 0;
    }

    if (body_start) {
        /* first comment at start of func body: keep it aside */
        safe_free(body_comment);
        body_comment = yylval.text;
        body_start = FALSE;
        return 0;
    }

#ifdef DEBUG
    fprintf(stderr,"`%s'\n", yylval.text);
#endif
    if (comment_ateol)
        return T_EOLCOMMENT;
    return T_COMMENT;
}
/* Note a newline inside a comment. The line counter always advances; the
 * newline itself is only counted as pending (to be emitted by the next
 * add_comment) when the comment is being remembered and some text has
 * already been seen, so leading and trailing blank lines are dropped.
 */
void newline_comment()
{
    line_num++;
    if (comment_remember && comment_started)
        ++comment_newlines;
}
/* Append real text to the comment being collected. Any newlines that were
 * deferred by newline_comment are written out first. Does nothing (apart
 * from the DEBUG trace) when the comment is not being remembered.
 */
void add_comment(s)
const char *s;
{
#ifdef DEBUG
    fprintf(stderr,"`%s'\n", s);
#endif
    if (comment_remember) {
        comment_started = TRUE;

        /* flush the pending newlines ahead of the text */
        for (; comment_newlines != 0; --comment_newlines)
            add_dynbuf('\n');

        for (; *s != '\0'; ++s)
            add_dynbuf(*s);
    }
}
/* Consume the remainder of a preprocessor directive straight from the
 * input. A newline preceded by a backslash continues the directive and is
 * counted; the first newline that is not escaped is pushed back, the
 * scanner returns to INITIAL and the function returns.
 *
 * When a comment opener is seen, the current state is saved in
 * comment_caller, start_comment(TRUE) is called and the scanner state is
 * switched to COMMENT or CPPCOMMENT.
 * NOTE(review): the loop keeps consuming input after that switch, so the
 * comment state is replaced by INITIAL at the next unescaped newline;
 * confirm whether returning at that point was intended.
 */
static void
get_cpp_directive ()
{
    int prev = '\0';
    int ch;

    while ((ch = input()) > 0) {
        if (ch == '\n') {
            if (prev != '\\') {
                unput(ch);
                BEGIN INITIAL;
                return;
            }
            ++line_num;
        } else if (prev == '/' && (ch == '*' || ch == '/')) {
            /* might be able to attach comments to #defines one day */
            comment_caller = YYSTATE;
            start_comment(TRUE);
            if (ch == '*') {
                BEGIN COMMENT;
            } else {
                BEGIN CPPCOMMENT;
            }
        }
        prev = ch;
    }
}
/* Process include directive.
 *
 * Tries each include directory in turn (skipping the first one when sysinc
 * is set) and switches the scanner to the first file that can be opened,
 * after saving the current file name, line number and input source on
 * inc_stack for yywrap to restore. If no candidate can be opened the
 * directive is silently ignored. Nesting deeper than MAX_INC_DEPTH is
 * reported as an error.
 */
static void
do_include (filename, sysinc)
char *filename; /* file name */
int sysinc; /* 1 = do not search current directory */
{
    char path[MAX_TEXT_LENGTH];
    int i;
    FILE *fp;
    IncludeStack *sp;

    if (inc_depth >= MAX_INC_DEPTH) {
        output_error();
        fprintf(stderr, "includes too deeply nested\n");
        return;
    }

    for (i = sysinc != 0; i < num_inc_dir; ++i) {
        /* A candidate that does not fit in path (terminator included)
         * is skipped instead of being allowed to overrun the buffer.
         */
        if (strlen(inc_dir[i]) + strlen(filename) >= sizeof path)
            continue;

        strcpy(path, inc_dir[i]);
        strcat(path, filename);
        if ((fp = fopen(path, "r")) == NULL)
            continue;

        /* remember where we were in the including file */
        sp = inc_stack + inc_depth;
        sp->file = cur_file;
        sp->line_num = line_num;
#ifdef FLEX_SCANNER
        sp->buffer = YY_CURRENT_BUFFER;
        yy_switch_to_buffer(yy_create_buffer(fp, YY_BUF_SIZE));
#else
        sp->fp = yyin;
        yyin = fp;
#endif
        ++inc_depth;
        cur_file = strduplicate(filename);
        line_num = 0;
        return;
    }
}
/* Record the file named by a line directive.
 *
 * A leading "./" is stripped, and an empty name or "stdin" stands for
 * standard input, which is represented by cur_file == NULL.
 *
 * Returns TRUE if the current file changed, in which case cur_file has been
 * replaced and yylval.boolean says whether the new file is the base file.
 * Returns FALSE, leaving everything untouched, if the file is the same.
 */
static boolean process_line_directive(new_file)
const char *new_file;
{
    boolean new_stdin;
    boolean changed;

    /* strip leading ./ that Sun acc prepends */
    if (strncmp(new_file, "./", 2) == 0)
        new_file += 2;

    new_stdin = new_file[0] == '\0' || strcmp(new_file, "stdin") == 0;

    /* return BASEFILE token only when file changes */
    if (cur_file == NULL)
        changed = !new_stdin;
    else
        changed = strcmp(cur_file, new_file) != 0;
    if (!changed)
        return FALSE;

    safe_free(cur_file);
    cur_file = new_stdin ? NULL : strduplicate(new_file);

    /* cur_file is NULL after switching to stdin, so the names may only be
     * compared as strings when both are present; otherwise the two match
     * only if both are NULL.
     */
    if (basefile != NULL && cur_file != NULL)
        yylval.boolean = strcmp(cur_file, basefile) == 0;
    else
        yylval.boolean = cur_file == basefile;
    return TRUE;
}
/* End-of-input hook for the scanner. If an included file has just ended,
 * close it, restore the including file saved by do_include (input source,
 * name, and line number advanced by one) and return 0 so scanning carries
 * on. With no include pending, return 1 to signal the real end of input.
 */
int
yywrap ()
{
    IncludeStack *saved;

    if (inc_depth <= 0)
        return 1;

    --inc_depth;
    saved = &inc_stack[inc_depth];
    fclose(yyin);
#ifdef FLEX_SCANNER
    yy_delete_buffer(YY_CURRENT_BUFFER);
    yy_switch_to_buffer(saved->buffer);
#else
    yyin = saved->fp;
#endif
    safe_free(cur_file);
    cur_file = saved->file;
    line_num = saved->line_num + 1;
    return 0;
}
/* Start a fresh, empty dynamic buffer of DYNBUF_ALLOC bytes. outmem() is
 * called if the allocation fails. Any buffer previously held in dynbuf is
 * not freed here.
 */
static void new_dynbuf()
{
    dynbuf_size = DYNBUF_ALLOC;
    dynbuf = malloc(dynbuf_size);
    if (dynbuf == 0)
        outmem();
    dynbuf_current = 0;
}
/* Append one character to the dynamic buffer, growing it by DYNBUF_ALLOC
 * bytes first when it is full. outmem() is called if growing fails.
 */
static void add_dynbuf(c)
int c;
{
    if (dynbuf_current == dynbuf_size) {
        dynbuf_size += DYNBUF_ALLOC;
        dynbuf = realloc(dynbuf, dynbuf_size);
        if (dynbuf == 0)
            outmem();
    }
    dynbuf[dynbuf_current] = c;
    ++dynbuf_current;
}
/* Finish the dynamic buffer: terminate it with a NUL, shrink the
 * allocation to the bytes actually used and return it. outmem() is called
 * if the resize fails.
 */
static char *return_dynbuf()
{
    char *trimmed;

    add_dynbuf('\0');

    /* chop it back to size */
    trimmed = realloc(dynbuf, dynbuf_current);
    dynbuf = trimmed;
    if (trimmed == 0)
        outmem();
    return dynbuf;
}
/* Count an error and write the start of its message to stderr: the
 * current file name ("stdin" when there is none) and line number, followed
 * on a new line by the text of the current token in parentheses. The
 * caller prints the rest of the message.
 */
void
output_error ()
{
    const char *where;

    ++errors;
    where = cur_file ? cur_file : "stdin";
    fprintf(stderr, "%s:%d: ", where, line_num);
    fprintf(stderr, "\n(%s) ", yytext);
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */