/* ports//devel/plan9port/work/plan9/9pm/src/cmd/rcsh/lex.c */

#include "rc.h"
#include "y.tab.h"

#define	NTOK	8192	/* number of bytes in the token text buffer tok[] */

int getnext(void);	/* defined further down; nextc() calls it */

int	future=EOF;	/* one-character lookahead filled by nextc(); EOF means nothing is buffered */
int	doprompt=1;	/* nonzero makes getnext() call pprompt() before it reads more input */
int	inquote;	/* set by yylex() while inside '...'; getnext() then leaves backslashes alone */
int	nerror;		/* incremented once per yyerror() call */
char	*promptstr;	/* string pprompt() writes to err when the input is interactive */

char	tok[NTOK];	/* text of the token being built; yyerror() quotes it in its message */

int	lastdol;	/* was the last token read '$' or '$#' or '"'? */
int	lastword;	/* was the last token read a word or compound word terminator? */
int	lastc;		/* most recent character consumed by advance() */

/*
 * Hand every reserved word to kenter(), paired with its token type.
 * The entries are registered in the order listed in the table.
 * (kenter() lives outside this file; presumably it fills the table
 * that klook() searches -- confirm against its definition.)
 */
void
kinit(void)
{
	static struct {
		int	type;
		char	*name;
	} kw[] = {
		{ FOR,		"for" },
		{ IN,		"in" },
		{ WHILE,	"while" },
		{ IF,		"if" },
		{ NOT,		"not" },
		{ TWIDDLE,	"~" },
		{ BANG,		"!" },
		{ SUBSHELL,	"@" },
		{ SWITCH,	"switch" },
		{ FN,		"fn" },
	};
	int i;

	for(i=0; i<(int)(sizeof kw/sizeof kw[0]); i++)
		kenter(kw[i].type, kw[i].name);
}

/*
 * Return 1 if c may appear inside an unquoted word, 0 otherwise.
 * EOF, white space and the shell's special characters are rejected.
 * A NUL also yields 0, because strchr() matches the string terminator.
 */
int
wordchr(int c)
{
	if(c==EOF)
		return 0;
	return strchr("\n \t\r#;&|^$=`'{}()<>", c)==0;
}

/*
 * Return 1 if c may appear in a variable name following '$', 0 otherwise.
 * Anything at or below the space character (including EOF) is rejected,
 * as is every punctuation character in the list below.
 */
int
idchr(int c)
{
	/*
	 * Formerly:
	 * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
	 *	|| c=='_' || c=='*';
	 */
	if(c<=' ')
		return 0;
	return strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c)==0;
}

/*
 * Peek at the next input character without consuming it.
 * The character is held in `future'; it is fetched with getnext()
 * only when the slot is empty (marked by EOF).
 */
int
nextc(void)
{
	if(future!=EOF)
		return future;
	return future=getnext();
}

/*
 * Consume the lookahead character and return it.
 * The consumed character is remembered in lastc and the
 * lookahead slot is emptied.
 */
int
advance(void)
{
	int ch;

	ch=nextc();	/* loads `future' if it was empty; ch==future afterwards */
	lastc=ch;
	future=EOF;
	return ch;
}

/*
 * Read one character from the current input, runq->cmdfd.
 *
 * Outside quotes a backslash is looked at together with the character
 * after it: backslash-newline is returned as a single space (and a new
 * prompt is requested), while a backslash followed by anything else is
 * returned as '\\' and the following character is held in peekc for
 * the next call.
 *
 * Returns EOF once the input is exhausted; runq->eof is bumped the first
 * time rchr() reports EOF so later calls return EOF without reading.
 * When flag['V'] is set, or flag['v'] is set and ndot>=2, each character
 * read here is echoed to err.
 */
int
getnext(void)
{
	int c;
	static int peekc=EOF;	/* character read after a backslash, not yet delivered */

	if(peekc!=EOF){
		c=peekc;
		peekc=EOF;
		return c;
	}
	if(runq->eof)
		return EOF;
	if(doprompt)
		pprompt();
	c=rchr(runq->cmdfd);
	if(!inquote && c=='\\'){
		c=rchr(runq->cmdfd);
		if(c=='\n'){
			/* line continuation: the pair reads as one blank */
			doprompt=1;
			c=' ';
		}
		else{
			/* ordinary backslash: deliver it now, the next char later */
			peekc=c;
			c='\\';
		}
	}
	/* ask for a prompt before the next read after a newline or EOF */
	doprompt=doprompt || c=='\n' || c==EOF;
	if(c==EOF)
		runq->eof++;
	else if(flag['V'] || (ndot>=2 && flag['v']))
		pchr(err, c);
	return c;
}

void
pprompt(void)
{
	Var *prompt;

	if(runq->iflag){
		pstr(err, promptstr);
		flush(err);
		prompt=vlook("prompt");
		if(prompt->val && prompt->val->next)
			promptstr=prompt->val->next->word;
		else
			promptstr="\t";
	}
	runq->lineno++;
	doprompt=0;
}

/*
 * Skip blanks, tabs, carriage returns and '#' comments.
 * A comment runs up to, but does not include, the next newline or EOF.
 * On return the lookahead holds the first character that was not skipped.
 */
void
skipwhite(void)
{
	int ch;

	for(;;){
		ch=nextc();
		/* historical note: this test was once  !inquote && ch=='#'  */
		if(ch=='#'){
			while((ch=nextc())!='\n' && ch!=EOF)
				advance();
		}
		switch(ch){
		case ' ':
		case '\t':
		case '\r':
			advance();
			break;
		default:
			return;
		}
	}
}

/*
 * Skip white space and comments together with any newlines between
 * them.  Stops with the lookahead on the first character that is
 * neither white space, comment text nor newline.
 */
void
skipnl(void)
{
	skipwhite();
	while(nextc()=='\n'){
		advance();
		skipwhite();
	}
}

/*
 * If the lookahead character equals c, consume it and return 1.
 * Otherwise leave the input untouched and return 0.
 */
int
nextis(int c)
{
	if(nextc()!=c)
		return 0;
	advance();
	return 1;
}

/*
 * Append the byte val to the token being built in tok[].
 *
 * p is the current write position inside tok[], or 0 if an earlier
 * call already overflowed.  Returns the new write position, or 0 when
 * the token does not fit; in that case tok[] is left NUL-terminated and
 * "token buffer too short" is reported through yyerror().
 *
 * The last slot, tok[NTOK-1], is reserved for the terminator, so that
 * neither this function nor a caller's final  *p='\0'  can write past
 * the end of the array.
 */
char *
addtok(char *p, int val)
{
	if(p==0)
		return 0;
	if(p>=&tok[NTOK-1]){
		tok[NTOK-1]=0;
		yyerror("token buffer too short");
		return 0;
	}
	*p++=val;
	return p;
}

/*
 * Append c to the token and, when c starts a multi-byte sequence,
 * consume and append its trailing bytes too: one more byte if
 * twobyte(c), two more if threebyte(c).  (Both tests are defined
 * outside this file; presumably they classify a UTF-8 lead byte.)
 * Returns the new write position as produced by addtok().
 */
char *
addutf(char *p, int c)
{
	int more;

	p=addtok(p, c);
	if(twobyte(c))
		more=1;
	else if(threebyte(c))
		more=2;
	else
		more=0;
	while(more-- > 0)
		p=addtok(p, advance());
	return p;
}

int
yylex(void)
{
	int c, d=nextc();
	char *w=tok;
	Tree *t;

	yylval.tree=0;
	/*
	 * Embarassing sneakiness:  if the last token read was a quoted or unquoted
	 * WORD then we alter the meaning of what follows.  If the next character
	 * is `(', we return SUB (a subscript paren) and consume the `('.  Otherwise,
	 * if the next character is the first character of a simple or compound word,
	 * we insert a `^' before it.
	 */
	if(lastword){
		lastword=0;
		if(d=='('){
			advance();
			strcpy(tok, "( [SUB]");
			return SUB;
		}
		if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
			strcpy(tok, "^");
			return '^';
		}
	}
	inquote=0;
	skipwhite();
	switch(c=advance()){
	case EOF:
		lastdol=0;
		strcpy(tok, "EOF");
		return EOF;
	case '$':
		lastdol=1;
		if(nextis('#')){
			strcpy(tok, "$#");
			return COUNT;
		}
		if(nextis('"')){
			strcpy(tok, "$\"");
			return '"';
		}
		strcpy(tok, "$");
		return '$';
	case '&':
		lastdol=0;
		if(nextis('&')){
			skipnl();
			strcpy(tok, "&&");
			return ANDAND;
		}
		strcpy(tok, "&");
		return '&';
	case '|':
		lastdol=0;
		if(nextis(c)){
			skipnl();
			strcpy(tok, "||");
			return OROR;
		}
	case '<':
	case '>':
		lastdol=0;
		/*
		 * funny redirection tokens:
		 *	redir:	arrow | arrow '[' fd ']'
		 *	arrow:	'<' | '<<' | '>' | '>>' | '|'
		 *	fd:	digit | digit '=' | digit '=' digit
		 *	digit:	'0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
		 * some possibilities are nonsensical and get a message.
		 */
		*w++=c;
		t=newtree();
		switch(c){
		case '|':
			t->type=PIPE;
			t->fd0=1;
			t->fd1=0;
			break;
		case '>':
			t->type=REDIR;
			if(nextis(c)){
				t->rtype=APPEND;
				*w++=c;
			}
			else t->rtype=WRITE;
			t->fd0=1;
			break;
		case '<':
			t->type=REDIR;
			if(nextis(c)){
				t->rtype=HERE;
				*w++=c;
			}
			else t->rtype=READ;
			t->fd0=0;
			break;
		}
		if(nextis('[')){
			*w++='[';
			c=advance();
			*w++=c;
			if(c<'0' || '9'<c){
			RedirErr:
				*w=0;
				yyerror(t->type==PIPE?"pipe syntax"
						:"redirection syntax");
				return EOF;
			}
			t->fd0=0;
			do{
				t->fd0=t->fd0*10+c-'0';
				*w++=c;
				c=advance();
			}while('0'<=c && c<='9');
			if(c=='='){
				*w++='=';
				if(t->type==REDIR)
					t->type=DUP;
				c=advance();
				if('0'<=c && c<='9'){
					t->rtype=DUPFD;
					t->fd1=t->fd0;
					t->fd0=0;
					do{
						t->fd0=t->fd0*10+c-'0';
						*w++=c;
						c=advance();
					}while('0'<=c && c<='9');
				}
				else{
					if(t->type==PIPE) goto RedirErr;
					t->rtype=CLOSE;
				}
			}
			if(c!=']' || t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
				goto RedirErr;
			*w++=']';
		}
		*w='\0';
		yylval.tree=t;
		if(t->type==PIPE) skipnl();
		return t->type;
	case '\'':
		lastdol=0;
		lastword=1;
		inquote=1;
		for(;;){
			c=advance();
			if(c==EOF) break;
			if(c=='\''){
				if(nextc()!='\'')
					break;
				advance();
			}
			w=addutf(w, c);
		}
		if(w!=0) *w='\0';
		t=token(tok, WORD);
		t->quoted=1;
		yylval.tree=t;
		return t->type;
	}
	if(!wordchr(c)){
		lastdol=0;
		tok[0]=c;
		tok[1]='\0';
		return c;
	}
	for(;;){
		/* next line should have (char)c==GLOB, but ken's compiler is broken */
		if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
			w=addtok(w, GLOB);
		w=addutf(w, c);
		c=nextc();
		if(lastdol?!idchr(c):!wordchr(c)) break;
		advance();
	}
Out:
	lastword=1;
	lastdol=0;
	if(w!=0) *w='\0';
	t=klook(tok);
	if(t->type!=WORD) lastword=0;
	t->quoted=0;
	yylval.tree=t;
	return t->type;
}

/*
 * Report the syntax error m on err.
 * The message is prefixed with "rcsh: ", the command file name when
 * there is one, the line number when the input is not interactive, and
 * the current token text unless it is empty or starts with a newline.
 * Afterwards the lexer state is reset, the rest of the current input
 * line is discarded and nerror is incremented.
 */
void
yyerror(char *m)
{
	pfmt(err, "rcsh: ");
	if(runq->cmdfile)
		pfmt(err, "file %s: ", runq->cmdfile);
	if(!runq->iflag)
		pfmt(err, "line %d: ", runq->lineno);
	if(tok[0]!='\0' && tok[0]!='\n')
		pfmt(err, "token %q: ", tok);
	pfmt(err, "%s\n", m);
	flush(err);

	lastdol=0;
	lastword=0;
	/* swallow input until a newline or EOF has been consumed */
	for(;;){
		if(lastc=='\n' || lastc==EOF)
			break;
		advance();
	}
	nerror++;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */