rc: fix everything

Untangle the lexer and interpreter thread state. Fix the file and line number error reporting, getting rid of the Xsrcfile instruction: as the whole code block can only come from a single file, stuff the source file in slot[1] of the code block instead. Remove limitations for the globber (path element limits) and be more intelligent about handling globbing by inserting the Xglob instruction only when needed, instead of running it over every Xsimple argument list. Remove the fragile ndot magic and make it explicit by adding the -q flag to the . builtin command. Add the -b flag for full compilation. Make exitnext() smart, so we can speculate through rcmain and avoid the fork(). Get rid of all print(2) format functions and use io instead. Improve the io library, adding rstr() to handle tokenization, which allows us to look ahead in the already read buffer for the terminators, avoiding a lot of string copies. Auto-indent pcmd(), to make line number reporting more useful. Implement here documents properly, so they can work everywhere.
author: cinap_lenrek <cinap_lenrek@felloff.net> 2021-12-31 15:27:10 +0000
committer: cinap_lenrek <cinap_lenrek@felloff.net> 2021-12-31 15:27:10 +0000
commit: b90036a062ca330ac5f667cd1ee503686cbe0b80 (patch)
tree: ab9538715188d5017843c6d94e2ee4c5e155448a /sys/src/cmd/rc/lex.c
parent: 855cf4326f5a07d7142c2d8918f5fa856d912b85 (diff)
1 files changed, 130 insertions, 100 deletions
diff --git a/sys/src/cmd/rc/lex.c b/sys/src/cmd/rc/lex.c
index 3462bed69..5e061b989 100644
--- a/sys/src/cmd/rc/lex.c
+++ b/sys/src/cmd/rc/lex.c
@@ -1,9 +1,12 @@
 #include "rc.h"
-#include "exec.h"
 #include "io.h"
 #include "getflags.h"
 #include "fns.h"
-int getnext(void);
+
+lexer *lex;
+
+int doprompt = 1;
+int nerror;
 
 int
 wordchr(int c)
@@ -21,105 +24,109 @@ idchr(int c)
 	 */
 	return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c);
 }
-int future = EOF;
-int doprompt = 1;
-int inquote;
-int incomm;
-int lastc;
-int ndot;
-int nerror;
-int nlexpath;
-int lexpathsz;
 
-/*
- * Look ahead in the input stream
- */
-
-int
-nextc(void)
+lexer*
+newlexer(io *input, char *file)
 {
-	if(future==EOF)
-		future = getnext();
-	return future;
+	lexer *n = new(struct lexer);
+	n->input = input;
+	n->file = file;
+	n->line = 1;
+	n->eof = 0;
+	n->future = EOF;
+	n->peekc = '{';
+	n->epilog = "}\n";
+	n->lastc = 0;
+	n->inquote = 0;
+	n->incomm = 0;
+	n->lastword = 0;
+	n->lastdol = 0;
+	n->iflast = 0;
+	n->qflag = 0;
+	n->tok[0] = 0;
+	return n;
 }
-/*
- * Consume the lookahead character.
- */
-int
-advance(void)
+
+void
+freelexer(lexer *p)
 {
-	int c = nextc();
-	lastc = future;
-	future = EOF;
-	if(c == '\n')
-		runq->lexline++;
-	return c;
+	closeio(p->input);
+	free(p->file);
+	free(p);
 }
+
 /*
  * read a character from the input stream
  */	
-
-int
+static int
 getnext(void)
 {
 	int c;
-	static int peekc = EOF;
-	if(peekc!=EOF){
-		c = peekc;
-		peekc = EOF;
+
+	if(lex->peekc!=EOF){
+		c = lex->peekc;
+		lex->peekc = EOF;
 		return c;
 	}
-	if(runq->eof)
+	if(lex->eof){
+epilog:
+		if(*lex->epilog)
+			return *lex->epilog++;
+		doprompt = 1;
 		return EOF;
+	}
 	if(doprompt)
 		pprompt();
-	c = rchr(runq->cmdfd);
-	if(!inquote && c=='\\'){
-		c = rchr(runq->cmdfd);
-		if(c=='\n' && !incomm){		/* don't continue a comment */
+	c = rchr(lex->input);
+	if(c=='\\' && !lex->inquote){
+		c = rchr(lex->input);
+		if(c=='\n' && !lex->incomm){		/* don't continue a comment */
 			doprompt = 1;
 			c=' ';
 		}
 		else{
-			peekc = c;
+			lex->peekc = c;
 			c='\\';
 		}
 	}
-	doprompt = doprompt || c=='\n' || c==EOF;
-	if(c==EOF)
-		runq->eof++;
-	else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
+	if(c==EOF){
+		lex->eof = 1;
+		goto epilog;
+	} else {
+		if(c=='\n')
+			doprompt = 1;
+		if((!lex->qflag && flag['v']!=0) || flag['V'])
+			pchr(err, c);
+	}
 	return c;
 }
 
-void
-pprompt(void)
+/*
+ * Look ahead in the input stream
+ */
+static int
+nextc(void)
 {
-	var *prompt;
-	if(runq->iflag){
-		pstr(err, promptstr);
-		flush(err);
-		if(newwdir){
-			char dir[4096];
-			int fd;
-			if((fd=open("/dev/wdir", OWRITE))>=0){
-				getwd(dir, sizeof(dir));
-				write(fd, dir, strlen(dir));
-				close(fd);
-			}
-			newwdir = 0;
-		}
-		prompt = vlook("prompt");
-		if(prompt->val && prompt->val->next)
-			promptstr = prompt->val->next->word;
-		else
-			promptstr="\t";
-	}
-	runq->lineno++;
-	doprompt = 0;
+	if(lex->future==EOF)
+		lex->future = getnext();
+	return lex->future;
 }
 
-void
+/*
+ * Consume the lookahead character.
+ */
+static int
+advance(void)
+{
+	int c = nextc();
+	lex->lastc = lex->future;
+	lex->future = EOF;
+	if(c == '\n')
+		lex->line++;
+	return c;
+}
+
+static void
 skipwhite(void)
 {
 	int c;
@@ -127,11 +134,11 @@ skipwhite(void)
 		c = nextc();
 		/* Why did this used to be  if(!inquote && c=='#') ?? */
 		if(c=='#'){
-			incomm = 1;
+			lex->incomm = 1;
 			for(;;){
 				c = nextc();
 				if(c=='\n' || c==EOF) {
-					incomm = 0;
+					lex->incomm = 0;
 					break;
 				}
 				advance();
@@ -156,7 +163,7 @@ skipnl(void)
 	}
 }
 
-int
+static int
 nextis(int c)
 {
 	if(nextc()==c){
@@ -166,12 +173,12 @@ nextis(int c)
 	return 0;
 }
 
-char*
+static char*
 addtok(char *p, int val)
 {
 	if(p==0)
 		return 0;
-	if(p==&tok[NTOK-1]){
+	if(p==&lex->tok[NTOK-1]){
 		*p = 0;
 		yyerror("token buffer too short");
 		return 0;
@@ -180,7 +187,7 @@ addtok(char *p, int val)
 	return p;
 }
 
-char*
+static char*
 addutf(char *p, int c)
 {
 	uchar b, m;
@@ -202,16 +209,16 @@ addutf(char *p, int c)
 	return p;
 }
 
-int lastdol;	/* was the last token read '$' or '$#' or '"'? */
-int lastword;	/* was the last token read a word or compound word terminator? */
-
 int
 yylex(void)
 {
-	int c, d = nextc();
+	int glob, c, d = nextc();
+	char *tok = lex->tok;
 	char *w = tok;
-	struct tree *t;
+	tree *t;
+
 	yylval.tree = 0;
+
 	/*
 	 * Embarassing sneakiness:  if the last token read was a quoted or unquoted
 	 * WORD then we alter the meaning of what follows.  If the next character
@@ -219,8 +226,8 @@ yylex(void)
 	 * if the next character is the first character of a simple or compound word,
 	 * we insert a `^' before it.
 	 */
-	if(lastword){
-		lastword = 0;
+	if(lex->lastword){
+		lex->lastword = 0;
 		if(d=='('){
 			advance();
 			strcpy(tok, "( [SUB]");
@@ -231,15 +238,15 @@ yylex(void)
 			return '^';
 		}
 	}
-	inquote = 0;
+	lex->inquote = 0;
 	skipwhite();
 	switch(c = advance()){
 	case EOF:
-		lastdol = 0;
+		lex->lastdol = 0;
 		strcpy(tok, "EOF");
 		return EOF;
 	case '$':
-		lastdol = 1;
+		lex->lastdol = 1;
 		if(nextis('#')){
 			strcpy(tok, "$#");
 			return COUNT;
@@ -251,7 +258,7 @@ yylex(void)
 		strcpy(tok, "$");
 		return '$';
 	case '&':
-		lastdol = 0;
+		lex->lastdol = 0;
 		if(nextis('&')){
 			skipnl();
 			strcpy(tok, "&&");
@@ -260,7 +267,7 @@ yylex(void)
 		strcpy(tok, "&");
 		return '&';
 	case '|':
-		lastdol = 0;
+		lex->lastdol = 0;
 		if(nextis(c)){
 			skipnl();
 			strcpy(tok, "||");
@@ -268,7 +275,7 @@ yylex(void)
 		}
 	case '<':
 	case '>':
-		lastdol = 0;
+		lex->lastdol = 0;
 		/*
 		 * funny redirection tokens:
 		 *	redir:	arrow | arrow '[' fd ']'
@@ -355,9 +362,9 @@ yylex(void)
 			skipnl();
 		return t->type;
 	case '\'':
-		lastdol = 0;
-		lastword = 1;
-		inquote = 1;
+		lex->lastdol = 0;
+		lex->lastword = 1;
+		lex->inquote = 1;
 		for(;;){
 			c = advance();
 			if(c==EOF)
@@ -377,28 +384,51 @@ yylex(void)
 		return t->type;
 	}
 	if(!wordchr(c)){
-		lastdol = 0;
+		lex->lastdol = 0;
 		tok[0] = c;
 		tok[1]='\0';
 		return c;
 	}
+	glob = 0;
 	for(;;){
-		if(c=='*' || c=='[' || c=='?' || c==GLOB)
+		if(c=='*' || c=='[' || c=='?' || c==GLOB){
+			glob = 1;
 			w = addtok(w, GLOB);
+		}
 		w = addutf(w, c);
 		c = nextc();
-		if(lastdol?!idchr(c):!wordchr(c)) break;
+		if(lex->lastdol?!idchr(c):!wordchr(c)) break;
 		advance();
 	}
 
-	lastword = 1;
-	lastdol = 0;
+	lex->lastword = 1;
+	lex->lastdol = 0;
 	if(w!=0)
 		*w='\0';
 	t = klook(tok);
 	if(t->type!=WORD)
-		lastword = 0;
+		lex->lastword = 0;
+	else
+		t->glob = glob;
 	t->quoted = 0;
 	yylval.tree = t;
 	return t->type;
 }
+
+void
+yyerror(char *m)
+{
+	pfln(err, lex->file, lex->line);
+	pstr(err, ": ");
+	if(lex->tok[0] && lex->tok[0]!='\n')
+		pfmt(err, "token %q: ", lex->tok);
+	pfmt(err, "%s\n", m);
+	flushio(err);
+
+	lex->lastword = 0;
+	lex->lastdol = 0;
+	while(lex->lastc!='\n' && lex->lastc!=EOF) advance();
+	nerror++;
+
+	setstatus(m);
+}
author	cinap_lenrek <cinap_lenrek@felloff.net>	2021-12-31 15:27:10 +0000
committer	cinap_lenrek <cinap_lenrek@felloff.net>	2021-12-31 15:27:10 +0000
commit	b90036a062ca330ac5f667cd1ee503686cbe0b80 (patch)
tree	ab9538715188d5017843c6d94e2ee4c5e155448a /sys/src/cmd/rc/lex.c
parent	855cf4326f5a07d7142c2d8918f5fa856d912b85 (diff)