summaryrefslogtreecommitdiff
path: root/sys/src/cmd/rc/lex.c
diff options
context:
space:
mode:
author cinap_lenrek <cinap_lenrek@felloff.net> 2021-12-31 15:27:10 +0000
committer cinap_lenrek <cinap_lenrek@felloff.net> 2021-12-31 15:27:10 +0000
commit b90036a062ca330ac5f667cd1ee503686cbe0b80 (patch)
tree ab9538715188d5017843c6d94e2ee4c5e155448a /sys/src/cmd/rc/lex.c
parent 855cf4326f5a07d7142c2d8918f5fa856d912b85 (diff)
rc: fix everything
Untangle the lexer and interpreter thread state. Fix the file and line number error reporting, getting rid of the Xsrcfile instruction: as a whole code block can only come from a single file, stuff the source file in slot[1] of the code block instead. Remove limitations of the globber (path element limits) and be more intelligent about handling globbing by inserting the Xglob instruction only when needed instead of running it over every Xsimple argument list. Remove the fragile ndot magic and make it explicit by adding the -q flag to the . builtin command. Add the -b flag for full compilation. Make exitnext() smart, so we can speculate through rcmain and avoid the fork(). Get rid of all print(2) format functions and use io instead. Improve the io library, adding rstr() to handle tokenization, which allows us to look ahead in the already-read buffer for the terminators, avoiding a lot of string copies. Auto-indent pcmd() to make line number reporting more useful. Implement here documents properly, so they can work everywhere.
Diffstat (limited to 'sys/src/cmd/rc/lex.c')
-rw-r--r-- sys/src/cmd/rc/lex.c 230
1 files changed, 130 insertions, 100 deletions
diff --git a/sys/src/cmd/rc/lex.c b/sys/src/cmd/rc/lex.c
index 3462bed69..5e061b989 100644
--- a/sys/src/cmd/rc/lex.c
+++ b/sys/src/cmd/rc/lex.c
@@ -1,9 +1,12 @@
#include "rc.h"
-#include "exec.h"
#include "io.h"
#include "getflags.h"
#include "fns.h"
-int getnext(void);
+
+lexer *lex;
+
+int doprompt = 1;
+int nerror;
int
wordchr(int c)
@@ -21,105 +24,109 @@ idchr(int c)
*/
return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c);
}
-int future = EOF;
-int doprompt = 1;
-int inquote;
-int incomm;
-int lastc;
-int ndot;
-int nerror;
-int nlexpath;
-int lexpathsz;
-/*
- * Look ahead in the input stream
- */
-
-int
-nextc(void)
+lexer*
+newlexer(io *input, char *file)
{
- if(future==EOF)
- future = getnext();
- return future;
+ lexer *n = new(struct lexer);
+ n->input = input;
+ n->file = file;
+ n->line = 1;
+ n->eof = 0;
+ n->future = EOF;
+ n->peekc = '{';
+ n->epilog = "}\n";
+ n->lastc = 0;
+ n->inquote = 0;
+ n->incomm = 0;
+ n->lastword = 0;
+ n->lastdol = 0;
+ n->iflast = 0;
+ n->qflag = 0;
+ n->tok[0] = 0;
+ return n;
}
-/*
- * Consume the lookahead character.
- */
-int
-advance(void)
+
+void
+freelexer(lexer *p)
{
- int c = nextc();
- lastc = future;
- future = EOF;
- if(c == '\n')
- runq->lexline++;
- return c;
+ closeio(p->input);
+ free(p->file);
+ free(p);
}
+
/*
* read a character from the input stream
*/
-
-int
+static int
getnext(void)
{
int c;
- static int peekc = EOF;
- if(peekc!=EOF){
- c = peekc;
- peekc = EOF;
+
+ if(lex->peekc!=EOF){
+ c = lex->peekc;
+ lex->peekc = EOF;
return c;
}
- if(runq->eof)
+ if(lex->eof){
+epilog:
+ if(*lex->epilog)
+ return *lex->epilog++;
+ doprompt = 1;
return EOF;
+ }
if(doprompt)
pprompt();
- c = rchr(runq->cmdfd);
- if(!inquote && c=='\\'){
- c = rchr(runq->cmdfd);
- if(c=='\n' && !incomm){ /* don't continue a comment */
+ c = rchr(lex->input);
+ if(c=='\\' && !lex->inquote){
+ c = rchr(lex->input);
+ if(c=='\n' && !lex->incomm){ /* don't continue a comment */
doprompt = 1;
c=' ';
}
else{
- peekc = c;
+ lex->peekc = c;
c='\\';
}
}
- doprompt = doprompt || c=='\n' || c==EOF;
- if(c==EOF)
- runq->eof++;
- else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
+ if(c==EOF){
+ lex->eof = 1;
+ goto epilog;
+ } else {
+ if(c=='\n')
+ doprompt = 1;
+ if((!lex->qflag && flag['v']!=0) || flag['V'])
+ pchr(err, c);
+ }
return c;
}
-void
-pprompt(void)
+/*
+ * Look ahead in the input stream
+ */
+static int
+nextc(void)
{
- var *prompt;
- if(runq->iflag){
- pstr(err, promptstr);
- flush(err);
- if(newwdir){
- char dir[4096];
- int fd;
- if((fd=open("/dev/wdir", OWRITE))>=0){
- getwd(dir, sizeof(dir));
- write(fd, dir, strlen(dir));
- close(fd);
- }
- newwdir = 0;
- }
- prompt = vlook("prompt");
- if(prompt->val && prompt->val->next)
- promptstr = prompt->val->next->word;
- else
- promptstr="\t";
- }
- runq->lineno++;
- doprompt = 0;
+ if(lex->future==EOF)
+ lex->future = getnext();
+ return lex->future;
}
-void
+/*
+ * Consume the lookahead character.
+ */
+static int
+advance(void)
+{
+ int c = nextc();
+ lex->lastc = lex->future;
+ lex->future = EOF;
+ if(c == '\n')
+ lex->line++;
+ return c;
+}
+
+static void
skipwhite(void)
{
int c;
@@ -127,11 +134,11 @@ skipwhite(void)
c = nextc();
/* Why did this used to be if(!inquote && c=='#') ?? */
if(c=='#'){
- incomm = 1;
+ lex->incomm = 1;
for(;;){
c = nextc();
if(c=='\n' || c==EOF) {
- incomm = 0;
+ lex->incomm = 0;
break;
}
advance();
@@ -156,7 +163,7 @@ skipnl(void)
}
}
-int
+static int
nextis(int c)
{
if(nextc()==c){
@@ -166,12 +173,12 @@ nextis(int c)
return 0;
}
-char*
+static char*
addtok(char *p, int val)
{
if(p==0)
return 0;
- if(p==&tok[NTOK-1]){
+ if(p==&lex->tok[NTOK-1]){
*p = 0;
yyerror("token buffer too short");
return 0;
@@ -180,7 +187,7 @@ addtok(char *p, int val)
return p;
}
-char*
+static char*
addutf(char *p, int c)
{
uchar b, m;
@@ -202,16 +209,16 @@ addutf(char *p, int c)
return p;
}
-int lastdol; /* was the last token read '$' or '$#' or '"'? */
-int lastword; /* was the last token read a word or compound word terminator? */
-
int
yylex(void)
{
- int c, d = nextc();
+ int glob, c, d = nextc();
+ char *tok = lex->tok;
char *w = tok;
- struct tree *t;
+ tree *t;
+
yylval.tree = 0;
+
/*
* Embarassing sneakiness: if the last token read was a quoted or unquoted
* WORD then we alter the meaning of what follows. If the next character
@@ -219,8 +226,8 @@ yylex(void)
* if the next character is the first character of a simple or compound word,
* we insert a `^' before it.
*/
- if(lastword){
- lastword = 0;
+ if(lex->lastword){
+ lex->lastword = 0;
if(d=='('){
advance();
strcpy(tok, "( [SUB]");
@@ -231,15 +238,15 @@ yylex(void)
return '^';
}
}
- inquote = 0;
+ lex->inquote = 0;
skipwhite();
switch(c = advance()){
case EOF:
- lastdol = 0;
+ lex->lastdol = 0;
strcpy(tok, "EOF");
return EOF;
case '$':
- lastdol = 1;
+ lex->lastdol = 1;
if(nextis('#')){
strcpy(tok, "$#");
return COUNT;
@@ -251,7 +258,7 @@ yylex(void)
strcpy(tok, "$");
return '$';
case '&':
- lastdol = 0;
+ lex->lastdol = 0;
if(nextis('&')){
skipnl();
strcpy(tok, "&&");
@@ -260,7 +267,7 @@ yylex(void)
strcpy(tok, "&");
return '&';
case '|':
- lastdol = 0;
+ lex->lastdol = 0;
if(nextis(c)){
skipnl();
strcpy(tok, "||");
@@ -268,7 +275,7 @@ yylex(void)
}
case '<':
case '>':
- lastdol = 0;
+ lex->lastdol = 0;
/*
* funny redirection tokens:
* redir: arrow | arrow '[' fd ']'
@@ -355,9 +362,9 @@ yylex(void)
skipnl();
return t->type;
case '\'':
- lastdol = 0;
- lastword = 1;
- inquote = 1;
+ lex->lastdol = 0;
+ lex->lastword = 1;
+ lex->inquote = 1;
for(;;){
c = advance();
if(c==EOF)
@@ -377,28 +384,51 @@ yylex(void)
return t->type;
}
if(!wordchr(c)){
- lastdol = 0;
+ lex->lastdol = 0;
tok[0] = c;
tok[1]='\0';
return c;
}
+ glob = 0;
for(;;){
- if(c=='*' || c=='[' || c=='?' || c==GLOB)
+ if(c=='*' || c=='[' || c=='?' || c==GLOB){
+ glob = 1;
w = addtok(w, GLOB);
+ }
w = addutf(w, c);
c = nextc();
- if(lastdol?!idchr(c):!wordchr(c)) break;
+ if(lex->lastdol?!idchr(c):!wordchr(c)) break;
advance();
}
- lastword = 1;
- lastdol = 0;
+ lex->lastword = 1;
+ lex->lastdol = 0;
if(w!=0)
*w='\0';
t = klook(tok);
if(t->type!=WORD)
- lastword = 0;
+ lex->lastword = 0;
+ else
+ t->glob = glob;
t->quoted = 0;
yylval.tree = t;
return t->type;
}
+
+void
+yyerror(char *m)
+{
+ pfln(err, lex->file, lex->line);
+ pstr(err, ": ");
+ if(lex->tok[0] && lex->tok[0]!='\n')
+ pfmt(err, "token %q: ", lex->tok);
+ pfmt(err, "%s\n", m);
+ flushio(err);
+
+ lex->lastword = 0;
+ lex->lastdol = 0;
+ while(lex->lastc!='\n' && lex->lastc!=EOF) advance();
+ nerror++;
+
+ setstatus(m);
+}