diff options
author | cinap_lenrek <cinap_lenrek@felloff.net> | 2021-12-31 15:27:10 +0000 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@felloff.net> | 2021-12-31 15:27:10 +0000 |
commit | b90036a062ca330ac5f667cd1ee503686cbe0b80 (patch) | |
tree | ab9538715188d5017843c6d94e2ee4c5e155448a /sys/src/cmd/rc/lex.c | |
parent | 855cf4326f5a07d7142c2d8918f5fa856d912b85 (diff) |
rc: fix everything
Untangle the lexer and interpreter thread state.
Fix the file and line number error reporting, getting rid of
Xsrcfile instruction, as the whole code block can only come
from a single file, stuff the source file in slot[1] of the
code block instead.
Remove limitations for globber (path element limits)
and be more intelligent about handling globbing by
inserting Xglob instruction only when needed and not
run it over every Xsimple argument list.
Remove fragile ndot magic and make it explicit by adding
the -q flag to . builtin command.
Add -b flag for full compilation.
Make exitnext() smart, so we can speculate thru rcmain and
avoid the fork().
Get rid of all print(2) format functions and use io
instead.
Improve the io library, adding rstr() to handle tokenization,
which allows us to look ahead in the already read buffer
for the terminators, avoiding alot of string copies.
Auto indent pcmd(), to make line number reporting more usefull.
Implement here documents properly, so they can work everywhere.
Diffstat (limited to 'sys/src/cmd/rc/lex.c')
-rw-r--r-- | sys/src/cmd/rc/lex.c | 230 |
1 files changed, 130 insertions, 100 deletions
diff --git a/sys/src/cmd/rc/lex.c b/sys/src/cmd/rc/lex.c index 3462bed69..5e061b989 100644 --- a/sys/src/cmd/rc/lex.c +++ b/sys/src/cmd/rc/lex.c @@ -1,9 +1,12 @@ #include "rc.h" -#include "exec.h" #include "io.h" #include "getflags.h" #include "fns.h" -int getnext(void); + +lexer *lex; + +int doprompt = 1; +int nerror; int wordchr(int c) @@ -21,105 +24,109 @@ idchr(int c) */ return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c); } -int future = EOF; -int doprompt = 1; -int inquote; -int incomm; -int lastc; -int ndot; -int nerror; -int nlexpath; -int lexpathsz; -/* - * Look ahead in the input stream - */ - -int -nextc(void) +lexer* +newlexer(io *input, char *file) { - if(future==EOF) - future = getnext(); - return future; + lexer *n = new(struct lexer); + n->input = input; + n->file = file; + n->line = 1; + n->eof = 0; + n->future = EOF; + n->peekc = '{'; + n->epilog = "}\n"; + n->lastc = 0; + n->inquote = 0; + n->incomm = 0; + n->lastword = 0; + n->lastdol = 0; + n->iflast = 0; + n->qflag = 0; + n->tok[0] = 0; + return n; } -/* - * Consume the lookahead character. - */ -int -advance(void) + +void +freelexer(lexer *p) { - int c = nextc(); - lastc = future; - future = EOF; - if(c == '\n') - runq->lexline++; - return c; + closeio(p->input); + free(p->file); + free(p); } + /* * read a character from the input stream */ - -int +static int getnext(void) { int c; - static int peekc = EOF; - if(peekc!=EOF){ - c = peekc; - peekc = EOF; + + if(lex->peekc!=EOF){ + c = lex->peekc; + lex->peekc = EOF; return c; } - if(runq->eof) + if(lex->eof){ +epilog: + if(*lex->epilog) + return *lex->epilog++; + doprompt = 1; return EOF; + } if(doprompt) pprompt(); - c = rchr(runq->cmdfd); - if(!inquote && c=='\\'){ - c = rchr(runq->cmdfd); - if(c=='\n' && !incomm){ /* don't continue a comment */ + c = rchr(lex->input); + if(c=='\\' && !lex->inquote){ + c = rchr(lex->input); + if(c=='\n' && !lex->incomm){ /* don't continue a comment */ doprompt = 1; c=' '; } else{ - peekc = c; + lex->peekc = c; c='\\'; } } - doprompt = doprompt || c=='\n' || c==EOF; - if(c==EOF) - runq->eof++; - else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c); + if(c==EOF){ + lex->eof = 1; + goto epilog; + } else { + if(c=='\n') + doprompt = 1; + if((!lex->qflag && flag['v']!=0) || flag['V']) + pchr(err, c); + } return c; } -void -pprompt(void) +/* + * Look ahead in the input stream + */ +static int +nextc(void) { - var *prompt; - if(runq->iflag){ - pstr(err, promptstr); - flush(err); - if(newwdir){ - char dir[4096]; - int fd; - if((fd=open("/dev/wdir", OWRITE))>=0){ - getwd(dir, sizeof(dir)); - write(fd, dir, strlen(dir)); - close(fd); - } - newwdir = 0; - } - prompt = vlook("prompt"); - if(prompt->val && prompt->val->next) - promptstr = prompt->val->next->word; - else - promptstr="\t"; - } - runq->lineno++; - doprompt = 0; + if(lex->future==EOF) + lex->future = getnext(); + return lex->future; } -void +/* + * Consume the lookahead character. + */ +static int +advance(void) +{ + int c = nextc(); + lex->lastc = lex->future; + lex->future = EOF; + if(c == '\n') + lex->line++; + return c; +} + +static void skipwhite(void) { int c; @@ -127,11 +134,11 @@ skipwhite(void) c = nextc(); /* Why did this used to be if(!inquote && c=='#') ?? */ if(c=='#'){ - incomm = 1; + lex->incomm = 1; for(;;){ c = nextc(); if(c=='\n' || c==EOF) { - incomm = 0; + lex->incomm = 0; break; } advance(); @@ -156,7 +163,7 @@ skipnl(void) } } -int +static int nextis(int c) { if(nextc()==c){ @@ -166,12 +173,12 @@ nextis(int c) return 0; } -char* +static char* addtok(char *p, int val) { if(p==0) return 0; - if(p==&tok[NTOK-1]){ + if(p==&lex->tok[NTOK-1]){ *p = 0; yyerror("token buffer too short"); return 0; @@ -180,7 +187,7 @@ addtok(char *p, int val) return p; } -char* +static char* addutf(char *p, int c) { uchar b, m; @@ -202,16 +209,16 @@ addutf(char *p, int c) return p; } -int lastdol; /* was the last token read '$' or '$#' or '"'? */ -int lastword; /* was the last token read a word or compound word terminator? */ - int yylex(void) { - int c, d = nextc(); + int glob, c, d = nextc(); + char *tok = lex->tok; char *w = tok; - struct tree *t; + tree *t; + yylval.tree = 0; + /* * Embarassing sneakiness: if the last token read was a quoted or unquoted * WORD then we alter the meaning of what follows. If the next character @@ -219,8 +226,8 @@ yylex(void) * if the next character is the first character of a simple or compound word, * we insert a `^' before it. */ - if(lastword){ - lastword = 0; + if(lex->lastword){ + lex->lastword = 0; if(d=='('){ advance(); strcpy(tok, "( [SUB]"); @@ -231,15 +238,15 @@ yylex(void) return '^'; } } - inquote = 0; + lex->inquote = 0; skipwhite(); switch(c = advance()){ case EOF: - lastdol = 0; + lex->lastdol = 0; strcpy(tok, "EOF"); return EOF; case '$': - lastdol = 1; + lex->lastdol = 1; if(nextis('#')){ strcpy(tok, "$#"); return COUNT; @@ -251,7 +258,7 @@ yylex(void) strcpy(tok, "$"); return '$'; case '&': - lastdol = 0; + lex->lastdol = 0; if(nextis('&')){ skipnl(); strcpy(tok, "&&"); @@ -260,7 +267,7 @@ yylex(void) strcpy(tok, "&"); return '&'; case '|': - lastdol = 0; + lex->lastdol = 0; if(nextis(c)){ skipnl(); strcpy(tok, "||"); @@ -268,7 +275,7 @@ yylex(void) } case '<': case '>': - lastdol = 0; + lex->lastdol = 0; /* * funny redirection tokens: * redir: arrow | arrow '[' fd ']' @@ -355,9 +362,9 @@ yylex(void) skipnl(); return t->type; case '\'': - lastdol = 0; - lastword = 1; - inquote = 1; + lex->lastdol = 0; + lex->lastword = 1; + lex->inquote = 1; for(;;){ c = advance(); if(c==EOF) @@ -377,28 +384,51 @@ yylex(void) return t->type; } if(!wordchr(c)){ - lastdol = 0; + lex->lastdol = 0; tok[0] = c; tok[1]='\0'; return c; } + glob = 0; for(;;){ - if(c=='*' || c=='[' || c=='?' || c==GLOB) + if(c=='*' || c=='[' || c=='?' || c==GLOB){ + glob = 1; w = addtok(w, GLOB); + } w = addutf(w, c); c = nextc(); - if(lastdol?!idchr(c):!wordchr(c)) break; + if(lex->lastdol?!idchr(c):!wordchr(c)) break; advance(); } - lastword = 1; - lastdol = 0; + lex->lastword = 1; + lex->lastdol = 0; if(w!=0) *w='\0'; t = klook(tok); if(t->type!=WORD) - lastword = 0; + lex->lastword = 0; + else + t->glob = glob; t->quoted = 0; yylval.tree = t; return t->type; } + +void +yyerror(char *m) +{ + pfln(err, lex->file, lex->line); + pstr(err, ": "); + if(lex->tok[0] && lex->tok[0]!='\n') + pfmt(err, "token %q: ", lex->tok); + pfmt(err, "%s\n", m); + flushio(err); + + lex->lastword = 0; + lex->lastdol = 0; + while(lex->lastc!='\n' && lex->lastc!=EOF) advance(); + nerror++; + + setstatus(m); +} |