diff options
author | Taru Karttunen <taruti@taruti.net> | 2011-03-30 15:46:40 +0300 |
---|---|---|
committer | Taru Karttunen <taruti@taruti.net> | 2011-03-30 15:46:40 +0300 |
commit | e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch) | |
tree | d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/cmd/lex/sub2.c |
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/cmd/lex/sub2.c')
-rwxr-xr-x | sys/src/cmd/lex/sub2.c | 851 |
1 files changed, 851 insertions, 0 deletions
diff --git a/sys/src/cmd/lex/sub2.c b/sys/src/cmd/lex/sub2.c new file mode 100755 index 000000000..673fccb75 --- /dev/null +++ b/sys/src/cmd/lex/sub2.c @@ -0,0 +1,851 @@ +# include "ldefs.h" +void +cfoll(int v) +{ + int i,j,k; + uchar *p; + i = name[v]; + if(i < NCH) i = 1; /* character */ + switch(i){ + case 1: case RSTR: case RCCL: case RNCCL: case RNULLS: + for(j=0;j<tptr;j++) + tmpstat[j] = FALSE; + count = 0; + follow(v); +# ifdef PP + padd(foll,v); /* packing version */ +# endif +# ifndef PP + add(foll,v); /* no packing version */ +# endif + if(i == RSTR) cfoll(left[v]); + else if(i == RCCL || i == RNCCL){ /* compress ccl list */ + for(j=1; j<NCH;j++) + symbol[j] = (i==RNCCL); + p = ptr[v]; + while(*p) + symbol[*p++] = (i == RCCL); + p = pcptr; + for(j=1;j<NCH;j++) + if(symbol[j]){ + for(k=0;p+k < pcptr; k++) + if(cindex[j] == *(p+k)) + break; + if(p+k >= pcptr)*pcptr++ = cindex[j]; + } + *pcptr++ = 0; + if(pcptr > pchar + pchlen) + error("Too many packed character classes"); + ptr[v] = p; + name[v] = RCCL; /* RNCCL eliminated */ +# ifdef DEBUG + if(debug && *p){ + print("ccl %d: %d",v,*p++); + while(*p) + print(", %d",*p++); + print("\n"); + } +# endif + } + break; + case CARAT: + cfoll(left[v]); + break; + case STAR: case PLUS: case QUEST: case RSCON: + cfoll(left[v]); + break; + case BAR: case RCAT: case DIV: case RNEWE: + cfoll(left[v]); + cfoll(right[v]); + break; +# ifdef DEBUG + case FINAL: + case S1FINAL: + case S2FINAL: + break; + default: + warning("bad switch cfoll %d",v); +# endif + } +} + +# ifdef DEBUG +void +pfoll(void) + { + int i,k,*p; + int j; + /* print sets of chars which may follow positions */ + print("pos\tchars\n"); + for(i=0;i<tptr;i++) + if(p=foll[i]){ + j = *p++; + if(j >= 1){ + print("%d:\t%d",i,*p++); + for(k=2;k<=j;k++) + print(", %d",*p++); + print("\n"); + } + } +} +# endif + +void +add(int **array, int n) +{ + int i, *temp; + uchar *ctemp; + + temp = nxtpos; + ctemp = tmpstat; + array[n] = nxtpos; /* note no packing is done in positions */ + *temp++ = count; + for(i=0;i<tptr;i++) + if(ctemp[i] == TRUE) + *temp++ = i; + nxtpos = temp; + if(nxtpos >= positions+maxpos) + error("Too many positions %s",(maxpos== MAXPOS?"\nTry using %p num":"")); +} + +void +follow(int v) +{ + int p; + if(v >= tptr-1)return; + p = parent[v]; + if(p == 0) return; + switch(name[p]){ + /* will not be CHAR RNULLS FINAL S1FINAL S2FINAL RCCL RNCCL */ + case RSTR: + if(tmpstat[p] == FALSE){ + count++; + tmpstat[p] = TRUE; + } + break; + case STAR: case PLUS: + first(v); + follow(p); + break; + case BAR: case QUEST: case RNEWE: + follow(p); + break; + case RCAT: case DIV: + if(v == left[p]){ + if(nullstr[right[p]]) + follow(p); + first(right[p]); + } + else follow(p); + break; + case RSCON: case CARAT: + follow(p); + break; +# ifdef DEBUG + default: + warning("bad switch follow %d",p); +# endif + } +} + +void +first(int v) /* calculate set of positions with v as root which can be active initially */ +{ + int i; + uchar *p; + i = name[v]; + if(i < NCH)i = 1; + switch(i){ + case 1: case RCCL: case RNCCL: case RNULLS: case FINAL: case S1FINAL: case S2FINAL: + if(tmpstat[v] == FALSE){ + count++; + tmpstat[v] = TRUE; + } + break; + case BAR: case RNEWE: + first(left[v]); + first(right[v]); + break; + case CARAT: + if(stnum % 2 == 1) + first(left[v]); + break; + case RSCON: + i = stnum/2 +1; + p = (uchar *)right[v]; + while(*p) + if(*p++ == i){ + first(left[v]); + break; + } + break; + case STAR: case QUEST: case PLUS: case RSTR: + first(left[v]); + break; + case RCAT: case DIV: + first(left[v]); + if(nullstr[left[v]]) + first(right[v]); + break; +# ifdef DEBUG + default: + warning("bad switch first %d",v); +# endif + } +} + +void +cgoto(void) +{ + int i, j, s; + int npos, curpos, n; + int tryit; + uchar tch[NCH]; + int tst[NCH]; + uchar *q; + + /* generate initial state, for each start condition */ + Bprint(&fout,"int yyvstop[] = {\n0,\n"); + while(stnum < 2 || stnum/2 < sptr){ + for(i = 0; i<tptr; i++) tmpstat[i] = 0; + count = 0; + if(tptr > 0)first(tptr-1); + add(state,stnum); +# ifdef DEBUG + if(debug){ + if(stnum > 1) + print("%s:\n",sname[stnum/2]); + pstate(stnum); + } +# endif + stnum++; + } + stnum--; + /* even stnum = might not be at line begin */ + /* odd stnum = must be at line begin */ + /* even states can occur anywhere, odd states only at line begin */ + for(s = 0; s <= stnum; s++){ + tryit = FALSE; + cpackflg[s] = FALSE; + sfall[s] = -1; + acompute(s); + for(i=0;i<NCH;i++) symbol[i] = 0; + npos = *state[s]; + for(i = 1; i<=npos; i++){ + curpos = *(state[s]+i); + if(name[curpos] < NCH) symbol[name[curpos]] = TRUE; + else switch(name[curpos]){ + case RCCL: + tryit = TRUE; + q = ptr[curpos]; + while(*q){ + for(j=1;j<NCH;j++) + if(cindex[j] == *q) + symbol[j] = TRUE; + q++; + } + break; + case RSTR: + symbol[right[curpos]] = TRUE; + break; +# ifdef DEBUG + case RNULLS: + case FINAL: + case S1FINAL: + case S2FINAL: + break; + default: + warning("bad switch cgoto %d state %d",curpos,s); + break; +# endif + } + } +# ifdef DEBUG + if(debug){ + print("State %d transitions on:\n\t",s); + charc = 0; + for(i = 1; i<NCH; i++){ + if(symbol[i]) allprint(i); + if(charc > LINESIZE){ + charc = 0; + print("\n\t"); + } + } + print("\n"); + } +# endif + /* for each char, calculate next state */ + n = 0; + for(i = 1; i<NCH; i++){ + if(symbol[i]){ + nextstate(s,i); /* executed for each state, transition pair */ + xstate = notin(stnum); + if(xstate == -2) warning("bad state %d %o",s,i); + else if(xstate == -1){ + if(stnum >= nstates) + error("Too many states %s",(nstates == NSTATES ? "\nTry using %n num":"")); + add(state,++stnum); +# ifdef DEBUG + if(debug)pstate(stnum); +# endif + tch[n] = i; + tst[n++] = stnum; + } else { /* xstate >= 0 ==> state exists */ + tch[n] = i; + tst[n++] = xstate; + } + } + } + tch[n] = 0; + tst[n] = -1; + /* pack transitions into permanent array */ + if(n > 0) packtrans(s,tch,tst,n,tryit); + else gotof[s] = -1; + } + Bprint(&fout,"0};\n"); +} + +/* Beware -- 70% of total CPU time is spent in this subroutine - + if you don't believe me - try it yourself ! */ +void +nextstate(int s, int c) +{ + int j, *newpos; + uchar *temp, *tz; + int *pos, i, *f, num, curpos, number; + /* state to goto from state s on char c */ + num = *state[s]; + temp = tmpstat; + pos = state[s] + 1; + for(i = 0; i<num; i++){ + curpos = *pos++; + j = name[curpos]; + if(j < NCH && j == c + || j == RSTR && c == right[curpos] + || j == RCCL && member(c, ptr[curpos])){ + f = foll[curpos]; + number = *f; + newpos = f+1; + for(j=0;j<number;j++) + temp[*newpos++] = 2; + } + } + j = 0; + tz = temp + tptr; + while(temp < tz){ + if(*temp == 2){ + j++; + *temp++ = 1; + } + else *temp++ = 0; + } + count = j; +} + +int +notin(int n) +{ /* see if tmpstat occurs previously */ + int *j,k; + uchar *temp; + int i; + + if(count == 0) + return(-2); + temp = tmpstat; + for(i=n;i>=0;i--){ /* for each state */ + j = state[i]; + if(count == *j++){ + for(k=0;k<count;k++) + if(!temp[*j++])break; + if(k >= count) + return(i); + } + } + return(-1); +} + +void +packtrans(int st, uchar *tch, int *tst, int cnt, int tryit) +{ + /* pack transitions into nchar, nexts */ + /* nchar is terminated by '\0', nexts uses cnt, followed by elements */ + /* gotof[st] = index into nchr, nexts for state st */ + + /* sfall[st] = t implies t is fall back state for st */ + /* == -1 implies no fall back */ + + int i,j,k; + int cmin, cval, tcnt, diff, p, *ast; + uchar *ach; + int go[NCH], temp[NCH], c; + int swork[NCH]; + uchar cwork[NCH]; + int upper; + + rcount += cnt; + cmin = -1; + cval = NCH; + ast = tst; + ach = tch; + /* try to pack transitions using ccl's */ + if(tryit){ /* ccl's used */ + for(i=1;i<NCH;i++){ + go[i] = temp[i] = -1; + symbol[i] = 1; + } + for(i=0;i<cnt;i++){ + go[tch[i]] = tst[i]; + symbol[tch[i]] = 0; + } + for(i=0; i<cnt;i++){ + c = match[tch[i]]; + if(go[c] != tst[i] || c == tch[i]) + temp[tch[i]] = tst[i]; + } + /* fill in error entries */ + for(i=1;i<NCH;i++) + if(symbol[i]) temp[i] = -2; /* error trans */ + /* count them */ + k = 0; + for(i=1;i<NCH;i++) + if(temp[i] != -1)k++; + if(k <cnt){ /* compress by char */ +# ifdef DEBUG + if(debug) print("use compression %d, %d vs %d\n",st,k,cnt); +# endif + k = 0; + for(i=1;i<NCH;i++) + if(temp[i] != -1){ + cwork[k] = i; + swork[k++] = (temp[i] == -2 ? -1 : temp[i]); + } + cwork[k] = 0; +# ifdef PC + ach = cwork; + ast = swork; + cnt = k; + cpackflg[st] = TRUE; +# endif + } + } + for(i=0; i<st; i++){ /* get most similar state */ + /* reject state with more transitions, state already represented by a third state, + and state which is compressed by char if ours is not to be */ + if(sfall[i] != -1) continue; + if(cpackflg[st] == 1) if(!(cpackflg[i] == 1)) continue; + p = gotof[i]; + if(p == -1) /* no transitions */ continue; + tcnt = nexts[p]; + if(tcnt > cnt) continue; + diff = 0; + j = 0; + upper = p + tcnt; + while(ach[j] && p < upper){ + while(ach[j] < nchar[p] && ach[j]){diff++; j++; } + if(ach[j] == 0)break; + if(ach[j] > nchar[p]){diff=NCH;break;} + /* ach[j] == nchar[p] */ + if(ast[j] != nexts[++p] || ast[j] == -1 || (cpackflg[st] && ach[j] != match[ach[j]]))diff++; + j++; + } + while(ach[j]){ + diff++; + j++; + } + if(p < upper)diff = NCH; + if(diff < cval && diff < tcnt){ + cval = diff; + cmin = i; + if(cval == 0)break; + } + } + /* cmin = state "most like" state st */ +# ifdef DEBUG + if(debug)print("select st %d for st %d diff %d\n",cmin,st,cval); +# endif +# ifdef PS + if(cmin != -1){ /* if we can use st cmin */ + gotof[st] = nptr; + k = 0; + sfall[st] = cmin; + p = gotof[cmin]+1; + j = 0; + while(ach[j]){ + /* if cmin has a transition on c, then so will st */ + /* st may be "larger" than cmin, however */ + while(ach[j] < nchar[p-1] && ach[j]){ + k++; + nchar[nptr] = ach[j]; + nexts[++nptr] = ast[j]; + j++; + } + if(nchar[p-1] == 0)break; + if(ach[j] > nchar[p-1]){ + warning("bad transition %d %d",st,cmin); + goto nopack; + } + /* ach[j] == nchar[p-1] */ + if(ast[j] != nexts[p] || ast[j] == -1 || (cpackflg[st] && ach[j] != match[ach[j]])){ + k++; + nchar[nptr] = ach[j]; + nexts[++nptr] = ast[j]; + } + p++; + j++; + } + while(ach[j]){ + nchar[nptr] = ach[j]; + nexts[++nptr] = ast[j++]; + k++; + } + nexts[gotof[st]] = cnt = k; + nchar[nptr++] = 0; + } else { +# endif +nopack: + /* stick it in */ + gotof[st] = nptr; + nexts[nptr] = cnt; + for(i=0;i<cnt;i++){ + nchar[nptr] = ach[i]; + nexts[++nptr] = ast[i]; + } + nchar[nptr++] = 0; +# ifdef PS + } +# endif + if(cnt < 1){ + gotof[st] = -1; + nptr--; + } else + if(nptr > ntrans) + error("Too many transitions %s",(ntrans==NTRANS?"\nTry using %a num":"")); +} + +# ifdef DEBUG +void +pstate(int s) +{ + int *p,i,j; + print("State %d:\n",s); + p = state[s]; + i = *p++; + if(i == 0) return; + print("%4d",*p++); + for(j = 1; j<i; j++){ + print(", %4d",*p++); + if(j%30 == 0)print("\n"); + } + print("\n"); +} +# endif + +int +member(int d, uchar *t) +{ + int c; + uchar *s; + + c = d; + s = t; + c = cindex[c]; + while(*s) + if(*s++ == c) return(1); + return(0); +} + +# ifdef DEBUG +void +stprt(int i) +{ + int p, t; + + print("State %d:",i); + /* print actions, if any */ + t = atable[i]; + if(t != -1)print(" final"); + print("\n"); + if(cpackflg[i] == TRUE)print("backup char in use\n"); + if(sfall[i] != -1)print("fall back state %d\n",sfall[i]); + p = gotof[i]; + if(p == -1) return; + print("(%d transitions)\n",nexts[p]); + while(nchar[p]){ + charc = 0; + if(nexts[p+1] >= 0) + print("%d\t",nexts[p+1]); + else print("err\t"); + allprint(nchar[p++]); + while(nexts[p] == nexts[p+1] && nchar[p]){ + if(charc > LINESIZE){ + charc = 0; + print("\n\t"); + } + allprint(nchar[p++]); + } + print("\n"); + } + print("\n"); +} +# endif + +void +acompute(int s) /* compute action list = set of poss. actions */ +{ + int *p, i, j; + int cnt, m; + int temp[300], k, neg[300], n; + + k = 0; + n = 0; + p = state[s]; + cnt = *p++; + if(cnt > 300) + error("Too many positions for one state - acompute"); + for(i=0;i<cnt;i++){ + if(name[*p] == FINAL)temp[k++] = left[*p]; + else if(name[*p] == S1FINAL){temp[k++] = left[*p]; + if (left[*p] >= NACTIONS) error("Too many right contexts"); + extra[left[*p]] = 1; + } else if(name[*p] == S2FINAL)neg[n++] = left[*p]; + p++; + } + atable[s] = -1; + if(k < 1 && n < 1) return; +# ifdef DEBUG + if(debug) print("final %d actions:",s); +# endif + /* sort action list */ + for(i=0; i<k; i++) + for(j=i+1;j<k;j++) + if(temp[j] < temp[i]){ + m = temp[j]; + temp[j] = temp[i]; + temp[i] = m; + } + /* remove dups */ + for(i=0;i<k-1;i++) + if(temp[i] == temp[i+1]) temp[i] = 0; + /* copy to permanent quarters */ + atable[s] = aptr; +# ifdef DEBUG + Bprint(&fout,"/* actions for state %d */",s); +# endif + Bputc(&fout, '\n'); + for(i=0;i<k;i++) + if(temp[i] != 0){ + Bprint(&fout,"%d,\n",temp[i]); +# ifdef DEBUG + if(debug) + print("%d ",temp[i]); +# endif + aptr++; + } + for(i=0;i<n;i++){ /* copy fall back actions - all neg */ + Bprint(&fout,"%d,\n",neg[i]); + aptr++; +# ifdef DEBUG + if(debug)print("%d ",neg[i]); +# endif + } +# ifdef DEBUG + if(debug)print("\n"); +# endif + Bprint(&fout,"0,\n"); + aptr++; +} + +# ifdef DEBUG +void +pccl(void) { + /* print character class sets */ + int i, j; + + print("char class intersection\n"); + for(i=0; i< ccount; i++){ + charc = 0; + print("class %d:\n\t",i); + for(j=1;j<NCH;j++) + if(cindex[j] == i){ + allprint(j); + if(charc > LINESIZE){ + print("\n\t"); + charc = 0; + } + } + print("\n"); + } + charc = 0; + print("match:\n"); + for(i=0;i<NCH;i++){ + allprint(match[i]); + if(charc > LINESIZE){ + print("\n"); + charc = 0; + } + } + print("\n"); +} +# endif + +void +mkmatch(void) +{ + int i; + uchar tab[NCH]; + + for(i=0; i<ccount; i++) + tab[i] = 0; + for(i=1;i<NCH;i++) + if(tab[cindex[i]] == 0) + tab[cindex[i]] = i; + /* tab[i] = principal char for new ccl i */ + for(i = 1; i<NCH; i++) + match[i] = tab[cindex[i]]; +} + +void +layout(void) +{ + /* format and output final program's tables */ + int i, j, k; + int top, bot, startup, omin; + + for(i=0; i<outsize;i++) + verify[i] = advance[i] = 0; + omin = 0; + yytop = 0; + for(i=0; i<= stnum; i++){ /* for each state */ + j = gotof[i]; + if(j == -1){ + stoff[i] = 0; + continue; + } + bot = j; + while(nchar[j])j++; + top = j - 1; +# ifdef DEBUG + if (debug) { + print("State %d: (layout)\n", i); + for(j=bot; j<=top;j++) { + print(" %o", nchar[j]); + if (j%10==0) print("\n"); + } + print("\n"); + } +# endif + while(verify[omin+NCH]) omin++; + startup = omin; +# ifdef DEBUG + if (debug) print("bot,top %d, %d startup begins %d\n",bot,top,startup); +# endif + do { + startup += 1; + if(startup > outsize - NCH) + error("output table overflow"); + for(j = bot; j<= top; j++){ + k = startup + nchar[j]; + if(verify[k])break; + } + } while (j <= top); + /* have found place */ +# ifdef DEBUG + if (debug) print(" startup going to be %d\n", startup); +# endif + for(j = bot; j<= top; j++){ + k = startup + nchar[j]; + verify[k] = i+1; /* state number + 1*/ + advance[k] = nexts[j+1]+1; /* state number + 1*/ + if(yytop < k) yytop = k; + } + stoff[i] = startup; + } + + /* stoff[i] = offset into verify, advance for trans for state i */ + /* put out yywork */ + Bprint(&fout,"# define YYTYPE %s\n",stnum+1 >= NCH ? "int" : "Uchar"); + Bprint(&fout,"struct yywork { YYTYPE verify, advance; } yycrank[] = {\n"); + for(i=0;i<=yytop;i+=4){ + for(j=0;j<4;j++){ + k = i+j; + if(verify[k]) + Bprint(&fout,"%d,%d,\t",verify[k],advance[k]); + else + Bprint(&fout,"0,0,\t"); + } + Bputc(&fout, '\n'); + } + Bprint(&fout,"0,0};\n"); + + /* put out yysvec */ + + Bprint(&fout,"struct yysvf yysvec[] = {\n"); + Bprint(&fout,"0,\t0,\t0,\n"); + for(i=0;i<=stnum;i++){ /* for each state */ + if(cpackflg[i])stoff[i] = -stoff[i]; + Bprint(&fout,"yycrank+%d,\t",stoff[i]); + if(sfall[i] != -1) + Bprint(&fout,"yysvec+%d,\t", sfall[i]+1); /* state + 1 */ + else Bprint(&fout,"0,\t\t"); + if(atable[i] != -1) + Bprint(&fout,"yyvstop+%d,",atable[i]); + else Bprint(&fout,"0,\t"); +# ifdef DEBUG + Bprint(&fout,"\t\t/* state %d */",i); +# endif + Bputc(&fout, '\n'); + } + Bprint(&fout,"0,\t0,\t0};\n"); + + /* put out yymatch */ + + Bprint(&fout,"struct yywork *yytop = yycrank+%d;\n",yytop); + Bprint(&fout,"struct yysvf *yybgin = yysvec+1;\n"); + Bprint(&fout,"Uchar yymatch[] = {\n"); + for(i=0; i<NCH; i+=8){ + for(j=0; j<8; j++){ + int fbch; + fbch = match[i+j]; + if(isprint(fbch) && fbch != '\'' && fbch != '\\') + Bprint(&fout,"'%c' ,",fbch); + else Bprint(&fout,"0%-3o,",fbch); + } + Bputc(&fout, '\n'); + } + Bprint(&fout,"0};\n"); + /* put out yyextra */ + Bprint(&fout,"Uchar yyextra[] = {\n"); + for(i=0;i<casecount;i+=8){ + for(j=0;j<8;j++) + Bprint(&fout, "%d,", i+j<NACTIONS ? + extra[i+j] : 0); + Bputc(&fout, '\n'); + } + Bprint(&fout,"0};\n"); +} + +# ifdef PP +void +padd(int **array, int n) +{ + int i, *j, k; + + array[n] = nxtpos; + if(count == 0){ + *nxtpos++ = 0; + return; + } + for(i=tptr-1;i>=0;i--){ + j = array[i]; + if(j && *j++ == count){ + for(k=0;k<count;k++) + if(!tmpstat[*j++])break; + if(k >= count){ + array[n] = array[i]; + return; + } + } + } + add(array,n); +} +# endif |