diff options
author | ben <ben@rana> | 2016-04-26 22:23:44 -0500 |
---|---|---|
committer | ben <ben@rana> | 2016-04-26 22:23:44 -0500 |
commit | 0a460e1722c50e31653359f8a86fe0b606d2b513 (patch) | |
tree | b5a00bbfc883aa98709db012e0a7bacc67e234af /sys/src/libregexp/rregexec.c | |
parent | 651d6c2bc68e7e5224c3ba41b094e37b1c1890ed (diff) |
New libregexp and APE ported to native
Diffstat (limited to 'sys/src/libregexp/rregexec.c')
-rw-r--r-- | sys/src/libregexp/rregexec.c | 369 |
1 files changed, 173 insertions, 196 deletions
diff --git a/sys/src/libregexp/rregexec.c b/sys/src/libregexp/rregexec.c index 301a3589b..572718733 100644 --- a/sys/src/libregexp/rregexec.c +++ b/sys/src/libregexp/rregexec.c @@ -1,212 +1,189 @@ #include <u.h> #include <libc.h> -#include "regexp.h" -#include "regcomp.h" +#include <regexp.h> +#include "regimpl.h" -/* - * return 0 if no match - * >0 if a match - * <0 if we ran out of _relist space - */ -static int -rregexec1(Reprog *progp, /* program to run */ - Rune *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - Reljunk *j) +typedef struct RethreadQ RethreadQ; +struct RethreadQ { - int flag=0; - Reinst *inst; - Relist *tlp; - Rune *s; - int i, checkstart; - Rune r, *rp, *ep; - Relist* tl; /* This list, next list */ - Relist* nl; - Relist* tle; /* ends of this and next list */ - Relist* nle; - int match; - Rune *p; + Rethread *head; + Rethread **tail; +}; - match = 0; - checkstart = j->startchar; - if(mp) - for(i=0; i<ms; i++) { - mp[i].rsp = 0; - mp[i].rep = 0; - } - j->relist[0][0].inst = 0; - j->relist[1][0].inst = 0; +int +rregexec(Reprog *prog, Rune *str, Resub *sem, int msize) +{ + RethreadQ lists[2], *clist, *nlist, *tmp; + Rethread *t, *nextthr, **availthr; + Reinst *curinst; + Rune *rsp, *rep, endr, last; + int i, match, first, gen, pri, matchpri; - /* Execute machine once for each character, including terminal NUL */ - s = j->rstarts; - do{ - /* fast check for first char */ - if(checkstart) { - switch(j->starttype) { - case RUNE: - p = runestrchr(s, j->startchar); - if(p == 0 || s == j->reol) - return match; - s = p; - break; - case BOL: - if(s == bol) - break; - p = runestrchr(s, '\n'); - if(p == 0 || s == j->reol) - return match; - s = p+1; - break; - } - } + if(msize > NSUBEXPM) + msize = NSUBEXPM; - r = *s; + if(prog->startinst->gen != 0) { + for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++) + curinst->gen = 0; + } - /* switch run lists */ - tl = j->relist[flag]; - tle = j->reliste[flag]; - nl = j->relist[flag^=1]; - nle = j->reliste[flag]; - nl->inst = 0; + clist = lists; + clist->head = nil; + clist->tail = &clist->head; + nlist = lists + 1; + nlist->head = nil; + nlist->tail = &nlist->head; - /* Add first instruction to current list */ - _rrenewemptythread(tl, progp->startinst, ms, s); + for(i = 0; i < prog->nthr; i++) + prog->thrpool[i] = prog->threads + i; + availthr = prog->thrpool + prog->nthr; - /* Execute machine until current list is empty */ - for(tlp=tl; tlp->inst; tlp++){ - for(inst=tlp->inst; ; inst = inst->next){ - switch(inst->type){ - case RUNE: /* regular character */ - if(inst->r == r) - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - case LBRA: - tlp->se.m[inst->subid].rsp = s; - continue; - case RBRA: - tlp->se.m[inst->subid].rep = s; - continue; - case ANY: - if(r != '\n') - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - case ANYNL: - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - case BOL: - if(s == bol || *(s-1) == '\n') - continue; - break; - case EOL: - if(s == j->reol || r == 0 || r == '\n') - continue; - break; - case CCLASS: - ep = inst->cp->end; - for(rp = inst->cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]){ - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - } - break; - case NCCLASS: - ep = inst->cp->end; - for(rp = inst->cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]) - break; - if(rp == ep) - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - case OR: - /* evaluate right choice later */ - if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle) - return -1; - /* efficiency: advance and re-evaluate */ - continue; - case END: /* Match! */ - match = 1; - tlp->se.m[0].rep = s; - if(mp != 0) - _renewmatch(mp, ms, &tlp->se); - break; - } + pri = matchpri = gen = match = 0; + rsp = str; + rep = nil; + endr = L'\0'; + if(sem != nil && msize > 0) { + if(sem->rsp != nil) + rsp = sem->rsp; + if(sem->rep != nil && *sem->rep != L'\0') { + rep = sem->rep; + endr = *sem->rep; + *sem->rep = '\0'; + } + } + last = 1; + for(; last != L'\0'; rsp++) { + gen++; + last = *rsp; + first = 1; + t = clist->head; + if(t == nil) + goto Start; + curinst = t->pc; +Again: + if(curinst->gen == gen) + goto Done; + curinst->gen = gen; + switch(curinst->op) { + case ORUNE: + if(*rsp != curinst->r) + goto Done; + case OANY: /* fallthrough */ + Any: + nextthr = t->next; + t->pc = curinst + 1; + t->next = nil; + *nlist->tail = t; + nlist->tail = &t->next; + if(nextthr == nil) + break; + t = nextthr; + curinst = t->pc; + goto Again; + case OCLASS: + Class: + if(*rsp < curinst->r) + goto Done; + if(*rsp > curinst->r1) { + curinst++; + goto Class; + } + nextthr = t->next; + t->pc = curinst->a; + t->next = nil; + *nlist->tail = t; + nlist->tail = &t->next; + if(nextthr == nil) break; + t = nextthr; + curinst = t->pc; + goto Again; + case ONOTNL: + if(*rsp != L'\n') { + curinst++; + goto Again; + } + goto Done; + case OBOL: + if(rsp == str || rsp[-1] == L'\n') { + curinst++; + goto Again; + } + goto Done; + case OEOL: + if(*rsp == L'\0' && rep == nil) { + curinst++; + goto Again; + } + if(*rsp == '\n') + goto Any; + goto Done; + case OJMP: + curinst = curinst->a; + goto Again; + case OSPLIT: + nextthr = *--availthr; + nextthr->pc = curinst->b; + if(msize > 0) + memcpy(nextthr->sem, t->sem, sizeof(Resub)*msize); + nextthr->pri = t->pri; + nextthr->next = t->next; + t->next = nextthr; + curinst = curinst->a; + goto Again; + case OSAVE: + if(curinst->sub < msize) + t->sem[curinst->sub].rsp = rsp; + curinst++; + goto Again; + case OUNSAVE: + if(curinst->sub == 0) { + /* "Highest" priority is the left-most longest. */ + if (t->pri > matchpri) + goto Done; + match = 1; + matchpri = t->pri; + if(sem != nil && msize > 0) { + memcpy(sem, t->sem, sizeof(Resub)*msize); + sem->rep = rsp; + } + goto Done; } + if(curinst->sub < msize) + t->sem[curinst->sub].rep = rsp; + curinst++; + goto Again; + Done: + *availthr++ = t; + t = t->next; + if(t == nil) + break; + curinst = t->pc; + goto Again; + } +Start: + /* Start again once if we haven't found anything. */ + if(first == 1 && match == 0) { + first = 0; + t = *--availthr; + if(msize > 0) + memset(t->sem, 0, sizeof(Resub)*msize); + /* "Lower" priority thread */ + t->pri = matchpri = pri++; + t->next = nil; + curinst = prog->startinst; + goto Again; } - if(s == j->reol) + /* If we have a match and no extant threads, we are done. */ + if(match == 1 && nlist->head == nil) break; - checkstart = j->startchar && nl->inst==0; - s++; - }while(r); - return match; -} - -static int -rregexec2(Reprog *progp, /* program to run */ - Rune *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - Reljunk *j -) -{ - Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE]; - - /* mark space */ - j->relist[0] = relist0; - j->relist[1] = relist1; - j->reliste[0] = relist0 + nelem(relist0) - 2; - j->reliste[1] = relist1 + nelem(relist1) - 2; - - return rregexec1(progp, bol, mp, ms, j); -} - -extern int -rregexec(Reprog *progp, /* program to run */ - Rune *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms) /* number of elements at mp */ -{ - Reljunk j; - Relist relist0[LISTSIZE], relist1[LISTSIZE]; - int rv; - - /* - * use user-specified starting/ending location if specified - */ - j.rstarts = bol; - j.reol = 0; - if(mp && ms>0){ - if(mp->sp) - j.rstarts = mp->rsp; - if(mp->ep) - j.reol = mp->rep; + tmp = clist; + clist = nlist; + nlist = tmp; + nlist->head = nil; + nlist->tail = &nlist->head; } - j.starttype = 0; - j.startchar = 0; - if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) { - j.starttype = RUNE; - j.startchar = progp->startinst->r; - } - if(progp->startinst->type == BOL) - j.starttype = BOL; - - /* mark space */ - j.relist[0] = relist0; - j.relist[1] = relist1; - j.reliste[0] = relist0 + nelem(relist0) - 2; - j.reliste[1] = relist1 + nelem(relist1) - 2; - - rv = rregexec1(progp, bol, mp, ms, &j); - if(rv >= 0) - return rv; - rv = rregexec2(progp, bol, mp, ms, &j); - if(rv >= 0) - return rv; - return -1; + if(rep != nil) + *rep = endr; + return match; } |