From 0a460e1722c50e31653359f8a86fe0b606d2b513 Mon Sep 17 00:00:00 2001 From: ben Date: Tue, 26 Apr 2016 22:23:44 -0500 Subject: New libregexp and APE ported to native --- sys/src/libregexp/regexec.c | 390 ++++++++++++++++++++------------------------ 1 file changed, 174 insertions(+), 216 deletions(-) (limited to 'sys/src/libregexp/regexec.c') diff --git a/sys/src/libregexp/regexec.c b/sys/src/libregexp/regexec.c index b806ba60a..a153d77e9 100644 --- a/sys/src/libregexp/regexec.c +++ b/sys/src/libregexp/regexec.c @@ -1,232 +1,190 @@ #include #include -#include "regexp.h" -#include "regcomp.h" +#include +#include "regimpl.h" +typedef struct RethreadQ RethreadQ; +struct RethreadQ +{ + Rethread *head; + Rethread **tail; +}; -/* - * return 0 if no match - * >0 if a match - * <0 if we ran out of _relist space - */ -static int -regexec1(Reprog *progp, /* program to run */ - char *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - Reljunk *j -) +int +regexec(Reprog *prog, char *str, Resub *sem, int msize) { - int flag=0; - Reinst *inst; - Relist *tlp; - char *s; - int i, checkstart; - Rune r, *rp, *ep; - int n; - Relist* tl; /* This list, next list */ - Relist* nl; - Relist* tle; /* ends of this and next list */ - Relist* nle; - int match; - char *p; + RethreadQ lists[2], *clist, *nlist, *tmp; + Rethread *t, *nextthr, **availthr; + Reinst *curinst; + Rune r; + char *sp, *ep, endc; + int i, match, first, gen, matchpri, pri; - match = 0; - checkstart = j->starttype; - if(mp) - for(i=0; irelist[0][0].inst = 0; - j->relist[1][0].inst = 0; + if(msize > NSUBEXPM) + msize = NSUBEXPM; - /* Execute machine once for each character, including terminal NUL */ - s = j->starts; - do{ - /* fast check for first char */ - if(checkstart) { - switch(j->starttype) { - case RUNE: - p = utfrune(s, j->startchar); - if(p == 0 || s == j->eol) - return match; - s = p; - break; - case BOL: - if(s == bol) - break; - p = utfrune(s, '\n'); - if(p == 0 || s == j->eol) - return match; - s = p+1; - break; - } - } - r = *(uchar*)s; - if(r < Runeself) - n = 1; - else - n = chartorune(&r, s); + if(prog->startinst->gen != 0) { + for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++) + curinst->gen = 0; + } - /* switch run lists */ - tl = j->relist[flag]; - tle = j->reliste[flag]; - nl = j->relist[flag^=1]; - nle = j->reliste[flag]; - nl->inst = 0; + clist = lists; + clist->head = nil; + clist->tail = &clist->head; + nlist = lists + 1; + nlist->head = nil; + nlist->tail = &nlist->head; - /* Add first instruction to current list */ - if(match == 0) - _renewemptythread(tl, progp->startinst, ms, s); + for(i = 0; i < prog->nthr; i++) + prog->thrpool[i] = prog->threads + i; + availthr = prog->thrpool + prog->nthr; - /* Execute machine until current list is empty */ - for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ - for(inst = tlp->inst; ; inst = inst->next){ - switch(inst->type){ - case RUNE: /* regular character */ - if(inst->r == r){ - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - } - break; - case LBRA: - tlp->se.m[inst->subid].sp = s; - continue; - case RBRA: - tlp->se.m[inst->subid].ep = s; - continue; - case ANY: - if(r != '\n') - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - case ANYNL: - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - case BOL: - if(s == bol || *(s-1) == '\n') - continue; - break; - case EOL: - if(s == j->eol || r == 0 || r == '\n') - continue; - break; - case CCLASS: - ep = inst->cp->end; - for(rp = inst->cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]){ - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - } - break; - case NCCLASS: - ep = inst->cp->end; - for(rp = inst->cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]) - break; - if(rp == ep) - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - case OR: - /* evaluate right choice later */ - if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle) - return -1; - /* efficiency: advance and re-evaluate */ - continue; - case END: /* Match! */ - match = 1; - tlp->se.m[0].ep = s; - if(mp != 0) - _renewmatch(mp, ms, &tlp->se); - break; - } + pri = matchpri = gen = match = 0; + sp = str; + ep = nil; + endc = '\0'; + if(sem != nil && msize > 0) { + if(sem->sp != nil) + sp = sem->sp; + if(sem->ep != nil && *sem->ep != '\0') { + ep = sem->ep; + endc = *sem->ep; + *sem->ep = '\0'; + } + } + r = Runemax + 1; + for(; r != L'\0'; sp += i) { + gen++; + i = chartorune(&r, sp); + first = 1; + t = clist->head; + if(t == nil) + goto Start; + curinst = t->pc; +Again: + if(curinst->gen == gen) + goto Done; + curinst->gen = gen; + switch(curinst->op) { + case ORUNE: + if(r != curinst->r) + goto Done; + case OANY: /* fallthrough */ + Any: + nextthr = t->next; + t->pc = curinst + 1; + t->next = nil; + *nlist->tail = t; + nlist->tail = &t->next; + if(nextthr == nil) + break; + t = nextthr; + curinst = t->pc; + goto Again; + case OCLASS: + Class: + if(r < curinst->r) + goto Done; + if(r > curinst->r1) { + curinst++; + goto Class; + } + nextthr = t->next; + t->pc = curinst->a; + t->next = nil; + *nlist->tail = t; + nlist->tail = &t->next; + if(nextthr == nil) break; + t = nextthr; + curinst = t->pc; + goto Again; + case ONOTNL: + if(r != L'\n') { + curinst++; + goto Again; + } + goto Done; + case OBOL: + if(sp == str || sp[-1] == '\n') { + curinst++; + goto Again; } + goto Done; + case OEOL: + if(r == L'\0' && ep == nil) { + curinst++; + goto Again; + } + if(r == L'\n') + goto Any; + goto Done; + case OJMP: + curinst = curinst->a; + goto Again; + case OSPLIT: + nextthr = *--availthr; + nextthr->pc = curinst->b; + if(msize > 0) + memcpy(nextthr->sem, t->sem, sizeof(Resub)*msize); + nextthr->pri = t->pri; + nextthr->next = t->next; + t->next = nextthr; + curinst = curinst->a; + goto Again; + case OSAVE: + if(curinst->sub < msize) + t->sem[curinst->sub].sp = sp; + curinst++; + goto Again; + case OUNSAVE: + if(curinst->sub == 0) { + /* "Highest" priority is the left-most longest. */ + if (t->pri > matchpri) + goto Done; + match = 1; + matchpri = t->pri; + if(sem != nil && msize > 0) { + memcpy(sem, t->sem, sizeof(Resub)*msize); + sem->ep = sp; + } + goto Done; + } + if(curinst->sub < msize) + t->sem[curinst->sub].ep = sp; + curinst++; + goto Again; + Done: + *availthr++ = t; + t = t->next; + if(t == nil) + break; + curinst = t->pc; + goto Again; } - if(s == j->eol) +Start: + /* Start again once if we haven't found anything. */ + if(first == 1 && match == 0) { + first = 0; + t = *--availthr; + if(msize > 0) + memset(t->sem, 0, sizeof(Resub)*msize); + /* "Lower" priority thread */ + t->pri = matchpri = pri++; + t->next = nil; + curinst = prog->startinst; + goto Again; + } + /* If we have a match and no extant threads, we are done. */ + if(match == 1 && nlist->head == nil) break; - checkstart = j->starttype && nl->inst==0; - s += n; - }while(r); - return match; -} - -static int -regexec2(Reprog *progp, /* program to run */ - char *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - Reljunk *j -) -{ - int rv; - Relist *relist0, *relist1; - - /* mark space */ - relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); - if(relist0 == nil) - return -1; - relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); - if(relist1 == nil){ - free(relist1); - return -1; - } - j->relist[0] = relist0; - j->relist[1] = relist1; - j->reliste[0] = relist0 + BIGLISTSIZE - 2; - j->reliste[1] = relist1 + BIGLISTSIZE - 2; - - rv = regexec1(progp, bol, mp, ms, j); - free(relist0); - free(relist1); - return rv; -} - -extern int -regexec(Reprog *progp, /* program to run */ - char *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms) /* number of elements at mp */ -{ - Reljunk j; - Relist relist0[LISTSIZE], relist1[LISTSIZE]; - int rv; - - /* - * use user-specified starting/ending location if specified - */ - j.starts = bol; - j.eol = 0; - if(mp && ms>0){ - if(mp->sp) - j.starts = mp->sp; - if(mp->ep) - j.eol = mp->ep; + tmp = clist; + clist = nlist; + nlist = tmp; + nlist->head = nil; + nlist->tail = &nlist->head; } - j.starttype = 0; - j.startchar = 0; - if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) { - j.starttype = RUNE; - j.startchar = progp->startinst->r; - } - if(progp->startinst->type == BOL) - j.starttype = BOL; - - /* mark space */ - j.relist[0] = relist0; - j.relist[1] = relist1; - j.reliste[0] = relist0 + nelem(relist0) - 2; - j.reliste[1] = relist1 + nelem(relist1) - 2; - - rv = regexec1(progp, bol, mp, ms, &j); - if(rv >= 0) - return rv; - rv = regexec2(progp, bol, mp, ms, &j); - if(rv >= 0) - return rv; - return -1; + if(ep != nil) + *ep = endc; + return match; } -- cgit v1.2.3