From 9ae083d81642be3a3ee7ff1e8d28fb9381bb1abf Mon Sep 17 00:00:00 2001 From: spew Date: Thu, 2 Feb 2017 21:21:34 -0600 Subject: libregexp: simplify regular expression vm implementation Make the logic around who has priority over the final match simpler by merging the priority generation and match fields in a smarter way. Move the creation of new thread matches up to the top to avoid jumping all over the place. --- sys/src/libregexp/rregexec.c | 143 ++++++++++++++++++++----------------------- 1 file changed, 67 insertions(+), 76 deletions(-) (limited to 'sys/src/libregexp/rregexec.c') diff --git a/sys/src/libregexp/rregexec.c b/sys/src/libregexp/rregexec.c index b1320de4b..663bf4569 100644 --- a/sys/src/libregexp/rregexec.c +++ b/sys/src/libregexp/rregexec.c @@ -4,29 +4,30 @@ #include "regimpl.h" typedef struct RethreadQ RethreadQ; -struct RethreadQ -{ +struct RethreadQ { Rethread *head; Rethread **tail; }; int -rregexec(Reprog *prog, Rune *str, Resub *sem, int msize) +rregexec(Reprog *p, Rune *str, Resub *sem, int msize) { RethreadQ lists[2], *clist, *nlist, *tmp; - Rethread *t, *next, *pooltop, *avail; - Reinst *curinst; - Rune *rsp, *rep, endr, last; - int match, first, gen, pri, matchpri; + Rethread *t, *next, *pool, *avail; + Reinst *ci; + Rune *rsp, *rep, endr, r; + int matchgen, gen; if(msize > NSUBEXPM) msize = NSUBEXPM; - if(prog->startinst->gen != 0) { - for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++) - curinst->gen = 0; + if(p->startinst->gen != 0) { + for(ci = p->startinst; ci < p->startinst + p->len; ci++) + ci->gen = 0; } + memset(p->threads, 0, sizeof(Rethread)*p->nthr); + clist = lists; clist->head = nil; clist->tail = &clist->head; @@ -34,10 +35,10 @@ rregexec(Reprog *prog, Rune *str, Resub *sem, int msize) nlist->head = nil; nlist->tail = &nlist->head; - pooltop = prog->threads + prog->nthr; + pool = p->threads; avail = nil; - pri = matchpri = gen = match = 0; + gen = matchgen = 0; rsp = str; rep = nil; endr = L'\0'; @@ -50,109 +51,120 @@ rregexec(Reprog *prog, Rune *str, Resub *sem, int msize) *sem->rep = '\0'; } } - last = 1; - for(; last != L'\0'; rsp++) { + for(r = 1; r != L'\0'; rsp++) { + r = *rsp; gen++; - last = *rsp; - first = 1; + if(matchgen == 0) { + if(avail == nil) { + assert(pool < p->threads + p->nthr); + t = pool++; + } else { + t = avail; + avail = avail->next; + } + t->i = p->startinst; + if(msize > 0) + memset(t->sem, 0, sizeof(Resub)*msize); + t->next = nil; + t->gen = gen; + *clist->tail = t; + clist->tail = &t->next; + } t = clist->head; if(t == nil) - goto Start; - curinst = t->pc; + break; + ci = t->i; Again: - if(curinst->gen == gen) + if(ci->gen == gen || matchgen && t->gen > matchgen) goto Done; - curinst->gen = gen; - switch(curinst->op) { + ci->gen = gen; + switch(ci->op) { case ORUNE: - if(*rsp != curinst->r) + if(r != ci->r) goto Done; case OANY: /* fallthrough */ next = t->next; - t->pc = curinst + 1; + t->i = ci + 1; t->next = nil; *nlist->tail = t; nlist->tail = &t->next; if(next == nil) break; t = next; - curinst = t->pc; + ci = t->i; goto Again; case OCLASS: Class: - if(*rsp < curinst->r) + if(r < ci->r) goto Done; - if(*rsp > curinst->r1) { - curinst++; + if(r > ci->r1) { + ci++; goto Class; } next = t->next; - t->pc = curinst->a; + t->i = ci->a; t->next = nil; *nlist->tail = t; nlist->tail = &t->next; if(next == nil) break; t = next; - curinst = t->pc; + ci = t->i; goto Again; case ONOTNL: - if(*rsp != L'\n') { - curinst++; + if(r != L'\n') { + ci++; goto Again; } goto Done; case OBOL: if(rsp == str || rsp[-1] == L'\n') { - curinst++; + ci++; goto Again; } goto Done; case OEOL: - if(*rsp == '\n' || *rsp == L'\0' && rep == nil) { - curinst++; + if(r == L'\n' || r == L'\0' && rep == nil) { + ci++; goto Again; } goto Done; case OJMP: - curinst = curinst->a; + ci = ci->a; goto Again; case OSPLIT: - if(avail == nil) - next = --pooltop; - else { + if(avail == nil) { + assert(pool < p->threads + p->nthr); + next = pool++; + } else { next = avail; avail = avail->next; } - next->pc = curinst->b; + next->i = ci->b; if(msize > 0) memcpy(next->sem, t->sem, sizeof(Resub)*msize); - next->pri = t->pri; next->next = t->next; + next->gen = t->gen; t->next = next; - curinst = curinst->a; + ci = ci->a; goto Again; case OSAVE: - if(curinst->sub < msize) - t->sem[curinst->sub].rsp = rsp; - curinst++; + if(ci->sub < msize) + t->sem[ci->sub].rsp = rsp; + ci++; goto Again; case OUNSAVE: - if(curinst->sub == 0) { - /* "Highest" priority is the left-most longest. */ - if (t->pri > matchpri) - goto Done; - match = 1; - matchpri = t->pri; + if(ci->sub == 0) { + matchgen = t->gen; if(sem != nil && msize > 0) { memcpy(sem, t->sem, sizeof(Resub)*msize); sem->rep = rsp; } goto Done; } - if(curinst->sub < msize) - t->sem[curinst->sub].rep = rsp; - curinst++; + if(ci->sub < msize) + t->sem[ci->sub].rep = rsp; + ci++; goto Again; Done: next = t->next; @@ -161,30 +173,9 @@ Again: if(next == nil) break; t = next; - curinst = t->pc; + ci = t->i; goto Again; } -Start: - /* Start again once if we haven't found anything. */ - if(first == 1 && match == 0) { - first = 0; - if(avail == nil) - t = --pooltop; - else { - t = avail; - avail = avail->next; - } - if(msize > 0) - memset(t->sem, 0, sizeof(Resub)*msize); - /* "Lower" priority thread */ - t->pri = matchpri = pri++; - t->next = nil; - curinst = prog->startinst; - goto Again; - } - /* If we have a match and no extant threads, we are done. */ - if(match == 1 && nlist->head == nil) - break; tmp = clist; clist = nlist; nlist = tmp; @@ -193,5 +184,5 @@ Start: } if(rep != nil) *rep = endr; - return match; + return matchgen > 0 ? 1 : 0; } -- cgit v1.2.3