diff options
author | Taru Karttunen <taruti@taruti.net> | 2011-03-30 15:46:40 +0300 |
---|---|---|
committer | Taru Karttunen <taruti@taruti.net> | 2011-03-30 15:46:40 +0300 |
commit | e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch) | |
tree | d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/ape/lib/regexp/regexec.c |
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/ape/lib/regexp/regexec.c')
-rwxr-xr-x | sys/src/ape/lib/regexp/regexec.c | 191 |
1 files changed, 191 insertions, 0 deletions
diff --git a/sys/src/ape/lib/regexp/regexec.c b/sys/src/ape/lib/regexp/regexec.c new file mode 100755 index 000000000..dee888b09 --- /dev/null +++ b/sys/src/ape/lib/regexp/regexec.c @@ -0,0 +1,191 @@ +#include <stdlib.h> +#include <stdio.h> +#include "regexp.h" +#include "regcomp.h" + +static Resublist sempty; /* empty set of matches */ + +/* + * return 0 if no match + * >0 if a match + * <0 if we ran out of _relist space + */ +static int +regexec1(Reprog *progp, /* program to run */ + char *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + char *starts, + char *eol, + wchar_t startchar) +{ + int flag=0; + Reinst *inst; + Relist *tlp; + char *s; + int i, checkstart; + wchar_t r, *rp, *ep; + int n; + Relist* tl; /* This list, next list */ + Relist* nl; + Relist* tle; /* ends of this and next list */ + Relist* nle; + int match; + + match = 0; + checkstart = startchar; + sempty.m[0].s.sp = 0; + if(mp!=0) + for(i=0; i<ms; i++) + mp[i].s.sp = mp[i].e.ep = 0; + _relist[0][0].inst = _relist[1][0].inst = 0; + + /* Execute machine once for each character, including terminal NUL */ + s = starts; + do{ + /* fast check for first char */ + r = *(unsigned char*)s; + if(checkstart && r != startchar){ + s++; + continue; + } + + if(r < Runeself) + n = 1; + else { + n = mbtowc(&r, s, MB_CUR_MAX); + if (n <= 0) + n = 1; + } + + /* switch run lists */ + tl = _relist[flag]; + tle = _reliste[flag]; + nl = _relist[flag^=1]; + nle = _reliste[flag]; + nl->inst = 0; + + /* Add first instruction to current list */ + if(match == 0){ + sempty.m[0].s.sp = s; + _renewthread(tl, progp->startinst, &sempty); + } + + /* Execute machine until current list is empty */ + for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ + if(s == eol) + break; + + for(inst = tlp->inst; ; inst = inst->l.next){ + switch(inst->type){ + case RUNE: /* regular character */ + if(inst->r.r == r) + if(_renewthread(nl, inst->l.next, &tlp->se)==nle) + return -1; + break; + case LBRA: + tlp->se.m[inst->r.subid].s.sp = s; + continue; + case RBRA: + tlp->se.m[inst->r.subid].e.ep = s; + continue; + case ANY: + if(r != '\n') + if(_renewthread(nl, inst->l.next, &tlp->se)==nle) + return -1; + break; + case ANYNL: + if(_renewthread(nl, inst->l.next, &tlp->se)==nle) + return -1; + break; + case BOL: + if(s == bol || *(s-1) == '\n') + continue; + break; + case EOL: + if(r == 0 || r == '\n') + continue; + break; + case CCLASS: + ep = inst->r.cp->end; + for(rp = inst->r.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]){ + if(_renewthread(nl, inst->l.next, &tlp->se)==nle) + return -1; + break; + } + break; + case NCCLASS: + ep = inst->r.cp->end; + for(rp = inst->r.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]) + break; + if(rp == ep) + if(_renewthread(nl, inst->l.next, &tlp->se)==nle) + return -1; + break; + case OR: + /* evaluate right choice later */ + if(_renewthread(tlp, inst->r.right, &tlp->se) == tle) + return -1; + /* efficiency: advance and re-evaluate */ + continue; + case END: /* Match! */ + match = 1; + tlp->se.m[0].e.ep = s; + if(mp != 0) + _renewmatch(mp, ms, &tlp->se); + break; + } + break; + } + } + checkstart = startchar && nl->inst==0; + s += n; + }while(r); + return match; +} + +extern int +regexec(Reprog *progp, /* program to run */ + char *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms) /* number of elements at mp */ +{ + char *starts; /* where to start match */ + char *eol; /* where to end match */ + wchar_t startchar; + int rv; + + /* + * use user-specified starting/ending location if specified + */ + starts = bol; + eol = 0; + if(mp && ms>0){ + if(mp->s.sp) + starts = mp->s.sp; + if(mp->e.ep) + eol = mp->e.ep; + } + startchar = (progp->startinst->type == RUNE && progp->startinst->r.r < Runeself) + ? progp->startinst->r.r : 0; + + /* keep trying till we have enough list space to terminate */ + for(;;){ + if(_relist[0] == 0){ + _relist[0] = malloc(2*_relistsize*sizeof(Relist)); + _relist[1] = _relist[0] + _relistsize; + _reliste[0] = _relist[0] + _relistsize - 1; + _reliste[1] = _relist[1] + _relistsize - 1; + if(_relist[0] == 0) + regerror("_relist overflow"); + } + rv = regexec1(progp, bol, mp, ms, starts, eol, startchar); + if(rv >= 0) + return rv; + free(_relist[0]); + _relist[0] = 0; + _relistsize += LISTINCREMENT; + } +} |