diff options
author | ben <ben@rana> | 2016-04-27 07:52:41 -0500 |
---|---|---|
committer | ben <ben@rana> | 2016-04-27 07:52:41 -0500 |
commit | 85824350b5f65053053245d141aaf7d668089d28 (patch) | |
tree | 075fe72eef40032c692d31194b2faac58eec5d18 /sys/src/ape | |
parent | 0f8168038af32828fcdc39575dea0e4de0c01122 (diff) |
remove ape regexp library, add utility for awk native port
Diffstat (limited to 'sys/src/ape')
-rw-r--r-- | sys/src/ape/lib/mkfile | 2 | ||||
-rw-r--r-- | sys/src/ape/lib/regexp/mkfile | 15 | ||||
-rw-r--r-- | sys/src/ape/lib/regexp/regaux.c | 56 | ||||
-rw-r--r-- | sys/src/ape/lib/regexp/regcomp.c | 560 | ||||
-rw-r--r-- | sys/src/ape/lib/regexp/regcomp.h | 61 | ||||
-rw-r--r-- | sys/src/ape/lib/regexp/regerror.c | 16 | ||||
-rw-r--r-- | sys/src/ape/lib/regexp/regexec.c | 191 | ||||
-rw-r--r-- | sys/src/ape/lib/regexp/regsub.c | 64 | ||||
-rw-r--r-- | sys/src/ape/lib/regexp/rregexec.c | 181 | ||||
-rw-r--r-- | sys/src/ape/lib/regexp/rregsub.c | 64 |
10 files changed, 1 insertions, 1209 deletions
diff --git a/sys/src/ape/lib/mkfile b/sys/src/ape/lib/mkfile index 3762294d3..6c24c3674 100644 --- a/sys/src/ape/lib/mkfile +++ b/sys/src/ape/lib/mkfile @@ -1,6 +1,6 @@ </$objtype/mkfile -DIRS=9 ap auth bio bsd bz2 draw fmt l mp net regexp sec utf v z +DIRS=9 ap auth bio bsd bz2 draw fmt l mp net sec utf v z none:V: echo mk all, install, installall, clean, or nuke diff --git a/sys/src/ape/lib/regexp/mkfile b/sys/src/ape/lib/regexp/mkfile deleted file mode 100644 index e5d8bc209..000000000 --- a/sys/src/ape/lib/regexp/mkfile +++ /dev/null @@ -1,15 +0,0 @@ -APE=/sys/src/ape -<$APE/config - -LIB=/$objtype/lib/ape/libregexp.a -OFILES=regcomp.$O\ - regerror.$O\ - regexec.$O\ - regsub.$O\ - regaux.$O\ - rregexec.$O\ - rregsub.$O\ - -</sys/src/cmd/mksyslib - -CFLAGS=-c -DUTF -D_REGEXP_EXTENSION diff --git a/sys/src/ape/lib/regexp/regaux.c b/sys/src/ape/lib/regexp/regaux.c deleted file mode 100644 index 6643b8ff8..000000000 --- a/sys/src/ape/lib/regexp/regaux.c +++ /dev/null @@ -1,56 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include "regexp.h" -#include "regcomp.h" - -/* - * Machine state - */ -Relist* _relist[2]; -Relist* _reliste[2]; -int _relistsize = LISTINCREMENT; - -/* - * save a new match in mp - */ -extern void -_renewmatch(Resub *mp, int ms, Resublist *sp) -{ - int i; - - if(mp==0 || ms<=0) - return; - if(mp[0].s.sp==0 || sp->m[0].s.sp<mp[0].s.sp || - (sp->m[0].s.sp==mp[0].s.sp && sp->m[0].e.ep>mp[0].e.ep)){ - for(i=0; i<ms && i<NSUBEXP; i++) - mp[i] = sp->m[i]; - for(; i<ms; i++) - mp[i].s.sp = mp[i].e.ep = 0; - } -} - -/* - * Note optimization in _renewthread: - * *lp must be pending when _renewthread called; if *l has been looked - * at already, the optimization is a bug. - */ -extern Relist* -_renewthread(Relist *lp, /* _relist to add to */ - Reinst *ip, /* instruction to add */ - Resublist *sep) /* pointers to subexpressions */ -{ - Relist *p; - - for(p=lp; p->inst; p++){ - if(p->inst == ip){ - if((sep)->m[0].s.sp < p->se.m[0].s.sp) - p->se = *sep; - return 0; - } - } - p->inst = ip; - p->se = *sep; - (++p)->inst = 0; - return p; -} - diff --git a/sys/src/ape/lib/regexp/regcomp.c b/sys/src/ape/lib/regexp/regcomp.c deleted file mode 100644 index 5d31fc2f6..000000000 --- a/sys/src/ape/lib/regexp/regcomp.c +++ /dev/null @@ -1,560 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include <setjmp.h> -#include <string.h> -#include "regexp.h" -#include "regcomp.h" - -#define TRUE 1 -#define FALSE 0 - -/* - * Parser Information - */ -typedef -struct Node -{ - Reinst* first; - Reinst* last; -}Node; - -#define NSTACK 20 -static Node andstack[NSTACK]; -static Node *andp; -static int atorstack[NSTACK]; -static int* atorp; -static int cursubid; /* id of current subexpression */ -static int subidstack[NSTACK]; /* parallel to atorstack */ -static int* subidp; -static int lastwasand; /* Last token was operand */ -static int nbra; -static char* exprp; /* pointer to next character in source expression */ -static int lexdone; -static int nclass; -static Reclass*classp; -static Reinst* freep; -static int errors; -static wchar_t yyrune; /* last lex'd rune */ -static Reclass*yyclassp; /* last lex'd class */ - -/* predeclared crap */ -static void operator(int); -static void pushand(Reinst*, Reinst*); -static void pushator(int); -static void evaluntil(int); -static int bldcclass(void); - -static jmp_buf regkaboom; - -static void -rcerror(char *s) -{ - errors++; - regerror(s); - longjmp(regkaboom, 1); -} - -static Reinst* -newinst(int t) -{ - freep->type = t; - freep->l.left = 0; - freep->r.right = 0; - return freep++; -} - -static void -operand(int t) -{ - Reinst *i; - - if(lastwasand) - operator(CAT); /* catenate is implicit */ - i = newinst(t); - - if(t == CCLASS || t == NCCLASS) - i->r.cp = yyclassp; - if(t == RUNE) - i->r.r = yyrune; - - pushand(i, i); - lastwasand = TRUE; -} - -static void -operator(int t) -{ - if(t==RBRA && --nbra<0) - rcerror("unmatched right paren"); - if(t==LBRA){ - if(++cursubid >= NSUBEXP) - rcerror ("too many subexpressions"); - nbra++; - if(lastwasand) - operator(CAT); - } else - evaluntil(t); - if(t != RBRA) - pushator(t); - lastwasand = FALSE; - if(t==STAR || t==QUEST || t==PLUS || t==RBRA) - lastwasand = TRUE; /* these look like operands */ -} - -static void -regerr2(char *s, int c) -{ - char buf[100]; - char *cp = buf; - while(*s) - *cp++ = *s++; - *cp++ = c; - *cp = '\0'; - rcerror(buf); -} - -static void -cant(char *s) -{ - char buf[100]; - strcpy(buf, "can't happen: "); - strcat(buf, s); - rcerror(buf); -} - -static void -pushand(Reinst *f, Reinst *l) -{ - if(andp >= &andstack[NSTACK]) - cant("operand stack overflow"); - andp->first = f; - andp->last = l; - andp++; -} - -static void -pushator(int t) -{ - if(atorp >= &atorstack[NSTACK]) - cant("operator stack overflow"); - *atorp++ = t; - *subidp++ = cursubid; -} - -static Node* -popand(int op) -{ - Reinst *inst; - - if(andp <= &andstack[0]){ - regerr2("missing operand for ", op); - inst = newinst(NOP); - pushand(inst,inst); - } - return --andp; -} - -static int -popator(void) -{ - if(atorp <= &atorstack[0]) - cant("operator stack underflow"); - --subidp; - return *--atorp; -} - -static void -evaluntil(int pri) -{ - Node *op1, *op2; - Reinst *inst1, *inst2; - - while(pri==RBRA || atorp[-1]>=pri){ - switch(popator()){ - default: - rcerror("unknown operator in evaluntil"); - break; - case LBRA: /* must have been RBRA */ - op1 = popand('('); - inst2 = newinst(RBRA); - inst2->r.subid = *subidp; - op1->last->l.next = inst2; - inst1 = newinst(LBRA); - inst1->r.subid = *subidp; - inst1->l.next = op1->first; - pushand(inst1, inst2); - return; - case OR: - op2 = popand('|'); - op1 = popand('|'); - inst2 = newinst(NOP); - op2->last->l.next = inst2; - op1->last->l.next = inst2; - inst1 = newinst(OR); - inst1->r.right = op1->first; - inst1->l.left = op2->first; - pushand(inst1, inst2); - break; - case CAT: - op2 = popand(0); - op1 = popand(0); - op1->last->l.next = op2->first; - pushand(op1->first, op2->last); - break; - case STAR: - op2 = popand('*'); - inst1 = newinst(OR); - op2->last->l.next = inst1; - inst1->r.right = op2->first; - pushand(inst1, inst1); - break; - case PLUS: - op2 = popand('+'); - inst1 = newinst(OR); - op2->last->l.next = inst1; - inst1->r.right = op2->first; - pushand(op2->first, inst1); - break; - case QUEST: - op2 = popand('?'); - inst1 = newinst(OR); - inst2 = newinst(NOP); - inst1->l.left = inst2; - inst1->r.right = op2->first; - op2->last->l.next = inst2; - pushand(inst1, inst2); - break; - } - } -} - -static Reprog* -optimize(Reprog *pp) -{ - Reinst *inst, *target; - int size; - Reprog *npp; - int diff; - - /* - * get rid of NOOP chains - */ - for(inst=pp->firstinst; inst->type!=END; inst++){ - target = inst->l.next; - while(target->type == NOP) - target = target->l.next; - inst->l.next = target; - } - - /* - * The original allocation is for an area larger than - * necessary. Reallocate to the actual space used - * and then relocate the code. - */ - size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst); - npp = realloc(pp, size); - if(npp==0 || npp==pp) - return pp; - diff = (char *)npp - (char *)pp; - freep = (Reinst *)((char *)freep + diff); - for(inst=npp->firstinst; inst<freep; inst++){ - switch(inst->type){ - case OR: - case STAR: - case PLUS: - case QUEST: - case CCLASS: - case NCCLASS: - *(char **)&inst->r.right += diff; - break; - } - *(char **)&inst->l.left += diff; - } - *(char **)&npp->startinst += diff; - return npp; -} - -#ifdef DEBUG -static void -dumpstack(void){ - Node *stk; - int *ip; - - print("operators\n"); - for(ip=atorstack; ip<atorp; ip++) - print("0%o\n", *ip); - print("operands\n"); - for(stk=andstack; stk<andp; stk++) - print("0%o\t0%o\n", stk->first->type, stk->last->type); -} - -static void -dump(Reprog *pp) -{ - Reinst *l; - wchar_t *p; - - l = pp->firstinst; - do{ - print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type, - l->l.left-pp->firstinst, l->l.right-pp->firstinst); - if(l->type == RUNE) - print("\t%C\n", l->r); - else if(l->type == CCLASS || l->type == NCCLASS){ - print("\t["); - if(l->type == NCCLASS) - print("^"); - for(p = l->r.cp->spans; p < l->r.cp->end; p += 2) - if(p[0] == p[1]) - print("%C", p[0]); - else - print("%C-%C", p[0], p[1]); - print("]\n"); - } else - print("\n"); - }while(l++->type); -} -#endif - -static Reclass* -newclass(void) -{ - if(nclass >= NCLASS) - regerr2("too many character classes; limit", NCLASS+'0'); - return &(classp[nclass++]); -} - -static int -nextc(wchar_t *rp) -{ - int n; - - if(lexdone){ - *rp = 0; - return 1; - } - n = mbtowc(rp, exprp, MB_CUR_MAX); - if (n <= 0) - n = 1; - exprp += n; - if(*rp == L'\\'){ - n = mbtowc(rp, exprp, MB_CUR_MAX); - if (n <= 0) - n = 1; - exprp += n; - return 1; - } - if(*rp == 0) - lexdone = 1; - return 0; -} - -static int -lex(int literal, int dot_type) -{ - int quoted; - - quoted = nextc(&yyrune); - if(literal || quoted){ - if(yyrune == 0) - return END; - return RUNE; - } - - switch(yyrune){ - case 0: - return END; - case L'*': - return STAR; - case L'?': - return QUEST; - case L'+': - return PLUS; - case L'|': - return OR; - case L'.': - return dot_type; - case L'(': - return LBRA; - case L')': - return RBRA; - case L'^': - return BOL; - case L'$': - return EOL; - case L'[': - return bldcclass(); - } - return RUNE; -} - -static int -bldcclass(void) -{ - int type; - wchar_t r[NCCRUNE]; - wchar_t *p, *ep, *np; - wchar_t rune; - int quoted; - - /* we have already seen the '[' */ - type = CCLASS; - yyclassp = newclass(); - - /* look ahead for negation */ - ep = r; - quoted = nextc(&rune); - if(!quoted && rune == L'^'){ - type = NCCLASS; - quoted = nextc(&rune); - *ep++ = L'\n'; - *ep++ = L'\n'; - } - - /* parse class into a set of spans */ - for(; ep<&r[NCCRUNE];){ - if(rune == 0){ - rcerror("malformed '[]'"); - return 0; - } - if(!quoted && rune == L']') - break; - if(!quoted && rune == L'-'){ - if(ep == r){ - rcerror("malformed '[]'"); - return 0; - } - quoted = nextc(&rune); - if((!quoted && rune == L']') || rune == 0){ - rcerror("malformed '[]'"); - return 0; - } - *(ep-1) = rune; - } else { - *ep++ = rune; - *ep++ = rune; - } - quoted = nextc(&rune); - } - - /* sort on span start */ - for(p = r; p < ep; p += 2){ - for(np = p; np < ep; np += 2) - if(*np < *p){ - rune = np[0]; - np[0] = p[0]; - p[0] = rune; - rune = np[1]; - np[1] = p[1]; - p[1] = rune; - } - } - - /* merge spans */ - np = yyclassp->spans; - p = r; - if(r == ep) - yyclassp->end = np; - else { - np[0] = *p++; - np[1] = *p++; - for(; p < ep; p += 2) - if(p[0] <= np[1]){ - if(p[1] > np[1]) - np[1] = p[1]; - } else { - np += 2; - np[0] = p[0]; - np[1] = p[1]; - } - yyclassp->end = np+2; - } - - return type; -} - -static Reprog* -regcomp1(char *s, int literal, int dot_type) -{ - int token; - Reprog *pp; - - /* get memory for the program */ - pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s)); - if(pp == 0){ - regerror("out of memory"); - return 0; - } - freep = pp->firstinst; - classp = pp->class; - errors = 0; - - if(setjmp(regkaboom)) - goto out; - - /* go compile the sucker */ - lexdone = 0; - exprp = s; - nclass = 0; - nbra = 0; - atorp = atorstack; - andp = andstack; - subidp = subidstack; - lastwasand = FALSE; - cursubid = 0; - - /* Start with a low priority operator to prime parser */ - pushator(START-1); - while((token = lex(literal, dot_type)) != END){ - if((token&0300) == OPERATOR) - operator(token); - else - operand(token); - } - - /* Close with a low priority operator */ - evaluntil(START); - - /* Force END */ - operand(END); - evaluntil(START); -#ifdef DEBUG - dumpstack(); -#endif - if(nbra) - rcerror("unmatched left paren"); - --andp; /* points to first and only operand */ - pp->startinst = andp->first; -#ifdef DEBUG - dump(pp); -#endif - pp = optimize(pp); -#ifdef DEBUG - print("start: %d\n", andp->first-pp->firstinst); - dump(pp); -#endif -out: - if(errors){ - free(pp); - pp = 0; - } - return pp; -} - -extern Reprog* -regcomp(char *s) -{ - return regcomp1(s, 0, ANY); -} - -extern Reprog* -regcomplit(char *s) -{ - return regcomp1(s, 1, ANY); -} - -extern Reprog* -regcompnl(char *s) -{ - return regcomp1(s, 0, ANYNL); -} diff --git a/sys/src/ape/lib/regexp/regcomp.h b/sys/src/ape/lib/regexp/regcomp.h deleted file mode 100644 index 082e6137e..000000000 --- a/sys/src/ape/lib/regexp/regcomp.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * substitution list - */ -typedef struct Resublist Resublist; -struct Resublist -{ - Resub m[32]; -}; - -/* max subexpressions per program */ -Resublist ReSuBlIsT; -#define NSUBEXP (sizeof(ReSuBlIsT.m)/sizeof(Resub)) - -/* max character classes per program */ -Reprog RePrOg; -#define NCLASS (sizeof(RePrOg.class)/sizeof(Reclass)) - -/* max rune ranges per character class */ -#define NCCRUNE (sizeof(Reclass)/sizeof(wchar_t)) - -/* - * Actions and Tokens (Reinst types) - * - * 02xx are operators, value == precedence - * 03xx are tokens, i.e. operands for operators - */ -#define RUNE 0177 -#define OPERATOR 0200 /* Bitmask of all operators */ -#define START 0200 /* Start, used for marker on stack */ -#define RBRA 0201 /* Right bracket, ) */ -#define LBRA 0202 /* Left bracket, ( */ -#define OR 0203 /* Alternation, | */ -#define CAT 0204 /* Concatentation, implicit operator */ -#define STAR 0205 /* Closure, * */ -#define PLUS 0206 /* a+ == aa* */ -#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */ -#define ANY 0300 /* Any character except newline, . */ -#define ANYNL 0301 /* Any character including newline, . */ -#define NOP 0302 /* No operation, internal use only */ -#define BOL 0303 /* Beginning of line, ^ */ -#define EOL 0304 /* End of line, $ */ -#define CCLASS 0305 /* Character class, [] */ -#define NCCLASS 0306 /* Negated character class, [] */ -#define END 0377 /* Terminate: match found */ - -/* - * regexec execution lists - */ -#define LISTINCREMENT 8 -typedef struct Relist Relist; -struct Relist -{ - Reinst *inst; /* Reinstruction of the thread */ - Resublist se; /* matched subexpressions in this thread */ -}; -extern Relist* _relist[2]; -extern Relist* _reliste[2]; -extern int _relistsize; - -extern Relist* _renewthread(Relist*, Reinst*, Resublist*); -extern void _renewmatch(Resub*, int, Resublist*); diff --git a/sys/src/ape/lib/regexp/regerror.c b/sys/src/ape/lib/regexp/regerror.c deleted file mode 100644 index bfa158299..000000000 --- a/sys/src/ape/lib/regexp/regerror.c +++ /dev/null @@ -1,16 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include "regexp.h" - -void -regerror(char *s) -{ - char buf[132]; - - strcpy(buf, "regerror: "); - strcat(buf, s); - strcat(buf, "\n"); - fwrite(buf, 1, strlen(buf), stderr); - exit(1); -} diff --git a/sys/src/ape/lib/regexp/regexec.c b/sys/src/ape/lib/regexp/regexec.c deleted file mode 100644 index dee888b09..000000000 --- a/sys/src/ape/lib/regexp/regexec.c +++ /dev/null @@ -1,191 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include "regexp.h" -#include "regcomp.h" - -static Resublist sempty; /* empty set of matches */ - -/* - * return 0 if no match - * >0 if a match - * <0 if we ran out of _relist space - */ -static int -regexec1(Reprog *progp, /* program to run */ - char *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - char *starts, - char *eol, - wchar_t startchar) -{ - int flag=0; - Reinst *inst; - Relist *tlp; - char *s; - int i, checkstart; - wchar_t r, *rp, *ep; - int n; - Relist* tl; /* This list, next list */ - Relist* nl; - Relist* tle; /* ends of this and next list */ - Relist* nle; - int match; - - match = 0; - checkstart = startchar; - sempty.m[0].s.sp = 0; - if(mp!=0) - for(i=0; i<ms; i++) - mp[i].s.sp = mp[i].e.ep = 0; - _relist[0][0].inst = _relist[1][0].inst = 0; - - /* Execute machine once for each character, including terminal NUL */ - s = starts; - do{ - /* fast check for first char */ - r = *(unsigned char*)s; - if(checkstart && r != startchar){ - s++; - continue; - } - - if(r < Runeself) - n = 1; - else { - n = mbtowc(&r, s, MB_CUR_MAX); - if (n <= 0) - n = 1; - } - - /* switch run lists */ - tl = _relist[flag]; - tle = _reliste[flag]; - nl = _relist[flag^=1]; - nle = _reliste[flag]; - nl->inst = 0; - - /* Add first instruction to current list */ - if(match == 0){ - sempty.m[0].s.sp = s; - _renewthread(tl, progp->startinst, &sempty); - } - - /* Execute machine until current list is empty */ - for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ - if(s == eol) - break; - - for(inst = tlp->inst; ; inst = inst->l.next){ - switch(inst->type){ - case RUNE: /* regular character */ - if(inst->r.r == r) - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case LBRA: - tlp->se.m[inst->r.subid].s.sp = s; - continue; - case RBRA: - tlp->se.m[inst->r.subid].e.ep = s; - continue; - case ANY: - if(r != '\n') - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case ANYNL: - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case BOL: - if(s == bol || *(s-1) == '\n') - continue; - break; - case EOL: - if(r == 0 || r == '\n') - continue; - break; - case CCLASS: - ep = inst->r.cp->end; - for(rp = inst->r.cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]){ - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - } - break; - case NCCLASS: - ep = inst->r.cp->end; - for(rp = inst->r.cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]) - break; - if(rp == ep) - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case OR: - /* evaluate right choice later */ - if(_renewthread(tlp, inst->r.right, &tlp->se) == tle) - return -1; - /* efficiency: advance and re-evaluate */ - continue; - case END: /* Match! */ - match = 1; - tlp->se.m[0].e.ep = s; - if(mp != 0) - _renewmatch(mp, ms, &tlp->se); - break; - } - break; - } - } - checkstart = startchar && nl->inst==0; - s += n; - }while(r); - return match; -} - -extern int -regexec(Reprog *progp, /* program to run */ - char *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms) /* number of elements at mp */ -{ - char *starts; /* where to start match */ - char *eol; /* where to end match */ - wchar_t startchar; - int rv; - - /* - * use user-specified starting/ending location if specified - */ - starts = bol; - eol = 0; - if(mp && ms>0){ - if(mp->s.sp) - starts = mp->s.sp; - if(mp->e.ep) - eol = mp->e.ep; - } - startchar = (progp->startinst->type == RUNE && progp->startinst->r.r < Runeself) - ? progp->startinst->r.r : 0; - - /* keep trying till we have enough list space to terminate */ - for(;;){ - if(_relist[0] == 0){ - _relist[0] = malloc(2*_relistsize*sizeof(Relist)); - _relist[1] = _relist[0] + _relistsize; - _reliste[0] = _relist[0] + _relistsize - 1; - _reliste[1] = _relist[1] + _relistsize - 1; - if(_relist[0] == 0) - regerror("_relist overflow"); - } - rv = regexec1(progp, bol, mp, ms, starts, eol, startchar); - if(rv >= 0) - return rv; - free(_relist[0]); - _relist[0] = 0; - _relistsize += LISTINCREMENT; - } -} diff --git a/sys/src/ape/lib/regexp/regsub.c b/sys/src/ape/lib/regexp/regsub.c deleted file mode 100644 index 0c891e9c6..000000000 --- a/sys/src/ape/lib/regexp/regsub.c +++ /dev/null @@ -1,64 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include "regexp.h" - -/* substitute into one string using the matches from the last regexec() */ -extern void -regsub(char *sp, /* source string */ - char *dp, /* destination string */ - int dlen, - Resub *mp, /* subexpression elements */ - int ms) /* number of elements pointed to by mp */ -{ - char *ssp, *ep; - int i; - - ep = dp+dlen-1; - while(*sp != '\0'){ - if(*sp == '\\'){ - switch(*++sp){ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - i = *sp-'0'; - if(mp[i].s.sp != 0 && mp!=0 && ms>i) - for(ssp = mp[i].s.sp; - ssp < mp[i].e.ep; - ssp++) - if(dp < ep) - *dp++ = *ssp; - break; - case '\\': - if(dp < ep) - *dp++ = '\\'; - break; - case '\0': - sp--; - break; - default: - if(dp < ep) - *dp++ = *sp; - break; - } - }else if(*sp == '&'){ - if(mp[0].s.sp != 0 && mp!=0 && ms>0) - if(mp[0].s.sp != 0) - for(ssp = mp[0].s.sp; - ssp < mp[0].e.ep; ssp++) - if(dp < ep) - *dp++ = *ssp; - }else{ - if(dp < ep) - *dp++ = *sp; - } - sp++; - } - *dp = '\0'; -} diff --git a/sys/src/ape/lib/regexp/rregexec.c b/sys/src/ape/lib/regexp/rregexec.c deleted file mode 100644 index 519ac25ad..000000000 --- a/sys/src/ape/lib/regexp/rregexec.c +++ /dev/null @@ -1,181 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include "regexp.h" -#include "regcomp.h" - -static Resublist sempty; /* empty set of matches */ - -/* - * return 0 if no match - * >0 if a match - * <0 if we ran out of _relist space - */ -static int -rregexec1(Reprog *progp, /* program to run */ - wchar_t *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - wchar_t *starts, - wchar_t *eol, - wchar_t startchar) -{ - int flag=0; - Reinst *inst; - Relist *tlp; - wchar_t *s; - int i, checkstart; - wchar_t r, *rp, *ep; - int n; - Relist* tl; /* This list, next list */ - Relist* nl; - Relist* tle; /* ends of this and next list */ - Relist* nle; - int match; - - match = 0; - checkstart = startchar; - sempty.m[0].s.rsp = 0; - if(mp!=0) - for(i=0; i<ms; i++) - mp[i].s.rsp = mp[i].e.rep = 0; - _relist[0][0].inst = _relist[1][0].inst = 0; - - /* Execute machine once for each character, including terminal NUL */ - s = starts; - do{ - r = *s; - - /* fast check for first char */ - if(checkstart && r!=startchar){ - s++; - continue; - } - - /* switch run lists */ - tl = _relist[flag]; - tle = _reliste[flag]; - nl = _relist[flag^=1]; - nle = _reliste[flag]; - nl->inst = 0; - - /* Add first instruction to current list */ - sempty.m[0].s.rsp = s; - _renewthread(tl, progp->startinst, &sempty); - - /* Execute machine until current list is empty */ - for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ - if(s == eol) - break; - - for(inst=tlp->inst; ; inst = inst->l.next){ - switch(inst->type){ - case RUNE: /* regular character */ - if(inst->r.r == r) - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case LBRA: - tlp->se.m[inst->r.subid].s.rsp = s; - continue; - case RBRA: - tlp->se.m[inst->r.subid].e.rep = s; - continue; - case ANY: - if(r != '\n') - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case ANYNL: - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case BOL: - if(s == bol || *(s-1) == '\n') - continue; - break; - case EOL: - if(r == 0 || r == '\n') - continue; - break; - case CCLASS: - ep = inst->r.cp->end; - for(rp = inst->r.cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]){ - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - } - break; - case NCCLASS: - ep = inst->r.cp->end; - for(rp = inst->r.cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]) - break; - if(rp == ep) - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case OR: - /* evaluate right choice later */ - if(_renewthread(tlp, inst->r.right, &tlp->se) == tle) - return -1; - /* efficiency: advance and re-evaluate */ - continue; - case END: /* Match! */ - match = 1; - tlp->se.m[0].e.rep = s; - if(mp != 0) - _renewmatch(mp, ms, &tlp->se); - break; - } - break; - } - } - checkstart = startchar && nl->inst==0; - s++; - }while(r); - return match; -} - -extern int -rregexec(Reprog *progp, /* program to run */ - wchar_t *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms) /* number of elements at mp */ -{ - wchar_t *starts; /* where to start match */ - wchar_t *eol; /* where to end match */ - wchar_t startchar; - int rv; - - /* - * use user-specified starting/ending location if specified - */ - starts = bol; - eol = 0; - if(mp && ms>0){ - if(mp->s.rsp) - starts = mp->s.rsp; - if(mp->e.rep) - eol = mp->e.rep; - } - startchar = progp->startinst->type == RUNE ? progp->startinst->r.r : 0; - - /* keep trying till we have enough list space to terminate */ - for(;;){ - if(_relist[0] == 0){ - _relist[0] = malloc(2*_relistsize*sizeof(Relist)); - _relist[1] = _relist[0] + _relistsize; - _reliste[0] = _relist[0] + _relistsize - 1; - _reliste[1] = _relist[1] + _relistsize - 1; - if(_relist[0] == 0) - regerror("_relist overflow"); - } - rv = rregexec1(progp, bol, mp, ms, starts, eol, startchar); - if(rv >= 0) - return rv; - free(_relist[0]); - _relist[0] = 0; - _relistsize += LISTINCREMENT; - } -} diff --git a/sys/src/ape/lib/regexp/rregsub.c b/sys/src/ape/lib/regexp/rregsub.c deleted file mode 100644 index 5fc3e5960..000000000 --- a/sys/src/ape/lib/regexp/rregsub.c +++ /dev/null @@ -1,64 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include "regexp.h" - -/* substitute into one string using the matches from the last regexec() */ -extern void -rregsub(wchar_t *sp, /* source string */ - wchar_t *dp, /* destination string */ - int dlen, - Resub *mp, /* subexpression elements */ - int ms) /* number of elements pointed to by mp */ -{ - wchar_t *ssp, *ep; - int i; - - ep = dp+(dlen/sizeof(wchar_t))-1; - while(*sp != '\0'){ - if(*sp == '\\'){ - switch(*++sp){ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - i = *sp-'0'; - if(mp[i].s.rsp != 0 && mp!=0 && ms>i) - for(ssp = mp[i].s.rsp; - ssp < mp[i].e.rep; - ssp++) - if(dp < ep) - *dp++ = *ssp; - break; - case '\\': - if(dp < ep) - *dp++ = '\\'; - break; - case '\0': - sp--; - break; - default: - if(dp < ep) - *dp++ = *sp; - break; - } - }else if(*sp == '&'){ - if(mp[0].s.rsp != 0 && mp!=0 && ms>0) - if(mp[0].s.rsp != 0) - for(ssp = mp[0].s.rsp; - ssp < mp[0].e.rep; ssp++) - if(dp < ep) - *dp++ = *ssp; - }else{ - if(dp < ep) - *dp++ = *sp; - } - sp++; - } - *dp = '\0'; -} |