summary refs log tree commit diff
path: root/sys/src/ape/cmd/expr/regexp.h
diff options
context:
space:
mode:
author Taru Karttunen <taruti@taruti.net> 2011-03-30 15:46:40 +0300
committer Taru Karttunen <taruti@taruti.net> 2011-03-30 15:46:40 +0300
commit e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
tree d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/ape/cmd/expr/regexp.h
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/ape/cmd/expr/regexp.h')
-rwxr-xr-x sys/src/ape/cmd/expr/regexp.h 410
1 files changed, 410 insertions, 0 deletions
diff --git a/sys/src/ape/cmd/expr/regexp.h b/sys/src/ape/cmd/expr/regexp.h
new file mode 100755
index 000000000..7bce33f9b
--- /dev/null
+++ b/sys/src/ape/cmd/expr/regexp.h
@@ -0,0 +1,410 @@
+#define CBRA 2 /* opcode: \( group start; followed by the group number */
+#define CCHR 4 /* opcode: literal character; followed by that character */
+#define CDOT 8 /* opcode: '.', matches any character other than NUL */
+#define CCL 12 /* opcode: [...] class; followed by a 16-byte bitmap */
+#define CDOL 20 /* opcode: '$' directly before the pattern delimiter */
+#define CEOF 22 /* opcode: end of the compiled expression */
+#define CKET 24 /* opcode: \) group end; followed by the group number */
+#define CBACK 36 /* opcode: \1..\9 back-reference; followed by group index */
+/* Modifier bits that compile() ORs into the opcode of the previous element. */
+#define STAR 01 /* set by '*' */
+#define RNGE 03 /* set by \{m,n\}; two bound bytes are appended */
+/* Size of braslist/braelist; compile() raises ERROR(43) beyond this many \( groups. */
+#define NBRA 9
+/* Bitmap helpers for CCL: both use a variable named ep from the caller's scope, and both expand c twice without parentheses. */
+#define PLACE(c) ep[c >> 3] |= bittab[c & 07]
+#define ISTHERE(c) (ep[c >> 3] & bittab[c & 07])
+/*
+ * State shared by compile(), step(), advance() and getrnge().
+ * NOTE(review): these are definitions rather than extern declarations, so
+ * this header is presumably meant for a single translation unit - confirm.
+ */
+char *braslist[NBRA]; /* text position recorded by advance() at each CBRA */
+char *braelist[NBRA]; /* text position recorded by advance() at each CKET */
+int nbra, ebra; /* counts of \( and \) seen by compile() */
+char *loc1, *loc2, *locs; /* loc1: match start (step); loc2: match end (advance); locs: position where star backtracking stops, never assigned in this file */
+int sed; /* read by compile() for the empty-pattern check; never assigned in this file */
+
+int circf; /* non-zero when the pattern began with '^' */
+int low; /* minimum repeat count, set by getrnge() */
+int size; /* additional optional repeats, set by getrnge() */
+
+char bittab[] = { /* single-bit masks indexed by (c & 07) in PLACE/ISTHERE */
+ 1,
+ 2,
+ 4,
+ 8,
+ 16,
+ 32,
+ 64,
+ 128
+};
+/*
+ * compile: translate a regular expression, read one character at a time
+ * through the GETC()/PEEKC()/UNGETC() macros, into opcode form at ep.
+ *
+ * instring - only used in this function to initialise lastep (which is
+ *            then reset to 0); presumably consumed by the INIT/GETC
+ *            macros, which are not visible in this file.
+ * ep       - output buffer for the compiled expression.
+ * endbuf   - end of the output buffer; ERROR(50) is raised on overflow.
+ * seof     - delimiter character that terminates the pattern.
+ *
+ * INIT, GETC, PEEKC, UNGETC, RETURN and ERROR must be supplied by the
+ * file that includes this header. RETURN() is invoked with the position
+ * just past the CEOF opcode, or with ep unchanged for an empty pattern.
+ * Side effects: sets circf, nbra and ebra.
+ * NOTE(review): several ERROR() calls are followed directly by another
+ * case label, so the code presumably relies on ERROR() not falling
+ * through - confirm against the includer's definition.
+ */
+char *
+compile(instring, ep, endbuf, seof)
+register char *ep;
+char *instring, *endbuf;
+{
+ INIT /* Dependent declarations and initializations */
+ register c;
+ register eof = seof;
+ char *lastep = instring;
+ int cclcnt;
+ char bracket[NBRA], *bracketp; /* stack of currently open group numbers */
+ int closed; /* number of groups closed so far */
+ char neg;
+ int lc; /* previous character inside [...], for a-z ranges */
+ int i, cflg;
+
+ lastep = 0;
+ if((c = GETC()) == eof) { /* empty pattern: keep whatever is already at ep */
+ if(*ep == 0 && !sed)
+ ERROR(41);
+ RETURN(ep);
+ }
+ bracketp = bracket;
+ circf = closed = nbra = ebra = 0;
+ if (c == '^')
+ circf++;
+ else
+ UNGETC(c);
+ for (;;) {
+ if (ep >= endbuf)
+ ERROR(50);
+ if((c = GETC()) != '*' && ((c != '\\') || (PEEKC() != '{')))
+ lastep = ep; /* start of the next element, unless c is a repeat operator acting on the previous one */
+ if (c == eof) {
+ *ep++ = CEOF;
+ RETURN(ep);
+ }
+ switch (c) {
+
+ case '.':
+ *ep++ = CDOT;
+ continue;
+
+ case '\n':
+ ERROR(36);
+ case '*':
+ if (lastep==0 || *lastep==CBRA || *lastep==CKET) /* nothing repeatable before it: literal '*' */
+ goto defchar;
+ *lastep |= STAR;
+ continue;
+
+ case '$':
+ if(PEEKC() != eof) /* '$' is special only right before the delimiter */
+ goto defchar;
+ *ep++ = CDOL;
+ continue;
+
+ case '[':
+ if(&ep[17] >= endbuf) /* need room for the opcode plus the 16-byte bitmap */
+ ERROR(50);
+
+ *ep++ = CCL;
+ lc = 0;
+ for(i = 0; i < 16; i++)
+ ep[i] = 0;
+
+ neg = 0;
+ if((c = GETC()) == '^') {
+ neg = 1;
+ c = GETC();
+ }
+
+ do {
+ if(c == '\0' || c == '\n')
+ ERROR(49);
+ if(c == '-' && lc != 0) { /* range: lc-c */
+ if ((c = GETC()) == ']') { /* '-' just before ']' is a literal '-' */
+ PLACE('-');
+ break;
+ }
+ while(lc < c) {
+ PLACE(lc);
+ lc++;
+ }
+ }
+ lc = c;
+ PLACE(c);
+ } while((c = GETC()) != ']');
+ if(neg) {
+ for(cclcnt = 0; cclcnt < 16; cclcnt++)
+ ep[cclcnt] ^= -1;
+ ep[0] &= 0376; /* clear bit 0 so character 0 is never in a negated class */
+ }
+
+ ep += 16;
+
+ continue;
+
+ case '\\':
+ switch(c = GETC()) {
+
+ case '(':
+ if(nbra >= NBRA)
+ ERROR(43);
+ *bracketp++ = nbra;
+ *ep++ = CBRA;
+ *ep++ = nbra++;
+ continue;
+
+ case ')':
+ if(bracketp <= bracket || ++ebra != nbra) /* no open group, or closes do not equal opens after this one */
+ ERROR(42);
+ *ep++ = CKET;
+ *ep++ = *--bracketp;
+ closed++;
+ continue;
+
+ case '{':
+ if(lastep == (char *) (0)) /* nothing to repeat: emit a literal '{' */
+ goto defchar;
+ *lastep |= RNGE;
+ cflg = 0; /* becomes non-zero once a ',' has been seen */
+ nlim:
+ c = GETC();
+ i = 0;
+ do {
+ if ('0' <= c && c <= '9')
+ i = 10 * i + c - '0';
+ else
+ ERROR(16);
+ } while(((c = GETC()) != '\\') && (c != ','));
+ if (i > 255) /* each bound is stored in one byte */
+ ERROR(11);
+ *ep++ = i;
+ if (c == ',') {
+ if(cflg++)
+ ERROR(44);
+ if((c = GETC()) == '\\')
+ *ep++ = 255; /* \{m,\}: 255 marks "no upper bound" for getrnge() */
+ else {
+ UNGETC(c);
+ goto nlim; /* get 2'nd number */
+ }
+ }
+ if(GETC() != '}')
+ ERROR(45);
+ if(!cflg) /* one number */
+ *ep++ = i;
+ else if((ep[-1] & 0377) < (ep[-2] & 0377)) /* upper bound below lower bound */
+ ERROR(46);
+ continue;
+
+ case '\n':
+ ERROR(36);
+
+ case 'n':
+ c = '\n'; /* the two characters \n stand for a newline */
+ goto defchar;
+
+ default:
+ if(c >= '1' && c <= '9') {
+ if((c -= '1') >= closed) /* back-reference index must be below the number of closed groups */
+ ERROR(25);
+ *ep++ = CBACK;
+ *ep++ = c;
+ continue;
+ }
+ }
+ /* Drop through to default to use \ to turn off special chars */
+
+ defchar:
+ default:
+ lastep = ep;
+ *ep++ = CCHR;
+ *ep++ = c;
+ }
+ }
+}
+/*
+ * step: search the NUL-terminated string p1 for a match of the compiled
+ * expression p2.
+ * Returns 1 and sets loc1 to the start of the match when advance()
+ * succeeds (advance() sets loc2 to its end); returns 0 otherwise.
+ * When circf is set only the position p1 itself is tried.
+ */
+step(p1, p2)
+register char *p1, *p2;
+{
+ register c;
+
+ if (circf) {
+ loc1 = p1;
+ return(advance(p1, p2));
+ }
+ /* fast check for first character */
+ if (*p2==CCHR) {
+ c = p2[1];
+ do {
+ if (*p1 != c)
+ continue; /* jumps to the while test, which also advances p1 */
+ if (advance(p1, p2)) {
+ loc1 = p1;
+ return(1);
+ }
+ } while (*p1++);
+ return(0);
+ }
+ /* regular algorithm */
+ do { /* try every start position, including the terminating NUL */
+ if (advance(p1, p2)) {
+ loc1 = p1;
+ return(1);
+ }
+ } while (*p1++);
+ return(0);
+}
+/*
+ * advance: match the compiled expression ep against the text starting
+ * exactly at lp.
+ * Returns 1 when the CEOF opcode is reached, after setting loc2 to the
+ * text position at that point; returns 0 when an element fails.
+ * CBRA/CKET record text positions in braslist/braelist for later CBACK
+ * opcodes. Starred and ranged elements consume as much as they can and
+ * then back off, calling advance() recursively for the remainder.
+ * NOTE(review): the switch has no default case, so an unrecognised
+ * opcode is skipped and the following byte is read as the next opcode.
+ */
+advance(lp, ep)
+register char *lp, *ep;
+{
+ register char *curlp; /* lowest text position the star loop may back off to */
+ char c;
+ char *bbeg;
+ int ct;
+
+ for (;;) switch (*ep++) {
+
+ case CCHR:
+ if (*ep++ == *lp++)
+ continue;
+ return(0);
+
+ case CDOT:
+ if (*lp++)
+ continue;
+ return(0);
+
+ case CDOL:
+ if (*lp==0)
+ continue;
+ return(0);
+
+ case CEOF:
+ loc2 = lp;
+ return(1);
+
+ case CCL:
+ c = *lp++ & 0177; /* only the low 7 bits of the text character index the bitmap */
+ if(ISTHERE(c)) {
+ ep += 16;
+ continue;
+ }
+ return(0);
+ case CBRA:
+ braslist[*ep++] = lp;
+ continue;
+
+ case CKET:
+ braelist[*ep++] = lp;
+ continue;
+
+ case CCHR|RNGE:
+ c = *ep++;
+ getrnge(ep);
+ while(low--) /* the mandatory repetitions must all match */
+ if(*lp++ != c)
+ return(0);
+ curlp = lp;
+ while(size--) /* then take up to size optional repetitions */
+ if(*lp++ != c)
+ break;
+ if(size < 0) /* loop ran out without a mismatch: offset the --lp done first at star */
+ lp++;
+ ep += 2;
+ goto star;
+
+ case CDOT|RNGE:
+ getrnge(ep);
+ while(low--)
+ if(*lp++ == '\0')
+ return(0);
+ curlp = lp;
+ while(size--)
+ if(*lp++ == '\0')
+ break;
+ if(size < 0)
+ lp++;
+ ep += 2;
+ goto star;
+
+ case CCL|RNGE:
+ getrnge(ep + 16); /* the bound bytes follow the 16-byte bitmap */
+ while(low--) {
+ c = *lp++ & 0177;
+ if(!ISTHERE(c))
+ return(0);
+ }
+ curlp = lp;
+ while(size--) {
+ c = *lp++ & 0177;
+ if(!ISTHERE(c))
+ break;
+ }
+ if(size < 0)
+ lp++;
+ ep += 18; /* 16 + 2 */
+ goto star;
+
+ case CBACK:
+ bbeg = braslist[*ep];
+ ct = braelist[*ep++] - bbeg; /* length of the text the group matched */
+
+ if(ecmp(bbeg, lp, ct)) {
+ lp += ct;
+ continue;
+ }
+ return(0);
+
+ case CBACK|STAR: /* NOTE(review): when ct is 0, ecmp() returns 1 and lp never moves, so both loops below look unbounded - confirm */
+ bbeg = braslist[*ep];
+ ct = braelist[*ep++] - bbeg;
+ curlp = lp;
+ while(ecmp(bbeg, lp, ct))
+ lp += ct;
+
+ while(lp >= curlp) { /* back off one whole copy of the group text at a time */
+ if(advance(lp, ep)) return(1);
+ lp -= ct;
+ }
+ return(0);
+
+
+ case CDOT|STAR:
+ curlp = lp;
+ while (*lp++);
+ goto star;
+
+ case CCHR|STAR:
+ curlp = lp;
+ while (*lp++ == *ep);
+ ep++;
+ goto star;
+
+ case CCL|STAR:
+ curlp = lp;
+ do {
+ c = *lp++ & 0177;
+ } while(ISTHERE(c));
+ ep += 16;
+ goto star;
+
+ star: /* lp is one past the longest run; retry the rest at each shorter length */
+ do {
+ if(--lp == locs)
+ break;
+ if (advance(lp, ep))
+ return(1);
+ } while (lp > curlp);
+ return(0);
+
+ }
+}
+/*
+ * getrnge: decode the two bound bytes of a \{m,n\} interval at str.
+ * Sets the global low to the first byte (minimum count) and the global
+ * size to the number of further optional repetitions: the second byte
+ * minus low, or 20000 when the second byte is 255, the marker compile()
+ * stores for an open-ended \{m,\}.
+ * The second byte is masked with 0377 before it is compared with 255:
+ * it is read through a plain char pointer, and where char is signed an
+ * unmasked *str can never equal 255, so the marker would be missed.
+ */
+getrnge(str)
+register char *str;
+{
+ low = *str++ & 0377;
+ size = (*str & 0377) == 255 ? 20000 : (*str & 0377) - low;
+}
+/*
+ * ecmp: compare the first count characters at a and b.
+ * Returns 1 when they are all equal (also when count is 0), else 0.
+ */
+ecmp(a, b, count)
+register char *a, *b;
+register count;
+{
+ while(count--)
+ if(*a++ != *b++) return(0);
+ return(1);
+}