diff options
author | ben <ben@rana> | 2016-04-26 22:26:03 -0500 |
---|---|---|
committer | ben <ben@rana> | 2016-04-26 22:26:03 -0500 |
commit | 0f8168038af32828fcdc39575dea0e4de0c01122 (patch) | |
tree | 58c3e51227504adb0fce952dee899363747ade89 /sys/src/cmd/upas | |
parent | 0a460e1722c50e31653359f8a86fe0b606d2b513 (diff) |
remove old libregexp files; add headers for upas/bayes
Diffstat (limited to 'sys/src/cmd/upas')
-rw-r--r-- | sys/src/cmd/upas/bayes/regcomp.h | 63 | ||||
-rw-r--r-- | sys/src/cmd/upas/bayes/regexp.h | 66 |
2 files changed, 129 insertions, 0 deletions
diff --git a/sys/src/cmd/upas/bayes/regcomp.h b/sys/src/cmd/upas/bayes/regcomp.h new file mode 100644 index 000000000..402fe7d5c --- /dev/null +++ b/sys/src/cmd/upas/bayes/regcomp.h @@ -0,0 +1,63 @@ +/* + * substitution list + */ +#define NSUBEXP 32 +typedef struct Resublist Resublist; +struct Resublist +{ + Resub m[NSUBEXP]; +}; + +/* + * Actions and Tokens (Reinst types) + * + * 02xx are operators, value == precedence + * 03xx are tokens, i.e. operands for operators + */ +#define RUNE 0177 +#define OPERATOR 0200 /* Bitmask of all operators */ +#define START 0200 /* Start, used for marker on stack */ +#define RBRA 0201 /* Right bracket, ) */ +#define LBRA 0202 /* Left bracket, ( */ +#define OR 0203 /* Alternation, | */ +#define CAT 0204 /* Concatentation, implicit operator */ +#define STAR 0205 /* Closure, * */ +#define PLUS 0206 /* a+ == aa* */ +#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */ +#define ANY 0300 /* Any character except newline, . */ +#define ANYNL 0301 /* Any character including newline, . */ +#define NOP 0302 /* No operation, internal use only */ +#define BOL 0303 /* Beginning of line, ^ */ +#define EOL 0304 /* End of line, $ */ +#define CCLASS 0305 /* Character class, [] */ +#define NCCLASS 0306 /* Negated character class, [] */ +#define END 0377 /* Terminate: match found */ + +/* + * regexec execution lists + */ +#define LISTSIZE 10 +#define BIGLISTSIZE (25*LISTSIZE) +typedef struct Relist Relist; +struct Relist +{ + Reinst* inst; /* Reinstruction of the thread */ + Resublist se; /* matched subexpressions in this thread */ +}; +typedef struct Reljunk Reljunk; +struct Reljunk +{ + Relist* relist[2]; + Relist* reliste[2]; + int starttype; + Rune startchar; + char* starts; + char* eol; + Rune* rstarts; + Rune* reol; +}; + +extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*); +extern void _renewmatch(Resub*, int, Resublist*); +extern Relist* _renewemptythread(Relist*, Reinst*, int, char*); +extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*); diff --git a/sys/src/cmd/upas/bayes/regexp.h b/sys/src/cmd/upas/bayes/regexp.h new file mode 100644 index 000000000..780dc8001 --- /dev/null +++ b/sys/src/cmd/upas/bayes/regexp.h @@ -0,0 +1,66 @@ +#pragma src "/sys/src/oldlibregexp" +#pragma lib "oldlibregexp.a" + +typedef struct Resub Resub; +typedef struct Reclass Reclass; +typedef struct Reinst Reinst; +typedef struct Reprog Reprog; + +/* + * Sub expression matches + */ +struct Resub{ + union + { + char *sp; + Rune *rsp; + }; + union + { + char *ep; + Rune *rep; + }; +}; + +/* + * character class, each pair of rune's defines a range + */ +struct Reclass{ + Rune *end; + Rune spans[64]; +}; + +/* + * Machine instructions + */ +struct Reinst{ + int type; + union { + Reclass *cp; /* class pointer */ + Rune r; /* character */ + int subid; /* sub-expression id for RBRA and LBRA */ + Reinst *right; /* right child of OR */ + }; + union { /* regexp relies on these two being in the same union */ + Reinst *left; /* left child of OR */ + Reinst *next; /* next instruction for CAT & LBRA */ + }; +}; + +/* + * Reprogram definition + */ +struct Reprog{ + Reinst *startinst; /* start pc */ + Reclass class[16]; /* .data */ + Reinst firstinst[5]; /* .text */ +}; + +extern Reprog *regcomp(char*); +extern Reprog *regcomplit(char*); +extern Reprog *regcompnl(char*); +extern void regerror(char*); +extern int regexec(Reprog*, char*, Resub*, int); +extern void regsub(char*, char*, int, Resub*, int); +extern int rregexec(Reprog*, Rune*, Resub*, int); +extern void rregsub(Rune*, Rune*, int, Resub*, int); |