diff options
author | cinap_lenrek <cinap_lenrek@felloff.net> | 2016-04-28 20:53:53 +0200 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@felloff.net> | 2016-04-28 20:53:53 +0200 |
commit | 4c21da2ededfb5fc628dbee0d75b0aa5263ef97b (patch) | |
tree | 1d3e8ab7c177b55c74e03c5f4c26f31b523abfbc /sys/src/cmd/upas | |
parent | ca5b491753ad363da8d9deb5978b56231ec27bf5 (diff) | |
parent | eb168924840977fe0941d4fc90e56db0d484a6e4 (diff) |
merge
Diffstat (limited to 'sys/src/cmd/upas')
-rw-r--r-- | sys/src/cmd/upas/bayes/addhash.c | 2 | ||||
-rw-r--r-- | sys/src/cmd/upas/bayes/bayes.c | 2 | ||||
-rw-r--r-- | sys/src/cmd/upas/bayes/dfa.c | 4 | ||||
-rw-r--r-- | sys/src/cmd/upas/bayes/dump.c | 2 | ||||
-rw-r--r-- | sys/src/cmd/upas/bayes/msgtok.c | 2 | ||||
-rw-r--r-- | sys/src/cmd/upas/bayes/regcomp.c | 2 | ||||
-rw-r--r-- | sys/src/cmd/upas/bayes/regcomp.h | 63 | ||||
-rw-r--r-- | sys/src/cmd/upas/bayes/regen.c | 2 | ||||
-rw-r--r-- | sys/src/cmd/upas/bayes/regexp.h | 66 |
9 files changed, 137 insertions, 8 deletions
diff --git a/sys/src/cmd/upas/bayes/addhash.c b/sys/src/cmd/upas/bayes/addhash.c index 91fb90dc3..b3f843ac3 100644 --- a/sys/src/cmd/upas/bayes/addhash.c +++ b/sys/src/cmd/upas/bayes/addhash.c @@ -1,7 +1,7 @@ #include <u.h> #include <libc.h> #include <bio.h> -#include <regexp.h> +#include "regexp.h" #include "hash.h" Hash hash; diff --git a/sys/src/cmd/upas/bayes/bayes.c b/sys/src/cmd/upas/bayes/bayes.c index a04042906..e4ebad736 100644 --- a/sys/src/cmd/upas/bayes/bayes.c +++ b/sys/src/cmd/upas/bayes/bayes.c @@ -1,7 +1,7 @@ #include <u.h> #include <libc.h> #include <bio.h> -#include <regexp.h> +#include "regexp.h" #include "hash.h" enum diff --git a/sys/src/cmd/upas/bayes/dfa.c b/sys/src/cmd/upas/bayes/dfa.c index 533d5958a..9c64c8cc2 100644 --- a/sys/src/cmd/upas/bayes/dfa.c +++ b/sys/src/cmd/upas/bayes/dfa.c @@ -2,8 +2,8 @@ #include <libc.h> #include <bin.h> #include <bio.h> -#include <regexp.h> -#include "/sys/src/libregexp/regcomp.h" +#include "regexp.h" +#include "regcomp.h" #include "dfa.h" void rdump(Reprog*); diff --git a/sys/src/cmd/upas/bayes/dump.c b/sys/src/cmd/upas/bayes/dump.c index 9c7babd35..3753a9095 100644 --- a/sys/src/cmd/upas/bayes/dump.c +++ b/sys/src/cmd/upas/bayes/dump.c @@ -1,7 +1,7 @@ #include <u.h> #include <libc.h> #include <bio.h> -#include <regexp.h> +#include "regexp.h" #include "/sys/src/libregexp/regcomp.h" #include "dfa.h" diff --git a/sys/src/cmd/upas/bayes/msgtok.c b/sys/src/cmd/upas/bayes/msgtok.c index cbba30dc8..9f640df04 100644 --- a/sys/src/cmd/upas/bayes/msgtok.c +++ b/sys/src/cmd/upas/bayes/msgtok.c @@ -7,7 +7,7 @@ #include <u.h> #include <libc.h> #include <bio.h> -#include <regexp.h> +#include "regexp.h" #include <ctype.h> #include "dfa.h" diff --git a/sys/src/cmd/upas/bayes/regcomp.c b/sys/src/cmd/upas/bayes/regcomp.c index 8afaf2157..1ef57a4cb 100644 --- a/sys/src/cmd/upas/bayes/regcomp.c +++ b/sys/src/cmd/upas/bayes/regcomp.c @@ -4,7 +4,7 @@ #include <u.h> #include <libc.h> #include "regexp.h" -#include "/sys/src/libregexp/regcomp.h" +#include "regcomp.h" #define TRUE 1 #define FALSE 0 diff --git a/sys/src/cmd/upas/bayes/regcomp.h b/sys/src/cmd/upas/bayes/regcomp.h new file mode 100644 index 000000000..402fe7d5c --- /dev/null +++ b/sys/src/cmd/upas/bayes/regcomp.h @@ -0,0 +1,63 @@ +/* + * substitution list + */ +#define NSUBEXP 32 +typedef struct Resublist Resublist; +struct Resublist +{ + Resub m[NSUBEXP]; +}; + +/* + * Actions and Tokens (Reinst types) + * + * 02xx are operators, value == precedence + * 03xx are tokens, i.e. operands for operators + */ +#define RUNE 0177 +#define OPERATOR 0200 /* Bitmask of all operators */ +#define START 0200 /* Start, used for marker on stack */ +#define RBRA 0201 /* Right bracket, ) */ +#define LBRA 0202 /* Left bracket, ( */ +#define OR 0203 /* Alternation, | */ +#define CAT 0204 /* Concatentation, implicit operator */ +#define STAR 0205 /* Closure, * */ +#define PLUS 0206 /* a+ == aa* */ +#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */ +#define ANY 0300 /* Any character except newline, . */ +#define ANYNL 0301 /* Any character including newline, . */ +#define NOP 0302 /* No operation, internal use only */ +#define BOL 0303 /* Beginning of line, ^ */ +#define EOL 0304 /* End of line, $ */ +#define CCLASS 0305 /* Character class, [] */ +#define NCCLASS 0306 /* Negated character class, [] */ +#define END 0377 /* Terminate: match found */ + +/* + * regexec execution lists + */ +#define LISTSIZE 10 +#define BIGLISTSIZE (25*LISTSIZE) +typedef struct Relist Relist; +struct Relist +{ + Reinst* inst; /* Reinstruction of the thread */ + Resublist se; /* matched subexpressions in this thread */ +}; +typedef struct Reljunk Reljunk; +struct Reljunk +{ + Relist* relist[2]; + Relist* reliste[2]; + int starttype; + Rune startchar; + char* starts; + char* eol; + Rune* rstarts; + Rune* reol; +}; + +extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*); +extern void _renewmatch(Resub*, int, Resublist*); +extern Relist* _renewemptythread(Relist*, Reinst*, int, char*); +extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*); diff --git a/sys/src/cmd/upas/bayes/regen.c b/sys/src/cmd/upas/bayes/regen.c index 4f550095c..9bc7bdc85 100644 --- a/sys/src/cmd/upas/bayes/regen.c +++ b/sys/src/cmd/upas/bayes/regen.c @@ -1,7 +1,7 @@ #include <u.h> #include <libc.h> #include <bio.h> -#include <regexp.h> +#include "regexp.h" #include "dfa.h" /*** diff --git a/sys/src/cmd/upas/bayes/regexp.h b/sys/src/cmd/upas/bayes/regexp.h new file mode 100644 index 000000000..780dc8001 --- /dev/null +++ b/sys/src/cmd/upas/bayes/regexp.h @@ -0,0 +1,66 @@ +#pragma src "/sys/src/oldlibregexp" +#pragma lib "oldlibregexp.a" + +typedef struct Resub Resub; +typedef struct Reclass Reclass; +typedef struct Reinst Reinst; +typedef struct Reprog Reprog; + +/* + * Sub expression matches + */ +struct Resub{ + union + { + char *sp; + Rune *rsp; + }; + union + { + char *ep; + Rune *rep; + }; +}; + +/* + * character class, each pair of rune's defines a range + */ +struct Reclass{ + Rune *end; + Rune spans[64]; +}; + +/* + * Machine instructions + */ +struct Reinst{ + int type; + union { + Reclass *cp; /* class pointer */ + Rune r; /* character */ + int subid; /* sub-expression id for RBRA and LBRA */ + Reinst *right; /* right child of OR */ + }; + union { /* regexp relies on these two being in the same union */ + Reinst *left; /* left child of OR */ + Reinst *next; /* next instruction for CAT & LBRA */ + }; +}; + +/* + * Reprogram definition + */ +struct Reprog{ + Reinst *startinst; /* start pc */ + Reclass class[16]; /* .data */ + Reinst firstinst[5]; /* .text */ +}; + +extern Reprog *regcomp(char*); +extern Reprog *regcomplit(char*); +extern Reprog *regcompnl(char*); +extern void regerror(char*); +extern int regexec(Reprog*, char*, Resub*, int); +extern void regsub(char*, char*, int, Resub*, int); +extern int rregexec(Reprog*, Rune*, Resub*, int); +extern void rregsub(Rune*, Rune*, int, Resub*, int); |