summaryrefslogtreecommitdiff
path: root/sys/src/cmd/wikifs/parse.c
diff options
context:
space:
mode:
authorTaru Karttunen <taruti@taruti.net>2011-03-30 15:46:40 +0300
committerTaru Karttunen <taruti@taruti.net>2011-03-30 15:46:40 +0300
commite5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
treed8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/cmd/wikifs/parse.c
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/cmd/wikifs/parse.c')
-rwxr-xr-xsys/src/cmd/wikifs/parse.c331
1 files changed, 331 insertions, 0 deletions
diff --git a/sys/src/cmd/wikifs/parse.c b/sys/src/cmd/wikifs/parse.c
new file mode 100755
index 000000000..dc8924317
--- /dev/null
+++ b/sys/src/cmd/wikifs/parse.c
@@ -0,0 +1,331 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <String.h>
+#include <ctype.h>
+#include <thread.h>
+#include "wiki.h"
+
+/*
+ * Allocate a Wpage node of the given type that refers to text.
+ * The text pointer is stored as given, not copied.
+ */
+static Wpage*
+mkwtxt(int type, char *text)
+{
+	Wpage *page;
+
+	page = emalloc(sizeof *page);
+	page->text = text;
+	page->type = type;
+	/* tag the allocation with the pc of whoever called mkwtxt */
+	setmalloctag(page, getcallerpc(&type));
+	return page;
+}
+
+/*
+ * Collapse every run of whitespace in s to a single space, in place,
+ * and drop trailing whitespace.  Leading whitespace is dropped only
+ * when cutbegin is nonzero; otherwise it collapses to one space like
+ * any other run.  The result is never longer than the input.
+ * Returns s.
+ */
+char*
+strcondense(char *s, int cutbegin)
+{
+	char *r, *w, *es;
+	int inspace;
+
+	es = s+strlen(s);
+	/* starting out "inside a run" is what makes leading whitespace vanish */
+	inspace = cutbegin;
+	for(r=w=s; *r; r++){
+		/*
+		 * The cast matters: the text may hold UTF-8 bytes, which are
+		 * negative where char is signed, and ISO C leaves isspace
+		 * undefined for negative arguments other than EOF.
+		 */
+		if(isspace((unsigned char)*r)){
+			if(!inspace){
+				inspace = 1;
+				*w++ = ' ';
+			}
+		}else{
+			inspace = 0;
+			*w++ = *r;
+		}
+	}
+	assert(w <= es);
+	/* a trailing run left exactly one space behind; back up over it */
+	if(inspace && w > s)
+		--w;
+	*w = '\0';
+	return s;
+}
+
+/*
+ * Condense the text of every Wplain node and fold each run of
+ * adjacent Wplain nodes into a single node, joining their texts
+ * with one space.  Returns the head of the list, wtxt.
+ */
+static Wpage*
+wcondense(Wpage *wtxt)
+{
+	Wpage *cur, *nxt;
+
+	cur = wtxt;
+	while(cur != nil){
+		if(cur->type == Wplain)
+			strcondense(cur->text, 1);
+
+		nxt = cur->next;
+		if(cur->type == Wplain && nxt != nil && nxt->type == Wplain){
+			/* room for both texts, the joining space and the NUL */
+			cur->text = erealloc(cur->text, strlen(cur->text)+1+strlen(nxt->text)+1);
+			strcat(cur->text, " ");
+			strcat(cur->text, nxt->text);
+
+			/* unlink the absorbed node before handing it to freepage */
+			cur->next = nxt->next;
+			nxt->next = nil;
+			freepage(nxt);
+
+			/* stay on cur: the merged text is condensed on the next pass */
+			continue;
+		}
+		cur = nxt;
+	}
+	return wtxt;
+}
+
+/*
+ * Build a Wlink node from the contents of a link, brackets already
+ * removed.  A '|' splits s into the link text and the url; each
+ * part is condensed and copied.  Without a '|' the url is nil.
+ * s is modified in place.
+ */
+static Wpage*
+mklink(char *s)
+{
+	char *bar;
+	Wpage *lnk;
+
+	bar = strchr(s, '|');
+	if(bar != nil)
+		*bar++ = '\0';	/* end the link text; bar now starts the url */
+
+	lnk = mkwtxt(Wlink, estrdup(strcondense(s, 1)));
+	if(bar != nil)
+		lnk->url = estrdup(strcondense(bar, 1));
+	else
+		lnk->url = nil;
+	setmalloctag(lnk, getcallerpc(&s));
+	return lnk;
+}
+
+/*
+ * Split each Wplain node around its [bracketed] spans: the text
+ * before the '[' stays in the node, the bracket contents become a
+ * Wlink node built by mklink, and the text after the ']' goes into
+ * a new Wplain node that is scanned in turn.  A '[' with no ']'
+ * after it is left alone.  Returns wtxt.
+ */
+static Wpage*
+wlink(Wpage *wtxt)
+{
+	char *lb, *rb, *old;
+	Wpage *cur, *after;
+
+	for(cur = wtxt; cur != nil; cur = after){
+		after = cur->next;
+		if(cur->type != Wplain)
+			continue;
+		while(cur->text[0] != '\0'){
+			lb = strchr(cur->text, '[');
+			if(lb == nil)
+				break;
+			rb = strchr(lb, ']');
+			if(rb == nil)
+				break;
+			*lb = '\0';
+			*rb = '\0';
+
+			/* replace the buffer by a copy of just the prefix; lb and rb still point into old */
+			old = cur->text;
+			cur->text = estrdup(old);
+			cur->next = mklink(lb+1);
+			cur = cur->next;
+			cur->next = mkwtxt(Wplain, estrdup(rb+1));
+			free(old);	/* safe only now that the link and the tail have been copied out */
+			cur = cur->next;
+			cur->next = after;
+		}
+		assert(cur->next == after);
+	}
+	return wtxt;
+}
+
+/*
+ * Report (1 or 0) whether c may appear in a man page name:
+ * an ASCII letter or digit, one of _ - . /, or any negative value.
+ */
+static int
+ismanchar(int c)
+{
+	if(c < 0)	/* UTF */
+		return 1;
+	if('a' <= c && c <= 'z')
+		return 1;
+	if('A' <= c && c <= 'Z')
+		return 1;
+	if('0' <= c && c <= '9')
+		return 1;
+	switch(c){
+	case '_':
+	case '-':
+	case '.':
+	case '/':
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Find the first man page reference of the form name(d) in p,
+ * where d is a single digit and name is a nonempty run of
+ * ismanchar characters ending right before the '('.
+ * On success return a new Wman node holding a copy of the name
+ * and the section number, with *beginp set to the start of the
+ * name and *endp to the character after the ')'.
+ * Return nil, leaving *beginp and *endp untouched, if there is
+ * no such reference.  p is unchanged on return.
+ */
+static Wpage*
+findmanref(char *p, char **beginp, char **endp)
+{
+	char *paren, *name;
+	Wpage *w;
+
+	paren = p;
+	while((paren = strchr(paren, '(')) != nil){
+		/*
+		 * The cast keeps isdigit defined for UTF-8 bytes, which are
+		 * negative where char is signed.  paren[2] is only read when
+		 * paren[1] is a digit, so it never lies past the terminator.
+		 */
+		if(!isdigit((unsigned char)paren[1]) || paren[2] != ')'){
+			paren++;
+			continue;
+		}
+
+		/* back up over the name that precedes the '(' */
+		for(name = paren; name > p && ismanchar(name[-1]); name--)
+			;
+		if(name == paren){
+			/* "(d)" with no name in front of it: keep looking */
+			paren += 3;
+			continue;
+		}
+
+		/* cut the string at the '(' just long enough to copy the name */
+		*paren = '\0';
+		w = mkwtxt(Wman, estrdup(name));
+		*paren = '(';
+		w->section = paren[1]-'0';
+		*beginp = name;
+		*endp = paren+3;
+		setmalloctag(w, getcallerpc(&p));
+		return w;
+	}
+	return nil;
+}
+
+/*
+ * Split each Wplain node around the man page references that
+ * findmanref locates: the text before a reference stays in the
+ * node, the reference becomes a Wman node, and the text after it
+ * goes into a new Wplain node that is scanned in turn.
+ * Returns wtxt.
+ *
+ * This should be done by using a plumb(6)-style
+ * control file rather than hard-coding things here.
+ */
+static Wpage*
+wman(Wpage *wtxt)
+{
+	char *refstart, *refend;
+	Wpage *cur, *ref, *after;
+
+	for(cur = wtxt; cur != nil; cur = after){
+		after = cur->next;
+		if(cur->type != Wplain)
+			continue;
+		while(cur->text[0] != '\0'){
+			ref = findmanref(cur->text, &refstart, &refend);
+			if(ref == nil)
+				break;
+			*refstart = '\0';	/* cur keeps only the text before the reference */
+			cur->next = ref;
+			ref->next = mkwtxt(Wplain, estrdup(refend));
+			cur = ref->next;
+			cur->next = after;
+		}
+		assert(cur->next == after);
+	}
+	return wtxt;
+}
+
+/*
+ * Report whether p reads as a heading: it holds at least one
+ * upper-case rune and no lower-case rune.
+ */
+static int
+isheading(char *p)
+{
+	Rune r;
+	int sawupper;
+
+	sawupper = 0;
+	while(*p != '\0'){
+		p += chartorune(&r, p);
+		if(isupperrune(r)){
+			sawupper = 1;
+			continue;
+		}
+		if(islowerrune(r))
+			return 0;
+	}
+	return sawupper;
+}
+
+/*
+ * Read a page one line at a time with rdline(b, '\n'), until it
+ * returns nil, and build the list of Wpage nodes describing it.
+ * Lines not starting with '!' are condensed first.  Then:
+ *	an empty line yields a Wpara, once per run of empty
+ *		lines and never before the first text line;
+ *	"*text" yields a Wbullet followed by a Wplain of text;
+ *	"!text" yields a Wpre of text, minus one space after the '!';
+ *	a line of more than four dashes and nothing else yields a Whr;
+ *	a line for which isheading is true yields a Wheading;
+ *	anything else yields a Wplain.
+ * The list is then passed through wcondense, wlink and wman.
+ * If no node was produced, the error string is set to
+ * "empty page" and nil is returned.
+ */
+Wpage*
+Brdpage(char *(*rdline)(void*,int), void *b)
+{
+	char *line, *d;
+	int blank;
+	Wpage *head, **tail;
+
+	head = nil;
+	tail = &head;
+	blank = 1;	/* starting as if after a blank line suppresses a leading Wpara */
+	for(;;){
+		line = rdline(b, '\n');
+		if(line == nil)
+			break;
+		if(line[0] != '!')
+			line = strcondense(line, 1);
+		if(line[0] == '\0'){
+			if(!blank){
+				blank = 1;
+				*tail = mkwtxt(Wpara, nil);
+				tail = &(*tail)->next;
+			}
+			continue;
+		}
+		blank = 0;
+
+		if(line[0] == '*'){
+			*tail = mkwtxt(Wbullet, nil);
+			tail = &(*tail)->next;
+			*tail = mkwtxt(Wplain, estrdup(line+1));
+			tail = &(*tail)->next;
+			continue;
+		}
+		if(line[0] == '!'){
+			/* drop the '!' and the one space after it, if there is one */
+			*tail = mkwtxt(Wpre, estrdup(line[1]==' ' ? line+2 : line+1));
+			tail = &(*tail)->next;
+			continue;
+		}
+		if(line[0] == '-'){
+			/* a rule is a line of nothing but dashes, more than four of them */
+			for(d = line; *d == '-'; d++)
+				;
+			if(*d == '\0' && d-line > 4){
+				*tail = mkwtxt(Whr, nil);
+				tail = &(*tail)->next;
+				continue;
+			}
+			/* otherwise handle it like any other text line */
+		}
+		if(isheading(line))
+			*tail = mkwtxt(Wheading, estrdup(line));
+		else
+			*tail = mkwtxt(Wplain, estrdup(line));
+		tail = &(*tail)->next;
+	}
+	if(head == nil)
+		werrstr("empty page");
+
+	*tail = nil;
+	head = wcondense(head);
+	head = wlink(head);
+	head = wman(head);
+	setmalloctag(head, getcallerpc(&rdline));
+
+	return head;
+}
+
+/*
+ * Print a one-line description of each node in the list w.
+ * Nodes of a type not listed here print nothing.
+ */
+void
+printpage(Wpage *w)
+{
+	Wpage *n;
+
+	n = w;
+	while(n != nil){
+		switch(n->type){
+		case Wheading:
+			print("heading '%s'\n", n->text);
+			break;
+		case Wplain:
+			print("plain '%s'\n", n->text);
+			break;
+		case Wpre:
+			print("pre '%s'\n", n->text);
+			break;
+		case Wlink:
+			print("link '%s' '%s'\n", n->text, n->url);
+			break;
+		case Wman:
+			print("man %d %s\n", n->section, n->text);
+			break;
+		case Wpara:
+			print("para\n");
+			break;
+		case Wbullet:
+			print("bullet\n");
+			break;
+		case Whr:
+			print("hr\n");
+			break;
+		}
+		n = n->next;
+	}
+}