diff options
author | Taru Karttunen <taruti@taruti.net> | 2011-03-30 15:46:40 +0300 |
---|---|---|
committer | Taru Karttunen <taruti@taruti.net> | 2011-03-30 15:46:40 +0300 |
commit | e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch) | |
tree | d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/cmd/webfs |
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/cmd/webfs')
-rwxr-xr-x | sys/src/cmd/webfs/buf.c | 89 | ||||
-rwxr-xr-x | sys/src/cmd/webfs/client.c | 394 | ||||
-rwxr-xr-x | sys/src/cmd/webfs/cookies.c | 1173 | ||||
-rwxr-xr-x | sys/src/cmd/webfs/dat.h | 103 | ||||
-rwxr-xr-x | sys/src/cmd/webfs/fns.h | 62 | ||||
-rwxr-xr-x | sys/src/cmd/webfs/fs.c | 616 | ||||
-rwxr-xr-x | sys/src/cmd/webfs/http.c | 539 | ||||
-rwxr-xr-x | sys/src/cmd/webfs/io.c | 84 | ||||
-rwxr-xr-x | sys/src/cmd/webfs/main.c | 67 | ||||
-rwxr-xr-x | sys/src/cmd/webfs/mkfile | 35 | ||||
-rwxr-xr-x | sys/src/cmd/webfs/plumb.c | 165 | ||||
-rwxr-xr-x | sys/src/cmd/webfs/url.c | 1092 | ||||
-rwxr-xr-x | sys/src/cmd/webfs/util.c | 86 | ||||
-rwxr-xr-x | sys/src/cmd/webfs/webget.c | 87 |
14 files changed, 4592 insertions, 0 deletions
diff --git a/sys/src/cmd/webfs/buf.c b/sys/src/cmd/webfs/buf.c new file mode 100755 index 000000000..ffd249407 --- /dev/null +++ b/sys/src/cmd/webfs/buf.c @@ -0,0 +1,89 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <ip.h> +#include <plumb.h> +#include <thread.h> +#include <fcall.h> +#include <9p.h> +#include "dat.h" +#include "fns.h" + +void +initibuf(Ibuf *b, Ioproc *io, int fd) +{ + b->fd = fd; + b->io = io; + b->rp = b->wp = b->buf; +} + +int +readibuf(Ibuf *b, char *buf, int len) +{ + int n; + + n = b->wp - b->rp; + if(n > 0){ + if(n > len) + n = len; + memmove(buf, b->rp, n); + b->rp += n; + return n; + } + return ioreadn(b->io, b->fd, buf, len); +} + +void +unreadline(Ibuf *b, char *line) +{ + int i, n; + + i = strlen(line); + n = b->wp - b->rp; + memmove(&b->buf[i+1], b->rp, n); + memmove(b->buf, line, i); + b->buf[i] = '\n'; + b->rp = b->buf; + b->wp = b->rp+i+1+n; +} + +int +readline(Ibuf *b, char *buf, int len) +{ + int n; + char *p; + + len--; + + for(p = buf;;){ + if(b->rp >= b->wp){ + n = ioread(b->io, b->fd, b->wp, sizeof(b->buf)/2); + if(n < 0) + return -1; + if(n == 0) + break; + b->wp += n; + } + n = *b->rp++; + if(len > 0){ + *p++ = n; + len--; + } + if(n == '\n') + break; + } + + /* drop trailing white */ + for(;;){ + if(p <= buf) + break; + n = *(p-1); + if(n != ' ' && n != '\t' && n != '\r' && n != '\n') + break; + p--; + } + + *p = 0; + return p-buf; +} + diff --git a/sys/src/cmd/webfs/client.c b/sys/src/cmd/webfs/client.c new file mode 100755 index 000000000..c133adbc2 --- /dev/null +++ b/sys/src/cmd/webfs/client.c @@ -0,0 +1,394 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <ip.h> +#include <plumb.h> +#include <thread.h> +#include <fcall.h> +#include <9p.h> +#include "dat.h" +#include "fns.h" + +int nclient; +Client **client; + +static void clientthread(void*); +int +newclient(int plumbed) +{ + int i; + Client *c; + + for(i=0; i<nclient; i++) + if(client[i]->ref==0) + return i; + + c = emalloc(sizeof(Client)); + c->plumbed = plumbed; + c->creq = chancreate(sizeof(Req*), 8); + threadcreate(clientthread, c, STACK); + + c->io = ioproc(); + c->num = nclient; + c->ctl = globalctl; + clonectl(&c->ctl); + if(nclient%16 == 0) + client = erealloc(client, (nclient+16)*sizeof(client[0])); + client[nclient++] = c; + return nclient-1; +} + +void +closeclient(Client *c) +{ + if(--c->ref == 0){ + if(c->bodyopened){ + if(c->url && c->url->close) + (*c->url->close)(c); + c->bodyopened = 0; + } + free(c->contenttype); + c->contenttype = nil; + free(c->postbody); + c->postbody = nil; + freeurl(c->url); + c->url = nil; + free(c->redirect); + c->redirect = nil; + free(c->authenticate); + c->authenticate = nil; + c->npostbody = 0; + c->havepostbody = 0; + c->bodyopened = 0; + } +} + +void +clonectl(Ctl *c) +{ + if(c->useragent) + c->useragent = estrdup(c->useragent); +} + +void +clientbodyopen(Client *c, Req *r) +{ + char e[ERRMAX], *next; + int i, nauth; + Url *u; + + nauth = 0; + next = nil; + for(i=0; i<=c->ctl.redirectlimit; i++){ + if(c->url == nil){ + werrstr("nil url"); + goto Error; + } + if(c->url->open == nil){ + werrstr("unsupported url type"); + goto Error; + } + if(fsdebug) + fprint(2, "try %s\n", c->url->url); + if(c->url->open(c, c->url) < 0){ + Error: + if(next) + fprint(2, "next %s (but for error)\n", next); + free(next); + rerrstr(e, sizeof e); + c->iobusy = 0; + if(r != nil) + r->fid->omode = -1; + closeclient(c); /* not opening */ + if(r != nil) + respond(r, e); + return; + } + if (c->authenticate && nauth++ < 1) + continue; + if(!c->redirect) + break; + next = c->redirect; + c->redirect = nil; + if(i==c->ctl.redirectlimit){ + werrstr("redirect limit reached"); + goto Error; + } + if((u = parseurl(next, c->url)) == nil) + goto Error; + if(urldebug) + fprint(2, "parseurl %s got scheme %d\n", next, u->ischeme); + if(u->ischeme == USunknown){ + werrstr("redirect with unknown URL scheme"); + goto Error; + } + if(u->ischeme == UScurrent){ + werrstr("redirect to URL relative to current document"); + goto Error; + } + freeurl(c->url); + c->url = u; + } + free(next); + c->iobusy = 0; + if(r != nil) + respond(r, nil); +} + +void +plumburl(char *url, char *base) +{ + int i; + Client *c; + Url *ubase, *uurl; + + ubase = nil; + if(base){ + ubase = parseurl(base, nil); + if(ubase == nil) + return; + } + uurl = parseurl(url, ubase); + if(uurl == nil){ + freeurl(ubase); + return; + } + i = newclient(1); + c = client[i]; + c->ref++; + c->baseurl = ubase; + c->url = uurl; + sendp(c->creq, nil); +} + +void +clientbodyread(Client *c, Req *r) +{ + char e[ERRMAX]; + + if(c->url->read == nil){ + respond(r, "unsupported url type"); + return; + } + if(c->url->read(c, r) < 0){ + rerrstr(e, sizeof e); + c->iobusy = 0; + respond(r, e); + return; + } + c->iobusy = 0; + respond(r, nil); +} + +static void +clientthread(void *a) +{ + Client *c; + Req *r; + + c = a; + if(c->plumbed) { + recvp(c->creq); + if(c->url == nil){ + fprint(2, "bad url got plumbed\n"); + return; + } + clientbodyopen(c, nil); + replumb(c); + } + while((r = recvp(c->creq)) != nil){ + if(fsdebug) + fprint(2, "clientthread %F\n", &r->ifcall); + switch(r->ifcall.type){ + case Topen: + if(c->plumbed) { + c->plumbed = 0; + c->ref--; /* from plumburl() */ + respond(r, nil); + } + else + clientbodyopen(c, r); + break; + case Tread: + clientbodyread(c, r); + break; + case Tflush: + respond(r, nil); + } + if(fsdebug) + fprint(2, "clientthread finished req\n"); + } +} + +enum +{ + Bool, + Int, + String, + XUrl, + Fn, +}; + +typedef struct Ctab Ctab; +struct Ctab { + char *name; + int type; + void *offset; +}; + +Ctab ctltab[] = { + "acceptcookies", Bool, (void*)offsetof(Ctl, acceptcookies), + "sendcookies", Bool, (void*)offsetof(Ctl, sendcookies), + "redirectlimit", Int, (void*)offsetof(Ctl, redirectlimit), + "useragent", String, (void*)offsetof(Ctl, useragent), +}; + +Ctab globaltab[] = { + "chatty9p", Int, &chatty9p, + "fsdebug", Int, &fsdebug, + "cookiedebug", Int, &cookiedebug, + "urldebug", Int, &urldebug, + "httpdebug", Int, &httpdebug, +}; + +Ctab clienttab[] = { + "baseurl", XUrl, (void*)offsetof(Client, baseurl), + "url", XUrl, (void*)offsetof(Client, url), +}; + +static Ctab* +findcmd(char *cmd, Ctab *tab, int ntab) +{ + int i; + + for(i=0; i<ntab; i++) + if(strcmp(tab[i].name, cmd) == 0) + return &tab[i]; + return nil; +} + +static void +parseas(Req *r, char *arg, int type, void *a) +{ + Url *u; + char e[ERRMAX]; + + switch(type){ + case Bool: + if(strcmp(arg, "on")==0 || strcmp(arg, "1")==0) + *(int*)a = 1; + else + *(int*)a = 0; + break; + case String: + free(*(char**)a); + *(char**)a = estrdup(arg); + break; + case XUrl: + u = parseurl(arg, nil); + if(u == nil){ + snprint(e, sizeof e, "parseurl: %r"); + respond(r, e); + return; + } + freeurl(*(Url**)a); + *(Url**)a = u; + break; + case Int: + if(strcmp(arg, "on")==0) + *(int*)a = 1; + else + *(int*)a = atoi(arg); + break; + } + respond(r, nil); +} + +int +ctlwrite(Req *r, Ctl *ctl, char *cmd, char *arg) +{ + void *a; + Ctab *t; + + if((t = findcmd(cmd, ctltab, nelem(ctltab))) == nil) + return 0; + a = (void*)((uintptr)ctl+(uintptr)t->offset); + parseas(r, arg, t->type, a); + return 1; +} + +int +clientctlwrite(Req *r, Client *c, char *cmd, char *arg) +{ + void *a; + Ctab *t; + + if((t = findcmd(cmd, clienttab, nelem(clienttab))) == nil) + return 0; + a = (void*)((uintptr)c+(uintptr)t->offset); + parseas(r, arg, t->type, a); + return 1; +} + +int +globalctlwrite(Req *r, char *cmd, char *arg) +{ + void *a; + Ctab *t; + + if((t = findcmd(cmd, globaltab, nelem(globaltab))) == nil) + return 0; + a = t->offset; + parseas(r, arg, t->type, a); + return 1; +} + +static void +ctlfmt(Ctl *c, char *s) +{ + int i; + void *a; + char *t; + + for(i=0; i<nelem(ctltab); i++){ + a = (void*)((uintptr)c+(uintptr)ctltab[i].offset); + switch(ctltab[i].type){ + case Bool: + s += sprint(s, "%s %s\n", ctltab[i].name, *(int*)a ? "on" : "off"); + break; + case Int: + s += sprint(s, "%s %d\n", ctltab[i].name, *(int*)a); + break; + case String: + t = *(char**)a; + if(t != nil) + s += sprint(s, "%s %.*s%s\n", ctltab[i].name, utfnlen(t, 100), t, strlen(t)>100 ? "..." : ""); + break; + } + } +} + +void +ctlread(Req *r, Client *c) +{ + char buf[1024]; + + sprint(buf, "%11d \n", c->num); + ctlfmt(&c->ctl, buf+strlen(buf)); + readstr(r, buf); + respond(r, nil); +} + +void +globalctlread(Req *r) +{ + char buf[1024], *s; + int i; + + s = buf; + for(i=0; i<nelem(globaltab); i++) + s += sprint(s, "%s %d\n", globaltab[i].name, *(int*)globaltab[i].offset); + ctlfmt(&globalctl, s); + readstr(r, buf); + respond(r, nil); +} diff --git a/sys/src/cmd/webfs/cookies.c b/sys/src/cmd/webfs/cookies.c new file mode 100755 index 000000000..6028bde92 --- /dev/null +++ b/sys/src/cmd/webfs/cookies.c @@ -0,0 +1,1173 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <ndb.h> +#include <fcall.h> +#include <thread.h> +#include <9p.h> +#include <ctype.h> +#include "dat.h" +#include "fns.h" + +int cookiedebug; + +typedef struct Cookie Cookie; +typedef struct Jar Jar; + +struct Cookie +{ + /* external info */ + char* name; + char* value; + char* dom; /* starts with . */ + char* path; + char* version; + char* comment; /* optional, may be nil */ + + uint expire; /* time of expiration: ~0 means when webcookies dies */ + int secure; + int explicitdom; /* dom was explicitly set */ + int explicitpath; /* path was explicitly set */ + int netscapestyle; + + /* internal info */ + int deleted; + int mark; + int ondisk; +}; + +struct Jar +{ + Cookie *c; + int nc; + int mc; + + Qid qid; + int dirty; + char *file; + char *lockfile; +}; + +struct { + char *s; + int offset; + int ishttp; +} stab[] = { + "domain", offsetof(Cookie, dom), 1, + "path", offsetof(Cookie, path), 1, + "name", offsetof(Cookie, name), 0, + "value", offsetof(Cookie, value), 0, + "comment", offsetof(Cookie, comment), 1, + "version", offsetof(Cookie, version), 1, +}; + +struct { + char *s; + int offset; +} itab[] = { + "expire", offsetof(Cookie, expire), + "secure", offsetof(Cookie, secure), + "explicitdomain", offsetof(Cookie, explicitdom), + "explicitpath", offsetof(Cookie, explicitpath), + "netscapestyle", offsetof(Cookie, netscapestyle), +}; + +#pragma varargck type "J" Jar* +#pragma varargck type "K" Cookie* + +/* HTTP format */ +static int +jarfmt(Fmt *fp) +{ + int i; + Jar *jar; + + jar = va_arg(fp->args, Jar*); + + if(jar == nil || jar->nc == 0) + return 0; + + fmtstrcpy(fp, "Cookie: "); + if(jar->c[0].version) + fmtprint(fp, "$Version=%s; ", jar->c[0].version); + for(i=0; i<jar->nc; i++) + fmtprint(fp, "%s%s=%s", i ? "; ": "", jar->c[i].name, jar->c[i].value); + fmtstrcpy(fp, "\r\n"); + return 0; +} + +/* individual cookie */ +static int +cookiefmt(Fmt *fp) +{ + int j, k, first; + char *t; + Cookie *c; + + c = va_arg(fp->args, Cookie*); + + first = 1; + for(j=0; j<nelem(stab); j++){ + t = *(char**)((uintptr)c+stab[j].offset); + if(t == nil) + continue; + if(first) + first = 0; + else + fmtstrcpy(fp, " "); + fmtprint(fp, "%s=%q", stab[j].s, t); + } + for(j=0; j<nelem(itab); j++){ + k = *(int*)((uintptr)c+itab[j].offset); + if(k == 0) + continue; + if(first) + first = 0; + else + fmtstrcpy(fp, " "); + fmtprint(fp, "%s=%ud", itab[j].s, k); + } + return 0; +} + +/* + * sort cookies: + * - alpha by name + * - alpha by domain + * - longer paths first, then alpha by path (RFC2109 4.3.4) + */ +static int +cookiecmp(Cookie *a, Cookie *b) +{ + int i; + + if((i = strcmp(a->name, b->name)) != 0) + return i; + if((i = cistrcmp(a->dom, b->dom)) != 0) + return i; + if((i = strlen(b->path) - strlen(a->path)) != 0) + return i; + if((i = strcmp(a->path, b->path)) != 0) + return i; + return 0; +} + +static int +exactcookiecmp(Cookie *a, Cookie *b) +{ + int i; + + if((i = cookiecmp(a, b)) != 0) + return i; + if((i = strcmp(a->value, b->value)) != 0) + return i; + if(a->version || b->version){ + if(!a->version) + return -1; + if(!b->version) + return 1; + if((i = strcmp(a->version, b->version)) != 0) + return i; + } + if(a->comment || b->comment){ + if(!a->comment) + return -1; + if(!b->comment) + return 1; + if((i = strcmp(a->comment, b->comment)) != 0) + return i; + } + if((i = b->expire - a->expire) != 0) + return i; + if((i = b->secure - a->secure) != 0) + return i; + if((i = b->explicitdom - a->explicitdom) != 0) + return i; + if((i = b->explicitpath - a->explicitpath) != 0) + return i; + if((i = b->netscapestyle - a->netscapestyle) != 0) + return i; + + return 0; +} + +static void +freecookie(Cookie *c) +{ + int i; + + for(i=0; i<nelem(stab); i++) + free(*(char**)((uintptr)c+stab[i].offset)); +} + +static void +copycookie(Cookie *c) +{ + int i; + char **ps; + + for(i=0; i<nelem(stab); i++){ + ps = (char**)((uintptr)c+stab[i].offset); + if(*ps) + *ps = estrdup9p(*ps); + } +} + +static void +delcookie(Jar *j, Cookie *c) +{ + int i; + + j->dirty = 1; + i = c - j->c; + if(i < 0 || i >= j->nc) + abort(); + c->deleted = 1; +} + +static void +addcookie(Jar *j, Cookie *c) +{ + int i; + + if(!c->name || !c->value || !c->path || !c->dom){ + fprint(2, "not adding incomplete cookie\n"); + return; + } + + if(cookiedebug) + fprint(2, "add %K\n", c); + + for(i=0; i<j->nc; i++) + if(cookiecmp(&j->c[i], c) == 0){ + if(cookiedebug) + fprint(2, "cookie %K matches %K\n", &j->c[i], c); + if(exactcookiecmp(&j->c[i], c) == 0){ + if(cookiedebug) + fprint(2, "\texactly\n"); + j->c[i].mark = 0; + return; + } + delcookie(j, &j->c[i]); + } + + j->dirty = 1; + if(j->nc == j->mc){ + j->mc += 16; + j->c = erealloc9p(j->c, j->mc*sizeof(Cookie)); + } + j->c[j->nc] = *c; + copycookie(&j->c[j->nc]); + j->nc++; +} + +static void +purgejar(Jar *j) +{ + int i; + + for(i=j->nc-1; i>=0; i--){ + if(!j->c[i].deleted) + continue; + freecookie(&j->c[i]); + --j->nc; + j->c[i] = j->c[j->nc]; + } +} + +static void +addtojar(Jar *jar, char *line, int ondisk) +{ + Cookie c; + int i, j, nf, *pint; + char *f[20], *attr, *val, **pstr; + + memset(&c, 0, sizeof c); + c.expire = ~0; + c.ondisk = ondisk; + nf = tokenize(line, f, nelem(f)); + for(i=0; i<nf; i++){ + attr = f[i]; + if((val = strchr(attr, '=')) != nil) + *val++ = '\0'; + else + val = ""; + /* string attributes */ + for(j=0; j<nelem(stab); j++){ + if(strcmp(stab[j].s, attr) == 0){ + pstr = (char**)((uintptr)&c+stab[j].offset); + *pstr = val; + } + } + /* integer attributes */ + for(j=0; j<nelem(itab); j++){ + if(strcmp(itab[j].s, attr) == 0){ + pint = (int*)((uintptr)&c+itab[j].offset); + if(val[0]=='\0') + *pint = 1; + else + *pint = strtoul(val, 0, 0); + } + } + } + if(c.name==nil || c.value==nil || c.dom==nil || c.path==nil){ + if(cookiedebug) + fprint(2, "ignoring fractional cookie %K\n", &c); + return; + } + addcookie(jar, &c); +} + +static Jar* +newjar(void) +{ + Jar *jar; + + jar = emalloc9p(sizeof(Jar)); + return jar; +} + +static int +expirejar(Jar *jar, int exiting) +{ + int i, n; + uint now; + + now = time(0); + n = 0; + for(i=0; i<jar->nc; i++){ + if(jar->c[i].expire < now || (exiting && jar->c[i].expire==~0)){ + delcookie(jar, &jar->c[i]); + n++; + } + } + return n; +} + +static void +dumpjar(Jar *jar, char *desc) +{ + int i; + Biobuf *b; + char *s; + + print("%s\n", desc); + print("\tin memory:\n"); + + for(i=0; i<jar->nc; i++) + print("\t%K%s%s%s\n", &jar->c[i], + jar->c[i].ondisk ? " ondisk" : "", + jar->c[i].deleted ? " deleted" : "", + jar->c[i].mark ? " mark" : ""); + print("\n\ton disk:\n"); + if((b = Bopen(jar->file, OREAD)) == nil){ + print("\tno file\n"); + }else{ + while((s = Brdstr(b, '\n', 1)) != nil){ + print("\t%s\n", s); + free(s); + } + Bterm(b); + } + print("\n"); +} + +static int +syncjar(Jar *jar) +{ + int i, fd; + char *line; + Dir *d; + Biobuf *b; + Qid q; + + if(jar->file==nil) + return 0; + + memset(&q, 0, sizeof q); + if((d = dirstat(jar->file)) != nil){ + q = d->qid; + if(d->qid.path != jar->qid.path || d->qid.vers != jar->qid.vers) + jar->dirty = 1; + free(d); + } + + if(jar->dirty == 0) + return 0; + + fd = -1; + for(i=0; i<50; i++){ + if((fd = create(jar->lockfile, OWRITE, DMEXCL|0666)) < 0){ + sleep(100); + continue; + } + break; + } + if(fd < 0){ + if(cookiedebug) + fprint(2, "open %s: %r", jar->lockfile); + werrstr("cannot acquire jar lock: %r"); + return -1; + } + + for(i=0; i<jar->nc; i++) /* mark is cleared by addcookie */ + jar->c[i].mark = jar->c[i].ondisk; + + if((b = Bopen(jar->file, OREAD)) == nil){ + if(cookiedebug) + fprint(2, "Bopen %s: %r", jar->file); + werrstr("cannot read cookie file %s: %r", jar->file); + close(fd); + return -1; + } + for(; (line = Brdstr(b, '\n', 1)) != nil; free(line)){ + if(*line == '#') + continue; + addtojar(jar, line, 1); + } + Bterm(b); + + for(i=0; i<jar->nc; i++) + if(jar->c[i].mark && jar->c[i].expire != ~0) + delcookie(jar, &jar->c[i]); + + purgejar(jar); + + b = Bopen(jar->file, OWRITE); + if(b == nil){ + if(cookiedebug) + fprint(2, "Bopen write %s: %r", jar->file); + close(fd); + return -1; + } + Bprint(b, "# webcookies cookie jar\n"); + Bprint(b, "# comments and non-standard fields will be lost\n"); + for(i=0; i<jar->nc; i++){ + if(jar->c[i].expire == ~0) + continue; + Bprint(b, "%K\n", &jar->c[i]); + jar->c[i].ondisk = 1; + } + Bterm(b); + + jar->dirty = 0; + close(fd); + if((d = dirstat(jar->file)) != nil){ + jar->qid = d->qid; + free(d); + } + return 0; +} + +static Jar* +readjar(char *file) +{ + char *lock, *p; + Jar *jar; + + jar = newjar(); + lock = emalloc9p(strlen(file)+10); + strcpy(lock, file); + if((p = strrchr(lock, '/')) != nil) + p++; + else + p = lock; + memmove(p+2, p, strlen(p)+1); + p[0] = 'L'; + p[1] = '.'; + jar->lockfile = lock; + jar->file = file; + jar->dirty = 1; + + if(syncjar(jar) < 0){ + free(jar->file); + free(jar->lockfile); + free(jar); + return nil; + } + return jar; +} + +static void +closejar(Jar *jar) +{ + int i; + + if(jar == nil) + return; + expirejar(jar, 0); + if(syncjar(jar) < 0) + fprint(2, "warning: cannot rewrite cookie jar: %r\n"); + + for(i=0; i<jar->nc; i++) + freecookie(&jar->c[i]); + + free(jar->file); + free(jar); +} + +/* + * Domain name matching is per RFC2109, section 2: + * + * Hosts names can be specified either as an IP address or a FQHN + * string. Sometimes we compare one host name with another. Host A's + * name domain-matches host B's if + * + * * both host names are IP addresses and their host name strings match + * exactly; or + * + * * both host names are FQDN strings and their host name strings match + * exactly; or + * + * * A is a FQDN string and has the form NB, where N is a non-empty name + * string, B has the form .B', and B' is a FQDN string. (So, x.y.com + * domain-matches .y.com but not y.com.) + * + * Note that domain-match is not a commutative operation: a.b.c.com + * domain-matches .c.com, but not the reverse. + * + * (This does not verify that IP addresses and FQDN's are well-formed.) + */ +static int +isdomainmatch(char *name, char *pattern) +{ + int lname, lpattern; + + if(cistrcmp(name, pattern)==0) + return 1; + + if(strcmp(ipattr(name), "dom")==0 && pattern[0]=='.'){ + lname = strlen(name); + lpattern = strlen(pattern); + /* e.g., name: www.google.com && pattern: .google.com */ + if(lname >= lpattern && cistrcmp(name+lname-lpattern, pattern)==0) + return 1; + /* e.g., name: google.com && pattern: .google.com */ + if(lpattern > lname && + cistrcmp(pattern+lpattern-lname, name) == 0) + return 1; + } + return 0; +} + +/* + * RFC2109 4.3.4: + * - domain must match + * - path in cookie must be a prefix of request path + * - cookie must not have expired + */ +static int +iscookiematch(Cookie *c, char *dom, char *path, uint now) +{ + return isdomainmatch(dom, c->dom) + && strncmp(c->path, path, strlen(c->path))==0 + && (c->expire == 0 || c->expire >= now); +} + +/* + * Produce a subjar of matching cookies. + * Secure cookies are only included if secure is set. + */ +static Jar* +cookiesearch(Jar *jar, char *dom, char *path, int issecure) +{ + int i; + Jar *j; + uint now; + + if(cookiedebug) + fprint(2, "cookiesearch %s %s %d\n", dom, path, issecure); + now = time(0); + j = newjar(); + for(i=0; i<jar->nc; i++){ + if(cookiedebug) + fprint(2, "\ttry %s %s %d %s\n", jar->c[i].dom, + jar->c[i].path, jar->c[i].secure, + jar->c[i].name); + if((issecure || !jar->c[i].secure) && + iscookiematch(&jar->c[i], dom, path, now)){ + if(cookiedebug) + fprint(2, "\tmatched\n"); + addcookie(j, &jar->c[i]); + } + } + if(j->nc == 0){ + closejar(j); + werrstr("no cookies found"); + return nil; + } + qsort(j->c, j->nc, sizeof(j->c[0]), (int(*)(void*, void*))cookiecmp); + return j; +} + +/* + * RFC2109 4.3.2 security checks + */ +static char* +isbadcookie(Cookie *c, char *dom, char *path) +{ + int lcdom, ldom; + + if(strncmp(c->path, path, strlen(c->path)) != 0) + return "cookie path is not a prefix of the request path"; + + /* + * fgb says omitting this test is necessary to get some sites to work, + * but it seems dubious. + */ + if(c->explicitdom && c->dom[0] != '.') + return "cookie domain doesn't start with dot"; + + lcdom = strlen(c->dom); + if(memchr(c->dom+1, '.', lcdom-1-1) == nil) + return "cookie domain doesn't have embedded dots"; + + if(!isdomainmatch(dom, c->dom)) + return "request host does not match cookie domain"; + + ldom = strlen(dom); + if(strcmp(ipattr(dom), "dom")==0 && lcdom > ldom && + memchr(dom, '.', lcdom - ldom) != nil) + return "request host contains dots before cookie domain"; + + return 0; +} + +/* + * Sunday, 25-Jan-2002 12:24:36 GMT + * Sunday, 25 Jan 2002 12:24:36 GMT + * Sun, 25 Jan 02 12:24:36 GMT + */ +static int +isleap(int year) +{ + return year%4==0 && (year%100!=0 || year%400==0); +} + +static uint +strtotime(char *s) +{ + char *os; + int i; + Tm tm; + + static int mday[2][12] = { + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, + 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, + }; + static char *wday[] = { + "Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday", + }; + static char *mon[] = { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", + }; + + os = s; + /* Sunday, */ + for(i=0; i<nelem(wday); i++){ + if(cistrncmp(s, wday[i], strlen(wday[i])) == 0){ + s += strlen(wday[i]); + break; + } + if(cistrncmp(s, wday[i], 3) == 0){ + s += 3; + break; + } + } + if(i==nelem(wday)){ + if(cookiedebug) + fprint(2, "bad wday (%s)\n", os); + return -1; + } + if(*s++ != ',' || *s++ != ' '){ + if(cookiedebug) + fprint(2, "bad wday separator (%s)\n", os); + return -1; + } + + /* 25- */ + if(!isdigit(s[0]) || !isdigit(s[1]) || (s[2]!='-' && s[2]!=' ')){ + if(cookiedebug) + fprint(2, "bad day of month (%s)\n", os); + return -1; + } + tm.mday = strtol(s, 0, 10); + s += 3; + + /* Jan- */ + for(i=0; i<nelem(mon); i++) + if(cistrncmp(s, mon[i], 3) == 0){ + tm.mon = i; + s += 3; + break; + } + if(i==nelem(mon)){ + if(cookiedebug) + fprint(2, "bad month (%s)\n", os); + return -1; + } + if(s[0] != '-' && s[0] != ' '){ + if(cookiedebug) + fprint(2, "bad month separator (%s)\n", os); + return -1; + } + s++; + + /* 2002 */ + if(!isdigit(s[0]) || !isdigit(s[1])){ + if(cookiedebug) + fprint(2, "bad year (%s)\n", os); + return -1; + } + tm.year = strtol(s, 0, 10); + s += 2; + if(isdigit(s[0]) && isdigit(s[1])) + s += 2; + else{ + if(tm.year <= 68) + tm.year += 2000; + else + tm.year += 1900; + } + if(tm.mday==0 || tm.mday > mday[isleap(tm.year)][tm.mon]){ + if(cookiedebug) + fprint(2, "invalid day of month (%s)\n", os); + return -1; + } + tm.year -= 1900; + if(*s++ != ' '){ + if(cookiedebug) + fprint(2, "bad year separator (%s)\n", os); + return -1; + } + + if(!isdigit(s[0]) || !isdigit(s[1]) || s[2]!=':' + || !isdigit(s[3]) || !isdigit(s[4]) || s[5]!=':' + || !isdigit(s[6]) || !isdigit(s[7]) || s[8]!=' '){ + if(cookiedebug) + fprint(2, "bad time (%s)\n", os); + return -1; + } + + tm.hour = atoi(s); + tm.min = atoi(s+3); + tm.sec = atoi(s+6); + if(tm.hour >= 24 || tm.min >= 60 || tm.sec >= 60){ + if(cookiedebug) + fprint(2, "invalid time (%s)\n", os); + return -1; + } + s += 9; + + if(cistrcmp(s, "GMT") != 0){ + if(cookiedebug) + fprint(2, "time zone not GMT (%s)\n", os); + return -1; + } + strcpy(tm.zone, "GMT"); + tm.yday = 0; + return tm2sec(&tm); +} + +/* + * skip linear whitespace. we're a bit more lenient than RFC2616 2.2. + */ +static char* +skipspace(char *s) +{ + while(*s=='\r' || *s=='\n' || *s==' ' || *s=='\t') + s++; + return s; +} + +/* + * Try to identify old netscape headers. + * The old headers: + * - didn't allow spaces around the '=' + * - used an 'Expires' attribute + * - had no 'Version' attribute + * - had no quotes + * - allowed whitespace in values + * - apparently separated attr/value pairs with ';' exclusively + */ +static int +isnetscape(char *hdr) +{ + char *s; + + for(s=hdr; (s=strchr(s, '=')) != nil; s++){ + if(isspace(s[1]) || (s > hdr && isspace(s[-1]))) + return 0; + if(s[1]=='"') + return 0; + } + if(cistrstr(hdr, "version=")) + return 0; + return 1; +} + +/* + * Parse HTTP response headers, adding cookies to jar. + * Overwrites the headers. May overwrite path. + */ +static char* parsecookie(Cookie*, char*, char**, int, char*, char*); +static int +parsehttp(Jar *jar, char *hdr, char *dom, char *path) +{ + static char setcookie[] = "Set-Cookie:"; + char *e, *p, *nextp; + Cookie c; + int isns, n; + + isns = isnetscape(hdr); + n = 0; + for(p=hdr; p; p=nextp){ + p = skipspace(p); + if(*p == '\0') + break; + nextp = strchr(p, '\n'); + if(nextp != nil) + *nextp++ = '\0'; + if(cistrncmp(p, setcookie, strlen(setcookie)) != 0) + continue; + if(cookiedebug) + fprint(2, "%s\n", p); + p = skipspace(p+strlen(setcookie)); + for(; *p; p=skipspace(p)){ + if((e = parsecookie(&c, p, &p, isns, dom, path)) != nil){ + if(cookiedebug) + fprint(2, "parse cookie: %s\n", e); + break; + } + if((e = isbadcookie(&c, dom, path)) != nil){ + if(cookiedebug) + fprint(2, "reject cookie; %s\n", e); + continue; + } + addcookie(jar, &c); + n++; + } + } + return n; +} + +static char* +skipquoted(char *s) +{ + /* + * Sec 2.2 of RFC2616 defines a "quoted-string" as: + * + * quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) + * qdtext = <any TEXT except <">> + * quoted-pair = "\" CHAR + * + * TEXT is any octet except CTLs, but including LWS; + * LWS is [CR LF] 1*(SP | HT); + * CHARs are ASCII octets 0-127; (NOTE: we reject 0's) + * CTLs are octets 0-31 and 127; + */ + if(*s != '"') + return s; + + for(s++; 32 <= *s && *s < 127 && *s != '"'; s++) + if(*s == '\\' && *(s+1) != '\0') + s++; + return s; +} + +static char* +skiptoken(char *s) +{ + /* + * Sec 2.2 of RFC2616 defines a "token" as + * 1*<any CHAR except CTLs or separators>; + * CHARs are ASCII octets 0-127; + * CTLs are octets 0-31 and 127; + * separators are "()<>@,;:\/[]?={}", double-quote, SP (32), and HT (9) + */ + while(32 <= *s && *s < 127 && strchr("()<>@,;:[]?={}\" \t\\", *s)==nil) + s++; + + return s; +} + +static char* +skipvalue(char *s, int isns) +{ + char *t; + + /* + * An RFC2109 value is an HTTP token or an HTTP quoted string. + * Netscape servers ignore the spec and rely on semicolons, apparently. + */ + if(isns){ + if((t = strchr(s, ';')) == nil) + t = s+strlen(s); + return t; + } + if(*s == '"') + return skipquoted(s); + return skiptoken(s); +} + +/* + * RMID=80b186bb64c03c65fab767f8; expires=Monday, 10-Feb-2003 04:44:39 GMT; + * path=/; domain=.nytimes.com + */ +static char* +parsecookie(Cookie *c, char *p, char **e, int isns, char *dom, char *path) +{ + int i, done; + char *t, *u, *attr, *val; + + c->expire = ~0; + memset(c, 0, sizeof *c); + + /* NAME=VALUE */ + t = skiptoken(p); + c->name = p; + p = skipspace(t); + if(*p != '='){ + Badname: + return "malformed cookie: no NAME=VALUE"; + } + *t = '\0'; + p = skipspace(p+1); + t = skipvalue(p, isns); + if(*t) + *t++ = '\0'; + c->value = p; + p = skipspace(t); + if(c->name[0]=='\0' || c->value[0]=='\0') + goto Badname; + + done = 0; + for(; *p && !done; p=skipspace(p)){ + attr = p; + t = skiptoken(p); + u = skipspace(t); + switch(*u){ + case '\0': + *t = '\0'; + val = p = u; + break; + case ';': + *t = '\0'; + val = ""; + p = u+1; + break; + case '=': + *t = '\0'; + val = skipspace(u+1); + p = skipvalue(val, isns); + if(*p==',') + done = 1; + if(*p) + *p++ = '\0'; + break; + case ',': + if(!isns){ + val = ""; + p = u; + *p++ = '\0'; + done = 1; + break; + } + default: + if(cookiedebug) + fprint(2, "syntax: %s\n", p); + return "syntax error"; + } + for(i=0; i<nelem(stab); i++) + if(stab[i].ishttp && cistrcmp(stab[i].s, attr)==0) + *(char**)((uintptr)c+stab[i].offset) = val; + if(cistrcmp(attr, "expires") == 0){ + if(!isns) + return "non-netscape cookie has Expires tag"; + if(!val[0]) + return "bad expires tag"; + c->expire = strtotime(val); + if(c->expire == ~0) + return "cannot parse netscape expires tag"; + } + if(cistrcmp(attr, "max-age") == 0) + c->expire = time(0)+atoi(val); + if(cistrcmp(attr, "secure") == 0) + c->secure = 1; + } + + if(c->dom) + c->explicitdom = 1; + else + c->dom = dom; + if(c->path) + c->explicitpath = 1; + else{ + c->path = path; + if((t = strchr(c->path, '?')) != 0) + *t = '\0'; + if((t = strrchr(c->path, '/')) != 0) + *t = '\0'; + } + c->netscapestyle = isns; + *e = p; + + return nil; +} + +Jar *jar; + +typedef struct Aux Aux; +struct Aux +{ + char *dom; + char *path; + char *inhttp; + char *outhttp; + char *ctext; + int rdoff; +}; +enum +{ + AuxBuf = 4096, + MaxCtext = 16*1024*1024, +}; + +void +cookieopen(Req *r) +{ + char *s, *es; + int i, sz; + Aux *a; + + syncjar(jar); + a = emalloc9p(sizeof(Aux)); + r->fid->aux = a; + if(r->ifcall.mode&OTRUNC){ + a->ctext = emalloc9p(1); + a->ctext[0] = '\0'; + }else{ + sz = 256*jar->nc+1024; /* BUG should do better */ + a->ctext = emalloc9p(sz); + a->ctext[0] = '\0'; + s = a->ctext; + es = s+sz; + for(i=0; i<jar->nc; i++) + s = seprint(s, es, "%K\n", &jar->c[i]); + } + respond(r, nil); +} + +void +cookieread(Req *r) +{ + Aux *a; + + a = r->fid->aux; + readstr(r, a->ctext); + respond(r, nil); +} + +void +cookiewrite(Req *r) +{ + Aux *a; + int sz; + + a = r->fid->aux; + sz = r->ifcall.count+r->ifcall.offset; + if(sz > strlen(a->ctext)){ + if(sz >= MaxCtext){ + respond(r, "cookie file too large"); + return; + } + a->ctext = erealloc9p(a->ctext, sz+1); + a->ctext[sz] = '\0'; + } + memmove(a->ctext+r->ifcall.offset, r->ifcall.data, r->ifcall.count); + r->ofcall.count = r->ifcall.count; + respond(r, nil); +} + +void +cookieclunk(Fid *fid) +{ + char *p, *nextp; + Aux *a; + int i; + + a = fid->aux; + if(a == nil) + return; + for(i=0; i<jar->nc; i++) + jar->c[i].mark = 1; + for(p=a->ctext; *p; p=nextp){ + if((nextp = strchr(p, '\n')) != nil) + *nextp++ = '\0'; + else + nextp = ""; + addtojar(jar, p, 0); + } + for(i=0; i<jar->nc; i++) + if(jar->c[i].mark) + delcookie(jar, &jar->c[i]); + syncjar(jar); + free(a->dom); + free(a->path); + free(a->inhttp); + free(a->outhttp); + free(a->ctext); + free(a); +} + +void +closecookies(void) +{ + closejar(jar); +} + +void +initcookies(char *file) +{ + char *home; + + fmtinstall('J', jarfmt); + fmtinstall('K', cookiefmt); + + if(file == nil){ + home = getenv("home"); + if(home == nil) + sysfatal("no cookie file specified and no $home"); + file = emalloc9p(strlen(home)+30); + strcpy(file, home); + strcat(file, "/lib/webcookies"); + } + jar = readjar(file); + if(jar == nil) + sysfatal("readjar: %r"); +} + +void +httpsetcookie(char *hdr, char *dom, char *path) +{ + if(path == nil) + path = "/"; + + parsehttp(jar, hdr, dom, path); + syncjar(jar); +} + +char* +httpcookies(char *dom, char *path, int issecure) +{ + char buf[1024]; + Jar *j; + + syncjar(jar); + j = cookiesearch(jar, dom, path, issecure); + snprint(buf, sizeof buf, "%J", j); + closejar(j); + return estrdup(buf); +} diff --git a/sys/src/cmd/webfs/dat.h b/sys/src/cmd/webfs/dat.h new file mode 100755 index 000000000..cde79c3ff --- /dev/null +++ b/sys/src/cmd/webfs/dat.h @@ -0,0 +1,103 @@ +typedef struct Client Client; +typedef struct Ctl Ctl; +typedef struct Ibuf Ibuf; +typedef struct Url Url; + +/* simple buffered i/o for network connections; shared by http, ftp */ +struct Ibuf +{ + int fd; + Ioproc *io; + char buf[4096]; + char *rp, *wp; +}; + +struct Ctl +{ + int acceptcookies; + int sendcookies; + int redirectlimit; + char *useragent; +}; + +struct Client +{ + Url *url; + Url *baseurl; + Ctl ctl; + Channel *creq; /* chan(Req*) */ + int num; + int plumbed; + char *contenttype; + char *postbody; + char *redirect; + char *authenticate; + char *ext; + int npostbody; + int havepostbody; + int iobusy; + int bodyopened; + Ioproc *io; + int ref; + void *aux; +}; + +/* + * If ischeme is USunknown, then the given URL is a relative + * URL which references the "current document" in the context of the base. + * If this is the case, only the "fragment" and "url" members will have + * meaning, and the given URL structure may not be used as a base URL itself. + */ +enum +{ + USunknown, + UShttp, + UShttps, + USftp, + USfile, + UScurrent, +}; + +struct Url +{ + int ischeme; + char* url; + char* scheme; + int (*open)(Client*, Url*); + int (*read)(Client*, Req*); + void (*close)(Client*); + char* schemedata; + char* authority; + char* user; + char* passwd; + char* host; + char* port; + char* path; + char* query; + char* fragment; + union { + struct { + char *page_spec; + } http; + struct { + char *path_spec; + char *type; + } ftp; + }; +}; + +enum +{ + STACK = 32*1024, /* was 16*1024; there are big arrays on the stack */ +}; + +extern Client** client; +extern int cookiedebug; +extern Srv fs; +extern int fsdebug; +extern Ctl globalctl; +extern int nclient; +extern int urldebug; +extern int httpdebug; +extern char* status[]; + diff --git a/sys/src/cmd/webfs/fns.h b/sys/src/cmd/webfs/fns.h new file mode 100755 index 000000000..7bebe018f --- /dev/null +++ b/sys/src/cmd/webfs/fns.h @@ -0,0 +1,62 @@ +/* buf.c */ +void initibuf(Ibuf*, Ioproc*, int); +int readibuf(Ibuf*, char*, int); +void unreadline(Ibuf*, char*); +int readline(Ibuf*, char*, int); + +/* client.c */ +int newclient(int); +void closeclient(Client*); +void clonectl(Ctl*); +int ctlwrite(Req*, Ctl*, char*, char*); +int clientctlwrite(Req*, Client*, char*, char*); +int globalctlwrite(Req*, char*, char*); +void ctlread(Req*, Client*); +void globalctlread(Req*); +void plumburl(char*, char*); + +/* cookies.c */ +void cookieread(Req*); +void cookiewrite(Req*); +void cookieopen(Req*); +void cookieclunk(Fid*); +void initcookies(char*); +void closecookies(void); +void httpsetcookie(char*, char*, char*); +char* httpcookies(char*, char*, int); + +/* fs.c */ +void initfs(void); + +/* http.c */ +int httpopen(Client*, Url*); +int httpread(Client*, Req*); +void httpclose(Client*); + +/* io.c */ +int iotlsdial(Ioproc*, char*, char*, char*, int*, int); +int ioprint(Ioproc*, int, char*, ...); +#pragma varargck argpos ioprint 3 + +/* plumb.c */ +void plumbinit(void); +void plumbstart(void); +void replumb(Client*); + +/* url.c */ +Url* parseurl(char*, Url*); +void freeurl(Url*); +void rewriteurl(Url*); +int seturlquery(Url*, char*); +Url* copyurl(Url*); +char* escapeurl(char*, int(*)(int)); +char* unescapeurl(char*); +void initurl(void); + +/* util.c */ +char* estrdup(char*); +char* estrmanydup(char*, ...); +char* estredup(char*, char*); +void* emalloc(uint); +void* erealloc(void*, uint); +char* strlower(char*); diff --git a/sys/src/cmd/webfs/fs.c b/sys/src/cmd/webfs/fs.c new file mode 100755 index 000000000..087390d72 --- /dev/null +++ b/sys/src/cmd/webfs/fs.c @@ -0,0 +1,616 @@ +/* + * Web file system. Conventionally mounted at /mnt/web + * + * ctl send control messages (might go away) + * cookies list of cookies, editable + * clone open and read to obtain new connection + * n connection directory + * ctl control messages (like get url) + * body retrieved data + * content-type mime content-type of body + * postbody data to be posted + * parsed parsed version of url + * url entire url + * scheme http, ftp, etc. + * host hostname + * path path on host + * query query after path + * fragment #foo anchor reference + * user user name (ftp) + * password password (ftp) + * ftptype transfer mode (ftp) + */ + +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <ip.h> +#include <plumb.h> +#include <thread.h> +#include <fcall.h> +#include <9p.h> +#include "dat.h" +#include "fns.h" + +int fsdebug; + +enum +{ + Qroot, + Qrootctl, + Qclone, + Qcookies, + Qclient, + Qctl, + Qbody, + Qbodyext, + Qcontenttype, + Qpostbody, + Qparsed, + Qurl, + Qscheme, + Qschemedata, + Quser, + Qpasswd, + Qhost, + Qport, + Qpath, + Qquery, + Qfragment, + Qftptype, + Qend, +}; + +#define PATH(type, n) ((type)|((n)<<8)) +#define TYPE(path) ((int)(path) & 0xFF) +#define NUM(path) ((uint)(path)>>8) + +Channel *creq; +Channel *creqwait; +Channel *cclunk; +Channel *cclunkwait; + +typedef struct Tab Tab; +struct Tab +{ + char *name; + ulong mode; + int offset; +}; + +Tab tab[] = +{ + "/", DMDIR|0555, 0, + "ctl", 0666, 0, + "clone", 0666, 0, + "cookies", 0666, 0, + "XXX", DMDIR|0555, 0, + "ctl", 0666, 0, + "body", 0444, 0, + "XXX", 0444, 0, + "contenttype", 0444, 0, + "postbody", 0666, 0, + "parsed", DMDIR|0555, 0, + "url", 0444, offsetof(Url, url), + "scheme", 0444, offsetof(Url, scheme), + "schemedata", 0444, offsetof(Url, schemedata), + "user", 0444, offsetof(Url, user), + "passwd", 0444, offsetof(Url, passwd), + "host", 0444, offsetof(Url, host), + "port", 0444, offsetof(Url, port), + "path", 0444, offsetof(Url, path), + "query", 0444, offsetof(Url, query), + "fragment", 0444, offsetof(Url, fragment), + "ftptype", 0444, offsetof(Url, ftp.type), +}; + +ulong time0; + +static void +fillstat(Dir *d, uvlong path, ulong length, char *ext) +{ + Tab *t; + int type; + char buf[32]; + + memset(d, 0, sizeof(*d)); + d->uid = estrdup("web"); + d->gid = estrdup("web"); + d->qid.path = path; + d->atime = d->mtime = time0; + d->length = length; + type = TYPE(path); + t = &tab[type]; + if(type == Qbodyext) { + snprint(buf, sizeof buf, "body.%s", ext == nil ? "xxx" : ext); + d->name = estrdup(buf); + } + else if(t->name) + d->name = estrdup(t->name); + else{ /* client directory */ + snprint(buf, sizeof buf, "%ud", NUM(path)); + d->name = estrdup(buf); + } + d->qid.type = t->mode>>24; + d->mode = t->mode; +} + +static void +fsstat(Req *r) +{ + fillstat(&r->d, r->fid->qid.path, 0, nil); + respond(r, nil); +} + +static int +rootgen(int i, Dir *d, void*) +{ + char buf[32]; + + i += Qroot+1; + if(i < Qclient){ + fillstat(d, i, 0, nil); + return 0; + } + i -= Qclient; + if(i < nclient){ + fillstat(d, PATH(Qclient, i), 0, nil); + snprint(buf, sizeof buf, "%d", i); + free(d->name); + d->name = estrdup(buf); + return 0; + } + return -1; +} + +static int +clientgen(int i, Dir *d, void *aux) +{ + Client *c; + + c = aux; + i += Qclient+1; + if(i <= Qparsed){ + fillstat(d, PATH(i, c->num), 0, c->ext); + return 0; + } + return -1; +} + +static int +parsedgen(int i, Dir *d, void *aux) +{ + Client *c; + + c = aux; + i += Qparsed+1; + if(i < Qend){ + fillstat(d, PATH(i, c->num), 0, nil); + return 0; + } + return -1; +} + +static void +fsread(Req *r) +{ + char *s; + char e[ERRMAX]; + Client *c; + ulong path; + + path = r->fid->qid.path; + switch(TYPE(path)){ + default: + snprint(e, sizeof e, "bug in webfs path=%lux\n", path); + respond(r, e); + break; + + case Qroot: + dirread9p(r, rootgen, nil); + respond(r, nil); + break; + + case Qrootctl: + globalctlread(r); + break; + + case Qcookies: + cookieread(r); + break; + + case Qclient: + dirread9p(r, clientgen, client[NUM(path)]); + respond(r, nil); + break; + + case Qctl: + ctlread(r, client[NUM(path)]); + break; + + case Qcontenttype: + c = client[NUM(path)]; + if(c->contenttype == nil) + r->ofcall.count = 0; + else + readstr(r, c->contenttype); + respond(r, nil); + break; + + case Qpostbody: + c = client[NUM(path)]; + readbuf(r, c->postbody, c->npostbody); + respond(r, nil); + break; + + case Qbody: + case Qbodyext: + c = client[NUM(path)]; + if(c->iobusy){ + respond(r, "already have i/o pending"); + break; + } + c->iobusy = 1; + sendp(c->creq, r); + break; + + case Qparsed: + dirread9p(r, parsedgen, client[NUM(path)]); + respond(r, nil); + break; + + case Qurl: + case Qscheme: + case Qschemedata: + case Quser: + case Qpasswd: + case Qhost: + case Qport: + case Qpath: + case Qquery: + case Qfragment: + case Qftptype: + c = client[NUM(path)]; + r->ofcall.count = 0; + if(c->url != nil + && (s = *(char**)((uintptr)c->url+tab[TYPE(path)].offset)) != nil) + readstr(r, s); + respond(r, nil); + break; + } +} + +static void +fswrite(Req *r) +{ + int m; + ulong path; + char e[ERRMAX], *buf, *cmd, *arg; + Client *c; + + path = r->fid->qid.path; + switch(TYPE(path)){ + default: + snprint(e, sizeof e, "bug in webfs path=%lux\n", path); + respond(r, e); + break; + + case Qcookies: + cookiewrite(r); + break; + + case Qrootctl: + case Qctl: + if(r->ifcall.count >= 1024){ + respond(r, "ctl message too long"); + return; + } + buf = estredup(r->ifcall.data, (char*)r->ifcall.data+r->ifcall.count); + cmd = buf; + arg = strpbrk(cmd, "\t "); + if(arg){ + *arg++ = '\0'; + arg += strspn(arg, "\t "); + }else + arg = ""; + r->ofcall.count = r->ifcall.count; + if(TYPE(path)==Qrootctl){ + if(!ctlwrite(r, &globalctl, cmd, arg) + && !globalctlwrite(r, cmd, arg)) + respond(r, "unknown control command"); + }else{ + c = client[NUM(path)]; + if(!ctlwrite(r, &c->ctl, cmd, arg) + && !clientctlwrite(r, c, cmd, arg)) + respond(r, "unknown control command"); + } + free(buf); + break; + + case Qpostbody: + c = client[NUM(path)]; + if(c->bodyopened){ + respond(r, "cannot write postbody after opening body"); + break; + } + if(r->ifcall.offset >= 128*1024*1024){ /* >128MB is probably a mistake */ + respond(r, "offset too large"); + break; + } + m = r->ifcall.offset + r->ifcall.count; + if(c->npostbody < m){ + c->postbody = erealloc(c->postbody, m); + memset(c->postbody+c->npostbody, 0, m-c->npostbody); + c->npostbody = m; + } + memmove(c->postbody+r->ifcall.offset, r->ifcall.data, r->ifcall.count); + r->ofcall.count = r->ifcall.count; + respond(r, nil); + break; + } +} + +static void +fsopen(Req *r) +{ + static int need[4] = { 4, 2, 6, 1 }; + ulong path; + int n; + Client *c; + Tab *t; + + /* + * lib9p already handles the blatantly obvious. + * we just have to enforce the permissions we have set. + */ + path = r->fid->qid.path; + t = &tab[TYPE(path)]; + n = need[r->ifcall.mode&3]; + if((n&t->mode) != n){ + respond(r, "permission denied"); + return; + } + + switch(TYPE(path)){ + case Qcookies: + cookieopen(r); + break; + + case Qpostbody: + c = client[NUM(path)]; + c->havepostbody++; + c->ref++; + respond(r, nil); + break; + + case Qbody: + case Qbodyext: + c = client[NUM(path)]; + if(c->url == nil){ + respond(r, "url is not yet set"); + break; + } + c->bodyopened = 1; + c->ref++; + sendp(c->creq, r); + break; + + case Qclone: + n = newclient(0); + path = PATH(Qctl, n); + r->fid->qid.path = path; + r->ofcall.qid.path = path; + if(fsdebug) + fprint(2, "open clone => path=%lux\n", path); + t = &tab[Qctl]; + /* fall through */ + default: + if(t-tab >= Qclient) + client[NUM(path)]->ref++; + respond(r, nil); + break; + } +} + +static void +fsdestroyfid(Fid *fid) +{ + sendp(cclunk, fid); + recvp(cclunkwait); +} + +static void +fsattach(Req *r) +{ + if(r->ifcall.aname && r->ifcall.aname[0]){ + respond(r, "invalid attach specifier"); + return; + } + r->fid->qid.path = PATH(Qroot, 0); + r->fid->qid.type = QTDIR; + r->fid->qid.vers = 0; + r->ofcall.qid = r->fid->qid; + respond(r, nil); +} + +static char* +fswalk1(Fid *fid, char *name, Qid *qid) +{ + int i, n; + ulong path; + char buf[32], *ext; + + path = fid->qid.path; + if(!(fid->qid.type&QTDIR)) + return "walk in non-directory"; + + if(strcmp(name, "..") == 0){ + switch(TYPE(path)){ + case Qparsed: + qid->path = PATH(Qclient, NUM(path)); + qid->type = tab[Qclient].mode>>24; + return nil; + case Qclient: + case Qroot: + qid->path = PATH(Qroot, 0); + qid->type = tab[Qroot].mode>>24; + return nil; + default: + return "bug in fswalk1"; + } + } + + i = TYPE(path)+1; + for(; i<nelem(tab); i++){ + if(i==Qclient){ + n = atoi(name); + snprint(buf, sizeof buf, "%d", n); + if(n < nclient && strcmp(buf, name) == 0){ + qid->path = PATH(i, n); + qid->type = tab[i].mode>>24; + return nil; + } + break; + } + if(i==Qbodyext){ + ext = client[NUM(path)]->ext; + snprint(buf, sizeof buf, "body.%s", ext == nil ? "xxx" : ext); + if(strcmp(buf, name) == 0){ + qid->path = PATH(i, NUM(path)); + qid->type = tab[i].mode>>24; + return nil; + } + } + else if(strcmp(name, tab[i].name) == 0){ + qid->path = PATH(i, NUM(path)); + qid->type = tab[i].mode>>24; + return nil; + } + if(tab[i].mode&DMDIR) + break; + } + return "directory entry not found"; +} + +static void +fsflush(Req *r) +{ + Req *or; + int t; + Client *c; + ulong path; + + or=r; + while(or->ifcall.type==Tflush) + or = or->oldreq; + + if(or->ifcall.type != Tread && or->ifcall.type != Topen) + abort(); + + path = or->fid->qid.path; + t = TYPE(path); + if(t != Qbody && t != Qbodyext) + abort(); + + c = client[NUM(path)]; + sendp(c->creq, r); + iointerrupt(c->io); +} + +static void +fsthread(void*) +{ + ulong path; + Alt a[3]; + Fid *fid; + Req *r; + + threadsetname("fsthread"); + plumbstart(); + + a[0].op = CHANRCV; + a[0].c = cclunk; + a[0].v = &fid; + a[1].op = CHANRCV; + a[1].c = creq; + a[1].v = &r; + a[2].op = CHANEND; + + for(;;){ + switch(alt(a)){ + case 0: + path = fid->qid.path; + if(TYPE(path)==Qcookies) + cookieclunk(fid); + if(fid->omode != -1 && TYPE(path) >= Qclient) + closeclient(client[NUM(path)]); + sendp(cclunkwait, nil); + break; + case 1: + switch(r->ifcall.type){ + case Tattach: + fsattach(r); + break; + case Topen: + fsopen(r); + break; + case Tread: + fsread(r); + break; + case Twrite: + fswrite(r); + break; + case Tstat: + fsstat(r); + break; + case Tflush: + fsflush(r); + break; + default: + respond(r, "bug in fsthread"); + break; + } + sendp(creqwait, 0); + break; + } + } +} + +static void +fssend(Req *r) +{ + sendp(creq, r); + recvp(creqwait); /* avoids need to deal with spurious flushes */ +} + +void +initfs(void) +{ + time0 = time(0); + creq = chancreate(sizeof(void*), 0); + creqwait = chancreate(sizeof(void*), 0); + cclunk = chancreate(sizeof(void*), 0); + cclunkwait = chancreate(sizeof(void*), 0); + procrfork(fsthread, nil, STACK, RFNAMEG); +} + +void +takedown(Srv*) +{ + closecookies(); + threadexitsall("done"); +} + +Srv fs = +{ +.attach= fssend, +.destroyfid= fsdestroyfid, +.walk1= fswalk1, +.open= fssend, +.read= fssend, +.write= fssend, +.stat= fssend, +.flush= fssend, +.end= takedown, +}; + diff --git a/sys/src/cmd/webfs/http.c b/sys/src/cmd/webfs/http.c new file mode 100755 index 000000000..0d25a7c20 --- /dev/null +++ b/sys/src/cmd/webfs/http.c @@ -0,0 +1,539 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <ip.h> +#include <plumb.h> +#include <thread.h> +#include <fcall.h> +#include <9p.h> +#include <libsec.h> +#include <auth.h> +#include "dat.h" +#include "fns.h" + +char PostContentType[] = "application/x-www-form-urlencoded"; +int httpdebug; + +typedef struct HttpState HttpState; +struct HttpState +{ + int fd; + Client *c; + char *location; + char *setcookie; + char *netaddr; + char *credentials; + char autherror[ERRMAX]; + Ibuf b; +}; + +static void +location(HttpState *hs, char *value) +{ + if(hs->location == nil) + hs->location = estrdup(value); +} + +static void +contenttype(HttpState *hs, char *value) +{ + if(hs->c->contenttype != nil) + free(hs->c->contenttype); + hs->c->contenttype = estrdup(value); +} + +static void +setcookie(HttpState *hs, char *value) +{ + char *s, *t; + Fmt f; + + s = hs->setcookie; + fmtstrinit(&f); + if(s) + fmtprint(&f, "%s", s); + fmtprint(&f, "set-cookie: "); + fmtprint(&f, "%s", value); + fmtprint(&f, "\n"); + t = fmtstrflush(&f); + if(t){ + free(s); + hs->setcookie = t; + } +} + +static char* +unquote(char *s, char **ps) +{ + char *p; + + if(*s != '"'){ + p = strpbrk(s, " \t\r\n"); + *p++ = 0; + *ps = p; + return s; + } + for(p=s+1; *p; p++){ + if(*p == '\"'){ + *p++ = 0; + break; + } + if(*p == '\\' && *(p+1)){ + p++; + continue; + } + } + memmove(s, s+1, p-(s+1)); + s[p-(s+1)] = 0; + *ps = p; + return s; +} + +static char* +servername(char *addr) +{ + char *p; + + if(strncmp(addr, "tcp!", 4) == 0 + || strncmp(addr, "net!", 4) == 0) + addr += 4; + addr = estrdup(addr); + p = addr+strlen(addr); + if(p>addr && *(p-1) == 's') + p--; + if(p>addr+5 && strcmp(p-5, "!http") == 0) + p[-5] = 0; + return addr; +} + +void +wwwauthenticate(HttpState *hs, char *line) +{ + char cred[64], *user, *pass, *realm, *s, *spec, *name; + Fmt fmt; + UserPasswd *up; + + spec = nil; + up = nil; + cred[0] = 0; + hs->autherror[0] = 0; + if(cistrncmp(line, "basic ", 6) != 0){ + werrstr("unknown auth: %s", line); + goto error; + } + line += 6; + if(cistrncmp(line, "realm=", 6) != 0){ + werrstr("missing realm: %s", line); + goto error; + } + line += 6; + user = hs->c->url->user; + pass = hs->c->url->passwd; + if(user==nil || pass==nil){ + realm = unquote(line, &line); + fmtstrinit(&fmt); + name = servername(hs->netaddr); + fmtprint(&fmt, "proto=pass service=http server=%q realm=%q", name, realm); + free(name); + if(hs->c->url->user) + fmtprint(&fmt, " user=%q", hs->c->url->user); + spec = fmtstrflush(&fmt); + if(spec == nil) + goto error; + if((up = auth_getuserpasswd(nil, "%s", spec)) == nil) + goto error; + user = up->user; + pass = up->passwd; + } + if((s = smprint("%s:%s", user, pass)) == nil) + goto error; + free(up); + enc64(cred, sizeof(cred), (uchar*)s, strlen(s)); + memset(s, 0, strlen(s)); + free(s); + hs->credentials = smprint("Basic %s", cred); + if(hs->credentials == nil) + goto error; + return; + +error: + free(up); + free(spec); + snprint(hs->autherror, sizeof hs->autherror, "%r"); + fprint(2, "%s: Authentication failed: %r\n", argv0); +} + +struct { + char *name; /* Case-insensitive */ + void (*fn)(HttpState *hs, char *value); +} hdrtab[] = { + { "location:", location }, + { "content-type:", contenttype }, + { "set-cookie:", setcookie }, + { "www-authenticate:", wwwauthenticate }, +}; + +static int +httprcode(HttpState *hs) +{ + int n; + char *p; + char buf[256]; + + n = readline(&hs->b, buf, sizeof(buf)-1); + if(n <= 0) + return n; + if(httpdebug) + fprint(2, "-> %s\n", buf); + p = strchr(buf, ' '); + if(memcmp(buf, "HTTP/", 5) != 0 || p == nil){ + werrstr("bad response from server"); + return -1; + } + buf[n] = 0; + return atoi(p+1); +} + +/* + * read a single mime header, collect continuations. + * + * this routine assumes that there is a blank line twixt + * the header and the message body, otherwise bytes will + * be lost. + */ +static int +getheader(HttpState *hs, char *buf, int n) +{ + char *p, *e; + int i; + + n--; + p = buf; + for(e = p + n; ; p += i){ + i = readline(&hs->b, p, e-p); + if(i < 0) + return i; + + if(p == buf){ + /* first line */ + if(strchr(buf, ':') == nil) + break; /* end of headers */ + } else { + /* continuation line */ + if(*p != ' ' && *p != '\t'){ + unreadline(&hs->b, p); + *p = 0; + break; /* end of this header */ + } + } + } + + if(httpdebug) + fprint(2, "-> %s\n", buf); + return p-buf; +} + +static int +httpheaders(HttpState *hs) +{ + char buf[2048]; + char *p; + int i, n; + + for(;;){ + n = getheader(hs, buf, sizeof(buf)); + if(n < 0) + return -1; + if(n == 0) + return 0; + // print("http header: '%.*s'\n", n, buf); + for(i = 0; i < nelem(hdrtab); i++){ + n = strlen(hdrtab[i].name); + if(cistrncmp(buf, hdrtab[i].name, n) == 0){ + /* skip field name and leading white */ + p = buf + n; + while(*p == ' ' || *p == '\t') + p++; + (*hdrtab[i].fn)(hs, p); + break; + } + } + } +} + +int +httpopen(Client *c, Url *url) +{ + int fd, code, redirect, authenticate; + char *cookies; + Ioproc *io; + HttpState *hs; + char *service; + + if(httpdebug) + fprint(2, "httpopen\n"); + io = c->io; + hs = emalloc(sizeof(*hs)); + hs->c = c; + + if(url->port) + service = url->port; + else + service = url->scheme; + hs->netaddr = estrdup(netmkaddr(url->host, 0, service)); + c->aux = hs; + if(httpdebug){ + fprint(2, "dial %s\n", hs->netaddr); + fprint(2, "dial port: %s\n", url->port); + } + fd = iotlsdial(io, hs->netaddr, 0, 0, 0, url->ischeme==UShttps); + if(fd < 0){ + Error: + if(httpdebug) + fprint(2, "iodial: %r\n"); + free(hs->location); + free(hs->setcookie); + free(hs->netaddr); + free(hs->credentials); + if(fd >= 0) + ioclose(io, hs->fd); + hs->fd = -1; + free(hs); + c->aux = nil; + return -1; + } + hs->fd = fd; + if(httpdebug) + fprint(2, "<- %s %s HTTP/1.0\n<- Host: %s\n", + c->havepostbody? "POST": "GET", url->http.page_spec, url->host); + ioprint(io, fd, "%s %s HTTP/1.0\r\nHost: %s\r\n", + c->havepostbody? "POST" : "GET", url->http.page_spec, url->host); + if(httpdebug) + fprint(2, "<- User-Agent: %s\n", c->ctl.useragent); + if(c->ctl.useragent) + ioprint(io, fd, "User-Agent: %s\r\n", c->ctl.useragent); + if(c->ctl.sendcookies){ + /* should we use url->page here? sometimes it is nil. */ + cookies = httpcookies(url->host, url->http.page_spec, + url->ischeme == UShttps); + if(cookies && cookies[0]) + ioprint(io, fd, "%s", cookies); + if(httpdebug) + fprint(2, "<- %s", cookies); + free(cookies); + } + if(c->havepostbody){ + ioprint(io, fd, "Content-type: %s\r\n", PostContentType); + ioprint(io, fd, "Content-length: %ud\r\n", c->npostbody); + if(httpdebug){ + fprint(2, "<- Content-type: %s\n", PostContentType); + fprint(2, "<- Content-length: %ud\n", c->npostbody); + } + } + if(c->authenticate){ + ioprint(io, fd, "Authorization: %s\r\n", c->authenticate); + if(httpdebug) + fprint(2, "<- Authorization: %s\n", c->authenticate); + } + ioprint(io, fd, "\r\n"); + if(c->havepostbody) + if(iowrite(io, fd, c->postbody, c->npostbody) != c->npostbody) + goto Error; + + redirect = 0; + authenticate = 0; + initibuf(&hs->b, io, fd); + code = httprcode(hs); + + switch(code){ + case -1: /* connection timed out */ + goto Error; + +/* + case Eof: + werrstr("EOF from HTTP server"); + goto Error; +*/ + + case 200: /* OK */ + case 201: /* Created */ + case 202: /* Accepted */ + case 204: /* No Content */ + case 205: /* Reset Content */ +#ifdef NOT_DEFINED + if(ofile == nil && r->start != 0) + sysfatal("page changed underfoot"); +#endif + break; + + case 206: /* Partial Content */ + werrstr("Partial Content (206)"); + goto Error; + + case 301: /* Moved Permanently */ + case 302: /* Moved Temporarily */ + case 303: /* See Other */ + case 307: /* Temporary Redirect */ + redirect = 1; + break; + + case 304: /* Not Modified */ + break; + + case 400: /* Bad Request */ + werrstr("Bad Request (400)"); + goto Error; + + case 401: /* Unauthorized */ + if(c->authenticate){ + werrstr("Authentication failed (401)"); + goto Error; + } + authenticate = 1; + break; + case 402: /* Payment Required */ + werrstr("Payment Required (402)"); + goto Error; + + case 403: /* Forbidden */ + werrstr("Forbidden by server (403)"); + goto Error; + + case 404: /* Not Found */ + werrstr("Not found on server (404)"); + goto Error; + + case 405: /* Method Not Allowed */ + werrstr("Method not allowed (405)"); + goto Error; + + case 406: /* Not Acceptable */ + werrstr("Not Acceptable (406)"); + goto Error; + + case 407: /* Proxy auth */ + werrstr("Proxy authentication required (407)"); + goto Error; + + case 408: /* Request Timeout */ + werrstr("Request Timeout (408)"); + goto Error; + + case 409: /* Conflict */ + werrstr("Conflict (409)"); + goto Error; + + case 410: /* Gone */ + werrstr("Gone (410)"); + goto Error; + + case 411: /* Length Required */ + werrstr("Length Required (411)"); + goto Error; + + case 412: /* Precondition Failed */ + werrstr("Precondition Failed (412)"); + goto Error; + + case 413: /* Request Entity Too Large */ + werrstr("Request Entity Too Large (413)"); + goto Error; + + case 414: /* Request-URI Too Long */ + werrstr("Request-URI Too Long (414)"); + goto Error; + + case 415: /* Unsupported Media Type */ + werrstr("Unsupported Media Type (415)"); + goto Error; + + case 416: /* Requested Range Not Satisfiable */ + werrstr("Requested Range Not Satisfiable (416)"); + goto Error; + + case 417: /* Expectation Failed */ + werrstr("Expectation Failed (417)"); + goto Error; + + case 500: /* Internal server error */ + werrstr("Server choked (500)"); + goto Error; + + case 501: /* Not implemented */ + werrstr("Server can't do it (501)"); + goto Error; + + case 502: /* Bad gateway */ + werrstr("Bad gateway (502)"); + goto Error; + + case 503: /* Service unavailable */ + werrstr("Service unavailable (503)"); + goto Error; + + default: + /* Bogus: we should treat unknown code XYZ as code X00 */ + werrstr("Unknown response code %d", code); + goto Error; + } + + if(httpheaders(hs) < 0) + goto Error; + if(c->ctl.acceptcookies && hs->setcookie) + httpsetcookie(hs->setcookie, url->host, url->path); + if(authenticate){ + if(!hs->credentials){ + if(hs->autherror[0]) + werrstr("%s", hs->autherror); + else + werrstr("unauthorized; no www-authenticate: header"); + goto Error; + } + c->authenticate = hs->credentials; + hs->credentials = nil; + }else if(c->authenticate) + c->authenticate = 0; + if(redirect){ + if(!hs->location){ + werrstr("redirection without Location: header"); + goto Error; + } + c->redirect = hs->location; + hs->location = nil; + } + return 0; +} + +int +httpread(Client *c, Req *r) +{ + HttpState *hs; + long n; + + hs = c->aux; + n = readibuf(&hs->b, r->ofcall.data, r->ifcall.count); + if(n < 0) + return -1; + + r->ofcall.count = n; + return 0; +} + +void +httpclose(Client *c) +{ + HttpState *hs; + + hs = c->aux; + if(hs == nil) + return; + if(hs->fd >= 0) + ioclose(c->io, hs->fd); + hs->fd = -1; + free(hs->location); + free(hs->setcookie); + free(hs->netaddr); + free(hs->credentials); + free(hs); + c->aux = nil; +} diff --git a/sys/src/cmd/webfs/io.c b/sys/src/cmd/webfs/io.c new file mode 100755 index 000000000..9eea91264 --- /dev/null +++ b/sys/src/cmd/webfs/io.c @@ -0,0 +1,84 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <ip.h> +#include <plumb.h> +#include <thread.h> +#include <fcall.h> +#include <9p.h> +#include <mp.h> +#include <libsec.h> +#include "dat.h" +#include "fns.h" + +static long +_iovfprint(va_list *arg) +{ + int fd; + char *fmt; + va_list arg2; + + fd = va_arg(*arg, int); + fmt = va_arg(*arg, char*); + arg2 = va_arg(*arg, va_list); + return vfprint(fd, fmt, arg2); +} + +int +iovfprint(Ioproc *io, int fd, char *fmt, va_list arg) +{ + return iocall(io, _iovfprint, fd, fmt, arg); +} + +int +ioprint(Ioproc *io, int fd, char *fmt, ...) +{ + int n; + va_list arg; + + va_start(arg, fmt); + n = iovfprint(io, fd, fmt, arg); + va_end(arg); + return n; +} + +static long +_iotlsdial(va_list *arg) +{ + char *addr, *local, *dir; + int *cfdp, fd, tfd, usetls; + TLSconn conn; + + addr = va_arg(*arg, char*); + local = va_arg(*arg, char*); + dir = va_arg(*arg, char*); + cfdp = va_arg(*arg, int*); + usetls = va_arg(*arg, int); + + fd = dial(addr, local, dir, cfdp); + if(fd < 0) + return -1; + if(!usetls) + return fd; + + memset(&conn, 0, sizeof conn); + /* does no good, so far anyway */ + // conn.chain = readcertchain("/sys/lib/ssl/vsignss.pem"); + + tfd = tlsClient(fd, &conn); + close(fd); + if(tfd < 0) + fprint(2, "%s: tlsClient: %r\n", argv0); + else { + /* BUG: check cert here? */ + if(conn.cert) + free(conn.cert); + } + return tfd; +} + +int +iotlsdial(Ioproc *io, char *addr, char *local, char *dir, int *cfdp, int usetls) +{ + return iocall(io, _iotlsdial, addr, local, dir, cfdp, usetls); +} diff --git a/sys/src/cmd/webfs/main.c b/sys/src/cmd/webfs/main.c new file mode 100755 index 000000000..0792dbf13 --- /dev/null +++ b/sys/src/cmd/webfs/main.c @@ -0,0 +1,67 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <ip.h> +#include <plumb.h> +#include <thread.h> +#include <fcall.h> +#include <9p.h> +#include "dat.h" +#include "fns.h" + +char *cookiefile; +char *mtpt = "/mnt/web"; +char *service; + +Ctl globalctl = +{ + 1, /* accept cookies */ + 1, /* send cookies */ + 10, /* redirect limit */ + "webfs/2.0 (plan 9)" /* user agent */ +}; + +void +usage(void) +{ + fprint(2, "usage: webfs [-c cookies] [-m mtpt] [-s service]\n"); + threadexitsall("usage"); +} + +#include <pool.h> +void +threadmain(int argc, char **argv) +{ + rfork(RFNOTEG); + ARGBEGIN{ + case 'd': + mainmem->flags |= POOL_PARANOIA|POOL_ANTAGONISM; + break; + case 'D': + chatty9p++; + break; + case 'c': + cookiefile = EARGF(usage()); + break; + case 'm': + mtpt = EARGF(usage()); + break; + case 's': + service = EARGF(usage()); + break; + default: + usage(); + }ARGEND + + quotefmtinstall(); + if(argc != 0) + usage(); + + plumbinit(); + globalctl.useragent = estrdup(globalctl.useragent); + initcookies(cookiefile); + initurl(); + initfs(); + threadpostmountsrv(&fs, service, mtpt, MREPL); + threadexits(nil); +} diff --git a/sys/src/cmd/webfs/mkfile b/sys/src/cmd/webfs/mkfile new file mode 100755 index 000000000..879ab16b9 --- /dev/null +++ b/sys/src/cmd/webfs/mkfile @@ -0,0 +1,35 @@ +</$objtype/mkfile +BIN=/$objtype/bin + +TARG=webfs + +SCHEMEOFILES=\ + file.$O\ + ftp.$O\ + http.$O\ + +OFILES=\ + buf.$O\ + client.$O\ + cookies.$O\ + fs.$O\ + http.$O\ + io.$O\ + main.$O\ + plumb.$O\ + url.$O\ + util.$O\ +# $SCHEMEOFILES + +HFILES=\ + dat.h\ + fns.h\ + +UPDATE=\ + mkfile\ + $HFILES\ + ${OFILES:%.$O=%.c}\ + ${TARG:%=/386/bin/%}\ + +</sys/src/cmd/mkone + diff --git a/sys/src/cmd/webfs/plumb.c b/sys/src/cmd/webfs/plumb.c new file mode 100755 index 000000000..ada0f4168 --- /dev/null +++ b/sys/src/cmd/webfs/plumb.c @@ -0,0 +1,165 @@ +#include <u.h> +#include <libc.h> +#include <auth.h> +#include <fcall.h> +#include <thread.h> +#include <plumb.h> +#include <9p.h> + +#include "dat.h" +#include "fns.h" + +static int plumbsendfd; +static int plumbwebfd; +static Channel *plumbchan; + +static void plumbwebproc(void*); +static void plumbwebthread(void*); +static void plumbsendproc(void*); + +void +plumbinit(void) +{ + plumbsendfd = plumbopen("send", OWRITE|OCEXEC); + plumbwebfd = plumbopen("web", OREAD|OCEXEC); +} + +void +plumbstart(void) +{ + plumbchan = chancreate(sizeof(Plumbmsg*), 0); + proccreate(plumbwebproc, nil, STACK); + threadcreate(plumbwebthread, nil, STACK); +} + +static void +plumbwebthread(void*) +{ + char *base; + Plumbmsg *m; + + for(;;){ + m = recvp(plumbchan); + if(m == nil) + threadexits(nil); + base = plumblookup(m->attr, "baseurl"); + if(base == nil) + base = m->wdir; + plumburl(m->data, base); + plumbfree(m); + } +} + +static void +plumbwebproc(void*) +{ + Plumbmsg *m; + + for(;;){ + m = plumbrecv(plumbwebfd); + sendp(plumbchan, m); + if(m == nil) + threadexits(nil); + } +} + +static void +addattr(Plumbmsg *m, char *name, char *value) +{ + Plumbattr *a; + + a = malloc(sizeof(Plumbattr)); + a->name = name; + a->value = value; + a->next = m->attr; + m->attr = a; +} + +static void +freeattrs(Plumbmsg *m) +{ + Plumbattr *a, *next; + + a = m->attr; + while(a != nil) { + next = a->next; + free(a); + a = next; + } +} + +static struct +{ + char *ctype; + char *ext; +} +ctypes[] = +{ + { "application/msword", "doc" }, + { "application/pdf", "pdf" }, + { "application/postscript", "ps" }, + { "application/rtf", "rtf" }, + { "image/gif", "gif" }, + { "image/jpeg", "jpg" }, + { "image/png", "png" }, + { "image/ppm", "ppm" }, + { "image/tiff", "tiff" }, + { "text/html", "html" }, + { "text/plain", "txt" }, + { "text/xml", "xml" }, +}; + +void +replumb(Client *c) +{ + int i; + Plumbmsg *m; + char name[128], *ctype, *ext, *p; + + if(!c->plumbed) + return; + m = emalloc(sizeof(Plumbmsg)); + m->src = "webfs"; + m->dst = nil; + m->wdir = "/"; + m->type = "text"; + m->attr = nil; + addattr(m, "url", c->url->url); + ctype = c->contenttype; + ext = nil; + if(ctype != nil) { + addattr(m, "content-type", ctype); + for(i = 0; i < nelem(ctypes); i++) { + if(strcmp(ctype, ctypes[i].ctype) == 0) { + ext = ctypes[i].ext; + break; + } + } + } + if(ext == nil) { + p = strrchr(c->url->url, '/'); + if(p != nil) + p = strrchr(p+1, '.'); + if(p != nil && strlen(p) <= 5) + ext = p+1; + else + ext = "txt"; /* punt */ + } + c->ext = ext; +if(0)fprint(2, "content type %s -> extension .%s\n", ctype, ext); + m->ndata = snprint(name, sizeof name, "/mnt/web/%d/body.%s", c->num, ext); + m->data = estrdup(name); + proccreate(plumbsendproc, m, STACK); /* separate proc to avoid a deadlock */ +} + +static void +plumbsendproc(void *x) +{ + Plumbmsg *m; + + m = x; + plumbsend(plumbsendfd, m); + freeattrs(m); + free(m->data); + free(m); +} diff --git a/sys/src/cmd/webfs/url.c b/sys/src/cmd/webfs/url.c new file mode 100755 index 000000000..f46c8b47b --- /dev/null +++ b/sys/src/cmd/webfs/url.c @@ -0,0 +1,1092 @@ +/* + * This is a URL parser, written to parse "Common Internet Scheme" URL + * syntax as described in RFC1738 and updated by RFC2396. Only absolute URLs + * are supported, using "server-based" naming authorities in the schemes. + * Support for literal IPv6 addresses is included, per RFC2732. + * + * Current "known" schemes: http, ftp, file. + * + * We can do all the parsing operations without Runes since URLs are + * defined to be composed of US-ASCII printable characters. + * See RFC1738, RFC2396. + */ + +#include <u.h> +#include <libc.h> +#include <ctype.h> +#include <regexp.h> +#include <plumb.h> +#include <thread.h> +#include <fcall.h> +#include <9p.h> +#include "dat.h" +#include "fns.h" + +int urldebug; + +/* If set, relative paths with leading ".." segments will have them trimmed */ +#define RemoveExtraRelDotDots 0 +#define ExpandCurrentDocUrls 1 + +static char* +schemestrtab[] = +{ + nil, + "http", + "https", + "ftp", + "file", +}; + +static int +ischeme(char *s) +{ + int i; + + for(i=0; i<nelem(schemestrtab); i++) + if(schemestrtab[i] && strcmp(s, schemestrtab[i])==0) + return i; + return USunknown; +} + +/* + * URI splitting regexp is from RFC2396, Appendix B: + * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? + * 12 3 4 5 6 7 8 9 + * + * Example: "http://www.ics.uci.edu/pub/ietf/uri/#Related" + * $2 = scheme "http" + * $4 = authority "www.ics.uci.edu" + * $5 = path "/pub/ietf/uri/" + * $7 = query <undefined> + * $9 = fragment "Related" + */ + +/* + * RFC2396, Sec 3.1, contains: + * + * Scheme names consist of a sequence of characters beginning with a + * lower case letter and followed by any combination of lower case + * letters, digits, plus ("+"), period ("."), or hyphen ("-"). For + * resiliency, programs interpreting URI should treat upper case letters + * as equivalent to lower case in scheme names (e.g., allow "HTTP" as + * well as "http"). + */ + +/* + * For server-based naming authorities (RFC2396 Sec 3.2.2): + * server = [ [ userinfo "@" ] hostport ] + * userinfo = *( unreserved | escaped | + * ";" | ":" | "&" | "=" | "+" | "$" | "," ) + * hostport = host [ ":" port ] + * host = hostname | IPv4address + * hostname = *( domainlabel "." ) toplabel [ "." ] + * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum + * toplabel = alpha | alpha *( alphanum | "-" ) alphanum + * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit + * port = *digit + * + * The host is a domain name of a network host, or its IPv4 address as a + * set of four decimal digit groups separated by ".". Literal IPv6 + * addresses are not supported. + * + * Note that literal IPv6 address support is outlined in RFC2732: + * host = hostname | IPv4address | IPv6reference + * ipv6reference = "[" IPv6address "]" (RFC2373) + * + * Since hostnames and numbers will have to be resolved by the OS anyway, + * we don't have to parse them too pedantically (counting '.'s, checking + * for well-formed literal IP addresses, etc.). + * + * In FTP/file paths, we reject most ";param"s and querys. In HTTP paths, + * we just pass them through. + * + * Instead of letting a "path" be 0-or-more characters as RFC2396 suggests, + * we'll say it's 1-or-more characters, 0-or-1 times. This way, an absent + * path yields a nil substring match, instead of an empty one. + * + * We're more restrictive than RFC2396 indicates with "userinfo" strings, + * insisting they have the form "[user[:password]]". This may need to + * change at some point, however. + */ + +/* RE character-class components -- these go in brackets */ +#define PUNCT "\\-_.!~*'()" +#define RES ";/?:@&=+$," +#define ALNUM "a-zA-Z0-9" +#define HEX "0-9a-fA-F" +#define UNRES ALNUM PUNCT + +/* RE components; _N => has N parenthesized subexpressions when expanded */ +#define ESCAPED_1 "(%[" HEX "][" HEX "])" +#define URIC_2 "([" RES UNRES "]|" ESCAPED_1 ")" +#define URICNOSLASH_2 "([" UNRES ";?:@&=+$,]|" ESCAPED_1 ")" +#define USERINFO_2 "([" UNRES ";:&=+$,]|" ESCAPED_1 ")" +#define PCHAR_2 "([" UNRES ":@&=+$,]|" ESCAPED_1 ")" +#define PSEGCHAR_3 "([/;]|" PCHAR_2 ")" + +typedef struct Retab Retab; +struct Retab +{ + char *str; + Reprog *prog; + int size; + int ind[5]; +}; + +enum +{ + REsplit = 0, + REscheme, + REunknowndata, + REauthority, + REhost, + REuserinfo, + REabspath, + REquery, + REfragment, + REhttppath, + REftppath, + REfilepath, + + MaxResub= 20, +}; + +Retab retab[] = /* view in constant width Font */ +{ +[REsplit] + "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]+)?(\\?([^#]*))?(#(.*))?$", nil, 0, + /* |-scheme-| |-auth.-| |path--| |query| |--|frag */ + { 2, 4, 5, 7, 9}, + +[REscheme] + "^[a-z][a-z0-9+-.]*$", nil, 0, + { 0, }, + +[REunknowndata] + "^" URICNOSLASH_2 URIC_2 "*$", nil, 0, + { 0, }, + +[REauthority] + "^(((" USERINFO_2 "*)@)?(((\\[[^\\]@]+\\])|([^:\\[@]+))(:([0-9]*))?)?)?$", nil, 0, + /* |----user info-----| |--------host----------------| |-port-| */ + { 3, 7, 11, }, + +[REhost] + "^(([a-zA-Z0-9\\-.]+)|(\\[([a-fA-F0-9.:]+)\\]))$", nil, 0, + /* |--regular host--| |-IPv6 literal-| */ + { 2, 4, }, + +[REuserinfo] + "^(([^:]*)(:([^:]*))?)$", nil, 0, + /* |user-| |pass-| */ + { 2, 4, }, + +[REabspath] + "^/" PSEGCHAR_3 "*$", nil, 0, + { 0, }, + +[REquery] + "^" URIC_2 "*$", nil, 0, + { 0, }, + +[REfragment] + "^" URIC_2 "*$", nil, 0, + { 0, }, + +[REhttppath] + "^.*$", nil, 0, + { 0, }, + +[REftppath] + "^(.+)(;[tT][yY][pP][eE]=([aAiIdD]))?$", nil, 0, + /*|--|-path |ftptype-| */ + { 1, 3, }, + +[REfilepath] + "^.*$", nil, 0, + { 0, }, +}; + +static int +countleftparen(char *s) +{ + int n; + + n = 0; + for(; *s; s++) + if(*s == '(') + n++; + return n; +} + +void +initurl(void) +{ + int i, j; + + for(i=0; i<nelem(retab); i++){ + retab[i].prog = regcomp(retab[i].str); + if(retab[i].prog == nil) + sysfatal("recomp(%s): %r", retab[i].str); + retab[i].size = countleftparen(retab[i].str)+1; + for(j=0; j<nelem(retab[i].ind); j++) + if(retab[i].ind[j] >= retab[i].size) + sysfatal("bad index in regexp table: retab[%d].ind[%d] = %d >= %d", + i, j, retab[i].ind[j], retab[i].size); + if(MaxResub < retab[i].size) + sysfatal("MaxResub too small: %d < %d", MaxResub, retab[i].size); + } +} + +typedef struct SplitUrl SplitUrl; +struct SplitUrl +{ + struct { + char *s; + char *e; + } url, scheme, authority, path, query, fragment; +}; + +/* + * Implements the algorithm in RFC2396 sec 5.2 step 6. + * Returns number of chars written, excluding NUL terminator. + * dest is known to be >= strlen(base)+rel_len. + */ +static void +merge_relative_path(char *base, char *rel_st, int rel_len, char *dest) +{ + char *s, *p, *e, *pdest; + + pdest = dest; + + /* 6a: start with base, discard last segment */ + if(base && base[0]){ + /* Empty paths don't match in our scheme; 'base' should be nil */ + assert(base[0] == '/'); + e = strrchr(base, '/'); + e++; + memmove(pdest, base, e-base); + pdest += e-base; + }else{ + /* Artistic license on my part */ + *pdest++ = '/'; + } + + /* 6b: append relative component */ + if(rel_st){ + memmove(pdest, rel_st, rel_len); + pdest += rel_len; + } + + /* 6c: remove any occurrences of "./" as a complete segment */ + s = dest; + *pdest = '\0'; + while(e = strstr(s, "./")){ + if((e == dest) || (*(e-1) == '/')){ + memmove(e, e+2, pdest+1-(e+2)); /* +1 for NUL */ + pdest -= 2; + }else + s = e+1; + } + + /* 6d: remove a trailing "." as a complete segment */ + if(pdest>dest && *(pdest-1)=='.' && + (pdest==dest+1 || *(pdest-2)=='/')) + *--pdest = '\0'; + + /* 6e: remove occurences of "seg/../", where seg != "..", left->right */ + s = dest+1; + while(e = strstr(s, "/../")){ + p = e - 1; + while(p >= dest && *p != '/') + p--; + if(memcmp(p, "/../", 4) != 0){ + memmove(p+1, e+4, pdest+1-(e+4)); + pdest -= (e+4) - (p+1); + }else + s = e+1; + } + + /* 6f: remove a trailing "seg/..", where seg isn't ".." */ + if(pdest-3 > dest && memcmp(pdest-3, "/..", 3)==0){ + p = pdest-3 - 1; + while(p >= dest && *p != '/') + p--; + if(memcmp(p, "/../", 4) != 0){ + pdest = p+1; + *pdest = '\0'; + } + } + + /* 6g: leading ".." segments are errors -- we'll just blat them out. */ + if(RemoveExtraRelDotDots){ + p = dest; + if (p[0] == '/') + p++; + s = p; + while(s[0]=='.' && s[1]=='.' && (s[2]==0 || s[2]=='/')) + s += 3; + if(s > p){ + memmove(p, s, pdest+1-s); + pdest -= s-p; + } + } + USED(pdest); + + if(urldebug) + fprint(2, "merge_relative_path: '%s' + '%.*s' -> '%s'\n", base, rel_len, + rel_st, dest); +} + +/* + * See RFC2396 sec 5.2 for info on resolving relative URIs to absolute form. + * + * If successful, this just ends up freeing and replacing "u->url". + */ +static int +resolve_relative(SplitUrl *su, Url *base, Url *u) +{ + char *url, *path; + char *purl, *ppath; + int currentdoc, ulen, plen; + + if(base == nil){ + werrstr("relative URI given without base"); + return -1; + } + if(base->scheme == nil){ + werrstr("relative URI given with no scheme"); + return -1; + } + if(base->ischeme == USunknown){ + werrstr("relative URI given with unknown scheme"); + return -1; + } + if(base->ischeme == UScurrent){ + werrstr("relative URI given with incomplete base"); + return -1; + } + assert(su->scheme.s == nil); + + /* Sec 5.2 step 2 */ + currentdoc = 0; + if(su->path.s==nil && su->scheme.s==nil && su->authority.s==nil && su->query.s==nil){ + /* Reference is to current document */ + if(urldebug) + fprint(2, "url %s is relative to current document\n", u->url); + u->ischeme = UScurrent; + if(!ExpandCurrentDocUrls) + return 0; + currentdoc = 1; + } + + /* Over-estimate the maximum lengths, for allocation purposes */ + /* (constants are for separators) */ + plen = 1; + if(base->path) + plen += strlen(base->path); + if(su->path.s) + plen += 1 + (su->path.e - su->path.s); + + ulen = 0; + ulen += strlen(base->scheme) + 1; + if(su->authority.s) + ulen += 2 + (su->authority.e - su->authority.s); + else + ulen += 2 + ((base->authority) ? strlen(base->authority) : 0); + ulen += plen; + if(su->query.s) + ulen += 1 + (su->query.e - su->query.s); + else if(currentdoc && base->query) + ulen += 1 + strlen(base->query); + if(su->fragment.s) + ulen += 1 + (su->fragment.e - su->fragment.s); + else if(currentdoc && base->fragment) + ulen += 1 + strlen(base->fragment); + url = emalloc(ulen+1); + path = emalloc(plen+1); + + url[0] = '\0'; + purl = url; + path[0] = '\0'; + ppath = path; + + if(su->authority.s || (su->path.s && (su->path.s[0] == '/'))){ + /* Is a "network-path" or "absolute-path"; don't merge with base path */ + /* Sec 5.2 steps 4,5 */ + if(su->path.s){ + memmove(ppath, su->path.s, su->path.e - su->path.s); + ppath += su->path.e - su->path.s; + *ppath = '\0'; + } + }else if(currentdoc){ + /* Is a current-doc reference; just copy the path from the base URL */ + if(base->path){ + strcpy(ppath, base->path); + ppath += strlen(ppath); + } + USED(ppath); + }else{ + /* Is a relative-path reference; we have to merge it */ + /* Sec 5.2 step 6 */ + merge_relative_path(base->path, + su->path.s, su->path.e - su->path.s, ppath); + } + + /* Build new URL from pieces, inheriting from base where needed */ + strcpy(purl, base->scheme); + purl += strlen(purl); + *purl++ = ':'; + if(su->authority.s){ + strcpy(purl, "//"); + purl += strlen(purl); + memmove(purl, su->authority.s, su->authority.e - su->authority.s); + purl += su->authority.e - su->authority.s; + }else if(base->authority){ + strcpy(purl, "//"); + purl += strlen(purl); + strcpy(purl, base->authority); + purl += strlen(purl); + } + assert((path[0] == '\0') || (path[0] == '/')); + strcpy(purl, path); + purl += strlen(purl); + + /* + * The query and fragment are not inherited from the base, + * except in case of "current document" URLs, which inherit any query + * and may inherit the fragment. + */ + if(su->query.s){ + *purl++ = '?'; + memmove(purl, su->query.s, su->query.e - su->query.s); + purl += su->query.e - su->query.s; + }else if(currentdoc && base->query){ + *purl++ = '?'; + strcpy(purl, base->query); + purl += strlen(purl); + } + + if(su->fragment.s){ + *purl++ = '#'; + memmove(purl, su->query.s, su->query.e - su->query.s); + purl += su->fragment.e - su->fragment.s; + }else if(currentdoc && base->fragment){ + *purl++ = '#'; + strcpy(purl, base->fragment); + purl += strlen(purl); + } + USED(purl); + + if(urldebug) + fprint(2, "resolve_relative: '%s' + '%s' -> '%s'\n", base->url, u->url, url); + free(u->url); + u->url = url; + free(path); + return 0; +} + +int +regx(Reprog *prog, char *s, Resub *m, int nm) +{ + int i; + + if(s == nil) + s = m[0].sp; /* why is this necessary? */ + + i = regexec(prog, s, m, nm); +/* + if(i >= 0) + for(j=0; j<nm; j++) + fprint(2, "match%d: %.*s\n", j, utfnlen(m[j].sp, m[j].ep-m[j].sp), m[j].sp); +*/ + return i; +} + +static int +ismatch(int i, char *s, char *desc) +{ + Resub m[1]; + + m[0].sp = m[0].ep = nil; + if(!regx(retab[i].prog, s, m, 1)){ + werrstr("malformed %s: %q", desc, s); + return 0; + } + return 1; +} + +static int +spliturl(char *url, SplitUrl *su) +{ + Resub m[MaxResub]; + Retab *t; + + /* + * Newlines are not valid in a URI, but regexp(2) treats them specially + * so it's best to make sure there are none before proceeding. + */ + if(strchr(url, '\n')){ + werrstr("newline in URI"); + return -1; + } + + /* + * Because we use NUL-terminated strings, as do many client and server + * implementations, an escaped NUL ("%00") will quite likely cause problems + * when unescaped. We can check for such a sequence once before examining + * the components because, per RFC2396 sec. 2.4.1 - 2.4.2, '%' is reserved + * in URIs to _always_ indicate escape sequences. Something like "%2500" + * will still get by, but that's legitimate, and if it ends up causing + * a NUL then someone is unescaping too many times. + */ + if(strstr(url, "%00")){ + werrstr("escaped NUL in URI"); + return -1; + } + + m[0].sp = m[0].ep = nil; + t = &retab[REsplit]; + if(!regx(t->prog, url, m, t->size)){ + werrstr("malformed URI: %q", url); + return -1; + } + + su->url.s = m[0].sp; + su->url.e = m[0].ep; + su->scheme.s = m[t->ind[0]].sp; + su->scheme.e = m[t->ind[0]].ep; + su->authority.s = m[t->ind[1]].sp; + su->authority.e = m[t->ind[1]].ep; + su->path.s = m[t->ind[2]].sp; + su->path.e = m[t->ind[2]].ep; + su->query.s = m[t->ind[3]].sp; + su->query.e = m[t->ind[3]].ep; + su->fragment.s = m[t->ind[4]].sp; + su->fragment.e = m[t->ind[4]].ep; + + if(urldebug) + fprint(2, "split url %s into %.*q %.*q %.*q %.*q %.*q %.*q\n", + url, + su->url.s ? utfnlen(su->url.s, su->url.e-su->url.s) : 10, su->url.s ? su->url.s : "", + su->scheme.s ? utfnlen(su->scheme.s, su->scheme.e-su->scheme.s) : 10, su->scheme.s ? su->scheme.s : "", + su->authority.s ? utfnlen(su->authority.s, su->authority.e-su->authority.s) : 10, su->authority.s ? su->authority.s : "", + su->path.s ? utfnlen(su->path.s, su->path.e-su->path.s) : 10, su->path.s ? su->path.s : "", + su->query.s ? utfnlen(su->query.s, su->query.e-su->query.s) : 10, su->query.s ? su->query.s : "", + su->fragment.s ? utfnlen(su->fragment.s, su->fragment.e-su->fragment.s) : 10, su->fragment.s ? su->fragment.s : ""); + + return 0; +} + +static int +parse_scheme(SplitUrl *su, Url *u) +{ + if(su->scheme.s == nil){ + werrstr("missing scheme"); + return -1; + } + u->scheme = estredup(su->scheme.s, su->scheme.e); + strlower(u->scheme); + + if(!ismatch(REscheme, u->scheme, "scheme")) + return -1; + + u->ischeme = ischeme(u->scheme); + if(urldebug) + fprint(2, "parse_scheme %s => %d\n", u->scheme, u->ischeme); + return 0; +} + +static int +parse_unknown_part(SplitUrl *su, Url *u) +{ + char *s, *e; + + assert(u->ischeme == USunknown); + assert(su->scheme.e[0] == ':'); + + s = su->scheme.e+1; + if(su->fragment.s){ + e = su->fragment.s-1; + assert(*e == '#'); + }else + e = s+strlen(s); + + u->schemedata = estredup(s, e); + if(!ismatch(REunknowndata, u->schemedata, "unknown scheme data")) + return -1; + return 0; +} + +static int +parse_userinfo(char *s, char *e, Url *u) +{ + Resub m[MaxResub]; + Retab *t; + + m[0].sp = s; + m[0].ep = e; + t = &retab[REuserinfo]; + if(!regx(t->prog, nil, m, t->size)){ + werrstr("malformed userinfo: %.*q", utfnlen(s, e-s), s); + return -1; + } + if(m[t->ind[0]].sp) + u->user = estredup(m[t->ind[0]].sp, m[t->ind[0]].ep); + if(m[t->ind[1]].sp) + u->user = estredup(m[t->ind[1]].sp, m[t->ind[1]].ep); + return 0; +} + +static int +parse_host(char *s, char *e, Url *u) +{ + Resub m[MaxResub]; + Retab *t; + + m[0].sp = s; + m[0].ep = e; + t = &retab[REhost]; + if(!regx(t->prog, nil, m, t->size)){ + werrstr("malformed host: %.*q", utfnlen(s, e-s), s); + return -1; + } + + assert(m[t->ind[0]].sp || m[t->ind[1]].sp); + + if(m[t->ind[0]].sp) /* regular */ + u->host = estredup(m[t->ind[0]].sp, m[t->ind[0]].ep); + else + u->host = estredup(m[t->ind[1]].sp, m[t->ind[1]].ep); + return 0; +} + +static int +parse_authority(SplitUrl *su, Url *u) +{ + Resub m[MaxResub]; + Retab *t; + char *host; + char *userinfo; + + if(su->authority.s == nil) + return 0; + + u->authority = estredup(su->authority.s, su->authority.e); + m[0].sp = m[0].ep = nil; + t = &retab[REauthority]; + if(!regx(t->prog, u->authority, m, t->size)){ + werrstr("malformed authority: %q", u->authority); + return -1; + } + + if(m[t->ind[0]].sp) + if(parse_userinfo(m[t->ind[0]].sp, m[t->ind[0]].ep, u) < 0) + return -1; + if(m[t->ind[1]].sp) + if(parse_host(m[t->ind[1]].sp, m[t->ind[1]].ep, u) < 0) + return -1; + if(m[t->ind[2]].sp) + u->port = estredup(m[t->ind[2]].sp, m[t->ind[2]].ep); + + + if(urldebug > 0){ + userinfo = estredup(m[t->ind[0]].sp, m[t->ind[0]].ep); + host = estredup(m[t->ind[1]].sp, m[t->ind[1]].ep); + fprint(2, "port: %q, authority %q\n", u->port, u->authority); + fprint(2, "host %q, userinfo %q\n", host, userinfo); + free(host); + free(userinfo); + } + return 0; +} + +static int +parse_abspath(SplitUrl *su, Url *u) +{ + if(su->path.s == nil) + return 0; + u->path = estredup(su->path.s, su->path.e); + if(!ismatch(REabspath, u->path, "absolute path")) + return -1; + return 0; +} + +static int +parse_query(SplitUrl *su, Url *u) +{ + if(su->query.s == nil) + return 0; + u->query = estredup(su->query.s, su->query.e); + if(!ismatch(REquery, u->query, "query")) + return -1; + return 0; +} + +static int +parse_fragment(SplitUrl *su, Url *u) +{ + if(su->fragment.s == nil) + return 0; + u->fragment = estredup(su->fragment.s, su->fragment.e); + if(!ismatch(REfragment, u->fragment, "fragment")) + return -1; + return 0; +} + +static int +postparse_http(Url *u) +{ + u->open = httpopen; + u->read = httpread; + u->close = httpclose; + + if(u->authority==nil){ + werrstr("missing authority (hostname, port, etc.)"); + return -1; + } + if(u->host == nil){ + werrstr("missing host specification"); + return -1; + } + + if(u->path == nil){ + u->http.page_spec = estrdup("/"); + return 0; + } + + if(!ismatch(REhttppath, u->path, "http path")) + return -1; + if(u->query){ + u->http.page_spec = emalloc(strlen(u->path)+1+strlen(u->query)+1); + strcpy(u->http.page_spec, u->path); + strcat(u->http.page_spec, "?"); + strcat(u->http.page_spec, u->query); + }else + u->http.page_spec = estrdup(u->path); + + return 0; +} + +static int +postparse_ftp(Url *u) +{ + Resub m[MaxResub]; + Retab *t; + + if(u->authority==nil){ + werrstr("missing authority (hostname, port, etc.)"); + return -1; + } + if(u->query){ + werrstr("unexpected \"?query\" in ftp path"); + return -1; + } + if(u->host == nil){ + werrstr("missing host specification"); + return -1; + } + + if(u->path == nil){ + u->ftp.path_spec = estrdup("/"); + return 0; + } + + m[0].sp = m[0].ep = nil; + t = &retab[REftppath]; + if(!regx(t->prog, u->path, m, t->size)){ + werrstr("malformed ftp path: %q", u->path); + return -1; + } + + if(m[t->ind[0]].sp){ + u->ftp.path_spec = estredup(m[t->ind[0]].sp, m[t->ind[0]].ep); + if(strchr(u->ftp.path_spec, ';')){ + werrstr("unexpected \";param\" in ftp path"); + return -1; + } + }else + u->ftp.path_spec = estrdup("/"); + + if(m[t->ind[1]].sp){ + u->ftp.type = estredup(m[t->ind[1]].sp, m[t->ind[1]].ep); + strlower(u->ftp.type); + } + return 0; +} + +static int +postparse_file(Url *u) +{ + if(u->user || u->passwd){ + werrstr("user information not valid with file scheme"); + return -1; + } + if(u->query){ + werrstr("unexpected \"?query\" in file path"); + return -1; + } + if(u->port){ + werrstr("port not valid with file scheme"); + return -1; + } + if(u->path == nil){ + werrstr("missing path in file scheme"); + return -1; + } + if(strchr(u->path, ';')){ + werrstr("unexpected \";param\" in file path"); + return -1; + } + + if(!ismatch(REfilepath, u->path, "file path")) + return -1; + + /* "localhost" is equivalent to no host spec, we'll chose the latter */ + if(u->host && cistrcmp(u->host, "localhost") == 0){ + free(u->host); + u->host = nil; + } + return 0; +} + +static int (*postparse[])(Url*) = { + nil, + postparse_http, + postparse_http, + postparse_ftp, + postparse_file, +}; + +Url* +parseurl(char *url, Url *base) +{ + Url *u; + SplitUrl su; + + if(urldebug) + fprint(2, "parseurl %s with base %s\n", url, base ? base->url : "<none>"); + + u = emalloc(sizeof(Url)); + u->url = estrdup(url); + if(spliturl(u->url, &su) < 0){ + Fail: + freeurl(u); + return nil; + } + + /* RFC2396 sec 3.1 says relative URIs are distinguished by absent scheme */ + if(su.scheme.s==nil){ + if(urldebug) + fprint(2, "parseurl has nil scheme\n"); + if(resolve_relative(&su, base, u) < 0 || spliturl(u->url, &su) < 0) + goto Fail; + if(u->ischeme == UScurrent){ + /* 'u.url' refers to current document; set fragment and return */ + if(parse_fragment(&su, u) < 0) + goto Fail; + return u; + } + } + + if(parse_scheme(&su, u) < 0 + || parse_fragment(&su, u) < 0) + goto Fail; + + if(u->ischeme == USunknown){ + if(parse_unknown_part(&su, u) < 0) + goto Fail; + return u; + } + + if(parse_query(&su, u) < 0 + || parse_authority(&su, u) < 0 + || parse_abspath(&su, u) < 0) + goto Fail; + + if(u->ischeme < nelem(postparse) && postparse[u->ischeme]) + if((*postparse[u->ischeme])(u) < 0) + goto Fail; + + setmalloctag(u, getcallerpc(&url)); + return u; +} + +void +freeurl(Url *u) +{ + if(u == nil) + return; + free(u->url); + free(u->scheme); + free(u->schemedata); + free(u->authority); + free(u->user); + free(u->passwd); + free(u->host); + free(u->port); + free(u->path); + free(u->query); + free(u->fragment); + switch(u->ischeme){ + case UShttp: + free(u->http.page_spec); + break; + case USftp: + free(u->ftp.path_spec); + free(u->ftp.type); + break; + } + free(u); +} + +void +rewriteurl(Url *u) +{ + char *s; + + if(u->schemedata) + s = estrmanydup(u->scheme, ":", u->schemedata, nil); + else + s = estrmanydup(u->scheme, "://", + u->user ? u->user : "", + u->passwd ? ":" : "", u->passwd ? u->passwd : "", + u->user ? "@" : "", u->host ? u->host : "", + u->port ? ":" : "", u->port ? u->port : "", + u->path, + u->query ? "?" : "", u->query ? u->query : "", + u->fragment ? "#" : "", u->fragment ? u->fragment : "", + nil); + free(u->url); + u->url = s; +} + +int +seturlquery(Url *u, char *query) +{ + if(query == nil){ + free(u->query); + u->query = nil; + return 0; + } + + if(!ismatch(REquery, query, "query")) + return -1; + + free(u->query); + u->query = estrdup(query); + return 0; +} + +static void +dupp(char **p) +{ + if(*p) + *p = estrdup(*p); +} + +Url* +copyurl(Url *u) +{ + Url *v; + + v = emalloc(sizeof(Url)); + *v = *u; + dupp(&v->url); + dupp(&v->scheme); + dupp(&v->schemedata); + dupp(&v->authority); + dupp(&v->user); + dupp(&v->passwd); + dupp(&v->host); + dupp(&v->port); + dupp(&v->path); + dupp(&v->query); + dupp(&v->fragment); + + switch(v->ischeme){ + case UShttp: + dupp(&v->http.page_spec); + break; + case USftp: + dupp(&v->ftp.path_spec); + dupp(&v->ftp.type); + break; + } + return v; +} + +static int +dhex(char c) +{ + if('0' <= c && c <= '9') + return c-'0'; + if('a' <= c && c <= 'f') + return c-'a'+10; + if('A' <= c && c <= 'F') + return c-'A'+10; + return 0; +} + +char* +escapeurl(char *s, int (*needesc)(int)) +{ + int n; + char *t, *u; + Rune r; + static char *hex = "0123456789abcdef"; + + n = 0; + for(t=s; *t; t++) + if((*needesc)(*t)) + n++; + + u = emalloc(strlen(s)+2*n+1); + t = u; + for(; *s; s++){ + s += chartorune(&r, s); + if(r >= 0xFF){ + werrstr("URLs cannot contain Runes > 0xFF"); + free(t); + return nil; + } + if((*needesc)(r)){ + *u++ = '%'; + *u++ = hex[(r>>4)&0xF]; + *u++ = hex[r&0xF]; + }else + *u++ = r; + } + *u = '\0'; + return t; +} + +char* +unescapeurl(char *s) +{ + char *r, *w; + Rune rune; + + s = estrdup(s); + for(r=w=s; *r; r++){ + if(*r=='%'){ + r++; + if(!isxdigit(r[0]) || !isxdigit(r[1])){ + werrstr("bad escape sequence '%.3s' in URL", r); + return nil; + } + if(r[0]=='0' && r[2]=='0'){ + werrstr("escaped NUL in URL"); + return nil; + } + rune = (dhex(r[0])<<4)|dhex(r[1]); /* latin1 */ + w += runetochar(w, &rune); + r += 2; + }else + *w++ = *r; + } + *w = '\0'; + return s; +} + diff --git a/sys/src/cmd/webfs/util.c b/sys/src/cmd/webfs/util.c new file mode 100755 index 000000000..b6788194e --- /dev/null +++ b/sys/src/cmd/webfs/util.c @@ -0,0 +1,86 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <ndb.h> +#include <fcall.h> +#include <thread.h> +#include <9p.h> +#include <ctype.h> +#include "dat.h" +#include "fns.h" + +void* +erealloc(void *a, uint n) +{ + a = realloc(a, n); + if(a == nil) + sysfatal("realloc %d: out of memory", n); + setrealloctag(a, getcallerpc(&a)); + return a; +} + +void* +emalloc(uint n) +{ + void *a; + + a = mallocz(n, 1); + if(a == nil) + sysfatal("malloc %d: out of memory", n); + setmalloctag(a, getcallerpc(&n)); + return a; +} + +char* +estrdup(char *s) +{ + s = strdup(s); + if(s == nil) + sysfatal("strdup: out of memory"); + setmalloctag(s, getcallerpc(&s)); + return s; +} + +char* +estredup(char *s, char *e) +{ + char *t; + + t = emalloc(e-s+1); + memmove(t, s, e-s); + t[e-s] = '\0'; + setmalloctag(t, getcallerpc(&s)); + return t; +} + +char* +estrmanydup(char *s, ...) +{ + char *p, *t; + int len; + va_list arg; + + len = strlen(s); + va_start(arg, s); + while((p = va_arg(arg, char*)) != nil) + len += strlen(p); + len++; + + t = emalloc(len); + strcpy(t, s); + va_start(arg, s); + while((p = va_arg(arg, char*)) != nil) + strcat(t, p); + return t; +} + +char* +strlower(char *s) +{ + char *t; + + for(t=s; *t; t++) + if('A' <= *t && *t <= 'Z') + *t += 'a'-'A'; + return s; +} diff --git a/sys/src/cmd/webfs/webget.c b/sys/src/cmd/webfs/webget.c new file mode 100755 index 000000000..d3a13afa1 --- /dev/null +++ b/sys/src/cmd/webfs/webget.c @@ -0,0 +1,87 @@ +/* + * Sample client. + */ +#include <u.h> +#include <libc.h> + +void +xfer(int from, int to) +{ + char buf[12*1024]; + int n; + + while((n = read(from, buf, sizeof buf)) > 0) + if(write(to, buf, n) < 0) + sysfatal("write failed: %r"); + if(n < 0) + sysfatal("read failed: %r"); +} + +void +usage(void) +{ + fprint(2, "usage: webget [-b baseurl] [-m mtpt] [-p postbody] url\n"); + exits("usage"); +} + +void +main(int argc, char **argv) +{ + int conn, ctlfd, fd, n; + char buf[128], *base, *mtpt, *post, *url; + + mtpt = "/mnt/web"; + post = nil; + base = nil; + ARGBEGIN{ + default: + usage(); + case 'b': + base = EARGF(usage()); + break; + case 'm': + mtpt = EARGF(usage()); + break; + case 'p': + post = EARGF(usage()); + break; + }ARGEND; + + if (argc != 1) + usage(); + + url = argv[0]; + + snprint(buf, sizeof buf, "%s/clone", mtpt); + if((ctlfd = open(buf, ORDWR)) < 0) + sysfatal("couldn't open %s: %r", buf); + if((n = read(ctlfd, buf, sizeof buf-1)) < 0) + sysfatal("reading clone: %r"); + if(n == 0) + sysfatal("short read on clone"); + buf[n] = '\0'; + conn = atoi(buf); + + if(base) + if(fprint(ctlfd, "baseurl %s", base) < 0) + sysfatal("baseurl ctl write: %r"); + + if(fprint(ctlfd, "url %s", url) <= 0) + sysfatal("get ctl write: %r"); + + if(post){ + snprint(buf, sizeof buf, "%s/%d/postbody", mtpt, conn); + if((fd = open(buf, OWRITE)) < 0) + sysfatal("open %s: %r", buf); + if(write(fd, post, strlen(post)) < 0) + sysfatal("post write failed: %r"); + close(fd); + } + + snprint(buf, sizeof buf, "%s/%d/body", mtpt, conn); + if((fd = open(buf, OREAD)) < 0) + sysfatal("open %s: %r", buf); + + xfer(fd, 1); + exits(nil); +} |