summaryrefslogtreecommitdiff
path: root/sys/src/cmd/webfs
diff options
context:
space:
mode:
author Taru Karttunen <taruti@taruti.net> 2011-03-30 15:46:40 +0300
committer Taru Karttunen <taruti@taruti.net> 2011-03-30 15:46:40 +0300
commit e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
tree d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/cmd/webfs
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/cmd/webfs')
-rwxr-xr-x sys/src/cmd/webfs/buf.c 89
-rwxr-xr-x sys/src/cmd/webfs/client.c 394
-rwxr-xr-x sys/src/cmd/webfs/cookies.c 1173
-rwxr-xr-x sys/src/cmd/webfs/dat.h 103
-rwxr-xr-x sys/src/cmd/webfs/fns.h 62
-rwxr-xr-x sys/src/cmd/webfs/fs.c 616
-rwxr-xr-x sys/src/cmd/webfs/http.c 539
-rwxr-xr-x sys/src/cmd/webfs/io.c 84
-rwxr-xr-x sys/src/cmd/webfs/main.c 67
-rwxr-xr-x sys/src/cmd/webfs/mkfile 35
-rwxr-xr-x sys/src/cmd/webfs/plumb.c 165
-rwxr-xr-x sys/src/cmd/webfs/url.c 1092
-rwxr-xr-x sys/src/cmd/webfs/util.c 86
-rwxr-xr-x sys/src/cmd/webfs/webget.c 87
14 files changed, 4592 insertions, 0 deletions
diff --git a/sys/src/cmd/webfs/buf.c b/sys/src/cmd/webfs/buf.c
new file mode 100755
index 000000000..ffd249407
--- /dev/null
+++ b/sys/src/cmd/webfs/buf.c
@@ -0,0 +1,89 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ip.h>
+#include <plumb.h>
+#include <thread.h>
+#include <fcall.h>
+#include <9p.h>
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Attach b to descriptor fd, to be read through io proc io,
+ * and mark its buffer as empty.
+ */
+void
+initibuf(Ibuf *b, Ioproc *io, int fd)
+{
+	b->io = io;
+	b->fd = fd;
+	b->rp = b->buf;
+	b->wp = b->buf;
+}
+
+/*
+ * Read up to len bytes into buf.  Bytes already buffered in b are handed
+ * out first (one call never mixes buffered and fresh data); only when the
+ * buffer is empty does the call go to ioreadn on the descriptor.
+ * Returns the number of buffered bytes copied, or ioreadn's result.
+ */
+int
+readibuf(Ibuf *b, char *buf, int len)
+{
+	int avail;
+
+	avail = b->wp - b->rp;
+	if(avail <= 0)
+		return ioreadn(b->io, b->fd, buf, len);
+
+	if(len < avail)
+		avail = len;
+	memmove(buf, b->rp, avail);
+	b->rp += avail;
+	return avail;
+}
+
+/*
+ * Push line, followed by a newline, back onto the front of b so that it
+ * is the next thing read; any unread bytes are shifted to follow it.
+ * NOTE(review): nothing checks that strlen(line)+1 plus the unread bytes
+ * fit in b->buf -- confirm callers only push back a line they just read.
+ */
+void
+unreadline(Ibuf *b, char *line)
+{
+	int linelen, pending;
+
+	linelen = strlen(line);
+	pending = b->wp - b->rp;
+
+	/* shift the unread tail out of the way first; the regions may overlap */
+	memmove(&b->buf[linelen+1], b->rp, pending);
+	memmove(b->buf, line, linelen);
+	b->buf[linelen] = '\n';
+
+	b->rp = b->buf;
+	b->wp = b->buf+linelen+1+pending;
+}
+
+/*
+ * Read one line from b into buf, which holds len bytes.
+ * At most len-1 bytes are stored; the remainder of an over-long line is
+ * consumed and dropped.  Trailing blanks, tabs, CRs and the newline are
+ * stripped and the result is NUL-terminated.
+ * Returns the length of the stored line (0 for an empty line or at end
+ * of file), or -1 if the underlying read fails.
+ */
+int
+readline(Ibuf *b, char *buf, int len)
+{
+	int n;
+	char *p;
+
+	len--;	/* keep room for the terminating NUL */
+
+	for(p = buf;;){
+		if(b->rp >= b->wp){
+			/*
+			 * The buffer is drained, so refill it from the start.
+			 * Appending at wp instead would run off the end of
+			 * b->buf once more than sizeof(b->buf) bytes had
+			 * passed through this routine.
+			 */
+			b->rp = b->wp = b->buf;
+			n = ioread(b->io, b->fd, b->wp, sizeof(b->buf)/2);
+			if(n < 0)
+				return -1;
+			if(n == 0)
+				break;
+			b->wp += n;
+		}
+		n = *b->rp++;
+		if(len > 0){
+			*p++ = n;
+			len--;
+		}
+		if(n == '\n')
+			break;
+	}
+
+	/* drop trailing white */
+	for(;;){
+		if(p <= buf)
+			break;
+		n = *(p-1);
+		if(n != ' ' && n != '\t' && n != '\r' && n != '\n')
+			break;
+		p--;
+	}
+
+	*p = 0;
+	return p-buf;
+}
+
diff --git a/sys/src/cmd/webfs/client.c b/sys/src/cmd/webfs/client.c
new file mode 100755
index 000000000..c133adbc2
--- /dev/null
+++ b/sys/src/cmd/webfs/client.c
@@ -0,0 +1,394 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ip.h>
+#include <plumb.h>
+#include <thread.h>
+#include <fcall.h>
+#include <9p.h>
+#include "dat.h"
+#include "fns.h"
+
+int nclient;
+Client **client;
+
+static void clientthread(void*);
+/*
+ * Return the index in client[] of a client that is free for use.
+ * A slot whose ref count is zero is reused as is; otherwise a new Client
+ * is allocated with its own request channel, thread and io proc, a copy
+ * of globalctl, and client[] is grown in steps of 16.
+ * This function does not touch ref; plumburl() increments it after the call.
+ * NOTE(review): when a slot is reused the plumbed argument is ignored and
+ * the slot's existing thread is kept -- confirm plumburl() copes with that.
+ */
+int
+newclient(int plumbed)
+{
+ int i;
+ Client *c;
+
+ /* prefer an idle slot over allocating */
+ for(i=0; i<nclient; i++)
+ if(client[i]->ref==0)
+ return i;
+
+ c = emalloc(sizeof(Client));
+ c->plumbed = plumbed;
+ c->creq = chancreate(sizeof(Req*), 8);
+ threadcreate(clientthread, c, STACK);
+
+ c->io = ioproc();
+ c->num = nclient;
+ c->ctl = globalctl;
+ /* the struct copy shares useragent with globalctl; give c its own */
+ clonectl(&c->ctl);
+ if(nclient%16 == 0)
+ client = erealloc(client, (nclient+16)*sizeof(client[0]));
+ client[nclient++] = c;
+ return nclient-1;
+}
+
+/*
+ * Drop one reference to c.  When the last reference goes, close the body
+ * through the url's close hook (if it was opened) and release the
+ * per-request state so the slot can be reused.
+ */
+void
+closeclient(Client *c)
+{
+	c->ref--;
+	if(c->ref != 0)
+		return;
+
+	if(c->bodyopened && c->url != nil && c->url->close != nil)
+		(*c->url->close)(c);
+	c->bodyopened = 0;
+
+	free(c->contenttype);
+	free(c->postbody);
+	freeurl(c->url);
+	free(c->redirect);
+	free(c->authenticate);
+
+	c->contenttype = nil;
+	c->postbody = nil;
+	c->url = nil;
+	c->redirect = nil;
+	c->authenticate = nil;
+	c->npostbody = 0;
+	c->havepostbody = 0;
+}
+
+/*
+ * Give c private copies of its heap-allocated members after a struct copy;
+ * only useragent is a pointer.
+ */
+void
+clonectl(Ctl *c)
+{
+	if(c->useragent == nil)
+		return;
+	c->useragent = estrdup(c->useragent);
+}
+
+/*
+ * Open the body of c->url, following redirects up to c->ctl.redirectlimit
+ * and retrying once when the open leaves c->authenticate set.
+ * r is the Topen request to answer, or nil when called for a plumbed url.
+ * On failure the client reference is dropped with closeclient(), the fid
+ * is marked unopened, and r (if any) is answered with the error string.
+ */
+void
+clientbodyopen(Client *c, Req *r)
+{
+	char e[ERRMAX], *next;
+	int i, nauth;
+	Url *u;
+
+	nauth = 0;
+	next = nil;
+	for(i=0; i<=c->ctl.redirectlimit; i++){
+		if(c->url == nil){
+			werrstr("nil url");
+			goto Error;
+		}
+		if(c->url->open == nil){
+			werrstr("unsupported url type");
+			goto Error;
+		}
+		if(fsdebug)
+			fprint(2, "try %s\n", c->url->url);
+		if(c->url->open(c, c->url) < 0){
+		Error:
+			if(next)
+				fprint(2, "next %s (but for error)\n", next);
+			free(next);
+			rerrstr(e, sizeof e);
+			c->iobusy = 0;
+			if(r != nil)
+				r->fid->omode = -1;
+			closeclient(c);	/* not opening */
+			if(r != nil)
+				respond(r, e);
+			return;
+		}
+		if (c->authenticate && nauth++ < 1)
+			continue;
+		if(!c->redirect)
+			break;
+		/* take over the redirect string; release the previous hop's first */
+		free(next);
+		next = c->redirect;
+		c->redirect = nil;
+		if(i==c->ctl.redirectlimit){
+			werrstr("redirect limit reached");
+			goto Error;
+		}
+		if((u = parseurl(next, c->url)) == nil)
+			goto Error;
+		if(urldebug)
+			fprint(2, "parseurl %s got scheme %d\n", next, u->ischeme);
+		if(u->ischeme == USunknown){
+			werrstr("redirect with unknown URL scheme");
+			goto Error;
+		}
+		if(u->ischeme == UScurrent){
+			werrstr("redirect to URL relative to current document");
+			goto Error;
+		}
+		freeurl(c->url);
+		c->url = u;
+	}
+	free(next);
+	c->iobusy = 0;
+	if(r != nil)
+		respond(r, nil);
+}
+
+/*
+ * Start fetching url (resolved against base when base is non-nil) on a
+ * client marked as plumbed.  Unparsable urls are silently ignored.
+ * The nil sent on creq wakes the client thread.
+ */
+void
+plumburl(char *url, char *base)
+{
+	int i;
+	Client *c;
+	Url *ubase, *uurl;
+
+	ubase = nil;
+	if(base != nil && (ubase = parseurl(base, nil)) == nil)
+		return;
+	if((uurl = parseurl(url, ubase)) == nil){
+		freeurl(ubase);
+		return;
+	}
+
+	/* newclient may move client[], so index only after it returns */
+	i = newclient(1);
+	c = client[i];
+	c->ref++;
+	c->baseurl = ubase;
+	c->url = uurl;
+	sendp(c->creq, nil);
+}
+
+/*
+ * Serve a Tread on the body through the url's read hook and answer r,
+ * with the error string if the hook fails.
+ */
+void
+clientbodyread(Client *c, Req *r)
+{
+	char e[ERRMAX], *err;
+
+	if(c->url->read == nil){
+		respond(r, "unsupported url type");
+		return;
+	}
+
+	err = nil;
+	if(c->url->read(c, r) < 0){
+		rerrstr(e, sizeof e);
+		err = e;
+	}
+	c->iobusy = 0;
+	respond(r, err);
+}
+
+/*
+ * Per-client thread: serves the requests sent on c->creq one at a time.
+ * A plumbed client first waits for the wake-up sent by plumburl(), opens
+ * the body and calls replumb().  The loop ends when nil is received.
+ */
+static void
+clientthread(void *a)
+{
+ Client *c;
+ Req *r;
+
+ c = a;
+ if(c->plumbed) {
+ /* value is ignored; this only waits for plumburl() to fill in c */
+ recvp(c->creq);
+ if(c->url == nil){
+ fprint(2, "bad url got plumbed\n");
+ return;
+ }
+ clientbodyopen(c, nil);
+ replumb(c);
+ }
+ while((r = recvp(c->creq)) != nil){
+ if(fsdebug)
+ fprint(2, "clientthread %F\n", &r->ifcall);
+ switch(r->ifcall.type){
+ case Topen:
+ /* a plumbed body was already opened above; just hand it over */
+ if(c->plumbed) {
+ c->plumbed = 0;
+ c->ref--; /* from plumburl() */
+ respond(r, nil);
+ }
+ else
+ clientbodyopen(c, r);
+ break;
+ case Tread:
+ clientbodyread(c, r);
+ break;
+ case Tflush:
+ respond(r, nil);
+ }
+ if(fsdebug)
+ fprint(2, "clientthread finished req\n");
+ }
+}
+
+/* value types understood by parseas() and ctlfmt() */
+enum
+{
+ Bool,
+ Int,
+ String,
+ XUrl,
+ Fn,
+};
+
+/*
+ * One settable control variable.  For ctltab and clienttab, offset holds
+ * the member's byte offset within Ctl or Client; for globaltab it holds
+ * the address of the global variable itself.
+ */
+typedef struct Ctab Ctab;
+struct Ctab {
+ char *name;
+ int type;
+ void *offset;
+};
+
+/* members of Ctl (per client and global) */
+Ctab ctltab[] = {
+ "acceptcookies", Bool, (void*)offsetof(Ctl, acceptcookies),
+ "sendcookies", Bool, (void*)offsetof(Ctl, sendcookies),
+ "redirectlimit", Int, (void*)offsetof(Ctl, redirectlimit),
+ "useragent", String, (void*)offsetof(Ctl, useragent),
+};
+
+/* process-wide debugging flags, addressed directly */
+Ctab globaltab[] = {
+ "chatty9p", Int, &chatty9p,
+ "fsdebug", Int, &fsdebug,
+ "cookiedebug", Int, &cookiedebug,
+ "urldebug", Int, &urldebug,
+ "httpdebug", Int, &httpdebug,
+};
+
+/* members of Client */
+Ctab clienttab[] = {
+ "baseurl", XUrl, (void*)offsetof(Client, baseurl),
+ "url", XUrl, (void*)offsetof(Client, url),
+};
+
+/*
+ * Look up cmd by exact name among the ntab entries of tab;
+ * returns the entry, or nil if there is none.
+ */
+static Ctab*
+findcmd(char *cmd, Ctab *tab, int ntab)
+{
+	Ctab *t;
+
+	for(t = tab; t < tab+ntab; t++)
+		if(strcmp(t->name, cmd) == 0)
+			return t;
+	return nil;
+}
+
+/*
+ * Convert arg according to type, store it in the variable at a, and
+ * answer r.  Only a url that fails to parse produces an error reply;
+ * types not handled here are left untouched and still answered with success.
+ */
+static void
+parseas(Req *r, char *arg, int type, void *a)
+{
+	Url *u;
+	char e[ERRMAX];
+	int *ip;
+	char **sp;
+	Url **up;
+
+	ip = a;
+	sp = a;
+	up = a;
+
+	switch(type){
+	case Bool:
+		/* "on" and "1" mean true; anything else is false */
+		if(strcmp(arg, "on")==0 || strcmp(arg, "1")==0)
+			*ip = 1;
+		else
+			*ip = 0;
+		break;
+	case Int:
+		*ip = strcmp(arg, "on")==0 ? 1 : atoi(arg);
+		break;
+	case String:
+		free(*sp);
+		*sp = estrdup(arg);
+		break;
+	case XUrl:
+		u = parseurl(arg, nil);
+		if(u == nil){
+			snprint(e, sizeof e, "parseurl: %r");
+			respond(r, e);
+			return;
+		}
+		freeurl(*up);
+		*up = u;
+		break;
+	}
+	respond(r, nil);
+}
+
+/*
+ * Handle a write of "cmd arg" against the Ctl settings in ctl.
+ * Returns 0 if cmd is not a Ctl setting (r is left unanswered),
+ * 1 if it was handled and r answered.
+ */
+int
+ctlwrite(Req *r, Ctl *ctl, char *cmd, char *arg)
+{
+	Ctab *t;
+
+	t = findcmd(cmd, ctltab, nelem(ctltab));
+	if(t == nil)
+		return 0;
+	parseas(r, arg, t->type, (void*)((uintptr)ctl+(uintptr)t->offset));
+	return 1;
+}
+
+/*
+ * Handle a write of "cmd arg" against the per-client settings of c.
+ * Returns 0 if cmd is not a client setting (r is left unanswered),
+ * 1 if it was handled and r answered.
+ */
+int
+clientctlwrite(Req *r, Client *c, char *cmd, char *arg)
+{
+	Ctab *t;
+
+	t = findcmd(cmd, clienttab, nelem(clienttab));
+	if(t == nil)
+		return 0;
+	parseas(r, arg, t->type, (void*)((uintptr)c+(uintptr)t->offset));
+	return 1;
+}
+
+/*
+ * Handle a write of "cmd arg" against the global debug flags.
+ * Returns 0 if cmd is not a global flag (r is left unanswered),
+ * 1 if it was handled and r answered.
+ */
+int
+globalctlwrite(Req *r, char *cmd, char *arg)
+{
+	Ctab *t;
+
+	t = findcmd(cmd, globaltab, nelem(globaltab));
+	if(t == nil)
+		return 0;
+	/* globaltab stores addresses, not offsets */
+	parseas(r, arg, t->type, t->offset);
+	return 1;
+}
+
+/*
+ * Append one "name value" line per Ctl setting of c at s.
+ * Strings are cut after 100 bytes' worth of runes and marked with "...";
+ * nil strings are skipped.  The caller provides a large enough buffer.
+ */
+static void
+ctlfmt(Ctl *c, char *s)
+{
+	Ctab *t;
+	void *field;
+	char *str;
+
+	for(t = ctltab; t < ctltab+nelem(ctltab); t++){
+		field = (void*)((uintptr)c+(uintptr)t->offset);
+		switch(t->type){
+		case Bool:
+			s += sprint(s, "%s %s\n", t->name, *(int*)field ? "on" : "off");
+			break;
+		case Int:
+			s += sprint(s, "%s %d\n", t->name, *(int*)field);
+			break;
+		case String:
+			str = *(char**)field;
+			if(str == nil)
+				break;
+			s += sprint(s, "%s %.*s%s\n", t->name, utfnlen(str, 100), str, strlen(str)>100 ? "..." : "");
+			break;
+		}
+	}
+}
+
+/*
+ * Answer a read of a client's ctl file: the client number followed by
+ * that client's Ctl settings.
+ */
+void
+ctlread(Req *r, Client *c)
+{
+	char buf[1024];
+	int n;
+
+	n = sprint(buf, "%11d \n", c->num);
+	ctlfmt(&c->ctl, buf+n);
+	readstr(r, buf);
+	respond(r, nil);
+}
+
+/*
+ * Answer a read of the global ctl file: the debug flags followed by the
+ * global Ctl settings.
+ */
+void
+globalctlread(Req *r)
+{
+	char buf[1024], *s;
+	Ctab *t;
+
+	s = buf;
+	for(t = globaltab; t < globaltab+nelem(globaltab); t++)
+		s += sprint(s, "%s %d\n", t->name, *(int*)t->offset);
+	ctlfmt(&globalctl, s);
+	readstr(r, buf);
+	respond(r, nil);
+}
diff --git a/sys/src/cmd/webfs/cookies.c b/sys/src/cmd/webfs/cookies.c
new file mode 100755
index 000000000..6028bde92
--- /dev/null
+++ b/sys/src/cmd/webfs/cookies.c
@@ -0,0 +1,1173 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ndb.h>
+#include <fcall.h>
+#include <thread.h>
+#include <9p.h>
+#include <ctype.h>
+#include "dat.h"
+#include "fns.h"
+
+int cookiedebug;
+
+typedef struct Cookie Cookie;
+typedef struct Jar Jar;
+
+/* one cookie; the string members are owned copies once it is in a jar */
+struct Cookie
+{
+ /* external info */
+ char* name;
+ char* value;
+ char* dom; /* starts with . */
+ char* path;
+ char* version;
+ char* comment; /* optional, may be nil */
+
+ uint expire; /* time of expiration: ~0 means when webcookies dies */
+ int secure;
+ int explicitdom; /* dom was explicitly set */
+ int explicitpath; /* path was explicitly set */
+ int netscapestyle;
+
+ /* internal info */
+ int deleted; /* set by delcookie; entry is removed by purgejar */
+ int mark; /* scratch flag for syncjar and cookieclunk sweeps */
+ int ondisk; /* cookie was read from or written to the jar file */
+};
+
+/* a growable array of cookies, optionally backed by a file */
+struct Jar
+{
+ Cookie *c;
+ int nc; /* cookies in use */
+ int mc; /* cookies allocated */
+
+ Qid qid; /* qid of file as of the last sync */
+ int dirty; /* memory and file may differ */
+ char *file;
+ char *lockfile;
+};
+
+/*
+ * String members of Cookie by name and offset.  ishttp marks the names
+ * that parsecookie accepts as attributes in a Set-Cookie header.
+ */
+struct {
+ char *s;
+ int offset;
+ int ishttp;
+} stab[] = {
+ "domain", offsetof(Cookie, dom), 1,
+ "path", offsetof(Cookie, path), 1,
+ "name", offsetof(Cookie, name), 0,
+ "value", offsetof(Cookie, value), 0,
+ "comment", offsetof(Cookie, comment), 1,
+ "version", offsetof(Cookie, version), 1,
+};
+
+/* integer members of Cookie by name and offset, as used in the jar file */
+struct {
+ char *s;
+ int offset;
+} itab[] = {
+ "expire", offsetof(Cookie, expire),
+ "secure", offsetof(Cookie, secure),
+ "explicitdomain", offsetof(Cookie, explicitdom),
+ "explicitpath", offsetof(Cookie, explicitpath),
+ "netscapestyle", offsetof(Cookie, netscapestyle),
+};
+
+#pragma varargck type "J" Jar*
+#pragma varargck type "K" Cookie*
+
+/*
+ * HTTP format: print a jar as a "Cookie:" request header line,
+ * CRLF included.  A nil or empty jar prints nothing.
+ */
+static int
+jarfmt(Fmt *fp)
+{
+	Jar *jar;
+	Cookie *c, *ec;
+
+	jar = va_arg(fp->args, Jar*);
+	if(jar == nil || jar->nc == 0)
+		return 0;
+
+	fmtstrcpy(fp, "Cookie: ");
+	if(jar->c[0].version)
+		fmtprint(fp, "$Version=%s; ", jar->c[0].version);
+	ec = jar->c + jar->nc;
+	for(c = jar->c; c < ec; c++)
+		fmtprint(fp, "%s%s=%s", c != jar->c ? "; ": "", c->name, c->value);
+	fmtstrcpy(fp, "\r\n");
+	return 0;
+}
+
+/*
+ * Individual cookie: print it as space-separated attr=value pairs, the
+ * form used in the jar file.  Nil strings and zero integers are omitted.
+ */
+static int
+cookiefmt(Fmt *fp)
+{
+	int j, k, nout;
+	char *t;
+	Cookie *c;
+
+	c = va_arg(fp->args, Cookie*);
+
+	nout = 0;
+	for(j=0; j<nelem(stab); j++){
+		t = *(char**)((uintptr)c+stab[j].offset);
+		if(t == nil)
+			continue;
+		if(nout++ > 0)
+			fmtstrcpy(fp, " ");
+		fmtprint(fp, "%s=%q", stab[j].s, t);
+	}
+	for(j=0; j<nelem(itab); j++){
+		k = *(int*)((uintptr)c+itab[j].offset);
+		if(k == 0)
+			continue;
+		if(nout++ > 0)
+			fmtstrcpy(fp, " ");
+		fmtprint(fp, "%s=%ud", itab[j].s, k);
+	}
+	return 0;
+}
+
+/*
+ * Ordering used to sort cookies and to decide whether two cookies
+ * occupy the same slot:
+ *	- alpha by name
+ *	- alpha by domain, ignoring case
+ *	- longer paths first, then alpha by path (RFC2109 4.3.4)
+ */
+static int
+cookiecmp(Cookie *a, Cookie *b)
+{
+	int d;
+
+	d = strcmp(a->name, b->name);
+	if(d == 0)
+		d = cistrcmp(a->dom, b->dom);
+	if(d == 0)
+		d = strlen(b->path) - strlen(a->path);
+	if(d == 0)
+		d = strcmp(a->path, b->path);
+	return d;
+}
+
+/* compare two strings that may be nil; nil sorts before any string */
+static int
+optstrcmp(char *a, char *b)
+{
+	if(a == nil && b == nil)
+		return 0;
+	if(a == nil)
+		return -1;
+	if(b == nil)
+		return 1;
+	return strcmp(a, b);
+}
+
+/*
+ * Like cookiecmp, but also compares value, version, comment and the
+ * integer attributes; 0 means the cookies are identical in every
+ * externally visible field.
+ */
+static int
+exactcookiecmp(Cookie *a, Cookie *b)
+{
+	int d;
+
+	d = cookiecmp(a, b);
+	if(d == 0)
+		d = strcmp(a->value, b->value);
+	if(d == 0)
+		d = optstrcmp(a->version, b->version);
+	if(d == 0)
+		d = optstrcmp(a->comment, b->comment);
+	if(d == 0)
+		d = b->expire - a->expire;
+	if(d == 0)
+		d = b->secure - a->secure;
+	if(d == 0)
+		d = b->explicitdom - a->explicitdom;
+	if(d == 0)
+		d = b->explicitpath - a->explicitpath;
+	if(d == 0)
+		d = b->netscapestyle - a->netscapestyle;
+	return d;
+}
+
+/* free every string member of c (those listed in stab); c itself is not freed */
+static void
+freecookie(Cookie *c)
+{
+	int i;
+	char **ps;
+
+	for(i=0; i<nelem(stab); i++){
+		ps = (char**)((uintptr)c+stab[i].offset);
+		free(*ps);
+	}
+}
+
+/* replace every non-nil string member of c with a private copy */
+static void
+copycookie(Cookie *c)
+{
+	int i;
+	char **ps;
+
+	for(i=0; i<nelem(stab); i++){
+		ps = (char**)((uintptr)c+stab[i].offset);
+		if(*ps == nil)
+			continue;
+		*ps = estrdup9p(*ps);
+	}
+}
+
+/*
+ * Mark c, which must be an element of j, as deleted and the jar as dirty.
+ * The entry stays in place until purgejar runs.
+ */
+static void
+delcookie(Jar *j, Cookie *c)
+{
+	int idx;
+
+	j->dirty = 1;
+	idx = c - j->c;
+	if(idx < 0 || idx >= j->nc)
+		abort();	/* c does not belong to j */
+	c->deleted = 1;
+}
+
+/*
+ * Add a copy of c to j.  An existing cookie with the same name, domain
+ * and path is marked deleted and superseded, unless it is identical in
+ * every field, in which case only its mark is cleared.  Incomplete
+ * cookies are refused.
+ */
+static void
+addcookie(Jar *j, Cookie *c)
+{
+	int i;
+	Cookie *old;
+
+	if(!c->name || !c->value || !c->path || !c->dom){
+		fprint(2, "not adding incomplete cookie\n");
+		return;
+	}
+
+	if(cookiedebug)
+		fprint(2, "add %K\n", c);
+
+	for(i=0; i<j->nc; i++){
+		old = &j->c[i];
+		if(cookiecmp(old, c) != 0)
+			continue;
+		if(cookiedebug)
+			fprint(2, "cookie %K matches %K\n", old, c);
+		if(exactcookiecmp(old, c) == 0){
+			if(cookiedebug)
+				fprint(2, "\texactly\n");
+			old->mark = 0;
+			return;
+		}
+		delcookie(j, old);
+	}
+
+	j->dirty = 1;
+	if(j->nc == j->mc){
+		j->mc += 16;
+		j->c = erealloc9p(j->c, j->mc*sizeof(Cookie));
+	}
+	/* struct copy shares c's strings; copycookie makes them private */
+	j->c[j->nc] = *c;
+	copycookie(&j->c[j->nc]);
+	j->nc++;
+}
+
+/*
+ * Remove every cookie marked deleted.  Each hole is filled with the
+ * current last element, so the order of the survivors is not preserved.
+ */
+static void
+purgejar(Jar *j)
+{
+	int i;
+
+	/* walk backwards so the element moved into a hole was already examined */
+	i = j->nc;
+	while(--i >= 0){
+		if(j->c[i].deleted){
+			freecookie(&j->c[i]);
+			j->nc--;
+			j->c[i] = j->c[j->nc];
+		}
+	}
+}
+
+/*
+ * Parse one line of attr=value fields (the format printed by %K) and add
+ * the resulting cookie to jar.  line is modified in place.  ondisk is
+ * recorded in the cookie.  Cookies lacking name, value, domain or path
+ * are ignored.
+ */
+static void
+addtojar(Jar *jar, char *line, int ondisk)
+{
+	Cookie c;
+	int i, k, nf, *pint;
+	char *f[20], *attr, *val;
+
+	memset(&c, 0, sizeof c);
+	c.expire = ~0;
+	c.ondisk = ondisk;
+	nf = tokenize(line, f, nelem(f));
+	for(i=0; i<nf; i++){
+		attr = f[i];
+		val = strchr(attr, '=');
+		if(val == nil)
+			val = "";
+		else
+			*val++ = '\0';
+
+		/* string attributes: point into line; addcookie copies them */
+		for(k=0; k<nelem(stab); k++)
+			if(strcmp(stab[k].s, attr) == 0)
+				*(char**)((uintptr)&c+stab[k].offset) = val;
+
+		/* integer attributes: a bare name counts as 1 */
+		for(k=0; k<nelem(itab); k++){
+			if(strcmp(itab[k].s, attr) != 0)
+				continue;
+			pint = (int*)((uintptr)&c+itab[k].offset);
+			if(val[0]=='\0')
+				*pint = 1;
+			else
+				*pint = strtoul(val, 0, 0);
+		}
+	}
+	if(c.name==nil || c.value==nil || c.dom==nil || c.path==nil){
+		if(cookiedebug)
+			fprint(2, "ignoring fractional cookie %K\n", &c);
+		return;
+	}
+	addcookie(jar, &c);
+}
+
+/*
+ * Allocate an empty jar with no backing file.
+ * NOTE(review): relies on emalloc9p returning zeroed memory -- confirm.
+ */
+static Jar*
+newjar(void)
+{
+	return emalloc9p(sizeof(Jar));
+}
+
+/*
+ * Mark as deleted every cookie whose expiry time has passed and, when
+ * exiting is set, every cookie with expire ~0 as well.
+ * Returns the number of cookies marked.
+ */
+static int
+expirejar(Jar *jar, int exiting)
+{
+	int i, ndel;
+	uint now;
+	Cookie *c;
+
+	now = time(0);
+	ndel = 0;
+	for(i=0; i<jar->nc; i++){
+		c = &jar->c[i];
+		if(c->expire < now || (exiting && c->expire==~0)){
+			delcookie(jar, c);
+			ndel++;
+		}
+	}
+	return ndel;
+}
+
+/*
+ * Debugging aid: print, under the heading desc, the cookies held in
+ * memory with their internal flags, followed by the raw contents of the
+ * jar file.
+ */
+static void
+dumpjar(Jar *jar, char *desc)
+{
+	int i;
+	Biobuf *b;
+	char *s;
+	Cookie *c;
+
+	print("%s\n", desc);
+	print("\tin memory:\n");
+
+	for(i=0; i<jar->nc; i++){
+		c = &jar->c[i];
+		print("\t%K%s%s%s\n", c,
+			c->ondisk ? " ondisk" : "",
+			c->deleted ? " deleted" : "",
+			c->mark ? " mark" : "");
+	}
+	print("\n\ton disk:\n");
+	b = Bopen(jar->file, OREAD);
+	if(b != nil){
+		for(;;){
+			s = Brdstr(b, '\n', 1);
+			if(s == nil)
+				break;
+			print("\t%s\n", s);
+			free(s);
+		}
+		Bterm(b);
+	}else
+		print("\tno file\n");
+	print("\n");
+}
+
+/*
+ * Reconcile the in-memory jar with its backing file.
+ * Does nothing for a jar without a file, or when the jar is clean and the
+ * file's qid is unchanged since the last sync.  Otherwise, while holding
+ * the lock file: re-read the file (merging its cookies through addtojar),
+ * delete cookies that were on disk before but were not seen again, purge
+ * deleted entries, and rewrite the file with every cookie whose expire is
+ * not ~0.  Returns 0 on success or when there is nothing to do, -1 on error.
+ */
+static int
+syncjar(Jar *jar)
+{
+ int i, fd;
+ char *line;
+ Dir *d;
+ Biobuf *b;
+ Qid q;
+
+ if(jar->file==nil)
+ return 0;
+
+ /* a changed qid means someone else rewrote the file */
+ memset(&q, 0, sizeof q);
+ if((d = dirstat(jar->file)) != nil){
+ q = d->qid;
+ if(d->qid.path != jar->qid.path || d->qid.vers != jar->qid.vers)
+ jar->dirty = 1;
+ free(d);
+ }
+
+ if(jar->dirty == 0)
+ return 0;
+
+ /* take the lock: an exclusive-use file, retried for up to 50*100ms */
+ fd = -1;
+ for(i=0; i<50; i++){
+ if((fd = create(jar->lockfile, OWRITE, DMEXCL|0666)) < 0){
+ sleep(100);
+ continue;
+ }
+ break;
+ }
+ if(fd < 0){
+ if(cookiedebug)
+ fprint(2, "open %s: %r", jar->lockfile);
+ werrstr("cannot acquire jar lock: %r");
+ return -1;
+ }
+
+ for(i=0; i<jar->nc; i++) /* mark is cleared by addcookie */
+ jar->c[i].mark = jar->c[i].ondisk;
+
+ if((b = Bopen(jar->file, OREAD)) == nil){
+ if(cookiedebug)
+ fprint(2, "Bopen %s: %r", jar->file);
+ werrstr("cannot read cookie file %s: %r", jar->file);
+ close(fd);
+ return -1;
+ }
+ for(; (line = Brdstr(b, '\n', 1)) != nil; free(line)){
+ if(*line == '#')
+ continue;
+ addtojar(jar, line, 1);
+ }
+ Bterm(b);
+
+ /* still marked: was on disk at the last sync but is gone from the file now */
+ for(i=0; i<jar->nc; i++)
+ if(jar->c[i].mark && jar->c[i].expire != ~0)
+ delcookie(jar, &jar->c[i]);
+
+ purgejar(jar);
+
+ b = Bopen(jar->file, OWRITE);
+ if(b == nil){
+ if(cookiedebug)
+ fprint(2, "Bopen write %s: %r", jar->file);
+ close(fd);
+ return -1;
+ }
+ Bprint(b, "# webcookies cookie jar\n");
+ Bprint(b, "# comments and non-standard fields will be lost\n");
+ for(i=0; i<jar->nc; i++){
+ /* expire ~0 cookies are kept in memory only */
+ if(jar->c[i].expire == ~0)
+ continue;
+ Bprint(b, "%K\n", &jar->c[i]);
+ jar->c[i].ondisk = 1;
+ }
+ Bterm(b);
+
+ jar->dirty = 0;
+ close(fd);
+ /* remember the qid we just produced so our own write is not seen as a change */
+ if((d = dirstat(jar->file)) != nil){
+ jar->qid = d->qid;
+ free(d);
+ }
+ return 0;
+}
+
+/*
+ * Create a jar backed by file and load it with syncjar.
+ * The lock file is named like file with "L." prepended to the last path
+ * element.  file is copied, so the caller keeps ownership of its
+ * argument and the jar may free its own copy later (here on failure,
+ * and in closejar) whatever storage the caller used.
+ * Returns nil, with the error string set by syncjar, if the load fails.
+ */
+static Jar*
+readjar(char *file)
+{
+	char *lock, *p;
+	Jar *jar;
+
+	jar = newjar();
+	lock = emalloc9p(strlen(file)+10);
+	strcpy(lock, file);
+	if((p = strrchr(lock, '/')) != nil)
+		p++;
+	else
+		p = lock;
+	/* open a two byte gap in front of the last element for "L." */
+	memmove(p+2, p, strlen(p)+1);
+	p[0] = 'L';
+	p[1] = '.';
+	jar->lockfile = lock;
+	jar->file = estrdup9p(file);
+	jar->dirty = 1;	/* force the initial read */
+
+	if(syncjar(jar) < 0){
+		free(jar->file);
+		free(jar->lockfile);
+		free(jar);
+		return nil;
+	}
+	return jar;
+}
+
+/*
+ * Expire old cookies, write the jar back if it has a file, and free it
+ * together with everything it owns: the cookies' strings, the cookie
+ * array, and the file and lock file names.  A nil jar is ignored.
+ */
+static void
+closejar(Jar *jar)
+{
+	int i;
+
+	if(jar == nil)
+		return;
+	expirejar(jar, 0);
+	if(syncjar(jar) < 0)
+		fprint(2, "warning: cannot rewrite cookie jar: %r\n");
+
+	for(i=0; i<jar->nc; i++)
+		freecookie(&jar->c[i]);
+
+	free(jar->c);	/* the array itself, not just the strings in it */
+	free(jar->lockfile);
+	free(jar->file);
+	free(jar);
+}
+
+/*
+ * Domain name matching is per RFC2109, section 2:
+ *
+ * Hosts names can be specified either as an IP address or a FQHN
+ * string. Sometimes we compare one host name with another. Host A's
+ * name domain-matches host B's if
+ *
+ * * both host names are IP addresses and their host name strings match
+ * exactly; or
+ *
+ * * both host names are FQDN strings and their host name strings match
+ * exactly; or
+ *
+ * * A is a FQDN string and has the form NB, where N is a non-empty name
+ * string, B has the form .B', and B' is a FQDN string. (So, x.y.com
+ * domain-matches .y.com but not y.com.)
+ *
+ * Note that domain-match is not a commutative operation: a.b.c.com
+ * domain-matches .c.com, but not the reverse.
+ *
+ * (This does not verify that IP addresses and FQDN's are well-formed.)
+ */
+/*
+ * Report whether host name domain-matches pattern: either the two are
+ * equal ignoring case, or name is a domain name and pattern is ".B"
+ * where name ends in ".B" or is exactly "B".
+ */
+static int
+isdomainmatch(char *name, char *pattern)
+{
+	int lname, lpattern;
+
+	if(cistrcmp(name, pattern)==0)
+		return 1;
+
+	if(strcmp(ipattr(name), "dom")==0 && pattern[0]=='.'){
+		lname = strlen(name);
+		lpattern = strlen(pattern);
+		/* e.g., name: www.google.com && pattern: .google.com */
+		if(lname >= lpattern && cistrcmp(name+lname-lpattern, pattern)==0)
+			return 1;
+		/*
+		 * e.g., name: google.com && pattern: .google.com
+		 * Only the leading dot may be missing; comparing an arbitrary
+		 * tail of pattern would let "le.com" match ".google.com".
+		 */
+		if(lpattern == lname+1 && cistrcmp(pattern+1, name) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Decide whether cookie c applies to a request for path on host dom
+ * (RFC2109 4.3.4):
+ *	- the host must domain-match the cookie's domain
+ *	- the cookie's path must be a prefix of the request path
+ *	- the cookie must not have expired (an expire of 0 never expires)
+ */
+static int
+iscookiematch(Cookie *c, char *dom, char *path, uint now)
+{
+	if(!isdomainmatch(dom, c->dom))
+		return 0;
+	if(strncmp(c->path, path, strlen(c->path)) != 0)
+		return 0;
+	return c->expire == 0 || c->expire >= now;
+}
+
+/*
+ * Build a new jar holding copies of the cookies in jar that apply to a
+ * request for path on host dom, sorted with cookiecmp.  Cookies flagged
+ * secure are included only when issecure is set.
+ * Returns nil, with the error string set, if nothing matches; otherwise
+ * the caller disposes of the result with closejar.
+ */
+static Jar*
+cookiesearch(Jar *jar, char *dom, char *path, int issecure)
+{
+	int i;
+	Jar *j;
+	uint now;
+	Cookie *c;
+
+	if(cookiedebug)
+		fprint(2, "cookiesearch %s %s %d\n", dom, path, issecure);
+	now = time(0);
+	j = newjar();
+	for(i=0; i<jar->nc; i++){
+		c = &jar->c[i];
+		if(cookiedebug)
+			fprint(2, "\ttry %s %s %d %s\n", c->dom,
+				c->path, c->secure,
+				c->name);
+		if(c->secure && !issecure)
+			continue;
+		if(!iscookiematch(c, dom, path, now))
+			continue;
+		if(cookiedebug)
+			fprint(2, "\tmatched\n");
+		addcookie(j, c);
+	}
+	if(j->nc == 0){
+		closejar(j);
+		werrstr("no cookies found");
+		return nil;
+	}
+	qsort(j->c, j->nc, sizeof(j->c[0]), (int(*)(void*, void*))cookiecmp);
+	return j;
+}
+
+/*
+ * RFC2109 4.3.2 security checks on a cookie received in answer to a
+ * request for path on host dom.
+ * Returns nil if the cookie is acceptable, otherwise a static string
+ * saying why it is rejected.
+ */
+static char*
+isbadcookie(Cookie *c, char *dom, char *path)
+{
+	int lcdom, ldom;
+
+	if(strncmp(c->path, path, strlen(c->path)) != 0)
+		return "cookie path is not a prefix of the request path";
+
+	/*
+	 * fgb says omitting this test is necessary to get some sites to work,
+	 * but it seems dubious.
+	 */
+	if(c->explicitdom && c->dom[0] != '.')
+		return "cookie domain doesn't start with dot";
+
+	/*
+	 * An embedded dot needs a character on either side of it, so
+	 * anything shorter than three bytes fails outright; this also keeps
+	 * the length handed to memchr from going negative.
+	 */
+	lcdom = strlen(c->dom);
+	if(lcdom < 3 || memchr(c->dom+1, '.', lcdom-1-1) == nil)
+		return "cookie domain doesn't have embedded dots";
+
+	if(!isdomainmatch(dom, c->dom))
+		return "request host does not match cookie domain";
+
+	/*
+	 * NOTE(review): RFC2109 describes rejecting a host of the form HD
+	 * where H contains dots, which suggests comparing ldom > lcdom and
+	 * scanning ldom-lcdom bytes; the test below is kept as it was --
+	 * confirm the intent before changing what gets accepted.
+	 */
+	ldom = strlen(dom);
+	if(strcmp(ipattr(dom), "dom")==0 && lcdom > ldom &&
+	   memchr(dom, '.', lcdom - ldom) != nil)
+		return "request host contains dots before cookie domain";
+
+	return 0;
+}
+
+/*
+ * Sunday, 25-Jan-2002 12:24:36 GMT
+ * Sunday, 25 Jan 2002 12:24:36 GMT
+ * Sun, 25 Jan 02 12:24:36 GMT
+ */
+/* Gregorian leap year test: 1 if year has a 29 February, else 0 */
+static int
+isleap(int year)
+{
+	if(year%400 == 0)
+		return 1;
+	if(year%100 == 0)
+		return 0;
+	return year%4 == 0;
+}
+
+/*
+ * Parse a date of the form "Weekday, DD-Mon-YYYY HH:MM:SS GMT".
+ * The weekday may be spelled out or cut to three letters, the date
+ * separators may be '-' or ' ', and the year may have two or four digits.
+ * Returns the result of tm2sec, or -1 (which reads as ~0 in the uint
+ * return type) if the string does not parse.
+ * NOTE(review): tm is not cleared, so members not assigned here are
+ * uninitialized when passed to tm2sec -- confirm it ignores them.
+ */
+static uint
+strtotime(char *s)
+{
+ char *os;
+ int i;
+ Tm tm;
+
+ /* days per month, indexed by [isleap][month] */
+ static int mday[2][12] = {
+ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
+ 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
+ };
+ static char *wday[] = {
+ "Sunday", "Monday", "Tuesday", "Wednesday",
+ "Thursday", "Friday", "Saturday",
+ };
+ static char *mon[] = {
+ "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
+ };
+
+ os = s;
+ /* Sunday, */
+ /* the weekday is only checked for syntax; its value is not used */
+ for(i=0; i<nelem(wday); i++){
+ if(cistrncmp(s, wday[i], strlen(wday[i])) == 0){
+ s += strlen(wday[i]);
+ break;
+ }
+ if(cistrncmp(s, wday[i], 3) == 0){
+ s += 3;
+ break;
+ }
+ }
+ if(i==nelem(wday)){
+ if(cookiedebug)
+ fprint(2, "bad wday (%s)\n", os);
+ return -1;
+ }
+ if(*s++ != ',' || *s++ != ' '){
+ if(cookiedebug)
+ fprint(2, "bad wday separator (%s)\n", os);
+ return -1;
+ }
+
+ /* 25- */
+ if(!isdigit(s[0]) || !isdigit(s[1]) || (s[2]!='-' && s[2]!=' ')){
+ if(cookiedebug)
+ fprint(2, "bad day of month (%s)\n", os);
+ return -1;
+ }
+ tm.mday = strtol(s, 0, 10);
+ s += 3;
+
+ /* Jan- */
+ for(i=0; i<nelem(mon); i++)
+ if(cistrncmp(s, mon[i], 3) == 0){
+ tm.mon = i;
+ s += 3;
+ break;
+ }
+ if(i==nelem(mon)){
+ if(cookiedebug)
+ fprint(2, "bad month (%s)\n", os);
+ return -1;
+ }
+ if(s[0] != '-' && s[0] != ' '){
+ if(cookiedebug)
+ fprint(2, "bad month separator (%s)\n", os);
+ return -1;
+ }
+ s++;
+
+ /* 2002 */
+ if(!isdigit(s[0]) || !isdigit(s[1])){
+ if(cookiedebug)
+ fprint(2, "bad year (%s)\n", os);
+ return -1;
+ }
+ tm.year = strtol(s, 0, 10);
+ s += 2;
+ if(isdigit(s[0]) && isdigit(s[1]))
+ s += 2;
+ else{
+ /* two digit year: 00-68 means 20xx, 69-99 means 19xx */
+ if(tm.year <= 68)
+ tm.year += 2000;
+ else
+ tm.year += 1900;
+ }
+ if(tm.mday==0 || tm.mday > mday[isleap(tm.year)][tm.mon]){
+ if(cookiedebug)
+ fprint(2, "invalid day of month (%s)\n", os);
+ return -1;
+ }
+ tm.year -= 1900;
+ if(*s++ != ' '){
+ if(cookiedebug)
+ fprint(2, "bad year separator (%s)\n", os);
+ return -1;
+ }
+
+ /* 12:24:36 followed by a space */
+ if(!isdigit(s[0]) || !isdigit(s[1]) || s[2]!=':'
+ || !isdigit(s[3]) || !isdigit(s[4]) || s[5]!=':'
+ || !isdigit(s[6]) || !isdigit(s[7]) || s[8]!=' '){
+ if(cookiedebug)
+ fprint(2, "bad time (%s)\n", os);
+ return -1;
+ }
+
+ tm.hour = atoi(s);
+ tm.min = atoi(s+3);
+ tm.sec = atoi(s+6);
+ if(tm.hour >= 24 || tm.min >= 60 || tm.sec >= 60){
+ if(cookiedebug)
+ fprint(2, "invalid time (%s)\n", os);
+ return -1;
+ }
+ s += 9;
+
+ /* nothing may follow the zone name */
+ if(cistrcmp(s, "GMT") != 0){
+ if(cookiedebug)
+ fprint(2, "time zone not GMT (%s)\n", os);
+ return -1;
+ }
+ strcpy(tm.zone, "GMT");
+ tm.yday = 0;
+ return tm2sec(&tm);
+}
+
+/*
+ * Return the first position at or after s that is not a blank, tab, CR
+ * or newline.  This is more lenient than the linear whitespace of
+ * RFC2616 2.2.
+ */
+static char*
+skipspace(char *s)
+{
+	while(*s != '\0' && strchr(" \t\r\n", *s) != nil)
+		s++;
+	return s;
+}
+
+/*
+ * Guess whether hdr uses the old netscape cookie syntax.
+ * The old headers:
+ *	- didn't allow spaces around the '='
+ *	- used an 'Expires' attribute
+ *	- had no 'Version' attribute
+ *	- had no quotes
+ *	- allowed whitespace in values
+ *	- apparently separated attr/value pairs with ';' exclusively
+ * Of these, the checks made are: no space on either side of any '=',
+ * no '"' right after an '=', and no "version=" anywhere (any case).
+ */
+static int
+isnetscape(char *hdr)
+{
+	char *s;
+
+	s = hdr;
+	while((s = strchr(s, '=')) != nil){
+		if(s[1]=='"' || isspace(s[1]))
+			return 0;
+		if(s > hdr && isspace(s[-1]))
+			return 0;
+		s++;
+	}
+	return cistrstr(hdr, "version=") == nil;
+}
+
+/*
+ * Parse HTTP response headers, adding cookies to jar.
+ * Overwrites the headers. May overwrite path.
+ * Only lines starting with "Set-Cookie:" (any case) are examined; a line
+ * may carry several cookies.  dom and path identify the request and are
+ * used for defaults and for the isbadcookie checks.
+ * Returns the number of cookies passed to addcookie.
+ */
+static char* parsecookie(Cookie*, char*, char**, int, char*, char*);
+static int
+parsehttp(Jar *jar, char *hdr, char *dom, char *path)
+{
+ static char setcookie[] = "Set-Cookie:";
+ char *e, *p, *nextp;
+ Cookie c;
+ int isns, n;
+
+ /* the syntax guess is made once, over the whole header block */
+ isns = isnetscape(hdr);
+ n = 0;
+ for(p=hdr; p; p=nextp){
+ p = skipspace(p);
+ if(*p == '\0')
+ break;
+ /* cut the block into lines in place */
+ nextp = strchr(p, '\n');
+ if(nextp != nil)
+ *nextp++ = '\0';
+ if(cistrncmp(p, setcookie, strlen(setcookie)) != 0)
+ continue;
+ if(cookiedebug)
+ fprint(2, "%s\n", p);
+ p = skipspace(p+strlen(setcookie));
+ /* parsecookie advances p past each cookie it accepts */
+ for(; *p; p=skipspace(p)){
+ if((e = parsecookie(&c, p, &p, isns, dom, path)) != nil){
+ if(cookiedebug)
+ fprint(2, "parse cookie: %s\n", e);
+ break;
+ }
+ if((e = isbadcookie(&c, dom, path)) != nil){
+ if(cookiedebug)
+ fprint(2, "reject cookie; %s\n", e);
+ continue;
+ }
+ addcookie(jar, &c);
+ n++;
+ }
+ }
+ return n;
+}
+
+/*
+ * If s starts an HTTP quoted-string, return a pointer to its closing
+ * quote, or to the first byte that cannot be part of it; otherwise
+ * return s unchanged.
+ *
+ * Sec 2.2 of RFC2616 defines a "quoted-string" as:
+ *
+ *	quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
+ *	qdtext         = <any TEXT except <">>
+ *	quoted-pair    = "\" CHAR
+ *
+ * TEXT is any octet except CTLs, but including LWS;
+ * LWS is [CR LF] 1*(SP | HT);
+ * CHARs are ASCII octets 0-127;  (NOTE: we reject 0's)
+ * CTLs are octets 0-31 and 127;
+ */
+static char*
+skipquoted(char *s)
+{
+	if(*s != '"')
+		return s;
+
+	s++;
+	while(32 <= *s && *s < 127 && *s != '"'){
+		/* quoted-pair: step over the escaped byte as well */
+		if(s[0] == '\\' && s[1] != '\0')
+			s++;
+		s++;
+	}
+	return s;
+}
+
+/*
+ * Return the first position at or after s that cannot be part of an
+ * HTTP token.
+ *
+ * Sec 2.2 of RFC2616 defines a "token" as
+ *	1*<any CHAR except CTLs or separators>;
+ * CHARs are ASCII octets 0-127;
+ * CTLs are octets 0-31 and 127;
+ * separators are "()<>@,;:\/[]?={}", double-quote, SP (32), and HT (9)
+ *
+ * The set tested below does not include '/', so slashes are accepted
+ * inside a token here.
+ */
+static char*
+skiptoken(char *s)
+{
+	for(; 32 <= *s && *s < 127; s++)
+		if(strchr("()<>@,;:[]?={}\" \t\\", *s) != nil)
+			break;
+	return s;
+}
+
+/*
+ * Return the position where the attribute value starting at s ends.
+ * An RFC2109 value is an HTTP token or an HTTP quoted string.
+ * Netscape servers ignore the spec and rely on semicolons, apparently,
+ * so with isns set the value runs to the next ';' or the end of string.
+ */
+static char*
+skipvalue(char *s, int isns)
+{
+	char *end;
+
+	if(!isns){
+		if(*s == '"')
+			return skipquoted(s);
+		return skiptoken(s);
+	}
+
+	end = strchr(s, ';');
+	if(end == nil)
+		end = s+strlen(s);
+	return end;
+}
+
+/*
+ * Parse one cookie from the text of a Set-Cookie header, e.g.
+ *	RMID=80b186bb64c03c65fab767f8; expires=Monday, 10-Feb-2003 04:44:39 GMT;
+ *	path=/; domain=.nytimes.com
+ *
+ * c is filled in with pointers into p, which is cut up in place; dom and
+ * path supply the defaults when the cookie names no domain or path, and
+ * path itself is then truncated at '?' and at its last '/'.
+ * A cookie without Expires or Max-Age gets expire ~0.
+ * isns selects the netscape syntax.  On success *e is set to the text
+ * following the cookie and nil is returned; otherwise the return value
+ * is a static error string and *e is left alone.
+ */
+static char*
+parsecookie(Cookie *c, char *p, char **e, int isns, char *dom, char *path)
+{
+	int i, done;
+	char *t, *u, *attr, *val;
+
+	/* clear first, then set the default; in the other order it is lost */
+	memset(c, 0, sizeof *c);
+	c->expire = ~0;
+
+	/* NAME=VALUE */
+	t = skiptoken(p);
+	c->name = p;
+	p = skipspace(t);
+	if(*p != '='){
+	Badname:
+		return "malformed cookie: no NAME=VALUE";
+	}
+	*t = '\0';
+	p = skipspace(p+1);
+	t = skipvalue(p, isns);
+	if(*t)
+		*t++ = '\0';
+	c->value = p;
+	p = skipspace(t);
+	if(c->name[0]=='\0' || c->value[0]=='\0')
+		goto Badname;
+
+	/* attributes, up to the end of the text or the ',' that ends this cookie */
+	done = 0;
+	for(; *p && !done; p=skipspace(p)){
+		attr = p;
+		t = skiptoken(p);
+		u = skipspace(t);
+		switch(*u){
+		case '\0':
+			*t = '\0';
+			val = p = u;
+			break;
+		case ';':
+			*t = '\0';
+			val = "";
+			p = u+1;
+			break;
+		case '=':
+			*t = '\0';
+			val = skipspace(u+1);
+			p = skipvalue(val, isns);
+			if(*p==',')
+				done = 1;
+			if(*p)
+				*p++ = '\0';
+			break;
+		case ',':
+			if(!isns){
+				val = "";
+				p = u;
+				*p++ = '\0';
+				done = 1;
+				break;
+			}
+			/* netscape syntax has no ',' separator: fall through */
+		default:
+			if(cookiedebug)
+				fprint(2, "syntax: %s\n", p);
+			return "syntax error";
+		}
+		for(i=0; i<nelem(stab); i++)
+			if(stab[i].ishttp && cistrcmp(stab[i].s, attr)==0)
+				*(char**)((uintptr)c+stab[i].offset) = val;
+		if(cistrcmp(attr, "expires") == 0){
+			if(!isns)
+				return "non-netscape cookie has Expires tag";
+			if(!val[0])
+				return "bad expires tag";
+			c->expire = strtotime(val);
+			if(c->expire == ~0)
+				return "cannot parse netscape expires tag";
+		}
+		if(cistrcmp(attr, "max-age") == 0)
+			c->expire = time(0)+atoi(val);
+		if(cistrcmp(attr, "secure") == 0)
+			c->secure = 1;
+	}
+
+	if(c->dom)
+		c->explicitdom = 1;
+	else
+		c->dom = dom;
+	if(c->path)
+		c->explicitpath = 1;
+	else{
+		/* default path: the request path up to its last '/' */
+		c->path = path;
+		if((t = strchr(c->path, '?')) != 0)
+			*t = '\0';
+		if((t = strrchr(c->path, '/')) != 0)
+			*t = '\0';
+	}
+	c->netscapestyle = isns;
+	*e = p;
+
+	return nil;
+}
+
+/* the process-wide cookie jar, set up by initcookies */
+Jar *jar;
+
+/*
+ * Per-fid state for the cookie file.  In the code visible here only
+ * ctext is used: the text shown to readers and collected from writers.
+ * The other string members are only freed, in cookieclunk.
+ */
+typedef struct Aux Aux;
+struct Aux
+{
+ char *dom;
+ char *path;
+ char *inhttp;
+ char *outhttp;
+ char *ctext;
+ int rdoff;
+};
+enum
+{
+ AuxBuf = 4096,
+ MaxCtext = 16*1024*1024, /* upper bound on ctext, checked in cookiewrite */
+};
+
+/*
+ * Open the cookie file: sync the jar and give the fid an Aux whose
+ * ctext is empty when opened with OTRUNC, and otherwise lists every
+ * cookie in the jar, one per line.
+ */
+void
+cookieopen(Req *r)
+{
+	char *s, *es;
+	int i, sz, trunc;
+	Aux *a;
+
+	syncjar(jar);
+	a = emalloc9p(sizeof(Aux));
+	r->fid->aux = a;
+
+	trunc = r->ifcall.mode&OTRUNC;
+	if(trunc)
+		sz = 1;
+	else
+		sz = 256*jar->nc+1024;	/* BUG should do better */
+	a->ctext = emalloc9p(sz);
+	a->ctext[0] = '\0';
+
+	if(!trunc){
+		s = a->ctext;
+		es = s+sz;
+		for(i=0; i<jar->nc; i++)
+			s = seprint(s, es, "%K\n", &jar->c[i]);
+	}
+	respond(r, nil);
+}
+
+/* answer a read of the cookie file from the text captured at open time */
+void
+cookieread(Req *r)
+{
+	Aux *a = r->fid->aux;
+
+	readstr(r, a->ctext);
+	respond(r, nil);
+}
+
+/*
+ * Store written data into the fid's ctext at the request's offset,
+ * growing the text as needed up to MaxCtext bytes; cookieclunk parses
+ * the result.  The offset is validated in its full width before any
+ * arithmetic, so a huge or negative offset cannot wrap past the size
+ * check and send the copy outside the buffer.
+ */
+void
+cookiewrite(Req *r)
+{
+	Aux *a;
+	vlong off, end;
+	long len;
+
+	a = r->fid->aux;
+	off = r->ifcall.offset;
+	if(off < 0 || off > MaxCtext){
+		respond(r, "cookie file too large");
+		return;
+	}
+	end = off + r->ifcall.count;
+	len = strlen(a->ctext);
+	if(end > len){
+		if(end >= MaxCtext){
+			respond(r, "cookie file too large");
+			return;
+		}
+		a->ctext = erealloc9p(a->ctext, end+1);
+		/* a write beyond the current end leaves a gap; don't expose heap bytes */
+		if(off > len)
+			memset(a->ctext+len, 0, off-len);
+		a->ctext[end] = '\0';
+	}
+	memmove(a->ctext+off, r->ifcall.data, r->ifcall.count);
+	r->ofcall.count = r->ifcall.count;
+	respond(r, nil);
+}
+
+/*
+ * Close a cookie file fid: make the jar match the fid's text.
+ * Every cookie is marked, each line of the text is fed to addtojar
+ * (which clears the mark of a cookie it finds unchanged), and whatever
+ * is still marked afterwards is deleted.  The jar is then synced and
+ * the Aux freed.  Fids without an Aux are ignored.
+ */
+void
+cookieclunk(Fid *fid)
+{
+	char *p, *nextp;
+	Aux *a;
+	int i;
+
+	a = fid->aux;
+	if(a == nil)
+		return;
+
+	for(i=0; i<jar->nc; i++)
+		jar->c[i].mark = 1;
+
+	p = a->ctext;
+	while(*p != '\0'){
+		nextp = strchr(p, '\n');
+		if(nextp == nil)
+			nextp = "";	/* last line: stop after it */
+		else
+			*nextp++ = '\0';
+		addtojar(jar, p, 0);
+		p = nextp;
+	}
+
+	for(i=0; i<jar->nc; i++)
+		if(jar->c[i].mark)
+			delcookie(jar, &jar->c[i]);
+	syncjar(jar);
+
+	free(a->dom);
+	free(a->path);
+	free(a->inhttp);
+	free(a->outhttp);
+	free(a->ctext);
+	free(a);
+}
+
+/* write back and free the global cookie jar (see closejar) */
+void
+closecookies(void)
+{
+ closejar(jar);
+}
+
+/*
+ * Install the %J and %K print verbs and load the global jar from file,
+ * or from $home/lib/webcookies when file is nil.
+ * Exits via sysfatal if no file can be determined or the jar cannot be
+ * read.
+ */
+void
+initcookies(char *file)
+{
+	char *home;
+
+	fmtinstall('J', jarfmt);
+	fmtinstall('K', cookiefmt);
+
+	if(file == nil){
+		home = getenv("home");
+		if(home == nil)
+			sysfatal("no cookie file specified and no $home");
+		file = emalloc9p(strlen(home)+30);
+		strcpy(file, home);
+		strcat(file, "/lib/webcookies");
+		free(home);	/* getenv hands back allocated storage */
+	}
+	jar = readjar(file);
+	if(jar == nil)
+		sysfatal("readjar: %r");
+}
+
+/*
+ * Record the cookies set by the response headers hdr, received for a
+ * request for path on host dom, then sync the jar.  A nil path is
+ * treated as "/".  hdr is overwritten and path may be truncated in
+ * place by the parser.
+ */
+void
+httpsetcookie(char *hdr, char *dom, char *path)
+{
+	parsehttp(jar, hdr, dom, path != nil ? path : "/");
+	syncjar(jar);
+}
+
+/*
+ * Return the "Cookie:" header line (CRLF included) to send with a
+ * request for path on host dom, or an empty string if no cookie
+ * applies.  Secure cookies are included only when issecure is set.
+ * The result is allocated and the caller frees it.  It is formatted
+ * straight into allocated storage, so a long cookie list is never cut
+ * off in the middle of the header.
+ */
+char*
+httpcookies(char *dom, char *path, int issecure)
+{
+	char *s;
+	Jar *j;
+
+	syncjar(jar);
+	j = cookiesearch(jar, dom, path, issecure);
+	s = smprint("%J", j);	/* %J prints nothing for a nil jar */
+	closejar(j);
+	if(s == nil)
+		s = estrdup("");
+	return s;
+}
diff --git a/sys/src/cmd/webfs/dat.h b/sys/src/cmd/webfs/dat.h
new file mode 100755
index 000000000..cde79c3ff
--- /dev/null
+++ b/sys/src/cmd/webfs/dat.h
@@ -0,0 +1,103 @@
+typedef struct Client Client;
+typedef struct Ctl Ctl;
+typedef struct Ibuf Ibuf;
+typedef struct Url Url;
+
+/* simple buffered i/o for network connections; shared by http, ftp */
+struct Ibuf
+{
+ int fd; /* descriptor to read from; set by initibuf */
+ Ioproc *io; /* io proc used for the reads; set by initibuf */
+ char buf[4096];
+ char *rp, *wp; /* unread bytes are buf[rp..wp); both start at buf */
+};
+
+/*
+ * Settings changeable through the ctl files; globalctl (main.c) holds
+ * the defaults and each Client carries its own copy.
+ */
+struct Ctl
+{
+ int acceptcookies; /* store cookies from set-cookie headers (checked in httpopen) */
+ int sendcookies; /* send matching cookies with requests (checked in httpopen) */
+ int redirectlimit;
+ char *useragent; /* value of the User-Agent header; nil means send none */
+};
+
+/*
+ * One connection directory (/mnt/web/n) and the state of its fetch.
+ */
+struct Client
+{
+ Url *url; /* url being fetched; nil until set (fsopen refuses body until then) */
+ Url *baseurl;
+ Ctl ctl; /* per-client settings */
+ Channel *creq; /* chan(Req*) */
+ int num; /* client number, used in qid paths and as directory name */
+ int plumbed; /* when set, replumb sends a plumb message for the body */
+ char *contenttype; /* value of the content-type header, if any */
+ char *postbody; /* data written to the postbody file */
+ char *redirect; /* location header of a redirect response */
+ char *authenticate; /* value for the Authorization header, if any */
+ char *ext; /* extension used for the body.<ext> file name; set by replumb */
+ int npostbody; /* number of bytes in postbody */
+ int havepostbody; /* postbody was opened: httpopen sends POST instead of GET */
+ int iobusy; /* a body read is outstanding */
+ int bodyopened; /* body was opened; postbody writes are refused afterwards */
+ Ioproc *io; /* io proc used for network i/o */
+ int ref; /* incremented by fsopen for each open file of this client */
+ void *aux; /* scheme private state (HttpState for http) */
+};
+
+/*
+ * If ischeme is USunknown, then the given URL is a relative
+ * URL which references the "current document" in the context of the base.
+ * If this is the case, only the "fragment" and "url" members will have
+ * meaning, and the given URL structure may not be used as a base URL itself.
+ */
+enum
+{
+ USunknown,
+ UShttp,
+ UShttps,
+ USftp,
+ USfile,
+ UScurrent,
+};
+
+/*
+ * A parsed URL.  The char* members are served as the files under
+ * parsed/ by fs.c, which locates them through offsetof(Url, ...).
+ */
+struct Url
+{
+ int ischeme; /* one of the US* constants above */
+ char* url; /* entire url */
+ char* scheme;
+ /* scheme handlers; httpopen, httpread and httpclose have these signatures */
+ int (*open)(Client*, Url*);
+ int (*read)(Client*, Req*);
+ void (*close)(Client*);
+ char* schemedata;
+ char* authority;
+ char* user;
+ char* passwd;
+ char* host;
+ char* port; /* may be nil; httpopen then dials the scheme name as service */
+ char* path;
+ char* query;
+ char* fragment;
+ /* scheme specific parts */
+ union {
+ struct {
+ char *page_spec; /* request target sent on the HTTP request line */
+ } http;
+ struct {
+ char *path_spec;
+ char *type; /* served as parsed/ftptype */
+ } ftp;
+ };
+};
+
+enum
+{
+ STACK = 32*1024, /* was 16*1024; there are big arrays on the stack */
+};
+
+extern Client** client;
+extern int cookiedebug;
+extern Srv fs;
+extern int fsdebug;
+extern Ctl globalctl;
+extern int nclient;
+extern int urldebug;
+extern int httpdebug;
+extern char* status[];
+
diff --git a/sys/src/cmd/webfs/fns.h b/sys/src/cmd/webfs/fns.h
new file mode 100755
index 000000000..7bebe018f
--- /dev/null
+++ b/sys/src/cmd/webfs/fns.h
@@ -0,0 +1,62 @@
+/* buf.c */
+void initibuf(Ibuf*, Ioproc*, int);
+int readibuf(Ibuf*, char*, int);
+void unreadline(Ibuf*, char*);
+int readline(Ibuf*, char*, int);
+
+/* client.c */
+int newclient(int);
+void closeclient(Client*);
+void clonectl(Ctl*);
+int ctlwrite(Req*, Ctl*, char*, char*);
+int clientctlwrite(Req*, Client*, char*, char*);
+int globalctlwrite(Req*, char*, char*);
+void ctlread(Req*, Client*);
+void globalctlread(Req*);
+void plumburl(char*, char*);
+
+/* cookies.c */
+void cookieread(Req*);
+void cookiewrite(Req*);
+void cookieopen(Req*);
+void cookieclunk(Fid*);
+void initcookies(char*);
+void closecookies(void);
+void httpsetcookie(char*, char*, char*);
+char* httpcookies(char*, char*, int);
+
+/* fs.c */
+void initfs(void);
+
+/* http.c */
+int httpopen(Client*, Url*);
+int httpread(Client*, Req*);
+void httpclose(Client*);
+
+/* io.c */
+int iotlsdial(Ioproc*, char*, char*, char*, int*, int);
+int ioprint(Ioproc*, int, char*, ...);
+#pragma varargck argpos ioprint 3
+
+/* plumb.c */
+void plumbinit(void);
+void plumbstart(void);
+void replumb(Client*);
+
+/* url.c */
+Url* parseurl(char*, Url*);
+void freeurl(Url*);
+void rewriteurl(Url*);
+int seturlquery(Url*, char*);
+Url* copyurl(Url*);
+char* escapeurl(char*, int(*)(int));
+char* unescapeurl(char*);
+void initurl(void);
+
+/* util.c */
+char* estrdup(char*);
+char* estrmanydup(char*, ...);
+char* estredup(char*, char*);
+void* emalloc(uint);
+void* erealloc(void*, uint);
+char* strlower(char*);
diff --git a/sys/src/cmd/webfs/fs.c b/sys/src/cmd/webfs/fs.c
new file mode 100755
index 000000000..087390d72
--- /dev/null
+++ b/sys/src/cmd/webfs/fs.c
@@ -0,0 +1,616 @@
+/*
+ * Web file system. Conventionally mounted at /mnt/web
+ *
+ * ctl send control messages (might go away)
+ * cookies list of cookies, editable
+ * clone open and read to obtain new connection
+ * n connection directory
+ * ctl control messages (like get url)
+ * body retrieved data
+ * content-type mime content-type of body
+ * postbody data to be posted
+ * parsed parsed version of url
+ * url entire url
+ * scheme http, ftp, etc.
+ * host hostname
+ * path path on host
+ * query query after path
+ * fragment #foo anchor reference
+ * user user name (ftp)
+ * password password (ftp)
+ * ftptype transfer mode (ftp)
+ */
+
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ip.h>
+#include <plumb.h>
+#include <thread.h>
+#include <fcall.h>
+#include <9p.h>
+#include "dat.h"
+#include "fns.h"
+
+int fsdebug;
+
+/*
+ * File types, stored in the low byte of the qid path (see TYPE).
+ * The order must match tab[] below, which is indexed by these values.
+ * Qrootctl..Qcookies live in the root, Qctl..Qparsed in a client
+ * directory, and Qurl..Qftptype in its parsed/ directory.
+ */
+enum
+{
+ Qroot,
+ Qrootctl,
+ Qclone,
+ Qcookies,
+ Qclient,
+ Qctl,
+ Qbody,
+ Qbodyext,
+ Qcontenttype,
+ Qpostbody,
+ Qparsed,
+ Qurl,
+ Qscheme,
+ Qschemedata,
+ Quser,
+ Qpasswd,
+ Qhost,
+ Qport,
+ Qpath,
+ Qquery,
+ Qfragment,
+ Qftptype,
+ Qend,
+};
+
+/* a qid path is the file type in the low 8 bits and the client number above them */
+#define PATH(type, n) ((type)|((n)<<8))
+#define TYPE(path) ((int)(path) & 0xFF)
+#define NUM(path) ((uint)(path)>>8)
+
+Channel *creq;
+Channel *creqwait;
+Channel *cclunk;
+Channel *cclunkwait;
+
+/*
+ * Per file type description: name, mode, and, for the files under
+ * parsed/, the offset of the corresponding char* member in Url.
+ */
+typedef struct Tab Tab;
+struct Tab
+{
+ char *name;
+ ulong mode;
+ int offset;
+};
+
+/*
+ * Indexed by the Q* constants; keep in the same order as the enum.
+ * The "XXX" names are placeholders: the client directory is named by
+ * its number and Qbodyext is named body.<ext>.
+ */
+Tab tab[] =
+{
+ "/", DMDIR|0555, 0,
+ "ctl", 0666, 0,
+ "clone", 0666, 0,
+ "cookies", 0666, 0,
+ "XXX", DMDIR|0555, 0,
+ "ctl", 0666, 0,
+ "body", 0444, 0,
+ "XXX", 0444, 0,
+ "contenttype", 0444, 0,
+ "postbody", 0666, 0,
+ "parsed", DMDIR|0555, 0,
+ "url", 0444, offsetof(Url, url),
+ "scheme", 0444, offsetof(Url, scheme),
+ "schemedata", 0444, offsetof(Url, schemedata),
+ "user", 0444, offsetof(Url, user),
+ "passwd", 0444, offsetof(Url, passwd),
+ "host", 0444, offsetof(Url, host),
+ "port", 0444, offsetof(Url, port),
+ "path", 0444, offsetof(Url, path),
+ "query", 0444, offsetof(Url, query),
+ "fragment", 0444, offsetof(Url, fragment),
+ "ftptype", 0444, offsetof(Url, ftp.type),
+};
+
+ulong time0;
+
+/*
+ * Fill in d for the file with the given qid path.  uid, gid and name
+ * are allocated with estrdup.  ext is used only for Qbodyext, where it
+ * supplies the extension of the body.<ext> name (nil means "xxx").
+ */
+static void
+fillstat(Dir *d, uvlong path, ulong length, char *ext)
+{
+	Tab *t;
+	int type;
+	char buf[32];
+
+	memset(d, 0, sizeof(*d));
+	d->uid = estrdup("web");
+	d->gid = estrdup("web");
+	d->qid.path = path;
+	d->atime = d->mtime = time0;
+	d->length = length;
+	type = TYPE(path);
+	t = &tab[type];
+	switch(type){
+	case Qbodyext:
+		snprint(buf, sizeof buf, "body.%s", ext == nil ? "xxx" : ext);
+		d->name = estrdup(buf);
+		break;
+	case Qclient:
+		/*
+		 * tab[Qclient].name is only the placeholder "XXX"; a client
+		 * directory is named by its number.  Testing the type rather
+		 * than t->name makes stat report the same name as a directory read.
+		 */
+		snprint(buf, sizeof buf, "%ud", NUM(path));
+		d->name = estrdup(buf);
+		break;
+	default:
+		d->name = estrdup(t->name);
+		break;
+	}
+	d->qid.type = t->mode>>24;
+	d->mode = t->mode;
+}
+
+/*
+ * Answer a Tstat.  For the body.<ext> file the client's extension is
+ * passed along so that stat reports the same name as a directory read
+ * and as fswalk1 accepts.
+ */
+static void
+fsstat(Req *r)
+{
+	ulong path;
+	char *ext;
+
+	path = r->fid->qid.path;
+	ext = nil;
+	if(TYPE(path) == Qbodyext)
+		ext = client[NUM(path)]->ext;
+	fillstat(&r->d, path, 0, ext);
+	respond(r, nil);
+}
+
+/*
+ * dirread9p generator for the root: first the fixed files that follow
+ * Qroot in tab[], then one directory per client, named by its number.
+ * Returns 0 after filling d, -1 past the last entry.
+ */
+static int
+rootgen(int i, Dir *d, void*)
+{
+	char name[32];
+	int q, n;
+
+	q = i + Qroot+1;
+	if(q < Qclient){
+		fillstat(d, q, 0, nil);
+		return 0;
+	}
+	n = q - Qclient;
+	if(n >= nclient)
+		return -1;
+	fillstat(d, PATH(Qclient, n), 0, nil);
+	snprint(name, sizeof name, "%d", n);
+	free(d->name);
+	d->name = estrdup(name);
+	return 0;
+}
+
+/*
+ * dirread9p generator for a client directory: the entries from the one
+ * after Qclient up to and including Qparsed.  aux is the Client.
+ */
+static int
+clientgen(int i, Dir *d, void *aux)
+{
+	Client *cl = aux;
+	int q;
+
+	q = i + Qclient+1;
+	if(q > Qparsed)
+		return -1;
+	fillstat(d, PATH(q, cl->num), 0, cl->ext);
+	return 0;
+}
+
+/*
+ * dirread9p generator for a client's parsed/ directory: the entries
+ * after Qparsed up to, but not including, Qend.  aux is the Client.
+ */
+static int
+parsedgen(int i, Dir *d, void *aux)
+{
+	Client *cl = aux;
+	int q;
+
+	q = i + Qparsed+1;
+	if(q >= Qend)
+		return -1;
+	fillstat(d, PATH(q, cl->num), 0, nil);
+	return 0;
+}
+
+/*
+ * Handle a Tread, dispatching on the file type in the qid path.
+ * Every case responds here except body reads, which are passed to the
+ * client over c->creq and answered later.
+ */
+static void
+fsread(Req *r)
+{
+ char *s;
+ char e[ERRMAX];
+ Client *c;
+ ulong path;
+
+ path = r->fid->qid.path;
+ switch(TYPE(path)){
+ default:
+ snprint(e, sizeof e, "bug in webfs path=%lux\n", path);
+ respond(r, e);
+ break;
+
+ case Qroot:
+ dirread9p(r, rootgen, nil);
+ respond(r, nil);
+ break;
+
+ case Qrootctl:
+ globalctlread(r);
+ break;
+
+ case Qcookies:
+ cookieread(r);
+ break;
+
+ case Qclient:
+ dirread9p(r, clientgen, client[NUM(path)]);
+ respond(r, nil);
+ break;
+
+ case Qctl:
+ ctlread(r, client[NUM(path)]);
+ break;
+
+ case Qcontenttype:
+ c = client[NUM(path)];
+ if(c->contenttype == nil)
+ r->ofcall.count = 0;
+ else
+ readstr(r, c->contenttype);
+ respond(r, nil);
+ break;
+
+ case Qpostbody:
+ c = client[NUM(path)];
+ readbuf(r, c->postbody, c->npostbody);
+ respond(r, nil);
+ break;
+
+ case Qbody:
+ case Qbodyext:
+ c = client[NUM(path)];
+ /* only one body read at a time per client */
+ if(c->iobusy){
+ respond(r, "already have i/o pending");
+ break;
+ }
+ c->iobusy = 1;
+ sendp(c->creq, r);
+ break;
+
+ case Qparsed:
+ dirread9p(r, parsedgen, client[NUM(path)]);
+ respond(r, nil);
+ break;
+
+ case Qurl:
+ case Qscheme:
+ case Qschemedata:
+ case Quser:
+ case Qpasswd:
+ case Qhost:
+ case Qport:
+ case Qpath:
+ case Qquery:
+ case Qfragment:
+ case Qftptype:
+ c = client[NUM(path)];
+ r->ofcall.count = 0;
+ /* fetch the char* member of Url found at the offset recorded in tab[]; nil reads as empty */
+ if(c->url != nil
+ && (s = *(char**)((uintptr)c->url+tab[TYPE(path)].offset)) != nil)
+ readstr(r, s);
+ respond(r, nil);
+ break;
+ }
+}
+
+/*
+ * Handle a Twrite to the cookies file, a ctl file or a postbody file.
+ */
+static void
+fswrite(Req *r)
+{
+ int m;
+ ulong path;
+ char e[ERRMAX], *buf, *cmd, *arg;
+ Client *c;
+
+ path = r->fid->qid.path;
+ switch(TYPE(path)){
+ default:
+ snprint(e, sizeof e, "bug in webfs path=%lux\n", path);
+ respond(r, e);
+ break;
+
+ case Qcookies:
+ cookiewrite(r);
+ break;
+
+ case Qrootctl:
+ case Qctl:
+ if(r->ifcall.count >= 1024){
+ respond(r, "ctl message too long");
+ return;
+ }
+ /* NUL-terminated copy of the message, split into command and argument */
+ buf = estredup(r->ifcall.data, (char*)r->ifcall.data+r->ifcall.count);
+ cmd = buf;
+ arg = strpbrk(cmd, "\t ");
+ if(arg){
+ *arg++ = '\0';
+ arg += strspn(arg, "\t ");
+ }else
+ arg = "";
+ r->ofcall.count = r->ifcall.count;
+ /*
+ * Try the settings shared by both ctl files first, then the
+ * commands specific to this one; a zero return from both is
+ * answered here as an unknown command.
+ */
+ if(TYPE(path)==Qrootctl){
+ if(!ctlwrite(r, &globalctl, cmd, arg)
+ && !globalctlwrite(r, cmd, arg))
+ respond(r, "unknown control command");
+ }else{
+ c = client[NUM(path)];
+ if(!ctlwrite(r, &c->ctl, cmd, arg)
+ && !clientctlwrite(r, c, cmd, arg))
+ respond(r, "unknown control command");
+ }
+ free(buf);
+ break;
+
+ case Qpostbody:
+ c = client[NUM(path)];
+ if(c->bodyopened){
+ respond(r, "cannot write postbody after opening body");
+ break;
+ }
+ if(r->ifcall.offset >= 128*1024*1024){ /* >128MB is probably a mistake */
+ respond(r, "offset too large");
+ break;
+ }
+ m = r->ifcall.offset + r->ifcall.count;
+ /* grow the body, zero-filling anything between the old end and this write */
+ if(c->npostbody < m){
+ c->postbody = erealloc(c->postbody, m);
+ memset(c->postbody+c->npostbody, 0, m-c->npostbody);
+ c->npostbody = m;
+ }
+ memmove(c->postbody+r->ifcall.offset, r->ifcall.data, r->ifcall.count);
+ r->ofcall.count = r->ifcall.count;
+ respond(r, nil);
+ break;
+ }
+}
+
+/*
+ * Handle a Topen: check the requested mode against the mode in tab[],
+ * then do the per-file work.  Opening a file that belongs to a client
+ * takes a reference on the client.  Opening a body file hands the
+ * request to the client over c->creq, which answers it later.
+ */
+static void
+fsopen(Req *r)
+{
+ /* permission bits needed for each open mode, indexed by mode&3 */
+ static int need[4] = { 4, 2, 6, 1 };
+ ulong path;
+ int n;
+ Client *c;
+ Tab *t;
+
+ /*
+ * lib9p already handles the blatantly obvious.
+ * we just have to enforce the permissions we have set.
+ */
+ path = r->fid->qid.path;
+ t = &tab[TYPE(path)];
+ n = need[r->ifcall.mode&3];
+ if((n&t->mode) != n){
+ respond(r, "permission denied");
+ return;
+ }
+
+ switch(TYPE(path)){
+ case Qcookies:
+ cookieopen(r);
+ break;
+
+ case Qpostbody:
+ c = client[NUM(path)];
+ c->havepostbody++;
+ c->ref++;
+ respond(r, nil);
+ break;
+
+ case Qbody:
+ case Qbodyext:
+ c = client[NUM(path)];
+ if(c->url == nil){
+ respond(r, "url is not yet set");
+ break;
+ }
+ c->bodyopened = 1;
+ c->ref++;
+ sendp(c->creq, r);
+ break;
+
+ case Qclone:
+ /* opening clone makes a new client; the fid then refers to that client's ctl file */
+ n = newclient(0);
+ path = PATH(Qctl, n);
+ r->fid->qid.path = path;
+ r->ofcall.qid.path = path;
+ if(fsdebug)
+ fprint(2, "open clone => path=%lux\n", path);
+ t = &tab[Qctl];
+ /* fall through */
+ default:
+ /* t-tab is the file type; types from Qclient on belong to a client */
+ if(t-tab >= Qclient)
+ client[NUM(path)]->ref++;
+ respond(r, nil);
+ break;
+ }
+}
+
+/*
+ * Srv destroyfid hook: pass the fid to fsthread over cclunk and wait
+ * on cclunkwait until fsthread has finished with it.
+ */
+static void
+fsdestroyfid(Fid *fid)
+{
+ sendp(cclunk, fid);
+ recvp(cclunkwait);
+}
+
+/*
+ * Handle a Tattach: only the empty attach name is accepted, and the
+ * fid is pointed at the root directory.
+ */
+static void
+fsattach(Req *r)
+{
+	char *aname;
+	Qid *q;
+
+	aname = r->ifcall.aname;
+	if(aname != nil && aname[0] != '\0'){
+		respond(r, "invalid attach specifier");
+		return;
+	}
+	q = &r->fid->qid;
+	q->path = PATH(Qroot, 0);
+	q->type = QTDIR;
+	q->vers = 0;
+	r->ofcall.qid = *q;
+	respond(r, nil);
+}
+
+/*
+ * Walk one path element from the directory fid.  On success the new
+ * qid is stored in *qid and nil is returned; otherwise an error string
+ * is returned.
+ */
+static char*
+fswalk1(Fid *fid, char *name, Qid *qid)
+{
+	int i, n;
+	ulong path;
+	char buf[32], *ext;
+
+	path = fid->qid.path;
+	if(!(fid->qid.type&QTDIR))
+		return "walk in non-directory";
+
+	if(strcmp(name, "..") == 0){
+		switch(TYPE(path)){
+		case Qparsed:
+			qid->path = PATH(Qclient, NUM(path));
+			qid->type = tab[Qclient].mode>>24;
+			return nil;
+		case Qclient:
+		case Qroot:
+			qid->path = PATH(Qroot, 0);
+			qid->type = tab[Qroot].mode>>24;
+			return nil;
+		default:
+			return "bug in fswalk1";
+		}
+	}
+
+	/*
+	 * The children of a directory are the tab[] entries that follow
+	 * it, up to and including the next directory entry.
+	 */
+	i = TYPE(path)+1;
+	for(; i<nelem(tab); i++){
+		if(i==Qclient){
+			/*
+			 * Client directories are named by number.  The name must
+			 * be the canonical decimal form and must not be negative:
+			 * "-1" survives the atoi/snprint round trip and would
+			 * otherwise give a qid that indexes client[] out of bounds.
+			 */
+			n = atoi(name);
+			snprint(buf, sizeof buf, "%d", n);
+			if(n >= 0 && n < nclient && strcmp(buf, name) == 0){
+				qid->path = PATH(i, n);
+				qid->type = tab[i].mode>>24;
+				return nil;
+			}
+			break;
+		}
+		if(i==Qbodyext){
+			/* body.<ext>; the name depends on the client's extension */
+			ext = client[NUM(path)]->ext;
+			snprint(buf, sizeof buf, "body.%s", ext == nil ? "xxx" : ext);
+			if(strcmp(buf, name) == 0){
+				qid->path = PATH(i, NUM(path));
+				qid->type = tab[i].mode>>24;
+				return nil;
+			}
+		}
+		else if(strcmp(name, tab[i].name) == 0){
+			qid->path = PATH(i, NUM(path));
+			qid->type = tab[i].mode>>24;
+			return nil;
+		}
+		if(tab[i].mode&DMDIR)
+			break;
+	}
+	return "directory entry not found";
+}
+
+/*
+ * Handle a Tflush.  The flush is forwarded to the client that owns the
+ * flushed request and the client's i/o is interrupted.  Only reads and
+ * opens of body files are handed to a client (see fsread and fsopen),
+ * so a flush of anything else is treated as a bug and aborts.
+ */
+static void
+fsflush(Req *r)
+{
+ Req *or;
+ int t;
+ Client *c;
+ ulong path;
+
+ /* follow flushes of flushes back to the request originally flushed */
+ or=r;
+ while(or->ifcall.type==Tflush)
+ or = or->oldreq;
+
+ if(or->ifcall.type != Tread && or->ifcall.type != Topen)
+ abort();
+
+ path = or->fid->qid.path;
+ t = TYPE(path);
+ if(t != Qbody && t != Qbodyext)
+ abort();
+
+ c = client[NUM(path)];
+ sendp(c->creq, r);
+ iointerrupt(c->io);
+}
+
+/*
+ * Main loop of the file server proc started by initfs.  It receives
+ * clunked fids on cclunk and 9P requests on creq, handles one at a
+ * time, and acknowledges each on cclunkwait or creqwait so the sender
+ * (fsdestroyfid or fssend) can continue.
+ */
+static void
+fsthread(void*)
+{
+ ulong path;
+ Alt a[3];
+ Fid *fid;
+ Req *r;
+
+ threadsetname("fsthread");
+ plumbstart();
+
+ a[0].op = CHANRCV;
+ a[0].c = cclunk;
+ a[0].v = &fid;
+ a[1].op = CHANRCV;
+ a[1].c = creq;
+ a[1].v = &r;
+ a[2].op = CHANEND;
+
+ for(;;){
+ switch(alt(a)){
+ case 0:
+ /* a fid is going away */
+ path = fid->qid.path;
+ if(TYPE(path)==Qcookies)
+ cookieclunk(fid);
+ /* NOTE(review): omode != -1 presumably means the fid was opened, so this drops the reference fsopen took -- confirm */
+ if(fid->omode != -1 && TYPE(path) >= Qclient)
+ closeclient(client[NUM(path)]);
+ sendp(cclunkwait, nil);
+ break;
+ case 1:
+ /* a 9P request forwarded by fssend */
+ switch(r->ifcall.type){
+ case Tattach:
+ fsattach(r);
+ break;
+ case Topen:
+ fsopen(r);
+ break;
+ case Tread:
+ fsread(r);
+ break;
+ case Twrite:
+ fswrite(r);
+ break;
+ case Tstat:
+ fsstat(r);
+ break;
+ case Tflush:
+ fsflush(r);
+ break;
+ default:
+ respond(r, "bug in fsthread");
+ break;
+ }
+ sendp(creqwait, 0);
+ break;
+ }
+ }
+}
+
+/*
+ * Srv hook shared by all request types: forward the request to
+ * fsthread and wait until fsthread has dispatched it.
+ */
+static void
+fssend(Req *r)
+{
+ sendp(creq, r);
+ recvp(creqwait); /* avoids need to deal with spurious flushes */
+}
+
+/*
+ * Record the start time used for file times, create the unbuffered
+ * channels that connect the Srv hooks to fsthread, and start fsthread
+ * in its own proc.
+ */
+void
+initfs(void)
+{
+ time0 = time(0);
+ creq = chancreate(sizeof(void*), 0);
+ creqwait = chancreate(sizeof(void*), 0);
+ cclunk = chancreate(sizeof(void*), 0);
+ cclunkwait = chancreate(sizeof(void*), 0);
+ procrfork(fsthread, nil, STACK, RFNAMEG);
+}
+
+/*
+ * Srv end hook: close the cookie jar, then exit all procs and threads.
+ */
+void
+takedown(Srv*)
+{
+ closecookies();
+ threadexitsall("done");
+}
+
+/*
+ * The 9P server.  Requests go through fssend, which hands them to
+ * fsthread; walks are answered directly by fswalk1.
+ */
+Srv fs =
+{
+.attach= fssend,
+.destroyfid= fsdestroyfid,
+.walk1= fswalk1,
+.open= fssend,
+.read= fssend,
+.write= fssend,
+.stat= fssend,
+.flush= fssend,
+.end= takedown,
+};
+
diff --git a/sys/src/cmd/webfs/http.c b/sys/src/cmd/webfs/http.c
new file mode 100755
index 000000000..0d25a7c20
--- /dev/null
+++ b/sys/src/cmd/webfs/http.c
@@ -0,0 +1,539 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ip.h>
+#include <plumb.h>
+#include <thread.h>
+#include <fcall.h>
+#include <9p.h>
+#include <libsec.h>
+#include <auth.h>
+#include "dat.h"
+#include "fns.h"
+
+char PostContentType[] = "application/x-www-form-urlencoded";
+int httpdebug;
+
+/*
+ * State of one HTTP fetch; stored in Client.aux by httpopen and
+ * released by httpclose.
+ */
+typedef struct HttpState HttpState;
+struct HttpState
+{
+ int fd; /* connection to the server */
+ Client *c; /* owning client */
+ char *location; /* first location header seen */
+ char *setcookie; /* accumulated "set-cookie: ...\n" lines */
+ char *netaddr; /* address dialed */
+ char *credentials; /* "Basic ..." built by wwwauthenticate */
+ char autherror[ERRMAX]; /* error text when wwwauthenticate fails */
+ Ibuf b; /* buffered input from fd */
+};
+
+/*
+ * Header handler for location: remember the first value seen.
+ */
+static void
+location(HttpState *hs, char *value)
+{
+	if(hs->location != nil)
+		return;
+	hs->location = estrdup(value);
+}
+
+/*
+ * Header handler for content-type: replace the client's stored value.
+ */
+static void
+contenttype(HttpState *hs, char *value)
+{
+	Client *c = hs->c;
+
+	free(c->contenttype);	/* free(nil) is a no-op */
+	c->contenttype = estrdup(value);
+}
+
+/*
+ * Header handler for set-cookie: append "set-cookie: value\n" to the
+ * text accumulated in hs->setcookie.  If the new string cannot be
+ * built the old text is kept.
+ */
+static void
+setcookie(HttpState *hs, char *value)
+{
+ char *s, *t;
+ Fmt f;
+
+ s = hs->setcookie;
+ fmtstrinit(&f);
+ if(s)
+ fmtprint(&f, "%s", s);
+ fmtprint(&f, "set-cookie: ");
+ fmtprint(&f, "%s", value);
+ fmtprint(&f, "\n");
+ t = fmtstrflush(&f);
+ if(t){
+ free(s);
+ hs->setcookie = t;
+ }
+}
+
+/*
+ * Extract the leading token from s, modifying s in place.
+ * An unquoted token ends at the first blank, tab, CR or newline, or at
+ * the end of the string.  A token that starts with a double quote runs
+ * to the closing quote; backslash-escaped characters are stepped over
+ * (the backslashes are kept) and the surrounding quotes are removed.
+ * Returns the token and stores in *ps a pointer to the text after it.
+ */
+static char*
+unquote(char *s, char **ps)
+{
+	char *p;
+
+	if(*s != '"'){
+		/*
+		 * strcspn stops at the terminating NUL when no delimiter
+		 * follows, so a token that ends the string is handled
+		 * without dereferencing a nil pointer.
+		 */
+		p = s + strcspn(s, " \t\r\n");
+		if(*p != 0)
+			*p++ = 0;
+		*ps = p;
+		return s;
+	}
+	for(p=s+1; *p; p++){
+		if(*p == '\"'){
+			*p++ = 0;
+			break;
+		}
+		if(*p == '\\' && *(p+1)){
+			p++;
+			continue;
+		}
+	}
+	/* shift the token left over the opening quote */
+	memmove(s, s+1, p-(s+1));
+	s[p-(s+1)] = 0;
+	*ps = p;
+	return s;
+}
+
+/*
+ * Derive a server name from a dial string: a leading "tcp!" or "net!"
+ * and a trailing "!http" or "!https" service are removed.
+ * Returns a freshly allocated string.
+ */
+static char*
+servername(char *addr)
+{
+	char *p;
+
+	if(strncmp(addr, "tcp!", 4) == 0
+	|| strncmp(addr, "net!", 4) == 0)
+		addr += 4;
+	addr = estrdup(addr);
+	p = addr+strlen(addr);
+	/* step over a final 's' so that "!https" is treated like "!http" */
+	if(p>addr && *(p-1) == 's')
+		p--;
+	/*
+	 * Compare exactly five bytes: after the step above the text at p-5
+	 * may still be followed by the 's', which a full strcmp would
+	 * never match against "!http".
+	 */
+	if(p>addr+5 && strncmp(p-5, "!http", 5) == 0)
+		p[-5] = 0;
+	return addr;
+}
+
+/*
+ * Header handler for www-authenticate.  Only basic authentication is
+ * supported.  User and password come from the URL when both are
+ * present there, otherwise they are requested with auth_getuserpasswd
+ * for the server and realm.  On success hs->credentials holds
+ * "Basic <base64 of user:password>"; on failure hs->autherror holds
+ * the reason and a message is printed on standard error.
+ */
+void
+wwwauthenticate(HttpState *hs, char *line)
+{
+	char *cred, *user, *pass, *realm, *s, *spec, *name;
+	int n;
+	Fmt fmt;
+	UserPasswd *up;
+
+	spec = nil;
+	up = nil;
+	hs->autherror[0] = 0;
+	if(cistrncmp(line, "basic ", 6) != 0){
+		werrstr("unknown auth: %s", line);
+		goto error;
+	}
+	line += 6;
+	if(cistrncmp(line, "realm=", 6) != 0){
+		werrstr("missing realm: %s", line);
+		goto error;
+	}
+	line += 6;
+	user = hs->c->url->user;
+	pass = hs->c->url->passwd;
+	if(user==nil || pass==nil){
+		realm = unquote(line, &line);
+		fmtstrinit(&fmt);
+		name = servername(hs->netaddr);
+		fmtprint(&fmt, "proto=pass service=http server=%q realm=%q", name, realm);
+		free(name);
+		if(hs->c->url->user)
+			fmtprint(&fmt, " user=%q", hs->c->url->user);
+		spec = fmtstrflush(&fmt);
+		if(spec == nil)
+			goto error;
+		if((up = auth_getuserpasswd(nil, "%s", spec)) == nil)
+			goto error;
+		user = up->user;
+		pass = up->passwd;
+	}
+	if((s = smprint("%s:%s", user, pass)) == nil)
+		goto error;
+	/* user and pass may point into up; it is no longer needed once s is built */
+	free(up);
+	up = nil;	/* the error path frees up again */
+	free(spec);
+	spec = nil;
+
+	/* base64 needs 4 bytes for every 3 of input, plus the NUL */
+	n = (strlen(s)+2)/3*4 + 1;
+	cred = emalloc(n);
+	if(enc64(cred, n, (uchar*)s, strlen(s)) < 0){
+		memset(s, 0, strlen(s));
+		free(s);
+		free(cred);
+		werrstr("cannot encode credentials");
+		goto error;
+	}
+	memset(s, 0, strlen(s));
+	free(s);
+	/* a repeated header replaces the earlier credentials */
+	free(hs->credentials);
+	hs->credentials = smprint("Basic %s", cred);
+	memset(cred, 0, n);
+	free(cred);
+	if(hs->credentials == nil)
+		goto error;
+	return;
+
+error:
+	free(up);
+	free(spec);
+	snprint(hs->autherror, sizeof hs->autherror, "%r");
+	fprint(2, "%s: Authentication failed: %r\n", argv0);
+}
+
+/*
+ * Response headers that are acted upon.  httpheaders matches the start
+ * of each header line against name and calls fn with the text that
+ * follows the name and any blanks or tabs.
+ */
+struct {
+ char *name; /* Case-insensitive */
+ void (*fn)(HttpState *hs, char *value);
+} hdrtab[] = {
+ { "location:", location },
+ { "content-type:", contenttype },
+ { "set-cookie:", setcookie },
+ { "www-authenticate:", wwwauthenticate },
+};
+
+/*
+ * Read the status line of the response and return the numeric status
+ * code.  Returns readline's result when that is <= 0, and -1 with the
+ * error string set when the line does not look like an HTTP status line.
+ */
+static int
+httprcode(HttpState *hs)
+{
+	int n;
+	char *p;
+	char buf[256];
+
+	n = readline(&hs->b, buf, sizeof(buf)-1);
+	if(n <= 0)
+		return n;
+	/* terminate before the line is printed or scanned */
+	buf[n] = 0;
+	if(httpdebug)
+		fprint(2, "-> %s\n", buf);
+	p = strchr(buf, ' ');
+	/* strncmp stops at the NUL, so a line shorter than 5 bytes is not read past its end */
+	if(strncmp(buf, "HTTP/", 5) != 0 || p == nil){
+		werrstr("bad response from server");
+		return -1;
+	}
+	return atoi(p+1);
+}
+
+/*
+ * read a single mime header, collect continuations.
+ *
+ * this routine assumes that there is a blank line twixt
+ * the header and the message body, otherwise bytes will
+ * be lost.
+ *
+ * returns the length of the header collected in buf, 0 when the first
+ * line read contains no ':' (taken as the end of the headers), or
+ * readline's negative result on error.
+ */
+static int
+getheader(HttpState *hs, char *buf, int n)
+{
+ char *p, *e;
+ int i;
+
+ n--;
+ p = buf;
+ /* each line is read at p, directly after the previous one */
+ for(e = p + n; ; p += i){
+ i = readline(&hs->b, p, e-p);
+ if(i < 0)
+ return i;
+
+ if(p == buf){
+ /* first line */
+ if(strchr(buf, ':') == nil)
+ break; /* end of headers */
+ } else {
+ /* continuation line */
+ if(*p != ' ' && *p != '\t'){
+ /* this line starts the next header: push it back and cut it off buf */
+ unreadline(&hs->b, p);
+ *p = 0;
+ break; /* end of this header */
+ }
+ }
+ }
+
+ if(httpdebug)
+ fprint(2, "-> %s\n", buf);
+ return p-buf;
+}
+
+/*
+ * Read the response headers one at a time and hand each one that is
+ * listed in hdrtab to its handler.  Returns 0 at the end of the
+ * headers, -1 if reading a header fails.
+ */
+static int
+httpheaders(HttpState *hs)
+{
+	char hdr[2048];
+	char *val;
+	int i, len, namelen;
+
+	for(;;){
+		len = getheader(hs, hdr, sizeof(hdr));
+		if(len < 0)
+			return -1;
+		if(len == 0)
+			return 0;
+		for(i = 0; i < nelem(hdrtab); i++){
+			namelen = strlen(hdrtab[i].name);
+			if(cistrncmp(hdr, hdrtab[i].name, namelen) != 0)
+				continue;
+			/* skip field name and leading white */
+			val = hdr + namelen;
+			while(*val == ' ' || *val == '\t')
+				val++;
+			(*hdrtab[i].fn)(hs, val);
+			break;
+		}
+	}
+}
+
+int
+httpopen(Client *c, Url *url)
+{
+ int fd, code, redirect, authenticate;
+ char *cookies;
+ Ioproc *io;
+ HttpState *hs;
+ char *service;
+
+ if(httpdebug)
+ fprint(2, "httpopen\n");
+ io = c->io;
+ hs = emalloc(sizeof(*hs));
+ hs->c = c;
+
+ if(url->port)
+ service = url->port;
+ else
+ service = url->scheme;
+ hs->netaddr = estrdup(netmkaddr(url->host, 0, service));
+ c->aux = hs;
+ if(httpdebug){
+ fprint(2, "dial %s\n", hs->netaddr);
+ fprint(2, "dial port: %s\n", url->port);
+ }
+ fd = iotlsdial(io, hs->netaddr, 0, 0, 0, url->ischeme==UShttps);
+ if(fd < 0){
+ Error:
+ if(httpdebug)
+ fprint(2, "iodial: %r\n");
+ free(hs->location);
+ free(hs->setcookie);
+ free(hs->netaddr);
+ free(hs->credentials);
+ if(fd >= 0)
+ ioclose(io, hs->fd);
+ hs->fd = -1;
+ free(hs);
+ c->aux = nil;
+ return -1;
+ }
+ hs->fd = fd;
+ if(httpdebug)
+ fprint(2, "<- %s %s HTTP/1.0\n<- Host: %s\n",
+ c->havepostbody? "POST": "GET", url->http.page_spec, url->host);
+ ioprint(io, fd, "%s %s HTTP/1.0\r\nHost: %s\r\n",
+ c->havepostbody? "POST" : "GET", url->http.page_spec, url->host);
+ if(httpdebug)
+ fprint(2, "<- User-Agent: %s\n", c->ctl.useragent);
+ if(c->ctl.useragent)
+ ioprint(io, fd, "User-Agent: %s\r\n", c->ctl.useragent);
+ if(c->ctl.sendcookies){
+ /* should we use url->page here? sometimes it is nil. */
+ cookies = httpcookies(url->host, url->http.page_spec,
+ url->ischeme == UShttps);
+ if(cookies && cookies[0])
+ ioprint(io, fd, "%s", cookies);
+ if(httpdebug)
+ fprint(2, "<- %s", cookies);
+ free(cookies);
+ }
+ if(c->havepostbody){
+ ioprint(io, fd, "Content-type: %s\r\n", PostContentType);
+ ioprint(io, fd, "Content-length: %ud\r\n", c->npostbody);
+ if(httpdebug){
+ fprint(2, "<- Content-type: %s\n", PostContentType);
+ fprint(2, "<- Content-length: %ud\n", c->npostbody);
+ }
+ }
+ if(c->authenticate){
+ ioprint(io, fd, "Authorization: %s\r\n", c->authenticate);
+ if(httpdebug)
+ fprint(2, "<- Authorization: %s\n", c->authenticate);
+ }
+ ioprint(io, fd, "\r\n");
+ if(c->havepostbody)
+ if(iowrite(io, fd, c->postbody, c->npostbody) != c->npostbody)
+ goto Error;
+
+ redirect = 0;
+ authenticate = 0;
+ initibuf(&hs->b, io, fd);
+ code = httprcode(hs);
+
+ switch(code){
+ case -1: /* connection timed out */
+ goto Error;
+
+/*
+ case Eof:
+ werrstr("EOF from HTTP server");
+ goto Error;
+*/
+
+ case 200: /* OK */
+ case 201: /* Created */
+ case 202: /* Accepted */
+ case 204: /* No Content */
+ case 205: /* Reset Content */
+#ifdef NOT_DEFINED
+ if(ofile == nil && r->start != 0)
+ sysfatal("page changed underfoot");
+#endif
+ break;
+
+ case 206: /* Partial Content */
+ werrstr("Partial Content (206)");
+ goto Error;
+
+ case 301: /* Moved Permanently */
+ case 302: /* Moved Temporarily */
+ case 303: /* See Other */
+ case 307: /* Temporary Redirect */
+ redirect = 1;
+ break;
+
+ case 304: /* Not Modified */
+ break;
+
+ case 400: /* Bad Request */
+ werrstr("Bad Request (400)");
+ goto Error;
+
+ case 401: /* Unauthorized */
+ if(c->authenticate){
+ werrstr("Authentication failed (401)");
+ goto Error;
+ }
+ authenticate = 1;
+ break;
+ case 402: /* Payment Required */
+ werrstr("Payment Required (402)");
+ goto Error;
+
+ case 403: /* Forbidden */
+ werrstr("Forbidden by server (403)");
+ goto Error;
+
+ case 404: /* Not Found */
+ werrstr("Not found on server (404)");
+ goto Error;
+
+ case 405: /* Method Not Allowed */
+ werrstr("Method not allowed (405)");
+ goto Error;
+
+ case 406: /* Not Acceptable */
+ werrstr("Not Acceptable (406)");
+ goto Error;
+
+ case 407: /* Proxy auth */
+ werrstr("Proxy authentication required (407)");
+ goto Error;
+
+ case 408: /* Request Timeout */
+ werrstr("Request Timeout (408)");
+ goto Error;
+
+ case 409: /* Conflict */
+ werrstr("Conflict (409)");
+ goto Error;
+
+ case 410: /* Gone */
+ werrstr("Gone (410)");
+ goto Error;
+
+ case 411: /* Length Required */
+ werrstr("Length Required (411)");
+ goto Error;
+
+ case 412: /* Precondition Failed */
+ werrstr("Precondition Failed (412)");
+ goto Error;
+
+ case 413: /* Request Entity Too Large */
+ werrstr("Request Entity Too Large (413)");
+ goto Error;
+
+ case 414: /* Request-URI Too Long */
+ werrstr("Request-URI Too Long (414)");
+ goto Error;
+
+ case 415: /* Unsupported Media Type */
+ werrstr("Unsupported Media Type (415)");
+ goto Error;
+
+ case 416: /* Requested Range Not Satisfiable */
+ werrstr("Requested Range Not Satisfiable (416)");
+ goto Error;
+
+ case 417: /* Expectation Failed */
+ werrstr("Expectation Failed (417)");
+ goto Error;
+
+ case 500: /* Internal server error */
+ werrstr("Server choked (500)");
+ goto Error;
+
+ case 501: /* Not implemented */
+ werrstr("Server can't do it (501)");
+ goto Error;
+
+ case 502: /* Bad gateway */
+ werrstr("Bad gateway (502)");
+ goto Error;
+
+ case 503: /* Service unavailable */
+ werrstr("Service unavailable (503)");
+ goto Error;
+
+ default:
+ /* Bogus: we should treat unknown code XYZ as code X00 */
+ werrstr("Unknown response code %d", code);
+ goto Error;
+ }
+
+ if(httpheaders(hs) < 0)
+ goto Error;
+ if(c->ctl.acceptcookies && hs->setcookie)
+ httpsetcookie(hs->setcookie, url->host, url->path);
+ if(authenticate){
+ if(!hs->credentials){
+ if(hs->autherror[0])
+ werrstr("%s", hs->autherror);
+ else
+ werrstr("unauthorized; no www-authenticate: header");
+ goto Error;
+ }
+ c->authenticate = hs->credentials;
+ hs->credentials = nil;
+ }else if(c->authenticate)
+ c->authenticate = 0;
+ if(redirect){
+ if(!hs->location){
+ werrstr("redirection without Location: header");
+ goto Error;
+ }
+ c->redirect = hs->location;
+ hs->location = nil;
+ }
+ return 0;
+}
+
+/*
+ * Satisfy a body read from the connection's input buffer.  Stores the
+ * byte count in r->ofcall.count and returns 0, or returns -1 when the
+ * read fails.
+ */
+int
+httpread(Client *c, Req *r)
+{
+	HttpState *hs = c->aux;
+	long got;
+
+	got = readibuf(&hs->b, r->ofcall.data, r->ifcall.count);
+	if(got >= 0){
+		r->ofcall.count = got;
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * Close the connection and release the HttpState kept in c->aux.
+ * Does nothing when there is no state.
+ */
+void
+httpclose(Client *c)
+{
+	HttpState *hs;
+
+	if((hs = c->aux) == nil)
+		return;
+	if(hs->fd >= 0){
+		ioclose(c->io, hs->fd);
+	}
+	hs->fd = -1;
+
+	free(hs->location);
+	free(hs->setcookie);
+	free(hs->netaddr);
+	free(hs->credentials);
+	free(hs);
+
+	c->aux = nil;
+}
diff --git a/sys/src/cmd/webfs/io.c b/sys/src/cmd/webfs/io.c
new file mode 100755
index 000000000..9eea91264
--- /dev/null
+++ b/sys/src/cmd/webfs/io.c
@@ -0,0 +1,84 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ip.h>
+#include <plumb.h>
+#include <thread.h>
+#include <fcall.h>
+#include <9p.h>
+#include <mp.h>
+#include <libsec.h>
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Body of iovfprint, invoked through iocall: unpack the descriptor,
+ * format and argument list passed by iovfprint and call vfprint.
+ */
+static long
+_iovfprint(va_list *arg)
+{
+ int fd;
+ char *fmt;
+ va_list arg2;
+
+ fd = va_arg(*arg, int);
+ fmt = va_arg(*arg, char*);
+ arg2 = va_arg(*arg, va_list);
+ return vfprint(fd, fmt, arg2);
+}
+
+/*
+ * vfprint on fd, performed through the io proc via iocall.
+ */
+int
+iovfprint(Ioproc *io, int fd, char *fmt, va_list arg)
+{
+ return iocall(io, _iovfprint, fd, fmt, arg);
+}
+
+/*
+ * Formatted print on fd through the io proc; returns iovfprint's result.
+ */
+int
+ioprint(Ioproc *io, int fd, char *fmt, ...)
+{
+	va_list args;
+	int written;
+
+	va_start(args, fmt);
+	written = iovfprint(io, fd, fmt, args);
+	va_end(args);
+	return written;
+}
+
+/*
+ * Body of iotlsdial, invoked through iocall: dial addr and, when
+ * usetls is set, start TLS on the connection.  Returns the descriptor
+ * to use for the conversation, or -1 on failure.
+ */
+static long
+_iotlsdial(va_list *arg)
+{
+	char *addr, *local, *dir;
+	int *cfdp, fd, tfd, usetls;
+	TLSconn conn;
+
+	addr = va_arg(*arg, char*);
+	local = va_arg(*arg, char*);
+	dir = va_arg(*arg, char*);
+	cfdp = va_arg(*arg, int*);
+	usetls = va_arg(*arg, int);
+
+	fd = dial(addr, local, dir, cfdp);
+	if(fd < 0)
+		return -1;
+	if(!usetls)
+		return fd;
+
+	memset(&conn, 0, sizeof conn);
+	/* does no good, so far anyway */
+	// conn.chain = readcertchain("/sys/lib/ssl/vsignss.pem");
+
+	tfd = tlsClient(fd, &conn);
+	close(fd);
+	if(tfd < 0)
+		fprint(2, "%s: tlsClient: %r\n", argv0);
+	else {
+		/* BUG: check cert here? */
+		/* tlsClient allocates both of these for the caller; neither is used here */
+		free(conn.cert);
+		free(conn.sessionID);
+	}
+	return tfd;
+}
+
+/*
+ * Dial addr through the io proc, optionally starting TLS on the
+ * connection (usetls).  The other arguments are passed on to dial.
+ * Returns a descriptor, or a negative value on failure.
+ */
+int
+iotlsdial(Ioproc *io, char *addr, char *local, char *dir, int *cfdp, int usetls)
+{
+ return iocall(io, _iotlsdial, addr, local, dir, cfdp, usetls);
+}
diff --git a/sys/src/cmd/webfs/main.c b/sys/src/cmd/webfs/main.c
new file mode 100755
index 000000000..0792dbf13
--- /dev/null
+++ b/sys/src/cmd/webfs/main.c
@@ -0,0 +1,67 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ip.h>
+#include <plumb.h>
+#include <thread.h>
+#include <fcall.h>
+#include <9p.h>
+#include "dat.h"
+#include "fns.h"
+
+char *cookiefile;
+char *mtpt = "/mnt/web";
+char *service;
+
+/*
+ * Default settings, changeable through the root ctl file.  threadmain
+ * replaces the useragent literal with an allocated copy.
+ */
+Ctl globalctl =
+{
+ 1, /* accept cookies */
+ 1, /* send cookies */
+ 10, /* redirect limit */
+ "webfs/2.0 (plan 9)" /* user agent */
+};
+
+/*
+ * Print the usage message and exit.  The -d and -D debugging flags
+ * accepted by threadmain are not listed.
+ */
+void
+usage(void)
+{
+ fprint(2, "usage: webfs [-c cookies] [-m mtpt] [-s service]\n");
+ threadexitsall("usage");
+}
+
+#include <pool.h>
+/*
+ * Parse the command line, initialize the plumbing, cookie, url and
+ * file server modules, then post and mount the file server.
+ *
+ * -d turns on allocator checking in the main memory pool
+ * -D increments chatty9p (lib9p tracing)
+ * -c cookies file to use instead of the default
+ * -m mtpt mount point (default /mnt/web)
+ * -s service name under which to post the service
+ */
+void
+threadmain(int argc, char **argv)
+{
+ rfork(RFNOTEG);
+ ARGBEGIN{
+ case 'd':
+ mainmem->flags |= POOL_PARANOIA|POOL_ANTAGONISM;
+ break;
+ case 'D':
+ chatty9p++;
+ break;
+ case 'c':
+ cookiefile = EARGF(usage());
+ break;
+ case 'm':
+ mtpt = EARGF(usage());
+ break;
+ case 's':
+ service = EARGF(usage());
+ break;
+ default:
+ usage();
+ }ARGEND
+
+ quotefmtinstall();
+ if(argc != 0)
+ usage();
+
+ plumbinit();
+ /* replace the string literal with an allocated copy */
+ globalctl.useragent = estrdup(globalctl.useragent);
+ initcookies(cookiefile);
+ initurl();
+ initfs();
+ threadpostmountsrv(&fs, service, mtpt, MREPL);
+ threadexits(nil);
+}
diff --git a/sys/src/cmd/webfs/mkfile b/sys/src/cmd/webfs/mkfile
new file mode 100755
index 000000000..879ab16b9
--- /dev/null
+++ b/sys/src/cmd/webfs/mkfile
@@ -0,0 +1,35 @@
+</$objtype/mkfile
+BIN=/$objtype/bin
+
+TARG=webfs
+
+SCHEMEOFILES=\
+ file.$O\
+ ftp.$O\
+ http.$O\
+
+OFILES=\
+ buf.$O\
+ client.$O\
+ cookies.$O\
+ fs.$O\
+ http.$O\
+ io.$O\
+ main.$O\
+ plumb.$O\
+ url.$O\
+ util.$O\
+# $SCHEMEOFILES
+
+HFILES=\
+ dat.h\
+ fns.h\
+
+UPDATE=\
+ mkfile\
+ $HFILES\
+ ${OFILES:%.$O=%.c}\
+ ${TARG:%=/386/bin/%}\
+
+</sys/src/cmd/mkone
+
diff --git a/sys/src/cmd/webfs/plumb.c b/sys/src/cmd/webfs/plumb.c
new file mode 100755
index 000000000..ada0f4168
--- /dev/null
+++ b/sys/src/cmd/webfs/plumb.c
@@ -0,0 +1,165 @@
+#include <u.h>
+#include <libc.h>
+#include <auth.h>
+#include <fcall.h>
+#include <thread.h>
+#include <plumb.h>
+#include <9p.h>
+
+#include "dat.h"
+#include "fns.h"
+
+static int plumbsendfd;
+static int plumbwebfd;
+static Channel *plumbchan;
+
+static void plumbwebproc(void*);
+static void plumbwebthread(void*);
+static void plumbsendproc(void*);
+
+/*
+ * Open the plumber's send port for writing and its web port for
+ * reading.  The results are not checked here.
+ */
+void
+plumbinit(void)
+{
+ plumbsendfd = plumbopen("send", OWRITE|OCEXEC);
+ plumbwebfd = plumbopen("web", OREAD|OCEXEC);
+}
+
+/*
+ * Start the plumb receivers: plumbwebproc, a separate proc that reads
+ * messages from the web port, and plumbwebthread, a thread in the
+ * calling proc that acts on them.  They are connected by plumbchan.
+ */
+void
+plumbstart(void)
+{
+ plumbchan = chancreate(sizeof(Plumbmsg*), 0);
+ proccreate(plumbwebproc, nil, STACK);
+ threadcreate(plumbwebthread, nil, STACK);
+}
+
+/*
+ * Take plumb messages from plumbchan and hand each to plumburl with
+ * its data and base url: the "baseurl" attribute if present, else the
+ * message's working directory.  A nil message ends the thread.
+ */
+static void
+plumbwebthread(void*)
+{
+	Plumbmsg *msg;
+	char *baseurl;
+
+	while((msg = recvp(plumbchan)) != nil){
+		baseurl = plumblookup(msg->attr, "baseurl");
+		if(baseurl == nil)
+			baseurl = msg->wdir;
+		plumburl(msg->data, baseurl);
+		plumbfree(msg);
+	}
+	threadexits(nil);
+}
+
+/*
+ * Read plumb messages from the web port and pass them to
+ * plumbwebthread over plumbchan.  A nil message (failed receive) is
+ * passed on as well, and then this proc exits.
+ */
+static void
+plumbwebproc(void*)
+{
+	Plumbmsg *msg;
+
+	do{
+		msg = plumbrecv(plumbwebfd);
+		sendp(plumbchan, msg);
+	}while(msg != nil);
+	threadexits(nil);
+}
+
+/*
+ * Prepend the attribute name=value to m's attribute list.  Only the
+ * list node is allocated; name and value are referenced, not copied.
+ */
+static void
+addattr(Plumbmsg *m, char *name, char *value)
+{
+	Plumbattr *a;
+
+	/* emalloc, as used for the message in replumb, rather than an unchecked malloc */
+	a = emalloc(sizeof(Plumbattr));
+	a->name = name;
+	a->value = value;
+	a->next = m->attr;
+	m->attr = a;
+}
+
+/*
+ * Free the nodes of m's attribute list.  The names and values are not
+ * freed, and m->attr is left as it was.
+ */
+static void
+freeattrs(Plumbmsg *m)
+{
+	Plumbattr *a, *next;
+
+	for(a = m->attr; a != nil; a = next){
+		next = a->next;
+		free(a);
+	}
+}
+
+/*
+ * Content types and the file name extension replumb uses for each.
+ */
+static struct
+{
+ char *ctype;
+ char *ext;
+}
+ctypes[] =
+{
+ { "application/msword", "doc" },
+ { "application/pdf", "pdf" },
+ { "application/postscript", "ps" },
+ { "application/rtf", "rtf" },
+ { "image/gif", "gif" },
+ { "image/jpeg", "jpg" },
+ { "image/png", "png" },
+ { "image/ppm", "ppm" },
+ { "image/tiff", "tiff" },
+ { "text/html", "html" },
+ { "text/plain", "txt" },
+ { "text/xml", "xml" },
+};
+
+/*
+ * If the client was created by a plumb message, send a new plumb
+ * message that names the client's body file, /mnt/web/n/body.ext.
+ * The extension is chosen from the content type when it is one of
+ * ctypes, otherwise from a short suffix of the last url element,
+ * otherwise "txt"; it is also recorded in c->ext.
+ * NOTE(review): when taken from the url, c->ext points into
+ * c->url->url rather than at a copy -- confirm the url outlives it.
+ */
+void
+replumb(Client *c)
+{
+	int i, n;
+	Plumbmsg *m;
+	char name[128], *ctype, *ext, *p;
+
+	if(!c->plumbed)
+		return;
+	m = emalloc(sizeof(Plumbmsg));
+	m->src = "webfs";
+	m->dst = nil;
+	m->wdir = "/";
+	m->type = "text";
+	m->attr = nil;
+	addattr(m, "url", c->url->url);
+	ctype = c->contenttype;
+	ext = nil;
+	if(ctype != nil) {
+		addattr(m, "content-type", ctype);
+		/*
+		 * Compare only the media type, ignoring case: the header
+		 * value may carry parameters, as in "text/html; charset=utf-8".
+		 */
+		n = strcspn(ctype, "; \t");
+		for(i = 0; i < nelem(ctypes); i++) {
+			if(strlen(ctypes[i].ctype) == n
+			&& cistrncmp(ctype, ctypes[i].ctype, n) == 0) {
+				ext = ctypes[i].ext;
+				break;
+			}
+		}
+	}
+	if(ext == nil) {
+		/* fall back on a suffix of at most four characters after the last '/' */
+		p = strrchr(c->url->url, '/');
+		if(p != nil)
+			p = strrchr(p+1, '.');
+		if(p != nil && strlen(p) <= 5)
+			ext = p+1;
+		else
+			ext = "txt";	/* punt */
+	}
+	c->ext = ext;
+if(0)fprint(2, "content type %s -> extension .%s\n", ctype, ext);
+	m->ndata = snprint(name, sizeof name, "/mnt/web/%d/body.%s", c->num, ext);
+	m->data = estrdup(name);
+	proccreate(plumbsendproc, m, STACK);	/* separate proc to avoid a deadlock */
+}
+
+/*
+ * Proc started by replumb: send the message to the plumber, then free
+ * its attribute list, its data and the message itself.
+ */
+static void
+plumbsendproc(void *x)
+{
+	Plumbmsg *msg = x;
+
+	plumbsend(plumbsendfd, msg);
+	freeattrs(msg);
+	free(msg->data);
+	free(msg);
+}
diff --git a/sys/src/cmd/webfs/url.c b/sys/src/cmd/webfs/url.c
new file mode 100755
index 000000000..f46c8b47b
--- /dev/null
+++ b/sys/src/cmd/webfs/url.c
@@ -0,0 +1,1092 @@
+/*
+ * This is a URL parser, written to parse "Common Internet Scheme" URL
+ * syntax as described in RFC1738 and updated by RFC2396. Only absolute URLs
+ * are supported, using "server-based" naming authorities in the schemes.
+ * Support for literal IPv6 addresses is included, per RFC2732.
+ *
+ * Current "known" schemes: http, https, ftp, file.
+ *
+ * We can do all the parsing operations without Runes since URLs are
+ * defined to be composed of US-ASCII printable characters.
+ * See RFC1738, RFC2396.
+ */
+
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+#include <regexp.h>
+#include <plumb.h>
+#include <thread.h>
+#include <fcall.h>
+#include <9p.h>
+#include "dat.h"
+#include "fns.h"
+
+int urldebug;
+
+/* If set, relative paths with leading ".." segments will have them trimmed */
+#define RemoveExtraRelDotDots 0
+#define ExpandCurrentDocUrls 1
+
+/*
+ * Scheme names, indexed by the value ischeme() returns for them.
+ * Slot 0 is nil and is skipped during lookup.
+ * NOTE(review): the order presumably mirrors the US* scheme enum declared
+ * elsewhere and the postparse[] table below -- confirm before reordering.
+ */
+static char*
+schemestrtab[] =
+{
+ nil,
+ "http",
+ "https",
+ "ftp",
+ "file",
+};
+
+/*
+ * Map a scheme name to its index in schemestrtab.
+ * The comparison is exact (case-sensitive); USunknown is returned when
+ * the name is not in the table.
+ */
+static int
+ischeme(char *s)
+{
+	int k;
+	char *name;
+
+	for(k = 0; k < nelem(schemestrtab); k++){
+		name = schemestrtab[k];
+		if(name == nil)
+			continue;
+		if(strcmp(s, name) == 0)
+			return k;
+	}
+	return USunknown;
+}
+
+/*
+ * URI splitting regexp is from RFC2396, Appendix B:
+ * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+ * 12 3 4 5 6 7 8 9
+ *
+ * Example: "http://www.ics.uci.edu/pub/ietf/uri/#Related"
+ * $2 = scheme "http"
+ * $4 = authority "www.ics.uci.edu"
+ * $5 = path "/pub/ietf/uri/"
+ * $7 = query <undefined>
+ * $9 = fragment "Related"
+ */
+
+/*
+ * RFC2396, Sec 3.1, contains:
+ *
+ * Scheme names consist of a sequence of characters beginning with a
+ * lower case letter and followed by any combination of lower case
+ * letters, digits, plus ("+"), period ("."), or hyphen ("-"). For
+ * resiliency, programs interpreting URI should treat upper case letters
+ * as equivalent to lower case in scheme names (e.g., allow "HTTP" as
+ * well as "http").
+ */
+
+/*
+ * For server-based naming authorities (RFC2396 Sec 3.2.2):
+ * server = [ [ userinfo "@" ] hostport ]
+ * userinfo = *( unreserved | escaped |
+ * ";" | ":" | "&" | "=" | "+" | "$" | "," )
+ * hostport = host [ ":" port ]
+ * host = hostname | IPv4address
+ * hostname = *( domainlabel "." ) toplabel [ "." ]
+ * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
+ * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
+ * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
+ * port = *digit
+ *
+ * The host is a domain name of a network host, or its IPv4 address as a
+ * set of four decimal digit groups separated by ".". Literal IPv6
+ * addresses are not supported.
+ *
+ * Note that literal IPv6 address support is outlined in RFC2732:
+ * host = hostname | IPv4address | IPv6reference
+ * ipv6reference = "[" IPv6address "]" (RFC2373)
+ *
+ * Since hostnames and numbers will have to be resolved by the OS anyway,
+ * we don't have to parse them too pedantically (counting '.'s, checking
+ * for well-formed literal IP addresses, etc.).
+ *
+ * In FTP/file paths, we reject most ";param"s and querys. In HTTP paths,
+ * we just pass them through.
+ *
+ * Instead of letting a "path" be 0-or-more characters as RFC2396 suggests,
+ * we'll say it's 1-or-more characters, 0-or-1 times. This way, an absent
+ * path yields a nil substring match, instead of an empty one.
+ *
+ * We're more restrictive than RFC2396 indicates with "userinfo" strings,
+ * insisting they have the form "[user[:password]]". This may need to
+ * change at some point, however.
+ */
+
+/* RE character-class components -- these go in brackets */
+/* PUNCT begins with an escaped '-' so the dash is a literal, not a range */
+#define PUNCT "\\-_.!~*'()"
+#define RES ";/?:@&=+$,"
+#define ALNUM "a-zA-Z0-9"
+#define HEX "0-9a-fA-F"
+#define UNRES ALNUM PUNCT
+
+/* RE components; _N => has N parenthesized subexpressions when expanded */
+/* each component matches one character or one %XX escape; callers append '*' */
+#define ESCAPED_1 "(%[" HEX "][" HEX "])"
+#define URIC_2 "([" RES UNRES "]|" ESCAPED_1 ")"
+#define URICNOSLASH_2 "([" UNRES ";?:@&=+$,]|" ESCAPED_1 ")"
+#define USERINFO_2 "([" UNRES ";:&=+$,]|" ESCAPED_1 ")"
+#define PCHAR_2 "([" UNRES ":@&=+$,]|" ESCAPED_1 ")"
+#define PSEGCHAR_3 "([/;]|" PCHAR_2 ")"
+
+/*
+ * One entry per regular expression used by the URL parser.
+ * str and ind are given in the retab[] initializer; prog and size
+ * are filled in by initurl.
+ */
+typedef struct Retab Retab;
+struct Retab
+{
+ char *str; /* regexp source text */
+ Reprog *prog; /* compiled form of str */
+ int size; /* number of '(' in str plus one; passed to regx as the match count */
+ int ind[5]; /* subexpression numbers of the pieces the parser picks out */
+};
+
+/*
+ * Indices into retab[].  MaxResub is the size of the Resub arrays the
+ * parse functions declare; initurl aborts if any regexp needs more.
+ */
+enum
+{
+ REsplit = 0,
+ REscheme,
+ REunknowndata,
+ REauthority,
+ REhost,
+ REuserinfo,
+ REabspath,
+ REquery,
+ REfragment,
+ REhttppath,
+ REftppath,
+ REfilepath,
+
+ MaxResub= 20,
+};
+
+/*
+ * The regular expressions used by the parser, indexed by the RE* enum.
+ * Each entry gives the source text, then nil and 0 as placeholders for
+ * the compiled program and subexpression count that initurl fills in,
+ * then the numbers of the subexpressions the parser extracts.
+ *
+ * In REscheme the '-' is escaped: written as "+-." it would form the
+ * range '+' through '.', which also lets ',' through.
+ */
+Retab retab[] =	/* view in constant width Font */
+{
+[REsplit]
+	"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]+)?(\\?([^#]*))?(#(.*))?$", nil, 0,
+	/* |-scheme-| |-auth.-| |path--| |query| |--|frag */
+	{ 2, 4, 5, 7, 9},
+
+[REscheme]
+	"^[a-z][a-z0-9+.\\-]*$", nil, 0,
+	{ 0, },
+
+[REunknowndata]
+	"^" URICNOSLASH_2 URIC_2 "*$", nil, 0,
+	{ 0, },
+
+[REauthority]
+	"^(((" USERINFO_2 "*)@)?(((\\[[^\\]@]+\\])|([^:\\[@]+))(:([0-9]*))?)?)?$", nil, 0,
+	/* |----user info-----| |--------host----------------| |-port-| */
+	{ 3, 7, 11, },
+
+[REhost]
+	"^(([a-zA-Z0-9\\-.]+)|(\\[([a-fA-F0-9.:]+)\\]))$", nil, 0,
+	/* |--regular host--| |-IPv6 literal-| */
+	{ 2, 4, },
+
+[REuserinfo]
+	"^(([^:]*)(:([^:]*))?)$", nil, 0,
+	/* |user-| |pass-| */
+	{ 2, 4, },
+
+[REabspath]
+	"^/" PSEGCHAR_3 "*$", nil, 0,
+	{ 0, },
+
+[REquery]
+	"^" URIC_2 "*$", nil, 0,
+	{ 0, },
+
+[REfragment]
+	"^" URIC_2 "*$", nil, 0,
+	{ 0, },
+
+[REhttppath]
+	"^.*$", nil, 0,
+	{ 0, },
+
+[REftppath]
+	"^(.+)(;[tT][yY][pP][eE]=([aAiIdD]))?$", nil, 0,
+	/*|--|-path |ftptype-| */
+	{ 1, 3, },
+
+[REfilepath]
+	"^.*$", nil, 0,
+	{ 0, },
+};
+
+/*
+ * Count the '(' characters in s.  Every '(' is counted, whether or not
+ * it is preceded by a backslash.
+ */
+static int
+countleftparen(char *s)
+{
+	int count;
+	char *p;
+
+	count = 0;
+	p = s;
+	while(*p != '\0'){
+		if(*p == '(')
+			count++;
+		p++;
+	}
+	return count;
+}
+
+/*
+ * Compile every regexp in retab and fill in its subexpression count.
+ * Any problem is fatal: a regexp that does not compile, an ind[] entry
+ * that names a subexpression the regexp does not have, or a regexp
+ * needing more than MaxResub match slots.
+ */
+void
+initurl(void)
+{
+	int i, j;
+	Retab *t;
+
+	for(i=0; i<nelem(retab); i++){
+		t = &retab[i];
+		t->prog = regcomp(t->str);
+		if(t->prog == nil)
+			sysfatal("regcomp(%s): %r", t->str);
+		/* one slot per '(' plus slot 0 for the whole match */
+		t->size = countleftparen(t->str)+1;
+		for(j=0; j<nelem(t->ind); j++)
+			if(t->ind[j] >= t->size)
+				sysfatal("bad index in regexp table: retab[%d].ind[%d] = %d >= %d",
+					i, j, t->ind[j], t->size);
+		if(MaxResub < t->size)
+			sysfatal("MaxResub too small: %d < %d", MaxResub, t->size);
+	}
+}
+
+/*
+ * The pieces of a URL as located by spliturl.  For each piece, s and e
+ * are the start and end pointers of a regexp submatch inside the string
+ * that was split (no copies are made); the parse functions treat a nil s
+ * as "piece absent".
+ */
+typedef struct SplitUrl SplitUrl;
+struct SplitUrl
+{
+ struct {
+ char *s;
+ char *e;
+ } url, scheme, authority, path, query, fragment;
+};
+
+/*
+ * Implements the algorithm in RFC2396 sec 5.2 step 6.
+ * Merges the relative path rel_st[0..rel_len) with the path 'base' and
+ * leaves the NUL-terminated result in dest; nothing is returned.
+ * dest is known to be >= strlen(base)+rel_len.
+ */
+static void
+merge_relative_path(char *base, char *rel_st, int rel_len, char *dest)
+{
+ char *s, *p, *e, *pdest;
+
+ /* pdest always points at the end of what has been built in dest */
+ pdest = dest;
+
+ /* 6a: start with base, discard last segment */
+ if(base && base[0]){
+ /* Empty paths don't match in our scheme; 'base' should be nil */
+ assert(base[0] == '/');
+ e = strrchr(base, '/');
+ e++;
+ memmove(pdest, base, e-base);
+ pdest += e-base;
+ }else{
+ /* Artistic license on my part */
+ *pdest++ = '/';
+ }
+
+ /* 6b: append relative component */
+ if(rel_st){
+ memmove(pdest, rel_st, rel_len);
+ pdest += rel_len;
+ }
+
+ /* 6c: remove any occurrences of "./" as a complete segment */
+ s = dest;
+ *pdest = '\0';
+ while(e = strstr(s, "./")){
+ if((e == dest) || (*(e-1) == '/')){
+ memmove(e, e+2, pdest+1-(e+2)); /* +1 for NUL */
+ pdest -= 2;
+ }else
+ s = e+1;
+ }
+
+ /* 6d: remove a trailing "." as a complete segment */
+ if(pdest>dest && *(pdest-1)=='.' &&
+ (pdest==dest+1 || *(pdest-2)=='/'))
+ *--pdest = '\0';
+
+ /* 6e: remove occurrences of "seg/../", where seg != "..", left->right */
+ s = dest+1;
+ while(e = strstr(s, "/../")){
+ /* back up to the '/' that starts the segment before "/../" */
+ p = e - 1;
+ while(p >= dest && *p != '/')
+ p--;
+ if(memcmp(p, "/../", 4) != 0){
+ memmove(p+1, e+4, pdest+1-(e+4));
+ pdest -= (e+4) - (p+1);
+ }else
+ s = e+1;
+ }
+
+ /* 6f: remove a trailing "seg/..", where seg isn't ".." */
+ if(pdest-3 > dest && memcmp(pdest-3, "/..", 3)==0){
+ p = pdest-3 - 1;
+ while(p >= dest && *p != '/')
+ p--;
+ if(memcmp(p, "/../", 4) != 0){
+ pdest = p+1;
+ *pdest = '\0';
+ }
+ }
+
+ /* 6g: leading ".." segments are errors -- we'll just blat them out. */
+ /* (compiled out while RemoveExtraRelDotDots is 0) */
+ if(RemoveExtraRelDotDots){
+ p = dest;
+ if (p[0] == '/')
+ p++;
+ s = p;
+ while(s[0]=='.' && s[1]=='.' && (s[2]==0 || s[2]=='/'))
+ s += 3;
+ if(s > p){
+ memmove(p, s, pdest+1-s);
+ pdest -= s-p;
+ }
+ }
+ USED(pdest);
+
+ if(urldebug)
+ fprint(2, "merge_relative_path: '%s' + '%.*s' -> '%s'\n", base, rel_len,
+ rel_st, dest);
+}
+
+/*
+ * See RFC2396 sec 5.2 for info on resolving relative URIs to absolute form.
+ *
+ * su is the split form of the relative reference held in u->url, and
+ * base is the URL it is relative to.  On success u->url is freed and
+ * replaced by the newly built absolute URL and 0 is returned; since su
+ * points into the old u->url, the caller must split the new string again
+ * before using su.  On failure the error string is set, nothing is
+ * changed, and -1 is returned.
+ *
+ * A reference with no scheme, authority, path or query refers to the
+ * current document; u->ischeme is then set to UScurrent.
+ */
+static int
+resolve_relative(SplitUrl *su, Url *base, Url *u)
+{
+	char *url, *path;
+	char *purl, *ppath;
+	int currentdoc, ulen, plen;
+
+	if(base == nil){
+		werrstr("relative URI given without base");
+		return -1;
+	}
+	if(base->scheme == nil){
+		werrstr("relative URI given with no scheme");
+		return -1;
+	}
+	if(base->ischeme == USunknown){
+		werrstr("relative URI given with unknown scheme");
+		return -1;
+	}
+	if(base->ischeme == UScurrent){
+		werrstr("relative URI given with incomplete base");
+		return -1;
+	}
+	assert(su->scheme.s == nil);
+
+	/* Sec 5.2 step 2 */
+	currentdoc = 0;
+	if(su->path.s==nil && su->scheme.s==nil && su->authority.s==nil && su->query.s==nil){
+		/* Reference is to current document */
+		if(urldebug)
+			fprint(2, "url %s is relative to current document\n", u->url);
+		u->ischeme = UScurrent;
+		if(!ExpandCurrentDocUrls)
+			return 0;
+		currentdoc = 1;
+	}
+
+	/* Over-estimate the maximum lengths, for allocation purposes */
+	/* (constants are for separators) */
+	plen = 1;
+	if(base->path)
+		plen += strlen(base->path);
+	if(su->path.s)
+		plen += 1 + (su->path.e - su->path.s);
+
+	ulen = 0;
+	ulen += strlen(base->scheme) + 1;
+	if(su->authority.s)
+		ulen += 2 + (su->authority.e - su->authority.s);
+	else
+		ulen += 2 + ((base->authority) ? strlen(base->authority) : 0);
+	ulen += plen;
+	if(su->query.s)
+		ulen += 1 + (su->query.e - su->query.s);
+	else if(currentdoc && base->query)
+		ulen += 1 + strlen(base->query);
+	if(su->fragment.s)
+		ulen += 1 + (su->fragment.e - su->fragment.s);
+	else if(currentdoc && base->fragment)
+		ulen += 1 + strlen(base->fragment);
+	/* emalloc zeroes its result, so memmove'd pieces end up NUL-terminated */
+	url = emalloc(ulen+1);
+	path = emalloc(plen+1);
+
+	url[0] = '\0';
+	purl = url;
+	path[0] = '\0';
+	ppath = path;
+
+	if(su->authority.s || (su->path.s && (su->path.s[0] == '/'))){
+		/* Is a "network-path" or "absolute-path"; don't merge with base path */
+		/* Sec 5.2 steps 4,5 */
+		if(su->path.s){
+			memmove(ppath, su->path.s, su->path.e - su->path.s);
+			ppath += su->path.e - su->path.s;
+			*ppath = '\0';
+		}
+	}else if(currentdoc){
+		/* Is a current-doc reference; just copy the path from the base URL */
+		if(base->path){
+			strcpy(ppath, base->path);
+			ppath += strlen(ppath);
+		}
+		USED(ppath);
+	}else{
+		/* Is a relative-path reference; we have to merge it */
+		/* Sec 5.2 step 6 */
+		merge_relative_path(base->path,
+			su->path.s, su->path.e - su->path.s, ppath);
+	}
+
+	/* Build new URL from pieces, inheriting from base where needed */
+	strcpy(purl, base->scheme);
+	purl += strlen(purl);
+	*purl++ = ':';
+	if(su->authority.s){
+		strcpy(purl, "//");
+		purl += strlen(purl);
+		memmove(purl, su->authority.s, su->authority.e - su->authority.s);
+		purl += su->authority.e - su->authority.s;
+	}else if(base->authority){
+		strcpy(purl, "//");
+		purl += strlen(purl);
+		strcpy(purl, base->authority);
+		purl += strlen(purl);
+	}
+	assert((path[0] == '\0') || (path[0] == '/'));
+	strcpy(purl, path);
+	purl += strlen(purl);
+
+	/*
+	 * The query and fragment are not inherited from the base,
+	 * except in case of "current document" URLs, which inherit any query
+	 * and may inherit the fragment.
+	 */
+	if(su->query.s){
+		*purl++ = '?';
+		memmove(purl, su->query.s, su->query.e - su->query.s);
+		purl += su->query.e - su->query.s;
+	}else if(currentdoc && base->query){
+		*purl++ = '?';
+		strcpy(purl, base->query);
+		purl += strlen(purl);
+	}
+
+	if(su->fragment.s){
+		*purl++ = '#';
+		/* copy the fragment itself (this used to copy the query text) */
+		memmove(purl, su->fragment.s, su->fragment.e - su->fragment.s);
+		purl += su->fragment.e - su->fragment.s;
+	}else if(currentdoc && base->fragment){
+		*purl++ = '#';
+		strcpy(purl, base->fragment);
+		purl += strlen(purl);
+	}
+	USED(purl);
+
+	if(urldebug)
+		fprint(2, "resolve_relative: '%s' + '%s' -> '%s'\n", base->url, u->url, url);
+	free(u->url);
+	u->url = url;
+	free(path);
+	return 0;
+}
+
+/*
+ * Run regexec for prog, filling in the nm entries of m.
+ * When s is nil the search starts at m[0].sp, which the caller has set
+ * (together with m[0].ep) to delimit the text to be matched.
+ * Returns whatever regexec returns.
+ */
+int
+regx(Reprog *prog, char *s, Resub *m, int nm)
+{
+	char *subject;
+
+	subject = s != nil ? s : m[0].sp;
+	return regexec(prog, subject, m, nm);
+}
+
+/*
+ * Report whether s matches regexp number i of retab.
+ * Returns 1 on a match; otherwise sets the error string to
+ * "malformed <desc>: <s>" and returns 0.
+ */
+static int
+ismatch(int i, char *s, char *desc)
+{
+	Resub whole[1];
+
+	whole[0].sp = nil;
+	whole[0].ep = nil;
+	if(regx(retab[i].prog, s, whole, 1))
+		return 1;
+	werrstr("malformed %s: %q", desc, s);
+	return 0;
+}
+
+/*
+ * Split url into scheme, authority, path, query and fragment using the
+ * REsplit regexp, recording the submatch boundaries in su.  The pointers
+ * stored in su point into url itself; a piece that did not match has
+ * whatever (sp, ep) regexec left for it.
+ * Returns 0 on success; on failure sets the error string and returns -1.
+ */
+static int
+spliturl(char *url, SplitUrl *su)
+{
+ Resub m[MaxResub];
+ Retab *t;
+
+ /*
+ * Newlines are not valid in a URI, but regexp(2) treats them specially
+ * so it's best to make sure there are none before proceeding.
+ */
+ if(strchr(url, '\n')){
+ werrstr("newline in URI");
+ return -1;
+ }
+
+ /*
+ * Because we use NUL-terminated strings, as do many client and server
+ * implementations, an escaped NUL ("%00") will quite likely cause problems
+ * when unescaped. We can check for such a sequence once before examining
+ * the components because, per RFC2396 sec. 2.4.1 - 2.4.2, '%' is reserved
+ * in URIs to _always_ indicate escape sequences. Something like "%2500"
+ * will still get by, but that's legitimate, and if it ends up causing
+ * a NUL then someone is unescaping too many times.
+ */
+ if(strstr(url, "%00")){
+ werrstr("escaped NUL in URI");
+ return -1;
+ }
+
+ m[0].sp = m[0].ep = nil;
+ t = &retab[REsplit];
+ if(!regx(t->prog, url, m, t->size)){
+ werrstr("malformed URI: %q", url);
+ return -1;
+ }
+
+ /* t->ind[] holds the subexpression numbers of the five pieces, in this order */
+ su->url.s = m[0].sp;
+ su->url.e = m[0].ep;
+ su->scheme.s = m[t->ind[0]].sp;
+ su->scheme.e = m[t->ind[0]].ep;
+ su->authority.s = m[t->ind[1]].sp;
+ su->authority.e = m[t->ind[1]].ep;
+ su->path.s = m[t->ind[2]].sp;
+ su->path.e = m[t->ind[2]].ep;
+ su->query.s = m[t->ind[3]].sp;
+ su->query.e = m[t->ind[3]].ep;
+ su->fragment.s = m[t->ind[4]].sp;
+ su->fragment.e = m[t->ind[4]].ep;
+
+ /* each %.*q consumes a (length, string) pair; absent pieces print as "" */
+ if(urldebug)
+ fprint(2, "split url %s into %.*q %.*q %.*q %.*q %.*q %.*q\n",
+ url,
+ su->url.s ? utfnlen(su->url.s, su->url.e-su->url.s) : 10, su->url.s ? su->url.s : "",
+ su->scheme.s ? utfnlen(su->scheme.s, su->scheme.e-su->scheme.s) : 10, su->scheme.s ? su->scheme.s : "",
+ su->authority.s ? utfnlen(su->authority.s, su->authority.e-su->authority.s) : 10, su->authority.s ? su->authority.s : "",
+ su->path.s ? utfnlen(su->path.s, su->path.e-su->path.s) : 10, su->path.s ? su->path.s : "",
+ su->query.s ? utfnlen(su->query.s, su->query.e-su->query.s) : 10, su->query.s ? su->query.s : "",
+ su->fragment.s ? utfnlen(su->fragment.s, su->fragment.e-su->fragment.s) : 10, su->fragment.s ? su->fragment.s : "");
+
+ return 0;
+}
+
+/*
+ * Copy the scheme out of su into u->scheme, folded to lower case,
+ * check it against REscheme and set u->ischeme from the scheme table.
+ * Returns 0 on success, -1 (with the error string set) if the scheme
+ * is missing or malformed.
+ */
+static int
+parse_scheme(SplitUrl *su, Url *u)
+{
+	char *name;
+
+	if(su->scheme.s == nil){
+		werrstr("missing scheme");
+		return -1;
+	}
+	name = estredup(su->scheme.s, su->scheme.e);
+	strlower(name);
+	u->scheme = name;
+	if(!ismatch(REscheme, name, "scheme"))
+		return -1;
+
+	u->ischeme = ischeme(name);
+	if(urldebug)
+		fprint(2, "parse_scheme %s => %d\n", u->scheme, u->ischeme);
+	return 0;
+}
+
+/*
+ * For a URL whose scheme is not in the table: store everything between
+ * the ':' after the scheme and the '#' of the fragment (or the end of
+ * the string) in u->schemedata and check it against REunknowndata.
+ * Returns 0 on success, -1 if the data is malformed.
+ */
+static int
+parse_unknown_part(SplitUrl *su, Url *u)
+{
+	char *start, *end;
+
+	assert(u->ischeme == USunknown);
+	assert(su->scheme.e[0] == ':');
+
+	start = su->scheme.e+1;
+	if(su->fragment.s == nil)
+		end = start+strlen(start);
+	else{
+		end = su->fragment.s-1;
+		assert(*end == '#');
+	}
+
+	u->schemedata = estredup(start, end);
+	return ismatch(REunknowndata, u->schemedata, "unknown scheme data") ? 0 : -1;
+}
+
+/*
+ * Parse the userinfo text s..e, of the form "user[:password]", storing
+ * copies of the user name in u->user and of the password in u->passwd.
+ * Returns 0 on success, -1 (with the error string set) if the text does
+ * not match REuserinfo.
+ */
+static int
+parse_userinfo(char *s, char *e, Url *u)
+{
+	Resub m[MaxResub];
+	Retab *t;
+
+	/* with a nil string, regx searches the span given in m[0] */
+	m[0].sp = s;
+	m[0].ep = e;
+	t = &retab[REuserinfo];
+	if(!regx(t->prog, nil, m, t->size)){
+		werrstr("malformed userinfo: %.*q", utfnlen(s, e-s), s);
+		return -1;
+	}
+	if(m[t->ind[0]].sp)
+		u->user = estredup(m[t->ind[0]].sp, m[t->ind[0]].ep);
+	/* the second submatch is the password; it must not overwrite the user */
+	if(m[t->ind[1]].sp)
+		u->passwd = estredup(m[t->ind[1]].sp, m[t->ind[1]].ep);
+	return 0;
+}
+
+/*
+ * Parse the host text s..e and store a copy in u->host.  For a
+ * bracketed literal the stored text is what was inside the brackets.
+ * Returns 0 on success, -1 (with the error string set) if the text does
+ * not match REhost.
+ */
+static int
+parse_host(char *s, char *e, Url *u)
+{
+	Resub m[MaxResub];
+	Resub *hit;
+	Retab *t;
+
+	m[0].sp = s;
+	m[0].ep = e;
+	t = &retab[REhost];
+	if(!regx(t->prog, nil, m, t->size)){
+		werrstr("malformed host: %.*q", utfnlen(s, e-s), s);
+		return -1;
+	}
+
+	assert(m[t->ind[0]].sp || m[t->ind[1]].sp);
+
+	hit = &m[t->ind[0]];	/* regular */
+	if(hit->sp == nil)
+		hit = &m[t->ind[1]];	/* bracketed literal */
+	u->host = estredup(hit->sp, hit->ep);
+	return 0;
+}
+
+/*
+ * Copy the authority part of su into u->authority and break it into
+ * user information, host and port using REauthority.
+ * Returns 0 when there is no authority or it parses cleanly, -1 (with
+ * the error string set) otherwise.
+ */
+static int
+parse_authority(SplitUrl *su, Url *u)
+{
+	Resub m[MaxResub];
+	Resub *ui, *hm, *pm;
+	Retab *t;
+	char *host;
+	char *userinfo;
+
+	if(su->authority.s == nil)
+		return 0;
+
+	u->authority = estredup(su->authority.s, su->authority.e);
+	m[0].sp = nil;
+	m[0].ep = nil;
+	t = &retab[REauthority];
+	if(!regx(t->prog, u->authority, m, t->size)){
+		werrstr("malformed authority: %q", u->authority);
+		return -1;
+	}
+	ui = &m[t->ind[0]];
+	hm = &m[t->ind[1]];
+	pm = &m[t->ind[2]];
+
+	if(ui->sp != nil && parse_userinfo(ui->sp, ui->ep, u) < 0)
+		return -1;
+	if(hm->sp != nil && parse_host(hm->sp, hm->ep, u) < 0)
+		return -1;
+	if(pm->sp != nil)
+		u->port = estredup(pm->sp, pm->ep);
+
+	if(urldebug > 0){
+		userinfo = estredup(ui->sp, ui->ep);
+		host = estredup(hm->sp, hm->ep);
+		fprint(2, "port: %q, authority %q\n", u->port, u->authority);
+		fprint(2, "host %q, userinfo %q\n", host, userinfo);
+		free(host);
+		free(userinfo);
+	}
+	return 0;
+}
+
+/*
+ * Copy the path, if any, into u->path and check it against REabspath.
+ * Returns 0 if the path is absent or well formed, -1 otherwise.
+ */
+static int
+parse_abspath(SplitUrl *su, Url *u)
+{
+	if(su->path.s != nil){
+		u->path = estredup(su->path.s, su->path.e);
+		if(!ismatch(REabspath, u->path, "absolute path"))
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * Copy the query, if any, into u->query and check it against REquery.
+ * Returns 0 if the query is absent or well formed, -1 otherwise.
+ */
+static int
+parse_query(SplitUrl *su, Url *u)
+{
+	if(su->query.s != nil){
+		u->query = estredup(su->query.s, su->query.e);
+		if(!ismatch(REquery, u->query, "query"))
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * Copy the fragment, if any, into u->fragment and check it against
+ * REfragment.  Returns 0 if the fragment is absent or well formed,
+ * -1 otherwise.
+ */
+static int
+parse_fragment(SplitUrl *su, Url *u)
+{
+	if(su->fragment.s != nil){
+		u->fragment = estredup(su->fragment.s, su->fragment.e);
+		if(!ismatch(REfragment, u->fragment, "fragment"))
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * Scheme-specific checks for http and https URLs.
+ * Installs the http open/read/close functions, insists on an authority
+ * and a host, and builds u->http.page_spec as the path followed by
+ * "?query" when there is a query.  A missing path is treated as "/",
+ * so a URL with a query but no path yields "/?query" instead of
+ * silently losing the query.
+ * Returns 0 on success, -1 with the error string set on failure.
+ */
+static int
+postparse_http(Url *u)
+{
+	char *path, *spec;
+
+	u->open = httpopen;
+	u->read = httpread;
+	u->close = httpclose;
+
+	if(u->authority==nil){
+		werrstr("missing authority (hostname, port, etc.)");
+		return -1;
+	}
+	if(u->host == nil){
+		werrstr("missing host specification");
+		return -1;
+	}
+
+	path = u->path;
+	if(path == nil)
+		path = "/";
+	else if(!ismatch(REhttppath, path, "http path"))
+		return -1;
+
+	if(u->query == nil)
+		spec = estrdup(path);
+	else{
+		/* path + '?' + query + NUL */
+		spec = emalloc(strlen(path)+1+strlen(u->query)+1);
+		strcpy(spec, path);
+		strcat(spec, "?");
+		strcat(spec, u->query);
+	}
+	u->http.page_spec = spec;
+	return 0;
+}
+
+/*
+ * Scheme-specific checks for ftp URLs.
+ * Requires an authority and a host and refuses a query.  The path is
+ * matched against REftppath to separate the path proper, stored in
+ * u->ftp.path_spec ("/" when there is none), from a ";type=" suffix,
+ * whose letter is stored in lower case in u->ftp.type.  Any ';' left in
+ * the path proper is an error.
+ * Returns 0 on success, -1 with the error string set on failure.
+ */
+static int
+postparse_ftp(Url *u)
+{
+	Resub m[MaxResub];
+	Resub *pathm, *typem;
+	Retab *t;
+
+	if(u->authority == nil){
+		werrstr("missing authority (hostname, port, etc.)");
+		return -1;
+	}
+	if(u->query != nil){
+		werrstr("unexpected \"?query\" in ftp path");
+		return -1;
+	}
+	if(u->host == nil){
+		werrstr("missing host specification");
+		return -1;
+	}
+	if(u->path == nil){
+		u->ftp.path_spec = estrdup("/");
+		return 0;
+	}
+
+	m[0].sp = nil;
+	m[0].ep = nil;
+	t = &retab[REftppath];
+	if(!regx(t->prog, u->path, m, t->size)){
+		werrstr("malformed ftp path: %q", u->path);
+		return -1;
+	}
+	pathm = &m[t->ind[0]];
+	typem = &m[t->ind[1]];
+
+	if(pathm->sp == nil)
+		u->ftp.path_spec = estrdup("/");
+	else{
+		u->ftp.path_spec = estredup(pathm->sp, pathm->ep);
+		if(strchr(u->ftp.path_spec, ';') != nil){
+			werrstr("unexpected \";param\" in ftp path");
+			return -1;
+		}
+	}
+
+	if(typem->sp != nil){
+		u->ftp.type = estredup(typem->sp, typem->ep);
+		strlower(u->ftp.type);
+	}
+	return 0;
+}
+
+/*
+ * Scheme-specific checks for file URLs.
+ * User information, a query, a port and ";param" text are all refused,
+ * and a path is required.  A host of "localhost" (in any case) is
+ * dropped so that it is equivalent to giving no host.
+ * Returns 0 on success, -1 with the error string set on failure.
+ */
+static int
+postparse_file(Url *u)
+{
+	char *why;
+
+	/* the first complaint that applies wins, in this order */
+	if(u->user || u->passwd)
+		why = "user information not valid with file scheme";
+	else if(u->query)
+		why = "unexpected \"?query\" in file path";
+	else if(u->port)
+		why = "port not valid with file scheme";
+	else if(u->path == nil)
+		why = "missing path in file scheme";
+	else if(strchr(u->path, ';'))
+		why = "unexpected \";param\" in file path";
+	else
+		why = nil;
+	if(why != nil){
+		werrstr("%s", why);
+		return -1;
+	}
+
+	if(!ismatch(REfilepath, u->path, "file path"))
+		return -1;
+
+	/* "localhost" is equivalent to no host spec, we'll chose the latter */
+	if(u->host != nil && cistrcmp(u->host, "localhost") == 0){
+		free(u->host);
+		u->host = nil;
+	}
+	return 0;
+}
+
+/*
+ * Scheme-specific checks, indexed by u->ischeme; parseurl skips a nil
+ * entry.  The slots line up with schemestrtab: 1 and 2 ("http" and
+ * "https") share postparse_http, 3 is ftp and 4 is file.
+ */
+static int (*postparse[])(Url*) = {
+ nil,
+ postparse_http,
+ postparse_http,
+ postparse_ftp,
+ postparse_file,
+};
+
+/*
+ * Parse url, which may be relative to base (base may be nil), into a
+ * newly allocated Url.  Returns nil on failure, after freeing the
+ * partially built Url; the functions called here set the error string.
+ * The caller releases the result with freeurl.
+ */
+Url*
+parseurl(char *url, Url *base)
+{
+ Url *u;
+ SplitUrl su;
+
+ if(urldebug)
+ fprint(2, "parseurl %s with base %s\n", url, base ? base->url : "<none>");
+
+ u = emalloc(sizeof(Url));
+ u->url = estrdup(url);
+ if(spliturl(u->url, &su) < 0){
+ /* common error exit: every later failure jumps here */
+ Fail:
+ freeurl(u);
+ return nil;
+ }
+
+ /* RFC2396 sec 3.1 says relative URIs are distinguished by absent scheme */
+ if(su.scheme.s==nil){
+ if(urldebug)
+ fprint(2, "parseurl has nil scheme\n");
+ /* resolve_relative replaces u->url, so it has to be split again */
+ if(resolve_relative(&su, base, u) < 0 || spliturl(u->url, &su) < 0)
+ goto Fail;
+ if(u->ischeme == UScurrent){
+ /* 'u.url' refers to current document; set fragment and return */
+ if(parse_fragment(&su, u) < 0)
+ goto Fail;
+ return u;
+ }
+ }
+
+ if(parse_scheme(&su, u) < 0
+ || parse_fragment(&su, u) < 0)
+ goto Fail;
+
+ /* unknown schemes keep everything after the ':' as opaque scheme data */
+ if(u->ischeme == USunknown){
+ if(parse_unknown_part(&su, u) < 0)
+ goto Fail;
+ return u;
+ }
+
+ if(parse_query(&su, u) < 0
+ || parse_authority(&su, u) < 0
+ || parse_abspath(&su, u) < 0)
+ goto Fail;
+
+ /* run the per-scheme checks, if the scheme has any */
+ if(u->ischeme < nelem(postparse) && postparse[u->ischeme])
+ if((*postparse[u->ischeme])(u) < 0)
+ goto Fail;
+
+ setmalloctag(u, getcallerpc(&url));
+ return u;
+}
+
+/*
+ * Release a Url and every string it owns.  A nil u is ignored.
+ * NOTE(review): postparse_http also runs for scheme slot 2 ("https") and
+ * allocates http.page_spec there, but only UShttp is handled below, so
+ * that string looks like it is leaked for https URLs.  Any fix must be
+ * paired with copyurl, which does not duplicate it for https either --
+ * confirm against the US* enum.
+ */
+void
+freeurl(Url *u)
+{
+	if(u == nil)
+		return;
+
+	/* fields common to every scheme */
+	free(u->url);
+	free(u->scheme);
+	free(u->schemedata);
+	free(u->authority);
+	free(u->user);
+	free(u->passwd);
+	free(u->host);
+	free(u->port);
+	free(u->path);
+	free(u->query);
+	free(u->fragment);
+
+	/* scheme-specific fields */
+	if(u->ischeme == UShttp)
+		free(u->http.page_spec);
+	else if(u->ischeme == USftp){
+		free(u->ftp.path_spec);
+		free(u->ftp.type);
+	}
+	free(u);
+}
+
+/*
+ * Rebuild u->url from the parsed components, replacing the old string.
+ * A URL with scheme data becomes "scheme:data"; otherwise the result is
+ * scheme://[user[:passwd]@]host[:port]path[?query][#fragment].
+ *
+ * estrmanydup stops at the first nil argument, so every optional piece
+ * is passed as "" when absent.  That includes the path: a nil path used
+ * to end the list early and drop the query and fragment.
+ */
+void
+rewriteurl(Url *u)
+{
+	char *s;
+
+	if(u->schemedata)
+		s = estrmanydup(u->scheme, ":", u->schemedata, nil);
+	else
+		s = estrmanydup(u->scheme, "://",
+			u->user ? u->user : "",
+			u->passwd ? ":" : "", u->passwd ? u->passwd : "",
+			u->user ? "@" : "", u->host ? u->host : "",
+			u->port ? ":" : "", u->port ? u->port : "",
+			u->path ? u->path : "",
+			u->query ? "?" : "", u->query ? u->query : "",
+			u->fragment ? "#" : "", u->fragment ? u->fragment : "",
+			nil);
+	free(u->url);
+	u->url = s;
+}
+
+/*
+ * Replace u->query with a copy of query, or clear it when query is nil.
+ * A non-nil query must match REquery; if it does not, u is left alone
+ * and -1 is returned.  Returns 0 on success.
+ */
+int
+seturlquery(Url *u, char *query)
+{
+	if(query != nil && !ismatch(REquery, query, "query"))
+		return -1;
+
+	free(u->query);
+	u->query = query != nil ? estrdup(query) : nil;
+	return 0;
+}
+
+/*
+ * Replace the string *p by a freshly allocated copy of itself.
+ * A nil *p is left as it is.
+ */
+static void
+dupp(char **p)
+{
+	char *old;
+
+	old = *p;
+	if(old == nil)
+		return;
+	*p = estrdup(old);
+}
+
+/*
+ * Return a newly allocated copy of u.  The structure is copied whole
+ * and then each string it owns is duplicated so the copy can be freed
+ * independently.
+ * NOTE(review): postparse_http also runs for scheme slot 2 ("https"),
+ * but only UShttp is handled below, so for https URLs http.page_spec
+ * appears to stay shared with the original -- confirm against the US*
+ * enum and keep in step with freeurl.
+ */
+Url*
+copyurl(Url *u)
+{
+	Url *v;
+
+	v = emalloc(sizeof(Url));
+	*v = *u;
+
+	/* fields common to every scheme */
+	dupp(&v->url);
+	dupp(&v->scheme);
+	dupp(&v->schemedata);
+	dupp(&v->authority);
+	dupp(&v->user);
+	dupp(&v->passwd);
+	dupp(&v->host);
+	dupp(&v->port);
+	dupp(&v->path);
+	dupp(&v->query);
+	dupp(&v->fragment);
+
+	/* scheme-specific fields */
+	if(v->ischeme == UShttp)
+		dupp(&v->http.page_spec);
+	else if(v->ischeme == USftp){
+		dupp(&v->ftp.path_spec);
+		dupp(&v->ftp.type);
+	}
+	return v;
+}
+
+/*
+ * Value of the hexadecimal digit c, in either case.
+ * Anything that is not a hex digit yields 0.
+ */
+static int
+dhex(char c)
+{
+	if(c >= '0' && c <= '9')
+		return c - '0';
+	if(c >= 'a' && c <= 'f')
+		return 10 + (c - 'a');
+	if(c >= 'A' && c <= 'F')
+		return 10 + (c - 'A');
+	return 0;
+}
+
+/*
+ * Return a newly allocated copy of s in which every rune for which
+ * needesc returns non-zero is replaced by "%xx" (lower-case hex).
+ * Runes that are not escaped are written as a single byte.  A rune
+ * above 0xFF cannot be represented: the error string is set and nil is
+ * returned.  The caller frees the result.
+ *
+ * Both passes decode s rune by rune, so the buffer is sized from the
+ * same units that are written.  chartorune alone advances the pointer;
+ * the loops have no increment of their own, so no character is skipped
+ * and the terminating NUL is never stepped over.
+ */
+char*
+escapeurl(char *s, int (*needesc)(int))
+{
+	int nrune, nesc;
+	char *p, *out, *w;
+	Rune r;
+	static char *hex = "0123456789abcdef";
+
+	/* pass 1: validate, and count runes and escapes */
+	nrune = 0;
+	nesc = 0;
+	for(p = s; *p != '\0'; ){
+		p += chartorune(&r, p);
+		if(r > 0xFF){
+			werrstr("URLs cannot contain Runes > 0xFF");
+			return nil;
+		}
+		nrune++;
+		if((*needesc)(r))
+			nesc++;
+	}
+
+	/* pass 2: one byte per rune, plus two more for each escape */
+	out = emalloc(nrune + 2*nesc + 1);
+	w = out;
+	for(p = s; *p != '\0'; ){
+		p += chartorune(&r, p);
+		if((*needesc)(r)){
+			*w++ = '%';
+			*w++ = hex[(r>>4)&0xF];
+			*w++ = hex[r&0xF];
+		}else
+			*w++ = r;
+	}
+	*w = '\0';
+	return out;
+}
+
+/*
+ * Return a newly allocated copy of s with every "%XX" escape replaced
+ * by the character it stands for; the value is treated as a Latin-1
+ * code point and stored with runetochar.  A '%' not followed by two hex
+ * digits, or an escaped NUL ("%00"), is an error: the error string is
+ * set, the copy is freed and nil is returned.  The caller frees the
+ * result.
+ */
+char*
+unescapeurl(char *s)
+{
+	char *r, *w;
+	Rune rune;
+
+	s = estrdup(s);
+	for(r=w=s; *r; r++){
+		if(*r != '%'){
+			*w++ = *r;
+			continue;
+		}
+		/* r is at the '%'; r[1] and r[2] must be the two hex digits */
+		if(!isxdigit(r[1]) || !isxdigit(r[2])){
+			werrstr("bad escape sequence '%.3s' in URL", r);
+			free(s);
+			return nil;
+		}
+		if(r[1]=='0' && r[2]=='0'){
+			werrstr("escaped NUL in URL");
+			free(s);
+			return nil;
+		}
+		rune = (dhex(r[1])<<4)|dhex(r[2]);	/* latin1 */
+		w += runetochar(w, &rune);
+		/* leave r on the second digit; the loop's r++ steps past it */
+		r += 2;
+	}
+	*w = '\0';
+	return s;
+}
+
diff --git a/sys/src/cmd/webfs/util.c b/sys/src/cmd/webfs/util.c
new file mode 100755
index 000000000..b6788194e
--- /dev/null
+++ b/sys/src/cmd/webfs/util.c
@@ -0,0 +1,86 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ndb.h>
+#include <fcall.h>
+#include <thread.h>
+#include <9p.h>
+#include <ctype.h>
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * realloc that does not return on failure: a nil result is fatal.
+ * The block is tagged with the caller's pc.
+ */
+void*
+erealloc(void *a, uint n)
+{
+	void *grown;
+
+	grown = realloc(a, n);
+	if(grown == nil)
+		sysfatal("realloc %d: out of memory", n);
+	setrealloctag(grown, getcallerpc(&a));
+	return grown;
+}
+
+/*
+ * Allocate n zeroed bytes; running out of memory is fatal.
+ * The block is tagged with the caller's pc.
+ */
+void*
+emalloc(uint n)
+{
+	void *mem;
+
+	if((mem = mallocz(n, 1)) == nil)
+		sysfatal("malloc %d: out of memory", n);
+	setmalloctag(mem, getcallerpc(&n));
+	return mem;
+}
+
+/*
+ * strdup that does not return on failure: a nil result is fatal.
+ * The copy is tagged with the caller's pc.
+ */
+char*
+estrdup(char *s)
+{
+	char *copy;
+
+	copy = strdup(s);
+	if(copy == nil)
+		sysfatal("strdup: out of memory");
+	setmalloctag(copy, getcallerpc(&s));
+	return copy;
+}
+
+/*
+ * Return a newly allocated, NUL-terminated copy of the bytes from s up
+ * to but not including e.  The copy is tagged with the caller's pc.
+ */
+char*
+estredup(char *s, char *e)
+{
+	char *copy;
+	long len;
+
+	len = e-s;
+	copy = emalloc(len+1);
+	memmove(copy, s, len);
+	copy[len] = '\0';
+	setmalloctag(copy, getcallerpc(&s));
+	return copy;
+}
+
+/*
+ * Concatenate s and the following strings into one newly allocated
+ * string.  The argument list must end with nil; the first nil argument
+ * ends the list, so no piece may itself be nil.  The caller frees the
+ * result.
+ *
+ * The argument list is walked twice, once to size the result and once
+ * to copy, and each walk is closed with va_end before the next begins.
+ */
+char*
+estrmanydup(char *s, ...)
+{
+	char *p, *t, *w;
+	int len;
+	va_list arg;
+
+	len = strlen(s);
+	va_start(arg, s);
+	while((p = va_arg(arg, char*)) != nil)
+		len += strlen(p);
+	va_end(arg);
+	len++;
+
+	t = emalloc(len);
+	strcpy(t, s);
+	/* append at a running end pointer instead of rescanning with strcat */
+	w = t+strlen(t);
+	va_start(arg, s);
+	while((p = va_arg(arg, char*)) != nil){
+		strcpy(w, p);
+		w += strlen(p);
+	}
+	va_end(arg);
+	return t;
+}
+
+/*
+ * Convert the ASCII upper-case letters of s to lower case in place.
+ * Returns s.
+ */
+char*
+strlower(char *s)
+{
+	char *p;
+
+	p = s;
+	while(*p != '\0'){
+		if(*p >= 'A' && *p <= 'Z')
+			*p = *p - 'A' + 'a';
+		p++;
+	}
+	return s;
+}
diff --git a/sys/src/cmd/webfs/webget.c b/sys/src/cmd/webfs/webget.c
new file mode 100755
index 000000000..d3a13afa1
--- /dev/null
+++ b/sys/src/cmd/webfs/webget.c
@@ -0,0 +1,87 @@
+/*
+ * Sample client.
+ */
+#include <u.h>
+#include <libc.h>
+
+/*
+ * Copy everything readable from descriptor 'from' to descriptor 'to'.
+ * A read error, a write error or a short write is fatal.
+ */
+void
+xfer(int from, int to)
+{
+	char buf[12*1024];
+	int n;
+
+	while((n = read(from, buf, sizeof buf)) > 0)
+		if(write(to, buf, n) != n)	/* a short write loses data too */
+			sysfatal("write failed: %r");
+	if(n < 0)
+		sysfatal("read failed: %r");
+}
+
+/*
+ * Print the usage line on standard error and exit with status "usage".
+ */
+void
+usage(void)
+{
+	static char msg[] = "usage: webget [-b baseurl] [-m mtpt] [-p postbody] url\n";
+
+	fprint(2, "%s", msg);
+	exits("usage");
+}
+
+/*
+ * webget [-b baseurl] [-m mtpt] [-p postbody] url
+ *
+ * Opens mtpt/clone to get a new connection, writes the optional base
+ * URL and then the URL to that control file, writes the optional post
+ * body to the connection's postbody file, and finally copies the
+ * connection's body file to standard output.  Any failure is fatal.
+ */
+void
+main(int argc, char **argv)
+{
+ int conn, ctlfd, fd, n;
+ char buf[128], *base, *mtpt, *post, *url;
+
+ mtpt = "/mnt/web";
+ post = nil;
+ base = nil;
+ ARGBEGIN{
+ default:
+ usage();
+ case 'b':
+ base = EARGF(usage());
+ break;
+ case 'm':
+ mtpt = EARGF(usage());
+ break;
+ case 'p':
+ post = EARGF(usage());
+ break;
+ }ARGEND;
+
+ if (argc != 1)
+ usage();
+
+ url = argv[0];
+
+ /* reading the clone file yields the number of the new connection */
+ snprint(buf, sizeof buf, "%s/clone", mtpt);
+ if((ctlfd = open(buf, ORDWR)) < 0)
+ sysfatal("couldn't open %s: %r", buf);
+ if((n = read(ctlfd, buf, sizeof buf-1)) < 0)
+ sysfatal("reading clone: %r");
+ if(n == 0)
+ sysfatal("short read on clone");
+ buf[n] = '\0';
+ conn = atoi(buf);
+
+ /* the base URL is written before the URL itself */
+ if(base)
+ if(fprint(ctlfd, "baseurl %s", base) < 0)
+ sysfatal("baseurl ctl write: %r");
+
+ if(fprint(ctlfd, "url %s", url) <= 0)
+ sysfatal("get ctl write: %r");
+
+ /* the post body is written before the body file is opened */
+ if(post){
+ snprint(buf, sizeof buf, "%s/%d/postbody", mtpt, conn);
+ if((fd = open(buf, OWRITE)) < 0)
+ sysfatal("open %s: %r", buf);
+ if(write(fd, post, strlen(post)) < 0)
+ sysfatal("post write failed: %r");
+ close(fd);
+ }
+
+ snprint(buf, sizeof buf, "%s/%d/body", mtpt, conn);
+ if((fd = open(buf, OREAD)) < 0)
+ sysfatal("open %s: %r", buf);
+
+ xfer(fd, 1);
+ exits(nil);
+}