diff options
author | Taru Karttunen <taruti@taruti.net> | 2011-03-30 15:46:40 +0300 |
---|---|---|
committer | Taru Karttunen <taruti@taruti.net> | 2011-03-30 15:46:40 +0300 |
commit | e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch) | |
tree | d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/cmd/ip/httpd/hints.c |
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/cmd/ip/httpd/hints.c')
-rwxr-xr-x | sys/src/cmd/ip/httpd/hints.c | 297 |
1 files changed, 297 insertions, 0 deletions
/*
 * hints.c — prefetch hints for httpd.
 * Maintains an in-memory table of known URLs (loaded from /sys/log/httpd/url)
 * and per-URL hint lists (loaded from /sys/log/httpd/pathstat), and emits
 * "Fresh:" response lines telling the client which related URLs it may want
 * to prefetch.  Both tables are reloaded lazily when their backing file
 * grows/shrinks and is at least 300 seconds old.
 */
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "httpd.h"
#include "httpsrv.h"

enum{ URLmax = 65536, HINTmax = 20 };
#define RECIPLOG2 1.44269504089	/* 1/ln(2): converts log() to log2() in hintprint */

char **urlname;			/* array of url strings 1,...,nurl */
static int nurl;
static uint urltab[URLmax];	/* hashstr(url) 1,...,nurl */
static int urlnext[URLmax];	/* index urltab of next url in chain */
static int urlhash[URLmax];	/* initially 0, meaning empty buckets */

/* one prefetch hint: target url (index into urlname/urltab) and its probability */
typedef struct Hint {
	ushort url;
	uchar prob;
} Hint;
Hint *hints[URLmax];	/* hints[j]: array of nhint[j] hints for url index j */
uchar nhint[URLmax];

/*
 * Length of the file behind Biobuf vb.
 * Side effect: rewinds the buffer to offset 0.
 */
vlong
Bfilelen(void *vb)
{
	Biobuf *b;
	vlong n;

	b = vb;
	n = Bseek(b, 0L, 2);
	Bseek(b, 0L, 0);
	return n;
}

/* multiplicative string hash (65599) over the bytes of key */
static uint
hashstr(char* key)
{
	/* asu works better than pjw for urls */
	uchar *k = (unsigned char*)key;
	uint h = 0;
	while(*k!=0)
		h = 65599*h + *k++;
	return h;
}

/*
 * Look up hash value url in the chained hash table.
 * Returns +index into urltab on success, else -hash (the negated
 * bucket number, so callers can insert at -result).
 */
static int
urllookup(uint url)
{
	/* returns +index into urltab, else -hash */
	int j, hash;

	hash = 1 + url%(URLmax-1);
	j = urlhash[hash];
	for(;;){
		if(j==0)
			return -hash;
		if(url==urltab[j])
			return j;
		j = urlnext[j];
	}
}

/*
 * Seconds since the file behind b was last modified.
 * If dirfstat fails, mtime is taken as 0, so the age comes out huge
 * (i.e. "old enough to reload").
 */
int
Bage(Biobuf *b)
{
	Dir *dir;
	long mtime;

	dir = dirfstat(Bfildes(b));
	if(dir != nil)
		mtime = dir->mtime;
	else
		mtime = 0;
	free(dir);	/* free(nil) is a no-op */
	return time(nil) - mtime;
}

/*
 * (Re)load the url table from /sys/log/httpd/url.
 * Each line is "999 /url/path" where 999 is the 1-based sequence number;
 * entries are interned into urltab/urlhash/urlnext and the strings are
 * packed into a single arena (freed via urlname[0] on reload).
 * filelen == -1 is a sticky "file unavailable, give up" marker.
 */
void
urlinit(void)
{
	static Biobuf *b = nil;
	static vlong filelen = 0;
	vlong newlen;
	char *s, *arena;
	int i, j, n;
	uint url;
	char *file;

	if(filelen < 0)
		return;
	file = "/sys/log/httpd/url";
	if(b == nil){
		b = Bopen(file, OREAD); /* first time */
		if(b == nil){
			syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
			filelen = -1;
			return;
		}
	}
	newlen = Bfilelen(b); /* side effect: rewinds b */
	if(newlen == filelen || Bage(b)<300)
		return;
	filelen = newlen;
	if(filelen < 0)
		return;
	if(nurl){ /* free existing tables */
		free(urlname[0]); /* arena */
		memset(urlhash,0,sizeof urlhash);
		memset(urlnext,0,sizeof urlnext);
		nurl = 0;
	}
	if(urlname==nil)
		urlname = (char**)ezalloc(URLmax*sizeof(*urlname));
	arena = (char*)ezalloc(filelen); /* enough for all the strcpy below */
	i = 1;
	while((s=Brdline(b,'\n'))!=0){
		/* read lines of the form: 999 /url/path */
		n = Blinelen(b) - 1;
		if(n>2 && s[n]=='\n'){
			s[n] = '\0';
		}else{
			sysfatal("missing fields or newline in url-db");
		}
		j = strtoul(s,&s,10);
		while(*s==' ')
			s++;
		/* sequence numbers must be consecutive from 1 */
		if(i++!=j)
			sysfatal("url-db synchronization error");
		url = hashstr(s);
		j = urllookup(url);
		if(j>=0)
			sysfatal("duplicate url");
		j = -j;	/* j is now the bucket to insert into */
		nurl++;
		if(nurl>=URLmax){
			/* too many urls: discard everything and run without hints */
			syslog(0, HTTPLOG, "urlinit overflow at %s",s);
			free(urlname[0]); /* arena */
			memset(urlhash,0,sizeof urlhash);
			memset(urlnext,0,sizeof urlnext);
			nurl = 0;
			return;
		}
		urltab[nurl] = url;
		urlnext[nurl] = urlhash[j];	/* push onto bucket chain */
		urlhash[j] = nurl;
		strcpy(arena,s);
		urlname[nurl] = arena;
		arena += strlen(s)+1;
	}
	syslog(0, HTTPLOG, "prefetch-hints url=%d (%.1fMB)", nurl, 1.e-6*(URLmax*sizeof(*urlname)+filelen));
	/* b is held open, because namespace will be chopped */
}

/*
 * (Re)load the hint statistics from /sys/log/httpd/pathstat.
 * The file is a sequence of binary records:
 *   iq(2 bytes, big-endian)  n(1 byte)  then n * [url(2 bytes) prob(1 byte)]
 * giving, for url index iq, n hints.  All Hint structs are packed into one
 * arena; the previous arena is kept in oldarena and freed on reload
 * (nhint[] is zeroed then, so stale hints[] pointers are never followed).
 */
void
statsinit(void)
{
	static Biobuf *b = nil;
	static vlong filelen = 0;
	vlong newlen;
	int iq, n, i, nstats = 0;
	uchar *s, buf[3+HINTmax*3]; /* iq, n, (url,prob)... */
	Hint *arena, *h;
	char *file;
	static void *oldarena = nil;

	file = "/sys/log/httpd/pathstat";
	if(b == nil){
		if(filelen == -1)
			return; /* if failed first time */
		b = Bopen(file, OREAD); /* first time */
		if(b == nil){
			syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
			filelen = -1;
			return;
		}
	}
	newlen = Bfilelen(b); /* side effect: rewinds b */
	if(newlen == filelen || Bage(b)<300)
		return;
	filelen = newlen;
	if(oldarena){
		free(oldarena);
		memset(nhint,0,sizeof nhint);
	}
	/* each 3-byte (url,prob) group yields at most one Hint, so filelen/3 suffices */
	arena = (Hint*)ezalloc((filelen/3)*sizeof(Hint));
	oldarena = arena;
	for(;;){
		i = Bread(b,buf,3);
		if(i<3)
			break;	/* EOF (or short read) ends the record stream */
		nstats++;
		iq = buf[0];
		iq = (iq<<8) | buf[1];
		n = buf[2];
		h = arena;
		arena += n;
		hints[iq] = h;
		nhint[iq] = n;
		if(Bread(b,buf,3*n)!=3*n)
			sysfatal("stats read error");
		for(i=0; i<n; i++){
			s = &buf[3*i];
			h[i].url = (s[0]<<8) | s[1];
			h[i].prob = s[2];
		}
	}
	syslog(0, HTTPLOG, "prefetch-hints stats=%d (%.1fMB)", nstats, 1.e-6*((filelen/3)*sizeof(Hint)));
}

/*
 * Canonicalize url in place: collapse runs of '/' and drop a trailing '/',
 * then apply site-specific prefix stripping for cm.bell-labs.com paths.
 * Assumes url is non-empty and starts with '/' (first char is never changed).
 */
void
urlcanon(char *url)
{
	/* all the changes here can be implemented by rewriting in-place */
	char *p, *q;

	/* remove extraneous '/' in the middle and at the end */
	p = url+1; /* first char needs no change */
	q = p;
	while(q[0]){
		if(q[0]=='/' && q[-1]=='/'){
			q++;	/* skip duplicate slash */
			continue;
		}
		*p++ = *q++;
	}
	if(q[-1]=='/'){ /* trailing '/' */
		p[-1] = '\0';
	}else{
		p[0] = '\0';
	}

	/* specific to the cm.bell-labs.com web site */
	if(strncmp(url,"/cm/",4)==0){
		if(strchr("cims",url[4]) && strncmp(url+5,"s/who/",6)==0)
			/* strip off /cm/cs */
			memmove(url,url+6,strlen(url+6)+1);
		else if(strncmp(url+4,"ms/what/wavelet",15)==0)
			/* /cm/ms/what */
			memmove(url,url+11,strlen(url+11)+1);
	}
}

/*
 * Emit "Fresh:" headers for hints associated with uri, skipping hints whose
 * probability is below thresh and (when havej identifies a valid url index)
 * hints the client already has.  For each surviving hint the target file
 * under webroot is stat'ed to build an etag ("qid.path"v"qid.vers") and a
 * log2-of-length size estimate; unreadable targets are silently skipped.
 * Finishes with "Fresh: have/j" so the client can cache the hint set.
 */
void
hintprint(HConnect *hc, Hio *hout, char *uri, int thresh, int havej)
{
	int i, j, pr, prefix, fd, siz, havei, newhint = 0, n;
	char *query, *sf, etag[32], *wurl;
	Dir *dir;
	Hint *h, *haveh;

	query = hstrdup(hc, uri);
	urlcanon(query);
	j = urllookup(hashstr(query));
	if(j < 0)
		return;	/* uri not in url table: nothing to hint */
	query = strrchr(uri,'/');
	if(!query)
		return; /* can't happen */
	prefix = query-uri+1; /* = strlen(dirname)+1 */
	h = hints[j];
	for(i=0; i<nhint[j]; i++){
		if(havej > 0 && havej < URLmax){ /* exclude hints client has */
			haveh = hints[havej];
			for(havei=0; havei<nhint[havej]; havei++)
				if( haveh[havei].url == h[i].url)
					goto continuei;
		}
		sf = urlname[h[i].url];
		pr = h[i].prob;
		if(pr<thresh)
			break;	/* hints are in decreasing-probability order -- TODO confirm against pathstat writer */
		n = strlen(webroot) + strlen(sf) + 1;
		wurl = halloc(hc, n);
		strcpy(wurl, webroot);
		strcat(wurl, sf);
		fd = open(wurl, OREAD);
		if(fd<0)
			continue;
		dir = dirfstat(fd);
		if(dir == nil){
			close(fd);
			continue;
		}
		close(fd);
		snprint(etag, sizeof(etag), "\"%lluxv%lux\"", dir->qid.path, dir->qid.vers);
		siz = (int)( log((double)dir->length) * RECIPLOG2 + 0.9999);	/* ceil(log2(length)) */
		free(dir);
		/* if the hint is a sibling file (same directory, no deeper '/'), send it relative */
		if(strncmp(uri,sf,prefix)==0 && strchr(sf+prefix,'/')==0 && sf[prefix]!=0)
			sf = sf+prefix;
		hprint(hout, "Fresh: %d,%s,%d,%s\r\n", pr, etag, siz, sf);
		newhint++;
continuei: ;
	}
	if(newhint)
		hprint(hout, "Fresh: have/%d\r\n", j);
}