diff options
author | cinap_lenrek <cinap_lenrek@felloff.net> | 2013-11-24 11:56:33 +0100 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@felloff.net> | 2013-11-24 11:56:33 +0100 |
commit | f2bd1de5bdba2449f01085984483702adb833fea (patch) | |
tree | fa9555065cbf71382d4a8a11037e7aaab333560f | |
parent | 3720b5ab9c4cb485c64e83d8af740aea3680123b (diff) |
webfs: support for internationalized domain name urls
-rw-r--r-- | sys/src/cmd/webfs/dat.h | 4 | ||||
-rw-r--r-- | sys/src/cmd/webfs/fns.h | 5 | ||||
-rw-r--r-- | sys/src/cmd/webfs/fs.c | 1 | ||||
-rw-r--r-- | sys/src/cmd/webfs/http.c | 2 | ||||
-rw-r--r-- | sys/src/cmd/webfs/idn.c | 267 | ||||
-rw-r--r-- | sys/src/cmd/webfs/mkfile | 2 | ||||
-rw-r--r-- | sys/src/cmd/webfs/url.c | 39 |
7 files changed, 313 insertions, 7 deletions
diff --git a/sys/src/cmd/webfs/dat.h b/sys/src/cmd/webfs/dat.h
index b6aab58e8..3f9ac3f38 100644
--- a/sys/src/cmd/webfs/dat.h
+++ b/sys/src/cmd/webfs/dat.h
@@ -68,3 +68,7 @@ int debug;
 Url *proxy;
 int timeout;
 char *whitespace;
+
+enum {
+	Domlen = 256,
+};
diff --git a/sys/src/cmd/webfs/fns.h b/sys/src/cmd/webfs/fns.h
index 655a9fe49..3fa5596a7 100644
--- a/sys/src/cmd/webfs/fns.h
+++ b/sys/src/cmd/webfs/fns.h
@@ -16,6 +16,7 @@ char* unquote(char *s, char **ps);
 #pragma varargck type "E" Str2
 
 int Efmt(Fmt*);
+int Hfmt(Fmt*);
 int Ufmt(Fmt*);
 char* Upath(Url *);
 Url* url(char *s, Url *b);
@@ -23,6 +24,10 @@ Url* saneurl(Url *u);
 int matchurl(Url *u, Url *s);
 void freeurl(Url *u);
 
+/* idn */
+char* idn2utf(char *name, char *buf, int nbuf);
+char* utf2idn(char *name, char *buf, int nbuf);
+
 /* buq */
 int buread(Buq *q, void *v, int l);
 int buwrite(Buq *q, void *v, int l);
diff --git a/sys/src/cmd/webfs/fs.c b/sys/src/cmd/webfs/fs.c
index fd8b36dab..21d6f361e 100644
--- a/sys/src/cmd/webfs/fs.c
+++ b/sys/src/cmd/webfs/fs.c
@@ -765,6 +765,7 @@ main(int argc, char *argv[])
 	quotefmtinstall();
 	fmtinstall('U', Ufmt);
 	fmtinstall('E', Efmt);
+	fmtinstall('H', Hfmt);
 
 	srv = nil;
 	mtpt = "/mnt/web";
diff --git a/sys/src/cmd/webfs/http.c b/sys/src/cmd/webfs/http.c
index e659bd725..3e023be4d 100644
--- a/sys/src/cmd/webfs/http.c
+++ b/sys/src/cmd/webfs/http.c
@@ -573,7 +573,7 @@ http(char *m, Url *u, Key *shdr, Buq *qbody, Buq *qpost)
 			ru.path = Upath(u);
 			ru.query = u->query;
 		}
-		n = snprint(buf, sizeof(buf), "%s %U HTTP/1.1\r\nHost: %s%s%s\r\n",
+		n = snprint(buf, sizeof(buf), "%s %U HTTP/1.1\r\nHost: %H%s%s\r\n",
 			method, &ru, u->host, u->port ? ":" : "", u->port ? u->port : "");
 		if(n >= sizeof(buf)-64){
 			werrstr("request too large");
diff --git a/sys/src/cmd/webfs/idn.c b/sys/src/cmd/webfs/idn.c
new file mode 100644
index 000000000..b01d54107
--- /dev/null
+++ b/sys/src/cmd/webfs/idn.c
@@ -0,0 +1,267 @@
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+#include <fcall.h>
+#include <thread.h>
+#include <9p.h>
+
+#include "dat.h"
+#include "fns.h"
+
+enum {
+	base = 36,
+	tmin = 1,
+	tmax = 26,
+	skew = 38,
+	damp = 700,
+	initial_bias = 72,
+	initial_n = 0x80,
+};
+
+static uint maxint = ~0;
+
+static uint
+decode_digit(uint cp)
+{
+	if((cp - '0') < 10)
+		return cp - ('0' - 26);
+	if((cp - 'A') < 26)
+		return cp - 'A';
+	if((cp - 'a') < 26)
+		return cp - 'a';
+	return base;
+}
+
+static char
+encode_digit(uint d, int flag)
+{
+	if(d < 26)
+		return d + (flag ? 'A' : 'a');
+	return d + ('0' - 26);
+}
+
+static uint
+adapt(uint delta, uint numpoints, int firsttime)
+{
+	uint k;
+
+	delta = firsttime ? delta / damp : delta >> 1;
+	delta += delta / numpoints;
+	for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base)
+		delta /= base - tmin;
+	return k + (base - tmin + 1) * delta / (delta + skew);
+}
+
+static int
+punyencode(uint input_length, Rune input[], uint max_out, char output[])
+{
+	uint n, delta, h, b, out, bias, j, m, q, k, t;
+
+	n = initial_n;
+	delta = out = 0;
+	bias = initial_bias;
+
+	for (j = 0; j < input_length; ++j) {
+		if ((uint)input[j] < 0x80) {
+			if (max_out - out < 2)
+				return -1;
+			output[out++] = input[j];
+		}
+	}
+
+	h = b = out;
+
+	if (b > 0)
+		output[out++] = '-';
+
+	while (h < input_length) {
+		for (m = maxint, j = 0; j < input_length; ++j) {
+			if (input[j] >= n && input[j] < m)
+				m = input[j];
+		}
+
+		if (m - n > (maxint - delta) / (h + 1))
+			return -1;
+
+		delta += (m - n) * (h + 1);
+		n = m;
+
+		for (j = 0; j < input_length; ++j) {
+			if (input[j] < n) {
+				if (++delta == 0)
+					return -1;
+			}
+
+			if (input[j] == n) {
+				for (q = delta, k = base;; k += base) {
+					if (out >= max_out)
+						return -1;
+					if (k <= bias)
+						t = tmin;
+					else if (k >= bias + tmax)
+						t = tmax;
+					else
+						t = k - bias;
+					if (q < t)
+						break;
+					output[out++] = encode_digit(t + (q - t) % (base - t), 0);
+					q = (q - t) / (base - t);
+				}
+				output[out++] = encode_digit(q, isupperrune(input[j]));
+				bias = adapt(delta, h + 1, h == b);
+				delta = 0;
+				++h;
+			}
+		}
+
+		++delta, ++n;
+	}
+
+	return (int)out;
+}
+
+static int
+punydecode(uint input_length, char input[], uint max_out, Rune output[])
+{
+	uint n, out, i, bias, b, j, in, oldi, w, k, digit, t;
+
+	n = initial_n;
+	out = i = 0;
+	bias = initial_bias;
+
+	for (b = j = 0; j < input_length; ++j)
+		if (input[j] == '-')
+			b = j;
+
+	if (b > max_out)
+		return -1;
+
+	for (j = 0; j < b; ++j) {
+		if (input[j] & 0x80)
+			return -1;
+		output[out++] = input[j];
+	}
+
+	for (in = b > 0 ? b + 1 : 0; in < input_length; ++out) {
+		for (oldi = i, w = 1, k = base;; k += base) {
+			if (in >= input_length)
+				return -1;
+			digit = decode_digit(input[in++]);
+			if (digit >= base)
+				return -1;
+			if (digit > (maxint - i) / w)
+				return -1;
+			i += digit * w;
+			if (k <= bias)
+				t = tmin;
+			else if (k >= bias + tmax)
+				t = tmax;
+			else
+				t = k - bias;
+			if (digit < t)
+				break;
+			if (w > maxint / (base - t))
+				return -1;
+			w *= (base - t);
+		}
+
+		bias = adapt(i - oldi, out + 1, oldi == 0);
+
+		if (i / (out + 1) > maxint - n)
+			return -1;
+		n += i / (out + 1);
+		i %= (out + 1);
+
+		if (out >= max_out)
+			return -1;
+
+		memmove(output + i + 1, output + i, (out - i) * sizeof *output);
+		if(((uint)input[in-1] - 'A') < 26)
+			output[i++] = toupperrune(n);
+		else
+			output[i++] = tolowerrune(n);
+	}
+
+	return (int)out;
+}
+
+/*
+ * convert punycode encoded internationalized
+ * domain name to unicode string
+ */
+char*
+idn2utf(char *name, char *buf, int nbuf)
+{
+	char *dp, *de, *cp;
+	Rune rb[Domlen], r;
+	int nc, nr, n;
+
+	cp = name;
+	dp = buf;
+	de = dp+nbuf-1;
+	for(;;){
+		nc = nr = 0;
+		while(cp[nc] != 0 && nr < nelem(rb)){	/* same bound as utf2idn: never write past rb[] */
+			n = chartorune(&r, cp+nc);
+			if(r == '.')
+				break;
+			rb[nr++] = r;
+			nc += n;
+		}
+		if(cistrncmp(cp, "xn--", 4) == 0)
+			if((nr = punydecode(nc-4, cp+4, nelem(rb), rb)) < 0)
+				return nil;
+		dp = seprint(dp, de, "%.*S", nr, rb);
+		if(dp >= de)
+			return nil;
+		if(cp[nc] == 0)
+			break;
+		*dp++ = '.';
+		cp += nc+1;
+	}
+	*dp = 0;
+	return buf;
+}
+
+/*
+ * convert unicode string to punycode
+ * encoded internationalized domain name
+ */
+char*
+utf2idn(char *name, char *buf, int nbuf)
+{
+	char *dp, *de, *cp;
+	Rune rb[Domlen], r;
+	int nc, nr, n;
+
+	dp = buf;
+	de = dp+nbuf-1;
+	cp = name;
+	for(;;){
+		nc = nr = 0;
+		while(cp[nc] != 0 && nr < nelem(rb)){
+			n = chartorune(&r, cp+nc);
+			if(r == '.')
+				break;
+			rb[nr++] = r;
+			nc += n;
+		}
+		if(nc == nr)
+			dp = seprint(dp, de, "%.*s", nc, cp);
+		else {
+			dp = seprint(dp, de, "xn--");
+			if((n = punyencode(nr, rb, de - dp, dp)) < 0)
+				return nil;
+			dp += n;
+		}
+		if(dp >= de)
+			return nil;
+		if(cp[nc] == 0)
+			break;
+		*dp++ = '.';
+		cp += nc+1;
+	}
+	*dp = 0;
+	return buf;
+}
+
diff --git a/sys/src/cmd/webfs/mkfile b/sys/src/cmd/webfs/mkfile
index 08993cf32..974a45349 100644
--- a/sys/src/cmd/webfs/mkfile
+++ b/sys/src/cmd/webfs/mkfile
@@ -3,6 +3,6 @@ BIN=/$objtype/bin
 
 TARG=webfs
 HFILES=fns.h dat.h
-OFILES=sub.$O url.$O buq.$O http.$O fs.$O
+OFILES=sub.$O url.$O buq.$O http.$O fs.$O idn.$O
 
 </sys/src/cmd/mkone
diff --git a/sys/src/cmd/webfs/url.c b/sys/src/cmd/webfs/url.c
index 784a67393..ebd538dbf 100644
--- a/sys/src/cmd/webfs/url.c
+++ b/sys/src/cmd/webfs/url.c
@@ -69,6 +69,21 @@ Efmt(Fmt *f)
 }
 
 int
+Hfmt(Fmt *f)
+{
+	char *d, *s;
+
+	s = va_arg(f->args, char*);
+	d = emalloc(Domlen);
+	if(utf2idn(s, d, Domlen) != nil)
+		fmtprint(f, "%s", d);
+	else
+		fmtprint(f, "%s", s);	/* not encodable: print the host as given */
+	free(d);	/* release the scratch buffer on both paths */
+	return 0;
+}
+
+int
 Ufmt(Fmt *f)
 {
 	char *s;
@@ -87,7 +102,7 @@ Ufmt(Fmt *f)
 		fmtprint(f, "@");
 	}
 	if(u->host){
-		fmtprint(f, strchr(u->host, ':') ? "[%s]" : "%s", u->host);
+		fmtprint(f, strchr(u->host, ':') ? "[%s]" : "%H", u->host);
 		if(u->port)
 			fmtprint(f, ":%s", u->port);
 	}
@@ -184,12 +199,17 @@ pstrdup(char **p)
 static char*
 mklowcase(char *s)
 {
-	char *p;
-
+	char *cp;
+	Rune r;
+
 	if(s == nil)
 		return s;
-	for(p = s; *p; p++)
-		*p = tolower(*p);
+	cp = s;
+	while(*cp != 0){
+		chartorune(&r, cp);
+		r = tolowerrune(r);
+		cp += runetochar(cp, &r);
+	}
 	return s;
 }
 
@@ -299,6 +319,15 @@ Out:
 	while(s = strchr(s, '+'))
 		*s++ = ' ';
 
+	if(s = u->host){
+		t = emalloc(Domlen);
+		if(idn2utf(s, t, Domlen)){
+			u->host = estrdup(t);
+			free(s);
+		}
+		free(t);
+	}
+
 	unescape(u->user, "");
 	unescape(u->pass, "");
 	unescape(u->path, "/");