diff options
author | cinap_lenrek <cinap_lenrek@gmx.de> | 2013-07-14 16:31:59 +0200 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@gmx.de> | 2013-07-14 16:31:59 +0200 |
commit | 1af732323852c6e6a06f7c3e84899d9055289c62 (patch) | |
tree | 4323ecb4fd661a20011acd85083a8c190a63099a /sys/src/cmd/abaco/util.c | |
parent | 0ca4c2ea45cc83057e969862d32c02a015da1dbc (diff) |
abaco: use uhtml to handle charset conversions
Diffstat (limited to 'sys/src/cmd/abaco/util.c')
-rw-r--r-- | sys/src/cmd/abaco/util.c | 130 |
1 files changed, 9 insertions, 121 deletions
diff --git a/sys/src/cmd/abaco/util.c b/sys/src/cmd/abaco/util.c index 2a704793e..7b66888f5 100644 --- a/sys/src/cmd/abaco/util.c +++ b/sys/src/cmd/abaco/util.c @@ -715,88 +715,22 @@ writeproc(void *v) sendul(sync, i); } -struct { - char *mime; - char *tcs; -}tcstab[] = { - -#include "tcs.h" - - /* not generated by the script */ - "euc_jp", "jis", - "euc_kr", "euc-k", - "windows-874", "tis", - nil, nil, -}; - -enum { - Winstart = 127, - Winend = 159 -}; - -static int winchars[] = { - 8226, /* 8226 is a bullet */ - 8226, 8226, 8218, 402, 8222, 8230, 8224, 8225, - 710, 8240, 352, 8249, 338, 8226, 8226, 8226, - 8226, 8216, 8217, 8220, 8221, 8226, 8211, 8212, - 732, 8482, 353, 8250, 339, 8226, 8226, 376 -}; - char * -tcs(char *cs, char *s, long *np) +uhtml(char *cs, char *s, long *np) { Channel *sync; Exec *e; - Rune r; long i, n; void **a; - uchar *us; char buf[BUFSIZE], cmd[50]; - char *t, *u; + char *t; int p[2], q[2]; - if(s==nil || *s=='\0' || *np==0){ - werrstr("tcs failed: no data"); - return s; - } - - if(cs == nil){ - werrstr("tcs failed: no charset"); + werrstr("uhtml failed: no data"); return s; } - if(cistrncmp(cs, "utf-8", 5)==0 || cistrncmp(cs, "utf8", 4)==0) - return s; - - for(i=0; tcstab[i].mime!=nil; i++) - if(cistrncmp(cs, tcstab[i].mime, strlen(tcstab[i].mime)) == 0) - break; - - if(tcstab[i].mime == nil){ - fprint(2, "abaco: charset: %s not supported\n", cs); - goto latin1; - } - if(cistrcmp(tcstab[i].tcs, "8859-1")==0 || cistrcmp(tcstab[i].tcs, "ascii")==0){ -latin1: - n = 0; - for(us=(uchar*)s; *us; us++) - n += runelen(*us); - n++; - t = emalloc(n); - for(us=(uchar*)s, u=t; *us; us++){ - if(*us>=Winstart && *us<=Winend) - *u++ = winchars[*us-Winstart]; - else{ - r = *us; - u += runetochar(u, &r); - } - } - *u = 0; - free(s); - return t; - } - if(pipe(p)<0 || pipe(q)<0) error("can't create pipe"); @@ -804,7 +738,7 @@ latin1: if(sync == nil) error("can't create channel"); - snprint(cmd, sizeof cmd, "tcs -f %s", tcstab[i].tcs); + snprint(cmd, 
sizeof cmd, (cs != nil && *cs != '\0') ? "uhtml -c %s" : "uhtml", cs); e = emalloc(sizeof(Exec)); e->p[0] = p[0]; e->p[1] = p[1]; @@ -818,7 +752,7 @@ latin1: close(p[0]); close(q[1]); - /* in case tcs fails */ + /* in case uhtml fails */ t = s; sync = chancreate(sizeof(ulong), 0); if(sync == nil) @@ -831,6 +765,7 @@ latin1: a[3] = (void *)*np; proccreate(writeproc, a, STACK); + i = 0; s = nil; while((n = read(q[0], buf, sizeof(buf))) > 0){ s = erealloc(s, i+n+1); @@ -840,14 +775,14 @@ latin1: } n = recvul(sync); if(n != *np) - fprint(2, "tcs: did not write %ld; wrote %uld\n", *np, n); + fprint(2, "uhtml failed: did not write %ld; wrote %uld\n", *np, n); *np = i; chanfree(sync); close(q[0]); if(s == nil){ - fprint(2, "tcs failed: can't convert charset=%s to %s\n", cs, tcstab[i].tcs); + fprint(2, "uhtml failed: can't convert charset=%s\n", cs); return t; } free(t); @@ -901,46 +836,6 @@ findctype(char *b, int l, char *keyword, char *s) return 0; } -static -int -finddocctype(char *b, int l, char *s) -{ - char *p, *e; - - p = cistrstr(s, "<meta"); - if(!p) - return -1; - p += 5; - e = strchr(s, '>'); - if(!e) - return -1; - snprint(b, l, "%.*s", (int)(e-p), p); - return 0; -} - -static -int -findxmltype(char *b, int l, char *s) -{ - char *p, *e; - - p = cistrstr(s, "<?xml "); - if(!p) - return -1; - - p += 6; - e = strstr(p, "?>"); - if(!e) - return -1; - snprint(b, l, "%.*s", (int)(e-p), p); - - return 0; -} - -/* - * servers can lie about lie about the charset, - * so we use the charset based on the priority.
- */ char * convert(Runestr ctype, char *s, long *np) { @@ -951,14 +846,7 @@ convert(Runestr ctype, char *s, long *np) snprint(buf, sizeof(buf), "%.*S", ctype.nr, ctype.r); findctype(t, sizeof(t), "charset", buf); } - if(findxmltype(buf, sizeof(buf), s)==0) - findctype(t, sizeof(t), "encoding", buf); - if(finddocctype(buf, sizeof(buf), s) == 0) - findctype(t, sizeof(t), "charset", buf); - - if(*t == '\0') - strcpy(t, charset); - return tcs(t, s, np); + return uhtml(t, s, np); } int |