summaryrefslogtreecommitdiff
path: root/sys/src/cmd/abaco/util.c
diff options
context:
space:
mode:
author: cinap_lenrek <cinap_lenrek@gmx.de> 2013-07-14 16:31:59 +0200
committer: cinap_lenrek <cinap_lenrek@gmx.de> 2013-07-14 16:31:59 +0200
commit: 1af732323852c6e6a06f7c3e84899d9055289c62 (patch)
tree: 4323ecb4fd661a20011acd85083a8c190a63099a /sys/src/cmd/abaco/util.c
parent: 0ca4c2ea45cc83057e969862d32c02a015da1dbc (diff)
abaco: use uhtml to handle charset conversions
Diffstat (limited to 'sys/src/cmd/abaco/util.c')
-rw-r--r-- sys/src/cmd/abaco/util.c 130
1 files changed, 9 insertions, 121 deletions
diff --git a/sys/src/cmd/abaco/util.c b/sys/src/cmd/abaco/util.c
index 2a704793e..7b66888f5 100644
--- a/sys/src/cmd/abaco/util.c
+++ b/sys/src/cmd/abaco/util.c
@@ -715,88 +715,22 @@ writeproc(void *v)
sendul(sync, i);
}
-struct {
- char *mime;
- char *tcs;
-}tcstab[] = {
-
-#include "tcs.h"
-
- /* not generated by the script */
- "euc_jp", "jis",
- "euc_kr", "euc-k",
- "windows-874", "tis",
- nil, nil,
-};
-
-enum {
- Winstart = 127,
- Winend = 159
-};
-
-static int winchars[] = {
- 8226, /* 8226 is a bullet */
- 8226, 8226, 8218, 402, 8222, 8230, 8224, 8225,
- 710, 8240, 352, 8249, 338, 8226, 8226, 8226,
- 8226, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
- 732, 8482, 353, 8250, 339, 8226, 8226, 376
-};
-
char *
-tcs(char *cs, char *s, long *np)
+uhtml(char *cs, char *s, long *np)
{
Channel *sync;
Exec *e;
- Rune r;
long i, n;
void **a;
- uchar *us;
char buf[BUFSIZE], cmd[50];
- char *t, *u;
+ char *t;
int p[2], q[2];
-
if(s==nil || *s=='\0' || *np==0){
- werrstr("tcs failed: no data");
- return s;
- }
-
- if(cs == nil){
- werrstr("tcs failed: no charset");
+ werrstr("uhtml failed: no data");
return s;
}
- if(cistrncmp(cs, "utf-8", 5)==0 || cistrncmp(cs, "utf8", 4)==0)
- return s;
-
- for(i=0; tcstab[i].mime!=nil; i++)
- if(cistrncmp(cs, tcstab[i].mime, strlen(tcstab[i].mime)) == 0)
- break;
-
- if(tcstab[i].mime == nil){
- fprint(2, "abaco: charset: %s not supported\n", cs);
- goto latin1;
- }
- if(cistrcmp(tcstab[i].tcs, "8859-1")==0 || cistrcmp(tcstab[i].tcs, "ascii")==0){
-latin1:
- n = 0;
- for(us=(uchar*)s; *us; us++)
- n += runelen(*us);
- n++;
- t = emalloc(n);
- for(us=(uchar*)s, u=t; *us; us++){
- if(*us>=Winstart && *us<=Winend)
- *u++ = winchars[*us-Winstart];
- else{
- r = *us;
- u += runetochar(u, &r);
- }
- }
- *u = 0;
- free(s);
- return t;
- }
-
if(pipe(p)<0 || pipe(q)<0)
error("can't create pipe");
@@ -804,7 +738,7 @@ latin1:
if(sync == nil)
error("can't create channel");
- snprint(cmd, sizeof cmd, "tcs -f %s", tcstab[i].tcs);
+ snprint(cmd, sizeof cmd, (cs != nil && *cs != '\0') ? "uhtml -c %s" : "uhtml", cs);
e = emalloc(sizeof(Exec));
e->p[0] = p[0];
e->p[1] = p[1];
@@ -818,7 +752,7 @@ latin1:
close(p[0]);
close(q[1]);
- /* in case tcs fails */
+ /* in case uhtml fails */
t = s;
sync = chancreate(sizeof(ulong), 0);
if(sync == nil)
@@ -831,6 +765,7 @@ latin1:
a[3] = (void *)*np;
proccreate(writeproc, a, STACK);
+ i = 0;
s = nil;
while((n = read(q[0], buf, sizeof(buf))) > 0){
s = erealloc(s, i+n+1);
@@ -840,14 +775,14 @@ latin1:
}
n = recvul(sync);
if(n != *np)
- fprint(2, "tcs: did not write %ld; wrote %uld\n", *np, n);
+ fprint(2, "uhtml failed: did not write %ld; wrote %uld\n", *np, n);
*np = i;
chanfree(sync);
close(q[0]);
if(s == nil){
- fprint(2, "tcs failed: can't convert charset=%s to %s\n", cs, tcstab[i].tcs);
+ fprint(2, "uhtml failed: can't convert charset=%s\n", cs);
return t;
}
free(t);
@@ -901,46 +836,6 @@ findctype(char *b, int l, char *keyword, char *s)
return 0;
}
-static
-int
-finddocctype(char *b, int l, char *s)
-{
- char *p, *e;
-
- p = cistrstr(s, "<meta");
- if(!p)
- return -1;
- p += 5;
- e = strchr(s, '>');
- if(!e)
- return -1;
- snprint(b, l, "%.*s", (int)(e-p), p);
- return 0;
-}
-
-static
-int
-findxmltype(char *b, int l, char *s)
-{
- char *p, *e;
-
- p = cistrstr(s, "<?xml ");
- if(!p)
- return -1;
-
- p += 6;
- e = strstr(p, "?>");
- if(!e)
- return -1;
- snprint(b, l, "%.*s", (int)(e-p), p);
-
- return 0;
-}
-
-/*
- * servers can lie about lie about the charset,
- * so we use the charset based on the priority.
- */
char *
convert(Runestr ctype, char *s, long *np)
{
@@ -951,14 +846,7 @@ convert(Runestr ctype, char *s, long *np)
snprint(buf, sizeof(buf), "%.*S", ctype.nr, ctype.r);
findctype(t, sizeof(t), "charset", buf);
}
- if(findxmltype(buf, sizeof(buf), s)==0)
- findctype(t, sizeof(t), "encoding", buf);
- if(finddocctype(buf, sizeof(buf), s) == 0)
- findctype(t, sizeof(t), "charset", buf);
-
- if(*t == '\0')
- strcpy(t, charset);
- return tcs(t, s, np);
+ return uhtml(t, s, np);
}
int