diff options
author | cinap_lenrek <cinap_lenrek@felloff.net> | 2014-05-12 02:38:53 +0200 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@felloff.net> | 2014-05-12 02:38:53 +0200 |
commit | 679b092ee02429b444b3e8995f6db11b42008dad (patch) | |
tree | b5d9321c93474ca4c8c356cac87034386c63f666 /sys/src/cmd/htmlfmt | |
parent | 66f76c28212d3a25d4b039de2ce817fc74c5ca1e (diff) |
htmlfmt: use uhtml for character set conversion
Diffstat (limited to 'sys/src/cmd/htmlfmt')
-rw-r--r-- | sys/src/cmd/htmlfmt/dat.h | 2 | ||||
-rw-r--r-- | sys/src/cmd/htmlfmt/html.c | 31 | ||||
-rw-r--r-- | sys/src/cmd/htmlfmt/main.c | 39 |
3 files changed, 34 insertions, 38 deletions
diff --git a/sys/src/cmd/htmlfmt/dat.h b/sys/src/cmd/htmlfmt/dat.h index f3b05605c..b23b2125a 100644 --- a/sys/src/cmd/htmlfmt/dat.h +++ b/sys/src/cmd/htmlfmt/dat.h @@ -28,12 +28,10 @@ struct URLwin extern char* url; extern int aflag; extern int width; -extern int defcharset; extern char* loadhtml(int); extern char* readfile(char*, char*, int*); -extern int charset(char*); extern void* emalloc(ulong); extern char* estrdup(char*); extern char* estrstrdup(char*, char*); diff --git a/sys/src/cmd/htmlfmt/html.c b/sys/src/cmd/htmlfmt/html.c index fc3ea4915..5f9ea86be 100644 --- a/sys/src/cmd/htmlfmt/html.c +++ b/sys/src/cmd/htmlfmt/html.c @@ -285,40 +285,13 @@ rerender(URLwin *u) free(t); } -/* - * Somewhat of a hack. Not a full parse, just looks for strings in the beginning - * of the document (cistrstr only looks at first somewhat bytes). - */ -int -charset(char *s) -{ - char *meta, *emeta, *charset; - - if(defcharset == 0) - defcharset = ISO_8859_1; - meta = cistrstr(s, "<meta"); - if(meta == nil) - return defcharset; - for(emeta=meta; *emeta!='>' && *emeta!='\0'; emeta++) - ; - charset = cistrstr(s, "charset="); - if(charset == nil) - return defcharset; - charset += 8; - if(*charset == '"') - charset++; - if(cistrncmp(charset, "utf-8", 5) || cistrncmp(charset, "utf8", 4)) - return UTF_8; - return defcharset; -} - void rendertext(URLwin *u, Bytes *b) { Rune *rurl; - rurl = toStr((uchar*)u->url, strlen(u->url), ISO_8859_1); - u->items = parsehtml(b->b, b->n, rurl, u->type, charset((char*)b->b), &u->docinfo); + rurl = toStr((uchar*)u->url, strlen(u->url), UTF_8); + u->items = parsehtml(b->b, b->n, rurl, u->type, UTF_8, &u->docinfo); // free(rurl); rerender(u); diff --git a/sys/src/cmd/htmlfmt/main.c b/sys/src/cmd/htmlfmt/main.c index f85bbb484..871c658c9 100644 --- a/sys/src/cmd/htmlfmt/main.c +++ b/sys/src/cmd/htmlfmt/main.c @@ -8,7 +8,34 @@ char *url = ""; int aflag; int width = 70; -int defcharset; +char *defcharset = "latin1"; + +int +uhtml(int fd) +{ + int p[2]; + + if(pipe(p) < 0) + return fd; + switch(fork()){ + case -1: + break; + case 0: + dup(fd, 0); + dup(p[1], 1); + close(p[1]); + close(p[0]); + execl("/bin/uhtml", "uhtml", "-c", defcharset, nil); + execl("/bin/cat", "cat", nil); + exits("exec"); + default: + dup(p[0], fd); + break; + } + close(p[0]); + close(p[1]); + return fd; +} void usage(void) @@ -21,7 +48,7 @@ void main(int argc, char *argv[]) { int i, fd; - char *p, *err, *file; + char *err, *file; char errbuf[ERRMAX]; ARGBEGIN{ @@ -29,9 +56,7 @@ main(int argc, char *argv[]) aflag++; break; case 'c': - p = smprint("<meta charset=\"%s\">", EARGF(usage())); - defcharset = charset(p); - free(p); + defcharset = EARGF(usage()); break; case 'l': case 'w': err = EARGF(usage()); @@ -50,7 +75,7 @@ main(int argc, char *argv[]) err = nil; file = "<stdin>"; if(argc == 0) - err = loadhtml(0); + err = loadhtml(uhtml(0)); else for(i=0; err==nil && i<argc; i++){ file = argv[i]; @@ -60,7 +85,7 @@ main(int argc, char *argv[]) err = errbuf; break; } - err = loadhtml(fd); + err = loadhtml(uhtml(fd)); close(fd); if(err) break; |