From 679b092ee02429b444b3e8995f6db11b42008dad Mon Sep 17 00:00:00 2001 From: cinap_lenrek Date: Mon, 12 May 2014 02:38:53 +0200 Subject: htmlfmt: use uhtml for character set conversion --- sys/src/cmd/htmlfmt/dat.h | 2 -- sys/src/cmd/htmlfmt/html.c | 31 ++----------------------------- sys/src/cmd/htmlfmt/main.c | 39 ++++++++++++++++++++++++++++++++------- 3 files changed, 34 insertions(+), 38 deletions(-) (limited to 'sys/src/cmd') diff --git a/sys/src/cmd/htmlfmt/dat.h b/sys/src/cmd/htmlfmt/dat.h index f3b05605c..b23b2125a 100644 --- a/sys/src/cmd/htmlfmt/dat.h +++ b/sys/src/cmd/htmlfmt/dat.h @@ -28,12 +28,10 @@ struct URLwin extern char* url; extern int aflag; extern int width; -extern int defcharset; extern char* loadhtml(int); extern char* readfile(char*, char*, int*); -extern int charset(char*); extern void* emalloc(ulong); extern char* estrdup(char*); extern char* estrstrdup(char*, char*); diff --git a/sys/src/cmd/htmlfmt/html.c b/sys/src/cmd/htmlfmt/html.c index fc3ea4915..5f9ea86be 100644 --- a/sys/src/cmd/htmlfmt/html.c +++ b/sys/src/cmd/htmlfmt/html.c @@ -285,40 +285,13 @@ rerender(URLwin *u) free(t); } -/* - * Somewhat of a hack. Not a full parse, just looks for strings in the beginning - * of the document (cistrstr only looks at first somewhat bytes). - */ -int -charset(char *s) -{ - char *meta, *emeta, *charset; - - if(defcharset == 0) - defcharset = ISO_8859_1; - meta = cistrstr(s, "url, strlen(u->url), ISO_8859_1); - u->items = parsehtml(b->b, b->n, rurl, u->type, charset((char*)b->b), &u->docinfo); + rurl = toStr((uchar*)u->url, strlen(u->url), UTF_8); + u->items = parsehtml(b->b, b->n, rurl, u->type, UTF_8, &u->docinfo); // free(rurl); rerender(u); diff --git a/sys/src/cmd/htmlfmt/main.c b/sys/src/cmd/htmlfmt/main.c index f85bbb484..871c658c9 100644 --- a/sys/src/cmd/htmlfmt/main.c +++ b/sys/src/cmd/htmlfmt/main.c @@ -8,7 +8,34 @@ char *url = ""; int aflag; int width = 70; -int defcharset; +char *defcharset = "latin1"; + +int +uhtml(int fd) +{ + int p[2]; + + if(pipe(p) < 0) + return fd; + switch(fork()){ + case -1: + break; + case 0: + dup(fd, 0); + dup(p[1], 1); + close(p[1]); + close(p[0]); + execl("/bin/uhtml", "uhtml", "-c", defcharset, nil); + execl("/bin/cat", "cat", nil); + exits("exec"); + default: + dup(p[0], fd); + break; + } + close(p[0]); + close(p[1]); + return fd; +} void usage(void) @@ -21,7 +48,7 @@ void main(int argc, char *argv[]) { int i, fd; - char *p, *err, *file; + char *err, *file; char errbuf[ERRMAX]; ARGBEGIN{ @@ -29,9 +56,7 @@ main(int argc, char *argv[]) aflag++; break; case 'c': - p = smprint("", EARGF(usage())); - defcharset = charset(p); - free(p); + defcharset = EARGF(usage()); break; case 'l': case 'w': err = EARGF(usage()); @@ -50,7 +75,7 @@ main(int argc, char *argv[]) err = nil; file = ""; if(argc == 0) - err = loadhtml(0); + err = loadhtml(uhtml(0)); else for(i=0; err==nil && i