From a31e4f61a4c9afb9696a7ff4ff09e02b3281a2f3 Mon Sep 17 00:00:00 2001 From: cinap_lenrek Date: Tue, 20 Sep 2011 00:38:28 +0200 Subject: uhtml: add html to unicode converter, used by mothra and page/html2ms --- sys/src/cmd/uhtml.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 sys/src/cmd/uhtml.c (limited to 'sys/src/cmd/uhtml.c') diff --git a/sys/src/cmd/uhtml.c b/sys/src/cmd/uhtml.c new file mode 100644 index 000000000..49503877e --- /dev/null +++ b/sys/src/cmd/uhtml.c @@ -0,0 +1,121 @@ +#include +#include +#include + +int nbuf; +char buf[4096+1]; +char *cset = "utf"; + +void +usage(void) +{ + fprint(2, "%s [ -h ] [ -c charset ] [ file ]\n", argv0); + exits("usage"); +} + +char* +strval(char *s) +{ + char *e, q; + + while(strchr("\t ", *s)) + s++; + q = 0; + if(*s == '"' || *s == '\'') + q = *s++; + for(e = s; *e; e++){ + if(*e == q) + break; + if(isalnum(*e)) + continue; + if(*e == '-' || *e == '_') + continue; + break; + } + if(e - s > 1) + return smprint("%.*s", (int)(e-s), s); + return nil; +} + +void +main(int argc, char *argv[]) +{ + int pfd[2], pflag = 0; + char *arg[4], *s; + + ARGBEGIN { + case 'h': + usage(); + case 'c': + cset = EARGF(usage()); + break; + case 'p': + pflag = 1; + break; + } ARGEND; + + if(*argv){ + close(0); + if(open(*argv, OREAD) != 1) + sysfatal("open: %r"); + } + if((nbuf = read(0, buf, sizeof(buf)-1)) < 0) + sysfatal("read: %r"); + buf[nbuf] = 0; + for(;;){ + if(s = cistrstr(buf, "encoding=")) + if(s = strval(s+9)){ + cset = s; + break; + } + if(s = cistrstr(buf, "charset=")) + if(s = strval(s+8)){ + cset = s; + break; + } + break; + } + + if(pflag){ + print("%s\n", cset); + exits(0); + } + + if(pipe(pfd) < 0) + sysfatal("pipe: %r"); + + if(nbuf == 0){ + write(1, buf, 0); + exits(0); + } + + switch(rfork(RFFDG|RFREND|RFPROC|RFNOWAIT)){ + case -1: + sysfatal("fork: %r"); + case 0: + dup(pfd[0], 0); + close(pfd[0]); + close(pfd[1]); + + arg[0] = "rc"; + arg[1] = "-c"; + if(strcmp(cset, "utf")) + arg[2] = smprint("tcs -f %s -t utf | tcs -f html -t utf", cset); + else + arg[2] = "tcs -f html -t utf"; + arg[3] = nil; + exec("/bin/rc", arg); + } + + dup(pfd[1], 1); + close(pfd[0]); + close(pfd[1]); + + while(nbuf > 0){ + if(write(1, buf, nbuf) != nbuf) + sysfatal("write: %r"); + if((nbuf = read(0, buf, sizeof(buf))) < 0) + sysfatal("read: %r"); + } + exits(0); +} -- cgit v1.2.3