diff options
author | Taru Karttunen <taruti@taruti.net> | 2011-03-30 15:46:40 +0300 |
---|---|---|
committer | Taru Karttunen <taruti@taruti.net> | 2011-03-30 15:46:40 +0300 |
commit | e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch) | |
tree | d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/cmd/html2ms.c |
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/cmd/html2ms.c')
-rwxr-xr-x | sys/src/cmd/html2ms.c | 601 |
1 files changed, 601 insertions, 0 deletions
diff --git a/sys/src/cmd/html2ms.c b/sys/src/cmd/html2ms.c new file mode 100755 index 000000000..9cd8fa4a8 --- /dev/null +++ b/sys/src/cmd/html2ms.c @@ -0,0 +1,601 @@ +#include <u.h> +#include <libc.h> +#include <ctype.h> +#include <bio.h> + +enum +{ + SSIZE = 10, + + /* list types */ + Lordered = 0, + Lunordered, + Lmenu, + Ldir, + +}; + +Biobuf in, out; +int lastc = '\n'; +int inpre = 0; + +/* stack for fonts */ +char *fontstack[SSIZE]; +char *font = "R"; +int fsp; + +/* stack for lists */ +struct +{ + int type; + int ord; +} liststack[SSIZE]; +int lsp; + +int quoting; + +typedef struct Goobie Goobie; +struct Goobie +{ + char *name; + void (*f)(Goobie*, char*); + void (*ef)(Goobie*, char*); +}; + +void eatwhite(void); +void escape(void); + +typedef void Action(Goobie*, char*); + +Action g_ignore; +Action g_unexpected; +Action g_title; +Action g_p; +Action g_h; +Action g_li; +Action g_list, g_listend; +Action g_pre; +Action g_fpush, g_fpop; +Action g_indent, g_exdent; +Action g_dt; +Action g_display; +Action g_displayend; +Action g_table, g_tableend, g_caption, g_captionend; +Action g_br, g_hr; + +Goobie gtab[] = +{ + "!--", g_ignore, g_unexpected, + "!doctype", g_ignore, g_unexpected, + "a", g_ignore, g_ignore, + "address", g_display, g_displayend, + "b", g_fpush, g_fpop, + "base", g_ignore, g_unexpected, + "blink", g_ignore, g_ignore, + "blockquote", g_ignore, g_ignore, + "body", g_ignore, g_ignore, + "br", g_br, g_unexpected, + "caption", g_caption, g_captionend, + "center", g_ignore, g_ignore, + "cite", g_ignore, g_ignore, + "code", g_ignore, g_ignore, + "dd", g_ignore, g_unexpected, + "dfn", g_ignore, g_ignore, + "dir", g_list, g_listend, + "dl", g_indent, g_exdent, + "dt", g_dt, g_unexpected, + "em", g_ignore, g_ignore, + "font", g_ignore, g_ignore, + "form", g_ignore, g_ignore, + "h1", g_h, g_p, + "h2", g_h, g_p, + "h3", g_h, g_p, + "h4", g_h, g_p, + "h5", g_h, g_p, + "h6", g_h, g_p, + "head", g_ignore, g_ignore, + "hr", g_hr, g_unexpected, + "html", g_ignore, g_ignore, + "i", g_fpush, g_fpop, + "input", g_ignore, g_unexpected, + "img", g_ignore, g_unexpected, + "isindex", g_ignore, g_unexpected, + "kbd", g_fpush, g_fpop, + "key", g_ignore, g_ignore, + "li", g_li, g_unexpected, + "link", g_ignore, g_unexpected, + "listing", g_ignore, g_ignore, + "menu", g_list, g_listend, + "meta", g_ignore, g_unexpected, + "nextid", g_ignore, g_unexpected, + "ol", g_list, g_listend, + "option", g_ignore, g_unexpected, + "p", g_p, g_ignore, + "plaintext", g_ignore, g_unexpected, + "pre", g_pre, g_displayend, + "samp", g_ignore, g_ignore, + "select", g_ignore, g_ignore, + "strong", g_ignore, g_ignore, + "table", g_table, g_tableend, + "textarea", g_ignore, g_ignore, + "title", g_title, g_ignore, + "tt", g_fpush, g_fpop, + "u", g_ignore, g_ignore, + "ul", g_list, g_listend, + "var", g_ignore, g_ignore, + "xmp", g_ignore, g_ignore, + 0, 0, 0, +}; + +typedef struct Entity Entity; +struct Entity +{ + char *name; + Rune value; +}; + +Entity pl_entity[]= +{ +"#SPACE", L' ', "#RS", L'\n', "#RE", L'\r', "quot", L'"', +"AElig", L'Æ', "Aacute", L'Á', "Acirc", L'Â', "Agrave", L'À', "Aring", L'Å', +"Atilde", L'Ã', "Auml", L'Ä', "Ccedil", L'Ç', "ETH", L'Ð', "Eacute", L'É', +"Ecirc", L'Ê', "Egrave", L'È', "Euml", L'Ë', "Iacute", L'Í', "Icirc", L'Î', +"Igrave", L'Ì', "Iuml", L'Ï', "Ntilde", L'Ñ', "Oacute", L'Ó', "Ocirc", L'Ô', +"Ograve", L'Ò', "Oslash", L'Ø', "Otilde", L'Õ', "Ouml", L'Ö', "THORN", L'Þ', +"Uacute", L'Ú', "Ucirc", L'Û', "Ugrave", L'Ù', "Uuml", L'Ü', "Yacute", L'Ý', +"aacute", L'á', "acirc", L'â', "aelig", L'æ', "agrave", L'à', "amp", L'&', +"aring", L'å', "atilde", L'ã', "auml", L'ä', "ccedil", L'ç', "eacute", L'é', +"ecirc", L'ê', "egrave", L'è', "eth", L'ð', "euml", L'ë', "gt", L'>', +"iacute", L'í', "icirc", L'î', "igrave", L'ì', "iuml", L'ï', "lt", L'<', +"ntilde", L'ñ', "oacute", L'ó', "ocirc", L'ô', "ograve", L'ò', "oslash", L'ø', +"otilde", L'õ', "ouml", L'ö', "szlig", L'ß', "thorn", L'þ', "uacute", L'ú', +"ucirc", L'û', "ugrave", L'ù', "uuml", L'ü', "yacute", L'ý', "yuml", L'ÿ', +0 +}; + +int +cistrcmp(char *a, char *b) +{ + int c, d; + + for(;; a++, b++){ + d = tolower(*a); + c = d - tolower(*b); + if(c) + break; + if(d == 0) + break; + } + return c; +} + +int +readupto(char *buf, int n, char d, char notme) +{ + char *p; + int c; + + buf[0] = 0; + for(p = buf;; p++){ + c = Bgetc(&in); + if(c < 0){ + *p = 0; + return -1; + } + if(c == notme){ + Bungetc(&in); + return -1; + } + if(c == d){ + *p = 0; + return 0; + } + *p = c; + if(p == buf + n){ + *p = 0; + Bprint(&out, "<%s", buf); + return -1; + } + } +} + +void +dogoobie(void) +{ + char *arg, *type; + Goobie *g; + char buf[1024]; + int closing; + + if(readupto(buf, sizeof(buf), '>', '<') < 0){ + Bprint(&out, "<%s", buf); + return; + } + type = buf; + if(*type == '/'){ + type++; + closing = 1; + } else + closing = 0; + arg = strchr(type, ' '); + if(arg == 0) + arg = strchr(type, '\r'); + if(arg == 0) + arg = strchr(type, '\n'); + if(arg) + *arg++ = 0; + for(g = gtab; g->name; g++) + if(cistrcmp(type, g->name) == 0){ + if(closing){ + if(g->ef){ + (*g->ef)(g, arg); + return; + } + } else { + if(g->f){ + (*g->f)(g, arg); + return; + } + } + } + if(closing) + type--; + if(arg) + Bprint(&out, "<%s %s>\n", type, arg); + else + Bprint(&out, "<%s>\n", type); +} + +void +main(void) +{ + int c, pos; + + Binit(&in, 0, OREAD); + Binit(&out, 1, OWRITE); + + pos = 0; + for(;;){ + c = Bgetc(&in); + if(c < 0) + return; + switch(c){ + case '<': + dogoobie(); + break; + case '&': + escape(); + break; + case '\r': + pos = 0; + break; + case '\n': + if(quoting){ + Bputc(&out, '"'); + quoting = 0; + } + if(lastc != '\n') + Bputc(&out, '\n'); + /* can't emit leading spaces in filled troff docs */ + if (!inpre) + eatwhite(); + lastc = c; + break; + default: + ++pos; + if(!inpre && isascii(c) && isspace(c) && pos > 80){ + Bputc(&out, '\n'); + eatwhite(); + pos = 0; + }else + Bputc(&out, c); + lastc = c; + break; + } + } +} + +void +escape(void) +{ + int c; + Entity *e; + char buf[8]; + + if(readupto(buf, sizeof(buf), ';', '\n') < 0){ + Bprint(&out, "&%s", buf); + return; + } + for(e = pl_entity; e->name; e++) + if(strcmp(buf, e->name) == 0){ + Bprint(&out, "%C", e->value); + return; + } + if(*buf == '#'){ + c = atoi(buf+1); + if(isascii(c) && isprint(c)){ + Bputc(&out, c); + return; + } + } + Bprint(&out, "&%s;", buf); +} + +/* + * whitespace is not significant to HTML, but newlines + * and leading spaces are significant to troff. + */ +void +eatwhite(void) +{ + int c; + + for(;;){ + c = Bgetc(&in); + if(c < 0) + break; + if(!isspace(c)){ + Bungetc(&in); + break; + } + } +} + +/* + * print at start of line + */ +void +printsol(char *fmt, ...) +{ + va_list arg; + + if(quoting){ + Bputc(&out, '"'); + quoting = 0; + } + if(lastc != '\n') + Bputc(&out, '\n'); + va_start(arg, fmt); + Bvprint(&out, fmt, arg); + va_end(arg); + lastc = '\n'; +} + +void +g_ignore(Goobie *g, char *arg) +{ + USED(g, arg); +} + +void +g_unexpected(Goobie *g, char *arg) +{ + USED(arg); + fprint(2, "unexpected %s ending\n", g->name); +} + +void +g_title(Goobie *g, char *arg) +{ + USED(arg); + printsol(".TL\n", g->name); +} + +void +g_p(Goobie *g, char *arg) +{ + USED(arg); + printsol(".LP\n", g->name); +} + +void +g_h(Goobie *g, char *arg) +{ + USED(arg); + printsol(".SH %c\n", g->name[1]); +} + +void +g_list(Goobie *g, char *arg) +{ + USED(arg); + + if(lsp != SSIZE){ + switch(g->name[0]){ + case 'o': + liststack[lsp].type = Lordered; + liststack[lsp].ord = 0; + break; + default: + liststack[lsp].type = Lunordered; + break; + } + } + lsp++; +} + +void +g_br(Goobie *g, char *arg) +{ + USED(g, arg); + printsol(".br\n"); +} + +void +g_li(Goobie *g, char *arg) +{ + USED(g, arg); + if(lsp <= 0 || lsp > SSIZE){ + printsol(".IP \\(bu\n"); + return; + } + switch(liststack[lsp-1].type){ + case Lunordered: + printsol(".IP \\(bu\n"); + break; + case Lordered: + printsol(".IP %d\n", ++liststack[lsp-1].ord); + break; + } +} + +void +g_listend(Goobie *g, char *arg) +{ + USED(g, arg); + if(--lsp < 0) + lsp = 0; + printsol(".LP\n"); +} + +void +g_display(Goobie *g, char *arg) +{ + USED(g, arg); + printsol(".DS\n"); +} + +void +g_pre(Goobie *g, char *arg) +{ + USED(g, arg); + printsol(".DS L\n"); + inpre = 1; +} + +void +g_displayend(Goobie *g, char *arg) +{ + USED(g, arg); + printsol(".DE\n"); + inpre = 0; +} + +void +g_fpush(Goobie *g, char *arg) +{ + USED(arg); + if(fsp < SSIZE) + fontstack[fsp] = font; + fsp++; + switch(g->name[0]){ + case 'b': + font = "B"; + break; + case 'i': + font = "I"; + break; + case 'k': /* kbd */ + case 't': /* tt */ + font = "(CW"; + break; + } + Bprint(&out, "\\f%s", font); +} + +void +g_fpop(Goobie *g, char *arg) +{ + USED(g, arg); + fsp--; + if(fsp < SSIZE) + font = fontstack[fsp]; + else + font = "R"; + + Bprint(&out, "\\f%s", font); +} + +void +g_indent(Goobie *g, char *arg) +{ + USED(g, arg); + printsol(".RS\n"); +} + +void +g_exdent(Goobie *g, char *arg) +{ + USED(g, arg); + printsol(".RE\n"); +} + +void +g_dt(Goobie *g, char *arg) +{ + USED(g, arg); + printsol(".IP \""); + quoting = 1; +} + +void +g_hr(Goobie *g, char *arg) +{ + USED(g, arg); + printsol(".br\n"); + printsol("\\l'5i'\n"); +} + + +/* +<table border> +<caption><font size="+1"><b>Cumulative Class Data</b></font></caption> +<tr><th rowspan=2>DOSE<br>mg/kg</th><th colspan=2>PARALYSIS</th><th colspan=2>DEATH</th> +</tr> +<tr><th width=80>Number</th><th width=80>Percent</th><th width=80>Number</th><th width=80>Percent</th> +</tr> +<tr align=center> +<td>0.1</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> +</tr> +<tr align=center> +<td>0.2</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> +</tr> +<tr align=center> +<td>0.3</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> +</tr> +<tr align=center> +<td>0.4</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> +</tr> +<tr align=center> +<td>0.5</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> +</tr> +<tr align=center> +<td>0.6</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> +</tr> +<tr align=center> +<td>0.7</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> +</tr> +<tr align=center> +<td>0.8</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> +</tr> +<tr align=center> +<td>0.8 oral</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> +</tr> +</table> +*/ + +void +g_table(Goobie *g, char *arg) +{ + USED(g, arg); + printsol(".TS\ncenter ;\n"); +} + +void +g_tableend(Goobie *g, char *arg) +{ + USED(g, arg); + printsol(".TE\n"); +} + +void +g_caption(Goobie *g, char *arg) +{ + USED(g, arg); +} + +void +g_captionend(Goobie *g, char *arg) +{ + USED(g, arg); +} |