summaryrefslogtreecommitdiff
path: root/sys/src/cmd/troff2html/troff2html.c
diff options
context:
space:
mode:
authorTaru Karttunen <taruti@taruti.net>2011-03-30 15:46:40 +0300
committerTaru Karttunen <taruti@taruti.net>2011-03-30 15:46:40 +0300
commite5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
treed8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/cmd/troff2html/troff2html.c
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/cmd/troff2html/troff2html.c')
-rwxr-xr-xsys/src/cmd/troff2html/troff2html.c795
1 files changed, 795 insertions, 0 deletions
diff --git a/sys/src/cmd/troff2html/troff2html.c b/sys/src/cmd/troff2html/troff2html.c
new file mode 100755
index 000000000..ad134c4a7
--- /dev/null
+++ b/sys/src/cmd/troff2html/troff2html.c
@@ -0,0 +1,795 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+enum{
+ Nfont = 11,
+ Wid = 20, /* tmac.anhtml sets page width to 20" so we can recognize .nf text */
+};
+
+typedef uintptr Char;
+typedef struct Troffchar Troffchar;
+typedef struct Htmlchar Htmlchar;
+typedef struct Font Font;
+typedef struct HTMLfont HTMLfont;
+
+/*
+ * a Char is >= 32 bits. low 16 bits are the rune. higher are attributes.
+ * must be able to hold a pointer.
+ */
+enum
+{
+ Italic = 16,
+ Bold,
+ CW,
+ Indent1,
+ Indent2,
+ Indent3,
+ Heading = 25,
+ Anchor = 26, /* must be last */
+};
+
+enum /* magic emissions */
+{
+ Estring = 0,
+ Epp = 1<<16,
+};
+
+int attrorder[] = { Indent1, Indent2, Indent3, Heading, Anchor, Italic, Bold, CW };
+
+int nest[10];
+int nnest;
+
+struct Troffchar
+{
+ char *name;
+ char *value;
+};
+
+struct Htmlchar
+{
+ char *utf;
+ char *name;
+ int value;
+};
+
+#include "chars.h"
+
+struct Font{
+ char *name;
+ HTMLfont *htmlfont;
+};
+
+struct HTMLfont{
+ char *name;
+ char *htmlname;
+ int bit;
+};
+
+/* R must be first; it's the default representation for fonts we don't recognize */
+HTMLfont htmlfonts[] =
+{
+ "R", nil, 0,
+ "LucidaSans", nil, 0,
+ "I", "i", Italic,
+ "LucidaSansI", "i", Italic,
+ "CW", "tt", CW,
+ "LucidaCW", "tt", CW,
+ nil, nil,
+};
+
+#define TABLE "<table border=0 cellpadding=0 cellspacing=0>"
+
+char*
+onattr[8*sizeof(int)] =
+{
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ "<i>", /* italic */
+ "<b>", /* bold */
+ "<tt><font size=+1>", /* cw */
+ "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent1 */
+ "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent2 */
+ "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent3 */
+ 0,
+ 0,
+ 0,
+ "<p><font size=+1><b>", /* heading 25 */
+ "<unused>", /* anchor 26 */
+};
+
+char*
+offattr[8*sizeof(int)] =
+{
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ "</i>", /* italic */
+ "</b>", /* bold */
+ "</font></tt>", /* cw */
+ "<-/table>", /* indent1 */
+ "<-/table>", /* indent2 */
+ "<-/table>", /* indent3 */
+ 0,
+ 0,
+ 0,
+ "</b></font>", /* heading 25 */
+ "</a>", /* anchor 26 */
+};
+
+Font *font[Nfont];
+
+Biobuf bout;
+int debug = 0;
+
+/* troff state */
+int page = 1;
+int ft = 1;
+int vp = 0;
+int hp = 0;
+int ps = 1;
+int res = 720;
+
+int didP = 0;
+int atnewline = 1;
+int prevlineH = 0;
+Char attr = 0; /* or'ed into each Char */
+
+Char *chars;
+int nchars;
+int nalloc;
+char** anchors; /* allocated in order */
+int nanchors;
+
+char *filename;
+int cno;
+char buf[8192];
+char *title = "Plan 9 man page";
+
+void process(Biobuf*, char*);
+void mountfont(int, char*);
+void switchfont(int);
+void header(char*);
+void flush(void);
+void trailer(void);
+
+void*
+emalloc(ulong n)
+{
+ void *p;
+
+ p = malloc(n);
+ if(p == nil)
+ sysfatal("malloc failed: %r");
+ return p;
+}
+
+void*
+erealloc(void *p, ulong n)
+{
+
+ p = realloc(p, n);
+ if(p == nil)
+ sysfatal("realloc failed: %r");
+ return p;
+}
+
+char*
+estrdup(char *s)
+{
+ char *t;
+
+ t = strdup(s);
+ if(t == nil)
+ sysfatal("strdup failed: %r");
+ return t;
+}
+
+void
+usage(void)
+{
+ fprint(2, "usage: troff2html [-d] [-t title] [file ...]\n");
+ exits("usage");
+}
+
+int
+hccmp(const void *va, const void *vb)
+{
+ Htmlchar *a, *b;
+
+ a = (Htmlchar*)va;
+ b = (Htmlchar*)vb;
+ return a->value - b->value;
+}
+
+void
+main(int argc, char *argv[])
+{
+ int i;
+ Biobuf in, *inp;
+ Rune r;
+
+ for(i=0; i<nelem(htmlchars); i++){
+ chartorune(&r, htmlchars[i].utf);
+ htmlchars[i].value = r;
+ }
+ qsort(htmlchars, nelem(htmlchars), sizeof(htmlchars[0]), hccmp);
+
+ ARGBEGIN{
+ case 't':
+ title = ARGF();
+ if(title == nil)
+ usage();
+ break;
+ case 'd':
+ debug++;
+ break;
+ default:
+ usage();
+ }ARGEND
+
+ Binit(&bout, 1, OWRITE);
+ if(argc == 0){
+ header(title);
+ Binit(&in, 0, OREAD);
+ process(&in, "<stdin>");
+ }else{
+ header(title);
+ for(i=0; i<argc; i++){
+ inp = Bopen(argv[i], OREAD);
+ if(inp == nil)
+ sysfatal("can't open %s: %r", argv[i]);
+ process(inp, argv[i]);
+ Bterm(inp);
+ }
+ }
+ flush();
+ trailer();
+ exits(nil);
+}
+
+void
+emitchar(Char c)
+{
+ if(nalloc == nchars){
+ nalloc += 10000;
+ chars = realloc(chars, nalloc*sizeof(chars[0]));
+ if(chars == nil)
+ sysfatal("malloc failed: %r");
+ }
+ chars[nchars++] = c;
+}
+
+void
+emit(Rune r)
+{
+ emitchar(r | attr);
+ /*
+ * Close man page references early, so that
+ * .IR proof (1),
+ * doesn't make the comma part of the link.
+ */
+ if(r == ')')
+ attr &= ~(1<<Anchor);
+}
+
+void
+emitstr(char *s)
+{
+ emitchar(Estring);
+ emitchar((Char)s);
+}
+
+int indentlevel;
+int linelen;
+
+void
+iputrune(Biobuf *b, Rune r)
+{
+ int i;
+
+ if(linelen++ > 60 && r == ' ')
+ r = '\n';
+ Bputrune(b, r);
+ if(r == '\n'){
+ for(i=0; i<indentlevel; i++)
+ Bprint(b, " ");
+ linelen = 0;
+ }
+}
+
+void
+iputs(Biobuf *b, char *s)
+{
+ if(s[0]=='<' && s[1]=='+'){
+ iputrune(b, '\n');
+ Bprint(b, "<%s", s+2);
+ indentlevel++;
+ iputrune(b, '\n');
+ }else if(s[0]=='<' && s[1]=='-'){
+ indentlevel--;
+ iputrune(b, '\n');
+ Bprint(b, "<%s", s+2);
+ iputrune(b, '\n');
+ }else
+ Bprint(b, "%s", s);
+}
+
+void
+setattr(Char a)
+{
+ Char on, off;
+ int i, j;
+
+ on = a & ~attr;
+ off = attr & ~a;
+
+ /* walk up the nest stack until we reach something we need to turn off. */
+ for(i=0; i<nnest; i++)
+ if(off&(1<<nest[i]))
+ break;
+
+ /* turn off everything above that */
+ for(j=nnest-1; j>=i; j--)
+ iputs(&bout, offattr[nest[j]]);
+
+ /* turn on everything we just turned off but didn't want to */
+ for(j=i; j<nnest; j++)
+ if(a&(1<<nest[j]))
+ iputs(&bout, onattr[nest[j]]);
+ else
+ nest[j] = 0;
+
+ /* shift the zeros (turned off things) up */
+ for(i=j=0; i<nnest; i++)
+ if(nest[i] != 0)
+ nest[j++] = nest[i];
+ nnest = j;
+
+ /* now turn on the new attributes */
+ for(i=0; i<nelem(attrorder); i++){
+ j = attrorder[i];
+ if(on&(1<<j)){
+ if(j == Anchor)
+ onattr[j] = anchors[nanchors++];
+ iputs(&bout, onattr[j]);
+ if(nnest >= nelem(nest))
+ sysfatal("nesting too deep");
+ nest[nnest++] = j;
+ }
+ }
+ attr = a;
+}
+
+void
+flush(void)
+{
+ int i;
+ Char c, a;
+
+ nanchors = 0;
+ for(i=0; i<nchars; i++){
+ c = chars[i];
+ if(c == Estring){
+ /* next word is string to print */
+ iputs(&bout, (char*)chars[++i]);
+ continue;
+ }
+ if(c == Epp){
+ iputrune(&bout, '\n');
+ iputs(&bout, TABLE "<tr height=5><td></table>");
+ iputrune(&bout, '\n');
+ continue;
+ }
+ a = c & ~0xFFFF;
+ c &= 0xFFFF;
+ /*
+ * If we're going to something off after a space,
+ * let's just turn it off before.
+ */
+ if(c == ' ' && i<nchars-1 && (chars[i+1]&0xFFFF) >= 32)
+ a ^= a & ~chars[i+1];
+ setattr(a);
+ iputrune(&bout, c & 0xFFFF);
+ }
+}
+
+void
+header(char *s)
+{
+ Bprint(&bout, "<head>\n");
+ Bprint(&bout, "<title>%s</title>\n", s);
+ Bprint(&bout, "<meta content=\"text/html; charset=utf-8\" http-equiv=Content-Type>\n");
+ Bprint(&bout, "</head>\n");
+ Bprint(&bout, "<body bgcolor=#ffffff>\n");
+}
+
+void
+trailer(void)
+{
+
+#ifdef LUCENT
+ Tm *t;
+ t = localtime(time(nil));
+ Bprint(&bout, TABLE "<tr height=20><td></table>\n");
+ Bprint(&bout, "<font size=-1><a href=\"http://www.lucent.com/copyright.html\">\n");
+ Bprint(&bout, "Copyright</A> &#169; %d Alcatel-Lucent. All rights reserved.</font>\n", t->year+1900);
+#endif
+ Bprint(&bout, "</body></html>\n");
+}
+
+int
+getc(Biobuf *b)
+{
+ cno++;
+ return Bgetrune(b);
+}
+
+void
+ungetc(Biobuf *b)
+{
+ cno--;
+ Bungetrune(b);
+}
+
+char*
+getline(Biobuf *b)
+{
+ int i, c;
+
+ for(i=0; i<sizeof buf; i++){
+ c = getc(b);
+ if(c == Beof)
+ return nil;
+ buf[i] = c;
+ if(c == '\n'){
+ buf[i] = '\0';
+ break;
+ }
+ }
+ return buf;
+}
+
+int
+getnum(Biobuf *b)
+{
+ int i, c;
+
+ i = 0;
+ for(;;){
+ c = getc(b);
+ if(c<'0' || '9'<c){
+ ungetc(b);
+ break;
+ }
+ i = i*10 + (c-'0');
+ }
+ return i;
+}
+
+char*
+getstr(Biobuf *b)
+{
+ int i, c;
+
+ for(i=0; i<sizeof buf; i++){
+ /* must get bytes not runes */
+ cno++;
+ c = Bgetc(b);
+ if(c == Beof)
+ return nil;
+ buf[i] = c;
+ if(c == '\n' || c==' ' || c=='\t'){
+ ungetc(b);
+ buf[i] = '\0';
+ break;
+ }
+ }
+ return buf;
+}
+
+int
+setnum(Biobuf *b, char *name, int min, int max)
+{
+ int i;
+
+ i = getnum(b);
+ if(debug > 2)
+ fprint(2, "set %s = %d\n", name, i);
+ if(min<=i && i<max)
+ return i;
+ sysfatal("value of %s is %d; min %d max %d at %s:#%d", name, i, min, max, filename, cno);
+ return i;
+}
+
+void
+xcmd(Biobuf *b)
+{
+ char *p, *fld[16], buf[1024];
+
+ int i, nfld;
+
+ p = getline(b);
+ if(p == nil)
+ sysfatal("xcmd error: %r");
+ if(debug)
+ fprint(2, "x command '%s'\n", p);
+ nfld = tokenize(p, fld, nelem(fld));
+ if(nfld == 0)
+ return;
+ switch(fld[0][0]){
+ case 'f':
+ /* mount font */
+ if(nfld != 3)
+ break;
+ i = atoi(fld[1]);
+ if(i<0 || Nfont<=i)
+ sysfatal("font %d out of range at %s:#%d", i, filename, cno);
+ mountfont(i, fld[2]);
+ return;
+ case 'i':
+ /* init */
+ return;
+ case 'r':
+ if(nfld<2 || atoi(fld[1])!=res)
+ sysfatal("typesetter has unexpected resolution %s", fld[1]? fld[1] : "<unspecified>");
+ return;
+ case 's':
+ /* stop */
+ return;
+ case 't':
+ /* trailer */
+ return;
+ case 'T':
+ if(nfld!=2 || strcmp(fld[1], "utf")!=0)
+ sysfatal("output for unknown typesetter type %s", fld[1]);
+ return;
+ case 'X':
+ if(nfld<3 || strcmp(fld[1], "html")!=0)
+ break;
+ /* is it a man reference of the form cp(1)? */
+ /* X manref start/end cp (1) */
+ if(nfld==6 && strcmp(fld[2], "manref")==0){
+ /* was the right macro; is it the right form? */
+ if(strlen(fld[5])>=3 &&
+ fld[5][0]=='(' && fld[5][2]==')' &&
+ '0'<=fld[5][1] && fld[5][1]<='9'){
+ if(strcmp(fld[3], "start") == 0){
+ /* set anchor attribute and remember string */
+ attr |= (1<<Anchor);
+ snprint(buf, sizeof buf,
+ "<a href=\"/magic/man2html/%c/%s\">",
+ fld[5][1], fld[4]);
+ nanchors++;
+ anchors = erealloc(anchors, nanchors*sizeof(char*));
+ anchors[nanchors-1] = estrdup(buf);
+ }else if(strcmp(fld[3], "end") == 0)
+ attr &= ~(1<<Anchor);
+ }
+ }else if(strcmp(fld[2], "manPP") == 0){
+ didP = 1;
+ emitchar(Epp);
+ }else if(nfld<4 || strcmp(fld[2], "manref")!=0){
+ if(nfld>2 && strcmp(fld[2], "<P>")==0){ /* avoid triggering extra <br> */
+ didP = 1;
+ /* clear all font attributes before paragraph */
+ emitchar(' ' | (attr & ~(0xFFFF|((1<<Italic)|(1<<Bold)|(1<<CW)))));
+ emitstr("<P>");
+ /* next emittec char will turn font attributes back on */
+ }else if(nfld>2 && strcmp(fld[2], "<H4>")==0)
+ attr |= (1<<Heading);
+ else if(nfld>2 && strcmp(fld[2], "</H4>")==0)
+ attr &= ~(1<<Heading);
+ else if(debug)
+ fprint(2, "unknown in-line html %s... at %s:%#d\n",
+ fld[2], filename, cno);
+ }
+ return;
+ }
+ if(debug)
+ fprint(2, "unknown or badly formatted x command %s\n", fld[0]);
+}
+
+int
+lookup(int c, Htmlchar tab[], int ntab)
+{
+ int low, high, mid;
+
+ low = 0;
+ high = ntab - 1;
+ while(low <= high){
+ mid = (low+high)/2;
+ if(c < tab[mid].value)
+ high = mid - 1;
+ else if(c > tab[mid].value)
+ low = mid + 1;
+ else
+ return mid;
+ }
+ return -1; /* no match */
+}
+
+void
+emithtmlchar(int r)
+{
+ static char buf[10];
+ int i;
+
+ i = lookup(r, htmlchars, nelem(htmlchars));
+ if(i >= 0)
+ emitstr(htmlchars[i].name);
+ else
+ emit(r);
+}
+
+char*
+troffchar(char *s)
+{
+ int i;
+
+ for(i=0; troffchars[i].name!=nil; i++)
+ if(strcmp(s, troffchars[i].name) == 0)
+ return troffchars[i].value;
+ return "??";
+}
+
+void
+indent(void)
+{
+ int nind;
+
+ didP = 0;
+ if(atnewline){
+ if(hp != prevlineH){
+ prevlineH = hp;
+ /* these most peculiar numbers appear in the troff -man output */
+ nind = ((prevlineH-1*res)+323)/324;
+ attr &= ~((1<<Indent1)|(1<<Indent2)|(1<<Indent3));
+ if(nind >= 1)
+ attr |= (1<<Indent1);
+ if(nind >= 2)
+ attr |= (1<<Indent2);
+ if(nind >= 3)
+ attr |= (1<<Indent3);
+ }
+ atnewline = 0;
+ }
+}
+
+void
+process(Biobuf *b, char *name)
+{
+ int c, r, v, i;
+ char *p;
+
+ cno = 0;
+ prevlineH = res;
+ filename = name;
+ for(;;){
+ c = getc(b);
+ switch(c){
+ case Beof:
+ /* go to ground state */
+ attr = 0;
+ emit('\n');
+ return;
+ case '\n':
+ break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ v = c-'0';
+ c = getc(b);
+ if(c<'0' || '9'<c)
+ sysfatal("illegal character motion at %s:#%d", filename, cno);
+ v = v*10 + (c-'0');
+ hp += v;
+ /* fall through to character case */
+ case 'c':
+ indent();
+ r = getc(b);
+ emithtmlchar(r);
+ break;
+ case 'D':
+ /* draw line; ignore */
+ do
+ c = getc(b);
+ while(c!='\n' && c!= Beof);
+ break;
+ case 'f':
+ v = setnum(b, "font", 0, Nfont);
+ switchfont(v);
+ break;
+ case 'h':
+ v = setnum(b, "hpos", -20000, 20000);
+ /* generate spaces if motion is large and within a line */
+ if(!atnewline && v>2*72)
+ for(i=0; i<v; i+=72)
+ emitstr("&nbsp;");
+ hp += v;
+ break;
+ case 'n':
+ setnum(b, "n1", -10000, 10000);
+ //Bprint(&bout, " N1=%d", v);
+ getc(b); /* space separates */
+ setnum(b, "n2", -10000, 10000);
+ atnewline = 1;
+ if(!didP && hp < (Wid-1)*res) /* if line is less than 19" long, probably need a line break */
+ emitstr("<br>");
+ emit('\n');
+ break;
+ case 'p':
+ page = setnum(b, "ps", -10000, 10000);
+ break;
+ case 's':
+ ps = setnum(b, "ps", 1, 1000);
+ break;
+ case 'v':
+ vp += setnum(b, "vpos", -10000, 10000);
+ /* BUG: ignore motion */
+ break;
+ case 'x':
+ xcmd(b);
+ break;
+ case 'w':
+ emit(' ');
+ break;
+ case 'C':
+ indent();
+ p = getstr(b);
+ emitstr(troffchar(p));
+ break;
+ case 'H':
+ hp = setnum(b, "hpos", 0, 20000);
+ //Bprint(&bout, " H=%d ", hp);
+ break;
+ case 'V':
+ vp = setnum(b, "vpos", 0, 10000);
+ break;
+ default:
+ fprint(2, "dhtml: unknown directive %c(0x%.2ux) at %s:#%d\n", c, c, filename, cno);
+ return;
+ }
+ }
+}
+
+HTMLfont*
+htmlfont(char *name)
+{
+ int i;
+
+ for(i=0; htmlfonts[i].name!=nil; i++)
+ if(strcmp(name, htmlfonts[i].name) == 0)
+ return &htmlfonts[i];
+ return &htmlfonts[0];
+}
+
+void
+mountfont(int pos, char *name)
+{
+ if(debug)
+ fprint(2, "mount font %s on %d\n", name, pos);
+ if(font[pos] != nil){
+ free(font[pos]->name);
+ free(font[pos]);
+ }
+ font[pos] = emalloc(sizeof(Font));
+ font[pos]->name = estrdup(name);
+ font[pos]->htmlfont = htmlfont(name);
+}
+
+void
+switchfont(int pos)
+{
+ HTMLfont *hf;
+
+ if(debug)
+ fprint(2, "font change from %d (%s) to %d (%s)\n", ft, font[ft]->name, pos, font[pos]->name);
+ if(pos == ft)
+ return;
+ hf = font[ft]->htmlfont;
+ if(hf->bit != 0)
+ attr &= ~(1<<hf->bit);
+ ft = pos;
+ hf = font[ft]->htmlfont;
+ if(hf->bit != 0)
+ attr |= (1<<hf->bit);
+}