summaryrefslogtreecommitdiff
path: root/sys/src/cmd/tcs/html.c
diff options
context:
space:
mode:
authorTaru Karttunen <taruti@taruti.net>2011-03-30 15:46:40 +0300
committerTaru Karttunen <taruti@taruti.net>2011-03-30 15:46:40 +0300
commite5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
treed8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/cmd/tcs/html.c
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/cmd/tcs/html.c')
-rwxr-xr-xsys/src/cmd/tcs/html.c471
1 files changed, 471 insertions, 0 deletions
diff --git a/sys/src/cmd/tcs/html.c b/sys/src/cmd/tcs/html.c
new file mode 100755
index 000000000..93bd9e5ad
--- /dev/null
+++ b/sys/src/cmd/tcs/html.c
@@ -0,0 +1,471 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "hdr.h"
+#include "conv.h"
+
+typedef struct Hchar Hchar;
+struct Hchar
+{
+ char *s;
+ Rune r;
+};
+
+/* &lt;, &gt;, &quot;, &amp; intentionally omitted */
+
+/*
+ * Names beginning with _ are names we recognize
+ * (without the underscore) but will not generate,
+ * because they are nonstandard.
+ */
+static Hchar byname[] =
+{
+ {"AElig", 198},
+ {"Aacute", 193},
+ {"Acirc", 194},
+ {"Agrave", 192},
+ {"Alpha", 913},
+ {"Aring", 197},
+ {"Atilde", 195},
+ {"Auml", 196},
+ {"Beta", 914},
+ {"Ccedil", 199},
+ {"Chi", 935},
+ {"Dagger", 8225},
+ {"Delta", 916},
+ {"ETH", 208},
+ {"Eacute", 201},
+ {"Ecirc", 202},
+ {"Egrave", 200},
+ {"Epsilon", 917},
+ {"Eta", 919},
+ {"Euml", 203},
+ {"Gamma", 915},
+ {"Iacute", 205},
+ {"Icirc", 206},
+ {"Igrave", 204},
+ {"Iota", 921},
+ {"Iuml", 207},
+ {"Kappa", 922},
+ {"Lambda", 923},
+ {"Mu", 924},
+ {"Ntilde", 209},
+ {"Nu", 925},
+ {"OElig", 338},
+ {"Oacute", 211},
+ {"Ocirc", 212},
+ {"Ograve", 210},
+ {"Omega", 937},
+ {"Omicron", 927},
+ {"Oslash", 216},
+ {"Otilde", 213},
+ {"Ouml", 214},
+ {"Phi", 934},
+ {"Pi", 928},
+ {"Prime", 8243},
+ {"Psi", 936},
+ {"Rho", 929},
+ {"Scaron", 352},
+ {"Sigma", 931},
+ {"THORN", 222},
+ {"Tau", 932},
+ {"Theta", 920},
+ {"Uacute", 218},
+ {"Ucirc", 219},
+ {"Ugrave", 217},
+ {"Upsilon", 933},
+ {"Uuml", 220},
+ {"Xi", 926},
+ {"Yacute", 221},
+ {"Yuml", 376},
+ {"Zeta", 918},
+ {"aacute", 225},
+ {"acirc", 226},
+ {"acute", 180},
+ {"aelig", 230},
+ {"agrave", 224},
+ {"alefsym", 8501},
+ {"alpha", 945},
+ {"amp", 38},
+ {"and", 8743},
+ {"ang", 8736},
+ {"aring", 229},
+ {"asymp", 8776},
+ {"atilde", 227},
+ {"auml", 228},
+ {"bdquo", 8222},
+ {"beta", 946},
+ {"brvbar", 166},
+ {"bull", 8226},
+ {"cap", 8745},
+ {"ccedil", 231},
+ {"cdots", 8943},
+ {"cedil", 184},
+ {"cent", 162},
+ {"chi", 967},
+ {"circ", 710},
+ {"clubs", 9827},
+ {"cong", 8773},
+ {"copy", 169},
+ {"crarr", 8629},
+ {"cup", 8746},
+ {"curren", 164},
+ {"dArr", 8659},
+ {"dagger", 8224},
+ {"darr", 8595},
+ {"ddots", 8945},
+ {"deg", 176},
+ {"delta", 948},
+ {"diams", 9830},
+ {"divide", 247},
+ {"eacute", 233},
+ {"ecirc", 234},
+ {"egrave", 232},
+ {"_emdash", 8212}, /* non-standard but commonly used */
+ {"empty", 8709},
+ {"emsp", 8195},
+ {"_endash", 8211}, /* non-standard but commonly used */
+ {"ensp", 8194},
+ {"epsilon", 949},
+ {"equiv", 8801},
+ {"eta", 951},
+ {"eth", 240},
+ {"euml", 235},
+ {"euro", 8364},
+ {"exist", 8707},
+ {"fnof", 402},
+ {"forall", 8704},
+ {"frac12", 189},
+ {"frac14", 188},
+ {"frac34", 190},
+ {"frasl", 8260},
+ {"gamma", 947},
+ {"ge", 8805},
+ {"gt", 62},
+ {"hArr", 8660},
+ {"harr", 8596},
+ {"hearts", 9829},
+ {"hellip", 8230},
+ {"iacute", 237},
+ {"icirc", 238},
+ {"iexcl", 161},
+ {"igrave", 236},
+ {"image", 8465},
+ {"infin", 8734},
+ {"int", 8747},
+ {"iota", 953},
+ {"iquest", 191},
+ {"isin", 8712},
+ {"iuml", 239},
+ {"kappa", 954},
+ {"lArr", 8656},
+ {"lambda", 955},
+ {"lang", 9001},
+ {"laquo", 171},
+ {"larr", 8592},
+ {"lceil", 8968},
+ {"_ldots", 8230},
+ {"ldquo", 8220},
+ {"le", 8804},
+ {"lfloor", 8970},
+ {"lowast", 8727},
+ {"loz", 9674},
+ {"lrm", 8206},
+ {"lsaquo", 8249},
+ {"lsquo", 8216},
+ {"lt", 60},
+ {"macr", 175},
+ {"mdash", 8212},
+ {"micro", 181},
+ {"middot", 183},
+ {"minus", 8722},
+ {"mu", 956},
+ {"nabla", 8711},
+ {"nbsp", 160},
+ {"ndash", 8211},
+ {"ne", 8800},
+ {"ni", 8715},
+ {"not", 172},
+ {"notin", 8713},
+ {"nsub", 8836},
+ {"ntilde", 241},
+ {"nu", 957},
+ {"oacute", 243},
+ {"ocirc", 244},
+ {"oelig", 339},
+ {"ograve", 242},
+ {"oline", 8254},
+ {"omega", 969},
+ {"omicron", 959},
+ {"oplus", 8853},
+ {"or", 8744},
+ {"ordf", 170},
+ {"ordm", 186},
+ {"oslash", 248},
+ {"otilde", 245},
+ {"otimes", 8855},
+ {"ouml", 246},
+ {"para", 182},
+ {"part", 8706},
+ {"permil", 8240},
+ {"perp", 8869},
+ {"phi", 966},
+ {"pi", 960},
+ {"piv", 982},
+ {"plusmn", 177},
+ {"pound", 163},
+ {"prime", 8242},
+ {"prod", 8719},
+ {"prop", 8733},
+ {"psi", 968},
+ {"quad", 8193},
+ {"quot", 34},
+ {"rArr", 8658},
+ {"radic", 8730},
+ {"rang", 9002},
+ {"raquo", 187},
+ {"rarr", 8594},
+ {"rceil", 8969},
+ {"rdquo", 8221},
+ {"real", 8476},
+ {"reg", 174},
+ {"rfloor", 8971},
+ {"rho", 961},
+ {"rlm", 8207},
+ {"rsaquo", 8250},
+ {"rsquo", 8217},
+ {"sbquo", 8218},
+ {"scaron", 353},
+ {"sdot", 8901},
+ {"sect", 167},
+ {"shy", 173},
+ {"sigma", 963},
+ {"sigmaf", 962},
+ {"sim", 8764},
+ {"_sp", 8194},
+ {"spades", 9824},
+ {"sub", 8834},
+ {"sube", 8838},
+ {"sum", 8721},
+ {"sup", 8835},
+ {"sup1", 185},
+ {"sup2", 178},
+ {"sup3", 179},
+ {"supe", 8839},
+ {"szlig", 223},
+ {"tau", 964},
+ {"there4", 8756},
+ {"theta", 952},
+ {"thetasym", 977},
+ {"thinsp", 8201},
+ {"thorn", 254},
+ {"tilde", 732},
+ {"times", 215},
+ {"trade", 8482},
+ {"uArr", 8657},
+ {"uacute", 250},
+ {"uarr", 8593},
+ {"ucirc", 251},
+ {"ugrave", 249},
+ {"uml", 168},
+ {"upsih", 978},
+ {"upsilon", 965},
+ {"uuml", 252},
+ {"_varepsilon", 8712},
+ {"varphi", 981},
+ {"_varpi", 982},
+ {"varrho", 1009},
+ {"vdots", 8942},
+ {"_vsigma", 962},
+ {"_vtheta", 977},
+ {"weierp", 8472},
+ {"xi", 958},
+ {"yacute", 253},
+ {"yen", 165},
+ {"yuml", 255},
+ {"zeta", 950},
+ {"zwj", 8205},
+ {"zwnj", 8204}
+};
+
+static Hchar byrune[nelem(byname)];
+
+static int
+hnamecmp(const void *va, const void *vb)
+{
+ Hchar *a, *b;
+
+ a = (Hchar*)va;
+ b = (Hchar*)vb;
+ return strcmp(a->s, b->s);
+}
+
+static int
+hrunecmp(const void *va, const void *vb)
+{
+ Hchar *a, *b;
+
+ a = (Hchar*)va;
+ b = (Hchar*)vb;
+ return a->r - b->r;
+}
+
+static void
+html_init(void)
+{
+ static int init;
+ int i;
+
+ if(init)
+ return;
+ init = 1;
+ memmove(byrune, byname, sizeof byrune);
+
+ /* Eliminate names we aren't allowed to generate. */
+ for(i=0; i<nelem(byrune); i++){
+ if(byrune[i].s[0] == '_'){
+ byrune[i].r = Runeerror;
+ byname[i].s++;
+ }
+ }
+
+ qsort(byname, nelem(byname), sizeof byname[0], hnamecmp);
+ qsort(byrune, nelem(byrune), sizeof byrune[0], hrunecmp);
+}
+
+static Rune
+findbyname(char *s)
+{
+ Hchar *h;
+ int n, m, x;
+
+ h = byname;
+ n = nelem(byname);
+ while(n > 0){
+ m = n/2;
+ x = strcmp(h[m].s, s);
+ if(x == 0)
+ return h[m].r;
+ if(x < 0){
+ h += m+1;
+ n -= m+1;
+ }else
+ n = m;
+ }
+ return Runeerror;
+}
+
+static char*
+findbyrune(Rune r)
+{
+ Hchar *h;
+ int n, m;
+
+ if(r == Runeerror)
+ return nil;
+ h = byrune;
+ n = nelem(byrune);
+ while(n > 0){
+ m = n/2;
+ if(h[m].r == r)
+ return h[m].s;
+ if(h[m].r < r){
+ h += m+1;
+ n -= m+1;
+ }else
+ n = m;
+ }
+ return nil;
+}
+
+void
+html_in(int fd, long *x, struct convert *out)
+{
+ char buf[100], *p;
+ Biobuf b;
+ Rune rbuf[N];
+ Rune *r, *er;
+ int c, i;
+
+ USED(x);
+
+ html_init();
+ r = rbuf;
+ er = rbuf+N;
+ Binit(&b, fd, OREAD);
+ while((c = Bgetrune(&b)) != Beof){
+ if(r >= er){
+ OUT(out, rbuf, r-rbuf);
+ r = rbuf;
+ }
+ if(c == '&'){
+ buf[0] = c;
+ for(i=1; i<nelem(buf)-1;){
+ c = Bgetc(&b);
+ if(c == Beof)
+ break;
+ buf[i++] = c;
+ if(strchr("; \t\r\n", c))
+ break;
+ }
+ buf[i] = 0;
+ if(buf[i-1] == ';'){
+ buf[i-1] = 0;
+ if((c = findbyname(buf+1)) != Runeerror){
+ *r++ = c;
+ continue;
+ }
+ buf[i-1] = ';';
+ if(buf[1] == '#'){
+ if(buf[2] == 'x')
+ c = strtol(buf+3, &p, 16);
+ else
+ c = strtol(buf+2, &p, 10);
+ if(*p != ';' || c >= NRUNE || c < 0)
+ goto bad;
+ *r++ = c;
+ continue;
+ }
+ }
+ bad:
+ for(p=buf; p<buf+i; ){
+ p += chartorune(r++, p);
+ if(r >= er){
+ OUT(out, rbuf, r-rbuf);
+ r = rbuf;
+ }
+ }
+ continue;
+ }
+ *r++ = c;
+ }
+ if(r > rbuf)
+ OUT(out, rbuf, r-rbuf);
+ OUT(out, rbuf, 0);
+}
+
+/*
+ * use biobuf because can use more than UTFmax bytes per rune
+ */
+void
+html_out(Rune *r, int n, long *x)
+{
+ char *s;
+ Biobuf b;
+ Rune *er;
+
+ USED(x);
+ html_init();
+ Binit(&b, 1, OWRITE);
+ er = r+n;
+ for(; r<er; r++){
+ if(*r < Runeself)
+ Bputrune(&b, *r);
+ else if((s = findbyrune(*r)) != nil)
+ Bprint(&b, "&%s;", s);
+ else
+ Bprint(&b, "&#%d;", *r);
+ }
+ Bflush(&b);
+}
+