summaryrefslogtreecommitdiff
path: root/sys/src/cmd/mothra/html.h
diff options
context:
space:
mode:
author cinap_lenrek <cinap_lenrek@centraldogma> 2011-09-04 19:16:30 +0200
committer cinap_lenrek <cinap_lenrek@centraldogma> 2011-09-04 19:16:30 +0200
commit 6842f8712508262d0ea27692f13caa686419601e (patch)
tree 8c074ca18dc70c02392fc951f6e8e21db00d7cdb /sys/src/cmd/mothra/html.h
parent f6e73a6a22db00925f2f447815287bf2a086054b (diff)
add mothra
Diffstat (limited to 'sys/src/cmd/mothra/html.h')
-rw-r--r-- sys/src/cmd/mothra/html.h 201
1 files changed, 201 insertions, 0 deletions
diff --git a/sys/src/cmd/mothra/html.h b/sys/src/cmd/mothra/html.h
new file mode 100644
index 000000000..f5462ceea
--- /dev/null
+++ b/sys/src/cmd/mothra/html.h
@@ -0,0 +1,201 @@
+/*
+ * Parameters
+ */
+#define NSTACK 100 /* entries in Hglob.stack; html grammar is not recursive, so 30 or so should do */
+#define NHBUF 8192 /* Input buffer size: chars in Hglob.hbuf */
+#define NPEEKC 3 /* Maximum lookahead: slots in Hglob.peekc */
+#define NTOKEN 1024 /* Maximum token length: chars in Hglob.token */
+#define NATTR 512 /* Maximum number of attributes of a tag: entries in Hglob.attr */
+typedef struct Pair Pair; /* name/value pair */
+typedef struct Tag Tag; /* element type of tag[] */
+typedef struct Stack Stack; /* one parse stack entry */
+typedef struct Hglob Hglob; /* per-parser state */
+typedef struct Form Form; /* struct Form is only forward-declared in this header */
+typedef struct Entity Entity; /* element type of pl_entity[] */
+struct Pair{ /* a name/value pair; Hglob.attr holds a tag's attributes as these */
+ char *name;
+ char *value;
+};
+struct Entity{ /* pairs a name with a Rune; element type of pl_entity[] */
+ char *name;
+ Rune value; /* Rune is not defined in this header */
+};
+struct Tag{ /* element type of tag[] */
+ char *name;
+ int action; /* presumably one of END, NOEND, OPTEND, ERR -- TODO confirm */
+};
+struct Stack{ /* one parse stack entry; Hglob.stack holds NSTACK of these */
+ int tag; /* html tag being processed */
+ int pre; /* in preformatted text? */
+ int font; /* typeface */
+ int size; /* point size of text */
+ int margin; /* left margin position */
+ int indent; /* extra indent at paragraph start */
+ int number; /* paragraph number */
+ int ismap; /* flag of <img> */
+ int width; /* size of image */
+ int height; /* size of image (with width) */
+ int table; /* depth of table nesting */
+ char image[NNAME]; /* arg of <img>; NNAME is not defined in this header */
+ char link[NNAME]; /* arg of <a href=...> */
+ char name[NNAME]; /* arg of <a name=...> */
+};
+
+/*
+ * Globals -- these are packed up into a struct that gets passed around
+ * so that multiple parsers can run concurrently
+ */
+struct Hglob{ /* state of one parser */
+ char *tp; /* pointer in text buffer */
+ char *name; /* input file name */
+ int hfd; /* input file descriptor */
+ char hbuf[NHBUF]; /* input buffer */
+ char *hbufp; /* next character in buffer */
+ char *ehbuf; /* end of good characters in buffer */
+ int heof; /* end of file flag */
+ int peekc[NPEEKC]; /* characters to re-read */
+ int npeekc; /* # of characters to re-read */
+ char token[NTOKEN]; /* if token type is TEXT */
+ Pair attr[NATTR]; /* tag attribute/value pairs */
+ int nsp; /* # of white-space characters before TEXT token */
+ int spacc; /* place to accumulate more spaces */
+ /* if negative, won't accumulate! */
+ int tag; /* if token type is TAG or ENDTAG */
+ Stack stack[NSTACK]; /* parse stack */
+ Stack *state; /* parse stack pointer */
+ int lineno; /* input line number */
+ int linebrk; /* flag set if we require a line-break in output */
+ int para; /* flag set if we need an indent at the break */
+ char *text; /* text buffer */
+ char *etext; /* end of text buffer */
+ Form *form; /* data for form under construction */
+ Www *dst; /* where the text goes; Www is not defined in this header */
+ char charset[NNAME]; /* charset name -- presumably of the input; TODO confirm */
+};
+
+/*
+ * Token types
+ */
+enum{ /* token types; numbering starts at 1, so 0 is not a token type */
+ TAG=1,
+ ENDTAG, /* = 2 */
+ TEXT, /* = 3; Hglob.token and Hglob.nsp are documented in terms of TEXT tokens */
+};
+
+/*
+ * Magic characters corresponding to
+ * literal < followed by / ! or alpha,
+ * literal > and
+ * end of file
+ */
+#define STAG 65536 /* literal < followed by / ! or alpha */
+#define ETAG 65537 /* literal > */
+#define EOF -1 /* end of file */
+
+/*
+ * fonts
+ */
+enum{ /* presumably the values of Stack.font -- TODO confirm */
+ ROMAN, /* = 0 */
+ ITALIC,
+ BOLD,
+ CWIDTH, /* = 3 */
+};
+
+/*
+ * font sizes
+ */
+enum{ /* presumably the values of Stack.size -- TODO confirm */
+ SMALL, /* = 0 */
+ NORMAL,
+ LARGE,
+ ENORMOUS, /* = 3 */
+};
+
+/*
+ * Token names for the html parser.
+ * Tag_end corresponds to </end> tags.
+ * Tag_text tags text not in a tag.
+ * Those two must follow the others.
+ */
+enum{ /* tag numbers, counted from 0 */
+ Tag_comment, /* = 0 */
+ Tag_a,
+ Tag_address,
+ Tag_b,
+ Tag_base,
+ Tag_blockquot,
+ Tag_body,
+ Tag_br,
+ Tag_center,
+ Tag_cite,
+ Tag_code,
+ Tag_dd,
+ Tag_dfn,
+ Tag_dir,
+ Tag_dl,
+ Tag_dt,
+ Tag_em,
+ Tag_font,
+ Tag_form,
+ Tag_h1,
+ Tag_h2,
+ Tag_h3,
+ Tag_h4,
+ Tag_h5,
+ Tag_h6,
+ Tag_head,
+ Tag_hr,
+ Tag_html,
+ Tag_i,
+ Tag_img,
+ Tag_input,
+ Tag_isindex,
+ Tag_kbd,
+ Tag_key,
+ Tag_li,
+ Tag_link,
+ Tag_listing,
+ Tag_menu,
+ Tag_meta,
+ Tag_nextid,
+ Tag_ol,
+ Tag_option,
+ Tag_p,
+ Tag_plaintext,
+ Tag_pre,
+ Tag_samp,
+ Tag_select,
+ Tag_strong,
+ Tag_textarea,
+ Tag_title,
+ Tag_tt,
+ Tag_u,
+ Tag_ul,
+ Tag_var,
+ Tag_xmp,
+ Tag_frame, /* rm 5.8.97 */
+ Tag_table, /* rm 3.8.00 */
+ Tag_td,
+ Tag_tr,
+ Tag_script,
+ Tag_style,
+ Tag_end, /* also used to indicate unrecognized start tag; NTAG is defined as this value */
+
+ Tag_text, /* text not in a tag; = Tag_end+1 */
+};
+enum{ /* tag count, then the end-tag rules */
+ NTAG=Tag_end, /* number of tag names that precede Tag_end (Tag_comment is 0) */
+ END=1, /* tag must have a matching end tag */
+ NOEND, /* tag must not have a matching end tag */
+ OPTEND, /* tag may have a matching end tag */
+ ERR, /* tag must not occur */
+};
+Tag tag[]; /* size and contents not given here; presumably one entry per tag number -- TODO confirm */
+Entity pl_entity[]; /* size and contents not given here */
+int pl_entities; /* presumably the number of entries in pl_entity -- TODO confirm */
+void rdform(Hglob *); /* NOTE(review): the three data declarations above lack extern -- confirm the toolchain accepts that */
+void endform(Hglob *); /* presumably finishes the form that rdform handles -- TODO confirm */
+char *pl_getattr(Pair *, char *); /* presumably returns the value of the named attribute -- TODO confirm */
+int pl_hasattr(Pair *, char *); /* presumably tests whether the named attribute is present -- TODO confirm */
+void pl_htmloutput(Hglob *, int, char *, Field *); /* Field is not defined in this header */