add mothra

author: cinap_lenrek <cinap_lenrek@centraldogma> 2011-09-04 19:16:30 +0200
committer: cinap_lenrek <cinap_lenrek@centraldogma> 2011-09-04 19:16:30 +0200
commit: 6842f8712508262d0ea27692f13caa686419601e (patch)
tree: 8c074ca18dc70c02392fc951f6e8e21db00d7cdb /sys/src/cmd/mothra/html.h
parent: f6e73a6a22db00925f2f447815287bf2a086054b (diff)
1 files changed, 201 insertions, 0 deletions
diff --git a/sys/src/cmd/mothra/html.h b/sys/src/cmd/mothra/html.h
new file mode 100644
index 000000000..f5462ceea
--- /dev/null
+++ b/sys/src/cmd/mothra/html.h
@@ -0,0 +1,201 @@
+/*
+ * Parameters
+ */
+#define	NSTACK	100	/* entries in Hglob.stack; html grammar is not recursive, so 30 or so should do */
+#define	NHBUF	8192	/* input buffer size: bytes in Hglob.hbuf */
+#define	NPEEKC	3	/* maximum lookahead: slots in Hglob.peekc */
+#define	NTOKEN	1024	/* maximum token length: bytes in Hglob.token */
+#define	NATTR	512	/* maximum number of attributes of a tag: entries in Hglob.attr */
+typedef struct Pair Pair;	/* name/value strings */
+typedef struct Tag Tag;	/* tag name plus action */
+typedef struct Stack Stack;	/* parse stack entry */
+typedef struct Hglob Hglob;	/* parser state passed to the functions below */
+typedef struct Form Form;	/* only named here; struct Form is not defined in this header */
+typedef struct Entity Entity;	/* name plus Rune value */
+struct Pair{		/* one name/value pair; Hglob.attr holds tag attributes as these */
+	char *name;	/* name string */
+	char *value;	/* value string */
+};
+struct Entity{		/* element type of the pl_entity[] table */
+	char *name;	/* entity name */
+	Rune value;	/* Rune associated with name */
+};
+struct Tag{		/* element type of the tag[] table */
+	char *name;	/* tag name */
+	int action;	/* NOTE(review): presumably one of END, NOEND, OPTEND, ERR -- confirm */
+};
+struct Stack{		/* one entry of the parse stack (element type of Hglob.stack) */
+	int tag;		/* html tag being processed */
+	int pre;		/* in preformatted text? */
+	int font;		/* typeface */
+	int size;		/* point size of text */
+	int margin;		/* left margin position */
+	int indent;		/* extra indent at paragraph start */
+	int number;		/* paragraph number */
+	int ismap;		/* flag of <img> */
+	int width;		/* size of image */
+	int height;		/* NOTE(review): presumably the other half of the image size -- confirm */
+	int	table;		/* depth of table nesting */
+	char image[NNAME];	/* arg of <img>; NNAME is not defined in this header */
+	char link[NNAME];	/* arg of <a href=...> */
+	char name[NNAME];	/* arg of <a name=...> */
+};
+
+/*
+ * Globals -- these are packed up into a struct that gets passed around
+ * so that multiple parsers can run concurrently
+ */
+struct Hglob{		/* all state of one parser instance */
+	char *tp;		/* pointer in text buffer */
+	char *name;		/* input file name */
+	int hfd;		/* input file descriptor */
+	char hbuf[NHBUF];	/* input buffer */
+	char *hbufp;		/* next character in buffer */
+	char *ehbuf;		/* end of good characters in buffer */
+	int heof;		/* end of file flag */
+	int peekc[NPEEKC];	/* characters to re-read */
+	int npeekc;		/* # of characters to re-read */
+	char token[NTOKEN];	/* if token type is TEXT */
+	Pair attr[NATTR];	/* tag attribute/value pairs */
+	int nsp;		/* # of white-space characters before TEXT token */
+	int spacc;		/* place to accumulate more spaces */
+				/* if negative, won't accumulate! */
+	int tag;		/* if token type is TAG or END */
+	Stack stack[NSTACK];	/* parse stack */
+	Stack *state;		/* parse stack pointer */
+	int lineno;		/* input line number */
+	int linebrk;		/* flag set if we require a line-break in output */
+	int para;		/* flag set if we need an indent at the break */
+	char *text;		/* text buffer */
+	char *etext;		/* end of text buffer */
+	Form *form;		/* data for form under construction */
+	Www *dst;		/* where the text goes */
+	char charset[NNAME];	/* NOTE(review): presumably the document's character set name -- confirm */
+};
+
+/*
+ * Token types
+ */
+enum{			/* numbering starts at 1, so 0 is not a token type */
+	TAG=1,		/* Hglob.tag is documented as set for this type */
+	ENDTAG,		/* = 2 */
+	TEXT,		/* = 3; Hglob.token is documented as set for this type */
+};
+
+/*
+ * Magic characters corresponding to
+ *	literal < followed by / ! or alpha,
+ *	literal > and
+ *	end of file
+ */
+#define STAG	65536	/* literal < followed by / ! or alpha */
+#define ETAG	65537	/* literal > */
+#define EOF	-1	/* end of file; NOTE(review): expansion is not parenthesized */
+
+/*
+ * fonts
+ */
+enum{			/* numbered consecutively from 0 */
+	ROMAN,		/* = 0 */
+	ITALIC,		/* = 1 */
+	BOLD,		/* = 2 */
+	CWIDTH,		/* = 3; NOTE(review): presumably constant-width -- confirm */
+};
+
+/*
+ * font sizes
+ */
+enum{			/* numbered consecutively from 0 */
+	SMALL,		/* = 0 */
+	NORMAL,		/* = 1 */
+	LARGE,		/* = 2 */
+	ENORMOUS,	/* = 3 */
+};
+
+/*
+ * Token names for the html parser.
+ * Tag_end corresponds to </end> tags.
+ * Tag_text tags text not in a tag.
+ * Those two must follow the others.
+ */
+enum{			/* values start at 0 and increase by one per enumerator */
+	Tag_comment,	/* = 0 */
+	Tag_a,
+	Tag_address,
+	Tag_b,
+	Tag_base,
+	Tag_blockquot,	/* NOTE(review): spelled without a final 'e' -- confirm the name it is matched against */
+	Tag_body,
+	Tag_br,
+	Tag_center,
+	Tag_cite,
+	Tag_code,
+	Tag_dd,
+	Tag_dfn,
+	Tag_dir,
+	Tag_dl,
+	Tag_dt,
+	Tag_em,
+	Tag_font,
+	Tag_form,
+	Tag_h1,
+	Tag_h2,
+	Tag_h3,
+	Tag_h4,
+	Tag_h5,
+	Tag_h6,
+	Tag_head,
+	Tag_hr,
+	Tag_html,
+	Tag_i,
+	Tag_img,
+	Tag_input,
+	Tag_isindex,
+	Tag_kbd,
+	Tag_key,
+	Tag_li,
+	Tag_link,
+	Tag_listing,
+	Tag_menu,
+	Tag_meta,
+	Tag_nextid,
+	Tag_ol,
+	Tag_option,
+	Tag_p,
+	Tag_plaintext,
+	Tag_pre,
+	Tag_samp,
+	Tag_select,
+	Tag_strong,
+	Tag_textarea,
+	Tag_title,
+	Tag_tt,
+	Tag_u,
+	Tag_ul,
+	Tag_var,
+	Tag_xmp,
+	Tag_frame,	/* rm 5.8.97 */
+	Tag_table,	/* rm 3.8.00 */
+	Tag_td,
+	Tag_tr,
+	Tag_script,
+	Tag_style,
+	Tag_end,	/* also used to indicate unrecognized start tag */
+
+	Tag_text,	/* text not in a tag; must stay after Tag_end */
+};
+enum{
+	NTAG=Tag_end,	/* Tag_end equals the number of Tag_* enumerators that precede it */
+	END=1,	/* tag must have a matching end tag */
+	NOEND,	/* tag must not have a matching end tag */
+	OPTEND,	/* tag may have a matching end tag */
+	ERR,		/* tag must not occur */
+};
+Tag tag[];		/* tag table; NOTE(review): declared without extern or size -- confirm where it is defined */
+Entity pl_entity[];	/* entity table; NOTE(review): same declaration form as tag[] -- confirm where it is defined */
+int pl_entities;	/* NOTE(review): presumably the number of pl_entity entries -- confirm */
+void rdform(Hglob *);	/* NOTE(review): presumably handles form input for Hglob.form -- confirm */
+void endform(Hglob *);	/* NOTE(review): presumably finishes the form under construction -- confirm */
+char *pl_getattr(Pair *, char *);	/* NOTE(review): presumably returns the value of the named attribute -- confirm */
+int pl_hasattr(Pair *, char *);	/* NOTE(review): presumably tests whether the named attribute is present -- confirm */
+void pl_htmloutput(Hglob *, int, char *, Field *);	/* Field is not declared in this header */
author	cinap_lenrek <cinap_lenrek@centraldogma>	2011-09-04 19:16:30 +0200
committer	cinap_lenrek <cinap_lenrek@centraldogma>	2011-09-04 19:16:30 +0200
commit	6842f8712508262d0ea27692f13caa686419601e (patch)
tree	8c074ca18dc70c02392fc951f6e8e21db00d7cdb /sys/src/cmd/mothra/html.h
parent	f6e73a6a22db00925f2f447815287bf2a086054b (diff)