diff options
author | cinap_lenrek <cinap_lenrek@centraldogma> | 2011-09-04 19:16:30 +0200 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@centraldogma> | 2011-09-04 19:16:30 +0200 |
commit | 6842f8712508262d0ea27692f13caa686419601e (patch) | |
tree | 8c074ca18dc70c02392fc951f6e8e21db00d7cdb /sys/src/cmd/mothra/html.h | |
parent | f6e73a6a22db00925f2f447815287bf2a086054b (diff) |
add mothra
Diffstat (limited to 'sys/src/cmd/mothra/html.h')
-rw-r--r-- | sys/src/cmd/mothra/html.h | 201 |
1 files changed, 201 insertions, 0 deletions
diff --git a/sys/src/cmd/mothra/html.h b/sys/src/cmd/mothra/html.h new file mode 100644 index 000000000..f5462ceea --- /dev/null +++ b/sys/src/cmd/mothra/html.h @@ -0,0 +1,201 @@ +/* + * Parameters + */ +#define NSTACK 100 /* html grammar is not recursive, so 30 or so should do */ +#define NHBUF 8192 /* Input buffer size */ +#define NPEEKC 3 /* Maximum lookahead */ +#define NTOKEN 1024 /* Maximum token length */ +#define NATTR 512 /* Maximum number of attributes of a tag */ +typedef struct Pair Pair; +typedef struct Tag Tag; +typedef struct Stack Stack; +typedef struct Hglob Hglob; +typedef struct Form Form; +typedef struct Entity Entity; +struct Pair{ + char *name; + char *value; +}; +struct Entity{ + char *name; + Rune value; +}; +struct Tag{ + char *name; + int action; +}; +struct Stack{ + int tag; /* html tag being processed */ + int pre; /* in preformatted text? */ + int font; /* typeface */ + int size; /* point size of text */ + int margin; /* left margin position */ + int indent; /* extra indent at paragraph start */ + int number; /* paragraph number */ + int ismap; /* flag of <img> */ + int width; /* size of image */ + int height; + int table; /* depth of table nesting */ + char image[NNAME]; /* arg of <img> */ + char link[NNAME]; /* arg of <a href=...> */ + char name[NNAME]; /* arg of <a name=...> */ +}; + +/* + * Globals -- these are packed up into a struct that gets passed around + * so that multiple parsers can run concurrently + */ +struct Hglob{ + char *tp; /* pointer in text buffer */ + char *name; /* input file name */ + int hfd; /* input file descriptor */ + char hbuf[NHBUF]; /* input buffer */ + char *hbufp; /* next character in buffer */ + char *ehbuf; /* end of good characters in buffer */ + int heof; /* end of file flag */ + int peekc[NPEEKC]; /* characters to re-read */ + int npeekc; /* # of characters to re-read */ + char token[NTOKEN]; /* if token type is TEXT */ + Pair attr[NATTR]; /* tag attribute/value pairs */ + int nsp; /* # of white-space characters before TEXT token */ + int spacc; /* place to accumulate more spaces */ + /* if negative, won't accumulate! */ + int tag; /* if token type is TAG or END */ + Stack stack[NSTACK]; /* parse stack */ + Stack *state; /* parse stack pointer */ + int lineno; /* input line number */ + int linebrk; /* flag set if we require a line-break in output */ + int para; /* flag set if we need an indent at the break */ + char *text; /* text buffer */ + char *etext; /* end of text buffer */ + Form *form; /* data for form under construction */ + Www *dst; /* where the text goes */ + char charset[NNAME]; +}; + +/* + * Token types + */ +enum{ + TAG=1, + ENDTAG, + TEXT, +}; + +/* + * Magic characters corresponding to + * literal < followed by / ! or alpha, + * literal > and + * end of file + */ +#define STAG 65536 +#define ETAG 65537 +#define EOF -1 + +/* + * fonts + */ +enum{ + ROMAN, + ITALIC, + BOLD, + CWIDTH, +}; + +/* + * font sizes + */ +enum{ + SMALL, + NORMAL, + LARGE, + ENORMOUS, +}; + +/* + * Token names for the html parser. + * Tag_end corresponds to </end> tags. + * Tag_text tags text not in a tag. + * Those two must follow the others. + */ +enum{ + Tag_comment, + Tag_a, + Tag_address, + Tag_b, + Tag_base, + Tag_blockquot, + Tag_body, + Tag_br, + Tag_center, + Tag_cite, + Tag_code, + Tag_dd, + Tag_dfn, + Tag_dir, + Tag_dl, + Tag_dt, + Tag_em, + Tag_font, + Tag_form, + Tag_h1, + Tag_h2, + Tag_h3, + Tag_h4, + Tag_h5, + Tag_h6, + Tag_head, + Tag_hr, + Tag_html, + Tag_i, + Tag_img, + Tag_input, + Tag_isindex, + Tag_kbd, + Tag_key, + Tag_li, + Tag_link, + Tag_listing, + Tag_menu, + Tag_meta, + Tag_nextid, + Tag_ol, + Tag_option, + Tag_p, + Tag_plaintext, + Tag_pre, + Tag_samp, + Tag_select, + Tag_strong, + Tag_textarea, + Tag_title, + Tag_tt, + Tag_u, + Tag_ul, + Tag_var, + Tag_xmp, + Tag_frame, /* rm 5.8.97 */ + Tag_table, /* rm 3.8.00 */ + Tag_td, + Tag_tr, + Tag_script, + Tag_style, + Tag_end, /* also used to indicate unrecognized start tag */ + + Tag_text, +}; +enum{ + NTAG=Tag_end, + END=1, /* tag must have a matching end tag */ + NOEND, /* tag must not have a matching end tag */ + OPTEND, /* tag may have a matching end tag */ + ERR, /* tag must not occur */ +}; +Tag tag[]; +Entity pl_entity[]; +int pl_entities; +void rdform(Hglob *); +void endform(Hglob *); +char *pl_getattr(Pair *, char *); +int pl_hasattr(Pair *, char *); +void pl_htmloutput(Hglob *, int, char *, Field *); |