plan9fox/sys/src/cmd/mothra/html.h
2012-03-18 15:02:23 +01:00

210 lines
4 KiB
C

/*
* Parameters
*/
#define NSTACK 100 /* html grammar is not recursive, so 30 or so should do */
#define NHBUF 8192 /* Input buffer size */
#define NPEEKC 3 /* Maximum lookahead */
#define NTOKEN 1024 /* Maximum token length */
#define NATTR 512 /* Maximum number of attributes of a tag */
typedef struct Pair Pair;
typedef struct Tag Tag;
typedef struct Stack Stack;
typedef struct Hglob Hglob;
typedef struct Form Form;
typedef struct Entity Entity;
struct Pair{
char *name;
char *value;
};
struct Entity{
char *name;
Rune value;
};
struct Tag{
char *name;
int action;
};
struct Stack{
int tag; /* html tag being processed */
int pre; /* in preformatted text? */
int font; /* typeface */
int size; /* point size of text */
int margin; /* left margin position */
int indent; /* extra indent at paragraph start */
int number; /* paragraph number */
int ismap; /* flag of <img> */
int width; /* size of image */
int height;
char image[NNAME]; /* arg of <img> */
char link[NNAME]; /* arg of <a href=...> */
char name[NNAME]; /* arg of <a name=...> */
};
/*
* Globals -- these are packed up into a struct that gets passed around
* so that multiple parsers can run concurrently
*/
struct Hglob{
char *tp; /* pointer in text buffer */
char *name; /* input file name */
int hfd; /* input file descriptor */
char hbuf[NHBUF]; /* input buffer */
char *hbufp; /* next character in buffer */
char *ehbuf; /* end of good characters in buffer */
int heof; /* end of file flag */
int peekc[NPEEKC]; /* characters to re-read */
int npeekc; /* # of characters to re-read */
char token[NTOKEN]; /* if token type is TEXT */
Pair attr[NATTR]; /* tag attribute/value pairs */
int nsp; /* # of white-space characters before TEXT token */
int spacc; /* place to accumulate more spaces */
/* if negative, won't accumulate! */
int tag; /* if token type is TAG or END */
Stack stack[NSTACK]; /* parse stack */
Stack *state; /* parse stack pointer */
int lineno; /* input line number */
int linebrk; /* flag set if we require a line-break in output */
int para; /* flag set if we need an indent at the break */
char *text; /* text buffer */
char *etext; /* end of text buffer */
Form *form; /* data for form under construction */
Www *dst; /* where the text goes */
};
/*
* Token types
*/
enum{
TAG=1,
ENDTAG,
TEXT,
};
/*
* Magic characters corresponding to
* literal < followed by / ! or alpha,
* literal > and
* end of file
*/
#define STAG 65536
#define ETAG 65537
#define EOF -1
/*
* fonts
*/
enum{
ROMAN,
ITALIC,
BOLD,
CWIDTH,
};
/*
* font sizes
*/
enum{
SMALL,
NORMAL,
LARGE,
ENORMOUS,
};
/*
* Token names for the html parser.
* Tag_end corresponds to </end> tags.
* Tag_text tags text not in a tag.
* Those two must follow the others.
*/
enum{
Tag_comment,
Tag_a,
Tag_abbr,
Tag_acronym,
Tag_address,
Tag_applet,
Tag_audio,
Tag_b,
Tag_base,
Tag_blockquot,
Tag_body,
Tag_br,
Tag_button,
Tag_center,
Tag_cite,
Tag_code,
Tag_dd,
Tag_div,
Tag_dfn,
Tag_dir,
Tag_dl,
Tag_dt,
Tag_em,
Tag_embed,
Tag_font,
Tag_form,
Tag_frame, /* rm 5.8.97 */
Tag_h1,
Tag_h2,
Tag_h3,
Tag_h4,
Tag_h5,
Tag_h6,
Tag_head,
Tag_hr,
Tag_html,
Tag_i,
Tag_iframe,
Tag_img,
Tag_input,
Tag_isindex,
Tag_kbd,
Tag_key,
Tag_li,
Tag_link,
Tag_listing,
Tag_menu,
Tag_meta,
Tag_nextid,
Tag_object,
Tag_ol,
Tag_option,
Tag_p,
Tag_plaintext,
Tag_pre,
Tag_samp,
Tag_script,
Tag_select,
Tag_strong,
Tag_style,
Tag_source,
Tag_table, /* rm 3.8.00 */
Tag_td,
Tag_textarea,
Tag_title,
Tag_tr,
Tag_tt,
Tag_u,
Tag_ul,
Tag_var,
Tag_video,
Tag_xmp,
Tag_end, /* also used to indicate unrecognized start tag */
Tag_text,
};
enum{
NTAG=Tag_end,
END=1, /* tag must have a matching end tag */
NOEND, /* tag must not have a matching end tag */
OPTEND, /* tag may have a matching end tag */
ERR, /* tag must not occur */
};
Tag tag[];
void rdform(Hglob *);
void endform(Hglob *);
char *pl_getattr(Pair *, char *);
int pl_hasattr(Pair *, char *);
void pl_htmloutput(Hglob *, int, char *, Field *);