/*
 * Declarations for the HTML library (libhtml).
 */
#pragma lib "libhtml.a"
|
|
#pragma src "/sys/src/libhtml"
|
|
|
|
/* UTILS */
|
|
extern uchar* fromStr(Rune* buf, int n, int chset);
|
|
extern Rune* toStr(uchar* buf, int n, int chset);
|
|
|
|
/* Common LEX and BUILD enums */
|
|
|
|
/*
 * Media types.
 * Values are consecutive, starting at 0.  NMEDIATYPES is the number of
 * media types, so it must remain the last enumerator.
 */
enum
{
	ApplMsword,
	ApplOctets,
	ApplPdf,
	ApplPostscript,
	ApplRtf,
	ApplFramemaker,
	ApplMsexcel,
	ApplMspowerpoint,

	UnknownType,

	Audio32kadpcm,
	AudioBasic,

	ImageCgm,
	ImageG3fax,
	ImageGif,
	ImageIef,
	ImageJpeg,
	ImagePng,
	ImageTiff,
	ImageXBit,
	ImageXBit2,
	ImageXBitmulti,
	ImageXXBitmap,

	ModelVrml,

	MultiDigest,
	MultiMixed,

	TextCss,
	TextEnriched,
	TextHtml,
	TextJavascript,
	TextPlain,
	TextRichtext,
	TextSgml,
	TextTabSeparatedValues,
	TextXml,

	VideoMpeg,
	VideoQuicktime,

	NMEDIATYPES
};
|
|
|
|
/* HTTP methods (stored in Form.method) */
enum
{
	HGet,
	HPost
};
|
|
|
|
/*
 * Charsets.
 * Values are consecutive, starting at 0; NCHARSETS is the number of
 * charsets and must remain last.
 */
enum
{
	UnknownCharset,
	US_Ascii,
	ISO_8859_1,
	UTF_8,
	Unicode,
	NCHARSETS
};
|
|
|
|
/* Frame Target IDs */
enum {
	FTtop,
	FTself,
	FTparent,
	FTblank
};
|
|
|
|
/* LEX */

/* Lexer types; Token is declared incomplete (no definition in this header). */
typedef struct Token Token;
typedef struct Attr Attr;

#pragma incomplete Token
|
|
|
|
/* BUILD */

/* Item and its variants */
typedef struct Item Item;
typedef struct Itext Itext;
typedef struct Irule Irule;
typedef struct Iimage Iimage;
typedef struct Iformfield Iformfield;
typedef struct Itable Itable;
typedef struct Ifloat Ifloat;
typedef struct Ispacer Ispacer;

/* Attributes, forms, tables, anchors, maps, and document-level structures */
typedef struct Genattr Genattr;
typedef struct SEvent SEvent;
typedef struct Formfield Formfield;
typedef struct Option Option;
typedef struct Form Form;
typedef struct Table Table;
typedef struct Tablecol Tablecol;
typedef struct Tablerow Tablerow;
typedef struct Tablecell Tablecell;
typedef struct Align Align;
typedef struct Dimen Dimen;
typedef struct Anchor Anchor;
typedef struct DestAnchor DestAnchor;
typedef struct Map Map;
typedef struct Area Area;
typedef struct Background Background;
typedef struct Kidinfo Kidinfo;
typedef struct Docinfo Docinfo;
typedef struct Stack Stack;
typedef struct Pstate Pstate;
typedef struct ItemSource ItemSource;
typedef struct Lay Lay;		/* defined in Layout module */

#pragma incomplete Lay
|
|
|
|
|
|
/*
 * Alignment types.
 * ALnone is 0; the remaining values follow consecutively.
 */
enum {
	ALnone = 0,
	ALleft,
	ALcenter,
	ALright,
	ALjustify,
	ALchar,
	ALtop,
	ALmiddle,
	ALbottom,
	ALbaseline
};
|
|
|
|
struct Align
|
|
{
|
|
uchar halign; /* one of ALnone, ALleft, etc. */
|
|
uchar valign; /* one of ALnone, ALtop, etc. */
|
|
};
|
|
|
|
/*
 * A Dimen holds a dimension specification, especially for those
 * cases when a number can be followed by a % or a * to indicate
 * percentage of total or relative weight.
 * Dnone means no dimension was specified.
 *
 * To fit in a word, the kind and the value share one int:
 * the kind occupies bits 29-30 (Dkindmask) and the value is
 * every remaining bit (Dspecmask, the complement of Dkindmask).
 */
enum {
	Dnone		= 0,
	Dpixels		= (1<<29),
	Dpercent	= (2<<29),
	Drelative	= (3<<29),
	Dkindmask	= (3<<29),
	Dspecmask	= (~Dkindmask)
};
|
|
|
|
/* Packed dimension: a D* kind or'ed with its value in a single int */
struct Dimen
{
	int	kindspec;	/* kind | spec */
};
|
|
|
|
/*
|
|
* Background is either an image or a color.
|
|
* If both are set, the image has precedence.
|
|
*/
|
|
struct Background
|
|
{
|
|
Rune* image; /* url */
|
|
int color;
|
|
};
|
|
|
|
|
|
/*
|
|
* There are about a half dozen Item variants.
|
|
* The all look like this at the start (using Plan 9 C's
|
|
* anonymous structure member mechanism),
|
|
* and then the tag field dictates what extra fields there are.
|
|
*/
|
|
struct Item
|
|
{
|
|
Item* next; /* successor in list of items */
|
|
int width; /* width in pixels (0 for floating items) */
|
|
int height; /* height in pixels */
|
|
int ascent; /* ascent (from top to baseline) in pixels */
|
|
int anchorid; /* if nonzero, which anchor we're in */
|
|
int state; /* flags and values (see below) */
|
|
Genattr*genattr; /* generic attributes and events */
|
|
int tag; /* variant discriminator: Itexttag, etc. */
|
|
};
|
|
|
|
/* Item variant tags (values of Item.tag), one per Item variant struct */
enum {
	Itexttag,
	Iruletag,
	Iimagetag,
	Iformfieldtag,
	Itabletag,
	Ifloattag,
	Ispacertag
};
|
|
|
|
struct Itext
|
|
{
|
|
Item; /* (with tag ==Itexttag) */
|
|
Rune* s; /* the characters */
|
|
int fnt; /* style*NumSize+size (see font stuff, below) */
|
|
int fg; /* Pixel (color) for text */
|
|
uchar voff; /* Voffbias+vertical offset from baseline, in pixels (+ve == down) */
|
|
uchar ul; /* ULnone, ULunder, or ULmid */
|
|
};
|
|
|
|
struct Irule
|
|
{
|
|
Item; /* (with tag ==Iruletag) */
|
|
uchar align; /* alignment spec */
|
|
uchar noshade; /* if true, don't shade */
|
|
int size; /* size attr (rule height) */
|
|
int color; /* color attr */
|
|
Dimen wspec; /* width spec */
|
|
};
|
|
|
|
|
|
struct Iimage
|
|
{
|
|
Item; /* (with tag ==Iimagetag) */
|
|
Rune* imsrc; /* image src url */
|
|
int imwidth; /* spec width (actual, if no spec) */
|
|
int imheight; /* spec height (actual, if no spec) */
|
|
Rune* altrep; /* alternate representation, in absence of image */
|
|
Map* map; /* if non-nil, client side map */
|
|
int ctlid; /* if animated */
|
|
uchar align; /* vertical alignment */
|
|
uchar hspace; /* in pixels; buffer space on each side */
|
|
uchar vspace; /* in pixels; buffer space on top and bottom */
|
|
uchar border; /* in pixels: border width to draw around image */
|
|
Iimage* nextimage; /* next in list of document's images */
|
|
void* aux;
|
|
};
|
|
|
|
|
|
struct Iformfield
|
|
{
|
|
Item; /* (with tag ==Iformfieldtag) */
|
|
Formfield*formfield;
|
|
void* aux;
|
|
};
|
|
|
|
|
|
struct Itable
|
|
{
|
|
Item; /* (with tag ==Itabletag) */
|
|
Table* table;
|
|
};
|
|
|
|
|
|
struct Ifloat
|
|
{
|
|
Item; /* (with tag ==Ifloattag) */
|
|
Item* item; /* table or image item that floats */
|
|
int x; /* x coord of top (from right, if ALright) */
|
|
int y; /* y coord of top */
|
|
uchar side; /* margin it floats to: ALleft or ALright */
|
|
uchar infloats; /* true if this has been added to a lay.floats */
|
|
Ifloat* nextfloat; /* in list of floats */
|
|
};
|
|
|
|
|
|
struct Ispacer
|
|
{
|
|
Item; /* (with tag ==Ispacertag) */
|
|
int spkind; /* ISPnull, etc. */
|
|
};
|
|
|
|
/*
 * Item state flags and value fields (packed into Item.state).
 * Bits 22-31 are single-bit flags, bits 8-15 hold the indent and
 * bits 0-7 hold the hang; bits 16-21 are not assigned here.
 *
 * NOTE(review): IFbrk (0x80000000) does not fit in a signed 32-bit int.
 * ISO C before C23 requires enumerators to be representable as int, so
 * this relies on the compiler accepting it -- check before porting.
 */
enum {
	IFbrk		= 0x80000000,	/* forced break before this item */
	IFbrksp		= 0x40000000,	/* add 1 line space to break (IFbrk set too) */
	IFnobrk		= 0x20000000,	/* break not allowed before this item */
	IFcleft		= 0x10000000,	/* clear left floats (IFbrk set too) */
	IFcright	= 0x08000000,	/* clear right floats (IFbrk set too) */
	IFwrap		= 0x04000000,	/* in a wrapping (non-pre) line */
	IFhang		= 0x02000000,	/* in a hanging (into left indent) item */
	IFrjust		= 0x01000000,	/* right justify current line */
	IFcjust		= 0x00800000,	/* center justify current line */
	IFsmap		= 0x00400000,	/* image is server-side map */
	IFindentshift	= 8,
	IFindentmask	= (255<<IFindentshift),	/* current indent, in tab stops */
	IFhangmask	= 255		/* current hang into left indent, in 1/10th tabstops */
};
|
|
|
|
/* Bias added to Itext's voff field */
enum { Voffbias = 128 };
|
|
|
|
/* Spacer kinds (values of Ispacer.spkind) */
enum {
	ISPnull,	/* 0 height and width */
	ISPvline,	/* height and ascent of current font */
	ISPhspace,	/* width of space in current font */
	ISPgeneral	/* other purposes (e.g., between markers and list) */
};
|
|
|
|
/* Generic attributes and events (not many elements will have any of these set) */
|
|
struct Genattr
|
|
{
|
|
Rune* id;
|
|
Rune* class;
|
|
Rune* style;
|
|
Rune* title;
|
|
SEvent* events;
|
|
};
|
|
|
|
struct SEvent
|
|
{
|
|
SEvent* next; /* in list of events */
|
|
int type; /* SEonblur, etc. */
|
|
Rune* script;
|
|
};
|
|
|
|
/*
 * Script event types (values of SEvent.type).
 * Values are consecutive, starting at 0; Numscriptev is the number of
 * event types and must remain last.
 */
enum {
	SEonblur,
	SEonchange,
	SEonclick,
	SEondblclick,
	SEonfocus,
	SEonkeypress,
	SEonkeyup,
	SEonload,
	SEonmousedown,
	SEonmousemove,
	SEonmouseout,
	SEonmouseover,
	SEonmouseup,
	SEonreset,
	SEonselect,
	SEonsubmit,
	SEonunload,
	Numscriptev
};
|
|
|
|
/* Form field types (values of Formfield.ftype) */
enum {
	Ftext,
	Fpassword,
	Fcheckbox,
	Fradio,
	Fsubmit,
	Fhidden,
	Fimage,
	Freset,
	Ffile,
	Fbutton,
	Fselect,
	Ftextarea
};
|
|
|
|
/* Information about a field in a form */
|
|
struct Formfield
|
|
{
|
|
Formfield*next; /* in list of fields for a form */
|
|
int ftype; /* Ftext, Fpassword, etc. */
|
|
int fieldid; /* serial no. of field within its form */
|
|
Form* form; /* containing form */
|
|
Rune* name; /* name attr */
|
|
Rune* value; /* value attr */
|
|
int size; /* size attr */
|
|
int maxlength; /* maxlength attr */
|
|
int rows; /* rows attr */
|
|
int cols; /* cols attr */
|
|
uchar flags; /* FFchecked, etc. */
|
|
Option* options; /* for Fselect fields */
|
|
Item* image; /* image item, for Fimage fields */
|
|
int ctlid; /* identifies control for this field in layout */
|
|
SEvent* events; /* same as genattr->events of containing item */
|
|
};
|
|
|
|
/* Flag bits for Formfield.flags */
enum {
	FFchecked	= (1<<7),
	FFmultiple	= (1<<6)
};
|
|
|
|
/* Option holds info about an option in a "select" form field */
|
|
struct Option
|
|
{
|
|
Option* next; /* next in list of options for a field */
|
|
int selected; /* true if selected initially */
|
|
Rune* value; /* value attr */
|
|
Rune* display; /* display string */
|
|
};
|
|
|
|
/* Form holds info about a form */
|
|
struct Form
|
|
{
|
|
Form* next; /* in list of forms for document */
|
|
int formid; /* serial no. of form within its doc */
|
|
Rune* name; /* name or id attr (netscape uses name, HTML 4.0 uses id) */
|
|
Rune* action; /* action attr */
|
|
int target; /* target attr as targetid */
|
|
int method; /* HGet or HPost */
|
|
int nfields; /* number of fields */
|
|
Formfield*fields; /* field's forms, in input order */
|
|
};
|
|
|
|
/* Flags used in various table structures (the uchar flags fields) */
enum {
	TFparsing	= (1<<7),
	TFnowrap	= (1<<6),
	TFisth		= (1<<5)
};
|
|
|
|
|
|
/* Information about a table */
|
|
struct Table
|
|
{
|
|
Table* next; /* next in list of document's tables */
|
|
int tableid; /* serial no. of table within its doc */
|
|
Tablerow*rows; /* array of row specs (list during parsing) */
|
|
int nrow; /* total number of rows */
|
|
Tablecol*cols; /* array of column specs */
|
|
int ncol; /* total number of columns */
|
|
Tablecell*cells; /* list of unique cells */
|
|
int ncell; /* total number of cells */
|
|
Tablecell***grid; /* 2-D array of cells */
|
|
Align align; /* alignment spec for whole table */
|
|
Dimen width; /* width spec for whole table */
|
|
int border; /* border attr */
|
|
int cellspacing; /* cellspacing attr */
|
|
int cellpadding; /* cellpadding attr */
|
|
Background background; /* table background */
|
|
Item* caption; /* linked list of Items, giving caption */
|
|
uchar caption_place; /* ALtop or ALbottom */
|
|
Lay* caption_lay; /* layout of caption */
|
|
int totw; /* total width */
|
|
int toth; /* total height */
|
|
int caph; /* caption height */
|
|
int availw; /* used for previous 3 sizes */
|
|
Token* tabletok; /* token that started the table */
|
|
uchar flags; /* Lchanged, perhaps */
|
|
};
|
|
|
|
|
|
struct Tablecol
|
|
{
|
|
int width;
|
|
Align align;
|
|
Point pos;
|
|
};
|
|
|
|
|
|
struct Tablerow
|
|
{
|
|
Tablerow*next; /* Next in list of rows, during parsing */
|
|
Tablecell*cells; /* Cells in row, linked through nextinrow */
|
|
int height;
|
|
int ascent;
|
|
Align align;
|
|
Background background;
|
|
Point pos;
|
|
uchar flags; /* 0 or TFparsing */
|
|
};
|
|
|
|
/*
|
|
* A Tablecell is one cell of a table.
|
|
* It may span multiple rows and multiple columns.
|
|
* Cells are linked on two lists: the list for all the cells of
|
|
* a document (the next pointers), and the list of all the
|
|
* cells that start in a given row (the nextinrow pointers)
|
|
*/
|
|
struct Tablecell
|
|
{
|
|
Tablecell*next; /* next in list of table's cells */
|
|
Tablecell*nextinrow; /* next in list of row's cells */
|
|
int cellid; /* serial no. of cell within table */
|
|
Item* content; /* contents before layout */
|
|
Lay* lay; /* layout of cell */
|
|
int rowspan; /* number of rows spanned by this cell */
|
|
int colspan; /* number of cols spanned by this cell */
|
|
Align align; /* alignment spec */
|
|
uchar flags; /* TFparsing, TFnowrap, TFisth */
|
|
Dimen wspec; /* suggested width */
|
|
int hspec; /* suggested height */
|
|
Background background; /* cell background */
|
|
int minw; /* minimum possible width */
|
|
int maxw; /* maximum width */
|
|
int ascent; /* cell's ascent */
|
|
int row; /* row of upper left corner */
|
|
int col; /* col of upper left corner */
|
|
Point pos; /* nw corner of cell contents, in cell */
|
|
};
|
|
|
|
/* Anchor is for info about hyperlinks that go somewhere */
|
|
struct Anchor
|
|
{
|
|
Anchor* next; /* next in list of document's anchors */
|
|
int index; /* serial no. of anchor within its doc */
|
|
Rune* name; /* name attr */
|
|
Rune* href; /* href attr */
|
|
int target; /* target attr as targetid */
|
|
};
|
|
|
|
|
|
/* DestAnchor is for info about hyperlinks that are destinations */
|
|
struct DestAnchor
|
|
{
|
|
DestAnchor*next; /* next in list of document's destanchors */
|
|
int index; /* serial no. of anchor within its doc */
|
|
Rune* name; /* name attr */
|
|
Item* item; /* the destination */
|
|
};
|
|
|
|
|
|
/* Maps (client side) */
|
|
struct Map
|
|
{
|
|
Map* next; /* next in list of document's maps */
|
|
Rune* name; /* map name */
|
|
Area* areas; /* list of map areas */
|
|
};
|
|
|
|
|
|
struct Area
|
|
{
|
|
Area* next; /* next in list of a map's areas */
|
|
int shape; /* SHrect, etc. */
|
|
Rune* href; /* associated hypertext link */
|
|
int target; /* associated target frame */
|
|
Dimen* coords; /* array of coords for shape */
|
|
int ncoords; /* size of coords array */
|
|
};
|
|
|
|
/* Area shapes (values of Area.shape) */
enum {
	SHrect,
	SHcircle,
	SHpoly
};
|
|
|
|
/* Fonts are represented by integers: style*NumSize + size */

/*
 * Font styles.
 * NumStyle is the number of styles and must remain last.
 */
enum {
	FntR,		/* roman */
	FntI,		/* italic */
	FntB,		/* bold */
	FntT,		/* typewriter */
	NumStyle
};
|
|
|
|
/*
 * Font sizes.
 * NumSize is the number of sizes and must remain last.
 */
enum {
	Tiny,
	Small,
	Normal,
	Large,
	Verylarge,
	NumSize
};
|
|
|
|
enum {
|
|
NumFnt = NumStyle*NumSize,
|
|
DefFnt = FntR*NumSize+Normal,
|
|
};
|
|
|
|
/*
 * Lines are needed through some text items, for underlining or
 * strikethrough (values of Itext.ul)
 */
enum {
	ULnone,
	ULunder,
	ULmid
};
|
|
|
|
/* Kidinfo flags (bits of Kidinfo.flags) */
enum {
	FRnoresize	= (1<<0),
	FRnoscroll	= (1<<1),
	FRhscroll	= (1<<2),
	FRvscroll	= (1<<3),
	FRhscrollauto	= (1<<4),
	FRvscrollauto	= (1<<5)
};
|
|
|
|
/* Information about child frame or frameset */
|
|
struct Kidinfo
|
|
{
|
|
Kidinfo*next; /* in list of kidinfos for a frameset */
|
|
int isframeset;
|
|
|
|
/* fields for "frame" */
|
|
Rune* src; /* only nil if a "dummy" frame or this is frameset */
|
|
Rune* name; /* always non-empty if this isn't frameset */
|
|
int marginw;
|
|
int marginh;
|
|
int framebd;
|
|
int flags;
|
|
|
|
/* fields for "frameset" */
|
|
Dimen* rows; /* array of row dimensions */
|
|
int nrows; /* length of rows */
|
|
Dimen* cols; /* array of col dimensions */
|
|
int ncols; /* length of cols */
|
|
Kidinfo*kidinfos;
|
|
Kidinfo*nextframeset; /* parsing stack */
|
|
};
|
|
|
|
|
|
/* Document info (global information about HTML page) */
|
|
struct Docinfo
|
|
{
|
|
/* stuff from HTTP headers, doc head, and body tag */
|
|
Rune* src; /* original source of doc */
|
|
Rune* base; /* base URL of doc */
|
|
Rune* doctitle; /* from <title> element */
|
|
Background background; /* background specification */
|
|
Iimage* backgrounditem; /* Image Item for doc background image, or nil */
|
|
int text; /* doc foreground (text) color */
|
|
int link; /* unvisited hyperlink color */
|
|
int vlink; /* visited hyperlink color */
|
|
int alink; /* highlighting hyperlink color */
|
|
int target; /* target frame default */
|
|
int chset; /* ISO_8859, etc. */
|
|
int mediatype; /* TextHtml, etc. */
|
|
int scripttype; /* TextJavascript, etc. */
|
|
int hasscripts; /* true if scripts used */
|
|
Rune* refresh; /* content of <http-equiv=Refresh ...> */
|
|
Kidinfo*kidinfo; /* if a frameset */
|
|
int frameid; /* id of document frame */
|
|
|
|
/* info needed to respond to user actions */
|
|
Anchor* anchors; /* list of href anchors */
|
|
DestAnchor*dests; /* list of destination anchors */
|
|
Form* forms; /* list of forms */
|
|
Table* tables; /* list of tables */
|
|
Map* maps; /* list of maps */
|
|
Iimage* images; /* list of image items (through nextimage links) */
|
|
};
|
|
|
|
extern int dimenkind(Dimen d);
|
|
extern int dimenspec(Dimen d);
|
|
extern void freedocinfo(Docinfo* d);
|
|
extern void freeitems(Item* ithead);
|
|
extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
|
|
extern void printitems(Item* items, char* msg);
|
|
extern int targetid(Rune* s);
|
|
extern Rune* targetname(int targid);
|
|
extern int validitems(Item* i);
|
|
|
|
#pragma varargck type "I" Item*
|
|
|
|
/*
 * Control print output.
 * These are only declared here; the definitions live elsewhere.
 */
extern int	warn;
extern int	dbglex;
extern int	dbgbuild;
|
|
|
|
/*
|
|
* To be provided by caller
|
|
* emalloc and erealloc should not return if can't get memory.
|
|
* emalloc should zero its memory.
|
|
*/
|
|
extern void* emalloc(ulong);
|
|
extern void* erealloc(void* p, ulong size);
|