#ifndef HTMLTIDY_HTML_H #define HTMLTIDY_HTML_H /* htmltidy.h (c) 1998-2000 (W3C) MIT, INRIA, Keio University (c) 2001 eGenix.com Software GmbH, Langenfeld See tidy.c for the copyright notice. */ /* Include platform specific work-arounds and additions */ #include "platform.h" #ifdef __cplusplus extern "C" { #endif /* indentation modes */ #define NO_INDENT 0 #define BLOCK_INDENT 1 #define SMART_INDENT 2 /* character encodings */ #define RAW 0 #define ASCII 1 #define LATIN1 2 #define UTF8 3 #define ISO2022 4 #define MACROMAN 5 /* Output streams */ typedef struct { int encoding; int state; /* for ISO 2022 */ FILE *fp; unsigned char *data; int datasize; int datapos; int CharEncoding; } OutputStream; void outc(unsigned int c, OutputStream *out); DL_EXPORT(OutputStream) *OutputStreamFromFile(FILE *fp, int CharEncoding); DL_EXPORT(OutputStream) *OutputStreamFromBuffer(char *data, int datasize, int datapos, int CharEncoding); DL_EXPORT(void) FreeOutputStream(OutputStream *out); /* states for ISO 2022 A document in ISO-2022 based encoding uses some ESC sequences called "designator" to switch character sets. The designators defined and used in ISO-2022-JP are: "ESC" + "(" + ? for ISO646 variants "ESC" + "$" + ? and "ESC" + "$" + "(" + ? for multibyte character sets */ #define FSM_ASCII 0 #define FSM_ESC 1 #define FSM_ESCD 2 #define FSM_ESCDP 3 #define FSM_ESCP 4 #define FSM_NONASCII 5 /* lexer char types */ #define digit 1 #define letter 2 #define namechar 4 #define white 8 #define newline 16 #define lowercase 32 #define uppercase 64 /* lexer GetToken states */ #define LEX_CONTENT 0 #define LEX_GT 1 #define LEX_ENDTAG 2 #define LEX_STARTTAG 3 #define LEX_COMMENT 4 #define LEX_DOCTYPE 5 #define LEX_PROCINSTR 6 #define LEX_ENDCOMMENT 7 #define LEX_CDATA 8 #define LEX_SECTION 9 #define LEX_ASP 10 #define LEX_JSTE 11 #define LEX_PHP 12 /* content model shortcut encoding */ #define CM_UNKNOWN 0 #define CM_EMPTY (1 << 0) #define CM_HTML (1 << 1) #define CM_HEAD (1 << 2) #define CM_BLOCK (1 << 3) #define CM_INLINE (1 << 4) #define CM_LIST (1 << 5) #define CM_DEFLIST (1 << 6) #define CM_TABLE (1 << 7) #define CM_ROWGRP (1 << 8) #define CM_ROW (1 << 9) #define CM_FIELD (1 << 10) #define CM_OBJECT (1 << 11) #define CM_PARAM (1 << 12) #define CM_FRAMES (1 << 13) #define CM_HEADING (1 << 14) #define CM_OPT (1 << 15) #define CM_IMG (1 << 16) #define CM_MIXED (1 << 17) #define CM_NO_INDENT (1 << 18) #define CM_OBSOLETE (1 << 19) #define CM_NEW (1 << 20) #define CM_OMITST (1 << 21) /* Linked list of class names and styles */ struct _style { char *tag; char *tag_class; char *properties; struct _style *next; }; typedef struct _style Style; /* Linked list of style properties */ struct _styleprop { char *name; char *value; struct _styleprop *next; }; typedef struct _styleprop StyleProp; /* Attribute/Value linked list node */ struct _attval { struct _attval *next; struct _attribute *dict; struct _node *asp; struct _node *php; int delim; char *attribute; char *value; }; typedef struct _attval AttVal; /* node->type is one of these values */ #define RootNode 0 #define DocTypeTag 1 #define CommentTag 2 #define ProcInsTag 3 #define TextNode 4 #define StartTag 5 #define EndTag 6 #define StartEndTag 7 #define CDATATag 8 #define SectionTag 9 #define AspTag 10 #define JsteTag 11 #define PhpTag 12 struct _node { struct _node *parent; struct _node *prev; struct _node *next; struct _node *content; struct _node *last; struct _attval *attributes; char *element; /* name (null for text nodes) */ unsigned int start; /* start of span onto text array */ unsigned int end; /* end of span onto text array */ unsigned int type; /* TextNode, StartTag, EndTag etc. */ Bool closed; /* true if closed by explicit end tag */ Bool implicit; /* true if inferred */ Bool linebreak; /* true if followed by a line break */ struct _tagdict *was; /* old tag when it was changed */ struct _tagdict *tag; /* tag's dictionary definition */ }; typedef struct _node Node; /* If the document uses just HTML 2.0 tags and attributes described it as HTML 2.0 Similarly for HTML 3.2 and the 3 flavors of HTML 4.0. If there are proprietary tags and attributes then describe it as HTML Proprietary. If it includes the xml-lang or xmlns attributes but is otherwise HTML 2.0, 3.2 or 4.0 then describe it as one of the flavors of Voyager (strict, loose or frameset). */ #define VERS_UNKNOWN 0 #define VERS_HTML20 1 #define VERS_HTML32 2 #define VERS_HTML40_STRICT 4 #define VERS_HTML40_LOOSE 8 #define VERS_FRAMES 16 #define VERS_XML 32 #define VERS_NETSCAPE 64 #define VERS_MICROSOFT 128 #define VERS_SUN 256 #define VERS_MALFORMED 512 #define VERS_ALL (VERS_HTML20|VERS_HTML32|VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES) #define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES) #define VERS_LOOSE (VERS_HTML32|VERS_HTML40_LOOSE|VERS_FRAMES) #define VERS_IFRAMES (VERS_HTML40_LOOSE|VERS_FRAMES) #define VERS_FROM32 (VERS_HTML40_STRICT|VERS_LOOSE) #define VERS_PROPRIETARY (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN) #define VERS_EVERYTHING (VERS_ALL|VERS_PROPRIETARY) /* Mosaic handles inlines via a separate stack from other elements We duplicate this to recover from inline markup errors such as: italic text

more italic text normal text which for compatibility with Mosaic is mapped to: italic text

more italic text normal text Note that any inline end tag pop's the effect of the current inline start tag, so that pop's in the above example. */ struct _inode { struct _inode *next; struct _tagdict *tag; /* tag's dictionary definition */ char *element; /* name (null for text nodes) */ struct _attval *attributes; }; typedef struct _inode IStack; typedef struct _lexer Lexer; /* tidy.c */ #define EndOfStream EOF /* non-raw input is cleaned up*/ typedef struct { int state; /* FSM for ISO2022 */ Bool pushed; int c; int tabs; int lastcol; int curcol; int curline; int encoding; FILE *fp; unsigned char *data; int datasize; int datapos; int CharEncoding; int tabsize; Lexer *lexer; /* needed for error reporting */ } InputStream; DL_EXPORT(InputStream) *InputStreamFromFile(FILE *fp, int CharEncoding, int tabsize); DL_EXPORT(InputStream) *InputStreamFromBuffer(char *data, int datasize, int datapos, int CharEncoding, int tabsize); DL_EXPORT(void) FreeInputStream(InputStream *in); DL_EXPORT(int) ReadChar(InputStream *in); DL_EXPORT(int) StreamEOF(InputStream *in); DL_EXPORT(void) UngetChar(int c, InputStream *in); /* The following are private to the lexer Use NewLexer() to create a lexer, and FreeLexer(lexer) to free it. */ struct _lexer { /* Streams */ InputStream *in; /* file stream */ OutputStream *errout; /* error output stream */ /* Configuration */ struct _tidyconfig *config; /* Runtime */ unsigned int badAccess; /* for accessibility errors */ unsigned int badLayout; /* for bad style errors */ unsigned int badChars; /* for bad char encodings */ unsigned int badForm; /* for mismatched/mispositioned form tags */ unsigned int warnings; /* count of warnings in this document */ unsigned int errors; /* count of errors */ unsigned int lines; /* lines seen */ unsigned int columns; /* at start of current token */ Bool waswhite; /* used to collapse contiguous white space */ Bool pushed; /* true after token has been pushed back */ Bool insertspace; /* when space is moved after end tag */ Bool excludeBlocks; /* Netscape compatibility */ Bool exiled; /* true if moved out of table */ Bool isvoyager; /* true if xmlns attribute on html element */ unsigned int versions; /* bit vector of HTML versions */ int doctype; /* version as given by doctype (if any) */ Bool bad_doctype; /* e.g. if html or PUBLIC is missing */ unsigned int txtstart; /* start of current node */ unsigned int txtend; /* end of current node */ unsigned int state; /* state of lexer's finite state machine */ struct _node *token; /* lexer character buffer parse tree nodes span onto this buffer which contains the concatenated text contents of all of the elements. lexsize must be reset for each file. */ char *lexbuf; /* char buffer */ unsigned int lexlength; /* allocated */ unsigned int lexsize; /* used */ /* Inline stack for compatibility with Mosaic */ Node *inode; /* for deferring text node */ IStack *insert; /* for inferring inline tags */ IStack *istack; unsigned int istacklength; /* allocated */ unsigned int istacksize; /* used */ unsigned int istackbase; /* start of frame */ Style *styles; /* used for cleaning up presentation markup */ }; typedef void (Parser)(Lexer *lexer, Node *node, unsigned int mode); typedef void (CheckAttribs)(Lexer *lexer, Node *node); /* declaration for methods that check attribute values */ typedef void (AttrCheck)(Lexer *lexer, Node *node, AttVal *attval); struct _attribute { struct _attribute *next; char *name; Bool nowrap; Bool literal; unsigned versions; AttrCheck *attrchk; }; typedef struct _attribute Attribute; /* well known attributes */ extern Attribute *attr_href; extern Attribute *attr_src; extern Attribute *attr_id; extern Attribute *attr_name; extern Attribute *attr_summary; extern Attribute *attr_alt; extern Attribute *attr_longdesc; extern Attribute *attr_title; /* Tag dictionary node */ struct _tagdict { struct _tagdict *next; char *name; unsigned int versions; unsigned int model; Parser *parser; CheckAttribs *chkattrs; }; typedef struct _tagdict Dict; /* modes for GetToken() */ #define IgnoreWhitespace 0 #define MixedContent 1 #define Preformatted 2 #define IgnoreMarkup 3 void FatalError(char *msg); void FileError(OutputStream *out, const char *file); DL_EXPORT(Node) *GetToken(Lexer *lexer, unsigned int mode); /* one level unget only */ DL_EXPORT(void) UngetToken(Lexer *lexer); /* create lexer for a file stream */ DL_EXPORT(Lexer) *NewLexer(struct _tidyconfig *config, InputStream *in, OutputStream *errout); /* delete lexer */ DL_EXPORT(void) FreeLexer(Lexer *lexer); DL_EXPORT(Bool) EndOfInput(Lexer *lexer); /* used for script or style */ DL_EXPORT(Node) *GetCDATA(Lexer *lexer, Node *container); /* use this to create node for inferred start tag */ DL_EXPORT(Node) *InferredTag(Lexer *lexer, char *name); /* used to create line break in preformatted text when cleaning the augean stables (Word2000) */ DL_EXPORT(Node) *NewLineNode(Lexer *lexer); /* Parser calls this to create RootNode */ DL_EXPORT(Node) *NewNode(void); DL_EXPORT(AttVal) *NewAttribute(void); DL_EXPORT(void) FreeAttrs(Node *node); DL_EXPORT(void) FreeAttribute(AttVal *av); DL_EXPORT(void) RemoveAttribute(Node *node, AttVal *attr); /* use this to free parse tree node and all its children */ DL_EXPORT(void) FreeNode(Node *node); /* used to clone heading nodes when split by an


*/ DL_EXPORT(Node) *CloneNode(Lexer *lexer, Node *element); /* lexer char map - must be initialized */ DL_EXPORT(void) InitMap(void); DL_EXPORT(void) AddCharToLexer(Lexer *lexer, unsigned int c); DL_EXPORT(void) AddStringLiteral(Lexer *lexer, char *str); DL_EXPORT(Node) *TextToken(Lexer *lexer); /* used by pretty printer for tag names */ DL_EXPORT(char) FoldCase(Lexer *lexer, char c, Bool tocaps); DL_EXPORT(Bool) IsLetter(unsigned int c); DL_EXPORT(Bool) IsDigit(unsigned int c); DL_EXPORT(Bool) IsWhite(unsigned int c); /* used to fixup doctype to match contents */ DL_EXPORT(Node) *FindDocType(Node *root); DL_EXPORT(Node) *FindHTML(Node *root); DL_EXPORT(Node) *FindHEAD(Node *root); DL_EXPORT(Bool) AddGenerator(Lexer *lexer, Node *root); DL_EXPORT(void) DiscardDocType(Node *root); DL_EXPORT(Bool) FixDocType(Lexer *lexer, Node *node); DL_EXPORT(char) *HTMLVersionName(Lexer *lexer); DL_EXPORT(int) ApparentVersion(Lexer *lexer); DL_EXPORT(Bool) FixXMLPI(Lexer *lexer, Node *root); DL_EXPORT(Bool) SetXHTMLDocType(Lexer *lexer, Node *root); DL_EXPORT(void) FixId(Lexer *lexer, Node *node); DL_EXPORT(Bool) CheckDocTypeKeyWords(Lexer *lexer, Node *doctype); /* used to detect faulty attributes */ DL_EXPORT(Bool) IsValidAttrName( char *attr); /* mode controlling treatment of doctype */ typedef enum { doctype_omit, doctype_auto, doctype_strict, doctype_loose, doctype_user } DocTypeMode; /* Tidy runtime configuration and runtime state */ struct _tidyconfig { /* Configuration */ unsigned int spaces; /* default indentation */ unsigned int wraplen; /* default wrap margin */ int CharEncoding; int tabsize; DocTypeMode doctype_mode; /* see doctype property */ char *alt_text; /* default text for alt attribute */ char *doctype_str; /* user specified doctype */ char *errfile; /* file name to write errors to */ Bool writeback; /* if true then output tidied markup */ Bool Errors; /* if true error output is generated */ Bool Output; /* if true normal output is generated */ Bool ShowWarnings; /* however errors are always shown */ Bool Quiet; /* no 'Parsing X', guessed DTD or summary */ Bool IndentContent; /* indent content of appropriate tags */ Bool SmartIndent; /* does text/block level content effect indentation */ Bool HideEndTags; /* suppress optional end tags */ Bool XmlTags; /* treat input as XML */ Bool XmlOut; /* create output as XML */ Bool xHTML; /* output extensible HTML */ Bool XmlPi; /* add for XML docs */ Bool RawOut; /* avoid mapping values > 127 to entities */ Bool UpperCaseTags; /* output tags in upper not lower case */ Bool UpperCaseAttrs; /* output attributes in upper not lower case */ Bool MakeClean; /* replace presentational clutter by style rules */ Bool LogicalEmphasis; /* replace i by em and b by strong */ Bool DropFontTags; /* discard presentation tags */ Bool DropEmptyParas; /* discard empty p elements */ Bool FixComments; /* fix comments with adjacent hyphens */ Bool BreakBeforeBR; /* o/p newline before
or not? */ Bool NumEntities; /* use numeric entities */ Bool QuoteMarks; /* output " marks as " */ Bool QuoteNbsp; /* output non-breaking space as entity */ Bool QuoteAmpersand; /* output naked ampersand as & */ Bool WrapAttVals; /* wrap within attribute values */ Bool WrapScriptlets; /* wrap within JavaScript string literals */ Bool WrapSection; /* wrap within section tags */ Bool WrapAsp; /* wrap within ASP pseudo elements */ Bool WrapJste; /* wrap within JSTE pseudo elements */ Bool WrapPhp; /* wrap within PHP pseudo elements */ Bool FixBackslash; /* fix URLs by replacing \ with / */ Bool IndentAttributes; /* newline+indent before each attribute */ Bool XmlPIs; /* if set to yes PIs must end with ?> */ Bool XmlSpace; /* if set to yes adds xml:space attr as needed */ Bool EncloseBodyText; /* if yes text at body is wrapped in

's */ Bool EncloseBlockText; /* if yes text in blocks is wrapped in

's */ Bool Word2000; /* draconian cleaning for Word2000 */ Bool TidyMark; /* add meta element indicating tidied doc */ Bool Emacs; /* if true format error output for GNU Emacs */ Bool LiteralAttribs; /* if true attributes may use newlines */ }; typedef struct _tidyconfig tidyconfig; /* config parameters, see config.c for defaults */ DL_EXPORT(tidyconfig) *NewConfig(void); DL_EXPORT(void) InitConfig(tidyconfig *config); DL_EXPORT(void) FreeConfig(tidyconfig *config); DL_EXPORT(void) AdjustConfig(tidyconfig *config); /* parser.c */ DL_EXPORT(Node) *ParseDocument(Lexer *lexer); DL_EXPORT(Node) *ParseXMLDocument(Lexer *lexer); DL_EXPORT(Bool) XMLPreserveWhiteSpace(Node *element); DL_EXPORT(void) CoerceNode(Lexer *lexer, Node *node, Dict *tag); DL_EXPORT(Bool) CheckNodeIntegrity(Node *node); DL_EXPORT(Bool) IsNewNode(Node *node); DL_EXPORT(void) RemoveNode(Node *node); DL_EXPORT(Node) *DiscardElement(Node *element); DL_EXPORT(void) InsertNodeAtStart(Node *element, Node *node); DL_EXPORT(void) InsertNodeAtEnd(Node *element, Node *node); DL_EXPORT(void) InsertNodeBeforeElement(Node *element, Node *node); DL_EXPORT(void) InsertNodeAfterElement(Node *element, Node *node); DL_EXPORT(Bool) IsJavaScript(Node *node); /* attrs.c */ DL_EXPORT(void) InitAttrs(void); DL_EXPORT(void) FreeAttrTable(void); DL_EXPORT(Attribute) *FindAttribute(AttVal *attval); DL_EXPORT(AttVal) *GetAttrByName(Node *node, char *name); DL_EXPORT(void) AddAttribute(Lexer *lexer, Node *node, char *name, char *value); DL_EXPORT(void) CheckUniqueAttributes(Lexer *lexer, Node *node); DL_EXPORT(void) CheckAttributes(Lexer *lexer, Node *node); DL_EXPORT(Attribute) *CheckAttribute(Lexer *lexer, Node *node, AttVal *attval); DL_EXPORT(Bool) IsUrl(char *attrname); DL_EXPORT(Bool) IsScript(char *attrname); DL_EXPORT(void) DeclareLiteralAttrib(char *name); DL_EXPORT(Bool) IsLiteralAttribute(char *attrname); /* istack.c */ DL_EXPORT(void) PushInline(Lexer *lexer, Node *node); DL_EXPORT(void) PopInline(Lexer *lexer, Node *node); DL_EXPORT(Bool) IsPushed(Lexer *lexer, Node *node); DL_EXPORT(int) InlineDup(Lexer *lexer, Node *node); DL_EXPORT(Node) *InsertedToken(Lexer *lexer); DL_EXPORT(AttVal) *DupAttrs(AttVal *attrs); DL_EXPORT(void) DeferDup(Lexer *lexer); DL_EXPORT(void) InsertNode(Node *element, Node *node); /* clean.c */ DL_EXPORT(void) FreeStyles(Lexer *lexer); DL_EXPORT(void) AddClass(Lexer *lexer, Node *node, char *classname); DL_EXPORT(void) CleanTree(Lexer *lexer, Node *node); DL_EXPORT(void) NestedEmphasis(Lexer *lexer, Node *node); DL_EXPORT(void) EmFromI(Lexer *lexer, Node *node); DL_EXPORT(void) CleanWord2000(Lexer *lexer, Node *node); DL_EXPORT(void) DropSections(Lexer *lexer, Node *node); DL_EXPORT(void) List2BQ(Lexer *lexer, Node *node); DL_EXPORT(void) BQ2Div(Lexer *lexer, Node *node); DL_EXPORT(Bool) IsWord2000(Lexer *lexer, Node *root); /* entities.c */ DL_EXPORT(void) InitEntities(void); DL_EXPORT(void) FreeEntities(void); DL_EXPORT(unsigned int) EntityCode(char *name); DL_EXPORT(char) *EntityName(unsigned int n); /* tags.c */ DL_EXPORT(void) DefineEmptyTag(char *name); DL_EXPORT(void) DefineInlineTag(char *name); DL_EXPORT(void) DefineBlockTag(char *name); DL_EXPORT(void) DefinePreTag(char *name); DL_EXPORT(Bool) FindTag(Lexer *lexer, Node *node); DL_EXPORT(void) InitTags(void); DL_EXPORT(void) FreeTags(void); DL_EXPORT(Parser) *FindParser(Node *node); DL_EXPORT(int) HTMLVersion(Lexer *lexer); /* localize.c -- used for all message text */ DL_EXPORT(void) ShowVersion(OutputStream *out); DL_EXPORT(void) ReadingFromStdin(void); DL_EXPORT(void) ReportUnknownOption(char *option); DL_EXPORT(void) ReportBadArgument(char *option); DL_EXPORT(void) NeedsAuthorIntervention(OutputStream *errout); DL_EXPORT(void) MissingBody(OutputStream *errout); DL_EXPORT(void) GeneralInfo(OutputStream *errout); DL_EXPORT(void) HelloMessage(OutputStream *errout, char *date, char *filename); DL_EXPORT(void) ReportVersion(OutputStream *errout, Lexer *lexer, char *filename, Node *doctype); DL_EXPORT(void) ReportNumWarnings(OutputStream *errout, Lexer *lexer); /* pprint.c */ DL_EXPORT(unsigned int) GetUTF8(unsigned char *str, unsigned int *ch); DL_EXPORT(char) *PutUTF8(char *buf, unsigned int c); DL_EXPORT(void) PPrintTree(Lexer *lexer, OutputStream *fout, unsigned int mode, unsigned int indent, Node *node); DL_EXPORT(void) PPrintXMLTree(Lexer *lexer, OutputStream *fout, unsigned int mode, unsigned int indent, Node *node); DL_EXPORT(Node) *FindHead(Node *root); DL_EXPORT(Node) *FindBody(Node *root); /* tidy.c */ #define EndOfStream EOF DL_EXPORT(void) InitTidy(void); DL_EXPORT(void) DeInitTidy(void); DL_EXPORT(void) *MemAlloc(unsigned int size); DL_EXPORT(void) *MemRealloc(void *mem, unsigned int newsize); DL_EXPORT(void) MemFree(void *mem); /* string functions */ DL_EXPORT(unsigned int) ToLower(unsigned int c); DL_EXPORT(char) *wstrdup(char *str); DL_EXPORT(char) *wstrndup(char *str, int len); DL_EXPORT(void) wstrncpy(char *s1, char *s2, int size); DL_EXPORT(void) wstrcat(char *s1, char *s2); DL_EXPORT(void) wstrcpy(char *s1, char *s2); DL_EXPORT(int) wstrcmp(char *s1, char *s2); DL_EXPORT(int) wstrcasecmp(char *s1, char *s2); DL_EXPORT(int) wstrncmp(char *s1, char *s2, int n); DL_EXPORT(int) wstrncasecmp(char *s1, char *s2, int n); DL_EXPORT(int) wstrlen(char *str); DL_EXPORT(Bool) wsubstr(char *s1, char *s2); DL_EXPORT(void) ClearMemory(void *, unsigned int size); DL_EXPORT(void) tidy_out(OutputStream *out, const char* msg, ...); #define uprintf fprintf /* error codes for entities */ #define MISSING_SEMICOLON 1 #define UNKNOWN_ENTITY 2 #define UNESCAPED_AMPERSAND 3 /* error codes for element messages */ #define MISSING_ENDTAG_FOR 1 #define MISSING_ENDTAG_BEFORE 2 #define DISCARDING_UNEXPECTED 3 #define NESTED_EMPHASIS 4 #define NON_MATCHING_ENDTAG 5 #define TAG_NOT_ALLOWED_IN 6 #define MISSING_STARTTAG 7 #define UNEXPECTED_ENDTAG 8 #define USING_BR_INPLACE_OF 9 #define INSERTING_TAG 10 #define SUSPECTED_MISSING_QUOTE 11 #define MISSING_TITLE_ELEMENT 12 #define DUPLICATE_FRAMESET 13 #define CANT_BE_NESTED 14 #define OBSOLETE_ELEMENT 15 #define PROPRIETARY_ELEMENT 16 #define UNKNOWN_ELEMENT 17 #define TRIM_EMPTY_ELEMENT 18 #define COERCE_TO_ENDTAG 19 #define ILLEGAL_NESTING 20 #define NOFRAMES_CONTENT 21 #define CONTENT_AFTER_BODY 22 #define INCONSISTENT_VERSION 23 #define MALFORMED_COMMENT 24 #define BAD_COMMENT_CHARS 25 #define BAD_XML_COMMENT 26 #define BAD_CDATA_CONTENT 27 #define INCONSISTENT_NAMESPACE 28 #define DOCTYPE_AFTER_TAGS 29 #define MALFORMED_DOCTYPE 30 #define UNEXPECTED_END_OF_FILE 31 #define DTYPE_NOT_UPPER_CASE 32 #define TOO_MANY_ELEMENTS 33 /* error codes used for attribute messages */ #define UNKNOWN_ATTRIBUTE 1 #define MISSING_ATTRIBUTE 2 #define MISSING_ATTR_VALUE 3 #define BAD_ATTRIBUTE_VALUE 4 #define UNEXPECTED_GT 5 #define PROPRIETARY_ATTR_VALUE 6 #define REPEATED_ATTRIBUTE 7 #define MISSING_IMAGEMAP 8 #define XML_ATTRIBUTE_VALUE 9 #define UNEXPECTED_QUOTEMARK 10 #define ID_NAME_MISMATCH 11 /* page transition effects */ #define EFFECT_BLEND -1 #define EFFECT_BOX_IN 0 #define EFFECT_BOX_OUT 1 #define EFFECT_CIRCLE_IN 2 #define EFFECT_CIRCLE_OUT 3 #define EFFECT_WIPE_UP 4 #define EFFECT_WIPE_DOWN 5 #define EFFECT_WIPE_RIGHT 6 #define EFFECT_WIPE_LEFT 7 #define EFFECT_VERT_BLINDS 8 #define EFFECT_HORZ_BLINDS 9 #define EFFECT_CHK_ACROSS 10 #define EFFECT_CHK_DOWN 11 #define EFFECT_RND_DISSOLVE 12 #define EFFECT_SPLIT_VIRT_IN 13 #define EFFECT_SPLIT_VIRT_OUT 14 #define EFFECT_SPLIT_HORZ_IN 15 #define EFFECT_SPLIT_HORZ_OUT 16 #define EFFECT_STRIPS_LEFT_DOWN 17 #define EFFECT_STRIPS_LEFT_UP 18 #define EFFECT_STRIPS_RIGHT_DOWN 19 #define EFFECT_STRIPS_RIGHT_UP 20 #define EFFECT_RND_BARS_HORZ 21 #define EFFECT_RND_BARS_VERT 22 #define EFFECT_RANDOM 23 /* accessibility flaws */ #define MISSING_IMAGE_ALT 1 #define MISSING_LINK_ALT 2 #define MISSING_SUMMARY 4 #define MISSING_IMAGE_MAP 8 #define USING_FRAMES 16 #define USING_NOFRAMES 32 /* presentation flaws */ #define USING_SPACER 1 #define USING_LAYER 2 #define USING_NOBR 4 #define USING_FONT 8 #define USING_BODY 16 /* character encoding errors */ #define WINDOWS_CHARS 1 #define NON_ASCII 2 #define FOUND_UTF16 4 DL_EXPORT(void) HelpText(OutputStream *errout, char *prog); DL_EXPORT(void) GeneralInfo(OutputStream *errout); DL_EXPORT(void) UnknownOption(OutputStream *errout, char c); DL_EXPORT(void) UnknownFile(OutputStream *errout, char *program, char *file); DL_EXPORT(void) ErrorSummary(Lexer *lexer); DL_EXPORT(void) ReportEncodingError(Lexer *lexer, unsigned int code, unsigned int c); DL_EXPORT(void) ReportEntityError(Lexer *lexer, unsigned int code, char *entity, int c); DL_EXPORT(void) ReportAttrError(Lexer *lexer, Node *node, char *attr, unsigned int code); DL_EXPORT(void) ReportWarning(Lexer *lexer, Node *element, Node *node, unsigned int code); DL_EXPORT(void) ReportError(Lexer *lexer, Node *element, Node *node, unsigned int code); DL_EXPORT(Node) *FindBody(Node *node); /* Parser methods for tags */ Parser ParseHTML; Parser ParseHead; Parser ParseTitle; Parser ParseScript; Parser ParseFrameSet; Parser ParseNoFrames; Parser ParseBody; Parser ParsePre; Parser ParseList; Parser ParseLI; Parser ParseDefList; Parser ParseBlock; Parser ParseInline; Parser ParseTableTag; Parser ParseColGroup; Parser ParseRowGroup; Parser ParseRow; Parser ParseSelect; Parser ParseOptGroup; Parser ParseText; Parser ParseObject; Parser ParseMap; /* Attribute checking methods */ CheckAttribs CheckHR; CheckAttribs CheckIMG; CheckAttribs CheckAnchor; CheckAttribs CheckLINK; CheckAttribs CheckMap; CheckAttribs CheckAREA; CheckAttribs CheckTABLE; CheckAttribs CheckTableCell; CheckAttribs CheckCaption; CheckAttribs CheckSCRIPT; CheckAttribs CheckSTYLE; CheckAttribs CheckHTML; /* used to control printing of null attributes */ DL_EXPORT(Bool) IsBoolAttribute(AttVal *attval); extern Dict *tag_html; extern Dict *tag_head; extern Dict *tag_body; extern Dict *tag_frameset; extern Dict *tag_frame; extern Dict *tag_noframes; extern Dict *tag_title; extern Dict *tag_base; extern Dict *tag_hr; extern Dict *tag_meta; extern Dict *tag_pre; extern Dict *tag_listing; extern Dict *tag_h1; extern Dict *tag_h2; extern Dict *tag_p; extern Dict *tag_ul; extern Dict *tag_ol; extern Dict *tag_dir; extern Dict *tag_li; extern Dict *tag_dt; extern Dict *tag_dd; extern Dict *tag_dl; extern Dict *tag_td; extern Dict *tag_th; extern Dict *tag_tr; extern Dict *tag_col; extern Dict *tag_br; extern Dict *tag_a; extern Dict *tag_link; extern Dict *tag_b; extern Dict *tag_i; extern Dict *tag_strong; extern Dict *tag_em; extern Dict *tag_big; extern Dict *tag_small; extern Dict *tag_param; extern Dict *tag_option; extern Dict *tag_optgroup; extern Dict *tag_img; extern Dict *tag_map; extern Dict *tag_area; extern Dict *tag_nobr; extern Dict *tag_wbr; extern Dict *tag_layer; extern Dict *tag_center; extern Dict *tag_spacer; extern Dict *tag_font; extern Dict *tag_style; extern Dict *tag_script; extern Dict *tag_noscript; extern Dict *tag_table; extern Dict *tag_caption; extern Dict *tag_form; extern Dict *tag_textarea; extern Dict *tag_blockquote; extern Dict *tag_applet; extern Dict *tag_object; extern Dict *tag_div; extern Dict *tag_span; /* EOF */ #ifdef __cplusplus } #endif #endif