#ifndef HTMLTIDY_HTML_H #define HTMLTIDY_HTML_H /* htmltidy.h (c) 1998-2000 (W3C) MIT, INRIA, Keio University (c) 2001 eGenix.com Software GmbH, Langenfeld See tidy.c for the copyright notice. */ /* Include platform specific work-arounds and additions */ #include "platform.h" #ifdef __cplusplus extern "C" { #endif /* indentation modes */ #define NO_INDENT 0 #define BLOCK_INDENT 1 #define SMART_INDENT 2 /* character encodings */ #define RAW 0 #define ASCII 1 #define LATIN1 2 #define UTF8 3 #define ISO2022 4 #define MACROMAN 5 /* Output streams */ typedef struct { int encoding; int state; /* for ISO 2022 */ FILE *fp; unsigned char *data; int datasize; int datapos; int CharEncoding; } OutputStream; void outc(unsigned int c, OutputStream *out); DL_EXPORT(OutputStream) *OutputStreamFromFile(FILE *fp, int CharEncoding); DL_EXPORT(OutputStream) *OutputStreamFromBuffer(char *data, int datasize, int datapos, int CharEncoding); DL_EXPORT(void) FreeOutputStream(OutputStream *out); /* states for ISO 2022 A document in ISO-2022 based encoding uses some ESC sequences called "designator" to switch character sets. The designators defined and used in ISO-2022-JP are: "ESC" + "(" + ? for ISO646 variants "ESC" + "$" + ? and "ESC" + "$" + "(" + ? 
for multibyte character sets */ #define FSM_ASCII 0 #define FSM_ESC 1 #define FSM_ESCD 2 #define FSM_ESCDP 3 #define FSM_ESCP 4 #define FSM_NONASCII 5 /* lexer char types */ #define digit 1 #define letter 2 #define namechar 4 #define white 8 #define newline 16 #define lowercase 32 #define uppercase 64 /* lexer GetToken states */ #define LEX_CONTENT 0 #define LEX_GT 1 #define LEX_ENDTAG 2 #define LEX_STARTTAG 3 #define LEX_COMMENT 4 #define LEX_DOCTYPE 5 #define LEX_PROCINSTR 6 #define LEX_ENDCOMMENT 7 #define LEX_CDATA 8 #define LEX_SECTION 9 #define LEX_ASP 10 #define LEX_JSTE 11 #define LEX_PHP 12 /* content model shortcut encoding */ #define CM_UNKNOWN 0 #define CM_EMPTY (1 << 0) #define CM_HTML (1 << 1) #define CM_HEAD (1 << 2) #define CM_BLOCK (1 << 3) #define CM_INLINE (1 << 4) #define CM_LIST (1 << 5) #define CM_DEFLIST (1 << 6) #define CM_TABLE (1 << 7) #define CM_ROWGRP (1 << 8) #define CM_ROW (1 << 9) #define CM_FIELD (1 << 10) #define CM_OBJECT (1 << 11) #define CM_PARAM (1 << 12) #define CM_FRAMES (1 << 13) #define CM_HEADING (1 << 14) #define CM_OPT (1 << 15) #define CM_IMG (1 << 16) #define CM_MIXED (1 << 17) #define CM_NO_INDENT (1 << 18) #define CM_OBSOLETE (1 << 19) #define CM_NEW (1 << 20) #define CM_OMITST (1 << 21) /* Linked list of class names and styles */ struct _style { char *tag; char *tag_class; char *properties; struct _style *next; }; typedef struct _style Style; /* Linked list of style properties */ struct _styleprop { char *name; char *value; struct _styleprop *next; }; typedef struct _styleprop StyleProp; /* Attribute/Value linked list node */ struct _attval { struct _attval *next; struct _attribute *dict; struct _node *asp; struct _node *php; int delim; char *attribute; char *value; }; typedef struct _attval AttVal; /* node->type is one of these values */ #define RootNode 0 #define DocTypeTag 1 #define CommentTag 2 #define ProcInsTag 3 #define TextNode 4 #define StartTag 5 #define EndTag 6 #define StartEndTag 7 #define CDATATag 8 
#define SectionTag 9 #define AspTag 10 #define JsteTag 11 #define PhpTag 12 struct _node { struct _node *parent; struct _node *prev; struct _node *next; struct _node *content; struct _node *last; struct _attval *attributes; char *element; /* name (null for text nodes) */ unsigned int start; /* start of span onto text array */ unsigned int end; /* end of span onto text array */ unsigned int type; /* TextNode, StartTag, EndTag etc. */ Bool closed; /* true if closed by explicit end tag */ Bool implicit; /* true if inferred */ Bool linebreak; /* true if followed by a line break */ struct _tagdict *was; /* old tag when it was changed */ struct _tagdict *tag; /* tag's dictionary definition */ }; typedef struct _node Node; /* If the document uses just HTML 2.0 tags and attributes described it as HTML 2.0 Similarly for HTML 3.2 and the 3 flavors of HTML 4.0. If there are proprietary tags and attributes then describe it as HTML Proprietary. If it includes the xml-lang or xmlns attributes but is otherwise HTML 2.0, 3.2 or 4.0 then describe it as one of the flavors of Voyager (strict, loose or frameset). */ #define VERS_UNKNOWN 0 #define VERS_HTML20 1 #define VERS_HTML32 2 #define VERS_HTML40_STRICT 4 #define VERS_HTML40_LOOSE 8 #define VERS_FRAMES 16 #define VERS_XML 32 #define VERS_NETSCAPE 64 #define VERS_MICROSOFT 128 #define VERS_SUN 256 #define VERS_MALFORMED 512 #define VERS_ALL (VERS_HTML20|VERS_HTML32|VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES) #define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES) #define VERS_LOOSE (VERS_HTML32|VERS_HTML40_LOOSE|VERS_FRAMES) #define VERS_IFRAMES (VERS_HTML40_LOOSE|VERS_FRAMES) #define VERS_FROM32 (VERS_HTML40_STRICT|VERS_LOOSE) #define VERS_PROPRIETARY (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN) #define VERS_EVERYTHING (VERS_ALL|VERS_PROPRIETARY) /* Mosaic handles inlines via a separate stack from other elements We duplicate this to recover from inline markup errors such as: italic text
more italic text normal text which for compatibility with Mosaic is mapped to: italic text
more italic text normal text
Note that any inline end tag pop's the effect of the current
inline start tag, so that pop's in the above example.
*/
/* One entry of the inline-element stack; entries chain through `next`. */
typedef struct _inode
{
    struct _inode *next;            /* next entry in the chain */
    struct _tagdict *tag;           /* tag's dictionary definition */
    char *element;                  /* name (null for text nodes) */
    struct _attval *attributes;     /* attribute list recorded for the tag */
} IStack;
/* Forward declaration: struct _lexer is defined further down in this header,
   but InputStream needs to hold a Lexer pointer before that point. */
typedef struct _lexer Lexer;
/* tidy.c */
/* End-of-stream marker; expands to stdio's EOF. */
#define EndOfStream EOF
/* non-raw input is cleaned up*/
/* Input stream state: holds both a FILE pointer and an in-memory buffer
   description (data/datasize/datapos), mirroring the fields of OutputStream. */
typedef struct
{
    int state; /* FSM for ISO2022 */
    Bool pushed;            /* NOTE(review): presumably set while `c` holds a pushed-back char - confirm */
    int c;
    int tabs;
    int lastcol;
    int curcol;
    int curline;
    int encoding;
    FILE *fp;
    unsigned char *data;
    int datasize;
    int datapos;
    int CharEncoding;
    int tabsize;
    Lexer *lexer; /* needed for error reporting */
} InputStream;
/* Input stream API; the implementations live outside this header. */

/* Create an input stream on top of a FILE pointer. */
DL_EXPORT(InputStream) *InputStreamFromFile(FILE *fp,
                                            int CharEncoding,
                                            int tabsize);
/* Create an input stream on top of a caller-supplied memory buffer. */
DL_EXPORT(InputStream) *InputStreamFromBuffer(char *data,
                                              int datasize,
                                              int datapos,
                                              int CharEncoding,
                                              int tabsize);
DL_EXPORT(void) FreeInputStream(InputStream *in);
/* NOTE(review): presumably returns EndOfStream at end of input - confirm in tidy.c */
DL_EXPORT(int) ReadChar(InputStream *in);
DL_EXPORT(int) StreamEOF(InputStream *in);
DL_EXPORT(void) UngetChar(int c, InputStream *in);
/*
  The following are private to the lexer
  Use NewLexer() to create a lexer, and
  FreeLexer(lexer) to free it.
*/
struct _lexer
{
    /* Streams */
    InputStream *in; /* file stream */
    OutputStream *errout; /* error output stream */
    /* Configuration */
    struct _tidyconfig *config;
    /* Runtime */
    unsigned int badAccess; /* for accessibility errors */
    unsigned int badLayout; /* for bad style errors */
    unsigned int badChars; /* for bad char encodings */
    unsigned int badForm; /* for mismatched/mispositioned form tags */
    unsigned int warnings; /* count of warnings in this document */
    unsigned int errors; /* count of errors */
    unsigned int lines; /* lines seen */
    unsigned int columns; /* at start of current token */
    Bool waswhite; /* used to collapse contiguous white space */
    Bool pushed; /* true after token has been pushed back */
    Bool insertspace; /* when space is moved after end tag */
    Bool excludeBlocks; /* Netscape compatibility */
    Bool exiled; /* true if moved out of table */
    Bool isvoyager; /* true if xmlns attribute on html element */
    unsigned int versions; /* bit vector of HTML versions */
    int doctype; /* version as given by doctype (if any) */
    Bool bad_doctype; /* e.g. if html or PUBLIC is missing */
    unsigned int txtstart; /* start of current node */
    unsigned int txtend; /* end of current node */
    unsigned int state; /* state of lexer's finite state machine */
    struct _node *token;
    /*
      lexer character buffer
      parse tree nodes span onto this buffer
      which contains the concatenated text
      contents of all of the elements.
      lexsize must be reset for each file.
    */
    char *lexbuf; /* char buffer */
    unsigned int lexlength; /* allocated */
    unsigned int lexsize; /* used */
    /* Inline stack for compatibility with Mosaic */
    Node *inode; /* for deferring text node */
    IStack *insert; /* for inferring inline tags */
    IStack *istack;
    unsigned int istacklength; /* allocated */
    unsigned int istacksize; /* used */
    unsigned int istackbase; /* start of frame */
    Style *styles; /* used for cleaning up presentation markup */
};
/* Function types (not pointer types): struct _tagdict stores them as
   `Parser *` / `CheckAttribs *`, and struct _attribute as `AttrCheck *`. */
typedef void (Parser)(Lexer *lexer, Node *node, unsigned int mode);
typedef void (CheckAttribs)(Lexer *lexer, Node *node);
/* declaration for methods that check attribute values */
typedef void (AttrCheck)(Lexer *lexer, Node *node, AttVal *attval);
/* Attribute dictionary entry; entries chain through `next`. */
typedef struct _attribute
{
    struct _attribute *next;    /* next dictionary entry */
    char *name;                 /* attribute name */
    Bool nowrap;
    Bool literal;
    unsigned int versions;      /* NOTE(review): presumably a VERS_* bit mask - confirm */
    AttrCheck *attrchk;         /* value-checking callback */
} Attribute;
/* well known attributes */
/* Declarations only; the pointers are defined in another translation unit
   (NOTE(review): presumably attrs.c, set up by InitAttrs() - confirm). */
extern Attribute *attr_href;
extern Attribute *attr_src;
extern Attribute *attr_id;
extern Attribute *attr_name;
extern Attribute *attr_summary;
extern Attribute *attr_alt;
extern Attribute *attr_longdesc;
extern Attribute *attr_title;
/* Tag dictionary node; entries chain through `next`. */
typedef struct _tagdict
{
    struct _tagdict *next;      /* next dictionary entry */
    char *name;                 /* tag name */
    unsigned int versions;
    unsigned int model;
    Parser *parser;             /* content parser for this tag */
    CheckAttribs *chkattrs;     /* attribute checker for this tag */
} Dict;
/* Values accepted by the `mode` argument of GetToken() */
#define IgnoreWhitespace    0
#define MixedContent        1
#define Preformatted        2
#define IgnoreMarkup        3
/* Error reporting helpers (not exported through DL_EXPORT). */
void FatalError(char *msg);
void FileError(OutputStream *out, const char *file);
/* Fetch the next token; `mode` is one of the GetToken() modes defined above. */
DL_EXPORT(Node) *GetToken(Lexer *lexer, unsigned int mode);
/* one level unget only */
DL_EXPORT(void) UngetToken(Lexer *lexer);
/* create lexer for a file stream */
DL_EXPORT(Lexer) *NewLexer(struct _tidyconfig *config,
                           InputStream *in,
                           OutputStream *errout);
/* delete lexer */
DL_EXPORT(void) FreeLexer(Lexer *lexer);
DL_EXPORT(Bool) EndOfInput(Lexer *lexer);
/* used for script or style */
DL_EXPORT(Node) *GetCDATA(Lexer *lexer, Node *container);
/* use this to create node for inferred start tag */
DL_EXPORT(Node) *InferredTag(Lexer *lexer, char *name);
/* used to create line break in preformatted text
   when cleaning the augean stables (Word2000) */
DL_EXPORT(Node) *NewLineNode(Lexer *lexer);
/* Parser calls this to create RootNode */
DL_EXPORT(Node) *NewNode(void);
/* Attribute list management */
DL_EXPORT(AttVal) *NewAttribute(void);
DL_EXPORT(void) FreeAttrs(Node *node);
DL_EXPORT(void) FreeAttribute(AttVal *av);
DL_EXPORT(void) RemoveAttribute(Node *node, AttVal *attr);
/* use this to free parse tree node and all its children */
DL_EXPORT(void) FreeNode(Node *node);
/* used to clone heading nodes when split by an <hr> */
DL_EXPORT(Node) *CloneNode(Lexer *lexer, Node *element);
/* lexer char map - must be initialized */
DL_EXPORT(void) InitMap(void);
/* Append to the lexer's text (NOTE(review): presumably lexer->lexbuf - confirm). */
DL_EXPORT(void) AddCharToLexer(Lexer *lexer, unsigned int c);
DL_EXPORT(void) AddStringLiteral(Lexer *lexer, char *str);
DL_EXPORT(Node) *TextToken(Lexer *lexer);
/* used by pretty printer for tag names */
DL_EXPORT(char) FoldCase(Lexer *lexer, char c, Bool tocaps);
/* Character classification predicates */
DL_EXPORT(Bool) IsLetter(unsigned int c);
DL_EXPORT(Bool) IsDigit(unsigned int c);
DL_EXPORT(Bool) IsWhite(unsigned int c);
/* used to fixup doctype to match contents */
DL_EXPORT(Node) *FindDocType(Node *root);
DL_EXPORT(Node) *FindHTML(Node *root);
DL_EXPORT(Node) *FindHEAD(Node *root);
DL_EXPORT(Bool) AddGenerator(Lexer *lexer, Node *root);
DL_EXPORT(void) DiscardDocType(Node *root);
DL_EXPORT(Bool) FixDocType(Lexer *lexer, Node *node);
DL_EXPORT(char) *HTMLVersionName(Lexer *lexer);
DL_EXPORT(int) ApparentVersion(Lexer *lexer);
DL_EXPORT(Bool) FixXMLPI(Lexer *lexer, Node *root);
DL_EXPORT(Bool) SetXHTMLDocType(Lexer *lexer, Node *root);
DL_EXPORT(void) FixId(Lexer *lexer, Node *node);
DL_EXPORT(Bool) CheckDocTypeKeyWords(Lexer *lexer, Node *doctype);
/* used to detect faulty attributes */
DL_EXPORT(Bool) IsValidAttrName( char *attr);
/* Selects how the doctype is treated; stored in the doctype_mode
   field of struct _tidyconfig. */
typedef enum
{
    doctype_omit   = 0,
    doctype_auto   = 1,
    doctype_strict = 2,
    doctype_loose  = 3,
    doctype_user   = 4
} DocTypeMode;
/* Tidy runtime configuration and runtime state */
struct _tidyconfig
{
/* Configuration */
unsigned int spaces; /* default indentation */
unsigned int wraplen; /* default wrap margin */
int CharEncoding;
int tabsize;
DocTypeMode doctype_mode; /* see doctype property */
char *alt_text; /* default text for alt attribute */
char *doctype_str; /* user specified doctype */
char *errfile; /* file name to write errors to */
Bool writeback; /* if true then output tidied markup */
Bool Errors; /* if true error output is generated */
Bool Output; /* if true normal output is generated */
Bool ShowWarnings; /* however errors are always shown */
Bool Quiet; /* no 'Parsing X', guessed DTD or summary */
Bool IndentContent; /* indent content of appropriate tags */
Bool SmartIndent; /* does text/block level content effect indentation */
Bool HideEndTags; /* suppress optional end tags */
Bool XmlTags; /* treat input as XML */
Bool XmlOut; /* create output as XML */
Bool xHTML; /* output extensible HTML */
Bool XmlPi; /* add for XML docs */
Bool RawOut; /* avoid mapping values > 127 to entities */
Bool UpperCaseTags; /* output tags in upper not lower case */
Bool UpperCaseAttrs; /* output attributes in upper not lower case */
Bool MakeClean; /* replace presentational clutter by style rules */
Bool LogicalEmphasis; /* replace i by em and b by strong */
Bool DropFontTags; /* discard presentation tags */
Bool DropEmptyParas; /* discard empty p elements */
Bool FixComments; /* fix comments with adjacent hyphens */
Bool BreakBeforeBR; /* o/p newline before
or not? */
Bool NumEntities; /* use numeric entities */
Bool QuoteMarks; /* output " marks as " */
Bool QuoteNbsp; /* output non-breaking space as entity */
Bool QuoteAmpersand; /* output naked ampersand as & */
Bool WrapAttVals; /* wrap within attribute values */
Bool WrapScriptlets; /* wrap within JavaScript string literals */
Bool WrapSection; /* wrap within section tags */
Bool WrapAsp; /* wrap within ASP pseudo elements */
Bool WrapJste; /* wrap within JSTE pseudo elements */
Bool WrapPhp; /* wrap within PHP pseudo elements */
Bool FixBackslash; /* fix URLs by replacing \ with / */
Bool IndentAttributes; /* newline+indent before each attribute */
Bool XmlPIs; /* if set to yes PIs must end with ?> */
Bool XmlSpace; /* if set to yes adds xml:space attr as needed */
Bool EncloseBodyText; /* if yes text at body is wrapped in
's */ Bool EncloseBlockText; /* if yes text in blocks is wrapped in
's */ Bool Word2000; /* draconian cleaning for Word2000 */ Bool TidyMark; /* add meta element indicating tidied doc */ Bool Emacs; /* if true format error output for GNU Emacs */ Bool LiteralAttribs; /* if true attributes may use newlines */ }; typedef struct _tidyconfig tidyconfig; /* config parameters, see config.c for defaults */ DL_EXPORT(tidyconfig) *NewConfig(void); DL_EXPORT(void) InitConfig(tidyconfig *config); DL_EXPORT(void) FreeConfig(tidyconfig *config); DL_EXPORT(void) AdjustConfig(tidyconfig *config); /* parser.c */ DL_EXPORT(Node) *ParseDocument(Lexer *lexer); DL_EXPORT(Node) *ParseXMLDocument(Lexer *lexer); DL_EXPORT(Bool) XMLPreserveWhiteSpace(Node *element); DL_EXPORT(void) CoerceNode(Lexer *lexer, Node *node, Dict *tag); DL_EXPORT(Bool) CheckNodeIntegrity(Node *node); DL_EXPORT(Bool) IsNewNode(Node *node); DL_EXPORT(void) RemoveNode(Node *node); DL_EXPORT(Node) *DiscardElement(Node *element); DL_EXPORT(void) InsertNodeAtStart(Node *element, Node *node); DL_EXPORT(void) InsertNodeAtEnd(Node *element, Node *node); DL_EXPORT(void) InsertNodeBeforeElement(Node *element, Node *node); DL_EXPORT(void) InsertNodeAfterElement(Node *element, Node *node); DL_EXPORT(Bool) IsJavaScript(Node *node); /* attrs.c */ DL_EXPORT(void) InitAttrs(void); DL_EXPORT(void) FreeAttrTable(void); DL_EXPORT(Attribute) *FindAttribute(AttVal *attval); DL_EXPORT(AttVal) *GetAttrByName(Node *node, char *name); DL_EXPORT(void) AddAttribute(Lexer *lexer, Node *node, char *name, char *value); DL_EXPORT(void) CheckUniqueAttributes(Lexer *lexer, Node *node); DL_EXPORT(void) CheckAttributes(Lexer *lexer, Node *node); DL_EXPORT(Attribute) *CheckAttribute(Lexer *lexer, Node *node, AttVal *attval); DL_EXPORT(Bool) IsUrl(char *attrname); DL_EXPORT(Bool) IsScript(char *attrname); DL_EXPORT(void) DeclareLiteralAttrib(char *name); DL_EXPORT(Bool) IsLiteralAttribute(char *attrname); /* istack.c */ DL_EXPORT(void) PushInline(Lexer *lexer, Node *node); DL_EXPORT(void) PopInline(Lexer *lexer, 
Node *node); DL_EXPORT(Bool) IsPushed(Lexer *lexer, Node *node); DL_EXPORT(int) InlineDup(Lexer *lexer, Node *node); DL_EXPORT(Node) *InsertedToken(Lexer *lexer); DL_EXPORT(AttVal) *DupAttrs(AttVal *attrs); DL_EXPORT(void) DeferDup(Lexer *lexer); DL_EXPORT(void) InsertNode(Node *element, Node *node); /* clean.c */ DL_EXPORT(void) FreeStyles(Lexer *lexer); DL_EXPORT(void) AddClass(Lexer *lexer, Node *node, char *classname); DL_EXPORT(void) CleanTree(Lexer *lexer, Node *node); DL_EXPORT(void) NestedEmphasis(Lexer *lexer, Node *node); DL_EXPORT(void) EmFromI(Lexer *lexer, Node *node); DL_EXPORT(void) CleanWord2000(Lexer *lexer, Node *node); DL_EXPORT(void) DropSections(Lexer *lexer, Node *node); DL_EXPORT(void) List2BQ(Lexer *lexer, Node *node); DL_EXPORT(void) BQ2Div(Lexer *lexer, Node *node); DL_EXPORT(Bool) IsWord2000(Lexer *lexer, Node *root); /* entities.c */ DL_EXPORT(void) InitEntities(void); DL_EXPORT(void) FreeEntities(void); DL_EXPORT(unsigned int) EntityCode(char *name); DL_EXPORT(char) *EntityName(unsigned int n); /* tags.c */ DL_EXPORT(void) DefineEmptyTag(char *name); DL_EXPORT(void) DefineInlineTag(char *name); DL_EXPORT(void) DefineBlockTag(char *name); DL_EXPORT(void) DefinePreTag(char *name); DL_EXPORT(Bool) FindTag(Lexer *lexer, Node *node); DL_EXPORT(void) InitTags(void); DL_EXPORT(void) FreeTags(void); DL_EXPORT(Parser) *FindParser(Node *node); DL_EXPORT(int) HTMLVersion(Lexer *lexer); /* localize.c -- used for all message text */ DL_EXPORT(void) ShowVersion(OutputStream *out); DL_EXPORT(void) ReadingFromStdin(void); DL_EXPORT(void) ReportUnknownOption(char *option); DL_EXPORT(void) ReportBadArgument(char *option); DL_EXPORT(void) NeedsAuthorIntervention(OutputStream *errout); DL_EXPORT(void) MissingBody(OutputStream *errout); DL_EXPORT(void) GeneralInfo(OutputStream *errout); DL_EXPORT(void) HelloMessage(OutputStream *errout, char *date, char *filename); DL_EXPORT(void) ReportVersion(OutputStream *errout, Lexer *lexer, char *filename, Node 
*doctype); DL_EXPORT(void) ReportNumWarnings(OutputStream *errout, Lexer *lexer); /* pprint.c */ DL_EXPORT(unsigned int) GetUTF8(unsigned char *str, unsigned int *ch); DL_EXPORT(char) *PutUTF8(char *buf, unsigned int c); DL_EXPORT(void) PPrintTree(Lexer *lexer, OutputStream *fout, unsigned int mode, unsigned int indent, Node *node); DL_EXPORT(void) PPrintXMLTree(Lexer *lexer, OutputStream *fout, unsigned int mode, unsigned int indent, Node *node); DL_EXPORT(Node) *FindHead(Node *root); DL_EXPORT(Node) *FindBody(Node *root); /* tidy.c */ #define EndOfStream EOF DL_EXPORT(void) InitTidy(void); DL_EXPORT(void) DeInitTidy(void); DL_EXPORT(void) *MemAlloc(unsigned int size); DL_EXPORT(void) *MemRealloc(void *mem, unsigned int newsize); DL_EXPORT(void) MemFree(void *mem); /* string functions */ DL_EXPORT(unsigned int) ToLower(unsigned int c); DL_EXPORT(char) *wstrdup(char *str); DL_EXPORT(char) *wstrndup(char *str, int len); DL_EXPORT(void) wstrncpy(char *s1, char *s2, int size); DL_EXPORT(void) wstrcat(char *s1, char *s2); DL_EXPORT(void) wstrcpy(char *s1, char *s2); DL_EXPORT(int) wstrcmp(char *s1, char *s2); DL_EXPORT(int) wstrcasecmp(char *s1, char *s2); DL_EXPORT(int) wstrncmp(char *s1, char *s2, int n); DL_EXPORT(int) wstrncasecmp(char *s1, char *s2, int n); DL_EXPORT(int) wstrlen(char *str); DL_EXPORT(Bool) wsubstr(char *s1, char *s2); DL_EXPORT(void) ClearMemory(void *, unsigned int size); DL_EXPORT(void) tidy_out(OutputStream *out, const char* msg, ...); #define uprintf fprintf /* error codes for entities */ #define MISSING_SEMICOLON 1 #define UNKNOWN_ENTITY 2 #define UNESCAPED_AMPERSAND 3 /* error codes for element messages */ #define MISSING_ENDTAG_FOR 1 #define MISSING_ENDTAG_BEFORE 2 #define DISCARDING_UNEXPECTED 3 #define NESTED_EMPHASIS 4 #define NON_MATCHING_ENDTAG 5 #define TAG_NOT_ALLOWED_IN 6 #define MISSING_STARTTAG 7 #define UNEXPECTED_ENDTAG 8 #define USING_BR_INPLACE_OF 9 #define INSERTING_TAG 10 #define SUSPECTED_MISSING_QUOTE 11 #define 
MISSING_TITLE_ELEMENT 12 #define DUPLICATE_FRAMESET 13 #define CANT_BE_NESTED 14 #define OBSOLETE_ELEMENT 15 #define PROPRIETARY_ELEMENT 16 #define UNKNOWN_ELEMENT 17 #define TRIM_EMPTY_ELEMENT 18 #define COERCE_TO_ENDTAG 19 #define ILLEGAL_NESTING 20 #define NOFRAMES_CONTENT 21 #define CONTENT_AFTER_BODY 22 #define INCONSISTENT_VERSION 23 #define MALFORMED_COMMENT 24 #define BAD_COMMENT_CHARS 25 #define BAD_XML_COMMENT 26 #define BAD_CDATA_CONTENT 27 #define INCONSISTENT_NAMESPACE 28 #define DOCTYPE_AFTER_TAGS 29 #define MALFORMED_DOCTYPE 30 #define UNEXPECTED_END_OF_FILE 31 #define DTYPE_NOT_UPPER_CASE 32 #define TOO_MANY_ELEMENTS 33 /* error codes used for attribute messages */ #define UNKNOWN_ATTRIBUTE 1 #define MISSING_ATTRIBUTE 2 #define MISSING_ATTR_VALUE 3 #define BAD_ATTRIBUTE_VALUE 4 #define UNEXPECTED_GT 5 #define PROPRIETARY_ATTR_VALUE 6 #define REPEATED_ATTRIBUTE 7 #define MISSING_IMAGEMAP 8 #define XML_ATTRIBUTE_VALUE 9 #define UNEXPECTED_QUOTEMARK 10 #define ID_NAME_MISMATCH 11 /* page transition effects */ #define EFFECT_BLEND -1 #define EFFECT_BOX_IN 0 #define EFFECT_BOX_OUT 1 #define EFFECT_CIRCLE_IN 2 #define EFFECT_CIRCLE_OUT 3 #define EFFECT_WIPE_UP 4 #define EFFECT_WIPE_DOWN 5 #define EFFECT_WIPE_RIGHT 6 #define EFFECT_WIPE_LEFT 7 #define EFFECT_VERT_BLINDS 8 #define EFFECT_HORZ_BLINDS 9 #define EFFECT_CHK_ACROSS 10 #define EFFECT_CHK_DOWN 11 #define EFFECT_RND_DISSOLVE 12 #define EFFECT_SPLIT_VIRT_IN 13 #define EFFECT_SPLIT_VIRT_OUT 14 #define EFFECT_SPLIT_HORZ_IN 15 #define EFFECT_SPLIT_HORZ_OUT 16 #define EFFECT_STRIPS_LEFT_DOWN 17 #define EFFECT_STRIPS_LEFT_UP 18 #define EFFECT_STRIPS_RIGHT_DOWN 19 #define EFFECT_STRIPS_RIGHT_UP 20 #define EFFECT_RND_BARS_HORZ 21 #define EFFECT_RND_BARS_VERT 22 #define EFFECT_RANDOM 23 /* accessibility flaws */ #define MISSING_IMAGE_ALT 1 #define MISSING_LINK_ALT 2 #define MISSING_SUMMARY 4 #define MISSING_IMAGE_MAP 8 #define USING_FRAMES 16 #define USING_NOFRAMES 32 /* presentation flaws */ #define 
USING_SPACER 1 #define USING_LAYER 2 #define USING_NOBR 4 #define USING_FONT 8 #define USING_BODY 16 /* character encoding errors */ #define WINDOWS_CHARS 1 #define NON_ASCII 2 #define FOUND_UTF16 4 DL_EXPORT(void) HelpText(OutputStream *errout, char *prog); DL_EXPORT(void) GeneralInfo(OutputStream *errout); DL_EXPORT(void) UnknownOption(OutputStream *errout, char c); DL_EXPORT(void) UnknownFile(OutputStream *errout, char *program, char *file); DL_EXPORT(void) ErrorSummary(Lexer *lexer); DL_EXPORT(void) ReportEncodingError(Lexer *lexer, unsigned int code, unsigned int c); DL_EXPORT(void) ReportEntityError(Lexer *lexer, unsigned int code, char *entity, int c); DL_EXPORT(void) ReportAttrError(Lexer *lexer, Node *node, char *attr, unsigned int code); DL_EXPORT(void) ReportWarning(Lexer *lexer, Node *element, Node *node, unsigned int code); DL_EXPORT(void) ReportError(Lexer *lexer, Node *element, Node *node, unsigned int code); DL_EXPORT(Node) *FindBody(Node *node); /* Parser methods for tags */ Parser ParseHTML; Parser ParseHead; Parser ParseTitle; Parser ParseScript; Parser ParseFrameSet; Parser ParseNoFrames; Parser ParseBody; Parser ParsePre; Parser ParseList; Parser ParseLI; Parser ParseDefList; Parser ParseBlock; Parser ParseInline; Parser ParseTableTag; Parser ParseColGroup; Parser ParseRowGroup; Parser ParseRow; Parser ParseSelect; Parser ParseOptGroup; Parser ParseText; Parser ParseObject; Parser ParseMap; /* Attribute checking methods */ CheckAttribs CheckHR; CheckAttribs CheckIMG; CheckAttribs CheckAnchor; CheckAttribs CheckLINK; CheckAttribs CheckMap; CheckAttribs CheckAREA; CheckAttribs CheckTABLE; CheckAttribs CheckTableCell; CheckAttribs CheckCaption; CheckAttribs CheckSCRIPT; CheckAttribs CheckSTYLE; CheckAttribs CheckHTML; /* used to control printing of null attributes */ DL_EXPORT(Bool) IsBoolAttribute(AttVal *attval); extern Dict *tag_html; extern Dict *tag_head; extern Dict *tag_body; extern Dict *tag_frameset; extern Dict *tag_frame; extern Dict 
*tag_noframes; extern Dict *tag_title; extern Dict *tag_base; extern Dict *tag_hr; extern Dict *tag_meta; extern Dict *tag_pre; extern Dict *tag_listing; extern Dict *tag_h1; extern Dict *tag_h2; extern Dict *tag_p; extern Dict *tag_ul; extern Dict *tag_ol; extern Dict *tag_dir; extern Dict *tag_li; extern Dict *tag_dt; extern Dict *tag_dd; extern Dict *tag_dl; extern Dict *tag_td; extern Dict *tag_th; extern Dict *tag_tr; extern Dict *tag_col; extern Dict *tag_br; extern Dict *tag_a; extern Dict *tag_link; extern Dict *tag_b; extern Dict *tag_i; extern Dict *tag_strong; extern Dict *tag_em; extern Dict *tag_big; extern Dict *tag_small; extern Dict *tag_param; extern Dict *tag_option; extern Dict *tag_optgroup; extern Dict *tag_img; extern Dict *tag_map; extern Dict *tag_area; extern Dict *tag_nobr; extern Dict *tag_wbr; extern Dict *tag_layer; extern Dict *tag_center; extern Dict *tag_spacer; extern Dict *tag_font; extern Dict *tag_style; extern Dict *tag_script; extern Dict *tag_noscript; extern Dict *tag_table; extern Dict *tag_caption; extern Dict *tag_form; extern Dict *tag_textarea; extern Dict *tag_blockquote; extern Dict *tag_applet; extern Dict *tag_object; extern Dict *tag_div; extern Dict *tag_span; /* EOF */ #ifdef __cplusplus } #endif #endif