/* pprint.c -- pretty print parse tree (c) 1998-2000 (W3C) MIT, INRIA, Keio University (c) 2001 eGenix.com Software GmbH, Langenfeld See tidy.c for the copyright notice. */ #include #include #include /* Include HTML Tidy Header */ #include "htmltidy.h" /* Pretty Printer State */ struct _pprintstate { Lexer *lexer; tidyconfig *config; /* == lexer->config; safes a pointer deref */ OutputStream *fout; /* Line buffer */ unsigned int *linebuf; unsigned int lbufsize; unsigned int linelen; unsigned int wraphere; /* State */ Bool InAttVal; Bool InString; }; typedef struct _pprintstate pprintstate; /* Block-level and unknown elements are printed on new lines and their contents indented 2 spaces Inline elements are printed inline. Inline content is wrapped on spaces (except in attribute values or preformatted text, after start tags and before end tags */ static void PPrintAsp(pprintstate *ppstate, unsigned int indent, Node *node); static void PPrintJste(pprintstate *ppstate, unsigned int indent, Node *node); static void PPrintPhp(pprintstate *ppstate, unsigned int indent, Node *node); #define NORMAL 0 #define PREFORMATTED 1 #define COMMENT 2 #define ATTRIBVALUE 4 #define NOWRAP 8 #define CDATA 16 /* 1010 A 1011 B 1100 C 1101 D 1110 E 1111 F */ /* return one less that the number of bytes used by UTF-8 char */ /* str points to 1st byte, *ch initialized to 1st byte */ unsigned int GetUTF8(unsigned char *str, unsigned int *ch) { unsigned int c, n, i, bytes; c = str[0]; if ((c & 0xE0) == 0xC0) /* 110X XXXX two bytes */ { n = c & 31; bytes = 2; } else if ((c & 0xF0) == 0xE0) /* 1110 XXXX three bytes */ { n = c & 15; bytes = 3; } else if ((c & 0xF8) == 0xF0) /* 1111 0XXX four bytes */ { n = c & 7; bytes = 4; } else if ((c & 0xFC) == 0xF8) /* 1111 10XX five bytes */ { n = c & 3; bytes = 5; } else if ((c & 0xFE) == 0xFC) /* 1111 110X six bytes */ { n = c & 1; bytes = 6; } else /* 0XXX XXXX one byte */ { *ch = c; return 0; } /* successor bytes should have the form 10XX XXXX */ for (i = 1; i < bytes; ++i) { c = str[i]; n = (n << 6) | (c & 0x3F); } *ch = n; return bytes - 1; } /* store char c as UTF-8 encoded byte stream */ char *PutUTF8(char *buf, unsigned int c) { if (c < 128) *buf++ = c; else if (c <= 0x7FF) { *buf++ = (0xC0 | (c >> 6)); *buf++ = (0x80 | (c & 0x3F)); } else if (c <= 0xFFFF) { *buf++ = (0xE0 | (c >> 12)); *buf++ = (0x80 | ((c >> 6) & 0x3F)); *buf++ = (0x80 | (c & 0x3F)); } else if (c <= 0x1FFFFF) { *buf++ = (0xF0 | (c >> 18)); *buf++ = (0x80 | ((c >> 12) & 0x3F)); *buf++ = (0x80 | ((c >> 6) & 0x3F)); *buf++ = (0x80 | (c & 0x3F)); } else { *buf++ = (0xF8 | (c >> 24)); *buf++ = (0x80 | ((c >> 18) & 0x3F)); *buf++ = (0x80 | ((c >> 12) & 0x3F)); *buf++ = (0x80 | ((c >> 6) & 0x3F)); *buf++ = (0x80 | (c & 0x3F)); } return buf; } static pprintstate *NewPrettyPrinter(Lexer *lexer, OutputStream *fout) { pprintstate *ppstate = (pprintstate *)MemAlloc(sizeof(pprintstate)); if (ppstate == NULL) return ppstate; ppstate->linebuf = NULL; ppstate->lbufsize = 0; ppstate->linelen = 0; ppstate->wraphere = 0; ppstate->InAttVal = no; ppstate->InString = no; ppstate->lexer = lexer; ppstate->config = lexer->config; ppstate->fout = fout; return ppstate; } static void FreePrettyPrinter(pprintstate *ppstate) { if (ppstate->linebuf) MemFree(ppstate->linebuf); ppstate->linebuf = NULL; ppstate->lbufsize = 0; MemFree(ppstate); } static void WriteChar(pprintstate *ppstate, unsigned int c) { if (ppstate->linelen + 1 >= ppstate->lbufsize) { while ((ppstate->linelen + 1) >= ppstate->lbufsize) { if (ppstate->lbufsize == 0) ppstate->lbufsize = 256; else ppstate->lbufsize = ppstate->lbufsize * 2; } ppstate->linebuf = (unsigned int *)MemRealloc(ppstate->linebuf, ppstate->lbufsize*sizeof(unsigned int)); } ppstate->linebuf[ppstate->linelen++] = c; } static void WrapLine(pprintstate *ppstate, unsigned int indent) { unsigned int i, *p, *q; if (ppstate->wraphere == 0) return; for (i = 0; i < indent; ++i) outc(' ', ppstate->fout); for (i = 0; i < ppstate->wraphere; ++i) outc(ppstate->linebuf[i], ppstate->fout); if (ppstate->InString) { outc(' ', ppstate->fout); outc('\\', ppstate->fout); } outc('\n', ppstate->fout); if (ppstate->linelen > ppstate->wraphere) { p = ppstate->linebuf; if (ppstate->linebuf[ppstate->wraphere] == ' ') ++ppstate->wraphere; q = ppstate->linebuf + ppstate->wraphere; WriteChar(ppstate, '\0'); ppstate->linelen--; while ((*p++ = *q++)); ppstate->linelen -= ppstate->wraphere; } else ppstate->linelen = 0; ppstate->wraphere = 0; } static void WrapAttrVal(pprintstate *ppstate, unsigned int indent, Bool inString) { unsigned int i, *p, *q; for (i = 0; i < indent; ++i) outc(' ', ppstate->fout); for (i = 0; i < ppstate->wraphere; ++i) outc(ppstate->linebuf[i], ppstate->fout); outc(' ', ppstate->fout); if (inString) outc('\\', ppstate->fout); outc('\n', ppstate->fout); if (ppstate->linelen > ppstate->wraphere) { p = ppstate->linebuf; if (ppstate->linebuf[ppstate->wraphere] == ' ') ++ppstate->wraphere; q = ppstate->linebuf + ppstate->wraphere; WriteChar(ppstate, '\0'); ppstate->linelen--; while ((*p++ = *q++)); ppstate->linelen -= ppstate->wraphere; } else ppstate->linelen = 0; ppstate->wraphere = 0; } static void PFlushLine(pprintstate *ppstate, unsigned int indent) { unsigned int i; if (ppstate->linelen > 0) { if (indent + ppstate->linelen >= ppstate->config->wraplen) WrapLine(ppstate, indent); if (!ppstate->InAttVal || ppstate->config->IndentAttributes) { for (i = 0; i < indent; ++i) outc(' ', ppstate->fout); } for (i = 0; i < ppstate->linelen; ++i) outc(ppstate->linebuf[i], ppstate->fout); } outc('\n', ppstate->fout); ppstate->linelen = ppstate->wraphere = 0; ppstate->InAttVal = no; } static void PCondFlushLine(pprintstate *ppstate, unsigned int indent) { unsigned int i; if (ppstate->linelen > 0) { if (indent + ppstate->linelen >= ppstate->config->wraplen) WrapLine(ppstate, indent); if (!ppstate->InAttVal || ppstate->config->IndentAttributes) { for (i = 0; i < indent; ++i) outc(' ', ppstate->fout); } for (i = 0; i < ppstate->linelen; ++i) outc(ppstate->linebuf[i], ppstate->fout); outc('\n', ppstate->fout); ppstate->linelen = ppstate->wraphere = 0; ppstate->InAttVal = no; } } static void PPrintChar(pprintstate *ppstate, unsigned int c, unsigned int mode) { char *p = NULL, entity[128]; if (c == ' ' && !(mode & (PREFORMATTED | COMMENT | ATTRIBVALUE))) { /* coerce a space character to a non-breaking space */ if (mode & NOWRAP) { /* by default XML doesn't define   */ if (ppstate->config->NumEntities || ppstate->config->XmlTags) { WriteChar(ppstate, '&'); WriteChar(ppstate, '#'); WriteChar(ppstate, '1'); WriteChar(ppstate, '6'); WriteChar(ppstate, '0'); WriteChar(ppstate, ';'); } else /* otherwise use named entity */ { WriteChar(ppstate, '&'); WriteChar(ppstate, 'n'); WriteChar(ppstate, 'b'); WriteChar(ppstate, 's'); WriteChar(ppstate, 'p'); WriteChar(ppstate, ';'); } return; } else ppstate->wraphere = ppstate->linelen; } /* comment characters are passed raw */ if (mode & COMMENT) { WriteChar(ppstate, c); return; } /* except in CDATA map < to < etc. */ if (! (mode & CDATA) ) { if (c == '<') { WriteChar(ppstate, '&'); WriteChar(ppstate, 'l'); WriteChar(ppstate, 't'); WriteChar(ppstate, ';'); return; } if (c == '>') { WriteChar(ppstate, '&'); WriteChar(ppstate, 'g'); WriteChar(ppstate, 't'); WriteChar(ppstate, ';'); return; } /* naked '&' chars can be left alone or quoted as & The latter is required for XML where naked '&' are illegal. */ if (c == '&' && ppstate->config->QuoteAmpersand) { WriteChar(ppstate, '&'); WriteChar(ppstate, 'a'); WriteChar(ppstate, 'm'); WriteChar(ppstate, 'p'); WriteChar(ppstate, ';'); return; } if (c == '"' && ppstate->config->QuoteMarks) { WriteChar(ppstate, '&'); WriteChar(ppstate, 'q'); WriteChar(ppstate, 'u'); WriteChar(ppstate, 'o'); WriteChar(ppstate, 't'); WriteChar(ppstate, ';'); return; } if (c == '\'' && ppstate->config->QuoteMarks) { WriteChar(ppstate, '&'); WriteChar(ppstate, '#'); WriteChar(ppstate, '3'); WriteChar(ppstate, '9'); WriteChar(ppstate, ';'); return; } if (c == 160 && ppstate->config->CharEncoding != RAW) { if (ppstate->config->QuoteNbsp) { WriteChar(ppstate, '&'); if (ppstate->config->NumEntities) { WriteChar(ppstate, '#'); WriteChar(ppstate, '1'); WriteChar(ppstate, '6'); WriteChar(ppstate, '0'); } else { WriteChar(ppstate, 'n'); WriteChar(ppstate, 'b'); WriteChar(ppstate, 's'); WriteChar(ppstate, 'p'); } WriteChar(ppstate, ';'); } else WriteChar(ppstate, c); return; } } /* otherwise ISO 2022 characters are passed raw */ if (ppstate->config->CharEncoding == ISO2022 || ppstate->config->CharEncoding == RAW) { WriteChar(ppstate, c); return; } /* if preformatted text, map   to space */ if (c == 160 && (mode & PREFORMATTED)) { WriteChar(ppstate, ' '); return; } /* Filters from Word and PowerPoint often use smart quotes resulting in character codes between 128 and 159. Unfortunately, the corresponding HTML 4.0 entities for these are not widely supported. The following converts dashes and quotation marks to the nearest ASCII equivalent. My thanks to Andrzej Novosiolov for his help with this code. */ if (ppstate->config->MakeClean) { if (c >= 0x2013 && c <= 0x201E) { switch (c) { case 0x2013: case 0x2014: c = '-'; break; case 0x2018: case 0x2019: case 0x201A: c = '\''; break; case 0x201C: case 0x201D: case 0x201E: c = '"'; break; } } } /* don't map latin-1 chars to entities */ if (ppstate->config->CharEncoding == LATIN1) { if (c > 255) /* multi byte chars */ { if (!ppstate->config->NumEntities && (p = EntityName(c)) != NULL) sprintf(entity, "&%s;", p); else sprintf(entity, "&#%u;", c); for (p = entity; *p; ++p) WriteChar(ppstate, *p); return; } if (c > 126 && c < 160) { sprintf(entity, "&#%d;", c); for (p = entity; *p; ++p) WriteChar(ppstate, *p); return; } WriteChar(ppstate, c); return; } /* don't map utf8 chars to entities */ if (ppstate->config->CharEncoding == UTF8) { WriteChar(ppstate, c); return; } /* use numeric entities only for XML */ if (ppstate->config->XmlTags) { /* if ASCII use numeric entities for chars > 127 */ if (c > 127 && ppstate->config->CharEncoding == ASCII) { sprintf(entity, "&#%u;", c); for (p = entity; *p; ++p) WriteChar(ppstate, *p); return; } /* otherwise output char raw */ WriteChar(ppstate, c); return; } /* default treatment for ASCII */ if (c > 126 || (c < ' ' && c != '\t')) { if (!ppstate->config->NumEntities && (p = EntityName(c)) != NULL) sprintf(entity, "&%s;", p); else sprintf(entity, "&#%u;", c); for (p = entity; *p; ++p) WriteChar(ppstate, *p); return; } WriteChar(ppstate, c); } /* The line buffer is unsigned int not char so we can hold Unicode values unencoded. The translation to UTF-8 is deferred to the outc routine called to flush the line buffer. */ static void PPrintText(pprintstate *ppstate, unsigned int mode, unsigned int indent, unsigned int start, unsigned int end) { unsigned int i, c; for (i = start; i < end; ++i) { if (indent + ppstate->linelen >= ppstate->config->wraplen) WrapLine(ppstate, indent); c = (unsigned char)ppstate->lexer->lexbuf[i]; /* look for UTF-8 multibyte character */ if (c > 0x7F) i += GetUTF8((unsigned char *)ppstate->lexer->lexbuf + i, &c); if (c == '\n') { PFlushLine(ppstate, indent); continue; } PPrintChar(ppstate, c, mode); } } static void PPrintString(pprintstate *ppstate, unsigned int indent, char *str) { while (*str != '\0') WriteChar(ppstate, *str++); } static void PPrintAttrValue(pprintstate *ppstate, unsigned int indent, char *value, int delim, Bool wrappable) { unsigned int c; Bool wasinstring = no; int mode = (wrappable ? (NORMAL | ATTRIBVALUE) : (PREFORMATTED | ATTRIBVALUE)); /* look for ASP, Tango or PHP instructions for computed attribute value */ if (value && value[0] == '<') { if (value[1] == '%' || value[1] == '@'|| wstrncmp(value, "config->XmlOut) { if (indent + ppstate->linelen < ppstate->config->wraplen) ppstate->wraphere = ppstate->linelen; if (indent + ppstate->linelen >= ppstate->config->wraplen) WrapLine(ppstate, indent); if (indent + ppstate->linelen < ppstate->config->wraplen) ppstate->wraphere = ppstate->linelen; else PCondFlushLine(ppstate, indent); } WriteChar(ppstate, delim); if (value) { ppstate->InString = no; while (*value != '\0') { c = (unsigned char)*value; if (wrappable && c == ' ' && indent + ppstate->linelen < ppstate->config->wraplen) { ppstate->wraphere = ppstate->linelen; wasinstring = ppstate->InString; } if (wrappable && ppstate->wraphere > 0 && indent + ppstate->linelen >= ppstate->config->wraplen) WrapAttrVal(ppstate, indent, wasinstring); if (c == (unsigned int)delim) { char *entity; entity = (c == '"' ? """ : "'"); while (*entity != '\0') WriteChar(ppstate, *entity++); ++value; continue; } else if (c == '"') { if (ppstate->config->QuoteMarks) { WriteChar(ppstate, '&'); WriteChar(ppstate, 'q'); WriteChar(ppstate, 'u'); WriteChar(ppstate, 'o'); WriteChar(ppstate, 't'); WriteChar(ppstate, ';'); } else WriteChar(ppstate, '"'); if (delim == '\'') ppstate->InString = (Bool)(!ppstate->InString); ++value; continue; } else if (c == '\'') { if (ppstate->config->QuoteMarks) { WriteChar(ppstate, '&'); WriteChar(ppstate, '#'); WriteChar(ppstate, '3'); WriteChar(ppstate, '9'); WriteChar(ppstate, ';'); } else WriteChar(ppstate, '\''); if (delim == '"') ppstate->InString = (Bool)(!ppstate->InString); ++value; continue; } /* look for UTF-8 multibyte character */ if (c > 0x7F) value += GetUTF8((unsigned char *)value, &c); ++value; if (c == '\n') { PFlushLine(ppstate, indent); continue; } PPrintChar(ppstate, c, mode); } } ppstate->InString = no; WriteChar(ppstate, delim); } static void PPrintAttribute(pprintstate *ppstate, unsigned int indent, Node *node, AttVal *attr) { char *name; Bool wrappable = no; if (ppstate->config->IndentAttributes) { PFlushLine(ppstate, indent); indent += ppstate->config->spaces; } name = attr->attribute; if (indent + ppstate->linelen >= ppstate->config->wraplen) WrapLine(ppstate, indent); if (!ppstate->config->XmlTags && !ppstate->config->XmlOut && attr->dict) { if (IsScript(name)) wrappable = ppstate->config->WrapScriptlets; else if (!attr->dict->nowrap && ppstate->config->WrapAttVals) wrappable = yes; } if (indent + ppstate->linelen < ppstate->config->wraplen) { ppstate->wraphere = ppstate->linelen; WriteChar(ppstate, ' '); } else { PCondFlushLine(ppstate, indent); WriteChar(ppstate, ' '); } while (*name != '\0') WriteChar(ppstate, FoldCase(ppstate->lexer, *name++, ppstate->config->UpperCaseAttrs)); if (indent + ppstate->linelen >= ppstate->config->wraplen) WrapLine(ppstate, indent); if (attr->value == NULL) { if (ppstate->config->XmlTags || ppstate->config->XmlOut) PPrintAttrValue(ppstate, indent, attr->attribute, attr->delim, yes); else if (!IsBoolAttribute(attr) && !IsNewNode(node)) PPrintAttrValue(ppstate, indent, "", attr->delim, yes); else if (indent + ppstate->linelen < ppstate->config->wraplen) ppstate->wraphere = ppstate->linelen; } else PPrintAttrValue(ppstate, indent, attr->value, attr->delim, wrappable); } static void PPrintAttrs(pprintstate *ppstate, unsigned int indent, Node *node, AttVal *attr) { if (attr) { if (attr->next) PPrintAttrs(ppstate, indent, node, attr->next); if (attr->attribute != NULL) PPrintAttribute(ppstate, indent, node, attr); else if (attr->asp != NULL) { WriteChar(ppstate, ' '); PPrintAsp(ppstate, indent, attr->asp); } else if (attr->php != NULL) { WriteChar(ppstate, ' '); PPrintPhp(ppstate, indent, attr->php); } } /* add xml:space attribute to pre and other elements */ if (ppstate->config->XmlOut == yes && ppstate->config->XmlSpace && XMLPreserveWhiteSpace(node) && !GetAttrByName(node, "xml:space")) PPrintString(ppstate, indent, " xml:space=\"preserve\""); } /* Line can be wrapped immediately after inline start tag provided if follows a text node ending in a space, or it parent is an inline element that that rule applies to. This behaviour was reverse engineered from Netscape 3.0 */ static Bool AfterSpace(pprintstate *ppstate, Node *node) { Node *prev; unsigned int c; if (!node || !node->tag || !(node->tag->model & CM_INLINE)) return yes; prev = node->prev; if (prev) { if (prev->type == TextNode && prev->end > prev->start) { c = (unsigned char)ppstate->lexer->lexbuf[prev->end - 1]; if (c == 160 || c == ' ' || c == '\n') return yes; } return no; } return AfterSpace(ppstate, node->parent); } static void PPrintTag(pprintstate *ppstate, unsigned int mode, unsigned int indent, Node *node) { char c, *p; WriteChar(ppstate, '<'); if (node->type == EndTag) WriteChar(ppstate, '/'); for (p = node->element; (c = *p); ++p) WriteChar(ppstate, FoldCase(ppstate->lexer, c, ppstate->config->UpperCaseTags)); PPrintAttrs(ppstate, indent, node, node->attributes); if ((ppstate->config->XmlOut == yes || ppstate->lexer->isvoyager) && (node->type == StartEndTag || node->tag->model & CM_EMPTY )) { WriteChar(ppstate, ' '); /* compatibility hack */ WriteChar(ppstate, '/'); } WriteChar(ppstate, '>');; if (node->type != StartEndTag && !(mode & PREFORMATTED)) { if (indent + ppstate->linelen >= ppstate->config->wraplen) WrapLine(ppstate, indent); if (indent + ppstate->linelen < ppstate->config->wraplen) { /* wrap after start tag if is
or if it's not inline or it is an empty tag followed by */ if (AfterSpace(ppstate, node)) { if (!(mode & NOWRAP) && (!(node->tag->model & CM_INLINE) || (node->tag == tag_br) || ((node->tag->model & CM_EMPTY) && node->next == NULL && node->parent->tag == tag_a))) { ppstate->wraphere = ppstate->linelen; } } } else PCondFlushLine(ppstate, indent); } } static void PPrintEndTag(pprintstate *ppstate, unsigned int mode, unsigned int indent, Node *node) { char c, *p; /* Netscape ignores SGML standard by not ignoring a line break before or etc. To avoid rendering this as an underlined space, I disable line wrapping before inline end tags by the #if 0 ... #endif */ #if 0 if (indent + ppstate->linelen < ppstate->config->wraplen && !(mode & NOWRAP)) ppstate->wraphere = ppstate->linelen; #endif WriteChar(ppstate, '<'); WriteChar(ppstate, '/'); for (p = node->element; (c = *p); ++p) WriteChar(ppstate, FoldCase(ppstate->lexer, c, ppstate->config->UpperCaseTags)); WriteChar(ppstate, '>'); } static void PPrintComment(pprintstate *ppstate, unsigned int indent, Node *node) { if (indent + ppstate->linelen < ppstate->config->wraplen) ppstate->wraphere = ppstate->linelen; WriteChar(ppstate, '<'); WriteChar(ppstate, '!'); WriteChar(ppstate, '-'); WriteChar(ppstate, '-'); #if 0 if (ppstate->linelen < ppstate->config->wraplen) ppstate->wraphere = ppstate->linelen; #endif PPrintText(ppstate, COMMENT, indent, node->start, node->end); #if 0 if (indent + ppstate->linelen < ppstate->config->wraplen) ppstate->wraphere = ppstate->linelen; WriteChar(ppstate, '-'); WriteChar(ppstate, '-'); #endif WriteChar(ppstate, '>'); if (node->linebreak) PFlushLine(ppstate, indent); } static void PPrintDocType(pprintstate *ppstate, unsigned int indent, Node *node) { Bool q = ppstate->config->QuoteMarks; ppstate->config->QuoteMarks = no; if (indent + ppstate->linelen < ppstate->config->wraplen) ppstate->wraphere = ppstate->linelen; PCondFlushLine(ppstate, indent); WriteChar(ppstate, '<'); WriteChar(ppstate, '!'); WriteChar(ppstate, 'D'); WriteChar(ppstate, 'O'); WriteChar(ppstate, 'C'); WriteChar(ppstate, 'T'); WriteChar(ppstate, 'Y'); WriteChar(ppstate, 'P'); WriteChar(ppstate, 'E'); WriteChar(ppstate, ' '); if (indent + ppstate->linelen < ppstate->config->wraplen) ppstate->wraphere = ppstate->linelen; PPrintText(ppstate, 0, indent, node->start, node->end); if (ppstate->linelen < ppstate->config->wraplen) ppstate->wraphere = ppstate->linelen; WriteChar(ppstate, '>'); ppstate->config->QuoteMarks = q; PCondFlushLine(ppstate, indent); } static void PPrintPI(pprintstate *ppstate, unsigned int indent, Node *node) { if (indent + ppstate->linelen < ppstate->config->wraplen) ppstate->wraphere = ppstate->linelen; WriteChar(ppstate, '<'); WriteChar(ppstate, '?'); /* set CDATA to pass < and > unescaped */ PPrintText(ppstate, CDATA, indent, node->start, node->end); if (ppstate->lexer->lexbuf[node->end - 1] != '?') WriteChar(ppstate, '?'); WriteChar(ppstate, '>'); PCondFlushLine(ppstate, indent); } /* note ASP and JSTE share <% ... %> syntax */ static void PPrintAsp(pprintstate *ppstate, unsigned int indent, Node *node) { int savewraplen = ppstate->config->wraplen; /* disable wrapping if so requested */ if (!ppstate->config->WrapAsp || !ppstate->config->WrapJste) ppstate->config->wraplen = 0xFFFFFF; /* a very large number */ #if 0 if (indent + ppstate->linelen < ppstate->config->wraplen) ppstate->wraphere = ppstate->linelen; #endif WriteChar(ppstate, '<'); WriteChar(ppstate, '%'); PPrintText(ppstate, (ppstate->config->WrapAsp ? CDATA : COMMENT), indent, node->start, node->end); WriteChar(ppstate, '%'); WriteChar(ppstate, '>'); /* PCondFlushLine(ppstate, indent); */ ppstate->config->wraplen = savewraplen; } /* JSTE also supports <# ... #> syntax */ static void PPrintJste(pprintstate *ppstate, unsigned int indent, Node *node) { int savewraplen = ppstate->config->wraplen; /* disable wrapping if so requested */ if (!ppstate->config->WrapAsp) ppstate->config->wraplen = 0xFFFFFF; /* a very large number */ WriteChar(ppstate, '<'); WriteChar(ppstate, '#'); PPrintText(ppstate, (ppstate->config->WrapJste ? CDATA : COMMENT), indent, node->start, node->end); WriteChar(ppstate, '#'); WriteChar(ppstate, '>'); /* PCondFlushLine(ppstate, indent); */ ppstate->config->wraplen = savewraplen; } /* PHP is based on XML processing instructions */ static void PPrintPhp(pprintstate *ppstate, unsigned int indent, Node *node) { int savewraplen = ppstate->config->wraplen; /* disable wrapping if so requested */ if (!ppstate->config->WrapPhp) ppstate->config->wraplen = 0xFFFFFF; /* a very large number */ #if 0 if (indent + ppstate->linelen < ppstate->config->wraplen) ppstate->wraphere = ppstate->linelen; #endif WriteChar(ppstate, '<'); WriteChar(ppstate, '?'); PPrintText(ppstate, (ppstate->config->WrapPhp ? CDATA : COMMENT), indent, node->start, node->end); WriteChar(ppstate, '?'); WriteChar(ppstate, '>'); /* PCondFlushLine(ppstate, indent); */ ppstate->config->wraplen = savewraplen; } static void PPrintCDATA(pprintstate *ppstate, unsigned int indent, Node *node) { int savewraplen = ppstate->config->wraplen; PCondFlushLine(ppstate, indent); /* disable wrapping */ ppstate->config->wraplen = 0xFFFFFF; /* a very large number */ WriteChar(ppstate, '<'); WriteChar(ppstate, '!'); WriteChar(ppstate, '['); WriteChar(ppstate, 'C'); WriteChar(ppstate, 'D'); WriteChar(ppstate, 'A'); WriteChar(ppstate, 'T'); WriteChar(ppstate, 'A'); WriteChar(ppstate, '['); PPrintText(ppstate, COMMENT, indent, node->start, node->end); WriteChar(ppstate, ']'); WriteChar(ppstate, ']'); WriteChar(ppstate, '>'); PCondFlushLine(ppstate, indent); ppstate->config->wraplen = savewraplen; } static void PPrintSection(pprintstate *ppstate, unsigned int indent, Node *node) { int savewraplen = ppstate->config->wraplen; /* disable wrapping if so requested */ if (!ppstate->config->WrapSection) ppstate->config->wraplen = 0xFFFFFF; /* a very large number */ #if 0 if (indent + ppstate->linelen < ppstate->config->wraplen) ppstate->wraphere = ppstate->linelen; #endif WriteChar(ppstate, '<'); WriteChar(ppstate, '!'); WriteChar(ppstate, '['); PPrintText(ppstate, (ppstate->config->WrapSection ? CDATA : COMMENT), indent, node->start, node->end); WriteChar(ppstate, ']'); WriteChar(ppstate, '>'); /* PCondFlushLine(ppstate, indent); */ ppstate->config->wraplen = savewraplen; } static Bool ShouldIndent(pprintstate *ppstate, Node *node) { if (ppstate->config->IndentContent == no) return no; if (ppstate->config->SmartIndent) { if (node->content && (node->tag->model & CM_NO_INDENT)) { for (node = node->content; node; node = node->next) if (node->tag && node->tag->model & CM_BLOCK) return yes; return no; } if (node->tag->model & CM_HEADING) return no; if (node->tag == tag_p) return no; if (node->tag == tag_title) return no; } if (node->tag->model & (CM_FIELD | CM_OBJECT)) return yes; if (node->tag == tag_map) return yes; return (Bool)(!(node->tag->model & CM_INLINE)); } static void _PPrintTree(pprintstate *ppstate, unsigned int mode, unsigned int indent, Node *node) { Node *content, *last; if (node == NULL) return; if (node->type == TextNode) PPrintText(ppstate, mode, indent, node->start, node->end); else if (node->type == CommentTag) { PPrintComment(ppstate, indent, node); } else if (node->type == RootNode) { for (content = node->content; content != NULL; content = content->next) _PPrintTree(ppstate, mode, indent, content); } else if (node->type == DocTypeTag) PPrintDocType(ppstate, indent, node); else if (node->type == ProcInsTag) PPrintPI(ppstate, indent, node); else if (node->type == CDATATag) PPrintCDATA(ppstate, indent, node); else if (node->type == SectionTag) PPrintSection(ppstate, indent, node); else if (node->type == AspTag) PPrintAsp(ppstate, indent, node); else if (node->type == JsteTag) PPrintJste(ppstate, indent, node); else if (node->type == PhpTag) PPrintPhp(ppstate, indent, node); else if (node->tag->model & CM_EMPTY || node->type == StartEndTag) { if (!(node->tag->model & CM_INLINE)) PCondFlushLine(ppstate, indent); if (node->tag == tag_br && node->prev && node->prev->tag != tag_br && ppstate->config->BreakBeforeBR) PFlushLine(ppstate, indent); if (ppstate->config->MakeClean && node->tag == tag_wbr) PPrintString(ppstate, indent, " "); else PPrintTag(ppstate, mode, indent, node); if (node->tag == tag_param || node->tag == tag_area) PCondFlushLine(ppstate, indent); else if (node->tag == tag_br || node->tag == tag_hr) PFlushLine(ppstate, indent); } else /* some kind of container element */ { if (node->tag && node->tag->parser == ParsePre) { PCondFlushLine(ppstate, indent); indent = 0; PCondFlushLine(ppstate, indent); PPrintTag(ppstate, mode, indent, node); PFlushLine(ppstate, indent); for (content = node->content; content != NULL; content = content->next) _PPrintTree(ppstate, (mode | PREFORMATTED | NOWRAP), indent, content); PCondFlushLine(ppstate, indent); PPrintEndTag(ppstate, mode, indent, node); PFlushLine(ppstate, indent); if (ppstate->config->IndentContent == no && node->next != NULL) PFlushLine(ppstate, indent); } else if (node->tag == tag_style || node->tag == tag_script) { PCondFlushLine(ppstate, indent); indent = 0; PCondFlushLine(ppstate, indent); PPrintTag(ppstate, mode, indent, node); PFlushLine(ppstate, indent); for (content = node->content; content != NULL; content = content->next) _PPrintTree(ppstate, (mode | PREFORMATTED | NOWRAP |CDATA), indent, content); PCondFlushLine(ppstate, indent); PPrintEndTag(ppstate, mode, indent, node); PFlushLine(ppstate, indent); if (ppstate->config->IndentContent == no && node->next != NULL) PFlushLine(ppstate, indent); } else if (node->tag->model & CM_INLINE) { if (ppstate->config->MakeClean) { /* discards and tags */ if (node->tag == tag_font) { for (content = node->content; content != NULL; content = content->next) _PPrintTree(ppstate, mode, indent, content); return; } /* replace ... by   or   etc. */ if (node->tag == tag_nobr) { for (content = node->content; content != NULL; content = content->next) _PPrintTree(ppstate, mode|NOWRAP, indent, content); return; } } /* otherwise a normal inline element */ PPrintTag(ppstate, mode, indent, node); /* indent content for SELECT, TEXTAREA, MAP, OBJECT and APPLET */ if (ShouldIndent(ppstate, node)) { PCondFlushLine(ppstate, indent); indent += ppstate->config->spaces; for (content = node->content; content != NULL; content = content->next) _PPrintTree(ppstate, mode, indent, content); PCondFlushLine(ppstate, indent); indent -= ppstate->config->spaces; PCondFlushLine(ppstate, indent); } else { for (content = node->content; content != NULL; content = content->next) _PPrintTree(ppstate, mode, indent, content); } PPrintEndTag(ppstate, mode, indent, node); } else /* other tags */ { PCondFlushLine(ppstate, indent); if (ppstate->config->SmartIndent && node->prev != NULL) PFlushLine(ppstate, indent); if (ppstate->config->HideEndTags == no || !(node->tag && (node->tag->model & CM_OMITST))) { PPrintTag(ppstate, mode, indent, node); if (ShouldIndent(ppstate, node)) PCondFlushLine(ppstate, indent); else if (node->tag->model & CM_HTML || node->tag == tag_noframes || (node->tag->model & CM_HEAD && !(node->tag == tag_title))) PFlushLine(ppstate, indent); } last = NULL; for (content = node->content; content != NULL; content = content->next) { /* kludge for naked text before block level tag */ if (last && !ppstate->config->IndentContent && last->type == TextNode && content->tag && content->tag->model & CM_BLOCK) { PFlushLine(ppstate, indent); PFlushLine(ppstate, indent); } _PPrintTree(ppstate, mode, (ShouldIndent(ppstate, node) ? indent + ppstate->config->spaces : indent), content); last = content; } /* don't flush line for td and th */ if (ShouldIndent(ppstate, node) || ((node->tag->model & CM_HTML || node->tag == tag_noframes || (node->tag->model & CM_HEAD && !(node->tag == tag_title))) && ppstate->config->HideEndTags == no)) { PCondFlushLine(ppstate, (ppstate->config->IndentContent ? indent + ppstate->config->spaces : indent)); if (ppstate->config->HideEndTags == no || !(node->tag->model & CM_OPT)) { PPrintEndTag(ppstate, mode, indent, node); PFlushLine(ppstate, indent); } } else { if (ppstate->config->HideEndTags == no || !(node->tag->model & CM_OPT)) PPrintEndTag(ppstate, mode, indent, node); PFlushLine(ppstate, indent); } if (ppstate->config->IndentContent == no && node->next != NULL && ppstate->config->HideEndTags == no && (node->tag->model & (CM_BLOCK|CM_LIST|CM_DEFLIST|CM_TABLE))) { PFlushLine(ppstate, indent); } } } } void PPrintTree(Lexer *lexer, OutputStream *fout, unsigned int mode, unsigned int indent, Node *node) { pprintstate *ppstate; if (node == NULL) return; ppstate = NewPrettyPrinter(lexer, fout); if (ppstate == NULL) return; _PPrintTree(ppstate, mode, indent, node); PFlushLine(ppstate, indent); FreePrettyPrinter(ppstate); } static void _PPrintXMLTree(pprintstate *ppstate, unsigned int mode, unsigned int indent, Node *node) { if (node == NULL) return; if (node->type == TextNode) { PPrintText(ppstate, mode, indent, node->start, node->end); } else if (node->type == CommentTag) { PCondFlushLine(ppstate, indent); PPrintComment(ppstate, 0, node); PCondFlushLine(ppstate, 0); } else if (node->type == RootNode) { Node *content; for (content = node->content; content != NULL; content = content->next) _PPrintXMLTree(ppstate, mode, indent, content); } else if (node->type == DocTypeTag) PPrintDocType(ppstate, indent, node); else if (node->type == ProcInsTag) PPrintPI(ppstate, indent, node); else if (node->type == CDATATag) PPrintCDATA(ppstate, indent, node); else if (node->type == SectionTag) PPrintSection(ppstate, indent, node); else if (node->type == AspTag) PPrintAsp(ppstate, indent, node); else if (node->type == JsteTag) PPrintJste(ppstate, indent, node); else if (node->type == PhpTag) PPrintPhp(ppstate, indent, node); else if (node->tag->model & CM_EMPTY || node->type == StartEndTag) { PCondFlushLine(ppstate, indent); PPrintTag(ppstate, mode, indent, node); PFlushLine(ppstate, indent); if (node->next) PFlushLine(ppstate, indent); } else /* some kind of container element */ { Node *content; Bool mixed = no; int cindent = 0; for (content = node->content; content; content = content->next) { if (content->type == TextNode) { mixed = yes; break; } } PCondFlushLine(ppstate, indent); if (XMLPreserveWhiteSpace(node)) { indent = 0; cindent = 0; mixed = no; } else if (mixed) cindent = indent; else cindent = indent + ppstate->config->spaces; PPrintTag(ppstate, mode, indent, node); if (!mixed) PFlushLine(ppstate, indent); for (content = node->content; content != NULL; content = content->next) _PPrintXMLTree(ppstate, mode, cindent, content); if (!mixed) PCondFlushLine(ppstate, cindent); PPrintEndTag(ppstate, mode, indent, node); PCondFlushLine(ppstate, indent); if (node->next) PFlushLine(ppstate, indent); } } void PPrintXMLTree(Lexer *lexer, OutputStream *fout, unsigned int mode, unsigned int indent, Node *node) { pprintstate *ppstate; if (node == NULL) return; ppstate = NewPrettyPrinter(lexer, fout); if (ppstate == NULL) return; _PPrintXMLTree(ppstate, mode, indent, node); PFlushLine(ppstate, indent); FreePrettyPrinter(ppstate); } Node *FindHead(Node *root) { Node *node; node = root->content; while (node && node->tag != tag_html) node = node->next; if (node == NULL) return NULL; node = node->content; while (node && node->tag != tag_head) node = node->next; return node; } Node *FindBody(Node *root) { Node *node; node = root->content; while (node && node->tag != tag_html) node = node->next; if (node == NULL) return NULL; node = node->content; while (node && node->tag != tag_body) node = node->next; return node; }