/*
  config.c - manage config properties

  (c) 1998-2000 (W3C) MIT, INRIA, Keio University
  (c) 2001 eGenix.com Software GmbH, Langenfeld
  See tidy.c for the copyright notice.
*/

/*
  Property names are case insensitive and should be less than 60
  characters in length. They must start at the beginning of the line,
  as whitespace at the start of a line signifies a line continuation.
*/

/* Include HTML Tidy Header */
#include "htmltidy.h"

/*
  Reset every field of *config to its built-in default value.

  config must point to a valid tidyconfig. The two string fields
  (alt_text, doctype_str) are overwritten with NULL without being
  freed first, so calling this on a config that already owns strings
  drops those pointers.
*/
void InitConfig(tidyconfig *config)
{
    /* Configuration */
    config->spaces = 2;                 /* default indentation */
    config->wraplen = 72;               /* default wrap margin */
    config->CharEncoding = ASCII;
    config->tabsize = 8;

    config->doctype_mode = doctype_auto; /* see doctype property */
    config->alt_text = NULL;            /* default text for alt attribute */
    config->doctype_str = NULL;         /* user specified doctype */

    config->writeback = no;             /* if true then output tidied markup */
    config->Output = yes;               /* if true normal output is generated */
    config->Errors = yes;               /* if true normal error output is generated */
    config->ShowWarnings = yes;         /* however errors are always shown */
    config->Quiet = no;                 /* no 'Parsing X', guessed DTD or summary */
    config->IndentContent = no;         /* indent content of appropriate tags */
    config->SmartIndent = no;           /* does text/block level content affect indentation */
    config->HideEndTags = no;           /* suppress optional end tags */
    config->XmlTags = no;               /* treat input as XML */
    config->XmlOut = no;                /* create output as XML */
    config->xHTML = no;                 /* output extensible HTML */
    config->XmlPi = no;                 /* add <?xml?> for XML docs */
    config->RawOut = no;                /* avoid mapping values > 127 to entities */
    config->UpperCaseTags = no;         /* output tags in upper not lower case */
    config->UpperCaseAttrs = no;        /* output attributes in upper not lower case */
    config->MakeClean = no;             /* replace presentational clutter by style rules */
    config->LogicalEmphasis = no;       /* replace i by em and b by strong */
    config->DropFontTags = no;          /* discard presentation tags */
    config->DropEmptyParas = yes;       /* discard empty <p> elements */
    config->FixComments = yes;          /* fix comments with adjacent hyphens */
    config->BreakBeforeBR = no;         /* o/p newline before <br> or not? */
    config->NumEntities = no;           /* use numeric entities */
    config->QuoteMarks = no;            /* output " marks as &quot; */
    config->QuoteNbsp = yes;            /* output non-breaking space as entity */
    config->QuoteAmpersand = yes;       /* output naked ampersand as &amp; */
    config->WrapAttVals = no;           /* wrap within attribute values */
    config->WrapScriptlets = no;        /* wrap within JavaScript string literals */
    config->WrapSection = yes;          /* wrap within section tags */
    config->WrapAsp = yes;              /* wrap within ASP pseudo elements */
    config->WrapJste = yes;             /* wrap within JSTE pseudo elements */
    config->WrapPhp = yes;              /* wrap within PHP pseudo elements */
    config->FixBackslash = yes;         /* fix URLs by replacing \ with / */
    config->IndentAttributes = no;      /* newline+indent before each attribute */
    config->XmlPIs = no;                /* if set to yes PIs must end with ?> */
    config->XmlSpace = no;              /* if set to yes adds xml:space attr as needed */
    config->EncloseBodyText = no;       /* if yes text at body is wrapped in <p>'s */
    config->EncloseBlockText = no;      /* if yes text in blocks is wrapped in <p>'s */
    config->Word2000 = no;              /* draconian cleaning for Word2000 */
    config->TidyMark = no;              /* add meta element indicating tidied doc */
    config->Emacs = no;                 /* if true format error output for GNU Emacs */
    config->LiteralAttribs = no;        /* if true attributes may use newlines */
}

/*
  Allocate a tidyconfig with MemAlloc and fill it with the defaults
  from InitConfig.

  Returns the new config, or NULL when MemAlloc returned NULL.
  Release the result with FreeConfig.
*/
tidyconfig *NewConfig(void)
{
    tidyconfig *config;

    config = (tidyconfig *)MemAlloc(sizeof(tidyconfig));
    if (config == NULL)
        return config;

    InitConfig(config);
    return config;
}

/*
  Ensure that config is self consistent.

  Options that imply other options are propagated here. The order of
  the checks matters: xHTML and XmlTags both switch XmlOut on, and the
  XmlOut rule that follows them then applies to either case.
*/
void AdjustConfig(tidyconfig *config)
{
    if (config->EncloseBlockText)
        config->EncloseBodyText = yes;

    /* avoid the need to set IndentContent when SmartIndent is set */
    if (config->SmartIndent)
        config->IndentContent = yes;

    /* disable wrapping */
    if (config->wraplen == 0)
        config->wraplen = 0x7FFFFFFF;

    /* Word 2000 needs o:p to be declared as inline */
    if (config->Word2000)
    {
        DefineInlineTag("o:p");
    }

    /* XHTML is written in lower case */
    if (config->xHTML)
    {
        config->XmlOut = yes;
        config->UpperCaseTags = no;
        config->UpperCaseAttrs = no;
    }

    /* if XML in, then XML out */
    if (config->XmlTags)
    {
        config->XmlOut = yes;
        config->XmlPIs = yes;
    }

    /* XML requires end tags */
    if (config->XmlOut)
    {
        config->QuoteAmpersand = yes;
        config->HideEndTags = no;
    }
}

/*
  Release a config together with the strings it owns (alt_text and
  doctype_str).

  A NULL config is accepted and ignored: NewConfig can return NULL, so
  a plain "create, use, free" cleanup path must not crash here.
*/
void FreeConfig(tidyconfig *config)
{
    if (config == NULL)
        return;

    if (config->alt_text)
        MemFree(config->alt_text);
    if (config->doctype_str)
        MemFree(config->doctype_str);
    MemFree(config);
}

#if 0

/*
  Everything below is compiled out. These are property-value parsers
  that read from a current-character variable `c` through helpers
  (SkipWhite, AdvanceChar, NextProperty) and store results through a
  `Location`; none of those are defined in this file.
*/

/* unsigned integers */
void ParseInt(Location location, char *option)
{
    int number = 0;
    Bool digits = no;

    SkipWhite();

    /* accumulate decimal digits; there is no overflow check */
    while(IsDigit(c))
    {
        number = c - '0' + (10 * number);
        digits = yes;
        AdvanceChar();
    }

    if (!digits)
        ReportBadArgument(option);

    *location.number = number;
    NextProperty();
}

/* true/false or yes/no (also 1/0); only looks at 1st char */
void ParseBool(Location location, char *option)
{
    Bool flag = no;

    SkipWhite();

    if (c == 't' || c == 'T' || c == 'y' || c == 'Y' || c == '1')
        flag = yes;
    else if (c == 'f' || c == 'F' || c == 'n' || c == 'N' || c == '0')
        flag = no;
    else
        ReportBadArgument(option);

    *location.logical = flag;
    NextProperty();
}

/* like ParseBool, but stores the negated value and does not accept 1/0 */
void ParseInvBool(Location location, char *option)
{
    Bool flag = no;

    SkipWhite();

    if (c == 't' || c == 'T' || c == 'y' || c == 'Y')
        flag = yes;
    else if (c == 'f' || c == 'F' || c == 'n' || c == 'N')
        flag = no;
    else
        ReportBadArgument(option);

    *location.logical = (Bool)(!flag);
    NextProperty();
}

/* a string excluding whitespace */
void ParseName(Location location, char *option)
{
    char buf[256];
    int i = 0;

    SkipWhite();

    while (i < 254 && c != EOF && !IsWhite(c))
    {
        buf[i++] = c;
        AdvanceChar();
    }

    buf[i] = '\0';

    if (i == 0)
        ReportBadArgument(option);

    *location.string = wstrdup(buf);
    NextProperty();
}

/* a space or comma separated list of tag names */
void ParseTagNames(Location location, char *option)
{
    char buf[1024];
    int i = 0;

    do
    {
        /* skip separators */
        if (c == ' ' || c == '\t' || c == ',')
        {
            AdvanceChar();
            continue;
        }

        /* a line end ends the list unless the next line starts with whitespace */
        if (c == '\r')
        {
            AdvanceChar();

            if (c == '\n')
                AdvanceChar();

            if (!(IsWhite((unsigned int) c)))
                break;
        }

        if (c == '\n')
        {
            AdvanceChar();

            if (!(IsWhite((unsigned int) c)))
                break;
        }

        /* collect one lower-cased tag name */
        while (i < 1022 && c != EOF && !IsWhite(c) && c != ',')
        {
            buf[i++] = ToLower(c);
            AdvanceChar();
        }

        buf[i] = '\0';

        /* add tag to dictionary */
        if(location.string == &inline_tags)
            DefineInlineTag(buf);
        else if (location.string == &block_tags)
            DefineBlockTag(buf);
        else if (location.string == &empty_tags)
            DefineEmptyTag(buf);
        else if (location.string == &pre_tags)
            DefinePreTag(buf);

        i = 0;
    }
    while (c != EOF);
}

/* a string including whitespace */
/* munges whitespace sequences */
/*
  NOTE(review): when the value starts with a quote, `delim` is set but
  the quote is not consumed, so the first loop iteration sees
  c == delim and breaks, leaving an empty string.
  NOTE(review): `waswhite` starts as yes and is only ever set to no, so
  only leading whitespace is dropped; later runs are copied as spaces
  one by one. Confirm both before re-enabling this code.
*/
void ParseString(Location location, char *option)
{
    char buf[8192];
    int i = 0;
    unsigned delim = 0;
    Bool waswhite = yes;

    SkipWhite();

    if (c == '"' || c == '\'')
        delim = c;

    while (i < 8190 && c != EOF)
    {
        /* treat \r\n \r or \n as line ends */
        if (c == '\r')
        {
            AdvanceChar();

            if (c != '\n' && !IsWhite(c))
                break;
        }

        if (c == '\n')
        {
            AdvanceChar();

            if (!IsWhite(c))
                break;
        }

        if (c == delim && delim != '\0')
            break;

        if (IsWhite(c))
        {
            if (waswhite)
            {
                AdvanceChar();
                continue;
            }

            c = ' ';
        }
        else
            waswhite = no;

        buf[i++] = c;
        AdvanceChar();
    }

    buf[i] = '\0';

    /* replace any previously stored value */
    if (*location.string)
        MemFree(*location.string);
#if 0
    if (i == 0)
        ReportBadArgument(option);
#endif
    *location.string = wstrdup(buf);
}

/* one of: ascii, latin1, raw, utf8, iso2022, mac (case insensitive) */
void ParseCharEncoding(Location location, char *option)
{
    char buf[64];
    int i = 0;

    SkipWhite();

    while (i < 62 && c != EOF && !IsWhite(c))
    {
        buf[i++] = c;
        AdvanceChar();
    }

    buf[i] = '\0';

    if (wstrcasecmp(buf, "ascii") == 0)
        *location.number = ASCII;
    else if (wstrcasecmp(buf, "latin1") == 0)
        *location.number = LATIN1;
    else if (wstrcasecmp(buf, "raw") == 0)
        *location.number = RAW;
    else if (wstrcasecmp(buf, "utf8") == 0)
        *location.number = UTF8;
    else if (wstrcasecmp(buf, "iso2022") == 0)
        *location.number = ISO2022;
    else if (wstrcasecmp(buf, "mac") == 0)
        *location.number = MACROMAN;
    else
        ReportBadArgument(option);

    NextProperty();
}

/* slight hack to avoid changes to pprint.c */
/* yes/true, no/false or auto; writes IndentContent and SmartIndent directly */
void ParseIndent(Location location, char *option)
{
    char buf[64];
    int i = 0;

    SkipWhite();

    while (i < 62 && c != EOF && !IsWhite(c))
    {
        buf[i++] = c;
        AdvanceChar();
    }

    buf[i] = '\0';

    if (wstrcasecmp(buf, "yes") == 0)
    {
        IndentContent = yes;
        SmartIndent = no;
    }
    else if (wstrcasecmp(buf, "true") == 0)
    {
        IndentContent = yes;
        SmartIndent = no;
    }
    else if (wstrcasecmp(buf, "no") == 0)
    {
        IndentContent = no;
        SmartIndent = no;
    }
    else if (wstrcasecmp(buf, "false") == 0)
    {
        IndentContent = no;
        SmartIndent = no;
    }
    else if (wstrcasecmp(buf, "auto") == 0)
    {
        IndentContent = yes;
        SmartIndent = yes;
    }
    else
        ReportBadArgument(option);

    NextProperty();
}

/*
  doctype: omit | auto | strict | loose | <fpi>

  where the fpi is a string similar to

     "-//ACME//DTD HTML 3.14159//EN"
*/
void ParseDocType(Location location, char *option)
{
    char buf[64];
    int i = 0;

    SkipWhite();

    /* "-//ACME//DTD HTML 3.14159//EN" or similar */
    if (c == '"')
    {
        ParseString(location, option);
        doctype_mode = doctype_user;
        return;
    }

    /* read first word */
    while (i < 62 && c != EOF && !IsWhite(c))
    {
        buf[i++] = c;
        AdvanceChar();
    }

    buf[i] = '\0';

    /* any word other than the ones below leaves the mode at auto */
    doctype_mode = doctype_auto;

    if (wstrcasecmp(buf, "omit") == 0)
        doctype_mode = doctype_omit;
    else if (wstrcasecmp(buf, "strict") == 0)
        doctype_mode = doctype_strict;
    else if (wstrcasecmp(buf, "loose") == 0 ||
             wstrcasecmp(buf, "transitional") == 0)
        doctype_mode = doctype_loose;
    else if (i == 0)
        ReportBadArgument(option);

    NextProperty();
}

#endif