/*
config.c - manage config properties
(c) 1998-2000 (W3C) MIT, INRIA, Keio University
(c) 2001 eGenix.com Software GmbH, Langenfeld
See tidy.c for the copyright notice.
*/
/*
property names are case-insensitive, should be less than
60 characters in length, and must start at the beginning of
the line, as whitespace at the start of a line signifies a
line continuation.
*/
/* Include HTML Tidy Header */
#include "htmltidy.h"
/*
 Reset every field listed below to its built-in default.

 config - configuration record to fill in.  It is dereferenced
          unconditionally, so it must not be NULL.

 The string fields alt_text and doctype_str are simply set to NULL;
 whatever they held before is overwritten, not released.
*/
void InitConfig(tidyconfig *config)
{
    /* ---- layout and character encoding ---- */
    config->spaces = 2;                 /* indentation step */
    config->wraplen = 72;               /* wrap margin */
    config->tabsize = 8;
    config->CharEncoding = ASCII;

    /* ---- doctype and alt text ---- */
    config->doctype_mode = doctype_auto;    /* see the doctype property */
    config->doctype_str = NULL;             /* doctype given by the user */
    config->alt_text = NULL;                /* default text for the alt attribute */

    /* ---- what gets written and reported ---- */
    config->writeback = no;             /* when true, output the tidied markup */
    config->Output = yes;               /* when true, normal output is generated */
    config->Errors = yes;               /* when true, normal error output is generated */
    config->ShowWarnings = yes;         /* errors are shown regardless of this */
    config->Quiet = no;                 /* no 'Parsing X', guessed DTD or summary */
    config->Emacs = no;                 /* when true, format errors for GNU Emacs */
    config->TidyMark = no;              /* add a meta element marking the doc as tidied */

    /* ---- indentation ---- */
    config->IndentContent = no;         /* indent content of appropriate tags */
    config->SmartIndent = no;           /* does text/block level content affect indentation */
    config->IndentAttributes = no;      /* newline + indent before each attribute */

    /* ---- XML / XHTML ---- */
    config->XmlTags = no;               /* treat input as XML */
    config->XmlOut = no;                /* create output as XML */
    config->xHTML = no;                 /* output extensible HTML */
    config->XmlPi = no;                 /* add the XML declaration for XML docs */
    config->XmlPIs = no;                /* when yes, PIs must end with ?> */
    config->XmlSpace = no;              /* when yes, add the xml:space attr as needed */

    /* ---- markup clean-up ---- */
    config->HideEndTags = no;           /* suppress optional end tags */
    config->UpperCaseTags = no;         /* tags in upper rather than lower case */
    config->UpperCaseAttrs = no;        /* attributes in upper rather than lower case */
    config->MakeClean = no;             /* replace presentational clutter by style rules */
    config->LogicalEmphasis = no;       /* replace i by em and b by strong */
    config->DropFontTags = no;          /* discard presentation tags */
    config->DropEmptyParas = yes;       /* discard empty p elements */
    config->FixComments = yes;          /* fix comments with adjacent hyphens */
    config->FixBackslash = yes;         /* fix URLs by replacing \ with / */
    config->BreakBeforeBR = no;         /* output a newline before BR elements or not? */
    config->EncloseBodyText = no;       /* when yes, text at body level is wrapped in P elements */
    config->EncloseBlockText = no;      /* when yes, text in blocks is wrapped in P elements */
    config->Word2000 = no;              /* draconian cleaning for Word2000 */
    config->LiteralAttribs = no;        /* when true, attributes may use newlines */

    /* ---- entities and quoting ---- */
    config->RawOut = no;                /* avoid mapping values > 127 to entities */
    config->NumEntities = no;           /* use numeric entities */
    config->QuoteMarks = no;            /* output double-quote marks as the quot entity */
    config->QuoteNbsp = yes;            /* output non-breaking space as an entity */
    config->QuoteAmpersand = yes;       /* output a naked ampersand as the amp entity */

    /* ---- line wrapping ---- */
    config->WrapAttVals = no;           /* wrap within attribute values */
    config->WrapScriptlets = no;        /* wrap within JavaScript string literals */
    config->WrapSection = yes;          /* wrap within section tags */
    config->WrapAsp = yes;              /* wrap within ASP pseudo elements */
    config->WrapJste = yes;             /* wrap within JSTE pseudo elements */
    config->WrapPhp = yes;              /* wrap within PHP pseudo elements */
}
tidyconfig *NewConfig(void)
{
tidyconfig *config;
config = (tidyconfig *)MemAlloc(sizeof(tidyconfig));
if (config == NULL)
return config;
InitConfig(config);
return config;
}
/*
 Reconcile options that imply one another so that config is
 self-consistent.

 config - configuration record, updated in place; must not be NULL.
*/
void AdjustConfig(tidyconfig *config)
{
    /* enclosing text in blocks implies enclosing text at body level */
    if (config->EncloseBlockText)
        config->EncloseBodyText = yes;

    /* SmartIndent implies IndentContent, so callers need not set both */
    if (config->SmartIndent)
        config->IndentContent = yes;

    /* a wrap margin of 0 disables wrapping: substitute a huge margin */
    if (config->wraplen == 0)
        config->wraplen = 0x7FFFFFFF;

    /* Word 2000 needs o:p to be declared as inline */
    if (config->Word2000)
    {
        DefineInlineTag("o:p");
    }

    /* XHTML is written in lower case */
    if (config->xHTML)
    {
        config->UpperCaseTags = no;
        config->UpperCaseAttrs = no;
    }

    /* XML input additionally switches on XmlPIs */
    if (config->XmlTags)
        config->XmlPIs = yes;

    /* XHTML output and XML input both force XML output */
    if (config->xHTML || config->XmlTags)
        config->XmlOut = yes;

    /* XML requires end tags and quoted ampersands */
    if (config->XmlOut)
    {
        config->HideEndTags = no;
        config->QuoteAmpersand = yes;
    }
}
/*
 Release a configuration record together with the strings it holds.

 config - record to release.  May be NULL, in which case nothing is
          done; NewConfig() returns NULL when allocation fails, so
          cleanup code can pass its result straight through.

 alt_text and doctype_str are handed to MemFree() when non-NULL, then
 the record itself is freed.  config must not be used afterwards.
*/
void FreeConfig(tidyconfig *config)
{
    /* tolerate NULL the way free() does instead of dereferencing it */
    if (config == NULL)
        return;

    if (config->alt_text)
        MemFree(config->alt_text);
    if (config->doctype_str)
        MemFree(config->doctype_str);
    MemFree(config);
}
#if 0
/*
 Parse an unsigned decimal integer.

 After SkipWhite(), decimal digits are accumulated from the current
 character `c` (declared outside this block) for as long as
 IsDigit(c) holds.  The result is stored through location.number.

 option - option name, passed to ReportBadArgument() when no digit
          was found.

 NOTE(review): the accumulation is not range-checked, so a very long
 digit string can overflow.
*/
void ParseInt(Location location, char *option)
{
    Bool seen_digit = no;
    int value = 0;

    SkipWhite();

    for (;;)
    {
        if (!IsDigit(c))
            break;

        value = c - '0' + (10 * value);
        seen_digit = yes;
        AdvanceChar();
    }

    if (!seen_digit)
        ReportBadArgument(option);

    *location.number = value;
    NextProperty();
}
/*
 Parse a boolean: true/false, yes/no or 1/0.  Only the first
 character after leading whitespace is examined.

 t, T, y, Y, 1 give yes; f, F, n, N, 0 give no.  Anything else is
 passed to ReportBadArgument() with `option`, and no is stored.
 The result is written through location.logical.
*/
void ParseBool(Location location, char *option)
{
    Bool value = no;

    SkipWhite();

    switch (c)
    {
    case 't': case 'T':
    case 'y': case 'Y':
    case '1':
        value = yes;
        break;

    case 'f': case 'F':
    case 'n': case 'N':
    case '0':
        value = no;
        break;

    default:
        ReportBadArgument(option);
        break;
    }

    *location.logical = value;
    NextProperty();
}
/*
 Parse a boolean and store its negation through location.logical.

 Only the first character after leading whitespace is examined:
 t, T, y, Y read as yes; f, F, n, N read as no.  Unlike ParseBool,
 the digits 1 and 0 are not accepted.  Anything else is passed to
 ReportBadArgument() with `option` and is treated as no before the
 negation is applied.
*/
void ParseInvBool(Location location, char *option)
{
    Bool value = no;

    SkipWhite();

    switch (c)
    {
    case 't': case 'T':
    case 'y': case 'Y':
        value = yes;
        break;

    case 'f': case 'F':
    case 'n': case 'N':
        value = no;
        break;

    default:
        ReportBadArgument(option);
        break;
    }

    *location.logical = (Bool)(!value);
    NextProperty();
}
/*
 Parse a string that contains no whitespace.

 After SkipWhite(), characters are copied until EOF, a character for
 which IsWhite() holds, or 254 characters have been collected.  A
 wstrdup() copy of the result is stored through location.string.

 option - option name, passed to ReportBadArgument() when the name
          is empty.

 NOTE(review): the previous value of *location.string is overwritten
 without being freed here.
*/
void ParseName(Location location, char *option)
{
    char name[256];
    int len = 0;

    SkipWhite();

    for (;;)
    {
        if (len >= 254 || c == EOF || IsWhite(c))
            break;

        name[len++] = c;
        AdvanceChar();
    }

    name[len] = '\0';

    if (len == 0)
        ReportBadArgument(option);

    *location.string = wstrdup(name);
    NextProperty();
}
/*
 Parse a space or comma separated list of tag names and register
 each name in the tag dictionary.

 location - identifies the list being filled: location.string is
            compared against the addresses of inline_tags,
            block_tags, empty_tags and pre_tags to choose between
            DefineInlineTag(), DefineBlockTag(), DefineEmptyTag()
            and DefinePreTag().  For any other location the names
            are read and discarded.
 option   - not used by this function.

 Names are lower-cased with ToLower().  At most 1022 characters are
 collected per name; a longer run is registered in pieces.

 A line end (\r\n, \r or \n) followed by a non-whitespace character
 ends the list; a line end followed by whitespace continues it on
 the next line.
*/
void ParseTagNames(Location location, char *option)
{
    char buf[1024];
    int i = 0;

    do
    {
        /* separators between names */
        if (c == ' ' || c == '\t' || c == ',')
        {
            AdvanceChar();
            continue;
        }

        if (c == '\r')
        {
            AdvanceChar();

            if (c == '\n')
                AdvanceChar();

            if (!(IsWhite((unsigned int) c)))
                break;
        }

        if (c == '\n')
        {
            AdvanceChar();

            if (!(IsWhite((unsigned int) c)))
                break;
        }

        while (i < 1022 && c != EOF && !IsWhite(c) && c != ',')
        {
            buf[i++] = ToLower(c);
            AdvanceChar();
        }

        buf[i] = '\0';

        /*
         Nothing was collected: this happens at EOF and at the start
         of a continuation line, where c is still whitespace.  Do
         not register an empty tag name; go round again so that the
         whitespace is skipped.
        */
        if (i == 0)
            continue;

        /* add tag to dictionary */
        if (location.string == &inline_tags)
            DefineInlineTag(buf);
        else if (location.string == &block_tags)
            DefineBlockTag(buf);
        else if (location.string == &empty_tags)
            DefineEmptyTag(buf);
        else if (location.string == &pre_tags)
            DefinePreTag(buf);

        i = 0;
    }
    while (c != EOF);
}
/*
 Parse a string that may contain whitespace and store a wstrdup()
 copy through location.string, freeing the previous value first
 when it is non-NULL.

 location - location.string points at the char * to replace.
 option   - option name; only referenced by the disabled empty-value
            check below.

 The value may be enclosed in double or single quotes.  The opening
 quote is consumed and reading stops at the matching closing quote,
 which is left as the current character.  Without quotes, reading
 stops at a line end that is followed by a non-whitespace character;
 a line end followed by whitespace continues the value.  At most
 8190 characters are stored.

 Whitespace handling: leading whitespace is dropped and every later
 whitespace character is stored as a single space.
 NOTE(review): `waswhite` is never set back to yes once a
 non-whitespace character has been seen, so interior runs of
 whitespace are not collapsed - confirm whether that is intended.
*/
void ParseString(Location location, char *option)
{
    char buf[8192];
    int i = 0;
    unsigned delim = 0;
    Bool waswhite = yes;

    SkipWhite();

    if (c == '"' || c == '\'')
    {
        delim = c;

        /*
         Step past the opening quote.  Otherwise the first loop
         iteration sees c == delim and stops at once, so every
         quoted value would come out as an empty string.
        */
        AdvanceChar();
    }

    while (i < 8190 && c != EOF)
    {
        /* treat \r\n \r or \n as line ends */
        if (c == '\r')
        {
            AdvanceChar();

            if (c != '\n' && !IsWhite(c))
                break;
        }

        if (c == '\n')
        {
            AdvanceChar();

            if (!IsWhite(c))
                break;
        }

        /* closing quote ends a quoted value */
        if (c == delim && delim != '\0')
            break;

        if (IsWhite(c))
        {
            if (waswhite)
            {
                AdvanceChar();
                continue;
            }

            c = ' ';
        }
        else
            waswhite = no;

        buf[i++] = c;
        AdvanceChar();
    }

    buf[i] = '\0';

    if (*location.string)
        MemFree(*location.string);
#if 0
    if (i == 0)
        ReportBadArgument(option);
#endif
    *location.string = wstrdup(buf);
}
/*
 Parse a character encoding name and store the matching constant
 through location.number.

 After SkipWhite(), a whitespace-delimited word of at most 62
 characters is read and compared with wstrcasecmp() against:
 ascii, latin1, raw, utf8, iso2022, mac.

 option - option name, passed to ReportBadArgument() when the word
          matches none of the names; this function then stores
          nothing.
*/
void ParseCharEncoding(Location location, char *option)
{
    char name[64];
    int len = 0;

    SkipWhite();

    for (;;)
    {
        if (len >= 62 || c == EOF || IsWhite(c))
            break;

        name[len++] = c;
        AdvanceChar();
    }

    name[len] = '\0';

    if (!wstrcasecmp(name, "ascii"))
    {
        *location.number = ASCII;
    }
    else if (!wstrcasecmp(name, "latin1"))
    {
        *location.number = LATIN1;
    }
    else if (!wstrcasecmp(name, "raw"))
    {
        *location.number = RAW;
    }
    else if (!wstrcasecmp(name, "utf8"))
    {
        *location.number = UTF8;
    }
    else if (!wstrcasecmp(name, "iso2022"))
    {
        *location.number = ISO2022;
    }
    else if (!wstrcasecmp(name, "mac"))
    {
        *location.number = MACROMAN;
    }
    else
    {
        ReportBadArgument(option);
    }

    NextProperty();
}
/*
 Parse the indent setting (slight hack to avoid changes to pprint.c).

 After SkipWhite(), a whitespace-delimited word of at most 62
 characters is read and compared with wstrcasecmp():

   yes / true   -> IndentContent = yes, SmartIndent = no
   no  / false  -> IndentContent = no,  SmartIndent = no
   auto         -> IndentContent = yes, SmartIndent = yes

 Any other word is passed to ReportBadArgument() with `option` and
 neither variable is changed here.  `location` is not used: the
 result goes directly into IndentContent and SmartIndent.
*/
void ParseIndent(Location location, char *option)
{
    char word[64];
    int len = 0;

    SkipWhite();

    for (;;)
    {
        if (len >= 62 || c == EOF || IsWhite(c))
            break;

        word[len++] = c;
        AdvanceChar();
    }

    word[len] = '\0';

    if (!wstrcasecmp(word, "yes") || !wstrcasecmp(word, "true"))
    {
        IndentContent = yes;
        SmartIndent = no;
    }
    else if (!wstrcasecmp(word, "no") || !wstrcasecmp(word, "false"))
    {
        IndentContent = no;
        SmartIndent = no;
    }
    else if (!wstrcasecmp(word, "auto"))
    {
        IndentContent = yes;
        SmartIndent = yes;
    }
    else
    {
        ReportBadArgument(option);
    }

    NextProperty();
}
/*
doctype: omit | auto | strict | loose |