#include "FileIn.h" #include "Token.h" #include "UmlCom.h" FileIn::FileIn(const QString & path, FILE * fp) : _path(path), _fp(fp), _utf8(false), _linenum(1), _length(1024) { _buffer = new char[_length]; _special_chars["amp"] = '&'; _special_chars["lt"] = '<'; _special_chars["gt"] = '>'; _special_chars["quot"] = '"'; _special_chars["apos"] = '\''; _special_chars["euro"] = (char) 128; _special_chars["oelig"] = (char) 156; _special_chars["Yuml"] = (char) 159; _special_chars["nbsp"] = (char) 160; _special_chars["iexcl"] = (char) 161; _special_chars["cent"] = (char) 162; _special_chars["pound"] = (char) 163; _special_chars["curren"] = (char) 164; _special_chars["ye"] = (char) 165; _special_chars["brvbar"] = (char) 166; _special_chars["sect"] = (char) 167; _special_chars["uml"] = (char) 168; _special_chars["copy; "] = (char) 169; _special_chars["ordf"] = (char) 170; _special_chars["laquo"] = (char) 171; _special_chars["not"] = (char) 172; _special_chars["shy"] = (char) 173; _special_chars["reg"] = (char) 174; _special_chars["masr"] = (char) 175; _special_chars["deg"] = (char) 176; _special_chars["plusmn"] = (char) 177; _special_chars["sup2"] = (char) 178; _special_chars["sup3"] = (char) 179; _special_chars["acute"] = (char) 180; _special_chars["micro"] = (char) 181; _special_chars["para"] = (char) 182; _special_chars["middot"] = (char) 183; _special_chars["cedil"] = (char) 184; _special_chars["sup1"] = (char) 185; _special_chars["ordm"] = (char) 186; _special_chars["raquo"] = (char) 187; _special_chars["frac14"] = (char) 188; _special_chars["frac12"] = (char) 189; _special_chars["frac34"] = (char) 190; _special_chars["iquest"] = (char) 191; _special_chars["Agrave"] = (char) 192; _special_chars["Aacute"] = (char) 193; _special_chars["Acirc"] = (char) 194; _special_chars["Atilde"] = (char) 195; _special_chars["Auml"] = (char) 196; _special_chars["Aring"] = (char) 197; _special_chars["Aelig"] = (char) 198; _special_chars["Ccedil"] = (char) 199; _special_chars["Egrave"] = (char) 200; _special_chars["Eacute"] = (char) 201; _special_chars["Ecirc"] = (char) 202; _special_chars["Euml"] = (char) 203; _special_chars["Igrave"] = (char) 204; _special_chars["Iacute"] = (char) 205; _special_chars["Icirc"] = (char) 206; _special_chars["Iuml"] = (char) 207; _special_chars["eth"] = (char) 208; _special_chars["Ntilde"] = (char) 209; _special_chars["Ograve"] = (char) 210; _special_chars["Oacute"] = (char) 211; _special_chars["Ocirc"] = (char) 212; _special_chars["Otilde"] = (char) 213; _special_chars["Ouml"] = (char) 214; _special_chars["times"] = (char) 215; _special_chars["Oslash"] = (char) 216; _special_chars["Ugrave"] = (char) 217; _special_chars["Uacute"] = (char) 218; _special_chars["Ucirc"] = (char) 219; _special_chars["Uuml"] = (char) 220; _special_chars["Yacute"] = (char) 221; _special_chars["thorn"] = (char) 222; _special_chars["szlig"] = (char) 223; _special_chars["agrave"] = (char) 224; _special_chars["aacute"] = (char) 225; _special_chars["acirc"] = (char) 226; _special_chars["atilde"] = (char) 227; _special_chars["auml"] = (char) 228; _special_chars["aring"] = (char) 229; _special_chars["aelig"] = (char) 230; _special_chars["ccedil"] = (char) 231; _special_chars["egrave"] = (char) 232; _special_chars["eacute"] = (char) 233; _special_chars["ecirc"] = (char) 234; _special_chars["euml"] = (char) 235; _special_chars["igrave"] = (char) 236; _special_chars["iacute"] = (char) 237; _special_chars["icirc"] = (char) 238; _special_chars["iuml"] = (char) 239; _special_chars["eth"] = (char) 240; _special_chars["ntilde"] = (char) 241; _special_chars["ograve"] = (char) 242; _special_chars["oacute"] = (char) 243; _special_chars["ocirc"] = (char) 244; _special_chars["otilde"] = (char) 245; _special_chars["ouml"] = (char) 246; _special_chars["divide"] = (char) 247; _special_chars["oslash"] = (char) 248; _special_chars["ugrave"] = (char) 249; _special_chars["uacute"] = (char) 250; _special_chars["ucirc"] = (char) 251; _special_chars["uuml"] = (char) 252; _special_chars["yacute"] = (char) 253; _special_chars["thorn"] = (char) 254; _special_chars["yuml"] = (char) 255; } FileIn::~FileIn() { fclose(_fp); } Token & FileIn::read(bool any) { static Token token; token.read(*this, any); return token; } QCString FileIn::body(QCString what) { QCString r; int index = 0; int c; for (;;) { c = fgetc(_fp); if (c == EOF) error("premature end of file"); if (c == '<') { _buffer[index] = 0; r += _buffer; ungetc(c, _fp); Token & token = read(FALSE); if (!token.close(what)) error("'</" + what +">' expected rather than '</" + token.what() +">'"); return r; } if (c == '&') c = read_special_char(); if ((index + 1) == _length) { _buffer[index] = 0; index = 0; r += _buffer; } _buffer[index++] = c; } } const char * FileIn::readWord(bool any, bool & str) { int c; // bypass comment for (;;) { c = fgetc(_fp); if (c == EOF) // doesn't return error("premature end of file"); if (c == '\n') _linenum += 1; else if ((c != ' ') && (c != '\t') && (c != '\r')) break; } switch (c) { case '<': case '>': case '/': case '=': case '?': case '-': case '!': _buffer[0] = c; _buffer[1] = 0; str = FALSE; return _buffer; case '"': str = TRUE; return read_string(); default: str = FALSE; return read_word(c, any); } } void FileIn::finish(QCString what) { for (;;) { Token & tk = read(TRUE); if (tk.close(what)) return; else if (tk.close()) error("'</" + tk.what() + ">' while wait for '</" + what + ">'"); else if (! tk.closed()) finish(tk.what()); } } void FileIn::error(QCString s) { QCString num; QCString err = QCString("error in ") + _path + " line " + num.setNum(_linenum) + " : " + s + "
"; UmlCom::trace(err); throw 0; } void FileIn::warning(QCString s) { QCString num; QCString warn = QCString("warning in ") + _path + " line " + num.setNum(_linenum) + " : " + s + "
"; UmlCom::trace(warn); } const char * FileIn::read_word(int c, bool any) { if (((c >= 'a') && (c <= 'z')) || ((c >= '0') && (c <= '9')) || (c == '_')) _buffer[0] = c; else if ((c >= 'A') && (c <= 'Z')) _buffer[0] = c - 'A' + 'a'; else if (any) { _buffer[0] = c; _buffer[1] = 0; return _buffer; } else { // doesn't return char cs[2]; cs[0] = c; cs[1] = 0; error("unexpected character '" + QCString(cs) + "'"); } int index = 1; for (;;) { c = fgetc(_fp); if (c == EOF) error("premature end of file"); if ((c >= 'A') && (c <= 'Z')) c = c - 'A' + 'a'; else if ((c != ':') && (c != '_') && ((c < 'a') || (c > 'z')) && ((c < '0') || (c > '9'))) { // may add 0 without size check _buffer[index] = 0; ungetc(c, _fp); return _buffer; } if ((index + 1) == _length) { // can't add c then 0 char * b = new char[_length + 1024]; memcpy(b, _buffer, index); delete [] _buffer; _buffer = b; _length += 1024; } _buffer[index++] = c; } } const char * FileIn::read_string() { // " already read int index = 0; int c; for (;;) { c = fgetc(_fp); switch (c) { case EOF: // doesn't return error("premature end of file"); break; case '"': // may add 0 without size check _buffer[index] = 0; return _buffer; case '&': // special char c = read_special_char(); break; default: if (_utf8 && (((unsigned char) c) > 127)) c = ((c & 3) << 6) + (fgetc(_fp) & 0x3f); break; } if ((index + 1) == _length) { // can't add c then 0 char * b = new char[_length + 1024]; memcpy(b, _buffer, index); delete [] _buffer; _buffer = b; _length += 1024; } _buffer[index++] = c; } } char FileIn::read_special_char() { // & already read int c = fgetc(_fp); if (c == EOF) // doesn't return error("premature end of file"); if (c == '#') { int c; int r = 0; while ((c = fgetc(_fp)) != ';') { if (c == EOF) // doesn't return error("premature end of file"); if ((c >= '0') && (c <= '9')) r = r*10 + c - '0'; else // doesn't return error("not a valid special character"); } return r; } else { int index = 0; char s[16]; while (c != ';') { if (index == sizeof(s) - 2) // doesn't return error("not a valid special character"); s[index++] = c; c = fgetc(_fp); if (c == EOF) // doesn't return error("premature end of file"); } s[index] = 0; // check on index useless QMap::ConstIterator iter = _special_chars.find(s); if (iter == _special_chars.end()) // doesn't return error("not a valid special character"); return iter.data(); } } void FileIn::setEncoding(QCString s) { if (s.left(3).lower() == "utf") { if (s.right(1) != "8") { UmlCom::trace("sorry, in the UTF encoding, only UTF-8 is managed"); throw 0; } else _utf8 = TRUE; } }