//@copyright_begin // ================================================================ // Copyright Notice // Copyright (C) 1998-2004 by Joe Linoff // // Permission is hereby granted, free of charge, to any person obtaining // a copy of this software and associated documentation files (the // "Software"), to deal in the Software without restriction, including // without limitation the rights to use, copy, modify, merge, publish, // distribute, sublicense, and/or sell copies of the Software, and to // permit persons to whom the Software is furnished to do so, subject to // the following conditions: // // The above copyright notice and this permission notice shall be // included in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. // IN NO EVENT SHALL JOE LINOFF BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. // // Comments and suggestions are always welcome. // Please report bugs to http://ccdoc.sourceforge.net/ccdoc // ================================================================ //@copyright_end #include "log.h" #include "phase1_scanner.h" #include // ================================================================ // This variable allows the header version // to be queried at runtime. // ================================================================ namespace { char ccdoc_rcsid[] = "$Id: phase1_scanner.cc,v 1.11 2004/09/30 04:16:07 jlinoff Exp $"; } // ================================================================ // Constructor. 
// ================================================================ ccdoc::phase1::scanner::scanner(switches& sw) : m_lineno(0), m_sw(sw), m_debug(false) { } // ================================================================ // Destructor. // ================================================================ ccdoc::phase1::scanner::~scanner() { } // ================================================================ // Open // ================================================================ void ccdoc::phase1::scanner::open(const string& name) { m_name = name; m_lineno = 1; m_put_tokens.clear(); m_put_chars.clear(); if( m_is.is_open() ) m_is.close(); m_is.open( m_name.c_str() ); } // ================================================================ // Close // ================================================================ void ccdoc::phase1::scanner::close() { if( m_is.is_open() ) m_is.close(); } // ================================================================ // Get character. // ================================================================ char ccdoc::phase1::scanner::get_char() { char ch; if( m_put_chars.size() ) { ch = m_put_chars.back(); m_put_chars.pop_back(); } else if( m_is.eof() || m_is.bad() || m_is.fail() ) { return 0; } else if(!m_is.get(ch)) { return 0; } // Ignore carriage returns under Windows. if( '\r' == ch ) return get_char(); if( '\n' == ch ) ++m_lineno; return ch; } // ================================================================ // Put character. Use a vector so that characters can // be put even when the end of the stream is reached. // ================================================================ void ccdoc::phase1::scanner::put_char(char ch) { if(ch) { m_put_chars.push_back(ch); // Issue 0056: m_line>0 if( '\n' == ch && m_lineno>0 ) --m_lineno; } } // ================================================================ // Put token. 
// ================================================================ void ccdoc::phase1::scanner::put_token(const string& token) { m_put_tokens.push_back(token); if(m_debug) { if( token == "\n" ) s_log << "CCDOC_PHASE1_DEBUG: put_line: " << m_lineno << "\n"; else if( token == "" ) s_log << "CCDOC_PHASE1_DEBUG: put_eof: " << m_lineno << "\n"; else s_log << "CCDOC_PHASE1_DEBUG: put_token: '" << token << "'\n"; } } // ================================================================ // Get token. // ================================================================ const char* ccdoc::phase1::scanner::get_token() { const char* token = scan_token(); if(m_debug) { if( *token == '\n' ) s_log << "CCDOC_PHASE1_DEBUG: get_line: " << m_lineno << "\n"; else if( *token == 0 ) s_log << "CCDOC_PHASE1_DEBUG: get_eof: " << m_lineno << "\n"; else s_log << "CCDOC_PHASE1_DEBUG: get_token: '" << token << "'\n"; } return token; } // ================================================================ // Scan token. // ================================================================ const char* ccdoc::phase1::scanner::scan_token() { static char tokenbuf[0x100000]; // 2^20 *tokenbuf = 0; // ================================================ // A token was put back, return that one. // ================================================ if( m_put_tokens.size() ) { ::strcpy(tokenbuf,m_put_tokens.back().c_str()); m_put_tokens.pop_back(); return tokenbuf; } char ch = skip_ws(); // ================================================ // If this the end of the file, return // an empty token. // ================================================ if(!ch) { return tokenbuf; } // ================================================ // Translate trigraph sequences. // This must be done early because they can // be part of another token. // ================================================ if( '?' == ch ) { ch = scan_trigraph(); if( '?' 
== ch ) { tokenbuf[0] = '?'; tokenbuf[1] = 0; return tokenbuf; } } // ================================================ // Eliminate the "\\\n" white space. This cannot // be done in skip_ws() because of the backslash // trigraph sequence "??/". // ================================================ if( '\\' == ch ) { char ch1 = get_char(); if( '\n' == ch1 ) { // issue 0124 return get_token(); } put_char(ch1); } // ================================================ // Check for character and string literals with // the 'L' prefix. // ================================================ if( ch == 'L' ) { char ch1 = get_char(); if( '"' == ch1 ) { tokenbuf[0] = ch; tokenbuf[1] = ch1; tokenbuf[2] = 0; get_string_literal(&tokenbuf[2],sizeof(tokenbuf)-3); return tokenbuf; } if( '\'' == ch1 ) { tokenbuf[0] = ch; tokenbuf[1] = ch1; tokenbuf[2] = 0; get_char_literal(&tokenbuf[2],sizeof(tokenbuf)-3); return tokenbuf; } put_char(ch1); } // ================================================ // This is a quoted string. Look for the next // un-escaped quote. // ================================================ if( ch == '"' ) { tokenbuf[0] = ch; tokenbuf[1] = 0; get_string_literal(&tokenbuf[1],sizeof(tokenbuf)-2); return tokenbuf; } // ================================================ // This is a single quoted character. Look for the // next un-escaped quote. // ================================================ if( '\'' == ch ) { tokenbuf[0] = ch; tokenbuf[1] = 0; get_char_literal(&tokenbuf[1],sizeof(tokenbuf)-2); return tokenbuf; } // ================================================ // This is some sort of number. // The special case of ".333" is handled in the '.' // processing. // ================================================ if( '0' <= ch && ch <= '9' ) { put_char(ch); get_number_literal(tokenbuf,sizeof(tokenbuf)-1); return tokenbuf; } // ================================================ // Convert alternative token forms to their // primary form. 
This is handled for each // separate character analysis. // // alt pri alt pri alt pri // ====== === ====== === ====== === // <% { and && and_eq &= // %> } bitor | or_eq |= // <: [ or || xor_eq ^= // :> ] xor ^ not ! // %: # compl ~ not_eq != // %:%: ## bitand & // // We don't care about the identifier based // alternative forms. // ================================================ // ================================================ // This is an identifier. // Ccdoc must support the non-standard $ character // because some compilers support it. // ================================================ if( ( 'a' <= ch && ch <= 'z' ) || ( 'A' <= ch && ch <= 'Z' ) || ( '_' == ch || '$' == ch ) ) { put_char(ch); get_identifier(tokenbuf,sizeof(tokenbuf)-2); return tokenbuf; } // ================================================ // This is a operator/punctuator. // Do a longest match. // ================================================ if( '{' == ch || '}' == ch || '[' == ch || ']' == ch || '(' == ch || ')' == ch || '~' == ch || ',' == ch || ';' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; *pstr = 0; return tokenbuf; } // ================================================ // OP: !, != // ================================================ if( '!' 
== ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: #, ## // ================================================ if( '#' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '#' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: %, %:, %:%:, %=, %> // ================================================ if( '%' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else if( '>' == ch ) { // %> --> } pstr = tokenbuf; *pstr++ = '}'; *pstr++ = 0; return tokenbuf; } else if( ':' == ch ) { ch = get_char(); if( '%' == ch ) { ch = get_char(); if( ':' == ch ) { // %:%: --> ## pstr = tokenbuf; *pstr++ = '#'; *pstr++ = '#'; *pstr++ = 0; return tokenbuf; } else { put_char('%'); put_char(ch); } } else { // %: --> # pstr = tokenbuf; *pstr++ = '#'; *pstr++ = 0; put_char(ch); return tokenbuf; } } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: &, &&, &= // ================================================ if( '&' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '&' == ch || '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: *, *= // ================================================ if( '*' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: +, ++, += // ================================================ if( '+' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '+' == ch || '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return 
tokenbuf; } // ================================================ // OP: -, --, -=, ->, ->* // ================================================ if( '-' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '-' == ch || '=' == ch ) { *pstr++ = ch; } else if( '>' == ch ) { *pstr++ = ch; ch = get_char(); if( '*' == ch ) { *pstr++ = ch; } else { put_char(ch); } } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: ., .*, ..., .[0-9]+ // ================================================ if( '.' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '*' == ch ) { *pstr++ = ch; } else if( '0' <= ch && ch <= '9' ) { // Handle floating number of the form: .333 put_char(ch); put_char('.'); get_number_literal(tokenbuf,sizeof(tokenbuf)-1); return tokenbuf; } else if( '.' == ch ) { ch = get_char(); if( '.' == ch ) { *pstr++ = '.'; *pstr++ = '.'; } else { put_char(ch); put_char('.'); } } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: /, /=, //, /* // ================================================ if( '/' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; *pstr = 0; return tokenbuf; } else if( '/' == ch ) { // Found '//' // Check for '//@{' or a '// @{' comment. return scan_ccdoc_style2(tokenbuf,sizeof(tokenbuf)-1); } else if( '*' == ch ) { // Found '/*' // Check for a '/**' comment. 
return scan_ccdoc_style1(tokenbuf,sizeof(tokenbuf)-1); } else { put_char(ch); // issue 0069 *pstr = 0; return tokenbuf; } } // ================================================ // OP: :, ::, :> // ================================================ if( ':' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( ':' == ch ) { *pstr++ = ch; } else if( '>' == ch ) { // :> --> ] pstr = tokenbuf; *pstr++ = ']'; *pstr++ = 0; return tokenbuf; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: <, <%, <:, <<, <<=, <= // ================================================ if( '<' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else if( '%' == ch ) { // <% --> { pstr = tokenbuf; *pstr++ = '{'; *pstr++ = 0; return tokenbuf; } else if( ':' == ch ) { // <: --> ] pstr = tokenbuf; *pstr++ = '['; *pstr++ = 0; return tokenbuf; } else if( '<' == ch ) { *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: =, == // ================================================ if( '=' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: >, >=, >>, >>= // ================================================ if( '>' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else if( '>' == ch ) { *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: ^, ^= // ================================================ if( '^' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = 
ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: |, |=, || // ================================================ if( '|' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch || '|' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // Catch all at the end. tokenbuf[0] = ch; tokenbuf[1] = 0; return tokenbuf; } // ================================================================ // Skip w/s. Note that new lines are not w/s because they // are significant for pre-processing directives. // ================================================================ char ccdoc::phase1::scanner::skip_ws() { // Ignore white space, keep new lines // because they are used for pre-processing // analysis. char ch = get_char(); while( (ch > 0 && ch <= ' ') && ch != 0 && ch != '\n' ) { ch = get_char(); } return ch; } // ================================================================ // scan trigraph sequences. // ================================================================ char ccdoc::phase1::scanner::scan_trigraph() { // ================================================ // Check for tri-graph sequences. // The standard states that these sequences are // converted first. I interpret this to mean all // sequences except strings and comments. // OP: ?, ??=, ??/, ??', ??(, ??), ??!, ??<, ??>, ??- // ================================================ char ch1 = get_char(); if( ch1 != '?' 
) { put_char(ch1); return '?'; } char ch2 = get_char(); switch(ch2) { case '=': ch2 = '#' ; break; case '/': ch2 = '\\'; break; case '\'': ch2 = '^' ; break; case '(': ch2 = '[' ; break; case ')': ch2 = ']' ; break; case '!': ch2 = '|' ; break; case '<': ch2 = '{' ; break; case '>': ch2 = '}' ; break; case '-': ch2 = '~' ; break; default: put_char(ch2); put_char(ch1); } return ch2; } // ================================================================ // get_string_literal // ================================================================ void ccdoc::phase1::scanner::get_string_literal(char* token,int max) { int ch = '"'; int pch = ch; while( max>0 && (ch = get_char()) ) { max--; *token++ = ch; if( ch == '"' && pch != '\\' ) { *token = 0; return; } // Issue 0116 // Contributed by Chris Martin 2001/11/25 if( ch == '\\' && pch == '\\' ) { // Reset pch to 0 to make sure that // the terminating double quote is // found correctly for the case of "\\". ch = 0; } pch = ch; } *token = 0; // The end of the string was never reached. // We should never reach this point because all input should // be from a legal C++ file. s_log.warning() << "Unterminated string literal found in " << m_name.c_str() << " at line " << m_lineno << ".\n" << s_log.enable(); } // ================================================================ // get_char_literal // ================================================================ void ccdoc::phase1::scanner::get_char_literal(char* token,int max) { // Issue 0052: Handle the special case of '\\'. int ch = get_char(); --max; *token++ = ch; if( '\\' == ch ) { ch = get_char(); --max; *token++ = ch; } ch = get_char(); --max; *token++ = ch; // Also handle the special cases of '\xad'. while( ch != '\'' ) { ch = get_char(); --max; *token++ = ch; if( max <= 2 ) { *token = 0; // The end of the character was never reached. // We should never reach this point because all input should // be from a legal C++ file. 
s_log.warning() << "Unterminated character literal found in " << m_name.c_str() << " at line " << m_lineno << ".\n" << s_log.enable(); return; } } *token = 0; } // ================================================================ // get_number_literal // ================================================================ void ccdoc::phase1::scanner::get_number_literal(char* token,int max) { // Some of the numeric processing is confusing because // the types overlap. Consider the following // number: // // 07777.35 // // This is a floating pointer number not an ill-formed // octal integer literal. // // This analyzer takes advantage of the fact that the // input is guaranteed to be legal C++. char ch = get_char(); if( '0' == ch ) { *token++ = ch; max--; char ch1 = get_char(); if( 'x' == ch1 || 'X' == ch1 ) { // This is a hex number. *token++ = ch1; max--; while( max>0 && (ch = get_char()) ) { if( ( 'a' <= ch && ch <= 'f' ) || ( 'A' <= ch && ch <= 'Z' ) || ( '0' <= ch && ch <= '9' ) ) { *token++ = ch; max--; } else { break; } } } else if( '0' <= ch1 && ch1 <= '9' ) { // This is octal or floating point. *token++ = ch1; max--; while( max>0 && (ch = get_char()) ) { if( '0' <= ch && ch <= '9' ) { *token++ = ch; max--; } else { break; } } } else { ch = ch1; } } else if( '1' <= ch && ch <= '9' ) { *token++ = ch; max--; // This is decimal or floating point. while( max>0 && (ch = get_char()) ) { if( '0' <= ch && ch <= '9' ) { *token++ = ch; max--; } else { break; } } } // At this point, ch is not a digit. Check // for a floating point decimal point, an // exponent or the UL suffix. // Scan for a floating point literal decimal point. if( '.' == ch ) { *token++ = ch; max--; while( max>0 && (ch = get_char()) ) { if( '0' <= ch && ch <= '9' ) { *token++ = ch; max--; } else { break; } } } // Scan for a floating point literal exponent. 
if( 'E' == ch || 'e' == ch ) { *token++ = ch; max--; if( max>0 && (ch = get_char()) ) { // Get the sign of the exponent or a digit if // the sign was not specified. if( ( '0' <= ch && ch <= '9' ) || ( '-' == ch || '+' == ch ) ) { *token++ = ch; max--; // Get the rest of the exponent digits. while( max>0 && (ch = get_char()) ) { if( '0' <= ch && ch <= '9' ) { *token++ = ch; max--; } else { break; } } } } } // Scan for the ULF suffix. if( 'U' == ch || 'u' == ch || 'L' == ch || 'l' == ch || 'F' == ch || 'f' == ch) { *token++ = ch; max--; while( max>0 && (ch = get_char()) ) { if( 'U' == ch || 'u' == ch || 'L' == ch || 'l' == ch || 'F' == ch || 'f' == ch ) { *token++ = ch; max--; } else { break; } } } put_char(ch); *token = 0; } // ================================================================ // get_identifier // ================================================================ void ccdoc::phase1::scanner::get_identifier(char* token,int max) { char ch; while( max>0 && (ch = get_char()) ) { max--; if( ( 'a' <= ch && ch <= 'z' ) || ( 'A' <= ch && ch <= 'Z' ) || ( '0' <= ch && ch <= '9' ) || ( '_' == ch || ch == '$' ) ) { *token++ = ch; } else { // Non-identifier token found, we are done. *token = 0; put_char(ch); return; } } // We reached the end of the file. // This is legal for things like: // #endif *token = 0; } // ================================================================ // scan ccdoc style1 // ================================================================ const char* ccdoc::phase1::scanner::scan_ccdoc_style1(char* token, int max) { bool suffix_flag = false; char ch = get_char(); if( '*' != ch ) { if( ch == '!' ) { // Issue 0162: // This may be a doxygen style comment of the form: // int a; /*!< ... */ ch = get_char(); if( ch == '<' ) { suffix_flag = true; } else { // This could be '*'. put_char(ch); } } if( !suffix_flag ) { // This is not a ccdoc comment, // skip to the end of the comment. 
while( ch != 0 ) { char pch = ch; ch = get_char(); if( '*' == pch && '/' == ch ) break; } return scan_token(); } } // Issue 0162: // Check for '/**<'. This is a doxygen style suffix comment. ch = get_char(); if( ch == '<' ) { suffix_flag = true; } else { put_char(ch); } // Found: '/**' // ================================================ // Check for the special pre-processing directives: // ================================================ if( scan_ccdoc_style1_special() ) return scan_token(); // Re-format the comment for consumption by the parser. // Here are the comment fields. scanner_doc doc(*this,m_sw); // Define the processing mode. static char line[65536]; // maximum line length bool first = true; bool done = false; bool ignore_flag = false; while(!done) { // ================================================ // Skip leading w/s // ================================================ char ch = get_char(); while(ch && ch != '\n' && (ch > 0 && ch <= ' ') ) { ch = get_char(); } // ================================================ // Skip the leading asterisk if it exists. // ================================================ if( !first && '*' == ch ) { ch = get_char(); if( '/' == ch ) { done = true; break; } } // ================================================ // Now load the line for directive processing. // Don't trim w/s, it may be needed for
.
    // ================================================
    char* pline = line;
    while( ch && ch != '\n' ) {
      *pline++ = ch;
      char pch = ch;
      ch = get_char();
      if( '*' == pch && ch == '/' ) {
	// This is the end of the comment.
	//
	// Make sure that any preceding directives
	// are processed, such as:
	//   '/**'
	//   ' * @return Foo bar spam */'
	//
	// Also make sure that all trailing asterisks are eaten:
	//  '/**'
	//  ' **/'
	//
	pline--; // *pline == '*'
	ccdoc_assert( '*' == *pline );
	while( pline>line && '*' == *pline )
	  --pline;
	if( pline == line ) {
	  // We are done.
	  done = true;
	  break;
	}
	// There may be some stuff on this line that
	// we need to parse.
	pline++;
	put_char('\n');
	put_char('/');
	put_char('*');
	break;
      }
    }
    *pline = 0;

    // ================================================
    // EOF
    // ================================================
    if( !ch ) {
      // The end of the file was reached before the
      // comment was terminated.
      s_log.warning()
	<< "Unexpected EOF found, unterminated ccdoc comment "
	<< "specified at line "
	<< m_lineno << " in " << m_name.c_str()
	<< ".\n"
	<< s_log.enable();
      return scan_token();
    }

    // ================================================
    // Eliminate special tokens from the comment line.
    // ================================================
    if( m_sw.doxygen() ) {
      if( !ignore_flag ) {
        if( contains_token(line,"@file") ) {
          ignore_flag = true;
        }
      }
    }

    // ================================================
    // At this point we have the line.
    // Terminate it and write it out in debug mode.
    // ================================================
    if( m_debug ) {
      s_log << "CCDOC_PHASE1_DEBUG: ccdoc_line: '" << line << "'\n";
    }

    // ================================================
    // Skip the first line, if it is empty to
    // avoid conflicts when trying to determine
    // the long description for the following case:
    //   /**            | <-- line 1 (blank - ignore)
    //    * short       | <-- line 2 (short description)
    //    *             | <-- line 3 (blank - separator)
    //    * long        | <-- line 4 (long description)
    //    */            | <-- line 5 (end of comment)
    // ================================================
    if(first) {
      first = false;
      if(*line == 0)
	continue;
    }

    // ================================================
    // Issue 0082: only do this if -nojdsds is specified.
    // Set the short description flag to false
    // if a blank line (other than the first one)
    // is encountered.
    // ================================================
    if(!m_sw.jdsds() && *line == 0 && doc.m_mode == scanner_doc::SHORT) {
      doc.m_mode = scanner_doc::LONG;
      continue;
    }

    doc.parse_line(line);
  }

  // Issue 0162:
  if( suffix_flag )
    doc.m_comment.add_suffix(true);

  // ================================================
  // At this point we have a valid ccdoc comment.
  // Format it for the parser to make things easy.
  // If it is empty, ignore it.
  // ================================================
  if( doc.empty() || ignore_flag )
    return scan_token();
  return doc.format(token,max);
}
// ================================================================
// scan ccdoc style1 special
//   /**@#-*\/       - Start ignoring characters
//   /**@#+*\/       - Stop ignoring characters.
//   /**@#=X*\/      - Insert the single character X
//                     into the input stream.
// ================================================================
bool ccdoc::phase1::scanner::scan_ccdoc_style1_special()
{
  // Called right after "/**" was read. Recognizes the directives
  //   "@#-*\/", "@#+*\/" and "@#=X*\/" (X is any one character).
  // Returns true if a directive was consumed, otherwise false with
  // all of the characters read here pushed back.

  // Every character read while matching is recorded in input order
  // so that a failed match can be undone exactly.
  char consumed[6];
  int nconsumed = 0;
  char op = 0;        // '-', '+' or '='
  char insert_ch = 0; // X, only read for '='

  consumed[nconsumed++] = get_char();
  bool matched = ( '@' == consumed[0] );
  if( matched ) {
    consumed[nconsumed++] = get_char();
    matched = ( '#' == consumed[1] );
  }
  if( matched ) {
    op = get_char();
    consumed[nconsumed++] = op;
    matched = ( '-' == op || '+' == op || '=' == op );
  }
  if( matched && '=' == op ) {
    // This is the character to insert, any value is accepted.
    insert_ch = get_char();
    consumed[nconsumed++] = insert_ch;
  }
  if( matched ) {
    consumed[nconsumed++] = get_char();
    matched = ( '*' == consumed[nconsumed-1] );
  }
  if( matched ) {
    consumed[nconsumed++] = get_char();
    matched = ( '/' == consumed[nconsumed-1] );
  }

  if( !matched ) {
    // Not a directive. Push everything back, last character first,
    // so that the characters are read again in the original order.
    while( nconsumed > 0 )
      put_char( consumed[--nconsumed] );
    return false;
  }

  if( '=' == op ) {
    // We found: "/**@#=X*\/".
    // Insert the character into the input stream.
    put_char(insert_ch);
    return true;
  }

  if( '-' == op ) {
    // We found: "/**@#-*\/", Ignore all characters
    // until we find "/**@#+*\/".
    static const char terminator[] = {'/','*','*','@','#','+','*','/'};
    const int terminator_len = sizeof(terminator);
    int pos = 0; // number of terminator characters matched so far
    for(;;) {
      char ch = get_char();
      if( !ch ) {
        // Don't warn about the EOF, that is legal.
        break;
      }
      if( terminator[pos] == ch ) {
        ++pos;
        if( pos == terminator_len )
          break;
        continue;
      }
      if( pos > 0 ) {
        // The partial match failed. Look at this character again
        // because it may start the terminator, as in:
        //   "/**@/**@#+*\/"
        put_char(ch);
        pos = 0;
      }
      // Don't warn about nested "/**@#-*\/", they are
      // legal.
    }
  }

  // Nothing else to do for "/**@#+*\/". It is only used during
  // "/**@#-*\/" processing.
  return true;
}
// ================================================================
// scan ccdoc style2
// ================================================================
const char* ccdoc::phase1::scanner::scan_ccdoc_style2(char* token,
						      int max)
{
  // ================================================
  // Check for ccdoc comment designators:
  //   '//[ \t]*@{' 'vi}'  -- brace balancing for vi, added by bzoe
  //     or
  //   '//[ \t]*/**'
  // ================================================
  enum {
    NOT_A_COMMENT,
    STYLE2A,
    STYLE2B,
    STYLE2C} // Issue 0086: STYLE2C, added by bzoe 2001/11/26
  ccdoc_flag = NOT_A_COMMENT;

  // Get the third character:
  //  //x
  //    ^
  char ch = get_char();

  // Issue 0162:
  if( '!' == ch || '/' == ch ) {
    // Look for doxygen single comment indicators:
    //   '//!<' or '///<'
    char first_ch = ch;
    ch = get_char();
    if( '<' == ch ) {
      // This is a doxygen style comment of the form:
      //   int x; //!< ...
      // or
      //   int x; ///< ...
      //
      // This is the same as ccdoc:
      //   int x; //@- ...
      put_char('-');
      ch = '@';
    }
    else {
      // This is not a doxygen style comment.
      put_char(ch);       // NOT '<'
      ch = first_ch;
    }
  }

  // ccdoc doesn't care about the leading w/s.
  while( ' ' == ch || '\t' == ch ) {
    ch = get_char();
  }

  // process the other tokens.
  if( '@' == ch ) {
    // Look for: '//[ \t]*@{' 'vi}'  -- brace balancing for vi, added by bzoe
    ch = get_char();
    if( '{' == ch ) {
      ccdoc_flag = STYLE2A;
    }
    else if( '-' == ch ) {
      // Issue 0086
      // Added by bzoe to support the new
      // single line, suffix syntax:
      //  int foo; //@- this is a comment
      ccdoc_flag = STYLE2C;
    }
    else {
      put_char(ch); // in case it is a '\n'
    }
  }
  else if( '/' == ch ) {
    // Look for: '//[ \t]*/**'
    // Discard: '///***'
    ch = get_char();
    if( '*' == ch ) {
      ch = get_char();
      if( '*' == ch ) {
	ch = get_char();
	if( '*' != ch ) {
	  ccdoc_flag = STYLE2B;
	  put_char(ch);
	}
	else {
	  put_char(ch);
	  put_char('*');
	  put_char('*');
	  put_char('/');
	}
      }
      else {
	put_char(ch);
	put_char('*');
	put_char('/');
      }
    }
    else {
      put_char(ch);
      put_char('/');
    }
  }
  else {
    put_char(ch);
  }

  // ================================================
  // This is not a ccdoc comment,
  // skip to the end of the line.
  // ================================================
  if(ccdoc_flag == NOT_A_COMMENT) {
    while( ch != 0 && ch != '\n' ) {
      ch = get_char();
    }
    if( '\n' == ch ) {
      put_char(ch);
    }
    return scan_token();
  }

  // ================================================
  // Get the rest of the tokens on the line.
  // Here are the comment fields.
  // ================================================
  scanner_doc doc(*this,m_sw);

  // Define the processing mode.
  static char line[65536]; // maximum line length
  bool first = true;
  bool done = false;
  bool ignore_flag = false;
  while(!done) {
    // ================================================
    // Skip leading w/s
    // ================================================
    char ch = get_char();
    while(ch && ch !='\n' && (ch > 0 && ch <= ' ') ) {
      ch = get_char();
    }

    // ================================================
    // This must be either a comment or a blank line.
    // Both are valid.
    // ================================================
    if( !first ) {
      if( '/' == ch ) {
	ch = get_char();
	if( '/' != ch ) {
	  s_log.warning()
	    << "Invalid ccdoc comment specified at line "
	    << m_lineno << " in " << m_name.c_str()
	    << ".\n"
	    << s_log.enable();
	  put_char(ch);
	  put_char('/');
	  return scan_token();
	}
	// Check for end of comment.
	// Skip the lead '//'
	ch = get_char();

        // 'vi{' -- brace balancing for vi, added by bzoe
	// Is this '//@}'?
	if( ccdoc_flag == STYLE2A ) {
	  if( '@' == ch ) {
	    char ch1 = get_char();
            // 'vi{' -- brace balancing for vi, added by bzoe
	    if( '}' == ch1 ) {
	      while( ch1 && ch1 != '\n' )
		ch1 = get_char();
	      line[0] = 0;
	      done = true;
	      break;
	    }
	    put_char(ch1);
	  }
	}
	else if( ccdoc_flag == STYLE2B ) {
	  // Is this '//\*/'?
	  if( '*' == ch ) {
	    char ch1 = get_char();
	    if( '/' == ch1 ) {
	      while( ch1 && ch1 != '\n' )
		ch1 = get_char();
	      line[0] = 0;
	      done = true;
	      break;
	    }
	    // Skip the asterisk in this form.
	    ch = ch1;
	  }
	  // Is this '// \*/'?
	  else if( ' ' == ch || '\t' == ch ) {
	    char ch1 = get_char();
	    if( '*' == ch1 ) {
	      char ch2 = get_char();
	      if( '/' == ch2 ) {
		while( ch2 && ch2 != '\n' )
		  ch2 = get_char();
		line[0] = 0;
		done = true;
		break;
	      }
	      // Skip the asterisk and preceding space in this form.
	      ch = ch2;
	    }
	    else {
	      put_char(ch1);
	    }
	  }
	}
      }
    }

    // ================================================
    // Now load the line for directive processing.
    // Don't trim w/s, it may be needed for preformatted
    // text (e.g. inside <pre> ... </pre>).
    // ================================================
    char* pline = line;
    while( ch && ch != '\n' ) {
      *pline++ = ch;
      char pch = ch;
      ch = get_char();
      if( pline>&line[3] ) {
	if( ccdoc_flag == STYLE2A ) {
          // 'vi{' -- brace balancing for vi, added by bzoe
	  if( '@' == pch && ch == '}' ) {
	    // This may be the end of the comment.
	    //
	    // Make sure that any preceding directives
	    // are processed, such as:
	    //   '//@{ @return Foo bar spam //@}'
	    //
	    char* mark = pline;
	    pline--; // *pline == '@'
	    ccdoc_assert( '@' == *pline );
	    char ch1 = *--pline;
	    char ch2 = *--pline;
	    if( '/' == ch1 && '/' == ch2 ) {
	      if( pline == line ) {
		// We are done.
		done = true;
		break;
	      }

	      // 'vi{' -- brace balancing for vi, added by bzoe
	      // This was an end token '//@}',
	      // parse the other stuff on the line.
	      pline = mark;
	      pline--; // *pline == '@'
	      pline--; // *pline == '/'
	      pline--; // *pline == '/'
	      *pline = 0;
	      done = true;

	      // Ignore everything to the end of the line.
	      while( ch && ch != '\n' ) {
		ch = get_char();
	      }
	      break;
	    }
	    else {
              // 'vi{' -- brace balancing for vi, added by bzoe
	      // This was not an end token '//@}',
	      // continue parsing.
	      pline = mark;
	    }
	  }
	}
	else if( ccdoc_flag == STYLE2B ) {
	  if( '*' == pch && ch == '/' ) {
	    // This may be the end of the comment.
	    //
	    // Make sure that any preceding directives
	    // are processed, such as:
	    //   '///** @return Foo bar spam //\*/'
	    //
	    char* mark = pline;
	    pline--; // *pline == '*'
	    ccdoc_assert( '*' == *pline );
	    char ch1 = *--pline;
	    char ch2 = *--pline;
	    if( '/' == ch1 && '/' == ch2 ) {
	      if( pline == line ) {
		// We are done.
		done = true;
		break;
	      }

	      // This was an end token '//\*/',
	      // parse the other stuff on the line.
	      pline = mark;
	      pline--; // *pline == '*'
	      pline--; // *pline == '/'
	      pline--; // *pline == '/'
	      *pline = 0;
	      done = true;

	      // Ignore everything to the end of the line.
	      while( ch && ch != '\n' ) {
		ch = get_char();
	      }
	      break;
	    }
	    else {
	      // This was not an end token '//\*/',
	      // continue parsing.
	      pline = mark;
	    }
	  }
	}
	else if( ccdoc_flag == STYLE2C ) {
          // Issue 0086
          if( '\n' == ch ) {
            done = true;
            break;
          }
        }
      }
    }
    *pline = 0;

    // ================================================
    // EOF
    // ================================================
    if( !ch ) {
      // The end of the file was reached before the
      // comment was terminated.
      s_log.warning()
	<< "Unexpected EOF found, unterminated ccdoc comment "
	<< "specified at line "
	<< m_lineno << " in " << m_name.c_str()
	<< ".\n"
	<< s_log.enable();
      return scan_token();
    }

    // ================================================
    // Eliminate special tokens from the comment line.
    // ================================================
    if( !ignore_flag ) {
      if( contains_token(line,"@file") ) {
        ignore_flag = true;
      }
    }

    // ================================================
    // At this point we have the line.
    // Terminate it and write it out in debug mode.
    // ================================================
    if( m_debug ) {
      s_log << "CCDOC_PHASE1_DEBUG: ccdoc_line: '" << line << "'\n";
    }

    // ================================================
    // Skip the first line, if it is empty to
    // avoid conflicts when trying to determine
    // the long description for the following case:
    //   //@{           | <-- line 1 (blank - ignore)
    //   // short       | <-- line 2 (short description)
    //   //             | <-- line 3 (blank - separator)
    //   // long        | <-- line 4 (long description)
    //   //             | <-- line 5 (end of comment)
    // ================================================
    if( ccdoc_flag == STYLE2C ) { // bzoe
      // Issue 0086
      first = false;
      if (*line == 0) {
        s_log.warning()
          << "Empty single suffix comment\n"
          << s_log.enable();
      }
      if (m_debug) {
        s_log << "STYLE2C: " << line << "\n";
      }
    }
    else {
      if(first) {
        first = false;
        if(*line == 0)
          continue;
      }

      // ================================================
      // Issue 0082: only do this if -nojdsds is specified.
      // Set the short description flag to false
      // if a blank line (other than the first one)
      // is encountered.
      // ================================================
      if(!m_sw.jdsds() && *line == 0 && doc.m_mode == scanner_doc::SHORT) {
        doc.m_mode = scanner_doc::LONG;
        continue;
      }
    }

    doc.parse_line(line);

    // Issue 0086
    if (ccdoc_flag == STYLE2C)
      doc.m_comment.add_suffix(true);  // bzoe
  }

  // ================================================
  // At this point we have a valid ccdoc comment.
  // Format it for the parser to make things easy.
  // If it is empty, ignore it.
  // ================================================
  if( doc.empty() || ignore_flag )
    return scan_token();
  return doc.format(token,max);
}
// ================================================================
// Strip out token.
//
// Removes, in place, every occurrence of 'token' in 'buf' that is
// immediately followed by a delimiter: the end of the string or a
// byte whose unsigned value is <= ' ' (w/s or a control character).
// The delimiter itself is kept. The character that precedes an
// occurrence is not checked.
//
//   buf    NUL terminated, writable string. It can only shrink.
//          A null pointer is a no-op.
//   token  NUL terminated text to remove. A null pointer or an
//          empty string is a no-op.
//
// Ex. token = CDE
//   "AB CDE FG"  -->  "AB  FG"
//   "CDE CDE"    -->  " "
// ================================================================
void ccdoc::phase1::scanner::strip_token(char* buf,
                                         const char* token) const
{
  if( !buf || !token || !token[0] )
    return;

  char* scan = buf;
  while( *scan ) {
    // Try to match the whole token at the current position.
    // The comparison stops at the end of buf because a NUL
    // never equals a (non-NUL) token character.
    const char* ptoken = token;
    char* after = scan;
    while( *ptoken && *after == *ptoken ) {
      ++ptoken;
      ++after;
    }

    // The token must be followed by w/s or the end of the string.
    // Compare as unsigned char so that bytes >= 0x80 are never
    // mistaken for w/s on platforms where plain char is signed.
    const bool matched =
      !*ptoken && static_cast<unsigned char>(*after) <= ' ';
    if( !matched ) {
      ++scan;
      continue;
    }

    // They matched. Close the gap by moving the tail of the
    // string (including the trailing delimiter) over the token.
    //   AB CDE FG
    //      ^  ^
    //      |  +--- after
    //      +------ scan
    char* dst = scan;
    while( *after )
      *dst++ = *after++;
    *dst = 0;

    // Do not advance: the text that followed the token now
    // starts at 'scan' and has not been examined yet. Each
    // removal shortens the string, so the loop terminates.
  }
}
// ================================================================
// Contains token.
//
// Reports whether 'buf' contains 'token' as a separate word, that
// is:
//   - it begins at the start of buf or directly after a w/s
//     character, and
//   - it is followed by a w/s character or the end of buf.
// A w/s character is any byte whose unsigned value is <= ' '.
//
//   buf    NUL terminated string to search. A null pointer
//          yields false.
//   token  NUL terminated word to look for; it is expected to
//          start with a non-w/s character. A null pointer or an
//          empty string yields false.
//
// Ex. token = @file
//   "@file foo"      --> true
//   " @file foo"     --> true
//   "see @file"      --> true
//   "x@file foo"     --> false (no preceding w/s)
//   "@filename foo"  --> false (no trailing w/s)
// ================================================================
bool ccdoc::phase1::scanner::contains_token(const char* buf,
                                            const char* token) const
{
  if( !buf || !token || !token[0] )
    return false;

  for(const char* start = buf; *start; ++start) {
    // Make sure that there is preceding w/s or we are at the
    // start of the line. The check looks at the character
    // *before* the candidate so that the comparison below starts
    // on the first character of the word, not on the w/s itself.
    // Compare as unsigned char so that bytes >= 0x80 are never
    // mistaken for w/s on platforms where plain char is signed.
    if( start != buf &&
        static_cast<unsigned char>(start[-1]) > ' ' )
      continue;

    // Match the token. The comparison stops at the end of buf
    // because a NUL never equals a (non-NUL) token character.
    const char* p = start;
    const char* ptoken = token;
    while( *ptoken && *p == *ptoken ) {
      ++ptoken;
      ++p;
    }

    // Make sure that there is trailing w/s (or the end of buf).
    if( !*ptoken && static_cast<unsigned char>(*p) <= ' ' )
      return true;
  }
  return false;
}