//@copyright_begin // ================================================================ // Copyright Notice // Copyright (C) 1998-2004 by Joe Linoff // // Permission is hereby granted, free of charge, to any person obtaining // a copy of this software and associated documentation files (the // "Software"), to deal in the Software without restriction, including // without limitation the rights to use, copy, modify, merge, publish, // distribute, sublicense, and/or sell copies of the Software, and to // permit persons to whom the Software is furnished to do so, subject to // the following conditions: // // The above copyright notice and this permission notice shall be // included in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. // IN NO EVENT SHALL JOE LINOFF BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. // // Comments and suggestions are always welcome. // Please report bugs to http://ccdoc.sourceforge.net/ccdoc // ================================================================ //@copyright_end #include "log.h" #include "phase1_scanner.h" #include // ================================================================ // This variable allows the header version // to be queried at runtime. // ================================================================ namespace { char ccdoc_rcsid[] = "$Id: phase1_scanner.cc,v 1.11 2004/09/30 04:16:07 jlinoff Exp $"; } // ================================================================ // Constructor. // ================================================================ ccdoc::phase1::scanner::scanner(switches& sw) : m_lineno(0), m_sw(sw), m_debug(false) { } // ================================================================ // Destructor. // ================================================================ ccdoc::phase1::scanner::~scanner() { } // ================================================================ // Open // ================================================================ void ccdoc::phase1::scanner::open(const string& name) { m_name = name; m_lineno = 1; m_put_tokens.clear(); m_put_chars.clear(); if( m_is.is_open() ) m_is.close(); m_is.open( m_name.c_str() ); } // ================================================================ // Close // ================================================================ void ccdoc::phase1::scanner::close() { if( m_is.is_open() ) m_is.close(); } // ================================================================ // Get character. // ================================================================ char ccdoc::phase1::scanner::get_char() { char ch; if( m_put_chars.size() ) { ch = m_put_chars.back(); m_put_chars.pop_back(); } else if( m_is.eof() || m_is.bad() || m_is.fail() ) { return 0; } else if(!m_is.get(ch)) { return 0; } // Ignore carriage returns under Windows. if( '\r' == ch ) return get_char(); if( '\n' == ch ) ++m_lineno; return ch; } // ================================================================ // Put character. Use a vector so that characters can // be put even when the end of the stream is reached. // ================================================================ void ccdoc::phase1::scanner::put_char(char ch) { if(ch) { m_put_chars.push_back(ch); // Issue 0056: m_line>0 if( '\n' == ch && m_lineno>0 ) --m_lineno; } } // ================================================================ // Put token. // ================================================================ void ccdoc::phase1::scanner::put_token(const string& token) { m_put_tokens.push_back(token); if(m_debug) { if( token == "\n" ) s_log << "CCDOC_PHASE1_DEBUG: put_line: " << m_lineno << "\n"; else if( token == "" ) s_log << "CCDOC_PHASE1_DEBUG: put_eof: " << m_lineno << "\n"; else s_log << "CCDOC_PHASE1_DEBUG: put_token: '" << token << "'\n"; } } // ================================================================ // Get token. // ================================================================ const char* ccdoc::phase1::scanner::get_token() { const char* token = scan_token(); if(m_debug) { if( *token == '\n' ) s_log << "CCDOC_PHASE1_DEBUG: get_line: " << m_lineno << "\n"; else if( *token == 0 ) s_log << "CCDOC_PHASE1_DEBUG: get_eof: " << m_lineno << "\n"; else s_log << "CCDOC_PHASE1_DEBUG: get_token: '" << token << "'\n"; } return token; } // ================================================================ // Scan token. // ================================================================ const char* ccdoc::phase1::scanner::scan_token() { static char tokenbuf[0x100000]; // 2^20 *tokenbuf = 0; // ================================================ // A token was put back, return that one. // ================================================ if( m_put_tokens.size() ) { ::strcpy(tokenbuf,m_put_tokens.back().c_str()); m_put_tokens.pop_back(); return tokenbuf; } char ch = skip_ws(); // ================================================ // If this the end of the file, return // an empty token. // ================================================ if(!ch) { return tokenbuf; } // ================================================ // Translate trigraph sequences. // This must be done early because they can // be part of another token. // ================================================ if( '?' == ch ) { ch = scan_trigraph(); if( '?' == ch ) { tokenbuf[0] = '?'; tokenbuf[1] = 0; return tokenbuf; } } // ================================================ // Eliminate the "\\\n" white space. This cannot // be done in skip_ws() because of the backslash // trigraph sequence "??/". // ================================================ if( '\\' == ch ) { char ch1 = get_char(); if( '\n' == ch1 ) { // issue 0124 return get_token(); } put_char(ch1); } // ================================================ // Check for character and string literals with // the 'L' prefix. // ================================================ if( ch == 'L' ) { char ch1 = get_char(); if( '"' == ch1 ) { tokenbuf[0] = ch; tokenbuf[1] = ch1; tokenbuf[2] = 0; get_string_literal(&tokenbuf[2],sizeof(tokenbuf)-3); return tokenbuf; } if( '\'' == ch1 ) { tokenbuf[0] = ch; tokenbuf[1] = ch1; tokenbuf[2] = 0; get_char_literal(&tokenbuf[2],sizeof(tokenbuf)-3); return tokenbuf; } put_char(ch1); } // ================================================ // This is a quoted string. Look for the next // un-escaped quote. // ================================================ if( ch == '"' ) { tokenbuf[0] = ch; tokenbuf[1] = 0; get_string_literal(&tokenbuf[1],sizeof(tokenbuf)-2); return tokenbuf; } // ================================================ // This is a single quoted character. Look for the // next un-escaped quote. // ================================================ if( '\'' == ch ) { tokenbuf[0] = ch; tokenbuf[1] = 0; get_char_literal(&tokenbuf[1],sizeof(tokenbuf)-2); return tokenbuf; } // ================================================ // This is some sort of number. // The special case of ".333" is handled in the '.' // processing. // ================================================ if( '0' <= ch && ch <= '9' ) { put_char(ch); get_number_literal(tokenbuf,sizeof(tokenbuf)-1); return tokenbuf; } // ================================================ // Convert alternative token forms to their // primary form. This is handled for each // separate character analysis. // // alt pri alt pri alt pri // ====== === ====== === ====== === // <% { and && and_eq &= // %> } bitor | or_eq |= // <: [ or || xor_eq ^= // :> ] xor ^ not ! // %: # compl ~ not_eq != // %:%: ## bitand & // // We don't care about the identifier based // alternative forms. // ================================================ // ================================================ // This is an identifier. // Ccdoc must support the non-standard $ character // because some compilers support it. // ================================================ if( ( 'a' <= ch && ch <= 'z' ) || ( 'A' <= ch && ch <= 'Z' ) || ( '_' == ch || '$' == ch ) ) { put_char(ch); get_identifier(tokenbuf,sizeof(tokenbuf)-2); return tokenbuf; } // ================================================ // This is a operator/punctuator. // Do a longest match. // ================================================ if( '{' == ch || '}' == ch || '[' == ch || ']' == ch || '(' == ch || ')' == ch || '~' == ch || ',' == ch || ';' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; *pstr = 0; return tokenbuf; } // ================================================ // OP: !, != // ================================================ if( '!' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: #, ## // ================================================ if( '#' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '#' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: %, %:, %:%:, %=, %> // ================================================ if( '%' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else if( '>' == ch ) { // %> --> } pstr = tokenbuf; *pstr++ = '}'; *pstr++ = 0; return tokenbuf; } else if( ':' == ch ) { ch = get_char(); if( '%' == ch ) { ch = get_char(); if( ':' == ch ) { // %:%: --> ## pstr = tokenbuf; *pstr++ = '#'; *pstr++ = '#'; *pstr++ = 0; return tokenbuf; } else { put_char('%'); put_char(ch); } } else { // %: --> # pstr = tokenbuf; *pstr++ = '#'; *pstr++ = 0; put_char(ch); return tokenbuf; } } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: &, &&, &= // ================================================ if( '&' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '&' == ch || '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: *, *= // ================================================ if( '*' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: +, ++, += // ================================================ if( '+' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '+' == ch || '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: -, --, -=, ->, ->* // ================================================ if( '-' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '-' == ch || '=' == ch ) { *pstr++ = ch; } else if( '>' == ch ) { *pstr++ = ch; ch = get_char(); if( '*' == ch ) { *pstr++ = ch; } else { put_char(ch); } } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: ., .*, ..., .[0-9]+ // ================================================ if( '.' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '*' == ch ) { *pstr++ = ch; } else if( '0' <= ch && ch <= '9' ) { // Handle floating number of the form: .333 put_char(ch); put_char('.'); get_number_literal(tokenbuf,sizeof(tokenbuf)-1); return tokenbuf; } else if( '.' == ch ) { ch = get_char(); if( '.' == ch ) { *pstr++ = '.'; *pstr++ = '.'; } else { put_char(ch); put_char('.'); } } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: /, /=, //, /* // ================================================ if( '/' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; *pstr = 0; return tokenbuf; } else if( '/' == ch ) { // Found '//' // Check for '//@{' or a '// @{' comment. return scan_ccdoc_style2(tokenbuf,sizeof(tokenbuf)-1); } else if( '*' == ch ) { // Found '/*' // Check for a '/**' comment. return scan_ccdoc_style1(tokenbuf,sizeof(tokenbuf)-1); } else { put_char(ch); // issue 0069 *pstr = 0; return tokenbuf; } } // ================================================ // OP: :, ::, :> // ================================================ if( ':' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( ':' == ch ) { *pstr++ = ch; } else if( '>' == ch ) { // :> --> ] pstr = tokenbuf; *pstr++ = ']'; *pstr++ = 0; return tokenbuf; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: <, <%, <:, <<, <<=, <= // ================================================ if( '<' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else if( '%' == ch ) { // <% --> { pstr = tokenbuf; *pstr++ = '{'; *pstr++ = 0; return tokenbuf; } else if( ':' == ch ) { // <: --> ] pstr = tokenbuf; *pstr++ = '['; *pstr++ = 0; return tokenbuf; } else if( '<' == ch ) { *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: =, == // ================================================ if( '=' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: >, >=, >>, >>= // ================================================ if( '>' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else if( '>' == ch ) { *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: ^, ^= // ================================================ if( '^' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // ================================================ // OP: |, |=, || // ================================================ if( '|' == ch ) { char* pstr = tokenbuf; *pstr++ = ch; ch = get_char(); if( '=' == ch || '|' == ch ) { *pstr++ = ch; } else { put_char(ch); } *pstr = 0; return tokenbuf; } // Catch all at the end. tokenbuf[0] = ch; tokenbuf[1] = 0; return tokenbuf; } // ================================================================ // Skip w/s. Note that new lines are not w/s because they // are significant for pre-processing directives. // ================================================================ char ccdoc::phase1::scanner::skip_ws() { // Ignore white space, keep new lines // because they are used for pre-processing // analysis. char ch = get_char(); while( (ch > 0 && ch <= ' ') && ch != 0 && ch != '\n' ) { ch = get_char(); } return ch; } // ================================================================ // scan trigraph sequences. // ================================================================ char ccdoc::phase1::scanner::scan_trigraph() { // ================================================ // Check for tri-graph sequences. // The standard states that these sequences are // converted first. I interpret this to mean all // sequences except strings and comments. // OP: ?, ??=, ??/, ??', ??(, ??), ??!, ??<, ??>, ??- // ================================================ char ch1 = get_char(); if( ch1 != '?' ) { put_char(ch1); return '?'; } char ch2 = get_char(); switch(ch2) { case '=': ch2 = '#' ; break; case '/': ch2 = '\\'; break; case '\'': ch2 = '^' ; break; case '(': ch2 = '[' ; break; case ')': ch2 = ']' ; break; case '!': ch2 = '|' ; break; case '<': ch2 = '{' ; break; case '>': ch2 = '}' ; break; case '-': ch2 = '~' ; break; default: put_char(ch2); put_char(ch1); } return ch2; } // ================================================================ // get_string_literal // ================================================================ void ccdoc::phase1::scanner::get_string_literal(char* token,int max) { int ch = '"'; int pch = ch; while( max>0 && (ch = get_char()) ) { max--; *token++ = ch; if( ch == '"' && pch != '\\' ) { *token = 0; return; } // Issue 0116 // Contributed by Chris Martin 2001/11/25 if( ch == '\\' && pch == '\\' ) { // Reset pch to 0 to make sure that // the terminating double quote is // found correctly for the case of "\\". ch = 0; } pch = ch; } *token = 0; // The end of the string was never reached. // We should never reach this point because all input should // be from a legal C++ file. s_log.warning() << "Unterminated string literal found in " << m_name.c_str() << " at line " << m_lineno << ".\n" << s_log.enable(); } // ================================================================ // get_char_literal // ================================================================ void ccdoc::phase1::scanner::get_char_literal(char* token,int max) { // Issue 0052: Handle the special case of '\\'. int ch = get_char(); --max; *token++ = ch; if( '\\' == ch ) { ch = get_char(); --max; *token++ = ch; } ch = get_char(); --max; *token++ = ch; // Also handle the special cases of '\xad'. while( ch != '\'' ) { ch = get_char(); --max; *token++ = ch; if( max <= 2 ) { *token = 0; // The end of the character was never reached. // We should never reach this point because all input should // be from a legal C++ file. s_log.warning() << "Unterminated character literal found in " << m_name.c_str() << " at line " << m_lineno << ".\n" << s_log.enable(); return; } } *token = 0; } // ================================================================ // get_number_literal // ================================================================ void ccdoc::phase1::scanner::get_number_literal(char* token,int max) { // Some of the numeric processing is confusing because // the types overlap. Consider the following // number: // // 07777.35 // // This is a floating pointer number not an ill-formed // octal integer literal. // // This analyzer takes advantage of the fact that the // input is guaranteed to be legal C++. char ch = get_char(); if( '0' == ch ) { *token++ = ch; max--; char ch1 = get_char(); if( 'x' == ch1 || 'X' == ch1 ) { // This is a hex number. *token++ = ch1; max--; while( max>0 && (ch = get_char()) ) { if( ( 'a' <= ch && ch <= 'f' ) || ( 'A' <= ch && ch <= 'Z' ) || ( '0' <= ch && ch <= '9' ) ) { *token++ = ch; max--; } else { break; } } } else if( '0' <= ch1 && ch1 <= '9' ) { // This is octal or floating point. *token++ = ch1; max--; while( max>0 && (ch = get_char()) ) { if( '0' <= ch && ch <= '9' ) { *token++ = ch; max--; } else { break; } } } else { ch = ch1; } } else if( '1' <= ch && ch <= '9' ) { *token++ = ch; max--; // This is decimal or floating point. while( max>0 && (ch = get_char()) ) { if( '0' <= ch && ch <= '9' ) { *token++ = ch; max--; } else { break; } } } // At this point, ch is not a digit. Check // for a floating point decimal point, an // exponent or the UL suffix. // Scan for a floating point literal decimal point. if( '.' == ch ) { *token++ = ch; max--; while( max>0 && (ch = get_char()) ) { if( '0' <= ch && ch <= '9' ) { *token++ = ch; max--; } else { break; } } } // Scan for a floating point literal exponent. if( 'E' == ch || 'e' == ch ) { *token++ = ch; max--; if( max>0 && (ch = get_char()) ) { // Get the sign of the exponent or a digit if // the sign was not specified. if( ( '0' <= ch && ch <= '9' ) || ( '-' == ch || '+' == ch ) ) { *token++ = ch; max--; // Get the rest of the exponent digits. while( max>0 && (ch = get_char()) ) { if( '0' <= ch && ch <= '9' ) { *token++ = ch; max--; } else { break; } } } } } // Scan for the ULF suffix. if( 'U' == ch || 'u' == ch || 'L' == ch || 'l' == ch || 'F' == ch || 'f' == ch) { *token++ = ch; max--; while( max>0 && (ch = get_char()) ) { if( 'U' == ch || 'u' == ch || 'L' == ch || 'l' == ch || 'F' == ch || 'f' == ch ) { *token++ = ch; max--; } else { break; } } } put_char(ch); *token = 0; } // ================================================================ // get_identifier // ================================================================ void ccdoc::phase1::scanner::get_identifier(char* token,int max) { char ch; while( max>0 && (ch = get_char()) ) { max--; if( ( 'a' <= ch && ch <= 'z' ) || ( 'A' <= ch && ch <= 'Z' ) || ( '0' <= ch && ch <= '9' ) || ( '_' == ch || ch == '$' ) ) { *token++ = ch; } else { // Non-identifier token found, we are done. *token = 0; put_char(ch); return; } } // We reached the end of the file. // This is legal for things like: // #endif *token = 0; } // ================================================================ // scan ccdoc style1 // ================================================================ const char* ccdoc::phase1::scanner::scan_ccdoc_style1(char* token, int max) { bool suffix_flag = false; char ch = get_char(); if( '*' != ch ) { if( ch == '!' ) { // Issue 0162: // This may be a doxygen style comment of the form: // int a; /*!< ... */ ch = get_char(); if( ch == '<' ) { suffix_flag = true; } else { // This could be '*'. put_char(ch); } } if( !suffix_flag ) { // This is not a ccdoc comment, // skip to the end of the comment. while( ch != 0 ) { char pch = ch; ch = get_char(); if( '*' == pch && '/' == ch ) break; } return scan_token(); } } // Issue 0162: // Check for '/**<'. This is a doxygen style suffix comment. ch = get_char(); if( ch == '<' ) { suffix_flag = true; } else { put_char(ch); } // Found: '/**' // ================================================ // Check for the special pre-processing directives: // ================================================ if( scan_ccdoc_style1_special() ) return scan_token(); // Re-format the comment for consumption by the parser. // Here are the comment fields. scanner_doc doc(*this,m_sw); // Define the processing mode. static char line[65536]; // maximum line length bool first = true; bool done = false; bool ignore_flag = false; while(!done) { // ================================================ // Skip leading w/s // ================================================ char ch = get_char(); while(ch && ch != '\n' && (ch > 0 && ch <= ' ') ) { ch = get_char(); } // ================================================ // Skip the leading asterisk if it exists. // ================================================ if( !first && '*' == ch ) { ch = get_char(); if( '/' == ch ) { done = true; break; } } // ================================================ // Now load the line for directive processing. // Don't trim w/s, it may be needed for

. // ================================================ char* pline = line; while( ch && ch != '\n' ) { *pline++ = ch; char pch = ch; ch = get_char(); if( '*' == pch && ch == '/' ) { // This is the end of the comment. // // Make sure that any preceding directives // are processed, such as: // '/**' // ' * @return Foo bar spam */' // // Also make sure that all trailing asterisks are eaten: // '/**' // ' **/' // pline--; // *pline == '*' ccdoc_assert( '*' == *pline ); while( pline>line && '*' == *pline ) --pline; if( pline == line ) { // We are done. done = true; break; } // There may be some stuff on this line that // we need to parse. pline++; put_char('\n'); put_char('/'); put_char('*'); break; } } *pline = 0; // ================================================ // EOF // ================================================ if( !ch ) { // The end of the file was reached before the // comment was terminated. s_log.warning() << "Unexpected EOF found, unterminated ccdoc comment " << "specified at line " << m_lineno << " in " << m_name.c_str() << ".\n" << s_log.enable(); return scan_token(); } // ================================================ // Eliminate special tokens from the comment line. // ================================================ if( m_sw.doxygen() ) { if( !ignore_flag ) { if( contains_token(line,"@file") ) { ignore_flag = true; } } } // ================================================ // At this point we have the line. // Terminate it and write it out in debug mode. // ================================================ if( m_debug ) { s_log << "CCDOC_PHASE1_DEBUG: ccdoc_line: '" << line << "'\n"; } // ================================================ // Skip the first line, if it is empty to // avoid conflicts when trying to determine // the long description for the following case: // /** | <-- line 1 (blank - ignore) // * short | <-- line 2 (short description) // * | <-- line 3 (blank - separator) // * long | <-- line 4 (long description) // */ | <-- line 5 (end of comment) // ================================================ if(first) { first = false; if(*line == 0) continue; } // ================================================ // Issue 0082: only do this if -nojdsds is specified. // Set the short description flag to false // if a blank line (other than the first one) // is encountered. // ================================================ if(!m_sw.jdsds() && *line == 0 && doc.m_mode == scanner_doc::SHORT) { doc.m_mode = scanner_doc::LONG; continue; } doc.parse_line(line); } // Issue 0162: if( suffix_flag ) doc.m_comment.add_suffix(true); // ================================================ // At this point we have a valid ccdoc comment. // Format it for the parser to make things easy. // If it is empty, ignore it. // ================================================ if( doc.empty() || ignore_flag ) return scan_token(); return doc.format(token,max); } // ================================================================ // scan ccdoc style1 special // /**@#-*\/ - Start ignoring characters // /**@#+*\/ - Stop ignoring characters. // /**@#=*\/ - Insert the specified token // into the input stream. // ================================================================ bool ccdoc::phase1::scanner::scan_ccdoc_style1_special() { char ch = get_char(); if( '@' != ch ) { put_char(ch); return false; } ch = get_char(); if( '#' != ch ) { put_char(ch); put_char('@'); return false; } ch = get_char(); if( '-' != ch && '+' != ch && '=' != ch ) { put_char(ch); put_char('#'); put_char('@'); return false; } if( '-' == ch || '+' == ch ) { char ch1 = get_char(); if( '*' != ch1 ) { put_char(ch1); put_char(ch); put_char('#'); put_char('@'); return false; } ch1 = get_char(); if( '/' != ch1 ) { put_char(ch1); put_char('*'); put_char(ch); put_char('#'); put_char('@'); return false; } if( '-' == ch ) { // We found: "/**@#-*\/", Ignore all characters // until we find "/**@#+*\/". for(ch=get_char();ch;ch=get_char()) { if( '/' != ch ) continue; // The put char handles cases like this: // "/**@/**@#+*\/" ch = get_char(); if(!ch) break; if( '*' != ch ) {put_char(ch);continue;} ch = get_char(); if(!ch) break; if( '*' != ch ) {put_char(ch);continue;} ch = get_char(); if(!ch) break; if( '@' != ch ) {put_char(ch);continue;} ch = get_char(); if(!ch) break; if( '#' != ch ) {put_char(ch);continue;} ch = get_char(); if(!ch) break; if( '+' != ch ) {put_char(ch);continue;} ch = get_char(); if(!ch) break; if( '*' != ch ) {put_char(ch);continue;} ch = get_char(); if(!ch) break; if( '/' != ch ) {put_char(ch);continue;} break; // Don't warn about nested "/**@#-*\/", they are // legal. } // Don't warn about the EOF, that is legal as well. } // Ignore "/**@#+*\/". It is only used during // "/**@#-*\/" processing. return true; } // At this point we have: "/**@#=". ch = get_char(); // This is the character to insert. char ch1 = get_char(); if( '*' != ch1 ) { put_char(ch1); put_char(ch); put_char('='); put_char('#'); put_char('@'); return false; } ch1 = get_char(); if( '/' != ch1 ) { put_char(ch1); put_char('*'); put_char(ch); put_char('='); put_char('#'); put_char('@'); return false; } // We found: "/**@#=*\/". // Insert the character into the input stream. put_char(ch); return true; } // ================================================================ // scan ccdoc style2 // ================================================================ const char* ccdoc::phase1::scanner::scan_ccdoc_style2(char* token, int max) { // ================================================ // Check for ccdoc comment designators: // '//[ \t]*@{' 'vi}' -- brace balancing for vi, added by bzoe // or // '//[ \t]*/**' // ================================================ enum { NOT_A_COMMENT, STYLE2A, STYLE2B, STYLE2C} // Issue 0086: STYLE2C, added by bzoe 2001/11/26 ccdoc_flag = NOT_A_COMMENT; // Get the third character: // //x // ^ char ch = get_char(); // Issue 0162: if( '!' == ch || '/' == ch ) { // Look for doxygen single comment indicators: // '//!<' or '///<' char first_ch = ch; ch = get_char(); if( '<' == ch ) { // This is a doxygen style comment of the form: // int x; //!< ... // or // int x; ///< ... // // This is the same as ccdoc: // int x; //@- ... put_char('-'); ch = '@'; } else { // This is not a doxygen style comment. put_char(ch); // NOT '<' ch = first_ch; } } // ccdoc doesn't care about the leading w/s. while( ' ' == ch || '\t' == ch ) { ch = get_char(); } // process the other tokens. if( '@' == ch ) { // Look for: '//[ \t]*@{' 'vi}' -- brace balancing for vi, added by bzoe ch = get_char(); if( '{' == ch ) { ccdoc_flag = STYLE2A; } else if( '-' == ch ) { // Issue 0086 // Added by bzoe to support the new // single line, suffix syntax: // int foo; //@- this is a comment ccdoc_flag = STYLE2C; } else { put_char(ch); // in case it is a '\n' } } else if( '/' == ch ) { // Look for: '//[ \t]*/**' // Discard: '///***' ch = get_char(); if( '*' == ch ) { ch = get_char(); if( '*' == ch ) { ch = get_char(); if( '*' != ch ) { ccdoc_flag = STYLE2B; put_char(ch); } else { put_char(ch); put_char('*'); put_char('*'); put_char('/'); } } else { put_char(ch); put_char('*'); put_char('/'); } } else { put_char(ch); put_char('/'); } } else { put_char(ch); } // ================================================ // This is not a ccdoc comment, // skip to the end of the line. // ================================================ if(ccdoc_flag == NOT_A_COMMENT) { while( ch != 0 && ch != '\n' ) { ch = get_char(); } if( '\n' == ch ) { put_char(ch); } return scan_token(); } // ================================================ // Get the rest of the tokens on the line. // Here are the comment fields. // ================================================ scanner_doc doc(*this,m_sw); // Define the processing mode. static char line[65536]; // maximum line length bool first = true; bool done = false; bool ignore_flag = false; while(!done) { // ================================================ // Skip leading w/s // ================================================ char ch = get_char(); while(ch && ch !='\n' && (ch > 0 && ch <= ' ') ) { ch = get_char(); } // ================================================ // This must be either a comment or a blank line. // Both are valid. // ================================================ if( !first ) { if( '/' == ch ) { ch = get_char(); if( '/' != ch ) { s_log.warning() << "Invalid ccdoc comment specified at line " << m_lineno << " in " << m_name.c_str() << ".\n" << s_log.enable(); put_char(ch); put_char('/'); return scan_token(); } // Check for end of comment. // Skip the lead '//' ch = get_char(); // 'vi{' -- brace balancing for vi, added by bzoe // Is this '//@}'? if( ccdoc_flag == STYLE2A ) { if( '@' == ch ) { char ch1 = get_char(); // 'vi{' -- brace balancing for vi, added by bzoe if( '}' == ch1 ) { while( ch1 && ch1 != '\n' ) ch1 = get_char(); line[0] = 0; done = true; break; } put_char(ch1); } } else if( ccdoc_flag == STYLE2B ) { // Is this '//\*/'? if( '*' == ch ) { char ch1 = get_char(); if( '/' == ch1 ) { while( ch1 && ch1 != '\n' ) ch1 = get_char(); line[0] = 0; done = true; break; } // Skip the asterisk in this form. ch = ch1; } // Is this '// \*/'? else if( ' ' == ch || '\t' == ch ) { char ch1 = get_char(); if( '*' == ch1 ) { char ch2 = get_char(); if( '/' == ch2 ) { while( ch2 && ch2 != '\n' ) ch2 = get_char(); line[0] = 0; done = true; break; } // Skip the asterisk and preceding space in this form. ch = ch2; } else { put_char(ch1); } } } } } // ================================================ // Now load the line for directive processing. // Don't trim w/s, it may be needed for

. // ================================================ char* pline = line; while( ch && ch != '\n' ) { *pline++ = ch; char pch = ch; ch = get_char(); if( pline>&line[3] ) { if( ccdoc_flag == STYLE2A ) { // 'vi{' -- brace balancing for vi, added by bzoe if( '@' == pch && ch == '}' ) { // This may be the end of the comment. // // Make sure that any preceding directives // are processed, such as: // '//@{ @return Foo bar spam //@}' // char* mark = pline; pline--; // *pline == '@' ccdoc_assert( '@' == *pline ); char ch1 = *--pline; char ch2 = *--pline; if( '/' == ch1 && '/' == ch2 ) { if( pline == line ) { // We are done. done = true; break; } // 'vi{' -- brace balancing for vi, added by bzoe // This was an end token '//@}', // parse the other stuff on the line. pline = mark; pline--; // *pline == '@' pline--; // *pline == '/' pline--; // *pline == '/' *pline = 0; done = true; // Ignore everything to the end of the line. while( ch && ch != '\n' ) { ch = get_char(); } break; } else { // 'vi{' -- brace balancing for vi, added by bzoe // This was not an end token '//@}', // continue parsing. pline = mark; } } } else if( ccdoc_flag == STYLE2B ) { if( '*' == pch && ch == '/' ) { // This may be the end of the comment. // // Make sure that any preceding directives // are processed, such as: // '///** @return Foo bar spam //\*/' // char* mark = pline; pline--; // *pline == '*' ccdoc_assert( '*' == *pline ); char ch1 = *--pline; char ch2 = *--pline; if( '/' == ch1 && '/' == ch2 ) { if( pline == line ) { // We are done. done = true; break; } // This was an end token '//\*/', // parse the other stuff on the line. pline = mark; pline--; // *pline == '*' pline--; // *pline == '/' pline--; // *pline == '/' *pline = 0; done = true; // Ignore everything to the end of the line. while( ch && ch != '\n' ) { ch = get_char(); } break; } else { // This was not an end token '//\*/', // continue parsing. pline = mark; } } } else if( ccdoc_flag == STYLE2C ) { // Issue 0086 if( '\n' == ch ) { done = true; break; } } } } *pline = 0; // ================================================ // EOF // ================================================ if( !ch ) { // The end of the file was reached before the // comment was terminated. s_log.warning() << "Unexpected EOF found, unterminated ccdoc comment " << "specified at line " << m_lineno << " in " << m_name.c_str() << ".\n" << s_log.enable(); return scan_token(); } // ================================================ // Eliminate special tokens from the comment line. // ================================================ if( !ignore_flag ) { if( contains_token(line,"@file") ) { ignore_flag = true; } } // ================================================ // At this point we have the line. // Terminate it and write it out in debug mode. // ================================================ if( m_debug ) { s_log << "CCDOC_PHASE1_DEBUG: ccdoc_line: '" << line << "'\n"; } // ================================================ // Skip the first line, if it is empty to // avoid conflicts when trying to determine // the long description for the following case: // //@{ | <-- line 1 (blank - ignore) // // short | <-- line 2 (short description) // // | <-- line 3 (blank - separator) // // long | <-- line 4 (long description) // // | <-- line 5 (end of comment) // ================================================ if( ccdoc_flag == STYLE2C ) { // bzoe // Issue 0086 first = false; if (*line == 0) { s_log.warning() << "Empty single suffix comment\n" << s_log.enable(); } if (m_debug) { s_log << "STYLE2C: " << line << "\n"; } } else { if(first) { first = false; if(*line == 0) continue; } // ================================================ // Issue 0082: only do this if -nojdsds is specified. // Set the short description flag to false // if a blank line (other than the first one) // is encountered. // ================================================ if(!m_sw.jdsds() && *line == 0 && doc.m_mode == scanner_doc::SHORT) { doc.m_mode = scanner_doc::LONG; continue; } } doc.parse_line(line); // Issue 0086 if (ccdoc_flag == STYLE2C) doc.m_comment.add_suffix(true); // bzoe } // ================================================ // At this point we have a valid ccdoc comment. // Format it for the parser to make things easy. // If it is empty, ignore it. // ================================================ if( doc.empty() || ignore_flag ) return scan_token(); return doc.format(token,max); } // ================================================================ // Strip out token. // ================================================================ void ccdoc::phase1::scanner::strip_token(char* buf, const char* token) const { if( buf && token && token[0] ) { char* p1 = buf; for(;*p1;++p1) { char* p2 = p1; const char* p3 = token; for(;*p3 && *p2 == *p3;++p3,++p2); if( !*p3 && *p2<=' ' ) { // They matched. Strip out the token. // Ex. token = CDE // AB CDE FG // ^ ^ // | +--- p2 // +------ p1 // char* p4 = p2-1; for(;*p2;++p1,++p2) { *p1 = *p2; } *p1 = 0; p1 = p4; } } } } // ================================================================ // Contains token. // ================================================================ bool ccdoc::phase1::scanner::contains_token(const char* buf, const char* token) const { if( buf && token && token[0] ) { const char* pbuf = buf; for(;*pbuf;++pbuf) { const char* p2 = pbuf; const char* ptoken = token; // Make sure that there is preceding w/s or // we are at the start of the line. if( p2 == buf || *p2 <= ' ' ) { for(;*ptoken && *p2 && *p2 == *ptoken;++ptoken,++p2); // Make sure that there is trailing w/s. if( !*ptoken && *p2<=' ' ) return true; } } } return false; }