#include #include #include #include #include #include #include #include #ifdef MAPM_TREES_COUNTING #include typedef MAPM numTreesType; #define MAPM_PRECISION 50 ostream& operator << (ostream &out, const MAPM &number){ char buff[MAPM_PRECISION+1]; number.toIntegerString(buff); out << buff; return out; } #else typedef unsigned long long numTreesType; #endif #include #ifdef DEBUG int glrStateVertex::numStateVertices = 0; int glrStateVertex::maxStateVertexId = 0; glrSymbolTable *globalSymbols; bool mergeInProgress = false; #endif #ifdef DEBUG_FOREST int glrNode::maxNodeId = 0; #endif #ifdef NODES_COUNTING int numTerminalNodes=0; int numNonterminalNodes = 0; int numEpsilonNodes = 0; #endif #ifdef LOG_GLR_TABLE #define glrStateImplementation glrMapState #else #define glrStateImplementation glrVectorState #endif class stringForest : public vector { public: stringForest &operator *= (stringForest *forest); stringForest &operator += (stringForest *forest); stringForest &operator += (const string &suffix); void print(ostream &output,int numberOfTokens); }; class commonNode : public glrNode { public: commonNode(const glrSymbolTable::glrSymbol &symbolA); commonNode(const glrRule* const &ruleA); commonNode(const glrRule* const &ruleA,const deque &succA); virtual ~commonNode(); virtual numTreesType getNumberOfSubtrees(); virtual stringForest *getForest(const glrSymbolTable &symbols); }; class nonterminalNode : public commonNode { private: deque > succ; numTreesType numberOfSubtrees; bool compInProgress; public: nonterminalNode(const glrRule* const &ruleA); nonterminalNode(const glrRule* const &ruleA,const deque &succA); virtual void addSubtree(const glrRule* const &ruleA,const deque &succA); virtual ~nonterminalNode(); virtual numTreesType getNumberOfSubtrees(); virtual stringForest *getForest(const glrSymbolTable &symbols); }; class terminalNode : public commonNode { private: string terminal; public: terminalNode(const glrSymbolTable::glrSymbol &symbolA,const string &terminalA); virtual ~terminalNode(); virtual stringForest *getForest(const glrSymbolTable &symbols); }; class complexParser : public glrParser { private: map > terminals; istream *input; bool syntInput; char* line; protected: virtual void readToken(); public: static const int maxInputLineLength; void readTerminals(istream &input); void setInput(istream *inputA); void setSyntInput(istream *inputA); numTreesType getNumberOfTrees(); void printForest(ostream &output); complexParser(); ~complexParser(); }; const int complexParser::maxInputLineLength = 1024; commonNode::commonNode(const glrSymbolTable::glrSymbol &symbolA) : glrNode(symbolA) { #ifdef NODES_COUNTING ++numTerminalNodes; #endif } commonNode::commonNode(const glrRule* const &ruleA) : glrNode(ruleA){ #ifdef NODES_COUNTING ++numEpsilonNodes; #endif } commonNode::commonNode(const glrRule* const &ruleA,const deque &succA) : glrNode(ruleA,succA){ #ifdef NODES_COUNTING ++numNonterminalNodes; #endif } commonNode::~commonNode(){ #ifdef NODES_COUNTING switch(getType()){ case glrTerminalNode: --numTerminalNodes; break; case glrNonterminalNode: --numNonterminalNodes; break; case glrEpsilonNode: --numEpsilonNodes; break; } #endif } numTreesType commonNode::getNumberOfSubtrees() { return 1; } stringForest *commonNode::getForest(const glrSymbolTable &symbols) { return (stringForest*)NULL; } stringForest &stringForest::operator *= (stringForest *forest){ stringForest::iterator i=forest->begin(); i++; int len=size(); while(i!=forest->end()){ for(int j=0;jbegin()); delete forest; } stringForest &stringForest::operator += (stringForest *forest){ insert(end(),forest->begin(),forest->end()); delete forest; } stringForest &stringForest::operator += (const string &suffix){ for(stringForest::iterator i=begin();i!=end();++i)(*i)+=suffix; } void stringForest::print(ostream &output,int numberOfTokens){ for(stringForest::iterator i=begin();i!=end(); output << " 0 " << numberOfTokens << " " << (*(i++)) << endl); } nonterminalNode::nonterminalNode(const glrRule* const &ruleA) : commonNode(ruleA) { numberOfSubtrees=1; compInProgress = false; } nonterminalNode::nonterminalNode(const glrRule* const &ruleA,const deque &succA) : commonNode(ruleA,succA) { numberOfSubtrees=0; compInProgress = false; addSubtree(ruleA,succA); } nonterminalNode::~nonterminalNode(){ for(deque >::iterator iSucc=succ.begin();iSucc!=succ.end();++iSucc) for(deque::iterator iNode=iSucc->begin();iNode!=iSucc->end();++iNode) if(*iNode!=this)(*iNode)->release(); } void nonterminalNode::addSubtree(const glrRule* const &ruleA,const deque &succA){ #ifdef DEBUG if(getType()==glrEpsilonNode){ cerr << "error: refusing request for adding subtree to the epsilon node" << endl; return; } for(deque >::const_iterator i = succ.begin(); i!=succ.end(); ++i){ if((*i).size()==succA.size()){ deque::const_iterator n = succA.begin(); deque::const_iterator j = (*i).begin(); for( ; j!=(*i).end(); ++j,++n){ if((*j)!=(commonNode*)(*n))break; } if(j==(*i).end()){ cerr << "error: void nonterminalNode::addSubtree(const glrRule* const &ruleA,const deque &succA): " << "refusing attempt to add same subtree to existing node." << endl; return; } } } #endif #ifdef DEBUG_FOREST cerr << "adding subtree "; for(deque::const_iterator i = succA.begin(); i!=succA.end(); ++i){ cerr << "[" << (*i)->getId() << "] "; } cerr << "to node [" << getId() << "] ("; ruleA->print(*globalSymbols, cerr); cerr << endl; #endif succ.push_back((const deque&)succA); for(deque::iterator iNode=succ.back().begin();iNode!=succ.back().end();++iNode){ (*iNode)->shackle(); } } numTreesType nonterminalNode::getNumberOfSubtrees(){ if(compInProgress){ cerr << "warning: numTreesType nonterminalNode::getNumberOfSubtrees(): cyclical grammar." << endl; return 0; } if(numberOfSubtrees==0){ compInProgress=true; for(deque >::iterator iSub=succ.begin();iSub!=succ.end();++iSub){ numTreesType one=1; for(deque::iterator iNode=iSub->begin();iNode!=iSub->end();++iNode){ one*=(*iNode)->getNumberOfSubtrees(); } numberOfSubtrees+=one; } compInProgress=false; } return numberOfSubtrees; } stringForest *nonterminalNode::getForest(const glrSymbolTable &symbols) { if(compInProgress){ cerr << "warning: stringForest *nonterminalNode::getForest(const glrSymbolTable &symbols): cyclical grammar" << endl; return NULL; } if(getType()==glrEpsilonNode){ stringForest *all=new stringForest; all->push_back(string(" { ")+symbols.getStringFromSymbol(getSymbol())+" } "); return all; }else{ compInProgress=true; stringForest *all,*given; all=new stringForest; bool allSuccess=false; for(deque >::const_iterator iSucc=succ.begin();iSucc!=succ.end();++iSucc){ bool success=false; stringForest *one=new stringForest; one->push_back(string(" { ")+symbols.getStringFromSymbol(getSymbol())+" "); for(deque::const_iterator iNode=iSucc->begin();iNode!=iSucc->end();++iNode){ given=(*iNode)->getForest(symbols); if(given!=NULL){ (*one)*=given; success=true; } } if (success) { (*all)+=one; allSuccess=true; } else delete one; } if(allSuccess){ (*all)+="} "; }else{ delete all; all=NULL; } compInProgress=false; return all; } } terminalNode::terminalNode(const glrSymbolTable::glrSymbol &symbolA,const string &terminalA) : commonNode(symbolA) { terminal=terminalA; } terminalNode::~terminalNode() { } stringForest *terminalNode::getForest(const glrSymbolTable &symbols) { stringForest *ret=new stringForest; ret->push_back(string(" { ")+symbols.getStringFromSymbol(getSymbol())+"\\n"+terminal+" } "); return ret; } void complexParser::readTerminals(istream &input){ string terminal,preterminal; while(input >> terminal >> preterminal){ // if((preterminal.front()=='\'')&&(preterminal.back()='\'')) terminals[terminal].push_back(symbols.getSymbolFromString(preterminal)); } } void complexParser::readToken(){ if(syntInput){ while(*line!=0){ string s; char *c; for(c = line; ((*c!=':')&&(*c!='.')&&(*c!=0)); c++) s += *c; if(!*c)return; istrstream istr(s.c_str()); int newTokenNum; istr >> newTokenNum; if(getNumOfTokens()+1!=newTokenNum) break; for( ; ((*c!=' ')&&(*c)); ++c); if(!*c)return; string preterminal; c++; if(*c=='\'')c++; for( ; ((*c!=' ')&&(*c)&&(*c!='\'')); preterminal += *(c++)); if(!*c) return; for( ; ((*c!='"')&&(*c)); ++c); c++; string terminal; for( ; ((*c!='"')&&(*c)); terminal += *(c++)); if(!*c)return; preterminals.push_back(new terminalNode(symbols.getSymbolFromString(preterminal),terminal)); input->getline(line,maxInputLineLength); } }else{ string terminal; bool cont = true; while((cont)&&(*input >> terminal)){ #ifdef DEBUG cerr << "message: void complexParser::readToken(): terminal ,," << terminal << "'' read." << endl; #endif map >::iterator preter=terminals.find(terminal); if(preter!=terminals.end()){ for(vector::iterator t=preter->second.begin();t!=preter->second.end();++t){ preterminals.push_back(new terminalNode(*t,preter->first)); } } if((preter==terminals.end())||(preter->second.empty())){ cerr << "error: unknown terminal ,," << terminal << "'' => skipping" << endl; }else{ cont = false; } } } } void complexParser::setInput(istream *inputA){ syntInput = false; input=inputA; } void complexParser::setSyntInput(istream *inputA){ input = inputA; syntInput = true; input->getline(line,maxInputLineLength); } numTreesType complexParser::getNumberOfTrees(){ if(!getForestRoot())return 0; return getForestRoot()->getNumberOfSubtrees(); } void complexParser::printForest(ostream &output){ if(commonNode *root=getForestRoot()){ stringForest *forest=root->getForest(symbols); forest->print(output,getNumOfTokens()); delete forest; } } void printUsage(ostream &out){ out << "Usage: glrparse [-g grammar] [-l table] [-f input] [-r terminals] [-t] [-s] [-p #n] [-w table_out] [-y]" << endl << endl << "Options/Arguments:" << endl << " -g grammar grammar file. Default file name is ,,grammar''" << endl << " -l table glr table file. If not specified, table is computed dynamicaly." << endl << " -f input file with input for parsing." << endl << " -r terminals terminals file. Default file name is ,,terminals''" << endl << " -t print text representation of forest on cout." << endl << " If not specified, only number of trees found is printed." << endl << " -s print parser status when parser is ready to parsing." << endl << " -p #n repeat parsing #n times and then print the whole parsing time." << endl << " -y the input is in the form of the ,,synt -pt'' output" << endl << " -b take each line of input as different sentence -- parse them all and then print parsing time" << endl << " -w table_out write glr table to file table_out." << endl << endl << "either -w or -f option have to be specified" << endl << "the -b and -y options must not be specified concurrently" << endl; } complexParser::complexParser(){ #ifdef DEBUG globalSymbols = &symbols; #endif #ifdef MAPM_TREES_COUNTING m_apm_cpp_precision(MAPM_PRECISION); #endif line = new char[maxInputLineLength]; } complexParser::~complexParser(){ delete[] line; } int main(int argc,char**argv){ string grammar="grammar",table="",terminals="terminals",input="",tableOut=""; bool printStatus=false,printForest=false,syntInput = false; bool benchmark = false; int numOfSentences = 0; int repeat=0; const char *optString="g:l:f:r:tw:sp:yb"; int c; while((c=getopt(argc,argv,optString))!=-1){ switch(c){ case 'g': grammar=optarg; break; case 'l': table=optarg; break; case 'f': input=optarg; break; case 'r': terminals=optarg; break; case 't': printForest=true; break; case 'w': tableOut=optarg; break; case 's': printStatus=true; break; case 'p': { istrstream istr(optarg); istr >> repeat; break; } case 'y': syntInput = true; break; case 'b': benchmark = true; break; default: printUsage(cerr); exit(1); } } if(((input=="")&&(tableOut==""))||(benchmark&&syntInput)){ printUsage(cerr); exit(2); } complexParser parser; istream *file; file=new ifstream(grammar.c_str()); try{ parser.initGrammar(*file); }catch(glrException &e){ cerr << "error during reading grammar from file ,," << grammar << "'': " << e.message() << endl; exit(3); } delete file; if(table!=""){ try{ file=new ifstream(table.c_str()); parser.readTable(*file); }catch(glrException &e){ cerr << "error during reading table from file ,," << table << "'': " << e.message() << " => defaulting to glr table computation" << endl; parser.computeTable(); } delete file; }else{ parser.computeTable(); } if((input!="")&&(!syntInput)){ file = new ifstream(terminals.c_str()); try{ parser.readTerminals(*file); }catch(glrException &e){ cerr << "error during reading terminals from file ,," << terminals << "'': " << e.message() << endl; exit(4); } delete file; } if(tableOut!=""){ ofstream file(tableOut.c_str()); if(file.good()) parser.printTable(file); else{ cerr << "can't write glr table to file '" << tableOut << "'" << endl; } } if(printStatus){ parser.printStatus(cerr); cerr << endl; } numTreesType numTrees = 0; if(input!=""){ struct timeval begin,end,alltime; alltime.tv_sec = 0; alltime.tv_usec = 0; for(int i=0;(igood()){ try{ if(benchmark){ char inputSentence[1024]; while(file->getline(inputSentence,1024)){ istrstream parserInput(inputSentence); parser.setInput(&parserInput); parser.releaseForestRoot(); gettimeofday(&begin,NULL); parser.parse(); gettimeofday(&end,NULL); alltime.tv_sec += (end.tv_sec-begin.tv_sec)-(end.tv_usec