#ifndef GLRGRAMMAR #define GLRGRAMMAR #include #include #include #include using namespace std; #include #include #include #include /** @name glrRuleType * * Enumeration type for identifying the type of the grammar rule. The glrEpsilonRule * identifyies the epsilon rule. The glrNonterminalRule is used for rules that have * a nonterminal at the end of the right side. The glrPreterminalRule is used for * rules that have (pre)terminal at the right side of the rule. * * @see glrRule */ typedef enum { /** * * If the type of some rule is glrEpsilonRule, the rule has the right side empty. */ glrEpsilonRule, /** * * If the type of some rule is the glrNonterminalRule, the rule has nonterminal * at the end of the right side. */ glrNonterminalRule, /** * * If the type os some grammar rule is glrPreterminalRule, the rule has preterminal * at the end of the right side. */ glrPreterminalRule } glrRuleType; /** * * Type for storing the right side of the grammar rule. * * @see glrRule */ typedef vector glrRuleRight; /* class glrReductionHint { private: set packedNodes; class glrNode* ownParent; public: void setOwnParent(class glrNode* const& ownParentA){ if(ownParent){ throw glrLostOwnParentException("void glrReductionHint::setOwnParent(class glrNode* const& ownParentA): \ call of this function while ownParent not NULL"); } // if(ownParent)((glrGuard*)(ownParent))->release(); ownParent = ownParentA; if(ownParent) packedNodes.insert(ownParent); } class glrNode* const& getOwnParent(){ return ownParent; } bool packedNodes_add(class glrNode* const& node){ pair::iterator,bool> p = packedNodes.insert(node); return p.second; } glrReductionHint(){ ownParent = NULL; } ~glrReductionHint(){ if(ownParent)((glrGuard*)(ownParent))->release(); } set &getPackedNodes(){ return packedNodes; } }; class glrParseTimeRuleState { private: int iToken; map,glrReductionHint> reductions; class glrNode* epsilonNode; public: void reset(){ iToken = 0; reductions.clear(); if(epsilonNode)((glrGuard*)epsilonNode)->release(); epsilonNode = NULL; } glrReductionHint& getReductionHint(const deque &subtree,int const& iTokenA) { // if(iToken!=iTokenA){ // iToken = iTokenA; // reductions.clear(); // } return reductions[subtree]; } class glrNode *getEpsilonHint(const int& iTokenA){ // if(iToken!=iTokenA){ // iToken = iTokenA; // ((glrGuard*)epsilonNode)->release(); // epsilonNode = NULL; // } return epsilonNode; } void setEpsilonHint(class glrNode* const &epsilonNodeA){ epsilonNode = epsilonNodeA; } glrParseTimeRuleState(){ epsilonNode = NULL; } };*/ /** * * Class which used to represent the grammar rule. Note that you may want to derive this class * to store any other values associated with the rule. Of course then you have to * derive also the glrGrammar class and override its read function to store * rules of your own type in the grammar. * * @see glrGrammar */ class glrRule { private: glrRuleType type; int number; glrSymbolTable::glrSymbol left; glrRuleRight right; // glrParseTimeRuleState *ruleState; int nodeToken; void *epsilonNode; public: void* getEpsilonNode(const int &nodeTokenA) const { if (nodeToken==nodeTokenA) return epsilonNode; else return NULL; } void setEpsilonNode(int const& nodeTokenA, void* const& epsilonNodeA){ nodeToken = nodeTokenA; epsilonNode = epsilonNodeA; } /* glrReductionHint& getReductionHint(const deque &subtree,int const& iTokenA) const { return ruleState->getReductionHint(subtree,iTokenA); } void resetRuleState() const { ruleState->reset(); } void setEpsilonHint(class glrNode* const &epsilonNodeA) const { ruleState->setEpsilonHint(epsilonNodeA); } class glrNode *getEpsilonHint(const int& iTokenA) const { return ruleState->getEpsilonHint(iTokenA); } */ /** * * The function getNumber() returns the number of the grammar rule. * Note that the rule number 0 has special sense in the grammar. * It is the only rule which can be at the top of all derivation trees found by the parser. * * @see glrGrammar */ const int &getNumber() const { return number; } /** * * The function getType() returns the type of the grammar rule. It can be either glrEpsilonRule for epsilon * rules, glrNonterminalRule for rules which has at the end of right side nonterminal * and the glrPreterminalRule for rules which end with preterminal. * * @see glrRuleType */ const glrRuleType &getType() const { return type; } /** * * The function pushes the symbol symbol to the right side of the rule. * Argument symbols is parser's symbol table. * * @see glrSymbolTable::glrSymbol * @see glrSymbolTable * @see glrParser */ void pushRight(const glrSymbolTable &symbols,const glrSymbolTable::glrSymbol &symbol){ right.push_back(symbol); type=(symbols.getSymbolType(symbol)==glrSymbolTable::glrPreterminal)?glrPreterminalRule:glrNonterminalRule; } /** * * The function adds the symbol specified by the charSymbol argument to * the symbols symbol table (if there is not already such) and pushes * appropriate value of type glrSymbolTable::glrSymbol to the right side of the rule. * * @see glrSymbolTable::glrSymbol * @see glrSymbolTable * @see glrParser */ void pushRight(glrSymbolTable &symbols,const char* const &charSymbol){ const glrSymbolTable::glrSymbol &symbol=symbols.addTextSymbol(charSymbol); right.push_back(symbol); type=(symbols.getSymbolType(symbol)==glrSymbolTable::glrPreterminal)?glrPreterminalRule:glrNonterminalRule; } /** * * The function adds the symbol specified by the stringSymbol argument to * the symbols symbol table (if there is not already such) and pushes * appropriate value of type glrSymbolTable::glrSymbol to the right side of the rule. * * @see glrSymbolTable::glrSymbol * @see glrSymbolTable * @see glrParser */ void pushRight(glrSymbolTable &symbols,const string &stringSymbol){ const glrSymbolTable::glrSymbol &symbol=symbols.addTextSymbol(stringSymbol); right.push_back(symbol); type=(symbols.getSymbolType(symbol)==glrSymbolTable::glrPreterminal)?glrPreterminalRule:glrNonterminalRule; } /** * * The function prints the rule in the the format LEFT -> RIGHT_1 RIGHT_2 ... RIGHT_N. * The delimiter is space. The symbols argument is the parser's symbol table and * the output specifies where to print. * * @see glrSymbolTable */ virtual void print(const glrSymbolTable &symbols,ostream &output) const { if(!(output << symbols.getStringFromSymbol(left) << " ->")){ throw glrIOErrorException("void glrRule::print(const glrSymbolTable &symbols,ostream &output) const: can't write to output"); } for(glrRuleRight::const_iterator i=right.begin();i!=right.end();++i){ if(!(output << " " << symbols.getStringFromSymbol(*i))){ throw glrIOErrorException("void glrRule::print(const glrSymbolTable &symbols,ostream &output) const: can't write to output"); } } } /** * * The function returns the constant reference to the symbol at the end of the right side of * the rule. * * @see glrSymbolTable::glrSymbol * @see glrSymbolTable */ const glrSymbolTable::glrSymbol &right_back() const { #ifdef DEBUGEXCEPTIONS if(right.empty()){ throw glrEmptyContainerException("const glrSymbolTable::glrSymbol& glrRule::right_back() const: right side of the rule is empty"); } #endif return right.back(); } /** * * The function returns the size of the right side of the rule. * */ int right_size() const { return right.size(); } /** * * The function returns constant reference to the symbol at the position pos * at the right size of the rule. * * @see glrSymbolTable::glrSymbol * @see glrSymbolTable */ const glrSymbolTable::glrSymbol &right_at(const int &pos) const { #ifdef DEBUGEXCEPTIONS if((pos<0)||(pos>=right.size())){ throw glrIndexOutOfBoundsException("const glrSymbolTable::glrSymbol& glrRule::right_at(const int &pos) const: index out of bounds"); } #endif return right[pos]; } /** * * The function returns the iterator to the begin of the right side of the rule. * You can use it in the cooperation with the right_end() function * to iterate over all symbols of the right side of the rule. * * @see glrRuleRight * @see glrRule::right_end */ glrRuleRight::const_iterator right_begin() const { return right.begin(); } /** * * Function returns iterator to the end of the right side of the rule. * * @see glrRuleRight * @see glrRule::right_begin */ glrRuleRight::const_iterator right_end() const { return right.end(); } /** * * The function returns the iterator to the begin of the reversed right * side of the rule. * */ glrRuleRight::const_reverse_iterator right_rbegin() const { return right.rbegin(); } /** * * The function returns the iterator to the end of the reversed right side * of the rule. * * @see glrRuleRight * @see glrRule::right_rbegin */ glrRuleRight::const_reverse_iterator right_rend() const { return right.rend(); } /** * * Function returns constant reference to the symbol at the left side of the rule. * * @see glrSymbolTable::glrSymbol * @see glrSymbolTable */ const glrSymbolTable::glrSymbol &getLeft() const { return left; } /** * * Constructor. numberA is the number of the rule and leftA is the symbol at the left side of the rule. * */ glrRule(const int &numberA,const glrSymbolTable::glrSymbol &leftA){ left=leftA; number=numberA; type=glrEpsilonRule; //ruleState = new glrParseTimeRuleState(); } virtual ~glrRule(){ //delete ruleState; } }; /** * * Class glrCompRule is used internally by the parser during the glr table computation. * */ class glrCompRule { private: /** * * symbolTypes are types of symbols at the right side of rule rule. * */ vector symbolTypes; public: /** * * rule is constant pointer to associated rule. * */ const glrRule *rule; const glrSymbolTable::glrSymbolType& symbolType_at(const int &pos) const { #ifdef DEBUGEXCEPTIONS if((pos<0)||(pos>=symbolTypes.size())){ throw glrIndexOutOfBoundsException("const glrSymbolTable::glrSymbolType& glrCompRule::symbolType_at(const int &pos) const: index out of bounds"); } #endif return symbolTypes[pos]; } const glrSymbolTable::glrSymbolType& symbolTypes_back() const { #ifdef DEBUGEXCEPTIONS if(symbolTypes.empty()){ throw glrEmptyContainerException("const glrSymbolTable::glrSymbolType& glrCompRule::symbolTypes_back() const: symbol types container is empty"); } #endif return symbolTypes.back(); } /** * * Constructor. symbols is the parser's symbol table and ruleA is the rule * asociated with newly created object. * */ glrCompRule(const glrSymbolTable &symbols,const glrRule *ruleA){ rule=ruleA; for(glrRuleRight::const_iterator sym=rule->right_begin();sym!=rule->right_end();++sym){ symbolTypes.push_back(symbols.getSymbolType(*sym)); } } }; /** * * Class glrGrammarMap is used internally by the parser during the glr table * computation. It stores vector of pointers to objects of type glrCompRule at the position * returned by the (int) cast operation applyed to the symbol at the left sides * of stored rules. * * @see glrCompRule */ class glrGrammarMap : public vector >{ public: /** * * Function dealocates all objects of stored pointers and clears the map. * */ void releaseCompRules(){ for(glrGrammarMap::iterator i=begin();i!=end();++i) for(vector::iterator j=i->begin();j!=i->end();++j) delete (*j); clear(); } }; /** * * Class glrGrammar is used by the parser to store grammar. You may want to derive it * to override virtual function read and print to handle different format of grammar files. * Other purpuse may be also to store rules in object of type of your own class derived from * glrGrammarRule class. * * @see glrRule */ class glrGrammar { public: /** * * glrRulesContainer is the container which is used by the glrGrammar class * to store pointers to the grammar rules. * */ typedef vector glrRulesContainer; private: glrRulesContainer rules; unsigned int maxRuleLength; vector epsilonRules; public: /** * * Function prepareGrammar prepares the grammar for parsing. It is automaticaly * called at the end of the read function. If you are initializing the grammar * by your own way, you should call this function after last grammar modification before * parsing. */ void prepareGrammar() { maxRuleLength = 0; epsilonRules.clear(); for(glrRulesContainer::iterator i = rules.begin(); i!=rules.end(); ++i){ if(maxRuleLength<(*i)->right_size()){ maxRuleLength = (*i)->right_size(); } if((*i)->getType()==glrEpsilonRule){ epsilonRules.push_back(*i); } } } /** * * The resetEpsilonRules function is called by the parser to reset informations * stored in epsilon rules temporarly during parsing. */ void resetEpsilonRules(){ for(vector::iterator i = epsilonRules.begin(); i!=epsilonRules.end(); ++i){ (*i)->setEpsilonNode(-1, NULL); } } virtual unsigned int const& getMaxRuleLength() const { return maxRuleLength; } /* void resetParseTimeRuleStates(){ for(glrRulesContainer::iterator i = rules.begin(); i!=rules.end(); ++i){ (*i)->resetRuleState(); } } */ /** * * Function addRule adds the rule newRule to the grammar. * */ void addRule(glrRule* const &newRule){ if(maxRuleLengthright_size()){ maxRuleLength = newRule->right_size(); } if(newRule->getNumber()>=rules.size()){ rules.resize(newRule->getNumber()+1); } rules[newRule->getNumber()]=newRule; } /** * * Function getRule returns the pointer to rule which number is ruleNo. * */ const glrRule* getRule(const int &ruleNo) const { #ifdef DEBUGEXCEPTIONS if((ruleNo<0)||(ruleNo>=rules.size())){ throw glrIndexOutOfBoundsException("const glrRule* glrGrammar::getRule(const int &ruleNo) const: index ruleNo out of bounds"); } #endif return rules[ruleNo]; } /** * * Function print simply prints all rules to output by calling * function print for each rule. symbols is the parser's symbol table. * */ virtual void print(const glrSymbolTable &symbols,ostream &output) const { for(glrRulesContainer::const_iterator i=rules.begin();i!=rules.end();++i){ (*i)->print(symbols,output); if(!(output << endl)){ throw glrIOErrorException("void glrGrammar:: print(const glrSymbolTable &symbols,ostream &output) const: can't write to output stream"); } } } /** * * Function size returns the number of rules in the grammar. * */ int size() const { return rules.size(); } /** * * Function initGrammarMap initializes the object grammarMap from the grammar. * symbols is the parser's symbol table. * */ glrCompRule *initGrammarMap(const glrSymbolTable &symbols,glrGrammarMap &grammarMap) const { grammarMap.resize(symbols.size()); glrCompRule *ret = NULL; for(glrRulesContainer::const_iterator iRule=rules.begin();iRule!=rules.end();++iRule){ if(*iRule){ glrCompRule *newCompRule=new glrCompRule(symbols,*iRule); if((*iRule)->getNumber()==0)ret=newCompRule; grammarMap[(*iRule)->getLeft()].push_back(newCompRule); } } return ret; } glrGrammar(){ maxRuleLength = 0; } /** * * Destructor dealocates all stored rules. * */ virtual ~glrGrammar(){ for(glrRulesContainer::iterator i=rules.begin();i!=rules.end();++i){ if(*i)delete (*i); } } /** * * Function reads grammar from file. Each rule in the file have to be on * its own line. Format of the line is same as output of glrRule::print function. * istream is input. symbols is the parsers table of symbols. * * @see glrRule::print */ virtual void read(glrSymbolTable &symbols, istream &input){ int lineNo = 1; int state = 1; int ruleCount = 0; string leftSymbol; string token; glrRule *newRule = NULL; while(state){ int c = input.get(); switch(state){ case 1: if(((c>='a')&&(c<='z'))||((c>='A')&&(c<='Z'))){ leftSymbol = c; state = 2; }else if((c==' ')||(c=='\t')||(c=='\n')){ if(c=='\n')lineNo++; }else if(c==EOF){ state = 0; }else if(c=='#'){ state = 3; }else if(c=='\''){ state = 20; }else{ state = -1; } break; case 20: if((c>=32)&&(c!='\'')){ leftSymbol = c; state = 21; }else{ state = -1; } break; case 21: if((c>=32)&&(c!='\'')){ leftSymbol += c; }else if(c=='\''){ state = 22; }else{ state = -1; } break; case 22: if((c==' ')||(c=='\t')){ state = 23; }else{ state = -1; } break; case 23: if((c==' ')||(c=='\t')){ }else if(c=='-'){ state = 5; }else{ state = -1; } break; case 2: if(((c>='a')&&(c<='z'))||((c>='A')&&(c<='Z'))||(c=='-')||(c=='_')||((c>='0')&&(c<='9'))){ leftSymbol += c; }else if((c==':')||(c=='(')){ state = 3; }else if((c==' ')||(c=='\t')){ state = 4; }else{ state = -1; } break; case 3: if((c=='\n')||(c==EOF)){ state = 1; lineNo++; } break; case 4: if(c=='('){ state = 3; }else if((c==' ')||(c=='\t')){ }else if(c=='-'){ state = 5; }else{ state = -1; } break; case 5: if(c=='>'){ addRule(newRule = new glrRule(ruleCount++,symbols.addTextSymbol(leftSymbol))); state = 6; }else{ state = -1; } break; case 6: if((c==' ')||(c=='\t')){ state = 7; }else if((c=='\n')||(c==EOF)){ lineNo++; newRule = NULL; state = 1; }else{ state = -1; } break; case 7: if((c==' ')||(c=='\t')){ }else if((c=='\n')||(c==EOF)){ ++lineNo; newRule = NULL; state = 1; }else if(c=='+'){ state = 8; }else if(((c>='a')&&(c<='z'))||((c>='A')&&(c<='Z'))){ token = c; state = 9; }else if(c=='\''){ state = 24; }else{ state = -1; } break; case 24: if((c>=32)&&(c!='\'')){ state = 25; token = c; }else{ state = -1; } break; case 25: if((c>=32)&&(c!='\'')){ token += c; }else if(c=='\''){ newRule->pushRight(symbols,token); state = 26; }else{ state = -1; } break; case 26: if((c==' ')||(c=='\t')){ state = 11; }else if((c=='\n')||(c=EOF)){ state = 1; newRule = NULL; lineNo++; }else{ state = -1; } break; case 8: if((c>='0')||(c<='9')){ state = 10; }else{ state = -1; } break; case 9: if(((c>='a')&&(c<='z'))||((c>='A')&&(c<='Z'))||((c>='0')&&(c<='9'))||(c=='-')||(c=='_')){ token += c; }else if((c==' ')||(c=='\t')){ newRule->pushRight(symbols,token); state = 11; }else if((c=='\n')||(c==EOF)){ newRule->pushRight(symbols,token); newRule = NULL; lineNo++; state = 1; }else{ state = -1; } break; case 10: if((c>='0')&&(c<='9')){ }else if(c=='.'){ state = 12; }else if((c=='\n')||(c==EOF)){ state = 1; newRule = NULL; lineNo++; }else if((c==' ')||(c=='\t')){ state = 16; }else{ state = -1; } break; case 11: if((c==' ')||(c=='\t')){ }else if(((c>='a')&&(c<='z'))||((c>='A')&&(c<='Z'))){ token = c; state = 9; }else if((c=='\n')||(c==EOF)){ newRule = NULL; lineNo++; state = 1; }else if(c=='+'){ state = 13; }else if(c=='\''){ state = 24; }else{ state = -1; } break; case 12: if((c>='0')&&(c<='9')){ state = 14; }else{ state = -1; } break; case 13: if((c>='0')&&(c<='9')){ state = 15; }else{ state = -1; } break; case 14: if((c>='0')&&(c<='9')){ }else if((c==' ')||(c=='\t')){ state = 16; }else if((c=='\n')||(c==EOF)){ state = 1; newRule = NULL; lineNo++; }else{ state = -1; } break; case 15: if((c>='0')&&(c<='9')){ }else if(c=='.'){ state = 17; }else if((c=='\n')||(c==EOF)){ state = 1; newRule = NULL; lineNo++; }else if((c=' ')||(c='\t')){ state = 19; }else{ state = -1; } break; case 16: if((c==' ')||(c=='\t')){ }else if((c=='\n')||(c==EOF)){ newRule = NULL; lineNo++; state = 1; }else{ state = -1; } break; case 17: if((c>='0')&&(c<='9')){ state = 18; }else{ state = -1; } break; case 18: if((c>='0')&&(c<='9')){ }else if((c==' ')||(c=='\t')){ state = 19; }else if((c=='\n')||(c==EOF)){ state = 1; newRule = NULL; lineNo++; }else{ state = -1; } break; case 19: if((c==' ')||(c=='\t')){ }else if((c=='\n')||(c==EOF)){ state = 1; lineNo++; newRule = NULL; }else{ state = -1; } break; case -1: throw glrSyntaxErrorException("void glrGrammar::read(glrSymbolTable &symbols, istream &input): syntax error in grammar at line ",lineNo); default: throw glrIOErrorException("void glrGrammar::read(glrSymbolTable &symbols, istream &input): grammar reader came to undefined state"); } } prepareGrammar(); } }; #endif