
HOWTO use the parser multiple times?

SQLParser.h:

class SQLParser{ 
/*____Variables____*/ 
private: 
    std::string _vendor; 
    antlr4::CommonTokenStream* _tokenStream; 
    antlr4::Parser* _parser; 
    antlr4::Lexer* _lexer; 

/*____Functions____*/ 
public: 
    SQLParser(const std::string& Vendor); 
    ~SQLParser(); 
    antlr4::CommonTokenStream* get_tokens(const std::string& text); 
    std::vector<std::string> get_lexems(const std::string& text); 
    antlr4::ParserRuleContext* parse(const std::string& text); 
    bool check_syntax(const std::string& text); 
    void print_string_tree(const std::string& text); // parse and print in LISP format 
}; 

SQLParser.cpp:

... 
CommonTokenStream* SQLParser::get_tokens(const std::string& text){ 
    (dynamic_cast<ANTLRInputStream*>(_lexer->getInputStream()))->load(text); 
    _tokenStream->reset(); 
    _tokenStream->fill(); 
    return _tokenStream; 
} 

std::vector<std::string> SQLParser::get_lexems(const std::string& text){
    get_tokens(text);

    std::vector<std::string> lexems;
    for(auto token : _tokenStream->getTokens()) {
        lexems.push_back(token->getText());
    }
    return lexems;
}

ParserRuleContext* SQLParser::parse(const std::string& text){
    get_tokens(text);

    _parser->setInputStream(_tokenStream);
    ParserRuleContext* tree = nullptr;
    try{
        if(_vendor == "tsql"){
            tree = (dynamic_cast<tsqlParser*>(_parser))->root();
        }
        if(_vendor == "mysql"){
            tree = (dynamic_cast<mysqlParser*>(_parser))->root();
        }
    }
    catch(std::_Nested_exception<ParseCancellationException>& e){
        return nullptr;
    }
    return tree;
}

One SQLParser object is created for each specific vendor, and I want to use that one object to parse several input texts. But I have a problem with the size of the TokenStream: I expected its size to change dynamically from one input to the next. For example, with a main.cpp like this:

#include <iostream> 
#include <string> 

#include <antlr4-runtime.h> 
#include "SQLParser.h" 

using namespace antlr4; 

int main(){ 
    SQLParser parser("tsql"); 
    std::cout << "'select 1;': "; 
    parser.print_string_tree("select 1;"); 
    std::cout << "\n\n'select 1,2,3;': "; 
    parser.print_string_tree("select 1,2;"); 
    std::cout << "\n"; 
    return 0; 
} 

it gives the output:

'select 1;': (root (sql_clauses (sql_clause (dml_clause (select_statement (query_expression (query_specification select (select_list (select_list_elem (expression (constant 1)))))) ;)))) <EOF>) 

'select 1,2,3;': (root (sql_clauses (sql_clause (dml_clause (select_statement (query_expression (query_specification select (select_list (select_list_elem (expression (constant 1)))))) ,))))) 

How should I use the TokenStream to avoid this error?


*"I want to use this object to parse several input texts"* => If the C++ API is anything like the C# one, you shouldn't do that - instead, create another stream/lexer/parser for each input. By default, the lexer caches the tokens. *Why* do you want to do it differently? [XY problem](https://meta.stackexchange.com/q/66377/271659)?
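For reference, the per-input approach suggested in this comment could look roughly like the sketch below. It is untested and assumes the generated tsqlLexer/tsqlParser classes from the question, with hypothetical header names to match:

#include <iostream>
#include <string>

#include <antlr4-runtime.h>
#include "tsqlLexer.h"   // hypothetical header names for the generated classes
#include "tsqlParser.h"

void print_tree_once(const std::string& text){
    antlr4::ANTLRInputStream input(text);      // fresh input stream per call
    tsqlLexer lexer(&input);                   // fresh lexer
    antlr4::CommonTokenStream tokens(&lexer);  // fresh, empty token stream
    tsqlParser parser(&tokens);                // fresh parser
    antlr4::tree::ParseTree* tree = parser.root();
    std::cout << tree->toStringTree(&parser) << std::endl;
    // Use the tree before these locals go out of scope; the parser owns the tree nodes.
}

This trades a little construction overhead per query for never having to worry about stale state.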

Answer


I have a setup similar to yours. A context class ties the lexer + parser + listener etc. together. To start parsing again with new input, you have to make the token stream reload all tokens. In my context class I do it like this:

struct MySQLParserContextImpl : public MySQLParserContext { 
    ANTLRInputStream input; 
    MySQLLexer lexer; 
    CommonTokenStream tokens; 
    MySQLParser parser; 
    ContextErrorListener errorListener; 

    bool caseSensitive; 
    std::vector<ParserErrorInfo> errors; 

    ... 

    ParseTree *parse(const std::string &text, MySQLParseUnit unit) {
      input.load(text);
      return startParsing(false, unit);
    }

    bool errorCheck(const std::string &text, MySQLParseUnit unit) {
      parser.removeParseListeners();
      input.load(text);
      startParsing(true, unit);
      return errors.empty();
    }

private: 
    ParseTree *parseUnit(MySQLParseUnit unit) {
      switch (unit) {
        case MySQLParseUnit::PuCreateSchema:
          return parser.createDatabase();
        case MySQLParseUnit::PuCreateTable:
          return parser.createTable();
        case MySQLParseUnit::PuCreateTrigger:
          return parser.createTrigger();
        case MySQLParseUnit::PuCreateView:
          return parser.createView();
        case MySQLParseUnit::PuCreateFunction:
          return parser.createFunction();
        case MySQLParseUnit::PuCreateProcedure:
          return parser.createProcedure();
        case MySQLParseUnit::PuCreateUdf:
          return parser.createUdf();
        case MySQLParseUnit::PuCreateRoutine:
          return parser.createRoutine();
        case MySQLParseUnit::PuCreateEvent:
          return parser.createEvent();
        case MySQLParseUnit::PuCreateIndex:
          return parser.createIndex();
        case MySQLParseUnit::PuGrant:
          return parser.grant();
        case MySQLParseUnit::PuDataType:
          return parser.dataTypeDefinition();
        case MySQLParseUnit::PuCreateLogfileGroup:
          return parser.createLogfileGroup();
        case MySQLParseUnit::PuCreateServer:
          return parser.createServer();
        case MySQLParseUnit::PuCreateTablespace:
          return parser.createTablespace();
        default:
          return parser.query();
      }
    }

    ParseTree *startParsing(bool fast, MySQLParseUnit unit) {
      errors.clear();
      lexer.reset();
      lexer.setInputStream(&input); // Not just reset(), which only rewinds the current position.
      tokens.setTokenSource(&lexer);

      parser.reset();
      parser.setBuildParseTree(!fast);

      // First parse with the bail error strategy to get quick feedback for correct queries.
      parser.setErrorHandler(std::make_shared<BailErrorStrategy>());
      parser.getInterpreter<ParserATNSimulator>()->setPredictionMode(PredictionMode::SLL);

      ParseTree *tree;
      try {
        tree = parseUnit(unit);
      } catch (ParseCancellationException &) {
        if (fast)
          tree = nullptr;
        else {
          // If parsing was cancelled we either really have a syntax error or we need to do a second step,
          // now with the default strategy and LL parsing.
          tokens.reset();
          parser.reset();
          parser.setErrorHandler(std::make_shared<DefaultErrorStrategy>());
          parser.getInterpreter<ParserATNSimulator>()->setPredictionMode(PredictionMode::LL);
          tree = parseUnit(unit);
        }
      }

      if (errors.empty() && !lexer.hitEOF) {
        // There is more input than needed for the given parse unit. Make this a fail as we don't allow
        // extra input after the specific rule.
        // This part is only needed if the grammar has no explicit EOF token at the end of the parsed rule.
        Token *token = tokens.LT(1);
        ParserErrorInfo info = {"extraneous input found, expecting end of input",
                                token->getType(),
                                token->getStartIndex(),
                                token->getLine(),
                                token->getCharPositionInLine(),
                                token->getStopIndex() - token->getStartIndex() + 1};
        errors.push_back(info);
      }
      return tree;
    }
    ...
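Transferred to the SQLParser class from the question, the important point is the same: before every run, rewire the lexer and token stream instead of only calling fill() again. A rough, untested sketch of get_tokens along those lines:

CommonTokenStream* SQLParser::get_tokens(const std::string& text){
    // Reload the input and reset the whole pipeline so no stale tokens survive.
    auto* input = dynamic_cast<ANTLRInputStream*>(_lexer->getInputStream());
    input->load(text);
    _lexer->reset();
    _lexer->setInputStream(input);        // not just reset(), which only rewinds the position
    _tokenStream->setTokenSource(_lexer); // discards the previously buffered tokens
    _tokenStream->fill();
    _parser->reset();
    return _tokenStream;
}

With that in place, each call to parse() or print_string_tree() starts from a clean token stream.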