2012-09-09 68 views
3

【Boost Spirit 规则——规则的连接】我有一个从字符串到规则的映射(std::map),想以某种方式创建一条“联合规则”(rule_t joint_rule;)。如果我这样写:

joint_rule = convert_logformat["%h"] >> convert_logformat["%t"]; 

那么用 phrase_parse 解析时,联合规则可以匹配下面的字符串:

std::string entry = "127.0.0.1 [16/Aug/2012:01:50:02 +0000]"; 

但如果我像下面这样创建联合规则:

// Walks the map in its iteration order and re-assigns joint_rule to
// "copy of the current joint_rule, followed by a copy of the mapped rule".
for (it = convert_logformat.begin(); it != convert_logformat.end(); it++) 
{ 
    joint_rule = joint_rule.copy() >> (*it).second.copy(); 
} 

它就无法匹配同一个字符串。为什么?我该如何实现类似后一种写法的效果?


相关代码:

// Parses one log entry in [first, last) with Boost.Spirit Qi, skipping ASCII
// whitespace between tokens. Matched fields are written to the output
// parameters through the semantic actions attached to the individual rules.
// Returns true only if phrase_parse succeeds AND the whole input was consumed.
template <typename Iterator> 
bool parse_logentry(Iterator first, Iterator last, std::vector<char>& ip, std::vector<char>& timestamp, std::vector<char>& req, unsigned int& status, unsigned int& transferred_bytes, std::vector<char>& referer, std::vector<char>& ua) 
{ 
    using boost::spirit::qi::char_; 
    using boost::spirit::qi::int_; 
    using boost::spirit::qi::uint_; 
    using boost::spirit::qi::phrase_parse; 
    using boost::spirit::ascii::space; 
    using boost::spirit::ascii::space_type; 
    using boost::phoenix::ref; 
    using boost::phoenix::push_back; 
    using boost::spirit::qi::_1; 
    using boost::spirit::qi::lexeme; 
    using boost::spirit::qi::rule; 

    // All rules share one type (std::string attribute, space skipper) so they
    // can be stored together in the map below.
    typedef boost::spirit::qi::rule<Iterator, std::string(), space_type> rule_t; 
    rule_t ip_rule, timestamp_rule, user_rule, req_rule, ref_rule, ua_rule, bytes_rule, status_rule; 
    // One or more characters out of digits and '.'; the match is stored in ip.
    ip_rule %= lexeme[(+char_("0-9."))[ref(ip) = _1]]; 
    // Everything between '[' and ']'; the match is stored in timestamp.
    timestamp_rule %= lexeme[('[' >> +(~char_(']')) >> ']')[ref(timestamp) = _1]]; 
    // One or more non-space characters; no semantic action attached.
    user_rule %= lexeme[(+~char_(" "))]; 
    // The next three rules each match a double-quoted string and store its
    // content in req, referer and ua respectively.
    req_rule %= lexeme[('"' >> +(~char_('"')) >> '"')[ref(req) = _1]]; 
    ref_rule %= lexeme[('"' >> +(~char_('"')) >> '"')[ref(referer) = _1]]; 
    ua_rule %= lexeme[('"' >> +(~char_('"')) >> '"')[ref(ua) = _1]]; 
    // Unsigned integers, stored in transferred_bytes and status.
    bytes_rule %= uint_[ref(transferred_bytes) = _1]; 
    status_rule %= uint_[ref(status) = _1]; 
    // Maps a log-format specifier to (a copy of) the rule that parses it.
    // std::map iterates in key order, not in insertion order.
    std::map<std::string, rule_t> convert_logformat; 
    typename std::map<std::string, rule_t>::iterator it; 

    convert_logformat.insert(std::pair<std::string, rule_t>("%h", ip_rule)); 
    convert_logformat.insert(std::pair<std::string, rule_t>("%t", timestamp_rule)); 
    //convert_logformat.insert(std::pair<std::string, rule_t>("%r", req_rule)); 
    //convert_logformat.insert(std::pair<std::string, rule_t>("%>s", status_rule)); 
    //convert_logformat.insert(std::pair<std::string, rule_t>("%b", bytes_rule)); 
    //convert_logformat.insert(std::pair<std::string, rule_t>("%u", user_rule)); 
    //convert_logformat.insert(std::pair<std::string, rule_t>("%{User-agent}i", ua_rule)); 
    //convert_logformat.insert(std::pair<std::string, rule_t>("%{Referer}i", ref_rule)); 

    // NOTE(review): joint_rule is default-constructed here, i.e. it has no
    // parser assigned. The loop below therefore starts its sequence with a copy
    // of an empty rule, unlike the hand-written variant that is commented out.
    // Giving it an initial parser (e.g. qi::eps) before the loop is the usual
    // remedy.
    rule_t joint_rule; 

    //joint_rule = convert_logformat["%h"] >> convert_logformat["%t"]; 

    // Chain every mapped rule onto joint_rule and print each key together with
    // the (mangled) type name of its rule.
    for (it = convert_logformat.begin(); it != convert_logformat.end(); it++) 
    { 
     joint_rule = joint_rule.copy() >> (*it).second.copy(); 
     std::cout << (*it).first << ": " << typeid((*it).second).name() << "\n"; 
    } 

    std::cout << "convert_logformath: " << typeid(convert_logformat["%h"]).name() << "\n"; 

    bool r = phrase_parse(first, last, joint_rule, space); 
    // Leftover input counts as failure even if the parser itself succeeded.
    if (first != last) 
     return false; 
    return r; 
} 
+0

rule_t 是什么?你能提供一个 SSCCE 吗? http://meta.stackexchange.com/questions/22754/sscce-how-to-provide-examples-for-programming-questions/22762#22762 – sehe

+0

typedef boost::spirit::qi::rule<Iterator, std::string(), space_type> rule_t; – bayerb

+0

http://pastebin.com/uT59AMQp – bayerb

回答

3

咳咳。这真的很简单。你应该在第一行初始化变量:)

rule_t joint_rule; // what is it initialized to? 

// Every iteration sequences a copy of the current joint_rule with a copy of
// the mapped rule, so the very first iteration uses joint_rule's initial state.
for (auto it = convert_logformat.begin(); it != convert_logformat.end(); it++) 
{ 
    joint_rule = joint_rule.copy() >> (*it).second.copy(); 
} 

更改为

rule_t joint_rule = qi::eps; 

这样就可以正常工作了:

[email protected]:/tmp$ ./test 
127.0.0.1 
16/Aug/2012:01:50:02 +0000 

这个解析器没有遵循一些(良好的)常见做法。请参阅下面整理后的源代码(C++11)。

请注意,用 std::map 来存储规则看起来有些奇怪,因为 map 的迭代顺序是按键排序的,而不是按插入顺序。

可在线查看并运行这段代码:http://liveworkspace.org/code/a7f2f94840d63fce43d8c3f56236330e

// #define BOOST_SPIRIT_DEBUG 
#include <boost/spirit/include/qi.hpp> 
#include <boost/spirit/include/phoenix.hpp> 
#include <typeinfo> 

namespace qi = boost::spirit::qi; 
namespace phx = boost::phoenix; 

/// Spirit Qi grammar for a log entry made of an IP address followed by a
/// bracketed timestamp, with ASCII whitespace skipped between tokens.
///
/// The individual field rules store what they match in the public data
/// members below via Phoenix semantic actions. The start rule, joint_rule, is
/// assembled at construction time by chaining the rules held in a
/// format-specifier -> rule map.
///
/// The semantic actions reference members of this object, so the parsed
/// values are read from the same Grammar instance that was passed to the
/// parse call.
template <typename Iterator>
struct Grammar : qi::grammar<Iterator, std::string(), qi::space_type>
{
    Grammar() : Grammar::base_type(joint_rule)
    {
        using namespace qi;

        // Field rules. lexeme[] disables the skipper inside a token.
        ip_rule        %= lexeme[ (+char_("0-9."))[phx::ref(ip) = _1] ];
        timestamp_rule %= lexeme[ ('[' >> +(~char_(']')) >> ']')[phx::ref(timestamp) = _1] ];
        user_rule      %= lexeme[ (+~char_(" ")) ];
        req_rule       %= lexeme[ ('"' >> +(~char_('"')) >> '"')[phx::ref(req) = _1] ];
        ref_rule       %= lexeme[ ('"' >> +(~char_('"')) >> '"')[phx::ref(referer) = _1] ];
        ua_rule        %= lexeme[ ('"' >> +(~char_('"')) >> '"')[phx::ref(ua) = _1] ];
        bytes_rule     %= uint_[phx::ref(transferred_bytes) = _1];
        status_rule    %= uint_[phx::ref(status) = _1];

        // Register the debug nodes BEFORE the rules are copied into the map
        // and into joint_rule: the copies are taken by value, so a handler
        // installed afterwards would not be part of the rules actually used
        // for parsing.
        BOOST_SPIRIT_DEBUG_NODE(ip_rule);
        BOOST_SPIRIT_DEBUG_NODE(timestamp_rule);
        BOOST_SPIRIT_DEBUG_NODE(user_rule);
        BOOST_SPIRIT_DEBUG_NODE(req_rule);
        BOOST_SPIRIT_DEBUG_NODE(ref_rule);
        BOOST_SPIRIT_DEBUG_NODE(ua_rule);
        BOOST_SPIRIT_DEBUG_NODE(bytes_rule);
        BOOST_SPIRIT_DEBUG_NODE(status_rule);

        // Format specifier -> rule. std::map iterates in key order, not in
        // insertion order; "%h" sorts before "%t", which happens to be the
        // order required here.
        auto convert_logformat = std::map<std::string, rule_t> {
            { "%h"            , ip_rule        },
            { "%t"            , timestamp_rule },
         // { "%r"            , req_rule       },
         // { "%>s"           , status_rule    },
         // { "%b"            , bytes_rule     },
         // { "%u"            , user_rule      },
         // { "%{User-agent}i", ua_rule        },
         // { "%{Referer}i"   , ref_rule       }
        };

        // Start from eps (always succeeds, consumes nothing) so that the
        // first ">>" has a valid left-hand side.
        joint_rule = eps;

        for (auto const& p : convert_logformat)
        {
            joint_rule = joint_rule.copy() >> p.second.copy();
        }

        BOOST_SPIRIT_DEBUG_NODE(joint_rule);
    }

    typedef qi::rule<Iterator, std::string(), qi::space_type> rule_t;
    rule_t ip_rule, timestamp_rule, user_rule, req_rule, ref_rule, ua_rule, bytes_rule, status_rule;
    rule_t joint_rule;

    // Parse results, filled in by the semantic actions above.
    std::vector<char> ip;
    std::vector<char> timestamp;
    std::vector<char> req;
    // Zero-initialised so that reading them is well-defined even when the
    // corresponding rule is not part of joint_rule or did not match.
    unsigned int status = 0;
    unsigned int transferred_bytes = 0;
    std::vector<char> referer;
    std::vector<char> ua;
};

/// Runs `parser` over [first, last), skipping whitespace between tokens.
/// Succeeds only when the grammar matches and no input is left over.
template <typename Iterator>
bool parse_logentry(Iterator first, Iterator last,
     Grammar<Iterator>& parser)
{
    if (!phrase_parse(first, last, parser, qi::space))
    {
        return false;
    }

    // A successful parse that stops short of the end is still a failure.
    return first == last;
}

// Parses a sample log line and prints the extracted IP address and timestamp,
// one per line, or "not ok" when the line does not match the grammar.
int main(void)
{
    std::string entry = "127.0.0.1 [16/Aug/2012:01:50:02 +0000]";
    //std::string entry = "127.0.0.1 [16/Aug/2012:01:50:02 +0000] \"GET /check.htm HTTP/1.1\" 200 17 \"-\" \"AgentName/0.1 libwww-perl/5.833\"";

    Grammar<std::string::iterator> parser;

    if (!parse_logentry(entry.begin(), entry.end(), parser))
    {
        std::cout << "not ok\n";
        return 0;
    }

    // Writes the characters of one parsed field followed by a newline.
    auto print_field = [](const std::vector<char>& field)
    {
        for (char ch : field)
            std::cout << ch;
        std::cout << "\n";
    };

    print_field(parser.ip);
    print_field(parser.timestamp);

    return 0;
}

另外值得一提的是,这种写法还有一个好处:只需在文件开头定义 BOOST_SPIRIT_DEBUG,就可以启用语法的调试输出。

+0

哇!谢谢!不幸的是,我不能在我的环境中使用C++ 11支持的编译器,但其他建议对我有很大的帮助。 – bayerb