2010-11-19 225 views
2

我想把这个 curl 函数放进我的类里,但在 CURLOPT_WRITEFUNCTION 上遇到了问题:怎样把指向成员函数的指针传给 CURLOPT_WRITEFUNCTION?编译之后我一直没能找到解决办法。我也尝试过 Stack Overflow 上的一些做法,但都不管用。

以下是我尝试过的写法(用来替换下面代码中的 'writer'):

node::writer、&node::writer、std::bind1st(std::mem_fun(&node::writer), this);

这里是我的代码:

#ifndef NODE_H_ 
#define NODE_H_ 

// Forward declaration of the write callback that node::curlHttpget registers
// with libcurl; the definition is at the end of this header.
int writer(char *data, std::size_t size, std::size_t nmemb, std::string *buffer); 

/* 
* function prototypes 
*/ 

class node {
private:
    /*
     * Record type for one scraped html element; only declared here,
     * the definition follows the class.
     */
    struct tag;

    /*
     * Address of the page and the text downloaded from it
     * (both are assigned by load()).
     */
    std::string url;
    std::string source;

    /*
     * Scraped elements, one vector per kind of tag.
     */
    std::vector<tag> heading;
    std::vector<tag> anchor;

    /*
     * Download the given url with curl and return the text received.
     */
    std::string curlHttpget(const std::string &url);

    /*
     * Append a tag record to the matching vector.
     * @see std::vector<tag> heading
     * @see std::vector<tag> anchor
     */
    void add_heading(std::string, std::string);
    void add_anchor(std::string, std::string);

public:
    /*
     * Constructors: the default one leaves the node empty; the
     * one-argument form loads the page and scrapes anchors and headings.
     */
    node() {}
    node(std::string);

    /*
     * Destructor
     */
    ~node() {}

    /*
     * Remember the seed as the url and fetch its source.
     */
    void load(std::string seed);

    /*
     * Anchor tags: scrape them from the source / print the stored ones.
     */
    void get_anchors();
    void display_anchors();

    /*
     * Heading tags: scrape them from the source / print the stored ones.
     */
    void get_headings();
    void display_headings();
};
/* 
* for all stored html elements 
*/ 
struct node::tag {
    // Inner text of the element (the "name" capture of the scraping regexes).
    std::string text;
    // Attribute text for headings, link target for anchors.
    std::string properties;

    // Store copies of both strings.
    tag(std::string t, std::string p)
        : text(t),
          properties(p)
    {
    }
};

/* 
* constructors 
*/ 
node::node(std::string seed) {
    // Fetch the page first: both scrapers read the downloaded source.
    this->load(seed);
    this->get_anchors();
    this->get_headings();
}
/* 
* araneus::subroutines 
*/ 

// crawl the page 
void node::load(std::string seed) { 
url = seed; 
source = curlHttpget(url); 
} 


//scrape html source 
std::string node::curlHttpget(const std::string &url) { 
std::string buffer; 

CURL *curl; 
CURLcode result; 

curl = curl_easy_init(); 

if (curl) { 
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); 
    curl_easy_setopt(curl, CURLOPT_HEADER, 0); 
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writer); 
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer); 

    result = curl_easy_perform(curl);//http get performed 

    curl_easy_cleanup(curl);//must cleanup 

    //error codes: http://curl.haxx.se/libcurl/c/libcurl-errors.html 
    if (result == CURLE_OK) { 
    return buffer; 
    } 
    //curl_easy_strerror was added in libcurl 7.12.0 
    std::cerr << "error: " << result << " " << curl_easy_strerror(result) << std::endl; 
    return ""; 
} 

std::cerr << "error: could not initalize curl" << std::endl; 
return ""; 
} 

void node::get_headings() { 
static const regex expression("<[hH][1-6]\\s*(?<properties>.*?)\\s*>(?<name>.*?)</\\s*[hH][1-6]\\s*>"); 

int const subMatches[] = { 1, 2 }; 

sregex_token_iterator p(source.begin(), source.end(), expression, subMatches); 
sregex_token_iterator end; 

string text; 
string properties; 

int count = 0; 
for (;p != end; count++, ++p) 
{ 
    string m(p->first, p->second); 

    if(count % 2) { 
    text = m; 
    add_heading(text, properties); 
    } 
    else { 
    properties = m; 
    } 
} 
} 

/*
 * Scan the stored source for <a ... href="...">text</a> elements and record
 * each one through add_anchor(text, link).
 *
 * The link is stored as follows:
 *   - "/somewhere"            -> url + link
 *   - "http://..." etc.       -> link unchanged
 *   - "somewhere.html"        -> url + "/" + link
 *   - anything else           -> reported on std::cout and stored unchanged
 */
void node::get_anchors() {
    // "<a" must be followed by whitespace so tags that merely start with the
    // letter a (e.g. <abbr>) are not taken for anchors. href/HREF is a real
    // alternation here: written as a bracket expression it matched any single
    // one of those letters, so "title=" could be captured in place of "href=".
    // [^>] keeps the attribute search inside the opening tag.
    static const regex expression("<[aA]\\s+[^>]*?(?:href|HREF)\\s*=\\s*[\"'](?<url>.*?)[\"'][^>]*>(?<name>.*?)</[aA]>");
    static const regex relative("^\\/");
    static const regex firstChar("^[A-Za-z0-9\\-_\\$\\.\\+!\\*'\\(\\)#]"); // valid url characters
    // Alternation of whole scheme prefixes. As a bracket expression this
    // matched on the first character alone, so links such as "page.html"
    // were classified as absolute.
    static const regex protocol("^(?:https?|HTTPS?|s?ftp|S?FTP):\\/\\/");

    // Sub-expressions requested from the iterator: 1 = url, 2 = name.
    int const subMatches[] = { 1, 2 };

    sregex_token_iterator p(source.begin(), source.end(), expression, subMatches);
    sregex_token_iterator end;

    string text, properties;

    int count = 0;
    for (; p != end; count++, ++p) {
        std::string m(p->first, p->second);

        if (count % 2) {
            // odd token: the anchor text, which completes the pair
            text = m;
            add_anchor(text, properties);
        }
        else {
            // even token: the link target
            if (regex_search(m, relative)) { //if link is in "/somewhere" format
                properties = url + m;
            }
            else if (regex_search(m, protocol)) { //if link is absolute "http://www.somewhere.com"
                properties = m;
            }
            else if (regex_search(m, firstChar)) { //if link starts with a valid url char "somewhere.html"
                properties = url + "/" + m;
            }
            else {
                std::cout << "link of unknown protocol: " << m << std::endl;
                // Keep this anchor's own link text; otherwise the value left
                // over from the previous anchor would be stored with it.
                properties = m;
            }
        }
    }
}

void node::add_heading(std::string text, std::string properties) { 
heading.push_back(tag(text, properties)); 
} 

void node::display_headings() { 
for(int i = 0; i < (int)heading.size(); i++) { 
    std::cout<< "[h]: " << heading[i].text << endl; 
    std::cout<< "[h.properties]: " << heading[i].properties << endl; 
} 
cout << "found " << (int)heading.size() << " <h[1-6]> tags" << endl; 
} 

void node::add_anchor(std::string text, std::string properties) { 
anchor.push_back(tag(text, properties)); 
} 

void node::display_anchors() { 
for(int i = 0; i < (int)anchor.size(); i++) { 
    std::cout<< "[a]: " << anchor[i].text << endl; 
    std::cout<< "[a.properties]: " << anchor[i].properties << endl; 
} 
cout << "found " << (int)anchor.size() << " <a> tags" << endl; 
} 

// Write callback handed to libcurl: appends the size * nmemb bytes at data to
// *buffer and returns that byte count, or returns 0 without touching anything
// when buffer is null.
// NOTE(review): libcurl documents this callback as returning size_t and taking
// a void* user pointer; this signature differs from that - confirm.
int writer(char *data, std::size_t size, std::size_t nmemb, std::string *buffer) {
    if (buffer == NULL) {
        return 0;
    }

    const std::size_t byteCount = size * nmemb;
    buffer->append(data, byteCount);

    const int consumed = byteCount;
    return consumed;
}

#endif /* NODE_H_ */ 

我想找到一种办法,把函数 “int writer” 改成成员函数 “int node::writer”。问题出现在 std::string node::curlHttpget 中设置 CURLOPT_WRITEFUNCTION 的地方。

&node::writer 可以编译通过,但运行时会出现段错误(segfault)=/

感谢

回答

1

不要用 std::string* 作为参数,而是改用 node*;或者另外定义一个类(例如 HttpGet),其中包含一个 std::string 和一个指向你的 node 的指针,这样回调既可以写入字符串,又能在每次调用时访问你的 node。

boost::bind 不能用于 C API 的回调。

它之所以能编译通过,是因为 curl_easy_setopt 使用了可变参数(...),因此完全没有类型安全可言。你可以传入任何类型,它都能编译;但运行时很可能出错,正如你已经付出代价后发现的那样。

为了多一层类型安全,我会让你的函数与 curl_write_callback 的签名完全一致,即第四个参数为 void*,然后在函数实现内部进行类型转换(cast)。