2009-09-07 95 views
2

我在使用 curl_multi_* 时遇到了问题。我想创建一个类/函数,它接收 1000 个 URL,并且每次同时处理其中 5 个;当某个 URL 下载完成时,它会把空出来的插槽分配给尚未处理的新 URL。(标题:CURL 问题(multi))

我见过一些 curl_multi 的实现,但它们都无法实现我想要的功能。我相信解决方案在于使用 curl_multi_select,但官方文档写得不太清楚,用户注释(user notes)也帮助不大。

有人能给我提供一些示例,说明如何实现这样的功能吗?

回答

6

以下是一种方法。该脚本可以同时获取任意数量的 URL,并在每个 URL 完成时添加一个新的 URL(因此它始终同时获取 $maxConcurrent 个页面)。

// Example usage: fetch a list of sites, keeping at most $concurrent
// transfers in flight at any moment.
$sites = [
    'http://example.com',
    'http://google.com',
    'http://stackoverflow.com',
];
$concurrent = 2; // Any positive number.

// Open the <pre> element that is closed below, so the emitted HTML is balanced.
echo '<pre>';

$mc = new MultiCurl($sites, $concurrent);
$mc->process();

echo '</pre>';

/**
 * Downloads a list of URLs with curl_multi, keeping at most a fixed number of
 * transfers running at once. Whenever a transfer finishes, its slot is given
 * to the next URL that has not been started yet.
 *
 * For every finished transfer a line "<url> - <n> bytes<br />" is echoed.
 */
class MultiCurl
{
    /** @var array Zero-indexed list of URLs to fetch. */
    private $allToDo;

    /** @var mixed The curl_multi handle returned by curl_multi_init(). */
    private $multiHandle;

    /** @var int Maximum number of transfers kept in flight (always >= 1). */
    private $maxConcurrent = 2;

    /** @var int Index into $allToDo of the next URL to start. */
    private $currentIndex = 0;

    /** @var array Per-URL bookkeeping, keyed by the URL's index in $allToDo. */
    private $info = array();

    /** @var array cURL options applied to every easy handle. */
    private $options = array(
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS      => 3,
        CURLOPT_TIMEOUT        => 3,
    );

    /**
     * @param array $todo       URLs to fetch; keys are ignored.
     * @param int   $concurrent Maximum simultaneous transfers; values below 1
     *                          are raised to 1 so that process() can always
     *                          make progress.
     */
    public function __construct($todo, $concurrent)
    {
        // Re-index so that $currentIndex can address the URLs even if the
        // caller passed an associative or sparse array.
        $this->allToDo = array_values((array) $todo);
        $this->maxConcurrent = max(1, (int) $concurrent);
        $this->multiHandle = curl_multi_init();
    }

    /**
     * Runs all transfers to completion.
     *
     * @return $this
     */
    public function process()
    {
        $running = 0;
        do {
            // Top up the free slots with URLs that have not been started.
            $this->_addHandles(min($this->maxConcurrent - $running, $this->_moreToDo()));

            // Older libcurl versions ask to be called again immediately.
            do {
                $status = curl_multi_exec($this->multiHandle, $running);
            } while ($status === CURLM_CALL_MULTI_PERFORM);

            // Block until there is activity instead of spinning. When select
            // fails it returns -1 immediately, so sleep briefly in that case
            // to avoid a busy loop.
            if ($running > 0 && curl_multi_select($this->multiHandle) === -1) {
                usleep(1000);
            }

            // Drain every completion message and release the finished handles.
            while ($multiInfo = curl_multi_info_read($this->multiHandle)) {
                $this->_showData($multiInfo);
                curl_multi_remove_handle($this->multiHandle, $multiInfo['handle']);
                curl_close($multiInfo['handle']);
            }
        } while ($running || $this->_moreToDo());

        return $this;
    }

    /**
     * Starts up to $num new transfers, taking URLs from $allToDo in order.
     *
     * @param int $num Number of handles to add; values <= 0 add nothing.
     */
    private function _addHandles($num)
    {
        while ($num-- > 0) {
            $index = $this->currentIndex++;
            $url = $this->allToDo[$index];
            $this->info[$index]['url'] = $url;

            $handle = curl_init($url);
            if ($handle === false) {
                // Could not create a handle for this URL; note it and move on
                // so the remaining URLs are still processed.
                $this->info[$index]['error'] = 'curl_init failed';
                continue;
            }

            curl_setopt_array($handle, $this->options);
            // Tag the handle with its URL index. A handle cannot be used as
            // an array key directly (PHP 8 handles are objects), so the index
            // is read back from the handle when the transfer completes.
            curl_setopt($handle, CURLOPT_PRIVATE, (string) $index);
            curl_multi_add_handle($this->multiHandle, $handle);
        }
    }

    /**
     * @return int Number of URLs that have not been started yet.
     */
    private function _moreToDo()
    {
        return count($this->allToDo) - $this->currentIndex;
    }

    /**
     * Records the result of a finished transfer and echoes a summary line.
     *
     * @param array $multiInfo Message returned by curl_multi_info_read().
     */
    private function _showData($multiInfo)
    {
        $handle = $multiInfo['handle'];
        // Recover the URL index stored on the handle in _addHandles().
        $index = (int) curl_getinfo($handle, CURLINFO_PRIVATE);

        $this->info[$index]['multi'] = $multiInfo;
        $this->info[$index]['curl'] = curl_getinfo($handle);

        $content = (string) curl_multi_getcontent($handle);
        echo $this->info[$index]['url'] . ' - ' . strlen($content) . ' bytes<br />';
    }
}
+1

由于这个例子没有使用 curl_multi_select(),它会疯狂地忙循环(busy-loop),占用 100% 的 CPU,直到所有传输完成…… –

+1

@Daniel Stenberg发明的 - 感谢;你是对的。我已经用一个速度更快并且使用更少CPU时间的(精简)类替换了该代码。 – GZipp