2013-10-01 100 views
0

PHP脚本优化:我有一个PHP脚本,它使用cURL向某个网站发送大量请求。我已经尽可能地精简了它,但还是遇到了瓶颈。我还能对这个脚本做些什么来让它更快?

<?php 
    // The word list is long and every word costs one HTTP round trip,
    // so allow the script to run for up to an hour.
    ini_set('max_execution_time', 3600);

    /**
     * Sends a HEAD request to $url and echoes the URL, prefixed with "<br>",
     * when the server answers with HTTP 404.
     *
     * @param string $url absolute URL to probe
     */
    function remoteStatusCode($url){
        // One handle is kept for the whole run so libcurl can reuse the
        // open connection instead of reconnecting for every word.
        static $ch = null;
        if ($ch === null) {
            $ch = curl_init();
            // HEAD request: only the status line and headers are needed.
            curl_setopt($ch, CURLOPT_NOBODY, true);
            // Do not let a single stalled request block the whole run.
            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_exec($ch);
        $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);

        if ($httpcode == 404) {
            // The "<br>" separator belongs to the output, not to the URL.
            echo '<br>' . $url;
        }
    }

    $lines = file('wordsEn.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($lines === false) {
        exit('Could not read wordsEn.txt');
    }

    foreach ($lines as $n){
        // Encode the word so that unusual characters cannot break the URL.
        remoteStatusCode('https://twitter.com/' . rawurlencode($n));
    }
?> 

任何建议对我来说都是好建议,我需要尽可能多的帮助。

+1

http://php.net/manual/en/function.curl-multi-init.php – zerkms

+0

http://meta.codereview.stackexchange.com/ –

+0

@Fred -ii-:我确定它放在这里也很合适 – zerkms

回答

3

下面是使用 cURL 和 multi cURL 获取头信息的基准测试(benchmark):

<?php 
include_once "CURL.php";
$curl = new CURL();

// Both runs issue the same request (a HEAD request to the same URL) so that
// the two timings measure sequential vs. parallel execution and nothing else.
$url = "https://twitter.com";
$requests = 10;

// --- Sequential: one blocking curl_exec() per request -----------------------
$start_time = microtime(true);

for ($i = 0; $i < $requests; $i++) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_NOBODY, true);
    curl_exec($ch);
    // Read the status code inside the timed section, as the multi run does.
    $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);
}

$end_time = microtime(true);
echo "cUrl Timing : ";
echo $end_time - $start_time;
echo "\n";

// --- Parallel: all requests queued on the CURL wrapper and run together -----
$start_time = microtime(true);

$get = array(
    CURLOPT_NOBODY         => true,
    CURLOPT_HEADER         => true,
    CURLOPT_RETURNTRANSFER => true,
);
for ($i = 0; $i < $requests; $i++) {
    $curl->addSession($url, $get);
}
$datas = $curl->exec();

$end_time = microtime(true);
echo "Multi cUrl Timing : ";
echo $end_time - $start_time;
echo "\n";

print_r($datas[1]);// http codes of each request


?> 

CURL.php(原始的类在 here,但我做了一些修改,以便在 multi cURL 中获取头信息;下面就是这个 cURL 类)

<?php 

/** 
* OO cURL Class 
* Object oriented wrapper for the cURL library. 
* @author David Hopkins (semlabs.co.uk) 
* @version 0.3 
*/ 
class CURL
{

    /** @var array stack of cURL handles, indexed in the order they were added */
    public $sessions = array();

    /** @var int number of extra attempts for a request that fails; 0 disables retrying */
    public $retry = 0;

    /**
    * Adds a cURL session to stack
    * @param $url string, session's URL
    * @param $opts array, optional array of cURL options and values
    */
    public function addSession($url, $opts = false)
    {
        $this->sessions[] = curl_init($url);
        if ($opts) {
            $this->setOpts($opts, count($this->sessions) - 1);
        }
    }

    /**
    * Sets an option to a cURL session
    * @param $option constant, cURL option
    * @param $value mixed, value of option
    * @param $key int, session key to set option for
    */
    public function setOpt($option, $value, $key = 0)
    {
        curl_setopt($this->sessions[$key], $option, $value);
    }

    /**
    * Sets an array of options to a cURL session
    * @param $options array, array of cURL options and values
    * @param $key int, session key to set option for
    */
    public function setOpts($options, $key = 0)
    {
        curl_setopt_array($this->sessions[$key], $options);
    }

    /**
    * Executes the stacked session(s).
    *
    * With one session it is executed directly. With several, all of them
    * run in parallel unless $key selects a single one.
    *
    * @param $key int, optional argument if you only want to execute one session
    * @return mixed result of execSingle()/execMulti(), or null when there is
    *         no session or the result is empty/false
    */
    public function exec($key = false)
    {
        $no = count($this->sessions);

        // Nothing stacked: there is nothing to execute.
        if ($no === 0) {
            return null;
        }

        if ($no === 1) {
            $res = $this->execSingle();
        } elseif ($key === false) {
            $res = $this->execMulti();
        } else {
            $res = $this->execSingle($key);
        }

        return $res ? $res : null;
    }

    /**
    * Executes a single cURL session, retrying on failure when $retry > 0
    * @param $key int, id of session to execute
    * @return mixed content if CURLOPT_RETURNTRANSFER is set, otherwise the
    *         boolean result of curl_exec()
    */
    public function execSingle($key = 0)
    {
        $extra = $this->retry > 0 ? $this->retry : 0;

        return $this->execWithAttempts($key, $extra + 1);
    }

    /**
    * Runs curl_exec() on one session up to $attempts times, stopping early
    * once the response code is a non-zero value below 400.
    * @param $key int, id of session to execute
    * @param $attempts int, maximum number of attempts (at least one is made)
    * @return mixed result of the last curl_exec() call
    */
    private function execWithAttempts($key, $attempts)
    {
        do {
            $res = curl_exec($this->sessions[$key]);
            $code = curl_getinfo($this->sessions[$key], CURLINFO_HTTP_CODE);
            $attempts--;
        } while ($attempts > 0 && ($code == 0 || $code >= 400));

        return $res;
    }

    /**
    * Executes a stack of sessions in parallel
    * @return array two-element array: [0] => list of contents (false for a
    *         failed session), [1] => list of HTTP codes, each wrapped in a
    *         one-element array as returned by info()
    */
    public function execMulti()
    {
        $res = array();
        $codes = array();
        $mh = curl_multi_init();

        #Add all sessions to multi handle
        foreach ($this->sessions as $session) {
            curl_multi_add_handle($mh, $session);
        }

        #Drive all transfers until none is active or an error occurs
        while (true) {
            do {
                $mrc = curl_multi_exec($mh, $active);
            } while ($mrc == CURLM_CALL_MULTI_PERFORM);

            if (!$active || $mrc != CURLM_OK) {
                break;
            }

            // curl_multi_select() may return -1 without waiting; pause briefly
            // and call curl_multi_exec() again instead of spinning forever.
            if (curl_multi_select($mh) == -1) {
                usleep(1000);
            }
        }

        if ($mrc != CURLM_OK) {
            echo "Curl multi read error $mrc\n";
        }

        #Get content foreach session, retry if applied
        foreach ($this->sessions as $i => $session) {
            $code = $this->info($i, CURLINFO_HTTP_CODE);

            if ($code[0] > 0 && $code[0] < 400) {
                $res[] = curl_multi_getcontent($session);
                curl_multi_remove_handle($mh, $session);
            } else {
                // Detach first: a handle still attached to a multi handle
                // must not be run with curl_exec().
                curl_multi_remove_handle($mh, $session);

                if ($this->retry > 0) {
                    $eRes = $this->execWithAttempts($i, $this->retry);
                    $res[] = $eRes ? $eRes : false;
                    // Report the status of the final attempt.
                    $code = $this->info($i, CURLINFO_HTTP_CODE);
                } else {
                    $res[] = false;
                }
            }

            $codes[] = $code;
        }

        curl_multi_close($mh);

        return array($res, $codes);
    }

    /**
    * Closes cURL sessions
    * @param $key int, optional session to close
    */
    public function close($key = false)
    {
        if ($key === false) {
            foreach ($this->sessions as $session) {
                curl_close($session);
            }
        } else {
            curl_close($this->sessions[$key]);
        }
    }

    /**
    * Remove all cURL sessions
    */
    public function clear()
    {
        foreach ($this->sessions as $session) {
            curl_close($session);
        }
        // Reset rather than unset so the property stays a usable array.
        $this->sessions = array();
    }

    /**
    * Returns an array of session information
    * @param $key int, optional session key to return info on
    * @param $opt constant, optional option to return
    * @return array one entry per inspected session (empty if there is none)
    */
    public function info($key = false, $opt = false)
    {
        $keys = $key === false ? array_keys($this->sessions) : array($key);

        $info = array();
        foreach ($keys as $k) {
            $info[] = $opt
                ? curl_getinfo($this->sessions[$k], $opt)
                : curl_getinfo($this->sessions[$k]);
        }

        return $info;
    }

    /**
    * Returns an array of errors
    * @param $key int, optional session key to retun error on
    * @return array of error messages
    */
    public function error($key = false)
    {
        $keys = $key === false ? array_keys($this->sessions) : array($key);

        $errors = array();
        foreach ($keys as $k) {
            $errors[] = curl_error($this->sessions[$k]);
        }

        return $errors;
    }

    /**
    * Returns an array of session error numbers
    * @param $key int, optional session key to retun error on
    * @return array of error codes
    */
    public function errorNo($key = false)
    {
        $keys = $key === false ? array_keys($this->sessions) : array($key);

        $errors = array();
        foreach ($keys as $k) {
            $errors[] = curl_errno($this->sessions[$k]);
        }

        return $errors;
    }

}

?> 

结果是:

cUrl Timing : 3.5252928733826

Multi cUrl Timing : 0.63891220092773

Array 
(
    [0] => Array 
     (
      [0] => 200 
     ) 

    [1] => Array 
     (
      [0] => 200 
     ) 

    [2] => Array 
     (
      [0] => 200 
     ) 

    [3] => Array 
     (
      [0] => 200 
     ) 

    [4] => Array 
     (
      [0] => 200 
     ) 

    [5] => Array 
     (
      [0] => 200 
     ) 

    [6] => Array 
     (
      [0] => 200 
     ) 

    [7] => Array 
     (
      [0] => 200 
     ) 

    [8] => Array 
     (
      [0] => 200 
     ) 

    [9] => Array 
     (
      [0] => 200 
     ) 

) 

如果我可以帮你,我会感到非常高兴。

+0

现在好多了,+1。你想放弃其他答案吗? – zerkms

+0

我删除了我的另一个答案,以免误导用户 –

+0

非常感谢你,兄弟。*电子拥抱* – George