2012-10-02 24 views
0

请问能否帮我找到造成这个进程达到500MB内存使用情况的原因。 它基本上是一个html页面下载器。 尽管这个过程是稳定的(并且没有超过这个限制),但它意味着在低性能机器上使用,我并不满意。 mysql表'Sites'的大小为170MB。 遵循脚本代码。 在此先感谢。减少php守护进程的内存使用情况

function start() { 
try { 
    global $log; 
    $db = getConnection(); 
    Zend_Db_Table::setDefaultAdapter($db); 
    $log->logInfo("logger start"); 
    while (1) { 
     $sitesTable = new Zend_Db_Table('Sites'); 
     $rowset = $sitesTable->fetchAll(); 
     foreach ($rowset as $row) { 
      if (time() >= (strtotime($row->lastUpdate) + $row->pollingHours * 60 * 60)) { 
       db_updateHtml($row); 
      } 
     } 
    } 
} catch (Exception $e) { 
    global $log; 
    $log->logError($e->getMessage()); 
} 
} 

    function db_updateHtml($siteRecord) { 
     try { 
      if ($siteRecord instanceof Zend_Db_Table_Row) { 
       $rowwithConnection = $siteRecord; 
       $url = $siteRecord->url; 

       $idSite = $siteRecord->idSite; 
       $crawler = new Crawler(); 
       $sitesTable = new Zend_Db_Table('Sites'); 
       //$rowwithConnection = $sitesTable->fetchRow(
        //  $sitesTable->select()->where('idSite = ?', $idSite)); 
       $newHtml = HtmlDbEncode($crawler->get_web_page($url)); 


       if (strlen($newHtml) < 10) { 
        global $log; 
        $log->logError("Download failed for: url: $url \t idsite: $idSite "); 
       } 
       if ($rowwithConnection->isChecked != 0) { 
        $rowwithConnection->oldHtml = $rowwithConnection->newHtml; 
        $rowwithConnection->isChecked = 0; 
       } 
       $rowwithConnection->newHtml = $crawler->get_web_page($url); 
       $rowwithConnection->lastUpdate = date("Y-m-d H:i:s"); 
       //$rowwithConnection->diffHtml = getDiff($rowwithConnection->oldHtml, $rowwithConnection->newHtml, false, $rowwithConnection->minLengthChange); 
       $rowwithConnection->diffHtml = getDiffFromRecord($rowwithConnection, false, $rowwithConnection->minLengthChange); 
       /* if (strlen($rowwithConnection->diffHtml) > 30) { 
        $rowwithConnection->lastChanged = $rowwithConnection->lastUpdate; 
        } */ 
       $rowwithConnection->save(); 
      } else { 
       $log->logCrit("siteRecord is uninitialized"); 
      } 
     } catch (Exception $e) { 
      global $log; 
      $log->logError($e->getMessage()); 
     } 
    } 

    function getDiffFromRecord($row, $force = false, $minLengthChange = 100) { 
     if ($row instanceof Zend_Db_Table_Row) { 
      require_once '/var/www/diff/library/finediff.php'; 
      include_once '/var/www/diff/library/Text/Diff.php'; 
      $diff = new AndreaDiff(); 
      $differences = $diff->getDiff($row->oldHtml, $row->newHtml); 
      if ($diff->isChanged($minLengthChange) || $force) { 
       $row->lastChanged = $row->lastUpdate; 
       $row->isChecked = false; 
       return ($differences); 
      } 
     } 
     return null; 
    } 

    function getConnection() { 
     try { 
      $pdoParams = array(
       PDO::MYSQL_ATTR_USE_BUFFERED_QUERY => true 
      ); 
      $db = new Zend_Db_Adapter_Pdo_Mysql(array(
         'host' => '127.0.0.1', 
         'username' => 'root', 
         'password' => 'administrator', 
         'dbname' => 'diff', 
         'driver_options' => $pdoParams 
        )); 
      return $db; 
     } catch (Exception $e) { 
      global $log; 
      $log->logError($e->getMessage()); 
     } 

    } 

回答

0

1)尽量使用提取方法,不使用fetchall:

foreach($sitesTable->fetch() as $row){ 
    //... 
} 

2)尽量没有设置所有的变量,商店的HTML代码(如果您将其保存在内存中),在最后一次迭代我假设变量$rowwithConnection里面会有html代码。

当我想要个人档案php应用程序我使用xhprof它会为您节省很多时间。祝你好运!