2016-07-08 174 views
0

我需要把房地产广告的数据保存到 nidax.json 文件中。我先访问广告列表页面,再利用其中指向各个广告的链接去抓取所需的数据。我使用的是 NodeJS 的 x-ray 抓取库,但不知为何它无法正常工作。问题是:NodeJS x-ray 无法跟随链接进入多个页面来抓取数据。

有时它不会返回任何内容,有时它只返回单个广告的链接。

// Scrape the real-estate listing page, follow the link of every ad found
// there, and write the collected results to nidax.json.
var Xray = require('x-ray');
var x = Xray();

// Each matched span becomes one entry of the resulting array; the selectors
// inside the entry are attribute selectors in x-ray's `selector@attribute` form.
x('http://nidax-nekretnine.rs/nekretnine/', 'div.kutija-veca_dno > div.read-more > span', [{
    // Address of the individual ad, taken from the anchor's href.
    url: 'a@href',
    // Follow the ad link and extract fields from the ad's own page.
    // NOTE(review): the enclosing scope already ends at `span`, so this
    // selector may need to be just 'a@href' - confirm against the page markup.
    items: x('div.kutija-veca_dno > div.read-more > span > a@href', {
        location: 'body > div.contentarea-novo > div > div.info-part > div.one-third div.osnovni-podaci > p:nth-child(2) > span.orange-text',
    }),
}]).write('nidax.json');

回答

0

在下面这个 pull request 等待批准合并期间,您可以订阅它以跟踪进展。

与此同时,我建议您直接把这个修复应用到本地下载的 x-ray 模块中。改动只有一行代码,很简单,我已在两个项目中测试过。请查看 index.js 文件的第 237 行附近,注意长注释之后的那条 `return` 语句:

/**
 * Build a walker that resolves `selector` against a loaded document.
 *
 * @param {Function} xray - factory called as `xray(scope, itemSelector)` to
 *   create the node function used for each element of a collection.
 * @param {*} selector - value handed to `walk`; its entries may be strings,
 *   functions, or arrays (of strings or of objects).
 * @param {*} scope - scope passed to `root` and used to look up the
 *   collection elements.
 * @param {*} filters - forwarded unchanged to `resolve`.
 * @returns {Function} `walkHTML($, fn)`, which calls `fn(err)` on failure or
 *   `fn(null, result, $)` once the walk has finished.
 */
function WalkHTML (xray, selector, scope, filters) {
  return function walkHTML ($, fn) {
    // Resolve every element matched by `scope` with the nested selector and
    // report the compacted list through `next` once all of them answered.
    function collect (itemSelector, next) {
      var $matches = $.find ? $.find(scope) : $(scope)
      var remaining = $matches.length
      var results = []

      // Nothing matched: report the empty list straight away.
      if (!remaining) return next(null, results)

      $matches.each(function (index) {
        var $item = $matches.eq(index)
        var child = xray(scope, itemSelector)
        child($item, function (err, obj) {
          if (err) return next(err)
          results[index] = obj
          remaining -= 1
          if (!remaining) return next(null, compact(results))
        })
      })
    }

    function visit (v, k, next) {
      switch (typeof v) {
        case 'string':
          return next(null, resolve($, root(scope), v, filters))
        case 'function':
          return v($, function (err, obj) {
            return err ? next(err) : next(null, obj)
          })
      }

      if (!isArray(v)) return next()

      var first = v[0]
      if (typeof first === 'string') {
        return next(null, resolve($, root(scope), v, filters))
      }
      if (typeof first !== 'object') return next()

      // Leave without calling `next()` here: `collect` reports through `next`
      // itself once every pending nested request has answered. Falling through
      // to `next()` would reach the final callback before those results exist
      // (nested crawling fix, see 'bugfix/nested-crawling' #111).
      return collect(first, next)
    }

    function finished (err, obj) {
      if (err) return fn(err)
      fn(null, obj, $)
    }

    walk(selector, visit, finished)
  }
}