用 PhantomJS 抓取 React 站点

我正在 Node.js 中使用 PhantomJS 抓取一个使用 React 组件的网站。

我用的是这个库：https://github.com/amir20/phantomjs-node

下面是代码：

// Opens `url` in PhantomJS and prints the resulting page HTML.
//
// The page and the PhantomJS instance are released on both the success and
// the failure path; previously a rejected step skipped close()/exit() and the
// PhantomJS instance was never shut down.
//
// NOTE(review): the content is read right after the load finishes, so markup
// that is rendered later by client-side scripts may not be present yet.
phantom.create().then(ph => {
    _ph = ph;
    return ph.createPage().then(page => {
        _page = page;
        return page.open(url).then(status => {
            // Do not dump the content of a page that failed to load.
            if (status !== 'success') {
                throw new Error('Failed to open ' + url + ': ' + status);
            }
            return page.property('content');
        }).then(content => {
            console.log(content);
        }).then(
            () => page.close(),
            // Close the page first, then re-raise the original failure.
            err => Promise.resolve(page.close()).then(() => { throw err; })
        );
    }).then(
        () => ph.exit(),
        // Shut PhantomJS down first, then re-raise the original failure.
        err => Promise.resolve(ph.exit()).then(() => { throw err; })
    );
}).catch(e => console.log(e));

问题是 React 内容没有被渲染：在本应加载实际 React 组件的位置，输出的只是 `<!-- react-empty: 1 -->`。

如何抓取渲染之后的 React 组件？我最初正是为了解决这个问题，才从纯 Node 的 request 方案切换到 PhantomJS，但现在我卡住了。

UPDATE：

所以我还没有找到真正的解决办法。我改用了 NightmareJS（https://github.com/segmentio/nightmare），它有一个很好用的 .wait('.some-selector') 函数，可以等待指定的选择器加载完成。这解决了我动态加载 React 组件的问题。

来源

2016-11-05 DennisKo

此包是否支持接收page.onError回调？有没有错误？ – Vaviloff

是的，我加了 'console.log(status);'，它返回 success。我得到了完整的 HTML 内容，只是在 React 组件的位置上得到的是 '<!-- react-empty: 1 -->' – DennisKo

我猜这个 'status' 来自 'page.open' 的回调，而你需要在 page.onError 回调中检查页面内部的错误。可能是这个问题：[为什么我无法使用 PhantomJS 2.1.1 渲染我的 ReactJS 应用程序](http://stackoverflow.com/questions/38469005/why-i-am-not-able-to-render-my-reactjs-application-using-phantomjs-2-1-1) – Vaviloff

我认为你应该在页面加载之后，等待页面上的 React 元素渲染完成。下面是一个使用 Q promise 实现这种等待的函数示例。该函数返回一个 promise，并每隔 50ms 检查一次页面状态。如果达到了期望的页面状态，函数就 resolve 该 promise；如果超时，函数就 reject 该 promise。

var phantom = require('phantom'); 
var Q = require('q'); 
var _ph, _page, _outObj; 
var url = 'https://tech.yandex.ru/maps/jsbox/'; 

// Opens `url`, waits up to 3 seconds for textPopulated() to report that the
// page content has been rendered, then prints the page HTML.
//
// The page and the PhantomJS instance are released on both the success and
// the failure path; previously a rejected step (for example a waitState
// timeout) skipped close()/exit() and PhantomJS was never shut down.
phantom.create().then(ph => {
    _ph = ph;
    return ph.createPage().then(page => {
        // textPopulated() reads the page through this shared variable.
        _page = page;
        return page.open(url).then(status => {
            console.log(status);
            // Waiting on a page that failed to load can only end in a timeout.
            if (status !== 'success') {
                throw new Error('Failed to open ' + url + ': ' + status);
            }
            return waitState(textPopulated, 3);
        }).then(() => {
            return page.property('content');
        }).then(content => {
            console.log(content);
        }).then(
            () => page.close(),
            // Close the page first, then re-raise the original failure.
            err => Promise.resolve(page.close()).then(() => { throw err; })
        );
    }).then(
        () => ph.exit(),
        // Shut PhantomJS down first, then re-raise the original failure.
        err => Promise.resolve(ph.exit()).then(() => { throw err; })
    );
}).catch(e => console.log(e));

/**
 * Reports whether the `.ace_text-layer` element of the page held in `_page`
 * currently has at least one child element.
 *
 * Logs the observed child count on every call.
 *
 * @returns {Promise<boolean>} resolves to true when the layer exists and its
 *     childElementCount is greater than zero, otherwise false.
 */
function textPopulated() {
    // This callback is handed to page.evaluate(), so it must be
    // self-contained; it is kept in plain ES5 on purpose.
    // NOTE(review): presumably PhantomJS runs it inside the page - confirm.
    const countLayerChildren = function() {
        var textLayer = document.querySelector('.ace_text-layer');
        if (!textLayer) {
            return textLayer;
        }
        return textLayer.childElementCount;
    };

    return _page.evaluate(countLayerChildren).then((count) => {
        console.log(`childElementCount: ${count}`);
        return count > 0;
    });
}

/**
 * Polls `state` until it reports a truthy result or the time budget runs out.
 *
 * @param {Function} state - check to run; may return a value or a promise.
 *     Its `name` is used in the log and error messages.
 * @param {number} [timeout] - time budget in seconds. A missing, zero or
 *     NaN value falls back to 20 seconds.
 * @param {number} [interval] - pause between checks in milliseconds;
 *     defaults to 50 when missing or not positive.
 * @returns {Promise<undefined>} resolves once `state` yields a truthy result;
 *     rejects with an Error('Timeout state: <name>') when the budget is
 *     exceeded, or with whatever `state` throws or rejects with.
 */
function waitState(state, timeout, interval) { // timeout in seconds is optional
    console.log('Start waiting for state: ' + state.name);

    const limitTime = timeout * 1000 || 20000;
    const pollInterval = interval > 0 ? interval : 50;
    const startTime = Date.now();

    return poll();

    function poll() {
        // Calling state() inside a then-callback turns a synchronous throw
        // into a rejection and also accepts checks that return plain values.
        return Promise.resolve().then(function() {
            return state();
        }).then(function(result) {
            if (result) {
                console.log('Reached state: ' + state.name);
                return;
            }
            if (Date.now() - startTime > limitTime) {
                const errorMessage = 'Timeout state: ' + state.name;
                console.log(errorMessage);
                throw new Error(errorMessage);
            }
            return Q.delay(pollInterval).then(poll);
        });
    }
}

来源

2016-11-05 21:38:40

请问，这和问题中通过 Node.js 桥接库使用 PhantomJS 的情况有什么关系？ – Vaviloff

@Vaviloff 我已把示例改成了使用桥接库的版本 –

对不起，我无法测试你的解决方案。我切换到NightmareJS（看我更新的问题）。这大大简化了我的问题。 – DennisKo

用 PhantomJS 抓取 React 站点

回答

相关问题