我最终采用了 PhantomJS 方案:Ruby 脚本遍历数据库表,并针对每条记录的 URL 调用下面这个 PhantomJS 脚本。
以下是该 PhantomJS 脚本:
// PhantomJS modules and script-wide state.
var page = require('webpage').create();
var system = require('system');
var fs = require('fs');
// NOTE(review): not referenced by the code visible here (analizePage's own
// `address` parameter shadows it); kept in case something else relies on it.
var address;
// Set to true by the resource-request handler once a request to the tracked
// host is seen; read when the page finishes loading.
var isScript = false;

// main
// system.args[0] is the script name, so the URL to analyse is system.args[1].
// Without a URL there is nothing to load: report usage and exit non-zero
// instead of opening `undefined` and logging a bogus failure row.
if (system.args.length < 2) {
    console.log('Usage: phantomjs <script> <url> [tag]');
    phantom.exit(1);
} else {
    analizePage(system.args[1]);
}
/**
 * Opens `address` in the shared `page` and records whether the page requested
 * any resource hosted on 'my.domain.net'.
 *
 * One row is appended to the log through fileWriter():
 *   -1  the page failed to load
 *    1  at least one requested resource came from 'my.domain.net'
 *    0  the page loaded and no such resource was requested
 *
 * The PhantomJS process is terminated (exit status 0) once the row is written,
 * whatever the outcome.
 *
 * @param {string} address URL of the page to analyse.
 */
function analizePage(address) {
    var TRACKED_HOST = 'my.domain.net';

    // Install the request hook BEFORE starting the load so that no request
    // issued by page.open() can slip past it.
    page.onResourceRequested = function (req) {
        try {
            // An anchor element is used as a URL parser: assigning href lets
            // the hostname of the requested resource be read back.
            var link = document.createElement('a');
            link.setAttribute('href', req.url);
            if (link.hostname === TRACKED_HOST) {
                isScript = true;
            }
        } catch (e) {
            // Best effort: a request that cannot be inspected must not abort the run.
            console.log("PAGE OPEN ERROR: " + e);
        }
    };

    page.open(address, function (status) {
        if (status !== 'success') {
            console.log('FAIL to load the address ' + address);
            fileWriter(-1, address);
        } else {
            fileWriter(isScript ? 1 : 0, address);
            console.log('Has script: ' + isScript);
        }
        phantom.exit(0);
    });
}
/**
 * Appends one result row to "scriptlog.csv" in the form
 *   type,line,timestamp,tag
 * where timestamp is Date.now() and tag is the script's second command-line
 * argument (system.args[2]).
 *
 * Write errors are logged to the console and otherwise ignored so that a
 * logging failure never aborts the run.
 *
 * @param {number} type Result code for the row.
 * @param {string} line Text for the second column (the callers pass the page URL).
 */
function fileWriter(type, line) {
    // Quote a field per RFC 4180 only when it contains a comma, a double quote
    // or a line break; plain values are written unchanged so ordinary rows keep
    // their previous format. URLs may legally contain commas, which would
    // otherwise shift the remaining columns.
    function csvField(value) {
        var text = String(value);
        if (/[",\r\n]/.test(text)) {
            return '"' + text.replace(/"/g, '""') + '"';
        }
        return text;
    }

    try {
        var row = type + ',' + csvField(line) + ',' + Date.now() + ',' + csvField(system.args[2]) + '\n';
        fs.write("scriptlog.csv", row, 'a');
    } catch (e) {
        console.log("FILE ERROR: " + e);
    }
}