2016-07-07 35 views
1

这个问题我已经折腾了大半天。简而言之,我试图通过 Node.js 模块配合 PhantomJS 登录亚马逊。问题的简短版本是:亚马逊返回一条消息,提示必须启用 cookie 才能使用该网站。(标题:PhantomJS 持久化 Cookie 与 JavaScript)

以下是我目前参考过的资源:

NPM's phantom module

Working example of logging into Amazon using PhantomJS

SO question addressing persistent cookies in PhantomJS

Another SO question about cookies set by Javascript

最后一个问题特别有意思:第一个答案通过设置用户代理(user agent)解决了问题(我已经试过至少三四次,结果都一样),而第二个答案指向了我认为可能是症结所在的地方。简而言之,亚马逊可能会尝试通过 JavaScript 设置一个测试 cookie,然后检查该 cookie 是否设置成功,以此判断用户是否允许 cookie。我可以确认我的 cookie 文件已经被创建,并且亚马逊已经向文件中写入了 cookie,但在提交登录表单时这显然还不够,因为在下一个页面我就被 cookie 警告拦住了。这让我相信最后一个问题中的那位用户是对的——尽管我已尽力确保,我的页面中的 JavaScript 仍然没有被执行。

最后,我的page.render显示了一条亚马逊消息,说我需要启用cookie才能继续。这里是我的代码...

'use strict'; 

/** 
* Module dependencies. 
*/ 
var mongoose = require('mongoose'), 
    phantom = require('phantom'), 
    // Admin = mongoose.model('Admin'), 
    Item = mongoose.model('Item'), 
    config = require('../config/config'); 


/**
 * Logs in to Amazon through PhantomJS and then requests the affiliate
 * orders report.
 *
 * The work is an ordered list of steps driven by a 50 ms poller. A new step
 * starts only when the previous step's promise has settled AND no page load
 * is in flight; a screenshot of each step is written to
 * `config/images/step<N>.png` once the page is idle again.
 *
 * Fixes compared with the earlier revision:
 *  - the user agent is set with the `userAgent` key (the former
 *    `settings.userAgent` key was not applied) and is awaited before opening;
 *  - the cookie file path is built with `path.join`, so the directory
 *    separator is no longer missing;
 *  - `loadInProgress` is raised synchronously, so the next step can no
 *    longer start before the current navigation has begun;
 *  - a failed load releases the poller instead of stalling it forever, and
 *    a failing sub-resource no longer releases it prematurely;
 *  - promise rejections are reported and always shut PhantomJS down.
 *
 * @returns {undefined} Nothing; progress and failures go to the console.
 */
module.exports.check = function() {
    // Function-scope require keeps the file's top-level dependency list untouched.
    var path = require('path');

    var loadInProgress = false, // true while PhantomJS is navigating
        stepPending = false,    // true while a step or its screenshot is unsettled
        renderDue = 0,          // 1-based number of the step awaiting its screenshot
        testindex = 0,          // index of the next step to run
        poller = null,
        cookiePath = path.join(__dirname, 'cookies.txt'),
        userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
        url = 'https://www.amazon.com/ap/signin?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Faffiliate%2Dprogram.amazon.com%2Fhome',
        // url2 is for order data
        url2 = 'https://affiliate-program.amazon.com/home/reports/table.json?query%5Btype%5D=orders&query%5Bstart_date%5D=2016-05-28&query%5Bend_date%5D=2016-06-26&query%5Btag_id%5D=189318233&query%5Bdevice_type%5D=all&query%5Blast_accessed_row_index%5D=0&query%5Bcolumns%5D=title%2Casin%2Ccategory%2Cclicks%2Cconversion%2Cseller%2Cdqty%2Cnqty%2Cqty&query%5Bskip%5D=0&query%5Bsort%5D=asin&query%5Blimit%5D=25&store_id=XXXX';

    // Renders an error (or any rejected value) as a printable string.
    function describe(err) {
        return err && err.stack ? err.stack : String(err);
    }

    phantom.create([/* '--debug=true', */ '--ignore-ssl-errors=true', '--ssl-protocol=any', '--web-security=false', '--cookies-file=' + cookiePath]).then(function(ph) {
        var finished = false;

        // Stops the poller and terminates PhantomJS exactly once.
        function shutdown() {
            if (finished) {
                return;
            }
            finished = true;
            clearInterval(poller);
            ph.exit();
        }

        // Reports a failure and releases the PhantomJS process.
        function abort(err) {
            console.error('Phantom run aborted: ' + describe(err));
            shutdown();
        }

        return ph.createPage().then(function(page) {

            page.on('onLoadStarted', function() {
                loadInProgress = true;
            });

            page.on('onLoadFinished', function(response) {
                // The load is over either way; leaving the flag raised on
                // failure would stall the poller forever.
                loadInProgress = false;
                if (response !== 'success') {
                    console.log('Phantom page failed to load.');
                }
            });

            page.on('onError', function(msg, trace) {
                var msgStack = ['ERROR: ' + msg];
                if (trace && trace.length) {
                    msgStack.push('TRACE:');
                    trace.forEach(function(t) {
                        msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
                    });
                }
                console.error(msgStack.join('\n'));
            });

            page.on('onResourceError', function(resourceError) {
                // Log only: one failing sub-resource does not mean the page
                // finished loading, so the load flag is left untouched here.
                console.log('= onResourceError()');
                console.log(' - unable to load url: "' + resourceError.url + '"');
                console.log(' - error code: ' + resourceError.errorCode + ', description: ' + resourceError.errorString);
            });

            // Every step returns a promise that settles when its own
            // asynchronous work has been handed to PhantomJS.
            var steps = [
                // Step 1: configure the page and load the initial login page.
                function() {
                    console.log('--- JAVASCRIPT ---');
                    // Raised before any async work so the poller cannot run
                    // step 2 while the settings round-trip is still pending.
                    loadInProgress = true;
                    return page.setting('javascriptEnabled').then(function(val) {
                        console.log('val: ' + val);
                        return page.setting('userAgent', userAgent);
                    }).then(function() {
                        return page.open(url);
                    });
                },
                // Step 2: fill in username/password.
                function() {
                    return page.evaluate(function() {
                        var email = document.getElementById('ap_email'),
                            password = document.getElementById('ap_password');
                        if (!email || !password) {
                            return false;
                        }
                        email.value = 'XXXX';
                        password.value = 'XXXX';
                        return true;
                    }).then(function(filled) {
                        if (!filled) {
                            throw new Error('Sign-in fields #ap_email / #ap_password were not found.');
                        }
                    });
                },
                // Step 3: submit the login form.
                function() {
                    loadInProgress = true;
                    return page.evaluate(function() {
                        var form = document.forms['signIn'];
                        if (!form) {
                            return false;
                        }
                        form.submit();
                        return true;
                    }).then(function(submitted) {
                        if (!submitted) {
                            throw new Error('Sign-in form "signIn" was not found.');
                        }
                    });
                },
                // Step 4: request the orders report.
                function() {
                    loadInProgress = true;
                    return page.open(url2);
                }
            ];

            poller = setInterval(function() {
                if (stepPending || loadInProgress) {
                    return;
                }

                // The page is idle: capture the step that just completed.
                if (renderDue) {
                    stepPending = true;
                    page.render('config/images/step' + renderDue + '.png').then(function() {
                        renderDue = 0;
                        stepPending = false;
                    }, abort);
                    return;
                }

                if (testindex >= steps.length) {
                    shutdown();
                    return;
                }

                var stepNumber = testindex + 1;
                testindex++;
                stepPending = true;
                steps[stepNumber - 1]().then(function() {
                    renderDue = stepNumber;
                    stepPending = false;
                }, abort);
                console.log('Test Index: ' + stepNumber);
            }, 50);
        }).catch(abort);
    }).catch(function(err) {
        console.error('Unable to start PhantomJS: ' + describe(err));
    });
};

运行后得到的输出如下:

--- JAVASCRIPT --- 
    Test Index: 1 
    val: true 
    Test Index: 2 
    Test Index: 3 
    Test Index: 4 
    = onResourceError() 
     - unable to load url: "https://sentry.amazon.com/SSO/redirect?response_typ 
e=id_token&client_id=affiliate-program.amazon.com%3A443&redirect_uri=https%3A%2F 
%2Faffiliate-program.amazon.com%3A443%2Fhome%2Freports%2Ftable.json%3Fquery%255B 
type%255D%3Dorders%26query%255Bstart_date%255D%3D2016-05-28%26query%255Bend_date 
%255D%3D2016-06-26%26query%255Btag_id%255D%3D189318233%26query%255Bdevice_type%2 
55D%3Dall%26query%255Blast_accessed_row_index%255D%3D0%26query%255Bcolumns%255D% 
3Dtitle%252Casin%252Ccategory%252Cclicks%252Cconversion%252Cseller%252Cdqty%252C 
nqty%252Cqty%26query%255Bskip%255D%3D0%26query%255Bsort%255D%3Dasin%26query%255B 
limit%255D%3D25%26store_id%3XXXX&scope=openid&nonce=5d8a3f10bb3746c799 
a05a927b0204f3c0629d5c8c5646bb49ccdcd93f07247e&sentry_handler_version=TomcatSSOF 
ilter-1.1-1" 
     - error code: 5, description: Operation canceled 
    Phantom page failed to load. 

有没有人能指点一下,我可能遗漏了什么?

回答

1

它似乎是PhantomJS 2.1.1(由NPM模块实现的版本)或NPM模块本身的问题。

我用 Horseman 和 PhantomJS 2.0.0 把这个脚本完全重写了一遍,它立刻就能正常工作了。为了方便后来者参考,下面给出可用的实现。我接触 Horseman 才一天,但已经很喜欢它的链式执行方式——比我用过的其他任何 Phantom 封装都要简洁。

'use strict'; 

/** 
* Module dependencies. 
*/ 
var mongoose = require('mongoose'), 
    Horseman = require('node-horseman'), 
    phPath = __dirname + '\\phantomjs-2.0.0-windows\\bin\\phantomjs.exe', 
    Item = mongoose.model('Item'), 
    config = require('../config/config'); 


/**
 * Logs in to Amazon with Horseman (PhantomJS) and prints the affiliate
 * orders and earnings reports.
 *
 * The chain opens the sign-in page, fills in and submits the credentials,
 * then opens each report URL and prints its plain-text body. A screenshot
 * is written to `config/images/step<N>.png` after each stage.
 *
 * Fixes compared with the earlier revision:
 *  - the cookie file path is built with `path.join`, so the directory
 *    separator is no longer missing;
 *  - PhantomJS is closed even when an action in the chain fails, and the
 *    failure is logged instead of surfacing as an unhandled rejection.
 *
 * @returns {undefined} Nothing; results and failures go to the console.
 */
module.exports.updateItems = function() {
    // Function-scope require keeps the file's top-level dependency list untouched.
    var path = require('path');

    var cookiePath = path.join(__dirname, 'cookies.txt'),
        url = 'https://www.amazon.com/ap/signin?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Faffiliate%2Dprogram.amazon.com%2Fhome',
        // url2 is for order data
        url2 = 'https://affiliate-program.amazon.com/home/reports/table.json?query%5Btype%5D=orders&query%5Bstart_date%5D=2016-05-28&query%5Bend_date%5D=2016-06-26&query%5Btag_id%5D=189318233&query%5Bdevice_type%5D=all&query%5Blast_accessed_row_index%5D=0&query%5Bcolumns%5D=title%2Casin%2Ccategory%2Cclicks%2Cconversion%2Cseller%2Cdqty%2Cnqty%2Cqty&query%5Bskip%5D=0&query%5Bsort%5D=asin&query%5Blimit%5D=25&store_id=XXXX',
        // url3 is for earnings data
        url3 = 'https://affiliate-program.amazon.com/home/reports/table.json?query%5Btype%5D=earnings&query%5Bstart_date%5D=2016-05-28&query%5Bend_date%5D=2016-06-26&query%5Btag_id%5D=189318233&query%5Bdevice_type%5D=all&query%5Blast_accessed_row_index%5D=0&query%5Bcolumns%5D=title%2Casin%2Cseller%2Cprice%2Crate%2Cqty%2Crevenue%2Cearnings%2Cdevicetype&query%5Bskip%5D=0&query%5Bsort%5D=asin&query%5Blimit%5D=25&store_id=XXXX';

    var horseman = new Horseman({
        cookiesFile: cookiePath,
        ignoreSSLErrors: true,
        sslProtocol: 'any',
        webSecurity: false,
        timeout: 15000,
        phantomPath: phPath
    });

    // Prints the plain-text body of the report page that was just opened.
    function logPageText(txt) {
        console.log('Page results: ');
        console.dir(txt);
    }

    horseman
        .userAgent('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36')
        .authentication('XXXX', 'XXXX')
        .on('consoleMessage', function(msg) {
            console.log(msg);
        })
        .on('error', function(msg, trace) {
            var msgStack = ['ERROR: ' + msg];
            if (trace && trace.length) {
                msgStack.push('TRACE:');
                trace.forEach(function(t) {
                    msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
                });
            }
            console.error(msgStack.join('\n'));
        })
        .open(url)
        .screenshot('config/images/step1.png')
        .waitForSelector('#ap_email')
        .value('#ap_email', 'XXXX')
        .waitForSelector('#ap_password')
        .value('#ap_password', 'XXXX')
        .screenshot('config/images/step2.png')
        .click('#signInSubmit')
        .waitForNextPage()
        .screenshot('config/images/step3.png')
        .open(url2)
        .screenshot('config/images/step4.png')
        .plainText()
        .then(logPageText)
        .open(url3)
        .screenshot('config/images/step5.png')
        .plainText()
        .then(logPageText)
        // A rejected action skips the rest of the chain, so the shutdown
        // lives in `finally` to make sure PhantomJS is closed on failure too.
        .finally(function() {
            return horseman.close();
        })
        .catch(function(err) {
            console.error('Horseman run failed: ' + (err && err.stack ? err.stack : err));
        });
};

祝你好运!

+0

有趣!如果脚本中需要条件判断或分支逻辑,在 Horseman 的链式写法里该如何处理? – Vaviloff

+1

@Vaviloff 好问题。Horseman 提供了一个 `do` 函数(https://github.com/johntitus/node-horseman#dofn),可以让你在不打断调用链的情况下运行任意函数。据我理解,其结果会传递给链中的下一个函数,所以你可以这样写:`.do(function(){return stuff;}).then(function(stuffFromDo){return moreStuff;});` 要我说,这相当优雅。 – aikorei

0

我最近遇到了同样的问题,简单的解决办法是为新创建的页面设置用户代理(user agent)。如果你使用的是 phantomjs-node 模块,代码如下。

// Set the user agent on the newly created page; with phantomjs-node the setting key is "userAgent".
page.setting("userAgent", "your user agent here");