2017-05-03 191 views
0

看到更多的,直到最后我需要从这个链接http://www.biography.com/people点击使用硒蟒蛇

所以我用硒与蟒蛇按“看多”提取的文章,直到下载所有的人piography所以这是我的代码

from selenium import webdriver 
from selenium.common.exceptions import TimeoutException 
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.common.by import By 
from selenium.webdriver.support import expected_conditions as EC 
chrome_path = r"./chromedriver" 
driver = webdriver.Chrome(chrome_path) 
driver.get("http://www.biography.com/people") 
while(True): 
    try: 
     element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[contains(@class, 'm-component-footer--loader') and contains(@class, 'm-button')]"))) 
     element.click() 
    except TimeoutException:   
     break 

但问题此代码有时工作,并争取一次。之后给我这个例外。

Traceback (most recent call last): 
    File "sel.py", line 17, in <module> 
    element.click() 
    File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webelement.py", line 77, in click 
    self._execute(Command.CLICK_ELEMENT) 
    File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webelement.py", line 493, in _execute 
    return self._parent.execute(command, params) 
    File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 252, in execute 
    self.error_handler.check_response(response) 
    File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/errorhandler.py", line 194, in check_response 
    raise exception_class(message, screen, stacktrace) 
selenium.common.exceptions.WebDriverException: Message: unknown error: Element <button class="m-component-footer--loader m-button" ng-show="properties.collection.hasMoreItems || loading" ng-click="buttonPressed()" phx-track-event="" phx-track-id="load more" ng-class="{'is-inverted': properties.background.inverted}" tabindex="0" aria-hidden="false">...</button> is not clickable at point (459, 883). Other element would receive the click: <div class="kskdDiv" style="position: fixed; overflow: hidden; height: 90px !important; z-index: 2000000; width: 728px !important;-webkit-transform-origin:0 100%;-moz-transform-origin:0 100%;-ms-transform-origin:0 100%;-o-transform-origin:0 100%;transform-origin:0 100%;left:50%;bottom:0px;-webkit-transform:scale(1) translateX(-50%);-moz-transform:scale(1) translateX(-50%);-ms-transform:scale(1) translateX(-50%);-o-transform:scale(1) translateX(-50%);transform:scale(1) translateX(-50%)" data-kiosked-role="boundary">...</div> 
    (Session info: chrome=58.0.3029.81) 
    (Driver info: chromedriver=2.29.461571 (8a88bbe0775e2a23afda0ceaf2ef7ee74e822cc5),platform=Linux 4.8.0-49-generic x86_64) 

编辑1: 当我改变从20时迫不及待地9999999999999.这是下载4页,之后抛出同样的错误。

+0

为什么你不能尝试Web抓取?更多细节:http://www.pythonforbeginners.com/python-on-the-web/web-scraping-with-beautifulsoup – rcubefather

回答

0

我更喜欢使用CSS选择

from selenium import webdriver 
import time 

chrome_path = r"./chromedriver" 
driver = webdriver.Chrome(chrome_path) 
driver.get("http://www.biography.com/people") 
click_more = True 
while click_more: 
    time.sleep(1) 
    element = driver.find_element_by_css_selector('button.m-component-footer--loader') 
    if element: 
     element.click() 
    else: 
     click_more = False 
+0

它与你一起工作吗?因为我试过你的代码。并在第二页后抛出异常。 – user1927468

+0

按1次点击后,您的意思是第二页?是的,它适用于我..唯一的区别是我使用的是Firefox驱动程序。什么是例外......也许增加睡眠时间。 – Tobey

0

这对我来说 工作点的负荷就会消失,然后点击按钮.... customdriver.waitForElementInvisible(loading_loc) customdriver.clickElementsBySendKey(按钮, “\ n”)

from selenium import webdriver 
from selenium.webdriver.common.by import By 
from selenium.webdriver.support.wait import WebDriverWait 
from selenium.common.exceptions import NoSuchElementException 
from selenium.common.exceptions import NoAlertPresentException 
from selenium.webdriver.common.action_chains import ActionChains 
from selenium.webdriver.support import expected_conditions as EC 

from selenium.webdriver.chrome.options import Options 
from selenium.webdriver.support.ui import Select 


class SeleniumBaseClass(object): 
    def __init__(self,driver): 
     self.driver = driver 
    def open(self,URL): 
     self.driver.get(URL) 
    def driverURLChange(self,URL): 
     print("change URL" + URL) 
     self.driver.get(URL) 
    def currentUrl(self): 
     print("URL " + self.driver.current_url) 
     return self.driver.current_url 

    def locateElement(self, loc): 
     try: 
      print(loc) 
      element = WebDriverWait(self.driver,10).until(EC.visibility_of_element_located(loc)) 
      return element 
     except: 
      print ("cannot find {0} element".format(loc)) 
     return None 

    def waitForElementInvisible(self,loc): 
     #load-spinner 
     try: 
      element = WebDriverWait(self.driver,10).until(EC.invisibility_of_element_located(loc)) 
      return True 
     except: 
      print ("cannot invisibility_of_element {0} element".format(loc)) 
     return False    

    def send_key_with_Element(self,loc,value): 
     self.locateElement(loc).clear() 
     self.locateElement(loc).send_keys(value) 
    def click_with_Element(self,loc): 
     self.locateElement(loc).click() 
    def clickElementsBySendKey(self,loc,value): 
     self.locateElement(loc).send_keys(value) 




customdriver = SeleniumBaseClass(webdriver.Chrome()) 

customdriver.open("http://www.biography.com/people") 
button = (By.XPATH,"//button[contains(@class, 'm-component-footer--loader')]") 
loading_loc = (By.XPATH,"//div[@aria-hidden='true' and @translate='FEED_LOADING']") 

while True: 
    customdriver.waitForElementInvisible(loading_loc) 
    customdriver.clickElementsBySendKey(button,"\n")