2017-03-11 55 views
1
from bs4 import BeautifulSoup 
import requests 
import time 

urls = ['http://www.soku.com/search_playlist/q_python_orderby_1_limitdate_0?site=14&page={}&spm=a2h0k.8191403.0.00'.format(str(i)) for i in range(1,30,1)] 

def UUrl(urls): 

    def Url(url): 
     single_urls = [] 
     time.sleep(1) 
     wb_data = requests.get(url) 
     soup = BeautifulSoup(wb_data.text,'lxml') 
     for single_urls in soup.find_all(class_ = "album_tit"): 
      single_url = (single_urls.a.get('href')) 
      return single_url 
      # print(single_url) 

    for url in urls: 
     Url(url) 

def get_url_title(urls,data = None): 
    urlsss = UUrl(urls) 
    for surl in urlsss: 
     wb_data = requests.get(surl) 
     soup = BeautifulSoup(wb_data.text,'lxml') 
     urlss = soup.find_all(class_="title short-title") 
     titles = soup.find_all(class_="title short-title") 

     for t_url,title in zip(urlss,titles): 
      data = { 
       'title':title.get_text(), 
       'url': (t_url.a.get('href')) 
      } 
      print(data) 

get_url_title(urls) 

回答

1

这意味着你正在遍历一个空值。 soup.findall函数可能没有返回结果。如果发生这种情况,函数返回非类型,就像python的null。然后你试图对不存在的东西进行循环。在你的代码中有几个区域可能会引发这个错误,但基本上它只是意味着for循环中的表达式IN后面的变量没有任何价值。你可以做一个。如果soup.find_all(class_ =“album_tit”)是NoneType:print(“find all function not returns a value”)