# count is an iterator that just keeps going
# from itertools import count
# but I'm not going to use it, because you want to set a reasonable limit
# otherwise you'll loop endlessly if your end condition fails
# requests is third party but generally better than the standard libs
import requests
# Seconds to wait on each request before giving up. requests applies no
# timeout unless one is passed explicitly, so without this a stalled
# connection would block the script indefinitely.
REQUEST_TIMEOUT_SECONDS = 10

# Listing URL template; '{}' is replaced with the page number.
base_url = 'http://www.trademe.co.nz/browse/categorylistings.aspx?v=list&rptpath=4-380-50-7145-&mcatpath=sports%2fcycling%2fmountain-bikes%2ffull-suspension&page={}&sort_order=default'

# Walk the result pages in order. The range is a hard upper bound
# (pages 1 through 29), so the loop still terminates even if neither of the
# end conditions below ever triggers.
for i in range(1, 30):
    # A connection failure or timeout raises a requests exception here and
    # stops the script rather than hanging.
    result = requests.get(base_url.format(i), timeout=REQUEST_TIMEOUT_SECONDS)
    # Any non-200 response ends the crawl.
    if result.status_code != 200:
        break
    content = result.content.decode('utf-8')
    # Note, this is actually quite fragile
    # For example, they have 2 spaces between 'no' and 'listings'
    # so looking for 'no listings' would break
    # for a more robust solution be more clever.
    if 'Sorry, there are currently no' in content:
        break
    # do stuff with your content here
    print(i)
改成 `if err in response: break` 怎么样?其中 `err` 是你上面提到的错误信息。不过,使用 Trade Me 的 API 很可能会更简洁 – Geotob
我建议做一个守规矩的互联网公民,使用他们的 API,而不是窃取他们的数据:http://developer.trademe.co.nz/api-terms/terms-and-conditions/ – IanAuld