可以尝试用 Queue() 配合 enumerate，把每个 URL 的顺序编号和 URL 一起存入队列，这样结果中就保留了原始顺序。
import threading
import requests
import Queue
class UrlReader(threading.Thread):
    """Worker thread that fetches URLs pulled from a shared work queue.

    Work items are indexable pairs: item ``[1]`` is the URL to request and
    item ``[0]`` is an opaque tag (the caller below uses the position from
    ``enumerate``) that is passed through to the result unchanged.

    For every response with status 200 the tuple ``(response.url, tag)`` is
    put on ``output``.  Any other status code, or a
    ``requests.exceptions.ConnectionError``, puts the item back on the work
    queue so it is tried again.  The worker exits as soon as it finds the
    work queue empty.
    """

    def __init__(self, queue, output):
        """Store the shared queues and mark the thread as a daemon.

        :param queue: work queue of ``(tag, url)`` items.
        :param output: queue receiving ``(final_url, tag)`` results.
        """
        super(UrlReader, self).__init__()
        # Assign the ``daemon`` attribute; assigning to ``setDaemon`` would
        # only shadow the method and leave the thread non-daemonic.
        self.daemon = True
        self.queue = queue
        self.output = output

    def run(self):
        """Process work items until the work queue is found empty."""
        while True:
            # Keep the try body minimal: only the non-blocking get can
            # signal that there is nothing left to do.
            try:
                target = self.queue.get(block=False)
            except Queue.Empty:
                break

            try:
                data = requests.get(target[1])
            except requests.exceptions.ConnectionError:
                # Re-queue BEFORE task_done() so the unfinished-task count
                # never drops to zero while a retry is still pending;
                # otherwise Queue.join() could return too early.
                self.queue.put(target)
                self.queue.task_done()
                continue

            print(data.status_code)
            if data.status_code == 200:
                self.output.put((data.url, target[0]), block=False)
            else:
                # Non-200 answers are retried, same ordering rule as above.
                self.queue.put(target)
            self.queue.task_done()
def load(urlrange, num_threads):
    """Fetch every URL in ``urlrange`` using ``num_threads`` worker threads.

    :param urlrange: iterable of work items handed to ``UrlReader``
        (the caller below passes ``enumerate(urls)``).
    :param num_threads: number of ``UrlReader`` workers to start.
    :returns: list of the items the workers put on the output queue, in
        the order they were produced.
    :raises ValueError: if there is work to do but ``num_threads`` is
        less than 1 (waiting on the queue would otherwise never return).
    """
    mainqueue = Queue.Queue()
    outq = Queue.Queue()
    for url in urlrange:
        mainqueue.put(url)

    if num_threads < 1 and not mainqueue.empty():
        raise ValueError("num_threads must be at least 1 when there is work")

    mythreads = []
    for _ in range(num_threads):
        worker = UrlReader(mainqueue, outq)
        worker.start()
        mythreads.append(worker)

    # Wait until every queued item has been marked done, then wait for the
    # workers themselves to notice the empty queue and exit.
    mainqueue.join()
    for worker in mythreads:
        worker.join()

    # Drain through the public queue API instead of reaching into the
    # queue object's internal storage.
    results = []
    while True:
        try:
            results.append(outq.get(block=False))
        except Queue.Empty:
            break
    return results
# Demo driver: fetch a fixed set of sites with 10 workers and print the
# collected results.  Each URL is paired with its list position via
# enumerate before being handed to load().
urls = [
    "http://google.com",
    "http://example.com",
    "http://yahoo.com",
    "http://linkedin.com",
    "http://orkut.com",
    "http://quora.com",
    "http://facebook.com",
    "http://myspace.com",
    "http://gmail.com",
    "http://nltk.org",
    "http://cyber.com",
]
print(load(enumerate(urls), 10))
>>> [(6, 'http://facebook.com'), (9, 'http://nltk.org'), (0, 'http://google.com'), (1, 'http://example.com'), (2, 'http://yahoo.com'), (3, 'http://linkedin.com'), (4, 'http://orkut.com'), (5, 'http://quora.com'), (7, 'http://myspace.com'), (8, 'http://gmail.com'), (10, 'http://cyber.com')]
能否解释一下这个映射是用来做什么的？我没有看懂。 – Mounarajan
我已经更新了答案，应该会有所帮助。 – nehemiah