0
class Main:
PROJECT_NAME = 'something'
HOMEPAGE = 'something'
DOMAIN_NAME = get_domain_name(HOMEPAGE)
QUEUE_FILE = PROJECT_NAME + '/queue.txt'
CRAWLED_FILE = PROJECT_NAME + '/crawled.txt'
DATA_FILE = PROJECT_NAME + '/data.txt'
NUMBER_OF_THREADS = 20
queue = Queue()
Spider(PROJECT_NAME, HOMEPAGE, DOMAIN_NAME)
# Create worker threads (will die when main exits)
def create_workers(self):
for _ in range(self.NUMBER_OF_THREADS):
t = self.threading.Thread(target=self.work)
t.daemon = True
t.start()
# Do the next job in the queue
def work(self):
while True:
url = self.queue.get()
Spider.crawl_page(self.threading.current_thread().name, url)
self.queue.task_done()
# Each queued link is a new job
def create_jobs(self):
for link in self.file_to_set(self.QUEUE_FILE):
self.queue.put(link)
self.queue.join()
self.crawl()
# Check if there are items in the queue, if so crawl them
def crawl(self):
queued_links = self.file_to_set(self.QUEUE_FILE)
if len(queued_links) > 0:
print(str(len(queued_links)) + ' links in the queue')
self.create_jobs()
create_workers()
crawl()
以上是我的代码。我已经收到:Python-名称''未定义
NameError: name 'create_workers' is not defined and NameError: name 'crawl' is not defined.
任何帮助或建议初学者在这里?
太谢谢你了!它完美的工作! – NewbieCoder