2017-05-14 31 views
0
# Module-level default for the search keyword; nothing in the visible code
# reads this global (test() and the spider class use their own names).
keyword = ''

'''Collect search keywords from the user and build the search URL'''
def test():
    """Interactively collect search keywords and build a USPTO search URL.

    Prompts for a command in a loop: ``a`` asks for a keyword and stores it
    (blank and duplicate keywords are ignored), ``q`` ends the loop, and any
    other command prints a hint and prompts again. The loop also ends when
    standard input is closed or cannot be read.

    Returns:
        The PatFT boolean-search URL whose ``TERM1`` parameter holds the
        percent-encoded keywords joined with ``+``, or ``None`` when no
        keyword was entered.
    """
    # On Python 2 the builtin input() evaluates what is typed, so entering
    # ``a`` fails with "NameError: name 'a' is not defined". raw_input() is
    # the function that returns the raw text there; Python 3 only has input().
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # quote_plus lives in different modules on Python 3 and Python 2.
    try:
        from urllib.parse import quote_plus
    except ImportError:
        from urllib import quote_plus

    def _ask(prompt):
        """Return the stripped reply, or None when stdin is exhausted/unreadable."""
        try:
            return read_line(prompt).strip()
        except (EOFError, OSError):
            return None

    keywords = []
    while True:
        print('what do you want to do?(a: add a key word for searching, q:quit adding words and start)')
        command = _ask('command:')
        if command is None or command == 'q':
            break
        if command == 'a':
            word = _ask('keyword: ')
            if word is None:
                break
            if word and word not in keywords:
                keywords.append(word)
        else:
            print('please input a valid command')

    if not keywords:
        return None

    # Encode each keyword so spaces, '&', '=' and similar characters cannot
    # break the query string; '+' separates the individual keywords.
    search_string = '+'.join(quote_plus(word) for word in keywords)
    print(search_string)

    return (
        'http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&p=1'
        '&u=%2Fnetahtml%2FPTO%2Fsearch-bool.html&r=0&f=S&l=50&TERM1='
        + search_string
        + '&FIELD1=&co1=AND&TERM2=&FIELD2=&d=PTXT'
    )
'''Pass the generated URL to the Scrapy crawler as its start URL'''
class Uspto(scrapy.Spider):
    """Scrapy spider whose start URL is the USPTO search URL built by ``test()``.

    ``test()`` is called while the class body executes, so the keyword
    prompt appears as soon as this class definition is evaluated.
    """

    name = 'uspto'

    # Scrapy expects bare domain names here, not URLs with a scheme.
    allowed_domains = ['patft.uspto.gov']

    # Despite the name, this holds the full search URL returned by test(),
    # or None when the user entered no keyword.
    keyword = test()

    # Leave the list empty in the no-keyword case so Scrapy is never handed
    # None as a start URL.
    start_urls = [keyword] if keyword else []
** 

enter image description here 错误截图:Python Scrapy 爬虫无法获得关键字

我想从键盘输入关键字,然后启动爬虫;现在的问题是获取关键字的方法在运行时出错。

回答

0

根据您的错误信息 NameError: name 'a' is not defined,您使用的似乎是 Python 2 而不是 Python 3;如果是这样,请使用 raw_input() 代替 input()

command = raw_input("commands:") 

raw_input() 返回用户输入的字符串;而在 Python 2 中,input() 会把输入内容当作表达式求值,因此输入 a 会触发上述 NameError。关于 input() 与 raw_input() 的更多细节,可以参考 this answer。

+0

非常感谢您回答我的问题。这真的是一个版本问题。谢谢 – Matcha00

+0

如果是解决方案,请将其标记为接受答案,谢谢:) –