from urllib.request import Request, urlopen, urlretrieve
from bs4 import BeautifulSoup
def save_picture(self, word):
search_string = "https://www.google.nl/search?q={}&tbm=isch&tbs=isz:m".format(word)
request = Request(search_string, headers={'User-Agent': 'Mozilla/5.0'})
raw_website = urlopen(request).read()
soup = BeautifulSoup(raw_website, "html.parser")
image = soup.find("img").get("src")
urlretrieve(image, "{}.jpg".format(word))
的失败的urlopen我写上面的函数从谷歌图片保存第一TUMBNAIL图像。然而,问题是,当我输入一个非ansii字时会失败,例如:mañanaurllib.request里的Unicode字符串
错误消息来自urllib模块内。我使用python 3.6
Traceback (most recent call last): File "c:\users\xxx\Desktop\script.py", line 19, in main() File "c:\users\xxx\Desktop\script.py", line 16, in main save_picture("mañana") File "c:\users\xxx\Desktop\script.py", line 8, in save_picture raw_website = urlopen(request).read() File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 223, in urlopen return opener.open(url, data, timeout) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 526, in open response = self._open(req, data) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 544, in _open '_open', req) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 504, in _call_chain result = func(*args) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 1361, in https_open context=self._context, check_hostname=self._check_hostname) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 1318, in do_open encode_chunked=req.has_header('Transfer-encoding')) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 1239, in request self._send_request(method, url, body, headers, encode_chunked) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 1250, in _send_request self.putrequest(method, url, **skips) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 1117, in putrequest self._output(request.encode('ascii')) UnicodeEncodeError: 'ascii' codec can't encode character '\xf1' in position 16: ordinal not in range(128)
编辑:读了之后我才发现有这个任务,urllib的,urllib2的和请求几个库(也通过PIP:urllib3)。我得到这个错误,因为我正在使用折旧的库吗?
EDIT2:添加了完整的追溯
发布完整的回溯,所以我们有上下文。 –