这是非常糟糕的,但它 “作品” 对我来说,与0xc0000022l前面回答的
进口机械化,OS 从BeautifulSoup进口BeautifulSoup 进口的urllib2
def DownloadIMGs(url): # IMPORTANT URL WITH HTTP OR HTTPS
print "From", url
dir = 'F:\Downloadss' #Dir for Downloads
basicImgFileTypes = ['png','bmp','cur','ico','gif','jpg','jpeg','psd','raw','tif']
browser = mechanize.Browser()
html = browser.open(url)
soup = BeautifulSoup(html)
image_tags = soup.findAll('img')
print "N Images:", len(image_tags)
print
#---------SAVE PATH
#check if available
if not os.path.exists(dir):
os.makedirs(dir)
#---------SAVE PATH
for image in image_tags:
#---------SAVE PATH + FILENAME (Where It is downloading)
filename = image['src']
fileExt = filename.split('.')[-1]
fileExt = fileExt[0:3]
if (fileExt in basicImgFileTypes):
print 'File Extension:', fileExt
filename = filename.replace('?', '_')
filename = os.path.join(dir, filename.split('/')[-1])
num = filename.find(fileExt) + len(fileExt)
filename = filename[:num]
else:
filename = filename.replace('?', '_')
filename = os.path.join(dir, filename.split('/')[-1]) + '.' + basicImgFileTypes[0]
print 'File Saving:', filename
#---------SAVE PATH + FILENAME (Where It is downloading)
#--------- FULL URL PATH OF THE IMG
imageUrl = image['src']
print 'IMAGE SRC:', imageUrl
if (imageUrl.find('http://') > -1 or imageUrl.find('https://') > -1):
pass
else:
if (url.find('http://') > -1):
imageUrl = url[:len('http://')]
imageUrl = 'http://' + imageUrl.split('/')[0] + image['src']
elif(url.find('https://') > -1):
imageUrl = url[:len('https://')]
imageUrl = 'https://' + imageUrl.split('/')[0] + image['src']
else:
imageUrl = image['src']
print 'IMAGE URL:', imageUrl
#--------- FULL URL PATH OF THE IMG
#--------- TRY DOWNLOAD
try:
browser.retrieve(imageUrl, filename)
print "Downloaded:", image['src'].split('/')[-1]
print
except (mechanize.HTTPError,mechanize.URLError) as e:
print "Can't Download:", image['src'].split('/')[-1]
print
pass
#--------- TRY DOWNLOAD
browser.close()
DownloadIMGs('https://stackoverflow.com/questions/15593925/downloading-a-image-using-python-mechanize')
它无法正常工作。我得到错误:“NameError:全局名称'img'未定义”。并且图像应该保存在哪里? – XVirtusX 2013-03-24 01:58:35
这里第一行“img”表示它正在寻找“”标签。将它指向包含要保存在其“src”属性中的图像的url的标记。另外,该图像将保存在与脚本相同的文件夹中,如f.write语句所示。 – 2013-03-26 19:00:18