0
当前我正试图解析此维基页面上的所有表格。然而,正如你可以通过我的代码所告诉的那样,我只回收一张表。我正在寻找所有的表格,并把它们放在适当的列/行中。在Wiki中解析多个表格
下面是我的代码,我对接下来需要做的事情有些遗憾。
import csv
import urllib
import requests
import codecs
import re
from bs4 import BeautifulSoup
url = \
'https://en.wikipedia.org/wiki/List_of_school_shootings_in_the_United_States'
response = requests.get(url)
html = response.content
#remove references Brackets
removeBrackets = re.sub(r'\[.*\]', '', html)
#remove Trailing 0's in numbers
removeTrails = removeBrackets.replace('0,000,001','')
soup = BeautifulSoup(removeTrails)
table = soup.find('table', {'class': 'sortable wikitable'})
# remove all extra tags in the HTML Tables
for div in soup.findAll('span', 'sortkey'):
div.extract();
for div in soup.findAll('span', 'sorttext'):
div.extract();
#scan through table
list_of_rows = []
for row in table.findAll('tr')[1:]:
list_of_cells = []
for cell in row.findAll('td'):
text = cell.text.replace(' ', '')
list_of_cells.append(text)
list_of_rows.append(list_of_cells)
#write
outfile = open("schoolshootings.csv", "wb")
writer = csv.writer(outfile)
writer.writerow([s.encode('utf8') if type(s) is unicode else s for s in row])
writer.writerow(["Date", "Location", "Deaths", "Injuries", "Description"])
writer.writerows(list_of_rows)
哇..我无法相信我错过了。谢谢! – vikinggoesrar