问题与按钮命令Tkinter Python

所以我有这个程序搜索年度报告（10-K）SEC SEC埃德加数据库，并返回列表框中的40个不同项目的列表。好吧，我想创建一个显示在列表框中的下一个40个项目，其中下面的代码完成了“未来40”按钮：问题与按钮命令Tkinter Python

def Next(): 

global entryWidget 

page = 'http://www.sec.gov/cgi-bin/browse-edgar?company=&match=&CIK=' + entryWidget.get().strip() + '&filenum=&State=&Country=&SIC=&owner=exclude&Find=Find+Companies&action=getcompany' 
sock = urllib.urlopen(page) 
raw = sock.read() 
soup = BeautifulSoup(raw) 

npar = str(soup.find(value="Next 40")) 
index = npar.find('/cgi') 
index2 = npar.find('count=40') + len('count=40') 
nextpage = 'http://www.sec.gov' + npar[index:index2] 

sock2 = urllib.urlopen(nextpage) 
raw2 = sock2.read() 
soup2 = BeautifulSoup(raw2) 

psoup = str(soup2.findAll(nowrap=True)) 

myparser = MyParser() 
myparser.parse(psoup) 

filinglist = myparser.get_descriptions() 
linklist = myparser.get_hyperlinks() 

filinglist = [s for s in filinglist if s != 'Documents'] 
filinglist = [s for s in filinglist if s != 'Documents Interactive Data'] 
filinglist = [s for s in filinglist if not re.match(r'\d{3}-', s)] 

linklist = [s for s in linklist if not s.startswith('/cgi-')] 

Lb1.delete(0, END) 

counter = 0 

while counter < len(filinglist): 
    Lb1.insert(counter, filinglist[counter]) 
    counter = counter +1

当按下按钮时，你可以看到，它读取原始链接（页面）而不是在html网站（页面）上查找“Next 40”超链接。然后解析新的html文档（nextpage），然后获取项目名称和关联的链接。现在，此代码成功从原始页面转到下一页，但它只能显示下一页。

那么我怎么能够使（nextpage）进入原始（页面），然后能够列出（nextnextpage）html文件中的项目，每次我按下'下一步'按钮？对不起，如果这是令人困惑的，我真的不知道任何其他方式来解释它。

欲了解更多的解释，这里是我想要解析的实际站点链接：http://www.sec.gov/cgi-bin/browse-edgar ... getcompany 我想要'下一步'按钮来继续检索该网站'下一个40'按钮的HTML超链接。

这里是我的情况下，整个程序代码，您需要：

import BeautifulSoup 
from BeautifulSoup import BeautifulSoup 
import urllib 
import sgmllib 
from Tkinter import * 
import tkMessageBox 
import re 

class MyParser(sgmllib.SGMLParser): 

def parse(self, psoup): 
    self.feed(psoup) 
    self.close() 

def __init__(self, verbose=0): 
    sgmllib.SGMLParser.__init__(self, verbose) 
    self.descriptions = [] 
    self.hyperlinks = [] 
    self.inside_td_element = 0 
    self.starting_description = 0 

def start_td(self, attributes): 
    for name, value in attributes: 
     if name == "nowrap": 
      self.inside_td_element = 1 
      self.starting_description = 1 

def end_td(self): 
    self.inside_td_element = 0 

def start_a(self, attributes): 
    for name, value in attributes: 
     if name == "href": 
      self.hyperlinks.append(value) 

def handle_data(self, data): 
    if self.inside_td_element: 
     if self.starting_description: 
      self.descriptions.append(data) 
      self.starting_description = 0 
     else: 
      self.descriptions[-1] += data 

def get_descriptions(self): 
    return self.descriptions 

def get_hyperlinks(self): 
    return self.hyperlinks 

def Submit(): 

global entryWidget 

if entryWidget.get().strip() == "": 
    tkMessageBox.showerror("Tkinter Entry Widget", "Enter a text value") 
else: 
    page = 'http://www.sec.gov/cgi-bin/browse-edgar?company=&match=&CIK=' + entryWidget.get().strip() + '&filenum=&State=&Country=&SIC=&owner=exclude&Find=Find+Companies&action=getcompany' 
    sock = urllib.urlopen(page) 
    raw = sock.read() 
    soup = BeautifulSoup(raw) 
    psoup = str(soup.findAll(nowrap=True)) 
    myparser = MyParser() 
    myparser.parse(psoup) 

    filinglist = myparser.get_descriptions() 
    linklist = myparser.get_hyperlinks() 

    filinglist = [s for s in filinglist if s != 'Documents'] 
    filinglist = [s for s in filinglist if s != 'Documents Interactive Data'] 
    filinglist = [s for s in filinglist if not re.match(r'\d{3}-', s)] 

    linklist = [s for s in linklist if not s.startswith('/cgi-')] 

    counter = 0 

    while counter < len(filinglist): 
     Lb1.insert(counter, filinglist[counter]) 
     counter = counter +1 

    downloadbutton.configure(state=NORMAL) 
    nextbutton.configure(state=NORMAL) 

def Next(): 

global entryWidget 

page = 'http://www.sec.gov/cgi-bin/browse-edgar?company=&match=&CIK=' + entryWidget.get().strip() + '&filenum=&State=&Country=&SIC=&owner=exclude&Find=Find+Companies&action=getcompany' 
sock = urllib.urlopen(page) 
raw = sock.read() 
soup = BeautifulSoup(raw) 

npar = str(soup.find(value="Next 40")) 
index = npar.find('/cgi') 
index2 = npar.find('count=40') + len('count=40') 
nextpage = 'http://www.sec.gov' + npar[index:index2] 

sock2 = urllib.urlopen(nextpage) 
raw2 = sock2.read() 
soup2 = BeautifulSoup(raw2) 

psoup = str(soup2.findAll(nowrap=True)) 

myparser = MyParser() 
myparser.parse(psoup) 

filinglist = myparser.get_descriptions() 
linklist = myparser.get_hyperlinks() 

filinglist = [s for s in filinglist if s != 'Documents'] 
filinglist = [s for s in filinglist if s != 'Documents Interactive Data'] 
filinglist = [s for s in filinglist if not re.match(r'\d{3}-', s)] 

linklist = [s for s in linklist if not s.startswith('/cgi-')] 

Lb1.delete(0, END) 

counter = 0 

while counter < len(filinglist): 
    Lb1.insert(counter, filinglist[counter]) 
    counter = counter +1 

previousbutton.configure(state=NORMAL) 
nextbutton.configure(state=DISABLED) 

def Previous(): 

global entryWidget 

page = 'http://www.sec.gov/cgi-bin/browse-edgar?company=&match=&CIK=' + entryWidget.get().strip() + '&filenum=&State=&Country=&SIC=&owner=exclude&Find=Find+Companies&action=getcompany' 
sock = urllib.urlopen(page) 
raw = sock.read() 
soup = BeautifulSoup(raw) 

psoup = str(soup.findAll(nowrap=True)) 

myparser = MyParser() 
myparser.parse(psoup) 

filinglist = myparser.get_descriptions() 
linklist = myparser.get_hyperlinks() 

filinglist = [s for s in filinglist if s != 'Documents'] 
filinglist = [s for s in filinglist if s != 'Documents Interactive Data'] 
filinglist = [s for s in filinglist if not re.match(r'\d{3}-', s)] 

linklist = [s for s in linklist if not s.startswith('/cgi-')] 

Lb1.delete(0, END) 

counter = 0 

while counter < len(filinglist): 
    Lb1.insert(counter, filinglist[counter]) 
    counter = counter +1 

nextbutton.configure(state=NORMAL) 
previousbutton.configure(state=DISABLED) 

if __name__ == "__main__": 

root = Tk() 
root.title("SEC Edgar Search") 
root["padx"] = 10 
root["pady"] = 25 

top = Frame(root) 
bottom = Frame(root) 
bottom2 = Frame(root) 
top.pack(side=TOP) 
bottom.pack(side=BOTTOM, fill=BOTH, expand=True) 
bottom2.pack(side=BOTTOM, fill=BOTH, expand=True) 

textFrame = Frame(root) 

entryLabel = Label(textFrame) 
entryLabel["text"] = "Ticker symbol:" 
entryLabel.pack(side=TOP) 

entryWidget = Entry(textFrame) 
entryWidget["width"] = 15 
entryWidget.pack(side=LEFT) 

textFrame.pack() 

scrollbar = Scrollbar(root) 
scrollbar.pack(side=RIGHT, fill=Y) 

Lb1 = Listbox(root, width=20, height=15, yscrollcommand=scrollbar.set, selectmode=EXTENDED) 
Lb1.pack() 

scrollbar.config(command=Lb1.yview) 

submitbutton = Button(root, text="Submit", command=Submit) 
submitbutton.pack(in_=bottom2, side=TOP) 

downloadbutton = Button(root, text="Download") 
downloadbutton.pack(in_=bottom2, side=TOP) 
downloadbutton.configure(state=DISABLED) 

previousbutton = Button(root, text="Previous 40", command=Previous) 
previousbutton.pack(in_=bottom, side=LEFT) 
previousbutton.configure(state=DISABLED) 

nextbutton = Button(root, text="Next 40", command=Next) 
nextbutton.pack(in_=bottom, side=LEFT) 
nextbutton.configure(state=DISABLED) 

root.mainloop()

来源

2011-07-30 kr21

使用应用程序类，而不是全局的。目前你总是下载第一页。但是，你的应用程序类应该缓存的当前页面，该页面next使用从“下一个40”的形式按钮获得的onClick价值的“汤”：

class Application(Frame): 
    def __init__(self, parent=None): 
     Frame.__init__(self, parent) 
     self.pack() 

     self.top = Frame(self) 
     self.bottom = Frame(self) 
     self.bottom2 = Frame(self) 
     self.top.pack(side=TOP) 
     self.bottom.pack(side=BOTTOM, fill=BOTH, expand=True) 
     self.bottom2.pack(side=BOTTOM, fill=BOTH, expand=True) 
     #... 
     self.submitbutton = Button(self, text="Submit", command=self.submit) 
     self.submitbutton.pack(in_=self.bottom2, side=TOP) 
     #... 

    #... 

    def submit(self): 
     page = ('http://www.sec.gov/cgi-bin/browse-edgar?company=&match=&CIK=' + 
       self.entryWidget.get().strip() + 
       '&filenum=&State=&Country=&SIC=&owner=exclude' 
       '&Find=Find+Companies&action=getcompany') 
     #... 
     self.soup = ... 

    def next(self): 
     #... 
     #there must be a better way than this to extract the onclick value 
     #but I don't use/know BeautifulSoup to help with this part 

     npar = str(self.soup.find(value="Next 40")) 
     index1 = npar.find('/cgi') 
     index2 = npar.find('count=40') + len('count=40') 
     page = 'http://www.sec.gov' + npar[index1:index2] 

     sock = urllib.urlopen(page) 
     raw = sock.read() 
     self.soup = BeautifulSoup(raw) 

     #... 

if __name__ == '__main__': 
    root = Tk() 
    root.title("SEC Edgar Search") 
    root["padx"] = 10 
    root["pady"] = 25 

    app = Application(root) 

    app.mainloop() 
    root.destroy()

对于每一个新页面的的onClick链接更新&开始参数。所以或者你可以在你的班级增加一个计数器，而不用费力地解析当前的汤来获得价值。

来源

2011-07-31 02:36:19 eryksun

我尝试使用以下代码创建一个新类：Class Application（）：def submit（self）：。。。等等。但我不断收到此异常：Tkinter回调中的异常 Traceback（最近调用最后一次）：文件“C：\ Python27 \ lib \ lib-tk \ Tkinter.py”，第1410行，在__call__中 return self.func * args） TypeError：无约束方法必须使用Application实例作为第一个参数调用Submit（）（取而代之）。任何想法是什么造成这个？ – kr21

是的，这个作品非常完美，非常感谢！ – kr21

问题与按钮命令Tkinter Python

回答

相关问题