2016-06-10 52 views
6

如何用 PyQt5 v5.6 的 QWebEngineView“渲染”HTML?

我以前使用 PyQt5 v5.4.1 的 QWebPage 完成这项任务,但有人建议我改用较新的 QWebEngineView。

下面是原来的实现(通常能按预期工作,但在某些网站和某些情况下容易无限期挂起):

def render(source_html):
    """Return the markup of *source_html* after a QWebPage has loaded it.

    A QApplication is created, the HTML is handed to the page's main
    frame, and the Qt event loop runs until the page signals that loading
    has finished. The main frame's HTML at that moment is returned.
    """

    import sys
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebKitWidgets import QWebPage

    class Render(QWebPage):
        """QWebPage that captures its main frame's HTML once loading ends."""

        def __init__(self, html):
            self.html = None
            # Create the application first; the page base class is only
            # initialised afterwards.
            self.app = QApplication(sys.argv)
            super().__init__()
            self.loadFinished.connect(self._on_load_finished)
            self.mainFrame().setHtml(html)
            # Blocks here until _on_load_finished() asks the app to quit.
            self.app.exec_()

        def _on_load_finished(self, ok):
            # The load result flag is not inspected; the HTML is captured
            # regardless of its value.
            self.html = self.mainFrame().toHtml()
            self.app.quit()

    page = Render(source_html)
    return page.html

# Example usage: download a page's raw HTML with requests, then pass it to
# render() and print the result.
# NOTE: dummy_url is a placeholder; it is not defined anywhere in this snippet.
import requests
sample_html = requests.get(dummy_url).text
print(render(sample_html))

以下是我使用 QWebEngineView 的尝试。首先,在 Ubuntu 上安装并配置 PyQt5 v5.6:

# Install the PyQt5 wheel from PyPI into the per-user site-packages
# (v5.6 at the time this was written).
pip3 install --user pyqt5

# Link the missing QtWebEngine resources (ICU data, resource paks and the
# locales directory) into the wheel's libexec/ directory. The link targets
# are relative paths, so they are resolved relative to libexec/ itself.
ln -s ../resources/icudtl.dat ../resources/qtwebengine_resources.pak ../resources/qtwebengine_resources_100p.pak ../resources/qtwebengine_resources_200p.pak ../translations/qtwebengine_locales ~/.local/lib/python3.5/site-packages/PyQt5/Qt/libexec/

接下来是 Python 代码……下面的代码会导致段错误(segmentation fault):

def render(source_html):
    """Attempt to fully render HTML, JavaScript and all, via QWebEngineView.

    NOTE(review): this version is reported to end in a segmentation fault.
    See the note in _loadFinished: quit() is requested right after toHtml()
    is started, without waiting for its callback.
    """

    import sys
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEngineView

    class Render(QWebEngineView):
     def __init__(self, html):
      # Result slot; the only place that assigns a value to it is callable().
      self.html = None
      self.app = QApplication(sys.argv)
      QWebEngineView.__init__(self)
      self.loadFinished.connect(self._loadFinished)
      self.setHtml(html)
      # Runs the event loop until _loadFinished() calls quit().
      self.app.exec_()

     def _loadFinished(self, result):
      # what's going on here? how can I get the HTML from toHtml?
      # NOTE(review): toHtml() delivers its result through the callback
      # rather than returning it, and quit() is called on the very next
      # line. Presumably the callback has not run yet at that point, which
      # would leave self.html as None. Confirm against the
      # QWebEnginePage.toHtml documentation.
      self.page().toHtml(self.callable)
      self.app.quit()

     def callable(self, data):
      # Callback handed to toHtml(); stores the HTML it receives.
      self.html = data

    return Render(source_html).html

# Example usage: download a page's raw HTML with requests, then pass it to
# render() and print the result.
# NOTE: dummy_url is a placeholder; it is not defined anywhere in this snippet.
import requests
sample_html = requests.get(dummy_url).text
print(render(sample_html))

问题似乎出在对异步的 toHtml() 的调用上。它看起来应该相当简单,但我不知道该如何处理。我发现在 C++ 环境下已经有人讨论过这个问题,但我不确定如何把它转换成 Python。我怎样才能拿到 HTML?

回答

7

下面这个邮件列表讨论串中有不少关于该主题的讨论:https://riverbankcomputing.com/pipermail/pyqt/2015-January/035324.html

新的 QWebEngine 接口考虑到了底层 Chromium 引擎是异步的这一事实。因此,我们必须把异步 API 转换成同步 API。

代码如下:

def render(source_html):
    """Return the HTML produced by loading *source_html* in a QWebEngineView.

    The page's toHtml() hands its result to a callback, so this function
    keeps processing Qt events until that callback has stored a value and
    then returns the stored HTML.
    """

    import sys
    from PyQt5.QtCore import QEventLoop
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEngineView

    class Render(QWebEngineView):
        """QWebEngineView that waits in its constructor for the page HTML."""

        def __init__(self, html):
            self.html = None
            self.app = QApplication(sys.argv)
            super().__init__()
            self.loadFinished.connect(self._on_load_finished)
            self.setHtml(html)

            # Same flag combination on every pass, so build it once.
            wait_flags = (
                QEventLoop.ExcludeUserInputEvents
                | QEventLoop.ExcludeSocketNotifiers
                | QEventLoop.WaitForMoreEvents
            )
            # Pump events by hand until _store_html() has set self.html.
            while self.html is None:
                self.app.processEvents(wait_flags)
            self.app.quit()

        def _store_html(self, data):
            # Callback given to toHtml(); receives the page's HTML.
            self.html = data

        def _on_load_finished(self, ok):
            # Request the HTML once loading has finished; the result
            # arrives later through _store_html().
            self.page().toHtml(self._store_html)

    view = Render(source_html)
    return view.html

# Example usage: download a page's raw HTML with requests, then pass it to
# render() and print the result.
# NOTE: dummy_url is a placeholder; it is not defined anywhere in this snippet.
import requests
sample_html = requests.get(dummy_url).text
print(render(sample_html))
4

正如你指出的那样,Qt 5.4 依赖异步调用。没有必要使用事件循环(如你的答案所示),因为你唯一的错误是在 toHtml 调用完成之前就调用了 quit:

def render(source_html):
    """Return the HTML produced by loading *source_html* in a QWebEngineView.

    The Qt event loop is started with exec_() and is only stopped from the
    toHtml() callback, i.e. after the HTML has been stored.
    """

    import sys
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEngineView

    class Render(QWebEngineView):
        """QWebEngineView that runs the event loop until it has the HTML."""

        def __init__(self, html):
            self.html = None
            self.app = QApplication(sys.argv)
            super().__init__()
            self.loadFinished.connect(self._on_load_finished)
            self.setHtml(html)
            # Blocks until _store_html_and_quit() stops the application.
            self.app.exec_()

        def _on_load_finished(self, ok):
            # toHtml() is asynchronous: it reports back through the
            # callback, so the app must stay alive until that callback runs.
            self.page().toHtml(self._store_html_and_quit)

        def _store_html_and_quit(self, data):
            self.html = data
            # The result is saved, so leaving the event loop is now safe.
            self.app.quit()

    view = Render(source_html)
    return view.html

# Example usage: download a page's raw HTML with requests, then pass it to
# render() and print the result.
# NOTE: dummy_url is a placeholder; it is not defined anywhere in this snippet.
import requests
sample_html = requests.get(dummy_url).text
print(render(sample_html))
1

Six 和 Veehmot 的答案很好,但我发现它们不能满足我的需求,因为它们没有展开我想抓取的页面中的下拉元素。稍作修改即可解决这个问题:

def render(url):
    """Load *url* in a QWebEngineView and return the resulting page HTML.

    The view loads the URL itself; Qt events are then processed until the
    toHtml() callback has stored the page's HTML, which is returned.
    """

    import sys
    from PyQt5.QtCore import QEventLoop, QUrl
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEngineView

    class Render(QWebEngineView):
        """QWebEngineView that waits in its constructor for the page HTML."""

        def __init__(self, url):
            self.html = None
            self.app = QApplication(sys.argv)
            super().__init__()
            self.loadFinished.connect(self._on_load_finished)
            self.load(QUrl(url))

            # Same flag combination on every pass, so build it once.
            wait_flags = (
                QEventLoop.ExcludeUserInputEvents
                | QEventLoop.ExcludeSocketNotifiers
                | QEventLoop.WaitForMoreEvents
            )
            # Pump events by hand until _store_html() has set self.html.
            while self.html is None:
                self.app.processEvents(wait_flags)
            self.app.quit()

        def _store_html(self, data):
            # Callback given to toHtml(); receives the page's HTML.
            self.html = data

        def _on_load_finished(self, ok):
            # Request the HTML once loading has finished; the result
            # arrives later through _store_html().
            self.page().toHtml(self._store_html)

    view = Render(url)
    return view.html


# Example usage: this render() loads the URL itself, so no separate requests
# call is made.
# NOTE: dummy_url is a placeholder; it is not defined anywhere in this snippet.
print(render(dummy_url))