2013-02-19 24 views
0

这个程序基本上去到一个网站,并获取特定的div标签中的所有链接。 然后导航到每个链接并获取这些页面中的链接。在VB.net中重新加载数据和类变量的形式

但是,成功获取第一个链接并前往第一个网站后,它会重新检查窗体数据并将类变量重置为其原始值,从而丢失所有链接。

它为什么重新加载窗体数据,我如何保留以前的数据?

Imports System.Text.RegularExpressions 

Public Class Form 

Private stage As String = "Getting Page Links" 
Dim PageUrls() As String = {} 
Dim PageHtml() As String = {} 

Private Sub Form_Load(sender As Object, e As System.EventArgs) Handles Me.Load 

    WebBrowser.Navigate("websiteurlhidden") 


End Sub 

Private Sub WebBrowser_DocumentCompleted(sender As Object, e As System.Windows.Forms.WebBrowserDocumentCompletedEventArgs) Handles WebBrowser.DocumentCompleted 
    If WebBrowser.ReadyState = WebBrowserReadyState.Complete Then 

     Try 




      Select Case stage 
       Case "Getting Page Links" 

        Dim htmlDocument As HtmlDocument = Me.WebBrowser.Document 
        Dim htmlElementCollection As HtmlElementCollection = htmlDocument.GetElementsByTagName("DIV") 

        For Each htmlElement As HtmlElement In HtmlElementCollection 

         Dim imgUrl As String = htmlElement.GetAttribute("classname") 
         If imgUrl = " nine " Then 



          Dim linkIndex As Integer = 0 
          Dim index2 As Integer = 0 
          For Each link As HtmlElement In htmlElement.GetElementsByTagName("a") 
           If linkIndex >= 26 Then 
            If Not String.IsNullOrEmpty(link.GetAttribute("href")) Then 
             ReDim Preserve PageUrls(index2) 

             PageUrls(index2) = link.GetAttribute("href") 
             ' MessageBox.Show(link.GetAttribute("href")) 
             ' MessageBox.Show(PageUrls(linkIndex)) 
             index2 = index2 + 1 
            End If 
           End If 
           linkIndex = linkIndex + 1 
          Next 

          For Each str As String In PageUrls 
           '   MessageBox.Show(str) 
          Next 
          stage = "Going through pages" 

         End If 

        Next 

        GoThroughPages() 

       Case "Going through pages" 

        Dim htmlDocument As HtmlDocument = Me.WebBrowser.Document 
        Dim htmlElementCollection As HtmlElementCollection = htmlDocument.GetElementsByTagName("DIV") 
        Dim linkIndex As Integer = 0 
        For Each htmlElement As HtmlElement In HtmlElementCollection 

         Dim imgUrl As String = htmlElement.GetAttribute("classname") 
         If imgUrl = " nine " Then 
          ReDim Preserve PageHtml(linkIndex) 
          'need to make permanent. 
          PageHtml(linkindex) = htmlElement.ToString() 

          Dim PageDownloadLinks = htmlElement.GetElementsByTagName("a") 

         End If 
         End 
        Next 

        ' GoThroughPages() 

       Case Else 
        MessageBox.Show("case else") 

      End Select 

     Catch ex As Exception 
      ' MessageBox.Show(ex.Message & " " & ex.ToString) 
     End Try 
    End If 

End Sub 

Private Sub GoThroughPages() 

    For linkIndex As Integer = 0 To PageUrls.Count - 1 


     MessageBox.Show(PageUrls(linkIndex)) 
     WebBrowser.Navigate(PageUrls(linkIndex)) 
     Delay(3) 
     While (WebBrowser.IsBusy) 

      Application.DoEvents() 
     End While 
    Next 

End Sub 

Sub Delay(ByVal dblSecs As Double) 

    Const OneSec As Double = 1.0#/(1440.0# * 60.0#) 
    Dim dblWaitTil As Date 
    Now.AddSeconds(OneSec) 
    dblWaitTil = Now.AddSeconds(OneSec).AddSeconds(dblSecs) 
    Do Until Now > dblWaitTil 
     Application.DoEvents() ' Allow windows messages to be processed 
    Loop 

End Sub 

End Class 
+0

也许你没有成功计算数组的新边界。使用泛型,以便您只需将项目添加到列表中,而不必担心其大小。 – 2013-02-19 22:28:35

回答

0

简单的解决办法是改变:

Dim linkIndex As Integer = 0 

Static linkIndex As Integer = 0 

这将导致linkIndex保留其调用之间的值,ReDim Preserve PageHtml(linkIndex)不会在每次调用复位。

相关问题