2012-12-28 15 views
-1

我正在写一个.net网络蜘蛛。虽然它在我的一个站点(大约20页)上效果很好,但它与我管理的另一个站点(大约500页)发生System.StackOverflowException异常爆炸。有关StackOverflowException的快速(希望)

我正在开发一款win7 64bit i3笔记本电脑,内含8g ram,128g hyperx ssd并且没有交换文件。

我的问题是....我得到这个异常抛出,因为我没有交换文件?

cpu使用率(vs2010调试过程)只有74-75m的ram使用率,只能达到34%左右。

如果是这样的话,我该如何确保它不会发生?

这是没有递归。

代码:

Imports System.Reflection 
Imports System.Net 
Imports Superstar.Html.Linq 

Public Class Downloader 
Implements IDisposable 

''' <summary> 
''' Get the returned downloaded string 
''' </summary> 
''' <value></value> 
''' <returns></returns> 
''' <remarks></remarks> 
Public ReadOnly Property ReturnString As String 
    Get 
     Return _StrReturn 
    End Get 
End Property 
Private Property _StrReturn As String 

''' <summary> 
''' Get the returned downloaded byte array 
''' </summary> 
''' <value></value> 
''' <returns></returns> 
''' <remarks></remarks> 
Public ReadOnly Property ReturnBytes As Byte() 
    Get 
     Return _FSReturn 
    End Get 
End Property 
Private Property _FSReturn As Byte() 


Private Property _UserAgent As String = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13" 
Private Property DataReceived As Boolean = False 

''' <summary> 
''' Download a string, but do not block the calling thread 
''' </summary> 
''' <param name="_Path"></param> 
''' <remarks></remarks> 
Public Sub DownloadString(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing) 
    SetAllowUnsafeHeaderParsing20() 
    Using wc As New Net.WebClient() 
     With wc 
      Dim _ct As Long = 0 
      DataReceived = False 
      .Headers.Add("user-agent", _UserAgent) 
      .DownloadStringAsync(New System.Uri(_Path)) 
      AddHandler .DownloadStringCompleted, AddressOf StringDownloaded 
      Do While Not DataReceived 
       If _Worker IsNot Nothing Then 
        _ct += 1 
        ReportProgress(_ct, _Worker) 
       End If 
      Loop 
     End With 
    End Using 
End Sub 

''' <summary> 
''' Download a file, but do not block the calling thread 
''' </summary> 
''' <param name="_Path"></param> 
''' <remarks></remarks> 
Public Sub DownloadFile(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing) 
    SetAllowUnsafeHeaderParsing20() 
    Using wc As New Net.WebClient() 
     With wc 
      Dim _ct As Long = 0 
      DataReceived = False 
      .Headers.Add("user-agent", _UserAgent) 
      .DownloadDataAsync(New System.Uri(_Path)) 
      AddHandler .DownloadDataCompleted, AddressOf FileStreamDownload 
      Do While Not DataReceived 
       If _Worker IsNot Nothing Then 
        _ct += 1 
        ReportProgress(_ct, _Worker) 
       End If 
      Loop 
     End With 
    End Using 
End Sub 

''' <summary> 
''' Download a parsable HDocument, for using HtmlToLinq 
''' </summary> 
''' <param name="_Path"></param> 
''' <returns></returns> 
''' <remarks></remarks> 
Public Function DownloadHDoc(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing) As HDocument 
    Try 
     ' 
     ' 
     ' 
     ' 
     ' 
     ' 
     'StackOverFlowException Occurring Here! 
     DownloadString(_Path, _Worker) 
     Return HDocument.Parse(_StrReturn) 
    Catch soex As StackOverflowException 
     'put some logging in here, with the path attempted 
     Return Nothing 
    Catch ex As Exception 
     SetAllowUnsafeHeaderParsing20() 
     Return HDocument.Load(_Path) 
    End Try 
End Function 

#Region "Internals" 

Private Sub SetAllowUnsafeHeaderParsing20() 
    Dim a As New System.Net.Configuration.SettingsSection 
    Dim aNetAssembly As System.Reflection.Assembly = Assembly.GetAssembly(a.GetType) 
    Dim aSettingsType As Type = aNetAssembly.GetType("System.Net.Configuration.SettingsSectionInternal") 
    Dim args As Object() = Nothing 
    Dim anInstance As Object = aSettingsType.InvokeMember("Section", BindingFlags.Static Or BindingFlags.GetProperty Or BindingFlags.NonPublic, Nothing, Nothing, args) 
    Dim aUseUnsafeHeaderParsing As FieldInfo = aSettingsType.GetField("useUnsafeHeaderParsing", BindingFlags.NonPublic Or BindingFlags.Instance) 
    aUseUnsafeHeaderParsing.SetValue(anInstance, True) 
End Sub 

Private Sub FileStreamDownload(ByVal sender As Object, ByVal e As DownloadDataCompletedEventArgs) 
    If e.Cancelled = False AndAlso e.Error Is Nothing Then 
     DataReceived = True 
     _FSReturn = DirectCast(e.Result, Byte()) 
    Else 
     _FSReturn = Nothing 
    End If 
End Sub 

Private Sub StringDownloaded(ByVal sender As Object, ByVal e As DownloadStringCompletedEventArgs) 
    If e.Cancelled = False AndAlso e.Error Is Nothing Then 
     DataReceived = True 
     _StrReturn = DirectCast(e.Result, String) 
    Else 
     _StrReturn = String.Empty 
    End If 
End Sub 

#End Region 

#Region "IDisposable Support" 
Private disposedValue As Boolean ' To detect redundant calls 

' IDisposable 
Protected Overridable Sub Dispose(disposing As Boolean) 
    If Not Me.disposedValue Then 
     If disposing Then 
     End If 
     _StrReturn = String.Empty 
     _FSReturn = Nothing 
    End If 
    Me.disposedValue = True 
End Sub 

Public Sub Dispose() Implements IDisposable.Dispose 
    Dispose(True) 
    GC.SuppressFinalize(Me) 
End Sub 
#End Region 

End Class 

而且调用此当计算器被happenning

Private Function PopulateSEOList(Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing) As List(Of Typing.SEO) 
    Dim _L = LinkList, _Ct As Long = 0 
    Dim _NL As New List(Of Typing.SEO) 
    Dim _EL As Typing.SEO.Elements = Nothing 
    Dim _Doc As HDocument = Nothing, _Keywords As String = String.Empty, _Description As String = String.Empty, _Content As HElement = Nothing 
    For i As Long = 0 To _L.Count - 1 
     Try 
      _Ct += 1 
      Using _HDoc As New Downloader 
       With _HDoc 
        _Doc = .DownloadHDoc(_L(i).SiteUrl) 
       End With 
      End Using 
      Tasks.Parallel.Invoke(Sub() 
             'Keywords 
             For Each Item In _Doc.Descendants("meta") 
              If Item.Attribute("name") = "keywords" Then 
               _Keywords = Item.Attribute("content").Value 
               'Exit For 
              End If 
             Next 
            End Sub, 
            Sub() 
             'Description 
             For Each Item In _Doc.Descendants("meta") 
              If Item.Attribute("name") = "description" Then 
               _Description = Item.Attribute("content").Value 
               'Exit For 
              End If 
             Next 
            End Sub, 
            Sub() 
             If _Doc.Descendants("body") IsNot Nothing Then 
              _Content = _Doc.Descendants("body").FirstOrDefault 
             End If 
            End Sub, 
            Sub() 
             _EL = New Typing.SEO.Elements() With { 
             .H1 = If(_Doc.Descendants("h1") IsNot Nothing, (From n In _Doc.Descendants("h1").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .H2 = If(_Doc.Descendants("h2") IsNot Nothing, (From n In _Doc.Descendants("h2").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .H3 = If(_Doc.Descendants("h3") IsNot Nothing, (From n In _Doc.Descendants("h3").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .H4 = If(_Doc.Descendants("h4") IsNot Nothing, (From n In _Doc.Descendants("h4").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .H5 = If(_Doc.Descendants("h5") IsNot Nothing, (From n In _Doc.Descendants("h5").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .H6 = If(_Doc.Descendants("h6") IsNot Nothing, (From n In _Doc.Descendants("h6").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .UL = If(_Doc.Descendants("ul") IsNot Nothing, (From n In _Doc.Descendants("ul").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .OL = If(_Doc.Descendants("ol") IsNot Nothing, (From n In _Doc.Descendants("ol").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .STRONG = If(_Doc.Descendants("strong") IsNot Nothing OrElse _Doc.Descendants("b") IsNot Nothing, 
                (From n In _Doc.Descendants("strong").AsParallel() 
                Select n.Value).Union(From n In _Doc.Descendants("b").AsParallel() 
                Select n.Value).ToList(), Nothing), 
             .BLOCKQUOTE = If(_Doc.Descendants("blockquote") IsNot Nothing, (From n In _Doc.Descendants("blockquote").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .EM = If(_Doc.Descendants("em") IsNot Nothing OrElse _Doc.Descendants("i") IsNot Nothing, 
               (From n In _Doc.Descendants("em").AsParallel() 
               Select n.Value).Union(From n In _Doc.Descendants("i").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .A = If(_Doc.Descendants("a") IsNot Nothing, (From n In _Doc.Descendants("a").AsParallel() 
              Select New Typing.SEO.Elements.Links() With { 
               .Content = n.Value, 
               .Title = If(n.Attribute("title") IsNot Nothing, 
                  n.Attribute("title").Value, 
                  Nothing), 
               .Target = If(n.Attribute("target") IsNot Nothing, 
                  n.Attribute("target").Value, 
                  Nothing), 
               .Rel = If(n.Attribute("rel") IsNot Nothing, 
                  n.Attribute("rel").Value, 
                  Nothing), 
               .Href = If(n.Attribute("href") IsNot Nothing, 
                  n.Attribute("href").Value, 
                  Nothing) 
              }).ToList(), Nothing), 
             .IMG = If(_Doc.Descendants("img") IsNot Nothing, 
                (From n In _Doc.Descendants("img").AsParallel() 
                Select New Typing.SEO.Elements.Images() With { 
                 .Alt = If(n.Attribute("alt") IsNot Nothing, 
                  n.Attribute("alt").Value, 
                  Nothing), 
                 .Source = If(n.Attribute("src") IsNot Nothing, 
                  n.Attribute("src").Value, 
                  Nothing), 
                 .Title = If(n.Attribute("title") IsNot Nothing, 
                  n.Attribute("title").Value, 
                  Nothing) 
                }).ToList(), 
               Nothing) 
             } 
            End Sub) 
      _NL.Add(New Typing.SEO() With { 
        .Link = _L(i).SiteUrl, 
        .Title = _Doc.Descendants("title").First().Value, 
        .Keywords = _Keywords, 
        .Description = _Description, 
        .Content = _Content, 
        .ContentElements = _EL 
       }) 
      _L.RemoveAt(i) 
      _EL = Nothing : _Doc = Nothing 
      ReportProgress((_Ct/_L.Count) * 100, _Worker) 
     Catch ex As Exception 
      'Put logging in here 
     End Try 
    Next 
    Return _NL 
End Function 
+0

我们能否看到一些示例代码? –

+0

你可以发布一些针对蜘蛛算法的伪代码吗?也许方法调用蜘蛛链? –

+0

-1无堆栈跟踪。这是分析[一般可以从任何地方] StackOverflowException所需的绝对*最小值*。 – 2012-12-28 04:03:58

回答

1

正如你可能知道的代码,这个错误很可能是由于在代码中的错误导致无限循环在递归算法中。虽然你说你不使用递归,但你可能无意中发生了递归。

找出造成它的最简单方法是附加调试器,配置Visual Studio以打破异常,并在应用程序中触发错误。

当发生错误并且调试程序中断时,请查看调用堆栈 - 希望您会看到问题出在哪里。

1

我的猜测可能是软件问题。 StackOverflowException通常在递归算法出现问题时发生(尽管您提到您不使用它)。另一个常见原因是财产执行或平等比较中的错误。 例如:

public string Name 
{ 
    set 
    { 
    Name = value; 
    } 
} 
+0

(我不喜欢这*特殊*递归错误..事实上,VS和ReSharper嚷嚷着我。) – 2012-12-28 04:02:53