2015-10-21 202 views
1

我正在尝试使用 HtmlAgilityPack 处理节点和子节点,解析下面这段 HTML:

<tr> 
<td width=0%>Artist:</td><td width=23% class='colour'><a href='/p/beatmaplist?q=Akiakane'>Akiakane</a></td> 
<td width=0%>Circle Size:</td><td width=23% class='colour'><div class='starfield' style='width:140px'><div class='active' style='width:56px'></div></div></td> 
<td width=0%>Approach Rate:</td><td class="colour"><div class='starfield' style='width:140px'><div class='active' style='width:126px'></div></div></td> 
</tr> 
<tr> 
<td width=0%>Title:</td><td class="colour"><a href='/p/beatmaplist?q=FlashBack'>FlashBack</a></td> 
<td width=0%>HP Drain:</td><td class="colour"><div class='starfield' style='width:140px'><div class='active' style='width:84px'></div></div></td> 
<td width=0%><strong>Star Difficulty</strong>:</td><td width=23% class='colour'><div class='starfield' style='width:140px'><div class='active' style='width:72.9650211334px'></div></div> (5.21)</td> 
</tr> 
<tr> 
<td width=0%>Creator:</td><td class="colour"><a href='/u/231111'>Kiiwa<a/></td> 
<td width=0%>Accuracy:</td><td class="colour"><div class='starfield' style='width:140px'><div class='active' style='width:98px'></div></div></td> 
<td width=0%>Length:</td><td class="colour">3:13 (2:49 drain)</td> 
</tr> 
<tr> 
<td width=0%>Source:</td><td class="colour"><a href='/p/beatmaplist?q='></a></td> 
<td width=0%>Genre:</td><td class="colour"><a href='/p/beatmaplist?g=4'>Rock</a> (<a href='/p/beatmaplist?la=3'>Japanese</a>)</td> 
<td width=0%>BPM:</td><td class="colour">185</td> 
</tr> 
<tr> 
<td width=0%>Tags:</td><td class="colour"><a href="/p/beatmaplist?q=j-pop">j-pop</a> <a href="/p/beatmaplist?q=beren">beren</a> <a href="/p/beatmaplist?q=collaboration">collaboration</a> <a href="/p/beatmaplist?q=collab">collab</a> <a href="/p/beatmaplist?q=boroboro">boroboro</a> <a href="/p/beatmaplist?q=na">na</a> <a href="/p/beatmaplist?q=ikizama">ikizama</a> <a href="/p/beatmaplist?q=niki">niki</a> <a href="/p/beatmaplist?q=niconicodouga">niconicodouga</a> <a href="/p/beatmaplist?q=toysfactory">toysfactory</a> </td> 
<td width=0%>User Rating:</td><td class="colour"> 
<table width="100%" height="20px" style="color:#fff;"> 
<tr> 
<td style="background-color:#BC2036;text-align:right;border:solid 1px #82000B;" width="3.37522441652">93</td> 
<td style="background-color:#78AB23;text-align:left;border:solid 1px #718F0A;" width="96.6965888689">2,692</td> 
</tr> 

解析这段复杂的 HTML 时,每个 TR 应该对应一个对象,其中的 TD 作为该对象的属性。例如:

/// <summary>
/// Plain data holder for the values the question wants to extract from the
/// beatmap HTML table. Every value is kept as display text.
/// </summary>
public class SongInfo 
     { 
      /// <summary>Text for the "Circle Size:" row.</summary>
      public string CS { get; set; } 
      /// <summary>Text for the "Approach Rate:" row.</summary>
      public string AR { get; set; } 
      /// <summary>Text for the "HP Drain:" row.</summary>
      public string HP { get; set; } 
      /// <summary>Text for the "Star Difficulty:" row.</summary>
      public string STAR { get; set; } 
      /// <summary>Text for the "Length:" row.</summary>
      public string LENGTH { get; set; } 
      /// <summary>Text for the "BPM:" row.</summary>
      public string BPM { get; set; } 
     } 

所以,在这种情况下,它应该是这样的:

CS should be "Circle Size: (starfield style % divided by active style %)" 
AR should be "Approach Rate: (starfield style % divided by active style %)" 
HP should be "HP Drain: (starfield style % divided by active style %)" 
STAR should be "Star Difficulty: (starfield style % divided by active style %)" 
LENGTH should be "Length: 3:13" 
BPM should be "BPM: 185" 

当我说"(starfield style % divided by active style %)"(即 starfield 的 style 宽度除以 active 的 style 宽度)时,我指的是这样的代码:

<div class='starfield' style='width:140px'><div class='active'style='width:56px'></div> 

所以在这种情况下,结果应该是 2.5,因为 140 / 56 = 2.5

我最初的想法是这样的:

// First attempt from the question: visit every <tr> in the document, then try
// to visit the label cells (<td width=0%>) of each row.
foreach (HtmlAgilityPack.HtmlNode node in doc.DocumentNode.SelectNodes("//tr")) 
{ 
    // NOTE(review): this XPath is malformed. "[@width]" only tests that the
    // attribute exists, and "=0%" sits outside the predicate; the intended
    // test is presumably td[@width='0%'].
    // NOTE(review): a leading "//" searches from the document root rather than
    // from `node`; a row-relative query would presumably start with ".//".
    foreach (HtmlAgilityPack.HtmlNode node2 in node.SelectNodes("//td[@width]=0%")) 
    { 

    } 
} 

但说实话,我不知道该如何用 HtmlAgilityPack 继续做下去,因为我还没有真正用过它。

我所问的这些能够实现吗?

回答

1

我觉得你没有下足功夫,因为你只是原样复制粘贴了这个帖子(thread)里的一些代码,甚至没有尝试去了解 XPath。

页面中的很多 HTML 结构都是类似的,所以我为你写出了完整的解决方案,请仔细阅读。另外也请阅读 Html Agility Pack 的文档以及有关 XPath 的资料。你最初的 XPath 是错误的,它应该是:"//td[@width='0%']"。你也可以直接使用 "//td"(下面的例子使用的是 "//td[@width='0%']"),但那样就必须用别的方法找出相关的单元格。在下面的解决方案中,我使用每个匹配到的 td 的 InnerText 来判断它是哪一项,然后再读取它的下一个兄弟节点(NextSibling)。

/// <summary>
/// Plain data holder for the beatmap values extracted from the HTML table.
/// Every value is kept as display text.
/// </summary>
public class SongInfo 
{ 
    /// <summary>Text produced for the "Circle Size:" row.</summary>
    public string CS { get; set; } 
    /// <summary>Text produced for the "Approach Rate:" row.</summary>
    public string AR { get; set; } 
    /// <summary>Text produced for the "HP Drain:" row.</summary>
    public string HP { get; set; } 
    /// <summary>Text produced for the "Star Difficulty:" row.</summary>
    public string STAR { get; set; } 
    /// <summary>Text of the cell that follows the "Length:" label.</summary>
    public string LENGTH { get; set; } 
    /// <summary>Text of the cell that follows the "BPM:" label.</summary>
    public string BPM { get; set; } 
} 

/// <summary>
/// Console program that loads "da.html", finds the label cells
/// (<c>td width='0%'</c>) of the beatmap table and fills a <see cref="SongInfo"/>
/// from the cell that follows each recognised label.
/// </summary>
class MainClass
{
    /// <summary>
    /// Loads the document, extracts the six supported values and prints them.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static void Main(string[] args)
    {
        SongInfo song = new SongInfo();

        HtmlDocument doc = new HtmlDocument();
        doc.Load("da.html");

        HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//td[@width='0%']");

        // SelectNodes yields null (not an empty collection) when nothing matches,
        // so guard before enumerating.
        if (nodes != null)
        {
            foreach (HtmlNode n in nodes)
            {
                string label = n.InnerText;

                if (ContainsLabel(label, "circle size:"))
                {
                    song.CS = DescribeRating(n);
                }
                if (ContainsLabel(label, "approach rate:"))
                {
                    song.AR = DescribeRating(n);
                }
                if (ContainsLabel(label, "hp drain:"))
                {
                    song.HP = DescribeRating(n);
                }
                if (ContainsLabel(label, "star difficulty:"))
                {
                    song.STAR = DescribeRating(n);
                }
                if (ContainsLabel(label, "length:"))
                {
                    song.LENGTH = NextSiblingText(n);
                }
                if (ContainsLabel(label, "bpm:"))
                {
                    song.BPM = NextSiblingText(n);
                }
            }
        }

        PrintSong(song);
    }

    /// <summary>
    /// Case-insensitive, culture-independent substring test used to recognise a label cell.
    /// </summary>
    private static bool ContainsLabel(string text, string label)
    {
        return text != null
            && text.IndexOf(label, System.StringComparison.OrdinalIgnoreCase) >= 0;
    }

    /// <summary>
    /// Builds "&lt;label&gt; &lt;ratio&gt;" for a starfield row, e.g. "Circle Size: 2.5".
    /// The ratio is written with at most two decimals using the invariant culture;
    /// "n/a" is used when the widths cannot be read.
    /// </summary>
    private static string DescribeRating(HtmlNode labelCell)
    {
        double ratio = AlmostAnything(NextElementSibling(labelCell));
        string ratioText = double.IsNaN(ratio)
            ? "n/a"
            : ratio.ToString("0.##", System.Globalization.CultureInfo.InvariantCulture);
        return labelCell.InnerText + " " + ratioText;
    }

    /// <summary>
    /// Returns the next sibling that is an element, skipping text/comment nodes
    /// (whitespace between tags is parsed as a text node), or null if there is none.
    /// </summary>
    private static HtmlNode NextElementSibling(HtmlNode n)
    {
        HtmlNode sibling = n == null ? null : n.NextSibling;
        while (sibling != null && sibling.NodeType != HtmlNodeType.Element)
        {
            sibling = sibling.NextSibling;
        }
        return sibling;
    }

    /// <summary>
    /// Returns the inner text of the element that follows <paramref name="n"/>,
    /// or an empty string when no such element exists.
    /// </summary>
    private static string NextSiblingText(HtmlNode n)
    {
        HtmlNode sibling = NextElementSibling(n);
        return sibling == null ? string.Empty : sibling.InnerText;
    }

    /// <summary>
    /// Computes the starfield width divided by the active width for the
    /// <c>div.starfield</c> / <c>div.active</c> pair found inside <paramref name="n"/>.
    /// </summary>
    /// <returns>
    /// The ratio (e.g. 140 / 56 = 2.5), or <see cref="double.NaN"/> when the cell is
    /// missing, contains no such divs, or either width is absent or not positive.
    /// </returns>
    private static double AlmostAnything(HtmlNode n)
    {
        if (n == null)
        {
            return double.NaN;
        }

        // Query relative to the cell; null means the cell holds no <div> at all.
        HtmlNodeCollection divs = n.SelectNodes(".//div");
        if (divs == null)
        {
            return double.NaN;
        }

        string starfield = "", activefield = "";
        foreach (HtmlNode div in divs)
        {
            string cssClass = div.GetAttributeValue("class", "");
            if (cssClass == "starfield")
            {
                starfield = div.GetAttributeValue("style", "");
            }
            if (cssClass == "active")
            {
                activefield = div.GetAttributeValue("style", "");
            }
        }

        double total = ConvertStringToNum(starfield);
        double active = ConvertStringToNum(activefield);

        // Reject the -1 "not found" sentinel and avoid dividing by zero.
        if (total <= 0 || active <= 0)
        {
            return double.NaN;
        }
        return total / active;
    }

    /// <summary>
    /// Extracts the first decimal number embedded in <paramref name="s"/>,
    /// e.g. "width:72.96px" gives 72.96 and "width:140" gives 140.
    /// </summary>
    /// <returns>The parsed value, or -1 when <paramref name="s"/> contains no number.</returns>
    private static double ConvertStringToNum(string s)
    {
        if (string.IsNullOrEmpty(s))
        {
            return -1;
        }

        // Only ASCII digits are accepted so that the captured text is always parseable.
        int start = 0;
        while (start < s.Length && !IsAsciiDigit(s[start]))
        {
            start++;
        }
        if (start == s.Length)
        {
            return -1;
        }

        int end = start;
        bool seenPoint = false;
        while (end < s.Length)
        {
            char c = s[end];
            if (IsAsciiDigit(c))
            {
                end++;
            }
            else if (c == '.' && !seenPoint && end + 1 < s.Length && IsAsciiDigit(s[end + 1]))
            {
                // Accept a single decimal point only when a digit follows it.
                seenPoint = true;
                end++;
            }
            else
            {
                break;
            }
        }

        double value;
        bool ok = double.TryParse(
            s.Substring(start, end - start),
            System.Globalization.NumberStyles.AllowDecimalPoint,
            System.Globalization.CultureInfo.InvariantCulture,
            out value);
        return ok ? value : -1;
    }

    /// <summary>True for the characters '0' through '9'.</summary>
    private static bool IsAsciiDigit(char c)
    {
        return c >= '0' && c <= '9';
    }

    /// <summary>
    /// Writes the six extracted values to the console, one per line.
    /// </summary>
    private static void PrintSong(SongInfo s)
    {
        Console.WriteLine(s.CS);
        Console.WriteLine(s.AR);
        Console.WriteLine(s.HP);
        Console.WriteLine(s.STAR);
        Console.WriteLine(s.LENGTH);
        Console.WriteLine(s.BPM);
    }
}