2012-09-06 52 views

回答

1

我不知道你是否已经得到了答案,不过我对你提到的那个链接做过同样的事情。我把我的代码贴在这里,但它仍然很杂乱,而且不适用于最新的时间表(第9小时)。

使用 HtmlCleaner 库来解析 HTML:

try { 
     HtmlCleaner hc = new HtmlCleaner(); 
     CleanerProperties cp = hc.getProperties(); 
     cp.setAllowHtmlInsideAttributes(true); 
     cp.setAllowMultiWordAttributes(true); 
     cp.setRecognizeUnicodeChars(true); 
     cp.setOmitComments(true); 

     String loc = sp.getString(Constants.pref_locatie  , ""); 
     String per = sp.getString(Constants.pref_persoon  , ""); 
     String oob = sp.getString(Constants.pref_onderofboven , ""); 

     int counteruurmax; 
     int[] pauze; 
     if (oob.contains("onder")){ 
      pauze = Constants.pauzeo; 
     } else if (oob.contains("boven")) { 
      pauze = Constants.pauzeb; 
     } else { 
      return false; 
     } 

     String url = ""; 
     if (loc.contains("lochem")) { 
      url += Constants.RoosterLochem; 
      url += t.getDatum(); 
      url += "/"; 
      url += per; 
      counteruurmax = 11; 
     } else if (loc.contains("herenlaan")) { 
      url += Constants.RoosterHerenlaan; 
      url += per; 
      counteruurmax = 13; 
     } else if (loc.contains("beukenlaan")) { 
      url += Constants.RoosterBeukenlaan; 
      url += per; 
      counteruurmax = 11; 
     } else { 
      return false; 
     } 

     String htmlcode = t.getHtml(url); 
     TagNode html = hc.clean(htmlcode); 
     Document doc = new DomSerializer(cp, true).createDOM(html); 
     XPath xp = XPathFactory.newInstance().newXPath(); 
     NodeList nl = (NodeList) xp.evaluate(Constants.XPathRooster, doc, XPathConstants.NODESET); 

     int counteruur = 1; 
     int counterdag = 1; 
     int decreaser = 0; 
     Boolean isPauze = false; 
     RoosterItems RItems = new RoosterItems(); 
     RoosterItem RItem = null; 
     for (int i = 0; i < nl.getLength(); i++){ 

      if ((counteruur == pauze[0]) || (counteruur == pauze[1]) || (counteruur == pauze[2])) { 
       isPauze = true; 
       decreaser++; 
      } 

      if (!isPauze) { 
       RItem = new RoosterItem(); 
       switch (counterdag){ 
       case 1: 
        RItem.setDag("ma"); 
        break; 
       case 2: 
        RItem.setDag("di"); 
        break; 
       case 3: 
        RItem.setDag("wo"); 
        break; 
       case 4: 
        RItem.setDag("do"); 
        break; 
       case 5: 
        RItem.setDag("vr"); 
        break; 
       } 

       Node n = nl.item(i); 
       String content = n.getTextContent(); 
       if (content.length() > 1) { 
        RItem.setUur(""+(counteruur-decreaser)); 
        NodeList t1 = n.getChildNodes(); 
        NodeList t2 = t1.item(0).getChildNodes(); 
        NodeList t3 = t2.item(0).getChildNodes(); 
        for (int j = 0; j < t3.getLength(); j++) { 
         Node temp = t3.item(j); 
         if (t3.getLength() == 3) { 
          switch (j) { 
          case 0: 
           RItem.setLes(""+temp.getTextContent()); 
           break; 
          case 1: 
           RItem.setLokaal(""+temp.getTextContent()); 
           break; 
          case 2: 
           RItem.setDocent(""+temp.getTextContent()); 
           break; 
          default: 
           return false; 
          } 
         } else if (t3.getLength() == 4) { 
          switch (j) { 
          case 0: 
           break; 
          case 1: 
           RItem.setLes("tts. " + temp.getTextContent()); 
           break; 
          case 2: 
           RItem.setLokaal(""+temp.getTextContent()); 
           break; 
          case 3: 
           RItem.setDocent(""+temp.getTextContent()); 
           break; 
          default: 
           return false; 
          } 
         } else if (t3.getLength() == 1) { 
          RItem.setLes(""+temp.getTextContent()); 
         } else { 
          return false; 
         } 
        } 
       } else { 
        RItem.setUur("" + (counteruur-decreaser)); 
        RItem.setLokaal("Vrij"); 
       } 
       RItems.add(RItem); 
      } 
      if (counteruur == counteruurmax) { counteruur = 0; counterdag++; decreaser = 0;} 
      counteruur++; 
      isPauze = false; 
     } 

     if (RItems.size() > 0) { 
      mSQL = new RoosterSQLAdapter(mContext); 
      mSQL.openToWrite(); 
      mSQL.deleteAll(); 
      for (int j = 0; j < RItems.size(); j++) { 
       RoosterItem insert = RItems.get(j); 
       mSQL.insert(insert.getDag(), insert.getUur(), insert.getLes(), insert.getLokaal(), insert.getDocent()); 
      } 
      if (mSQL != null) mSQL.close(); 
     } 
     return true; 
    } catch (ParserConfigurationException e) { 
     e.printStackTrace(); 
     return false; 
    } catch (XPathExpressionException e) { 
     e.printStackTrace(); 
     return false; 
    } 

代码里用到了几个常量,不过我想你可以自己猜出它们的含义 ;),否则你知道怎么向我要 :)

RoosterItem 类保存一个课时的所有变量,而 RoosterItems 则保存多个 RoosterItem。

祝你好运!

+0

对不起,忘了附上 XPath,它是:`"/html/body/table[1]/tbody/tr/td"`。注意,这只适用于支持 XPath 的 API。 – WHDeveloper

+0

谢谢,我已经找到答案。但由于上面的代码示例很好,我将其标记为最佳答案。 (是的,所以你得到了分数;)) – basnijkamp

1

到目前为止,我认为 JSoup 是提取或操作 HTML 的最好方式之一……

请参阅此链接:

http://jsoup.org/

但不知为何……它在我之前的情况下不起作用,所以我把整个 HTML 代码转换成字符串,然后再解析它……