2013-03-03 36 views
2

我正在使用Hpple解析HTML,但它似乎没有把文档识别为XML(按理说它应该识别):Xcode调试器显示变量isXML = (BOOL) NO,并且它没有收集到任何数据。我该如何解决?(问题标题:Hpple无法解析HTML)

这是我的代码(其中也可能有其他错误)。最先被调用的是解析方法,调用方式为[ListParser parse:@"http://www.fanfiction.net/book/Harry-Potter/" at:@"//div[@=\"class\"]"];:

// Private helpers of ListParser (class extension).
@interface ListParser() //private 
/// Downloads `page` and returns the nodes matched by the XPath query `path`.
+ (NSArray*) getNodeListAt: (NSURL*) page inside: (NSString*) path; 
/// Extracts the fields of a single matched node into a dictionary.
+ (NSDictionary*) getNodeData: (TFHppleElement*) node; 
/// Splits `list` ("Key: Value - Key: Value ...") and stores each pair in `dict`.
+ (void) addMiniListData: (NSString*) list to: (NSMutableDictionary*) dict; 
@end 


@implementation ListParser

/// Returns the element at `index`, or nil when `array` is nil or too short,
/// so callers can rely on nil-messaging instead of risking NSRangeException.
static id ListParserObjectAtIndex(NSArray *array, NSUInteger index) {
    return (index < [array count]) ? [array objectAtIndex:index] : nil;
}

/// Stores `value` under `key` only when it is non-nil; -setObject:forKey:
/// raises NSInvalidArgumentException when handed a nil object.
static void ListParserSetIfPresent(NSMutableDictionary *dict, NSString *key, id value) {
    if (value != nil) {
        [dict setObject:value forKey:key];
    }
}

/// Downloads `page`, parses it as HTML and runs the XPath query `path`.
/// @param page URL of the document to load.
/// @param path XPath query, e.g. "//div[@class='z-list']".
/// @return The nodes returned by the query; an empty array when the URL or
///         query is missing, the download fails, or the query returns nil.
+ (NSArray *)getNodeListAt:(NSURL *)page inside:(NSString *)path {
    if (page == nil || [path length] == 0) {
        return [NSArray array];
    }

    // NOTE: +dataWithContentsOfURL: loads synchronously, so this blocks the
    // calling thread for the duration of the download.
    NSData *data = [NSData dataWithContentsOfURL:page];
    if ([data length] == 0) {
        return [NSArray array];
    }

    TFHpple *listparser = [TFHpple hppleWithHTMLData:data];
    NSArray *done = [listparser searchWithXPathQuery:path];
    return (done != nil) ? done : [NSArray array];
}

/// Parses a string of the form "Key: Value - Key: Value ..." into `dict`.
/// Parts that do not contain a ": " separator are skipped rather than
/// crashing on the missing value component.
/// @param list The raw text to split; nil results in no changes.
/// @param dict Dictionary receiving one entry per well-formed part.
+ (void)addMiniListData:(NSString *)list to:(NSMutableDictionary *)dict {
    NSArray *parts = [list componentsSeparatedByString:@" - "];

    for (NSString *part in parts) {
        NSArray *pair = [part componentsSeparatedByString:@": "];
        if ([pair count] < 2) {
            continue; // no ": " in this part, so there is no value to store
        }
        [dict setObject:[pair objectAtIndex:1] forKey:[pair objectAtIndex:0]];
    }
}

/// Collects the fields of one matched node into a dictionary.
/// Any field whose element or attribute is missing is left out of the
/// result instead of raising an exception.
/// @param node An element returned by the XPath query.
/// @return A dictionary with the keys "Image", "Title", "By" and "Summery"
///         (when present) plus every pair found by +addMiniListData:to:.
+ (NSDictionary *)getNodeData:(TFHppleElement *)node {
    NSMutableDictionary *data = [NSMutableDictionary dictionary];

    TFHppleElement *head = [node firstChild];
    TFHppleElement *author = ListParserObjectAtIndex([head children], 2);
    TFHppleElement *detail = ListParserObjectAtIndex([head childrenWithClassName:@"z-indent"], 0);
    TFHppleElement *stats = ListParserObjectAtIndex([detail childrenWithClassName:@"z-padtop2"], 0);

    ListParserSetIfPresent(data, @"Image", [[head firstChild] objectForKey:@"href"]);
    ListParserSetIfPresent(data, @"Title", [head text]);
    ListParserSetIfPresent(data, @"By", [author text]);
    // The key spelling "Summery" is kept as-is: it is part of the returned
    // data that callers may already read.
    ListParserSetIfPresent(data, @"Summery", [detail text]);
    [self addMiniListData:[stats text] to:data];

    return data;
}

/// Loads the page at `address` and returns one dictionary per node matched
/// by the XPath query `path`.
/// @param address Absolute URL string of the page to parse.
/// @param path XPath query selecting the nodes of interest.
/// @return An array of dictionaries built by +getNodeData:; empty when the
///         address is nil or not a valid URL, or when nothing matched.
+ (NSArray *)parse:(NSString *)address at:(NSString *)path {
    // NSURL raises on a nil string, so guard before constructing the URL.
    NSURL *url = (address != nil) ? [NSURL URLWithString:address] : nil;
    NSArray *list = [self getNodeListAt:url inside:path];

    NSMutableArray *data = [NSMutableArray arrayWithCapacity:[list count]];
    for (TFHppleElement *element in list) {
        [data addObject:[self getNodeData:element]];
    }
    return [NSArray arrayWithArray:data];
}

@end 

这是我参考的教程链接:http://www.raywenderlich.com/14172/how-to-parse-html-on-ios

+0

HTML不是XML。 – nneonneo 2013-03-03 02:36:53

+0

@nneonneo 我知道,但据说它应该对两者都适用。至少根据其网站和几个教程的说法,它会把HTML当作XML来识别。 – 2013-03-03 02:38:21

回答

0

如果你需要用TFHpple解析XML,就应该明确告诉它你要这样做。你调用的是+hppleWithHTMLData:。如果阅读该方法的实现,你会看到它把isXML设置为NO。请改用hppleWithXMLData:方法。