您只检索到一个项目的原因是:页面源代码的标记中只包含第一项,其余各项是浏览器通过jQuery函数中的客户端JavaScript从JSON数据中提取并渲染出来的。您在浏览器中看到的内容,并不是JSoup从页面源代码中解析到的内容。欢迎来到全新的动态Web编程世界!
话虽如此,如果您仍然需要提取页面中的JSON数据,可以考虑用JSoup读取<script>
标签的内容,然后从var aUpdatedDevelopments
变量中获取JSON数据(通过DataNode读取)。下面的代码解析前三个脚本标签的内容,将其写入文本文件,并输出到屏幕上:
// Dump the inline contents of the first few <script> elements of the page
// to the console and to BerkeleyGroupScrape.txt in the current directory.

// Resolve the current working directory; the output file is created there.
String currentDir = new File("").getAbsolutePath();
// try-with-resources guarantees the writer is flushed and closed even when
// the HTTP request or a write throws, so the file handle is never leaked.
try (FileWriter writer = new FileWriter(currentDir + "/BerkeleyGroupScrape.txt")) {
    // Connect to the HTML page and parse its content (10-second timeout).
    Document doc = Jsoup.connect("http://www.berkeleygroup.co.uk/search-results").timeout(10*1000).get();
    Elements scriptTags = doc.getElementsByTag("script");
    // Counts data nodes emitted so far (not script tags visited).
    int i = 0;
    for (Element tag : scriptTags) {
        // The counter never decreases, so once the limit is exceeded no later
        // tag can be processed; stop instead of skipping each remaining tag.
        if (i > 2) { break; }
        for (DataNode node : tag.dataNodes()) {
            System.out.println(node.getWholeData());
            writer.append(node.getWholeData());
            i++;
        }
    }
}
OUTPUT
var __stormJs ='t1.stormiq.com/dcv4/jslib/6525_E9642FE4_B222_4C82_B6CA_7F4D662CB825.js';
var device_width=screen.width;var device_height=screen.height;var dpr=1;
if(window.devicePixelRatio!==undefined)
{dpr=window.devicePixelRatio;}if(device_width>device_height)
{ai_width=device_width;}else{ai_width=device_height;}var data =
{dpr:dpr,resolution:ai_width};$.ajax({url:"/api.cfc?method=setClientVariables
",type:"POST",data:data,success:function(d,st,xhr){},error:function(xhr,st,e)
{}});
<!--
$(document).ready(function($){
var aUpdatedDevelopments = '{"ROWCOUNT":56,"COLUMNS":
["ICON","LAT","LONGI","BESTLINKTEXT","FRIENDLYURL","DEVINTROTEXT","DEVHEADING
","DEVSTATUS","PRICERANGE","DEVIMAGE","DEVLOGO","DIVISION"],"DATA":{"ICON":
["\/media\/logo\/6\/r\/one-tower-bridge-map-logo-image-black.png","\/media
\/logo\/d\/t\/berkeley-homes-woodberry-down-map-logo.png","\/media\/logo
\/7\/j\/st-james-heritage-walk-development-logo-map-v2.png","\/media\/logo
\/i\/k\/st-george-sovereign-court-map-logo.png","\/media\/logo\/j\/k
\/st-george-one-blackfriars-map-logo.png","\/media\/logo\/s\/o\/berkeley-
250-city-road-marketing-location-map-logo-v3.png","\/media\/logo\/r\/9
\/berkeley-taplow-riverside-map-logo-v2.png","\/media\/logo\/i\/3\/st-james-
smithfield-map-logo.png","\/media\/logo\/o\/s\/berkeley-royal-wells-park-map-
logo.png","\/media\/logo\/b\/7\/st-george-battersea-reach-map-logo.png","
\/media\/logo\/d\/5\/berkeley-vista-logo-map.png","\/media\/logo\/8\/4
\/berkeley-woodhurst-park-map-logo.png","\/media\/logo\/d\/5\/berkeley-
queenshurst-map-search-logo-v2_33.png","\/media\/logo\/j\/g\/st-george-
kew-bridge-map-logo.png","\/media\/logo\/j\/a\/berkeley-green-park-village-
map-logo-updated-v2.png","\/media\/logo\/h\/0\/st-james-fiennes-park-logo-
map.png","\/media\/logo\/t\/c\/st-james-albert-embankment-corniche-
map-search.png","\/media\/logo\/f\/a\/st-george-dickens-yard-map-logo.png","
\/media\/logo\/i\/8\/st-edward-375-kensington-high-street-map-logo.png","
\/media\/logo\/i\/8\/st-james-riverlight-map-logo1.png","\/media\/logo\/i\/8
\/berkeley-homes-the-ashmiles-map-logo.png","\/media\/logo\/d\/b\/berkeley-
kennet-island-map-logo.png","\/media\/logo\/1\/1\/berkeley-oakgrove-
map-logo.png","\/media\/logo\/d\/8\/st-george-chelsea-creek-map-logo.png","
\/media\/logo\/j\/b\/berkeley-homes-holborough-lakes-map-logo.png","\/media
\/logo\/b\/q\/berkeley-victory-pier-development-logo-on-white-map.png","
\/media\/logo\/4\/8\/berkeley-homes-kidbrooke-village-map-logo.png","\/media
\/logo\/l\/h\/st-george-london-dock-map-search-icon.png","\/media\/logo\/2\/9
\/berkeley-homes-abell-and-cleland-map-logo.png","\/media\/logo\/e\/0
\/berkeley-wye-dene-map-logo.png","\/media\/logo\/i\/k\/st-edward-kensington-
row-map-logo.png","\/media\/logo\/f\/q\/st-george-beaufort-park-map-
logo.png","\/media\/logo\/0\/d\/berkeley-homes-highwood-map-logo.png","
\/media\/logo\/7\/o\/berkeley-walnut-grove-map-logo.png","\/media\/logo\/7\/c
\/st-james-hurlingham-walk-map-logo.png","\/media\/logo\/3\/b\/berkeley-
homes-edenbrook-map-logo.png","\/media\/logo\/2\/j\/berkeley-ryewood-
map-logo.png","\/media\/logo\/j\/j\/berkeley-marine-wharf-map-logo.png","
\/media\/logo\/j\/0\/berkeley-brunswick-square-map-logo-v2.png","\/media
\/logo\/p\/b\/st-edward-stanmore-place-map-logo.png","\/media\/logo\/g\/k
\/berkeley-homes-wimbledon-hill-park-map-logo.png","\/media\/logo\/8\/0
\/Berkeley-south-quay-plaza-map-search-logo.png","\/media\/logo\/l\/i
\/st-james-kew-bridge-west-map-logo.png","\/media\/logo\/3\/0\/st-james-
southall-gasworks-map-logo.png","\/media\/logo\/h\/c\/st-james-dumont-
map-logo1.png","\/me
...
JSON Pretty Print(提取上述aUpdatedDevelopments变量的值)
{
"ROWCOUNT": 56,
"COLUMNS": [
"ICON",
"LAT",
"LONGI",
"BESTLINKTEXT",
"FRIENDLYURL",
"DEVINTROTEXT",
"DEVHEADING",
"DEVSTATUS",
"PRICERANGE",
"DEVIMAGE",
"DEVLOGO",
"DIVISION"
],
"DATA": {
"ICON": [
"\/media\/logo\/6\/r\/one-tower-bridge-map-logo-image-black.png",
"\/media\/logo\/d\/t\/berkeley-homes-woodberry-down-map-logo.png",
"\/media\/logo\/7\/j\/st-james-heritage-walk-development-logo-map-v2.png",
"\/media\/logo\/i\/k\/st-george-sovereign-court-map-logo.png",
"\/media\/logo\/j\/k\/st-george-one-blackfriars-map-logo.png",
"\/media\/logo\/s\/o\/berkeley-250-city-road-marketing-location-map-logo-v3.png",
"\/media\/logo\/r\/9\/berkeley-taplow-riverside-map-logo-v2.png",
"\/media\/logo\/i\/3\/st-james-smithfield-map-logo.png",
"\/media\/logo\/o\/s\/berkeley-royal-wells-park-map-logo.png",
...
"LAT": [
51.5038122197,
51.5707487583,
51.4884271,
51.494341,
51.507878,
51.528207,
51.5292685,
51.5879838,
51.1355008763,
51.4647143,
51.4802827,
51.427478,
51.414314,
51.488669,
51.4251047,
51.514982,
...
"BESTLINKTEXT": [
"One Tower Bridge",
"Woodberry Down",
"Heritage Walk",
"Sovereign Court",
"One Blackfriars",
"250 City Road",
"Taplow Riverside",
"Smithfield Square",
"Royal Wells Park",
"Battersea Reach",
"Vista, Chelsea Bridge",
"Woodhurst Park",
"Queenshurst",
"Kew Bridge",
"Green Park Village",
"Fiennes Park",
"The Corniche",
"Dickens Yard",
"375 Kensington High Street",
...
"FRIENDLYURL": [
"\/new-homes\/london\/tower-bridge\/one-tower-bridge",
"\/new-homes\/london\/finsbury-park\/woodberry-down",
"\/new-homes\/london\/kew-bridge\/heritage-walk",
"\/new-homes\/london\/hammersmith\/sovereign-court",
"\/new-homes\/london\/southwark\/one-blackfriars",
"\/new-homes\/london\/islington\/250-city-road",
"\/new-homes\/buckinghamshire\/taplow\/taplow-riverside",
"\/new-homes\/london\/hornsey\/smithfield-square",
"\/new-homes\/kent\/royal-tunbridge-wells\/royal-wells-park",
"\/new-homes\/london\/battersea\/battersea-reach",
"\/new-homes\/london\/battersea\/vista-chelsea-bridge",
"\/new-homes\/berkshire\/warfield\/woodhurst-park",
"\/new-homes\/london\/kingston\/queenshurst",
"\/new-homes\/london\/kew-bridge\/kew-bridge",
"\/new-homes\/berkshire\/reading\/green-park-village",
"\/new-homes\/berkshire\/maidenhead\/fiennes-park",
"\/new-homes\/london\/albert-embankment\/the-corniche",
"\/new-homes\/london\/ealing\/dickens-yard",
"\/new-homes\/london\/kensington\/375-kensington-high-street",
"\/new-homes\/london\/vauxhall\/riverlight",
"\/new-homes\/west-sussex\/barns-green\/the-ashmiles",
"\/new-homes\/berkshire\/reading\/kennet-island",
我找到了62个元素。(我用的是Python版的BS,但做法完全相同) – njzk2
是否打印所有62个元素? – Alex
是的,62个元素全部都打印出来了 – njzk2