2014-01-08 23 views
0

我想从一个网址获取网页的全部内容,但一直做不到,希望有人能帮帮我。(标题:在 Android API 16 中无法从网址获取全部内容)

这是我的代码:

public class HttpHelper { 
private static final int CONNECTION_TIMEOUT = 30000; 
private static final int SOCKET_TIMEOUT = 10000; 

public static String GET(String uri) { 
    HttpParams params = new BasicHttpParams(); 
    HttpConnectionParams.setConnectionTimeout(params, CONNECTION_TIMEOUT); 
    HttpConnectionParams.setSoTimeout(params, SOCKET_TIMEOUT); 
    HttpClient httpclient = new DefaultHttpClient(params); 
    HttpResponse response; 
    String data = ""; 
    try { 
     response = httpclient.execute(new HttpGet(uri)); 
     StatusLine statusLine = response.getStatusLine(); 

     if (statusLine.getStatusCode() == HttpStatus.SC_OK) { 
      ByteArrayOutputStream out = new ByteArrayOutputStream(); 
      response.getEntity().writeTo(out); 
      out.close(); 
      data = out.toString(); 
      Log.i("html", data); 
     } else { 
      // Closes the connection. 
      response.getEntity().getContent().close(); 
      throw new IOException(statusLine.getReasonPhrase()); 
     } 
    } catch (ClientProtocolException e) { 
    } catch (IOException e) { 
    } 
    return data; 
} 

}

在logcat中,它写道:


01-08 20:56:00.125: I/html(1248): <!doctype html><html itemscope="" itemtype="http://schema.org/WebPage"><head><meta itemprop="image" content="/images/google_favicon_128.png"><title>Google</title><script>(function(){ 

01-08 20:56:00.125: I/html(1248): window.google={kEI:"20rNUv2oKOLpiAeioYHADA",getEI:function(a){for(var b;a&&(!a.getAttribute||!(b=a.getAttribute("eid")));)a=a.parentNode;return b||google.kEI},https:function(){return"https:"==window.location.protocol},kEXPI:"17259,4000116,4005204,4007278,4007661,4007830,4008067,4008133,4008142,4009033,4009103,4009352,4009565,4009641,4010077,4010806,4010858,4010899,4011063,4011228,4011258,4011524,4011559,4011863,4012001,4012096,4012190,4012275,4,4,4,4,4,4012415,4012427,4012512,4012523,4012526,4012538,4012547,4012691,4012836,4012851,4012869,4013104,4013140,4013312,4013374,4013378,4013414,4013416,4013433,4013474,4013513,4013567,4013667,4013668,4013669,4013671,4013672,4013701,4013723,4013782,4013787,4013794,4013838,4013842,4013846,4013853,4013920,4013960,4013979,4014054,4014057,4014097,4014179,4014215,4014316,8500073,8500075",kCSI:{e:"17259,4000116,4005204,4007278,4007661,4007830,4008067,4008133,4008142,4009033,4009103,4009352,4009565,4009641,4010077,4010806,4010858,4010899,4011063,4011228,4011258,4011524,4011559,4011863,4012001,4012096,4012190,4012275,4,4,4,4,4,4012415,4012427,4012512,4012523,4012526,4012538,4012547,4012691,4012836,4012851,4012869,4013104,4013140,4013312,4013374,4013378,4013414,4013416,4013433,4013474,4013513,4013567,4013667,4013668,4013669,4013671,4013672,4013701,4013723,4013782,4013787,4013794,4013838,4013842,4013846,4013853,4013920,4013960,4013979,4014054,4014057,4014097,4014179,4014215,4014316,8500073,8500075",ei:"20rNUv2oKOLpiAeioYHADA"},authuser:0,ml:function(){},kHL:"zh-CN",time:function(){return(new Date).getTime()},log:function(a,b,c,l,k){var d=new Image,f=google.lc,e=google.li,g="",h="gen_204";k&&(h= 

01-08 20:56:00.125: I/html(1248): k);d.onerror=d.onload=d.onabort=function(){delete f[e]};f[e]=d;c||-1!=b.search("&ei=")||(g="&ei="+google.getEI(l));c=c||"/"+h+"?atyp=i&ct="+a+"&cad="+b+g+"&zx="+google.time(); 

01-08 20:56:00.125: I/html(1248): a=/^http:/i;a.test(c)&&google.https()?(google.ml(Error("GLMM"),!1,{src:c}),delete f[e]):(d.src=c,google.li=e+1)},lc:[],li:0,Toolbelt:{},y:{},x:function(a,b){google.y[a.id]=[a,b];return!1},load:function(a,b,c){google.x({id:a+ 

01-08 20:56:00.125: I/html(1248): m++},function(){google.load(a,b,c)})}};var m=0;})(); 

01-08 20:56:00.125: I/html(1248): (function(){google.sn="webhp";google.timers={};google.startTick=function(a,b){google.timers[a]={t:{start:google.time()},bfr:!!b}};google.tick=function(a,b,g){google.timers[a]||google.startTick(a);google.timers[a].t[b]=g||google.time()};google.startTick("load",!0); 

01-08 20:56:00.125: I/html(1248): try{}catch(d){}})(); 

01-08 20:56:00.125: I/html(1248): var _gjwl=location;function _gjuc(){var a=_gjwl.href.indexOf("#");if(0<=a&&(a=_gjwl.href.substring(a),0<a.indexOf("&q=")||0<=a.indexOf("#q="))&&(a=a.substring(1),-1==a.indexOf("#"))){for(var d=0;d<a.length;){var b=d;"&"==a.charAt(b)&&++b;var c=a.indexOf("&",b);-1==c&&(c=a.length);b=a.substring(b,c);if(0==b.indexOf("fp="))a=a.substring(0,d)+a.substring(c,a.length),c=d;else if("cad=h"==b)return 0;d=c}_gjwl.href="/search?"+a+"&cad=h";return 1}return 0} 

01-08 20:56:00.125: I/html(1248): function _gjh(){!_gjuc()&&window.google&&google.x&&google.x({id:"GJH"},function(){google.nav&&google.nav.gjh&&google.nav.gjh()})}; 

01-08 20:56:00.125: I/html(1248): window._gjh&&_gjh();</script><style>#gb{font:13px/27px Arial,sans-serif;height:30px}#gbz,#gbg{position:absolute;white-space:nowrap;top:0;height:30px;z-index:1000}#gbz{left:0;padding-left:4px}#gbg{right:0;padding-right:5px}#gbs{background:transparent;position:absolute;top:-999px;visibility:hidden;z-index:998;right:0}.gbto #gbs{background:#fff}#gbx3,#gbx4{background-color:#2d2d2d;background-image:none;_background-image:none;background-position:0 -138px;background-repeat:repeat-x;border-bottom:1px solid #000;font-size:24px;height:29px;_height:30px;opacity:1;filter:alpha(opacity=100);position:absolute;top:0;width:100%;z-index:990}#gbx3{left:0}#gbx4{right:0}#gbb{position:relative}#gbbw{left:0;position:absolute;top:30px;width:100%}.gbtcb{position:absolute;visibility:hidden}#gbz .gbtcb{right:0}#gbg .gbtcb{left:0}.gbxx{display:none !important}.g 

我不明白,为什么会输出很多次。

顺便说一句,我的测试 URL 是“http://www.google.com”。

+0

具体问题是什么?你也忽略了你可能遇到的任何异常,至少记录它们。 – laalto

+0

我记录了异常,它什么也没有记录,也就是说,没有出现异常。 – lhking

回答

0

logcat 会在你没有要求的情况下做几件事情:

  1. 如果你给它一个很长的字符串,它会自动分解成多个logcat的输出消息。
  2. 如果你给它一个非常长的字符串,它会在一定数量的字符之后将其截断。我不知道确切的数字,但看起来是 4000 多个字符。

例如,你可以看看这些:

+0

非常感谢,它适合我! – lhking

+0

很高兴我能帮到你!你可以考虑接受这个答案作为解决方案,以便其他人更清楚地知道你的问题已经解决。 –

0

试试这个代码,

// Execute the request and read the whole response body as text, line by line.
HttpResponse response = httpclient.execute(httpget);
// Decode explicitly as UTF-8 rather than relying on the platform default charset.
in = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), "UTF-8"));
StringBuilder sb = new StringBuilder();
String line = "";
String NL = System.getProperty("line.separator");
try {
    while ((line = in.readLine()) != null) {
        // Each line is re-terminated with the platform line separator.
        sb.append(line).append(NL);
    }
} finally {
    // Close the reader even when readLine() fails part-way through.
    in.close();
}
String data = sb.toString();
// Note: logcat truncates very long messages, so the log may not show the full text.
Log.i("html", data);

希望这能有所帮助。

+0

谢谢,我的类HttpHelper代码是对的,Log.i(“html”,data)有一些问题 – lhking

+0

是的,Log有一定的限制。它不会显示特定限制后的数据。 – Swapnil

相关问题