2015-01-15 50 views
3

我想按关键字或话题标签发起查询，并从所有包含该关键字的推文中检索全部图像。使用 Twitter4J 和 Java，我可以轻松地发起查询并获取结果推文。我知道可以在浏览器中访问推文里的 http://t.co/xxxx 链接来查看关联的图像，而图像本身位于 https://pbs.twimg.com/xxxxx。所以，看起来我只需要在代码中重复这个过程！如何使用 Twitter4J 检索推文中的图像？

我可以轻松解析出每条推文中的 http://t.co/xxxx 链接。但是，当我获取该链接返回的全部 HTML 时，却找不到任何 https://pbs.twimg.com/xxxx 图像地址 :( 我猜测原因是 Twitter 通过 JavaScript 加载这些图像。

有没有什么办法可以轻松地检索每条推文中的图像？

这是我目前的代码：

package com.company; 

import twitter4j.*; 
import twitter4j.conf.ConfigurationBuilder; 

import java.io.BufferedReader; 
import java.io.InputStreamReader; 
import java.net.URL; 
import java.util.regex.Matcher; 
import java.util.regex.Pattern; 

/**
 * Searches Twitter for a hashtag and, for every result that is not a retweet,
 * follows the t.co short links in the tweet text and prints any
 * {@code pbs.twimg.com} image URLs found in the fetched page.
 *
 * <p>NOTE(review): the page behind a t.co link may not contain the image URL in
 * its static HTML at all. Twitter4J also exposes attached media directly via
 * {@code Status.getMediaEntities()}; consider using that instead of scraping.
 */
public class Main {

    /**
     * Matches t.co short links inside tweet text. Accepts both http and https,
     * escapes the dot, and does not assume a fixed id length so that longer
     * ids are not truncated into a wrong URL.
     */
    private static final Pattern SHORT_LINK_PATTERN = Pattern.compile("https?://t\\.co/\\w+");

    /**
     * Matches Twitter-hosted image URLs. The alternation must not contain
     * spaces: whitespace in a Java regex is literal unless
     * {@link Pattern#COMMENTS} is set, so "(png | jpg | gif)" would only match
     * "png ", " jpg " or " gif" and never a real image URL.
     */
    private static final Pattern IMAGE_PATTERN =
            Pattern.compile("https://pbs\\.twimg\\.com/media/[\\w-]+\\.(png|jpg|gif)(:large)?");

    /**
     * Runs the search and prints every image URL discovered.
     *
     * @param args unused
     * @throws Exception if the Twitter search fails or a linked page cannot be read
     */
    public static void main(String[] args) throws Exception {
        ConfigurationBuilder cb = new ConfigurationBuilder();
        cb.setDebugEnabled(true)
                .setOAuthConsumerKey("xxxxxxxxxx")
                .setOAuthConsumerSecret("xxxxxxxxxxxx")
                .setOAuthAccessToken("xxxxxxxxx-xxx-xxxxxxxx")
                .setOAuthAccessTokenSecret("xxxxxxxxxxxxxxxxxxx");
        TwitterFactory tf = new TwitterFactory(cb.build());
        Twitter twitter = tf.getInstance();
        Query query = new Query("#hashtag");
        QueryResult result = twitter.search(query);

        for (Status status : result.getTweets()) {
            if (status.isRetweet()) {
                continue;
            }
            System.out.println("@" + status.getUser().getScreenName() + ":" + status.getText());

            // A tweet can carry several short links; inspect all of them, not just the first.
            Matcher linkMatcher = SHORT_LINK_PATTERN.matcher(status.getText());
            while (linkMatcher.find()) {
                System.out.println("found a t.co url");
                printImageUrls(new URL(linkMatcher.group()));
            }
        }
    }

    /**
     * Downloads the given page line by line and prints every image URL on it.
     *
     * @param page the URL to fetch
     * @throws java.io.IOException if the page cannot be opened or read
     */
    private static void printImageUrls(URL page) throws java.io.IOException {
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(page.openStream(), java.nio.charset.StandardCharsets.UTF_8))) {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                Matcher imageMatcher = IMAGE_PATTERN.matcher(inputLine);
                // HTML often packs many URLs on one line, so report every match.
                while (imageMatcher.find()) {
                    System.out.println("YAYAAYAYAYYAYAYAYAYAYAYAYAYAAYAYYAYAAYYAYAYAYA: " + imageMatcher.group());
                }
            }
        }
    }
}

回答

12

如果推文中插入了图像，有一个简单的方法可以检索它：使用 getMediaEntities() 获取推文的媒体数据，
然后用 getMediaURL() 检索 URL。你应该这样做：

// Fetch the media entities attached to the status and print the URL of each one.
MediaEntity[] media = status.getMediaEntities();
for (int i = 0; i < media.length; i++) {
    String mediaUrl = media[i].getMediaURL();
    System.out.println(mediaUrl);
}
0

下载 Twitter4J 状态（Status）中的所有媒体：

// Downloads every media entity in `medias` and stores it as
// <id>.<extension> under `file` (presumably the target directory; it is
// declared outside this snippet, so confirm against the caller).
for (MediaEntity m : medias) {
    try {
        URL url = new URL(m.getMediaURL());

        // Buffer the whole download in memory first so that a failed transfer
        // never leaves a truncated file on disk.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // try-with-resources closes the network stream even if read() throws.
        try (InputStream in = new BufferedInputStream(url.openStream())) {
            byte[] buf = new byte[1024];
            int n;
            while ((n = in.read(buf)) != -1) {
                out.write(buf, 0, n);
            }
        }

        // Use the platform separator: a hard-coded "\\" becomes part of the
        // file name on non-Windows systems instead of separating directories.
        String target = file.getAbsolutePath() + java.io.File.separator
                + m.getId() + "." + getExtension(m.getType());
        try (FileOutputStream fos = new FileOutputStream(target)) {
            fos.write(out.toByteArray());
        }
    } catch (Exception ex) {
        // Best effort: report the failure and continue with the next media entity.
        ex.printStackTrace();
    }
}

获取文件扩展名：

/**
 * Maps a media type name to the file extension used when saving it.
 *
 * @param type the media type name; "photo", "video" and "animated_gif" are recognised
 * @return "jpg", "mp4" or "gif" for the recognised types, otherwise "err"
 */
private String getExtension(String type) {
    final String extension;
    switch (type) {
        case "photo":
            extension = "jpg";
            break;
        case "video":
            extension = "mp4";
            break;
        case "animated_gif":
            extension = "gif";
            break;
        default:
            // Unrecognised type names fall back to the "err" marker.
            extension = "err";
            break;
    }
    return extension;
}
相关问题