2015-06-16 134 views
1

前段时间,我找到了一个Python脚本,用来读取Twitter消息并将它们写入一个.json文件。问题是,根据我的JSON查看器,生成的文件不是有效的JSON格式。(问题标题:在Python中,如何读取Twitter消息并创建.json文件?)该脚本如下:

# Search Twitter for QUERY, page through the results, and write every
# collected tweet to a newline-delimited JSON file (one JSON document per
# line). Note that such a file is a sequence of JSON documents, not a single
# JSON value, so a strict JSON viewer will not accept it as a whole.
import io
import json

import twitter

try:
    from urllib.parse import parse_qsl  # Python 3
except ImportError:
    from urlparse import parse_qsl  # Python 2

# Search term; also used as the stem of the output file name.
QUERY = 'question'
q = QUERY
# NOTE(review): the Search API reportedly caps `count` at 100 per request,
# so a larger value may not return more tweets per page -- confirm.
count = 200

# `auth` is not defined in this snippet; presumably a twitter.oauth.OAuth
# instance built from the account credentials before this point.
twitter_api = twitter.Twitter(auth=auth)

search_results = twitter_api.search.tweets(q=q, count=count, lang="nl")
statuses = search_results['statuses']

# Follow the pagination cursor for at most 25 further pages.
for _ in range(25):
    try:
        next_results = search_results['search_metadata']['next_results']
    except KeyError:  # No more results when next_results doesn't exist
        break

    # next_results is a URL query string with a leading '?'. Decode it
    # properly so percent-escaped values are not encoded a second time when
    # they are passed back to the API as keyword arguments.
    kwargs = dict(parse_qsl(next_results[1:]))
    search_results = twitter_api.search.tweets(**kwargs)
    statuses += search_results['statuses']

# The file to write output as newline-delimited JSON documents
OUT_FILE = QUERY + "5.json"

# Write one tweet per line as a JSON document.
with io.open(OUT_FILE, 'w', encoding='utf-8', buffering=1) as f:
    for tweet in statuses:
        # ensure_ascii=False keeps non-ASCII characters readable; the file
        # itself is opened as UTF-8 text.
        f.write(u'{0}\n'.format(json.dumps(tweet, ensure_ascii=False)))
        print(tweet['text'])

第一条Twitter消息如下所示:

[{u'contributors': None, u'truncated': False, u'text': u'@ABNAMRO Bedankt voor de goede en snelle service!!', u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 610448080702439424L, u'favorite_count': 0, u'source': u'Twitter Android', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [{u'id': 2270841, u'indices': [0, 8], u'id_str': u'2270841', u'screen_name': u'ABNAMRO', u'name': u'ABN AMRO'}], u'hashtags': [], u'urls': []}, u'in_reply_to_screen_name': u'ABNAMRO', u'in_reply_to_user_id': 2270841, u'retweet_count': 0, u'id_str': u'610448080702439424', u'favorited': False, u'user': {u'follow_request_sent': False, u'profile_use_background_image': True, u'default_profile_image': False, u'id': 130104974, u'profile_background_image_url_https': u'https://abs.twimg.com/images/themes/theme6/bg.gif', u'verified': False, u'profile_text_color': u'333333', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/457174881499889665/IkudfAL6_normal.jpeg', u'profile_sidebar_fill_color': u'A0C5C7', u'entities': {u'description': {u'urls': []}}, u'followers_count': 15, u'profile_sidebar_border_color': u'86A4A6', u'id_str': u'130104974', u'profile_background_color': u'709397', u'listed_count': 0, u'is_translation_enabled': False, u'utc_offset': 10800, u'statuses_count': 37, u'description': u'Trotse moeder van 2 dochters!', u'friends_count': 52, u'location': u'Delft', u'profile_link_color': u'FF3300', u'profile_image_url': u'http://pbs.twimg.com/profile_images/457174881499889665/IkudfAL6_normal.jpeg', u'following': False, u'geo_enabled': False, u'profile_background_image_url': u'http://abs.twimg.com/images/themes/theme6/bg.gif', u'screen_name': u'Deborah_81', u'lang': u'nl', u'profile_background_tile': False, u'favourites_count': 2, u'name': u'Deborah', u'notifications': False, u'url': None, u'created_at': u'Tue Apr 06 09:37:51 +0000 2010', u'contributors_enabled': False, u'time_zone': u'Athens', u'protected': False, u'default_profile': False, u'is_translator': False}, u'geo': None, u'in_reply_to_user_id_str': u'2270841', u'lang': u'nl', u'created_at': u'Mon Jun 15 14:05:38 +0000 2015', u'in_reply_to_status_id_str': None, u'place': None, u'metadata': {u'iso_language_code': u'nl', u'result_type': u'recent'}}]

我该如何格式化消息才能正确显示为JSON?

+1

“换行分隔”、“每行一条推文”的输出不是有效的JSON格式。你需要把它们放在一个数组中,用[]包围并用逗号分隔。为什么不直接把整个 `statuses` 转储为JSON? –

回答

0

我看到的问题是,在将数据写入json格式的文件之前,您正在对其进行编码,这会给您带来错误。这应该可以解决它:

# Search Twitter for `q`, page through the results, and save the text of
# every collected tweet to a file as a single JSON array of strings.
import json

import twitter

try:
    from urllib.parse import parse_qsl  # Python 3
except ImportError:
    from urlparse import parse_qsl  # Python 2

# Placeholder credentials: replace with the values of your own Twitter app.
ckey = 'Your consumer key'
csecret = 'your consumer secret'
atoken = 'your token'
asecret = 'your secret token'

auth = twitter.oauth.OAuth(atoken, asecret, ckey, csecret)

twitter_api = twitter.Twitter(auth=auth)

# Search term; also used as the stem of the output file name.
q = "question"
# NOTE(review): the Search API reportedly caps `count` at 100 per request,
# so a larger value may not return more tweets per page -- confirm.
count = 200

search_results = twitter_api.search.tweets(q=q, count=count, lang="nl")

statuses = search_results['statuses']

# Follow the pagination cursor for at most 25 further pages.
for _ in range(25):
    try:
        next_results = search_results['search_metadata']['next_results']
    except KeyError:  # No more results when next_results doesn't exist
        break

    # next_results is a URL query string with a leading '?'. Decode it
    # properly so percent-escaped values are not encoded a second time when
    # they are passed back to the API as keyword arguments.
    kwargs = dict(parse_qsl(next_results[1:]))
    search_results = twitter_api.search.tweets(**kwargs)
    statuses += search_results['statuses']

# Obtain the text
status_texts = [status['text'] for status in statuses]

# The file that receives the output, written relative to the current
# working directory.
OUT_FILE = q + "5.json"

# Write all tweet texts as ONE JSON array. The file is opened with 'w'
# rather than 'a': appending a second array on a later run would leave two
# concatenated JSON documents in the file, which is not valid JSON.
with open(OUT_FILE, 'w') as outfile:
    json.dump(status_texts, outfile)

这在这个SO问题以及《Mining the Social Web, 2nd Edition》一书中都有提及。

+0

嗨,Leb,我复制了你的代码,但它没有生成所需的文件。 –

+0

它产生了什么? – Leb

+0

没有。该脚本没有创建一个文件 –