2017-07-17 35 views
1

我有一个数据转储,其中包含 n 个如下结构的 JSON 对象(问题:如何去掉嵌套 Python 字典中、包括位于列表内部的字典的键名里的某个字符?):

{ 
    "_id" : {"$numberLong" : "734702956751294464" }, 
    "created_at" : "Mon May 23 11:10:09 +0000 2016", 
    "entities" : { 
     "user_mentions" : [ { 
      "name" : "Thierry Zoller", 
      "id" : 15589731, 
      "indices" : [ 3, 17 ], 
      "screen_name" : "thierryzoller", 
      "id_str" : "15589731" 
      } ], 
     "media" : [ { 
      "source_status_id" : { "$numberLong" : "734677772963041280" }, 
      "url" : "XXXXX", 
      "source_user_id_str" : "15589731", 
      "source_user_id" : 15589731, 
      "id" : { "$numberLong" : "734677772703019008" }, 
      "type" : "photo", 
      "id_str" : "734677772703019008" 
      } ], 
     "hashtags" : [] }, 
    "favorited" : false, 
    "in_reply_to_user_id_str" : null, 
    "extended_entities" : { 
     "media" : [ { 
      "source_status_id" : { "$numberLong" : "734677772963041280" }, 
      "media_url_https" : "XXXXX", 
      "url" : "XXXXX", 
      "source_user_id_str" : "15589731", 
      "source_user_id" : 15589731, 
      "indices" : [ 113, 136 ], 
      "display_url" : "pic.twitter.com/nO9tw2O4eY", 
      "id" : { "$numberLong" : "734677772703019008" } 
      }] 
     } 
} 

我想去掉键“$numberLong”中的“$”字符。

下面的代码可以去掉不在列表内的键中的“$”:

def rec_key_replace(obj):
    """Return a copy of *obj* with every '$' removed from mapping keys.

    Only mappings are descended into: the values of a mapping are processed
    recursively, while any other object (including a list that holds
    mappings) is returned unchanged.
    """
    # Anything that is not a mapping is a leaf for this function.
    if not isinstance(obj, Mapping):
        return obj

    renamed = {}
    for key, val in obj.items():
        renamed[key.replace('$', '')] = rec_key_replace(val)
    return renamed

如何扩展这个函数,使它能去掉所有键中的“$”,即使这些键位于嵌套的列表中(列表里还可能包含其他列表,依此类推)?

谢谢。

+0

你已经用了递归,但只处理了 'Mapping' 类型的情况。你还需要对树中的其他对象和列表进行递归。 –

回答

2

您可以使用递归函数遍历数据结构,并替换以 '$' 开头的字典键。

  1. 如果当前对象是一个列表,则遍历它,并对其中的每一项进行递归处理。

  2. 如果对象是一个字典,则对以 '$' 开头的键去掉开头的 '$' 字符,用新键保存旧键对应的值;然后对字典的各个值进行递归处理,以找出嵌套的字典或列表。字符串等其他值按原样返回。


from pprint import pprint 

def replace_keys(obj):
    """Return a copy of *obj* with leading '$' characters stripped from dict keys.

    Lists and dicts are traversed recursively at any nesting depth, so keys
    inside lists of dicts (or lists of lists) are rewritten as well. Any other
    value is returned unchanged.
    """
    if isinstance(obj, list):
        return [replace_keys(item) for item in obj]
    if isinstance(obj, dict):
        # Recurse on the value bound by the loop (`v`); only string keys can
        # carry a '$' prefix, so other key types are kept as they are.
        return {
            (k.lstrip('$') if isinstance(k, str) else k): replace_keys(v)
            for k, v in obj.items()
        }
    return obj

# Convert the document and pretty-print the result.
# NOTE(review): `obj` is not defined in this snippet; presumably it is the
# parsed JSON document shown in the question. Confirm before running.
new_obj = replace_keys(obj) 
pprint(new_obj)

{'_id': {'numberLong': '734702956751294464'}, 
'created_at': 'Mon May 23 11:10:09 +0000 2016', 
'entities': {'hashtags': [], 
       'media': [{'id': {'numberLong': '734677772703019008'}, 
         'id_str': '734677772703019008', 
         'source_status_id': {'numberLong': '734677772963041280'}, 
         'source_user_id': 15589731, 
         'source_user_id_str': '15589731', 
         'type': 'photo', 
         'url': 'XXXXX'}], 
       'user_mentions': [{'id': 15589731, 
           'id_str': '15589731', 
           'indices': [3, 17], 
           'name': 'Thierry Zoller', 
           'screen_name': 'thierryzoller'}]}, 
'extended_entities': {'media': [{'display_url': 'pic.twitter.com/nO9tw2O4eY', 
            'id': {'numberLong': '734677772703019008'}, 
            'indices': [113, 136], 
            'media_url_https': 'XXXXX', 
            'source_status_id': {'numberLong': '734677772963041280'}, 
            'source_user_id': 15589731, 
            'source_user_id_str': '15589731', 
            'url': 'XXXXX'}]}, 
'favorited': 'false', 
'in_reply_to_user_id_str': 'null'} 
1
import json 
import re 

# Sample document whose nested dicts use '$numberLong' as a key.
j = {'_id': {'$numberLong': '734702956751294464'}, 'in_reply_to_user_id_str': None, 'created_at': 'Mon May 23 11:10:09 +0000 2016', 'extended_entities': {'media': [{'id': {'$numberLong': '734677772703019008'}, 'url': 'XXXXX', 'source_status_id': {'$numberLong': '734677772963041280'}, 'media_url_https': 'XXXXX', 'display_url': 'pic.twitter.com/nO9tw2O4eY', 'source_user_id': 15589731, 'indices': [113, 136], 'source_user_id_str': '15589731'}]}, 'favorited': False, 'entities': {'hashtags': [], 'media': [{'id': {'$numberLong': '734677772703019008'}, 'url': 'XXXXX', 'source_status_id': {'$numberLong': '734677772963041280'}, 'type': 'photo', 'source_user_id': 15589731, 'id_str': '734677772703019008', 'source_user_id_str': '15589731'}], 'user_mentions': [{'id': 15589731, 'screen_name': 'thierryzoller', 'name': 'Thierry Zoller', 'indices': [3, 17], 'id_str': '15589731'}]}}

# Serialise to JSON text so the keys can be rewritten by text substitution.
jStr = json.dumps(j)

# The pattern is a raw string because "\$" is not a valid escape sequence in
# an ordinary string literal. It matches an opening brace, a quote, and a name
# that starts with '$', so only a key that directly follows '{' is rewritten.
for x in re.findall(r"""{["']\$[^"']*["':]""", jStr):
    # Replace every occurrence of the matched fragment with a '$'-free copy.
    jStr = jStr.replace(x, x.replace('$', ''))

print(jStr)
相关问题