2017-08-17 70 views
-1

我是Python新手,参考了以下帖子中的回答:解析嵌套的JSON并将其写入CSV(再次讨论)

Parsing nested JSON and writing it to CSV

如何为这段代码指定输入文件?我知道必须把“outputfile”定义为要写入的路径/文件名,但我不知道输入文件应该在哪里指定。

编辑:添加为清楚说明,我有一个JSON文件的输入,并希望将其转换为CSV文件作为输出。我只是想知道如何编写代码(从上面的例子),并指定一个特定的JSON文件作为输入。同样为了清楚起见,JSON文件的名称将保持不变,但内容每天都会更改,所以我只需要知道在哪里放置open() 以及如何在脚本中调用它。

EDIT_2:

inputfile = "/some/file.json"
outputfile = "/some/file.csv"

# Convert a JSON object of records into CSV rows: one row is written for
# every entry of the single nested dict found inside each record.
#
# NOTE: the loop below expects the top-level JSON value to be an object
# whose values are themselves objects; any value without ``.items()``
# (for example a list) raises AttributeError at ``item.items()``.

# Parse the input completely before the output is opened, so that a missing
# or malformed JSON file does not leave a truncated CSV behind.  The handle
# from the ``with`` statement is reused instead of opening the file a second
# time, so it is always closed.
with open(inputfile, 'r', encoding='utf-8') as inf:
    data = json.load(inf)

# newline='' lets the csv module control the line endings itself.
with open(outputfile, 'w', newline='') as outf:
    writer = None  # will be set to a csv.DictWriter later

    for key, item in sorted(data.items(), key=itemgetter(0)):
        row = {}
        nested_name, nested_items = '', {}
        # Split the record into plain values (columns of the row) and the
        # one nested dict that gets expanded into several rows.
        for k, v in item.items():
            if not isinstance(v, dict):
                row[k] = v
            else:
                assert not nested_items, 'Only one nested structure is supported'
                nested_name, nested_items = k, v

        if writer is None:
            # build fields for each first key of each nested item first
            fields = sorted(row)

            # sorted keys of first item in key sorted order
            nested_keys = sorted(sorted(nested_items.items(), key=itemgetter(0))[0][1])
            fields.extend('__'.join((nested_name, k)) for k in nested_keys)

            # The header is derived from the first record only.
            writer = csv.DictWriter(outf, fields)
            writer.writeheader()

        # Emit one CSV row per nested entry, in key-sorted order; the plain
        # values of the record are repeated on every row.
        for nkey, nitem in sorted(nested_items.items(), key=itemgetter(0)):
            row.update(('__'.join((nested_name, k)), v) for k, v in nitem.items())
            writer.writerow(row)

我得到的错误是...

for k, v in item.items(): 

AttributeError: 'list' object has no attribute 'items'

我想我可能没有正确读取JSON文件……这就是Python新手的烦恼。

EDIT_3(更新JSON结构):下面是我使用的JSON文件(NIST/NVD JSON文件)中的一个“条目”:

{ 
     "CVE_data_type" : "CVE", 
     "CVE_data_format" : "MITRE", 
     "CVE_data_version" : "4.0", 
     "CVE_data_numberOfCVEs" : "6208", 
     "CVE_data_timestamp" : "2017-08-14T18:06Z", 
     "CVE_Items" : [ { 
     "cve" : { 
      "CVE_data_meta" : { 
      "ID" : "CVE-2003-1547" 
      }, 
      "affects" : { 
      "vendor" : { 
       "vendor_data" : [ { 
       "vendor_name" : "francisco_burzi", 
       "product" : { 
        "product_data" : [ { 
        "product_name" : "php-nuke", 
        "version" : { 
         "version_data" : [ { 
         "version_value" : "6.5" 
         }, { 
         "version_value" : "6.5_beta1" 
         }, { 
         "version_value" : "6.5_rc3" 
         }, { 
         "version_value" : "6.5_rc2" 
         }, { 
         "version_value" : "6.5_rc1" 
         } ] 
        } 
        } ] 
       } 
       } ] 
      } 
      }, 
      "problemtype" : { 
      "problemtype_data" : [ { 
       "description" : [ { 
       "lang" : "en", 
       "value" : "CWE-79" 
       } ] 
      } ] 
      }, 
      "references" : { 
      "reference_data" : [ { 
       "url" : "http://secunia.com/advisories/8478" 
      }, { 
       "url" : "http://securityreason.com/securityalert/3718" 
      }, { 
       "url" : "http://www.securityfocus.com/archive/1/archive/1/316925/30/25250/threaded" 
      }, { 
       "url" : "http://www.securityfocus.com/archive/1/archive/1/317230/30/25220/threaded" 
      }, { 
       "url" : "http://www.securityfocus.com/bid/7248" 
      }, { 
       "url" : "https://exchange.xforce.ibmcloud.com/vulnerabilities/11675" 
      } ] 
      }, 
      "description" : { 
      "description_data" : [ { 
       "lang" : "en", 
       "value" : "Cross-site scripting (XSS) vulnerability in block-Forums.php in the Splatt Forum module for PHP-Nuke 6.x allows remote attackers to inject arbitrary web script or HTML via the subject parameter." 
      } ] 
      } 
     }, 
     "configurations" : { 
      "CVE_data_version" : "4.0", 
      "nodes" : [ { 
      "operator" : "OR", 
      "cpe" : [ { 
       "vulnerable" : true, 
       "cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5", 
       "cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5:*:*:*:*:*:*:*" 
      }, { 
       "vulnerable" : true, 
       "cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_beta1", 
       "cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_beta1:*:*:*:*:*:*:*" 
      }, { 
       "vulnerable" : true, 
       "cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc1", 
       "cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc1:*:*:*:*:*:*:*" 
      }, { 
       "vulnerable" : true, 
       "cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc2", 
       "cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc2:*:*:*:*:*:*:*" 
      }, { 
       "vulnerable" : true, 
       "cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc3", 
       "cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc3:*:*:*:*:*:*:*" 
      } ] 
      } ] 
     }, 
     "impact" : { 
      "baseMetricV2" : { 
      "cvssV2" : { 
       "vectorString" : "(AV:N/AC:M/Au:N/C:N/I:P/A:N)", 
       "accessVector" : "NETWORK", 
       "accessComplexity" : "MEDIUM", 
       "authentication" : "NONE", 
       "confidentialityImpact" : "NONE", 
       "integrityImpact" : "PARTIAL", 
       "availabilityImpact" : "NONE", 
       "baseScore" : 4.3 
      }, 
      "severity" : "MEDIUM", 
      "exploitabilityScore" : 8.6, 
      "impactScore" : 2.9, 
      "obtainAllPrivilege" : false, 
      "obtainUserPrivilege" : false, 
      "obtainOtherPrivilege" : false, 
      "userInteractionRequired" : true 
      } 
     }, 
     "publishedDate" : "2003-12-31T05:00Z", 
     "lastModifiedDate" : "2017-08-08T01:29Z" 
     }] 
} 

我希望把键作为标题(如lastModifiedDate、cpe23Uri等)。只要CSV文件中有标题和数据,我就可以过滤掉空白部分,然后选择我想要的列。

+0

@MartjinPieters ……你回答了链接的那个问题……你能回答这个问题吗? :p –

+0

该问题假设您已经拥有列表中的数据,您可以以任何您想要的方式获取数据。它不必来自输入文件,它可以来自您执行的计算。 – Barmar

+0

如果你想从另一个文件中获取它,只需编写代码来打开该文件并以适当的方式解析它。 – Barmar

回答

0

幸运的是,您的JSON数据足够规范,json.load()可以读取并解析它……但是,只说您想把这些键作为标题还不够具体——每个条目在不同层级上都有很多键(如下所示)。请注意,链接问题的提问者不仅定义了输入,还具体说明了其中的数据如何映射到CSV文件中的各列,并给出了期望的显示格式——而不只是含糊地说要把键映射到文件头。

无论如何,这里有些东西可以帮助你做到这一点。它将读取与正在阅读的JSON对象中的顶级"CVE_Items"键相关联的列表中的每个“条目”,并将其打印出来,格式良好。从输出中,您应该能够挑选出想要提取的列,并将其作为行写入CSV文件,并可以填写代码。

import json 

inputfile = "some_file.json"
outputfile = "some_file.csv"

# Load and parse the JSON before the output file is opened: opening with 'w'
# truncates the file, so reading first keeps an existing CSV intact when the
# input is missing or is not valid JSON.  The encoding is given explicitly
# so that decoding does not depend on the platform's locale.
with open(inputfile, 'r', encoding='utf-8') as fp:
    data = json.load(fp)

# newline='' lets the csv module control the line endings once rows are
# actually written to outf.
with open(outputfile, 'w', newline='') as outf:
    # Here is where you should convert each entry into a row of CSV data.
    # All this does now is show the contents of each entry in "CVE_Items" list.
    for entry in data["CVE_Items"]:
        print(json.dumps(entry, indent=4))

对于您添加到问题中的示例JSON数据的单个条目,输出如下:

{ 
    "cve": { 
     "CVE_data_meta": { 
      "ID": "CVE-2003-1547" 
     }, 
     "affects": { 
      "vendor": { 
       "vendor_data": [ 
        { 
         "vendor_name": "francisco_burzi", 
         "product": { 
          "product_data": [ 
           { 
            "product_name": "php-nuke", 
            "version": { 
             "version_data": [ 
              { 
               "version_value": "6.5" 
              }, 
              { 
               "version_value": "6.5_beta1" 
              }, 
              { 
               "version_value": "6.5_rc3" 
              }, 
              { 
               "version_value": "6.5_rc2" 
              }, 
              { 
               "version_value": "6.5_rc1" 
              } 
             ] 
            } 
           } 
          ] 
         } 
        } 
       ] 
      } 
     }, 
     "problemtype": { 
      "problemtype_data": [ 
       { 
        "description": [ 
         { 
          "lang": "en", 
          "value": "CWE-79" 
         } 
        ] 
       } 
      ] 
     }, 
     "references": { 
      "reference_data": [ 
       { 
        "url": "http://secunia.com/advisories/8478" 
       }, 
       { 
        "url": "http://securityreason.com/securityalert/3718" 
       }, 
       { 
        "url": "http://www.securityfocus.com/archive/1/archive/1/316925/30/25250/threaded" 
       }, 
       { 
        "url": "http://www.securityfocus.com/archive/1/archive/1/317230/30/25220/threaded" 
       }, 
       { 
        "url": "http://www.securityfocus.com/bid/7248" 
       }, 
       { 
        "url": "https://exchange.xforce.ibmcloud.com/vulnerabilities/11675" 
       } 
      ] 
     }, 
     "description": { 
      "description_data": [ 
       { 
        "lang": "en", 
        "value": "Cross-site scripting (XSS) vulnerability in block-Forums.php in the Splatt Forum module for PHP-Nuke 6.x allows remote attackers to inject arbitrary web script or HTML via the subject parameter." 
       } 
      ] 
     } 
    }, 
    "configurations": { 
     "CVE_data_version": "4.0", 
     "nodes": [ 
      { 
       "operator": "OR", 
       "cpe": [ 
        { 
         "vulnerable": true, 
         "cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5", 
         "cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5:*:*:*:*:*:*:*" 
        }, 
        { 
         "vulnerable": true, 
         "cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_beta1", 
         "cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_beta1:*:*:*:*:*:*:*" 
        }, 
        { 
         "vulnerable": true, 
         "cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_rc1", 
         "cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc1:*:*:*:*:*:*:*" 
        }, 
        { 
         "vulnerable": true, 
         "cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_rc2", 
         "cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc2:*:*:*:*:*:*:*" 
        }, 
        { 
         "vulnerable": true, 
         "cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_rc3", 
         "cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc3:*:*:*:*:*:*:*" 
        } 
       ] 
      } 
     ] 
    }, 
    "impact": { 
     "baseMetricV2": { 
      "cvssV2": { 
       "vectorString": "(AV:N/AC:M/Au:N/C:N/I:P/A:N)", 
       "accessVector": "NETWORK", 
       "accessComplexity": "MEDIUM", 
       "authentication": "NONE", 
       "confidentialityImpact": "NONE", 
       "integrityImpact": "PARTIAL", 
       "availabilityImpact": "NONE", 
       "baseScore": 4.3 
      }, 
      "severity": "MEDIUM", 
      "exploitabilityScore": 8.6, 
      "impactScore": 2.9, 
      "obtainAllPrivilege": false, 
      "obtainUserPrivilege": false, 
      "obtainOtherPrivilege": false, 
      "userInteractionRequired": true 
     } 
    }, 
    "publishedDate": "2003-12-31T05:00Z", 
    "lastModifiedDate": "2017-08-08T01:29Z" 
} 
+0

谢谢你的代码。如上所示,我可以把内容打印出来。如何将这些内容展平成可以写入CSV文件的行? –

+0

我不知道如何将它弄平。正如我所说的,在链接问题中,OP指定了如何将JSON数据的各个部分转换为CSV行。 JSON主要是一个树形数据结构,而CSV是一个表格(或二维数组/矩阵),从一个到另一个的映射是任意的。我无法决定如何为你做这件事 - 但如果你至少可以定义你想要的,我可以告诉你如何实现它。一个非常重要的细节是如何将具有多个值的东西(如“version_data”)变成单行。 – martineau

+0

JSON文件中的数据似乎都是键值对,有时一个键会对应多个值,如上面的“version_data”所示。我想让键值对中的“键”部分成为列的标题,用“值”作为数据填充该列。当一个键有多个值时,我需要把该键的所有值连接起来。 –