假设我有一个包含数百列(以逗号分隔)的CSV文件:如何用Python合并CSV文件中的这几百列
ID,Column1,Column2,...,Column700
1,data,,...,
2,,data,...,
...
700,,,...,data
如何合并这些列,使所有的“数据”都集中到同一列中? 该CSV文件是由此处提供的JSON文件生成的:https://nvd.nist.gov/vuln/data-feeds#JSON_FEED
下面是我用来把JSON转换为CSV的代码(取自另一个SO帖子):
def to_string(s):
    """Return *s* converted with ``str()``.

    If ``str(s)`` raises ``UnicodeEncodeError``, the value is encoded as
    UTF-8 instead and the encoded result is returned. Any other exception
    raised by ``str(s)`` propagates to the caller unchanged, so unrelated
    failures are not hidden behind the encoding fallback.
    """
    try:
        return str(s)
    except UnicodeEncodeError:
        # Change the encoding type if needed
        return s.encode('utf-8')
def reduce_item(key, value):
    """Flatten *value* into the module-level ``reduced_item`` dict.

    Lists and dicts are walked recursively; every nested level appends
    ``'_' + <index or sub-key>`` to *key*. Each leaf (anything that is not
    a list or dict) is stored as
    ``reduced_item[to_string(key)] = to_string(value)``.

    An empty list or dict contributes no entry at all, because there is
    no leaf to store.

    The caller must bind the global ``reduced_item`` to a dict before
    calling this function; nothing is returned.
    """
    global reduced_item

    # Reduction Condition 1: a list -> one sub-key per positional index
    if isinstance(value, list):
        for index, sub_item in enumerate(value):
            reduce_item(key + '_' + to_string(index), sub_item)
    # Reduction Condition 2: a dict -> one sub-key per dictionary key
    elif isinstance(value, dict):
        for sub_key, sub_value in value.items():
            reduce_item(key + '_' + to_string(sub_key), sub_value)
    # Base Condition: a leaf value is recorded under the accumulated key
    else:
        reduced_item[to_string(key)] = to_string(value)
if __name__ == "__main__":
    # Script entry point: flatten every item found under ``node`` in a JSON
    # file and write the result as a fully quoted CSV with one column per
    # distinct flattened key.

    # Imported locally so this block brings the modules it uses into scope.
    import csv
    import json
    import sys

    if len(sys.argv) == 4:
        # Reading arguments
        node, json_file_path, csv_file_path = sys.argv[1:4]
    elif len(sys.argv) == 1:
        # No command line arguments: fall back to the hard-coded defaults.
        node = "CVE_Items"
        # Raw strings: in a normal literal "\f" is the form-feed escape,
        # which would silently corrupt these Windows-style paths.
        json_file_path = r"some\file.json"
        csv_file_path = r"some\file.csv"
    else:
        raise SystemExit("\nUsage: python json_to_csv.py <node_name> <json_in_file_path> <csv_out_file_path>\n")

    # The context manager closes the input file even if parsing fails.
    with open(json_file_path, 'r', encoding='utf-8') as fp:
        raw_data = json.load(fp)

    try:
        data_to_be_processed = raw_data[node]
    except (KeyError, TypeError):
        # The node is absent (KeyError) or the top-level value cannot be
        # indexed by a string key, e.g. a list (TypeError): process the
        # whole document instead.
        data_to_be_processed = raw_data

    processed_data = []
    header_keys = set()
    for item in data_to_be_processed:
        # reduce_item() writes into this module-level dict, so it has to be
        # rebound to a fresh dict before each item is flattened.
        reduced_item = {}
        reduce_item(node, item)
        header_keys.update(reduced_item)
        processed_data.append(reduced_item)

    # Sorted union of all keys; rows lacking a key get an empty cell from
    # DictWriter's default ``restval``.
    header = sorted(header_keys)

    with open(csv_file_path, 'w', newline='') as f:
        writer = csv.DictWriter(f, header, quoting=csv.QUOTE_ALL)
        writer.writeheader()
        writer.writerows(processed_data)
下面是JSON文件中的一个示例条目:
{
"CVE_data_type" : "CVE",
"CVE_data_format" : "MITRE",
"CVE_data_version" : "4.0",
"CVE_data_numberOfCVEs" : "6208",
"CVE_data_timestamp" : "2017-08-14T18:06Z",
"CVE_Items" : [ {
"cve" : {
"CVE_data_meta" : {
"ID" : "CVE-2003-1547"
},
"affects" : {
"vendor" : {
"vendor_data" : [ {
"vendor_name" : "francisco_burzi",
"product" : {
"product_data" : [ {
"product_name" : "php-nuke",
"version" : {
"version_data" : [ {
"version_value" : "6.5"
}, {
"version_value" : "6.5_beta1"
}, {
"version_value" : "6.5_rc3"
}, {
"version_value" : "6.5_rc2"
}, {
"version_value" : "6.5_rc1"
} ]
}
} ]
}
} ]
}
},
"problemtype" : {
"problemtype_data" : [ {
"description" : [ {
"lang" : "en",
"value" : "CWE-79"
} ]
} ]
},
"references" : {
"reference_data" : [ {
"url" : "http://secunia.com/advisories/8478"
}, {
"url" : "http://securityreason.com/securityalert/3718"
}, {
"url" : "http://www.securityfocus.com/archive/1/archive/1/316925/30/25250/threaded"
}, {
"url" : "http://www.securityfocus.com/archive/1/archive/1/317230/30/25220/threaded"
}, {
"url" : "http://www.securityfocus.com/bid/7248"
}, {
"url" : "https://exchange.xforce.ibmcloud.com/vulnerabilities/11675"
} ]
},
"description" : {
"description_data" : [ {
"lang" : "en",
"value" : "Cross-site scripting (XSS) vulnerability in block-Forums.php in the Splatt Forum module for PHP-Nuke 6.x allows remote attackers to inject arbitrary web script or HTML via the subject parameter."
} ]
}
},
"configurations" : {
"CVE_data_version" : "4.0",
"nodes" : [ {
"operator" : "OR",
"cpe" : [ {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5:*:*:*:*:*:*:*"
}, {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_beta1",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_beta1:*:*:*:*:*:*:*"
}, {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc1",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc1:*:*:*:*:*:*:*"
}, {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc2",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc2:*:*:*:*:*:*:*"
}, {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc3",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc3:*:*:*:*:*:*:*"
} ]
} ]
},
"impact" : {
"baseMetricV2" : {
"cvssV2" : {
"vectorString" : "(AV:N/AC:M/Au:N/C:N/I:P/A:N)",
"accessVector" : "NETWORK",
"accessComplexity" : "MEDIUM",
"authentication" : "NONE",
"confidentialityImpact" : "NONE",
"integrityImpact" : "PARTIAL",
"availabilityImpact" : "NONE",
"baseScore" : 4.3
},
"severity" : "MEDIUM",
"exploitabilityScore" : 8.6,
"impactScore" : 2.9,
"obtainAllPrivilege" : false,
"obtainUserPrivilege" : false,
"obtainOtherPrivilege" : false,
"userInteractionRequired" : true
}
},
"publishedDate" : "2003-12-31T05:00Z",
"lastModifiedDate" : "2017-08-08T01:29Z"
} ]
}
如果您能给出一个简单的输入和输出示例会很有帮助——当然,示例中可以省略那700列;) –
您是否已经有为此编写的Python代码?要将JSON转换为CSV,在PHP中可以直接把JSON数据读取为对象,再转储为CSV格式,只需几行代码即可。在Python中也可以这样做。 –
@Aron - 已按要求添加示例。我目前无法使用PHP。 –