2016-08-18 64 views
0

如何在 Python 3 中合并 CSV 文件里的多行数据？示例数据：

 
id, Name, mail, data1, data2, data3 
1, Name1, [email protected], abc, 14, de 
1, Name1, [email protected], fgh, 25, kl 
1, Name1, [email protected], mno, 38, pq 
2, Name2, [email protected], abc, 14, d 

我写了一个脚本，以第一个字段作为唯一标识来去除重复行。但是，由于字段 data1–data3 中的数据并不重复，所以需要得到如下结果：

1,Name1,mail @ com,"abc,14,de,fgh,25,kl,mno,38,pq"

如何合并数组中的行? 我的代码不能正常工作:

import sys 
import csv 

# Script: read the CSV named on the command line and write 'out<name>' holding
# the leading columns of the first row seen for each id (first column).
in_fln = sys.argv[1]

# Two dialects are registered; only the comma one ('dlm') is used below.
csv.register_dialect('dlm', delimiter=',')
csv.register_dialect('dmt', delimiter=';')

# Only files whose name ends in "csv" are processed.
if in_fln.endswith("csv"):
    out_fln = 'out' + in_fln
    inputf = open(in_fln, 'r')
    seen = []      # ids already encountered, in order of first appearance
    outfile = []   # leading columns of the first row for each id
    nout = {}      # id -> data slice of the most recent row with that id
    try:
        for record in csv.reader(inputf, dialect='dlm'):
            contact_id = record[0]
            if contact_id not in seen:
                # First occurrence: keep everything before the last 4 columns.
                outfile.append(record[:-4])
                seen.append(contact_id)
            print(type(record))
            # NOTE: a repeated id replaces the stored slice rather than
            # extending it, so only the last row's data survives per id.
            nout[contact_id] = record[-4:-1]
    finally:
        # Runs even if reading failed part-way: write what was collected,
        # then release the input file.
        with open(out_fln, 'w', newline='') as out_handle:
            csv.writer(out_handle, dialect='dlm').writerows(outfile)
        inputf.close()
        print("All done")

回答

0

这应该可以做到。

from collections import defaultdict 
import pandas as pd 


# Rebuild the sample table from the question.
KEY_COLUMNS = ('id', 'Name', 'mail')
VALUE_COLUMNS = ('data1', 'data2', 'data3')

sample_rows = [
    [1, 'Name1', '[email protected]', 'abc', 14, 'de'],
    [1, 'Name1', '[email protected]', 'fgh', 25, 'kl'],
    [1, 'Name1', '[email protected]', 'mno', 38, 'pq'],
    [2, 'Name2', '[email protected]', 'abc', 14, 'd'],
]
df = pd.DataFrame(sample_rows, columns=list(KEY_COLUMNS + VALUE_COLUMNS))

# Map each (id, Name, mail) identity to the list of its data triples.
res = defaultdict(list)

for _, record in df.iterrows():
    identity = tuple(record[name] for name in KEY_COLUMNS)
    payload = tuple(record[name] for name in VALUE_COLUMNS)
    res[identity].append(payload)

for identity, payloads in res.items():
    print(identity, payloads)

# Prints one line per identity (insertion order on Python 3.7+):
# (1, 'Name1', '[email protected]') [('abc', 14, 'de'), ('fgh', 25, 'kl'), ('mno', 38, 'pq')]
# (2, 'Name2', '[email protected]') [('abc', 14, 'd')]
0

我自己的版本与上面的思路非常接近：

现在所有的工作!

#!/usr/bin/env python3 
import csv, re 
import os, sys 
# You can replace here and choose any delimiter:
dm = ','


def merge_rows(rows):
    """Collapse rows sharing the same first field into one row per id.

    For every input row the data triple ``row[-4], row[-3], row[-2]`` is
    joined with ``', '``.  The output row for an id consists of the leading
    columns (``row[:-4]``) of the first row seen with that id, followed by a
    single field holding all of that id's triples joined with ``','``.

    Rows are grouped by id wherever they appear in the input, so duplicates
    do not have to be adjacent.  Blank rows (``[]``) are skipped.  Output
    order follows first appearance of each id (relies on dict insertion
    order, Python 3.7+).

    Args:
        rows: iterable of lists of strings, e.g. a ``csv.reader``.

    Returns:
        A list of merged rows (lists of strings).

    Raises:
        IndexError: if a non-blank row has fewer than four fields.
    """
    merged = {}
    for row in rows:
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to merge.
            continue
        triple = row[-4] + ', ' + row[-3] + ', ' + row[-2]
        key = row[0]
        if key not in merged:
            merged[key] = (row[:-4], [])
        merged[key][1].append(triple)
    return [lead + [','.join(triples)] for lead, triples in merged.values()]


def main(argv):
    """Merge the CSV file named in ``argv[1]`` into ``out<name>`` beside it.

    Exits with an error message when no file is given or when the name does
    not end in "csv".
    """
    if len(argv) < 2:
        sys.exit("usage: " + argv[0] + " FILE.csv")
    in_fln = argv[1]
    # if this .csv file do:
    if in_fln[-3:] != "csv":
        sys.exit("not a csv file: " + in_fln)

    # Prefix only the file name so inputs given with a directory still work.
    out_fln = os.path.join(os.path.dirname(in_fln),
                           'out' + os.path.basename(in_fln))

    with open(in_fln, 'r', newline='') as src:
        output_rows = merge_rows(csv.reader(src, delimiter=dm, quotechar='"'))

    # The merged field always contains the delimiter, so csv.writer quotes it;
    # no textual post-processing of the written file is needed.
    with open(out_fln, 'w', newline='') as dst:
        writer = csv.writer(dst, delimiter=dm, quotechar='"',
                            lineterminator=os.linesep)
        writer.writerows(output_rows)
    print("All done")


if __name__ == "__main__":
    main(sys.argv)