2014-02-13 30 views
1
#!/usr/bin/python 

import csv 
import re 

# Product names to look for; a cell must equal one of these exactly.
string_1 = ('OneTouch AT')
string_2 = ('LinkRunner AT')
string_3 = ('AirCheck')

print("hello Pythong! ")

# Copy every row of data.csv that has at least one cell exactly equal to one
# of the product names into data2.csv (tab-delimited, every field quoted).
# "rb"/"wb" are the file modes the Python 2 csv module expects.
# `with` closes both files even if reading or writing raises.
wanted = [string_1, string_2, string_3]
with open('data.csv', "rb") as inFile, open('data2.csv', "wb") as outFile:
    reader = csv.reader(inFile)
    writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)
    for row in reader:
        # any() stops at the first matching cell, so a row is written at
        # most once even when several of its cells match.
        if any(col in wanted for col in row):
            writer.writerow(row)

我想弄清楚如何搜索3个项目的CSV文件。如果这些项目存在,则打印该行。理想情况下,我只希望列1和列3打印到新文件。使用python搜索CSV文件中的字符串并写入结果

示例数据文件

LinkRunner AT Video,10,20 
Wireless Performance Video OneTouch AT,1,2 
Wired OneTouch AT,200,300 
LinkRunner AT,200,300 
AirCheck,200,300 
+1

什么不起作用? – 2014-02-13 20:55:32

+0

目前唯一还不行的是:我只想输出第一列和第三列。理想情况下,它会先输出 AirCheck 的行,然后是 LinkRunner 的行,最后是 OneTouch 的行。 – Kris

回答

3

I'm trying to figure out how to search a CSV file for 3 items. If those items exist print the row. Ideally I would like only Columns 1 and 3 to print to a new file.

试试这个:

import csv 

# Product names to match; the first column must equal one of these exactly.
search_for = ['OneTouch AT','LinkRunner AT','AirCheck']

# Copy the matching rows from in.csv to out.csv as tab-delimited text.
with open('in.csv') as inf, open('out.csv','w') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        # csv.reader returns a blank line as an empty list; test `row` first
        # so that row[0] cannot raise IndexError.
        if row and row[0] in search_for:
            print('Found: {}'.format(row))
            writer.writerow(row)
0
#!/usr/bin/python 

import csv 
import numpy as np 

class search_csv(object):
    """Search a tab-delimited file for rows that contain given values.

    The whole input file is read into memory when the object is created.
    Matching rows, reduced to the requested columns, are written to the
    output file as tab-delimited text. Each search method closes the output
    file when it finishes, so an instance is good for a single search.
    """

    def __init__(self, infile, outfile):
        """Load *infile* into memory and open *outfile* for writing.

        infile  -- path of the tab-delimited file to search.
        outfile -- path of the file that receives the matched rows; it is
                   opened (and therefore truncated) immediately.
        """
        # Close the input as soon as it has been read, even on error.
        with open(infile, 'rb') as source:
            read_infile = list(csv.reader(source, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL))
        self.non_numpy_data = read_infile
        self.data = np.array(read_infile, dtype=None)
        self.outfile = open(outfile, 'wb')
        self.writer_ = csv.writer(self.outfile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    def write_to(self, matched_values):
        """Write every row in *matched_values* to the output file.

        Returns True.
        """
        self.writer_.writerows(matched_values)
        print(' Matched Values Written ')
        return True

    def searcher(self, items, return_cols=(0, 2)):
        """Write the rows containing any of *items*, using numpy.

        items       -- list of values; a cell must equal one of them exactly,
                       e.g. ['OneTouch AT', 'LinkRunner AT', 'AirCheck'].
        return_cols -- zero-based indexes of the columns to write
                       (default: first and third).

        The indexing below needs self.data to be two-dimensional, i.e.
        every input row must have the same number of cells.
        Closes the output file and returns True.
        """
        find_these = np.array(items, dtype=None)
        # np.isin keeps the shape of self.data; older numpy only has in1d,
        # which flattens and therefore needs the reshape.
        if hasattr(np, 'isin'):
            cell_hits = np.isin(self.data, find_these)
        else:
            cell_hits = np.in1d(self.data, find_these).reshape(self.data.shape)
        # One boolean per row, so a row with several matching cells is
        # written once rather than once per matching cell.
        row_hits = cell_hits.any(axis=1)
        matching_data = self.data[row_hits][:, list(return_cols)]
        try:
            self.write_to(matching_data)
        finally:
            self.outfile.close()
        return True

    def non_numpy_search(self, items, return_cols=(0, 2)):
        """Write the rows containing any of *items*, without numpy.

        items       -- list of values; a cell must equal one of them exactly.
        return_cols -- zero-based indexes of the columns to write
                       (default: first and third).

        Closes the output file and returns True.
        """
        matched = []
        for row in self.non_numpy_data:
            # Test every searched value, not just the first one; any()
            # stops at the first hit so each row is kept at most once.
            if any(item in row for item in items):
                matched.append([row[idx] for idx in return_cols])
        try:
            self.write_to(matched)
        finally:
            # Closing flushes the rows to disk, as searcher() does.
            self.outfile.close()
        return True


### now use the class ### 

# Values to look for; a cell must equal one of them exactly (case-sensitive).
SEARCHING_FOR = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

IN_FILE = 'in_file.csv'
OUT_FILE = 'out_file.csv'

# non_numpy_search is a method of search_csv, not a module-level function,
# so the object has to be built from the class first.
search_csv(IN_FILE, OUT_FILE).non_numpy_search(SEARCHING_FOR)

通过你的问题我假设你只是想完成手头的任务,并不真正关心如何措辞。因此,请复制并粘贴此文件,并将您的数据文件用作'IN_FILE'值和要写入的文件名作为'OUT_FILE'值。完成后,将要搜索的值放入“SEARCHING_FOR”列表中。

注意事项.... SEARCHING_FOR应该是一个列表。

SEARCHING_FOR中的值完全匹配,因此'A'不匹配'a'。如果你想使用正则表达式或更复杂的东西让我知道。

函数'non_numpy_search'中有'return_cols'参数。它默认为第一列和第三列。

如果你没有numpy让我知道。

+0

我得到了以下错误信息:Traceback (most recent call last): File "csvparserV2.0.py", line 34: search_csv(IN_FILE, OUT_FILE).searcher(SEARCHING_FOR); File "csvparserV2.0.py", line 21, in searcher: matching_y = np.in1d(self.data, find_these).reshape(self.data.shape).nonzero()[0]; File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/numpy/lib/arraysetops.py", line 335, in in1d: order = ar.argsort(kind='mergesort'); TypeError: requested sort not available for type – Kris

+0

你知道你正在运行哪个版本的 numpy 吗?可以执行 print np.__version__ 查看。 –

+0

如果文件不是巨大的,我刚刚发布的编辑应该可以正常工作。 –

0
#!/usr/bin/python 

import csv 
import re 
import sys 
import gdata.docs.service 


#string_1 = ('OneTouch AT') 
#string_2 = ('LinkRunner AT') 
#string_3 = ('AirCheck') 

# Lower-case search terms; the position in this list is the group index and
# therefore the order in which matching rows are written out.
searched = ['aircheck', 'linkrunner at', 'onetouch at']

def find_group(row):
    """Return the group index of a row.

    Columns are examined left to right, case-insensitively. The result is
    the index in `searched` of the first term found as a substring of a
    column:
        0 if the column contains searched[0]
        1 if the column contains searched[1]
        etc.
    Returns -1 when no column contains any term, including for an empty row.
    """
    for col in row:
        lowered = col.lower()
        for j, s in enumerate(searched):
            if s in lowered:
                return j
    # Only give up after every column has been checked.
    return -1

def does_match(string):
    """Return True if any term in `searched` occurs in *string*, ignoring case of *string*."""
    lowered = string.lower()
    for term in searched:
        if term in lowered:
            return True
    return False

# Open the input file for reading and the output file for writing.
# "rb"/"wb" are the file modes the Python 2 csv module expects.
# `with` closes both files even if an error occurs part-way through.
with open('data.csv', "rb") as inFile, open('data2.csv', "wb") as outFile:
    reader = csv.reader(inFile)
    writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

    # Build a list of items to sort. If row 12 contains 'linkrunner at'
    # (group 1), a triple (1, 12, row) is stored. When the triples are
    # sorted, all rows in group 0 come first, then all rows in group 1,
    # etc.; within a group the original file order is kept because the row
    # number is the second sort key.
    stored = []
    for i, row in enumerate(reader):
        g = find_group(row)
        # Guard against a missing group as well as the -1 "not found" value.
        if g is not None and g >= 0:
            stored.append((g, i, row))
    stored.sort()

    for g, i, row in stored:
        writer.writerow(tuple(row[k] for k in (0, 2)))  # output columns 1 and 3
+0

经过一些谷歌搜索多一点阅读我想出了另一种方式来做到这一点。 :-)目标已经完成。下一个目标是更新Google文档的电子表格。 – Kris

相关问题