2015-09-07 63 views
0

我正在读取一批每日文件,并用 glob 将它们连接成单独的 dataframe,最终再把它们合并在一起,基本上生成一个用于连接仪表板的单个大文件。我对 Python 不算很熟,但经常使用 pandas 和 sklearn。如你所见,我基本上只是读取最近 60 天(或更多)的数据(最后 60 个文件),并为每类数据创建一个 dataframe。这段代码可以工作,但我想知道是否有更 pythonic、更好的写法?我看过 PyData 上关于"不要被 PEP 8 束缚、确保代码 pythonic"的视频,很有意思。(仅供参考——我需要读取 60 天数据的原因是:客户可能针对很久以前发生的通话填写调查。例如客户今天填写的调查对应的是七月份发生的通话,而我需要知道那次通话的信息——持续了多久、主题是什么,等等。)如何让这段代码更 pythonic?

# --- Configuration: working directory, file-name patterns, date window ---
os.chdir(r'C:\\Users\Documents\FTP\\')
loc = r'C:\\Users\Documents\\'
rosterloc = r'\\mand\\'
splitsname = r'Splits.csv'
fcrname = r'global_disp_'
npsname = r'survey_'
ahtname = r'callbycall_'
rostername = 'Daily_Roster.csv'
vasname = r'vas_report_'
ext = '.csv'

# Look back 60 days: surveys filled in today can refer to calls that
# happened long ago, so older call logs are still needed.
startdate = dt.date.today() - Timedelta('60 day')
enddate = dt.date.today()
daterange = Timestamp(enddate) - Timestamp(startdate)
daterange = (daterange / np.timedelta64(1, 'D')).astype(int)


def _concat_daily(prefix, dates, date_fmt, **read_kwargs):
    """Read one CSV per date (``prefix + date + ext``) and concat them.

    The try/except sits INSIDE the loop: in the original code it wrapped
    the whole loop, so the first missing file silently aborted reading
    every later file. Missing files are now logged and skipped.

    prefix      -- file-name prefix, e.g. 'callbycall_'
    dates       -- iterable of Timestamps to read
    date_fmt    -- strftime format embedded in the file name
    read_kwargs -- forwarded to pd.read_csv (e.g. parse_dates=[...])
    Returns the concatenated DataFrame; raises ValueError from pd.concat
    if no file at all could be read (same failure mode as before).
    """
    frames = []
    for day in dates:
        path = prefix + day.strftime(date_fmt) + ext
        try:
            frames.append(pd.read_csv(path, **read_kwargs))
        except IOError:
            print('File does not exist:', path)
    return pd.concat(frames)


# Dates walked forward over the whole window (shared by AHT/FCR/VAS).
forward_dates = [Timestamp(startdate) + dt.timedelta(n) for n in range(daterange)]

aht = _concat_daily(ahtname, forward_dates, '%Y_%m_%d')
print('AHT Done')

fcr = _concat_daily(fcrname, forward_dates, '%m_%d_%Y',
                    parse_dates=['call_time'])
print('FCR Done')

# NPS only needs the last 3 days, counted back from the end date.
nps = _concat_daily(npsname,
                    [Timestamp(enddate) - dt.timedelta(n) for n in range(3)],
                    '%m_%d_%Y',
                    parse_dates=['call_date', 'date_completed'])
print('NPS Done')

vas = _concat_daily(vasname, forward_dates, '%m_%d_%Y',
                    parse_dates=['Call_date'])
print('VAS Done')

roster = pd.read_csv(loc + rostername)
print('Roster Done')
splits = pd.read_csv(loc + splitsname)
print('Splits Done')
+15

如果这是**可以正常工作的代码**,而你希望对它进行改进,请考虑在 http://codereview.stackexchange.com 上提问。 – jonrsharpe

回答

-1

我没有更改这些名称,但恕我直言,它们应该更具描述性,例如:pd == pandas?不确定。下面是一种更 pythonic 的写法:

from functools import partial 
import logging 
from operator import add, sub 
import os 
import datetime as dt 
import contextlib 

# --- Configuration: working directory, file-name patterns, date window ---
# NOTE(review): Timedelta, Timestamp, np and pd are not imported in this
# snippet -- presumably pandas/numpy imports live elsewhere; confirm.
os.chdir(r'C:\\Users\Documents\FTP\\') 
location = r'C:\\Users\Documents\\' 
roster_location = r'\\mand\\' 
splits_name = r'Splits.csv' 
fcr_name = r'global_disp_' 
nps_name = r'survey_' 
aht_name = r'callbycall_' 
roster_name = 'Daily_Roster.csv' 
vas_name = r'vas_report_' 
ext = '.csv' 
# 60-day look-back window: surveys can refer to calls made long ago.
start_date = dt.date.today() - Timedelta('60 day') 
end_date = dt.date.today() 
# Number of whole days in the window, as a plain int.
daterange = Timestamp(end_date) - Timestamp(start_date) 
daterange = (daterange/np.timedelta64(1, 'D')).astype(int) 
logger = logging.getLogger() # logger is better than "print" in case, when you have multiple tiers to log. In this case: regular debug and exceptions 


def timestamps_in_range(daterange, method=add, base=None):
    """Yield ``daterange`` consecutive Timestamps, one day apart.

    daterange -- number of days to generate
    method    -- operator.add to walk forward, operator.sub to walk backward
    base      -- anchor date; defaults to the module-level start_date when
                 walking forward and end_date when walking backward.

    Fixes vs. the original: it always anchored at start_date, so the
    ``sub`` case (used for the last-3-days NPS window) produced dates
    BEFORE the window start instead of counting back from end_date, as
    the question's original loop did. Also ``xrange`` was Python-2-only;
    ``range`` works on both 2 and 3.
    """
    if base is None:
        base = start_date if method is add else end_date
    for n in range(daterange):
        yield method(Timestamp(base), dt.timedelta(n))


def read_csv(name, date_range, date_fmt='%Y_%m_%d', **kwargs):
    """Read the daily CSV ``name + date + ext`` into a DataFrame.

    name       -- file-name prefix (e.g. 'survey_')
    date_range -- a Timestamp; formatted into the file name
    date_fmt   -- strftime pattern for the date part; the question's code
                  used '%m_%d_%Y' for the fcr/nps/vas feeds, so this is
                  now a parameter instead of being hard-coded
    kwargs     -- forwarded to pd.read_csv (e.g. parse_dates=[...])

    Fix vs. the original: ``kwargs`` was passed POSITIONALLY into
    pd.read_csv (landing in the ``sep`` slot) instead of being unpacked
    with ``**kwargs``.
    """
    return pd.read_csv(name + date_range.strftime(date_fmt) + ext, **kwargs)


def log_done(module):
    """Emit a debug entry marking *module*'s load as complete.

    Thin wrapper so the repeated "<name> Done" messages share one format.
    """
    logger.debug('{0} Done'.format(module))


@contextlib.contextmanager  # contextmanager separates business logic from exception handling
def mapper(function, iterable):
    """Yield ``[function(x) for x in iterable]``, logging IOErrors.

    Fixes vs. the original:
    - ``except IOError, err`` is Python-2-only syntax; ``as err`` works
      on both 2.6+ and 3.
    - On Python 3 ``map`` is lazy, so errors raised while the caller
      consumed it escaped the try; materializing the list keeps them
      inside (and matches the eager Python-2 map behavior).
    - On IOError the original generator never yielded, making
      contextlib raise RuntimeError('generator didn't yield'); it now
      logs and yields an empty list so the with-block still runs.
    - The log format string had no placeholder for the filename arg.
    """
    results = []
    try:
        results = [function(item) for item in iterable]
    except IOError as err:
        logger.error('File does not exist: %s', err.filename)
    yield results


# Main flow: each dataset is produced by mapping a one-argument reader
# over its date range inside the error-logging context manager. The
# small wrapper functions below pre-fill read_csv's prefix and options
# so `mapper` can call them with just a date.

def read_csv_aht(day):
    return read_csv(aht_name, day)

def read_csv_fcr(day):
    return read_csv(fcr_name, day)

def read_csv_nps(day):
    return read_csv(nps_name, day, parse_dates=['call_date', 'date_completed'])

def read_csv_vas(day):
    return read_csv(vas_name, day, parse_dates=['Call_date'])


with mapper(read_csv_aht, timestamps_in_range(daterange)) as calls:
    aht = pd.concat(calls)
    log_done('AHT')

with mapper(read_csv_fcr, timestamps_in_range(daterange)) as data:
    fcr = pd.concat(data)
    log_done('FCR')

# NPS only looks back 3 days from the end of the window.
with mapper(read_csv_nps, timestamps_in_range(3, sub)) as frames:
    nps = pd.concat(frames)
    log_done('NPS')

with mapper(read_csv_vas, timestamps_in_range(daterange)) as bracket:
    vas = pd.concat(bracket)
    log_done('VAS')

roster = pd.read_csv(location + roster_name)
log_done('Roster')

splits = pd.read_csv(location + splits_name)
log_done('Splits')
+0

这将有助于引用并解释为了使OP代码更“pythonic”而做出的具体更改。 – bmhkim