2015-05-09 73 views
0

我有一个循环,为 pandas DataFrame 的每一列生成一张图。我使用 IPython,但这些图都在循环结束后才一起显示,而不是显示在代码中调用绘图的位置。(标题:强制 IPython 中的 pandas 立即绘图)

我该如何强制 IPython/pandas 在调用 plot 函数的确切位置显示各列的图?

def explore(file, sep=";", top = 5, k='Code Agence'): 
    """ 

    """ 
    %matplotlib inline 
    import time 
    import matplotlib.pyplot as plt 
    import pandas as pd 
    import time 
    import sys 
    dataframes_top = [] 
    start = time.time() 
    #print "Exploring :", get_file_name(file), "with %s lines"%(top) 

    to_explore = pd.read_csv(file, sep=";", error_bad_lines=False) 
    cols = to_explore.columns 
    i = -1 
    for col in cols: 
     i +=1 
     serie = to_explore[col] 
     try: 
      print"plotting %s"%(col) 
      serie.plot().show() 
      time.sleep(2) 
     except Exception as e: 
      "plotting issue :%s"%(e) 
     #serie.index = index 

     null = serie.isnull() 
     not_null = len([x for x in null if not x]) 
     r = not_null/len(serie) 

     s = serie.value_counts()#return value as index, count as value 
     pct_top = s.values[:top]/not_null 
     serie_top_n = pd.Series(s.values[:top],index=s.index[:top]) 
     local_df = pd.DataFrame() 
     local_df[col]=serie_top_n 
     local_df['pct']=pct_top 
     somme = local_df['pct'].sum() 

     pct_2_top= s.values[:top*2]/not_null 
     serie_2_top_n = pd.Series(s.values[:top*2],index=s.index[:top*2]) 
     local_df_2_top = pd.DataFrame() 
     local_df_2_top[col]=serie_2_top_n 
     local_df_2_top['pct']=pct_2_top 
     somme_2_top = local_df_2_top['pct'].sum() 


     print 
     print "%s : [col %s = %s ] "%(get_file_name(file), i,col) 
     print 
     print "%.2f"%(r), " pct not null" 
     print "%.2f pct on the first %s "%(somme, top) 
     print "%.2f pct on the first %s "%(somme_2_top, 2*top) 
     print "plot :" 
     print pd.DataFrame(serie.describe()).T 

     print 
     print local_df.T 
     print "plot :" 
     local_df.plot() 

     print "="*100 

     dataframes_top.append(local_df) 
    elapsed = time.time()-start 
    print "="*20, elapsed, "for %s lines"%(len(serie)),"="*20 
    sys.stdout.flush() 

回答

0

每次绘制新图后务必调用 plt.show()。如果不这样做,IPython 会自动缓冲每个图,并在到达单元格末尾时一并显示。我想你是忘了在循环体的末尾调用它。

下面是一段示例代码,它会在循环内部逐个正确地显示图形,而不是等到最后才一起显示:

%matplotlib inline 

import matplotlib.pyplot as plt 
import random 
from pandas import Series 
from numpy.random import randn 

for i in range(5): 
    print("Before graph {0}".format(i)) 
    ts = Series(randn(1000), index=date_range('1/1/2000', periods=1000)) 
    ts = ts.cumsum() 
    ts.plot() 

    plt.show() 
    print("After graph {0}".format(i)) 

运行这段代码后,每张图都会如预期那样显示在对应的打印输出之间。

我使用的是 IPython Notebook 3.0.0-f75fda4 版本和 Python 3。