2016-01-28 35 views
0

所以我想加快我的Python脚本从文件加载数据并将其存储在数组中。但是我意识到如果我重复1020次左右的程序,不要问我为什么会出现分段错误。用于加载数据的代码是:Ctypes分段错误

import os,sys 
import numpy as np 
import pandas as pd 
import ctypes as ct 

VERSION = 0.1 

try: 
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "_vec") 
except NameError: 
    path = "./_im7" 

if not(sys.platform in ('win32', 'cygwin')): 
    path += '.so.'+str(VERSION) 
    libfunctions = ct.cdll.LoadLibrary(path) 
else: 
    libfunctions = ct.cdll.LoadLibrary(path) 

libfunctions.readvec.restype = ct.c_void_p 
libfunctions.readvec.argtypes = [ct.c_char_p, np.ctypeslib.ndpointer(ct.c_float), \ 
    ct.c_int,ct.c_int,ct.c_int] 


def readvecCT(filename,nx,ny,nz): 
    # we are supposing for the moment that the naming scheme PIV__vxy.case PIV__vxy.geo not changes should that 
    # not be the case appropriate changes have to be made to the corresponding file 

    # ctypes 
    data_temp = np.zeros((2*ny*nx,1),dtype=np.dtype('f4')) 
    libfunctions.readvec(ct.c_char_p(filename),data_temp,ct.c_int(nx), ct.c_int(ny),ct.c_int(3)) 

    # W value 
    if (nz)>1: 
     return data_temp[:nx*ny].reshape(ny, nx), data_temp[nx*ny:2*nx*ny].reshape(ny, nx), data_temp[2*nx*ny:].reshape(ny, nx) 
    else:  
     return data_temp[:nx*ny].reshape(ny, nx), data_temp[nx*ny:].reshape(ny, nx)   

底层C函数:

#include <stdio.h> 


void readvec(const char *fname, float *data, int nx, int ny,int skiprows) { 
    //void cfun(const double * indata, int rowcount, int colcount, double * outdata) { 
    int i,j,check; 
    size_t length; 
    FILE *file; 
    char buffer[1024]; 
    char *buffer_ptr = &buffer[0]; 
    //puts("Here we go!"); 

    file = fopen(fname, "r"); 
    //printf("Nx: %d Ny: %d skiprows: %d \n",nx,ny,skiprows); 
    //printf("Filename %s \n",fname); 
    for (i=0;i<=skiprows;i++){ 
    check=getline(&buffer_ptr,&length,file); 
    //printf("buffer: %s \n",buffer); 
    if (check==-1){ 
     puts("ERROR"); 
    } 
    } 

    for (i = 0; i < ny; i++) { 
    for (j=0;j<nx;j++){ 
     check=fscanf(file,"%f",&data[i*nx+j]); 
     //printf("Data %s\n",buffer); 
     if (check==-1){ 
     puts("ERROR"); 
     } 
    } 
    } 

    for (i = 0; i < ny; i++) { 
    for (j=0;j<nx;j++){ 
     check=fscanf(file,"%f",&data[ny*nx+i*nx+j]); 
     if (check==-1){ 
     puts("ERROR"); 
     } 
    } 
    } 

    //fclose(file); 
    //puts("Done!"); 
} 

和一个简单的测试,产生错误:

import time 
import numpy as np 
import libvec.libvec as vec 
import matplotlib.pyplot as plt 


tmp_geo = '/mnt/shared/projects/MORPHING/Users/jschelle/raw_treated_201509/RES_u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz/u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz_20150824_seq_01/ParaView/PIV__vxy.geo' 
tmp_file = '/mnt/shared/projects/MORPHING/Users/jschelle/raw_treated_201509/RES_u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz/u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz_20150824_seq_01/ParaView/PIV__vxy_01019.vec' 
tmp_case = '/mnt/shared/projects/MORPHING/Users/jschelle/raw_treated_201509/RES_u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz/u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz_20150824_seq_01/ParaView/PIV__vxy.case' 

x,y,z = vec.readgeo(tmp_geo) 
nx = len(x) 
ny = len(y) 
nz = len(z) 
iterations = 1100 

start_time = time.time() 

for i in range(iterations): 
    U,V = vec.readvecCT(tmp_file,nx,ny,nz); 

print("Ctypes --- %f seconds ---" % ((time.time() - start_time)/iterations)) 

现在,当迭代次数是1000没有问题发生,但在1100我得到分段错误。我想它与内存管理有关,但我不知道如何修复它,甚至不知道从哪里开始!任何帮助将不胜感激。

THX很多提前

Ĵ

+0

您是否曾经在C函数中初始化'length'的值?看起来这个人的价值可能是不确定的。 – JCVanHamme

+0

我会在'check = fscanf(file,“%f”,&data [ny * nx + i * nx + j])''行上检查索引'ny * nx + i * nx + j''。它是否低于'2 * ny * nx'? –

+0

您忘记初始化'size_t length = 1024',所以'getline'会在堆上重新分配''buffer_ptr',导致内存泄漏。无论哪种方式,如果地址已经改变,你需要'释放'分配的内存,否则你有内存泄漏。但是,这不会导致段错误。 – eryksun

回答

1

事实证明:

fclose(file) 

有人评论因此这并不会直接导致一个错误,但在同一时间之后导致段错误n次迭代

非常感谢大家!