2016-01-26 43 views
0

我想将包含函数指针数组的结构复制到设备。我无法弄清楚下面的代码有什么问题。内核中的代码不起作用。将结构与函数指针数组复制到设备

#include <stdio.h> 
#include <stdlib.h> 
#include <math.h> 
#include <cuda.h> 

#include "cuda_runtime.h" 
#include "device_launch_parameters.h" 

#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } 
    inline void gpuAssert(cudaError_t code, const char *file, int line,bool abort = true) 
    { 
    if (code != cudaSuccess) 
    { 
     fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); 
     if (abort) exit(code); 
    } 
} 

#define N_MODELS 2 
#define N_PARAMS 2 

struct userData 
{ 
    float (*eval[N_MODELS]) (const float params[N_PARAMS]); 

}; 


__device__ float add(const float params[N_PARAMS]) 
{ 
    return params[0] + params[1]; 

} 

__device__ float mult(const float params[N_PARAMS]) 
{ 
    return params[0] * params[1]; 

} 

// function pointer for device 
__device__ float (*add_ptr)(const float params[N_PARAMS]) = add; 
__device__ float (*mult_ptr)(const float params[N_PARAMS]) = mult; 




__global__ void kernel(float *d_result,struct userData *d_user, const float *d_params) 
{ 
    //this is currently not working 
    *d_result = (d_user->eval[0]) (d_params); 
    printf("d_result = %g\n", *d_result); 

} 

int main(void) 
{ 

    //*************// 
    // struct part // 
    //*************// 

    // function pointer 
    float(*fpAdd)(const float params[N_PARAMS]); 
    float(*fpMult)(const float params[N_PARAMS]); 

    // copy function pointers to device 
    gpuErrchk(cudaMemcpyFromSymbol(&fpAdd, add_ptr, sizeof(void *))); 
    gpuErrchk(cudaMemcpyFromSymbol(&fpMult, mult_ptr, sizeof(void *))); 


    struct userData h_user; 
    h_user.eval[0] = add; 
    h_user.eval[1] = mult; 

    struct userData *d_user; 
    gpuErrchk(cudaMalloc(&d_user, sizeof(userData))); 
    gpuErrchk(cudaMemcpy(d_user, &h_user, sizeof(userData), cudaMemcpyHostToDevice)); 


    // parameters 
    float h_params[N_PARAMS] = { 3.0f, 2.0f }; 
    float *d_params; 
    gpuErrchk(cudaMalloc(&d_params, N_PARAMS*sizeof(float))); 
    gpuErrchk(cudaMemcpy(d_params, h_params, N_PARAMS*sizeof(float), cudaMemcpyHostToDevice)); 


    // result 
    float h_result = 1.0f; 
    float *d_result; 
    gpuErrchk(cudaMalloc(&d_result, sizeof(float))); 
    gpuErrchk(cudaMemcpy(d_result, &h_result, sizeof(float), cudaMemcpyHostToDevice)); 

    kernel << <1, 1 >> >(d_result, d_user, d_params); 

    gpuErrchk(cudaMemcpy(&h_result, d_result, sizeof(float), cudaMemcpyDeviceToHost)); 

    printf("result = %g\n", h_result); 

    gpuErrchk(cudaFree(d_result)); 
    gpuErrchk(cudaFree(d_params)); 
    gpuErrchk(cudaFree(d_user)); 


    return EXIT_SUCCESS; 
} 
+0

你尝试打印'd_user'变量,并将其与你的函数指针数组? – vmachan

+1

什么“不起作用”? – Lundin

+0

还没有,但我会试一试 – beginneR

回答

1

的错误是在这里:

struct userData h_user; 
h_user.eval[0] = add; 
h_user.eval[1] = mult; 

你用错误的值填充结构。读取__device__存储器值以从设备获取函数指针后,需要使用这些值来填充函数结构,而不是设备函数的主机符号。所以这个:

struct userData h_user; 
h_user.eval[0] = fpAdd; 
h_user.eval[1] = fpMult; 

应按照您预期,