0
我想将包含函数指针数组的结构复制到设备。我无法弄清楚下面的代码有什么问题。内核中的代码不起作用。将结构与函数指针数组复制到设备
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line,bool abort = true)
{
if (code != cudaSuccess)
{
fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
#define N_MODELS 2
#define N_PARAMS 2
struct userData
{
float (*eval[N_MODELS]) (const float params[N_PARAMS]);
};
__device__ float add(const float params[N_PARAMS])
{
return params[0] + params[1];
}
__device__ float mult(const float params[N_PARAMS])
{
return params[0] * params[1];
}
// function pointer for device
__device__ float (*add_ptr)(const float params[N_PARAMS]) = add;
__device__ float (*mult_ptr)(const float params[N_PARAMS]) = mult;
__global__ void kernel(float *d_result,struct userData *d_user, const float *d_params)
{
//this is currently not working
*d_result = (d_user->eval[0]) (d_params);
printf("d_result = %g\n", *d_result);
}
int main(void)
{
//*************//
// struct part //
//*************//
// function pointer
float(*fpAdd)(const float params[N_PARAMS]);
float(*fpMult)(const float params[N_PARAMS]);
// copy function pointers to device
gpuErrchk(cudaMemcpyFromSymbol(&fpAdd, add_ptr, sizeof(void *)));
gpuErrchk(cudaMemcpyFromSymbol(&fpMult, mult_ptr, sizeof(void *)));
struct userData h_user;
h_user.eval[0] = add;
h_user.eval[1] = mult;
struct userData *d_user;
gpuErrchk(cudaMalloc(&d_user, sizeof(userData)));
gpuErrchk(cudaMemcpy(d_user, &h_user, sizeof(userData), cudaMemcpyHostToDevice));
// parameters
float h_params[N_PARAMS] = { 3.0f, 2.0f };
float *d_params;
gpuErrchk(cudaMalloc(&d_params, N_PARAMS*sizeof(float)));
gpuErrchk(cudaMemcpy(d_params, h_params, N_PARAMS*sizeof(float), cudaMemcpyHostToDevice));
// result
float h_result = 1.0f;
float *d_result;
gpuErrchk(cudaMalloc(&d_result, sizeof(float)));
gpuErrchk(cudaMemcpy(d_result, &h_result, sizeof(float), cudaMemcpyHostToDevice));
kernel << <1, 1 >> >(d_result, d_user, d_params);
gpuErrchk(cudaMemcpy(&h_result, d_result, sizeof(float), cudaMemcpyDeviceToHost));
printf("result = %g\n", h_result);
gpuErrchk(cudaFree(d_result));
gpuErrchk(cudaFree(d_params));
gpuErrchk(cudaFree(d_user));
return EXIT_SUCCESS;
}
你尝试打印'd_user'变量,并将其与你的函数指针数组? – vmachan
什么“不起作用”? – Lundin
还没有,但我会试一试 – beginneR