内部结构的数组的分配我已经这些结构:CUDA:一个struct
typedef struct neuron
{
float* weights;
int n_weights;
}Neuron;
typedef struct neurallayer
{
Neuron *neurons;
int n_neurons;
int act_function;
}NLayer;
“n图层”结构可以包含“神经元”的任意数量的
我试图分配5“神经元”一“n图层”结构从以这种方式主机:
NLayer* nL;
int i;
int tmp=9;
cudaMalloc((void**)&nL,sizeof(NLayer));
cudaMalloc((void**)&nL->neurons,6*sizeof(Neuron));
for(i=0;i<5;i++)
cudaMemcpy(&nL->neurons[i].n_weights,&tmp,sizeof(int),cudaMemcpyHostToDevice);
...然后我试图修改“NL->神经元[0] .n_weights”变量与内核:
__global__ void test(NLayer* n)
{
n->neurons[0].n_weights=121;
}
,但在编译时NVCC返回该“警告”与内核无关的唯一行:
Warning: Cannot tell what pointer points to, assuming global memory space
当内核完成其工作的结构开始无法访问。
这很可能是我在分配过程中做错了什么事......有人可以帮助我吗? 非常感谢,对不起我的英语! :)
UPDATE:
感谢奥兰我修改我的代码创建这个函数应该分配结构“n图层”的一个实例:
NLayer* setNLayer(int numNeurons,int weightsPerNeuron,int act_fun)
{
int i;
NLayer h_layer;
NLayer* d_layer;
float* d_weights;
//SET THE LAYER VARIABLE OF THE HOST NLAYER
h_layer.act_function=act_fun;
h_layer.n_neurons=numNeurons;
//ALLOCATING THE DEVICE NLAYER
if(cudaMalloc((void**)&d_layer,sizeof(NLayer))!=cudaSuccess)
puts("ERROR: Unable to allocate the Layer");
//ALLOCATING THE NEURONS ON THE DEVICE
if(cudaMalloc((void**)&h_layer.neurons,numNeurons*sizeof(Neuron))!=cudaSuccess)
puts("ERROR: Unable to allocate the Neurons of the Layer");
//COPING THE HOST NLAYER ON THE DEVICE
if(cudaMemcpy(d_layer,&h_layer,sizeof(NLayer),cudaMemcpyHostToDevice)!=cudaSuccess)
puts("ERROR: Unable to copy the data layer onto the device");
for(i=0;i<numNeurons;i++)
{
//ALLOCATING THE WEIGHTS' ARRAY ON THE DEVICE
cudaMalloc((void**)&d_weights,weightsPerNeuron*sizeof(float));
//COPING ITS POINTER AS PART OF THE i-TH NEURONS STRUCT
if(cudaMemcpy(&d_layer->neurons[i].weights,&d_weights,sizeof(float*),cudaMemcpyHostToDevice)!=cudaSuccess)
puts("Error: unable to copy weights' pointer to the device");
}
//RETURN THE DEVICE POINTER
return d_layer;
}
,我调用该函数(内核“测试”是以前声明的):
int main()
{
NLayer* nL;
int h_tmp1;
float h_tmp2;
nL=setNLayer(10,12,13);
test<<<1,1>>>(nL);
if(cudaMemcpy(&h_tmp1,&nL->neurons[0].n_weights,sizeof(float),cudaMemcpyDeviceToHost)!=cudaSuccess);
puts("ERROR!!");
printf("RESULT:%d",h_tmp1);
}
当我编译该代码编译器显示我的警告,当我执行该程序时,它在屏幕上打印:
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
ERROR!!
RESULT:1
如果我评论内核调用,最后一个错误不会比较。
我在哪里错了? 我不知道该怎么办 感谢您的帮助!
虽然你对这个警告是正确的,但我怀疑是什么导致了程序的异常行为。毕竟,编译器关于位于全局Emory空间中的结构的假设是正确的... – aland 2012-08-09 01:52:52
我使用的是具有1.2功能的NVIDIA GeForce 320M 256 MB,所以我不认为它是“费米”卡 – 2012-08-09 09:29:32