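嘿,我正在使用 CUDA 和 Thrust 库。我在主机端用 Object *(指针向量)类型的元素填充了一个 thrust::device_vector,但在 CUDA 内核中访问这个双重指针时遇到了问题。使用 'nvcc -o thrust main.cpp cukernel.cu' 进行编译时,我收到警告 'Warning: Cannot tell what pointer points to, assuming global memory space'(无法判断指针指向哪个内存空间,假定为全局内存空间),并且在运行该程序时出现启动错误(launch error)。标题:CUDA / Thrust 双重指针问题(指针向量)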
我已经查阅过 Nvidia 论坛,那里给出的解决方案似乎是“不要在 CUDA 内核中使用双重指针”。但我不想在把数据发送到内核之前,先把双重指针展平成一维指针……有没有人找到了解决这个问题的方法?相关代码如下,预先感谢!
--------------------------
main.cpp
--------------------------
// Creates a Sphere with `new` and fills its fields from the index i:
// a = 1 + i, b = 2 + i, c = 3 + i.
// Returns the raw pointer obtained from `new`; nothing in this function
// releases it.
Sphere * parseSphere(int i)
{
    Sphere * const created = new Sphere();

    created->a = i + 1;
    created->b = i + 2;
    created->c = i + 3;

    return created;
}
int main(int argc, char** argv) {
int i;
thrust::host_vector<Sphere *> spheres_h;
thrust::host_vector<Sphere> spheres_resh(NUM_OBJECTS);
//initialize spheres_h
for(i=0;i<NUM_OBJECTS;i++){
Sphere * sphere = parseSphere(i);
spheres_h.push_back(sphere);
}
//initialize spheres_resh
for(i=0;i<NUM_OBJECTS;i++){
spheres_resh[i].a = 1;
spheres_resh[i].b = 1;
spheres_resh[i].c = 1;
}
thrust::device_vector<Sphere *> spheres_dv = spheres_h;
thrust::device_vector<Sphere> spheres_resv = spheres_resh;
Sphere ** spheres_d = thrust::raw_pointer_cast(&spheres_dv[0]);
Sphere * spheres_res = thrust::raw_pointer_cast(&spheres_resv[0]);
kernelBegin(spheres_d,spheres_res,NUM_OBJECTS);
thrust::copy(spheres_dv.begin(),spheres_dv.end(),spheres_h.begin());
thrust::copy(spheres_resv.begin(),spheres_resv.end(),spheres_resh.begin());
bool result = true;
for(i=0;i<NUM_OBJECTS;i++){
result &= (spheres_resh[i].a == i+1);
result &= (spheres_resh[i].b == i+2);
result &= (spheres_resh[i].c == i+3);
}
if(result)
{
cout << "Data GOOD!" << endl;
}else{
cout << "Data BAD!" << endl;
}
return 0;
}
--------------------------
cukernel.cu
--------------------------
// Kernel: copies the fields a, b, c of the sphere that spheres_d[index]
// points to into spheres_res[index], one element per thread.
//
// spheres_d    table of pointers to Sphere; this kernel dereferences both the
//              table and every entry in it, so each entry must be an address
//              the device can read (a host heap pointer here leads to a
//              launch failure).
// spheres_res  output array, written at [index].
// num_objects  number of valid elements; threads at or beyond it do nothing.
__global__ void deviceBegin(Sphere ** spheres_d, Sphere * spheres_res, float
num_objects)
{
    const int index = threadIdx.x + blockIdx.x*blockDim.x;

    // The launch rounds the thread count up to whole blocks, so surplus
    // threads must not touch either array.
    if (index >= num_objects) {
        return;
    }

    // Load the element pointer once, then copy field by field.
    const Sphere * source = spheres_d[index];
    spheres_res[index].a = source->a;
    spheres_res[index].b = source->b;
    spheres_res[index].c = source->c;
}
// Host-side launcher for deviceBegin.
//
// spheres_d    pointer table passed through to the kernel.
// spheres_res  output array passed through to the kernel.
// num_objects  element count; it arrives as float (the signature is kept for
//              existing callers) and is truncated to an integer to size the
//              grid.
//
// Launches ceil(count / 512) blocks of 512 threads, and launches nothing when
// the count is not positive.
void kernelBegin(Sphere ** spheres_d, Sphere * spheres_res, float num_objects)
{
    const int count = static_cast<int>(num_objects);
    if (count <= 0) {
        return; // nothing to process; a zero-block launch would be invalid
    }

    const int threads = 512; // per block
    // Integer ceiling division: no float rounding, and no surplus block when
    // count is an exact multiple of the block size.
    const int grids = (count + threads - 1) / threads; // blocks per grid

    deviceBegin<<<grids,threads>>>(spheres_d, spheres_res, num_objects);
}
太棒了,谢谢你的回应……我会尝试其中的一些想法,然后回来反馈结果! – nhelenih 2011-06-06 17:01:02