我正尝试在CUDA(200x200x100)中使用3D数组。三维阵列的分割错误
当我将z维(model_num)从4更改为5时,出现了分段错误。为什么,我该如何解决它?
const int nrcells = 200;
const int nphicells = 200;
const int model_num = 5; //So far, 4 is the maximum model_num that works. At 5 and after, there is a segmentation fault
__global__ void kernel(float* mgridb)
{
const unsigned long long int i = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
if(tx >= 0 && tx < nphicells && ty >=0 && ty < nrcells && tz >= 0 && tz < model_num){
//Do stuff with mgridb[i]
}
}
int main (void)
{
unsigned long long int size_matrices = nphicells*nrcells*model_num;
unsigned long long int mem_size_matrices = sizeof(float) * size_matrices;
float *h_mgridb = (float *)malloc(mem_size_matrices);
float mgridb[nphicells][nrcells][model_num];
for(int k = 0; k < model_num; k++){
for(int j = 0; j < nrcells; j++){
for(int i = 0; i < nphicells; i++){
mgridb[i][j][k] = 0;
}
}
}
float *d_mgridb;
cudaMalloc((void**)&d_mgridb, mem_size_matrices);
cudaMemcpy(d_mgridb, h_mgridb, mem_size_matrices, cudaMemcpyHostToDevice);
int threads = nphicells;
uint3 blocks = make_uint3(nrcells,model_num,1);
kernel<<<blocks,threads>>>(d_mgridb);
cudaMemcpy(h_mgridb, d_mgridb, mem_size_matrices, cudaMemcpyDeviceToHost);
cudaFree(d_mgridb);
return 0;
}
请多关注一下您在问题中发布的代码的格式和内容。您发布的代码不必要的难以阅读并且包含不平衡{}。 – talonmies
会做。谢谢。 –