3
我试图用Clang 3.0将一个简单的CUDA程序转换为LLVM IR,程序如下。(标题:Clang中的CUDA支持)
#include <stdio.h>
#include <stdlib.h>
#include <clang/test/SemaCUDA/cuda.h>
// Kernel: stores the calling thread's flat 1D global index into *a.
// Every thread writes to the same address, so the result is only
// well-defined when exactly one thread runs (a single-thread launch stores 0).
__global__ void kernfunc(int *a)
{
    // Offset of this block's first thread within the grid, plus the
    // thread's position inside its block.
    const unsigned int flatIndex = blockIdx.x * blockDim.x + threadIdx.x;
    *a = flatIndex;
}
// Host driver: allocates one int on the host and one on the device, copies
// the host value (5) to the device, launches kernfunc with a single thread,
// copies the result back and prints it.
//
// Returns 0 on success and 1 if the host allocation or any CUDA call fails.
int main()
{
    const size_t n = sizeof(int);   // size in bytes of the single int buffer
    int *h_a = NULL;                // host buffer
    int *d_a = NULL;                // device buffer
    cudaError_t err = cudaSuccess;
    int status = 1;                 // pessimistic until every step succeeds

    h_a = (int*)malloc(n);
    if (h_a == NULL) {
        fprintf(stderr, "malloc failed\n");
        return status;
    }
    *h_a = 5;

    // cudaMalloc takes the ADDRESS of the device pointer, i.e. a void**.
    err = cudaMalloc((void**)&d_a, n);
    if (err != cudaSuccess) goto cleanup;

    err = cudaMemcpy(d_a, h_a, n, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) goto cleanup;

    // Kernel call. A launch does not return an error code itself, so
    // configuration errors are picked up through cudaGetLastError().
    kernfunc<<<1,1>>>(d_a);
    err = cudaGetLastError();
    if (err != cudaSuccess) goto cleanup;

    // The copy back also reports any error raised while the kernel ran.
    err = cudaMemcpy(h_a, d_a, n, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) goto cleanup;

    printf("%d", *h_a);
    status = 0;

cleanup:
    if (err != cudaSuccess)
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
    if (d_a != NULL)
        cudaFree(d_a);
    free(h_a);
    return status;
}
应该包含哪些额外的头文件? Clang 3.0目前不支持哪部分代码?