CUDA中的三线性插值

我需要在CUDA中执行三线性插值。问题定义如下：

给定三个向量 x[nx]、y[ny]、z[nz]，以及函数值矩阵 func[nx][ny][nz]，我想求出位于 x、y、z 取值范围之内的若干随机点处的函数值。

我可以在CUDA中自己编写插值内核，但我想知道是否已经有现成的实现可用。

谢谢！

2014-04-09 shadowfax

可以看看 [CUDA 编程指南中的纹理内存部分](http://docs.nvidia.com/cuda/cuda-c-programming-guide/#texture-memory)。特别是其中的*过滤模式*，可以让您对三维纹理进行三线性插值。 – Farzad

谢谢，将会查看纹理内存部分以获取更多信息。 – shadowfax

[npp](http://docs.nvidia.com/cuda/npp/index.html) 可以进行各种插值，但我不确定三线性插值是否是其中之一，也不确定把向量空间当作一幅3D图像来处理是否合理。 –

正如@Farzad所述，您可以使用纹理过滤在CUDA中执行三线性插值。simpleTexture3D 示例提供了有关如何使用它的完整示例。不过，该示例现在可能不太容易直接使用，因为它涉及 OpenGL 和 glut 这样的库，以及 cutil.h 等其他外部依赖项。

因此，我发现将上述代码简化为能够展示该概念的“最小尺寸”示例非常有用。正如您将会看到的那样，代码会载入名为 Bucky.raw 的文件中的外部数据，该文件是我从上面链接的 github 页面“借用”的。

下面的代码将一个长方体数据插值到位于其中央切片上的规则笛卡尔网格上。如果一切顺利，您重建得到的图像如下所示。

enter image description here

代码如下：

#include <stdio.h> 
#include <stdlib.h> 
#include <fstream> 

#include <cuda_runtime.h> 
#include <cuda.h> 

// Shorthand for the 8-bit voxel type stored in the volume and in the texture.
typedef unsigned char uchar; 

// Edge length (in threads) of the square thread block used to launch d_render.
#define BLOCKSIZE 16 

// Passed to d_render as the third texture coordinate. The texture is set up
// with normalized coordinates in main, so 0.5 samples the middle of the z range.
float w = 0.5; // texture coordinate in z 

/********************/ 
/* CUDA ERROR CHECK */ 
/********************/ 
// Wraps a CUDA runtime call (or any cudaError_t expression) and reports a
// failure together with the source location of the call site.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } 

// Checks a CUDA status code.
//   code  - status returned by a CUDA runtime call
//   file  - source file of the call site (normally __FILE__)
//   line  - source line of the call site (normally __LINE__)
//   abort - when true (the default), waits for a key press and then terminates
//           the process with the error code as exit status; when false, the
//           error is only reported and control returns to the caller
// Does nothing when code is cudaSuccess.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) 
{ 
    // `file` is const-qualified because __FILE__ expands to a string literal,
    // which cannot legally bind to a plain `char *` in C++11 and later.
    if (code == cudaSuccess) return; 

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); 
    if (abort) { getchar(); exit(code); } 
} 

// Shorthand integer types. uchar is also typedef'd earlier in the file; the
// repeated, identical typedef is redundant.
typedef unsigned int uint; 
typedef unsigned char uchar; 

// File-scope texture reference over a 3D array of uchar. With
// cudaReadModeNormalizedFloat the fetch in d_render yields a float, which the
// kernel scales back by 255.
// NOTE(review): texture references are a legacy API (deprecated, and removed
// in CUDA 12); building on a current toolkit would require porting to
// cudaTextureObject_t.
texture<uchar, 3, cudaReadModeNormalizedFloat> tex; // 3D texture 

// CUDA array holding the volume; allocated and bound to `tex` in main.
cudaArray *d_volumeArray = 0; 

// Interpolated image: device buffer written by d_render and its host copy.
uint *d_output = NULL; 
uint *h_output = NULL; 

/************************************************/ 
/* TEXTURE-BASED TRILINEAR INTERPOLATION KERNEL */ 
/************************************************/ 
// Samples the 3D texture `tex` on an imageW x imageH grid at a fixed third
// texture coordinate and stores one value per pixel.
//
// Launch layout: 2D grid of 2D blocks, one thread per output pixel. The grid
// may overshoot the image; out-of-range threads exit without touching memory.
//
//   d_output - device buffer of at least imageW * imageH elements, row-major
//   imageW   - output width in pixels
//   imageH   - output height in pixels
//   w        - third texture coordinate at which the slice is sampled
__global__ void 
d_render(uint *d_output, uint imageW, uint imageH, float w) 
{ 
    // Plain 32-bit multiplies: __umul24 only uses the low 24 bits of each
    // operand and offers no speed advantage on current hardware.
    const uint x = blockIdx.x * blockDim.x + threadIdx.x; 
    const uint y = blockIdx.y * blockDim.y + threadIdx.y; 

    // Bail out before the texture fetch so surplus threads do no work.
    if (x >= imageW || y >= imageH) return; 

    // Map the pixel index to texture coordinates in [0, 1).
    const float u = x / (float) imageW; 
    const float v = y / (float) imageH; 

    // read from 3D texture 
    const float voxel = tex3D(tex, u, v, w); 

    // write output color: scale the fetched value by 255 (truncated on store)
    d_output[y * imageW + x] = voxel * 255; 
} 

// Loads an N x N x N volume of bytes from "Bucky.raw", uploads it into a CUDA
// 3D array bound to the texture `tex`, renders an imageW x imageH slice with
// d_render using linear texture filtering, and writes the raw result to
// "out_texture.dat".
//
// Returns 0 on success and 1 on a host-side failure (missing/short input
// file, allocation failure, unwritable output file). CUDA failures terminate
// the process inside gpuErrchk.
int main() { 

    const int N = 32; 
    const int imageH = 512; 
    const int imageW = 512; 

    // Sizes computed once, in size_t, so the products cannot overflow int.
    const size_t numVoxels   = (size_t)N * N * N; 
    const size_t outputBytes = (size_t)imageW * imageH * sizeof(uint); 

    const char* filename = "Bucky.raw"; 

    // --- Loading data from file 
    FILE *fp = fopen(filename, "rb"); 
    if (!fp) { fprintf(stderr, "Error opening file '%s'\n", filename); getchar(); return 1; } 

    uchar *data = (uchar*)malloc(numVoxels * sizeof(uchar)); 
    if (!data) { 
        fprintf(stderr, "Error allocating %lu bytes of host memory\n", (unsigned long)numVoxels); 
        fclose(fp); 
        getchar(); 
        return 1; 
    } 
    size_t read = fread(data, 1, numVoxels, fp); 
    fclose(fp); 

    // size_t is cast explicitly: %lu is not the right conversion for size_t
    // on every platform.
    printf("Read '%s', %lu bytes\n", filename, (unsigned long)read); 

    // A short read would upload uninitialised bytes to the GPU.
    if (read != numVoxels) { 
        fprintf(stderr, "Error reading file '%s': expected %lu bytes\n", filename, (unsigned long)numVoxels); 
        free(data); 
        getchar(); 
        return 1; 
    } 

    gpuErrchk(cudaMalloc((void**)&d_output, outputBytes)); 

    // --- Create 3D array 
    const cudaExtent volumeSize = make_cudaExtent(N, N, N); 

    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar>(); 
    gpuErrchk(cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize)); 

    // --- Copy data to 3D array (host to device) 
    cudaMemcpy3DParms copyParams = {0}; 
    copyParams.srcPtr = make_cudaPitchedPtr((void*)data, volumeSize.width*sizeof(uchar), volumeSize.width, volumeSize.height); 
    copyParams.dstArray = d_volumeArray; 
    copyParams.extent = volumeSize; 
    copyParams.kind  = cudaMemcpyHostToDevice; 
    gpuErrchk(cudaMemcpy3D(&copyParams)); 

    // The blocking copy above has completed, so the host staging buffer is
    // no longer needed.
    free(data); 
    data = NULL; 

    // --- Set texture parameters 
    tex.normalized = true;      // access with normalized texture coordinates 
    tex.filterMode = cudaFilterModeLinear;  // linear interpolation 
    tex.addressMode[0] = cudaAddressModeWrap; // wrap texture coordinates 
    tex.addressMode[1] = cudaAddressModeWrap; 
    tex.addressMode[2] = cudaAddressModeWrap; 

    // --- Bind array to 3D texture 
    gpuErrchk(cudaBindTextureToArray(tex, d_volumeArray, channelDesc)); 

    // --- Launch the interpolation kernel 
    // Ceil-division so the grid still covers the image if its size is not a
    // multiple of BLOCKSIZE; d_render discards the surplus threads.
    const dim3 blockSize(BLOCKSIZE, BLOCKSIZE, 1); 
    const dim3 gridSize((imageW + blockSize.x - 1) / blockSize.x, 
                        (imageH + blockSize.y - 1) / blockSize.y); 
    d_render<<<gridSize, blockSize>>>(d_output, imageW, imageH, w); 
    gpuErrchk(cudaPeekAtLastError()); 
    gpuErrchk(cudaDeviceSynchronize()); 

    // --- Copy the interpolated data to host 
    int status = 0; 
    h_output = (uint*)malloc(outputBytes); 
    if (!h_output) { 
        fprintf(stderr, "Error allocating %lu bytes of host memory\n", (unsigned long)outputBytes); 
        status = 1; 
    } else { 
        gpuErrchk(cudaMemcpy(h_output, d_output, outputBytes, cudaMemcpyDeviceToHost)); 

        std::ofstream outfile; 
        outfile.open("out_texture.dat", std::ios::out | std::ios::binary); 
        if (!outfile) { 
            fprintf(stderr, "Error opening file '%s'\n", "out_texture.dat"); 
            status = 1; 
        } else { 
            outfile.write((char*)h_output, outputBytes); 
            outfile.close(); 
        } 
    } 

    // --- Release host and device resources 
    gpuErrchk(cudaUnbindTexture(tex)); 
    gpuErrchk(cudaFreeArray(d_volumeArray)); 
    d_volumeArray = 0; 
    gpuErrchk(cudaFree(d_output)); 
    d_output = NULL; 
    free(h_output); 
    h_output = NULL; 

    getchar(); 

    return status; 
}

代码将结果以二进制格式保存到 out_texture.dat 文件中。可以在 MATLAB 中按如下方式加载该二进制数据：

% Load the interpolated slice written by the CUDA program.
% The image size must match imageH / imageW in the CUDA code (512 x 512),
% and each sample is a 4-byte unsigned integer.
imageH = 512;
imageW = 512;
fd = fopen('out_texture.dat','r'); 
if fd < 0
    error('Could not open out_texture.dat');
end
U = fread(fd,imageH*imageW,'uint32'); 
fclose(fd);

来源

2014-04-16 19:53:32 JackOLantern

CUDA中的三线性插值

回答

相关问题