2013-06-02 161 views
0

我写了一个前景提取内核在Matlab中使用,没有打印任何东西,所以我将它移植到纯Cuda C中,并将大部分逻辑取出。这件事没有做任何事情,甚至没有在返回之前打印cuPrintf语句,为什么?内核不启动?

#include <cuda.h> 
#include <stdio.h>  /* printf, scanf, NULL */ 
#include <stdlib.h>  /* calloc, exit, free */ 
#include "cuPrintf.cu" 
#include "utils.h" 
#include <time.h>  /* clock_t, clock, CLOCKS_PER_SEC */ 



__global__ void foreground_extract(  unsigned char* inputImageRed, 
             unsigned char* inputImageGreen, 
             unsigned char* inputImageBlue, 

             unsigned char* outputImageRed, 
             unsigned char* outputImageGreen, 
             unsigned char* outputImageBlue,           

             const int xDim, 
             const int yDim) 
{ 


    cuPrintf("print something \n"); 
    //x = col, y = row 
    //xDim = col_dim, yDim = row_dim 
    int x = threadIdx.x + blockIdx.x * blockDim.x; 
    int y = threadIdx.y + blockIdx.y * blockDim.y; 
    int offset = x + y *blockDim.x *gridDim.x; 

    int nnodes = xDim*yDim; 
    if (offset >= nnodes) return; 


    //test equality 

    outputImageRed[offset] = inputImageRed[offset]; 
    outputImageGreen[offset] = inputImageGreen[offset]; 
    outputImageBlue[offset] = inputImageBlue[offset]; 

    cuPrintf("print something here too \n"); 
    cuPrintf("%d \n", outputImageRed[offset]); 

} 

int main() 
{ 

     int xDim = 3; 
     int yDim = 3; 

             unsigned char* h_inputImageRed; 
             unsigned char* h_inputImageGreen; 
             unsigned char* h_inputImageBlue; 

             unsigned char* h_outputImageRed; 
             unsigned char* h_outputImageGreen; 
             unsigned char* h_outputImageBlue; 


        h_inputImageRed = (unsigned char*) calloc ((xDim*yDim), sizeof(unsigned char)); 
        h_inputImageGreen = (unsigned char*) calloc ((xDim*yDim), sizeof(unsigned char)); 
        h_inputImageBlue = (unsigned char*) calloc ((xDim*yDim), sizeof(unsigned char)); 

        h_outputImageRed = (unsigned char*) calloc ((xDim*yDim), sizeof(unsigned char)); 
        h_outputImageGreen = (unsigned char*) calloc ((xDim*yDim), sizeof(unsigned char)); 
        h_outputImageBlue = (unsigned char*) calloc ((xDim*yDim), sizeof(unsigned char)); 


     //initiate input only 
     unsigned char init =0; 
     for (int i=0; i<(xDim*yDim);i++){ 

              h_inputImageRed[i] = init; 
              h_inputImageGreen[i] = init; 
              h_inputImageBlue[i] = init; 

              init++; 

              printf("%d\n", h_inputImageRed[i]); 

     } 

             //device arrays 
             unsigned char* d_inputImageRed; 
             unsigned char* d_inputImageGreen; 
             unsigned char* d_inputImageBlue; 

             unsigned char* d_outputImageRed; 
             unsigned char* d_outputImageGreen; 
             unsigned char* d_outputImageBlue; 


    //cudaMallocs 

    checkCudaErrors(cudaMalloc((void**)&d_inputImageRed, (sizeof(unsigned char)*xDim*yDim))); 
    checkCudaErrors(cudaMalloc((void**)&d_inputImageGreen, (sizeof(unsigned char)*xDim*yDim))); 
    checkCudaErrors(cudaMalloc((void**)&d_inputImageBlue, (sizeof(unsigned char)*xDim*yDim))); 

    checkCudaErrors(cudaMalloc((void**)&d_outputImageRed, (sizeof(unsigned char)*xDim*yDim))); 
    checkCudaErrors(cudaMalloc((void**)&d_outputImageGreen, (sizeof(unsigned char)*xDim*yDim))); 
    checkCudaErrors(cudaMalloc((void**)&d_outputImageBlue, (sizeof(unsigned char)*xDim*yDim))); 

    //cudaMemcpys, Host to Device 

    checkCudaErrors(cudaMemcpy(d_inputImageRed, h_inputImageRed, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyHostToDevice)); 
    checkCudaErrors(cudaMemcpy(d_inputImageGreen, h_inputImageGreen, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyHostToDevice)); 
    checkCudaErrors(cudaMemcpy(d_inputImageBlue, h_inputImageBlue, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyHostToDevice)); 

    checkCudaErrors(cudaMemcpy(d_outputImageRed, h_outputImageRed, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyHostToDevice)); 
    checkCudaErrors(cudaMemcpy(d_outputImageGreen, h_outputImageGreen, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyHostToDevice)); 
    checkCudaErrors(cudaMemcpy(d_outputImageBlue, h_outputImageBlue, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyHostToDevice)); 

    cudaPrintfInit(); 

    int gridSizeX = ceil(float(xDim/8)); 
    int gridSizeY = ceil(float(yDim/8)); 
    int gridSizeZ = 1; 

    int blockSizeX=8; 
    int blockSizeY=8; 
    int blockSizeZ=1; 

    const dim3 gridSize(gridSizeX,gridSizeY,gridSizeZ); 
    const dim3 blockSize(blockSizeX,blockSizeY,blockSizeZ); 

    foreground_extract <<< gridSize, blockSize >>>(d_inputImageRed, 
                d_inputImageGreen, 
                d_inputImageBlue, 

                d_outputImageRed, 
                d_outputImageGreen, 
                d_outputImageBlue, 

                xDim,yDim); 


     cudaPrintfDisplay(stdout,true); 
     cudaPrintfEnd(); 

     checkCudaErrors(cudaMemcpy(h_outputImageRed, d_outputImageRed, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyDeviceToHost)); 
     checkCudaErrors(cudaMemcpy(h_outputImageGreen, d_outputImageGreen, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyDeviceToHost)); 
     checkCudaErrors(cudaMemcpy(h_outputImageBlue, d_outputImageBlue, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyDeviceToHost)); 

     //free gpu data 
    checkCudaErrors(cudaFree(d_outputImageRed)); 
    checkCudaErrors(cudaFree(d_outputImageGreen)); 
    checkCudaErrors(cudaFree(d_outputImageBlue)); 
    checkCudaErrors(cudaFree(d_inputImageRed)); 
    checkCudaErrors(cudaFree(d_inputImageGreen)); 
    checkCudaErrors(cudaFree(d_inputImageBlue)); 

    //free host data 
    free(h_outputImageRed); 
    free(h_outputImageGreen); 
    free(h_outputImageBlue); 
    free(h_inputImageRed); 
    free(h_inputImageGreen); 
    free(h_inputImageBlue); 



     while(true){} 
     return 0; 
} 

回答

3

你的内核没有启动,这就是为什么你从内核的printf没有输出。 如果你在内核启动上做了适当的cuda error checking,你会发现这一点。

内核启动返回的错误是invalid configuration argument

您正在传递无效值gridSize.xgridSize.y

如果你想看看它们是什么,请在调用内核之前将它们打印出来。 (一般调试提示。)

让我们来看看这条线,因为它不是做你认为:

int gridSizeX = ceil(float(xDim/8)); 
          ^^ 
           both values inside the parenthesis are *integers* 

您还没有投任何这些值(xDim8)的一个float。所以主机编译器使用整数除法解决圆括号内的数量。 3/8的整数除法为零。此后没有任何变化的价值。还是零。

+0

谢谢。不知道关于内核启动时的错误检查,我刚刚使用了checkCudaErrors,就像你上面看到的那样。有帮助和彻底的答案,虽然也许你可能试图听起来不那么沉闷?是? :) – andandandand

+0

那里。稍微不太敏捷。 –