2012-10-29 21 views
2

我是opencl的新手。任务是:如何使用opencl在内核内显示图像?

  1. 在主机端加载一幅已有的图像
  2. 通过主机代码把图像指针传给OpenCL内核
  3. 在内核中对加载的图像计算HSL阈值
  4. 显示阈值化后的图像(二值图像)

我已经使用OpenCV在程序中加载了一幅已有的2D图像,并用OpenCL缓冲区对象(buffer object)分配内存、把图像指针传给内核。内核执行完之后,为了显示内核计算出的图像,我需要调用clEnqueueReadBuffer把结果读回主机,然后再用OpenCV在主机端显示图像。我的代码附在下面。

由于这种做法在GPU和CPU上都比较耗时,我考虑改用图像内存对象(image memory object)。

但我想知道:使用图像对象时,是否同样需要调用clEnqueueReadImage把图像从设备复制回主机?或者有没有办法直接在内核中显示阈值化后的图像?

//My code using opencl buffers  
IplImage *src = cvLoadImage("../Input/im2.png",CV_LOAD_IMAGE_COLOR); 

int a=src->height; 
int b=src->width; 

/**
 * Builds an OpenCL context on the first platform the runtime reports.
 *
 * A GPU context is attempted first; when that fails a CPU context is tried.
 *
 * @return The new context, or NULL when no platform is found or when neither
 *         device type yields a context (a message is printed in each case).
 */
cl_context CreateContext()
{
    cl_uint platformCount;
    cl_platform_id platform;

    // Only the first platform is requested.
    cl_int status = clGetPlatformIDs(1, &platform, &platformCount);
    const bool havePlatform = (status == CL_SUCCESS) && (platformCount > 0);
    if (!havePlatform)
    {
        std::cerr << "Failed to find any OpenCL platforms." << std::endl;
        return NULL;
    }

    // Zero-terminated property list binding the context to that platform.
    cl_context_properties props[] = {
        CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0
    };

    cl_context ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
    if (status == CL_SUCCESS)
        return ctx;

    // GPU attempt failed: fall back to a CPU device.
    std::cout << "Could not create GPU context, trying CPU..." << std::endl;
    ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_CPU, NULL, NULL, &status);
    if (status == CL_SUCCESS)
        return ctx;

    std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;
    return NULL;
}


/**
 * Creates a profiling-enabled command queue on the first device of a context.
 *
 * @param context  Context whose device list is queried.
 * @param device   Out-parameter that receives the device the queue was created
 *                 on. Written only on success; may be NULL if the caller does
 *                 not need the device.
 * @return The new command queue, or NULL on failure (a message is written to
 *         std::cerr).
 */
cl_command_queue CreateCommandQueue(cl_context context, cl_device_id *device)
{
    cl_int errNum;
    size_t deviceBufferSize = 0;

    // First call only asks for the size, in bytes, of the device list.
    errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed call to clGetContextInfo(...,CL_CONTEXT_DEVICES,...)" << std::endl;
        return NULL;
    }

    // deviceBufferSize is unsigned, so test the element count rather than "<= 0";
    // this also rejects a size too small to hold a single device id.
    const size_t deviceCount = deviceBufferSize / sizeof(cl_device_id);
    if (deviceCount == 0)
    {
        std::cerr << "No devices available." << std::endl;
        return NULL;
    }

    // Fetch the list, keep only the first entry and free the list right away
    // so that no later return path has to remember to delete it.
    cl_device_id *devices = new cl_device_id[deviceCount]();
    errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                              deviceCount * sizeof(cl_device_id), devices, NULL);
    const cl_device_id firstDevice = devices[0];
    delete [] devices;

    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to get device IDs" << std::endl;
        return NULL;
    }

    cl_command_queue commandQueue =
        clCreateCommandQueue(context, firstDevice, CL_QUEUE_PROFILING_ENABLE, &errNum);
    if (errNum != CL_SUCCESS || commandQueue == NULL)
    {
        std::cerr << "Failed to create commandQueue for device 0" << std::endl;
        return NULL;
    }

    if (device != NULL)
        *device = firstDevice;
    return commandQueue;
}

/**
 * Loads OpenCL C source from a file and builds it into a program.
 *
 * @param context   Context the program is created in.
 * @param device    Device whose build log is reported when the build fails.
 * @param fileName  Path of the kernel source file.
 * @return The built program, or NULL when the file cannot be opened, the
 *         program object cannot be created, or the build fails. On a build
 *         failure the build log is written to std::cerr and the program object
 *         is released.
 */
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
{
    std::ifstream kernelFile(fileName, std::ios::in);
    if (!kernelFile.is_open())
    {
        std::cerr << "Failed to open file for reading: " << fileName << std::endl;
        return NULL;
    }

    // Read the whole file into one string.
    std::ostringstream oss;
    oss << kernelFile.rdbuf();
    const std::string srcStdStr = oss.str();
    const char *srcStr = srcStdStr.c_str();

    cl_int errNum = CL_SUCCESS;
    cl_program program = clCreateProgramWithSource(context, 1, &srcStr, NULL, &errNum);
    if (errNum != CL_SUCCESS || program == NULL)
    {
        std::cerr << "Failed to create CL program from source." << std::endl;
        return NULL;
    }

    // Build for every device associated with the program.
    errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        // Ask for the log length first so long logs are not truncated, and
        // start from a zero-filled buffer so a failed query prints nothing
        // instead of uninitialised memory.
        size_t logSize = 0;
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
        std::string buildLog(logSize, '\0');
        if (logSize > 0)
        {
            clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                  logSize, &buildLog[0], NULL);
        }

        std::cerr << "Error in kernel: " << std::endl;
        std::cerr << buildLog.c_str() << std::endl;
        clReleaseProgram(program);
        return NULL;
    }
    return program;
}

/**
 * Allocates the two device buffers used by the kernel.
 *
 * memObjects[0] is a read-only buffer of a*b*3 bytes initialised by copying
 * from src_ptr; memObjects[1] is a read/write buffer of a*b bytes.
 *
 * @param context     Context the buffers are created in.
 * @param memObjects  Receives the two buffer handles.
 * @param src_ptr     Host data copied into memObjects[0].
 * @return true when both buffers were created, false otherwise.
 */
bool CreateMemObjects(cl_context context, cl_mem memObjects[2], unsigned char *src_ptr)
{
    const size_t inputBytes  = sizeof(unsigned char) * (a*b*3);
    const size_t outputBytes = sizeof(unsigned char) * (a*b);

    cl_mem &input  = memObjects[0];
    cl_mem &output = memObjects[1];

    // Both allocations are attempted before either result is inspected.
    input  = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                            inputBytes, src_ptr, NULL);
    output = clCreateBuffer(context, CL_MEM_READ_WRITE, outputBytes, NULL, NULL);

    if (input != NULL && output != NULL)
        return true;

    std::cerr << "Error creating memory objects" << std::endl;
    return false;
}

/**
 * Releases every non-null OpenCL handle passed in.
 *
 * Handles are released in this order: the two memory objects, the command
 * queue, the kernel, the program and finally the context. A handle equal to 0
 * is skipped.
 */
void Cleanup(cl_context context, cl_command_queue commandQueue, cl_program program, cl_kernel kernel, cl_mem memObjects[2])
{
    for (cl_mem *obj = memObjects; obj != memObjects + 2; ++obj)
    {
        if (*obj != 0)
        {
            clReleaseMemObject(*obj);
        }
    }

    if (commandQueue != 0) { clReleaseCommandQueue(commandQueue); }
    if (kernel != 0)       { clReleaseKernel(kernel); }
    if (program != 0)      { clReleaseProgram(program); }
    if (context != 0)      { clReleaseContext(context); }
}


int main() 
{ 
    cl_context context = 0; 
    cl_command_queue commandQueue = 0; 
    cl_program program = 0; 
    cl_device_id device = 0; 
    cl_kernel kernel = 0; 
    cl_mem memObjects[2] = { 0,0 }; 
    cl_int errNum; 
    cl_event myEvent; 

    cl_ulong start_time,end_time; 
    double kernelExecTimeNs; 

    IplImage *thres_img1 = cvCreateImage(cvGetSize(src), IPL_DEPTH_8U, 1); 

    unsigned char *tur_image1,*src_ptr; 
    tur_image1 = (unsigned char*) malloc((a*b) * sizeof(unsigned char)); 
    src_ptr = (unsigned char*) malloc ((a*b*3) * sizeof(unsigned char)); 

    context = CreateContext(); 
    if (context == NULL) 
    { 
     std::cerr << "Failed to create OpenCL context." <<std::endl; 
     return 1; 
    } 

    commandQueue = CreateCommandQueue(context, &device); 
    if (commandQueue == NULL) 
    { 
     Cleanup(context, commandQueue, program, kernel, memObjects); 
     return 1; 
    } 

    program = CreateProgram(context, device, "hsl_threshold.cl"); 
    if (program == NULL) 
    { 
     Cleanup(context, commandQueue, program, kernel, memObjects); 
     return 1; 
    } 

    kernel = clCreateKernel(program, "HSL_threshold", NULL); 
    if (kernel == NULL) 
    { 
     std::cerr << "Failed to create kernel" << std::endl; 
     Cleanup(context, commandQueue, program, kernel, memObjects); 
     return 1; 
    } 

    printf("height:%d\n",a);//image height 
    printf("width:%d\n",b);//image width 

    cvShowImage("color image",src); 
    cvWaitKey(0); 

    memcpy(src_ptr,src->imageData,(a*b*3)); 

    if (!CreateMemObjects(context, memObjects, src_ptr)) 
    { 
     Cleanup(context, commandQueue, program, kernel, memObjects); 
     return 1; 
    } 

     errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); 
    errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); 

    if (errNum != CL_SUCCESS) 
    { 
     std::cerr << "Error setting kernel arguments" << std::endl; 
     Cleanup(context, commandQueue, program, kernel, memObjects); 
     return 1; 
    } 

    cout<<"Kernel arguments set successfully"; 
    size_t globalWorkSize[1]={a*b}; 
    size_t localWorkSize[1]={512}; 

    errNum = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, &myEvent); 

    clWaitForEvents(1,&myEvent); 

    if (errNum != CL_SUCCESS) 
    { 
     std::cerr << "Error queuing kernel for execution." << std::endl; 
     Cleanup(context, commandQueue, program, kernel, memObjects); 
     return 1; 
    } 
    clFinish(commandQueue); 


    clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_START, sizeof(start_time), &start_time, NULL); 
    clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END, sizeof(end_time), &end_time, NULL); 

    kernelExecTimeNs = end_time-start_time; 

    printf("\nExecution time in milliseconds = %0.3f ms\n",(kernelExecTimeNs/1000000.0)); 
    cout<<"\n Kernel timings \n"<<kernelExecTimeNs<<"seconds"; 

    errNum = clEnqueueReadBuffer(commandQueue, memObjects[1], CL_TRUE, 
           0, (a*b) * sizeof(unsigned char), tur_image1, 
           0, NULL, NULL); 

    if (errNum != CL_SUCCESS) 
    { 
     std::cerr << "Error reading result buffer." << std::endl; 
     Cleanup(context, commandQueue, program, kernel, memObjects); 
     return 1; 
    } 

    memcpy(thres_img1->imageData,tur_image1,sizeof(unsigned char)*(a*b)); 

    cvShowImage("hsl_thresh",thres_img1); 
    cvSaveImage("../Output/hsl_threshold.png",thres_img1); 
    cvWaitKey(0); 

    std::cout<<std::endl; 
    std::cout<<"Image displayed Successfully"<<std::endl; 

    Cleanup(context,commandQueue,program,kernel,memObjects); 
    printf("\n Free opencl resources"); 
    std::cin.get(); 
    return 0; 

} 

回答

6

有几种方法可以通过OpenGL直接处理由OpenCL计算出的数据。您的OCL实现必须支持扩展cl_khr_gl_sharing。
这种模式称为CL/GL互操作(CL/GL-Interop)模式。

如果先创建一个OpenGL实例,再用指向该GL实例的指针来初始化OpenCL,那么这两个实现就可以互相访问对方的数据。

(以下所有代码片段都取自使用OpenCL C++绑定的代码,我想这不影响对大体思路的理解)

cl_context_properties properties[] = 
    // Property list for creating an OpenCL context in GL/CL interop mode.
    // OpenGL must already be initialised before this list is used.
    // For interop initialisation see: http://www.khronos.org/registry/cl/extensions/khr/cl_khr_gl_sharing.txt 
    // CL_GL_CONTEXT_KHR: rendering context [use your OGL HGLRC variable or call wglGetCurrentContext()] 
    // CL_WGL_HDC_KHR: device context [use your OGL HDC variable or call wglGetCurrentDC()] 
    // The trailing 0 terminates the list.
    { 
    CL_CONTEXT_PLATFORM, (cl_context_properties)(_platforms->at(0))(), 
    CL_GL_CONTEXT_KHR, (cl_context_properties)myGL->hRC, 
    CL_WGL_HDC_KHR, (cl_context_properties)myGL->hDC, 0 
    }; 

现在你可以基于OGL纹理创建OCL图像:
// The following data can be accessed both from OCL and OGL.
// Creates an OpenCL image from the GL_TEXTURE_2D texture myGL->textures[0], with read/write access.
// NOTE(review): the result of `new` (a pointer) is assigned to a cl::Image2D value here —
// presumably this should be a pointer or a direct construction; confirm against the real code.
cl::Image2D imageFromGL = new cl::Image2DGL(*_context, CL_MEM_READ_WRITE, GL_TEXTURE_2D, 0, myGL->textures[0]); 

在OCL使用这块内存之前,必须先让OGL释放它:

// Ask OGL to release the memory so OCL can use it. All OGL actions must be finished before doing so!
// NOTE(review): the C++ bindings presumably expect a pointer to a vector of cl::Memory as the
// first argument — confirm that &imageFromGL has that type.
_queue->enqueueAcquireGLObjects(&imageFromGL, NULL, &evt); 

现在可以用OCL对这块内存做任何你想做的处理,然后把它交还给OGL:

// Hand the memory back to OGL. All OCL actions must be finished before doing so!
// NOTE(review): as with the acquire call, the first argument is presumably a pointer to a
// vector of cl::Memory — confirm the type of &imageFromGL.
_queue->enqueueReleaseGLObjects(&imageFromGL, NULL, &evt); 

最后,你可以使用OpenGL代码显示在屏幕上的数据。

+0

谢谢,但我可以知道是否使用opengl我们只能在主机或内核内显示图像? – Binitha

+0

请耐心等待。我有这个疑问,因为我还没有使用opengl .. – Binitha

+0

OpenGL的工作方式与OpenCL类似:您可以在主机上准备好您的操作,但数据操作等在GPU上发生。因此,如果您在OGL内部创建纹理,图像数据将被放置在GPU内存中。 OGL不是类似内核的结构,但是您可以通过单个调用来控制GPU。 – Nippey