I am trying to put a gradient on an image, and that part works. The CPU and GPU programs should do the same thing, but I have a problem with the output images: the GPU code gives me a different image than the CPU code, and I don't know where the error is. I think the CPU code is fine but the GPU code is not. Output images - original, CPU, GPU. Please check my code. Thanks.

import pyopencl as cl 
import sys 
import Image 
import numpy 
from time import time 

def gpu_gradient():

    if len(sys.argv) != 3:
        print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
        return 1

    # create context and command queue
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # load image
    im = Image.open(sys.argv[1])
    if im.mode != "RGBA":
        im = im.convert("RGBA")
    imgSize = im.size
    buffer = im.tostring()  # len(buffer) = imgSize[0] * imgSize[1] * 4

    # create the input and output image objects
    clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
                                   cl.channel_type.UNSIGNED_INT8)
    input_image = cl.Image(ctx,
                           cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                           clImageFormat,
                           imgSize,
                           None,
                           buffer)
    output_image = cl.Image(ctx,
                            cl.mem_flags.WRITE_ONLY,
                            clImageFormat,
                            imgSize)

    # load the kernel source code
    kernelFile = open("gradient.cl", "r")
    kernelSrc = kernelFile.read()

    # create the OpenCL program
    program = cl.Program(ctx, kernelSrc).build()

    # call the kernel directly
    globalWorkSize = (imgSize[0], imgSize[1])
    gpu_start_time = time()
    program.gradientcover(queue,
                          globalWorkSize,
                          None,
                          input_image,
                          output_image)

    # read the output buffer back to the host
    buffer = numpy.zeros(imgSize[0] * imgSize[1] * 4, numpy.uint8)
    origin = (0, 0, 0)
    region = (imgSize[0], imgSize[1], 1)

    cl.enqueue_read_image(queue, output_image,
                          origin, region, buffer).wait()

    # save the image to disk
    gsim = Image.fromstring("RGBA", imgSize, buffer.tostring())
    gsim.save("GPU_" + sys.argv[2])
    gpu_end_time = time()
    print("GPU Time: {0} s".format(gpu_end_time - gpu_start_time))

def cpu_gradient():
    if len(sys.argv) != 3:
        print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
        return 1

    cpu_start_time = time()
    im = Image.open(sys.argv[1])
    if im.mode != "RGBA":
        im = im.convert("RGBA")
    pixels = im.load()
    for i in range(im.size[0]):
        for j in range(im.size[1]):

            RGBA = pixels[i, j]
            RGBA2 = RGBA[0], RGBA[1], 0, 0
            pixel = RGBA[0] + RGBA2[0], RGBA[1] + RGBA2[1], RGBA[2], RGBA[3]

            final_pixels = list(pixel)
            if final_pixels[0] > 255:
                final_pixels[0] = 255
            elif final_pixels[1] > 255:
                final_pixels[1] = 255
            pixel = tuple(final_pixels)
            pixels[i, j] = pixel
    im.save("CPU_" + sys.argv[2])
    cpu_end_time = time()
    print("CPU Time: {0} s".format(cpu_end_time - cpu_start_time))
cpu_gradient() 
gpu_gradient() 

Kernel code:

const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                          CLK_ADDRESS_CLAMP |
                          CLK_FILTER_NEAREST;

__kernel void gradientcover(read_only image2d_t srcImg,
                            write_only image2d_t dstImg)
{
    int2 coord = (int2)(get_global_id(0), get_global_id(1));

    uint4 pixel = read_imageui(srcImg, sampler, coord);
    uint4 pixel2 = (uint4)(coord.x, coord.y, 0, 0);
    pixel = pixel + pixel2;
    if (pixel.x > 255) pixel.x = 255;
    if (pixel.y > 255) pixel.y = 255;

    // write the output value to image
    write_imageui(dstImg, coord, pixel);
}

Answer

Your CL and Python code do not do the same thing!

RGBA = pixels[i, j]
RGBA2 = RGBA[0], RGBA[1], 0, 0
pixel = RGBA[0] + RGBA2[0], RGBA[1] + RGBA2[1], RGBA[2], RGBA[3]

adds the R and G components of the pixel to itself.

uint4 pixel = read_imageui(srcImg, sampler, coord); 
uint4 pixel2 = (uint4)(coord.x, coord.y,0,0); 
pixel=pixel + pixel2; 

adds the X, Y of the coordinate to the pixel.

This is most likely the cause of the difference in your results. For example, for the pixel at coordinate (3, 5) with value (100, 50, 200, 255), the CPU code produces (200, 100, 200, 255), while the kernel produces (103, 55, 200, 255).

Assuming (from the description) that you want to lighten the image by the coordinates, I would suggest that the Python code should be:

RGBA = pixels[i, j]
RGBA2 = i, j, 0, 0

instead.
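
For reference, here is a minimal sketch of the corrected CPU loop under that assumption (the function name cpu_gradient_fixed is hypothetical; it uses the same classic PIL Image module as the question). It also clamps each channel independently with min(), matching the kernel's two separate if statements; note that the original Python code used elif, which skips the green clamp whenever red overflows:

import sys
import Image  # classic PIL, as in the question

def cpu_gradient_fixed():
    # load and normalize to RGBA, as in the original CPU code
    im = Image.open(sys.argv[1])
    if im.mode != "RGBA":
        im = im.convert("RGBA")
    pixels = im.load()
    for i in range(im.size[0]):      # i is the x coordinate (column)
        for j in range(im.size[1]):  # j is the y coordinate (row)
            r, g, b, a = pixels[i, j]
            # add the pixel's own coordinates, as the kernel does,
            # and clamp each channel independently at 255
            pixels[i, j] = (min(r + i, 255), min(g + j, 255), b, a)
    im.save("CPU_" + sys.argv[2])

With this change the CPU output should match the image produced by the gradientcover kernel pixel for pixel.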