2015-05-22 112 views
1

我想计算一组5D点(像素)到单个5D点(聚类中心)的欧氏距离,并把结果存储在另一个结果向量中。我想用一维向量的索引来存放所有信息,即第 i 个点的五个分量依次位于 (5i)、(5i + 1)、…、(5i + 4)。我是OpenCL的新手,只是按照自己的需要修改了网上的一个示例代码。思路在理论上是正确的,但代码没有给出正确的结果!下面是使用OpenCL计算欧几里德距离的内核:

//d_kernel.cl 

// double is an optional feature in OpenCL; request it explicitly so the kernel
// also builds on OpenCL 1.0/1.1 devices that expose cl_khr_fp64.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/*
 * Computes, per work-item, the SQUARED Euclidean distance between one 5D
 * pixel and a single 5D cluster centre (no square root is taken).
 *
 * pixelInfo                 - flat array, 5 consecutive doubles per pixel;
 *                             pixel i occupies elements [5*i, 5*i + 4].
 * clusterCentres            - only the first 5 doubles (one centre) are read.
 * distanceFromClusterCentre - one double per pixel; element i receives the
 *                             squared distance of pixel i.
 *
 * There is no bounds check: the host must launch exactly one work-item per
 * pixel in dimension 0.
 */
__kernel void distance_kernel(__global const double *pixelInfo,
                              __global const double *clusterCentres,
                              __global double *distanceFromClusterCentre)
{
    const size_t index = get_global_id(0);
    const size_t base = 5 * index;

    // Accumulate in double: the previous int temporaries truncated every
    // difference and could overflow when squared.
    double sum = 0.0;
    for (int c = 0; c < 5; ++c) {
        const double diff = pixelInfo[base + c] - clusterCentres[c];
        sum += diff * diff;
    }

    distanceFromClusterCentre[index] = sum;
}

这里是主机代码

#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>

#include <CL/cl.h>
using namespace std; 

#define MAX_SOURCE_SIZE (0x100000) 
int main(int argc, char **argv) 
{ 

    // Create the two input vectors 
    int i; 
    const int pixelsNumber = 1024; 
    const int clustersNumber = 1; 

    std::vector<double> pixelInfo; 
    pixelInfo.resize(5 * pixelsNumber); 
    std::fill(pixelInfo.begin(), pixelInfo.end(), 500); 

    std::vector<double> clusterCentres; 
    clusterCentres.resize(5 * clustersNumber); 
    std::fill(clusterCentres.begin(), clusterCentres.end(), 200); 

    std::vector<double> distanceFromClusterCentre; 
    distanceFromClusterCentre.resize(pixelsNumber); 
    std::fill(distanceFromClusterCentre.begin(), distanceFromClusterCentre.end(), 0); 

    // Load the kernel source code into the array source_str 
    FILE *fp; 
    char *source_str; 
    size_t source_size; 

    fp = fopen("d_kernel.cl", "r"); 
    if (!fp) { 
     fprintf(stderr, "Failed to load kernel.\n"); 
     exit(1); 
    } 
    source_str = (char*)malloc(MAX_SOURCE_SIZE); 
    source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp); 
    fclose(fp); 

    // Get platform and device information 
    cl_platform_id platform_id = NULL; 
    cl_device_id device_id = NULL; 
    cl_uint ret_num_devices; 
    cl_uint ret_num_platforms; 
    cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); 
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, 
     &device_id, &ret_num_devices); 

    // Create an OpenCL context 
    cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); 

    // Create a command queue 
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); 

    // Create memory buffers on the device for each vector 
    cl_mem pixelInfo_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, 
     5 * pixelsNumber * sizeof(int), NULL, &ret); 
    cl_mem clusterCentres_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, 
     5 * clustersNumber * sizeof(int), NULL, &ret); 
    cl_mem distanceFromClusterCentre_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
     pixelsNumber * sizeof(int), NULL, &ret); 

    // Copy the vectors to their respective memory buffers 
    ret = clEnqueueWriteBuffer(command_queue, pixelInfo_mem, CL_TRUE, 0, 
     5 * pixelsNumber * sizeof(int), pixelInfo.data(), 0, NULL, NULL); 
    ret = clEnqueueWriteBuffer(command_queue, clusterCentres_mem, CL_TRUE, 0, 
     5 * clustersNumber * sizeof(int), clusterCentres.data(), 0, NULL, NULL); 

    // Create a program from the kernel source 
    cl_program program = clCreateProgramWithSource(context, 1, 
     (const char **)&source_str, (const size_t *)&source_size, &ret); 

    // Build the program 
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); 

    // Create the OpenCL kernel 
    cl_kernel kernel = clCreateKernel(program, "vector_add", &ret); 

    // Set the arguments of the kernel 
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&pixelInfo_mem); 
    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&clusterCentres_mem); 
    ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&distanceFromClusterCentre_mem); 

    // Execute the OpenCL kernel on the list 
    size_t global_item_size = pixelsNumber; // Process the entire lists 
    size_t local_item_size = 64; // Divide work items into groups of 64 
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, 
     &global_item_size, &local_item_size, 0, NULL, NULL); 

    // Read the memory buffer result on the device to the local vector result 
    ret = clEnqueueReadBuffer(command_queue, distanceFromClusterCentre_mem, CL_TRUE, 0, 
     pixelsNumber * sizeof(int), distanceFromClusterCentre.data(), 0, NULL, NULL); 

    // Display the result to the screen 
    for (i = 0; i < pixelsNumber; i++) 
    { 
     cout << "Pixel " << i << ": " << distanceFromClusterCentre[i] << endl; 
     //system("PAUSE"); 
    } 

    // Clean up 
    ret = clFlush(command_queue); 
    ret = clFinish(command_queue); 
    ret = clReleaseKernel(kernel); 
    ret = clReleaseProgram(program); 
    ret = clReleaseMemObject(pixelInfo_mem); 
    ret = clReleaseMemObject(clusterCentres_mem); 
    ret = clReleaseMemObject(distanceFromClusterCentre_mem); 
    ret = clReleaseCommandQueue(command_queue); 
    ret = clReleaseContext(context); 
    free(pixelInfo.data()); 
    free(clusterCentres.data()); 
    free(distanceFromClusterCentre.data()); 

    system("PAUSE"); 
    return 0; 
} 

部分输出结果如下:

. 
. 
. 
Pixel 501: -1.11874e+306 
Pixel 502: -1.16263e+306 
Pixel 503: -1.07485e+306 
Pixel 504: -1.03079e+306 
Pixel 505: -9.42843e+305 
Pixel 506: -9.86903e+305 
Pixel 507: -8.98954e+305 
Pixel 508: -9.86903e+305 
Pixel 509: -8.98954e+305 
Pixel 510: -9.43014e+305 
Press any key to continue . . . 
Pixel 511: -8.55065e+305 
Pixel 512: 0 
Pixel 513: 0 
Pixel 514: 0 
Pixel 515: 0 
Pixel 516: 0 
Pixel 517: 0 
Pixel 518: 0 
Pixel 519: 0 
Pixel 520: 0 
. 
. 
. 

索引511之后,向量的其余元素全是零!

+0

你从这段代码中得到了什么结果?即它有什么问题? – user463035818

+0

@ tobi303我已经更新了一部分结果的帖子! –

+0

将'nearest_neighbour'重命名为'euclidian_distance_squared' – DrKoch

回答

1

您创建的是double类型的向量,却把它们当作int来处理(按int的大小创建缓冲区、按int的大小把数据写入缓冲区,并按int的大小读回结果)。为了避免这类错误,您可以这样写代码:

cl_mem pixelInfo_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, pixelInfo.size() * sizeof(pixelInfo[0]), NULL, &ret); 
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 
+0

我在内核中使用了'int index = get_global_id(0)'!这应该处理我猜测的正确索引。我也跟着你的建议,我用'int index = get_group_id(0)'替换我自己的ID,但结果仍然是错误的,并且与最后一个 –

+0

一样也没有用! –

+0

你知道用一个简单的'get_global_id'就可以轻松替代'get_group_id'和'get_local_id'的组合吗? –