2016-04-19 21 views
1

我一直在试图运行计算着色器 - 前缀和演示在提供:glMapBufferRange仅映射4个值中的1个。为什么?

https://github.com/openglsuperbible/sb7code/blob/master/src/prefixsum/prefixsum.cpp

我使用的确切代码:

#define NUM_ELEMENTS 2048 

float random_float() 
{ 
    static unsigned int seed = 0x13371337; 

    float res; 
    unsigned int tmp; 

    seed *= 16807; 

    tmp = seed^(seed >> 4)^(seed << 15); 

    *((unsigned int *)&res) = (tmp >> 9) | 0x3F800000; 

    return (res - 1.0f); 
} 

static int PrefixSum(int programHandle) 
{ 
    GLuint data_buffer[2]; 

    float input_data[NUM_ELEMENTS]; 
    float output_data[NUM_ELEMENTS]; 

    glGenBuffers(2, data_buffer); 

    glBindBuffer(GL_SHADER_STORAGE_BUFFER, data_buffer[0]); 
    glBufferData(GL_SHADER_STORAGE_BUFFER, NUM_ELEMENTS * sizeof(float), NULL, GL_DYNAMIC_DRAW); 

    glBindBuffer(GL_SHADER_STORAGE_BUFFER, data_buffer[1]); 
    glBufferData(GL_SHADER_STORAGE_BUFFER, NUM_ELEMENTS * sizeof(float), NULL, GL_DYNAMIC_COPY); 

    int i; 

    for (i = 0; i < NUM_ELEMENTS; i++) 
    { 
     input_data[i] = random_float(); 
    } 

    glShaderStorageBlockBinding(programHandle, 0, 0); 
    glShaderStorageBlockBinding(programHandle, 1, 1); 

    float * ptr; 

    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[0], 0, sizeof(float) * NUM_ELEMENTS); 
    glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(float) * NUM_ELEMENTS, input_data); 

    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS); 

    glUseProgram(programHandle); 
    glDispatchCompute(1, 1, 1); 

    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 
    glFinish(); 

    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS); 
    ptr = (float *)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(float) * NUM_ELEMENTS, GL_MAP_READ_BIT); 

    char buffer[1024]; 
    sprintf(buffer, "SUM: %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f " 
    "%2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f", 
    ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5], ptr[6], ptr[7], 
    ptr[8], ptr[9], ptr[10], ptr[11], ptr[12], ptr[13], ptr[14], ptr[15]); 

    glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); 
} 

这是着色器:

#version 430 core 

layout (local_size_x = 1024) in; 

layout (binding = 0) coherent readonly buffer block1 
{ 
    float input_data[gl_WorkGroupSize.x]; 
}; 

layout (binding = 1) coherent writeonly buffer block2 
{ 
    float output_data[gl_WorkGroupSize.x]; 
}; 

shared float shared_data[gl_WorkGroupSize.x * 2]; 

void main(void) 
{ 
    uint id = gl_LocalInvocationID.x; 
    uint rd_id; 
    uint wr_id; 
    uint mask; 

    const uint steps = uint(log2(gl_WorkGroupSize.x)) + 1; 
    uint step = 0; 

    shared_data[id * 2] = input_data[id * 2]; 
    shared_data[id * 2 + 1] = input_data[id * 2 + 1]; 

    barrier(); 

    for (step = 0; step < steps; step++) 
    { 
     mask = (1 << step) - 1; 
     rd_id = ((id >> step) << (step + 1)) + mask; 
     wr_id = rd_id + 1 + (id & mask); 

     shared_data[wr_id] += shared_data[rd_id]; 

     barrier(); 
    } 

    output_data[id * 2] = shared_data[id * 2]; 
    output_data[id * 2 + 1] = shared_data[id * 2 + 1]; 
} 

问题是,输出写入4个位置中的1个:

SUM: 0.70 0.00 0.00 0.00 1.69 0.00 0.00 0.00 1.81 0.00 0.00 0.00 2.59 0.00 0.00 0.00 

这是输入:

[0] 0.700959682 float 
    [1] 0.837353945 float 
    [2] 0.403481007 float 
    [3] 0.856583834 float 
    [4] 0.993326187 float 
    [5] 0.727316380 float 
    [6] 0.768217087 float 
    [7] 0.0675410032 float 
    [8] 0.112720609 float 
    [9] 0.703838706 float 
    [10] 0.365846157 float 
    [11] 0.504367113 float 
    [12] 0.778576016 float 
    [13] 0.217134356 float 
    [14] 0.944752693 float 
    [15] 0.575236082 float 
    [16] 0.795839429 float 
    [17] 0.707037449 float 
    [18] 0.181974053 float 
    [19] 0.745973587 float 
    [20] 0.281350732 float 
+0

FYI:我不相信你正在使用你的计算着色器SSBOs在需要'coherent'这样的方式。这主要是因为工作项目之间通过缓冲区进行交叉对话,并且所有的串扰都是通过共享内存进行的。这不是你的问题,但它可能会提高性能。 –

回答

1

解决:指定用于缓冲包装标准解决了这个问题:

layout (std430, binding = 1) coherent writeonly buffer block2 
{ 
    float output_data[gl_WorkGroupSize.x]; 
}; 
1
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 

记忆障碍指定你打算如何写后访问对象,你怎么不写它。你将通过映射它来阅读对象来阅读,所以你应该这样说。具体而言,您应该使用GL_BUFFER_UPDATE_BARRIER_BIT

另外:

glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS); 

这应该只是glBindBuffer(GL_SHADER_STORAGE_BUFFER)。您正在绑定它来映射它,而不是用于存储操作。

+0

我按照你的建议修改了代码,但并没有解决问题。 – markwalberg

相关问题