使用GLSL着色器进行图像处理?
我上过着色器课程,并且对计算机视觉和图像处理感兴趣。我想知道如何把GLSL着色器的知识与图像处理结合起来?如果用GLSL实现图像处理算法,我能得到什么好处?
回答
第一个显而易见的答案是:您获得了并行性。那么,为什么要用GLSL,而不用(比如说)更灵活的CUDA呢?GLSL不要求您拥有NVIDIA显卡,因此它是可移植性更好的方案(不过您仍然可以选择OpenCL)。
并行能带来什么?大多数情况下,各个像素可以独立处理。例如,增加图像的对比度通常需要遍历所有像素,并对像素值做仿射变换。如果每个像素都由一个单独的线程处理,就不再需要这个循环:您只需渲染一个四边形(quad),并应用一个像素着色器,该着色器在当前光栅化的位置读取纹理,再把变换后的像素值输出到渲染目标(或屏幕)。
缺点是数据必须驻留在GPU上:您需要把所有图像传输到GPU,这可能要花一些时间,甚至会抵消并行化带来的加速。因此,通常只有当要做的操作是计算密集型的,或者整个处理流程都可以留在GPU上时,才会采用GPU实现(例如,如果目标只是把修改后的图像显示在屏幕上,就省去了把图像传回CPU的开销)。
OpenGL 4.3(在SIGGRAPH 2012上发布)支持计算着色器(compute shader)。如果您只做图形方面的工作,并且已经在使用OpenGL,那么它可能比OpenCL/OpenGL互操作(或CUDA/OpenGL互操作)更易于使用。
以下是Khronos关于何时使用OpenGL 4.3计算着色器、何时使用OpenCL的比较:见PDF链接(Link to PDF),第5张幻灯片。
案例研究:CPU与GPU片段着色器的实时方框模糊(box blur)对比
我分别在CPU上和GPU片段着色器上实现了一个简单的方框模糊(box blur,https://en.wikipedia.org/wiki/Box_blur)算法,看看哪个更快:
- 演示视频https://www.youtube.com/watch?v=MRhAljmHq-o
- 源代码:https://github.com/cirosantilli/cpp-cheat/blob/cfd18700827dfcff6080a938793b44b790f0b7e7/opengl/glfw_webcam_image_process.c
我的摄像头刷新率把FPS上限限制在30,所以我测量的是:在仍能保持30 FPS的前提下,模糊框的最大宽度是多少。
在一台联想T430(2012年)、NVIDIA NVS5400显卡、Ubuntu 16.04的机器上,图像尺寸为960×540时,最大宽度为:
- GPU:23
- CPU:5
由于计算量与模糊框宽度成二次关系,加速比为:
$(23/5)^2 = 21.16$
也就是说,GPU比CPU快约21倍!
并非所有算法在GPU上都更快。例如,只作用于单个像素的操作(如交换RGB通道)在CPU上就已经能达到30 FPS,因此为它引入GPU编程的复杂性是没有意义的。
与其他任何CPU对比GPU的加速问题一样,关键在于:传输到GPU的每个字节是否对应足够多的计算量;而基准测试是您能做的最可靠的事情。一般来说,二次或更高复杂度的算法很适合放到GPU上。
代码的主要部分(完整代码直接从GitHub克隆即可):
#include "common.h"
#include "../v4l2/common_v4l2.h"
#include <stdio.h>
#include <stdlib.h>
/* Default capture width and height in pixels; main() uses them when the
 * corresponding command-line argument is not given. */
static const GLuint WIDTH = 640;
static const GLuint HEIGHT = 480;
/* Quad geometry shared by both draw calls: four vertices, each stored as
 * a clip-space position (x, y) followed by a texture coordinate (u, v).
 * The quad spans x in [-1, 0], i.e. the left half of the viewport; the
 * second vertex shader adds 1.0 to x to place its copy on the right half.
 * Both fragment shaders sample with the uv components swapped (.yx). */
static const GLfloat vertices[] = {
    /*  x      y      u     v   */
    -1.0f,  1.0f,  0.0f, 1.0f,  /* 0: top-left     */
     0.0f,  1.0f,  0.0f, 0.0f,  /* 1: top-right    */
     0.0f, -1.0f,  1.0f, 0.0f,  /* 2: bottom-right */
    -1.0f, -1.0f,  1.0f, 1.0f,  /* 3: bottom-left  */
};
/* Element indices into `vertices`: the quad is drawn as two triangles
 * that share the 0-2 diagonal. */
static const GLuint indices[] = {
    0u, 1u, 2u,  /* first triangle  */
    0u, 2u, 3u,  /* second triangle */
};
/* Vertex shader for the unmodified view: passes the 2D position through
 * unchanged (z = 0, w = 1) and forwards the texture coordinate to the
 * fragment stage. */
static const GLchar *vertex_shader_source =
"#version 330 core\n"
"in vec2 coord2d;\n"
"in vec2 vertexUv;\n"
"out vec2 fragmentUv;\n"
"void main() {\n"
" gl_Position = vec4(coord2d, 0, 1);\n"
" fragmentUv = vertexUv;\n"
"}\n";
/* Identity fragment shader: outputs the texel at the interpolated texture
 * coordinate, with the two coordinate components swapped (.yx). main()
 * also uses it for the second view in CPU mode, where the texture has
 * already been processed on the CPU. */
static const GLchar *fragment_shader_source =
"#version 330 core\n"
"in vec2 fragmentUv;\n"
"out vec3 color;\n"
"uniform sampler2D myTextureSampler;\n"
"void main() {\n"
" color = texture(myTextureSampler, fragmentUv.yx).rgb;\n"
"}\n";
/* Vertex shader for the modified view: identical to vertex_shader_source
 * except that it shifts the position by +1.0 along x, which moves the
 * quad from x in [-1, 0] to x in [0, 1]. */
static const GLchar *vertex_shader_source2 =
"#version 330 core\n"
"in vec2 coord2d;\n"
"in vec2 vertexUv;\n"
"out vec2 fragmentUv;\n"
"void main() {\n"
" gl_Position = vec4(coord2d + vec2(1.0, 0.0), 0, 1);\n"
" fragmentUv = vertexUv;\n"
"}\n";
/* Fragment shader for the modified view when the effect runs on the GPU.
 *
 * The active effect is a square box blur: it averages the
 * blur_width x blur_width texels centred on the current fragment, stepping
 * through the texture in units of `pixD` (set by main() to
 * (1/width, 1/height)).
 *
 * The lines wrapped in C comments below are alternative effects that are
 * compiled out; they are not part of the shader string. To try one,
 * uncomment it and comment out the active blur. */
static const GLchar *fragment_shader_source2 =
"#version 330 core\n"
"in vec2 fragmentUv;\n"
"out vec3 color;\n"
"uniform sampler2D myTextureSampler;\n"
"// pixel Delta. How large a pixel is in 0.0 to 1.0 that textures use.\n"
"uniform vec2 pixD;\n"
"void main() {\n"
/* Disabled alternative: identity (same as fragment_shader_source). */
/*"// Identity\n"*/
/*" color = texture(myTextureSampler, fragmentUv.yx).rgb;\n"*/
/* Disabled alternative: invert every channel. */
/*"// Inverter\n"*/
/*" color = 1.0 - texture(myTextureSampler, fragmentUv.yx).rgb;\n"*/
/* Disabled alternative: rotate the colour channels (rgb <- gbr). */
/*"// Swapper\n"*/
/*" color = texture(myTextureSampler, fragmentUv.yx).gbr;\n"*/
/* Disabled alternative: average the image with a copy sampled using the
 * unswapped coordinates. */
/*"// Double vision ortho.\n"*/
/*" color = ("*/
/*" texture(myTextureSampler, fragmentUv.yx).rgb +\n"*/
/*" texture(myTextureSampler, fragmentUv.xy).rgb\n"*/
/*" )/2.0;\n"*/
/* Disabled alternative: sample at 4x the texture coordinate. */
/*"// Multi-me.\n"*/
/*" color = texture(myTextureSampler, 4.0 * fragmentUv.yx).rgb;\n"*/
/* Disabled alternative: one-dimensional blur along a single axis. */
/*"// Horizontal linear blur.\n"*/
/*" int blur_width = 21;\n"*/
/*" int blur_width_half = blur_width/2;\n"*/
/*" color = vec3(0.0, 0.0, 0.0);\n"*/
/*" for (int i = -blur_width_half; i <= blur_width_half; ++i) {\n"*/
/*" color += texture(myTextureSampler, vec2(fragmentUv.y + i * pixD.x, fragmentUv.x)).rgb;\n"*/
/*" }\n"*/
/*" color /= blur_width;\n"*/
/* Active effect: square box blur. The label line below is left commented
 * out, so it does not appear in the shader source. */
/*"// Square linear blur.\n"*/
" int blur_width = 23;\n"
" int blur_width_half = blur_width/2;\n"
" color = vec3(0.0, 0.0, 0.0);\n"
" for (int i = -blur_width_half; i <= blur_width_half; ++i) {\n"
" for (int j = -blur_width_half; j <= blur_width_half; ++j) {\n"
" color += texture(\n"
" myTextureSampler, fragmentUv.yx + ivec2(i, j) * pixD\n"
" ).rgb;\n"
" }\n"
" }\n"
" color /= (blur_width * blur_width);\n"
"}\n";
enum {
    /* Sanity bound on the width/height arguments. It keeps 2 * width and
     * 3 * width * height comfortably inside the integer ranges used below. */
    MAX_DIMENSION = 16384,
    /* Side length, in pixels, of the box used by the CPU blur. */
    CPU_BLUR_WIDTH = 5
};

/* Parse a frame dimension from a command-line argument.
 *
 * Accepts a decimal integer in [1, MAX_DIMENSION]. On success stores the
 * value in *out and returns 1; otherwise leaves *out untouched and
 * returns 0. */
static int parse_dimension(const char *text, unsigned int *out) {
    char *end;
    long value = strtol(text, &end, 10);

    /* end == text means no digits were consumed at all. An overflowing
     * input makes strtol return LONG_MAX/LONG_MIN, which the range check
     * below rejects as well. */
    if (end == text || value <= 0 || value > MAX_DIMENSION) {
        return 0;
    }
    *out = (unsigned int)value;
    return 1;
}

/* CPU reference implementation of the square box blur.
 *
 * src:  tightly packed 8-bit RGB image, width * height pixels.
 * dst:  tightly packed float RGB image of the same size; every channel is
 *       written as a value in [0, 1].
 * blur_width: side length of the averaging box.
 *
 * Samples that fall outside the image count as black, so the sum is always
 * divided by the full box area.
 * TODO: wrap/clamp like the texture sampler does to match the shader
 * exactly. */
static void cpu_box_blur(
    const uint8_t *src,
    float *dst,
    unsigned int width,
    unsigned int height,
    int blur_width
) {
    const int half = blur_width / 2;
    const double norm = (blur_width * blur_width) * 255.0;

    for (unsigned int i = 0; i < height; ++i) {
        for (unsigned int j = 0; j < width; ++j) {
            unsigned int sum[3] = {0, 0, 0};

            for (int k = -half; k <= half; ++k) {
                const long row = (long)i + k;
                /* Row 0 is a valid row: the lower bound is inclusive. */
                if (row < 0 || row >= (long)height) {
                    continue;
                }
                for (int l = -half; l <= half; ++l) {
                    const long col = (long)j + l;
                    if (col < 0 || col >= (long)width) {
                        continue;
                    }
                    /* Index from the validated coordinates, in size_t, so
                     * no negative offset is ever pushed through unsigned
                     * arithmetic. */
                    const size_t s = 3 * ((size_t)row * width + (size_t)col);
                    sum[0] += src[s + 0];
                    sum[1] += src[s + 1];
                    sum[2] += src[s + 2];
                }
            }

            const size_t d = 3 * ((size_t)i * width + j);
            dst[d + 0] = (float)(sum[0] / norm);
            dst[d + 1] = (float)(sum[1] / norm);
            dst[d + 2] = (float)(sum[2] / norm);
        }
    }
}

/* Webcam viewer that shows the raw frame on the left half of the window
 * and a box-blurred copy on the right half.
 *
 * Usage: prog [width [height [cpu]]]
 *   width, height: capture size in pixels (defaults WIDTH x HEIGHT).
 *   cpu: if its first character is '1', the blur runs on the CPU and the
 *        result is drawn with the identity shader; otherwise the blur runs
 *        in the fragment shader.
 *
 * Returns EXIT_SUCCESS after the window is closed, or EXIT_FAILURE if the
 * arguments are invalid or initialization fails. */
int main(int argc, char **argv) {
    CommonV4l2 common_v4l2;
    GLFWwindow *window;
    GLint
        coord2d_location,
        myTextureSampler_location,
        vertexUv_location,
        coord2d_location2,
        pixD_location2,
        myTextureSampler_location2,
        vertexUv_location2
    ;
    GLuint
        ebo,
        program,
        program2,
        texture,
        vbo,
        vao,
        vao2
    ;
    unsigned int cpu = 0;
    unsigned int width = WIDTH;
    unsigned int height = HEIGHT;
    uint8_t *image;
    float *image2 = NULL;

    /* Command-line arguments. */
    if (argc > 1 && !parse_dimension(argv[1], &width)) {
        fprintf(stderr, "invalid width '%s' (expected 1..%d)\n", argv[1], MAX_DIMENSION);
        return EXIT_FAILURE;
    }
    if (argc > 2 && !parse_dimension(argv[2], &height)) {
        fprintf(stderr, "invalid height '%s' (expected 1..%d)\n", argv[2], MAX_DIMENSION);
        return EXIT_FAILURE;
    }
    if (argc > 3) {
        cpu = (argv[3][0] == '1');
    }

    /* The frame size never changes, so the CPU output buffer is allocated
     * once here instead of being reallocated on every frame. */
    if (cpu) {
        image2 = malloc(3 * (size_t)width * height * sizeof(image2[0]));
        if (image2 == NULL) {
            fprintf(stderr, "out of memory for a %ux%u float image\n", width, height);
            return EXIT_FAILURE;
        }
    }

    /* Window system. */
    if (!glfwInit()) {
        fprintf(stderr, "glfwInit failed\n");
        free(image2);
        return EXIT_FAILURE;
    }
    glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
    /* Twice as wide as a frame: original on the left, result on the right. */
    window = glfwCreateWindow(2 * width, height, __FILE__, NULL, NULL);
    if (window == NULL) {
        fprintf(stderr, "glfwCreateWindow failed\n");
        glfwTerminate();
        free(image2);
        return EXIT_FAILURE;
    }
    glfwMakeContextCurrent(window);
    glewInit();
    CommonV4l2_init(&common_v4l2, COMMON_V4L2_DEVICE, width, height);

    /* Shader setup. */
    program = common_get_shader_program(vertex_shader_source, fragment_shader_source);
    coord2d_location = glGetAttribLocation(program, "coord2d");
    vertexUv_location = glGetAttribLocation(program, "vertexUv");
    myTextureSampler_location = glGetUniformLocation(program, "myTextureSampler");

    /* Shader setup 2. In CPU mode the texture is already processed, so the
     * second view only needs the identity fragment shader. */
    const GLchar *fs = cpu ? fragment_shader_source : fragment_shader_source2;
    program2 = common_get_shader_program(vertex_shader_source2, fs);
    coord2d_location2 = glGetAttribLocation(program2, "coord2d");
    vertexUv_location2 = glGetAttribLocation(program2, "vertexUv");
    myTextureSampler_location2 = glGetUniformLocation(program2, "myTextureSampler");
    pixD_location2 = glGetUniformLocation(program2, "pixD");

    /* Create vbo. */
    glGenBuffers(1, &vbo);
    glBindBuffer(GL_ARRAY_BUFFER, vbo);
    glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    /* Create ebo. */
    glGenBuffers(1, &ebo);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);

    /* vao: attribute layout for `program`. */
    glGenVertexArrays(1, &vao);
    glBindVertexArray(vao);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
    glBindBuffer(GL_ARRAY_BUFFER, vbo);
    glVertexAttribPointer(coord2d_location, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(vertices[0]), (GLvoid*)0);
    glEnableVertexAttribArray(coord2d_location);
    glVertexAttribPointer(vertexUv_location, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(GLfloat), (GLvoid*)(2 * sizeof(vertices[0])));
    glEnableVertexAttribArray(vertexUv_location);
    glBindVertexArray(0);

    /* vao2: same buffers, attribute locations of `program2`. */
    glGenVertexArrays(1, &vao2);
    glBindVertexArray(vao2);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
    glBindBuffer(GL_ARRAY_BUFFER, vbo);
    glVertexAttribPointer(coord2d_location2, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(vertices[0]), (GLvoid*)0);
    glEnableVertexAttribArray(coord2d_location2);
    glVertexAttribPointer(vertexUv_location2, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(GLfloat), (GLvoid*)(2 * sizeof(vertices[0])));
    glEnableVertexAttribArray(vertexUv_location2);
    glBindVertexArray(0);

    /* Texture buffer. */
    glGenTextures(1, &texture);
    glBindTexture(GL_TEXTURE_2D, texture);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);

    /* Constant state. */
    glViewport(0, 0, 2 * width, height);
    glClearColor(1.0f, 1.0f, 1.0f, 1.0f);
    glActiveTexture(GL_TEXTURE0);
    /* The frame is indexed below as tightly packed 8-bit RGB, so a row is
     * 3 * width bytes. That is not a multiple of the default 4-byte unpack
     * alignment unless width is a multiple of 4. */
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

    /* Main loop. */
    common_fps_init();
    do {
        /* Blocks until an image is available, thus capping FPS to that.
         * 30FPS is common in cheap webcams. */
        CommonV4l2_updateImage(&common_v4l2);
        image = CommonV4l2_getImage(&common_v4l2);
        glClear(GL_COLOR_BUFFER_BIT);

        /* Original. */
        glTexImage2D(
            GL_TEXTURE_2D, 0, GL_RGB, width, height,
            0, GL_RGB, GL_UNSIGNED_BYTE, image
        );
        glUseProgram(program);
        glUniform1i(myTextureSampler_location, 0);
        glBindVertexArray(vao);
        glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0);
        glBindVertexArray(0);

        /* Optional CPU modification to compare with GPU shader speed.
         * Cheaper per-pixel effects (inverting each channel, rotating the
         * channels) can be substituted for the blur here for comparison. */
        if (cpu) {
            cpu_box_blur(image, image2, width, height, CPU_BLUR_WIDTH);
            glTexImage2D(
                GL_TEXTURE_2D, 0, GL_RGB, width, height,
                0, GL_RGB, GL_FLOAT, image2
            );
        }

        /* Modified. */
        glUseProgram(program2);
        glUniform1i(myTextureSampler_location2, 0);
        /* Size of one texel in normalized texture coordinates. */
        glUniform2f(pixD_location2, 1.0/width, 1.0/height);
        glBindVertexArray(vao2);
        glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0);
        glBindVertexArray(0);

        glfwSwapBuffers(window);
        glfwPollEvents();
        common_fps_print();
    } while (!glfwWindowShouldClose(window));

    /* Cleanup: release every object created above. */
    free(image2);
    CommonV4l2_deinit(&common_v4l2);
    glDeleteBuffers(1, &vbo);
    glDeleteBuffers(1, &ebo);
    glDeleteVertexArrays(1, &vao);
    glDeleteVertexArrays(1, &vao2);
    glDeleteTextures(1, &texture);
    glDeleteProgram(program);
    glDeleteProgram(program2);
    glfwTerminate();
    return EXIT_SUCCESS;
}
- 1. WP8使用像素着色器进行图像处理
- 2. 使用GLSL着色器在PyOpenGL中进行多纹理化
- 3. 使用GLSL着色器SDL2
- 4. GLSL着色器
- 5. glsl着色器进/出变量包装
- 6. 如何在OpenGL中使用着色器后处理图像?
- 7. 滚动纹理的GLSL着色器
- 8. 使用多个sampler2Ds的GLSL着色器
- 9. 使用lines_adjacency的GLSL几何着色器
- 10. glDrawElement使用GLSL着色器崩溃
- 11. VAO + IBO与GLSL着色器的使用
- 12. OpenGL的每像素着色与GLSL着色器
- 13. GLSL纹理映射和着色在相同的着色器
- 14. GLSL:顶点着色器无片段着色片段着色器
- 15. GLSL法线贴图着色器问题
- 16. 三角图案GLSL着色器
- 17. shadowmapping opengl glsl着色器图形数学
- 18. 处理多灯光和GLSL着色器程序
- 19. 如何在处理中更改GLSL着色器参数
- 20. GLSL如何在着色器处理后检索顶点位置?
- 21. Android上的着色图像处理
- 22. OpenGL GLSL着色器统计
- 23. GLSL着色器闪烁
- 24. GLSL边框着色器
- 25. GLSL着色器问题
- 26. GLSL着色器检测
- 27. GLSL像素着色器只操作0纹理
- 28. 用于图像处理的OpenGL ES 2.0着色器示例?
- 29. 使用SailsJS进行图像处理
- 30. 使用Hadoop MapReduce进行图像处理
既然您谈到了CUDA,为什么没有提到OpenCL? –
根据“游戏引擎宝石2”中描述的基准测试,GLSL在某些情况下胜过OpenCL和CUDA :) –
Nicol:在我看来,我回答的第三行已经提到它了,不是吗?(我并没有编辑过我的回答) – WhitAngl