I made a pathological test case (attached) which repeatedly renders into an MSAA FBO and then blits it to the screen and measures the framerate. It checks it with a range of different sample counts. The rendering is done either by rendering two triangles to fill the framebuffer or by calling glClear.
The percentage increase in framerate after applying the patch, per sample count, is as follows:

With triangles to fill buffer:

16 62,27%
8 48,59%
4 27,72%
2 5,34%
0 0,58%

With glClear:

16 -5,20%
8 -7,08%
4 -2,45%
2 -20,76%
0 3,71%

It seems like a pretty convincing win for the triangle case, but the patch makes the clear case slightly worse. Presumably this is because we don't do anything to detect the value stored in the MCS buffer when doing a fast clear, so the fast path isn't taken and the more complicated shader makes it slower. I'm not sure whether we want to try to do anything about that, because cleared pixels are potentially not very common in a framebuffer from a real use case, so it might not really matter.

Currently we don't use SIMD16 for 16x MSAA because we can't allocate the registers well enough to make it worthwhile. This patch makes that problem a bit more interesting: even if we end up spilling a lot, it might still be worth doing SIMD16, because the cases where the spilled instructions are hit would be much less common.

- Neil
#include <stdio.h> #include <SDL.h> #include <epoxy/gl.h> #include <stdint.h> #include <stdbool.h> #define N_FRAMES_TO_SKIP 1000 #define N_FRAMES_TO_DRAW 10000 enum draw_mode { DRAW_MODE_TRIANGLES, DRAW_MODE_CLEAR }; struct data { SDL_Window *window; SDL_GLContext gl_context; }; static SDL_GLContext create_gl_context(SDL_Window *window) { /* First try creating a core context because if we get one it * can be more efficient. */ SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3); SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 1); SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); return SDL_GL_CreateContext(window); } static const char vertex_shader_source[] = "#version 150\n" "in vec2 piglit_vertex;\n" "\n" "void\n" "main()\n" "{\n" " gl_Position = vec4(piglit_vertex, 0.0, 1.0);\n" "}\n"; static const char fragment_shader_source[] = "#version 150\n" "uniform vec3 color;\n" "\n" "void\n" "main()\n" "{\n" " gl_FragColor = vec4(color, 1.0);\n" "}\n"; static GLuint create_shader(const char *name, GLenum type, const char *source, int source_length) { GLuint shader; GLint length, compile_status; GLsizei actual_length; GLchar *info_log; shader = glCreateShader(type); glShaderSource(shader, 1, /* n_strings */ &source, &source_length); glCompileShader(shader); glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &length); if (length > 0) { info_log = malloc(length); glGetShaderInfoLog(shader, length, &actual_length, info_log); if (*info_log) { fprintf(stderr, "Info log for %s:\n%s\n", name, info_log); } free(info_log); } glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_status); if (!compile_status) { fprintf(stderr, "%s compilation failed", name); glDeleteShader(shader); return 0; } return shader; } static bool link_program(GLuint program) { GLint length, link_status; GLsizei actual_length; GLchar *info_log; glLinkProgram(program); glGetProgramiv(program, GL_INFO_LOG_LENGTH, &length); if (length > 0) { info_log = malloc(length); 
glGetProgramInfoLog(program, length, &actual_length, info_log); if (*info_log) fprintf(stderr, "Link info log:\n%s\n", info_log); free(info_log); } glGetProgramiv(program, GL_LINK_STATUS, &link_status); if (!link_status) { fprintf(stderr, "program link failed"); return false; } return true; } static bool run_test(struct data *data, int n_samples, enum draw_mode draw_mode, float *result) { float verts[] = { -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 1.0f }; GLuint vbo, vao; GLuint prog; GLuint fragment_shader, vertex_shader; GLint color_loc; int window_width, window_height; int attr; SDL_Event event; GLuint fbo, rb; int frame_count; uint32_t start_time = 0, end_time; bool ret = true; prog = glCreateProgram(); vertex_shader = create_shader("vertex", GL_VERTEX_SHADER, vertex_shader_source, sizeof vertex_shader_source - 1); fragment_shader = create_shader("fragment", GL_FRAGMENT_SHADER, fragment_shader_source, sizeof fragment_shader_source - 1); if (vertex_shader == 0 || fragment_shader == 0) return false; glAttachShader(prog, vertex_shader); glAttachShader(prog, fragment_shader); if (!link_program(prog)) return false; glUseProgram(prog); glGenBuffers(1, &vbo); glBindBuffer(GL_ARRAY_BUFFER, vbo); glBufferData(GL_ARRAY_BUFFER, sizeof verts, verts, GL_STATIC_DRAW); glGenVertexArrays(1, &vao); glBindVertexArray(vao); attr = glGetAttribLocation(prog, "piglit_vertex"); glEnableVertexAttribArray(attr); glVertexAttribPointer(attr, 2, /* size */ GL_FLOAT, GL_FALSE, /* normalized */ sizeof (GLfloat) * 2, NULL /* pointer */); color_loc = glGetUniformLocation(prog, "color"); SDL_GetWindowSize(data->window, &window_width, &window_height); glGenFramebuffers(1, &fbo); glBindFramebuffer(GL_FRAMEBUFFER, fbo); glGenRenderbuffers(1, &rb); glBindRenderbuffer(GL_RENDERBUFFER, rb); glRenderbufferStorageMultisample(GL_RENDERBUFFER, n_samples, /* samples */ GL_RGBA, window_width, window_height); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, rb); if 
(glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { fprintf(stderr, "Framebuffer incomplete\n"); return false; } for (frame_count = 0; frame_count < N_FRAMES_TO_SKIP + N_FRAMES_TO_DRAW; frame_count++) { while (SDL_PollEvent(&event)) { if (event.type == SDL_QUIT) exit(1); } if (frame_count == N_FRAMES_TO_SKIP) start_time = SDL_GetTicks(); glBindFramebuffer(GL_FRAMEBUFFER, fbo); switch (draw_mode) { case DRAW_MODE_TRIANGLES: glUniform3f(color_loc, rand() / (float) RAND_MAX, rand() / (float) RAND_MAX, rand() / (float) RAND_MAX); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); break; case DRAW_MODE_CLEAR: glClearColor(rand() / (float) RAND_MAX, rand() / (float) RAND_MAX, rand() / (float) RAND_MAX, 1.0f); glClear(GL_COLOR_BUFFER_BIT); break; } glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); glBlitFramebuffer(0, 0, window_width, window_height, 0, 0, window_width, window_height, GL_COLOR_BUFFER_BIT, GL_NEAREST); SDL_GL_SwapWindow(data->window); } end_time = SDL_GetTicks(); *result = N_FRAMES_TO_DRAW * 1000.0f / (end_time - start_time); glDeleteBuffers(1, &vbo); glDeleteVertexArrays(1, &vao); glDeleteProgram(prog); return ret; } static void run_all_tests(struct data *data) { static int sample_counts[] = { 16, 8, 4, 2, 0, -1 }; int i, n_samples; bool res; int draw_mode; float fps; for (draw_mode = 0; draw_mode < 2; draw_mode++) { if (draw_mode > 0) fputc('\n', stdout); switch ((enum draw_mode) draw_mode) { case DRAW_MODE_CLEAR: printf("With glClear:\n" "\n"); break; case DRAW_MODE_TRIANGLES: printf("With triangles to fill buffer:\n" "\n"); break; } for (i = 0; sample_counts[i] != -1; i++) { n_samples = sample_counts[i]; res = run_test(data, n_samples, draw_mode, &fps); if (res) printf("%i,%f\n", n_samples, fps); } } } int main(int argc, char **argv) { struct data data; Uint32 flags; int res; int ret = EXIT_SUCCESS; res = SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK); if (res < 0) { fprintf(stderr, "Unable to init SDL: %s\n", 
SDL_GetError()); ret = EXIT_FAILURE; goto out; } SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 8); SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8); SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 8); SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 8); SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1); flags = SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE; data.window = SDL_CreateWindow("Test", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 800, 600, flags); if (data.window == NULL) { fprintf(stderr, "Failed to create SDL window: %s", SDL_GetError()); ret = EXIT_FAILURE; goto out_sdl; } data.gl_context = create_gl_context(data.window); if (data.gl_context == NULL) { fprintf(stderr, "Failed to create GL context: %s", SDL_GetError()); ret = EXIT_FAILURE; goto out_window; } SDL_GL_MakeCurrent(data.window, data.gl_context); SDL_GL_SetSwapInterval(0); run_all_tests(&data); SDL_GL_MakeCurrent(NULL, NULL); SDL_GL_DeleteContext(data.gl_context); out_window: SDL_DestroyWindow(data.window); out_sdl: SDL_Quit(); out: return ret; }
Ian Romanick <i...@freedesktop.org> writes: > From: Ian Romanick <ian.d.roman...@intel.com> > > Somewhat surprisingly, this didn't have any effect on performance in the > benchmarks that Martin tried for me. > > Signed-off-by: Ian Romanick <ian.d.roman...@intel.com> > --- > src/mesa/drivers/common/meta_blit.c | 10 +++++++++- > 1 file changed, 9 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/common/meta_blit.c > b/src/mesa/drivers/common/meta_blit.c > index 28aabd3..c0ec51f 100644 > --- a/src/mesa/drivers/common/meta_blit.c > +++ b/src/mesa/drivers/common/meta_blit.c > @@ -530,6 +530,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx, > fs_source = ralloc_asprintf(mem_ctx, > "#version 130\n" > "#extension GL_ARB_texture_multisample: > require\n" > + "#extension > GL_EXT_shader_samples_identical: enable\n" > "#define gvec4 %svec4\n" > "uniform %ssampler2DMS%s texSampler;\n" > "in %s texCoords;\n" > @@ -569,7 +570,14 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx, > " i%s tc = i%s(texCoords);\n" > " int i;\n" > "\n" > - " for (i = 0; i < SAMPLES; i++)\n" > + " S[0] = texelFetch(texSampler, tc, > 0);\n" > + "#if > defined(GL_EXT_shader_samples_identical) && SAMPLES > 1\n" > + " if > (textureSamplesIdenticalEXT(texSampler, tc)) {\n" > + " emit2(S[0]);\n" > + " return;\n" > + " }\n" > + "#endif\n" > + " for (i = 1; i < SAMPLES; i++)\n" > " S[i] = texelFetch(texSampler, > tc, i);\n" > "\n" > " REDUCE(s16, s32);\n" > -- > 2.5.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev