I made a pathological test case (attached) which repeatedly renders into
an MSAA FBO and then blits it to the screen and measures the framerate.
It checks it with a range of different sample counts. The rendering is
done either by rendering two triangles to fill the framebuffer or by
calling glClear.

The percentage increase in framerate after applying the patch is like
this:

With triangles to fill buffer:
        
16      62,27%
8       48,59%
4       27,72%
2       5,34%
0       0,58%
        
With glClear:   
        
16      -5,20%
8       -7,08%
4       -2,45%
2       -20,76%
0       3,71%

It seems like a pretty convincing win for the triangle case, but the
patch makes the clear case slightly worse. Presumably this is because we
don't do anything to detect the value stored in the MCS buffer when doing
a fast clear, so the fast path isn't taken and the more complicated
shader makes it slower.

Not sure if we want to try and do anything about that because
potentially the cleared pixels aren't very common in a framebuffer from
a real use case so it might not really matter.

Currently we don't use SIMD16 for 16x MSAA because we can't allocate the
registers well enough to make it worthwhile. This patch makes that
problem a bit more interesting because even if we end up spilling a lot
it might still be worth doing SIMD16 because the cases where the spilled
instructions are hit would be much less common.

- Neil

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <SDL.h>
#include <epoxy/gl.h>

/* Number of warm-up frames rendered before run_test starts its timer. */
#define N_FRAMES_TO_SKIP 1000
/* Number of frames rendered after the warm-up, while the timer runs. */
#define N_FRAMES_TO_DRAW 10000

/* How each frame's contents are produced in the multisampled FBO. */
enum draw_mode {
        /* Fill the framebuffer by drawing a four-vertex triangle strip. */
        DRAW_MODE_TRIANGLES,
        /* Fill the framebuffer by calling glClear. */
        DRAW_MODE_CLEAR
};

/* State shared by the tests: the SDL window and the GL context created
 * for it in main().
 */
struct data {
        SDL_Window *window;
        SDL_GLContext gl_context;
};

/* Ask SDL for an OpenGL 3.1 core-profile context on the given window.
 *
 * A core context is requested because, if we get one, it can be more
 * efficient. The result of SDL_GL_CreateContext is returned unchanged;
 * the caller checks it against NULL.
 */
static SDL_GLContext
create_gl_context(SDL_Window *window)
{
        /* Attribute/value pairs to request before creating the context. */
        static const struct {
                int attr;
                int value;
        } requested[] = {
                { SDL_GL_CONTEXT_MAJOR_VERSION, 3 },
                { SDL_GL_CONTEXT_MINOR_VERSION, 1 },
                { SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE },
        };
        size_t i;

        for (i = 0; i < sizeof requested / sizeof requested[0]; i++)
                SDL_GL_SetAttribute(requested[i].attr, requested[i].value);

        return SDL_GL_CreateContext(window);
}

/* GLSL vertex shader: passes the 2D "piglit_vertex" attribute through as
 * the clip-space position with z = 0 and w = 1.
 */
static const char
vertex_shader_source[] =
        "#version 150\n"
        "in vec2 piglit_vertex;\n"
        "\n"
        "void\n"
        "main()\n"
        "{\n"
        "        gl_Position = vec4(piglit_vertex, 0.0, 1.0);\n"
        "}\n";

/* GLSL fragment shader: writes the "color" uniform as an opaque
 * (alpha = 1) fragment color.
 *
 * NOTE(review): gl_FragColor is a deprecated built-in in GLSL 1.50; some
 * core-profile compilers may warn about or reject it - confirm on the
 * target driver.
 */
static const char
fragment_shader_source[] =
        "#version 150\n"
        "uniform vec3 color;\n"
        "\n"
        "void\n"
        "main()\n"
        "{\n"
        "        gl_FragColor = vec4(color, 1.0);\n"
        "}\n";

/* Compile one shader stage and report any compiler output on stderr.
 *
 * name:          label used in diagnostics (e.g. "vertex")
 * type:          shader stage passed to glCreateShader
 * source:        GLSL source text
 * source_length: length of source in bytes, not counting any terminator
 *
 * Returns the shader object name, or 0 if the shader could not be created
 * or failed to compile. On compile failure the shader object is deleted.
 */
static GLuint
create_shader(const char *name,
              GLenum type,
              const char *source,
              int source_length)
{
        GLuint shader;
        GLint length = 0, compile_status = 0;
        GLsizei actual_length;
        GLchar *info_log;

        shader = glCreateShader(type);
        if (shader == 0) {
                fprintf(stderr, "Failed to create %s shader object\n", name);
                return 0;
        }

        glShaderSource(shader,
                       1, /* n_strings */
                       &source,
                       &source_length);

        glCompileShader(shader);

        glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &length);

        if (length > 0) {
                info_log = malloc(length);
                if (info_log == NULL) {
                        /* Losing the log is not fatal; the compile status
                         * is still checked below.
                         */
                        fprintf(stderr,
                                "Out of memory reading info log for %s\n",
                                name);
                } else {
                        /* Make sure the buffer is a valid empty string even
                         * if the query writes nothing.
                         */
                        info_log[0] = '\0';
                        glGetShaderInfoLog(shader, length,
                                           &actual_length,
                                           info_log);
                        if (*info_log) {
                                fprintf(stderr,
                                        "Info log for %s:\n%s\n",
                                        name,
                                        info_log);
                        }
                        free(info_log);
                }
        }

        glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_status);

        if (!compile_status) {
                fprintf(stderr, "%s compilation failed\n", name);
                glDeleteShader(shader);
                return 0;
        }

        return shader;
}

/* Link a program object and report any linker output on stderr.
 *
 * program: program object with its shaders already attached
 *
 * Returns true if GL_LINK_STATUS reports success, false otherwise. The
 * program object is not deleted on failure; that is left to the caller.
 */
static bool
link_program(GLuint program)
{
        GLint length = 0, link_status = 0;
        GLsizei actual_length;
        GLchar *info_log;

        glLinkProgram(program);

        glGetProgramiv(program, GL_INFO_LOG_LENGTH, &length);

        if (length > 0) {
                info_log = malloc(length);
                if (info_log == NULL) {
                        /* Losing the log is not fatal; the link status is
                         * still checked below.
                         */
                        fprintf(stderr,
                                "Out of memory reading link info log\n");
                } else {
                        /* Make sure the buffer is a valid empty string even
                         * if the query writes nothing.
                         */
                        info_log[0] = '\0';
                        glGetProgramInfoLog(program, length,
                                            &actual_length,
                                            info_log);
                        if (*info_log) {
                                fprintf(stderr,
                                        "Link info log:\n%s\n",
                                        info_log);
                        }
                        free(info_log);
                }
        }

        glGetProgramiv(program, GL_LINK_STATUS, &link_status);

        if (!link_status) {
                fprintf(stderr, "program link failed\n");
                return false;
        }

        return true;
}

/* Measure the framerate of rendering into a multisampled FBO and blitting
 * it to the window.
 *
 * data:      window and GL context to render with (context must be current)
 * n_samples: sample count passed to glRenderbufferStorageMultisample
 * draw_mode: whether each frame is filled with triangles or with glClear
 * result:    on success, receives the measured frames per second
 *
 * N_FRAMES_TO_SKIP warm-up frames are rendered first, then
 * N_FRAMES_TO_DRAW frames are timed with SDL_GetTicks.
 *
 * Returns true on success. On failure a message is printed to stderr,
 * *result is left untouched and false is returned. Every GL object
 * created here is released on all paths. If an SDL_QUIT event arrives
 * during the run the process exits with status 1.
 */
static bool
run_test(struct data *data,
         int n_samples,
         enum draw_mode draw_mode,
         float *result)
{
        static const float verts[] = {
                -1.0f, -1.0f,
                1.0f, -1.0f,
                -1.0f, 1.0f,
                1.0f, 1.0f
        };
        /* All object names start at 0 so that the shared cleanup path can
         * delete them unconditionally: GL ignores deletion of name 0.
         */
        GLuint vbo = 0, vao = 0;
        GLuint prog;
        GLuint fragment_shader = 0, vertex_shader = 0;
        GLint color_loc;
        int window_width, window_height;
        int attr;
        SDL_Event event;
        GLuint fbo = 0, rb = 0;
        int frame_count;
        uint32_t start_time = 0, end_time, elapsed_ms;
        bool ret = false;

        prog = glCreateProgram();

        vertex_shader = create_shader("vertex",
                                      GL_VERTEX_SHADER,
                                      vertex_shader_source,
                                      sizeof vertex_shader_source - 1);

        fragment_shader = create_shader("fragment",
                                        GL_FRAGMENT_SHADER,
                                        fragment_shader_source,
                                        sizeof fragment_shader_source - 1);

        if (vertex_shader == 0 || fragment_shader == 0)
                goto out;

        glAttachShader(prog, vertex_shader);
        glAttachShader(prog, fragment_shader);
        if (!link_program(prog))
                goto out;

        glUseProgram(prog);

        glGenBuffers(1, &vbo);
        glBindBuffer(GL_ARRAY_BUFFER, vbo);
        glBufferData(GL_ARRAY_BUFFER, sizeof verts,
                     verts, GL_STATIC_DRAW);

        glGenVertexArrays(1, &vao);
        glBindVertexArray(vao);

        attr = glGetAttribLocation(prog, "piglit_vertex");
        if (attr < 0) {
                fprintf(stderr, "piglit_vertex attribute not found\n");
                goto out;
        }
        glEnableVertexAttribArray(attr);
        glVertexAttribPointer(attr,
                              2, /* size */
                              GL_FLOAT,
                              GL_FALSE, /* normalized */
                              sizeof (GLfloat) * 2,
                              NULL /* pointer */);

        color_loc = glGetUniformLocation(prog, "color");

        SDL_GetWindowSize(data->window, &window_width, &window_height);

        glGenFramebuffers(1, &fbo);
        glBindFramebuffer(GL_FRAMEBUFFER, fbo);
        glGenRenderbuffers(1, &rb);
        glBindRenderbuffer(GL_RENDERBUFFER, rb);
        glRenderbufferStorageMultisample(GL_RENDERBUFFER,
                                         n_samples, /* samples */
                                         GL_RGBA,
                                         window_width,
                                         window_height);
        glFramebufferRenderbuffer(GL_FRAMEBUFFER,
                                  GL_COLOR_ATTACHMENT0,
                                  GL_RENDERBUFFER,
                                  rb);

        if (glCheckFramebufferStatus(GL_FRAMEBUFFER) !=
            GL_FRAMEBUFFER_COMPLETE) {
                fprintf(stderr, "Framebuffer incomplete\n");
                goto out;
        }

        for (frame_count = 0;
             frame_count < N_FRAMES_TO_SKIP + N_FRAMES_TO_DRAW;
             frame_count++) {
                while (SDL_PollEvent(&event)) {
                        if (event.type == SDL_QUIT)
                                exit(1);
                }

                /* Start timing only once the warm-up frames are done. */
                if (frame_count == N_FRAMES_TO_SKIP)
                        start_time = SDL_GetTicks();

                glBindFramebuffer(GL_FRAMEBUFFER, fbo);

                /* A fresh random colour is used every frame in both
                 * modes.
                 */
                switch (draw_mode) {
                case DRAW_MODE_TRIANGLES:
                        glUniform3f(color_loc,
                                    rand() / (float) RAND_MAX,
                                    rand() / (float) RAND_MAX,
                                    rand() / (float) RAND_MAX);
                        glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
                        break;
                case DRAW_MODE_CLEAR:
                        glClearColor(rand() / (float) RAND_MAX,
                                     rand() / (float) RAND_MAX,
                                     rand() / (float) RAND_MAX,
                                     1.0f);
                        glClear(GL_COLOR_BUFFER_BIT);
                        break;
                }

                /* Blit the multisampled FBO to the window's framebuffer. */
                glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo);
                glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
                glBlitFramebuffer(0, 0, window_width, window_height,
                                  0, 0, window_width, window_height,
                                  GL_COLOR_BUFFER_BIT,
                                  GL_NEAREST);

                SDL_GL_SwapWindow(data->window);
        }

        end_time = SDL_GetTicks();

        /* Unsigned subtraction stays correct if the tick counter wraps.
         * Clamp to 1 ms so a zero interval cannot cause a division by zero.
         */
        elapsed_ms = end_time - start_time;
        if (elapsed_ms == 0)
                elapsed_ms = 1;

        *result = N_FRAMES_TO_DRAW * 1000.0f / elapsed_ms;

        ret = true;

out:
        /* Release everything created above. Names that are still 0 are
         * ignored by the GL delete calls, so this is safe on every path.
         */
        glBindFramebuffer(GL_FRAMEBUFFER, 0);
        glDeleteFramebuffers(1, &fbo);
        glDeleteRenderbuffers(1, &rb);

        glDeleteBuffers(1, &vbo);
        glDeleteVertexArrays(1, &vao);

        glUseProgram(0);
        glDeleteProgram(prog);
        glDeleteShader(vertex_shader);
        glDeleteShader(fragment_shader);

        return ret;
}

static void
run_all_tests(struct data *data)
{
        static int sample_counts[] = { 16, 8, 4, 2, 0, -1 };
        int i, n_samples;
        bool res;
        int draw_mode;
        float fps;

        for (draw_mode = 0; draw_mode < 2; draw_mode++) {
                if (draw_mode > 0)
                        fputc('\n', stdout);

                switch ((enum draw_mode) draw_mode) {
                case DRAW_MODE_CLEAR:
                        printf("With glClear:\n"
                               "\n");
                        break;
                case DRAW_MODE_TRIANGLES:
                        printf("With triangles to fill buffer:\n"
                               "\n");
                        break;
                }

                for (i = 0; sample_counts[i] != -1; i++) {
                        n_samples = sample_counts[i];

                        res = run_test(data, n_samples, draw_mode, &fps);

                        if (res)
                                printf("%i,%f\n", n_samples, fps);
                }
        }
}

/* Program entry point: set up SDL, an 800x600 OpenGL window and a GL
 * context, run every benchmark, then tear everything down again.
 *
 * Command-line arguments are not used.
 *
 * Returns EXIT_SUCCESS if setup succeeded and the tests were run, or
 * EXIT_FAILURE if SDL, the window or the GL context could not be set up.
 */
int
main(int argc, char **argv)
{
        struct data data;
        Uint32 flags;
        int res;
        int ret = EXIT_SUCCESS;

        (void) argc;
        (void) argv;

        /* NOTE(review): the joystick subsystem does not appear to be used
         * anywhere in this program - confirm whether it can be dropped.
         */
        res = SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK);
        if (res < 0) {
                fprintf(stderr, "Unable to init SDL: %s\n", SDL_GetError());
                ret = EXIT_FAILURE;
                goto out;
        }

        SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 8);
        SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8);
        SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
        SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 8);
        SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 8);
        SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);

        flags = SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE;

        data.window = SDL_CreateWindow("Test",
                                       SDL_WINDOWPOS_UNDEFINED,
                                       SDL_WINDOWPOS_UNDEFINED,
                                       800, 600,
                                       flags);
        if (data.window == NULL) {
                fprintf(stderr,
                        "Failed to create SDL window: %s\n",
                        SDL_GetError());
                ret = EXIT_FAILURE;
                goto out_sdl;
        }

        data.gl_context = create_gl_context(data.window);
        if (data.gl_context == NULL) {
                fprintf(stderr,
                        "Failed to create GL context: %s\n",
                        SDL_GetError());
                ret = EXIT_FAILURE;
                goto out_window;
        }

        if (SDL_GL_MakeCurrent(data.window, data.gl_context) != 0) {
                fprintf(stderr,
                        "Failed to make GL context current: %s\n",
                        SDL_GetError());
                ret = EXIT_FAILURE;
                goto out_context;
        }

        /* Swap interval 0 so the measured framerate is not capped by the
         * display refresh.
         */
        SDL_GL_SetSwapInterval(0);

        run_all_tests(&data);

        SDL_GL_MakeCurrent(NULL, NULL);
out_context:
        SDL_GL_DeleteContext(data.gl_context);
out_window:
        SDL_DestroyWindow(data.window);
out_sdl:
        SDL_Quit();
out:
        return ret;
}
Ian Romanick <i...@freedesktop.org> writes:

> From: Ian Romanick <ian.d.roman...@intel.com>
>
> Somewhat surprisingly, this didn't have any effect on performance in the
> benchmarks that Martin tried for me.
>
> Signed-off-by: Ian Romanick <ian.d.roman...@intel.com>
> ---
>  src/mesa/drivers/common/meta_blit.c | 10 +++++++++-
>  1 file changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/common/meta_blit.c 
> b/src/mesa/drivers/common/meta_blit.c
> index 28aabd3..c0ec51f 100644
> --- a/src/mesa/drivers/common/meta_blit.c
> +++ b/src/mesa/drivers/common/meta_blit.c
> @@ -530,6 +530,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
>           fs_source = ralloc_asprintf(mem_ctx,
>                                       "#version 130\n"
>                                       "#extension GL_ARB_texture_multisample: 
> require\n"
> +                                     "#extension 
> GL_EXT_shader_samples_identical: enable\n"
>                                       "#define gvec4 %svec4\n"
>                                       "uniform %ssampler2DMS%s texSampler;\n"
>                                       "in %s texCoords;\n"
> @@ -569,7 +570,14 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
>                                       "   i%s tc = i%s(texCoords);\n"
>                                       "   int i;\n"
>                                       "\n"
> -                                     "   for (i = 0; i < SAMPLES; i++)\n"
> +                                     "   S[0] = texelFetch(texSampler, tc, 
> 0);\n"
> +                                     "#if 
> defined(GL_EXT_shader_samples_identical) && SAMPLES > 1\n"
> +                                     "   if 
> (textureSamplesIdenticalEXT(texSampler, tc)) {\n"
> +                                     "      emit2(S[0]);\n"
> +                                     "      return;\n"
> +                                     "   }\n"
> +                                     "#endif\n"
> +                                     "   for (i = 1; i < SAMPLES; i++)\n"
>                                       "      S[i] = texelFetch(texSampler, 
> tc, i);\n"
>                                       "\n"
>                                       "   REDUCE(s16, s32);\n"
> -- 
> 2.5.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to