From: Vadim Girlin <vadimgir...@gmail.com>

In some cases TGSI shader has more color outputs than the number of CBs,
so it seems we need to limit the number of color exports. This requires
different shader variants depending on the nr_cbufs, but on the other hand
we are doing less exports, which are very costly.

v2: fix various piglit regressions

Signed-off-by: Vadim Girlin <vadimgir...@gmail.com>
Signed-off-by: Jerome Glisse <jgli...@redhat.com>
---
 src/gallium/drivers/r600/evergreen_state.c   |   10 +++-------
 src/gallium/drivers/r600/r600_shader.c       |   25 ++++++++++++++++++++++---
 src/gallium/drivers/r600/r600_shader.h       |    7 ++++++-
 src/gallium/drivers/r600/r600_state.c        |    2 ++
 src/gallium/drivers/r600/r600_state_common.c |    4 ++--
 5 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index b618ca8..3fe95e1 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2641,18 +2641,14 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, 
struct r600_pipe_shader
                db_shader_control |= S_02880C_KILL_ENABLE(1);
 
        exports_ps = 0;
-       num_cout = 0;
        for (i = 0; i < rshader->noutput; i++) {
                if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
                    rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
                        exports_ps |= 1;
-               else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
-                       if (rshader->fs_write_all)
-                               num_cout = rshader->nr_cbufs;
-                       else
-                               num_cout++;
-               }
        }
+
+       num_cout = rshader->nr_ps_color_exports;
+
        exports_ps |= S_02884C_EXPORT_COLORS(num_cout);
        if (!exports_ps) {
                /* always at least export 1 component per pixel */
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index d294084..37914eb 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -801,6 +801,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                                ctx->cv_output = i;
                                break;
                        }
+               } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+                       switch (d->Semantic.Name) {
+                       case TGSI_SEMANTIC_COLOR:
+                               ctx->shader->nr_ps_max_color_exports++;
+                               break;
+                       }
                }
                break;
        case TGSI_FILE_CONSTANT:
@@ -1153,8 +1159,10 @@ static int r600_shader_from_tgsi(struct r600_context * 
rctx, struct r600_pipe_sh
        ctx.colors_used = 0;
        ctx.clip_vertex_write = 0;
 
+       shader->nr_ps_color_exports = 0;
+       shader->nr_ps_max_color_exports = 0;
+
        shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && 
rctx->two_side;
-       shader->nr_cbufs = rctx->nr_cbufs;
 
        /* register allocations */
        /* Values [0,127] correspond to GPR[0..127].
@@ -1289,6 +1297,9 @@ static int r600_shader_from_tgsi(struct r600_context * 
rctx, struct r600_pipe_sh
                }
        }
 
+       if (shader->fs_write_all && rctx->chip_class >= EVERGREEN)
+               shader->nr_ps_max_color_exports = 8;
+
        if (ctx.fragcoord_input >= 0) {
                if (ctx.bc->chip_class == CAYMAN) {
                        for (j = 0 ; j < 4; j++) {
@@ -1528,10 +1539,17 @@ static int r600_shader_from_tgsi(struct r600_context * 
rctx, struct r600_pipe_sh
                        break;
                case TGSI_PROCESSOR_FRAGMENT:
                        if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
+                               /* never export more colors than the number of 
CBs */
+                               if (next_pixel_base && next_pixel_base >= 
(rctx->nr_cbufs + rctx->dual_src_blend * 1)) {
+                                       /* skip export */
+                                       j--;
+                                       continue;
+                               }
                                output[j].array_base = next_pixel_base++;
                                output[j].type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                               shader->nr_ps_color_exports++;
                                if (shader->fs_write_all && (rctx->chip_class 
>= EVERGREEN)) {
-                                       for (k = 1; k < shader->nr_cbufs; k++) {
+                                       for (k = 1; k < rctx->nr_cbufs; k++) {
                                                j++;
                                                memset(&output[j], 0, 
sizeof(struct r600_bytecode_output));
                                                output[j].gpr = 
shader->output[i].gpr;
@@ -1545,6 +1563,7 @@ static int r600_shader_from_tgsi(struct r600_context * 
rctx, struct r600_pipe_sh
                                                output[j].array_base = 
next_pixel_base++;
                                                output[j].inst = 
BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
                                                output[j].type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                                               shader->nr_ps_color_exports++;
                                        }
                                }
                        } else if (shader->output[i].name == 
TGSI_SEMANTIC_POSITION) {
@@ -1595,7 +1614,7 @@ static int r600_shader_from_tgsi(struct r600_context * 
rctx, struct r600_pipe_sh
        }
 
        /* add fake pixel export */
-       if (ctx.type == TGSI_PROCESSOR_FRAGMENT && j == 0) {
+       if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
                memset(&output[j], 0, sizeof(struct r600_bytecode_output));
                output[j].gpr = 0;
                output[j].elem_size = 3;
diff --git a/src/gallium/drivers/r600/r600_shader.h 
b/src/gallium/drivers/r600/r600_shader.h
index 2d35e77..eb0bbf6 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -49,7 +49,12 @@ struct r600_shader {
        boolean                 fs_write_all;
        boolean                 vs_prohibit_ucps;
        boolean                 two_side;
-       unsigned                nr_cbufs;
+       /* Number of color outputs in the TGSI shader,
+        * sometimes it could be higher than nr_cbufs (bug?).
+        * Also with writes_all property on eg+ it will be set to max CB number 
*/
+       unsigned                nr_ps_max_color_exports;
+       /* Real number of ps color exports compiled in the bytecode */
+       unsigned                nr_ps_color_exports;
        /* bit n is set if the shader writes gl_ClipDistance[n] */
        unsigned                clip_dist_write;
        /* flag is set if the shader writes VS_OUT_MISC_VEC (e.g. for PSIZE) */
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index fc75781..b314edc 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1640,6 +1640,8 @@ static void r600_set_framebuffer_state(struct 
pipe_context *ctx,
 
        /* build states */
        rctx->have_depth_fb = 0;
+       rctx->nr_cbufs = state->nr_cbufs;
+
        for (int i = 0; i < state->nr_cbufs; i++) {
                r600_cb(rctx, rstate, state, i);
        }
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 00e1bd0..7755c9e 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -690,8 +690,8 @@ static void r600_update_derived_state(struct r600_context 
*rctx)
        }
 
        if ((rctx->ps_shader->shader.two_side != rctx->two_side) ||
-           ((rctx->chip_class >= EVERGREEN) && 
rctx->ps_shader->shader.fs_write_all &&
-            (rctx->ps_shader->shader.nr_cbufs != rctx->nr_cbufs))) {
+            (MIN2(rctx->ps_shader->shader.nr_ps_max_color_exports, 
rctx->nr_cbufs + rctx->dual_src_blend)
+             != rctx->ps_shader->shader.nr_ps_color_exports)) {
                r600_shader_rebuild(&rctx->context, rctx->ps_shader);
        }
 
-- 
1.7.10.2

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to