On Mon, 24 Nov 2014 04:36:06 +0100, Dave Airlie <airl...@gmail.com> wrote:

From: Dave Airlie <airl...@redhat.com>

We are using 1 more buffer than we have, although in the future the
driver should just end up using one buffer in total probably, this
is a good first step, it merges the txq cube array and buffer info
constants on r600 and evergreen.

this also most likely breaks llvm backend, I've changed it,
but it definitely needs fixes for this.

this fixes a bunch of geom shader textureSize tests on rv635
from gpu reset to pass.

Signed-off-by: Dave Airlie <airl...@redhat.com>
---
 src/gallium/drivers/r600/r600_llvm.c         |  3 +-
 src/gallium/drivers/r600/r600_pipe.h         |  8 +--
 src/gallium/drivers/r600/r600_shader.c       | 18 ++++--
src/gallium/drivers/r600/r600_state_common.c | 92 +++++++++++-----------------
 4 files changed, 54 insertions(+), 67 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index c19693a..470c65f 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -23,7 +23,6 @@
#define CONSTANT_BUFFER_0_ADDR_SPACE 8
#define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_UCP_CONST_BUFFER) -#define CONSTANT_TXQ_BUFFER (CONSTANT_BUFFER_0_ADDR_SPACE + R600_TXQ_CONST_BUFFER)
 #define LLVM_R600_BUFFER_INFO_CONST_BUFFER \
        (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
@@ -690,7 +689,7 @@ static void llvm_emit_tex(
                if (emit_data->inst->Dst[0].Register.WriteMask & 4) {
LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0);
                        LLVMValueRef ZLayer = 
LLVMBuildExtractElement(gallivm->builder,
-                               llvm_load_const_buffer(bld_base, offset, 
CONSTANT_TXQ_BUFFER),
+ llvm_load_const_buffer(bld_base, offset, LLVM_R600_BUFFER_INFO_CONST_BUFFER,
                                lp_build_const_int32(gallivm, 0), "");

I don't see how llvm_emit_tex can possibly work in its current condition. This patch breaks it a little more...

emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), ""); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 40b0328..e27e877 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -44,14 +44,13 @@
 #define R600_TRACE_CS_DWORDS           7
#define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 4
+#define R600_MAX_DRIVER_CONST_BUFFERS 3
#define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
/* start driver buffers after user buffers */
 #define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-#define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
-#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
-#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 3)
+#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
/* Currently R600_MAX_CONST_BUFFERS is too large, the hardware only has 16 buffers, but the driver is
  * trying to use 17. Avoid accidentally aliasing with user UBOs for

The comment here needs updating, its should be down to 16 buffers total now ie within hardware limits.

D3D 11 does require 15 to be available for application use though so eventually all the driver buffers ought to be merged, but outside of scope of this patch.

SAMPLE_POSITIONS by using an id<16.
* UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id.
@@ -316,7 +315,6 @@ struct r600_samplerview_state {
        uint32_t                        dirty_mask;
        uint32_t                        compressed_depthtex_mask; /* which 
textures are depth */
        uint32_t                        compressed_colortex_mask;
-       boolean                         dirty_txq_constants;
        boolean                         dirty_buffer_constants;
 };
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index a772dee..61ff162 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5035,8 +5035,9 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
        alu.op = ALU_OP1_MOV;
        if (ctx->bc->chip_class >= EVERGREEN) {
-               alu.src[0].sel = 512 + (id / 4);
-               alu.src[0].chan = id % 4;
+               /* channel 0 or 2 of each word */
+               alu.src[0].sel = 512 + (id / 2);
+               alu.src[0].chan = (id % 2) * 2;
        } else {
                /* r600 we have them at channel 2 of the second dword */
                alu.src[0].sel = 512 + (id * 2) + 1;
@@ -5697,9 +5698,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
                memset(&alu, 0, sizeof(struct r600_bytecode_alu));
                alu.op = ALU_OP1_MOV;
-               alu.src[0].sel = 512 + (id / 4);
-               alu.src[0].kc_bank = R600_TXQ_CONST_BUFFER;
-               alu.src[0].chan = id % 4;
+               if (ctx->bc->chip_class >= EVERGREEN) {
+                       /* channel 1 or 3 of each word */
+                       alu.src[0].sel = 512 + (id / 2);
+                       alu.src[0].chan = ((id % 2) * 2) + 1;
+               } else {
+                       /* r600 we have them at channel 2 of the second dword */
+                       alu.src[0].sel = 512 + (id * 2) + 1;
+                       alu.src[0].chan = 2;
+               }
+               alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
                tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
                alu.last = 1;
                r = r600_bytecode_add_alu(ctx->bc, &alu);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index c3f21cb..ff94454 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -649,7 +649,6 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
        dst->views.dirty_mask |= new_mask;
        dst->views.compressed_depthtex_mask &= dst->views.enabled_mask;
        dst->views.compressed_colortex_mask &= dst->views.enabled_mask;
-       dst->views.dirty_txq_constants = TRUE;
        dst->views.dirty_buffer_constants = TRUE;
        r600_sampler_views_dirty(rctx, &dst->views);
@@ -984,6 +983,7 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask
  * then in the shader, we AND the 4 components with 0xffffffff or 0,
  * then OR the alpha with the value given here.
  * We use a 6th constant to store the txq buffer size in
+ * we use 7th slot for cube map

The comment neglects to tell what value is placed in this 7th slot?

  */
static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_type)
 {
@@ -1022,6 +1022,7 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
                                samplers->buffer_constants[offset + 4] = 0;
samplers->buffer_constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); + samplers->buffer_constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
                }
        }
@@ -1048,12 +1049,16 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type

Comment for this function is out of date and needs updating.

        samplers->views.dirty_buffer_constants = FALSE;
        bits = util_last_bit(samplers->views.enabled_mask);
-       array_size = bits * sizeof(uint32_t) * 4;
+       array_size = bits * 2 * sizeof(uint32_t) * 4;
samplers->buffer_constants = realloc(samplers->buffer_constants, array_size);
        memset(samplers->buffer_constants, 0, array_size);
-       for (i = 0; i < bits; i++)
-               if (samplers->views.enabled_mask & (1 << i))
- samplers->buffer_constants[i] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+       for (i = 0; i < bits; i++) {
+               if (samplers->views.enabled_mask & (1 << i)) {
+                       uint32_t offset = i * 2;
+ samplers->buffer_constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); + samplers->buffer_constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
+               }
+       }
        cb.buffer = NULL;
        cb.user_buffer = samplers->buffer_constants;
@@ -1063,35 +1068,6 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
        pipe_resource_reference(&cb.buffer, NULL);
 }
-static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int shader_type)
-{
-       struct r600_textures_info *samplers = &rctx->samplers[shader_type];
-       int bits;
-       uint32_t array_size;
-       struct pipe_constant_buffer cb;
-       int i;
-
-       if (!samplers->views.dirty_txq_constants)
-               return;
-
-       samplers->views.dirty_txq_constants = FALSE;
-
-       bits = util_last_bit(samplers->views.enabled_mask);
-       array_size = bits * sizeof(uint32_t) * 4;
-       samplers->txq_constants = realloc(samplers->txq_constants, array_size);
-       memset(samplers->txq_constants, 0, array_size);
-       for (i = 0; i < bits; i++)
-               if (samplers->views.enabled_mask & (1 << i))
- samplers->txq_constants[i] = samplers->views.views[i]->base.texture->array_size / 6;
-
-       cb.buffer = NULL;
-       cb.user_buffer = samplers->txq_constants;
-       cb.buffer_offset = 0;
-       cb.buffer_size = array_size;
- rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_TXQ_CONST_BUFFER, &cb);
-       pipe_resource_reference(&cb.buffer, NULL);
-}
-
 /* set sample xy locations as array of fragment shader constants */
void r600_set_sample_locations_constant_buffer(struct r600_context *rctx)
 {
@@ -1175,7 +1151,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
        struct pipe_context * ctx = (struct pipe_context*)rctx;
        bool ps_dirty = false, vs_dirty = false, gs_dirty = false;
        bool blend_disable;
-
+       bool need_buf_const;
        if (!rctx->blitter->running) {
                unsigned i;
@@ -1296,29 +1272,35 @@ static bool r600_update_derived_state(struct r600_context *rctx)
        /* on R600 we stuff masks + txq info into one constant buffer */
        /* on evergreen we only need a txq info one */
-       if (rctx->b.chip_class < EVERGREEN) {
- if (rctx->ps_shader && rctx->ps_shader->current->shader.uses_tex_buffers)
-                       r600_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
- if (rctx->vs_shader && rctx->vs_shader->current->shader.uses_tex_buffers)
-                       r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
- if (rctx->gs_shader && rctx->gs_shader->current->shader.uses_tex_buffers)
-                       r600_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
-       } else {
- if (rctx->ps_shader && rctx->ps_shader->current->shader.uses_tex_buffers)
-                       eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
- if (rctx->vs_shader && rctx->vs_shader->current->shader.uses_tex_buffers)
-                       eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
- if (rctx->gs_shader && rctx->gs_shader->current->shader.uses_tex_buffers)
-                       eg_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
+       if (rctx->ps_shader) {
+ need_buf_const = rctx->ps_shader->current->shader.uses_tex_buffers || rctx->ps_shader->current->shader.has_txq_cube_array_z_comp;
+               if (need_buf_const) {
+                       if (rctx->b.chip_class < EVERGREEN)
+                               r600_setup_buffer_constants(rctx, 
PIPE_SHADER_FRAGMENT);
+                       else
+                               eg_setup_buffer_constants(rctx, 
PIPE_SHADER_FRAGMENT);
+               }
        }
+       if (rctx->vs_shader) {
+ need_buf_const = rctx->vs_shader->current->shader.uses_tex_buffers || rctx->vs_shader->current->shader.has_txq_cube_array_z_comp;
+               if (need_buf_const) {
+                       if (rctx->b.chip_class < EVERGREEN)
+                               r600_setup_buffer_constants(rctx, 
PIPE_SHADER_VERTEX);
+                       else
+                               eg_setup_buffer_constants(rctx, 
PIPE_SHADER_VERTEX);
+               }
+       }
- if (rctx->ps_shader && rctx->ps_shader->current->shader.has_txq_cube_array_z_comp)
-               r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_FRAGMENT);
- if (rctx->vs_shader && rctx->vs_shader->current->shader.has_txq_cube_array_z_comp)
-               r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_VERTEX);
- if (rctx->gs_shader && rctx->gs_shader->current->shader.has_txq_cube_array_z_comp)
-               r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_GEOMETRY);
+       if (rctx->gs_shader) {
+ need_buf_const = rctx->gs_shader->current->shader.uses_tex_buffers || rctx->gs_shader->current->shader.has_txq_cube_array_z_comp;
+               if (need_buf_const) {
+                       if (rctx->b.chip_class < EVERGREEN)
+                               r600_setup_buffer_constants(rctx, 
PIPE_SHADER_GEOMETRY);
+                       else
+                               eg_setup_buffer_constants(rctx, 
PIPE_SHADER_GEOMETRY);
+               }
+       }
if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
                if (!r600_adjust_gprs(rctx)) {


Passes piglits on a Turks with no obvious regressions, so with nits above fixed, consider it
Reviewed-by: Glenn Kennard <glenn.kenn...@gmail.com>
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to