Cache values that are loaded more than once, or where various components
are loaded at separate places. This saves repeated calculation of the offsets.

Signed-off-by: Gert Wollny <gw.foss...@gmail.com>
---
 src/gallium/drivers/r600/r600_shader.c | 211 +++++++++++++++++++++++++++++----
 1 file changed, 190 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 9fa83189bc..5713eda6b0 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -317,6 +317,20 @@ struct eg_interp {
        unsigned                                ij_index;
 };
 
+struct  r600_tess_input_cache_entry {  /* one tracked tessellation source operand */
+       struct tgsi_full_src_register key;      /* copy of the operand this entry was created for */
+       unsigned reg: 16;       /* use counter during the counting pre-pass, later the register assigned to the value */
+       unsigned initialized:1; /* set once the value has been fetched into reg */
+       unsigned read_access:1; /* most recent matching access came from TGSI_FILE_INPUT */
+       unsigned was_written:1; /* operand was seen in TGSI_FILE_OUTPUT */
+       unsigned mask:4;        /* OR of the fetch masks of all matching accesses */
+};
+
+struct r600_tess_input_cache { /* fixed-size table of tracked source operands */
+       struct r600_tess_input_cache_entry data[32];
+       int fill;       /* number of entries of data[] currently in use */
+};
+
 struct r600_shader_ctx {
        struct tgsi_shader_info                 info;
        struct tgsi_parse_context               parse;
@@ -353,6 +367,7 @@ struct r600_shader_ctx {
        unsigned                                enabled_stream_buffers_mask;
        unsigned                                tess_input_info; /* temp with 
tess input offsets */
        unsigned                                tess_output_info; /* temp with 
tess input offsets */
+       struct r600_tess_input_cache            tess_input_cache;
 };
 
 struct r600_shader_tgsi_instruction {
@@ -1810,7 +1825,8 @@ static int fetch_mask( struct tgsi_src_register *reg)
        return mask; 
 }
 
-static int fetch_tes_input(struct r600_shader_ctx *ctx, struct 
tgsi_full_src_register *src, unsigned int dst_reg)
+static int fetch_tes_input(struct r600_shader_ctx *ctx, struct 
tgsi_full_src_register *src,
+                          unsigned int dst_reg, unsigned mask)
 {
        int r;
        unsigned temp_reg = r600_get_temp(ctx);
@@ -1826,13 +1842,14 @@ static int fetch_tes_input(struct r600_shader_ctx *ctx, 
struct tgsi_full_src_reg
        if (r)
                return r;
 
-       r = do_lds_fetch_values(ctx, temp_reg, dst_reg, 
fetch_mask(&src->Register));
+       r = do_lds_fetch_values(ctx, temp_reg, dst_reg, mask);
        if (r)
                return r;
        return 0;
 }
 
-static int fetch_tcs_input(struct r600_shader_ctx *ctx, struct 
tgsi_full_src_register *src, unsigned int dst_reg)
+static int fetch_tcs_input(struct r600_shader_ctx *ctx, struct 
tgsi_full_src_register *src,
+                          unsigned int dst_reg, unsigned mask)
 {
        int r;
        unsigned temp_reg = r600_get_temp(ctx);
@@ -1852,13 +1869,14 @@ static int fetch_tcs_input(struct r600_shader_ctx *ctx, 
struct tgsi_full_src_reg
        if (r)
                return r;
 
-       r = do_lds_fetch_values(ctx, temp_reg, dst_reg, 
fetch_mask(&src->Register));
+       r = do_lds_fetch_values(ctx, temp_reg, dst_reg, mask);
        if (r)
                return r;
        return 0;
 }
 
-static int fetch_tcs_output(struct r600_shader_ctx *ctx, struct 
tgsi_full_src_register *src, unsigned int dst_reg)
+static int fetch_tcs_output(struct r600_shader_ctx *ctx, struct 
tgsi_full_src_register *src,
+                           unsigned int dst_reg, unsigned mask)
 {
        int r;
        unsigned temp_reg = r600_get_temp(ctx);
@@ -1874,12 +1892,153 @@ static int fetch_tcs_output(struct r600_shader_ctx 
*ctx, struct tgsi_full_src_re
        if (r)
                return r;
 
-       r = do_lds_fetch_values(ctx, temp_reg, dst_reg, 
fetch_mask(&src->Register));
+       r = do_lds_fetch_values(ctx, temp_reg, dst_reg, mask);
        if (r)
                return r;
        return 0;
 }
 
+/* Return 1 when lhs and rhs name the same directly addressed register, i.e.
+ * the same file, index and (if present) dimension; otherwise return 0.
+ * Indirectly addressed operands never compare equal. */
+static int tgsi_full_src_register_equal_for_cache(struct tgsi_full_src_register *lhs,
+                                       struct tgsi_full_src_register *rhs)
+{
+       if (lhs->Register.Index != rhs->Register.Index)
+               return 0;
+       if (lhs->Register.File != rhs->Register.File)
+               return 0;       /* INPUT[n] and OUTPUT[n] are different values */
+       if (lhs->Register.Indirect || rhs->Register.Indirect)
+               return 0;
+
+       if (lhs->Register.Dimension)  {
+               if (!rhs->Register.Dimension ||
+                        (rhs->Dimension.Index != lhs->Dimension.Index) ||
+                        (rhs->Dimension.Dimension != lhs->Dimension.Dimension))
+                       return 0;
+               if (lhs->Dimension.Indirect || rhs->Dimension.Indirect)
+                       return 0;
+       } else if (rhs->Register.Dimension)
+               return 0;
+       return 1;
+}
+
+static void tess_input_cache_store(struct r600_tess_input_cache *cache,
+                                  struct tgsi_full_src_register *src)
+{      /* Append src as a new entry; does nothing when all 32 slots are in use. */
+       if (cache->fill < 32) {
+               memcpy(&cache->data[cache->fill].key, src, sizeof(struct 
+tgsi_full_src_register));
+               cache->data[cache->fill].mask = fetch_mask(&src->Register);
+               cache->data[cache->fill].reg = 0;       /* no repeated use seen yet */
+               cache->data[cache->fill].was_written = src->Register.File == 
+TGSI_FILE_OUTPUT;
+               ++cache->fill;  /* initialized and read_access are not assigned by this function */
+       }
+}
+
+/* Counting pre-pass: register one use of src. A repeated direct access bumps
+ * the use count of its entry (kept in reg); an unseen operand gets a new entry. */
+static void tess_input_cache_check(struct r600_tess_input_cache *cache,
+                                  struct tgsi_full_src_register *src)
+{
+       int i;
+       /* indirect loads can come from anywhere, no use caching them (also while the cache is empty) */
+       if (src->Register.Indirect || src->Dimension.Indirect)
+               return;
+
+       for (i = 0; i < cache->fill; ++i) {
+               if (tgsi_full_src_register_equal_for_cache(src, &cache->data[i].key)) {
+                       cache->data[i].mask |= fetch_mask(&src->Register);
+                       cache->data[i].read_access = src->Register.File == TGSI_FILE_INPUT;
+                       if (!cache->data[i].was_written) {
+                               ++cache->data[i].reg;   /* reg serves as use counter during this pass */
+                               cache->data[i].was_written = src->Register.File == TGSI_FILE_OUTPUT;
+                       } else {
+                               /* FIXME: an entry that was written before being read cannot be
+                                * cached. We could store the address to speed up access, or keep
+                                * the written value; the latter needs a check for synchronisation
+                                * within the work group so another thread cannot overwrite it. */
+                               cache->data[i].reg = 0;
+                       }
+                       return;
+               }
+       }
+       tess_input_cache_store(cache, src);
+}
+
+static int tess_input_cache_count_multiused(struct r600_tess_input_cache 
+*cache,
+                                           unsigned reg_base)
+{      /* Keep only entries with a non-zero use count and read_access set; return how many remain. */
+       int i;
+       int cnt = 0;    /* entries kept so far, also the slot the next kept entry moves to */
+       for (i = 0; i < cache->fill; ++i) {
+               if (cache->data[i].reg > 0 && cache->data[i].read_access) {
+                       if (i != cnt)   /* compact kept entries towards the front of data[] */
+                               memcpy(&cache->data[cnt], &cache->data[i],
+                                      sizeof(struct 
+r600_tess_input_cache_entry));
+                       cache->data[cnt].reg = reg_base + cnt;  /* reg changes from use counter to register number */
+                       cache->data[cnt].initialized = 0;       /* nothing has been fetched into that register yet */
+                       ++cnt;
+               }
+       }
+       cache->fill = cnt;      /* entries that were not kept are dropped */
+       return cnt;
+}
+
+static struct  r600_tess_input_cache_entry *
+tess_input_cache_load(struct r600_tess_input_cache *cache,
+                     struct tgsi_full_src_register *src)
+{      /* Linear search for the entry whose key matches src; returns NULL when there is none. */
+       struct  r600_tess_input_cache_entry *retval = NULL;
+       int i;
+       for (i = 0; i < cache->fill; ++i) {
+               struct r600_tess_input_cache_entry *ce = &cache->data[i];
+               if (tgsi_full_src_register_equal_for_cache(src, &ce->key)) {
+                       retval = ce;
+                       break;  /* first match wins */
+               }
+       }
+       return retval;
+}
+
+typedef int (*fetch_tessdata_from_lds)(struct r600_shader_ctx *ctx,
+                                      struct tgsi_full_src_register *src,
+                                      unsigned int dst_reg, unsigned mask);    /* common signature of fetch_tes_input, fetch_tcs_input and fetch_tcs_output */
+
+static int r600_load_tess_data(struct r600_shader_ctx *ctx,
+                              struct tgsi_full_src_register *src,
+                              fetch_tessdata_from_lds fetch_call)
+{      /* Return the register that holds src: the cache entry's register if src has one, else a new temp. */
+       int treg;
+       struct r600_tess_input_cache_entry *ce;
+       ce = tess_input_cache_load(&ctx->tess_input_cache, src);
+       if (!ce) {      /* not cached: fetch into a register from r600_get_temp on every use */
+               treg = r600_get_temp(ctx);
+               fetch_call(ctx, src, treg, fetch_mask(&src->Register)); /* NOTE(review): return value is ignored */
+       } else {
+               if (!ce->initialized) { /* first use: fetch once, with the mask accumulated over all uses */
+                       fetch_call(ctx, src, ce->reg, ce->mask);        /* NOTE(review): return value is ignored */
+                       ce->initialized = 1;    /* NOTE(review): fetch is emitted at the first use only - confirm it dominates later uses under control flow */
+               }
+               treg = ce->reg;
+       }
+       return treg;
+}
+
+
+static void count_tess_inputs(struct r600_shader_ctx *ctx)
+{      /* Pass the tessellation sources of the current instruction to tess_input_cache_check. */
+       struct tgsi_full_instruction *inst = 
+&ctx->parse.FullToken.FullInstruction;
+       unsigned i;
+
+       for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+               struct tgsi_full_src_register *src = &inst->Src[i];     /* same file/shader-type combinations as tgsi_split_lds_inputs */
+               if (((src->Register.File == TGSI_FILE_INPUT) && (ctx->type == 
+PIPE_SHADER_TESS_EVAL)) ||
+                   (ctx->type == PIPE_SHADER_TESS_CTRL &&
+                    (src->Register.File == TGSI_FILE_INPUT || 
+src->Register.File == TGSI_FILE_OUTPUT)))
+                       tess_input_cache_check(&ctx->tess_input_cache, src);
+       }
+}
+
 static int tgsi_split_lds_inputs(struct r600_shader_ctx *ctx)
 {
        struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
@@ -1889,21 +2048,15 @@ static int tgsi_split_lds_inputs(struct r600_shader_ctx 
*ctx)
                struct tgsi_full_src_register *src = &inst->Src[i];
 
                if (ctx->type == PIPE_SHADER_TESS_EVAL && src->Register.File == 
TGSI_FILE_INPUT) {
-                       int treg = r600_get_temp(ctx);
-                       fetch_tes_input(ctx, src, treg);
-                       ctx->src[i].sel = treg;
+                       ctx->src[i].sel = r600_load_tess_data(ctx, src, 
fetch_tes_input);
                        ctx->src[i].rel = 0;
                }
                if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == 
TGSI_FILE_INPUT) {
-                       int treg = r600_get_temp(ctx);
-                       fetch_tcs_input(ctx, src, treg);
-                       ctx->src[i].sel = treg;
+                       ctx->src[i].sel = r600_load_tess_data(ctx, src, 
fetch_tcs_input);
                        ctx->src[i].rel = 0;
                }
                if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == 
TGSI_FILE_OUTPUT) {
-                       int treg = r600_get_temp(ctx);
-                       fetch_tcs_output(ctx, src, treg);
-                       ctx->src[i].sel = treg;
+                       ctx->src[i].sel = r600_load_tess_data(ctx, src, 
fetch_tcs_output);
                        ctx->src[i].rel = 0;
                }
        }
@@ -2982,6 +3135,8 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
        bool lds_inputs = false;
        bool pos_emitted = false;
 
+       ctx.tess_input_cache.fill = 0;
+
        ctx.bc = &shader->bc;
        ctx.shader = shader;
        ctx.native_integers = true;
@@ -3162,21 +3317,35 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
                ctx.temp_reg = ctx.bc->ar_reg + 3;
        }
 
+       if (lds_inputs) {
+               tgsi_parse_init(&ctx.parse, tokens);
+               while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
+                       tgsi_parse_token(&ctx.parse);
+
+                       if (ctx.parse.FullToken.Token.Type != 
TGSI_TOKEN_TYPE_INSTRUCTION)
+                               continue;
+
+                       count_tess_inputs(&ctx);
+               }
+               ctx.temp_reg += 
tess_input_cache_count_multiused(&ctx.tess_input_cache, ctx.temp_reg);
+               tgsi_parse_init(&ctx.parse, tokens);
+       }
+
        shader->max_arrays = 0;
        shader->num_arrays = 0;
        if (indirect_gprs) {
 
                if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) {
                        r600_add_gpr_array(shader, 
ctx.file_offset[TGSI_FILE_INPUT],
-                                          ctx.file_offset[TGSI_FILE_OUTPUT] -
-                                          ctx.file_offset[TGSI_FILE_INPUT],
-                                          0x0F);
+                                          ctx.file_offset[TGSI_FILE_OUTPUT] -
+                                          ctx.file_offset[TGSI_FILE_INPUT],
+                                          0x0F);
                }
                if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
                        r600_add_gpr_array(shader, 
ctx.file_offset[TGSI_FILE_OUTPUT],
-                                          ctx.file_offset[TGSI_FILE_TEMPORARY] 
-
-                                          ctx.file_offset[TGSI_FILE_OUTPUT],
-                                          0x0F);
+                                          ctx.file_offset[TGSI_FILE_TEMPORARY] 
-
+                                          ctx.file_offset[TGSI_FILE_OUTPUT],
+                                          0x0F);
                }
        }
 
-- 
2.13.6

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to