Module: Mesa
Branch: master
Commit: 1e5c207dba4dbd07919bff2efe57ad361a44ac84
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1e5c207dba4dbd07919bff2efe57ad361a44ac84

Author: Rob Clark <robcl...@freedesktop.org>
Date:   Tue Dec 30 20:02:36 2014 -0500

freedreno/ir3: start on indirect gpr reads

Handle TEMP[ADDR[]] src registers by generating a fanin to group array
elements, similarly to how texture fetch instructions work.

NOTE:
For all the scalar instructions generated for a single tgsi vector
operation which uses an array src (or possibly even uses the same array
as multiple srcs), re-use the same fanin node.  Since a vector operation
operates on all components at the same time, it should never see more
than one version of the same array.

Signed-off-by: Rob Clark <robcl...@freedesktop.org>

---

 src/gallium/drivers/freedreno/ir3/ir3.h          |    5 +-
 src/gallium/drivers/freedreno/ir3/ir3_compiler.c |  131 +++++++++++++++++++++-
 src/gallium/drivers/freedreno/ir3/ir3_ra.c       |   18 ++-
 3 files changed, 146 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h 
b/src/gallium/drivers/freedreno/ir3/ir3.h
index b1fb08f..a3bbba9 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -209,6 +209,9 @@ struct ir3_instruction {
                struct {
                        struct ir3_block *block;
                } inout;
+               struct {
+                       int off;              /* offset relative to addr reg */
+               } deref;
 
                /* XXX keep this as big as all other union members! */
                uint32_t info[3];
@@ -465,7 +468,7 @@ static inline struct ir3_instruction *ssa(struct 
ir3_register *reg)
 
 static inline bool reg_gpr(struct ir3_register *r)
 {
-       if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | 
IR3_REG_ADDR))
+       if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR))
                return false;
        if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
                return false;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c 
b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
index 99bad37..8c88bf7 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
@@ -88,6 +88,17 @@ struct ir3_compile_context {
 
        struct tgsi_shader_info info;
 
+       /* hmm, would be nice if tgsi_scan_shader figured this out
+        * for us:
+        */
+       struct {
+               unsigned first, last;
+               struct ir3_instruction *fanin;
+       } array[16];
+       uint32_t array_dirty;
+       /* offset into array[], per file, of first array info */
+       uint8_t array_offsets[TGSI_FILE_COUNT];
+
        /* for calculating input/output positions/linkages: */
        unsigned next_inloc;
 
@@ -130,11 +141,21 @@ static void create_mov(struct ir3_compile_context *ctx,
                struct tgsi_dst_register *dst, struct tgsi_src_register *src);
 static type_t get_ftype(struct ir3_compile_context *ctx);
 
+static unsigned setup_arrays(struct ir3_compile_context *ctx, unsigned file, unsigned i)
+{
+       /* array[i] becomes ArrayID 0 for this file: the legacy array spanning the entire file: */
+       ctx->array[i].first = 0;
+       ctx->array[i].last = ctx->info.file_max[file];
+       ctx->array_offsets[file] = i;
+       /* return the slots used by this file, not the next index: callers do "i += setup_arrays(..)" */
+       return ctx->info.array_max[file] + 1;
+}
+
 static unsigned
 compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
                const struct tgsi_token *tokens)
 {
-       unsigned ret;
+       unsigned ret, i;
        struct tgsi_shader_info *info = &ctx->info;
        struct tgsi_lowering_config lconfig = {
                        .color_two_side = so->key.color_two_side,
@@ -190,6 +211,7 @@ compile_init(struct ir3_compile_context *ctx, struct 
ir3_shader_variant *so,
        }
        ctx->ir = so->ir;
        ctx->so = so;
+       ctx->array_dirty = 0;
        ctx->next_inloc = 8;
        ctx->num_internal_temps = 0;
        ctx->branch_count = 0;
@@ -204,10 +226,12 @@ compile_init(struct ir3_compile_context *ctx, struct 
ir3_shader_variant *so,
        ctx->using_tmp_dst = false;
 
        memset(ctx->frag_coord, 0, sizeof(ctx->frag_coord));
+       memset(ctx->array, 0, sizeof(ctx->array));
+       memset(ctx->array_offsets, 0, sizeof(ctx->array_offsets));
 
 #define FM(x) (1 << TGSI_FILE_##x)
        /* optimize can't deal with relative addressing: */
-       if (info->indirect_files & (FM(TEMPORARY) | FM(INPUT) | FM(OUTPUT)))
+       if (info->indirect_files_written & (FM(TEMPORARY) | FM(INPUT) | 
FM(OUTPUT)))
                return TGSI_PARSE_ERROR;
 
        /* NOTE: if relative addressing is used, we set constlen in
@@ -217,6 +241,12 @@ compile_init(struct ir3_compile_context *ctx, struct 
ir3_shader_variant *so,
        if (info->indirect_files & FM(CONSTANT))
                so->constlen = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1);
 
+       i = 0;
+       i += setup_arrays(ctx, TGSI_FILE_INPUT, i);
+       i += setup_arrays(ctx, TGSI_FILE_TEMPORARY, i);
+       i += setup_arrays(ctx, TGSI_FILE_OUTPUT, i);
+       /* any others? we don't track arrays for const..*/
+
        /* Immediates go after constants: */
        so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1;
        ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
@@ -275,6 +305,12 @@ instr_finish(struct ir3_compile_context *ctx)
                *(ctx->output_updates[i].instrp) = ctx->output_updates[i].instr;
 
        ctx->num_output_updates = 0;
+
+       while (ctx->array_dirty) {
+               unsigned aid = ffs(ctx->array_dirty) - 1;
+               ctx->array[aid].fanin = NULL;
+               ctx->array_dirty &= ~(1 << aid);
+       }
 }
 
 /* For "atomic" groups of instructions, for example the four scalar
@@ -515,6 +551,8 @@ ssa_instr(struct ir3_compile_context *ctx, unsigned file, 
unsigned n)
                         * NOTE: *don't* use instr_create() here!
                         */
                        instr = create_immed(ctx, 0.0);
+                       /* no need to recreate the immed for every access: */
+                       block->temporaries[n] = instr;
                }
                break;
        }
@@ -522,17 +560,68 @@ ssa_instr(struct ir3_compile_context *ctx, unsigned file, 
unsigned n)
        return instr;
 }
 
+static int array_id(struct ir3_compile_context *ctx,
+               const struct tgsi_src_register *src)
+{
+       /* XXX complete hack: cast back to the tgsi_full_src_register that is
+        * assumed to wrap src, to reach its Indirect token; nothing that isn't
+        * wrapped in a tgsi_full_src_register should be indirect: */
+       const struct tgsi_full_src_register *full = (const void *)src;
+       debug_assert(src->File != TGSI_FILE_CONSTANT);
+       return ctx->array_offsets[src->File] + full->Indirect.ArrayID;
+}
+
 static void
 ssa_src(struct ir3_compile_context *ctx, struct ir3_register *reg,
                const struct tgsi_src_register *src, unsigned chan)
 {
        struct ir3_instruction *instr;
 
-       instr = ssa_instr(ctx, src->File, regid(src->Index, chan));
+       if (src->Indirect && (src->File != TGSI_FILE_CONSTANT)) {
+               /* for relative addressing of gpr's (due to register assignment)
+                * we must generate a fanin instruction to collect all possible
+                * array elements that the instruction could address together:
+                */
+               unsigned i, j, aid = array_id(ctx, src);
+
+               if (ctx->array[aid].fanin) {
+                       instr = ctx->array[aid].fanin;
+               } else {
+                       unsigned first, last;
+
+                       first = ctx->array[aid].first;
+                       last  = ctx->array[aid].last;
+
+                       instr = ir3_instr_create2(ctx->block, -1, OPC_META_FI,
+                                       1 + (4 * (last + 1 - first)));
+                       ir3_reg_create(instr, 0, 0);
+                       for (i = first; i <= last; i++) {
+                               for (j = 0; j < 4; j++) {
+                                       unsigned n = (i * 4) + j;
+                                       ir3_reg_create(instr, 0, 
IR3_REG_SSA)->instr =
+                                                       ssa_instr(ctx, 
src->File, n);
+                               }
+                       }
+                       ctx->array[aid].fanin = instr;
+                       ctx->array_dirty |= (1 << aid);
+               }
+       } else {
+               /* normal case (not relative addressed GPR) */
+               instr = ssa_instr(ctx, src->File, regid(src->Index, chan));
+       }
 
        if (instr) {
                reg->flags |= IR3_REG_SSA;
                reg->instr = instr;
+       } else if (reg->flags & IR3_REG_SSA) {
+               /* Special hack for trans_samp(), which calls ssa_src() directly
+                * to build up the collect (fanin) for a const src, so the SSA
+                * flag is set but there is no src instr.  It basically gets
+                * lucky because we default to 0.0 for "undefined" src
+                * instructions, which is what it wants.  We probably need to
+                * give it a better way to do this, but for now use this hack:
+                */
+               reg->instr = create_immed(ctx, 0.0);
        }
 }
 
@@ -689,11 +778,23 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx,
                instr = ir3_instr_create(ctx->block, -1, OPC_META_DEREF);
                ir3_reg_create(instr, 0, 0);
                ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = 
ctx->block->address;
+
+               if (src->File != TGSI_FILE_CONSTANT) {
+                       unsigned aid = array_id(ctx, src);
+                       unsigned off = src->Index - ctx->array[aid].first; /* 
vec4 offset */
+                       instr->deref.off = regid(off, chan);
+               }
        }
 
        reg = ir3_reg_create(instr, regid(num, chan), flags);
 
-       reg->wrmask = wrmask;
+       if (src->Indirect && (src->File != TGSI_FILE_CONSTANT)) {
+               unsigned aid = array_id(ctx, src);
+               reg->size = 4 * (1 + ctx->array[aid].last - 
ctx->array[aid].first);
+       } else {
+               reg->wrmask = wrmask;
+       }
+
        if (wrmask == 0x1) {
                /* normal case */
                ssa_src(ctx, reg, src, chan);
@@ -729,8 +830,11 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx,
        }
 
        if (src->Indirect) {
+               unsigned size = reg->size;
+
                reg = ir3_reg_create(orig, 0, flags | IR3_REG_SSA);
                reg->instr = instr;
+               reg->size = size;
        }
        return reg;
 }
@@ -2990,11 +3094,26 @@ compile_instructions(struct ir3_compile_context *ctx)
                case TGSI_TOKEN_TYPE_DECLARATION: {
                        struct tgsi_full_declaration *decl =
                                        &ctx->parser.FullToken.FullDeclaration;
-                       if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
+                       unsigned file = decl->Declaration.File;
+                       if (file == TGSI_FILE_OUTPUT) {
                                decl_out(ctx, decl);
-                       } else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+                       } else if (file == TGSI_FILE_INPUT) {
                                decl_in(ctx, decl);
                        }
+
+                       if ((file != TGSI_FILE_CONSTANT) && 
decl->Declaration.Array) {
+                               int aid = decl->Array.ArrayID + 
ctx->array_offsets[file];
+
+                               compile_assert(ctx, aid < 
ARRAY_SIZE(ctx->array));
+
+                               /* legacy ArrayID==0 stuff probably isn't going 
to work
+                                * well (and is at least untested).. let's just 
scream:
+                                */
+                               compile_assert(ctx, aid != 0);
+
+                               ctx->array[aid].first = decl->Range.First;
+                               ctx->array[aid].last  = decl->Range.Last;
+                       }
                        break;
                }
                case TGSI_TOKEN_TYPE_IMMEDIATE: {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c 
b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index 0854046..eaeba0a 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -199,6 +199,14 @@ static void compute_liveregs(struct ir3_ra_ctx *ctx,
                if (r)
                        regmask_set_if_not(liveregs, r, &written);
        }
+
+       /* if instruction is output, we need a reg that isn't written
+        * before the end.. equiv to the instr_used_by() check above
+        * in the loop body
+        * TODO maybe should follow fanin/fanout?
+        */
+       if (instr_is_output(instr))
+               regmask_or(liveregs, liveregs, &written);
 }
 
 static int find_available(regmask_t *liveregs, int size, bool half)
@@ -364,6 +372,14 @@ static void instr_assign_src(struct ir3_ra_ctx *ctx,
                case OPC_META_FI:
                        instr_assign(ctx, instr, name - (r - 1));
                        return;
+               case OPC_META_DEREF:
+                       /* first arg of meta:deref is the addr reg (do not
+                        * propagate), 2nd is actual src (fanin) which does
+                        * get propagated)
+                        */
+                       if (r == 2)
+                               instr_assign(ctx, instr, name + 
instr->deref.off);
+                       break;
                default:
                        break;
                }
@@ -467,7 +483,7 @@ static void instr_alloc_and_assign(struct ir3_ra_ctx *ctx,
                /* already partially assigned, just finish the job */
        } else if (is_addr(instr)) {
                debug_assert(!instr->cp.right);
-               name = instr->regs[2]->num;
+               name = instr->regs[2]->num + instr->deref.off;
        } else if (reg_gpr(dst)) {
                int size;
                /* number of consecutive registers to assign: */

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to