From: Nicolai Hähnle <nicolai.haeh...@amd.com>

---
 .../drivers/radeon/radeon_setup_tgsi_llvm.c        | 29 ++++++++++++++--------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 22ff18e..e4bfa74 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -437,33 +437,42 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
                decl_size = 4 * ((last - first) + 1);
                if (decl->Declaration.Array) {
                        unsigned id = decl->Array.ArrayID - 1;
                        if (!ctx->arrays) {
                                int size = bld_base->info->array_max[TGSI_FILE_TEMPORARY];
                                ctx->arrays = CALLOC(size, sizeof(ctx->arrays[0]));
                        }
 
                        ctx->arrays[id].range = decl->Range;
 
-                       /* If the array is more than 16 elements (each element
-                        * is 32-bits), then store it in a vector.  Storing the
-                        * array in a vector will causes the compiler to store
-                        * the array in registers and access it using indirect
-                        * addressing.  16 is number of vector elements that
-                        * LLVM will store in a register.
-                        * FIXME: We shouldn't need to do this.  LLVM should be
-                        * smart enough to promote allocas int registers when
-                        * profitable.
+                       /* If the array has more than 16 elements, store it
+                        * in memory using an alloca that spans the entire
+                        * array.
+                        *
+                        * Otherwise, store each array element individually.
+                        * We will then generate vectors (per-channel, up to
+                        * <4 x float>) for indirect addressing.
+                        *
+                        * Note that 16 is the number of vector elements that
+                        * LLVM will store in a register, so theoretically an
+                        * array with up to 4 * 16 = 64 elements could be
+                        * handled this way, but whether that's a good idea
+                        * depends on VGPR register pressure elsewhere.
+                        *
+                        * FIXME: We shouldn't need to have the non-alloca
+                        * code path for arrays. LLVM should be smart enough to
+                        * promote allocas into registers when profitable.
                         */
                        if (decl_size > 16) {
                                array_alloca = LLVMBuildAlloca(builder,
-                                       LLVMArrayType(bld_base->base.vec_type, decl_size),"array");
+                                       LLVMArrayType(bld_base->base.vec_type,
+                                                     decl_size), "array");
                                ctx->arrays[id].alloca = array_alloca;
                        }
                }
 
                if (!ctx->temps_count) {
                        ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
                        ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
                }
                if (!array_alloca) {
                        for (i = 0; i < decl_size; ++i) {
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to