On 02.04.2016 08:10, Bas Nieuwenhuizen wrote:
Declares the shared memory as a global variable so that
LLVM is aware of it and it does not conflict with passes
like AMDGPUPromoteAlloca.

Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
---
  src/gallium/drivers/radeon/radeon_llvm.h           |  3 ++
  .../drivers/radeon/radeon_setup_tgsi_llvm.c        |  4 +++
  src/gallium/drivers/radeonsi/si_shader.c           | 35 ++++++++++++++++++++++
  src/gallium/drivers/radeonsi/si_shader.h           |  3 ++
  4 files changed, 45 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
b/src/gallium/drivers/radeon/radeon_llvm.h
index 0a164bb..3e11b36 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -68,6 +68,9 @@ struct radeon_llvm_context {
                        unsigned index,
                        const struct tgsi_full_declaration *decl);

+       void (*declare_memory_region)(struct radeon_llvm_context *,
+                       const struct tgsi_full_declaration *decl);
+
        /** This array contains the input values for the shader.  Typically 
these
          * values will be in the form of a target intrinsic that will inform 
the
          * backend how to load the actual inputs to the shader.
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index fb883cb..5a3b586 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -366,6 +366,10 @@ static void emit_declaration(
                break;
        }

+       case TGSI_FILE_MEMORY:
+               if (ctx->declare_memory_region)
+                       ctx->declare_memory_region(ctx, decl);
+

I think you should drop the if-statement. TGSI_FILE_MEMORY should only occur when it makes sense and the function exists.

Also, add a break;.

        default:
                break;
        }
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 2c44b14..2ce37ca 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -121,6 +121,9 @@ struct si_shader_context
        LLVMTypeRef v4i32;
        LLVMTypeRef v4f32;
        LLVMTypeRef v8i32;
+
+       unsigned memory_region_count;
+       LLVMValueRef *memory_regions;
  };

  static struct si_shader_context *si_shader_context(
@@ -1320,6 +1323,37 @@ static void declare_system_value(
        radeon_bld->system_values[index] = value;
  }

+static void declare_compute_memory(struct radeon_llvm_context *radeon_bld,
+                                   const struct tgsi_full_declaration *decl)
+{
+       struct si_shader_context *ctx =
+               si_shader_context(&radeon_bld->soa.bld_base);
+       struct si_shader_selector *sel = ctx->shader->selector;
+       struct gallivm_state *gallivm = &radeon_bld->gallivm;
+
+       LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm->context);
+       LLVMTypeRef i8p = LLVMPointerType(i8, LOCAL_ADDR_SPACE);
+       LLVMValueRef var;
+
+       assert(decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED);
+       assert(decl->Range.First == decl->Range.Last);
+
+       if (ctx->memory_region_count <= decl->Range.Last) {
+               ctx->memory_regions = realloc(ctx->memory_regions,
+                               (decl->Range.Last + 1) * sizeof(LLVMValueRef));
+
+               ctx->memory_region_count = decl->Range.Last + 1;
+       }
+       var = LLVMAddGlobalInAddressSpace(gallivm->module,
+                                         LLVMArrayType(i8, sel->local_size),
+                                         "compute_lds",
+                                         LOCAL_ADDR_SPACE);
+       LLVMSetAlignment(var, 4);
+
+       ctx->memory_regions[decl->Range.First] =
+                      LLVMBuildBitCast(gallivm->builder, var, i8p, "");
+}

The entire logic here seems a bit off: realloc doesn't zero-initialize the newly grown part of the array, and assigning the realloc result straight back to ctx->memory_regions leaks the old buffer if realloc fails.

Do we ever get TGSI shaders with more than one memory declaration?

I guess not, otherwise local_size doesn't really make sense. In that case, just forget about the whole realloc business, have a single LLVMValueRef in si_shader_context and add asserts for Range.First/Last.

Cheers,
Nicolai

+
  static LLVMValueRef fetch_constant(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
@@ -5778,6 +5812,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                        bld_base->emit_epilogue = si_llvm_return_fs_outputs;
                break;
        case TGSI_PROCESSOR_COMPUTE:
+               ctx.radeon_bld.declare_memory_region = declare_compute_memory;
                break;
        default:
                assert(!"Unsupported shader type");
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 5043d43..70ae46f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -222,6 +222,9 @@ struct si_shader_selector {
         */
        unsigned        colors_written_4bit;

+       /* CS parameters */
+       unsigned local_size;
+
        /* masks of "get_unique_index" bits */
        uint64_t        outputs_written;
        uint32_t        patch_outputs_written;

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to