Module: Mesa
Branch: master
Commit: 3109c5257b71391b841ffe8f9e66c017f7ee4dea
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3109c5257b71391b841ffe8f9e66c017f7ee4dea

Author: Daniel Schürmann <[email protected]>
Date:   Tue May 15 11:27:25 2018 +0200

ac: add support for 16bit ssbo stores

Reviewed-by: Bas Nieuwenhuizen <[email protected]>

---

 src/amd/common/ac_nir_to_llvm.c | 144 +++++++++++++++++++++++-----------------
 1 file changed, 84 insertions(+), 60 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index c55f3c5068..e3b5ae49fd 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1420,31 +1420,24 @@ static uint32_t widen_mask(uint32_t mask, unsigned 
multiplier)
 static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, 
LLVMValueRef src,
                                          unsigned start, unsigned count)
 {
-       LLVMTypeRef type = LLVMTypeOf(src);
+       LLVMValueRef mask[] = {
+       LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
+       LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false) };
 
-       if (LLVMGetTypeKind(type) != LLVMVectorTypeKind) {
+       unsigned src_elements = ac_get_llvm_num_components(src);
+
+       if (count == src_elements) {
                assert(start == 0);
-               assert(count == 1);
                return src;
+       } else if (count == 1) {
+               assert(start < src_elements);
+               return LLVMBuildExtractElement(ctx->builder, src, mask[start],  
"");
+       } else {
+               assert(start + count <= src_elements);
+               assert(count <= 4);
+               LLVMValueRef swizzle = LLVMConstVector(&mask[start], count);
+               return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, 
"");
        }
-
-       unsigned src_elements = LLVMGetVectorSize(type);
-       assert(start < src_elements);
-       assert(start + count <= src_elements);
-
-       if (start == 0 && count == src_elements)
-               return src;
-
-       if (count == 1)
-               return LLVMBuildExtractElement(ctx->builder, src, 
LLVMConstInt(ctx->i32, start, false), "");
-
-       assert(count <= 8);
-       LLVMValueRef indices[8];
-       for (unsigned i = 0; i < count; ++i)
-               indices[i] = LLVMConstInt(ctx->i32, start + i, false);
-
-       LLVMValueRef swizzle = LLVMConstVector(indices, count);
-       return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
 }
 
 static void visit_store_ssbo(struct ac_nir_context *ctx,
@@ -1452,33 +1445,19 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 {
        const char *store_name;
        LLVMValueRef src_data = get_src(ctx, instr->src[0]);
-       LLVMTypeRef data_type = ctx->ac.f32;
-       int elem_size_mult = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 
32;
-       int components_32bit = elem_size_mult * instr->num_components;
+       int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) 
/ 8;
        unsigned writemask = nir_intrinsic_write_mask(instr);
-       LLVMValueRef base_data, base_offset;
-       LLVMValueRef params[6];
 
-       params[1] = ctx->abi->load_ssbo(ctx->abi,
+       LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
                                        get_src(ctx, instr->src[1]), true);
-       params[2] = ctx->ac.i32_0; /* vindex */
-       params[4] = ctx->ac.i1false;  /* glc */
-       params[5] = ctx->ac.i1false;  /* slc */
-
-       if (components_32bit > 1)
-               data_type = LLVMVectorType(ctx->ac.f32, components_32bit);
-
-       writemask = widen_mask(writemask, elem_size_mult);
-
-       base_data = ac_to_float(&ctx->ac, src_data);
+       LLVMValueRef base_data = ac_to_float(&ctx->ac, src_data);
        base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
-       base_data = LLVMBuildBitCast(ctx->ac.builder, base_data,
-                                    data_type, "");
-       base_offset = get_src(ctx, instr->src[2]);      /* voffset */
+       LLVMValueRef base_offset = get_src(ctx, instr->src[2]);
+
        while (writemask) {
                int start, count;
-               LLVMValueRef data;
-               LLVMValueRef offset;
+               LLVMValueRef data, offset;
+               LLVMTypeRef data_type;
 
                u_bit_scan_consecutive_range(&writemask, &start, &count);
 
@@ -1488,31 +1467,76 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
                        writemask |= 1 << (start + 2);
                        count = 2;
                }
+               int num_bytes = count * elem_size_bytes; /* count in bytes */
 
-               if (count > 4) {
-                       writemask |= ((1u << (count - 4)) - 1u) << (start + 4);
-                       count = 4;
+               /* we can only store 4 DWords at the same time.
+                * can only happen for 64 Bit vectors. */
+               if (num_bytes > 16) {
+                       writemask |= ((1u << (count - 2)) - 1u) << (start + 2);
+                       count = 2;
+                       num_bytes = 16;
                }
 
-               if (count == 4) {
-                       store_name = "llvm.amdgcn.buffer.store.v4f32";
-               } else if (count == 2) {
-                       store_name = "llvm.amdgcn.buffer.store.v2f32";
-
-               } else {
-                       assert(count == 1);
-                       store_name = "llvm.amdgcn.buffer.store.f32";
+               /* check alignment of 16 Bit stores */
+               if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) {
+                       writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
+                       count = 1;
+                       num_bytes = 2;
                }
                data = extract_vector_range(&ctx->ac, base_data, start, count);
 
-               offset = base_offset;
-               if (start != 0) {
-                       offset = LLVMBuildAdd(ctx->ac.builder, offset, 
LLVMConstInt(ctx->ac.i32, start * 4, false), "");
+               if (start == 0) {
+                       offset = base_offset;
+               } else {
+                       offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
+                                             LLVMConstInt(ctx->ac.i32, start * 
elem_size_bytes, false), "");
+               }
+               if (num_bytes == 2) {
+                       store_name = "llvm.amdgcn.tbuffer.store.i32";
+                       data_type = ctx->ac.i32;
+                       LLVMValueRef tbuffer_params[] = {
+                               data,
+                               rsrc,
+                               ctx->ac.i32_0, /* vindex */
+                               offset,        /* voffset */
+                               ctx->ac.i32_0,
+                               ctx->ac.i32_0,
+                               LLVMConstInt(ctx->ac.i32, 2, false), // dfmt (= 
16bit)
+                               LLVMConstInt(ctx->ac.i32, 4, false), // nfmt (= 
uint)
+                               ctx->ac.i1false,
+                               ctx->ac.i1false,
+                       };
+                       ac_build_intrinsic(&ctx->ac, store_name,
+                                          ctx->ac.voidt, tbuffer_params, 10, 
0);
+               } else {
+                       switch (num_bytes) {
+                       case 16: /* v4f32 */
+                               store_name = "llvm.amdgcn.buffer.store.v4f32";
+                               data_type = ctx->ac.v4f32;
+                               break;
+                       case 8: /* v2f32 */
+                               store_name = "llvm.amdgcn.buffer.store.v2f32";
+                               data_type = ctx->ac.v2f32;
+                               break;
+                       case 4: /* f32 */
+                               store_name = "llvm.amdgcn.buffer.store.f32";
+                               data_type = ctx->ac.f32;
+                               break;
+                       default:
+                               unreachable("Malformed vector store.");
+                       }
+                       data = LLVMBuildBitCast(ctx->ac.builder, data, 
data_type, "");
+                       LLVMValueRef params[] = {
+                               data,
+                               rsrc,
+                               ctx->ac.i32_0, /* vindex */
+                               offset,
+                               ctx->ac.i1false,  /* glc */
+                               ctx->ac.i1false,  /* slc */
+                       };
+                       ac_build_intrinsic(&ctx->ac, store_name,
+                                          ctx->ac.voidt, params, 6, 0);
                }
-               params[0] = data;
-               params[3] = offset;
-               ac_build_intrinsic(&ctx->ac, store_name,
-                                  ctx->ac.voidt, params, 6, 0);
        }
 }
 

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to