Module: Mesa
Branch: main
Commit: a2b054c8f0a712c1377580c038d1c68995eca028
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a2b054c8f0a712c1377580c038d1c68995eca028

Author: Emma Anholt <[email protected]>
Date:   Tue Feb 28 12:13:39 2023 -0800

gallivm: Use first_active_invocation for ubo/kernel memory loads.

If we're just loading memory, we can take the scalar offset_is_uniform
paths even when the first active invocation is nonzero, saving a bunch of
looping and bounds checking for per-element loads.  And, if we don't have
an active invocation, doing the load for element 0 (which is
bounds-checked to return 0 if element 0 had a bad value in it) before
throwing away the result is still better than doing bounds-checked loads
for each element before throwing away the result.

dEQP-VK.ubo.random.16bit.scalar.92 goes from 16.5 to 14.0 seconds.

Reviewed-by: Dave Airlie <[email protected]>
Reviewed-by: Roland Scheidegger <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21142>

---

 src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
index 7daabd0c858..31487af8f09 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
@@ -844,12 +844,8 @@ static void emit_load_kernel_arg(struct 
lp_build_nir_context *bld_base,
    LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
    kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");
 
-   if (!invocation_0_must_be_active(bld_base)) {
-      mesa_logw_once("Treating load_kernel_arg in control flow as uniform, 
results may be incorrect.");
-   }
-
    if (offset_is_uniform) {
-      offset = LLVMBuildExtractElement(builder, offset, 
lp_build_const_int32(gallivm, 0), "");
+      offset = LLVMBuildExtractElement(builder, offset, 
first_active_invocation(bld_base), "");
 
       for (unsigned c = 0; c < nc; c++) {
          LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, 
offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : 
lp_build_const_int32(gallivm, c), "");
@@ -857,6 +853,8 @@ static void emit_load_kernel_arg(struct 
lp_build_nir_context *bld_base,
          LLVMValueRef scalar = lp_build_pointer_get2(builder, 
bld_broad->elem_type, kernel_args_ptr, this_offset);
          result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
       }
+   } else {
+      unreachable("load_kernel_arg must have a uniform offset.");
    }
 }
 
@@ -1169,8 +1167,8 @@ static void emit_load_ubo(struct lp_build_nir_context 
*bld_base,
    LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
    consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");
 
-   if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
-      offset = LLVMBuildExtractElement(builder, offset, 
lp_build_const_int32(gallivm, 0), "");
+   if (offset_is_uniform) {
+      offset = LLVMBuildExtractElement(builder, offset, 
first_active_invocation(bld_base), "");
       struct lp_build_context *load_bld = get_int_bld(bld_base, true, 
bit_size);
       switch (bit_size) {
       case 8:

Reply via email to