Module: Mesa Branch: main Commit: a2b054c8f0a712c1377580c038d1c68995eca028 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a2b054c8f0a712c1377580c038d1c68995eca028
Author: Emma Anholt <[email protected]> Date: Tue Feb 28 12:13:39 2023 -0800 gallivm: Use first_active_invocation for ubo/kernel memory loads. If we're just loading memory, we can take the scalar offset_is_uniform paths even if the first active invocation is nonzero, saving a bunch of looping and bounds checking for per-element loads. And, if we don't have an active invocation, doing the load for element 0 (which is bounds-checked to return 0 if element 0 had a bad value in it) before throwing away the result is still better than doing bounds-checked loads for each element before throwing away the result. dEQP-VK.ubo.random.16bit.scalar.92 goes from 16.5 to 14.0 seconds. Reviewed-by: Dave Airlie <[email protected]> Reviewed-by: Roland Scheidegger <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21142> --- src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c index 7daabd0c858..31487af8f09 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c @@ -844,12 +844,8 @@ static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base, LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0); kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, ""); - if (!invocation_0_must_be_active(bld_base)) { - mesa_logw_once("Treating load_kernel_arg in control flow as uniform, results may be incorrect."); - } - if (offset_is_uniform) { - offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), ""); + offset = LLVMBuildExtractElement(builder, offset, first_active_invocation(bld_base), ""); for (unsigned c = 0; c < nc; c++) { LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? 
lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), ""); @@ -857,6 +853,8 @@ static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base, LLVMValueRef scalar = lp_build_pointer_get2(builder, bld_broad->elem_type, kernel_args_ptr, this_offset); result[c] = lp_build_broadcast_scalar(bld_broad, scalar); } + } else { + unreachable("load_kernel_arg must have a uniform offset."); } } @@ -1169,8 +1167,8 @@ static void emit_load_ubo(struct lp_build_nir_context *bld_base, LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0); consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, ""); - if (offset_is_uniform && invocation_0_must_be_active(bld_base)) { - offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), ""); + if (offset_is_uniform) { + offset = LLVMBuildExtractElement(builder, offset, first_active_invocation(bld_base), ""); struct lp_build_context *load_bld = get_int_bld(bld_base, true, bit_size); switch (bit_size) { case 8:
