On Fri, Jun 9, 2017 at 5:04 PM, Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> wrote: > On Sat, Jun 10, 2017 at 1:50 AM, Connor Abbott > <conn...@valvesoftware.com> wrote: >> From: Connor Abbott <cwabbo...@gmail.com> >> >> Signed-off-by: Connor Abbott <cwabbo...@gmail.com> >> --- >> src/amd/common/ac_nir_to_llvm.c | 75 >> +++++++++++++++++++++++++++++++++++++++++ >> src/amd/vulkan/radv_device.c | 8 +++++ >> src/amd/vulkan/radv_pipeline.c | 2 ++ >> 3 files changed, 85 insertions(+) >> >> diff --git a/src/amd/common/ac_nir_to_llvm.c >> b/src/amd/common/ac_nir_to_llvm.c >> index 5bbd1c5..111e575 100644 >> --- a/src/amd/common/ac_nir_to_llvm.c >> +++ b/src/amd/common/ac_nir_to_llvm.c >> @@ -4069,6 +4069,81 @@ static void visit_intrinsic(struct >> nir_to_llvm_context *ctx, >> case nir_intrinsic_load_patch_vertices_in: >> result = LLVMConstInt(ctx->i32, >> ctx->options->key.tcs.input_vertices, false); >> break; >> + case nir_intrinsic_ballot: >> + result = ac_build_ballot(&ctx->ac, get_src(ctx, >> instr->src[0])); >> + break; >> + case nir_intrinsic_read_first_invocation: { >> + LLVMValueRef src0 = get_src(ctx, instr->src[0]); >> + ac_build_optimization_barrier(&ctx->ac, &src0); >> + LLVMValueRef srcs[1] = { src0 }; >> + result = ac_build_intrinsic(&ctx->ac, >> "llvm.amdgcn.readfirstlane", >> + ctx->i32, srcs, 1, >> + AC_FUNC_ATTR_NOUNWIND | >> + AC_FUNC_ATTR_READNONE | >> + AC_FUNC_ATTR_CONVERGENT); >> + break; >> + } >> + case nir_intrinsic_read_invocation: { >> + LLVMValueRef src0 = get_src(ctx, instr->src[0]); >> + ac_build_optimization_barrier(&ctx->ac, &src0); >> + LLVMValueRef srcs[2] = { src0, get_src(ctx, instr->src[1]) }; >> + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", >> + ctx->i32, srcs, 2, >> + AC_FUNC_ATTR_NOUNWIND | >> + AC_FUNC_ATTR_READNONE | >> + AC_FUNC_ATTR_CONVERGENT); >> + break; >> + } >> + case nir_intrinsic_load_subgroup_invocation: >> + result = ac_get_thread_id(&ctx->ac); >> + break; >> + case nir_intrinsic_load_subgroup_size: >> + result = LLVMConstInt(ctx->i32, 64, 0); >> + break; >> + case nir_intrinsic_all_invocations: >> + result = LLVMBuildSExt(ctx->builder, >> + ac_build_vote_all(&ctx->ac, >> + get_src(ctx, >> instr->src[0])), >> + ctx->i32, ""); > > How well does LLVM optimize this? I've always found the boolean as > int32 with -1 and 0 an awkward mapping to LLVM, and am wondering > whether LLVM is able to optimize the SExt away or if a select might be > better.
From looking at the shader dump of my test, LLVM seems to be able to optimize it away. In fact, it's what radeonsi uses for all their comparisons (since TGSI also uses -1 for true), so I'd expect it to be at least as good as a select; it might be better. It might be interesting to make booleans have a bit-size of 1, like in LLVM... it would probably require a lot of churn, though. > > >> + break; >> + case nir_intrinsic_any_invocations: >> + result = LLVMBuildSExt(ctx->builder, >> + ac_build_vote_any(&ctx->ac, >> + get_src(ctx, >> instr->src[0])), >> + ctx->i32, ""); >> + break; >> + case nir_intrinsic_all_invocations_equal: >> + result = LLVMBuildSExt(ctx->builder, >> + ac_build_vote_eq(&ctx->ac, >> + get_src(ctx, >> instr->src[0])), >> + ctx->i32, ""); >> + break; >> + case nir_intrinsic_load_subgroup_eq_mask: { >> + LLVMValueRef id = ac_get_thread_id(&ctx->ac); >> + id = LLVMBuildZExt(ctx->builder, id, ctx->i64, ""); >> + result = LLVMBuildShl(ctx->builder, LLVMConstInt(ctx->i64, >> 1, 0), id, ""); >> + break; >> + } >> + case nir_intrinsic_load_subgroup_ge_mask: >> + case nir_intrinsic_load_subgroup_gt_mask: >> + case nir_intrinsic_load_subgroup_le_mask: >> + case nir_intrinsic_load_subgroup_lt_mask: { >> + LLVMValueRef id = ac_get_thread_id(&ctx->ac); >> + if (instr->intrinsic == nir_intrinsic_load_subgroup_gt_mask >> || >> + instr->intrinsic == nir_intrinsic_load_subgroup_le_mask) >> { >> + /* All bits set except LSB */ >> + result = LLVMConstInt(ctx->i64, -2, 0); >> + } else { >> + /* All bits set */ >> + result = LLVMConstInt(ctx->i64, -1, 0); >> + } >> + id = LLVMBuildZExt(ctx->builder, id, ctx->i64, ""); >> + result = LLVMBuildShl(ctx->builder, result, id, ""); >> + if (instr->intrinsic == nir_intrinsic_load_subgroup_le_mask >> || >> + instr->intrinsic == nir_intrinsic_load_subgroup_lt_mask) >> + result = LLVMBuildNot(ctx->builder, result, ""); >> + break; >> + } >> default: >> fprintf(stderr, "Unknown intrinsic: "); >> nir_print_instr(&instr->instr, stderr); >> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c >> index e9bf44c..ea50acc 100644 >> --- a/src/amd/vulkan/radv_device.c >> +++ b/src/amd/vulkan/radv_device.c >> @@ -127,6 +127,14 @@ static const VkExtensionProperties >> common_device_extensions[] = { >> .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME, >> .specVersion = 1, >> }, >> + { >> + .extensionName = >> VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, >> + .specVersion = 1, >> + }, >> + { >> + .extensionName = VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, >> + .specVersion = 1, >> + }, >> }; >> >> static VkResult >> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c >> index 39cbd5a..242890a 100644 >> --- a/src/amd/vulkan/radv_pipeline.c >> +++ b/src/amd/vulkan/radv_pipeline.c >> @@ -228,6 +228,8 @@ radv_shader_compile_to_nir(struct radv_device *device, >> .image_write_without_format = true, >> .tessellation = true, >> .int64 = true, >> + .shader_ballot = true, >> + .shader_group_vote = true, >> }; >> entry_point = spirv_to_nir(spirv, module->size / 4, >> spec_entries, num_spec_entries, >> -- >> 2.9.4 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev