On Sat, Jun 10, 2017 at 1:50 AM, Connor Abbott <conn...@valvesoftware.com> wrote: > From: Connor Abbott <cwabbo...@gmail.com> > > Signed-off-by: Connor Abbott <cwabbo...@gmail.com> > --- > src/amd/common/ac_nir_to_llvm.c | 75 > +++++++++++++++++++++++++++++++++++++++++ > src/amd/vulkan/radv_device.c | 8 +++++ > src/amd/vulkan/radv_pipeline.c | 2 ++ > 3 files changed, 85 insertions(+) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index 5bbd1c5..111e575 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -4069,6 +4069,81 @@ static void visit_intrinsic(struct nir_to_llvm_context > *ctx, > case nir_intrinsic_load_patch_vertices_in: > result = LLVMConstInt(ctx->i32, > ctx->options->key.tcs.input_vertices, false); > break; > + case nir_intrinsic_ballot: > + result = ac_build_ballot(&ctx->ac, get_src(ctx, > instr->src[0])); > + break; > + case nir_intrinsic_read_first_invocation: { > + LLVMValueRef src0 = get_src(ctx, instr->src[0]); > + ac_build_optimization_barrier(&ctx->ac, &src0); > + LLVMValueRef srcs[1] = { src0 }; > + result = ac_build_intrinsic(&ctx->ac, > "llvm.amdgcn.readfirstlane", > + ctx->i32, srcs, 1, > + AC_FUNC_ATTR_NOUNWIND | > + AC_FUNC_ATTR_READNONE | > + AC_FUNC_ATTR_CONVERGENT); > + break; > + } > + case nir_intrinsic_read_invocation: { > + LLVMValueRef src0 = get_src(ctx, instr->src[0]); > + ac_build_optimization_barrier(&ctx->ac, &src0); > + LLVMValueRef srcs[2] = { src0, get_src(ctx, instr->src[1]) }; > + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", > + ctx->i32, srcs, 2, > + AC_FUNC_ATTR_NOUNWIND | > + AC_FUNC_ATTR_READNONE | > + AC_FUNC_ATTR_CONVERGENT); > + break; > + } > + case nir_intrinsic_load_subgroup_invocation: > + result = ac_get_thread_id(&ctx->ac); > + break; > + case nir_intrinsic_load_subgroup_size: > + result = LLVMConstInt(ctx->i32, 64, 0); > + break; > + case nir_intrinsic_all_invocations: > + result = LLVMBuildSExt(ctx->builder, > + 
ac_build_vote_all(&ctx->ac, > + get_src(ctx, > instr->src[0])), > + ctx->i32, "");
How well does LLVM optimize this? I've always found representing booleans as int32 values of -1 and 0 an awkward fit for LLVM, and I'm wondering whether LLVM can optimize the SExt away or whether a select would be better. > + break; > + case nir_intrinsic_any_invocations: > + result = LLVMBuildSExt(ctx->builder, > + ac_build_vote_any(&ctx->ac, > + get_src(ctx, > instr->src[0])), > + ctx->i32, ""); > + break; > + case nir_intrinsic_all_invocations_equal: > + result = LLVMBuildSExt(ctx->builder, > + ac_build_vote_eq(&ctx->ac, > + get_src(ctx, > instr->src[0])), > + ctx->i32, ""); > + break; > + case nir_intrinsic_load_subgroup_eq_mask: { > + LLVMValueRef id = ac_get_thread_id(&ctx->ac); > + id = LLVMBuildZExt(ctx->builder, id, ctx->i64, ""); > + result = LLVMBuildShl(ctx->builder, LLVMConstInt(ctx->i64, 1, > 0), id, ""); > + break; > + } > + case nir_intrinsic_load_subgroup_ge_mask: > + case nir_intrinsic_load_subgroup_gt_mask: > + case nir_intrinsic_load_subgroup_le_mask: > + case nir_intrinsic_load_subgroup_lt_mask: { > + LLVMValueRef id = ac_get_thread_id(&ctx->ac); > + if (instr->intrinsic == nir_intrinsic_load_subgroup_gt_mask || > + instr->intrinsic == nir_intrinsic_load_subgroup_le_mask) { > + /* All bits set except LSB */ > + result = LLVMConstInt(ctx->i64, -2, 0); > + } else { > + /* All bits set */ > + result = LLVMConstInt(ctx->i64, -1, 0); > + } > + id = LLVMBuildZExt(ctx->builder, id, ctx->i64, ""); > + result = LLVMBuildShl(ctx->builder, result, id, ""); > + if (instr->intrinsic == nir_intrinsic_load_subgroup_le_mask || > + instr->intrinsic == nir_intrinsic_load_subgroup_lt_mask) > + result = LLVMBuildNot(ctx->builder, result, ""); > + break; > + } > default: > fprintf(stderr, "Unknown intrinsic: "); > nir_print_instr(&instr->instr, stderr); > diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c > index e9bf44c..ea50acc 100644 > --- a/src/amd/vulkan/radv_device.c > +++ b/src/amd/vulkan/radv_device.c > @@ -127,6 +127,14 @@ static const 
VkExtensionProperties > common_device_extensions[] = { > .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME, > .specVersion = 1, > }, > + { > + .extensionName = VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, > + .specVersion = 1, > + }, > + { > + .extensionName = VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, > + .specVersion = 1, > + }, > }; > > static VkResult > diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c > index 39cbd5a..242890a 100644 > --- a/src/amd/vulkan/radv_pipeline.c > +++ b/src/amd/vulkan/radv_pipeline.c > @@ -228,6 +228,8 @@ radv_shader_compile_to_nir(struct radv_device *device, > .image_write_without_format = true, > .tessellation = true, > .int64 = true, > + .shader_ballot = true, > + .shader_group_vote = true, > }; > entry_point = spirv_to_nir(spirv, module->size / 4, > spec_entries, num_spec_entries, > -- > 2.9.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev