On Mon, Jul 10, 2017 at 4:05 PM, Connor Abbott <cwabbo...@gmail.com> wrote: > On Mon, Jul 10, 2017 at 3:50 PM, Matt Turner <matts...@gmail.com> wrote: >> On Mon, Jul 10, 2017 at 1:10 PM, Connor Abbott <cwabbo...@gmail.com> wrote: >>> On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner <matts...@gmail.com> wrote: >>>> We already had a channel_num system value, which I'm renaming to >>>> subgroup_invocation to match the rest of the new system values. >>>> >>>> Note that while ballotARB(true) will return zeros in the high 32-bits on >>>> systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB >>>> variables do not consider whether channels are enabled. See issue (1) of >>>> ARB_shader_ballot. >>>> --- >>>> src/compiler/nir/nir.c | 4 ++++ >>>> src/compiler/nir/nir_intrinsics.h | 8 +++++++- >>>> src/compiler/nir/nir_lower_system_values.c | 28 >>>> ++++++++++++++++++++++++++++ >>>> src/intel/compiler/brw_fs_nir.cpp | 2 +- >>>> src/intel/compiler/brw_nir_intrinsics.c | 4 ++-- >>>> 5 files changed, 42 insertions(+), 4 deletions(-) >>>> >>>> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c >>>> index 491b908396..9827e129ca 100644 >>>> --- a/src/compiler/nir/nir.c >>>> +++ b/src/compiler/nir/nir.c >>>> @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value val) >>>> return nir_intrinsic_load_helper_invocation; >>>> case SYSTEM_VALUE_VIEW_INDEX: >>>> return nir_intrinsic_load_view_index; >>>> + case SYSTEM_VALUE_SUBGROUP_SIZE: >>>> + return nir_intrinsic_load_subgroup_size; >>>> + case SYSTEM_VALUE_SUBGROUP_INVOCATION: >>>> + return nir_intrinsic_load_subgroup_invocation; >>>> default: >>>> unreachable("system value does not directly correspond to >>>> intrinsic"); >>>> } >>>> diff --git a/src/compiler/nir/nir_intrinsics.h >>>> b/src/compiler/nir/nir_intrinsics.h >>>> index 6c6ba4cf59..96ecfbc338 100644 >>>> --- a/src/compiler/nir/nir_intrinsics.h >>>> +++ b/src/compiler/nir/nir_intrinsics.h >>>> @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx) >>>> SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx) >>>> SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx) >>>> SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx) >>>> -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx) >>>> SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx) >>>> SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx) >>>> SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx) >>>> +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx) >>>> +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx) >>>> +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx) >>>> +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx) >>>> +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx) >>>> +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx) >>>> +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx) >>>> >>>> /* Blend constant color values. Float values are clamped. */ >>>> SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx) >>>> diff --git a/src/compiler/nir/nir_lower_system_values.c >>>> b/src/compiler/nir/nir_lower_system_values.c >>>> index 810100a081..faf0c3c9da 100644 >>>> --- a/src/compiler/nir/nir_lower_system_values.c >>>> +++ b/src/compiler/nir/nir_lower_system_values.c >>>> @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b) >>>> nir_load_base_instance(b)); >>>> break; >>>> >>>> + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: >>>> + case SYSTEM_VALUE_SUBGROUP_GE_MASK: >>>> + case SYSTEM_VALUE_SUBGROUP_GT_MASK: >>>> + case SYSTEM_VALUE_SUBGROUP_LE_MASK: >>>> + case SYSTEM_VALUE_SUBGROUP_LT_MASK: { >>>> + nir_ssa_def *count = nir_load_subgroup_invocation(b); >>>> + >>>> + switch (var->data.location) { >>>> + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: >>>> + sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count); >>>> + break; >>>> + case SYSTEM_VALUE_SUBGROUP_GE_MASK: >>>> + sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count); >>>> + break; >>>> + case SYSTEM_VALUE_SUBGROUP_GT_MASK: >>>> + sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count); >>>> + break; >>>> + case SYSTEM_VALUE_SUBGROUP_LE_MASK: >>>> + sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), >>>> count)); >>>> + break; >>>> + case SYSTEM_VALUE_SUBGROUP_LT_MASK: >>>> + sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), >>>> count)); >>>> + break; >>>> + default: >>>> + unreachable("you seriously can't tell this is unreachable?"); >>>> + } >>>> + } >>>> + >>> >>> While this fine to do for both Intel and AMD, Nvidia actually has >>> special system values for these, and AMD has special instructions for >>> bitCount(foo & gl_SubGroupLtMask), so I think we should have actual >> >> So, just add this to the above switch statement? >> >> if (!b->shader->options->lower_subgroup_masks) >> break; >> >> I'll also add the missing cases to nir_intrinsic_from_system_value() >> and nir_system_value_from_intrinsic(). > > Well, it gets a little more complicated... with SPIR-V, you also have > variants of these system values that return 4 32-bit integers. My plan > was to lower them to the normal 64-bit intrinsics, and then have > another pass lower those if necessary. So it might be better if you > don't do any lowering here, and lower these to shifts in your > intrinsic opt pass instead -- that makes it a little easier to share > the lowering to shifts between GLSL and SPIR-V. I can adapt to > whatever you want to do though.
Thanks, that makes sense. I'll squash the attached patch in. Should I expect to see any Reviewed-bys from you?
From eaa686cb9a4e61f9f51d15ce576ae80ac6f1ef38 Mon Sep 17 00:00:00 2001 From: Matt Turner <matts...@gmail.com> Date: Mon, 10 Jul 2017 15:01:06 -0700 Subject: [PATCH] fixup! nir: Add system values from ARB_shader_ballot --- src/compiler/nir/nir_lower_system_values.c | 30 ++++++----------------------- src/compiler/nir/nir_opt_intrinsics.c | 31 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index 459032e4ec..ba20d3083f 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -121,30 +121,12 @@ convert_block(nir_block *block, nir_builder *b) case SYSTEM_VALUE_SUBGROUP_GT_MASK: case SYSTEM_VALUE_SUBGROUP_LE_MASK: case SYSTEM_VALUE_SUBGROUP_LT_MASK: { - if (!b->shader->options->lower_subgroup_masks) - break; - - nir_ssa_def *count = nir_load_subgroup_invocation(b); - - switch (var->data.location) { - case SYSTEM_VALUE_SUBGROUP_EQ_MASK: - sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count); - break; - case SYSTEM_VALUE_SUBGROUP_GE_MASK: - sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count); - break; - case SYSTEM_VALUE_SUBGROUP_GT_MASK: - sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count); - break; - case SYSTEM_VALUE_SUBGROUP_LE_MASK: - sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), count)); - break; - case SYSTEM_VALUE_SUBGROUP_LT_MASK: - sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), count)); - break; - default: - unreachable("you seriously can't tell this is unreachable?"); - } + nir_intrinsic_op op = + nir_intrinsic_from_system_value(var->data.location); + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); + nir_ssa_dest_init(&load->instr, &load->dest, 1, 64, NULL); + nir_builder_instr_insert(b, &load->instr); + sysval = &load->dest.ssa; break; } diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c index d30c1cf6bb..20a0f7d85d 100644 --- a/src/compiler/nir/nir_opt_intrinsics.c +++ b/src/compiler/nir/nir_opt_intrinsics.c @@ -80,6 +80,37 @@ opt_intrinsics_impl(nir_function_impl *impl) nir_imm_int(&b, 0)); break; } + case nir_intrinsic_load_subgroup_eq_mask: + case nir_intrinsic_load_subgroup_ge_mask: + case nir_intrinsic_load_subgroup_gt_mask: + case nir_intrinsic_load_subgroup_le_mask: + case nir_intrinsic_load_subgroup_lt_mask: { + if (!b.shader->options->lower_subgroup_masks) + break; + + nir_ssa_def *count = nir_load_subgroup_invocation(&b); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_subgroup_eq_mask: + replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), count); + break; + case nir_intrinsic_load_subgroup_ge_mask: + replacement = nir_ishl(&b, nir_imm_int64(&b, ~0ull), count); + break; + case nir_intrinsic_load_subgroup_gt_mask: + replacement = nir_ishl(&b, nir_imm_int64(&b, ~1ull), count); + break; + case nir_intrinsic_load_subgroup_le_mask: + replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~1ull), count)); + break; + case nir_intrinsic_load_subgroup_lt_mask: + replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~0ull), count)); + break; + default: + unreachable("you seriously can't tell this is unreachable?"); + } + break; + } default: break; } -- 2.13.0
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev