Some hardware, like i965, doesn't support group sizes greater than 32.
In that case, we can reduce the destination size of the ballot
intrinsic, which will simplify our code generation.
---
v2: Just change the intrinsic size, and don't add a new intrinsic (Connor)

 src/compiler/nir/nir.h                |  2 ++
 src/compiler/nir/nir_opt_intrinsics.c | 18 ++++++++++++++++++
 src/intel/compiler/brw_compiler.c     |  1 +
 3 files changed, 21 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 1e2d7d3cf6..5518807b0b 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1842,6 +1842,8 @@ typedef struct nir_shader_compiler_options {
     */
    bool use_interpolated_input_intrinsics;
 
+   unsigned max_subgroup_size;
+
    unsigned max_unroll_iterations;
 } nir_shader_compiler_options;
 
diff --git a/src/compiler/nir/nir_opt_intrinsics.c 
b/src/compiler/nir/nir_opt_intrinsics.c
index 0358680aae..d30c1cf6bb 100644
--- a/src/compiler/nir/nir_opt_intrinsics.c
+++ b/src/compiler/nir/nir_opt_intrinsics.c
@@ -62,6 +62,24 @@ opt_intrinsics_impl(nir_function_impl *impl)
             replacement = nir_imm_int(&b, NIR_TRUE);
             break;
          }
+         case nir_intrinsic_ballot: {
+            assert(b.shader->options->max_subgroup_size != 0);
+            if (b.shader->options->max_subgroup_size > 32 ||
+                intrin->dest.ssa.bit_size <= 32)
+               continue;
+
+            nir_intrinsic_instr *ballot =
+               nir_intrinsic_instr_create(b.shader, nir_intrinsic_ballot);
+            nir_ssa_dest_init(&ballot->instr, &ballot->dest, 1, 32, NULL);
+            ballot->src[0] = intrin->src[0];
+
+            nir_builder_instr_insert(&b, &ballot->instr);
+
+            replacement = nir_pack_64_2x32_split(&b,
+                                                 &ballot->dest.ssa,
+                                                 nir_imm_int(&b, 0));
+            break;
+         }
          default:
             break;
          }
diff --git a/src/intel/compiler/brw_compiler.c 
b/src/intel/compiler/brw_compiler.c
index 397c8cccf9..b910fcbc3d 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -57,6 +57,7 @@ static const struct nir_shader_compiler_options 
scalar_nir_options = {
    .lower_unpack_snorm_4x8 = true,
    .lower_unpack_unorm_2x16 = true,
    .lower_unpack_unorm_4x8 = true,
+   .max_subgroup_size = 64, /* FIXME */
    .max_unroll_iterations = 32,
 };
 
-- 
2.13.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to