Module: Mesa
Branch: master
Commit: 1286e73c2c0c5aac1bbc5a979230e9fd2c4a0600
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1286e73c2c0c5aac1bbc5a979230e9fd2c4a0600

Author: Alyssa Rosenzweig <[email protected]>
Date:   Tue Jan  5 15:35:10 2021 -0500

nir/lower_idiv: Add 8-bit and 16-bit lowering path

Round-trip through a float type twice as wide as the integer and divide
there. The extra details for mod/rem are handled directly in integer
space to simplify verification of the rounding details. The one catch is
that the mantissa of the reciprocal might be rounded down, which gives
wrong results; unconditionally adding 1 to it (proposed by Jonathan
Marek) fixes this. The lowerings here were tested exhaustively on all
pairs of 16-bit integers.

v2: Update idiv lowering per Rhys Perry's comment.

v3: Rewrite lowerings.

v4: Remove useless ftrunc, fix 8-bit issue, simplify code.

v5: Remove useless ffloor

Signed-off-by: Alyssa Rosenzweig <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
Tested-by: Danylo Piliaiev <[email protected]>
Tested-by: Jason Ekstrand <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8339>

---

 src/compiler/nir/nir_lower_idiv.c | 45 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_lower_idiv.c b/src/compiler/nir/nir_lower_idiv.c
index b30500190ea..c2f58df6b8c 100644
--- a/src/compiler/nir/nir_lower_idiv.c
+++ b/src/compiler/nir/nir_lower_idiv.c
@@ -198,6 +198,45 @@ convert_instr_precise(nir_builder *bld, nir_op op,
       return emit_idiv(bld, numer, denom, op);
 }
 
+static nir_ssa_def *
+convert_instr_small(nir_builder *b, nir_op op,
+      nir_ssa_def *numer, nir_ssa_def *denom)
+{
+   unsigned sz = numer->bit_size;
+   nir_alu_type int_type = nir_op_infos[op].output_type | sz;
+   nir_alu_type float_type = nir_type_float | (sz * 2);
+   /* Divide in a float type twice as wide as the integer operands */
+   nir_ssa_def *p = nir_type_convert(b, numer, int_type, float_type);
+   nir_ssa_def *q = nir_type_convert(b, denom, int_type, float_type);
+
+   /* Take 1/q, then add 1 to its bit pattern (an integer add on the float)
+    * to correct for rounding. This is needed for correct results and has
+    * been checked exhaustively for all pairs of 16-bit integers */
+   nir_ssa_def *rcp = nir_iadd_imm(b, nir_frcp(b, q), 1);
+
+   /* Divide by multiplying by the adjusted reciprocal: p * rcp */
+   nir_ssa_def *res = nir_fmul(b, p, rcp);
+
+   /* Convert back to int_type, with rounding inferred by the type */
+   res = nir_type_convert(b, res, float_type, int_type);
+
+   /* mod/rem ops: get the remainder as numer - denom * quotient */
+   if (op == nir_op_umod || op == nir_op_imod || op == nir_op_irem)
+      res = nir_isub(b, numer, nir_imul(b, denom, res));
+
+   /* imod: add denom if signs differ and res != 0, see constant folding */
+   if (op == nir_op_imod) {
+      nir_ssa_def *zero = nir_imm_zero(b, 1, sz);
+      nir_ssa_def *diff_sign =
+               nir_ine(b, nir_ige(b, numer, zero), nir_ige(b, denom, zero));
+
+      nir_ssa_def *adjust = nir_iand(b, diff_sign, nir_ine(b, res, zero));
+      res = nir_iadd(b, res, nir_bcsel(b, adjust, denom, zero));
+   }
+
+   return res;
+}
+
 static nir_ssa_def *
 lower_idiv(nir_builder *b, nir_instr *instr, void *_data)
 {
@@ -207,7 +246,9 @@ lower_idiv(nir_builder *b, nir_instr *instr, void *_data)
    nir_ssa_def *numer = nir_ssa_for_alu_src(b, alu, 0);
    nir_ssa_def *denom = nir_ssa_for_alu_src(b, alu, 1);
 
-   if (*path == nir_lower_idiv_precise)
+   if (numer->bit_size < 32)
+      return convert_instr_small(b, alu->op, numer, denom);
+   else if (*path == nir_lower_idiv_precise)
       return convert_instr_precise(b, alu->op, numer, denom);
    else
       return convert_instr(b, alu->op, numer, denom);
@@ -221,7 +262,7 @@ inst_is_idiv(const nir_instr *instr, UNUSED const void *_state)
 
    nir_alu_instr *alu = nir_instr_as_alu(instr);
 
-   if (alu->dest.dest.ssa.bit_size != 32)
+   if (alu->dest.dest.ssa.bit_size > 32)
       return false;
 
    switch (alu->op) {

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to