These opcodes correspond to instructions that vc4 uses to implement
blending inside shaders. I've seen equivalent opcodes on other
architectures before, but I think this is the first time they have been
needed in Mesa.
---
 src/glsl/nir/nir_opcodes.py       | 45 +++++++++++++++++++++++++++++++++++++++
 src/glsl/nir/nir_opt_algebraic.py |  6 ++++++
 2 files changed, 51 insertions(+)
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index df5b7e2..ff54823 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -460,6 +460,51 @@ binop("fmax", tfloat, "", "fmaxf(src0, src1)")
 binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
 binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0")
 
+# Saturating add of four packed 8-bit channels; each sum clamps at 0xff.
+binop("isadd_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+   dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
+}
+""")
+
+# Saturating subtract of four packed 8-bit channels; each clamps at 0.
+binop("issub_4x8", tint, "", """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+   int src0_chan = (src0 >> i) & 0xff;
+   int src1_chan = (src1 >> i) & 0xff;
+   if (src0_chan > src1_chan)
+      dst |= (src0_chan - src1_chan) << i;
+}
+""")
+
+# Per-channel minimum of four packed 8-bit channels.
+binop("imin_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+   dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+}
+""")
+
+# Per-channel maximum of four packed 8-bit channels.
+binop("imax_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+   dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+}
+""")
+
+# unorm multiply: (a * b) / 255 per byte; truncation makes it non-associative.
+binop("imul_unorm_4x8", tint, commutative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+   int src0_chan = (src0 >> i) & 0xff;
+   int src1_chan = (src1 >> i) & 0xff;
+   dst |= ((src0_chan * src1_chan) / 255) << i;
+}
+""")
+
 binop("fpow", tfloat, "", "powf(src0, src1)")
 
 binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat,
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index 226e0a8..e1f2638 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -56,12 +56,16 @@ optimizations = [
    (('iabs', ('ineg', a)), ('iabs', a)),
    (('fadd', a, 0.0), a),
    (('iadd', a, 0), a),
+   (('isadd_4x8', a, 0), a),
+   (('isadd_4x8', a, ~0), ~0),
    (('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
    (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
    (('fadd', ('fneg', a), a), 0.0),
    (('iadd', ('ineg', a), a), 0),
    (('fmul', a, 0.0), 0.0),
    (('imul', a, 0), 0),
+   (('imul_unorm_4x8', a, 0), 0),
+   (('imul_unorm_4x8', a, ~0), a),
    (('fmul', a, 1.0), a),
    (('imul', a, 1), a),
    (('fmul', a, -1.0), ('fneg', a)),
@@ -200,6 +204,8 @@ optimizations = [
    # Subtracts
    (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
    (('isub', a, ('isub', 0, b)), ('iadd', a, b)),
+   (('issub_4x8', a, 0), a),
+   (('issub_4x8', a, ~0), 0),
    (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
    (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
    (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
-- 
2.1.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev