Module: Mesa
Branch: main
Commit: 997a0884a52e8ca898cff96e5c613b1adb654900
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=997a0884a52e8ca898cff96e5c613b1adb654900

Author: Rhys Perry <[email protected]>
Date:   Tue Nov 28 17:57:58 2023 +0000

aco: implement 16-bit fsign on GFX8

GFX8 doesn't have v_med3_i16.

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Georg Lehmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26445>

---

 src/amd/compiler/aco_instruction_selection.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index d7a2159c63b..ddceeb3bbd6 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2878,12 +2878,18 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
    case nir_op_fsign: {
       Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
       if (dst.regClass() == v2b) {
-         assert(ctx->program->gfx_level >= GFX9);
          /* replace negative zero with positive zero */
          src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand::zero(), src);
-         src =
-            bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand::c16(-1), src, Operand::c16(1u));
-         bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
+         if (ctx->program->gfx_level >= GFX9) {
+            src = bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand::c16(-1), src,
+                           Operand::c16(1u));
+            bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
+         } else {
+            src = convert_int(ctx, bld, src, 16, 32, true);
+            src = bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand::c32(-1), src,
+                           Operand::c32(1u));
+            bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
+         }
       } else if (dst.regClass() == v1) {
          if (ctx->block->fp_mode.denorm32 == fp_denorm_flush) {
             /* If denormals are flushed, then v_mul_legacy_f32(2.0, src) can become omod. */

Reply via email to