Signed-off-by: WANG Xuerui <g...@xen0n.name> Reviewed-by: Richard Henderson <richard.hender...@linaro.org> --- tcg/loongarch64/tcg-target-con-set.h | 1 + tcg/loongarch64/tcg-target.c.inc | 42 ++++++++++++++++++++++++++++ tcg/loongarch64/tcg-target.h | 8 +++--- 3 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h index d958183020..2975e03127 100644 --- a/tcg/loongarch64/tcg-target-con-set.h +++ b/tcg/loongarch64/tcg-target-con-set.h @@ -18,4 +18,5 @@ C_O0_I1(r) C_O1_I1(r, r) C_O1_I2(r, r, rC) C_O1_I2(r, r, rU) +C_O1_I2(r, r, rW) C_O1_I2(r, 0, rZ) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 51f0e3b1f1..1ab690bab6 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -384,6 +384,28 @@ static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg) tcg_out_opc_addi_w(s, ret, arg, 0); } +static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc, + TCGReg a0, TCGReg a1, TCGReg a2, + bool c2, bool is_32bit) +{ + if (c2) { + /* + * Fast path: semantics already satisfied due to constraint and + * insn behavior, single instruction is enough. + */ + tcg_debug_assert(a2 == (is_32bit ? 32 : 64)); + /* all clz/ctz insns belong to DJ-format */ + tcg_out32(s, encode_dj_insn(opc, a0, a1)); + return; + } + + tcg_out32(s, encode_dj_insn(opc, TCG_REG_TMP0, a1)); + /* a0 = a1 ? REG_TMP0 : a2 */ + tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1); + tcg_out_opc_masknez(s, a0, a2, a1); + tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0); +} + /* * Entry-points */ @@ -544,6 +566,20 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_opc_revb_d(s, a0, a1); break; + case INDEX_op_clz_i32: + tcg_out_clzctz(s, OPC_CLZ_W, a0, a1, a2, c2, true); + break; + case INDEX_op_clz_i64: + tcg_out_clzctz(s, OPC_CLZ_D, a0, a1, a2, c2, false); + break; + + case INDEX_op_ctz_i32: + tcg_out_clzctz(s, OPC_CTZ_W, a0, a1, a2, c2, true); + break; + case INDEX_op_ctz_i64: + tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false); + break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: default: @@ -604,6 +640,12 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) /* LoongArch reg-imm bitops have their imms ZERO-extended */ return C_O1_I2(r, r, rU); + case INDEX_op_clz_i32: + case INDEX_op_clz_i64: + case INDEX_op_ctz_i32: + case INDEX_op_ctz_i64: + return C_O1_I2(r, r, rW); + case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: /* Must deposit into the same register as input */ diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 02d17d2f6d..ef489cbc86 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -120,8 +120,8 @@ typedef enum { #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 1 -#define TCG_TARGET_HAS_clz_i32 0 -#define TCG_TARGET_HAS_ctz_i32 0 +#define TCG_TARGET_HAS_clz_i32 1 +#define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_direct_jump 0 #define TCG_TARGET_HAS_brcond2 0 @@ -156,8 +156,8 @@ typedef enum { #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 1 -#define TCG_TARGET_HAS_clz_i64 0 -#define TCG_TARGET_HAS_ctz_i64 0 +#define TCG_TARGET_HAS_clz_i64 1 +#define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 -- 2.33.0