Re: [PATCH v2 04/10] tcg/loongarch64: Optimize immediate loading

2023-01-22 Thread WANG Xuerui

On 1/18/23 09:11, Richard Henderson wrote:

From: Rui Wang 

diff:
   Imm                 Before                  After
   0000000000000000    addi.w  rd, zero, 0     addi.w  rd, zero, 0
                       lu52i.d rd, zero, 0
   00000000fffff800    lu12i.w rd, -1          addi.w  rd, zero, -2048
                       ori     rd, rd, 2048    lu32i.d rd, 0
                       lu32i.d rd, 0
   ...

Signed-off-by: Rui Wang 
Message-Id: <20221107144713.845550-1-wang...@loongson.cn>
Signed-off-by: Richard Henderson 
---
  tcg/loongarch64/tcg-target.c.inc | 35 +++-
  1 file changed, 12 insertions(+), 23 deletions(-)


Reviewed-by: WANG Xuerui 

Thanks!




[PATCH v2 04/10] tcg/loongarch64: Optimize immediate loading

2023-01-17 Thread Richard Henderson
From: Rui Wang 

diff:
  Imm                 Before                  After
  0000000000000000    addi.w  rd, zero, 0     addi.w  rd, zero, 0
                      lu52i.d rd, zero, 0
  00000000fffff800    lu12i.w rd, -1          addi.w  rd, zero, -2048
                      ori     rd, rd, 2048    lu32i.d rd, 0
                      lu32i.d rd, 0
  ...

Signed-off-by: Rui Wang 
Message-Id: <20221107144713.845550-1-wang...@loongson.cn>
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 35 +++-
 1 file changed, 12 insertions(+), 23 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 3174557ce3..428f3abd71 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -274,16 +274,6 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
 return true;
 }
 
-static bool imm_part_needs_loading(bool high_bits_are_ones,
-   tcg_target_long part)
-{
-if (high_bits_are_ones) {
-return part != -1;
-} else {
-return part != 0;
-}
-}
-
 /* Loads a 32-bit immediate into rd, sign-extended.  */
 static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val)
 {
@@ -291,16 +281,16 @@ static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val)
 tcg_target_long hi12 = sextreg(val, 12, 20);
 
 /* Single-instruction cases.  */
-if (lo == val) {
-/* val fits in simm12: addi.w rd, zero, val */
-tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val);
-return;
-}
-if (0x800 <= val && val <= 0xfff) {
+if (hi12 == 0) {
 /* val fits in uimm12: ori rd, zero, val */
 tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val);
 return;
 }
+if (hi12 == sextreg(lo, 12, 20)) {
+/* val fits in simm12: addi.w rd, zero, val */
+tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val);
+return;
+}
 
 /* High bits must be set; load with lu12i.w + optional ori.  */
 tcg_out_opc_lu12i_w(s, rd, hi12);
@@ -334,8 +324,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
 
 intptr_t pc_offset;
 tcg_target_long val_lo, val_hi, pc_hi, offset_hi;
-tcg_target_long hi32, hi52;
-bool rd_high_bits_are_ones;
+tcg_target_long hi12, hi32, hi52;
 
 /* Value fits in signed i32.  */
 if (type == TCG_TYPE_I32 || val == (int32_t)val) {
@@ -366,25 +355,25 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
 return;
 }
 
+hi12 = sextreg(val, 12, 20);
 hi32 = sextreg(val, 32, 20);
 hi52 = sextreg(val, 52, 12);
 
 /* Single cu52i.d case.  */
-if (ctz64(val) >= 52) {
+if ((hi52 != 0) && (ctz64(val) >= 52)) {
 tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52);
 return;
 }
 
 /* Slow path.  Initialize the low 32 bits, then concat high bits.  */
 tcg_out_movi_i32(s, rd, val);
-rd_high_bits_are_ones = (int32_t)val < 0;
 
-if (imm_part_needs_loading(rd_high_bits_are_ones, hi32)) {
+/* Load hi32 and hi52 explicitly when they are unexpected values. */
+if (hi32 != sextreg(hi12, 20, 20)) {
 tcg_out_opc_cu32i_d(s, rd, hi32);
-rd_high_bits_are_ones = hi32 < 0;
 }
 
-if (imm_part_needs_loading(rd_high_bits_are_ones, hi52)) {
+if (hi52 != sextreg(hi32, 20, 12)) {
 tcg_out_opc_cu52i_d(s, rd, rd, hi52);
 }
 }
-- 
2.34.1