Re: [PATCH v3 10/33] target/arm: Convert SRSHL and URSHL (register) to gvec

2024-05-30 Thread Peter Maydell
On Tue, 28 May 2024 at 21:33, Richard Henderson wrote:
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Peter Maydell 

thanks
-- PMM



[PATCH v3 10/33] target/arm: Convert SRSHL and URSHL (register) to gvec

2024-05-28 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/helper.h | 10 +
 target/arm/tcg/translate.h  |  4 
 target/arm/tcg/neon-dp.decode   | 10 ++---
 target/arm/tcg/gengvec.c| 22 +++
 target/arm/tcg/neon_helper.c| 38 -
 target/arm/tcg/translate-a64.c  | 17 ++-
 target/arm/tcg/translate-neon.c |  6 ++
 7 files changed, 84 insertions(+), 23 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index a14c040451..25eb7bf5df 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -327,6 +327,16 @@ DEF_HELPER_3(neon_qrshl_s32, i32, env, i32, i32)
 DEF_HELPER_3(neon_qrshl_u64, i64, env, i64, i64)
 DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64)
 
+DEF_HELPER_FLAGS_4(gvec_srshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_srshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_srshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_urshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_urshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_urshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_urshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
 DEF_HELPER_2(neon_add_u8, i32, i32, i32)
 DEF_HELPER_2(neon_add_u16, i32, i32, i32)
 DEF_HELPER_2(neon_sub_u8, i32, i32, i32)
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 87439dcc61..ea63ffc47b 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -459,6 +459,10 @@ void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
 
 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
 void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
diff --git a/target/arm/tcg/neon-dp.decode b/target/arm/tcg/neon-dp.decode
index fd3a01bfa0..8525c65c0d 100644
--- a/target/arm/tcg/neon-dp.decode
+++ b/target/arm/tcg/neon-dp.decode
@@ -117,14 +117,8 @@ VSHL_U_3s 001 1 0 . ..   0100 . . . 0  @3same_rev
   VQSHL_U64_3s    001 1 0 . ..   0100 . . . 1  @3same_64_rev
   VQSHL_U_3s  001 1 0 . ..   0100 . . . 1  @3same_rev
 }
-{
-  VRSHL_S64_3s    001 0 0 . ..   0101 . . . 0  @3same_64_rev
-  VRSHL_S_3s  001 0 0 . ..   0101 . . . 0  @3same_rev
-}
-{
-  VRSHL_U64_3s    001 1 0 . ..   0101 . . . 0  @3same_64_rev
-  VRSHL_U_3s  001 1 0 . ..   0101 . . . 0  @3same_rev
-}
+VRSHL_S_3s    001 0 0 . ..   0101 . . . 0  @3same_rev
+VRSHL_U_3s    001 1 0 . ..   0101 . . . 0  @3same_rev
 {
   VQRSHL_S64_3s   001 0 0 . ..   0101 . . . 1  @3same_64_rev
   VQRSHL_S_3s 001 0 0 . ..   0101 . . . 1  @3same_rev
diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c
index 740f3f864e..216a9f81e3 100644
--- a/target/arm/tcg/gengvec.c
+++ b/target/arm/tcg/gengvec.c
@@ -1218,6 +1218,28 @@ void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
 }
 
+void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+static gen_helper_gvec_3 * const fns[] = {
+gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h,
+gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d,
+};
+tcg_debug_assert(vece <= MO_64);
+tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
+}
+
+void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+static gen_helper_gvec_3 * const fns[] = {
+gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h,
+gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d,
+};
+tcg_debug_assert(vece <= MO_64);
+tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
+}
+
 void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
 {
 uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c
index 0af15e9f6e..516ecc1dcb 100644
--- a/target/arm/tcg/neon_helper.c
+++ b/target/arm/tcg/neon_helper.c
@@ -6,10 +6,11 @@