Re: [PATCH v3 12/33] target/arm: Convert SQSHL and UQSHL (register) to gvec

2024-05-30 Thread Peter Maydell
On Tue, 28 May 2024 at 21:33, Richard Henderson
 wrote:
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Peter Maydell 

thanks
-- PMM



[PATCH v3 12/33] target/arm: Convert SQSHL and UQSHL (register) to gvec

2024-05-28 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/helper.h |  8 
 target/arm/tcg/translate.h  |  4 
 target/arm/tcg/neon-dp.decode   | 10 ++---
 target/arm/tcg/gengvec.c| 24 ++
 target/arm/tcg/neon_helper.c| 36 +
 target/arm/tcg/translate-a64.c  | 17 +++-
 target/arm/tcg/translate-neon.c |  6 ++
 7 files changed, 83 insertions(+), 22 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index 25eb7bf5df..f345087ddb 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -326,6 +326,14 @@ DEF_HELPER_3(neon_qrshl_u32, i32, env, i32, i32)
 DEF_HELPER_3(neon_qrshl_s32, i32, env, i32, i32)
 DEF_HELPER_3(neon_qrshl_u64, i64, env, i64, i64)
 DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64)
+DEF_HELPER_FLAGS_5(neon_sqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, 
i32)
+DEF_HELPER_FLAGS_5(neon_sqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, 
i32)
+DEF_HELPER_FLAGS_5(neon_sqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, 
i32)
+DEF_HELPER_FLAGS_5(neon_sqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, 
i32)
+DEF_HELPER_FLAGS_5(neon_uqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, 
i32)
+DEF_HELPER_FLAGS_5(neon_uqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, 
i32)
+DEF_HELPER_FLAGS_5(neon_uqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, 
i32)
+DEF_HELPER_FLAGS_5(neon_uqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, 
i32)
 
 DEF_HELPER_FLAGS_4(gvec_srshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index ea63ffc47b..6c6d4d49e7 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -463,6 +463,10 @@ void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, 
uint32_t rn_ofs,
 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
 void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
 
 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
 void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
diff --git a/target/arm/tcg/neon-dp.decode b/target/arm/tcg/neon-dp.decode
index 8525c65c0d..6d4996b8d8 100644
--- a/target/arm/tcg/neon-dp.decode
+++ b/target/arm/tcg/neon-dp.decode
@@ -109,14 +109,8 @@ VSHL_U_3s 001 1 0 . ..   0100 . . . 0 
 @3same_rev
 @3same_64_rev ... . . . 11    . q:1 . .  \
  &3same vm=%vn_dp vn=%vm_dp vd=%vd_dp size=3
 
-{
-  VQSHL_S64_3s    001 0 0 . ..   0100 . . . 1  @3same_64_rev
-  VQSHL_S_3s  001 0 0 . ..   0100 . . . 1  @3same_rev
-}
-{
-  VQSHL_U64_3s    001 1 0 . ..   0100 . . . 1  @3same_64_rev
-  VQSHL_U_3s  001 1 0 . ..   0100 . . . 1  @3same_rev
-}
+VQSHL_S_3s    001 0 0 . ..   0100 . . . 1  @3same_rev
+VQSHL_U_3s    001 1 0 . ..   0100 . . . 1  @3same_rev
 VRSHL_S_3s    001 0 0 . ..   0101 . . . 0  @3same_rev
 VRSHL_U_3s    001 1 0 . ..   0101 . . . 0  @3same_rev
 {
diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c
index 216a9f81e3..63c3ec2e73 100644
--- a/target/arm/tcg/gengvec.c
+++ b/target/arm/tcg/gengvec.c
@@ -1240,6 +1240,30 @@ void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, 
uint32_t rn_ofs,
 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
 }
 
+void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+static gen_helper_gvec_3_ptr * const fns[] = {
+gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h,
+gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d,
+};
+tcg_debug_assert(vece <= MO_64);
+tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
+   opr_sz, max_sz, 0, fns[vece]);
+}
+
+void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+static gen_helper_gvec_3_ptr * const fns[] = {
+gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h,
+gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d,
+};
+tcg_debug_assert(vece <= MO_64);
+tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
+   opr_sz, max_sz, 0, fns[vece]);
+}
+
 void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp 
esz)
 {
 uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
diff --git