Re: [PATCH for-6.2 33/34] target/arm: Implement MVE scatter-gather immediate forms

2021-07-21 Thread Richard Henderson

On 7/13/21 3:37 AM, Peter Maydell wrote:

Implement the MVE VLDR/VSTR insns which do scatter-gather using base
addresses from Qm plus or minus an immediate offset (possibly with
writeback). Note that writeback is not predicated but it does have
to honour ECI state, so we have to add an eci_mask check to the
VSTR_SG macros (the VLDR_SG macros already needed this to be able
to distinguish "skip beat" from "set predicated element to 0").

Signed-off-by: Peter Maydell
---
  target/arm/helper-mve.h    |  5 +++
  target/arm/mve.decode      | 10 +
  target/arm/mve_helper.c    | 91 --
  target/arm/translate-mve.c | 66 +++
  4 files changed, 140 insertions(+), 32 deletions(-)


Reviewed-by: Richard Henderson 

r~



[PATCH for-6.2 33/34] target/arm: Implement MVE scatter-gather immediate forms

2021-07-13 Thread Peter Maydell
Implement the MVE VLDR/VSTR insns which do scatter-gather using base
addresses from Qm plus or minus an immediate offset (possibly with
writeback). Note that writeback is not predicated but it does have
to honour ECI state, so we have to add an eci_mask check to the
VSTR_SG macros (the VLDR_SG macros already needed this to be able
to distinguish "skip beat" from "set predicated element to 0").

Signed-off-by: Peter Maydell 
---
 target/arm/helper-mve.h    |  5 +++
 target/arm/mve.decode      | 10 +
 target/arm/mve_helper.c    | 91 --
 target/arm/translate-mve.c | 66 +++
 4 files changed, 140 insertions(+), 32 deletions(-)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index 9c570270c61..16799b110fd 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -65,6 +65,11 @@ DEF_HELPER_FLAGS_4(mve_vstrh_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vstrw_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vstrd_sg_os_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(mve_vldrw_sg_wb_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrd_sg_wb_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrw_sg_wb_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrd_sg_wb_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_3(mve_vdup, TCG_CALL_NO_WG, void, env, ptr, i32)
 
 DEF_HELPER_FLAGS_4(mve_vidupb, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index b0e39f36723..76e9b9c721c 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -43,6 +43,7 @@
 &vmaxv qm rda size
 &vabav qn qm rda size
 &vldst_sg qd qm rn size msize os
+&vldst_sg_imm qd qm a w imm
 
 # scatter-gather memory size is in bits 6:4
 %sg_msize 6:1 4:1
@@ -54,6 +55,10 @@
 @vldst_sg .... .... .... rn:4 .... ... size:2 ... ... os:1 &vldst_sg \
   qd=%qd qm=%qm msize=%sg_msize
 
+# Qm is in the fields usually labeled Qn
+@vldst_sg_imm .... .... a:1 . w:1 . .... .... .... . imm:7 &vldst_sg_imm \
+  qd=%qd qm=%qn
+
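 @1op .... .... .... size:2 .. .... .... .... .... &1op qd=%qd qm=%qm
 @1op_nosz .... .... .... .... .... .... .... .... &1op qd=%qd qm=%qm size=0
 @2op .... .... .. size:2 .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn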
@@ -148,6 +153,11 @@ VLDR_S_sg        111 0 1100 1 . 01 .... ... 0 111 . .... .... @vldst_sg
 VLDR_U_sg        111 1 1100 1 . 01 .... ... 0 111 . .... .... @vldst_sg
 VSTR_sg          111 0 1100 1 . 00 .... ... 0 111 . .... .... @vldst_sg
 
+VLDRW_sg_imm     111 1 1101 ... 1 ... 0 ... 1 1110 .... .... @vldst_sg_imm
+VLDRD_sg_imm     111 1 1101 ... 1 ... 0 ... 1 1111 .... .... @vldst_sg_imm
+VSTRW_sg_imm     111 1 1101 ... 0 ... 0 ... 1 1110 .... .... @vldst_sg_imm
+VSTRD_sg_imm     111 1 1101 ... 0 ... 0 ... 1 1111 .... .... @vldst_sg_imm
+
 # Moves between 2 32-bit vector lanes and 2 general purpose registers
 VMOV_to_2gp      1110 1100 0 . 00 rt2:4 ... 0 1111 000 idx:1 rt:4 qd=%qd
 VMOV_from_2gp    1110 1100 0 . 01 rt2:4 ... 0 1111 000 idx:1 rt:4 qd=%qd
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index 36592b88372..293c0e11819 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -220,7 +220,7 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
  * For loads, predicated lanes are zeroed instead of retaining
  * their previous values.
  */
-#define DO_VLDR_SG(OP, LDTYPE, ESIZE, TYPE, OFFTYPE, ADDRFN)\
+#define DO_VLDR_SG(OP, LDTYPE, ESIZE, TYPE, OFFTYPE, ADDRFN, WB)\
 void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \
   uint32_t base)\
 {   \
@@ -237,25 +237,35 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
 addr = ADDRFN(base, m[H##ESIZE(e)]);\
 d[H##ESIZE(e)] = (mask & 1) ?   \
 cpu_##LDTYPE##_data_ra(env, addr, GETPC()) : 0; \
+if (WB) {   \
+m[H##ESIZE(e)] = addr;  \
+}   \
 }   \
 mve_advance_vpt(env);   \
 }
 
 /* We know here TYPE is unsigned so always the same as the offset type */
-#define DO_VSTR_SG(OP, STTYPE, ESIZE, TYPE, ADDRFN) \
+#define DO_VSTR_SG(OP, STTYPE, ESIZE, TYPE, ADDRFN, WB) \
 void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \
   uint32_t base)\
 {   \