Re: [PATCH for-6.2 32/34] target/arm: Implement MVE scatter-gather insns

2021-07-22 Thread Peter Maydell
On Thu, 22 Jul 2021 at 01:36, Richard Henderson
 wrote:
>
> On 7/13/21 3:37 AM, Peter Maydell wrote:
> > +static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
> > +{
> > +TCGv_i32 addr;
> > +TCGv_ptr qd, qm;
> > +
> > +if (!dc_isar_feature(aa32_mve, s) ||
> > +!mve_check_qreg_bank(s, a->qd | a->qm) ||
> > +!fn || a->rn == 15) {
> > +/* Rn case is UNPREDICTABLE */
> > +return false;
> > +}
>
> No Qd != Qm check for loads?  Given that we know in advance that it simply
> won't work for VLDRD, it would be nice to diagnose the error.
>
> > +static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
> > +{
> > +static MVEGenLdStSGFn * const fns[2][4][4] = { {
> > +{ NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
> > +{ NULL, NULL,   F(vldrh_sg_sw), NULL },
> > +{ NULL, NULL,   NULL,   NULL },
> > +{ NULL, NULL,   NULL,   NULL }
> > +}, {
> > +{ NULL, NULL,  NULL,  NULL },
> > +{ NULL, NULL,  F(vldrh_sg_os_sw), NULL },
> > +{ NULL, NULL,  NULL,  NULL },
> > +{ NULL, NULL,  NULL,  NULL }
> > +}
> > +};
>
> A little bit unfortunate with table density here, but whatever.

Yes; I initially wrote things this way, incorrectly thinking more of the
tables would be populated than they are, but I think overall it's
a fairly readable way to go.

I noticed yesterday that these load/store implementations aren't
enforcing the alignment fault requirements, though, so I need to
add that.

thanks
-- PMM



Re: [PATCH for-6.2 32/34] target/arm: Implement MVE scatter-gather insns

2021-07-21 Thread Richard Henderson

On 7/13/21 3:37 AM, Peter Maydell wrote:

+static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
+{
+TCGv_i32 addr;
+TCGv_ptr qd, qm;
+
+if (!dc_isar_feature(aa32_mve, s) ||
+!mve_check_qreg_bank(s, a->qd | a->qm) ||
+!fn || a->rn == 15) {
+/* Rn case is UNPREDICTABLE */
+return false;
+}


No Qd != Qm check for loads?  Given that we know in advance that it simply won't work for 
VLDRD, it would be nice to diagnose the error.



+static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
+{
+static MVEGenLdStSGFn * const fns[2][4][4] = { {
+{ NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
+{ NULL, NULL,   F(vldrh_sg_sw), NULL },
+{ NULL, NULL,   NULL,   NULL },
+{ NULL, NULL,   NULL,   NULL }
+}, {
+{ NULL, NULL,  NULL,  NULL },
+{ NULL, NULL,  F(vldrh_sg_os_sw), NULL },
+{ NULL, NULL,  NULL,  NULL },
+{ NULL, NULL,  NULL,  NULL }
+}
+};


A little bit unfortunate with table density here, but whatever.

Reviewed-by: Richard Henderson 


r~



[PATCH for-6.2 32/34] target/arm: Implement MVE scatter-gather insns

2021-07-13 Thread Peter Maydell
Implement the MVE gather-loads and scatter-stores which
form the address by adding a base value from a scalar
register to an offset in each element of a vector.

Signed-off-by: Peter Maydell 
---
 target/arm/helper-mve.h|  32 +
 target/arm/mve.decode  |  12 
 target/arm/mve_helper.c| 129 +
 target/arm/translate-mve.c |  91 ++
 4 files changed, 264 insertions(+)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index 55f9151ccbf..9c570270c61 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -33,6 +33,38 @@ DEF_HELPER_FLAGS_3(mve_vstrb_h, TCG_CALL_NO_WG, void, env, ptr, i32)
 DEF_HELPER_FLAGS_3(mve_vstrb_w, TCG_CALL_NO_WG, void, env, ptr, i32)
 DEF_HELPER_FLAGS_3(mve_vstrh_w, TCG_CALL_NO_WG, void, env, ptr, i32)
 
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrw_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrd_sg_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vstrb_sg_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrb_sg_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrb_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrh_sg_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrh_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrw_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrd_sg_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_os_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_os_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrw_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrd_sg_os_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vstrh_sg_os_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrh_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrw_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrd_sg_os_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_3(mve_vdup, TCG_CALL_NO_WG, void, env, ptr, i32)
 
 DEF_HELPER_FLAGS_4(mve_vidupb, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index 82dc07bc30e..b0e39f36723 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -42,11 +42,18 @@
 _scalar qda rm size
  qm rda size
  qn qm rda size
+_sg qd qm rn size msize os
+
+# scatter-gather memory size is in bits 6 and 4
+%sg_msize 6:1 4:1
 
 @vldr_vstr ... . . . . l:1 rn:4 ... .. imm:7 _vstr qd=%qd u=0
 # Note that both Rn and Qd are 3 bits only (no D bit)
 @vldst_wn ... u:1 ... . . . . l:1 . rn:3 qd:3 . ... .. imm:7 _vstr
 
+@vldst_sg    rn:4  ... size:2 ... ... os:1 _sg \
+  qd=%qd qm=%qm msize=%sg_msize
+
 @1op    size:2 ..     &1op qd=%qd qm=%qm
 @1op_nosz         &1op qd=%qd qm=%qm size=0
 @2op   .. size:2      &2op qd=%qd qm=%qm qn=%qn
@@ -136,6 +143,11 @@ VLDR_VSTR1110110 1 a:1 . w:1 .  ... 01 
...   @vldr_vstr \
 VLDR_VSTR1110110 1 a:1 . w:1 .  ... 10 ...   @vldr_vstr \
  size=2 p=1
 
+# gather loads/scatter stores
+VLDR_S_sg111 0 1100 1 . 01  ... 0 111 .   @vldst_sg
+VLDR_U_sg111 1 1100 1 . 01  ... 0 111 .   @vldst_sg
+VSTR_sg  111 0 1100 1 . 00  ... 0 111 .   @vldst_sg
+
 # Moves between 2 32-bit vector lanes and 2 general purpose registers
 VMOV_to_2gp  1110 1100 0 . 00 rt2:4 ... 0  000 idx:1 rt:4 qd=%qd
 VMOV_from_2gp1110 1100 0 . 01 rt2:4 ... 0  000 idx:1 rt:4 qd=%qd
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index 210e70d1727..36592b88372 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -213,6 +213,135 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
 #undef DO_VLDR
 #undef DO_VSTR
 
+/*
+ * Gather loads/scatter stores. Here each element of Qm specifies
+ * an offset to use from the base register Rn. In the