Implement the MVE gather-loads and scatter-stores which
form the address by adding a base value from a scalar
register to an offset in each element of a vector.
Signed-off-by: Peter Maydell
---
 target/arm/helper-mve.h    |  32 +
 target/arm/mve.decode      |  12
 target/arm/mve_helper.c    | 129 +
 target/arm/translate-mve.c |  91 ++
 4 files changed, 264 insertions(+)
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index 55f9151ccbf..9c570270c61 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -33,6 +33,38 @@ DEF_HELPER_FLAGS_3(mve_vstrb_h, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vstrb_w, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vstrh_w, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrw_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrd_sg_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vstrb_sg_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrb_sg_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrb_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrh_sg_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrh_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrw_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrd_sg_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_os_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_os_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrw_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrd_sg_os_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vstrh_sg_os_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrh_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrw_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrd_sg_os_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_3(mve_vdup, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vidupb, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index 82dc07bc30e..b0e39f36723 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -42,11 +42,18 @@
 &shl_scalar qda rm size
 &vmaxv qm rda size
 &vabav qn qm rda size
+&vldst_sg qd qm rn size msize os
+
+# scatter-gather memory size is in bits 6:4
+%sg_msize 6:1 4:1
 @vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
 # Note that both Rn and Qd are 3 bits only (no D bit)
 @vldst_wn ... u:1 ... . . . . l:1 . rn:3 qd:3 . ... .. imm:7 &vldr_vstr
+@vldst_sg .... .... .... rn:4 .... ... size:2 ... ... os:1 &vldst_sg \
+          qd=%qd qm=%qm msize=%sg_msize
+
@1op size:2 .. &1op qd=%qd qm=%qm
@1op_nosz &1op qd=%qd qm=%qm size=0
@2op .. size:2 &2op qd=%qd qm=%qm qn=%qn
@@ -136,6 +143,11 @@ VLDR_VSTR        1110110 1 a:1 . w:1 . .... ... 111101 .......   @vldr_vstr \
 VLDR_VSTR        1110110 1 a:1 . w:1 . .... ... 111110 .......   @vldr_vstr \
                  size=2 p=1
+# gather loads/scatter stores
+VLDR_S_sg        111 0 1100 1 . 01 .... ... 0 111 . .... .... @vldst_sg
+VLDR_U_sg        111 1 1100 1 . 01 .... ... 0 111 . .... .... @vldst_sg
+VSTR_sg          111 0 1100 1 . 00 .... ... 0 111 . .... .... @vldst_sg
+
# Moves between 2 32-bit vector lanes and 2 general purpose registers
 VMOV_to_2gp      1110 1100 0 . 00 rt2:4 ... 0 1111 000 idx:1 rt:4 qd=%qd
 VMOV_from_2gp    1110 1100 0 . 01 rt2:4 ... 0 1111 000 idx:1 rt:4 qd=%qd
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index 210e70d1727..36592b88372 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -213,6 +213,135 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
#undef DO_VLDR
#undef DO_VSTR
+/*
+ * Gather loads/scatter stores. Here each element of Qm specifies
+ * an offset to use from the base register Rn. In the