Signed-off-by: Richard Henderson <[email protected]>
---
target/arm/cpu-features.h | 5 +++++
target/arm/tcg/helper-defs.h | 1 +
target/arm/tcg/translate-sme.c | 6 ++++++
target/arm/tcg/vec_helper.c | 14 ++++++++++++++
target/arm/tcg/sme.decode | 6 ++++++
5 files changed, 32 insertions(+)
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index 29d0464a03..007e656ed4 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -1643,6 +1643,11 @@ static inline bool isar_feature_aa64_sme2_f8cvt(const
ARMISARegisters *id)
return isar_feature_aa64_sme2(id) && isar_feature_aa64_f8cvt(id);
}
+static inline bool isar_feature_aa64_sme2p1_lutv2(const ARMISARegisters *id)
+{ /* Combined test: true only if id passes both the sme2p1 and sme_lutv2 checks. */
+ return isar_feature_aa64_sme2p1(id) && isar_feature_aa64_sme_lutv2(id);
+}
+
static inline bool isar_feature_aa64_sve_i8mm(const ARMISARegisters *id)
{
return isar_feature_aa64_sve(id) && isar_feature_aa64_sme_sve_i8mm(id);
diff --git a/target/arm/tcg/helper-defs.h b/target/arm/tcg/helper-defs.h
index 05ccf795e8..8ec6c16319 100644
--- a/target/arm/tcg/helper-defs.h
+++ b/target/arm/tcg/helper-defs.h
@@ -1120,6 +1120,7 @@ DEF_HELPER_FLAGS_4(sme2_luti4_2b, TCG_CALL_NO_RWG, void,
ptr, ptr, env, i32)
DEF_HELPER_FLAGS_4(sme2_luti4_2h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_4(sme2_luti4_2s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sme2_luti4_4b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_4(sme2_luti4_4h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_4(sme2_luti4_4s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index 214427db1f..0af133c1c4 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -1846,6 +1846,9 @@ TRANS_FEAT(LUTI4_c_2s, aa64_sme2, do_lut, a,
gen_helper_sme2_luti4_2s, false)
TRANS_FEAT(LUTI4_c_4h, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_4h, false)
TRANS_FEAT(LUTI4_c_4s, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_4s, false)
+TRANS_FEAT(LUTI4_c_4b, aa64_sme_lutv2, do_lut, a,
+ gen_helper_sme2_luti4_4b, false)
+
static bool do_lut_s4(DisasContext *s, arg_lut *a, gen_helper_gvec_2_ptr *fn)
{
return !(a->zd & 0b01100) && do_lut(s, a, fn, true);
@@ -1866,3 +1869,6 @@ TRANS_FEAT(LUTI4_s_2b, aa64_sme2p1, do_lut_s8, a,
gen_helper_sme2_luti4_2b)
TRANS_FEAT(LUTI4_s_2h, aa64_sme2p1, do_lut_s8, a, gen_helper_sme2_luti4_2h)
TRANS_FEAT(LUTI4_s_4h, aa64_sme2p1, do_lut_s4, a, gen_helper_sme2_luti4_4h)
+
+TRANS_FEAT(LUTI4_s_4b, aa64_sme2p1_lutv2, do_lut_s4, a,
+ gen_helper_sme2_luti4_4b)
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index cb633817d7..eaf15a0cb5 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -3349,6 +3349,20 @@ DO_SME2_LUT(4,4,s, 4)
#undef DO_SME2_LUT
+void helper_sme2_luti4_4b(void *zd, void *zn, CPUARMState *env, uint32_t desc)
+{ /* Copy two source vectors from zn into a local buffer, then call do_lut_b with env's zt0. */
+ unsigned vl = simd_oprsz(desc); /* used below as a byte count */
+ unsigned strided = extract32(desc, SIMD_DATA_SHIFT, 1); /* 1-bit flag taken from desc */
+ unsigned dstride = !strided ? 1 : 4; /* scaled by sizeof(ARMVectorReg) at the call */
+ uint64_t indexes[ARM_MAX_VQ * 4]; /* receives 2 * vl bytes; NOTE(review): confirm vl bound */
+
+ memcpy(&indexes, zn, vl); /* first vl bytes come from the register at zn */
+ memcpy((void *)&indexes + vl, zn + sizeof(ARMVectorReg), vl); /* next vl bytes: one ARMVectorReg later */
+
+ do_lut_b(zd, indexes, (void *)env->za_state.zt0, vl, 0,
+ dstride * sizeof(ARMVectorReg), 4, 32, 4); /* remaining constants are do_lut_b parameters; meaning not visible here */
+}
+
void HELPER(gvec_luti2_b)(void *vd, void *vn, void *vm, uint32_t desc)
{
unsigned part = simd_data(desc);
diff --git a/target/arm/tcg/sme.decode b/target/arm/tcg/sme.decode
index 339de72b8a..495330aed7 100644
--- a/target/arm/tcg/sme.decode
+++ b/target/arm/tcg/sme.decode
@@ -1014,8 +1014,14 @@ LUTI4_c_2s 1100 0000 1000 101 idx:2 1 10 00 zn:5
.... 0 &lut zd=%zd_ax2
LUTI4_c_4h 1100 0000 1000 101 idx:1 10 01 00 zn:5 ... 00 &lut zd=%zd_ax4
LUTI4_c_4s 1100 0000 1000 101 idx:1 10 10 00 zn:5 ... 00 &lut zd=%zd_ax4
+LUTI4_c_4b 1100 0000 1000 101 1 00 00 00 ....0 ...00 \
+ &lut zd=%zd_ax4 zn=%zn_ax2 idx=0
+
# LUTI4, strided (must check zd alignment)
LUTI4_s_2b 1100 0000 1001 101 idx:2 1 00 00 zn:5 zd:5 &lut
LUTI4_s_2h 1100 0000 1001 101 idx:2 1 01 00 zn:5 zd:5 &lut
LUTI4_s_4h 1100 0000 1001 101 idx:1 10 01 00 zn:5 zd:5 &lut
+
+LUTI4_s_4b 1100 0000 1001 101 1 00 00 00 ....0 zd:5 \
+ &lut zn=%zn_ax2 idx=0
--
2.43.0