Signed-off-by: Richard Henderson <[email protected]>
---
target/arm/cpu-features.h | 6 +++
target/arm/tcg/translate.h | 8 ++++
target/arm/tcg/translate-a64.c | 1 +
target/arm/tcg/translate-sve.c | 68 ++++++++++++++++++++++++++++++++++
target/arm/tcg/sve.decode | 11 +++++-
5 files changed, 93 insertions(+), 1 deletion(-)
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index c1f092b690..47294eb807 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -1643,6 +1643,12 @@ isar_feature_aa64_sme2_or_sve2_f8cvt(const ARMISARegisters *id)
return isar_feature_aa64_sme2_or_sve2(id) && isar_feature_aa64_f8cvt(id);
}
+static inline bool
+isar_feature_aa64_sme2_or_sve2_lut(const ARMISARegisters *id)
+{ /* True only when both the sme2_or_sve2 test and the lut test pass. */
+ return isar_feature_aa64_sme2_or_sve2(id) && isar_feature_aa64_lut(id);
+}
+
/*
* Feature tests for "does this exist in either 32-bit or 64-bit?"
*/
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 1648c2c96f..b703e75b70 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -90,6 +90,7 @@ typedef struct DisasContext {
int vl; /* current vector length in bytes */
int svl; /* current streaming vector length in bytes */
int max_svl; /* maximum implemented streaming vector length */
+ int max_any_vl; /* maximum implemented vector length */
bool vfp_enabled; /* FP enabled via FPSCR.EN */
int vec_len;
int vec_stride;
@@ -874,4 +875,11 @@ static inline void gen_restore_rmode(TCGv_i32 old, TCGv_ptr fpst)
return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); \
}
+#define TRANS_FEAT_SME1_NONSTREAMING(NAME, FEAT, FUNC, ...) \
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
+ { /* sets is_nonstreaming when aa64_sme2 is absent, before testing FEAT */ \
+ s->is_nonstreaming = !dc_isar_feature(aa64_sme2, s); \
+ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); \
+ } /* FUNC is only called when the FEAT test succeeds */
+
#endif /* TARGET_ARM_TRANSLATE_H */
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 508d8e377b..ee71c63116 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -10820,6 +10820,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
dc->max_svl = arm_cpu->sme_max_vq * 16;
+ dc->max_any_vl = MAX(dc->max_svl, arm_cpu->sve_max_vq * 16);
dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
dc->bt = EX_TBFLAG_A64(tb_flags, BT);
dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 13f7ab01af..ea0d66178e 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -8268,3 +8268,71 @@ TRANS_FEAT(LD1_zcrr_stride, aa64_sme2, gen_ldst_zcrr_c, a, false, true)
TRANS_FEAT(LD1_zcri_stride, aa64_sme2, gen_ldst_zcri_c, a, false, true)
TRANS_FEAT(ST1_zcrr_stride, aa64_sme2, gen_ldst_zcrr_c, a, true, true)
TRANS_FEAT(ST1_zcri_stride, aa64_sme2, gen_ldst_zcri_c, a, true, true)
+
+TRANS_FEAT_SME1_NONSTREAMING(LUTI2_1b, aa64_sme2_or_sve2_lut,
+ gen_gvec_ool_zzz, gen_helper_gvec_luti2_b,
+ a->rd, a->rn, a->rm, a->index) /* defines trans_LUTI2_1b */
+TRANS_FEAT_SME1_NONSTREAMING(LUTI2_1h, aa64_sme2_or_sve2_lut,
+ gen_gvec_ool_zzz, gen_helper_gvec_luti2_h,
+ a->rd, a->rn, a->rm, a->index) /* defines trans_LUTI2_1h */
+TRANS_FEAT_SME1_NONSTREAMING(LUTI4_1b, aa64_sme2_or_sve2_lut,
+ gen_gvec_ool_zzz, gen_helper_gvec_luti4_b,
+ a->rd, a->rn, a->rm, a->index) /* defines trans_LUTI4_1b */
+
+static bool trans_LUTI4_1h(DisasContext *s, arg_LUTI4_1h *a)
+{ /* Returns false only if the feature test fails; true on all other paths. */
+ if (!dc_isar_feature(aa64_sme2_or_sve2_lut, s)) {
+ return false;
+ }
+ s->is_nonstreaming = !dc_isar_feature(aa64_sme2, s); /* set if no SME2 */
+
+ /*
+ * The MaxImplementedAnyVL check (s->max_any_vl here) happens in the decode
+ * pseudocode, before the Check*SVEEnabled check in the operation pseudocode.
+ */
+ if (s->max_any_vl < 32) {
+ unallocated_encoding(s);
+ } else if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+
+ /* Then there's a second check against CurrentVL, here vsz. */
+ if (vsz < 32) {
+ unallocated_encoding(s);
+ } else {
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ vsz, vsz, a->index,
+ gen_helper_gvec_luti4_h);
+ }
+ }
+ return true; /* also reached after either unallocated_encoding() call */
+}
+
+static bool trans_LUTI4_2h(DisasContext *s, arg_LUTI4_2h *a)
+{ /* Returns false only if the feature test fails; true on all other paths. */
+ if (!dc_isar_feature(aa64_sme2_or_sve2_lut, s)) {
+ return false;
+ }
+ s->is_nonstreaming = !dc_isar_feature(aa64_sme2, s); /* set if no SME2 */
+
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ /*
+ * (Ab)use preg_tmp to merge two disjoint 128-bit quantities, taken from
+ * registers rn and (rn + 1) % 32, into a sequential 256-bit table.
+ */
+ QEMU_BUILD_BUG_ON(sizeof_field(CPUARMState, vfp.preg_tmp) < 32);
+ unsigned tmp_ofs = offsetof(CPUARMState, vfp.preg_tmp);
+ unsigned rn0_ofs = vec_full_reg_offset(s, a->rn);
+ unsigned rn1_ofs = vec_full_reg_offset(s, (a->rn + 1) % 32);
+
+ tcg_gen_gvec_mov(MO_64, tmp_ofs, rn0_ofs, 16, 16); /* first half */
+ tcg_gen_gvec_mov(MO_64, tmp_ofs + 16, rn1_ofs, 16, 16); /* second half */
+
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), tmp_ofs,
+ vec_full_reg_offset(s, a->rm),
+ vsz, vsz, a->index, gen_helper_gvec_luti4_h);
+ }
+ return true; /* also reached when sve_access_check() returns false */
+}
diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode
index 72755b27af..e2106fc7f5 100644
--- a/target/arm/tcg/sve.decode
+++ b/target/arm/tcg/sve.decode
@@ -31,6 +31,7 @@
%dtype_23_13 23:2 13:2
%index3_22_19 22:1 19:2
%index3_22_17 22:1 17:2
+%index3_22_12 22:2 12:1
%index3_19_11 19:2 11:1
%index2_20_11 20:1 11:1
@@ -1737,11 +1738,19 @@ RSUBHNT 01000101 .. 1 ..... 011 111 ..... ..... @rd_rn_rm
MATCH 01000101 .. 1 ..... 100 ... ..... 0 .... @pd_pg_rn_rm
NMATCH 01000101 .. 1 ..... 100 ... ..... 1 .... @pd_pg_rn_rm
-### SVE2 Histogram Computation
+### SVE2 Histogram Computation and Lookup Table
HISTCNT 01000101 .. 1 ..... 110 ... ..... ..... @rd_pg_rn_rm
HISTSEG 01000101 .. 1 ..... 101 000 ..... ..... @rd_rn_rm
+LUTI2_1b 01000101 index:2 1 rm:5 101100 rn:5 rd:5 &rrx_esz esz=0
+LUTI2_1h 01000101 .. 1 rm:5 101.10 rn:5 rd:5 \
+ &rrx_esz esz=1 index=%index3_22_12
+
+LUTI4_1b 01000101 index:1 11 rm:5 101001 rn:5 rd:5 &rrx_esz esz=0
+LUTI4_1h 01000101 index:2 1 rm:5 101111 rn:5 rd:5 &rrx_esz esz=1
+LUTI4_2h 01000101 index:2 1 rm:5 101101 rn:5 rd:5 &rrx_esz esz=1
+
## SVE2 floating-point pairwise operations
FADDP 01100100 .. 010 00 0 100 ... ..... ..... @rdn_pg_rm
--
2.43.0