Signed-off-by: Richard Henderson <[email protected]>
---
target/arm/cpu-features.h | 5 +++++
target/arm/tcg/translate-sve.c | 33 +++++++++++++++++++++++++++++++++
target/arm/tcg/sve.decode | 7 +++++++
3 files changed, 45 insertions(+)
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index ee20d74164..c0b646415c 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -1545,6 +1545,11 @@ static inline bool isar_feature_aa64_sve_b16b16(const
ARMISARegisters *id)
return FIELD_EX64_IDREG(id, ID_AA64ZFR0, B16B16);
}
+static inline bool isar_feature_aa64_ssve_f8fma(const ARMISARegisters *id)
+{
+ return FIELD_EX64_IDREG(id, ID_AA64SMFR0, SF8FMA); /* SF8FMA field != 0 */
+}
+
static inline bool isar_feature_aa64_sme_b16b16(const ARMISARegisters *id)
{
return FIELD_EX64_IDREG(id, ID_AA64SMFR0, B16B16);
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index ea0d66178e..aa785fa0c3 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -8336,3 +8336,36 @@ static bool trans_LUTI4_2h(DisasContext *s, arg_LUTI4_2h
*a)
}
return true;
}
+
+/* Shared expander for FMLAL_hb and FMLAL_idx_hb; FN is the gvec helper. */
+static bool do_fmla_fp8(DisasContext *s, arg_rxx *a, gen_helper_gvec_3_ptr *fn)
+{
+    bool have_f8fma = dc_isar_feature(aa64_f8fma, s);
+    bool have_ssve = dc_isar_feature(aa64_ssve_f8fma, s);
+    unsigned vsz;
+
+    /* Reject unless SSVE F8FMA, or F8FMA together with SVE2, is present. */
+    if (!have_ssve && !(have_f8fma && dc_isar_feature(aa64_sve2, s))) {
+        return false;
+    }
+    if (!fpmr_access_check(s)) {
+        return true;
+    }
+    if (have_f8fma) {
+        s->is_nonstreaming = !have_ssve;
+    }
+    if (!(have_f8fma ? sve_access_check(s) : sme_sm_enabled_check(s))) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
+                       vec_full_reg_offset(s, a->rn),
+                       vec_full_reg_offset(s, a->rm),
+                       tcg_env, vsz, vsz,
+                       a->idxn | (a->idxm << 2), fn);
+    return true;
+}
+
+TRANS(FMLAL_hb, do_fmla_fp8, a, gen_helper_gvec_fmla_hb) /* idxm fixed at 0 */
+TRANS(FMLAL_idx_hb, do_fmla_fp8, a, gen_helper_gvec_fmla_idx_hb) /* indexed */
diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode
index e2106fc7f5..71ec09393c 100644
--- a/target/arm/tcg/sve.decode
+++ b/target/arm/tcg/sve.decode
@@ -29,6 +29,7 @@
%imm9_16_10 16:s6 10:3
%size_23 23:2
%dtype_23_13 23:2 13:2
+%index4_19_10 19:2 10:2 # 4-bit index: insn[20:19]:insn[11:10]
%index3_22_19 22:1 19:2
%index3_22_17 22:1 17:2
%index3_22_12 22:2 12:1
@@ -73,6 +74,7 @@
&rri rd rn imm
&rr_dbm rd rn dbm
&rrri rd rn rm imm
+&rxx rd rn rm idxn idxm # do_fmla_fp8 passes idxn | idxm << 2 as helper data
&rri_esz rd rn imm esz
&rrri_esz rd rn rm imm esz
&rrr_esz rd rn rm esz
@@ -1864,6 +1866,8 @@ BFMLALT_zzzw 01100100 11 1 ..... 10 0 00 1 ..... .....
@rda_rn_rm_ex esz=2
BFMLSLB_zzzw 01100100 11 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_ex esz=2
BFMLSLT_zzzw 01100100 11 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_ex esz=2
+FMLAL_hb 01100100 10 1 rm:5 100 idxn:1 10 rn:5 rd:5 &rxx idxm=0 # idxn is insn[12]
+
### SVE2 floating-point dot-product
FDOT_zzzz 01100100 00 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_ex esz=2
BFDOT_zzzz 01100100 01 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_ex esz=2
@@ -1880,6 +1884,9 @@ BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... .....
@rrxr_3a esz=2
BFMLSLB_zzxw 01100100 11 1 ..... 0110.0 ..... ..... @rrxr_3a esz=2
BFMLSLT_zzxw 01100100 11 1 ..... 0110.1 ..... ..... @rrxr_3a esz=2
+FMLAL_idx_hb 01100100 idxn:1 01 .. rm:3 0101 .. rn:5 rd:5 \
+ &rxx idxm=%index4_19_10 # idxn is insn[23]; rm is a 3-bit field
+
### SVE2 floating-point dot-product (indexed)
FDOT_zzxz 01100100 00 1 ..... 010000 ..... ..... @rrxr_2 esz=2
--
2.43.0