Signed-off-by: Richard Henderson <[email protected]>
---
target/arm/cpu-features.h | 5 +++++
target/arm/tcg/helper-fp8-defs.h | 2 ++
target/arm/tcg/fp8_helper.c | 25 +++++++++++++++++++++++++
target/arm/tcg/translate-a64.c | 1 +
target/arm/tcg/a64.decode | 2 ++
5 files changed, 35 insertions(+)
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index 90098c3cbe..67b9d7e982 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -1620,6 +1620,11 @@ static inline bool isar_feature_aa64_f8dp2(const ARMISARegisters *id)
return FIELD_EX64_IDREG(id, ID_AA64FPFR0, F8DP2);
}
+static inline bool isar_feature_aa64_f8mm8(const ARMISARegisters *id)
+{
+ return FIELD_EX64_IDREG(id, ID_AA64FPFR0, F8MM8); /* nonzero => true */
+}
+
/*
* Combinations of feature tests, for ease of use with TRANS_FEAT.
*/
diff --git a/target/arm/tcg/helper-fp8-defs.h b/target/arm/tcg/helper-fp8-defs.h
index 5995d77577..3c74f02022 100644
--- a/target/arm/tcg/helper-fp8-defs.h
+++ b/target/arm/tcg/helper-fp8-defs.h
@@ -35,3 +35,5 @@ DEF_HELPER_FLAGS_5(gvec_fdot_idx_sb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env,
DEF_HELPER_FLAGS_5(gvec_fdot_hb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_fdot_idx_hb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmmla_sb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
diff --git a/target/arm/tcg/fp8_helper.c b/target/arm/tcg/fp8_helper.c
index d3bbea1735..160850be53 100644
--- a/target/arm/tcg/fp8_helper.c
+++ b/target/arm/tcg/fp8_helper.c
@@ -807,3 +807,28 @@ void HELPER(gvec_fdot_idx_hb)(void *vd, void *vn, void *vm,
clear_tail(vd, oprsz, simd_maxsz(desc));
}
+
+void HELPER(gvec_fmmla_sb)(void *vd, void *vn, void *vm,
+ CPUARMState *env, uint32_t desc)
+{
+ FP8MulContext ctx = fp8_mul_start(env, -1);
+ size_t oprsz = simd_oprsz(desc);
+ size_t nseg = oprsz / 16; /* n and m advance 16 bytes per pass */
+ uint64_t *n = vn;
+ uint64_t *m = vm;
+ float32 *d = vd;
+ /* Per 16-byte segment: d[2*i+j] = f8dotadd_s(n[i], m[j], 8, d[2*i+j]). */
+ for (size_t seg = 0; seg < nseg; seg++, d += 4, n += 2, m += 2) {
+ float32 d0 = f8dotadd_s(n[0], m[0], 8, d[H4(0)], &ctx);
+ float32 d1 = f8dotadd_s(n[0], m[1], 8, d[H4(1)], &ctx);
+ float32 d2 = f8dotadd_s(n[1], m[0], 8, d[H4(2)], &ctx);
+ float32 d3 = f8dotadd_s(n[1], m[1], 8, d[H4(3)], &ctx);
+ /* Write back only after all four reads -- presumably vd may alias vn/vm. */
+ d[H4(0)] = d0;
+ d[H4(1)] = d1;
+ d[H4(2)] = d2;
+ d[H4(3)] = d3;
+ }
+
+ clear_tail(vd, oprsz, simd_maxsz(desc));
+}
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index c5ea6b27a9..02d5e007f9 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -7418,6 +7418,7 @@ static bool do_f8dot(DisasContext *s, arg_qrrr_e *a,
TRANS_FEAT(FDOT_sb_v, aa64_f8dp4, do_f8dot, a, gen_helper_gvec_fdot_sb)
TRANS_FEAT(FDOT_hb_v, aa64_f8dp2, do_f8dot, a, gen_helper_gvec_fdot_hb)
+TRANS_FEAT(FMMLA_sb, aa64_f8mm8, do_f8dot, a, gen_helper_gvec_fmmla_sb)
static bool do_f8dot_idx(DisasContext *s, arg_qrrx_e *a,
gen_helper_gvec_3_ptr *fn)
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index d1254355b6..6404c26540 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -1216,6 +1216,8 @@ FMLALL_sb_v 0.00 1110 0.0 rm:5 110001 rn:5 rd:5 \
FDOT_sb_v 0.00 1110 000 ..... 11111 1 ..... ..... @qrrr_s
FDOT_hb_v 0.00 1110 010 ..... 11111 1 ..... ..... @qrrr_h
+FMMLA_sb 0110 1110 100 ..... 11101 1 ..... ..... @rrr_q1e0
+
### Advanced SIMD scalar x indexed element
FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
--
2.43.0