FEAT_SVE_B16B16 adds bfloat16 versions of the FMLA and FMLS insns in the "SVE floating-point multiply-accumulate writing addend" group, encoded as sz=0b00.
Fixes: 7b1613a1020d2942 ("target/arm: Enable FEAT_SME2p1 on -cpu max") Signed-off-by: Peter Maydell <peter.mayd...@linaro.org> Reviewed-by: Richard Henderson <richard.hender...@linaro.org> Message-id: 20250718173032.2498900-6-peter.mayd...@linaro.org --- target/arm/tcg/helper-sve.h | 14 +++++++ target/arm/tcg/sve_helper.c | 69 ++++++++++++++++++++++++++++++++++ target/arm/tcg/translate-sve.c | 21 ++++++++--- 3 files changed, 98 insertions(+), 6 deletions(-) diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h index cb6c2355e58..5e4b7fd8cf4 100644 --- a/target/arm/tcg/helper-sve.h +++ b/target/arm/tcg/helper-sve.h @@ -1541,6 +1541,8 @@ DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, @@ -1548,6 +1550,8 @@ DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, @@ -1555,6 +1559,8 @@ DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, @@ -1562,6 +1568,8 @@ DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, @@ -1569,6 +1577,8 @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, @@ -1576,6 +1586,8 @@ DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, @@ -1583,6 +1595,8 @@ DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG, diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index 1a56fa86d9c..105cc5dd122 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -5099,6 +5099,75 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) #undef DO_ZPZ_FP +static void do_fmla_zpzzz_b16(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, + uint16_t neg1, uint16_t neg3, int flags) +{ + intptr_t i = simd_oprsz(desc); + uint64_t *g = vg; + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + i -= 2; + if (likely((pg >> (i & 63)) & 1)) { + float16 e1, e2, e3, r; + + e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; + e2 = *(uint16_t *)(vm + H1_2(i)); + e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; + r = bfloat16_muladd(e1, e2, e3, flags, status); + *(uint16_t *)(vd + H1_2(i)) = r; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, 0); +} + +void HELPER(sve_fmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0); +} + +void HELPER(sve_fnmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0); +} + +void HELPER(sve_fnmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0); +} + +void HELPER(sve_ah_fmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product); +} + +void HELPER(sve_ah_fnmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product | float_muladd_negate_c); +} + +void HELPER(sve_ah_fnmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_c); +} + static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc, uint16_t neg1, uint16_t neg3, int flags) diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index 918cf6e1bd4..37ecbc2b7c0 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -4368,19 +4368,28 @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) +static bool do_fmla_zpzzz(DisasContext *s, arg_rprrr_esz *a, + gen_helper_gvec_5_ptr *fn) +{ + /* These insns use MO_8 to encode BFloat16 */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { + return false; + } + return gen_gvec_fpst_zzzzp(s, fn, a->rd, a->rn, a->rm, a->ra, a->pg, 0, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); +} + #define DO_FMLA(NAME, name, ah_name) \ static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ - NULL, gen_helper_sve_##name##_h, \ + gen_helper_sve_##name##_b16, gen_helper_sve_##name##_h, \ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ }; \ static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \ - NULL, gen_helper_sve_##ah_name##_h, \ + gen_helper_sve_##ah_name##_b16, gen_helper_sve_##ah_name##_h, \ gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \ }; \ - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ - s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ - a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) + TRANS_FEAT(NAME, aa64_sve, do_fmla_zpzzz, a, \ + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) /* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) -- 2.43.0