From: Richard Henderson <[email protected]> This argument is no longer used.
Reviewed-by: Philippe Mathieu-Daudé <[email protected]> Signed-off-by: Richard Henderson <[email protected]> Message-id: [email protected] Message-Id: <[email protected]> Signed-off-by: Peter Maydell <[email protected]> --- target/arm/tcg/sme_helper.c | 6 ++--- target/arm/tcg/vec_helper.c | 50 +++++++++++++++-------------------- target/arm/tcg/vec_internal.h | 12 +++------ 3 files changed, 29 insertions(+), 39 deletions(-) diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c index 2bfb12c086..a1c08c91d8 100644 --- a/target/arm/tcg/sme_helper.c +++ b/target/arm/tcg/sme_helper.c @@ -1429,9 +1429,9 @@ static void do_bfmopa_w(void *vza, void *vzn, void *vzm, uint32_t desc, uint32_t negx, bool ah_neg) { intptr_t row, col, oprsz = simd_maxsz(desc); - float_status fpst, fpst_odd; + float_status fpst; - if (is_ebf(env, &fpst, &fpst_odd)) { + if (is_ebf(env, &fpst)) { for (row = 0; row < oprsz; ) { uint16_t prow = pn[H2(row >> 4)]; do { @@ -1452,7 +1452,7 @@ static void do_bfmopa_w(void *vza, void *vzn, void *vzm, uint32_t m = *(uint32_t *)(vzm + H1_4(col)); m = f16mop_adj_pair(m, pcol, 0); - *a = bfdotadd_ebf(*a, n, m, &fpst, &fpst_odd); + *a = bfdotadd_ebf(*a, n, m, &fpst); } col += 4; pcol >>= 4; diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index 12fb62958f..bd8ae5d6a4 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -2845,7 +2845,7 @@ DO_MMLA_B(gvec_usmmla_b, do_usmmla_b) * BFloat16 Dot Product */ -bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) +bool is_ebf(CPUARMState *env, float_status *statusp) { /* * For BFDOT, BFMMLA, etc, the behaviour depends on FPCR.EBF. @@ -2865,11 +2865,7 @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) *statusp = env->vfp.fp_status[is_a64(env) ? FPST_A64 : FPST_A32]; set_default_nan_mode(true, statusp); - if (ebf) { - /* EBF=1 needs to do a step with round-to-odd semantics */ - *oddstatusp = *statusp; - set_float_rounding_mode(float_round_to_odd, oddstatusp); - } else { + if (!ebf) { set_flush_to_zero(true, statusp); set_flush_inputs_to_zero(true, statusp); set_float_rounding_mode(float_round_to_odd_inf, statusp); @@ -2893,8 +2889,7 @@ float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst) return t1; } -float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, - float_status *fpst, float_status *fpst_odd) +float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst) { /* Unpack two BFloat16 into two Float32, trivially. */ float32 s1r = e1 << 16; @@ -2964,11 +2959,11 @@ void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, intptr_t i, opr_sz = simd_oprsz(desc); float32 *d = vd, *a = va; uint32_t *n = vn, *m = vm; - float_status fpst, fpst_odd; + float_status fpst; - if (is_ebf(env, &fpst, &fpst_odd)) { + if (is_ebf(env, &fpst)) { for (i = 0; i < opr_sz / 4; ++i) { - d[i] = bfdotadd_ebf(a[i], n[i], m[i], &fpst, &fpst_odd); + d[i] = bfdotadd_ebf(a[i], n[i], m[i], &fpst); } } else { for (i = 0; i < opr_sz / 4; ++i) { @@ -2987,14 +2982,14 @@ void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm, intptr_t eltspersegment = MIN(16 / 4, elements); float32 *d = vd, *a = va; uint32_t *n = vn, *m = vm; - float_status fpst, fpst_odd; + float_status fpst; - if (is_ebf(env, &fpst, &fpst_odd)) { + if (is_ebf(env, &fpst)) { for (i = 0; i < elements; i += eltspersegment) { uint32_t m_idx = m[i + H4(index)]; for (j = i; j < i + eltspersegment; j++) { - d[j] = bfdotadd_ebf(a[j], n[j], m_idx, &fpst, &fpst_odd); + d[j] = bfdotadd_ebf(a[j], n[j], m_idx, &fpst); } } } else { @@ -3021,17 +3016,16 @@ void HELPER(sme2_bfvdot_idx)(void *vd, void *vn, void *vm, uint16_t *n0 = vn; uint16_t *n1 = vn + sizeof(ARMVectorReg); uint32_t *m = vm; - float_status fpst, fpst_odd; + float_status fpst; - if (is_ebf(env, &fpst, &fpst_odd)) { + if (is_ebf(env, &fpst)) { for (i = 0; i < elements; i += eltspersegment) { uint32_t m_idx = m[i + H4(idx)]; for (j = 0; j < eltspersegment; j++) { uint32_t nn = (n0[H2(2 * (i + j) + sel)]) | (n1[H2(2 * (i + j) + sel)] << 16); - d[i + H4(j)] = bfdotadd_ebf(a[i + H4(j)], nn, m_idx, - &fpst, &fpst_odd); + d[i + H4(j)] = bfdotadd_ebf(a[i + H4(j)], nn, m_idx, &fpst); } } } else { @@ -3054,9 +3048,9 @@ void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, intptr_t s, opr_sz = simd_oprsz(desc); float32 *d = vd, *a = va; uint32_t *n = vn, *m = vm; - float_status fpst, fpst_odd; + float_status fpst; - if (is_ebf(env, &fpst, &fpst_odd)) { + if (is_ebf(env, &fpst)) { for (s = 0; s < opr_sz / 4; s += 4) { float32 sum00, sum01, sum10, sum11; @@ -3068,20 +3062,20 @@ void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, * i j i k j k */ sum00 = a[s + H4(0 + 0)]; - sum00 = bfdotadd_ebf(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)], &fpst, &fpst_odd); - sum00 = bfdotadd_ebf(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)], &fpst, &fpst_odd); + sum00 = bfdotadd_ebf(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)], &fpst); + sum00 = bfdotadd_ebf(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)], &fpst); sum01 = a[s + H4(0 + 1)]; - sum01 = bfdotadd_ebf(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)], &fpst, &fpst_odd); - sum01 = bfdotadd_ebf(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)], &fpst, &fpst_odd); + sum01 = bfdotadd_ebf(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)], &fpst); + sum01 = bfdotadd_ebf(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)], &fpst); sum10 = a[s + H4(2 + 0)]; - sum10 = bfdotadd_ebf(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)], &fpst, &fpst_odd); - sum10 = bfdotadd_ebf(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)], &fpst, &fpst_odd); + sum10 = bfdotadd_ebf(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)], &fpst); + sum10 = bfdotadd_ebf(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)], &fpst); sum11 = a[s + H4(2 + 1)]; - sum11 = bfdotadd_ebf(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)], &fpst, &fpst_odd); - sum11 = bfdotadd_ebf(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)], &fpst, &fpst_odd); + sum11 = bfdotadd_ebf(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)], &fpst); + sum11 = bfdotadd_ebf(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)], &fpst); d[s + H4(0 + 0)] = sum00; d[s + H4(0 + 1)] = sum01; diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h index 84f7f15228..06fef2a209 100644 --- a/target/arm/tcg/vec_internal.h +++ b/target/arm/tcg/vec_internal.h @@ -271,7 +271,6 @@ float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst); * @sum: addend * @e1, @e2: multiplicand vectors * @fpst: floating-point status to use - * @fpst_odd: floating-point status to use for round-to-odd operations * * BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum. * The @e1 and @e2 operands correspond to the 32-bit source vector @@ -280,23 +279,20 @@ float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst); * Corresponds to the ARM pseudocode function BFDotAdd, specialized * for the FPCR.EBF == 1 case. */ -float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, - float_status *fpst, float_status *fpst_odd); +float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst); /** * is_ebf: * @env: CPU state * @statusp: pointer to floating point status to fill in - * @oddstatusp: pointer to floating point status to fill in for round-to-odd * * Determine whether a BFDotAdd operation should use FPCR.EBF = 0 - * or FPCR.EBF = 1 semantics. On return, has initialized *statusp - * and *oddstatusp to suitable float_status arguments to use with either - * bfdotadd() or bfdotadd_ebf(). + * or FPCR.EBF = 1 semantics. On return, has initialized *statusp as suitable + * for float_status arguments to either bfdotadd() or bfdotadd_ebf(). * Returns true for EBF = 1, false for EBF = 0. (The caller should use this * to decide whether to call bfdotadd() or bfdotadd_ebf().) */ -bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); +bool is_ebf(CPUARMState *env, float_status *statusp); /* * Negate as for FPCR.AH=1 -- do not negate NaNs. -- 2.43.0
