On Thu, 7 May 2026 at 17:10, Richard Henderson <[email protected]> wrote: > > Check the likely case of normal product and normal or > zero addend first; shift NaN and infinity detection down; > end with zero product + addend. > > Signed-off-by: Richard Henderson <[email protected]> > --- > fpu/softfloat-parts.c.inc | 155 +++++++++++++++++--------------------- > 1 file changed, 70 insertions(+), 85 deletions(-) > > diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc > index 3a9c2748cd..3d2606c07c 100644 > --- a/fpu/softfloat-parts.c.inc > +++ b/fpu/softfloat-parts.c.inc > @@ -681,11 +681,47 @@ static FloatPartsN *partsN(muladd)(FloatPartsN *a, > FloatPartsN *b, > FloatPartsN *c, > int flags, float_status *s) > { > - int ab_mask, abc_mask; > - FloatPartsW p_widen, c_widen; > + int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); > + int c_mask = float_cmask(c->cls); > + int abc_mask = ab_mask | c_mask; > + bool c_sign = c->sign ^ !!(flags & float_muladd_negate_c); > + bool p_sign = a->sign ^ b->sign ^ !!(flags & > float_muladd_negate_product); > > - ab_mask = float_cmask(a->cls) | float_cmask(b->cls); > - abc_mask = float_cmask(c->cls) | ab_mask; > + /* > + * The "likely" case is A and B normal, so that the product is normal, > + * and C normal or zero so that the result is normal. > + */ > + int likely_mask = ab_mask | (c_mask & ~float_cmask_zero); > + if (likely(cmask_is_only_normals(likely_mask))) { > + record_denormals_used(abc_mask, s); > + > + /* Perform the multiplication step. */ > + FloatPartsW p_widen = { .sign = p_sign, .exp = a->exp + b->exp + 1 }; > + fracN(mulw)(&p_widen, a, b); > + if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) { > + fracW(add)(&p_widen, &p_widen, &p_widen); > + p_widen.exp -= 1; > + } > + > + /* Perform the addition step. */ > + if (!(c_mask & float_cmask_zero)) { > + /* Zero-extend C to less significant bits. 
*/ > + FloatPartsW c_widen = { .sign = c_sign, .exp = c->exp }; > + fracN(widen)(&c_widen, c); > + > + if (p_sign == c_sign) { > + partsW(add_normal)(&p_widen, &c_widen); > + } else if (!partsW(sub_normal)(&p_widen, &c_widen)) { > + goto return_sub_zero; > + } > + } > + > + /* Narrow with sticky bit, for proper rounding later. */ > + fracN(truncjam)(a, &p_widen); > + a->sign = p_widen.sign; > + a->exp = p_widen.exp; > + return a; > + } > > /* > * It is implementation-defined whether the cases of (0,inf,qnan) > @@ -698,97 +734,46 @@ static FloatPartsN *partsN(muladd)(FloatPartsN *a, > FloatPartsN *b, > return a; > } > > - if (flags & float_muladd_negate_c) { > - c->sign ^= 1; > + if (unlikely(ab_mask == float_cmask_infzero)) { > + /* Inf * Zero == NaN */ > + float_raise(float_flag_invalid | float_flag_invalid_imz, s); > + goto d_nan; > } > > - /* Compute the sign of the product into A. */ > - a->sign ^= b->sign; > - if (flags & float_muladd_negate_product) { > - a->sign ^= 1; > - } > - > - if (unlikely(!cmask_is_only_normals(ab_mask))) { > - if (unlikely(ab_mask == float_cmask_infzero)) { > - float_raise(float_flag_invalid | float_flag_invalid_imz, s); > + if (unlikely(ab_mask & float_cmask_inf)) { > + if ((c_mask & float_cmask_inf) && p_sign != c_sign) { > + /* Inf - Inf == NaN */ > + float_raise(float_flag_invalid | float_flag_invalid_isi, s); > goto d_nan; > } > - > - if (ab_mask & float_cmask_inf) { > - if (c->cls == float_class_inf && a->sign != c->sign) { > - float_raise(float_flag_invalid | float_flag_invalid_isi, s); > - goto d_nan; > - } > - goto return_inf; > - } > - > - g_assert(ab_mask & float_cmask_zero); > - if (is_anynorm(c->cls)) { > - *a = *c; > - goto finish_sign; > - } > - if (c->cls == float_class_zero) { > - if (flags & float_muladd_suppress_add_product_zero) { > - a->sign = c->sign; > - } else if (a->sign != c->sign) { > - goto return_sub_zero; > - } > - goto return_zero; > - } > - g_assert(c->cls == float_class_inf); > + /* Inf + C == Inf */ > 
+ record_denormals_used(abc_mask, s); > + a->sign = p_sign; > + a->cls = float_class_inf; > + return a; > } > - > - if (unlikely(c->cls == float_class_inf)) { > - a->sign = c->sign; > - goto return_inf; > - } > - > - /* Perform the multiplication step. */ > - p_widen.sign = a->sign; > - p_widen.exp = a->exp + b->exp + 1; > - fracN(mulw)(&p_widen, a, b); > - if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) { > - fracW(add)(&p_widen, &p_widen, &p_widen); > - p_widen.exp -= 1; > - } > - > - /* Perform the addition step. */ > - if (c->cls != float_class_zero) { > - /* Zero-extend C to less significant bits. */ > - fracN(widen)(&c_widen, c); > - c_widen.exp = c->exp; > - > - if (a->sign == c->sign) { > - partsW(add_normal)(&p_widen, &c_widen); > - } else if (!partsW(sub_normal)(&p_widen, &c_widen)) { > - goto return_sub_zero; > - } > - } > - > - /* Narrow with sticky bit, for proper rounding later. */ > - fracN(truncjam)(a, &p_widen); > - a->sign = p_widen.sign; > - a->exp = p_widen.exp; > - > - finish_sign: > - /* > - * All result types except for "return the default NaN > - * because this is an Invalid Operation" go through here; > - * this matches the set of cases where we consumed a > - * denormal input. > - */ > record_denormals_used(abc_mask, s); > - return a; > + > + /* Only remaining case is zero product. */ > + assert(ab_mask & float_cmask_zero);
The patch is a bit awkward to read, but looking at the code with the patch applied, don't we trip this assert for the "normal * normal + Inf" case?

The rearranged function checks for:
 * A, B normal, C normal or zero
 * any of A, B, C is a NaN
 * A, B are Inf and 0 in some order
 * A or B is Inf

and (normal, normal, inf) gets past all of those.

thanks
-- PMM
