Rather than compare the fractions before subtracting, do the subtract and examine the result, possibly negating it.
Looking toward re-using addsub_floats(N**2) for the addition stage of muladd_floats(N), this will be important because of the longer fraction sizes. Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- fpu/softfloat.c | 4 ++++ fpu/softfloat-parts.c.inc | 32 ++++++++++++++++++++------------ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 294c573fb9..bf808a1b74 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -732,6 +732,7 @@ static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c, #define EQ0(P) ((P) == 0) #define EQ(P1, P2) ((P1) == (P2)) #define GEU(P1, P2) ((P1) >= (P2)) +#define NEG(P) (-(P)) #define OR(P1, P2) ((P1) | (P2)) #define SHL(P, C) ((P) << (C)) #define SHR(P, C) ((P) >> (C)) @@ -755,6 +756,7 @@ static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c, #undef EQ0 #undef EQ #undef GEU +#undef NEG #undef OR #undef SHL #undef SHR @@ -777,6 +779,7 @@ static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c, #define EQ0(P) (!int128_nz(P)) #define EQ(P1, P2) int128_eq(P1, P2) #define GEU(P1, P2) int128_geu(P1, P2) +#define NEG(P) int128_neg(P) #define OR(P1, P2) int128_or(P1, P2) #define SHL(P, C) int128_shl(P, C) #define SHR(P, C) int128_shr(P, C) @@ -801,6 +804,7 @@ static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c, #undef EQ0 #undef EQ #undef GEU +#undef NEG #undef SHL #undef SHR #undef SHR_JAM diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc index d2b6454903..9762cf8b66 100644 --- a/fpu/softfloat-parts.c.inc +++ b/fpu/softfloat-parts.c.inc @@ -254,29 +254,37 @@ FUNC(addsub_floats)(PARTS_TYPE a, PARTS_TYPE b, /* Subtraction */ if (likely(ab_mask == float_cmask_normal)) { - if (a.exp > b.exp || (a.exp == b.exp && GEU(a.frac, b.frac))) { - b.frac = SHR_JAM(b.frac, a.exp - b.exp); + int shift, diff_exp = a.exp - b.exp; + + if (diff_exp > 0) { + b.frac = SHR_JAM(b.frac, diff_exp); a.frac = 
SUB(a.frac, b.frac); - } else { - a.frac = SHR_JAM(a.frac, b.exp - a.exp); + } else if (diff_exp < 0) { + a.frac = SHR_JAM(a.frac, -diff_exp); a.frac = SUB(b.frac, a.frac); a.exp = b.exp; a.sign ^= 1; + } else { + a.frac = SUB(b.frac, a.frac); + /* a.frac < b.frac results in carry into the overflow bit. */ + if (HI(a.frac) & DECOMPOSED_OVERFLOW_BIT) { + a.frac = NEG(a.frac); + a.sign ^= 1; + } else if (EQ0(a.frac)) { + a.cls = float_class_zero; + goto sub_zero; + } } - if (EQ0(a.frac)) { - a.cls = float_class_zero; - a.sign = s->float_rounding_mode == float_round_down; - } else { - int shift = CLZ(a.frac) - 1; - a.frac = SHL(a.frac, shift); - a.exp -= shift; - } + shift = CLZ(a.frac) - 1; + a.frac = SHL(a.frac, shift); + a.exp -= shift; return a; } /* 0 - 0 */ if (ab_mask == float_cmask_zero) { + sub_zero: a.sign = s->float_rounding_mode == float_round_down; return a; } -- 2.25.1