On Mon, Feb 23, 2026 at 07:21:53PM +1100, Richard Henderson wrote: > From: Max Chou <[email protected]> > > Signed-off-by: Max Chou <[email protected]> > [rth: Split out of a larger patch; adjust overflow detection.] > Signed-off-by: Richard Henderson <[email protected]> > --- > include/fpu/softfloat-types.h | 1 + > include/fpu/softfloat.h | 4 +++ > fpu/softfloat.c | 62 +++++++++++++++++++++++++++++++++++ > fpu/softfloat-parts.c.inc | 45 +++++++++++++++++++++++-- > 4 files changed, 109 insertions(+), 3 deletions(-) > > diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h > index be7e2de6e3..9c84a101e5 100644 > --- a/include/fpu/softfloat-types.h > +++ b/include/fpu/softfloat-types.h > @@ -122,6 +122,7 @@ typedef uint16_t bfloat16; > /* > * Open Compute Project (OCP) Microscaling Formats > */ > +typedef uint8_t float8_e4m3; > typedef uint8_t float8_e5m2; > > /* > diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h > index 4385462992..31d3f76d3f 100644 > --- a/include/fpu/softfloat.h > +++ b/include/fpu/softfloat.h > @@ -193,6 +193,10 @@ float128 uint128_to_float128(Int128, float_status > *status); > | OCP FP8 conversion routines. 
> > *----------------------------------------------------------------------------*/ > > +bfloat16 float8_e4m3_to_bfloat16(float8_e4m3, float_status *status); > +float8_e4m3 bfloat16_to_float8_e4m3(bfloat16, bool sat, float_status > *status); > +float8_e4m3 float32_to_float8_e4m3(float32, bool sat, float_status *status); > + > bfloat16 float8_e5m2_to_bfloat16(float8_e5m2, float_status *status); > float8_e5m2 bfloat16_to_float8_e5m2(bfloat16, bool sat, float_status > *status); > float8_e5m2 float32_to_float8_e5m2(float32, bool sat, float_status *status); > diff --git a/fpu/softfloat.c b/fpu/softfloat.c > index 0dc769283d..6e21882ab2 100644 > --- a/fpu/softfloat.c > +++ b/fpu/softfloat.c > @@ -528,6 +528,8 @@ typedef enum __attribute__((__packed__)) { > float_expmax_ieee, > /* exp==max is a normal number; no infinity or nan representation. */ > float_expmax_normal, > + /* exp==max, frac==max ? nan : normal; no infinity representation. */ > + float_expmax_e4m3, > } FloatFmtExpMaxKind; > > /* > @@ -572,6 +574,14 @@ typedef struct { > .frac_shift = (-F - 1) & 63, \ > .round_mask = (1ull << ((-F - 1) & 63)) - 1 > > +static const FloatFmt float8_e4m3_params = { > + FLOAT_PARAMS(4, 3), > + .exp_max_kind = float_expmax_e4m3 > +}; > + > +/* 110 << frac_shift, with the implicit bit set */ > +#define E4M3_NORMAL_FRAC_MAX 0xe000000000000000ull > + > static const FloatFmt float8_e5m2_params = { > FLOAT_PARAMS(5, 2) > }; > @@ -631,6 +641,11 @@ static void unpack_raw64(FloatParts64 *r, const FloatFmt > *fmt, uint64_t raw) > }; > } > > +static void QEMU_FLATTEN float8_e4m3_unpack_raw(FloatParts64 *p, float8_e4m3 > f) > +{ > + unpack_raw64(p, &float8_e4m3_params, f); > +} > + > static void QEMU_FLATTEN float8_e5m2_unpack_raw(FloatParts64 *p, float8_e5m2 > f) > { > unpack_raw64(p, &float8_e5m2_params, f); > @@ -693,6 +708,11 @@ static uint64_t pack_raw64(const FloatParts64 *p, const > FloatFmt *fmt) > return ret; > } > > +static float8_e4m3 QEMU_FLATTEN float8_e4m3_pack_raw(const 
FloatParts64 *p) > +{ > + return pack_raw64(p, &float8_e4m3_params); > +} > + > static float8_e5m2 QEMU_FLATTEN float8_e5m2_pack_raw(const FloatParts64 *p) > { > return pack_raw64(p, &float8_e5m2_params); > @@ -1689,6 +1709,13 @@ static const uint16_t rsqrt_tab[128] = { > * Pack/unpack routines with a specific FloatFmt. > */ > > +static void float8_e4m3_unpack_canonical(FloatParts64 *p, float8_e4m3 f, > + float_status *s) > +{ > + float8_e4m3_unpack_raw(p, f); > + parts_canonicalize(p, s, &float8_e4m3_params); > +} > + > static void float8_e5m2_unpack_canonical(FloatParts64 *p, float8_e5m2 f, > float_status *s) > { > @@ -1716,6 +1743,14 @@ static void bfloat16_unpack_canonical(FloatParts64 *p, > bfloat16 f, > parts_canonicalize(p, s, &bfloat16_params); > } > > +static float8_e4m3 float8_e4m3_round_pack_canonical(FloatParts64 *p, > + float_status *s, > + bool saturate) > +{ > + parts_uncanon(p, s, &float8_e4m3_params, saturate); > + return float8_e4m3_pack_raw(p); > +} > + > static float8_e5m2 float8_e5m2_round_pack_canonical(FloatParts64 *p, > float_status *s, > bool saturate) > @@ -2894,6 +2929,15 @@ static void parts_float_to_float_widen(FloatParts128 > *a, FloatParts64 *b, > } > } > > +bfloat16 float8_e4m3_to_bfloat16(float8_e4m3 a, float_status *s) > +{ > + FloatParts64 p; > + > + float8_e4m3_unpack_canonical(&p, a, s); > + parts_float_to_float(&p, s); > + return bfloat16_round_pack_canonical(&p, s); > +} > + > bfloat16 float8_e5m2_to_bfloat16(float8_e5m2 a, float_status *s) > { > FloatParts64 p; > @@ -2923,6 +2967,15 @@ float64 float16_to_float64(float16 a, bool ieee, > float_status *s) > return float64_round_pack_canonical(&p, s); > } > > +float8_e4m3 float32_to_float8_e4m3(float32 a, bool saturate, float_status *s) > +{ > + FloatParts64 p; > + > + float32_unpack_canonical(&p, a, s); > + parts_float_to_float(&p, s); > + return float8_e4m3_round_pack_canonical(&p, s, saturate); > +} > + > float8_e5m2 float32_to_float8_e5m2(float32 a, bool saturate, 
float_status *s) > { > FloatParts64 p; > @@ -2999,6 +3052,15 @@ float32 float64_to_float32(float64 a, float_status *s) > return float32_round_pack_canonical(&p, s); > } > > +float8_e4m3 bfloat16_to_float8_e4m3(bfloat16 a, bool saturate, float_status > *s) > +{ > + FloatParts64 p; > + > + bfloat16_unpack_canonical(&p, a, s); > + parts_float_to_float(&p, s); > + return float8_e4m3_round_pack_canonical(&p, s, saturate); > +} > + > float8_e5m2 bfloat16_to_float8_e5m2(bfloat16 a, bool saturate, float_status > *s) > { > FloatParts64 p; > diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc > index 09be686645..61b07307bf 100644 > --- a/fpu/softfloat-parts.c.inc > +++ b/fpu/softfloat-parts.c.inc > @@ -242,6 +242,15 @@ static void partsN(canonicalize)(FloatPartsN *p, > float_status *status, > return; > case float_expmax_normal: > break; > + case float_expmax_e4m3: > + if (p->frac_hi == 0b111) { > + frac_shl(p, fmt->frac_shift); > + p->cls = (parts_is_snan_frac(p->frac_hi, status) > + ? float_class_snan : float_class_qnan); > + return; > + } > + /* otherwise normal */ > + break; > default: > g_assert_not_reached(); > } > @@ -262,6 +271,21 @@ static void partsN(canonicalize)(FloatPartsN *p, > float_status *status, > * The saturate parameter controls saturation behavior for formats that > * support it -- when true, overflow produces max normal instead of infinity. > */ > + > +/* Helper for uncanon_normal and uncanon, for FP8 E4M3. 
*/ > +static void partsN(uncanon_e4m3_overflow)(FloatPartsN *p, float_status *s, > + const FloatFmt *fmt, bool saturate) > +{ > + assert(N == 64); > + float_raise(float_flag_overflow | float_flag_inexact, s); > + if (saturate) { > + p->exp = fmt->exp_max; > + p->frac_hi = E4M3_NORMAL_FRAC_MAX; > + } else { > + parts_default_nan(p, s); > + } > +} > + > static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, > const FloatFmt *fmt, bool saturate) > { > @@ -360,6 +384,12 @@ static void partsN(uncanon_normal)(FloatPartsN *p, > float_status *s, > } > break; > > + case float_expmax_e4m3: > + if (exp > exp_max || p->frac_hi > E4M3_NORMAL_FRAC_MAX) { > + partsN(uncanon_e4m3_overflow)(p, s, fmt, overflow_norm); > + } > + break; > + > default: > g_assert_not_reached(); > } > @@ -459,9 +489,18 @@ static void partsN(uncanon)(FloatPartsN *p, float_status > *s, > frac_clear(p); > return; > case float_class_inf: > - assert(fmt->exp_max_kind == float_expmax_ieee); > - p->exp = fmt->exp_max; > - frac_clear(p); > + switch (fmt->exp_max_kind) { > + case float_expmax_ieee: > + p->exp = fmt->exp_max; > + frac_clear(p); > + break; > + case float_expmax_e4m3: > + partsN(uncanon_e4m3_overflow)(p, s, fmt, saturate); > + break; > + case float_expmax_normal: > + default: > + g_assert_not_reached(); > + } > return; > case float_class_qnan: > case float_class_snan: > -- > 2.43.0 >
Reviewed-by: Chao Liu <[email protected]>

Thanks,
Chao
