On Mon, Feb 23, 2026 at 07:21:52PM +1100, Richard Henderson wrote: > From: Max Chou <[email protected]> > > Signed-off-by: Max Chou <[email protected]> > [rth: Split out of a larger patch] > Signed-off-by: Richard Henderson <[email protected]> > --- > include/fpu/softfloat-types.h | 5 +++ > include/fpu/softfloat.h | 8 ++++ > fpu/softfloat.c | 85 +++++++++++++++++++++++++++++++++++ > 3 files changed, 98 insertions(+) > > diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h > index 8f82fdfc97..be7e2de6e3 100644 > --- a/include/fpu/softfloat-types.h > +++ b/include/fpu/softfloat-types.h > @@ -119,6 +119,11 @@ typedef struct { > */ > typedef uint16_t bfloat16; > > +/* > + * Open Compute Project (OCP) Microscaling Formats > + */ > +typedef uint8_t float8_e5m2; > + > /* > * Software IEC/IEEE floating-point underflow tininess-detection mode. > */ > diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h > index 69b9041ef0..4385462992 100644 > --- a/include/fpu/softfloat.h > +++ b/include/fpu/softfloat.h > @@ -189,6 +189,14 @@ float128 int128_to_float128(Int128, float_status > *status); > float128 uint64_to_float128(uint64_t, float_status *status); > float128 uint128_to_float128(Int128, float_status *status); > > +/*---------------------------------------------------------------------------- > +| OCP FP8 conversion routines. > +*----------------------------------------------------------------------------*/ > + > +bfloat16 float8_e5m2_to_bfloat16(float8_e5m2, float_status *status); > +float8_e5m2 bfloat16_to_float8_e5m2(bfloat16, bool sat, float_status > *status); > +float8_e5m2 float32_to_float8_e5m2(float32, bool sat, float_status *status); > + > > /*---------------------------------------------------------------------------- > | Software half-precision conversion routines. 
> > *----------------------------------------------------------------------------*/ > diff --git a/fpu/softfloat.c b/fpu/softfloat.c > index b3c4104854..0dc769283d 100644 > --- a/fpu/softfloat.c > +++ b/fpu/softfloat.c > @@ -572,6 +572,10 @@ typedef struct { > .frac_shift = (-F - 1) & 63, \ > .round_mask = (1ull << ((-F - 1) & 63)) - 1 > > +static const FloatFmt float8_e5m2_params = { > + FLOAT_PARAMS(5, 2) > +}; > + > static const FloatFmt float16_params = { > FLOAT_PARAMS(5, 10) > }; > @@ -627,6 +631,11 @@ static void unpack_raw64(FloatParts64 *r, const FloatFmt > *fmt, uint64_t raw) > }; > } > > +static void QEMU_FLATTEN float8_e5m2_unpack_raw(FloatParts64 *p, float8_e5m2 > f) > +{ > + unpack_raw64(p, &float8_e5m2_params, f); > +} > + > static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f) > { > unpack_raw64(p, &float16_params, f); > @@ -684,6 +693,11 @@ static uint64_t pack_raw64(const FloatParts64 *p, const > FloatFmt *fmt) > return ret; > } > > +static float8_e5m2 QEMU_FLATTEN float8_e5m2_pack_raw(const FloatParts64 *p) > +{ > + return pack_raw64(p, &float8_e5m2_params); > +} > + > static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p) > { > return make_float16(pack_raw64(p, &float16_params)); > @@ -1675,6 +1689,13 @@ static const uint16_t rsqrt_tab[128] = { > * Pack/unpack routines with a specific FloatFmt. 
> */ > > +static void float8_e5m2_unpack_canonical(FloatParts64 *p, float8_e5m2 f, > + float_status *s) > +{ > + float8_e5m2_unpack_raw(p, f); > + parts_canonicalize(p, s, &float8_e5m2_params); > +} > + > static void float16a_unpack_canonical(FloatParts64 *p, float16 f, > float_status *s, const FloatFmt > *params) > { > @@ -1695,6 +1716,14 @@ static void bfloat16_unpack_canonical(FloatParts64 *p, > bfloat16 f, > parts_canonicalize(p, s, &bfloat16_params); > } > > +static float8_e5m2 float8_e5m2_round_pack_canonical(FloatParts64 *p, > + float_status *s, > + bool saturate) > +{ > + parts_uncanon(p, s, &float8_e5m2_params, saturate); > + return float8_e5m2_pack_raw(p); > +} > + > static float16 float16a_round_pack_canonical(FloatParts64 *p, > float_status *s, > const FloatFmt *params) > @@ -2772,6 +2801,35 @@ static void parts_float_to_ahp(FloatParts64 *a, > float_status *s) > } > } > > +static void parts_float_to_e5m2(FloatParts64 *a, float_status *s, bool > saturate) > +{ > + switch (a->cls) { > + case float_class_snan: > + case float_class_qnan: > + parts_return_nan(a, s); > + break; > + > + case float_class_inf: > + /* Per OCP, conversion in SATURATE mode bounds Inf to MAX. 
*/ > + if (saturate) { > + a->cls = float_class_normal; > + a->exp = float8_e5m2_params.exp_max - 1; > + a->frac = MAKE_64BIT_MASK(float8_e5m2_params.frac_shift, > + float8_e5m2_params.frac_size + 1); > + } > + break; > + > + case float_class_denormal: > + float_raise(float_flag_input_denormal_used, s); > + break; > + case float_class_normal: > + case float_class_zero: > + break; > + default: > + g_assert_not_reached(); > + } > +} > + > static void parts64_float_to_float(FloatParts64 *a, float_status *s) > { > if (is_nan(a->cls)) { > @@ -2836,6 +2894,15 @@ static void parts_float_to_float_widen(FloatParts128 > *a, FloatParts64 *b, > } > } > > +bfloat16 float8_e5m2_to_bfloat16(float8_e5m2 a, float_status *s) > +{ > + FloatParts64 p; > + > + float8_e5m2_unpack_canonical(&p, a, s); > + parts_float_to_float(&p, s); > + return bfloat16_round_pack_canonical(&p, s); > +} > + > float32 float16_to_float32(float16 a, bool ieee, float_status *s) > { > const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; > @@ -2856,6 +2923,15 @@ float64 float16_to_float64(float16 a, bool ieee, > float_status *s) > return float64_round_pack_canonical(&p, s); > } > > +float8_e5m2 float32_to_float8_e5m2(float32 a, bool saturate, float_status *s) > +{ > + FloatParts64 p; > + > + float32_unpack_canonical(&p, a, s); > + parts_float_to_e5m2(&p, s, saturate); > + return float8_e5m2_round_pack_canonical(&p, s, saturate); > +} > + > float16 float32_to_float16(float32 a, bool ieee, float_status *s) > { > FloatParts64 p; > @@ -2923,6 +2999,15 @@ float32 float64_to_float32(float64 a, float_status *s) > return float32_round_pack_canonical(&p, s); > } > > +float8_e5m2 bfloat16_to_float8_e5m2(bfloat16 a, bool saturate, float_status > *s) > +{ > + FloatParts64 p; > + > + bfloat16_unpack_canonical(&p, a, s); > + parts_float_to_e5m2(&p, s, saturate); > + return float8_e5m2_round_pack_canonical(&p, s, saturate); > +} > + > float32 bfloat16_to_float32(bfloat16 a, float_status *s) > { > 
FloatParts64 p; > -- > 2.43.0 >
Reviewed-by: Chao Liu <[email protected]>

Thanks,
Chao
