On 1/9/26 02:16, Max Chou wrote:
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 8094358c2e..0c7f052ec0 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -544,6 +544,8 @@ typedef struct {
int frac_shift;
bool arm_althp;
bool has_explicit_bit;
+ bool ocpfp;
+ bool ocpfp_sat;
uint64_t round_mask;
} FloatFmt;
@@ -560,6 +562,28 @@ typedef struct {
.frac_shift = (-F - 1) & 63, \
.round_mask = (1ull << ((-F - 1) & 63)) - 1
+static const FloatFmt float8_e4m3_params = {
+ FLOAT_PARAMS(4, 3),
+ .ocpfp = true
+};
+
+static const FloatFmt float8_e4m3_params_sat = {
+ FLOAT_PARAMS(4, 3),
+ .ocpfp = true,
+ .ocpfp_sat = true
+};
+
+static const FloatFmt float8_e5m2_params = {
+ FLOAT_PARAMS(5, 2),
+ .ocpfp = true
+};
+
+static const FloatFmt float8_e5m2_params_sat = {
+ FLOAT_PARAMS(5, 2),
+ .ocpfp = true,
+ .ocpfp_sat = true
+};
Saturation is not part of the format; it is part of the conversion operation.
I suggest you pass it as a bool parameter to bfloat16_to_float8_e4m3 etc.
It would then be handled as part of round-and-pack, perhaps as a separate step, perhaps via
float_round_nearest_even_max.
I'm not sure what to do with arm_althp vs ocpfp. They seem to have a couple of
things in common. Perhaps we should decompose these into separate behavior flags.
r~