On 1/9/26 02:16, Max Chou wrote:
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 8094358c2e..0c7f052ec0 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -544,6 +544,8 @@ typedef struct {
      int frac_shift;
      bool arm_althp;
      bool has_explicit_bit;
+    bool ocpfp;
+    bool ocpfp_sat;
      uint64_t round_mask;
  } FloatFmt;
@@ -560,6 +562,28 @@ typedef struct {
      .frac_shift     = (-F - 1) & 63,                    \
      .round_mask     = (1ull << ((-F - 1) & 63)) - 1
+static const FloatFmt float8_e4m3_params = {
+    FLOAT_PARAMS(4, 3),
+    .ocpfp = true
+};
+
+static const FloatFmt float8_e4m3_params_sat = {
+    FLOAT_PARAMS(4, 3),
+    .ocpfp = true,
+    .ocpfp_sat = true
+};
+
+static const FloatFmt float8_e5m2_params = {
+    FLOAT_PARAMS(5, 2),
+    .ocpfp = true
+};
+
+static const FloatFmt float8_e5m2_params_sat = {
+    FLOAT_PARAMS(5, 2),
+    .ocpfp = true,
+    .ocpfp_sat = true
+};

Saturation is not part of the format; it's part of the conversion operation.

I suggest you pass that as a bool parameter to bfloat16_to_float8_e4m3 etc.
Saturation would then be handled as part of round-and-pack, maybe as a separate step, maybe via float_round_nearest_even_max.

I'm not sure what to do with arm_althp vs ocpfp. They seem to have a couple of things in common. Perhaps we should decompose these into separate behavior flags.


r~

Reply via email to