Re: [RFC PATCH 6/8] fpu/softfloat: define operation for bfloat16

2020-07-13 Thread Richard Henderson
On 7/12/20 4:45 PM, LIU Zhiwei wrote:
> @@ -3039,7 +3143,7 @@ static FloatParts scalbn_decomposed
>  return return_nan(a, s);
>  }
>  if (a.cls == float_class_normal) {
> -/* The largest float type (even though not supported by FloatParts)
> +/* The largest float type (even though nt supported by FloatParts)

Oops.

Otherwise,
Reviewed-by: Richard Henderson 


r~



[RFC PATCH 6/8] fpu/softfloat: define operation for bfloat16

2020-07-12 Thread LIU Zhiwei
Signed-off-by: LIU Zhiwei 
---
 fpu/softfloat.c | 146 +++-
 include/fpu/softfloat.h |  44 
 2 files changed, 189 insertions(+), 1 deletion(-)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 54fc889446..9a58107be3 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1182,6 +1182,28 @@ float64_sub(float64 a, float64 b, float_status *s)
 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
 }
 
+/*
+ * Returns the result of adding or subtracting the brain floating-point
+ * values `a' and `b'.
+ */
+bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
+{
+FloatParts pa = bfloat16_unpack_canonical(a, status);
+FloatParts pb = bfloat16_unpack_canonical(b, status);
+FloatParts pr = addsub_floats(pa, pb, false, status);
+
+return bfloat16_round_pack_canonical(pr, status);
+}
+
+bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
+{
+FloatParts pa = bfloat16_unpack_canonical(a, status);
+FloatParts pb = bfloat16_unpack_canonical(b, status);
+FloatParts pr = addsub_floats(pa, pb, true, status);
+
+return bfloat16_round_pack_canonical(pr, status);
+}
+
 /*
  * Returns the result of multiplying the floating-point values `a' and
  * `b'. The operation is performed according to the IEC/IEEE Standard
@@ -1284,6 +1306,20 @@ float64_mul(float64 a, float64 b, float_status *s)
 f64_is_zon2, f64_addsubmul_post);
 }
 
+/*
+ * Returns the result of multiplying the brain floating-point
+ * values `a' and `b'.
+ */
+
+bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
+{
+FloatParts pa = bfloat16_unpack_canonical(a, status);
+FloatParts pb = bfloat16_unpack_canonical(b, status);
+FloatParts pr = mul_floats(pa, pb, status);
+
+return bfloat16_round_pack_canonical(pr, status);
+}
+
 /*
  * Returns the result of multiplying the floating-point values `a' and
  * `b' then adding 'c', with no intermediate rounding step after the
@@ -1666,6 +1702,23 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
 }
 
+/*
+ * Returns the result of multiplying the brain floating-point values `a'
+ * and `b' then adding 'c', with no intermediate rounding step after the
+ * multiplication.
+ */
+
+bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
+  int flags, float_status *status)
+{
+FloatParts pa = bfloat16_unpack_canonical(a, status);
+FloatParts pb = bfloat16_unpack_canonical(b, status);
+FloatParts pc = bfloat16_unpack_canonical(c, status);
+FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
+
+return bfloat16_round_pack_canonical(pr, status);
+}
+
 /*
  * Returns the result of dividing the floating-point value `a' by the
  * corresponding value `b'. The operation is performed according to
@@ -1832,6 +1885,20 @@ float64_div(float64 a, float64 b, float_status *s)
 f64_div_pre, f64_div_post);
 }
 
+/*
+ * Returns the result of dividing the brain floating-point
+ * value `a' by the corresponding value `b'.
+ */
+
+bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
+{
+FloatParts pa = bfloat16_unpack_canonical(a, status);
+FloatParts pb = bfloat16_unpack_canonical(b, status);
+FloatParts pr = div_floats(pa, pb, status);
+
+return bfloat16_round_pack_canonical(pr, status);
+}
+
 /*
  * Float to Float conversions
  *
@@ -2871,6 +2938,25 @@ MINMAX(64, maxnummag, false, true, true)
 
 #undef MINMAX
 
+#define BF16_MINMAX(name, ismin, isiee, ismag)  \
+bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
+{   \
+FloatParts pa = bfloat16_unpack_canonical(a, s);\
+FloatParts pb = bfloat16_unpack_canonical(b, s);\
+FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);  \
+\
+return bfloat16_round_pack_canonical(pr, s);\
+}
+
+BF16_MINMAX(min, true, false, false)
+BF16_MINMAX(minnum, true, true, false)
+BF16_MINMAX(minnummag, true, true, true)
+BF16_MINMAX(max, false, false, false)
+BF16_MINMAX(maxnum, false, true, false)
+BF16_MINMAX(maxnummag, false, true, true)
+
+#undef BF16_MINMAX
+
 /* Floating point compare */
 static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
 float_status *s)
@@ -3032,6 +3118,24 @@ FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
 return f64_compare(a, b, true, s);
 }
 
+static int QEMU_FLATTEN
+soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
+{
+FloatParts pa =