Signed-off-by: Richard Henderson <[email protected]>
---
target/arm/tcg/helper-fp8-defs.h | 2 +
target/arm/tcg/fp8_helper.c | 87 ++++++++++++++++++++++++++++++++
target/arm/tcg/translate-sve.c | 3 ++
target/arm/tcg/sve.decode | 2 +
4 files changed, 94 insertions(+)
diff --git a/target/arm/tcg/helper-fp8-defs.h b/target/arm/tcg/helper-fp8-defs.h
index b5dc2b7064..bbc8d69e28 100644
--- a/target/arm/tcg/helper-fp8-defs.h
+++ b/target/arm/tcg/helper-fp8-defs.h
@@ -12,3 +12,5 @@ DEF_HELPER_FLAGS_4(advsimd_fcvtl_hb, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_4(sve2_fcvt_hb, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_4(sme2_fcvt_hb, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_4(sme2_fcvtl_hb, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_4(sve2_bfcvtn_bh, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
diff --git a/target/arm/tcg/fp8_helper.c b/target/arm/tcg/fp8_helper.c
index ec802ed391..8d01393ff9 100644
--- a/target/arm/tcg/fp8_helper.c
+++ b/target/arm/tcg/fp8_helper.c
@@ -84,6 +84,16 @@ static FP8Context fp8_src_start(CPUARMState *env, uint32_t desc, int scale_mask)
return fp8_start(env, desc, f8fmt, scale);
}
+static FP8Context fp8_dst_start(CPUARMState *env, uint32_t desc, bool is_f16)
+{ /* Build an FP8Context from FPMR.F8D and a signed FPMR scale field. */
+ uint64_t fpmr = env->vfp.fpmr;
+ FPMRType f8fmt = FIELD_EX64(fpmr, FPMR, F8D); /* FP8 format selector */
+ int scale = (is_f16 /* sign-extended; field depends on is_f16 */
+ ? FIELD_SEX64(fpmr, FPMR, NSCALE_F16)
+ : FIELD_SEX64(fpmr, FPMR, NSCALE));
+
+ return fp8_start(env, desc, f8fmt, scale);
+}
static FloatParts64 fp8_invalid_input(uint8_t x, float_status *s)
{
@@ -119,6 +129,60 @@ static float16 fcvt_fp8_to_f16(uint8_t x, fp8_input_fn *f8fmt,
return float16_round_pack_canonical(&p, s);
}
+typedef uint8_t fcvt_fp8_output_fn(FloatParts64 *, int, bool, float_status *);
+
+static uint8_t fcvt_fp8_invalid_output(FloatParts64 *p, int scale,
+ bool saturate, float_status *s)
+{
+ /* Invalid output format: raise Invalid and return 0xff (-1); p is unused. */
+ float_raise(float_flag_invalid, s);
+ return 0xff;
+}
+
+static uint8_t fcvt_fp8_e4m3_output(FloatParts64 *p, int scale,
+ bool saturate, float_status *s)
+{ /* Rescale *p in place, then round and pack it as an e4m3 byte. */
+ *p = parts64_scalbn(p, scale, s);
+ /*
+ * No Inf special case is needed here: e4m3 has no infinity encoding,
+ * so saturating Inf -> Max is handled in uncanon_e4m3_overflow.
+ */
+ return float8_e4m3_round_pack_canonical(p, s, saturate);
+}
+
+static uint8_t fcvt_fp8_e5m2_output(FloatParts64 *p, int scale,
+ bool saturate, float_status *s)
+{ /* Rescale *p in place (unless Inf), then round and pack it as e5m2. */
+ /*
+ * e5m2 has an infinity encoding, so saturating Inf -> Max is done here:
+ * Inf becomes a normal with maximal exp/frac, which rounding converts to
+ * the actual maximum value.  An Inf input is never scaled by 'scale'.
+ */
+ if (unlikely(p->cls == float_class_inf)) {
+ if (saturate) { /* without saturate, Inf is passed on unchanged */
+ p->cls = float_class_normal;
+ p->exp = INT_MAX;
+ p->frac = -1; /* all fraction bits set */
+ }
+ } else { /* any non-Inf class */
+ *p = parts64_scalbn(p, scale, s);
+ }
+ return float8_e5m2_round_pack_canonical(p, s, saturate);
+}
+
+static fcvt_fp8_output_fn * const fcvt_fp8_output_fmt[8] = {
+ [0 ... 7] = fcvt_fp8_invalid_output, /* default for every index */
+ [OFP8_E5M2] = fcvt_fp8_e5m2_output, /* overrides the default */
+ [OFP8_E4M3] = fcvt_fp8_e4m3_output, /* overrides the default */
+};
+
+static uint8_t fcvt_b16_to_fp8(bfloat16 x, fcvt_fp8_output_fn *f8fmt,
+ int scale, bool saturate, float_status *s)
+{ /* Convert one bfloat16 value to an FP8 byte using output routine f8fmt. */
+ FloatParts64 p = bfloat16_unpack_canonical(x, s);
+ return f8fmt(&p, scale, saturate, s); /* per-format output routine */
+}
+
void HELPER(advsimd_bfcvtl)(void *vd, void *vn, CPUARMState *env, uint32_t
desc)
{
FP8Context ctx = fp8_src_start(env, desc, 0x3f);
@@ -285,3 +349,26 @@ void HELPER(sme2_fcvtl_hb)(void *vd, void *vn, CPUARMState *env, uint32_t desc)
fp8_finish(env, &ctx);
}
+
+void HELPER(sve2_bfcvtn_bh)(void *vd, void *vn, CPUARMState *env, uint32_t
desc)
+{ /* Convert bf16 elements of two sources to FP8, interleaved into vd. */
+ FP8Context ctx = fp8_dst_start(env, desc, false); /* false: FPMR.NSCALE */
+ fcvt_fp8_output_fn *output_fmt = fcvt_fp8_output_fmt[ctx.f8fmt];
+ uint16_t *n0 = vn; /* first source */
+ uint16_t *n1 = vn + sizeof(ARMVectorReg); /* second source, next slot */
+ uint8_t *d = vd;
+ size_t oprsz = simd_oprsz(desc);
+ size_t nelem = oprsz / 2; /* loop count; d receives 2 * nelem bytes */
+ bool osc = FIELD_EX64(env->vfp.fpmr, FPMR, OSC); /* used as 'saturate' */
+
+ for (size_t i = 0; i < nelem; ++i) {
+ bfloat16 e0 = n0[H2(i)]; /* both inputs are loaded before any store; */
+ bfloat16 e1 = n1[H2(i)]; /* NOTE(review): presumably so vd may alias vn */
+ d[H1(2 * i + 0)] = fcvt_b16_to_fp8(e0, output_fmt, /* even byte <- n0 */
+ ctx.scale, osc, &ctx.stat);
+ d[H1(2 * i + 1)] = fcvt_b16_to_fp8(e1, output_fmt, /* odd byte <- n1 */
+ ctx.scale, osc, &ctx.stat);
+ }
+
+ fp8_finish(env, &ctx);
+}
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 5200f3d034..7276d9c44a 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -4099,6 +4099,9 @@ TRANS_FEAT(BF1CVTLT, aa64_sme2_or_sve2_f8cvt, do_f8cvt, a,
TRANS_FEAT(BF2CVTLT, aa64_sme2_or_sve2_f8cvt, do_f8cvt, a,
gen_helper_sve2_bfcvt, true, true)
+TRANS_FEAT(BFCVTN, aa64_sme2_or_sve2_f8cvt, do_f8cvt,
+ a, gen_helper_sve2_bfcvtn_bh, false, false)
+
/*
*** SVE Floating Point Compare with Zero Group
*/
diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode
index ca110f4bc1..b6ef8ed8de 100644
--- a/target/arm/tcg/sve.decode
+++ b/target/arm/tcg/sve.decode
@@ -1101,6 +1101,8 @@ BF2CVT 01100101 00 001 000 001111 ..... ..... @rd_rn_e0
BF1CVTLT 01100101 00 001 001 001110 ..... ..... @rd_rn_e0
BF2CVTLT 01100101 00 001 001 001111 ..... ..... @rd_rn_e0
+BFCVTN 01100101 00 001 010 001110 ....0 ..... @rd_rnx2 esz=1
+
### SVE FP Compare with Zero Group
FCMGE_ppz0 01100101 .. 0100 00 001 ... ..... 0 .... @pd_pg_rn
--
2.43.0