Signed-off-by: Richard Henderson <[email protected]>
---
target/arm/tcg/helper-fp8-defs.h | 2 ++
target/arm/tcg/fp8_helper.c | 47 ++++++++++++++++++++++++++++++++
target/arm/tcg/translate-sve.c | 4 +++
target/arm/tcg/sve.decode | 2 ++
4 files changed, 55 insertions(+)
diff --git a/target/arm/tcg/helper-fp8-defs.h b/target/arm/tcg/helper-fp8-defs.h
index e67fb191c2..5863a6dbb8 100644
--- a/target/arm/tcg/helper-fp8-defs.h
+++ b/target/arm/tcg/helper-fp8-defs.h
@@ -19,3 +19,5 @@ DEF_HELPER_FLAGS_5(gvec_fcvt_bh, TCG_CALL_NO_RWG, void, ptr,
ptr, ptr, env, i32)
DEF_HELPER_FLAGS_4(sve2_fcvtn_bh, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(advsimd_fcvt_bs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env,
i32)
+/* Helpers backing the SVE2 FCVTNB and FCVTNT translators. */
+DEF_HELPER_FLAGS_4(sve2_fcvtnb_bs, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sve2_fcvtnt_bs, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
diff --git a/target/arm/tcg/fp8_helper.c b/target/arm/tcg/fp8_helper.c
index c32577a9e4..e1d255f864 100644
--- a/target/arm/tcg/fp8_helper.c
+++ b/target/arm/tcg/fp8_helper.c
@@ -465,3 +465,50 @@ void HELPER(advsimd_fcvt_bs)(void *vd, void *vn, void *vm,
fp8_finish(env, &ctx);
clear_tail(vd, ctx.high ? 16 : 8, simd_maxsz(desc));
}
+
+/*
+ * Helper for FCVTNB: convert float32 elements from two source vectors
+ * to FP8 and interleave them into the 16-bit elements of vd.
+ *
+ * vn points at the first source; the second source is read starting
+ * sizeof(ARMVectorReg) bytes after it.  For each 32-bit index i,
+ * 16-bit element 2*i of vd receives the conversion of the first source
+ * and element 2*i+1 that of the second.  The conversion format is
+ * selected by ctx.f8fmt, and fp8_finish() is called once at the end.
+ */
+void HELPER(sve2_fcvtnb_bs)(void *vd, void *vn,
+                            CPUARMState *env, uint32_t desc)
+{
+    FP8Context ctx = fp8_dst_start(env, desc, false);
+    fcvt_fp8_output_fn *cvt = fcvt_fp8_output_fmt[ctx.f8fmt];
+    bool osc = FIELD_EX64(env->vfp.fpmr, FPMR, OSC);
+    uint32_t *src0 = vn;
+    uint32_t *src1 = vn + sizeof(ARMVectorReg);
+    uint16_t *dst = vd;
+    /* One iteration per 32-bit source element. */
+    size_t count = simd_oprsz(desc) / 4;
+
+    for (size_t i = 0; i != count; i++) {
+        size_t even = 2 * i;
+        /* Both inputs are loaded before either result is stored. */
+        float32 a = src0[H4(i)];
+        float32 b = src1[H4(i)];
+
+        /* The uint16_t store zero-extends uint8_t, clearing the odd lanes. */
+        dst[H2(even + 0)] = fcvt_f32_to_fp8(a, cvt, ctx.scale, osc, &ctx.stat);
+        dst[H2(even + 1)] = fcvt_f32_to_fp8(b, cvt, ctx.scale, osc, &ctx.stat);
+    }
+
+    fp8_finish(env, &ctx);
+}
+
+/*
+ * Helper for FCVTNT: convert float32 elements from two source vectors
+ * to FP8 and store them into selected bytes of vd.
+ *
+ * vn points at the first source; the second source is read starting
+ * sizeof(ARMVectorReg) bytes after it.  For each 32-bit index i,
+ * byte 4*i+1 of vd receives the conversion of the first source and
+ * byte 4*i+3 that of the second.  This loop writes no other bytes
+ * of vd.  fp8_finish() is called once at the end.
+ */
+void HELPER(sve2_fcvtnt_bs)(void *vd, void *vn,
+                            CPUARMState *env, uint32_t desc)
+{
+    FP8Context ctx = fp8_dst_start(env, desc, false);
+    fcvt_fp8_output_fn *cvt = fcvt_fp8_output_fmt[ctx.f8fmt];
+    bool osc = FIELD_EX64(env->vfp.fpmr, FPMR, OSC);
+    uint32_t *src0 = vn;
+    uint32_t *src1 = vn + sizeof(ARMVectorReg);
+    uint8_t *dst = vd;
+    /* One iteration per 32-bit source element. */
+    size_t count = simd_oprsz(desc) / 4;
+
+    for (size_t i = 0; i != count; i++) {
+        size_t base = 4 * i;
+        /* Both inputs are loaded before either result is stored. */
+        float32 a = src0[H4(i)];
+        float32 b = src1[H4(i)];
+
+        dst[H1(base + 1)] = fcvt_f32_to_fp8(a, cvt, ctx.scale, osc, &ctx.stat);
+        dst[H1(base + 3)] = fcvt_f32_to_fp8(b, cvt, ctx.scale, osc, &ctx.stat);
+    }
+
+    fp8_finish(env, &ctx);
+}
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index c7fcf27183..13f7ab01af 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -4103,6 +4103,10 @@ TRANS_FEAT(FCVTN, aa64_sme2_or_sve2_f8cvt, do_f8cvt,
a, gen_helper_sve2_fcvtn_bh, false, false)
TRANS_FEAT(BFCVTN, aa64_sme2_or_sve2_f8cvt, do_f8cvt,
a, gen_helper_sve2_bfcvtn_bh, false, false)
+/* FCVTNB/FCVTNT: same feature gate and do_f8cvt path as FCVTN/BFCVTN. */
+TRANS_FEAT(FCVTNB, aa64_sme2_or_sve2_f8cvt, do_f8cvt,
+ a, gen_helper_sve2_fcvtnb_bs, false, false)
+TRANS_FEAT(FCVTNT, aa64_sme2_or_sve2_f8cvt, do_f8cvt,
+ a, gen_helper_sve2_fcvtnt_bs, false, false)
/*
*** SVE Floating Point Compare with Zero Group
diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode
index 806953bc35..72755b27af 100644
--- a/target/arm/tcg/sve.decode
+++ b/target/arm/tcg/sve.decode
@@ -1103,6 +1103,8 @@ BF2CVTLT 01100101 00 001 001 001111 ..... .....
@rd_rn_e0
FCVTN 01100101 00 001 010 001100 ....0 ..... @rd_rnx2 esz=1
BFCVTN 01100101 00 001 010 001110 ....0 ..... @rd_rnx2 esz=1
+# FCVTNB/FCVTNT: @rd_rnx2 format like FCVTN/BFCVTN, but with esz=2.
+FCVTNB 01100101 00 001 010 001101 ....0 ..... @rd_rnx2 esz=2
+FCVTNT 01100101 00 001 010 001111 ....0 ..... @rd_rnx2 esz=2
### SVE FP Compare with Zero Group
--
2.43.0