Signed-off-by: Richard Henderson <[email protected]>
---
target/arm/tcg/helper-fp8-defs.h | 2 ++
target/arm/tcg/fp8_helper.c | 33 ++++++++++++++++++++++++++++++++
target/arm/tcg/translate-a64.c | 16 ++++++++++++++++
target/arm/tcg/a64.decode | 1 +
4 files changed, 52 insertions(+)
diff --git a/target/arm/tcg/helper-fp8-defs.h b/target/arm/tcg/helper-fp8-defs.h
index 6530d1a6da..023a49e12f 100644
--- a/target/arm/tcg/helper-fp8-defs.h
+++ b/target/arm/tcg/helper-fp8-defs.h
@@ -16,3 +16,5 @@ DEF_HELPER_FLAGS_4(sme2_fcvtl_hb, TCG_CALL_NO_RWG, void, ptr,
ptr, env, i32)
DEF_HELPER_FLAGS_4(sve2_bfcvtn_bh, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_fcvt_bh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env,
i32)
+
+DEF_HELPER_FLAGS_5(advsimd_fcvt_bs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env,
i32)
diff --git a/target/arm/tcg/fp8_helper.c b/target/arm/tcg/fp8_helper.c
index e209860a8f..2252d2c526 100644
--- a/target/arm/tcg/fp8_helper.c
+++ b/target/arm/tcg/fp8_helper.c
@@ -182,6 +182,13 @@ static uint8_t fcvt_f16_to_fp8(float16 x,
fcvt_fp8_output_fn *f8fmt,
return f8fmt(&p, scale, saturate, s);
}
+/*
+ * Convert the single-precision value @x to an 8-bit float.
+ * The input is unpacked with float32_unpack_canonical() and the resulting
+ * parts are handed to the @f8fmt output callback together with @scale,
+ * @saturate and the float status @s; the callback's byte is returned.
+ */
+static uint8_t fcvt_f32_to_fp8(float32 x, fcvt_fp8_output_fn *f8fmt, int scale,
+                               bool saturate, float_status *s)
+{
+    FloatParts64 parts = float32_unpack_canonical(x, s);
+
+    return f8fmt(&parts, scale, saturate, s);
+}
+
void HELPER(advsimd_bfcvtl)(void *vd, void *vn, CPUARMState *env, uint32_t
desc)
{
FP8Context ctx = fp8_src_start(env, desc, 0x3f);
@@ -401,3 +408,29 @@ void HELPER(gvec_fcvt_bh)(void *vd, void *vn, void *vm,
fp8_finish(env, &ctx);
clear_tail(vd, oprsz, simd_maxsz(desc));
}
+
+/*
+ * Narrow four float32 elements from @vn followed by four from @vm into
+ * eight FP8 bytes, stored at @vd + 8 * ctx.high.  The output format comes
+ * from ctx.f8fmt, the FPMR.OSC field is passed as the saturate argument,
+ * and clear_tail() is applied from the end of the written half (byte 8 or
+ * 16) up to simd_maxsz(@desc).
+ */
+void HELPER(advsimd_fcvt_bs)(void *vd, void *vn, void *vm,
+                             CPUARMState *env, uint32_t desc)
+{
+    FP8Context ctx = fp8_dst_start(env, desc);
+    fcvt_fp8_output_fn *output_fmt = fcvt_fp8_output_fmt[ctx.f8fmt];
+    bool osc = FIELD_EX64(env->vfp.fpmr, FPMR, OSC);
+    uint32_t *n = vn, *m = vm;
+    uint8_t *d = vd + 8 * ctx.high;
+    uint8_t result[8];
+
+    /*
+     * Convert everything into a local buffer before storing to vd.
+     * Either source may be the same register as vd.  Guarding only
+     * vd == vm is not enough: when ctx.high is set the stores land on
+     * bytes 8..15, which overlap elements 2 and 3 of an aliased vn
+     * before those elements have been read.
+     */
+    for (size_t i = 0; i < 4; ++i) {
+        result[i] = fcvt_f32_to_fp8(n[H4(i)], output_fmt,
+                                    ctx.scale, osc, &ctx.stat);
+    }
+    for (size_t i = 0; i < 4; ++i) {
+        result[i + 4] = fcvt_f32_to_fp8(m[H4(i)], output_fmt,
+                                        ctx.scale, osc, &ctx.stat);
+    }
+
+    /* All inputs are consumed; now it is safe to write the destination. */
+    for (size_t i = 0; i < 8; ++i) {
+        d[H1(i)] = result[i];
+    }
+
+    fp8_finish(env, &ctx);
+    clear_tail(vd, ctx.high ? 16 : 8, simd_maxsz(desc));
+}
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 0927eb6516..3c784afc99 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -6537,6 +6537,22 @@ static bool trans_FCVTN_bh(DisasContext *s, arg_qrrr_e
*a)
return true;
}
+/*
+ * Translate FCVTN_bs.  Returns false (pattern not handled) when the
+ * aa64_f8cvt feature is absent.  Otherwise returns true, and emits the
+ * out-of-line advsimd_fcvt_bs helper call only if both the FPMR and FP
+ * access checks pass.
+ */
+static bool trans_FCVTN_bs(DisasContext *s, arg_qrrr_e *a)
+{
+    uint32_t rd_ofs, rn_ofs, rm_ofs;
+    int data;
+
+    if (!dc_isar_feature(aa64_f8cvt, s)) {
+        return false;
+    }
+    /* Same order and short-circuit as "fpmr check && fp check". */
+    if (!fpmr_access_check(s) || !fp_access_check(s)) {
+        return true;
+    }
+
+    rd_ofs = vec_full_reg_offset(s, a->rd);
+    rn_ofs = vec_full_reg_offset(s, a->rn);
+    rm_ofs = vec_full_reg_offset(s, a->rm);
+
+    /* Descriptor data: Q at bit 1, FPST_A64 starting at bit 2. */
+    data = (a->q << 1) | (FPST_A64 << 2);
+
+    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
+                       16, vec_full_reg_size(s), data,
+                       gen_helper_advsimd_fcvt_bs);
+    return true;
+}
+
static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
{
if (fp_access_check(s)) {
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 71456d44e1..a9cf259b9b 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -1202,6 +1202,7 @@ FSCALE 0.10 1110 110 ..... 00111 1 ..... .....
@qrrr_h
FSCALE 0.10 1110 1.1 ..... 11111 1 ..... ..... @qrrr_sd
FCVTN_bh 0.00 1110 010 ..... 11110 1 ..... ..... @qrrr_h
+FCVTN_bs 0.00 1110 000 ..... 11110 1 ..... ..... @qrrr_h
### Advanced SIMD scalar x indexed element
--
2.43.0