Signed-off-by: Richard Henderson <[email protected]>
---
target/arm/tcg/helper-fp8-defs.h | 2 ++
target/arm/tcg/fp8_helper.c | 29 +++++++++++++++++++++++++++++
target/arm/tcg/translate-a64.c | 3 +++
target/arm/tcg/a64.decode | 3 +++
4 files changed, 37 insertions(+)
diff --git a/target/arm/tcg/helper-fp8-defs.h b/target/arm/tcg/helper-fp8-defs.h
index 966f83d796..718463422b 100644
--- a/target/arm/tcg/helper-fp8-defs.h
+++ b/target/arm/tcg/helper-fp8-defs.h
@@ -7,3 +7,5 @@ DEF_HELPER_FLAGS_4(advsimd_bfcvtl, TCG_CALL_NO_RWG, void, ptr,
ptr, env, i32)
DEF_HELPER_FLAGS_4(sve2_bfcvt, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_4(sme2_bfcvt_hb, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_4(sme2_bfcvtl_hb, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+/* FP8 to float16 widening convert, AdvSIMD form; defined in fp8_helper.c. */
+DEF_HELPER_FLAGS_4(advsimd_fcvtl_hb, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
diff --git a/target/arm/tcg/fp8_helper.c b/target/arm/tcg/fp8_helper.c
index 427ec4cc3d..90f17f9e10 100644
--- a/target/arm/tcg/fp8_helper.c
+++ b/target/arm/tcg/fp8_helper.c
@@ -105,6 +105,14 @@ static bfloat16 fcvt_fp8_to_b16(uint8_t x, fp8_input_fn
*f8fmt,
return bfloat16_round_pack_canonical(&p, s);
}
+static float16 fcvt_fp8_to_f16(uint8_t x, fp8_input_fn *f8fmt,
+ int scale, float_status *s)
+{  /* Convert one FP8 byte to float16: unpack, scale, then pack. */
+ FloatParts64 p = f8fmt(x, s);  /* unpack via the caller-chosen FP8 format */
+ p = parts64_scalbn(&p, scale, s);  /* presumably p * 2**scale -- confirm */
+ return float16_round_pack_canonical(&p, s);
+}
+
void HELPER(advsimd_bfcvtl)(void *vd, void *vn, CPUARMState *env, uint32_t
desc)
{
FP8Context ctx = fp8_src_start(env, desc, 0x3f);
@@ -125,6 +133,27 @@ void HELPER(advsimd_bfcvtl)(void *vd, void *vn,
CPUARMState *env, uint32_t desc)
clear_tail(vd, 16, simd_maxsz(desc));
}
+void HELPER(advsimd_fcvtl_hb)(void *vd, void *vn,
+ CPUARMState *env, uint32_t desc)
+{  /* Convert 8 FP8 bytes read from vn into 8 float16 elements in vd. */
+ FP8Context ctx = fp8_src_start(env, desc, 0xf);  /* NOTE(review): bfcvt helpers pass 0x3f; presumably a scale mask -- confirm */
+ fp8_input_fn *input_fmt = fp8_input_fmt[ctx.f8fmt];  /* unpacker for the selected FP8 format */
+ uint8_t *n = vn, scratch[16];
+ float16 *d = vd;
+
+ if (vd == vn) {  /* 2-byte stores to d[] would clobber unread 1-byte inputs */
+ n = memcpy(scratch, vn, 16);  /* so read the source from a private copy */
+ }
+ n += ctx.high * 8;  /* high == 1 starts at source byte 8 (assumes high is 0 or 1) */
+
+ for (size_t i = 0; i < 8; ++i) {  /* H1/H2: presumably host-endian index fixups -- confirm */
+ d[H2(i)] = fcvt_fp8_to_f16(n[H1(i)], input_fmt, ctx.scale, &ctx.stat);
+ }
+
+ fp8_finish(env, &ctx);  /* presumably folds ctx.stat results back into env -- confirm */
+ clear_tail(vd, 16, simd_maxsz(desc));  /* presumably zeroes vd bytes past the first 16 -- confirm */
+}
+
void HELPER(sve2_bfcvt)(void *vd, void *vn, CPUARMState *env, uint32_t desc)
{
FP8Context ctx = fp8_src_start(env, desc, 0x3f);
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 085e7e3b95..565053a1a4 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -10643,6 +10643,9 @@ static bool do_f8cvt(DisasContext *s, arg_qrr_e *a,
return true;
}
+TRANS_FEAT(F1CVTL, aa64_f8cvt, do_f8cvt, a, gen_helper_advsimd_fcvtl_hb, false)
+TRANS_FEAT(F2CVTL, aa64_f8cvt, do_f8cvt, a, gen_helper_advsimd_fcvtl_hb, true)
+
TRANS_FEAT(BF1CVTL, aa64_f8cvt, do_f8cvt, a, gen_helper_advsimd_bfcvtl, false)
TRANS_FEAT(BF2CVTL, aa64_f8cvt, do_f8cvt, a, gen_helper_advsimd_bfcvtl, true)
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index b7aac148f2..26d31d0a33 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -1910,6 +1910,9 @@ URSQRTE_v 0.10 1110 101 00001 11001 0 ..... .....
@qrr_s
FCVTL_v 0.00 1110 0.1 00001 01111 0 ..... ..... @qrr_sd
+F1CVTL 0.10 1110 001 00001 01111 0 ..... ..... @qrr_h  # bits [23:22] = 00 (BF1CVTL: 10)
+F2CVTL 0.10 1110 011 00001 01111 0 ..... ..... @qrr_h  # bits [23:22] = 01 (BF2CVTL: 11)
+
BF1CVTL 0.10 1110 101 00001 01111 0 ..... ..... @qrr_h
BF2CVTL 0.10 1110 111 00001 01111 0 ..... ..... @qrr_h
--
2.43.0