Signed-off-by: Richard Henderson <[email protected]>
---
target/arm/tcg/helper-fp8-defs.h | 2 ++
target/arm/tcg/fp8_helper.c | 47 ++++++++++++++++++++++++++++++++
target/arm/tcg/translate-sme.c | 5 ++++
target/arm/tcg/sme.decode | 5 ++++
4 files changed, 59 insertions(+)
diff --git a/target/arm/tcg/helper-fp8-defs.h b/target/arm/tcg/helper-fp8-defs.h
index 3021dafd44..b5dc2b7064 100644
--- a/target/arm/tcg/helper-fp8-defs.h
+++ b/target/arm/tcg/helper-fp8-defs.h
@@ -10,3 +10,5 @@ DEF_HELPER_FLAGS_4(sme2_bfcvtl_hb, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_4(advsimd_fcvtl_hb, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_4(sve2_fcvt_hb, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sme2_fcvt_hb, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sme2_fcvtl_hb, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
diff --git a/target/arm/tcg/fp8_helper.c b/target/arm/tcg/fp8_helper.c
index b5ce5554b1..ec802ed391 100644
--- a/target/arm/tcg/fp8_helper.c
+++ b/target/arm/tcg/fp8_helper.c
@@ -219,6 +219,33 @@ void HELPER(sme2_bfcvt_hb)(void *vd, void *vn, CPUARMState *env, uint32_t desc)
fp8_finish(env, &ctx);
}
+void HELPER(sme2_fcvt_hb)(void *vd, void *vn, CPUARMState *env, uint32_t desc)
+{
+    /* Widen FP8 to FP16: low source half -> vd, high half -> next vector. */
+    FP8Context ctx = fp8_src_start(env, desc, 0xf);
+    fp8_input_fn *decode = fp8_input_fmt[ctx.f8fmt];
+    size_t vec_bytes = simd_oprsz(desc);
+    size_t half = vec_bytes / 2;
+    uint16_t *lo = vd;
+    uint16_t *hi = vd + sizeof(ARMVectorReg);
+    uint8_t *src = vn;
+    ARMVectorReg tmp;
+
+    /* Read from a private copy when the two-register result overlaps vn. */
+    if (vectors_overlap(vd, 2, vn, 1)) {
+        src = memcpy(&tmp, vn, vec_bytes);
+    }
+    for (size_t k = 0; k != half; k++) {
+        lo[H2(k)] = fcvt_fp8_to_f16(src[H1(k)], decode, ctx.scale, &ctx.stat);
+    }
+    for (size_t k = 0; k != half; k++) {
+        uint8_t in = src[H1(half + k)];
+        hi[H2(k)] = fcvt_fp8_to_f16(in, decode, ctx.scale, &ctx.stat);
+    }
+
+    fp8_finish(env, &ctx);
+}
+
void HELPER(sme2_bfcvtl_hb)(void *vd, void *vn, CPUARMState *env, uint32_t desc)
{
FP8Context ctx = fp8_src_start(env, desc, 0x3f);
@@ -238,3 +265,23 @@ void HELPER(sme2_bfcvtl_hb)(void *vd, void *vn, CPUARMState *env, uint32_t desc)
fp8_finish(env, &ctx);
}
+
+void HELPER(sme2_fcvtl_hb)(void *vd, void *vn, CPUARMState *env, uint32_t desc)
+{
+    /* Widen FP8 to FP16: even source bytes -> vd, odd bytes -> next vector. */
+    FP8Context ctx = fp8_src_start(env, desc, 0xf);
+    fp8_input_fn *decode = fp8_input_fmt[ctx.f8fmt];
+    size_t pairs = simd_oprsz(desc) / 2;
+    uint8_t *src = vn;
+    uint16_t *d_even = vd;
+    uint16_t *d_odd = vd + sizeof(ARMVectorReg);
+
+    for (size_t k = 0; k != pairs; k++) {
+        /* Load both inputs before storing, in case vn overlaps vd. */
+        uint8_t b0 = src[H1(2 * k)];
+        uint8_t b1 = src[H1(2 * k + 1)];
+        d_even[H2(k)] = fcvt_fp8_to_f16(b0, decode, ctx.scale, &ctx.stat);
+        d_odd[H2(k)] = fcvt_fp8_to_f16(b1, decode, ctx.scale, &ctx.stat);
+    }
+    fp8_finish(env, &ctx);
+}
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index 2841b2b8cb..0cbad3e006 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -1546,6 +1546,11 @@ static bool do_f8cvt(DisasContext *s, arg_zz_n *a,
return true;
}
+TRANS_FEAT(F1CVT, aa64_sme2_f8cvt, do_f8cvt, a, gen_helper_sme2_fcvt_hb, 0)
+TRANS_FEAT(F2CVT, aa64_sme2_f8cvt, do_f8cvt, a, gen_helper_sme2_fcvt_hb, 1)
+TRANS_FEAT(F1CVTL, aa64_sme2_f8cvt, do_f8cvt, a, gen_helper_sme2_fcvtl_hb, 0)
+TRANS_FEAT(F2CVTL, aa64_sme2_f8cvt, do_f8cvt, a, gen_helper_sme2_fcvtl_hb, 1)
+
TRANS_FEAT(BF1CVT, aa64_sme2_f8cvt, do_f8cvt, a, gen_helper_sme2_bfcvt_hb, 0)
TRANS_FEAT(BF2CVT, aa64_sme2_f8cvt, do_f8cvt, a, gen_helper_sme2_bfcvt_hb, 1)
TRANS_FEAT(BF1CVTL, aa64_sme2_f8cvt, do_f8cvt, a, gen_helper_sme2_bfcvtl_hb, 0)
diff --git a/target/arm/tcg/sme.decode b/target/arm/tcg/sme.decode
index df9586c1a5..d6192eb59d 100644
--- a/target/arm/tcg/sme.decode
+++ b/target/arm/tcg/sme.decode
@@ -853,6 +853,11 @@ UUNPK_4bh 11000001 011 10101 111000 ....0 ...01 @zz_4x2_n1
UUNPK_4hs 11000001 101 10101 111000 ....0 ...01 @zz_4x2_n1
UUNPK_4sd 11000001 111 10101 111000 ....0 ...01 @zz_4x2_n1
+F1CVT 11000001 001 00110 111000 ..... ....0 @zz_2x1
+F2CVT 11000001 101 00110 111000 ..... ....0 @zz_2x1
+F1CVTL 11000001 001 00110 111000 ..... ....1 @zz_2x1
+F2CVTL 11000001 101 00110 111000 ..... ....1 @zz_2x1
+
BF1CVT 11000001 011 00110 111000 ..... ....0 @zz_2x1
BF2CVT 11000001 111 00110 111000 ..... ....0 @zz_2x1
BF1CVTL 11000001 011 00110 111000 ..... ....1 @zz_2x1
--
2.43.0