Signed-off-by: Richard Henderson <[email protected]>
---
target/arm/helper-fp8.h | 14 ++++
target/arm/tcg/helper-fp8-defs.h | 6 ++
target/arm/tcg/translate-a64.h | 1 +
target/arm/tcg/fp8_helper.c | 126 +++++++++++++++++++++++++++++++
target/arm/tcg/translate-a64.c | 34 +++++++++
target/arm/tcg/a64.decode | 3 +
target/arm/tcg/meson.build | 1 +
7 files changed, 185 insertions(+)
create mode 100644 target/arm/helper-fp8.h
create mode 100644 target/arm/tcg/helper-fp8-defs.h
create mode 100644 target/arm/tcg/fp8_helper.c
diff --git a/target/arm/helper-fp8.h b/target/arm/helper-fp8.h
new file mode 100644
index 0000000000..c45211ba22
--- /dev/null
+++ b/target/arm/helper-fp8.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef HELPER_FP8_H
+#define HELPER_FP8_H
+
+#include "exec/helper-proto-common.h"
+#include "exec/helper-gen-common.h"
+
+#define HELPER_H "tcg/helper-fp8-defs.h"
+#include "exec/helper-proto.h.inc"
+#include "exec/helper-gen.h.inc"
+#undef HELPER_H
+
+#endif /* HELPER_FP8_H */
diff --git a/target/arm/tcg/helper-fp8-defs.h b/target/arm/tcg/helper-fp8-defs.h
new file mode 100644
index 0000000000..0caaf63749
--- /dev/null
+++ b/target/arm/tcg/helper-fp8-defs.h
@@ -0,0 +1,6 @@
+/*
+ * AArch64 FP8 helper definitions
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+DEF_HELPER_FLAGS_4(advsimd_bfcvtl, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h
index 9c45f89305..35f8d4f82e 100644
--- a/target/arm/tcg/translate-a64.h
+++ b/target/arm/tcg/translate-a64.h
@@ -25,6 +25,7 @@ TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf);
void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
unsigned int imms, unsigned int immr);
+bool fpmr_access_check(DisasContext *s);
bool sve_access_check(DisasContext *s);
bool sme_enabled_check(DisasContext *s);
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned);
diff --git a/target/arm/tcg/fp8_helper.c b/target/arm/tcg/fp8_helper.c
new file mode 100644
index 0000000000..7c8c4d6e06
--- /dev/null
+++ b/target/arm/tcg/fp8_helper.c
@@ -0,0 +1,126 @@
+/*
+ * AArch64 FP8 Operations
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "fpu/softfloat.h"
+#include "fpu/softfloat-parts.h"
+#include "helper-fp8.h"
+#include "vec_internal.h"
+
+#define HELPER_H "tcg/helper-fp8-defs.h"
+#include "exec/helper-info.c.inc"
+
+/*
+ * FP8 format encodings, as extracted from the FPMR F8S1/F8S2 fields.
+ * Only the first two have an unpack routine in fp8_input_fmt[].
+ */
+typedef enum FPMRType {
+    OFP8_E5M2 = 0,
+    OFP8_E4M3,
+    Unsupp2, Unsupp3, Unsupp4,
+    Unsupp5, Unsupp6, Unsupp7,
+} FPMRType;
+
+typedef struct FP8Context {
+    float_status stat;        /* working copy of env->vfp.fp_status[fpst] */
+    ARMFPStatusFlavour fpst;  /* which env fp_status the copy was taken from */
+    FPMRType f8fmt;           /* FP8 format selected for this operation */
+    int scale;                /* exponent adjustment handed to scalbn */
+    bool high;                /* desc data bit 1: read the upper 8 bytes */
+} FP8Context;
+
+/* Build the per-call context: a private float_status plus desc fields. */
+static FP8Context fp8_start(CPUARMState *env, uint32_t desc,
+                            FPMRType f8fmt, int scale)
+{
+    FP8Context ctx;
+
+    ctx.fpst = extract32(desc, SIMD_DATA_SHIFT + 2, 4);
+    ctx.high = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+    ctx.f8fmt = f8fmt;
+    ctx.scale = scale;
+    ctx.stat = env->vfp.fp_status[ctx.fpst];
+
+    /* Only the copy is adjusted; the status held in env is not touched. */
+    set_float_rounding_mode(float_round_nearest_even, &ctx.stat);
+    set_default_nan_mode(true, &ctx.stat);
+    set_flush_inputs_to_zero(0, &ctx.stat);
+    set_flush_to_zero(0, &ctx.stat);
+
+    return ctx;
+}
+
+/* Raise the working copy's flags, minus input_denormal_used, in env. */
+static void fp8_finish(CPUARMState *env, FP8Context *c)
+{
+    float_status *env_stat = &env->vfp.fp_status[c->fpst];
+    int raised = get_float_exception_flags(&c->stat);
+    float_raise(raised & ~float_flag_input_denormal_used, env_stat);
+}
+
+/* Start a context for an FP8 source, with format and scale read from FPMR. */
+static FP8Context fp8_src_start(CPUARMState *env, uint32_t desc, int scale_mask)
+{
+    uint64_t fpmr = env->vfp.fpmr;
+    /* desc data bit 0 selects the F8S2/LSCALE2 fields over F8S1/LSCALE. */
+    bool issrc2 = extract32(desc, SIMD_DATA_SHIFT, 1);
+    FPMRType f8fmt = (issrc2
+                      ? FIELD_EX64(fpmr, FPMR, F8S2)
+                      : FIELD_EX64(fpmr, FPMR, F8S1));
+    int scale = fpmr >> (issrc2 ? R_FPMR_LSCALE2_SHIFT : R_FPMR_LSCALE_SHIFT);
+
+    /* Only the scale_mask bits are used, and the scale is negated. */
+    scale = -(scale & scale_mask);
+    return fp8_start(env, desc, f8fmt, scale);
+}
+
+/* Unpack routine for unsupported formats; the input byte is ignored. */
+static FloatParts64 fp8_invalid_input(uint8_t x, float_status *s)
+{
+    const int exc = float_flag_invalid | float_flag_invalid_snan;
+
+    /* Handled as for an snan input: raise invalid, give the default nan. */
+    float_raise(exc, s);
+    return parts64_default_nan(s);
+}
+
+typedef FloatParts64 fp8_input_fn(uint8_t x, float_status *s);
+/* Unpack routine for each FPMRType encoding. */
+static fp8_input_fn * const fp8_input_fmt[8] = {
+    [OFP8_E5M2] = float8_e5m2_unpack_canonical,
+    [OFP8_E4M3] = float8_e4m3_unpack_canonical,
+    [Unsupp2 ... Unsupp7] = fp8_invalid_input,
+};
+
+static bfloat16 fcvt_fp8_to_b16(uint8_t x, fp8_input_fn *f8fmt,
+                                int scale, float_status *s)
+{
+    FloatParts64 unpacked = f8fmt(x, s);
+    FloatParts64 scaled = parts64_scalbn(&unpacked, scale, s);
+    return bfloat16_round_pack_canonical(&scaled, s);
+}
+
+/* Convert eight FP8 elements of vn to bfloat16, filling 16 bytes of vd. */
+void HELPER(advsimd_bfcvtl)(void *vd, void *vn, CPUARMState *env, uint32_t desc)
+{
+    FP8Context ctx = fp8_src_start(env, desc, 0x3f);
+    fp8_input_fn *input_fmt = fp8_input_fmt[ctx.f8fmt];
+    uint8_t *n = vn, scratch[16];
+    bfloat16 *d = vd;
+
+    /* The stores to vd would clobber unread input when vd == vn. */
+    if (vd == vn) {
+        n = memcpy(scratch, vn, 16);
+    }
+    n += ctx.high * 8;
+    for (size_t i = 0; i < 8; ++i) {
+        d[H2(i)] = fcvt_fp8_to_b16(n[H1(i)], input_fmt, ctx.scale, &ctx.stat);
+    }
+    fp8_finish(env, &ctx);
+    clear_tail(vd, 16, simd_maxsz(desc));
+}
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index ac18ceeeab..085e7e3b95 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -22,6 +22,7 @@
#include "helper-a64.h"
#include "helper-sme.h"
#include "helper-sve.h"
+#include "helper-fp8.h"
#include "translate.h"
#include "translate-a64.h"
#include "tcg/tcg-op.h"
@@ -1457,6 +1458,24 @@ static bool fp_access_check(DisasContext *s)
return fp_access_check_only(s) && nonstreaming_check(s);
}
+/*
+ * Check whether a vector instruction may make its indirect reference
+ * to the FPMR; if not, emit UNDEFINED.  See CheckFPMREnabled().
+ */
+bool fpmr_access_check(DisasContext *s)
+{
+    if (!s->fpmr_el) {
+        return true;
+    }
+    /*
+     * A denied direct access to the FPMR raises SystemRegisterTrap
+     * targeting a specific EL, but a denied indirect access is
+     * simply UNDEFINED, taken to the default exception level.
+     */
+    unallocated_encoding(s);
+    return false;
+}
+
/*
* Return <0 for non-supported element sizes, with MO_16 controlled by
* FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
@@ -10612,6 +10631,21 @@ static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
return true;
}
+static bool do_f8cvt(DisasContext *s, arg_qrr_e *a,
+                     gen_helper_gvec_2_ptr *fn, bool issrc2)
+{
+    int data = issrc2 | (a->q << 1) | (FPST_A64 << 2);  /* for the helper */
+    if (fpmr_access_check(s) && fp_access_check(s)) {
+        tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
+                           vec_full_reg_offset(s, a->rn),
+                           tcg_env, 16, vec_full_reg_size(s), data, fn);
+    }
+    return true;  /* whether or not the checks passed */
+}
+
+TRANS_FEAT(BF1CVTL, aa64_f8cvt, do_f8cvt, a, gen_helper_advsimd_bfcvtl, false)
+TRANS_FEAT(BF2CVTL, aa64_f8cvt, do_f8cvt, a, gen_helper_advsimd_bfcvtl, true)
+
static bool trans_OK(DisasContext *s, arg_OK *a)
{
return true;
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 02c7264cb9..b7aac148f2 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -1910,6 +1910,9 @@ URSQRTE_v 0.10 1110 101 00001 11001 0 ..... ..... @qrr_s
FCVTL_v 0.00 1110 0.1 00001 01111 0 ..... ..... @qrr_sd
+BF1CVTL 0.10 1110 101 00001 01111 0 ..... ..... @qrr_h
+BF2CVTL 0.10 1110 111 00001 01111 0 ..... ..... @qrr_h
+
&fcvt_q rd rn esz q shift
@fcvtq_h . q:1 . ...... 001 .... ...... rn:5 rd:5 \
&fcvt_q esz=1 shift=%fcvt_f_sh_h
diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build
index d2364aa39c..56be383189 100644
--- a/target/arm/tcg/meson.build
+++ b/target/arm/tcg/meson.build
@@ -46,6 +46,7 @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
'sme_helper.c',
'sve_helper.c',
'vec_helper64.c',
+ 'fp8_helper.c',
))
arm_common_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c'))
--
2.43.0