Signed-off-by: Richard Henderson
---
target/arm/helper-sve.h    |  10 +++++
target/arm/sve_helper.c    | 108 +++++++++++++++++++++++++++++++++++++++++++++
target/arm/translate-sve.c |  92 ++++++++++++++++++++++++++++++++++++++
target/arm/sve.decode      |  17 +++++++
4 files changed, 227 insertions(+)
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 2831e1643b..79493ab647 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -404,6 +404,16 @@ DEF_HELPER_FLAGS_4(sve_uqaddi_s, TCG_CALL_NO_RWG, void,
ptr, ptr, s64, i32)
DEF_HELPER_FLAGS_4(sve_uqaddi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(sve_uqsubi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_5(sve_cpy_m_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_5(sve_cpy_m_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_5(sve_cpy_m_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_5(sve_cpy_m_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i64, i32)
+/* _m_ helpers: (vd, vn, vg, mm, desc); _z_ helpers: (vd, vg, val, desc). */
+DEF_HELPER_FLAGS_4(sve_cpy_z_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_cpy_z_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_cpy_z_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_cpy_z_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
DEF_HELPER_FLAGS_5(sve_and_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr,
i32)
DEF_HELPER_FLAGS_5(sve_bic_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr,
i32)
DEF_HELPER_FLAGS_5(sve_eor_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr,
i32)
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index cfda16d520..6a95d1ec48 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -1361,3 +1361,111 @@ void HELPER(sve_uqsubi_d)(void *d, void *a, uint64_t b,
uint32_t desc)
*(uint64_t *)(d + i) = (ai < b ? 0 : ai - b);
}
}
+
+/* Two operand predicated copy immediate with merge: result bits selected
+ * by the expanded predicate come from the immediate, the rest from VN.
+ * All valid immediates can fit within 17 signed bits in the simd_data field.
+ */
+void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg,
+                         uint64_t mm, uint32_t desc)
+{
+    intptr_t chunk, nchunks = simd_oprsz(desc) / 8;
+    uint64_t *dst = vd, *src = vn;
+    uint8_t *pred = vg;
+
+    mm = (mm & 0xff) * (-1ull / 0xff);  /* low byte -> all eight bytes */
+    for (chunk = 0; chunk < nchunks; chunk++) {
+        uint64_t sel = expand_pred_b(pred[H1(chunk)]);
+        dst[chunk] = (mm & sel) | (src[chunk] & ~sel);
+    }
+}
+
+/* Merging copy, halfwords: predicate-selected bits take MM, others VN. */
+void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg,
+                         uint64_t mm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+    uint64_t *d = vd, *n = vn;
+    uint8_t *pg = vg;
+
+    mm = (mm & 0xffff) * (-1ull / 0xffff);  /* low 16 bits -> 4 halfwords */
+    for (i = 0; i < opr_sz; i += 1) {
+        uint64_t pp = expand_pred_h(pg[H1(i)]);
+        d[i] = (mm & pp) | (n[i] & ~pp);
+    }
+}
+
+/* Merging copy, 32-bit elements: predicate-selected bits take MM, else VN. */
+void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg,
+                         uint64_t mm, uint32_t desc)
+{
+    intptr_t chunk, nchunks = simd_oprsz(desc) / 8;
+    uint64_t *dst = vd, *src = vn;
+    uint8_t *pred = vg;
+
+    mm = deposit64(mm, 32, 32, mm);  /* presumably: low word -> both halves */
+    for (chunk = 0; chunk < nchunks; chunk++) {
+        uint64_t sel = expand_pred_s(pred[H1(chunk)]);
+        dst[chunk] = (mm & sel) | (src[chunk] & ~sel);
+    }
+}
+
+/* Merging copy, 64-bit elements: bit 0 of each predicate byte selects MM. */
+void HELPER(sve_cpy_m_d)(void *vd, void *vn, void *vg,
+                         uint64_t mm, uint32_t desc)
+{
+    intptr_t elt, nelts = simd_oprsz(desc) / 8;
+    uint64_t *dst = vd, *src = vn;
+    uint8_t *pred = vg;
+
+    for (elt = 0; elt < nelts; elt++) {
+        dst[elt] = (pred[H1(elt)] & 1) ? mm : src[elt];
+    }
+}
+
+void HELPER(sve_cpy_z_b)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+    intptr_t chunk, nchunks = simd_oprsz(desc) / 8;
+    uint64_t *dst = vd;
+    uint8_t *pred = vg;
+    /* Zeroing copy, bytes: bits outside the expanded predicate become 0. */
+    val = (val & 0xff) * (-1ull / 0xff);  /* low byte -> all eight bytes */
+    for (chunk = 0; chunk < nchunks; chunk++) {
+        dst[chunk] = val & expand_pred_b(pred[H1(chunk)]);
+    }
+}
+
+void HELPER(sve_cpy_z_h)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+    uint64_t *d = vd;
+    uint8_t *pg = vg;
+    /* Zeroing copy, halfwords: bits outside the expanded predicate are 0. */
+    val = (val & 0xffff) * (-1ull / 0xffff);  /* low 16 bits -> 4 halfwords */
+    for (i = 0; i < opr_sz; i += 1) {
+        d[i] = val & expand_pred_h(pg[H1(i)]);
+    }
+}
+
+void HELPER(sve_cpy_z_s)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+    intptr_t chunk, nchunks = simd_oprsz(desc) / 8;
+    uint64_t *dst = vd;
+    uint8_t *pred = vg;
+    /* Zeroing copy, words: bits outside the expanded predicate become 0. */
+    val = deposit64(val, 32, 32, val);  /* presumably: low word -> both */
+    for (chunk = 0; chunk < nchunks; chunk++) {
+        dst[chunk] = val & expand_pred_s(pred[H1(chunk)]);
+    }
+}
+
+void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+    intptr_t elt, nelts = simd_oprsz(desc) / 8;
+    uint64_t *dst = vd;
+    uint8_t *pred = vg;
+    /* Zeroing copy: bit 0 of each predicate byte gates one 64-bit element. */
+    for (elt = 0; elt < nelts; elt++) {
+        dst[elt] = (pred[H1(elt)] & 1) ? val : 0;
+    }
+}
diff --git a/target/arm/translate-sve.c