Re: [Qemu-devel] [PATCH v2 25/67] target/arm: Implement SVE Integer Wide Immediate - Predicated Group

2018-02-23 Thread Richard Henderson
On 02/23/2018 06:18 AM, Peter Maydell wrote:
>> +mm = (mm & 0xff) * (-1ull / 0xff);
> 
> What is this expression doing? I guess from context that it's
> replicating the low 8 bits of mm across the 64-bit value,
> but this is too obscure to do without a comment or wrapping
> it in a helper function with a useful name, I think.

I do have a helper now -- dup_const.  I thought I'd converted all of the uses,
but clearly missed one/some.


r~



Re: [Qemu-devel] [PATCH v2 25/67] target/arm: Implement SVE Integer Wide Immediate - Predicated Group

2018-02-23 Thread Peter Maydell
On 17 February 2018 at 18:22, Richard Henderson
<richard.henderson@linaro.org> wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> +/* Two operand predicated copy immediate with merge.  All valid immediates
> + * can fit within 17 signed bits in the simd_data field.
> + */
> +void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg,
> + uint64_t mm, uint32_t desc)
> +{
> +intptr_t i, opr_sz = simd_oprsz(desc) / 8;
> +uint64_t *d = vd, *n = vn;
> +uint8_t *pg = vg;
> +
> +mm = (mm & 0xff) * (-1ull / 0xff);

What is this expression doing? I guess from context that it's
replicating the low 8 bits of mm across the 64-bit value,
but this is too obscure to do without a comment or wrapping
it in a helper function with a useful name, I think.


Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM



[Qemu-devel] [PATCH v2 25/67] target/arm: Implement SVE Integer Wide Immediate - Predicated Group

2018-02-17 Thread Richard Henderson
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper-sve.h|  10 +
 target/arm/sve_helper.c| 108 +
 target/arm/translate-sve.c |  92 ++
 target/arm/sve.decode  |  17 +++
 4 files changed, 227 insertions(+)

diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 2831e1643b..79493ab647 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -404,6 +404,16 @@ DEF_HELPER_FLAGS_4(sve_uqaddi_s, TCG_CALL_NO_RWG, void, 
ptr, ptr, s64, i32)
 DEF_HELPER_FLAGS_4(sve_uqaddi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(sve_uqsubi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
+DEF_HELPER_FLAGS_5(sve_cpy_m_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_5(sve_cpy_m_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_5(sve_cpy_m_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_5(sve_cpy_m_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(sve_cpy_z_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_cpy_z_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_cpy_z_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_cpy_z_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
 DEF_HELPER_FLAGS_5(sve_and_, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, 
i32)
 DEF_HELPER_FLAGS_5(sve_bic_, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, 
i32)
 DEF_HELPER_FLAGS_5(sve_eor_, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, 
i32)
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index cfda16d520..6a95d1ec48 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -1361,3 +1361,111 @@ void HELPER(sve_uqsubi_d)(void *d, void *a, uint64_t b, 
uint32_t desc)
 *(uint64_t *)(d + i) = (ai < b ? 0 : ai - b);
 }
 }
+
+/* Two operand predicated copy immediate with merge.  All valid immediates
+ * can fit within 17 signed bits in the simd_data field.
+ */
+void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg,
+ uint64_t mm, uint32_t desc)
+{
+intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+uint64_t *d = vd, *n = vn;
+uint8_t *pg = vg;
+
+mm = (mm & 0xff) * (-1ull / 0xff);
+for (i = 0; i < opr_sz; i += 1) {
+uint64_t nn = n[i];
+uint64_t pp = expand_pred_b(pg[H1(i)]);
+d[i] = (mm & pp) | (nn & ~pp);
+}
+}
+
+void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg,
+ uint64_t mm, uint32_t desc)
+{
+intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+uint64_t *d = vd, *n = vn;
+uint8_t *pg = vg;
+
+mm = (mm & 0xffff) * (-1ull / 0xffff);
+for (i = 0; i < opr_sz; i += 1) {
+uint64_t nn = n[i];
+uint64_t pp = expand_pred_h(pg[H1(i)]);
+d[i] = (mm & pp) | (nn & ~pp);
+}
+}
+
+void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg,
+ uint64_t mm, uint32_t desc)
+{
+intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+uint64_t *d = vd, *n = vn;
+uint8_t *pg = vg;
+
+mm = deposit64(mm, 32, 32, mm);
+for (i = 0; i < opr_sz; i += 1) {
+uint64_t nn = n[i];
+uint64_t pp = expand_pred_s(pg[H1(i)]);
+d[i] = (mm & pp) | (nn & ~pp);
+}
+}
+
+void HELPER(sve_cpy_m_d)(void *vd, void *vn, void *vg,
+ uint64_t mm, uint32_t desc)
+{
+intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+uint64_t *d = vd, *n = vn;
+uint8_t *pg = vg;
+
+for (i = 0; i < opr_sz; i += 1) {
+uint64_t nn = n[i];
+d[i] = (pg[H1(i)] & 1 ? mm : nn);
+}
+}
+
+void HELPER(sve_cpy_z_b)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+uint64_t *d = vd;
+uint8_t *pg = vg;
+
+val = (val & 0xff) * (-1ull / 0xff);
+for (i = 0; i < opr_sz; i += 1) {
+d[i] = val & expand_pred_b(pg[H1(i)]);
+}
+}
+
+void HELPER(sve_cpy_z_h)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+uint64_t *d = vd;
+uint8_t *pg = vg;
+
+val = (val & 0xffff) * (-1ull / 0xffff);
+for (i = 0; i < opr_sz; i += 1) {
+d[i] = val & expand_pred_h(pg[H1(i)]);
+}
+}
+
+void HELPER(sve_cpy_z_s)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+uint64_t *d = vd;
+uint8_t *pg = vg;
+
+val = deposit64(val, 32, 32, val);
+for (i = 0; i < opr_sz; i += 1) {
+d[i] = val & expand_pred_s(pg[H1(i)]);
+}
+}
+
+void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+uint64_t *d = vd;
+uint8_t *pg = vg;
+
+for (i = 0; i < opr_sz; i += 1) {
+d[i] = (pg[H1(i)] & 1 ? val : 0);
+}
+}
diff --git a/target/arm/translate-sve.c