Re: [PATCH 09/31] target/arm: Implement SVE2 integer pairwise arithmetic

2020-04-13 Thread Laurent Desnogues
On Fri, Mar 27, 2020 at 12:16 AM Richard Henderson wrote:
[...]
> diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
> index 5d75aed7b7..d7c181ddb8 100644
> --- a/target/arm/sve_helper.c
> +++ b/target/arm/sve_helper.c
> @@ -681,6 +681,73 @@ DO_ZPZZ_D(sve2_uhsub_zpzz_d, uint64_t, DO_HSUB_D)
>  #undef DO_ZPZZ
>  #undef DO_ZPZZ_D
>
> +/*
> + * Three operand expander, operating on element pairs.
> + * If the slot I is even, the elements are from VN {I, I+1}.
> + * If the slot I is odd, the elements are from VM {I-1, I}.
> + */
> +#define DO_ZPZZ_PAIR(NAME, TYPE, H, OP) \
> +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
> +{   \
> +intptr_t i, opr_sz = simd_oprsz(desc);  \
> +for (i = 0; i < opr_sz; ) { \
> +uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
> +do {\
> +if (pg & 1) {   \
> +void *p = (i & 1 ? vm : vn);\
> +TYPE nn = *(TYPE *)(p + H(i & ~1)); \
> +TYPE mm = *(TYPE *)(p + H(i | 1));  \
> +*(TYPE *)(vd + H(i)) = OP(nn, mm);  \
> +}   \
> +i += sizeof(TYPE), pg >>= sizeof(TYPE); \
> +} while (i & 15);   \
> +}   \
> +}

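You should not use 1 as the mask but sizeof(TYPE): i is a byte offset that
advances by sizeof(TYPE), so for elements wider than one byte (i & 1) is
always zero, and (i & ~1) / (i | 1) do not select the two elements of a pair.
A temporary should be used because vd is also a source: storing results
into vd in place can overwrite an input element before it has been read.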

> +/* Similarly, specialized for 64-bit operands.  */
> +#define DO_ZPZZ_PAIR_D(NAME, TYPE, OP)  \
> +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
> +{   \
> +intptr_t i, opr_sz = simd_oprsz(desc) / 8;  \
> +TYPE *d = vd, *n = vn, *m = vm; \
> +uint8_t *pg = vg;   \
> +for (i = 0; i < opr_sz; i += 1) {   \
> +if (pg[H1(i)] & 1) {\
> +TYPE *p = (i & 1 ? m : n) + (i & ~1);   \
> +TYPE nn = p[0], mm = p[1];  \
> +d[i] = OP(nn, mm);  \
> +}   \
> +}   \
> +}

A temporary should be used here as well, because vd is also a source.

Laurent



[PATCH 09/31] target/arm: Implement SVE2 integer pairwise arithmetic

2020-03-26 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/helper-sve.h| 45 +
 target/arm/sve.decode  |  8 +
 target/arm/sve_helper.c| 67 ++
 target/arm/translate-sve.c |  6 
 4 files changed, 126 insertions(+)

diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 149fff1fae..028c3b85a8 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -326,6 +326,51 @@ DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG,
 DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_5(sve2_addp_zpzz_b, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_addp_zpzz_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_addp_zpzz_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_addp_zpzz_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_smaxp_zpzz_b, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smaxp_zpzz_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smaxp_zpzz_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smaxp_zpzz_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_umaxp_zpzz_b, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umaxp_zpzz_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umaxp_zpzz_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umaxp_zpzz_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sminp_zpzz_b, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sminp_zpzz_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sminp_zpzz_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sminp_zpzz_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_uminp_zpzz_b, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uminp_zpzz_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uminp_zpzz_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uminp_zpzz_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_5(sve_asr_zpzw_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_asr_zpzw_h, TCG_CALL_NO_RWG,
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 54076bb607..86a6bf7088 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1142,3 +1142,11 @@ SRHADD  01000100 .. 010 100 100 ... . .  @rdn_pg_rm
 URHADD  01000100 .. 010 101 100 ... . .  @rdn_pg_rm
 SHSUB   01000100 .. 010 110 100 ... . .  @rdm_pg_rn # SHSUBR
 UHSUB   01000100 .. 010 111 100 ... . .  @rdm_pg_rn # UHSUBR
+
+### SVE2 integer pairwise arithmetic
+
+ADDP    01000100 .. 010 001 101 ... . .  @rdn_pg_rm
+SMAXP   01000100 .. 010 100 101 ... . .  @rdn_pg_rm
+UMAXP   01000100 .. 010 101 101 ... . .  @rdn_pg_rm
+SMINP   01000100 .. 010 110 101 ... . .  @rdn_pg_rm
+UMINP   01000100 .. 010 111 101 ... . .  @rdn_pg_rm
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 5d75aed7b7..d7c181ddb8 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -681,6 +681,73 @@ DO_ZPZZ_D(sve2_uhsub_zpzz_d, uint64_t, DO_HSUB_D)
 #undef DO_ZPZZ
 #undef DO_ZPZZ_D
 
+/*
+ * Three operand expander, operating on element pairs.
+ * If the slot I is even, the elements are from VN {I, I+1}.
+ * If the slot I is odd, the elements are from VM {I-1, I}.
+ */
+#define DO_ZPZZ_PAIR(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{   \
+intptr_t i, opr_sz = simd_oprsz(desc);  \
+for (i = 0; i < opr_sz; ) { \
+uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+do {\
+if (pg & 1) {   \
+void *p = (i & 1 ? vm : vn);\
+TYPE nn = *(TYPE *)(p + H(i & ~1)); \
+