ping?

On Thu, 17 Dec 2020 at 18:48, Christophe Lyon
<christophe.l...@linaro.org> wrote:
>
> This patch adds new movmisalign<mode>_mve_load and store patterns for
> MVE to help vectorization. They are very similar to their Neon
> counterparts, but use different iterators and instructions.
>
> Indeed MVE supports fewer vector modes than Neon, so we use
> the MVE_VLD_ST iterator where Neon uses VQX.
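>
> As a reminder, the new insns below restrict the iterator further through
> their condition (quoted from the mve.md hunk in this patch, purely to
> illustrate which modes the MVE patterns accept):
>
>   "((TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
>     || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode)))
>    && !BYTES_BIG_ENDIAN && unaligned_access"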
>
> Since the supported modes are different from the ones valid for
> arithmetic operators, we introduce two new sets of macros:
>
> ARM_HAVE_NEON_<MODE>_LDST
>   true if Neon has vector load/store instructions for <MODE>
>
> ARM_HAVE_<MODE>_LDST
>   true if any vector extension has vector load/store instructions for <MODE>
>
> We move the movmisalign<mode> expander from neon.md to vec-common.md, and
> replace the TARGET_NEON condition with ARM_HAVE_<MODE>_LDST.
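>
> Concretely, the expander's condition changes from
>
>   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
>
> to
>
>   "ARM_HAVE_<MODE>_LDST && !BYTES_BIG_ENDIAN && unaligned_access"
>
> (both lines are visible in the neon.md and vec-common.md hunks below), so
> the expander now fires whenever either Neon or MVE can load/store the mode.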
>
> The patch also updates the mve-vneg.c test to check for the improved
> code generation when loading and storing the vectors involved: it
> verifies that no 'orr' instruction is emitted to check for
> misalignment at runtime.
> This test was chosen among the other MVE tests, but any other would
> have worked. A plain vector copy loop (dest[i] = a[i]) is not a good
> candidate because the compiler replaces it with a call to memcpy.
>
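> For reference, here is a minimal sketch of the kind of function the test
> exercises (the actual mve-vneg.c generates its functions through a FUNC
> macro; this hand-expanded int32x4 variant is shown for illustration only):
>
>   #include <stdint.h>
>
>   void
>   test_vneg_s32x4 (int32_t * __restrict__ dest, int32_t * __restrict__ a)
>   {
>     /* With MVE enabled, the vectorizer turns this negation loop into the
>        vldrw.32 / vneg.s32 / vstrw.32 sequence shown below.  */
>     for (int i = 0; i < 4; i++)
>       dest[i] = -a[i];
>   }
>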
> For instance we now generate:
> test_vneg_s32x4:
>         vldrw.32       q3, [r1]
>         vneg.s32  q3, q3
>         vstrw.32       q3, [r0]
>         bx      lr
>
> instead of:
> test_vneg_s32x4:
>         orr     r3, r1, r0
>         lsls    r3, r3, #28
>         bne     .L15
>         vldrw.32        q3, [r1]
>         vneg.s32  q3, q3
>         vstrw.32        q3, [r0]
>         bx      lr
>         .L15:
>         push    {r4, r5}
>         ldrd    r2, r3, [r1, #8]
>         ldrd    r5, r4, [r1]
>         rsbs    r2, r2, #0
>         rsbs    r5, r5, #0
>         rsbs    r4, r4, #0
>         rsbs    r3, r3, #0
>         strd    r5, r4, [r0]
>         pop     {r4, r5}
>         strd    r2, r3, [r0, #8]
>         bx      lr
>
> 2020-12-15  Christophe Lyon  <christophe.l...@linaro.org>
>
>         PR target/97875
>         gcc/
>         * config/arm/arm.h (ARM_HAVE_NEON_V8QI_LDST): New macro.
>         (ARM_HAVE_NEON_V16QI_LDST, ARM_HAVE_NEON_V4HI_LDST): Likewise.
>         (ARM_HAVE_NEON_V8HI_LDST, ARM_HAVE_NEON_V2SI_LDST): Likewise.
>         (ARM_HAVE_NEON_V4SI_LDST, ARM_HAVE_NEON_V4HF_LDST): Likewise.
>         (ARM_HAVE_NEON_V8HF_LDST, ARM_HAVE_NEON_V4BF_LDST): Likewise.
>         (ARM_HAVE_NEON_V8BF_LDST, ARM_HAVE_NEON_V2SF_LDST): Likewise.
>         (ARM_HAVE_NEON_V4SF_LDST, ARM_HAVE_NEON_DI_LDST): Likewise.
>         (ARM_HAVE_NEON_V2DI_LDST): Likewise.
>         (ARM_HAVE_V8QI_LDST, ARM_HAVE_V16QI_LDST): Likewise.
>         (ARM_HAVE_V4HI_LDST, ARM_HAVE_V8HI_LDST): Likewise.
>         (ARM_HAVE_V2SI_LDST, ARM_HAVE_V4SI_LDST, ARM_HAVE_V4HF_LDST): Likewise.
>         (ARM_HAVE_V8HF_LDST, ARM_HAVE_V4BF_LDST, ARM_HAVE_V8BF_LDST): Likewise.
>         (ARM_HAVE_V2SF_LDST, ARM_HAVE_V4SF_LDST, ARM_HAVE_DI_LDST): Likewise.
>         (ARM_HAVE_V2DI_LDST): Likewise.
>         * config/arm/mve.md (*movmisalign<mode>_mve_store): New pattern.
>         (*movmisalign<mode>_mve_load): New pattern.
>         * config/arm/neon.md (movmisalign<mode>): Move to ...
>         * config/arm/vec-common.md: ... here.
>
>         PR target/97875
>         gcc/testsuite/
>         * gcc.target/arm/simd/mve-vneg.c: Update test.
> ---
>  gcc/config/arm/arm.h                         | 40 ++++++++++++++++++++++++++++
>  gcc/config/arm/mve.md                        | 25 +++++++++++++++++
>  gcc/config/arm/neon.md                       | 25 -----------------
>  gcc/config/arm/vec-common.md                 | 24 +++++++++++++++++
>  gcc/testsuite/gcc.target/arm/simd/mve-vneg.c |  3 +++
>  5 files changed, 92 insertions(+), 25 deletions(-)
>
> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
> index 4a63d33..d44e0c6 100644
> --- a/gcc/config/arm/arm.h
> +++ b/gcc/config/arm/arm.h
> @@ -1151,6 +1151,46 @@ extern const int arm_arch_cde_coproc_bits[];
>  #define ARM_HAVE_V8HF_ARITH (ARM_HAVE_NEON_V8HF_ARITH || TARGET_HAVE_MVE_FLOAT)
>  #define ARM_HAVE_V4SF_ARITH (ARM_HAVE_NEON_V4SF_ARITH || TARGET_HAVE_MVE_FLOAT)
>
> +/* The conditions under which vector modes are supported by load/store
> +   instructions using Neon.  */
> +
> +#define ARM_HAVE_NEON_V8QI_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V16QI_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V4HI_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V8HI_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V2SI_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V4SI_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V4HF_LDST TARGET_NEON_FP16INST
> +#define ARM_HAVE_NEON_V8HF_LDST TARGET_NEON_FP16INST
> +#define ARM_HAVE_NEON_V4BF_LDST TARGET_BF16_SIMD
> +#define ARM_HAVE_NEON_V8BF_LDST TARGET_BF16_SIMD
> +#define ARM_HAVE_NEON_V2SF_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V4SF_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_DI_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V2DI_LDST TARGET_NEON
> +
> +/* The conditions under which vector modes are supported by load/store
> +   instructions by any vector extension.  */
> +
> +#define ARM_HAVE_V8QI_LDST (ARM_HAVE_NEON_V8QI_LDST || TARGET_REALLY_IWMMXT)
> +#define ARM_HAVE_V4HI_LDST (ARM_HAVE_NEON_V4HI_LDST || TARGET_REALLY_IWMMXT)
> +#define ARM_HAVE_V2SI_LDST (ARM_HAVE_NEON_V2SI_LDST || TARGET_REALLY_IWMMXT)
> +
> +#define ARM_HAVE_V16QI_LDST (ARM_HAVE_NEON_V16QI_LDST || TARGET_HAVE_MVE)
> +#define ARM_HAVE_V8HI_LDST (ARM_HAVE_NEON_V8HI_LDST || TARGET_HAVE_MVE)
> +#define ARM_HAVE_V4SI_LDST (ARM_HAVE_NEON_V4SI_LDST || TARGET_HAVE_MVE)
> +#define ARM_HAVE_DI_LDST ARM_HAVE_NEON_DI_LDST
> +#define ARM_HAVE_V2DI_LDST ARM_HAVE_NEON_V2DI_LDST
> +
> +#define ARM_HAVE_V4HF_LDST ARM_HAVE_NEON_V4HF_LDST
> +#define ARM_HAVE_V2SF_LDST ARM_HAVE_NEON_V2SF_LDST
> +
> +#define ARM_HAVE_V4BF_LDST ARM_HAVE_NEON_V4BF_LDST
> +#define ARM_HAVE_V8BF_LDST ARM_HAVE_NEON_V8BF_LDST
> +
> +#define ARM_HAVE_V8HF_LDST (ARM_HAVE_NEON_V8HF_LDST || TARGET_HAVE_MVE_FLOAT)
> +#define ARM_HAVE_V4SF_LDST (ARM_HAVE_NEON_V4SF_LDST || TARGET_HAVE_MVE_FLOAT)
> +
>  /* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
>  extern int arm_regs_in_sequence[];
>
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index b4c5a1e2..673a83c 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -10937,3 +10937,28 @@ (define_insn "arm_vcx3q<a>_p_v16qi"
>    [(set_attr "type" "coproc")
>     (set_attr "length" "8")]
>  )
> +
> +(define_insn "*movmisalign<mode>_mve_store"
> +  [(set (match_operand:MVE_VLD_ST 0 "neon_permissive_struct_operand"        
> "=Um")
> +       (unspec:MVE_VLD_ST [(match_operand:MVE_VLD_ST 1 "s_register_operand" 
> " w")]
> +        UNSPEC_MISALIGNED_ACCESS))]
> +  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
> +   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))
> +   && !BYTES_BIG_ENDIAN && unaligned_access"
> +  "vstr<V_sz_elem1>.<V_sz_elem>\t%q1, %E0"
> +  [(set_attr "type" "mve_store")
> +   (set_attr "length" "4")]
> +)
> +
> +
> +(define_insn "*movmisalign<mode>_mve_load"
> +  [(set (match_operand:MVE_VLD_ST 0 "s_register_operand"                     
>            "=w")
> +       (unspec:MVE_VLD_ST [(match_operand:MVE_VLD_ST 1 
> "neon_permissive_struct_operand" " Um")]
> +        UNSPEC_MISALIGNED_ACCESS))]
> +  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
> +   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))
> +   && !BYTES_BIG_ENDIAN && unaligned_access"
> +  "vldr<V_sz_elem1>.<V_sz_elem>\t%q0, %E1"
> +  [(set_attr "type" "mve_load")
> +   (set_attr "length" "4")]
> +)
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index d2e92ba..50220be 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -280,31 +280,6 @@ (define_split
>    neon_disambiguate_copy (operands, dest, src, 4);
>  })
>
> -(define_expand "movmisalign<mode>"
> -  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
> -       (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
> -                    UNSPEC_MISALIGNED_ACCESS))]
> -  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
> -{
> -  rtx adjust_mem;
> -  /* This pattern is not permitted to fail during expansion: if both arguments
> -     are non-registers (e.g. memory := constant, which can be created by the
> -     auto-vectorizer), force operand 1 into a register.  */
> -  if (!s_register_operand (operands[0], <MODE>mode)
> -      && !s_register_operand (operands[1], <MODE>mode))
> -    operands[1] = force_reg (<MODE>mode, operands[1]);
> -
> -  if (s_register_operand (operands[0], <MODE>mode))
> -    adjust_mem = operands[1];
> -  else
> -    adjust_mem = operands[0];
> -
> -  /* Legitimize address.  */
> -  if (!neon_vector_mem_operand (adjust_mem, 2, true))
> -    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
> -
> -})
> -
>  (define_insn "*movmisalign<mode>_neon_store"
>    [(set (match_operand:VDX 0 "neon_permissive_struct_operand"  "=Um")
>         (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
> index 2d0932b..f6a79e2 100644
> --- a/gcc/config/arm/vec-common.md
> +++ b/gcc/config/arm/vec-common.md
> @@ -205,3 +205,27 @@ (define_expand "neg<mode>2"
>         (neg:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))]
>    "ARM_HAVE_<MODE>_ARITH"
>  )
> +
> +(define_expand "movmisalign<mode>"
> +  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
> +       (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
> +        UNSPEC_MISALIGNED_ACCESS))]
> +  "ARM_HAVE_<MODE>_LDST && !BYTES_BIG_ENDIAN && unaligned_access"
> +{
> +  rtx adjust_mem;
> +  /* This pattern is not permitted to fail during expansion: if both arguments
> +     are non-registers (e.g. memory := constant, which can be created by the
> +     auto-vectorizer), force operand 1 into a register.  */
> +  if (!s_register_operand (operands[0], <MODE>mode)
> +      && !s_register_operand (operands[1], <MODE>mode))
> +    operands[1] = force_reg (<MODE>mode, operands[1]);
> +
> +  if (s_register_operand (operands[0], <MODE>mode))
> +    adjust_mem = operands[1];
> +  else
> +    adjust_mem = operands[0];
> +
> +  /* Legitimize address.  */
> +  if (!neon_vector_mem_operand (adjust_mem, 2, true))
> +    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
> +})
> diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c b/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
> index afd0d60..7945a06 100644
> --- a/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
> +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
> @@ -47,3 +47,6 @@ FUNC(f, float, 16, 8, -, vneg)
>     functions above.  */
>  /* { dg-final { scan-assembler-times {vneg.s[0-9]+  q[0-9]+, q[0-9]+} 6 } } */
>  /* { dg-final { scan-assembler-times {vneg.f[0-9]+  q[0-9]+, q[0-9]+} 2 } } */
> +/* { dg-final { scan-assembler-times {vldr[bhw].[0-9]+\tq[0-9]+} 8 } } */
> +/* { dg-final { scan-assembler-times {vstr[bhw].[0-9]+\tq[0-9]+} 8 } } */
> +/* { dg-final { scan-assembler-not {orr\tr[0-9]+, r[0-9]+, r[0-9]+} } } */
> --
> 2.7.4
>
