ping^2?

On Wed, 30 Dec 2020 at 11:33, Christophe Lyon
<christophe.l...@linaro.org> wrote:
>
> ping?
>
> On Thu, 17 Dec 2020 at 18:48, Christophe Lyon
> <christophe.l...@linaro.org> wrote:
> >
> > This patch adds new movmisalign<mode>_mve_load and store patterns for
> > MVE to help vectorization. They are very similar to their Neon
> > counterparts, but use different iterators and instructions.
> >
> > Indeed MVE supports fewer vector modes than Neon, so we use
> > the MVE_VLD_ST iterator where Neon uses VQX.
> >
> > Since the supported modes are different from the ones valid for
> > arithmetic operators, we introduce two new sets of macros:
> >
> > ARM_HAVE_NEON_<MODE>_LDST
> >   true if Neon has vector load/store instructions for <MODE>
> >
> > ARM_HAVE_<MODE>_LDST
> >   true if any vector extension has vector load/store instructions for <MODE>
> >
> > We move the movmisalign<mode> expander from neon.md to vec-common.md, and
> > replace the TARGET_NEON enabler with ARM_HAVE_<MODE>_LDST.
> >
> > The patch also updates the mve-vneg.c test to scan for the better code
> > generation when loading and storing the vectors involved: it checks
> > that no 'orr' instruction is generated to cope with misalignment at
> > runtime.
> > This test was chosen among the other mve tests, but any other should
> > be OK. Using a plain vector copy loop (dest[i] = a[i]) is not a good
> > test because the compiler chooses to use memcpy.
> >
> > For instance we now generate:
> > test_vneg_s32x4:
> >         vldrw.32       q3, [r1]
> >         vneg.s32  q3, q3
> >         vstrw.32       q3, [r0]
> >         bx      lr
> >
> > instead of:
> > test_vneg_s32x4:
> >         orr     r3, r1, r0
> >         lsls    r3, r3, #28
> >         bne     .L15
> >         vldrw.32        q3, [r1]
> >         vneg.s32  q3, q3
> >         vstrw.32        q3, [r0]
> >         bx      lr
> >         .L15:
> >         push    {r4, r5}
> >         ldrd    r2, r3, [r1, #8]
> >         ldrd    r5, r4, [r1]
> >         rsbs    r2, r2, #0
> >         rsbs    r5, r5, #0
> >         rsbs    r4, r4, #0
> >         rsbs    r3, r3, #0
> >         strd    r5, r4, [r0]
> >         pop     {r4, r5}
> >         strd    r2, r3, [r0, #8]
> >         bx      lr
> >
> > 2020-12-15  Christophe Lyon  <christophe.l...@linaro.org>
> >
> >         PR target/97875
> >         gcc/
> >         * config/arm/arm.h (ARM_HAVE_NEON_V8QI_LDST): New macro.
> >         (ARM_HAVE_NEON_V16QI_LDST, ARM_HAVE_NEON_V4HI_LDST): Likewise.
> >         (ARM_HAVE_NEON_V8HI_LDST, ARM_HAVE_NEON_V2SI_LDST): Likewise.
> >         (ARM_HAVE_NEON_V4SI_LDST, ARM_HAVE_NEON_V4HF_LDST): Likewise.
> >         (ARM_HAVE_NEON_V8HF_LDST, ARM_HAVE_NEON_V4BF_LDST): Likewise.
> >         (ARM_HAVE_NEON_V8BF_LDST, ARM_HAVE_NEON_V2SF_LDST): Likewise.
> >         (ARM_HAVE_NEON_V4SF_LDST, ARM_HAVE_NEON_DI_LDST): Likewise.
> >         (ARM_HAVE_NEON_V2DI_LDST): Likewise.
> >         (ARM_HAVE_V8QI_LDST, ARM_HAVE_V16QI_LDST): Likewise.
> >         (ARM_HAVE_V4HI_LDST, ARM_HAVE_V8HI_LDST): Likewise.
> >         (ARM_HAVE_V2SI_LDST, ARM_HAVE_V4SI_LDST, ARM_HAVE_V4HF_LDST): 
> > Likewise.
> >         (ARM_HAVE_V8HF_LDST, ARM_HAVE_V4BF_LDST, ARM_HAVE_V8BF_LDST): 
> > Likewise.
> >         (ARM_HAVE_V2SF_LDST, ARM_HAVE_V4SF_LDST, ARM_HAVE_DI_LDST): 
> > Likewise.
> >         (ARM_HAVE_V2DI_LDST): Likewise.
> >         * config/arm/mve.md (*movmisalign<mode>_mve_store): New pattern.
> >         (*movmisalign<mode>_mve_load): New pattern.
> >         * config/arm/neon.md (movmisalign<mode>): Move to ...
> >         * config/arm/vec-common.md: ... here.
> >
> >         PR target/97875
> >         gcc/testsuite/
> >         * gcc.target/arm/simd/mve-vneg.c: Update test.
> > ---
> >  gcc/config/arm/arm.h                         | 40 
> > ++++++++++++++++++++++++++++
> >  gcc/config/arm/mve.md                        | 25 +++++++++++++++++
> >  gcc/config/arm/neon.md                       | 25 -----------------
> >  gcc/config/arm/vec-common.md                 | 24 +++++++++++++++++
> >  gcc/testsuite/gcc.target/arm/simd/mve-vneg.c |  3 +++
> >  5 files changed, 92 insertions(+), 25 deletions(-)
> >
> > diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
> > index 4a63d33..d44e0c6 100644
> > --- a/gcc/config/arm/arm.h
> > +++ b/gcc/config/arm/arm.h
> > @@ -1151,6 +1151,46 @@ extern const int arm_arch_cde_coproc_bits[];
> >  #define ARM_HAVE_V8HF_ARITH (ARM_HAVE_NEON_V8HF_ARITH || 
> > TARGET_HAVE_MVE_FLOAT)
> >  #define ARM_HAVE_V4SF_ARITH (ARM_HAVE_NEON_V4SF_ARITH || 
> > TARGET_HAVE_MVE_FLOAT)
> >
> > +/* The conditions under which vector modes are supported by load/store
> > +   instructions using Neon.  */
> > +
> > +#define ARM_HAVE_NEON_V8QI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V16QI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V4HI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V8HI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V2SI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V4SI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V4HF_LDST TARGET_NEON_FP16INST
> > +#define ARM_HAVE_NEON_V8HF_LDST TARGET_NEON_FP16INST
> > +#define ARM_HAVE_NEON_V4BF_LDST TARGET_BF16_SIMD
> > +#define ARM_HAVE_NEON_V8BF_LDST TARGET_BF16_SIMD
> > +#define ARM_HAVE_NEON_V2SF_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V4SF_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_DI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V2DI_LDST TARGET_NEON
> > +
> > +/* The conditions under which vector modes are supported by load/store
> > +   instructions by any vector extension.  */
> > +
> > +#define ARM_HAVE_V8QI_LDST (ARM_HAVE_NEON_V8QI_LDST || 
> > TARGET_REALLY_IWMMXT)
> > +#define ARM_HAVE_V4HI_LDST (ARM_HAVE_NEON_V4HI_LDST || 
> > TARGET_REALLY_IWMMXT)
> > +#define ARM_HAVE_V2SI_LDST (ARM_HAVE_NEON_V2SI_LDST || 
> > TARGET_REALLY_IWMMXT)
> > +
> > +#define ARM_HAVE_V16QI_LDST (ARM_HAVE_NEON_V16QI_LDST || TARGET_HAVE_MVE)
> > +#define ARM_HAVE_V8HI_LDST (ARM_HAVE_NEON_V8HI_LDST || TARGET_HAVE_MVE)
> > +#define ARM_HAVE_V4SI_LDST (ARM_HAVE_NEON_V4SI_LDST || TARGET_HAVE_MVE)
> > +#define ARM_HAVE_DI_LDST ARM_HAVE_NEON_DI_LDST
> > +#define ARM_HAVE_V2DI_LDST ARM_HAVE_NEON_V2DI_LDST
> > +
> > +#define ARM_HAVE_V4HF_LDST ARM_HAVE_NEON_V4HF_LDST
> > +#define ARM_HAVE_V2SF_LDST ARM_HAVE_NEON_V2SF_LDST
> > +
> > +#define ARM_HAVE_V4BF_LDST ARM_HAVE_NEON_V4BF_LDST
> > +#define ARM_HAVE_V8BF_LDST ARM_HAVE_NEON_V8BF_LDST
> > +
> > +#define ARM_HAVE_V8HF_LDST (ARM_HAVE_NEON_V8HF_LDST || 
> > TARGET_HAVE_MVE_FLOAT)
> > +#define ARM_HAVE_V4SF_LDST (ARM_HAVE_NEON_V4SF_LDST || 
> > TARGET_HAVE_MVE_FLOAT)
> > +
> >  /* The register numbers in sequence, for passing to arm_gen_load_multiple. 
> >  */
> >  extern int arm_regs_in_sequence[];
> >
> > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> > index b4c5a1e2..673a83c 100644
> > --- a/gcc/config/arm/mve.md
> > +++ b/gcc/config/arm/mve.md
> > @@ -10937,3 +10937,28 @@ (define_insn "arm_vcx3q<a>_p_v16qi"
> >    [(set_attr "type" "coproc")
> >     (set_attr "length" "8")]
> >  )
> > +
> > +(define_insn "*movmisalign<mode>_mve_store"
> > +  [(set (match_operand:MVE_VLD_ST 0 "neon_permissive_struct_operand"       
> >  "=Um")
> > +       (unspec:MVE_VLD_ST [(match_operand:MVE_VLD_ST 1 
> > "s_register_operand" " w")]
> > +        UNSPEC_MISALIGNED_ACCESS))]
> > +  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
> > +   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))
> > +   && !BYTES_BIG_ENDIAN && unaligned_access"
> > +  "vstr<V_sz_elem1>.<V_sz_elem>\t%q1, %E0"
> > +  [(set_attr "type" "mve_store")
> > +   (set_attr "length" "4")]
> > +)
> > +
> > +
> > +(define_insn "*movmisalign<mode>_mve_load"
> > +  [(set (match_operand:MVE_VLD_ST 0 "s_register_operand"                   
> >              "=w")
> > +       (unspec:MVE_VLD_ST [(match_operand:MVE_VLD_ST 1 
> > "neon_permissive_struct_operand" " Um")]
> > +        UNSPEC_MISALIGNED_ACCESS))]
> > +  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
> > +   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))
> > +   && !BYTES_BIG_ENDIAN && unaligned_access"
> > +  "vldr<V_sz_elem1>.<V_sz_elem>\t%q0, %E1"
> > +  [(set_attr "type" "mve_load")
> > +   (set_attr "length" "4")]
> > +)
> > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> > index d2e92ba..50220be 100644
> > --- a/gcc/config/arm/neon.md
> > +++ b/gcc/config/arm/neon.md
> > @@ -280,31 +280,6 @@ (define_split
> >    neon_disambiguate_copy (operands, dest, src, 4);
> >  })
> >
> > -(define_expand "movmisalign<mode>"
> > -  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
> > -       (unspec:VDQX [(match_operand:VDQX 1 
> > "neon_perm_struct_or_reg_operand")]
> > -                    UNSPEC_MISALIGNED_ACCESS))]
> > -  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
> > -{
> > -  rtx adjust_mem;
> > -  /* This pattern is not permitted to fail during expansion: if both 
> > arguments
> > -     are non-registers (e.g. memory := constant, which can be created by 
> > the
> > -     auto-vectorizer), force operand 1 into a register.  */
> > -  if (!s_register_operand (operands[0], <MODE>mode)
> > -      && !s_register_operand (operands[1], <MODE>mode))
> > -    operands[1] = force_reg (<MODE>mode, operands[1]);
> > -
> > -  if (s_register_operand (operands[0], <MODE>mode))
> > -    adjust_mem = operands[1];
> > -  else
> > -    adjust_mem = operands[0];
> > -
> > -  /* Legitimize address.  */
> > -  if (!neon_vector_mem_operand (adjust_mem, 2, true))
> > -    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
> > -
> > -})
> > -
> >  (define_insn "*movmisalign<mode>_neon_store"
> >    [(set (match_operand:VDX 0 "neon_permissive_struct_operand"  "=Um")
> >         (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
> > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
> > index 2d0932b..f6a79e2 100644
> > --- a/gcc/config/arm/vec-common.md
> > +++ b/gcc/config/arm/vec-common.md
> > @@ -205,3 +205,27 @@ (define_expand "neg<mode>2"
> >         (neg:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))]
> >    "ARM_HAVE_<MODE>_ARITH"
> >  )
> > +
> > +(define_expand "movmisalign<mode>"
> > +  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
> > +       (unspec:VDQX [(match_operand:VDQX 1 
> > "neon_perm_struct_or_reg_operand")]
> > +        UNSPEC_MISALIGNED_ACCESS))]
> > +  "ARM_HAVE_<MODE>_LDST && !BYTES_BIG_ENDIAN && unaligned_access"
> > +{
> > +  rtx adjust_mem;
> > +  /* This pattern is not permitted to fail during expansion: if both 
> > arguments
> > +     are non-registers (e.g. memory := constant, which can be created by 
> > the
> > +     auto-vectorizer), force operand 1 into a register.  */
> > +  if (!s_register_operand (operands[0], <MODE>mode)
> > +      && !s_register_operand (operands[1], <MODE>mode))
> > +    operands[1] = force_reg (<MODE>mode, operands[1]);
> > +
> > +  if (s_register_operand (operands[0], <MODE>mode))
> > +    adjust_mem = operands[1];
> > +  else
> > +    adjust_mem = operands[0];
> > +
> > +  /* Legitimize address.  */
> > +  if (!neon_vector_mem_operand (adjust_mem, 2, true))
> > +    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
> > +})
> > diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c 
> > b/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
> > index afd0d60..7945a06 100644
> > --- a/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
> > +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
> > @@ -47,3 +47,6 @@ FUNC(f, float, 16, 8, -, vneg)
> >     functions above.  */
> >  /* { dg-final { scan-assembler-times {vneg.s[0-9]+  q[0-9]+, q[0-9]+} 6 } 
> > } */
> >  /* { dg-final { scan-assembler-times {vneg.f[0-9]+  q[0-9]+, q[0-9]+} 2 } 
> > } */
> > +/* { dg-final { scan-assembler-times {vldr[bhw].[0-9]+\tq[0-9]+} 8 } } */
> > +/* { dg-final { scan-assembler-times {vstr[bhw].[0-9]+\tq[0-9]+} 8 } } */
> > +/* { dg-final { scan-assembler-not {orr\tr[0-9]+, r[0-9]+, r[0-9]+} } } */
> > --
> > 2.7.4
> >

Reply via email to