ping^2?
On Wed, 30 Dec 2020 at 11:33, Christophe Lyon <christophe.l...@linaro.org> wrote:
>
> ping?
>
> On Thu, 17 Dec 2020 at 18:48, Christophe Lyon
> <christophe.l...@linaro.org> wrote:
> >
> > This patch adds new movmisalign<mode>_mve_load and store patterns for
> > MVE to help vectorization. They are very similar to their Neon
> > counterparts, but use different iterators and instructions.
> >
> > Indeed MVE supports fewer vector modes than Neon, so we use
> > the MVE_VLD_ST iterator where Neon uses VQX.
> >
> > Since the supported modes are different from the ones valid for
> > arithmetic operators, we introduce two new sets of macros:
> >
> > ARM_HAVE_NEON_<MODE>_LDST
> >   true if Neon has vector load/store instructions for <MODE>
> >
> > ARM_HAVE_<MODE>_LDST
> >   true if any vector extension has vector load/store instructions for <MODE>
> >
> > We move the movmisalign<mode> expander from neon.md to vec-common.md, and
> > replace the TARGET_NEON enabler with ARM_HAVE_<MODE>_LDST.
> >
> > The patch also updates the mve-vneg.c test to check for the improved code
> > generation when loading and storing the vectors involved: it verifies
> > that no 'orr' instruction is generated to cope with misalignment at
> > runtime.
> > This test was chosen among the other MVE tests, but any other one should
> > be OK. Using a plain vector copy loop (dest[i] = a[i]) is not a good
> > test because the compiler chooses to use memcpy.
> >
> > For instance we now generate:
> > test_vneg_s32x4:
> >         vldrw.32  q3, [r1]
> >         vneg.s32  q3, q3
> >         vstrw.32  q3, [r0]
> >         bx      lr
> >
> > instead of:
> > test_vneg_s32x4:
> >         orr     r3, r1, r0
> >         lsls    r3, r3, #28
> >         bne     .L15
> >         vldrw.32  q3, [r1]
> >         vneg.s32  q3, q3
> >         vstrw.32  q3, [r0]
> >         bx      lr
> > .L15:
> >         push    {r4, r5}
> >         ldrd    r2, r3, [r1, #8]
> >         ldrd    r5, r4, [r1]
> >         rsbs    r2, r2, #0
> >         rsbs    r5, r5, #0
> >         rsbs    r4, r4, #0
> >         rsbs    r3, r3, #0
> >         strd    r5, r4, [r0]
> >         pop     {r4, r5}
> >         strd    r2, r3, [r0, #8]
> >         bx      lr
> >
> > 2020-12-15  Christophe Lyon  <christophe.l...@linaro.org>
> >
> >         PR target/97875
> >         gcc/
> >         * config/arm/arm.h (ARM_HAVE_NEON_V8QI_LDST): New macro.
> >         (ARM_HAVE_NEON_V16QI_LDST, ARM_HAVE_NEON_V4HI_LDST): Likewise.
> >         (ARM_HAVE_NEON_V8HI_LDST, ARM_HAVE_NEON_V2SI_LDST): Likewise.
> >         (ARM_HAVE_NEON_V4SI_LDST, ARM_HAVE_NEON_V4HF_LDST): Likewise.
> >         (ARM_HAVE_NEON_V8HF_LDST, ARM_HAVE_NEON_V4BF_LDST): Likewise.
> >         (ARM_HAVE_NEON_V8BF_LDST, ARM_HAVE_NEON_V2SF_LDST): Likewise.
> >         (ARM_HAVE_NEON_V4SF_LDST, ARM_HAVE_NEON_DI_LDST): Likewise.
> >         (ARM_HAVE_NEON_V2DI_LDST): Likewise.
> >         (ARM_HAVE_V8QI_LDST, ARM_HAVE_V16QI_LDST): Likewise.
> >         (ARM_HAVE_V4HI_LDST, ARM_HAVE_V8HI_LDST): Likewise.
> >         (ARM_HAVE_V2SI_LDST, ARM_HAVE_V4SI_LDST, ARM_HAVE_V4HF_LDST): Likewise.
> >         (ARM_HAVE_V8HF_LDST, ARM_HAVE_V4BF_LDST, ARM_HAVE_V8BF_LDST): Likewise.
> >         (ARM_HAVE_V2SF_LDST, ARM_HAVE_V4SF_LDST, ARM_HAVE_DI_LDST): Likewise.
> >         (ARM_HAVE_V2DI_LDST): Likewise.
> >         * config/arm/mve.md (*movmisalign<mode>_mve_store): New pattern.
> >         (*movmisalign<mode>_mve_load): New pattern.
> >         * config/arm/neon.md (movmisalign<mode>): Move to ...
> >         * config/arm/vec-common.md: ... here.
> >
> >         PR target/97875
> >         gcc/testsuite/
> >         * gcc.target/arm/simd/mve-vneg.c: Update test.
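> >
> > For reference, the assembly snippets above correspond to a function
> > shaped roughly like the following (a hand-expanded sketch of one FUNC
> > macro instance from mve-vneg.c; the names and exact layout are
> > approximate, not the literal test source):
> >
> >   #include <stdint.h>
> >
> >   /* Negate 4 x 32-bit elements; with MVE enabled this loop is
> >      vectorized into the vldrw.32/vneg.s32/vstrw.32 sequence shown
> >      above.  */
> >   void
> >   test_vneg_s32x4 (int32_t * __restrict__ dest, int32_t * __restrict__ a)
> >   {
> >     int i;
> >     for (i = 0; i < 4; i++)
> >       dest[i] = -a[i];
> >   }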
> > ---
> >  gcc/config/arm/arm.h                         | 40 ++++++++++++++++++++++++++++
> >  gcc/config/arm/mve.md                        | 25 +++++++++++++++++
> >  gcc/config/arm/neon.md                       | 25 -----------------
> >  gcc/config/arm/vec-common.md                 | 24 +++++++++++++++++
> >  gcc/testsuite/gcc.target/arm/simd/mve-vneg.c |  3 +++
> >  5 files changed, 92 insertions(+), 25 deletions(-)
> >
> > diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
> > index 4a63d33..d44e0c6 100644
> > --- a/gcc/config/arm/arm.h
> > +++ b/gcc/config/arm/arm.h
> > @@ -1151,6 +1151,46 @@ extern const int arm_arch_cde_coproc_bits[];
> >  #define ARM_HAVE_V8HF_ARITH (ARM_HAVE_NEON_V8HF_ARITH || TARGET_HAVE_MVE_FLOAT)
> >  #define ARM_HAVE_V4SF_ARITH (ARM_HAVE_NEON_V4SF_ARITH || TARGET_HAVE_MVE_FLOAT)
> >
> > +/* The conditions under which vector modes are supported by load/store
> > +   instructions using Neon.  */
> > +
> > +#define ARM_HAVE_NEON_V8QI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V16QI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V4HI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V8HI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V2SI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V4SI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V4HF_LDST TARGET_NEON_FP16INST
> > +#define ARM_HAVE_NEON_V8HF_LDST TARGET_NEON_FP16INST
> > +#define ARM_HAVE_NEON_V4BF_LDST TARGET_BF16_SIMD
> > +#define ARM_HAVE_NEON_V8BF_LDST TARGET_BF16_SIMD
> > +#define ARM_HAVE_NEON_V2SF_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V4SF_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_DI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V2DI_LDST TARGET_NEON
> > +
> > +/* The conditions under which vector modes are supported by load/store
> > +   instructions by any vector extension.  */
> > +
> > +#define ARM_HAVE_V8QI_LDST (ARM_HAVE_NEON_V8QI_LDST || TARGET_REALLY_IWMMXT)
> > +#define ARM_HAVE_V4HI_LDST (ARM_HAVE_NEON_V4HI_LDST || TARGET_REALLY_IWMMXT)
> > +#define ARM_HAVE_V2SI_LDST (ARM_HAVE_NEON_V2SI_LDST || TARGET_REALLY_IWMMXT)
> > +
> > +#define ARM_HAVE_V16QI_LDST (ARM_HAVE_NEON_V16QI_LDST || TARGET_HAVE_MVE)
> > +#define ARM_HAVE_V8HI_LDST (ARM_HAVE_NEON_V8HI_LDST || TARGET_HAVE_MVE)
> > +#define ARM_HAVE_V4SI_LDST (ARM_HAVE_NEON_V4SI_LDST || TARGET_HAVE_MVE)
> > +#define ARM_HAVE_DI_LDST ARM_HAVE_NEON_DI_LDST
> > +#define ARM_HAVE_V2DI_LDST ARM_HAVE_NEON_V2DI_LDST
> > +
> > +#define ARM_HAVE_V4HF_LDST ARM_HAVE_NEON_V4HF_LDST
> > +#define ARM_HAVE_V2SF_LDST ARM_HAVE_NEON_V2SF_LDST
> > +
> > +#define ARM_HAVE_V4BF_LDST ARM_HAVE_NEON_V4BF_LDST
> > +#define ARM_HAVE_V8BF_LDST ARM_HAVE_NEON_V8BF_LDST
> > +
> > +#define ARM_HAVE_V8HF_LDST (ARM_HAVE_NEON_V8HF_LDST || TARGET_HAVE_MVE_FLOAT)
> > +#define ARM_HAVE_V4SF_LDST (ARM_HAVE_NEON_V4SF_LDST || TARGET_HAVE_MVE_FLOAT)
> > +
> >  /* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
> >  extern int arm_regs_in_sequence[];
> >
> > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> > index b4c5a1e2..673a83c 100644
> > --- a/gcc/config/arm/mve.md
> > +++ b/gcc/config/arm/mve.md
> > @@ -10937,3 +10937,28 @@ (define_insn "arm_vcx3q<a>_p_v16qi"
> >    [(set_attr "type" "coproc")
> >     (set_attr "length" "8")]
> >  )
> > +
> > +(define_insn "*movmisalign<mode>_mve_store"
> > +  [(set (match_operand:MVE_VLD_ST 0 "neon_permissive_struct_operand"   "=Um")
> > +        (unspec:MVE_VLD_ST [(match_operand:MVE_VLD_ST 1 "s_register_operand" " w")]
> > +         UNSPEC_MISALIGNED_ACCESS))]
> > +  "((TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
> > +    || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode)))
> > +   && !BYTES_BIG_ENDIAN && unaligned_access"
> > +  "vstr<V_sz_elem1>.<V_sz_elem>\t%q1, %E0"
> > +  [(set_attr "type" "mve_store")
> > +   (set_attr "length" "4")]
> > +)
> > +
> > +
> > +(define_insn "*movmisalign<mode>_mve_load"
> > +  [(set (match_operand:MVE_VLD_ST 0 "s_register_operand"   "=w")
> > +        (unspec:MVE_VLD_ST [(match_operand:MVE_VLD_ST 1 "neon_permissive_struct_operand" " Um")]
> > +         UNSPEC_MISALIGNED_ACCESS))]
> > +  "((TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
> > +    || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode)))
> > +   && !BYTES_BIG_ENDIAN && unaligned_access"
> > +  "vldr<V_sz_elem1>.<V_sz_elem>\t%q0, %E1"
> > +  [(set_attr "type" "mve_load")
> > +   (set_attr "length" "4")]
> > +)
> > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> > index d2e92ba..50220be 100644
> > --- a/gcc/config/arm/neon.md
> > +++ b/gcc/config/arm/neon.md
> > @@ -280,31 +280,6 @@ (define_split
> >    neon_disambiguate_copy (operands, dest, src, 4);
> >  })
> >
> > -(define_expand "movmisalign<mode>"
> > -  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
> > -        (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
> > -         UNSPEC_MISALIGNED_ACCESS))]
> > -  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
> > -{
> > -  rtx adjust_mem;
> > -  /* This pattern is not permitted to fail during expansion: if both arguments
> > -     are non-registers (e.g. memory := constant, which can be created by the
> > -     auto-vectorizer), force operand 1 into a register.  */
> > -  if (!s_register_operand (operands[0], <MODE>mode)
> > -      && !s_register_operand (operands[1], <MODE>mode))
> > -    operands[1] = force_reg (<MODE>mode, operands[1]);
> > -
> > -  if (s_register_operand (operands[0], <MODE>mode))
> > -    adjust_mem = operands[1];
> > -  else
> > -    adjust_mem = operands[0];
> > -
> > -  /* Legitimize address.  */
> > -  if (!neon_vector_mem_operand (adjust_mem, 2, true))
> > -    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
> > -
> > -})
> > -
> >  (define_insn "*movmisalign<mode>_neon_store"
> >    [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
> >          (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
> > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
> > index 2d0932b..f6a79e2 100644
> > --- a/gcc/config/arm/vec-common.md
> > +++ b/gcc/config/arm/vec-common.md
> > @@ -205,3 +205,27 @@ (define_expand "neg<mode>2"
> >          (neg:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))]
> >    "ARM_HAVE_<MODE>_ARITH"
> >  )
> > +
> > +(define_expand "movmisalign<mode>"
> > +  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
> > +        (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
> > +         UNSPEC_MISALIGNED_ACCESS))]
> > +  "ARM_HAVE_<MODE>_LDST && !BYTES_BIG_ENDIAN && unaligned_access"
> > +{
> > +  rtx adjust_mem;
> > +  /* This pattern is not permitted to fail during expansion: if both arguments
> > +     are non-registers (e.g. memory := constant, which can be created by the
> > +     auto-vectorizer), force operand 1 into a register.  */
> > +  if (!s_register_operand (operands[0], <MODE>mode)
> > +      && !s_register_operand (operands[1], <MODE>mode))
> > +    operands[1] = force_reg (<MODE>mode, operands[1]);
> > +
> > +  if (s_register_operand (operands[0], <MODE>mode))
> > +    adjust_mem = operands[1];
> > +  else
> > +    adjust_mem = operands[0];
> > +
> > +  /* Legitimize address.  */
> > +  if (!neon_vector_mem_operand (adjust_mem, 2, true))
> > +    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
> > +})
> > diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c b/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
> > index afd0d60..7945a06 100644
> > --- a/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
> > +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
> > @@ -47,3 +47,6 @@ FUNC(f, float, 16, 8, -, vneg)
> >     functions above.  */
> >  /* { dg-final { scan-assembler-times {vneg.s[0-9]+ q[0-9]+, q[0-9]+} 6 } } */
> >  /* { dg-final { scan-assembler-times {vneg.f[0-9]+ q[0-9]+, q[0-9]+} 2 } } */
> > +/* { dg-final { scan-assembler-times {vldr[bhw].[0-9]+\tq[0-9]+} 8 } } */
> > +/* { dg-final { scan-assembler-times {vstr[bhw].[0-9]+\tq[0-9]+} 8 } } */
> > +/* { dg-final { scan-assembler-not {orr\tr[0-9]+, r[0-9]+, r[0-9]+} } } */
> > --
> > 2.7.4
> >