Andrew Pinski <quic_apin...@quicinc.com> writes: > The backend currently defines a whole vector shift left for 64bit vectors, > adding the > shift right can also improve code for some PERMs too. So this adds that > pattern.
Is this reversed? It looks like we have the shift right and the patch is adding the shift left (at least in GCC internal and little-endian terms). But on many Arm cores, EXT has a higher throughput than SHL, so I don't think we should do this unconditionally. Thanks, Richard > > I added a testcase for the shift left also. I also fixed the instruction > template > there which was using a space instead of a tab after the instruction. > > Built and tested on aarch64-linux-gnu. > > PR target/113872 > > gcc/ChangeLog: > > * config/aarch64/aarch64-simd.md (vec_shr_<mode><vczle><vczbe>): Use > tab instead of space after > the instruction in the template. > (vec_shl_<mode><vczle><vczbe>): New pattern > * config/aarch64/iterators.md (unspec): Add UNSPEC_VEC_SHL > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/perm_zero-1.c: New test. > * gcc.target/aarch64/perm_zero-2.c: New test. > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > --- > gcc/config/aarch64/aarch64-simd.md | 18 ++++++++++++++++-- > gcc/config/aarch64/iterators.md | 1 + > gcc/testsuite/gcc.target/aarch64/perm_zero-1.c | 15 +++++++++++++++ > gcc/testsuite/gcc.target/aarch64/perm_zero-2.c | 15 +++++++++++++++ > 4 files changed, 47 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/perm_zero-1.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/perm_zero-2.c > > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index f8bb973a278..0d2f1ea3902 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -1592,9 +1592,23 @@ (define_insn "vec_shr_<mode><vczle><vczbe>" > "TARGET_SIMD" > { > if (BYTES_BIG_ENDIAN) > - return "shl %d0, %d1, %2"; > + return "shl\t%d0, %d1, %2"; > else > - return "ushr %d0, %d1, %2"; > + return "ushr\t%d0, %d1, %2"; > + } > + [(set_attr "type" "neon_shift_imm")] > +) > +(define_insn "vec_shl_<mode><vczle><vczbe>" > + [(set (match_operand:VD 0 "register_operand" "=w") > + (unspec:VD [(match_operand:VD 1 "register_operand" "w") > + (match_operand:SI 2 "immediate_operand" "i")] > + UNSPEC_VEC_SHL))] > + "TARGET_SIMD" > + { > + if (BYTES_BIG_ENDIAN) > + return "ushr\t%d0, %d1, %2"; > + else > + return "shl\t%d0, %d1, %2"; > } > [(set_attr "type" "neon_shift_imm")] > ) > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md > index 99cde46f1ba..3aebe9cf18a 100644 > --- a/gcc/config/aarch64/iterators.md > +++ b/gcc/config/aarch64/iterators.md > @@ -758,6 +758,7 @@ (define_c_enum "unspec" > UNSPEC_PMULL ; Used in aarch64-simd.md. > UNSPEC_PMULL2 ; Used in aarch64-simd.md. > UNSPEC_REV_REGLIST ; Used in aarch64-simd.md. > + UNSPEC_VEC_SHL ; Used in aarch64-simd.md. > UNSPEC_VEC_SHR ; Used in aarch64-simd.md. > UNSPEC_SQRDMLAH ; Used in aarch64-simd.md. > UNSPEC_SQRDMLSH ; Used in aarch64-simd.md. > diff --git a/gcc/testsuite/gcc.target/aarch64/perm_zero-1.c > b/gcc/testsuite/gcc.target/aarch64/perm_zero-1.c > new file mode 100644 > index 00000000000..3c8f0591a2f > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/perm_zero-1.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* PR target/113872 */ > +/* For 64bit vectors, PERM with a constant 0 should produce a shift instead > of the ext instruction. */ > + > +#define vect64 __attribute__((vector_size(8))) > + > +void f(vect64 unsigned short *a) > +{ > + *a = __builtin_shufflevector((vect64 unsigned short){0},*a, 3,4,5,6); > +} > + > +/* { dg-final { scan-assembler-times "ushr\t" 1 { target aarch64_big_endian > } } } */ > +/* { dg-final { scan-assembler-times "shl\t" 1 { target > aarch64_little_endian } } } */ > +/* { dg-final { scan-assembler-not "ext\t" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/perm_zero-2.c > b/gcc/testsuite/gcc.target/aarch64/perm_zero-2.c > new file mode 100644 > index 00000000000..970e428f832 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/perm_zero-2.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* PR target/113872 */ > +/* For 64bit vectors, PERM with a constant 0 should produce a shift instead > of the ext instruction. */ > + > +#define vect64 __attribute__((vector_size(8))) > + > +void f(vect64 unsigned short *a) > +{ > + *a = __builtin_shufflevector(*a, (vect64 unsigned short){0},3,4,5,6); > +} > + > +/* { dg-final { scan-assembler-times "shl\t" 1 { target aarch64_big_endian } > } } */ > +/* { dg-final { scan-assembler-times "ushr\t" 1 { target > aarch64_little_endian } } } */ > +/* { dg-final { scan-assembler-not "ext\t" } } */