Andrew Pinski <quic_apin...@quicinc.com> writes:
> The backend currently defines a whole vector shift left for 64bit vectors,
> adding the shift right can also improve code for some PERMs too. So this
> adds that pattern.

Is this reversed?  It looks like we have the shift right and the patch is
adding the shift left (at least in GCC internal and little-endian terms).

But on many Arm cores, EXT has a higher throughput than SHL, so I don't think
we should do this unconditionally.

Thanks,
Richard

>
> I added a testcase for the shift left also. I also fixed the instruction
> template there which was using a space instead of a tab after the
> instruction.
>
> Built and tested on aarch64-linux-gnu.
>
>       PR target/113872
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64-simd.md (vec_shr_<mode><vczle><vczbe>): Use
>       tab instead of space after the instruction in the template.
>       (vec_shl_<mode><vczle><vczbe>): New pattern.
>       * config/aarch64/iterators.md (unspec): Add UNSPEC_VEC_SHL.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/perm_zero-1.c: New test.
>       * gcc.target/aarch64/perm_zero-2.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> ---
>  gcc/config/aarch64/aarch64-simd.md             | 18 ++++++++++++++++--
>  gcc/config/aarch64/iterators.md                |  1 +
>  gcc/testsuite/gcc.target/aarch64/perm_zero-1.c | 15 +++++++++++++++
>  gcc/testsuite/gcc.target/aarch64/perm_zero-2.c | 15 +++++++++++++++
>  4 files changed, 47 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/perm_zero-1.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/perm_zero-2.c
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index f8bb973a278..0d2f1ea3902 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -1592,9 +1592,23 @@ (define_insn "vec_shr_<mode><vczle><vczbe>"
>    "TARGET_SIMD"
>    {
>      if (BYTES_BIG_ENDIAN)
> -      return "shl %d0, %d1, %2";
> +      return "shl\t%d0, %d1, %2";
>      else
> -      return "ushr %d0, %d1, %2";
> +      return "ushr\t%d0, %d1, %2";
> +  }
> +  [(set_attr "type" "neon_shift_imm")]
> +)
> +(define_insn "vec_shl_<mode><vczle><vczbe>"
> +  [(set (match_operand:VD 0 "register_operand" "=w")
> +        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
> +                 (match_operand:SI 2 "immediate_operand" "i")]
> +                UNSPEC_VEC_SHL))]
> +  "TARGET_SIMD"
> +  {
> +    if (BYTES_BIG_ENDIAN)
> +      return "ushr\t%d0, %d1, %2";
> +    else
> +      return "shl\t%d0, %d1, %2";
>    }
>    [(set_attr "type" "neon_shift_imm")]
>  )
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 99cde46f1ba..3aebe9cf18a 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -758,6 +758,7 @@ (define_c_enum "unspec"
>      UNSPEC_PMULL        ; Used in aarch64-simd.md.
>      UNSPEC_PMULL2       ; Used in aarch64-simd.md.
>      UNSPEC_REV_REGLIST  ; Used in aarch64-simd.md.
> +    UNSPEC_VEC_SHL      ; Used in aarch64-simd.md.
>      UNSPEC_VEC_SHR      ; Used in aarch64-simd.md.
>      UNSPEC_SQRDMLAH     ; Used in aarch64-simd.md.
>      UNSPEC_SQRDMLSH     ; Used in aarch64-simd.md.
> diff --git a/gcc/testsuite/gcc.target/aarch64/perm_zero-1.c b/gcc/testsuite/gcc.target/aarch64/perm_zero-1.c
> new file mode 100644
> index 00000000000..3c8f0591a2f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/perm_zero-1.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2"  } */
> +/* PR target/113872 */
> +/* For 64bit vectors, PERM with a constant 0 should produce a shift instead of the ext instruction. */
> +
> +#define vect64 __attribute__((vector_size(8)))
> +
> +void f(vect64  unsigned short *a)
> +{
> +  *a = __builtin_shufflevector((vect64 unsigned short){0},*a, 3,4,5,6);
> +}
> +
> +/* { dg-final { scan-assembler-times "ushr\t" 1 { target aarch64_big_endian } } } */
> +/* { dg-final { scan-assembler-times "shl\t" 1 { target aarch64_little_endian } } } */
> +/* { dg-final { scan-assembler-not "ext\t"  } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/perm_zero-2.c b/gcc/testsuite/gcc.target/aarch64/perm_zero-2.c
> new file mode 100644
> index 00000000000..970e428f832
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/perm_zero-2.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2"  } */
> +/* PR target/113872 */
> +/* For 64bit vectors, PERM with a constant 0 should produce a shift instead of the ext instruction. */
> +
> +#define vect64 __attribute__((vector_size(8)))
> +
> +void f(vect64  unsigned short *a)
> +{
> +  *a = __builtin_shufflevector(*a, (vect64 unsigned short){0},3,4,5,6);
> +}
> +
> +/* { dg-final { scan-assembler-times "shl\t" 1 { target aarch64_big_endian } } } */
> +/* { dg-final { scan-assembler-times "ushr\t" 1 { target aarch64_little_endian } } } */
> +/* { dg-final { scan-assembler-not "ext\t"  } } */

Reply via email to