Hi,
  Gentle ping for this patch:
  https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html

Gui Haochen
Thanks

在 2023/2/8 13:08, HAO CHEN GUI 写道:
> Hi,
>   The logical operations for TImode are currently split after the reload
> pass. Some potential optimizations are missed because the split happens too
> late. This patch removes TImode from the "AND", "IOR", "XOR" and "NOT"
> expanders so that these logical operations can be split at the expand pass.
> The new test case illustrates the optimization.
> 
>   The two test cases for pr92398 are merged into one, as all sub-targets
> generate the same sequence of instructions with the patch.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> 
> Thanks
> Gui Haochen
> 
> 
> ChangeLog
> 2023-02-08  Haochen Gui <guih...@linux.ibm.com>
> 
> gcc/
>       PR target/100694
>       * config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit
>       vector types.
>       (and<mode>3): Replace BOOL_128 with BOOL_128_V.
>       (ior<mode>3): Likewise.
>       (xor<mode>3): Likewise.
>       (one_cmpl<mode>2 expander): New expander with BOOL_128_V.
>       (one_cmpl<mode>2 insn_and_split): Rename to ...
>       (*one_cmpl<mode>2): ... this.
> 
> gcc/testsuite/
>       PR target/100694
>       * gcc.target/powerpc/pr100694.c: New.
>       * gcc.target/powerpc/pr92398.c: New.
>       * gcc.target/powerpc/pr92398.h: Remove.
>       * gcc.target/powerpc/pr92398.p9-.c: Remove.
>       * gcc.target/powerpc/pr92398.p9+.c: Remove.
> 
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 4bd1dfd3da9..455b7329643 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128           [TI
>                                        (V2DF  "TARGET_ALTIVEC")
>                                        (V1TI  "TARGET_ALTIVEC")])
> 
> +;; Mode iterator for logical operations on 128-bit vector types
> +(define_mode_iterator BOOL_128_V     [(V16QI "TARGET_ALTIVEC")
> +                                      (V8HI  "TARGET_ALTIVEC")
> +                                      (V4SI  "TARGET_ALTIVEC")
> +                                      (V4SF  "TARGET_ALTIVEC")
> +                                      (V2DI  "TARGET_ALTIVEC")
> +                                      (V2DF  "TARGET_ALTIVEC")
> +                                      (V1TI  "TARGET_ALTIVEC")])
> +
>  ;; For the GPRs we use 3 constraints for register outputs, two that are the
>  ;; same as the output register, and a third where the output register is an
>  ;; early clobber, so we don't have to deal with register overlaps.  For the
> @@ -7135,23 +7144,23 @@ (define_expand "subti3"
>  ;; 128-bit logical operations expanders
> 
>  (define_expand "and<mode>3"
> -  [(set (match_operand:BOOL_128 0 "vlogical_operand")
> -     (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
> -                   (match_operand:BOOL_128 2 "vlogical_operand")))]
> +  [(set (match_operand:BOOL_128_V 0 "vlogical_operand")
> +     (and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand")
> +                     (match_operand:BOOL_128_V 2 "vlogical_operand")))]
>    ""
>    "")
> 
>  (define_expand "ior<mode>3"
> -  [(set (match_operand:BOOL_128 0 "vlogical_operand")
> -        (ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
> -                   (match_operand:BOOL_128 2 "vlogical_operand")))]
> +  [(set (match_operand:BOOL_128_V 0 "vlogical_operand")
> +     (ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand")
> +                     (match_operand:BOOL_128_V 2 "vlogical_operand")))]
>    ""
>    "")
> 
>  (define_expand "xor<mode>3"
> -  [(set (match_operand:BOOL_128 0 "vlogical_operand")
> -        (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
> -                   (match_operand:BOOL_128 2 "vlogical_operand")))]
> +  [(set (match_operand:BOOL_128_V 0 "vlogical_operand")
> +     (xor:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand")
> +                     (match_operand:BOOL_128_V 2 "vlogical_operand")))]
>    ""
>    "")
> 
> @@ -7449,7 +7458,14 @@ (define_insn_and_split "*eqv<mode>3_internal2"
>        (const_string "16")))])
> 
>  ;; 128-bit one's complement
> -(define_insn_and_split "one_cmpl<mode>2"
> +(define_expand "one_cmpl<mode>2"
> +[(set (match_operand:BOOL_128_V 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
> +     (not:BOOL_128_V
> +       (match_operand:BOOL_128_V 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))]
> +  ""
> +  "")
> +
> +(define_insn_and_split "*one_cmpl<mode>2"
>    [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
>       (not:BOOL_128
>         (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))]
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c b/gcc/testsuite/gcc.target/powerpc/pr100694.c
> new file mode 100644
> index 00000000000..96a895d6c44
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr100694.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 3 } } */
> +
> +/* It just needs two std and one blr.  */
> +void foo (unsigned __int128* res, unsigned long long hi, unsigned long long lo)
> +{
> +   unsigned __int128 i = hi;
> +   i <<= 64;
> +   i |= lo;
> +   *res = i;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.c b/gcc/testsuite/gcc.target/powerpc/pr92398.c
> new file mode 100644
> index 00000000000..7d6201cc5bb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr92398.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { scan-assembler-times {\mnot\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mstd\M} 2 } } */
> +
> +/* All platforms should generate the same instructions: not;not;std;std.  */
> +void bar (__int128_t *dst, __int128_t src)
> +{
> +  *dst =  ~src;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.h b/gcc/testsuite/gcc.target/powerpc/pr92398.h
> deleted file mode 100644
> index 5a4a8bcab80..00000000000
> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.h
> +++ /dev/null
> @@ -1,17 +0,0 @@
> -/* This test code is included into pr92398.p9-.c and pr92398.p9+.c.
> -   The two files have the tests for the number of instructions generated for
> -   P9- versus P9+.
> -
> -   store generates difference instructions as below:
> -   P9+: mtvsrdd;xxlnot;stxv.
> -   P8/P7/P6 LE: not;not;std;std.
> -   P8 BE: mtvsrd;mtvsrd;xxpermdi;xxlnor;stxvd2x.
> -   P7/P6 BE: std;std;addi;lxvd2x;xxlnor;stxvd2x.
> -   P9+ and P9- LE are expected, P6/P7/P8 BE are unexpected.  */
> -
> -void
> -bar (__int128_t *dst, __int128_t src)
> -{
> -  *dst =  ~src;
> -}
> -
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
> deleted file mode 100644
> index 72dd1d9a274..00000000000
> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
> +++ /dev/null
> @@ -1,12 +0,0 @@
> -/* { dg-do compile { target { lp64 && has_arch_pwr9 } } } */
> -/* { dg-require-effective-target powerpc_vsx_ok } */
> -/* { dg-options "-O2 -mvsx" } */
> -
> -/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mxxlnor\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mstxv\M} 1 } } */
> -/* { dg-final { scan-assembler-not {\mld\M} } } */
> -/* { dg-final { scan-assembler-not {\mnot\M} } } */
> -
> -/* Source code for the test in pr92398.h */
> -#include "pr92398.h"
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
> deleted file mode 100644
> index bd7fa98af51..00000000000
> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
> +++ /dev/null
> @@ -1,10 +0,0 @@
> -/* { dg-do compile { target { lp64 && {! has_arch_pwr9} } } } */
> -/* { dg-require-effective-target powerpc_vsx_ok } */
> -/* { dg-options "-O2 -mvsx" } */
> -
> -/* { dg-final { scan-assembler-times {\mnot\M} 2 { xfail be } } } */
> -/* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { { {! has_arch_pwr9} && has_arch_pwr8 } && be } } } } */
> -
> -/* Source code for the test in pr92398.h */
> -#include "pr92398.h"
> -

Reply via email to