Hi, Gentle ping on this patch: https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html
Thanks Gui Haochen 在 2023/2/20 10:10, HAO CHEN GUI 写道: > Hi, > Gently ping this: > https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html > > Gui Haochen > Thanks > > 在 2023/2/8 13:08, HAO CHEN GUI 写道: >> Hi, >> The logical operations for TImode are split after the reload pass right now. >> Some >> potential optimizations are missed as the split is too late. This patch removes >> TImode from the "AND", "IOR", "XOR" and "NOT" expanders so that these logical >> operations can be split at the expand pass. The new test case illustrates the >> optimization. >> >> Two test cases of pr92398 are merged into one as all sub-targets generate >> the same sequence of instructions with the patch. >> >> Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. >> >> Thanks >> Gui Haochen >> >> >> ChangeLog >> 2023-02-08 Haochen Gui <guih...@linux.ibm.com> >> >> gcc/ >> PR target/100694 >> * config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit >> vector types. >> (and<mode>3): Replace BOOL_128 with BOOL_128_V. >> (ior<mode>3): Likewise. >> (xor<mode>3): Likewise. >> (one_cmpl<mode>2 expander): New expander with BOOL_128_V. >> (one_cmpl<mode>2 insn_and_split): Rename to ... >> (*one_cmpl<mode>2): ... this. >> >> gcc/testsuite/ >> PR target/100694 >> * gcc.target/powerpc/pr100694.c: New. >> * gcc.target/powerpc/pr92398.c: New. >> * gcc.target/powerpc/pr92398.h: Remove. >> * gcc.target/powerpc/pr92398.p9-.c: Remove. >> * gcc.target/powerpc/pr92398.p9+.c: Remove. 
>> >> >> patch.diff >> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >> index 4bd1dfd3da9..455b7329643 100644 >> --- a/gcc/config/rs6000/rs6000.md >> +++ b/gcc/config/rs6000/rs6000.md >> @@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128 [TI >> (V2DF "TARGET_ALTIVEC") >> (V1TI "TARGET_ALTIVEC")]) >> >> +;; Mode iterator for logical operations on 128-bit vector types >> +(define_mode_iterator BOOL_128_V [(V16QI "TARGET_ALTIVEC") >> + (V8HI "TARGET_ALTIVEC") >> + (V4SI "TARGET_ALTIVEC") >> + (V4SF "TARGET_ALTIVEC") >> + (V2DI "TARGET_ALTIVEC") >> + (V2DF "TARGET_ALTIVEC") >> + (V1TI "TARGET_ALTIVEC")]) >> + >> ;; For the GPRs we use 3 constraints for register outputs, two that are the >> ;; same as the output register, and a third where the output register is an >> ;; early clobber, so we don't have to deal with register overlaps. For the >> @@ -7135,23 +7144,23 @@ (define_expand "subti3" >> ;; 128-bit logical operations expanders >> >> (define_expand "and<mode>3" >> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >> - (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >> - (match_operand:BOOL_128 2 "vlogical_operand")))] >> + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") >> + (and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >> + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >> "" >> "") >> >> (define_expand "ior<mode>3" >> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >> - (ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >> - (match_operand:BOOL_128 2 "vlogical_operand")))] >> + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") >> + (ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >> + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >> "" >> "") >> >> (define_expand "xor<mode>3" >> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >> - (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >> - (match_operand:BOOL_128 2 "vlogical_operand")))] >> + 
[(set (match_operand:BOOL_128_V 0 "vlogical_operand") >> + (xor:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >> + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >> "" >> "") >> >> @@ -7449,7 +7458,14 @@ (define_insn_and_split "*eqv<mode>3_internal2" >> (const_string "16")))]) >> >> ;; 128-bit one's complement >> -(define_insn_and_split "one_cmpl<mode>2" >> +(define_expand "one_cmpl<mode>2" >> +[(set (match_operand:BOOL_128_V 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") >> + (not:BOOL_128_V >> + (match_operand:BOOL_128_V 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] >> + "" >> + "") >> + >> +(define_insn_and_split "*one_cmpl<mode>2" >> [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") >> (not:BOOL_128 >> (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c >> b/gcc/testsuite/gcc.target/powerpc/pr100694.c >> new file mode 100644 >> index 00000000000..96a895d6c44 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr100694.c >> @@ -0,0 +1,14 @@ >> +/* { dg-do compile } */ >> +/* { dg-require-effective-target int128 } */ >> +/* { dg-options "-O2" } */ >> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 3 } } */ >> + >> +/* It just needs two std and one blr. 
*/ >> +void foo (unsigned __int128* res, unsigned long long hi, unsigned long long >> lo) >> +{ >> + unsigned __int128 i = hi; >> + i <<= 64; >> + i |= lo; >> + *res = i; >> +} >> + >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.c >> b/gcc/testsuite/gcc.target/powerpc/pr92398.c >> new file mode 100644 >> index 00000000000..7d6201cc5bb >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr92398.c >> @@ -0,0 +1,12 @@ >> +/* { dg-do compile } */ >> +/* { dg-require-effective-target int128 } */ >> +/* { dg-options "-O2" } */ >> +/* { dg-final { scan-assembler-times {\mnot\M} 2 } } */ >> +/* { dg-final { scan-assembler-times {\mstd\M} 2 } } */ >> + >> +/* All platforms should generate the same instructions: not;not;std;std. */ >> +void bar (__int128_t *dst, __int128_t src) >> +{ >> + *dst = ~src; >> +} >> + >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.h >> b/gcc/testsuite/gcc.target/powerpc/pr92398.h >> deleted file mode 100644 >> index 5a4a8bcab80..00000000000 >> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.h >> +++ /dev/null >> @@ -1,17 +0,0 @@ >> -/* This test code is included into pr92398.p9-.c and pr92398.p9+.c. >> - The two files have the tests for the number of instructions generated for >> - P9- versus P9+. >> - >> - store generates difference instructions as below: >> - P9+: mtvsrdd;xxlnot;stxv. >> - P8/P7/P6 LE: not;not;std;std. >> - P8 BE: mtvsrd;mtvsrd;xxpermdi;xxlnor;stxvd2x. >> - P7/P6 BE: std;std;addi;lxvd2x;xxlnor;stxvd2x. >> - P9+ and P9- LE are expected, P6/P7/P8 BE are unexpected. 
*/ >> - >> -void >> -bar (__int128_t *dst, __int128_t src) >> -{ >> - *dst = ~src; >> -} >> - >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c >> b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c >> deleted file mode 100644 >> index 72dd1d9a274..00000000000 >> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c >> +++ /dev/null >> @@ -1,12 +0,0 @@ >> -/* { dg-do compile { target { lp64 && has_arch_pwr9 } } } */ >> -/* { dg-require-effective-target powerpc_vsx_ok } */ >> -/* { dg-options "-O2 -mvsx" } */ >> - >> -/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */ >> -/* { dg-final { scan-assembler-times {\mxxlnor\M} 1 } } */ >> -/* { dg-final { scan-assembler-times {\mstxv\M} 1 } } */ >> -/* { dg-final { scan-assembler-not {\mld\M} } } */ >> -/* { dg-final { scan-assembler-not {\mnot\M} } } */ >> - >> -/* Source code for the test in pr92398.h */ >> -#include "pr92398.h" >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c >> b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c >> deleted file mode 100644 >> index bd7fa98af51..00000000000 >> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c >> +++ /dev/null >> @@ -1,10 +0,0 @@ >> -/* { dg-do compile { target { lp64 && {! has_arch_pwr9} } } } */ >> -/* { dg-require-effective-target powerpc_vsx_ok } */ >> -/* { dg-options "-O2 -mvsx" } */ >> - >> -/* { dg-final { scan-assembler-times {\mnot\M} 2 { xfail be } } } */ >> -/* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { { {! >> has_arch_pwr9} && has_arch_pwr8 } && be } } } } */ >> - >> -/* Source code for the test in pr92398.h */ >> -#include "pr92398.h" >> -