Re: [PATCH-1v2, rs6000] Enable SImode in FP registers on P7 [PR88558]
Hi Kewen, 在 2023/9/12 17:33, Kewen.Lin 写道: > Ok, at least regression testing doesn't expose any needs to do disparaging > for this. Could you also test this patch with SPEC2017 for P7 and P8 > separately at options like -O2 or -O3, to see if there is any assembly > change, and if yes filtering out some typical to check it's expected or > not? I think it can help us to better evaluate the impact. Thanks! Just compared the object files of SPEC2017 for P7 and P8. There is no difference between P7s'. For P8, some different object files are found. All differences are the same. Patched object files replace xxlor with fmr. It's expected as the fmr is added ahead of xxlor in "*movsi_internal1". Thanks Gui Haochen
[PATCH-1v2, rs6000] Enable SImode in FP registers on P7 [PR88558]
Hi, This patch enables SImode in FP registers on P7. Instruction "fctiw" stores its integer output in an FP register. So SImode in FP register needs be enabled on P7 if we want support "fctiw" on P7. The test case is in the second patch which implements 32bit inline lrint. Compared to the last version, the main change it to remove disparaging on the alternatives of "fmr". Test shows it doesn't cause regression. https://gcc.gnu.org/pipermail/gcc-patches/2023-August/628435.html Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. ChangeLog rs6000: enable SImode in FP register on P7 gcc/ PR target/88558 * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Enable SImode in FP registers on P7. * config/rs6000/rs6000.md (*movsi_internal1): Add fmr for SImode move between FP registers. Set attribute isa of stfiwx to "*" and attribute of stxsiwx to "p7". patch.diff diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 44b448d2ba6..99085c2cdd7 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1903,7 +1903,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD) return 1; - if (TARGET_P8_VECTOR && (mode == SImode)) + if (TARGET_POPCNTD && mode == SImode) return 1; if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode)) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index cdab49fbb91..edf49bd74e3 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -7566,7 +7566,7 @@ (define_split (define_insn "*movsi_internal1" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r, r, + "=r, r, d, r, d, v, m, ?Z, ?Z, r, r, r, r, @@ -7575,7 +7575,7 @@ (define_insn "*movsi_internal1" wa, r, r, *h, *h") (match_operand:SI 1 "input_operand" - "r, U, + "r, U, d, m, ?Z, ?Z, r, d, v, I, L, eI, n, @@ -7588,6 +7588,7 @@ (define_insn "*movsi_internal1" "@ mr %0,%1 la %0,%a1 + fmr %0,%1 lwz%U1%X1 %0,%1 lfiwzx 
%0,%y1 lxsiwzx %x0,%y1 @@ -7611,7 +7612,7 @@ (define_insn "*movsi_internal1" mt%0 %1 nop" [(set_attr "type" - "*, *, + "*, *, fpsimple, load, fpload, fpload, store, fpstore,fpstore, *, *, *, *, @@ -7620,7 +7621,7 @@ (define_insn "*movsi_internal1" mtvsr, mfvsr, *, *, *") (set_attr "length" - "*, *, + "*, *, *, *, *, *, *, *, *, *, *, *, 8, @@ -7629,9 +7630,9 @@ (define_insn "*movsi_internal1" *, *, *, *, *") (set_attr "isa" - "*, *, - *, p8v,p8v, - *, p8v,p8v, + "*, *, *, + *, p7, p8v, + *, *, p8v, *, *, p10,*, p8v,p9v,p9v,p8v, p9v,p8v,p9v,
[PATCH-2v2, rs6000] Implement 32bit inline lrint [PR88558]
Hi, This patch implements 32bit inline lrint by "fctiw". It depends on the patch1 to do SImode move from FP registers on P7. Compared to last version, the main change is to add tests for "lrintf" and adjust the count of corresponding instructions. https://gcc.gnu.org/pipermail/gcc-patches/2023-August/628436.html Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: support 32bit inline lrint gcc/ PR target/88558 * config/rs6000/rs6000.md (lrintdi2): Remove TARGET_FPRND from insn condition. (lrintsi2): New insn pattern for 32bit lrint. gcc/testsuite/ PR target/106769 * gcc.target/powerpc/pr88558.h: New. * gcc.target/powerpc/pr88558-p7.c: New. * gcc.target/powerpc/pr88558-p8.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index edf49bd74e3..a41898e0e08 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6655,10 +6655,18 @@ (define_insn "lrintdi2" [(set (match_operand:DI 0 "gpc_reg_operand" "=d") (unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "")] UNSPEC_FCTID))] - "TARGET_HARD_FLOAT && TARGET_FPRND" + "TARGET_HARD_FLOAT" "fctid %0,%1" [(set_attr "type" "fp")]) +(define_insn "lrintsi2" + [(set (match_operand:SI 0 "gpc_reg_operand" "=d") + (unspec:SI [(match_operand:SFDF 1 "gpc_reg_operand" "")] + UNSPEC_FCTIW))] + "TARGET_HARD_FLOAT && TARGET_POPCNTD" + "fctiw %0,%1" + [(set_attr "type" "fp")]) + (define_insn "btrunc2" [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa") (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")] diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c new file mode 100644 index 000..f302491c4d0 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=power7" } */ + +/* -fno-math-errno is required to make {i,l,ll}rint inlined */ + +#include 
"pr88558.h" + +/* { dg-final { scan-assembler-times {\mfctid\M} 3 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mfctid\M} 1 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times {\mfctiw\M} 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mfctiw\M} 3 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times {\mstfiwx\M} 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mstfiwx\M} 3 { target ilp32 } } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p8.c b/gcc/testsuite/gcc.target/powerpc/pr88558-p8.c new file mode 100644 index 000..33398aa74c2 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p8.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=power8" } */ + +/* -fno-math-errno is required to make {i,l,ll}rint inlined */ + +#include "pr88558.h" + +/* { dg-final { scan-assembler-times {\mfctid\M} 3 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mfctid\M} 1 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times {\mfctiw\M} 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mfctiw\M} 3 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times {\mmfvsrwz\M} 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mmfvsrwz\M} 3 { target ilp32 } } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558.h b/gcc/testsuite/gcc.target/powerpc/pr88558.h new file mode 100644 index 000..698640c0ef7 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr88558.h @@ -0,0 +1,19 @@ +long int test1 (double a) +{ + return __builtin_lrint (a); +} + +long long test2 (double a) +{ + return __builtin_llrint (a); +} + +int test3 (double a) +{ + return __builtin_irint (a); +} + +long int test4 (float a) +{ + return __builtin_lrintf (a); +}
Re: [PATCH, rs6000] Call vector load/store with length expand only on 64-bit Power10 [PR96762]
Kewen, I refined the patch according to your comments and it passed bootstrap and regression test. I committed it as https://gcc.gnu.org/g:946b8967b905257ac9f140225db744c9a6ab91be Thanks Gui Haochen 在 2023/8/29 16:55, Kewen.Lin 写道: > Hi Haochen, > > on 2023/8/29 10:50, HAO CHEN GUI wrote: >> Hi, >> This patch adds "TARGET_64BIT" check when calling vector load/store >> with length expand in expand_block_move. It matches the expand condition >> of "lxvl" and "stxvl" defined in vsx.md. >> >> This patch fixes the ICE occurred with the test case on 32-bit Power10. >> >> Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. >> >> Thanks >> Gui Haochen >> >> >> ChangeLog >> rs6000: call vector load/store with length expand only on 64-bit Power10 >> >> gcc/ >> PR target/96762 >> * config/rs6000/rs6000-string.cc (expand_block_move): Call vector >> load/store with length expand only on 64-bit Power10. >> >> gcc/testsuite/ >> PR target/96762 >> * gcc.target/powerpc/pr96762.c: New. >> >> >> patch.diff >> diff --git a/gcc/config/rs6000/rs6000-string.cc >> b/gcc/config/rs6000/rs6000-string.cc >> index cd8ee8c..d1b48c2 100644 >> --- a/gcc/config/rs6000/rs6000-string.cc >> +++ b/gcc/config/rs6000/rs6000-string.cc >> @@ -2811,8 +2811,9 @@ expand_block_move (rtx operands[], bool might_overlap) >>gen_func.mov = gen_vsx_movv2di_64bit; >> } >>else if (TARGET_BLOCK_OPS_UNALIGNED_VSX >> - && TARGET_POWER10 && bytes < 16 >> - && orig_bytes > 16 >> + /* Only use lxvl/stxvl on 64bit POWER10. */ >> + && TARGET_POWER10 && TARGET_64BIT >> + && bytes < 16 && orig_bytes > 16 >> && !(bytes == 1 || bytes == 2 >> || bytes == 4 || bytes == 8) >> && (align >= 128 || !STRICT_ALIGNMENT)) > > Nit: Since you touched this part of code, could you format it better as well, > like: > > else if (TARGET_BLOCK_OPS_UNALIGNED_VSX > /* Only use lxvl/stxvl on 64bit POWER10. 
*/ > && TARGET_POWER10 > && TARGET_64BIT > && bytes < 16 > && orig_bytes > 16 > && !(bytes == 1 > || bytes == 2 > || bytes == 4 > || bytes == 8) > && (align >= 128 > || !STRICT_ALIGNMENT)) > > >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr96762.c >> b/gcc/testsuite/gcc.target/powerpc/pr96762.c >> new file mode 100644 >> index 000..1145dd1 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr96762.c >> @@ -0,0 +1,11 @@ >> +/* { dg-do compile { target ilp32 } } */ > > Nit: we can compile this on lp64, so you can remove the ilp32 restriction, > ... > >> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ >> + > > ... but add one comment line to note the initial purpose, like: > > /* Verify there is no ICE on ilp32 env. */ > > or similar. > > Okay for trunk with these nits fixed, thanks! > > BR, > Kewen > >> +extern void foo (char *); >> + >> +void >> +bar (void) >> +{ >> + char zj[] = ""; >> + foo (zj); >> +}
[PATCH, rs6000] Call vector load/store with length expand only on 64-bit Power10 [PR96762]
Hi, This patch adds "TARGET_64BIT" check when calling vector load/store with length expand in expand_block_move. It matches the expand condition of "lxvl" and "stxvl" defined in vsx.md. This patch fixes the ICE occurred with the test case on 32-bit Power10. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: call vector load/store with length expand only on 64-bit Power10 gcc/ PR target/96762 * config/rs6000/rs6000-string.cc (expand_block_move): Call vector load/store with length expand only on 64-bit Power10. gcc/testsuite/ PR target/96762 * gcc.target/powerpc/pr96762.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000-string.cc b/gcc/config/rs6000/rs6000-string.cc index cd8ee8c..d1b48c2 100644 --- a/gcc/config/rs6000/rs6000-string.cc +++ b/gcc/config/rs6000/rs6000-string.cc @@ -2811,8 +2811,9 @@ expand_block_move (rtx operands[], bool might_overlap) gen_func.mov = gen_vsx_movv2di_64bit; } else if (TARGET_BLOCK_OPS_UNALIGNED_VSX - && TARGET_POWER10 && bytes < 16 - && orig_bytes > 16 + /* Only use lxvl/stxvl on 64bit POWER10. */ + && TARGET_POWER10 && TARGET_64BIT + && bytes < 16 && orig_bytes > 16 && !(bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8) && (align >= 128 || !STRICT_ALIGNMENT)) diff --git a/gcc/testsuite/gcc.target/powerpc/pr96762.c b/gcc/testsuite/gcc.target/powerpc/pr96762.c new file mode 100644 index 000..1145dd1 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr96762.c @@ -0,0 +1,11 @@ +/* { dg-do compile { target ilp32 } } */ +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ + +extern void foo (char *); + +void +bar (void) +{ + char zj[] = ""; + foo (zj); +}
[PATCH-2, rs6000] Implement 32bit inline lrint [PR88558]
Hi, This patch implements 32bit inline lrint by "fctiw". It depends on the patch1 to do SImode move from FP register on P7. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: support 32bit inline lrint gcc/ PR target/88558 * config/rs6000/rs6000.md (lrintdi2): Remove TARGET_FPRND from insn condition. (lrintsi2): New insn pattern for 32bit lrint. gcc/testsuite/ PR target/106769 * gcc.target/powerpc/pr88558.h: New. * gcc.target/powerpc/pr88558-p7.c: New. * gcc.target/powerpc/pr88558-p8v.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index fd263e8dfe3..b36304de8c6 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6655,10 +6655,18 @@ (define_insn "lrintdi2" [(set (match_operand:DI 0 "gpc_reg_operand" "=d") (unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "")] UNSPEC_FCTID))] - "TARGET_HARD_FLOAT && TARGET_FPRND" + "TARGET_HARD_FLOAT" "fctid %0,%1" [(set_attr "type" "fp")]) +(define_insn "lrintsi2" + [(set (match_operand:SI 0 "gpc_reg_operand" "=d") + (unspec:SI [(match_operand:SFDF 1 "gpc_reg_operand" "")] + UNSPEC_FCTIW))] + "TARGET_HARD_FLOAT && TARGET_POPCNTD" + "fctiw %0,%1" + [(set_attr "type" "fp")]) + (define_insn "btrunc2" [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa") (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")] diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c new file mode 100644 index 000..6437c55fa61 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=power7" } */ + +#include "pr88558.h" + +/* { dg-final { scan-assembler-times {\mfctid\M} 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mfctid\M} 1 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times {\mfctiw\M} 1 { target lp64 } } } */ +/* { dg-final { 
scan-assembler-times {\mfctiw\M} 2 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times {\mstfiwx\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p8v.c b/gcc/testsuite/gcc.target/powerpc/pr88558-p8v.c new file mode 100644 index 000..fd22123ffb6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p8v.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=power8" } */ + +long int foo (double a) +{ + return __builtin_lrint (a); +} + +long long bar (double a) +{ + return __builtin_llrint (a); +} + +int baz (double a) +{ + return __builtin_irint (a); +} + +/* { dg-final { scan-assembler-times {\mfctid\M} 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mfctid\M} 1 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times {\mfctiw\M} 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mfctiw\M} 2 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times {\mmfvsrwz\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558.h b/gcc/testsuite/gcc.target/powerpc/pr88558.h new file mode 100644 index 000..0cc0c68dd4e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr88558.h @@ -0,0 +1,14 @@ +long int foo (double a) +{ + return __builtin_lrint (a); +} + +long long bar (double a) +{ + return __builtin_llrint (a); +} + +int baz (double a) +{ + return __builtin_irint (a); +}
[PATCH-1, rs6000] Enable SImode in FP register on P7 [PR88558]
Hi, This patch enables SImode in FP register on P7. Instruction "fctiw" stores its integer output in an FP register. So SImode in FP register needs be enabled on P7 if we want support "fctiw" on P7. The test case is in the second patch which implements 32bit inline lrint. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: enable SImode in FP register on P7 gcc/ PR target/88558 * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Enable Simode in FP register for P7. * config/rs6000/rs6000.md (*movsi_internal1): Add fmr for SImode move between FP register. Set attribute isa of stfiwx to "*" and attribute of stxsiwx to "p7". patch.diff diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 44b448d2ba6..99085c2cdd7 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1903,7 +1903,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD) return 1; - if (TARGET_P8_VECTOR && (mode == SImode)) + if (TARGET_POPCNTD && mode == SImode) return 1; if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode)) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index cdab49fbb91..ac5d29a2cf8 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -7566,7 +7566,7 @@ (define_split (define_insn "*movsi_internal1" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r, r, + "=r, r, ^d, r, d, v, m, ?Z, ?Z, r, r, r, r, @@ -7575,7 +7575,7 @@ (define_insn "*movsi_internal1" wa, r, r, *h, *h") (match_operand:SI 1 "input_operand" - "r, U, + "r, U, ^d, m, ?Z, ?Z, r, d, v, I, L, eI, n, @@ -7588,6 +7588,7 @@ (define_insn "*movsi_internal1" "@ mr %0,%1 la %0,%a1 + fmr %0,%1 lwz%U1%X1 %0,%1 lfiwzx %0,%y1 lxsiwzx %x0,%y1 @@ -7611,7 +7612,7 @@ (define_insn "*movsi_internal1" mt%0 %1 nop" [(set_attr "type" - "*, *, + "*, *, fpsimple, load, fpload, fpload, store, fpstore,fpstore, *, *, 
*, *, @@ -7620,7 +7621,7 @@ (define_insn "*movsi_internal1" mtvsr, mfvsr, *, *, *") (set_attr "length" - "*, *, + "*, *, *, *, *, *, *, *, *, *, *, *, 8, @@ -7629,9 +7630,9 @@ (define_insn "*movsi_internal1" *, *, *, *, *") (set_attr "isa" - "*, *, - *, p8v,p8v, - *, p8v,p8v, + "*, *, *, + *, p7, p8v, + *, *, p8v, *, *, p10,*, p8v,p9v,p9v,p8v, p9v,p8v,p9v,
[PATCHv2, rs6000] Extract the element in dword0 by mfvsrd and shift/mask [PR110331]
Hi, This patch implements the vector element extraction by mfvsrd and shift/mask when the element is in dword0 of the vector. Originally, it generates vsplat/mfvsrd on P8 and li/vextract on P9. Since mfvsrd has lower latency than vextract and rldicl has lower latency than vsplat, the new sequence has the benefit. Specially, the shift/mask is no need when the element is the first element of dword0. So it saves another rldicl when it returns a sign extend value. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: Extract the element in dword0 by mfvsrd and shift/mask gcc/ PR target/110331 * config/rs6000/rs6000-protos.h (rs6000_vsx_element_in_dword0_p): Declare. (rs6000_vsx_extract_element_from_dword0): Declare. * config/rs6000/rs6000.cc (rs6000_vsx_element_in_dword0_p): New function to judge if an element is in dword0 of a vector. (rs6000_vsx_extract_element_from_dword0): Extract an element from dword0 by mfvsrd and lshiftrt and mask. * config/rs6000/rs6000.md (*rotl3_mask): Rename to... (rotl3_mask): ...this * config/rs6000/vsx.md (split pattern for p9 vector extract): Call rs6000_vsx_extract_element_from_dword0 if the element is in dword0. (*vsx_extract__di_p9): Assert the extracted elements isn't in dword0. (*vsx_extract_v4si_w023): Call rs6000_vsx_extract_element_from_dword0 if the element is in dword0. (*vsx_extract__zero_extend): Zero extend pattern for vector extract on the element in dword0. (*vsx_extract__p8): Call rs6000_vsx_extract_element_from_dword0 when the extracted element is in dword0. Refined the pattern and remove reload_completed from split condition. gcc/testsuite/ PR target/110331 * gcc.target/powerpc/fold-vec-extract-char.p8.c: Set the extracted elements in dword1. * gcc.target/powerpc/fold-vec-extract-char.p9.c: Likewise. * gcc.target/powerpc/fold-vec-extract-int.p8.c: Likewise. * gcc.target/powerpc/fold-vec-extract-int.p9.c: Likewise. 
* gcc.target/powerpc/fold-vec-extract-short.p8.c: Likewise. * gcc.target/powerpc/fold-vec-extract-short.p9.c: Likewise. * gcc.target/powerpc/p9-extract-1.c: Likewise. * gcc.target/powerpc/pr110331-p8.c: New. * gcc.target/powerpc/pr110331-p9.c: New. * gcc.target/powerpc/pr110331.h: New. patch.diff diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index f70118ea40f..ccef280122b 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -161,6 +161,8 @@ extern bool rs6000_function_pcrel_p (struct function *); extern bool rs6000_pcrel_p (void); extern bool rs6000_fndecl_pcrel_p (const_tree); extern void rs6000_output_addr_vec_elt (FILE *, int); +extern bool rs6000_vsx_element_in_dword0_p (rtx, enum machine_mode); +extern void rs6000_vsx_extract_element_from_dword0 (rtx, rtx, rtx, bool); /* Different PowerPC instruction formats that are used by GCC. There are various other instruction formats used by the PowerPC hardware, but these diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index efe9adce1f8..e15f8bd964c 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -29105,6 +29105,74 @@ rs6000_opaque_type_invalid_use_p (gimple *stmt) return false; } +/* Return true when the element is in dword0 of a vector. Exclude word + element 1 (BE order) as the word can be extracted by mfvsrwz directly. */ + +bool +rs6000_vsx_element_in_dword0_p (rtx op, enum machine_mode mode) +{ + gcc_assert (CONST_INT_P (op)); + gcc_assert (mode == V16QImode || mode == V8HImode || mode == V4SImode); + + int units = GET_MODE_NUNITS (mode); + int elt = INTVAL (op); + elt = BYTES_BIG_ENDIAN ? units - 1 - elt : elt; + + if (elt > units / 2 + || (elt == units / 2 && mode != V4SImode)) +return true; + else +return false; +} + +/* Extract element from dword0 by mfvsrd and lshiftrt and mask. Extend_p + indicates if zero extend is needed or not. 
*/ + +void +rs6000_vsx_extract_element_from_dword0 (rtx dest, rtx src, rtx element, + bool extend_p) +{ + enum machine_mode mode = GET_MODE (src); + gcc_assert (rs6000_vsx_element_in_dword0_p (element, mode)); + + enum machine_mode dest_mode = GET_MODE (dest); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int units = GET_MODE_NUNITS (mode); + int elt = INTVAL (element); + elt = BYTES_BIG_ENDIAN ? units - 1 - elt : elt; + int value, shift; + unsigned int mask; + + rtx vec_tmp = gen_lowpart (V2DImode, src); + rtx tmp1 = can_create_pseudo_p () +? gen_reg_rtx (DImode) +: simplify_gen_subreg (DImode, dest, dest_mode, 0); + value = BYTES_BIG_ENDIAN ? 0 : 1; + emit_insn (gen_vsx_extract_v2di
Re: [PATCH-1, combine] Don't widen shift mode when target has rotate/mask instruction on original mode [PR93738]
Jeff, Thanks a lot for your comments. The widen shift mode is on i1/i2 before they're combined with i3 to newpat. The newpat matches rotate/mask pattern. The i1/i2 itself don't match rotate/mask pattern. I did an experiment to disable widen shift mode for lshiftrt. I tested it on powerpc/x86/aarch64. There is no regression occurred. I thought that the widen shift mode is helpful for newpat matching. But it seems not, at least no impact on powerpc/x86/aarch64. diff --git a/gcc/combine.cc b/gcc/combine.cc index 4bf867d74b0..0b9b115f9bb 100644 --- a/gcc/combine.cc +++ b/gcc/combine.cc @@ -10479,11 +10479,6 @@ try_widen_shift_mode (enum rtx_code code, rtx op, int count, return orig_mode; case LSHIFTRT: - /* Similarly here but with zero bits. */ - if (HWI_COMPUTABLE_MODE_P (mode) - && (nonzero_bits (op, mode) & ~GET_MODE_MASK (orig_mode)) == 0) - return mode; - /* We can also widen if the bits brought in will be masked off. This operation is performed in ORIG_MODE. */ if (outer_code == AND) Segher, Could you inform me what's the purpose of widen shift mode in simplify_shift_const? Does it definitely reduce the rtx cost or it helps match patterns? Thanks a lot. Thanks Gui Haochen 在 2023/8/5 7:32, Jeff Law 写道: > > > On 7/20/23 18:59, HAO CHEN GUI wrote: >> Hi Jeff, >> >> 在 2023/7/21 5:27, Jeff Law 写道: >>> Wouldn't it make more sense to just try rotate/mask in the original mode >>> before trying a shift in a widened mode? I'm not sure why we need a target >>> hook here. >> >> There is no change to try rotate/mask with the original mode when >> expensive_optimizations is set. The subst widens the shift mode. > But we can add it before the attempt in the wider mode. > >> >> if (flag_expensive_optimizations) >> { >> /* Pass pc_rtx so no substitutions are done, just >> simplifications. 
*/ >> if (i1) >> { >> subst_low_luid = DF_INSN_LUID (i1); >> i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0); >> } >> >> subst_low_luid = DF_INSN_LUID (i2); >> i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0); >> } >> >> I don't know if the wider mode is helpful to other targets, so >> I added the target hook. > In this scenario we're often better off relying on rtx_costs (even with all > its warts) rather than adding yet another target hook. > > I'd love to hear from Segher here to see if he's got other ideas. > > jeff
Re: [PATCHv4, rs6000] Generate mfvsrwz for all subtargets and remove redundant zero extend [PR106769]
Committed after fixing the comments. https://gcc.gnu.org/g:a79cf858b39e01c80537bc5d47a5e9004418c267 Thanks Gui Haochen 在 2023/8/14 15:47, Kewen.Lin 写道: > Hi Haochen, > > on 2023/8/14 10:18, HAO CHEN GUI wrote: >> Hi, >> This patch modifies vsx extract expand and generates mfvsrwz/stxsiwx >> for all sub targets when the mode is V4SI and the extracted element is word >> 1 from BE order. Also this patch adds a insn pattern for mfvsrwz which >> helps eliminate redundant zero extend. >> >> Compared to last version, the main change is to put the word index >> checking in the split condition of "*vsx_extract_v4si_w023". Also modified >> some comments. >> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/625380.html >> >> Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. >> >> Thanks >> Gui Haochen >> >> ChangeLog >> rs6000: Generate mfvsrwz for all platform and remove redundant zero extend >> >> mfvsrwz has lower latency than xxextractuw or vextuw[lr]x. So it should be >> generated even with p9 vector enabled. Also the instruction is already >> zero extended. A combine pattern is needed to eliminate redundant zero >> extend instructions. >> >> gcc/ >> PR target/106769 >> * config/rs6000/vsx.md (expand vsx_extract_): Set it only >> for V8HI and V16QI. >> (vsx_extract_v4si): New expand for V4SI extraction. >> (vsx_extract_v4si_w1): New insn pattern for V4SI extraction on >> word 1 from BE order. >> (*mfvsrwz): New insn pattern for mfvsrwz. >> (*vsx_extract__di_p9): Assert that it won't be generated on >> word 1 from BE order. >> (*vsx_extract_si): Remove. >> (*vsx_extract_v4si_w023): New insn and split pattern on word 0, 2, >> 3 from BE order. >> >> gcc/testsuite/ >> PR target/106769 >> * gcc.target/powerpc/pr106769.h: New. >> * gcc.target/powerpc/pr106769-p8.c: New. >> * gcc.target/powerpc/pr106769-p9.c: New. 
>> >> patch.diff >> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md >> index 0a34ceebeb5..1cbdc2f1c01 100644 >> --- a/gcc/config/rs6000/vsx.md >> +++ b/gcc/config/rs6000/vsx.md >> @@ -3722,9 +3722,9 @@ (define_insn "vsx_xxpermdi2__1" >> (define_expand "vsx_extract_" >>[(parallel [(set (match_operand: 0 "gpc_reg_operand") >> (vec_select: >> -(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand") >> +(match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand") >> (parallel [(match_operand:QI 2 "const_int_operand")]))) >> - (clobber (match_scratch:VSX_EXTRACT_I 3))])] >> + (clobber (match_scratch:VSX_EXTRACT_I2 3))])] >>"VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT" >> { >>/* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */ >> @@ -3736,6 +3736,63 @@ (define_expand "vsx_extract_" >> } >> }) >> >> +(define_expand "vsx_extract_v4si" >> + [(parallel [(set (match_operand:SI 0 "gpc_reg_operand") >> + (vec_select:SI >> +(match_operand:V4SI 1 "gpc_reg_operand") >> +(parallel [(match_operand:QI 2 "const_0_to_3_operand")]))) >> + (clobber (match_scratch:V4SI 3))])] >> + "TARGET_DIRECT_MOVE_64BIT" >> +{ >> + /* The word 1 (BE order) can be extracted by mfvsrwz/stxsiwx. So just >> + fall through to vsx_extract_v4si_w1. */ >> + if (TARGET_P9_VECTOR >> + && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)) >> +{ >> + emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1], >> + operands[2])); >> + DONE; >> +} >> +}) >> + >> +/* Extract from word 1 (BE order); */ > > Nit: I guessed I requested this before, please use ";" instead of > "/* ... */" for the comments, to align with the existing ones. 
> >> +(define_insn "vsx_extract_v4si_w1" >> + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa") >> +(vec_select:SI >> + (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0") >> + (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")]))) >> + (clobber (match_scratch:V4SI 3 "=v,v,v,v"))] >> + "TARGET_DIRECT_MOVE_64BIT >> + && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)" >> +{ >> + if (which_alternative == 0) >> + return "mfvsrwz %0,%x1"; >> + >> + if (which_alternative == 1) >> + return "xxlor %x0,%x1,%x1"; >> + >> + if (which_alternative == 2) >> + return "stxsiwx %x1,%y0"; >> + >> + return ASM_COMMENT_START " vec_extract to same register"; >> +} >> + [(set_attr "type" "mfvsr,veclogical,fpstore,*") >> + (set_attr "length" "4,4,4,0") >> + (set_attr "isa" "p8v,*,p8v,*")]) >> + >> +(define_insn "*mfvsrwz" >> + [(set (match_operand:DI 0 "register_operand" "=r") >> +(zero_extend:DI >> + (vec_select:SI >> +(match_operand:V4SI 1 "vsx_register_operand" "wa") >> +(parallel [(match_operand:QI 2 "const_int_operand" "n")] >> + (clobber (match_scratch:V4SI 3 "=v"))]
Re: [PATCH, rs6000] Skip redundant vector extract if the element is first element of dword0 [PR110429]
Committed after tweaking and testing. https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=d471bdb0453de7b738f49148b66d57cb5871937d Thanks Gui Haochen 在 2023/7/28 17:32, Kewen.Lin 写道: > Hi Haochen, > > on 2023/7/5 11:22, HAO CHEN GUI wrote: >> Hi, >> This patch skips redundant vector extract insn to be generated when >> the extracted element is the first element of dword0 and the destination > > "The first element" is confusing, it's easy to be misunderstood as element > 0, but in fact the extracted element index is: > - for byte, 7 on BE while 8 on LE; > - for half word, 3 on BE while 4 on LE; > > so maybe just say when the extracted index for byte and half word like above, > the element to be stored is already in the corresponding place for stxsi[hb]x, > we don't need a redundant vector extraction at all. > >> is a memory operand. Only one 'stxsi[hb]x' instruction is enough. >> >> The V4SImode is fixed in a previous patch. >> https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622101.html >> >> Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. >> Thanks >> Gui Haochen >> >> ChangeLog >> rs6000: Skip redundant vector extract if the element is first element of >> dword0 >> >> gcc/ >> PR target/110429 >> * config/rs6000/vsx.md (*vsx_extract__store_p9): Skip vector >> extract when the element is the first element of dword0. >> >> gcc/testsuite/ >> PR target/110429 >> * gcc.target/powerpc/pr110429.c: New. >> >> >> patch.diff >> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md >> index 0c269e4e8d9..b3fec910eb6 100644 >> --- a/gcc/config/rs6000/vsx.md >> +++ b/gcc/config/rs6000/vsx.md >> @@ -3855,7 +3855,22 @@ (define_insn_and_split "*vsx_extract__store_p9" >> (parallel [(match_dup 2)]))) >>(clobber (match_dup 4))]) >> (set (match_dup 0) >> -(match_dup 3))]) >> +(match_dup 3))] >> +{ >> + enum machine_mode dest_mode = GET_MODE (operands[0]); > > Nit: Move this line ... 
> >> + >> + if (which_alternative == 0 >> + && ((mode == V16QImode >> + && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 7 : 8)) >> + || (mode == V8HImode >> + && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 3 : 4 >> +{ > > ... here. > >> + emit_move_insn (operands[0], >> + gen_rtx_REG (dest_mode, REGNO (operands[3]))); >> + DONE; >> +} >> +}) >> + >> >> (define_insn_and_split "*vsx_extract_si" >>[(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z") >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110429.c >> b/gcc/testsuite/gcc.target/powerpc/pr110429.c >> new file mode 100644 >> index 000..5a938f9f90a >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr110429.c >> @@ -0,0 +1,28 @@ >> +/* { dg-do compile } */ >> +/* { dg-skip-if "" { powerpc*-*-darwin* } } */ >> +/* { dg-require-effective-target powerpc_p9vector_ok } */ >> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ >> +/* { dg-require-effective-target has_arch_ppc64 } */ >> + >> +#include >> + >> +#ifdef __BIG_ENDIAN__ >> +#define DWORD0_FIRST_SHORT 3 >> +#define DWORD0_FIRST_CHAR 7 >> +#else >> +#define DWORD0_FIRST_SHORT 4 >> +#define DWORD0_FIRST_CHAR 8 >> +#endif >> + >> +void vec_extract_short (vector short v, short* p) >> +{ >> + *p = vec_extract(v, DWORD0_FIRST_SHORT); >> +} >> + >> +void vec_extract_char (vector char v, char* p) >> +{ >> + *p = vec_extract(v, DWORD0_FIRST_CHAR); >> +} >> + >> +/* { dg-final { scan-assembler-times "stxsi\[hb\]x" 2 } } */ > > Nit: Break this check into stxsihx and stxsibx, and surround > with \m and \M. > >> +/* { dg-final { scan-assembler-not "vextractu\[hb\]" } } */ > > Also with \m and \M. > > OK for trunk with these nits tweaked and testing goes well, > thanks! > > BR, > Kewen
[PATCHv4, rs6000] Generate mfvsrwz for all subtargets and remove redundant zero extend [PR106769]
Hi, This patch modifies vsx extract expand and generates mfvsrwz/stxsiwx for all subtargets when the mode is V4SI and the extracted element is word 1 from BE order. Also this patch adds an insn pattern for mfvsrwz which helps eliminate redundant zero extend. Compared to last version, the main change is to put the word index checking in the split condition of "*vsx_extract_v4si_w023". Also modified some comments. https://gcc.gnu.org/pipermail/gcc-patches/2023-July/625380.html Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: Generate mfvsrwz for all platforms and remove redundant zero extend mfvsrwz has lower latency than xxextractuw or vextuw[lr]x. So it should be generated even with p9 vector enabled. Also the instruction is already zero extended. A combine pattern is needed to eliminate redundant zero extend instructions. gcc/ PR target/106769 * config/rs6000/vsx.md (expand vsx_extract_): Set it only for V8HI and V16QI. (vsx_extract_v4si): New expand for V4SI extraction. (vsx_extract_v4si_w1): New insn pattern for V4SI extraction on word 1 from BE order. (*mfvsrwz): New insn pattern for mfvsrwz. (*vsx_extract__di_p9): Assert that it won't be generated on word 1 from BE order. (*vsx_extract_si): Remove. (*vsx_extract_v4si_w023): New insn and split pattern on word 0, 2, 3 from BE order. gcc/testsuite/ PR target/106769 * gcc.target/powerpc/pr106769.h: New. * gcc.target/powerpc/pr106769-p8.c: New. * gcc.target/powerpc/pr106769-p9.c: New. 
patch.diff diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 0a34ceebeb5..1cbdc2f1c01 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -3722,9 +3722,9 @@ (define_insn "vsx_xxpermdi2__1" (define_expand "vsx_extract_" [(parallel [(set (match_operand: 0 "gpc_reg_operand") (vec_select: - (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand") + (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand") (parallel [(match_operand:QI 2 "const_int_operand")]))) - (clobber (match_scratch:VSX_EXTRACT_I 3))])] + (clobber (match_scratch:VSX_EXTRACT_I2 3))])] "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT" { /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */ @@ -3736,6 +3736,63 @@ (define_expand "vsx_extract_" } }) +(define_expand "vsx_extract_v4si" + [(parallel [(set (match_operand:SI 0 "gpc_reg_operand") + (vec_select:SI + (match_operand:V4SI 1 "gpc_reg_operand") + (parallel [(match_operand:QI 2 "const_0_to_3_operand")]))) + (clobber (match_scratch:V4SI 3))])] + "TARGET_DIRECT_MOVE_64BIT" +{ + /* The word 1 (BE order) can be extracted by mfvsrwz/stxsiwx. So just + fall through to vsx_extract_v4si_w1. */ + if (TARGET_P9_VECTOR + && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)) +{ + emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1], + operands[2])); + DONE; +} +}) + +/* Extract from word 1 (BE order); */ +(define_insn "vsx_extract_v4si_w1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa") + (vec_select:SI +(match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0") +(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")]))) + (clobber (match_scratch:V4SI 3 "=v,v,v,v"))] + "TARGET_DIRECT_MOVE_64BIT + && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 
1 : 2)" +{ + if (which_alternative == 0) + return "mfvsrwz %0,%x1"; + + if (which_alternative == 1) + return "xxlor %x0,%x1,%x1"; + + if (which_alternative == 2) + return "stxsiwx %x1,%y0"; + + return ASM_COMMENT_START " vec_extract to same register"; +} + [(set_attr "type" "mfvsr,veclogical,fpstore,*") + (set_attr "length" "4,4,4,0") + (set_attr "isa" "p8v,*,p8v,*")]) + +(define_insn "*mfvsrwz" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (vec_select:SI + (match_operand:V4SI 1 "vsx_register_operand" "wa") + (parallel [(match_operand:QI 2 "const_int_operand" "n")] + (clobber (match_scratch:V4SI 3 "=v"))] + "TARGET_DIRECT_MOVE_64BIT + && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)" + "mfvsrwz %0,%x1" + [(set_attr "type" "mfvsr") + (set_attr "isa" "p8v")]) + (define_insn "vsx_extract__p9" [(set (match_operand: 0 "gpc_reg_operand" "=r,") (vec_select: @@ -3807,6 +3864,9 @@ (define_insn_and_split "*vsx_extract__di_p9" (parallel [(match_dup 2)]))) (clobber (match_dup 3))])] { + gcc_assert (mode != V4SImode + || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)); + operands[4] = gen_rtx_REG (mode, REGNO
[PATCHv2, rs6000] Generate mfvsrwz for all subtargets and remove redundant zero extend [PR106769]
Hi, This patch modifies vsx extract expand and generates mfvsrwz/stxsiwx for all subtargets when the mode is V4SI and the index of extracted element is 1 for BE and 2 for LE. Also this patch adds an insn pattern for mfvsrwz which helps eliminate redundant zero extend. Compared to last version, the main change is to move "vsx_extract_v4si_w1" and "*mfvsrwz" to the front of "*vsx_extract__di_p9". Also some insn conditions are changed to assertions. https://gcc.gnu.org/pipermail/gcc-patches/2023-July/625128.html Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: Generate mfvsrwz for all platforms and remove redundant zero extend mfvsrwz has lower latency than xxextractuw or vextuw[lr]x. So it should be generated even with p9 vector enabled. Also the instruction is already zero extended. A combine pattern is needed to eliminate redundant zero extend instructions. gcc/ PR target/106769 * config/rs6000/vsx.md (expand vsx_extract_): Set it only for V8HI and V16QI. (vsx_extract_v4si): New expand for V4SI extraction. (vsx_extract_v4si_w1): New insn pattern for V4SI extraction when the index of extracted element is 1 with BE and 2 with LE. (*mfvsrwz): New insn pattern. (*vsx_extract__di_p9): Not generate the insn when the index of extracted element is 1 with BE and 2 with LE. (*vsx_extract_si): Removed. (*vsx_extract_v4si_not_w1): New insn and split pattern which deals with the cases not handled by vsx_extract_v4si_w1. gcc/testsuite/ PR target/106769 * gcc.target/powerpc/pr106769.h: New. * gcc.target/powerpc/pr106769-p8.c: New. * gcc.target/powerpc/pr106769-p9.c: New. 
patch.diff diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 0a34ceebeb5..0065b76fef8 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -3722,9 +3722,9 @@ (define_insn "vsx_xxpermdi2__1" (define_expand "vsx_extract_" [(parallel [(set (match_operand: 0 "gpc_reg_operand") (vec_select: - (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand") + (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand") (parallel [(match_operand:QI 2 "const_int_operand")]))) - (clobber (match_scratch:VSX_EXTRACT_I 3))])] + (clobber (match_scratch:VSX_EXTRACT_I2 3))])] "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT" { /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */ @@ -3736,6 +3736,63 @@ (define_expand "vsx_extract_" } }) +(define_expand "vsx_extract_v4si" + [(parallel [(set (match_operand:SI 0 "gpc_reg_operand") + (vec_select:SI + (match_operand:V4SI 1 "gpc_reg_operand") + (parallel [(match_operand:QI 2 "const_0_to_3_operand")]))) + (clobber (match_scratch:V4SI 3))])] + "TARGET_DIRECT_MOVE_64BIT" +{ + /* The word 1 (BE order) can be extracted by mfvsrwz/stxsiwx. So just + fall through to vsx_extract_v4si_w1. */ + if (TARGET_P9_VECTOR + && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)) +{ + emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1], + operands[2])); + DONE; +} +}) + +/* Extract from word 1 (BE order). */ +(define_insn "vsx_extract_v4si_w1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa") + (vec_select:SI +(match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0") +(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")]))) + (clobber (match_scratch:V4SI 3 "=v,v,v,v"))] + "TARGET_DIRECT_MOVE_64BIT + && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 
1 : 2)" +{ + if (which_alternative == 0) + return "mfvsrwz %0,%x1"; + + if (which_alternative == 1) + return "xxlor %x0,%x1,%x1"; + + if (which_alternative == 2) + return "stxsiwx %x1,%y0"; + + return ASM_COMMENT_START " vec_extract to same register"; +} + [(set_attr "type" "mfvsr,veclogical,fpstore,*") + (set_attr "length" "4,4,4,0") + (set_attr "isa" "p8v,*,p8v,*")]) + +(define_insn "*mfvsrwz" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (vec_select:SI + (match_operand:V4SI 1 "vsx_register_operand" "wa") + (parallel [(match_operand:QI 2 "const_int_operand" "n")] + (clobber (match_scratch:V4SI 3 "=v"))] + "TARGET_DIRECT_MOVE_64BIT + && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)" + "mfvsrwz %0,%x1" + [(set_attr "type" "mfvsr") + (set_attr "isa" "p8v")]) + (define_insn "vsx_extract__p9" [(set (match_operand: 0 "gpc_reg_operand" "=r,") (vec_select: @@ -3798,7 +3855,7 @@ (define_insn_and_split "*vsx_extract__di_p9" (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,") (parallel [(match_operand:QI 2 "const_int_operand"
[PATCHv2, rs6000] Generate mfvsrwz for all subtargets and remove redundant zero extend [PR106769]
Hi, This patch modifies vsx extract expand and generates mfvsrwz/stxsiwx for all subtargets when the mode is V4SI and the index of extracted element is 1 for BE and 2 for LE. Also this patch adds an insn pattern for mfvsrwz which can help eliminate redundant zero extend. Compared to last version, the main change is to add a new expand for V4SI and separate "vsx_extract_si" to 2 insn patterns. https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622101.html Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: Generate mfvsrwz for all subtargets and remove redundant zero extend mfvsrwz has lower latency than xxextractuw or vextuw[lr]x. So it should be generated even with p9 vector enabled. Also the instruction is already zero extended. A combine pattern is needed to eliminate redundant zero extend instructions. gcc/ PR target/106769 * config/rs6000/vsx.md (expand vsx_extract_): Set it only for V8HI and V16QI. (vsx_extract_v4si): New expand for V4SI. (*vsx_extract__di_p9): Not generate the insn when it can be generated by mfvsrwz. (mfvsrwz): New insn pattern for zero extended vsx_extract_v4si. (*vsx_extract_si): Removed. (vsx_extract_v4si_0): New insn pattern to deal with V4SI extract when the index of extracted element is 1 with BE and 2 with LE. (vsx_extract_v4si_1): New insn and split pattern which deals with the cases not handled by vsx_extract_v4si_0. gcc/testsuite/ PR target/106769 * gcc.target/powerpc/pr106769.h: New. * gcc.target/powerpc/pr106769-p8.c: New. * gcc.target/powerpc/pr106769-p9.c: New. 
patch.diff diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 0a34ceebeb5..ad249441bcf 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -3722,9 +3722,9 @@ (define_insn "vsx_xxpermdi2__1" (define_expand "vsx_extract_" [(parallel [(set (match_operand: 0 "gpc_reg_operand") (vec_select: - (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand") + (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand") (parallel [(match_operand:QI 2 "const_int_operand")]))) - (clobber (match_scratch:VSX_EXTRACT_I 3))])] + (clobber (match_scratch:VSX_EXTRACT_I2 3))])] "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT" { /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */ @@ -3736,6 +3736,23 @@ (define_expand "vsx_extract_" } }) +(define_expand "vsx_extract_v4si" + [(parallel [(set (match_operand:SI 0 "gpc_reg_operand") + (vec_select:SI + (match_operand:V4SI 1 "gpc_reg_operand") + (parallel [(match_operand:QI 2 "const_0_to_3_operand")]))) + (clobber (match_scratch:V4SI 3))])] + "TARGET_DIRECT_MOVE_64BIT" +{ + if (TARGET_P9_VECTOR + && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)) +{ + emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1], + operands[2])); + DONE; +} +}) + (define_insn "vsx_extract__p9" [(set (match_operand: 0 "gpc_reg_operand" "=r,") (vec_select: @@ -3798,7 +3815,9 @@ (define_insn_and_split "*vsx_extract__di_p9" (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,") (parallel [(match_operand:QI 2 "const_int_operand" "n,n")] (clobber (match_scratch:SI 3 "=r,X"))] - "VECTOR_MEM_VSX_P (mode) && TARGET_VEXTRACTUB" + "TARGET_VEXTRACTUB + && (mode != V4SImode + || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 
1 : 2))" "#" "&& reload_completed" [(parallel [(set (match_dup 4) @@ -3830,58 +3849,78 @@ (define_insn_and_split "*vsx_extract__store_p9" (set (match_dup 0) (match_dup 3))]) -(define_insn_and_split "*vsx_extract_si" +(define_insn "mfvsrwz" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (vec_select:SI + (match_operand:V4SI 1 "vsx_register_operand" "wa") + (parallel [(match_operand:QI 2 "const_int_operand" "n")] + (clobber (match_scratch:V4SI 3 "=v"))] + "TARGET_DIRECT_MOVE_64BIT + && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)" + "mfvsrwz %0,%x1" + [(set_attr "type" "mfvsr") + (set_attr "isa" "p8v")]) + +(define_insn "vsx_extract_v4si_0" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa") + (vec_select:SI +(match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0") +(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")]))) + (clobber (match_scratch:V4SI 3 "=v,v,v,v"))] + "TARGET_DIRECT_MOVE_64BIT + && (!TARGET_P9_VECTOR || INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2))" +{ + if (which_alternative == 0) + return "mfvsrwz %0,%x1"; + + if (which_alternative == 1) + return "xxlor %x0,%x1,%x1"; + + if
Re: [PATCH-1, combine] Don't widen shift mode when target has rotate/mask instruction on original mode [PR93738]
Sorry for the typo s/change/chance 在 2023/7/21 8:59, HAO CHEN GUI 写道: > Hi Jeff, > > 在 2023/7/21 5:27, Jeff Law 写道: >> Wouldn't it make more sense to just try rotate/mask in the original mode >> before trying a shift in a widened mode? I'm not sure why we need a target >> hook here. > > There is no change to try rotate/mask with the original mode when > expensive_optimizations is set. The subst widens the shift mode. > > if (flag_expensive_optimizations) > { > /* Pass pc_rtx so no substitutions are done, just > simplifications. */ > if (i1) > { > subst_low_luid = DF_INSN_LUID (i1); > i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0); > } > > subst_low_luid = DF_INSN_LUID (i2); > i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0); > } > > I don't know if the wider mode is helpful to other targets, so > I added the target hook. > > Thanks > Gui Haochen
Re: [PATCH-1, combine] Don't widen shift mode when target has rotate/mask instruction on original mode [PR93738]
Hi Jeff, 在 2023/7/21 5:27, Jeff Law 写道: > Wouldn't it make more sense to just try rotate/mask in the original mode > before trying a shift in a widened mode? I'm not sure why we need a target > hook here. There is no change to try rotate/mask with the original mode when expensive_optimizations is set. The subst widens the shift mode. if (flag_expensive_optimizations) { /* Pass pc_rtx so no substitutions are done, just simplifications. */ if (i1) { subst_low_luid = DF_INSN_LUID (i1); i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0); } subst_low_luid = DF_INSN_LUID (i2); i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0); } I don't know if the wider mode is helpful to other targets, so I added the target hook. Thanks Gui Haochen
Ping [PATCH v7, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]
Hi, As the ticket(PR107013, adding fmin/max to RTL code) is suspended, I ping this patch. The unspec of fmin/max can be replaced with corresponding RTL code after that ticket is fixed. https://gcc.gnu.org/pipermail/gcc-patches/2022-September/602181.html Thanks Gui Haochen 在 2022/9/26 11:35, HAO CHEN GUI 写道: > Hi, > This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000. > Tests show that outputs of xs[min/max]dp are consistent with the standard > of C99 fmin/max. > > This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead > of smin/max when fast-math is not set. While fast-math is set, xs[min/max]dp > are folded to MIN/MAX_EXPR in gimple, and finally expanded to smin/max. > > Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. > Is this okay for trunk? Any recommendations? Thanks a lot. > > ChangeLog > 2022-09-26 Haochen Gui > > gcc/ > PR target/103605 > * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Gimple > fold RS6000_BIF_XSMINDP and RS6000_BIF_XSMAXDP when fast-math is set. > * config/rs6000/rs6000.md (FMINMAX): New int iterator. > (minmax_op): New int attribute. > (UNSPEC_FMAX, UNSPEC_FMIN): New unspecs. > (f3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN. > * config/rs6000/rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set > pattern to fmaxdf3. > (__builtin_vsx_xsmindp): Set pattern to fmindf3. > > gcc/testsuite/ > PR target/103605 > * gcc.dg/powerpc/pr103605.h: New. > * gcc.dg/powerpc/pr103605-1.c: New. > * gcc.dg/powerpc/pr103605-2.c: New. > > patch.diff > diff --git a/gcc/config/rs6000/rs6000-builtin.cc > b/gcc/config/rs6000/rs6000-builtin.cc > index e925ba9fad9..944ae9fe55c 100644 > --- a/gcc/config/rs6000/rs6000-builtin.cc > +++ b/gcc/config/rs6000/rs6000-builtin.cc > @@ -1588,6 +1588,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) >gimple_set_location (g, gimple_location (stmt)); >gsi_replace (gsi, g, true); >return true; > +/* fold into MIN_EXPR when fast-math is set. 
*/ > +case RS6000_BIF_XSMINDP: > /* flavors of vec_min. */ > case RS6000_BIF_XVMINDP: > case RS6000_BIF_XVMINSP: > @@ -1614,6 +1616,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) >gimple_set_location (g, gimple_location (stmt)); >gsi_replace (gsi, g, true); >return true; > +/* fold into MAX_EXPR when fast-math is set. */ > +case RS6000_BIF_XSMAXDP: > /* flavors of vec_max. */ > case RS6000_BIF_XVMAXDP: > case RS6000_BIF_XVMAXSP: > diff --git a/gcc/config/rs6000/rs6000-builtins.def > b/gcc/config/rs6000/rs6000-builtins.def > index f4a9f24bcc5..8b735493b40 100644 > --- a/gcc/config/rs6000/rs6000-builtins.def > +++ b/gcc/config/rs6000/rs6000-builtins.def > @@ -1613,10 +1613,10 @@ > XSCVSPDP vsx_xscvspdp {} > >const double __builtin_vsx_xsmaxdp (double, double); > -XSMAXDP smaxdf3 {} > +XSMAXDP fmaxdf3 {} > >const double __builtin_vsx_xsmindp (double, double); > -XSMINDP smindf3 {} > +XSMINDP fmindf3 {} > >const double __builtin_vsx_xsrdpi (double); > XSRDPI vsx_xsrdpi {} > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index bf85baa5370..ae0dd98f0f9 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -158,6 +158,8 @@ (define_c_enum "unspec" > UNSPEC_HASHCHK > UNSPEC_XXSPLTIDP_CONST > UNSPEC_XXSPLTIW_CONST > + UNSPEC_FMAX > + UNSPEC_FMIN >]) > > ;; > @@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr" >DONE; > }) > > + > +(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN]) > + > +(define_int_attr minmax_op [(UNSPEC_FMAX "max") > + (UNSPEC_FMIN "min")]) > + > +(define_insn "f3" > + [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa") > + (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa") > + (match_operand:SFDF 2 "vsx_register_operand" "wa")] > + FMINMAX))] > + "TARGET_VSX && !flag_finite_math_only" > + "xsdp %x0,%x1,%x2" > + [(set_attr "type" "fp")] > +) > + > (define_expand "movcc" > [(set (match_operand:GPR 0 "gpc_reg_operand") >(if_then_else:GPR (match_operand 1 
"comparison_operator") > diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605-1.c > b/gcc/testsuite/gcc.target/powerpc/pr103605-1.c > new file mode 100644 > index 000..923deec6a1e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr103605-1.c > @@ -0,0 +1,7 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > +/* { dg-options "-O2 -mvsx" } */ > +/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */ > +/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */ > + > +#include "pr103605.h" > diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605-2.c >
[PATCH-1, combine] Don't widen shift mode when target has rotate/mask instruction on original mode [PR93738]
Hi, The shift mode will be widen in combine pass if the operand has a normal subreg. But when the target already has rotate/mask/insert instructions on the narrow mode, it's unnecessary to widen the mode for lshiftrt. As the lshiftrt is commonly converted to rotate/mask insn, the widen mode blocks it to be further combined to rotate/mask/insert insn. The PR93738 shows the case. The lshiftrt:SI (subreg:SI (reg:DI)) is converted to subreg:SI (lshiftrt:DI (reg:DI)) and fails to match rotate/mask pattern. Trying 13, 10 -> 14: 13: r127:SI=r125:SI&0xf0ff REG_DEAD r125:SI 10: r124:SI=r129:DI#4 0>>0xc&0xf00 REG_DEAD r129:DI 14: r128:SI=r127:SI|r124:SI Failed to match this instruction: (set (reg:SI 128) (ior:SI (and:SI (reg:SI 125 [+-2 ]) (const_int -3841 [0xf0ff])) (and:SI (subreg:SI (zero_extract:DI (reg:DI 129) (const_int 32 [0x20]) (const_int 20 [0x14])) 4) (const_int 3840 [0xf00] Failed to match this instruction: (set (reg:SI 128) (ior:SI (and:SI (reg:SI 125 [+-2 ]) (const_int -3841 [0xf0ff])) (and:SI (subreg:SI (and:DI (lshiftrt:DI (reg:DI 129) (const_int 12 [0xc])) (const_int 4294967295 [0x])) 4) (const_int 3840 [0xf00] If not widen the shift mode, it can be combined to rotate/mask/insert insn as expected. Trying 13, 10 -> 14: 13: r127:SI=r125:SI&0xf0ff REG_DEAD r125:SI 10: r124:SI=r129:DI#4 0>>0xc&0xf00 REG_DEAD r129:DI 14: r128:SI=r127:SI|r124:SI REG_DEAD r127:SI REG_DEAD r124:SI Successfully matched this instruction: (set (reg:SI 128) (ior:SI (and:SI (reg:SI 125 [+-2 ]) (const_int -3841 [0xf0ff])) (and:SI (lshiftrt:SI (subreg:SI (reg:DI 129) 4) (const_int 12 [0xc])) (const_int 3840 [0xf00] This patch adds a target hook to indicate if rotate/mask instructions are supported on certain mode. If it's true, widen lshiftrt mode is skipped and shift is done on original mode. The patch fixes the regression of other rs6000 test cases. They're listed in the second patch. The patch passed regression test on Power Linux and x86 platforms. 
Thanks Gui Haochen ChangeLog combine: Not winden shift mode when target has rotate/mask instruction on original mode To winden shift mode is unnecessary when target already has rotate/mask instuctions on the original mode. It might blocks the further combine optimization on the original mode. For instance, further combine the insns to a rotate/mask/insert instruction on the original mode. This patch adds a hook to indicate if a target supports rotate/mask instructions on the certain mode. If it returns true, the widen shift mode will be skipped on lshiftrt. gcc/ PR target/93738 * combine.cc (try_widen_shift_mode): Skip to widen mode for lshiftrt when the target has rotate/mask instructions on original mode. * doc/tm.texi: Regenerate. * doc/tm.texi.in (TARGET_HAVE_ROTATE_AND_MASK): Add. * target.def (have_rotate_and_mask): New target hook. * targhooks.cc (default_have_rotate_and_mask): New function. * targhooks.h (default_have_rotate_and_mask): Declare. patch.diff diff --git a/gcc/combine.cc b/gcc/combine.cc index 304c020ec79..f22fe42931b 100644 --- a/gcc/combine.cc +++ b/gcc/combine.cc @@ -10475,20 +10475,25 @@ try_widen_shift_mode (enum rtx_code code, rtx op, int count, return orig_mode; case LSHIFTRT: - /* Similarly here but with zero bits. */ - if (HWI_COMPUTABLE_MODE_P (mode) - && (nonzero_bits (op, mode) & ~GET_MODE_MASK (orig_mode)) == 0) - return mode; - - /* We can also widen if the bits brought in will be masked off. This -operation is performed in ORIG_MODE. */ - if (outer_code == AND) + /* Skip wider mode when the target has rotate and mask instructions on +orig_mode. */ + if (!targetm.have_rotate_and_mask (orig_mode)) { - int care_bits = low_bitmask_len (orig_mode, outer_const); - - if (care_bits >= 0 - && GET_MODE_PRECISION (orig_mode) - care_bits >= count) + /* Similarly here but with zero bits. 
*/ + if (HWI_COMPUTABLE_MODE_P (mode) + && (nonzero_bits (op, mode) & ~GET_MODE_MASK (orig_mode)) == 0) return mode; + + /* We can also widen if the bits brought in will be masked off. +This operation is performed in ORIG_MODE. */ + if (outer_code == AND) + { + int care_bits = low_bitmask_len (orig_mode, outer_const); + + if (care_bits >= 0 + && GET_MODE_PRECISION (orig_mode) - care_bits >= count) + return mode; + } } /* fall through */ diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
[PATCH-2, rs6000] Don't widen shift mode when target has rotate/mask instruction on original mode [PR93738]
Hi, The patch relies on the first patch. The reason of the change is also described in the first patch. This patch implements the target hook have_rotate_and_mask. It also modifies some test cases. The regression of rlwimi-2.c is fixed. For rlwinm-0.c and rlwinm-2.c, one more 32bit rotate/mask instruction is generated and one less 64bit rotate/mask instruction. The patch passed regression test on Power Linux platforms. Test shows the patch has no performance regression on SPECint. Thanks Gui Haochen ChangeLog rs6000: implement target hook have_rotate_and_mask gcc/ PR target/93738 * config/rs6000/rs6000.cc (TARGET_HAVE_ROTATE_AND_MASK): Define. (rs6000_have_rotate_and_mask): New function. gcc/testsuite/ PR target/93738 * gcc.target/powerpc/rlwimi-2.c: Adjust the number of 64bit and 32bit rotate instructions. * gcc.target/powerpc/rlwinm-0.c: Likewise. * gcc.target/powerpc/rlwinm-2.c: Likewise. patch.diff diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 44b448d2ba6..98873afddb4 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1764,6 +1764,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_CONST_ANCHOR #define TARGET_CONST_ANCHOR 0x8000 +#undef TARGET_HAVE_ROTATE_AND_MASK +#define TARGET_HAVE_ROTATE_AND_MASK rs6000_have_rotate_and_mask + /* Processor table. 
*/ @@ -29097,6 +29100,17 @@ rs6000_opaque_type_invalid_use_p (gimple *stmt) return false; } +bool +rs6000_have_rotate_and_mask (machine_mode mode) +{ + gcc_assert (SCALAR_INT_MODE_P (mode)); + + if (mode == SImode || mode == DImode) +return true; + + return false; +} + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-rs6000.h" diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c index bafa371db73..62344a95aa0 100644 --- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c +++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c @@ -6,10 +6,9 @@ /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */ /* { dg-final { scan-assembler-times {(?n)^\s+mr} 643 { target ilp32 } } } */ /* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 6728 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } } */ -/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 } } */ /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5036 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm-0.c b/gcc/testsuite/gcc.target/powerpc/rlwinm-0.c index 4f4fca2d8ef..b6b1b227c7e 100644 --- a/gcc/testsuite/gcc.target/powerpc/rlwinm-0.c +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm-0.c @@ -7,10 +7,10 @@ /* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 3081 { target lp64 } } } */ /* { dg-final { scan-assembler-times {(?n)^\s+rlwinm} 3197 { target ilp32 } } } */ -/* { dg-final { scan-assembler-times {(?n)^\s+rlwinm} 3093 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {(?n)^\s+rlwinm} 3094 { target lp64 } } } */ /* { dg-final { scan-assembler-times {(?n)^\s+rotlwi} 154 } } */ /* { dg-final { scan-assembler-times {(?n)^\s+srwi} 13 { target 
ilp32 } } } */ -/* { dg-final { scan-assembler-times {(?n)^\s+srdi} 13 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {(?n)^\s+srdi} 12 { target lp64 } } } */ #define SL diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm-2.c index bddcfe2b76f..0315ca91dd7 100644 --- a/gcc/testsuite/gcc.target/powerpc/rlwinm-2.c +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm-2.c @@ -7,9 +7,9 @@ /* { dg-final { scan-assembler-times {(?n)^\s+rldic} 2726 { target lp64 } } } */ /* { dg-final { scan-assembler-times {(?n)^\s+rlwinm} 833 { target ilp32 } } } */ -/* { dg-final { scan-assembler-times {(?n)^\s+rlwinm} 720 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {(?n)^\s+rlwinm} 721 { target lp64 } } } */ /* { dg-final { scan-assembler-times {(?n)^\s+srwi} 13 { target ilp32 } } } */ -/* { dg-final { scan-assembler-times {(?n)^\s+srdi} 13 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {(?n)^\s+srdi} 12 { target lp64 } } } */ /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 2518 } } */
[PATCH, rs6000] Skip redundant vector extract if the element is first element of dword0 [PR110429]
Hi, This patch skips redundant vector extract insn to be generated when the extracted element is the first element of dword0 and the destination is a memory operand. Only one 'stxsi[hb]x' instruction is enough. The V4SImode is fixed in a previous patch. https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622101.html Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: Skip redundant vector extract if the element is first element of dword0 gcc/ PR target/110429 * config/rs6000/vsx.md (*vsx_extract__store_p9): Skip vector extract when the element is the first element of dword0. gcc/testsuite/ PR target/110429 * gcc.target/powerpc/pr110429.c: New. patch.diff diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 0c269e4e8d9..b3fec910eb6 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -3855,7 +3855,22 @@ (define_insn_and_split "*vsx_extract__store_p9" (parallel [(match_dup 2)]))) (clobber (match_dup 4))]) (set (match_dup 0) - (match_dup 3))]) + (match_dup 3))] +{ + enum machine_mode dest_mode = GET_MODE (operands[0]); + + if (which_alternative == 0 + && ((mode == V16QImode + && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 7 : 8)) + || (mode == V8HImode + && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 
3 : 4 +{ + emit_move_insn (operands[0], + gen_rtx_REG (dest_mode, REGNO (operands[3]))); + DONE; +} +}) + (define_insn_and_split "*vsx_extract_si" [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z") diff --git a/gcc/testsuite/gcc.target/powerpc/pr110429.c b/gcc/testsuite/gcc.target/powerpc/pr110429.c new file mode 100644 index 000..5a938f9f90a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr110429.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ +/* { dg-require-effective-target has_arch_ppc64 } */ + +#include + +#ifdef __BIG_ENDIAN__ +#define DWORD0_FIRST_SHORT 3 +#define DWORD0_FIRST_CHAR 7 +#else +#define DWORD0_FIRST_SHORT 4 +#define DWORD0_FIRST_CHAR 8 +#endif + +void vec_extract_short (vector short v, short* p) +{ + *p = vec_extract(v, DWORD0_FIRST_SHORT); +} + +void vec_extract_char (vector char v, char* p) +{ + *p = vec_extract(v, DWORD0_FIRST_CHAR); +} + +/* { dg-final { scan-assembler-times "stxsi\[hb\]x" 2 } } */ +/* { dg-final { scan-assembler-not "vextractu\[hb\]" } } */
[PATCH, rs6000] Extract the element in dword0 by mfvsrd and shift/mask [PR110331]
Hi, This patch implements the vector element extraction by mfvsrd and shift/mask when the element is in dword0 of the vector. Originally, it generates vsplat/mfvsrd on P8 and li/vextract on P9. Since mfvsrd has lower latency than vextract and rldicl has lower latency than vsplat, the new sequence has the benefit. Specially, the shift/mask is no need when the element is the first element of dword0. So it saves another rldicl when it returns a sign extend value. This patch is based on previous one. https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622101.html Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: Extract the element in dword0 by mfvsrd and shift/mask gcc/ PR target/110331 * config/rs6000/rs6000-protos.h (rs6000_vsx_element_in_dword0_p): Declare. (rs6000_vsx_extract_element_from_dword0): Declare. * config/rs6000/rs6000.cc (rs6000_vsx_element_in_dword0_p): New function to judge if an element is in dword0 of a vector. (rs6000_vsx_extract_element_from_dword0): Extract an element from dword0 by mfvsrd and lshiftrt and mask. * config/rs6000/rs6000.md (*rotl3_mask): Rename to... (rotl3_mask): ...this * config/rs6000/vsx.md (vsx_extract_): Add a comment. (split pattern for p9 vector extract): Call rs6000_vsx_extract_element_from_dword0 if the element is in dword0. (*vsx_extract__di_p9): Exclude the elements in dword0 which are processed by *vsx_extract__zero_extend for both p8 and p9. (*vsx_extract__zero_extend): Zero extend pattern for vector extract on the element of dword0. (*vsx_extract__p8): Call rs6000_vsx_extract_element_from_dword0 when the extracted element is in dword0. Refined the pattern and remove reload_completed from split condition. gcc/testsuite/ PR target/110331 * gcc.target/powerpc/fold-vec-extract-char.p8.c: Set the extracted elements in dword1. * gcc.target/powerpc/fold-vec-extract-char.p9.c: Likewise. * gcc.target/powerpc/fold-vec-extract-int.p8.c: Likewise. 
* gcc.target/powerpc/fold-vec-extract-int.p9.c: Likewise. * gcc.target/powerpc/fold-vec-extract-short.p8.c: Likewise. * gcc.target/powerpc/fold-vec-extract-short.p9.c: Likewise. * gcc.target/powerpc/p9-extract-1.c: Likewise. * gcc.target/powerpc/pr110331-p8.c: New. * gcc.target/powerpc/pr110331-p9.c: New. * gcc.target/powerpc/pr110331.h: New. patch.diff diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index f70118ea40f..ccef280122b 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -161,6 +161,8 @@ extern bool rs6000_function_pcrel_p (struct function *); extern bool rs6000_pcrel_p (void); extern bool rs6000_fndecl_pcrel_p (const_tree); extern void rs6000_output_addr_vec_elt (FILE *, int); +extern bool rs6000_vsx_element_in_dword0_p (rtx, enum machine_mode); +extern void rs6000_vsx_extract_element_from_dword0 (rtx, rtx, rtx, bool); /* Different PowerPC instruction formats that are used by GCC. There are various other instruction formats used by the PowerPC hardware, but these diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 07c3a3d15ac..fad01d6b5dd 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -29098,6 +29098,74 @@ rs6000_opaque_type_invalid_use_p (gimple *stmt) return false; } +/* Return true when the element is in dword0 of a vector. Exclude word + element 1 of VS4SI as the word can be extracted by mfvsrwz directly. */ + +bool +rs6000_vsx_element_in_dword0_p (rtx op, enum machine_mode mode) +{ + gcc_assert (CONST_INT_P (op)); + gcc_assert (mode == V16QImode || mode == V8HImode || mode == V4SImode); + + int units = GET_MODE_NUNITS (mode); + int elt = INTVAL (op); + elt = BYTES_BIG_ENDIAN ? units - 1 - elt : elt; + + if (elt > units / 2 + || (elt == units / 2 && mode != V4SImode)) +return true; + else +return false; +} + +/* Extract element from dword0 by mfvsrd and lshiftrt and mask. 
Extend_p + indicates if zero extend is needed or not. */ + +void +rs6000_vsx_extract_element_from_dword0 (rtx dest, rtx src, rtx element, + bool extend_p) +{ + enum machine_mode mode = GET_MODE (src); + gcc_assert (rs6000_vsx_element_in_dword0_p (element, mode)); + + enum machine_mode dest_mode = GET_MODE (dest); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int units = GET_MODE_NUNITS (mode); + int elt = INTVAL (element); + elt = BYTES_BIG_ENDIAN ? units - 1 - elt : elt; + int value, shift; + unsigned int mask; + + rtx vec_tmp = gen_lowpart (V2DImode, src); + rtx tmp1 = can_create_pseudo_p () +? gen_reg_rtx (DImode) +: simplify_gen_subreg (DImode, dest,
[PATCHv4, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]
Hi, This patch adds a new insn for vector splat with small V2DI constants on P8. If the value of the constant is in RANGE (-16, 15) and not 0 or -1, it can be loaded with vspltisw and vupkhsw on P8. It should be more efficient than loading the vector from memory. Compared to last version, the main change is to remove the new constraint and use a super constraint in the insn and set the check into insn condition. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog 2023-06-25 Haochen Gui gcc/ PR target/104124 * config/rs6000/altivec.md (*altivec_vupkhs_direct): Rename to... (altivec_vupkhs_direct): ...this. * config/rs6000/predicates.md (vspltisw_vupkhsw_constant_split): New predicate to test if a constant can be loaded with vspltisw and vupkhsw. (easy_vector_constant): Call vspltisw_vupkhsw_constant_p to check if a vector constant can be synthesized with a vspltisw and a vupkhsw. * config/rs6000/rs6000-protos.h (vspltisw_vupkhsw_constant_p): Declare. * config/rs6000/rs6000.cc (vspltisw_vupkhsw_constant_p): New function to return true if OP mode is V2DI and can be synthesized with vupkhsw and vspltisw. * config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up constants with vspltisw and vupkhsw. gcc/testsuite/ PR target/104124 * gcc.target/powerpc/pr104124.c: New. 
patch.diff diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 49b0c964f4d..2c932854c33 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs" } [(set_attr "type" "vecperm")]) -(define_insn "*altivec_vupkhs_direct" +(define_insn "altivec_vupkhs_direct" [(set (match_operand:VP 0 "register_operand" "=v") (unspec:VP [(match_operand: 1 "register_operand" "v")] UNSPEC_VUNPACK_HI_SIGN_DIRECT))] diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 52c65534e51..f62a4d9b506 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -694,6 +694,12 @@ (define_predicate "xxspltib_constant_split" return num_insns > 1; }) +;; Return true if the operand is a constant that can be loaded with a vspltisw +;; instruction and then a vupkhsw instruction. + +(define_predicate "vspltisw_vupkhsw_constant_split" + (and (match_code "const_vector") + (match_test "vspltisw_vupkhsw_constant_p (op, mode)"))) ;; Return 1 if the operand is constant that can loaded directly with a XXSPLTIB ;; instruction. @@ -742,6 +748,11 @@ (define_predicate "easy_vector_constant" && xxspltib_constant_p (op, mode, _insns, )) return true; + /* V2DI constant within RANGE (-16, 15) can be synthesized with a +vspltisw and a vupkhsw. 
*/ + if (vspltisw_vupkhsw_constant_p (op, mode, )) + return true; + return easy_altivec_constant (op, mode); } diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 1a4fc1df668..00cb2d82953 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int, extern int easy_altivec_constant (rtx, machine_mode); extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *); +extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr); extern int vspltis_shifted (rtx); extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); extern bool macho_lo_sum_memory_operand (rtx, machine_mode); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 3be5860dd9b..ae34a02b282 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -6638,6 +6638,36 @@ xxspltib_constant_p (rtx op, return true; } +/* Return true if OP mode is V2DI and can be synthesized with ISA 2.07 + instructions vupkhsw and vspltisw. + + Return the constant that is being split via CONSTANT_PTR. */ + +bool +vspltisw_vupkhsw_constant_p (rtx op, machine_mode mode, int *constant_ptr) +{ + HOST_WIDE_INT value; + rtx elt; + + if (!TARGET_P8_VECTOR) +return false; + + if (mode != V2DImode) +return false; + + if (!const_vec_duplicate_p (op, )) +return false; + + value = INTVAL (elt); + if (value == 0 || value == 1 + || !EASY_VECTOR_15 (value)) +return false; + + if (constant_ptr) +*constant_ptr = (int) value; + return true; +} + const char * output_vec_const_move (rtx *operands) { diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 7d845df5c2d..4919b073e50 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1174,6 +1174,30 @@ (define_insn_and_split "*xxspltib__split" [(set_attr "type" "vecperm") (set_attr "length" "8")]) +(define_insn_and_split
[PATCHv4, rs6000] Add two peephole2 patterns for mr. insn
Hi, This patch adds two peephole2 patterns which help convert certain insn sequences to "mr." instruction. These insn sequences can't be combined in the combine pass. Compared to last version, the empty constraint is removed and test cases run only on powerpc Linux as AIX doesn't support "-mregnames" option. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: Add two peephole patterns for "mr." insn When investigating the issue mentioned in PR87871#c30 - if compare and move pattern benefits before RA, I checked the assembly generated for SPEC2017 and found that certain insn sequences aren't converted to "mr." instructions. The following two sequences can never be combined to the "mr." pattern as there is no register link between them. This patch adds two peephole2 patterns to convert them to "mr." instructions. cmp 0,3,0 mr 4,3 mr 4,3 cmp 0,3,0 The patch also creates a new mode iterator which is decided by TARGET_POWERPC64. This mode iterator is used in "mr." and its split pattern. The original P iterator is wrong when -m32/-mpowerpc64 is set. In this situation, the "mr." should compare the whole 64-bit register with 0 other than the low 32-bit one. gcc/ * config/rs6000/rs6000.md (peephole2 for compare_and_move): New. (peephole2 for move_and_compare): New. (mode_iterator WORD): New. Set the mode to SI/DImode by TARGET_POWERPC64. (*mov_internal2): Change the mode iterator from P to WORD. (split pattern for compare_and_move): Likewise. gcc/testsuite/ * gcc.dg/rtl/powerpc/move_compare_peephole_32.c: New. * gcc.dg/rtl/powerpc/move_compare_peephole_64.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index b0db8ae508d..2ab1e8d4c80 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -491,6 +491,7 @@ (define_mode_iterator SDI [SI DI]) ; The size of a pointer. 
Also, the size of the value that a record-condition ; (one with a '.') will compare; and the size used for arithmetic carries. (define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")]) +(define_mode_iterator WORD [(SI "!TARGET_POWERPC64") (DI "TARGET_POWERPC64")]) ; Iterator to add PTImode along with TImode (TImode can go in VSX registers, ; PTImode is GPR only) @@ -7879,9 +7880,9 @@ (define_split (define_insn "*mov_internal2" [(set (match_operand:CC 2 "cc_reg_operand" "=y,x,?y") - (compare:CC (match_operand:P 1 "gpc_reg_operand" "0,r,r") + (compare:CC (match_operand:WORD 1 "gpc_reg_operand" "0,r,r") (const_int 0))) - (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))] + (set (match_operand:WORD 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))] "" "@ cmpi %2,%0,0 @@ -7891,11 +7892,41 @@ (define_insn "*mov_internal2" (set_attr "dot" "yes") (set_attr "length" "4,4,8")]) +(define_peephole2 + [(set (match_operand:CC 2 "cc_reg_operand") + (compare:CC (match_operand:WORD 1 "int_reg_operand") + (const_int 0))) + (set (match_operand:WORD 0 "int_reg_operand") + (match_dup 1))] + "!cc_reg_not_cr0_operand (operands[2], CCmode)" + [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x") + (compare:CC (match_operand:WORD 1 "int_reg_operand" "r") + (const_int 0))) + (set (match_operand:WORD 0 "int_reg_operand" "=r") + (match_dup 1))])] + "" +) + +(define_peephole2 + [(set (match_operand:WORD 0 "int_reg_operand") + (match_operand:WORD 1 "int_reg_operand")) + (set (match_operand:CC 2 "cc_reg_operand") + (compare:CC (match_dup 1) + (const_int 0)))] + "!cc_reg_not_cr0_operand (operands[2], CCmode)" + [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x") + (compare:CC (match_operand:GPR 1 "int_reg_operand" "r") + (const_int 0))) + (set (match_operand:WORD 0 "int_reg_operand" "=r") + (match_dup 1))])] + "" +) + (define_split [(set (match_operand:CC 2 "cc_reg_not_cr0_operand") - (compare:CC (match_operand:P 1 "gpc_reg_operand") + (compare:CC 
(match_operand:WORD 1 "gpc_reg_operand") (const_int 0))) - (set (match_operand:P 0 "gpc_reg_operand") (match_dup 1))] + (set (match_operand:WORD 0 "gpc_reg_operand") (match_dup 1))] "reload_completed" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c new file mode 100644 index 000..571a3112a74 --- /dev/null +++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c @@ -0,0 +1,60 @@ +/* { dg-do compile { target powerpc*-*-linux* } } */ +/* { dg-skip-if "" { has_arch_ppc64 } } */ +/* { dg-options "-O2 -mregnames" } */ + +/* Following instruction sequence is
Re: [PATCH, rs6000] Add two peephole2 patterns for mr. insn
HP, It makes sense. I will update the patch. Thanks Gui Haochen 在 2023/6/20 8:07, Hans-Peter Nilsson 写道: > On Tue, 30 May 2023, HAO CHEN GUI via Gcc-patches wrote: > >> +++ b/gcc/config/rs6000/rs6000.md >> @@ -7891,6 +7891,36 @@ (define_insn "*mov_internal2" >> (set_attr "dot" "yes") >> (set_attr "length" "4,4,8")]) >> >> +(define_peephole2 >> + [(set (match_operand:CC 2 "cc_reg_operand" "") >> +(compare:CC (match_operand:P 1 "int_reg_operand" "") >> +(const_int 0))) >> + (set (match_operand:P 0 "int_reg_operand" "") > > A random comment from the sideline: I'd suggest to remove the > (empty) constraints string from your peephole2's. > > It can be a matter of port-specific-taste but it seems removing > them would be consistent with the other peephole2's in > rs6000.md. > > (In this matter, I believe the examples in md.texi are bad.) > > brgds, H-P
[PATCH, rs6000] Generate mfvsrwz for all platforms and remove redundant zero extend [PR106769]
Hi, This patch modifies vsx extract expander and generates mfvsrwz/stxsiwx for all platforms when the mode is V4SI and the index of extracted element is 1 for BE and 2 for LE. Also this patch adds a insn pattern for mfvsrwz which can help eliminate redundant zero extend. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: Generate mfvsrwz for all platforms and remove redundant zero extend mfvsrwz has lower latency than xxextractuw. So it should be generated even with p9 vector enabled if possible. Also the instruction is already zero extended. A combine pattern is needed to eliminate redundant zero extend instructions. gcc/ PR target/106769 * config/rs6000/vsx.md (expand vsx_extract_): Skip calling gen_vsx_extract__p9 when it can be implemented by mfvsrwz/stxsiwx. (*vsx_extract__di_p9): Not generate the insn when it can be generated by mfvsrwz. (mfvsrwz): New insn pattern. (*vsx_extract_si): Rename to... (vsx_extract_si): ..., remove redundant insn condition and generate the insn on p9 when it can be implemented by mfvsrwz/stxsiwx. Add a dup alternative for simple vector moving. Remove reload_completed from split condition as it's unnecessary. Remove unnecessary checking from preparation statements. Set type and length attributes for each alternative. gcc/testsuite/ PR target/106769 * gcc.target/powerpc/pr106769.h: New. * gcc.target/powerpc/pr106769-p8.c: New. * gcc.target/powerpc/pr106769-p9.c: New. diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 0a34ceebeb5..09b0f83db86 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -3728,7 +3728,9 @@ (define_expand "vsx_extract_" "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT" { /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */ - if (TARGET_P9_VECTOR) + if (TARGET_P9_VECTOR + && (mode != V4SImode + || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 
1 : 2))) { emit_insn (gen_vsx_extract__p9 (operands[0], operands[1], operands[2])); @@ -3798,7 +3800,9 @@ (define_insn_and_split "*vsx_extract__di_p9" (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,") (parallel [(match_operand:QI 2 "const_int_operand" "n,n")] (clobber (match_scratch:SI 3 "=r,X"))] - "VECTOR_MEM_VSX_P (mode) && TARGET_VEXTRACTUB" + "TARGET_VEXTRACTUB + && (mode != V4SImode + || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))" "#" "&& reload_completed" [(parallel [(set (match_dup 4) @@ -3830,58 +3834,67 @@ (define_insn_and_split "*vsx_extract__store_p9" (set (match_dup 0) (match_dup 3))]) -(define_insn_and_split "*vsx_extract_si" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z") +(define_insn "mfvsrwz" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (vec_select:SI + (match_operand:V4SI 1 "vsx_register_operand" "wa") + (parallel [(match_operand:QI 2 "const_int_operand" "n")] + (clobber (match_scratch:V4SI 3 "=v"))] + "TARGET_DIRECT_MOVE_64BIT + && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)" + "mfvsrwz %0,%x1" + [(set_attr "type" "mfvsr") + (set_attr "isa" "p8v")]) + +(define_insn_and_split "vsx_extract_si" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa") (vec_select:SI -(match_operand:V4SI 1 "gpc_reg_operand" "v,v,v") -(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")]))) - (clobber (match_scratch:V4SI 3 "=v,v,v"))] - "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR" - "#" - "&& reload_completed" +(match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0") +(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")]))) + (clobber (match_scratch:V4SI 3 "=v,v,v,v"))] + "TARGET_DIRECT_MOVE_64BIT + && (!TARGET_P9_VECTOR || INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 
1 : 2))" +{ + if (which_alternative == 0) + return "mfvsrwz %0,%x1"; + + if (which_alternative == 1) + return "xxlor %x0,%x1,%x1"; + + if (which_alternative == 2) + return "stxsiwx %x1,%y0"; + + return ASM_COMMENT_START " vec_extract to same register"; +} + "&& INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)" [(const_int 0)] { rtx dest = operands[0]; rtx src = operands[1]; rtx element = operands[2]; - rtx vec_tmp = operands[3]; - int value; + rtx vec_tmp; + + if (GET_CODE (operands[3]) == SCRATCH) +vec_tmp = gen_reg_rtx (V4SImode); + else +vec_tmp = operands[3]; /* Adjust index for LE element ordering, the below minuend 3 is computed by GET_MODE_NUNITS (V4SImode) - 1. */ if (!BYTES_BIG_ENDIAN) element = GEN_INT (3 - INTVAL (element)); - /* If the value is
[PATCHv3, rs6000] Add two peephole2 patterns for mr. insn
Hi, This patch adds two peephole2 patterns which help convert certain insn sequences to "mr." instruction. These insn sequences can't be combined in combine pass. Compared to last version, it changes the new mode iterator name from "Q" to "WORD". Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: Add two peephole patterns for "mr." insn When investigating the issue mentioned in PR87871#c30 - if compare and move pattern benefits before RA, I checked the assembly generated for SPEC2017 and found that certain insn sequences aren't converted to "mr." instructions. Following two sequence are never to be combined to "mr." pattern as there is no register link between them. This patch adds two peephole2 patterns to convert them to "mr." instructions. cmp 0,3,0 mr 4,3 mr 4,3 cmp 0,3,0 The patch also creates a new mode iterator which decided by TARGET_POWERPC64. This mode iterator is used in "mr." and its split pattern. The original P iterator is wrong when -m32/-mpowerpc64 is set. In this situation, the "mr." should compares the whole 64-bit register with 0 other than the low 32-bit one. gcc/ * config/rs6000/rs6000.md (peephole2 for compare_and_move): New. (peephole2 for move_and_compare): New. (mode_iterator WORD): New. Set the mode to SI/DImode by TARGET_POWERPC64. (*mov_internal2): Change the mode iterator from P to WORD. (split pattern for compare_and_move): Likewise. gcc/testsuite/ * gcc.dg/rtl/powerpc/move_compare_peephole_32.c: New. * gcc.dg/rtl/powerpc/move_compare_peephole_64.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index b0db8ae508d..1f0fe85b9b5 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -491,6 +491,7 @@ (define_mode_iterator SDI [SI DI]) ; The size of a pointer. Also, the size of the value that a record-condition ; (one with a '.') will compare; and the size used for arithmetic carries. 
(define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")]) +(define_mode_iterator WORD [(SI "!TARGET_POWERPC64") (DI "TARGET_POWERPC64")]) ; Iterator to add PTImode along with TImode (TImode can go in VSX registers, ; PTImode is GPR only) @@ -7879,9 +7880,9 @@ (define_split (define_insn "*mov_internal2" [(set (match_operand:CC 2 "cc_reg_operand" "=y,x,?y") - (compare:CC (match_operand:P 1 "gpc_reg_operand" "0,r,r") + (compare:CC (match_operand:WORD 1 "gpc_reg_operand" "0,r,r") (const_int 0))) - (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))] + (set (match_operand:WORD 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))] "" "@ cmpi %2,%0,0 @@ -7891,11 +7892,41 @@ (define_insn "*mov_internal2" (set_attr "dot" "yes") (set_attr "length" "4,4,8")]) +(define_peephole2 + [(set (match_operand:CC 2 "cc_reg_operand" "") + (compare:CC (match_operand:WORD 1 "int_reg_operand" "") + (const_int 0))) + (set (match_operand:WORD 0 "int_reg_operand" "") + (match_dup 1))] + "!cc_reg_not_cr0_operand (operands[2], CCmode)" + [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x") + (compare:CC (match_operand:WORD 1 "int_reg_operand" "r") + (const_int 0))) + (set (match_operand:WORD 0 "int_reg_operand" "=r") + (match_dup 1))])] + "" +) + +(define_peephole2 + [(set (match_operand:WORD 0 "int_reg_operand" "") + (match_operand:WORD 1 "int_reg_operand" "")) + (set (match_operand:CC 2 "cc_reg_operand" "") + (compare:CC (match_dup 1) + (const_int 0)))] + "!cc_reg_not_cr0_operand (operands[2], CCmode)" + [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x") + (compare:CC (match_operand:GPR 1 "int_reg_operand" "r") + (const_int 0))) + (set (match_operand:WORD 0 "int_reg_operand" "=r") + (match_dup 1))])] + "" +) + (define_split [(set (match_operand:CC 2 "cc_reg_not_cr0_operand") - (compare:CC (match_operand:P 1 "gpc_reg_operand") + (compare:CC (match_operand:WORD 1 "gpc_reg_operand") (const_int 0))) - (set (match_operand:P 0 "gpc_reg_operand") (match_dup 1))] + 
(set (match_operand:WORD 0 "gpc_reg_operand") (match_dup 1))] "reload_completed" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c new file mode 100644 index 000..29234dea7c7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c @@ -0,0 +1,60 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-skip-if "" { has_arch_ppc64 } } */ +/* { dg-options "-O2 -mregnames" } */ + +/* Following instruction sequence is found in assembly of + Perl_block_start,
[PATCHv2, rs6000] Add two peephole2 patterns for mr. insn
Hi, This patch adds two peephole2 patterns which help convert certain insn sequences to "mr." instruction. These insn sequences can't be combined in combine pass. Compared to last version, it adds a new mode iterator "Q" which should be used for dot instruction. With "-m32/-mpowerpc64" set, the dot instruction should compare DImode with 0, not the SImode. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: Add two peephole patterns for "mr." insn When investigating the issue mentioned in PR87871#c30 - if compare and move pattern benefits before RA, I checked the assembly generated for SPEC2017 and found that certain insn sequences aren't converted to "mr." instructions. Following two sequence are never to be combined to "mr." pattern as there is no register link between them. This patch adds two peephole2 patterns to convert them to "mr." instructions. cmp 0,3,0 mr 4,3 mr 4,3 cmp 0,3,0 The patch also creates a new mode iterator which decided by TARGET_POWERPC64. This mode iterator is used in "mr." and its split pattern. The original P iterator is wrong when -m32/-mpowerpc64 is set. In this situation, the "mr." should compares the whole 64-bit register with 0 other than the low 32-bit one. gcc/ * config/rs6000/rs6000.md (peephole2 for compare_and_move): New. (peephole2 for move_and_compare): New. (mode_iterator Q): New. Set the mode to SI/DImode by TARGET_POWERPC64. (*mov_internal2): Change the mode iterator from P to Q. (split pattern for compare_and_move): Likewise. gcc/testsuite/ * gcc.dg/rtl/powerpc/move_compare_peephole_32.c: New. * gcc.dg/rtl/powerpc/move_compare_peephole_64.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index b0db8ae508d..fdb5b6ed22a 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -491,6 +491,7 @@ (define_mode_iterator SDI [SI DI]) ; The size of a pointer. 
Also, the size of the value that a record-condition ; (one with a '.') will compare; and the size used for arithmetic carries. (define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")]) +(define_mode_iterator Q [(SI "!TARGET_POWERPC64") (DI "TARGET_POWERPC64")]) ; Iterator to add PTImode along with TImode (TImode can go in VSX registers, ; PTImode is GPR only) @@ -7879,9 +7880,9 @@ (define_split (define_insn "*mov_internal2" [(set (match_operand:CC 2 "cc_reg_operand" "=y,x,?y") - (compare:CC (match_operand:P 1 "gpc_reg_operand" "0,r,r") + (compare:CC (match_operand:Q 1 "gpc_reg_operand" "0,r,r") (const_int 0))) - (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))] + (set (match_operand:Q 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))] "" "@ cmpi %2,%0,0 @@ -7891,11 +7892,41 @@ (define_insn "*mov_internal2" (set_attr "dot" "yes") (set_attr "length" "4,4,8")]) +(define_peephole2 + [(set (match_operand:CC 2 "cc_reg_operand" "") + (compare:CC (match_operand:Q 1 "int_reg_operand" "") + (const_int 0))) + (set (match_operand:Q 0 "int_reg_operand" "") + (match_dup 1))] + "!cc_reg_not_cr0_operand (operands[2], CCmode)" + [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x") + (compare:CC (match_operand:Q 1 "int_reg_operand" "r") + (const_int 0))) + (set (match_operand:Q 0 "int_reg_operand" "=r") + (match_dup 1))])] + "" +) + +(define_peephole2 + [(set (match_operand:Q 0 "int_reg_operand" "") + (match_operand:Q 1 "int_reg_operand" "")) + (set (match_operand:CC 2 "cc_reg_operand" "") + (compare:CC (match_dup 1) + (const_int 0)))] + "!cc_reg_not_cr0_operand (operands[2], CCmode)" + [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x") + (compare:CC (match_operand:GPR 1 "int_reg_operand" "r") + (const_int 0))) + (set (match_operand:Q 0 "int_reg_operand" "=r") + (match_dup 1))])] + "" +) + (define_split [(set (match_operand:CC 2 "cc_reg_not_cr0_operand") - (compare:CC (match_operand:P 1 "gpc_reg_operand") + (compare:CC (match_operand:Q 1 
"gpc_reg_operand") (const_int 0))) - (set (match_operand:P 0 "gpc_reg_operand") (match_dup 1))] + (set (match_operand:Q 0 "gpc_reg_operand") (match_dup 1))] "reload_completed" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c new file mode 100644 index 000..29234dea7c7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c @@ -0,0 +1,60 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-skip-if "" { has_arch_ppc64 } } */ +/* { dg-options "-O2 -mregnames" } */ + +/* Following
[PATCH, rs6000] Add two peephole2 patterns for mr. insn
Hi, By checking the object files of SPECint, I found that two kinds of compare/move can't be combined to "mr." pattern as there is no register link between them. The patch adds two peephole2 patterns for them. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: Add two peephole patterns for "mr." insn Following two insns are never to be combined to "mr." pattern as there is no register link between them. So the patch adds these two peepholes. cmp 0,3,0 mr 4,3 mr 4,3 cmp 0,3,0 gcc/ * config/rs6000/rs6000.md (peephole2 for compare and move): New. (peephole2 for move and compare): New. gcc/testsuite/ * gcc.dg/rtl/powerpc/move_compare_peephole_32.c: New. * gcc.dg/rtl/powerpc/move_compare_peephole_64.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index b0db8ae508d..b60230293f9 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -7891,6 +7891,36 @@ (define_insn "*mov_internal2" (set_attr "dot" "yes") (set_attr "length" "4,4,8")]) +(define_peephole2 + [(set (match_operand:CC 2 "cc_reg_operand" "") + (compare:CC (match_operand:P 1 "int_reg_operand" "") + (const_int 0))) + (set (match_operand:P 0 "int_reg_operand" "") + (match_dup 1))] + "!cc_reg_not_cr0_operand (operands[2], CCmode)" + [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x") + (compare:CC (match_operand:P 1 "int_reg_operand" "r") + (const_int 0))) + (set (match_operand:P 0 "int_reg_operand" "=r") + (match_dup 1))])] + "" +) + +(define_peephole2 + [(set (match_operand:P 0 "int_reg_operand" "") + (match_operand:P 1 "int_reg_operand" "")) + (set (match_operand:CC 2 "cc_reg_operand" "") + (compare:CC (match_dup 1) + (const_int 0)))] + "!cc_reg_not_cr0_operand (operands[2], CCmode)" + [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x") + (compare:CC (match_operand:P 1 "int_reg_operand" "r") + (const_int 0))) + (set (match_operand:P 0 "int_reg_operand" "=r") + (match_dup 
1))])] + "" +) + (define_split [(set (match_operand:CC 2 "cc_reg_not_cr0_operand") (compare:CC (match_operand:P 1 "gpc_reg_operand") diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c new file mode 100644 index 000..4e094c8fe74 --- /dev/null +++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c @@ -0,0 +1,47 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-skip-if "" { has_arch_ppc64 } } */ +/* { dg-options "-O2 -mregnames" } */ + +int __RTL (startwith ("peephole2")) compare_move_peephole () +{ +(function "compare_move_peephole" + (insn-chain +(block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cinsn 8 (set (reg:CC %cr0) +(compare:CC (reg:SI %r3) +(const_int 0 + (cinsn 2 (set (reg:SI %r4) +(reg:SI %r3))) + ;; Extra insn to avoid the above being deleted by DCE. + (cinsn 18 (use (reg:SI %r4))) + (cinsn 19 (use (reg:CC %cr0))) + (edge-to exit (flags "FALLTHRU")) +) ;; block 2 + ) ;; insn-chain +) ;; function "main" +} + +int __RTL (startwith ("peephole2")) move_compare_peephole () +{ +(function "move_compare_peephole" + (insn-chain +(block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cinsn 2 (set (reg:SI %r4) +(reg:SI %r3))) + (cinsn 8 (set (reg:CC %cr0) +(compare:CC (reg:SI %r3) +(const_int 0 + ;; Extra insn to avoid the above being deleted by DCE. 
+ (cinsn 18 (use (reg:SI %r4))) + (cinsn 19 (use (reg:CC %cr0))) + (edge-to exit (flags "FALLTHRU")) +) ;; block 2 + ) ;; insn-chain +) ;; function "main" +} + +/* { dg-final { scan-assembler-times {\mmr\.} 2 } } */ diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_64.c b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_64.c new file mode 100644 index 000..511d6cc5317 --- /dev/null +++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_64.c @@ -0,0 +1,47 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-options "-O2 -mregnames" } */ +/* { dg-require-effective-target has_arch_ppc64 } */ + +int __RTL (startwith ("peephole2")) compare_move_peephole () +{ +(function "compare_move_peephole" + (insn-chain +(block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cinsn 8 (set (reg:CC %cr0) +(compare:CC (reg:DI %r3) +(const_int 0 + (cinsn 2 (set (reg:DI %r4) +
[PATCHv3, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]
Hi, This patch adds a new insn for vector splat with small V2DI constants on P8. If the value of constant is in RANGE (-16, 15) and not 0 or -1, it can be loaded with vspltisw and vupkhsw on P8. It should be efficient than loading vector from memory. Compared to last version, the main change is to set a default value for third parameter of vspltisw_vupkhsw_constant_p and call the function with 2 arguments when the third one doesn't matter. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog 2023-05-26 Haochen Gui gcc/ PR target/104124 * config/rs6000/altivec.md (*altivec_vupkhs_direct): Rename to... (altivec_vupkhs_direct): ...this. * config/rs6000/constraints.md (wT constraint): New constant for a vector constraint that can be loaded with vspltisw and vupkhsw. * config/rs6000/predicates.md (vspltisw_vupkhsw_constant_split): New predicate for wT constraint. (easy_vector_constant): Call vspltisw_vupkhsw_constant_p to Check if a vector constant can be synthesized with a vspltisw and a vupkhsw. * config/rs6000/rs6000-protos.h (vspltisw_vupkhsw_constant_p): Declare. * config/rs6000/rs6000.cc (vspltisw_vupkhsw_constant_p): Call * (vspltisw_vupkhsw_constant_p): New function to return true if OP mode is V2DI and can be synthesized with vupkhsw and vspltisw. * config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up constants with vspltisw and vupkhsw. gcc/testsuite/ PR target/104124 * gcc.target/powerpc/pr104124.c: New. 
patch.diff diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 49b0c964f4d..2c932854c33 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs" } [(set_attr "type" "vecperm")]) -(define_insn "*altivec_vupkhs_direct" +(define_insn "altivec_vupkhs_direct" [(set (match_operand:VP 0 "register_operand" "=v") (unspec:VP [(match_operand: 1 "register_operand" "v")] UNSPEC_VUNPACK_HI_SIGN_DIRECT))] diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index c4a6ccf4efb..e7f185660c0 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -144,6 +144,10 @@ (define_constraint "wS" "@internal Vector constant that can be loaded with XXSPLTIB & sign extension." (match_test "xxspltib_constant_split (op, mode)")) +(define_constraint "wT" + "@internal Vector constant that can be loaded with vspltisw & vupkhsw." + (match_test "vspltisw_vupkhsw_constant_split (op, mode)")) + ;; ISA 3.0 DS-form instruction that has the bottom 2 bits 0 and no update form. ;; Used by LXSD/STXSD/LXSSP/STXSSP. In contrast to "Y", the multiple-of-four ;; offset is enforced for 32-bit too. diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 52c65534e51..1ed770bffa6 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -694,6 +694,14 @@ (define_predicate "xxspltib_constant_split" return num_insns > 1; }) +;; Return true if the operand is a constant that can be loaded with a vspltisw +;; instruction and then a vupkhsw instruction. + +(define_predicate "vspltisw_vupkhsw_constant_split" + (match_code "const_vector") +{ + return vspltisw_vupkhsw_constant_p (op, mode); +}) ;; Return 1 if the operand is constant that can loaded directly with a XXSPLTIB ;; instruction. 
@@ -742,6 +750,11 @@ (define_predicate "easy_vector_constant" && xxspltib_constant_p (op, mode, _insns, )) return true; + /* V2DI constant within RANGE (-16, 15) can be synthesized with a +vspltisw and a vupkhsw. */ + if (vspltisw_vupkhsw_constant_p (op, mode, )) + return true; + return easy_altivec_constant (op, mode); } diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 1a4fc1df668..00cb2d82953 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int, extern int easy_altivec_constant (rtx, machine_mode); extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *); +extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr); extern int vspltis_shifted (rtx); extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); extern bool macho_lo_sum_memory_operand (rtx, machine_mode); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 3be5860dd9b..ae34a02b282 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -6638,6 +6638,36 @@ xxspltib_constant_p (rtx op, return true; } +/* Return true if OP mode is V2DI and can be synthesized with ISA 2.07 + instructions vupkhsw and vspltisw. + +
[PATCHv2, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]
Hi, This patch adds a new insn for vector splat with small V2DI constants on P8. If the value of the constant is in the range [-16, 15] and not 0 or -1, it can be loaded with vspltisw and vupkhsw on P8. It should be more efficient than loading the vector from the TOC. Compared to the last version, the main change is to move the constant check from easy_altivec_constant to easy_vector_constant and remove some unnecessary mode checks. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog 2023-05-04 Haochen Gui gcc/ PR target/104124 * config/rs6000/altivec.md (*altivec_vupkhs_direct): Rename to... (altivec_vupkhs_direct): ...this. * config/rs6000/constraints.md (wT constraint): New constraint for a vector constant that can be loaded with vspltisw and vupkhsw. * config/rs6000/predicates.md (vspltisw_vupkhsw_constant_split): New predicate for wT constraint. (easy_vector_constant): Call vspltisw_vupkhsw_constant_p to check if a vector constant can be synthesized with a vspltisw and a vupkhsw. * config/rs6000/rs6000-protos.h (vspltisw_vupkhsw_constant_p): Declare. * config/rs6000/rs6000.cc (vspltisw_vupkhsw_constant_p): New function to return true if OP mode is V2DI and can be synthesized with vupkhsw and vspltisw. * config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up constants with vspltisw and vupkhsw. gcc/testsuite/ PR target/104124 * gcc.target/powerpc/pr104124.c: New. 
patch.diff diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 49b0c964f4d..2c932854c33 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs" } [(set_attr "type" "vecperm")]) -(define_insn "*altivec_vupkhs_direct" +(define_insn "altivec_vupkhs_direct" [(set (match_operand:VP 0 "register_operand" "=v") (unspec:VP [(match_operand: 1 "register_operand" "v")] UNSPEC_VUNPACK_HI_SIGN_DIRECT))] diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index c4a6ccf4efb..e7f185660c0 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -144,6 +144,10 @@ (define_constraint "wS" "@internal Vector constant that can be loaded with XXSPLTIB & sign extension." (match_test "xxspltib_constant_split (op, mode)")) +(define_constraint "wT" + "@internal Vector constant that can be loaded with vspltisw & vupkhsw." + (match_test "vspltisw_vupkhsw_constant_split (op, mode)")) + ;; ISA 3.0 DS-form instruction that has the bottom 2 bits 0 and no update form. ;; Used by LXSD/STXSD/LXSSP/STXSSP. In contrast to "Y", the multiple-of-four ;; offset is enforced for 32-bit too. diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 52c65534e51..ff0f625d508 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -694,6 +694,16 @@ (define_predicate "xxspltib_constant_split" return num_insns > 1; }) +;; Return true if the operand is a constant that can be loaded with a vspltisw +;; instruction and then a vupkhsw instruction. + +(define_predicate "vspltisw_vupkhsw_constant_split" + (match_code "const_vector") +{ + int value; + + return vspltisw_vupkhsw_constant_p (op, mode, ); +}) ;; Return 1 if the operand is constant that can loaded directly with a XXSPLTIB ;; instruction. 
@@ -742,6 +752,11 @@ (define_predicate "easy_vector_constant" && xxspltib_constant_p (op, mode, _insns, )) return true; + /* V2DI constant within RANGE (-16, 15) can be synthesized with a +vspltisw and a vupkhsw. */ + if (vspltisw_vupkhsw_constant_p (op, mode, )) + return true; + return easy_altivec_constant (op, mode); } diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 1a4fc1df668..ba39a73abf8 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int, extern int easy_altivec_constant (rtx, machine_mode); extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *); +extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int *); extern int vspltis_shifted (rtx); extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); extern bool macho_lo_sum_memory_operand (rtx, machine_mode); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 3be5860dd9b..697b18e14f1 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -6638,6 +6638,36 @@ xxspltib_constant_p (rtx op, return true; } +/* Return true if OP mode is V2DI and can be synthesized with ISA 2.07 + instructions vupkhsw and vspltisw. + + Return the constant
Ping [PATCHv2, rs6000] Merge two vector shifts when their sources are the same
Hi Gently ping this. https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612944.html Thanks Gui Haochen 在 2023/2/28 10:31, HAO CHEN GUI 写道: > Hi, > This patch merges two "vsldoi" insns when their sources are the > same. Particularly, it is simplified to be one move if the total > shift is multiples of 16 bytes. > > Bootstrapped and tested on powerpc64-linux BE and LE with no > regressions. > > Thanks > Gui Haochen > > > ChangeLog > 2023-02-28 Haochen Gui > > gcc/ > * config/rs6000/altivec.md (*altivec_vsldoi_dup_): New > insn_and_split to merge two vsldoi when the sources are the same. > > gcc/testsuite/ > * gcc.target/powerpc/vsldoi_merge.c: New. > > > > patch.diff > diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md > index 84660073f32..fae8ec2b2e8 100644 > --- a/gcc/config/rs6000/altivec.md > +++ b/gcc/config/rs6000/altivec.md > @@ -2529,6 +2529,35 @@ (define_insn "altivec_vsldoi_" >"vsldoi %0,%1,%2,%3" >[(set_attr "type" "vecperm")]) > > +(define_insn_and_split "*altivec_vsldoi_dup_" > + [(set (match_operand:VM 0 "register_operand" "=v") > + (unspec:VM [(unspec:VM [(match_operand:VM 1 "register_operand" "v") > + (match_dup 1) > + (match_operand:QI 2 "immediate_operand" "i")] > +UNSPEC_VSLDOI) > + (unspec:VM [(match_dup 1) > + (match_dup 1) > + (match_dup 2)] > +UNSPEC_VSLDOI) > + (match_operand:QI 3 "immediate_operand" "i")] > +UNSPEC_VSLDOI))] > + "TARGET_ALTIVEC" > + "#" > + "&& 1" > + [(const_int 0)] > +{ > + unsigned int shift1 = UINTVAL (operands[2]); > + unsigned int shift2 = UINTVAL (operands[3]); > + > + unsigned int shift = (shift1 + shift2) % 16; > + if (shift) > +emit_insn (gen_altivec_vsldoi_ (operands[0], operands[1], > + operands[1], GEN_INT (shift))); > + else > +emit_move_insn (operands[0], operands[1]); > + DONE; > +}) > + > (define_insn "altivec_vupkhs" >[(set (match_operand:VP 0 "register_operand" "=v") > (unspec:VP [(match_operand: 1 "register_operand" "v")] > diff --git 
a/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c > b/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c > new file mode 100644 > index 000..eebd7b4d382 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c > @@ -0,0 +1,59 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > +/* { dg-options "-O2 -mvsx -save-temps" } */ > + > +#include "altivec.h" > + > +#ifdef DEBUG > +#include > +#endif > + > +void abort (void); > + > +__attribute__ ((noipa)) vector signed int > +test1 (vector signed int a) > +{ > + a = vec_sld (a, a, 2); > + a = vec_sld (a, a, 6); > + return a; > +} > + > +__attribute__ ((noipa)) vector signed int > +test2 (vector signed int a) > +{ > + a = vec_sld (a, a, 14); > + a = vec_sld (a, a, 2); > + return a; > +} > + > +int main (void) > +{ > + vector signed int a = {1,2,3,4}; > + vector signed int result_a; > + int i; > + > + result_a = test1 (a); > + vector signed int expect_a = {3,4,1,2}; > + > + for (i = 0; i< 4; i++) > +if (result_a[i] != expect_a[i]) > +#ifdef DEBUG > + printf("ERROR: test1 result[%d] = %d, not expected[%d] = %d\n", > + i, result_a[i], i, expect_a[i]); > +#else > + abort (); > +#endif > + > + result_a = test2 (a); > + > + for (i = 0; i< 4; i++) > +if (result_a[i] != a[i]) > +#ifdef DEBUG > + printf("ERROR: test2 result[%d] = %d, not expected[%d] = %d\n", > + i, result_a[i], i, a[i]); > +#else > + abort (); > +#endif > +} > + > +/* { dg-final { scan-assembler-times {\mvsldoi\M} 1 } } */
Ping^2 [PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694]
Hi, Gently ping this: https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html Thanks Gui Haochen 在 2023/2/20 10:10, HAO CHEN GUI 写道: > Hi, > Gently ping this: > https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html > > Gui Haochen > Thanks > > 在 2023/2/8 13:08, HAO CHEN GUI 写道: >> Hi, >> The logical operations for TImode is split after reload pass right now. >> Some >> potential optimizations miss as the split is too late. This patch removes >> TImode from "AND", "IOR", "XOR" and "NOT" expander so that these logical >> operations can be split at expand pass. The new test case illustrates the >> optimization. >> >> Two test cases of pr92398 are merged into one as all sub-targets generates >> the same sequence of instructions with the patch. >> >> Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. >> >> Thanks >> Gui Haochen >> >> >> ChangeLog >> 2023-02-08 Haochen Gui >> >> gcc/ >> PR target/100694 >> * config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit >> vector types. >> (and3): Replace BOOL_128 with BOOL_128_V. >> (ior3): Likewise. >> (xor3): Likewise. >> (one_cmpl2 expander): New expander with BOOL_128_V. >> (one_cmpl2 insn_and_split): Rename to ... >> (*one_cmpl2): ... this. >> >> gcc/testsuite/ >> PR target/100694 >> * gcc.target/powerpc/pr100694.c: New. >> * gcc.target/powerpc/pr92398.c: New. >> * gcc.target/powerpc/pr92398.h: Remove. >> * gcc.target/powerpc/pr92398.p9-.c: Remove. >> * gcc.target/powerpc/pr92398.p9+.c: Remove. 
>> >> >> patch.diff >> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >> index 4bd1dfd3da9..455b7329643 100644 >> --- a/gcc/config/rs6000/rs6000.md >> +++ b/gcc/config/rs6000/rs6000.md >> @@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128 [TI >> (V2DF "TARGET_ALTIVEC") >> (V1TI "TARGET_ALTIVEC")]) >> >> +;; Mode iterator for logical operations on 128-bit vector types >> +(define_mode_iterator BOOL_128_V[(V16QI "TARGET_ALTIVEC") >> + (V8HI "TARGET_ALTIVEC") >> + (V4SI "TARGET_ALTIVEC") >> + (V4SF "TARGET_ALTIVEC") >> + (V2DI "TARGET_ALTIVEC") >> + (V2DF "TARGET_ALTIVEC") >> + (V1TI "TARGET_ALTIVEC")]) >> + >> ;; For the GPRs we use 3 constraints for register outputs, two that are the >> ;; same as the output register, and a third where the output register is an >> ;; early clobber, so we don't have to deal with register overlaps. For the >> @@ -7135,23 +7144,23 @@ (define_expand "subti3" >> ;; 128-bit logical operations expanders >> >> (define_expand "and3" >> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >> -(and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >> - (match_operand:BOOL_128 2 "vlogical_operand")))] >> + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") >> +(and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >> +(match_operand:BOOL_128_V 2 "vlogical_operand")))] >>"" >>"") >> >> (define_expand "ior3" >> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >> -(ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >> - (match_operand:BOOL_128 2 "vlogical_operand")))] >> + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") >> +(ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >> +(match_operand:BOOL_128_V 2 "vlogical_operand")))] >>"" >>"") >> >> (define_expand "xor3" >> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >> -(xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >> - (match_operand:BOOL_128 2 "vlogical_operand")))] >> + [(set (match_operand:BOOL_128_V 
0 "vlogical_operand") >> +(xor:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >> +(match_operand:BOOL_128_V 2 "vlogical_operand")))] >>"" >>"") >> >> @@ -7449,7 +7458,14 @@ (define_insn_and_split "*eqv3_internal2" >> (const_string "16")))]) >> >> ;; 128-bit one's complement >> -(define_insn_and_split "one_cmpl2" >> +(define_expand "one_cmpl2" >> +[(set (match_operand:BOOL_128_V 0 "vlogical_operand" "=") >> +(not:BOOL_128_V >> + (match_operand:BOOL_128_V 1 "vlogical_operand" "")))] >> + "" >> + "") >> + >> +(define_insn_and_split "*one_cmpl2" >>[(set (match_operand:BOOL_128 0 "vlogical_operand" "=") >> (not:BOOL_128 >>(match_operand:BOOL_128 1 "vlogical_operand" "")))] >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c >> b/gcc/testsuite/gcc.target/powerpc/pr100694.c >> new file mode 100644 >> index
Re: [PATCH-4, rs6000] Change ilp32 target check for some scalar-extract-sig and scalar-insert-exp test cases
Hi, Gently ping this. https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609372.html Thanks Gui Haochen 在 2023/1/4 14:17, HAO CHEN GUI 写道: > Hi, > "ilp32" is used in these test cases to make sure test cases only run on a > 32-bit environment. Unfortunately, these cases also run with > "-m32/-mpowerpc64" which causes unexpected errors. This patch changes the > target check to skip if "has_arch_ppc64" is set. So the test cases won't run > when arch_ppc64 has been set. > > Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. > Is this okay for trunk? Any recommendations? Thanks a lot. > > Gui Haochen > > ChangeLog > 2023-01-03 Haochen Gui > > gcc/testsuite/ > * gcc.target/powerpc/bfp/scalar-extract-sig-2.c: Replace ilp32 check > with dg-skip-if has_arch_ppc64. > * gcc.target/powerpc/bfp/scalar-insert-exp-2.c: Likewise. > * gcc.target/powerpc/bfp/scalar-insert-exp-5.c: Likewise. > > patch.diff > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c > b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c > index 39ee74c94dc..148b5fbd9fa 100644 > --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > -/* { dg-require-effective-target ilp32 } */ > +/* { dg-skip-if "" { has_arch_ppc64 } } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > /* { dg-options "-mdejagnu-cpu=power9" } */ > > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c > b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c > index efd69725905..956c1183beb 100644 > --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > -/* { dg-require-effective-target ilp32 } */ > +/* { dg-skip-if "" { has_arch_ppc64 } } */ > /* { 
dg-require-effective-target powerpc_p9vector_ok } */ > /* { dg-options "-mdejagnu-cpu=power9" } */ > > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c > b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c > index f85966a6fdf..9a7949fb89a 100644 > --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > -/* { dg-require-effective-target ilp32 } */ > +/* { dg-skip-if "" { has_arch_ppc64 } } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > /* { dg-options "-mdejagnu-cpu=power9" } */ >
Re: [PATCH-3, rs6000] Change mode and insn condition for scalar insert exp instruction
Hi, Gently ping this. https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609371.html Thanks Gui Haochen 在 2023/1/4 14:17, HAO CHEN GUI 写道: > Hi, > This patch changes the mode of exponent to GPR in scalar insert exp > pattern, as the exponent can be put into a 32-bit register. Also the > condition check is changed from TARGET_64BIT to TARGET_POWERPC64. > > The test cases are modified according to the changes of expand pattern. > > Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. > Is this okay for trunk? Any recommendations? Thanks a lot. > > Gui Haochen > > ChangeLog > 2023-01-03 Haochen Gui > > gcc/ > * config/rs6000/rs6000-builtins.def > (__builtin_vsx_scalar_insert_exp): Replace bif-pattern from xsiexpdp > to xsiexpdp_di. > (__builtin_vsx_scalar_insert_exp_dp): Replace bif-pattern from > xsiexpdpf to xsiexpdpf_di. > * config/rs6000/vsx.md (xsiexpdp): Rename to... > (xsiexpdp_): ..., set the mode of second operand to GPR and > replace TARGET_64BIT with TARGET_POWERPC64. > (xsiexpdpf): Rename to... > (xsiexpdpf_): ..., set the mode of second operand to GPR and > replace TARGET_64BIT with TARGET_POWERPC64. > > gcc/testsuite/ > * gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Replace lp64 check > with has_arch_ppc64. > * gcc.target/powerpc/bfp/scalar-insert-exp-1.c: Likewise. > * gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Likewise. > * gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Likewise. > * gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Likewise. > * gcc.target/powerpc/bfp/scalar-insert-exp-4.c: Likewise. 
> > patch.diff > diff --git a/gcc/config/rs6000/rs6000-builtins.def > b/gcc/config/rs6000/rs6000-builtins.def > index 25647b7bdd2..b1b5002d7d9 100644 > --- a/gcc/config/rs6000/rs6000-builtins.def > +++ b/gcc/config/rs6000/rs6000-builtins.def > @@ -2854,10 +2854,10 @@ > >const double __builtin_vsx_scalar_insert_exp (unsigned long long, \ > unsigned long long); > -VSIEDP xsiexpdp {} > +VSIEDP xsiexpdp_di {} > >const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned long > long); > -VSIEDPF xsiexpdpf {} > +VSIEDPF xsiexpdpf_di {} > >pure vsc __builtin_vsx_xl_len_r (void *, signed long); > XL_LEN_R xl_len_r {} > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > index 27e03a4cf6c..3376090cc6f 100644 > --- a/gcc/config/rs6000/vsx.md > +++ b/gcc/config/rs6000/vsx.md > @@ -5137,22 +5137,22 @@ (define_insn "xsiexpqp_" >[(set_attr "type" "vecmove")]) > > ;; VSX Scalar Insert Exponent Double-Precision > -(define_insn "xsiexpdp" > +(define_insn "xsiexpdp_" >[(set (match_operand:DF 0 "vsx_register_operand" "=wa") > (unspec:DF [(match_operand:DI 1 "register_operand" "r") > - (match_operand:DI 2 "register_operand" "r")] > + (match_operand:GPR 2 "register_operand" "r")] >UNSPEC_VSX_SIEXPDP))] > - "TARGET_P9_VECTOR && TARGET_64BIT" > + "TARGET_P9_VECTOR && TARGET_POWERPC64" >"xsiexpdp %x0,%1,%2" >[(set_attr "type" "fpsimple")]) > > ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument > -(define_insn "xsiexpdpf" > +(define_insn "xsiexpdpf_" >[(set (match_operand:DF 0 "vsx_register_operand" "=wa") > (unspec:DF [(match_operand:DF 1 "register_operand" "r") > - (match_operand:DI 2 "register_operand" "r")] > + (match_operand:GPR 2 "register_operand" "r")] >UNSPEC_VSX_SIEXPDP))] > - "TARGET_P9_VECTOR && TARGET_64BIT" > + "TARGET_P9_VECTOR && TARGET_POWERPC64" >"xsiexpdp %x0,%1,%2" >[(set_attr "type" "fpsimple")]) > > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c > 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c > index d8243258a67..88d77564158 100644 > --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c > @@ -1,7 +1,7 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > -/* { dg-require-effective-target lp64 } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > /* { dg-options "-mdejagnu-cpu=power9" } */ > +/* { dg-require-effective-target has_arch_ppc64 } */ > > /* This test should succeed only on 64-bit configurations. */ > #include > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c > b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c > index 8260b107178..2f219ddc83a 100644 > --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c > @@ -1,7 +1,7 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > -/* { dg-require-effective-target lp64 } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > /* { dg-options "-mdejagnu-cpu=power8" } */ >
Ping [PATCH-2, rs6000] Change mode and insn condition for scalar extract sig instruction
Hi, Gently ping this. https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609370.html Thanks Gui Haochen 在 2023/1/4 14:16, HAO CHEN GUI 写道: > Hi, > This patch changes the return type of __builtin_vsx_scalar_extract_sig > from const signed long to const signed long long, so that it can be called > with "-m32/-mpowerpc64" option. The bif needs TARGET_POWERPC64 instead of > TARGET_64BIT. So the condition check in the expander is changed. > > The test cases are modified according to the changes of expand pattern. > > Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. > Is this okay for trunk? Any recommendations? Thanks a lot. > > Gui Haochen > > ChangeLog > 2023-01-03 Haochen Gui > > gcc/ > * config/rs6000/rs6000-builtins.def > (__builtin_vsx_scalar_extract_sig): Set return type to const signed > long long. > * config/rs6000/vsx.md (xsxsigdp): Replace TARGET_64BIT with > TARGET_POWERPC64. > > gcc/testsuite/ > * gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Replace lp64 check > with has_arch_ppc64. > * gcc.target/powerpc/bfp/scalar-extract-sig-1.c: Likewise. > * gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Likewise. 
> > patch.diff > diff --git a/gcc/config/rs6000/rs6000-builtins.def > b/gcc/config/rs6000/rs6000-builtins.def > index a8f1d3f1b3d..25647b7bdd2 100644 > --- a/gcc/config/rs6000/rs6000-builtins.def > +++ b/gcc/config/rs6000/rs6000-builtins.def > @@ -2849,7 +2849,7 @@ >pure vsc __builtin_vsx_lxvl (const void *, signed long); > LXVL lxvl {} > > - const signed long __builtin_vsx_scalar_extract_sig (double); > + const signed long long __builtin_vsx_scalar_extract_sig (double); > VSESDP xsxsigdp {} > >const double __builtin_vsx_scalar_insert_exp (unsigned long long, \ > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > index 229c26c3a61..27e03a4cf6c 100644 > --- a/gcc/config/rs6000/vsx.md > +++ b/gcc/config/rs6000/vsx.md > @@ -5111,7 +5111,7 @@ (define_insn "xsxsigdp" >[(set (match_operand:DI 0 "register_operand" "=r") > (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] >UNSPEC_VSX_SXSIG))] > - "TARGET_P9_VECTOR && TARGET_64BIT" > + "TARGET_P9_VECTOR && TARGET_POWERPC64" >"xsxsigdp %0,%x1" >[(set_attr "type" "integer")]) > > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c > b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c > index 637080652b7..d22f7d1b274 100644 > --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c > @@ -1,7 +1,7 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > -/* { dg-require-effective-target lp64 } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > /* { dg-options "-mdejagnu-cpu=power9" } */ > +/* { dg-require-effective-target has_arch_ppc64 } */ > > /* This test should succeed only on 64-bit configurations. 
*/ > #include > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c > b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c > index f12eed3d9d5..64747d73a51 100644 > --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c > @@ -1,7 +1,7 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > -/* { dg-require-effective-target lp64 } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > /* { dg-options "-mdejagnu-cpu=power8" } */ > +/* { dg-require-effective-target has_arch_ppc64 } */ > > /* This test should succeed only on 64-bit configurations. */ > #include > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c > b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c > index c85072da138..561be53fb9b 100644 > --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c > @@ -1,7 +1,7 @@ > /* { dg-do run { target { powerpc*-*-* } } } */ > -/* { dg-require-effective-target lp64 } */ > /* { dg-require-effective-target p9vector_hw } */ > /* { dg-options "-mdejagnu-cpu=power9" } */ > +/* { dg-require-effective-target has_arch_ppc64 } */ > > /* This test should succeed only on 64-bit configurations. */ > #include
Ping [PATCH-1, rs6000] Change mode and insn condition for scalar extract exp instruction
Hi, Gently ping this. https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609369.html Thanks Gui Haochen 在 2023/1/4 14:16, HAO CHEN GUI 写道: > Hi, > This patch changes the return type of __builtin_vsx_scalar_extract_exp > from const signed long to const signed int, as the exponent can be put in > a signed int. It is also inline with the external interface definition of > the bif. The mode of exponent operand in "xsxexpdp" is changed to GPR mode > and TARGET_64BIT check is removed, as the instruction can be executed on > a 32-bit environment. > > The test cases are modified according to the changes of expand pattern. > > Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. > Is this okay for trunk? Any recommendations? Thanks a lot. > > Gui Haochen > > ChangeLog > 2022-12-23 Haochen Gui > > gcc/ > * config/rs6000/rs6000-builtins.def > (__builtin_vsx_scalar_extract_exp): Set return type to const unsigned > int and set its bif-pattern to xsxexpdp_si, move it from power9-64 to > power9 catalog. > * config/rs6000/vsx.md (xsxexpdp): Rename to ... > (xsxexpdp_): ..., set mode of operand 0 to GPR and remove > TARGET_64BIT check. > * doc/extend.texi (scalar_extract_exp): Remove 64-bit environment > requirement when it has a 64-bit argument. > > gcc/testsuite/ > * gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Remove lp64 check. > * gcc.target/powerpc/bfp/scalar-extract-exp-1.c: Likewise. > * gcc.target/powerpc/bfp/scalar-extract-exp-2.c: Deleted as the case is > invalid. > * gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Remove lp64 check. 
> > patch.diff > diff --git a/gcc/config/rs6000/rs6000-builtins.def > b/gcc/config/rs6000/rs6000-builtins.def > index f76f54793d7..a8f1d3f1b3d 100644 > --- a/gcc/config/rs6000/rs6000-builtins.def > +++ b/gcc/config/rs6000/rs6000-builtins.def > @@ -2833,6 +2833,8 @@ >const signed int __builtin_dtstsfi_ov_td (const int<6>, _Decimal128); > TSTSFI_OV_TD dfptstsfi_unordered_td {} > > + const signed int __builtin_vsx_scalar_extract_exp (double); > +VSEEDP xsxexpdp_si {} > > [power9-64] >void __builtin_altivec_xst_len_r (vsc, void *, long); > @@ -2847,9 +2849,6 @@ >pure vsc __builtin_vsx_lxvl (const void *, signed long); > LXVL lxvl {} > > - const signed long __builtin_vsx_scalar_extract_exp (double); > -VSEEDP xsxexpdp {} > - >const signed long __builtin_vsx_scalar_extract_sig (double); > VSESDP xsxsigdp {} > > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > index 992fbc983be..229c26c3a61 100644 > --- a/gcc/config/rs6000/vsx.md > +++ b/gcc/config/rs6000/vsx.md > @@ -5089,11 +5089,11 @@ (define_insn "xsxexpqp_" >[(set_attr "type" "vecmove")]) > > ;; VSX Scalar Extract Exponent Double-Precision > -(define_insn "xsxexpdp" > - [(set (match_operand:DI 0 "register_operand" "=r") > - (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] > +(define_insn "xsxexpdp_" > + [(set (match_operand:GPR 0 "register_operand" "=r") > + (unspec:GPR [(match_operand:DF 1 "vsx_register_operand" "wa")] >UNSPEC_VSX_SXEXPDP))] > - "TARGET_P9_VECTOR && TARGET_64BIT" > + "TARGET_P9_VECTOR" >"xsxexpdp %0,%x1" >[(set_attr "type" "integer")]) > > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > index d3812fa55b0..7c087967234 100644 > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -19598,7 +19598,10 @@ bool scalar_test_neg (double source); > bool scalar_test_neg (__ieee128 source); > @end smallexample > > -The @code{scalar_extract_exp} and @code{scalar_extract_sig} > +The @code{scalar_extract_exp} with a 64-bit source argument > +function requires an 
environment supporting ISA 3.0 or later. > +The @code{scalar_extract_exp} with a 128-bit source argument > +and @code{scalar_extract_sig} > functions require a 64-bit environment supporting ISA 3.0 or later. > The @code{scalar_extract_exp} and @code{scalar_extract_sig} built-in > functions return the significand and the biased exponent value > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c > b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c > index 35bf1b240f3..d971833748e 100644 > --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c > @@ -1,9 +1,7 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > -/* { dg-require-effective-target lp64 } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > /* { dg-options "-mdejagnu-cpu=power9" } */ > > -/* This test should succeed only on 64-bit configurations. */ > #include > > unsigned int > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c > b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c > index 9737762c1d4..1cb438f9b70 100644 > ---
PING^2 [PATCH, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]
Hi, Gentle ping this: https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601909.html Thanks Gui Haochen 在 2022/12/14 13:30, HAO CHEN GUI 写道: > Hi, >Gentle ping this: > https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601909.html > > Thanks > Gui Haochen > > 在 2022/9/21 13:13, HAO CHEN GUI 写道: >> Hi, >> This patch adds a new insn for vector splat with small V2DI constants on >> P8. >> If the value of constant is in RANGE (-16, 15) and not 0 or -1, it can be >> loaded >> with vspltisw and vupkhsw on P8. It should be efficient than loading vector >> from >> TOC. >> >> Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. >> Is this okay for trunk? Any recommendations? Thanks a lot. >> >> ChangeLog >> 2022-09-21 Haochen Gui >> >> gcc/ >> PR target/104124 >> * config/rs6000/altivec.md (*altivec_vupkhs_direct): Renamed >> to... >> (altivec_vupkhs_direct): ...this. >> * config/rs6000/constraints.md (wT constraint): New constant for a >> vector constraint that can be loaded with vspltisw and vupkhsw. >> * config/rs6000/predicates.md (vspltisw_constant_split): New >> predicate for wT constraint. >> * config/rs6000/rs6000-protos.h (vspltisw_constant_p): Add declaration. >> * config/rs6000/rs6000.cc (easy_altivec_constant): Call >> vspltisw_constant_p to judge if a V2DI constant can be synthesized with >> a vspltisw and a vupkhsw. >> * (vspltisw_constant_p): New function to return true if OP mode is >> V2DI and can be synthesized with ISA 2.07 instruction vupkhsw and >> vspltisw. >> * gcc/config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up >> constants with vspltisw and vupkhsw. >> >> gcc/testsuite/ >> PR target/104124 >> * gcc.target/powerpc/p8-splat.c: New. 
>> >> patch.diff >> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md >> index 2c4940f2e21..185414df021 100644 >> --- a/gcc/config/rs6000/altivec.md >> +++ b/gcc/config/rs6000/altivec.md >> @@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs" >> } >>[(set_attr "type" "vecperm")]) >> >> -(define_insn "*altivec_vupkhs_direct" >> +(define_insn "altivec_vupkhs_direct" >>[(set (match_operand:VP 0 "register_operand" "=v") >> (unspec:VP [(match_operand: 1 "register_operand" "v")] >> UNSPEC_VUNPACK_HI_SIGN_DIRECT))] >> diff --git a/gcc/config/rs6000/constraints.md >> b/gcc/config/rs6000/constraints.md >> index 5a44a92142e..f65dea6e0c7 100644 >> --- a/gcc/config/rs6000/constraints.md >> +++ b/gcc/config/rs6000/constraints.md >> @@ -150,6 +150,10 @@ (define_constraint "wS" >>"@internal Vector constant that can be loaded with XXSPLTIB & sign >> extension." >>(match_test "xxspltib_constant_split (op, mode)")) >> >> +(define_constraint "wT" >> + "@internal Vector constant that can be loaded with vspltisw & vupkhsw." >> + (match_test "vspltisw_constant_split (op, mode)")) >> + >> ;; ISA 3.0 DS-form instruction that has the bottom 2 bits 0 and no update >> form. >> ;; Used by LXSD/STXSD/LXSSP/STXSSP. In contrast to "Y", the >> multiple-of-four >> ;; offset is enforced for 32-bit too. >> diff --git a/gcc/config/rs6000/predicates.md >> b/gcc/config/rs6000/predicates.md >> index b1fcc69bb60..00cf60bbe58 100644 >> --- a/gcc/config/rs6000/predicates.md >> +++ b/gcc/config/rs6000/predicates.md >> @@ -694,6 +694,19 @@ (define_predicate "xxspltib_constant_split" >>return num_insns > 1; >> }) >> >> +;; Return true if the operand is a constant that can be loaded with a >> vspltisw >> +;; instruction and then a vupkhsw instruction. 
>> + >> +(define_predicate "vspltisw_constant_split" >> + (match_code "const_vector,vec_duplicate") >> +{ >> + int value = 32; >> + >> + if (!vspltisw_constant_p (op, mode, &value)) >> +return false; >> + >> + return true; >> +}) >> >> ;; Return 1 if the operand is constant that can loaded directly with a >> XXSPLTIB >> ;; instruction. >> diff --git a/gcc/config/rs6000/rs6000-protos.h >> b/gcc/config/rs6000/rs6000-protos.h >> index b3c16e7448d..45f3d044eee 100644 >> --- a/gcc/config/rs6000/rs6000-protos.h >> +++ b/gcc/config/rs6000/rs6000-protos.h >> @@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, >> rtx, int, int, int, >> >> extern int easy_altivec_constant (rtx, machine_mode); >> extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *); >> +extern bool vspltisw_constant_p (rtx, machine_mode, int *); >> extern int vspltis_shifted (rtx); >> extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); >> extern bool macho_lo_sum_memory_operand (rtx, machine_mode); >> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc >> index df491bee2ea..984624026c2 100644 >> --- a/gcc/config/rs6000/rs6000.cc >> +++ b/gcc/config/rs6000/rs6000.cc >> @@ -6292,6 +6292,12 @@ easy_altivec_constant (rtx op, machine_mode mode) >>&& INTVAL (CONST_VECTOR_ELT (op, 1))
[PATCH 2/2, rs6000] xfail float128 comparison test case that fails on powerpc64 [PR108728]
Hi, This patch xfails a float128 comparison test case on powerpc64 that fails due to a longstanding issue with floating-point compares. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58684 for more information. The patch passed regression test on Power Linux platforms. Thanks Gui Haochen ChangeLog rs6000: xfail float128 comparison test case that fails on powerpc64. This patch xfails a float128 comparison test case on powerpc64 that fails due to a longstanding issue with floating-point compares. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58684 for more information. When float128 hardware is enabled (-mfloat128-hardware), xscmpuqp is generated for comparison which is unexpected. When float128 software simulation is enabled (-mno-float128-hardware), we still have to xfail the hardware version (__lekf2_hw) which finally invokes xscmpuqp. gcc/testsuite/ PR target/108728 * gcc.dg/torture/float128-cmp-invalid.c: Add xfail. patch.diff diff --git a/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c b/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c index 1f675efdd61..a86592b3328 100644 --- a/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c +++ b/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c @@ -1,5 +1,6 @@ /* Test for "invalid" exceptions from __float128 comparisons. */ /* { dg-do run } */ +/* { dg-xfail-run-if "ppc float128_hw" { ppc_float128_hw || { ppc_cpu_supports_hw && p9vector_hw } } } */ /* { dg-options "" } */ /* { dg-require-effective-target __float128 } */ /* { dg-require-effective-target base_quadfloat_support } */
[PATCH 2/1, rs6000] make ppc_cpu_supports_hw as effective target keyword [PR108728]
Hi, This patch adds ppc_cpu_supports_hw into explicit name checking in proc is-effective-target-keyword. So ppc_cpu_supports_hw can be used as a target selector in test directives. It's required by patch2 of this issue. Thanks Gui Haochen ChangeLog testsuite: make ppc_cpu_supports_hw as effective target keyword [PR108728] gcc/testsuite/ PR target/108728 * lib/target-supports.exp (is-effective-target-keyword): Add ppc_cpu_supports_hw. patch.diff diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 1d6cc6f8d88..e65b447663f 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -9170,6 +9170,7 @@ proc is-effective-target-keyword { arg } { "named_sections" { return 1 } "gc_sections"{ return 1 } "cxa_atexit" { return 1 } + "ppc_cpu_supports_hw" { return 1 } default { return 0 } } }
[PATCH-1, rs6000] xfail float128 comparison test case that fails on powerpc64 [PR108728]
Hi, This patch xfails a float128 comparison test case on powerpc64 that fails due to a longstanding issue with floating-point compares. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58684 for more information. The case is xfailed when instructions of float128 hardware are generated. When software simulation is used, the case should pass. The patch passed regression test on Power Linux platforms. Thanks Gui Haochen ChangeLog rs6000: xfail float128 comparison test case that fails on powerpc64. This patch xfails a float128 comparison test case on powerpc64 that fails due to a longstanding issue with floating-point compares. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58684 for more information. gcc/testsuite/ PR target/108728 * gcc.dg/torture/float128-cmp-invalid.c: Add xfail. patch.diff diff --git a/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c b/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c index 1f675efdd61..7b520d1f9f1 100644 --- a/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c +++ b/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c @@ -1,5 +1,5 @@ /* Test for "invalid" exceptions from __float128 comparisons. */ -/* { dg-do run } */ +/* { dg-do run { xfail { ppc_float128_hw || { ppc_cpu_supports_hw && p9vector_hw } } } } */ /* { dg-options "" } */ /* { dg-require-effective-target __float128 } */ /* { dg-require-effective-target base_quadfloat_support } */
[PATCH-2, rs6000] Add ppc_cpu_supports_hw into proc is-effective-target-keyword [PR108728]
Hi, This patch adds ppc_cpu_supports_hw into explicit name checking in proc is-effective-target-keyword. So ppc_cpu_supports_hw can be used as a target selector in test directives. The patch passed regression test on Power Linux platforms. Thanks Gui Haochen ChangeLog rs6000: Add ppc_cpu_supports_hw into proc is-effective-target-keyword. gcc/testsuite/ PR target/108728 * lib/target-supports.exp (is-effective-target-keyword): Add ppc_cpu_supports_hw. patch.diff diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 1d6cc6f8d88..e65b447663f 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -9170,6 +9170,7 @@ proc is-effective-target-keyword { arg } { "named_sections" { return 1 } "gc_sections"{ return 1 } "cxa_atexit" { return 1 } + "ppc_cpu_supports_hw" { return 1 } default { return 0 } } }
Re: [PATCH, rs6000] xfail float128 comparison test case that fails on powerpc64 [PR108728]
Hi Kewen, 在 2023/4/13 16:32, Kewen.Lin 写道: > xfail all powerpc*-*-* can have some XPASSes on those ENVs with > software emulation. Since the related hw insn xscmpuqp is guarded > with TARGET_FLOAT128_HW, could we use the effective target > ppc_float128_hw instead? Thanks for your review comments. It's tricky. It invokes "__lekf2" with "-mno-float128_hw". But it doesn't always pass the check. With the math library on P8, it can. With the library on P9, it fails. So it totally depends on the version of the library, which is not controlled by GCC. What's your opinion? Test result on P9 make check-gcc-c RUNTESTFLAGS="--target_board=unix'{-mno-float128-hardware}' dg-torture.exp=float128-cmp-invalid.c" FAIL: gcc.dg/torture/float128-cmp-invalid.c -O0 execution test FAIL: gcc.dg/torture/float128-cmp-invalid.c -O1 execution test FAIL: gcc.dg/torture/float128-cmp-invalid.c -O2 execution test FAIL: gcc.dg/torture/float128-cmp-invalid.c -O3 -g execution test FAIL: gcc.dg/torture/float128-cmp-invalid.c -Os execution test FAIL: gcc.dg/torture/float128-cmp-invalid.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test FAIL: gcc.dg/torture/float128-cmp-invalid.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects execution test === gcc Summary === # of expected passes7 # of unexpected failures7 Gui Haochen Thanks
[PATCH, rs6000] xfail float128 comparison test case that fails on powerpc64 [PR108728]
Hi, This patch xfails a float128 comparison test case on powerpc64 that fails due to a longstanding issue with floating-point compares. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58684 for more information. The patch passed regression test on Power Linux platforms. Thanks Gui Haochen ChangeLog rs6000: xfail float128 comparison test case that fails on powerpc64. This patch xfails a float128 comparison test case on powerpc64 that fails due to a longstanding issue with floating-point compares. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58684 for more information. gcc/testsuite/ PR target/108728 * gcc.dg/torture/float128-cmp-invalid.c: Add xfail. patch.diff diff --git a/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c b/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c index 1f675efdd61..f52686e0a24 100644 --- a/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c +++ b/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c @@ -1,5 +1,5 @@ /* Test for "invalid" exceptions from __float128 comparisons. */ -/* { dg-do run } */ +/* { dg-do run { xfail { powerpc*-*-* } } } */ /* { dg-options "" } */ /* { dg-require-effective-target __float128 } */ /* { dg-require-effective-target base_quadfloat_support } */
[PATCHv3, rs6000] rs6000: correct vector sign extend built-ins on Big Endian [PR108812]
Hi, This patch removes byte reverse operation before vector integer sign extension on big endian. These built-ins require to sign extend the element of the input vector that would fall in the least significant portion of the result element. So both BE and LE should do the same operation and the byte reversion is no need. This patch fixes it. Now these built-ins have the same behavior on all compilers. The unnecessary expand patterns are removed and the names of insn pattern are set to the same style. Also the test cases are modified. The patch passed regression test on Power Linux platforms. Thanks Gui Haochen ChangeLog rs6000: correct vector sign extend builtins on Big Endian gcc/ PR target/108812 * config/rs6000/vsx.md (vsx_sign_extend_qi_): Rename to... (vsx_sign_extend_v16qi_): ... this. (vsx_sign_extend_hi_): Rename to... (vsx_sign_extend_v8hi_): ... this. (vsx_sign_extend_si_v2di): Rename to... (vsx_sign_extend_v4si_v2di): ... this. (vsignextend_qi_): Remove. (vsignextend_hi_): Remove. (vsignextend_si_v2di): Remove. (vsignextend_v2di_v1ti): Remove. (*xxspltib__split): Replace gen_vsx_sign_extend_qi_v2di with gen_vsx_sign_extend_v16qi_v2di and gen_vsx_sign_extend_qi_v4si with gen_vsx_sign_extend_v16qi_v4si. * config/rs6000/rs6000.md (split for DI constant generation): Replace gen_vsx_sign_extend_qi_si with gen_vsx_sign_extend_v16qi_si. (split for HSDI constant generation): Replace gen_vsx_sign_extend_qi_di with gen_vsx_sign_extend_v16qi_di and gen_vsx_sign_extend_qi_si with gen_vsx_sign_extend_v16qi_si. * config/rs6000/rs6000-builtins.def (__builtin_altivec_vsignextsb2d): Set bif-pattern to vsx_sign_extend_v16qi_v2di. (__builtin_altivec_vsignextsb2w): Set bif-pattern to vsx_sign_extend_v16qi_v4si. (__builtin_altivec_visgnextsh2d): Set bif-pattern to vsx_sign_extend_v8hi_v2di. (__builtin_altivec_vsignextsh2w): Set bif-pattern to vsx_sign_extend_v8hi_v4si. (__builtin_altivec_vsignextsw2d): Set bif-pattern to vsx_sign_extend_si_v2di. 
(__builtin_altivec_vsignext): Set bif-pattern to vsx_sign_extend_v2di_v1ti. * config/rs6000/rs6000-builtin.cc (lxvrse_expand_builtin): Replace gen_vsx_sign_extend_qi_v2di with gen_vsx_sign_extend_v16qi_v2di, gen_vsx_sign_extend_hi_v2di with gen_vsx_sign_extend_v8hi_v2di and gen_vsx_sign_extend_si_v2di with gen_vsx_sign_extend_v4si_v2di. gcc/testsuite/ PR target/108812 * gcc.target/powerpc/p9-sign_extend-runnable.c: Set corresponding expected vectors for Big Endian. * gcc.target/powerpc/int_128bit-runnable.c: Likewise. patch.diff diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index 90ab39dc258..c66cff17681 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -2840,17 +2840,17 @@ lxvrse_expand_builtin (rtx target, insn_code icode, rtx *op, if (icode == CODE_FOR_vsx_lxvrbx) { temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0); - emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1)); + emit_insn (gen_vsx_sign_extend_v16qi_v2di (discratch, temp1)); } else if (icode == CODE_FOR_vsx_lxvrhx) { temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0); - emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1)); + emit_insn (gen_vsx_sign_extend_v8hi_v2di (discratch, temp1)); } else if (icode == CODE_FOR_vsx_lxvrwx) { temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0); - emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1)); + emit_insn (gen_vsx_sign_extend_v4si_v2di (discratch, temp1)); } else if (icode == CODE_FOR_vsx_lxvrdx) discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0); diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index f76f54793d7..6bfe9246a02 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2687,19 +2687,19 @@ VRLWNM altivec_vrlwnm {} const vsll __builtin_altivec_vsignextsb2d (vsc); -VSIGNEXTSB2D vsignextend_qi_v2di {} +VSIGNEXTSB2D 
vsx_sign_extend_v16qi_v2di {} const vsi __builtin_altivec_vsignextsb2w (vsc); -VSIGNEXTSB2W vsignextend_qi_v4si {} +VSIGNEXTSB2W vsx_sign_extend_v16qi_v4si {} const vsll __builtin_altivec_visgnextsh2d (vss); -VSIGNEXTSH2D vsignextend_hi_v2di {} +VSIGNEXTSH2D vsx_sign_extend_v8hi_v2di {} const vsi __builtin_altivec_vsignextsh2w (vss); -VSIGNEXTSH2W vsignextend_hi_v4si {} +VSIGNEXTSH2W vsx_sign_extend_v8hi_v4si {} const vsll __builtin_altivec_vsignextsw2d (vsi); -VSIGNEXTSW2D vsignextend_si_v2di {} +VSIGNEXTSW2D vsx_sign_extend_v4si_v2di {}
[PATCHv2, rs6000] rs6000: correct vector sign extend built-ins on Big Endian [PR108812]
Hi, This patch removes byte reverse operation before vector integer sign extension on big endian. These built-ins require to sign extend the element of the input vector that would fall in the least significant portion of the result element. So both BE and LE should do the same operation and the byte reversion is no need. This patch fixes it. Now these built-ins have the same behavior on all compilers. The unnecessary expand patterns are removed and the names of insn pattern are set to the same style. Also the test case is modified. The patch passed regression test on Power Linux platforms. Thanks Gui Haochen ChangeLog rs6000: correct vector sign extend builtins on Big Endian gcc/ PR target/108812 * config/rs6000/vsx.md (vsx_sign_extend_qi_): Rename to... (vsx_sign_extend_v16qi_): ... this. (vsx_sign_extend_hi_): Rename to... (vsx_sign_extend_v8hi_): ... this. (vsx_sign_extend_si_v2di): Rename to... (vsx_sign_extend_v4si_v2di): ... this. (vsignextend_qi_): Remove. (vsignextend_hi_): Remove. (vsignextend_si_v2di): Remove. (*xxspltib__split): Replace gen_vsx_sign_extend_qi_v2di with gen_vsx_sign_extend_v16qi_v2di and gen_vsx_sign_extend_qi_v4si with gen_vsx_sign_extend_v16qi_v4si. * config/rs6000/rs6000.md (split for DI constant generation): Replace gen_vsx_sign_extend_qi_si with gen_vsx_sign_extend_v16qi_si. (split for HSDI constant generation): Replace gen_vsx_sign_extend_qi_di with gen_vsx_sign_extend_v16qi_di and gen_vsx_sign_extend_qi_si with gen_vsx_sign_extend_v16qi_si. * config/rs6000/rs6000-builtins.def (__builtin_altivec_vsignextsb2d): Set bif-pattern to vsx_sign_extend_v16qi_v2di. (__builtin_altivec_vsignextsb2w): Set bif-pattern to vsx_sign_extend_v16qi_v4si. (__builtin_altivec_visgnextsh2d): Set bif-pattern to vsx_sign_extend_v8hi_v2di. (__builtin_altivec_vsignextsh2w): Set bif-pattern to vsx_sign_extend_v8hi_v4si. (__builtin_altivec_vsignextsw2d): Set bif-pattern to vsx_sign_extend_si_v2di. 
* config/rs6000/rs6000-builtin.cc (lxvrse_expand_builtin): Replace gen_vsx_sign_extend_qi_v2di with gen_vsx_sign_extend_v16qi_v2di, gen_vsx_sign_extend_hi_v2di with gen_vsx_sign_extend_v8hi_v2di and gen_vsx_sign_extend_si_v2di with gen_vsx_sign_extend_v4si_v2di. gcc/testsuite/ PR target/108812 * gcc.target/powerpc/p9-sign_extend-runnable.c: Set different expected vectors for Big Endian. patch.diff diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index 90ab39dc258..c66cff17681 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -2840,17 +2840,17 @@ lxvrse_expand_builtin (rtx target, insn_code icode, rtx *op, if (icode == CODE_FOR_vsx_lxvrbx) { temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0); - emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1)); + emit_insn (gen_vsx_sign_extend_v16qi_v2di (discratch, temp1)); } else if (icode == CODE_FOR_vsx_lxvrhx) { temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0); - emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1)); + emit_insn (gen_vsx_sign_extend_v8hi_v2di (discratch, temp1)); } else if (icode == CODE_FOR_vsx_lxvrwx) { temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0); - emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1)); + emit_insn (gen_vsx_sign_extend_v4si_v2di (discratch, temp1)); } else if (icode == CODE_FOR_vsx_lxvrdx) discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0); diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index f76f54793d7..55e9cf9ece9 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2687,19 +2687,19 @@ VRLWNM altivec_vrlwnm {} const vsll __builtin_altivec_vsignextsb2d (vsc); -VSIGNEXTSB2D vsignextend_qi_v2di {} +VSIGNEXTSB2D vsx_sign_extend_v16qi_v2di {} const vsi __builtin_altivec_vsignextsb2w (vsc); -VSIGNEXTSB2W vsignextend_qi_v4si {} +VSIGNEXTSB2W 
vsx_sign_extend_v16qi_v4si {} const vsll __builtin_altivec_visgnextsh2d (vss); -VSIGNEXTSH2D vsignextend_hi_v2di {} +VSIGNEXTSH2D vsx_sign_extend_v8hi_v2di {} const vsi __builtin_altivec_vsignextsh2w (vss); -VSIGNEXTSH2W vsignextend_hi_v4si {} +VSIGNEXTSH2W vsx_sign_extend_v8hi_v4si {} const vsll __builtin_altivec_vsignextsw2d (vsi); -VSIGNEXTSW2D vsignextend_si_v2di {} +VSIGNEXTSW2D vsx_sign_extend_v4si_v2di {} const vsc __builtin_altivec_vslv (vsc, vsc); VSLV vslv {} diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 6011f5bf76a..17b5cd171b1 100644 ---
Re: [PATCH] [rs6000] Correct match pattern in pr56605.c
Kewen, The case still fails with trunk. FAIL: gcc.target/powerpc/pr56605.c scan-rtl-dump-times combine "\\(compare:CC \\((?:and|zero_extend):(?:[SD]I) \\((?:sub)?reg:[SD]I" 1 === gcc Summary === # of expected passes1 # of unexpected failures1 With the trunk, it should match the pattern. (compare:CC (and:SI (subreg:SI (reg:DI 207) 0) Thanks Gui Haochen 在 2023/3/27 15:41, Kewen.Lin 写道: > Hi Alexandre and Haochen, > > on 2023/3/25 16:42, Alexandre Oliva via Gcc-patches wrote: >> >> Ping https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590958.html >> >> From: Haochen Gui >> >> This patch corrects the match pattern in pr56605.c. The former pattern >> is wrong and test case fails with GCC11. It should match following >> insn on each subtarget after mode promotion is disabled. The patch >> need to be backported to GCC11. > > Comment https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102146#c21 made me > feel that this test issue was just in branches, but this proposed patch > seems to say it still exists on trunk, could you confirm that? > > BR, > Kewen > >> >> //gimple >> _17 = (unsigned int) _20; >> prolog_loop_niters.4_23 = _17 & 3; >> >> //rtl >> (insn 19 18 20 2 (parallel [ >> (set (reg:CC 208) >> (compare:CC (and:SI (subreg:SI (reg:DI 207) 0) >> (const_int 3 [0x3])) >> (const_int 0 [0]))) >> (set (reg:SI 129 [ prolog_loop_niters.5 ]) >> (and:SI (subreg:SI (reg:DI 207) 0) >> (const_int 3 [0x3]))) >> ]) 197 {*andsi3_imm_mask_dot2} >> >> Rebased. Regstrapped on ppc64-linux-gnu. Also tested with >> ppc64-vxworks7r2 (gcc-12), where it's also needed. Ok to install? >> >> >> for gcc/testsuite/ChangeLog >> >> PR target/102146 >> * gcc.target/powerpc/pr56605.c: Correct match pattern in >> combine pass. 
>> --- >> gcc/testsuite/gcc.target/powerpc/pr56605.c |3 +-- >> 1 file changed, 1 insertion(+), 2 deletions(-) >> >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr56605.c >> b/gcc/testsuite/gcc.target/powerpc/pr56605.c >> index 7695f87db6f66..651a88e3cc7f9 100644 >> --- a/gcc/testsuite/gcc.target/powerpc/pr56605.c >> +++ b/gcc/testsuite/gcc.target/powerpc/pr56605.c >> @@ -11,5 +11,4 @@ void foo (short* __restrict sb, int* __restrict ia) >> ia[i] = (int) sb[i]; >> } >> >> -/* { dg-final { scan-rtl-dump-times {\(compare:CC >> \((?:and|zero_extend):(?:[SD]I) \((?:sub)?reg:[SD]I} 1 "combine" } } */ >> - >> +/* { dg-final { scan-rtl-dump-times {\(compare:CC \(and:SI \(subreg:SI >> \(reg:DI} 1 "combine" } } */ >
[PATCH, rs6000] rs6000: correct vector sign extend built-ins on Big Endian [PR108812]
Hi, This patch removes byte reverse operation before vector integer sign extension on Big Endian. These built-ins require to sign extend the rightmost element. So both BE and LE should do the same operation and the byte reversion is no need. This patch fixes it. Now these built-ins have the same behavior on all compilers. The test case is modified also. The patch passed regression test on Power Linux platforms. Thanks Gui Haochen ChangeLog rs6000: correct vector sign extend builtins on Big Endian gcc/ PR target/108812 * config/rs6000/vsx.md (vsignextend_qi_): Remove byte reverse for Big Endian. (vsignextend_hi_): Likewise. (vsignextend_si_v2di): Remove. * config/rs6000/rs6000-builtins.def (__builtin_altivec_vsignextsw2d): Set bif-pattern to vsx_sign_extend_si_v2di. gcc/testsuite/ PR target/108812 * gcc.target/powerpc/p9-sign_extend-runnable.c: Set different expected vectors for Big Endian. patch.diff diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index f76f54793d7..059a455b388 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2699,7 +2699,7 @@ VSIGNEXTSH2W vsignextend_hi_v4si {} const vsll __builtin_altivec_vsignextsw2d (vsi); -VSIGNEXTSW2D vsignextend_si_v2di {} +VSIGNEXTSW2D vsx_sign_extend_si_v2di {} const vsc __builtin_altivec_vslv (vsc, vsc); VSLV vslv {} diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 992fbc983be..9e9b33f56ab 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -4941,14 +4941,7 @@ (define_expand "vsignextend_qi_" UNSPEC_VSX_SIGN_EXTEND))] "TARGET_P9_VECTOR" { - if (BYTES_BIG_ENDIAN) -{ - rtx tmp = gen_reg_rtx (V16QImode); - emit_insn (gen_altivec_vrevev16qi2(tmp, operands[1])); - emit_insn (gen_vsx_sign_extend_qi_(operands[0], tmp)); -} - else -emit_insn (gen_vsx_sign_extend_qi_(operands[0], operands[1])); + emit_insn (gen_vsx_sign_extend_qi_(operands[0], operands[1])); DONE; }) @@ -4968,14 +4961,7 @@ 
(define_expand "vsignextend_hi_" UNSPEC_VSX_SIGN_EXTEND))] "TARGET_P9_VECTOR" { - if (BYTES_BIG_ENDIAN) -{ - rtx tmp = gen_reg_rtx (V8HImode); - emit_insn (gen_altivec_vrevev8hi2(tmp, operands[1])); - emit_insn (gen_vsx_sign_extend_hi_(operands[0], tmp)); -} - else - emit_insn (gen_vsx_sign_extend_hi_(operands[0], operands[1])); + emit_insn (gen_vsx_sign_extend_hi_(operands[0], operands[1])); DONE; }) @@ -4987,24 +4973,6 @@ (define_insn "vsx_sign_extend_si_v2di" "vextsw2d %0,%1" [(set_attr "type" "vecexts")]) -(define_expand "vsignextend_si_v2di" - [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") - (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")] -UNSPEC_VSX_SIGN_EXTEND))] - "TARGET_P9_VECTOR" -{ - if (BYTES_BIG_ENDIAN) -{ - rtx tmp = gen_reg_rtx (V4SImode); - - emit_insn (gen_altivec_vrevev4si2(tmp, operands[1])); - emit_insn (gen_vsx_sign_extend_si_v2di(operands[0], tmp)); -} - else - emit_insn (gen_vsx_sign_extend_si_v2di(operands[0], operands[1])); - DONE; -}) - ;; Sign extend DI to TI. We provide both GPR targets and Altivec targets on ;; power10. On earlier systems, the machine independent code will generate a ;; shift left to sign extend the 64-bit value to 128-bit. 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c b/gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c index fdcad019b96..03c0f1201e4 100644 --- a/gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c +++ b/gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c @@ -34,7 +34,12 @@ int main () /* test sign extend byte to word */ vec_arg_qi = (vector signed char) {1, 2, 3, 4, 5, 6, 7, 8, -1, -2, -3, -4, -5, -6, -7, -8}; + +#ifdef __BIG_ENDIAN__ + vec_expected_wi = (vector signed int) {4, 8, -4, -8}; +#else vec_expected_wi = (vector signed int) {1, 5, -1, -5}; +#endif vec_result_wi = vec_signexti (vec_arg_qi); @@ -54,7 +59,12 @@ int main () /* test sign extend byte to double */ vec_arg_qi = (vector signed char){1, 2, 3, 4, 5, 6, 7, 8, -1, -2, -3, -4, -5, -6, -7, -8}; + +#ifdef __BIG_ENDIAN__ + vec_expected_di = (vector signed long long int){8, -8}; +#else vec_expected_di = (vector signed long long int){1, -1}; +#endif vec_result_di = vec_signextll(vec_arg_qi); @@ -72,7 +82,12 @@ int main () /* test sign extend short to word */ vec_arg_hi = (vector signed short int){1, 2, 3, 4, -1, -2, -3, -4}; + +#ifdef __BIG_ENDIAN__ + vec_expected_wi = (vector signed int){2, 4, -2, -4}; +#else vec_expected_wi = (vector signed int){1, 3, -1, -3}; +#endif vec_result_wi = vec_signexti(vec_arg_hi); @@ -90,7 +105,12 @@ int main () /* test sign
Re: [PATCH-1, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]
Hi Richard, 在 2023/3/16 15:57, Richard Biener 写道: > I'm not sure if careful constraints massaging like adding magic letters to > alternatives with constants to pessimize them for LRA, making them > more expensive than spilling the constant to a register but avoid > secondary reloads with spilling a register to the stack to make room > for the constant, is possible - but in theory a special constraint modifier > for this purpose could be invented. I have made some tests on constraint modifiers. None of them seems to work. By checking the code, I found that no reloading is always considered better than reloading in LRA. So there is no way to spill the constant to a register in LRA. /* If this alternative can be made to work by reloading, and it needs less reloading than the others checked so far, record it as the chosen goal for reloading. */ if ((best_losers != 0 && losers == 0) || (((best_losers == 0 && losers == 0) || (best_losers != 0 && losers != 0)) && (best_overall > overall || (best_overall == overall ... // set goal_alt Looking forward to your advice. Thanks Gui Haochen
[PATCHv4, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]
Hi, I refined the patch according to reviewer's advice. The main change is to check if buffer_p is set and buffered error exists. Also two regtests are fixed by catching the new error. I sent out the revised one for review due to my limited knowledge on Fortran front end. The patch escalates the failure when Hollerith constant to real conversion fails in native_interpret_expr. It finally reports an "Cannot simplify expression" error in do_simplify method. The patch for pr95450 added a verification for decoding/encoding checking in native_interpret_expr. native_interpret_expr may fail on real type conversion and returns a NULL tree then. But upper layer calls don't handle the failure so that an ICE is reported when the verification fails. IBM long double is an example. It doesn't have a unique memory presentation for some real values. So it may not pass the verification. The new test case shows the problem. errorcount is used to check if an error is already reported or not when getting a bad expr. Buffered errors need to be excluded as they don't increase error count either. The patch passed regression test on Power and x86 linux platforms. Thanks Gui Haochen ChangeLog 2023-03-21 Haochen Gui gcc/ PR target/103628 * fortran/target-memory.cc (gfc_interpret_float): Return FAIL when native_interpret_expr gets a NULL tree. * fortran/arith.cc (gfc_hollerith2real): Return NULL when gfc_interpret_float fails. * fortran/error.cc (gfc_buffered_p): Define. * fortran/gfortran.h (gfc_buffered_p): Declare. * fortran/intrinsic.cc: Add diagnostic.h to include list. (do_simplify): Save errorcount and check it at finish. Report a "Cannot simplify expression" error on a bad result if error count doesn't change and no other errors buffered. gcc/testsuite/ PR target/103628 * gfortran.dg/assumed_size_refs_2.f90: Catch "Cannot simplify expression" error. * gfortran.dg/unpack_field_1.f90: Likewise. * gfortran.dg/pr103628.f90: New. 
Co-Authored-By: Tobias Burnus patch.diff diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc index c0d12cfad9d..d3d38c7eb6a 100644 --- a/gcc/fortran/arith.cc +++ b/gcc/fortran/arith.cc @@ -2752,10 +2752,12 @@ gfc_hollerith2real (gfc_expr *src, int kind) result = gfc_get_constant_expr (BT_REAL, kind, >where); hollerith2representation (result, src); - gfc_interpret_float (kind, (unsigned char *) result->representation.string, - result->representation.length, result->value.real); - - return result; + if (gfc_interpret_float (kind, + (unsigned char *) result->representation.string, + result->representation.length, result->value.real)) +return result; + else +return NULL; } /* Convert character to real. The constant will be padded or truncated. */ diff --git a/gcc/fortran/error.cc b/gcc/fortran/error.cc index 214fb78ba7b..872d42e731e 100644 --- a/gcc/fortran/error.cc +++ b/gcc/fortran/error.cc @@ -49,6 +49,13 @@ static gfc_error_buffer error_buffer; static output_buffer *pp_error_buffer, *pp_warning_buffer; static int warningcount_buffered, werrorcount_buffered; +/* Return buffered_p. */ +bool +gfc_buffered_p (void) +{ + return buffered_p; +} + /* Return true if there output_buffer is empty. */ static bool diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 219ef8c7612..edfe11796a6 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -3328,6 +3328,7 @@ void gfc_internal_error (const char *, ...) ATTRIBUTE_NORETURN ATTRIBUTE_GCC_GFC void gfc_clear_error (void); bool gfc_error_check (void); bool gfc_error_flag_test (void); +bool gfc_buffered_p (void); notification gfc_notification_std (int); bool gfc_notify_std (int, const char *, ...) ATTRIBUTE_GCC_GFC(2,3); diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc index e89131f5a71..2572b7a3448 100644 --- a/gcc/fortran/intrinsic.cc +++ b/gcc/fortran/intrinsic.cc @@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. 
If not see #include "options.h" #include "gfortran.h" #include "intrinsic.h" +#include "diagnostic.h" /* For errorcount. */ /* Namespace to hold the resolved symbols for intrinsic subroutines. */ static gfc_namespace *gfc_intrinsic_namespace; @@ -4620,6 +4621,7 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e) { gfc_expr *result, *a1, *a2, *a3, *a4, *a5, *a6; gfc_actual_arglist *arg; + int old_errorcount = errorcount; /* Max and min require special handling due to the variable number of args. */ @@ -4708,7 +4710,12 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e) finish: if (result == _bad_expr) -return false; +{ + if (errorcount == old_errorcount + && (!gfc_buffered_p () && !gfc_error_flag_test ())) + gfc_error ("Cannot simplify expression at %L", >where); +
Ping [PATCHv3, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]
Hi, Gently ping this: https://gcc.gnu.org/pipermail/gcc-patches/2023-March/613497.html Thanks Gui Haochen 在 2023/3/7 16:55, HAO CHEN GUI 写道: > Hi, > The patch escalates the failure when Hollerith constant to real conversion > fails in native_interpret_expr. It finally reports an "Cannot simplify > expression" error in do_simplify method. > > The patch of pr95450 added a verification for decoding/encoding checking > in native_interpret_expr. native_interpret_expr may fail on real type > conversion and returns a NULL tree then. But upper layer calls don't handle > the failure so that an ICE is reported when the verification fails. > > IBM long double is an example. It doesn't have a unique memory presentation > for some real values. So it may not pass the verification. The new test > case shows the problem. > > errorcount is used to check if an error is already reported or not when > getting a bad expr. Buffered errors need to be excluded as they don't > increase error count either. > > The patch passed regression test on Power and x86 linux platforms. > > Gui Haochen > Thanks > > ChangeLog > 2023-03-07 Haochen Gui > > gcc/ > PR target/103628 > * fortran/target-memory.cc (gfc_interpret_float): Return FAIL when > native_interpret_expr gets a NULL tree. > * fortran/arith.cc (gfc_hollerith2real): Return NULL when > gfc_interpret_float fails. > * fortran/error.cc (gfc_buffered_p): Define. > * fortran/gfortran.h (gfc_buffered_p): Declare. > * fortran/intrinsic.cc: Add diagnostic.h to include list. > (do_simplify): Save errorcount and check it at finish. Report a > "Cannot simplify expression" error on a bad result if error count > doesn't change and no other errors buffered. > > gcc/testsuite/ > PR target/103628 > * gfortran.dg/pr103628.f90: New. 
> > Co-Authored-By: Tobias Burnus > > patch.diff > diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc > index c0d12cfad9d..d3d38c7eb6a 100644 > --- a/gcc/fortran/arith.cc > +++ b/gcc/fortran/arith.cc > @@ -2752,10 +2752,12 @@ gfc_hollerith2real (gfc_expr *src, int kind) >result = gfc_get_constant_expr (BT_REAL, kind, >where); > >hollerith2representation (result, src); > - gfc_interpret_float (kind, (unsigned char *) result->representation.string, > -result->representation.length, result->value.real); > - > - return result; > + if (gfc_interpret_float (kind, > +(unsigned char *) result->representation.string, > +result->representation.length, result->value.real)) > +return result; > + else > +return NULL; > } > > /* Convert character to real. The constant will be padded or truncated. */ > diff --git a/gcc/fortran/error.cc b/gcc/fortran/error.cc > index 214fb78ba7b..872d42e731e 100644 > --- a/gcc/fortran/error.cc > +++ b/gcc/fortran/error.cc > @@ -49,6 +49,13 @@ static gfc_error_buffer error_buffer; > static output_buffer *pp_error_buffer, *pp_warning_buffer; > static int warningcount_buffered, werrorcount_buffered; > > +/* Return buffered_p. */ > +bool > +gfc_buffered_p (void) > +{ > + return buffered_p; > +} > + > /* Return true if there output_buffer is empty. */ > > static bool > diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h > index 219ef8c7612..edfe11796a6 100644 > --- a/gcc/fortran/gfortran.h > +++ b/gcc/fortran/gfortran.h > @@ -3328,6 +3328,7 @@ void gfc_internal_error (const char *, ...) > ATTRIBUTE_NORETURN ATTRIBUTE_GCC_GFC > void gfc_clear_error (void); > bool gfc_error_check (void); > bool gfc_error_flag_test (void); > +bool gfc_buffered_p (void); > > notification gfc_notification_std (int); > bool gfc_notify_std (int, const char *, ...) 
ATTRIBUTE_GCC_GFC(2,3); > diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc > index e89131f5a71..9d049001a51 100644 > --- a/gcc/fortran/intrinsic.cc > +++ b/gcc/fortran/intrinsic.cc > @@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. If not see > #include "options.h" > #include "gfortran.h" > #include "intrinsic.h" > +#include "diagnostic.h" /* For errorcount. */ > > /* Namespace to hold the resolved symbols for intrinsic subroutines. */ > static gfc_namespace *gfc_intrinsic_namespace; > @@ -4620,6 +4621,7 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e) > { >gfc_expr *result, *a1, *a2, *a3, *a4, *a5, *a6; >gfc_actual_arglist *arg; > + int old_errorcount = errorcount; > >/* Max and min require special handling due to the variable number > of args. */ > @@ -4708,7 +4710,12 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e) > > finish: >if (result == _bad_expr) > -return false; > +{ > + if (errorcount == old_errorcount > + && (gfc_buffered_p () && !gfc_error_flag_test ())) > + gfc_error ("Cannot simplify expression at %L", >where); > + return false; > +} > >if (result == NULL) >
Re: [PATCH-1, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]
Hi Richard, 在 2023/3/16 18:36, Richard Biener 写道: > On Thu, Mar 16, 2023 at 10:04 AM HAO CHEN GUI wrote: >> >> Hi Richard, >> >> 在 2023/3/16 15:57, Richard Biener 写道: >>> So this is one way around the lack of CSE/PRE of constant operands. I'd >>> argue that a better spot for this _might_ be LRA (split the constant out if >>> there's a free register available), postreload-[g]cse (CSE the constants) >>> and >>> then maybe cprop_hardreg to combine back single-use constants? >>> >>> I'm not sure if careful constraints massaging like adding magic letters to >>> alternatives with constants to pessimize them for LRA, making them >>> more expensive than spilling the constant to a register but avoid >>> secondary reloads with spilling a register to the stack to make room >>> for the constant, is possible - but in theory a special constraint modifier >>> for this purpose could be invented. >> >> Thanks so much for your advice. >> >> cse/gcse doesn't take cost of constant set (the def insn of the constant) >> into >> consideration. So it won't replace the register with a constant as it costs 1 >> insn with the register and costs 2 insn with the constant. > > I think it does (and should) cost the constant set (IIRC we had some > improvements > there, or at least proposed, during this stage1). But sure - this is why your > "trick" works. > It's doable if post-reload gsc costs the constant set. I will draft a patch to test it. >> Finally, the single- >> use constants can't be back to 2 insn. > > And that's because of the issue you point out above? No. my original concern is the constant can't be back. If post-reload gsc doen't cost the constant set, the insn with a register always cost less than two insns with immediates. Commonly the constant set itself costs 2 insn also. > >> Not sure if I understand it correctly. >> Looking forward to your advice. 
> > My main point is that CSEing constants has impacts on register pressure > and thus should probably be done after or within register allocation. RTL > expansion itself is probably a bad time to pro-actively split out constants > even more if, as you say, nothing puts them back. > I agree. Thanks a lot. > Richard. > >> Thanks >> Gui Haochen Gui Haochen
Re: [PATCH-1, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]
Hi Richard, 在 2023/3/16 15:57, Richard Biener 写道: > So this is one way around the lack of CSE/PRE of constant operands. I'd > argue that a better spot for this _might_ be LRA (split the constant out if > there's a free register available), postreload-[g]cse (CSE the constants) and > then maybe cprop_hardreg to combine back single-use constants? > > I'm not sure if careful constraints massaging like adding magic letters to > alternatives with constants to pessimize them for LRA, making them > more expensive than spilling the constant to a register but avoid > secondary reloads with spilling a register to the stack to make room > for the constant, is possible - but in theory a special constraint modifier > for this purpose could be invented. Thanks so much for your advice. cse/gcse doesn't take the cost of the constant set (the def insn of the constant) into consideration. So it won't replace the register with a constant, as it costs 1 insn with the register and costs 2 insns with the constant. Finally, the single-use constants can't be converted back to 2 insns. Not sure if I understand it correctly. Looking forward to your advice. Thanks Gui Haochen
[PATCH-2, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]
Hi, The background and motivation of the patch are listed in the note of PATCH-1. This patch changes the expander of ior/xor and force constant to a pseudo when it needs 2 insn. Also a combine and split pattern for ior/xor is defined. rtx_cost of ior insn is adjusted as now it may have 2 insns for certain constants. We need to check the cost of each operand. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog 2023-03-14 Haochen Gui gcc/ * gcc/config/rs6000/rs6000.cc (rs6000_rtx_costs): Check the cost of each operand for IOR as it may have 2 insn for certain constants. * config/rs6000/rs6000.md (3): Put the second operand into register when it's a constant and need 2 ior/xor insns. (split for ior/xor): Remove. (*_2insn): New insn_and split pattern for 2-insn ior/xor. gcc/testsuite/ * gcc.target/powerpc/pr86106.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index b3a609f3aa3..f53daff547f 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -22081,10 +22081,6 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, return false; case IOR: - /* FIXME */ - *total = COSTS_N_INSNS (1); - return true; - case CLZ: case XOR: case ZERO_EXTRACT: diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index dba41e3df90..0541f48c42a 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -3892,7 +3892,8 @@ (define_expand "3" DONE; } - if (non_logical_cint_operand (operands[2], mode)) + if (non_logical_cint_operand (operands[2], mode) + && !can_create_pseudo_p ()) { rtx tmp = ((!can_create_pseudo_p () || rtx_equal_p (operands[0], operands[1])) @@ -3907,15 +3908,17 @@ (define_expand "3" DONE; } - if (!reg_or_logical_cint_operand (operands[2], mode)) + if (!logical_operand (operands[2], mode)) operands[2] = force_reg (mode, operands[2]); }) -(define_split - [(set (match_operand:GPR 0 "gpc_reg_operand") - (iorxor:GPR 
(match_operand:GPR 1 "gpc_reg_operand") - (match_operand:GPR 2 "non_logical_cint_operand")))] +(define_insn_and_split "*_2insn" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (iorxor:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "non_logical_cint_operand" "n")))] "" + "#" + "&& (!reload_completed || rtx_equal_p (operands[0], operands[1]))" [(set (match_dup 3) (iorxor:GPR (match_dup 1) (match_dup 4))) @@ -3933,7 +3936,8 @@ (define_split operands[4] = GEN_INT (hi); operands[5] = GEN_INT (lo); -}) +} + [(set_attr "length" "8")]) (define_insn "*bool3_imm" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") diff --git a/gcc/testsuite/gcc.target/powerpc/pr86106.c b/gcc/testsuite/gcc.target/powerpc/pr86106.c new file mode 100644 index 000..71501476800 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr86106.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-prefixed" } */ + +unsigned int +foo (unsigned int val) +{ + unsigned int mask = 0x7f7f7f7f; + + return ~(((val & mask) + mask) | val | mask); +} + +/* { dg-final { scan-assembler-not {\maddis\M} } } */ +/* { dg-final { scan-assembler-not {\maddi\M} } } */ +/* { dg-final { scan-assembler-not {\moris\M} } } */
[PATCH-1, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]
Hi, Currently, rs6000 directly expands to 2 insns if an integer constant is the second operand and it needs two insns. For example, addi/addis and ori/oris. It may not benefit when the constant is used for more than 2 times in an extended basic block, just like the case in PR shows. One possible solution is to force the constant in pseudo at expand and let propagation pass and combine pass decide if the pseudo should be replaced with the constant or not by comparing the rtx/insn cost. It generates a constant move if the constant is forced to a pseudo. There is one constant move if it's used only once. The combine pass can combine the constant move and add/ior/xor insn and eliminate the move as the insn cost reduces. There are multiple moves if the constant is used for several times. In an extended basic block, these constant moves are merged to one by propagation pass. The combine pass can't replace the pseudo with the constant as it is no cost saving. In an extreme case, the constant is used twice in an extended basic block. The cost(latency) is unchanged between putting constant in pseudo and generating 2 insns. The dependence of instructions reduces but one more register is used. In other case, it should be always optimal to put constant in a pseudo. This patch changes the expander of integer add and force constant to a pseudo when it needs 2 insn. Also a combine and split pattern is defined. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog 2023-03-14 Haochen Gui gcc/ * config/rs6000/predicates.md (add_2insn_cint_operand): New predicate which returns true when op is a 32-bit but not a 16-bit signed integer constant. * config/rs6000/rs6000.md (add3): Put the second operand into register when it's a constant and need 2 add insns. (*add_2insn): New insn_and_split for 2-insn add. 
patch.diff diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index a1764018545..09e59a48cd3 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -282,6 +282,13 @@ (define_predicate "s32bit_cint_operand" (and (match_code "const_int") (match_test "(0x8000 + UINTVAL (op)) >> 32 == 0"))) +;; Return 1 if op is a 32-bit but not 16-bit constant signed integer +(define_predicate "add_2insn_cint_operand" + (and (match_code "const_int") + (and (match_operand 0 "s32bit_cint_operand") + (and (not (match_operand 0 "short_cint_operand")) +(not (match_operand 0 "upper16_cint_operand")) + ;; Return 1 if op is a constant 32-bit unsigned (define_predicate "c32bit_cint_operand" (and (match_code "const_int") diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 6011f5bf76a..dba41e3df90 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1796,12 +1796,44 @@ (define_expand "add3" /* The ordering here is important for the prolog expander. When space is allocated from the stack, adding 'low' first may produce a temporary deallocation (which would be bad). */ - emit_insn (gen_add3 (tmp, operands[1], GEN_INT (rest))); - emit_insn (gen_add3 (operands[0], tmp, GEN_INT (low))); - DONE; + if (!can_create_pseudo_p ()) + { + emit_insn (gen_add3 (tmp, operands[1], GEN_INT (rest))); + emit_insn (gen_add3 (operands[0], tmp, GEN_INT (low))); + DONE; + } + + operands[2] = force_reg (mode, operands[2]); } }) +/* The ordering here is important for the prolog expander. + When space is allocated from the stack, adding 'low' first may + produce a temporary deallocation (which would be bad). 
*/ + +(define_insn_and_split "*add_2insn" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=b") + (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%b") + (match_operand:GPR 2 "add_2insn_cint_operand" "n")))] + "!TARGET_PREFIXED" + "#" + "&& 1" + [(set (match_dup 0) + (plus:GPR (match_dup 1) + (match_dup 3))) + (set (match_dup 0) + (plus:GPR (match_dup 0) + (match_dup 4)))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]); + HOST_WIDE_INT low = sext_hwi (val, 16); + HOST_WIDE_INT rest = trunc_int_for_mode (val - low, mode); + + operands[3] = GEN_INT (rest); + operands[4] = GEN_INT (low); +} + [(set_attr "length" "8")]) + (define_insn "*add3" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,r,r") (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,b,b")
Re: [PATCH] testsuite, rs6000: Adjust ppc-fortran.exp to support dg-{warning,error}
Hi Kewen, I tested it with my fortran test case. It works. Thanks a lot. Gui Haochen 在 2023/3/6 17:27, Kewen.Lin 写道: > Hi, > > According to Haochen's finding in [1], currently ppc-fortran.exp > doesn't support Fortran specific warning or error messages well. > By looking into it, it's due to that gfortran uses some different > warning/error prefixes as follows: > > set gcc_warning_prefix "\[Ww\]arning:" > set gcc_error_prefix "(Fatal )?\[Ee\]rror:" > > comparing to: > > set gcc_warning_prefix "warning:" > set gcc_error_prefix "(fatal )?error:" > > So this is to override these two prefixes and make it support > dg-{warning,error} checks. > > Tested on powerpc64-linux-gnu P7/P8/P9 and > powerpc64le-linux-gnu P9/P10. > > [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-March/613302.html > > BR, > Kewen > - > > gcc/testsuite/ChangeLog: > > * gcc.target/powerpc/ppc-fortran/ppc-fortran.exp: Override > gcc_{warning,error}_prefix with Fortran specific one used in > gfortran_init. > --- > gcc/testsuite/gcc.target/powerpc/ppc-fortran/ppc-fortran.exp | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fortran/ppc-fortran.exp > b/gcc/testsuite/gcc.target/powerpc/ppc-fortran/ppc-fortran.exp > index a556d7b48a3..f7e99ac8487 100644 > --- a/gcc/testsuite/gcc.target/powerpc/ppc-fortran/ppc-fortran.exp > +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fortran/ppc-fortran.exp > @@ -58,6 +58,11 @@ proc dg-compile-aux-modules { args } { > } > } > > +# Override gcc_{warning,error}_prefix with Fortran specific prefixes used > +# in gfortran_init to support dg-{warning,error} checks. > +set gcc_warning_prefix "\[Ww\]arning:" > +set gcc_error_prefix "(Fatal )?\[Ee\]rror:" > + > # Main loop. > gfortran-dg-runtest [lsort \ > [glob -nocomplain $srcdir/$subdir/*.\[fF\]{,90,95,03,08} ] ] "" > $DEFAULT_FFLAGS > -- > 2.39.1
[PATCHv3, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]
Hi, The patch escalates the failure when Hollerith constant to real conversion fails in native_interpret_expr. It finally reports an "Cannot simplify expression" error in do_simplify method. The patch of pr95450 added a verification for decoding/encoding checking in native_interpret_expr. native_interpret_expr may fail on real type conversion and returns a NULL tree then. But upper layer calls don't handle the failure so that an ICE is reported when the verification fails. IBM long double is an example. It doesn't have a unique memory presentation for some real values. So it may not pass the verification. The new test case shows the problem. errorcount is used to check if an error is already reported or not when getting a bad expr. Buffered errors need to be excluded as they don't increase error count either. The patch passed regression test on Power and x86 linux platforms. Gui Haochen Thanks ChangeLog 2023-03-07 Haochen Gui gcc/ PR target/103628 * fortran/target-memory.cc (gfc_interpret_float): Return FAIL when native_interpret_expr gets a NULL tree. * fortran/arith.cc (gfc_hollerith2real): Return NULL when gfc_interpret_float fails. * fortran/error.cc (gfc_buffered_p): Define. * fortran/gfortran.h (gfc_buffered_p): Declare. * fortran/intrinsic.cc: Add diagnostic.h to include list. (do_simplify): Save errorcount and check it at finish. Report a "Cannot simplify expression" error on a bad result if error count doesn't change and no other errors buffered. gcc/testsuite/ PR target/103628 * gfortran.dg/pr103628.f90: New. 
Co-Authored-By: Tobias Burnus patch.diff diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc index c0d12cfad9d..d3d38c7eb6a 100644 --- a/gcc/fortran/arith.cc +++ b/gcc/fortran/arith.cc @@ -2752,10 +2752,12 @@ gfc_hollerith2real (gfc_expr *src, int kind) result = gfc_get_constant_expr (BT_REAL, kind, >where); hollerith2representation (result, src); - gfc_interpret_float (kind, (unsigned char *) result->representation.string, - result->representation.length, result->value.real); - - return result; + if (gfc_interpret_float (kind, + (unsigned char *) result->representation.string, + result->representation.length, result->value.real)) +return result; + else +return NULL; } /* Convert character to real. The constant will be padded or truncated. */ diff --git a/gcc/fortran/error.cc b/gcc/fortran/error.cc index 214fb78ba7b..872d42e731e 100644 --- a/gcc/fortran/error.cc +++ b/gcc/fortran/error.cc @@ -49,6 +49,13 @@ static gfc_error_buffer error_buffer; static output_buffer *pp_error_buffer, *pp_warning_buffer; static int warningcount_buffered, werrorcount_buffered; +/* Return buffered_p. */ +bool +gfc_buffered_p (void) +{ + return buffered_p; +} + /* Return true if there output_buffer is empty. */ static bool diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 219ef8c7612..edfe11796a6 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -3328,6 +3328,7 @@ void gfc_internal_error (const char *, ...) ATTRIBUTE_NORETURN ATTRIBUTE_GCC_GFC void gfc_clear_error (void); bool gfc_error_check (void); bool gfc_error_flag_test (void); +bool gfc_buffered_p (void); notification gfc_notification_std (int); bool gfc_notify_std (int, const char *, ...) ATTRIBUTE_GCC_GFC(2,3); diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc index e89131f5a71..9d049001a51 100644 --- a/gcc/fortran/intrinsic.cc +++ b/gcc/fortran/intrinsic.cc @@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. 
If not see #include "options.h" #include "gfortran.h" #include "intrinsic.h" +#include "diagnostic.h" /* For errorcount. */ /* Namespace to hold the resolved symbols for intrinsic subroutines. */ static gfc_namespace *gfc_intrinsic_namespace; @@ -4620,6 +4621,7 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e) { gfc_expr *result, *a1, *a2, *a3, *a4, *a5, *a6; gfc_actual_arglist *arg; + int old_errorcount = errorcount; /* Max and min require special handling due to the variable number of args. */ @@ -4708,7 +4710,12 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e) finish: if (result == _bad_expr) -return false; +{ + if (errorcount == old_errorcount + && (gfc_buffered_p () && !gfc_error_flag_test ())) + gfc_error ("Cannot simplify expression at %L", >where); + return false; +} if (result == NULL) resolve_intrinsic (specific, e); /* Must call at run-time */ diff --git a/gcc/fortran/target-memory.cc b/gcc/fortran/target-memory.cc index 7ce7d736629..0c47aa6b842 100644 --- a/gcc/fortran/target-memory.cc +++ b/gcc/fortran/target-memory.cc @@ -416,11 +416,14 @@ gfc_interpret_float (int kind, unsigned char *buffer, size_t buffer_size, mpfr_t real)
Re: [PATCHv2, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]
Hi Tobias, 在 2023/3/3 17:29, Tobias Burnus 写道: > But could you also include the 'gcc/fortran/intrinsic.cc' change > proposed in > https://gcc.gnu.org/pipermail/gcc-patches/2023-March/613030.html (and > acknowledged by Steve)? Sure, I will merge it into the patch and do the regression test. Additionally, Kewen suggested: >> Since this test case is powerpc only, I think it can be moved to >> gcc/testsuite/gcc.target/powerpc/ppc-fortran. > > Which sounds reasonable. Test cases under gcc.target are tested by check-gcc-c. It greps "warning" and "error" (C style, lower case) from the output while check-gcc-fortran greps "Warning" and "Error" (upper case). As the test case needs to check the "Warning" and "Error" messages, I have to put it in the gfortran.dg directory. What's your opinion? Gui Haochen Thanks
Re: [PATCHv2, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]
Hi, The patch passed regression test on Power linux platforms. Sorry for missing the information. Gui Haochen 在 2023/3/3 17:12, HAO CHEN GUI via Gcc-patches 写道: > Hi, > The patch escalates the failure when Hollerith constant to real conversion > fails in native_interpret_expr. It finally reports an "Unclassifiable > statement" error. > > The patch of pr95450 added a verification for decoding/encoding checking > in native_interpret_expr. native_interpret_expr may fail on real type > conversion and returns a NULL tree then. But upper layer calls don't handle > the failure so that an ICE is reported when the verification fails. > > IBM long double is an example. It doesn't have a unique memory presentation > for some real values. So it may not pass the verification. The new test > case shows the problem. > > Compared to last version, this version moves the mpfr_init after NULL tree > test and fixes the format problem according to Tobias's advice. Thanks a lot. > > Gui Haochen > Thanks > > ChangeLog > 2023-03-01 Haochen Gui > > gcc/ > PR target/103628 > * fortran/target-memory.cc (gfc_interpret_float): Return FAIL when > native_interpret_expr gets a NULL tree. > * fortran/arith.cc (gfc_hollerith2real): Return NULL when > gfc_interpret_float fails. > > gcc/testsuite/ > PR target/103628 > * gfortran.dg/pr103628.f90: New. 
> > patch.diff > diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc > index c0d12cfad9d..d3d38c7eb6a 100644 > --- a/gcc/fortran/arith.cc > +++ b/gcc/fortran/arith.cc > @@ -2752,10 +2752,12 @@ gfc_hollerith2real (gfc_expr *src, int kind) >result = gfc_get_constant_expr (BT_REAL, kind, >where); > >hollerith2representation (result, src); > - gfc_interpret_float (kind, (unsigned char *) result->representation.string, > -result->representation.length, result->value.real); > - > - return result; > + if (gfc_interpret_float (kind, > +(unsigned char *) result->representation.string, > +result->representation.length, result->value.real)) > +return result; > + else > +return NULL; > } > > /* Convert character to real. The constant will be padded or truncated. */ > diff --git a/gcc/fortran/target-memory.cc b/gcc/fortran/target-memory.cc > index 7ce7d736629..0c47aa6b842 100644 > --- a/gcc/fortran/target-memory.cc > +++ b/gcc/fortran/target-memory.cc > @@ -416,11 +416,14 @@ gfc_interpret_float (int kind, unsigned char *buffer, > size_t buffer_size, >mpfr_t real) > { >gfc_set_model_kind (kind); > - mpfr_init (real); > - gfc_conv_tree_to_mpfr (real, > - native_interpret_expr (gfc_get_real_type (kind), > - buffer, buffer_size)); > > + tree source = native_interpret_expr (gfc_get_real_type (kind), buffer, > +buffer_size); > + if (!source) > +return 0; > + > + mpfr_init (real); > + gfc_conv_tree_to_mpfr (real, source); >return size_float (kind); > } > > diff --git a/gcc/testsuite/gfortran.dg/pr103628.f90 > b/gcc/testsuite/gfortran.dg/pr103628.f90 > new file mode 100644 > index 000..e49aefc18fd > --- /dev/null > +++ b/gcc/testsuite/gfortran.dg/pr103628.f90 > @@ -0,0 +1,14 @@ > +! { dg-do compile { target powerpc*-*-* } } > +! { dg-options "-O2 -mabi=ibmlongdouble" } > + > +! Test to ensure that it reports an "Unclassifiable statement" error > +! instead of throwing an ICE when the memory represent of the HOLLERITH > +! string is not unique with ibm long double encoding. 
> + > +program main > + integer, parameter :: k = 16 > + real(kind = k):: b = 4h1234 > +end program main > + > +! { dg-warning "Conversion from HOLLERITH" "warning" { target powerpc*-*-* } > 10 } > +! { dg-error "Unclassifiable statement" "error" { target powerpc*-*-* } 10 }
[PATCHv2, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]
Hi, The patch escalates the failure when Hollerith constant to real conversion fails in native_interpret_expr. It finally reports an "Unclassifiable statement" error. The patch of pr95450 added a verification for decoding/encoding checking in native_interpret_expr. native_interpret_expr may fail on real type conversion and returns a NULL tree then. But upper layer calls don't handle the failure so that an ICE is reported when the verification fails. IBM long double is an example. It doesn't have a unique memory presentation for some real values. So it may not pass the verification. The new test case shows the problem. Compared to last version, this version moves the mpfr_init after NULL tree test and fixes the format problem according to Tobias's advice. Thanks a lot. Gui Haochen Thanks ChangeLog 2023-03-01 Haochen Gui gcc/ PR target/103628 * fortran/target-memory.cc (gfc_interpret_float): Return FAIL when native_interpret_expr gets a NULL tree. * fortran/arith.cc (gfc_hollerith2real): Return NULL when gfc_interpret_float fails. gcc/testsuite/ PR target/103628 * gfortran.dg/pr103628.f90: New. patch.diff diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc index c0d12cfad9d..d3d38c7eb6a 100644 --- a/gcc/fortran/arith.cc +++ b/gcc/fortran/arith.cc @@ -2752,10 +2752,12 @@ gfc_hollerith2real (gfc_expr *src, int kind) result = gfc_get_constant_expr (BT_REAL, kind, >where); hollerith2representation (result, src); - gfc_interpret_float (kind, (unsigned char *) result->representation.string, - result->representation.length, result->value.real); - - return result; + if (gfc_interpret_float (kind, + (unsigned char *) result->representation.string, + result->representation.length, result->value.real)) +return result; + else +return NULL; } /* Convert character to real. The constant will be padded or truncated. 
*/ diff --git a/gcc/fortran/target-memory.cc b/gcc/fortran/target-memory.cc index 7ce7d736629..0c47aa6b842 100644 --- a/gcc/fortran/target-memory.cc +++ b/gcc/fortran/target-memory.cc @@ -416,11 +416,14 @@ gfc_interpret_float (int kind, unsigned char *buffer, size_t buffer_size, mpfr_t real) { gfc_set_model_kind (kind); - mpfr_init (real); - gfc_conv_tree_to_mpfr (real, -native_interpret_expr (gfc_get_real_type (kind), - buffer, buffer_size)); + tree source = native_interpret_expr (gfc_get_real_type (kind), buffer, + buffer_size); + if (!source) +return 0; + + mpfr_init (real); + gfc_conv_tree_to_mpfr (real, source); return size_float (kind); } diff --git a/gcc/testsuite/gfortran.dg/pr103628.f90 b/gcc/testsuite/gfortran.dg/pr103628.f90 new file mode 100644 index 000..e49aefc18fd --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr103628.f90 @@ -0,0 +1,14 @@ +! { dg-do compile { target powerpc*-*-* } } +! { dg-options "-O2 -mabi=ibmlongdouble" } + +! Test to ensure that it reports an "Unclassifiable statement" error +! instead of throwing an ICE when the memory represent of the HOLLERITH +! string is not unique with ibm long double encoding. + +program main + integer, parameter :: k = 16 + real(kind = k):: b = 4h1234 +end program main + +! { dg-warning "Conversion from HOLLERITH" "warning" { target powerpc*-*-* } 10 } +! { dg-error "Unclassifiable statement" "error" { target powerpc*-*-* } 10 }
[PATCH, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]
Hi, The patch escalates the failure when Hollerith constant to real conversion fails in native_interpret_expr. It finally reports an "Unclassifiable statement" error. The patch of pr95450 added a verification for decoding/encoding checking in native_interpret_expr. native_interpret_expr may fail on real type conversion and returns a NULL tree then. But upper layer calls don't handle the failure so that an ICE is reported when the verification fails. IBM long double is an example. It doesn't have a unique memory presentation for some real values. So it may not pass the verification. The new test case shows the problem. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog 2023-03-01 Haochen Gui gcc/ PR target/103628 * fortran/target-memory.cc (gfc_interpret_float): Return FAIL when native_interpret_expr gets a NULL tree. * fortran/arith.cc (gfc_hollerith2real): Return NULL when gfc_interpret_float fails. gcc/testsuite/ PR target/103628 * gfortran.dg/pr103628.f90: New. patch.diff diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc index c0d12cfad9d..d3d38c7eb6a 100644 --- a/gcc/fortran/arith.cc +++ b/gcc/fortran/arith.cc @@ -2752,10 +2752,12 @@ gfc_hollerith2real (gfc_expr *src, int kind) result = gfc_get_constant_expr (BT_REAL, kind, >where); hollerith2representation (result, src); - gfc_interpret_float (kind, (unsigned char *) result->representation.string, - result->representation.length, result->value.real); - - return result; + if (gfc_interpret_float (kind, + (unsigned char *) result->representation.string, + result->representation.length, result->value.real)) +return result; + else +return NULL; } /* Convert character to real. The constant will be padded or truncated. 
*/ diff --git a/gcc/fortran/target-memory.cc b/gcc/fortran/target-memory.cc index 7ce7d736629..04afc357e3c 100644 --- a/gcc/fortran/target-memory.cc +++ b/gcc/fortran/target-memory.cc @@ -417,10 +417,13 @@ gfc_interpret_float (int kind, unsigned char *buffer, size_t buffer_size, { gfc_set_model_kind (kind); mpfr_init (real); - gfc_conv_tree_to_mpfr (real, -native_interpret_expr (gfc_get_real_type (kind), - buffer, buffer_size)); + tree source = native_interpret_expr (gfc_get_real_type (kind), buffer, + buffer_size); + if (!source) +return 0; + + gfc_conv_tree_to_mpfr (real, source); return size_float (kind); } diff --git a/gcc/testsuite/gfortran.dg/pr103628.f90 b/gcc/testsuite/gfortran.dg/pr103628.f90 new file mode 100644 index 000..e49aefc18fd --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr103628.f90 @@ -0,0 +1,14 @@ +! { dg-do compile { target powerpc*-*-* } } +! { dg-options "-O2 -mabi=ibmlongdouble" } + +! Test to ensure that it reports an "Unclassifiable statement" error +! instead of throwing an ICE when the memory represent of the HOLLERITH +! string is not unique with ibm long double encoding. + +program main + integer, parameter :: k = 16 + real(kind = k):: b = 4h1234 +end program main + +! { dg-warning "Conversion from HOLLERITH" "warning" { target powerpc*-*-* } 10 } +! { dg-error "Unclassifiable statement" "error" { target powerpc*-*-* } 10 }
[PATCHv2, rs6000] Merge two vector shift when their sources are the same
Hi, This patch merges two "vsldoi" insns when their sources are the same. Particularly, it is simplified to be one move if the total shift is multiples of 16 bytes. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog 2023-02-28 Haochen Gui gcc/ * config/rs6000/altivec.md (*altivec_vsldoi_dup_): New insn_and_split to merge two vsldoi when the sources are the same. gcc/testsuite/ * gcc.target/powerpc/vsldoi_merge.c: New. patch.diff diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 84660073f32..fae8ec2b2e8 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2529,6 +2529,35 @@ (define_insn "altivec_vsldoi_" "vsldoi %0,%1,%2,%3" [(set_attr "type" "vecperm")]) +(define_insn_and_split "*altivec_vsldoi_dup_" + [(set (match_operand:VM 0 "register_operand" "=v") + (unspec:VM [(unspec:VM [(match_operand:VM 1 "register_operand" "v") + (match_dup 1) + (match_operand:QI 2 "immediate_operand" "i")] + UNSPEC_VSLDOI) + (unspec:VM [(match_dup 1) + (match_dup 1) + (match_dup 2)] + UNSPEC_VSLDOI) + (match_operand:QI 3 "immediate_operand" "i")] + UNSPEC_VSLDOI))] + "TARGET_ALTIVEC" + "#" + "&& 1" + [(const_int 0)] +{ + unsigned int shift1 = UINTVAL (operands[2]); + unsigned int shift2 = UINTVAL (operands[3]); + + unsigned int shift = (shift1 + shift2) % 16; + if (shift) +emit_insn (gen_altivec_vsldoi_ (operands[0], operands[1], + operands[1], GEN_INT (shift))); + else +emit_move_insn (operands[0], operands[1]); + DONE; +}) + (define_insn "altivec_vupkhs" [(set (match_operand:VP 0 "register_operand" "=v") (unspec:VP [(match_operand: 1 "register_operand" "v")] diff --git a/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c b/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c new file mode 100644 index 000..eebd7b4d382 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c @@ -0,0 +1,59 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { 
dg-options "-O2 -mvsx -save-temps" } */ + +#include "altivec.h" + +#ifdef DEBUG +#include +#endif + +void abort (void); + +__attribute__ ((noipa)) vector signed int +test1 (vector signed int a) +{ + a = vec_sld (a, a, 2); + a = vec_sld (a, a, 6); + return a; +} + +__attribute__ ((noipa)) vector signed int +test2 (vector signed int a) +{ + a = vec_sld (a, a, 14); + a = vec_sld (a, a, 2); + return a; +} + +int main (void) +{ + vector signed int a = {1,2,3,4}; + vector signed int result_a; + int i; + + result_a = test1 (a); + vector signed int expect_a = {3,4,1,2}; + + for (i = 0; i< 4; i++) +if (result_a[i] != expect_a[i]) +#ifdef DEBUG + printf("ERROR: test1 result[%d] = %d, not expected[%d] = %d\n", + i, result_a[i], i, expect_a[i]); +#else + abort (); +#endif + + result_a = test2 (a); + + for (i = 0; i< 4; i++) +if (result_a[i] != a[i]) +#ifdef DEBUG + printf("ERROR: test2 result[%d] = %d, not expected[%d] = %d\n", + i, result_a[i], i, a[i]); +#else + abort (); +#endif +} + +/* { dg-final { scan-assembler-times {\mvsldoi\M} 1 } } */
Ping [PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694]
Hi, Gently ping this: https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html Gui Haochen Thanks 在 2023/2/8 13:08, HAO CHEN GUI 写道: > Hi, > The logical operations for TImode is split after reload pass right now. Some > potential optimizations miss as the split is too late. This patch removes > TImode from "AND", "IOR", "XOR" and "NOT" expander so that these logical > operations can be split at expand pass. The new test case illustrates the > optimization. > > Two test cases of pr92398 are merged into one as all sub-targets generates > the same sequence of instructions with the patch. > > Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. > > Thanks > Gui Haochen > > > ChangeLog > 2023-02-08 Haochen Gui > > gcc/ > PR target/100694 > * config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit > vector types. > (and3): Replace BOOL_128 with BOOL_128_V. > (ior3): Likewise. > (xor3): Likewise. > (one_cmpl2 expander): New expander with BOOL_128_V. > (one_cmpl2 insn_and_split): Rename to ... > (*one_cmpl2): ... this. > > gcc/testsuite/ > PR target/100694 > * gcc.target/powerpc/pr100694.c: New. > * gcc.target/powerpc/pr92398.c: New. > * gcc.target/powerpc/pr92398.h: Remove. > * gcc.target/powerpc/pr92398.p9-.c: Remove. > * gcc.target/powerpc/pr92398.p9+.c: Remove. 
> > > patch.diff > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index 4bd1dfd3da9..455b7329643 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128 [TI >(V2DF "TARGET_ALTIVEC") >(V1TI "TARGET_ALTIVEC")]) > > +;; Mode iterator for logical operations on 128-bit vector types > +(define_mode_iterator BOOL_128_V [(V16QI "TARGET_ALTIVEC") > + (V8HI "TARGET_ALTIVEC") > + (V4SI "TARGET_ALTIVEC") > + (V4SF "TARGET_ALTIVEC") > + (V2DI "TARGET_ALTIVEC") > + (V2DF "TARGET_ALTIVEC") > + (V1TI "TARGET_ALTIVEC")]) > + > ;; For the GPRs we use 3 constraints for register outputs, two that are the > ;; same as the output register, and a third where the output register is an > ;; early clobber, so we don't have to deal with register overlaps. For the > @@ -7135,23 +7144,23 @@ (define_expand "subti3" > ;; 128-bit logical operations expanders > > (define_expand "and3" > - [(set (match_operand:BOOL_128 0 "vlogical_operand") > - (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") > - (match_operand:BOOL_128 2 "vlogical_operand")))] > + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") > + (and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") > + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >"" >"") > > (define_expand "ior3" > - [(set (match_operand:BOOL_128 0 "vlogical_operand") > -(ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") > - (match_operand:BOOL_128 2 "vlogical_operand")))] > + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") > + (ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") > + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >"" >"") > > (define_expand "xor3" > - [(set (match_operand:BOOL_128 0 "vlogical_operand") > -(xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") > - (match_operand:BOOL_128 2 "vlogical_operand")))] > + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") > + (xor:BOOL_128_V 
(match_operand:BOOL_128_V 1 "vlogical_operand") > + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >"" >"") > > @@ -7449,7 +7458,14 @@ (define_insn_and_split "*eqv3_internal2" >(const_string "16")))]) > > ;; 128-bit one's complement > -(define_insn_and_split "one_cmpl2" > +(define_expand "one_cmpl2" > +[(set (match_operand:BOOL_128_V 0 "vlogical_operand" "=") > + (not:BOOL_128_V > + (match_operand:BOOL_128_V 1 "vlogical_operand" "")))] > + "" > + "") > + > +(define_insn_and_split "*one_cmpl2" >[(set (match_operand:BOOL_128 0 "vlogical_operand" "=") > (not:BOOL_128 > (match_operand:BOOL_128 1 "vlogical_operand" "")))] > diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c > b/gcc/testsuite/gcc.target/powerpc/pr100694.c > new file mode 100644 > index 000..96a895d6c44 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr100694.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target int128 } */ > +/* { dg-options "-O2" } */ > +/* { dg-final {
[PATCH, rs6000] Merge two vector shift when their sources are the same
Hi, This patch merges two "vsldoi" insns when their sources are the same. Particularly, it is simplified to be one move if the total shift is multiples of 16 bytes. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog 2023-02-20 Haochen Gui gcc/ * config/rs6000/altivec.md (*altivec_vsldoi_dup_): New insn_and_split to merge two vsldoi. gcc/testsuite/ * gcc.target/powerpc/vsldoi_merge.c: New. patch.diff diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 84660073f32..22e9c4c1fc5 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2529,6 +2529,35 @@ (define_insn "altivec_vsldoi_" "vsldoi %0,%1,%2,%3" [(set_attr "type" "vecperm")]) +(define_insn_and_split "*altivec_vsldoi_dup_" + [(set (match_operand:VM 0 "register_operand" "=v") + (unspec:VM [(unspec:VM [(match_operand:VM 1 "register_operand" "v") + (match_operand:VM 2 "register_operand" "v") + (match_operand:QI 3 "immediate_operand" "i")] + UNSPEC_VSLDOI) + (unspec:VM [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_VSLDOI) + (match_operand:QI 4 "immediate_operand" "i")] + UNSPEC_VSLDOI))] + "TARGET_ALTIVEC" + "#" + "&& 1" + [(const_int 0)] +{ + unsigned int shift1 = UINTVAL (operands[3]); + unsigned int shift2 = UINTVAL (operands[4]); + + unsigned int shift = (shift1 + shift2) % 16; + if (shift) +emit_insn (gen_altivec_vsldoi_ (operands[0], operands[1], + operands[1], GEN_INT (shift))); + else +emit_move_insn (operands[0], operands[1]); + DONE; +}) + (define_insn "altivec_vupkhs" [(set (match_operand:VP 0 "register_operand" "=v") (unspec:VP [(match_operand: 1 "register_operand" "v")] diff --git a/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c b/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c new file mode 100644 index 000..4ea72561282 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { 
dg-options "-O2 -mvsx" } */ + +#include "altivec.h" + +vector signed int test1 (vector signed int a, vector signed int b) +{ + a = vec_sld (a, b, 2); + a = vec_sld (a, a, 4); + return a; +} + +vector signed int test2 (vector signed int a, vector signed int b) +{ + a = vec_sld (a, b, 14); + a = vec_sld (a, a, 2); + return a; +} + +/* { dg-final { scan-assembler-times {\mvsldoi\M} 1 } } */
[PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694]
Hi, The logical operations for TImode are split after reload pass right now. Some potential optimizations are missed as the split happens too late. This patch removes TImode from "AND", "IOR", "XOR" and "NOT" expander so that these logical operations can be split at expand pass. The new test case illustrates the optimization. Two test cases of pr92398 are merged into one as all sub-targets generate the same sequence of instructions with the patch. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog 2023-02-08 Haochen Gui gcc/ PR target/100694 * config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit vector types. (and3): Replace BOOL_128 with BOOL_128_V. (ior3): Likewise. (xor3): Likewise. (one_cmpl2 expander): New expander with BOOL_128_V. (one_cmpl2 insn_and_split): Rename to ... (*one_cmpl2): ... this. gcc/testsuite/ PR target/100694 * gcc.target/powerpc/pr100694.c: New. * gcc.target/powerpc/pr92398.c: New. * gcc.target/powerpc/pr92398.h: Remove. * gcc.target/powerpc/pr92398.p9-.c: Remove. * gcc.target/powerpc/pr92398.p9+.c: Remove. patch.diff diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 4bd1dfd3da9..455b7329643 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128 [TI (V2DF "TARGET_ALTIVEC") (V1TI "TARGET_ALTIVEC")]) +;; Mode iterator for logical operations on 128-bit vector types +(define_mode_iterator BOOL_128_V [(V16QI "TARGET_ALTIVEC") +(V8HI "TARGET_ALTIVEC") +(V4SI "TARGET_ALTIVEC") +(V4SF "TARGET_ALTIVEC") +(V2DI "TARGET_ALTIVEC") +(V2DF "TARGET_ALTIVEC") +(V1TI "TARGET_ALTIVEC")]) + ;; For the GPRs we use 3 constraints for register outputs, two that are the ;; same as the output register, and a third where the output register is an ;; early clobber, so we don't have to deal with register overlaps. 
For the @@ -7135,23 +7144,23 @@ (define_expand "subti3" ;; 128-bit logical operations expanders (define_expand "and3" - [(set (match_operand:BOOL_128 0 "vlogical_operand") - (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") - (match_operand:BOOL_128 2 "vlogical_operand")))] + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") + (and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") + (match_operand:BOOL_128_V 2 "vlogical_operand")))] "" "") (define_expand "ior3" - [(set (match_operand:BOOL_128 0 "vlogical_operand") -(ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") - (match_operand:BOOL_128 2 "vlogical_operand")))] + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") + (ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") + (match_operand:BOOL_128_V 2 "vlogical_operand")))] "" "") (define_expand "xor3" - [(set (match_operand:BOOL_128 0 "vlogical_operand") -(xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") - (match_operand:BOOL_128 2 "vlogical_operand")))] + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") + (xor:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") + (match_operand:BOOL_128_V 2 "vlogical_operand")))] "" "") @@ -7449,7 +7458,14 @@ (define_insn_and_split "*eqv3_internal2" (const_string "16")))]) ;; 128-bit one's complement -(define_insn_and_split "one_cmpl2" +(define_expand "one_cmpl2" +[(set (match_operand:BOOL_128_V 0 "vlogical_operand" "=") + (not:BOOL_128_V + (match_operand:BOOL_128_V 1 "vlogical_operand" "")))] + "" + "") + +(define_insn_and_split "*one_cmpl2" [(set (match_operand:BOOL_128 0 "vlogical_operand" "=") (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")))] diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c b/gcc/testsuite/gcc.target/powerpc/pr100694.c new file mode 100644 index 000..96a895d6c44 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr100694.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target int128 } */ +/* { 
dg-options "-O2" } */ +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 3 } } */ + +/* It just needs two std and one blr. */ +void foo (unsigned __int128* res, unsigned long long hi, unsigned long long lo) +{ + unsigned __int128 i = hi; + i <<= 64; + i |= lo; + *res = i; +} + diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.c
[PATCH, rs6000] Convert TI AND with a special constant to DI AND [PR93123]
Hi, When TI AND with a special constant (the high part or low part is all ones), it may be converted to DI AND with a 64-bit constant and a simple DI move. When the DI AND can be implemented by rotate and mask or "andi.", it eliminates the 128-bit constant loading to save the cost. The patch creates three insn_and_split patterns to match these cases in combine pass and splits them later. The new predicate "double_wide_cint_operand" is used to identify if a constant is a double wide constant. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. Gui Haochen ChangeLog 2023-01-18 Haochen Gui gcc/ PR target/93123 * config/rs6000/predicates.md (double_wide_cint_operand): New. * config/rs6000/rs6000.md (*andti3_128bit_imm_highpart): New. (*andti3_128bit_imm_lowpart): New. (*andti3_64bit_imm): New. gcc/testsuite/ PR target/93123 * gcc.target/powerpc/pr93123.c: New. patch.diff diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index a1764018545..bacb87c3fb2 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -255,6 +255,19 @@ (define_predicate "u10bit_cint_operand" (and (match_code "const_int") (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 1023"))) +;; Return 1 if op is a 65-128 bits constant integer. +(define_predicate "double_wide_cint_operand" + (match_operand 0 "const_scalar_int_operand") +{ + if (CONST_INT_P (op)) +return 0; + + if (CONST_WIDE_INT_NUNITS (op) == 2) +return 1; + + return 0; +}) + ;; Return 1 if op is a constant integer that can fit in a D field. 
(define_predicate "short_cint_operand" (and (match_code "const_int") diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 6011f5bf76a..1fecb2d734e 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -7199,6 +7199,128 @@ (define_expand "orc3" "mode == TImode || mode == PTImode || TARGET_P8_VECTOR" "") +(define_insn_and_split "*andti3_128bit_imm_highpart" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r") + (and:TI + (match_operand:TI 1 "gpc_reg_operand" "r") + (match_operand:TI 2 "double_wide_cint_operand" "n")))] + "CONST_WIDE_INT_ELT (operands[2], 0) == -1 + && (rs6000_is_valid_and_mask (GEN_INT (CONST_WIDE_INT_ELT (operands[2], 1)), +E_DImode) + || logical_const_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[2], 1)), +E_DImode))" + "#" + "&& 1" + [(const_int 0)] +{ + rtx in_lo, in_hi, out_lo, out_hi; + rtx imm = GEN_INT (CONST_WIDE_INT_ELT (operands[2], 1)); + int hi_off, lo_off; + + if (BYTES_BIG_ENDIAN) +{ + hi_off = 0; + lo_off = 8; +} + else +{ + hi_off = 8; + lo_off = 0; +} + + in_lo = simplify_gen_subreg (DImode, operands[1], TImode, lo_off); + out_lo = simplify_gen_subreg (DImode, operands[0], TImode, lo_off); + in_hi = simplify_gen_subreg (DImode, operands[1], TImode, hi_off); + out_hi = simplify_gen_subreg (DImode, operands[0], TImode, hi_off); + + if (rs6000_is_valid_and_mask (imm, E_DImode)) +emit_insn (gen_anddi3_mask (out_hi, in_hi, imm)); + else +emit_insn (gen_anddi3_imm (out_hi, in_hi, imm)); + + emit_move_insn (out_lo, in_lo); +} + [(set_attr "length" "8")]) + +(define_insn_and_split "*andti3_128bit_imm_lowpart" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r") + (and:TI + (match_operand:TI 1 "gpc_reg_operand" "r") + (match_operand:TI 2 "double_wide_cint_operand" "n")))] + "CONST_WIDE_INT_ELT (operands[2], 1) == -1 + && (rs6000_is_valid_and_mask (GEN_INT (CONST_WIDE_INT_ELT (operands[2], 0)), +E_DImode) + || logical_const_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[2], 0)), +E_DImode))" + 
"#" + "&& 1" + [(const_int 0)] +{ + rtx in_lo, in_hi, out_lo, out_hi; + rtx imm = GEN_INT (CONST_WIDE_INT_ELT (operands[2], 0)); + int hi_off, lo_off; + + if (BYTES_BIG_ENDIAN) +{ + hi_off = 0; + lo_off = 8; +} + else +{ + hi_off = 8; + lo_off = 0; +} + + in_lo = simplify_gen_subreg (DImode, operands[1], TImode, lo_off); + out_lo = simplify_gen_subreg (DImode, operands[0], TImode, lo_off); + in_hi = simplify_gen_subreg (DImode, operands[1], TImode, hi_off); + out_hi = simplify_gen_subreg (DImode, operands[0], TImode, hi_off); + + if (rs6000_is_valid_and_mask (imm, E_DImode)) +emit_insn (gen_anddi3_mask (out_lo, in_lo, imm)); + else +emit_insn (gen_anddi3_imm (out_lo, in_lo, imm)); + + emit_move_insn (out_hi, in_hi); +} + [(set_attr "length" "8")]) + + +(define_insn_and_split "*andti3_64bit_imm" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r") + (and:TI + (match_operand:TI 1 "gpc_reg_operand" "r") + (match_operand:TI
[PATCH-4, rs6000] Change ilp32 target check for some scalar-extract-sig and scalar-insert-exp test cases
Hi, "ilp32" is used in these test cases to make sure test cases only run on a 32-bit environment. Unfortunately, these cases also run with "-m32/-mpowerpc64" which causes unexpected errors. This patch changes the target check to skip if "has_arch_ppc64" is set. So the test cases won't run when arch_ppc64 has been set. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. Gui Haochen ChangeLog 2023-01-03 Haochen Gui gcc/testsuite/ * gcc.target/powerpc/bfp/scalar-extract-sig-2.c: Replace ilp32 check with dg-skip-if has_arch_ppc64. * gcc.target/powerpc/bfp/scalar-insert-exp-2.c: Likewise. * gcc.target/powerpc/bfp/scalar-insert-exp-5.c: Likewise. patch.diff diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c index 39ee74c94dc..148b5fbd9fa 100644 --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ -/* { dg-require-effective-target ilp32 } */ +/* { dg-skip-if "" { has_arch_ppc64 } } */ /* { dg-require-effective-target powerpc_p9vector_ok } */ /* { dg-options "-mdejagnu-cpu=power9" } */ diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c index efd69725905..956c1183beb 100644 --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ -/* { dg-require-effective-target ilp32 } */ +/* { dg-skip-if "" { has_arch_ppc64 } } */ /* { dg-require-effective-target powerpc_p9vector_ok } */ /* { dg-options "-mdejagnu-cpu=power9" } */ diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c 
index f85966a6fdf..9a7949fb89a 100644 --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ -/* { dg-require-effective-target ilp32 } */ +/* { dg-skip-if "" { has_arch_ppc64 } } */ /* { dg-require-effective-target powerpc_p9vector_ok } */ /* { dg-options "-mdejagnu-cpu=power9" } */
[PATCH-3, rs6000] Change mode and insn condition for scalar insert exp instruction
Hi, This patch changes the mode of the exponent operand to GPR in the scalar insert exp pattern, as the exponent can be put into a 32-bit register. Also the condition check is changed from TARGET_64BIT to TARGET_POWERPC64. The test cases are modified according to the changes of expand pattern. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. Gui Haochen ChangeLog 2023-01-03 Haochen Gui gcc/ * config/rs6000/rs6000-builtins.def (__builtin_vsx_scalar_insert_exp): Replace bif-pattern from xsiexpdp to xsiexpdp_di. (__builtin_vsx_scalar_insert_exp_dp): Replace bif-pattern from xsiexpdpf to xsiexpdpf_di. * config/rs6000/vsx.md (xsiexpdp): Rename to... (xsiexpdp_): ..., set the mode of second operand to GPR and replace TARGET_64BIT with TARGET_POWERPC64. (xsiexpdpf): Rename to... (xsiexpdpf_): ..., set the mode of second operand to GPR and replace TARGET_64BIT with TARGET_POWERPC64. gcc/testsuite/ * gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Replace lp64 check with has_arch_ppc64. * gcc.target/powerpc/bfp/scalar-insert-exp-1.c: Likewise. * gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Likewise. * gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Likewise. * gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Likewise. * gcc.target/powerpc/bfp/scalar-insert-exp-4.c: Likewise. 
patch.diff diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 25647b7bdd2..b1b5002d7d9 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2854,10 +2854,10 @@ const double __builtin_vsx_scalar_insert_exp (unsigned long long, \ unsigned long long); -VSIEDP xsiexpdp {} +VSIEDP xsiexpdp_di {} const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned long long); -VSIEDPF xsiexpdpf {} +VSIEDPF xsiexpdpf_di {} pure vsc __builtin_vsx_xl_len_r (void *, signed long); XL_LEN_R xl_len_r {} diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 27e03a4cf6c..3376090cc6f 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -5137,22 +5137,22 @@ (define_insn "xsiexpqp_" [(set_attr "type" "vecmove")]) ;; VSX Scalar Insert Exponent Double-Precision -(define_insn "xsiexpdp" +(define_insn "xsiexpdp_" [(set (match_operand:DF 0 "vsx_register_operand" "=wa") (unspec:DF [(match_operand:DI 1 "register_operand" "r") - (match_operand:DI 2 "register_operand" "r")] + (match_operand:GPR 2 "register_operand" "r")] UNSPEC_VSX_SIEXPDP))] - "TARGET_P9_VECTOR && TARGET_64BIT" + "TARGET_P9_VECTOR && TARGET_POWERPC64" "xsiexpdp %x0,%1,%2" [(set_attr "type" "fpsimple")]) ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument -(define_insn "xsiexpdpf" +(define_insn "xsiexpdpf_" [(set (match_operand:DF 0 "vsx_register_operand" "=wa") (unspec:DF [(match_operand:DF 1 "register_operand" "r") - (match_operand:DI 2 "register_operand" "r")] + (match_operand:GPR 2 "register_operand" "r")] UNSPEC_VSX_SIEXPDP))] - "TARGET_P9_VECTOR && TARGET_64BIT" + "TARGET_P9_VECTOR && TARGET_POWERPC64" "xsiexpdp %x0,%1,%2" [(set_attr "type" "fpsimple")]) diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c index d8243258a67..88d77564158 100644 --- 
a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c @@ -1,7 +1,7 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ -/* { dg-require-effective-target lp64 } */ /* { dg-require-effective-target powerpc_p9vector_ok } */ /* { dg-options "-mdejagnu-cpu=power9" } */ +/* { dg-require-effective-target has_arch_ppc64 } */ /* This test should succeed only on 64-bit configurations. */ #include diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c index 8260b107178..2f219ddc83a 100644 --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c @@ -1,7 +1,7 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ -/* { dg-require-effective-target lp64 } */ /* { dg-require-effective-target powerpc_p9vector_ok } */ /* { dg-options "-mdejagnu-cpu=power8" } */ +/* { dg-require-effective-target has_arch_ppc64 } */ /* This test should succeed only on 64-bit configurations. */ #include diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-12.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-12.c index 384fc9cc675..9eade34d9ad 100644 ---
[PATCH-2, rs6000] Change mode and insn condition for scalar extract sig instruction
Hi, This patch changes the return type of __builtin_vsx_scalar_extract_sig from const signed long to const signed long long, so that it can be called with "-m32/-mpowerpc64" option. The bif needs TARGET_POWERPC64 instead of TARGET_64BIT. So the condition check in the expander is changed. The test cases are modified according to the changes of expand pattern. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. Gui Haochen ChangeLog 2023-01-03 Haochen Gui gcc/ * config/rs6000/rs6000-builtins.def (__builtin_vsx_scalar_extract_sig): Set return type to const signed long long. * config/rs6000/vsx.md (xsxsigdp): Replace TARGET_64BIT with TARGET_POWERPC64. gcc/testsuite/ * gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Replace lp64 check with has_arch_ppc64. * gcc.target/powerpc/bfp/scalar-extract-sig-1.c: Likewise. * gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Likewise. patch.diff diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index a8f1d3f1b3d..25647b7bdd2 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2849,7 +2849,7 @@ pure vsc __builtin_vsx_lxvl (const void *, signed long); LXVL lxvl {} - const signed long __builtin_vsx_scalar_extract_sig (double); + const signed long long __builtin_vsx_scalar_extract_sig (double); VSESDP xsxsigdp {} const double __builtin_vsx_scalar_insert_exp (unsigned long long, \ diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 229c26c3a61..27e03a4cf6c 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -5111,7 +5111,7 @@ (define_insn "xsxsigdp" [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] UNSPEC_VSX_SXSIG))] - "TARGET_P9_VECTOR && TARGET_64BIT" + "TARGET_P9_VECTOR && TARGET_POWERPC64" "xsxsigdp %0,%x1" [(set_attr "type" "integer")]) diff --git 
a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c index 637080652b7..d22f7d1b274 100644 --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c @@ -1,7 +1,7 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ -/* { dg-require-effective-target lp64 } */ /* { dg-require-effective-target powerpc_p9vector_ok } */ /* { dg-options "-mdejagnu-cpu=power9" } */ +/* { dg-require-effective-target has_arch_ppc64 } */ /* This test should succeed only on 64-bit configurations. */ #include diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c index f12eed3d9d5..64747d73a51 100644 --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c @@ -1,7 +1,7 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ -/* { dg-require-effective-target lp64 } */ /* { dg-require-effective-target powerpc_p9vector_ok } */ /* { dg-options "-mdejagnu-cpu=power8" } */ +/* { dg-require-effective-target has_arch_ppc64 } */ /* This test should succeed only on 64-bit configurations. */ #include diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c index c85072da138..561be53fb9b 100644 --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c @@ -1,7 +1,7 @@ /* { dg-do run { target { powerpc*-*-* } } } */ -/* { dg-require-effective-target lp64 } */ /* { dg-require-effective-target p9vector_hw } */ /* { dg-options "-mdejagnu-cpu=power9" } */ +/* { dg-require-effective-target has_arch_ppc64 } */ /* This test should succeed only on 64-bit configurations. */ #include
[PATCH-1, rs6000] Change mode and insn condition for scalar extract exp instruction
Hi, This patch changes the return type of __builtin_vsx_scalar_extract_exp from const signed long to const signed int, as the exponent can be put in a signed int. It is also in line with the external interface definition of the bif. The mode of exponent operand in "xsxexpdp" is changed to GPR mode and TARGET_64BIT check is removed, as the instruction can be executed in a 32-bit environment. The test cases are modified according to the changes of expand pattern. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. Gui Haochen ChangeLog 2022-12-23 Haochen Gui gcc/ * config/rs6000/rs6000-builtins.def (__builtin_vsx_scalar_extract_exp): Set return type to const signed int and set its bif-pattern to xsxexpdp_si, move it from power9-64 to power9 catalog. * config/rs6000/vsx.md (xsxexpdp): Rename to ... (xsxexpdp_): ..., set mode of operand 0 to GPR and remove TARGET_64BIT check. * doc/extend.texi (scalar_extract_exp): Remove 64-bit environment requirement when it has a 64-bit argument. gcc/testsuite/ * gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Remove lp64 check. * gcc.target/powerpc/bfp/scalar-extract-exp-1.c: Likewise. * gcc.target/powerpc/bfp/scalar-extract-exp-2.c: Deleted as the case is invalid. * gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Remove lp64 check. 
patch.diff diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index f76f54793d7..a8f1d3f1b3d 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2833,6 +2833,8 @@ const signed int __builtin_dtstsfi_ov_td (const int<6>, _Decimal128); TSTSFI_OV_TD dfptstsfi_unordered_td {} + const signed int __builtin_vsx_scalar_extract_exp (double); +VSEEDP xsxexpdp_si {} [power9-64] void __builtin_altivec_xst_len_r (vsc, void *, long); @@ -2847,9 +2849,6 @@ pure vsc __builtin_vsx_lxvl (const void *, signed long); LXVL lxvl {} - const signed long __builtin_vsx_scalar_extract_exp (double); -VSEEDP xsxexpdp {} - const signed long __builtin_vsx_scalar_extract_sig (double); VSESDP xsxsigdp {} diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 992fbc983be..229c26c3a61 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -5089,11 +5089,11 @@ (define_insn "xsxexpqp_" [(set_attr "type" "vecmove")]) ;; VSX Scalar Extract Exponent Double-Precision -(define_insn "xsxexpdp" - [(set (match_operand:DI 0 "register_operand" "=r") - (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] +(define_insn "xsxexpdp_" + [(set (match_operand:GPR 0 "register_operand" "=r") + (unspec:GPR [(match_operand:DF 1 "vsx_register_operand" "wa")] UNSPEC_VSX_SXEXPDP))] - "TARGET_P9_VECTOR && TARGET_64BIT" + "TARGET_P9_VECTOR" "xsxexpdp %0,%x1" [(set_attr "type" "integer")]) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index d3812fa55b0..7c087967234 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -19598,7 +19598,10 @@ bool scalar_test_neg (double source); bool scalar_test_neg (__ieee128 source); @end smallexample -The @code{scalar_extract_exp} and @code{scalar_extract_sig} +The @code{scalar_extract_exp} with a 64-bit source argument +function requires an environment supporting ISA 3.0 or later. 
+The @code{scalar_extract_exp} with a 128-bit source argument +and @code{scalar_extract_sig} functions require a 64-bit environment supporting ISA 3.0 or later. The @code{scalar_extract_exp} and @code{scalar_extract_sig} built-in functions return the significand and the biased exponent value diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c index 35bf1b240f3..d971833748e 100644 --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c @@ -1,9 +1,7 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ -/* { dg-require-effective-target lp64 } */ /* { dg-require-effective-target powerpc_p9vector_ok } */ /* { dg-options "-mdejagnu-cpu=power9" } */ -/* This test should succeed only on 64-bit configurations. */ #include unsigned int diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c index 9737762c1d4..1cb438f9b70 100644 --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c @@ -1,9 +1,7 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ -/* { dg-require-effective-target lp64 } */ /* { dg-require-effective-target powerpc_p9vector_ok } */ /* { dg-options "-mdejagnu-cpu=power8" } */ -/* This test should succeed only on 64-bit
[PATCH v6, rs6000] Change mode and insn condition for VSX scalar extract/insert instructions
Hi, This patch fixes several problems: 1. The exponent of double-precision can be put into a SImode register. So "xsxexpdp" doesn't require 64-bit environment. Also "xsxsigdp", "xsiexpdp" and "xsiexpdpf" can put exponent into a GPR register. 2. "TARGET_64BIT" check in insn conditions should be replaced with "TARGET_POWERPC64" check. 3. "lp64" check in test cases should be replaced with "has_arch_ppc64" check. "ilp32" check should be replaced with "dg-skip-if has_arch_ppc64". This patch keeps outer interfaces of these builtins unchanged. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-12-19 Haochen Gui gcc/ * config/rs6000/rs6000-builtins.def (__builtin_vsx_scalar_extract_exp): Set return type to const unsigned int and set its bif-pattern to xsxexpdp_si, move it from power9-64 to power9 catalog. (__builtin_vsx_scalar_extract_sig): Set return type to const unsigned long long. (__builtin_vsx_scalar_insert_exp): Set its bif-pattern to xsiexpdp_di unsigned int. (__builtin_vsx_scalar_insert_exp_dp): Set its bif-pattern to xsiexpdpf_di. * config/rs6000/vsx.md (xsxexpdp): Rename to ... (xsxexpdp_): ..., set mode of operand 0 to GPR and remove TARGET_64BIT check. (xsxsigdp): Change insn condition from TARGET_64BIT to TARGET_POWERPC64. (xsiexpdp): Rename to ... (xsiexpdp_): ..., set mode of operand 2 to GPR and change insn condition from TARGET_64BIT to TARGET_POWERPC64. (xsiexpdpf): Rename to ... (xsiexpdpf_): ..., set mode of operand 2 to GPR and change insn condition from TARGET_64BIT to TARGET_POWERPC64. * doc/extend.texi (scalar_extract_exp): Remove 64-bit environment requirement when it has a 64-bit argument. gcc/testsuite/ * gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Remove lp64 check. * gcc.target/powerpc/bfp/scalar-extract-exp-1.c: Likewise. * gcc.target/powerpc/bfp/scalar-extract-exp-2.c: Deleted as the case is invalid now. 
* gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Remove lp64 check. * gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Replace lp64 check with has_arch_ppc64. * gcc.target/powerpc/bfp/scalar-extract-sig-1.c: Likewise. * gcc.target/powerpc/bfp/scalar-extract-sig-2.c: Replace ilp32 check with dg skip has_arch_ppc64. * gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Replace lp64 check with has_arch_ppc64. * gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Likewise. * gcc.target/powerpc/bfp/scalar-insert-exp-1.c: Likewise. * gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Likewise. * gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Likewise. * gcc.target/powerpc/bfp/scalar-insert-exp-2.c: Replace ilp32 check with dg skip has_arch_ppc64. * gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Replace lp64 check with has_arch_ppc64. * gcc.target/powerpc/bfp/scalar-insert-exp-4.c: Likewise. * gcc.target/powerpc/bfp/scalar-insert-exp-5.c: Replace ilp32 check with dg-skip-if has_arch_ppc64. patch.diff diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index f76f54793d7..b1b5002d7d9 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2833,6 +2833,8 @@ const signed int __builtin_dtstsfi_ov_td (const int<6>, _Decimal128); TSTSFI_OV_TD dfptstsfi_unordered_td {} + const signed int __builtin_vsx_scalar_extract_exp (double); +VSEEDP xsxexpdp_si {} [power9-64] void __builtin_altivec_xst_len_r (vsc, void *, long); @@ -2847,18 +2849,15 @@ pure vsc __builtin_vsx_lxvl (const void *, signed long); LXVL lxvl {} - const signed long __builtin_vsx_scalar_extract_exp (double); -VSEEDP xsxexpdp {} - - const signed long __builtin_vsx_scalar_extract_sig (double); + const signed long long __builtin_vsx_scalar_extract_sig (double); VSESDP xsxsigdp {} const double __builtin_vsx_scalar_insert_exp (unsigned long long, \ unsigned long long); -VSIEDP xsiexpdp {} +VSIEDP xsiexpdp_di {} const double __builtin_vsx_scalar_insert_exp_dp 
(double, unsigned long long); -VSIEDPF xsiexpdpf {} +VSIEDPF xsiexpdpf_di {} pure vsc __builtin_vsx_xl_len_r (void *, signed long); XL_LEN_R xl_len_r {} diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index fb5cf04147e..e1c905a3f91 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -5089,11 +5089,11 @@ (define_insn "xsxexpqp_" [(set_attr "type" "vecmove")]) ;; VSX Scalar Extract Exponent Double-Precision -(define_insn "xsxexpdp" - [(set (match_operand:DI 0
PING [PATCH, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]
Hi, Gentle ping this: https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601909.html Thanks Gui Haochen 在 2022/9/21 13:13, HAO CHEN GUI 写道: > Hi, > This patch adds a new insn for vector splat with small V2DI constants on P8. > If the value of constant is in RANGE (-16, 15) and not 0 or -1, it can be > loaded > with vspltisw and vupkhsw on P8. It should be efficient than loading vector > from > TOC. > > Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. > Is this okay for trunk? Any recommendations? Thanks a lot. > > ChangeLog > 2022-09-21 Haochen Gui > > gcc/ > PR target/104124 > * config/rs6000/altivec.md (*altivec_vupkhs_direct): Renamed > to... > (altivec_vupkhs_direct): ...this. > * config/rs6000/constraints.md (wT constraint): New constant for a > vector constraint that can be loaded with vspltisw and vupkhsw. > * config/rs6000/predicates.md (vspltisw_constant_split): New > predicate for wT constraint. > * config/rs6000/rs6000-protos.h (vspltisw_constant_p): Add declaration. > * config/rs6000/rs6000.cc (easy_altivec_constant): Call > vspltisw_constant_p to judge if a V2DI constant can be synthesized with > a vspltisw and a vupkhsw. > * (vspltisw_constant_p): New function to return true if OP mode is > V2DI and can be synthesized with ISA 2.07 instruction vupkhsw and > vspltisw. > * gcc/config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up > constants with vspltisw and vupkhsw. > > gcc/testsuite/ > PR target/104124 > * gcc.target/powerpc/p8-splat.c: New. 
> > patch.diff > diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md > index 2c4940f2e21..185414df021 100644 > --- a/gcc/config/rs6000/altivec.md > +++ b/gcc/config/rs6000/altivec.md > @@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs" > } >[(set_attr "type" "vecperm")]) > > -(define_insn "*altivec_vupkhs_direct" > +(define_insn "altivec_vupkhs_direct" >[(set (match_operand:VP 0 "register_operand" "=v") > (unspec:VP [(match_operand: 1 "register_operand" "v")] >UNSPEC_VUNPACK_HI_SIGN_DIRECT))] > diff --git a/gcc/config/rs6000/constraints.md > b/gcc/config/rs6000/constraints.md > index 5a44a92142e..f65dea6e0c7 100644 > --- a/gcc/config/rs6000/constraints.md > +++ b/gcc/config/rs6000/constraints.md > @@ -150,6 +150,10 @@ (define_constraint "wS" >"@internal Vector constant that can be loaded with XXSPLTIB & sign > extension." >(match_test "xxspltib_constant_split (op, mode)")) > > +(define_constraint "wT" > + "@internal Vector constant that can be loaded with vspltisw & vupkhsw." > + (match_test "vspltisw_constant_split (op, mode)")) > + > ;; ISA 3.0 DS-form instruction that has the bottom 2 bits 0 and no update > form. > ;; Used by LXSD/STXSD/LXSSP/STXSSP. In contrast to "Y", the multiple-of-four > ;; offset is enforced for 32-bit too. > diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md > index b1fcc69bb60..00cf60bbe58 100644 > --- a/gcc/config/rs6000/predicates.md > +++ b/gcc/config/rs6000/predicates.md > @@ -694,6 +694,19 @@ (define_predicate "xxspltib_constant_split" >return num_insns > 1; > }) > > +;; Return true if the operand is a constant that can be loaded with a > vspltisw > +;; instruction and then a vupkhsw instruction. 
> + > +(define_predicate "vspltisw_constant_split" > + (match_code "const_vector,vec_duplicate") > +{ > + int value = 32; > + > + if (!vspltisw_constant_p (op, mode, )) > +return false; > + > + return true; > +}) > > ;; Return 1 if the operand is constant that can loaded directly with a > XXSPLTIB > ;; instruction. > diff --git a/gcc/config/rs6000/rs6000-protos.h > b/gcc/config/rs6000/rs6000-protos.h > index b3c16e7448d..45f3d044eee 100644 > --- a/gcc/config/rs6000/rs6000-protos.h > +++ b/gcc/config/rs6000/rs6000-protos.h > @@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, > rtx, int, int, int, > > extern int easy_altivec_constant (rtx, machine_mode); > extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *); > +extern bool vspltisw_constant_p (rtx, machine_mode, int *); > extern int vspltis_shifted (rtx); > extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); > extern bool macho_lo_sum_memory_operand (rtx, machine_mode); > diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc > index df491bee2ea..984624026c2 100644 > --- a/gcc/config/rs6000/rs6000.cc > +++ b/gcc/config/rs6000/rs6000.cc > @@ -6292,6 +6292,12 @@ easy_altivec_constant (rtx op, machine_mode mode) > && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1) > return 8; > > + /* If V2DI constant is within RANGE (-16, 15), it can be synthesized > with > + a vspltisw and a vupkhsw. */ > + int value = 32; > + if (vspltisw_constant_p (op, mode, )) > + return 8; > + >return
Re: [PATCH v5, rs6000] Change mode and insn condition for VSX scalar extract/insert instructions
Hi Kewen, 在 2022/12/8 16:47, Kewen.Lin 写道: > This documentation update reminds me of that the current prototype of > __ieee128 > variant can be: > > unsigned int scalar_extract_exp (__ieee128 source); > > type unsigned int is enough for the exponent. It means xsxexpqp_ can > also > use SImode rather than DImode. Could I put the changes of __ieee128 bifs in another patch? So, this patch doesn't touch anything about __ieee128. Thanks a lot Gui Haochen
[PATCH v4, rs6000] Enable have_cbranchcc4 on rs6000
Hi, This patch enables "have_cbranchcc4" on rs6000 by defining a "cbranchcc4" expander. "have_cbranchcc4" is a flag in ifcvt.cc to indicate if branch by CC bits is invalid or not. With this flag enabled, some branches can be optimized to conditional moves. Compared to last version, the main changes are on the test cases. Test case is renamed and comments are modified. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. BR Gui Haochen ChangeLog 2022-12-07 Haochen Gui gcc/ * config/rs6000/rs6000.md (cbranchcc4): New expander. gcc/testsuite * gcc.target/powerpc/cbranchcc4-1.c: New. * gcc.target/powerpc/cbranchcc4-2.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index e9e5cd1e54d..d7ddd96cc70 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -11932,6 +11932,16 @@ (define_expand "cbranch4" DONE; }) +(define_expand "cbranchcc4" + [(set (pc) + (if_then_else (match_operator 0 "branch_comparison_operator" + [(match_operand 1 "cc_reg_operand") +(match_operand 2 "zero_constant")]) + (label_ref (match_operand 3)) + (pc)))] + "" + "") + (define_expand "cstore4_signed" [(use (match_operator 1 "signed_comparison_operator" [(match_operand:P 2 "gpc_reg_operand") diff --git a/gcc/testsuite/gcc.target/powerpc/cbranchcc4-1.c b/gcc/testsuite/gcc.target/powerpc/cbranchcc4-1.c new file mode 100644 index 000..6c2cd130b6d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/cbranchcc4-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +/* Verify there is no ICE with cbranchcc4 enabled. */ + +int foo (double d) +{ + if (d == 0.0) +return 0; + + d = ((d) >= 0 ? 
(d) : -(d)); + + if (d < 1.0) +return 1; +} diff --git a/gcc/testsuite/gcc.target/powerpc/cbranchcc4-2.c b/gcc/testsuite/gcc.target/powerpc/cbranchcc4-2.c new file mode 100644 index 000..528ba1a878d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/cbranchcc4-2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-ce1" } */ +/* { dg-final { scan-rtl-dump "noce_try_store_flag_constants" "ce1" } } */ + +/* The inner branch should be detected by ifcvt then be converted to a setcc + with a plus by noce_try_store_flag_constants. */ + +int test (unsigned int a, unsigned int b) +{ +return (a < b ? 0 : (a > b ? 2 : 1)); +}
Re: [PATCH v3, rs6000] Enable have_cbranchcc4 on rs6000
Hi Kewen, Thanks so much for your review comments. I will fix them. 在 2022/12/7 11:06, Kewen.Lin 写道: > Does this issue which relies on the fix for generic part make bootstrapping > fail? > If no, how many failures it can cause? I'm thinking if we can commit this > firstly, > then in the commit log of the fix for generic part you can mention it can fix > the > ICE exposed by this test case. Yes, the bootstrapping fails if we enable cbranchcc4 without the generic patch. Actually, the testcase comes from the ICE found in bootstrapping.
[PATCH v2] Add a new conversion for conditional ternary set into ifcvt [PR106536]
Hi, This patch adds a new conversion to convert a certain branch to conditional ternary set in ifcvt. The branch commonly has following insns. cond_jump ? pc : label setcc (neg/subreg) label: set a constant cond_jump and setcc use the same CC reg and neg/subreg is optional. The branch might be converted to a nested if-then-else insn to eliminate the branch if the insn is supported on target. [(set (match_operand:SI 0 "gpc_reg_operand") (if_then_else:SI (lt (match_operand:CC 1 "cc_reg_operand") (const_int 0)) (const_int -1) (if_then_else:SI (gt (match_dup 1) (const_int 0)) (const_int 1) (const_int 0] The patch adds a new optab for the nested if-then-else insn, and adds help functions in ifcvt.cc to detect the pattern and emit the insn by the new optab. Compared to last version, this version uses a generic function to detect the candidate branch instead of using a target hook. Also a new optab is added. The insn is generated by the new optab instead of recog. Bootstrapped and tested on powerpc64-linux BE/LE and x86 with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-12-07 Haochen Gui gcc/ * ifcvt.cc (noce_emit_ternary_set): New function to emit a conditional ternary set insn by ternary_set_optab. (noce_try_ternary_set): Detect conditional ternary set pattern and call noce_emit_ternary_set to emit the insn. (noce_process_if_block): Call noce_try_ternary_set to do the conversion. * optabs.def (ternary_set_optab): New optab. patch.diff diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc index eb8efb89a89..8252d9c2dc5 100644 --- a/gcc/ifcvt.cc +++ b/gcc/ifcvt.cc @@ -1830,6 +1830,44 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code, return NULL_RTX; } +/* Emit a conditional ternary set insn by its optab. 
*/ + +static rtx +noce_emit_ternary_set (rtx target, enum rtx_code outer_code, + enum rtx_code inner_code, rtx cc, int a, int b, int c) +{ + rtx outer_comp, inner_comp; + machine_mode mode; + machine_mode orig_mode = GET_MODE (target); + outer_comp = gen_rtx_fmt_ee (outer_code, VOIDmode, cc, const0_rtx); + inner_comp = gen_rtx_fmt_ee (inner_code, VOIDmode, cc, const0_rtx); + + class expand_operand ops[7]; + create_fixed_operand ([1], outer_comp); + create_fixed_operand ([3], inner_comp); + create_fixed_operand ([2], cc); + create_integer_operand ([4], a); + create_integer_operand ([5], b); + create_integer_operand ([6], c); + + FOR_EACH_MODE_FROM (mode, orig_mode) +{ + enum insn_code icode; + icode = optab_handler (ternary_set_optab, mode); + if (icode != CODE_FOR_nothing) + { + create_output_operand ([0], target, mode); + if (maybe_expand_insn (icode, 7, ops)) + { + if (ops[0].value != target) + convert_move (target, ops[0].value, false); + return target; + } + } +} + return NULL_RTX; +} + /* Try only simple constants and registers here. More complex cases are handled in noce_try_cmove_arith after noce_try_store_flag_arith has had a go at it. */ @@ -2987,6 +3025,160 @@ noce_try_bitop (struct noce_if_info *if_info) return TRUE; } +/* Try to find pattern "a < b ? -1 : (a > b ? 1 : 0)" and convert it to + a conditional ternary set insn. It commonly has following pattern. + cond_jump + setcc + (neg/subreg) + label: const_set + cond_jump and setcc use the same CC reg. There may be a neg insn after + the setcc insn to negative the result of setcc, and a subreg insn after + the setcc insn to convert the mode. + + The pattern can't be optimized by combine pass due to the branch and + limitation on the number of insns. 
+*/ + +static int +noce_try_ternary_set (struct noce_if_info *if_info) +{ + machine_mode orig_mode = GET_MODE (if_info->x); + machine_mode mode; + int have_ternary_set = 0; + + FOR_EACH_MODE_FROM (mode, orig_mode) +{ + if (direct_optab_handler (ternary_set_optab, mode) != CODE_FOR_nothing) + { + have_ternary_set = 1; + break; + } +} + + if (!have_ternary_set) +return FALSE; + + if (!if_info->then_bb || !if_info->else_bb) +return FALSE; + + if (!if_info->then_simple && !if_info->else_simple) +return FALSE; + + rtx cc; + basic_block target_bb; + int int1, int2, int3; + + cc = SET_DEST (PATTERN (if_info->cond_earliest)); + if (GET_MODE_CLASS (GET_MODE (cc)) != MODE_CC) +return FALSE; + + /* One arm should be a constant set. */ + + if (CONST_INT_P (if_info->a)) +{ + int1 = INTVAL (if_info->a); + target_bb = if_info->else_bb; +} + else if (CONST_INT_P
[PATCH v3, rs6000] Enable have_cbranchcc4 on rs6000
Hi, This patch enables "have_cbranchcc4" on rs6000 by defining a "cbranchcc4" expander. "have_cbrnachcc4" is a flag in ifcvt.cc to indicate if branch by CC bits is invalid or not. With this flag enabled, some branches can be optimized to conditional moves. The patch relies on the former patch which is under review. https://gcc.gnu.org/pipermail/gcc-patches/2022-December/607810.html Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. Thanks Gui Haochen ChangeLog 2022-12-06 Haochen Gui gcc/ * config/rs6000/rs6000.md (cbranchcc4): New expander. gcc/testsuite * gcc.target/powerpc/cbranchcc4.c: New. * gcc.target/powerpc/cbranchcc4-1.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index e9e5cd1e54d..d7ddd96cc70 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -11932,6 +11932,16 @@ (define_expand "cbranch4" DONE; }) +(define_expand "cbranchcc4" + [(set (pc) + (if_then_else (match_operator 0 "branch_comparison_operator" + [(match_operand 1 "cc_reg_operand") +(match_operand 2 "zero_constant")]) + (label_ref (match_operand 3)) + (pc)))] + "" + "") + (define_expand "cstore4_signed" [(use (match_operator 1 "signed_comparison_operator" [(match_operand:P 2 "gpc_reg_operand") diff --git a/gcc/testsuite/gcc.target/powerpc/cbranchcc4-1.c b/gcc/testsuite/gcc.target/powerpc/cbranchcc4-1.c new file mode 100644 index 000..3c8286bf091 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/cbranchcc4-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" */ + +/* This case should be successfully compiled after cbranchcc4 is enabled. It + generates a "*cbranch_2insn" insn which makes predicate check of cbranchcc4 + failed and returns a NULL rtx from prepare_cmp_insn. */ + +int foo (double d) +{ + if (d == 0.0) +return 0; + + d = ((d) >= 0 ? 
(d) : -(d)); + + if (d < 1.0) +return 1; +} diff --git a/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c b/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c new file mode 100644 index 000..528ba1a878d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-ce1" } */ +/* { dg-final { scan-rtl-dump "noce_try_store_flag_constants" "ce1" } } */ + +/* The inner branch should be detected by ifcvt then be converted to a setcc + with a plus by noce_try_store_flag_constants. */ + +int test (unsigned int a, unsigned int b) +{ +return (a < b ? 0 : (a > b ? 2 : 1)); +}
Re: [PATCH v2] Return a NULL rtx when targets don't support cbranchcc4 or predicate check fails in prepare_cmp_insn
Hi Richard, 在 2022/12/5 15:31, Richard Biener 写道: > I wonder if you have a testcase you can add showing this change is > worthwhile and > fixes a bug? I want to enable cbranchcc4 on rs6000. But not all sub CCmode is supported on rs6000. So the predicate check(assert) fails and it hits ICE. I drafted two patches. This one is for the generic code, and another is for rs6000. If this one is committed, cbranchcc4 can be enabled on rs6000. Then I can create a testcase and let the predicate check fail. Right now I can't write a testcase for it as it never reaches the failure path. Thanks a lot Gui Haochen
[PATCH v2] Return a NULL rtx when targets don't support cbranchcc4 or predicate check fails in prepare_cmp_insn
Hi, It gets an assertion failure when targets don't support cbranchcc4 or predicate check fails in prepare_cmp_insn. prepare_cmp_insn is a helper function to generate compare rtx, so it should not assume that cbranchcc4 exists or all sub-CC modes are supported on one target. I think it should return the NULL rtx when cbranchcc4 is not supported or predicate check fails, as its callers already check if the return value is null or not for CC mode. This patch just does the change. Bootstrapped and tested on powerpc64-linux BE/LE and x86 with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-12-05 Haochen Gui gcc/ * optabs.cc (prepare_cmp_insn): Return a NULL rtx rather than assertion failure when targets don't have cbranch optab or predicate check fails. patch.diff diff --git a/gcc/optabs.cc b/gcc/optabs.cc index 165f8d1fa22..f6d3242479b 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -4484,10 +4484,14 @@ prepare_cmp_insn (rtx x, rtx y, enum rtx_code comparison, rtx size, { enum insn_code icode = optab_handler (cbranch_optab, CCmode); test = gen_rtx_fmt_ee (comparison, VOIDmode, x, y); - gcc_assert (icode != CODE_FOR_nothing - && insn_operand_matches (icode, 0, test)); - *ptest = test; - return; + if (icode != CODE_FOR_nothing + && insn_operand_matches (icode, 0, test)) + { + *ptest = test; + return; + } + else + goto fail; } mclass = GET_MODE_CLASS (mode);
[PATCH v5, rs6000] Change mode and insn condition for VSX scalar extract/insert instructions
Hi, For scalar extract/insert instructions, exponent field can be stored in a 32-bit register. So this patch changes the mode of exponent field from DI to SI so that these instructions can be generated in a 32-bit environment. Also it removes TARGET_64BIT check for these instructions. The instructions using DI registers can be invoked with -mpowerpc64 in a 32-bit environment. The patch changes insn condition from TARGET_64BIT to TARGET_POWERPC64 for those instructions. This patch also changes prototypes and categories of relevant built-ins and effective target checks of test cases. Compared to last version, the main changes are to remove the 64-bit environment requirement for relevant built-ins in extend.texi, and to change the type of arguments of relevant built-ins in rs6000-overload.def. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-12-01 Haochen Gui gcc/ * config/rs6000/rs6000-builtins.def (__builtin_vsx_scalar_extract_exp): Set return type to const unsigned int and move it from power9-64 to power9 catalog. (__builtin_vsx_scalar_extract_sig): Set return type to const unsigned long long. (__builtin_vsx_scalar_insert_exp): Set type of second argument to unsigned int. (__builtin_vsx_scalar_insert_exp_dp): Set type of second argument to unsigned int and move it from power9-64 to power9 catalog. * config/rs6000/vsx.md (xsxexpdp): Set mode of first operand to SImode. Remove TARGET_64BIT from insn condition. (xsxsigdp): Change insn condition from TARGET_64BIT to TARGET_POWERPC64. (xsiexpdp): Change insn condition from TARGET_64BIT to TARGET_POWERPC64. Set mode of third operand to SImode. (xsiexpdpf): Set mode of third operand to SImode. Remove TARGET_64BIT from insn condition. * config/rs6000/rs6000-overload.def (__builtin_vec_scalar_insert_exp): Set type of second argument to unsigned int. 
* doc/extend.texi (scalar_insert_exp): Set type of second argument to unsigned int and remove 64-bit environment requirement when significand has a float type. (scalar_extract_exp): Remove 64-bit environment requirement. gcc/testsuite/ * gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Remove lp64 check. * gcc.target/powerpc/bfp/scalar-extract-exp-1.c: Remove lp64 check. * gcc.target/powerpc/bfp/scalar-extract-exp-2.c: Deleted as the case is invalid now. * gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Replace lp64 check with has_arch_ppc64. * gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Likewise. * gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Likewise. * gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Replace lp64 check with has_arch_ppc64. Set type of exponent to unsigned int. * gcc.target/powerpc/bfp/scalar-insert-exp-1.c: Set type of exponent to unsigned int. * gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Replace lp64 check with has_arch_ppc64. Set type of exponent to unsigned int. * gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Remove lp64 check. Set type of exponent to unsigned int. * gcc.target/powerpc/bfp/scalar-insert-exp-2.c: Set type of exponent to unsigned int. * gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Remove lp64 check. Set type of exponent to unsigned int. * gcc.target/powerpc/bfp/scalar-insert-exp-4.c: Likewise. * gcc.target/powerpc/bfp/scalar-insert-exp-5.c: Deleted as the case is invalid now. 
patch.diff diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index f76f54793d7..d8d67fa0cad 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2833,6 +2833,11 @@ const signed int __builtin_dtstsfi_ov_td (const int<6>, _Decimal128); TSTSFI_OV_TD dfptstsfi_unordered_td {} + const unsigned int __builtin_vsx_scalar_extract_exp (double); +VSEEDP xsxexpdp {} + + const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned int); +VSIEDPF xsiexpdpf {} [power9-64] void __builtin_altivec_xst_len_r (vsc, void *, long); @@ -2847,19 +2852,13 @@ pure vsc __builtin_vsx_lxvl (const void *, signed long); LXVL lxvl {} - const signed long __builtin_vsx_scalar_extract_exp (double); -VSEEDP xsxexpdp {} - - const signed long __builtin_vsx_scalar_extract_sig (double); + const unsigned long long __builtin_vsx_scalar_extract_sig (double); VSESDP xsxsigdp {} const double __builtin_vsx_scalar_insert_exp (unsigned long long, \ -unsigned long long); + unsigned int); VSIEDP xsiexpdp {} - const double
Re: [PATCH] Add a new conversion for conditional ternary set into ifcvt [PR106536]
Hi Nilsson, 在 2022/12/2 10:49, Hans-Peter Nilsson 写道: > On Wed, 23 Nov 2022, HAO CHEN GUI via Gcc-patches wrote: > >> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi >> index 92bda1a7e14..9823eccbe68 100644 >> --- a/gcc/doc/tm.texi >> +++ b/gcc/doc/tm.texi >> @@ -7094,6 +7094,15 @@ the @code{POLY_VALUE_MIN}, @code{POLY_VALUE_MAX} and >> implementation returns the lowest possible value of @var{val}. >> @end deftypefn >> >> +@deftypefn {Target Hook} bool TARGET_NOCE_TERNARY_CSET_P (struct >> noce_if_info *@var{if_info}, rtx *@var{outer_cond}, rtx *@var{inner_cond}, >> int *@var{int1}, int *@var{int2}, int *@var{int3}) >> +This hook returns true if the if-then-else-join blocks describled in > > Random typo spotted: "described" > > Also, IMHO needs more explanation (in tm.texi preferably) why > this doesn't happen as part of general "combine" machinery. Thanks for your comments. Combine can't take it as the insns are not in same block. Also combine has the limitation on the number of insns. I will add those comments. Thanks Gui Haochen > > brgds, H-P
Re: Ping [PATCH] Change the behavior of predicate check failure on cbranchcc4 operand0 in prepare_cmp_insn
Hi Richard, 在 2022/11/29 2:46, Richard Biener 写道: > Anyhow - my question still stands - what's the fallback for the callers > that do not check for failure? How are we sure we're not running into > these when relaxing the requirement that a MODE_CC prepare_cmp_insn > must not fail? I examined the code and found that currently callers should be fine with returning a NULL_RTX for MODE_CC processing. prepare_cmp_insn is called by the following callers. 1 gen_cond_trap which doesn't use MODE_CC 2 prepare_cmp_insn itself, where the recursive call is after MODE_CC processing, so it never hits MODE_CC 3 emit_cmp_and_jump_insns which doesn't use MODE_CC 4 emit_conditional_move which checks the output is null or not 5 emit_conditional_add which checks the output is null or not Not sure if I missed something. Looking forward to your advice. Thanks a lot Gui Haochen
Ping [PATCH] Change the behavior of predicate check failure on cbranchcc4 operand0 in prepare_cmp_insn
Hi, Gentle ping this: https://gcc.gnu.org/pipermail/gcc-patches/2022-November/607083.html Thanks Gui Haochen 在 2022/11/23 10:54, HAO CHEN GUI 写道: > Hi, > I want to enable "have_cbranchcc4" on rs6000. But not all combinations of > comparison codes and sub CC modes are benefited to generate cbranchcc4 insns > on rs6000. There is an predicate for operand0 of cbranchcc4 to bypass > some combinations. It gets assertion failure in prepare_cmp_insn. I think > we shouldn't suppose that all comparison codes and sub CC modes are supported > and throw an assertion failure in prepare_cmp_insn. It might check the > predicate and go to fail if the predicate can't be satisfied. This patch > changes the behavior of those codes. > > Bootstrapped and tested on powerpc64-linux BE/LE and x86 with no > regressions. > Is this okay for trunk? Any recommendations? Thanks a lot. > > > ChangeLog > 2022-11-23 Haochen Gui > > gcc/ > * optabs.cc (prepare_cmp_insn): Go to fail other than assert it when > predicate check of "cbranchcc4" operand[0] fails. > > patch.diff > diff --git a/gcc/optabs.cc b/gcc/optabs.cc > index 165f8d1fa22..3ec8f6b17ba 100644 > --- a/gcc/optabs.cc > +++ b/gcc/optabs.cc > @@ -4484,8 +4484,9 @@ prepare_cmp_insn (rtx x, rtx y, enum rtx_code > comparison, rtx size, > { >enum insn_code icode = optab_handler (cbranch_optab, CCmode); >test = gen_rtx_fmt_ee (comparison, VOIDmode, x, y); > - gcc_assert (icode != CODE_FOR_nothing > - && insn_operand_matches (icode, 0, test)); > + gcc_assert (icode != CODE_FOR_nothing); > + if (!insn_operand_matches (icode, 0, test)) > + goto fail; >*ptest = test; >return; > }
Re: [PATCH] Add a new conversion for conditional ternary set into ifcvt [PR106536]
Hi Richard, 在 2022/11/24 4:06, Richard Biener 写道: > Wouldn't we usually either add an optab or try to recog a canonical > RTL form instead of adding a new target hook for things like this? Thanks so much for your comments. Please let me make it clear. Do you mean we should create an optab for "setb" pattern (the nested if-then-else insn) and detect candidate insns in ifcvt pass? Then generate the insn with the new optab? My concern is that some candidate insns are target specific. For example, different modes cause additional zero_extend or subreg insns generated on different targets. So I put the detection process into a target hook. Looking forward to your advice. Thanks again Gui Haochen
[PATCH] Add a new conversion for conditional ternary set into ifcvt [PR106536]
Hi, There is a new insn on my target, which has a nested if_then_else and set -1, 0 and 1 according to a comparison. [(set (match_operand:SI 0 "gpc_reg_operand" "=r") (if_then_else:SI (lt (match_operand:CC 1 "cc_reg_operand" "y") (const_int 0)) (const_int -1) (if_then_else (gt (match_dup 1) (const_int 0)) (const_int 1) (const_int 0] In ifcvt pass, it probably contains a comparison, a branch, a setcc and a constant set. 8: r122:CC=cmp(r120:DI#0,r121:DI#0) 9: pc={(r122:CC<0)?L29:pc} 14: r118:SI=r122:CC>0 29: L29: 5: r118:SI=0x This patch adds the new conversion into ifcvt and convert this kind of branch into a nested if-then-else insn if the target supports such pattern. HAVE_ternary_conditional_set indicates if the target has such nested if-then-else insn. It's set in genconfig. noce_try_ternary_cset will be executed to detect suitable pattern and convert it to the nested if-then-else insn if HAVE_ternary_conditional_set is set. The hook TARGET_NOCE_TERNARY_CSET_P detects target specific pattern and output conditions and setting integers for the nested if-then-else. Bootstrapped and tested on powerpc64-linux BE/LE and x86 with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-11-23 Haochen Gui gcc/ * doc/tm.texi: Regenerate. * doc/tm.texi.in (TARGET_NOCE_TERNARY_CSET_P): Document new hook. * genconfig.cc (have_ternary_cset_flag): New. (walk_insn_part): Detect nested if-then-else with const_int setting and set have_ternary_cset_flag. (HAVE_ternary_conditional_set): Define. * ifcvt.cc (noce_emit_ternary_cset): New function to emit nested if-then-else insns. (noce_try_ternary_cset): Detect ternary conditional set and emit the insn. (noce_process_if_block): Try to do ternary condition set convertion when a target supports ternary conditional set insn. * target.def (noce_ternary_cset_p): New hook. * targhooks.cc (default_noce_ternary_cset_p): New function. * targhooks.h (default_noce_ternary_cset_p): New declare. 
patch.diff diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 92bda1a7e14..9823eccbe68 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -7094,6 +7094,15 @@ the @code{POLY_VALUE_MIN}, @code{POLY_VALUE_MAX} and implementation returns the lowest possible value of @var{val}. @end deftypefn +@deftypefn {Target Hook} bool TARGET_NOCE_TERNARY_CSET_P (struct noce_if_info *@var{if_info}, rtx *@var{outer_cond}, rtx *@var{inner_cond}, int *@var{int1}, int *@var{int2}, int *@var{int3}) +This hook returns true if the if-then-else-join blocks describled in +@code{if_info} can be converted to a ternary conditional set implemented by +a nested if-then-else insn. The @code{int1}, @code{int2} and @code{int3} +are three possible results of the nested if-then-else insn. +@code{outer_cond} and @code{inner_cond} are the conditions for outer and +if-then-else. +@end deftypefn + @node Scheduling @section Adjusting the Instruction Scheduler diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 112462310b1..1d6f28cc50a 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4631,6 +4631,8 @@ Define this macro if a non-short-circuit operation produced by @hook TARGET_ESTIMATED_POLY_VALUE +@hook TARGET_NOCE_TERNARY_CSET_P + @node Scheduling @section Adjusting the Instruction Scheduler diff --git a/gcc/genconfig.cc b/gcc/genconfig.cc index b7c6b48eec6..902c832cf5a 100644 --- a/gcc/genconfig.cc +++ b/gcc/genconfig.cc @@ -33,6 +33,7 @@ static int max_recog_operands; /* Largest operand number seen. */ static int max_dup_operands;/* Largest number of match_dup in any insn. 
*/ static int max_clobbers_per_insn; static int have_cmove_flag; +static int have_ternary_cset_flag; static int have_cond_exec_flag; static int have_lo_sum_flag; static int have_rotate_flag; @@ -136,6 +137,12 @@ walk_insn_part (rtx part, int recog_p, int non_pc_set_src) && GET_CODE (XEXP (part, 1)) == MATCH_OPERAND && GET_CODE (XEXP (part, 2)) == MATCH_OPERAND) have_cmove_flag = 1; + else if (recog_p && non_pc_set_src + && GET_CODE (XEXP (part, 1)) == CONST_INT + && GET_CODE (XEXP (part, 2)) == IF_THEN_ELSE + && GET_CODE (XEXP (XEXP (part, 2), 1)) == CONST_INT + && GET_CODE (XEXP (XEXP (part, 2), 2)) == CONST_INT) + have_ternary_cset_flag = 1; break; case COND_EXEC: @@ -328,6 +335,11 @@ main (int argc, const char **argv) else printf ("#define HAVE_conditional_move 0\n"); + if (have_ternary_cset_flag) +printf ("#define HAVE_ternary_conditional_set 1\n"); + else +printf
[PATCH] Change the behavior of predicate check failure on cbranchcc4 operand0 in prepare_cmp_insn
Hi, I want to enable "have_cbranchcc4" on rs6000. But not all combinations of comparison codes and sub CC modes are benefited to generate cbranchcc4 insns on rs6000. There is an predicate for operand0 of cbranchcc4 to bypass some combinations. It gets assertion failure in prepare_cmp_insn. I think we shouldn't suppose that all comparison codes and sub CC modes are supported and throw an assertion failure in prepare_cmp_insn. It might check the predicate and go to fail if the predicate can't be satisfied. This patch changes the behavior of those codes. Bootstrapped and tested on powerpc64-linux BE/LE and x86 with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-11-23 Haochen Gui gcc/ * optabs.cc (prepare_cmp_insn): Go to fail other than assert it when predicate check of "cbranchcc4" operand[0] fails. patch.diff diff --git a/gcc/optabs.cc b/gcc/optabs.cc index 165f8d1fa22..3ec8f6b17ba 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -4484,8 +4484,9 @@ prepare_cmp_insn (rtx x, rtx y, enum rtx_code comparison, rtx size, { enum insn_code icode = optab_handler (cbranch_optab, CCmode); test = gen_rtx_fmt_ee (comparison, VOIDmode, x, y); - gcc_assert (icode != CODE_FOR_nothing - && insn_operand_matches (icode, 0, test)); + gcc_assert (icode != CODE_FOR_nothing); + if (!insn_operand_matches (icode, 0, test)) + goto fail; *ptest = test; return; }
Re: [PATCHv2, rs6000] Enable have_cbranchcc4 on rs6000
Hi Segher, Thanks for your comments. 在 2022/11/22 7:49, Segher Boessenkool 写道: > *cbranch_2insn is not a machine insn. It generates a cror and a branch > insn. This makes no sense to have in a cbranchcc: those do a branch > based on an existing cr field, so based on the *output* of that cror. > > If ifcvt requires differently, ifcvt needs fixing. > I have a question here. For rs6000, "*cbranch_2insn" should not be generated by cbranch_optab? I mean it gets icode from cbranch_optab and generates insn from this icode. If so, the predicate of cbranchcc4 should be checked every time before insn generation rather than just doing an assertion. > We want to use the output of the cror multiple times, not generate more > cror insns. > > I don't think the behaviour of ifcvt is correct here at all, no. It > also does not consider the cost of the code as far as I can see? That > could reduce the impact of this problem at least. ifcvt tries to generate the converted sequence. Then it compares the cost of the new sequence to the cost of the original. If it benefits, the conversion will be done. Thanks Gui Haochen
Re: [PATCHv2, rs6000] Enable have_cbranchcc4 on rs6000
Hi Kewen, 在 2022/11/22 11:11, Kewen.Lin 写道: > Maybe we can adjust prepare_cmp_insn to fail if the constructed cbranchcc4 > pattern doesn't satisfy the predicate of operand 0 rather than to assert. > It's something like: > > if (!insn_operand_matches (icode, 0, test)) > goto fail; > > or only assign and return if insn_operand_matches (icode, 0, test). > > The code makes the assumption that all this kind of cbranchcc4 patterns > should match what target defines for cbranchcc4 optab, but unfortunately > it's not sure for our port and I don't see how it should be. Thanks for your comments. I just drafted a patch to let it go to "fail" when the predicate of operand 0 is not satisfied. It works and passed bootstrap on ppc64le. But my concern is prepare_cmp_insn is a generic function and is used to create a cmp rtx. It is not only called by emit_conditional* (finally called by ifcvt) but other functions (even new functions). If we change the logic in prepare_cmp_insn, we may lose some potential optimization. After all, the branch_2insn is a valid insn. I think the essence of the problem is we want to exclude those comparisons (from cbranchcc4 used in ifcvt) which need two CC bits. So, we can change the logic of ifcvt - add an additional check with the predicate of operand 0 when checking the have_cbranchcc4 flag in ifcvt. What's your opinion? Thanks Gui Haochen
Re: [PATCHv2, rs6000] Enable have_cbranchcc4 on rs6000
Hi Segher, 在 2022/11/18 20:18, Segher Boessenkool 写道: > I don't think we should pretend we have any conditional jumps the > machine does not actually have, in cbranchcc4. When would this ever be > useful? cror;beq can be quite expensive, compared to the code it would > replace anyway. > > If something generates those here (which then ICEs later), that is > wrong, fix *that*? Is it ifcvt doing it? "*cbranch_2insn" is a valid insn for rs6000. So it generates such insn at expand pass. The "prepare_cmp_insn" called by ifcvt just wants to verify that the following comparison rtx is valid. (unlt (reg:CCFP 156) (const_int 0 [0])) It should be valid as it's extracted from an existing insn. It hits ICE only when the comparison rtx can't pass the predicate check of "cbranchcc4". So "cbranchcc4" should include "extra_insn_branch_comparison_operator". Then, ifcvt tries to call emit_conditional_move_1 to generates a condition move for FP mode. It definitely fails as there is no conditional move insn for FP mode in rs6000. The behavior of ifcvt is correct. It tries to do conversion but fails. It won't hit ICEs after cbranchcc4 is correctly defined. Actually, "*cbranch_2insn" has the same logical as float "*cbranch" in ifcvt. Both of them get a final false return from "rs6000_emit_int_cmove" as rs6000 doesn't have conditional move for FP mode. So I think "cbranchcc4" should include "extra_insn_branch_comparison_operator" as "*cbranch_2insn" is a valid insn. Just let ifcvt decide a conditional move is valid or not. Thanks Gui Haochen
Re: [PATCHv2, rs6000] Enable have_cbranchcc4 on rs6000
Hi David, 在 2022/11/17 21:24, David Edelsohn 写道: > This is better, but the pattern should be near and after the existing > cbranch4 patterns earlier in the file, not the *cbranch pattern. It > doesn't match the comment. Sure, I will put it after existing "cbranch4" patterns. > > Why are you using zero_constant predicate instead of matching (const_int 0) > for operand 2? The "const_int 0" is an operand other than a predicate. We need a predicate here. > > Why does this need the new all_branch_comparison_operator? Can the ifcvt > optimization correctly elide the 2 insn sequence? Because rs6000 defines "*cbranch_2insn" insn, such insns are generated after expand. (jump_insn 50 47 51 11 (set (pc) (if_then_else (ge (reg:CCFP 156) (const_int 0 [0])) (label_ref 53) (pc))) "/home/guihaoc/gcc/gcc-mainline-base/gmp/mpz/cmpabs_d.c":80:7 884 {*cbranch_2insn} (expr_list:REG_DEAD (reg:CCFP 156) (int_list:REG_BR_PROB 633507684 (nil))) -> 53) In prepare_cmp_insn, the comparison is verified by insn_operand_matches. If extra_insn_branch_comparison_operator is not included in "cbranchcc4" predicate, it hits ICE here. if (GET_MODE_CLASS (mode) == MODE_CC) { enum insn_code icode = optab_handler (cbranch_optab, CCmode); test = gen_rtx_fmt_ee (comparison, VOIDmode, x, y); gcc_assert (icode != CODE_FOR_nothing && insn_operand_matches (icode, 0, test)); *ptest = test; return; } The real conditional move is generated by emit_conditional_move_1. Commonly "*cbranch_2insn" can't be optimized out and it returns NULL_RTX. 
if (COMPARISON_P (comparison)) { saved_pending_stack_adjust save; save_pending_stack_adjust (); last = get_last_insn (); do_pending_stack_adjust (); machine_mode cmpmode = comp.mode; prepare_cmp_insn (XEXP (comparison, 0), XEXP (comparison, 1), GET_CODE (comparison), NULL_RTX, unsignedp, OPTAB_WIDEN, , ); if (comparison) { rtx res = emit_conditional_move_1 (target, comparison, op2, op3, mode); if (res != NULL_RTX) return res; } delete_insns_since (last); restore_pending_stack_adjust (); I think that extra_insn_branch_comparison_operator should be included in "cbranchcc4" predicates as such insns exist. And leave it to emit_conditional_move which decides whether it can be optimized or not. Thanks for your comments Gui Haochen
[PATCHv2, rs6000] Enable have_cbranchcc4 on rs6000
Hi, The patch enables have_cbrnachcc4 which is a flag in ifcvt.cc to indicate if branch by CC bits is invalid or not. The new expand pattern "cbranchcc4" is created which intend to match the pattern defined in "*cbranch", "*cbranch_2insn" and "*creturn". The operand sequence in "cbranchcc4" is inline with the definition in gccint. And the operand sequence doesn't matter in pattern matching. So I think it should work. Compared to last version, one new predicate and one new expander are created. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-11-17 Haochen Gui gcc/ * config/rs6000/predicates.md (all_branch_comparison_operator): New, and includes operators in branch_comparison_operator and extra_insn_branch_comparison_operator. * config/rs6000/rs6000.md (cbranchcc4): New expand pattern. gcc/testsuite/ * gcc.target/powerpc/cbranchcc4.c: New. patch.diff diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index b1fcc69bb60..843b6f39b84 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1308,6 +1308,7 @@ (define_special_predicate "equality_operator" ;; Return 1 if OP is a comparison operation that is valid for a branch ;; instruction. We check the opcode against the mode of the CC value. + ;; validate_condition_mode is an assertion. (define_predicate "branch_comparison_operator" (and (match_operand 0 "comparison_operator") @@ -1331,6 +1332,11 @@ (define_predicate "extra_insn_branch_comparison_operator" GET_MODE (XEXP (op, 0))), 1"))) +;; Return 1 if OP is a comparison operation that is valid for a branch. +(define_predicate "all_branch_comparison_operator" + (ior (match_operand 0 "branch_comparison_operator") + (match_operand 0 "extra_insn_branch_comparison_operator"))) + ;; Return 1 if OP is an unsigned comparison operator. 
(define_predicate "unsigned_comparison_operator" (match_code "ltu,gtu,leu,geu")) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index e9e5cd1e54d..7b7d747a85d 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -13067,6 +13067,16 @@ (define_insn_and_split "*_cc" ;; Conditional branches. ;; These either are a single bc insn, or a bc around a b. +(define_expand "cbranchcc4" + [(set (pc) + (if_then_else (match_operator 0 "all_branch_comparison_operator" + [(match_operand 1 "cc_reg_operand") +(match_operand 2 "zero_constant")]) + (label_ref (match_operand 3)) + (pc)))] + "" + "") + (define_insn "*cbranch" [(set (pc) (if_then_else (match_operator 1 "branch_comparison_operator" diff --git a/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c b/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c new file mode 100644 index 000..528ba1a878d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-ce1" } */ +/* { dg-final { scan-rtl-dump "noce_try_store_flag_constants" "ce1" } } */ + +/* The inner branch should be detected by ifcvt then be converted to a setcc + with a plus by noce_try_store_flag_constants. */ + +int test (unsigned int a, unsigned int b) +{ +return (a < b ? 0 : (a > b ? 2 : 1)); +}
Re: [rs6000, patch] Enable have_cbranchcc4 on rs6000
Hi David, I found the definition of the operands in 'cbranch'. The arguments matter. I will create a new expand pattern for cbranchcc4. Thanks a lot for your comments. 'cbranchmode4’ Conditional branch instruction combined with a compare instruction. Operand 0 is a comparison operator. Operand 1 and operand 2 are the first and second operands of the comparison, respectively. Operand 3 is the code_label to jump to. Gui Haochen Thanks 在 2022/11/16 11:04, David Edelsohn 写道: > It's great to add cbranchcc4 to the Power port where it definitely was an > omission, but adapting *cbranch for that purpose is the wrong approach. The > changes to the pattern are incorrect because they are covering up a > difference in ordering of the operands. One can argue that the named pattern > only enables the functionality in ifcvt and the pattern otherwise is used in > its previous role. But this is a Frankenstein monster approach. You're > trying to twist the existing pattern so that it triggers as cbranchcc4, but > creating a pattern that messes up its arguments and only works because the > new, named pattern never is called. This is too ugly. Please fix.
[rs6000, patch] Enable have_cbranchcc4 on rs6000
Hi, The patch enables have_cbrnachcc4 which is a flag in ifcvt.cc to indicate if branch by CC bits is invalid or not. As rs6000 already has "*cbranch" insn which does branching according to CC bits, the flag should be enabled and relevant branches can be optimized out. The test case illustrates the optimization. "*cbranch" is an anonymous insn which can't be generated directly. So changing "const_int 0" to the third operand predicated by "zero_constant" won't cause ICEs as orginal patterns still can be matched. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-11-16 Haochen Gui gcc/ * config/rs6000/rs6000.md (*cbranch): Rename to... (cbranchcc4): ...this, and set const_int 0 to the third operand. gcc/testsuite/ * gcc.target/powerpc/cbranchcc4.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index e9e5cd1e54d..ee171f21f6a 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -13067,11 +13067,11 @@ (define_insn_and_split "*_cc" ;; Conditional branches. ;; These either are a single bc insn, or a bc around a b. -(define_insn "*cbranch" +(define_insn "cbranchcc4" [(set (pc) (if_then_else (match_operator 1 "branch_comparison_operator" [(match_operand 2 "cc_reg_operand" "y") - (const_int 0)]) + (match_operand 3 "zero_constant")]) (label_ref (match_operand 0)) (pc)))] "" diff --git a/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c b/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c new file mode 100644 index 000..1751d274bbf --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-ce1" } */ +/* { dg-final {scan-rtl-dump "noce_try_store_flag_constants" "ce1" } } */ + +int test (unsigned int a, unsigned int b) +{ +return (a < b ? 0 : (a > b ? 2 : 1)); +}
[PATCH v4, rs6000] Change mode and insn condition for VSX scalar extract/insert instructions
Hi, For scalar extract/insert instructions, exponent field can be stored in a 32-bit register. So this patch changes the mode of exponent field from DI to SI. So these instructions can be generated in a 32-bit environment. The patch removes TARGET_64BIT check for these instructiions. The instructions using DI registers can be invoked with -mpowerpc64 in a 32-bit environment. The patch changes insn condition from TARGET_64BIT to TARGET_POWERPC64 for those instructions. This patch also changes prototypes and catagories of relevant built-ins and effective target checks of test cases. Compared to last version, main changes are to set catagories of relevant built-ins from power9-64 to power9 and remove some unnecessary test cases. Last version: https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601196.html Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-11-07 Haochen Gui gcc/ * config/rs6000/rs6000-builtins.def (__builtin_vsx_scalar_extract_exp): Set return type to const unsigned int and move it from power9-64 to power9 catatlog. (__builtin_vsx_scalar_extract_sig): Set return type to const unsigned long long. (__builtin_vsx_scalar_insert_exp): Set type of second argument to unsigned int. (__builtin_vsx_scalar_insert_exp_dp): Set type of second argument to unsigned int and move it from power9-64 to power9 catatlog. * config/rs6000/vsx.md (xsxexpdp): Set mode of first operand to SImode. Remove TARGET_64BIT from insn condition. (xsxsigdp): Change insn condition from TARGET_64BIT to TARGET_POWERPC64. (xsiexpdp): Change insn condition from TARGET_64BIT to TARGET_POWERPC64. Set mode of third operand to SImode. (xsiexpdpf): Set mode of third operand to SImode. Remove TARGET_64BIT from insn condition. gcc/testsuite/ * gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Remove lp64 check. * gcc.target/powerpc/bfp/scalar-extract-exp-1.c: Remove lp64 check. 
* gcc.target/powerpc/bfp/scalar-extract-exp-2.c: Deleted as case is invalid now. * gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Replace lp64 check with has_arch_ppc64. * gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Likewise. * gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Likewise. * gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Replace lp64 check with has_arch_ppc64. Set type of exponent to unsigned int. * gcc.target/powerpc/bfp/scalar-insert-exp-1.c: Set type of exponent to unsigned int. * gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Replace lp64 check with has_arch_ppc64. Set type of exponent to unsigned int. * gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Remove lp64 check. Set type of exponent to unsigned int. * gcc.target/powerpc/bfp/scalar-insert-exp-2.c: Set type of exponent to unsigned int. * gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Remove lp64 check. Set type of exponent to unsigned int. * gcc.target/powerpc/bfp/scalar-insert-exp-4.c: Likewise. * gcc.target/powerpc/bfp/scalar-insert-exp-5.c: Deleted as case is invalid now. 
patch.diff diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index f76f54793d7..d8d67fa0cad 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2833,6 +2833,11 @@ const signed int __builtin_dtstsfi_ov_td (const int<6>, _Decimal128); TSTSFI_OV_TD dfptstsfi_unordered_td {} + const unsigned int __builtin_vsx_scalar_extract_exp (double); +VSEEDP xsxexpdp {} + + const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned int); +VSIEDPF xsiexpdpf {} [power9-64] void __builtin_altivec_xst_len_r (vsc, void *, long); @@ -2847,19 +2852,13 @@ pure vsc __builtin_vsx_lxvl (const void *, signed long); LXVL lxvl {} - const signed long __builtin_vsx_scalar_extract_exp (double); -VSEEDP xsxexpdp {} - - const signed long __builtin_vsx_scalar_extract_sig (double); + const unsigned long long __builtin_vsx_scalar_extract_sig (double); VSESDP xsxsigdp {} const double __builtin_vsx_scalar_insert_exp (unsigned long long, \ -unsigned long long); + unsigned int); VSIEDP xsiexpdp {} - const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned long long); -VSIEDPF xsiexpdpf {} - pure vsc __builtin_vsx_xl_len_r (void *, signed long); XL_LEN_R xl_len_r {} diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index e226a93bbe5..9d3a2340a79 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -5095,10 +5095,10 @@ (define_insn "xsxexpqp_"
[PATCH-2, rs6000] Reverse V8HI on Power8 by vector rotation [PR100866]
Hi, This patch implements V8HI byte reverse on Power8 by vector rotation. It should be effecient than orignial vector permute. The patch comes from Xionghu's comments in PR. I just added a test case for it. Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-10-24 Xionghu Luo gcc/ PR target/100866 * config/rs6000/altivec.md: (*altivec_vrl): Named to... (altivec_vrl): ...this. * config/rs6000/vsx.md (revb_): Call vspltish and vrlh when target is Power8 and mode is V8HI. gcc/testsuite/ PR target/100866 * gcc.target/powerpc/pr100866-2.c: New. patch.diff diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 2c4940f2e21..84660073f32 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1875,7 +1875,7 @@ (define_insn "altivec_vpkuum_direct" } [(set_attr "type" "vecperm")]) -(define_insn "*altivec_vrl" +(define_insn "altivec_vrl" [(set (match_operand:VI2 0 "register_operand" "=v") (rotate:VI2 (match_operand:VI2 1 "register_operand" "v") (match_operand:VI2 2 "register_operand" "v")))] diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index e226a93bbe5..34662a7252d 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -6092,12 +6092,21 @@ (define_expand "revb_" emit_insn (gen_p9_xxbr_ (operands[0], operands[1])); else { - /* Want to have the elements in reverse order relative -to the endian mode in use, i.e. in LE mode, put elements -in BE order. */ - rtx sel = swap_endian_selector_for_mode(mode); - emit_insn (gen_altivec_vperm_ (operands[0], operands[1], - operands[1], sel)); + if (mode == V8HImode) + { + rtx splt = gen_reg_rtx (V8HImode); + emit_insn (gen_altivec_vspltish (splt, GEN_INT (8))); + emit_insn (gen_altivec_vrlh (operands[0], operands[1], splt)); + } + else + { + /* Want to have the elements in reverse order relative +to the endian mode in use, i.e. in LE mode, put elements +in BE order. 
*/ + rtx sel = swap_endian_selector_for_mode (mode); + emit_insn (gen_altivec_vperm_ (operands[0], operands[1], + operands[1], sel)); + } } DONE; diff --git a/gcc/testsuite/gcc.target/powerpc/pr100866-2.c b/gcc/testsuite/gcc.target/powerpc/pr100866-2.c new file mode 100644 index 000..4357d1beb09 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr100866-2.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ +/* { dg-final { scan-assembler {\mvspltish\M} } } */ +/* { dg-final { scan-assembler {\mvrlh\M} } } */ + +#include + +vector unsigned short revb(vector unsigned short a) +{ + return vec_revb(a); +} +
[PATCH-1, rs6000] Generate permute index directly for little endian target [PR100866]
Hi, This patch modifies the help function which generates permute index for vector byte reversion and generates permute index directly for little endian targets. It saves one "xxlnor" instructions on P8 little endian targets as the original process needs an "xxlnor" to calculate complement for the index. Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-10-11 Haochen Gui gcc/ PR target/100866 * config/rs6000/rs6000-call.cc (swap_endian_selector_for_mode): Generate permute index directly for little endian targets. * config/rs6000/vsx.md (revb_): Call vprem directly with corresponding permute indexes. gcc/testsuite/ PR target/100866 * gcc.target/powerpc/pr100866.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc index 551968b0995..bad8e9e0e52 100644 --- a/gcc/config/rs6000/rs6000-call.cc +++ b/gcc/config/rs6000/rs6000-call.cc @@ -2839,7 +2839,10 @@ swap_endian_selector_for_mode (machine_mode mode) } for (i = 0; i < 16; ++i) -perm[i] = GEN_INT (swaparray[i]); +if (BYTES_BIG_ENDIAN) + perm[i] = GEN_INT (swaparray[i]); +else + perm[i] = GEN_INT (~swaparray[i] & 0x001f); return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm))); diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index e226a93bbe5..b68eba48d2c 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -6096,8 +6096,8 @@ (define_expand "revb_" to the endian mode in use, i.e. in LE mode, put elements in BE order. 
*/ rtx sel = swap_endian_selector_for_mode(mode); - emit_insn (gen_altivec_vperm_ (operands[0], operands[1], - operands[1], sel)); + emit_insn (gen_altivec_vperm__direct (operands[0], operands[1], + operands[1], sel)); } DONE; diff --git a/gcc/testsuite/gcc.target/powerpc/pr100866.c b/gcc/testsuite/gcc.target/powerpc/pr100866.c new file mode 100644 index 000..c708dfd502e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr100866.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ +/* { dg-final { scan-assembler-not "xxlnor" } } */ + +#include + +vector unsigned short revb(vector unsigned short a) +{ + return vec_revb(a); +}
[PATCH v7, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]
Hi, This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000. Tests show that outputs of xs[min/max]dp are consistent with the standard of C99 fmin/max. This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead of smin/max when fast-math is not set. While fast-math is set, xs[min/max]dp are folded to MIN/MAX_EXPR in gimple, and finally expanded to smin/max. Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-09-26 Haochen Gui gcc/ PR target/103605 * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Gimple fold RS6000_BIF_XSMINDP and RS6000_BIF_XSMAXDP when fast-math is set. * config/rs6000/rs6000.md (FMINMAX): New int iterator. (minmax_op): New int attribute. (UNSPEC_FMAX, UNSPEC_FMIN): New unspecs. (f3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN. * config/rs6000/rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set pattern to fmaxdf3. (__builtin_vsx_xsmindp): Set pattern to fmindf3. gcc/testsuite/ PR target/103605 * gcc.dg/powerpc/pr103605.h: New. * gcc.dg/powerpc/pr103605-1.c: New. * gcc.dg/powerpc/pr103605-2.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index e925ba9fad9..944ae9fe55c 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -1588,6 +1588,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) gimple_set_location (g, gimple_location (stmt)); gsi_replace (gsi, g, true); return true; +/* fold into MIN_EXPR when fast-math is set. */ +case RS6000_BIF_XSMINDP: /* flavors of vec_min. */ case RS6000_BIF_XVMINDP: case RS6000_BIF_XVMINSP: @@ -1614,6 +1616,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) gimple_set_location (g, gimple_location (stmt)); gsi_replace (gsi, g, true); return true; +/* fold into MAX_EXPR when fast-math is set. */ +case RS6000_BIF_XSMAXDP: /* flavors of vec_max. 
*/ case RS6000_BIF_XVMAXDP: case RS6000_BIF_XVMAXSP: diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index f4a9f24bcc5..8b735493b40 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -1613,10 +1613,10 @@ XSCVSPDP vsx_xscvspdp {} const double __builtin_vsx_xsmaxdp (double, double); -XSMAXDP smaxdf3 {} +XSMAXDP fmaxdf3 {} const double __builtin_vsx_xsmindp (double, double); -XSMINDP smindf3 {} +XSMINDP fmindf3 {} const double __builtin_vsx_xsrdpi (double); XSRDPI vsx_xsrdpi {} diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index bf85baa5370..ae0dd98f0f9 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -158,6 +158,8 @@ (define_c_enum "unspec" UNSPEC_HASHCHK UNSPEC_XXSPLTIDP_CONST UNSPEC_XXSPLTIW_CONST + UNSPEC_FMAX + UNSPEC_FMIN ]) ;; @@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr" DONE; }) + +(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN]) + +(define_int_attr minmax_op [(UNSPEC_FMAX "max") +(UNSPEC_FMIN "min")]) + +(define_insn "f3" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa") + (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa") + (match_operand:SFDF 2 "vsx_register_operand" "wa")] +FMINMAX))] + "TARGET_VSX && !flag_finite_math_only" + "xsdp %x0,%x1,%x2" + [(set_attr "type" "fp")] +) + (define_expand "movcc" [(set (match_operand:GPR 0 "gpc_reg_operand") (if_then_else:GPR (match_operand 1 "comparison_operator") diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605-1.c b/gcc/testsuite/gcc.target/powerpc/pr103605-1.c new file mode 100644 index 000..923deec6a1e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr103605-1.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mvsx" } */ +/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */ + +#include 
"pr103605.h" diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605-2.c b/gcc/testsuite/gcc.target/powerpc/pr103605-2.c new file mode 100644 index 000..f50fe9468f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr103605-2.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mvsx -ffast-math" } */ +/* { dg-final { scan-assembler-times {\mxsmaxcdp\M} 3 { target has_arch_pwr9 } } } */ +/* { dg-final { scan-assembler-times {\mxsmincdp\M} 3 { target has_arch_pwr9 } } } */ +/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 { target { ! has_arch_pwr9 } } } } */
Re: [PATCH v6, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]
Hi Kewen & Segher, Thanks so much for your review comments. On 22/9/2022 上午 10:28, Kewen.Lin wrote: > on 2022/9/22 05:56, Segher Boessenkool wrote: >> Hi! >> >> On Fri, Jun 24, 2022 at 10:02:19AM +0800, HAO CHEN GUI wrote: >>> This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead >>> of smin/max. So the builtins always generate xs[min/max]dp on all >>> platforms. >> >> But how does this not blow up with -ffast-math? > > Indeed. Since it guards with "TARGET_VSX && !flag_finite_math_only", > the bifs seem to cause ICE at -ffast-math. > > Haochen, could you double check it? I tested it with "-ffast-math". fmin/max functions are converted to MIN/MAX_EXPR in the gimple lowering pass. But the built-ins are not and hit the ICE. I had thought the built-ins were folded to MIN/MAX_EXPR like the vec_ versions when fast-math is set. In fact they're not. Sorry for that. I made a patch to fold these two built-ins to MIN/MAX_EXPR when fast-math is set. Then the built-ins are converted to MIN/MAX_EXPR and expanded to smin/max. Thanks for pointing out the problem! > >> In the other direction I am worried that the unspecs will degrade >> performance (relative to smin/smax) when -ffast-math *is* active (and >> this new builtin code and pattern doesn't blow up). > > For fmin/fmax it would be fine, since they are transformed to {MAX,MIN} > EXPR in middle end, and yes, it can degrade for the bifs, although IMHO > the previous expansion to smin/smax contradicts with the bif names (users > expect to map them to xs{min,max}dp than others). > >> >> I still think we should get RTL codes for this, to have access to proper >> floating point min/max semantics always and everywhere. "fmin" and >> "fmax" seem to be good names :-) > > It would be good, especially if we have observed some uses of these bifs > and further opportunities around them. :) > Shall we submit a PR to add fmin/fmax to RTL codes? > BR, > Kewen
Ping [PATCH v3, rs6000] Change mode and insn condition for VSX scalar extract/insert instructions
Hi, Gentle ping this: https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601196.html Thanks. On 7/9/2022 下午 3:44, HAO CHEN GUI wrote: > Hi, > > For scalar extract/insert instructions, exponent field can be stored in a > 32-bit register. So this patch changes the mode of exponent field from DI to > SI. The instructions using DI registers can be invoked with -mpowerpc64 in a > 32-bit environment. The patch changes insn condition from TARGET_64BIT to > TARGET_POWERPC64 for those instructions. > > This patch also changes prototypes of relevant built-ins and effective > target of test cases. > > Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. > Is this okay for trunk? Any recommendations? Thanks a lot. > > ChangeLog > 2022-09-07 Haochen Gui > > gcc/ > * config/rs6000/rs6000-builtins.def > (__builtin_vsx_scalar_extract_exp): Set return type to const unsigned > int. > (__builtin_vsx_scalar_extract_sig): Set return type to const unsigned > long long. > (__builtin_vsx_scalar_insert_exp): Set type of second argument to > unsigned int. > (__builtin_vsx_scalar_insert_exp_dp): Likewise. > * config/rs6000/vsx.md (xsxexpdp): Set mode of first operand to > SImode. Remove TARGET_64BIT from insn condition. > (xsxsigdp): Change insn condition from TARGET_64BIT to TARGET_POWERPC64. > (xsiexpdp): Change insn condition from TARGET_64BIT to > TARGET_POWERPC64. Set mode of third operand to SImode. > (xsiexpdpf): Set mode of third operand to SImode. Remove TARGET_64BIT > from insn condition. > > gcc/testsuite/ > * gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Change effective > target from lp64 to has_arch_ppc64. > * gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Likewise. > * gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Likewise. > * gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Likewise. > * gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Likewise. > * gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Likewise. 
> * gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Likewise. > * gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Likewise. > > patch.diff > diff --git a/gcc/config/rs6000/rs6000-builtins.def > b/gcc/config/rs6000/rs6000-builtins.def > index f76f54793d7..ca2a1d7657e 100644 > --- a/gcc/config/rs6000/rs6000-builtins.def > +++ b/gcc/config/rs6000/rs6000-builtins.def > @@ -2847,17 +2847,17 @@ >pure vsc __builtin_vsx_lxvl (const void *, signed long); > LXVL lxvl {} > > - const signed long __builtin_vsx_scalar_extract_exp (double); > + const unsigned int __builtin_vsx_scalar_extract_exp (double); > VSEEDP xsxexpdp {} > > - const signed long __builtin_vsx_scalar_extract_sig (double); > + const unsigned long long __builtin_vsx_scalar_extract_sig (double); > VSESDP xsxsigdp {} > >const double __builtin_vsx_scalar_insert_exp (unsigned long long, \ > -unsigned long long); > + unsigned int); > VSIEDP xsiexpdp {} > > - const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned long > long); > + const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned int); > VSIEDPF xsiexpdpf {} > >pure vsc __builtin_vsx_xl_len_r (void *, signed long); > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > index e226a93bbe5..9d3a2340a79 100644 > --- a/gcc/config/rs6000/vsx.md > +++ b/gcc/config/rs6000/vsx.md > @@ -5095,10 +5095,10 @@ (define_insn "xsxexpqp_" > > ;; VSX Scalar Extract Exponent Double-Precision > (define_insn "xsxexpdp" > - [(set (match_operand:DI 0 "register_operand" "=r") > - (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] > + [(set (match_operand:SI 0 "register_operand" "=r") > + (unspec:SI [(match_operand:DF 1 "vsx_register_operand" "wa")] >UNSPEC_VSX_SXEXPDP))] > - "TARGET_P9_VECTOR && TARGET_64BIT" > + "TARGET_P9_VECTOR" >"xsxexpdp %0,%x1" >[(set_attr "type" "integer")]) > > @@ -5116,7 +5116,7 @@ (define_insn "xsxsigdp" >[(set (match_operand:DI 0 "register_operand" "=r") > (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] 
>UNSPEC_VSX_SXSIG))] > - "TARGET_P9_VECTOR && TARGET_64BIT" > + "TARGET_P9_VECTOR && TARGET_POWERPC64" >"xsxsigdp %0,%x1" >[(set_attr "type" "integer")]) > > @@ -5145,9 +5145,9 @@ (define_insn "xsiexpqp_" > (define_insn "xsiexpdp" >[(set (match_operand:DF 0 "vsx_register_operand" "=wa") > (unspec:DF [(match_operand:DI 1 "register_operand" "r") > - (match_operand:DI 2 "register_operand" "r")] > + (match_operand:SI 2 "register_operand" "r")] >UNSPEC_VSX_SIEXPDP))] > - "TARGET_P9_VECTOR && TARGET_64BIT" > + "TARGET_P9_VECTOR && TARGET_POWERPC64" >"xsiexpdp %x0,%1,%2" >[(set_attr "type" "fpsimple")]) > > @@
Ping^3 [PATCH v2, rs6000] Use CC for BCD operations [PR100736]
Hi, Gentle ping this: https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html Thanks. On 1/8/2022 上午 10:02, HAO CHEN GUI wrote: > Hi, > Gentle ping this: > https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html > Thanks. > > On 4/7/2022 下午 2:33, HAO CHEN GUI wrote: >> Hi, >>Gentle ping this: >> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html >> Thanks. >> >> On 22/6/2022 下午 4:26, HAO CHEN GUI wrote: >>> Hi, >>> This patch uses CC instead of CCFP for all BCD operations. Thus, infinite >>> math flag has no impact on BCD operations. To support BCD overflow and >>> invalid coding, an UNSPEC is defined to move the bit to a general register. >>> The patterns of condition branch and return with overflow bit are defined as >>> the UNSPEC and branch/return can be combined to one jump insn. The split >>> pattern of overflow bit extension is define for optimization. >>> >>> This patch also replaces bcdadd with bcdsub for BCD invaliding coding >>> expand. >>> >>> ChangeLog >>> 2022-06-22 Haochen Gui >>> >>> gcc/ >>> PR target/100736 >>> * config/rs6000/altivec.md (BCD_TEST): Remove unordered. >>> (bcd_): Replace CCFP with CC. >>> (*bcd_test_): Replace CCFP with CC. Generate >>> condition insn with CC mode. >>> (bcd_overflow_): New. >>> (*bcdoverflow_): New. >>> (*bcdinvalid_): Removed. >>> (bcdinvalid_): Implement by UNSPEC_BCDSUB and UNSPEC_BCD_OVERFLOW. >>> (nuun): New. >>> (*overflow_cbranch): New. >>> (*overflow_creturn): New. >>> (*overflow_extendsidi): New. >>> (bcdshift_v16qi): Replace CCFP with CC. >>> (bcdmul10_v16qi): Likewise. >>> (bcddiv10_v16qi): Likewise. >>> (peephole for bcd_add/sub): Likewise. >>> * config/rs6000/rs6000-builtins.def (__builtin_bcdadd_ov_v1ti): Set >>> pattern to bcdadd_overflow_v1ti. >>> (__builtin_bcdadd_ov_v16qi): Set pattern to bcdadd_overflow_v16qi. >>> (__builtin_bcdsub_ov_v1ti): Set pattern to bcdsub_overflow_v1ti. >>> (__builtin_bcdsub_ov_v16qi): Set pattern to bcdsub_overflow_v16qi. 
>>> >>> gcc/testsuite/ >>> PR target/100736 >>> * gcc.target/powerpc/bcd-4.c: Adjust number of bcdadd and bcdsub. >>> Scan no cror insns. >>> >>> patch.diff >>> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md >>> index efc8ae35c2e..26f131e61ea 100644 >>> --- a/gcc/config/rs6000/altivec.md >>> +++ b/gcc/config/rs6000/altivec.md >>> @@ -4370,7 +4370,7 @@ (define_int_iterator UNSPEC_BCD_ADD_SUB >>> [UNSPEC_BCDADD UNSPEC_BCDSUB]) >>> (define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add") >>> (UNSPEC_BCDSUB "sub")]) >>> >>> -(define_code_iterator BCD_TEST [eq lt le gt ge unordered]) >>> +(define_code_iterator BCD_TEST [eq lt le gt ge]) >>> (define_mode_iterator VBCD [V1TI V16QI]) >>> >>> (define_insn "bcd_" >>> @@ -4379,7 +4379,7 @@ (define_insn "bcd_" >>> (match_operand:VBCD 2 "register_operand" "v") >>> (match_operand:QI 3 "const_0_to_1_operand" "n")] >>> UNSPEC_BCD_ADD_SUB)) >>> - (clobber (reg:CCFP CR6_REGNO))] >>> + (clobber (reg:CC CR6_REGNO))] >>>"TARGET_P8_VECTOR" >>>"bcd. %0,%1,%2,%3" >>>[(set_attr "type" "vecsimple")]) >>> @@ -4389,9 +4389,9 @@ (define_insn "bcd_" >>> ;; UNORDERED test on an integer type (like V1TImode) is not defined. The >>> type >>> ;; probably should be one that can go in the VMX (Altivec) registers, so we >>> ;; can't use DDmode or DFmode. 
>>> -(define_insn "*bcd_test_" >>> - [(set (reg:CCFP CR6_REGNO) >>> - (compare:CCFP >>> +(define_insn "bcd_test_" >>> + [(set (reg:CC CR6_REGNO) >>> + (compare:CC >>> (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v") >>>(match_operand:VBCD 2 "register_operand" "v") >>>(match_operand:QI 3 "const_0_to_1_operand" "i")] >>> @@ -4408,8 +4408,8 @@ (define_insn "*bcd_test2_" >>> (match_operand:VBCD 2 "register_operand" "v") >>> (match_operand:QI 3 "const_0_to_1_operand" "i")] >>> UNSPEC_BCD_ADD_SUB)) >>> - (set (reg:CCFP CR6_REGNO) >>> - (compare:CCFP >>> + (set (reg:CC CR6_REGNO) >>> + (compare:CC >>> (unspec:V2DF [(match_dup 1) >>>(match_dup 2) >>>(match_dup 3)] >>> @@ -4502,8 +4502,8 @@ (define_insn "vclrrb" >>> [(set_attr "type" "vecsimple")]) >>> >>> (define_expand "bcd__" >>> - [(parallel [(set (reg:CCFP CR6_REGNO) >>> - (compare:CCFP >>> + [(parallel [(set (reg:CC CR6_REGNO) >>> + (compare:CC >>> (unspec:V2DF [(match_operand:VBCD 1 "register_operand") >>> (match_operand:VBCD 2 "register_operand") >>> (match_operand:QI 3 "const_0_to_1_operand")] >>> @@ -4511,46 +4511,138 @@ (define_expand "bcd__" >>> (match_dup 4))) >>> (clobber (match_scratch:VBCD 5))]) >>>
Ping^3 [PATCH v6, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]
Hi, Gentle ping this: https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597158.html Thanks. On 1/8/2022 上午 10:03, HAO CHEN GUI wrote: > Hi, >Gentle ping this: > https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597158.html > Thanks. > > > On 4/7/2022 下午 2:32, HAO CHEN GUI wrote: >> Hi, >>Gentle ping this: >> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597158.html >> Thanks. >> >> On 24/6/2022 上午 10:02, HAO CHEN GUI wrote: >>> Hi, >>> This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000. >>> Tests show that outputs of xs[min/max]dp are consistent with the standard >>> of C99 fmin/max. >>> >>> This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead >>> of smin/max. So the builtins always generate xs[min/max]dp on all >>> platforms. >>> >>> Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. >>> Is this okay for trunk? Any recommendations? Thanks a lot. >>> >>> ChangeLog >>> 2022-06-24 Haochen Gui >>> >>> gcc/ >>> PR target/103605 >>> * config/rs6000/rs6000.md (FMINMAX): New. >>> (minmax_op): New. >>> (f3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN. >>> * config/rs6000/rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set >>> pattern to fmaxdf3. >>> (__builtin_vsx_xsmindp): Set pattern to fmindf3. >>> >>> gcc/testsuite/ >>> PR target/103605 >>> * gcc.dg/powerpc/pr103605.c: New. 
>>> >>> >>> patch.diff >>> diff --git a/gcc/config/rs6000/rs6000-builtins.def >>> b/gcc/config/rs6000/rs6000-builtins.def >>> index f4a9f24bcc5..8b735493b40 100644 >>> --- a/gcc/config/rs6000/rs6000-builtins.def >>> +++ b/gcc/config/rs6000/rs6000-builtins.def >>> @@ -1613,10 +1613,10 @@ >>> XSCVSPDP vsx_xscvspdp {} >>> >>>const double __builtin_vsx_xsmaxdp (double, double); >>> -XSMAXDP smaxdf3 {} >>> +XSMAXDP fmaxdf3 {} >>> >>>const double __builtin_vsx_xsmindp (double, double); >>> -XSMINDP smindf3 {} >>> +XSMINDP fmindf3 {} >>> >>>const double __builtin_vsx_xsrdpi (double); >>> XSRDPI vsx_xsrdpi {} >>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >>> index bf85baa5370..ae0dd98f0f9 100644 >>> --- a/gcc/config/rs6000/rs6000.md >>> +++ b/gcc/config/rs6000/rs6000.md >>> @@ -158,6 +158,8 @@ (define_c_enum "unspec" >>> UNSPEC_HASHCHK >>> UNSPEC_XXSPLTIDP_CONST >>> UNSPEC_XXSPLTIW_CONST >>> + UNSPEC_FMAX >>> + UNSPEC_FMIN >>>]) >>> >>> ;; >>> @@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr" >>>DONE; >>> }) >>> >>> + >>> +(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN]) >>> + >>> +(define_int_attr minmax_op [(UNSPEC_FMAX "max") >>> +(UNSPEC_FMIN "min")]) >>> + >>> +(define_insn "f3" >>> + [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa") >>> + (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa") >>> + (match_operand:SFDF 2 "vsx_register_operand" "wa")] >>> +FMINMAX))] >>> + "TARGET_VSX && !flag_finite_math_only" >>> + "xsdp %x0,%x1,%x2" >>> + [(set_attr "type" "fp")] >>> +) >>> + >>> (define_expand "movcc" >>> [(set (match_operand:GPR 0 "gpc_reg_operand") >>> (if_then_else:GPR (match_operand 1 "comparison_operator") >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605.c >>> b/gcc/testsuite/gcc.target/powerpc/pr103605.c >>> new file mode 100644 >>> index 000..1c938d40e61 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/powerpc/pr103605.c >>> @@ -0,0 +1,37 @@ >>> +/* { dg-do compile } */ >>> +/* { 
dg-require-effective-target powerpc_vsx_ok } */ >>> +/* { dg-options "-O2 -mvsx" } */ >>> +/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */ >>> +/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */ >>> + >>> +#include >>> + >>> +double test1 (double d0, double d1) >>> +{ >>> + return fmin (d0, d1); >>> +} >>> + >>> +float test2 (float d0, float d1) >>> +{ >>> + return fmin (d0, d1); >>> +} >>> + >>> +double test3 (double d0, double d1) >>> +{ >>> + return fmax (d0, d1); >>> +} >>> + >>> +float test4 (float d0, float d1) >>> +{ >>> + return fmax (d0, d1); >>> +} >>> + >>> +double test5 (double d0, double d1) >>> +{ >>> + return __builtin_vsx_xsmindp (d0, d1); >>> +} >>> + >>> +double test6 (double d0, double d1) >>> +{ >>> + return __builtin_vsx_xsmaxdp (d0, d1); >>> +}