[PATCH v2, rs6000] Disable TImode from Bool expanders [PR100694, PR93123]

2022-02-20 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch disables TImode for the Bool expanders. Thus a TI register can be
split into two DI registers during expand. Potential optimizations can be
implemented after the split. The new test case illustrates it.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is
this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-02-21 Haochen Gui 

gcc/
PR target/100694
* config/rs6000/rs6000.md (and3): Disable TImode.
(ior3): Likewise.
(xor3): Likewise.
(nor3): Likewise.
(andc3): Likewise.
(eqv3): Likewise.
(nand3): Likewise.
(orc3): Likewise.
(one_cmpl2): Likewise.
(*one_cmplti2): Enable TImode complement for combine and split.

gcc/testsuite/
PR target/100694
* gcc.target/powerpc/pr100694.c: New.
* gcc.target/powerpc/pr92398.c: New.
* gcc.target/powerpc/pr92398.h: Remove.
* gcc.target/powerpc/pr92398.p9-.c: Remove.
* gcc.target/powerpc/pr92398.p9+.c: Remove.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6f74075f58d..1b1816d72ec 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6976,21 +6976,21 @@ (define_expand "and3"
   [(set (match_operand:BOOL_128 0 "vlogical_operand")
(and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
  (match_operand:BOOL_128 2 "vlogical_operand")))]
-  ""
+  "mode != TImode"
   "")

 (define_expand "ior3"
   [(set (match_operand:BOOL_128 0 "vlogical_operand")
 (ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
  (match_operand:BOOL_128 2 "vlogical_operand")))]
-  ""
+  "mode != TImode"
   "")

 (define_expand "xor3"
   [(set (match_operand:BOOL_128 0 "vlogical_operand")
 (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
  (match_operand:BOOL_128 2 "vlogical_operand")))]
-  ""
+  "mode != TImode"
   "")

 (define_expand "nor3"
@@ -6998,7 +6998,7 @@ (define_expand "nor3"
(and:BOOL_128
 (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand"))
 (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand"]
-  ""
+  "mode != TImode"
   "")

 (define_expand "andc3"
@@ -7006,7 +7006,7 @@ (define_expand "andc3"
 (and:BOOL_128
 (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand"))
 (match_operand:BOOL_128 1 "vlogical_operand")))]
-  ""
+  "mode != TImode"
   "")

 ;; Power8 vector logical instructions.
@@ -7015,7 +7015,7 @@ (define_expand "eqv3"
(not:BOOL_128
 (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
   (match_operand:BOOL_128 2 "vlogical_operand"]
-  "mode == TImode || mode == PTImode || TARGET_P8_VECTOR"
+  "mode != TImode && (mode == PTImode || TARGET_P8_VECTOR)"
   "")

 ;; Rewrite nand into canonical form
@@ -7024,7 +7024,7 @@ (define_expand "nand3"
(ior:BOOL_128
 (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand"))
 (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand"]
-  "mode == TImode || mode == PTImode || TARGET_P8_VECTOR"
+  "mode != TImode && (mode == PTImode || TARGET_P8_VECTOR)"
   "")

 ;; The canonical form is to have the negated element first, so we need to
@@ -7034,7 +7034,7 @@ (define_expand "orc3"
(ior:BOOL_128
 (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand"))
 (match_operand:BOOL_128 1 "vlogical_operand")))]
-  "mode == TImode || mode == PTImode || TARGET_P8_VECTOR"
+  "mode != TImode && (mode == PTImode || TARGET_P8_VECTOR)"
   "")

 ;; 128-bit logical operations insns and split operations
@@ -7291,7 +7291,7 @@ (define_insn_and_split "one_cmpl2"
   [(set (match_operand:BOOL_128 0 "vlogical_operand" "=")
(not:BOOL_128
  (match_operand:BOOL_128 1 "vlogical_operand" "")))]
-  ""
+  "mode != TImode"
 {
   if (TARGET_VSX && vsx_register_operand (operands[0], mode))
 return "xxlnor %x0,%x1,%x1";
@@ -7321,6 +7321,39 @@ (define_insn_and_split "one_cmpl2"
 (const_string "8")
 (const_string "16"])

+(define_insn_and_split "*one_cmplti2"
+  [(set (match_operand:TI 0 "vlogical_operand" "=,r,r,wa,v")
+   (not:TI
+ (match_operand:TI 1 "vlogical_operand" "r,0,0,wa,v")))]
+  ""
+{
+  if (TARGET_VSX && vsx_register_operand (operands[0], TImode))
+return "xxlnor %x0,%x1,%x1";
+
+  if (TARGET_ALTIVEC && altivec_register_operand (operands[0], TImode))
+return "vnor %0,%1,%1";
+
+  return "#";
+}
+  "reload_completed && int_reg_operand (operands[0], TImode)"
+  [(const_int 0)]
+{
+  rs6000_split_logical (operands, NOT, false, false, false);
+  DONE;
+}
+  [(set (attr "type")
+  (if_then_else
+   (match_test "vsx_register_operand (operands[0], TImode)")
+   (const_string "veclogical")
+   (const_string "integer")))
+   (set (attr "length")
+  (if_then_else
+   

[PATCH v3, rs6000] Add V1TI into vector comparison expand [PR103316]

2022-03-20 Thread HAO CHEN GUI via Gcc-patches
Hi,
   This patch adds V1TI mode into a new mode iterator used in vector
comparison expands. Without the patch, the comparisons between two vector
__int128 are converted to scalar comparisons with branches. The code is
suboptimal. The patch fixes the issue. Now all comparisons between two
vector __int128 generate the new P10 comparison instructions. Also the
related built-ins generate the same instructions after gimple folding.
So they're added back to the list of gimple-folded built-ins.

   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-03-16 Haochen Gui 

gcc/
PR target/103316
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable
gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET,
RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT,
RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI.
* config/rs6000/vector.md (VEC_IC): Define. Add support for new Power10
V1TI instructions.
(vec_cmp): Set mode iterator to VEC_IC.
(vec_cmpu): Likewise.

gcc/testsuite/
PR target/103316
* gcc.target/powerpc/pr103316.c: New.
* gcc.target/powerpc/fold-vec-cmp-int128.c: New cases for vector
__int128.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 5d34c1bcfc9..fac7f43f438 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1994,16 +1994,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case RS6000_BIF_VCMPEQUH:
 case RS6000_BIF_VCMPEQUW:
 case RS6000_BIF_VCMPEQUD:
-/* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple
-   folding produces worse code for 128-bit compares.  */
+case RS6000_BIF_VCMPEQUT:
   fold_compare_helper (gsi, EQ_EXPR, stmt);
   return true;

 case RS6000_BIF_VCMPNEB:
 case RS6000_BIF_VCMPNEH:
 case RS6000_BIF_VCMPNEW:
-/* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple
-   folding produces worse code for 128-bit compares.  */
+case RS6000_BIF_VCMPNET:
   fold_compare_helper (gsi, NE_EXPR, stmt);
   return true;

@@ -2015,9 +2013,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case RS6000_BIF_CMPGE_U4SI:
 case RS6000_BIF_CMPGE_2DI:
 case RS6000_BIF_CMPGE_U2DI:
-/* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI
-   for now, because gimple folding produces worse code for 128-bit
-   compares.  */
+case RS6000_BIF_CMPGE_1TI:
+case RS6000_BIF_CMPGE_U1TI:
   fold_compare_helper (gsi, GE_EXPR, stmt);
   return true;

@@ -2029,9 +2026,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case RS6000_BIF_VCMPGTUW:
 case RS6000_BIF_VCMPGTUD:
 case RS6000_BIF_VCMPGTSD:
-/* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST
-   for now, because gimple folding produces worse code for 128-bit
-   compares.  */
+case RS6000_BIF_VCMPGTUT:
+case RS6000_BIF_VCMPGTST:
   fold_compare_helper (gsi, GT_EXPR, stmt);
   return true;

@@ -2043,9 +2039,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case RS6000_BIF_CMPLE_U4SI:
 case RS6000_BIF_CMPLE_2DI:
 case RS6000_BIF_CMPLE_U2DI:
-/* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI
-   for now, because gimple folding produces worse code for 128-bit
-   compares.  */
+case RS6000_BIF_CMPLE_1TI:
+case RS6000_BIF_CMPLE_U1TI:
   fold_compare_helper (gsi, LE_EXPR, stmt);
   return true;

diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index b87a742cca8..d88869cc8d0 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -26,6 +26,9 @@
 ;; Vector int modes
 (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])

+;; Vector int modes for comparison
+(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI "TARGET_POWER10")])
+
 ;; 128-bit int modes
 (define_mode_iterator VEC_TI [V1TI TI])

@@ -533,10 +536,10 @@ (define_expand "vcond_mask_"

 ;; For signed integer vectors comparison.
 (define_expand "vec_cmp"
-  [(set (match_operand:VEC_I 0 "vint_operand")
+  [(set (match_operand:VEC_IC 0 "vint_operand")
(match_operator 1 "signed_or_equality_comparison_operator"
- [(match_operand:VEC_I 2 "vint_operand")
-  (match_operand:VEC_I 3 "vint_operand")]))]
+ [(match_operand:VEC_IC 2 "vint_operand")
+  (match_operand:VEC_IC 3 "vint_operand")]))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)"
 {
   enum rtx_code code = GET_CODE (operands[1]);
@@ -573,10 +576,10 @@ (define_expand "vec_cmp"

 ;; For unsigned integer vectors comparison.
 (define_expand "vec_cmpu"
-  [(set (match_operand:VEC_I 0 "vint_operand")
+  [(set (match_operand:VEC_IC 0 "vint_operand")
(match_operator 1 

[PATCH, rs6000] Remove TImode from mode iterator BOOL_128 [PR100694]

2022-02-08 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch removes TImode from mode iterator BOOL_128. Thus, bool operations
(AND, IOR, XOR, NOT) on TImode will be split to the relevant operations on word
mode during expand (in optabs.c). Potential optimizations can be implemented
after the split. The former practice splits it after the reload pass, which is
too late for some optimizations. The new test case illustrates it.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is 
this okay for trunk?
Any recommendations? Thanks a lot.

ChangeLog
2022-02-08 Haochen Gui 

gcc/
PR target/100694
* config/rs6000/rs6000.md (BOOL_128): Remove TI.

gcc/testsuite/
PR target/100694
* gcc.target/powerpc/pr100694.c: New.
* gcc.target/powerpc/pr92398.c: New.
* gcc.target/powerpc/pr92398.h: Remove.
* gcc.target/powerpc/pr92398.p9-.c: Remove.
* gcc.target/powerpc/pr92398.p9+.c: Remove.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6f74075f58d..2bc1b8f497a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -750,8 +750,7 @@ (define_mode_attr SI_CONVERT_FP [(SF "TARGET_FCFIDS")
 (DF "TARGET_FCFID")])

 ;; Mode iterator for logical operations on 128-bit types
-(define_mode_iterator BOOL_128 [TI
-PTI
+(define_mode_iterator BOOL_128 [PTI
 (V16QI "TARGET_ALTIVEC")
 (V8HI  "TARGET_ALTIVEC")
 (V4SI  "TARGET_ALTIVEC")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c 
b/gcc/testsuite/gcc.target/powerpc/pr100694.c
new file mode 100644
index 000..7b41d920140
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr100694.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times {\mstd\M} 2 } } */
+/* { dg-final { scan-assembler-not {\mli\M} } } */
+/* { dg-final { scan-assembler-not {\mor\M} } } */
+
+/* It just needs two std.  */
+void foo (unsigned __int128* res, unsigned long long hi, unsigned long long lo)
+{
+   unsigned __int128 i = hi;
+   i <<= 64;
+   i |= lo;
+   *res = i;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.c 
b/gcc/testsuite/gcc.target/powerpc/pr92398.c
new file mode 100644
index 000..7d6201cc5bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr92398.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times {\mnot\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstd\M} 2 } } */
+
+/* All platforms should generate the same instructions: not;not;std;std.  */
+void bar (__int128_t *dst, __int128_t src)
+{
+  *dst =  ~src;
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.h 
b/gcc/testsuite/gcc.target/powerpc/pr92398.h
deleted file mode 100644
index 5a4a8bcab80..000
--- a/gcc/testsuite/gcc.target/powerpc/pr92398.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* This test code is included into pr92398.p9-.c and pr92398.p9+.c.
-   The two files have the tests for the number of instructions generated for
-   P9- versus P9+.
-
-   store generates difference instructions as below:
-   P9+: mtvsrdd;xxlnot;stxv.
-   P8/P7/P6 LE: not;not;std;std.
-   P8 BE: mtvsrd;mtvsrd;xxpermdi;xxlnor;stxvd2x.
-   P7/P6 BE: std;std;addi;lxvd2x;xxlnor;stxvd2x.
-   P9+ and P9- LE are expected, P6/P7/P8 BE are unexpected.  */
-
-void
-bar (__int128_t *dst, __int128_t src)
-{
-  *dst =  ~src;
-}
-
diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c 
b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
deleted file mode 100644
index 72dd1d9a274..000
--- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/* { dg-do compile { target { lp64 && has_arch_pwr9 } } } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mxxlnor\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mstxv\M} 1 } } */
-/* { dg-final { scan-assembler-not {\mld\M} } } */
-/* { dg-final { scan-assembler-not {\mnot\M} } } */
-
-/* Source code for the test in pr92398.h */
-#include "pr92398.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c 
b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
deleted file mode 100644
index bd7fa98af51..000
--- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
+++ /dev/null
@@ -1,10 +0,0 @@
-/* { dg-do compile { target { lp64 && {! has_arch_pwr9} } } } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* { dg-final { scan-assembler-times {\mnot\M} 2 { xfail be } } } */
-/* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { { {! 

[PATCH, rs6000] Enable absolute jump table for PPC Linux

2022-01-17 Thread HAO CHEN GUI via Gcc-patches
Hi,
   This patch enables absolute jump tables on PPC Linux. When PIC is set, the
absolute jump tables are placed in the RELRO section. Otherwise, they're placed
in the rodata section.

   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is 
this okay for trunk?
Any recommendations? Thanks a lot.

ChangeLog
2022-01-18 Haochen Gui 

gcc/
* config/rs6000/linux64.h (JUMP_TABLES_IN_TEXT_SECTION): Define to 0.
* config/rs6000/rs6000.c (rs6000_linux64_override_options): Set
rs6000_relative_jumptables to 0.


patch.diff
diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h
index d617f346f81..2e257c60f8c 100644
--- a/gcc/config/rs6000/linux64.h
+++ b/gcc/config/rs6000/linux64.h
@@ -239,7 +239,7 @@ extern int dot_symbols;

 /* Indicate that jump tables go in the text section.  */
 #undef  JUMP_TABLES_IN_TEXT_SECTION
-#define JUMP_TABLES_IN_TEXT_SECTION TARGET_64BIT
+#define JUMP_TABLES_IN_TEXT_SECTION 0

 /* The linux ppc64 ABI isn't explicit on whether aggregates smaller
than a doubleword should be padded upward or downward.  You could
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 319182e94d9..7e196e8a43b 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -3576,6 +3576,8 @@ rs6000_linux64_override_options ()
  error (INVALID_32BIT, "cmodel");
}
 }
+
+  rs6000_relative_jumptables = 0;
 }
 #endif


[PATCH, rs6000] Add a combine pattern for CA minus one [PR95737]

2022-01-18 Thread HAO CHEN GUI via Gcc-patches
Hi,
   This patch adds a combine pattern for "CA minus one". As CA only has two
values (0 or 1), we could convert the following pattern
  (sign_extend:DI (plus:SI (reg:SI 98 ca)
(const_int -1 [0xffffffffffffffff])))
to
   (plus:DI (reg:DI 98 ca)
(const_int -1 [0xffffffffffffffff]))
With this patch, one unnecessary sign extend is eliminated. Also in rs6000, the
regclass of the CA register is set to NO_REGS. So CA is not in the hard register
set and it can't match register_operand. The patch changes it to any_operand.

Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-01-19 Haochen Gui 

gcc/
* config/rs6000/predicates.md (ca_operand): Match any_operand as CA
register is not in hard register set.
* config/rs6000/rs6000.md (extenddi_ca_minus_one): Define.

gcc/testsuite/
* gcc.target/powerpc/pr95737.c: New.


patch.diff
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index c65dfb91f3d..cd2ae1dc8e0 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -188,7 +188,7 @@ (define_predicate "vlogical_operand"

 ;; Return 1 if op is the carry register.
 (define_predicate "ca_operand"
-  (match_operand 0 "register_operand")
+  (match_operand 0 "any_operand")
 {
   if (SUBREG_P (op))
 op = SUBREG_REG (op);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6ecb0bd6142..f1b09aad3b5 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2358,6 +2358,21 @@ (define_insn "subf3_carry_in_xx"
   "subfe %0,%0,%0"
   [(set_attr "type" "add")])

+(define_insn_and_split "*extenddi_ca_minus_one"
+  [(set (match_operand:DI 0 "gpc_reg_operand")
+   (sign_extend:DI (plus:SI (match_operand:SI 1 "ca_operand")
+(const_int -1]
+  ""
+  "#"
+  ""
+  [(parallel [(set (match_dup 0)
+  (plus:DI (match_dup 2)
+   (const_int -1)))
+ (clobber (match_dup 2))])]
+{
+  operands[2] = copy_rtx (operands[1]);
+  PUT_MODE (operands[2], DImode);
+})

 (define_insn "@neg2"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr95737.c 
b/gcc/testsuite/gcc.target/powerpc/pr95737.c
new file mode 100644
index 000..94320f23423
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr95737.c
@@ -0,0 +1,10 @@
+/* PR target/95737 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-final { scan-assembler-not {\mextsw\M} } } */
+
+
+unsigned long long negativeLessThan (unsigned long long a, unsigned long long 
b)
+{
+   return -(a < b);
+}


[PATCH v3, rs6000] Add a combine pattern for CA minus one [PR95737]

2022-01-21 Thread HAO CHEN GUI via Gcc-patches
Hi,
   This patch adds a combine pattern for "CA minus one". As CA only has two
values (0 or 1), we could convert the following pattern
  (sign_extend:DI (plus:SI (reg:SI 98 ca)
(const_int -1 [0xffffffffffffffff])))
to
   (plus:DI (reg:DI 98 ca)
(const_int -1 [0xffffffffffffffff]))
   With this patch, one unnecessary sign extend is eliminated.

   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-01-20 Haochen Gui 

gcc/
* config/rs6000/rs6000.md (extenddi_ca_minus_one): Define.

gcc/testsuite/
* gcc.target/powerpc/pr95737.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6ecb0bd6142..1d8b212962f 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2358,6 +2358,19 @@ (define_insn "subf3_carry_in_xx"
   "subfe %0,%0,%0"
   [(set_attr "type" "add")])

+(define_insn_and_split "*extenddi_ca_minus_one"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (sign_extend:DI (plus:SI (reg:SI CA_REGNO)
+(const_int -1]
+  ""
+  "#"
+  ""
+  [(parallel [(set (match_dup 0)
+  (plus:DI (reg:DI CA_REGNO)
+   (const_int -1)))
+ (clobber (reg:DI CA_REGNO))])]
+  ""
+)

 (define_insn "@neg2"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr95737.c 
b/gcc/testsuite/gcc.target/powerpc/pr95737.c
new file mode 100644
index 000..d4d6a4198cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr95737.c
@@ -0,0 +1,10 @@
+/* PR target/95737 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mno-isel" } */
+/* { dg-final { scan-assembler-not {\mextsw\M} } } */
+
+
+unsigned long negativeLessThan (unsigned long a, unsigned long b)
+{
+   return -(a < b);
+}


Re: [PATCH v2, rs6000] Add a combine pattern for CA minus one [PR95737]

2022-01-20 Thread HAO CHEN GUI via Gcc-patches
Thanks so much for your advice. Please see my comments.

On 21/1/2022 上午 5:42, Segher Boessenkool wrote:
> Hi!
> 
> On Thu, Jan 20, 2022 at 01:46:48PM -0500, David Edelsohn wrote:
>> On Thu, Jan 20, 2022 at 2:36 AM HAO CHEN GUI  wrote:
>>>This patch adds a combine pattern for "CA minus one". As CA only has two
>>> values (0 or 1), we could convert following pattern
>>>   (sign_extend:DI (plus:SI (reg:SI 98 ca)
>>> (const_int -1 [0x]
>>> to
>>>(plus:DI (reg:DI 98 ca)
>>> (const_int -1 [0x])))
>>>With this patch, one unnecessary sign extend is eliminated.
>>>
>>>Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
>>> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> There are ten gazillion similar things we could make extra backend
> patterns for, and we still would not cover a majority of cases.
> 
> If instead we got some generic way to handle this we could cover many
> more cases, for much less effort.
Could we add an additional pass that examines the finally generated instructions
and the registers they use to decide which extensions are unnecessary?
> 
> We need both widening modes from SI to DI, and narrowing modes from DI
> to SI.  Both are useful in certain cases; it is not like using wider
> modes is always better, in some cases narrower modes is better (in cases
> where we can let the generated code then generate whatever bits in the
> high half of the word, for example; a typical example is addition in an
> unsigned int).
Just for this case, converting CA from DI to SI is supported in simplify_rtx.
The original comparison result is in DI mode, but it's truncated to SI mode as
the C standard requires.

Trying 8 -> 11:
8: {r127:DI=ca:DI-0x1;clobber ca:DI;}
  REG_DEAD ca:DI
  REG_UNUSED ca:DI
   11: r128:SI=r127:DI#0
  REG_DEAD r127:DI
Successfully matched this instruction:
(set (reg:SI 128)
(plus:SI (reg:SI 98 ca)
(const_int -1 [0xffffffffffffffff])))
allowing combination of insns 8 and 11
original costs 4 + 4 = 8
replacement cost 4
deferring deletion of insn with uid = 8.
modifying insn i311: {r128:SI=ca:SI-0x1;clobber ca:SI;}
  REG_UNUSED ca:SI
deferring rescan insn with uid = 11.

The C standard's type promotion requirement and the 64-bit return value are the
root cause of this problem, I think.
> 
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr95737.c
>>> @@ -0,0 +1,10 @@
>>> +/* PR target/95737 */
>>> +/* { dg-do compile { target lp64 } } */
>>> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
>>
>> Why does the testcase force power8? This testcase is not specific to
>> Power8 or later.
> 
> Yes, and we should generate the same code on older machines.
> 
>>> +/* { dg-final { scan-assembler-not {\mextsw\M} } } */
>>> +
>>> +
>>> +unsigned long long negativeLessThan (unsigned long long a, unsigned long 
>>> long b)
>>> +{
>>> +   return -(a < b);
>>> +}
>>
>> If you're only testing for lp64, the testcase could use "long" instead
>> of "long long".
> 
> The testcase really needs "powerpc64", if that would mean "test if
> -mpowerpc64 is (implicitly) used".  But that is not what it currently
> means (it is something akin to "powerpc64_hw", instead).
> 
> So we test lp64, which is set if and only if -m64 was used.  It is
> reasonable coverage, no one cares much for -m32 -mpowerpc64 .
> 
> 
> Segher


[PATCH v2, rs6000] Add a combine pattern for CA minus one [PR95737]

2022-01-19 Thread HAO CHEN GUI via Gcc-patches
Hi,
   This patch adds a combine pattern for "CA minus one". As CA only has two
values (0 or 1), we could convert the following pattern
  (sign_extend:DI (plus:SI (reg:SI 98 ca)
(const_int -1 [0xffffffffffffffff])))
to
   (plus:DI (reg:DI 98 ca)
(const_int -1 [0xffffffffffffffff]))
   With this patch, one unnecessary sign extend is eliminated.

   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-01-20 Haochen Gui 

gcc/
* config/rs6000/rs6000.md (extenddi_ca_minus_one): Define.

gcc/testsuite/
* gcc.target/powerpc/pr95737.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6ecb0bd6142..1d8b212962f 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2358,6 +2358,19 @@ (define_insn "subf3_carry_in_xx"
   "subfe %0,%0,%0"
   [(set_attr "type" "add")])

+(define_insn_and_split "*extenddi_ca_minus_one"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (sign_extend:DI (plus:SI (reg:SI CA_REGNO)
+(const_int -1]
+  ""
+  "#"
+  ""
+  [(parallel [(set (match_dup 0)
+  (plus:DI (reg:DI CA_REGNO)
+   (const_int -1)))
+ (clobber (reg:DI CA_REGNO))])]
+  ""
+)

 (define_insn "@neg2"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr95737.c 
b/gcc/testsuite/gcc.target/powerpc/pr95737.c
new file mode 100644
index 000..94320f23423
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr95737.c
@@ -0,0 +1,10 @@
+/* PR target/95737 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-final { scan-assembler-not {\mextsw\M} } } */
+
+
+unsigned long long negativeLessThan (unsigned long long a, unsigned long long 
b)
+{
+   return -(a < b);
+}



Re: [PATCH, rs6000] Add a combine pattern for CA minus one [PR95737]

2022-01-19 Thread HAO CHEN GUI via Gcc-patches



On 19/1/2022 下午 3:52, Andrew Pinski wrote:
> On Tue, Jan 18, 2022 at 11:13 PM HAO CHEN GUI via Gcc-patches
>  wrote:
>>
>> Hi,
>>This patch adds a combine pattern for "CA minus one". As CA only has two
>> values (0 or 1), we could convert following pattern
>>   (sign_extend:DI (plus:SI (reg:SI 98 ca)
>> (const_int -1 [0x]
>> to
>>(plus:DI (reg:DI 98 ca)
>> (const_int -1 [0x])))
>> With this patch, it eliminates one unnecessary sign extend. Also in 
>> rs6000,
>> regclass of CA register is set to NO_REGS. So CA is not in hard register set
>> and it can't match register_operand. The patch changes it to any_operand.
>>
>> Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
>> Is this okay for trunk? Any recommendations? Thanks a lot.
>>
>> ChangeLog
>> 2022-01-19 Haochen Gui 
>>
>> gcc/
>> * config/rs6000/predicates.md (ca_operand): Match any_operand as CA
>> register is not in hard register set.
>> * config/rs6000/rs6000.md (extenddi_ca_minus_one): Define.
>>
>> gcc/testsuite/
>> * gcc.target/powerpc/pr95737.c: New.
>>
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/predicates.md 
>> b/gcc/config/rs6000/predicates.md
>> index c65dfb91f3d..cd2ae1dc8e0 100644
>> --- a/gcc/config/rs6000/predicates.md
>> +++ b/gcc/config/rs6000/predicates.md
>> @@ -188,7 +188,7 @@ (define_predicate "vlogical_operand"
>>
>>  ;; Return 1 if op is the carry register.
>>  (define_predicate "ca_operand"
>> -  (match_operand 0 "register_operand")
>> +  (match_operand 0 "any_operand")
>>  {
>>if (SUBREG_P (op))
>>  op = SUBREG_REG (op);
>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>> index 6ecb0bd6142..f1b09aad3b5 100644
>> --- a/gcc/config/rs6000/rs6000.md
>> +++ b/gcc/config/rs6000/rs6000.md
>> @@ -2358,6 +2358,21 @@ (define_insn "subf3_carry_in_xx"
>>"subfe %0,%0,%0"
>>[(set_attr "type" "add")])
>>
>> +(define_insn_and_split "*extenddi_ca_minus_one"
>> +  [(set (match_operand:DI 0 "gpc_reg_operand")
>> +   (sign_extend:DI (plus:SI (match_operand:SI 1 "ca_operand")
>> +(const_int -1]
>> +  ""
>> +  "#"
>> +  ""
>> +  [(parallel [(set (match_dup 0)
>> +  (plus:DI (match_dup 2)
>> +   (const_int -1)))
>> + (clobber (match_dup 2))])]
>> +{
>> +  operands[2] = copy_rtx (operands[1]);
>> +  PUT_MODE (operands[2], DImode);
>> +})
> 
> There are a few things missing I think for this to be correct.
> I think it should be:
> (define_insn_and_split "*extenddi_ca_minus_one"
>   [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>(sign_extend:DI (plus:SI (reg:SI CA_REGNO)
>  (const_int -1]
>   ""
>   "#"
>   "&& reload_completed"
>   [(parallel [(set (match_dup 0)
>   (plus:DI (reg:DI CA_REGNO)
>(const_int -1)))
>  (clobber (reg:DI CA_REGNO))])]
> {})
> 
> There is no reason to change ca_operand either since
> subf3_carry_in_xx already hard codes the CA_REGNO too; you can
Yes, we can directly use CA_REGNO. It makes the pattern more compact.
But why does it need reload_completed? Could you explain it?

Thanks.
Gui Haochen

> just use it directly like above.
> 
> Sorry for the incorrect whitespace formatting though.
> 
> Thanks,
> Andrew Pinski
> 
>>
>>  (define_insn "@neg2"
>>[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr95737.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr95737.c
>> new file mode 100644
>> index 000..94320f23423
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr95737.c
>> @@ -0,0 +1,10 @@
>> +/* PR target/95737 */
>> +/* { dg-do compile { target lp64 } } */
>> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
>> +/* { dg-final { scan-assembler-not {\mextsw\M} } } */
>> +
>> +
>> +unsigned long long negativeLessThan (unsigned long long a, unsigned long 
>> long b)
>> +{
>> +   return -(a < b);
>> +}


Re: [PATCH, rs6000] Correct match pattern in pr56605.c

2022-04-10 Thread HAO CHEN GUI via Gcc-patches
Hi,

On 9/4/2022 上午 12:48, will schmidt wrote:
> On Mon, 2022-02-28 at 11:17 +0800, HAO CHEN GUI via Gcc-patches wrote:
>> Hi,
>>   This patch corrects the match pattern in pr56605.c. The former pattern
>> is wrong and the test case fails with GCC11. It should match the following
>> insn on each subtarget after mode promotion is disabled. The patch needs to
>> be backported to GCC11.
>>
> 
> Hi,
> 
> I note This patch appears to (partially?) address the P1 [11 regression] pr.  
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102146

There are two issues left in this PR. One is pr56605.c, which my patch fixes.
The other is prefix-no-update.c. The patch Segher proposed in 103197 could fix it.
Thanks.
> 
> 
> The issue makes reference to a different proposed patch 
> in issue https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103197
> titled  ppc inline expansion of memcpy/memmove should not use lxsibzx/stxsibx 
> for a single byte
> proposed patch named
> rs6000: Disparage lfiwzx and similar
> 
> I can't address any of the background or history there.  :-)
> 
> 
>> //gimple
>> _17 = (unsigned int) _20;
>>  prolog_loop_niters.4_23 = _17 & 3;
>>
>> //rtl
>> (insn 19 18 20 2 (parallel [
>> (set (reg:CC 208)
>> (compare:CC (and:SI (subreg:SI (reg:DI 207) 0)
>> (const_int 3 [0x3]))
>> (const_int 0 [0])))
>> (set (reg:SI 129 [ prolog_loop_niters.5 ])
>> (and:SI (subreg:SI (reg:DI 207) 0)
>> (const_int 3 [0x3])))
>> ]) 197 {*andsi3_imm_mask_dot2}
>>
>>
>>   Bootstrapped and tested on powerpc64-linux BE/LE and AIX with no 
>> regressions.
>> Is this okay for trunk and GCC11? Any recommendations? Thanks a lot.
>>
>> ChangeLog
>> 2022-02-28 Haochen Gui 
>>
>> gcc/testsuite/
>>  PR target/102146
>>  * gcc.target/powerpc/pr56605.c: Correct match pattern in combine pass.
>>
>>
>> patch.diff
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr56605.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> index fdedbfc573d..231d808aa99 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> @@ -11,5 +11,5 @@ void foo (short* __restrict sb, int* __restrict ia)
>>  ia[i] = (int) sb[i];
>>  }
>>
>> -/* { dg-final { scan-rtl-dump-times {\(compare:CC 
>> \((?:and|zero_extend):(?:DI) \((?:sub)?reg:[SD]I} 1 "combine" } } */
>> +/* { dg-final { scan-rtl-dump-times {\(compare:CC \(and:SI \(subreg:SI 
>> \(reg:DI} 1 "combine" } } */
> 
> 
> So with the update, (I squint so this is an approximate handwave) this
> drops the zero_extend and changes the destination type to be DI for the
> scan-rtl. This appears to match the rtl as mentioned in the patch
> comments.
> 
> 
>>
> 


Re: [PATCH, rs6000] Correct match pattern in pr56605.c

2022-04-10 Thread HAO CHEN GUI via Gcc-patches
Hi,

On 9/4/2022 上午 3:36, Segher Boessenkool wrote:
> Hi!
> 
> On Mon, Feb 28, 2022 at 11:17:27AM +0800, HAO CHEN GUI wrote:
>>   This patch corrects the match pattern in pr56605.c. The former pattern
>> is wrong and the test case fails with GCC11. It should match the following
>> insn on each subtarget after mode promotion is disabled. The patch needs
>> to be backported to GCC11.
>>
>> //gimple
>> _17 = (unsigned int) _20;
>>  prolog_loop_niters.4_23 = _17 & 3;
>>
>> //rtl
>> (insn 19 18 20 2 (parallel [
>> (set (reg:CC 208)
>> (compare:CC (and:SI (subreg:SI (reg:DI 207) 0)
>> (const_int 3 [0x3]))
>> (const_int 0 [0])))
>> (set (reg:SI 129 [ prolog_loop_niters.5 ])
>> (and:SI (subreg:SI (reg:DI 207) 0)
>> (const_int 3 [0x3])))
>> ]) 197 {*andsi3_imm_mask_dot2}
>>
>>
>>   Bootstrapped and tested on powerpc64-linux BE/LE and AIX with no 
>> regressions.
>> Is this okay for trunk and GCC11? Any recommendations? Thanks a lot.
>>
>> ChangeLog
>> 2022-02-28 Haochen Gui 
>>
>> gcc/testsuite/
>>  PR target/102146
>>  * gcc.target/powerpc/pr56605.c: Correct match pattern in combine pass.
>>
>>
>> patch.diff
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr56605.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> index fdedbfc573d..231d808aa99 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> @@ -11,5 +11,5 @@ void foo (short* __restrict sb, int* __restrict ia)
>>  ia[i] = (int) sb[i];
>>  }
>>
>> -/* { dg-final { scan-rtl-dump-times {\(compare:CC 
>> \((?:and|zero_extend):(?:DI) \((?:sub)?reg:[SD]I} 1 "combine" } } */
>> +/* { dg-final { scan-rtl-dump-times {\(compare:CC \(and:SI \(subreg:SI 
>> \(reg:DI} 1 "combine" } } */
> 
> The old pattern uses non-capturing braces here, which are required for
> ...-times to work correctly.  The zero_extend alternative is required as
> well, as is making the subreg optional (we have an actual reg in one of
> the cases currently).  What do you consider wrong about the old pattern,
> what in the generated code is different from what you expect?
> 
> It works correctly on p7 etc. btw; where do you see it fail?  p10?
> 
> 
I saw it failed with GCC11.

FAIL: gcc.target/powerpc/pr56605.c scan-rtl-dump-times combine "\\(compare:CC 
\\((?:and|zero_extend):(?:DI) \\((?:sub)?reg:[SD]I" 1

On ppc64le with GCC11, it should match following insn.

(compare:CC (and:SI (subreg:SI (reg:DI 208) 0)

With GCC12, it should match following insn.

(compare:CC (and:SI (subreg:SI (reg:DI 207) 0)

With GCC12 the pattern actually matches:

(compare:CC (and:DI (subreg:DI (reg:SI 136 [ niters.6 ]) 0)

So GCC12 doesn't fail the case, but it actually matches the wrong insn.
There is no such insn in the GCC11 combine dump, so GCC11 hits the problem.

Thanks.

> Segher


Ping^2 [PATCH, rs6000] Correct match pattern in pr56605.c

2022-04-07 Thread HAO CHEN GUI via Gcc-patches
Hi,
  Gentle ping this:
   https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590958.html
Thanks

On 15/3/2022 上午 10:06, HAO CHEN GUI wrote:
> Hi,
>   Gentle ping this:
>   https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590958.html
> Thanks
> 
> On 28/2/2022 上午 11:17, HAO CHEN GUI wrote:
>> Hi,
>>   This patch corrects the match pattern in pr56605.c. The former pattern
>> is wrong and the test case fails with GCC11. It should match the following
>> insn on each subtarget after mode promotion is disabled. The patch needs
>> to be backported to GCC11.
>>
>> //gimple
>> _17 = (unsigned int) _20;
>>  prolog_loop_niters.4_23 = _17 & 3;
>>
>> //rtl
>> (insn 19 18 20 2 (parallel [
>> (set (reg:CC 208)
>> (compare:CC (and:SI (subreg:SI (reg:DI 207) 0)
>> (const_int 3 [0x3]))
>> (const_int 0 [0])))
>> (set (reg:SI 129 [ prolog_loop_niters.5 ])
>> (and:SI (subreg:SI (reg:DI 207) 0)
>> (const_int 3 [0x3])))
>> ]) 197 {*andsi3_imm_mask_dot2}
>>
>>
>>   Bootstrapped and tested on powerpc64-linux BE/LE and AIX with no 
>> regressions.
>> Is this okay for trunk and GCC11? Any recommendations? Thanks a lot.
>>
>> ChangeLog
>> 2022-02-28 Haochen Gui 
>>
>> gcc/testsuite/
>>  PR target/102146
>>  * gcc.target/powerpc/pr56605.c: Correct match pattern in combine pass.
>>
>>
>> patch.diff
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr56605.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> index fdedbfc573d..231d808aa99 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> @@ -11,5 +11,5 @@ void foo (short* __restrict sb, int* __restrict ia)
>>  ia[i] = (int) sb[i];
>>  }
>>
>> -/* { dg-final { scan-rtl-dump-times {\(compare:CC 
>> \((?:and|zero_extend):(?:DI) \((?:sub)?reg:[SD]I} 1 "combine" } } */
>> +/* { dg-final { scan-rtl-dump-times {\(compare:CC \(and:SI \(subreg:SI 
>> \(reg:DI} 1 "combine" } } */
>>


[PATCH-2v2, rs6000] Implement 32bit inline lrint [PR88558]

2023-09-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements 32bit inline lrint by "fctiw". It depends on
the patch1 to do SImode move from FP registers on P7.

  Compared to last version, the main change is to add tests for "lrintf"
and adjust the count of corresponding instructions.
https://gcc.gnu.org/pipermail/gcc-patches/2023-August/628436.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: support 32bit inline lrint

gcc/
PR target/88558
* config/rs6000/rs6000.md (lrint<mode>di2): Remove TARGET_FPRND
from insn condition.
(lrint<mode>si2): New insn pattern for 32bit lrint.

gcc/testsuite/
PR target/88558
* gcc.target/powerpc/pr88558.h: New.
* gcc.target/powerpc/pr88558-p7.c: New.
* gcc.target/powerpc/pr88558-p8.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index edf49bd74e3..a41898e0e08 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6655,10 +6655,18 @@ (define_insn "lrintdi2"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
(unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "")]
   UNSPEC_FCTID))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT"
   "fctid %0,%1"
   [(set_attr "type" "fp")])

+(define_insn "lrintsi2"
+  [(set (match_operand:SI 0 "gpc_reg_operand" "=d")
+   (unspec:SI [(match_operand:SFDF 1 "gpc_reg_operand" "")]
+  UNSPEC_FCTIW))]
+  "TARGET_HARD_FLOAT && TARGET_POPCNTD"
+  "fctiw %0,%1"
+  [(set_attr "type" "fp")])
+
 (define_insn "btrunc2"
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c 
b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
new file mode 100644
index 000..f302491c4d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=power7" } */
+
+/* -fno-math-errno is required to make {i,l,ll}rint inlined */
+
+#include "pr88558.h"
+
+/* { dg-final { scan-assembler-times {\mfctid\M} 3 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctid\M} 1 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 3 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mstfiwx\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mstfiwx\M} 3 { target ilp32 } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p8.c 
b/gcc/testsuite/gcc.target/powerpc/pr88558-p8.c
new file mode 100644
index 000..33398aa74c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p8.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=power8" } */
+
+/* -fno-math-errno is required to make {i,l,ll}rint inlined */
+
+#include "pr88558.h"
+
+/* { dg-final { scan-assembler-times {\mfctid\M} 3 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctid\M} 1 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 3 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mmfvsrwz\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mmfvsrwz\M} 3 { target ilp32 } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558.h 
b/gcc/testsuite/gcc.target/powerpc/pr88558.h
new file mode 100644
index 000..698640c0ef7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558.h
@@ -0,0 +1,19 @@
+long int test1 (double a)
+{
+  return __builtin_lrint (a);
+}
+
+long long test2 (double a)
+{
+  return __builtin_llrint (a);
+}
+
+int test3 (double a)
+{
+  return __builtin_irint (a);
+}
+
+long int test4 (float a)
+{
+  return __builtin_lrintf (a);
+}


[PATCH-1v2, rs6000] Enable SImode in FP registers on P7 [PR88558]

2023-09-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch enables SImode in FP registers on P7. Instruction "fctiw"
stores its integer output in an FP register. So SImode in FP registers
needs to be enabled on P7 if we want to support "fctiw" on P7.

  The test case is in the second patch which implements 32bit inline
lrint.

  Compared to the last version, the main change is to remove the disparaging
of the "fmr" alternatives. Testing shows it doesn't cause regressions.
https://gcc.gnu.org/pipermail/gcc-patches/2023-August/628435.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.


ChangeLog
rs6000: enable SImode in FP register on P7

gcc/
PR target/88558
* config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached):
Enable SImode in FP registers on P7.
* config/rs6000/rs6000.md (*movsi_internal1): Add fmr for SImode
move between FP registers.  Set attribute isa of stfiwx to "*"
and attribute isa of lfiwzx to "p7".

patch.diff
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 44b448d2ba6..99085c2cdd7 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1903,7 +1903,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, 
machine_mode mode)
  if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
return 1;

- if (TARGET_P8_VECTOR && (mode == SImode))
+ if (TARGET_POPCNTD && mode == SImode)
return 1;

  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index cdab49fbb91..edf49bd74e3 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7566,7 +7566,7 @@ (define_split

 (define_insn "*movsi_internal1"
   [(set (match_operand:SI 0 "nonimmediate_operand"
- "=r, r,
+ "=r, r,  d,
   r,  d,  v,
   m,  ?Z, ?Z,
   r,  r,  r,  r,
@@ -7575,7 +7575,7 @@ (define_insn "*movsi_internal1"
   wa, r,
   r,  *h, *h")
(match_operand:SI 1 "input_operand"
- "r,  U,
+ "r,  U,  d,
   m,  ?Z, ?Z,
   r,  d,  v,
   I,  L,  eI, n,
@@ -7588,6 +7588,7 @@ (define_insn "*movsi_internal1"
   "@
mr %0,%1
la %0,%a1
+   fmr %0,%1
lwz%U1%X1 %0,%1
lfiwzx %0,%y1
lxsiwzx %x0,%y1
@@ -7611,7 +7612,7 @@ (define_insn "*movsi_internal1"
mt%0 %1
nop"
   [(set_attr "type"
- "*,  *,
+ "*,  *,  fpsimple,
   load,   fpload, fpload,
   store,  fpstore,fpstore,
   *,  *,  *,  *,
@@ -7620,7 +7621,7 @@ (define_insn "*movsi_internal1"
   mtvsr,  mfvsr,
   *,  *,  *")
(set_attr "length"
- "*,  *,
+ "*,  *,  *,
   *,  *,  *,
   *,  *,  *,
   *,  *,  *,  8,
@@ -7629,9 +7630,9 @@ (define_insn "*movsi_internal1"
   *,  *,
   *,  *,  *")
(set_attr "isa"
- "*,  *,
-  *,  p8v,p8v,
-  *,  p8v,p8v,
+ "*,  *,  *,
+  *,  p7, p8v,
+  *,  *,  p8v,
   *,  *,  p10,*,
   p8v,p9v,p9v,p8v,
   p9v,p8v,p9v,



Re: [PATCH-1v2, rs6000] Enable SImode in FP registers on P7 [PR88558]

2023-09-14 Thread HAO CHEN GUI via Gcc-patches
Hi Kewen,

在 2023/9/12 17:33, Kewen.Lin 写道:
> Ok, at least regression testing doesn't expose any needs to do disparaging
> for this.  Could you also test this patch with SPEC2017 for P7 and P8
> separately at options like -O2 or -O3, to see if there is any assembly
> change, and if yes filtering out some typical to check it's expected or
> not?  I think it can help us to better evaluate the impact.  Thanks!

Just compared the object files of SPEC2017 for P7 and P8. There is no
difference for P7. For P8, some object files differ, and all the
differences are of the same kind: the patched object files replace xxlor with
fmr. This is expected, as fmr is added ahead of xxlor in "*movsi_internal1".

Thanks
Gui Haochen


Re: [PATCH, rs6000] Call vector load/store with length expand only on 64-bit Power10 [PR96762]

2023-08-30 Thread HAO CHEN GUI via Gcc-patches
Kewen,
  I refined the patch according to your comments and it passed bootstrap
and regression test.

  I committed it as
https://gcc.gnu.org/g:946b8967b905257ac9f140225db744c9a6ab91be

Thanks
Gui Haochen

在 2023/8/29 16:55, Kewen.Lin 写道:
> Hi Haochen,
> 
> on 2023/8/29 10:50, HAO CHEN GUI wrote:
>> Hi,
>>   This patch adds "TARGET_64BIT" check when calling vector load/store
>> with length expand in expand_block_move. It matches the expand condition
>> of "lxvl" and "stxvl" defined in vsx.md.
>>
>>   This patch fixes the ICE occurred with the test case on 32-bit Power10.
>>
>>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
>>
>> Thanks
>> Gui Haochen
>>
>>
>> ChangeLog
>> rs6000: call vector load/store with length expand only on 64-bit Power10
>>
>> gcc/
>>  PR target/96762
>>  * config/rs6000/rs6000-string.cc (expand_block_move): Call vector
>>  load/store with length expand only on 64-bit Power10.
>>
>> gcc/testsuite/
>>  PR target/96762
>>  * gcc.target/powerpc/pr96762.c: New.
>>
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/rs6000-string.cc 
>> b/gcc/config/rs6000/rs6000-string.cc
>> index cd8ee8c..d1b48c2 100644
>> --- a/gcc/config/rs6000/rs6000-string.cc
>> +++ b/gcc/config/rs6000/rs6000-string.cc
>> @@ -2811,8 +2811,9 @@ expand_block_move (rtx operands[], bool might_overlap)
>>gen_func.mov = gen_vsx_movv2di_64bit;
>>  }
>>else if (TARGET_BLOCK_OPS_UNALIGNED_VSX
>> -   && TARGET_POWER10 && bytes < 16
>> -   && orig_bytes > 16
>> +   /* Only use lxvl/stxvl on 64bit POWER10.  */
>> +   && TARGET_POWER10 && TARGET_64BIT
>> +   && bytes < 16 && orig_bytes > 16
>> && !(bytes == 1 || bytes == 2
>>  || bytes == 4 || bytes == 8)
>> && (align >= 128 || !STRICT_ALIGNMENT))
> 
> Nit: Since you touched this part of code, could you format it better as well, 
> like:
> 
>   else if (TARGET_BLOCK_OPS_UNALIGNED_VSX
>  /* Only use lxvl/stxvl on 64bit POWER10.  */
>  && TARGET_POWER10
>  && TARGET_64BIT
>  && bytes < 16
>  && orig_bytes > 16
>  && !(bytes == 1
>   || bytes == 2
>   || bytes == 4
>   || bytes == 8)
>  && (align >= 128
>  || !STRICT_ALIGNMENT))
> 
> 
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr96762.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr96762.c
>> new file mode 100644
>> index 000..1145dd1
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr96762.c
>> @@ -0,0 +1,11 @@
>> +/* { dg-do compile { target ilp32 } } */
> 
> Nit: we can compile this on lp64, so you can remove the ilp32 restriction,
> ...
> 
>> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
>> +
> 
> ... but add one comment line to note the initial purpose, like:
> 
> /* Verify there is no ICE on ilp32 env.  */
> 
> or similar.
> 
> Okay for trunk with these nits fixed, thanks!
> 
> BR,
> Kewen
> 
>> +extern void foo (char *);
>> +
>> +void
>> +bar (void)
>> +{
>> +  char zj[] = "";
>> +  foo (zj);
>> +}


Re: [PATCH-1, combine] Don't widen shift mode when target has rotate/mask instruction on original mode [PR93738]

2023-08-20 Thread HAO CHEN GUI via Gcc-patches
Jeff,
  Thanks a lot for your comments.

  The widen shift mode is on i1/i2 before they're combined with i3 to newpat.
The newpat matches rotate/mask pattern. The i1/i2 itself don't match
rotate/mask pattern.

  I did an experiment to disable widening the shift mode for
lshiftrt. I tested it on powerpc/x86/aarch64 and no regressions occurred.
I thought that widening the shift mode was helpful for newpat matching, but it
seems not to be; at least it has no impact on powerpc/x86/aarch64.

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 4bf867d74b0..0b9b115f9bb 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -10479,11 +10479,6 @@ try_widen_shift_mode (enum rtx_code code, rtx op, int 
count,
   return orig_mode;

 case LSHIFTRT:
-  /* Similarly here but with zero bits.  */
-  if (HWI_COMPUTABLE_MODE_P (mode)
- && (nonzero_bits (op, mode) & ~GET_MODE_MASK (orig_mode)) == 0)
-   return mode;
-
   /* We can also widen if the bits brought in will be masked off.  This
 operation is performed in ORIG_MODE.  */
   if (outer_code == AND)

Segher,
  Could you tell me the purpose of widening the shift mode in
simplify_shift_const? Does it definitely reduce the rtx cost, or does it help
match patterns? Thanks a lot.

Thanks
Gui Haochen


在 2023/8/5 7:32, Jeff Law 写道:
> 
> 
> On 7/20/23 18:59, HAO CHEN GUI wrote:
>> Hi Jeff,
>>
>> 在 2023/7/21 5:27, Jeff Law 写道:
>>> Wouldn't it make more sense to just try rotate/mask in the original mode 
>>> before trying a shift in a widened mode?  I'm not sure why we need a target 
>>> hook here.
>>
>> There is no chance to try rotate/mask with the original mode when
>> expensive_optimizations is set. The subst widens the shift mode.
> But we can add it before the attempt in the wider mode.
> 
>>
>>    if (flag_expensive_optimizations)
>>  {
>>    /* Pass pc_rtx so no substitutions are done, just
>>   simplifications.  */
>>    if (i1)
>>  {
>>    subst_low_luid = DF_INSN_LUID (i1);
>>    i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
>>  }
>>
>>    subst_low_luid = DF_INSN_LUID (i2);
>>    i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
>>  }
>>
>> I don't know if the wider mode is helpful to other targets, so
>> I added the target hook.
> In this scenario we're often better off relying on rtx_costs (even with all 
> its warts) rather than adding yet another target hook.
> 
> I'd love to hear from Segher here to see if he's got other ideas.
> 
> jeff


Re: [PATCHv4, rs6000] Generate mfvsrwz for all subtargets and remove redundant zero extend [PR106769]

2023-08-16 Thread HAO CHEN GUI via Gcc-patches
Committed after fixing the comments.

https://gcc.gnu.org/g:a79cf858b39e01c80537bc5d47a5e9004418c267

Thanks
Gui Haochen

在 2023/8/14 15:47, Kewen.Lin 写道:
> Hi Haochen,
> 
> on 2023/8/14 10:18, HAO CHEN GUI wrote:
>> Hi,
>>   This patch modifies vsx extract expand and generates mfvsrwz/stxsiwx
>> for all sub targets when the mode is V4SI and the extracted element is word
>> 1 from BE order. Also this patch adds a insn pattern for mfvsrwz which
>> helps eliminate redundant zero extend.
>>
>>   Compared to last version, the main change is to put the word index
>> checking in the split condition of "*vsx_extract_v4si_w023". Also modified
>> some comments.
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/625380.html
>>
>>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
>>
>> Thanks
>> Gui Haochen
>>
>> ChangeLog
>> rs6000: Generate mfvsrwz for all platform and remove redundant zero extend
>>
>> mfvsrwz has lower latency than xxextractuw or vextuw[lr]x.  So it should be
>> generated even with p9 vector enabled.  Also the instruction is already
>> zero extended.  A combine pattern is needed to eliminate redundant zero
>> extend instructions.
>>
>> gcc/
>>  PR target/106769
>>  * config/rs6000/vsx.md (expand vsx_extract_): Set it only
>>  for V8HI and V16QI.
>>  (vsx_extract_v4si): New expand for V4SI extraction.
>>  (vsx_extract_v4si_w1): New insn pattern for V4SI extraction on
>>  word 1 from BE order.   
>>  (*mfvsrwz): New insn pattern for mfvsrwz.
>>  (*vsx_extract__di_p9): Assert that it won't be generated on
>>  word 1 from BE order.
>>  (*vsx_extract_si): Remove.
>>  (*vsx_extract_v4si_w023): New insn and split pattern on word 0, 2,
>>  3 from BE order.
>>
>> gcc/testsuite/
>>  PR target/106769
>>  * gcc.target/powerpc/pr106769.h: New.
>>  * gcc.target/powerpc/pr106769-p8.c: New.
>>  * gcc.target/powerpc/pr106769-p9.c: New.
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
>> index 0a34ceebeb5..1cbdc2f1c01 100644
>> --- a/gcc/config/rs6000/vsx.md
>> +++ b/gcc/config/rs6000/vsx.md
>> @@ -3722,9 +3722,9 @@ (define_insn "vsx_xxpermdi2__1"
>>  (define_expand  "vsx_extract_"
>>[(parallel [(set (match_operand: 0 "gpc_reg_operand")
>> (vec_select:
>> -(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
>> +(match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand")
>>  (parallel [(match_operand:QI 2 "const_int_operand")])))
>> -  (clobber (match_scratch:VSX_EXTRACT_I 3))])]
>> +  (clobber (match_scratch:VSX_EXTRACT_I2 3))])]
>>"VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT"
>>  {
>>/* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
>> @@ -3736,6 +3736,63 @@ (define_expand  "vsx_extract_"
>>  }
>>  })
>>
>> +(define_expand  "vsx_extract_v4si"
>> +  [(parallel [(set (match_operand:SI 0 "gpc_reg_operand")
>> +   (vec_select:SI
>> +(match_operand:V4SI 1 "gpc_reg_operand")
>> +(parallel [(match_operand:QI 2 "const_0_to_3_operand")])))
>> +  (clobber (match_scratch:V4SI 3))])]
>> +  "TARGET_DIRECT_MOVE_64BIT"
>> +{
>> +  /* The word 1 (BE order) can be extracted by mfvsrwz/stxsiwx.  So just
>> + fall through to vsx_extract_v4si_w1.  */
>> +  if (TARGET_P9_VECTOR
>> +  && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))
>> +{
>> +  emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1],
>> +  operands[2]));
>> +  DONE;
>> +}
>> +})
>> +
>> +/* Extract from word 1 (BE order);  */
> 
> Nit: I guessed I requested this before, please use ";" instead of
> "/* ... */" for the comments, to align with the existing ones.
> 
>> +(define_insn "vsx_extract_v4si_w1"
>> +  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
>> +(vec_select:SI
>> + (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
>> + (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
>> +   (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
>> +  "TAR

Re: [PATCH, rs6000] Skip redundant vector extract if the element is first element of dword0 [PR110429]

2023-08-16 Thread HAO CHEN GUI via Gcc-patches
Committed after tweaking and testing.
https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=d471bdb0453de7b738f49148b66d57cb5871937d

Thanks
Gui Haochen

在 2023/7/28 17:32, Kewen.Lin 写道:
> Hi Haochen,
> 
> on 2023/7/5 11:22, HAO CHEN GUI wrote:
>> Hi,
>>   This patch skips redundant vector extract insn to be generated when
>> the extracted element is the first element of dword0 and the destination
> 
> "The first element" is confusing, it's easy to be misunderstood as element
> 0, but in fact the extracted element index is: 
>   - for byte, 7 on BE while 8 on LE;
>   - for half word, 3 on BE while 4 on LE;
> 
> so maybe just say when the extracted index for byte and half word like above,
> the element to be stored is already in the corresponding place for stxsi[hb]x,
> we don't need a redundant vector extraction at all.
> 
>> is a memory operand. Only one 'stxsi[hb]x' instruction is enough.
>>
>>   The V4SImode is fixed in a previous patch.
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622101.html
>>
>>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
>> Thanks
>> Gui Haochen
>>
>> ChangeLog
>> rs6000: Skip redundant vector extract if the element is first element of
>> dword0
>>
>> gcc/
>>  PR target/110429
>>  * config/rs6000/vsx.md (*vsx_extract__store_p9): Skip vector
>>  extract when the element is the first element of dword0.
>>
>> gcc/testsuite/
>>  PR target/110429
>>  * gcc.target/powerpc/pr110429.c: New.
>>
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
>> index 0c269e4e8d9..b3fec910eb6 100644
>> --- a/gcc/config/rs6000/vsx.md
>> +++ b/gcc/config/rs6000/vsx.md
>> @@ -3855,7 +3855,22 @@ (define_insn_and_split "*vsx_extract__store_p9"
>>  (parallel [(match_dup 2)])))
>>(clobber (match_dup 4))])
>> (set (match_dup 0)
>> -(match_dup 3))])
>> +(match_dup 3))]
>> +{
>> +  enum machine_mode dest_mode = GET_MODE (operands[0]);
> 
> Nit: Move this line ...
> 
>> +
>> +  if (which_alternative == 0
>> +  && ((mode == V16QImode
>> +   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 7 : 8))
>> +  || (mode == V8HImode
>> +  && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 3 : 4
>> +{
> 
> ... here.
> 
>> +  emit_move_insn (operands[0],
>> +  gen_rtx_REG (dest_mode, REGNO (operands[3])));
>> +  DONE;
>> +}
>> +})
>> +
>>
>>  (define_insn_and_split  "*vsx_extract_si"
>>[(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110429.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr110429.c
>> new file mode 100644
>> index 000..5a938f9f90a
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr110429.c
>> @@ -0,0 +1,28 @@
>> +/* { dg-do compile } */
>> +/* { dg-skip-if "" { powerpc*-*-darwin* } } */
>> +/* { dg-require-effective-target powerpc_p9vector_ok } */
>> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
>> +/* { dg-require-effective-target has_arch_ppc64 } */
>> +
>> +#include <altivec.h>
>> +
>> +#ifdef __BIG_ENDIAN__
>> +#define DWORD0_FIRST_SHORT 3
>> +#define DWORD0_FIRST_CHAR 7
>> +#else
>> +#define DWORD0_FIRST_SHORT 4
>> +#define DWORD0_FIRST_CHAR 8
>> +#endif
>> +
>> +void vec_extract_short (vector short v, short* p)
>> +{
>> +   *p = vec_extract(v, DWORD0_FIRST_SHORT);
>> +}
>> +
>> +void vec_extract_char (vector char v, char* p)
>> +{
>> +   *p = vec_extract(v, DWORD0_FIRST_CHAR);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times "stxsi\[hb\]x" 2 } } */
> 
> Nit: Break this check into stxsihx and stxsibx, and surround
> with \m and \M.
> 
>> +/* { dg-final { scan-assembler-not "vextractu\[hb\]" } } */
> 
> Also with \m and \M.
> 
> OK for trunk with these nits tweaked and testing goes well,
> thanks!
> 
> BR,
> Kewen


[PATCHv4, rs6000] Generate mfvsrwz for all subtargets and remove redundant zero extend [PR106769]

2023-08-13 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch modifies vsx extract expand and generates mfvsrwz/stxsiwx
for all sub targets when the mode is V4SI and the extracted element is word
1 from BE order. Also this patch adds an insn pattern for mfvsrwz which
helps eliminate redundant zero extend.

  Compared to last version, the main change is to put the word index
checking in the split condition of "*vsx_extract_v4si_w023". Also modified
some comments.
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/625380.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: Generate mfvsrwz for all platform and remove redundant zero extend

mfvsrwz has lower latency than xxextractuw or vextuw[lr]x.  So it should be
generated even with p9 vector enabled.  Also the instruction is already
zero extended.  A combine pattern is needed to eliminate redundant zero
extend instructions.

gcc/
PR target/106769
* config/rs6000/vsx.md (expand vsx_extract_<mode>): Set it only
for V8HI and V16QI.
(vsx_extract_v4si): New expand for V4SI extraction.
(vsx_extract_v4si_w1): New insn pattern for V4SI extraction on
word 1 from BE order.   
(*mfvsrwz): New insn pattern for mfvsrwz.
(*vsx_extract_<mode>_di_p9): Assert that it won't be generated on
word 1 from BE order.
(*vsx_extract_si): Remove.
(*vsx_extract_v4si_w023): New insn and split pattern on word 0, 2,
3 from BE order.

gcc/testsuite/
PR target/106769
* gcc.target/powerpc/pr106769.h: New.
* gcc.target/powerpc/pr106769-p8.c: New.
* gcc.target/powerpc/pr106769-p9.c: New.

patch.diff
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0a34ceebeb5..1cbdc2f1c01 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3722,9 +3722,9 @@ (define_insn "vsx_xxpermdi2__1"
 (define_expand  "vsx_extract_"
   [(parallel [(set (match_operand: 0 "gpc_reg_operand")
   (vec_select:
-   (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
+   (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand")
(parallel [(match_operand:QI 2 "const_int_operand")])))
- (clobber (match_scratch:VSX_EXTRACT_I 3))])]
+ (clobber (match_scratch:VSX_EXTRACT_I2 3))])]
   "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT"
 {
   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
@@ -3736,6 +3736,63 @@ (define_expand  "vsx_extract_"
 }
 })

+(define_expand  "vsx_extract_v4si"
+  [(parallel [(set (match_operand:SI 0 "gpc_reg_operand")
+  (vec_select:SI
+   (match_operand:V4SI 1 "gpc_reg_operand")
+   (parallel [(match_operand:QI 2 "const_0_to_3_operand")])))
+ (clobber (match_scratch:V4SI 3))])]
+  "TARGET_DIRECT_MOVE_64BIT"
+{
+  /* The word 1 (BE order) can be extracted by mfvsrwz/stxsiwx.  So just
+ fall through to vsx_extract_v4si_w1.  */
+  if (TARGET_P9_VECTOR
+  && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))
+{
+  emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1],
+ operands[2]));
+  DONE;
+}
+})
+
+/* Extract from word 1 (BE order);  */
+(define_insn "vsx_extract_v4si_w1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
+   (vec_select:SI
+(match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
+(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
+   (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+{
+   if (which_alternative == 0)
+ return "mfvsrwz %0,%x1";
+
+   if (which_alternative == 1)
+ return "xxlor %x0,%x1,%x1";
+
+   if (which_alternative == 2)
+ return "stxsiwx %x1,%y0";
+
+   return ASM_COMMENT_START " vec_extract to same register";
+}
+  [(set_attr "type" "mfvsr,veclogical,fpstore,*")
+   (set_attr "length" "4,4,4,0")
+   (set_attr "isa" "p8v,*,p8v,*")])
+
+(define_insn "*mfvsrwz"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (zero_extend:DI
+ (vec_select:SI
+   (match_operand:V4SI 1 "vsx_register_operand" "wa")
+   (parallel [(match_operand:QI 2 "const_int_operand" "n")]
+   (clobber (match_scratch:V4SI 3 "=v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+  "mfvsrwz %0,%x1"
+  [(set_attr "type" "mfvsr")
+   (set_attr "isa" "p8v")])
+
 (define_insn "vsx_extract__p9"
   [(set (match_operand: 0 "gpc_reg_operand" "=r,")
(vec_select:
@@ -3807,6 +3864,9 @@ (define_insn_and_split "*vsx_extract__di_p9"
(parallel [(match_dup 2)])))
  (clobber (match_dup 3))])]
 {
+  gcc_assert (mode != V4SImode
+ || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2));
+
   operands[4] = gen_rtx_REG (mode, REGNO 

[PATCH, rs6000] Call vector load/store with length expand only on 64-bit Power10 [PR96762]

2023-08-28 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds "TARGET_64BIT" check when calling vector load/store
with length expand in expand_block_move. It matches the expand condition
of "lxvl" and "stxvl" defined in vsx.md.

  This patch fixes the ICE that occurred with the test case on 32-bit Power10.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen


ChangeLog
rs6000: call vector load/store with length expand only on 64-bit Power10

gcc/
PR target/96762
* config/rs6000/rs6000-string.cc (expand_block_move): Call vector
load/store with length expand only on 64-bit Power10.

gcc/testsuite/
PR target/96762
* gcc.target/powerpc/pr96762.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index cd8ee8c..d1b48c2 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -2811,8 +2811,9 @@ expand_block_move (rtx operands[], bool might_overlap)
  gen_func.mov = gen_vsx_movv2di_64bit;
}
   else if (TARGET_BLOCK_OPS_UNALIGNED_VSX
-  && TARGET_POWER10 && bytes < 16
-  && orig_bytes > 16
+  /* Only use lxvl/stxvl on 64bit POWER10.  */
+  && TARGET_POWER10 && TARGET_64BIT
+  && bytes < 16 && orig_bytes > 16
   && !(bytes == 1 || bytes == 2
|| bytes == 4 || bytes == 8)
   && (align >= 128 || !STRICT_ALIGNMENT))
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96762.c 
b/gcc/testsuite/gcc.target/powerpc/pr96762.c
new file mode 100644
index 000..1145dd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr96762.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target ilp32 } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+extern void foo (char *);
+
+void
+bar (void)
+{
+  char zj[] = "";
+  foo (zj);
+}


[PATCHv2, rs6000] Extract the element in dword0 by mfvsrd and shift/mask [PR110331]

2023-08-22 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements vector element extraction by mfvsrd and
shift/mask when the element is in dword0 of the vector. Previously,
GCC generated vsplat/mfvsrd on P8 and li/vextract on P9. Since mfvsrd
has lower latency than vextract and rldicl has lower latency than
vsplat, the new sequence is faster. In particular, the shift/mask
is not needed when the element is the first element of dword0, so it
saves another rldicl when a sign-extended value is returned.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: Extract the element in dword0 by mfvsrd and shift/mask

gcc/
PR target/110331
* config/rs6000/rs6000-protos.h (rs6000_vsx_element_in_dword0_p):
Declare.
(rs6000_vsx_extract_element_from_dword0): Declare.
* config/rs6000/rs6000.cc (rs6000_vsx_element_in_dword0_p): New
function to judge if an element is in dword0 of a vector.
(rs6000_vsx_extract_element_from_dword0): Extract an element from
dword0 by mfvsrd and lshiftrt and mask.
* config/rs6000/rs6000.md (*rotl3_mask): Rename to...
(rotl3_mask): ...this
* config/rs6000/vsx.md (split pattern for p9 vector extract): Call
rs6000_vsx_extract_element_from_dword0 if the element is in dword0.
(*vsx_extract__di_p9): Assert the extracted element isn't in
dword0.
(*vsx_extract_v4si_w023): Call
rs6000_vsx_extract_element_from_dword0 if the element is in dword0.
(*vsx_extract__zero_extend): Zero extend pattern for vector
extract on the element in dword0.
(*vsx_extract__p8): Call rs6000_vsx_extract_element_from_dword0
when the extracted element is in dword0.  Refine the pattern and
remove reload_completed from the split condition.

gcc/testsuite/
PR target/110331
* gcc.target/powerpc/fold-vec-extract-char.p8.c: Set the extracted
elements in dword1.
* gcc.target/powerpc/fold-vec-extract-char.p9.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-int.p8.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-int.p9.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-short.p8.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-short.p9.c: Likewise.
* gcc.target/powerpc/p9-extract-1.c: Likewise.
* gcc.target/powerpc/pr110331-p8.c: New.
* gcc.target/powerpc/pr110331-p9.c: New.
* gcc.target/powerpc/pr110331.h: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index f70118ea40f..ccef280122b 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -161,6 +161,8 @@ extern bool rs6000_function_pcrel_p (struct function *);
 extern bool rs6000_pcrel_p (void);
 extern bool rs6000_fndecl_pcrel_p (const_tree);
 extern void rs6000_output_addr_vec_elt (FILE *, int);
+extern bool rs6000_vsx_element_in_dword0_p (rtx, enum machine_mode);
+extern void rs6000_vsx_extract_element_from_dword0 (rtx, rtx, rtx, bool);

 /* Different PowerPC instruction formats that are used by GCC.  There are
various other instruction formats used by the PowerPC hardware, but these
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index efe9adce1f8..e15f8bd964c 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -29105,6 +29105,74 @@ rs6000_opaque_type_invalid_use_p (gimple *stmt)
   return false;
 }

+/* Return true when the element is in dword0 of a vector.  Exclude word
+   element 1 (BE order) as the word can be extracted by mfvsrwz directly.  */
+
+bool
+rs6000_vsx_element_in_dword0_p (rtx op, enum machine_mode mode)
+{
+  gcc_assert (CONST_INT_P (op));
+  gcc_assert (mode == V16QImode || mode == V8HImode || mode == V4SImode);
+
+  int units = GET_MODE_NUNITS (mode);
+  int elt = INTVAL (op);
+  elt = BYTES_BIG_ENDIAN ? units - 1 - elt : elt;
+
+  if (elt > units / 2
+  || (elt == units / 2 && mode != V4SImode))
+return true;
+  else
+return false;
+}
+
+/* Extract element from dword0 by mfvsrd and lshiftrt and mask.  Extend_p
+   indicates if zero extend is needed or not.  */
+
+void
+rs6000_vsx_extract_element_from_dword0 (rtx dest, rtx src, rtx element,
+   bool extend_p)
+{
+  enum machine_mode mode = GET_MODE (src);
+  gcc_assert (rs6000_vsx_element_in_dword0_p (element, mode));
+
+  enum machine_mode dest_mode = GET_MODE (dest);
+  enum machine_mode inner_mode = GET_MODE_INNER (mode);
+  int units = GET_MODE_NUNITS (mode);
+  int elt = INTVAL (element);
+  elt = BYTES_BIG_ENDIAN ? units - 1 - elt : elt;
+  int value, shift;
+  unsigned int mask;
+
+  rtx vec_tmp = gen_lowpart (V2DImode, src);
+  rtx tmp1 = can_create_pseudo_p ()
+? gen_reg_rtx (DImode)
+: simplify_gen_subreg (DImode, dest, dest_mode, 0);
+  value = BYTES_BIG_ENDIAN ? 0 : 1;
+  emit_insn (gen_vsx_extract_v2di 

[PATCH-1, rs6000] Enable SImode in FP register on P7 [PR88558]

2023-08-25 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch enables SImode in FP registers on P7. The "fctiw" instruction
stores its integer output in an FP register, so SImode in FP registers
needs to be enabled on P7 if we want to support "fctiw" there.

  The test case is in the second patch which implements 32bit inline
lrint.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: enable SImode in FP register on P7

gcc/
PR target/88558
* config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached):
Enable SImode in FP registers for P7.
* config/rs6000/rs6000.md (*movsi_internal1): Add fmr for SImode
moves between FP registers.  Set attribute isa of stfiwx to "*"
and attribute isa of lfiwzx to "p7".

patch.diff
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 44b448d2ba6..99085c2cdd7 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1903,7 +1903,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, 
machine_mode mode)
  if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
return 1;

- if (TARGET_P8_VECTOR && (mode == SImode))
+ if (TARGET_POPCNTD && mode == SImode)
return 1;

  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index cdab49fbb91..ac5d29a2cf8 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7566,7 +7566,7 @@ (define_split

 (define_insn "*movsi_internal1"
   [(set (match_operand:SI 0 "nonimmediate_operand"
- "=r, r,
+ "=r, r,  ^d,
   r,  d,  v,
   m,  ?Z, ?Z,
   r,  r,  r,  r,
@@ -7575,7 +7575,7 @@ (define_insn "*movsi_internal1"
   wa, r,
   r,  *h, *h")
(match_operand:SI 1 "input_operand"
- "r,  U,
+ "r,  U,  ^d,
   m,  ?Z, ?Z,
   r,  d,  v,
   I,  L,  eI, n,
@@ -7588,6 +7588,7 @@ (define_insn "*movsi_internal1"
   "@
mr %0,%1
la %0,%a1
+   fmr %0,%1
lwz%U1%X1 %0,%1
lfiwzx %0,%y1
lxsiwzx %x0,%y1
@@ -7611,7 +7612,7 @@ (define_insn "*movsi_internal1"
mt%0 %1
nop"
   [(set_attr "type"
- "*,  *,
+ "*,  *,  fpsimple,
   load,   fpload, fpload,
   store,  fpstore,fpstore,
   *,  *,  *,  *,
@@ -7620,7 +7621,7 @@ (define_insn "*movsi_internal1"
   mtvsr,  mfvsr,
   *,  *,  *")
(set_attr "length"
- "*,  *,
+ "*,  *,  *,
   *,  *,  *,
   *,  *,  *,
   *,  *,  *,  8,
@@ -7629,9 +7630,9 @@ (define_insn "*movsi_internal1"
   *,  *,
   *,  *,  *")
(set_attr "isa"
- "*,  *,
-  *,  p8v,p8v,
-  *,  p8v,p8v,
+ "*,  *,  *,
+  *,  p7, p8v,
+  *,  *,  p8v,
   *,  *,  p10,*,
   p8v,p9v,p9v,p8v,
   p9v,p8v,p9v,


[PATCH-2, rs6000] Implement 32bit inline lrint [PR88558]

2023-08-25 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements 32-bit inline lrint using "fctiw". It depends on
patch 1 to do SImode moves from FP registers on P7.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: support 32bit inline lrint

gcc/
PR target/88558
* config/rs6000/rs6000.md (lrintdi2): Remove TARGET_FPRND
from insn condition.
(lrintsi2): New insn pattern for 32bit lrint.

gcc/testsuite/
PR target/88558
* gcc.target/powerpc/pr88558.h: New.
* gcc.target/powerpc/pr88558-p7.c: New.
* gcc.target/powerpc/pr88558-p8v.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index fd263e8dfe3..b36304de8c6 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6655,10 +6655,18 @@ (define_insn "lrintdi2"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
(unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "")]
   UNSPEC_FCTID))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT"
   "fctid %0,%1"
   [(set_attr "type" "fp")])

+(define_insn "lrintsi2"
+  [(set (match_operand:SI 0 "gpc_reg_operand" "=d")
+   (unspec:SI [(match_operand:SFDF 1 "gpc_reg_operand" "")]
+  UNSPEC_FCTIW))]
+  "TARGET_HARD_FLOAT && TARGET_POPCNTD"
+  "fctiw %0,%1"
+  [(set_attr "type" "fp")])
+
 (define_insn "btrunc2"
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c 
b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
new file mode 100644
index 000..6437c55fa61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=power7" } */
+
+#include "pr88558.h"
+
+/* { dg-final { scan-assembler-times {\mfctid\M} 2 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctid\M} 1 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 2 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mstfiwx\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p8v.c 
b/gcc/testsuite/gcc.target/powerpc/pr88558-p8v.c
new file mode 100644
index 000..fd22123ffb6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p8v.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=power8" } */
+
+long int foo (double a)
+{
+  return __builtin_lrint (a);
+}
+
+long long bar (double a)
+{
+  return __builtin_llrint (a);
+}
+
+int baz (double a)
+{
+  return __builtin_irint (a);
+}
+
+/* { dg-final { scan-assembler-times {\mfctid\M} 2 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctid\M} 1 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 2 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mmfvsrwz\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558.h 
b/gcc/testsuite/gcc.target/powerpc/pr88558.h
new file mode 100644
index 000..0cc0c68dd4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558.h
@@ -0,0 +1,14 @@
+long int foo (double a)
+{
+  return __builtin_lrint (a);
+}
+
+long long bar (double a)
+{
+  return __builtin_llrint (a);
+}
+
+int baz (double a)
+{
+  return __builtin_irint (a);
+}





Re: Ping^2 [PATCH, rs6000] Correct match pattern in pr56605.c

2022-04-19 Thread HAO CHEN GUI via Gcc-patches
Hi,
   I tested the test case on Linux and AIX with both big and little endian.
The test case requires lp64 target, so it won't be tested on 32-bit targets.

On big endian (both AIX and Linux), it should match
(compare:CC (and:SI (subreg:SI (reg:DI 207) 4)

On little endian (both AIX and Linux), it should match
(compare:CC (and:SI (subreg:SI (reg:DI 207) 0)

So, the pattern in my patch should work fine.

/* { dg-final { scan-rtl-dump-times {\(compare:CC \(and:SI \(subreg:SI 
\(reg:DI} 1 "combine" } } */

Thanks.

On 14/4/2022 上午 5:30, Segher Boessenkool wrote:
> On Mon, Apr 11, 2022 at 08:54:14PM -0300, Alexandre Oliva wrote:
>> On Apr  7, 2022, HAO CHEN GUI via Gcc-patches  
>> wrote:
>>
>>>   Gentle ping this:
>>>https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590958.html
>>> Thanks
>>
>>>> On 28/2/2022 上午 11:17, HAO CHEN GUI wrote:
>>
>>>>> This patch corrects the match pattern in pr56605.c. The former pattern
>>>>> is wrong and test case fails with GCC11. It should match following insn on
>>>>> each subtarget after mode promotion is disabled. The patch need to be
>>>>> backported to GCC11.
>>
>>>>> -/* { dg-final { scan-rtl-dump-times {\(compare:CC 
>>>>> \((?:and|zero_extend):(?:DI) \((?:sub)?reg:[SD]I} 1 "combine" } } */
>>>>> +/* { dg-final { scan-rtl-dump-times {\(compare:CC \(and:SI \(subreg:SI 
>>>>> \(reg:DI} 1 "combine" } } */
>>
>>
>> How about this less strict change instead?
>>
>>
>> ppc: testsuite: PROMOTE_MODE fallout pr56605 [PR102146]
>>
>> The test expects a compare of DImode values, but after the removal of
>> PROMOTE_MODE from rs6000/, we get SImode.  Adjust the expectations.
>>
>> Tested with gcc-11 targeting ppc64-vx7r2.  Ok to install?
> 
> This should have been tested on Linux as well: it is now broken on both
> -m32 and -m64 there.  Please revert?
> 
> 
> Segher


Re: Ping^2 [PATCH, rs6000] Correct match pattern in pr56605.c

2022-04-19 Thread HAO CHEN GUI via Gcc-patches
Hi Segher,
   Yes, the previously committed patch causes the pattern to match two insns.
So I submitted a new patch which fixes the problem. Here is
the new patch.
https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590958.html

The new pattern is:
/* { dg-final { scan-rtl-dump-times {\(compare:CC \(and:SI \(subreg:SI 
\(reg:DI} 1 "combine" } } */

I tested it and it is fine on all sub-targets.
Thanks.

On 20/4/2022 上午 5:06, Segher Boessenkool wrote:
> On Tue, Apr 19, 2022 at 04:05:06PM +0800, HAO CHEN GUI wrote:
>>I tested the test case on Linux and AIX with both big and little endian.
>> The test case requires lp64 target, so it won't be tested on 32-bit targets.
>>
>> On big endian (both AIX and Linux), it should match
>> (compare:CC (and:SI (subreg:SI (reg:DI 207) 4)
>>
>> On little endian (both AIX and Linux), it should match
>> (compare:CC (and:SI (subreg:SI (reg:DI 207) 0)
>>
>> So, the pattern in my patch should work fine.
>>
>> /* { dg-final { scan-rtl-dump-times {\(compare:CC \(and:SI \(subreg:SI 
>> \(reg:DI} 1 "combine" } } */
> 
> On powerpc64-linux:
> 
> FAIL: gcc.target/powerpc/pr56605.c scan-rtl-dump-times combine "\\(compare:CC 
> \\((?:and|zero_extend):(?:[SD]I) \\((?:sub)?reg:[SD]I" 1
> 
> It matches twice instead of once, namely:
> 
> (insn 19 18 20 2 (parallel [
> (set (reg:CC 208)
> (compare:CC (and:SI (subreg:SI (reg:DI 207) 4)
> (const_int 3 [0x3]))
> (const_int 0 [0])))
> (set (reg:SI 129 [ prolog_loop_niters.5 ])
> (and:SI (subreg:SI (reg:DI 207) 4)
> (const_int 3 [0x3])))
> ]) 208 {*andsi3_imm_mask_dot2}
>  (nil))
> 
> (insn 81 80 82 11 (parallel [
> (set (reg:CC 232)
> (compare:CC (and:DI (subreg:DI (reg:SI 136 [ niters.6 ]) 0)
> (const_int 7 [0x7]))
> (const_int 0 [0])))
> (clobber (scratch:DI))
> ]) 207 {*anddi3_imm_mask_dot}
>  (expr_list:REG_DEAD (reg:SI 136 [ niters.6 ])
> (nil)))
> 
> The paradoxical subreg in the latter wasn't expected :-)
> 
> 
> Segher


[PATCH v4, rs6000] Add a combine pattern for CA minus one [PR95737]

2022-05-12 Thread HAO CHEN GUI via Gcc-patches
Hi,
   This patch adds a combine pattern for "CA minus one". As CA only has two
values (0 or 1), we could convert the following pattern
  (sign_extend:DI (plus:SI (reg:SI 98 ca)
                           (const_int -1 [0xffffffffffffffff])))
to
   (plus:DI (reg:DI 98 ca)
            (const_int -1 [0xffffffffffffffff]))
   With this patch, one unnecessary sign extend is eliminated.

   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-05-13 Haochen Gui 

gcc/
PR target/95737
* config/rs6000/rs6000.md (extenddi_ca_minus_one): Define.

gcc/testsuite/
PR target/95737
* gcc.target/powerpc/pr95737.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 64049a6e521..483a93956f8 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2353,6 +2353,19 @@ (define_insn "subf3_carry_in_xx"
   "subfe %0,%0,%0"
   [(set_attr "type" "add")])

+(define_insn_and_split "*extenddi_ca_minus_one"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (sign_extend:DI (plus:SI (reg:SI CA_REGNO)
+(const_int -1))))]
+  ""
+  "#"
+  ""
+  [(parallel [(set (match_dup 0)
+  (plus:DI (reg:DI CA_REGNO)
+   (const_int -1)))
+ (clobber (reg:DI CA_REGNO))])]
+  ""
+)

 (define_insn "@neg2"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr95737.c 
b/gcc/testsuite/gcc.target/powerpc/pr95737.c
new file mode 100644
index 000..d4d6a4198cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr95737.c
@@ -0,0 +1,10 @@
+/* PR target/95737 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mno-isel" } */
+/* { dg-final { scan-assembler-not {\mextsw\M} } } */
+
+
+unsigned long negativeLessThan (unsigned long a, unsigned long b)
+{
+   return -(a < b);
+}


[PATCH v5, rs6000] Add a combine pattern for CA minus one [PR95737]

2022-05-15 Thread HAO CHEN GUI via Gcc-patches
Hi,
   This patch adds a combine pattern for "CA minus one". As CA only has two
values (0 or 1), we could convert the following pattern
  (sign_extend:DI (plus:SI (reg:SI 98 ca)
                           (const_int -1 [0xffffffffffffffff])))
to
   (plus:DI (reg:DI 98 ca)
            (const_int -1 [0xffffffffffffffff]))
   With this patch, one unnecessary sign extend is eliminated.

   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-05-16 Haochen Gui 

gcc/
PR target/95737
* config/rs6000/rs6000.md (subfsi3_carry_in_xx_64): New.

gcc/testsuite/
PR target/95737
* gcc.target/powerpc/pr95737.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 64049a6e521..b97ac453fc0 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2353,6 +2353,19 @@ (define_insn "subf3_carry_in_xx"
   "subfe %0,%0,%0"
   [(set_attr "type" "add")])

+(define_insn_and_split "*subfsi3_carry_in_xx_64"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (sign_extend:DI (plus:SI (reg:SI CA_REGNO)
+(const_int -1))))]
+  "TARGET_POWERPC64"
+  "#"
+  ""
+  [(parallel [(set (match_dup 0)
+  (plus:DI (reg:DI CA_REGNO)
+   (const_int -1)))
+ (clobber (reg:DI CA_REGNO))])]
+  ""
+)

 (define_insn "@neg2"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr95737.c 
b/gcc/testsuite/gcc.target/powerpc/pr95737.c
new file mode 100644
index 000..30f0f819393
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr95737.c
@@ -0,0 +1,11 @@
+/* PR target/95737 */
+/* { dg-do compile } */
+/* Disable isel for P9 and later */
+/* { dg-options "-O2 -mno-isel" } */
+/* { dg-final { scan-assembler-not {\mextsw\M} } } */
+
+
+unsigned long negativeLessThan (unsigned long a, unsigned long b)
+{
+   return -(a < b);
+}


[PATCH v5, rs6000] Add V1TI into vector comparison expand [PR103316]

2022-05-30 Thread HAO CHEN GUI via Gcc-patches
Hi,
   This patch adds V1TI mode to a new mode iterator used in vector
comparison, shift and rotation expands. Without the patch, comparisons
between two vector __int128 values are converted to scalar comparisons and
the code is suboptimal. The patch fixes the issue. Now all comparisons
between two vector __int128 values generate the new P10 comparison
instructions. Also, the related built-ins generate the same instructions
after gimple folding, so they're added back to the folding list.

  This patch also merges some vector comparison, shift and rotation expands
for V1TI and other vector integer modes as they have similar patterns.
The V1TI-only expands are removed.

   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-05-24 Haochen Gui 

gcc/
PR target/103316
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable
gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET,
RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT,
RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI.
* config/rs6000/vector.md (VEC_IC): New mode iterator.  Add support
for new Power10 V1TI instructions.
(vec_cmp): Set mode iterator to VEC_IC.
(vec_cmpu): Likewise.
(vector_nlt): Set mode iterator to VEC_IC.
(vector_nltv1ti): Remove.
(vector_gtu): Set mode iterator to VEC_IC.
(vector_gtuv1ti): Remove.
(vector_nltu): Set mode iterator to VEC_IC.
(vector_nltuv1ti): Remove.
(vector_geu): Set mode iterator to VEC_IC.
(vector_ngt): Likewise.
(vector_ngtv1ti): Remove.
(vector_ngtu): Set mode iterator to VEC_IC.
(vector_ngtuv1ti): Remove.
(vector_gtu__p): Set mode iterator to VEC_IC.
(vector_gtu_v1ti_p): Remove.
(vrotl3): Set mode iterator to VEC_IC.  Emit insns for V1TI.
(vrotlv1ti3): Remove.
(vashr3): Set mode iterator to VEC_IC.  Emit insns for V1TI.
(vashrv1ti3): Remove.

gcc/testsuite/
PR target/103316
* gcc.target/powerpc/pr103316.c: New.
* gcc.target/powerpc/fold-vec-cmp-int128.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index e925ba9fad9..b67f4e066a8 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -2000,16 +2000,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case RS6000_BIF_VCMPEQUH:
 case RS6000_BIF_VCMPEQUW:
 case RS6000_BIF_VCMPEQUD:
-/* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple
-   folding produces worse code for 128-bit compares.  */
+case RS6000_BIF_VCMPEQUT:
   fold_compare_helper (gsi, EQ_EXPR, stmt);
   return true;

 case RS6000_BIF_VCMPNEB:
 case RS6000_BIF_VCMPNEH:
 case RS6000_BIF_VCMPNEW:
-/* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple
-   folding produces worse code for 128-bit compares.  */
+case RS6000_BIF_VCMPNET:
   fold_compare_helper (gsi, NE_EXPR, stmt);
   return true;

@@ -2021,9 +2019,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case RS6000_BIF_CMPGE_U4SI:
 case RS6000_BIF_CMPGE_2DI:
 case RS6000_BIF_CMPGE_U2DI:
-/* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI
-   for now, because gimple folding produces worse code for 128-bit
-   compares.  */
+case RS6000_BIF_CMPGE_1TI:
+case RS6000_BIF_CMPGE_U1TI:
   fold_compare_helper (gsi, GE_EXPR, stmt);
   return true;

@@ -2035,9 +2032,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case RS6000_BIF_VCMPGTUW:
 case RS6000_BIF_VCMPGTUD:
 case RS6000_BIF_VCMPGTSD:
-/* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST
-   for now, because gimple folding produces worse code for 128-bit
-   compares.  */
+case RS6000_BIF_VCMPGTUT:
+case RS6000_BIF_VCMPGTST:
   fold_compare_helper (gsi, GT_EXPR, stmt);
   return true;

@@ -2049,9 +2045,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case RS6000_BIF_CMPLE_U4SI:
 case RS6000_BIF_CMPLE_2DI:
 case RS6000_BIF_CMPLE_U2DI:
-/* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI
-   for now, because gimple folding produces worse code for 128-bit
-   compares.  */
+case RS6000_BIF_CMPLE_1TI:
+case RS6000_BIF_CMPLE_U1TI:
   fold_compare_helper (gsi, LE_EXPR, stmt);
   return true;

diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 4d0797c48f8..a0d33d2f604 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -26,6 +26,9 @@
 ;; Vector int modes
 (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])

+;; Vector int modes for comparison, shift and rotation
+(define_mode_iterator VEC_IC [V16QI 

Re: [PATCH v4, rs6000] Add V1TI into vector comparison expand [PR103316]

2022-05-25 Thread HAO CHEN GUI via Gcc-patches
Kewen,
  Thanks so much for your advice. Just one question about effective-target.

  The test cases need both power10_ok and int128 support. I saw that some
existing test cases have these two checks as well. But I wonder whether
power10_ok already covers int128 on powerpc targets. If so, can we drop one check?

On 26/5/2022 上午 11:22, Kewen.Lin wrote:
> Hi Haochen,
> 
> on 2022/5/24 16:45, HAO CHEN GUI wrote:
>> Hi,
>>This patch adds V1TI mode into a new mode iterator used in vector
>> comparison and rotation expands. Without the patch, the comparisons
>> between two vector __int128 are converted to scalar comparisons. The
>> code is suboptimal. The patch fixes the issue. Now all comparisons
>> between two vector __int128 generates P10 new comparison instructions.
>> Also the relative built-ins generate the same instructions after gimple
>> folding. So they're added back to the list.
>>
>>   This patch also merges some vector comparison and rotation expands
>> for V1T1 and other vector integer modes as they have the same patterns.
>> The expands for V1TI only are removed.
>>
>>Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
>> Is this okay for trunk? Any recommendations? Thanks a lot.
>>
>> ChangeLog
>> 2022-05-24 Haochen Gui 
>>
>> gcc/
>>  PR target/103316
>>  * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable
>>  gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET,
>>  RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT,
>>  RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI.
>>  * config/rs6000/vector.md (VEC_IC): Define.  Add support for new Power10
>>  V1TI instructions.
> 
> Nit: Maybe "New mode iterator" is better than "Define".
> 
>>  (vec_cmp): Set mode iterator to VEC_IC.
>>  (vec_cmpu): Likewise.
>>  (vector_nlt): Set mode iterator to VEC_IC.
>>  (vector_nltv1ti): Remove.
>>  (vector_gtu): Set mode iterator to VEC_IC.
>>  (vector_gtuv1ti): Remove.
>>  (vector_nltu): Set mode iterator to VEC_IC.
>>  (vector_nltuv1ti): Remove.
>>  (vector_geu): Set mode iterator to VEC_IC.
>>  (vector_ngt): Likewise.
>>  (vector_ngtv1ti): Remove.
>>  (vector_ngtu): Set mode iterator to VEC_IC.
>>  (vector_ngtuv1ti): Remove.
>>  (vector_gtu__p): Set mode iterator to VEC_IC.
>>  (vector_gtu_v1ti_p): Remove.
>>  (vrotl3): Set mode iterator to VEC_IC.  Emit insns for V1TI.
>>  (vrotlv1ti3): Remove.
>>  (vashr3): Set mode iterator to VEC_IC.  Emit insns for V1TI.
>>  (vashrv1ti3): Remove.
>>
>> gcc/testsuite/
>>  PR target/103316
>>  * gcc.target/powerpc/pr103316.c: New.
>>  * gcc.target/powerpc/fold-vec-cmp-int128.c: New.
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
>> b/gcc/config/rs6000/rs6000-builtin.cc
>> index e925ba9fad9..b67f4e066a8 100644
>> --- a/gcc/config/rs6000/rs6000-builtin.cc
>> +++ b/gcc/config/rs6000/rs6000-builtin.cc
>> @@ -2000,16 +2000,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator 
>> *gsi)
>>  case RS6000_BIF_VCMPEQUH:
>>  case RS6000_BIF_VCMPEQUW:
>>  case RS6000_BIF_VCMPEQUD:
>> -/* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple
>> -   folding produces worse code for 128-bit compares.  */
>> +case RS6000_BIF_VCMPEQUT:
>>fold_compare_helper (gsi, EQ_EXPR, stmt);
>>return true;
>>
>>  case RS6000_BIF_VCMPNEB:
>>  case RS6000_BIF_VCMPNEH:
>>  case RS6000_BIF_VCMPNEW:
>> -/* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple
>> -   folding produces worse code for 128-bit compares.  */
>> +case RS6000_BIF_VCMPNET:
>>fold_compare_helper (gsi, NE_EXPR, stmt);
>>return true;
>>
>> @@ -2021,9 +2019,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>>  case RS6000_BIF_CMPGE_U4SI:
>>  case RS6000_BIF_CMPGE_2DI:
>>  case RS6000_BIF_CMPGE_U2DI:
>> -/* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI
>> -   for now, because gimple folding produces worse code for 128-bit
>> -   compares.  */
>> +case RS6000_BIF_CMPGE_1TI:
>> +case RS6000_BIF_CMPGE_U1TI:
>>fold_compare_helper (gsi, GE_EXPR, stmt);
>>return true;
>>
>> @@ -2035,9 +2032,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>>  case RS6000

[PATCH v2, rs6000] Fix ICE on expand bcd__ [PR100736]

2022-05-26 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch fixes the ICE reported in PR100736. It removes the condition
that the finite-math-only flag is not set from the "*_cc" pattern.
With or without this flag, we can still use "cror" to check whether either
of two CC bits is set for "fp_two" codes. We don't need a reversed
comparison (implemented by crnot) here when the finite-math-only flag is
set, as the latencies of "cror" and "crnor" are the same.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-05-26 Haochen Gui 

gcc/
* config/rs6000/rs6000.md (*_cc): Remove the condition that the
finite-math-only flag is not set.

gcc/testsuite/
* gcc.target/powerpc/pr100736.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index fdfbc6566a5..a6f9cbc9b8b 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -12995,9 +12995,9 @@ (define_insn_and_split "*_cc"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(fp_two:GPR (match_operand:CCFP 1 "cc_reg_operand" "y")
  (const_int 0)))]
-  "!flag_finite_math_only"
+  ""
   "#"
-  "&& 1"
+  ""
   [(pc)]
 {
   rtx cc = rs6000_emit_fp_cror (, mode, operands[1]);
diff --git a/gcc/testsuite/gcc.target/powerpc/pr100736.c 
b/gcc/testsuite/gcc.target/powerpc/pr100736.c
new file mode 100644
index 000..32cb6df6cd9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr100736.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2 -ffinite-math-only" } */
+
+typedef __attribute__ ((altivec (vector__))) unsigned char v;
+
+int foo (v a, v b)
+{
+  return __builtin_vec_bcdsub_ge (a, b, 0);
+}
+
+/* { dg-final { scan-assembler {\mcror\M} } } */



Re: [PATCH v2, rs6000] Fix ICE on expand bcd__ [PR100736]

2022-06-01 Thread HAO CHEN GUI via Gcc-patches
Segher,
  Does a BCD comparison return false when either operand has an invalid
encoding? If yes, the result could be 3-way. We can check the gt and eq bits
for ge. We still can't use crnot to check only the lt bit, as there could be
an invalid encoding.
  Also, do you think finite-math-only excludes invalid encodings? It seems
GCC doesn't clearly define it.

Thanks.
Gui Haochen


On 2/6/2022 上午 6:05, Segher Boessenkool wrote:
> Hi!
> 
> On Tue, May 31, 2022 at 06:56:00PM -0500, Segher Boessenkool wrote:
>> It's not clear to me how this can ever happen without finite_math_only?
>> The patch is safe, sure, but it may the real problem is elsewhere.
> 
> So, it is incorrect the RTL for our bcd{add,sub} insns uses CCFP at all.
> 
> CCFP stands for the result of a 4-way comparison, regular float
> comparison: lt gt eq un.  But bcdadd does not have an unordered at all.
> Instead, it has the result of a 3-way comparison (lt gt eq), and bit 3
> is set if an overflow happened -- but still exactly one of bits 0..2 is
> set then!  (If one of the inputs is an invalid number it sets bits 0..3
> to 0001 though.)
> 
> So it would be much more correct and sensible to use regular integer
> comparison results here, so, CC.
> 
> Does that fix the problem?
> 
> 
> Segher


[PATCH v4, rs6000] Add V1TI into vector comparison expand [PR103316]

2022-05-24 Thread HAO CHEN GUI via Gcc-patches
Hi,
   This patch adds V1TI mode to a new mode iterator used in vector
comparison and rotation expands. Without the patch, comparisons
between two vector __int128 values are converted to scalar comparisons, and
the code is suboptimal. The patch fixes the issue. Now all comparisons
between two vector __int128 values generate the new P10 comparison
instructions. Also, the related built-ins generate the same instructions
after gimple folding, so they're added back to the list.

  This patch also merges some vector comparison and rotation expands
for V1TI and other vector integer modes as they have the same patterns.
The V1TI-only expands are removed.

   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-05-24 Haochen Gui 

gcc/
PR target/103316
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable
gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET,
RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT,
RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI.
* config/rs6000/vector.md (VEC_IC): Define.  Add support for new Power10
V1TI instructions.
(vec_cmp): Set mode iterator to VEC_IC.
(vec_cmpu): Likewise.
(vector_nlt): Set mode iterator to VEC_IC.
(vector_nltv1ti): Remove.
(vector_gtu): Set mode iterator to VEC_IC.
(vector_gtuv1ti): Remove.
(vector_nltu): Set mode iterator to VEC_IC.
(vector_nltuv1ti): Remove.
(vector_geu): Set mode iterator to VEC_IC.
(vector_ngt): Likewise.
(vector_ngtv1ti): Remove.
(vector_ngtu): Set mode iterator to VEC_IC.
(vector_ngtuv1ti): Remove.
(vector_gtu__p): Set mode iterator to VEC_IC.
(vector_gtu_v1ti_p): Remove.
(vrotl3): Set mode iterator to VEC_IC.  Emit insns for V1TI.
(vrotlv1ti3): Remove.
(vashr3): Set mode iterator to VEC_IC.  Emit insns for V1TI.
(vashrv1ti3): Remove.

gcc/testsuite/
PR target/103316
* gcc.target/powerpc/pr103316.c: New.
* gcc.target/powerpc/fold-vec-cmp-int128.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index e925ba9fad9..b67f4e066a8 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -2000,16 +2000,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case RS6000_BIF_VCMPEQUH:
 case RS6000_BIF_VCMPEQUW:
 case RS6000_BIF_VCMPEQUD:
-/* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple
-   folding produces worse code for 128-bit compares.  */
+case RS6000_BIF_VCMPEQUT:
   fold_compare_helper (gsi, EQ_EXPR, stmt);
   return true;

 case RS6000_BIF_VCMPNEB:
 case RS6000_BIF_VCMPNEH:
 case RS6000_BIF_VCMPNEW:
-/* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple
-   folding produces worse code for 128-bit compares.  */
+case RS6000_BIF_VCMPNET:
   fold_compare_helper (gsi, NE_EXPR, stmt);
   return true;

@@ -2021,9 +2019,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case RS6000_BIF_CMPGE_U4SI:
 case RS6000_BIF_CMPGE_2DI:
 case RS6000_BIF_CMPGE_U2DI:
-/* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI
-   for now, because gimple folding produces worse code for 128-bit
-   compares.  */
+case RS6000_BIF_CMPGE_1TI:
+case RS6000_BIF_CMPGE_U1TI:
   fold_compare_helper (gsi, GE_EXPR, stmt);
   return true;

@@ -2035,9 +2032,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case RS6000_BIF_VCMPGTUW:
 case RS6000_BIF_VCMPGTUD:
 case RS6000_BIF_VCMPGTSD:
-/* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST
-   for now, because gimple folding produces worse code for 128-bit
-   compares.  */
+case RS6000_BIF_VCMPGTUT:
+case RS6000_BIF_VCMPGTST:
   fold_compare_helper (gsi, GT_EXPR, stmt);
   return true;

@@ -2049,9 +2045,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case RS6000_BIF_CMPLE_U4SI:
 case RS6000_BIF_CMPLE_2DI:
 case RS6000_BIF_CMPLE_U2DI:
-/* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI
-   for now, because gimple folding produces worse code for 128-bit
-   compares.  */
+case RS6000_BIF_CMPLE_1TI:
+case RS6000_BIF_CMPLE_U1TI:
   fold_compare_helper (gsi, LE_EXPR, stmt);
   return true;

diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 4d0797c48f8..3b7a272994f 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -26,6 +26,9 @@
 ;; Vector int modes
 (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])

+;; Vector int modes for comparison
+(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI "TARGET_POWER10")])
+
 ;; 

Ping [PATCH v3, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-05-29 Thread HAO CHEN GUI via Gcc-patches
Hi,
   Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-May/595164.html
Thanks.

On 18/5/2022 下午 4:52, HAO CHEN GUI wrote:
> Hi,
>   This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
> Tests show that outputs of xs[min/max]dp are consistent with the standard
> of C99 fmin/max.
> 
>   This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
> of smin/max. So the builtins always generate xs[min/max]dp on all
> platforms.
> 
>   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-05-18 Haochen Gui 
> 
> gcc/
>   PR target/103605
>   * rs6000.md (FMINMAX): New.
>   (minmax_op): New.
>   (f3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN.
>   * rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set pattern to fmaxdf3.
>   (__builtin_vsx_xsmindp): Set pattern to fmindf3.
> 
> gcc/testsuite/
>   PR target/103605
>   * gcc.dg/pr103605.c: New.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index f4a9f24bcc5..8b735493b40 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -1613,10 +1613,10 @@
>  XSCVSPDP vsx_xscvspdp {}
> 
>const double __builtin_vsx_xsmaxdp (double, double);
> -XSMAXDP smaxdf3 {}
> +XSMAXDP fmaxdf3 {}
> 
>const double __builtin_vsx_xsmindp (double, double);
> -XSMINDP smindf3 {}
> +XSMINDP fmindf3 {}
> 
>const double __builtin_vsx_xsrdpi (double);
>  XSRDPI vsx_xsrdpi {}
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index bf85baa5370..197de0838ee 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -158,6 +158,8 @@ (define_c_enum "unspec"
> UNSPEC_HASHCHK
> UNSPEC_XXSPLTIDP_CONST
> UNSPEC_XXSPLTIW_CONST
> +   UNSPEC_FMAX
> +   UNSPEC_FMIN
>])
> 
>  ;;
> @@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr"
>DONE;
>  })
> 
> +
> +(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
> +
> +(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
> +  (UNSPEC_FMIN "min")])
> +
> +(define_insn "f3"
> +  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
> + (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
> +   (match_operand:SFDF 2 "vsx_register_operand" "wa")]
> +   FMINMAX))]
> +"TARGET_VSX"
> +"xsdp %x0,%x1,%x2"
> +[(set_attr "type" "fp")]
> +)
> +
>  (define_expand "movcc"
> [(set (match_operand:GPR 0 "gpc_reg_operand")
>(if_then_else:GPR (match_operand 1 "comparison_operator")
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605.c 
> b/gcc/testsuite/gcc.target/powerpc/pr103605.c
> new file mode 100644
> index 000..e43ac40c2d1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr103605.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O1 -mvsx" } */
> +/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */
> +/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */
> +
> +#include 
> +
> +double test1 (double d0, double d1)
> +{
> +  return fmin (d0, d1);
> +}
> +
> +float test2 (float d0, float d1)
> +{
> +  return fmin (d0, d1);
> +}
> +
> +double test3 (double d0, double d1)
> +{
> +  return fmax (d0, d1);
> +}
> +
> +float test4 (float d0, float d1)
> +{
> +  return fmax (d0, d1);
> +}
> +
> +double test5 (double d0, double d1)
> +{
> +  return __builtin_vsx_xsmindp (d0, d1);
> +}
> +
> +double test6 (double d0, double d1)
> +{
> +  return __builtin_vsx_xsmaxdp (d0, d1);
> +}


Ping [PATCH v2, rs6000] Fix ICE on expand bcd__ [PR100736]

2022-05-29 Thread HAO CHEN GUI via Gcc-patches
Hi,
   Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-May/595661.html
Thanks.

On 26/5/2022 下午 3:35, HAO CHEN GUI wrote:
> Hi,
>   This patch fixes the ICE reported in PR100736. It removes the condition
> check of finite math only flag not setting in "*_cc" pattern.
> With or without this flag, we still can use "cror" to check if either
> two bits of CC is set or not for "fp_two" codes. We don't need a reverse
> comparison (implemented by crnot) here when the finite math flag is set,
> as the latency of "cror" and "crnor" are the same.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-05-26 Haochen Gui 
> 
> gcc/
>   * config/rs6000/rs6000.md (*_cc): Remove condition of
>   finite math only flag not setting.
> 
> gcc/testsuite/
>   * gcc.target/powerpc/pr100736.c: New.
> 
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index fdfbc6566a5..a6f9cbc9b8b 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -12995,9 +12995,9 @@ (define_insn_and_split "*_cc"
>[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
>   (fp_two:GPR (match_operand:CCFP 1 "cc_reg_operand" "y")
> (const_int 0)))]
> -  "!flag_finite_math_only"
> +  ""
>"#"
> -  "&& 1"
> +  ""
>[(pc)]
>  {
>rtx cc = rs6000_emit_fp_cror (, mode, operands[1]);
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr100736.c 
> b/gcc/testsuite/gcc.target/powerpc/pr100736.c
> new file mode 100644
> index 000..32cb6df6cd9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr100736.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> +/* { dg-options "-mdejagnu-cpu=power8 -O2 -ffinite-math-only" } */
> +
> +typedef __attribute__ ((altivec (vector__))) unsigned char v;
> +
> +int foo (v a, v b)
> +{
> +  return __builtin_vec_bcdsub_ge (a, b, 0);
> +}
> +
> +/* { dg-final { scan-assembler {\mcror\M} } } */
> 


[PATCH v3, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-05-18 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
Tests show that outputs of xs[min/max]dp are consistent with the standard
of C99 fmin/max.

  This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
of smin/max. So the builtins always generate xs[min/max]dp on all
platforms.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-05-18 Haochen Gui 

gcc/
PR target/103605
* rs6000.md (FMINMAX): New.
(minmax_op): New.
(f3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN.
* rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set pattern to fmaxdf3.
(__builtin_vsx_xsmindp): Set pattern to fmindf3.

gcc/testsuite/
PR target/103605
* gcc.target/powerpc/pr103605.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f4a9f24bcc5..8b735493b40 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1613,10 +1613,10 @@
 XSCVSPDP vsx_xscvspdp {}

   const double __builtin_vsx_xsmaxdp (double, double);
-XSMAXDP smaxdf3 {}
+XSMAXDP fmaxdf3 {}

   const double __builtin_vsx_xsmindp (double, double);
-XSMINDP smindf3 {}
+XSMINDP fmindf3 {}

   const double __builtin_vsx_xsrdpi (double);
 XSRDPI vsx_xsrdpi {}
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf85baa5370..197de0838ee 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -158,6 +158,8 @@ (define_c_enum "unspec"
UNSPEC_HASHCHK
UNSPEC_XXSPLTIDP_CONST
UNSPEC_XXSPLTIW_CONST
+   UNSPEC_FMAX
+   UNSPEC_FMIN
   ])

 ;;
@@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr"
   DONE;
 })

+
+(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
+
+(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
+(UNSPEC_FMIN "min")])
+
+(define_insn "f3"
+  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
+   (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
+ (match_operand:SFDF 2 "vsx_register_operand" "wa")]
+ FMINMAX))]
+"TARGET_VSX"
+"xsdp %x0,%x1,%x2"
+[(set_attr "type" "fp")]
+)
+
 (define_expand "movcc"
[(set (match_operand:GPR 0 "gpc_reg_operand")
 (if_then_else:GPR (match_operand 1 "comparison_operator")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605.c 
b/gcc/testsuite/gcc.target/powerpc/pr103605.c
new file mode 100644
index 000..e43ac40c2d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103605.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O1 -mvsx" } */
+/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */
+
+#include 
+
+double test1 (double d0, double d1)
+{
+  return fmin (d0, d1);
+}
+
+float test2 (float d0, float d1)
+{
+  return fmin (d0, d1);
+}
+
+double test3 (double d0, double d1)
+{
+  return fmax (d0, d1);
+}
+
+float test4 (float d0, float d1)
+{
+  return fmax (d0, d1);
+}
+
+double test5 (double d0, double d1)
+{
+  return __builtin_vsx_xsmindp (d0, d1);
+}
+
+double test6 (double d0, double d1)
+{
+  return __builtin_vsx_xsmaxdp (d0, d1);
+}


Re: [PATCH v5, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-06-22 Thread HAO CHEN GUI via Gcc-patches
Hi,

On 21/6/2022 上午 7:08, Segher Boessenkool wrote:
> && !flag_trapping_math
> 
> and/or whatever else is needed as well here.
> 
I have a question here. fmin/fmax are folded to MIN/MAX_EXPR when
flag_finite_math_only is set. So it seems !flag_trapping_math is not
needed for fmin/fmax? Also, xs[min|max]dp do raise traps.

/* Convert fmin/fmax to MIN_EXPR/MAX_EXPR.  C99 requires these
   functions to return the numeric arg if the other one is NaN.
   MIN and MAX don't honor that, so only transform if -ffinite-math-only
   is set.  C99 doesn't require -0.0 to be handled, so we don't have to
   worry about it either.  */
(if (flag_finite_math_only)
 (simplify
  (FMIN_ALL @0 @1)
  (min @0 @1))
 (simplify
  (FMAX_ALL @0 @1)
  (max @0 @1)))


> Are things like
>   fmin(4.0, 2.0);
> (still) optimised correctly?
I have tested it. fmin(4.0, 2.0) is converted to "2.0" in front end.
So my patch doesn't touch it.

Thanks a lot.
Gui Haochen


[PATCH v2, rs6000] Use CC for BCD operations [PR100736]

2022-06-22 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch uses CC instead of CCFP for all BCD operations. Thus, the finite
math flag has no impact on BCD operations. To support BCD overflow and
invalid coding, an UNSPEC is defined to move the bit to a general register.
The patterns of conditional branch and return with the overflow bit are
defined, as the UNSPEC and branch/return can be combined into one jump insn.
The split pattern of overflow bit extension is defined for optimization.

  This patch also replaces bcdadd with bcdsub in the BCD invalid coding
expander.

ChangeLog
2022-06-22 Haochen Gui 

gcc/
PR target/100736
* config/rs6000/altivec.md (BCD_TEST): Remove unordered.
(bcd_): Replace CCFP with CC.
(*bcd_test_): Replace CCFP with CC.  Generate
condition insn with CC mode.
(bcd_overflow_): New.
(*bcdoverflow_): New.
(*bcdinvalid_): Removed.
(bcdinvalid_): Implement by UNSPEC_BCDSUB and UNSPEC_BCD_OVERFLOW.
(nuun): New.
(*overflow_cbranch): New.
(*overflow_creturn): New.
(*overflow_extendsidi): New.
(bcdshift_v16qi): Replace CCFP with CC.
(bcdmul10_v16qi): Likewise.
(bcddiv10_v16qi): Likewise.
(peephole for bcd_add/sub): Likewise.
* config/rs6000/rs6000-builtins.def (__builtin_bcdadd_ov_v1ti): Set
pattern to bcdadd_overflow_v1ti.
(__builtin_bcdadd_ov_v16qi): Set pattern to bcdadd_overflow_v16qi.
(__builtin_bcdsub_ov_v1ti): Set pattern to bcdsub_overflow_v1ti.
(__builtin_bcdsub_ov_v16qi): Set pattern to bcdsub_overflow_v16qi.

gcc/testsuite/
PR target/100736
* gcc.target/powerpc/bcd-4.c: Adjust number of bcdadd and bcdsub.
Scan no cror insns.

patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index efc8ae35c2e..26f131e61ea 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -4370,7 +4370,7 @@ (define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD 
UNSPEC_BCDSUB])
 (define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add")
  (UNSPEC_BCDSUB "sub")])

-(define_code_iterator BCD_TEST [eq lt le gt ge unordered])
+(define_code_iterator BCD_TEST [eq lt le gt ge])
 (define_mode_iterator VBCD [V1TI V16QI])

 (define_insn "bcd_"
@@ -4379,7 +4379,7 @@ (define_insn "bcd_"
  (match_operand:VBCD 2 "register_operand" "v")
  (match_operand:QI 3 "const_0_to_1_operand" "n")]
 UNSPEC_BCD_ADD_SUB))
-   (clobber (reg:CCFP CR6_REGNO))]
+   (clobber (reg:CC CR6_REGNO))]
   "TARGET_P8_VECTOR"
   "bcd. %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
@@ -4389,9 +4389,9 @@ (define_insn "bcd_"
 ;; UNORDERED test on an integer type (like V1TImode) is not defined.  The type
 ;; probably should be one that can go in the VMX (Altivec) registers, so we
 ;; can't use DDmode or DFmode.
-(define_insn "*bcd_test_"
-  [(set (reg:CCFP CR6_REGNO)
-   (compare:CCFP
+(define_insn "bcd_test_"
+  [(set (reg:CC CR6_REGNO)
+   (compare:CC
 (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")
   (match_operand:VBCD 2 "register_operand" "v")
   (match_operand:QI 3 "const_0_to_1_operand" "i")]
@@ -4408,8 +4408,8 @@ (define_insn "*bcd_test2_"
  (match_operand:VBCD 2 "register_operand" "v")
  (match_operand:QI 3 "const_0_to_1_operand" "i")]
 UNSPEC_BCD_ADD_SUB))
-   (set (reg:CCFP CR6_REGNO)
-   (compare:CCFP
+   (set (reg:CC CR6_REGNO)
+   (compare:CC
 (unspec:V2DF [(match_dup 1)
   (match_dup 2)
   (match_dup 3)]
@@ -4502,8 +4502,8 @@ (define_insn "vclrrb"
[(set_attr "type" "vecsimple")])

 (define_expand "bcd__"
-  [(parallel [(set (reg:CCFP CR6_REGNO)
-  (compare:CCFP
+  [(parallel [(set (reg:CC CR6_REGNO)
+  (compare:CC
(unspec:V2DF [(match_operand:VBCD 1 "register_operand")
  (match_operand:VBCD 2 "register_operand")
  (match_operand:QI 3 "const_0_to_1_operand")]
@@ -4511,46 +4511,138 @@ (define_expand "bcd__"
(match_dup 4)))
  (clobber (match_scratch:VBCD 5))])
(set (match_operand:SI 0 "register_operand")
-   (BCD_TEST:SI (reg:CCFP CR6_REGNO)
+   (BCD_TEST:SI (reg:CC CR6_REGNO)
 (const_int 0)))]
   "TARGET_P8_VECTOR"
 {
   operands[4] = CONST0_RTX (V2DFmode);
+  emit_insn (gen_bcd_test_ (operands[0], operands[1],
+  operands[2], operands[3],
+  operands[4]));
+
+  rtx cr6 = gen_rtx_REG (CCmode, CR6_REGNO);
+  rtx condition_rtx = gen_rtx_ (SImode, cr6, const0_rtx);
+
+  if ( == GE ||  == LE)
+{
+  rtx not_result = gen_reg_rtx (CCEQmode);
+  rtx not_op, rev_cond_rtx;
+  rev_cond_rtx = gen_rtx_fmt_ee 

[PATCH v2] Modify combine pattern by a pseudo AND with its nonzero bits [PR93453]

2022-07-07 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch modifies the combine pattern after recog fails. With a new helper,
change_pseudo_and_mask, it converts a single pseudo to the pseudo ANDed with
a mask when the outer operator is IOR/XOR/PLUS and the inner operator is ASHIFT
or AND. The conversion helps the pattern match rotate-and-mask insns on some
targets.

  For test case rlwimi-2.c, current trunk fails on
"scan-assembler-times (?n)^\\s+[a-z]": it reports 21305 matches. My patch
reduces the total number of insns from 21305 to 21279.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-07-07 Haochen Gui 

gcc/
PR target/93453
* combine.cc (change_pseudo_and_mask): New.
(recog_for_combine): If recog fails, try again with the pattern
modified by change_pseudo_and_mask.
* config/rs6000/rs6000.md (plus_ior_xor): Removed.
(anonymous split pattern for plus_ior_xor): Removed.

gcc/testsuite/
PR target/93453
* gcc.target/powerpc/20050603-3.c: Modify dump check conditions.
* gcc.target/powerpc/rlwimi-2.c: Likewise.
* gcc.target/powerpc/pr93453-2.c: New.

patch.diff
diff --git a/gcc/combine.cc b/gcc/combine.cc
index a5fabf397f7..3cd7b2b652b 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -11599,6 +11599,47 @@ change_zero_ext (rtx pat)
   return changed;
 }

+/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
+   ASHIFT/AND, convert a pseudo to psuedo AND with a mask if its nonzero_bits
+   is less than its mode mask.  The nonzero_bits in other pass doesn't return
+   the same value as it does in combine pass.  */
+static bool
+change_pseudo_and_mask (rtx pat)
+{
+  rtx src = SET_SRC (pat);
+  if ((GET_CODE (src) == IOR
+   || GET_CODE (src) == XOR
+   || GET_CODE (src) == PLUS)
+  && (((GET_CODE (XEXP (src, 0)) == ASHIFT
+   || GET_CODE (XEXP (src, 0)) == AND)
+  && REG_P (XEXP (src, 1)
+{
+  rtx *reg =  (SET_SRC (pat), 1);
+  machine_mode mode = GET_MODE (*reg);
+  unsigned HOST_WIDE_INT nonzero = nonzero_bits (*reg, mode);
+  if (nonzero < GET_MODE_MASK (mode))
+   {
+ int shift;
+
+ if (GET_CODE (XEXP (src, 0)) == ASHIFT)
+   shift = INTVAL (XEXP (XEXP (src, 0), 1));
+ else
+   shift = ctz_hwi (INTVAL (XEXP (XEXP (src, 0), 1)));
+
+ if (shift > 0
+ && ((HOST_WIDE_INT_1U << shift) - 1) >= nonzero)
+   {
+ unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << shift) - 1;
+ rtx x = gen_rtx_AND (mode, *reg, GEN_INT (mask));
+ SUBST (*reg, x);
+ maybe_swap_commutative_operands (SET_SRC (pat));
+ return true;
+   }
+   }
+ }
+  return false;
+}
+
 /* Like recog, but we receive the address of a pointer to a new pattern.
We try to match the rtx that the pointer points to.
If that fails, we may try to modify or replace the pattern,
@@ -11646,7 +11687,10 @@ recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx 
*pnotes)
}
}
   else
-   changed = change_zero_ext (pat);
+   {
+ changed = change_pseudo_and_mask (pat);
+ changed |= change_zero_ext (pat);
+   }
 }
   else if (GET_CODE (pat) == PARALLEL)
 {
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 1367a2cb779..2bd6bd5f908 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -4207,24 +4207,6 @@ (define_insn_and_split "*rotl3_insert_3_"
(ior:GPR (and:GPR (match_dup 3) (match_dup 4))
 (ashift:GPR (match_dup 1) (match_dup 2])

-(define_code_iterator plus_ior_xor [plus ior xor])
-
-(define_split
-  [(set (match_operand:GPR 0 "gpc_reg_operand")
-   (plus_ior_xor:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
- (match_operand:SI 2 "const_int_operand"))
- (match_operand:GPR 3 "gpc_reg_operand")))]
-  "nonzero_bits (operands[3], mode)
-   < HOST_WIDE_INT_1U << INTVAL (operands[2])"
-  [(set (match_dup 0)
-   (ior:GPR (and:GPR (match_dup 3)
- (match_dup 4))
-(ashift:GPR (match_dup 1)
-(match_dup 2]
-{
-  operands[4] = GEN_INT ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1);
-})
-
 (define_insn "*rotlsi3_insert_4"
   [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
(ior:SI (and:SI (match_operand:SI 3 "gpc_reg_operand" "0")
diff --git a/gcc/testsuite/gcc.target/powerpc/20050603-3.c 
b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
index 4017d34f429..e628be11532 100644
--- a/gcc/testsuite/gcc.target/powerpc/20050603-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
@@ -12,7 +12,7 @@ void rotins (unsigned int x)
   b.y = (x<<12) | (x>>20);
 }

-/* { dg-final { scan-assembler-not {\mrlwinm} } } */
+/* { dg-final { 

[PATCH v3, rs6000] Disable TImode from Bool expanders [PR100694, PR93123]

2022-07-04 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch makes all 128-bit logical operation expanders FAIL for TImode, so
TImode is split into two DI registers during expand. Potential optimizations
can then be applied after the expand pass. Originally, the TImode logical
operations were split after the reload pass, which is too late. The test case
illustrates it.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is
this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-07-04 Haochen Gui 

gcc/
PR target/100694
* config/rs6000/rs6000.md (and3): Fail TImode.
(ior3): Likewise.
(xor3): Likewise.
(nor3): Likewise.
(andc3): Likewise.
(eqv3): Likewise.
(nand3): Likewise.
(orc3): Likewise.
(one_cmpl2): Define as an expand and fail TImode.
(*one_cmpl2): Define as an anonymous insn pattern.

gcc/testsuite/
PR target/100694
* gcc.target/powerpc/pr100694.c: New.
* gcc.target/powerpc/pr92398.c: New.
* gcc.target/powerpc/pr92398.h: Remove.
* gcc.target/powerpc/pr92398.p9-.c: Remove.
* gcc.target/powerpc/pr92398.p9+.c: Remove.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index c55ee7e171a..6e57aac3ebf 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7078,27 +7078,38 @@ (define_expand "subti3"
 })
 
 ;; 128-bit logical operations expanders
+;; Fail TImode in all 128-bit logical operations expanders and split it into
+;; two DI registers.

 (define_expand "and3"
   [(set (match_operand:BOOL_128 0 "vlogical_operand")
(and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
  (match_operand:BOOL_128 2 "vlogical_operand")))]
   ""
-  "")
+{
+  if (mode == TImode)
+FAIL;
+})

 (define_expand "ior3"
   [(set (match_operand:BOOL_128 0 "vlogical_operand")
 (ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
  (match_operand:BOOL_128 2 "vlogical_operand")))]
   ""
-  "")
+{
+  if (mode == TImode)
+FAIL;
+})

 (define_expand "xor3"
   [(set (match_operand:BOOL_128 0 "vlogical_operand")
 (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
  (match_operand:BOOL_128 2 "vlogical_operand")))]
   ""
-  "")
+{
+  if (mode == TImode)
+FAIL;
+})

 (define_expand "nor3"
   [(set (match_operand:BOOL_128 0 "vlogical_operand")
@@ -7106,7 +7117,10 @@ (define_expand "nor3"
 (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand"))
 (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand"]
   ""
-  "")
+{
+  if (mode == TImode)
+FAIL;
+})

 (define_expand "andc3"
   [(set (match_operand:BOOL_128 0 "vlogical_operand")
@@ -7114,7 +7128,10 @@ (define_expand "andc3"
 (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand"))
 (match_operand:BOOL_128 1 "vlogical_operand")))]
   ""
-  "")
+{
+  if (mode == TImode)
+FAIL;
+})

 ;; Power8 vector logical instructions.
 (define_expand "eqv3"
@@ -7123,7 +7140,10 @@ (define_expand "eqv3"
 (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
   (match_operand:BOOL_128 2 "vlogical_operand"]
   "mode == TImode || mode == PTImode || TARGET_P8_VECTOR"
-  "")
+{
+  if (mode == TImode)
+FAIL;
+})

 ;; Rewrite nand into canonical form
 (define_expand "nand3"
@@ -7132,7 +7152,10 @@ (define_expand "nand3"
 (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand"))
 (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand"]
   "mode == TImode || mode == PTImode || TARGET_P8_VECTOR"
-  "")
+{
+  if (mode == TImode)
+FAIL;
+})

 ;; The canonical form is to have the negated element first, so we need to
 ;; reverse arguments.
@@ -7142,7 +7165,10 @@ (define_expand "orc3"
 (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand"))
 (match_operand:BOOL_128 1 "vlogical_operand")))]
   "mode == TImode || mode == PTImode || TARGET_P8_VECTOR"
-  "")
+{
+  if (mode == TImode)
+FAIL;
+})

 ;; 128-bit logical operations insns and split operations
 (define_insn_and_split "*and3_internal"
@@ -7394,7 +7420,17 @@ (define_insn_and_split "*eqv3_internal2"
 (const_string "16")))])

 ;; 128-bit one's complement
-(define_insn_and_split "one_cmpl2"
+(define_expand "one_cmpl2"
+  [(set (match_operand:BOOL_128 0 "vlogical_operand" "=")
+   (not:BOOL_128
+ (match_operand:BOOL_128 1 "vlogical_operand" "")))]
+  ""
+{
+  if (mode == TImode)
+FAIL;
+})
+
+(define_insn_and_split "*one_cmpl2"
   [(set (match_operand:BOOL_128 0 "vlogical_operand" "=")
(not:BOOL_128
  (match_operand:BOOL_128 1 "vlogical_operand" "")))]
diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c 
b/gcc/testsuite/gcc.target/powerpc/pr100694.c
new file mode 100644
index 000..99dd3ca89ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr100694.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* 

Ping [PATCH v6, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-07-04 Thread HAO CHEN GUI via Gcc-patches
Hi,
   Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597158.html
Thanks.

On 24/6/2022 上午 10:02, HAO CHEN GUI wrote:
> Hi,
>   This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
> Tests show that outputs of xs[min/max]dp are consistent with the standard
> of C99 fmin/max.
> 
>   This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
> of smin/max. So the builtins always generate xs[min/max]dp on all
> platforms.
> 
>   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-06-24 Haochen Gui 
> 
> gcc/
>   PR target/103605
>   * config/rs6000/rs6000.md (FMINMAX): New.
>   (minmax_op): New.
>   (f3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN.
>   * config/rs6000/rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set
>   pattern to fmaxdf3.
>   (__builtin_vsx_xsmindp): Set pattern to fmindf3.
> 
> gcc/testsuite/
>   PR target/103605
>   * gcc.dg/powerpc/pr103605.c: New.
> 
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index f4a9f24bcc5..8b735493b40 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -1613,10 +1613,10 @@
>  XSCVSPDP vsx_xscvspdp {}
> 
>const double __builtin_vsx_xsmaxdp (double, double);
> -XSMAXDP smaxdf3 {}
> +XSMAXDP fmaxdf3 {}
> 
>const double __builtin_vsx_xsmindp (double, double);
> -XSMINDP smindf3 {}
> +XSMINDP fmindf3 {}
> 
>const double __builtin_vsx_xsrdpi (double);
>  XSRDPI vsx_xsrdpi {}
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index bf85baa5370..ae0dd98f0f9 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -158,6 +158,8 @@ (define_c_enum "unspec"
> UNSPEC_HASHCHK
> UNSPEC_XXSPLTIDP_CONST
> UNSPEC_XXSPLTIW_CONST
> +   UNSPEC_FMAX
> +   UNSPEC_FMIN
>])
> 
>  ;;
> @@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr"
>DONE;
>  })
> 
> +
> +(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
> +
> +(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
> +  (UNSPEC_FMIN "min")])
> +
> +(define_insn "f3"
> +  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
> + (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
> +   (match_operand:SFDF 2 "vsx_register_operand" "wa")]
> +  FMINMAX))]
> +  "TARGET_VSX && !flag_finite_math_only"
> +  "xsdp %x0,%x1,%x2"
> +  [(set_attr "type" "fp")]
> +)
> +
>  (define_expand "movcc"
> [(set (match_operand:GPR 0 "gpc_reg_operand")
>(if_then_else:GPR (match_operand 1 "comparison_operator")
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605.c 
> b/gcc/testsuite/gcc.target/powerpc/pr103605.c
> new file mode 100644
> index 000..1c938d40e61
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr103605.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx" } */
> +/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */
> +/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */
> +
> +#include 
> +
> +double test1 (double d0, double d1)
> +{
> +  return fmin (d0, d1);
> +}
> +
> +float test2 (float d0, float d1)
> +{
> +  return fmin (d0, d1);
> +}
> +
> +double test3 (double d0, double d1)
> +{
> +  return fmax (d0, d1);
> +}
> +
> +float test4 (float d0, float d1)
> +{
> +  return fmax (d0, d1);
> +}
> +
> +double test5 (double d0, double d1)
> +{
> +  return __builtin_vsx_xsmindp (d0, d1);
> +}
> +
> +double test6 (double d0, double d1)
> +{
> +  return __builtin_vsx_xsmaxdp (d0, d1);
> +}


Re: [PATCH v2, rs6000] Use CC for BCD operations [PR100736]

2022-07-04 Thread HAO CHEN GUI via Gcc-patches
Hi,
   Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html
Thanks.

On 22/6/2022 下午 4:26, HAO CHEN GUI wrote:
> Hi,
>   This patch uses CC instead of CCFP for all BCD operations. Thus, infinite
> math flag has no impact on BCD operations. To support BCD overflow and
> invalid coding, an UNSPEC is defined to move the bit to a general register.
> The patterns of condition branch and return with overflow bit are defined as
> the UNSPEC and branch/return can be combined to one jump insn. The split
> pattern of overflow bit extension is define for optimization.
> 
>   This patch also replaces bcdadd with bcdsub for BCD invaliding coding
> expand.
> 
> ChangeLog
> 2022-06-22 Haochen Gui 
> 
> gcc/
>   PR target/100736
>   * config/rs6000/altivec.md (BCD_TEST): Remove unordered.
>   (bcd_): Replace CCFP with CC.
>   (*bcd_test_): Replace CCFP with CC.  Generate
>   condition insn with CC mode.
>   (bcd_overflow_): New.
>   (*bcdoverflow_): New.
>   (*bcdinvalid_): Removed.
>   (bcdinvalid_): Implement by UNSPEC_BCDSUB and UNSPEC_BCD_OVERFLOW.
>   (nuun): New.
>   (*overflow_cbranch): New.
>   (*overflow_creturn): New.
>   (*overflow_extendsidi): New.
>   (bcdshift_v16qi): Replace CCFP with CC.
>   (bcdmul10_v16qi): Likewise.
>   (bcddiv10_v16qi): Likewise.
>   (peephole for bcd_add/sub): Likewise.
>   * config/rs6000/rs6000-builtins.def (__builtin_bcdadd_ov_v1ti): Set
>   pattern to bcdadd_overflow_v1ti.
>   (__builtin_bcdadd_ov_v16qi): Set pattern to bcdadd_overflow_v16qi.
>   (__builtin_bcdsub_ov_v1ti): Set pattern to bcdsub_overflow_v1ti.
>   (__builtin_bcdsub_ov_v16qi): Set pattern to bcdsub_overflow_v16qi.
> 
> gcc/testsuite/
>   PR target/100736
>   * gcc.target/powerpc/bcd-4.c: Adjust number of bcdadd and bcdsub.
>   Scan no cror insns.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index efc8ae35c2e..26f131e61ea 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -4370,7 +4370,7 @@ (define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD 
> UNSPEC_BCDSUB])
>  (define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add")
> (UNSPEC_BCDSUB "sub")])
> 
> -(define_code_iterator BCD_TEST [eq lt le gt ge unordered])
> +(define_code_iterator BCD_TEST [eq lt le gt ge])
>  (define_mode_iterator VBCD [V1TI V16QI])
> 
>  (define_insn "bcd_"
> @@ -4379,7 +4379,7 @@ (define_insn "bcd_"
> (match_operand:VBCD 2 "register_operand" "v")
> (match_operand:QI 3 "const_0_to_1_operand" "n")]
>UNSPEC_BCD_ADD_SUB))
> -   (clobber (reg:CCFP CR6_REGNO))]
> +   (clobber (reg:CC CR6_REGNO))]
>"TARGET_P8_VECTOR"
>"bcd. %0,%1,%2,%3"
>[(set_attr "type" "vecsimple")])
> @@ -4389,9 +4389,9 @@ (define_insn "bcd_"
>  ;; UNORDERED test on an integer type (like V1TImode) is not defined.  The 
> type
>  ;; probably should be one that can go in the VMX (Altivec) registers, so we
>  ;; can't use DDmode or DFmode.
> -(define_insn "*bcd_test_"
> -  [(set (reg:CCFP CR6_REGNO)
> - (compare:CCFP
> +(define_insn "bcd_test_"
> +  [(set (reg:CC CR6_REGNO)
> + (compare:CC
>(unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")
>  (match_operand:VBCD 2 "register_operand" "v")
>  (match_operand:QI 3 "const_0_to_1_operand" "i")]
> @@ -4408,8 +4408,8 @@ (define_insn "*bcd_test2_"
> (match_operand:VBCD 2 "register_operand" "v")
> (match_operand:QI 3 "const_0_to_1_operand" "i")]
>UNSPEC_BCD_ADD_SUB))
> -   (set (reg:CCFP CR6_REGNO)
> - (compare:CCFP
> +   (set (reg:CC CR6_REGNO)
> + (compare:CC
>(unspec:V2DF [(match_dup 1)
>  (match_dup 2)
>  (match_dup 3)]
> @@ -4502,8 +4502,8 @@ (define_insn "vclrrb"
> [(set_attr "type" "vecsimple")])
> 
>  (define_expand "bcd__"
> -  [(parallel [(set (reg:CCFP CR6_REGNO)
> -(compare:CCFP
> +  [(parallel [(set (reg:CC CR6_REGNO)
> +(compare:CC
>   (unspec:V2DF [(match_operand:VBCD 1 "register_operand")
> (match_operand:VBCD 2 "register_operand")
>   

Re: [PATCH v2] Modify combine pattern by a pseudo AND with its nonzero bits [PR93453]

2022-07-10 Thread HAO CHEN GUI via Gcc-patches
Hi, Segher

On 8/7/2022 上午 1:31, Segher Boessenkool wrote:
>> --- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
>> @@ -2,14 +2,14 @@
>>  /* { dg-options "-O2" } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 14121 { target ilp32 } 
>> } } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 20217 { target lp64 } 
>> } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 21279 { target lp64 } 
>> } } */
> You are saying there should be 21279 instructions generated by this test
> case.  What makes you say that?  Yes, we regressed some time ago, we
> generate too many insns in many cases, but that is *bad*.
> 

The trunk generates 21305. My patch generates 26 "rlwimi" instead of
"rlwinm+ior". So it saves 26 insns, reducing the total number of insns from
21305 to 21279 and increasing the number of "rlwimi" from 1666 to 1692.

I did a bisect for the problem. After commit "commit 8d2d39587: combine: Do not
combine moves from hard registers", the case fails. The root cause is that it
can't combine from the hard registers and has to use a subreg, which causes its
high part to be undefined. Thus, there is an additional "AND" generated.

Before the commit
Trying 2 -> 7:
2: r125:DI=%3:DI
  REG_DEAD %3:DI
7: r128:SI=r125:DI#0 0>>0x1f
  REG_DEAD r125:DI
Successfully matched this instruction:
(set (reg:SI 128 [ x ])
(lshiftrt:SI (reg:SI 3 3 [ x ])
(const_int 31 [0x1f])))
allowing combination of insns 2 and 7

After the commit
Trying 20 -> 7:
   20: r125:DI=r132:DI
  REG_DEAD r132:DI
7: r128:SI=r125:DI#0 0>>0x1f
  REG_DEAD r125:DI
Failed to match this instruction:
(set (subreg:DI (reg:SI 128 [ x ]) 0)
(zero_extract:DI (reg:DI 132)
(const_int 32 [0x20])
(const_int 1 [0x1])))
Successfully matched this instruction:
(set (subreg:DI (reg:SI 128 [ x ]) 0)
(and:DI (lshiftrt:DI (reg:DI 132)
(const_int 31 [0x1f]))
(const_int 4294967295 [0xffffffff])))
allowing combination of insns 20 and 7

Should this problem be fixed separately? Please advise.
Thanks
Gui Haochen


[PATCH v6, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-06-23 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
Tests show that outputs of xs[min/max]dp are consistent with the standard
of C99 fmin/max.

  This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
of smin/max. So the builtins always generate xs[min/max]dp on all
platforms.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-06-24 Haochen Gui 

gcc/
PR target/103605
* config/rs6000/rs6000.md (FMINMAX): New.
(minmax_op): New.
(f<minmax_op><mode>3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN.
* config/rs6000/rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set
pattern to fmaxdf3.
(__builtin_vsx_xsmindp): Set pattern to fmindf3.

gcc/testsuite/
PR target/103605
* gcc.target/powerpc/pr103605.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f4a9f24bcc5..8b735493b40 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1613,10 +1613,10 @@
 XSCVSPDP vsx_xscvspdp {}

   const double __builtin_vsx_xsmaxdp (double, double);
-XSMAXDP smaxdf3 {}
+XSMAXDP fmaxdf3 {}

   const double __builtin_vsx_xsmindp (double, double);
-XSMINDP smindf3 {}
+XSMINDP fmindf3 {}

   const double __builtin_vsx_xsrdpi (double);
 XSRDPI vsx_xsrdpi {}
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf85baa5370..ae0dd98f0f9 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -158,6 +158,8 @@ (define_c_enum "unspec"
UNSPEC_HASHCHK
UNSPEC_XXSPLTIDP_CONST
UNSPEC_XXSPLTIW_CONST
+   UNSPEC_FMAX
+   UNSPEC_FMIN
   ])

 ;;
@@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr"
   DONE;
 })

+
+(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
+
+(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
+(UNSPEC_FMIN "min")])
+
+(define_insn "f<minmax_op><mode>3"
+  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
+   (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
+ (match_operand:SFDF 2 "vsx_register_operand" "wa")]
+FMINMAX))]
+  "TARGET_VSX && !flag_finite_math_only"
+  "xs<minmax_op>dp %x0,%x1,%x2"
+  [(set_attr "type" "fp")]
+)
+
 (define_expand "movcc"
[(set (match_operand:GPR 0 "gpc_reg_operand")
 (if_then_else:GPR (match_operand 1 "comparison_operator")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605.c 
b/gcc/testsuite/gcc.target/powerpc/pr103605.c
new file mode 100644
index 000..1c938d40e61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103605.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */
+
+#include <math.h>
+
+double test1 (double d0, double d1)
+{
+  return fmin (d0, d1);
+}
+
+float test2 (float d0, float d1)
+{
+  return fmin (d0, d1);
+}
+
+double test3 (double d0, double d1)
+{
+  return fmax (d0, d1);
+}
+
+float test4 (float d0, float d1)
+{
+  return fmax (d0, d1);
+}
+
+double test5 (double d0, double d1)
+{
+  return __builtin_vsx_xsmindp (d0, d1);
+}
+
+double test6 (double d0, double d1)
+{
+  return __builtin_vsx_xsmaxdp (d0, d1);
+}


Re: [PATCH-1 v2, rs6000] Replace shift and ior insns with one rotate and mask insn for bswap pattern [PR93453]

2022-06-08 Thread HAO CHEN GUI via Gcc-patches
Hi,

On 7/6/2022 下午 11:59, Segher Boessenkool wrote:
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
>> @@ -0,0 +1,14 @@
>> +/* { dg-do compile { target lp64 } } */
>> +/* { dg-options "-mdejagnu-cpu=power6 -O2" } */
> It doesn't require -m64, only -mpowerpc64.  You can use has_arch_ppc64
> to test for the latter.

Tested it with 'target has_arch_ppc64', it works on both -m32 and -m64.

Thanks.
Gui Haochen


Re: [PATCH v4, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-06-09 Thread HAO CHEN GUI via Gcc-patches



On 9/6/2022 下午 11:07, Segher Boessenkool wrote:
> Ah, good.  Should we then have an assert that there is no fast-math if
> we ever get the rtl fmin/fmax stuff?

Sure, I will add a condition for it. Thanks a lot.
Gui Haochen


Re: [PATCH v3, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-06-06 Thread HAO CHEN GUI via Gcc-patches
Hi,

On 2/6/2022 上午 5:01, Segher Boessenkool wrote:
> Hi!
> 
> Some more nitpicking...
> 
> On Wed, May 18, 2022 at 04:52:26PM +0800, HAO CHEN GUI wrote:
>>const double __builtin_vsx_xsmaxdp (double, double);
>> -XSMAXDP smaxdf3 {}
>> +XSMAXDP fmaxdf3 {}
>>
>>const double __builtin_vsx_xsmindp (double, double);
>> -XSMINDP smindf3 {}
>> +XSMINDP fmindf3 {}
> 
> Are s{min,max}df3 still used after this?

The reduc_s[min|max]_scal expanders are still using s[min|max]df3.

OPTAB_D (reduc_smax_scal_optab, "reduc_smax_scal_$a")
OPTAB_D (reduc_smin_scal_optab, "reduc_smin_scal_$a")

Also we could implement reduc_f[min|max]_scal after committing
this patch.

Thanks.
Gui Haochen

> 
>> +   UNSPEC_FMAX
>> +   UNSPEC_FMIN
> 
> Pity we have to do this as an unspec still, this should be handled by
> some generic code, with some new operator (fmin/fmax would be obvious
> names :-) )
> 
>> +(define_insn "f<minmax_op><mode>3"
>> +  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
>> +(unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
>> +  (match_operand:SFDF 2 "vsx_register_operand" "wa")]
>> +  FMINMAX))]
>> +"TARGET_VSX"
>> +"xs<minmax_op>dp %x0,%x1,%x2"
>> +[(set_attr "type" "fp")]
>> +)
> 
> Indentation is broken here, correct is
> 
> (define_insn "f<minmax_op><mode>3"
>   [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
>   (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
> (match_operand:SFDF 2 "vsx_register_operand" "wa")]
>FMINMAX))]
>   "TARGET_VSX"
>   "xs<minmax_op>dp %x0,%x1,%x2"
>   [(set_attr "type" "fp")])
> 
> (FMINMAX has the same indent as the preceding [, its sibling;
> "TARGET_VSX" and the next two lines are indented like the same thing
> before it at the same level (the "[(set"); the finishing ) does never
> start a new line).
> 
> 
> Segher


[PATCH-1 v2, rs6000] Replace shift and ior insns with one rotate and mask insn for bswap pattern [PR93453]

2022-06-07 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch replaces shift and ior insns with one rotate and mask
insn for the split patterns which are for DI byte swap on Power6. The
test case shows the optimization.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-06-07 Haochen Gui 

gcc/
* config/rs6000/rs6000.md (define_split for bswapdi load): Merge shift
and ior insns to one rotate and mask insn.
(define_split for bswapdi register): Likewise.

gcc/testsuite/
* gcc.target/powerpc/pr93453-1.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf85baa5370..83800df12aa 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2828,8 +2828,8 @@ (define_split
   emit_insn (gen_bswapsi2 (dest_32, word2));
 }

-  emit_insn (gen_ashldi3 (op3, op3, GEN_INT (32)));
-  emit_insn (gen_iordi3 (dest, dest, op3));
+  emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest,
+  GEN_INT (0xffffffff)));
   DONE;
 })

@@ -2914,10 +2914,10 @@ (define_split
   rtx op3_si  = simplify_gen_subreg (SImode, op3, DImode, lo_off);

   emit_insn (gen_lshrdi3 (op2, src, GEN_INT (32)));
-  emit_insn (gen_bswapsi2 (dest_si, src_si));
-  emit_insn (gen_bswapsi2 (op3_si, op2_si));
-  emit_insn (gen_ashldi3 (dest, dest, GEN_INT (32)));
-  emit_insn (gen_iordi3 (dest, dest, op3));
+  emit_insn (gen_bswapsi2 (op3_si, src_si));
+  emit_insn (gen_bswapsi2 (dest_si, op2_si));
+  emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest,
+  GEN_INT (0xffffffff)));
   DONE;
 })

diff --git a/gcc/testsuite/gcc.target/powerpc/pr93453-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
new file mode 100644
index 000..4271886561f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-mdejagnu-cpu=power6 -O2" } */
+
+unsigned long load_byte_reverse (unsigned long *in)
+{
+   return __builtin_bswap64 (*in);
+}
+
+unsigned long byte_reverse (unsigned long in)
+{
+   return __builtin_bswap64 (in);
+}
+
+/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */


Re: [PATCH v2, rs6000] Fix ICE on expand bcd__ [PR100736]

2022-06-06 Thread HAO CHEN GUI via Gcc-patches



On 2/6/2022 下午 5:04, Segher Boessenkool wrote:
> Hi!
> 
> On Thu, Jun 02, 2022 at 01:30:04PM +0800, HAO CHEN GUI wrote:
>> Segher,
>>   Does BCD comparison return false when either operand is invalid coding?
> 
> It sets all of LT, GT, and EQ to 0 (it normally sets exactly one of them
> to 1).  It sets bit 3 (the "SO" bit usually) to 1.
> 
> That is what the machine insns do.  What the builtins do is undefined as
> far as I know?  If so, we can do whatever is most convenient, so, not
> handle it specifically at all, just go with what falls out.

We defined the following unordered BCD builtins in rs6000-builtins.def.
They check bit 3 for overflow.

  const signed int __builtin_bcdadd_ov_v1ti (vsq, vsq, const int<1>);
BCDADD_OV_V1TI bcdadd_unordered_v1ti {}

  const signed int __builtin_bcdadd_ov_v16qi (vsc, vsc, const int<1>);
BCDADD_OV_V16QI bcdadd_unordered_v16qi {}

Also Xlc defines three BCD builtins for overflow and invalid coding.
https://www.ibm.com/docs/en/xl-c-and-cpp-linux/16.1.1?topic=functions-bcd-test-add-subtract-overflow
Shall GCC keep up with Xlc? Please advise.

Thanks
Gui Haochen

> 
>> If yes, the result could be 3-way. We can check gt and eq bits for ge.
> 
> You can check the LT bit, instead: it is only one branch insn, and also
> only one setbc[r] insn (it can be slightly more expensive if you can use
> only older insns).
> 
>> We still can't use crnot to only check lt bit as there could be invalid
>> coding.
>>   Also, do you think finite-math-only excludes invalid coding? Seems GCC
>> doesn't clearly define it.
> 
> This is not floating-point code at all, it should not be influenced at
> all by finite-math-only!
> 
> 
> Segher


[PATCH-1, rs6000] Replace shift and ior insns with one rotate and mask insn for bswap pattern [PR93453]

2022-06-05 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch replaces shift and ior insns with one rotate and mask
insn for the split patterns which are for DI byte swap on Power6 and
before. The test case shows the optimization.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-06-06 Haochen Gui 

gcc/
* config/rs6000/rs6000.md (split for DI load byte swap): Merge shift
and ior insns to one rotate and mask insn.
(split for DI register byte swap): Likewise.

gcc/testsuite/
* gcc.target/powerpc/pr93453-1.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf85baa5370..2e38195aaac 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2828,8 +2828,8 @@ (define_split
   emit_insn (gen_bswapsi2 (dest_32, word2));
 }

-  emit_insn (gen_ashldi3 (op3, op3, GEN_INT (32)));
-  emit_insn (gen_iordi3 (dest, dest, op3));
+  emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest,
+  GEN_INT ((HOST_WIDE_INT_1U << 32) - 1)));
   DONE;
 })

@@ -2914,10 +2914,10 @@ (define_split
   rtx op3_si  = simplify_gen_subreg (SImode, op3, DImode, lo_off);

   emit_insn (gen_lshrdi3 (op2, src, GEN_INT (32)));
-  emit_insn (gen_bswapsi2 (dest_si, src_si));
-  emit_insn (gen_bswapsi2 (op3_si, op2_si));
-  emit_insn (gen_ashldi3 (dest, dest, GEN_INT (32)));
-  emit_insn (gen_iordi3 (dest, dest, op3));
+  emit_insn (gen_bswapsi2 (op3_si, src_si));
+  emit_insn (gen_bswapsi2 (dest_si, op2_si));
+  emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest,
+  GEN_INT ((HOST_WIDE_INT_1U << 32) - 1)));
   DONE;
 })

diff --git a/gcc/testsuite/gcc.target/powerpc/pr93453-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
new file mode 100644
index 000..4271886561f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-mdejagnu-cpu=power6 -O2" } */
+
+unsigned long load_byte_reverse (unsigned long *in)
+{
+   return __builtin_bswap64 (*in);
+}
+
+unsigned long byte_reverse (unsigned long in)
+{
+   return __builtin_bswap64 (in);
+}
+
+/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */


[PATCH, rs6000] Use CC for BCD operations [PR100736]

2022-06-16 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch uses CC instead of CCFP for all BCD operations. Thus, the
finite-math-only flag has no impact on BCD operations. To support BCD overflow
and invalid coding, ordered and unordered are added into CC mode. With CC,
"ge" and "le" are converted to reverse comparison. So the invalid coding
needs to be tested separately.

  This patch also replaces bcdadd with bcdsub in the BCD invalid-coding
expander. A bcdsub of two identical numbers doesn't cause overflow, while a
bcdadd could.

  Another patch which creates a dedicated CC mode for BCD operations is
ready. With that patch, we don't need ordered and unordered in CC. Please
advise if I can submit it.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

2022-06-16 Haochen Gui 

gcc/
PR target/100736
* config/rs6000/altivec.md (bcd_): Replace CCFP with
CC
(*bcd_test_): Replace CCFP with CC.  Generate
condition insn with CC mode.
(*bcdinvalid_): Replace CCFP with CC.  Replace bcdadd. with
bcdsub.
(bcdinvalid_): Replace CCFP with CC.
(bcdshift_v16qi): Likewise.
(bcdmul10_v16qi): Likewise.
(bcddiv10_v16qi): Likewise.
(peephole for bcd_add/sub): Likewise.
* config/rs6000/predicates.md (branch_comparison_operator): Add
ordered and unordered in CC mode.
* config/rs6000/rs6000.cc (validate_condition_mode): Likewise.

gcc/testsuite/
PR target/100736
* gcc.target/powerpc/bcd-4.c: Adjust number of bcdadd and bcdsub.
Scan no cror insns.

patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index efc8ae35c2e..5ffbab17a9e 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -4379,7 +4379,7 @@ (define_insn "bcd_"
  (match_operand:VBCD 2 "register_operand" "v")
  (match_operand:QI 3 "const_0_to_1_operand" "n")]
 UNSPEC_BCD_ADD_SUB))
-   (clobber (reg:CCFP CR6_REGNO))]
+   (clobber (reg:CC CR6_REGNO))]
   "TARGET_P8_VECTOR"
   "bcd. %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
@@ -4389,9 +4389,9 @@ (define_insn "bcd_"
 ;; UNORDERED test on an integer type (like V1TImode) is not defined.  The type
 ;; probably should be one that can go in the VMX (Altivec) registers, so we
 ;; can't use DDmode or DFmode.
-(define_insn "*bcd_test_"
-  [(set (reg:CCFP CR6_REGNO)
-   (compare:CCFP
+(define_insn "bcd_test_"
+  [(set (reg:CC CR6_REGNO)
+   (compare:CC
 (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")
   (match_operand:VBCD 2 "register_operand" "v")
   (match_operand:QI 3 "const_0_to_1_operand" "i")]
@@ -4408,8 +4408,8 @@ (define_insn "*bcd_test2_"
  (match_operand:VBCD 2 "register_operand" "v")
  (match_operand:QI 3 "const_0_to_1_operand" "i")]
 UNSPEC_BCD_ADD_SUB))
-   (set (reg:CCFP CR6_REGNO)
-   (compare:CCFP
+   (set (reg:CC CR6_REGNO)
+   (compare:CC
 (unspec:V2DF [(match_dup 1)
   (match_dup 2)
   (match_dup 3)]
@@ -4502,8 +4502,8 @@ (define_insn "vclrrb"
[(set_attr "type" "vecsimple")])

 (define_expand "bcd__"
-  [(parallel [(set (reg:CCFP CR6_REGNO)
-  (compare:CCFP
+  [(parallel [(set (reg:CC CR6_REGNO)
+  (compare:CC
(unspec:V2DF [(match_operand:VBCD 1 "register_operand")
  (match_operand:VBCD 2 "register_operand")
  (match_operand:QI 3 "const_0_to_1_operand")]
@@ -4511,33 +4511,56 @@ (define_expand "bcd__"
(match_dup 4)))
  (clobber (match_scratch:VBCD 5))])
(set (match_operand:SI 0 "register_operand")
-   (BCD_TEST:SI (reg:CCFP CR6_REGNO)
+   (BCD_TEST:SI (reg:CC CR6_REGNO)
 (const_int 0)))]
   "TARGET_P8_VECTOR"
 {
   operands[4] = CONST0_RTX (V2DFmode);
+  emit_insn (gen_bcd_test_ (operands[0], operands[1],
+  operands[2], operands[3],
+  operands[4]));
+
+  rtx cr6 = gen_rtx_REG (CCmode, CR6_REGNO);
+  rtx condition_rtx = gen_rtx_ (SImode, cr6, const0_rtx);
+  rtx_code cond_code = GET_CODE (condition_rtx);
+
+  if (cond_code == GE || cond_code == LE)
+{
+  rtx not_result = gen_reg_rtx (CCEQmode);
+  rtx not_op, rev_cond_rtx;
+  rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (SImode,
+  cond_code),
+SImode, XEXP (condition_rtx, 0),
+const0_rtx);
+  not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
+  emit_insn (gen_rtx_SET (not_result, not_op));
+  condition_rtx = gen_rtx_EQ (SImode, 

Re: [PATCH, rs6000] Use CC for BCD operations [PR100736]

2022-06-17 Thread HAO CHEN GUI via Gcc-patches
Hi,

On 16/6/2022 下午 7:24, Segher Boessenkool wrote:
> There is no normal way to get at bit 3 of a CR field.  We can use some
> unspec though, which is total cheating but it does work, and it is
> safe, albeit sometimes suboptimal.

Thanks so much for your advice. I will use an unspec for setting reg from
the BCD overflow bit.
> 
> You shouldn't need anything like this, bcdinvalid will work just fine if
> written as bcdadd_ov (with vector of 0 as the second op)?

The vector of 0 is not equal to BCD 0, I think. A BCD number contains a
preferred sign (PS) bit, so all-zeros is itself an invalid encoding. We may
use bcdsub_ov with a duplicated operand to implement bcdinvalid.


[PATCH v5, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-06-19 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
Tests show that outputs of xs[min/max]dp are consistent with the standard
of C99 fmin/max.

  This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
of smin/max. So the builtins always generate xs[min/max]dp on all
platforms.

  Compared with previous version, I added a condition check for finite_math_only
in fmin/max insn.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-06-20 Haochen Gui 

gcc/
PR target/105414
* match.pd (minmax): Skip constant folding for fmin/fmax when both
arguments are sNaN or one is sNaN and another is NaN.

gcc/testsuite/
PR target/105414
* gcc.dg/pr105414.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f4a9f24bcc5..8b735493b40 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1613,10 +1613,10 @@
 XSCVSPDP vsx_xscvspdp {}

   const double __builtin_vsx_xsmaxdp (double, double);
-XSMAXDP smaxdf3 {}
+XSMAXDP fmaxdf3 {}

   const double __builtin_vsx_xsmindp (double, double);
-XSMINDP smindf3 {}
+XSMINDP fmindf3 {}

   const double __builtin_vsx_xsrdpi (double);
 XSRDPI vsx_xsrdpi {}
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf85baa5370..ae0dd98f0f9 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -158,6 +158,8 @@ (define_c_enum "unspec"
UNSPEC_HASHCHK
UNSPEC_XXSPLTIDP_CONST
UNSPEC_XXSPLTIW_CONST
+   UNSPEC_FMAX
+   UNSPEC_FMIN
   ])

 ;;
@@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr"
   DONE;
 })

+
+(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
+
+(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
+(UNSPEC_FMIN "min")])
+
+(define_insn "f<minmax_op><mode>3"
+  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
+   (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
+ (match_operand:SFDF 2 "vsx_register_operand" "wa")]
+FMINMAX))]
+  "TARGET_VSX && !flag_finite_math_only"
+  "xs<minmax_op>dp %x0,%x1,%x2"
+  [(set_attr "type" "fp")]
+)
+
 (define_expand "movcc"
[(set (match_operand:GPR 0 "gpc_reg_operand")
 (if_then_else:GPR (match_operand 1 "comparison_operator")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605.c 
b/gcc/testsuite/gcc.target/powerpc/pr103605.c
new file mode 100644
index 000..e43ac40c2d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103605.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O1 -mvsx" } */
+/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */
+
+#include <math.h>
+
+double test1 (double d0, double d1)
+{
+  return fmin (d0, d1);
+}
+
+float test2 (float d0, float d1)
+{
+  return fmin (d0, d1);
+}
+
+double test3 (double d0, double d1)
+{
+  return fmax (d0, d1);
+}
+
+float test4 (float d0, float d1)
+{
+  return fmax (d0, d1);
+}
+
+double test5 (double d0, double d1)
+{
+  return __builtin_vsx_xsmindp (d0, d1);
+}
+
+double test6 (double d0, double d1)
+{
+  return __builtin_vsx_xsmaxdp (d0, d1);
+}



Re: [PATCH v4, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-06-08 Thread HAO CHEN GUI via Gcc-patches
Hi,

On 8/6/2022 下午 9:24, Segher Boessenkool wrote:
> But it regresses the code quality generated with -ffast-math (because
> the new unspecs aren't optimised like standard rtl is).  This can be
> follow-up work of course -- and the best direction is to make fmin/fmax
> generic, even!  :-)

fmin/max will be folded to MIN/MAX_EXPR when fast-math is set. So the
behavior doesn't change when fast-math is set.



[PATCH v4, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-06-07 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
Tests show that outputs of xs[min/max]dp are consistent with the standard
of C99 fmin/max.

  This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
of smin/max. So the builtins always generate xs[min/max]dp on all
platforms.

  Compared with previous version, the main change is to fix indent problem.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-05-31 Haochen Gui 

gcc/
PR target/103605
* config/rs6000/rs6000.md (FMINMAX): New.
(minmax_op): New.
(f<minmax_op><mode>3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN.
* config/rs6000/rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set
pattern to fmaxdf3.
(__builtin_vsx_xsmindp): Set pattern to fmindf3.

gcc/testsuite/
PR target/103605
* gcc.target/powerpc/pr103605.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f4a9f24bcc5..8b735493b40 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1613,10 +1613,10 @@
 XSCVSPDP vsx_xscvspdp {}

   const double __builtin_vsx_xsmaxdp (double, double);
-XSMAXDP smaxdf3 {}
+XSMAXDP fmaxdf3 {}

   const double __builtin_vsx_xsmindp (double, double);
-XSMINDP smindf3 {}
+XSMINDP fmindf3 {}

   const double __builtin_vsx_xsrdpi (double);
 XSRDPI vsx_xsrdpi {}
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf85baa5370..42d3edf2eca 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -158,6 +158,8 @@ (define_c_enum "unspec"
UNSPEC_HASHCHK
UNSPEC_XXSPLTIDP_CONST
UNSPEC_XXSPLTIW_CONST
+   UNSPEC_FMAX
+   UNSPEC_FMIN
   ])

 ;;
@@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr"
   DONE;
 })

+
+(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
+
+(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
+(UNSPEC_FMIN "min")])
+
+(define_insn "f<minmax_op><mode>3"
+  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
+   (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
+ (match_operand:SFDF 2 "vsx_register_operand" "wa")]
+FMINMAX))]
+  "TARGET_VSX"
+  "xs<minmax_op>dp %x0,%x1,%x2"
+  [(set_attr "type" "fp")]
+)
+
 (define_expand "movcc"
[(set (match_operand:GPR 0 "gpc_reg_operand")
 (if_then_else:GPR (match_operand 1 "comparison_operator")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605.c 
b/gcc/testsuite/gcc.target/powerpc/pr103605.c
new file mode 100644
index 000..e43ac40c2d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103605.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O1 -mvsx" } */
+/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */
+
+#include <math.h>
+
+double test1 (double d0, double d1)
+{
+  return fmin (d0, d1);
+}
+
+float test2 (float d0, float d1)
+{
+  return fmin (d0, d1);
+}
+
+double test3 (double d0, double d1)
+{
+  return fmax (d0, d1);
+}
+
+float test4 (float d0, float d1)
+{
+  return fmax (d0, d1);
+}
+
+double test5 (double d0, double d1)
+{
+  return __builtin_vsx_xsmindp (d0, d1);
+}
+
+double test6 (double d0, double d1)
+{
+  return __builtin_vsx_xsmaxdp (d0, d1);
+}


[PATCH, rs6000] Add multiply-add expand pattern [PR103109]

2022-07-24 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds an expand and several insns for multiply-add with
three 64bit operands.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-07-22  Haochen Gui  

gcc/
PR target/103109
* config/rs6000/rs6000.md (maddditi4): New pattern for
multiply-add.
(madddi4_lowpart): New.
(madddi4_lowpart_le): New.
(madddi4_highpart): New.
(madddi4_highpart_le): New.

gcc/testsuite/
PR target/103109
* gcc.target/powerpc/pr103109.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index c55ee7e171a..4f3b56e103e 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -3226,6 +3226,97 @@ (define_insn "*maddld4"
   "maddld %0,%1,%2,%3"
   [(set_attr "type" "mul")])

+(define_expand "maddditi4"
+  [(set (match_operand:TI 0 "gpc_reg_operand")
+   (plus:TI
+ (mult:TI (any_extend:TI
+(match_operand:DI 1 "gpc_reg_operand"))
+  (any_extend:TI
+(match_operand:DI 2 "gpc_reg_operand")))
+ (any_extend:TI
+   (match_operand:DI 3 "gpc_reg_operand"))))]
+  "TARGET_POWERPC64 && TARGET_MADDLD"
+{
+  rtx op0_lo = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 8 : 0);
+  rtx op0_hi = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 0 : 8);
+
+  if (BYTES_BIG_ENDIAN)
+{
+  emit_insn (gen_madddi4_lowpart (op0_lo, operands[1], operands[2],
+operands[3]));
+  emit_insn (gen_madddi4_highpart (op0_hi, operands[1], operands[2],
+ operands[3]));
+}
+  else
+{
+  emit_insn (gen_madddi4_lowpart_le (op0_lo, operands[1], operands[2],
+   operands[3]));
+  emit_insn (gen_madddi4_highpart_le (op0_hi, operands[1], operands[2],
+operands[3]));
+}
+  DONE;
+})
+
+(define_insn "madddi4_lowpart"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (subreg:DI
+ (plus:TI
+   (mult:TI (any_extend:TI
+  (match_operand:DI 1 "gpc_reg_operand" "r"))
+(any_extend:TI
+  (match_operand:DI 2 "gpc_reg_operand" "r")))
+   (any_extend:TI
+ (match_operand:DI 3 "gpc_reg_operand" "r")))
+8))]
+  "TARGET_POWERPC64 && TARGET_MADDLD && BYTES_BIG_ENDIAN"
+  "maddld %0,%1,%2,%3"
+  [(set_attr "type" "mul")])
+
+(define_insn "madddi4_lowpart_le"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (subreg:DI
+ (plus:TI
+   (mult:TI (any_extend:TI
+  (match_operand:DI 1 "gpc_reg_operand" "r"))
+(any_extend:TI
+  (match_operand:DI 2 "gpc_reg_operand" "r")))
+   (any_extend:TI
+ (match_operand:DI 3 "gpc_reg_operand" "r")))
+0))]
+  "TARGET_POWERPC64 && TARGET_MADDLD && !BYTES_BIG_ENDIAN"
+  "maddld %0,%1,%2,%3"
+  [(set_attr "type" "mul")])
+
+(define_insn "madddi4_highpart"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (subreg:DI
+ (plus:TI
+   (mult:TI (any_extend:TI
+  (match_operand:DI 1 "gpc_reg_operand" "r"))
+(any_extend:TI
+  (match_operand:DI 2 "gpc_reg_operand" "r")))
+   (any_extend:TI
+ (match_operand:DI 3 "gpc_reg_operand" "r")))
+0))]
+  "TARGET_POWERPC64 && TARGET_MADDLD && BYTES_BIG_ENDIAN"
+  "maddhd %0,%1,%2,%3"
+  [(set_attr "type" "mul")])
+
+(define_insn "madddi4_highpart_le"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (subreg:DI
+ (plus:TI
+   (mult:TI (any_extend:TI
+  (match_operand:DI 1 "gpc_reg_operand" "r"))
+(any_extend:TI
+  (match_operand:DI 2 "gpc_reg_operand" "r")))
+   (any_extend:TI
+ (match_operand:DI 3 "gpc_reg_operand" "r")))
+8))]
+  "TARGET_POWERPC64 && TARGET_MADDLD && !BYTES_BIG_ENDIAN"
+  "maddhd %0,%1,%2,%3"
+  [(set_attr "type" "mul")])
+
 (define_insn "udiv3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
 (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c 
b/gcc/testsuite/gcc.target/powerpc/pr103109.c
new file mode 100644
index 000..256e05d5677
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-require-effective-target powerpc_p9modulo_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-final { scan-assembler-times {\mmaddld\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mmaddhd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mmaddhdu\M} 1 } } */
+
+__int128 test (long a, 

[PATCH v3] Modify combine pattern by a pseudo AND with its nonzero bits [PR93453]

2022-07-22 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch creates a new function - change_pseudo_and_mask. If recog fails,
the function converts a single pseudo to the pseudo ANDed with a mask when the
outer operator is IOR/XOR/PLUS and the inner operator is ASHIFT or AND. The
conversion helps the pattern match a rotate and mask insn on some targets.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-07-22  Haochen Gui  

gcc/
PR target/93453
* combine.cc (change_pseudo_and_mask): New.
(recog_for_combine): If recog fails, try again with the pattern
modified by change_pseudo_and_mask.
* config/rs6000/rs6000.md (plus_ior_xor): Remove.
(anonymous split pattern for plus_ior_xor): Remove.

gcc/testsuite/
PR target/93453
* gcc.target/powerpc/pr93453-2.c: New.
* gcc.target/powerpc/rlwimi-2.c: Both 32/64 bit platforms generate the
same number of rlwimi.  Reset the counter.

patch.diff
diff --git a/gcc/combine.cc b/gcc/combine.cc
index a5fabf397f7..e1c1aa7da1c 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -11599,6 +11599,48 @@ change_zero_ext (rtx pat)
   return changed;
 }

+/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
+   ASHIFT/AND, convert a pseudo to pseudo AND with a mask if its nonzero_bits
+   is less than its mode mask.  The nonzero_bits in later passes is not a
+   superset of what is known in combine pass.  So an insn with nonzero_bits
+   can't be recoged later.  */
+static bool
+change_pseudo_and_mask (rtx pat)
+{
+  rtx src = SET_SRC (pat);
+  if ((GET_CODE (src) == IOR
+   || GET_CODE (src) == XOR
+   || GET_CODE (src) == PLUS)
+  && (((GET_CODE (XEXP (src, 0)) == ASHIFT
+   || GET_CODE (XEXP (src, 0)) == AND)
+  && REG_P (XEXP (src, 1)
+{
+  rtx reg = XEXP (src, 1);
+  machine_mode mode = GET_MODE (reg);
+  unsigned HOST_WIDE_INT nonzero = nonzero_bits (reg, mode);
+  if (nonzero < GET_MODE_MASK (mode))
+   {
+ int shift;
+
+ if (GET_CODE (XEXP (src, 0)) == ASHIFT)
+   shift = INTVAL (XEXP (XEXP (src, 0), 1));
+ else
+   shift = ctz_hwi (INTVAL (XEXP (XEXP (src, 0), 1)));
+
+ if (shift > 0
+ && (HOST_WIDE_INT_1U << shift) - 1 >= nonzero)
+   {
+ unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << shift) - 1;
+ rtx x = gen_rtx_AND (mode, reg, GEN_INT (mask));
+ SUBST (XEXP (SET_SRC (pat), 1), x);
+ maybe_swap_commutative_operands (SET_SRC (pat));
+ return true;
+   }
+   }
+}
+  return false;
+}
+
 /* Like recog, but we receive the address of a pointer to a new pattern.
We try to match the rtx that the pointer points to.
If that fails, we may try to modify or replace the pattern,
@@ -11646,7 +11688,10 @@ recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx 
*pnotes)
}
}
   else
-   changed = change_zero_ext (pat);
+   {
+ changed = change_pseudo_and_mask (pat);
+ changed |= change_zero_ext (pat);
+   }
 }
   else if (GET_CODE (pat) == PARALLEL)
 {
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 1367a2cb779..2bd6bd5f908 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -4207,24 +4207,6 @@ (define_insn_and_split "*rotl3_insert_3_"
(ior:GPR (and:GPR (match_dup 3) (match_dup 4))
 (ashift:GPR (match_dup 1) (match_dup 2])

-(define_code_iterator plus_ior_xor [plus ior xor])
-
-(define_split
-  [(set (match_operand:GPR 0 "gpc_reg_operand")
-   (plus_ior_xor:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
- (match_operand:SI 2 "const_int_operand"))
- (match_operand:GPR 3 "gpc_reg_operand")))]
-  "nonzero_bits (operands[3], mode)
-   < HOST_WIDE_INT_1U << INTVAL (operands[2])"
-  [(set (match_dup 0)
-   (ior:GPR (and:GPR (match_dup 3)
- (match_dup 4))
-(ashift:GPR (match_dup 1)
-(match_dup 2]
-{
-  operands[4] = GEN_INT ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1);
-})
-
 (define_insn "*rotlsi3_insert_4"
   [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
(ior:SI (and:SI (match_operand:SI 3 "gpc_reg_operand" "0")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93453-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr93453-2.c
new file mode 100644
index 000..a83a6511653
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr93453-2.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long foo (char a, long b)
+{
+  long c = a;
+  c = c | (b << 12);
+  return c;
+}
+
+long bar (long b, char a)
+{
+  long c = a;
+  long m = -4096;
+  c = c | (b & m);
+  return c;
+}
+
+/* { dg-final { scan-assembler-times {\mrl[wd]imi\M} 2 } } 

Re: [PATCH v3, rs6000] Disable TImode from Bool expanders [PR100694, PR93123]

2022-07-18 Thread HAO CHEN GUI via Gcc-patches
Hi Segher,
  Thanks for your comments.

On 13/7/2022 上午 1:26, Segher Boessenkool wrote:
>> --- a/gcc/config/rs6000/rs6000.md
>> +++ b/gcc/config/rs6000/rs6000.md
>> @@ -7078,27 +7078,38 @@ (define_expand "subti3"
>>  })
>>  
>>  ;; 128-bit logical operations expanders
>> +;; Fail TImode in all 128-bit logical operations expanders and split it into
>> +;; two DI registers.
>>
>>  (define_expand "and3"
>>[(set (match_operand:BOOL_128 0 "vlogical_operand")
>>  (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
>>(match_operand:BOOL_128 2 "vlogical_operand")))]
>>""
>> -  "")
>> +{
>> +  if (mode == TImode)
>> +FAIL;
>> +})
> It is better to not FAIL it, but simply not have a pattern for the
> TImode version at all.
> 
> Does nothing depend on the :TI version to exist?
> 
> What about the :PTI version?  Getting rid of that as well will allow
> some nice optimisations.
> 
> Of course we *do* have instructions to do such TImode ops, on newer
> CPUs, but in vector registers only.  It isn't obvious what is faster.
> 

During expand, a TImode operation is split into two registers when it does
not match any expander. So I make each expander FAIL for TImode and expect
the operation to be split at expand time. TImode is still handled by some
insn_and_split patterns (e.g. "*and3_internal"), so if later RTL passes
generate TImode logical operations, they can still be matched.

Originally, TImode was split after the reload pass by
rs6000_split_logical, which is too late for some RTL optimizations to apply.

PTImode can't be split into two registers during expand, because PTImode
requires an even/odd register pair. Splitting it after reload makes
sure it gets the correct registers, I think.

From my understanding, it's sub-optimal to use vector logical operation
instructions for TImode if the destination is an integer operand. That
needs three instructions (move to vector register, vector logical
operation, and move from vector register). When TImode is split, it only
needs two logical instructions on two separate registers.

Thanks again
Gui Haochen


[PATCH v2, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-05-11 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
Tests show that outputs of xs[min/max]dp are consistent with the standard
of C99 fmin/max.

  This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
of smin/max. So the builtins always generate xs[min/max]dp on all
platforms.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-05-11 Haochen Gui 

gcc/
PR target/103605
* rs6000.md (FMINMAX): New.
(minmax_op): New.
(f3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN.
* rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set pattern to
fmaxdf3.
(__builtin_vsx_xsmindp): Set pattern to fmindf3.

gcc/testsuite/
PR target/103605
* gcc.dg/pr103605.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f4a9f24bcc5..8b735493b40 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1613,10 +1613,10 @@
 XSCVSPDP vsx_xscvspdp {}

   const double __builtin_vsx_xsmaxdp (double, double);
-XSMAXDP smaxdf3 {}
+XSMAXDP fmaxdf3 {}

   const double __builtin_vsx_xsmindp (double, double);
-XSMINDP smindf3 {}
+XSMINDP fmindf3 {}

   const double __builtin_vsx_xsrdpi (double);
 XSRDPI vsx_xsrdpi {}
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 64049a6e521..53c94456eb5 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -158,6 +158,8 @@ (define_c_enum "unspec"
UNSPEC_HASHCHK
UNSPEC_XXSPLTIDP_CONST
UNSPEC_XXSPLTIW_CONST
+   UNSPEC_FMAX
+   UNSPEC_FMIN
   ])

 ;;
@@ -5350,6 +5352,22 @@ (define_insn_and_split "*s3_fpr"
   DONE;
 })

+
+(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
+
+(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
+(UNSPEC_FMIN "min")])
+
+(define_insn "f3"
+  [(set (match_operand:SFDF 0 "vsx_register_operand" "=")
+   (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "")
+ (match_operand:SFDF 2 "vsx_register_operand" "")]
+ FMINMAX))]
+"TARGET_VSX"
+"xsdp %x0,%x1,%x2"
+[(set_attr "type" "fp")]
+)
+
 (define_expand "movcc"
[(set (match_operand:GPR 0 "gpc_reg_operand")
 (if_then_else:GPR (match_operand 1 "comparison_operator")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605.c 
b/gcc/testsuite/gcc.target/powerpc/pr103605.c
new file mode 100644
index 000..e43ac40c2d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103605.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O1 -mvsx" } */
+/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */
+
+#include 
+
+double test1 (double d0, double d1)
+{
+  return fmin (d0, d1);
+}
+
+float test2 (float d0, float d1)
+{
+  return fmin (d0, d1);
+}
+
+double test3 (double d0, double d1)
+{
+  return fmax (d0, d1);
+}
+
+float test4 (float d0, float d1)
+{
+  return fmax (d0, d1);
+}
+
+double test5 (double d0, double d1)
+{
+  return __builtin_vsx_xsmindp (d0, d1);
+}
+
+double test6 (double d0, double d1)
+{
+  return __builtin_vsx_xsmaxdp (d0, d1);
+}



[PATCH, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-05-08 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
Tests show that outputs of xs[min/max]dp are consistent with the standard
of C99 fmin/max.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-05-09 Haochen Gui 

gcc/
PR target/103605
* rs6000.md (unspec): Add UNSPEC_FMAX and UNSPEC_FMIN.
(fminmax): New.
(minmax_op): Likewise.
(3): New pattern.  Implemented by UNSPEC_FMAX and
UNSPEC_FMIN.

gcc/testsuite/
PR target/103605
* gcc.dg/pr103605.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index fdfbc6566a5..8aae3e80bcd 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -158,6 +158,8 @@ (define_c_enum "unspec"
UNSPEC_HASHCHK
UNSPEC_XXSPLTIDP_CONST
UNSPEC_XXSPLTIW_CONST
+   UNSPEC_FMAX
+   UNSPEC_FMIN
   ])

 ;;
@@ -5350,6 +5352,25 @@ (define_insn_and_split "*s3_fpr"
   DONE;
 })

+
+(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
+
+(define_int_attr fminmax [(UNSPEC_FMAX "fmax")
+ (UNSPEC_FMIN "fmin")])
+
+(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
+(UNSPEC_FMIN "min")])
+
+(define_insn "3"
+  [(set (match_operand:SFDF 0 "vsx_register_operand" "=")
+   (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "")
+ (match_operand:SFDF 2 "vsx_register_operand" "")]
+ FMINMAX))]
+"TARGET_VSX"
+"xsdp %x0,%x1,%x2"
+[(set_attr "type" "fp")]
+)
+
 (define_expand "movcc"
[(set (match_operand:GPR 0 "gpc_reg_operand")
 (if_then_else:GPR (match_operand 1 "comparison_operator")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605.c 
b/gcc/testsuite/gcc.target/powerpc/pr103605.c
new file mode 100644
index 000..a40da064742
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103605.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O1 -mvsx" } */
+/* { dg-final { scan-assembler-times "xsmaxdp" 2 } } */
+/* { dg-final { scan-assembler-times "xsmindp" 2 } } */
+
+#include 
+
+double test1 (double d0, double d1)
+{
+  return fmin (d0, d1);
+}
+
+float test2 (float d0, float d1)
+{
+  return fmin (d0, d1);
+}
+
+double test3 (double d0, double d1)
+{
+  return fmax (d0, d1);
+}
+
+float test4 (float d0, float d1)
+{
+  return fmax (d0, d1);
+}


[PATCH v2] Skip constant folding for fmin/max when either argument is sNaN [PR105414]

2022-05-10 Thread HAO CHEN GUI via Gcc-patches
Hi,
   This patch skips constant folding for fmin/max when either argument
is sNaN. According to C standard,
   fmin(sNaN, sNaN)= qNaN, fmin(sNaN, NaN) = qNaN
   So signaling NaN should be tested and skipped for fmin/max in match.pd.

   The V2 patch splits the for loop and keeps MIN/MAX_EXPR unchanged.

   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-05-10 Haochen Gui 

gcc/
PR target/105414
* match.pd (minmax): Skip constant folding for fmin/fmax when both
arguments are sNaN or one is sNaN and another is NaN.

gcc/testsuite/
PR target/105414
* gcc.dg/pr105414.c: New.


patch.diff
diff --git a/gcc/match.pd b/gcc/match.pd
index 6d691d302b3..6fb8806412a 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3095,10 +3095,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)

 /* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax().  */

-(for minmax (min max FMIN_ALL FMAX_ALL)
+(for minmax (min max)
  (simplify
   (minmax @0 @0)
   @0))
+/* For fmin() and fmax(), skip folding when both are sNaN.  */
+(for minmax (FMIN_ALL FMAX_ALL)
+ (simplify
+  (minmax @0 @0)
+  (if (!tree_expr_maybe_signaling_nan_p (@0))
+@0)))
 /* min(max(x,y),y) -> y.  */
 (simplify
  (min:c (max:c @0 @1) @1)
@@ -3198,12 +3204,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(minmax @1 (convert @2)

 (for minmax (FMIN_ALL FMAX_ALL)
- /* If either argument is NaN, return the other one.  Avoid the
-transformation if we get (and honor) a signalling NaN.  */
+ /* If either argument is NaN and other one is not sNaN, return the other
+one.  Avoid the transformation if we get (and honor) a signalling NaN.  */
  (simplify
   (minmax:c @0 REAL_CST@1)
-  (if (real_isnan (TREE_REAL_CST_PTR (@1))
-   && (!HONOR_SNANS (@1) || !TREE_REAL_CST (@1).signalling))
+   (if (real_isnan (TREE_REAL_CST_PTR (@1))
+   && (!HONOR_SNANS (@1) || !TREE_REAL_CST (@1).signalling)
+   && !tree_expr_maybe_signaling_nan_p (@0))
@0)))
 /* Convert fmin/fmax to MIN_EXPR/MAX_EXPR.  C99 requires these
functions to return the numeric arg if the other one is NaN.
diff --git a/gcc/testsuite/gcc.dg/pr105414.c b/gcc/testsuite/gcc.dg/pr105414.c
new file mode 100644
index 000..78772700acf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr105414.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target { *-*-linux* *-*-gnu* } } } */
+/* { dg-options "-O1 -fsignaling-nans -lm" } */
+/* { dg-add-options ieee } */
+/* { dg-require-effective-target issignaling } */
+
+
+#define _GNU_SOURCE
+#include 
+#include 
+
+int main()
+{
+  double a = __builtin_nans ("");
+
+  if (issignaling (fmin (a, a)))
+__builtin_abort ();
+
+  if (issignaling (fmax (a, a)))
+__builtin_abort ();
+
+  double b = __builtin_nan ("");
+
+  if (issignaling (fmin (a, b)))
+__builtin_abort ();
+
+  if (issignaling (fmax (a, b)))
+__builtin_abort ();
+
+  return 0;
+}


Re: [PATCH] Skip constant folding for fmin/max when either argument is sNaN [PR105414]

2022-05-05 Thread HAO CHEN GUI via Gcc-patches



On 5/5/2022 下午 4:30, Kewen.Lin wrote:
> on 2022/5/5 16:09, Richard Biener via Gcc-patches wrote:
>> On Thu, May 5, 2022 at 10:07 AM HAO CHEN GUI via Gcc-patches
>>  wrote:
>>>
>>> Hi,
>>>This patch skips constant folding for fmin/max when either argument
>>> is sNaN. According to C standard,
>>>fmin(sNaN, sNaN)= qNaN, fmin(sNaN, NaN) = qNaN
>>>So signaling NaN should be tested and skipped for fmin/max in match.pd.
>>>
>>>Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
>>> Is this okay for trunk? Any recommendations? Thanks a lot.
>>
>> OK.
>>
>> Thanks,
>> Richard.
>>
>>> ChangeLog
>>>
>>> 2022-05-05 Haochen Gui 
>>>
>>> gcc/
>>> PR target/105414
>>> * match.pd (minmax): Skip constant folding for fmin/fmax when both
>>> arguments are sNaN or one is sNaN and another is NaN.
>>>
>>> gcc/testsuite/
>>> PR target/105414
>>> * gcc.dg/pr105414.c: New.
>>>
>>> patch.diff
>>>
>>> diff --git a/gcc/match.pd b/gcc/match.pd
>>> index cad61848daa..f256bcbb483 100644
>>> --- a/gcc/match.pd
>>> +++ b/gcc/match.pd
>>> @@ -3093,7 +3093,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>>>  (for minmax (min max FMIN_ALL FMAX_ALL)
>>>   (simplify
>>>(minmax @0 @0)
>>> -  @0))
>>> +  /* if both are sNaN, it should return qNaN.  */
>>> +  (if (!tree_expr_maybe_signaling_nan_p (@0))
>>> +@0)))
> 
> Sorry for chiming in.
> 
> IIUC this patch is mainly for libc function fmin/fmax and the iterator here
> covers min/max and fmin/fmax.  I wonder if it's intent to make this change
> for min/max as well?
> 
> As tree.def, "if either operand is NaN, then it is unspecified", the 
> optimization
> for min/max seems still acceptable?

For MIN/MAX_EXPR, the result is undefined when an operand is NaN. So I think we
shouldn't do constant folding; we should let the target decide how to deal with
it. As far as I understand, "undefined" here means the result depends on the
target.
> 
> BR,
> Kewen


[PATCH] Skip constant folding for fmin/max when either argument is sNaN [PR105414]

2022-05-05 Thread HAO CHEN GUI via Gcc-patches
Hi,
   This patch skips constant folding for fmin/max when either argument
is sNaN. According to C standard,
   fmin(sNaN, sNaN)= qNaN, fmin(sNaN, NaN) = qNaN
   So signaling NaN should be tested and skipped for fmin/max in match.pd.

   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog

2022-05-05 Haochen Gui 

gcc/
PR target/105414
* match.pd (minmax): Skip constant folding for fmin/fmax when both
arguments are sNaN or one is sNaN and another is NaN.

gcc/testsuite/
PR target/105414
* gcc.dg/pr105414.c: New.

patch.diff

diff --git a/gcc/match.pd b/gcc/match.pd
index cad61848daa..f256bcbb483 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3093,7 +3093,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (for minmax (min max FMIN_ALL FMAX_ALL)
  (simplify
   (minmax @0 @0)
-  @0))
+  /* if both are sNaN, it should return qNaN.  */
+  (if (!tree_expr_maybe_signaling_nan_p (@0))
+@0)))
 /* min(max(x,y),y) -> y.  */
 (simplify
  (min:c (max:c @0 @1) @1)
@@ -3193,12 +3195,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(minmax @1 (convert @2)

 (for minmax (FMIN_ALL FMAX_ALL)
- /* If either argument is NaN, return the other one.  Avoid the
-transformation if we get (and honor) a signalling NaN.  */
+ /* If either argument is NaN and other one is not sNaN, return the other
+one.  Avoid the transformation if we get (and honor) a signalling NaN.  */
  (simplify
   (minmax:c @0 REAL_CST@1)
-  (if (real_isnan (TREE_REAL_CST_PTR (@1))
-   && (!HONOR_SNANS (@1) || !TREE_REAL_CST (@1).signalling))
+   (if (real_isnan (TREE_REAL_CST_PTR (@1))
+   && (!HONOR_SNANS (@1) || !TREE_REAL_CST (@1).signalling)
+   && !tree_expr_maybe_signaling_nan_p (@0))
@0)))
 /* Convert fmin/fmax to MIN_EXPR/MAX_EXPR.  C99 requires these
functions to return the numeric arg if the other one is NaN.
diff --git a/gcc/testsuite/gcc.dg/pr105414.c b/gcc/testsuite/gcc.dg/pr105414.c
new file mode 100644
index 000..78772700acf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr105414.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target { *-*-linux* *-*-gnu* } } } */
+/* { dg-options "-O1 -fsignaling-nans -lm" } */
+/* { dg-add-options ieee } */
+/* { dg-require-effective-target issignaling } */
+
+
+#define _GNU_SOURCE
+#include 
+#include 
+
+int main()
+{
+  double a = __builtin_nans ("");
+
+  if (issignaling (fmin (a, a)))
+__builtin_abort ();
+
+  if (issignaling (fmax (a, a)))
+__builtin_abort ();
+
+  double b = __builtin_nan ("");
+
+  if (issignaling (fmin (a, b)))
+__builtin_abort ();
+
+  if (issignaling (fmax (a, b)))
+__builtin_abort ();
+
+  return 0;
+}


Re: [PATCH, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-08-24 Thread HAO CHEN GUI via Gcc-patches
Hi,

On 24/8/2022 下午 1:24, Kewen.Lin wrote:
> Could you try to test with dg-options "-mdejagnu-cpu=power9 -mpowerpc64" all 
> the time, but still
> having that has_arch_ppc64 effective target on aix?
> 
> I'd expect has_arch_ppc64 check to fail on aix 32bit, the error will not be a 
> problem (turning
> into an UNSUPPORTED then)?

I tested it on AIX. "has_arch_ppc64" fails with dg-options
"-mdejagnu-cpu=power9 -mpowerpc64" in a 32-bit AIX environment.
It works as we expected.

I also found that AIX and Darwin are skipped for the bfp tests, so the test
cases don't need to care about them. I'm not sure whether that is intentional.

In bfp.exp

# Exit immediately if this isn't a PowerPC target or if the target is
# aix or Darwin.
if { (![istarget powerpc*-*-*] && ![istarget rs6000-*-*])
 || [istarget "powerpc*-*-aix*"]
 || [istarget "powerpc*-*-darwin*"]  } then {
  return
}


[PATCH v2, rs6000] Put dg-options before effective target checks

2022-08-31 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch changes the order of test directives in 3 test cases.
Originally, these 3 cases failed or were reported as unsupported on some
platforms, because their effective target checks depend on the compile options.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

Thanks
Gui Haochen

ChangeLog
2022-08-31  Haochen Gui  

rs6000: Change the order of test directives for some test cases.  Put
dg-options before the effective target checks, as the has_arch_* checks use
current_compiler_flags and rely on the compile options to get an accurate
result.  Options set by dg-options before dg-require-effective-target are
added to current_compiler_flags, but options set after it are not.  So
adjusting the location of dg-options makes the checks more robust.

gcc/testsuite/
* gcc.target/powerpc/pr92398.p9+.c: Put dg-options before effective
target check.  Replace lp64 check with has_arch_ppc64 and int128.
* gcc.target/powerpc/pr92398.p9-.c: Likewise.
* gcc.target/powerpc/pr93453-1.c: Put dg-options before effective
target check.


patch.diff
diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c 
b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
index 72dd1d9a274..b4f5c7f4b82 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
@@ -1,6 +1,10 @@
-/* { dg-do compile { target { lp64 && has_arch_pwr9 } } } */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -mvsx" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+/* { dg-require-effective-target int128 } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
+/* The test case can be compiled on all platforms with compiling option
+   -mdejagnu-cpu=power9.  */

 /* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */
 /* { dg-final { scan-assembler-times {\mxxlnor\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c 
b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
index bd7fa98af51..4e6a8c8cb8e 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
@@ -1,6 +1,8 @@
-/* { dg-do compile { target { lp64 && {! has_arch_pwr9} } } } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
 /* { dg-options "-O2 -mvsx" } */
+/* { dg-do compile { target { ! has_arch_pwr9 } } } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+/* { dg-require-effective-target powerpc_vsx_ok } */

 /* { dg-final { scan-assembler-times {\mnot\M} 2 { xfail be } } } */
 /* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { { {! has_arch_pwr9} 
&& has_arch_pwr8 } && be } } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93453-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
index b396458ba12..6f4d899c114 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
@@ -1,5 +1,6 @@
-/* { dg-do compile { target has_arch_ppc64 } } */
+/* { dg-do compile } */
 /* { dg-options "-mdejagnu-cpu=power6 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */

 unsigned long load_byte_reverse (unsigned long *in)
 {


Re: [PATCH v2, rs6000] Put dg-options before effective target checks

2022-09-01 Thread HAO CHEN GUI via Gcc-patches
Hi Segher,
  Thanks for your review comments. I will refine it according to
your comments.

On 2/9/2022 上午 12:07, Segher Boessenkool wrote:
>> +/* { dg-do compile { target { ! has_arch_pwr9 } } } */
> Please keep dg-do first thing in the file.
Could you tell me whether dg-do must be on the first line?
I hit a problem here: "! has_arch_pwr9" cannot be put into
dg-require-effective-target because it contains a NOT. So I put dg-options
on the first line, ahead of dg-do.

> 
>> --- a/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
>> @@ -1,5 +1,6 @@
>> -/* { dg-do compile { target has_arch_ppc64 } } */
>> +/* { dg-do compile } */
>>  /* { dg-options "-mdejagnu-cpu=power6 -O2" } */
>> +/* { dg-require-effective-target has_arch_ppc64 } */
> This is fine, but it doesn't change anything, unless we have a bug.

This case suffers from the "empty translation unit" problem and ends up
unsupported on all platforms. Putting dg-options before the check avoids
the problem.

Thanks
Gui Haochen


Re: [PATCH v2, rs6000] Put dg-options before effective target checks

2022-09-01 Thread HAO CHEN GUI via Gcc-patches
Hi Kewen,

On 1/9/2022 下午 5:34, Kewen.Lin wrote:
> Thanks for the updated patch!
> 
> I just found that it seems all the three test cases suffer the empty
> TU error issue from those has_arch* effective target checks?
> 
> If yes, it looks we don't need to bother this once patch [1] gets
> landed?
> 
> Sorry, I didn't notice and ask when reviewing the previous version.
> 
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598748.html

Yes, those 3 test cases all suffer from the "empty translation unit" problem.
My patch just has a side effect that avoids the "empty translation unit"
problem. But the real problem is still there.

pr92398.p9+.c has another problem. It's a compile-only test, so it should be
compiled on any platform when "-mdejagnu-cpu=power9" is set in dg-options
or RUNTESTFLAGS. Putting dg-options before the "has_arch_pwr9" check achieves
this goal.

Thanks
Gui Haochen



[PATCH v2, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-09-02 Thread HAO CHEN GUI via Gcc-patches
Hi,

  This patch is for internal issue1136. It changes insn condition from
TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions.
These instructions all use DI registers and can be invoked with -mpowerpc64
in a 32-bit environment.

  This patch also changes prototypes of related built-ins and effective
target of test cases.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-09-01  Haochen Gui  

gcc/
* config/rs6000/rs6000-builtins.def
(__builtin_vsx_scalar_extract_exp): Set return type to const unsigned
long long.
(__builtin_vsx_scalar_extract_sig): Likewise.
* config/rs6000/vsx.md (xsxexpdp): Change insn condition from
TARGET_64BIT to TARGET_POWERPC64.
(xsxsigdp): Likewise.
(xsiexpdp): Likewise.
(xsiexpdpf): Likewise.

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Change effective
target from lp64 to has_arch_ppc64 and add -mpowerpc64 for 32-bit
environment.
* gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Likewise.
* gcc.target/powerpc/bfp/scalar-extract-exp-7.c: Likewise.
* gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Likewise.
* gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Likewise.
* gcc.target/powerpc/bfp/scalar-extract-sig-7.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Likewise.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f76f54793d7..4ebfd4704a1 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2847,10 +2847,10 @@
   pure vsc __builtin_vsx_lxvl (const void *, signed long);
 LXVL lxvl {}

-  const signed long __builtin_vsx_scalar_extract_exp (double);
+  const unsigned long long __builtin_vsx_scalar_extract_exp (double);
 VSEEDP xsxexpdp {}

-  const signed long __builtin_vsx_scalar_extract_sig (double);
+  const unsigned long long __builtin_vsx_scalar_extract_sig (double);
 VSESDP xsxsigdp {}

   const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index e226a93bbe5..a01711aa2cb 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5098,7 +5098,7 @@ (define_insn "xsxexpdp"
   [(set (match_operand:DI 0 "register_operand" "=r")
(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
 UNSPEC_VSX_SXEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsxexpdp %0,%x1"
   [(set_attr "type" "integer")])

@@ -5116,7 +5116,7 @@ (define_insn "xsxsigdp"
   [(set (match_operand:DI 0 "register_operand" "=r")
(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
 UNSPEC_VSX_SXSIG))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsxsigdp %0,%x1"
   [(set_attr "type" "integer")])

@@ -5147,7 +5147,7 @@ (define_insn "xsiexpdp"
(unspec:DF [(match_operand:DI 1 "register_operand" "r")
(match_operand:DI 2 "register_operand" "r")]
 UNSPEC_VSX_SIEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsiexpdp %x0,%1,%2"
   [(set_attr "type" "fpsimple")])

@@ -5157,7 +5157,7 @@ (define_insn "xsiexpdpf"
(unspec:DF [(match_operand:DF 1 "register_operand" "r")
(match_operand:DI 2 "register_operand" "r")]
 UNSPEC_VSX_SIEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsiexpdp %x0,%1,%2"
   [(set_attr "type" "fpsimple")])

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
index 35bf1b240f3..81565c50ec7 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
@@ -1,7 +1,8 @@
-/* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
-/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
 /* { dg-options "-mdejagnu-cpu=power9" } */
+/* { dg-additional-options "-mpowerpc64" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */

 /* This test should succeed only on 64-bit configurations.  */
 #include 
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-6.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-6.c
index b9dd7d61aae..33e55d5abc1 100644
--- 

[PATCH v3, rs6000] Change mode and insn condition for VSX scalar extract/insert instructions

2022-09-07 Thread HAO CHEN GUI via Gcc-patches
Hi,

  For scalar extract/insert instructions, exponent field can be stored in a
32-bit register. So this patch changes the mode of exponent field from DI to
SI. The instructions using DI registers can be invoked with -mpowerpc64 in a
32-bit environment. The patch changes insn condition from TARGET_64BIT to
TARGET_POWERPC64 for those instructions.

  This patch also changes prototypes of relevant built-ins and effective
target of test cases.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-09-07  Haochen Gui  

gcc/
* config/rs6000/rs6000-builtins.def
(__builtin_vsx_scalar_extract_exp): Set return type to const unsigned
int.
(__builtin_vsx_scalar_extract_sig): Set return type to const unsigned
long long.
(__builtin_vsx_scalar_insert_exp): Set type of second argument to
unsigned int.
(__builtin_vsx_scalar_insert_exp_dp): Likewise.
* config/rs6000/vsx.md (xsxexpdp): Set mode of first operand to
SImode.  Remove TARGET_64BIT from insn condition.
(xsxsigdp): Change insn condition from TARGET_64BIT to TARGET_POWERPC64.
(xsiexpdp): Change insn condition from TARGET_64BIT to
TARGET_POWERPC64.  Set mode of third operand to SImode.
(xsiexpdpf): Set mode of third operand to SImode.  Remove TARGET_64BIT
from insn condition.

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Change effective
target from lp64 to has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Likewise.
* gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Likewise.
* gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Likewise.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f76f54793d7..ca2a1d7657e 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2847,17 +2847,17 @@
   pure vsc __builtin_vsx_lxvl (const void *, signed long);
 LXVL lxvl {}

-  const signed long __builtin_vsx_scalar_extract_exp (double);
+  const unsigned int __builtin_vsx_scalar_extract_exp (double);
 VSEEDP xsxexpdp {}

-  const signed long __builtin_vsx_scalar_extract_sig (double);
+  const unsigned long long __builtin_vsx_scalar_extract_sig (double);
 VSESDP xsxsigdp {}

   const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
-unsigned long long);
+   unsigned int);
 VSIEDP xsiexpdp {}

-  const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned long long);
+  const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned int);
 VSIEDPF xsiexpdpf {}

   pure vsc __builtin_vsx_xl_len_r (void *, signed long);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index e226a93bbe5..9d3a2340a79 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5095,10 +5095,10 @@ (define_insn "xsxexpqp_"

 ;; VSX Scalar Extract Exponent Double-Precision
 (define_insn "xsxexpdp"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-   (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
+  [(set (match_operand:SI 0 "register_operand" "=r")
+   (unspec:SI [(match_operand:DF 1 "vsx_register_operand" "wa")]
 UNSPEC_VSX_SXEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR"
   "xsxexpdp %0,%x1"
   [(set_attr "type" "integer")])

@@ -5116,7 +5116,7 @@ (define_insn "xsxsigdp"
   [(set (match_operand:DI 0 "register_operand" "=r")
(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
 UNSPEC_VSX_SXSIG))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsxsigdp %0,%x1"
   [(set_attr "type" "integer")])

@@ -5145,9 +5145,9 @@ (define_insn "xsiexpqp_"
 (define_insn "xsiexpdp"
   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
(unspec:DF [(match_operand:DI 1 "register_operand" "r")
-   (match_operand:DI 2 "register_operand" "r")]
+   (match_operand:SI 2 "register_operand" "r")]
 UNSPEC_VSX_SIEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsiexpdp %x0,%1,%2"
   [(set_attr "type" "fpsimple")])

@@ -5155,9 +5155,9 @@ (define_insn "xsiexpdp"
 (define_insn "xsiexpdpf"
   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
(unspec:DF [(match_operand:DF 1 "register_operand" "r")
-   (match_operand:DI 2 "register_operand" "r")]
+   (match_operand:SI 2 

Re: [PATCH, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-08-23 Thread HAO CHEN GUI via Gcc-patches
Hi Segher,

On 23/8/2022 下午 10:26, Segher Boessenkool wrote:
> Hi!
> 
> On Fri, Aug 19, 2022 at 10:35:54AM +0800, HAO CHEN GUI wrote:
>> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
>> @@ -1,7 +1,8 @@
>>  /* { dg-do compile { target { powerpc*-*-* } } } */
>> -/* { dg-require-effective-target lp64 } */
>> -/* { dg-require-effective-target powerpc_p9vector_ok } */
>>  /* { dg-options "-mdejagnu-cpu=power9" } */
>> +/* { dg-additional-options "-mpowerpc64" { target { powerpc*-*-linux* && 
>> ilp32 } } } */
> 
> You can add this always.  It is default on 64-bit systems, but it is
> simpler to just always add it:
> /* { dg-additional-options "-mpowerpc64" } */
> 
> Or are there subtargets that will error on this?
Yes, AIX fails if TARGET_POWERPC64 is set and TARGET_64BIT is not set.
So I add "-mpowerpc64" for Linux 32-bit environment.

  if (TARGET_POWERPC64 && ! TARGET_64BIT)   \
{   \
  error ("%<-maix64%> required: 64-bit computation with 32-bit addressing 
not yet supported"); \
}

Thanks a lot
Gui Haochen


Re: [PATCH, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-08-24 Thread HAO CHEN GUI via Gcc-patches
Hi Kewen,

On 24/8/2022 下午 1:24, Kewen.Lin wrote:
> Could you try to test with dg-options "-mdejagnu-cpu=power9 -mpowerpc64" all 
> the time, but still
> having that has_arch_ppc64 effective target on aix?
> 
> I'd expect has_arch_ppc64 check to fail on aix 32bit, the error will not be a 
> problem (turning
> into an UNSUPPORTED then)?

Good point. I will get an AIX to test it.

Thanks
Gui Haochen


[PATCH, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-08-18 Thread HAO CHEN GUI via Gcc-patches
Hi,

  This patch is for internal issue1136. It changes insn condition from
TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions.
These instructions all use DI registers and can be invoked with -mpowerpc64
in a 32-bit environment.

  This patch also changes prototypes of related built-ins and target selector
of test cases.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.


ChangeLog
2022-08-19  Haochen Gui  

gcc/
* config/rs6000/rs6000-builtins.def
(__builtin_vsx_scalar_extract_exp): Set return type to const unsigned
long long.
(__builtin_vsx_scalar_extract_sig): Likewise.
* config/rs6000/vsx.md (xsxexpdp): Change insn condition from
TARGET_64BIT to TARGET_POWERPC64.
(xsxsigdp): Likewise.
(xsiexpdp): Likewise.
(xsiexpdpf): Likewise.

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Change effective
target from lp64 to has_arch_ppc64 and add -mpowerpc64 for 32-bit
environment.
* gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Likewise.


patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f76f54793d7..4ebfd4704a1 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2847,10 +2847,10 @@
   pure vsc __builtin_vsx_lxvl (const void *, signed long);
 LXVL lxvl {}

-  const signed long __builtin_vsx_scalar_extract_exp (double);
+  const unsigned long long __builtin_vsx_scalar_extract_exp (double);
 VSEEDP xsxexpdp {}

-  const signed long __builtin_vsx_scalar_extract_sig (double);
+  const unsigned long long __builtin_vsx_scalar_extract_sig (double);
 VSESDP xsxsigdp {}

   const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index e226a93bbe5..a01711aa2cb 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5098,7 +5098,7 @@ (define_insn "xsxexpdp"
   [(set (match_operand:DI 0 "register_operand" "=r")
(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
 UNSPEC_VSX_SXEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsxexpdp %0,%x1"
   [(set_attr "type" "integer")])

@@ -5116,7 +5116,7 @@ (define_insn "xsxsigdp"
   [(set (match_operand:DI 0 "register_operand" "=r")
(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
 UNSPEC_VSX_SXSIG))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsxsigdp %0,%x1"
   [(set_attr "type" "integer")])

@@ -5147,7 +5147,7 @@ (define_insn "xsiexpdp"
(unspec:DF [(match_operand:DI 1 "register_operand" "r")
(match_operand:DI 2 "register_operand" "r")]
 UNSPEC_VSX_SIEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsiexpdp %x0,%1,%2"
   [(set_attr "type" "fpsimple")])

@@ -5157,7 +5157,7 @@ (define_insn "xsiexpdpf"
(unspec:DF [(match_operand:DF 1 "register_operand" "r")
(match_operand:DI 2 "register_operand" "r")]
 UNSPEC_VSX_SIEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsiexpdp %x0,%1,%2"
   [(set_attr "type" "fpsimple")])

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
index 35bf1b240f3..c9190bc7c6c 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
@@ -1,7 +1,8 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
-/* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */
+/* { dg-additional-options "-mpowerpc64" { target { powerpc*-*-linux* && ilp32 
} } } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */

 /* This test should succeed only on 64-bit configurations.  */
 #include 
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c
index 637080652b7..a391ac8cce3 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c
@@ -1,7 +1,8 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
-/* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */
+/* { dg-additional-options "-mpowerpc64" { target { powerpc*-*-linux* && ilp32 
} } } */
+/* { 

Re: [PATCH, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-08-25 Thread HAO CHEN GUI via Gcc-patches
Hi David,

On 25/8/2022 下午 10:01, David Edelsohn wrote:
> On Thu, Aug 25, 2022 at 1:22 AM Kewen.Lin  wrote:
>>
>> on 2022/8/25 11:37, HAO CHEN GUI wrote:
>>> Hi,
>>>
>>> On 24/8/2022 下午 1:24, Kewen.Lin wrote:
>>>> Could you try to test with dg-options "-mdejagnu-cpu=power9 -mpowerpc64" 
>>>> all the time, but still
>>>> having that has_arch_ppc64 effective target on aix?
>>>>
>>>> I'd expect has_arch_ppc64 check to fail on aix 32bit, the error will not 
>>>> be a problem (turning
>>>> into an UNSUPPORTED then)?
>>>
>>> I tested it on AIX. "has_arch_ppc64" fails with dg-options 
>>> "-mdejagnu-cpu=power9 -mpowerpc64" on
>>> 32-bit AIX environment. It works as we expected.
>>
>> Nice, thanks for your time on testing.
>>
>>>
>>> Also I found that AIX and Darwin are skipped for bfp test. So in testcase, 
>>> it's no need to care
>>> about them. Not sure if it's intention.
>>>
>>> In bfp.exp
>>>
>>> # Exit immediately if this isn't a PowerPC target or if the target is
>>> # aix or Darwin.
>>> if { (![istarget powerpc*-*-*] && ![istarget rs6000-*-*])
>>>  || [istarget "powerpc*-*-aix*"]
>>>  || [istarget "powerpc*-*-darwin*"]  } then {
>>>   return
>>> }
>>
>> I can't find a hint about why we wanted to disable bfp testing on aix, it 
>> looks like a overkill to me.
>>
>> Could you help to further test if all test cases in this small bucket 
>> available on aix?
>>
>> Maybe it can give us some evidences on why it's intentional or not.
>>
>> Hi David & Segher,
>>
>> Do you have some insights on this?
> 
> AIX (and Darwin) are not Linux and not ELF.  There is no support for
> BPF.  All of the tests fail, so they are skipped.

Thanks so much for your info.

Here are the test results on P7 AIX 7.1. I tested all scalar-extract-sig-* and
scalar-insert-exp-* cases in the "testsuite/powerpc/bfp" folder. All
compile-only cases pass except those that use __ieee128. The runnable cases
fail as expected, since p9vector is not supported on P7 servers.

So is __ieee128 what blocks the binary floating-point (bfp) tests on AIX?

Thanks
Gui Haochen
> 
> Thanks, David


[PATCH, rs6000] Put dg-options ahead of target selector checks

2022-08-26 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch changes the order of the test directives in 3 test cases.
Originally, these 3 cases failed or were reported as unsupported on some
platforms, because their target selector checks depend on the compile options.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

Thanks
Gui Haochen

ChangeLog
2022-08-26  Haochen Gui  

rs6000: Change the sequence of test directives for some test cases.  Put
dg-options ahead of target selector checks as the compiling options affect the
result of these checks.

gcc/testsuite/
* gcc.target/powerpc/pr92398.p9+.c: Put dg-options ahead of target
selector check.
* gcc.target/powerpc/pr92398.p9-.c: Likewise.
* gcc.target/powerpc/pr93453-1.c: Likewise.


patch.diff
diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c 
b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
index 72dd1d9a274..4e4fad620e8 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
@@ -1,6 +1,8 @@
-/* { dg-do compile { target { lp64 && has_arch_pwr9 } } } */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -mvsx" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+/* { dg-require-effective-target int128 } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */

 /* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */
 /* { dg-final { scan-assembler-times {\mxxlnor\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c 
b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
index bd7fa98af51..4e6a8c8cb8e 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
@@ -1,6 +1,8 @@
-/* { dg-do compile { target { lp64 && {! has_arch_pwr9} } } } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
 /* { dg-options "-O2 -mvsx" } */
+/* { dg-do compile { target { ! has_arch_pwr9 } } } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+/* { dg-require-effective-target powerpc_vsx_ok } */

 /* { dg-final { scan-assembler-times {\mnot\M} 2 { xfail be } } } */
 /* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { { {! has_arch_pwr9} 
&& has_arch_pwr8 } && be } } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93453-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
index b396458ba12..6f4d899c114 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
@@ -1,5 +1,6 @@
-/* { dg-do compile { target has_arch_ppc64 } } */
+/* { dg-do compile } */
 /* { dg-options "-mdejagnu-cpu=power6 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */

 unsigned long load_byte_reverse (unsigned long *in)
 {



Re: [PATCH, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-08-19 Thread HAO CHEN GUI via Gcc-patches
Hi Kewen,

On 19/8/2022 上午 11:01, Kewen.Lin wrote:
> Maybe we should add one comment here (also the other touched case) or
> in the commit log saying why we reorder the dg-require-effective-target
> and dg-options, since the reason isn't obvious.  :)

Sure, I will explain it in commit log. I submitted an internal issue for
this problem too.

Thanks for your review comments.

Gui Haochen


[PATCH-2, rs6000] Reverse V8HI on Power8 by vector rotation [PR100866]

2022-10-23 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements V8HI byte reverse on Power8 by vector rotation.
It should be more efficient than the original vector permute. The patch comes
from Xionghu's comments in the PR. I just added a test case for it.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.



ChangeLog
2022-10-24  Xionghu Luo 

gcc/
PR target/100866
* config/rs6000/altivec.md (*altivec_vrl): Renamed to...
(altivec_vrl): ...this.
* config/rs6000/vsx.md (revb_): Call vspltish and vrlh when
target is Power8 and mode is V8HI.

gcc/testsuite/
PR target/100866
* gcc.target/powerpc/pr100866-2.c: New.

patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 2c4940f2e21..84660073f32 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1875,7 +1875,7 @@ (define_insn "altivec_vpkuum_direct"
 }
   [(set_attr "type" "vecperm")])

-(define_insn "*altivec_vrl"
+(define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
(match_operand:VI2 2 "register_operand" "v")))]
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index e226a93bbe5..34662a7252d 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6092,12 +6092,21 @@ (define_expand "revb_"
 emit_insn (gen_p9_xxbr_ (operands[0], operands[1]));
   else
 {
-  /* Want to have the elements in reverse order relative
-to the endian mode in use, i.e. in LE mode, put elements
-in BE order.  */
-  rtx sel = swap_endian_selector_for_mode(mode);
-  emit_insn (gen_altivec_vperm_ (operands[0], operands[1],
-  operands[1], sel));
+  if (mode == V8HImode)
+   {
+ rtx splt = gen_reg_rtx (V8HImode);
+ emit_insn (gen_altivec_vspltish (splt, GEN_INT (8)));
+ emit_insn (gen_altivec_vrlh (operands[0], operands[1], splt));
+   }
+  else
+   {
+ /* Want to have the elements in reverse order relative
+to the endian mode in use, i.e. in LE mode, put elements
+in BE order.  */
+ rtx sel = swap_endian_selector_for_mode (mode);
+ emit_insn (gen_altivec_vperm_ (operands[0], operands[1],
+  operands[1], sel));
+   }
 }

   DONE;
diff --git a/gcc/testsuite/gcc.target/powerpc/pr100866-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr100866-2.c
new file mode 100644
index 000..4357d1beb09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr100866-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-final { scan-assembler {\mvspltish\M} } } */
+/* { dg-final { scan-assembler {\mvrlh\M} } } */
+
+#include <altivec.h>
+
+vector unsigned short revb(vector unsigned short a)
+{
+   return vec_revb(a);
+}
+


Ping^3 [PATCH v6, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-09-20 Thread HAO CHEN GUI via Gcc-patches
Hi,
Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597158.html
Thanks.

On 1/8/2022 上午 10:03, HAO CHEN GUI wrote:
> Hi,
>Gentle ping this:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597158.html
> Thanks.
> 
> 
> On 4/7/2022 下午 2:32, HAO CHEN GUI wrote:
>> Hi,
>>Gentle ping this:
>> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597158.html
>> Thanks.
>>
>> On 24/6/2022 上午 10:02, HAO CHEN GUI wrote:
>>> Hi,
>>>   This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
>>> Tests show that outputs of xs[min/max]dp are consistent with the standard
>>> of C99 fmin/max.
>>>
>>>   This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
>>> of smin/max. So the builtins always generate xs[min/max]dp on all
>>> platforms.
>>>
>>>   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
>>> Is this okay for trunk? Any recommendations? Thanks a lot.
>>>
>>> ChangeLog
>>> 2022-06-24 Haochen Gui 
>>>
>>> gcc/
>>> PR target/103605
>>> * config/rs6000/rs6000.md (FMINMAX): New.
>>> (minmax_op): New.
>>> (f3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN.
>>> * config/rs6000/rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set
>>> pattern to fmaxdf3.
>>> (__builtin_vsx_xsmindp): Set pattern to fmindf3.
>>>
>>> gcc/testsuite/
>>> PR target/103605
>>> * gcc.dg/powerpc/pr103605.c: New.
>>>
>>>
>>> patch.diff
>>> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
>>> b/gcc/config/rs6000/rs6000-builtins.def
>>> index f4a9f24bcc5..8b735493b40 100644
>>> --- a/gcc/config/rs6000/rs6000-builtins.def
>>> +++ b/gcc/config/rs6000/rs6000-builtins.def
>>> @@ -1613,10 +1613,10 @@
>>>  XSCVSPDP vsx_xscvspdp {}
>>>
>>>const double __builtin_vsx_xsmaxdp (double, double);
>>> -XSMAXDP smaxdf3 {}
>>> +XSMAXDP fmaxdf3 {}
>>>
>>>const double __builtin_vsx_xsmindp (double, double);
>>> -XSMINDP smindf3 {}
>>> +XSMINDP fmindf3 {}
>>>
>>>const double __builtin_vsx_xsrdpi (double);
>>>  XSRDPI vsx_xsrdpi {}
>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>>> index bf85baa5370..ae0dd98f0f9 100644
>>> --- a/gcc/config/rs6000/rs6000.md
>>> +++ b/gcc/config/rs6000/rs6000.md
>>> @@ -158,6 +158,8 @@ (define_c_enum "unspec"
>>> UNSPEC_HASHCHK
>>> UNSPEC_XXSPLTIDP_CONST
>>> UNSPEC_XXSPLTIW_CONST
>>> +   UNSPEC_FMAX
>>> +   UNSPEC_FMIN
>>>])
>>>
>>>  ;;
>>> @@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr"
>>>DONE;
>>>  })
>>>
>>> +
>>> +(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
>>> +
>>> +(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
>>> +(UNSPEC_FMIN "min")])
>>> +
>>> +(define_insn "f<minmax_op><mode>3"
>>> +  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
>>> +   (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
>>> + (match_operand:SFDF 2 "vsx_register_operand" "wa")]
>>> +FMINMAX))]
>>> +  "TARGET_VSX && !flag_finite_math_only"
>>> +  "xs<minmax_op>dp %x0,%x1,%x2"
>>> +  [(set_attr "type" "fp")]
>>> +)
>>> +
>>>  (define_expand "movcc"
>>> [(set (match_operand:GPR 0 "gpc_reg_operand")
>>>  (if_then_else:GPR (match_operand 1 "comparison_operator")
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605.c 
>>> b/gcc/testsuite/gcc.target/powerpc/pr103605.c
>>> new file mode 100644
>>> index 000..1c938d40e61
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr103605.c
>>> @@ -0,0 +1,37 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target powerpc_vsx_ok } */
>>> +/* { dg-options "-O2 -mvsx" } */
>>> +/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */
>>> +/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */
>>> +
>>> +#include <math.h>
>>> +
>>> +double test1 (double d0, double d1)
>>> +{
>>> +  return fmin (d0, d1);
>>> +}
>>> +
>>> +float test2 (float d0, float d1)
>>> +{
>>> +  return fmin (d0, d1);
>>> +}
>>> +
>>> +double test3 (double d0, double d1)
>>> +{
>>> +  return fmax (d0, d1);
>>> +}
>>> +
>>> +float test4 (float d0, float d1)
>>> +{
>>> +  return fmax (d0, d1);
>>> +}
>>> +
>>> +double test5 (double d0, double d1)
>>> +{
>>> +  return __builtin_vsx_xsmindp (d0, d1);
>>> +}
>>> +
>>> +double test6 (double d0, double d1)
>>> +{
>>> +  return __builtin_vsx_xsmaxdp (d0, d1);
>>> +}


Ping^3 [PATCH v2, rs6000] Use CC for BCD operations [PR100736]

2022-09-20 Thread HAO CHEN GUI via Gcc-patches
 Hi,
 Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html
Thanks.

On 1/8/2022 上午 10:02, HAO CHEN GUI wrote:
> Hi,
> Gentle ping this:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html
> Thanks.
> 
> On 4/7/2022 下午 2:33, HAO CHEN GUI wrote:
>> Hi,
>>Gentle ping this:
>> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html
>> Thanks.
>>
>> On 22/6/2022 下午 4:26, HAO CHEN GUI wrote:
>>> Hi,
>>>   This patch uses CC instead of CCFP for all BCD operations. Thus, the
>>> finite-math-only flag has no impact on BCD operations. To support BCD
>>> overflow and invalid coding, an UNSPEC is defined to move the bit to a
>>> general register. Patterns for conditional branch and return on the overflow
>>> bit are defined so that the UNSPEC and the branch/return can be combined into
>>> one jump insn. A split pattern for extension of the overflow bit is defined
>>> for optimization.
>>>
>>>   This patch also replaces bcdadd with bcdsub in the expander for the BCD
>>> invalid-coding check.
>>>
>>> ChangeLog
>>> 2022-06-22 Haochen Gui 
>>>
>>> gcc/
>>> PR target/100736
>>> * config/rs6000/altivec.md (BCD_TEST): Remove unordered.
>>> (bcd_): Replace CCFP with CC.
>>> (*bcd_test_): Replace CCFP with CC.  Generate
>>> condition insn with CC mode.
>>> (bcd_overflow_): New.
>>> (*bcdoverflow_): New.
>>> (*bcdinvalid_): Removed.
>>> (bcdinvalid_): Implement by UNSPEC_BCDSUB and UNSPEC_BCD_OVERFLOW.
>>> (nuun): New.
>>> (*overflow_cbranch): New.
>>> (*overflow_creturn): New.
>>> (*overflow_extendsidi): New.
>>> (bcdshift_v16qi): Replace CCFP with CC.
>>> (bcdmul10_v16qi): Likewise.
>>> (bcddiv10_v16qi): Likewise.
>>> (peephole for bcd_add/sub): Likewise.
>>> * config/rs6000/rs6000-builtins.def (__builtin_bcdadd_ov_v1ti): Set
>>> pattern to bcdadd_overflow_v1ti.
>>> (__builtin_bcdadd_ov_v16qi): Set pattern to bcdadd_overflow_v16qi.
>>> (__builtin_bcdsub_ov_v1ti): Set pattern to bcdsub_overflow_v1ti.
>>> (__builtin_bcdsub_ov_v16qi): Set pattern to bcdsub_overflow_v16qi.
>>>
>>> gcc/testsuite/
>>> PR target/100736
>>> * gcc.target/powerpc/bcd-4.c: Adjust number of bcdadd and bcdsub.
>>> Scan no cror insns.
>>>
>>> patch.diff
>>> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
>>> index efc8ae35c2e..26f131e61ea 100644
>>> --- a/gcc/config/rs6000/altivec.md
>>> +++ b/gcc/config/rs6000/altivec.md
>>> @@ -4370,7 +4370,7 @@ (define_int_iterator UNSPEC_BCD_ADD_SUB 
>>> [UNSPEC_BCDADD UNSPEC_BCDSUB])
>>>  (define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add")
>>>   (UNSPEC_BCDSUB "sub")])
>>>
>>> -(define_code_iterator BCD_TEST [eq lt le gt ge unordered])
>>> +(define_code_iterator BCD_TEST [eq lt le gt ge])
>>>  (define_mode_iterator VBCD [V1TI V16QI])
>>>
>>>  (define_insn "bcd_"
>>> @@ -4379,7 +4379,7 @@ (define_insn "bcd_"
>>>   (match_operand:VBCD 2 "register_operand" "v")
>>>   (match_operand:QI 3 "const_0_to_1_operand" "n")]
>>>  UNSPEC_BCD_ADD_SUB))
>>> -   (clobber (reg:CCFP CR6_REGNO))]
>>> +   (clobber (reg:CC CR6_REGNO))]
>>>"TARGET_P8_VECTOR"
>>>"bcd. %0,%1,%2,%3"
>>>[(set_attr "type" "vecsimple")])
>>> @@ -4389,9 +4389,9 @@ (define_insn "bcd_"
>>>  ;; UNORDERED test on an integer type (like V1TImode) is not defined.  The 
>>> type
>>>  ;; probably should be one that can go in the VMX (Altivec) registers, so we
>>>  ;; can't use DDmode or DFmode.
>>> -(define_insn "*bcd_test_"
>>> -  [(set (reg:CCFP CR6_REGNO)
>>> -   (compare:CCFP
>>> +(define_insn "bcd_test_"
>>> +  [(set (reg:CC CR6_REGNO)
>>> +   (compare:CC
>>>  (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")
>>>(match_operand:VBCD 2 "register_operand" "v")
>>>(match_operand:QI 3 "const_0_to_1_operand" "i")]
>>> @@ -4408,8 +4408,8 @@ (define_insn "*bcd_test2_"
>>> 

[PATCH, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]

2022-09-20 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds a new insn for vector splat with small V2DI constants on P8.
If the value of the constant is in the range [-16, 15] and is not 0 or -1, it
can be loaded with vspltisw and vupkhsw on P8. This should be more efficient
than loading the vector from the TOC.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-09-21 Haochen Gui 

gcc/
PR target/104124
* config/rs6000/altivec.md (*altivec_vupkhs_direct): Renamed
to...
(altivec_vupkhs_direct): ...this.
* config/rs6000/constraints.md (wT constraint): New constraint for a
vector constant that can be loaded with vspltisw and vupkhsw.
* config/rs6000/predicates.md (vspltisw_constant_split): New
predicate for wT constraint.
* config/rs6000/rs6000-protos.h (vspltisw_constant_p): Add declaration.
* config/rs6000/rs6000.cc (easy_altivec_constant): Call
vspltisw_constant_p to judge if a V2DI constant can be synthesized with
a vspltisw and a vupkhsw.
(vspltisw_constant_p): New function to return true if OP mode is
V2DI and can be synthesized with ISA 2.07 instruction vupkhsw and
vspltisw.
* config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up
constants with vspltisw and vupkhsw.
gcc/testsuite/
PR target/104124
* gcc.target/powerpc/p8-splat.c: New.

patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 2c4940f2e21..185414df021 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs"
 }
   [(set_attr "type" "vecperm")])

-(define_insn "*altivec_vupkhs_direct"
+(define_insn "altivec_vupkhs_direct"
   [(set (match_operand:VP 0 "register_operand" "=v")
(unspec:VP [(match_operand: 1 "register_operand" "v")]
 UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 5a44a92142e..f65dea6e0c7 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -150,6 +150,10 @@ (define_constraint "wS"
   "@internal Vector constant that can be loaded with XXSPLTIB & sign 
extension."
   (match_test "xxspltib_constant_split (op, mode)"))

+(define_constraint "wT"
+  "@internal Vector constant that can be loaded with vspltisw & vupkhsw."
+  (match_test "vspltisw_constant_split (op, mode)"))
+
 ;; ISA 3.0 DS-form instruction that has the bottom 2 bits 0 and no update form.
 ;; Used by LXSD/STXSD/LXSSP/STXSSP.  In contrast to "Y", the multiple-of-four
 ;; offset is enforced for 32-bit too.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index b1fcc69bb60..00cf60bbe58 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -694,6 +694,19 @@ (define_predicate "xxspltib_constant_split"
   return num_insns > 1;
 })

+;; Return true if the operand is a constant that can be loaded with a vspltisw
+;; instruction and then a vupkhsw instruction.
+
+(define_predicate "vspltisw_constant_split"
+  (match_code "const_vector,vec_duplicate")
+{
+  int value = 32;
+
+  if (!vspltisw_constant_p (op, mode, &value))
+return false;
+
+  return true;
+})

 ;; Return 1 if the operand is constant that can loaded directly with a XXSPLTIB
 ;; instruction.
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index b3c16e7448d..45f3d044eee 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, 
rtx, int, int, int,

 extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
+extern bool vspltisw_constant_p (rtx, machine_mode, int *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index df491bee2ea..984624026c2 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -6292,6 +6292,12 @@ easy_altivec_constant (rtx op, machine_mode mode)
  && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
return 8;

+  /* If V2DI constant is within RANGE (-16, 15), it can be synthesized with
+a vspltisw and a vupkhsw.  */
+  int value = 32;
+  if (vspltisw_constant_p (op, mode, &value))
+   return 8;
+
   return 0;
 }

@@ -6494,6 +6500,69 @@ xxspltib_constant_p (rtx op,
   return true;
 }

+/* Return true if OP mode is V2DI and can be synthesized with ISA 2.07
+   instructions vupkhsw and vspltisw.
+
+   Return the constant that is being split via CONSTANT_PTR.  */
+
+bool
+vspltisw_constant_p (rtx op, machine_mode mode, int *constant_ptr)
+{
+  HOST_WIDE_INT 

Ping [PATCH v3, rs6000] Change mode and insn condition for VSX scalar extract/insert instructions

2022-09-20 Thread HAO CHEN GUI via Gcc-patches
Hi,
 Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601196.html
Thanks.

On 7/9/2022 下午 3:44, HAO CHEN GUI wrote:
> Hi,
> 
>   For scalar extract/insert instructions, exponent field can be stored in a
> 32-bit register. So this patch changes the mode of exponent field from DI to
> SI. The instructions using DI registers can be invoked with -mpowerpc64 in a
> 32-bit environment. The patch changes insn condition from TARGET_64BIT to
> TARGET_POWERPC64 for those instructions.
> 
>   This patch also changes prototypes of relevant built-ins and effective
> target of test cases.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-09-07  Haochen Gui  
> 
> gcc/
>   * config/rs6000/rs6000-builtins.def
>   (__builtin_vsx_scalar_extract_exp): Set return type to const unsigned
>   int.
>   (__builtin_vsx_scalar_extract_sig): Set return type to const unsigned
>   long long.
>   (__builtin_vsx_scalar_insert_exp): Set type of second argument to
>   unsigned int.
>   (__builtin_vsx_scalar_insert_exp_dp): Likewise.
>   * config/rs6000/vsx.md (xsxexpdp): Set mode of first operand to
>   SImode.  Remove TARGET_64BIT from insn condition.
>   (xsxsigdp): Change insn condition from TARGET_64BIT to TARGET_POWERPC64.
>   (xsiexpdp): Change insn condition from TARGET_64BIT to
>   TARGET_POWERPC64.  Set mode of third operand to SImode.
>   (xsiexpdpf): Set mode of third operand to SImode.  Remove TARGET_64BIT
>   from insn condition.
> 
> gcc/testsuite/
>   * gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Change effective
>   target from lp64 to has_arch_ppc64.
>   * gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Likewise.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index f76f54793d7..ca2a1d7657e 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -2847,17 +2847,17 @@
>pure vsc __builtin_vsx_lxvl (const void *, signed long);
>  LXVL lxvl {}
> 
> -  const signed long __builtin_vsx_scalar_extract_exp (double);
> +  const unsigned int __builtin_vsx_scalar_extract_exp (double);
>  VSEEDP xsxexpdp {}
> 
> -  const signed long __builtin_vsx_scalar_extract_sig (double);
> +  const unsigned long long __builtin_vsx_scalar_extract_sig (double);
>  VSESDP xsxsigdp {}
> 
>const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
> -unsigned long long);
> + unsigned int);
>  VSIEDP xsiexpdp {}
> 
> -  const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned long 
> long);
> +  const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned int);
>  VSIEDPF xsiexpdpf {}
> 
>pure vsc __builtin_vsx_xl_len_r (void *, signed long);
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index e226a93bbe5..9d3a2340a79 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -5095,10 +5095,10 @@ (define_insn "xsxexpqp_"
> 
>  ;; VSX Scalar Extract Exponent Double-Precision
>  (define_insn "xsxexpdp"
> -  [(set (match_operand:DI 0 "register_operand" "=r")
> - (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
> +  [(set (match_operand:SI 0 "register_operand" "=r")
> + (unspec:SI [(match_operand:DF 1 "vsx_register_operand" "wa")]
>UNSPEC_VSX_SXEXPDP))]
> -  "TARGET_P9_VECTOR && TARGET_64BIT"
> +  "TARGET_P9_VECTOR"
>"xsxexpdp %0,%x1"
>[(set_attr "type" "integer")])
> 
> @@ -5116,7 +5116,7 @@ (define_insn "xsxsigdp"
>[(set (match_operand:DI 0 "register_operand" "=r")
>   (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
>UNSPEC_VSX_SXSIG))]
> -  "TARGET_P9_VECTOR && TARGET_64BIT"
> +  "TARGET_P9_VECTOR && TARGET_POWERPC64"
>"xsxsigdp %0,%x

RE: [PATCH v2, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-09-12 Thread HAO CHEN GUI via Gcc-patches


Hi Segher,

On 10/9/2022 上午 1:17, Segher Boessenkool wrote:
>> In rs6000-overload.def, the vsx_ version built-ins are overridden to vec_
>> version.
> How?  Where?

vec_ version built-ins are defined in rs6000-overload.def. Yes, they're fine
and in line with the definition in PVIPR.

[VEC_VEEDP, vec_extract_exp_dp, __builtin_vec_extract_exp_dp]
  vull __builtin_vec_extract_exp_dp (vd);
VEEDP  VEEDP_DEPR1

[VEC_VEESP, vec_extract_exp_sp, __builtin_vec_extract_exp_sp]
  vui __builtin_vec_extract_exp_sp (vf);
VEESP  VEESP_DEPR1

[VEC_VEE, vec_extract_exp, __builtin_vec_extract_exp]
  vui __builtin_vec_extract_exp (vf);
VEESP
  vull __builtin_vec_extract_exp (vd);
VEEDP

vec-extract-exp-2.c:12:3: note: overloaded builtin ‘__builtin_vec_extract_exp’ 
is implemented by builtin ‘__builtin_vsx_extract_exp_dp’

Thanks
Gui Haochen


Re: [PATCH v2, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-09-12 Thread HAO CHEN GUI via Gcc-patches
Hi Segher,

On 7/9/2022 上午 1:19, Segher Boessenkool wrote:
> make -k -j60 check 
> RUNTESTFLAGS="--target_board=unix'{-m64,-m32,-m32/-mpowerpc64}'"
> 
> It is fine to not test -m32/-mpowerpc64 so often, and certainly not
> something I will ask everyone to always do :-)

IMO, if we add "-mpowerpc64" into dg-options, the "-m32/-mpowerpc64"
combination will be tested automatically, which increases the test coverage.
So is the concern that it increases test time?

Thanks
Gui Haochen


RE: [PATCH v2, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-09-07 Thread HAO CHEN GUI via Gcc-patches



On 7/9/2022 下午 10:25, Segher Boessenkool wrote:
> Hi!
> 
> On Wed, Sep 07, 2022 at 08:51:17AM -0500, Paul A. Clarke wrote:
>> On Tue, Sep 06, 2022 at 12:19:06PM -0500, Segher Boessenkool wrote:
>>> On Mon, Sep 05, 2022 at 02:36:30PM +0800, HAO CHEN GUI wrote:
>>>> The return type of vec_ version built-ins are different than their 
>>>> definition
>>>> in PVIPR. In PVIPR, they're vector unsigned int or vector unsigned long 
>>>> long.
>>>> Shall we correct them?
>>>>
>>>>   const vd __builtin_vsx_extract_exp_dp (vd);
>>>> VEEDP xvxexpdp {}
>>>>
>>>>   const vf __builtin_vsx_extract_exp_sp (vf);
>>>> VEESP xvxexpsp {}
>>>>
>>>>   const vd __builtin_vsx_extract_sig_dp (vd);
>>>> VESDP xvxsigdp {}
>>>>
>>>>   const vf __builtin_vsx_extract_sig_sp (vf);
>>>> VESSP xvxsigsp {}
>>>
>>> Those are the vsx_ versions.  I'm not sure what you're asking.
>>>
>>> It won't be easy at all to change types from vector integer to vector
>>> float, it will break all over.  A compatibility nightmare.  It is better
>>> if you can show the current stuff cannot ever work, it's not a problem
>>> to replace it in that case.
>>
>> I think Hao Chen is concerned about the return types:
> 
> Yes, and so am I.
> 
>>>>   const vd __builtin_vsx_extract_exp_dp (vd);
>>>> VEEDP xvxexpdp {}
>>
>> Per PVIPR, this should return vector unsigned long long ("vull" not "vd").
> 
> But changing that will make any existing code that now works, fail
> horribly.  Of course it is possible no such code exists :-)
> 
> What did this do before the builtin rewrite?
> 
> 
>  ~ - ~ - ~
> 
> 
> It looks like it did the right thing before, but that is just based on
> reading the code, I haven't actually tried it :-)
> 
> So, changing the vsx_ code here should be okay, because obviously no one
> is using it.  OTOH, why do we have those separately at all, why do they
> not just redirect to the canonical vec_ versions?  Or, can we just get
> rid of the vsx_ version completely?

In rs6000-overload.def, the vsx_ version built-ins are overridden to vec_
versions. And the return types of the vec_ versions are in line with those
defined in PVIPR. So there should be no problem. Sorry for that.

[VEC_VEEDP, vec_extract_exp_dp, __builtin_vec_extract_exp_dp]
  vull __builtin_vec_extract_exp_dp (vd);
VEEDP  VEEDP_DEPR1

[VEC_VEESP, vec_extract_exp_sp, __builtin_vec_extract_exp_sp]
  vui __builtin_vec_extract_exp_sp (vf);
VEESP  VEESP_DEPR1

[VEC_VEE, vec_extract_exp, __builtin_vec_extract_exp]
  vui __builtin_vec_extract_exp (vf);
VEESP
  vull __builtin_vec_extract_exp (vd);
VEEDP

Thanks
Gui Haochen


Re: [PATCH v6, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-09-22 Thread HAO CHEN GUI via Gcc-patches
Hi Kewen & Segher,

Thanks so much for your review comments.

On 22/9/2022 上午 10:28, Kewen.Lin wrote:
> on 2022/9/22 05:56, Segher Boessenkool wrote:
>> Hi!
>>
>> On Fri, Jun 24, 2022 at 10:02:19AM +0800, HAO CHEN GUI wrote:
>>>   This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
>>> of smin/max. So the builtins always generate xs[min/max]dp on all
>>> platforms.
>>
>> But how does this not blow up with -ffast-math?
> 
> Indeed.  Since it guards with "TARGET_VSX && !flag_finite_math_only",
> the bifs seem to cause ICE at -ffast-math.
> 
> Haochen, could you double check it?
I tested it with "-ffast-math". fmin/max functions are converted to
MIN/MAX_EXPR in gimple lower pass. But the built-ins are not and hit the
ICE. I thought the built-ins are folded to MIN/MAX_EXPR like vec_ versions'
when fast-math is set. In fact they're not. Sorry for that.

I made a patch to fold these two built-ins to MIN/MAX_EXPR when fast-math
is set. Then the built-ins are converted to MIN/MAX_EXPR and expanded to
smin/max.

Thanks for pointing out the problem!

> 
>>
>> In the other direction I am worried that the unspecs will degrade
>> performance (relative to smin/smax) when -ffast-math *is* active (and
>> this new builtin code and pattern doesn't blow up).
> 
> For fmin/fmax it would be fine, since they are transformed to {MAX,MIN}
> EXPR in middle end, and yes, it can degrade for the bifs, although IMHO
> the previous expansion to smin/smax contradicts with the bif names (users
> expect to map them to xs{min,max}dp than others).
> 
>>
>> I still think we should get RTL codes for this, to have access to proper
>> floating point min/max semantics always and everywhere.  "fmin" and
>> "fmax" seem to be good names :-)
> 
> It would be good, especially if we have observed some uses of these bifs
> and further opportunities around them.  :)
> 
Shall we submit a PR to add fmin/fmax to RTL codes?

> BR,
> Kewen


[PATCH v7, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-09-25 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
Tests show that outputs of xs[min/max]dp are consistent with the standard
of C99 fmin/max.

  This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
of smin/max when fast-math is not set. While fast-math is set, xs[min/max]dp
are folded to MIN/MAX_EXPR in gimple, and finally expanded to smin/max.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-09-26 Haochen Gui 

gcc/
PR target/103605
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Gimple
fold RS6000_BIF_XSMINDP and RS6000_BIF_XSMAXDP when fast-math is set.
* config/rs6000/rs6000.md (FMINMAX): New int iterator.
(minmax_op): New int attribute.
(UNSPEC_FMAX, UNSPEC_FMIN): New unspecs.
(f<minmax_op><mode>3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN.
* config/rs6000/rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set
pattern to fmaxdf3.
(__builtin_vsx_xsmindp): Set pattern to fmindf3.

gcc/testsuite/
PR target/103605
* gcc.target/powerpc/pr103605.h: New.
* gcc.target/powerpc/pr103605-1.c: New.
* gcc.target/powerpc/pr103605-2.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index e925ba9fad9..944ae9fe55c 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1588,6 +1588,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   gimple_set_location (g, gimple_location (stmt));
   gsi_replace (gsi, g, true);
   return true;
+/* fold into MIN_EXPR when fast-math is set.  */
+case RS6000_BIF_XSMINDP:
 /* flavors of vec_min.  */
 case RS6000_BIF_XVMINDP:
 case RS6000_BIF_XVMINSP:
@@ -1614,6 +1616,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   gimple_set_location (g, gimple_location (stmt));
   gsi_replace (gsi, g, true);
   return true;
+/* fold into MAX_EXPR when fast-math is set.  */
+case RS6000_BIF_XSMAXDP:
 /* flavors of vec_max.  */
 case RS6000_BIF_XVMAXDP:
 case RS6000_BIF_XVMAXSP:
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f4a9f24bcc5..8b735493b40 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1613,10 +1613,10 @@
 XSCVSPDP vsx_xscvspdp {}

   const double __builtin_vsx_xsmaxdp (double, double);
-XSMAXDP smaxdf3 {}
+XSMAXDP fmaxdf3 {}

   const double __builtin_vsx_xsmindp (double, double);
-XSMINDP smindf3 {}
+XSMINDP fmindf3 {}

   const double __builtin_vsx_xsrdpi (double);
 XSRDPI vsx_xsrdpi {}
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf85baa5370..ae0dd98f0f9 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -158,6 +158,8 @@ (define_c_enum "unspec"
UNSPEC_HASHCHK
UNSPEC_XXSPLTIDP_CONST
UNSPEC_XXSPLTIW_CONST
+   UNSPEC_FMAX
+   UNSPEC_FMIN
   ])

 ;;
@@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr"
   DONE;
 })

+
+(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
+
+(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
+(UNSPEC_FMIN "min")])
+
+(define_insn "f<minmax_op><mode>3"
+  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
+   (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
+ (match_operand:SFDF 2 "vsx_register_operand" "wa")]
+FMINMAX))]
+  "TARGET_VSX && !flag_finite_math_only"
+  "xs<minmax_op>dp %x0,%x1,%x2"
+  [(set_attr "type" "fp")]
+)
+
 (define_expand "movcc"
[(set (match_operand:GPR 0 "gpc_reg_operand")
 (if_then_else:GPR (match_operand 1 "comparison_operator")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr103605-1.c
new file mode 100644
index 000..923deec6a1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103605-1.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */
+
+#include "pr103605.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr103605-2.c
new file mode 100644
index 000..f50fe9468f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103605-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx -ffast-math" } */
+/* { dg-final { scan-assembler-times {\mxsmaxcdp\M} 3 { target has_arch_pwr9 } 
} } */
+/* { dg-final { scan-assembler-times {\mxsmincdp\M} 3 { target has_arch_pwr9 } 
} } */
+/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 { target { ! 
has_arch_pwr9 } } } } */

[PATCH-1, rs6000] Generate permute index directly for little endian target [PR100866]

2022-10-11 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch modifies the helper function which generates permute index for
vector byte reversion and generates permute index directly for little endian
targets. It saves one "xxlnor" instruction on P8 little endian targets as
the original process needs an "xxlnor" to calculate complement for the index.

Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-10-11  Haochen Gui 

gcc/
PR target/100866
* config/rs6000/rs6000-call.cc (swap_endian_selector_for_mode):
Generate permute index directly for little endian targets.
* config/rs6000/vsx.md (revb_<mode>): Call vperm directly with
corresponding permute indexes.

gcc/testsuite/
PR target/100866
* gcc.target/powerpc/pr100866.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc
index 551968b0995..bad8e9e0e52 100644
--- a/gcc/config/rs6000/rs6000-call.cc
+++ b/gcc/config/rs6000/rs6000-call.cc
@@ -2839,7 +2839,10 @@ swap_endian_selector_for_mode (machine_mode mode)
 }

   for (i = 0; i < 16; ++i)
-perm[i] = GEN_INT (swaparray[i]);
+if (BYTES_BIG_ENDIAN)
+  perm[i] = GEN_INT (swaparray[i]);
+else
+  perm[i] = GEN_INT (~swaparray[i] & 0x001f);

   return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode,
 gen_rtvec_v (16, perm)));
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index e226a93bbe5..b68eba48d2c 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6096,8 +6096,8 @@ (define_expand "revb_"
 to the endian mode in use, i.e. in LE mode, put elements
 in BE order.  */
   rtx sel = swap_endian_selector_for_mode(mode);
-  emit_insn (gen_altivec_vperm_ (operands[0], operands[1],
-  operands[1], sel));
+  emit_insn (gen_altivec_vperm__direct (operands[0], operands[1],
+ operands[1], sel));
 }

   DONE;
diff --git a/gcc/testsuite/gcc.target/powerpc/pr100866.c 
b/gcc/testsuite/gcc.target/powerpc/pr100866.c
new file mode 100644
index 000..c708dfd502e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr100866.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-final { scan-assembler-not "xxlnor" } } */
+
+#include <altivec.h>
+
+vector unsigned short revb(vector unsigned short a)
+{
+   return vec_revb(a);
+}



Re: [PATCH v2, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-09-05 Thread HAO CHEN GUI via Gcc-patches
Hi Segher,
  Thanks for your review comments.

On 2/9/2022 下午 11:56, Segher Boessenkool wrote:
>> -  const signed long __builtin_vsx_scalar_extract_exp (double);
>> +  const unsigned long long __builtin_vsx_scalar_extract_exp (double);
>>  VSEEDP xsxexpdp {}
>>
>> -  const signed long __builtin_vsx_scalar_extract_sig (double);
>> +  const unsigned long long __builtin_vsx_scalar_extract_sig (double);
>>  VSESDP xsxsigdp {}
> This also brings these legacy builtins in line with the vec_ versions,
> which are the preferred builtins (they are defined in the PVIPR).

The return types of the vec_ version built-ins differ from their definitions
in PVIPR. In PVIPR, they're vector unsigned int or vector unsigned long long.
Shall we correct them?

  const vd __builtin_vsx_extract_exp_dp (vd);
VEEDP xvxexpdp {}

  const vf __builtin_vsx_extract_exp_sp (vf);
VEESP xvxexpsp {}

  const vd __builtin_vsx_extract_sig_dp (vd);
VESDP xvxsigdp {}

  const vf __builtin_vsx_extract_sig_sp (vf);
VESSP xvxsigsp {}
> 
>> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
>> @@ -1,7 +1,8 @@
>> -/* { dg-do compile { target { powerpc*-*-* } } } */
>> -/* { dg-require-effective-target lp64 } */
>> -/* { dg-require-effective-target powerpc_p9vector_ok } */
>> +/* { dg-do compile { target { powerpc*-*-linux* } } } */
> Why?
The powerpc*-*-linux* target is not needed, as bfp.exp excludes AIX and Darwin.
I will modify it.

> 
>>  /* { dg-options "-mdejagnu-cpu=power9" } */
>> +/* { dg-additional-options "-mpowerpc64" } */
>> +/* { dg-require-effective-target has_arch_ppc64 } */
> This is guaranteed already by that -mpowerpc64.
> 
> It probably is best if you do not add -mpowerpc64 at all.  That solves
> both problems, is simpler, and gives better coverage as well :-)
> 
> So just use has_arch_ppc64 instead of lp64.  That makes it run on a
> strict superset of cases :-)
We commonly do regression test with -m32 and -m64. So if -mpowerpc64 is
not added, the combination of "-m32/-mpowerpc64" is not tested.

Thanks
Gui Haochen


Re: [PATCH, rs6000] Correct return value of check_p9modulo_hw_available

2022-08-04 Thread HAO CHEN GUI via Gcc-patches
Hi Segher,
  Thanks so much for your explanation. Now I have a clear picture about
the usage of return value. Patch was committed as r13-1971.

Thanks
Gui Haochen


On 5/8/2022 上午 1:09, Segher Boessenkool wrote:
> Hi!
> 
> On Thu, Aug 04, 2022 at 05:55:20PM +0800, HAO CHEN GUI wrote:
>>   This patch corrects return value of check_p9modulo_hw_available. It should
>> return 0 when p9modulo is supported.
> 
> It would be harder to make such mistakes if it used exit() explicitly,
> so that the reader is reminded the shell semantics are used here instead
> of the C conventions.
> 
>> -return (r == 2);
>> +return (r != 2);
> 
> so that then would be smth like
> 
>   if (r == 2)
>   exit (0);
>   else
>   exit (1);
> 
> (which makes the exit code for failure explicit as well).
> 
> Terse is good.  Explicit is good as well :-)
> 
> (You don't have to make this change here of course, but keep it in mind
> for the future :-) )
> 
> 
> Segher


Re: [PATCH, rs6000] TARGET_MADDLD should include TARGET_POWERPC64

2022-08-03 Thread HAO CHEN GUI via Gcc-patches
Hi Segher,

On 4/8/2022 上午 12:54, Segher Boessenkool wrote:
> Hrm.  But the maddld insn is useful for SImode as well, in 32-bit mode,
> it is just its name that is a bit confusing then.  Sorry for confusing
> things :-(
> 
> Add a test for SImode maddld as well?

 Thanks for your comments.

 Just want to double confirm that a maddld test case with "-m32" and
"-mpowerpc64" is needed. As far as I understand, maddld is a 64-bit
instruction and it should be used with "-mpowerpc64" in a 32-bit register
environment.

Thanks again
Gui Haochen


Re: [PATCH v2, rs6000] Add multiply-add expand pattern [PR103109]

2022-08-09 Thread HAO CHEN GUI via Gcc-patches
Hi Segher,
  Thanks for your comments. I checked the cost table. For P9 and P10, the
cost of all mul* insn is the same, not relevant to the size of operand.

  I will split the test case to one compiling and one runnable case.

Thanks.
Gui Haochen

On 10/8/2022 上午 5:43, Segher Boessenkool wrote:
> Hi!
> 
> On Mon, Aug 08, 2022 at 02:04:07PM +0800, HAO CHEN GUI wrote:
>>   This patch adds an expand and several insns for multiply-add with three
>> 64bit operands.
> 
> Also for maddld for 32-bit operands.
> 
>>"maddld %0,%1,%2,%3"
>>[(set_attr "type" "mul")])
> 
> I suppose attr "size" isn't relevant for any of the cpus that implement
> these instructions?
> 
> Okay for trunk.  Thanks!
> 
> (The testcase improvements can be done later).
> 
> 
> Segher


[PATCH v2, rs6000] Add multiply-add expand pattern [PR103109]

2022-08-08 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds an expand and several insns for multiply-add with three
64bit operands.

  Compared with last version, the main changes are:
1 The "maddld" pattern is reused for the low-part generation.
2 A runnable testcase replaces the original compiling case.
3 Fixes indentation problems.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-08-08  Haochen Gui  

gcc/
PR target/103109
* config/rs6000/rs6000.md (maddditi4): New pattern for multiply-add.
(madddi4_highpart): New.
(madddi4_highpart_le): New.

gcc/testsuite/
PR target/103109
* gcc.target/powerpc/pr103109.c: New.



patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index c55ee7e171a..4c58023490a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -3217,7 +3217,7 @@ (define_expand "mul3"
   DONE;
 })

-(define_insn "*maddld4"
+(define_insn "maddld4"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
(match_operand:GPR 2 "gpc_reg_operand" "r"))
@@ -3226,6 +3226,52 @@ (define_insn "*maddld4"
   "maddld %0,%1,%2,%3"
   [(set_attr "type" "mul")])

+(define_expand "maddditi4"
+  [(set (match_operand:TI 0 "gpc_reg_operand")
+   (plus:TI
+ (mult:TI (any_extend:TI (match_operand:DI 1 "gpc_reg_operand"))
+  (any_extend:TI (match_operand:DI 2 "gpc_reg_operand")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand"))))]
+  "TARGET_MADDLD && TARGET_POWERPC64"
+{
+  rtx op0_lo = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 8 : 0);
+  rtx op0_hi = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 0 : 8);
+
+  emit_insn (gen_maddlddi4 (op0_lo, operands[1], operands[2], operands[3]));
+
+  if (BYTES_BIG_ENDIAN)
+emit_insn (gen_madddi4_highpart (op0_hi, operands[1], operands[2],
+   operands[3]));
+  else
+emit_insn (gen_madddi4_highpart_le (op0_hi, operands[1], operands[2],
+  operands[3]));
+  DONE;
+})
+
+(define_insn "madddi4_highpart"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (subreg:DI
+ (plus:TI
+   (mult:TI (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+(any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+   (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
+0))]
+  "TARGET_MADDLD && BYTES_BIG_ENDIAN && TARGET_POWERPC64"
+  "maddhd %0,%1,%2,%3"
+  [(set_attr "type" "mul")])
+
+(define_insn "madddi4_highpart_le"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (subreg:DI
+ (plus:TI
+   (mult:TI (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+(any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+   (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
+8))]
+  "TARGET_MADDLD && !BYTES_BIG_ENDIAN && TARGET_POWERPC64"
+  "maddhd %0,%1,%2,%3"
+  [(set_attr "type" "mul")])
+
 (define_insn "udiv3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
 (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c 
b/gcc/testsuite/gcc.target/powerpc/pr103109.c
new file mode 100644
index 000..969b9751b21
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c
@@ -0,0 +1,110 @@
+/* { dg-do run { target { has_arch_ppc64 } } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -save-temps" } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target p9modulo_hw } */
+/* { dg-final { scan-assembler-times {\mmaddld\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mmaddhd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mmaddhdu\M} 1 } } */
+
+union U {
+  __int128 i128;
+  struct {
+long l1;
+long l2;
+  } s;
+};
+
+__int128
+create_i128 (long most_sig, long least_sig)
+{
+  union U u;
+
+#if __LITTLE_ENDIAN__
+  u.s.l1 = least_sig;
+  u.s.l2 = most_sig;
+#else
+  u.s.l1 = most_sig;
+  u.s.l2 = least_sig;
+#endif
+  return u.i128;
+}
+
+
+#define DEBUG 0
+
+#if DEBUG
+#include <stdio.h>
+#include <stdlib.h>
+
+void print_i128(__int128 val, int unsignedp)
+{
+  if (unsignedp)
+printf(" %llu ", (unsigned long long)(val >> 64));
+  else
+printf(" %lld ", (signed long long)(val >> 64));
+
+  printf("%llu (0x%llx %llx)",
+ (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF),
+ (unsigned long long)(val >> 64),
+ (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF));
+}
+#endif
+
+void abort (void);
+
+__attribute__((noinline))
+__int128 multiply_add (long a, long b, long c)
+{
+  return (__int128) a * b + c;
+}
+
+__attribute__((noinline))
+unsigned __int128 multiply_addu (unsigned long a, unsigned long b,
+  

Re: [PATCH v3] Modify combine pattern by a pseudo AND with its nonzero bits [PR93453]

2022-08-10 Thread HAO CHEN GUI via Gcc-patches
Hi Segher,
  Really appreciate your review comments.

On 11/8/2022 上午 1:38, Segher Boessenkool wrote:
> Hi!
> 
> Sorry for the tardiness.
> 
> On Fri, Jul 22, 2022 at 03:07:55PM +0800, HAO CHEN GUI wrote:
>>   This patch creates a new function - change_pseudo_and_mask. If recog fails,
>> the function converts a single pseudo to the pseudo AND with a mask if the
>> outer operator is IOR/XOR/PLUS and inner operator is ASHIFT or AND. The
>> conversion helps pattern to match rotate and mask insn on some targets.
> 
> The name isn't so clear.  It isn't changing a mask, to start with.
How about setting function name to change_pseudo? It converts a pseudo to
the pseudo AND with a mask in a particular situation.

> 
>> +/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
>> +   ASHIFT/AND,
> 
> "When the outercode of the SET_SRC of PAT is ..."
Yeah, I will change it.

> 
>> convert a pseudo to pseudo AND with a mask if its nonzero_bits
>> +   is less than its mode mask.  The nonzero_bits in later passes is not a
>> +   superset of what is known in combine pass.  So an insn with nonzero_bits
>> +   can't be recoged later.  */
> 
> Can this not be done with a splitter in the machine description?
> 
Sorry, I don't quite understand. Do you mean that the conversion could be done in
the split pass?

If a pseudo has DImode and stems from a char, we get its nonzero_bits as 0xff in
the combine pass. But in the split pass, its nonzero_bits is 0xffffffffffffffff.
So the conversion can only be done in the combine pass.

Thanks
Gui Haochen


Re: [PATCH v3] Modify combine pattern by a pseudo AND with its nonzero bits [PR93453]

2022-08-12 Thread HAO CHEN GUI via Gcc-patches
Hi Segher,

On 12/8/2022 上午 1:40, Segher Boessenkool wrote:
> Yes, but combine will use splitters as well.
Combine pass invokes combine_split_insns for 3-insn combine. If we want
to do the split for 2-insn combine (like the test case in PR), we have to
add a special case?

> 
> You can use nonzero_bits in the split condition (the second condition in
> a define_split, or the sole condition in a define_split) just fine, as
> long as the replacement RTL does not rely on the nonzero_bits itself.
> You cannot use it in the insn condition (the first condition in a
> define_insn_and_split, or the one condition in a define_insn) because
> that RTL will survive past combine, and then nonzero_bits can have bits
> cleared that were set before (that were determined to be always zero
> during combine, but that knowledge is gone later).

I tried to add a define_insn_and_split pattern in rs6000.md, just like the
following code. The nonzero_bits is used in the insn condition (for combine)
and there is no condition for the split. I can't use nonzero_bits in the split
condition, as it then never matches and causes an ICE.

I am not sure if it is safe. If such an insn doesn't stem from the combine,
there is no guarantee that the nonzero_bits condition matches.


(define_insn_and_split "*test"
  [(set (match_operand:GPR 0 "gpc_reg_operand")
(plus_ior_xor:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
  (match_operand:SI 2 "const_int_operand"))
  (match_operand:GPR 3 "gpc_reg_operand")))]
  "nonzero_bits (operands[3], <MODE>mode)
   < HOST_WIDE_INT_1U << INTVAL (operands[2])"
  "#"
  ""
  [(set (match_dup 0)
(ior:GPR (and:GPR (match_dup 3)
  (match_dup 4))
 (ashift:GPR (match_dup 1)
 (match_dup 2))))]
{
  operands[4] = GEN_INT ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1);
})

Thanks
Gui Haochen




Ping [PATCH, rs6000] Add multiply-add expand pattern [PR103109]

2022-07-31 Thread HAO CHEN GUI via Gcc-patches
Hi,
   Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598744.html
Thanks

On 25/7/2022 下午 1:11, HAO CHEN GUI wrote:
> Hi,
>   This patch adds an expand and several insns for multiply-add with
> three 64bit operands.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-07-22  Haochen Gui  
> 
> gcc/
>   PR target/103109
>   * config/rs6000/rs6000.md (maddditi4): New pattern for
>   multiply-add.
>   (madddi4_lowpart): New.
>   (madddi4_lowpart_le): New.
>   (madddi4_highpart): New.
>   (madddi4_highpart_le): New.
> 
> gcc/testsuite/
>   PR target/103109
>   * gcc.target/powerpc/pr103109.c: New.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index c55ee7e171a..4f3b56e103e 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -3226,6 +3226,97 @@ (define_insn "*maddld4"
>"maddld %0,%1,%2,%3"
>[(set_attr "type" "mul")])
> 
> +(define_expand "maddditi4"
> +  [(set (match_operand:TI 0 "gpc_reg_operand")
> + (plus:TI
> +   (mult:TI (any_extend:TI
> +  (match_operand:DI 1 "gpc_reg_operand"))
> +(any_extend:TI
> +  (match_operand:DI 2 "gpc_reg_operand")))
> +   (any_extend:TI
> + (match_operand:DI 3 "gpc_reg_operand"))))]
> +  "TARGET_POWERPC64 && TARGET_MADDLD"
> +{
> +  rtx op0_lo = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 8 : 
> 0);
> +  rtx op0_hi = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 0 : 
> 8);
> +
> +  if (BYTES_BIG_ENDIAN)
> +{
> +  emit_insn (gen_madddi4_lowpart (op0_lo, operands[1], operands[2],
> +  operands[3]));
> +  emit_insn (gen_madddi4_highpart (op0_hi, operands[1], operands[2],
> +   operands[3]));
> +}
> +  else
> +{
> +  emit_insn (gen_madddi4_lowpart_le (op0_lo, operands[1], operands[2],
> + operands[3]));
> +  emit_insn (gen_madddi4_highpart_le (op0_hi, operands[1], 
> operands[2],
> +  operands[3]));
> +}
> +  DONE;
> +})
> +
> +(define_insn "madddi4_lowpart"
> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> + (subreg:DI
> +   (plus:TI
> + (mult:TI (any_extend:TI
> +(match_operand:DI 1 "gpc_reg_operand" "r"))
> +  (any_extend:TI
> +(match_operand:DI 2 "gpc_reg_operand" "r")))
> + (any_extend:TI
> +   (match_operand:DI 3 "gpc_reg_operand" "r")))
> +  8))]
> +  "TARGET_POWERPC64 && TARGET_MADDLD && BYTES_BIG_ENDIAN"
> +  "maddld %0,%1,%2,%3"
> +  [(set_attr "type" "mul")])
> +
> +(define_insn "madddi4_lowpart_le"
> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> + (subreg:DI
> +   (plus:TI
> + (mult:TI (any_extend:TI
> +(match_operand:DI 1 "gpc_reg_operand" "r"))
> +  (any_extend:TI
> +(match_operand:DI 2 "gpc_reg_operand" "r")))
> + (any_extend:TI
> +   (match_operand:DI 3 "gpc_reg_operand" "r")))
> +  0))]
> +  "TARGET_POWERPC64 && TARGET_MADDLD && !BYTES_BIG_ENDIAN"
> +  "maddld %0,%1,%2,%3"
> +  [(set_attr "type" "mul")])
> +
> +(define_insn "madddi4_highpart"
> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> + (subreg:DI
> +   (plus:TI
> + (mult:TI (any_extend:TI
> +(match_operand:DI 1 "gpc_reg_operand" "r"))
> +  (any_extend:TI
> +(match_operand:DI 2 "gpc_reg_operand" "r")))
> + (any_extend:TI
> +   (match_operand:DI 3 "gpc_reg_operand" "r")))
> +  0))]
> +  "TARGET_POWERPC64 && TARGET_MADDLD && BYTES_BIG_ENDIAN"
> +  "maddhd %0,%1,%2,%3"
> +  [(set_attr "type" "mul")])
> +
> +(define_insn "madddi4_highpart_le"
> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> + 

Ping [PATCH v3] Modify combine pattern by a pseudo AND with its nonzero bits [PR93453]

2022-07-31 Thread HAO CHEN GUI via Gcc-patches
Hi,
   Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598685.html
Thanks.

On 22/7/2022 下午 3:07, HAO CHEN GUI wrote:
> Hi,
>   This patch creates a new function - change_pseudo_and_mask. If recog fails,
> the function converts a single pseudo to the pseudo AND with a mask if the
> outer operator is IOR/XOR/PLUS and inner operator is ASHIFT or AND. The
> conversion helps pattern to match rotate and mask insn on some targets.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-07-22  Haochen Gui  
> 
> gcc/
>   PR target/93453
>   * combine.cc (change_pseudo_and_mask): New.
>   (recog_for_combine): If recog fails, try again with the pattern
>   modified by change_pseudo_and_mask.
>   * config/rs6000/rs6000.md (plus_ior_xor): Remove.
>   (anonymous split pattern for plus_ior_xor): Remove.
> 
> gcc/testsuite/
>   PR target/93453
>   * gcc.target/powerpc/pr93453-2.c: New.
>   * gcc.target/powerpc/rlwimi-2.c: Both 32/64 bit platforms generate the
>   same number of rlwimi.  Reset the counter.
> 
> patch.diff
> diff --git a/gcc/combine.cc b/gcc/combine.cc
> index a5fabf397f7..e1c1aa7da1c 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -11599,6 +11599,48 @@ change_zero_ext (rtx pat)
>return changed;
>  }
> 
> +/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
> +   ASHIFT/AND, convert a pseudo to pseudo AND with a mask if its nonzero_bits
> +   is less than its mode mask.  The nonzero_bits in later passes is not a
> +   superset of what is known in combine pass.  So an insn with nonzero_bits
> +   can't be recoged later.  */
> +static bool
> +change_pseudo_and_mask (rtx pat)
> +{
> +  rtx src = SET_SRC (pat);
> +  if ((GET_CODE (src) == IOR
> +   || GET_CODE (src) == XOR
> +   || GET_CODE (src) == PLUS)
> +  && (((GET_CODE (XEXP (src, 0)) == ASHIFT
> + || GET_CODE (XEXP (src, 0)) == AND)
> +&& REG_P (XEXP (src, 1)))))
> +{
> +  rtx reg = XEXP (src, 1);
> +  machine_mode mode = GET_MODE (reg);
> +  unsigned HOST_WIDE_INT nonzero = nonzero_bits (reg, mode);
> +  if (nonzero < GET_MODE_MASK (mode))
> + {
> +   int shift;
> +
> +   if (GET_CODE (XEXP (src, 0)) == ASHIFT)
> + shift = INTVAL (XEXP (XEXP (src, 0), 1));
> +   else
> + shift = ctz_hwi (INTVAL (XEXP (XEXP (src, 0), 1)));
> +
> +   if (shift > 0
> +   && (HOST_WIDE_INT_1U << shift) - 1 >= nonzero)
> + {
> +   unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << shift) - 1;
> +   rtx x = gen_rtx_AND (mode, reg, GEN_INT (mask));
> +   SUBST (XEXP (SET_SRC (pat), 1), x);
> +   maybe_swap_commutative_operands (SET_SRC (pat));
> +   return true;
> + }
> + }
> +}
> +  return false;
> +}
> +
>  /* Like recog, but we receive the address of a pointer to a new pattern.
> We try to match the rtx that the pointer points to.
> If that fails, we may try to modify or replace the pattern,
> @@ -11646,7 +11688,10 @@ recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx 
> *pnotes)
>   }
>   }
>else
> - changed = change_zero_ext (pat);
> + {
> +   changed = change_pseudo_and_mask (pat);
> +   changed |= change_zero_ext (pat);
> + }
>  }
>else if (GET_CODE (pat) == PARALLEL)
>  {
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 1367a2cb779..2bd6bd5f908 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -4207,24 +4207,6 @@ (define_insn_and_split "*rotl3_insert_3_"
>   (ior:GPR (and:GPR (match_dup 3) (match_dup 4))
>(ashift:GPR (match_dup 1) (match_dup 2))))])
> 
> -(define_code_iterator plus_ior_xor [plus ior xor])
> -
> -(define_split
> -  [(set (match_operand:GPR 0 "gpc_reg_operand")
> - (plus_ior_xor:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
> -   (match_operand:SI 2 "const_int_operand"))
> -   (match_operand:GPR 3 "gpc_reg_operand")))]
> -  "nonzero_bits (operands[3], <MODE>mode)
> -   < HOST_WIDE_INT_1U << INTVAL (operands[2])"
> -  [(set (match_dup 0)
> - (ior:GPR (and:GPR (match_dup 3)
> -   (match_dup 4))
> -  (ashift:GPR (match_dup 1)
> -  (match_dup 2))))]
> -{
> -  operands[4] = GEN_I

Ping^2 [PATCH v2, rs6000] Use CC for BCD operations [PR100736]

2022-07-31 Thread HAO CHEN GUI via Gcc-patches
Hi,
Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html
Thanks.

On 4/7/2022 下午 2:33, HAO CHEN GUI wrote:
> Hi,
>Gentle ping this:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html
> Thanks.
> 
> On 22/6/2022 下午 4:26, HAO CHEN GUI wrote:
>> Hi,
>>   This patch uses CC instead of CCFP for all BCD operations. Thus, infinite
>> math flag has no impact on BCD operations. To support BCD overflow and
>> invalid coding, an UNSPEC is defined to move the bit to a general register.
>> The patterns of condition branch and return with overflow bit are defined as
>> the UNSPEC and branch/return can be combined to one jump insn. The split
>> pattern of overflow bit extension is define for optimization.
>>
>>   This patch also replaces bcdadd with bcdsub for BCD invaliding coding
>> expand.
>>
>> ChangeLog
>> 2022-06-22 Haochen Gui 
>>
>> gcc/
>>  PR target/100736
>>  * config/rs6000/altivec.md (BCD_TEST): Remove unordered.
>>  (bcd_): Replace CCFP with CC.
>>  (*bcd_test_): Replace CCFP with CC.  Generate
>>  condition insn with CC mode.
>>  (bcd_overflow_): New.
>>  (*bcdoverflow_): New.
>>  (*bcdinvalid_): Removed.
>>  (bcdinvalid_): Implement by UNSPEC_BCDSUB and UNSPEC_BCD_OVERFLOW.
>>  (nuun): New.
>>  (*overflow_cbranch): New.
>>  (*overflow_creturn): New.
>>  (*overflow_extendsidi): New.
>>  (bcdshift_v16qi): Replace CCFP with CC.
>>  (bcdmul10_v16qi): Likewise.
>>  (bcddiv10_v16qi): Likewise.
>>  (peephole for bcd_add/sub): Likewise.
>>  * config/rs6000/rs6000-builtins.def (__builtin_bcdadd_ov_v1ti): Set
>>  pattern to bcdadd_overflow_v1ti.
>>  (__builtin_bcdadd_ov_v16qi): Set pattern to bcdadd_overflow_v16qi.
>>  (__builtin_bcdsub_ov_v1ti): Set pattern to bcdsub_overflow_v1ti.
>>  (__builtin_bcdsub_ov_v16qi): Set pattern to bcdsub_overflow_v16qi.
>>
>> gcc/testsuite/
>>  PR target/100736
>>  * gcc.target/powerpc/bcd-4.c: Adjust number of bcdadd and bcdsub.
>>  Scan no cror insns.
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
>> index efc8ae35c2e..26f131e61ea 100644
>> --- a/gcc/config/rs6000/altivec.md
>> +++ b/gcc/config/rs6000/altivec.md
>> @@ -4370,7 +4370,7 @@ (define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD 
>> UNSPEC_BCDSUB])
>>  (define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add")
>>(UNSPEC_BCDSUB "sub")])
>>
>> -(define_code_iterator BCD_TEST [eq lt le gt ge unordered])
>> +(define_code_iterator BCD_TEST [eq lt le gt ge])
>>  (define_mode_iterator VBCD [V1TI V16QI])
>>
>>  (define_insn "bcd_"
>> @@ -4379,7 +4379,7 @@ (define_insn "bcd_"
>>(match_operand:VBCD 2 "register_operand" "v")
>>(match_operand:QI 3 "const_0_to_1_operand" "n")]
>>   UNSPEC_BCD_ADD_SUB))
>> -   (clobber (reg:CCFP CR6_REGNO))]
>> +   (clobber (reg:CC CR6_REGNO))]
>>"TARGET_P8_VECTOR"
>>"bcd<bcd_add_sub>. %0,%1,%2,%3"
>>[(set_attr "type" "vecsimple")])
>> @@ -4389,9 +4389,9 @@ (define_insn "bcd_"
>>  ;; UNORDERED test on an integer type (like V1TImode) is not defined.  The 
>> type
>>  ;; probably should be one that can go in the VMX (Altivec) registers, so we
>>  ;; can't use DDmode or DFmode.
>> -(define_insn "*bcd_test_"
>> -  [(set (reg:CCFP CR6_REGNO)
>> -(compare:CCFP
>> +(define_insn "bcd_test_"
>> +  [(set (reg:CC CR6_REGNO)
>> +(compare:CC
>>   (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")
>> (match_operand:VBCD 2 "register_operand" "v")
>> (match_operand:QI 3 "const_0_to_1_operand" "i")]
>> @@ -4408,8 +4408,8 @@ (define_insn "*bcd_test2_"
>>(match_operand:VBCD 2 "register_operand" "v")
>>(match_operand:QI 3 "const_0_to_1_operand" "i")]
>>   UNSPEC_BCD_ADD_SUB))
>> -   (set (reg:CCFP CR6_REGNO)
>> -(compare:CCFP
>> +   (set (reg:CC CR6_REGNO)
>> +(compare:CC
>>   (unspec:V2DF [(match_dup 1)
>> (match_dup 2)
>> (match_dup 3)]
>> @@ -4502,8 +45

Ping^2 [PATCH v6, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-07-31 Thread HAO CHEN GUI via Gcc-patches
Hi,
   Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597158.html
Thanks.


On 4/7/2022 下午 2:32, HAO CHEN GUI wrote:
> Hi,
>Gentle ping this:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597158.html
> Thanks.
> 
> On 24/6/2022 上午 10:02, HAO CHEN GUI wrote:
>> Hi,
>>   This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
>> Tests show that outputs of xs[min/max]dp are consistent with the standard
>> of C99 fmin/max.
>>
>>   This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
>> of smin/max. So the builtins always generate xs[min/max]dp on all
>> platforms.
>>
>>   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
>> Is this okay for trunk? Any recommendations? Thanks a lot.
>>
>> ChangeLog
>> 2022-06-24 Haochen Gui 
>>
>> gcc/
>>  PR target/103605
>>  * config/rs6000/rs6000.md (FMINMAX): New.
>>  (minmax_op): New.
>>  (f<minmax_op><mode>3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN.
>>  * config/rs6000/rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set
>>  pattern to fmaxdf3.
>>  (__builtin_vsx_xsmindp): Set pattern to fmindf3.
>>
>> gcc/testsuite/
>>  PR target/103605
>>  * gcc.dg/powerpc/pr103605.c: New.
>>
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
>> b/gcc/config/rs6000/rs6000-builtins.def
>> index f4a9f24bcc5..8b735493b40 100644
>> --- a/gcc/config/rs6000/rs6000-builtins.def
>> +++ b/gcc/config/rs6000/rs6000-builtins.def
>> @@ -1613,10 +1613,10 @@
>>  XSCVSPDP vsx_xscvspdp {}
>>
>>const double __builtin_vsx_xsmaxdp (double, double);
>> -XSMAXDP smaxdf3 {}
>> +XSMAXDP fmaxdf3 {}
>>
>>const double __builtin_vsx_xsmindp (double, double);
>> -XSMINDP smindf3 {}
>> +XSMINDP fmindf3 {}
>>
>>const double __builtin_vsx_xsrdpi (double);
>>  XSRDPI vsx_xsrdpi {}
>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>> index bf85baa5370..ae0dd98f0f9 100644
>> --- a/gcc/config/rs6000/rs6000.md
>> +++ b/gcc/config/rs6000/rs6000.md
>> @@ -158,6 +158,8 @@ (define_c_enum "unspec"
>> UNSPEC_HASHCHK
>> UNSPEC_XXSPLTIDP_CONST
>> UNSPEC_XXSPLTIW_CONST
>> +   UNSPEC_FMAX
>> +   UNSPEC_FMIN
>>])
>>
>>  ;;
>> @@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr"
>>DONE;
>>  })
>>
>> +
>> +(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
>> +
>> +(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
>> + (UNSPEC_FMIN "min")])
>> +
>> +(define_insn "f<minmax_op><mode>3"
>> +  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
>> +(unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
>> +  (match_operand:SFDF 2 "vsx_register_operand" "wa")]
>> + FMINMAX))]
>> +  "TARGET_VSX && !flag_finite_math_only"
>> +  "xs<minmax_op>dp %x0,%x1,%x2"
>> +  [(set_attr "type" "fp")]
>> +)
>> +
>>  (define_expand "movcc"
>> [(set (match_operand:GPR 0 "gpc_reg_operand")
>>   (if_then_else:GPR (match_operand 1 "comparison_operator")
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr103605.c
>> new file mode 100644
>> index 00000000000..1c938d40e61
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr103605.c
>> @@ -0,0 +1,37 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target powerpc_vsx_ok } */
>> +/* { dg-options "-O2 -mvsx" } */
>> +/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */
>> +/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */
>> +
>> +#include <math.h>
>> +
>> +double test1 (double d0, double d1)
>> +{
>> +  return fmin (d0, d1);
>> +}
>> +
>> +float test2 (float d0, float d1)
>> +{
>> +  return fmin (d0, d1);
>> +}
>> +
>> +double test3 (double d0, double d1)
>> +{
>> +  return fmax (d0, d1);
>> +}
>> +
>> +float test4 (float d0, float d1)
>> +{
>> +  return fmax (d0, d1);
>> +}
>> +
>> +double test5 (double d0, double d1)
>> +{
>> +  return __builtin_vsx_xsmindp (d0, d1);
>> +}
>> +
>> +double test6 (double d0, double d1)
>> +{
>> +  return __builtin_vsx_xsmaxdp (d0, d1);
>> +}


[PATCH, rs6000] Correct return value of check_p9modulo_hw_available

2022-08-04 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch corrects return value of check_p9modulo_hw_available. It should
return 0 when p9modulo is supported.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-08-04  Haochen Gui  

gcc/testsuite/
* lib/target-supports.exp (check_p9modulo_hw_available): Correct return
value.


patch.diff
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 4ed7b25b9a4..04a2a8e8659 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2288,7 +2288,7 @@ proc check_p9modulo_hw_available { } {
{
int i = 5, j = 3, r = -1;
asm ("modsw %0,%1,%2" : "+r" (r) : "r" (i), "r" (j));
-   return (r == 2);
+   return (r != 2);
}
} $options
}


[PATCH, rs6000] TARGET_MADDLD should include TARGET_POWERPC64

2022-08-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch changes the definition of TARGET_MADDLD and includes
TARGET_POWERPC64, since maddld is a 64 bit instruction.

  maddld-1.c now checks "has_arch_ppc64". It depends on a patch which fixes
empty TU problem.
https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598744.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-08-03  Haochen Gui  

gcc/
* config/rs6000/rs6000.h (TARGET_MADDLD): Define.

gcc/testsuite/
* gcc.target/powerpc/maddld-1.c: Modify target requirement to compile
it on the target which supports 64 bit instructions.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 7d04556304a..2f15451fd8b 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -466,7 +466,7 @@ extern int rs6000_vector_align[];
 #define TARGET_FCTIWUZ TARGET_POPCNTD
 #define TARGET_CTZ TARGET_MODULO
 #define TARGET_EXTSWSLI(TARGET_MODULO && TARGET_POWERPC64)
-#define TARGET_MADDLD  TARGET_MODULO
+#define TARGET_MADDLD  (TARGET_MODULO && TARGET_POWERPC64)

 #define TARGET_XSCVDPSPN   (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
 #define TARGET_XSCVSPDPN   (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
diff --git a/gcc/testsuite/gcc.target/powerpc/maddld-1.c 
b/gcc/testsuite/gcc.target/powerpc/maddld-1.c
index 4edecf1c86d..0a53658e058 100644
--- a/gcc/testsuite/gcc.target/powerpc/maddld-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/maddld-1.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target { has_arch_ppc64 } } } */
 /* { dg-options "-mdejagnu-cpu=power9 -O2" } */

 /* This file tests the maddld instruction can be used in SI mode


[PATCH-3, rs6000] Change mode and insn condition for scalar insert exp instruction

2023-01-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch changes the mode of exponent to GPR in scalar insert exp
pattern, as the exponent can be put into a 32-bit register. Also the
condition check is changed from TARGET_64BIT to TARGET_POWERPC64.

  The test cases are modified according to the changes of expand pattern.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

Gui Haochen

ChangeLog
2023-01-03  Haochen Gui  

gcc/
* config/rs6000/rs6000-builtins.def
(__builtin_vsx_scalar_insert_exp): Replace bif-pattern from xsiexpdp
to xsiexpdp_di.
(__builtin_vsx_scalar_insert_exp_dp): Replace bif-pattern from
xsiexpdpf to xsiexpdpf_di.
* config/rs6000/vsx.md (xsiexpdp): Rename to...
(xsiexpdp_<mode>): ..., set the mode of second operand to GPR and
replace TARGET_64BIT with TARGET_POWERPC64.
(xsiexpdpf): Rename to...
(xsiexpdpf_<mode>): ..., set the mode of second operand to GPR and
replace TARGET_64BIT with TARGET_POWERPC64.

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Replace lp64 check
with has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-insert-exp-1.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-4.c: Likewise.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 25647b7bdd2..b1b5002d7d9 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2854,10 +2854,10 @@

   const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
 unsigned long long);
-VSIEDP xsiexpdp {}
+VSIEDP xsiexpdp_di {}

   const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned long long);
-VSIEDPF xsiexpdpf {}
+VSIEDPF xsiexpdpf_di {}

   pure vsc __builtin_vsx_xl_len_r (void *, signed long);
 XL_LEN_R xl_len_r {}
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 27e03a4cf6c..3376090cc6f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5137,22 +5137,22 @@ (define_insn "xsiexpqp_"
   [(set_attr "type" "vecmove")])

 ;; VSX Scalar Insert Exponent Double-Precision
-(define_insn "xsiexpdp"
+(define_insn "xsiexpdp_<mode>"
   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
(unspec:DF [(match_operand:DI 1 "register_operand" "r")
-   (match_operand:DI 2 "register_operand" "r")]
+   (match_operand:GPR 2 "register_operand" "r")]
 UNSPEC_VSX_SIEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsiexpdp %x0,%1,%2"
   [(set_attr "type" "fpsimple")])

 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
-(define_insn "xsiexpdpf"
+(define_insn "xsiexpdpf_<mode>"
   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
(unspec:DF [(match_operand:DF 1 "register_operand" "r")
-   (match_operand:DI 2 "register_operand" "r")]
+   (match_operand:GPR 2 "register_operand" "r")]
 UNSPEC_VSX_SIEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsiexpdp %x0,%1,%2"
   [(set_attr "type" "fpsimple")])

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c
index d8243258a67..88d77564158 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c
@@ -1,7 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */
+/* { dg-require-effective-target has_arch_ppc64 } */

 /* This test should succeed only on 64-bit configurations.  */
 #include 
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c
index 8260b107178..2f219ddc83a 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c
@@ -1,7 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power8" } */
+/* { dg-require-effective-target has_arch_ppc64 } */

 /* This test should succeed only on 64-bit configurations.  */
 #include 
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-12.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-12.c
index 384fc9cc675..9eade34d9ad 100644
--- 

<    1   2   3   4   5   >