[gcc r15-1641] optab: Add isfinite_optab for isfinite builtin

2024-06-25 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:680eda8c74100eb957dffbcc5554230e71f25990

commit r15-1641-g680eda8c74100eb957dffbcc5554230e71f25990
Author: Haochen Gui 
Date:   Wed Jun 26 13:35:10 2024 +0800

optab: Add isfinite_optab for isfinite builtin

gcc/
* builtins.cc (interclass_mathfn_icode): Set optab to isfinite_optab
for isfinite builtin.
* optabs.def (isfinite_optab): New.
* doc/md.texi (isfinite): Document.

Diff:
---
 gcc/builtins.cc | 4 +++-
 gcc/doc/md.texi | 6 ++
 gcc/optabs.def  | 1 +
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index d467d1697b4..03634cdaba1 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -2459,8 +2459,10 @@ interclass_mathfn_icode (tree arg, tree fndecl)
   errno_set = true; builtin_optab = ilogb_optab; break;
 CASE_FLT_FN (BUILT_IN_ISINF):
   builtin_optab = isinf_optab; break;
-case BUILT_IN_ISNORMAL:
 case BUILT_IN_ISFINITE:
+  builtin_optab = isfinite_optab;
+  break;
+case BUILT_IN_ISNORMAL:
 CASE_FLT_FN (BUILT_IN_FINITE):
 case BUILT_IN_FINITED32:
 case BUILT_IN_FINITED64:
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 5730bda80dc..3eb4216141e 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -8557,6 +8557,12 @@ operand 2, greater than operand 2 or is unordered with 
operand 2.
 
 This pattern is not allowed to @code{FAIL}.
 
+@cindex @code{isfinite@var{m}2} instruction pattern
+@item @samp{isfinite@var{m}2}
+Return 1 if operand 1 is a finite floating point number and 0
+otherwise.  @var{m} is a scalar floating point mode.  Operand 0
+has mode @code{SImode}, and operand 1 has mode @var{m}.
+
 @end table
 
 @end ifset
diff --git a/gcc/optabs.def b/gcc/optabs.def
index bc2611abdc2..9514a317259 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -352,6 +352,7 @@ OPTAB_D (fmod_optab, "fmod$a3")
 OPTAB_D (hypot_optab, "hypot$a3")
 OPTAB_D (ilogb_optab, "ilogb$a2")
 OPTAB_D (isinf_optab, "isinf$a2")
+OPTAB_D (isfinite_optab, "isfinite$a2")
 OPTAB_D (issignaling_optab, "issignaling$a2")
 OPTAB_D (ldexp_optab, "ldexp$a3")
 OPTAB_D (log10_optab, "log10$a2")


[gcc r15-1642] optab: Add isnormal_optab for isnormal builtin

2024-06-25 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:5a10ac0e5921d5aeac58429457423b611ee325d2

commit r15-1642-g5a10ac0e5921d5aeac58429457423b611ee325d2
Author: Haochen Gui 
Date:   Wed Jun 26 13:35:58 2024 +0800

optab: Add isnormal_optab for isnormal builtin

gcc/
* builtins.cc (interclass_mathfn_icode): Set optab to isnormal_optab
for isnormal builtin.
* optabs.def (isnormal_optab): New.
* doc/md.texi (isnormal): Document.

Diff:
---
 gcc/builtins.cc | 2 ++
 gcc/doc/md.texi | 6 ++
 gcc/optabs.def  | 1 +
 3 files changed, 9 insertions(+)

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 03634cdaba1..0b902896ddd 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -2463,6 +2463,8 @@ interclass_mathfn_icode (tree arg, tree fndecl)
   builtin_optab = isfinite_optab;
   break;
 case BUILT_IN_ISNORMAL:
+  builtin_optab = isnormal_optab;
+  break;
 CASE_FLT_FN (BUILT_IN_FINITE):
 case BUILT_IN_FINITED32:
 case BUILT_IN_FINITED64:
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 3eb4216141e..4fd7da095fe 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -8563,6 +8563,12 @@ Return 1 if operand 1 is a finite floating point number 
and 0
 otherwise.  @var{m} is a scalar floating point mode.  Operand 0
 has mode @code{SImode}, and operand 1 has mode @var{m}.
 
+@cindex @code{isnormal@var{m}2} instruction pattern
+@item @samp{isnormal@var{m}2}
+Return 1 if operand 1 is a normal floating point number and 0
+otherwise.  @var{m} is a scalar floating point mode.  Operand 0
+has mode @code{SImode}, and operand 1 has mode @var{m}.
+
 @end table
 
 @end ifset
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 9514a317259..2f36ed4cb42 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -353,6 +353,7 @@ OPTAB_D (hypot_optab, "hypot$a3")
 OPTAB_D (ilogb_optab, "ilogb$a2")
 OPTAB_D (isinf_optab, "isinf$a2")
 OPTAB_D (isfinite_optab, "isfinite$a2")
+OPTAB_D (isnormal_optab, "isnormal$a2")
 OPTAB_D (issignaling_optab, "issignaling$a2")
 OPTAB_D (ldexp_optab, "ldexp$a3")
 OPTAB_D (log10_optab, "log10$a2")


[gcc r15-2089] rs6000: Remove redundant guard for float128 mode pattern

2024-07-16 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:ecc2c3cb7235f2d3a75cb3909cace7f6a38a4062

commit r15-2089-gecc2c3cb7235f2d3a75cb3909cace7f6a38a4062
Author: Haochen Gui 
Date:   Wed Jul 17 14:47:36 2024 +0800

rs6000: Remove redundant guard for float128 mode pattern

gcc/
* config/rs6000/rs6000.md (movcc, *movcc_p10,
*movcc_invert_p10, *fpmask, *xxsel,
@ieee_128bit_vsx_abs2, *ieee_128bit_vsx_nabs2,
add3, sub3, mul3, div3, sqrt2,
copysign3, copysign3_hard, copysign3_soft,
@neg2_hw, @abs2_hw, *nabs2_hw, fma4_hw,
*fms4_hw, *nfma4_hw, *nfms4_hw,
extend2_hw, truncdf2_hw,
truncsf2_hw, fix_2_hw,
fix_trunc2,
*fix_trunc2_mem,
float_di2_hw, float_si2_hw,
float2, floatuns_di2_hw,
floatuns_si2_hw, floatuns2,
floor2, ceil2, btrunc2, round2,
add3_odd, sub3_odd, mul3_odd, div3_odd,
sqrt2_odd, fma4_odd, *fms4_odd, *nfma4_odd,
*nfms4_odd, truncdf2_odd, *cmp_hw for IEEE128):
Remove guard FLOAT128_IEEE_P.
(@extenddf2_fprs, @extenddf2_vsx,
truncdf2_internal1, truncdf2_internal2,
fix_trunc_helper, neg2, *cmp_internal1,
*cmp_internal2 for IBM128): Remove guard 
FLOAT128_IBM_P.

Diff:
---
 gcc/config/rs6000/rs6000.md | 115 ++--
 1 file changed, 57 insertions(+), 58 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6a2891c5e509..f59be5365708 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5701,7 +5701,7 @@
 (if_then_else:IEEE128 (match_operand 1 "comparison_operator")
   (match_operand:IEEE128 2 "gpc_reg_operand")
   (match_operand:IEEE128 3 "gpc_reg_operand")))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
 {
   if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3]))
 DONE;
@@ -5718,7 +5718,7 @@
 (match_operand:IEEE128 4 "altivec_register_operand" "v,v")
 (match_operand:IEEE128 5 "altivec_register_operand" "v,v")))
(clobber (match_scratch:V2DI 6 "=0,&v"))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
   "#"
   "&& 1"
   [(set (match_dup 6)
@@ -5750,7 +5750,7 @@
 (match_operand:IEEE128 4 "altivec_register_operand" "v,v")
 (match_operand:IEEE128 5 "altivec_register_operand" "v,v")))
(clobber (match_scratch:V2DI 6 "=0,&v"))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
   "#"
   "&& 1"
   [(set (match_dup 6)
@@ -5785,7 +5785,7 @@
 (match_operand:IEEE128 3 "altivec_register_operand" "v")])
 (match_operand:V2DI 4 "all_ones_constant" "")
 (match_operand:V2DI 5 "zero_constant" "")))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
   "xscmp%V1qp %0,%2,%3"
   [(set_attr "type" "fpcompare")])
 
@@ -5796,7 +5796,7 @@
 (match_operand:V2DI 2 "zero_constant" ""))
 (match_operand:IEEE128 3 "altivec_register_operand" "v")
 (match_operand:IEEE128 4 "altivec_register_operand" "v")))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
   "xxsel %x0,%x4,%x3,%x1"
   [(set_attr "type" "vecmove")])
 
@@ -8869,7 +8869,7 @@
 (match_operand:DF 1 "nonimmediate_operand" "d,m,d")))
(use (match_operand:DF 2 "nonimmediate_operand" "m,m,d"))]
   "!TARGET_VSX && TARGET_HARD_FLOAT
-   && TARGET_LONG_DOUBLE_128 && FLOAT128_IBM_P (mode)"
+   && TARGET_LONG_DOUBLE_128"
   "#"
   "&& reload_completed"
   [(set (match_dup 3) (match_dup 1))
@@ -8886,7 +8886,7 @@
   [(set (match_operand:IBM128 0 "gpc_reg_operand" "=d,d")
(float_extend:IBM128
 (match_operand:DF 1 "nonimmediate_operand" "wa,m")))]
-  "TARGET_LONG_DOUBLE_128 && TARGET_VSX && FLOAT128_IBM_P (mode)"
+  "TARGET_LONG_DOUBLE_128 && TARGET_VSX"
   "#"
   "&& reload_completed"
   [(set (match_dup 2) (match_dup 1))
@@ -8932,7 +8932,7 @@
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d")
(float_truncate:DF
 (match_operand:IBM128 1 "gpc_reg_operand" "0,d")))]
-  "FLOAT128_IBM_P (mode) && !TARGET_XL_COMPAT
+  "!TARGET_XL_COMPAT
&& TARGET_HARD_FLOAT && TARGET_LONG_DOUBLE_128"
   "@
#
@@ -8948,7 +8948,7 @@
 (define_insn "truncdf2_internal2"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(float_truncate:DF (match_operand:IBM128 1 "gpc_reg_operand" "d")))]
-  "FLOAT128_IBM_P (mode) && TARGET_XL_COMPAT && TARGET_HARD_FLOAT
+  "TARGET_XL_COMPAT && TARGET_HARD_FLOAT
&& TARGET_LONG_DOUBLE_128"
   "fadd %0,%1,%L1"
   [(set_attr "type" "fp")])
@@ -9001,7 +9001,7 @@
(unspec:DF [(match

[gcc r14-9379] fwprop: Avoid volatile rtx to be propagated

2024-03-07 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:a0e945888d973fc1a4a9d2944aa7e96d2a4d7581

commit r14-9379-ga0e945888d973fc1a4a9d2944aa7e96d2a4d7581
Author: Haochen Gui 
Date:   Fri Mar 8 09:30:35 2024 +0800

fwprop: Avoid volatile rtx to be propagated

The patch for PR111267 (commit id 86de9b66480b710202a2898cf513db105d8c432f)
introduces an exception for propagation into single set insns.  A
propagation which might not be profitable (checked by profitable_p) is still
allowed when the target is a single set insn.  This has a potential problem:
a volatile operand might be propagated into a single set insn.  If the
defining insn is not eliminated after propagation, the volatile operand will
be executed multiple times.  This patch fixes the problem by skipping
volatile set source rtx in propagation.

gcc/
* fwprop.cc (forward_propagate_into): Return false for volatile set
source rtx.

gcc/testsuite/
* gcc.target/powerpc/fwprop-1.c: New.

Diff:
---
 gcc/fwprop.cc   |  2 ++
 gcc/testsuite/gcc.target/powerpc/fwprop-1.c | 15 +++
 2 files changed, 17 insertions(+)

diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc
index 7872609b336..cb6fd6700ca 100644
--- a/gcc/fwprop.cc
+++ b/gcc/fwprop.cc
@@ -854,6 +854,8 @@ forward_propagate_into (use_info *use, bool reg_prop_only = 
false)
 
   rtx dest = SET_DEST (def_set);
   rtx src = SET_SRC (def_set);
+  if (volatile_refs_p (src))
+return false;
 
   /* Allow propagations into a loop only for reg-to-reg copies, since
  replacing one register by another shouldn't increase the cost.
diff --git a/gcc/testsuite/gcc.target/powerpc/fwprop-1.c 
b/gcc/testsuite/gcc.target/powerpc/fwprop-1.c
new file mode 100644
index 000..07b207f980c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fwprop-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-rtl-fwprop1-details" } */
+/* { dg-final { scan-rtl-dump-not "propagating insn" "fwprop1" } } */
+
+/* Verify that volatile asm operands doesn't try to be propagated.  */
+long long foo ()
+{
+  long long res;
+  __asm__ __volatile__(
+""
+  : "=r" (res)
+  :
+  : "memory");
+  return res;
+}


[gcc r14-10010] rs6000: Fix bcd test case

2024-04-17 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:58a0b190a256bd2a184554de0fae0031a614ec67

commit r14-10010-g58a0b190a256bd2a184554de0fae0031a614ec67
Author: Haochen Gui 
Date:   Wed Apr 17 16:39:25 2024 +0800

rs6000: Fix bcd test case

gcc/testsuite/
* gcc.target/powerpc/bcd-4.c: Enable the case to be tested on P9.
Enable the case to be run on big endian.  Fix function maxbcd and
other misc. problems.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/bcd-4.c | 58 
 1 file changed, 52 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/bcd-4.c 
b/gcc/testsuite/gcc.target/powerpc/bcd-4.c
index 2c8554dfe82..2c7041c4d32 100644
--- a/gcc/testsuite/gcc.target/powerpc/bcd-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/bcd-4.c
@@ -1,7 +1,7 @@
 /* { dg-do run } */
 /* { dg-require-effective-target int128 } */
-/* { dg-require-effective-target power10_hw } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2 -save-temps" } */
+/* { dg-require-effective-target p9vector_hw } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -save-temps" } */
 /* { dg-final { scan-assembler-times {\mbcdadd\M} 7 } } */
 /* { dg-final { scan-assembler-times {\mbcdsub\M} 18 } } */
 /* { dg-final { scan-assembler-times {\mbcds\M} 2 } } */
@@ -44,10 +44,20 @@ vector unsigned char maxbcd(unsigned int sign)
   vector unsigned char result;
   int i;
 
+#ifdef __BIG_ENDIAN__
+  for (i = 0; i < 15; i++)
+#else
   for (i = 15; i > 0; i--)
+#endif
 result[i] = 0x99;
 
-  result[0] = sign << 4 | 0x9;
+#ifdef __BIG_ENDIAN__
+  result[15] = 0x90 | sign;
+#else
+  result[0] = 0x90 | sign;
+#endif
+
+  return result;
 }
 
 vector unsigned char num2bcd(long int a, int encoding)
@@ -70,9 +80,17 @@ vector unsigned char num2bcd(long int a, int encoding)
 
   hi = a % 10;   // 1st digit
   a = a / 10;
+#ifdef __BIG_ENDIAN__
+  result[15] = hi << 4| sign;
+#else
   result[0] = hi << 4| sign;
+#endif
 
+#ifdef __BIG_ENDIAN__
+  for (i = 14; i >= 0; i--)
+#else
   for (i = 1; i < 16; i++)
+#endif
 {
   low = a % 10;
   a = a / 10;
@@ -117,7 +135,11 @@ int main ()
 }
 
   /* result should be positive */
+#ifdef __BIG_ENDIAN__
+  if ((result[15] & 0xF) != BCD_POS0)
+#else
   if ((result[0] & 0xF) != BCD_POS0)
+#endif
 #if DEBUG
   printf("ERROR: __builtin_bcdadd sign of result is %d.  Does not match "
 "expected_result = %d\n",
@@ -150,7 +172,11 @@ int main ()
 }
 
   /* Result should be positive, alternate encoding.  */
+#ifdef __BIG_ENDIAN__
+  if ((result[15] & 0xF) != BCD_POS1)
+#else
   if ((result[0] & 0xF) != BCD_POS1)
+#endif
 #if DEBUG
 printf("ERROR: __builtin_bcdadd sign of result is %d.  Does not "
   "match expected_result = %d\n",
@@ -183,7 +209,11 @@ int main ()
 }
 
   /* result should be negative */
+#ifdef __BIG_ENDIAN__
+  if ((result[15] & 0xF) != BCD_NEG)
+#else
   if ((result[0] & 0xF) != BCD_NEG)
+#endif
 #if DEBUG
 printf("ERROR: __builtin_bcdadd sign, neg of result is %d.  Does not "
   "match expected_result = %d\n",
@@ -216,8 +246,12 @@ int main ()
 #endif
 }
 
-  /* result should be positive, alt encoding */
+  /* result should be positive */
+#ifdef __BIG_ENDIAN__
+  if ((result[15] & 0xF) != BCD_NEG)
+#else
   if ((result[0] & 0xF) != BCD_NEG)
+#endif
 #if DEBUG
 printf("ERROR: __builtin_bcdadd sign, of result is %d.  Does not match "
   "expected_result = %d\n",
@@ -250,7 +284,11 @@ int main ()
 }
 
   /* result should be positive */
+#ifdef __BIG_ENDIAN__
+  if ((result[15] & 0xF) != BCD_POS1)
+#else
   if ((result[0] & 0xF) != BCD_POS1)
+#endif
 #if DEBUG
 printf("ERROR: __builtin_bcdsub sign, result is %d.  Does not match "
   "expected_result = %d\n",
@@ -283,7 +321,7 @@ int main ()
 abort();
 #endif
 
-  a = maxbcd(BCD_NEG);
+  a = maxbcd(BCD_POS0);
   b = maxbcd(BCD_NEG);
 
   if (__builtin_bcdsub_ofl (a, b, 0) == 0)
@@ -462,8 +500,12 @@ int main ()
 }
 
   /* result should be positive */
+#ifdef __BIG_ENDIAN__
+  if ((result[15] & 0xF) != BCD_POS0)
+#else
   if ((result[0] & 0xF) != BCD_POS0)
-#if 0
+#endif
+#if DEBUG
 printf("ERROR: __builtin_bcdmul10 sign, result is %d.  Does not match "
   "expected_result = %d\n",
   result[0] & 0xF, BCD_POS1);
@@ -492,7 +534,11 @@ int main ()
 }
 
   /* result should be positive */
+#ifdef __BIG_ENDIAN__
+  if ((result[15] & 0xF) != BCD_POS0)
+#else
   if ((result[0] & 0xF) != BCD_POS0)
+#endif
 #if DEBUG
 printf("ERROR: __builtin_bcddiv10 sign, result is %d.  Does not match "
   "expected_result = %d\n",


[gcc r14-10114] rs6000: Use bcdsub. instead of bcdadd. for bcd invalid number checking

2024-04-24 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:09680e3ee7d72978b493dd4127ce2e769f96a45e

commit r14-10114-g09680e3ee7d72978b493dd4127ce2e769f96a45e
Author: Haochen Gui 
Date:   Thu Apr 25 09:55:53 2024 +0800

rs6000: Use bcdsub. instead of bcdadd. for bcd invalid number checking

bcdadd. might cause an overflow, which also sets the overflow/invalid bit.
bcdsub. doesn't have this issue when subtracting a bcd number from itself.

gcc/
* config/rs6000/altivec.md (*bcdinvalid_): Replace bcdadd
with bcdsub.
(bcdinvalid_): Likewise.

gcc/testsuite/
* gcc.target/powerpc/bcd-4.c: Adjust the number of bcdadd and
bcdsub.

Diff:
---
 gcc/config/rs6000/altivec.md | 6 +++---
 gcc/testsuite/gcc.target/powerpc/bcd-4.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 4d4c94ff0a0..bb20441c096 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -4586,18 +4586,18 @@
   [(set (reg:CCFP CR6_REGNO)
(compare:CCFP
 (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")]
- UNSPEC_BCDADD)
+ UNSPEC_BCDSUB)
 (match_operand:V2DF 2 "zero_constant" "j")))
(clobber (match_scratch:VBCD 0 "=v"))]
   "TARGET_P8_VECTOR"
-  "bcdadd. %0,%1,%1,0"
+  "bcdsub. %0,%1,%1,0"
   [(set_attr "type" "vecsimple")])
 
 (define_expand "bcdinvalid_"
   [(parallel [(set (reg:CCFP CR6_REGNO)
   (compare:CCFP
(unspec:V2DF [(match_operand:VBCD 1 "register_operand")]
-UNSPEC_BCDADD)
+UNSPEC_BCDSUB)
(match_dup 2)))
  (clobber (match_scratch:VBCD 3))])
(set (match_operand:SI 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/powerpc/bcd-4.c 
b/gcc/testsuite/gcc.target/powerpc/bcd-4.c
index 2c7041c4d32..6d2c59ef792 100644
--- a/gcc/testsuite/gcc.target/powerpc/bcd-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/bcd-4.c
@@ -2,8 +2,8 @@
 /* { dg-require-effective-target int128 } */
 /* { dg-require-effective-target p9vector_hw } */
 /* { dg-options "-mdejagnu-cpu=power9 -O2 -save-temps" } */
-/* { dg-final { scan-assembler-times {\mbcdadd\M} 7 } } */
-/* { dg-final { scan-assembler-times {\mbcdsub\M} 18 } } */
+/* { dg-final { scan-assembler-times {\mbcdadd\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mbcdsub\M} 20 } } */
 /* { dg-final { scan-assembler-times {\mbcds\M} 2 } } */
 /* { dg-final { scan-assembler-times {\mdenbcdq\M} 1 } } */


[gcc r15-1575] fwprop: invoke change_is_worthwhile to judge if a replacement is worthwhile

2024-06-23 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:ea8061f46a301797e7ba33b52e3b4713fb8e6b48

commit r15-1575-gea8061f46a301797e7ba33b52e3b4713fb8e6b48
Author: Haochen Gui 
Date:   Mon Jun 24 13:12:51 2024 +0800

fwprop: invoke change_is_worthwhile to judge if a replacement is worthwhile

gcc/
* fwprop.cc (try_fwprop_subst_pattern): Invoke change_is_worthwhile
to judge if a replacement is worthwhile.  Remove single_set check
and add is_debug_insn check.
* recog.cc (swap_change): Invalidate recog_data when the cached INSN
is swapped out.
* rtl-ssa/changes.cc (rtl_ssa::changes_are_worthwhile): Check if the
insn cost of new rtl is unknown and fail the replacement.

Diff:
---
 gcc/fwprop.cc  | 37 ++---
 gcc/recog.cc   |  6 +-
 gcc/rtl-ssa/changes.cc |  8 
 3 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc
index de543923b92..bfdc7a1b749 100644
--- a/gcc/fwprop.cc
+++ b/gcc/fwprop.cc
@@ -453,7 +453,7 @@ try_fwprop_subst_pattern (obstack_watermark &attempt, 
insn_change &use_change,
   && (prop.changed_mem_p ()
  || contains_mem_rtx_p (src)
  || use_insn->is_asm ()
- || !single_set (use_rtl)))
+ || use_insn->is_debug_insn ()))
 {
   if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "cannot propagate from insn %d into"
@@ -471,29 +471,20 @@ try_fwprop_subst_pattern (obstack_watermark &attempt, 
insn_change &use_change,
   redo_changes (0);
 }
 
-  /* ??? In theory, it should be better to use insn costs rather than
- set_src_costs here.  That would involve replacing this code with
- change_is_worthwhile.  */
   bool ok = recog (attempt, use_change);
-  if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
-if (rtx use_set = single_set (use_rtl))
-  {
-   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl));
-   temporarily_undo_changes (0);
-   auto old_cost = set_src_cost (SET_SRC (use_set),
- GET_MODE (SET_DEST (use_set)), speed);
-   redo_changes (0);
-   auto new_cost = set_src_cost (SET_SRC (use_set),
- GET_MODE (SET_DEST (use_set)), speed);
-   if (new_cost > old_cost
-   || (new_cost == old_cost && !prop.likely_profitable_p ()))
- {
-   if (dump_file)
- fprintf (dump_file, "change not profitable"
-  " (cost %d -> cost %d)\n", old_cost, new_cost);
-   ok = false;
- }
-  }
+  if (ok
+  && !prop.changed_mem_p ()
+  && !use_insn->is_asm ()
+  && !use_insn->is_debug_insn ())
+{
+  bool strict_p = !prop.likely_profitable_p ();
+  if (!change_is_worthwhile (use_change, strict_p))
+   {
+ if (dump_file)
+   fprintf (dump_file, "change not profitable");
+ ok = false;
+   }
+}
 
   if (!ok)
 {
diff --git a/gcc/recog.cc b/gcc/recog.cc
index a6799e3f5e6..56370e40e01 100644
--- a/gcc/recog.cc
+++ b/gcc/recog.cc
@@ -614,7 +614,11 @@ swap_change (int num)
   else
 std::swap (*changes[num].loc, changes[num].old);
   if (changes[num].object && !MEM_P (changes[num].object))
-std::swap (INSN_CODE (changes[num].object), changes[num].old_code);
+{
+  std::swap (INSN_CODE (changes[num].object), changes[num].old_code);
+  if (recog_data.insn == changes[num].object)
+   recog_data.insn = nullptr;
+}
 }
 
 /* Temporarily undo all the changes numbered NUM and up, with a view
diff --git a/gcc/rtl-ssa/changes.cc b/gcc/rtl-ssa/changes.cc
index 3101f2dc4fc..bc80d7da829 100644
--- a/gcc/rtl-ssa/changes.cc
+++ b/gcc/rtl-ssa/changes.cc
@@ -190,6 +190,14 @@ rtl_ssa::changes_are_worthwhile (array_slice changes,
  && INSN_CODE (change->rtl ()) != NOOP_MOVE_INSN_CODE)
{
  change->new_cost = insn_cost (change->rtl (), for_speed);
+ /* If the cost is unknown, replacement is not worthwhile.  */
+ if (!change->new_cost)
+   {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file,
+"Reject replacement due to unknown insn cost.\n");
+ return false;
+   }
  new_cost += change->new_cost;
  if (for_speed)
weighted_new_cost += (cfg_bb->count.to_sreal_scale (entry_count)


[gcc r15-1576] rs6000: Eliminate unnecessary byte swaps for duplicated constant vector store

2024-06-23 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:6274f10318d05311f31147c895f76a01aec37830

commit r15-1576-g6274f10318d05311f31147c895f76a01aec37830
Author: Haochen Gui 
Date:   Mon Jun 24 13:16:12 2024 +0800

rs6000: Eliminate unnecessary byte swaps for duplicated constant vector 
store

gcc/
PR target/113325
* config/rs6000/vsx.md (vsx_stxvd2x4_le_const_): New.

gcc/testsuite/
PR target/113325
* gcc.target/powerpc/pr113325.c: New.

Diff:
---
 gcc/config/rs6000/vsx.md| 25 +
 gcc/testsuite/gcc.target/powerpc/pr113325.c |  9 +
 2 files changed, 34 insertions(+)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7a9c19ac903..48ba262f7e4 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3368,6 +3368,31 @@
   "stxvd2x %x1,%y0"
   [(set_attr "type" "vecstore")])
 
+(define_insn_and_split "vsx_stxvd2x4_le_const_"
+  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
+   (match_operand:VSX_W 1 "immediate_operand" "W"))]
+  "!BYTES_BIG_ENDIAN
+   && VECTOR_MEM_VSX_P (mode)
+   && !TARGET_P9_VECTOR
+   && const_vec_duplicate_p (operands[1])
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 2)
+   (match_dup 1))
+   (set (match_dup 0)
+   (vec_select:VSX_W
+ (match_dup 2)
+ (parallel [(const_int 2) (const_int 3)
+(const_int 0) (const_int 1)])))]
+{
+  /* Here all the constants must be loaded without memory.  */
+  gcc_assert (easy_altivec_constant (operands[1], mode));
+  operands[2] = gen_reg_rtx (mode);
+}
+  [(set_attr "type" "vecstore")
+   (set_attr "length" "8")])
+
 (define_insn "*vsx_stxvd2x8_le_V8HI"
   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
 (vec_select:V8HI
diff --git a/gcc/testsuite/gcc.target/powerpc/pr113325.c 
b/gcc/testsuite/gcc.target/powerpc/pr113325.c
new file mode 100644
index 000..3ca1fcbc9ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr113325.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8 -mvsx" } */
+/* { dg-require-effective-target powerpc_vsx } */
+/* { dg-final { scan-assembler-not {\mxxpermdi\M} } } */
+
+void* foo (void* s1)
+{
+  return __builtin_memset (s1, 0, 32);
+}


[gcc r15-2740] rs6000: Add const_vector into any_operand predicate

2024-08-05 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:3592d3f8cc4b89ae508c747a46a626d73cb9616d

commit r15-2740-g3592d3f8cc4b89ae508c747a46a626d73cb9616d
Author: Haochen Gui 
Date:   Tue Aug 6 14:15:05 2024 +0800

rs6000: Add const_vector into any_operand predicate

gcc/
* config/rs6000/predicates.md (any_operand): Add const_vector.

Diff:
---
 gcc/config/rs6000/predicates.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index d23ce9a77a3f..cdfd400f6395 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -19,7 +19,7 @@
 
 ;; Return 1 for anything except PARALLEL.
 (define_predicate "any_operand"
-  (match_code 
"const_int,const_double,const_wide_int,const,symbol_ref,label_ref,subreg,reg,mem"))
+  (match_code 
"const_int,const_double,const_wide_int,const_vector,const,symbol_ref,label_ref,subreg,reg,mem"))
 
 ;; Return 1 for any PARALLEL.
 (define_predicate "any_parallel_operand"


[gcc r15-2922] Value Range: Add range op for builtin isinf

2024-08-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:cd0930768ff936b681135bf8873e7f7b796a4ed9

commit r15-2922-gcd0930768ff936b681135bf8873e7f7b796a4ed9
Author: Haochen Gui 
Date:   Thu Aug 15 11:19:10 2024 +0800

Value Range: Add range op for builtin isinf

The builtin isinf is not folded at front end if the corresponding optab
exists.  So the range op for isinf is needed for value range analysis.
This patch adds range op for builtin isinf.

gcc/
PR target/114678
* gimple-range-op.cc (class cfn_isinf): New.
(op_cfn_isinf): New variables.
(gimple_range_op_handler::maybe_builtin_call): Handle
CASE_FLT_FN (BUILT_IN_ISINF).

gcc/testsuite/
PR target/114678
* gcc.dg/tree-ssa/range-isinf.c: New test.
* gcc.dg/tree-ssa/range-sincos.c: Remove xfail for s390.
* gcc.dg/tree-ssa/vrp-float-abs-1.c: Likewise.

Diff:
---
 gcc/gimple-range-op.cc  | 62 +
 gcc/testsuite/gcc.dg/tree-ssa/range-isinf.c | 44 ++
 gcc/testsuite/gcc.dg/tree-ssa/range-sincos.c|  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/vrp-float-abs-1.c |  2 +-
 4 files changed, 108 insertions(+), 2 deletions(-)

diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
index a80b93cf063..24559951dd6 100644
--- a/gcc/gimple-range-op.cc
+++ b/gcc/gimple-range-op.cc
@@ -1153,6 +1153,63 @@ private:
   bool m_is_pos;
 } op_cfn_goacc_dim_size (false), op_cfn_goacc_dim_pos (true);
 
+// Implement range operator for CFN_BUILT_IN_ISINF
+class cfn_isinf : public range_operator
+{
+public:
+  using range_operator::fold_range;
+  using range_operator::op1_range;
+  virtual bool fold_range (irange &r, tree type, const frange &op1,
+  const irange &, relation_trio) const override
+  {
+if (op1.undefined_p ())
+  return false;
+
+if (op1.known_isinf ())
+  {
+   wide_int one = wi::one (TYPE_PRECISION (type));
+   r.set (type, one, one);
+   return true;
+  }
+
+if (op1.known_isnan ()
+   || (!real_isinf (&op1.lower_bound ())
+   && !real_isinf (&op1.upper_bound ())))
+  {
+   r.set_zero (type);
+   return true;
+  }
+
+r.set_varying (type);
+return true;
+  }
+  virtual bool op1_range (frange &r, tree type, const irange &lhs,
+ const frange &, relation_trio) const override
+  {
+if (lhs.undefined_p ())
+  return false;
+
+if (lhs.zero_p ())
+  {
+   nan_state nan (true);
+   r.set (type, real_min_representable (type),
+  real_max_representable (type), nan);
+   return true;
+  }
+
+if (!range_includes_zero_p (lhs))
+  {
+   // The range is [-INF,-INF][+INF,+INF], but it can't be represented.
+   // Set range to [-INF,+INF]
+   r.set_varying (type);
+   r.clear_nan ();
+   return true;
+  }
+
+r.set_varying (type);
+return true;
+  }
+} op_cfn_isinf;
 
 // Implement range operator for CFN_BUILT_IN_
 class cfn_parity : public range_operator
@@ -1246,6 +1303,11 @@ gimple_range_op_handler::maybe_builtin_call ()
   m_operator = &op_cfn_signbit;
   break;
 
+CASE_FLT_FN (BUILT_IN_ISINF):
+  m_op1 = gimple_call_arg (call, 0);
+  m_operator = &op_cfn_isinf;
+  break;
+
 CASE_CFN_COPYSIGN_ALL:
   m_op1 = gimple_call_arg (call, 0);
   m_op2 = gimple_call_arg (call, 1);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/range-isinf.c 
b/gcc/testsuite/gcc.dg/tree-ssa/range-isinf.c
new file mode 100644
index 000..468f1bcf5c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/range-isinf.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+#include 
+void link_error();
+
+void
+test1 (double x)
+{
+  if (x > __DBL_MAX__ && !__builtin_isinf (x))
+link_error ();
+  if (x < -__DBL_MAX__ && !__builtin_isinf (x))
+link_error ();
+}
+
+void
+test2 (float x)
+{
+  if (x > __FLT_MAX__ && !__builtin_isinf (x))
+link_error ();
+  if (x < -__FLT_MAX__ && !__builtin_isinf (x))
+link_error ();
+}
+
+void
+test3 (double x)
+{
+  if (!__builtin_isinf (x) && !__builtin_isnan (x) && x > __DBL_MAX__)
+link_error ();
+  if (!__builtin_isinf (x) && !__builtin_isnan (x) && x < -__DBL_MAX__)
+link_error ();
+}
+
+void
+test4 (float x)
+{
+  if (!__builtin_isinf (x) && !__builtin_isnan (x) && x > __FLT_MAX__)
+link_error ();
+  if (!__builtin_isinf (x) && !__builtin_isnan (x) && x < -__FLT_MAX__)
+link_error ();
+}
+
+/* { dg-final { scan-tree-dump-not "link_error" "evrp" } } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/range-sincos.c 
b/gcc/testsuite/gcc.dg/tree-ssa/range-sincos.c
index 35b38c3c914..337f9cda02f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/range-sincos.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/range-sincos.c
@@ -40,4 +40,4 @@ stool (double x)
 link_error ();
 }
 
-// { dg-final { scan-tree-dump-not "link_error" 

[gcc r15-2923] Value Range: Add range op for builtin isfinite

2024-08-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:b1d21582bafa1954db3a62f0972ae3a2e3bc0b48

commit r15-2923-gb1d21582bafa1954db3a62f0972ae3a2e3bc0b48
Author: Haochen Gui 
Date:   Thu Aug 15 11:20:27 2024 +0800

Value Range: Add range op for builtin isfinite

The former patch adds optab for builtin isfinite. Thus builtin isfinite
might not be folded at front end.  So the range op for isfinite is needed
for value range analysis.  This patch adds range op for builtin isfinite.

gcc/
* gimple-range-op.cc (class cfn_isfinite): New.
(op_cfn_isfinite): New variable.
(gimple_range_op_handler::maybe_builtin_call): Handle
CFN_BUILT_IN_ISFINITE.

gcc/testsuite/
* gcc.dg/tree-ssa/range-isfinite.c: New test.

Diff:
---
 gcc/gimple-range-op.cc | 61 ++
 gcc/testsuite/gcc.dg/tree-ssa/range-isfinite.c | 31 +
 2 files changed, 92 insertions(+)

diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
index 24559951dd6..7edfa8e315f 100644
--- a/gcc/gimple-range-op.cc
+++ b/gcc/gimple-range-op.cc
@@ -1211,6 +1211,62 @@ public:
   }
 } op_cfn_isinf;
 
+//Implement range operator for CFN_BUILT_IN_ISFINITE
+class cfn_isfinite : public range_operator
+{
+public:
+  using range_operator::fold_range;
+  using range_operator::op1_range;
+  virtual bool fold_range (irange &r, tree type, const frange &op1,
+  const irange &, relation_trio) const override
+  {
+if (op1.undefined_p ())
+  return false;
+
+if (op1.known_isfinite ())
+  {
+   wide_int one = wi::one (TYPE_PRECISION (type));
+   r.set (type, one, one);
+   return true;
+  }
+
+if (op1.known_isnan ()
+   || op1.known_isinf ())
+  {
+   r.set_zero (type);
+   return true;
+  }
+
+r.set_varying (type);
+return true;
+  }
+  virtual bool op1_range (frange &r, tree type, const irange &lhs,
+ const frange &, relation_trio) const override
+  {
+if (lhs.undefined_p ())
+  return false;
+
+if (lhs.zero_p ())
+  {
+   // The range is [-INF,-INF][+INF,+INF] NAN, but it can't be represented.
+   // Set range to varying
+   r.set_varying (type);
+   return true;
+  }
+
+if (!range_includes_zero_p (lhs))
+  {
+   nan_state nan (false);
+   r.set (type, real_min_representable (type),
+  real_max_representable (type), nan);
+   return true;
+  }
+
+r.set_varying (type);
+return true;
+  }
+} op_cfn_isfinite;
+
 // Implement range operator for CFN_BUILT_IN_
 class cfn_parity : public range_operator
 {
@@ -1308,6 +1364,11 @@ gimple_range_op_handler::maybe_builtin_call ()
   m_operator = &op_cfn_isinf;
   break;
 
+case CFN_BUILT_IN_ISFINITE:
+  m_op1 = gimple_call_arg (call, 0);
+  m_operator = &op_cfn_isfinite;
+  break;
+
 CASE_CFN_COPYSIGN_ALL:
   m_op1 = gimple_call_arg (call, 0);
   m_op2 = gimple_call_arg (call, 1);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/range-isfinite.c 
b/gcc/testsuite/gcc.dg/tree-ssa/range-isfinite.c
new file mode 100644
index 000..f5dce0a0486
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/range-isfinite.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+#include 
+void link_error();
+
+void test1 (double x)
+{
+  if (x < __DBL_MAX__ && x > -__DBL_MAX__ && !__builtin_isfinite (x))
+link_error ();
+}
+
+void test2 (float x)
+{
+  if (x < __FLT_MAX__ && x > -__FLT_MAX__ && !__builtin_isfinite (x))
+link_error ();
+}
+
+void test3 (double x)
+{
+  if (__builtin_isfinite (x) && __builtin_isinf (x))
+link_error ();
+}
+
+void test4 (float x)
+{
+  if (__builtin_isfinite (x) && __builtin_isinf (x))
+link_error ();
+}
+
+/* { dg-final { scan-tree-dump-not "link_error" "evrp" } } */


[gcc r15-2924] Value Range: Add range op for builtin isnormal

2024-08-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:d2e90c7d65749a02a20aca717ac47d02ef0b5d81

commit r15-2924-gd2e90c7d65749a02a20aca717ac47d02ef0b5d81
Author: Haochen Gui 
Date:   Thu Aug 15 11:21:08 2024 +0800

Value Range: Add range op for builtin isnormal

The former patch adds optab for builtin isnormal. Thus builtin isnormal
might not be folded at front end.  So the range op for isnormal is needed
for value range analysis.  This patch adds range op for builtin isnormal.

gcc/
* gimple-range-op.cc (class cfn_isnormal): New.
(op_cfn_isnormal): New variable.
(gimple_range_op_handler::maybe_builtin_call): Handle
CFN_BUILT_IN_ISNORMAL.
* value-range.h (class frange): Declare known_isnormal and
known_isdenormal_or_zero.
(frange::known_isnormal): Define.
(frange::known_isdenormal_or_zero): Define.

gcc/testsuite/
* gcc.dg/tree-ssa/range-isnormal.c: New test.

Diff:
---
 gcc/gimple-range-op.cc | 60 ++
 gcc/testsuite/gcc.dg/tree-ssa/range-isnormal.c | 37 
 gcc/value-range.h  | 29 +
 3 files changed, 126 insertions(+)

diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
index 7edfa8e315f..d1c527191f4 100644
--- a/gcc/gimple-range-op.cc
+++ b/gcc/gimple-range-op.cc
@@ -1267,6 +1267,61 @@ public:
   }
 } op_cfn_isfinite;
 
+//Implement range operator for CFN_BUILT_IN_ISNORMAL
+class cfn_isnormal :  public range_operator
+{
+public:
+  using range_operator::fold_range;
+  using range_operator::op1_range;
+  virtual bool fold_range (irange &r, tree type, const frange &op1,
+  const irange &, relation_trio) const override
+  {
+if (op1.undefined_p ())
+  return false;
+
+if (op1.known_isnormal ())
+  {
+   wide_int one = wi::one (TYPE_PRECISION (type));
+   r.set (type, one, one);
+   return true;
+  }
+
+if (op1.known_isnan ()
+   || op1.known_isinf ()
+   || op1.known_isdenormal_or_zero ())
+  {
+   r.set_zero (type);
+   return true;
+  }
+
+r.set_varying (type);
+return true;
+  }
+  virtual bool op1_range (frange &r, tree type, const irange &lhs,
+ const frange &, relation_trio) const override
+  {
+if (lhs.undefined_p ())
+  return false;
+
+if (lhs.zero_p ())
+  {
+   r.set_varying (type);
+   return true;
+  }
+
+if (!range_includes_zero_p (lhs))
+  {
+   nan_state nan (false);
+   r.set (type, real_min_representable (type),
+  real_max_representable (type), nan);
+   return true;
+  }
+
+r.set_varying (type);
+return true;
+  }
+} op_cfn_isnormal;
+
 // Implement range operator for CFN_BUILT_IN_
 class cfn_parity : public range_operator
 {
@@ -1369,6 +1424,11 @@ gimple_range_op_handler::maybe_builtin_call ()
   m_operator = &op_cfn_isfinite;
   break;
 
+case CFN_BUILT_IN_ISNORMAL:
+  m_op1 = gimple_call_arg (call, 0);
+  m_operator = &op_cfn_isnormal;
+  break;
+
 CASE_CFN_COPYSIGN_ALL:
   m_op1 = gimple_call_arg (call, 0);
   m_op2 = gimple_call_arg (call, 1);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/range-isnormal.c 
b/gcc/testsuite/gcc.dg/tree-ssa/range-isnormal.c
new file mode 100644
index 000..c4df4d839b0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/range-isnormal.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+#include 
+void link_error();
+
+void test1 (double x)
+{
+  if (x < __DBL_MAX__ && x > __DBL_MIN__ && !__builtin_isnormal (x))
+link_error ();
+
+  if (x < -__DBL_MIN__ && x > -__DBL_MAX__ && !__builtin_isnormal (x))
+link_error ();
+}
+
+void test2 (float x)
+{
+  if (x < __FLT_MAX__ && x > __FLT_MIN__ && !__builtin_isnormal (x))
+link_error ();
+
+  if (x < -__FLT_MIN__ && x > - __FLT_MAX__ && !__builtin_isnormal (x))
+link_error ();
+}
+
+void test3 (double x)
+{
+  if (__builtin_isnormal (x) && __builtin_isinf (x))
+link_error ();
+}
+
+void test4 (float x)
+{
+  if (__builtin_isnormal (x) && __builtin_isinf (x))
+link_error ();
+}
+
+/* { dg-final { scan-tree-dump-not "link_error" "evrp" } } */
diff --git a/gcc/value-range.h b/gcc/value-range.h
index 03af758d152..ff63d4fc5ce 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -588,6 +588,8 @@ public:
   bool maybe_isinf () const;
   bool signbit_p (bool &signbit) const;
   bool nan_signbit_p (bool &signbit) const;
+  bool known_isnormal () const;
+  bool known_isdenormal_or_zero () const;
 
 protected:
   virtual bool contains_p (tree cst) const override;
@@ -1648,6 +1650,33 @@ frange::known_isfinite () const
   return (!maybe_isnan () && !real_isinf (&m_min) && !real_isinf (&m_max));
 }
 
+// Return TRUE if range is known to be normal.
+
+inline bool
+frange::known_isnormal () const
+{
+  if (!known_isfinite

[gcc r15-2925] rs6000: Implement optab_isinf for SFDF and IEEE128

2024-08-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:53945be1efb502f235d84ff67ceafe4a764b6e1c

commit r15-2925-g53945be1efb502f235d84ff67ceafe4a764b6e1c
Author: Haochen Gui 
Date:   Thu Aug 15 13:38:22 2024 +0800

rs6000: Implement optab_isinf for SFDF and IEEE128

gcc/
PR target/97786
* config/rs6000/rs6000.md (constant VSX_TEST_DATA_CLASS_NAN,
VSX_TEST_DATA_CLASS_POS_INF, VSX_TEST_DATA_CLASS_NEG_INF,
VSX_TEST_DATA_CLASS_POS_ZERO, VSX_TEST_DATA_CLASS_NEG_ZERO,
VSX_TEST_DATA_CLASS_POS_DENORMAL, VSX_TEST_DATA_CLASS_NEG_DENORMAL):
Define.
(mode_attr sdq, vsx_altivec, wa_v, x): Define.
(mode_iterator IEEE_FP): Define.
* config/rs6000/vsx.md (isinf<mode>2): New expand.
(expand xststdcqp_, xststdcp): Combine into...
(expand xststdc_): ...this.
(insn *xststdcqp_, *xststdcp): Combine into...
(insn *xststdc_): ...this.
* config/rs6000/rs6000-builtin.cc (rs6000_expand_builtin): Rename
CODE_FOR_xststdcqp_kf as CODE_FOR_xststdc_kf,
CODE_FOR_xststdcqp_tf as CODE_FOR_xststdc_tf.
* config/rs6000/rs6000-builtins.def: Rename xststdcdp as xststdc_df,
xststdcsp as xststdc_sf, xststdcqp_kf as xststdc_kf.

gcc/testsuite/
PR target/97786
* gcc.target/powerpc/pr97786-1.c: New test.
* gcc.target/powerpc/pr97786-2.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc  |  4 +-
 gcc/config/rs6000/rs6000-builtins.def|  6 +--
 gcc/config/rs6000/rs6000.md  | 35 +
 gcc/config/rs6000/vsx.md | 58 +---
 gcc/testsuite/gcc.target/powerpc/pr97786-1.c | 22 +++
 gcc/testsuite/gcc.target/powerpc/pr97786-2.c | 17 
 6 files changed, 97 insertions(+), 45 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 099cbc82245..9bdbae1ecf9 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -3254,8 +3254,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /* 
subtarget */,
   case CODE_FOR_xsiexpqpf_kf:
icode = CODE_FOR_xsiexpqpf_tf;
break;
-  case CODE_FOR_xststdcqp_kf:
-   icode = CODE_FOR_xststdcqp_tf;
+  case CODE_FOR_xststdc_kf:
+   icode = CODE_FOR_xststdc_tf;
break;
   case CODE_FOR_xscmpexpqp_eq_kf:
icode = CODE_FOR_xscmpexpqp_eq_tf;
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 5b513a7ef2b..0e9dc05dbcf 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2554,11 +2554,11 @@
 
   const signed int \
   __builtin_vsx_scalar_test_data_class_dp (double, const int<7>);
-VSTDCDP xststdcdp {}
+VSTDCDP xststdc_df {}
 
   const signed int \
   __builtin_vsx_scalar_test_data_class_sp (float, const int<7>);
-VSTDCSP xststdcsp {}
+VSTDCSP xststdc_sf {}
 
   const signed int __builtin_vsx_scalar_test_neg_dp (double);
 VSTDCNDP xststdcnegdp {}
@@ -2727,7 +2727,7 @@
 
   const signed int __builtin_vsx_scalar_test_data_class_qp (_Float128, \
 const int<7>);
-VSTDCQP xststdcqp_kf {}
+VSTDCQP xststdc_kf {}
 
   const signed int __builtin_vsx_scalar_test_neg_qp (_Float128);
 VSTDCNQP xststdcnegqp_kf {}
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index d352a1431ad..267affa5057 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -53,6 +53,20 @@
(FRAME_POINTER_REGNUM   110)
   ])
 
+;;
+;; Test data class mask bits
+;;
+
+(define_constants
+  [(VSX_TEST_DATA_CLASS_NAN0x40)
+   (VSX_TEST_DATA_CLASS_POS_INF0x20)
+   (VSX_TEST_DATA_CLASS_NEG_INF0x10)
+   (VSX_TEST_DATA_CLASS_POS_ZERO   0x8)
+   (VSX_TEST_DATA_CLASS_NEG_ZERO   0x4)
+   (VSX_TEST_DATA_CLASS_POS_DENORMAL   0x2)
+   (VSX_TEST_DATA_CLASS_NEG_DENORMAL   0x1)
+  ])
+
 ;;
 ;; UNSPEC usage
 ;;
@@ -605,6 +619,10 @@
 (define_mode_attr sd [(SF   "s") (DF   "d")
  (V4SF "s") (V2DF "d")])
 
+; A generic s/d/q attribute, for sp/dp/qp for example.
+(define_mode_attr sdq [(SF "s") (DF "d")
+  (TF "q") (KF "q")])
+
 ; "s" or nothing, for fmuls/fmul for example.
 (define_mode_attr s [(SF "s") (DF "")])
 
@@ -616,6 +634,23 @@
 (define_mode_iterator IEEE128 [(KF "FLOAT128_IEEE_P (KFmode)")
   (TF "FLOAT128_IEEE_P (TFmode)")])
 
+; Iterator for IEEE floating point
+(define_mode_iterator IEEE_FP [SFDF IEEE128])
+
+; "vsx/altivec_register_operand", for IEEE_FP predicates
+(define_mode_attr fp_register_op [(SF "vsx_register_operand")
+ (DF "vsx_register_operand")
+ (TF "altivec_register_operand")
+   

[gcc r15-2926] rs6000: Implement optab_isfinite for SFDF and IEEE128

2024-08-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:44eb45c2ef7192eb6a811fd46fcb2c7fbeb6f865

commit r15-2926-g44eb45c2ef7192eb6a811fd46fcb2c7fbeb6f865
Author: Haochen Gui 
Date:   Thu Aug 15 13:41:25 2024 +0800

rs6000: Implement optab_isfinite for SFDF and IEEE128

gcc/
PR target/97786
* config/rs6000/vsx.md (isfinite<mode>2): New expand.

gcc/testsuite/
PR target/97786
* gcc.target/powerpc/pr97786-4.c: New test.
* gcc.target/powerpc/pr97786-5.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md | 15 +++
 gcc/testsuite/gcc.target/powerpc/pr97786-4.c | 17 +
 gcc/testsuite/gcc.target/powerpc/pr97786-5.c | 12 
 3 files changed, 44 insertions(+)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 3a6afd13c16..0f18fd5cffe 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5370,6 +5370,21 @@
   DONE;
 })
 
+(define_expand "isfinite<mode>2"
+  [(use (match_operand:SI 0 "gpc_reg_operand"))
+   (use (match_operand:IEEE_FP 1 "<fp_register_op>"))]
+  "TARGET_P9_VECTOR
+   && (!FLOAT128_IEEE_P (<MODE>mode) || TARGET_FLOAT128_HW)"
+{
+  rtx tmp = gen_reg_rtx (SImode);
+  /* It is neither infinite nor NAN.  */
+  int mask = VSX_TEST_DATA_CLASS_POS_INF | VSX_TEST_DATA_CLASS_NEG_INF
+| VSX_TEST_DATA_CLASS_NAN;
+  emit_insn (gen_xststdc_<mode> (tmp, operands[1], GEN_INT (mask)));
+  emit_insn (gen_xorsi3 (operands[0], tmp, const1_rtx));
+  DONE;
+})
+
 ;; The VSX Scalar Test Negative Quad-Precision
 (define_expand "xststdcnegqp_"
   [(set (match_dup 2)
diff --git a/gcc/testsuite/gcc.target/powerpc/pr97786-4.c 
b/gcc/testsuite/gcc.target/powerpc/pr97786-4.c
new file mode 100644
index 000..9cdde78257d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97786-4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+int test1 (double x)
+{
+  return __builtin_isfinite (x);
+}
+
+int test2 (float x)
+{
+  return __builtin_isfinite (x);
+}
+
+/* { dg-final { scan-assembler-not {\mfcmp} } } */
+/* { dg-final { scan-assembler-times {\mxststdcsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxststdcdp\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr97786-5.c 
b/gcc/testsuite/gcc.target/powerpc/pr97786-5.c
new file mode 100644
index 000..0ef8b86f6cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97786-5.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -mabi=ieeelongdouble -Wno-psabi" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+int test1 (long double x)
+{
+  return __builtin_isfinite (x);
+}
+
+/* { dg-final { scan-assembler-not {\mxscmpuqp\M} } } */
+/* { dg-final { scan-assembler {\mxststdcqp\M} } } */


[gcc r15-2928] rs6000: Add TARGET_FLOAT128_HW guard for quad-precision insns

2024-08-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:bf891fcabca7a59ce71e85c8f2eea2bfabbffe59

commit r15-2928-gbf891fcabca7a59ce71e85c8f2eea2bfabbffe59
Author: Haochen Gui 
Date:   Thu Aug 15 13:45:35 2024 +0800

rs6000: Add TARGET_FLOAT128_HW guard for quad-precision insns

gcc/
* config/rs6000/rs6000.md (floatti2, floatunsti2,
fix_truncti2): Add guard TARGET_FLOAT128_HW.
* config/rs6000/vsx.md (xsxexpqp__,
xsxsigqp__, xsiexpqpf_,
xsiexpqp__, xscmpexpqp__,
*xscmpexpqp, xststdcnegqp_): Replace guard TARGET_P9_VECTOR
with TARGET_FLOAT128_HW.
(xststdc_, *xststdc_, isinf2): Add guard
TARGET_FLOAT128_HW for the IEEE128 modes.

gcc/testsuite/
* gcc.target/powerpc/float128-cmp2-runnable.c: Replace
ppc_float128_sw with ppc_float128_hw and remove p9vector_hw.

Diff:
---
 gcc/config/rs6000/rs6000.md|  6 +++---
 gcc/config/rs6000/vsx.md   | 23 --
 .../gcc.target/powerpc/float128-cmp2-runnable.c|  3 +--
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 267affa5057..8eda2f7bb0d 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6928,7 +6928,7 @@
 (define_insn "floatti2"
   [(set (match_operand:IEEE128 0 "vsx_register_operand" "=v")
(float:IEEE128 (match_operand:TI 1 "vsx_register_operand" "v")))]
-  "TARGET_POWER10"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
 {
   return  "xscvsqqp %0,%1";
 }
@@ -6937,7 +6937,7 @@
 (define_insn "floatunsti2"
   [(set (match_operand:IEEE128 0 "vsx_register_operand" "=v")
(unsigned_float:IEEE128 (match_operand:TI 1 "vsx_register_operand" 
"v")))]
-  "TARGET_POWER10"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
 {
   return  "xscvuqqp %0,%1";
 }
@@ -6946,7 +6946,7 @@
 (define_insn "fix_truncti2"
   [(set (match_operand:TI 0 "vsx_register_operand" "=v")
(fix:TI (match_operand:IEEE128 1 "vsx_register_operand" "v")))]
-  "TARGET_POWER10"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
 {
   return  "xscvqpsqz %0,%1";
 }
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 705e70bd6e9..27069d070e1 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5196,7 +5196,7 @@
(unspec:V2DI_DI
  [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
 UNSPEC_VSX_SXEXPDP))]
-  "TARGET_P9_VECTOR"
+  "TARGET_FLOAT128_HW"
   "xsxexpqp %0,%1"
   [(set_attr "type" "vecmove")])
 
@@ -5215,7 +5215,7 @@
(unspec:VEC_TI [(match_operand:IEEE128 1
"altivec_register_operand" "v")]
 UNSPEC_VSX_SXSIG))]
-  "TARGET_P9_VECTOR"
+  "TARGET_FLOAT128_HW"
   "xsxsigqp %0,%1"
   [(set_attr "type" "vecmove")])
 
@@ -5235,7 +5235,7 @@
 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
  (match_operand:DI 2 "altivec_register_operand" "v")]
 UNSPEC_VSX_SIEXPQP))]
-  "TARGET_P9_VECTOR"
+  "TARGET_FLOAT128_HW"
   "xsiexpqp %0,%1,%2"
   [(set_attr "type" "vecmove")])
 
@@ -5247,7 +5247,7 @@
 (match_operand:V2DI_DI 2
  "altivec_register_operand" "v")]
 UNSPEC_VSX_SIEXPQP))]
-  "TARGET_P9_VECTOR"
+  "TARGET_FLOAT128_HW"
   "xsiexpqp %0,%1,%2"
   [(set_attr "type" "vecmove")])
 
@@ -5317,7 +5317,7 @@
(set (match_operand:SI 0 "register_operand" "=r")
(CMP_TEST:SI (match_dup 3)
 (const_int 0)))]
-  "TARGET_P9_VECTOR"
+  "TARGET_FLOAT128_HW"
 {
   if ( == UNORDERED && !HONOR_NANS (mode))
 {
@@ -5335,7 +5335,7 @@
  (match_operand:IEEE128 2 "altivec_register_operand" 
"v")]
  UNSPEC_VSX_SCMPEXPQP)
 (match_operand:SI 3 "zero_constant" "j")))]
-  "TARGET_P9_VECTOR"
+  "TARGET_FLOAT128_HW"
   "xscmpexpqp %0,%1,%2"
   [(set_attr "type" "fpcompare")])
 
@@ -5354,7 +5354,8 @@
(set (match_operand:SI 0 "register_operand" "=r")
(eq:SI (match_dup 3)
   (const_int 0)))]
-  "TARGET_P9_VECTOR"
+  "TARGET_P9_VECTOR
+   && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)"
 {
   operands[3] = gen_reg_rtx (CCFPmode);
   operands[4] = CONST0_RTX (SImode);
@@ -5363,7 +5364,8 @@
 (define_expand "isinf2"
   [(use (match_operand:SI 0 "gpc_reg_operand"))
(use (match_operand:IEEE_FP 1 ""))]
-  "TARGET_HARD_FLOAT && TARGET_P9_VECTOR"
+  "TARGET_P9_VECTOR
+   && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)"
 {
   int mask = VSX_TEST_DATA_CLASS_POS_INF | VSX_TEST_DATA_CLASS_NEG_INF;
   emit_insn (gen_xststdc_ (operands[0], operands[1], GEN_INT (mask)));
@@ -5415,7 +5417,7 @@
(set (match_operand:SI 0 "register_operand" "=r")
(lt:SI (match_dup 2)
   (const_int 0)))]
-  "TARGET_P9_VECTOR"
+  "TARGET_FLOAT128_HW"
 {
   operands[2] = gen_reg_rtx (CCFPmode);
 })
@@ -5446,7 +5448,8 @@
   (match_operand:SI 2 "u7bit_cint_operand" "n")]
 

[gcc r15-2927] rs6000: Implement optab_isnormal for SFDF and IEEE128

2024-08-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:08108d57246210de7d5a00b1967dab7102d356bc

commit r15-2927-g08108d57246210de7d5a00b1967dab7102d356bc
Author: Haochen Gui 
Date:   Thu Aug 15 13:43:28 2024 +0800

rs6000: Implement optab_isnormal for SFDF and IEEE128

gcc/
PR target/97786
* config/rs6000/vsx.md (isnormal<mode>2): New expand.

gcc/testsuite/
PR target/97786
* gcc.target/powerpc/pr97786-7.c: New test.
* gcc.target/powerpc/pr97786-8.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md | 18 ++
 gcc/testsuite/gcc.target/powerpc/pr97786-7.c | 17 +
 gcc/testsuite/gcc.target/powerpc/pr97786-8.c | 12 
 3 files changed, 47 insertions(+)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0f18fd5cffe..705e70bd6e9 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5385,6 +5385,24 @@
   DONE;
 })
 
+(define_expand "isnormal<mode>2"
+  [(use (match_operand:SI 0 "gpc_reg_operand"))
+   (use (match_operand:IEEE_FP 1 "<fp_register_op>"))]
+  "TARGET_P9_VECTOR
+   && (!FLOAT128_IEEE_P (<MODE>mode) || TARGET_FLOAT128_HW)"
+{
+  rtx tmp = gen_reg_rtx (SImode);
+  /* It is neither NAN, infinite, zero, nor denormal.  */
+  int mask = VSX_TEST_DATA_CLASS_NAN
+| VSX_TEST_DATA_CLASS_POS_INF | VSX_TEST_DATA_CLASS_NEG_INF
+| VSX_TEST_DATA_CLASS_POS_ZERO | VSX_TEST_DATA_CLASS_NEG_ZERO
+| VSX_TEST_DATA_CLASS_POS_DENORMAL
+| VSX_TEST_DATA_CLASS_NEG_DENORMAL;
+  emit_insn (gen_xststdc_<mode> (tmp, operands[1], GEN_INT (mask)));
+  emit_insn (gen_xorsi3 (operands[0], tmp, const1_rtx));
+  DONE;
+})
+
 ;; The VSX Scalar Test Negative Quad-Precision
 (define_expand "xststdcnegqp_"
   [(set (match_dup 2)
diff --git a/gcc/testsuite/gcc.target/powerpc/pr97786-7.c 
b/gcc/testsuite/gcc.target/powerpc/pr97786-7.c
new file mode 100644
index 000..eb01eed39d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97786-7.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+int test1 (double x)
+{
+  return __builtin_isnormal (x);
+}
+
+int test2 (float x)
+{
+  return __builtin_isnormal (x);
+}
+
+/* { dg-final { scan-assembler-not {\mfcmp} } } */
+/* { dg-final { scan-assembler-times {\mxststdcsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxststdcdp\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr97786-8.c 
b/gcc/testsuite/gcc.target/powerpc/pr97786-8.c
new file mode 100644
index 000..eba90d3b1b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97786-8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -mabi=ieeelongdouble -Wno-psabi" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+int test1 (long double x)
+{
+  return __builtin_isnormal (x);
+}
+
+/* { dg-final { scan-assembler-not {\mxscmpuqp\M} } } */
+/* { dg-final { scan-assembler {\mxststdcqp\M} } } */


[gcc r15-3013] aarch64: Implement 16-byte vector mode const0 store by TImode

2024-08-18 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:8d6c6fbc5271dde433998c09407b30e2cf195420

commit r15-3013-g8d6c6fbc5271dde433998c09407b30e2cf195420
Author: Haochen Gui 
Date:   Mon Aug 19 10:35:47 2024 +0800

aarch64: Implement 16-byte vector mode const0 store by TImode

gcc/
* config/aarch64/aarch64-simd.md (mov<mode> for VSTRUCT_QD):
Expand 16-byte vector mode const0 store by TImode.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 475f19766c3..23c03a96371 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7809,7 +7809,16 @@
(match_operand:VSTRUCT_QD 1 "general_operand"))]
   "TARGET_FLOAT"
 {
-  if (can_create_pseudo_p ())
+  if (known_eq (GET_MODE_SIZE (<MODE>mode), 16)
+  && operands[1] == CONST0_RTX (<MODE>mode)
+  && MEM_P (operands[0])
+  && (can_create_pseudo_p ()
+ || memory_address_p (TImode, XEXP (operands[0], 0))))
+{
+  operands[0] = adjust_address (operands[0], TImode, 0);
+  operands[1] = CONST0_RTX (TImode);
+}
+  else if (can_create_pseudo_p ())
 {
   if (GET_CODE (operands[0]) != REG)
operands[1] = force_reg (mode, operands[1]);


[gcc r15-480] rs6000: Enable overlapped by-pieces operations

2024-05-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:fbd115fe83e96e0796cd8e262ed773b0ba07db81

commit r15-480-gfbd115fe83e96e0796cd8e262ed773b0ba07db81
Author: Haochen Gui 
Date:   Tue May 14 16:37:06 2024 +0800

rs6000: Enable overlapped by-pieces operations

This patch enables overlapped by-piece operations by defining
TARGET_OVERLAP_OP_BY_PIECES_P to true.  On rs6000, default move/set/clear
ratio is 2.  So the overlap is only enabled with compare by-pieces.

gcc/
* config/rs6000/rs6000.cc (TARGET_OVERLAP_OP_BY_PIECES_P): Define.

gcc/testsuite/
* gcc.target/powerpc/block-cmp-9.c: New.

Diff:
---
 gcc/config/rs6000/rs6000.cc|  3 +++
 gcc/testsuite/gcc.target/powerpc/block-cmp-9.c | 11 +++
 2 files changed, 14 insertions(+)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 117999613d83..e713a1e1d570 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1776,6 +1776,9 @@ static const scoped_attribute_specs *const 
rs6000_attribute_table[] =
 #undef TARGET_CONST_ANCHOR
 #define TARGET_CONST_ANCHOR 0x8000
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
+
 
 
 /* Processor table.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/block-cmp-9.c 
b/gcc/testsuite/gcc.target/powerpc/block-cmp-9.c
new file mode 100644
index ..f16429c2ffb0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/block-cmp-9.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not {\ml[hb]z\M} } } */
+
+/* Test if by-piece overlap compare is enabled and following case is
+   implemented by two overlap word loads and compares.  */
+
+int foo (const char* s1, const char* s2)
+{
+  return __builtin_memcmp (s1, s2, 7) == 0;
+}