Re: [PATCH-1v2, rs6000] Enable SImode in FP registers on P7 [PR88558]

2023-09-14 Thread HAO CHEN GUI via Gcc-patches
Hi Kewen,

在 2023/9/12 17:33, Kewen.Lin 写道:
> Ok, at least regression testing doesn't expose any needs to do disparaging
> for this.  Could you also test this patch with SPEC2017 for P7 and P8
> separately at options like -O2 or -O3, to see if there is any assembly
> change, and if yes filtering out some typical to check it's expected or
> not?  I think it can help us to better evaluate the impact.  Thanks!

Just compared the object files of SPEC2017 for P7 and P8. There is no
difference for P7. For P8, some object files differ, and all of the
differences are of the same kind: the patched object files replace xxlor
with fmr. This is expected, as the fmr alternative is added ahead of xxlor
in "*movsi_internal1".

Thanks
Gui Haochen


[PATCH-1v2, rs6000] Enable SImode in FP registers on P7 [PR88558]

2023-09-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch enables SImode in FP registers on P7. Instruction "fctiw"
stores its integer output in an FP register. So SImode in FP registers
needs to be enabled on P7 if we want to support "fctiw" on P7.

  The test case is in the second patch which implements 32bit inline
lrint.

  Compared to the last version, the main change is to remove the disparaging
on the "fmr" alternatives. Testing shows it doesn't cause regressions.
https://gcc.gnu.org/pipermail/gcc-patches/2023-August/628435.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.


ChangeLog
rs6000: enable SImode in FP register on P7

gcc/
PR target/88558
* config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached):
Enable SImode in FP registers on P7.
* config/rs6000/rs6000.md (*movsi_internal1): Add fmr for SImode
move between FP registers.  Set attribute isa of stfiwx to "*"
and attribute isa of lfiwzx to "p7".

patch.diff
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 44b448d2ba6..99085c2cdd7 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1903,7 +1903,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, 
machine_mode mode)
  if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
return 1;

- if (TARGET_P8_VECTOR && (mode == SImode))
+ if (TARGET_POPCNTD && mode == SImode)
return 1;

  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index cdab49fbb91..edf49bd74e3 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7566,7 +7566,7 @@ (define_split

 (define_insn "*movsi_internal1"
   [(set (match_operand:SI 0 "nonimmediate_operand"
- "=r, r,
+ "=r, r,  d,
   r,  d,  v,
   m,  ?Z, ?Z,
   r,  r,  r,  r,
@@ -7575,7 +7575,7 @@ (define_insn "*movsi_internal1"
   wa, r,
   r,  *h, *h")
(match_operand:SI 1 "input_operand"
- "r,  U,
+ "r,  U,  d,
   m,  ?Z, ?Z,
   r,  d,  v,
   I,  L,  eI, n,
@@ -7588,6 +7588,7 @@ (define_insn "*movsi_internal1"
   "@
mr %0,%1
la %0,%a1
+   fmr %0,%1
lwz%U1%X1 %0,%1
lfiwzx %0,%y1
lxsiwzx %x0,%y1
@@ -7611,7 +7612,7 @@ (define_insn "*movsi_internal1"
mt%0 %1
nop"
   [(set_attr "type"
- "*,  *,
+ "*,  *,  fpsimple,
   load,   fpload, fpload,
   store,  fpstore,fpstore,
   *,  *,  *,  *,
@@ -7620,7 +7621,7 @@ (define_insn "*movsi_internal1"
   mtvsr,  mfvsr,
   *,  *,  *")
(set_attr "length"
- "*,  *,
+ "*,  *,  *,
   *,  *,  *,
   *,  *,  *,
   *,  *,  *,  8,
@@ -7629,9 +7630,9 @@ (define_insn "*movsi_internal1"
   *,  *,
   *,  *,  *")
(set_attr "isa"
- "*,  *,
-  *,  p8v,p8v,
-  *,  p8v,p8v,
+ "*,  *,  *,
+  *,  p7, p8v,
+  *,  *,  p8v,
   *,  *,  p10,*,
   p8v,p9v,p9v,p8v,
   p9v,p8v,p9v,



[PATCH-2v2, rs6000] Implement 32bit inline lrint [PR88558]

2023-09-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements 32bit inline lrint by "fctiw". It depends on
patch 1 to do SImode moves from FP registers on P7.

  Compared to last version, the main change is to add tests for "lrintf"
and adjust the count of corresponding instructions.
https://gcc.gnu.org/pipermail/gcc-patches/2023-August/628436.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: support 32bit inline lrint

gcc/
PR target/88558
* config/rs6000/rs6000.md (lrintdi2): Remove TARGET_FPRND
from insn condition.
(lrintsi2): New insn pattern for 32bit lrint.

gcc/testsuite/
PR target/88558
* gcc.target/powerpc/pr88558.h: New.
* gcc.target/powerpc/pr88558-p7.c: New.
* gcc.target/powerpc/pr88558-p8.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index edf49bd74e3..a41898e0e08 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6655,10 +6655,18 @@ (define_insn "lrintdi2"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
(unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "")]
   UNSPEC_FCTID))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT"
   "fctid %0,%1"
   [(set_attr "type" "fp")])

+(define_insn "lrintsi2"
+  [(set (match_operand:SI 0 "gpc_reg_operand" "=d")
+   (unspec:SI [(match_operand:SFDF 1 "gpc_reg_operand" "")]
+  UNSPEC_FCTIW))]
+  "TARGET_HARD_FLOAT && TARGET_POPCNTD"
+  "fctiw %0,%1"
+  [(set_attr "type" "fp")])
+
 (define_insn "btrunc2"
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c 
b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
new file mode 100644
index 000..f302491c4d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=power7" } */
+
+/* -fno-math-errno is required to make {i,l,ll}rint inlined */
+
+#include "pr88558.h"
+
+/* { dg-final { scan-assembler-times {\mfctid\M} 3 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctid\M} 1 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 3 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mstfiwx\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mstfiwx\M} 3 { target ilp32 } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p8.c 
b/gcc/testsuite/gcc.target/powerpc/pr88558-p8.c
new file mode 100644
index 000..33398aa74c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p8.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=power8" } */
+
+/* -fno-math-errno is required to make {i,l,ll}rint inlined */
+
+#include "pr88558.h"
+
+/* { dg-final { scan-assembler-times {\mfctid\M} 3 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctid\M} 1 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 3 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mmfvsrwz\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mmfvsrwz\M} 3 { target ilp32 } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558.h 
b/gcc/testsuite/gcc.target/powerpc/pr88558.h
new file mode 100644
index 000..698640c0ef7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558.h
@@ -0,0 +1,19 @@
+long int test1 (double a)
+{
+  return __builtin_lrint (a);
+}
+
+long long test2 (double a)
+{
+  return __builtin_llrint (a);
+}
+
+int test3 (double a)
+{
+  return __builtin_irint (a);
+}
+
+long int test4 (float a)
+{
+  return __builtin_lrintf (a);
+}


Re: [PATCH, rs6000] Call vector load/store with length expand only on 64-bit Power10 [PR96762]

2023-08-30 Thread HAO CHEN GUI via Gcc-patches
Kewen,
  I refined the patch according to your comments and it passed bootstrap
and regression test.

  I committed it as
https://gcc.gnu.org/g:946b8967b905257ac9f140225db744c9a6ab91be

Thanks
Gui Haochen

在 2023/8/29 16:55, Kewen.Lin 写道:
> Hi Haochen,
> 
> on 2023/8/29 10:50, HAO CHEN GUI wrote:
>> Hi,
>>   This patch adds "TARGET_64BIT" check when calling vector load/store
>> with length expand in expand_block_move. It matches the expand condition
>> of "lxvl" and "stxvl" defined in vsx.md.
>>
>>   This patch fixes the ICE occurred with the test case on 32-bit Power10.
>>
>>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
>>
>> Thanks
>> Gui Haochen
>>
>>
>> ChangeLog
>> rs6000: call vector load/store with length expand only on 64-bit Power10
>>
>> gcc/
>>  PR target/96762
>>  * config/rs6000/rs6000-string.cc (expand_block_move): Call vector
>>  load/store with length expand only on 64-bit Power10.
>>
>> gcc/testsuite/
>>  PR target/96762
>>  * gcc.target/powerpc/pr96762.c: New.
>>
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/rs6000-string.cc 
>> b/gcc/config/rs6000/rs6000-string.cc
>> index cd8ee8c..d1b48c2 100644
>> --- a/gcc/config/rs6000/rs6000-string.cc
>> +++ b/gcc/config/rs6000/rs6000-string.cc
>> @@ -2811,8 +2811,9 @@ expand_block_move (rtx operands[], bool might_overlap)
>>gen_func.mov = gen_vsx_movv2di_64bit;
>>  }
>>else if (TARGET_BLOCK_OPS_UNALIGNED_VSX
>> -   && TARGET_POWER10 && bytes < 16
>> -   && orig_bytes > 16
>> +   /* Only use lxvl/stxvl on 64bit POWER10.  */
>> +   && TARGET_POWER10 && TARGET_64BIT
>> +   && bytes < 16 && orig_bytes > 16
>> && !(bytes == 1 || bytes == 2
>>  || bytes == 4 || bytes == 8)
>> && (align >= 128 || !STRICT_ALIGNMENT))
> 
> Nit: Since you touched this part of code, could you format it better as well, 
> like:
> 
>   else if (TARGET_BLOCK_OPS_UNALIGNED_VSX
>  /* Only use lxvl/stxvl on 64bit POWER10.  */
>  && TARGET_POWER10
>  && TARGET_64BIT
>  && bytes < 16
>  && orig_bytes > 16
>  && !(bytes == 1
>   || bytes == 2
>   || bytes == 4
>   || bytes == 8)
>  && (align >= 128
>  || !STRICT_ALIGNMENT))
> 
> 
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr96762.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr96762.c
>> new file mode 100644
>> index 000..1145dd1
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr96762.c
>> @@ -0,0 +1,11 @@
>> +/* { dg-do compile { target ilp32 } } */
> 
> Nit: we can compile this on lp64, so you can remove the ilp32 restriction,
> ...
> 
>> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
>> +
> 
> ... but add one comment line to note the initial purpose, like:
> 
> /* Verify there is no ICE on ilp32 env.  */
> 
> or similar.
> 
> Okay for trunk with these nits fixed, thanks!
> 
> BR,
> Kewen
> 
>> +extern void foo (char *);
>> +
>> +void
>> +bar (void)
>> +{
>> +  char zj[] = "";
>> +  foo (zj);
>> +}


[PATCH, rs6000] Call vector load/store with length expand only on 64-bit Power10 [PR96762]

2023-08-28 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds "TARGET_64BIT" check when calling vector load/store
with length expand in expand_block_move. It matches the expand condition
of "lxvl" and "stxvl" defined in vsx.md.

  This patch fixes the ICE that occurred with the test case on 32-bit Power10.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen


ChangeLog
rs6000: call vector load/store with length expand only on 64-bit Power10

gcc/
PR target/96762
* config/rs6000/rs6000-string.cc (expand_block_move): Call vector
load/store with length expand only on 64-bit Power10.

gcc/testsuite/
PR target/96762
* gcc.target/powerpc/pr96762.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index cd8ee8c..d1b48c2 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -2811,8 +2811,9 @@ expand_block_move (rtx operands[], bool might_overlap)
  gen_func.mov = gen_vsx_movv2di_64bit;
}
   else if (TARGET_BLOCK_OPS_UNALIGNED_VSX
-  && TARGET_POWER10 && bytes < 16
-  && orig_bytes > 16
+  /* Only use lxvl/stxvl on 64bit POWER10.  */
+  && TARGET_POWER10 && TARGET_64BIT
+  && bytes < 16 && orig_bytes > 16
   && !(bytes == 1 || bytes == 2
|| bytes == 4 || bytes == 8)
   && (align >= 128 || !STRICT_ALIGNMENT))
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96762.c 
b/gcc/testsuite/gcc.target/powerpc/pr96762.c
new file mode 100644
index 000..1145dd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr96762.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target ilp32 } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+extern void foo (char *);
+
+void
+bar (void)
+{
+  char zj[] = "";
+  foo (zj);
+}


[PATCH-2, rs6000] Implement 32bit inline lrint [PR88558]

2023-08-25 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements 32bit inline lrint by "fctiw". It depends on
patch 1 to do SImode moves from FP registers on P7.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: support 32bit inline lrint

gcc/
PR target/88558
* config/rs6000/rs6000.md (lrintdi2): Remove TARGET_FPRND
from insn condition.
(lrintsi2): New insn pattern for 32bit lrint.

gcc/testsuite/
PR target/88558
* gcc.target/powerpc/pr88558.h: New.
* gcc.target/powerpc/pr88558-p7.c: New.
* gcc.target/powerpc/pr88558-p8v.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index fd263e8dfe3..b36304de8c6 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6655,10 +6655,18 @@ (define_insn "lrintdi2"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
(unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "")]
   UNSPEC_FCTID))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT"
   "fctid %0,%1"
   [(set_attr "type" "fp")])

+(define_insn "lrintsi2"
+  [(set (match_operand:SI 0 "gpc_reg_operand" "=d")
+   (unspec:SI [(match_operand:SFDF 1 "gpc_reg_operand" "")]
+  UNSPEC_FCTIW))]
+  "TARGET_HARD_FLOAT && TARGET_POPCNTD"
+  "fctiw %0,%1"
+  [(set_attr "type" "fp")])
+
 (define_insn "btrunc2"
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c 
b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
new file mode 100644
index 000..6437c55fa61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=power7" } */
+
+#include "pr88558.h"
+
+/* { dg-final { scan-assembler-times {\mfctid\M} 2 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctid\M} 1 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 2 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mstfiwx\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p8v.c 
b/gcc/testsuite/gcc.target/powerpc/pr88558-p8v.c
new file mode 100644
index 000..fd22123ffb6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p8v.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=power8" } */
+
+long int foo (double a)
+{
+  return __builtin_lrint (a);
+}
+
+long long bar (double a)
+{
+  return __builtin_llrint (a);
+}
+
+int baz (double a)
+{
+  return __builtin_irint (a);
+}
+
+/* { dg-final { scan-assembler-times {\mfctid\M} 2 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctid\M} 1 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 2 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mmfvsrwz\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558.h 
b/gcc/testsuite/gcc.target/powerpc/pr88558.h
new file mode 100644
index 000..0cc0c68dd4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558.h
@@ -0,0 +1,14 @@
+long int foo (double a)
+{
+  return __builtin_lrint (a);
+}
+
+long long bar (double a)
+{
+  return __builtin_llrint (a);
+}
+
+int baz (double a)
+{
+  return __builtin_irint (a);
+}





[PATCH-1, rs6000] Enable SImode in FP register on P7 [PR88558]

2023-08-25 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch enables SImode in FP registers on P7. Instruction "fctiw"
stores its integer output in an FP register. So SImode in FP registers
needs to be enabled on P7 if we want to support "fctiw" on P7.

  The test case is in the second patch which implements 32bit inline
lrint.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: enable SImode in FP register on P7

gcc/
PR target/88558
* config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached):
Enable SImode in FP registers on P7.
* config/rs6000/rs6000.md (*movsi_internal1): Add fmr for SImode
move between FP registers.  Set attribute isa of stfiwx to "*"
and attribute isa of lfiwzx to "p7".

patch.diff
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 44b448d2ba6..99085c2cdd7 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1903,7 +1903,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, 
machine_mode mode)
  if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
return 1;

- if (TARGET_P8_VECTOR && (mode == SImode))
+ if (TARGET_POPCNTD && mode == SImode)
return 1;

  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index cdab49fbb91..ac5d29a2cf8 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7566,7 +7566,7 @@ (define_split

 (define_insn "*movsi_internal1"
   [(set (match_operand:SI 0 "nonimmediate_operand"
- "=r, r,
+ "=r, r,  ^d,
   r,  d,  v,
   m,  ?Z, ?Z,
   r,  r,  r,  r,
@@ -7575,7 +7575,7 @@ (define_insn "*movsi_internal1"
   wa, r,
   r,  *h, *h")
(match_operand:SI 1 "input_operand"
- "r,  U,
+ "r,  U,  ^d,
   m,  ?Z, ?Z,
   r,  d,  v,
   I,  L,  eI, n,
@@ -7588,6 +7588,7 @@ (define_insn "*movsi_internal1"
   "@
mr %0,%1
la %0,%a1
+   fmr %0,%1
lwz%U1%X1 %0,%1
lfiwzx %0,%y1
lxsiwzx %x0,%y1
@@ -7611,7 +7612,7 @@ (define_insn "*movsi_internal1"
mt%0 %1
nop"
   [(set_attr "type"
- "*,  *,
+ "*,  *,  fpsimple,
   load,   fpload, fpload,
   store,  fpstore,fpstore,
   *,  *,  *,  *,
@@ -7620,7 +7621,7 @@ (define_insn "*movsi_internal1"
   mtvsr,  mfvsr,
   *,  *,  *")
(set_attr "length"
- "*,  *,
+ "*,  *,  *,
   *,  *,  *,
   *,  *,  *,
   *,  *,  *,  8,
@@ -7629,9 +7630,9 @@ (define_insn "*movsi_internal1"
   *,  *,
   *,  *,  *")
(set_attr "isa"
- "*,  *,
-  *,  p8v,p8v,
-  *,  p8v,p8v,
+ "*,  *,  *,
+  *,  p7, p8v,
+  *,  *,  p8v,
   *,  *,  p10,*,
   p8v,p9v,p9v,p8v,
   p9v,p8v,p9v,


[PATCHv2, rs6000] Extract the element in dword0 by mfvsrd and shift/mask [PR110331]

2023-08-22 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements vector element extraction by mfvsrd and
shift/mask when the element is in dword0 of the vector. Originally,
GCC generates vsplat/mfvsrd on P8 and li/vextract on P9. Since mfvsrd
has lower latency than vextract and rldicl has lower latency than
vsplat, the new sequence is beneficial. In particular, the shift/mask
is not needed when the element is the first element of dword0, which
saves another rldicl when a sign-extended value is returned.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: Extract the element in dword0 by mfvsrd and shift/mask

gcc/
PR target/110331
* config/rs6000/rs6000-protos.h (rs6000_vsx_element_in_dword0_p):
Declare.
(rs6000_vsx_extract_element_from_dword0): Declare.
* config/rs6000/rs6000.cc (rs6000_vsx_element_in_dword0_p): New
function to judge if an element is in dword0 of a vector.
(rs6000_vsx_extract_element_from_dword0): Extract an element from
dword0 by mfvsrd and lshiftrt and mask.
* config/rs6000/rs6000.md (*rotl3_mask): Rename to...
(rotl3_mask): ...this
* config/rs6000/vsx.md (split pattern for p9 vector extract): Call
rs6000_vsx_extract_element_from_dword0 if the element is in dword0.
(*vsx_extract__di_p9): Assert the extracted elements isn't in
dword0.
(*vsx_extract_v4si_w023): Call
rs6000_vsx_extract_element_from_dword0 if the element is in dword0.
(*vsx_extract__zero_extend): Zero extend pattern for vector
extract on the element in dword0.
(*vsx_extract__p8): Call rs6000_vsx_extract_element_from_dword0
when the extracted element is in dword0.  Refined the pattern and
remove reload_completed from split condition.

gcc/testsuite/
PR target/110331
* gcc.target/powerpc/fold-vec-extract-char.p8.c: Set the extracted
elements in dword1.
* gcc.target/powerpc/fold-vec-extract-char.p9.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-int.p8.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-int.p9.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-short.p8.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-short.p9.c: Likewise.
* gcc.target/powerpc/p9-extract-1.c: Likewise.
* gcc.target/powerpc/pr110331-p8.c: New.
* gcc.target/powerpc/pr110331-p9.c: New.
* gcc.target/powerpc/pr110331.h: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index f70118ea40f..ccef280122b 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -161,6 +161,8 @@ extern bool rs6000_function_pcrel_p (struct function *);
 extern bool rs6000_pcrel_p (void);
 extern bool rs6000_fndecl_pcrel_p (const_tree);
 extern void rs6000_output_addr_vec_elt (FILE *, int);
+extern bool rs6000_vsx_element_in_dword0_p (rtx, enum machine_mode);
+extern void rs6000_vsx_extract_element_from_dword0 (rtx, rtx, rtx, bool);

 /* Different PowerPC instruction formats that are used by GCC.  There are
various other instruction formats used by the PowerPC hardware, but these
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index efe9adce1f8..e15f8bd964c 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -29105,6 +29105,74 @@ rs6000_opaque_type_invalid_use_p (gimple *stmt)
   return false;
 }

+/* Return true when the element is in dword0 of a vector.  Exclude word
+   element 1 (BE order) as the word can be extracted by mfvsrwz directly.  */
+
+bool
+rs6000_vsx_element_in_dword0_p (rtx op, enum machine_mode mode)
+{
+  gcc_assert (CONST_INT_P (op));
+  gcc_assert (mode == V16QImode || mode == V8HImode || mode == V4SImode);
+
+  int units = GET_MODE_NUNITS (mode);
+  int elt = INTVAL (op);
+  elt = BYTES_BIG_ENDIAN ? units - 1 - elt : elt;
+
+  if (elt > units / 2
+  || (elt == units / 2 && mode != V4SImode))
+return true;
+  else
+return false;
+}
+
+/* Extract element from dword0 by mfvsrd and lshiftrt and mask.  Extend_p
+   indicates if zero extend is needed or not.  */
+
+void
+rs6000_vsx_extract_element_from_dword0 (rtx dest, rtx src, rtx element,
+   bool extend_p)
+{
+  enum machine_mode mode = GET_MODE (src);
+  gcc_assert (rs6000_vsx_element_in_dword0_p (element, mode));
+
+  enum machine_mode dest_mode = GET_MODE (dest);
+  enum machine_mode inner_mode = GET_MODE_INNER (mode);
+  int units = GET_MODE_NUNITS (mode);
+  int elt = INTVAL (element);
+  elt = BYTES_BIG_ENDIAN ? units - 1 - elt : elt;
+  int value, shift;
+  unsigned int mask;
+
+  rtx vec_tmp = gen_lowpart (V2DImode, src);
+  rtx tmp1 = can_create_pseudo_p ()
+? gen_reg_rtx (DImode)
+: simplify_gen_subreg (DImode, dest, dest_mode, 0);
+  value = BYTES_BIG_ENDIAN ? 0 : 1;
+  emit_insn (gen_vsx_extract_v2di 

Re: [PATCH-1, combine] Don't widen shift mode when target has rotate/mask instruction on original mode [PR93738]

2023-08-20 Thread HAO CHEN GUI via Gcc-patches
Jeff,
  Thanks a lot for your comments.

  The shift mode is widened on i1/i2 before they're combined with i3 into
newpat. The newpat matches the rotate/mask pattern, but i1/i2 themselves
don't match the rotate/mask pattern.

  I did an experiment to disable widening the shift mode for
lshiftrt. I tested it on powerpc/x86/aarch64, and no regression occurred.
I thought that widening the shift mode was helpful for newpat matching. But
it seems not; at least it has no impact on powerpc/x86/aarch64.

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 4bf867d74b0..0b9b115f9bb 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -10479,11 +10479,6 @@ try_widen_shift_mode (enum rtx_code code, rtx op, int 
count,
   return orig_mode;

 case LSHIFTRT:
-  /* Similarly here but with zero bits.  */
-  if (HWI_COMPUTABLE_MODE_P (mode)
- && (nonzero_bits (op, mode) & ~GET_MODE_MASK (orig_mode)) == 0)
-   return mode;
-
   /* We can also widen if the bits brought in will be masked off.  This
 operation is performed in ORIG_MODE.  */
   if (outer_code == AND)

Segher,
  Could you tell me the purpose of widening the shift mode in
simplify_shift_const? Does it definitely reduce the rtx cost, or does it
help match patterns? Thanks a lot.

Thanks
Gui Haochen


在 2023/8/5 7:32, Jeff Law 写道:
> 
> 
> On 7/20/23 18:59, HAO CHEN GUI wrote:
>> Hi Jeff,
>>
>> 在 2023/7/21 5:27, Jeff Law 写道:
>>> Wouldn't it make more sense to just try rotate/mask in the original mode 
>>> before trying a shift in a widened mode?  I'm not sure why we need a target 
>>> hook here.
>>
>> There is no change to try rotate/mask with the original mode when
>> expensive_optimizations is set. The subst widens the shift mode.
> But we can add it before the attempt in the wider mode.
> 
>>
>>    if (flag_expensive_optimizations)
>>  {
>>    /* Pass pc_rtx so no substitutions are done, just
>>   simplifications.  */
>>    if (i1)
>>  {
>>    subst_low_luid = DF_INSN_LUID (i1);
>>    i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
>>  }
>>
>>    subst_low_luid = DF_INSN_LUID (i2);
>>    i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
>>  }
>>
>> I don't know if the wider mode is helpful to other targets, so
>> I added the target hook.
> In this scenario we're often better off relying on rtx_costs (even with all 
> its warts) rather than adding yet another target hook.
> 
> I'd love to hear from Segher here to see if he's got other ideas.
> 
> jeff


Re: [PATCHv4, rs6000] Generate mfvsrwz for all subtargets and remove redundant zero extend [PR106769]

2023-08-16 Thread HAO CHEN GUI via Gcc-patches
Committed after fixing the comments.

https://gcc.gnu.org/g:a79cf858b39e01c80537bc5d47a5e9004418c267

Thanks
Gui Haochen

在 2023/8/14 15:47, Kewen.Lin 写道:
> Hi Haochen,
> 
> on 2023/8/14 10:18, HAO CHEN GUI wrote:
>> Hi,
>>   This patch modifies vsx extract expand and generates mfvsrwz/stxsiwx
>> for all sub targets when the mode is V4SI and the extracted element is word
>> 1 from BE order. Also this patch adds a insn pattern for mfvsrwz which
>> helps eliminate redundant zero extend.
>>
>>   Compared to last version, the main change is to put the word index
>> checking in the split condition of "*vsx_extract_v4si_w023". Also modified
>> some comments.
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/625380.html
>>
>>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
>>
>> Thanks
>> Gui Haochen
>>
>> ChangeLog
>> rs6000: Generate mfvsrwz for all platform and remove redundant zero extend
>>
>> mfvsrwz has lower latency than xxextractuw or vextuw[lr]x.  So it should be
>> generated even with p9 vector enabled.  Also the instruction is already
>> zero extended.  A combine pattern is needed to eliminate redundant zero
>> extend instructions.
>>
>> gcc/
>>  PR target/106769
>>  * config/rs6000/vsx.md (expand vsx_extract_): Set it only
>>  for V8HI and V16QI.
>>  (vsx_extract_v4si): New expand for V4SI extraction.
>>  (vsx_extract_v4si_w1): New insn pattern for V4SI extraction on
>>  word 1 from BE order.   
>>  (*mfvsrwz): New insn pattern for mfvsrwz.
>>  (*vsx_extract__di_p9): Assert that it won't be generated on
>>  word 1 from BE order.
>>  (*vsx_extract_si): Remove.
>>  (*vsx_extract_v4si_w023): New insn and split pattern on word 0, 2,
>>  3 from BE order.
>>
>> gcc/testsuite/
>>  PR target/106769
>>  * gcc.target/powerpc/pr106769.h: New.
>>  * gcc.target/powerpc/pr106769-p8.c: New.
>>  * gcc.target/powerpc/pr106769-p9.c: New.
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
>> index 0a34ceebeb5..1cbdc2f1c01 100644
>> --- a/gcc/config/rs6000/vsx.md
>> +++ b/gcc/config/rs6000/vsx.md
>> @@ -3722,9 +3722,9 @@ (define_insn "vsx_xxpermdi2__1"
>>  (define_expand  "vsx_extract_"
>>[(parallel [(set (match_operand: 0 "gpc_reg_operand")
>> (vec_select:
>> -(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
>> +(match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand")
>>  (parallel [(match_operand:QI 2 "const_int_operand")])))
>> -  (clobber (match_scratch:VSX_EXTRACT_I 3))])]
>> +  (clobber (match_scratch:VSX_EXTRACT_I2 3))])]
>>"VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT"
>>  {
>>/* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
>> @@ -3736,6 +3736,63 @@ (define_expand  "vsx_extract_"
>>  }
>>  })
>>
>> +(define_expand  "vsx_extract_v4si"
>> +  [(parallel [(set (match_operand:SI 0 "gpc_reg_operand")
>> +   (vec_select:SI
>> +(match_operand:V4SI 1 "gpc_reg_operand")
>> +(parallel [(match_operand:QI 2 "const_0_to_3_operand")])))
>> +  (clobber (match_scratch:V4SI 3))])]
>> +  "TARGET_DIRECT_MOVE_64BIT"
>> +{
>> +  /* The word 1 (BE order) can be extracted by mfvsrwz/stxsiwx.  So just
>> + fall through to vsx_extract_v4si_w1.  */
>> +  if (TARGET_P9_VECTOR
>> +  && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))
>> +{
>> +  emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1],
>> +  operands[2]));
>> +  DONE;
>> +}
>> +})
>> +
>> +/* Extract from word 1 (BE order);  */
> 
> Nit: I guessed I requested this before, please use ";" instead of
> "/* ... */" for the comments, to align with the existing ones.
> 
>> +(define_insn "vsx_extract_v4si_w1"
>> +  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
>> +(vec_select:SI
>> + (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
>> + (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
>> +   (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
>> +  "TARGET_DIRECT_MOVE_64BIT
>> +   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
>> +{
>> +   if (which_alternative == 0)
>> + return "mfvsrwz %0,%x1";
>> +
>> +   if (which_alternative == 1)
>> + return "xxlor %x0,%x1,%x1";
>> +
>> +   if (which_alternative == 2)
>> + return "stxsiwx %x1,%y0";
>> +
>> +   return ASM_COMMENT_START " vec_extract to same register";
>> +}
>> +  [(set_attr "type" "mfvsr,veclogical,fpstore,*")
>> +   (set_attr "length" "4,4,4,0")
>> +   (set_attr "isa" "p8v,*,p8v,*")])
>> +
>> +(define_insn "*mfvsrwz"
>> +  [(set (match_operand:DI 0 "register_operand" "=r")
>> +(zero_extend:DI
>> +  (vec_select:SI
>> +(match_operand:V4SI 1 "vsx_register_operand" "wa")
>> +(parallel [(match_operand:QI 2 "const_int_operand" "n")]
>> +   (clobber (match_scratch:V4SI 3 "=v"))]

Re: [PATCH, rs6000] Skip redundant vector extract if the element is first element of dword0 [PR110429]

2023-08-16 Thread HAO CHEN GUI via Gcc-patches
Committed after tweaking and testing.
https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=d471bdb0453de7b738f49148b66d57cb5871937d

Thanks
Gui Haochen

在 2023/7/28 17:32, Kewen.Lin 写道:
> Hi Haochen,
> 
> on 2023/7/5 11:22, HAO CHEN GUI wrote:
>> Hi,
>>   This patch skips redundant vector extract insn to be generated when
>> the extracted element is the first element of dword0 and the destination
> 
> "The first element" is confusing, it's easy to be misunderstood as element
> 0, but in fact the extracted element index is: 
>   - for byte, 7 on BE while 8 on LE;
>   - for half word, 3 on BE while 4 on LE;
> 
> so maybe just say when the extracted index for byte and half word like above,
> the element to be stored is already in the corresponding place for stxsi[hb]x,
> we don't need a redundant vector extraction at all.
> 
>> is a memory operand. Only one 'stxsi[hb]x' instruction is enough.
>>
>>   The V4SImode is fixed in a previous patch.
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622101.html
>>
>>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
>> Thanks
>> Gui Haochen
>>
>> ChangeLog
>> rs6000: Skip redundant vector extract if the element is first element of
>> dword0
>>
>> gcc/
>>  PR target/110429
>>  * config/rs6000/vsx.md (*vsx_extract__store_p9): Skip vector
>>  extract when the element is the first element of dword0.
>>
>> gcc/testsuite/
>>  PR target/110429
>>  * gcc.target/powerpc/pr110429.c: New.
>>
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
>> index 0c269e4e8d9..b3fec910eb6 100644
>> --- a/gcc/config/rs6000/vsx.md
>> +++ b/gcc/config/rs6000/vsx.md
>> @@ -3855,7 +3855,22 @@ (define_insn_and_split "*vsx_extract__store_p9"
>>  (parallel [(match_dup 2)])))
>>(clobber (match_dup 4))])
>> (set (match_dup 0)
>> -(match_dup 3))])
>> +(match_dup 3))]
>> +{
>> +  enum machine_mode dest_mode = GET_MODE (operands[0]);
> 
> Nit: Move this line ...
> 
>> +
>> +  if (which_alternative == 0
>> +  && ((mode == V16QImode
>> +   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 7 : 8))
>> +  || (mode == V8HImode
>> +  && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 3 : 4
>> +{
> 
> ... here.
> 
>> +  emit_move_insn (operands[0],
>> +  gen_rtx_REG (dest_mode, REGNO (operands[3])));
>> +  DONE;
>> +}
>> +})
>> +
>>
>>  (define_insn_and_split  "*vsx_extract_si"
>>[(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110429.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr110429.c
>> new file mode 100644
>> index 000..5a938f9f90a
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr110429.c
>> @@ -0,0 +1,28 @@
>> +/* { dg-do compile } */
>> +/* { dg-skip-if "" { powerpc*-*-darwin* } } */
>> +/* { dg-require-effective-target powerpc_p9vector_ok } */
>> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
>> +/* { dg-require-effective-target has_arch_ppc64 } */
>> +
>> +#include <altivec.h>
>> +
>> +#ifdef __BIG_ENDIAN__
>> +#define DWORD0_FIRST_SHORT 3
>> +#define DWORD0_FIRST_CHAR 7
>> +#else
>> +#define DWORD0_FIRST_SHORT 4
>> +#define DWORD0_FIRST_CHAR 8
>> +#endif
>> +
>> +void vec_extract_short (vector short v, short* p)
>> +{
>> +   *p = vec_extract(v, DWORD0_FIRST_SHORT);
>> +}
>> +
>> +void vec_extract_char (vector char v, char* p)
>> +{
>> +   *p = vec_extract(v, DWORD0_FIRST_CHAR);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times "stxsi\[hb\]x" 2 } } */
> 
> Nit: Break this check into stxsihx and stxsibx, and surround
> with \m and \M.
> 
>> +/* { dg-final { scan-assembler-not "vextractu\[hb\]" } } */
> 
> Also with \m and \M.
> 
> OK for trunk with these nits tweaked and testing goes well,
> thanks!
> 
> BR,
> Kewen


[PATCHv4, rs6000] Generate mfvsrwz for all subtargets and remove redundant zero extend [PR106769]

2023-08-13 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch modifies vsx extract expand and generates mfvsrwz/stxsiwx
for all sub targets when the mode is V4SI and the extracted element is word
1 from BE order. Also this patch adds an insn pattern for mfvsrwz which
helps eliminate redundant zero extend.

  Compared to last version, the main change is to put the word index
checking in the split condition of "*vsx_extract_v4si_w023". Also modified
some comments.
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/625380.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: Generate mfvsrwz for all platform and remove redundant zero extend

mfvsrwz has lower latency than xxextractuw or vextuw[lr]x.  So it should be
generated even with p9 vector enabled.  Also the instruction is already
zero extended.  A combine pattern is needed to eliminate redundant zero
extend instructions.

gcc/
PR target/106769
* config/rs6000/vsx.md (expand vsx_extract_): Set it only
for V8HI and V16QI.
(vsx_extract_v4si): New expand for V4SI extraction.
(vsx_extract_v4si_w1): New insn pattern for V4SI extraction on
word 1 from BE order.   
(*mfvsrwz): New insn pattern for mfvsrwz.
(*vsx_extract__di_p9): Assert that it won't be generated on
word 1 from BE order.
(*vsx_extract_si): Remove.
(*vsx_extract_v4si_w023): New insn and split pattern on word 0, 2,
3 from BE order.

gcc/testsuite/
PR target/106769
* gcc.target/powerpc/pr106769.h: New.
* gcc.target/powerpc/pr106769-p8.c: New.
* gcc.target/powerpc/pr106769-p9.c: New.

patch.diff
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0a34ceebeb5..1cbdc2f1c01 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3722,9 +3722,9 @@ (define_insn "vsx_xxpermdi2__1"
 (define_expand  "vsx_extract_"
   [(parallel [(set (match_operand: 0 "gpc_reg_operand")
   (vec_select:
-   (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
+   (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand")
(parallel [(match_operand:QI 2 "const_int_operand")])))
- (clobber (match_scratch:VSX_EXTRACT_I 3))])]
+ (clobber (match_scratch:VSX_EXTRACT_I2 3))])]
   "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT"
 {
   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
@@ -3736,6 +3736,63 @@ (define_expand  "vsx_extract_"
 }
 })

+(define_expand  "vsx_extract_v4si"
+  [(parallel [(set (match_operand:SI 0 "gpc_reg_operand")
+  (vec_select:SI
+   (match_operand:V4SI 1 "gpc_reg_operand")
+   (parallel [(match_operand:QI 2 "const_0_to_3_operand")])))
+ (clobber (match_scratch:V4SI 3))])]
+  "TARGET_DIRECT_MOVE_64BIT"
+{
+  /* The word 1 (BE order) can be extracted by mfvsrwz/stxsiwx.  So just
+ fall through to vsx_extract_v4si_w1.  */
+  if (TARGET_P9_VECTOR
+  && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))
+{
+  emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1],
+ operands[2]));
+  DONE;
+}
+})
+
+/* Extract from word 1 (BE order);  */
+(define_insn "vsx_extract_v4si_w1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
+   (vec_select:SI
+(match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
+(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
+   (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+{
+   if (which_alternative == 0)
+ return "mfvsrwz %0,%x1";
+
+   if (which_alternative == 1)
+ return "xxlor %x0,%x1,%x1";
+
+   if (which_alternative == 2)
+ return "stxsiwx %x1,%y0";
+
+   return ASM_COMMENT_START " vec_extract to same register";
+}
+  [(set_attr "type" "mfvsr,veclogical,fpstore,*")
+   (set_attr "length" "4,4,4,0")
+   (set_attr "isa" "p8v,*,p8v,*")])
+
+(define_insn "*mfvsrwz"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (zero_extend:DI
+ (vec_select:SI
+   (match_operand:V4SI 1 "vsx_register_operand" "wa")
+   (parallel [(match_operand:QI 2 "const_int_operand" "n")]
+   (clobber (match_scratch:V4SI 3 "=v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+  "mfvsrwz %0,%x1"
+  [(set_attr "type" "mfvsr")
+   (set_attr "isa" "p8v")])
+
 (define_insn "vsx_extract__p9"
   [(set (match_operand: 0 "gpc_reg_operand" "=r,")
(vec_select:
@@ -3807,6 +3864,9 @@ (define_insn_and_split "*vsx_extract__di_p9"
(parallel [(match_dup 2)])))
  (clobber (match_dup 3))])]
 {
+  gcc_assert (mode != V4SImode
+ || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2));
+
   operands[4] = gen_rtx_REG (mode, REGNO 

[PATCHv2, rs6000] Generate mfvsrwz for all subtargets and remove redundant zero extend [PR106769]

2023-07-24 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch modifies vsx extract expand and generates mfvsrwz/stxsiwx
for all subtargets when the mode is V4SI and the index of extracted element
is 1 for BE and 2 for LE. Also this patch adds an insn pattern for mfvsrwz
which helps eliminate redundant zero extend.

  Compared to last version, the main change is to move "vsx_extract_v4si_w1"
and "*mfvsrwz" to the front of "*vsx_extract__di_p9". Also some insn
conditions are changed to assertions.
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/625128.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: Generate mfvsrwz for all platform and remove redundant zero extend

mfvsrwz has lower latency than xxextractuw or vextuw[lr]x.  So it should be
generated even with p9 vector enabled.  Also the instruction is already
zero extended.  A combine pattern is needed to eliminate redundant zero
extend instructions.

gcc/
PR target/106769
* config/rs6000/vsx.md (expand vsx_extract_): Set it only
for V8HI and V16QI.
(vsx_extract_v4si): New expand for V4SI extraction.
(vsx_extract_v4si_w1): New insn pattern for V4SI extraction
when the index of extracted element is 1 with BE and 2 with LE.
(*mfvsrwz): New insn pattern.
(*vsx_extract__di_p9): Not generate the insn when the index
of extracted element is 1 with BE and 2 with LE.
(*vsx_extract_si): Removed.
(*vsx_extract_v4si_not_w1): New insn and split pattern which deals
with the cases not handled by vsx_extract_v4si_w1.

gcc/testsuite/
PR target/106769
* gcc.target/powerpc/pr106769.h: New.
* gcc.target/powerpc/pr106769-p8.c: New.
* gcc.target/powerpc/pr106769-p9.c: New.

patch.diff
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0a34ceebeb5..0065b76fef8 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3722,9 +3722,9 @@ (define_insn "vsx_xxpermdi2__1"
 (define_expand  "vsx_extract_"
   [(parallel [(set (match_operand: 0 "gpc_reg_operand")
   (vec_select:
-   (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
+   (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand")
(parallel [(match_operand:QI 2 "const_int_operand")])))
- (clobber (match_scratch:VSX_EXTRACT_I 3))])]
+ (clobber (match_scratch:VSX_EXTRACT_I2 3))])]
   "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT"
 {
   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
@@ -3736,6 +3736,63 @@ (define_expand  "vsx_extract_"
 }
 })

+(define_expand  "vsx_extract_v4si"
+  [(parallel [(set (match_operand:SI 0 "gpc_reg_operand")
+  (vec_select:SI
+   (match_operand:V4SI 1 "gpc_reg_operand")
+   (parallel [(match_operand:QI 2 "const_0_to_3_operand")])))
+ (clobber (match_scratch:V4SI 3))])]
+  "TARGET_DIRECT_MOVE_64BIT"
+{
+  /* The word 1 (BE order) can be extracted by mfvsrwz/stxsiwx.  So just
+ fall through to vsx_extract_v4si_w1.  */
+  if (TARGET_P9_VECTOR
+  && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))
+{
+  emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1],
+ operands[2]));
+  DONE;
+}
+})
+
+/* Extract from word 1 (BE order).  */
+(define_insn "vsx_extract_v4si_w1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
+   (vec_select:SI
+(match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
+(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
+   (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+{
+   if (which_alternative == 0)
+ return "mfvsrwz %0,%x1";
+
+   if (which_alternative == 1)
+ return "xxlor %x0,%x1,%x1";
+
+   if (which_alternative == 2)
+ return "stxsiwx %x1,%y0";
+
+   return ASM_COMMENT_START " vec_extract to same register";
+}
+  [(set_attr "type" "mfvsr,veclogical,fpstore,*")
+   (set_attr "length" "4,4,4,0")
+   (set_attr "isa" "p8v,*,p8v,*")])
+
+(define_insn "*mfvsrwz"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (zero_extend:DI
+ (vec_select:SI
+   (match_operand:V4SI 1 "vsx_register_operand" "wa")
+   (parallel [(match_operand:QI 2 "const_int_operand" "n")]
+   (clobber (match_scratch:V4SI 3 "=v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+  "mfvsrwz %0,%x1"
+  [(set_attr "type" "mfvsr")
+   (set_attr "isa" "p8v")])
+
 (define_insn "vsx_extract__p9"
   [(set (match_operand: 0 "gpc_reg_operand" "=r,")
(vec_select:
@@ -3798,7 +3855,7 @@ (define_insn_and_split "*vsx_extract__di_p9"
  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,")
  (parallel [(match_operand:QI 2 "const_int_operand" 

[PATCHv2, rs6000] Generate mfvsrwz for all subtargets and remove redundant zero extend [PR106769]

2023-07-20 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch modifies vsx extract expand and generates mfvsrwz/stxsiwx
for all subtargets when the mode is V4SI and the index of extracted element
is 1 for BE and 2 for LE. Also this patch adds an insn pattern for mfvsrwz
which can help eliminate redundant zero extend.

  Compared to last version, the main change is to add a new expand for V4SI
and separate "vsx_extract_si" to 2 insn patterns.
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622101.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen


ChangeLog
rs6000: Generate mfvsrwz for all subtargets and remove redundant zero extend

mfvsrwz has lower latency than xxextractuw or vextuw[lr]x.  So it should be
generated even with p9 vector enabled.  Also the instruction is already
zero extended.  A combine pattern is needed to eliminate redundant zero
extend instructions.

gcc/
PR target/106769
* config/rs6000/vsx.md (expand vsx_extract_): Set it only
for V8HI and V16QI.
(vsx_extract_v4si): New expand for V4SI.
(*vsx_extract__di_p9): Not generate the insn when it can
be generated by mfvsrwz.
(mfvsrwz): New insn pattern for zero extended vsx_extract_v4si.
(*vsx_extract_si): Removed.
(vsx_extract_v4si_0): New insn pattern to deal with V4SI extract
when the index of extracted element is 1 with BE and 2 with LE.
(vsx_extract_v4si_1): New insn and split pattern which deals with
the cases not handled by vsx_extract_v4si_0.

gcc/testsuite/
PR target/106769
* gcc.target/powerpc/pr106769.h: New.
* gcc.target/powerpc/pr106769-p8.c: New.
* gcc.target/powerpc/pr106769-p9.c: New.

patch.diff
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0a34ceebeb5..ad249441bcf 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3722,9 +3722,9 @@ (define_insn "vsx_xxpermdi2__1"
 (define_expand  "vsx_extract_"
   [(parallel [(set (match_operand: 0 "gpc_reg_operand")
   (vec_select:
-   (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
+   (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand")
(parallel [(match_operand:QI 2 "const_int_operand")])))
- (clobber (match_scratch:VSX_EXTRACT_I 3))])]
+ (clobber (match_scratch:VSX_EXTRACT_I2 3))])]
   "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT"
 {
   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
@@ -3736,6 +3736,23 @@ (define_expand  "vsx_extract_"
 }
 })

+(define_expand  "vsx_extract_v4si"
+  [(parallel [(set (match_operand:SI 0 "gpc_reg_operand")
+  (vec_select:SI
+   (match_operand:V4SI 1 "gpc_reg_operand")
+   (parallel [(match_operand:QI 2 "const_0_to_3_operand")])))
+ (clobber (match_scratch:V4SI 3))])]
+  "TARGET_DIRECT_MOVE_64BIT"
+{
+  if (TARGET_P9_VECTOR
+  && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))
+{
+  emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1],
+ operands[2]));
+  DONE;
+}
+})
+
 (define_insn "vsx_extract__p9"
   [(set (match_operand: 0 "gpc_reg_operand" "=r,")
(vec_select:
@@ -3798,7 +3815,9 @@ (define_insn_and_split "*vsx_extract__di_p9"
  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,")
  (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]
(clobber (match_scratch:SI 3 "=r,X"))]
-  "VECTOR_MEM_VSX_P (mode) && TARGET_VEXTRACTUB"
+  "TARGET_VEXTRACTUB
+   && (mode != V4SImode
+   || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
   "#"
   "&& reload_completed"
   [(parallel [(set (match_dup 4)
@@ -3830,58 +3849,78 @@ (define_insn_and_split "*vsx_extract__store_p9"
(set (match_dup 0)
(match_dup 3))])

-(define_insn_and_split  "*vsx_extract_si"
+(define_insn "mfvsrwz"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (zero_extend:DI
+ (vec_select:SI
+   (match_operand:V4SI 1 "vsx_register_operand" "wa")
+   (parallel [(match_operand:QI 2 "const_int_operand" "n")]
+   (clobber (match_scratch:V4SI 3 "=v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+  "mfvsrwz %0,%x1"
+  [(set_attr "type" "mfvsr")
+   (set_attr "isa" "p8v")])
+
+(define_insn "vsx_extract_v4si_0"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
+   (vec_select:SI
+(match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
+(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
+   (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && (!TARGET_P9_VECTOR || INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 
2))"
+{
+   if (which_alternative == 0)
+ return "mfvsrwz %0,%x1";
+
+   if (which_alternative == 1)
+ return "xxlor %x0,%x1,%x1";
+
+   if 

Re: [PATCH-1, combine] Don't widen shift mode when target has rotate/mask instruction on original mode [PR93738]

2023-07-20 Thread HAO CHEN GUI via Gcc-patches
Sorry for the typo
s/change/chance

在 2023/7/21 8:59, HAO CHEN GUI 写道:
> Hi Jeff,
> 
> 在 2023/7/21 5:27, Jeff Law 写道:
>> Wouldn't it make more sense to just try rotate/mask in the original mode 
>> before trying a shift in a widened mode?  I'm not sure why we need a target 
>> hook here.
> 
> There is no change to try rotate/mask with the original mode when
> expensive_optimizations is set. The subst widens the shift mode.
> 
>   if (flag_expensive_optimizations)
> {
>   /* Pass pc_rtx so no substitutions are done, just
>  simplifications.  */
>   if (i1)
> {
>   subst_low_luid = DF_INSN_LUID (i1);
>   i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
> }
> 
>   subst_low_luid = DF_INSN_LUID (i2);
>   i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
> }
> 
> I don't know if the wider mode is helpful to other targets, so
> I added the target hook.
> 
> Thanks
> Gui Haochen


Re: [PATCH-1, combine] Don't widen shift mode when target has rotate/mask instruction on original mode [PR93738]

2023-07-20 Thread HAO CHEN GUI via Gcc-patches
Hi Jeff,

在 2023/7/21 5:27, Jeff Law 写道:
> Wouldn't it make more sense to just try rotate/mask in the original mode 
> before trying a shift in a widened mode?  I'm not sure why we need a target 
> hook here.

There is no chance to try rotate/mask with the original mode when
expensive_optimizations is set. The subst widens the shift mode.

  if (flag_expensive_optimizations)
{
  /* Pass pc_rtx so no substitutions are done, just
 simplifications.  */
  if (i1)
{
  subst_low_luid = DF_INSN_LUID (i1);
  i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
}

  subst_low_luid = DF_INSN_LUID (i2);
  i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
}

I don't know if the wider mode is helpful to other targets, so
I added the target hook.

Thanks
Gui Haochen


Ping [PATCH v7, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2023-07-18 Thread HAO CHEN GUI via Gcc-patches
Hi,
  As the ticket(PR107013, adding fmin/max to RTL code) is suspended, I ping
this patch. The unspec of fmin/max can be replaced with corresponding RTL
code after that ticket is fixed.

https://gcc.gnu.org/pipermail/gcc-patches/2022-September/602181.html

Thanks
Gui Haochen


在 2022/9/26 11:35, HAO CHEN GUI 写道:
> Hi,
>   This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
> Tests show that outputs of xs[min/max]dp are consistent with the standard
> of C99 fmin/max.
> 
>   This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
> of smin/max when fast-math is not set. While fast-math is set, xs[min/max]dp
> are folded to MIN/MAX_EXPR in gimple, and finally expanded to smin/max.
> 
>   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-09-26 Haochen Gui 
> 
> gcc/
>   PR target/103605
>   * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Gimple
>   fold RS6000_BIF_XSMINDP and RS6000_BIF_XSMAXDP when fast-math is set.
>   * config/rs6000/rs6000.md (FMINMAX): New int iterator.
>   (minmax_op): New int attribute.
>   (UNSPEC_FMAX, UNSPEC_FMIN): New unspecs.
>   (f3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN.
>   * config/rs6000/rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set
>   pattern to fmaxdf3.
>   (__builtin_vsx_xsmindp): Set pattern to fmindf3.
> 
> gcc/testsuite/
>   PR target/103605
>   * gcc.dg/powerpc/pr103605.h: New.
>   * gcc.dg/powerpc/pr103605-1.c: New.
>   * gcc.dg/powerpc/pr103605-2.c: New.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
> b/gcc/config/rs6000/rs6000-builtin.cc
> index e925ba9fad9..944ae9fe55c 100644
> --- a/gcc/config/rs6000/rs6000-builtin.cc
> +++ b/gcc/config/rs6000/rs6000-builtin.cc
> @@ -1588,6 +1588,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>gimple_set_location (g, gimple_location (stmt));
>gsi_replace (gsi, g, true);
>return true;
> +/* fold into MIN_EXPR when fast-math is set.  */
> +case RS6000_BIF_XSMINDP:
>  /* flavors of vec_min.  */
>  case RS6000_BIF_XVMINDP:
>  case RS6000_BIF_XVMINSP:
> @@ -1614,6 +1616,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>gimple_set_location (g, gimple_location (stmt));
>gsi_replace (gsi, g, true);
>return true;
> +/* fold into MAX_EXPR when fast-math is set.  */
> +case RS6000_BIF_XSMAXDP:
>  /* flavors of vec_max.  */
>  case RS6000_BIF_XVMAXDP:
>  case RS6000_BIF_XVMAXSP:
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index f4a9f24bcc5..8b735493b40 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -1613,10 +1613,10 @@
>  XSCVSPDP vsx_xscvspdp {}
> 
>const double __builtin_vsx_xsmaxdp (double, double);
> -XSMAXDP smaxdf3 {}
> +XSMAXDP fmaxdf3 {}
> 
>const double __builtin_vsx_xsmindp (double, double);
> -XSMINDP smindf3 {}
> +XSMINDP fmindf3 {}
> 
>const double __builtin_vsx_xsrdpi (double);
>  XSRDPI vsx_xsrdpi {}
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index bf85baa5370..ae0dd98f0f9 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -158,6 +158,8 @@ (define_c_enum "unspec"
> UNSPEC_HASHCHK
> UNSPEC_XXSPLTIDP_CONST
> UNSPEC_XXSPLTIW_CONST
> +   UNSPEC_FMAX
> +   UNSPEC_FMIN
>])
> 
>  ;;
> @@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr"
>DONE;
>  })
> 
> +
> +(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
> +
> +(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
> +  (UNSPEC_FMIN "min")])
> +
> +(define_insn "f3"
> +  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
> + (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
> +   (match_operand:SFDF 2 "vsx_register_operand" "wa")]
> +  FMINMAX))]
> +  "TARGET_VSX && !flag_finite_math_only"
> +  "xsdp %x0,%x1,%x2"
> +  [(set_attr "type" "fp")]
> +)
> +
>  (define_expand "movcc"
> [(set (match_operand:GPR 0 "gpc_reg_operand")
>(if_then_else:GPR (match_operand 1 "comparison_operator")
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605-1.c 
> b/gcc/testsuite/gcc.target/powerpc/pr103605-1.c
> new file mode 100644
> index 000..923deec6a1e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr103605-1.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx" } */
> +/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */
> +/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */
> +
> +#include "pr103605.h"
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605-2.c 
> 

[PATCH-1, combine] Don't widen shift mode when target has rotate/mask instruction on original mode [PR93738]

2023-07-18 Thread HAO CHEN GUI via Gcc-patches
Hi,
  The shift mode will be widened in the combine pass if the operand has a
normal subreg. But when the target already has rotate/mask/insert
instructions on the narrow mode, it's unnecessary to widen the mode for
lshiftrt. As the lshiftrt is commonly converted to a rotate/mask insn, the
widened mode blocks it from being further combined into a rotate/mask/insert
insn. PR93738 shows the case.

The lshiftrt:SI (subreg:SI (reg:DI)) is converted to
subreg:SI (lshiftrt:DI (reg:DI)) and fails to match rotate/mask pattern.

Trying 13, 10 -> 14:
   13: r127:SI=r125:SI&0xf0ff
  REG_DEAD r125:SI
   10: r124:SI=r129:DI#4 0>>0xc&0xf00
  REG_DEAD r129:DI
   14: r128:SI=r127:SI|r124:SI

Failed to match this instruction:
(set (reg:SI 128)
(ior:SI (and:SI (reg:SI 125 [+-2 ])
(const_int -3841 [0xf0ff]))
(and:SI (subreg:SI (zero_extract:DI (reg:DI 129)
(const_int 32 [0x20])
(const_int 20 [0x14])) 4)
(const_int 3840 [0xf00]
Failed to match this instruction:
(set (reg:SI 128)
(ior:SI (and:SI (reg:SI 125 [+-2 ])
(const_int -3841 [0xf0ff]))
(and:SI (subreg:SI (and:DI (lshiftrt:DI (reg:DI 129)
(const_int 12 [0xc]))
(const_int 4294967295 [0xffffffff])) 4)
(const_int 3840 [0xf00]

If the shift mode is not widened, the insns can be combined into a
rotate/mask/insert insn as expected.

Trying 13, 10 -> 14:
   13: r127:SI=r125:SI&0xf0ff
  REG_DEAD r125:SI
   10: r124:SI=r129:DI#4 0>>0xc&0xf00
  REG_DEAD r129:DI
   14: r128:SI=r127:SI|r124:SI
  REG_DEAD r127:SI
  REG_DEAD r124:SI
Successfully matched this instruction:
(set (reg:SI 128)
(ior:SI (and:SI (reg:SI 125 [+-2 ])
(const_int -3841 [0xf0ff]))
(and:SI (lshiftrt:SI (subreg:SI (reg:DI 129) 4)
(const_int 12 [0xc]))
(const_int 3840 [0xf00]


  This patch adds a target hook to indicate whether rotate/mask instructions
are supported on a certain mode. If it returns true, widening the lshiftrt
mode is skipped and the shift is done in the original mode.

  The patch fixes the regression of other rs6000 test cases. They're listed
in the second patch.

  The patch passed regression test on Power Linux and x86 platforms.

Thanks
Gui Haochen

ChangeLog
combine: Don't widen shift mode when target has rotate/mask instruction on
original mode

Widening the shift mode is unnecessary when the target already has rotate/mask
instructions on the original mode.  It might block further combine
optimization on the original mode.  For instance, further combining the insns
into a rotate/mask/insert instruction on the original mode.

This patch adds a hook to indicate if a target supports rotate/mask
instructions on a certain mode.  If it returns true, widening the shift
mode will be skipped on lshiftrt.

gcc/
PR target/93738
* combine.cc (try_widen_shift_mode): Skip to widen mode for lshiftrt
when the target has rotate/mask instructions on original mode.
* doc/tm.texi: Regenerate.
* doc/tm.texi.in (TARGET_HAVE_ROTATE_AND_MASK): Add.
* target.def (have_rotate_and_mask): New target hook.
* targhooks.cc (default_have_rotate_and_mask): New function.
* targhooks.h (default_have_rotate_and_mask): Declare.

patch.diff
diff --git a/gcc/combine.cc b/gcc/combine.cc
index 304c020ec79..f22fe42931b 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -10475,20 +10475,25 @@ try_widen_shift_mode (enum rtx_code code, rtx op, int 
count,
   return orig_mode;

 case LSHIFTRT:
-  /* Similarly here but with zero bits.  */
-  if (HWI_COMPUTABLE_MODE_P (mode)
- && (nonzero_bits (op, mode) & ~GET_MODE_MASK (orig_mode)) == 0)
-   return mode;
-
-  /* We can also widen if the bits brought in will be masked off.  This
-operation is performed in ORIG_MODE.  */
-  if (outer_code == AND)
+  /* Skip wider mode when the target has rotate and mask instructions on
+orig_mode.  */
+  if (!targetm.have_rotate_and_mask (orig_mode))
{
- int care_bits = low_bitmask_len (orig_mode, outer_const);
-
- if (care_bits >= 0
- && GET_MODE_PRECISION (orig_mode) - care_bits >= count)
+ /* Similarly here but with zero bits.  */
+ if (HWI_COMPUTABLE_MODE_P (mode)
+ && (nonzero_bits (op, mode) & ~GET_MODE_MASK (orig_mode)) == 0)
return mode;
+
+ /* We can also widen if the bits brought in will be masked off.
+This operation is performed in ORIG_MODE.  */
+ if (outer_code == AND)
+   {
+ int care_bits = low_bitmask_len (orig_mode, outer_const);
+
+ if (care_bits >= 0
+ && GET_MODE_PRECISION (orig_mode) - care_bits >= count)
+   return mode;
+   }
}
   /* fall through */

diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi

[PATCH-2, rs6000] Don't widen shift mode when target has rotate/mask instruction on original mode [PR93738]

2023-07-18 Thread HAO CHEN GUI via Gcc-patches
Hi,
  The patch relies on the first patch. The reason of the change is also described
in the first patch. This patch implements the target hook have_rotate_and_mask.
It also modifies some test cases. The regression of rlwimi-2.c is fixed. For
rlwinm-0.c and rlwinm-2.c, one more 32bit rotate/mask instruction is generated
and one less 64bit rotate/mask instruction.

  The patch passed regression test on Power Linux platforms. Test shows the
patch has no performance regression on SPECint.

Thanks
Gui Haochen

ChangeLog
rs6000: implement target hook have_rotate_and_mask

gcc/
PR target/93738
* config/rs6000/rs6000.cc (TARGET_HAVE_ROTATE_AND_MASK): Define.
(rs6000_have_rotate_and_mask): New function.

gcc/testsuite/
PR target/93738
* gcc.target/powerpc/rlwimi-2.c: Adjust the number of 64bit and 32bit
rotate instructions.
* gcc.target/powerpc/rlwinm-0.c: Likewise.
* gcc.target/powerpc/rlwinm-2.c: Likewise.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 44b448d2ba6..98873afddb4 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1764,6 +1764,9 @@ static const struct attribute_spec 
rs6000_attribute_table[] =
 #undef TARGET_CONST_ANCHOR
 #define TARGET_CONST_ANCHOR 0x8000

+#undef TARGET_HAVE_ROTATE_AND_MASK
+#define TARGET_HAVE_ROTATE_AND_MASK rs6000_have_rotate_and_mask
+
 

 /* Processor table.  */
@@ -29097,6 +29100,17 @@ rs6000_opaque_type_invalid_use_p (gimple *stmt)
   return false;
 }

+bool
+rs6000_have_rotate_and_mask (machine_mode mode)
+{
+  gcc_assert (SCALAR_INT_MODE_P (mode));
+
+  if (mode == SImode || mode == DImode)
+return true;
+
+  return false;
+}
+
 struct gcc_target targetm = TARGET_INITIALIZER;

 #include "gt-rs6000.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c 
b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
index bafa371db73..62344a95aa0 100644
--- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
@@ -6,10 +6,9 @@
 /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+mr} 643 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
-/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } 
*/
+/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 6728 { target lp64 } } } 
*/

-/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } 
} */
-/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } 
*/
+/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 } } */

 /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5036 } } */

diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm-0.c 
b/gcc/testsuite/gcc.target/powerpc/rlwinm-0.c
index 4f4fca2d8ef..b6b1b227c7e 100644
--- a/gcc/testsuite/gcc.target/powerpc/rlwinm-0.c
+++ b/gcc/testsuite/gcc.target/powerpc/rlwinm-0.c
@@ -7,10 +7,10 @@
 /* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 3081 { target lp64 } } } 
*/

 /* { dg-final { scan-assembler-times {(?n)^\s+rlwinm} 3197 { target ilp32 } } 
} */
-/* { dg-final { scan-assembler-times {(?n)^\s+rlwinm} 3093 { target lp64 } } } 
*/
+/* { dg-final { scan-assembler-times {(?n)^\s+rlwinm} 3094 { target lp64 } } } 
*/
 /* { dg-final { scan-assembler-times {(?n)^\s+rotlwi} 154 } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+srwi} 13 { target ilp32 } } } */
-/* { dg-final { scan-assembler-times {(?n)^\s+srdi} 13 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {(?n)^\s+srdi} 12 { target lp64 } } } */


 #define SL
diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm-2.c 
b/gcc/testsuite/gcc.target/powerpc/rlwinm-2.c
index bddcfe2b76f..0315ca91dd7 100644
--- a/gcc/testsuite/gcc.target/powerpc/rlwinm-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/rlwinm-2.c
@@ -7,9 +7,9 @@
 /* { dg-final { scan-assembler-times {(?n)^\s+rldic} 2726 { target lp64 } } } 
*/

 /* { dg-final { scan-assembler-times {(?n)^\s+rlwinm} 833 { target ilp32 } } } 
*/
-/* { dg-final { scan-assembler-times {(?n)^\s+rlwinm} 720 { target lp64 } } } 
*/
+/* { dg-final { scan-assembler-times {(?n)^\s+rlwinm} 721 { target lp64 } } } 
*/
 /* { dg-final { scan-assembler-times {(?n)^\s+srwi} 13 { target ilp32 } } } */
-/* { dg-final { scan-assembler-times {(?n)^\s+srdi} 13 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {(?n)^\s+srdi} 12 { target lp64 } } } */

 /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 2518 } } */



[PATCH, rs6000] Skip redundant vector extract if the element is first element of dword0 [PR110429]

2023-07-04 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch avoids generating a redundant vector extract insn when
the extracted element is the first element of dword0 and the destination
is a memory operand. A single 'stxsi[hb]x' instruction is enough.

  The V4SImode case is fixed in a previous patch.
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622101.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Thanks
Gui Haochen

ChangeLog
rs6000: Skip redundant vector extract if the element is first element of
dword0

gcc/
PR target/110429
* config/rs6000/vsx.md (*vsx_extract__store_p9): Skip vector
extract when the element is the first element of dword0.

gcc/testsuite/
PR target/110429
* gcc.target/powerpc/pr110429.c: New.


patch.diff
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0c269e4e8d9..b3fec910eb6 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3855,7 +3855,22 @@ (define_insn_and_split "*vsx_extract__store_p9"
(parallel [(match_dup 2)])))
  (clobber (match_dup 4))])
(set (match_dup 0)
-   (match_dup 3))])
+   (match_dup 3))]
+{
+  enum machine_mode dest_mode = GET_MODE (operands[0]);
+
+  if (which_alternative == 0
+  && ((mode == V16QImode
+  && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 7 : 8))
+ || (mode == V8HImode
+ && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 3 : 4
+{
+  emit_move_insn (operands[0],
+ gen_rtx_REG (dest_mode, REGNO (operands[3])));
+  DONE;
+}
+})
+

 (define_insn_and_split  "*vsx_extract_si"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110429.c 
b/gcc/testsuite/gcc.target/powerpc/pr110429.c
new file mode 100644
index 000..5a938f9f90a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110429.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+#include 
+
+#ifdef __BIG_ENDIAN__
+#define DWORD0_FIRST_SHORT 3
+#define DWORD0_FIRST_CHAR 7
+#else
+#define DWORD0_FIRST_SHORT 4
+#define DWORD0_FIRST_CHAR 8
+#endif
+
+void vec_extract_short (vector short v, short* p)
+{
+   *p = vec_extract(v, DWORD0_FIRST_SHORT);
+}
+
+void vec_extract_char (vector char v, char* p)
+{
+   *p = vec_extract(v, DWORD0_FIRST_CHAR);
+}
+
+/* { dg-final { scan-assembler-times "stxsi\[hb\]x" 2 } } */
+/* { dg-final { scan-assembler-not "vextractu\[hb\]" } } */


[PATCH, rs6000] Extract the element in dword0 by mfvsrd and shift/mask [PR110331]

2023-07-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements vector element extraction by mfvsrd and
shift/mask when the element is in dword0 of the vector. Previously,
GCC generated vsplat/mfvsrd on P8 and li/vextract on P9. Since mfvsrd
has lower latency than vextract and rldicl has lower latency than
vsplat, the new sequence is faster. In particular, the shift/mask
is not needed when the element is the first element of dword0, which
saves another rldicl when a sign-extended value is returned.

  This patch is based on the previous one.
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622101.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen


ChangeLog
rs6000: Extract the element in dword0 by mfvsrd and shift/mask

gcc/
PR target/110331
* config/rs6000/rs6000-protos.h (rs6000_vsx_element_in_dword0_p):
Declare.
(rs6000_vsx_extract_element_from_dword0): Declare.
* config/rs6000/rs6000.cc (rs6000_vsx_element_in_dword0_p): New
function to judge if an element is in dword0 of a vector.
(rs6000_vsx_extract_element_from_dword0): Extract an element from
dword0 by mfvsrd and lshiftrt and mask.
* config/rs6000/rs6000.md (*rotl3_mask): Rename to...
(rotl3_mask): ...this
* config/rs6000/vsx.md (vsx_extract_): Add a comment.
(split pattern for p9 vector extract): Call
rs6000_vsx_extract_element_from_dword0 if the element is in dword0.
(*vsx_extract__di_p9): Exclude the elements in dword0 which
are processed by *vsx_extract__zero_extend for both p8 and p9.
(*vsx_extract__zero_extend): Zero extend pattern for vector
extract on the element of dword0.
(*vsx_extract__p8): Call rs6000_vsx_extract_element_from_dword0
when the extracted element is in dword0.  Refined the pattern and
remove reload_completed from split condition.

gcc/testsuite/
PR target/110331
* gcc.target/powerpc/fold-vec-extract-char.p8.c: Set the extracted
elements in dword1.
* gcc.target/powerpc/fold-vec-extract-char.p9.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-int.p8.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-int.p9.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-short.p8.c: Likewise.
* gcc.target/powerpc/fold-vec-extract-short.p9.c: Likewise.
* gcc.target/powerpc/p9-extract-1.c: Likewise.
* gcc.target/powerpc/pr110331-p8.c: New.
* gcc.target/powerpc/pr110331-p9.c: New.
* gcc.target/powerpc/pr110331.h: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index f70118ea40f..ccef280122b 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -161,6 +161,8 @@ extern bool rs6000_function_pcrel_p (struct function *);
 extern bool rs6000_pcrel_p (void);
 extern bool rs6000_fndecl_pcrel_p (const_tree);
 extern void rs6000_output_addr_vec_elt (FILE *, int);
+extern bool rs6000_vsx_element_in_dword0_p (rtx, enum machine_mode);
+extern void rs6000_vsx_extract_element_from_dword0 (rtx, rtx, rtx, bool);

 /* Different PowerPC instruction formats that are used by GCC.  There are
various other instruction formats used by the PowerPC hardware, but these
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 07c3a3d15ac..fad01d6b5dd 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -29098,6 +29098,74 @@ rs6000_opaque_type_invalid_use_p (gimple *stmt)
   return false;
 }

+/* Return true when the element is in dword0 of a vector.  Exclude word
+   element 1 of V4SI as the word can be extracted by mfvsrwz directly.  */
+
+bool
+rs6000_vsx_element_in_dword0_p (rtx op, enum machine_mode mode)
+{
+  gcc_assert (CONST_INT_P (op));
+  gcc_assert (mode == V16QImode || mode == V8HImode || mode == V4SImode);
+
+  int units = GET_MODE_NUNITS (mode);
+  int elt = INTVAL (op);
+  elt = BYTES_BIG_ENDIAN ? units - 1 - elt : elt;
+
+  if (elt > units / 2
+  || (elt == units / 2 && mode != V4SImode))
+return true;
+  else
+return false;
+}
+
+/* Extract element from dword0 by mfvsrd and lshiftrt and mask.  Extend_p
+   indicates if zero extend is needed or not.  */
+
+void
+rs6000_vsx_extract_element_from_dword0 (rtx dest, rtx src, rtx element,
+   bool extend_p)
+{
+  enum machine_mode mode = GET_MODE (src);
+  gcc_assert (rs6000_vsx_element_in_dword0_p (element, mode));
+
+  enum machine_mode dest_mode = GET_MODE (dest);
+  enum machine_mode inner_mode = GET_MODE_INNER (mode);
+  int units = GET_MODE_NUNITS (mode);
+  int elt = INTVAL (element);
+  elt = BYTES_BIG_ENDIAN ? units - 1 - elt : elt;
+  int value, shift;
+  unsigned int mask;
+
+  rtx vec_tmp = gen_lowpart (V2DImode, src);
+  rtx tmp1 = can_create_pseudo_p ()
+? gen_reg_rtx (DImode)
+: simplify_gen_subreg (DImode, dest, 

[PATCHv4, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]

2023-06-24 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds a new insn for vector splat with small V2DI constants on P8.
If the value of the constant is in RANGE (-16, 15) and not 0 or -1, it can be
loaded with vspltisw and vupkhsw on P8. This should be more efficient than
loading the vector from memory.

  Compared to the last version, the main change is to remove the new constraint,
use a super constraint in the insn, and move the check into the insn condition.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
2023-06-25  Haochen Gui 

gcc/
PR target/104124
* config/rs6000/altivec.md (*altivec_vupkhs_direct): Rename
to...
(altivec_vupkhs_direct): ...this.
* config/rs6000/predicates.md (vspltisw_vupkhsw_constant_split): New
predicate to test if a constant can be loaded with vspltisw and
vupkhsw.
(easy_vector_constant): Call vspltisw_vupkhsw_constant_p to Check if
a vector constant can be synthesized with a vspltisw and a vupkhsw.
* config/rs6000/rs6000-protos.h (vspltisw_vupkhsw_constant_p): Declare.
* config/rs6000/rs6000.cc (vspltisw_vupkhsw_constant_p): New function
to return true if OP mode is V2DI and can be synthesized with vupkhsw
and vspltisw.
* config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up
constants with vspltisw and vupkhsw.

gcc/testsuite/
PR target/104124
* gcc.target/powerpc/pr104124.c: New.

patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 49b0c964f4d..2c932854c33 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs"
 }
   [(set_attr "type" "vecperm")])

-(define_insn "*altivec_vupkhs_direct"
+(define_insn "altivec_vupkhs_direct"
   [(set (match_operand:VP 0 "register_operand" "=v")
(unspec:VP [(match_operand: 1 "register_operand" "v")]
 UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 52c65534e51..f62a4d9b506 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -694,6 +694,12 @@ (define_predicate "xxspltib_constant_split"
   return num_insns > 1;
 })

+;; Return true if the operand is a constant that can be loaded with a vspltisw
+;; instruction and then a vupkhsw instruction.
+
+(define_predicate "vspltisw_vupkhsw_constant_split"
+  (and (match_code "const_vector")
+   (match_test "vspltisw_vupkhsw_constant_p (op, mode)")))

 ;; Return 1 if the operand is constant that can loaded directly with a XXSPLTIB
 ;; instruction.
@@ -742,6 +748,11 @@ (define_predicate "easy_vector_constant"
   && xxspltib_constant_p (op, mode, _insns, ))
return true;

+  /* V2DI constant within RANGE (-16, 15) can be synthesized with a
+vspltisw and a vupkhsw.  */
+  if (vspltisw_vupkhsw_constant_p (op, mode, ))
+   return true;
+
   return easy_altivec_constant (op, mode);
 }

diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 1a4fc1df668..00cb2d82953 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, 
rtx, int, int, int,

 extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
+extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..ae34a02b282 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -6638,6 +6638,36 @@ xxspltib_constant_p (rtx op,
   return true;
 }

+/* Return true if OP mode is V2DI and can be synthesized with ISA 2.07
+   instructions vupkhsw and vspltisw.
+
+   Return the constant that is being split via CONSTANT_PTR.  */
+
+bool
+vspltisw_vupkhsw_constant_p (rtx op, machine_mode mode, int *constant_ptr)
+{
+  HOST_WIDE_INT value;
+  rtx elt;
+
+  if (!TARGET_P8_VECTOR)
+return false;
+
+  if (mode != V2DImode)
+return false;
+
+  if (!const_vec_duplicate_p (op, ))
+return false;
+
+  value = INTVAL (elt);
+  if (value == 0 || value == 1
+  || !EASY_VECTOR_15 (value))
+return false;
+
+  if (constant_ptr)
+*constant_ptr = (int) value;
+  return true;
+}
+
 const char *
 output_vec_const_move (rtx *operands)
 {
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7d845df5c2d..4919b073e50 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1174,6 +1174,30 @@ (define_insn_and_split "*xxspltib__split"
   [(set_attr "type" "vecperm")
(set_attr "length" "8")])

+(define_insn_and_split 

[PATCHv4, rs6000] Add two peephole2 patterns for mr. insn

2023-06-20 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds two peephole2 patterns which help convert certain insn
sequences to the "mr." instruction. These insn sequences can't be combined
in the combine pass.

  Compared to the last version, the empty constraints are removed and the test
cases run only on powerpc Linux, as AIX doesn't support the "-mregnames" option.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: Add two peephole patterns for "mr." insn

When investigating the issue mentioned in PR87871#c30 - whether the compare
and move pattern is beneficial before RA - I checked the assembly generated
for SPEC2017 and found that certain insn sequences aren't converted to
"mr." instructions.
The following two sequences are never combined into the "mr." pattern as
there is no register link between them. This patch adds two peephole2
patterns to convert them to "mr." instructions.

cmp 0,3,0
mr 4,3

mr 4,3
cmp 0,3,0

The patch also creates a new mode iterator which is decided by
TARGET_POWERPC64.  This mode iterator is used in "mr." and its split
pattern.  The original P iterator is wrong when -m32/-mpowerpc64 is set.
In this situation, the "mr." should compare the whole 64-bit register
with 0 rather than the low 32-bit one.

gcc/
* config/rs6000/rs6000.md (peephole2 for compare_and_move): New.
(peephole2 for move_and_compare): New.
(mode_iterator WORD): New.  Set the mode to SI/DImode by
TARGET_POWERPC64.
(*mov_internal2): Change the mode iterator from P to WORD.
(split pattern for compare_and_move): Likewise.

gcc/testsuite/
* gcc.dg/rtl/powerpc/move_compare_peephole_32.c: New.
* gcc.dg/rtl/powerpc/move_compare_peephole_64.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b0db8ae508d..2ab1e8d4c80 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -491,6 +491,7 @@ (define_mode_iterator SDI [SI DI])
 ; The size of a pointer.  Also, the size of the value that a record-condition
 ; (one with a '.') will compare; and the size used for arithmetic carries.
 (define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")])
+(define_mode_iterator WORD [(SI "!TARGET_POWERPC64") (DI "TARGET_POWERPC64")])

 ; Iterator to add PTImode along with TImode (TImode can go in VSX registers,
 ; PTImode is GPR only)
@@ -7879,9 +7880,9 @@ (define_split

 (define_insn "*mov_internal2"
   [(set (match_operand:CC 2 "cc_reg_operand" "=y,x,?y")
-   (compare:CC (match_operand:P 1 "gpc_reg_operand" "0,r,r")
+   (compare:CC (match_operand:WORD 1 "gpc_reg_operand" "0,r,r")
(const_int 0)))
-   (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
+   (set (match_operand:WORD 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
   ""
   "@
cmpi %2,%0,0
@@ -7891,11 +7892,41 @@ (define_insn "*mov_internal2"
(set_attr "dot" "yes")
(set_attr "length" "4,4,8")])

+(define_peephole2
+  [(set (match_operand:CC 2 "cc_reg_operand")
+   (compare:CC (match_operand:WORD 1 "int_reg_operand")
+   (const_int 0)))
+   (set (match_operand:WORD 0 "int_reg_operand")
+   (match_dup 1))]
+  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
+  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+  (compare:CC (match_operand:WORD 1 "int_reg_operand" "r")
+  (const_int 0)))
+ (set (match_operand:WORD 0 "int_reg_operand" "=r")
+  (match_dup 1))])]
+  ""
+)
+
+(define_peephole2
+  [(set (match_operand:WORD 0 "int_reg_operand")
+   (match_operand:WORD 1 "int_reg_operand"))
+   (set (match_operand:CC 2 "cc_reg_operand")
+   (compare:CC (match_dup 1)
+   (const_int 0)))]
+  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
+  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+  (compare:CC (match_operand:GPR 1 "int_reg_operand" "r")
+  (const_int 0)))
+ (set (match_operand:WORD 0 "int_reg_operand" "=r")
+  (match_dup 1))])]
+  ""
+)
+
 (define_split
   [(set (match_operand:CC 2 "cc_reg_not_cr0_operand")
-   (compare:CC (match_operand:P 1 "gpc_reg_operand")
+   (compare:CC (match_operand:WORD 1 "gpc_reg_operand")
(const_int 0)))
-   (set (match_operand:P 0 "gpc_reg_operand") (match_dup 1))]
+   (set (match_operand:WORD 0 "gpc_reg_operand") (match_dup 1))]
   "reload_completed"
   [(set (match_dup 0) (match_dup 1))
(set (match_dup 2)
diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c 
b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
new file mode 100644
index 000..571a3112a74
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
@@ -0,0 +1,60 @@
+/* { dg-do compile { target powerpc*-*-linux* } } */
+/* { dg-skip-if "" { has_arch_ppc64 } } */
+/* { dg-options "-O2 -mregnames" } */
+
+/* Following instruction sequence is 

Re: [PATCH, rs6000] Add two peephole2 patterns for mr. insn

2023-06-19 Thread HAO CHEN GUI via Gcc-patches
HP,
  It makes sense. I will update the patch.

Thanks
Gui Haochen

在 2023/6/20 8:07, Hans-Peter Nilsson 写道:
> On Tue, 30 May 2023, HAO CHEN GUI via Gcc-patches wrote:
> 
>> +++ b/gcc/config/rs6000/rs6000.md
>> @@ -7891,6 +7891,36 @@ (define_insn "*mov_internal2"
>> (set_attr "dot" "yes")
>> (set_attr "length" "4,4,8")])
>>
>> +(define_peephole2
>> +  [(set (match_operand:CC 2 "cc_reg_operand" "")
>> +(compare:CC (match_operand:P 1 "int_reg_operand" "")
>> +(const_int 0)))
>> +   (set (match_operand:P 0 "int_reg_operand" "")
> 
> A random comment from the sideline: I'd suggest to remove the 
> (empty) constraints string from your peephole2's.
> 
> It can be a matter of port-specific-taste but it seems removing 
> them would be consistent with the other peephole2's in 
> rs6000.md.
> 
> (In this matter, I believe the examples in md.texi are bad.)
> 
> brgds, H-P


[PATCH, rs6000] Generate mfvsrwz for all platforms and remove redundant zero extend [PR106769]

2023-06-18 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch modifies the vsx extract expander to generate mfvsrwz/stxsiwx
for all platforms when the mode is V4SI and the index of the extracted element
is 1 for BE and 2 for LE. This patch also adds an insn pattern for mfvsrwz
which can help eliminate redundant zero extends.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen


ChangeLog
rs6000: Generate mfvsrwz for all platforms and remove redundant zero extend

mfvsrwz has lower latency than xxextractuw, so it should be generated
even with p9 vector enabled if possible.  Also, the instruction
already zero-extends its result.  A combine pattern is needed to
eliminate redundant zero extend instructions.

gcc/
PR target/106769
* config/rs6000/vsx.md (expand vsx_extract_): Skip calling
gen_vsx_extract__p9 when it can be implemented by
mfvsrwz/stxsiwx.
(*vsx_extract__di_p9): Do not generate the insn when it can
be generated by mfvsrwz.
(mfvsrwz): New insn pattern.
(*vsx_extract_si): Rename to...
(vsx_extract_si): ..., remove redundant insn condition and
generate the insn on p9 when it can be implemented by
mfvsrwz/stxsiwx.  Add a dup alternative for simple vector moving.
Remove reload_completed from split condition as it's unnecessary.
Remove unnecessary checking from preparation statements.  Set
type and length attributes for each alternative.

gcc/testsuite/
PR target/106769
* gcc.target/powerpc/pr106769.h: New.
* gcc.target/powerpc/pr106769-p8.c: New.
* gcc.target/powerpc/pr106769-p9.c: New.

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0a34ceebeb5..09b0f83db86 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3728,7 +3728,9 @@ (define_expand  "vsx_extract_"
   "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT"
 {
   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
-  if (TARGET_P9_VECTOR)
+  if (TARGET_P9_VECTOR
+  && (mode != V4SImode
+ || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)))
 {
   emit_insn (gen_vsx_extract__p9 (operands[0], operands[1],
operands[2]));
@@ -3798,7 +3800,9 @@ (define_insn_and_split "*vsx_extract__di_p9"
  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,")
  (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]
(clobber (match_scratch:SI 3 "=r,X"))]
-  "VECTOR_MEM_VSX_P (mode) && TARGET_VEXTRACTUB"
+  "TARGET_VEXTRACTUB
+   && (mode != V4SImode
+   || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
   "#"
   "&& reload_completed"
   [(parallel [(set (match_dup 4)
@@ -3830,58 +3834,67 @@ (define_insn_and_split "*vsx_extract__store_p9"
(set (match_dup 0)
(match_dup 3))])

-(define_insn_and_split  "*vsx_extract_si"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
+(define_insn "mfvsrwz"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (zero_extend:DI
+ (vec_select:SI
+   (match_operand:V4SI 1 "vsx_register_operand" "wa")
+   (parallel [(match_operand:QI 2 "const_int_operand" "n")]
+   (clobber (match_scratch:V4SI 3 "=v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+  "mfvsrwz %0,%x1"
+  [(set_attr "type" "mfvsr")
+   (set_attr "isa" "p8v")])
+
+(define_insn_and_split  "vsx_extract_si"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
(vec_select:SI
-(match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
-(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
-   (clobber (match_scratch:V4SI 3 "=v,v,v"))]
-  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && 
!TARGET_P9_VECTOR"
-  "#"
-  "&& reload_completed"
+(match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
+(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
+   (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && (!TARGET_P9_VECTOR || INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 
2))"
+{
+   if (which_alternative == 0)
+ return "mfvsrwz %0,%x1";
+
+   if (which_alternative == 1)
+ return "xxlor %x0,%x1,%x1";
+
+   if (which_alternative == 2)
+ return "stxsiwx %x1,%y0";
+
+   return ASM_COMMENT_START " vec_extract to same register";
+}
+  "&& INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)"
   [(const_int 0)]
 {
   rtx dest = operands[0];
   rtx src = operands[1];
   rtx element = operands[2];
-  rtx vec_tmp = operands[3];
-  int value;
+  rtx vec_tmp;
+
+  if (GET_CODE (operands[3]) == SCRATCH)
+vec_tmp = gen_reg_rtx (V4SImode);
+  else
+vec_tmp = operands[3];

   /* Adjust index for LE element ordering, the below minuend 3 is computed by
  GET_MODE_NUNITS (V4SImode) - 1.  */
   if (!BYTES_BIG_ENDIAN)
 element = GEN_INT (3 - INTVAL (element));

-  /* If the value is 

[PATCHv3, rs6000] Add two peephole2 patterns for mr. insn

2023-06-13 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds two peephole2 patterns which help convert certain insn
sequences to the "mr." instruction. These insn sequences can't be combined
in the combine pass.

  Compared to the last version, this renames the new mode iterator from "Q"
to "WORD".

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: Add two peephole patterns for "mr." insn

When investigating the issue mentioned in PR87871#c30 - whether the compare
and move pattern is beneficial before RA - I checked the assembly generated
for SPEC2017 and found that certain insn sequences aren't converted to
"mr." instructions.
The following two sequences are never combined into the "mr." pattern as
there is no register link between them. This patch adds two peephole2
patterns to convert them to "mr." instructions.

cmp 0,3,0
mr 4,3

mr 4,3
cmp 0,3,0

The patch also creates a new mode iterator which is decided by
TARGET_POWERPC64.  This mode iterator is used in "mr." and its split
pattern.  The original P iterator is wrong when -m32/-mpowerpc64 is set.
In this situation, the "mr." should compare the whole 64-bit register
with 0 rather than the low 32-bit one.

gcc/
* config/rs6000/rs6000.md (peephole2 for compare_and_move): New.
(peephole2 for move_and_compare): New.
(mode_iterator WORD): New.  Set the mode to SI/DImode by
TARGET_POWERPC64.
(*mov_internal2): Change the mode iterator from P to WORD.
(split pattern for compare_and_move): Likewise.

gcc/testsuite/
* gcc.dg/rtl/powerpc/move_compare_peephole_32.c: New.
* gcc.dg/rtl/powerpc/move_compare_peephole_64.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b0db8ae508d..1f0fe85b9b5 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -491,6 +491,7 @@ (define_mode_iterator SDI [SI DI])
 ; The size of a pointer.  Also, the size of the value that a record-condition
 ; (one with a '.') will compare; and the size used for arithmetic carries.
 (define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")])
+(define_mode_iterator WORD [(SI "!TARGET_POWERPC64") (DI "TARGET_POWERPC64")])

 ; Iterator to add PTImode along with TImode (TImode can go in VSX registers,
 ; PTImode is GPR only)
@@ -7879,9 +7880,9 @@ (define_split

 (define_insn "*mov_internal2"
   [(set (match_operand:CC 2 "cc_reg_operand" "=y,x,?y")
-   (compare:CC (match_operand:P 1 "gpc_reg_operand" "0,r,r")
+   (compare:CC (match_operand:WORD 1 "gpc_reg_operand" "0,r,r")
(const_int 0)))
-   (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
+   (set (match_operand:WORD 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
   ""
   "@
cmpi %2,%0,0
@@ -7891,11 +7892,41 @@ (define_insn "*mov_internal2"
(set_attr "dot" "yes")
(set_attr "length" "4,4,8")])

+(define_peephole2
+  [(set (match_operand:CC 2 "cc_reg_operand" "")
+   (compare:CC (match_operand:WORD 1 "int_reg_operand" "")
+   (const_int 0)))
+   (set (match_operand:WORD 0 "int_reg_operand" "")
+   (match_dup 1))]
+  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
+  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+  (compare:CC (match_operand:WORD 1 "int_reg_operand" "r")
+  (const_int 0)))
+ (set (match_operand:WORD 0 "int_reg_operand" "=r")
+  (match_dup 1))])]
+  ""
+)
+
+(define_peephole2
+  [(set (match_operand:WORD 0 "int_reg_operand" "")
+   (match_operand:WORD 1 "int_reg_operand" ""))
+   (set (match_operand:CC 2 "cc_reg_operand" "")
+   (compare:CC (match_dup 1)
+   (const_int 0)))]
+  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
+  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+  (compare:CC (match_operand:GPR 1 "int_reg_operand" "r")
+  (const_int 0)))
+ (set (match_operand:WORD 0 "int_reg_operand" "=r")
+  (match_dup 1))])]
+  ""
+)
+
 (define_split
   [(set (match_operand:CC 2 "cc_reg_not_cr0_operand")
-   (compare:CC (match_operand:P 1 "gpc_reg_operand")
+   (compare:CC (match_operand:WORD 1 "gpc_reg_operand")
(const_int 0)))
-   (set (match_operand:P 0 "gpc_reg_operand") (match_dup 1))]
+   (set (match_operand:WORD 0 "gpc_reg_operand") (match_dup 1))]
   "reload_completed"
   [(set (match_dup 0) (match_dup 1))
(set (match_dup 2)
diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c 
b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
new file mode 100644
index 000..29234dea7c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
@@ -0,0 +1,60 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-skip-if "" { has_arch_ppc64 } } */
+/* { dg-options "-O2 -mregnames" } */
+
+/* Following instruction sequence is found in assembly of
+   Perl_block_start, 

[PATCHv2, rs6000] Add two peephole2 patterns for mr. insn

2023-06-11 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds two peephole2 patterns which help convert certain insn
sequences to the "mr." instruction. These insn sequences can't be combined
in the combine pass.

  Compared to the last version, it adds a new mode iterator "Q" which should
be used for dot instructions. With "-m32/-mpowerpc64" set, the dot
instruction should compare the DImode value with 0, not the SImode one.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen


ChangeLog
rs6000: Add two peephole patterns for "mr." insn

When investigating the issue mentioned in PR87871#c30 - whether the compare
and move pattern is beneficial before RA - I checked the assembly generated
for SPEC2017 and found that certain insn sequences aren't converted to
"mr." instructions.
The following two sequences are never combined into the "mr." pattern as
there is no register link between them. This patch adds two peephole2
patterns to convert them to "mr." instructions.

cmp 0,3,0
mr 4,3

mr 4,3
cmp 0,3,0

The patch also creates a new mode iterator which is decided by
TARGET_POWERPC64.  This mode iterator is used in "mr." and its split
pattern.  The original P iterator is wrong when -m32/-mpowerpc64 is set.
In this situation, the "mr." should compare the whole 64-bit register
with 0 rather than the low 32-bit one.

gcc/
* config/rs6000/rs6000.md (peephole2 for compare_and_move): New.
(peephole2 for move_and_compare): New.
(mode_iterator Q): New.  Set the mode to SI/DImode by
TARGET_POWERPC64.
(*mov_internal2): Change the mode iterator from P to Q.
(split pattern for compare_and_move): Likewise.

gcc/testsuite/
* gcc.dg/rtl/powerpc/move_compare_peephole_32.c: New.
* gcc.dg/rtl/powerpc/move_compare_peephole_64.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b0db8ae508d..fdb5b6ed22a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -491,6 +491,7 @@ (define_mode_iterator SDI [SI DI])
 ; The size of a pointer.  Also, the size of the value that a record-condition
 ; (one with a '.') will compare; and the size used for arithmetic carries.
 (define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")])
+(define_mode_iterator Q [(SI "!TARGET_POWERPC64") (DI "TARGET_POWERPC64")])

 ; Iterator to add PTImode along with TImode (TImode can go in VSX registers,
 ; PTImode is GPR only)
@@ -7879,9 +7880,9 @@ (define_split

 (define_insn "*mov_internal2"
   [(set (match_operand:CC 2 "cc_reg_operand" "=y,x,?y")
-   (compare:CC (match_operand:P 1 "gpc_reg_operand" "0,r,r")
+   (compare:CC (match_operand:Q 1 "gpc_reg_operand" "0,r,r")
(const_int 0)))
-   (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
+   (set (match_operand:Q 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
   ""
   "@
cmpi %2,%0,0
@@ -7891,11 +7892,41 @@ (define_insn "*mov_internal2"
(set_attr "dot" "yes")
(set_attr "length" "4,4,8")])

+(define_peephole2
+  [(set (match_operand:CC 2 "cc_reg_operand" "")
+   (compare:CC (match_operand:Q 1 "int_reg_operand" "")
+   (const_int 0)))
+   (set (match_operand:Q 0 "int_reg_operand" "")
+   (match_dup 1))]
+  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
+  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+  (compare:CC (match_operand:Q 1 "int_reg_operand" "r")
+  (const_int 0)))
+ (set (match_operand:Q 0 "int_reg_operand" "=r")
+  (match_dup 1))])]
+  ""
+)
+
+(define_peephole2
+  [(set (match_operand:Q 0 "int_reg_operand" "")
+   (match_operand:Q 1 "int_reg_operand" ""))
+   (set (match_operand:CC 2 "cc_reg_operand" "")
+   (compare:CC (match_dup 1)
+   (const_int 0)))]
+  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
+  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+  (compare:CC (match_operand:GPR 1 "int_reg_operand" "r")
+  (const_int 0)))
+ (set (match_operand:Q 0 "int_reg_operand" "=r")
+  (match_dup 1))])]
+  ""
+)
+
 (define_split
   [(set (match_operand:CC 2 "cc_reg_not_cr0_operand")
-   (compare:CC (match_operand:P 1 "gpc_reg_operand")
+   (compare:CC (match_operand:Q 1 "gpc_reg_operand")
(const_int 0)))
-   (set (match_operand:P 0 "gpc_reg_operand") (match_dup 1))]
+   (set (match_operand:Q 0 "gpc_reg_operand") (match_dup 1))]
   "reload_completed"
   [(set (match_dup 0) (match_dup 1))
(set (match_dup 2)
diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c 
b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
new file mode 100644
index 000..29234dea7c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
@@ -0,0 +1,60 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-skip-if "" { has_arch_ppc64 } } */
+/* { dg-options "-O2 -mregnames" } */
+
+/* Following 

[PATCH, rs6000] Add two peephole2 patterns for mr. insn

2023-05-30 Thread HAO CHEN GUI via Gcc-patches
Hi,
  By checking the object files of SPECint, I found that two kinds of
compare/move sequences can't be combined into the "mr." pattern as there is
no register link between them.  The patch adds two peephole2 patterns for them.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen



ChangeLog
rs6000: Add two peephole patterns for "mr." insn

The following two insn pairs are never combined into the "mr." pattern as
there is no register link between them.  So the patch adds these two peepholes.

cmp 0,3,0
mr 4,3

mr 4,3
cmp 0,3,0

gcc/
* config/rs6000/rs6000.md (peephole2 for compare and move): New.
(peephole2 for move and compare): New.

gcc/testsuite/
* gcc.dg/rtl/powerpc/move_compare_peephole_32.c: New.
* gcc.dg/rtl/powerpc/move_compare_peephole_64.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b0db8ae508d..b60230293f9 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7891,6 +7891,36 @@ (define_insn "*mov_internal2"
(set_attr "dot" "yes")
(set_attr "length" "4,4,8")])

+(define_peephole2
+  [(set (match_operand:CC 2 "cc_reg_operand" "")
+   (compare:CC (match_operand:P 1 "int_reg_operand" "")
+   (const_int 0)))
+   (set (match_operand:P 0 "int_reg_operand" "")
+   (match_dup 1))]
+  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
+  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+  (compare:CC (match_operand:P 1 "int_reg_operand" "r")
+  (const_int 0)))
+ (set (match_operand:P 0 "int_reg_operand" "=r")
+  (match_dup 1))])]
+  ""
+)
+
+(define_peephole2
+  [(set (match_operand:P 0 "int_reg_operand" "")
+   (match_operand:P 1 "int_reg_operand" ""))
+   (set (match_operand:CC 2 "cc_reg_operand" "")
+   (compare:CC (match_dup 1)
+   (const_int 0)))]
+  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
+  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+  (compare:CC (match_operand:P 1 "int_reg_operand" "r")
+  (const_int 0)))
+ (set (match_operand:P 0 "int_reg_operand" "=r")
+  (match_dup 1))])]
+  ""
+)
+
 (define_split
   [(set (match_operand:CC 2 "cc_reg_not_cr0_operand")
(compare:CC (match_operand:P 1 "gpc_reg_operand")
diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c 
b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
new file mode 100644
index 000..4e094c8fe74
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
@@ -0,0 +1,47 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-skip-if "" { has_arch_ppc64 } } */
+/* { dg-options "-O2 -mregnames" } */
+
+int __RTL (startwith ("peephole2")) compare_move_peephole ()
+{
+(function "compare_move_peephole"
+  (insn-chain
+(block 2
+  (edge-from entry (flags "FALLTHRU"))
+  (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+  (cinsn 8 (set (reg:CC %cr0)
+(compare:CC (reg:SI %r3)
+(const_int 0
+  (cinsn 2 (set (reg:SI %r4)
+(reg:SI %r3)))
+  ;; Extra insn to avoid the above being deleted by DCE.
+  (cinsn 18 (use (reg:SI %r4)))
+  (cinsn 19 (use (reg:CC %cr0)))
+  (edge-to exit (flags "FALLTHRU"))
+) ;; block 2
+  ) ;; insn-chain
+) ;; function "main"
+}
+
+int __RTL (startwith ("peephole2")) move_compare_peephole ()
+{
+(function "move_compare_peephole"
+  (insn-chain
+(block 2
+  (edge-from entry (flags "FALLTHRU"))
+  (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+  (cinsn 2 (set (reg:SI %r4)
+(reg:SI %r3)))
+  (cinsn 8 (set (reg:CC %cr0)
+(compare:CC (reg:SI %r3)
+(const_int 0
+  ;; Extra insn to avoid the above being deleted by DCE.
+  (cinsn 18 (use (reg:SI %r4)))
+  (cinsn 19 (use (reg:CC %cr0)))
+  (edge-to exit (flags "FALLTHRU"))
+) ;; block 2
+  ) ;; insn-chain
+) ;; function "main"
+}
+
+/* { dg-final { scan-assembler-times {\mmr\.} 2 } } */
diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_64.c 
b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_64.c
new file mode 100644
index 000..511d6cc5317
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_64.c
@@ -0,0 +1,47 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-O2 -mregnames" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+int __RTL (startwith ("peephole2")) compare_move_peephole ()
+{
+(function "compare_move_peephole"
+  (insn-chain
+(block 2
+  (edge-from entry (flags "FALLTHRU"))
+  (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+  (cinsn 8 (set (reg:CC %cr0)
+(compare:CC (reg:DI %r3)
+(const_int 0
+  (cinsn 2 (set (reg:DI %r4)
+   

[PATCHv3, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]

2023-05-25 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds a new insn for vector splat with small V2DI constants on P8.
If the value of the constant is in RANGE (-16, 15) and is not 0 or -1, it can be
loaded with vspltisw and vupkhsw on P8.  This should be more efficient than
loading the vector from memory.

  Compared to last version, the main change is to set a default value for third
parameter of vspltisw_vupkhsw_constant_p and call the function with 2 arguments
when the third one doesn't matter.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
2023-05-26  Haochen Gui 

gcc/
PR target/104124
* config/rs6000/altivec.md (*altivec_vupkhs_direct): Rename
to...
(altivec_vupkhs_direct): ...this.
* config/rs6000/constraints.md (wT constraint): New constant for a
vector constraint that can be loaded with vspltisw and vupkhsw.
* config/rs6000/predicates.md (vspltisw_vupkhsw_constant_split): New
predicate for wT constraint.
(easy_vector_constant): Call vspltisw_vupkhsw_constant_p to Check if
a vector constant can be synthesized with a vspltisw and a vupkhsw.
* config/rs6000/rs6000-protos.h (vspltisw_vupkhsw_constant_p): Declare.
* config/rs6000/rs6000.cc (vspltisw_vupkhsw_constant_p): New function
to return true if OP mode is V2DI and can be synthesized with vupkhsw
and vspltisw.
* config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up
constants with vspltisw and vupkhsw.

gcc/testsuite/
PR target/104124
* gcc.target/powerpc/pr104124.c: New.

patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 49b0c964f4d..2c932854c33 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs"
 }
   [(set_attr "type" "vecperm")])

-(define_insn "*altivec_vupkhs_direct"
+(define_insn "altivec_vupkhs_direct"
   [(set (match_operand:VP 0 "register_operand" "=v")
(unspec:VP [(match_operand: 1 "register_operand" "v")]
 UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index c4a6ccf4efb..e7f185660c0 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -144,6 +144,10 @@ (define_constraint "wS"
   "@internal Vector constant that can be loaded with XXSPLTIB & sign 
extension."
   (match_test "xxspltib_constant_split (op, mode)"))

+(define_constraint "wT"
+  "@internal Vector constant that can be loaded with vspltisw & vupkhsw."
+  (match_test "vspltisw_vupkhsw_constant_split (op, mode)"))
+
 ;; ISA 3.0 DS-form instruction that has the bottom 2 bits 0 and no update form.
 ;; Used by LXSD/STXSD/LXSSP/STXSSP.  In contrast to "Y", the multiple-of-four
 ;; offset is enforced for 32-bit too.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 52c65534e51..1ed770bffa6 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -694,6 +694,14 @@ (define_predicate "xxspltib_constant_split"
   return num_insns > 1;
 })

+;; Return true if the operand is a constant that can be loaded with a vspltisw
+;; instruction and then a vupkhsw instruction.
+
+(define_predicate "vspltisw_vupkhsw_constant_split"
+  (match_code "const_vector")
+{
+  return vspltisw_vupkhsw_constant_p (op, mode);
+})

 ;; Return 1 if the operand is constant that can loaded directly with a XXSPLTIB
 ;; instruction.
@@ -742,6 +750,11 @@ (define_predicate "easy_vector_constant"
   && xxspltib_constant_p (op, mode, _insns, ))
return true;

+  /* V2DI constant within RANGE (-16, 15) can be synthesized with a
+vspltisw and a vupkhsw.  */
+  if (vspltisw_vupkhsw_constant_p (op, mode, ))
+   return true;
+
   return easy_altivec_constant (op, mode);
 }

diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 1a4fc1df668..00cb2d82953 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, 
rtx, int, int, int,

 extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
+extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..ae34a02b282 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -6638,6 +6638,36 @@ xxspltib_constant_p (rtx op,
   return true;
 }

+/* Return true if OP mode is V2DI and can be synthesized with ISA 2.07
+   instructions vupkhsw and vspltisw.
+
+ 

[PATCHv2, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]

2023-05-04 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds a new insn for vector splat with small V2DI constants on P8.
If the value of the constant is in RANGE (-16, 15) and is not 0 or -1, it can be
loaded with vspltisw and vupkhsw on P8.  This should be more efficient than
loading the vector from the TOC.

  Compared to the last version, the main change is to move the constant check
from easy_altivec_constant to easy_vector_constant and remove some unnecessary
mode checks.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
2023-05-04  Haochen Gui 

gcc/
PR target/104124
* config/rs6000/altivec.md (*altivec_vupkhs_direct): Rename
to...
(altivec_vupkhs_direct): ...this.
* config/rs6000/constraints.md (wT constraint): New constant for a
vector constraint that can be loaded with vspltisw and vupkhsw.
* config/rs6000/predicates.md (vspltisw_vupkhsw_constant_split): New
predicate for wT constraint.
(easy_vector_constant): Call vspltisw_vupkhsw_constant_p to Check if
a vector constant can be synthesized with a vspltisw and a vupkhsw.
* config/rs6000/rs6000-protos.h (vspltisw_vupkhsw_constant_p): Declare.
* config/rs6000/rs6000.cc (vspltisw_vupkhsw_constant_p): New function
to return true if OP mode is V2DI and can be synthesized with vupkhsw
and vspltisw.
* config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up
constants with vspltisw and vupkhsw.

gcc/testsuite/
PR target/104124
* gcc.target/powerpc/pr104124.c: New.

patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 49b0c964f4d..2c932854c33 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs"
 }
   [(set_attr "type" "vecperm")])

-(define_insn "*altivec_vupkhs_direct"
+(define_insn "altivec_vupkhs_direct"
   [(set (match_operand:VP 0 "register_operand" "=v")
(unspec:VP [(match_operand: 1 "register_operand" "v")]
 UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index c4a6ccf4efb..e7f185660c0 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -144,6 +144,10 @@ (define_constraint "wS"
   "@internal Vector constant that can be loaded with XXSPLTIB & sign 
extension."
   (match_test "xxspltib_constant_split (op, mode)"))

+(define_constraint "wT"
+  "@internal Vector constant that can be loaded with vspltisw & vupkhsw."
+  (match_test "vspltisw_vupkhsw_constant_split (op, mode)"))
+
 ;; ISA 3.0 DS-form instruction that has the bottom 2 bits 0 and no update form.
 ;; Used by LXSD/STXSD/LXSSP/STXSSP.  In contrast to "Y", the multiple-of-four
 ;; offset is enforced for 32-bit too.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 52c65534e51..ff0f625d508 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -694,6 +694,16 @@ (define_predicate "xxspltib_constant_split"
   return num_insns > 1;
 })

+;; Return true if the operand is a constant that can be loaded with a vspltisw
+;; instruction and then a vupkhsw instruction.
+
+(define_predicate "vspltisw_vupkhsw_constant_split"
+  (match_code "const_vector")
+{
+  int value;
+
+  return vspltisw_vupkhsw_constant_p (op, mode, );
+})

 ;; Return 1 if the operand is constant that can loaded directly with a XXSPLTIB
 ;; instruction.
@@ -742,6 +752,11 @@ (define_predicate "easy_vector_constant"
   && xxspltib_constant_p (op, mode, _insns, ))
return true;

+  /* V2DI constant within RANGE (-16, 15) can be synthesized with a
+vspltisw and a vupkhsw.  */
+  if (vspltisw_vupkhsw_constant_p (op, mode, ))
+   return true;
+
   return easy_altivec_constant (op, mode);
 }

diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 1a4fc1df668..ba39a73abf8 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, 
rtx, int, int, int,

 extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
+extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..697b18e14f1 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -6638,6 +6638,36 @@ xxspltib_constant_p (rtx op,
   return true;
 }

+/* Return true if OP mode is V2DI and can be synthesized with ISA 2.07
+   instructions vupkhsw and vspltisw.
+
+   Return the constant 

Ping [PATCHv2, rs6000] Merge two vector shift when their sources are the same

2023-04-23 Thread HAO CHEN GUI via Gcc-patches
Hi
  Gently ping this.
https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612944.html

Thanks
Gui Haochen

在 2023/2/28 10:31, HAO CHEN GUI 写道:
> Hi,
>   This patch merges two "vsldoi" insns when their sources are the
> same.  In particular, the pair is simplified to a single move if the
> total shift is a multiple of 16 bytes.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no
> regressions.
> 
> Thanks
> Gui Haochen
> 
> 
> ChangeLog
> 2023-02-28  Haochen Gui 
> 
> gcc/
>   * config/rs6000/altivec.md (*altivec_vsldoi_dup_): New
>   insn_and_split to merge two vsldoi when the sources are the same.
> 
> gcc/testsuite/
>   * gcc.target/powerpc/vsldoi_merge.c: New.
> 
> 
> 
> patch.diff
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 84660073f32..fae8ec2b2e8 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -2529,6 +2529,35 @@ (define_insn "altivec_vsldoi_"
>"vsldoi %0,%1,%2,%3"
>[(set_attr "type" "vecperm")])
> 
> +(define_insn_and_split "*altivec_vsldoi_dup_"
> +  [(set (match_operand:VM 0 "register_operand" "=v")
> + (unspec:VM [(unspec:VM [(match_operand:VM 1 "register_operand" "v")
> + (match_dup 1)
> + (match_operand:QI 2 "immediate_operand" "i")]
> +UNSPEC_VSLDOI)
> + (unspec:VM [(match_dup 1)
> + (match_dup 1)
> + (match_dup 2)]
> +UNSPEC_VSLDOI)
> + (match_operand:QI 3 "immediate_operand" "i")]
> +UNSPEC_VSLDOI))]
> +  "TARGET_ALTIVEC"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +{
> +  unsigned int shift1 = UINTVAL (operands[2]);
> +  unsigned int shift2 = UINTVAL (operands[3]);
> +
> +  unsigned int shift = (shift1 + shift2) % 16;
> +  if (shift)
> +emit_insn (gen_altivec_vsldoi_ (operands[0], operands[1],
> +   operands[1], GEN_INT (shift)));
> +  else
> +emit_move_insn (operands[0], operands[1]);
> +  DONE;
> +})
> +
>  (define_insn "altivec_vupkhs"
>[(set (match_operand:VP 0 "register_operand" "=v")
>   (unspec:VP [(match_operand: 1 "register_operand" "v")]
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c 
> b/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c
> new file mode 100644
> index 000..eebd7b4d382
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c
> @@ -0,0 +1,59 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx -save-temps" } */
> +
> +#include "altivec.h"
> +
> +#ifdef DEBUG
> +#include 
> +#endif
> +
> +void abort (void);
> +
> +__attribute__ ((noipa)) vector signed int
> +test1 (vector signed int a)
> +{
> +  a = vec_sld (a, a, 2);
> +  a = vec_sld (a, a, 6);
> +  return a;
> +}
> +
> +__attribute__ ((noipa)) vector signed int
> +test2 (vector signed int a)
> +{
> +  a = vec_sld (a, a, 14);
> +  a = vec_sld (a, a, 2);
> +  return a;
> +}
> +
> +int main (void)
> +{
> +  vector signed int a = {1,2,3,4};
> +  vector signed int result_a;
> +  int i;
> +
> +  result_a = test1 (a);
> +  vector signed int expect_a = {3,4,1,2};
> +
> +  for (i = 0; i< 4; i++)
> +if (result_a[i] != expect_a[i])
> +#ifdef DEBUG
> +  printf("ERROR: test1 result[%d] = %d, not expected[%d] = %d\n",
> +  i, result_a[i], i, expect_a[i]);
> +#else
> +  abort ();
> +#endif
> +
> +  result_a = test2 (a);
> +
> +  for (i = 0; i< 4; i++)
> +if (result_a[i] != a[i])
> +#ifdef DEBUG
> +  printf("ERROR: test2 result[%d] = %d, not expected[%d] = %d\n",
> +  i, result_a[i], i, a[i]);
> +#else
> +  abort ();
> +#endif
> +}
> +
> +/* { dg-final { scan-assembler-times {\mvsldoi\M} 1 } } */


Ping^2 [PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694]

2023-04-23 Thread HAO CHEN GUI via Gcc-patches
Hi,
  Gently ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html

Thanks
Gui Haochen

在 2023/2/20 10:10, HAO CHEN GUI 写道:
> Hi,
>   Gently ping this:
>   https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html
> 
> Gui Haochen
> Thanks
> 
> 在 2023/2/8 13:08, HAO CHEN GUI 写道:
>> Hi,
>>   The logical operations for TImode are currently split after the reload
>> pass.  Some potential optimizations are missed because the split happens
>> too late.  This patch removes TImode from the "AND", "IOR", "XOR" and "NOT"
>> expanders so that these logical operations can be split at the expand pass.
>> The new test case illustrates the optimization.
>>
>>   Two test cases of pr92398 are merged into one as all sub-targets generate
>> the same sequence of instructions with the patch.
>>
>>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
>>
>> Thanks
>> Gui Haochen
>>
>>
>> ChangeLog
>> 2023-02-08  Haochen Gui 
>>
>> gcc/
>>  PR target/100694
>>  * config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit
>>  vector types.
>>  (and3): Replace BOOL_128 with BOOL_128_V.
>>  (ior3): Likewise.
>>  (xor3): Likewise.
>>  (one_cmpl2 expander): New expander with BOOL_128_V.
>>  (one_cmpl2 insn_and_split): Rename to ...
>>  (*one_cmpl2): ... this.
>>
>> gcc/testsuite/
>>  PR target/100694
>>  * gcc.target/powerpc/pr100694.c: New.
>>  * gcc.target/powerpc/pr92398.c: New.
>>  * gcc.target/powerpc/pr92398.h: Remove.
>>  * gcc.target/powerpc/pr92398.p9-.c: Remove.
>>  * gcc.target/powerpc/pr92398.p9+.c: Remove.
>>
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>> index 4bd1dfd3da9..455b7329643 100644
>> --- a/gcc/config/rs6000/rs6000.md
>> +++ b/gcc/config/rs6000/rs6000.md
>> @@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128  [TI
>>   (V2DF  "TARGET_ALTIVEC")
>>   (V1TI  "TARGET_ALTIVEC")])
>>
>> +;; Mode iterator for logical operations on 128-bit vector types
>> +(define_mode_iterator BOOL_128_V[(V16QI "TARGET_ALTIVEC")
>> + (V8HI  "TARGET_ALTIVEC")
>> + (V4SI  "TARGET_ALTIVEC")
>> + (V4SF  "TARGET_ALTIVEC")
>> + (V2DI  "TARGET_ALTIVEC")
>> + (V2DF  "TARGET_ALTIVEC")
>> + (V1TI  "TARGET_ALTIVEC")])
>> +
>>  ;; For the GPRs we use 3 constraints for register outputs, two that are the
>>  ;; same as the output register, and a third where the output register is an
>>  ;; early clobber, so we don't have to deal with register overlaps.  For the
>> @@ -7135,23 +7144,23 @@ (define_expand "subti3"
>>  ;; 128-bit logical operations expanders
>>
>>  (define_expand "and3"
>> -  [(set (match_operand:BOOL_128 0 "vlogical_operand")
>> -(and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
>> -  (match_operand:BOOL_128 2 "vlogical_operand")))]
>> +  [(set (match_operand:BOOL_128_V 0 "vlogical_operand")
>> +(and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand")
>> +(match_operand:BOOL_128_V 2 "vlogical_operand")))]
>>""
>>"")
>>
>>  (define_expand "ior3"
>> -  [(set (match_operand:BOOL_128 0 "vlogical_operand")
>> -(ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
>> -  (match_operand:BOOL_128 2 "vlogical_operand")))]
>> +  [(set (match_operand:BOOL_128_V 0 "vlogical_operand")
>> +(ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand")
>> +(match_operand:BOOL_128_V 2 "vlogical_operand")))]
>>""
>>"")
>>
>>  (define_expand "xor3"
>> -  [(set (match_operand:BOOL_128 0 "vlogical_operand")
>> -(xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
>> -  (match_operand:BOOL_128 2 "vlogical_operand")))]
>> +  [(set (match_operand:BOOL_128_V 0 "vlogical_operand")
>> +(xor:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand")
>> +(match_operand:BOOL_128_V 2 "vlogical_operand")))]
>>""
>>"")
>>
>> @@ -7449,7 +7458,14 @@ (define_insn_and_split "*eqv3_internal2"
>>   (const_string "16")))])
>>
>>  ;; 128-bit one's complement
>> -(define_insn_and_split "one_cmpl2"
>> +(define_expand "one_cmpl2"
>> +[(set (match_operand:BOOL_128_V 0 "vlogical_operand" "=")
>> +(not:BOOL_128_V
>> +  (match_operand:BOOL_128_V 1 "vlogical_operand" "")))]
>> +  ""
>> +  "")
>> +
>> +(define_insn_and_split "*one_cmpl2"
>>[(set (match_operand:BOOL_128 0 "vlogical_operand" "=")
>>  (not:BOOL_128
>>(match_operand:BOOL_128 1 "vlogical_operand" "")))]
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr100694.c
>> new file mode 100644
>> index 

Re: [PATCH-4, rs6000] Change ilp32 target check for some scalar-extract-sig and scalar-insert-exp test cases

2023-04-23 Thread HAO CHEN GUI via Gcc-patches
Hi,
  Gently ping this.
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609372.html

Thanks
Gui Haochen

在 2023/1/4 14:17, HAO CHEN GUI 写道:
> Hi,
>   "ilp32" is used in these test cases to make sure test cases only run on a
> 32-bit environment. Unfortunately, these cases also run with
> "-m32/-mpowerpc64" which causes unexpected errors. This patch changes the
> target check to skip if "has_arch_ppc64" is set. So the test cases won't run
> when arch_ppc64 has been set.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> Gui Haochen
> 
> ChangeLog
> 2023-01-03  Haochen Gui  
> 
> gcc/testsuite/
>   * gcc.target/powerpc/bfp/scalar-extract-sig-2.c: Replace ilp32 check
>   with dg-skip-if has_arch_ppc64.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-2.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-5.c: Likewise.
> 
> patch.diff
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c
> index 39ee74c94dc..148b5fbd9fa 100644
> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
> -/* { dg-require-effective-target ilp32 } */
> +/* { dg-skip-if "" { has_arch_ppc64 } } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
>  /* { dg-options "-mdejagnu-cpu=power9" } */
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c
> index efd69725905..956c1183beb 100644
> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
> -/* { dg-require-effective-target ilp32 } */
> +/* { dg-skip-if "" { has_arch_ppc64 } } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
>  /* { dg-options "-mdejagnu-cpu=power9" } */
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c
> index f85966a6fdf..9a7949fb89a 100644
> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
> -/* { dg-require-effective-target ilp32 } */
> +/* { dg-skip-if "" { has_arch_ppc64 } } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
>  /* { dg-options "-mdejagnu-cpu=power9" } */
> 


Re: [PATCH-3, rs6000] Change mode and insn condition for scalar insert exp instruction

2023-04-23 Thread HAO CHEN GUI via Gcc-patches
Hi,
  Gently ping this.
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609371.html

Thanks
Gui Haochen

在 2023/1/4 14:17, HAO CHEN GUI 写道:
> Hi,
>   This patch changes the mode of exponent to GPR in scalar insert exp
> pattern, as the exponent can be put into a 32-bit register. Also the
> condition check is changed from TARGET_64BIT to TARGET_POWERPC64.
> 
>   The test cases are modified according to the changes of expand pattern.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> Gui Haochen
> 
> ChangeLog
> 2023-01-03  Haochen Gui  
> 
> gcc/
>   * config/rs6000/rs6000-builtins.def
>   (__builtin_vsx_scalar_insert_exp): Replace bif-pattern from xsiexpdp
>   to xsiexpdp_di.
>   (__builtin_vsx_scalar_insert_exp_dp): Replace bif-pattern from
>   xsiexpdpf to xsiexpdpf_di.
>   * config/rs6000/vsx.md (xsiexpdp): Rename to...
>   (xsiexpdp_): ..., set the mode of second operand to GPR and
>   replace TARGET_64BIT with TARGET_POWERPC64.
>   (xsiexpdpf): Rename to...
>   (xsiexpdpf_): ..., set the mode of second operand to GPR and
>   replace TARGET_64BIT with TARGET_POWERPC64.
> 
> gcc/testsuite/
>   * gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Replace lp64 check
>   with has_arch_ppc64.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-1.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-4.c: Likewise.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index 25647b7bdd2..b1b5002d7d9 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -2854,10 +2854,10 @@
> 
>const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
>  unsigned long long);
> -VSIEDP xsiexpdp {}
> +VSIEDP xsiexpdp_di {}
> 
>const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned long 
> long);
> -VSIEDPF xsiexpdpf {}
> +VSIEDPF xsiexpdpf_di {}
> 
>pure vsc __builtin_vsx_xl_len_r (void *, signed long);
>  XL_LEN_R xl_len_r {}
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 27e03a4cf6c..3376090cc6f 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -5137,22 +5137,22 @@ (define_insn "xsiexpqp_"
>[(set_attr "type" "vecmove")])
> 
>  ;; VSX Scalar Insert Exponent Double-Precision
> -(define_insn "xsiexpdp"
> +(define_insn "xsiexpdp_"
>[(set (match_operand:DF 0 "vsx_register_operand" "=wa")
>   (unspec:DF [(match_operand:DI 1 "register_operand" "r")
> - (match_operand:DI 2 "register_operand" "r")]
> + (match_operand:GPR 2 "register_operand" "r")]
>UNSPEC_VSX_SIEXPDP))]
> -  "TARGET_P9_VECTOR && TARGET_64BIT"
> +  "TARGET_P9_VECTOR && TARGET_POWERPC64"
>"xsiexpdp %x0,%1,%2"
>[(set_attr "type" "fpsimple")])
> 
>  ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
> -(define_insn "xsiexpdpf"
> +(define_insn "xsiexpdpf_"
>[(set (match_operand:DF 0 "vsx_register_operand" "=wa")
>   (unspec:DF [(match_operand:DF 1 "register_operand" "r")
> - (match_operand:DI 2 "register_operand" "r")]
> + (match_operand:GPR 2 "register_operand" "r")]
>UNSPEC_VSX_SIEXPDP))]
> -  "TARGET_P9_VECTOR && TARGET_64BIT"
> +  "TARGET_P9_VECTOR && TARGET_POWERPC64"
>"xsiexpdp %x0,%1,%2"
>[(set_attr "type" "fpsimple")])
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c
> index d8243258a67..88d77564158 100644
> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c
> @@ -1,7 +1,7 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
> -/* { dg-require-effective-target lp64 } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
>  /* { dg-options "-mdejagnu-cpu=power9" } */
> +/* { dg-require-effective-target has_arch_ppc64 } */
> 
>  /* This test should succeed only on 64-bit configurations.  */
>  #include 
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c
> index 8260b107178..2f219ddc83a 100644
> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c
> @@ -1,7 +1,7 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
> -/* { dg-require-effective-target lp64 } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
>  /* { dg-options "-mdejagnu-cpu=power8" } */
> 

Ping [PATCH-2, rs6000] Change mode and insn condition for scalar extract sig instruction

2023-04-23 Thread HAO CHEN GUI via Gcc-patches
Hi,
  Gently ping this.
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609370.html

Thanks
Gui Haochen

在 2023/1/4 14:16, HAO CHEN GUI 写道:
> Hi,
>   This patch changes the return type of __builtin_vsx_scalar_extract_sig
> from const signed long to const signed long long, so that it can be called
> with "-m32/-mpowerpc64" option. The bif needs TARGET_POWERPC64 instead of
> TARGET_64BIT. So the condition check in the expander is changed.
> 
>   The test cases are modified according to the changes of expand pattern.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> Gui Haochen
> 
> ChangeLog
> 2023-01-03  Haochen Gui  
> 
> gcc/
>   * config/rs6000/rs6000-builtins.def
>   (__builtin_vsx_scalar_extract_sig): Set return type to const signed
>   long long.
>   * config/rs6000/vsx.md (xsxsigdp): Replace TARGET_64BIT with
>   TARGET_POWERPC64.
> 
> gcc/testsuite/
>   * gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Replace lp64 check
>   with has_arch_ppc64.
>   * gcc.target/powerpc/bfp/scalar-extract-sig-1.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Likewise.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index a8f1d3f1b3d..25647b7bdd2 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -2849,7 +2849,7 @@
>pure vsc __builtin_vsx_lxvl (const void *, signed long);
>  LXVL lxvl {}
> 
> -  const signed long __builtin_vsx_scalar_extract_sig (double);
> +  const signed long long __builtin_vsx_scalar_extract_sig (double);
>  VSESDP xsxsigdp {}
> 
>const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 229c26c3a61..27e03a4cf6c 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -5111,7 +5111,7 @@ (define_insn "xsxsigdp"
>[(set (match_operand:DI 0 "register_operand" "=r")
>   (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
>UNSPEC_VSX_SXSIG))]
> -  "TARGET_P9_VECTOR && TARGET_64BIT"
> +  "TARGET_P9_VECTOR && TARGET_POWERPC64"
>"xsxsigdp %0,%x1"
>[(set_attr "type" "integer")])
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c
> index 637080652b7..d22f7d1b274 100644
> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c
> @@ -1,7 +1,7 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
> -/* { dg-require-effective-target lp64 } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
>  /* { dg-options "-mdejagnu-cpu=power9" } */
> +/* { dg-require-effective-target has_arch_ppc64 } */
> 
>  /* This test should succeed only on 64-bit configurations.  */
>  #include 
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c
> index f12eed3d9d5..64747d73a51 100644
> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c
> @@ -1,7 +1,7 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
> -/* { dg-require-effective-target lp64 } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
>  /* { dg-options "-mdejagnu-cpu=power8" } */
> +/* { dg-require-effective-target has_arch_ppc64 } */
> 
>  /* This test should succeed only on 64-bit configurations.  */
>  #include 
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c
> index c85072da138..561be53fb9b 100644
> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c
> @@ -1,7 +1,7 @@
>  /* { dg-do run { target { powerpc*-*-* } } } */
> -/* { dg-require-effective-target lp64 } */
>  /* { dg-require-effective-target p9vector_hw } */
>  /* { dg-options "-mdejagnu-cpu=power9" } */
> +/* { dg-require-effective-target has_arch_ppc64 } */
> 
>  /* This test should succeed only on 64-bit configurations.  */
>  #include 


Ping [PATCH-1, rs6000] Change mode and insn condition for scalar extract exp instruction

2023-04-23 Thread HAO CHEN GUI via Gcc-patches
Hi,
  Gently ping this.
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609369.html

Thanks
Gui Haochen

在 2023/1/4 14:16, HAO CHEN GUI 写道:
> Hi,
>   This patch changes the return type of __builtin_vsx_scalar_extract_exp
> from const signed long to const signed int, as the exponent can be put in
> a signed int. It is also in line with the external interface definition of
> the bif. The mode of exponent operand in "xsxexpdp" is changed to GPR mode
> and TARGET_64BIT check is removed, as the instruction can be executed on
> a 32-bit environment.
> 
>   The test cases are modified according to the changes of expand pattern.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> Gui Haochen
> 
> ChangeLog
> 2022-12-23  Haochen Gui  
> 
> gcc/
>   * config/rs6000/rs6000-builtins.def
>   (__builtin_vsx_scalar_extract_exp): Set return type to const signed
>   int and set its bif-pattern to xsxexpdp_si, move it from power9-64 to
>   power9 catalog.
>   * config/rs6000/vsx.md (xsxexpdp): Rename to ...
>   (xsxexpdp_<mode>): ..., set mode of operand 0 to GPR and remove
>   TARGET_64BIT check.
>   * doc/extend.texi (scalar_extract_exp): Remove 64-bit environment
>   requirement when it has a 64-bit argument.
> 
> gcc/testsuite/
>   * gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Remove lp64 check.
>   * gcc.target/powerpc/bfp/scalar-extract-exp-1.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-extract-exp-2.c: Deleted as the case is
>   invalid.
>   * gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Remove lp64 check.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index f76f54793d7..a8f1d3f1b3d 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -2833,6 +2833,8 @@
>const signed int __builtin_dtstsfi_ov_td (const int<6>, _Decimal128);
>  TSTSFI_OV_TD dfptstsfi_unordered_td {}
> 
> +  const signed int  __builtin_vsx_scalar_extract_exp (double);
> +VSEEDP xsxexpdp_si {}
> 
>  [power9-64]
>void __builtin_altivec_xst_len_r (vsc, void *, long);
> @@ -2847,9 +2849,6 @@
>pure vsc __builtin_vsx_lxvl (const void *, signed long);
>  LXVL lxvl {}
> 
> -  const signed long __builtin_vsx_scalar_extract_exp (double);
> -VSEEDP xsxexpdp {}
> -
>const signed long __builtin_vsx_scalar_extract_sig (double);
>  VSESDP xsxsigdp {}
> 
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 992fbc983be..229c26c3a61 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -5089,11 +5089,11 @@ (define_insn "xsxexpqp_"
>[(set_attr "type" "vecmove")])
> 
>  ;; VSX Scalar Extract Exponent Double-Precision
> -(define_insn "xsxexpdp"
> -  [(set (match_operand:DI 0 "register_operand" "=r")
> - (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
> +(define_insn "xsxexpdp_"
> +  [(set (match_operand:GPR 0 "register_operand" "=r")
> + (unspec:GPR [(match_operand:DF 1 "vsx_register_operand" "wa")]
>UNSPEC_VSX_SXEXPDP))]
> -  "TARGET_P9_VECTOR && TARGET_64BIT"
> +  "TARGET_P9_VECTOR"
>"xsxexpdp %0,%x1"
>[(set_attr "type" "integer")])
> 
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index d3812fa55b0..7c087967234 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -19598,7 +19598,10 @@ bool scalar_test_neg (double source);
>  bool scalar_test_neg (__ieee128 source);
>  @end smallexample
> 
> -The @code{scalar_extract_exp} and @code{scalar_extract_sig}
> +The @code{scalar_extract_exp} with a 64-bit source argument
> +function requires an environment supporting ISA 3.0 or later.
> +The @code{scalar_extract_exp} with a 128-bit source argument
> +and @code{scalar_extract_sig}
>  functions require a 64-bit environment supporting ISA 3.0 or later.
>  The @code{scalar_extract_exp} and @code{scalar_extract_sig} built-in
>  functions return the significand and the biased exponent value
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
> index 35bf1b240f3..d971833748e 100644
> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
> @@ -1,9 +1,7 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
> -/* { dg-require-effective-target lp64 } */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
>  /* { dg-options "-mdejagnu-cpu=power9" } */
> 
> -/* This test should succeed only on 64-bit configurations.  */
>  #include 
> 
>  unsigned int
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c
> index 9737762c1d4..1cb438f9b70 100644
> --- 

PING^2 [PATCH, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]

2023-04-23 Thread HAO CHEN GUI via Gcc-patches
Hi,
   Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601909.html

Thanks
Gui Haochen

在 2022/12/14 13:30, HAO CHEN GUI 写道:
> Hi,
>Gentle ping this:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601909.html
> 
> Thanks
> Gui Haochen
> 
> 在 2022/9/21 13:13, HAO CHEN GUI 写道:
>> Hi,
>>   This patch adds a new insn for vector splat with small V2DI constants on 
>> P8.
>> If the value of constant is in RANGE (-16, 15) and not 0 or -1, it can be 
>> loaded
>> with vspltisw and vupkhsw on P8. It should be more efficient than loading vector 
>> from
>> TOC.
>>
>>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
>> Is this okay for trunk? Any recommendations? Thanks a lot.
>>
>> ChangeLog
>> 2022-09-21 Haochen Gui 
>>
>> gcc/
>>  PR target/104124
>>  * config/rs6000/altivec.md (*altivec_vupkhs_direct): Renamed
>>  to...
>>  (altivec_vupkhs_direct): ...this.
>>  * config/rs6000/constraints.md (wT constraint): New constraint for a
>>  vector constant that can be loaded with vspltisw and vupkhsw.
>>  * config/rs6000/predicates.md (vspltisw_constant_split): New
>>  predicate for wT constraint.
>>  * config/rs6000/rs6000-protos.h (vspltisw_constant_p): Add declaration.
>>  * config/rs6000/rs6000.cc (easy_altivec_constant): Call
>>  vspltisw_constant_p to judge if a V2DI constant can be synthesized with
>>  a vspltisw and a vupkhsw.
>>  * (vspltisw_constant_p): New function to return true if OP mode is
>>  V2DI and can be synthesized with ISA 2.07 instruction vupkhsw and
>>  vspltisw.
>>  * gcc/config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up
>>  constants with vspltisw and vupkhsw.
>>
>> gcc/testsuite/
>>  PR target/104124
>>  * gcc.target/powerpc/p8-splat.c: New.
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
>> index 2c4940f2e21..185414df021 100644
>> --- a/gcc/config/rs6000/altivec.md
>> +++ b/gcc/config/rs6000/altivec.md
>> @@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs"
>>  }
>>[(set_attr "type" "vecperm")])
>>
>> -(define_insn "*altivec_vupkhs_direct"
>> +(define_insn "altivec_vupkhs_direct"
>>[(set (match_operand:VP 0 "register_operand" "=v")
>>  (unspec:VP [(match_operand: 1 "register_operand" "v")]
>>   UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
>> diff --git a/gcc/config/rs6000/constraints.md 
>> b/gcc/config/rs6000/constraints.md
>> index 5a44a92142e..f65dea6e0c7 100644
>> --- a/gcc/config/rs6000/constraints.md
>> +++ b/gcc/config/rs6000/constraints.md
>> @@ -150,6 +150,10 @@ (define_constraint "wS"
>>"@internal Vector constant that can be loaded with XXSPLTIB & sign 
>> extension."
>>(match_test "xxspltib_constant_split (op, mode)"))
>>
>> +(define_constraint "wT"
>> +  "@internal Vector constant that can be loaded with vspltisw & vupkhsw."
>> +  (match_test "vspltisw_constant_split (op, mode)"))
>> +
>>  ;; ISA 3.0 DS-form instruction that has the bottom 2 bits 0 and no update 
>> form.
>>  ;; Used by LXSD/STXSD/LXSSP/STXSSP.  In contrast to "Y", the 
>> multiple-of-four
>>  ;; offset is enforced for 32-bit too.
>> diff --git a/gcc/config/rs6000/predicates.md 
>> b/gcc/config/rs6000/predicates.md
>> index b1fcc69bb60..00cf60bbe58 100644
>> --- a/gcc/config/rs6000/predicates.md
>> +++ b/gcc/config/rs6000/predicates.md
>> @@ -694,6 +694,19 @@ (define_predicate "xxspltib_constant_split"
>>return num_insns > 1;
>>  })
>>
>> +;; Return true if the operand is a constant that can be loaded with a 
>> vspltisw
>> +;; instruction and then a vupkhsw instruction.
>> +
>> +(define_predicate "vspltisw_constant_split"
>> +  (match_code "const_vector,vec_duplicate")
>> +{
>> +  int value = 32;
>> +
>> +  if (!vspltisw_constant_p (op, mode, ))
>> +return false;
>> +
>> +  return true;
>> +})
>>
>>  ;; Return 1 if the operand is constant that can loaded directly with a 
>> XXSPLTIB
>>  ;; instruction.
>> diff --git a/gcc/config/rs6000/rs6000-protos.h 
>> b/gcc/config/rs6000/rs6000-protos.h
>> index b3c16e7448d..45f3d044eee 100644
>> --- a/gcc/config/rs6000/rs6000-protos.h
>> +++ b/gcc/config/rs6000/rs6000-protos.h
>> @@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, 
>> rtx, int, int, int,
>>
>>  extern int easy_altivec_constant (rtx, machine_mode);
>>  extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
>> +extern bool vspltisw_constant_p (rtx, machine_mode, int *);
>>  extern int vspltis_shifted (rtx);
>>  extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
>>  extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index df491bee2ea..984624026c2 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -6292,6 +6292,12 @@ easy_altivec_constant (rtx op, machine_mode mode)
>>&& INTVAL (CONST_VECTOR_ELT (op, 1)) 

[PATCH 2/2, rs6000] xfail float128 comparison test case that fails on powerpc64 [PR108728]

2023-04-20 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch xfails a float128 comparison test case on powerpc64
that fails due to a longstanding issue with floating-point
compares.

  See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58684 for more
information.

  The patch passed regression test on Power Linux platforms.

Thanks
Gui Haochen

ChangeLog
rs6000: xfail float128 comparison test case that fails on powerpc64.

This patch xfails a float128 comparison test case on powerpc64 that
fails due to a longstanding issue with floating-point compares.

See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58684 for more information.

When float128 hardware is enabled (-mfloat128-hardware), xscmpuqp is
generated for comparison which is unexpected.   When float128 software
simulation is enabled (-mno-float128-hardware), we still have to xfail
the hardware version (__lekf2_hw) which finally invokes xscmpuqp.

gcc/testsuite/
PR target/108728
* gcc.dg/torture/float128-cmp-invalid.c: Add xfail.

patch.diff
diff --git a/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c 
b/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c
index 1f675efdd61..a86592b3328 100644
--- a/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c
+++ b/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c
@@ -1,5 +1,6 @@
 /* Test for "invalid" exceptions from __float128 comparisons.  */
 /* { dg-do run } */
+/* { dg-xfail-run-if "ppc float128_hw" { ppc_float128_hw || { 
ppc_cpu_supports_hw && p9vector_hw } } } */
 /* { dg-options "" } */
 /* { dg-require-effective-target __float128 } */
 /* { dg-require-effective-target base_quadfloat_support } */


[PATCH 1/2, rs6000] make ppc_cpu_supports_hw as effective target keyword [PR108728]

2023-04-20 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds ppc_cpu_supports_hw into explicit name checking in
proc is-effective-target-keyword. So ppc_cpu_supports_hw can be used
as a target selector in test directives. It's required by patch2 of
this issue.

Thanks
Gui Haochen

ChangeLog
testsuite: make ppc_cpu_supports_hw as effective target keyword [PR108728]

gcc/testsuite/
PR target/108728
* lib/target-supports.exp (is-effective-target-keyword): Add
ppc_cpu_supports_hw.


patch.diff
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 1d6cc6f8d88..e65b447663f 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -9170,6 +9170,7 @@ proc is-effective-target-keyword { arg } {
  "named_sections" { return 1 }
  "gc_sections"{ return 1 }
  "cxa_atexit" { return 1 }
+ "ppc_cpu_supports_hw" { return 1 }
  default  { return 0 }
}
 }


[PATCH-1, rs6000] xfail float128 comparison test case that fails on powerpc64 [PR108728]

2023-04-17 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch xfails a float128 comparison test case on powerpc64
that fails due to a longstanding issue with floating-point
compares.

  See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58684 for more
information.

  The case is xfailed when instructions of float128 hardware are
generated. When software simulation is used, the case should pass.

  The patch passed regression test on Power Linux platforms.

Thanks
Gui Haochen

ChangeLog
rs6000: xfail float128 comparison test case that fails on powerpc64.

This patch xfails a float128 comparison test case on powerpc64 that
fails due to a longstanding issue with floating-point compares.

See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58684 for more information.

gcc/testsuite/
PR target/108728
* gcc.dg/torture/float128-cmp-invalid.c: Add xfail.

patch.diff
diff --git a/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c 
b/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c
index 1f675efdd61..7b520d1f9f1 100644
--- a/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c
+++ b/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c
@@ -1,5 +1,5 @@
 /* Test for "invalid" exceptions from __float128 comparisons.  */
-/* { dg-do run } */
+/* { dg-do run { xfail { ppc_float128_hw || { ppc_cpu_supports_hw && 
p9vector_hw } } } } */
 /* { dg-options "" } */
 /* { dg-require-effective-target __float128 } */
 /* { dg-require-effective-target base_quadfloat_support } */


[PATCH-2, rs6000] Add ppc_cpu_supports_hw into proc is-effective-target-keyword [PR108728]

2023-04-17 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds ppc_cpu_supports_hw into explicit name checking in
proc is-effective-target-keyword. So ppc_cpu_supports_hw can be used
as a target selector in test directives.

  The patch passed regression test on Power Linux platforms.

Thanks
Gui Haochen

ChangeLog
rs6000: Add ppc_cpu_supports_hw into proc is-effective-target-keyword.

gcc/testsuite/
PR target/108728
* lib/target-supports.exp (is-effective-target-keyword): Add
ppc_cpu_supports_hw.


patch.diff
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 1d6cc6f8d88..e65b447663f 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -9170,6 +9170,7 @@ proc is-effective-target-keyword { arg } {
  "named_sections" { return 1 }
  "gc_sections"{ return 1 }
  "cxa_atexit" { return 1 }
+ "ppc_cpu_supports_hw" { return 1 }
  default  { return 0 }
}
 }


Re: [PATCH, rs6000] xfail float128 comparison test case that fails on powerpc64 [PR108728]

2023-04-13 Thread HAO CHEN GUI via Gcc-patches
Hi Kewen,

在 2023/4/13 16:32, Kewen.Lin 写道:
> xfail all powerpc*-*-* can have some XPASSes on those ENVs with
> software emulation.  Since the related hw insn xscmpuqp is guarded
> with TARGET_FLOAT128_HW, could we use the effective target
> ppc_float128_hw instead?

Thanks for your review comments. It's tricky. The test invokes "__lekf2"
with "-mno-float128-hardware", but it doesn't always pass the check.
With the math library on P8, it passes. With the library on P9, it fails.
So it totally depends on the version of the library, which is not
controlled by GCC. What's your opinion?

Test result on P9
make check-gcc-c RUNTESTFLAGS="--target_board=unix'{-mno-float128-hardware}' 
dg-torture.exp=float128-cmp-invalid.c"

FAIL: gcc.dg/torture/float128-cmp-invalid.c   -O0  execution test
FAIL: gcc.dg/torture/float128-cmp-invalid.c   -O1  execution test
FAIL: gcc.dg/torture/float128-cmp-invalid.c   -O2  execution test
FAIL: gcc.dg/torture/float128-cmp-invalid.c   -O3 -g  execution test
FAIL: gcc.dg/torture/float128-cmp-invalid.c   -Os  execution test
FAIL: gcc.dg/torture/float128-cmp-invalid.c   -O2 -flto -fno-use-linker-plugin 
-flto-partition=none  execution test
FAIL: gcc.dg/torture/float128-cmp-invalid.c   -O2 -flto -fuse-linker-plugin 
-fno-fat-lto-objects  execution test

=== gcc Summary ===

# of expected passes7
# of unexpected failures7

Gui Haochen
Thanks


[PATCH, rs6000] xfail float128 comparison test case that fails on powerpc64 [PR108728]

2023-04-11 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch xfails a float128 comparison test case on powerpc64 that
fails due to a longstanding issue with floating-point compares.

  See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58684 for more
information.

  The patch passed regression test on Power Linux platforms.

Thanks
Gui Haochen

ChangeLog
rs6000: xfail float128 comparison test case that fails on powerpc64.

This patch xfails a float128 comparison test case on powerpc64 that
fails due to a longstanding issue with floating-point compares.

See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58684 for more information.

gcc/testsuite/
PR target/108728
* gcc.dg/torture/float128-cmp-invalid.c: Add xfail.

patch.diff
diff --git a/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c 
b/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c
index 1f675efdd61..f52686e0a24 100644
--- a/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c
+++ b/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c
@@ -1,5 +1,5 @@
 /* Test for "invalid" exceptions from __float128 comparisons.  */
-/* { dg-do run } */
+/* { dg-do run { xfail { powerpc*-*-* } } } */
 /* { dg-options "" } */
 /* { dg-require-effective-target __float128 } */
 /* { dg-require-effective-target base_quadfloat_support } */


[PATCHv3, rs6000] rs6000: correct vector sign extend built-ins on Big Endian [PR108812]

2023-04-05 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch removes the byte reverse operation before vector integer sign
extension on big endian. These built-ins are required to sign extend the element
of the input vector that would fall in the least significant portion of the
result element. So both BE and LE should do the same operation and the byte
reversal is not needed. This patch fixes it. Now these built-ins have the same
behavior on all compilers.

  The unnecessary expand patterns are removed and the names of insn pattern
are set to the same style. Also the test cases are modified.

  The patch passed regression test on Power Linux platforms.

Thanks
Gui Haochen

ChangeLog
rs6000: correct vector sign extend builtins on Big Endian

gcc/
PR target/108812
* config/rs6000/vsx.md (vsx_sign_extend_qi_): Rename to...
(vsx_sign_extend_v16qi_): ... this.
(vsx_sign_extend_hi_): Rename to...
(vsx_sign_extend_v8hi_): ... this.
(vsx_sign_extend_si_v2di): Rename to...
(vsx_sign_extend_v4si_v2di): ... this.
(vsignextend_qi_): Remove.
(vsignextend_hi_): Remove.
(vsignextend_si_v2di): Remove.
(vsignextend_v2di_v1ti): Remove.
(*xxspltib__split): Replace gen_vsx_sign_extend_qi_v2di with
gen_vsx_sign_extend_v16qi_v2di and gen_vsx_sign_extend_qi_v4si
with gen_vsx_sign_extend_v16qi_v4si.
* config/rs6000/rs6000.md (split for DI constant generation):
Replace gen_vsx_sign_extend_qi_si with gen_vsx_sign_extend_v16qi_si.
(split for HSDI constant generation): Replace gen_vsx_sign_extend_qi_di
with gen_vsx_sign_extend_v16qi_di and gen_vsx_sign_extend_qi_si
with gen_vsx_sign_extend_v16qi_si.
* config/rs6000/rs6000-builtins.def (__builtin_altivec_vsignextsb2d):
Set bif-pattern to vsx_sign_extend_v16qi_v2di.
(__builtin_altivec_vsignextsb2w): Set bif-pattern to
vsx_sign_extend_v16qi_v4si.
(__builtin_altivec_visgnextsh2d): Set bif-pattern to
vsx_sign_extend_v8hi_v2di.
(__builtin_altivec_vsignextsh2w): Set bif-pattern to
vsx_sign_extend_v8hi_v4si.
(__builtin_altivec_vsignextsw2d): Set bif-pattern to
vsx_sign_extend_v4si_v2di.
(__builtin_altivec_vsignext): Set bif-pattern to
vsx_sign_extend_v2di_v1ti.
* config/rs6000/rs6000-builtin.cc (lxvrse_expand_builtin): Replace
gen_vsx_sign_extend_qi_v2di with gen_vsx_sign_extend_v16qi_v2di,
gen_vsx_sign_extend_hi_v2di with gen_vsx_sign_extend_v8hi_v2di and
gen_vsx_sign_extend_si_v2di with gen_vsx_sign_extend_v4si_v2di.

gcc/testsuite/
PR target/108812
* gcc.target/powerpc/p9-sign_extend-runnable.c: Set corresponding
expected vectors for Big Endian.
* gcc.target/powerpc/int_128bit-runnable.c: Likewise.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 90ab39dc258..c66cff17681 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -2840,17 +2840,17 @@ lxvrse_expand_builtin (rtx target, insn_code icode, rtx 
*op,
   if (icode == CODE_FOR_vsx_lxvrbx)
 {
   temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
-  emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+  emit_insn (gen_vsx_sign_extend_v16qi_v2di (discratch, temp1));
 }
   else if (icode == CODE_FOR_vsx_lxvrhx)
 {
   temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
-  emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+  emit_insn (gen_vsx_sign_extend_v8hi_v2di (discratch, temp1));
 }
   else if (icode == CODE_FOR_vsx_lxvrwx)
 {
   temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
-  emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+  emit_insn (gen_vsx_sign_extend_v4si_v2di (discratch, temp1));
 }
   else if (icode == CODE_FOR_vsx_lxvrdx)
 discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f76f54793d7..6bfe9246a02 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2687,19 +2687,19 @@
 VRLWNM altivec_vrlwnm {}

   const vsll __builtin_altivec_vsignextsb2d (vsc);
-VSIGNEXTSB2D vsignextend_qi_v2di {}
+VSIGNEXTSB2D vsx_sign_extend_v16qi_v2di {}

   const vsi __builtin_altivec_vsignextsb2w (vsc);
-VSIGNEXTSB2W vsignextend_qi_v4si {}
+VSIGNEXTSB2W vsx_sign_extend_v16qi_v4si {}

   const vsll __builtin_altivec_visgnextsh2d (vss);
-VSIGNEXTSH2D vsignextend_hi_v2di {}
+VSIGNEXTSH2D vsx_sign_extend_v8hi_v2di {}

   const vsi __builtin_altivec_vsignextsh2w (vss);
-VSIGNEXTSH2W vsignextend_hi_v4si {}
+VSIGNEXTSH2W vsx_sign_extend_v8hi_v4si {}

   const vsll __builtin_altivec_vsignextsw2d (vsi);
-VSIGNEXTSW2D vsignextend_si_v2di {}
+VSIGNEXTSW2D vsx_sign_extend_v4si_v2di {}

[PATCHv2, rs6000] rs6000: correct vector sign extend built-ins on Big Endian [PR108812]

2023-03-28 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch removes the byte reverse operation before vector integer sign
extension on big endian. These built-ins are required to sign extend the element
of the input vector that would fall in the least significant portion of the
result element. So both BE and LE should do the same operation and the byte
reversal is not needed. This patch fixes it. Now these built-ins have the same
behavior on all compilers.

  The unnecessary expand patterns are removed and the names of insn pattern
are set to the same style. Also the test case is modified.

  The patch passed regression test on Power Linux platforms.

Thanks
Gui Haochen

ChangeLog
rs6000: correct vector sign extend builtins on Big Endian

gcc/
PR target/108812
* config/rs6000/vsx.md (vsx_sign_extend_qi_): Rename to...
(vsx_sign_extend_v16qi_): ... this.
(vsx_sign_extend_hi_): Rename to...
(vsx_sign_extend_v8hi_): ... this.
(vsx_sign_extend_si_v2di): Rename to...
(vsx_sign_extend_v4si_v2di): ... this.
(vsignextend_qi_): Remove.
(vsignextend_hi_): Remove.
(vsignextend_si_v2di): Remove.
(*xxspltib__split): Replace gen_vsx_sign_extend_qi_v2di with
gen_vsx_sign_extend_v16qi_v2di and gen_vsx_sign_extend_qi_v4si
with gen_vsx_sign_extend_v16qi_v4si.
* config/rs6000/rs6000.md (split for DI constant generation):
Replace gen_vsx_sign_extend_qi_si with gen_vsx_sign_extend_v16qi_si.
(split for HSDI constant generation): Replace gen_vsx_sign_extend_qi_di
with gen_vsx_sign_extend_v16qi_di and gen_vsx_sign_extend_qi_si
with gen_vsx_sign_extend_v16qi_si.
* config/rs6000/rs6000-builtins.def (__builtin_altivec_vsignextsb2d):
Set bif-pattern to vsx_sign_extend_v16qi_v2di.
(__builtin_altivec_vsignextsb2w): Set bif-pattern to
vsx_sign_extend_v16qi_v4si.
(__builtin_altivec_visgnextsh2d): Set bif-pattern to
vsx_sign_extend_v8hi_v2di.
(__builtin_altivec_vsignextsh2w): Set bif-pattern to
vsx_sign_extend_v8hi_v4si.
(__builtin_altivec_vsignextsw2d): Set bif-pattern to
vsx_sign_extend_v4si_v2di.
* config/rs6000/rs6000-builtin.cc (lxvrse_expand_builtin): Replace
gen_vsx_sign_extend_qi_v2di with gen_vsx_sign_extend_v16qi_v2di,
gen_vsx_sign_extend_hi_v2di with gen_vsx_sign_extend_v8hi_v2di and
gen_vsx_sign_extend_si_v2di with gen_vsx_sign_extend_v4si_v2di.

gcc/testsuite/
PR target/108812
* gcc.target/powerpc/p9-sign_extend-runnable.c: Set different expected
vectors for Big Endian.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 90ab39dc258..c66cff17681 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -2840,17 +2840,17 @@ lxvrse_expand_builtin (rtx target, insn_code icode, rtx 
*op,
   if (icode == CODE_FOR_vsx_lxvrbx)
 {
   temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
-  emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+  emit_insn (gen_vsx_sign_extend_v16qi_v2di (discratch, temp1));
 }
   else if (icode == CODE_FOR_vsx_lxvrhx)
 {
   temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
-  emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+  emit_insn (gen_vsx_sign_extend_v8hi_v2di (discratch, temp1));
 }
   else if (icode == CODE_FOR_vsx_lxvrwx)
 {
   temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
-  emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+  emit_insn (gen_vsx_sign_extend_v4si_v2di (discratch, temp1));
 }
   else if (icode == CODE_FOR_vsx_lxvrdx)
 discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f76f54793d7..55e9cf9ece9 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2687,19 +2687,19 @@
 VRLWNM altivec_vrlwnm {}

   const vsll __builtin_altivec_vsignextsb2d (vsc);
-VSIGNEXTSB2D vsignextend_qi_v2di {}
+VSIGNEXTSB2D vsx_sign_extend_v16qi_v2di {}

   const vsi __builtin_altivec_vsignextsb2w (vsc);
-VSIGNEXTSB2W vsignextend_qi_v4si {}
+VSIGNEXTSB2W vsx_sign_extend_v16qi_v4si {}

   const vsll __builtin_altivec_visgnextsh2d (vss);
-VSIGNEXTSH2D vsignextend_hi_v2di {}
+VSIGNEXTSH2D vsx_sign_extend_v8hi_v2di {}

   const vsi __builtin_altivec_vsignextsh2w (vss);
-VSIGNEXTSH2W vsignextend_hi_v4si {}
+VSIGNEXTSH2W vsx_sign_extend_v8hi_v4si {}

   const vsll __builtin_altivec_vsignextsw2d (vsi);
-VSIGNEXTSW2D vsignextend_si_v2di {}
+VSIGNEXTSW2D vsx_sign_extend_v4si_v2di {}

   const vsc __builtin_altivec_vslv (vsc, vsc);
 VSLV vslv {}
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6011f5bf76a..17b5cd171b1 100644
--- 

Re: [PATCH] [rs6000] Correct match pattern in pr56605.c

2023-03-27 Thread HAO CHEN GUI via Gcc-patches
Kewen,
  The case still fails with trunk.

FAIL: gcc.target/powerpc/pr56605.c scan-rtl-dump-times combine "\\(compare:CC 
\\((?:and|zero_extend):(?:[SD]I) \\((?:sub)?reg:[SD]I" 1

=== gcc Summary ===

# of expected passes		1
# of unexpected failures	1

  With the trunk, it should match the pattern.
(compare:CC (and:SI (subreg:SI (reg:DI 207) 0)

Thanks
Gui Haochen


在 2023/3/27 15:41, Kewen.Lin 写道:
> Hi Alexandre and Haochen,
> 
> on 2023/3/25 16:42, Alexandre Oliva via Gcc-patches wrote:
>>
>> Ping https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590958.html
>>
>> From: Haochen Gui 
>>
>> This patch corrects the match pattern in pr56605.c. The former pattern
>> is wrong and test case fails with GCC11. It should match following
>> insn on each subtarget after mode promotion is disabled. The patch
>> need to be backported to GCC11.
> 
> Comment https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102146#c21 made me
> feel that this test issue was just in branches, but this proposed patch
> seems to say it still exists on trunk, could you confirm that?
> 
> BR,
> Kewen
> 
>>
>> //gimple
>> _17 = (unsigned int) _20;
>>  prolog_loop_niters.4_23 = _17 & 3;
>>
>> //rtl
>> (insn 19 18 20 2 (parallel [
>> (set (reg:CC 208)
>> (compare:CC (and:SI (subreg:SI (reg:DI 207) 0)
>> (const_int 3 [0x3]))
>> (const_int 0 [0])))
>> (set (reg:SI 129 [ prolog_loop_niters.5 ])
>> (and:SI (subreg:SI (reg:DI 207) 0)
>> (const_int 3 [0x3])))
>> ]) 197 {*andsi3_imm_mask_dot2}
>>
>> Rebased.  Regstrapped on ppc64-linux-gnu.  Also tested with
>> ppc64-vxworks7r2 (gcc-12), where it's also needed.  Ok to install?
>>
>>
>> for  gcc/testsuite/ChangeLog
>>
>>  PR target/102146
>>  * gcc.target/powerpc/pr56605.c: Correct match pattern in
>>  combine pass.
>> ---
>>  gcc/testsuite/gcc.target/powerpc/pr56605.c |3 +--
>>  1 file changed, 1 insertion(+), 2 deletions(-)
>>
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr56605.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> index 7695f87db6f66..651a88e3cc7f9 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> @@ -11,5 +11,4 @@ void foo (short* __restrict sb, int* __restrict ia)
>>  ia[i] = (int) sb[i];
>>  }
>>  
>> -/* { dg-final { scan-rtl-dump-times {\(compare:CC 
>> \((?:and|zero_extend):(?:[SD]I) \((?:sub)?reg:[SD]I} 1 "combine" } } */
>> -
>> +/* { dg-final { scan-rtl-dump-times {\(compare:CC \(and:SI \(subreg:SI 
>> \(reg:DI} 1 "combine" } } */
> 


[PATCH, rs6000] rs6000: correct vector sign extend built-ins on Big Endian [PR108812]

2023-03-27 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch removes the byte reverse operation before vector integer sign
extension on Big Endian. These built-ins are required to sign extend the
rightmost element. So both BE and LE should do the same operation and the byte
reversal is not needed. This patch fixes it. Now these built-ins have the same
behavior on all compilers. The test case is also modified.

  The patch passed regression test on Power Linux platforms.

Thanks
Gui Haochen

ChangeLog
rs6000: correct vector sign extend builtins on Big Endian

gcc/
PR target/108812
* config/rs6000/vsx.md (vsignextend_qi_): Remove byte reverse
for Big Endian.
(vsignextend_hi_): Likewise.
(vsignextend_si_v2di): Remove.
* config/rs6000/rs6000-builtins.def (__builtin_altivec_vsignextsw2d):
Set bif-pattern to vsx_sign_extend_si_v2di.

gcc/testsuite/
PR target/108812
* gcc.target/powerpc/p9-sign_extend-runnable.c: Set different expected
vectors for Big Endian.


patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f76f54793d7..059a455b388 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2699,7 +2699,7 @@
 VSIGNEXTSH2W vsignextend_hi_v4si {}

   const vsll __builtin_altivec_vsignextsw2d (vsi);
-VSIGNEXTSW2D vsignextend_si_v2di {}
+VSIGNEXTSW2D vsx_sign_extend_si_v2di {}

   const vsc __builtin_altivec_vslv (vsc, vsc);
 VSLV vslv {}
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 992fbc983be..9e9b33f56ab 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4941,14 +4941,7 @@ (define_expand "vsignextend_qi_"
 UNSPEC_VSX_SIGN_EXTEND))]
   "TARGET_P9_VECTOR"
 {
-  if (BYTES_BIG_ENDIAN)
-{
-  rtx tmp = gen_reg_rtx (V16QImode);
-  emit_insn (gen_altivec_vrevev16qi2(tmp, operands[1]));
-  emit_insn (gen_vsx_sign_extend_qi_(operands[0], tmp));
-}
-  else
-emit_insn (gen_vsx_sign_extend_qi_(operands[0], operands[1]));
+  emit_insn (gen_vsx_sign_extend_qi_(operands[0], operands[1]));
   DONE;
 })

@@ -4968,14 +4961,7 @@ (define_expand "vsignextend_hi_"
 UNSPEC_VSX_SIGN_EXTEND))]
   "TARGET_P9_VECTOR"
 {
-  if (BYTES_BIG_ENDIAN)
-{
-  rtx tmp = gen_reg_rtx (V8HImode);
-  emit_insn (gen_altivec_vrevev8hi2(tmp, operands[1]));
-  emit_insn (gen_vsx_sign_extend_hi_(operands[0], tmp));
-}
-  else
- emit_insn (gen_vsx_sign_extend_hi_(operands[0], operands[1]));
+  emit_insn (gen_vsx_sign_extend_hi_(operands[0], operands[1]));
   DONE;
 })

@@ -4987,24 +4973,6 @@ (define_insn "vsx_sign_extend_si_v2di"
   "vextsw2d %0,%1"
   [(set_attr "type" "vecexts")])

-(define_expand "vsignextend_si_v2di"
-  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
-   (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
-UNSPEC_VSX_SIGN_EXTEND))]
-  "TARGET_P9_VECTOR"
-{
-  if (BYTES_BIG_ENDIAN)
-{
-   rtx tmp = gen_reg_rtx (V4SImode);
-
-   emit_insn (gen_altivec_vrevev4si2(tmp, operands[1]));
-   emit_insn (gen_vsx_sign_extend_si_v2di(operands[0], tmp));
-}
-  else
- emit_insn (gen_vsx_sign_extend_si_v2di(operands[0], operands[1]));
-  DONE;
-})
-
 ;; Sign extend DI to TI.  We provide both GPR targets and Altivec targets on
 ;; power10.  On earlier systems, the machine independent code will generate a
 ;; shift left to sign extend the 64-bit value to 128-bit.
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c 
b/gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c
index fdcad019b96..03c0f1201e4 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c
@@ -34,7 +34,12 @@ int main ()
   /* test sign extend byte to word */
   vec_arg_qi = (vector signed char) {1, 2, 3, 4, 5, 6, 7, 8,
 -1, -2, -3, -4, -5, -6, -7, -8};
+
+#ifdef __BIG_ENDIAN__
+  vec_expected_wi = (vector signed int) {4, 8, -4, -8};
+#else
   vec_expected_wi = (vector signed int) {1, 5, -1, -5};
+#endif

   vec_result_wi = vec_signexti (vec_arg_qi);

@@ -54,7 +59,12 @@ int main ()
   /* test sign extend byte to double */
   vec_arg_qi = (vector signed char){1, 2, 3, 4, 5, 6, 7, 8,
-1, -2, -3, -4, -5, -6, -7, -8};
+
+#ifdef __BIG_ENDIAN__
+  vec_expected_di = (vector signed long long int){8, -8};
+#else
   vec_expected_di = (vector signed long long int){1, -1};
+#endif

   vec_result_di = vec_signextll(vec_arg_qi);

@@ -72,7 +82,12 @@ int main ()

   /* test sign extend short to word */
   vec_arg_hi = (vector signed short int){1, 2, 3, 4, -1, -2, -3, -4};
+
+#ifdef __BIG_ENDIAN__
+  vec_expected_wi = (vector signed int){2, 4, -2, -4};
+#else
   vec_expected_wi = (vector signed int){1, 3, -1, -3};
+#endif

   vec_result_wi = vec_signexti(vec_arg_hi);

@@ -90,7 +105,12 @@ int main ()

   /* test sign 

Re: [PATCH-1, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]

2023-03-21 Thread HAO CHEN GUI via Gcc-patches
Hi Richard,

在 2023/3/16 15:57, Richard Biener 写道:
> I'm not sure if careful constraints massaging like adding magic letters to
> alternatives with constants to pessimize them for LRA, making them
> more expensive than spilling the constant to a register but avoid
> secondary reloads with spilling a register to the stack to make room
> for the constant, is possible - but in theory a special constraint modifier
> for this purpose could be invented.

I have run some tests on constraint modifiers. None of them seems to work.
By checking the code, I found that an alternative needing no reload is always
preferred over one needing a reload in LRA. So there is no way to spill the
constant to a register in LRA.

  /* If this alternative can be made to work by reloading, and it
 needs less reloading than the others checked so far, record
 it as the chosen goal for reloading.  */
  if ((best_losers != 0 && losers == 0)
  || (((best_losers == 0 && losers == 0)
   || (best_losers != 0 && losers != 0))
  && (best_overall > overall
  || (best_overall == overall
 ... // set goal_alt

Looking forward to your advice.

Thanks
Gui Haochen


[PATCHv4, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]

2023-03-21 Thread HAO CHEN GUI via Gcc-patches
Hi,
  I refined the patch according to the reviewer's advice. The main change is to
check whether buffered_p is set and whether a buffered error exists. Also, two
regtests are fixed by catching the new error.

  I sent out the revised one for review due to my limited knowledge on
Fortran front end.

  The patch escalates the failure when Hollerith constant to real conversion
fails in native_interpret_expr. It finally reports a "Cannot simplify
expression" error in the do_simplify function.

  The patch for pr95450 added a verification for decoding/encoding checking
in native_interpret_expr. native_interpret_expr may fail on real type
conversion and returns a NULL tree then. But upper layer calls don't handle
the failure so that an ICE is reported when the verification fails.

  IBM long double is an example. It doesn't have a unique memory representation
for some real values. So it may not pass the verification. The new test
case shows the problem.

  errorcount is used to check if an error is already reported or not when
getting a bad expr. Buffered errors need to be excluded as they don't
increase error count either.

  The patch passed regression test on Power and x86 linux platforms.

Thanks
Gui Haochen

ChangeLog
2023-03-21  Haochen Gui 

gcc/
PR target/103628
* fortran/target-memory.cc (gfc_interpret_float): Return FAIL when
native_interpret_expr gets a NULL tree.
* fortran/arith.cc (gfc_hollerith2real): Return NULL when
gfc_interpret_float fails.
* fortran/error.cc (gfc_buffered_p): Define.
* fortran/gfortran.h (gfc_buffered_p): Declare.
* fortran/intrinsic.cc: Add diagnostic.h to include list.
(do_simplify): Save errorcount and check it at finish.  Report a
"Cannot simplify expression" error on a bad result if error count
doesn't change and no other errors buffered.

gcc/testsuite/
PR target/103628
* gfortran.dg/assumed_size_refs_2.f90: Catch "Cannot simplify
expression" error.
* gfortran.dg/unpack_field_1.f90: Likewise.
* gfortran.dg/pr103628.f90: New.

Co-Authored-By: Tobias Burnus 


patch.diff
diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc
index c0d12cfad9d..d3d38c7eb6a 100644
--- a/gcc/fortran/arith.cc
+++ b/gcc/fortran/arith.cc
@@ -2752,10 +2752,12 @@ gfc_hollerith2real (gfc_expr *src, int kind)
   result = gfc_get_constant_expr (BT_REAL, kind, >where);

   hollerith2representation (result, src);
-  gfc_interpret_float (kind, (unsigned char *) result->representation.string,
-  result->representation.length, result->value.real);
-
-  return result;
+  if (gfc_interpret_float (kind,
+  (unsigned char *) result->representation.string,
+  result->representation.length, result->value.real))
+return result;
+  else
+return NULL;
 }

 /* Convert character to real.  The constant will be padded or truncated.  */
diff --git a/gcc/fortran/error.cc b/gcc/fortran/error.cc
index 214fb78ba7b..872d42e731e 100644
--- a/gcc/fortran/error.cc
+++ b/gcc/fortran/error.cc
@@ -49,6 +49,13 @@ static gfc_error_buffer error_buffer;
 static output_buffer *pp_error_buffer, *pp_warning_buffer;
 static int warningcount_buffered, werrorcount_buffered;

+/* Return buffered_p.  */
+bool
+gfc_buffered_p (void)
+{
+  return buffered_p;
+}
+
 /* Return true if there output_buffer is empty.  */

 static bool
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 219ef8c7612..edfe11796a6 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -3328,6 +3328,7 @@ void gfc_internal_error (const char *, ...) 
ATTRIBUTE_NORETURN ATTRIBUTE_GCC_GFC
 void gfc_clear_error (void);
 bool gfc_error_check (void);
 bool gfc_error_flag_test (void);
+bool gfc_buffered_p (void);

 notification gfc_notification_std (int);
 bool gfc_notify_std (int, const char *, ...) ATTRIBUTE_GCC_GFC(2,3);
diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc
index e89131f5a71..2572b7a3448 100644
--- a/gcc/fortran/intrinsic.cc
+++ b/gcc/fortran/intrinsic.cc
@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "options.h"
 #include "gfortran.h"
 #include "intrinsic.h"
+#include "diagnostic.h" /* For errorcount.  */

 /* Namespace to hold the resolved symbols for intrinsic subroutines.  */
 static gfc_namespace *gfc_intrinsic_namespace;
@@ -4620,6 +4621,7 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e)
 {
   gfc_expr *result, *a1, *a2, *a3, *a4, *a5, *a6;
   gfc_actual_arglist *arg;
+  int old_errorcount = errorcount;

   /* Max and min require special handling due to the variable number
  of args.  */
@@ -4708,7 +4710,12 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e)

 finish:
   if (result == _bad_expr)
-return false;
+{
+  if (errorcount == old_errorcount
+ && (!gfc_buffered_p () && !gfc_error_flag_test ()))
+   gfc_error ("Cannot simplify expression at %L", >where);
+ 

Ping [PATCHv3, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]

2023-03-19 Thread HAO CHEN GUI via Gcc-patches
Hi,
  Gently ping this:
  https://gcc.gnu.org/pipermail/gcc-patches/2023-March/613497.html

Thanks
Gui Haochen

在 2023/3/7 16:55, HAO CHEN GUI 写道:
> Hi,
>   The patch escalates the failure when Hollerith constant to real conversion
> fails in native_interpret_expr. It finally reports an "Cannot simplify
> expression" error in do_simplify method.
> 
>   The patch of pr95450 added a verification for decoding/encoding checking
> in native_interpret_expr. native_interpret_expr may fail on real type
> conversion and returns a NULL tree then. But upper layer calls don't handle
> the failure so that an ICE is reported when the verification fails.
> 
>   IBM long double is an example. It doesn't have a unique memory presentation
> for some real values. So it may not pass the verification. The new test
> case shows the problem.
> 
>   errorcount is used to check if an error is already reported or not when
> getting a bad expr. Buffered errors need to be excluded as they don't
> increase error count either.
> 
>   The patch passed regression test on Power and x86 linux platforms.
> 
> Gui Haochen
> Thanks
> 
> ChangeLog
> 2023-03-07  Haochen Gui 
> 
> gcc/
>   PR target/103628
>   * fortran/target-memory.cc (gfc_interpret_float): Return FAIL when
>   native_interpret_expr gets a NULL tree.
>   * fortran/arith.cc (gfc_hollerith2real): Return NULL when
>   gfc_interpret_float fails.
>   * fortran/error.cc (gfc_buffered_p): Define.
>   * fortran/gfortran.h (gfc_buffered_p): Declare.
>   * fortran/intrinsic.cc: Add diagnostic.h to include list.
>   (do_simplify): Save errorcount and check it at finish.  Report a
>   "Cannot simplify expression" error on a bad result if error count
>   doesn't change and no other errors buffered.
> 
> gcc/testsuite/
>   PR target/103628
>   * gfortran.dg/pr103628.f90: New.
> 
> Co-Authored-By: Tobias Burnus 
> 
> patch.diff
> diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc
> index c0d12cfad9d..d3d38c7eb6a 100644
> --- a/gcc/fortran/arith.cc
> +++ b/gcc/fortran/arith.cc
> @@ -2752,10 +2752,12 @@ gfc_hollerith2real (gfc_expr *src, int kind)
>result = gfc_get_constant_expr (BT_REAL, kind, >where);
> 
>hollerith2representation (result, src);
> -  gfc_interpret_float (kind, (unsigned char *) result->representation.string,
> -result->representation.length, result->value.real);
> -
> -  return result;
> +  if (gfc_interpret_float (kind,
> +(unsigned char *) result->representation.string,
> +result->representation.length, result->value.real))
> +return result;
> +  else
> +return NULL;
>  }
> 
>  /* Convert character to real.  The constant will be padded or truncated.  */
> diff --git a/gcc/fortran/error.cc b/gcc/fortran/error.cc
> index 214fb78ba7b..872d42e731e 100644
> --- a/gcc/fortran/error.cc
> +++ b/gcc/fortran/error.cc
> @@ -49,6 +49,13 @@ static gfc_error_buffer error_buffer;
>  static output_buffer *pp_error_buffer, *pp_warning_buffer;
>  static int warningcount_buffered, werrorcount_buffered;
> 
> +/* Return buffered_p.  */
> +bool
> +gfc_buffered_p (void)
> +{
> +  return buffered_p;
> +}
> +
>  /* Return true if there output_buffer is empty.  */
> 
>  static bool
> diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
> index 219ef8c7612..edfe11796a6 100644
> --- a/gcc/fortran/gfortran.h
> +++ b/gcc/fortran/gfortran.h
> @@ -3328,6 +3328,7 @@ void gfc_internal_error (const char *, ...) 
> ATTRIBUTE_NORETURN ATTRIBUTE_GCC_GFC
>  void gfc_clear_error (void);
>  bool gfc_error_check (void);
>  bool gfc_error_flag_test (void);
> +bool gfc_buffered_p (void);
> 
>  notification gfc_notification_std (int);
>  bool gfc_notify_std (int, const char *, ...) ATTRIBUTE_GCC_GFC(2,3);
> diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc
> index e89131f5a71..9d049001a51 100644
> --- a/gcc/fortran/intrinsic.cc
> +++ b/gcc/fortran/intrinsic.cc
> @@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "options.h"
>  #include "gfortran.h"
>  #include "intrinsic.h"
> +#include "diagnostic.h" /* For errorcount.  */
> 
>  /* Namespace to hold the resolved symbols for intrinsic subroutines.  */
>  static gfc_namespace *gfc_intrinsic_namespace;
> @@ -4620,6 +4621,7 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e)
>  {
>gfc_expr *result, *a1, *a2, *a3, *a4, *a5, *a6;
>gfc_actual_arglist *arg;
> +  int old_errorcount = errorcount;
> 
>/* Max and min require special handling due to the variable number
>   of args.  */
> @@ -4708,7 +4710,12 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e)
> 
>  finish:
>if (result == _bad_expr)
> -return false;
> +{
> +  if (errorcount == old_errorcount
> +   && (gfc_buffered_p () && !gfc_error_flag_test ()))
> +   gfc_error ("Cannot simplify expression at %L", >where);
> +  return false;
> +}
> 
>if (result == NULL)
> 

Re: [PATCH-1, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]

2023-03-16 Thread HAO CHEN GUI via Gcc-patches
Hi Richard,

在 2023/3/16 18:36, Richard Biener 写道:
> On Thu, Mar 16, 2023 at 10:04 AM HAO CHEN GUI  wrote:
>>
>> Hi Richard,
>>
>> 在 2023/3/16 15:57, Richard Biener 写道:
>>> So this is one way around the lack of CSE/PRE of constant operands.  I'd
>>> argue that a better spot for this _might_ be LRA (split the constant out if
>>> there's a free register available), postreload-[g]cse (CSE the constants) 
>>> and
>>> then maybe cprop_hardreg to combine back single-use constants?
>>>
>>> I'm not sure if careful constraints massaging like adding magic letters to
>>> alternatives with constants to pessimize them for LRA, making them
>>> more expensive than spilling the constant to a register but avoid
>>> secondary reloads with spilling a register to the stack to make room
>>> for the constant, is possible - but in theory a special constraint modifier
>>> for this purpose could be invented.
>>
>> Thanks so much for your advice.
>>
>> cse/gcse doesn't take cost of constant set (the def insn of the constant) 
>> into
>> consideration. So it won't replace the register with a constant as it costs 1
>> insn with the register and costs 2 insn with the constant.
> 
> I think it does (and should) cost the constant set (IIRC we had some
> improvements
> there, or at least proposed, during this stage1).  But sure - this is why your
> "trick" works.
> 
It's doable if post-reload gcse costs the constant set. I will draft a patch to
test it.

>> Finally, the single-
>> use constants can't be back to 2 insn.
> 
> And that's because of the issue you point out above?
No. My original concern is that the constant can't be put back. If post-reload
gcse doesn't cost the constant set, the insn with a register always costs less
than two insns with immediates. Commonly the constant set itself also costs 2
insns.
> 
>> Not sure if I understand it correctly.
>> Looking forward to your advice.
> 
> My main point is that CSEing constants has impacts on register pressure
> and thus should probably be done after or within register allocation.  RTL
> expansion itself is probably a bad time to pro-actively split out constants
> even more if, as you say, nothing puts them back.
> 
I agree. Thanks a lot.
> Richard.
> 
>> Thanks
>> Gui Haochen

Gui Haochen


Re: [PATCH-1, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]

2023-03-16 Thread HAO CHEN GUI via Gcc-patches
Hi Richard,

在 2023/3/16 15:57, Richard Biener 写道:
> So this is one way around the lack of CSE/PRE of constant operands.  I'd
> argue that a better spot for this _might_ be LRA (split the constant out if
> there's a free register available), postreload-[g]cse (CSE the constants) and
> then maybe cprop_hardreg to combine back single-use constants?
> 
> I'm not sure if careful constraints massaging like adding magic letters to
> alternatives with constants to pessimize them for LRA, making them
> more expensive than spilling the constant to a register but avoid
> secondary reloads with spilling a register to the stack to make room
> for the constant, is possible - but in theory a special constraint modifier
> for this purpose could be invented.

Thanks so much for your advice.

cse/gcse doesn't take the cost of the constant set (the def insn of the
constant) into consideration. So it won't replace the register with a constant,
as it costs 1 insn with the register and 2 insns with the constant. In the end,
the single-use constants can't be turned back into 2 insns. Not sure if I
understand it correctly.
Looking forward to your advice.

Thanks
Gui Haochen


[PATCH-2, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]

2023-03-15 Thread HAO CHEN GUI via Gcc-patches
Hi,
  The background and motivation of the patch are listed in the note of
PATCH-1.

  This patch changes the expander of ior/xor to force the constant into a pseudo
when it needs 2 insns. Also, a combine and split pattern for ior/xor is defined.
The rtx_cost of the ior insn is adjusted, as it may now take 2 insns for certain
constants. We need to check the cost of each operand.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
2023-03-14  Haochen Gui 

gcc/
* config/rs6000/rs6000.cc (rs6000_rtx_costs): Check the cost of
each operand for IOR as it may have 2 insns for certain constants.
* config/rs6000/rs6000.md (3): Put the second operand into
register when it's a constant and need 2 ior/xor insns.
(split for ior/xor): Remove.
(*_2insn): New insn_and split pattern for 2-insn ior/xor.

gcc/testsuite/
* gcc.target/powerpc/pr86106.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index b3a609f3aa3..f53daff547f 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -22081,10 +22081,6 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
   return false;

 case IOR:
-  /* FIXME */
-  *total = COSTS_N_INSNS (1);
-  return true;
-
 case CLZ:
 case XOR:
 case ZERO_EXTRACT:
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index dba41e3df90..0541f48c42a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -3892,7 +3892,8 @@ (define_expand "3"
   DONE;
 }

-  if (non_logical_cint_operand (operands[2], mode))
+  if (non_logical_cint_operand (operands[2], mode)
+  && !can_create_pseudo_p ())
 {
   rtx tmp = ((!can_create_pseudo_p ()
  || rtx_equal_p (operands[0], operands[1]))
@@ -3907,15 +3908,17 @@ (define_expand "3"
   DONE;
 }

-  if (!reg_or_logical_cint_operand (operands[2], mode))
+  if (!logical_operand (operands[2], mode))
 operands[2] = force_reg (mode, operands[2]);
 })

-(define_split
-  [(set (match_operand:GPR 0 "gpc_reg_operand")
-   (iorxor:GPR (match_operand:GPR 1 "gpc_reg_operand")
-   (match_operand:GPR 2 "non_logical_cint_operand")))]
+(define_insn_and_split "*_2insn"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+   (iorxor:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+   (match_operand:GPR 2 "non_logical_cint_operand" "n")))]
   ""
+  "#"
+  "&& (!reload_completed || rtx_equal_p (operands[0], operands[1]))"
   [(set (match_dup 3)
(iorxor:GPR (match_dup 1)
(match_dup 4)))
@@ -3933,7 +3936,8 @@ (define_split

   operands[4] = GEN_INT (hi);
   operands[5] = GEN_INT (lo);
-})
+}
+  [(set_attr "length" "8")])

 (define_insn "*bool3_imm"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr86106.c 
b/gcc/testsuite/gcc.target/powerpc/pr86106.c
new file mode 100644
index 000..71501476800
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr86106.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-prefixed" } */
+
+unsigned int
+foo (unsigned int val)
+{
+  unsigned int mask = 0x7f7f7f7f;
+
+  return ~(((val & mask) + mask) | val | mask);
+}
+
+/* { dg-final { scan-assembler-not {\maddis\M} } } */
+/* { dg-final { scan-assembler-not {\maddi\M} } } */
+/* { dg-final { scan-assembler-not {\moris\M} } } */


[PATCH-1, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]

2023-03-15 Thread HAO CHEN GUI via Gcc-patches
Hi,
  Currently, rs6000 directly expands to 2 insns if an integer constant is the
second operand and it needs two insns, for example addi/addis and ori/oris.
This may not be beneficial when the constant is used more than twice in an
extended basic block, as the case in the PR shows.

  One possible solution is to force the constant into a pseudo at expand and let
the propagation pass and the combine pass decide whether the pseudo should be
replaced with the constant by comparing the rtx/insn cost.

  Forcing the constant into a pseudo generates a constant move. There is one
constant move if the constant is used only once. The combine pass can combine
the constant move and the add/ior/xor insn and eliminate the move, as the insn
cost is reduced. There are multiple moves if the constant is used several
times. In an extended basic block, these constant moves are merged into one by
the propagation pass. The combine pass can't replace the pseudo with the
constant, as there is no cost saving.

  In an extreme case, the constant is used twice in an extended basic block.
The cost (latency) is unchanged between putting the constant in a pseudo and
generating 2 insns. The dependence between instructions is reduced, but one more
register is used. In other cases, it should always be optimal to put the
constant in a pseudo.

  This patch changes the expander of integer add to force the constant into a
pseudo when it needs 2 insns. Also, a combine and split pattern is defined.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
2023-03-14  Haochen Gui 

gcc/
* config/rs6000/predicates.md (add_2insn_cint_operand): New predicate
which returns true when op is a 32-bit but not a 16-bit signed
integer constant.
* config/rs6000/rs6000.md (add3): Put the second operand into
register when it's a constant and need 2 add insns.
(*add_2insn): New insn_and_split for 2-insn add.


patch.diff
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index a1764018545..09e59a48cd3 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -282,6 +282,13 @@ (define_predicate "s32bit_cint_operand"
   (and (match_code "const_int")
(match_test "(0x8000 + UINTVAL (op)) >> 32 == 0")))

+;; Return 1 if op is a 32-bit but not 16-bit constant signed integer
+(define_predicate "add_2insn_cint_operand"
+  (and (match_code "const_int")
+   (and (match_operand 0 "s32bit_cint_operand")
+   (and (not (match_operand 0 "short_cint_operand"))
+(not (match_operand 0 "upper16_cint_operand"))
+
 ;; Return 1 if op is a constant 32-bit unsigned
 (define_predicate "c32bit_cint_operand"
   (and (match_code "const_int")
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6011f5bf76a..dba41e3df90 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -1796,12 +1796,44 @@ (define_expand "add3"
   /* The ordering here is important for the prolog expander.
 When space is allocated from the stack, adding 'low' first may
 produce a temporary deallocation (which would be bad).  */
-  emit_insn (gen_add3 (tmp, operands[1], GEN_INT (rest)));
-  emit_insn (gen_add3 (operands[0], tmp, GEN_INT (low)));
-  DONE;
+  if (!can_create_pseudo_p ())
+   {
+ emit_insn (gen_add3 (tmp, operands[1], GEN_INT (rest)));
+ emit_insn (gen_add3 (operands[0], tmp, GEN_INT (low)));
+ DONE;
+   }
+
+  operands[2] = force_reg (mode, operands[2]);
 }
 })

+/* The ordering here is important for the prolog expander.
+   When space is allocated from the stack, adding 'low' first may
+   produce a temporary deallocation (which would be bad).  */
+
+(define_insn_and_split "*add_2insn"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=b")
+   (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%b")
+ (match_operand:GPR 2 "add_2insn_cint_operand" "n")))]
+  "!TARGET_PREFIXED"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (plus:GPR (match_dup 1)
+ (match_dup 3)))
+   (set (match_dup 0)
+   (plus:GPR (match_dup 0)
+ (match_dup 4)))]
+{
+  HOST_WIDE_INT val = INTVAL (operands[2]);
+  HOST_WIDE_INT low = sext_hwi (val, 16);
+  HOST_WIDE_INT rest = trunc_int_for_mode (val - low, mode);
+
+  operands[3] = GEN_INT (rest);
+  operands[4] = GEN_INT (low);
+}
+  [(set_attr "length" "8")])
+
 (define_insn "*add3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,r,r")
(plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,b,b")


Re: [PATCH] testsuite, rs6000: Adjust ppc-fortran.exp to support dg-{warning,error}

2023-03-10 Thread HAO CHEN GUI via Gcc-patches
Hi Kewen,
  I tested it with my fortran test case. It works. Thanks a lot.

Gui Haochen

在 2023/3/6 17:27, Kewen.Lin 写道:
> Hi,
> 
> According to Haochen's finding in [1], currently ppc-fortran.exp
> doesn't support Fortran specific warning or error messages well.
> By looking into it, it's due to that gfortran uses some different
> warning/error prefixes as follows:
> 
> set gcc_warning_prefix "\[Ww\]arning:"
> set gcc_error_prefix "(Fatal )?\[Ee\]rror:"
> 
> comparing to:
> 
> set gcc_warning_prefix "warning:"
> set gcc_error_prefix "(fatal )?error:"
> 
> So this is to override these two prefixes and make it support
> dg-{warning,error} checks.
> 
> Tested on powerpc64-linux-gnu P7/P8/P9 and
> powerpc64le-linux-gnu P9/P10.
> 
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-March/613302.html
> 
> BR,
> Kewen
> -
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/powerpc/ppc-fortran/ppc-fortran.exp: Override
>   gcc_{warning,error}_prefix with Fortran specific one used in
>   gfortran_init.
> ---
>  gcc/testsuite/gcc.target/powerpc/ppc-fortran/ppc-fortran.exp | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fortran/ppc-fortran.exp 
> b/gcc/testsuite/gcc.target/powerpc/ppc-fortran/ppc-fortran.exp
> index a556d7b48a3..f7e99ac8487 100644
> --- a/gcc/testsuite/gcc.target/powerpc/ppc-fortran/ppc-fortran.exp
> +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fortran/ppc-fortran.exp
> @@ -58,6 +58,11 @@ proc dg-compile-aux-modules { args } {
>  }
>  }
> 
> +# Override gcc_{warning,error}_prefix with Fortran specific prefixes used
> +# in gfortran_init to support dg-{warning,error} checks.
> +set gcc_warning_prefix "\[Ww\]arning:"
> +set gcc_error_prefix "(Fatal )?\[Ee\]rror:"
> +
>  # Main loop.
>  gfortran-dg-runtest [lsort \
> [glob -nocomplain $srcdir/$subdir/*.\[fF\]{,90,95,03,08} ] ] "" 
> $DEFAULT_FFLAGS
> --
> 2.39.1


[PATCHv3, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]

2023-03-07 Thread HAO CHEN GUI via Gcc-patches
Hi,
  The patch escalates the failure when Hollerith constant to real conversion
fails in native_interpret_expr. It finally reports a "Cannot simplify
expression" error in the do_simplify function.

  The patch of pr95450 added a verification for decoding/encoding checking
in native_interpret_expr. native_interpret_expr may fail on real type
conversion and returns a NULL tree then. But upper layer calls don't handle
the failure so that an ICE is reported when the verification fails.

  IBM long double is an example. It doesn't have a unique memory representation
for some real values, so it may not pass the verification. The new test
case shows the problem.

  errorcount is used to check whether an error has already been reported when
a bad expr is returned. Buffered errors also need to be checked separately, as
they don't increase the error count.

  The patch passed regression test on Power and x86 linux platforms.

Gui Haochen
Thanks

ChangeLog
2023-03-07  Haochen Gui 

gcc/
PR target/103628
* fortran/target-memory.cc (gfc_interpret_float): Return FAIL when
native_interpret_expr gets a NULL tree.
* fortran/arith.cc (gfc_hollerith2real): Return NULL when
gfc_interpret_float fails.
* fortran/error.cc (gfc_buffered_p): Define.
* fortran/gfortran.h (gfc_buffered_p): Declare.
* fortran/intrinsic.cc: Add diagnostic.h to include list.
(do_simplify): Save errorcount and check it at finish.  Report a
"Cannot simplify expression" error on a bad result if error count
doesn't change and no other errors buffered.

gcc/testsuite/
PR target/103628
* gfortran.dg/pr103628.f90: New.

Co-Authored-By: Tobias Burnus 

patch.diff
diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc
index c0d12cfad9d..d3d38c7eb6a 100644
--- a/gcc/fortran/arith.cc
+++ b/gcc/fortran/arith.cc
@@ -2752,10 +2752,12 @@ gfc_hollerith2real (gfc_expr *src, int kind)
   result = gfc_get_constant_expr (BT_REAL, kind, >where);

   hollerith2representation (result, src);
-  gfc_interpret_float (kind, (unsigned char *) result->representation.string,
-  result->representation.length, result->value.real);
-
-  return result;
+  if (gfc_interpret_float (kind,
+  (unsigned char *) result->representation.string,
+  result->representation.length, result->value.real))
+return result;
+  else
+return NULL;
 }

 /* Convert character to real.  The constant will be padded or truncated.  */
diff --git a/gcc/fortran/error.cc b/gcc/fortran/error.cc
index 214fb78ba7b..872d42e731e 100644
--- a/gcc/fortran/error.cc
+++ b/gcc/fortran/error.cc
@@ -49,6 +49,13 @@ static gfc_error_buffer error_buffer;
 static output_buffer *pp_error_buffer, *pp_warning_buffer;
 static int warningcount_buffered, werrorcount_buffered;

+/* Return buffered_p.  */
+bool
+gfc_buffered_p (void)
+{
+  return buffered_p;
+}
+
 /* Return true if there output_buffer is empty.  */

 static bool
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 219ef8c7612..edfe11796a6 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -3328,6 +3328,7 @@ void gfc_internal_error (const char *, ...) 
ATTRIBUTE_NORETURN ATTRIBUTE_GCC_GFC
 void gfc_clear_error (void);
 bool gfc_error_check (void);
 bool gfc_error_flag_test (void);
+bool gfc_buffered_p (void);

 notification gfc_notification_std (int);
 bool gfc_notify_std (int, const char *, ...) ATTRIBUTE_GCC_GFC(2,3);
diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc
index e89131f5a71..9d049001a51 100644
--- a/gcc/fortran/intrinsic.cc
+++ b/gcc/fortran/intrinsic.cc
@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "options.h"
 #include "gfortran.h"
 #include "intrinsic.h"
+#include "diagnostic.h" /* For errorcount.  */

 /* Namespace to hold the resolved symbols for intrinsic subroutines.  */
 static gfc_namespace *gfc_intrinsic_namespace;
@@ -4620,6 +4621,7 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e)
 {
   gfc_expr *result, *a1, *a2, *a3, *a4, *a5, *a6;
   gfc_actual_arglist *arg;
+  int old_errorcount = errorcount;

   /* Max and min require special handling due to the variable number
  of args.  */
@@ -4708,7 +4710,12 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e)

 finish:
   if (result == _bad_expr)
-return false;
+{
+  if (errorcount == old_errorcount
+ && (gfc_buffered_p () && !gfc_error_flag_test ()))
+   gfc_error ("Cannot simplify expression at %L", >where);
+  return false;
+}

   if (result == NULL)
 resolve_intrinsic (specific, e);   /* Must call at run-time */
diff --git a/gcc/fortran/target-memory.cc b/gcc/fortran/target-memory.cc
index 7ce7d736629..0c47aa6b842 100644
--- a/gcc/fortran/target-memory.cc
+++ b/gcc/fortran/target-memory.cc
@@ -416,11 +416,14 @@ gfc_interpret_float (int kind, unsigned char *buffer, 
size_t buffer_size,
 mpfr_t real)
 

Re: [PATCHv2, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]

2023-03-03 Thread HAO CHEN GUI via Gcc-patches
Hi Tobias,

在 2023/3/3 17:29, Tobias Burnus 写道:
> But could you also include the 'gcc/fortran/intrinsic.cc' change
> proposed in
> https://gcc.gnu.org/pipermail/gcc-patches/2023-March/613030.html (and
> acknowledged by Steve)? 

Sure, I will merge it into the patch and do the regression test.

Additionally, Kewen suggested:
>> Since this test case is powerpc only, I think it can be moved to 
>> gcc/testsuite/gcc.target/powerpc/ppc-fortran.
> 
> Which sounds reasonable.

Test cases under gcc.target are tested by check-gcc-c. It greps "warning"
and "error" (C style, lower case) from the output while check-gcc-fortran
greps "Warning" and "Error" (upper case). As the test case needs to check
the "Warning" and "Error" messages, I have to put it in the gfortran.dg
directory. What's your opinion?

Gui Haochen
Thanks


Re: [PATCHv2, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]

2023-03-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  The patch passed regression test on Power linux platforms. Sorry for missing
the information.

Gui Haochen

在 2023/3/3 17:12, HAO CHEN GUI via Gcc-patches 写道:
> Hi,
>   The patch escalates the failure when Hollerith constant to real conversion
> fails in native_interpret_expr. It finally reports an "Unclassifiable
> statement" error.
> 
>   The patch of pr95450 added a verification for decoding/encoding checking
> in native_interpret_expr. native_interpret_expr may fail on real type
> conversion and returns a NULL tree then. But upper layer calls don't handle
> the failure so that an ICE is reported when the verification fails.
> 
>   IBM long double is an example. It doesn't have a unique memory presentation
> for some real values. So it may not pass the verification. The new test
> case shows the problem.
> 
>   Compared to last version, this version moves the mpfr_init after NULL tree
> test and fixes the format problem according to Tobias's advice. Thanks a lot.
> 
> Gui Haochen
> Thanks
> 
> ChangeLog
> 2023-03-01  Haochen Gui 
> 
> gcc/
>   PR target/103628
>   * fortran/target-memory.cc (gfc_interpret_float): Return FAIL when
>   native_interpret_expr gets a NULL tree.
>   * fortran/arith.cc (gfc_hollerith2real): Return NULL when
>   gfc_interpret_float fails.
> 
> gcc/testsuite/
>   PR target/103628
>   * gfortran.dg/pr103628.f90: New.
> 
> patch.diff
> diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc
> index c0d12cfad9d..d3d38c7eb6a 100644
> --- a/gcc/fortran/arith.cc
> +++ b/gcc/fortran/arith.cc
> @@ -2752,10 +2752,12 @@ gfc_hollerith2real (gfc_expr *src, int kind)
>result = gfc_get_constant_expr (BT_REAL, kind, >where);
> 
>hollerith2representation (result, src);
> -  gfc_interpret_float (kind, (unsigned char *) result->representation.string,
> -result->representation.length, result->value.real);
> -
> -  return result;
> +  if (gfc_interpret_float (kind,
> +(unsigned char *) result->representation.string,
> +result->representation.length, result->value.real))
> +return result;
> +  else
> +return NULL;
>  }
> 
>  /* Convert character to real.  The constant will be padded or truncated.  */
> diff --git a/gcc/fortran/target-memory.cc b/gcc/fortran/target-memory.cc
> index 7ce7d736629..0c47aa6b842 100644
> --- a/gcc/fortran/target-memory.cc
> +++ b/gcc/fortran/target-memory.cc
> @@ -416,11 +416,14 @@ gfc_interpret_float (int kind, unsigned char *buffer, 
> size_t buffer_size,
>mpfr_t real)
>  {
>gfc_set_model_kind (kind);
> -  mpfr_init (real);
> -  gfc_conv_tree_to_mpfr (real,
> -  native_interpret_expr (gfc_get_real_type (kind),
> - buffer, buffer_size));
> 
> +  tree source = native_interpret_expr (gfc_get_real_type (kind), buffer,
> +buffer_size);
> +  if (!source)
> +return 0;
> +
> +  mpfr_init (real);
> +  gfc_conv_tree_to_mpfr (real, source);
>return size_float (kind);
>  }
> 
> diff --git a/gcc/testsuite/gfortran.dg/pr103628.f90 
> b/gcc/testsuite/gfortran.dg/pr103628.f90
> new file mode 100644
> index 000..e49aefc18fd
> --- /dev/null
> +++ b/gcc/testsuite/gfortran.dg/pr103628.f90
> @@ -0,0 +1,14 @@
> +! { dg-do compile { target powerpc*-*-* } }
> +! { dg-options "-O2 -mabi=ibmlongdouble" }
> +
> +! Test to ensure that it reports an "Unclassifiable statement" error
> +! instead of throwing an ICE when the memory represent of the HOLLERITH
> +! string is not unique with ibm long double encoding.
> +
> +program main
> +  integer, parameter :: k = 16
> +  real(kind = k):: b = 4h1234
> +end program main
> +
> +! { dg-warning "Conversion from HOLLERITH" "warning" { target powerpc*-*-* } 
> 10 }
> +! { dg-error "Unclassifiable statement" "error" { target powerpc*-*-* } 10 }


[PATCHv2, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]

2023-03-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  The patch escalates the failure when Hollerith constant to real conversion
fails in native_interpret_expr. It finally reports an "Unclassifiable
statement" error.

  The patch of pr95450 added a verification for decoding/encoding checking
in native_interpret_expr. native_interpret_expr may fail on real type
conversion and returns a NULL tree then. But upper layer calls don't handle
the failure so that an ICE is reported when the verification fails.

  IBM long double is an example. It doesn't have a unique memory representation
for some real values, so it may not pass the verification. The new test
case shows the problem.

  Compared to last version, this version moves the mpfr_init after NULL tree
test and fixes the format problem according to Tobias's advice. Thanks a lot.

Gui Haochen
Thanks

ChangeLog
2023-03-01  Haochen Gui 

gcc/
PR target/103628
* fortran/target-memory.cc (gfc_interpret_float): Return FAIL when
native_interpret_expr gets a NULL tree.
* fortran/arith.cc (gfc_hollerith2real): Return NULL when
gfc_interpret_float fails.

gcc/testsuite/
PR target/103628
* gfortran.dg/pr103628.f90: New.

patch.diff
diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc
index c0d12cfad9d..d3d38c7eb6a 100644
--- a/gcc/fortran/arith.cc
+++ b/gcc/fortran/arith.cc
@@ -2752,10 +2752,12 @@ gfc_hollerith2real (gfc_expr *src, int kind)
   result = gfc_get_constant_expr (BT_REAL, kind, >where);

   hollerith2representation (result, src);
-  gfc_interpret_float (kind, (unsigned char *) result->representation.string,
-  result->representation.length, result->value.real);
-
-  return result;
+  if (gfc_interpret_float (kind,
+  (unsigned char *) result->representation.string,
+  result->representation.length, result->value.real))
+return result;
+  else
+return NULL;
 }

 /* Convert character to real.  The constant will be padded or truncated.  */
diff --git a/gcc/fortran/target-memory.cc b/gcc/fortran/target-memory.cc
index 7ce7d736629..0c47aa6b842 100644
--- a/gcc/fortran/target-memory.cc
+++ b/gcc/fortran/target-memory.cc
@@ -416,11 +416,14 @@ gfc_interpret_float (int kind, unsigned char *buffer, 
size_t buffer_size,
 mpfr_t real)
 {
   gfc_set_model_kind (kind);
-  mpfr_init (real);
-  gfc_conv_tree_to_mpfr (real,
-native_interpret_expr (gfc_get_real_type (kind),
-   buffer, buffer_size));

+  tree source = native_interpret_expr (gfc_get_real_type (kind), buffer,
+  buffer_size);
+  if (!source)
+return 0;
+
+  mpfr_init (real);
+  gfc_conv_tree_to_mpfr (real, source);
   return size_float (kind);
 }

diff --git a/gcc/testsuite/gfortran.dg/pr103628.f90 
b/gcc/testsuite/gfortran.dg/pr103628.f90
new file mode 100644
index 000..e49aefc18fd
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr103628.f90
@@ -0,0 +1,14 @@
+! { dg-do compile { target powerpc*-*-* } }
+! { dg-options "-O2 -mabi=ibmlongdouble" }
+
+! Test to ensure that it reports an "Unclassifiable statement" error
+! instead of throwing an ICE when the memory represent of the HOLLERITH
+! string is not unique with ibm long double encoding.
+
+program main
+  integer, parameter :: k = 16
+  real(kind = k):: b = 4h1234
+end program main
+
+! { dg-warning "Conversion from HOLLERITH" "warning" { target powerpc*-*-* } 
10 }
+! { dg-error "Unclassifiable statement" "error" { target powerpc*-*-* } 10 }


[PATCH, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]

2023-02-28 Thread HAO CHEN GUI via Gcc-patches
Hi,
  The patch escalates the failure when Hollerith constant to real conversion
fails in native_interpret_expr. It finally reports an "Unclassifiable
statement" error.

  The patch of pr95450 added a verification for decoding/encoding checking
in native_interpret_expr. native_interpret_expr may fail on real type
conversion and returns a NULL tree then. But upper layer calls don't handle
the failure so that an ICE is reported when the verification fails.

  IBM long double is an example. It doesn't have a unique memory representation
for some real values, so it may not pass the verification. The new test
case shows the problem.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
2023-03-01  Haochen Gui 

gcc/
PR target/103628
* fortran/target-memory.cc (gfc_interpret_float): Return FAIL when
native_interpret_expr gets a NULL tree.
* fortran/arith.cc (gfc_hollerith2real): Return NULL when
gfc_interpret_float fails.

gcc/testsuite/
PR target/103628
* gfortran.dg/pr103628.f90: New.


patch.diff
diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc
index c0d12cfad9d..d3d38c7eb6a 100644
--- a/gcc/fortran/arith.cc
+++ b/gcc/fortran/arith.cc
@@ -2752,10 +2752,12 @@ gfc_hollerith2real (gfc_expr *src, int kind)
   result = gfc_get_constant_expr (BT_REAL, kind, >where);

   hollerith2representation (result, src);
-  gfc_interpret_float (kind, (unsigned char *) result->representation.string,
-  result->representation.length, result->value.real);
-
-  return result;
+  if (gfc_interpret_float (kind,
+  (unsigned char *) result->representation.string,
+  result->representation.length, result->value.real))
+return result;
+  else
+return NULL;
 }

 /* Convert character to real.  The constant will be padded or truncated.  */
diff --git a/gcc/fortran/target-memory.cc b/gcc/fortran/target-memory.cc
index 7ce7d736629..04afc357e3c 100644
--- a/gcc/fortran/target-memory.cc
+++ b/gcc/fortran/target-memory.cc
@@ -417,10 +417,13 @@ gfc_interpret_float (int kind, unsigned char *buffer, 
size_t buffer_size,
 {
   gfc_set_model_kind (kind);
   mpfr_init (real);
-  gfc_conv_tree_to_mpfr (real,
-native_interpret_expr (gfc_get_real_type (kind),
-   buffer, buffer_size));

+  tree source  = native_interpret_expr (gfc_get_real_type (kind), buffer,
+   buffer_size);
+  if (!source)
+return 0;
+
+  gfc_conv_tree_to_mpfr (real, source);
   return size_float (kind);
 }

diff --git a/gcc/testsuite/gfortran.dg/pr103628.f90 
b/gcc/testsuite/gfortran.dg/pr103628.f90
new file mode 100644
index 000..e49aefc18fd
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr103628.f90
@@ -0,0 +1,14 @@
+! { dg-do compile { target powerpc*-*-* } }
+! { dg-options "-O2 -mabi=ibmlongdouble" }
+
+! Test to ensure that it reports an "Unclassifiable statement" error
+! instead of throwing an ICE when the memory represent of the HOLLERITH
+! string is not unique with ibm long double encoding.
+
+program main
+  integer, parameter :: k = 16
+  real(kind = k):: b = 4h1234
+end program main
+
+! { dg-warning "Conversion from HOLLERITH" "warning" { target powerpc*-*-* } 
10 }
+! { dg-error "Unclassifiable statement" "error" { target powerpc*-*-* } 10 }


[PATCHv2, rs6000] Merge two vector shift when their sources are the same

2023-02-27 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch merges two "vsldoi" insns when their sources are the
same. In particular, the result is simplified to a single move if the
total shift is a multiple of 16 bytes.

  Bootstrapped and tested on powerpc64-linux BE and LE with no
regressions.

Thanks
Gui Haochen


ChangeLog
2023-02-28  Haochen Gui 

gcc/
* config/rs6000/altivec.md (*altivec_vsldoi_dup_): New
insn_and_split to merge two vsldoi when the sources are the same.

gcc/testsuite/
* gcc.target/powerpc/vsldoi_merge.c: New.



patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 84660073f32..fae8ec2b2e8 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2529,6 +2529,35 @@ (define_insn "altivec_vsldoi_"
   "vsldoi %0,%1,%2,%3"
   [(set_attr "type" "vecperm")])

+(define_insn_and_split "*altivec_vsldoi_dup_"
+  [(set (match_operand:VM 0 "register_operand" "=v")
+   (unspec:VM [(unspec:VM [(match_operand:VM 1 "register_operand" "v")
+   (match_dup 1)
+   (match_operand:QI 2 "immediate_operand" "i")]
+  UNSPEC_VSLDOI)
+   (unspec:VM [(match_dup 1)
+   (match_dup 1)
+   (match_dup 2)]
+  UNSPEC_VSLDOI)
+   (match_operand:QI 3 "immediate_operand" "i")]
+  UNSPEC_VSLDOI))]
+  "TARGET_ALTIVEC"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  unsigned int shift1 = UINTVAL (operands[2]);
+  unsigned int shift2 = UINTVAL (operands[3]);
+
+  unsigned int shift = (shift1 + shift2) % 16;
+  if (shift)
+emit_insn (gen_altivec_vsldoi_ (operands[0], operands[1],
+ operands[1], GEN_INT (shift)));
+  else
+emit_move_insn (operands[0], operands[1]);
+  DONE;
+})
+
 (define_insn "altivec_vupkhs"
   [(set (match_operand:VP 0 "register_operand" "=v")
(unspec:VP [(match_operand: 1 "register_operand" "v")]
diff --git a/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c 
b/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c
new file mode 100644
index 000..eebd7b4d382
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx -save-temps" } */
+
+#include "altivec.h"
+
+#ifdef DEBUG
+#include 
+#endif
+
+void abort (void);
+
+__attribute__ ((noipa)) vector signed int
+test1 (vector signed int a)
+{
+  a = vec_sld (a, a, 2);
+  a = vec_sld (a, a, 6);
+  return a;
+}
+
+__attribute__ ((noipa)) vector signed int
+test2 (vector signed int a)
+{
+  a = vec_sld (a, a, 14);
+  a = vec_sld (a, a, 2);
+  return a;
+}
+
+int main (void)
+{
+  vector signed int a = {1,2,3,4};
+  vector signed int result_a;
+  int i;
+
+  result_a = test1 (a);
+  vector signed int expect_a = {3,4,1,2};
+
+  for (i = 0; i< 4; i++)
+if (result_a[i] != expect_a[i])
+#ifdef DEBUG
+  printf("ERROR: test1 result[%d] = %d, not expected[%d] = %d\n",
+  i, result_a[i], i, expect_a[i]);
+#else
+  abort ();
+#endif
+
+  result_a = test2 (a);
+
+  for (i = 0; i< 4; i++)
+if (result_a[i] != a[i])
+#ifdef DEBUG
+  printf("ERROR: test2 result[%d] = %d, not expected[%d] = %d\n",
+  i, result_a[i], i, a[i]);
+#else
+  abort ();
+#endif
+}
+
+/* { dg-final { scan-assembler-times {\mvsldoi\M} 1 } } */


Ping [PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694]

2023-02-20 Thread HAO CHEN GUI via Gcc-patches
Hi,
  Gently ping this:
  https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html

Gui Haochen
Thanks

在 2023/2/8 13:08, HAO CHEN GUI 写道:
> Hi,
>   The logical operations for TImode is split after reload pass right now. Some
> potential optimizations miss as the split is too late. This patch removes
> TImode from "AND", "IOR", "XOR" and "NOT" expander so that these logical
> operations can be split at expand pass. The new test case illustrates the
> optimization.
> 
>   Two test cases of pr92398 are merged into one as all sub-targets generates
> the same sequence of instructions with the patch.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> 
> Thanks
> Gui Haochen
> 
> 
> ChangeLog
> 2023-02-08  Haochen Gui 
> 
> gcc/
>   PR target/100694
>   * config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit
>   vector types.
>   (and3): Replace BOOL_128 with BOOL_128_V.
>   (ior3): Likewise.
>   (xor3): Likewise.
>   (one_cmpl2 expander): New expander with BOOL_128_V.
>   (one_cmpl2 insn_and_split): Rename to ...
>   (*one_cmpl2): ... this.
> 
> gcc/testsuite/
>   PR target/100694
>   * gcc.target/powerpc/pr100694.c: New.
>   * gcc.target/powerpc/pr92398.c: New.
>   * gcc.target/powerpc/pr92398.h: Remove.
>   * gcc.target/powerpc/pr92398.p9-.c: Remove.
>   * gcc.target/powerpc/pr92398.p9+.c: Remove.
> 
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 4bd1dfd3da9..455b7329643 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128   [TI
>(V2DF  "TARGET_ALTIVEC")
>(V1TI  "TARGET_ALTIVEC")])
> 
> +;; Mode iterator for logical operations on 128-bit vector types
> +(define_mode_iterator BOOL_128_V [(V16QI "TARGET_ALTIVEC")
> +  (V8HI  "TARGET_ALTIVEC")
> +  (V4SI  "TARGET_ALTIVEC")
> +  (V4SF  "TARGET_ALTIVEC")
> +  (V2DI  "TARGET_ALTIVEC")
> +  (V2DF  "TARGET_ALTIVEC")
> +  (V1TI  "TARGET_ALTIVEC")])
> +
>  ;; For the GPRs we use 3 constraints for register outputs, two that are the
>  ;; same as the output register, and a third where the output register is an
>  ;; early clobber, so we don't have to deal with register overlaps.  For the
> @@ -7135,23 +7144,23 @@ (define_expand "subti3"
>  ;; 128-bit logical operations expanders
> 
>  (define_expand "and3"
> -  [(set (match_operand:BOOL_128 0 "vlogical_operand")
> - (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
> -   (match_operand:BOOL_128 2 "vlogical_operand")))]
> +  [(set (match_operand:BOOL_128_V 0 "vlogical_operand")
> + (and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand")
> + (match_operand:BOOL_128_V 2 "vlogical_operand")))]
>""
>"")
> 
>  (define_expand "ior3"
> -  [(set (match_operand:BOOL_128 0 "vlogical_operand")
> -(ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
> -   (match_operand:BOOL_128 2 "vlogical_operand")))]
> +  [(set (match_operand:BOOL_128_V 0 "vlogical_operand")
> + (ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand")
> + (match_operand:BOOL_128_V 2 "vlogical_operand")))]
>""
>"")
> 
>  (define_expand "xor3"
> -  [(set (match_operand:BOOL_128 0 "vlogical_operand")
> -(xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
> -   (match_operand:BOOL_128 2 "vlogical_operand")))]
> +  [(set (match_operand:BOOL_128_V 0 "vlogical_operand")
> + (xor:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand")
> + (match_operand:BOOL_128_V 2 "vlogical_operand")))]
>""
>"")
> 
> @@ -7449,7 +7458,14 @@ (define_insn_and_split "*eqv3_internal2"
>(const_string "16")))])
> 
>  ;; 128-bit one's complement
> -(define_insn_and_split "one_cmpl2"
> +(define_expand "one_cmpl2"
> +[(set (match_operand:BOOL_128_V 0 "vlogical_operand" "=")
> + (not:BOOL_128_V
> +   (match_operand:BOOL_128_V 1 "vlogical_operand" "")))]
> +  ""
> +  "")
> +
> +(define_insn_and_split "*one_cmpl2"
>[(set (match_operand:BOOL_128 0 "vlogical_operand" "=")
>   (not:BOOL_128
> (match_operand:BOOL_128 1 "vlogical_operand" "")))]
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c 
> b/gcc/testsuite/gcc.target/powerpc/pr100694.c
> new file mode 100644
> index 000..96a895d6c44
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr100694.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { 

[PATCH, rs6000] Merge two vector shift when their sources are the same

2023-02-20 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch merges two "vsldoi" insns when their sources are the
same. In particular, the result is simplified to a single move if the
total shift is a multiple of 16 bytes.

  Bootstrapped and tested on powerpc64-linux BE and LE with no
regressions.

Thanks
Gui Haochen


ChangeLog
2023-02-20  Haochen Gui 

gcc/
* config/rs6000/altivec.md (*altivec_vsldoi_dup_): New
insn_and_split to merge two vsldoi.

gcc/testsuite/
* gcc.target/powerpc/vsldoi_merge.c: New.


patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 84660073f32..22e9c4c1fc5 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2529,6 +2529,35 @@ (define_insn "altivec_vsldoi_"
   "vsldoi %0,%1,%2,%3"
   [(set_attr "type" "vecperm")])

+(define_insn_and_split "*altivec_vsldoi_dup_"
+  [(set (match_operand:VM 0 "register_operand" "=v")
+   (unspec:VM [(unspec:VM [(match_operand:VM 1 "register_operand" "v")
+   (match_operand:VM 2 "register_operand" "v")
+   (match_operand:QI 3 "immediate_operand" "i")]
+  UNSPEC_VSLDOI)
+   (unspec:VM [(match_dup 1)
+   (match_dup 2)
+   (match_dup 3)]
+  UNSPEC_VSLDOI)
+   (match_operand:QI 4 "immediate_operand" "i")]
+  UNSPEC_VSLDOI))]
+  "TARGET_ALTIVEC"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  unsigned int shift1 = UINTVAL (operands[3]);
+  unsigned int shift2 = UINTVAL (operands[4]);
+
+  unsigned int shift = (shift1 + shift2) % 16;
+  if (shift)
+emit_insn (gen_altivec_vsldoi_ (operands[0], operands[1],
+ operands[1], GEN_INT (shift)));
+  else
+emit_move_insn (operands[0], operands[1]);
+  DONE;
+})
+
 (define_insn "altivec_vupkhs"
   [(set (match_operand:VP 0 "register_operand" "=v")
(unspec:VP [(match_operand: 1 "register_operand" "v")]
diff --git a/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c 
b/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c
new file mode 100644
index 000..4ea72561282
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsldoi_merge.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+#include "altivec.h"
+
+vector signed int test1 (vector signed int a, vector signed int b)
+{
+  a = vec_sld (a, b, 2);
+  a = vec_sld (a, a, 4);
+  return a;
+}
+
+vector signed int test2 (vector signed int a, vector signed int b)
+{
+  a = vec_sld (a, b, 14);
+  a = vec_sld (a, a, 2);
+  return a;
+}
+
+/* { dg-final { scan-assembler-times {\mvsldoi\M} 1 } } */


[PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694]

2023-02-07 Thread HAO CHEN GUI via Gcc-patches
Hi,
  The logical operations for TImode are split after the reload pass right now.
Some potential optimizations are missed because the split is too late. This
patch removes TImode from the "AND", "IOR", "XOR" and "NOT" expanders so that
these logical operations can be split at the expand pass. The new test case
illustrates the optimization.

  Two test cases of pr92398 are merged into one as all sub-targets generate
the same sequence of instructions with the patch.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen


ChangeLog
2023-02-08  Haochen Gui 

gcc/
PR target/100694
* config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit
vector types.
(and3): Replace BOOL_128 with BOOL_128_V.
(ior3): Likewise.
(xor3): Likewise.
(one_cmpl2 expander): New expander with BOOL_128_V.
(one_cmpl2 insn_and_split): Rename to ...
(*one_cmpl2): ... this.

gcc/testsuite/
PR target/100694
* gcc.target/powerpc/pr100694.c: New.
* gcc.target/powerpc/pr92398.c: New.
* gcc.target/powerpc/pr92398.h: Remove.
* gcc.target/powerpc/pr92398.p9-.c: Remove.
* gcc.target/powerpc/pr92398.p9+.c: Remove.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4bd1dfd3da9..455b7329643 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128 [TI
 (V2DF  "TARGET_ALTIVEC")
 (V1TI  "TARGET_ALTIVEC")])

+;; Mode iterator for logical operations on 128-bit vector types
+(define_mode_iterator BOOL_128_V   [(V16QI "TARGET_ALTIVEC")
+(V8HI  "TARGET_ALTIVEC")
+(V4SI  "TARGET_ALTIVEC")
+(V4SF  "TARGET_ALTIVEC")
+(V2DI  "TARGET_ALTIVEC")
+(V2DF  "TARGET_ALTIVEC")
+(V1TI  "TARGET_ALTIVEC")])
+
 ;; For the GPRs we use 3 constraints for register outputs, two that are the
 ;; same as the output register, and a third where the output register is an
 ;; early clobber, so we don't have to deal with register overlaps.  For the
@@ -7135,23 +7144,23 @@ (define_expand "subti3"
 ;; 128-bit logical operations expanders

 (define_expand "and3"
-  [(set (match_operand:BOOL_128 0 "vlogical_operand")
-   (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
- (match_operand:BOOL_128 2 "vlogical_operand")))]
+  [(set (match_operand:BOOL_128_V 0 "vlogical_operand")
+   (and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand")
+   (match_operand:BOOL_128_V 2 "vlogical_operand")))]
   ""
   "")

 (define_expand "ior3"
-  [(set (match_operand:BOOL_128 0 "vlogical_operand")
-(ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
- (match_operand:BOOL_128 2 "vlogical_operand")))]
+  [(set (match_operand:BOOL_128_V 0 "vlogical_operand")
+   (ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand")
+   (match_operand:BOOL_128_V 2 "vlogical_operand")))]
   ""
   "")

 (define_expand "xor3"
-  [(set (match_operand:BOOL_128 0 "vlogical_operand")
-(xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
- (match_operand:BOOL_128 2 "vlogical_operand")))]
+  [(set (match_operand:BOOL_128_V 0 "vlogical_operand")
+   (xor:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand")
+   (match_operand:BOOL_128_V 2 "vlogical_operand")))]
   ""
   "")

@@ -7449,7 +7458,14 @@ (define_insn_and_split "*eqv3_internal2"
 (const_string "16")))])

 ;; 128-bit one's complement
-(define_insn_and_split "one_cmpl2"
+(define_expand "one_cmpl2"
+[(set (match_operand:BOOL_128_V 0 "vlogical_operand" "=")
+   (not:BOOL_128_V
+ (match_operand:BOOL_128_V 1 "vlogical_operand" "")))]
+  ""
+  "")
+
+(define_insn_and_split "*one_cmpl2"
   [(set (match_operand:BOOL_128 0 "vlogical_operand" "=")
(not:BOOL_128
  (match_operand:BOOL_128 1 "vlogical_operand" "")))]
diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c 
b/gcc/testsuite/gcc.target/powerpc/pr100694.c
new file mode 100644
index 000..96a895d6c44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr100694.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 3 } } */
+
+/* It just needs two std and one blr.  */
+void foo (unsigned __int128* res, unsigned long long hi, unsigned long long lo)
+{
+   unsigned __int128 i = hi;
+   i <<= 64;
+   i |= lo;
+   *res = i;
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.c 

[PATCH, rs6000] Convert TI AND with a special constant to DI AND [PR93123]

2023-01-18 Thread HAO CHEN GUI via Gcc-patches
Hi,
  When a TI AND is performed with a special constant (one whose high part or
low part is all ones), it may be converted to a DI AND with a 64-bit constant
plus a simple DI move. When the DI AND can be implemented by rotate-and-mask
or "andi.", this eliminates the 128-bit constant load and saves its cost.

  The patch creates three insn_and_split patterns to match these cases
in the combine pass and splits them later. The new predicate
"double_wide_cint_operand" is used to identify whether a constant is a
double-wide constant.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

Gui Haochen


ChangeLog
2023-01-18  Haochen Gui 

gcc/
PR target/93123
* config/rs6000/predicates.md (double_wide_cint_operand): New.
* config/rs6000/rs6000.md (*andti3_128bit_imm_highpart): New.
(*andti3_128bit_imm_lowpart): New.
(*andti3_64bit_imm): New.

gcc/testsuite/
PR target/93123
* gcc.target/powerpc/pr93123.c: New.

patch.diff
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index a1764018545..bacb87c3fb2 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -255,6 +255,19 @@ (define_predicate "u10bit_cint_operand"
   (and (match_code "const_int")
(match_test "INTVAL (op) >= 0 && INTVAL (op) <= 1023")))

+;; Return 1 if op is a 65-128 bits constant integer.
+(define_predicate "double_wide_cint_operand"
+  (match_operand 0 "const_scalar_int_operand")
+{
+  if (CONST_INT_P (op))
+return 0;
+
+  if (CONST_WIDE_INT_NUNITS (op) == 2)
+return 1;
+
+  return 0;
+})
+
 ;; Return 1 if op is a constant integer that can fit in a D field.
 (define_predicate "short_cint_operand"
   (and (match_code "const_int")
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6011f5bf76a..1fecb2d734e 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7199,6 +7199,128 @@ (define_expand "orc3"
   "mode == TImode || mode == PTImode || TARGET_P8_VECTOR"
   "")

+(define_insn_and_split "*andti3_128bit_imm_highpart"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=r")
+   (and:TI
+ (match_operand:TI 1 "gpc_reg_operand" "r")
+ (match_operand:TI 2 "double_wide_cint_operand" "n")))]
+  "CONST_WIDE_INT_ELT (operands[2], 0) == -1
+   && (rs6000_is_valid_and_mask (GEN_INT (CONST_WIDE_INT_ELT (operands[2], 1)),
+E_DImode)
+   || logical_const_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[2], 1)),
+E_DImode))"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx in_lo, in_hi, out_lo, out_hi;
+  rtx imm = GEN_INT (CONST_WIDE_INT_ELT (operands[2], 1));
+  int hi_off, lo_off;
+
+  if (BYTES_BIG_ENDIAN)
+{
+  hi_off = 0;
+  lo_off = 8;
+}
+  else
+{
+  hi_off = 8;
+  lo_off = 0;
+}
+
+  in_lo = simplify_gen_subreg (DImode, operands[1], TImode, lo_off);
+  out_lo = simplify_gen_subreg (DImode, operands[0], TImode, lo_off);
+  in_hi = simplify_gen_subreg (DImode, operands[1], TImode, hi_off);
+  out_hi = simplify_gen_subreg (DImode, operands[0], TImode, hi_off);
+
+  if (rs6000_is_valid_and_mask (imm, E_DImode))
+emit_insn (gen_anddi3_mask (out_hi, in_hi, imm));
+  else
+emit_insn (gen_anddi3_imm (out_hi, in_hi, imm));
+
+  emit_move_insn (out_lo, in_lo);
+}
+  [(set_attr "length" "8")])
+
+(define_insn_and_split "*andti3_128bit_imm_lowpart"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=r")
+   (and:TI
+ (match_operand:TI 1 "gpc_reg_operand" "r")
+ (match_operand:TI 2 "double_wide_cint_operand" "n")))]
+  "CONST_WIDE_INT_ELT (operands[2], 1) == -1
+   && (rs6000_is_valid_and_mask (GEN_INT (CONST_WIDE_INT_ELT (operands[2], 0)),
+E_DImode)
+   || logical_const_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[2], 0)),
+E_DImode))"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx in_lo, in_hi, out_lo, out_hi;
+  rtx imm = GEN_INT (CONST_WIDE_INT_ELT (operands[2], 0));
+  int hi_off, lo_off;
+
+  if (BYTES_BIG_ENDIAN)
+{
+  hi_off = 0;
+  lo_off = 8;
+}
+  else
+{
+  hi_off = 8;
+  lo_off = 0;
+}
+
+  in_lo = simplify_gen_subreg (DImode, operands[1], TImode, lo_off);
+  out_lo = simplify_gen_subreg (DImode, operands[0], TImode, lo_off);
+  in_hi = simplify_gen_subreg (DImode, operands[1], TImode, hi_off);
+  out_hi = simplify_gen_subreg (DImode, operands[0], TImode, hi_off);
+
+  if (rs6000_is_valid_and_mask (imm, E_DImode))
+emit_insn (gen_anddi3_mask (out_lo, in_lo, imm));
+  else
+emit_insn (gen_anddi3_imm (out_lo, in_lo, imm));
+
+  emit_move_insn (out_hi, in_hi);
+}
+  [(set_attr "length" "8")])
+
+
+(define_insn_and_split "*andti3_64bit_imm"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=r")
+   (and:TI
+ (match_operand:TI 1 "gpc_reg_operand" "r")
+ (match_operand:TI 

[PATCH-4, rs6000] Change ilp32 target check for some scalar-extract-sig and scalar-insert-exp test cases

2023-01-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  "ilp32" is used in these test cases to make sure they only run in a
32-bit environment. Unfortunately, these cases also run with
"-m32/-mpowerpc64", which causes unexpected errors. This patch changes the
target check to skip the test if "has_arch_ppc64" is set, so the test cases
won't run when arch_ppc64 has been set.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

Gui Haochen

ChangeLog
2023-01-03  Haochen Gui  

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-extract-sig-2.c: Replace ilp32 check
with dg-skip-if has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-insert-exp-2.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-5.c: Likewise.

patch.diff
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c
index 39ee74c94dc..148b5fbd9fa 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target ilp32 } */
+/* { dg-skip-if "" { has_arch_ppc64 } } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c
index efd69725905..956c1183beb 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target ilp32 } */
+/* { dg-skip-if "" { has_arch_ppc64 } } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c
index f85966a6fdf..9a7949fb89a 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target ilp32 } */
+/* { dg-skip-if "" { has_arch_ppc64 } } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */



[PATCH-3, rs6000] Change mode and insn condition for scalar insert exp instruction

2023-01-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch changes the mode of the exponent to GPR in the scalar insert exp
patterns, as the exponent can be put into a 32-bit register. The condition
check is also changed from TARGET_64BIT to TARGET_POWERPC64.

  The test cases are modified to match the changes to the patterns.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

Gui Haochen

ChangeLog
2023-01-03  Haochen Gui  

gcc/
* config/rs6000/rs6000-builtins.def
(__builtin_vsx_scalar_insert_exp): Replace bif-pattern from xsiexpdp
to xsiexpdp_di.
(__builtin_vsx_scalar_insert_exp_dp): Replace bif-pattern from
xsiexpdpf to xsiexpdpf_di.
* config/rs6000/vsx.md (xsiexpdp): Rename to...
(xsiexpdp_<mode>): ... this, set the mode of second operand to GPR and
replace TARGET_64BIT with TARGET_POWERPC64.
(xsiexpdpf): Rename to...
(xsiexpdpf_<mode>): ... this, set the mode of second operand to GPR and
replace TARGET_64BIT with TARGET_POWERPC64.

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Replace lp64 check
with has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-insert-exp-1.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-4.c: Likewise.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 25647b7bdd2..b1b5002d7d9 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2854,10 +2854,10 @@

   const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
 unsigned long long);
-VSIEDP xsiexpdp {}
+VSIEDP xsiexpdp_di {}

   const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned long long);
-VSIEDPF xsiexpdpf {}
+VSIEDPF xsiexpdpf_di {}

   pure vsc __builtin_vsx_xl_len_r (void *, signed long);
 XL_LEN_R xl_len_r {}
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 27e03a4cf6c..3376090cc6f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5137,22 +5137,22 @@ (define_insn "xsiexpqp_"
   [(set_attr "type" "vecmove")])

 ;; VSX Scalar Insert Exponent Double-Precision
-(define_insn "xsiexpdp"
+(define_insn "xsiexpdp_"
   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
(unspec:DF [(match_operand:DI 1 "register_operand" "r")
-   (match_operand:DI 2 "register_operand" "r")]
+   (match_operand:GPR 2 "register_operand" "r")]
 UNSPEC_VSX_SIEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsiexpdp %x0,%1,%2"
   [(set_attr "type" "fpsimple")])

 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
-(define_insn "xsiexpdpf"
+(define_insn "xsiexpdpf_"
   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
(unspec:DF [(match_operand:DF 1 "register_operand" "r")
-   (match_operand:DI 2 "register_operand" "r")]
+   (match_operand:GPR 2 "register_operand" "r")]
 UNSPEC_VSX_SIEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsiexpdp %x0,%1,%2"
   [(set_attr "type" "fpsimple")])

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c
index d8243258a67..88d77564158 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c
@@ -1,7 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */
+/* { dg-require-effective-target has_arch_ppc64 } */

 /* This test should succeed only on 64-bit configurations.  */
 #include 
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c
index 8260b107178..2f219ddc83a 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c
@@ -1,7 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power8" } */
+/* { dg-require-effective-target has_arch_ppc64 } */

 /* This test should succeed only on 64-bit configurations.  */
 #include 
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-12.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-12.c
index 384fc9cc675..9eade34d9ad 100644
--- 

[PATCH-2, rs6000] Change mode and insn condition for scalar extract sig instruction

2023-01-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch changes the return type of __builtin_vsx_scalar_extract_sig
from const signed long to const signed long long, so that it can be called
with the "-m32/-mpowerpc64" options. The bif needs TARGET_POWERPC64 instead
of TARGET_64BIT, so the condition check in the insn pattern is changed.

  The test cases are modified according to the changes of expand pattern.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

Gui Haochen

ChangeLog
2023-01-03  Haochen Gui  

gcc/
* config/rs6000/rs6000-builtins.def
(__builtin_vsx_scalar_extract_sig): Set return type to const signed
long long.
* config/rs6000/vsx.md (xsxsigdp): Replace TARGET_64BIT with
TARGET_POWERPC64.

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Replace lp64 check
with has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-extract-sig-1.c: Likewise.
* gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Likewise.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index a8f1d3f1b3d..25647b7bdd2 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2849,7 +2849,7 @@
   pure vsc __builtin_vsx_lxvl (const void *, signed long);
 LXVL lxvl {}

-  const signed long __builtin_vsx_scalar_extract_sig (double);
+  const signed long long __builtin_vsx_scalar_extract_sig (double);
 VSESDP xsxsigdp {}

   const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 229c26c3a61..27e03a4cf6c 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5111,7 +5111,7 @@ (define_insn "xsxsigdp"
   [(set (match_operand:DI 0 "register_operand" "=r")
(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
 UNSPEC_VSX_SXSIG))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsxsigdp %0,%x1"
   [(set_attr "type" "integer")])

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c
index 637080652b7..d22f7d1b274 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c
@@ -1,7 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */
+/* { dg-require-effective-target has_arch_ppc64 } */

 /* This test should succeed only on 64-bit configurations.  */
 #include 
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c
index f12eed3d9d5..64747d73a51 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c
@@ -1,7 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power8" } */
+/* { dg-require-effective-target has_arch_ppc64 } */

 /* This test should succeed only on 64-bit configurations.  */
 #include 
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c
index c85072da138..561be53fb9b 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c
@@ -1,7 +1,7 @@
 /* { dg-do run { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target p9vector_hw } */
 /* { dg-options "-mdejagnu-cpu=power9" } */
+/* { dg-require-effective-target has_arch_ppc64 } */

 /* This test should succeed only on 64-bit configurations.  */
 #include 


[PATCH-1, rs6000] Change mode and insn condition for scalar extract exp instruction

2023-01-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch changes the return type of __builtin_vsx_scalar_extract_exp
from const signed long to const signed int, as the exponent fits in
a signed int. It is also in line with the external interface definition of
the bif. The mode of the exponent operand in "xsxexpdp" is changed to GPR mode
and the TARGET_64BIT check is removed, as the instruction can be executed in
a 32-bit environment.

  The test cases are modified according to the changes of expand pattern.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

Gui Haochen

ChangeLog
2022-12-23  Haochen Gui  

gcc/
* config/rs6000/rs6000-builtins.def
(__builtin_vsx_scalar_extract_exp): Set return type to const signed
int and set its bif-pattern to xsxexpdp_si, move it from power9-64 to
power9 catalog.
* config/rs6000/vsx.md (xsxexpdp): Rename to ...
(xsxexpdp_<mode>): ... this, set mode of operand 0 to GPR and remove
TARGET_64BIT check.
* doc/extend.texi (scalar_extract_exp): Remove 64-bit environment
requirement when it has a 64-bit argument.

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Remove lp64 check.
* gcc.target/powerpc/bfp/scalar-extract-exp-1.c: Likewise.
* gcc.target/powerpc/bfp/scalar-extract-exp-2.c: Deleted as the case is
invalid.
* gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Remove lp64 check.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f76f54793d7..a8f1d3f1b3d 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2833,6 +2833,8 @@
   const signed int __builtin_dtstsfi_ov_td (const int<6>, _Decimal128);
 TSTSFI_OV_TD dfptstsfi_unordered_td {}

+  const signed int  __builtin_vsx_scalar_extract_exp (double);
+VSEEDP xsxexpdp_si {}

 [power9-64]
   void __builtin_altivec_xst_len_r (vsc, void *, long);
@@ -2847,9 +2849,6 @@
   pure vsc __builtin_vsx_lxvl (const void *, signed long);
 LXVL lxvl {}

-  const signed long __builtin_vsx_scalar_extract_exp (double);
-VSEEDP xsxexpdp {}
-
   const signed long __builtin_vsx_scalar_extract_sig (double);
 VSESDP xsxsigdp {}

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 992fbc983be..229c26c3a61 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5089,11 +5089,11 @@ (define_insn "xsxexpqp_"
   [(set_attr "type" "vecmove")])

 ;; VSX Scalar Extract Exponent Double-Precision
-(define_insn "xsxexpdp"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-   (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
+(define_insn "xsxexpdp_"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+   (unspec:GPR [(match_operand:DF 1 "vsx_register_operand" "wa")]
 UNSPEC_VSX_SXEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR"
   "xsxexpdp %0,%x1"
   [(set_attr "type" "integer")])

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index d3812fa55b0..7c087967234 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -19598,7 +19598,10 @@ bool scalar_test_neg (double source);
 bool scalar_test_neg (__ieee128 source);
 @end smallexample

-The @code{scalar_extract_exp} and @code{scalar_extract_sig}
+The @code{scalar_extract_exp} with a 64-bit source argument
+function requires an environment supporting ISA 3.0 or later.
+The @code{scalar_extract_exp} with a 128-bit source argument
+and @code{scalar_extract_sig}
 functions require a 64-bit environment supporting ISA 3.0 or later.
 The @code{scalar_extract_exp} and @code{scalar_extract_sig} built-in
 functions return the significand and the biased exponent value
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
index 35bf1b240f3..d971833748e 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
@@ -1,9 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */

-/* This test should succeed only on 64-bit configurations.  */
 #include 

 unsigned int
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c
index 9737762c1d4..1cb438f9b70 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c
@@ -1,9 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power8" } */

-/* This test should succeed only on 64-bit 

[PATCH v6, rs6000] Change mode and insn condition for VSX scalar extract/insert instructions

2022-12-19 Thread HAO CHEN GUI via Gcc-patches
Hi,
This patch fixes several problems:
1. The exponent of a double-precision value can be put into an SImode
register, so "xsxexpdp" doesn't require a 64-bit environment. Also "xsxsigdp",
"xsiexpdp" and "xsiexpdpf" can put the exponent into a GPR.

2. "TARGET_64BIT" check in insn conditions should be replaced with
"TARGET_POWERPC64" check.

3. "lp64" check in test cases should be replaced with "has_arch_ppc64"
check. "ilp32" check should be replaced with "dg-skip-if has_arch_ppc64".

This patch keeps outer interfaces of these builtins unchanged.

Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-12-19  Haochen Gui  

gcc/
* config/rs6000/rs6000-builtins.def
(__builtin_vsx_scalar_extract_exp): Set return type to const unsigned
int and set its bif-pattern to xsxexpdp_si, move it from power9-64 to
power9 catalog.
(__builtin_vsx_scalar_extract_sig): Set return type to const unsigned
long long.
(__builtin_vsx_scalar_insert_exp): Set its bif-pattern to xsiexpdp_di
unsigned int.
(__builtin_vsx_scalar_insert_exp_dp): Set its bif-pattern to
xsiexpdpf_di.
* config/rs6000/vsx.md (xsxexpdp): Rename to ...
(xsxexpdp_): ..., set mode of operand 0 to GPR and remove
TARGET_64BIT check.
(xsxsigdp): Change insn condition from TARGET_64BIT to TARGET_POWERPC64.
(xsiexpdp): Rename to ...
(xsiexpdp_): ..., set mode of operand 2 to GPR and change insn
condition from TARGET_64BIT to TARGET_POWERPC64.
(xsiexpdpf): Rename to ...
(xsiexpdpf_): ..., set mode of operand 2 to GPR and change insn
condition from TARGET_64BIT to TARGET_POWERPC64.
* doc/extend.texi (scalar_extract_exp): Remove 64-bit environment
requirement when it has a 64-bit argument.

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Remove lp64 check.
* gcc.target/powerpc/bfp/scalar-extract-exp-1.c: Likewise.
* gcc.target/powerpc/bfp/scalar-extract-exp-2.c: Deleted as the case is
invalid now.
* gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Remove lp64 check.
* gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Replace lp64 check
with has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-extract-sig-1.c: Likewise.
* gcc.target/powerpc/bfp/scalar-extract-sig-2.c: Replace ilp32 check
with dg skip has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Replace lp64 check
with has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-1.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-2.c: Replace ilp32 check
with dg skip has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Replace lp64 check
with has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-insert-exp-4.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-5.c: Replace ilp32 check
with dg-skip-if has_arch_ppc64.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f76f54793d7..b1b5002d7d9 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2833,6 +2833,8 @@
   const signed int __builtin_dtstsfi_ov_td (const int<6>, _Decimal128);
 TSTSFI_OV_TD dfptstsfi_unordered_td {}

+  const signed int  __builtin_vsx_scalar_extract_exp (double);
+VSEEDP xsxexpdp_si {}

 [power9-64]
   void __builtin_altivec_xst_len_r (vsc, void *, long);
@@ -2847,18 +2849,15 @@
   pure vsc __builtin_vsx_lxvl (const void *, signed long);
 LXVL lxvl {}

-  const signed long __builtin_vsx_scalar_extract_exp (double);
-VSEEDP xsxexpdp {}
-
-  const signed long __builtin_vsx_scalar_extract_sig (double);
+  const signed long long __builtin_vsx_scalar_extract_sig (double);
 VSESDP xsxsigdp {}

   const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
 unsigned long long);
-VSIEDP xsiexpdp {}
+VSIEDP xsiexpdp_di {}

   const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned long long);
-VSIEDPF xsiexpdpf {}
+VSIEDPF xsiexpdpf_di {}

   pure vsc __builtin_vsx_xl_len_r (void *, signed long);
 XL_LEN_R xl_len_r {}
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index fb5cf04147e..e1c905a3f91 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5089,11 +5089,11 @@ (define_insn "xsxexpqp_"
   [(set_attr "type" "vecmove")])

 ;; VSX Scalar Extract Exponent Double-Precision
-(define_insn "xsxexpdp"
-  [(set (match_operand:DI 0 

PING [PATCH, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]

2022-12-13 Thread HAO CHEN GUI via Gcc-patches
Hi,
   Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601909.html

Thanks
Gui Haochen

在 2022/9/21 13:13, HAO CHEN GUI 写道:
> Hi,
>   This patch adds a new insn for vector splat with small V2DI constants on P8.
> If the value of the constant is in the range (-16, 15) and is not 0 or -1, it
> can be loaded with vspltisw and vupkhsw on P8. This should be more efficient
> than loading the vector from the TOC.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-09-21 Haochen Gui 
> 
> gcc/
>   PR target/104124
>   * config/rs6000/altivec.md (*altivec_vupkhs_direct): Renamed
>   to...
>   (altivec_vupkhs_direct): ...this.
>   * config/rs6000/constraints.md (wT constraint): New constant for a
>   vector constraint that can be loaded with vspltisw and vupkhsw.
>   * config/rs6000/predicates.md (vspltisw_constant_split): New
>   predicate for wT constraint.
>   * config/rs6000/rs6000-protos.h (vspltisw_constant_p): Add declaration.
>   * config/rs6000/rs6000.cc (easy_altivec_constant): Call
>   vspltisw_constant_p to judge if a V2DI constant can be synthesized with
>   a vspltisw and a vupkhsw.
>   * (vspltisw_constant_p): New function to return true if OP mode is
>   V2DI and can be synthesized with ISA 2.07 instruction vupkhsw and
>   vspltisw.
>   * gcc/config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up
>   constants with vspltisw and vupkhsw.
> 
> gcc/testsuite/
>   PR target/104124
>   * gcc.target/powerpc/p8-splat.c: New.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 2c4940f2e21..185414df021 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs"
>  }
>[(set_attr "type" "vecperm")])
> 
> -(define_insn "*altivec_vupkhs_direct"
> +(define_insn "altivec_vupkhs_direct"
>[(set (match_operand:VP 0 "register_operand" "=v")
>   (unspec:VP [(match_operand: 1 "register_operand" "v")]
>UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
> diff --git a/gcc/config/rs6000/constraints.md 
> b/gcc/config/rs6000/constraints.md
> index 5a44a92142e..f65dea6e0c7 100644
> --- a/gcc/config/rs6000/constraints.md
> +++ b/gcc/config/rs6000/constraints.md
> @@ -150,6 +150,10 @@ (define_constraint "wS"
>"@internal Vector constant that can be loaded with XXSPLTIB & sign 
> extension."
>(match_test "xxspltib_constant_split (op, mode)"))
> 
> +(define_constraint "wT"
> +  "@internal Vector constant that can be loaded with vspltisw & vupkhsw."
> +  (match_test "vspltisw_constant_split (op, mode)"))
> +
>  ;; ISA 3.0 DS-form instruction that has the bottom 2 bits 0 and no update 
> form.
>  ;; Used by LXSD/STXSD/LXSSP/STXSSP.  In contrast to "Y", the multiple-of-four
>  ;; offset is enforced for 32-bit too.
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index b1fcc69bb60..00cf60bbe58 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -694,6 +694,19 @@ (define_predicate "xxspltib_constant_split"
>return num_insns > 1;
>  })
> 
> +;; Return true if the operand is a constant that can be loaded with a 
> vspltisw
> +;; instruction and then a vupkhsw instruction.
> +
> +(define_predicate "vspltisw_constant_split"
> +  (match_code "const_vector,vec_duplicate")
> +{
> +  int value = 32;
> +
> +  if (!vspltisw_constant_p (op, mode, &value))
> +return false;
> +
> +  return true;
> +})
> 
>  ;; Return 1 if the operand is constant that can loaded directly with a 
> XXSPLTIB
>  ;; instruction.
> diff --git a/gcc/config/rs6000/rs6000-protos.h 
> b/gcc/config/rs6000/rs6000-protos.h
> index b3c16e7448d..45f3d044eee 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, 
> rtx, int, int, int,
> 
>  extern int easy_altivec_constant (rtx, machine_mode);
>  extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
> +extern bool vspltisw_constant_p (rtx, machine_mode, int *);
>  extern int vspltis_shifted (rtx);
>  extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
>  extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index df491bee2ea..984624026c2 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -6292,6 +6292,12 @@ easy_altivec_constant (rtx op, machine_mode mode)
> && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
>   return 8;
> 
> +  /* If V2DI constant is within RANGE (-16, 15), it can be synthesized 
> with
> +  a vspltisw and a vupkhsw.  */
> +  int value = 32;
> +  if (vspltisw_constant_p (op, mode, &value))
> + return 8;
> +
>return 

Re: [PATCH v5, rs6000] Change mode and insn condition for VSX scalar extract/insert instructions

2022-12-11 Thread HAO CHEN GUI via Gcc-patches
Hi Kewen,

在 2022/12/8 16:47, Kewen.Lin 写道:
> This documentation update reminds me of that the current prototype of 
> __ieee128
> variant can be:
> 
>   unsigned int scalar_extract_exp (__ieee128 source);
> 
> type unsigned int is enough for the exponent.  It means xsxexpqp_ can 
> also
> use SImode rather than DImode.


Could I put the changes of __ieee128 bifs in another patch? So, this patch 
doesn't
touch anything about __ieee128.

Thanks a lot
Gui Haochen


[PATCH v4, rs6000] Enable have_cbranchcc4 on rs6000

2022-12-07 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch enables "have_cbranchcc4" on rs6000 by defining
a "cbranchcc4" expander. "have_cbranchcc4" is a flag in ifcvt.cc
that indicates whether branching on CC bits is supported. With this
flag enabled, some branches can be optimized to conditional
moves.

  Compared to last version, the main changes are on the test
cases. Test case is renamed and comments are modified.

  Bootstrapped and tested on powerpc64-linux BE and LE with no
regressions. Is this okay for trunk? Any recommendations? Thanks
a lot.

BR
Gui Haochen

ChangeLog
2022-12-07  Haochen Gui 

gcc/
* config/rs6000/rs6000.md (cbranchcc4): New expander.

gcc/testsuite
* gcc.target/powerpc/cbranchcc4-1.c: New.
* gcc.target/powerpc/cbranchcc4-2.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index e9e5cd1e54d..d7ddd96cc70 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -11932,6 +11932,16 @@ (define_expand "cbranch4"
   DONE;
 })

+(define_expand "cbranchcc4"
+  [(set (pc)
+   (if_then_else (match_operator 0 "branch_comparison_operator"
+   [(match_operand 1 "cc_reg_operand")
+(match_operand 2 "zero_constant")])
+ (label_ref (match_operand 3))
+ (pc)))]
+  ""
+  "")
+
 (define_expand "cstore4_signed"
   [(use (match_operator 1 "signed_comparison_operator"
  [(match_operand:P 2 "gpc_reg_operand")
diff --git a/gcc/testsuite/gcc.target/powerpc/cbranchcc4-1.c 
b/gcc/testsuite/gcc.target/powerpc/cbranchcc4-1.c
new file mode 100644
index 000..6c2cd130b6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/cbranchcc4-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Verify there is no ICE with cbranchcc4 enabled.  */
+
+int foo (double d)
+{
+  if (d == 0.0)
+return 0;
+
+  d = ((d) >= 0 ? (d) : -(d));
+
+  if (d < 1.0)
+return 1;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/cbranchcc4-2.c 
b/gcc/testsuite/gcc.target/powerpc/cbranchcc4-2.c
new file mode 100644
index 000..528ba1a878d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/cbranchcc4-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ce1" } */
+/* { dg-final { scan-rtl-dump "noce_try_store_flag_constants" "ce1" } } */
+
+/* The inner branch should be detected by ifcvt then be converted to a setcc
+   with a plus by noce_try_store_flag_constants.  */
+
+int test (unsigned int a, unsigned int b)
+{
+return (a < b ? 0 : (a > b ? 2 : 1));
+}


Re: [PATCH v3, rs6000] Enable have_cbranchcc4 on rs6000

2022-12-06 Thread HAO CHEN GUI via Gcc-patches
Hi Kewen,
  Thanks so much for your review comments. I will fix them.

在 2022/12/7 11:06, Kewen.Lin 写道:
> Does this issue which relies on the fix for generic part make bootstrapping 
> fail?
> If no, how many failures it can cause?  I'm thinking if we can commit this 
> firstly,
> then in the commit log of the fix for generic part you can mention it can fix 
> the
> ICE exposed by this test case.

Yes, the bootstrapping fails if we enable cbranchcc4 without the generic patch.
Actually, the testcase comes from the ICE found in bootstrapping.


[PATCH v2] Add a new conversion for conditional ternary set into ifcvt [PR106536]

2022-12-06 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds a new conversion to convert a certain branch to
conditional ternary set in ifcvt.

  The branch commonly has following insns.
  cond_jump ? pc : label
  setcc
  (neg/subreg)
  label: set a constant
  cond_jump and setcc use the same CC reg and neg/subreg is optional.

  The branch might be converted to a nested if-then-else insn to eliminate
the branch if the insn is supported on target.
 [(set (match_operand:SI 0 "gpc_reg_operand")
 (if_then_else:SI (lt (match_operand:CC 1 "cc_reg_operand")
  (const_int 0))
  (const_int -1)
  (if_then_else:SI (gt (match_dup 1)
   (const_int 0))
   (const_int 1)
   (const_int 0))))]

  The patch adds a new optab for the nested if-then-else insn, and adds
helper functions in ifcvt.cc to detect the pattern and emit the insn by the
new optab.

  Compared to last version, this version uses a generic function to detect
the candidate branch instead of using a target hook. Also a new optab is
added. The insn is generated by the new optab instead of recog.

  Bootstrapped and tested on powerpc64-linux BE/LE and x86 with no
regressions. Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-12-07  Haochen Gui 

gcc/
* ifcvt.cc (noce_emit_ternary_set): New function to emit a conditional
ternary set insn by ternary_set_optab.
(noce_try_ternary_set): Detect conditional ternary set pattern and
call noce_emit_ternary_set to emit the insn.
(noce_process_if_block): Call noce_try_ternary_set to do the
conversion.
* optabs.def (ternary_set_optab): New optab.

patch.diff
diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index eb8efb89a89..8252d9c2dc5 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -1830,6 +1830,44 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, 
enum rtx_code code,
 return NULL_RTX;
 }

+/*  Emit a conditional ternary set insn by its optab.  */
+
+static rtx
+noce_emit_ternary_set (rtx target, enum rtx_code outer_code,
+  enum rtx_code inner_code, rtx cc, int a, int b, int c)
+{
+  rtx outer_comp, inner_comp;
+  machine_mode mode;
+  machine_mode orig_mode = GET_MODE (target);
+  outer_comp = gen_rtx_fmt_ee (outer_code, VOIDmode, cc, const0_rtx);
+  inner_comp = gen_rtx_fmt_ee (inner_code, VOIDmode, cc, const0_rtx);
+
+  class expand_operand ops[7];
+  create_fixed_operand (&ops[1], outer_comp);
+  create_fixed_operand (&ops[3], inner_comp);
+  create_fixed_operand (&ops[2], cc);
+  create_integer_operand (&ops[4], a);
+  create_integer_operand (&ops[5], b);
+  create_integer_operand (&ops[6], c);
+
+  FOR_EACH_MODE_FROM (mode, orig_mode)
+{
+  enum insn_code icode;
+  icode = optab_handler (ternary_set_optab, mode);
+  if (icode != CODE_FOR_nothing)
+   {
+ create_output_operand (&ops[0], target, mode);
+ if (maybe_expand_insn (icode, 7, ops))
+   {
+ if (ops[0].value != target)
+   convert_move (target, ops[0].value, false);
+ return target;
+   }
+   }
+}
+  return NULL_RTX;
+}
+
 /* Try only simple constants and registers here.  More complex cases
are handled in noce_try_cmove_arith after noce_try_store_flag_arith
has had a go at it.  */
@@ -2987,6 +3025,160 @@ noce_try_bitop (struct noce_if_info *if_info)
   return TRUE;
 }

+/* Try to find pattern "a < b ? -1 : (a > b ? 1 : 0)" and convert it to
+   a conditional ternary set insn.  It commonly has following pattern.
+   cond_jump
+   setcc
+   (neg/subreg)
+   label: const_set
+   cond_jump and setcc use the same CC reg.  There may be a neg insn after
+   the setcc insn to negative the result of setcc, and a subreg insn after
+   the setcc insn to convert the mode.
+
+   The pattern can't be optimized by combine pass due to the branch and
+   limitation on the number of insns.
+*/
+
+static int
+noce_try_ternary_set (struct noce_if_info *if_info)
+{
+  machine_mode orig_mode = GET_MODE (if_info->x);
+  machine_mode mode;
+  int have_ternary_set = 0;
+
+  FOR_EACH_MODE_FROM (mode, orig_mode)
+{
+  if (direct_optab_handler (ternary_set_optab, mode) != CODE_FOR_nothing)
+   {
+ have_ternary_set = 1;
+ break;
+   }
+}
+
+  if (!have_ternary_set)
+return FALSE;
+
+  if (!if_info->then_bb || !if_info->else_bb)
+return FALSE;
+
+  if (!if_info->then_simple && !if_info->else_simple)
+return FALSE;
+
+  rtx cc;
+  basic_block target_bb;
+  int int1, int2, int3;
+
+  cc = SET_DEST (PATTERN (if_info->cond_earliest));
+  if (GET_MODE_CLASS (GET_MODE (cc)) != MODE_CC)
+return FALSE;
+
+  /* One arm should be a constant set.  */
+
+  if (CONST_INT_P (if_info->a))
+{
+  int1 = INTVAL (if_info->a);
+  target_bb = if_info->else_bb;
+}
+  else if (CONST_INT_P 

[PATCH v3, rs6000] Enable have_cbranchcc4 on rs6000

2022-12-05 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch enables "have_cbranchcc4" on rs6000 by defining
a "cbranchcc4" expander. "have_cbranchcc4" is a flag in ifcvt.cc
that indicates whether branching on CC bits is supported. With this
flag enabled, some branches can be optimized to conditional
moves.

  The patch relies on the former patch which is under review.
https://gcc.gnu.org/pipermail/gcc-patches/2022-December/607810.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no
regressions. Is this okay for trunk? Any recommendations? Thanks
a lot.

Thanks
Gui Haochen

ChangeLog
2022-12-06  Haochen Gui 

gcc/
* config/rs6000/rs6000.md (cbranchcc4): New expander.

gcc/testsuite
* gcc.target/powerpc/cbranchcc4.c: New.
* gcc.target/powerpc/cbranchcc4-1.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index e9e5cd1e54d..d7ddd96cc70 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -11932,6 +11932,16 @@ (define_expand "cbranch4"
   DONE;
 })

+(define_expand "cbranchcc4"
+  [(set (pc)
+   (if_then_else (match_operator 0 "branch_comparison_operator"
+   [(match_operand 1 "cc_reg_operand")
+(match_operand 2 "zero_constant")])
+ (label_ref (match_operand 3))
+ (pc)))]
+  ""
+  "")
+
 (define_expand "cstore4_signed"
   [(use (match_operator 1 "signed_comparison_operator"
  [(match_operand:P 2 "gpc_reg_operand")
diff --git a/gcc/testsuite/gcc.target/powerpc/cbranchcc4-1.c 
b/gcc/testsuite/gcc.target/powerpc/cbranchcc4-1.c
new file mode 100644
index 000..3c8286bf091
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/cbranchcc4-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" */
+
+/* This case should be successfully compiled after cbranchcc4 is enabled.  It
+   generates a "*cbranch_2insn" insn which makes predicate check of cbranchcc4
+   failed and returns a NULL rtx from prepare_cmp_insn.  */
+
+int foo (double d)
+{
+  if (d == 0.0)
+return 0;
+
+  d = ((d) >= 0 ? (d) : -(d));
+
+  if (d < 1.0)
+return 1;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c 
b/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c
new file mode 100644
index 000..528ba1a878d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ce1" } */
+/* { dg-final { scan-rtl-dump "noce_try_store_flag_constants" "ce1" } } */
+
+/* The inner branch should be detected by ifcvt then be converted to a setcc
+   with a plus by noce_try_store_flag_constants.  */
+
+int test (unsigned int a, unsigned int b)
+{
+return (a < b ? 0 : (a > b ? 2 : 1));
+}


Re: [PATCH v2] Return a NULL rtx when targets don't support cbranchcc4 or predicate check fails in prepare_cmp_insn

2022-12-05 Thread HAO CHEN GUI via Gcc-patches
Hi Richard,

在 2022/12/5 15:31, Richard Biener 写道:
> I wonder if you have a testcase you can add showing this change is
> worthwhile and
> fixes a bug?

I want to enable cbranchcc4 on rs6000. But not all sub CCmode is
supported on rs6000. So the predicate check(assert) fails and it hits
ICE. I drafted two patches. This one is for the generic code, and
another is for rs6000. If this one is committed, cbranchcc4 can be
enabled on rs6000. Then I can create a testcase and let the predicate
check fail. Right now I can't write a testcase for it as it never
reaches the failure path.

Thanks a lot
Gui Haochen



[PATCH v2] Return a NULL rtx when targets don't support cbranchcc4 or predicate check fails in prepare_cmp_insn

2022-12-04 Thread HAO CHEN GUI via Gcc-patches
Hi,
  An assertion failure occurs when targets don't support cbranchcc4 or the
predicate check fails in prepare_cmp_insn. prepare_cmp_insn is a helper
function that generates a compare rtx, so it should not assume that cbranchcc4
exists or that all sub-CC modes are supported on a target. I think it
should return a NULL rtx when cbranchcc4 is not supported or the predicate check
fails, as its callers already check whether the return value is null for CC
mode. This patch makes just that change.

  Bootstrapped and tested on powerpc64-linux BE/LE and x86 with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.


ChangeLog
2022-12-05  Haochen Gui 

gcc/
* optabs.cc (prepare_cmp_insn): Return a NULL rtx rather than failing an
assertion when targets don't have cbranch optab or predicate check
fails.

patch.diff
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 165f8d1fa22..f6d3242479b 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -4484,10 +4484,14 @@ prepare_cmp_insn (rtx x, rtx y, enum rtx_code 
comparison, rtx size,
 {
   enum insn_code icode = optab_handler (cbranch_optab, CCmode);
   test = gen_rtx_fmt_ee (comparison, VOIDmode, x, y);
-  gcc_assert (icode != CODE_FOR_nothing
-  && insn_operand_matches (icode, 0, test));
-  *ptest = test;
-  return;
+  if (icode != CODE_FOR_nothing
+ && insn_operand_matches (icode, 0, test))
+   {
+ *ptest = test;
+ return;
+   }
+  else
+   goto fail;
 }

   mclass = GET_MODE_CLASS (mode);


[PATCH v5, rs6000] Change mode and insn condition for VSX scalar extract/insert instructions

2022-12-01 Thread HAO CHEN GUI via Gcc-patches
Hi,
  For scalar extract/insert instructions, exponent field can be stored in a
32-bit register. So this patch changes the mode of exponent field from DI to
SI so that these instructions can be generated in a 32-bit environment. Also
it removes TARGET_64BIT check for these instructions.

  The instructions using DI registers can be invoked with -mpowerpc64 in a
32-bit environment. The patch changes insn condition from TARGET_64BIT to
TARGET_POWERPC64 for those instructions.

  This patch also changes prototypes and categories of relevant built-ins and
effective target checks of test cases.

  Compared to last version, main changes are to remove 64-bit environment
requirement for relevant built-ins in extend.texi. And to change the type of
arguments of relevant built-ins in rs6000-overload.def.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-12-01  Haochen Gui  

gcc/
* config/rs6000/rs6000-builtins.def
(__builtin_vsx_scalar_extract_exp): Set return type to const unsigned
int and move it from power9-64 to power9 catalog.
(__builtin_vsx_scalar_extract_sig): Set return type to const unsigned
long long.
(__builtin_vsx_scalar_insert_exp): Set type of second argument to
unsigned int.
(__builtin_vsx_scalar_insert_exp_dp): Set type of second argument to
unsigned int and move it from power9-64 to power9 catalog.
* config/rs6000/vsx.md (xsxexpdp): Set mode of first operand to
SImode.  Remove TARGET_64BIT from insn condition.
(xsxsigdp): Change insn condition from TARGET_64BIT to TARGET_POWERPC64.
(xsiexpdp): Change insn condition from TARGET_64BIT to
TARGET_POWERPC64.  Set mode of third operand to SImode.
(xsiexpdpf): Set mode of third operand to SImode.  Remove TARGET_64BIT
from insn condition.
* config/rs6000/rs6000-overload.def
(__builtin_vec_scalar_insert_exp): Set type of second argument to
unsigned int.
* doc/extend.texi (scalar_insert_exp): Set type of second argument to
unsigned int and remove 64-bit environment requirement when
significand has a float type.
(scalar_extract_exp): Remove 64-bit environment requirement.

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Remove lp64 check.
* gcc.target/powerpc/bfp/scalar-extract-exp-1.c: Remove lp64 check.
* gcc.target/powerpc/bfp/scalar-extract-exp-2.c: Deleted as the case is
invalid now.
* gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Replace lp64 check
with has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Likewise.
* gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Replace lp64 check
with has_arch_ppc64. Set type of exponent to unsigned int.
* gcc.target/powerpc/bfp/scalar-insert-exp-1.c: Set type of exponent
to unsigned int.
* gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Replace lp64 check
with has_arch_ppc64. Set type of exponent to unsigned int.
* gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Remove lp64 check.
Set type of exponent to unsigned int.
* gcc.target/powerpc/bfp/scalar-insert-exp-2.c: Set type of exponent to
unsigned int.
* gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Remove lp64 check. Set
type of exponent to unsigned int.
* gcc.target/powerpc/bfp/scalar-insert-exp-4.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-5.c: Deleted as the case is
invalid now.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f76f54793d7..d8d67fa0cad 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2833,6 +2833,11 @@
   const signed int __builtin_dtstsfi_ov_td (const int<6>, _Decimal128);
 TSTSFI_OV_TD dfptstsfi_unordered_td {}

+  const unsigned int __builtin_vsx_scalar_extract_exp (double);
+VSEEDP xsxexpdp {}
+
+  const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned int);
+VSIEDPF xsiexpdpf {}

 [power9-64]
   void __builtin_altivec_xst_len_r (vsc, void *, long);
@@ -2847,19 +2852,13 @@
   pure vsc __builtin_vsx_lxvl (const void *, signed long);
 LXVL lxvl {}

-  const signed long __builtin_vsx_scalar_extract_exp (double);
-VSEEDP xsxexpdp {}
-
-  const signed long __builtin_vsx_scalar_extract_sig (double);
+  const unsigned long long __builtin_vsx_scalar_extract_sig (double);
 VSESDP xsxsigdp {}

   const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
-unsigned long long);
+   unsigned int);
 VSIEDP xsiexpdp {}

-  const double 

Re: [PATCH] Add a new conversion for conditional ternary set into ifcvt [PR106536]

2022-12-01 Thread HAO CHEN GUI via Gcc-patches
Hi Nilsson,

在 2022/12/2 10:49, Hans-Peter Nilsson 写道:
> On Wed, 23 Nov 2022, HAO CHEN GUI via Gcc-patches wrote:
> 
>> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
>> index 92bda1a7e14..9823eccbe68 100644
>> --- a/gcc/doc/tm.texi
>> +++ b/gcc/doc/tm.texi
>> @@ -7094,6 +7094,15 @@ the @code{POLY_VALUE_MIN}, @code{POLY_VALUE_MAX} and
>>  implementation returns the lowest possible value of @var{val}.
>>  @end deftypefn
>>
>> +@deftypefn {Target Hook} bool TARGET_NOCE_TERNARY_CSET_P (struct 
>> noce_if_info *@var{if_info}, rtx *@var{outer_cond}, rtx *@var{inner_cond}, 
>> int *@var{int1}, int *@var{int2}, int *@var{int3})
>> +This hook returns true if the if-then-else-join blocks describled in
> 
> Random typo spotted: "described"
> 
> Also, IMHO needs more explanation (in tm.texi preferably) why 
> this doesn't happen as part of general "combine" machinery.

Thanks for your comments. Combine can't handle it because the insns are not in
the same block. Combine also has a limit on the number of insns. I will add
those comments.

Thanks
Gui Haochen

> 
> brgds, H-P


Re: Ping [PATCH] Change the behavior of predicate check failure on cbranchcc4 operand0 in prepare_cmp_insn

2022-11-28 Thread HAO CHEN GUI via Gcc-patches
Hi Richard,

在 2022/11/29 2:46, Richard Biener 写道:
> Anyhow - my question still stands - what's the fallback for the callers
> that do not check for failure?  How are we sure we're not running into
> these when relaxing the requirement that a MODE_CC prepare_cmp_insn
> must not fail?

I examined the code and found that the current callers should be fine with
a NULL_RTX being returned for MODE_CC processing. prepare_cmp_insn is called
by the following callers.

1 gen_cond_trap, which doesn't use MODE_CC
2 prepare_cmp_insn itself, at a point after the MODE_CC processing, so it never
hits MODE_CC
3 emit_cmp_and_jump_insns, which doesn't use MODE_CC
4 emit_conditional_move, which checks whether the output is null
5 emit_conditional_add, which checks whether the output is null
5 emit_conditional_add which checks the output is null or not

Not sure if I missed something. Looking forward to your advice.

Thanks a lot
Gui Haochen



Ping [PATCH] Change the behavior of predicate check failure on cbranchcc4 operand0 in prepare_cmp_insn

2022-11-27 Thread HAO CHEN GUI via Gcc-patches
Hi,
   Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/607083.html
Thanks
Gui Haochen

在 2022/11/23 10:54, HAO CHEN GUI 写道:
> Hi,
>   I want to enable "have_cbranchcc4" on rs6000. But not all combinations of
> comparison codes and sub CC modes benefit from generating cbranchcc4 insns
> on rs6000. There is a predicate for operand0 of cbranchcc4 to bypass
> some combinations. This causes an assertion failure in prepare_cmp_insn. I think
> we shouldn't assume that all comparison codes and sub CC modes are supported
> and throw an assertion failure in prepare_cmp_insn. It should instead check the
> predicate and go to fail if the predicate can't be satisfied. This patch
> changes the behavior of that code.
> 
>   Bootstrapped and tested on powerpc64-linux BE/LE and x86 with no 
> regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> 
> ChangeLog
> 2022-11-23  Haochen Gui 
> 
> gcc/
>   * optabs.cc (prepare_cmp_insn): Go to fail rather than asserting when
>   predicate check of "cbranchcc4" operand[0] fails.
> 
> patch.diff
> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> index 165f8d1fa22..3ec8f6b17ba 100644
> --- a/gcc/optabs.cc
> +++ b/gcc/optabs.cc
> @@ -4484,8 +4484,9 @@ prepare_cmp_insn (rtx x, rtx y, enum rtx_code 
> comparison, rtx size,
>  {
>enum insn_code icode = optab_handler (cbranch_optab, CCmode);
>test = gen_rtx_fmt_ee (comparison, VOIDmode, x, y);
> -  gcc_assert (icode != CODE_FOR_nothing
> -  && insn_operand_matches (icode, 0, test));
> +  gcc_assert (icode != CODE_FOR_nothing);
> +  if (!insn_operand_matches (icode, 0, test))
> + goto fail;
>*ptest = test;
>return;
>  }


Re: [PATCH] Add a new conversion for conditional ternary set into ifcvt [PR106536]

2022-11-23 Thread HAO CHEN GUI via Gcc-patches
Hi Richard,


在 2022/11/24 4:06, Richard Biener 写道:
> Wouldn't we usually either add an optab or try to recog a canonical
> RTL form instead of adding a new target hook for things like this?

Thanks so much for your comments. Please let me make it clear.

Do you mean we should create an optab for "setb" pattern (the nested
if-then-else insn) and detect candidate insns in ifcvt pass? Then
generate the insn with the new optab?

My concern is that some candidate insns are target specific. For
example, different modes cause additional zero_extend or subreg insns
generated on different targets. So I put the detection process into a
target hook.

Looking forward to your advice.

Thanks again
Gui Haochen


[PATCH] Add a new conversion for conditional ternary set into ifcvt [PR106536]

2022-11-22 Thread HAO CHEN GUI via Gcc-patches
Hi,
  There is a new insn on my target, which has a nested if_then_else and
sets -1, 0 or 1 according to a comparison.

   [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
 (if_then_else:SI (lt (match_operand:CC 1 "cc_reg_operand" "y")
  (const_int 0))
  (const_int -1)
  (if_then_else (gt (match_dup 1)
(const_int 0))
(const_int 1)
(const_int 0))))]

  In ifcvt pass, it probably contains a comparison, a branch, a setcc
and a constant set.

8: r122:CC=cmp(r120:DI#0,r121:DI#0)
9: pc={(r122:CC<0)?L29:pc}

   14: r118:SI=r122:CC>0

   29: L29:
5: r118:SI=0x

  This patch adds the new conversion into ifcvt and convert this kind of
branch into a nested if-then-else insn if the target supports such
pattern.

  HAVE_ternary_conditional_set indicates if the target has such nested
if-then-else insn. It's set in genconfig. noce_try_ternary_cset will be
executed to detect suitable pattern and convert it to the nested
if-then-else insn if HAVE_ternary_conditional_set is set. The hook
TARGET_NOCE_TERNARY_CSET_P detects target specific pattern and output
conditions and setting integers for the nested if-then-else.

  Bootstrapped and tested on powerpc64-linux BE/LE and x86 with no
regressions. Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-11-23  Haochen Gui 

gcc/
* doc/tm.texi: Regenerate.
* doc/tm.texi.in (TARGET_NOCE_TERNARY_CSET_P): Document new hook.
* genconfig.cc (have_ternary_cset_flag): New.
(walk_insn_part): Detect nested if-then-else with const_int setting
and set have_ternary_cset_flag.
(HAVE_ternary_conditional_set): Define.
* ifcvt.cc (noce_emit_ternary_cset): New function to emit nested
if-then-else insns.
(noce_try_ternary_cset): Detect ternary conditional set and emit the
insn.
(noce_process_if_block): Try to do ternary condition set conversion
when a target supports ternary conditional set insn.
* target.def (noce_ternary_cset_p): New hook.
* targhooks.cc (default_noce_ternary_cset_p): New function.
* targhooks.h (default_noce_ternary_cset_p): New declaration.


patch.diff
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 92bda1a7e14..9823eccbe68 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -7094,6 +7094,15 @@ the @code{POLY_VALUE_MIN}, @code{POLY_VALUE_MAX} and
 implementation returns the lowest possible value of @var{val}.
 @end deftypefn

+@deftypefn {Target Hook} bool TARGET_NOCE_TERNARY_CSET_P (struct noce_if_info 
*@var{if_info}, rtx *@var{outer_cond}, rtx *@var{inner_cond}, int *@var{int1}, 
int *@var{int2}, int *@var{int3})
+This hook returns true if the if-then-else-join blocks describled in
+@code{if_info} can be converted to a ternary conditional set implemented by
+a nested if-then-else insn.  The @code{int1}, @code{int2} and @code{int3}
+are three possible results of the nested if-then-else insn.
+@code{outer_cond} and @code{inner_cond} are the conditions for outer and
+if-then-else.
+@end deftypefn
+
 @node Scheduling
 @section Adjusting the Instruction Scheduler

diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 112462310b1..1d6f28cc50a 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4631,6 +4631,8 @@ Define this macro if a non-short-circuit operation 
produced by

 @hook TARGET_ESTIMATED_POLY_VALUE

+@hook TARGET_NOCE_TERNARY_CSET_P
+
 @node Scheduling
 @section Adjusting the Instruction Scheduler

diff --git a/gcc/genconfig.cc b/gcc/genconfig.cc
index b7c6b48eec6..902c832cf5a 100644
--- a/gcc/genconfig.cc
+++ b/gcc/genconfig.cc
@@ -33,6 +33,7 @@ static int max_recog_operands;  /* Largest operand number 
seen.  */
 static int max_dup_operands;/* Largest number of match_dup in any insn.  */
 static int max_clobbers_per_insn;
 static int have_cmove_flag;
+static int have_ternary_cset_flag;
 static int have_cond_exec_flag;
 static int have_lo_sum_flag;
 static int have_rotate_flag;
@@ -136,6 +137,12 @@ walk_insn_part (rtx part, int recog_p, int non_pc_set_src)
  && GET_CODE (XEXP (part, 1)) == MATCH_OPERAND
  && GET_CODE (XEXP (part, 2)) == MATCH_OPERAND)
have_cmove_flag = 1;
+  else if (recog_p && non_pc_set_src
+  && GET_CODE (XEXP (part, 1)) == CONST_INT
+  && GET_CODE (XEXP (part, 2)) == IF_THEN_ELSE
+  && GET_CODE (XEXP (XEXP (part, 2), 1)) == CONST_INT
+  && GET_CODE (XEXP (XEXP (part, 2), 2)) == CONST_INT)
+   have_ternary_cset_flag = 1;
   break;

 case COND_EXEC:
@@ -328,6 +335,11 @@ main (int argc, const char **argv)
   else
 printf ("#define HAVE_conditional_move 0\n");

+  if (have_ternary_cset_flag)
+printf ("#define HAVE_ternary_conditional_set 1\n");
+  else
+printf 

[PATCH] Change the behavior of predicate check failure on cbranchcc4 operand0 in prepare_cmp_insn

2022-11-22 Thread HAO CHEN GUI via Gcc-patches
Hi,
  I want to enable "have_cbranchcc4" on rs6000. But not all combinations of
comparison codes and sub CC modes benefit from generating cbranchcc4 insns
on rs6000. There is a predicate for operand0 of cbranchcc4 to bypass
some combinations. It gets an assertion failure in prepare_cmp_insn. I think
we shouldn't suppose that all comparison codes and sub CC modes are supported
and throw an assertion failure in prepare_cmp_insn. It might check the
predicate and go to fail if the predicate can't be satisfied. This patch
changes the behavior of that code.

  Bootstrapped and tested on powerpc64-linux BE/LE and x86 with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.


ChangeLog
2022-11-23  Haochen Gui 

gcc/
* optabs.cc (prepare_cmp_insn): Go to fail rather than assert when the
predicate check of "cbranchcc4" operand[0] fails.

patch.diff
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 165f8d1fa22..3ec8f6b17ba 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -4484,8 +4484,9 @@ prepare_cmp_insn (rtx x, rtx y, enum rtx_code comparison, 
rtx size,
 {
   enum insn_code icode = optab_handler (cbranch_optab, CCmode);
   test = gen_rtx_fmt_ee (comparison, VOIDmode, x, y);
-  gcc_assert (icode != CODE_FOR_nothing
-  && insn_operand_matches (icode, 0, test));
+  gcc_assert (icode != CODE_FOR_nothing);
+  if (!insn_operand_matches (icode, 0, test))
+   goto fail;
   *ptest = test;
   return;
 }


Re: [PATCHv2, rs6000] Enable have_cbranchcc4 on rs6000

2022-11-21 Thread HAO CHEN GUI via Gcc-patches
Hi Segher,

Thanks for your comments.

在 2022/11/22 7:49, Segher Boessenkool 写道:
> *cbranch_2insn is not a machine insn.  It generates a cror and a branch
> insn.  This makes no sense to have in a cbranchcc: those do a branch
> based on an existing cr field, so based on the *output* of that cror.
> 
> If ifcvt requires differently, ifcvt needs fixing.
> 
I have a question here.
For rs6000, "*cbranch_2insn" should not be generated by cbranch_optab?
I mean it gets icode from cbranch_optab and generates insn from this
icode. If so, the predicate of cbranchcc4 should be checked every time
before insn generation rather than just doing an assertion.

> We want to use the output of the cror multiple times, not generate more
> cror insns.
> 
> I don't think the behaviour of ifcvt is correct here at all, no.  It
> also does not consider the cost of the code as far as I can see?  That
> could reduce the impact of this problem at least.
ifcvt tries to generate the converted sequence. Then it compares the cost
of the new sequence to the cost of the original. If it benefits, the conversion
will be done.

Thanks
Gui Haochen


Re: [PATCHv2, rs6000] Enable have_cbranchcc4 on rs6000

2022-11-21 Thread HAO CHEN GUI via Gcc-patches
Hi Kewen,

在 2022/11/22 11:11, Kewen.Lin 写道:
> Maybe we can adjust prepare_cmp_insn to fail if the constructed cbranchcc4
> pattern doesn't satisfy the predicate of operand 0 rather than to assert.
> It's something like:
> 
> if (!insn_operand_matches (icode, 0, test))
>   goto fail;
> 
> or only assign and return if insn_operand_matches (icode, 0, test).
> 
> The code makes the assumption that all this kind of cbranchcc4 patterns
> should match what target defines for cbranchcc4 optab, but unfortunately
> it's not sure for our port and I don't see how it should be.

Thanks for your comments.

I just drafted a patch to let it go to "fail" when the predicate of operand 0 is
not satisfied. It works and passed bootstrap on ppc64le. But my concern is that
prepare_cmp_insn is a generic function and is used to create a cmp rtx. It
is not only called by emit_conditional* (finally called by ifcvt) but also by
other functions (even new functions). If we change the logic in prepare_cmp_insn,
we may lose some potential optimization. After all, the branch_2insn is a valid
insn.

I think the essence of the problem is that we want to exclude those comparisons
(from cbranchcc4 used in ifcvt) which need two CC bits. So, we can change the
logic of ifcvt - add an additional check with the predicate of operand 0 when
checking the have_cbranchcc4 flag in ifcvt.

What's your opinion?

Thanks
Gui Haochen





Re: [PATCHv2, rs6000] Enable have_cbranchcc4 on rs6000

2022-11-20 Thread HAO CHEN GUI via Gcc-patches
Hi Segher,

在 2022/11/18 20:18, Segher Boessenkool 写道:
> I don't think we should pretend we have any conditional jumps the
> machine does not actually have, in cbranchcc4.  When would this ever be
> useful?  cror;beq can be quite expensive, compared to the code it would
> replace anyway.
> 
> If something generates those here (which then ICEs later), that is
> wrong, fix *that*?  Is it ifcvt doing it?

"*cbranch_2insn" is a valid insn for rs6000. So it generates such insn
at expand pass. The "prepare_cmp_insn" called by ifcvt just wants to verify
that the following comparison rtx is valid.

(unlt (reg:CCFP 156)
(const_int 0 [0]))

It should be valid as it's extracted from an existing insn. It hits ICE only
when the comparison rtx can't pass the predicate check of "cbranchcc4". So
"cbranchcc4" should include "extra_insn_branch_comparison_operator".

Then, ifcvt tries to call emit_conditional_move_1 to generate a conditional
move for FP mode. It definitely fails as there is no conditional move insn for
FP mode in rs6000. The behavior of ifcvt is correct. It tries to do conversion
but fails. It won't hit ICEs after cbranchcc4 is correctly defined.

Actually, "*cbranch_2insn" has the same logic as float "*cbranch" in ifcvt.
Both of them get a final false return from "rs6000_emit_int_cmove" as rs6000
doesn't have conditional move for FP mode.

So I think "cbranchcc4" should include "extra_insn_branch_comparison_operator"
as "*cbranch_2insn" is a valid insn. Just let ifcvt decide a conditional
move is valid or not.

Thanks
Gui Haochen


Re: [PATCHv2, rs6000] Enable have_cbranchcc4 on rs6000

2022-11-17 Thread HAO CHEN GUI via Gcc-patches
Hi David,

在 2022/11/17 21:24, David Edelsohn 写道:
> This is better, but the pattern should be near and after the existing 
> cbranch4 patterns earlier in the file, not the *cbranch pattern.  It 
> doesn't match the comment.
Sure, I will put it after existing "cbranch4" patterns.

> 
> Why are you using zero_constant predicate instead of matching (const_int 0) 
> for operand 2?
The "const_int 0" is an operand rather than a predicate. We need a predicate 
here.

> 
> Why does this need the new all_branch_comparison_operator?  Can the ifcvt 
> optimization correctly elide the 2 insn sequence?
Because rs6000 defines "*cbranch_2insn" insn, such insns are generated after 
expand.

(jump_insn 50 47 51 11 (set (pc)
(if_then_else (ge (reg:CCFP 156)
(const_int 0 [0]))
(label_ref 53)
(pc))) 
"/home/guihaoc/gcc/gcc-mainline-base/gmp/mpz/cmpabs_d.c":80:7 884 
{*cbranch_2insn}
 (expr_list:REG_DEAD (reg:CCFP 156)
(int_list:REG_BR_PROB 633507684 (nil)))
 -> 53)

In prepare_cmp_insn, the comparison is verified by insn_operand_matches. If
extra_insn_branch_comparison_operator is not included in "cbranchcc4" predicate,
it hits ICE here.

  if (GET_MODE_CLASS (mode) == MODE_CC)
{
  enum insn_code icode = optab_handler (cbranch_optab, CCmode);
  test = gen_rtx_fmt_ee (comparison, VOIDmode, x, y);
  gcc_assert (icode != CODE_FOR_nothing
  && insn_operand_matches (icode, 0, test));
  *ptest = test;
  return;
}

The real conditional move is generated by emit_conditional_move_1. Commonly
"*cbranch_2insn" can't be optimized out and it returns NULL_RTX.

  if (COMPARISON_P (comparison))
{
  saved_pending_stack_adjust save;
  save_pending_stack_adjust ();
  last = get_last_insn ();
  do_pending_stack_adjust ();
  machine_mode cmpmode = comp.mode;
  prepare_cmp_insn (XEXP (comparison, 0), XEXP (comparison, 1),
GET_CODE (comparison), NULL_RTX, unsignedp,
OPTAB_WIDEN, &comparison, &cmpmode);
  if (comparison)
{
   rtx res = emit_conditional_move_1 (target, comparison,
  op2, op3, mode);
   if (res != NULL_RTX)
 return res;
}
  delete_insns_since (last);
  restore_pending_stack_adjust ();

I think that extra_insn_branch_comparison_operator should be included in
"cbranchcc4" predicates as such insns exist. And leave it to
emit_conditional_move which decides whether it can be optimized or not.

Thanks for your comments
Gui Haochen


[PATCHv2, rs6000] Enable have_cbranchcc4 on rs6000

2022-11-16 Thread HAO CHEN GUI via Gcc-patches
Hi,
  The patch enables have_cbranchcc4 which is a flag in ifcvt.cc to
indicate if branch by CC bits is invalid or not. The new expand pattern
"cbranchcc4" is created which intends to match the patterns defined in
"*cbranch", "*cbranch_2insn" and "*creturn". The operand sequence in
"cbranchcc4" is in line with the definition in gccint. And the operand
sequence doesn't matter in pattern matching. So I think it should work.

  Compared to last version, one new predicate and one new expander are
created.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-11-17  Haochen Gui 

gcc/
* config/rs6000/predicates.md (all_branch_comparison_operator): New,
and includes operators in branch_comparison_operator and
extra_insn_branch_comparison_operator.
* config/rs6000/rs6000.md (cbranchcc4): New expand pattern.

gcc/testsuite/
* gcc.target/powerpc/cbranchcc4.c: New.


patch.diff
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index b1fcc69bb60..843b6f39b84 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1308,6 +1308,7 @@ (define_special_predicate "equality_operator"

 ;; Return 1 if OP is a comparison operation that is valid for a branch
 ;; instruction.  We check the opcode against the mode of the CC value.
+
 ;; validate_condition_mode is an assertion.
 (define_predicate "branch_comparison_operator"
(and (match_operand 0 "comparison_operator")
@@ -1331,6 +1332,11 @@ (define_predicate "extra_insn_branch_comparison_operator"
  GET_MODE (XEXP (op, 0))),
 1")))

+;; Return 1 if OP is a comparison operation that is valid for a branch.
+(define_predicate "all_branch_comparison_operator"
+   (ior (match_operand 0 "branch_comparison_operator")
+   (match_operand 0 "extra_insn_branch_comparison_operator")))
+
 ;; Return 1 if OP is an unsigned comparison operator.
 (define_predicate "unsigned_comparison_operator"
   (match_code "ltu,gtu,leu,geu"))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index e9e5cd1e54d..7b7d747a85d 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -13067,6 +13067,16 @@ (define_insn_and_split "*_cc"
 ;; Conditional branches.
 ;; These either are a single bc insn, or a bc around a b.

+(define_expand "cbranchcc4"
+  [(set (pc)
+   (if_then_else (match_operator 0 "all_branch_comparison_operator"
+   [(match_operand 1 "cc_reg_operand")
+(match_operand 2 "zero_constant")])
+ (label_ref (match_operand 3))
+ (pc)))]
+  ""
+  "")
+
 (define_insn "*cbranch"
   [(set (pc)
(if_then_else (match_operator 1 "branch_comparison_operator"
diff --git a/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c 
b/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c
new file mode 100644
index 000..528ba1a878d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ce1" } */
+/* { dg-final { scan-rtl-dump "noce_try_store_flag_constants" "ce1" } } */
+
+/* The inner branch should be detected by ifcvt then be converted to a setcc
+   with a plus by noce_try_store_flag_constants.  */
+
+int test (unsigned int a, unsigned int b)
+{
+return (a < b ? 0 : (a > b ? 2 : 1));
+}


Re: [rs6000, patch] Enable have_cbranchcc4 on rs6000

2022-11-15 Thread HAO CHEN GUI via Gcc-patches
Hi David,
  I found the definition of the operands in 'cbranch'. The arguments matter.
I will create a new expand pattern for cbranchcc4. Thanks a lot for your
comments.

‘cbranchmode4’
Conditional branch instruction combined with a compare instruction.
Operand 0 is a comparison operator. Operand 1 and operand 2 are the
first and second operands of the comparison, respectively. Operand 3
is the code_label to jump to.

Gui Haochen
Thanks

在 2022/11/16 11:04, David Edelsohn 写道:
> It's great to add cbranchcc4 to the Power port where it definitely was an 
> omission, but adapting *cbranch for that purpose is the wrong approach.  The 
> changes to the pattern are incorrect because they are covering up a 
> difference in ordering of the operands.  One can argue that the named pattern 
> only enables the functionality in ifcvt and the pattern otherwise is used in 
> its previous role.  But this is a Frankenstein monster approach.  You're 
> trying to twist the existing pattern so that it triggers as cbranchcc4, but 
> creating a pattern that messes up its arguments and only works because the 
> new, named pattern never is called.  This is too ugly.  Please fix.


[rs6000, patch] Enable have_cbranchcc4 on rs6000

2022-11-15 Thread HAO CHEN GUI via Gcc-patches
Hi,
  The patch enables have_cbranchcc4 which is a flag in ifcvt.cc to
indicate if branch by CC bits is invalid or not. As rs6000 already has
"*cbranch" insn which does branching according to CC bits, the flag
should be enabled and relevant branches can be optimized out. The test
case illustrates the optimization.

  "*cbranch" is an anonymous insn which can't be generated directly.
So changing "const_int 0" to the third operand predicated by
"zero_constant" won't cause ICEs as original patterns can still be matched.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.


ChangeLog
2022-11-16  Haochen Gui 

gcc/
* config/rs6000/rs6000.md (*cbranch): Rename to...
(cbranchcc4): ...this, and set const_int 0 to the third operand.

gcc/testsuite/
* gcc.target/powerpc/cbranchcc4.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index e9e5cd1e54d..ee171f21f6a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -13067,11 +13067,11 @@ (define_insn_and_split "*_cc"
 ;; Conditional branches.
 ;; These either are a single bc insn, or a bc around a b.

-(define_insn "*cbranch"
+(define_insn "cbranchcc4"
   [(set (pc)
(if_then_else (match_operator 1 "branch_comparison_operator"
  [(match_operand 2 "cc_reg_operand" "y")
-  (const_int 0)])
+  (match_operand 3 "zero_constant")])
  (label_ref (match_operand 0))
  (pc)))]
   ""
diff --git a/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c 
b/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c
new file mode 100644
index 000..1751d274bbf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/cbranchcc4.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ce1" } */
+/* { dg-final {scan-rtl-dump "noce_try_store_flag_constants" "ce1" } } */
+
+int test (unsigned int a, unsigned int b)
+{
+return (a < b ? 0 : (a > b ? 2 : 1));
+}


[PATCH v4, rs6000] Change mode and insn condition for VSX scalar extract/insert instructions

2022-11-06 Thread HAO CHEN GUI via Gcc-patches
Hi,
  For scalar extract/insert instructions, exponent field can be stored in a
32-bit register. So this patch changes the mode of exponent field from DI to
SI. So these instructions can be generated in a 32-bit environment. The patch
removes TARGET_64BIT check for these instructions.

  The instructions using DI registers can be invoked with -mpowerpc64 in a
32-bit environment. The patch changes insn condition from TARGET_64BIT to
TARGET_POWERPC64 for those instructions.

  This patch also changes prototypes and categories of relevant built-ins and
effective target checks of test cases.

  Compared to last version, main changes are to set categories of relevant
built-ins from power9-64 to power9 and remove some unnecessary test cases.
Last version: 
https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601196.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.


ChangeLog
2022-11-07  Haochen Gui  

gcc/
* config/rs6000/rs6000-builtins.def
(__builtin_vsx_scalar_extract_exp): Set return type to const unsigned
int and move it from power9-64 to power9 catalog.
(__builtin_vsx_scalar_extract_sig): Set return type to const unsigned
long long.
(__builtin_vsx_scalar_insert_exp): Set type of second argument to
unsigned int.
(__builtin_vsx_scalar_insert_exp_dp): Set type of second argument to
unsigned int and move it from power9-64 to power9 catalog.
* config/rs6000/vsx.md (xsxexpdp): Set mode of first operand to
SImode.  Remove TARGET_64BIT from insn condition.
(xsxsigdp): Change insn condition from TARGET_64BIT to TARGET_POWERPC64.
(xsiexpdp): Change insn condition from TARGET_64BIT to
TARGET_POWERPC64.  Set mode of third operand to SImode.
(xsiexpdpf): Set mode of third operand to SImode.  Remove TARGET_64BIT
from insn condition.

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Remove lp64 check.
* gcc.target/powerpc/bfp/scalar-extract-exp-1.c: Remove lp64 check.
* gcc.target/powerpc/bfp/scalar-extract-exp-2.c: Deleted as case is
invalid now.
* gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Replace lp64 check
with has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Likewise.
* gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Replace lp64 check
with has_arch_ppc64. Set type of exponent to unsigned int.
* gcc.target/powerpc/bfp/scalar-insert-exp-1.c: Set type of exponent
to unsigned int.
* gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Replace lp64 check
with has_arch_ppc64. Set type of exponent to unsigned int.
* gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Remove lp64 check.
Set type of exponent to unsigned int.
* gcc.target/powerpc/bfp/scalar-insert-exp-2.c: Set type of exponent to
unsigned int.
* gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Remove lp64 check. Set
type of exponent to unsigned int.
* gcc.target/powerpc/bfp/scalar-insert-exp-4.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-5.c: Deleted as case is
invalid now.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f76f54793d7..d8d67fa0cad 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2833,6 +2833,11 @@
   const signed int __builtin_dtstsfi_ov_td (const int<6>, _Decimal128);
 TSTSFI_OV_TD dfptstsfi_unordered_td {}

+  const unsigned int __builtin_vsx_scalar_extract_exp (double);
+VSEEDP xsxexpdp {}
+
+  const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned int);
+VSIEDPF xsiexpdpf {}

 [power9-64]
   void __builtin_altivec_xst_len_r (vsc, void *, long);
@@ -2847,19 +2852,13 @@
   pure vsc __builtin_vsx_lxvl (const void *, signed long);
 LXVL lxvl {}

-  const signed long __builtin_vsx_scalar_extract_exp (double);
-VSEEDP xsxexpdp {}
-
-  const signed long __builtin_vsx_scalar_extract_sig (double);
+  const unsigned long long __builtin_vsx_scalar_extract_sig (double);
 VSESDP xsxsigdp {}

   const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
-unsigned long long);
+   unsigned int);
 VSIEDP xsiexpdp {}

-  const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned long long);
-VSIEDPF xsiexpdpf {}
-
   pure vsc __builtin_vsx_xl_len_r (void *, signed long);
 XL_LEN_R xl_len_r {}

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index e226a93bbe5..9d3a2340a79 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5095,10 +5095,10 @@ (define_insn "xsxexpqp_"


[PATCH-2, rs6000] Reverse V8HI on Power8 by vector rotation [PR100866]

2022-10-23 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements V8HI byte reverse on Power8 by vector rotation.
It should be more efficient than the original vector permute. The patch comes from
Xionghu's comments in PR. I just added a test case for it.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.



ChangeLog
2022-10-24  Xionghu Luo 

gcc/
PR target/100866
* config/rs6000/altivec.md: (*altivec_vrl): Named to...
(altivec_vrl): ...this.
* config/rs6000/vsx.md (revb_): Call vspltish and vrlh when
target is Power8 and mode is V8HI.

gcc/testsuite/
PR target/100866
* gcc.target/powerpc/pr100866-2.c: New.

patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 2c4940f2e21..84660073f32 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1875,7 +1875,7 @@ (define_insn "altivec_vpkuum_direct"
 }
   [(set_attr "type" "vecperm")])

-(define_insn "*altivec_vrl"
+(define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
(match_operand:VI2 2 "register_operand" "v")))]
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index e226a93bbe5..34662a7252d 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6092,12 +6092,21 @@ (define_expand "revb_"
 emit_insn (gen_p9_xxbr_ (operands[0], operands[1]));
   else
 {
-  /* Want to have the elements in reverse order relative
-to the endian mode in use, i.e. in LE mode, put elements
-in BE order.  */
-  rtx sel = swap_endian_selector_for_mode(mode);
-  emit_insn (gen_altivec_vperm_ (operands[0], operands[1],
-  operands[1], sel));
+  if (mode == V8HImode)
+   {
+ rtx splt = gen_reg_rtx (V8HImode);
+ emit_insn (gen_altivec_vspltish (splt, GEN_INT (8)));
+ emit_insn (gen_altivec_vrlh (operands[0], operands[1], splt));
+   }
+  else
+   {
+ /* Want to have the elements in reverse order relative
+to the endian mode in use, i.e. in LE mode, put elements
+in BE order.  */
+ rtx sel = swap_endian_selector_for_mode (mode);
+ emit_insn (gen_altivec_vperm_ (operands[0], operands[1],
+  operands[1], sel));
+   }
 }

   DONE;
diff --git a/gcc/testsuite/gcc.target/powerpc/pr100866-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr100866-2.c
new file mode 100644
index 000..4357d1beb09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr100866-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-final { scan-assembler {\mvspltish\M} } } */
+/* { dg-final { scan-assembler {\mvrlh\M} } } */
+
+#include <altivec.h>
+
+vector unsigned short revb(vector unsigned short a)
+{
+   return vec_revb(a);
+}
+


[PATCH-1, rs6000] Generate permute index directly for little endian target [PR100866]

2022-10-11 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch modifies the helper function which generates the permute index for
vector byte reversion and generates the permute index directly for little endian
targets. It saves one "xxlnor" instruction on P8 little endian targets as
the original process needs an "xxlnor" to calculate the complement for the index.

Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-10-11  Haochen Gui 

gcc/
PR target/100866
* config/rs6000/rs6000-call.cc (swap_endian_selector_for_mode):
Generate permute index directly for little endian targets.
* config/rs6000/vsx.md (revb_): Call vperm directly with
corresponding permute indexes.

gcc/testsuite/
PR target/100866
* gcc.target/powerpc/pr100866.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc
index 551968b0995..bad8e9e0e52 100644
--- a/gcc/config/rs6000/rs6000-call.cc
+++ b/gcc/config/rs6000/rs6000-call.cc
@@ -2839,7 +2839,10 @@ swap_endian_selector_for_mode (machine_mode mode)
 }

   for (i = 0; i < 16; ++i)
-perm[i] = GEN_INT (swaparray[i]);
+if (BYTES_BIG_ENDIAN)
+  perm[i] = GEN_INT (swaparray[i]);
+else
+  perm[i] = GEN_INT (~swaparray[i] & 0x001f);

   return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode,
 gen_rtvec_v (16, perm)));
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index e226a93bbe5..b68eba48d2c 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6096,8 +6096,8 @@ (define_expand "revb_"
 to the endian mode in use, i.e. in LE mode, put elements
 in BE order.  */
   rtx sel = swap_endian_selector_for_mode(mode);
-  emit_insn (gen_altivec_vperm_ (operands[0], operands[1],
-  operands[1], sel));
+  emit_insn (gen_altivec_vperm__direct (operands[0], operands[1],
+ operands[1], sel));
 }

   DONE;
diff --git a/gcc/testsuite/gcc.target/powerpc/pr100866.c 
b/gcc/testsuite/gcc.target/powerpc/pr100866.c
new file mode 100644
index 000..c708dfd502e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr100866.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-final { scan-assembler-not "xxlnor" } } */
+
+#include <altivec.h>
+
+vector unsigned short revb(vector unsigned short a)
+{
+   return vec_revb(a);
+}



[PATCH v7, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-09-25 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
Tests show that outputs of xs[min/max]dp are consistent with the standard
of C99 fmin/max.

  This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
of smin/max when fast-math is not set. While fast-math is set, xs[min/max]dp
are folded to MIN/MAX_EXPR in gimple, and finally expanded to smin/max.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-09-26 Haochen Gui 

gcc/
PR target/103605
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Gimple
fold RS6000_BIF_XSMINDP and RS6000_BIF_XSMAXDP when fast-math is set.
* config/rs6000/rs6000.md (FMINMAX): New int iterator.
(minmax_op): New int attribute.
(UNSPEC_FMAX, UNSPEC_FMIN): New unspecs.
(f<minmax_op><mode>3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN.
* config/rs6000/rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set
pattern to fmaxdf3.
(__builtin_vsx_xsmindp): Set pattern to fmindf3.

gcc/testsuite/
PR target/103605
* gcc.dg/powerpc/pr103605.h: New.
* gcc.dg/powerpc/pr103605-1.c: New.
* gcc.dg/powerpc/pr103605-2.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index e925ba9fad9..944ae9fe55c 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1588,6 +1588,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   gimple_set_location (g, gimple_location (stmt));
   gsi_replace (gsi, g, true);
   return true;
+/* fold into MIN_EXPR when fast-math is set.  */
+case RS6000_BIF_XSMINDP:
 /* flavors of vec_min.  */
 case RS6000_BIF_XVMINDP:
 case RS6000_BIF_XVMINSP:
@@ -1614,6 +1616,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   gimple_set_location (g, gimple_location (stmt));
   gsi_replace (gsi, g, true);
   return true;
+/* fold into MAX_EXPR when fast-math is set.  */
+case RS6000_BIF_XSMAXDP:
 /* flavors of vec_max.  */
 case RS6000_BIF_XVMAXDP:
 case RS6000_BIF_XVMAXSP:
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f4a9f24bcc5..8b735493b40 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1613,10 +1613,10 @@
 XSCVSPDP vsx_xscvspdp {}

   const double __builtin_vsx_xsmaxdp (double, double);
-XSMAXDP smaxdf3 {}
+XSMAXDP fmaxdf3 {}

   const double __builtin_vsx_xsmindp (double, double);
-XSMINDP smindf3 {}
+XSMINDP fmindf3 {}

   const double __builtin_vsx_xsrdpi (double);
 XSRDPI vsx_xsrdpi {}
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf85baa5370..ae0dd98f0f9 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -158,6 +158,8 @@ (define_c_enum "unspec"
UNSPEC_HASHCHK
UNSPEC_XXSPLTIDP_CONST
UNSPEC_XXSPLTIW_CONST
+   UNSPEC_FMAX
+   UNSPEC_FMIN
   ])

 ;;
@@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr"
   DONE;
 })

+
+(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
+
+(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
+(UNSPEC_FMIN "min")])
+
+(define_insn "f<minmax_op><mode>3"
+  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
+   (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
+ (match_operand:SFDF 2 "vsx_register_operand" "wa")]
+FMINMAX))]
+  "TARGET_VSX && !flag_finite_math_only"
+  "xs<minmax_op>dp %x0,%x1,%x2"
+  [(set_attr "type" "fp")]
+)
+
 (define_expand "movcc"
[(set (match_operand:GPR 0 "gpc_reg_operand")
 (if_then_else:GPR (match_operand 1 "comparison_operator")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr103605-1.c
new file mode 100644
index 000..923deec6a1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103605-1.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */
+
+#include "pr103605.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr103605-2.c
new file mode 100644
index 000..f50fe9468f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103605-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx -ffast-math" } */
+/* { dg-final { scan-assembler-times {\mxsmaxcdp\M} 3 { target has_arch_pwr9 } 
} } */
+/* { dg-final { scan-assembler-times {\mxsmincdp\M} 3 { target has_arch_pwr9 } 
} } */
+/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 { target { ! 
has_arch_pwr9 } } } } */

Re: [PATCH v6, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-09-22 Thread HAO CHEN GUI via Gcc-patches
Hi Kewen & Segher,

Thanks so much for your review comments.

On 22/9/2022 上午 10:28, Kewen.Lin wrote:
> on 2022/9/22 05:56, Segher Boessenkool wrote:
>> Hi!
>>
>> On Fri, Jun 24, 2022 at 10:02:19AM +0800, HAO CHEN GUI wrote:
>>>   This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
>>> of smin/max. So the builtins always generate xs[min/max]dp on all
>>> platforms.
>>
>> But how does this not blow up with -ffast-math?
> 
> Indeed.  Since it guards with "TARGET_VSX && !flag_finite_math_only",
> the bifs seem to cause ICE at -ffast-math.
> 
> Haochen, could you double check it?
I tested it with "-ffast-math". The fmin/fmax functions are converted to
MIN/MAX_EXPR in the gimple lowering pass, but the built-ins are not, so they
hit the ICE. I had thought the built-ins were folded to MIN/MAX_EXPR, like
the vec_ versions, when fast-math is set. In fact they are not. Sorry for that.

I made a patch to fold these two built-ins to MIN/MAX_EXPR when fast-math
is set. Then the built-ins are converted to MIN/MAX_EXPR and expanded to
smin/max.

Thanks for pointing out the problem!

> 
>>
>> In the other direction I am worried that the unspecs will degrade
>> performance (relative to smin/smax) when -ffast-math *is* active (and
>> this new builtin code and pattern doesn't blow up).
> 
> For fmin/fmax it would be fine, since they are transformed to {MAX,MIN}
> EXPR in middle end, and yes, it can degrade for the bifs, although IMHO
> the previous expansion to smin/smax contradicts with the bif names (users
> expect to map them to xs{min,max}dp than others).
> 
>>
>> I still think we should get RTL codes for this, to have access to proper
>> floating point min/max semantics always and everywhere.  "fmin" and
>> "fmax" seem to be good names :-)
> 
> It would be good, especially if we have observed some uses of these bifs
> and further opportunities around them.  :)
> 
Shall we submit a PR to add fmin/fmax to RTL codes?

> BR,
> Kewen


Ping [PATCH v3, rs6000] Change mode and insn condition for VSX scalar extract/insert instructions

2022-09-20 Thread HAO CHEN GUI via Gcc-patches
Hi,
 Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601196.html
Thanks.

On 7/9/2022 下午 3:44, HAO CHEN GUI wrote:
> Hi,
> 
>   For scalar extract/insert instructions, exponent field can be stored in a
> 32-bit register. So this patch changes the mode of exponent field from DI to
> SI. The instructions using DI registers can be invoked with -mpowerpc64 in a
> 32-bit environment. The patch changes insn condition from TARGET_64BIT to
> TARGET_POWERPC64 for those instructions.
> 
>   This patch also changes prototypes of relevant built-ins and effective
> target of test cases.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-09-07  Haochen Gui  
> 
> gcc/
>   * config/rs6000/rs6000-builtins.def
>   (__builtin_vsx_scalar_extract_exp): Set return type to const unsigned
>   int.
>   (__builtin_vsx_scalar_extract_sig): Set return type to const unsigned
>   long long.
>   (__builtin_vsx_scalar_insert_exp): Set type of second argument to
>   unsigned int.
>   (__builtin_vsx_scalar_insert_exp_dp): Likewise.
>   * config/rs6000/vsx.md (xsxexpdp): Set mode of first operand to
>   SImode.  Remove TARGET_64BIT from insn condition.
>   (xsxsigdp): Change insn condition from TARGET_64BIT to TARGET_POWERPC64.
>   (xsiexpdp): Change insn condition from TARGET_64BIT to
>   TARGET_POWERPC64.  Set mode of third operand to SImode.
>   (xsiexpdpf): Set mode of third operand to SImode.  Remove TARGET_64BIT
>   from insn condition.
> 
> gcc/testsuite/
>   * gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Change effective
>   target from lp64 to has_arch_ppc64.
>   * gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Likewise.
>   * gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Likewise.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index f76f54793d7..ca2a1d7657e 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -2847,17 +2847,17 @@
>pure vsc __builtin_vsx_lxvl (const void *, signed long);
>  LXVL lxvl {}
> 
> -  const signed long __builtin_vsx_scalar_extract_exp (double);
> +  const unsigned int __builtin_vsx_scalar_extract_exp (double);
>  VSEEDP xsxexpdp {}
> 
> -  const signed long __builtin_vsx_scalar_extract_sig (double);
> +  const unsigned long long __builtin_vsx_scalar_extract_sig (double);
>  VSESDP xsxsigdp {}
> 
>const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
> -unsigned long long);
> + unsigned int);
>  VSIEDP xsiexpdp {}
> 
> -  const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned long 
> long);
> +  const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned int);
>  VSIEDPF xsiexpdpf {}
> 
>pure vsc __builtin_vsx_xl_len_r (void *, signed long);
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index e226a93bbe5..9d3a2340a79 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -5095,10 +5095,10 @@ (define_insn "xsxexpqp_"
> 
>  ;; VSX Scalar Extract Exponent Double-Precision
>  (define_insn "xsxexpdp"
> -  [(set (match_operand:DI 0 "register_operand" "=r")
> - (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
> +  [(set (match_operand:SI 0 "register_operand" "=r")
> + (unspec:SI [(match_operand:DF 1 "vsx_register_operand" "wa")]
>UNSPEC_VSX_SXEXPDP))]
> -  "TARGET_P9_VECTOR && TARGET_64BIT"
> +  "TARGET_P9_VECTOR"
>"xsxexpdp %0,%x1"
>[(set_attr "type" "integer")])
> 
> @@ -5116,7 +5116,7 @@ (define_insn "xsxsigdp"
>[(set (match_operand:DI 0 "register_operand" "=r")
>   (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
>UNSPEC_VSX_SXSIG))]
> -  "TARGET_P9_VECTOR && TARGET_64BIT"
> +  "TARGET_P9_VECTOR && TARGET_POWERPC64"
>"xsxsigdp %0,%x1"
>[(set_attr "type" "integer")])
> 
> @@ -5145,9 +5145,9 @@ (define_insn "xsiexpqp_"
>  (define_insn "xsiexpdp"
>[(set (match_operand:DF 0 "vsx_register_operand" "=wa")
>   (unspec:DF [(match_operand:DI 1 "register_operand" "r")
> - (match_operand:DI 2 "register_operand" "r")]
> + (match_operand:SI 2 "register_operand" "r")]
>UNSPEC_VSX_SIEXPDP))]
> -  "TARGET_P9_VECTOR && TARGET_64BIT"
> +  "TARGET_P9_VECTOR && TARGET_POWERPC64"
>"xsiexpdp %x0,%1,%2"
>[(set_attr "type" "fpsimple")])
> 
> @@ 

Ping^3 [PATCH v2, rs6000] Use CC for BCD operations [PR100736]

2022-09-20 Thread HAO CHEN GUI via Gcc-patches
 Hi,
 Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html
Thanks.

On 1/8/2022 上午 10:02, HAO CHEN GUI wrote:
> Hi,
> Gentle ping this:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html
> Thanks.
> 
> On 4/7/2022 下午 2:33, HAO CHEN GUI wrote:
>> Hi,
>>Gentle ping this:
>> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html
>> Thanks.
>>
>> On 22/6/2022 下午 4:26, HAO CHEN GUI wrote:
>>> Hi,
>>>   This patch uses CC instead of CCFP for all BCD operations. Thus, the finite
>>> math flag has no impact on BCD operations. To support BCD overflow and
>>> invalid coding, an UNSPEC is defined to move the bit to a general register.
>>> Patterns for conditional branch and return on the overflow bit are defined so
>>> that the UNSPEC and the branch/return can be combined into one jump insn. A
>>> split pattern for overflow bit extension is defined for optimization.
>>>
>>>   This patch also replaces bcdadd with bcdsub in the expander for the BCD
>>> invalid-coding check.
>>>
>>> ChangeLog
>>> 2022-06-22 Haochen Gui 
>>>
>>> gcc/
>>> PR target/100736
>>> * config/rs6000/altivec.md (BCD_TEST): Remove unordered.
>>> (bcd_): Replace CCFP with CC.
>>> (*bcd_test_): Replace CCFP with CC.  Generate
>>> condition insn with CC mode.
>>> (bcd_overflow_): New.
>>> (*bcdoverflow_): New.
>>> (*bcdinvalid_): Removed.
>>> (bcdinvalid_): Implement by UNSPEC_BCDSUB and UNSPEC_BCD_OVERFLOW.
>>> (nuun): New.
>>> (*overflow_cbranch): New.
>>> (*overflow_creturn): New.
>>> (*overflow_extendsidi): New.
>>> (bcdshift_v16qi): Replace CCFP with CC.
>>> (bcdmul10_v16qi): Likewise.
>>> (bcddiv10_v16qi): Likewise.
>>> (peephole for bcd_add/sub): Likewise.
>>> * config/rs6000/rs6000-builtins.def (__builtin_bcdadd_ov_v1ti): Set
>>> pattern to bcdadd_overflow_v1ti.
>>> (__builtin_bcdadd_ov_v16qi): Set pattern to bcdadd_overflow_v16qi.
>>> (__builtin_bcdsub_ov_v1ti): Set pattern to bcdsub_overflow_v1ti.
>>> (__builtin_bcdsub_ov_v16qi): Set pattern to bcdsub_overflow_v16qi.
>>>
>>> gcc/testsuite/
>>> PR target/100736
>>> * gcc.target/powerpc/bcd-4.c: Adjust number of bcdadd and bcdsub.
>>> Scan no cror insns.
>>>
>>> patch.diff
>>> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
>>> index efc8ae35c2e..26f131e61ea 100644
>>> --- a/gcc/config/rs6000/altivec.md
>>> +++ b/gcc/config/rs6000/altivec.md
>>> @@ -4370,7 +4370,7 @@ (define_int_iterator UNSPEC_BCD_ADD_SUB 
>>> [UNSPEC_BCDADD UNSPEC_BCDSUB])
>>>  (define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add")
>>>   (UNSPEC_BCDSUB "sub")])
>>>
>>> -(define_code_iterator BCD_TEST [eq lt le gt ge unordered])
>>> +(define_code_iterator BCD_TEST [eq lt le gt ge])
>>>  (define_mode_iterator VBCD [V1TI V16QI])
>>>
>>>  (define_insn "bcd_"
>>> @@ -4379,7 +4379,7 @@ (define_insn "bcd_"
>>>   (match_operand:VBCD 2 "register_operand" "v")
>>>   (match_operand:QI 3 "const_0_to_1_operand" "n")]
>>>  UNSPEC_BCD_ADD_SUB))
>>> -   (clobber (reg:CCFP CR6_REGNO))]
>>> +   (clobber (reg:CC CR6_REGNO))]
>>>"TARGET_P8_VECTOR"
>>>"bcd. %0,%1,%2,%3"
>>>[(set_attr "type" "vecsimple")])
>>> @@ -4389,9 +4389,9 @@ (define_insn "bcd_"
>>>  ;; UNORDERED test on an integer type (like V1TImode) is not defined.  The 
>>> type
>>>  ;; probably should be one that can go in the VMX (Altivec) registers, so we
>>>  ;; can't use DDmode or DFmode.
>>> -(define_insn "*bcd_test_"
>>> -  [(set (reg:CCFP CR6_REGNO)
>>> -   (compare:CCFP
>>> +(define_insn "bcd_test_"
>>> +  [(set (reg:CC CR6_REGNO)
>>> +   (compare:CC
>>>  (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")
>>>(match_operand:VBCD 2 "register_operand" "v")
>>>(match_operand:QI 3 "const_0_to_1_operand" "i")]
>>> @@ -4408,8 +4408,8 @@ (define_insn "*bcd_test2_"
>>>   (match_operand:VBCD 2 "register_operand" "v")
>>>   (match_operand:QI 3 "const_0_to_1_operand" "i")]
>>>  UNSPEC_BCD_ADD_SUB))
>>> -   (set (reg:CCFP CR6_REGNO)
>>> -   (compare:CCFP
>>> +   (set (reg:CC CR6_REGNO)
>>> +   (compare:CC
>>>  (unspec:V2DF [(match_dup 1)
>>>(match_dup 2)
>>>(match_dup 3)]
>>> @@ -4502,8 +4502,8 @@ (define_insn "vclrrb"
>>> [(set_attr "type" "vecsimple")])
>>>
>>>  (define_expand "bcd__"
>>> -  [(parallel [(set (reg:CCFP CR6_REGNO)
>>> -  (compare:CCFP
>>> +  [(parallel [(set (reg:CC CR6_REGNO)
>>> +  (compare:CC
>>> (unspec:V2DF [(match_operand:VBCD 1 "register_operand")
>>>   (match_operand:VBCD 2 "register_operand")
>>>   (match_operand:QI 3 "const_0_to_1_operand")]
>>> @@ -4511,46 +4511,138 @@ (define_expand "bcd__"
>>> (match_dup 4)))
>>>   (clobber (match_scratch:VBCD 5))])
>>> 

Ping^3 [PATCH v6, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-09-20 Thread HAO CHEN GUI via Gcc-patches
Hi,
Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597158.html
Thanks.

On 1/8/2022 上午 10:03, HAO CHEN GUI wrote:
> Hi,
>Gentle ping this:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597158.html
> Thanks.
> 
> 
> On 4/7/2022 下午 2:32, HAO CHEN GUI wrote:
>> Hi,
>>Gentle ping this:
>> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597158.html
>> Thanks.
>>
>> On 24/6/2022 上午 10:02, HAO CHEN GUI wrote:
>>> Hi,
>>>   This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
>>> Tests show that outputs of xs[min/max]dp are consistent with the standard
>>> of C99 fmin/max.
>>>
>>>   This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
>>> of smin/max. So the builtins always generate xs[min/max]dp on all
>>> platforms.
>>>
>>>   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
>>> Is this okay for trunk? Any recommendations? Thanks a lot.
>>>
>>> ChangeLog
>>> 2022-06-24 Haochen Gui 
>>>
>>> gcc/
>>> PR target/103605
>>> * config/rs6000/rs6000.md (FMINMAX): New.
>>> (minmax_op): New.
>>> (f<minmax_op><mode>3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN.
>>> * config/rs6000/rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set
>>> pattern to fmaxdf3.
>>> (__builtin_vsx_xsmindp): Set pattern to fmindf3.
>>>
>>> gcc/testsuite/
>>> PR target/103605
>>> * gcc.target/powerpc/pr103605.c: New.
>>>
>>>
>>> patch.diff
>>> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
>>> b/gcc/config/rs6000/rs6000-builtins.def
>>> index f4a9f24bcc5..8b735493b40 100644
>>> --- a/gcc/config/rs6000/rs6000-builtins.def
>>> +++ b/gcc/config/rs6000/rs6000-builtins.def
>>> @@ -1613,10 +1613,10 @@
>>>  XSCVSPDP vsx_xscvspdp {}
>>>
>>>const double __builtin_vsx_xsmaxdp (double, double);
>>> -XSMAXDP smaxdf3 {}
>>> +XSMAXDP fmaxdf3 {}
>>>
>>>const double __builtin_vsx_xsmindp (double, double);
>>> -XSMINDP smindf3 {}
>>> +XSMINDP fmindf3 {}
>>>
>>>const double __builtin_vsx_xsrdpi (double);
>>>  XSRDPI vsx_xsrdpi {}
>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>>> index bf85baa5370..ae0dd98f0f9 100644
>>> --- a/gcc/config/rs6000/rs6000.md
>>> +++ b/gcc/config/rs6000/rs6000.md
>>> @@ -158,6 +158,8 @@ (define_c_enum "unspec"
>>> UNSPEC_HASHCHK
>>> UNSPEC_XXSPLTIDP_CONST
>>> UNSPEC_XXSPLTIW_CONST
>>> +   UNSPEC_FMAX
>>> +   UNSPEC_FMIN
>>>])
>>>
>>>  ;;
>>> @@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr"
>>>DONE;
>>>  })
>>>
>>> +
>>> +(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
>>> +
>>> +(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
>>> +(UNSPEC_FMIN "min")])
>>> +
>>> +(define_insn "f<minmax_op><mode>3"
>>> +  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
>>> +   (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
>>> + (match_operand:SFDF 2 "vsx_register_operand" "wa")]
>>> +FMINMAX))]
>>> +  "TARGET_VSX && !flag_finite_math_only"
>>> +  "xs<minmax_op>dp %x0,%x1,%x2"
>>> +  [(set_attr "type" "fp")]
>>> +)
>>> +
>>>  (define_expand "movcc"
>>> [(set (match_operand:GPR 0 "gpc_reg_operand")
>>>  (if_then_else:GPR (match_operand 1 "comparison_operator")
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605.c 
>>> b/gcc/testsuite/gcc.target/powerpc/pr103605.c
>>> new file mode 100644
>>> index 000..1c938d40e61
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr103605.c
>>> @@ -0,0 +1,37 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target powerpc_vsx_ok } */
>>> +/* { dg-options "-O2 -mvsx" } */
>>> +/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */
>>> +/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */
>>> +
>>> +#include <math.h>
>>> +
>>> +double test1 (double d0, double d1)
>>> +{
>>> +  return fmin (d0, d1);
>>> +}
>>> +
>>> +float test2 (float d0, float d1)
>>> +{
>>> +  return fmin (d0, d1);
>>> +}
>>> +
>>> +double test3 (double d0, double d1)
>>> +{
>>> +  return fmax (d0, d1);
>>> +}
>>> +
>>> +float test4 (float d0, float d1)
>>> +{
>>> +  return fmax (d0, d1);
>>> +}
>>> +
>>> +double test5 (double d0, double d1)
>>> +{
>>> +  return __builtin_vsx_xsmindp (d0, d1);
>>> +}
>>> +
>>> +double test6 (double d0, double d1)
>>> +{
>>> +  return __builtin_vsx_xsmaxdp (d0, d1);
>>> +}


  1   2   3   >