Hi All,

The following patch has been bootstrapped and regtested on powerpc64le-linux.

PowerPC vector shift left instructions (vslb, vslh, vslw, etc.) implement
modulo semantics: only the low N bits of the shift amount are considered (3 for
bytes, 4 for halfwords and 5 for words). Higher bits can be ignored safely.

Previously, rs6000_gimple_fold_builtin() refused to fold these shifts when the
first argument was a vector of signed integers whose overflow does not wrap
(i.e. without -fwrapv). This blocked modulo reduction of the shift amount and
caused constant shifts to fall back to memory loads instead of using immediate
splat instructions.

This patch removes the overflow check on the first argument. Since the
shift amount (second argument) is always unsigned, modulo reduction is
correct regardless of whether the data being shifted is signed or unsigned.

As a result, constant shift amounts are now folded into splat instructions,
improving code generation and avoiding unnecessary memory accesses.


2025-09-11  Jeevitha Palanisamy  <[email protected]>

gcc/
        PR target/121867
        * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Remove
        overflow type check on shift input.

gcc/testsuite/
        PR target/121867
        * gcc.target/powerpc/pr86731-longlong.c: Adjust expected instruction
        counts now that the long long vec_sl tests generate lvx.
        * gcc.target/powerpc/pr121867.c: New test.

diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index bc1580f051b..5c964403257 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1710,10 +1710,6 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
        location_t loc;
        gimple_seq stmts = NULL;
        arg0 = gimple_call_arg (stmt, 0);
-       tree arg0_type = TREE_TYPE (arg0);
-       if (INTEGRAL_TYPE_P (TREE_TYPE (arg0_type))
-           && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0_type)))
-         return false;
        arg1 = gimple_call_arg (stmt, 1);
        tree arg1_type = TREE_TYPE (arg1);
        tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
diff --git a/gcc/testsuite/gcc.target/powerpc/pr121867.c b/gcc/testsuite/gcc.target/powerpc/pr121867.c
new file mode 100644
index 00000000000..0c8f3f8372c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr121867.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-maltivec -mdejagnu-cpu=power8 -O2 -mvsx " } */
+
+/*  This test ensures that we use GIMPLE folding when the shift amount exceeds
+    the element bit width. Folding performs modulo reduction and uses
+    vspltis[bhw] to broadcast the reduced shift amount, instead of loading it
+    from memory before the shift operation.  */
+
+#include <altivec.h>
+
+vector unsigned char shlb(vector unsigned char in)
+{
+    return vec_sl(in, vec_splats((unsigned char)35));
+}
+
+vector unsigned short shlh(vector unsigned short in)
+{
+    return vec_sl(in, vec_splats((unsigned short)18));
+}
+
+vector unsigned int shlw(vector unsigned int in)
+{
+    return vec_sl(in, vec_splats((unsigned int)34));
+}
+
+/* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mvsl[bhw]\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mlvx\M} 0 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr86731-longlong.c b/gcc/testsuite/gcc.target/powerpc/pr86731-longlong.c
index c97cb49de8c..77cb328d3c2 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr86731-longlong.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr86731-longlong.c
@@ -21,10 +21,9 @@ vector signed long long splats4(void)
 }
 
 /* Codegen will consist of splat and shift instructions for most types.
-   Noted variations:  if gimple folding is disabled, or if -fwrapv is not
-   specified, the long long tests will generate a vspltisw+vsld pair,
-   versus generating a single lvx.  */
-/* { dg-final { scan-assembler-times {\mvspltis[bhw]\M|\mxxspltib\M} 2 } } */
-/* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 2 } } */
-/* { dg-final { scan-assembler-times {\mlvx\M} 0 } } */
+   Now that folding is enabled, the vec_sl tests using vector long long type
+   will generate an lvx instead of a vspltisw+vsld pair.  */
 
+/* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 0 } } */
+/* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 0 } } */
+/* { dg-final { scan-assembler-times {\mlvx\M} 2 } } */

Reply via email to