This patch improves TImode STV by adding support for logical shifts by
integer constants that are multiples of 8.  For the test case:

__int128 a, b;
void foo() { a = b << 16; }

on x86_64, gcc -O2 currently generates:

        movq    b(%rip), %rax
        movq    b+8(%rip), %rdx
        shldq   $16, %rax, %rdx
        salq    $16, %rax
        movq    %rax, a(%rip)
        movq    %rdx, a+8(%rip)
        ret

with this patch we now generate:

        movdqa  b(%rip), %xmm0
        pslldq  $2, %xmm0
        movaps  %xmm0, a(%rip)
        ret

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Ok for mainline?


2022-07-28  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        * config/i386/i386-features.cc
        (timode_scalar_chain::compute_convert_gain): Add gain for
        converting suitable TImode shift to a V1TImode shift.
        (timode_scalar_chain::convert_insn): Add support for converting
        suitable ASHIFT and LSHIFTRT.
        (timode_scalar_to_vector_candidate_p): Consider logical shifts
        by integer constants that are multiples of 8 to be candidates.

gcc/testsuite/ChangeLog
        * gcc.target/i386/sse4_1-stv-7.c: New test case.


Thanks again,
Roger
--

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index aa5de71..e1e0645 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -1221,6 +1221,13 @@ timode_scalar_chain::compute_convert_gain ()
            igain = COSTS_N_INSNS (1);
          break;
 
+       case ASHIFT:
+       case LSHIFTRT:
+         /* For logical shifts by constant multiples of 8. */
+         igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (4)
+                                             : COSTS_N_INSNS (1);
+         break;
+
        default:
          break;
        }
@@ -1462,6 +1469,12 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
       src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
       break;
 
+    case ASHIFT:
+    case LSHIFTRT:
+      convert_op (&XEXP (src, 0), insn);
+      PUT_MODE (src, V1TImode);
+      break;
+
     default:
       gcc_unreachable ();
     }
@@ -1796,6 +1809,14 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)
     case NOT:
       return REG_P (XEXP (src, 0)) || timode_mem_p (XEXP (src, 0));
 
+    case ASHIFT:
+    case LSHIFTRT:
+      /* Handle logical shifts by integer constants between 0 and 120
+        that are multiples of 8.  */
+      return REG_P (XEXP (src, 0))
+            && CONST_INT_P (XEXP (src, 1))
+            && (INTVAL (XEXP (src, 1)) & ~0x78) == 0;
+
     default:
       return false;
     }
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-stv-7.c b/gcc/testsuite/gcc.target/i386/sse4_1-stv-7.c
new file mode 100644
index 0000000..b0d5fce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-stv-7.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse4.1 -mstv -mno-stackrealign" } */
+
+unsigned __int128 a;
+unsigned __int128 b;
+
+void foo()
+{
+  a = b << 16;
+}
+
+void bar()
+{
+  a = b >> 16;
+}
+
+/* { dg-final { scan-assembler "pslldq" } } */
+/* { dg-final { scan-assembler "psrldq" } } */

Reply via email to