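x86 targets mask the count of 32-bit shifts with a 5-bit mask (and of 64-bit
shifts with a 6-bit mask), so they can use x >> ~y instead of x >> (31 - y):
~y == -y - 1, which is congruent to 31 - y modulo 32 (and to 63 - y modulo 64).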

The optimization converts:

        movl    $31, %ecx
        subl    %esi, %ecx
        sall    %cl, %eax

to:
        notl    %ecx
        sall    %cl, %eax

    PR target/36503

gcc/ChangeLog:

    * config/i386/i386.md (*<insn:any_shift><mode:SWI48>3_sub):
    Also allow (operands[3] & (<mode_bitsize>-1)) == (<mode_bitsize>-1)
    in insn condition. Emit NOT RTX instead of NEG RTX in this case.
    (*<insn:any_shift><mode:SWI48>3_sub_1): Ditto.

gcc/testsuite/ChangeLog:

    * gcc.target/i386/pr36503-5.c: New test.
    * gcc.target/i386/pr36503-6.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4444e5154af..5c44988112b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -18266,24 +18266,27 @@ (define_insn_and_split "*<insn><mode>3_sub"
              (match_operand 2 "int248_register_operand" "c,r")) 0)))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
-   && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+   && ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+       || (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1))
+          == <MODE_SIZE> * BITS_PER_UNIT - 1)
    && ix86_pre_reload_split ()"
   "#"
   "&& 1"
   [(parallel
-     [(set (match_dup 4)
-          (neg:QI (match_dup 2)))
-      (clobber (reg:CC FLAGS_REG))])
-   (parallel
      [(set (match_dup 0)
           (any_shift:SWI48 (match_dup 1)
                            (match_dup 4)))
       (clobber (reg:CC FLAGS_REG))])]
 {
+  HOST_WIDE_INT cnt = INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1);
+
   operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
   operands[2] = gen_lowpart (QImode, operands[2]);
 
   operands[4] = gen_reg_rtx (QImode);
+
+  rtx (*insn)(rtx, rtx) = (cnt == 0) ? gen_negqi2 : gen_one_cmplqi2;
+  emit_insn (insn (operands[4], operands[2]));
 }
   [(set_attr "isa" "*,bmi2")])
 
@@ -18296,20 +18299,25 @@ (define_insn_and_split "*<insn><mode>3_sub_1"
            (match_operand:QI 2 "register_operand" "c,r"))))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
-   && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+   && ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+       || (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1))
+          == <MODE_SIZE> * BITS_PER_UNIT - 1)
    && ix86_pre_reload_split ()"
   "#"
   "&& 1"
   [(parallel
-     [(set (match_dup 4)
-          (neg:QI (match_dup 2)))
-      (clobber (reg:CC FLAGS_REG))])
-   (parallel
      [(set (match_dup 0)
           (any_shift:SWI48 (match_dup 1)
                            (match_dup 4)))
       (clobber (reg:CC FLAGS_REG))])]
-  "operands[4] = gen_reg_rtx (QImode);"
+{
+  HOST_WIDE_INT cnt = INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1);
+
+  operands[4] = gen_reg_rtx (QImode);
+
+  rtx (*insn)(rtx, rtx) = (cnt == 0) ? gen_negqi2 : gen_one_cmplqi2;
+  emit_insn (insn (operands[4], operands[2]));
+}
   [(set_attr "isa" "*,bmi2")])
 
 (define_insn_and_split "*extend<dwi>2_doubleword_highpart"
diff --git a/gcc/testsuite/gcc.target/i386/pr36503-5.c b/gcc/testsuite/gcc.target/i386/pr36503-5.c
new file mode 100644
index 00000000000..34522558524
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr36503-5.c
@@ -0,0 +1,20 @@
+/* PR target/36503 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-mregparm=3" { target ia32 } } */
+/* { dg-final { scan-assembler-not "movl\[ \\t\]+\\\$31" } } */
+
+int foo (int i, int n)
+{
+  return i << (31 - n);
+}
+
+int bar (int i, int n)
+{
+  return i >> (31 - n);
+}
+
+unsigned int baz (unsigned int i, int n)
+{
+  return i >> (31 - n);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr36503-6.c b/gcc/testsuite/gcc.target/i386/pr36503-6.c
new file mode 100644
index 00000000000..cf16407dfcb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr36503-6.c
@@ -0,0 +1,19 @@
+/* PR target/36503 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-final { scan-assembler-not "movl\[ \\t\]+\\\$63" } } */
+
+long long foo (long long i, int n)
+{
+  return i << (63 - n);
+}
+
+long long bar (long long i, int n)
+{
+  return i >> (63 - n);
+}
+
+unsigned long long baz (unsigned long long i, int n)
+{
+  return i >> (63 - n);
+}

Reply via email to