https://gcc.gnu.org/g:a4c9ade72885f9cf72c873d110545e4e3c2c7805

commit r14-10458-ga4c9ade72885f9cf72c873d110545e4e3c2c7805
Author: Roger Sayle <ro...@nextmovesoftware.com>
Date:   Fri Jun 7 14:03:20 2024 +0100

    i386: PR target/115351: RTX costs for *concatditi3 and *insvti_highpart.
    
    This patch addresses PR target/115351, which is a code quality regression
    on x86 when passing floating point complex numbers.  The ABI considers
    these arguments to have TImode, requiring interunit moves to place the
    FP values (which are actually passed in SSE registers) into the upper
    and lower parts of a TImode pseudo, and then similar moves back again
    before they can be used.
    
    The cause of the regression is that changes in how TImode initialization
    is represented in RTL now prevent the RTL optimizers from eliminating
    these redundant moves.  The specific cause is that the *concatditi3
    pattern, (zext(hi)<<64)|zext(lo), has an inappropriately high (default)
    rtx_cost, preventing fwprop1 from propagating it.  This pattern just
    sets the hipart and lopart of a double-word register, typically two
    instructions (fewer if reload can allocate things appropriately) but
    the current ix86_rtx_costs actually returns COSTS_N_INSNS (13), i.e. 52.
    
    propagating insn 5 into insn 6, replacing:
    (set (reg:TI 110)
        (ior:TI (and:TI (reg:TI 110)
                (const_wide_int 0x0ffffffffffffffff))
            (ashift:TI (zero_extend:TI (subreg:DI (reg:DF 112 [ zD.2796+8 ]) 0))
                (const_int 64 [0x40]))))
    successfully matched this instruction to *concatditi3_3:
    (set (reg:TI 110)
        (ior:TI (ashift:TI (zero_extend:TI (subreg:DI (reg:DF 112 [ zD.2796+8 ]) 0))
                (const_int 64 [0x40]))
            (zero_extend:TI (subreg:DI (reg:DF 111 [ zD.2796 ]) 0))))
    change not profitable (cost 50 -> cost 52)
    
    This issue is resolved by having ix86_rtx_costs return more reasonable
    values for these (place-holder) patterns.
    
    2024-06-07  Roger Sayle  <ro...@nextmovesoftware.com>
    
    gcc/ChangeLog
            PR target/115351
            * config/i386/i386.cc (ix86_rtx_costs): Provide estimates for
            the *concatditi3 and *insvti_highpart patterns, about two insns.
    
    gcc/testsuite/ChangeLog
            PR target/115351
            * g++.target/i386/pr115351.C: New test case.
    
    (cherry picked from commit fb3e4c549d16d5050e10114439ad77149f33c597)

Diff:
---
 gcc/config/i386/i386.cc                  | 43 ++++++++++++++++++++++++++++++++
 gcc/testsuite/g++.target/i386/pr115351.C | 19 ++++++++++++++
 2 files changed, 62 insertions(+)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 3827e2b61fe4..35a282433892 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -21865,6 +21865,49 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
            }
          *total = ix86_vec_cost (mode, cost->sse_op);
        }
+      else if (TARGET_64BIT
+              && mode == TImode
+              && GET_CODE (XEXP (x, 0)) == ASHIFT
+              && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
+              && GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode
+              && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+              && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
+              && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
+              && GET_MODE (XEXP (XEXP (x, 1), 0)) == DImode)
+       {
+         /* *concatditi3 is cheap.  */
+         rtx op0 = XEXP (XEXP (XEXP (x, 0), 0), 0);
+         rtx op1 = XEXP (XEXP (x, 1), 0);
+         *total = (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == DFmode)
+                  ? COSTS_N_INSNS (1)    /* movq.  */
+                  : set_src_cost (op0, DImode, speed);
+         *total += (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == DFmode)
+                   ? COSTS_N_INSNS (1)    /* movq.  */
+                   : set_src_cost (op1, DImode, speed);
+         return true;
+       }
+      else if (TARGET_64BIT
+              && mode == TImode
+              && GET_CODE (XEXP (x, 0)) == AND
+              && REG_P (XEXP (XEXP (x, 0), 0))
+              && CONST_WIDE_INT_P (XEXP (XEXP (x, 0), 1))
+              && CONST_WIDE_INT_NUNITS (XEXP (XEXP (x, 0), 1)) == 2
+              && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 0) == -1
+              && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 1) == 0
+              && GET_CODE (XEXP (x, 1)) == ASHIFT
+              && GET_CODE (XEXP (XEXP (x, 1), 0)) == ZERO_EXTEND
+              && GET_MODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == DImode
+              && CONST_INT_P (XEXP (XEXP (x, 1), 1))
+              && INTVAL (XEXP (XEXP (x, 1), 1)) == 64)
+       {
+         /* *insvti_highpart is cheap.  */
+         rtx op = XEXP (XEXP (XEXP (x, 1), 0), 0);
+         *total = COSTS_N_INSNS (1) + 1;
+         *total += (SUBREG_P (op) && GET_MODE (SUBREG_REG (op)) == DFmode)
+                   ? COSTS_N_INSNS (1)    /* movq.  */
+                   : set_src_cost (op, DImode, speed);
+         return true;
+       }
       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        *total = cost->add * 2;
       else
diff --git a/gcc/testsuite/g++.target/i386/pr115351.C b/gcc/testsuite/g++.target/i386/pr115351.C
new file mode 100644
index 000000000000..24132f3b8c7b
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr115351.C
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -std=c++11" } */
+
+struct complex
+{
+    double real;
+    double imag;
+};
+
+complex blub(complex z)
+{
+  return {
+          z.real * z.real - z.imag * z.imag,
+          2 * z.real * z.imag
+         };
+}
+
+/* { dg-final { scan-assembler-not "movq" } } */
+/* { dg-final { scan-assembler-not "xchg" } } */

Reply via email to