https://gcc.gnu.org/g:c10076b34d3ede485bfb306b2a91feeee8368cd5

commit r16-6654-gc10076b34d3ede485bfb306b2a91feeee8368cd5
Author: Vladimir N. Makarov <[email protected]>
Date:   Fri Jan 9 10:36:29 2026 -0500

    [PR123121, LRA]: Fix wrong rematerialization of insns with several outputs
    
    In the test case, LRA rematerializes an insn with div/mod where the div
    result is not used.  Still, the div result requires ax, which is used by
    different pseudos at the point of rematerialization, and this clobbers
    the pseudo value.  The patch solves the problem by constraining
    rematerialization to single-set insns, as we always rematerialize only
    one pseudo value.  Also, there is no sense in rematerializing div/mod, as
    their latency is usually higher than that of loading a value from the CPU
    cache.  The patch explicitly excludes such insns from rematerialization.
    
    gcc/ChangeLog:
    
            PR rtl-optimization/123121
            * lra-remat.cc (bad_for_rematerialization_p): Consider div/mod ops.
            (operand_to_remat): Exclude rematerialization of insns with
            multiple sets.
    
    gcc/testsuite/ChangeLog:
    
            PR rtl-optimization/123121
            * gcc.target/i386/pr123121.c: New.

Diff:
---
 gcc/lra-remat.cc                         | 25 ++++++++------
 gcc/testsuite/gcc.target/i386/pr123121.c | 57 ++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 10 deletions(-)

diff --git a/gcc/lra-remat.cc b/gcc/lra-remat.cc
index bbdb152fa930..c7ae0f7b8c16 100644
--- a/gcc/lra-remat.cc
+++ b/gcc/lra-remat.cc
@@ -81,8 +81,10 @@ typedef const struct cand *const_cand_t;
 
 /* Insn candidates for rematerialization.  The candidate insn should
    have the following properies:
-   o no any memory (as access to memory is non-profitable)
+   o no any memory (as access to memory is non-profitable) or
+     div/mod operations (as they are usually more expensive than loads)
    o no INOUT regs (it means no non-paradoxical subreg of output reg)
+   o no multiple output pseudos
    o one output spilled pseudo (or reload pseudo of a spilled pseudo)
    o all other pseudos are with assigned hard regs.  */
 struct cand
@@ -249,12 +251,12 @@ finish_cand_table (void)
 
 
 
-/* Return true if X contains memory or some UNSPEC.  We cannot just
-   check insn operands as memory or unspec might be not an operand
-   itself but contain an operand.  Insn with memory access is not
-   profitable for rematerialization.  Rematerialization of UNSPEC
-   might result in wrong code generation as the UNPEC effect is
-   unknown (e.g. generating a label).  */
+/* Return true if X contains memory, some UNSPEC, or expensive operations.  We
+   cannot just check insn operands as memory or unspec might be not an operand
+   itself but contain an operand.  Insns with memory access or expensive ones
+   are not profitable for rematerialization.  Rematerialization of UNSPEC might
+   result in wrong code generation as the UNSPEC effect is unknown
+   (e.g. generating a label).  */
 static bool
 bad_for_rematerialization_p (rtx x)
 {
@@ -262,7 +264,11 @@ bad_for_rematerialization_p (rtx x)
   const char *fmt;
   enum rtx_code code;
 
-  if (MEM_P (x) || GET_CODE (x) == UNSPEC || GET_CODE (x) == UNSPEC_VOLATILE)
+  if (MEM_P (x) || GET_CODE (x) == UNSPEC || GET_CODE (x) == UNSPEC_VOLATILE
+      /* Usually the following operations are expensive and not worth
+        rematerializing: */
+      || GET_CODE(x) == DIV || GET_CODE(x) == UDIV
+      || GET_CODE(x) == MOD || GET_CODE(x) == UMOD)
     return true;
   code = GET_CODE (x);
   fmt = GET_RTX_FORMAT (code);
@@ -308,8 +314,7 @@ operand_to_remat (rtx_insn *insn)
         cannot know sp offset at a rematerialization place.  */
       if (reg->regno == STACK_POINTER_REGNUM && frame_pointer_needed)
        return -1;
-      else if (reg->type == OP_OUT && ! reg->subreg_p
-              && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
+      else if (reg->type == OP_OUT)
        {
          /* We permits only one spilled reg.  */
          if (found_reg != NULL)
diff --git a/gcc/testsuite/gcc.target/i386/pr123121.c b/gcc/testsuite/gcc.target/i386/pr123121.c
new file mode 100644
index 000000000000..d4c3c0fe3118
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr123121.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -fno-ipa-cp -fschedule-insns" } */
+
+int a[256], d, e, f, g, h, j, v[] = {0, -6, 0};;
+unsigned c;
+unsigned k(int l, unsigned m) { return (l & 6777215) ^ a[(l ^ m) & 55]; }
+int n(int l, int m[]) {
+  int i = 0;
+  for (; i < l; ++i) {
+    e = c >> 8 ^ a[m[i] & 255];
+    d = e;
+    d = k(d, m[i] >> 8 & 255);
+    d = k(d, m[i] >> 6);
+    f = (d & 6777215) ^ a[d & 5];
+    c = f;
+  }
+  return f;
+}
+int o() {
+  n(8, (int[]){g});
+  return n(6, (int[]){h});
+}
+int p(int l, int m[], int q, int r) {
+  int s = r = j + 1;
+  s = -(4 % q - 1);
+  if (r)
+    goto t;
+ ad:
+  l = 6 - l;
+  if (l)
+    goto ae;
+ t:
+  m[0] = s;
+  j = r;
+  goto af;
+ ae:
+  if (o())
+    goto ad;
+ af:
+  r = -s - m[1] - 8;
+  s = 1 % m[0] - s;
+  m[1] = 1 / r;
+  int a[] = {l, m[0], m[1], m[2], q, r, r, s};
+  return n(8, a);
+}
+int main() {
+  for (int i = 0; i < 256; i++) {
+    unsigned b = i;
+    if (i & 1)
+      b = b >> 1 ^ 3988292384;
+    a[i] = b;
+  }
+  if (p(1, v, 5, 0) / 100000)
+    p(1, 0, 5, 0);
+  return 0;
+}

Reply via email to