https://gcc.gnu.org/g:4585ccdfab5d7546f648abd151d34b25c6a25495

commit r14-12428-g4585ccdfab5d7546f648abd151d34b25c6a25495
Author: Andrew Pinski <[email protected]>
Date:   Tue Jan 27 12:19:13 2026 -0800

    aarch64: early-ra: Fix handling of multi-register allocation with clobbers 
[PR123285]
    
    So the problem here is while forming chains, we don't process hard register
    conflicts (and ABI based ones) for allocnos which are already part of a 
chain.
    This means sometimes we allocate a register to a color which might be 
clobbered
    over its live range.
    Processing clobbers for all allocnos while forming a chain does not work,
    as the candidates of the chain's front allocnos do not get updated.
    So we need to do the processing of clobbers (and ABI clobbers) before
    starting to form the chains.
    
    Changes since v1:
     * v2: remove accidental hack which was there just for testing.
     * v3: Move the copying of the shared part to new earlier loop too.
           Fix small white space issue.
    
    Bootstrapped and tested on aarch64-linux-gnu.
    
            PR target/123285
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-early-ra.cc (early_ra::form_chains): 
Process clobbers
            and ABI clobbers before starting to form the chain.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/pr123285-1.c: New test.
    
    Signed-off-by: Andrew Pinski <[email protected]>

Diff:
---
 gcc/config/aarch64/aarch64-early-ra.cc        | 44 ++++++++++++++++-----------
 gcc/testsuite/gcc.target/aarch64/pr123285-1.c | 36 ++++++++++++++++++++++
 2 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-early-ra.cc 
b/gcc/config/aarch64/aarch64-early-ra.cc
index 427b6a13aecd..73f0f68bbc4c 100644
--- a/gcc/config/aarch64/aarch64-early-ra.cc
+++ b/gcc/config/aarch64/aarch64-early-ra.cc
@@ -2624,23 +2624,10 @@ early_ra::form_chains ()
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "\nChaining allocnos:\n");
 
-  // Perform (modified) interval graph coloring.  First sort by
-  // increasing start point.
-  m_sorted_allocnos.reserve (m_allocnos.length ());
-  m_sorted_allocnos.splice (m_allocnos);
-  m_sorted_allocnos.qsort (cmp_increasing<&allocno_info::start_point>);
-
-  // During this phase, color representatives are only correct for
-  // unprocessed allocno groups (where the color representative is
-  // the group itself) and for groups that contain a current chain head.
-  unsigned int ti = 0;
-  auto_vec<chain_candidate_info> candidates;
-  for (unsigned int hi = 0; hi < m_sorted_allocnos.length (); ++hi)
+  // Record conflicts of hard register and ABI conflicts before the
+  // forming of chains so chains have the updated candidates
+  for (auto *allocno1 : m_allocnos)
     {
-      auto *allocno1 = m_sorted_allocnos[hi];
-      if (allocno1->chain_next != INVALID_ALLOCNO)
-       continue;
-
       // Record conflicts with direct uses for FPR hard registers.
       auto *group1 = allocno1->group ();
       for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
@@ -2656,6 +2643,29 @@ early_ra::form_chains ()
            auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
            group1->fpr_candidates &= ~fprs >> allocno1->offset;
          }
+      if (allocno1->is_shared ())
+       {
+         auto *allocno2 = m_allocnos[allocno1->related_allocno];
+         merge_fpr_info (allocno2->group (), group1, allocno2->offset);
+       }
+    }
+
+  // Perform (modified) interval graph coloring.  First sort by
+  // increasing start point.
+  m_sorted_allocnos.reserve (m_allocnos.length ());
+  m_sorted_allocnos.splice (m_allocnos);
+  m_sorted_allocnos.qsort (cmp_increasing<&allocno_info::start_point>);
+
+  // During this phase, color representatives are only correct for
+  // unprocessed allocno groups (where the color representative is
+  // the group itself) and for groups that contain a current chain head.
+  unsigned int ti = 0;
+  auto_vec<chain_candidate_info> candidates;
+  for (unsigned int hi = 0; hi < m_sorted_allocnos.length (); ++hi)
+    {
+      auto *allocno1 = m_sorted_allocnos[hi];
+      if (allocno1->chain_next != INVALID_ALLOCNO)
+       continue;
 
       if (allocno1->is_shared ())
        {
@@ -2663,8 +2673,6 @@ early_ra::form_chains ()
            fprintf (dump_file, "  Allocno %d shares the same hard register"
                     " as allocno %d\n", allocno1->id,
                     allocno1->related_allocno);
-         auto *allocno2 = m_allocnos[allocno1->related_allocno];
-         merge_fpr_info (allocno2->group (), group1, allocno2->offset);
          m_shared_allocnos.safe_push (allocno1);
          continue;
        }
diff --git a/gcc/testsuite/gcc.target/aarch64/pr123285-1.c 
b/gcc/testsuite/gcc.target/aarch64/pr123285-1.c
new file mode 100644
index 000000000000..9ef5a28c9afd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr123285-1.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+/* PR target/123285 */
+
+#define BS_VEC(type, num) type __attribute__((vector_size(num * sizeof(type))))
+
+/* f used to allocate v30 to either a or b and the inline-asm
+   would clobber the v30. */
+[[gnu::noipa]]
+BS_VEC(int, 8) f(BS_VEC(int, 8) a, BS_VEC(int, 8) b)
+{
+  a+=b;
+  asm("movi v30.16b, 0":::"v30");
+  a+=b;
+  return a;
+}
+[[gnu::noipa]]
+BS_VEC(int, 8) f1(BS_VEC(int, 8) a, BS_VEC(int, 8) b)
+{
+  a+=b;
+  a+=b;
+  return a;
+}
+
+int main()
+{
+  BS_VEC(int, 8) a = {0,1,2,3,4,5,6,7};
+  BS_VEC(int, 8) b = {8,9,10,11,12,13,14};
+  BS_VEC(int, 8) c0 = f(a,b);
+  BS_VEC(int, 8) c1 = f1(a,b);
+  for(int i=0;i<8;i++)
+  if ( c0[i] != c1[i] )
+    __builtin_abort ();
+}
+
+

Reply via email to