https://gcc.gnu.org/g:34d9e6df85f888c707c6b3b069ccfc6ea49efd56

commit r17-905-g34d9e6df85f888c707c6b3b069ccfc6ea49efd56
Author: Jin Ma <[email protected]>
Date:   Tue May 26 11:25:57 2026 +0800

    RISC-V: Fix REGNO_REG_CLASS for FP hard registers
    
    The GCC Internals Manual, section 19.8 "Register Classes", documents
    REGNO_REG_CLASS as:
    
      REGNO_REG_CLASS (regno)                                      [Macro]
        A C expression whose value is a register class containing hard
        register regno.  In general there is more than one such class;
        choose a class which is minimal, meaning that no smaller class
        also contains the register.
    
    riscv_regno_to_class[] currently maps every FP hard register to
    RVC_FP_REGS, but RVC_FP_REGS only contains f8-f15.  The entries for
    f0-f7 and f16-f31 therefore violate the "containing hard register
    regno" half of the contract: the returned class does not contain the
    register at all.
    
    The mismatch corrupts IRA's cost model.  setup_allocno_cost_vector
    indexes the per-hard-reg cost slot via REGNO_REG_CLASS:
    
      rclass = REGNO_REG_CLASS (hard_regno);
      num = cost_classes_ptr->index[rclass];
      ...
      reg_costs[j] = COSTS (costs, i)->cost[num];
    
    After setup_regno_cost_classes_by_mode adds RVC_FP_REGS to the cost
    classes, the cost for e.g. f16 is silently read from the RVC_FP_REGS
    slot, even though f16 is not a member of that class, instead of from
    the FP_REGS slot.
    
    The new fp-reg-class.c testcase puts eight "cf"- and sixteen "f"-
    constrained doubles live across a call.  In the buggy state IRA
    places the cf pseudos outside the cf class and LRA recovers with
    sixteen fmv.d to fs* registers; with the fix IRA spills those values
    honestly and the IRA "+++Costs" line reports a non-zero "mem"
    component.
    
    Fix it by giving each FP hard register its minimal class: FP_REGS for
    f0-f7 and f16-f31, RVC_FP_REGS for f8-f15.  As a companion change,
    switch riscv_secondary_memory_needed from class-equality tests to
    reg_class_subset_p so it still recognises the FP side regardless of
    which subclass the table returns.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv.cc (riscv_regno_to_class): Use the minimal
            class containing each FP hard register: FP_REGS for f0-f7 and
            f16-f31, RVC_FP_REGS for f8-f15.
            (riscv_secondary_memory_needed): Use reg_class_subset_p to
            detect FP classes.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/fp-reg-class.c: New test.

Diff:
---
 gcc/config/riscv/riscv.cc                     | 16 ++++----
 gcc/testsuite/gcc.target/riscv/fp-reg-class.c | 59 +++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 8a737bb41b66..abce8f5f8a54 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -351,14 +351,14 @@ const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
   JALR_REGS,   JALR_REGS,      JALR_REGS,      JALR_REGS,
   JALR_REGS,   JALR_REGS,      JALR_REGS,      JALR_REGS,
   SIBCALL_REGS,        SIBCALL_REGS,   SIBCALL_REGS,   SIBCALL_REGS,
+  FP_REGS,     FP_REGS,        FP_REGS,        FP_REGS,
+  FP_REGS,     FP_REGS,        FP_REGS,        FP_REGS,
   RVC_FP_REGS, RVC_FP_REGS,    RVC_FP_REGS,    RVC_FP_REGS,
   RVC_FP_REGS, RVC_FP_REGS,    RVC_FP_REGS,    RVC_FP_REGS,
-  RVC_FP_REGS, RVC_FP_REGS,    RVC_FP_REGS,    RVC_FP_REGS,
-  RVC_FP_REGS, RVC_FP_REGS,    RVC_FP_REGS,    RVC_FP_REGS,
-  RVC_FP_REGS, RVC_FP_REGS,    RVC_FP_REGS,    RVC_FP_REGS,
-  RVC_FP_REGS, RVC_FP_REGS,    RVC_FP_REGS,    RVC_FP_REGS,
-  RVC_FP_REGS, RVC_FP_REGS,    RVC_FP_REGS,    RVC_FP_REGS,
-  RVC_FP_REGS, RVC_FP_REGS,    RVC_FP_REGS,    RVC_FP_REGS,
+  FP_REGS,     FP_REGS,        FP_REGS,        FP_REGS,
+  FP_REGS,     FP_REGS,        FP_REGS,        FP_REGS,
+  FP_REGS,     FP_REGS,        FP_REGS,        FP_REGS,
+  FP_REGS,     FP_REGS,        FP_REGS,        FP_REGS,
   FRAME_REGS,  FRAME_REGS,     NO_REGS,        NO_REGS,
   NO_REGS,     NO_REGS,        NO_REGS,        NO_REGS,
   NO_REGS,     NO_REGS,        NO_REGS,        NO_REGS,
@@ -10962,8 +10962,8 @@ static bool
 riscv_secondary_memory_needed (machine_mode mode, reg_class_t class1,
                               reg_class_t class2)
 {
-  bool class1_is_fpr = class1 == FP_REGS || class1 == RVC_FP_REGS;
-  bool class2_is_fpr = class2 == FP_REGS || class2 == RVC_FP_REGS;
+  bool class1_is_fpr = reg_class_subset_p (class1, FP_REGS);
+  bool class2_is_fpr = reg_class_subset_p (class2, FP_REGS);
   return (!riscv_vector_mode_p (mode)
          && GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
          && (class1_is_fpr != class2_is_fpr)
diff --git a/gcc/testsuite/gcc.target/riscv/fp-reg-class.c b/gcc/testsuite/gcc.target/riscv/fp-reg-class.c
new file mode 100644
index 000000000000..e40fcfcde92a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/fp-reg-class.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-O3" "-Og" "-Os" "-Oz" "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O2 -fdump-rtl-ira" } */
+
+/* When riscv_regno_to_class[] mapped every FP hard register to
+   RVC_FP_REGS, ira-costs.cc:setup_allocno_cost_vector read the cost
+   slot for f0-f7 / f16-f31 from the wrong bucket and IRA mis-allocated
+   FP pseudos.  Eight "cf"- and sixteen "f"-constrained doubles live
+   across a call expose this: in the buggy state IRA picks an
+   all-in-hardreg coloring with no spills ("+++Costs" shows "mem 0")
+   and LRA recovers with fmv.d to fs* registers; with the fix IRA
+   spills the cf values honestly and the "mem" component is non-zero.  */
+
+extern void use (double, double, double, double,
+                double, double, double, double);
+
+double
+test (double *p, int n)
+{
+  double f0 = p[0], f1 = p[1], f2 = p[2], f3 = p[3];
+  double f4 = p[4], f5 = p[5], f6 = p[6], f7 = p[7];
+  double f8 = p[8], f9 = p[9], f10 = p[10], f11 = p[11];
+  double f12 = p[12], f13 = p[13], f14 = p[14], f15 = p[15];
+
+  double c0, c1, c2, c3, c4, c5, c6, c7;
+  asm ("fadd.d %0,%1,%1" : "=cf" (c0) : "cf" (p[16]));
+  asm ("fadd.d %0,%1,%1" : "=cf" (c1) : "cf" (p[17]));
+  asm ("fadd.d %0,%1,%1" : "=cf" (c2) : "cf" (p[18]));
+  asm ("fadd.d %0,%1,%1" : "=cf" (c3) : "cf" (p[19]));
+  asm ("fadd.d %0,%1,%1" : "=cf" (c4) : "cf" (p[20]));
+  asm ("fadd.d %0,%1,%1" : "=cf" (c5) : "cf" (p[21]));
+  asm ("fadd.d %0,%1,%1" : "=cf" (c6) : "cf" (p[22]));
+  asm ("fadd.d %0,%1,%1" : "=cf" (c7) : "cf" (p[23]));
+
+  for (int i = 0; i < n; ++i)
+    {
+      f0  = f0  * f1  + f2;   f1  = f1  * f2  + f3;
+      f2  = f2  * f3  + f4;   f3  = f3  * f4  + f5;
+      f4  = f4  * f5  + f6;   f5  = f5  * f6  + f7;
+      f6  = f6  * f7  + f8;   f7  = f7  * f8  + f9;
+      f8  = f8  * f9  + f10;  f9  = f9  * f10 + f11;
+      f10 = f10 * f11 + f12;  f11 = f11 * f12 + f13;
+      f12 = f12 * f13 + f14;  f13 = f13 * f14 + f15;
+      f14 = f14 * f15 + f0;   f15 = f15 * f0  + f1;
+      asm ("fadd.d %0,%0,%1" : "+cf" (c0) : "f" (f0));
+      asm ("fadd.d %0,%0,%1" : "+cf" (c1) : "f" (f1));
+      asm ("fadd.d %0,%0,%1" : "+cf" (c2) : "f" (f2));
+      asm ("fadd.d %0,%0,%1" : "+cf" (c3) : "f" (f3));
+      asm ("fadd.d %0,%0,%1" : "+cf" (c4) : "f" (f4));
+      asm ("fadd.d %0,%0,%1" : "+cf" (c5) : "f" (f5));
+      asm ("fadd.d %0,%0,%1" : "+cf" (c6) : "f" (f6));
+      asm ("fadd.d %0,%0,%1" : "+cf" (c7) : "f" (f7));
+      use (f8, f9, f10, f11, f12, f13, f14, f15);
+    }
+  return f0+f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15
+       + c0+c1+c2+c3+c4+c5+c6+c7;
+}
+
+/* { dg-final { scan-rtl-dump {\+\+\+Costs:[^\n]* mem [1-9]} "ira" } } */

Reply via email to