date:20250512

[llvm-branch-commits] [llvm] [GlobalISel] Add computeNumSignBits for ASHR (PR #139503)

2025-05-12 Thread Matt Arsenault via llvm-branch-commits



@@ -864,6 +864,16 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
   return TyBits - 1; // Every always-zero bit is a sign bit.
 break;
   }
+  case TargetOpcode::G_ASHR: {
+Register Src1 = MI.getOperand(1).getReg();
+Register Src2 = MI.getOperand(2).getReg();
+LLT SrcTy = MRI.getType(Src1);
+FirstAnswer = computeNumSignBits(Src1, DemandedElts, Depth + 1);
+if (auto C = getIConstantSplatVal(Src2, MRI))

arsenm wrote:

At some point we should make these properly elementwise and not only handle splats.

https://github.com/llvm/llvm-project/pull/139503
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #138828)

2025-05-12 Thread Akshat Oke via llvm-branch-commits



@@ -279,6 +278,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
 #ifndef DUMMY_FUNCTION_PASS
 #define DUMMY_FUNCTION_PASS(NAME, PASS_NAME)
 #endif
+DUMMY_FUNCTION_PASS("tlshoist", TLSVariableHoistPass)

optimisan wrote:

I'll remove this list entirely, since it duplicates the one in `PassRegistry.def`.

https://github.com/llvm/llvm-project/pull/138828
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] release/20.x: [AArch64] Fix feature list for FUJITSU-MONAKA processor (#139212) (PR #139222)

2025-05-12 Thread David Green via llvm-branch-commits


https://github.com/davemgreen approved this pull request.


https://github.com/llvm/llvm-project/pull/139222
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] release/20.x: [AArch64] Fix feature list for FUJITSU-MONAKA processor (#139212) (PR #139222)

2025-05-12 Thread David Green via llvm-branch-commits


davemgreen wrote:

LGTM - changes look small and correct a regression since the previous release.

https://github.com/llvm/llvm-project/pull/139222
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139508

>From 5e50922e53ad2de7e3c68242ad78f1813a48f7b6 Mon Sep 17 00:00:00 2001
From: Iris Shi <0...@owo.li>
Date: Mon, 12 May 2025 15:04:28 +0800
Subject: [PATCH] [RISCV][MC] Add Q support for Zfa

---
 llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td |  29 ++
 llvm/test/MC/RISCV/rv64zfa-only-valid.s|  19 +
 llvm/test/MC/RISCV/zfa-invalid.s   |  13 +-
 llvm/test/MC/RISCV/zfa-quad-invalid.s  |  42 +++
 llvm/test/MC/RISCV/zfa-valid.s | 391 -
 5 files changed, 484 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/MC/RISCV/rv64zfa-only-valid.s
 create mode 100644 llvm/test/MC/RISCV/zfa-quad-invalid.s

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
index 8a449d32e0104..0ad654db42f5c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
@@ -175,6 +175,28 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>;
 }
 } // Predicates = [HasStdExtZfa, HasStdExtZfh]
 
+let Predicates = [HasStdExtZfa, HasStdExtQ] in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def FLI_Q : FPFLI_r<0b011, 0b1, 0b000, FPR128, "fli.q">;
+
+def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>;
+def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>;
+
+def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">;
+def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, 
+ "froundnx.q">;
+
+def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>;
+def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>;
+} // Predicates = [HasStdExtZfa, HasStdExtQ]
+
+let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in {
+  let mayRaiseFPException = 0 in {
+def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b1, 0b000, GPR, FPR128, 
"fmvh.x.q">;
+def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">;
+  }
+} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64]
+
 
//===--===//
 // Pseudo-instructions and codegen patterns
 
//===--===//
@@ -200,6 +222,13 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt",
 (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
 }
 
+let Predicates = [HasStdExtZfa, HasStdExtQ] in {
+def : InstAlias<"fgtq.q $rd, $rs, $rt",
+(FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>;
+def : InstAlias<"fgeq.q $rd, $rs, $rt",
+(FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>;
+}
+
 
//===--===//
 // Codegen patterns
 
//===--===//
diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s 
b/llvm/test/MC/RISCV/rv64zfa-only-valid.s
new file mode 100644
index 0..95fb253b145c1
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64zfa-only-valid.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases 
-show-encoding \
+# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \
+# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \
+# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+#
+# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \
+# RUN: -M no-aliases -show-encoding < %s 2>&1 \
+# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s
+
+# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1
+# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6]
+# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional 
Floating-Point){{$}}
+fmvh.x.q a1, fs1
+
+# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2
+# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6]
+# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional 
Floating-Point){{$}}
+fmvp.q.x fs1, a1, a2
diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s
index c2537c3fc5102..cedc9279db3cb 100644
--- a/llvm/test/MC/RISCV/zfa-invalid.s
+++ b/llvm/test/MC/RISCV/zfa-invalid.s
@@ -1,5 +1,5 @@
-# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV32 %s
-# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s
+# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV32 %s
+# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s
 
 # Invalid rounding modes
 # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode
@@ -35,6 +35,10 @@ fli.d ft1, 3.56e+02
 # CHECK-NO-RV32: error: operand must be a valid floating-point constant
 fli.h ft1, 1.60e+00
 
+

[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139508

>From 5e50922e53ad2de7e3c68242ad78f1813a48f7b6 Mon Sep 17 00:00:00 2001
From: Iris Shi <0...@owo.li>
Date: Mon, 12 May 2025 15:04:28 +0800
Subject: [PATCH] [RISCV][MC] Add Q support for Zfa

---
 llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td |  29 ++
 llvm/test/MC/RISCV/rv64zfa-only-valid.s|  19 +
 llvm/test/MC/RISCV/zfa-invalid.s   |  13 +-
 llvm/test/MC/RISCV/zfa-quad-invalid.s  |  42 +++
 llvm/test/MC/RISCV/zfa-valid.s | 391 -
 5 files changed, 484 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/MC/RISCV/rv64zfa-only-valid.s
 create mode 100644 llvm/test/MC/RISCV/zfa-quad-invalid.s

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
index 8a449d32e0104..0ad654db42f5c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
@@ -175,6 +175,28 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>;
 }
 } // Predicates = [HasStdExtZfa, HasStdExtZfh]
 
+let Predicates = [HasStdExtZfa, HasStdExtQ] in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def FLI_Q : FPFLI_r<0b011, 0b1, 0b000, FPR128, "fli.q">;
+
+def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>;
+def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>;
+
+def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">;
+def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, 
+ "froundnx.q">;
+
+def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>;
+def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>;
+} // Predicates = [HasStdExtZfa, HasStdExtQ]
+
+let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in {
+  let mayRaiseFPException = 0 in {
+def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b1, 0b000, GPR, FPR128, 
"fmvh.x.q">;
+def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">;
+  }
+} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64]
+
 
//===--===//
 // Pseudo-instructions and codegen patterns
 
//===--===//
@@ -200,6 +222,13 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt",
 (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
 }
 
+let Predicates = [HasStdExtZfa, HasStdExtQ] in {
+def : InstAlias<"fgtq.q $rd, $rs, $rt",
+(FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>;
+def : InstAlias<"fgeq.q $rd, $rs, $rt",
+(FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>;
+}
+
 
//===--===//
 // Codegen patterns
 
//===--===//
diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s 
b/llvm/test/MC/RISCV/rv64zfa-only-valid.s
new file mode 100644
index 0..95fb253b145c1
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64zfa-only-valid.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases 
-show-encoding \
+# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \
+# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \
+# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+#
+# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \
+# RUN: -M no-aliases -show-encoding < %s 2>&1 \
+# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s
+
+# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1
+# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6]
+# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional 
Floating-Point){{$}}
+fmvh.x.q a1, fs1
+
+# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2
+# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6]
+# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional 
Floating-Point){{$}}
+fmvp.q.x fs1, a1, a2
diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s
index c2537c3fc5102..cedc9279db3cb 100644
--- a/llvm/test/MC/RISCV/zfa-invalid.s
+++ b/llvm/test/MC/RISCV/zfa-invalid.s
@@ -1,5 +1,5 @@
-# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV32 %s
-# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s
+# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV32 %s
+# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s
 
 # Invalid rounding modes
 # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode
@@ -35,6 +35,10 @@ fli.d ft1, 3.56e+02
 # CHECK-NO-RV32: error: operand must be a valid floating-point constant
 fli.h ft1, 1.60e+00
 
+

[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139495

>From 7fdbd6b564697b7f0fd7ffd1f031671c3036 Mon Sep 17 00:00:00 2001
From: Iris Shi <0...@owo.li>
Date: Mon, 12 May 2025 13:32:41 +0800
Subject: [PATCH] [RISCV][Scheduler] Add scheduler definitions for the Q
 extension

---
 llvm/lib/Target/RISCV/RISCVInstrInfoQ.td  | 98 ---
 llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td  |  1 +
 llvm/lib/Target/RISCV/RISCVSchedRocket.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td|  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td |  1 +
 .../lib/Target/RISCV/RISCVSchedSpacemitX60.td |  1 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR345.td |  1 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR7.td   |  1 +
 .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td |  1 +
 .../Target/RISCV/RISCVSchedXiangShanNanHu.td  |  1 +
 llvm/lib/Target/RISCV/RISCVSchedule.td| 88 -
 14 files changed, 158 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
index 51b9c806976f5..4dc33dd22b2aa 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
@@ -25,95 +25,119 @@ defvar QExtsRV64 = [QExt];
 
//===--===//
 
 let Predicates = [HasStdExtQ] in {
-  let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
-  def FLQ : RVInstI<0b100, OPC_LOAD_FP, (outs FPR128:$rd),
-  (ins GPRMem:$rs1, simm12:$imm12),
-  "flq", "$rd, ${imm12}(${rs1})">;
+  def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>;
+
   // Operands for stores are in the order srcreg, base, offset rather than
   // reflecting the order these fields are specified in the instruction
   // encoding.
-  let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
-  def FSQ : RVInstS<0b100, OPC_STORE_FP, (outs),
-  (ins FPR128:$rs2, GPRMem:$rs1, simm12:$imm12),
-  "fsq", "$rs2, ${imm12}(${rs1})">;
+  def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>;
 } // Predicates = [HasStdExtQ]
 
 foreach Ext = QExts in {
-  defm FMADD_Q : FPFMA_rrr_frm_m;
-  defm FMSUB_Q : FPFMA_rrr_frm_m;
-  defm FNMSUB_Q : FPFMA_rrr_frm_m;
-  defm FNMADD_Q : FPFMA_rrr_frm_m;
+  let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in {
+defm FMADD_Q : FPFMA_rrr_frm_m;
+defm FMSUB_Q : FPFMA_rrr_frm_m;
+defm FNMSUB_Q : FPFMA_rrr_frm_m;
+defm FNMADD_Q : FPFMA_rrr_frm_m;
+  }
 
-  defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>;
-  defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>;
+  let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in {
+defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>;
+defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>;
+  }
 
+  let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in 
   defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>;
 
+  let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in 
   defm FDIV_Q : FPALU_rr_frm_m<0b000, "fdiv.q", Ext>;
 
   defm FSQRT_Q : FPUnaryOp_r_frm_m<0b010, 0b0, Ext, Ext.PrimaryTy,
-   Ext.PrimaryTy, "fsqrt.q">;
+   Ext.PrimaryTy, "fsqrt.q">,
+ Sched<[WriteFSqrt128, ReadFSqrt128]>;
 
-  let mayRaiseFPException = 0 in {
+  let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128],
+  mayRaiseFPException = 0 in {
 defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>;
 defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>;
 defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>;
   }
 
-  defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>;
-  defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>;
+  let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in {
+defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>;
+defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>;
+  }
 
   defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b010, 0b00011, Ext, Ext.F32Ty,
-Ext.PrimaryTy, "fcvt.s.q">;
+Ext.PrimaryTy, "fcvt.s.q">,
+  Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>;
 
   defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b0, Ext,
-  Ext.PrimaryTy, Ext.F32Ty, 
"fcvt.q.s">;
+  Ext.PrimaryTy, Ext.F32Ty, 
+  "fcvt.q.s">,
+  Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>;
 
   defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b011, 0b00011, Ext, Ext.F64Ty,
-Ext.Pr

[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139495

>From 7fdbd6b564697b7f0fd7ffd1f031671c3036 Mon Sep 17 00:00:00 2001
From: Iris Shi <0...@owo.li>
Date: Mon, 12 May 2025 13:32:41 +0800
Subject: [PATCH] [RISCV][Scheduler] Add scheduler definitions for the Q
 extension

---
 llvm/lib/Target/RISCV/RISCVInstrInfoQ.td  | 98 ---
 llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td  |  1 +
 llvm/lib/Target/RISCV/RISCVSchedRocket.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td|  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td |  1 +
 .../lib/Target/RISCV/RISCVSchedSpacemitX60.td |  1 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR345.td |  1 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR7.td   |  1 +
 .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td |  1 +
 .../Target/RISCV/RISCVSchedXiangShanNanHu.td  |  1 +
 llvm/lib/Target/RISCV/RISCVSchedule.td| 88 -
 14 files changed, 158 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
index 51b9c806976f5..4dc33dd22b2aa 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
@@ -25,95 +25,119 @@ defvar QExtsRV64 = [QExt];
 
//===--===//
 
 let Predicates = [HasStdExtQ] in {
-  let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
-  def FLQ : RVInstI<0b100, OPC_LOAD_FP, (outs FPR128:$rd),
-  (ins GPRMem:$rs1, simm12:$imm12),
-  "flq", "$rd, ${imm12}(${rs1})">;
+  def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>;
+
   // Operands for stores are in the order srcreg, base, offset rather than
   // reflecting the order these fields are specified in the instruction
   // encoding.
-  let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
-  def FSQ : RVInstS<0b100, OPC_STORE_FP, (outs),
-  (ins FPR128:$rs2, GPRMem:$rs1, simm12:$imm12),
-  "fsq", "$rs2, ${imm12}(${rs1})">;
+  def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>;
 } // Predicates = [HasStdExtQ]
 
 foreach Ext = QExts in {
-  defm FMADD_Q : FPFMA_rrr_frm_m;
-  defm FMSUB_Q : FPFMA_rrr_frm_m;
-  defm FNMSUB_Q : FPFMA_rrr_frm_m;
-  defm FNMADD_Q : FPFMA_rrr_frm_m;
+  let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in {
+defm FMADD_Q : FPFMA_rrr_frm_m;
+defm FMSUB_Q : FPFMA_rrr_frm_m;
+defm FNMSUB_Q : FPFMA_rrr_frm_m;
+defm FNMADD_Q : FPFMA_rrr_frm_m;
+  }
 
-  defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>;
-  defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>;
+  let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in {
+defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>;
+defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>;
+  }
 
+  let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in 
   defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>;
 
+  let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in 
   defm FDIV_Q : FPALU_rr_frm_m<0b000, "fdiv.q", Ext>;
 
   defm FSQRT_Q : FPUnaryOp_r_frm_m<0b010, 0b0, Ext, Ext.PrimaryTy,
-   Ext.PrimaryTy, "fsqrt.q">;
+   Ext.PrimaryTy, "fsqrt.q">,
+ Sched<[WriteFSqrt128, ReadFSqrt128]>;
 
-  let mayRaiseFPException = 0 in {
+  let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128],
+  mayRaiseFPException = 0 in {
 defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>;
 defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>;
 defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>;
   }
 
-  defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>;
-  defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>;
+  let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in {
+defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>;
+defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>;
+  }
 
   defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b010, 0b00011, Ext, Ext.F32Ty,
-Ext.PrimaryTy, "fcvt.s.q">;
+Ext.PrimaryTy, "fcvt.s.q">,
+  Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>;
 
   defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b0, Ext,
-  Ext.PrimaryTy, Ext.F32Ty, 
"fcvt.q.s">;
+  Ext.PrimaryTy, Ext.F32Ty, 
+  "fcvt.q.s">,
+  Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>;
 
   defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b011, 0b00011, Ext, Ext.F64Ty,
-Ext.Pr

[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139508

>From 6dc27676de2a685404abd0cfd12cff95703a1cf1 Mon Sep 17 00:00:00 2001
From: Iris Shi <0...@owo.li>
Date: Mon, 12 May 2025 15:04:28 +0800
Subject: [PATCH] [RISCV][MC] Add Q support for Zfa

---
 llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td |  29 ++
 llvm/test/MC/RISCV/rv64zfa-only-valid.s|  19 +
 llvm/test/MC/RISCV/zfa-invalid.s   |  13 +-
 llvm/test/MC/RISCV/zfa-quad-invalid.s  |  42 +++
 llvm/test/MC/RISCV/zfa-valid.s | 391 -
 5 files changed, 484 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/MC/RISCV/rv64zfa-only-valid.s
 create mode 100644 llvm/test/MC/RISCV/zfa-quad-invalid.s

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
index 8a449d32e0104..0ad654db42f5c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
@@ -175,6 +175,28 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>;
 }
 } // Predicates = [HasStdExtZfa, HasStdExtZfh]
 
+let Predicates = [HasStdExtZfa, HasStdExtQ] in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def FLI_Q : FPFLI_r<0b011, 0b1, 0b000, FPR128, "fli.q">;
+
+def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>;
+def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>;
+
+def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">;
+def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, 
+ "froundnx.q">;
+
+def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>;
+def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>;
+} // Predicates = [HasStdExtZfa, HasStdExtQ]
+
+let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in {
+  let mayRaiseFPException = 0 in {
+def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b1, 0b000, GPR, FPR128, 
"fmvh.x.q">;
+def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">;
+  }
+} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64]
+
 
//===--===//
 // Pseudo-instructions and codegen patterns
 
//===--===//
@@ -200,6 +222,13 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt",
 (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
 }
 
+let Predicates = [HasStdExtZfa, HasStdExtQ] in {
+def : InstAlias<"fgtq.q $rd, $rs, $rt",
+(FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>;
+def : InstAlias<"fgeq.q $rd, $rs, $rt",
+(FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>;
+}
+
 
//===--===//
 // Codegen patterns
 
//===--===//
diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s 
b/llvm/test/MC/RISCV/rv64zfa-only-valid.s
new file mode 100644
index 0..95fb253b145c1
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64zfa-only-valid.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases 
-show-encoding \
+# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \
+# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \
+# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+#
+# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \
+# RUN: -M no-aliases -show-encoding < %s 2>&1 \
+# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s
+
+# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1
+# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6]
+# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional 
Floating-Point){{$}}
+fmvh.x.q a1, fs1
+
+# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2
+# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6]
+# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional 
Floating-Point){{$}}
+fmvp.q.x fs1, a1, a2
diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s
index c2537c3fc5102..cedc9279db3cb 100644
--- a/llvm/test/MC/RISCV/zfa-invalid.s
+++ b/llvm/test/MC/RISCV/zfa-invalid.s
@@ -1,5 +1,5 @@
-# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV32 %s
-# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s
+# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV32 %s
+# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s
 
 # Invalid rounding modes
 # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode
@@ -35,6 +35,10 @@ fli.d ft1, 3.56e+02
 # CHECK-NO-RV32: error: operand must be a valid floating-point constant
 fli.h ft1, 1.60e+00
 
+

[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139495

>From 9205ac04544703aaee2a1475763ce7bc7495ccab Mon Sep 17 00:00:00 2001
From: Iris Shi <0...@owo.li>
Date: Mon, 12 May 2025 13:32:41 +0800
Subject: [PATCH] [RISCV][Scheduler] Add scheduler definitions for the Q
 extension

---
 llvm/lib/Target/RISCV/RISCVInstrInfoQ.td  | 98 ---
 llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td  |  1 +
 llvm/lib/Target/RISCV/RISCVSchedRocket.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td|  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td |  1 +
 .../lib/Target/RISCV/RISCVSchedSpacemitX60.td |  1 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR345.td |  1 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR7.td   |  1 +
 .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td |  1 +
 .../Target/RISCV/RISCVSchedXiangShanNanHu.td  |  1 +
 llvm/lib/Target/RISCV/RISCVSchedule.td| 88 -
 14 files changed, 158 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
index 51b9c806976f5..4dc33dd22b2aa 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
@@ -25,95 +25,119 @@ defvar QExtsRV64 = [QExt];
 
//===--===//
 
 let Predicates = [HasStdExtQ] in {
-  let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
-  def FLQ : RVInstI<0b100, OPC_LOAD_FP, (outs FPR128:$rd),
-  (ins GPRMem:$rs1, simm12:$imm12),
-  "flq", "$rd, ${imm12}(${rs1})">;
+  def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>;
+
   // Operands for stores are in the order srcreg, base, offset rather than
   // reflecting the order these fields are specified in the instruction
   // encoding.
-  let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
-  def FSQ : RVInstS<0b100, OPC_STORE_FP, (outs),
-  (ins FPR128:$rs2, GPRMem:$rs1, simm12:$imm12),
-  "fsq", "$rs2, ${imm12}(${rs1})">;
+  def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>;
 } // Predicates = [HasStdExtQ]
 
 foreach Ext = QExts in {
-  defm FMADD_Q : FPFMA_rrr_frm_m;
-  defm FMSUB_Q : FPFMA_rrr_frm_m;
-  defm FNMSUB_Q : FPFMA_rrr_frm_m;
-  defm FNMADD_Q : FPFMA_rrr_frm_m;
+  let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in {
+defm FMADD_Q : FPFMA_rrr_frm_m;
+defm FMSUB_Q : FPFMA_rrr_frm_m;
+defm FNMSUB_Q : FPFMA_rrr_frm_m;
+defm FNMADD_Q : FPFMA_rrr_frm_m;
+  }
 
-  defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>;
-  defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>;
+  let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in {
+defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>;
+defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>;
+  }
 
+  let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in 
   defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>;
 
+  let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in 
   defm FDIV_Q : FPALU_rr_frm_m<0b000, "fdiv.q", Ext>;
 
   defm FSQRT_Q : FPUnaryOp_r_frm_m<0b010, 0b0, Ext, Ext.PrimaryTy,
-   Ext.PrimaryTy, "fsqrt.q">;
+   Ext.PrimaryTy, "fsqrt.q">,
+ Sched<[WriteFSqrt128, ReadFSqrt128]>;
 
-  let mayRaiseFPException = 0 in {
+  let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128],
+  mayRaiseFPException = 0 in {
 defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>;
 defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>;
 defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>;
   }
 
-  defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>;
-  defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>;
+  let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in {
+defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>;
+defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>;
+  }
 
   defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b010, 0b00011, Ext, Ext.F32Ty,
-Ext.PrimaryTy, "fcvt.s.q">;
+Ext.PrimaryTy, "fcvt.s.q">,
+  Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>;
 
   defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b0, Ext,
-  Ext.PrimaryTy, Ext.F32Ty, 
"fcvt.q.s">;
+  Ext.PrimaryTy, Ext.F32Ty, 
+  "fcvt.q.s">,
+  Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>;
 
   defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b011, 0b00011, Ext, Ext.F64Ty,
-Ext.Pr

[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139508

>From 6dc27676de2a685404abd0cfd12cff95703a1cf1 Mon Sep 17 00:00:00 2001
From: Iris Shi <0...@owo.li>
Date: Mon, 12 May 2025 15:04:28 +0800
Subject: [PATCH] [RISCV][MC] Add Q support for Zfa

---
 llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td |  29 ++
 llvm/test/MC/RISCV/rv64zfa-only-valid.s|  19 +
 llvm/test/MC/RISCV/zfa-invalid.s   |  13 +-
 llvm/test/MC/RISCV/zfa-quad-invalid.s  |  42 +++
 llvm/test/MC/RISCV/zfa-valid.s | 391 -
 5 files changed, 484 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/MC/RISCV/rv64zfa-only-valid.s
 create mode 100644 llvm/test/MC/RISCV/zfa-quad-invalid.s

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
index 8a449d32e0104..0ad654db42f5c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
@@ -175,6 +175,28 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>;
 }
 } // Predicates = [HasStdExtZfa, HasStdExtZfh]
 
+let Predicates = [HasStdExtZfa, HasStdExtQ] in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def FLI_Q : FPFLI_r<0b011, 0b1, 0b000, FPR128, "fli.q">;
+
+def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>;
+def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>;
+
+def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">;
+def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, 
+ "froundnx.q">;
+
+def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>;
+def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>;
+} // Predicates = [HasStdExtZfa, HasStdExtQ]
+
+let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in {
+  let mayRaiseFPException = 0 in {
+def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b1, 0b000, GPR, FPR128, 
"fmvh.x.q">;
+def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">;
+  }
+} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64]
+
 
//===--===//
 // Pseudo-instructions and codegen patterns
 
//===--===//
@@ -200,6 +222,13 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt",
 (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
 }
 
+let Predicates = [HasStdExtZfa, HasStdExtQ] in {
+def : InstAlias<"fgtq.q $rd, $rs, $rt",
+(FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>;
+def : InstAlias<"fgeq.q $rd, $rs, $rt",
+(FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>;
+}
+
 
//===--===//
 // Codegen patterns
 
//===--===//
diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s 
b/llvm/test/MC/RISCV/rv64zfa-only-valid.s
new file mode 100644
index 0..95fb253b145c1
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64zfa-only-valid.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases 
-show-encoding \
+# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \
+# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \
+# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+#
+# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \
+# RUN: -M no-aliases -show-encoding < %s 2>&1 \
+# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s
+
+# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1
+# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6]
+# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional 
Floating-Point){{$}}
+fmvh.x.q a1, fs1
+
+# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2
+# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6]
+# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional 
Floating-Point){{$}}
+fmvp.q.x fs1, a1, a2
diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s
index c2537c3fc5102..cedc9279db3cb 100644
--- a/llvm/test/MC/RISCV/zfa-invalid.s
+++ b/llvm/test/MC/RISCV/zfa-invalid.s
@@ -1,5 +1,5 @@
-# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV32 %s
-# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s
+# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV32 %s
+# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s
 
 # Invalid rounding modes
 # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode
@@ -35,6 +35,10 @@ fli.d ft1, 3.56e+02
 # CHECK-NO-RV32: error: operand must be a valid floating-point constant
 fli.h ft1, 1.60e+00
 
+

[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139495

>From 9205ac04544703aaee2a1475763ce7bc7495ccab Mon Sep 17 00:00:00 2001
From: Iris Shi <0...@owo.li>
Date: Mon, 12 May 2025 13:32:41 +0800
Subject: [PATCH] [RISCV][Scheduler] Add scheduler definitions for the Q
 extension

---
 llvm/lib/Target/RISCV/RISCVInstrInfoQ.td  | 98 ---
 llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td  |  1 +
 llvm/lib/Target/RISCV/RISCVSchedRocket.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td|  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td |  1 +
 .../lib/Target/RISCV/RISCVSchedSpacemitX60.td |  1 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR345.td |  1 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR7.td   |  1 +
 .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td |  1 +
 .../Target/RISCV/RISCVSchedXiangShanNanHu.td  |  1 +
 llvm/lib/Target/RISCV/RISCVSchedule.td| 88 -
 14 files changed, 158 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
index 51b9c806976f5..4dc33dd22b2aa 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
@@ -25,95 +25,119 @@ defvar QExtsRV64 = [QExt];
 
//===--===//
 
 let Predicates = [HasStdExtQ] in {
-  let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
-  def FLQ : RVInstI<0b100, OPC_LOAD_FP, (outs FPR128:$rd),
-  (ins GPRMem:$rs1, simm12:$imm12),
-  "flq", "$rd, ${imm12}(${rs1})">;
+  def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>;
+
   // Operands for stores are in the order srcreg, base, offset rather than
   // reflecting the order these fields are specified in the instruction
   // encoding.
-  let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
-  def FSQ : RVInstS<0b100, OPC_STORE_FP, (outs),
-  (ins FPR128:$rs2, GPRMem:$rs1, simm12:$imm12),
-  "fsq", "$rs2, ${imm12}(${rs1})">;
+  def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>;
 } // Predicates = [HasStdExtQ]
 
 foreach Ext = QExts in {
-  defm FMADD_Q : FPFMA_rrr_frm_m;
-  defm FMSUB_Q : FPFMA_rrr_frm_m;
-  defm FNMSUB_Q : FPFMA_rrr_frm_m;
-  defm FNMADD_Q : FPFMA_rrr_frm_m;
+  let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in {
+defm FMADD_Q : FPFMA_rrr_frm_m;
+defm FMSUB_Q : FPFMA_rrr_frm_m;
+defm FNMSUB_Q : FPFMA_rrr_frm_m;
+defm FNMADD_Q : FPFMA_rrr_frm_m;
+  }
 
-  defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>;
-  defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>;
+  let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in {
+defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>;
+defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>;
+  }
 
+  let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in 
   defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>;
 
+  let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in 
   defm FDIV_Q : FPALU_rr_frm_m<0b000, "fdiv.q", Ext>;
 
   defm FSQRT_Q : FPUnaryOp_r_frm_m<0b010, 0b0, Ext, Ext.PrimaryTy,
-   Ext.PrimaryTy, "fsqrt.q">;
+   Ext.PrimaryTy, "fsqrt.q">,
+ Sched<[WriteFSqrt128, ReadFSqrt128]>;
 
-  let mayRaiseFPException = 0 in {
+  let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128],
+  mayRaiseFPException = 0 in {
 defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>;
 defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>;
 defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>;
   }
 
-  defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>;
-  defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>;
+  let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in {
+defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>;
+defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>;
+  }
 
   defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b010, 0b00011, Ext, Ext.F32Ty,
-Ext.PrimaryTy, "fcvt.s.q">;
+Ext.PrimaryTy, "fcvt.s.q">,
+  Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>;
 
   defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b0, Ext,
-  Ext.PrimaryTy, Ext.F32Ty, 
"fcvt.q.s">;
+  Ext.PrimaryTy, Ext.F32Ty, 
+  "fcvt.q.s">,
+  Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>;
 
   defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b011, 0b00011, Ext, Ext.F64Ty,
-Ext.Pr

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port InitUndef to NPM (PR #138495)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138495

>From 476894ffa1fed64724b91c8b1db9391e09295be6 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Mon, 5 May 2025 08:47:42 +
Subject: [PATCH 1/2] [CodeGen][NPM] Port InitUndef to NPM

---
 llvm/include/llvm/CodeGen/InitUndef.h | 24 +
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  1 +
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/InitUndef.cpp| 50 +--
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 llvm/test/CodeGen/AArch64/init-undef.mir  |  3 ++
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll  |  4 +-
 .../rvv/handle-noreg-with-implicit-def.mir|  2 +
 .../rvv/subregister-undef-early-clobber.mir   |  1 +
 .../RISCV/rvv/undef-earlyclobber-chain.mir|  1 +
 12 files changed, 73 insertions(+), 20 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/InitUndef.h

diff --git a/llvm/include/llvm/CodeGen/InitUndef.h 
b/llvm/include/llvm/CodeGen/InitUndef.h
new file mode 100644
index 0..7274824a74905
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/InitUndef.h
@@ -0,0 +1,24 @@
+//===- llvm/CodeGen/InitUndef.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_INITUNDEF_H
+#define LLVM_CODEGEN_INITUNDEF_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class InitUndefPass : public PassInfoMixin<InitUndefPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_INITUNDEF_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 5a282d17b72c8..a3ebd2a6d15ca 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -312,7 +312,7 @@ void 
initializeTargetTransformInfoWrapperPassPass(PassRegistry &);
 void initializeTwoAddressInstructionLegacyPassPass(PassRegistry &);
 void initializeTypeBasedAAWrapperPassPass(PassRegistry &);
 void initializeTypePromotionLegacyPass(PassRegistry &);
-void initializeInitUndefPass(PassRegistry &);
+void initializeInitUndefLegacyPass(PassRegistry &);
 void initializeUniformityInfoWrapperPassPass(PassRegistry &);
 void initializeUnifyLoopExitsLegacyPassPass(PassRegistry &);
 void initializeUnpackMachineBundlesPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 29bc432ba3d5d..a3f439119b7da 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -43,6 +43,7 @@
 #include "llvm/CodeGen/GlobalMerge.h"
 #include "llvm/CodeGen/GlobalMergeFunctions.h"
 #include "llvm/CodeGen/IndirectBrExpand.h"
+#include "llvm/CodeGen/InitUndef.h"
 #include "llvm/CodeGen/InterleavedAccess.h"
 #include "llvm/CodeGen/InterleavedLoadCombine.h"
 #include "llvm/CodeGen/JMCInstrumenter.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index c69573ee3ed97..436b26852ce90 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -148,6 +148,7 @@ MACHINE_FUNCTION_PASS("early-tailduplication", 
EarlyTailDuplicatePass())
 MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass())
 MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass())
 MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", 
FixupStatepointCallerSavedPass())
+MACHINE_FUNCTION_PASS("init-undef", InitUndefPass())
 MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass())
 MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass())
 MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass())
@@ -304,7 +305,6 @@ DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", 
MIRProfileLoaderNewPass)
 DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass)
 DUMMY_MACHINE_FUNCTION_PASS("gc-empty-basic-blocks", GCEmptyBasicBlocksPass)
 DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass)
-DUMMY_MACHINE_FUNCTION_PASS("init-undef-pass", InitUndefPass)
 DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass)
 DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass)
 DUMMY_MACHINE_FUNCTION_PASS("kcfi", MachineKCFIPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 5250534d8a4e4..aa3591cb6be58 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -54,7 +54,7 @@ void llvm::initializeC

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port ProcessImplicitDefs to NPM (PR #138829)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138829

>From dbd76c614cb19179ffc0a20a19341a7e58a1431b Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 6 May 2025 14:12:36 +
Subject: [PATCH 1/2] [CodeGen][NPM] Port ProcessImplicitDefs to NPM

---
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  1 +
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/ProcessImplicitDefs.cpp  | 57 +--
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll  |  7 ++-
 .../CodeGen/X86/unreachable-mbb-undef-phi.mir |  1 +
 8 files changed, 50 insertions(+), 23 deletions(-)

diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index e56c608c8e2f6..c858ae1177d72 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -247,7 +247,7 @@ void initializePostRASchedulerLegacyPass(PassRegistry &);
 void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry &);
 void initializePrintFunctionPassWrapperPass(PassRegistry &);
 void initializePrintModulePassWrapperPass(PassRegistry &);
-void initializeProcessImplicitDefsPass(PassRegistry &);
+void initializeProcessImplicitDefsLegacyPass(PassRegistry &);
 void initializeProfileSummaryInfoWrapperPassPass(PassRegistry &);
 void initializePromoteLegacyPassPass(PassRegistry &);
 void initializeRABasicPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index bd7c0da3a6dde..6d0aaee32b33e 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -71,6 +71,7 @@
 #include "llvm/CodeGen/PeepholeOptimizer.h"
 #include "llvm/CodeGen/PostRASchedulerList.h"
 #include "llvm/CodeGen/PreISelIntrinsicLowering.h"
+#include "llvm/CodeGen/ProcessImplicitDefs.h"
 #include "llvm/CodeGen/RegAllocEvictionAdvisor.h"
 #include "llvm/CodeGen/RegAllocFast.h"
 #include "llvm/CodeGen/RegAllocGreedyPass.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index f189aaa5eda2b..63ba0d0a84a0c 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -150,6 +150,7 @@ MACHINE_FUNCTION_PASS("print",
   MachineUniformityPrinterPass(errs()))
 MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(errs()))
 MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(errs()))
+MACHINE_FUNCTION_PASS("process-imp-defs", ProcessImplicitDefsPass())
 MACHINE_FUNCTION_PASS("prolog-epilog", PrologEpilogInserterPass())
 MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass())
 MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass())
@@ -281,7 +282,6 @@ DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", 
StaticDataSplitter)
 DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", 
MachineFunctionSplitterPass)
 DUMMY_MACHINE_FUNCTION_PASS("machineinstr-printer", MachineFunctionPrinterPass)
 DUMMY_MACHINE_FUNCTION_PASS("mirfs-discriminators", MIRAddFSDiscriminatorsPass)
-DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass)
 DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass)
 DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass)
 DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 065fd4704ccfb..6e12edc3d3a5b 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -110,7 +110,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializePostRAMachineSinkingLegacyPass(Registry);
   initializePostRASchedulerLegacyPass(Registry);
   initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
-  initializeProcessImplicitDefsPass(Registry);
+  initializeProcessImplicitDefsLegacyPass(Registry);
   initializeRABasicPass(Registry);
   initializeRAGreedyLegacyPass(Registry);
   initializeRegAllocFastPass(Registry);
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp 
b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index be81ecab9c897..54fd7814ef4f3 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -6,6 +6,7 @@
 //
 
//===--===//
 
+#include "llvm/CodeGen/ProcessImplicitDefs.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -26,24 +27,15 @@ using namespace llvm;
 namespace {
 /// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
 /// for each use. Add isUndef marker to implicit_def defs and their uses.
-class ProcessImplicitDefs : public MachineFunctionPass {
-

[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #138828)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138828

>From a9bab6452880f4200f4ce2d8c938eacd68d6bbc7 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 6 May 2025 11:04:05 +
Subject: [PATCH 1/2] [CodeGen][NPM] Register Function Passes

---
 llvm/include/llvm/Passes/MachinePassRegistry.def | 2 +-
 llvm/lib/Passes/PassBuilder.cpp  | 4 
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 6 +++---
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index c6c00e8f25882..8717b79b26968 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -70,7 +70,6 @@ FUNCTION_PASS("scalarize-masked-mem-intrin", 
ScalarizeMaskedMemIntrinPass())
 FUNCTION_PASS("select-optimize", SelectOptimizePass(TM))
 FUNCTION_PASS("sjlj-eh-prepare", SjLjEHPreparePass(TM))
 FUNCTION_PASS("stack-protector", StackProtectorPass(TM))
-FUNCTION_PASS("tlshoist", TLSVariableHoistPass())
 FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
 FUNCTION_PASS("verify", VerifierPass())
 FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass())
@@ -279,6 +278,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
 #ifndef DUMMY_FUNCTION_PASS
 #define DUMMY_FUNCTION_PASS(NAME, PASS_NAME)
 #endif
+DUMMY_FUNCTION_PASS("tlshoist", TLSVariableHoistPass)
 #undef DUMMY_FUNCTION_PASS
 
 #ifndef DUMMY_MACHINE_MODULE_PASS
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 5f7ce13ad8a3e..a9d192a7fad55 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -95,6 +95,7 @@
 #include "llvm/CodeGen/ExpandLargeDivRem.h"
 #include "llvm/CodeGen/ExpandMemCmp.h"
 #include "llvm/CodeGen/ExpandPostRAPseudos.h"
+#include "llvm/CodeGen/ExpandReductions.h"
 #include "llvm/CodeGen/FEntryInserter.h"
 #include "llvm/CodeGen/FinalizeISel.h"
 #include "llvm/CodeGen/FixupStatepointCallerSaved.h"
@@ -155,6 +156,7 @@
 #include "llvm/CodeGen/RemoveLoadsIntoFakeUses.h"
 #include "llvm/CodeGen/RemoveRedundantDebugValues.h"
 #include "llvm/CodeGen/RenameIndependentSubregs.h"
+#include "llvm/CodeGen/ReplaceWithVeclib.h"
 #include "llvm/CodeGen/SafeStack.h"
 #include "llvm/CodeGen/SanitizerBinaryMetadata.h"
 #include "llvm/CodeGen/SelectOptimize.h"
@@ -522,6 +524,8 @@ PassBuilder::PassBuilder(TargetMachine *TM, 
PipelineTuningOptions PTO,
   PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
 #include "PassRegistry.def"
 
+#define FUNCTION_PASS(NAME, CREATE_PASS)   
\
+  PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
 #define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS)   
\
   PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
 #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)   
\
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll 
b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 0d26b12a4a5e7..634a7fb6eb8e9 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -7,11 +7,11 @@
 ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < 
%s 2>&1 \
 ; RUN:   | FileCheck -check-prefix=GCN-O3 %s
 
-; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate))
+; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Complete optimized regalloc pipeline (PR #138491)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138491

>From dc9a3165d3625002d2122dfd0e1dbe262a399e74 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Mon, 5 May 2025 06:30:03 +
Subject: [PATCH] [AMDGPU][NPM] Complete optimized regalloc pipeline

Also fill in some other passes.
---
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 41 +--
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h  |  1 +
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll  |  7 +---
 4 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index a3b19af4adc39..29bc432ba3d5d 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -574,7 +574,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
   /// Insert InsertedPass pass after TargetPass pass.
   /// Only machine function passes are supported.
   template <typename TargetPassT, typename InsertedPassT>
-  void insertPass(InsertedPassT &&Pass) {
+  void insertPass(InsertedPassT &&Pass) const {
 AfterCallbacks.emplace_back(
 [&](StringRef Name, MachineFunctionPassManager &MFPM) mutable {
   if (Name == TargetPassT::name())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 680a3fb78a6e3..eee520435f6d9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -2174,7 +2174,44 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
   addPass(SIShrinkInstructionsPass());
 }
 
+void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc(
+AddMachinePass &addPass) const {
+  if (EnableDCEInRA)
+insertPass<DetectDeadLanesPass>(DeadMachineInstructionElimPass());
+
+  // FIXME: when an instruction has a Killed operand, and the instruction is
+  // inside a bundle, seems only the BUNDLE instruction appears as the Kills of
+  // the register in LiveVariables, this would trigger a failure in verifier,
+  // we should fix it and enable the verifier.
+  if (OptVGPRLiveRange)
+insertPass<RequireAnalysisPass<LiveVariablesAnalysis, MachineFunction>>(
+SIOptimizeVGPRLiveRangePass());
+
+  // This must be run immediately after phi elimination and before
+  // TwoAddressInstructions, otherwise the processing of the tied operand of
+  // SI_ELSE will introduce a copy of the tied operand source after the else.
+  insertPass<PHIEliminationPass>(SILowerControlFlowPass());
+
+  if (EnableRewritePartialRegUses)
+insertPass<RenameIndependentSubregsPass>(GCNRewritePartialRegUsesPass());
+
+  if (isPassEnabled(EnablePreRAOptimizations))
+insertPass<MachineSchedulerPass>(GCNPreRAOptimizationsPass());
 
+  // Allow the scheduler to run before SIWholeQuadMode inserts exec 
manipulation
+  // instructions that cause scheduling barriers.
+  insertPass<MachineSchedulerPass>(SIWholeQuadModePass());
+
+  if (OptExecMaskPreRA)
+insertPass<MachineSchedulerPass>(SIOptimizeExecMaskingPreRAPass());
+
+  // This is not an essential optimization and it has a noticeable impact on
+  // compilation time, so we only enable it from O2.
+  if (TM.getOptLevel() > CodeGenOptLevel::Less)
+insertPass<MachineSchedulerPass>(SIFormMemoryClausesPass());
+
+  Base::addOptimizedRegAlloc(addPass);
+}
 
 Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
 AddMachinePass &addPass) const {
@@ -2202,21 +2239,19 @@ Error 
AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
   addPass(SIPreAllocateWWMRegsPass());
 
   // For allocating other wwm register operands.
-  // addRegAlloc(addPass, RegAllocPhase::WWM);
   addPass(RAGreedyPass({onlyAllocateWWMRegs, "wwm"}));
   addPass(SILowerWWMCopiesPass());
   addPass(VirtRegRewriterPass(false));
   addPass(AMDGPUReserveWWMRegsPass());
 
   // For allocating per-thread VGPRs.
-  // addRegAlloc(addPass, RegAllocPhase::VGPR);
   addPass(RAGreedyPass({onlyAllocateVGPRs, "vgpr"}));
 
 
   addPreRewrite(addPass);
   addPass(VirtRegRewriterPass(true));
 
-  // TODO: addPass(AMDGPUMarkLastScratchLoadPass());
+  addPass(AMDGPUMarkLastScratchLoadPass());
   return Error::success();
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 589123274d0f5..3c62cd19c6e57 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -182,6 +182,7 @@ class AMDGPUCodeGenPassBuilder
   void addPostRegAlloc(AddMachinePass &) const;
   void addPreEmitPass(AddMachinePass &) const;
   Error addRegAssignmentOptimized(AddMachinePass &) const;
+  void addOptimizedRegAlloc(AddMachinePass &) const;
 
   /// Check if a pass is enabled given \p Opt option. The option always
   /// overrides defaults if explicitly used. Otherwise its default will be used
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll 
b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index e9b57515e71e0..91c15565762de 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -7,14 +7,11 @@
 ; RUN: llc -O3 -enable-new-pm -mtrip

[llvm-branch-commits] [llvm] [CodeGen][NPM] Read TargetMachine's EnableIPRA option (PR #138670)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138670

>From ea3103a3be32909978894364c1b481cb80c2fc67 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 6 May 2025 09:55:07 +
Subject: [PATCH] [CodeGen][NPM] Read TargetMachine's EnableIPRA option

---
 llvm/include/llvm/Passes/CodeGenPassBuilder.h | 4 
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll  | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index a3f439119b7da..bd7c0da3a6dde 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -172,6 +172,10 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
 // LLVMTM ctor. See TargetMachine::setGlobalISel for example.
 if (Opt.EnableIPRA)
   TM.Options.EnableIPRA = *Opt.EnableIPRA;
+else {
+  // If not explicitly specified, use target default.
+  TM.Options.EnableIPRA |= TM.useIPRA();
+}
 
 if (Opt.EnableGlobalISelAbort)
   TM.Options.GlobalISelAbort = *Opt.EnableGlobalISelAbort;
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll 
b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 232d5b86ac035..0d26b12a4a5e7 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -7,11 +7,11 @@
 ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < 
%s 2>&1 \
 ; RUN:   | FileCheck -check-prefix=GCN-O3 %s
 
-; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate))
+; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate))
 
-; GCN-O2: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-r

[llvm-branch-commits] [llvm] [CodeGen][NPM] Read TargetMachine's EnableIPRA option (PR #138670)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138670

>From ea3103a3be32909978894364c1b481cb80c2fc67 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 6 May 2025 09:55:07 +
Subject: [PATCH] [CodeGen][NPM] Read TargetMachine's EnableIPRA option

---
 llvm/include/llvm/Passes/CodeGenPassBuilder.h | 4 
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll  | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index a3f439119b7da..bd7c0da3a6dde 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -172,6 +172,10 @@ template  
class CodeGenPassBuilder {
 // LLVMTM ctor. See TargetMachine::setGlobalISel for example.
 if (Opt.EnableIPRA)
   TM.Options.EnableIPRA = *Opt.EnableIPRA;
+else {
+  // If not explicitly specified, use target default.
+  TM.Options.EnableIPRA |= TM.useIPRA();
+}
 
 if (Opt.EnableGlobalISelAbort)
   TM.Options.GlobalISelAbort = *Opt.EnableGlobalISelAbort;
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll 
b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 232d5b86ac035..0d26b12a4a5e7 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -7,11 +7,11 @@
 ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < 
%s 2>&1 \
 ; RUN:   | FileCheck -check-prefix=GCN-O3 %s
 
-; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate))
+; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate))
 
-; GCN-O2: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-r

[llvm-branch-commits] [llvm] [CodeGen][NPM] Account inserted passes for -start/stop options (PR #138830)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138830

>From 67f7f32e9ca0a8befc28b7504e9e7f141d771eae Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 7 May 2025 08:57:31 +
Subject: [PATCH] [CodeGen][NPM] Account inserted passes for -start/stop
 options

---
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  6 --
 llvm/test/tools/llc/new-pm/start-stop-inserted.ll | 15 +++
 2 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/tools/llc/new-pm/start-stop-inserted.ll

diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 6d0aaee32b33e..752ed6ae08a96 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -583,8 +583,10 @@ template  
class CodeGenPassBuilder {
   void insertPass(InsertedPassT &&Pass) const {
 AfterCallbacks.emplace_back(
 [&](StringRef Name, MachineFunctionPassManager &MFPM) mutable {
-  if (Name == TargetPassT::name())
-MFPM.addPass(std::forward<InsertedPassT>(Pass));
+  if (Name == TargetPassT::name()) {
+if (runBeforeAdding(InsertedPassT::name()))
+  MFPM.addPass(std::forward<InsertedPassT>(Pass));
+  }
 });
   }
 
diff --git a/llvm/test/tools/llc/new-pm/start-stop-inserted.ll 
b/llvm/test/tools/llc/new-pm/start-stop-inserted.ll
new file mode 100644
index 0..ce5ad2d9e5065
--- /dev/null
+++ b/llvm/test/tools/llc/new-pm/start-stop-inserted.ll
@@ -0,0 +1,15 @@
+; REQUIRES: amdgpu-registered-target
+
+; AMDGPU inserts the fourth instance of dead-mi-elimination pass after detect-dead-lanes
+; This checks that the pipeline stops before that.
+
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O3 -enable-new-pm -stop-before=dead-mi-elimination,4 --print-pipeline-passes -filetype=null %s | FileCheck %s
+
+; There is no way to -start-after an inserted pass right now.
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -O3 -enable-new-pm -start-after=dead-mi-elimination,4 --print-pipeline-passes -filetype=null %s
+
+
+; CHECK: dead-mi-elimination
+; CHECK: dead-mi-elimination
+; CHECK: dead-mi-elimination
+; CHECK-NOT: dead-mi-elimination

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] VirtRegRewriter: Set VirtReg flag (PR #138660)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138660

>From 838e904009527297d38e79572745a810cfa34d60 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 6 May 2025 09:05:52 +
Subject: [PATCH] [CodeGen][NPM] VirtRegRewriter: Set VirtReg flag

---
 llvm/include/llvm/CodeGen/VirtRegMap.h | 8 
 llvm/lib/CodeGen/VirtRegMap.cpp| 2 ++
 2 files changed, 10 insertions(+)

diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h 
b/llvm/include/llvm/CodeGen/VirtRegMap.h
index f5fba0d65401e..37d382650ac1f 100644
--- a/llvm/include/llvm/CodeGen/VirtRegMap.h
+++ b/llvm/include/llvm/CodeGen/VirtRegMap.h
@@ -248,6 +248,14 @@ class VirtRegRewriterPass : public 
PassInfoMixin {
   static bool isRequired() { return true; }
 
   void printPipeline(raw_ostream &OS, function_ref) 
const;
+
+  MachineFunctionProperties getSetProperties() const {
+if (ClearVirtRegs) {
+  return MachineFunctionProperties().set(
+  MachineFunctionProperties::Property::NoVRegs);
+}
+return {};
+  }
 };
 
 } // end llvm namespace
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 49e819e2d10f7..839905d5e3860 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -302,6 +302,8 @@ bool 
VirtRegRewriterLegacy::runOnMachineFunction(MachineFunction &MF) {
 PreservedAnalyses
 VirtRegRewriterPass::run(MachineFunction &MF,
  MachineFunctionAnalysisManager &MFAM) {
+  MFPropsModifier _(*this, MF);
+
   VirtRegMap &VRM = MFAM.getResult(MF);
   LiveIntervals &LIS = MFAM.getResult(MF);
   LiveRegMatrix &LRM = MFAM.getResult(MF);

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Register AMDGPUWaitSGPRHazards pass (PR #138496)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138496

>From ff19035e9f213592109e7ee2c4fb2b667ba9a333 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Mon, 5 May 2025 08:58:58 +
Subject: [PATCH] [AMDGPU][NPM] Register AMDGPUWaitSGPRHazards pass

---
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 +
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll  | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 13453963eec6d..0772eb8fb7a72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -107,6 +107,7 @@ MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPass())
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
 MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", 
AMDGPUPreloadKernArgPrologPass())
 MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
+MACHINE_FUNCTION_PASS("amdgpu-wait-sgpr-hazards", AMDGPUWaitSGPRHazardsPass())
 MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass())
 MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll 
b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index e00b7ff83e322..468e4208c510a 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -7,11 +7,11 @@
 ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < 
%s 2>&1 \
 ; RUN:   | FileCheck -check-prefix=GCN-O3 %s
 
-; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate))
+; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate))
 
-; GCN-O2: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port InitUndef to NPM (PR #138495)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138495

>From 476894ffa1fed64724b91c8b1db9391e09295be6 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Mon, 5 May 2025 08:47:42 +
Subject: [PATCH 1/2] [CodeGen][NPM] Port InitUndef to NPM

---
 llvm/include/llvm/CodeGen/InitUndef.h | 24 +
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  1 +
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/InitUndef.cpp| 50 +--
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 llvm/test/CodeGen/AArch64/init-undef.mir  |  3 ++
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll  |  4 +-
 .../rvv/handle-noreg-with-implicit-def.mir|  2 +
 .../rvv/subregister-undef-early-clobber.mir   |  1 +
 .../RISCV/rvv/undef-earlyclobber-chain.mir|  1 +
 12 files changed, 73 insertions(+), 20 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/InitUndef.h

diff --git a/llvm/include/llvm/CodeGen/InitUndef.h 
b/llvm/include/llvm/CodeGen/InitUndef.h
new file mode 100644
index 0..7274824a74905
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/InitUndef.h
@@ -0,0 +1,24 @@
+//===- llvm/CodeGen/InitUndef.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_INITUNDEF_H
+#define LLVM_CODEGEN_INITUNDEF_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class InitUndefPass : public PassInfoMixin {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_INITUNDEF_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 5a282d17b72c8..a3ebd2a6d15ca 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -312,7 +312,7 @@ void 
initializeTargetTransformInfoWrapperPassPass(PassRegistry &);
 void initializeTwoAddressInstructionLegacyPassPass(PassRegistry &);
 void initializeTypeBasedAAWrapperPassPass(PassRegistry &);
 void initializeTypePromotionLegacyPass(PassRegistry &);
-void initializeInitUndefPass(PassRegistry &);
+void initializeInitUndefLegacyPass(PassRegistry &);
 void initializeUniformityInfoWrapperPassPass(PassRegistry &);
 void initializeUnifyLoopExitsLegacyPassPass(PassRegistry &);
 void initializeUnpackMachineBundlesPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 29bc432ba3d5d..a3f439119b7da 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -43,6 +43,7 @@
 #include "llvm/CodeGen/GlobalMerge.h"
 #include "llvm/CodeGen/GlobalMergeFunctions.h"
 #include "llvm/CodeGen/IndirectBrExpand.h"
+#include "llvm/CodeGen/InitUndef.h"
 #include "llvm/CodeGen/InterleavedAccess.h"
 #include "llvm/CodeGen/InterleavedLoadCombine.h"
 #include "llvm/CodeGen/JMCInstrumenter.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index c69573ee3ed97..436b26852ce90 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -148,6 +148,7 @@ MACHINE_FUNCTION_PASS("early-tailduplication", 
EarlyTailDuplicatePass())
 MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass())
 MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass())
 MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", 
FixupStatepointCallerSavedPass())
+MACHINE_FUNCTION_PASS("init-undef", InitUndefPass())
 MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass())
 MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass())
 MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass())
@@ -304,7 +305,6 @@ DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", 
MIRProfileLoaderNewPass)
 DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass)
 DUMMY_MACHINE_FUNCTION_PASS("gc-empty-basic-blocks", GCEmptyBasicBlocksPass)
 DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass)
-DUMMY_MACHINE_FUNCTION_PASS("init-undef-pass", InitUndefPass)
 DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass)
 DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass)
 DUMMY_MACHINE_FUNCTION_PASS("kcfi", MachineKCFIPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 5250534d8a4e4..aa3591cb6be58 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -54,7 +54,7 @@ void llvm::initializeC

[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #138828)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138828

>From a9bab6452880f4200f4ce2d8c938eacd68d6bbc7 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 6 May 2025 11:04:05 +
Subject: [PATCH 1/2] [CodeGen][NPM] Register Function Passes

---
 llvm/include/llvm/Passes/MachinePassRegistry.def | 2 +-
 llvm/lib/Passes/PassBuilder.cpp  | 4 
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 6 +++---
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index c6c00e8f25882..8717b79b26968 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -70,7 +70,6 @@ FUNCTION_PASS("scalarize-masked-mem-intrin", 
ScalarizeMaskedMemIntrinPass())
 FUNCTION_PASS("select-optimize", SelectOptimizePass(TM))
 FUNCTION_PASS("sjlj-eh-prepare", SjLjEHPreparePass(TM))
 FUNCTION_PASS("stack-protector", StackProtectorPass(TM))
-FUNCTION_PASS("tlshoist", TLSVariableHoistPass())
 FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
 FUNCTION_PASS("verify", VerifierPass())
 FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass())
@@ -279,6 +278,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
 #ifndef DUMMY_FUNCTION_PASS
 #define DUMMY_FUNCTION_PASS(NAME, PASS_NAME)
 #endif
+DUMMY_FUNCTION_PASS("tlshoist", TLSVariableHoistPass)
 #undef DUMMY_FUNCTION_PASS
 
 #ifndef DUMMY_MACHINE_MODULE_PASS
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 5f7ce13ad8a3e..a9d192a7fad55 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -95,6 +95,7 @@
 #include "llvm/CodeGen/ExpandLargeDivRem.h"
 #include "llvm/CodeGen/ExpandMemCmp.h"
 #include "llvm/CodeGen/ExpandPostRAPseudos.h"
+#include "llvm/CodeGen/ExpandReductions.h"
 #include "llvm/CodeGen/FEntryInserter.h"
 #include "llvm/CodeGen/FinalizeISel.h"
 #include "llvm/CodeGen/FixupStatepointCallerSaved.h"
@@ -155,6 +156,7 @@
 #include "llvm/CodeGen/RemoveLoadsIntoFakeUses.h"
 #include "llvm/CodeGen/RemoveRedundantDebugValues.h"
 #include "llvm/CodeGen/RenameIndependentSubregs.h"
+#include "llvm/CodeGen/ReplaceWithVeclib.h"
 #include "llvm/CodeGen/SafeStack.h"
 #include "llvm/CodeGen/SanitizerBinaryMetadata.h"
 #include "llvm/CodeGen/SelectOptimize.h"
@@ -522,6 +524,8 @@ PassBuilder::PassBuilder(TargetMachine *TM, 
PipelineTuningOptions PTO,
   PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
 #include "PassRegistry.def"
 
+#define FUNCTION_PASS(NAME, CREATE_PASS)   
\
+  PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
 #define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS)   
\
   PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
 #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)   
\
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll 
b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 0d26b12a4a5e7..634a7fb6eb8e9 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -7,11 +7,11 @@
 ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < 
%s 2>&1 \
 ; RUN:   | FileCheck -check-prefix=GCN-O3 %s
 
-; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate))
+; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Complete optimized regalloc pipeline (PR #138491)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138491

>From dc9a3165d3625002d2122dfd0e1dbe262a399e74 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Mon, 5 May 2025 06:30:03 +
Subject: [PATCH] [AMDGPU][NPM] Complete optimized regalloc pipeline

Also fill in some other passes.
---
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 41 +--
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h  |  1 +
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll  |  7 +---
 4 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index a3b19af4adc39..29bc432ba3d5d 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -574,7 +574,7 @@ template  class 
CodeGenPassBuilder {
   /// Insert InsertedPass pass after TargetPass pass.
   /// Only machine function passes are supported.
   template <typename TargetPassT, typename InsertedPassT>
-  void insertPass(InsertedPassT &&Pass) {
+  void insertPass(InsertedPassT &&Pass) const {
 AfterCallbacks.emplace_back(
 [&](StringRef Name, MachineFunctionPassManager &MFPM) mutable {
   if (Name == TargetPassT::name())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 680a3fb78a6e3..eee520435f6d9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -2174,7 +2174,44 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
   addPass(SIShrinkInstructionsPass());
 }
 
+void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc(
+AddMachinePass &addPass) const {
+  if (EnableDCEInRA)
+insertPass(DeadMachineInstructionElimPass());
+
+  // FIXME: When an instruction with a Killed operand is inside a bundle, it
+  // seems that only the BUNDLE instruction appears among the Kills of the
+  // register in LiveVariables. This triggers a failure in the verifier; we
+  // should fix that and enable the verifier.
+  if (OptVGPRLiveRange)
+insertPass>(
+SIOptimizeVGPRLiveRangePass());
+
+  // This must be run immediately after phi elimination and before
+  // TwoAddressInstructions, otherwise the processing of the tied operand of
+  // SI_ELSE will introduce a copy of the tied operand source after the else.
+  insertPass(SILowerControlFlowPass());
+
+  if (EnableRewritePartialRegUses)
+insertPass(GCNRewritePartialRegUsesPass());
+
+  if (isPassEnabled(EnablePreRAOptimizations))
+insertPass(GCNPreRAOptimizationsPass());
 
+  // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
+  // instructions that cause scheduling barriers.
+  insertPass(SIWholeQuadModePass());
+
+  if (OptExecMaskPreRA)
+insertPass(SIOptimizeExecMaskingPreRAPass());
+
+  // This is not an essential optimization and it has a noticeable impact on
+  // compilation time, so we only enable it from O2.
+  if (TM.getOptLevel() > CodeGenOptLevel::Less)
+insertPass(SIFormMemoryClausesPass());
+
+  Base::addOptimizedRegAlloc(addPass);
+}
 
 Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
 AddMachinePass &addPass) const {
@@ -2202,21 +2239,19 @@ Error 
AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
   addPass(SIPreAllocateWWMRegsPass());
 
   // For allocating other wwm register operands.
-  // addRegAlloc(addPass, RegAllocPhase::WWM);
   addPass(RAGreedyPass({onlyAllocateWWMRegs, "wwm"}));
   addPass(SILowerWWMCopiesPass());
   addPass(VirtRegRewriterPass(false));
   addPass(AMDGPUReserveWWMRegsPass());
 
   // For allocating per-thread VGPRs.
-  // addRegAlloc(addPass, RegAllocPhase::VGPR);
   addPass(RAGreedyPass({onlyAllocateVGPRs, "vgpr"}));
 
 
   addPreRewrite(addPass);
   addPass(VirtRegRewriterPass(true));
 
-  // TODO: addPass(AMDGPUMarkLastScratchLoadPass());
+  addPass(AMDGPUMarkLastScratchLoadPass());
   return Error::success();
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 589123274d0f5..3c62cd19c6e57 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -182,6 +182,7 @@ class AMDGPUCodeGenPassBuilder
   void addPostRegAlloc(AddMachinePass &) const;
   void addPreEmitPass(AddMachinePass &) const;
   Error addRegAssignmentOptimized(AddMachinePass &) const;
+  void addOptimizedRegAlloc(AddMachinePass &) const;
 
   /// Check if a pass is enabled given \p Opt option. The option always
   /// overrides defaults if explicitly used. Otherwise its default will be used
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll 
b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index e9b57515e71e0..91c15565762de 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -7,14 +7,11 @@
 ; RUN: llc -O3 -enable-new-pm -mtrip

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Register AMDGPUWaitSGPRHazards pass (PR #138496)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138496

>From ff19035e9f213592109e7ee2c4fb2b667ba9a333 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Mon, 5 May 2025 08:58:58 +
Subject: [PATCH] [AMDGPU][NPM] Register AMDGPUWaitSGPRHazards pass

---
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 +
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll  | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 13453963eec6d..0772eb8fb7a72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -107,6 +107,7 @@ MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPass())
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
 MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", 
AMDGPUPreloadKernArgPrologPass())
 MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
+MACHINE_FUNCTION_PASS("amdgpu-wait-sgpr-hazards", AMDGPUWaitSGPRHazardsPass())
 MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass())
 MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll 
b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index e00b7ff83e322..468e4208c510a 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -7,11 +7,11 @@
 ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < 
%s 2>&1 \
 ; RUN:   | FileCheck -check-prefix=GCN-O3 %s
 
-; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate))
+; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate))
 
-; GCN-O2: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port PostRAMachineSinking to NPM (PR #138497)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138497

>From 41492e43dad53cefb3ee220a13e75f062351c1cc Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Mon, 5 May 2025 09:17:40 +
Subject: [PATCH] [CodeGen][NPM] Port PostRAMachineSinking to NPM

---
 llvm/include/llvm/CodeGen/MachineSink.h   | 11 +++
 llvm/include/llvm/InitializePasses.h  |  2 +-
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/MachineSink.cpp  | 31 +++
 .../AArch64/bisect-post-ra-machine-sink.mir   |  1 +
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll  |  4 +--
 .../CodeGen/AMDGPU/postra-machine-sink.mir|  1 +
 llvm/test/CodeGen/X86/pr38952.mir |  1 +
 9 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineSink.h 
b/llvm/include/llvm/CodeGen/MachineSink.h
index 71bd7229b7598..eb9548dc82250 100644
--- a/llvm/include/llvm/CodeGen/MachineSink.h
+++ b/llvm/include/llvm/CodeGen/MachineSink.h
@@ -26,5 +26,16 @@ class MachineSinkingPass : public 
PassInfoMixin<MachineSinkingPass> {
  function_ref<StringRef(StringRef)> MapClassName2PassName);
 };
 
+class PostRAMachineSinkingPass
+: public PassInfoMixin<PostRAMachineSinkingPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &);
+
+  MachineFunctionProperties getRequiredProperties() const {
+return MachineFunctionProperties().set(
+MachineFunctionProperties::Property::NoVRegs);
+  }
+};
+
 } // namespace llvm
 #endif // LLVM_CODEGEN_MACHINESINK_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index a3ebd2a6d15ca..e56c608c8e2f6 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -242,7 +242,7 @@ void 
initializePostDominatorTreeWrapperPassPass(PassRegistry &);
 void initializePostInlineEntryExitInstrumenterPass(PassRegistry &);
 void initializePostMachineSchedulerLegacyPass(PassRegistry &);
 void initializePostRAHazardRecognizerLegacyPass(PassRegistry &);
-void initializePostRAMachineSinkingPass(PassRegistry &);
+void initializePostRAMachineSinkingLegacyPass(PassRegistry &);
 void initializePostRASchedulerLegacyPass(PassRegistry &);
 void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry &);
 void initializePrintFunctionPassWrapperPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 436b26852ce90..c6c00e8f25882 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -164,6 +164,7 @@ MACHINE_FUNCTION_PASS("phi-node-elimination", 
PHIEliminationPass())
 MACHINE_FUNCTION_PASS("post-RA-hazard-rec", PostRAHazardRecognizerPass())
 MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass(TM))
 MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass(TM))
+MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass())
 MACHINE_FUNCTION_PASS("post-ra-pseudos", ExpandPostRAPseudosPass())
 MACHINE_FUNCTION_PASS("print", PrintMIRPass())
 MACHINE_FUNCTION_PASS("print", 
LiveDebugVariablesPrinterPass(errs()))
@@ -315,7 +316,6 @@ DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", 
StaticDataSplitter)
 DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", 
MachineFunctionSplitterPass)
 DUMMY_MACHINE_FUNCTION_PASS("machineinstr-printer", MachineFunctionPrinterPass)
 DUMMY_MACHINE_FUNCTION_PASS("mirfs-discriminators", MIRAddFSDiscriminatorsPass)
-DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass)
 DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass)
 DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass)
 DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index aa3591cb6be58..065fd4704ccfb 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -107,7 +107,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializePeepholeOptimizerLegacyPass(Registry);
   initializePostMachineSchedulerLegacyPass(Registry);
   initializePostRAHazardRecognizerLegacyPass(Registry);
-  initializePostRAMachineSinkingPass(Registry);
+  initializePostRAMachineSinkingLegacyPass(Registry);
   initializePostRASchedulerLegacyPass(Registry);
   initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
   initializeProcessImplicitDefsPass(Registry);
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index aa2987b6710a3..be1a3ac125c65 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -2068,12 +2068,12 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
 
//===--===//
 namespace {
 
-class PostRAMachineSinking : public MachineFunctionPass {

[llvm-branch-commits] [llvm] [CodeGen][NPM] VirtRegRewriter: Set VirtReg flag (PR #138660)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138660

>From 838e904009527297d38e79572745a810cfa34d60 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 6 May 2025 09:05:52 +
Subject: [PATCH] [CodeGen][NPM] VirtRegRewriter: Set VirtReg flag

---
 llvm/include/llvm/CodeGen/VirtRegMap.h | 8 
 llvm/lib/CodeGen/VirtRegMap.cpp| 2 ++
 2 files changed, 10 insertions(+)

diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h 
b/llvm/include/llvm/CodeGen/VirtRegMap.h
index f5fba0d65401e..37d382650ac1f 100644
--- a/llvm/include/llvm/CodeGen/VirtRegMap.h
+++ b/llvm/include/llvm/CodeGen/VirtRegMap.h
@@ -248,6 +248,14 @@ class VirtRegRewriterPass : public 
PassInfoMixin<VirtRegRewriterPass> {
   static bool isRequired() { return true; }
 
   void printPipeline(raw_ostream &OS, function_ref<StringRef(StringRef)>) 
const;
+
+  MachineFunctionProperties getSetProperties() const {
+if (ClearVirtRegs) {
+  return MachineFunctionProperties().set(
+  MachineFunctionProperties::Property::NoVRegs);
+}
+return {};
+  }
 };
 
 } // end llvm namespace
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 49e819e2d10f7..839905d5e3860 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -302,6 +302,8 @@ bool 
VirtRegRewriterLegacy::runOnMachineFunction(MachineFunction &MF) {
 PreservedAnalyses
 VirtRegRewriterPass::run(MachineFunction &MF,
  MachineFunctionAnalysisManager &MFAM) {
+  MFPropsModifier _(*this, MF);
+
   VirtRegMap &VRM = MFAM.getResult<VirtRegMapAnalysis>(MF);
   LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
   LiveRegMatrix &LRM = MFAM.getResult<LiveRegMatrixAnalysis>(MF);

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] Account inserted passes for -start/stop options (PR #138830)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/138830

>From 67f7f32e9ca0a8befc28b7504e9e7f141d771eae Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 7 May 2025 08:57:31 +
Subject: [PATCH] [CodeGen][NPM] Account inserted passes for -start/stop
 options

---
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  6 --
 llvm/test/tools/llc/new-pm/start-stop-inserted.ll | 15 +++
 2 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/tools/llc/new-pm/start-stop-inserted.ll

diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 6d0aaee32b33e..752ed6ae08a96 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -583,8 +583,10 @@ template <typename DerivedT, typename TargetMachineT> 
class CodeGenPassBuilder {
   void insertPass(InsertedPassT &&Pass) const {
 AfterCallbacks.emplace_back(
 [&](StringRef Name, MachineFunctionPassManager &MFPM) mutable {
-  if (Name == TargetPassT::name())
-MFPM.addPass(std::forward<InsertedPassT>(Pass));
+  if (Name == TargetPassT::name()) {
+if (runBeforeAdding(InsertedPassT::name()))
+  MFPM.addPass(std::forward<InsertedPassT>(Pass));
+  }
 });
   }
 
diff --git a/llvm/test/tools/llc/new-pm/start-stop-inserted.ll 
b/llvm/test/tools/llc/new-pm/start-stop-inserted.ll
new file mode 100644
index 0..ce5ad2d9e5065
--- /dev/null
+++ b/llvm/test/tools/llc/new-pm/start-stop-inserted.ll
@@ -0,0 +1,15 @@
+; REQUIRES: amdgpu-registered-target
+
+; AMDGPU inserts the fourth instance of dead-mi-elimination pass after detect-dead-lanes
+; This checks that the pipeline stops before that.
+
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O3 -enable-new-pm 
-stop-before=dead-mi-elimination,4 --print-pipeline-passes -filetype=null %s | 
FileCheck %s
+
+; There is no way to -start-after an inserted pass right now.
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -O3 -enable-new-pm 
-start-after=dead-mi-elimination,4 --print-pipeline-passes -filetype=null %s
+
+
+; CHECK: dead-mi-elimination
+; CHECK: dead-mi-elimination
+; CHECK: dead-mi-elimination
+; CHECK-NOT: dead-mi-elimination

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Fill in addPreSched2 passes (PR #139516)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan created 
https://github.com/llvm/llvm-project/pull/139516

None



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan created 
https://github.com/llvm/llvm-project/pull/139517

This replaces the Invalidate pass.



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Fill in addPreSched2 passes (PR #139516)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


optimisan wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack 
> [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/139516?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#139517** https://app.graphite.dev/github/pr/llvm/llvm-project/139517?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#139516** https://app.graphite.dev/github/pr/llvm/llvm-project/139516?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/139516?utm_source=stack-comment-view-in-graphite";
 target="_blank">(View in Graphite)
* **#138830** https://app.graphite.dev/github/pr/llvm/llvm-project/138830?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138829** https://app.graphite.dev/github/pr/llvm/llvm-project/138829?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138828** https://app.graphite.dev/github/pr/llvm/llvm-project/138828?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138670** https://app.graphite.dev/github/pr/llvm/llvm-project/138670?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138660** https://app.graphite.dev/github/pr/llvm/llvm-project/138660?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138497** https://app.graphite.dev/github/pr/llvm/llvm-project/138497?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138496** https://app.graphite.dev/github/pr/llvm/llvm-project/138496?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138495** https://app.graphite.dev/github/pr/llvm/llvm-project/138495?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138491** https://app.graphite.dev/github/pr/llvm/llvm-project/138491?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#136818** https://app.graphite.dev/github/pr/llvm/llvm-project/136818?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`




This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn 
more about [stacking](https://stacking.dev/?utm_source=stack-comment).


https://github.com/llvm/llvm-project/pull/139516
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


optimisan wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack 
> [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/139517?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#139517** https://app.graphite.dev/github/pr/llvm/llvm-project/139517?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/139517?utm_source=stack-comment-view-in-graphite";
 target="_blank">(View in Graphite)
* **#139516** https://app.graphite.dev/github/pr/llvm/llvm-project/139516?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138830** https://app.graphite.dev/github/pr/llvm/llvm-project/138830?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138829** https://app.graphite.dev/github/pr/llvm/llvm-project/138829?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138828** https://app.graphite.dev/github/pr/llvm/llvm-project/138828?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138670** https://app.graphite.dev/github/pr/llvm/llvm-project/138670?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138660** https://app.graphite.dev/github/pr/llvm/llvm-project/138660?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138497** https://app.graphite.dev/github/pr/llvm/llvm-project/138497?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138496** https://app.graphite.dev/github/pr/llvm/llvm-project/138496?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138495** https://app.graphite.dev/github/pr/llvm/llvm-project/138495?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#138491** https://app.graphite.dev/github/pr/llvm/llvm-project/138491?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#136818** https://app.graphite.dev/github/pr/llvm/llvm-project/136818?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`




This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn 
more about [stacking](https://stacking.dev/?utm_source=stack-comment).


https://github.com/llvm/llvm-project/pull/139517
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] Read TargetMachine's EnableIPRA option (PR #138670)

2025-05-12 Thread Christudasan Devadasan via llvm-branch-commits


https://github.com/cdevadas approved this pull request.


https://github.com/llvm/llvm-project/pull/138670
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Fill in addPreSched2 passes (PR #139516)

2025-05-12 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (optimisan)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/139516.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+6) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll (+3-3) 


```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index eee520435f6d9..89f8805a5c1a9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -2262,6 +2262,12 @@ void 
AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
   Base::addPostRegAlloc(addPass);
 }
 
+void AMDGPUCodeGenPassBuilder::addPreSched2(AddMachinePass &addPass) const {
+  if (TM.getOptLevel() > CodeGenOptLevel::None)
+addPass(SIShrinkInstructionsPass());
+  addPass(SIPostRABundlerPass());
+}
+
 void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) {
 addPass(GCNCreateVOPDPass());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 3c62cd19c6e57..3b2f39c14a9bc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -183,6 +183,7 @@ class AMDGPUCodeGenPassBuilder
   void addPreEmitPass(AddMachinePass &) const;
   Error addRegAssignmentOptimized(AddMachinePass &) const;
   void addOptimizedRegAlloc(AddMachinePass &) const;
+  void addPreSched2(AddMachinePass &) const;
 
   /// Check if a pass is enabled given \p Opt option. The option always
   /// overrides defaults if explicitly used. Otherwise its default will be used
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll 
b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 5a89fdeb34dfb..9b8a430aaad45 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -7,11 +7,11 @@
 ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < 
%s 2>&1 \
 ; RUN:   | FileCheck -check-prefix=GCN-O3 %s
 
-; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate))
+; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-ins

[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan edited 
https://github.com/llvm/llvm-project/pull/139517
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Fill in addPreSched2 passes (PR #139516)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan ready_for_review 
https://github.com/llvm/llvm-project/pull/139516
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan ready_for_review 
https://github.com/llvm/llvm-project/pull/139517
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan edited 
https://github.com/llvm/llvm-project/pull/139517
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)

2025-05-12 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (optimisan)


Changes

This replaces the Invalidate pass.


There are no cross-function analysis requirements right now, so clearing all 
analyses works for the last pass in the pipeline.

---

Having the `InvalidateAnalysisPass()` in the pipeline causes 
a problem with `ModuleToCGSCCPassAdaptor`: it deletes the machine functions of 
other functions, so we end up with exactly one correctly compiled MF while the 
rest vanish.

This is because `ModuleToCGSCCPassAdaptor` propagates `PassPA` (received from the 
`CGSCCToFunctionPassAdaptor` that runs the actual codegen pipeline on MFs) to 
the next SCC. That causes `MFA` invalidation on functions in the next SCC. 

For us, `PassPA` happens to be returned from 
`invalidate`, which `abandons` the 
`MachineFunctionAnalysis`. So while the first function runs through the 
pipeline normally, `invalidate` also deletes the functions in the next SCC 
before its pipeline is run. (This seems to be the intended mechanism of the `CG 
adaptor` to allow cross-SCC invalidations.)


---
Full diff: https://github.com/llvm/llvm-project/pull/139517.diff


5 Files Affected:

- (modified) llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h (+5) 
- (modified) llvm/include/llvm/Passes/CodeGenPassBuilder.h (+2-1) 
- (modified) llvm/lib/CodeGen/MachineFunctionAnalysis.cpp (+7) 
- (modified) llvm/lib/Passes/PassRegistry.def (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll (+3-3) 


``diff
diff --git a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h 
b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
index e194f4838e118..9a1e05ebb3c1b 100644
--- a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
@@ -45,6 +45,11 @@ class MachineFunctionAnalysis
   Result run(Function &F, FunctionAnalysisManager &FAM);
 };
 
+class FreeAllAnalysesPass : public PassInfoMixin {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_MachineFunctionAnalysis
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 752ed6ae08a96..7ab0cf2824038 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -285,7 +285,8 @@ template  class 
CodeGenPassBuilder {
 
   FunctionPassManager FPM;
   FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
-  FPM.addPass(InvalidateAnalysisPass());
+  // Since this is the last pass in the pipeline, we can clear all analyses
+  FPM.addPass(FreeAllAnalysesPass());
   if (this->PB.AddInCGSCCOrder) {
 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
 createCGSCCToFunctionPassAdaptor(std::move(FPM;
diff --git a/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp 
b/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
index e7a4d6d61e211..25239066a10a7 100644
--- a/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -45,3 +45,10 @@ MachineFunctionAnalysis::run(Function &F, 
FunctionAnalysisManager &FAM) {
 
   return Result(std::move(MF));
 }
+
+PreservedAnalyses FreeAllAnalysesPass::run(Function &F,
+   FunctionAnalysisManager &FAM) {
+  // Since this is the last pass in the pipeline, we can clear all analyses
+  FAM.clear(F, F.getName());
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 822aa90601286..bba11759cfa9d 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -379,6 +379,7 @@ FUNCTION_PASS("extra-vector-passes",
 FUNCTION_PASS("fix-irreducible", FixIrreduciblePass())
 FUNCTION_PASS("flatten-cfg", FlattenCFGPass())
 FUNCTION_PASS("float2int", Float2IntPass())
+FUNCTION_PASS("free-all-analyses", FreeAllAnalysesPass())
 FUNCTION_PASS("gc-lowering", GCLoweringPass())
 FUNCTION_PASS("guard-widening", GuardWideningPass())
 FUNCTION_PASS("gvn-hoist", GVNHoistPass())
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll 
b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 9b8a430aaad45..009c1467eda1d 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -7,11 +7,11 @@
 ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < 
%s 2>&1 \
 ; RUN:   | FileCheck -check-prefix=GCN-O3 %s
 
-; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-

[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/139517



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
https://support.github.com/contact";>Contact Support —
https://githubstatus.com";>GitHub Status —
https://twitter.com/githubstatus";>@githubstatus
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan edited 
https://github.com/llvm/llvm-project/pull/139517
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)

2025-05-12 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/139517

>From ae761dee3ece71d4813b62a2600cf4565b893239 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Mon, 12 May 2025 08:02:22 +
Subject: [PATCH 1/2] [CodeGen][NPM] Introduce FreeAllAnalysesPass

This replaces the Invalidate pass.
---
 llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h | 5 +
 llvm/include/llvm/Passes/CodeGenPassBuilder.h   | 3 ++-
 llvm/lib/CodeGen/MachineFunctionAnalysis.cpp| 6 ++
 llvm/lib/Passes/PassRegistry.def| 1 +
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll| 6 +++---
 5 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h 
b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
index e194f4838e118..9a1e05ebb3c1b 100644
--- a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
@@ -45,6 +45,11 @@ class MachineFunctionAnalysis
   Result run(Function &F, FunctionAnalysisManager &FAM);
 };
 
+class FreeAllAnalysesPass : public PassInfoMixin {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_MachineFunctionAnalysis
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 752ed6ae08a96..7ab0cf2824038 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -285,7 +285,8 @@ template  class 
CodeGenPassBuilder {
 
   FunctionPassManager FPM;
   FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
-  FPM.addPass(InvalidateAnalysisPass());
+  // Since this is the last pass in the pipeline, we can clear all analyses
+  FPM.addPass(FreeAllAnalysesPass());
   if (this->PB.AddInCGSCCOrder) {
 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
 createCGSCCToFunctionPassAdaptor(std::move(FPM;
diff --git a/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp 
b/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
index e7a4d6d61e211..df6a4f4401d9b 100644
--- a/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -45,3 +45,9 @@ MachineFunctionAnalysis::run(Function &F, 
FunctionAnalysisManager &FAM) {
 
   return Result(std::move(MF));
 }
+
+PreservedAnalyses FreeAllAnalysesPass::run(Function &F,
+   FunctionAnalysisManager &FAM) {
+  FAM.clear(F, F.getName());
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 822aa90601286..bba11759cfa9d 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -379,6 +379,7 @@ FUNCTION_PASS("extra-vector-passes",
 FUNCTION_PASS("fix-irreducible", FixIrreduciblePass())
 FUNCTION_PASS("flatten-cfg", FlattenCFGPass())
 FUNCTION_PASS("float2int", Float2IntPass())
+FUNCTION_PASS("free-all-analyses", FreeAllAnalysesPass())
 FUNCTION_PASS("gc-lowering", GCLoweringPass())
 FUNCTION_PASS("guard-widening", GuardWideningPass())
 FUNCTION_PASS("gvn-hoist", GVNHoistPass())
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll 
b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 9b8a430aaad45..009c1467eda1d 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -7,11 +7,11 @@
 ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < 
%s 2>&1 \
 ; RUN:   | FileCheck -check-prefix=GCN-O3 %s
 
-; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazard

[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev created 
https://github.com/llvm/llvm-project/pull/139508

None



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
https://support.github.com/contact";>Contact Support —
https://githubstatus.com";>GitHub Status —
https://twitter.com/githubstatus";>@githubstatus
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)

2025-05-12 Thread Iris Shi via llvm-branch-commits


el-ev wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/139508?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#139508** https://app.graphite.dev/github/pr/llvm/llvm-project/139508?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/139508?utm_source=stack-comment-view-in-graphite";
 target="_blank">(View in Graphite)
* **#139369** https://app.graphite.dev/github/pr/llvm/llvm-project/139369?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>: 1 other dependent PR 
([#139495](https://github.com/llvm/llvm-project/pull/139495) https://app.graphite.dev/github/pr/llvm/llvm-project/139495?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>)
* `main`




This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn 
more about https://stacking.dev/?utm_source=stack-comment";>stacking.


https://github.com/llvm/llvm-project/pull/139508
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev ready_for_review 
https://github.com/llvm/llvm-project/pull/139508
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)

2025-05-12 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-mc

Author: Iris Shi (el-ev)


Changes



---

Patch is 25.59 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/139508.diff


5 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td (+29) 
- (added) llvm/test/MC/RISCV/rv64zfa-only-valid.s (+19) 
- (modified) llvm/test/MC/RISCV/zfa-invalid.s (+11-2) 
- (added) llvm/test/MC/RISCV/zfa-quad-invalid.s (+42) 
- (modified) llvm/test/MC/RISCV/zfa-valid.s (+383-8) 


``diff
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
index 8a449d32e0104..0ad654db42f5c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
@@ -175,6 +175,28 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>;
 }
 } // Predicates = [HasStdExtZfa, HasStdExtZfh]
 
+let Predicates = [HasStdExtZfa, HasStdExtQ] in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def FLI_Q : FPFLI_r<0b011, 0b1, 0b000, FPR128, "fli.q">;
+
+def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>;
+def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>;
+
+def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">;
+def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, 
+ "froundnx.q">;
+
+def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>;
+def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>;
+} // Predicates = [HasStdExtZfa, HasStdExtQ]
+
+let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in {
+  let mayRaiseFPException = 0 in {
+def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b1, 0b000, GPR, FPR128, 
"fmvh.x.q">;
+def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">;
+  }
+} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64]
+
 
//===--===//
 // Pseudo-instructions and codegen patterns
 
//===--===//
@@ -200,6 +222,13 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt",
 (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
 }
 
+let Predicates = [HasStdExtZfa, HasStdExtQ] in {
+def : InstAlias<"fgtq.q $rd, $rs, $rt",
+(FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>;
+def : InstAlias<"fgeq.q $rd, $rs, $rt",
+(FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>;
+}
+
 
//===--===//
 // Codegen patterns
 
//===--===//
diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s 
b/llvm/test/MC/RISCV/rv64zfa-only-valid.s
new file mode 100644
index 0..95fb253b145c1
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64zfa-only-valid.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases 
-show-encoding \
+# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \
+# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \
+# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+#
+# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \
+# RUN: -M no-aliases -show-encoding < %s 2>&1 \
+# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s
+
+# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1
+# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6]
+# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional 
Floating-Point){{$}}
+fmvh.x.q a1, fs1
+
+# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2
+# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6]
+# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional 
Floating-Point){{$}}
+fmvp.q.x fs1, a1, a2
diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s
index c2537c3fc5102..cedc9279db3cb 100644
--- a/llvm/test/MC/RISCV/zfa-invalid.s
+++ b/llvm/test/MC/RISCV/zfa-invalid.s
@@ -1,5 +1,5 @@
-# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV32 %s
-# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s
+# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV32 %s
+# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s
 
 # Invalid rounding modes
 # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode
@@ -35,6 +35,10 @@ fli.d ft1, 3.56e+02
 # CHECK-NO-RV32: error: operand must be a valid floating-point constant
 fli.h ft1, 1.60e+00
 
+# CHECK-NO-RV64: error: operand must be a valid floating-point constant
+# CHECK-NO-RV32: error: operand must be a valid floating-point constant
+fli.q ft1, 2.25e+00
+
 # CHECK-NO-RV64: error: invalid fl

[llvm-branch-commits] [llvm] [GlobalISel] Add computeNumSignBits for ASHR (PR #139503)

2025-05-12 Thread Jay Foad via llvm-branch-commits


https://github.com/jayfoad requested changes to this pull request.


https://github.com/llvm/llvm-project/pull/139503
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [GlobalISel] Add computeNumSignBits for ASHR (PR #139503)

2025-05-12 Thread Jay Foad via llvm-branch-commits



@@ -864,6 +864,16 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
   return TyBits - 1; // Every always-zero bit is a sign bit.
 break;
   }
+  case TargetOpcode::G_ASHR: {
+Register Src1 = MI.getOperand(1).getReg();
+Register Src2 = MI.getOperand(2).getReg();
+LLT SrcTy = MRI.getType(Src1);
+FirstAnswer = computeNumSignBits(Src1, DemandedElts, Depth + 1);
+if (auto C = getIConstantSplatVal(Src2, MRI))
+  FirstAnswer = std::max(FirstAnswer + C->getZExtValue(),

jayfoad wrote:

```suggestion
  FirstAnswer = std::min(FirstAnswer + C->getZExtValue(),
```

https://github.com/llvm/llvm-project/pull/139503
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev edited https://github.com/llvm/llvm-project/pull/139508
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LoopVectorizer] Bundle partial reductions inside VPMulAccumulateReductionRecipe (PR #136173)

2025-05-12 Thread Sander de Smalen via llvm-branch-commits



@@ -2432,12 +2437,40 @@ static void 
tryToCreateAbstractReductionRecipe(VPReductionRecipe *Red,
   Red->replaceAllUsesWith(AbstractR);
 }
 
+/// This function tries to create an abstract recipe from a partial reduction 
to
+/// hide its mul and extends from cost estimation.
+static void
+tryToCreateAbstractPartialReductionRecipe(VPPartialReductionRecipe *PRed) {

sdesmalen-arm wrote:

The way I read the code, by the time we get to this point it has already 
recognised a reduction, so there is a `VP[Partial]ReductionRecipe`. 
It then tries to analyse whether that recipe can be transformed into a 
`VPMulAccumulateReductionRecipe`. For `VPReductionRecipe` it will clamp the 
range to all the VFs that can be turned into a 
`VPMulAccumulateReductionRecipe`, but for `VPPartialReductionRecipe` it doesn't 
do that. I don't see why for partial reductions we'd do something different.

In fact, why wouldn't the `tryToMatchAndCreateMulAccumulateReduction` code be 
sufficient here? Now that you've made `VPPartialReductionRecipe` a subclass of 
`VPReductionRecipe`, I'd expect that code to function roughly the same.

https://github.com/llvm/llvm-project/pull/136173
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139495

>From 02b755091def57f5cf541ed04b7a0b8283ba267d Mon Sep 17 00:00:00 2001
From: Iris Shi <0...@owo.li>
Date: Mon, 12 May 2025 13:32:41 +0800
Subject: [PATCH] [RISCV][Scheduler] Add scheduler definitions for the Q
 extension

---
 llvm/lib/Target/RISCV/RISCVInstrInfoQ.td  | 98 ---
 llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td  |  1 +
 llvm/lib/Target/RISCV/RISCVSchedRocket.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td|  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td |  1 +
 .../lib/Target/RISCV/RISCVSchedSpacemitX60.td |  1 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR345.td |  1 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR7.td   |  1 +
 .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td |  1 +
 .../Target/RISCV/RISCVSchedXiangShanNanHu.td  |  1 +
 llvm/lib/Target/RISCV/RISCVSchedule.td| 88 -
 14 files changed, 158 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
index 51b9c806976f5..4dc33dd22b2aa 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
@@ -25,95 +25,119 @@ defvar QExtsRV64 = [QExt];
 
//===--===//
 
 let Predicates = [HasStdExtQ] in {
-  let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
-  def FLQ : RVInstI<0b100, OPC_LOAD_FP, (outs FPR128:$rd),
-  (ins GPRMem:$rs1, simm12:$imm12),
-  "flq", "$rd, ${imm12}(${rs1})">;
+  def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>;
+
   // Operands for stores are in the order srcreg, base, offset rather than
   // reflecting the order these fields are specified in the instruction
   // encoding.
-  let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
-  def FSQ : RVInstS<0b100, OPC_STORE_FP, (outs),
-  (ins FPR128:$rs2, GPRMem:$rs1, simm12:$imm12),
-  "fsq", "$rs2, ${imm12}(${rs1})">;
+  def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>;
 } // Predicates = [HasStdExtQ]
 
 foreach Ext = QExts in {
-  defm FMADD_Q : FPFMA_rrr_frm_m;
-  defm FMSUB_Q : FPFMA_rrr_frm_m;
-  defm FNMSUB_Q : FPFMA_rrr_frm_m;
-  defm FNMADD_Q : FPFMA_rrr_frm_m;
+  let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in {
+defm FMADD_Q : FPFMA_rrr_frm_m;
+defm FMSUB_Q : FPFMA_rrr_frm_m;
+defm FNMSUB_Q : FPFMA_rrr_frm_m;
+defm FNMADD_Q : FPFMA_rrr_frm_m;
+  }
 
-  defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>;
-  defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>;
+  let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in {
+defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>;
+defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>;
+  }
 
+  let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in 
   defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>;
 
+  let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in 
   defm FDIV_Q : FPALU_rr_frm_m<0b000, "fdiv.q", Ext>;
 
   defm FSQRT_Q : FPUnaryOp_r_frm_m<0b010, 0b0, Ext, Ext.PrimaryTy,
-   Ext.PrimaryTy, "fsqrt.q">;
+   Ext.PrimaryTy, "fsqrt.q">,
+ Sched<[WriteFSqrt128, ReadFSqrt128]>;
 
-  let mayRaiseFPException = 0 in {
+  let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128],
+  mayRaiseFPException = 0 in {
 defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>;
 defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>;
 defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>;
   }
 
-  defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>;
-  defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>;
+  let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in {
+defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>;
+defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>;
+  }
 
   defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b010, 0b00011, Ext, Ext.F32Ty,
-Ext.PrimaryTy, "fcvt.s.q">;
+Ext.PrimaryTy, "fcvt.s.q">,
+  Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>;
 
   defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b0, Ext,
-  Ext.PrimaryTy, Ext.F32Ty, 
"fcvt.q.s">;
+  Ext.PrimaryTy, Ext.F32Ty, 
+  "fcvt.q.s">,
+  Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>;
 
   defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b011, 0b00011, Ext, Ext.F64Ty,
-Ext.Pr

[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139508

>From 6f4a034604e939cad0fa25c0b11768667c213ec6 Mon Sep 17 00:00:00 2001
From: Iris Shi <0...@owo.li>
Date: Mon, 12 May 2025 15:04:28 +0800
Subject: [PATCH] [RISCV][MC] Add Q support for Zfa

---
 llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td |  29 ++
 llvm/test/MC/RISCV/rv64zfa-only-valid.s|  19 +
 llvm/test/MC/RISCV/zfa-invalid.s   |  13 +-
 llvm/test/MC/RISCV/zfa-quad-invalid.s  |  42 +++
 llvm/test/MC/RISCV/zfa-valid.s | 391 -
 5 files changed, 484 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/MC/RISCV/rv64zfa-only-valid.s
 create mode 100644 llvm/test/MC/RISCV/zfa-quad-invalid.s

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
index 8a449d32e0104..0ad654db42f5c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
@@ -175,6 +175,28 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>;
 }
 } // Predicates = [HasStdExtZfa, HasStdExtZfh]
 
+let Predicates = [HasStdExtZfa, HasStdExtQ] in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def FLI_Q : FPFLI_r<0b011, 0b1, 0b000, FPR128, "fli.q">;
+
+def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>;
+def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>;
+
+def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">;
+def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, 
+ "froundnx.q">;
+
+def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>;
+def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>;
+} // Predicates = [HasStdExtZfa, HasStdExtQ]
+
+let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in {
+  let mayRaiseFPException = 0 in {
+def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b1, 0b000, GPR, FPR128, 
"fmvh.x.q">;
+def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">;
+  }
+} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64]
+
 
//===--===//
 // Pseudo-instructions and codegen patterns
 
//===--===//
@@ -200,6 +222,13 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt",
 (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
 }
 
+let Predicates = [HasStdExtZfa, HasStdExtQ] in {
+def : InstAlias<"fgtq.q $rd, $rs, $rt",
+(FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>;
+def : InstAlias<"fgeq.q $rd, $rs, $rt",
+(FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>;
+}
+
 
//===--===//
 // Codegen patterns
 
//===--===//
diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s 
b/llvm/test/MC/RISCV/rv64zfa-only-valid.s
new file mode 100644
index 0..95fb253b145c1
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64zfa-only-valid.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases 
-show-encoding \
+# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \
+# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \
+# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+#
+# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \
+# RUN: -M no-aliases -show-encoding < %s 2>&1 \
+# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s
+
+# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1
+# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6]
+# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional 
Floating-Point){{$}}
+fmvh.x.q a1, fs1
+
+# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2
+# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6]
+# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional 
Floating-Point){{$}}
+fmvp.q.x fs1, a1, a2
diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s
index c2537c3fc5102..cedc9279db3cb 100644
--- a/llvm/test/MC/RISCV/zfa-invalid.s
+++ b/llvm/test/MC/RISCV/zfa-invalid.s
@@ -1,5 +1,5 @@
-# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV32 %s
-# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s
+# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV32 %s
+# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s
 
 # Invalid rounding modes
 # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode
@@ -35,6 +35,10 @@ fli.d ft1, 3.56e+02
 # CHECK-NO-RV32: error: operand must be a valid floating-point constant
 fli.h ft1, 1.60e+00
 
+

[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139495

>From 02b755091def57f5cf541ed04b7a0b8283ba267d Mon Sep 17 00:00:00 2001
From: Iris Shi <0...@owo.li>
Date: Mon, 12 May 2025 13:32:41 +0800
Subject: [PATCH] [RISCV][Scheduler] Add scheduler definitions for the Q
 extension

---
 llvm/lib/Target/RISCV/RISCVInstrInfoQ.td  | 98 ---
 llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td  |  1 +
 llvm/lib/Target/RISCV/RISCVSchedRocket.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td|  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td |  1 +
 .../lib/Target/RISCV/RISCVSchedSpacemitX60.td |  1 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR345.td |  1 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR7.td   |  1 +
 .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td |  1 +
 .../Target/RISCV/RISCVSchedXiangShanNanHu.td  |  1 +
 llvm/lib/Target/RISCV/RISCVSchedule.td| 88 -
 14 files changed, 158 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
index 51b9c806976f5..4dc33dd22b2aa 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
@@ -25,95 +25,119 @@ defvar QExtsRV64 = [QExt];
 
//===--===//
 
 let Predicates = [HasStdExtQ] in {
-  let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
-  def FLQ : RVInstI<0b100, OPC_LOAD_FP, (outs FPR128:$rd),
-  (ins GPRMem:$rs1, simm12:$imm12),
-  "flq", "$rd, ${imm12}(${rs1})">;
+  def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>;
+
   // Operands for stores are in the order srcreg, base, offset rather than
   // reflecting the order these fields are specified in the instruction
   // encoding.
-  let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
-  def FSQ : RVInstS<0b100, OPC_STORE_FP, (outs),
-  (ins FPR128:$rs2, GPRMem:$rs1, simm12:$imm12),
-  "fsq", "$rs2, ${imm12}(${rs1})">;
+  def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>;
 } // Predicates = [HasStdExtQ]
 
 foreach Ext = QExts in {
-  defm FMADD_Q : FPFMA_rrr_frm_m;
-  defm FMSUB_Q : FPFMA_rrr_frm_m;
-  defm FNMSUB_Q : FPFMA_rrr_frm_m;
-  defm FNMADD_Q : FPFMA_rrr_frm_m;
+  let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in {
+defm FMADD_Q : FPFMA_rrr_frm_m;
+defm FMSUB_Q : FPFMA_rrr_frm_m;
+defm FNMSUB_Q : FPFMA_rrr_frm_m;
+defm FNMADD_Q : FPFMA_rrr_frm_m;
+  }
 
-  defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>;
-  defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>;
+  let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in {
+defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>;
+defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>;
+  }
 
+  let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in 
   defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>;
 
+  let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in 
   defm FDIV_Q : FPALU_rr_frm_m<0b000, "fdiv.q", Ext>;
 
   defm FSQRT_Q : FPUnaryOp_r_frm_m<0b010, 0b0, Ext, Ext.PrimaryTy,
-   Ext.PrimaryTy, "fsqrt.q">;
+   Ext.PrimaryTy, "fsqrt.q">,
+ Sched<[WriteFSqrt128, ReadFSqrt128]>;
 
-  let mayRaiseFPException = 0 in {
+  let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128],
+  mayRaiseFPException = 0 in {
 defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>;
 defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>;
 defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>;
   }
 
-  defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>;
-  defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>;
+  let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in {
+defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>;
+defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>;
+  }
 
   defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b010, 0b00011, Ext, Ext.F32Ty,
-Ext.PrimaryTy, "fcvt.s.q">;
+Ext.PrimaryTy, "fcvt.s.q">,
+  Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>;
 
   defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b0, Ext,
-  Ext.PrimaryTy, Ext.F32Ty, 
"fcvt.q.s">;
+  Ext.PrimaryTy, Ext.F32Ty, 
+  "fcvt.q.s">,
+  Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>;
 
   defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b011, 0b00011, Ext, Ext.F64Ty,
-Ext.Pr

[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139508



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139508



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)

2025-05-12 Thread via llvm-branch-commits



@@ -1842,23 +1859,52 @@ CharLiteralParser::CharLiteralParser(const char *begin, 
const char *end,
 HadError = true;
 PP.Diag(Loc, diag::err_character_too_large);
   }
+  if (!HadError && Converter) {
+assert(Kind != tok::wide_char_constant &&
+   "Wide character translation not supported");
+char ByteChar = *tmp_out_start;
+SmallString<1> ConvertedChar;
+Converter->convert(StringRef(&ByteChar, 1), ConvertedChar);
+assert(ConvertedChar.size() == 1 &&
+   "Char size increased after translation");
+*tmp_out_start = ConvertedChar[0];

cor3ntin wrote:

This should be handled with diagnostics. The conversion can also fail, and that 
should be handled.

https://github.com/llvm/llvm-project/pull/138895
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)

2025-05-12 Thread via llvm-branch-commits



@@ -1842,23 +1859,52 @@ CharLiteralParser::CharLiteralParser(const char *begin, 
const char *end,
 HadError = true;
 PP.Diag(Loc, diag::err_character_too_large);
   }
+  if (!HadError && Converter) {
+assert(Kind != tok::wide_char_constant &&
+   "Wide character translation not supported");
+char ByteChar = *tmp_out_start;
+SmallString<1> ConvertedChar;
+Converter->convert(StringRef(&ByteChar, 1), ConvertedChar);

cor3ntin wrote:

Here the order of operations should be:
  -> convert from UTF-8 to UTF-32, check it's a valid character
  -> convert the same buffer from UTF-8 to the literal encoding
   -> check that this succeeds and has a size of one

(i.e. some code points might have a size of 2 when encoded as UTF-8 but 1 when 
encoded as Latin-1)

https://github.com/llvm/llvm-project/pull/138895
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)

2025-05-12 Thread via llvm-branch-commits



@@ -146,6 +144,8 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
   // that would have been \", which would not have been the end of string.
   unsigned ResultChar = *ThisTokBuf++;
   char Escape = ResultChar;
+  bool Translate = true;

cor3ntin wrote:

```suggestion
  bool Transcode = true;
```

I would prefer this defaults to false

https://github.com/llvm/llvm-project/pull/138895
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)

2025-05-12 Thread via llvm-branch-commits



@@ -416,8 +416,7 @@ Builtin Macros
 ``__clang_literal_encoding__``
   Defined to a narrow string literal that represents the current encoding of
   narrow string literals, e.g., ``"hello"``. This macro typically expands to
-  "UTF-8" (but may change in the future if the
-  ``-fexec-charset="Encoding-Name"`` option is implemented.)
+  the charset specified by -fexec-charset if specified, or the system charset.

cor3ntin wrote:

```suggestion
  the text encoding specified by -fexec-charset if specified, or the system 
charset.
```

https://github.com/llvm/llvm-project/pull/138895
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)

2025-05-12 Thread via llvm-branch-commits



@@ -246,18 +249,19 @@ class StringLiteralParser {
   StringLiteralEvalMethod EvalMethod;
 
 public:
-  StringLiteralParser(ArrayRef StringToks, Preprocessor &PP,
-  StringLiteralEvalMethod StringMethod =
-  StringLiteralEvalMethod::Evaluated);
+  StringLiteralParser(
+  ArrayRef StringToks, Preprocessor &PP,
+  StringLiteralEvalMethod StringMethod = 
StringLiteralEvalMethod::Evaluated,
+  ConversionAction Action = ToExecCharset);

cor3ntin wrote:

Why do we need Conversion at all?
I would expect that any ordinary, non-unevaluated literal would be encoded and 
the
`LiteralConverter` should be the same for all strings so it can live in 
`Preprocessor`

https://github.com/llvm/llvm-project/pull/138895
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)

2025-05-12 Thread via llvm-branch-commits



@@ -0,0 +1,36 @@
+//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CLANG_LEX_LITERALCONVERTER_H
+#define LLVM_CLANG_LEX_LITERALCONVERTER_H
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CharSet.h"
+
+enum ConversionAction { NoConversion, ToSystemCharset, ToExecCharset };
+
+class LiteralConverter {
+  llvm::StringRef InternalCharset;
+  llvm::StringRef SystemCharset;
+  llvm::StringRef ExecCharset;
+  llvm::StringMap CharsetConverters;
+
+public:
+  llvm::CharSetConverter *getConverter(const char *Codepage);
+  llvm::CharSetConverter *getConverter(ConversionAction Action);
+  llvm::CharSetConverter *createAndInsertCharConverter(const char *To);
+  void setConvertersFromOptions(const clang::LangOptions &Opts,
+const clang::TargetInfo &TInfo,
+clang::DiagnosticsEngine &Diags);

cor3ntin wrote:

I would prefer, for example, a static function that returns an optional or a null 
pointer on failure, and lets the caller deal with the error.

https://github.com/llvm/llvm-project/pull/138895
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)

2025-05-12 Thread via llvm-branch-commits



@@ -0,0 +1,36 @@
+//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CLANG_LEX_LITERALCONVERTER_H
+#define LLVM_CLANG_LEX_LITERALCONVERTER_H
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CharSet.h"
+
+enum ConversionAction { NoConversion, ToSystemCharset, ToExecCharset };
+
+class LiteralConverter {
+  llvm::StringRef InternalCharset;
+  llvm::StringRef SystemCharset;
+  llvm::StringRef ExecCharset;
+  llvm::StringMap CharsetConverters;

cor3ntin wrote:

Why do we need a Map?
I would expect the Preprocessor to have
   - A converter from UTF-8
   - A converter to UTF-8

We might have 2 additional in the future for wide strings, and storing 4 
pointers seems fine.
We could also have a small class that stores a pair of converters ( A -> B and 
B -> A)


https://github.com/llvm/llvm-project/pull/138895
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)

2025-05-12 Thread via llvm-branch-commits



@@ -491,6 +491,9 @@ class Triple {
   /// For example, "fooos1.2.3" would return "1.2.3".
   StringRef getEnvironmentVersionString() const;
 
+  /// getSystemCharset - Get the system charset of the triple.
+  StringRef getSystemCharset() const;
+

cor3ntin wrote:

Let's call that `DefaultTextEncoding` (on most platforms we ignore whatever the 
system does by default).

https://github.com/llvm/llvm-project/pull/138895
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)

2025-05-12 Thread via llvm-branch-commits



@@ -367,6 +370,15 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
 HadError = true;
   }
 
+  if (Translate && Converter) {
+// Invalid escapes are written as '?' and then translated.
+char ByteChar = Invalid ? '?' : ResultChar;
+SmallString<8> ResultCharConv;
+Converter->convert(StringRef(&ByteChar, 1), ResultCharConv);
+assert(ResultCharConv.size() == 1 &&
+   "Char size increased after translation");

cor3ntin wrote:

Can we have a `GetReplacementChar` function... somewhere, and cache the result?

https://github.com/llvm/llvm-project/pull/138895
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)

2025-05-12 Thread via llvm-branch-commits



@@ -0,0 +1,36 @@
+//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CLANG_LEX_LITERALCONVERTER_H
+#define LLVM_CLANG_LEX_LITERALCONVERTER_H
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CharSet.h"
+
+enum ConversionAction { NoConversion, ToSystemCharset, ToExecCharset };

cor3ntin wrote:

We should have `FromOrdinaryLiteralEncoding` and `ToOrdinaryLiteralEncoding` 
instead of `System`/`Exec`.

https://github.com/llvm/llvm-project/pull/138895
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)

2025-05-12 Thread via llvm-branch-commits



@@ -633,6 +633,9 @@ class LangOptions : public LangOptionsBase {
   bool AtomicFineGrainedMemory = false;
   bool AtomicIgnoreDenormalMode = false;
 
+  /// Name of the exec charset to convert the internal charset to.
+  std::string ExecCharset;

cor3ntin wrote:

Let's call that a TextEncoding consistently (replacing all instances of Codepage 
and Charset).

https://github.com/llvm/llvm-project/pull/138895
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139495



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139508

>From 537ccab69c5d426109d9c9948f55c532e83b0ecf Mon Sep 17 00:00:00 2001
From: Iris Shi <0...@owo.li>
Date: Mon, 12 May 2025 15:04:28 +0800
Subject: [PATCH] [RISCV][MC] Add Q support for Zfa

---
 llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td |  29 ++
 llvm/test/MC/RISCV/rv64zfa-only-valid.s|  19 +
 llvm/test/MC/RISCV/zfa-invalid.s   |  13 +-
 llvm/test/MC/RISCV/zfa-quad-invalid.s  |  42 +++
 llvm/test/MC/RISCV/zfa-valid.s | 391 -
 5 files changed, 484 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/MC/RISCV/rv64zfa-only-valid.s
 create mode 100644 llvm/test/MC/RISCV/zfa-quad-invalid.s

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
index 8a449d32e0104..0ad654db42f5c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
@@ -175,6 +175,28 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>;
 }
 } // Predicates = [HasStdExtZfa, HasStdExtZfh]
 
+let Predicates = [HasStdExtZfa, HasStdExtQ] in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def FLI_Q : FPFLI_r<0b011, 0b1, 0b000, FPR128, "fli.q">;
+
+def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>;
+def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>;
+
+def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">;
+def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, 
+ "froundnx.q">;
+
+def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>;
+def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>;
+} // Predicates = [HasStdExtZfa, HasStdExtQ]
+
+let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in {
+  let mayRaiseFPException = 0 in {
+def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b1, 0b000, GPR, FPR128, 
"fmvh.x.q">;
+def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">;
+  }
+} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64]
+
 
//===--===//
 // Pseudo-instructions and codegen patterns
 
//===--===//
@@ -200,6 +222,13 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt",
 (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
 }
 
+let Predicates = [HasStdExtZfa, HasStdExtQ] in {
+def : InstAlias<"fgtq.q $rd, $rs, $rt",
+(FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>;
+def : InstAlias<"fgeq.q $rd, $rs, $rt",
+(FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>;
+}
+
 
//===--===//
 // Codegen patterns
 
//===--===//
diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s 
b/llvm/test/MC/RISCV/rv64zfa-only-valid.s
new file mode 100644
index 0..95fb253b145c1
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64zfa-only-valid.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases 
-show-encoding \
+# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \
+# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \
+# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+#
+# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \
+# RUN: -M no-aliases -show-encoding < %s 2>&1 \
+# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s
+
+# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1
+# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6]
+# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional 
Floating-Point){{$}}
+fmvh.x.q a1, fs1
+
+# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2
+# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6]
+# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional 
Floating-Point){{$}}
+fmvp.q.x fs1, a1, a2
diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s
index c2537c3fc5102..cedc9279db3cb 100644
--- a/llvm/test/MC/RISCV/zfa-invalid.s
+++ b/llvm/test/MC/RISCV/zfa-invalid.s
@@ -1,5 +1,5 @@
-# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV32 %s
-# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s
+# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV32 %s
+# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s
 
 # Invalid rounding modes
 # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode
@@ -35,6 +35,10 @@ fli.d ft1, 3.56e+02
 # CHECK-NO-RV32: error: operand must be a valid floating-point constant
 fli.h ft1, 1.60e+00
 
+

[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)

2025-05-12 Thread Iris Shi via llvm-branch-commits


https://github.com/el-ev updated 
https://github.com/llvm/llvm-project/pull/139495

>From 5c454f3091822039e98bcff0693db1e7a5205351 Mon Sep 17 00:00:00 2001
From: Iris Shi <0...@owo.li>
Date: Mon, 12 May 2025 13:32:41 +0800
Subject: [PATCH] [RISCV][Scheduler] Add scheduler definitions for the Q
 extension

---
 llvm/lib/Target/RISCV/RISCVInstrInfoQ.td  | 98 ---
 llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td  |  1 +
 llvm/lib/Target/RISCV/RISCVSchedRocket.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td|  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td |  1 +
 llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td |  1 +
 .../lib/Target/RISCV/RISCVSchedSpacemitX60.td |  1 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR345.td |  1 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR7.td   |  1 +
 .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td |  1 +
 .../Target/RISCV/RISCVSchedXiangShanNanHu.td  |  1 +
 llvm/lib/Target/RISCV/RISCVSchedule.td| 88 -
 14 files changed, 158 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
index aa7dcb789a8c2..8cc965ccc515d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td
@@ -25,95 +25,119 @@ defvar QExtsRV64 = [QExt];
 
//===--===//
 
 let Predicates = [HasStdExtQ] in {
-  let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
-  def FLQ : RVInstI<0b100, OPC_LOAD_FP, (outs FPR128:$rd),
-  (ins GPRMem:$rs1, simm12:$imm12),
-  "flq", "$rd, ${imm12}(${rs1})">;
+  def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>;
+
   // Operands for stores are in the order srcreg, base, offset rather than
   // reflecting the order these fields are specified in the instruction
   // encoding.
-  let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
-  def FSQ : RVInstS<0b100, OPC_STORE_FP, (outs),
-  (ins FPR128:$rs2, GPRMem:$rs1, simm12:$imm12),
-  "fsq", "$rs2, ${imm12}(${rs1})">;
+  def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>;
 } // Predicates = [HasStdExtQ]
 
 foreach Ext = QExts in {
-  defm FMADD_Q : FPFMA_rrr_frm_m;
-  defm FMSUB_Q : FPFMA_rrr_frm_m;
-  defm FNMSUB_Q : FPFMA_rrr_frm_m;
-  defm FNMADD_Q : FPFMA_rrr_frm_m;
+  let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in {
+defm FMADD_Q : FPFMA_rrr_frm_m;
+defm FMSUB_Q : FPFMA_rrr_frm_m;
+defm FNMSUB_Q : FPFMA_rrr_frm_m;
+defm FNMADD_Q : FPFMA_rrr_frm_m;
+  }
 
-  defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>;
-  defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>;
+  let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in {
+defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>;
+defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>;
+  }
 
+  let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in 
   defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>;
 
+  let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in 
   defm FDIV_Q : FPALU_rr_frm_m<0b000, "fdiv.q", Ext>;
 
   defm FSQRT_Q : FPUnaryOp_r_frm_m<0b010, 0b0, Ext, Ext.PrimaryTy,
-   Ext.PrimaryTy, "fsqrt.q">;
+   Ext.PrimaryTy, "fsqrt.q">,
+ Sched<[WriteFSqrt128, ReadFSqrt128]>;
 
-  let mayRaiseFPException = 0 in {
+  let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128],
+  mayRaiseFPException = 0 in {
 defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>;
 defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>;
 defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>;
   }
 
-  defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>;
-  defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>;
+  let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in {
+defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>;
+defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>;
+  }
 
   defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b010, 0b00011, Ext, Ext.F32Ty,
-Ext.PrimaryTy, "fcvt.s.q">;
+Ext.PrimaryTy, "fcvt.s.q">,
+  Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>;
 
   defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b0, Ext,
-  Ext.PrimaryTy, Ext.F32Ty, 
"fcvt.q.s">;
+  Ext.PrimaryTy, Ext.F32Ty, 
+  "fcvt.q.s">,
+  Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>;
 
   defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b011, 0b00011, Ext, Ext.F64Ty,
-Ext.Pr

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Fill in addPreSched2 passes (PR #139516)

2025-05-12 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/139516
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [X86] Remove extra MOV after widening atomic load (PR #138635)

2025-05-12 Thread Simon Pilgrim via llvm-branch-commits



@@ -1200,6 +1200,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), 
(MOV16rm addr:$src)>;
 def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>;
 def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>;
 
+def : Pat<(v4i32 (scalar_to_vector (i32 (anyext (i16 (atomic_load_16 
addr:$src)),
+   (MOVDI2PDIrm addr:$src)>;   // load atomic <2 x i8>

RKSimon wrote:

Next thing is to add SSE/AVX handling - I've added better test coverage at 
d27d0c7a5266f89f9d62464e71be98421aae598d

https://github.com/llvm/llvm-project/pull/138635
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Use minnum instead of maxnum for fmed3 src2-nan fold (PR #139531)

2025-05-12 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/139531

By the pseudocode in the ISA manual, if any input is a nan it acts
like min3, which will fold to min2 of the other operands. The other
cases fold to min, I'm not sure how this one was wrong.

>From 069254f8608ac85c821f214ce61432000701022c Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 12 May 2025 12:25:45 +0200
Subject: [PATCH] AMDGPU: Use minnum instead of maxnum for fmed3 src2-nan fold

By the pseudocode in the ISA manual, if any input is a nan it acts
like min3, which will fold to min2 of the other operands. The other
cases fold to min, I'm not sure how this one was wrong.
---
 .../Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp |  2 +-
 llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll | 16 
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 1494428cb2bf5..1ca300464a697 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -867,7 +867,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
 } else if (match(Src1, PatternMatch::m_NaN()) || isa(Src1)) {
   V = IC.Builder.CreateMinNum(Src0, Src2);
 } else if (match(Src2, PatternMatch::m_NaN()) || isa(Src2)) {
-  V = IC.Builder.CreateMaxNum(Src0, Src1);
+  V = IC.Builder.CreateMinNum(Src0, Src1);
 }
 
 if (V) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll 
b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
index bf94637b36a34..972862d8e327e 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
@@ -117,7 +117,7 @@ define float @fmed3_x_undef_y_f32(float %x, float %y) #1 {
 define float @fmed3_x_y_undef_f32(float %x, float %y) #1 {
 ; CHECK-LABEL: define float @fmed3_x_y_undef_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float 
[[Y]])
+; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float 
[[Y]])
 ; CHECK-NEXT:ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
@@ -147,7 +147,7 @@ define float @fmed3_x_qnan0_y_f32(float %x, float %y) #1 {
 define float @fmed3_x_y_qnan0_f32(float %x, float %y) #1 {
 ; CHECK-LABEL: define float @fmed3_x_y_qnan0_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float 
[[Y]])
+; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float 
[[Y]])
 ; CHECK-NEXT:ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 
0x7FF8)
@@ -276,7 +276,7 @@ define float @fmed3_0_nan_1_f32() #1 {
 define float @fmed3_0_1_nan_f32() #1 {
 ; CHECK-LABEL: define float @fmed3_0_1_nan_f32(
 ; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT:ret float 1.00e+00
+; CHECK-NEXT:ret float 0.00e+00
 ;
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 
0x7FF80010)
   ret float %med
@@ -303,7 +303,7 @@ define float @fmed3_0_undef_1_f32() #1 {
 define float @fmed3_0_1_undef_f32() #1 {
 ; CHECK-LABEL: define float @fmed3_0_1_undef_f32(
 ; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT:ret float 1.00e+00
+; CHECK-NEXT:ret float 0.00e+00
 ;
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef)
   ret float %med
@@ -359,7 +359,7 @@ define float @fmed3_x_snan1_y_f32(float %x, float %y) #1 {
 define float @fmed3_x_y_snan1_f32(float %x, float %y) #1 {
 ; CHECK-LABEL: define float @fmed3_x_y_snan1_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float 
[[Y]])
+; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float 
[[Y]])
 ; CHECK-NEXT:ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 
0x7FF4)
@@ -414,7 +414,7 @@ define float @fmed3_snan1_neg1_2_f32(float %x, float %y) #1 
{
 define float @fmed3_neg2_3_snan1_f32(float %x, float %y) #1 {
 ; CHECK-LABEL: define float @fmed3_neg2_3_snan1_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:ret float 3.00e+00
+; CHECK-NEXT:ret float -2.00e+00
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float -2.0, float 3.0, float 
0x7FF4)
   ret float %med3
@@ -447,7 +447,7 @@ define amdgpu_ps float 
@amdgpu_ps_default_fmed3_x_snan1_y_f32(float %x, float %y
 define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(float %x, float 
%y) {
 ; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]

[llvm-branch-commits] [llvm] AMDGPU: Disable most fmed3 folds for strictfp (PR #139530)

2025-05-12 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/139530

None

>From 012d451378314c9633c3a38891fca23c027e54b5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 12 May 2025 10:42:16 +0200
Subject: [PATCH] AMDGPU: Disable most fmed3 folds for strictfp

---
 .../lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp |  3 +++
 llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll | 12 ++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index e76396f6ffbb0..1494428cb2bf5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -855,6 +855,9 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
 return IC.replaceInstUsesWith(II, Src);
 }
 
+if (II.isStrictFP())
+  break;
+
 // Checking for NaN before canonicalization provides better fidelity when
 // mapping other operations onto fmed3 since the order of operands is
 // unchanged.
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll 
b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
index 5274ac1093a26..bf94637b36a34 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
@@ -494,7 +494,7 @@ define amdgpu_ps float 
@amdgpu_ps_attr_fmed3_x_y_snan1_f32(float %x, float %y) #
 define float @fmed3_qnan0_x_y_f32_strictfp(float %x, float %y) #2 {
 ; CHECK-LABEL: define float @fmed3_qnan0_x_y_f32_strictfp(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3:[0-9]+]] {
-; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float 
[[Y]])
+; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float 
0x7FF8, float [[X]], float [[Y]]) #[[ATTR5:[0-9]+]]
 ; CHECK-NEXT:ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8, float 
%x, float %y) strictfp
@@ -504,7 +504,7 @@ define float @fmed3_qnan0_x_y_f32_strictfp(float %x, float 
%y) #2 {
 define float @fmed3_x_qnan0_y_f32_strictfp(float %x, float %y) #2 {
 ; CHECK-LABEL: define float @fmed3_x_qnan0_y_f32_strictfp(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float 
[[Y]])
+; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X]], 
float 0x7FF8, float [[Y]]) #[[ATTR5]]
 ; CHECK-NEXT:ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 
0x7FF8, float %y) strictfp
@@ -514,7 +514,7 @@ define float @fmed3_x_qnan0_y_f32_strictfp(float %x, float 
%y) #2 {
 define float @fmed3_x_y_qnan0_f32_strictfp(float %x, float %y) #2 {
 ; CHECK-LABEL: define float @fmed3_x_y_qnan0_f32_strictfp(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float 
[[Y]])
+; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X]], 
float [[Y]], float 0x7FF8) #[[ATTR5]]
 ; CHECK-NEXT:ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 
0x7FF8) strictfp
@@ -524,7 +524,7 @@ define float @fmed3_x_y_qnan0_f32_strictfp(float %x, float 
%y) #2 {
 define float @fmed3_snan1_x_y_f32_strictfp(float %x, float %y) #2 {
 ; CHECK-LABEL: define float @fmed3_snan1_x_y_f32_strictfp(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float 
[[Y]])
+; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float 
0x7FF4, float [[X]], float [[Y]]) #[[ATTR5]]
 ; CHECK-NEXT:ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4, float 
%x, float %y) strictfp
@@ -534,7 +534,7 @@ define float @fmed3_snan1_x_y_f32_strictfp(float %x, float 
%y) #2 {
 define float @fmed3_x_snan1_y_f32_strictfp(float %x, float %y) #2 {
 ; CHECK-LABEL: define float @fmed3_x_snan1_y_f32_strictfp(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float 
[[Y]])
+; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X]], 
float 0x7FF4, float [[Y]]) #[[ATTR5]]
 ; CHECK-NEXT:ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 
0x7FF4, float %y) strictfp
@@ -544,7 +544,7 @@ define float @fmed3_x_snan1_y_f32_strictfp(float %x, float 
%y) #2 {
 define float @fmed3_x_y_snan1_f32_strictfp(float %x, float %y) #2 {
 ; CHECK-LABEL: define float @fmed3_x_y_snan1_f32_strictfp(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float 
[[Y]])
+;

[llvm-branch-commits] [llvm] AMDGPU: Disable most fmed3 folds for strictfp (PR #139530)

2025-05-12 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/139530
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Use minnum instead of maxnum for fmed3 src2-nan fold (PR #139531)

2025-05-12 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes

By the pseudocode in the ISA manual, if any input is a nan it acts
like min3, which will fold to min2 of the other operands. The other
cases fold to min, I'm not sure how this one was wrong.

---
Full diff: https://github.com/llvm/llvm-project/pull/139531.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+1-1) 
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll (+8-8) 


```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 1494428cb2bf5..1ca300464a697 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -867,7 +867,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
 } else if (match(Src1, PatternMatch::m_NaN()) || isa(Src1)) {
   V = IC.Builder.CreateMinNum(Src0, Src2);
 } else if (match(Src2, PatternMatch::m_NaN()) || isa(Src2)) {
-  V = IC.Builder.CreateMaxNum(Src0, Src1);
+  V = IC.Builder.CreateMinNum(Src0, Src1);
 }
 
 if (V) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll 
b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
index bf94637b36a34..972862d8e327e 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
@@ -117,7 +117,7 @@ define float @fmed3_x_undef_y_f32(float %x, float %y) #1 {
 define float @fmed3_x_y_undef_f32(float %x, float %y) #1 {
 ; CHECK-LABEL: define float @fmed3_x_y_undef_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float 
[[Y]])
+; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float 
[[Y]])
 ; CHECK-NEXT:ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
@@ -147,7 +147,7 @@ define float @fmed3_x_qnan0_y_f32(float %x, float %y) #1 {
 define float @fmed3_x_y_qnan0_f32(float %x, float %y) #1 {
 ; CHECK-LABEL: define float @fmed3_x_y_qnan0_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float 
[[Y]])
+; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float 
[[Y]])
 ; CHECK-NEXT:ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 
0x7FF8)
@@ -276,7 +276,7 @@ define float @fmed3_0_nan_1_f32() #1 {
 define float @fmed3_0_1_nan_f32() #1 {
 ; CHECK-LABEL: define float @fmed3_0_1_nan_f32(
 ; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT:ret float 1.00e+00
+; CHECK-NEXT:ret float 0.00e+00
 ;
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 
0x7FF80010)
   ret float %med
@@ -303,7 +303,7 @@ define float @fmed3_0_undef_1_f32() #1 {
 define float @fmed3_0_1_undef_f32() #1 {
 ; CHECK-LABEL: define float @fmed3_0_1_undef_f32(
 ; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT:ret float 1.00e+00
+; CHECK-NEXT:ret float 0.00e+00
 ;
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef)
   ret float %med
@@ -359,7 +359,7 @@ define float @fmed3_x_snan1_y_f32(float %x, float %y) #1 {
 define float @fmed3_x_y_snan1_f32(float %x, float %y) #1 {
 ; CHECK-LABEL: define float @fmed3_x_y_snan1_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float 
[[Y]])
+; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float 
[[Y]])
 ; CHECK-NEXT:ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 
0x7FF4)
@@ -414,7 +414,7 @@ define float @fmed3_snan1_neg1_2_f32(float %x, float %y) #1 
{
 define float @fmed3_neg2_3_snan1_f32(float %x, float %y) #1 {
 ; CHECK-LABEL: define float @fmed3_neg2_3_snan1_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:ret float 3.00e+00
+; CHECK-NEXT:ret float -2.00e+00
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float -2.0, float 3.0, float 
0x7FF4)
   ret float %med3
@@ -447,7 +447,7 @@ define amdgpu_ps float 
@amdgpu_ps_default_fmed3_x_snan1_y_f32(float %x, float %y
 define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(float %x, float 
%y) {
 ; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float 
[[Y]])
+; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float 
[[Y]])
 ; CHECK-NEXT:ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 
0x7FF4

[llvm-branch-commits] [llvm] AMDGPU: Use minnum instead of maxnum for fmed3 src2-nan fold (PR #139531)

2025-05-12 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/139531
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Disable most fmed3 folds for strictfp (PR #139530)

2025-05-12 Thread Matt Arsenault via llvm-branch-commits


arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack
> [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/139530?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#139531** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/139531?utm_source=stack-comment-icon)
* **#139530** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/139530?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/139530?utm_source=stack-comment-view-in-graphite)
* **#139529** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/139529?utm_source=stack-comment-icon)
* `main`




This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn 
more about [stacking](https://stacking.dev/?utm_source=stack-comment).


https://github.com/llvm/llvm-project/pull/139530
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [ObjC] Support objc_claimAutoreleasedReturnValue (PR #138696)

2025-05-12 Thread Jon Roelofs via llvm-branch-commits


https://github.com/jroelofs approved this pull request.


https://github.com/llvm/llvm-project/pull/138696
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [NFC] Refactoring DXContainerYaml Root Parameter representation (PR #138318)

2025-05-12 Thread via llvm-branch-commits


https://github.com/joaosaffran edited 
https://github.com/llvm/llvm-project/pull/138318
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [analyzer] Workaround for slowdown spikes (unintended scope increase) (#136720) (PR #139597)

2025-05-12 Thread Donát Nagy via llvm-branch-commits



@@ -0,0 +1,200 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection 
-verify=expected,default %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection 
-analyzer-config inline-functions-with-ambiguous-loops=true 
-verify=expected,enabled %s
+
+// This file tests some heuristics in the engine that put functions on a
+// "do not inline" list if their analysis reaches the `analyzer-max-loop`
+// limit (by default 4 iterations) in a loop. This was almost surely intended
+// as memoization optimization for the "retry without inlining" fallback (if we
+// had to retry once, next time don't even try inlining), but aggressively
+// oversteps the "natural" scope: reaching 4 iterations on _one particular_
+// execution path does not imply that each path would need "retry without
+// inlining" especially if a different call receives different arguments.
+//
+// This heuristic significantly affects the scope/depth of the analysis (and
+// therefore the execution time) because without this limitation on the
+// inlining significantly more entry points would be able to exhaust their
+// `max-nodes` quota. (Trivial thin wrappers around big complex functions are
+// common in many projects.)
+//
+// Unfortunately, this arbitrary heuristic strongly relies on the current loop
+// handling model and its many limitations, so improvements in loop handling
+// can cause surprising slowdowns by reducing the "do not inline" blacklist.
+// In the tests "FIXME-BUT-NEEDED" comments mark "problematic" (aka buggy)
+// analyzer behavior which cannot be fixed without also improving the
+// heuristics for (not) inlining large functions.
+
+  int getNum(void); // Get an unknown symbolic number.

NagyDonat wrote:

```suggestion
int getNum(void); // Get an unknown symbolic number.
```
Oops, my bad: there are two superfluous spaces before this function declaration 
(they are also there in the commit that's merged on the main branch). Let's 
delete them at least in this backport.

https://github.com/llvm/llvm-project/pull/139597
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [analyzer] Workaround for slowdown spikes (unintended scope increase) (#136720) (PR #139597)

2025-05-12 Thread Donát Nagy via llvm-branch-commits


https://github.com/NagyDonat commented:

I read this rebased code and IMO it should work -- however I found a whitespace 
error that originated in my commit :sweat_smile: 

https://github.com/llvm/llvm-project/pull/139597
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)

2025-05-12 Thread Craig Topper via llvm-branch-commits


https://github.com/topperc approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/139508
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [analyzer] Workaround for slowdown spikes (unintended scope increase) (#136720) (PR #139597)

2025-05-12 Thread Donát Nagy via llvm-branch-commits


https://github.com/NagyDonat edited 
https://github.com/llvm/llvm-project/pull/139597
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Use minnum instead of maxnum for fmed3 src2-nan fold (PR #139531)

2025-05-12 Thread Matt Arsenault via llvm-branch-commits


arsenm wrote:

### Merge activity

* **May 12, 2:11 PM EDT**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/139531).


https://github.com/llvm/llvm-project/pull/139531
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Disable most fmed3 folds for strictfp (PR #139530)

2025-05-12 Thread Matt Arsenault via llvm-branch-commits


arsenm wrote:

### Merge activity

* **May 12, 2:11 PM EDT**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/139530).


https://github.com/llvm/llvm-project/pull/139530
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [GISelValueTracking] Use representation size for G_PTRTOINT src width (PR #139608)

2025-05-12 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Alexander Richardson (arichardson)


Changes

While we can only reason about the index/address, the G_PTRTOINT
operation returns all representation bits, so we can't assume the
remaining ones are all zeroes.
This behaviour was clarified as part of the discussion in
https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54.
The LangRef semantics of ptrtoint being a full representation bitcast
were documented in https://github.com/llvm/llvm-project/pull/139349.

Fixes: https://github.com/llvm/llvm-project/issues/139598


---
Full diff: https://github.com/llvm/llvm-project/pull/139608.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp (+3-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll 
(+14-27) 


```diff
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp 
b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 12fe28b29e5c8..b7e0a43f2fb64 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -483,8 +483,10 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, 
KnownBits &Known,
 if (Opcode == TargetOpcode::G_ASSERT_ZEXT)
   SrcBitWidth = MI.getOperand(2).getImm();
 else {
+  // For G_PTRTOINT all representation bits are returned even though only
+  // the address bits can be reasoned about generically.
   SrcBitWidth = SrcTy.isPointer()
-? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
+? DL.getPointerSizeInBits(SrcTy.getAddressSpace())
 : SrcTy.getSizeInBits();
 }
 assert(SrcBitWidth && "SrcBitWidth can't be zero");
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll
index 6722a55e8da92..d762d7728df36 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll
@@ -79,9 +79,9 @@ define <2 x i64> @ptrtoaddr_vec(ptr addrspace(8) %ignored, <2 
x ptr addrspace(8)
 ; GISEL:   ; %bb.0:
 ; GISEL-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:v_mov_b32_e32 v0, v4
-; GISEL-NEXT:v_mov_b32_e32 v1, v5
+; GISEL-NEXT:v_and_b32_e32 v1, 0x, v5
+; GISEL-NEXT:v_and_b32_e32 v3, 0x, v9
 ; GISEL-NEXT:v_mov_b32_e32 v2, v8
-; GISEL-NEXT:v_mov_b32_e32 v3, v9
 ; GISEL-NEXT:s_setpc_b64 s[30:31]
 ;
 ; SDAG-LABEL: ptrtoaddr_vec:
@@ -129,31 +129,18 @@ define i256 @ptrtoint_ext(ptr addrspace(8) %ignored, ptr 
addrspace(8) %ptr) {
 ;; FIXME: this is wrong for the GlobalISel case, we are removing the trunc:
 ;; https://github.com/llvm/llvm-project/issues/139598
 define i256 @ptrtoaddr_ext(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) {
-; GISEL-LABEL: ptrtoaddr_ext:
-; GISEL:   ; %bb.0:
-; GISEL-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:v_mov_b32_e32 v0, v4
-; GISEL-NEXT:v_mov_b32_e32 v1, v5
-; GISEL-NEXT:v_mov_b32_e32 v2, v6
-; GISEL-NEXT:v_mov_b32_e32 v3, v7
-; GISEL-NEXT:v_mov_b32_e32 v4, 0
-; GISEL-NEXT:v_mov_b32_e32 v5, 0
-; GISEL-NEXT:v_mov_b32_e32 v6, 0
-; GISEL-NEXT:v_mov_b32_e32 v7, 0
-; GISEL-NEXT:s_setpc_b64 s[30:31]
-;
-; SDAG-LABEL: ptrtoaddr_ext:
-; SDAG:   ; %bb.0:
-; SDAG-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:v_mov_b32_e32 v0, v4
-; SDAG-NEXT:v_and_b32_e32 v1, 0x, v5
-; SDAG-NEXT:v_mov_b32_e32 v2, 0
-; SDAG-NEXT:v_mov_b32_e32 v3, 0
-; SDAG-NEXT:v_mov_b32_e32 v4, 0
-; SDAG-NEXT:v_mov_b32_e32 v5, 0
-; SDAG-NEXT:v_mov_b32_e32 v6, 0
-; SDAG-NEXT:v_mov_b32_e32 v7, 0
-; SDAG-NEXT:s_setpc_b64 s[30:31]
+; CHECK-LABEL: ptrtoaddr_ext:
+; CHECK:   ; %bb.0:
+; CHECK-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:v_mov_b32_e32 v0, v4
+; CHECK-NEXT:v_and_b32_e32 v1, 0x, v5
+; CHECK-NEXT:v_mov_b32_e32 v2, 0
+; CHECK-NEXT:v_mov_b32_e32 v3, 0
+; CHECK-NEXT:v_mov_b32_e32 v4, 0
+; CHECK-NEXT:v_mov_b32_e32 v5, 0
+; CHECK-NEXT:v_mov_b32_e32 v6, 0
+; CHECK-NEXT:v_mov_b32_e32 v7, 0
+; CHECK-NEXT:s_setpc_b64 s[30:31]
   %ret = ptrtoaddr ptr addrspace(8) %ptr to i256
   ret i256 %ret
 }

``




https://github.com/llvm/llvm-project/pull/139608
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [GISelValueTracking] Use representation size for G_PTRTOINT src width (PR #139608)

2025-05-12 Thread Alexander Richardson via llvm-branch-commits


https://github.com/arichardson created 
https://github.com/llvm/llvm-project/pull/139608

While we can only reason about the index/address, the G_PTRTOINT
operation returns all representation bits, so we can't assume the
remaining ones are all zeroes.
This behaviour was clarified as part of the discussion in
https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54.
The LangRef semantics of ptrtoint being a full representation bitcast
were documented in https://github.com/llvm/llvm-project/pull/139349.

Fixes: https://github.com/llvm/llvm-project/issues/139598




  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
https://support.github.com/contact";>Contact Support —
https://githubstatus.com";>GitHub Status —
https://twitter.com/githubstatus";>@githubstatus
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [GISelValueTracking] Use representation size for G_PTRTOINT src width (PR #139608)

2025-05-12 Thread Alexander Richardson via llvm-branch-commits


arichardson wrote:

Now that we use the full bitwidth the high KnownBits are no longer zext'ed to 
zeroes. But maybe the better approach would be to just do KnownBits on the 
address bits and set the high bits to unknown?

That should fix the issue as well?

https://github.com/llvm/llvm-project/pull/139608
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [analyzer] Workaround for slowdown spikes (unintended scope increase) (#136720) (PR #139597)

2025-05-12 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-clang-static-analyzer-1

Author: Balazs Benics (steakhal)


Changes

Recently some users reported that they observed large increases of runtime (up 
to +600% on some translation units) when they upgraded to a more recent 
(slightly patched, internal) clang version. Bisection revealed that the bulk of 
this increase was probably caused by my earlier commit 
bb27d5e5c6b194a1440b8ac4e5ace68d0ee2a849 ("Don't assume third iteration in 
loops").

As I evaluated that earlier commit on several open source projects, it turns out 
that on average it's runtime-neutral (or slightly helpful: it reduced the total 
analysis time by 1.5%) but it can cause runtime spikes on some code: in 
particular it more than doubled the time to analyze `tmux` (one of the smaller 
test projects).

Further profiling and investigation proved that these spikes were caused by an 
_increase of analysis scope_ because there was a heuristic that placed 
functions on a "don't inline this" blacklist if they reached the 
`-analyzer-max-loop` limit (anywhere, on any one execution path) -- which 
became significantly rarer when my commit ensured the analyzer no longer "just 
assumes" four iterations. (With more inlining significantly more entry points 
use up their allocated budgets, which leads to the increased runtime.)

I feel that this heuristic for the "don't inline" blacklist is unjustified and 
arbitrary, because reaching the "retry without inlining" limit on one path does 
not imply that inlining the function won't be valuable on other paths -- so I 
hope that we can eventually replace it with more "natural" limits of the 
analysis scope.

However, the runtime increases are annoying for the users whose project is 
affected, so I created this quick workaround commit that approximates the 
"don't inline" blacklist effects of ambiguous loops (where the analyzer doesn't 
understand the loop condition) without fully reverting the "Don't assume third 
iteration" commit (to avoid reintroducing the false positives that were 
eliminated by it).

Investigating this issue was a team effort: I'm grateful to Endre Fülöp 
(gamesh411) who did the bisection and shared his time measurement setup, and 
Gábor Tóthvári (tigbr) who helped me in profiling.

(cherry picked from commit 9600a12f0de233324b559f60997b9c2db153fede)

---
Full diff: https://github.com/llvm/llvm-project/pull/139597.diff


6 Files Affected:

- (modified) clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def (+13) 
- (modified) 
clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h (-4) 
- (modified) clang/lib/StaticAnalyzer/Core/ExprEngine.cpp (+49-11) 
- (modified) clang/test/Analysis/analyzer-config.c (+1) 
- (added) clang/test/Analysis/loop-based-inlining-prevention.c (+200) 
- (modified) clang/test/Analysis/loop-unrolling.cpp (+23-7) 


``diff
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def 
b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
index 34bb7a809162b..dbb8e832db5ff 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
@@ -385,6 +385,19 @@ ANALYZER_OPTION(
 "flex\" won't be analyzed.",
 true)
 
+ANALYZER_OPTION(
+bool, InlineFunctionsWithAmbiguousLoops, 
"inline-functions-with-ambiguous-loops",
+"If disabled (the default), the analyzer puts functions on a \"do not "
+"inline this\" list if it finds an execution path within that function "
+"that may potentially perform 'analyzer-max-loop' (= 4 by default) "
+"iterations in a loop. (Note that functions that _definitely_ reach the "
+"loop limit on some execution path are currently marked as \"do not "
+"inline\" even if this option is enabled.) Enabling this option "
+"eliminates this (somewhat arbitrary) restriction from the analysis "
+"scope, which increases the analysis runtime (on average by ~10%, but "
+"a few translation units may see much larger slowdowns).",
+false)
+
 
//===--===//
 // Unsigned analyzer options.
 
//===--===//
diff --git 
a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h 
b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h
index 3ee0d229cfc29..761395260a0cf 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h
@@ -81,10 +81,6 @@ class FunctionSummariesTy {
 I->second.MayInline = 0;
   }
 
-  void markReachedMaxBlockCount(const Decl *D) {
-markShouldNotInline(D);
-  }
-
   std::optional mayInline(const Decl *D) {
 MapTy::const_iterator I = Map.find(D);
 if (I != Map.end() && I->second.InlineChecked)
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp 
b

[llvm-branch-commits] [clang] [analyzer] Workaround for slowdown spikes (unintended scope increase) (#136720) (PR #139597)

2025-05-12 Thread Balazs Benics via llvm-branch-commits


https://github.com/steakhal milestoned 
https://github.com/llvm/llvm-project/pull/139597
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [analyzer] Workaround for slowdown spikes (unintended scope increase) (#136720) (PR #139597)

2025-05-12 Thread Balazs Benics via llvm-branch-commits


https://github.com/steakhal created 
https://github.com/llvm/llvm-project/pull/139597

Recently some users reported that they observed large increases of runtime (up 
to +600% on some translation units) when they upgraded to a more recent 
(slightly patched, internal) clang version. Bisection revealed that the bulk of 
this increase was probably caused by my earlier commit 
bb27d5e5c6b194a1440b8ac4e5ace68d0ee2a849 ("Don't assume third iteration in 
loops").

As I evaluated that earlier commit on several open source projects, it turns out 
that on average it's runtime-neutral (or slightly helpful: it reduced the total 
analysis time by 1.5%) but it can cause runtime spikes on some code: in 
particular it more than doubled the time to analyze `tmux` (one of the smaller 
test projects).

Further profiling and investigation proved that these spikes were caused by an 
_increase of analysis scope_ because there was a heuristic that placed 
functions on a "don't inline this" blacklist if they reached the 
`-analyzer-max-loop` limit (anywhere, on any one execution path) -- which 
became significantly rarer when my commit ensured the analyzer no longer "just 
assumes" four iterations. (With more inlining significantly more entry points 
use up their allocated budgets, which leads to the increased runtime.)

I feel that this heuristic for the "don't inline" blacklist is unjustified and 
arbitrary, because reaching the "retry without inlining" limit on one path does 
not imply that inlining the function won't be valuable on other paths -- so I 
hope that we can eventually replace it with more "natural" limits of the 
analysis scope.

However, the runtime increases are annoying for the users whose project is 
affected, so I created this quick workaround commit that approximates the 
"don't inline" blacklist effects of ambiguous loops (where the analyzer doesn't 
understand the loop condition) without fully reverting the "Don't assume third 
iteration" commit (to avoid reintroducing the false positives that were 
eliminated by it).

Investigating this issue was a team effort: I'm grateful to Endre Fülöp 
(gamesh411) who did the bisection and shared his time measurement setup, and 
Gábor Tóthvári (tigbr) who helped me in profiling.

(cherry picked from commit 9600a12f0de233324b559f60997b9c2db153fede)



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
https://support.github.com/contact";>Contact Support —
https://githubstatus.com";>GitHub Status —
https://twitter.com/githubstatus";>@githubstatus
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [IRTranslator] Handle ptrtoaddr (PR #139601)

2025-05-12 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-llvm-globalisel

Author: Alexander Richardson (arichardson)


Changes

We lower ptrtoaddr by emitting a G_PTRTOINT, truncating that to the
address size and then truncate/zext to the final integer type.

This has exposed an issue in the GlobalISel postlegalizer combines where
the truncate is incorrectly being removed.
See https://github.com/llvm/llvm-project/issues/139598


---
Full diff: https://github.com/llvm/llvm-project/pull/139601.diff


3 Files Affected:

- (modified) llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h (+1-3) 
- (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+20) 
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll (+187) 


``diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h 
b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index fcdc733d92c7f..41d03c9fb3ed5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -486,9 +486,7 @@ class IRTranslator : public MachineFunctionPass {
   bool translatePtrToInt(const User &U, MachineIRBuilder &MIRBuilder) {
 return translateCast(TargetOpcode::G_PTRTOINT, U, MIRBuilder);
   }
-  bool translatePtrToAddr(const User &U, MachineIRBuilder &MIRBuilder) {
-return translatePtrToInt(U, MIRBuilder);
-  }
+  bool translatePtrToAddr(const User &U, MachineIRBuilder &MIRBuilder);
   bool translateTrunc(const User &U, MachineIRBuilder &MIRBuilder) {
 return translateCast(TargetOpcode::G_TRUNC, U, MIRBuilder);
   }
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp 
b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 8ab2533afc15f..5666c9e9f45bc 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1583,6 +1583,26 @@ bool IRTranslator::translateCast(unsigned Opcode, const 
User &U,
   return true;
 }
 
+bool IRTranslator::translatePtrToAddr(const User &U,
+  MachineIRBuilder &MIRBuilder) {
+  if (containsBF16Type(U))
+return false;
+
+  uint32_t Flags = 0;
+  if (const Instruction *I = dyn_cast(&U))
+Flags = MachineInstr::copyFlagsFromInstruction(*I);
+
+  Register Op = getOrCreateVReg(*U.getOperand(0));
+  Type *PtrTy = U.getOperand(0)->getType();
+  LLT AddrTy = getLLTForType(*DL->getIndexType(PtrTy), *DL);
+  auto IntPtrTy = getLLTForType(*DL->getIntPtrType(PtrTy), *DL);
+  auto PtrToInt = MIRBuilder.buildPtrToInt(IntPtrTy, Op);
+  PtrToInt->setFlags(Flags);
+  auto Addr = MIRBuilder.buildTrunc(AddrTy, PtrToInt.getReg(0));
+  MIRBuilder.buildZExtOrTrunc(getOrCreateVReg(U), Addr.getReg(0));
+  return true;
+}
+
 bool IRTranslator::translateGetElementPtr(const User &U,
   MachineIRBuilder &MIRBuilder) {
   Value &Op0 = *U.getOperand(0);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll
new file mode 100644
index 0..30f9dbfcaacf8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -global-isel -verify-machineinstrs --print-changed 
--debug < %s | FileCheck %s --check-prefixes=CHECK,GISEL
+; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK,SDAG
+;; Check that we can lower ptrtoaddr differently from ptrtoint.
+;; Includes an ignored argument so the registers actually need to be written
+
+define i128 @ptrtoint(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) {
+; GISEL-LABEL: ptrtoint:
+; GISEL:   ; %bb.0:
+; GISEL-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:v_mov_b32_e32 v0, v4
+; GISEL-NEXT:v_mov_b32_e32 v1, v5
+; GISEL-NEXT:v_mov_b32_e32 v2, v6
+; GISEL-NEXT:v_mov_b32_e32 v3, v7
+; GISEL-NEXT:s_setpc_b64 s[30:31]
+;
+; SDAG-LABEL: ptrtoint:
+; SDAG:   ; %bb.0:
+; SDAG-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:v_mov_b32_e32 v3, v7
+; SDAG-NEXT:v_mov_b32_e32 v2, v6
+; SDAG-NEXT:v_mov_b32_e32 v1, v5
+; SDAG-NEXT:v_mov_b32_e32 v0, v4
+; SDAG-NEXT:s_setpc_b64 s[30:31]
+  %ret = ptrtoint ptr addrspace(8) %ptr to i128
+  ret i128 %ret
+}
+
+define i48 @ptrtoaddr(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) {
+; GISEL-LABEL: ptrtoaddr:
+; GISEL:   ; %bb.0:
+; GISEL-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:v_mov_b32_e32 v0, v4
+; GISEL-NEXT:v_mov_b32_e32 v1, v5
+; GISEL-NEXT:s_setpc_b64 s[30:31]
+;
+; SDAG-LABEL: ptrtoaddr:
+; SDAG:   ; %bb.0:
+; SDAG-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:v_mov_b32_e32 v1, v5
+; SDAG-NEXT:v_mov_b32_e32 v0, v4
+; SDAG-NEXT:s_setpc_b64 s[30:31]
+  %ret = ptrtoaddr ptr addrspace(8) %ptr to i48
+  ret i48 %ret
+}
+
+define <

[llvm-branch-commits] [IRTranslator] Handle ptrtoaddr (PR #139601)

2025-05-12 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Alexander Richardson (arichardson)


Changes

We lower ptrtoaddr by emitting a G_PTRTOINT, truncating that to the
address size and then truncate/zext to the final integer type.

This has exposed an issue in the GlobalISel postlegalizer combines where
the truncate is incorrectly being removed.
See https://github.com/llvm/llvm-project/issues/139598


---
Full diff: https://github.com/llvm/llvm-project/pull/139601.diff


3 Files Affected:

- (modified) llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h (+1-3) 
- (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+20) 
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll (+187) 


``diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h 
b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index fcdc733d92c7f..41d03c9fb3ed5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -486,9 +486,7 @@ class IRTranslator : public MachineFunctionPass {
   bool translatePtrToInt(const User &U, MachineIRBuilder &MIRBuilder) {
 return translateCast(TargetOpcode::G_PTRTOINT, U, MIRBuilder);
   }
-  bool translatePtrToAddr(const User &U, MachineIRBuilder &MIRBuilder) {
-return translatePtrToInt(U, MIRBuilder);
-  }
+  bool translatePtrToAddr(const User &U, MachineIRBuilder &MIRBuilder);
   bool translateTrunc(const User &U, MachineIRBuilder &MIRBuilder) {
 return translateCast(TargetOpcode::G_TRUNC, U, MIRBuilder);
   }
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp 
b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 8ab2533afc15f..5666c9e9f45bc 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1583,6 +1583,26 @@ bool IRTranslator::translateCast(unsigned Opcode, const 
User &U,
   return true;
 }
 
+bool IRTranslator::translatePtrToAddr(const User &U,
+  MachineIRBuilder &MIRBuilder) {
+  if (containsBF16Type(U))
+return false;
+
+  uint32_t Flags = 0;
+  if (const Instruction *I = dyn_cast(&U))
+Flags = MachineInstr::copyFlagsFromInstruction(*I);
+
+  Register Op = getOrCreateVReg(*U.getOperand(0));
+  Type *PtrTy = U.getOperand(0)->getType();
+  LLT AddrTy = getLLTForType(*DL->getIndexType(PtrTy), *DL);
+  auto IntPtrTy = getLLTForType(*DL->getIntPtrType(PtrTy), *DL);
+  auto PtrToInt = MIRBuilder.buildPtrToInt(IntPtrTy, Op);
+  PtrToInt->setFlags(Flags);
+  auto Addr = MIRBuilder.buildTrunc(AddrTy, PtrToInt.getReg(0));
+  MIRBuilder.buildZExtOrTrunc(getOrCreateVReg(U), Addr.getReg(0));
+  return true;
+}
+
 bool IRTranslator::translateGetElementPtr(const User &U,
   MachineIRBuilder &MIRBuilder) {
   Value &Op0 = *U.getOperand(0);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll
new file mode 100644
index 0..30f9dbfcaacf8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -global-isel -verify-machineinstrs --print-changed 
--debug < %s | FileCheck %s --check-prefixes=CHECK,GISEL
+; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK,SDAG
+;; Check that we can lower ptrtoaddr differently from ptrtoint.
+;; Includes an ignored argument so the registers actually need to be written
+
+define i128 @ptrtoint(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) {
+; GISEL-LABEL: ptrtoint:
+; GISEL:   ; %bb.0:
+; GISEL-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:v_mov_b32_e32 v0, v4
+; GISEL-NEXT:v_mov_b32_e32 v1, v5
+; GISEL-NEXT:v_mov_b32_e32 v2, v6
+; GISEL-NEXT:v_mov_b32_e32 v3, v7
+; GISEL-NEXT:s_setpc_b64 s[30:31]
+;
+; SDAG-LABEL: ptrtoint:
+; SDAG:   ; %bb.0:
+; SDAG-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:v_mov_b32_e32 v3, v7
+; SDAG-NEXT:v_mov_b32_e32 v2, v6
+; SDAG-NEXT:v_mov_b32_e32 v1, v5
+; SDAG-NEXT:v_mov_b32_e32 v0, v4
+; SDAG-NEXT:s_setpc_b64 s[30:31]
+  %ret = ptrtoint ptr addrspace(8) %ptr to i128
+  ret i128 %ret
+}
+
+define i48 @ptrtoaddr(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) {
+; GISEL-LABEL: ptrtoaddr:
+; GISEL:   ; %bb.0:
+; GISEL-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:v_mov_b32_e32 v0, v4
+; GISEL-NEXT:v_mov_b32_e32 v1, v5
+; GISEL-NEXT:s_setpc_b64 s[30:31]
+;
+; SDAG-LABEL: ptrtoaddr:
+; SDAG:   ; %bb.0:
+; SDAG-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:v_mov_b32_e32 v1, v5
+; SDAG-NEXT:v_mov_b32_e32 v0, v4
+; SDAG-NEXT:s_setpc_b64 s[30:31]
+  %ret = ptrtoaddr ptr addrspace(8) %ptr to i48
+  ret i48 %ret
+}
+
+define <2

[llvm-branch-commits] [llvm] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)

2025-05-12 Thread Krzysztof Drewniak via llvm-branch-commits



@@ -1952,6 +1953,22 @@ PtrParts SplitPtrStructs::visitPtrToIntInst(PtrToIntInst 
&PI) {
   return {nullptr, nullptr};
 }
 
+PtrParts SplitPtrStructs::visitPtrToAddrInst(PtrToAddrInst &PA) {
+  Value *Ptr = PA.getPointerOperand();
+  if (!isSplitFatPtr(Ptr->getType()))
+return {nullptr, nullptr};
+  IRB.SetInsertPoint(&PA);
+
+  auto [Rsrc, Off] = getPtrParts(Ptr);
+  Value *Res = IRB.CreateIntCast(Off, PA.getType(), /*isSigned=*/false,
+ PA.getName() + ".off");

krzysz00 wrote:

We could probably leave the name off given that we'll be `takeName()`ing a 
moment later

https://github.com/llvm/llvm-project/pull/139413
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)

2025-05-12 Thread Krzysztof Drewniak via llvm-branch-commits


https://github.com/krzysz00 edited 
https://github.com/llvm/llvm-project/pull/139413
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)

2025-05-12 Thread Krzysztof Drewniak via llvm-branch-commits


https://github.com/krzysz00 approved this pull request.

One tiny nit, lgtm otherwise

https://github.com/llvm/llvm-project/pull/139413
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [IRTranslator] Handle ptrtoaddr (PR #139601)

2025-05-12 Thread Alexander Richardson via llvm-branch-commits


https://github.com/arichardson created 
https://github.com/llvm/llvm-project/pull/139601

We lower ptrtoaddr by emitting a G_PTRTOINT, truncating that to the
address size and then truncate/zext to the final integer type.

This has exposed an issue in the GlobalISel postlegalizer combines where
the truncate is incorrectly being removed.
See https://github.com/llvm/llvm-project/issues/139598



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)

2025-05-12 Thread Alexander Richardson via llvm-branch-commits



@@ -5773,7 +5773,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, 
StringRef TT) {
 if (!DL.contains("-p7") && !DL.starts_with("p7"))
   Res.append("-p7:160:256:256:32");
 if (!DL.contains("-p8") && !DL.starts_with("p8"))
-  Res.append("-p8:128:128");
+  Res.append("-p8:128:128:128:48");

arichardson wrote:

I just kept the current structure that doesn't touch pointer definitions that 
already exist - should we override those unconditionally? Or just the old 
variants?

https://github.com/llvm/llvm-project/pull/139419
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Fix build on ppc64+musl (#120036) (PR #139389)

2025-05-12 Thread Vitaly Buka via llvm-branch-commits


https://github.com/vitalybuka approved this pull request.


https://github.com/llvm/llvm-project/pull/139389
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [IR] Introduce the `ptrtoaddr` instruction (PR #139357)

2025-05-12 Thread Alexander Richardson via llvm-branch-commits


https://github.com/arichardson updated 
https://github.com/llvm/llvm-project/pull/139357

>From 25dc175562349410f161ef0e80246301d9a7ba79 Mon Sep 17 00:00:00 2001
From: Alex Richardson 
Date: Fri, 9 May 2025 22:43:37 -0700
Subject: [PATCH] fix docs build

Created using spr 1.3.6-beta.1
---
 llvm/docs/LangRef.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 2d18d0d97aaee..38be6918ff73c 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -12435,7 +12435,7 @@ Example:
 .. _i_ptrtoaddr:
 
 '``ptrtoaddr .. to``' Instruction
-
+^
 
 Syntax:
 """

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

1 2 3 >

1 - 100 of 224 matches

Mail list logo