[llvm-branch-commits] [llvm] [GlobalISel] Add computeNumSignBits for ASHR (PR #139503)
@@ -864,6 +864,16 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
 return TyBits - 1; // Every always-zero bit is a sign bit.
 break;
 }
+ case TargetOpcode::G_ASHR: {
+Register Src1 = MI.getOperand(1).getReg();
+Register Src2 = MI.getOperand(2).getReg();
+LLT SrcTy = MRI.getType(Src1);
+FirstAnswer = computeNumSignBits(Src1, DemandedElts, Depth + 1);
+if (auto C = getIConstantSplatVal(Src2, MRI))

arsenm wrote:

At some point we should make these properly elementwise and not only handle splats

https://github.com/llvm/llvm-project/pull/139503
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #138828)
@@ -279,6 +278,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
 #ifndef DUMMY_FUNCTION_PASS
 #define DUMMY_FUNCTION_PASS(NAME, PASS_NAME)
 #endif
+DUMMY_FUNCTION_PASS("tlshoist", TLSVariableHoistPass)

optimisan wrote:

I'll remove this list entirely since it's the same in `PassRegistry.def`

https://github.com/llvm/llvm-project/pull/138828
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] release/20.x: [AArch64] Fix feature list for FUJITSU-MONAKA processor (#139212) (PR #139222)
https://github.com/davemgreen approved this pull request.

https://github.com/llvm/llvm-project/pull/139222
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] release/20.x: [AArch64] Fix feature list for FUJITSU-MONAKA processor (#139212) (PR #139222)
davemgreen wrote:

LGTM - changes look small and correct a regression since the previous release.

https://github.com/llvm/llvm-project/pull/139222
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139508 >From 5e50922e53ad2de7e3c68242ad78f1813a48f7b6 Mon Sep 17 00:00:00 2001 From: Iris Shi <0...@owo.li> Date: Mon, 12 May 2025 15:04:28 +0800 Subject: [PATCH] [RISCV][MC] Add Q support for Zfa --- llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td | 29 ++ llvm/test/MC/RISCV/rv64zfa-only-valid.s| 19 + llvm/test/MC/RISCV/zfa-invalid.s | 13 +- llvm/test/MC/RISCV/zfa-quad-invalid.s | 42 +++ llvm/test/MC/RISCV/zfa-valid.s | 391 - 5 files changed, 484 insertions(+), 10 deletions(-) create mode 100644 llvm/test/MC/RISCV/rv64zfa-only-valid.s create mode 100644 llvm/test/MC/RISCV/zfa-quad-invalid.s diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index 8a449d32e0104..0ad654db42f5c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -175,6 +175,28 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>; } } // Predicates = [HasStdExtZfa, HasStdExtZfh] +let Predicates = [HasStdExtZfa, HasStdExtQ] in { +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def FLI_Q : FPFLI_r<0b011, 0b1, 0b000, FPR128, "fli.q">; + +def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>; +def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>; + +def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">; +def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, + "froundnx.q">; + +def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>; +def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>; +} // Predicates = [HasStdExtZfa, HasStdExtQ] + +let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in { + let mayRaiseFPException = 0 in { +def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b1, 0b000, GPR, FPR128, "fmvh.x.q">; +def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">; + } +} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] + 
//===--===// // Pseudo-instructions and codegen patterns //===--===// @@ -200,6 +222,13 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt", (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; } +let Predicates = [HasStdExtZfa, HasStdExtQ] in { +def : InstAlias<"fgtq.q $rd, $rs, $rt", +(FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; +def : InstAlias<"fgeq.q $rd, $rs, $rt", +(FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; +} + //===--===// // Codegen patterns //===--===// diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s b/llvm/test/MC/RISCV/rv64zfa-only-valid.s new file mode 100644 index 0..95fb253b145c1 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zfa-only-valid.s @@ -0,0 +1,19 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \ +# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# +# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \ +# RUN: -M no-aliases -show-encoding < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1 +# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6] +# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} +fmvh.x.q a1, fs1 + +# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2 +# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6] +# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} +fmvp.q.x fs1, a1, a2 diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s index c2537c3fc5102..cedc9279db3cb 100644 --- a/llvm/test/MC/RISCV/zfa-invalid.s +++ b/llvm/test/MC/RISCV/zfa-invalid.s @@ -1,5 +1,5 @@ -# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s -# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s +# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s +# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV64 %s # Invalid rounding modes # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode @@ -35,6 +35,10 @@ fli.d ft1, 3.56e+02 # CHECK-NO-RV32: error: operand must be a valid floating-point constant fli.h ft1, 1.60e+00 +
[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139508 >From 5e50922e53ad2de7e3c68242ad78f1813a48f7b6 Mon Sep 17 00:00:00 2001 From: Iris Shi <0...@owo.li> Date: Mon, 12 May 2025 15:04:28 +0800 Subject: [PATCH] [RISCV][MC] Add Q support for Zfa --- llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td | 29 ++ llvm/test/MC/RISCV/rv64zfa-only-valid.s| 19 + llvm/test/MC/RISCV/zfa-invalid.s | 13 +- llvm/test/MC/RISCV/zfa-quad-invalid.s | 42 +++ llvm/test/MC/RISCV/zfa-valid.s | 391 - 5 files changed, 484 insertions(+), 10 deletions(-) create mode 100644 llvm/test/MC/RISCV/rv64zfa-only-valid.s create mode 100644 llvm/test/MC/RISCV/zfa-quad-invalid.s diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index 8a449d32e0104..0ad654db42f5c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -175,6 +175,28 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>; } } // Predicates = [HasStdExtZfa, HasStdExtZfh] +let Predicates = [HasStdExtZfa, HasStdExtQ] in { +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def FLI_Q : FPFLI_r<0b011, 0b1, 0b000, FPR128, "fli.q">; + +def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>; +def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>; + +def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">; +def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, + "froundnx.q">; + +def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>; +def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>; +} // Predicates = [HasStdExtZfa, HasStdExtQ] + +let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in { + let mayRaiseFPException = 0 in { +def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b1, 0b000, GPR, FPR128, "fmvh.x.q">; +def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">; + } +} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] + 
//===--===// // Pseudo-instructions and codegen patterns //===--===// @@ -200,6 +222,13 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt", (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; } +let Predicates = [HasStdExtZfa, HasStdExtQ] in { +def : InstAlias<"fgtq.q $rd, $rs, $rt", +(FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; +def : InstAlias<"fgeq.q $rd, $rs, $rt", +(FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; +} + //===--===// // Codegen patterns //===--===// diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s b/llvm/test/MC/RISCV/rv64zfa-only-valid.s new file mode 100644 index 0..95fb253b145c1 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zfa-only-valid.s @@ -0,0 +1,19 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \ +# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# +# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \ +# RUN: -M no-aliases -show-encoding < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1 +# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6] +# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} +fmvh.x.q a1, fs1 + +# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2 +# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6] +# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} +fmvp.q.x fs1, a1, a2 diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s index c2537c3fc5102..cedc9279db3cb 100644 --- a/llvm/test/MC/RISCV/zfa-invalid.s +++ b/llvm/test/MC/RISCV/zfa-invalid.s @@ -1,5 +1,5 @@ -# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s -# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s +# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s +# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV64 %s # Invalid rounding modes # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode @@ -35,6 +35,10 @@ fli.d ft1, 3.56e+02 # CHECK-NO-RV32: error: operand must be a valid floating-point constant fli.h ft1, 1.60e+00 +
[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139495 >From 7fdbd6b564697b7f0fd7ffd1f031671c3036 Mon Sep 17 00:00:00 2001 From: Iris Shi <0...@owo.li> Date: Mon, 12 May 2025 13:32:41 +0800 Subject: [PATCH] [RISCV][Scheduler] Add scheduler definitions for the Q extension --- llvm/lib/Target/RISCV/RISCVInstrInfoQ.td | 98 --- llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td | 1 + llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td | 1 + llvm/lib/Target/RISCV/RISCVSchedRocket.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFive7.td| 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 1 + .../lib/Target/RISCV/RISCVSchedSpacemitX60.td | 1 + .../Target/RISCV/RISCVSchedSyntacoreSCR345.td | 1 + .../Target/RISCV/RISCVSchedSyntacoreSCR7.td | 1 + .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td | 1 + .../Target/RISCV/RISCVSchedXiangShanNanHu.td | 1 + llvm/lib/Target/RISCV/RISCVSchedule.td| 88 - 14 files changed, 158 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td index 51b9c806976f5..4dc33dd22b2aa 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td @@ -25,95 +25,119 @@ defvar QExtsRV64 = [QExt]; //===--===// let Predicates = [HasStdExtQ] in { - let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in - def FLQ : RVInstI<0b100, OPC_LOAD_FP, (outs FPR128:$rd), - (ins GPRMem:$rs1, simm12:$imm12), - "flq", "$rd, ${imm12}(${rs1})">; + def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>; + // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction // encoding. 
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in - def FSQ : RVInstS<0b100, OPC_STORE_FP, (outs), - (ins FPR128:$rs2, GPRMem:$rs1, simm12:$imm12), - "fsq", "$rs2, ${imm12}(${rs1})">; + def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>; } // Predicates = [HasStdExtQ] foreach Ext = QExts in { - defm FMADD_Q : FPFMA_rrr_frm_m; - defm FMSUB_Q : FPFMA_rrr_frm_m; - defm FNMSUB_Q : FPFMA_rrr_frm_m; - defm FNMADD_Q : FPFMA_rrr_frm_m; + let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in { +defm FMADD_Q : FPFMA_rrr_frm_m; +defm FMSUB_Q : FPFMA_rrr_frm_m; +defm FNMSUB_Q : FPFMA_rrr_frm_m; +defm FNMADD_Q : FPFMA_rrr_frm_m; + } - defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>; - defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>; + let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in { +defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>; +defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>; + } + let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>; + let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in defm FDIV_Q : FPALU_rr_frm_m<0b000, "fdiv.q", Ext>; defm FSQRT_Q : FPUnaryOp_r_frm_m<0b010, 0b0, Ext, Ext.PrimaryTy, - Ext.PrimaryTy, "fsqrt.q">; + Ext.PrimaryTy, "fsqrt.q">, + Sched<[WriteFSqrt128, ReadFSqrt128]>; - let mayRaiseFPException = 0 in { + let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128], + mayRaiseFPException = 0 in { defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>; defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>; defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>; } - defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>; - defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>; + let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in { +defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>; +defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", 
Ext, Commutable = 1>; + } defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b010, 0b00011, Ext, Ext.F32Ty, -Ext.PrimaryTy, "fcvt.s.q">; +Ext.PrimaryTy, "fcvt.s.q">, + Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>; defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b0, Ext, - Ext.PrimaryTy, Ext.F32Ty, "fcvt.q.s">; + Ext.PrimaryTy, Ext.F32Ty, + "fcvt.q.s">, + Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>; defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b011, 0b00011, Ext, Ext.F64Ty, -Ext.Pr
[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139495 >From 7fdbd6b564697b7f0fd7ffd1f031671c3036 Mon Sep 17 00:00:00 2001 From: Iris Shi <0...@owo.li> Date: Mon, 12 May 2025 13:32:41 +0800 Subject: [PATCH] [RISCV][Scheduler] Add scheduler definitions for the Q extension --- llvm/lib/Target/RISCV/RISCVInstrInfoQ.td | 98 --- llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td | 1 + llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td | 1 + llvm/lib/Target/RISCV/RISCVSchedRocket.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFive7.td| 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 1 + .../lib/Target/RISCV/RISCVSchedSpacemitX60.td | 1 + .../Target/RISCV/RISCVSchedSyntacoreSCR345.td | 1 + .../Target/RISCV/RISCVSchedSyntacoreSCR7.td | 1 + .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td | 1 + .../Target/RISCV/RISCVSchedXiangShanNanHu.td | 1 + llvm/lib/Target/RISCV/RISCVSchedule.td| 88 - 14 files changed, 158 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td index 51b9c806976f5..4dc33dd22b2aa 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td @@ -25,95 +25,119 @@ defvar QExtsRV64 = [QExt]; //===--===// let Predicates = [HasStdExtQ] in { - let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in - def FLQ : RVInstI<0b100, OPC_LOAD_FP, (outs FPR128:$rd), - (ins GPRMem:$rs1, simm12:$imm12), - "flq", "$rd, ${imm12}(${rs1})">; + def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>; + // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction // encoding. 
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in - def FSQ : RVInstS<0b100, OPC_STORE_FP, (outs), - (ins FPR128:$rs2, GPRMem:$rs1, simm12:$imm12), - "fsq", "$rs2, ${imm12}(${rs1})">; + def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>; } // Predicates = [HasStdExtQ] foreach Ext = QExts in { - defm FMADD_Q : FPFMA_rrr_frm_m; - defm FMSUB_Q : FPFMA_rrr_frm_m; - defm FNMSUB_Q : FPFMA_rrr_frm_m; - defm FNMADD_Q : FPFMA_rrr_frm_m; + let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in { +defm FMADD_Q : FPFMA_rrr_frm_m; +defm FMSUB_Q : FPFMA_rrr_frm_m; +defm FNMSUB_Q : FPFMA_rrr_frm_m; +defm FNMADD_Q : FPFMA_rrr_frm_m; + } - defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>; - defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>; + let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in { +defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>; +defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>; + } + let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>; + let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in defm FDIV_Q : FPALU_rr_frm_m<0b000, "fdiv.q", Ext>; defm FSQRT_Q : FPUnaryOp_r_frm_m<0b010, 0b0, Ext, Ext.PrimaryTy, - Ext.PrimaryTy, "fsqrt.q">; + Ext.PrimaryTy, "fsqrt.q">, + Sched<[WriteFSqrt128, ReadFSqrt128]>; - let mayRaiseFPException = 0 in { + let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128], + mayRaiseFPException = 0 in { defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>; defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>; defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>; } - defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>; - defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>; + let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in { +defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>; +defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", 
Ext, Commutable = 1>; + } defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b010, 0b00011, Ext, Ext.F32Ty, -Ext.PrimaryTy, "fcvt.s.q">; +Ext.PrimaryTy, "fcvt.s.q">, + Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>; defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b0, Ext, - Ext.PrimaryTy, Ext.F32Ty, "fcvt.q.s">; + Ext.PrimaryTy, Ext.F32Ty, + "fcvt.q.s">, + Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>; defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b011, 0b00011, Ext, Ext.F64Ty, -Ext.Pr
[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139508 >From 6dc27676de2a685404abd0cfd12cff95703a1cf1 Mon Sep 17 00:00:00 2001 From: Iris Shi <0...@owo.li> Date: Mon, 12 May 2025 15:04:28 +0800 Subject: [PATCH] [RISCV][MC] Add Q support for Zfa --- llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td | 29 ++ llvm/test/MC/RISCV/rv64zfa-only-valid.s| 19 + llvm/test/MC/RISCV/zfa-invalid.s | 13 +- llvm/test/MC/RISCV/zfa-quad-invalid.s | 42 +++ llvm/test/MC/RISCV/zfa-valid.s | 391 - 5 files changed, 484 insertions(+), 10 deletions(-) create mode 100644 llvm/test/MC/RISCV/rv64zfa-only-valid.s create mode 100644 llvm/test/MC/RISCV/zfa-quad-invalid.s diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index 8a449d32e0104..0ad654db42f5c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -175,6 +175,28 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>; } } // Predicates = [HasStdExtZfa, HasStdExtZfh] +let Predicates = [HasStdExtZfa, HasStdExtQ] in { +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def FLI_Q : FPFLI_r<0b011, 0b1, 0b000, FPR128, "fli.q">; + +def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>; +def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>; + +def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">; +def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, + "froundnx.q">; + +def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>; +def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>; +} // Predicates = [HasStdExtZfa, HasStdExtQ] + +let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in { + let mayRaiseFPException = 0 in { +def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b1, 0b000, GPR, FPR128, "fmvh.x.q">; +def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">; + } +} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] + 
//===--===// // Pseudo-instructions and codegen patterns //===--===// @@ -200,6 +222,13 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt", (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; } +let Predicates = [HasStdExtZfa, HasStdExtQ] in { +def : InstAlias<"fgtq.q $rd, $rs, $rt", +(FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; +def : InstAlias<"fgeq.q $rd, $rs, $rt", +(FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; +} + //===--===// // Codegen patterns //===--===// diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s b/llvm/test/MC/RISCV/rv64zfa-only-valid.s new file mode 100644 index 0..95fb253b145c1 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zfa-only-valid.s @@ -0,0 +1,19 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \ +# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# +# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \ +# RUN: -M no-aliases -show-encoding < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1 +# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6] +# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} +fmvh.x.q a1, fs1 + +# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2 +# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6] +# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} +fmvp.q.x fs1, a1, a2 diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s index c2537c3fc5102..cedc9279db3cb 100644 --- a/llvm/test/MC/RISCV/zfa-invalid.s +++ b/llvm/test/MC/RISCV/zfa-invalid.s @@ -1,5 +1,5 @@ -# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s -# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s +# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s +# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV64 %s # Invalid rounding modes # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode @@ -35,6 +35,10 @@ fli.d ft1, 3.56e+02 # CHECK-NO-RV32: error: operand must be a valid floating-point constant fli.h ft1, 1.60e+00 +
[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139495 >From 9205ac04544703aaee2a1475763ce7bc7495ccab Mon Sep 17 00:00:00 2001 From: Iris Shi <0...@owo.li> Date: Mon, 12 May 2025 13:32:41 +0800 Subject: [PATCH] [RISCV][Scheduler] Add scheduler definitions for the Q extension --- llvm/lib/Target/RISCV/RISCVInstrInfoQ.td | 98 --- llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td | 1 + llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td | 1 + llvm/lib/Target/RISCV/RISCVSchedRocket.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFive7.td| 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 1 + .../lib/Target/RISCV/RISCVSchedSpacemitX60.td | 1 + .../Target/RISCV/RISCVSchedSyntacoreSCR345.td | 1 + .../Target/RISCV/RISCVSchedSyntacoreSCR7.td | 1 + .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td | 1 + .../Target/RISCV/RISCVSchedXiangShanNanHu.td | 1 + llvm/lib/Target/RISCV/RISCVSchedule.td| 88 - 14 files changed, 158 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td index 51b9c806976f5..4dc33dd22b2aa 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td @@ -25,95 +25,119 @@ defvar QExtsRV64 = [QExt]; //===--===// let Predicates = [HasStdExtQ] in { - let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in - def FLQ : RVInstI<0b100, OPC_LOAD_FP, (outs FPR128:$rd), - (ins GPRMem:$rs1, simm12:$imm12), - "flq", "$rd, ${imm12}(${rs1})">; + def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>; + // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction // encoding. 
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in - def FSQ : RVInstS<0b100, OPC_STORE_FP, (outs), - (ins FPR128:$rs2, GPRMem:$rs1, simm12:$imm12), - "fsq", "$rs2, ${imm12}(${rs1})">; + def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>; } // Predicates = [HasStdExtQ] foreach Ext = QExts in { - defm FMADD_Q : FPFMA_rrr_frm_m; - defm FMSUB_Q : FPFMA_rrr_frm_m; - defm FNMSUB_Q : FPFMA_rrr_frm_m; - defm FNMADD_Q : FPFMA_rrr_frm_m; + let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in { +defm FMADD_Q : FPFMA_rrr_frm_m; +defm FMSUB_Q : FPFMA_rrr_frm_m; +defm FNMSUB_Q : FPFMA_rrr_frm_m; +defm FNMADD_Q : FPFMA_rrr_frm_m; + } - defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>; - defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>; + let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in { +defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>; +defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>; + } + let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>; + let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in defm FDIV_Q : FPALU_rr_frm_m<0b000, "fdiv.q", Ext>; defm FSQRT_Q : FPUnaryOp_r_frm_m<0b010, 0b0, Ext, Ext.PrimaryTy, - Ext.PrimaryTy, "fsqrt.q">; + Ext.PrimaryTy, "fsqrt.q">, + Sched<[WriteFSqrt128, ReadFSqrt128]>; - let mayRaiseFPException = 0 in { + let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128], + mayRaiseFPException = 0 in { defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>; defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>; defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>; } - defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>; - defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>; + let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in { +defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>; +defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", 
Ext, Commutable = 1>; + } defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b010, 0b00011, Ext, Ext.F32Ty, -Ext.PrimaryTy, "fcvt.s.q">; +Ext.PrimaryTy, "fcvt.s.q">, + Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>; defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b0, Ext, - Ext.PrimaryTy, Ext.F32Ty, "fcvt.q.s">; + Ext.PrimaryTy, Ext.F32Ty, + "fcvt.q.s">, + Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>; defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b011, 0b00011, Ext, Ext.F64Ty, -Ext.Pr
[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139508 >From 6dc27676de2a685404abd0cfd12cff95703a1cf1 Mon Sep 17 00:00:00 2001 From: Iris Shi <0...@owo.li> Date: Mon, 12 May 2025 15:04:28 +0800 Subject: [PATCH] [RISCV][MC] Add Q support for Zfa --- llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td | 29 ++ llvm/test/MC/RISCV/rv64zfa-only-valid.s| 19 + llvm/test/MC/RISCV/zfa-invalid.s | 13 +- llvm/test/MC/RISCV/zfa-quad-invalid.s | 42 +++ llvm/test/MC/RISCV/zfa-valid.s | 391 - 5 files changed, 484 insertions(+), 10 deletions(-) create mode 100644 llvm/test/MC/RISCV/rv64zfa-only-valid.s create mode 100644 llvm/test/MC/RISCV/zfa-quad-invalid.s diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index 8a449d32e0104..0ad654db42f5c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -175,6 +175,28 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>; } } // Predicates = [HasStdExtZfa, HasStdExtZfh] +let Predicates = [HasStdExtZfa, HasStdExtQ] in { +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def FLI_Q : FPFLI_r<0b011, 0b1, 0b000, FPR128, "fli.q">; + +def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>; +def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>; + +def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">; +def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, + "froundnx.q">; + +def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>; +def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>; +} // Predicates = [HasStdExtZfa, HasStdExtQ] + +let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in { + let mayRaiseFPException = 0 in { +def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b1, 0b000, GPR, FPR128, "fmvh.x.q">; +def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">; + } +} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] + 
//===--===// // Pseudo-instructions and codegen patterns //===--===// @@ -200,6 +222,13 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt", (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; } +let Predicates = [HasStdExtZfa, HasStdExtQ] in { +def : InstAlias<"fgtq.q $rd, $rs, $rt", +(FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; +def : InstAlias<"fgeq.q $rd, $rs, $rt", +(FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; +} + //===--===// // Codegen patterns //===--===// diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s b/llvm/test/MC/RISCV/rv64zfa-only-valid.s new file mode 100644 index 0..95fb253b145c1 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zfa-only-valid.s @@ -0,0 +1,19 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \ +# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# +# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \ +# RUN: -M no-aliases -show-encoding < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1 +# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6] +# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} +fmvh.x.q a1, fs1 + +# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2 +# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6] +# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} +fmvp.q.x fs1, a1, a2 diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s index c2537c3fc5102..cedc9279db3cb 100644 --- a/llvm/test/MC/RISCV/zfa-invalid.s +++ b/llvm/test/MC/RISCV/zfa-invalid.s @@ -1,5 +1,5 @@ -# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s -# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s +# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s +# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV64 %s # Invalid rounding modes # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode @@ -35,6 +35,10 @@ fli.d ft1, 3.56e+02 # CHECK-NO-RV32: error: operand must be a valid floating-point constant fli.h ft1, 1.60e+00 +
[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139495 >From 9205ac04544703aaee2a1475763ce7bc7495ccab Mon Sep 17 00:00:00 2001 From: Iris Shi <0...@owo.li> Date: Mon, 12 May 2025 13:32:41 +0800 Subject: [PATCH] [RISCV][Scheduler] Add scheduler definitions for the Q extension --- llvm/lib/Target/RISCV/RISCVInstrInfoQ.td | 98 --- llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td | 1 + llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td | 1 + llvm/lib/Target/RISCV/RISCVSchedRocket.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFive7.td| 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 1 + .../lib/Target/RISCV/RISCVSchedSpacemitX60.td | 1 + .../Target/RISCV/RISCVSchedSyntacoreSCR345.td | 1 + .../Target/RISCV/RISCVSchedSyntacoreSCR7.td | 1 + .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td | 1 + .../Target/RISCV/RISCVSchedXiangShanNanHu.td | 1 + llvm/lib/Target/RISCV/RISCVSchedule.td| 88 - 14 files changed, 158 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td index 51b9c806976f5..4dc33dd22b2aa 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td @@ -25,95 +25,119 @@ defvar QExtsRV64 = [QExt]; //===--===// let Predicates = [HasStdExtQ] in { - let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in - def FLQ : RVInstI<0b100, OPC_LOAD_FP, (outs FPR128:$rd), - (ins GPRMem:$rs1, simm12:$imm12), - "flq", "$rd, ${imm12}(${rs1})">; + def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>; + // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction // encoding. 
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in - def FSQ : RVInstS<0b100, OPC_STORE_FP, (outs), - (ins FPR128:$rs2, GPRMem:$rs1, simm12:$imm12), - "fsq", "$rs2, ${imm12}(${rs1})">; + def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>; } // Predicates = [HasStdExtQ] foreach Ext = QExts in { - defm FMADD_Q : FPFMA_rrr_frm_m; - defm FMSUB_Q : FPFMA_rrr_frm_m; - defm FNMSUB_Q : FPFMA_rrr_frm_m; - defm FNMADD_Q : FPFMA_rrr_frm_m; + let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in { +defm FMADD_Q : FPFMA_rrr_frm_m; +defm FMSUB_Q : FPFMA_rrr_frm_m; +defm FNMSUB_Q : FPFMA_rrr_frm_m; +defm FNMADD_Q : FPFMA_rrr_frm_m; + } - defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>; - defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>; + let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in { +defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>; +defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>; + } + let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>; + let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in defm FDIV_Q : FPALU_rr_frm_m<0b000, "fdiv.q", Ext>; defm FSQRT_Q : FPUnaryOp_r_frm_m<0b010, 0b0, Ext, Ext.PrimaryTy, - Ext.PrimaryTy, "fsqrt.q">; + Ext.PrimaryTy, "fsqrt.q">, + Sched<[WriteFSqrt128, ReadFSqrt128]>; - let mayRaiseFPException = 0 in { + let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128], + mayRaiseFPException = 0 in { defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>; defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>; defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>; } - defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>; - defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>; + let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in { +defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>; +defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", 
Ext, Commutable = 1>; + } defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b010, 0b00011, Ext, Ext.F32Ty, -Ext.PrimaryTy, "fcvt.s.q">; +Ext.PrimaryTy, "fcvt.s.q">, + Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>; defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b0, Ext, - Ext.PrimaryTy, Ext.F32Ty, "fcvt.q.s">; + Ext.PrimaryTy, Ext.F32Ty, + "fcvt.q.s">, + Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>; defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b011, 0b00011, Ext, Ext.F64Ty, -Ext.Pr
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port InitUndef to NPM (PR #138495)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138495 >From 476894ffa1fed64724b91c8b1db9391e09295be6 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 5 May 2025 08:47:42 + Subject: [PATCH 1/2] [CodeGen][NPM] Port InitUndef to NPM --- llvm/include/llvm/CodeGen/InitUndef.h | 24 + llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/InitUndef.cpp| 50 +-- llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/test/CodeGen/AArch64/init-undef.mir | 3 ++ llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 4 +- .../rvv/handle-noreg-with-implicit-def.mir| 2 + .../rvv/subregister-undef-early-clobber.mir | 1 + .../RISCV/rvv/undef-earlyclobber-chain.mir| 1 + 12 files changed, 73 insertions(+), 20 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/InitUndef.h diff --git a/llvm/include/llvm/CodeGen/InitUndef.h b/llvm/include/llvm/CodeGen/InitUndef.h new file mode 100644 index 0..7274824a74905 --- /dev/null +++ b/llvm/include/llvm/CodeGen/InitUndef.h @@ -0,0 +1,24 @@ +//===- llvm/CodeGen/InitUndef.h *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_INITUNDEF_H +#define LLVM_CODEGEN_INITUNDEF_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class InitUndefPass : public PassInfoMixin<InitUndefPass> { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_INITUNDEF_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 5a282d17b72c8..a3ebd2a6d15ca 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -312,7 +312,7 @@ void initializeTargetTransformInfoWrapperPassPass(PassRegistry &); void initializeTwoAddressInstructionLegacyPassPass(PassRegistry &); void initializeTypeBasedAAWrapperPassPass(PassRegistry &); void initializeTypePromotionLegacyPass(PassRegistry &); -void initializeInitUndefPass(PassRegistry &); +void initializeInitUndefLegacyPass(PassRegistry &); void initializeUniformityInfoWrapperPassPass(PassRegistry &); void initializeUnifyLoopExitsLegacyPassPass(PassRegistry &); void initializeUnpackMachineBundlesPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 29bc432ba3d5d..a3f439119b7da 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -43,6 +43,7 @@ #include "llvm/CodeGen/GlobalMerge.h" #include "llvm/CodeGen/GlobalMergeFunctions.h" #include "llvm/CodeGen/IndirectBrExpand.h" +#include "llvm/CodeGen/InitUndef.h" #include "llvm/CodeGen/InterleavedAccess.h" #include "llvm/CodeGen/InterleavedLoadCombine.h" #include "llvm/CodeGen/JMCInstrumenter.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index c69573ee3ed97..436b26852ce90 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ 
b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -148,6 +148,7 @@ MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass()) MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass()) MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass()) MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", FixupStatepointCallerSavedPass()) +MACHINE_FUNCTION_PASS("init-undef", InitUndefPass()) MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass()) MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass()) MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass()) @@ -304,7 +305,6 @@ DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass) DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass) DUMMY_MACHINE_FUNCTION_PASS("gc-empty-basic-blocks", GCEmptyBasicBlocksPass) DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass) -DUMMY_MACHINE_FUNCTION_PASS("init-undef-pass", InitUndefPass) DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass) DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass) DUMMY_MACHINE_FUNCTION_PASS("kcfi", MachineKCFIPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 5250534d8a4e4..aa3591cb6be58 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -54,7 +54,7 @@ void llvm::initializeC
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port ProcessImplicitDefs to NPM (PR #138829)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138829 >From dbd76c614cb19179ffc0a20a19341a7e58a1431b Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 6 May 2025 14:12:36 + Subject: [PATCH 1/2] [CodeGen][NPM] Port ProcessImplicitDefs to NPM --- llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 57 +-- llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 7 ++- .../CodeGen/X86/unreachable-mbb-undef-phi.mir | 1 + 8 files changed, 50 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index e56c608c8e2f6..c858ae1177d72 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -247,7 +247,7 @@ void initializePostRASchedulerLegacyPass(PassRegistry &); void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry &); void initializePrintFunctionPassWrapperPass(PassRegistry &); void initializePrintModulePassWrapperPass(PassRegistry &); -void initializeProcessImplicitDefsPass(PassRegistry &); +void initializeProcessImplicitDefsLegacyPass(PassRegistry &); void initializeProfileSummaryInfoWrapperPassPass(PassRegistry &); void initializePromoteLegacyPassPass(PassRegistry &); void initializeRABasicPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index bd7c0da3a6dde..6d0aaee32b33e 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -71,6 +71,7 @@ #include "llvm/CodeGen/PeepholeOptimizer.h" #include "llvm/CodeGen/PostRASchedulerList.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" +#include "llvm/CodeGen/ProcessImplicitDefs.h" #include "llvm/CodeGen/RegAllocEvictionAdvisor.h" #include 
"llvm/CodeGen/RegAllocFast.h" #include "llvm/CodeGen/RegAllocGreedyPass.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index f189aaa5eda2b..63ba0d0a84a0c 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -150,6 +150,7 @@ MACHINE_FUNCTION_PASS("print", MachineUniformityPrinterPass(errs())) MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(errs())) MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(errs())) +MACHINE_FUNCTION_PASS("process-imp-defs", ProcessImplicitDefsPass()) MACHINE_FUNCTION_PASS("prolog-epilog", PrologEpilogInserterPass()) MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass()) MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass()) @@ -281,7 +282,6 @@ DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", StaticDataSplitter) DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", MachineFunctionSplitterPass) DUMMY_MACHINE_FUNCTION_PASS("machineinstr-printer", MachineFunctionPrinterPass) DUMMY_MACHINE_FUNCTION_PASS("mirfs-discriminators", MIRAddFSDiscriminatorsPass) -DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass) DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass) DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 065fd4704ccfb..6e12edc3d3a5b 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -110,7 +110,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePostRAMachineSinkingLegacyPass(Registry); initializePostRASchedulerLegacyPass(Registry); initializePreISelIntrinsicLoweringLegacyPassPass(Registry); - initializeProcessImplicitDefsPass(Registry); + initializeProcessImplicitDefsLegacyPass(Registry); initializeRABasicPass(Registry); 
initializeRAGreedyLegacyPass(Registry); initializeRegAllocFastPass(Registry); diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index be81ecab9c897..54fd7814ef4f3 100644 --- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -6,6 +6,7 @@ // //===--===// +#include "llvm/CodeGen/ProcessImplicitDefs.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -26,24 +27,15 @@ using namespace llvm; namespace { /// Process IMPLICIT_DEF instructions and make sure there is one implicit_def /// for each use. Add isUndef marker to implicit_def defs and their uses. -class ProcessImplicitDefs : public MachineFunctionPass { -
[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #138828)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138828 >From a9bab6452880f4200f4ce2d8c938eacd68d6bbc7 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 6 May 2025 11:04:05 + Subject: [PATCH 1/2] [CodeGen][NPM] Register Function Passes --- llvm/include/llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/Passes/PassBuilder.cpp | 4 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 6 +++--- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index c6c00e8f25882..8717b79b26968 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -70,7 +70,6 @@ FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("select-optimize", SelectOptimizePass(TM)) FUNCTION_PASS("sjlj-eh-prepare", SjLjEHPreparePass(TM)) FUNCTION_PASS("stack-protector", StackProtectorPass(TM)) -FUNCTION_PASS("tlshoist", TLSVariableHoistPass()) FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass()) FUNCTION_PASS("verify", VerifierPass()) FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass()) @@ -279,6 +278,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS( #ifndef DUMMY_FUNCTION_PASS #define DUMMY_FUNCTION_PASS(NAME, PASS_NAME) #endif +DUMMY_FUNCTION_PASS("tlshoist", TLSVariableHoistPass) #undef DUMMY_FUNCTION_PASS #ifndef DUMMY_MACHINE_MODULE_PASS diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 5f7ce13ad8a3e..a9d192a7fad55 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -95,6 +95,7 @@ #include "llvm/CodeGen/ExpandLargeDivRem.h" #include "llvm/CodeGen/ExpandMemCmp.h" #include "llvm/CodeGen/ExpandPostRAPseudos.h" +#include "llvm/CodeGen/ExpandReductions.h" #include "llvm/CodeGen/FEntryInserter.h" #include "llvm/CodeGen/FinalizeISel.h" #include "llvm/CodeGen/FixupStatepointCallerSaved.h" @@ -155,6 +156,7 
@@ #include "llvm/CodeGen/RemoveLoadsIntoFakeUses.h" #include "llvm/CodeGen/RemoveRedundantDebugValues.h" #include "llvm/CodeGen/RenameIndependentSubregs.h" +#include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" #include "llvm/CodeGen/SanitizerBinaryMetadata.h" #include "llvm/CodeGen/SelectOptimize.h" @@ -522,6 +524,8 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO, PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #include "PassRegistry.def" +#define FUNCTION_PASS(NAME, CREATE_PASS) \ + PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) \ diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 0d26b12a4a5e7..634a7fb6eb8e9 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -7,11 +7,11 @@ ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) +; GCN-O0: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Complete optimized regalloc pipeline (PR #138491)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138491 >From dc9a3165d3625002d2122dfd0e1dbe262a399e74 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 5 May 2025 06:30:03 + Subject: [PATCH] [AMDGPU][NPM] Complete optimized regalloc pipeline Also fill in some other passes. --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 2 +- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 41 +-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 1 + llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 7 +--- 4 files changed, 42 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index a3b19af4adc39..29bc432ba3d5d 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -574,7 +574,7 @@ template class CodeGenPassBuilder { /// Insert InsertedPass pass after TargetPass pass. /// Only machine function passes are supported. template - void insertPass(InsertedPassT &&Pass) { + void insertPass(InsertedPassT &&Pass) const { AfterCallbacks.emplace_back( [&](StringRef Name, MachineFunctionPassManager &MFPM) mutable { if (Name == TargetPassT::name()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 680a3fb78a6e3..eee520435f6d9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -2174,7 +2174,44 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization( addPass(SIShrinkInstructionsPass()); } +void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc( +AddMachinePass &addPass) const { + if (EnableDCEInRA) +insertPass(DeadMachineInstructionElimPass()); + + // FIXME: when an instruction has a Killed operand, and the instruction is + // inside a bundle, seems only the BUNDLE instruction appears as the Kills of + // the register in LiveVariables, this would trigger a failure in verifier, + // we should fix 
it and enable the verifier. + if (OptVGPRLiveRange) +insertPass>( +SIOptimizeVGPRLiveRangePass()); + + // This must be run immediately after phi elimination and before + // TwoAddressInstructions, otherwise the processing of the tied operand of + // SI_ELSE will introduce a copy of the tied operand source after the else. + insertPass(SILowerControlFlowPass()); + + if (EnableRewritePartialRegUses) +insertPass(GCNRewritePartialRegUsesPass()); + + if (isPassEnabled(EnablePreRAOptimizations)) +insertPass(GCNPreRAOptimizationsPass()); + // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation + // instructions that cause scheduling barriers. + insertPass(SIWholeQuadModePass()); + + if (OptExecMaskPreRA) +insertPass(SIOptimizeExecMaskingPreRAPass()); + + // This is not an essential optimization and it has a noticeable impact on + // compilation time, so we only enable it from O2. + if (TM.getOptLevel() > CodeGenOptLevel::Less) +insertPass(SIFormMemoryClausesPass()); + + Base::addOptimizedRegAlloc(addPass); +} Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized( AddMachinePass &addPass) const { @@ -2202,21 +2239,19 @@ Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized( addPass(SIPreAllocateWWMRegsPass()); // For allocating other wwm register operands. - // addRegAlloc(addPass, RegAllocPhase::WWM); addPass(RAGreedyPass({onlyAllocateWWMRegs, "wwm"})); addPass(SILowerWWMCopiesPass()); addPass(VirtRegRewriterPass(false)); addPass(AMDGPUReserveWWMRegsPass()); // For allocating per-thread VGPRs. 
- // addRegAlloc(addPass, RegAllocPhase::VGPR); addPass(RAGreedyPass({onlyAllocateVGPRs, "vgpr"})); addPreRewrite(addPass); addPass(VirtRegRewriterPass(true)); - // TODO: addPass(AMDGPUMarkLastScratchLoadPass()); + addPass(AMDGPUMarkLastScratchLoadPass()); return Error::success(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 589123274d0f5..3c62cd19c6e57 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -182,6 +182,7 @@ class AMDGPUCodeGenPassBuilder void addPostRegAlloc(AddMachinePass &) const; void addPreEmitPass(AddMachinePass &) const; Error addRegAssignmentOptimized(AddMachinePass &) const; + void addOptimizedRegAlloc(AddMachinePass &) const; /// Check if a pass is enabled given \p Opt option. The option always /// overrides defaults if explicitly used. Otherwise its default will be used diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index e9b57515e71e0..91c15565762de 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -7,14 +7,11 @@ ; RUN: llc -O3 -enable-new-pm -mtrip
[llvm-branch-commits] [llvm] [CodeGen][NPM] Read TargetMachine's EnableIPRA option (PR #138670)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138670 >From ea3103a3be32909978894364c1b481cb80c2fc67 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 6 May 2025 09:55:07 + Subject: [PATCH] [CodeGen][NPM] Read TargetMachine's EnableIPRA option --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 4 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index a3f439119b7da..bd7c0da3a6dde 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -172,6 +172,10 @@ template class CodeGenPassBuilder { // LLVMTM ctor. See TargetMachine::setGlobalISel for example. if (Opt.EnableIPRA) TM.Options.EnableIPRA = *Opt.EnableIPRA; +else { + // If not explicitly specified, use target default. + TM.Options.EnableIPRA |= TM.useIPRA(); +} if (Opt.EnableGlobalISelAbort) TM.Options.GlobalISelAbort = *Opt.EnableGlobalISelAbort; diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 232d5b86ac035..0d26b12a4a5e7 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -7,11 +7,11 @@ ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) +; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) -; GCN-O2: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-r
[llvm-branch-commits] [llvm] [CodeGen][NPM] Read TargetMachine's EnableIPRA option (PR #138670)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138670 >From ea3103a3be32909978894364c1b481cb80c2fc67 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 6 May 2025 09:55:07 + Subject: [PATCH] [CodeGen][NPM] Read TargetMachine's EnableIPRA option --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 4 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index a3f439119b7da..bd7c0da3a6dde 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -172,6 +172,10 @@ template class CodeGenPassBuilder { // LLVMTM ctor. See TargetMachine::setGlobalISel for example. if (Opt.EnableIPRA) TM.Options.EnableIPRA = *Opt.EnableIPRA; +else { + // If not explicitly specified, use target default. + TM.Options.EnableIPRA |= TM.useIPRA(); +} if (Opt.EnableGlobalISelAbort) TM.Options.GlobalISelAbort = *Opt.EnableGlobalISelAbort; diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 232d5b86ac035..0d26b12a4a5e7 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -7,11 +7,11 @@ ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) +; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) -; GCN-O2: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-r
[llvm-branch-commits] [llvm] [CodeGen][NPM] Account inserted passes for -start/stop options (PR #138830)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138830 >From 67f7f32e9ca0a8befc28b7504e9e7f141d771eae Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 7 May 2025 08:57:31 + Subject: [PATCH] [CodeGen][NPM] Account inserted passes for -start/stop options --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 6 -- llvm/test/tools/llc/new-pm/start-stop-inserted.ll | 15 +++ 2 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 llvm/test/tools/llc/new-pm/start-stop-inserted.ll diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 6d0aaee32b33e..752ed6ae08a96 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -583,8 +583,10 @@ template class CodeGenPassBuilder { void insertPass(InsertedPassT &&Pass) const { AfterCallbacks.emplace_back( [&](StringRef Name, MachineFunctionPassManager &MFPM) mutable { - if (Name == TargetPassT::name()) -MFPM.addPass(std::forward(Pass)); + if (Name == TargetPassT::name()) { +if (runBeforeAdding(InsertedPassT::name())) + MFPM.addPass(std::forward(Pass)); + } }); } diff --git a/llvm/test/tools/llc/new-pm/start-stop-inserted.ll b/llvm/test/tools/llc/new-pm/start-stop-inserted.ll new file mode 100644 index 0..ce5ad2d9e5065 --- /dev/null +++ b/llvm/test/tools/llc/new-pm/start-stop-inserted.ll @@ -0,0 +1,15 @@ +; REQUIRES: amdgpu-registered-target + +; AMDGPU inserts the fourth instance of dead-mi-elimination pass after detect-dead-lanes +; This checks that the pipeline stops before that. + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O3 -enable-new-pm -stop-before=dead-mi-elimination,4 --print-pipeline-passes -filetype=null %s | FileCheck %s + +; There is no way to -start-after an inserted pass right now. 
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -O3 -enable-new-pm -start-after=dead-mi-elimination,4 --print-pipeline-passes -filetype=null %s + + +; CHECK: dead-mi-elimination +; CHECK: dead-mi-elimination +; CHECK: dead-mi-elimination +; CHECK-NOT: dead-mi-elimination ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] VirtRegRewriter: Set VirtReg flag (PR #138660)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138660 >From 838e904009527297d38e79572745a810cfa34d60 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 6 May 2025 09:05:52 + Subject: [PATCH] [CodeGen][NPM] VirtRegRewriter: Set VirtReg flag --- llvm/include/llvm/CodeGen/VirtRegMap.h | 8 llvm/lib/CodeGen/VirtRegMap.cpp| 2 ++ 2 files changed, 10 insertions(+) diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h b/llvm/include/llvm/CodeGen/VirtRegMap.h index f5fba0d65401e..37d382650ac1f 100644 --- a/llvm/include/llvm/CodeGen/VirtRegMap.h +++ b/llvm/include/llvm/CodeGen/VirtRegMap.h @@ -248,6 +248,14 @@ class VirtRegRewriterPass : public PassInfoMixin { static bool isRequired() { return true; } void printPipeline(raw_ostream &OS, function_ref) const; + + MachineFunctionProperties getSetProperties() const { +if (ClearVirtRegs) { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); +} +return {}; + } }; } // end llvm namespace diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp index 49e819e2d10f7..839905d5e3860 100644 --- a/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/llvm/lib/CodeGen/VirtRegMap.cpp @@ -302,6 +302,8 @@ bool VirtRegRewriterLegacy::runOnMachineFunction(MachineFunction &MF) { PreservedAnalyses VirtRegRewriterPass::run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { + MFPropsModifier _(*this, MF); + VirtRegMap &VRM = MFAM.getResult(MF); LiveIntervals &LIS = MFAM.getResult(MF); LiveRegMatrix &LRM = MFAM.getResult(MF); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Register AMDGPUWaitSGPRHazards pass (PR #138496)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138496 >From ff19035e9f213592109e7ee2c4fb2b667ba9a333 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 5 May 2025 08:58:58 + Subject: [PATCH] [AMDGPU][NPM] Register AMDGPUWaitSGPRHazards pass --- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 13453963eec6d..0772eb8fb7a72 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -107,6 +107,7 @@ MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass()) MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass()) MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", AMDGPUPreloadKernArgPrologPass()) MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass()) +MACHINE_FUNCTION_PASS("amdgpu-wait-sgpr-hazards", AMDGPUWaitSGPRHazardsPass()) MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass()) MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass()) MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass()) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index e00b7ff83e322..468e4208c510a 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -7,11 +7,11 @@ ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) +; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) -; GCN-O2: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port InitUndef to NPM (PR #138495)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138495 >From 476894ffa1fed64724b91c8b1db9391e09295be6 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 5 May 2025 08:47:42 + Subject: [PATCH 1/2] [CodeGen][NPM] Port InitUndef to NPM --- llvm/include/llvm/CodeGen/InitUndef.h | 24 + llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/InitUndef.cpp| 50 +-- llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/test/CodeGen/AArch64/init-undef.mir | 3 ++ llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 4 +- .../rvv/handle-noreg-with-implicit-def.mir| 2 + .../rvv/subregister-undef-early-clobber.mir | 1 + .../RISCV/rvv/undef-earlyclobber-chain.mir| 1 + 12 files changed, 73 insertions(+), 20 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/InitUndef.h diff --git a/llvm/include/llvm/CodeGen/InitUndef.h b/llvm/include/llvm/CodeGen/InitUndef.h new file mode 100644 index 0..7274824a74905 --- /dev/null +++ b/llvm/include/llvm/CodeGen/InitUndef.h @@ -0,0 +1,24 @@ +//===- llvm/CodeGen/InitUndef.h *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_INITUNDEF_H +#define LLVM_CODEGEN_INITUNDEF_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class InitUndefPass : public PassInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_INITUNDEF_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 5a282d17b72c8..a3ebd2a6d15ca 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -312,7 +312,7 @@ void initializeTargetTransformInfoWrapperPassPass(PassRegistry &); void initializeTwoAddressInstructionLegacyPassPass(PassRegistry &); void initializeTypeBasedAAWrapperPassPass(PassRegistry &); void initializeTypePromotionLegacyPass(PassRegistry &); -void initializeInitUndefPass(PassRegistry &); +void initializeInitUndefLegacyPass(PassRegistry &); void initializeUniformityInfoWrapperPassPass(PassRegistry &); void initializeUnifyLoopExitsLegacyPassPass(PassRegistry &); void initializeUnpackMachineBundlesPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 29bc432ba3d5d..a3f439119b7da 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -43,6 +43,7 @@ #include "llvm/CodeGen/GlobalMerge.h" #include "llvm/CodeGen/GlobalMergeFunctions.h" #include "llvm/CodeGen/IndirectBrExpand.h" +#include "llvm/CodeGen/InitUndef.h" #include "llvm/CodeGen/InterleavedAccess.h" #include "llvm/CodeGen/InterleavedLoadCombine.h" #include "llvm/CodeGen/JMCInstrumenter.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index c69573ee3ed97..436b26852ce90 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ 
b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -148,6 +148,7 @@ MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass()) MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass()) MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass()) MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", FixupStatepointCallerSavedPass()) +MACHINE_FUNCTION_PASS("init-undef", InitUndefPass()) MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass()) MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass()) MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass()) @@ -304,7 +305,6 @@ DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass) DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass) DUMMY_MACHINE_FUNCTION_PASS("gc-empty-basic-blocks", GCEmptyBasicBlocksPass) DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass) -DUMMY_MACHINE_FUNCTION_PASS("init-undef-pass", InitUndefPass) DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass) DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass) DUMMY_MACHINE_FUNCTION_PASS("kcfi", MachineKCFIPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 5250534d8a4e4..aa3591cb6be58 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -54,7 +54,7 @@ void llvm::initializeC
[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #138828)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138828 >From a9bab6452880f4200f4ce2d8c938eacd68d6bbc7 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 6 May 2025 11:04:05 + Subject: [PATCH 1/2] [CodeGen][NPM] Register Function Passes --- llvm/include/llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/Passes/PassBuilder.cpp | 4 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 6 +++--- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index c6c00e8f25882..8717b79b26968 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -70,7 +70,6 @@ FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("select-optimize", SelectOptimizePass(TM)) FUNCTION_PASS("sjlj-eh-prepare", SjLjEHPreparePass(TM)) FUNCTION_PASS("stack-protector", StackProtectorPass(TM)) -FUNCTION_PASS("tlshoist", TLSVariableHoistPass()) FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass()) FUNCTION_PASS("verify", VerifierPass()) FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass()) @@ -279,6 +278,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS( #ifndef DUMMY_FUNCTION_PASS #define DUMMY_FUNCTION_PASS(NAME, PASS_NAME) #endif +DUMMY_FUNCTION_PASS("tlshoist", TLSVariableHoistPass) #undef DUMMY_FUNCTION_PASS #ifndef DUMMY_MACHINE_MODULE_PASS diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 5f7ce13ad8a3e..a9d192a7fad55 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -95,6 +95,7 @@ #include "llvm/CodeGen/ExpandLargeDivRem.h" #include "llvm/CodeGen/ExpandMemCmp.h" #include "llvm/CodeGen/ExpandPostRAPseudos.h" +#include "llvm/CodeGen/ExpandReductions.h" #include "llvm/CodeGen/FEntryInserter.h" #include "llvm/CodeGen/FinalizeISel.h" #include "llvm/CodeGen/FixupStatepointCallerSaved.h" @@ -155,6 +156,7 
@@ #include "llvm/CodeGen/RemoveLoadsIntoFakeUses.h" #include "llvm/CodeGen/RemoveRedundantDebugValues.h" #include "llvm/CodeGen/RenameIndependentSubregs.h" +#include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" #include "llvm/CodeGen/SanitizerBinaryMetadata.h" #include "llvm/CodeGen/SelectOptimize.h" @@ -522,6 +524,8 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO, PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #include "PassRegistry.def" +#define FUNCTION_PASS(NAME, CREATE_PASS) \ + PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) \ diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 0d26b12a4a5e7..634a7fb6eb8e9 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -7,11 +7,11 @@ ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) +; GCN-O0: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Complete optimized regalloc pipeline (PR #138491)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138491 >From dc9a3165d3625002d2122dfd0e1dbe262a399e74 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 5 May 2025 06:30:03 + Subject: [PATCH] [AMDGPU][NPM] Complete optimized regalloc pipeline Also fill in some other passes. --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 2 +- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 41 +-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 1 + llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 7 +--- 4 files changed, 42 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index a3b19af4adc39..29bc432ba3d5d 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -574,7 +574,7 @@ template class CodeGenPassBuilder { /// Insert InsertedPass pass after TargetPass pass. /// Only machine function passes are supported. template - void insertPass(InsertedPassT &&Pass) { + void insertPass(InsertedPassT &&Pass) const { AfterCallbacks.emplace_back( [&](StringRef Name, MachineFunctionPassManager &MFPM) mutable { if (Name == TargetPassT::name()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 680a3fb78a6e3..eee520435f6d9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -2174,7 +2174,44 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization( addPass(SIShrinkInstructionsPass()); } +void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc( +AddMachinePass &addPass) const { + if (EnableDCEInRA) +insertPass(DeadMachineInstructionElimPass()); + + // FIXME: when an instruction has a Killed operand, and the instruction is + // inside a bundle, seems only the BUNDLE instruction appears as the Kills of + // the register in LiveVariables, this would trigger a failure in verifier, + // we should fix 
it and enable the verifier. + if (OptVGPRLiveRange) +insertPass>( +SIOptimizeVGPRLiveRangePass()); + + // This must be run immediately after phi elimination and before + // TwoAddressInstructions, otherwise the processing of the tied operand of + // SI_ELSE will introduce a copy of the tied operand source after the else. + insertPass(SILowerControlFlowPass()); + + if (EnableRewritePartialRegUses) +insertPass(GCNRewritePartialRegUsesPass()); + + if (isPassEnabled(EnablePreRAOptimizations)) +insertPass(GCNPreRAOptimizationsPass()); + // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation + // instructions that cause scheduling barriers. + insertPass(SIWholeQuadModePass()); + + if (OptExecMaskPreRA) +insertPass(SIOptimizeExecMaskingPreRAPass()); + + // This is not an essential optimization and it has a noticeable impact on + // compilation time, so we only enable it from O2. + if (TM.getOptLevel() > CodeGenOptLevel::Less) +insertPass(SIFormMemoryClausesPass()); + + Base::addOptimizedRegAlloc(addPass); +} Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized( AddMachinePass &addPass) const { @@ -2202,21 +2239,19 @@ Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized( addPass(SIPreAllocateWWMRegsPass()); // For allocating other wwm register operands. - // addRegAlloc(addPass, RegAllocPhase::WWM); addPass(RAGreedyPass({onlyAllocateWWMRegs, "wwm"})); addPass(SILowerWWMCopiesPass()); addPass(VirtRegRewriterPass(false)); addPass(AMDGPUReserveWWMRegsPass()); // For allocating per-thread VGPRs. 
- // addRegAlloc(addPass, RegAllocPhase::VGPR); addPass(RAGreedyPass({onlyAllocateVGPRs, "vgpr"})); addPreRewrite(addPass); addPass(VirtRegRewriterPass(true)); - // TODO: addPass(AMDGPUMarkLastScratchLoadPass()); + addPass(AMDGPUMarkLastScratchLoadPass()); return Error::success(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 589123274d0f5..3c62cd19c6e57 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -182,6 +182,7 @@ class AMDGPUCodeGenPassBuilder void addPostRegAlloc(AddMachinePass &) const; void addPreEmitPass(AddMachinePass &) const; Error addRegAssignmentOptimized(AddMachinePass &) const; + void addOptimizedRegAlloc(AddMachinePass &) const; /// Check if a pass is enabled given \p Opt option. The option always /// overrides defaults if explicitly used. Otherwise its default will be used diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index e9b57515e71e0..91c15565762de 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -7,14 +7,11 @@ ; RUN: llc -O3 -enable-new-pm -mtrip
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Register AMDGPUWaitSGPRHazards pass (PR #138496)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138496 >From ff19035e9f213592109e7ee2c4fb2b667ba9a333 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 5 May 2025 08:58:58 + Subject: [PATCH] [AMDGPU][NPM] Register AMDGPUWaitSGPRHazards pass --- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 13453963eec6d..0772eb8fb7a72 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -107,6 +107,7 @@ MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass()) MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass()) MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", AMDGPUPreloadKernArgPrologPass()) MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass()) +MACHINE_FUNCTION_PASS("amdgpu-wait-sgpr-hazards", AMDGPUWaitSGPRHazardsPass()) MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass()) MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass()) MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass()) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index e00b7ff83e322..468e4208c510a 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -7,11 +7,11 @@ ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) +; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) -; GCN-O2: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port PostRAMachineSinking to NPM (PR #138497)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138497 >From 41492e43dad53cefb3ee220a13e75f062351c1cc Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 5 May 2025 09:17:40 + Subject: [PATCH] [CodeGen][NPM] Port PostRAMachineSinking to NPM --- llvm/include/llvm/CodeGen/MachineSink.h | 11 +++ llvm/include/llvm/InitializePasses.h | 2 +- .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/MachineSink.cpp | 31 +++ .../AArch64/bisect-post-ra-machine-sink.mir | 1 + llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 4 +-- .../CodeGen/AMDGPU/postra-machine-sink.mir| 1 + llvm/test/CodeGen/X86/pr38952.mir | 1 + 9 files changed, 44 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineSink.h b/llvm/include/llvm/CodeGen/MachineSink.h index 71bd7229b7598..eb9548dc82250 100644 --- a/llvm/include/llvm/CodeGen/MachineSink.h +++ b/llvm/include/llvm/CodeGen/MachineSink.h @@ -26,5 +26,16 @@ class MachineSinkingPass : public PassInfoMixin { function_ref MapClassName2PassName); }; +class PostRAMachineSinkingPass +: public PassInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &); + + MachineFunctionProperties getRequiredProperties() const { +return MachineFunctionProperties().set( +MachineFunctionProperties::Property::NoVRegs); + } +}; + } // namespace llvm #endif // LLVM_CODEGEN_MACHINESINK_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index a3ebd2a6d15ca..e56c608c8e2f6 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -242,7 +242,7 @@ void initializePostDominatorTreeWrapperPassPass(PassRegistry &); void initializePostInlineEntryExitInstrumenterPass(PassRegistry &); void initializePostMachineSchedulerLegacyPass(PassRegistry &); void initializePostRAHazardRecognizerLegacyPass(PassRegistry &); -void initializePostRAMachineSinkingPass(PassRegistry &); 
+void initializePostRAMachineSinkingLegacyPass(PassRegistry &); void initializePostRASchedulerLegacyPass(PassRegistry &); void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry &); void initializePrintFunctionPassWrapperPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 436b26852ce90..c6c00e8f25882 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -164,6 +164,7 @@ MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass()) MACHINE_FUNCTION_PASS("post-RA-hazard-rec", PostRAHazardRecognizerPass()) MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass(TM)) MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass(TM)) +MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass()) MACHINE_FUNCTION_PASS("post-ra-pseudos", ExpandPostRAPseudosPass()) MACHINE_FUNCTION_PASS("print", PrintMIRPass()) MACHINE_FUNCTION_PASS("print", LiveDebugVariablesPrinterPass(errs())) @@ -315,7 +316,6 @@ DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", StaticDataSplitter) DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", MachineFunctionSplitterPass) DUMMY_MACHINE_FUNCTION_PASS("machineinstr-printer", MachineFunctionPrinterPass) DUMMY_MACHINE_FUNCTION_PASS("mirfs-discriminators", MIRAddFSDiscriminatorsPass) -DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass) DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass) DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index aa3591cb6be58..065fd4704ccfb 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -107,7 +107,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePeepholeOptimizerLegacyPass(Registry); 
initializePostMachineSchedulerLegacyPass(Registry); initializePostRAHazardRecognizerLegacyPass(Registry); - initializePostRAMachineSinkingPass(Registry); + initializePostRAMachineSinkingLegacyPass(Registry); initializePostRASchedulerLegacyPass(Registry); initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index aa2987b6710a3..be1a3ac125c65 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -2068,12 +2068,12 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy( //===--===// namespace { -class PostRAMachineSinking : public MachineFunctionPass {
[llvm-branch-commits] [llvm] [CodeGen][NPM] VirtRegRewriter: Set VirtReg flag (PR #138660)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138660 >From 838e904009527297d38e79572745a810cfa34d60 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 6 May 2025 09:05:52 + Subject: [PATCH] [CodeGen][NPM] VirtRegRewriter: Set VirtReg flag --- llvm/include/llvm/CodeGen/VirtRegMap.h | 8 llvm/lib/CodeGen/VirtRegMap.cpp| 2 ++ 2 files changed, 10 insertions(+) diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h b/llvm/include/llvm/CodeGen/VirtRegMap.h index f5fba0d65401e..37d382650ac1f 100644 --- a/llvm/include/llvm/CodeGen/VirtRegMap.h +++ b/llvm/include/llvm/CodeGen/VirtRegMap.h @@ -248,6 +248,14 @@ class VirtRegRewriterPass : public PassInfoMixin { static bool isRequired() { return true; } void printPipeline(raw_ostream &OS, function_ref) const; + + MachineFunctionProperties getSetProperties() const { +if (ClearVirtRegs) { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); +} +return {}; + } }; } // end llvm namespace diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp index 49e819e2d10f7..839905d5e3860 100644 --- a/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/llvm/lib/CodeGen/VirtRegMap.cpp @@ -302,6 +302,8 @@ bool VirtRegRewriterLegacy::runOnMachineFunction(MachineFunction &MF) { PreservedAnalyses VirtRegRewriterPass::run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { + MFPropsModifier _(*this, MF); + VirtRegMap &VRM = MFAM.getResult(MF); LiveIntervals &LIS = MFAM.getResult(MF); LiveRegMatrix &LRM = MFAM.getResult(MF); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Account inserted passes for -start/stop options (PR #138830)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138830 >From 67f7f32e9ca0a8befc28b7504e9e7f141d771eae Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 7 May 2025 08:57:31 + Subject: [PATCH] [CodeGen][NPM] Account inserted passes for -start/stop options --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 6 -- llvm/test/tools/llc/new-pm/start-stop-inserted.ll | 15 +++ 2 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 llvm/test/tools/llc/new-pm/start-stop-inserted.ll diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 6d0aaee32b33e..752ed6ae08a96 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -583,8 +583,10 @@ template class CodeGenPassBuilder { void insertPass(InsertedPassT &&Pass) const { AfterCallbacks.emplace_back( [&](StringRef Name, MachineFunctionPassManager &MFPM) mutable { - if (Name == TargetPassT::name()) -MFPM.addPass(std::forward(Pass)); + if (Name == TargetPassT::name()) { +if (runBeforeAdding(InsertedPassT::name())) + MFPM.addPass(std::forward(Pass)); + } }); } diff --git a/llvm/test/tools/llc/new-pm/start-stop-inserted.ll b/llvm/test/tools/llc/new-pm/start-stop-inserted.ll new file mode 100644 index 0..ce5ad2d9e5065 --- /dev/null +++ b/llvm/test/tools/llc/new-pm/start-stop-inserted.ll @@ -0,0 +1,15 @@ +; REQUIRES: amdgpu-registered-target + +; AMDGPU inserts the fourth instance of dead-mi-elimination pass after detect-dead-lanes +; This checks that the pipeline stops before that. + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O3 -enable-new-pm -stop-before=dead-mi-elimination,4 --print-pipeline-passes -filetype=null %s | FileCheck %s + +; There is no way to -start-after an inserted pass right now. 
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -O3 -enable-new-pm -start-after=dead-mi-elimination,4 --print-pipeline-passes -filetype=null %s + + +; CHECK: dead-mi-elimination +; CHECK: dead-mi-elimination +; CHECK: dead-mi-elimination +; CHECK-NOT: dead-mi-elimination ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Fill in addPreSched2 passes (PR #139516)
https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/139516 None [Patch body unavailable: the archive captured a GitHub "secondary rate limit exceeded" error page instead of the diff.] ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)
https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/139517 This replaces the Invalidate pass. [Patch body unavailable: the archive captured a GitHub "secondary rate limit exceeded" error page instead of the diff.] ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Fill in addPreSched2 passes (PR #139516)
optimisan wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/139516?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#139517** https://app.graphite.dev/github/pr/llvm/llvm-project/139517?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#139516** https://app.graphite.dev/github/pr/llvm/llvm-project/139516?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/139516?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#138830** https://app.graphite.dev/github/pr/llvm/llvm-project/138830?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138829** https://app.graphite.dev/github/pr/llvm/llvm-project/138829?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138828** https://app.graphite.dev/github/pr/llvm/llvm-project/138828?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138670** https://app.graphite.dev/github/pr/llvm/llvm-project/138670?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138660** https://app.graphite.dev/github/pr/llvm/llvm-project/138660?utm_source=stack-comment-icon"; 
target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138497** https://app.graphite.dev/github/pr/llvm/llvm-project/138497?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138496** https://app.graphite.dev/github/pr/llvm/llvm-project/138496?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138495** https://app.graphite.dev/github/pr/llvm/llvm-project/138495?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138491** https://app.graphite.dev/github/pr/llvm/llvm-project/138491?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#136818** https://app.graphite.dev/github/pr/llvm/llvm-project/136818?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/139516 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)
optimisan wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/139517?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#139517** https://app.graphite.dev/github/pr/llvm/llvm-project/139517?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/139517?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#139516** https://app.graphite.dev/github/pr/llvm/llvm-project/139516?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138830** https://app.graphite.dev/github/pr/llvm/llvm-project/138830?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138829** https://app.graphite.dev/github/pr/llvm/llvm-project/138829?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138828** https://app.graphite.dev/github/pr/llvm/llvm-project/138828?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138670** https://app.graphite.dev/github/pr/llvm/llvm-project/138670?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138660** https://app.graphite.dev/github/pr/llvm/llvm-project/138660?utm_source=stack-comment-icon"; 
target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138497** https://app.graphite.dev/github/pr/llvm/llvm-project/138497?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138496** https://app.graphite.dev/github/pr/llvm/llvm-project/138496?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138495** https://app.graphite.dev/github/pr/llvm/llvm-project/138495?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138491** https://app.graphite.dev/github/pr/llvm/llvm-project/138491?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#136818** https://app.graphite.dev/github/pr/llvm/llvm-project/136818?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/139517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Read TargetMachine's EnableIPRA option (PR #138670)
https://github.com/cdevadas approved this pull request. https://github.com/llvm/llvm-project/pull/138670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Fill in addPreSched2 passes (PR #139516)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Akshat Oke (optimisan) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/139516.diff 3 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+6) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h (+1) - (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll (+3-3) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index eee520435f6d9..89f8805a5c1a9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -2262,6 +2262,12 @@ void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const { Base::addPostRegAlloc(addPass); } +void AMDGPUCodeGenPassBuilder::addPreSched2(AddMachinePass &addPass) const { + if (TM.getOptLevel() > CodeGenOptLevel::None) +addPass(SIShrinkInstructionsPass()); + addPass(SIPostRABundlerPass()); +} + void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const { if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) { addPass(GCNCreateVOPDPass()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 3c62cd19c6e57..3b2f39c14a9bc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -183,6 +183,7 @@ class AMDGPUCodeGenPassBuilder void addPreEmitPass(AddMachinePass &) const; Error addRegAssignmentOptimized(AddMachinePass &) const; void addOptimizedRegAlloc(AddMachinePass &) const; + void addPreSched2(AddMachinePass &) const; /// Check if a pass is enabled given \p Opt option. The option always /// overrides defaults if explicitly used. 
Otherwise its default will be used diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 5a89fdeb34dfb..9b8a430aaad45 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -7,11 +7,11 @@ ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; GCN-O0: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) +; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-ins
[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/139517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Fill in addPreSched2 passes (PR #139516)
https://github.com/optimisan ready_for_review https://github.com/llvm/llvm-project/pull/139516 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)
https://github.com/optimisan ready_for_review https://github.com/llvm/llvm-project/pull/139517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/139517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Akshat Oke (optimisan) Changes This replaces the Invalidate pass. There are no cross-function analysis requirements right now, so clearing all analyses works for the last pass in the pipeline. --- Having the `InvalidateAnalysisPass<MachineFunctionAnalysis>()` is causing a problem with `ModuleToCGSCCPassAdaptor`: it deletes the machine functions of other functions, so we end up with exactly one correctly compiled MF while the rest vanish. This is because `ModuleToCGSCCPassAdaptor` propagates `PassPA` (received from the `CGSCCToFunctionPassAdaptor` that runs the actual codegen pipeline on MFs) to the next SCC. That causes `MFA` invalidation on functions in the next SCC. For us, `PassPA` happens to be returned from `invalidate<machine-function-info>`, which abandons the `MachineFunctionAnalysis`. So while the first function runs through the pipeline normally, `invalidate` also deletes the functions in the next SCC before their pipeline is run. (This seems to be the intended mechanism of the CGSCC adaptor to allow cross-SCC invalidations.) 
--- Full diff: https://github.com/llvm/llvm-project/pull/139517.diff 5 Files Affected: - (modified) llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h (+5) - (modified) llvm/include/llvm/Passes/CodeGenPassBuilder.h (+2-1) - (modified) llvm/lib/CodeGen/MachineFunctionAnalysis.cpp (+7) - (modified) llvm/lib/Passes/PassRegistry.def (+1) - (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll (+3-3) ``diff diff --git a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h index e194f4838e118..9a1e05ebb3c1b 100644 --- a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h +++ b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h @@ -45,6 +45,11 @@ class MachineFunctionAnalysis Result run(Function &F, FunctionAnalysisManager &FAM); }; +class FreeAllAnalysesPass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); +}; + } // namespace llvm #endif // LLVM_CODEGEN_MachineFunctionAnalysis diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 752ed6ae08a96..7ab0cf2824038 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -285,7 +285,8 @@ template class CodeGenPassBuilder { FunctionPassManager FPM; FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))); - FPM.addPass(InvalidateAnalysisPass()); + // Since this is the last pass in the pipeline, we can clear all analyses + FPM.addPass(FreeAllAnalysesPass()); if (this->PB.AddInCGSCCOrder) { MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( createCGSCCToFunctionPassAdaptor(std::move(FPM; diff --git a/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp index e7a4d6d61e211..25239066a10a7 100644 --- a/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -45,3 +45,10 @@ MachineFunctionAnalysis::run(Function 
&F, FunctionAnalysisManager &FAM) { return Result(std::move(MF)); } + +PreservedAnalyses FreeAllAnalysesPass::run(Function &F, + FunctionAnalysisManager &FAM) { + // Since this is the last pass in the pipeline, we can clear all analyses + FAM.clear(F, F.getName()); + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 822aa90601286..bba11759cfa9d 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -379,6 +379,7 @@ FUNCTION_PASS("extra-vector-passes", FUNCTION_PASS("fix-irreducible", FixIrreduciblePass()) FUNCTION_PASS("flatten-cfg", FlattenCFGPass()) FUNCTION_PASS("float2int", Float2IntPass()) +FUNCTION_PASS("free-all-analyses", FreeAllAnalysesPass()) FUNCTION_PASS("gc-lowering", GCLoweringPass()) FUNCTION_PASS("guard-widening", GuardWideningPass()) FUNCTION_PASS("gvn-hoist", GVNHoistPass()) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 9b8a430aaad45..009c1467eda1d 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -7,11 +7,11 @@ ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; GCN-O0: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-
[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/139517 [Patch body unavailable: the archive captured a GitHub "secondary rate limit exceeded" error page instead of the diff.] ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/139517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Introduce FreeAllAnalysesPass (PR #139517)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/139517 >From ae761dee3ece71d4813b62a2600cf4565b893239 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 12 May 2025 08:02:22 + Subject: [PATCH 1/2] [CodeGen][NPM] Introduce FreeAllAnalysesPass This replaces the Invalidate pass. --- llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h | 5 + llvm/include/llvm/Passes/CodeGenPassBuilder.h | 3 ++- llvm/lib/CodeGen/MachineFunctionAnalysis.cpp| 6 ++ llvm/lib/Passes/PassRegistry.def| 1 + llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll| 6 +++--- 5 files changed, 17 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h index e194f4838e118..9a1e05ebb3c1b 100644 --- a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h +++ b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h @@ -45,6 +45,11 @@ class MachineFunctionAnalysis Result run(Function &F, FunctionAnalysisManager &FAM); }; +class FreeAllAnalysesPass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); +}; + } // namespace llvm #endif // LLVM_CODEGEN_MachineFunctionAnalysis diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 752ed6ae08a96..7ab0cf2824038 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -285,7 +285,8 @@ template class CodeGenPassBuilder { FunctionPassManager FPM; FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))); - FPM.addPass(InvalidateAnalysisPass()); + // Since this is the last pass in the pipeline, we can clear all analyses + FPM.addPass(FreeAllAnalysesPass()); if (this->PB.AddInCGSCCOrder) { MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( createCGSCCToFunctionPassAdaptor(std::move(FPM; diff --git a/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp 
b/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp index e7a4d6d61e211..df6a4f4401d9b 100644 --- a/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -45,3 +45,9 @@ MachineFunctionAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { return Result(std::move(MF)); } + +PreservedAnalyses FreeAllAnalysesPass::run(Function &F, + FunctionAnalysisManager &FAM) { + FAM.clear(F, F.getName()); + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 822aa90601286..bba11759cfa9d 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -379,6 +379,7 @@ FUNCTION_PASS("extra-vector-passes", FUNCTION_PASS("fix-irreducible", FixIrreduciblePass()) FUNCTION_PASS("flatten-cfg", FlattenCFGPass()) FUNCTION_PASS("float2int", Float2IntPass()) +FUNCTION_PASS("free-all-analyses", FreeAllAnalysesPass()) FUNCTION_PASS("gc-lowering", GCLoweringPass()) FUNCTION_PASS("guard-widening", GuardWideningPass()) FUNCTION_PASS("gvn-hoist", GVNHoistPass()) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 9b8a430aaad45..009c1467eda1d 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -7,11 +7,11 @@ ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazard
[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)
https://github.com/el-ev created https://github.com/llvm/llvm-project/pull/139508 None Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. [Contact Support](https://support.github.com/contact) — [GitHub Status](https://githubstatus.com) — [@githubstatus](https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)
el-ev wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack > [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/139508?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#139508** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/139508?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/139508?utm_source=stack-comment-view-in-graphite) * **#139369** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/139369?utm_source=stack-comment-icon): 1 other dependent PR ([#139495](https://github.com/llvm/llvm-project/pull/139495) [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/139495?utm_source=stack-comment-icon)) * `main` This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment). https://github.com/llvm/llvm-project/pull/139508 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)
https://github.com/el-ev ready_for_review https://github.com/llvm/llvm-project/pull/139508 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)
llvmbot wrote: @llvm/pr-subscribers-mc Author: Iris Shi (el-ev) Changes --- Patch is 25.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139508.diff 5 Files Affected: - (modified) llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td (+29) - (added) llvm/test/MC/RISCV/rv64zfa-only-valid.s (+19) - (modified) llvm/test/MC/RISCV/zfa-invalid.s (+11-2) - (added) llvm/test/MC/RISCV/zfa-quad-invalid.s (+42) - (modified) llvm/test/MC/RISCV/zfa-valid.s (+383-8) ``diff diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index 8a449d32e0104..0ad654db42f5c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -175,6 +175,28 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>; } } // Predicates = [HasStdExtZfa, HasStdExtZfh] +let Predicates = [HasStdExtZfa, HasStdExtQ] in { +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def FLI_Q : FPFLI_r<0b011, 0b1, 0b000, FPR128, "fli.q">; + +def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>; +def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>; + +def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">; +def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, + "froundnx.q">; + +def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>; +def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>; +} // Predicates = [HasStdExtZfa, HasStdExtQ] + +let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in { + let mayRaiseFPException = 0 in { +def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b1, 0b000, GPR, FPR128, "fmvh.x.q">; +def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">; + } +} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] + //===--===// // Pseudo-instructions and codegen patterns //===--===// @@ -200,6 +222,13 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt", (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; 
} +let Predicates = [HasStdExtZfa, HasStdExtQ] in { +def : InstAlias<"fgtq.q $rd, $rs, $rt", +(FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; +def : InstAlias<"fgeq.q $rd, $rs, $rt", +(FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; +} + //===--===// // Codegen patterns //===--===// diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s b/llvm/test/MC/RISCV/rv64zfa-only-valid.s new file mode 100644 index 0..95fb253b145c1 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zfa-only-valid.s @@ -0,0 +1,19 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \ +# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# +# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \ +# RUN: -M no-aliases -show-encoding < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1 +# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6] +# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} +fmvh.x.q a1, fs1 + +# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2 +# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6] +# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} +fmvp.q.x fs1, a1, a2 diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s index c2537c3fc5102..cedc9279db3cb 100644 --- a/llvm/test/MC/RISCV/zfa-invalid.s +++ b/llvm/test/MC/RISCV/zfa-invalid.s @@ -1,5 +1,5 @@ -# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s -# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV64 %s +# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s +# RUN: not llvm-mc -triple 
riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV64 %s # Invalid rounding modes # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode @@ -35,6 +35,10 @@ fli.d ft1, 3.56e+02 # CHECK-NO-RV32: error: operand must be a valid floating-point constant fli.h ft1, 1.60e+00 +# CHECK-NO-RV64: error: operand must be a valid floating-point constant +# CHECK-NO-RV32: error: operand must be a valid floating-point constant +fli.q ft1, 2.25e+00 + # CHECK-NO-RV64: error: invalid fl
[llvm-branch-commits] [llvm] [GlobalISel] Add computeNumSignBits for ASHR (PR #139503)
https://github.com/jayfoad requested changes to this pull request. https://github.com/llvm/llvm-project/pull/139503 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GlobalISel] Add computeNumSignBits for ASHR (PR #139503)
@@ -864,6 +864,16 @@ unsigned GISelValueTracking::computeNumSignBits(Register R, return TyBits - 1; // Every always-zero bit is a sign bit. break; } + case TargetOpcode::G_ASHR: { +Register Src1 = MI.getOperand(1).getReg(); +Register Src2 = MI.getOperand(2).getReg(); +LLT SrcTy = MRI.getType(Src1); +FirstAnswer = computeNumSignBits(Src1, DemandedElts, Depth + 1); +if (auto C = getIConstantSplatVal(Src2, MRI)) + FirstAnswer = std::max(FirstAnswer + C->getZExtValue(), jayfoad wrote: ```suggestion FirstAnswer = std::min(FirstAnswer + C->getZExtValue(), ``` https://github.com/llvm/llvm-project/pull/139503 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)
https://github.com/el-ev edited https://github.com/llvm/llvm-project/pull/139508 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopVectorizer] Bundle partial reductions inside VPMulAccumulateReductionRecipe (PR #136173)
@@ -2432,12 +2437,40 @@ static void tryToCreateAbstractReductionRecipe(VPReductionRecipe *Red, Red->replaceAllUsesWith(AbstractR); } +/// This function tries to create an abstract recipe from a partial reduction to +/// hide its mul and extends from cost estimation. +static void +tryToCreateAbstractPartialReductionRecipe(VPPartialReductionRecipe *PRed) { sdesmalen-arm wrote: The way I read the code is that, by the time it gets to this point, it has recognised a reduction, so there is a `VP[Partial]ReductionRecipe`. It then tries to analyse whether that recipe can be transformed into a `VPMulAccumulateReductionRecipe`. For `VPReductionRecipe` it will clamp the range to all the VFs that can be turned into a `VPMulAccumulateReductionRecipe`, but for `VPPartialReductionRecipe` it doesn't do that. I don't see why for partial reductions we'd do something different. In fact, why wouldn't the `tryToMatchAndCreateMulAccumulateReduction` code be sufficient here? Now that you've made `VPPartialReductionRecipe` a subclass of `VPReductionRecipe`, I'd expect that code to function roughly the same. https://github.com/llvm/llvm-project/pull/136173 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139495 >From 02b755091def57f5cf541ed04b7a0b8283ba267d Mon Sep 17 00:00:00 2001 From: Iris Shi <0...@owo.li> Date: Mon, 12 May 2025 13:32:41 +0800 Subject: [PATCH] [RISCV][Scheduler] Add scheduler definitions for the Q extension --- llvm/lib/Target/RISCV/RISCVInstrInfoQ.td | 98 --- llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td | 1 + llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td | 1 + llvm/lib/Target/RISCV/RISCVSchedRocket.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFive7.td| 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 1 + .../lib/Target/RISCV/RISCVSchedSpacemitX60.td | 1 + .../Target/RISCV/RISCVSchedSyntacoreSCR345.td | 1 + .../Target/RISCV/RISCVSchedSyntacoreSCR7.td | 1 + .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td | 1 + .../Target/RISCV/RISCVSchedXiangShanNanHu.td | 1 + llvm/lib/Target/RISCV/RISCVSchedule.td| 88 - 14 files changed, 158 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td index 51b9c806976f5..4dc33dd22b2aa 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td @@ -25,95 +25,119 @@ defvar QExtsRV64 = [QExt]; //===--===// let Predicates = [HasStdExtQ] in { - let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in - def FLQ : RVInstI<0b100, OPC_LOAD_FP, (outs FPR128:$rd), - (ins GPRMem:$rs1, simm12:$imm12), - "flq", "$rd, ${imm12}(${rs1})">; + def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>; + // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction // encoding. 
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in - def FSQ : RVInstS<0b100, OPC_STORE_FP, (outs), - (ins FPR128:$rs2, GPRMem:$rs1, simm12:$imm12), - "fsq", "$rs2, ${imm12}(${rs1})">; + def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>; } // Predicates = [HasStdExtQ] foreach Ext = QExts in { - defm FMADD_Q : FPFMA_rrr_frm_m; - defm FMSUB_Q : FPFMA_rrr_frm_m; - defm FNMSUB_Q : FPFMA_rrr_frm_m; - defm FNMADD_Q : FPFMA_rrr_frm_m; + let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in { +defm FMADD_Q : FPFMA_rrr_frm_m; +defm FMSUB_Q : FPFMA_rrr_frm_m; +defm FNMSUB_Q : FPFMA_rrr_frm_m; +defm FNMADD_Q : FPFMA_rrr_frm_m; + } - defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>; - defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>; + let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in { +defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>; +defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>; + } + let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>; + let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in defm FDIV_Q : FPALU_rr_frm_m<0b000, "fdiv.q", Ext>; defm FSQRT_Q : FPUnaryOp_r_frm_m<0b010, 0b0, Ext, Ext.PrimaryTy, - Ext.PrimaryTy, "fsqrt.q">; + Ext.PrimaryTy, "fsqrt.q">, + Sched<[WriteFSqrt128, ReadFSqrt128]>; - let mayRaiseFPException = 0 in { + let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128], + mayRaiseFPException = 0 in { defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>; defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>; defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>; } - defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>; - defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>; + let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in { +defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>; +defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", 
Ext, Commutable = 1>; + } defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b010, 0b00011, Ext, Ext.F32Ty, -Ext.PrimaryTy, "fcvt.s.q">; +Ext.PrimaryTy, "fcvt.s.q">, + Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>; defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b0, Ext, - Ext.PrimaryTy, Ext.F32Ty, "fcvt.q.s">; + Ext.PrimaryTy, Ext.F32Ty, + "fcvt.q.s">, + Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>; defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b011, 0b00011, Ext, Ext.F64Ty, -Ext.Pr
[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139508 >From 6f4a034604e939cad0fa25c0b11768667c213ec6 Mon Sep 17 00:00:00 2001 From: Iris Shi <0...@owo.li> Date: Mon, 12 May 2025 15:04:28 +0800 Subject: [PATCH] [RISCV][MC] Add Q support for Zfa --- llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td | 29 ++ llvm/test/MC/RISCV/rv64zfa-only-valid.s| 19 + llvm/test/MC/RISCV/zfa-invalid.s | 13 +- llvm/test/MC/RISCV/zfa-quad-invalid.s | 42 +++ llvm/test/MC/RISCV/zfa-valid.s | 391 - 5 files changed, 484 insertions(+), 10 deletions(-) create mode 100644 llvm/test/MC/RISCV/rv64zfa-only-valid.s create mode 100644 llvm/test/MC/RISCV/zfa-quad-invalid.s diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index 8a449d32e0104..0ad654db42f5c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -175,6 +175,28 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>; } } // Predicates = [HasStdExtZfa, HasStdExtZfh] +let Predicates = [HasStdExtZfa, HasStdExtQ] in { +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def FLI_Q : FPFLI_r<0b011, 0b1, 0b000, FPR128, "fli.q">; + +def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>; +def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>; + +def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">; +def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, + "froundnx.q">; + +def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>; +def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>; +} // Predicates = [HasStdExtZfa, HasStdExtQ] + +let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in { + let mayRaiseFPException = 0 in { +def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b1, 0b000, GPR, FPR128, "fmvh.x.q">; +def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">; + } +} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] + 
//===--===// // Pseudo-instructions and codegen patterns //===--===// @@ -200,6 +222,13 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt", (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; } +let Predicates = [HasStdExtZfa, HasStdExtQ] in { +def : InstAlias<"fgtq.q $rd, $rs, $rt", +(FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; +def : InstAlias<"fgeq.q $rd, $rs, $rt", +(FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; +} + //===--===// // Codegen patterns //===--===// diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s b/llvm/test/MC/RISCV/rv64zfa-only-valid.s new file mode 100644 index 0..95fb253b145c1 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zfa-only-valid.s @@ -0,0 +1,19 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \ +# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# +# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \ +# RUN: -M no-aliases -show-encoding < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1 +# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6] +# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} +fmvh.x.q a1, fs1 + +# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2 +# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6] +# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} +fmvp.q.x fs1, a1, a2 diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s index c2537c3fc5102..cedc9279db3cb 100644 --- a/llvm/test/MC/RISCV/zfa-invalid.s +++ b/llvm/test/MC/RISCV/zfa-invalid.s @@ -1,5 +1,5 @@ -# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s -# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s +# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s +# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV64 %s # Invalid rounding modes # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode @@ -35,6 +35,10 @@ fli.d ft1, 3.56e+02 # CHECK-NO-RV32: error: operand must be a valid floating-point constant fli.h ft1, 1.60e+00 +
[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139495 >From 02b755091def57f5cf541ed04b7a0b8283ba267d Mon Sep 17 00:00:00 2001 From: Iris Shi <0...@owo.li> Date: Mon, 12 May 2025 13:32:41 +0800 Subject: [PATCH] [RISCV][Scheduler] Add scheduler definitions for the Q extension --- llvm/lib/Target/RISCV/RISCVInstrInfoQ.td | 98 --- llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td | 1 + llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td | 1 + llvm/lib/Target/RISCV/RISCVSchedRocket.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFive7.td| 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 1 + .../lib/Target/RISCV/RISCVSchedSpacemitX60.td | 1 + .../Target/RISCV/RISCVSchedSyntacoreSCR345.td | 1 + .../Target/RISCV/RISCVSchedSyntacoreSCR7.td | 1 + .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td | 1 + .../Target/RISCV/RISCVSchedXiangShanNanHu.td | 1 + llvm/lib/Target/RISCV/RISCVSchedule.td| 88 - 14 files changed, 158 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td index 51b9c806976f5..4dc33dd22b2aa 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td @@ -25,95 +25,119 @@ defvar QExtsRV64 = [QExt]; //===--===// let Predicates = [HasStdExtQ] in { - let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in - def FLQ : RVInstI<0b100, OPC_LOAD_FP, (outs FPR128:$rd), - (ins GPRMem:$rs1, simm12:$imm12), - "flq", "$rd, ${imm12}(${rs1})">; + def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>; + // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction // encoding. 
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in - def FSQ : RVInstS<0b100, OPC_STORE_FP, (outs), - (ins FPR128:$rs2, GPRMem:$rs1, simm12:$imm12), - "fsq", "$rs2, ${imm12}(${rs1})">; + def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>; } // Predicates = [HasStdExtQ] foreach Ext = QExts in { - defm FMADD_Q : FPFMA_rrr_frm_m; - defm FMSUB_Q : FPFMA_rrr_frm_m; - defm FNMSUB_Q : FPFMA_rrr_frm_m; - defm FNMADD_Q : FPFMA_rrr_frm_m; + let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in { +defm FMADD_Q : FPFMA_rrr_frm_m; +defm FMSUB_Q : FPFMA_rrr_frm_m; +defm FNMSUB_Q : FPFMA_rrr_frm_m; +defm FNMADD_Q : FPFMA_rrr_frm_m; + } - defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>; - defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>; + let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in { +defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>; +defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>; + } + let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>; + let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in defm FDIV_Q : FPALU_rr_frm_m<0b000, "fdiv.q", Ext>; defm FSQRT_Q : FPUnaryOp_r_frm_m<0b010, 0b0, Ext, Ext.PrimaryTy, - Ext.PrimaryTy, "fsqrt.q">; + Ext.PrimaryTy, "fsqrt.q">, + Sched<[WriteFSqrt128, ReadFSqrt128]>; - let mayRaiseFPException = 0 in { + let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128], + mayRaiseFPException = 0 in { defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>; defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>; defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>; } - defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>; - defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>; + let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in { +defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>; +defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", 
Ext, Commutable = 1>; + } defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b010, 0b00011, Ext, Ext.F32Ty, -Ext.PrimaryTy, "fcvt.s.q">; +Ext.PrimaryTy, "fcvt.s.q">, + Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>; defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b0, Ext, - Ext.PrimaryTy, Ext.F32Ty, "fcvt.q.s">; + Ext.PrimaryTy, Ext.F32Ty, + "fcvt.q.s">, + Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>; defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b011, 0b00011, Ext, Ext.F64Ty, -Ext.Pr
[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139508 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. [Contact Support](https://support.github.com/contact) — [GitHub Status](https://githubstatus.com) — [@githubstatus](https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139508 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. [Contact Support](https://support.github.com/contact) — [GitHub Status](https://githubstatus.com) — [@githubstatus](https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)
@@ -1842,23 +1859,52 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, HadError = true; PP.Diag(Loc, diag::err_character_too_large); } + if (!HadError && Converter) { +assert(Kind != tok::wide_char_constant && + "Wide character translation not supported"); +char ByteChar = *tmp_out_start; +SmallString<1> ConvertedChar; +Converter->convert(StringRef(&ByteChar, 1), ConvertedChar); +assert(ConvertedChar.size() == 1 && + "Char size increased after translation"); +*tmp_out_start = ConvertedChar[0]; cor3ntin wrote: This should be handled with diagnostics. The conversion can also fail, and that should be handled. https://github.com/llvm/llvm-project/pull/138895 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)
@@ -1842,23 +1859,52 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, HadError = true; PP.Diag(Loc, diag::err_character_too_large); } + if (!HadError && Converter) { +assert(Kind != tok::wide_char_constant && + "Wide character translation not supported"); +char ByteChar = *tmp_out_start; +SmallString<1> ConvertedChar; +Converter->convert(StringRef(&ByteChar, 1), ConvertedChar); cor3ntin wrote: Here the order of operations should be: -> convert from UTF-8 to UTF-32, check it's a valid character -> convert the same buffer from UTF-8 to the literal encoding -> check that this succeeds and has a size of one (i.e., some codepoints might have a size of 2 when encoded as UTF-8 but 1 when encoded as Latin-1) https://github.com/llvm/llvm-project/pull/138895 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)
@@ -146,6 +144,8 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin, // that would have been \", which would not have been the end of string. unsigned ResultChar = *ThisTokBuf++; char Escape = ResultChar; + bool Translate = true; cor3ntin wrote: ```suggestion bool Transcode = true; ``` I would prefer this defaults to false https://github.com/llvm/llvm-project/pull/138895 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)
@@ -416,8 +416,7 @@ Builtin Macros ``__clang_literal_encoding__`` Defined to a narrow string literal that represents the current encoding of narrow string literals, e.g., ``"hello"``. This macro typically expands to - "UTF-8" (but may change in the future if the - ``-fexec-charset="Encoding-Name"`` option is implemented.) + the charset specified by -fexec-charset if specified, or the system charset. cor3ntin wrote: ```suggestion the text encoding specified by -fexec-charset if specified, or the system charset. ``` https://github.com/llvm/llvm-project/pull/138895 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)
@@ -246,18 +249,19 @@ class StringLiteralParser { StringLiteralEvalMethod EvalMethod; public: - StringLiteralParser(ArrayRef StringToks, Preprocessor &PP, - StringLiteralEvalMethod StringMethod = - StringLiteralEvalMethod::Evaluated); + StringLiteralParser( + ArrayRef StringToks, Preprocessor &PP, + StringLiteralEvalMethod StringMethod = StringLiteralEvalMethod::Evaluated, + ConversionAction Action = ToExecCharset); cor3ntin wrote: Why do we need Conversion at all? I would expect that any ordinary, non-unevaluated literal would be encoded, and the `LiteralConverter` should be the same for all strings, so it can live in `Preprocessor`. https://github.com/llvm/llvm-project/pull/138895 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)
@@ -0,0 +1,36 @@ +//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CLANG_LEX_LITERALCONVERTER_H +#define LLVM_CLANG_LEX_LITERALCONVERTER_H + +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/CharSet.h" + +enum ConversionAction { NoConversion, ToSystemCharset, ToExecCharset }; + +class LiteralConverter { + llvm::StringRef InternalCharset; + llvm::StringRef SystemCharset; + llvm::StringRef ExecCharset; + llvm::StringMap CharsetConverters; + +public: + llvm::CharSetConverter *getConverter(const char *Codepage); + llvm::CharSetConverter *getConverter(ConversionAction Action); + llvm::CharSetConverter *createAndInsertCharConverter(const char *To); + void setConvertersFromOptions(const clang::LangOptions &Opts, +const clang::TargetInfo &TInfo, +clang::DiagnosticsEngine &Diags); cor3ntin wrote: I would prefer, for example, a static function that returns an optional or a null pointer on failure, and lets the caller deal with the error. https://github.com/llvm/llvm-project/pull/138895 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)
@@ -0,0 +1,36 @@ +//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CLANG_LEX_LITERALCONVERTER_H +#define LLVM_CLANG_LEX_LITERALCONVERTER_H + +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/CharSet.h" + +enum ConversionAction { NoConversion, ToSystemCharset, ToExecCharset }; + +class LiteralConverter { + llvm::StringRef InternalCharset; + llvm::StringRef SystemCharset; + llvm::StringRef ExecCharset; + llvm::StringMap CharsetConverters; cor3ntin wrote: Why do we need a Map? I would expect the Preprocessor to have - A converter from UTF-8 - A converter to UTF-8 We might have 2 additional in the future for wide strings, and storing 4 pointers seems fine. We could also have a small class that stores a pair of converters ( A -> B and B -> A) https://github.com/llvm/llvm-project/pull/138895 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)
@@ -491,6 +491,9 @@ class Triple { /// For example, "fooos1.2.3" would return "1.2.3". StringRef getEnvironmentVersionString() const; + /// getSystemCharset - Get the system charset of the triple. + StringRef getSystemCharset() const; + cor3ntin wrote: Let's call that `DefaultTextEncoding` (on most platforms we ignore whatever the system does by default) https://github.com/llvm/llvm-project/pull/138895 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)
@@ -367,6 +370,15 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin, HadError = true; } + if (Translate && Converter) { +// Invalid escapes are written as '?' and then translated. +char ByteChar = Invalid ? '?' : ResultChar; +SmallString<8> ResultCharConv; +Converter->convert(StringRef(&ByteChar, 1), ResultCharConv); +assert(ResultCharConv.size() == 1 && + "Char size increased after translation"); cor3ntin wrote: Can we have a `GetReplacementChar` function... somewhere, and cache the result? https://github.com/llvm/llvm-project/pull/138895 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)
@@ -0,0 +1,36 @@ +//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CLANG_LEX_LITERALCONVERTER_H +#define LLVM_CLANG_LEX_LITERALCONVERTER_H + +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/CharSet.h" + +enum ConversionAction { NoConversion, ToSystemCharset, ToExecCharset }; cor3ntin wrote: We should have `FromOrdinaryLiteralEncoding` and `ToOrdinaryLiteralEncoding` instead of `System`/Exec https://github.com/llvm/llvm-project/pull/138895 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)
@@ -633,6 +633,9 @@ class LangOptions : public LangOptionsBase { bool AtomicFineGrainedMemory = false; bool AtomicIgnoreDenormalMode = false; + /// Name of the exec charset to convert the internal charset to. + std::string ExecCharset; cor3ntin wrote: Let's call that a TextEncoding consistently (replacing all instances of Codepage and Charset) https://github.com/llvm/llvm-project/pull/138895 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139495 [Patch body unavailable: in place of the diff, GitHub returned a "Rate limit" error page: "Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour." Links from that page: Contact Support (https://support.github.com/contact), GitHub Status (https://githubstatus.com), @githubstatus (https://twitter.com/githubstatus).] ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139508 >From 537ccab69c5d426109d9c9948f55c532e83b0ecf Mon Sep 17 00:00:00 2001 From: Iris Shi <0...@owo.li> Date: Mon, 12 May 2025 15:04:28 +0800 Subject: [PATCH] [RISCV][MC] Add Q support for Zfa --- llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td | 29 ++ llvm/test/MC/RISCV/rv64zfa-only-valid.s| 19 + llvm/test/MC/RISCV/zfa-invalid.s | 13 +- llvm/test/MC/RISCV/zfa-quad-invalid.s | 42 +++ llvm/test/MC/RISCV/zfa-valid.s | 391 - 5 files changed, 484 insertions(+), 10 deletions(-) create mode 100644 llvm/test/MC/RISCV/rv64zfa-only-valid.s create mode 100644 llvm/test/MC/RISCV/zfa-quad-invalid.s diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index 8a449d32e0104..0ad654db42f5c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -175,6 +175,28 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>; } } // Predicates = [HasStdExtZfa, HasStdExtZfh] +let Predicates = [HasStdExtZfa, HasStdExtQ] in { +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def FLI_Q : FPFLI_r<0b011, 0b1, 0b000, FPR128, "fli.q">; + +def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>; +def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>; + +def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">; +def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, + "froundnx.q">; + +def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>; +def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>; +} // Predicates = [HasStdExtZfa, HasStdExtQ] + +let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in { + let mayRaiseFPException = 0 in { +def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b1, 0b000, GPR, FPR128, "fmvh.x.q">; +def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">; + } +} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] + 
//===--===// // Pseudo-instructions and codegen patterns //===--===// @@ -200,6 +222,13 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt", (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; } +let Predicates = [HasStdExtZfa, HasStdExtQ] in { +def : InstAlias<"fgtq.q $rd, $rs, $rt", +(FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; +def : InstAlias<"fgeq.q $rd, $rs, $rt", +(FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; +} + //===--===// // Codegen patterns //===--===// diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s b/llvm/test/MC/RISCV/rv64zfa-only-valid.s new file mode 100644 index 0..95fb253b145c1 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zfa-only-valid.s @@ -0,0 +1,19 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \ +# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# +# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \ +# RUN: -M no-aliases -show-encoding < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1 +# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6] +# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} +fmvh.x.q a1, fs1 + +# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2 +# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6] +# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} +fmvp.q.x fs1, a1, a2 diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s index c2537c3fc5102..cedc9279db3cb 100644 --- a/llvm/test/MC/RISCV/zfa-invalid.s +++ b/llvm/test/MC/RISCV/zfa-invalid.s @@ -1,5 +1,5 @@ -# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s -# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck 
-check-prefixes=CHECK-NO-RV64 %s +# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s +# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV64 %s # Invalid rounding modes # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode @@ -35,6 +35,10 @@ fli.d ft1, 3.56e+02 # CHECK-NO-RV32: error: operand must be a valid floating-point constant fli.h ft1, 1.60e+00 +
[llvm-branch-commits] [llvm] [RISCV][Scheduler] Add scheduler definitions for the Q extension (PR #139495)
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/139495 >From 5c454f3091822039e98bcff0693db1e7a5205351 Mon Sep 17 00:00:00 2001 From: Iris Shi <0...@owo.li> Date: Mon, 12 May 2025 13:32:41 +0800 Subject: [PATCH] [RISCV][Scheduler] Add scheduler definitions for the Q extension --- llvm/lib/Target/RISCV/RISCVInstrInfoQ.td | 98 --- llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td | 1 + llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td | 1 + llvm/lib/Target/RISCV/RISCVSchedRocket.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFive7.td| 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 1 + .../lib/Target/RISCV/RISCVSchedSpacemitX60.td | 1 + .../Target/RISCV/RISCVSchedSyntacoreSCR345.td | 1 + .../Target/RISCV/RISCVSchedSyntacoreSCR7.td | 1 + .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td | 1 + .../Target/RISCV/RISCVSchedXiangShanNanHu.td | 1 + llvm/lib/Target/RISCV/RISCVSchedule.td| 88 - 14 files changed, 158 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td index aa7dcb789a8c2..8cc965ccc515d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td @@ -25,95 +25,119 @@ defvar QExtsRV64 = [QExt]; //===--===// let Predicates = [HasStdExtQ] in { - let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in - def FLQ : RVInstI<0b100, OPC_LOAD_FP, (outs FPR128:$rd), - (ins GPRMem:$rs1, simm12:$imm12), - "flq", "$rd, ${imm12}(${rs1})">; + def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>; + // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction // encoding. 
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in - def FSQ : RVInstS<0b100, OPC_STORE_FP, (outs), - (ins FPR128:$rs2, GPRMem:$rs1, simm12:$imm12), - "fsq", "$rs2, ${imm12}(${rs1})">; + def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>; } // Predicates = [HasStdExtQ] foreach Ext = QExts in { - defm FMADD_Q : FPFMA_rrr_frm_m; - defm FMSUB_Q : FPFMA_rrr_frm_m; - defm FNMSUB_Q : FPFMA_rrr_frm_m; - defm FNMADD_Q : FPFMA_rrr_frm_m; + let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in { +defm FMADD_Q : FPFMA_rrr_frm_m; +defm FMSUB_Q : FPFMA_rrr_frm_m; +defm FNMSUB_Q : FPFMA_rrr_frm_m; +defm FNMADD_Q : FPFMA_rrr_frm_m; + } - defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>; - defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>; + let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in { +defm FADD_Q : FPALU_rr_frm_m<0b011, "fadd.q", Ext>; +defm FSUB_Q : FPALU_rr_frm_m<0b111, "fsub.q", Ext>; + } + let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>; + let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in defm FDIV_Q : FPALU_rr_frm_m<0b000, "fdiv.q", Ext>; defm FSQRT_Q : FPUnaryOp_r_frm_m<0b010, 0b0, Ext, Ext.PrimaryTy, - Ext.PrimaryTy, "fsqrt.q">; + Ext.PrimaryTy, "fsqrt.q">, + Sched<[WriteFSqrt128, ReadFSqrt128]>; - let mayRaiseFPException = 0 in { + let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128], + mayRaiseFPException = 0 in { defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>; defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>; defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>; } - defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>; - defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>; + let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in { +defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 1>; +defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", 
Ext, Commutable = 1>; + } defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b010, 0b00011, Ext, Ext.F32Ty, -Ext.PrimaryTy, "fcvt.s.q">; +Ext.PrimaryTy, "fcvt.s.q">, + Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>; defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b0, Ext, - Ext.PrimaryTy, Ext.F32Ty, "fcvt.q.s">; + Ext.PrimaryTy, Ext.F32Ty, + "fcvt.q.s">, + Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>; defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b011, 0b00011, Ext, Ext.F64Ty, -Ext.Pr
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Fill in addPreSched2 passes (PR #139516)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/139516 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [X86] Remove extra MOV after widening atomic load (PR #138635)
@@ -1200,6 +1200,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>; def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>; def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>; +def : Pat<(v4i32 (scalar_to_vector (i32 (anyext (i16 (atomic_load_16 addr:$src)), + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8> RKSimon wrote: Next thing is to add SSE/AVX handling - I've added better test coverage at d27d0c7a5266f89f9d62464e71be98421aae598d https://github.com/llvm/llvm-project/pull/138635 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Use minnum instead of maxnum for fmed3 src2-nan fold (PR #139531)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/139531 By the pseudocode in the ISA manual, if any input is a nan it acts like min3, which will fold to min2 of the other operands. The other cases fold to min, I'm not sure how this one was wrong. >From 069254f8608ac85c821f214ce61432000701022c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 12 May 2025 12:25:45 +0200 Subject: [PATCH] AMDGPU: Use minnum instead of maxnum for fmed3 src2-nan fold By the pseudocode in the ISA manual, if any input is a nan it acts like min3, which will fold to min2 of the other operands. The other cases fold to min, I'm not sure how this one was wrong. --- .../Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 2 +- llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll | 16 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 1494428cb2bf5..1ca300464a697 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -867,7 +867,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { } else if (match(Src1, PatternMatch::m_NaN()) || isa(Src1)) { V = IC.Builder.CreateMinNum(Src0, Src2); } else if (match(Src2, PatternMatch::m_NaN()) || isa(Src2)) { - V = IC.Builder.CreateMaxNum(Src0, Src1); + V = IC.Builder.CreateMinNum(Src0, Src1); } if (V) { diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll index bf94637b36a34..972862d8e327e 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll @@ -117,7 +117,7 @@ define float @fmed3_x_undef_y_f32(float %x, float %y) #1 { define float @fmed3_x_y_undef_f32(float %x, float %y) #1 { ; CHECK-LABEL: define float @fmed3_x_y_undef_f32( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; 
CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]]) +; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) ; CHECK-NEXT:ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef) @@ -147,7 +147,7 @@ define float @fmed3_x_qnan0_y_f32(float %x, float %y) #1 { define float @fmed3_x_y_qnan0_f32(float %x, float %y) #1 { ; CHECK-LABEL: define float @fmed3_x_y_qnan0_f32( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]]) +; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) ; CHECK-NEXT:ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8) @@ -276,7 +276,7 @@ define float @fmed3_0_nan_1_f32() #1 { define float @fmed3_0_1_nan_f32() #1 { ; CHECK-LABEL: define float @fmed3_0_1_nan_f32( ; CHECK-SAME: ) #[[ATTR1]] { -; CHECK-NEXT:ret float 1.00e+00 +; CHECK-NEXT:ret float 0.00e+00 ; %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF80010) ret float %med @@ -303,7 +303,7 @@ define float @fmed3_0_undef_1_f32() #1 { define float @fmed3_0_1_undef_f32() #1 { ; CHECK-LABEL: define float @fmed3_0_1_undef_f32( ; CHECK-SAME: ) #[[ATTR1]] { -; CHECK-NEXT:ret float 1.00e+00 +; CHECK-NEXT:ret float 0.00e+00 ; %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef) ret float %med @@ -359,7 +359,7 @@ define float @fmed3_x_snan1_y_f32(float %x, float %y) #1 { define float @fmed3_x_y_snan1_f32(float %x, float %y) #1 { ; CHECK-LABEL: define float @fmed3_x_y_snan1_f32( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]]) +; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) ; CHECK-NEXT:ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 
0x7FF4) @@ -414,7 +414,7 @@ define float @fmed3_snan1_neg1_2_f32(float %x, float %y) #1 { define float @fmed3_neg2_3_snan1_f32(float %x, float %y) #1 { ; CHECK-LABEL: define float @fmed3_neg2_3_snan1_f32( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT:ret float 3.00e+00 +; CHECK-NEXT:ret float -2.00e+00 ; %med3 = call float @llvm.amdgcn.fmed3.f32(float -2.0, float 3.0, float 0x7FF4) ret float %med3 @@ -447,7 +447,7 @@ define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32(float %x, float %y define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(float %x, float %y) { ; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]
[llvm-branch-commits] [llvm] AMDGPU: Disable most fmed3 folds for strictfp (PR #139530)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/139530 None >From 012d451378314c9633c3a38891fca23c027e54b5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 12 May 2025 10:42:16 +0200 Subject: [PATCH] AMDGPU: Disable most fmed3 folds for strictfp --- .../lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 3 +++ llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll | 12 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index e76396f6ffbb0..1494428cb2bf5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -855,6 +855,9 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { return IC.replaceInstUsesWith(II, Src); } +if (II.isStrictFP()) + break; + // Checking for NaN before canonicalization provides better fidelity when // mapping other operations onto fmed3 since the order of operands is // unchanged. 
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll index 5274ac1093a26..bf94637b36a34 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll @@ -494,7 +494,7 @@ define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32(float %x, float %y) # define float @fmed3_qnan0_x_y_f32_strictfp(float %x, float %y) #2 { ; CHECK-LABEL: define float @fmed3_qnan0_x_y_f32_strictfp( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3:[0-9]+]] { -; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) +; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8, float [[X]], float [[Y]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT:ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8, float %x, float %y) strictfp @@ -504,7 +504,7 @@ define float @fmed3_qnan0_x_y_f32_strictfp(float %x, float %y) #2 { define float @fmed3_x_qnan0_y_f32_strictfp(float %x, float %y) #2 { ; CHECK-LABEL: define float @fmed3_x_qnan0_y_f32_strictfp( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] { -; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) +; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X]], float 0x7FF8, float [[Y]]) #[[ATTR5]] ; CHECK-NEXT:ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8, float %y) strictfp @@ -514,7 +514,7 @@ define float @fmed3_x_qnan0_y_f32_strictfp(float %x, float %y) #2 { define float @fmed3_x_y_qnan0_f32_strictfp(float %x, float %y) #2 { ; CHECK-LABEL: define float @fmed3_x_y_qnan0_f32_strictfp( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] { -; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]]) +; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X]], float [[Y]], float 0x7FF8) #[[ATTR5]] ; CHECK-NEXT:ret float [[MED3]] ; %med3 = call 
float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8) strictfp @@ -524,7 +524,7 @@ define float @fmed3_x_y_qnan0_f32_strictfp(float %x, float %y) #2 { define float @fmed3_snan1_x_y_f32_strictfp(float %x, float %y) #2 { ; CHECK-LABEL: define float @fmed3_snan1_x_y_f32_strictfp( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] { -; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) +; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4, float [[X]], float [[Y]]) #[[ATTR5]] ; CHECK-NEXT:ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4, float %x, float %y) strictfp @@ -534,7 +534,7 @@ define float @fmed3_snan1_x_y_f32_strictfp(float %x, float %y) #2 { define float @fmed3_x_snan1_y_f32_strictfp(float %x, float %y) #2 { ; CHECK-LABEL: define float @fmed3_x_snan1_y_f32_strictfp( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] { -; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) +; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X]], float 0x7FF4, float [[Y]]) #[[ATTR5]] ; CHECK-NEXT:ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4, float %y) strictfp @@ -544,7 +544,7 @@ define float @fmed3_x_snan1_y_f32_strictfp(float %x, float %y) #2 { define float @fmed3_x_y_snan1_f32_strictfp(float %x, float %y) #2 { ; CHECK-LABEL: define float @fmed3_x_y_snan1_f32_strictfp( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] { -; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]]) +;
[llvm-branch-commits] [llvm] AMDGPU: Disable most fmed3 folds for strictfp (PR #139530)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/139530 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Use minnum instead of maxnum for fmed3 src2-nan fold (PR #139531)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes By the pseudocode in the ISA manual, if any input is a nan it acts like min3, which will fold to min2 of the other operands. The other cases fold to min, I'm not sure how this one was wrong. --- Full diff: https://github.com/llvm/llvm-project/pull/139531.diff 2 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+1-1) - (modified) llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll (+8-8) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 1494428cb2bf5..1ca300464a697 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -867,7 +867,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { } else if (match(Src1, PatternMatch::m_NaN()) || isa(Src1)) { V = IC.Builder.CreateMinNum(Src0, Src2); } else if (match(Src2, PatternMatch::m_NaN()) || isa(Src2)) { - V = IC.Builder.CreateMaxNum(Src0, Src1); + V = IC.Builder.CreateMinNum(Src0, Src1); } if (V) { diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll index bf94637b36a34..972862d8e327e 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll @@ -117,7 +117,7 @@ define float @fmed3_x_undef_y_f32(float %x, float %y) #1 { define float @fmed3_x_y_undef_f32(float %x, float %y) #1 { ; CHECK-LABEL: define float @fmed3_x_y_undef_f32( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]]) +; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) ; CHECK-NEXT:ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef) @@ -147,7 +147,7 @@ define float 
@fmed3_x_qnan0_y_f32(float %x, float %y) #1 { define float @fmed3_x_y_qnan0_f32(float %x, float %y) #1 { ; CHECK-LABEL: define float @fmed3_x_y_qnan0_f32( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]]) +; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) ; CHECK-NEXT:ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8) @@ -276,7 +276,7 @@ define float @fmed3_0_nan_1_f32() #1 { define float @fmed3_0_1_nan_f32() #1 { ; CHECK-LABEL: define float @fmed3_0_1_nan_f32( ; CHECK-SAME: ) #[[ATTR1]] { -; CHECK-NEXT:ret float 1.00e+00 +; CHECK-NEXT:ret float 0.00e+00 ; %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF80010) ret float %med @@ -303,7 +303,7 @@ define float @fmed3_0_undef_1_f32() #1 { define float @fmed3_0_1_undef_f32() #1 { ; CHECK-LABEL: define float @fmed3_0_1_undef_f32( ; CHECK-SAME: ) #[[ATTR1]] { -; CHECK-NEXT:ret float 1.00e+00 +; CHECK-NEXT:ret float 0.00e+00 ; %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef) ret float %med @@ -359,7 +359,7 @@ define float @fmed3_x_snan1_y_f32(float %x, float %y) #1 { define float @fmed3_x_y_snan1_f32(float %x, float %y) #1 { ; CHECK-LABEL: define float @fmed3_x_y_snan1_f32( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]]) +; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) ; CHECK-NEXT:ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4) @@ -414,7 +414,7 @@ define float @fmed3_snan1_neg1_2_f32(float %x, float %y) #1 { define float @fmed3_neg2_3_snan1_f32(float %x, float %y) #1 { ; CHECK-LABEL: define float @fmed3_neg2_3_snan1_f32( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT:ret float 3.00e+00 +; 
CHECK-NEXT:ret float -2.00e+00 ; %med3 = call float @llvm.amdgcn.fmed3.f32(float -2.0, float 3.0, float 0x7FF4) ret float %med3 @@ -447,7 +447,7 @@ define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32(float %x, float %y define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(float %x, float %y) { ; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32( ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] { -; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]]) +; CHECK-NEXT:[[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) ; CHECK-NEXT:ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4
[llvm-branch-commits] [llvm] AMDGPU: Use minnum instead of maxnum for fmed3 src2-nan fold (PR #139531)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/139531 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Disable most fmed3 folds for strictfp (PR #139530)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/139530?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#139531** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/139531?utm_source=stack-comment-icon) * **#139530** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/139530?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/139530?utm_source=stack-comment-view-in-graphite) * **#139529** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/139529?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment). https://github.com/llvm/llvm-project/pull/139530 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ObjC] Support objc_claimAutoreleasedReturnValue (PR #138696)
https://github.com/jroelofs approved this pull request. https://github.com/llvm/llvm-project/pull/138696 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC] Refactoring DXContainerYaml Root Parameter representation (PR #138318)
https://github.com/joaosaffran edited https://github.com/llvm/llvm-project/pull/138318 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [analyzer] Workaround for slowdown spikes (unintended scope increase) (#136720) (PR #139597)
@@ -0,0 +1,200 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify=expected,default %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config inline-functions-with-ambiguous-loops=true -verify=expected,enabled %s + +// This file tests some heuristics in the engine that put functions on a +// "do not inline" list if their analyisis reaches the `analyzer-max-loop` +// limit (by default 4 iterations) in a loop. This was almost surely intended +// as memoization optimization for the "retry without inlining" fallback (if we +// had to retry once, next time don't even try inlining), but aggressively +// oversteps the "natural" scope: reaching 4 iterations on _one particular_ +// execution path does not imply that each path would need "retry without +// inlining" especially if a different call receives different arguments. +// +// This heuristic significantly affects the scope/depth of the analysis (and +// therefore the execution time) because without this limitation on the +// inlining significantly more entry points would be able to exhaust their +// `max-nodes` quota. (Trivial thin wrappers around big complex functions are +// common in many projects.) +// +// Unfortunately, this arbitrary heuristic strongly relies on the current loop +// handling model and its many limitations, so improvements in loop handling +// can cause surprising slowdowns by reducing the "do not inline" blacklist. +// In the tests "FIXME-BUT-NEEDED" comments mark "problematic" (aka buggy) +// analyzer behavior which cannot be fixed without also improving the +// heuristics for (not) inlining large functions. + + int getNum(void); // Get an unknown symbolic number. NagyDonat wrote: ```suggestion int getNum(void); // Get an unknown symbolic number. ``` Oops, my bad there are two superfluous spaces before this function declaration (they are also there in the commit that's merged on the main branch). 
Let's delete them at least in this backport. https://github.com/llvm/llvm-project/pull/139597 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [analyzer] Workaround for slowdown spikes (unintended scope increase) (#136720) (PR #139597)
https://github.com/NagyDonat commented: I read this rebased code and IMO it should work -- however I found a whitespace error that originated in my commit :sweat_smile: https://github.com/llvm/llvm-project/pull/139597 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV][MC] Add Q support for Zfa (PR #139508)
https://github.com/topperc approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/139508 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [analyzer] Workaround for slowdown spikes (unintended scope increase) (#136720) (PR #139597)
https://github.com/NagyDonat edited https://github.com/llvm/llvm-project/pull/139597 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Use minnum instead of maxnum for fmed3 src2-nan fold (PR #139531)
arsenm wrote: ### Merge activity * **May 12, 2:11 PM EDT**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/139531). https://github.com/llvm/llvm-project/pull/139531 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Disable most fmed3 folds for strictfp (PR #139530)
arsenm wrote: ### Merge activity * **May 12, 2:11 PM EDT**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/139530). https://github.com/llvm/llvm-project/pull/139530 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [GISelValueTracking] Use representation size for G_PTRTOINT src width (PR #139608)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Alexander Richardson (arichardson) Changes While we can only reason about the index/address, the G_PTRTOINT operations returns all representation bits, so we can't assume the remaining ones are all zeroes. This behaviour was clarified as part of the discussion in https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54. The LangRef semantics of ptrtoint being a full representation bitcast were documented in https://github.com/llvm/llvm-project/pull/139349. Fixes: https://github.com/llvm/llvm-project/issues/139598 --- Full diff: https://github.com/llvm/llvm-project/pull/139608.diff 2 Files Affected: - (modified) llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp (+3-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll (+14-27) ``diff diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index 12fe28b29e5c8..b7e0a43f2fb64 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -483,8 +483,10 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, if (Opcode == TargetOpcode::G_ASSERT_ZEXT) SrcBitWidth = MI.getOperand(2).getImm(); else { + // For G_PTRTOINT all representation bits are returned even though only + // the address bits can be reasoned about generically. SrcBitWidth = SrcTy.isPointer() -? DL.getIndexSizeInBits(SrcTy.getAddressSpace()) +? 
DL.getPointerSizeInBits(SrcTy.getAddressSpace()) : SrcTy.getSizeInBits(); } assert(SrcBitWidth && "SrcBitWidth can't be zero"); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll index 6722a55e8da92..d762d7728df36 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll @@ -79,9 +79,9 @@ define <2 x i64> @ptrtoaddr_vec(ptr addrspace(8) %ignored, <2 x ptr addrspace(8) ; GISEL: ; %bb.0: ; GISEL-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT:v_mov_b32_e32 v0, v4 -; GISEL-NEXT:v_mov_b32_e32 v1, v5 +; GISEL-NEXT:v_and_b32_e32 v1, 0x, v5 +; GISEL-NEXT:v_and_b32_e32 v3, 0x, v9 ; GISEL-NEXT:v_mov_b32_e32 v2, v8 -; GISEL-NEXT:v_mov_b32_e32 v3, v9 ; GISEL-NEXT:s_setpc_b64 s[30:31] ; ; SDAG-LABEL: ptrtoaddr_vec: @@ -129,31 +129,18 @@ define i256 @ptrtoint_ext(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) { ;; FIXME: this is wrong for the GlobalISel case, we are removing the trunc: ;; https://github.com/llvm/llvm-project/issues/139598 define i256 @ptrtoaddr_ext(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) { -; GISEL-LABEL: ptrtoaddr_ext: -; GISEL: ; %bb.0: -; GISEL-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT:v_mov_b32_e32 v0, v4 -; GISEL-NEXT:v_mov_b32_e32 v1, v5 -; GISEL-NEXT:v_mov_b32_e32 v2, v6 -; GISEL-NEXT:v_mov_b32_e32 v3, v7 -; GISEL-NEXT:v_mov_b32_e32 v4, 0 -; GISEL-NEXT:v_mov_b32_e32 v5, 0 -; GISEL-NEXT:v_mov_b32_e32 v6, 0 -; GISEL-NEXT:v_mov_b32_e32 v7, 0 -; GISEL-NEXT:s_setpc_b64 s[30:31] -; -; SDAG-LABEL: ptrtoaddr_ext: -; SDAG: ; %bb.0: -; SDAG-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT:v_mov_b32_e32 v0, v4 -; SDAG-NEXT:v_and_b32_e32 v1, 0x, v5 -; SDAG-NEXT:v_mov_b32_e32 v2, 0 -; SDAG-NEXT:v_mov_b32_e32 v3, 0 -; SDAG-NEXT:v_mov_b32_e32 v4, 0 -; SDAG-NEXT:v_mov_b32_e32 v5, 0 -; SDAG-NEXT:v_mov_b32_e32 v6, 0 -; SDAG-NEXT:v_mov_b32_e32 v7, 0 -; 
SDAG-NEXT:s_setpc_b64 s[30:31] +; CHECK-LABEL: ptrtoaddr_ext: +; CHECK: ; %bb.0: +; CHECK-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT:v_mov_b32_e32 v0, v4 +; CHECK-NEXT:v_and_b32_e32 v1, 0x, v5 +; CHECK-NEXT:v_mov_b32_e32 v2, 0 +; CHECK-NEXT:v_mov_b32_e32 v3, 0 +; CHECK-NEXT:v_mov_b32_e32 v4, 0 +; CHECK-NEXT:v_mov_b32_e32 v5, 0 +; CHECK-NEXT:v_mov_b32_e32 v6, 0 +; CHECK-NEXT:v_mov_b32_e32 v7, 0 +; CHECK-NEXT:s_setpc_b64 s[30:31] %ret = ptrtoaddr ptr addrspace(8) %ptr to i256 ret i256 %ret } `` https://github.com/llvm/llvm-project/pull/139608 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [GISelValueTracking] Use representation size for G_PTRTOINT src width (PR #139608)
https://github.com/arichardson created https://github.com/llvm/llvm-project/pull/139608 While we can only reason about the index/address, the G_PTRTOINT operation returns all representation bits, so we can't assume the remaining ones are all zeroes. This behaviour was clarified as part of the discussion in https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54. The LangRef semantics of ptrtoint being a full representation bitcast were documented in https://github.com/llvm/llvm-project/pull/139349. Fixes: https://github.com/llvm/llvm-project/issues/139598 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. 
https://support.github.com/contact";>Contact Support — https://githubstatus.com";>GitHub Status — https://twitter.com/githubstatus";>@githubstatus ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [GISelValueTracking] Use representation size for G_PTRTOINT src width (PR #139608)
arichardson wrote: Now that we use the full bitwidth the high KnownBits are no longer zext'ed to zeroes. But maybe the better approach would be to just do KnownBits on the address bits and set the high bits to unknown? That should fix the issue as well? https://github.com/llvm/llvm-project/pull/139608 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [analyzer] Workaround for slowdown spikes (unintended scope increase) (#136720) (PR #139597)
llvmbot wrote: @llvm/pr-subscribers-clang-static-analyzer-1 Author: Balazs Benics (steakhal) Changes Recently some users reported that they observed large increases of runtime (up to +600% on some translation units) when they upgraded to a more recent (slightly patched, internal) clang version. Bisection revealed that the bulk of this increase was probably caused by my earlier commit bb27d5e5c6b194a1440b8ac4e5ace68d0ee2a849 ("Don't assume third iteration in loops"). As I evaluated that earlier commit on several open source projects, it turns out that on average it's runtime-neutral (or slightly helpful: it reduced the total analysis time by 1.5%) but it can cause runtime spikes on some code: in particular it more than doubled the time to analyze `tmux` (one of the smaller test projects). Further profiling and investigation proved that these spikes were caused by an _increase of analysis scope_ because there was a heuristic that placed functions on a "don't inline this" blacklist if they reached the `-analyzer-max-loop` limit (anywhere, on any one execution path) -- which became significantly rarer when my commit ensured the analyzer no longer "just assumes" four iterations. (With more inlining, significantly more entry points use up their allocated budgets, which leads to the increased runtime.) I feel that this heuristic for the "don't inline" blacklist is unjustified and arbitrary, because reaching the "retry without inlining" limit on one path does not imply that inlining the function won't be valuable on other paths -- so I hope that we can eventually replace it with more "natural" limits of the analysis scope. 
However, the runtime increases are annoying for the users whose project is affected, so I created this quick workaround commit that approximates the "don't inline" blacklist effects of ambiguous loops (where the analyzer doesn't understand the loop condition) without fully reverting the "Don't assume third iteration" commit (to avoid reintroducing the false positives that were eliminated by it). Investigating this issue was a team effort: I'm grateful to Endre Fülöp (gamesh411) who did the bisection and shared his time measurement setup, and Gábor Tóthvári (tigbr) who helped me in profiling. (cherry picked from commit 9600a12f0de233324b559f60997b9c2db153fede) --- Full diff: https://github.com/llvm/llvm-project/pull/139597.diff 6 Files Affected: - (modified) clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def (+13) - (modified) clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h (-4) - (modified) clang/lib/StaticAnalyzer/Core/ExprEngine.cpp (+49-11) - (modified) clang/test/Analysis/analyzer-config.c (+1) - (added) clang/test/Analysis/loop-based-inlining-prevention.c (+200) - (modified) clang/test/Analysis/loop-unrolling.cpp (+23-7) ``diff diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def index 34bb7a809162b..dbb8e832db5ff 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def @@ -385,6 +385,19 @@ ANALYZER_OPTION( "flex\" won't be analyzed.", true) +ANALYZER_OPTION( +bool, InlineFunctionsWithAmbiguousLoops, "inline-functions-with-ambiguous-loops", +"If disabled (the default), the analyzer puts functions on a \"do not " +"inline this\" list if it finds an execution path within that function " +"that may potentially perform 'analyzer-max-loop' (= 4 by default) " +"iterations in a loop. 
(Note that functions that _definitely_ reach the " +"loop limit on some execution path are currently marked as \"do not " +"inline\" even if this option is enabled.) Enabling this option " +"eliminates this (somewhat arbitrary) restriction from the analysis " +"scope, which increases the analysis runtime (on average by ~10%, but " +"a few translation units may see much larger slowdowns).", +false) + //===--===// // Unsigned analyzer options. //===--===// diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h index 3ee0d229cfc29..761395260a0cf 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h @@ -81,10 +81,6 @@ class FunctionSummariesTy { I->second.MayInline = 0; } - void markReachedMaxBlockCount(const Decl *D) { -markShouldNotInline(D); - } - std::optional mayInline(const Decl *D) { MapTy::const_iterator I = Map.find(D); if (I != Map.end() && I->second.InlineChecked) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b
[llvm-branch-commits] [clang] [analyzer] Workaround for slowdown spikes (unintended scope increase) (#136720) (PR #139597)
https://github.com/steakhal milestoned https://github.com/llvm/llvm-project/pull/139597 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [analyzer] Workaround for slowdown spikes (unintended scope increase) (#136720) (PR #139597)
https://github.com/steakhal created https://github.com/llvm/llvm-project/pull/139597 Recently some users reported that they observed large increases of runtime (up to +600% on some translation units) when they upgraded to a more recent (slightly patched, internal) clang version. Bisection revealed that the bulk of this increase was probably caused by my earlier commit bb27d5e5c6b194a1440b8ac4e5ace68d0ee2a849 ("Don't assume third iteration in loops"). As I evaluated that earlier commit on several open source projects, it turns out that on average it's runtime-neutral (or slightly helpful: it reduced the total analysis time by 1.5%) but it can cause runtime spikes on some code: in particular it more than doubled the time to analyze `tmux` (one of the smaller test projects). Further profiling and investigation proved that these spikes were caused by an _increase of analysis scope_ because there was a heuristic that placed functions on a "don't inline this" blacklist if they reached the `-analyzer-max-loop` limit (anywhere, on any one execution path) -- which became significantly rarer when my commit ensured the analyzer no longer "just assumes" four iterations. (With more inlining, significantly more entry points use up their allocated budgets, which leads to the increased runtime.) I feel that this heuristic for the "don't inline" blacklist is unjustified and arbitrary, because reaching the "retry without inlining" limit on one path does not imply that inlining the function won't be valuable on other paths -- so I hope that we can eventually replace it with more "natural" limits of the analysis scope. 
However, the runtime increases are annoying for the users whose project is affected, so I created this quick workaround commit that approximates the "don't inline" blacklist effects of ambiguous loops (where the analyzer doesn't understand the loop condition) without fully reverting the "Don't assume third iteration" commit (to avoid reintroducing the false positives that were eliminated by it). Investigating this issue was a team effort: I'm grateful to Endre Fülöp (gamesh411) who did the bisection and shared his time measurement setup, and Gábor Tóthvári (tigbr) who helped me in profiling. (cherry picked from commit 9600a12f0de233324b559f60997b9c2db153fede) Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. 
Please wait a few minutes before you try again; in some cases this may take up to an hour. https://support.github.com/contact";>Contact Support — https://githubstatus.com";>GitHub Status — https://twitter.com/githubstatus";>@githubstatus ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [IRTranslator] Handle ptrtoaddr (PR #139601)
llvmbot wrote: @llvm/pr-subscribers-llvm-globalisel Author: Alexander Richardson (arichardson) Changes We lower ptrtoaddr by emitting a G_PTRTOINT, truncating that to the address size and then truncate/zext to the final integer type. This has exposed an issue in the GlobalIsel postlegalizer combines where the truncate is incorrectly being removed. See https://github.com/llvm/llvm-project/issues/139598 --- Full diff: https://github.com/llvm/llvm-project/pull/139601.diff 3 Files Affected: - (modified) llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h (+1-3) - (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+20) - (added) llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll (+187) ``diff diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index fcdc733d92c7f..41d03c9fb3ed5 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -486,9 +486,7 @@ class IRTranslator : public MachineFunctionPass { bool translatePtrToInt(const User &U, MachineIRBuilder &MIRBuilder) { return translateCast(TargetOpcode::G_PTRTOINT, U, MIRBuilder); } - bool translatePtrToAddr(const User &U, MachineIRBuilder &MIRBuilder) { -return translatePtrToInt(U, MIRBuilder); - } + bool translatePtrToAddr(const User &U, MachineIRBuilder &MIRBuilder); bool translateTrunc(const User &U, MachineIRBuilder &MIRBuilder) { return translateCast(TargetOpcode::G_TRUNC, U, MIRBuilder); } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 8ab2533afc15f..5666c9e9f45bc 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1583,6 +1583,26 @@ bool IRTranslator::translateCast(unsigned Opcode, const User &U, return true; } +bool IRTranslator::translatePtrToAddr(const User &U, + MachineIRBuilder &MIRBuilder) { + if (containsBF16Type(U)) +return false; + 
+ uint32_t Flags = 0; + if (const Instruction *I = dyn_cast(&U)) +Flags = MachineInstr::copyFlagsFromInstruction(*I); + + Register Op = getOrCreateVReg(*U.getOperand(0)); + Type *PtrTy = U.getOperand(0)->getType(); + LLT AddrTy = getLLTForType(*DL->getIndexType(PtrTy), *DL); + auto IntPtrTy = getLLTForType(*DL->getIntPtrType(PtrTy), *DL); + auto PtrToInt = MIRBuilder.buildPtrToInt(IntPtrTy, Op); + PtrToInt->setFlags(Flags); + auto Addr = MIRBuilder.buildTrunc(AddrTy, PtrToInt.getReg(0)); + MIRBuilder.buildZExtOrTrunc(getOrCreateVReg(U), Addr.getReg(0)); + return true; +} + bool IRTranslator::translateGetElementPtr(const User &U, MachineIRBuilder &MIRBuilder) { Value &Op0 = *U.getOperand(0); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll new file mode 100644 index 0..30f9dbfcaacf8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll @@ -0,0 +1,187 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -global-isel -verify-machineinstrs --print-changed --debug < %s | FileCheck %s --check-prefixes=CHECK,GISEL +; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,SDAG +;; Check that we can lower ptrtoaddr differently from ptrtoint. 
+;; Includes an ignored argument so the registers actually need to be written + +define i128 @ptrtoint(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) { +; GISEL-LABEL: ptrtoint: +; GISEL: ; %bb.0: +; GISEL-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT:v_mov_b32_e32 v0, v4 +; GISEL-NEXT:v_mov_b32_e32 v1, v5 +; GISEL-NEXT:v_mov_b32_e32 v2, v6 +; GISEL-NEXT:v_mov_b32_e32 v3, v7 +; GISEL-NEXT:s_setpc_b64 s[30:31] +; +; SDAG-LABEL: ptrtoint: +; SDAG: ; %bb.0: +; SDAG-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT:v_mov_b32_e32 v3, v7 +; SDAG-NEXT:v_mov_b32_e32 v2, v6 +; SDAG-NEXT:v_mov_b32_e32 v1, v5 +; SDAG-NEXT:v_mov_b32_e32 v0, v4 +; SDAG-NEXT:s_setpc_b64 s[30:31] + %ret = ptrtoint ptr addrspace(8) %ptr to i128 + ret i128 %ret +} + +define i48 @ptrtoaddr(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) { +; GISEL-LABEL: ptrtoaddr: +; GISEL: ; %bb.0: +; GISEL-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT:v_mov_b32_e32 v0, v4 +; GISEL-NEXT:v_mov_b32_e32 v1, v5 +; GISEL-NEXT:s_setpc_b64 s[30:31] +; +; SDAG-LABEL: ptrtoaddr: +; SDAG: ; %bb.0: +; SDAG-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT:v_mov_b32_e32 v1, v5 +; SDAG-NEXT:v_mov_b32_e32 v0, v4 +; SDAG-NEXT:s_setpc_b64 s[30:31] + %ret = ptrtoaddr ptr addrspace(8) %ptr to i48 + ret i48 %ret +} + +define <
[llvm-branch-commits] [IRTranslator] Handle ptrtoaddr (PR #139601)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Alexander Richardson (arichardson) Changes We lower ptrtoaddr by emitting a G_PTRTOINT, truncating that to the address size and then truncate/zext to the final integer type. This has exposed an issue in the GlobalIsel postlegalizer combines where the truncate is incorrectly being removed. See https://github.com/llvm/llvm-project/issues/139598 --- Full diff: https://github.com/llvm/llvm-project/pull/139601.diff 3 Files Affected: - (modified) llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h (+1-3) - (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+20) - (added) llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll (+187) ``diff diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index fcdc733d92c7f..41d03c9fb3ed5 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -486,9 +486,7 @@ class IRTranslator : public MachineFunctionPass { bool translatePtrToInt(const User &U, MachineIRBuilder &MIRBuilder) { return translateCast(TargetOpcode::G_PTRTOINT, U, MIRBuilder); } - bool translatePtrToAddr(const User &U, MachineIRBuilder &MIRBuilder) { -return translatePtrToInt(U, MIRBuilder); - } + bool translatePtrToAddr(const User &U, MachineIRBuilder &MIRBuilder); bool translateTrunc(const User &U, MachineIRBuilder &MIRBuilder) { return translateCast(TargetOpcode::G_TRUNC, U, MIRBuilder); } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 8ab2533afc15f..5666c9e9f45bc 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1583,6 +1583,26 @@ bool IRTranslator::translateCast(unsigned Opcode, const User &U, return true; } +bool IRTranslator::translatePtrToAddr(const User &U, + MachineIRBuilder &MIRBuilder) { + if (containsBF16Type(U)) +return false; + + 
uint32_t Flags = 0; + if (const Instruction *I = dyn_cast(&U)) +Flags = MachineInstr::copyFlagsFromInstruction(*I); + + Register Op = getOrCreateVReg(*U.getOperand(0)); + Type *PtrTy = U.getOperand(0)->getType(); + LLT AddrTy = getLLTForType(*DL->getIndexType(PtrTy), *DL); + auto IntPtrTy = getLLTForType(*DL->getIntPtrType(PtrTy), *DL); + auto PtrToInt = MIRBuilder.buildPtrToInt(IntPtrTy, Op); + PtrToInt->setFlags(Flags); + auto Addr = MIRBuilder.buildTrunc(AddrTy, PtrToInt.getReg(0)); + MIRBuilder.buildZExtOrTrunc(getOrCreateVReg(U), Addr.getReg(0)); + return true; +} + bool IRTranslator::translateGetElementPtr(const User &U, MachineIRBuilder &MIRBuilder) { Value &Op0 = *U.getOperand(0); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll new file mode 100644 index 0..30f9dbfcaacf8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll @@ -0,0 +1,187 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -global-isel -verify-machineinstrs --print-changed --debug < %s | FileCheck %s --check-prefixes=CHECK,GISEL +; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,SDAG +;; Check that we can lower ptrtoaddr differently from ptrtoint. 
+;; Includes an ignored argument so the registers actually need to be written + +define i128 @ptrtoint(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) { +; GISEL-LABEL: ptrtoint: +; GISEL: ; %bb.0: +; GISEL-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT:v_mov_b32_e32 v0, v4 +; GISEL-NEXT:v_mov_b32_e32 v1, v5 +; GISEL-NEXT:v_mov_b32_e32 v2, v6 +; GISEL-NEXT:v_mov_b32_e32 v3, v7 +; GISEL-NEXT:s_setpc_b64 s[30:31] +; +; SDAG-LABEL: ptrtoint: +; SDAG: ; %bb.0: +; SDAG-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT:v_mov_b32_e32 v3, v7 +; SDAG-NEXT:v_mov_b32_e32 v2, v6 +; SDAG-NEXT:v_mov_b32_e32 v1, v5 +; SDAG-NEXT:v_mov_b32_e32 v0, v4 +; SDAG-NEXT:s_setpc_b64 s[30:31] + %ret = ptrtoint ptr addrspace(8) %ptr to i128 + ret i128 %ret +} + +define i48 @ptrtoaddr(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) { +; GISEL-LABEL: ptrtoaddr: +; GISEL: ; %bb.0: +; GISEL-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT:v_mov_b32_e32 v0, v4 +; GISEL-NEXT:v_mov_b32_e32 v1, v5 +; GISEL-NEXT:s_setpc_b64 s[30:31] +; +; SDAG-LABEL: ptrtoaddr: +; SDAG: ; %bb.0: +; SDAG-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT:v_mov_b32_e32 v1, v5 +; SDAG-NEXT:v_mov_b32_e32 v0, v4 +; SDAG-NEXT:s_setpc_b64 s[30:31] + %ret = ptrtoaddr ptr addrspace(8) %ptr to i48 + ret i48 %ret +} + +define <2
[llvm-branch-commits] [llvm] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)
@@ -1952,6 +1953,22 @@ PtrParts SplitPtrStructs::visitPtrToIntInst(PtrToIntInst &PI) { return {nullptr, nullptr}; } +PtrParts SplitPtrStructs::visitPtrToAddrInst(PtrToAddrInst &PA) { + Value *Ptr = PA.getPointerOperand(); + if (!isSplitFatPtr(Ptr->getType())) +return {nullptr, nullptr}; + IRB.SetInsertPoint(&PA); + + auto [Rsrc, Off] = getPtrParts(Ptr); + Value *Res = IRB.CreateIntCast(Off, PA.getType(), /*isSigned=*/false, + PA.getName() + ".off"); krzysz00 wrote: We could probably leave the name off given that we'll be `takeName()`ing a moment later https://github.com/llvm/llvm-project/pull/139413 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)
https://github.com/krzysz00 edited https://github.com/llvm/llvm-project/pull/139413 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)
https://github.com/krzysz00 approved this pull request. One tiny nit, lgtm otherwise https://github.com/llvm/llvm-project/pull/139413 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [IRTranslator] Handle ptrtoaddr (PR #139601)
https://github.com/arichardson created https://github.com/llvm/llvm-project/pull/139601 We lower ptrtoaddr by emitting a G_PTRTOINT, truncating that to the address size, and then truncating or zero-extending the result to the final integer type. This has exposed an issue in the GlobalISel post-legalizer combines where the truncate is incorrectly being removed. See https://github.com/llvm/llvm-project/issues/139598 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)
@@ -5773,7 +5773,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { if (!DL.contains("-p7") && !DL.starts_with("p7")) Res.append("-p7:160:256:256:32"); if (!DL.contains("-p8") && !DL.starts_with("p8")) - Res.append("-p8:128:128"); + Res.append("-p8:128:128:128:48"); arichardson wrote: I just kept the current structure that doesn't touch pointer definitions that already exist - should we override those unconditionally? Or just the old variants? https://github.com/llvm/llvm-project/pull/139419 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Fix build on ppc64+musl (#120036) (PR #139389)
https://github.com/vitalybuka approved this pull request. https://github.com/llvm/llvm-project/pull/139389 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [IR] Introduce the `ptrtoaddr` instruction (PR #139357)
https://github.com/arichardson updated https://github.com/llvm/llvm-project/pull/139357 >From 25dc175562349410f161ef0e80246301d9a7ba79 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 9 May 2025 22:43:37 -0700 Subject: [PATCH] fix docs build Created using spr 1.3.6-beta.1 --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 2d18d0d97aaee..38be6918ff73c 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -12435,7 +12435,7 @@ Example: .. _i_ptrtoaddr: '``ptrtoaddr .. to``' Instruction - +^ Syntax: """ ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits