Author: Sander de Smalen Date: 2025-07-16T16:17:00+02:00 New Revision: 588b8130794f7ce53fe30237f6bf5614b7122f45
URL: https://github.com/llvm/llvm-project/commit/588b8130794f7ce53fe30237f6bf5614b7122f45 DIFF: https://github.com/llvm/llvm-project/commit/588b8130794f7ce53fe30237f6bf5614b7122f45.diff LOG: [AArch64] Use correct regclass for spills of ZPR2/ZPR4 (#148806) Commit a6293228fdd5aba8c04c63f02f3d017443feb3f2 forced the ZPR[24]StridedOrContiguous register class for spills/fills of ZPR2 and ZPR4. This may result in issues when the regclass for the fill is a ZPR2/ZPR4, because the register allocator is then allowed to pick a tuple such as `z1_z2`, which is not a supported register for ZPR2StridedOrContiguous; that class only supports tuples of the form (strided) `z0_z8`, `z1_z9` or (contiguous, starting at a multiple of 2) `z0_z1`, `z2_z3`. For spills we could add a new register class that supports any of the tuple forms, but I've decided to use two pseudos, similar to the fills, for consistency. Fixes https://github.com/llvm/llvm-project/issues/148655 Added: Modified: llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp llvm/lib/Target/AArch64/AArch64InstrInfo.cpp llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/test/CodeGen/AArch64/spillfill-sve.mir Removed: ################################################################################ diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 36f3a670808d4..07b36d20b0c6d 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -1591,18 +1591,22 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, "Non-writeback variants of STGloop / STZGloop should not " "survive past PrologEpilogInserter."); case AArch64::STR_ZZZZXI: + case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS: return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4); case AArch64::STR_ZZZXI: return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3); case AArch64::STR_ZZXI: + case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS: return 
expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2); case AArch64::STR_PPXI: return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2); case AArch64::LDR_ZZZZXI: + case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS: return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4); case AArch64::LDR_ZZZXI: return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3); case AArch64::LDR_ZZXI: + case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS: return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2); case AArch64::LDR_PPXI: return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index c1474773faa76..5420545cc3cec 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2482,8 +2482,10 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { case AArch64::LDR_PXI: case AArch64::LDR_ZXI: case AArch64::LDR_ZZXI: + case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS: case AArch64::LDR_ZZZXI: case AArch64::LDR_ZZZZXI: + case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS: case AArch64::LDRBBui: case AArch64::LDRBui: case AArch64::LDRDui: @@ -2525,8 +2527,10 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { case AArch64::STR_PXI: case AArch64::STR_ZXI: case AArch64::STR_ZZXI: + case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS: case AArch64::STR_ZZZXI: case AArch64::STR_ZZZZXI: + case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS: case AArch64::STRBBui: case AArch64::STRBui: case AArch64::STRDui: @@ -4318,7 +4322,9 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, break; // SVE case AArch64::STR_ZZZZXI: + case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS: case AArch64::LDR_ZZZZXI: + case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS: Scale = TypeSize::getScalable(16); Width = TypeSize::getScalable(16 * 4); MinOffset = -256; @@ -4332,7 +4338,9 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, MaxOffset = 253; 
break; case AArch64::STR_ZZXI: + case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS: case AArch64::LDR_ZZXI: + case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS: Scale = TypeSize::getScalable(16); Width = TypeSize::getScalable(16 * 2); MinOffset = -256; @@ -5559,8 +5567,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Twov2d; Offset = false; - } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) || - AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) { + } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) { + assert(Subtarget.isSVEorStreamingSVEAvailable() && + "Unexpected register store without SVE store instructions"); + Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS; + StackID = TargetStackID::ScalableVector; + } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { assert(Subtarget.isSVEorStreamingSVEAvailable() && "Unexpected register store without SVE store instructions"); Opc = AArch64::STR_ZZXI; @@ -5584,8 +5596,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Fourv2d; Offset = false; - } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) || - AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) { + } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) { + assert(Subtarget.isSVEorStreamingSVEAvailable() && + "Unexpected register store without SVE store instructions"); + Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS; + StackID = TargetStackID::ScalableVector; + } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { assert(Subtarget.isSVEorStreamingSVEAvailable() && "Unexpected register store without SVE store instructions"); Opc = AArch64::STR_ZZZZXI; @@ -5736,8 +5752,12 @@ void AArch64InstrInfo::loadRegFromStackSlot( assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = 
AArch64::LD1Twov2d; Offset = false; - } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) || - AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) { + } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) { + assert(Subtarget.isSVEorStreamingSVEAvailable() && + "Unexpected register load without SVE load instructions"); + Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS; + StackID = TargetStackID::ScalableVector; + } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { assert(Subtarget.isSVEorStreamingSVEAvailable() && "Unexpected register load without SVE load instructions"); Opc = AArch64::LDR_ZZXI; @@ -5761,8 +5781,12 @@ void AArch64InstrInfo::loadRegFromStackSlot( assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Fourv2d; Offset = false; - } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) || - AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) { + } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) { + assert(Subtarget.isSVEorStreamingSVEAvailable() && + "Unexpected register load without SVE load instructions"); + Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS; + StackID = TargetStackID::ScalableVector; + } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { assert(Subtarget.isSVEorStreamingSVEAvailable() && "Unexpected register load without SVE load instructions"); Opc = AArch64::LDR_ZZZZXI; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index eddb96979f7b8..0c4b4f4c3ed88 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2625,16 +2625,22 @@ let Predicates = [HasSVE_or_SME] in { // These get expanded to individual LDR_ZXI/STR_ZXI instructions in // AArch64ExpandPseudoInsts. 
let mayLoad = 1, hasSideEffects = 0 in { - def LDR_ZZXI : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def LDR_ZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def LDR_ZZZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + + def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; - def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; - def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; } let mayStore = 1, hasSideEffects = 0 in { - def STR_ZZXI : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def STR_ZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def STR_ZZZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + + def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; - def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; - def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, 
GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; } let AddedComplexity = 1 in { diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir index 83c9b73c57570..2b16dd0f29ecc 100644 --- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir +++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir @@ -1,5 +1,5 @@ -# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy %s -o - | FileCheck %s -# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=EXPAND +# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy -aarch64-stack-hazard-size=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs -aarch64-stack-hazard-size=0 %s -o - | FileCheck %s --check-prefix=EXPAND --- | ; ModuleID = '<stdin>' source_filename = "<stdin>" @@ -14,13 +14,14 @@ define aarch64_sve_vector_pcs void @spills_fills_stack_id_virtreg_ppr_to_pnr() #1 { entry: unreachable } define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable } define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable } - define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #0 { entry: unreachable } + define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #2 { entry: unreachable } define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable } define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable } - define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #0 { entry: unreachable } + define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #2 { entry: unreachable } attributes #0 = { nounwind "target-features"="+sve" } attributes #1 = { nounwind "target-features"="+sve2p1" } + attributes #2 = { nounwind "target-features"="+sve,+sme2" 
"aarch64_pstate_sm_enabled" } ... --- @@ -318,10 +319,10 @@ registers: - { id: 0, class: zpr2 } stack: liveins: - - { reg: '$z0_z1', virtual-reg: '%0' } + - { reg: '$z1_z2', virtual-reg: '%0' } body: | bb.0.entry: - liveins: $z0_z1 + liveins: $z1_z2 ; CHECK-LABEL: name: spills_fills_stack_id_zpr2 ; CHECK: stack: @@ -329,12 +330,12 @@ body: | ; CHECK-NEXT: stack-id: scalable-vector ; EXPAND-LABEL: name: spills_fills_stack_id_zpr2 - ; EXPAND: STR_ZXI $z0, $sp, 0 - ; EXPAND: STR_ZXI $z1, $sp, 1 - ; EXPAND: $z0 = LDR_ZXI $sp, 0 - ; EXPAND: $z1 = LDR_ZXI $sp, 1 + ; EXPAND: STR_ZXI $z1, $sp, 0 + ; EXPAND: STR_ZXI $z2, $sp, 1 + ; EXPAND: $z1 = LDR_ZXI $sp, 0 + ; EXPAND: $z2 = LDR_ZXI $sp, 1 - %0:zpr2 = COPY $z0_z1 + %0:zpr2 = COPY $z1_z2 $z0_z1_z2_z3 = IMPLICIT_DEF $z4_z5_z6_z7 = IMPLICIT_DEF @@ -345,7 +346,7 @@ body: | $z24_z25_z26_z27 = IMPLICIT_DEF $z28_z29_z30_z31 = IMPLICIT_DEF - $z0_z1 = COPY %0 + $z1_z2 = COPY %0 RET_ReallyLR ... --- @@ -439,10 +440,10 @@ registers: - { id: 0, class: zpr4 } stack: liveins: - - { reg: '$z0_z1_z2_z3', virtual-reg: '%0' } + - { reg: '$z1_z2_z3_z4', virtual-reg: '%0' } body: | bb.0.entry: - liveins: $z0_z1_z2_z3 + liveins: $z1_z2_z3_z4 ; CHECK-LABEL: name: spills_fills_stack_id_zpr4 ; CHECK: stack: @@ -450,16 +451,16 @@ body: | ; CHECK-NEXT: stack-id: scalable-vector ; EXPAND-LABEL: name: spills_fills_stack_id_zpr4 - ; EXPAND: STR_ZXI $z0, $sp, 0 - ; EXPAND: STR_ZXI $z1, $sp, 1 - ; EXPAND: STR_ZXI $z2, $sp, 2 - ; EXPAND: STR_ZXI $z3, $sp, 3 - ; EXPAND: $z0 = LDR_ZXI $sp, 0 - ; EXPAND: $z1 = LDR_ZXI $sp, 1 - ; EXPAND: $z2 = LDR_ZXI $sp, 2 - ; EXPAND: $z3 = LDR_ZXI $sp, 3 + ; EXPAND: STR_ZXI $z1, $sp, 0 + ; EXPAND: STR_ZXI $z2, $sp, 1 + ; EXPAND: STR_ZXI $z3, $sp, 2 + ; EXPAND: STR_ZXI $z4, $sp, 3 + ; EXPAND: $z1 = LDR_ZXI $sp, 0 + ; EXPAND: $z2 = LDR_ZXI $sp, 1 + ; EXPAND: $z3 = LDR_ZXI $sp, 2 + ; EXPAND: $z4 = LDR_ZXI $sp, 3 - %0:zpr4 = COPY $z0_z1_z2_z3 + %0:zpr4 = COPY $z1_z2_z3_z4 $z0_z1_z2_z3 = IMPLICIT_DEF $z4_z5_z6_z7 = 
IMPLICIT_DEF @@ -470,7 +471,7 @@ body: | $z24_z25_z26_z27 = IMPLICIT_DEF $z28_z29_z30_z31 = IMPLICIT_DEF - $z0_z1_z2_z3 = COPY %0 + $z1_z2_z3_z4 = COPY %0 RET_ReallyLR ... --- _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits