llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-aarch64 Author: Benjamin Maxwell (MacDue) <details> <summary>Changes</summary> This cherry-picks 79eb804. These optimizations were added shortly before the branch, however, we're concerned they're not quite ready for production use. This commit limits the optimizations to the simplest cases. --- Patch is 32.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/179473.diff 6 Files Affected: - (modified) llvm/lib/Target/AArch64/MachineSMEABIPass.cpp (+18-150) - (modified) llvm/test/CodeGen/AArch64/sme-agnostic-za.ll (+27-55) - (modified) llvm/test/CodeGen/AArch64/sme-new-za-function.ll (+4-17) - (modified) llvm/test/CodeGen/AArch64/sme-za-control-flow.ll (+15-11) - (modified) llvm/test/CodeGen/AArch64/sme-za-exceptions.ll (+50-19) - (modified) llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll (+48-93) ``````````diff diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp index 823c754a0ac05..9b96bed823817 100644 --- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp @@ -201,23 +201,6 @@ class EmitContext { Register AgnosticZABufferPtr = AArch64::NoRegister; }; -/// Checks if \p State is a legal edge bundle state. For a state to be a legal -/// bundle state, it must be possible to transition from it to any other bundle -/// state without losing any ZA state. This is the case for ACTIVE/LOCAL_SAVED, -/// as you can transition between those states by saving/restoring ZA. The OFF -/// state would not be legal, as transitioning to it drops the content of ZA. -static bool isLegalEdgeBundleZAState(ZAState State) { - switch (State) { - case ZAState::ACTIVE: // ZA state within the accumulator/ZT0. - case ZAState::ACTIVE_ZT0_SAVED: // ZT0 is saved (ZA is active). - case ZAState::LOCAL_SAVED: // ZA state may be saved on the stack. - case ZAState::LOCAL_COMMITTED: // ZA state is saved on the stack. - return true; - default: - return false; - } -} - StringRef getZAStateString(ZAState State) { #define MAKE_CASE(V) \ case V: \ @@ -325,11 +308,6 @@ struct MachineSMEABI : public MachineFunctionPass { const EdgeBundles &Bundles, ArrayRef<ZAState> BundleStates); - /// Propagates desired states forwards (from predecessors -> successors) if - /// \p Forwards, otherwise, propagates backwards (from successors -> - /// predecessors). - void propagateDesiredStates(FunctionInfo &FnInfo, bool Forwards = true); - void emitZT0SaveRestore(EmitContext &, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsSave); @@ -526,110 +504,36 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { PhysLiveRegsAfterSMEPrologue}; } -void MachineSMEABI::propagateDesiredStates(FunctionInfo &FnInfo, - bool Forwards) { - // If `Forwards`, this propagates desired states from predecessors to - // successors, otherwise, this propagates states from successors to - // predecessors. - auto GetBlockState = [](BlockInfo &Block, bool Incoming) -> ZAState & { - return Incoming ? 
Block.DesiredIncomingState : Block.DesiredOutgoingState; - }; - - SmallVector<MachineBasicBlock *> Worklist; - for (auto [BlockID, BlockInfo] : enumerate(FnInfo.Blocks)) { - if (!isLegalEdgeBundleZAState(GetBlockState(BlockInfo, Forwards))) - Worklist.push_back(MF->getBlockNumbered(BlockID)); - } - - while (!Worklist.empty()) { - MachineBasicBlock *MBB = Worklist.pop_back_val(); - BlockInfo &Block = FnInfo.Blocks[MBB->getNumber()]; - - // Pick a legal edge bundle state that matches the majority of - // predecessors/successors. - int StateCounts[ZAState::NUM_ZA_STATE] = {0}; - for (MachineBasicBlock *PredOrSucc : - Forwards ? predecessors(MBB) : successors(MBB)) { - BlockInfo &PredOrSuccBlock = FnInfo.Blocks[PredOrSucc->getNumber()]; - ZAState ZAState = GetBlockState(PredOrSuccBlock, !Forwards); - if (isLegalEdgeBundleZAState(ZAState)) - StateCounts[ZAState]++; - } - - ZAState PropagatedState = ZAState(max_element(StateCounts) - StateCounts); - ZAState &CurrentState = GetBlockState(Block, Forwards); - if (PropagatedState != CurrentState) { - CurrentState = PropagatedState; - ZAState &OtherState = GetBlockState(Block, !Forwards); - // Propagate to the incoming/outgoing state if that is also "ANY". - if (OtherState == ZAState::ANY) - OtherState = PropagatedState; - // Push any successors/predecessors that may need updating to the - // worklist. - for (MachineBasicBlock *SuccOrPred : - Forwards ? successors(MBB) : predecessors(MBB)) { - BlockInfo &SuccOrPredBlock = FnInfo.Blocks[SuccOrPred->getNumber()]; - if (!isLegalEdgeBundleZAState(GetBlockState(SuccOrPredBlock, Forwards))) - Worklist.push_back(SuccOrPred); - } - } - } -} - /// Assigns each edge bundle a ZA state based on the needed states of blocks -/// that have incoming or outgoing edges in that bundle. +/// that have incoming or outgoing blocks in that bundle. SmallVector<ZAState> MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles, const FunctionInfo &FnInfo) { SmallVector<ZAState> BundleStates(Bundles.getNumBundles()); for (unsigned I = 0, E = Bundles.getNumBundles(); I != E; ++I) { - LLVM_DEBUG(dbgs() << "Assigning ZA state for edge bundle: " << I << '\n'); - - // Attempt to assign a ZA state for this bundle that minimizes state - // transitions. Edges within loops are given a higher weight as we assume - // they will be executed more than once. - int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0}; + std::optional<ZAState> BundleState; for (unsigned BlockID : Bundles.getBlocks(I)) { - LLVM_DEBUG(dbgs() << "- bb." << BlockID); - const BlockInfo &Block = FnInfo.Blocks[BlockID]; - bool InEdge = Bundles.getBundle(BlockID, /*Out=*/false) == I; - bool OutEdge = Bundles.getBundle(BlockID, /*Out=*/true) == I; - - bool LegalInEdge = - InEdge && isLegalEdgeBundleZAState(Block.DesiredIncomingState); - bool LegalOutEgde = - OutEdge && isLegalEdgeBundleZAState(Block.DesiredOutgoingState); - if (LegalInEdge) { - LLVM_DEBUG(dbgs() << " DesiredIncomingState: " - << getZAStateString(Block.DesiredIncomingState)); - EdgeStateCounts[Block.DesiredIncomingState]++; - } - if (LegalOutEgde) { - LLVM_DEBUG(dbgs() << " DesiredOutgoingState: " - << getZAStateString(Block.DesiredOutgoingState)); - EdgeStateCounts[Block.DesiredOutgoingState]++; - } - if (!LegalInEdge && !LegalOutEgde) - LLVM_DEBUG(dbgs() << " (no state preference)"); - LLVM_DEBUG(dbgs() << '\n'); + // Check if the block is an incoming block in the bundle. Note: We skip + // Block.FixedEntryState != ANY to ignore EH pads (which are only + // reachable via exceptions). 
+ if (Block.FixedEntryState != ZAState::ANY || + Bundles.getBundle(BlockID, /*Out=*/false) != I) + continue; + + // Pick a state that matches all incoming blocks. Fallback to "ACTIVE" if + // any blocks doesn't match. This will hoist the state from incoming + // blocks to outgoing blocks. + if (!BundleState) + BundleState = Block.DesiredIncomingState; + else if (BundleState != Block.DesiredIncomingState) + BundleState = ZAState::ACTIVE; } - ZAState BundleState = - ZAState(max_element(EdgeStateCounts) - EdgeStateCounts); - - if (BundleState == ZAState::ANY) + if (!BundleState || BundleState == ZAState::ANY) BundleState = ZAState::ACTIVE; - LLVM_DEBUG({ - dbgs() << "Chosen ZA state: " << getZAStateString(BundleState) << '\n' - << "Edge counts:"; - for (auto [State, Count] : enumerate(EdgeStateCounts)) - dbgs() << " " << getZAStateString(ZAState(State)) << ": " << Count; - dbgs() << "\n\n"; - }); - - BundleStates[I] = BundleState; + BundleStates[I] = *BundleState; } return BundleStates; @@ -1268,42 +1172,6 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) { FunctionInfo FnInfo = collectNeededZAStates(SMEFnAttrs); - if (OptLevel != CodeGenOptLevel::None) { - // Propagate desired states forward, then backwards. Most of the propagation - // should be done in the forward step, and backwards propagation is then - // used to fill in the gaps. Note: Doing both in one step can give poor - // results. For example, consider this subgraph: - // - // ┌─────┐ - // ┌─┤ BB0 ◄───┐ - // │ └─┬───┘ │ - // │ ┌─▼───◄──┐│ - // │ │ BB1 │ ││ - // │ └─┬┬──┘ ││ - // │ │└─────┘│ - // │ ┌─▼───┐ │ - // │ │ BB2 ├───┘ - // │ └─┬───┘ - // │ ┌─▼───┐ - // └─► BB3 │ - // └─────┘ - // - // If: - // - "BB0" and "BB2" (outer loop) has no state preference - // - "BB1" (inner loop) desires the ACTIVE state on entry/exit - // - "BB3" desires the LOCAL_SAVED state on entry - // - // If we propagate forwards first, ACTIVE is propagated from BB1 to BB2, - // then from BB2 to BB0. Which results in the inner and outer loops having - // the "ACTIVE" state. This avoids any state changes in the loops. - // - // If we propagate backwards first, we _could_ propagate LOCAL_SAVED from - // BB3 to BB0, which would result in a transition from ACTIVE -> LOCAL_SAVED - // in the outer loop. - for (bool Forwards : {true, false}) - propagateDesiredStates(FnInfo, Forwards); - } - SmallVector<ZAState> BundleStates = assignBundleZAStates(Bundles, FnInfo); EmitContext Context; diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll index 344f1ef24b843..4a18b9f61d69f 100644 --- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll +++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll @@ -352,61 +352,33 @@ define i64 @test_many_callee_arguments( } define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_state_agnostic" "probe-stack"="inline-asm" "stack-probe-size"="65536"{ -; CHECK-SDAG-LABEL: agnostic_za_buffer_alloc_with_stack_probes: -; CHECK-SDAG: // %bb.0: -; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Spill -; CHECK-SDAG-NEXT: mov x29, sp -; CHECK-SDAG-NEXT: bl __arm_sme_state_size -; CHECK-SDAG-NEXT: mov x8, sp -; CHECK-SDAG-NEXT: sub x19, x8, x0 -; CHECK-SDAG-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1 -; CHECK-SDAG-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-SDAG-NEXT: cmp sp, x19 -; CHECK-SDAG-NEXT: b.le .LBB7_3 -; CHECK-SDAG-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1 -; CHECK-SDAG-NEXT: str xzr, [sp] -; CHECK-SDAG-NEXT: b .LBB7_1 -; CHECK-SDAG-NEXT: .LBB7_3: -; CHECK-SDAG-NEXT: mov sp, x19 -; CHECK-SDAG-NEXT: ldr xzr, [sp] -; CHECK-SDAG-NEXT: mov x0, x19 -; CHECK-SDAG-NEXT: bl __arm_sme_save -; CHECK-SDAG-NEXT: bl private_za -; CHECK-SDAG-NEXT: mov x0, x19 -; CHECK-SDAG-NEXT: bl __arm_sme_restore -; CHECK-SDAG-NEXT: mov sp, x29 -; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Reload -; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload -; CHECK-SDAG-NEXT: ret -; -; CHECK-LABEL: agnostic_za_buffer_alloc_with_stack_probes: -; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: bl __arm_sme_state_size -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: sub x19, x8, x0 -; CHECK-NEXT: mov x0, x19 -; CHECK-NEXT: bl __arm_sme_save -; CHECK-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: cmp sp, x19 -; CHECK-NEXT: b.le .LBB7_3 -; CHECK-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB7_1 -; CHECK-NEXT: .LBB7_3: -; CHECK-NEXT: mov sp, x19 -; CHECK-NEXT: ldr xzr, [sp] -; CHECK-NEXT: bl private_za -; CHECK-NEXT: mov x0, x19 -; CHECK-NEXT: bl __arm_sme_restore -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload -; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload -; CHECK-NEXT: ret +; CHECK-COMMON-LABEL: agnostic_za_buffer_alloc_with_stack_probes: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: bl __arm_sme_state_size +; CHECK-COMMON-NEXT: mov x8, sp +; CHECK-COMMON-NEXT: sub x19, x8, x0 +; CHECK-COMMON-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1 +; CHECK-COMMON-NEXT: sub sp, sp, #16, lsl #12 // =65536 +; CHECK-COMMON-NEXT: cmp sp, x19 +; CHECK-COMMON-NEXT: b.le .LBB7_3 +; CHECK-COMMON-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1 +; CHECK-COMMON-NEXT: str xzr, [sp] +; CHECK-COMMON-NEXT: b .LBB7_1 +; CHECK-COMMON-NEXT: .LBB7_3: +; CHECK-COMMON-NEXT: mov sp, x19 +; CHECK-COMMON-NEXT: ldr xzr, [sp] +; CHECK-COMMON-NEXT: mov x0, x19 +; CHECK-COMMON-NEXT: bl __arm_sme_save +; CHECK-COMMON-NEXT: bl private_za +; CHECK-COMMON-NEXT: mov x0, x19 +; CHECK-COMMON-NEXT: bl __arm_sme_restore +; CHECK-COMMON-NEXT: mov sp, x29 +; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Reload +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret call void @private_za() ret void } diff --git a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll index d2715b58439d8..6995cfae8e459 100644 --- a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll +++ b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll @@ -51,7 +51,6 @@ define void @private_za() "aarch64_new_za" { } ; Note: This test must run at -O0 as otherwise the multiple exits are optimized out. -; TODO: We should be able to omit the ZA save here (as this function does not use ZA). define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_new_za" { ; CHECK-SDAG-LABEL: private_za_multiple_exit: ; CHECK-SDAG: // %bb.0: // %prelude @@ -99,33 +98,21 @@ define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_new_za" ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mrs x8, TPIDR2_EL0 -; CHECK-NEXT: cbnz x8, .LBB1_1 -; CHECK-NEXT: b .LBB1_2 -; CHECK-NEXT: .LBB1_1: // %entry -; CHECK-NEXT: bl __arm_tpidr2_save -; CHECK-NEXT: msr TPIDR2_EL0, xzr -; CHECK-NEXT: zero {za} -; CHECK-NEXT: b .LBB1_2 -; CHECK-NEXT: .LBB1_2: // %entry -; CHECK-NEXT: smstart za ; CHECK-NEXT: str w1, [sp, #8] // 4-byte Spill ; CHECK-NEXT: str w0, [sp, #12] // 4-byte Spill ; CHECK-NEXT: subs x8, x2, #1 -; CHECK-NEXT: b.ne .LBB1_4 -; CHECK-NEXT: b .LBB1_3 -; CHECK-NEXT: .LBB1_3: // %if.else +; CHECK-NEXT: b.ne .LBB1_2 +; CHECK-NEXT: b .LBB1_1 +; CHECK-NEXT: .LBB1_1: // %if.else ; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Reload ; CHECK-NEXT: ldr w9, [sp, #8] // 4-byte Reload ; CHECK-NEXT: add w0, w8, w9 -; CHECK-NEXT: smstop za ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB1_4: // %if.end +; CHECK-NEXT: .LBB1_2: // %if.end ; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Reload ; CHECK-NEXT: ldr w9, [sp, #8] // 4-byte Reload ; CHECK-NEXT: subs w0, w8, w9 -; CHECK-NEXT: smstop za ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll index 50449172ce85b..aae1d3b756f4e 100644 --- a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll +++ b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll @@ -49,36 +49,40 @@ define void @private_za_loop(i32 %n) "aarch64_inout_za" nounwind { ; CHECK-LABEL: private_za_loop: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: msub x9, x8, x8, x9 ; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: sub x10, x29, #16 ; CHECK-NEXT: cmp w0, #1 ; CHECK-NEXT: stp x9, x8, [x29, #-16] -; CHECK-NEXT: msr TPIDR2_EL0, x10 -; CHECK-NEXT: b.lt .LBB0_3 +; CHECK-NEXT: b.lt .LBB0_5 ; CHECK-NEXT: // %bb.1: // %loop.preheader ; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: sub x20, x29, #16 +; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .LBB0_2: // %loop +; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: cbz w19, .LBB0_5 +; CHECK-NEXT: .LBB0_3: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: msr TPIDR2_EL0, x20 ; CHECK-NEXT: bl private_za_call -; CHECK-NEXT: subs w19, w19, #1 -; CHECK-NEXT: b.ne .LBB0_2 -; CHECK-NEXT: .LBB0_3: // %exit +; CHECK-NEXT: sub w19, w19, #1 ; CHECK-NEXT: smstart za ; CHECK-NEXT: mrs x8, TPIDR2_EL0 ; CHECK-NEXT: sub x0, x29, #16 -; CHECK-NEXT: cbnz x8, .LBB0_5 -; CHECK-NEXT: // %bb.4: // %exit +; CHECK-NEXT: cbnz x8, .LBB0_2 +; CHECK-NEXT: // %bb.4: // %loop +; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_5: // %exit -; CHECK-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll index 5243b8d7203d8..19ea1e47f84ff 100644 --- a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll +++ b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll @@ -63,17 +63,25 @@ define void @za_with_raii(i1 %fail) "aarch64_inout_za" personality ptr @__gxx_pe ; CHECK-NEXT: ldr x1, [x1, :got_lo12:typeinfo_for_char_const_ptr] ; CHECK-NEXT: bl __cxa_throw ; CHECK-NEXT: .Ltmp1: // EH_LABEL -; CHECK-NEXT: // %bb.3: // %throw_fail -; CHECK-NEXT: .LBB0_4: // %unwind_dtors +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: cbnz x8, .LBB0_4 +; CHECK-NEXT: // %bb.3: // %throw_exception +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB0_4: // %throw_exception +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: // %bb.5: // %throw_fail +; CHECK-NEXT: .LBB0_6: // %unwind_dtors ; CHECK-NEXT: .Ltmp2: // EH_LABEL ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstart za ; CHECK-NEXT: mrs x8, TPIDR2_EL0 ; CHECK-NEXT: sub x0, x29, #16 -; CHECK-NEXT: cbnz x8, .LBB0_6 -; CHECK-NEXT: // %bb.5: // %unwind_dtors +; CHECK-NEXT: cbnz x8, .LBB0_8 +; CHECK-NEXT: // %bb.7: // %unwind_dtors ; CHECK-NEXT: bl __arm_tpidr2_restore -; CHECK-NEXT: .LBB0_6: // %unwind_dtors +; CHECK-NEXT: .LBB0_8: // %unwind_dtors ; CHECK-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEXT: bl shared_za_call ; CHECK-NEXT: sub x8, x29, #16 @@ -224,15 +232,15 @@ define void @try_catch() "aarch64_inout_za" personality ptr @__gxx_personality_v ; CHECK-NEXT: msr TPIDR2_EL0, x8 ; CHECK-NEXT: bl may_throw ; CHECK-NEXT: .Ltmp4: // EH_LABEL -; CHECK-NEXT: .LBB1_1: // %after_catch ; CHECK-NEXT: smstart za ; CHECK-NEXT: mrs x8, TPIDR2_EL0 ; CHECK-NEXT: sub x0, x29, #16 -; CHECK-NEXT: cbnz x8, .LBB1_3 -; CHECK-NEXT: // %bb.2: // %after_catch +; CHECK-NEXT: cbnz x8, .LBB1_2 +; 
CHECK-NEXT: // %bb.1: ; CHECK-NEXT: bl __arm_tpidr2_restore -; CHECK-NEXT: .LBB1_3: // %after_catch +; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .LBB1_3: // %after_catch ; CHECK-NEXT: mov sp, x29 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: b shared_za_call @@ -251,7 +259,15 @@ define void @try_catch() "aarch64_inout_za" personality ptr @__gxx_personality_v ; CHECK-NEXT: sub x8, x29, #16 ; CHECK-NEXT: msr TPIDR2_EL0, x8 ; CHECK-NEXT: bl __cxa_end_catch -; CHECK-NEXT: b .LBB1_1 +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: cbnz x8, .LBB1_8 +; CHECK-NEXT: // %bb.7: // %catch +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB1_8: // %catch +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/179473
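For readers skimming the truncated diff: the logic retained in `assignBundleZAStates` reduces to a single rule. If every block whose incoming edge belongs to a bundle (and whose entry state is not fixed, as it is for EH pads) desires the same ZA state, the bundle takes that state; otherwise it falls back to ACTIVE. Below is a minimal standalone sketch of that rule only; the `ZAState` enum subset and the `pickBundleState` helper are illustrative stand-ins, not the pass's actual definitions.

```cpp
#include <iostream>
#include <optional>
#include <vector>

// Illustrative subset of the pass's ZA states (names follow the patch, but
// this enum is a stand-in, not the real AArch64 definition).
enum class ZAState { ANY, ACTIVE, LOCAL_SAVED, LOCAL_COMMITTED };

// Sketch of the simplified selection rule: if all incoming blocks in a bundle
// agree on a desired (non-ANY) state, use it; on any disagreement, or if no
// block expressed a preference, fall back to ACTIVE.
static ZAState pickBundleState(const std::vector<ZAState> &DesiredIncoming) {
  std::optional<ZAState> BundleState;
  for (ZAState Desired : DesiredIncoming) {
    if (!BundleState)
      BundleState = Desired;           // First preference seen.
    else if (*BundleState != Desired)
      BundleState = ZAState::ACTIVE;   // Any mismatch forces ACTIVE.
  }
  if (!BundleState || *BundleState == ZAState::ANY)
    BundleState = ZAState::ACTIVE;     // No usable preference at all.
  return *BundleState;
}

int main() {
  // All incoming blocks want LOCAL_SAVED -> the bundle keeps LOCAL_SAVED.
  std::cout << static_cast<int>(
                   pickBundleState({ZAState::LOCAL_SAVED, ZAState::LOCAL_SAVED}))
            << '\n'; // prints 2
  // Mixed preferences -> fall back to ACTIVE.
  std::cout << static_cast<int>(
                   pickBundleState({ZAState::LOCAL_SAVED, ZAState::ACTIVE}))
            << '\n'; // prints 1
}
```

In the actual pass, per the diff above, the loop additionally skips blocks whose incoming edge is not in the bundle being assigned (`Bundles.getBundle(BlockID, /*Out=*/false) != I`) and blocks with a fixed entry state (`Block.FixedEntryState != ZAState::ANY`) before consulting `Block.DesiredIncomingState`.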
