Author: Jun Ma Date: 2021-12-17T12:08:28+08:00 New Revision: dd399a6194e8506d2af87794d78cb920c66f19b0
URL: https://github.com/llvm/llvm-project/commit/dd399a6194e8506d2af87794d78cb920c66f19b0 DIFF: https://github.com/llvm/llvm-project/commit/dd399a6194e8506d2af87794d78cb920c66f19b0.diff LOG: [RISCV] Use ldp/sdp for EPI Added: llvm/test/CodeGen/RISCV/callee-saved-n3.ll llvm/test/CodeGen/RISCV/large-stack-n3.ll Modified: llvm/lib/Target/RISCV/RISCVFrameLowering.cpp Removed: ################################################################################ diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index f5d491938050..9e1381ec5c6e 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1010,7 +1010,11 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const { // Return the FirstSPAdjustAmount if the StackSize can not fit in signed // 12-bit and there exists a callee saved register need to be pushed. - if (!isInt<12>(StackSize) && (CSI.size() > 0)) { + if (!hasFP(MF) && !isInt<10>(StackSize) && + STI.hasFeature(RISCV::Feature64Bit) && + STI.hasFeature(RISCV::FeatureStdExtXin) && (CSI.size() > 1)) { + return 512 - getStackAlign().value(); + } else if (!isInt<12>(StackSize) && (CSI.size() > 0)) { // FirstSPAdjustAmount is choosed as (2048 - StackAlign) // because 2048 will cause sp = sp + 2048 in epilogue split into // multi-instructions. The offset smaller than 2048 can fit in signle @@ -1048,12 +1052,43 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( // Manually spill values not spilled by libcall. const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); - for (auto &CS : NonLibcallCSI) { + unsigned Count = NonLibcallCSI.size(); + for (unsigned i = 0; i < Count; i += 1) { // Insert the spill to the stack frame. 
- Register Reg = CS.getReg(); + Register Reg = NonLibcallCSI[i].getReg(); + int FI = NonLibcallCSI[i].getFrameIdx(); + bool IsN3 = STI.hasFeature(RISCV::Feature64Bit) && + STI.hasFeature(RISCV::FeatureStdExtXin); + if (IsN3 && unsigned(i + 1) < Count) { + unsigned NextReg = NonLibcallCSI[i + 1].getReg(); + int NextFI = NonLibcallCSI[i + 1].getFrameIdx(); + if (!hasFP(*MF) && RISCV::GPRRegClass.contains(Reg) && + RISCV::GPRRegClass.contains(NextReg) && FI + 1 == NextFI) { + + MachineFrameInfo &MFI = MF->getFrameInfo(); + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(RISCV::SDP)); + + MIB.addReg(Reg, getKillRegState(!MBB.isLiveIn(Reg))); + MIB.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), + MachineMemOperand::MOStore, MFI.getObjectSize(FI), + MFI.getObjectAlign(FI))); + + MIB.addReg(NextReg, getKillRegState(!MBB.isLiveIn(NextReg))); + MIB.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, NextFI), + MachineMemOperand::MOStore, MFI.getObjectSize(NextFI), + MFI.getObjectAlign(NextFI))); + + MIB.addFrameIndex(FI).addImm(0); + + i += 1; + continue; + } + } + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(), - RC, TRI); + TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), FI, RC, TRI); } return true; @@ -1078,10 +1113,43 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters( // load-to-use data hazard between loading RA and return by RA. // loadRegFromStackSlot can insert multiple instructions. const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); - for (auto &CS : NonLibcallCSI) { - Register Reg = CS.getReg(); + unsigned Count = NonLibcallCSI.size(); + for (unsigned i = 0; i < Count; i += 1) { + // Insert the spill to the stack frame. 
+ Register Reg = NonLibcallCSI[i].getReg(); + int FI = NonLibcallCSI[i].getFrameIdx(); + bool IsN3 = STI.hasFeature(RISCV::Feature64Bit) && + STI.hasFeature(RISCV::FeatureStdExtXin); + if (IsN3 && unsigned(i + 1) < Count) { + unsigned NextReg = NonLibcallCSI[i + 1].getReg(); + int NextFI = NonLibcallCSI[i + 1].getFrameIdx(); + if (!hasFP(*MF) && RISCV::GPRRegClass.contains(Reg) && + RISCV::GPRRegClass.contains(NextReg) && FI + 1 == NextFI) { + + MachineFrameInfo &MFI = MF->getFrameInfo(); + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(RISCV::LDP)); + + MIB.addReg(Reg, RegState::Define); + MIB.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), + MachineMemOperand::MOLoad, MFI.getObjectSize(FI), + MFI.getObjectAlign(FI))); + + MIB.addReg(NextReg, RegState::Define); + MIB.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, NextFI), + MachineMemOperand::MOLoad, MFI.getObjectSize(NextFI), + MFI.getObjectAlign(NextFI))); + + MIB.addFrameIndex(FI).addImm(0); + + i += 1; + continue; + } + } + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI); + TII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, TRI); assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); } diff --git a/llvm/test/CodeGen/RISCV/callee-saved-n3.ll b/llvm/test/CodeGen/RISCV/callee-saved-n3.ll new file mode 100644 index 000000000000..affbb0034efb --- /dev/null +++ b/llvm/test/CodeGen/RISCV/callee-saved-n3.ll @@ -0,0 +1,808 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-xin -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+f,+experimental-xin -target-abi lp64f -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-xin -target-abi 
lp64f -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-xin -target-abi lp64d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+experimental-xin -verify-machineinstrs -frame-pointer=all < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I-WITH-FP + +@var = global [32 x i32] zeroinitializer + +; This function tests that RISCVRegisterInfo::getCalleeSavedRegs returns +; something appropriate. + +define void @callee() nounwind { +; RV64I-LABEL: callee: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -160 +; RV64I-NEXT: sdp ra, s0, 152(sp) # 16-byte Folded Spill +; RV64I-NEXT: sdp s1, s2, 136(sp) # 16-byte Folded Spill +; RV64I-NEXT: sdp s3, s4, 120(sp) # 16-byte Folded Spill +; RV64I-NEXT: sdp s5, s6, 104(sp) # 16-byte Folded Spill +; RV64I-NEXT: sdp s7, s8, 88(sp) # 16-byte Folded Spill +; RV64I-NEXT: sdp s9, s10, 72(sp) # 16-byte Folded Spill +; RV64I-NEXT: sd s11, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: lui a7, %hi(var) +; RV64I-NEXT: lw a0, %lo(var)(a7) +; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var+4)(a7) +; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var+8)(a7) +; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var+12)(a7) +; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi a5, a7, %lo(var) +; RV64I-NEXT: lw a0, 16(a5) +; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 20(a5) +; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw t4, 24(a5) +; RV64I-NEXT: lw t5, 28(a5) +; RV64I-NEXT: lw t6, 32(a5) +; RV64I-NEXT: lw s2, 36(a5) +; RV64I-NEXT: lw s3, 40(a5) +; RV64I-NEXT: lw s4, 44(a5) +; RV64I-NEXT: lw s5, 48(a5) +; RV64I-NEXT: lw s6, 52(a5) +; RV64I-NEXT: lw s7, 56(a5) +; RV64I-NEXT: lw s8, 60(a5) +; RV64I-NEXT: lw s9, 64(a5) +; RV64I-NEXT: lw s10, 68(a5) +; RV64I-NEXT: lw s11, 72(a5) +; 
RV64I-NEXT: lw ra, 76(a5) +; RV64I-NEXT: lw s1, 80(a5) +; RV64I-NEXT: lw t3, 84(a5) +; RV64I-NEXT: lw t2, 88(a5) +; RV64I-NEXT: lw t1, 92(a5) +; RV64I-NEXT: lw t0, 96(a5) +; RV64I-NEXT: lw s0, 100(a5) +; RV64I-NEXT: lw a6, 104(a5) +; RV64I-NEXT: lw a4, 108(a5) +; RV64I-NEXT: lw a0, 124(a5) +; RV64I-NEXT: lw a1, 120(a5) +; RV64I-NEXT: lw a2, 116(a5) +; RV64I-NEXT: lw a3, 112(a5) +; RV64I-NEXT: sw a0, 124(a5) +; RV64I-NEXT: sw a1, 120(a5) +; RV64I-NEXT: sw a2, 116(a5) +; RV64I-NEXT: sw a3, 112(a5) +; RV64I-NEXT: sw a4, 108(a5) +; RV64I-NEXT: sw a6, 104(a5) +; RV64I-NEXT: sw s0, 100(a5) +; RV64I-NEXT: sw t0, 96(a5) +; RV64I-NEXT: sw t1, 92(a5) +; RV64I-NEXT: sw t2, 88(a5) +; RV64I-NEXT: sw t3, 84(a5) +; RV64I-NEXT: sw s1, 80(a5) +; RV64I-NEXT: sw ra, 76(a5) +; RV64I-NEXT: sw s11, 72(a5) +; RV64I-NEXT: sw s10, 68(a5) +; RV64I-NEXT: sw s9, 64(a5) +; RV64I-NEXT: sw s8, 60(a5) +; RV64I-NEXT: sw s7, 56(a5) +; RV64I-NEXT: sw s6, 52(a5) +; RV64I-NEXT: sw s5, 48(a5) +; RV64I-NEXT: sw s4, 44(a5) +; RV64I-NEXT: sw s3, 40(a5) +; RV64I-NEXT: sw s2, 36(a5) +; RV64I-NEXT: sw t6, 32(a5) +; RV64I-NEXT: sw t5, 28(a5) +; RV64I-NEXT: sw t4, 24(a5) +; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 20(a5) +; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 16(a5) +; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var+12)(a7) +; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var+8)(a7) +; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var+4)(a7) +; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var)(a7) +; RV64I-NEXT: ldp ra, s0, 152(sp) # 16-byte Folded Reload +; RV64I-NEXT: ldp s1, s2, 136(sp) # 16-byte Folded Reload +; RV64I-NEXT: ldp s3, s4, 120(sp) # 16-byte Folded Reload +; RV64I-NEXT: ldp s5, s6, 104(sp) # 16-byte Folded Reload +; RV64I-NEXT: ldp s7, s8, 88(sp) # 16-byte Folded Reload +; RV64I-NEXT: ldp s9, s10, 72(sp) # 16-byte 
Folded Reload +; RV64I-NEXT: ld s11, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 160 +; RV64I-NEXT: ret +; +; RV64I-WITH-FP-LABEL: callee: +; RV64I-WITH-FP: # %bb.0: +; RV64I-WITH-FP-NEXT: addi sp, sp, -160 +; RV64I-WITH-FP-NEXT: sd ra, 152(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s0, 144(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s1, 136(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s2, 128(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s3, 120(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s4, 112(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s5, 104(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s6, 96(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s7, 88(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s8, 80(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s9, 72(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s10, 64(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s11, 56(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: addi s0, sp, 160 +; RV64I-WITH-FP-NEXT: lui a7, %hi(var) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var)(a7) +; RV64I-WITH-FP-NEXT: sd a0, -112(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+4)(a7) +; RV64I-WITH-FP-NEXT: sd a0, -120(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+8)(a7) +; RV64I-WITH-FP-NEXT: sd a0, -128(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+12)(a7) +; RV64I-WITH-FP-NEXT: sd a0, -136(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: addi a5, a7, %lo(var) +; RV64I-WITH-FP-NEXT: lw a0, 16(a5) +; RV64I-WITH-FP-NEXT: sd a0, -144(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 20(a5) +; RV64I-WITH-FP-NEXT: sd a0, -152(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 24(a5) +; RV64I-WITH-FP-NEXT: sd a0, -160(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw t5, 28(a5) +; RV64I-WITH-FP-NEXT: lw t6, 32(a5) +; RV64I-WITH-FP-NEXT: lw s2, 36(a5) +; RV64I-WITH-FP-NEXT: lw s3, 40(a5) +; 
RV64I-WITH-FP-NEXT: lw s4, 44(a5) +; RV64I-WITH-FP-NEXT: lw s5, 48(a5) +; RV64I-WITH-FP-NEXT: lw s6, 52(a5) +; RV64I-WITH-FP-NEXT: lw s7, 56(a5) +; RV64I-WITH-FP-NEXT: lw s8, 60(a5) +; RV64I-WITH-FP-NEXT: lw s9, 64(a5) +; RV64I-WITH-FP-NEXT: lw s10, 68(a5) +; RV64I-WITH-FP-NEXT: lw s11, 72(a5) +; RV64I-WITH-FP-NEXT: lw ra, 76(a5) +; RV64I-WITH-FP-NEXT: lw t4, 80(a5) +; RV64I-WITH-FP-NEXT: lw t3, 84(a5) +; RV64I-WITH-FP-NEXT: lw t2, 88(a5) +; RV64I-WITH-FP-NEXT: lw s1, 92(a5) +; RV64I-WITH-FP-NEXT: lw t1, 96(a5) +; RV64I-WITH-FP-NEXT: lw t0, 100(a5) +; RV64I-WITH-FP-NEXT: lw a6, 104(a5) +; RV64I-WITH-FP-NEXT: lw a4, 108(a5) +; RV64I-WITH-FP-NEXT: lw a0, 124(a5) +; RV64I-WITH-FP-NEXT: lw a1, 120(a5) +; RV64I-WITH-FP-NEXT: lw a2, 116(a5) +; RV64I-WITH-FP-NEXT: lw a3, 112(a5) +; RV64I-WITH-FP-NEXT: sw a0, 124(a5) +; RV64I-WITH-FP-NEXT: sw a1, 120(a5) +; RV64I-WITH-FP-NEXT: sw a2, 116(a5) +; RV64I-WITH-FP-NEXT: sw a3, 112(a5) +; RV64I-WITH-FP-NEXT: sw a4, 108(a5) +; RV64I-WITH-FP-NEXT: sw a6, 104(a5) +; RV64I-WITH-FP-NEXT: sw t0, 100(a5) +; RV64I-WITH-FP-NEXT: sw t1, 96(a5) +; RV64I-WITH-FP-NEXT: sw s1, 92(a5) +; RV64I-WITH-FP-NEXT: sw t2, 88(a5) +; RV64I-WITH-FP-NEXT: sw t3, 84(a5) +; RV64I-WITH-FP-NEXT: sw t4, 80(a5) +; RV64I-WITH-FP-NEXT: sw ra, 76(a5) +; RV64I-WITH-FP-NEXT: sw s11, 72(a5) +; RV64I-WITH-FP-NEXT: sw s10, 68(a5) +; RV64I-WITH-FP-NEXT: sw s9, 64(a5) +; RV64I-WITH-FP-NEXT: sw s8, 60(a5) +; RV64I-WITH-FP-NEXT: sw s7, 56(a5) +; RV64I-WITH-FP-NEXT: sw s6, 52(a5) +; RV64I-WITH-FP-NEXT: sw s5, 48(a5) +; RV64I-WITH-FP-NEXT: sw s4, 44(a5) +; RV64I-WITH-FP-NEXT: sw s3, 40(a5) +; RV64I-WITH-FP-NEXT: sw s2, 36(a5) +; RV64I-WITH-FP-NEXT: sw t6, 32(a5) +; RV64I-WITH-FP-NEXT: sw t5, 28(a5) +; RV64I-WITH-FP-NEXT: ld a0, -160(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 24(a5) +; RV64I-WITH-FP-NEXT: ld a0, -152(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 20(a5) +; RV64I-WITH-FP-NEXT: ld a0, -144(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: 
sw a0, 16(a5) +; RV64I-WITH-FP-NEXT: ld a0, -136(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+12)(a7) +; RV64I-WITH-FP-NEXT: ld a0, -128(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+8)(a7) +; RV64I-WITH-FP-NEXT: ld a0, -120(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+4)(a7) +; RV64I-WITH-FP-NEXT: ld a0, -112(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, %lo(var)(a7) +; RV64I-WITH-FP-NEXT: ld ra, 152(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s0, 144(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s1, 136(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s2, 128(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s3, 120(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s4, 112(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s5, 104(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s6, 96(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s7, 88(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s8, 80(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s9, 72(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s10, 64(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s11, 56(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: addi sp, sp, 160 +; RV64I-WITH-FP-NEXT: ret + %val = load [32 x i32], [32 x i32]* @var + store volatile [32 x i32] %val, [32 x i32]* @var + ret void +} + +; This function tests that RISCVRegisterInfo::getCallPreservedMask returns +; something appropriate. 
+ +define void @caller() nounwind { +; RV32I-LABEL: caller: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -144 +; RV32I-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32I-NEXT: lui s0, %hi(var) +; RV32I-NEXT: lw a0, %lo(var)(s0) +; RV32I-NEXT: sw a0, 88(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var+4)(s0) +; RV32I-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var+8)(s0) +; RV32I-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var+12)(s0) +; RV32I-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi s1, s0, %lo(var) +; RV32I-NEXT: lw a0, 16(s1) +; RV32I-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 20(s1) +; RV32I-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 24(s1) +; RV32I-NEXT: sw a0, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 28(s1) +; RV32I-NEXT: sw a0, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 32(s1) +; RV32I-NEXT: sw a0, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 36(s1) +; RV32I-NEXT: sw a0, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 40(s1) +; RV32I-NEXT: sw a0, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 44(s1) +; RV32I-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 48(s1) +; RV32I-NEXT: sw a0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 52(s1) +; RV32I-NEXT: sw a0, 36(sp) # 4-byte 
Folded Spill +; RV32I-NEXT: lw a0, 56(s1) +; RV32I-NEXT: sw a0, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 60(s1) +; RV32I-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 64(s1) +; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 68(s1) +; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 72(s1) +; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 76(s1) +; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 80(s1) +; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 84(s1) +; RV32I-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw s4, 88(s1) +; RV32I-NEXT: lw s5, 92(s1) +; RV32I-NEXT: lw s6, 96(s1) +; RV32I-NEXT: lw s7, 100(s1) +; RV32I-NEXT: lw s8, 104(s1) +; RV32I-NEXT: lw s9, 108(s1) +; RV32I-NEXT: lw s10, 112(s1) +; RV32I-NEXT: lw s11, 116(s1) +; RV32I-NEXT: lw s2, 120(s1) +; RV32I-NEXT: lw s3, 124(s1) +; RV32I-NEXT: call callee@plt +; RV32I-NEXT: sw s3, 124(s1) +; RV32I-NEXT: sw s2, 120(s1) +; RV32I-NEXT: sw s11, 116(s1) +; RV32I-NEXT: sw s10, 112(s1) +; RV32I-NEXT: sw s9, 108(s1) +; RV32I-NEXT: sw s8, 104(s1) +; RV32I-NEXT: sw s7, 100(s1) +; RV32I-NEXT: sw s6, 96(s1) +; RV32I-NEXT: sw s5, 92(s1) +; RV32I-NEXT: sw s4, 88(s1) +; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 84(s1) +; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 80(s1) +; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 76(s1) +; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 72(s1) +; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 68(s1) +; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 64(s1) +; RV32I-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 60(s1) +; RV32I-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 56(s1) +; RV32I-NEXT: lw a0, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 
52(s1) +; RV32I-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 48(s1) +; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 44(s1) +; RV32I-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 40(s1) +; RV32I-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 36(s1) +; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 32(s1) +; RV32I-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 28(s1) +; RV32I-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 24(s1) +; RV32I-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 20(s1) +; RV32I-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 16(s1) +; RV32I-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var+12)(s0) +; RV32I-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var+8)(s0) +; RV32I-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var+4)(s0) +; RV32I-NEXT: lw a0, 88(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var)(s0) +; RV32I-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 144 +; RV32I-NEXT: ret +; +; RV32I-WITH-FP-LABEL: caller: +; RV32I-WITH-FP: # %bb.0: +; RV32I-WITH-FP-NEXT: addi sp, sp, -144 +; RV32I-WITH-FP-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; 
RV32I-WITH-FP-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: addi s0, sp, 144 +; RV32I-WITH-FP-NEXT: lui s6, %hi(var) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var)(s6) +; RV32I-WITH-FP-NEXT: sw a0, -56(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+4)(s6) +; RV32I-WITH-FP-NEXT: sw a0, -60(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+8)(s6) +; RV32I-WITH-FP-NEXT: sw a0, -64(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+12)(s6) +; RV32I-WITH-FP-NEXT: sw a0, -68(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: addi s1, s6, %lo(var) +; RV32I-WITH-FP-NEXT: lw a0, 16(s1) +; RV32I-WITH-FP-NEXT: sw a0, -72(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 20(s1) +; RV32I-WITH-FP-NEXT: sw a0, -76(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 24(s1) +; RV32I-WITH-FP-NEXT: sw a0, -80(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 28(s1) +; RV32I-WITH-FP-NEXT: sw a0, -84(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 32(s1) +; RV32I-WITH-FP-NEXT: sw a0, -88(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 36(s1) +; RV32I-WITH-FP-NEXT: sw a0, -92(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 40(s1) +; RV32I-WITH-FP-NEXT: sw a0, -96(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 44(s1) +; RV32I-WITH-FP-NEXT: 
sw a0, -100(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 48(s1) +; RV32I-WITH-FP-NEXT: sw a0, -104(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 52(s1) +; RV32I-WITH-FP-NEXT: sw a0, -108(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 56(s1) +; RV32I-WITH-FP-NEXT: sw a0, -112(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 60(s1) +; RV32I-WITH-FP-NEXT: sw a0, -116(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 64(s1) +; RV32I-WITH-FP-NEXT: sw a0, -120(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 68(s1) +; RV32I-WITH-FP-NEXT: sw a0, -124(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 72(s1) +; RV32I-WITH-FP-NEXT: sw a0, -128(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 76(s1) +; RV32I-WITH-FP-NEXT: sw a0, -132(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 80(s1) +; RV32I-WITH-FP-NEXT: sw a0, -136(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 84(s1) +; RV32I-WITH-FP-NEXT: sw a0, -140(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw a0, 88(s1) +; RV32I-WITH-FP-NEXT: sw a0, -144(s0) # 4-byte Folded Spill +; RV32I-WITH-FP-NEXT: lw s8, 92(s1) +; RV32I-WITH-FP-NEXT: lw s9, 96(s1) +; RV32I-WITH-FP-NEXT: lw s10, 100(s1) +; RV32I-WITH-FP-NEXT: lw s11, 104(s1) +; RV32I-WITH-FP-NEXT: lw s2, 108(s1) +; RV32I-WITH-FP-NEXT: lw s3, 112(s1) +; RV32I-WITH-FP-NEXT: lw s4, 116(s1) +; RV32I-WITH-FP-NEXT: lw s5, 120(s1) +; RV32I-WITH-FP-NEXT: lw s7, 124(s1) +; RV32I-WITH-FP-NEXT: call callee@plt +; RV32I-WITH-FP-NEXT: sw s7, 124(s1) +; RV32I-WITH-FP-NEXT: sw s5, 120(s1) +; RV32I-WITH-FP-NEXT: sw s4, 116(s1) +; RV32I-WITH-FP-NEXT: sw s3, 112(s1) +; RV32I-WITH-FP-NEXT: sw s2, 108(s1) +; RV32I-WITH-FP-NEXT: sw s11, 104(s1) +; RV32I-WITH-FP-NEXT: sw s10, 100(s1) +; RV32I-WITH-FP-NEXT: sw s9, 96(s1) +; RV32I-WITH-FP-NEXT: sw s8, 92(s1) +; RV32I-WITH-FP-NEXT: lw a0, -144(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 88(s1) +; RV32I-WITH-FP-NEXT: lw a0, -140(s0) # 4-byte Folded Reload 
+; RV32I-WITH-FP-NEXT: sw a0, 84(s1) +; RV32I-WITH-FP-NEXT: lw a0, -136(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 80(s1) +; RV32I-WITH-FP-NEXT: lw a0, -132(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 76(s1) +; RV32I-WITH-FP-NEXT: lw a0, -128(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 72(s1) +; RV32I-WITH-FP-NEXT: lw a0, -124(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 68(s1) +; RV32I-WITH-FP-NEXT: lw a0, -120(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 64(s1) +; RV32I-WITH-FP-NEXT: lw a0, -116(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 60(s1) +; RV32I-WITH-FP-NEXT: lw a0, -112(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 56(s1) +; RV32I-WITH-FP-NEXT: lw a0, -108(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 52(s1) +; RV32I-WITH-FP-NEXT: lw a0, -104(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 48(s1) +; RV32I-WITH-FP-NEXT: lw a0, -100(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 44(s1) +; RV32I-WITH-FP-NEXT: lw a0, -96(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 40(s1) +; RV32I-WITH-FP-NEXT: lw a0, -92(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 36(s1) +; RV32I-WITH-FP-NEXT: lw a0, -88(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 32(s1) +; RV32I-WITH-FP-NEXT: lw a0, -84(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 28(s1) +; RV32I-WITH-FP-NEXT: lw a0, -80(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 24(s1) +; RV32I-WITH-FP-NEXT: lw a0, -76(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 20(s1) +; RV32I-WITH-FP-NEXT: lw a0, -72(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, 16(s1) +; RV32I-WITH-FP-NEXT: lw a0, -68(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, %lo(var+12)(s6) +; RV32I-WITH-FP-NEXT: lw a0, -64(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, %lo(var+8)(s6) +; RV32I-WITH-FP-NEXT: lw a0, -60(s0) # 4-byte Folded Reload +; 
RV32I-WITH-FP-NEXT: sw a0, %lo(var+4)(s6) +; RV32I-WITH-FP-NEXT: lw a0, -56(s0) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: sw a0, %lo(var)(s6) +; RV32I-WITH-FP-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32I-WITH-FP-NEXT: addi sp, sp, 144 +; RV32I-WITH-FP-NEXT: ret +; +; RV64I-LABEL: caller: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -288 +; RV64I-NEXT: sdp ra, s0, 280(sp) # 16-byte Folded Spill +; RV64I-NEXT: sdp s1, s2, 264(sp) # 16-byte Folded Spill +; RV64I-NEXT: sdp s3, s4, 248(sp) # 16-byte Folded Spill +; RV64I-NEXT: sdp s5, s6, 232(sp) # 16-byte Folded Spill +; RV64I-NEXT: sdp s7, s8, 216(sp) # 16-byte Folded Spill +; RV64I-NEXT: sdp s9, s10, 200(sp) # 16-byte Folded Spill +; RV64I-NEXT: sd s11, 184(sp) # 8-byte Folded Spill +; RV64I-NEXT: lui s0, %hi(var) +; RV64I-NEXT: lw a0, %lo(var)(s0) +; RV64I-NEXT: sd a0, 176(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var+4)(s0) +; RV64I-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var+8)(s0) +; RV64I-NEXT: sd a0, 160(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var+12)(s0) +; RV64I-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi s1, s0, %lo(var) +; RV64I-NEXT: lw a0, 16(s1) +; RV64I-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; 
RV64I-NEXT: lw a0, 20(s1) +; RV64I-NEXT: sd a0, 136(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 24(s1) +; RV64I-NEXT: sd a0, 128(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 28(s1) +; RV64I-NEXT: sd a0, 120(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 32(s1) +; RV64I-NEXT: sd a0, 112(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 36(s1) +; RV64I-NEXT: sd a0, 104(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 40(s1) +; RV64I-NEXT: sd a0, 96(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 44(s1) +; RV64I-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 48(s1) +; RV64I-NEXT: sd a0, 80(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 52(s1) +; RV64I-NEXT: sd a0, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 56(s1) +; RV64I-NEXT: sd a0, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 60(s1) +; RV64I-NEXT: sd a0, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 64(s1) +; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 68(s1) +; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 72(s1) +; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 76(s1) +; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 80(s1) +; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 84(s1) +; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw s4, 88(s1) +; RV64I-NEXT: lw s5, 92(s1) +; RV64I-NEXT: lw s6, 96(s1) +; RV64I-NEXT: lw s7, 100(s1) +; RV64I-NEXT: lw s8, 104(s1) +; RV64I-NEXT: lw s9, 108(s1) +; RV64I-NEXT: lw s10, 112(s1) +; RV64I-NEXT: lw s11, 116(s1) +; RV64I-NEXT: lw s2, 120(s1) +; RV64I-NEXT: lw s3, 124(s1) +; RV64I-NEXT: call callee@plt +; RV64I-NEXT: sw s3, 124(s1) +; RV64I-NEXT: sw s2, 120(s1) +; RV64I-NEXT: sw s11, 116(s1) +; RV64I-NEXT: sw s10, 112(s1) +; RV64I-NEXT: sw s9, 108(s1) +; RV64I-NEXT: sw s8, 104(s1) +; RV64I-NEXT: sw s7, 100(s1) +; RV64I-NEXT: sw s6, 96(s1) +; RV64I-NEXT: sw s5, 92(s1) +; RV64I-NEXT: sw s4, 88(s1) +; 
RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 84(s1) +; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 80(s1) +; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 76(s1) +; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 72(s1) +; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 68(s1) +; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 64(s1) +; RV64I-NEXT: ld a0, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 60(s1) +; RV64I-NEXT: ld a0, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 56(s1) +; RV64I-NEXT: ld a0, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 52(s1) +; RV64I-NEXT: ld a0, 80(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 48(s1) +; RV64I-NEXT: ld a0, 88(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 44(s1) +; RV64I-NEXT: ld a0, 96(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 40(s1) +; RV64I-NEXT: ld a0, 104(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 36(s1) +; RV64I-NEXT: ld a0, 112(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 32(s1) +; RV64I-NEXT: ld a0, 120(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 28(s1) +; RV64I-NEXT: ld a0, 128(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 24(s1) +; RV64I-NEXT: ld a0, 136(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 20(s1) +; RV64I-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 16(s1) +; RV64I-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var+12)(s0) +; RV64I-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var+8)(s0) +; RV64I-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var+4)(s0) +; RV64I-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var)(s0) +; RV64I-NEXT: ldp ra, s0, 280(sp) # 16-byte Folded Reload +; RV64I-NEXT: ldp s1, s2, 264(sp) # 16-byte Folded Reload +; RV64I-NEXT: ldp s3, s4, 248(sp) # 16-byte Folded Reload +; 
RV64I-NEXT: ldp s5, s6, 232(sp) # 16-byte Folded Reload +; RV64I-NEXT: ldp s7, s8, 216(sp) # 16-byte Folded Reload +; RV64I-NEXT: ldp s9, s10, 200(sp) # 16-byte Folded Reload +; RV64I-NEXT: ld s11, 184(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 288 +; RV64I-NEXT: ret +; +; RV64I-WITH-FP-LABEL: caller: +; RV64I-WITH-FP: # %bb.0: +; RV64I-WITH-FP-NEXT: addi sp, sp, -288 +; RV64I-WITH-FP-NEXT: sd ra, 280(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s0, 272(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s1, 264(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s2, 256(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s3, 248(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s4, 240(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s5, 232(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s6, 224(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s7, 216(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s8, 208(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s9, 200(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s10, 192(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: sd s11, 184(sp) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: addi s0, sp, 288 +; RV64I-WITH-FP-NEXT: lui s6, %hi(var) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var)(s6) +; RV64I-WITH-FP-NEXT: sd a0, -112(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+4)(s6) +; RV64I-WITH-FP-NEXT: sd a0, -120(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+8)(s6) +; RV64I-WITH-FP-NEXT: sd a0, -128(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+12)(s6) +; RV64I-WITH-FP-NEXT: sd a0, -136(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: addi s1, s6, %lo(var) +; RV64I-WITH-FP-NEXT: lw a0, 16(s1) +; RV64I-WITH-FP-NEXT: sd a0, -144(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 20(s1) +; RV64I-WITH-FP-NEXT: sd a0, -152(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 24(s1) +; RV64I-WITH-FP-NEXT: sd a0, -160(s0) # 8-byte 
Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 28(s1) +; RV64I-WITH-FP-NEXT: sd a0, -168(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 32(s1) +; RV64I-WITH-FP-NEXT: sd a0, -176(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 36(s1) +; RV64I-WITH-FP-NEXT: sd a0, -184(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 40(s1) +; RV64I-WITH-FP-NEXT: sd a0, -192(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 44(s1) +; RV64I-WITH-FP-NEXT: sd a0, -200(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 48(s1) +; RV64I-WITH-FP-NEXT: sd a0, -208(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 52(s1) +; RV64I-WITH-FP-NEXT: sd a0, -216(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 56(s1) +; RV64I-WITH-FP-NEXT: sd a0, -224(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 60(s1) +; RV64I-WITH-FP-NEXT: sd a0, -232(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 64(s1) +; RV64I-WITH-FP-NEXT: sd a0, -240(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 68(s1) +; RV64I-WITH-FP-NEXT: sd a0, -248(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 72(s1) +; RV64I-WITH-FP-NEXT: sd a0, -256(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 76(s1) +; RV64I-WITH-FP-NEXT: sd a0, -264(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 80(s1) +; RV64I-WITH-FP-NEXT: sd a0, -272(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 84(s1) +; RV64I-WITH-FP-NEXT: sd a0, -280(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw a0, 88(s1) +; RV64I-WITH-FP-NEXT: sd a0, -288(s0) # 8-byte Folded Spill +; RV64I-WITH-FP-NEXT: lw s8, 92(s1) +; RV64I-WITH-FP-NEXT: lw s9, 96(s1) +; RV64I-WITH-FP-NEXT: lw s10, 100(s1) +; RV64I-WITH-FP-NEXT: lw s11, 104(s1) +; RV64I-WITH-FP-NEXT: lw s2, 108(s1) +; RV64I-WITH-FP-NEXT: lw s3, 112(s1) +; RV64I-WITH-FP-NEXT: lw s4, 116(s1) +; RV64I-WITH-FP-NEXT: lw s5, 120(s1) +; RV64I-WITH-FP-NEXT: lw s7, 124(s1) +; RV64I-WITH-FP-NEXT: call callee@plt +; RV64I-WITH-FP-NEXT: sw s7, 124(s1) 
+; RV64I-WITH-FP-NEXT: sw s5, 120(s1) +; RV64I-WITH-FP-NEXT: sw s4, 116(s1) +; RV64I-WITH-FP-NEXT: sw s3, 112(s1) +; RV64I-WITH-FP-NEXT: sw s2, 108(s1) +; RV64I-WITH-FP-NEXT: sw s11, 104(s1) +; RV64I-WITH-FP-NEXT: sw s10, 100(s1) +; RV64I-WITH-FP-NEXT: sw s9, 96(s1) +; RV64I-WITH-FP-NEXT: sw s8, 92(s1) +; RV64I-WITH-FP-NEXT: ld a0, -288(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 88(s1) +; RV64I-WITH-FP-NEXT: ld a0, -280(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 84(s1) +; RV64I-WITH-FP-NEXT: ld a0, -272(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 80(s1) +; RV64I-WITH-FP-NEXT: ld a0, -264(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 76(s1) +; RV64I-WITH-FP-NEXT: ld a0, -256(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 72(s1) +; RV64I-WITH-FP-NEXT: ld a0, -248(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 68(s1) +; RV64I-WITH-FP-NEXT: ld a0, -240(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 64(s1) +; RV64I-WITH-FP-NEXT: ld a0, -232(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 60(s1) +; RV64I-WITH-FP-NEXT: ld a0, -224(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 56(s1) +; RV64I-WITH-FP-NEXT: ld a0, -216(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 52(s1) +; RV64I-WITH-FP-NEXT: ld a0, -208(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 48(s1) +; RV64I-WITH-FP-NEXT: ld a0, -200(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 44(s1) +; RV64I-WITH-FP-NEXT: ld a0, -192(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 40(s1) +; RV64I-WITH-FP-NEXT: ld a0, -184(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 36(s1) +; RV64I-WITH-FP-NEXT: ld a0, -176(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 32(s1) +; RV64I-WITH-FP-NEXT: ld a0, -168(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 28(s1) +; RV64I-WITH-FP-NEXT: ld a0, -160(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 24(s1) +; 
RV64I-WITH-FP-NEXT: ld a0, -152(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 20(s1) +; RV64I-WITH-FP-NEXT: ld a0, -144(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, 16(s1) +; RV64I-WITH-FP-NEXT: ld a0, -136(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+12)(s6) +; RV64I-WITH-FP-NEXT: ld a0, -128(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+8)(s6) +; RV64I-WITH-FP-NEXT: ld a0, -120(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+4)(s6) +; RV64I-WITH-FP-NEXT: ld a0, -112(s0) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: sw a0, %lo(var)(s6) +; RV64I-WITH-FP-NEXT: ld ra, 280(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s0, 272(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s1, 264(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s2, 256(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s3, 248(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s4, 240(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s5, 232(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s6, 224(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s7, 216(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s8, 208(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s9, 200(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s10, 192(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: ld s11, 184(sp) # 8-byte Folded Reload +; RV64I-WITH-FP-NEXT: addi sp, sp, 288 +; RV64I-WITH-FP-NEXT: ret + + %val = load [32 x i32], [32 x i32]* @var + call void @callee() + store volatile [32 x i32] %val, [32 x i32]* @var + ret void +} diff --git a/llvm/test/CodeGen/RISCV/large-stack-n3.ll b/llvm/test/CodeGen/RISCV/large-stack-n3.ll new file mode 100644 index 000000000000..f678f4c61193 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/large-stack-n3.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-xin 
-verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I-FPELIM %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-xin -verify-machineinstrs -frame-pointer=all < %s \
+; RUN: | FileCheck -check-prefix=RV64I-WITHFP %s
+
+; TODO: the quality of the generated code is poor
+
+define void @test() { ; Only allocates one huge local and returns, so the checks cover just the stack adjustment.
+; RV64I-FPELIM-LABEL: test:
+; RV64I-FPELIM: # %bb.0:
+; RV64I-FPELIM-NEXT: lui a0, 74565
+; RV64I-FPELIM-NEXT: addiw a0, a0, 1664
+; RV64I-FPELIM-NEXT: sub sp, sp, a0
+; RV64I-FPELIM-NEXT: .cfi_def_cfa_offset 305419904
+; RV64I-FPELIM-NEXT: lui a0, 74565
+; RV64I-FPELIM-NEXT: addiw a0, a0, 1664
+; RV64I-FPELIM-NEXT: add sp, sp, a0
+; RV64I-FPELIM-NEXT: ret
+;
+; RV64I-WITHFP-LABEL: test:
+; RV64I-WITHFP: # %bb.0:
+; RV64I-WITHFP-NEXT: addi sp, sp, -2032
+; RV64I-WITHFP-NEXT: .cfi_def_cfa_offset 2032
+; RV64I-WITHFP-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64I-WITHFP-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64I-WITHFP-NEXT: .cfi_offset ra, -8
+; RV64I-WITHFP-NEXT: .cfi_offset s0, -16
+; RV64I-WITHFP-NEXT: addi s0, sp, 2032
+; RV64I-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; RV64I-WITHFP-NEXT: lui a0, 74565
+; RV64I-WITHFP-NEXT: addiw a0, a0, -352
+; RV64I-WITHFP-NEXT: sub sp, sp, a0
+; RV64I-WITHFP-NEXT: lui a0, 74565
+; RV64I-WITHFP-NEXT: addiw a0, a0, -352
+; RV64I-WITHFP-NEXT: add sp, sp, a0
+; RV64I-WITHFP-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT: addi sp, sp, 2032
+; RV64I-WITHFP-NEXT: ret
+ %tmp = alloca [ 305419896 x i8 ] , align 4 ; 305419896 = 0x12345678 bytes; the checks above move sp by a constant built with lui/addiw
+ ret void
+}
+
+; This test case artificially produces register pressure which should force
+; use of the emergency spill slot.
+
+define void @test_emergency_spill_slot(i32 %a) { ; Stores %a into a 400000-byte local array while 15 inline-asm results are live.
+; RV64I-FPELIM-LABEL: test_emergency_spill_slot:
+; RV64I-FPELIM: # %bb.0:
+; RV64I-FPELIM-NEXT: addi sp, sp, -496
+; RV64I-FPELIM-NEXT: .cfi_def_cfa_offset 496
+; RV64I-FPELIM-NEXT: sdp s0, s1, 488(sp) # 16-byte Folded Spill
+; RV64I-FPELIM-NEXT: lui a1, 78
+; RV64I-FPELIM-NEXT: .cfi_offset s0, -8
+; RV64I-FPELIM-NEXT: .cfi_offset s1, -16
+; RV64I-FPELIM-NEXT: lui a2, 98
+; RV64I-FPELIM-NEXT: addiw a2, a2, -1872
+; RV64I-FPELIM-NEXT: sub sp, sp, a2
+; RV64I-FPELIM-NEXT: .cfi_def_cfa_offset 400032
+; RV64I-FPELIM-NEXT: addiw a1, a1, 512
+; RV64I-FPELIM-NEXT: addi a2, sp, 16
+; RV64I-FPELIM-NEXT: add a1, a2, a1
+; RV64I-FPELIM-NEXT: #APP
+; RV64I-FPELIM-NEXT: nop
+; RV64I-FPELIM-EMPTY:
+; RV64I-FPELIM-NEXT: #NO_APP
+; RV64I-FPELIM-NEXT: sw a0, 0(a1)
+; RV64I-FPELIM-NEXT: lui a0, 98
+; RV64I-FPELIM-NEXT: addiw a0, a0, -1872
+; RV64I-FPELIM-NEXT: add sp, sp, a0
+; RV64I-FPELIM-NEXT: #APP
+; RV64I-FPELIM-NEXT: nop
+; RV64I-FPELIM-EMPTY:
+; RV64I-FPELIM-NEXT: #NO_APP
+; RV64I-FPELIM-NEXT: ldp s0, s1, 488(sp) # 16-byte Folded Reload
+; RV64I-FPELIM-NEXT: addi sp, sp, 496
+; RV64I-FPELIM-NEXT: ret
+;
+; RV64I-WITHFP-LABEL: test_emergency_spill_slot:
+; RV64I-WITHFP: # %bb.0:
+; RV64I-WITHFP-NEXT: addi sp, sp, -2032
+; RV64I-WITHFP-NEXT: .cfi_def_cfa_offset 2032
+; RV64I-WITHFP-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64I-WITHFP-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64I-WITHFP-NEXT: sd s1, 2008(sp) # 8-byte Folded Spill
+; RV64I-WITHFP-NEXT: sd s2, 2000(sp) # 8-byte Folded Spill
+; RV64I-WITHFP-NEXT: .cfi_offset ra, -8
+; RV64I-WITHFP-NEXT: .cfi_offset s0, -16
+; RV64I-WITHFP-NEXT: .cfi_offset s1, -24
+; RV64I-WITHFP-NEXT: .cfi_offset s2, -32
+; RV64I-WITHFP-NEXT: addi s0, sp, 2032
+; RV64I-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; RV64I-WITHFP-NEXT: lui a1, 97
+; RV64I-WITHFP-NEXT: addiw a1, a1, 704
+; RV64I-WITHFP-NEXT: sub sp, sp, a1
+; RV64I-WITHFP-NEXT: lui a1, 78
+; RV64I-WITHFP-NEXT: addiw a1, a1, 512
+; RV64I-WITHFP-NEXT: lui a2, 1048478
+; RV64I-WITHFP-NEXT: addiw a2, a2, 1368
+; RV64I-WITHFP-NEXT: add a2, s0, a2
+; RV64I-WITHFP-NEXT: add a1, a2, a1
+; RV64I-WITHFP-NEXT: #APP
+; RV64I-WITHFP-NEXT: nop
+; RV64I-WITHFP-EMPTY:
+; RV64I-WITHFP-NEXT: #NO_APP
+; RV64I-WITHFP-NEXT: sw a0, 0(a1)
+; RV64I-WITHFP-NEXT: #APP
+; RV64I-WITHFP-NEXT: nop
+; RV64I-WITHFP-EMPTY:
+; RV64I-WITHFP-NEXT: #NO_APP
+; RV64I-WITHFP-NEXT: lui a0, 97
+; RV64I-WITHFP-NEXT: addiw a0, a0, 704
+; RV64I-WITHFP-NEXT: add sp, sp, a0
+; RV64I-WITHFP-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT: ld s1, 2008(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT: ld s2, 2000(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT: addi sp, sp, 2032
+; RV64I-WITHFP-NEXT: ret
+ %data = alloca [ 100000 x i32 ] , align 4 ; 100000 x i32 = 400000 bytes of locals
+ %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %data, i32 0, i32 80000 ; &data[80000], i.e. byte offset 320000 (the lui 78 / addiw 512 pair in the checks)
+ %1 = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "nop", "=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r"() ; 15 "=r" outputs, all consumed by the second asm below
+ %asmresult0 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 0
+ %asmresult1 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 1
+ %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 2
+ %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 3
+ %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 4
+ %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 5
+ %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 6
+ %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 7
+ %asmresult8 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 8
+ %asmresult9 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 9
+ %asmresult10 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 10
+ %asmresult11 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 11
+ %asmresult12 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 12
+ %asmresult13 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 13
+ %asmresult14 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 14
+ store volatile i32 %a, i32* %ptr ; store to data[80000], placed between the two asm blocks
+ tail call void asm sideeffect "nop", "r,r,r,r,r,r,r,r,r,r,r,r,r,r,r"(i32 %asmresult0, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8, i32 %asmresult9, i32 %asmresult10, i32 %asmresult11, i32 %asmresult12, i32 %asmresult13, i32 %asmresult14) ; takes the 15 extracted values as "r" inputs
+ ret void ; NOTE(review): in the frame-pointer-eliminated checks above, "add sp, sp, a0" precedes the second #APP block and "lui a1, 78" sits between the sdp spill and its .cfi_offset lines, while the with-frame-pointer checks restore sp only after the asm. Possibly the prologue/epilogue insertion point steps over one instruction per callee-saved register although sdp/ldp cover two - confirm in RISCVFrameLowering.
+}
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits