Date: Thursday, December 3, 2020 @ 00:26:14 Author: foutrelis Revision: 769396
archrelease: copy trunk to multilib-x86_64 Added: lib32-llvm/repos/multilib-x86_64/PKGBUILD (from rev 769395, lib32-llvm/trunk/PKGBUILD) lib32-llvm/repos/multilib-x86_64/stack-clash-fixes.patch (from rev 769395, lib32-llvm/trunk/stack-clash-fixes.patch) Deleted: lib32-llvm/repos/multilib-x86_64/PKGBUILD -------------------------+ PKGBUILD | 207 +++++----- stack-clash-fixes.patch | 870 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 976 insertions(+), 101 deletions(-) Deleted: PKGBUILD =================================================================== --- PKGBUILD 2020-12-03 00:26:08 UTC (rev 769395) +++ PKGBUILD 2020-12-03 00:26:14 UTC (rev 769396) @@ -1,101 +0,0 @@ -# Maintainer: Laurent Carlier <lordhea...@gmail.com> -# Contributor: Evangelos Foutras <foutre...@gmail.com> -# Contributor: Jan "heftig" Steffens <jan.steff...@gmail.com> - -pkgname=('lib32-llvm' 'lib32-llvm-libs') -pkgver=11.0.0 -pkgrel=1 -arch=('x86_64') -url="https://llvm.org/" -license=('custom:Apache 2.0 with LLVM Exception') -makedepends=('cmake' 'ninja' 'lib32-libffi' 'lib32-zlib' 'python' 'gcc-multilib' - 'lib32-libxml2') -options=('staticlibs') -_source_base=https://github.com/llvm/llvm-project/releases/download/llvmorg-$pkgver -source=($_source_base/llvm-$pkgver.src.tar.xz{,.sig}) -sha256sums=('913f68c898dfb4a03b397c5e11c6a2f39d0f22ed7665c9cefa87a34423a72469' - 'SKIP') -validpgpkeys+=('B6C8F98282B944E3B0D5C2530FC3042E345AD05D') # Hans Wennborg <h...@chromium.org> -validpgpkeys+=('474E22316ABF4785A88C6E8EA2C794A986419D8A') # Tom Stellard <tstel...@redhat.com> - -prepare() { - cd "$srcdir/llvm-$pkgver.src" - mkdir build -} - -build() { - cd "$srcdir/llvm-$pkgver.src/build" - - export PKG_CONFIG_PATH="/usr/lib32/pkgconfig" - - cmake .. -G Ninja \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=/usr \ - -DLLVM_LIBDIR_SUFFIX=32 \ - -DCMAKE_C_FLAGS:STRING=-m32 \ - -DCMAKE_CXX_FLAGS:STRING=-m32 \ - -DLLVM_TARGET_ARCH:STRING=i686 \ - -DLLVM_HOST_TRIPLE=$CHOST \ - -DLLVM_DEFAULT_TARGETS_TRIPLE="i686-pc-linux-gnu" \ - -DLLVM_BUILD_LLVM_DYLIB=ON \ - -DLLVM_LINK_LLVM_DYLIB=ON \ - -DLLVM_ENABLE_RTTI=ON \ - -DLLVM_ENABLE_FFI=ON \ - -DLLVM_BUILD_DOCS=OFF \ - -DLLVM_ENABLE_SPHINX=OFF \ - -DLLVM_ENABLE_DOXYGEN=OFF \ - -DLLVM_BINUTILS_INCDIR=/usr/include - - ninja all -} - -package_lib32-llvm() { - pkgdesc="Collection of modular and reusable compiler and toolchain technologies (32-bit)" - depends=('lib32-llvm-libs' 'llvm') - - cd "$srcdir/llvm-$pkgver.src/build" - - DESTDIR="$pkgdir" ninja install - - # The runtime library goes into lib32-llvm-libs - mv "$pkgdir"/usr/lib32/lib{LLVM,LTO,Remarks}*.so* "$srcdir" - mv -f "$pkgdir"/usr/lib32/LLVMgold.so "$srcdir" - - # Fix permissions of static libs - chmod -x "$pkgdir"/usr/lib32/*.a - - mv "$pkgdir/usr/bin/llvm-config" "$pkgdir/usr/lib32/llvm-config" - mv "$pkgdir/usr/include/llvm/Config/llvm-config.h" \ - "$pkgdir/usr/lib32/llvm-config-32.h" - - rm -rf "$pkgdir"/usr/{bin,include,share/{doc,man,llvm,opt-viewer}} - - # Needed for multilib (https://bugs.archlinux.org/task/29951) - # Header stub is taken from Fedora - install -d "$pkgdir/usr/include/llvm/Config" - mv "$pkgdir/usr/lib32/llvm-config-32.h" "$pkgdir/usr/include/llvm/Config/" - - mkdir "$pkgdir"/usr/bin - mv "$pkgdir/usr/lib32/llvm-config" "$pkgdir/usr/bin/llvm-config32" - - install -Dm644 ../LICENSE.TXT "$pkgdir/usr/share/licenses/$pkgname/LICENSE" -} - -package_lib32-llvm-libs() { - pkgdesc="Low Level Virtual Machine (runtime library)(32-bit) " - depends=('lib32-libffi' 'lib32-zlib' 'lib32-ncurses' 'lib32-libxml2' 'lib32-gcc-libs') - - install -d "$pkgdir/usr/lib32" - - cp -P \ - "$srcdir"/lib{LLVM,LTO,Remarks}*.so* \ - "$srcdir"/LLVMgold.so \ - "$pkgdir/usr/lib32/" - - # Symlink LLVMgold.so from /usr/lib/bfd-plugins - # https://bugs.archlinux.org/task/28479 - install -d "$pkgdir/usr/lib32/bfd-plugins" - ln -s ../LLVMgold.so "$pkgdir/usr/lib32/bfd-plugins/LLVMgold.so" - - install -Dm644 llvm-$pkgver.src/LICENSE.TXT "$pkgdir/usr/share/licenses/$pkgname/LICENSE" -} Copied: lib32-llvm/repos/multilib-x86_64/PKGBUILD (from rev 769395, lib32-llvm/trunk/PKGBUILD) =================================================================== --- PKGBUILD (rev 0) +++ PKGBUILD 2020-12-03 00:26:14 UTC (rev 769396) @@ -0,0 +1,106 @@ +# Maintainer: Laurent Carlier <lordhea...@gmail.com> +# Contributor: Evangelos Foutras <foutre...@gmail.com> +# Contributor: Jan "heftig" Steffens <jan.steff...@gmail.com> + +pkgname=('lib32-llvm' 'lib32-llvm-libs') +pkgver=11.0.0 +pkgrel=2 +arch=('x86_64') +url="https://llvm.org/" +license=('custom:Apache 2.0 with LLVM Exception') +makedepends=('cmake' 'ninja' 'lib32-libffi' 'lib32-zlib' 'python' 'gcc-multilib' + 'lib32-libxml2') +options=('staticlibs') +_source_base=https://github.com/llvm/llvm-project/releases/download/llvmorg-$pkgver +source=($_source_base/llvm-$pkgver.src.tar.xz{,.sig} + stack-clash-fixes.patch) +sha256sums=('913f68c898dfb4a03b397c5e11c6a2f39d0f22ed7665c9cefa87a34423a72469' + 'SKIP' + 'bdcaa7559223bd42a381086f7cc23fc73f88ebb1966a7c235f897db0f73b7d20') +validpgpkeys+=('B6C8F98282B944E3B0D5C2530FC3042E345AD05D') # Hans Wennborg <h...@chromium.org> +validpgpkeys+=('474E22316ABF4785A88C6E8EA2C794A986419D8A') # Tom Stellard <tstel...@redhat.com> + +prepare() { + cd "$srcdir/llvm-$pkgver.src" + mkdir build + + # https://bugs.llvm.org/show_bug.cgi?id=48007 + patch -Np2 -i ../stack-clash-fixes.patch +} + +build() { + cd "$srcdir/llvm-$pkgver.src/build" + + export PKG_CONFIG_PATH="/usr/lib32/pkgconfig" + + cmake .. -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=/usr \ + -DLLVM_LIBDIR_SUFFIX=32 \ + -DCMAKE_C_FLAGS:STRING=-m32 \ + -DCMAKE_CXX_FLAGS:STRING=-m32 \ + -DLLVM_TARGET_ARCH:STRING=i686 \ + -DLLVM_HOST_TRIPLE=$CHOST \ + -DLLVM_DEFAULT_TARGETS_TRIPLE="i686-pc-linux-gnu" \ + -DLLVM_BUILD_LLVM_DYLIB=ON \ + -DLLVM_LINK_LLVM_DYLIB=ON \ + -DLLVM_ENABLE_RTTI=ON \ + -DLLVM_ENABLE_FFI=ON \ + -DLLVM_BUILD_DOCS=OFF \ + -DLLVM_ENABLE_SPHINX=OFF \ + -DLLVM_ENABLE_DOXYGEN=OFF \ + -DLLVM_BINUTILS_INCDIR=/usr/include + + ninja all +} + +package_lib32-llvm() { + pkgdesc="Collection of modular and reusable compiler and toolchain technologies (32-bit)" + depends=('lib32-llvm-libs' 'llvm') + + cd "$srcdir/llvm-$pkgver.src/build" + + DESTDIR="$pkgdir" ninja install + + # The runtime library goes into lib32-llvm-libs + mv "$pkgdir"/usr/lib32/lib{LLVM,LTO,Remarks}*.so* "$srcdir" + mv -f "$pkgdir"/usr/lib32/LLVMgold.so "$srcdir" + + # Fix permissions of static libs + chmod -x "$pkgdir"/usr/lib32/*.a + + mv "$pkgdir/usr/bin/llvm-config" "$pkgdir/usr/lib32/llvm-config" + mv "$pkgdir/usr/include/llvm/Config/llvm-config.h" \ + "$pkgdir/usr/lib32/llvm-config-32.h" + + rm -rf "$pkgdir"/usr/{bin,include,share/{doc,man,llvm,opt-viewer}} + + # Needed for multilib (https://bugs.archlinux.org/task/29951) + # Header stub is taken from Fedora + install -d "$pkgdir/usr/include/llvm/Config" + mv "$pkgdir/usr/lib32/llvm-config-32.h" "$pkgdir/usr/include/llvm/Config/" + + mkdir "$pkgdir"/usr/bin + mv "$pkgdir/usr/lib32/llvm-config" "$pkgdir/usr/bin/llvm-config32" + + install -Dm644 ../LICENSE.TXT "$pkgdir/usr/share/licenses/$pkgname/LICENSE" +} + +package_lib32-llvm-libs() { + pkgdesc="Low Level Virtual Machine (runtime library)(32-bit) " + depends=('lib32-libffi' 'lib32-zlib' 'lib32-ncurses' 'lib32-libxml2' 'lib32-gcc-libs') + + install -d "$pkgdir/usr/lib32" + + cp -P \ + "$srcdir"/lib{LLVM,LTO,Remarks}*.so* \ + "$srcdir"/LLVMgold.so \ + "$pkgdir/usr/lib32/" + + # Symlink LLVMgold.so from /usr/lib/bfd-plugins + # https://bugs.archlinux.org/task/28479 + install -d "$pkgdir/usr/lib32/bfd-plugins" + ln -s ../LLVMgold.so "$pkgdir/usr/lib32/bfd-plugins/LLVMgold.so" + + install -Dm644 llvm-$pkgver.src/LICENSE.TXT "$pkgdir/usr/share/licenses/$pkgname/LICENSE" +} Copied: lib32-llvm/repos/multilib-x86_64/stack-clash-fixes.patch (from rev 769395, lib32-llvm/trunk/stack-clash-fixes.patch) =================================================================== --- stack-clash-fixes.patch (rev 0) +++ stack-clash-fixes.patch 2020-12-03 00:26:14 UTC (rev 769396) @@ -0,0 +1,870 @@ +From a1e0363c7402f7aa58e24e0e6dfa447ebabc1910 Mon Sep 17 00:00:00 2001 +From: serge-sans-paille <sguel...@redhat.com> +Date: Wed, 30 Sep 2020 11:35:00 +0200 +Subject: [PATCH 1/3] Fix limit behavior of dynamic alloca + +When the allocation size is 0, we shouldn't probe. Within [1, PAGE_SIZE], we +should probe once etc. + +This fixes https://bugs.llvm.org/show_bug.cgi?id=47657 + +Differential Revision: https://reviews.llvm.org/D88548 + +(cherry picked from commit 9573c9f2a363da71b2c07a3add4e52721e6028a0) +--- + llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- + llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll | 8 ++++---- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp +index fd1e6517dfac..f68ae4461fe3 100644 +--- a/llvm/lib/Target/X86/X86ISelLowering.cpp ++++ b/llvm/lib/Target/X86/X86ISelLowering.cpp +@@ -31876,7 +31876,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI, + + BuildMI(testMBB, DL, TII->get(X86::JCC_1)) + .addMBB(tailMBB) +- .addImm(X86::COND_L); ++ .addImm(X86::COND_LE); + testMBB->addSuccessor(blockMBB); + testMBB->addSuccessor(tailMBB); + +diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll +index bc4678564083..82fd67842c8a 100644 +--- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll ++++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll +@@ -24,12 +24,12 @@ attributes #0 = {"probe-stack"="inline-asm"} + ; CHECK-X86-64-NEXT: andq $-16, %rcx + ; CHECK-X86-64-NEXT: subq %rcx, %rax + ; CHECK-X86-64-NEXT: cmpq %rsp, %rax +-; CHECK-X86-64-NEXT: jl .LBB0_3 ++; CHECK-X86-64-NEXT: jle .LBB0_3 + ; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 + ; CHECK-X86-64-NEXT: movq $0, (%rsp) + ; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 + ; CHECK-X86-64-NEXT: cmpq %rsp, %rax +-; CHECK-X86-64-NEXT: jge .LBB0_2 ++; CHECK-X86-64-NEXT: jg .LBB0_2 + ; CHECK-X86-64-NEXT: .LBB0_3: + ; CHECK-X86-64-NEXT: movq %rax, %rsp + ; CHECK-X86-64-NEXT: movl $1, 4792(%rax) +@@ -54,12 +54,12 @@ attributes #0 = {"probe-stack"="inline-asm"} + ; CHECK-X86-32-NEXT: andl $-16, %ecx + ; CHECK-X86-32-NEXT: subl %ecx, %eax + ; CHECK-X86-32-NEXT: cmpl %esp, %eax +-; CHECK-X86-32-NEXT: jl .LBB0_3 ++; CHECK-X86-32-NEXT: jle .LBB0_3 + ; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 + ; CHECK-X86-32-NEXT: movl $0, (%esp) + ; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 + ; CHECK-X86-32-NEXT: cmpl %esp, %eax +-; CHECK-X86-32-NEXT: jge .LBB0_2 ++; CHECK-X86-32-NEXT: jg .LBB0_2 + ; CHECK-X86-32-NEXT: .LBB0_3: + ; CHECK-X86-32-NEXT: movl %eax, %esp + ; CHECK-X86-32-NEXT: movl $1, 4792(%eax) + +From aac36687f7978f33751daf2870b5c812124ebfaf Mon Sep 17 00:00:00 2001 +From: serge-sans-paille <sguel...@redhat.com> +Date: Thu, 23 Jul 2020 16:22:48 +0200 +Subject: [PATCH 2/3] Fix interaction between stack alignment and inline-asm + stack clash protection + +As reported in https://github.com/rust-lang/rust/issues/70143 alignment is not +taken into account when doing the probing. Fix that by adjusting the first probe +if the stack align is small, or by extending the dynamic probing if the +alignment is large. + +Differential Revision: https://reviews.llvm.org/D84419 + +(cherry picked from commit f2c6bfa350de142e4d63808d03335f69bd136d6a) +--- + llvm/lib/Target/X86/X86FrameLowering.cpp | 222 ++++++++++++++++-- + llvm/lib/Target/X86/X86FrameLowering.h | 8 +- + .../X86/stack-clash-large-large-align.ll | 88 +++++++ + .../CodeGen/X86/stack-clash-no-free-probe.ll | 27 --- + .../stack-clash-small-alloc-medium-align.ll | 135 +++++++++++ + .../X86/stack-clash-small-large-align.ll | 83 +++++++ + 6 files changed, 512 insertions(+), 51 deletions(-) + create mode 100644 llvm/test/CodeGen/X86/stack-clash-large-large-align.ll + delete mode 100644 llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll + create mode 100644 llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll + create mode 100644 llvm/test/CodeGen/X86/stack-clash-small-large-align.ll + +diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp +index c7ca6fb2a4fc..db6b68659493 100644 +--- a/llvm/lib/Target/X86/X86FrameLowering.cpp ++++ b/llvm/lib/Target/X86/X86FrameLowering.cpp +@@ -586,29 +586,55 @@ void X86FrameLowering::emitStackProbeInlineGeneric( + const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); + uint64_t ProbeChunk = StackProbeSize * 8; + ++ uint64_t MaxAlign = ++ TRI->needsStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0; ++ + // Synthesize a loop or unroll it, depending on the number of iterations. ++ // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left ++ // between the unaligned rsp and current rsp. + if (Offset > ProbeChunk) { +- emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset); ++ emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset, ++ MaxAlign % StackProbeSize); + } else { +- emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset); ++ emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset, ++ MaxAlign % StackProbeSize); + } + } + + void X86FrameLowering::emitStackProbeInlineGenericBlock( + MachineFunction &MF, MachineBasicBlock &MBB, +- MachineBasicBlock::iterator MBBI, const DebugLoc &DL, +- uint64_t Offset) const { ++ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, ++ uint64_t AlignOffset) const { + + const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); + const X86TargetLowering &TLI = *STI.getTargetLowering(); + const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset); + const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; + const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); ++ + uint64_t CurrentOffset = 0; +- // 0 Thanks to return address being saved on the stack +- uint64_t CurrentProbeOffset = 0; + +- // For the first N - 1 pages, just probe. I tried to take advantage of ++ assert(AlignOffset < StackProbeSize); ++ ++ // If the offset is so small it fits within a page, there's nothing to do. ++ if (StackProbeSize < Offset + AlignOffset) { ++ ++ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) ++ .addReg(StackPtr) ++ .addImm(StackProbeSize - AlignOffset) ++ .setMIFlag(MachineInstr::FrameSetup); ++ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. ++ ++ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) ++ .setMIFlag(MachineInstr::FrameSetup), ++ StackPtr, false, 0) ++ .addImm(0) ++ .setMIFlag(MachineInstr::FrameSetup); ++ NumFrameExtraProbe++; ++ CurrentOffset = StackProbeSize - AlignOffset; ++ } ++ ++ // For the next N - 1 pages, just probe. I tried to take advantage of + // natural probes but it implies much more logic and there was very few + // interesting natural probes to interleave. + while (CurrentOffset + StackProbeSize < Offset) { +@@ -626,9 +652,9 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock( + .setMIFlag(MachineInstr::FrameSetup); + NumFrameExtraProbe++; + CurrentOffset += StackProbeSize; +- CurrentProbeOffset += StackProbeSize; + } + ++ // No need to probe the tail, it is smaller than a Page. + uint64_t ChunkSize = Offset - CurrentOffset; + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr) +@@ -639,8 +665,8 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock( + + void X86FrameLowering::emitStackProbeInlineGenericLoop( + MachineFunction &MF, MachineBasicBlock &MBB, +- MachineBasicBlock::iterator MBBI, const DebugLoc &DL, +- uint64_t Offset) const { ++ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, ++ uint64_t AlignOffset) const { + assert(Offset && "null offset"); + + const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); +@@ -648,6 +674,26 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( + const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; + const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); + ++ if (AlignOffset) { ++ if (AlignOffset < StackProbeSize) { ++ // Perform a first smaller allocation followed by a probe. ++ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset); ++ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr) ++ .addReg(StackPtr) ++ .addImm(AlignOffset) ++ .setMIFlag(MachineInstr::FrameSetup); ++ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. ++ ++ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) ++ .setMIFlag(MachineInstr::FrameSetup), ++ StackPtr, false, 0) ++ .addImm(0) ++ .setMIFlag(MachineInstr::FrameSetup); ++ NumFrameExtraProbe++; ++ Offset -= AlignOffset; ++ } ++ } ++ + // Synthesize a loop + NumFrameLoopProbe++; + const BasicBlock *LLVM_BB = MBB.getBasicBlock(); +@@ -666,8 +712,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( + + // save loop bound + { +- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset); +- BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed) ++ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset); ++ BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed) + .addReg(FinalStackProbed) + .addImm(Offset / StackProbeSize * StackProbeSize) + .setMIFlag(MachineInstr::FrameSetup); +@@ -675,8 +721,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( + + // allocate a page + { +- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); +- BuildMI(testMBB, DL, TII.get(Opc), StackPtr) ++ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); ++ BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr) + .addReg(StackPtr) + .addImm(StackProbeSize) + .setMIFlag(MachineInstr::FrameSetup); +@@ -1052,13 +1098,149 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, + uint64_t MaxAlign) const { + uint64_t Val = -MaxAlign; + unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val); +- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg) +- .addReg(Reg) +- .addImm(Val) +- .setMIFlag(MachineInstr::FrameSetup); + +- // The EFLAGS implicit def is dead. +- MI->getOperand(3).setIsDead(); ++ MachineFunction &MF = *MBB.getParent(); ++ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); ++ const X86TargetLowering &TLI = *STI.getTargetLowering(); ++ const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); ++ const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF); ++ ++ // We want to make sure that (in worst case) less than StackProbeSize bytes ++ // are not probed after the AND. This assumption is used in ++ // emitStackProbeInlineGeneric. ++ if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) { ++ { ++ NumFrameLoopProbe++; ++ MachineBasicBlock *entryMBB = ++ MF.CreateMachineBasicBlock(MBB.getBasicBlock()); ++ MachineBasicBlock *headMBB = ++ MF.CreateMachineBasicBlock(MBB.getBasicBlock()); ++ MachineBasicBlock *bodyMBB = ++ MF.CreateMachineBasicBlock(MBB.getBasicBlock()); ++ MachineBasicBlock *footMBB = ++ MF.CreateMachineBasicBlock(MBB.getBasicBlock()); ++ ++ MachineFunction::iterator MBBIter = MBB.getIterator(); ++ MF.insert(MBBIter, entryMBB); ++ MF.insert(MBBIter, headMBB); ++ MF.insert(MBBIter, bodyMBB); ++ MF.insert(MBBIter, footMBB); ++ const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; ++ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D; ++ ++ // Setup entry block ++ { ++ ++ entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI); ++ BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) ++ .addReg(StackPtr) ++ .setMIFlag(MachineInstr::FrameSetup); ++ MachineInstr *MI = ++ BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed) ++ .addReg(FinalStackProbed) ++ .addImm(Val) ++ .setMIFlag(MachineInstr::FrameSetup); ++ ++ // The EFLAGS implicit def is dead. ++ MI->getOperand(3).setIsDead(); ++ ++ BuildMI(entryMBB, DL, ++ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) ++ .addReg(FinalStackProbed) ++ .addReg(StackPtr) ++ .setMIFlag(MachineInstr::FrameSetup); ++ BuildMI(entryMBB, DL, TII.get(X86::JCC_1)) ++ .addMBB(&MBB) ++ .addImm(X86::COND_E) ++ .setMIFlag(MachineInstr::FrameSetup); ++ entryMBB->addSuccessor(headMBB); ++ entryMBB->addSuccessor(&MBB); ++ } ++ ++ // Loop entry block ++ ++ { ++ const unsigned SUBOpc = ++ getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); ++ BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr) ++ .addReg(StackPtr) ++ .addImm(StackProbeSize) ++ .setMIFlag(MachineInstr::FrameSetup); ++ ++ BuildMI(headMBB, DL, ++ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) ++ .addReg(FinalStackProbed) ++ .addReg(StackPtr) ++ .setMIFlag(MachineInstr::FrameSetup); ++ ++ // jump ++ BuildMI(headMBB, DL, TII.get(X86::JCC_1)) ++ .addMBB(footMBB) ++ .addImm(X86::COND_B) ++ .setMIFlag(MachineInstr::FrameSetup); ++ ++ headMBB->addSuccessor(bodyMBB); ++ headMBB->addSuccessor(footMBB); ++ } ++ ++ // setup loop body ++ { ++ addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc)) ++ .setMIFlag(MachineInstr::FrameSetup), ++ StackPtr, false, 0) ++ .addImm(0) ++ .setMIFlag(MachineInstr::FrameSetup); ++ ++ const unsigned SUBOpc = ++ getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); ++ BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr) ++ .addReg(StackPtr) ++ .addImm(StackProbeSize) ++ .setMIFlag(MachineInstr::FrameSetup); ++ ++ // cmp with stack pointer bound ++ BuildMI(bodyMBB, DL, ++ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) ++ .addReg(FinalStackProbed) ++ .addReg(StackPtr) ++ .setMIFlag(MachineInstr::FrameSetup); ++ ++ // jump ++ BuildMI(bodyMBB, DL, TII.get(X86::JCC_1)) ++ .addMBB(bodyMBB) ++ .addImm(X86::COND_B) ++ .setMIFlag(MachineInstr::FrameSetup); ++ bodyMBB->addSuccessor(bodyMBB); ++ bodyMBB->addSuccessor(footMBB); ++ } ++ ++ // setup loop footer ++ { ++ BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr) ++ .addReg(FinalStackProbed) ++ .setMIFlag(MachineInstr::FrameSetup); ++ addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc)) ++ .setMIFlag(MachineInstr::FrameSetup), ++ StackPtr, false, 0) ++ .addImm(0) ++ .setMIFlag(MachineInstr::FrameSetup); ++ footMBB->addSuccessor(&MBB); ++ } ++ ++ recomputeLiveIns(*headMBB); ++ recomputeLiveIns(*bodyMBB); ++ recomputeLiveIns(*footMBB); ++ recomputeLiveIns(MBB); ++ } ++ } else { ++ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg) ++ .addReg(Reg) ++ .addImm(Val) ++ .setMIFlag(MachineInstr::FrameSetup); ++ ++ // The EFLAGS implicit def is dead. ++ MI->getOperand(3).setIsDead(); ++ } + } + + bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const { +diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h +index c0b4be95f88d..bb2e83205e71 100644 +--- a/llvm/lib/Target/X86/X86FrameLowering.h ++++ b/llvm/lib/Target/X86/X86FrameLowering.h +@@ -213,14 +213,14 @@ private: + void emitStackProbeInlineGenericBlock(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, +- const DebugLoc &DL, +- uint64_t Offset) const; ++ const DebugLoc &DL, uint64_t Offset, ++ uint64_t Align) const; + + void emitStackProbeInlineGenericLoop(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, +- const DebugLoc &DL, +- uint64_t Offset) const; ++ const DebugLoc &DL, uint64_t Offset, ++ uint64_t Align) const; + + /// Emit a stub to later inline the target stack probe. + MachineInstr *emitStackProbeInlineStub(MachineFunction &MF, +diff --git a/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll +new file mode 100644 +index 000000000000..6c981cb4ac91 +--- /dev/null ++++ b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll +@@ -0,0 +1,88 @@ ++; RUN: llc < %s | FileCheck %s ++ ++ ++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ++target triple = "x86_64-unknown-linux-gnu" ++ ++define i32 @foo_noprotect() local_unnamed_addr { ++; CHECK-LABEL: foo_noprotect: ++; CHECK: # %bb.0: ++; CHECK-NEXT: pushq %rbp ++; CHECK-NEXT: .cfi_def_cfa_offset 16 ++; CHECK-NEXT: .cfi_offset %rbp, -16 ++; CHECK-NEXT: movq %rsp, %rbp ++; CHECK-NEXT: .cfi_def_cfa_register %rbp ++; CHECK-NEXT: andq $-4096, %rsp # imm = 0xF000 ++; CHECK-NEXT: subq $73728, %rsp # imm = 0x12000 ++; CHECK-NEXT: movl $1, 392(%rsp) ++; CHECK-NEXT: movl $1, 28792(%rsp) ++; CHECK-NEXT: movl (%rsp), %eax ++; CHECK-NEXT: movq %rbp, %rsp ++; CHECK-NEXT: popq %rbp ++; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ++; CHECK-NEXT: retq ++ ++ ++ %a = alloca i32, i64 18000, align 4096 ++ %b0 = getelementptr inbounds i32, i32* %a, i64 98 ++ %b1 = getelementptr inbounds i32, i32* %a, i64 7198 ++ store volatile i32 1, i32* %b0 ++ store volatile i32 1, i32* %b1 ++ %c = load volatile i32, i32* %a ++ ret i32 %c ++} ++ ++define i32 @foo_protect() local_unnamed_addr #0 { ++; CHECK-LABEL: foo_protect: ++; CHECK: # %bb.0: ++; CHECK-NEXT: pushq %rbp ++; CHECK-NEXT: .cfi_def_cfa_offset 16 ++; CHECK-NEXT: .cfi_offset %rbp, -16 ++; CHECK-NEXT: movq %rsp, %rbp ++; CHECK-NEXT: .cfi_def_cfa_register %rbp ++; CHECK-NEXT: movq %rsp, %r11 ++; CHECK-NEXT: andq $-4096, %r11 # imm = 0xF000 ++; CHECK-NEXT: cmpq %rsp, %r11 ++; CHECK-NEXT: je .LBB1_4 ++; CHECK-NEXT:# %bb.1: ++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 ++; CHECK-NEXT: cmpq %rsp, %r11 ++; CHECK-NEXT: jb .LBB1_3 ++; CHECK-NEXT:.LBB1_2: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: movq $0, (%rsp) ++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 ++; CHECK-NEXT: cmpq %rsp, %r11 ++; CHECK-NEXT: jb .LBB1_2 ++; CHECK-NEXT:.LBB1_3: ++; CHECK-NEXT: movq %r11, %rsp ++; CHECK-NEXT: movq $0, (%rsp) ++; CHECK-NEXT:.LBB1_4: ++; CHECK-NEXT: movq %rsp, %r11 ++; CHECK-NEXT: subq $73728, %r11 # imm = 0x12000 ++; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 ++; CHECK-NEXT: movq $0, (%rsp) ++; CHECK-NEXT: cmpq %r11, %rsp ++; CHECK-NEXT: jne .LBB1_5 ++; CHECK-NEXT:# %bb.6: ++; CHECK-NEXT: movl $1, 392(%rsp) ++; CHECK-NEXT: movl $1, 28792(%rsp) ++; CHECK-NEXT: movl (%rsp), %eax ++; CHECK-NEXT: movq %rbp, %rsp ++; CHECK-NEXT: popq %rbp ++; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ++; CHECK-NEXT: retq ++ ++ ++ ++ ++ %a = alloca i32, i64 18000, align 4096 ++ %b0 = getelementptr inbounds i32, i32* %a, i64 98 ++ %b1 = getelementptr inbounds i32, i32* %a, i64 7198 ++ store volatile i32 1, i32* %b0 ++ store volatile i32 1, i32* %b1 ++ %c = load volatile i32, i32* %a ++ ret i32 %c ++} ++ ++attributes #0 = {"probe-stack"="inline-asm"} +diff --git a/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll b/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll +deleted file mode 100644 +index 652acbdf00ba..000000000000 +--- a/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll ++++ /dev/null +@@ -1,27 +0,0 @@ +-; RUN: llc < %s | FileCheck %s +- +-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +-target triple = "x86_64-unknown-linux-gnu" +- +-define i32 @foo(i64 %i) local_unnamed_addr #0 { +-; CHECK-LABEL: foo: +-; CHECK: # %bb.0: +-; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +-; CHECK-NEXT: movq $0, (%rsp) +-; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8 +-; CHECK-NEXT: .cfi_def_cfa_offset 7888 +-; CHECK-NEXT: movl $1, -128(%rsp,%rdi,4) +-; CHECK-NEXT: movl -128(%rsp), %eax +-; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8 +-; CHECK-NEXT: .cfi_def_cfa_offset 8 +-; CHECK-NEXT: retq +- +- %a = alloca i32, i32 2000, align 16 +- %b = getelementptr inbounds i32, i32* %a, i64 %i +- store volatile i32 1, i32* %b +- %c = load volatile i32, i32* %a +- ret i32 %c +-} +- +-attributes #0 = {"probe-stack"="inline-asm"} +- +diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll +new file mode 100644 +index 000000000000..eafa86f1eba9 +--- /dev/null ++++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll +@@ -0,0 +1,135 @@ ++; RUN: llc < %s | FileCheck %s ++ ++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ++target triple = "x86_64-unknown-linux-gnu" ++ ++; | case1 | alloca + align < probe_size ++define i32 @foo1(i64 %i) local_unnamed_addr #0 { ++; CHECK-LABEL: foo1: ++; CHECK: # %bb.0: ++; CHECK-NEXT: pushq %rbp ++; CHECK-NEXT: .cfi_def_cfa_offset 16 ++; CHECK-NEXT: .cfi_offset %rbp, -16 ++; CHECK-NEXT: movq %rsp, %rbp ++; CHECK-NEXT: .cfi_def_cfa_register %rbp ++; CHECK-NEXT: andq $-64, %rsp ++; CHECK-NEXT: subq $832, %rsp # imm = 0x340 ++; CHECK-NEXT: movl $1, (%rsp,%rdi,4) ++; CHECK-NEXT: movl (%rsp), %eax ++; CHECK-NEXT: movq %rbp, %rsp ++; CHECK-NEXT: popq %rbp ++; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ++; CHECK-NEXT: retq ++ ++ %a = alloca i32, i32 200, align 64 ++ %b = getelementptr inbounds i32, i32* %a, i64 %i ++ store volatile i32 1, i32* %b ++ %c = load volatile i32, i32* %a ++ ret i32 %c ++} ++ ++; | case2 | alloca > probe_size, align > probe_size ++define i32 @foo2(i64 %i) local_unnamed_addr #0 { ++; CHECK-LABEL: foo2: ++; CHECK: # %bb.0: ++; CHECK-NEXT: pushq %rbp ++; CHECK-NEXT: .cfi_def_cfa_offset 16 ++; CHECK-NEXT: .cfi_offset %rbp, -16 ++; CHECK-NEXT: movq %rsp, %rbp ++; CHECK-NEXT: .cfi_def_cfa_register %rbp ++; CHECK-NEXT: andq $-2048, %rsp # imm = 0xF800 ++; CHECK-NEXT: subq $2048, %rsp # imm = 0x800 ++; CHECK-NEXT: movq $0, (%rsp) ++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 ++; CHECK-NEXT: movq $0, (%rsp) ++; CHECK-NEXT: subq $2048, %rsp # imm = 0x800 ++; CHECK-NEXT: movl $1, (%rsp,%rdi,4) ++; CHECK-NEXT: movl (%rsp), %eax ++; CHECK-NEXT: movq %rbp, %rsp ++; CHECK-NEXT: popq %rbp ++; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ++; CHECK-NEXT: retq ++ ++ %a = alloca i32, i32 2000, align 2048 ++ %b = getelementptr inbounds i32, i32* %a, i64 %i ++ store volatile i32 1, i32* %b ++ %c = load volatile i32, i32* %a ++ ret i32 %c ++} ++ ++; | case3 | alloca < probe_size, align < probe_size, alloca + align > probe_size ++define i32 @foo3(i64 %i) local_unnamed_addr #0 { ++; CHECK-LABEL: foo3: ++; CHECK: # %bb.0: ++; CHECK-NEXT: pushq %rbp ++; CHECK-NEXT: .cfi_def_cfa_offset 16 ++; CHECK-NEXT: .cfi_offset %rbp, -16 ++; CHECK-NEXT: movq %rsp, %rbp ++; CHECK-NEXT: .cfi_def_cfa_register %rbp ++; CHECK-NEXT: andq $-1024, %rsp # imm = 0xFC00 ++; CHECK-NEXT: subq $3072, %rsp # imm = 0xC00 ++; CHECK-NEXT: movq $0, (%rsp) ++; CHECK-NEXT: subq $1024, %rsp # imm = 0x400 ++; CHECK-NEXT: movl $1, (%rsp,%rdi,4) ++; CHECK-NEXT: movl (%rsp), %eax ++; CHECK-NEXT: movq %rbp, %rsp ++; CHECK-NEXT: popq %rbp ++; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ++; CHECK-NEXT: retq ++ ++ ++ %a = alloca i32, i32 1000, align 1024 ++ %b = getelementptr inbounds i32, i32* %a, i64 %i ++ store volatile i32 1, i32* %b ++ %c = load volatile i32, i32* %a ++ ret i32 %c ++} ++ ++; | case4 | alloca + probe_size < probe_size, followed by dynamic alloca ++define i32 @foo4(i64 %i) local_unnamed_addr #0 { ++; CHECK-LABEL: foo4: ++; CHECK: # %bb.0: ++; CHECK-NEXT: pushq %rbp ++; CHECK-NEXT: .cfi_def_cfa_offset 16 ++; CHECK-NEXT: .cfi_offset %rbp, -16 ++; CHECK-NEXT: movq %rsp, %rbp ++; CHECK-NEXT: .cfi_def_cfa_register %rbp ++; CHECK-NEXT: pushq %rbx ++; CHECK-NEXT: andq $-64, %rsp ++; CHECK-NEXT: subq $896, %rsp # imm = 0x380 ++; CHECK-NEXT: movq %rsp, %rbx ++; CHECK-NEXT: .cfi_offset %rbx, -24 ++; CHECK-NEXT: movl $1, (%rbx,%rdi,4) ++; CHECK-NEXT: movl (%rbx), %ecx ++; CHECK-NEXT: movq %rsp, %rax ++; CHECK-NEXT: leaq 15(,%rcx,4), %rcx ++; CHECK-NEXT: andq $-16, %rcx ++; CHECK-NEXT: subq %rcx, %rax ++; CHECK-NEXT: cmpq %rsp, %rax ++; CHECK-NEXT: jle .LBB3_3 ++; CHECK-NEXT:.LBB3_2: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: movq $0, (%rsp) ++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 ++; CHECK-NEXT: cmpq %rsp, %rax ++; CHECK-NEXT: jg .LBB3_2 ++; CHECK-NEXT:.LBB3_3: ++; CHECK-NEXT: andq $-64, %rax ++; CHECK-NEXT: movq %rax, %rsp ++; CHECK-NEXT: movl (%rax), %eax ++; CHECK-NEXT: leaq -8(%rbp), %rsp ++; CHECK-NEXT: popq %rbx ++; CHECK-NEXT: popq %rbp ++; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ++; CHECK-NEXT: retq ++ ++ %a = alloca i32, i32 200, align 64 ++ %b = getelementptr inbounds i32, i32* %a, i64 %i ++ store volatile i32 1, i32* %b ++ %c = load volatile i32, i32* %a ++ %d = alloca i32, i32 %c, align 64 ++ %e = load volatile i32, i32* %d ++ ret i32 %e ++} ++ ++attributes #0 = {"probe-stack"="inline-asm"} ++ +diff --git a/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll +new file mode 100644 +index 000000000000..e608bab90415 +--- /dev/null ++++ b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll +@@ -0,0 +1,83 @@ ++; RUN: llc < %s | FileCheck %s ++ ++ ++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ++target triple = "x86_64-unknown-linux-gnu" ++ ++define i32 @foo_noprotect() local_unnamed_addr { ++; CHECK-LABEL: foo_noprotect: ++; CHECK: # %bb.0: ++; CHECK-NEXT: pushq %rbp ++; CHECK-NEXT: .cfi_def_cfa_offset 16 ++; CHECK-NEXT: .cfi_offset %rbp, -16 ++; CHECK-NEXT: movq %rsp, %rbp ++; CHECK-NEXT: .cfi_def_cfa_register %rbp ++; CHECK-NEXT: andq $-65536, %rsp ++; CHECK-NEXT: subq $65536, %rsp ++; CHECK-NEXT: movl $1, 392(%rsp) ++; CHECK-NEXT: movl (%rsp), %eax ++; CHECK-NEXT: movq %rbp, %rsp ++; CHECK-NEXT: popq %rbp ++; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ++; CHECK-NEXT: retq ++ ++ ++ ++ %a = alloca i32, i64 100, align 65536 ++ %b = getelementptr inbounds i32, i32* %a, i64 98 ++ store volatile i32 1, i32* %b ++ %c = load volatile i32, i32* %a ++ ret i32 %c ++} ++ ++define i32 @foo_protect() local_unnamed_addr #0 { ++; CHECK-LABEL: foo_protect: ++; CHECK: # %bb.0: ++; CHECK-NEXT: pushq %rbp ++; CHECK-NEXT: .cfi_def_cfa_offset 16 ++; CHECK-NEXT: .cfi_offset %rbp, -16 ++; CHECK-NEXT: movq %rsp, %rbp ++; CHECK-NEXT: .cfi_def_cfa_register %rbp ++; CHECK-NEXT: movq %rsp, %r11 ++; CHECK-NEXT: andq $-65536, %r11 # imm = 0xFFFF0000 ++; CHECK-NEXT: cmpq %rsp, %r11 ++; CHECK-NEXT: je .LBB1_4 ++; CHECK-NEXT:# %bb.1: ++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 ++; CHECK-NEXT: cmpq %rsp, %r11 ++; CHECK-NEXT: jb .LBB1_3 ++; CHECK-NEXT:.LBB1_2: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: movq $0, (%rsp) ++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 ++; CHECK-NEXT: cmpq %rsp, %r11 ++; CHECK-NEXT: jb .LBB1_2 ++; CHECK-NEXT:.LBB1_3: ++; CHECK-NEXT: movq %r11, %rsp ++; CHECK-NEXT: movq $0, (%rsp) ++; CHECK-NEXT:.LBB1_4: ++; CHECK-NEXT: movq %rsp, %r11 ++; CHECK-NEXT: subq $65536, %r11 # imm = 0x10000 ++; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 ++; CHECK-NEXT: movq $0, (%rsp) ++; CHECK-NEXT: cmpq %r11, %rsp ++; CHECK-NEXT: jne .LBB1_5 ++; CHECK-NEXT:# %bb.6: ++; CHECK-NEXT: movl $1, 392(%rsp) ++; CHECK-NEXT: movl (%rsp), %eax ++; CHECK-NEXT: movq %rbp, %rsp ++; CHECK-NEXT: popq %rbp ++; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ++; CHECK-NEXT: retq ++ ++ ++ ++ ++ %a = alloca i32, i64 100, align 65536 ++ %b = getelementptr inbounds i32, i32* %a, i64 98 ++ store volatile i32 1, i32* %b ++ %c = load volatile i32, i32* %a ++ ret i32 %c ++} ++ ++attributes #0 = {"probe-stack"="inline-asm"} + +From bbe6cbbed8c7460a7e8477373b9250543362e771 Mon Sep 17 00:00:00 2001 +From: serge-sans-paille <sguel...@redhat.com> +Date: Tue, 27 Oct 2020 10:59:42 +0100 +Subject: [PATCH 3/3] [stack-clash] Fix probing of dynamic alloca + +- Perform the probing in the correct direction. + Related to https://github.com/rust-lang/rust/pull/77885#issuecomment-711062924 + +- The first touch on a dynamic alloca cannot use a mov because it clobbers + existing space. Use a xor 0 instead + +Differential Revision: https://reviews.llvm.org/D90216 + +(cherry picked from commit 0f60bcc36c34522618bd1425a45f8c6006568fb6) +--- + llvm/lib/Target/X86/X86ISelLowering.cpp | 8 ++++---- + llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll | 12 ++++++------ + .../X86/stack-clash-small-alloc-medium-align.ll | 6 +++--- + 3 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp +index f68ae4461fe3..afe470cc6e0b 100644 +--- a/llvm/lib/Target/X86/X86ISelLowering.cpp ++++ b/llvm/lib/Target/X86/X86ISelLowering.cpp +@@ -31876,7 +31876,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI, + + BuildMI(testMBB, DL, TII->get(X86::JCC_1)) + .addMBB(tailMBB) +- .addImm(X86::COND_LE); ++ .addImm(X86::COND_GE); + testMBB->addSuccessor(blockMBB); + testMBB->addSuccessor(tailMBB); + +@@ -31892,9 +31892,9 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI, + // + // The property we want to enforce is to never have more than [page alloc] between two probes. + +- const unsigned MovMIOpc = +- TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi; +- addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0) ++ const unsigned XORMIOpc = ++ TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8; ++ addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0) + .addImm(0); + + BuildMI(blockMBB, DL, +diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll +index 82fd67842c8a..6dd8b6ab5897 100644 +--- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll ++++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll +@@ -24,12 +24,12 @@ attributes #0 = {"probe-stack"="inline-asm"} + ; CHECK-X86-64-NEXT: andq $-16, %rcx + ; CHECK-X86-64-NEXT: subq %rcx, %rax + ; CHECK-X86-64-NEXT: cmpq %rsp, %rax +-; CHECK-X86-64-NEXT: jle .LBB0_3 ++; CHECK-X86-64-NEXT: jge .LBB0_3 + ; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 +-; CHECK-X86-64-NEXT: movq $0, (%rsp) ++; CHECK-X86-64-NEXT: xorq $0, (%rsp) + ; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 + ; CHECK-X86-64-NEXT: cmpq %rsp, %rax +-; CHECK-X86-64-NEXT: jg .LBB0_2 ++; CHECK-X86-64-NEXT: jl .LBB0_2 + ; CHECK-X86-64-NEXT: .LBB0_3: + ; CHECK-X86-64-NEXT: movq %rax, %rsp + ; CHECK-X86-64-NEXT: movl $1, 4792(%rax) +@@ -54,12 +54,12 @@ attributes #0 = {"probe-stack"="inline-asm"} + ; CHECK-X86-32-NEXT: andl $-16, %ecx + ; CHECK-X86-32-NEXT: subl %ecx, %eax + ; CHECK-X86-32-NEXT: cmpl %esp, %eax +-; CHECK-X86-32-NEXT: jle .LBB0_3 ++; CHECK-X86-32-NEXT: jge .LBB0_3 + ; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 +-; CHECK-X86-32-NEXT: movl $0, (%esp) ++; CHECK-X86-32-NEXT: xorl $0, (%esp) + ; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 + ; CHECK-X86-32-NEXT: cmpl %esp, %eax +-; CHECK-X86-32-NEXT: jg .LBB0_2 ++; CHECK-X86-32-NEXT: jl .LBB0_2 + ; CHECK-X86-32-NEXT: .LBB0_3: + ; CHECK-X86-32-NEXT: movl %eax, %esp + ; CHECK-X86-32-NEXT: movl $1, 4792(%eax) +diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll +index eafa86f1eba9..39b6c3640a60 100644 +--- a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll ++++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll +@@ -106,12 +106,12 @@ define i32 @foo4(i64 %i) local_unnamed_addr #0 { + ; CHECK-NEXT: andq $-16, %rcx + ; CHECK-NEXT: subq %rcx, %rax + ; CHECK-NEXT: cmpq %rsp, %rax +-; CHECK-NEXT: jle .LBB3_3 ++; CHECK-NEXT: jge .LBB3_3 + ; CHECK-NEXT:.LBB3_2: # =>This Inner Loop Header: Depth=1 +-; CHECK-NEXT: movq $0, (%rsp) ++; CHECK-NEXT: xorq $0, (%rsp) + ; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 + ; CHECK-NEXT: cmpq %rsp, %rax +-; CHECK-NEXT: jg .LBB3_2 ++; CHECK-NEXT: jl .LBB3_2 + ; CHECK-NEXT:.LBB3_3: + ; CHECK-NEXT: andq $-64, %rax + ; CHECK-NEXT: movq %rax, %rsp