Hi Nikita, Your patch seems to increase the code size of 401.bzip2 by 11% at -Oz. This is due to the BZ2_decompress() function growing by 56%.
Would you please investigate and see if this regression can be avoided? Please let us know if you need help reproducing or analyzing the problem. Regards, -- Maxim Kuvyrkov https://www.linaro.org > On Mar 27, 2022, at 11:26 AM, ci_not...@linaro.org wrote: > > After llvm commit 6fde0439512580df793f3f48f95757b47de40d2b > Author: Nikita Popov <npo...@redhat.com> > > [MachineSink] Disable if there are any irreducible cycles > > the following benchmarks grew in size by more than 1%: > - 401.bzip2 grew in size by 11% from 36213 to 40325 bytes > - 401.bzip2:[.] BZ2_decompress grew in size by 56% from 7400 to 11560 bytes > > Below reproducer instructions can be used to re-build both "first_bad" and > "last_good" cross-toolchains used in this bisection. Naturally, the scripts > will fail when triggering benchmarking jobs if you don't have access to > Linaro TCWG CI. > > For your convenience, we have uploaded tarballs with pre-processed source and > assembly files at: > - First_bad save-temps: > https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-arm-spec2k6-Oz/20/artifact/artifacts/build-6fde0439512580df793f3f48f95757b47de40d2b/save-temps/ > - Last_good save-temps: > https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-arm-spec2k6-Oz/20/artifact/artifacts/build-eb27da7dec67f1a36505b589b786ba1a499c274a/save-temps/ > - Baseline save-temps: > https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-arm-spec2k6-Oz/20/artifact/artifacts/build-baseline/save-temps/ > > Configuration: > - Benchmark: SPEC CPU2006 > - Toolchain: Clang + Glibc + LLVM Linker > - Version: all components were built from their tip of trunk > - Target: arm-linux-gnueabihf > - Compiler flags: -Oz -mthumb > - Hardware: APM Mustang 8x X-Gene1 > > This benchmarking CI is work-in-progress, and we welcome feedback and > suggestions at linaro-toolchain@lists.linaro.org . 
In our improvement plans > is to add support for SPEC CPU2017 benchmarks and provide "perf > report/annotate" data behind these reports. > > THIS IS THE END OF INTERESTING STUFF. BELOW ARE LINKS TO BUILDS, > REPRODUCTION INSTRUCTIONS, AND THE RAW COMMIT. > > This commit has regressed these CI configurations: > - tcwg_bmk_llvm_apm/llvm-master-arm-spec2k6-Oz > - tcwg_bmk_llvm_apm/llvm-master-arm-spec2k6-Oz_LTO > > First_bad build: > https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-arm-spec2k6-Oz/20/artifact/artifacts/build-6fde0439512580df793f3f48f95757b47de40d2b/ > Last_good build: > https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-arm-spec2k6-Oz/20/artifact/artifacts/build-eb27da7dec67f1a36505b589b786ba1a499c274a/ > Baseline build: > https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-arm-spec2k6-Oz/20/artifact/artifacts/build-baseline/ > Even more details: > https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-arm-spec2k6-Oz/20/artifact/artifacts/ > > Reproduce builds: > <cut> > mkdir investigate-llvm-6fde0439512580df793f3f48f95757b47de40d2b > cd investigate-llvm-6fde0439512580df793f3f48f95757b47de40d2b > > # Fetch scripts > git clone https://git.linaro.org/toolchain/jenkins-scripts > > # Fetch manifests and test.sh script > mkdir -p artifacts/manifests > curl -o artifacts/manifests/build-baseline.sh > https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-arm-spec2k6-Oz/20/artifact/artifacts/manifests/build-baseline.sh > --fail > curl -o artifacts/manifests/build-parameters.sh > https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-arm-spec2k6-Oz/20/artifact/artifacts/manifests/build-parameters.sh > --fail > curl -o artifacts/test.sh > https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-arm-spec2k6-Oz/20/artifact/artifacts/test.sh > --fail > chmod +x artifacts/test.sh > > # Reproduce the baseline build 
(build all pre-requisites) > ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh > > # Save baseline build state (which is then restored in artifacts/test.sh) > mkdir -p ./bisect > rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ > --exclude /llvm/ ./ ./bisect/baseline/ > > cd llvm > > # Reproduce first_bad build > git checkout --detach 6fde0439512580df793f3f48f95757b47de40d2b > ../artifacts/test.sh > > # Reproduce last_good build > git checkout --detach eb27da7dec67f1a36505b589b786ba1a499c274a > ../artifacts/test.sh > > cd .. > </cut> > > Full commit (up to 1000 lines): > <cut> > commit 6fde0439512580df793f3f48f95757b47de40d2b > Author: Nikita Popov <npo...@redhat.com> > Date: Thu Feb 24 10:09:49 2022 +0100 > > [MachineSink] Disable if there are any irreducible cycles > > This is an alternative to D120330, which disables MachineSink for > functions with irreducible cycles entirely. This avoids both the > correctness problem, and ensures we don't perform non-profitable > sinks into cycles. At the same time, it may also disable > profitable sinks in the same function. This can be made more > precise by using MachineCycleInfo in the future. > > Fixes https://github.com/llvm/llvm-project/issues/53990. 
> > Differential Revision: https://reviews.llvm.org/D120800 > --- > llvm/lib/CodeGen/MachineSink.cpp | 12 +++ > llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll | 22 +++-- > llvm/test/CodeGen/X86/pr38795.ll | 93 +++++++++++----------- > .../CodeGen/X86/pr53990-incorrect-machine-sink.ll | 9 +-- > llvm/test/CodeGen/X86/x86-shrink-wrapping.ll | 36 ++++----- > 5 files changed, 87 insertions(+), 85 deletions(-) > > diff --git a/llvm/lib/CodeGen/MachineSink.cpp > b/llvm/lib/CodeGen/MachineSink.cpp > index 7ed33f9fdeac..301cc73a0530 100644 > --- a/llvm/lib/CodeGen/MachineSink.cpp > +++ b/llvm/lib/CodeGen/MachineSink.cpp > @@ -18,12 +18,14 @@ > #include "llvm/ADT/DenseSet.h" > #include "llvm/ADT/MapVector.h" > #include "llvm/ADT/PointerIntPair.h" > +#include "llvm/ADT/PostOrderIterator.h" > #include "llvm/ADT/SetVector.h" > #include "llvm/ADT/SmallSet.h" > #include "llvm/ADT/SmallVector.h" > #include "llvm/ADT/SparseBitVector.h" > #include "llvm/ADT/Statistic.h" > #include "llvm/Analysis/AliasAnalysis.h" > +#include "llvm/Analysis/CFG.h" > #include "llvm/CodeGen/MachineBasicBlock.h" > #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" > #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" > @@ -429,6 +431,16 @@ bool > MachineSinking::runOnMachineFunction(MachineFunction &MF) { > AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); > RegClassInfo.runOnMachineFunction(MF); > > + // MachineSink currently uses MachineLoopInfo, which only recognizes > natural > + // loops. As such, we could sink instructions into irreducible cycles, > which > + // would be non-profitable. > + // WARNING: The current implementation of hasStoreBetween() is incorrect > for > + // sinking into irreducible cycles (PR53990), this bailout is currently > + // necessary for correctness, not just profitability. 
> + ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); > + if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *LI)) > + return false; > + > bool EverMadeChange = false; > > while (true) { > diff --git a/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll > b/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll > index 024b6c608aba..f93e181d157c 100644 > --- a/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll > +++ b/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll > @@ -24,7 +24,7 @@ define dso_local void @n(i32* %o, i32 %p, i32 %u) nounwind { > ; CHECK-NEXT: movq %r15, %rdi > ; CHECK-NEXT: callq l > ; CHECK-NEXT: testl %eax, %eax > -; CHECK-NEXT: jne .LBB0_10 > +; CHECK-NEXT: jne .LBB0_9 > ; CHECK-NEXT: # %bb.1: # %if.end > ; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill > ; CHECK-NEXT: cmpl $0, e(%rip) > @@ -44,21 +44,19 @@ define dso_local void @n(i32* %o, i32 %p, i32 %u) > nounwind { > ; CHECK-NEXT: callq i > ; CHECK-NEXT: movl %eax, %ebp > ; CHECK-NEXT: orl %r14d, %ebp > -; CHECK-NEXT: testl %r13d, %r13d > -; CHECK-NEXT: je .LBB0_6 > -; CHECK-NEXT: # %bb.5: > ; CHECK-NEXT: andl $4, %ebx > -; CHECK-NEXT: jmp .LBB0_3 > -; CHECK-NEXT: .LBB0_6: # %if.end12 > +; CHECK-NEXT: testl %r13d, %r13d > +; CHECK-NEXT: jne .LBB0_3 > +; CHECK-NEXT: # %bb.5: # %if.end12 > ; CHECK-NEXT: testl %ebp, %ebp > -; CHECK-NEXT: je .LBB0_9 > -; CHECK-NEXT: # %bb.7: # %if.then14 > +; CHECK-NEXT: je .LBB0_8 > +; CHECK-NEXT: # %bb.6: # %if.then14 > ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload > ; CHECK-NEXT: #APP > ; CHECK-NEXT: #NO_APP > -; CHECK-NEXT: jmp .LBB0_10 > +; CHECK-NEXT: jmp .LBB0_9 > ; CHECK-NEXT: .Ltmp0: # Block address taken > -; CHECK-NEXT: # %bb.8: # %if.then20.critedge > +; CHECK-NEXT: # %bb.7: # %if.then20.critedge > ; CHECK-NEXT: movl j(%rip), %edi > ; CHECK-NEXT: movslq %eax, %rcx > ; CHECK-NEXT: movl $1, %esi > @@ -71,9 +69,9 @@ define dso_local void @n(i32* %o, i32 %p, i32 %u) nounwind { > ; CHECK-NEXT: popq 
%r15 > ; CHECK-NEXT: popq %rbp > ; CHECK-NEXT: jmp k # TAILCALL > -; CHECK-NEXT: .LBB0_9: # %if.else > +; CHECK-NEXT: .LBB0_8: # %if.else > ; CHECK-NEXT: incq 0 > -; CHECK-NEXT: .LBB0_10: # %cleanup > +; CHECK-NEXT: .LBB0_9: # %cleanup > ; CHECK-NEXT: addq $8, %rsp > ; CHECK-NEXT: popq %rbx > ; CHECK-NEXT: popq %r12 > diff --git a/llvm/test/CodeGen/X86/pr38795.ll > b/llvm/test/CodeGen/X86/pr38795.ll > index d805dcad8b6e..b526e4f471b1 100644 > --- a/llvm/test/CodeGen/X86/pr38795.ll > +++ b/llvm/test/CodeGen/X86/pr38795.ll > @@ -32,13 +32,14 @@ define dso_local void @fn() { > ; CHECK-NEXT: # implicit-def: $ebp > ; CHECK-NEXT: jmp .LBB0_1 > ; CHECK-NEXT: .p2align 4, 0x90 > -; CHECK-NEXT: .LBB0_16: # %for.inc > +; CHECK-NEXT: .LBB0_15: # %for.inc > ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 > +; CHECK-NEXT: movl %esi, %ecx > ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill > ; CHECK-NEXT: movb %dh, %dl > ; CHECK-NEXT: .LBB0_1: # %for.cond > ; CHECK-NEXT: # =>This Loop Header: Depth=1 > -; CHECK-NEXT: # Child Loop BB0_20 Depth 2 > +; CHECK-NEXT: # Child Loop BB0_19 Depth 2 > ; CHECK-NEXT: cmpb $8, %dl > ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill > ; CHECK-NEXT: ja .LBB0_3 > @@ -55,7 +56,7 @@ define dso_local void @fn() { > ; CHECK-NEXT: movb %cl, %dh > ; CHECK-NEXT: movl $0, h > ; CHECK-NEXT: cmpb $8, %dl > -; CHECK-NEXT: jg .LBB0_8 > +; CHECK-NEXT: jg .LBB0_9 > ; CHECK-NEXT: # %bb.5: # %if.then13 > ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 > ; CHECK-NEXT: movl %eax, %esi > @@ -64,12 +65,10 @@ define dso_local void @fn() { > ; CHECK-NEXT: calll printf > ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload > ; CHECK-NEXT: testb %bl, %bl > -; CHECK-NEXT: movl %esi, %ecx > ; CHECK-NEXT: # implicit-def: $eax > -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload > -; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill > +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload > ; 
CHECK-NEXT: movb %dh, %dl > -; CHECK-NEXT: jne .LBB0_16 > +; CHECK-NEXT: jne .LBB0_15 > ; CHECK-NEXT: jmp .LBB0_6 > ; CHECK-NEXT: .p2align 4, 0x90 > ; CHECK-NEXT: .LBB0_3: # %if.then > @@ -78,82 +77,82 @@ define dso_local void @fn() { > ; CHECK-NEXT: calll printf > ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload > ; CHECK-NEXT: # implicit-def: $eax > +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload > +; CHECK-NEXT: jmp .LBB0_6 > +; CHECK-NEXT: .p2align 4, 0x90 > +; CHECK-NEXT: .LBB0_9: # %if.end21 > +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 > +; CHECK-NEXT: # implicit-def: $ebp > +; CHECK-NEXT: jmp .LBB0_10 > +; CHECK-NEXT: .p2align 4, 0x90 > ; CHECK-NEXT: .LBB0_6: # %for.cond35 > ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 > +; CHECK-NEXT: movb %dl, %dh > ; CHECK-NEXT: testl %edi, %edi > -; CHECK-NEXT: je .LBB0_7 > -; CHECK-NEXT: .LBB0_11: # %af > +; CHECK-NEXT: movl %edi, %esi > +; CHECK-NEXT: movl $0, %edi > +; CHECK-NEXT: movb %cl, %dl > +; CHECK-NEXT: je .LBB0_19 > +; CHECK-NEXT: # %bb.7: # %af > ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 > ; CHECK-NEXT: testb %bl, %bl > -; CHECK-NEXT: jne .LBB0_12 > -; CHECK-NEXT: .LBB0_17: # %if.end39 > +; CHECK-NEXT: jne .LBB0_8 > +; CHECK-NEXT: .LBB0_16: # %if.end39 > ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 > ; CHECK-NEXT: testl %eax, %eax > -; CHECK-NEXT: je .LBB0_19 > -; CHECK-NEXT: # %bb.18: # %if.then41 > +; CHECK-NEXT: je .LBB0_18 > +; CHECK-NEXT: # %bb.17: # %if.then41 > ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 > ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) > ; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) > ; CHECK-NEXT: movl $.str, (%esp) > ; CHECK-NEXT: calll printf > -; CHECK-NEXT: .LBB0_19: # %for.end46 > +; CHECK-NEXT: .LBB0_18: # %for.end46 > ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 > +; CHECK-NEXT: movl %esi, %edi > ; CHECK-NEXT: # implicit-def: $dl > ; CHECK-NEXT: # implicit-def: $dh > ; CHECK-NEXT: # implicit-def: $ebp > -; CHECK-NEXT: jmp .LBB0_20 > -; 
CHECK-NEXT: .p2align 4, 0x90 > -; CHECK-NEXT: .LBB0_8: # %if.end21 > -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 > -; CHECK-NEXT: # implicit-def: $ebp > -; CHECK-NEXT: jmp .LBB0_9 > ; CHECK-NEXT: .p2align 4, 0x90 > -; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 > -; CHECK-NEXT: xorl %edi, %edi > -; CHECK-NEXT: movb %dl, %dh > -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload > -; CHECK-NEXT: .p2align 4, 0x90 > -; CHECK-NEXT: .LBB0_20: # %for.cond47 > +; CHECK-NEXT: .LBB0_19: # %for.cond47 > ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 > ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 > ; CHECK-NEXT: testb %bl, %bl > -; CHECK-NEXT: jne .LBB0_20 > -; CHECK-NEXT: # %bb.21: # %for.cond47 > -; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 > +; CHECK-NEXT: jne .LBB0_19 > +; CHECK-NEXT: # %bb.20: # %for.cond47 > +; CHECK-NEXT: # in Loop: Header=BB0_19 Depth=2 > ; CHECK-NEXT: testb %bl, %bl > -; CHECK-NEXT: jne .LBB0_20 > -; CHECK-NEXT: .LBB0_9: # %ae > +; CHECK-NEXT: jne .LBB0_19 > +; CHECK-NEXT: .LBB0_10: # %ae > ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 > ; CHECK-NEXT: testb %bl, %bl > -; CHECK-NEXT: jne .LBB0_10 > -; CHECK-NEXT: # %bb.13: # %if.end26 > +; CHECK-NEXT: jne .LBB0_11 > +; CHECK-NEXT: # %bb.12: # %if.end26 > ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 > -; CHECK-NEXT: xorl %ecx, %ecx > +; CHECK-NEXT: xorl %esi, %esi > ; CHECK-NEXT: testb %dl, %dl > -; CHECK-NEXT: je .LBB0_16 > -; CHECK-NEXT: # %bb.14: # %if.end26 > +; CHECK-NEXT: je .LBB0_15 > +; CHECK-NEXT: # %bb.13: # %if.end26 > ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 > ; CHECK-NEXT: testl %ebp, %ebp > -; CHECK-NEXT: jne .LBB0_16 > -; CHECK-NEXT: # %bb.15: # %if.then31 > +; CHECK-NEXT: jne .LBB0_15 > +; CHECK-NEXT: # %bb.14: # %if.then31 > ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 > -; CHECK-NEXT: xorl %ecx, %ecx > +; CHECK-NEXT: xorl %esi, %esi > ; CHECK-NEXT: xorl %ebp, %ebp > -; CHECK-NEXT: jmp .LBB0_16 > +; CHECK-NEXT: jmp .LBB0_15 > ; CHECK-NEXT: .p2align 
4, 0x90 > -; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 > +; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1 > +; CHECK-NEXT: movl %edi, %esi > ; CHECK-NEXT: # implicit-def: $eax > ; CHECK-NEXT: testb %bl, %bl > -; CHECK-NEXT: je .LBB0_17 > -; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 > +; CHECK-NEXT: je .LBB0_16 > +; CHECK-NEXT: .LBB0_8: # in Loop: Header=BB0_1 Depth=1 > ; CHECK-NEXT: # implicit-def: $edi > ; CHECK-NEXT: # implicit-def: $cl > -; CHECK-NEXT: # kill: killed $cl > ; CHECK-NEXT: # implicit-def: $dl > ; CHECK-NEXT: # implicit-def: $ebp > -; CHECK-NEXT: testl %edi, %edi > -; CHECK-NEXT: jne .LBB0_11 > -; CHECK-NEXT: jmp .LBB0_7 > +; CHECK-NEXT: jmp .LBB0_6 > entry: > br label %for.cond > > diff --git a/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll > b/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll > index 3d7ff6cbe676..4f56d7b16a87 100644 > --- a/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll > +++ b/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll > @@ -7,18 +7,15 @@ define void @test(i1 %c, i64* %p, i64* noalias %p2) > nounwind { > ; CHECK-LABEL: test: > ; CHECK: # %bb.0: # %entry > ; CHECK-NEXT: pushq %rbp > -; CHECK-NEXT: pushq %r15 > ; CHECK-NEXT: pushq %r14 > ; CHECK-NEXT: pushq %rbx > -; CHECK-NEXT: pushq %rax > ; CHECK-NEXT: movq %rdx, %rbx > -; CHECK-NEXT: movq %rsi, %r14 > -; CHECK-NEXT: movl %edi, %r15d > +; CHECK-NEXT: movl %edi, %r14d > +; CHECK-NEXT: movq (%rsi), %rbp > ; CHECK-NEXT: xorl %eax, %eax > ; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8) > ; CHECK-NEXT: .LBB0_1: # %split.3 > -; CHECK-NEXT: movq (%r14), %rbp > -; CHECK-NEXT: testb $1, %r15b > +; CHECK-NEXT: testb $1, %r14b > ; CHECK-NEXT: je .LBB0_3 > ; CHECK-NEXT: # %bb.2: # %clobber > ; CHECK-NEXT: callq clobber@PLT > diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll > b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll > index 0f8bb837f82a..b44895293b41 100644 > --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll > +++ 
b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll > @@ -1377,6 +1377,8 @@ define i32 @irreducibleCFG() #4 { > ; ENABLE-NEXT: pushq %rbx > ; ENABLE-NEXT: pushq %rax > ; ENABLE-NEXT: .cfi_offset %rbx, -24 > +; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax > +; ENABLE-NEXT: movl (%rax), %edi > ; ENABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax > ; ENABLE-NEXT: cmpb $0, (%rax) > ; ENABLE-NEXT: je LBB16_2 > @@ -1386,24 +1388,20 @@ define i32 @irreducibleCFG() #4 { > ; ENABLE-NEXT: jmp LBB16_1 > ; ENABLE-NEXT: LBB16_2: ## %split > ; ENABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax > +; ENABLE-NEXT: xorl %ebx, %ebx > ; ENABLE-NEXT: cmpl $0, (%rax) > -; ENABLE-NEXT: je LBB16_3 > -; ENABLE-NEXT: ## %bb.4: ## %for.body4.i > -; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax > -; ENABLE-NEXT: movl (%rax), %edi > +; ENABLE-NEXT: je LBB16_4 > +; ENABLE-NEXT: ## %bb.3: ## %for.body4.i > ; ENABLE-NEXT: xorl %ebx, %ebx > ; ENABLE-NEXT: xorl %eax, %eax > ; ENABLE-NEXT: callq _something > -; ENABLE-NEXT: jmp LBB16_5 > -; ENABLE-NEXT: LBB16_3: > -; ENABLE-NEXT: xorl %ebx, %ebx > ; ENABLE-NEXT: .p2align 4, 0x90 > -; ENABLE-NEXT: LBB16_5: ## %for.inc > +; ENABLE-NEXT: LBB16_4: ## %for.inc > ; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 > ; ENABLE-NEXT: incl %ebx > ; ENABLE-NEXT: cmpl $7, %ebx > -; ENABLE-NEXT: jl LBB16_5 > -; ENABLE-NEXT: ## %bb.6: ## %fn1.exit > +; ENABLE-NEXT: jl LBB16_4 > +; ENABLE-NEXT: ## %bb.5: ## %fn1.exit > ; ENABLE-NEXT: xorl %eax, %eax > ; ENABLE-NEXT: addq $8, %rsp > ; ENABLE-NEXT: popq %rbx > @@ -1420,6 +1418,8 @@ define i32 @irreducibleCFG() #4 { > ; DISABLE-NEXT: pushq %rbx > ; DISABLE-NEXT: pushq %rax > ; DISABLE-NEXT: .cfi_offset %rbx, -24 > +; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax > +; DISABLE-NEXT: movl (%rax), %edi > ; DISABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax > ; DISABLE-NEXT: cmpb $0, (%rax) > ; DISABLE-NEXT: je LBB16_2 > @@ -1429,24 +1429,20 @@ define i32 @irreducibleCFG() #4 { > 
; DISABLE-NEXT: jmp LBB16_1 > ; DISABLE-NEXT: LBB16_2: ## %split > ; DISABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax > +; DISABLE-NEXT: xorl %ebx, %ebx > ; DISABLE-NEXT: cmpl $0, (%rax) > -; DISABLE-NEXT: je LBB16_3 > -; DISABLE-NEXT: ## %bb.4: ## %for.body4.i > -; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax > -; DISABLE-NEXT: movl (%rax), %edi > +; DISABLE-NEXT: je LBB16_4 > +; DISABLE-NEXT: ## %bb.3: ## %for.body4.i > ; DISABLE-NEXT: xorl %ebx, %ebx > ; DISABLE-NEXT: xorl %eax, %eax > ; DISABLE-NEXT: callq _something > -; DISABLE-NEXT: jmp LBB16_5 > -; DISABLE-NEXT: LBB16_3: > -; DISABLE-NEXT: xorl %ebx, %ebx > ; DISABLE-NEXT: .p2align 4, 0x90 > -; DISABLE-NEXT: LBB16_5: ## %for.inc > +; DISABLE-NEXT: LBB16_4: ## %for.inc > ; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 > ; DISABLE-NEXT: incl %ebx > ; DISABLE-NEXT: cmpl $7, %ebx > -; DISABLE-NEXT: jl LBB16_5 > -; DISABLE-NEXT: ## %bb.6: ## %fn1.exit > +; DISABLE-NEXT: jl LBB16_4 > +; DISABLE-NEXT: ## %bb.5: ## %fn1.exit > ; DISABLE-NEXT: xorl %eax, %eax > ; DISABLE-NEXT: addq $8, %rsp > ; DISABLE-NEXT: popq %rbx > </cut> _______________________________________________ linaro-toolchain mailing list -- linaro-toolchain@lists.linaro.org To unsubscribe send an email to linaro-toolchain-le...@lists.linaro.org