Author: Sanjay Patel Date: 2020-12-11T17:23:35-05:00 New Revision: 204bdc5322cc89603d503fb1f02a0eba19a1b496
URL: https://github.com/llvm/llvm-project/commit/204bdc5322cc89603d503fb1f02a0eba19a1b496 DIFF: https://github.com/llvm/llvm-project/commit/204bdc5322cc89603d503fb1f02a0eba19a1b496.diff LOG: [InstCombine][x86] fix insertion point bug in vector demanded elts fold (PR48476) This transform was added at: c63799fc52ff >From what I see, it's the first demanded elements transform that adds a new instruction using the IRBuilder. There are similar folds in the generic demanded bits chunk of instcombine that also use the InsertPointGuard code pattern. The tests here would assert/crash because the new instruction was being added at the start of the demanded elements analysis rather than at the instruction that is being replaced. Added: Modified: llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp llvm/test/Transforms/InstCombine/X86/x86-addsub.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp index 3b05dba57a33..ca026baa2c41 100644 --- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp +++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp @@ -1916,13 +1916,20 @@ Optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic( case Intrinsic::x86_sse3_addsub_ps: case Intrinsic::x86_avx_addsub_pd_256: case Intrinsic::x86_avx_addsub_ps_256: { + // If none of the even or none of the odd lanes are required, turn this + // into a generic FP math instruction. APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1)); - if (DemandedElts.isSubsetOf(SubMask)) - return IC.Builder.CreateFSub(II.getArgOperand(0), II.getArgOperand(1)); - APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2)); - if (DemandedElts.isSubsetOf(AddMask)) - return IC.Builder.CreateFAdd(II.getArgOperand(0), II.getArgOperand(1)); + bool IsSubOnly = DemandedElts.isSubsetOf(SubMask); + bool IsAddOnly = DemandedElts.isSubsetOf(AddMask); + if (IsSubOnly || IsAddOnly) { + assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only"); + IRBuilderBase::InsertPointGuard Guard(IC.Builder); + IC.Builder.SetInsertPoint(&II); + Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1); + return IC.Builder.CreateBinOp( + IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1); + } simplifyAndSetOp(&II, 0, DemandedElts, UndefElts); simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2); diff --git a/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll b/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll index d051732ee819..0b9831be8fcf 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll @@ -5,6 +5,7 @@ declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) +declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8 immarg) #0 ; ; Demanded Elts @@ -164,4 +165,30 @@ define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, flo ret void } +define double @PR48476_fsub(<2 x double> %x) { +; CHECK-LABEL: @PR48476_fsub( +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> <double 0.000000e+00, double undef>, [[X:%.*]] +; CHECK-NEXT: [[T2:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP1]], <2 x double> [[X]], i8 6) +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x double> [[T2]], i32 0 +; CHECK-NEXT: ret double [[VECEXT]] +; + %t1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> zeroinitializer, <2 x double> %x) + %t2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %t1, <2 x double> %x, i8 6) + %vecext = extractelement <2 x double> %t2, i32 0 + ret double %vecext +} +define double @PR48476_fadd_fsub(<2 x double> %x) { +; CHECK-LABEL: @PR48476_fadd_fsub( +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[X:%.*]], <double undef, double 0.000000e+00> +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 1, i32 undef> +; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[S]], [[X]] +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x double> [[TMP2]], i32 0 +; CHECK-NEXT: ret double [[VECEXT]] +; + %t1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> zeroinitializer, <2 x double> %x) + %s = shufflevector <2 x double> %t1, <2 x double> undef, <2 x i32> <i32 1, i32 0> + %t2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %s, <2 x double> %x) + %vecext = extractelement <2 x double> %t2, i32 0 + ret double %vecext +} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits