https://github.com/zhaoqi5 created https://github.com/llvm/llvm-project/pull/149486
None >From 57e55fb8e9c3f5b31edf66cb19e18ff98954aacf Mon Sep 17 00:00:00 2001 From: Qi Zhao <zhaoq...@loongson.cn> Date: Fri, 18 Jul 2025 17:41:27 +0800 Subject: [PATCH] [LoongArch] Optimize general fp build_vector lowering --- .../LoongArch/LoongArchISelLowering.cpp | 5 +-- .../CodeGen/LoongArch/lasx/build-vector.ll | 2 -- llvm/test/CodeGen/LoongArch/lasx/fpowi.ll | 36 +++++++++---------- .../lasx/ir-instruction/fix-xvshuf.ll | 11 +++--- llvm/test/CodeGen/LoongArch/llvm.exp10.ll | 8 ++--- llvm/test/CodeGen/LoongArch/llvm.sincos.ll | 26 +++++++------- .../CodeGen/LoongArch/lsx/build-vector.ll | 2 -- llvm/test/CodeGen/LoongArch/lsx/fpowi.ll | 35 +++++++++--------- 8 files changed, 55 insertions(+), 70 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 2378664ca8155..1e2f4dd5c9e5e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2514,8 +2514,9 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, assert(ResTy.isVector()); unsigned NumElts = ResTy.getVectorNumElements(); - SDValue Vector = DAG.getUNDEF(ResTy); - for (unsigned i = 0; i < NumElts; ++i) { + SDValue Vector = + DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Node->getOperand(0)); + for (unsigned i = 1; i < NumElts; ++i) { Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Node->getOperand(i), DAG.getConstant(i, DL, Subtarget.getGRLenVT())); diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll index a4f3fe717ae25..61a915a2837cb 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll @@ -475,7 +475,6 @@ define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float ; CHECK-NEXT: # kill: def $f2 killed $f2 def $xr2 ; CHECK-NEXT: # kill: def $f1 killed $f1 def $xr1 ; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 -; CHECK-NEXT: xvinsve0.w $xr0, $xr0, 0 ; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1 ; CHECK-NEXT: xvinsve0.w $xr0, $xr2, 2 ; CHECK-NEXT: xvinsve0.w $xr0, $xr3, 3 @@ -505,7 +504,6 @@ define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, dou ; CHECK-NEXT: # kill: def $f2_64 killed $f2_64 def $xr2 ; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $xr1 ; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; CHECK-NEXT: xvinsve0.d $xr0, $xr0, 0 ; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1 ; CHECK-NEXT: xvinsve0.d $xr0, $xr2, 2 ; CHECK-NEXT: xvinsve0.d $xr0, $xr3, 3 diff --git a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll index 014a41ac9494c..76bb55b314f4a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll @@ -11,24 +11,23 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind { ; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill ; CHECK-NEXT: addi.w $fp, $a0, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 ; CHECK-NEXT: movgr2fr.w $fa0, $a0 ; CHECK-NEXT: move $a0, $fp ; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2) ; CHECK-NEXT: jirl $ra, $ra, 0 ; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 -; CHECK-NEXT: xvinsve0.w $xr0, $xr0, 0 ; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill ; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0 ; CHECK-NEXT: movgr2fr.w $fa0, $a0 ; CHECK-NEXT: move $a0, $fp ; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2) ; CHECK-NEXT: jirl $ra, $ra, 0 ; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 ; CHECK-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload -; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 1 -; CHECK-NEXT: xvst $xr1, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill ; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload ; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2 ; CHECK-NEXT: movgr2fr.w $fa0, $a0 @@ -106,44 +105,43 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -80 ; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill ; CHECK-NEXT: addi.w $fp, $a0, 0 -; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 ; CHECK-NEXT: movgr2fr.d $fa0, $a0 ; CHECK-NEXT: move $a0, $fp ; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2) ; CHECK-NEXT: jirl $ra, $ra, 0 ; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; CHECK-NEXT: xvinsve0.d $xr0, $xr0, 0 -; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload -; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 +; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 ; CHECK-NEXT: movgr2fr.d $fa0, $a0 ; CHECK-NEXT: move $a0, $fp ; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2) ; CHECK-NEXT: jirl $ra, $ra, 0 ; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; CHECK-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload -; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 1 -; CHECK-NEXT: xvst $xr1, $sp, 32 # 32-byte Folded Spill -; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvld $xr1, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1 +; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2 ; CHECK-NEXT: movgr2fr.d $fa0, $a0 ; CHECK-NEXT: move $a0, $fp ; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2) ; CHECK-NEXT: jirl $ra, $ra, 0 ; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; CHECK-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvld $xr1, $sp, 0 # 32-byte Folded Reload ; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 2 -; CHECK-NEXT: xvst $xr1, $sp, 32 # 32-byte Folded Spill -; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvst $xr1, $sp, 0 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 ; CHECK-NEXT: movgr2fr.d $fa0, $a0 ; CHECK-NEXT: move $a0, $fp ; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2) ; CHECK-NEXT: jirl $ra, $ra, 0 ; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; CHECK-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvld $xr1, $sp, 0 # 32-byte Folded Reload ; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 3 ; CHECK-NEXT: xvori.b $xr0, $xr1, 0 ; CHECK-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll index f154dd3b8eb3c..221aba3166ed7 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll @@ -6,15 +6,12 @@ define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-LABEL: shufflevector_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 -; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 0 ; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 2 -; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 1 -; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 -; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 2 +; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 3 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2 ; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 3 -; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 3 -; CHECK-NEXT: xvori.b $xr0, $xr2, 0 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3 ; CHECK-NEXT: ret entry: %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 6, i32 3, i32 7> diff --git a/llvm/test/CodeGen/LoongArch/llvm.exp10.ll b/llvm/test/CodeGen/LoongArch/llvm.exp10.ll index c667a3609e7f1..62ea5cba2fc26 100644 --- a/llvm/test/CodeGen/LoongArch/llvm.exp10.ll +++ b/llvm/test/CodeGen/LoongArch/llvm.exp10.ll @@ -196,22 +196,20 @@ define <2 x double> @exp10_v2f64(<2 x double> %x) #0 { ; LA64-NEXT: addi.d $sp, $sp, -48 ; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill ; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA64-NEXT: vreplvei.d $vr0, $vr0, 0 +; LA64-NEXT: vreplvei.d $vr0, $vr0, 1 ; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 ; LA64-NEXT: pcaddu18i $ra, %call36(exp10) ; LA64-NEXT: jirl $ra, $ra, 0 ; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT: vextrins.d $vr0, $vr0, 0 ; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill ; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload -; LA64-NEXT: vreplvei.d $vr0, $vr0, 1 +; LA64-NEXT: vreplvei.d $vr0, $vr0, 0 ; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 ; LA64-NEXT: pcaddu18i $ra, %call36(exp10) ; LA64-NEXT: jirl $ra, $ra, 0 ; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 ; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vextrins.d $vr1, $vr0, 16 -; LA64-NEXT: vori.b $vr0, $vr1, 0 +; LA64-NEXT: vextrins.d $vr0, $vr1, 16 ; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload ; LA64-NEXT: addi.d $sp, $sp, 48 ; LA64-NEXT: ret diff --git a/llvm/test/CodeGen/LoongArch/llvm.sincos.ll b/llvm/test/CodeGen/LoongArch/llvm.sincos.ll index 38ff783d85286..6ebf48d29e89d 100644 --- a/llvm/test/CodeGen/LoongArch/llvm.sincos.ll +++ b/llvm/test/CodeGen/LoongArch/llvm.sincos.ll @@ -571,39 +571,37 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) #0 { ; LA64-NEXT: addi.d $sp, $sp, -64 ; LA64-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA64-NEXT: vreplvei.d $vr0, $vr0, 0 -; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA64-NEXT: vreplvei.d $vr0, $vr0, 1 +; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill ; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 ; LA64-NEXT: pcaddu18i $ra, %call36(sin) ; LA64-NEXT: jirl $ra, $ra, 0 ; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT: vextrins.d $vr0, $vr0, 0 -; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill +; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill ; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload -; LA64-NEXT: vreplvei.d $vr0, $vr0, 1 +; LA64-NEXT: vreplvei.d $vr0, $vr0, 0 ; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill ; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 ; LA64-NEXT: pcaddu18i $ra, %call36(sin) ; LA64-NEXT: jirl $ra, $ra, 0 ; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload -; LA64-NEXT: vextrins.d $vr1, $vr0, 16 -; LA64-NEXT: vst $vr1, $sp, 32 # 16-byte Folded Spill -; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload +; LA64-NEXT: vextrins.d $vr0, $vr1, 16 +; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA64-NEXT: vld $vr0, $sp, 32 # 16-byte Folded Reload ; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 ; LA64-NEXT: pcaddu18i $ra, %call36(cos) ; LA64-NEXT: jirl $ra, $ra, 0 ; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT: vextrins.d $vr0, $vr0, 0 -; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill ; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload ; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 ; LA64-NEXT: pcaddu18i $ra, %call36(cos) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vextrins.d $vr1, $vr0, 16 +; LA64-NEXT: fmov.d $fa1, $fa0 ; LA64-NEXT: vld $vr0, $sp, 32 # 16-byte Folded Reload +; LA64-NEXT: vextrins.d $vr1, $vr0, 16 +; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload ; LA64-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ; LA64-NEXT: addi.d $sp, $sp, 64 ; LA64-NEXT: ret diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll index f723343964f5d..afc87d1575da5 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll @@ -338,7 +338,6 @@ define void @buildvector_v4f32(ptr %dst, float %a0, float %a1, float %a2, float ; CHECK-NEXT: # kill: def $f2 killed $f2 def $vr2 ; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1 ; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 -; CHECK-NEXT: vextrins.w $vr0, $vr0, 0 ; CHECK-NEXT: vextrins.w $vr0, $vr1, 16 ; CHECK-NEXT: vextrins.w $vr0, $vr2, 32 ; CHECK-NEXT: vextrins.w $vr0, $vr3, 48 @@ -358,7 +357,6 @@ define void @buildvector_v2f64(ptr %dst, double %a0, double %a1) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $vr1 ; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr0, 0 ; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll b/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll index 79663b63daf08..735dad453660e 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll @@ -9,44 +9,43 @@ define <4 x float> @powi_v4f32(<4 x float> %va, i32 %b) nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -48 ; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill ; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill ; CHECK-NEXT: addi.w $fp, $a0, 0 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1 ; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 ; CHECK-NEXT: move $a0, $fp ; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2) ; CHECK-NEXT: jirl $ra, $ra, 0 ; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 -; CHECK-NEXT: vextrins.w $vr0, $vr0, 0 -; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1 +; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 ; CHECK-NEXT: move $a0, $fp ; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2) ; CHECK-NEXT: jirl $ra, $ra, 0 ; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 -; CHECK-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; CHECK-NEXT: vextrins.w $vr1, $vr0, 16 -; CHECK-NEXT: vst $vr1, $sp, 16 # 16-byte Folded Spill -; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vextrins.w $vr0, $vr1, 16 +; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload ; CHECK-NEXT: vreplvei.w $vr0, $vr0, 2 ; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 ; CHECK-NEXT: move $a0, $fp ; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2) ; CHECK-NEXT: jirl $ra, $ra, 0 ; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 -; CHECK-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload ; CHECK-NEXT: vextrins.w $vr1, $vr0, 32 -; CHECK-NEXT: vst $vr1, $sp, 16 # 16-byte Folded Spill -; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload ; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 ; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 ; CHECK-NEXT: move $a0, $fp ; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2) ; CHECK-NEXT: jirl $ra, $ra, 0 ; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 -; CHECK-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload ; CHECK-NEXT: vextrins.w $vr1, $vr0, 48 ; CHECK-NEXT: vori.b $vr0, $vr1, 0 ; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload @@ -68,24 +67,22 @@ define <2 x double> @powi_v2f64(<2 x double> %va, i32 %b) nounwind { ; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill ; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill ; CHECK-NEXT: addi.w $fp, $a0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 ; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 ; CHECK-NEXT: move $a0, $fp ; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2) ; CHECK-NEXT: jirl $ra, $ra, 0 ; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr0, 0 ; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill ; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 ; CHECK-NEXT: move $a0, $fp ; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2) ; CHECK-NEXT: jirl $ra, $ra, 0 ; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 ; CHECK-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; CHECK-NEXT: vextrins.d $vr1, $vr0, 16 -; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload ; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload ; CHECK-NEXT: addi.d $sp, $sp, 48 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits