Conanap updated this revision to Diff 317355.
Conanap marked 2 inline comments as done.
Conanap added a comment.

Loads Hi before Lo now; removed implicit cast.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90173/new/

https://reviews.llvm.org/D90173

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
  llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
Index: llvm/test/CodeGen/PowerPC/p10-splatImm32.ll =================================================================== --- llvm/test/CodeGen/PowerPC/p10-splatImm32.ll +++ llvm/test/CodeGen/PowerPC/p10-splatImm32.ll @@ -1,22 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ ; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ -; RUN: FileCheck --check-prefix=CHECK-LE %s -; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ -; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ -; RUN: FileCheck --check-prefix=CHECK-BE %s +; RUN: FileCheck %s ; Function Attrs: norecurse nounwind readnone define <4 x i32> @test_xxsplti32dx_1(<4 x i32> %a) { -; CHECK-LE-LABEL: test_xxsplti32dx_1: -; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxsplti32dx vs34, 0, 566 -; CHECK-LE-NEXT: blr -; -; CHECK-BE-LABEL: test_xxsplti32dx_1: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsplti32dx vs34, 1, 566 -; CHECK-BE-NEXT: blr +; CHECK-LABEL: test_xxsplti32dx_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 0, 566 +; CHECK-NEXT: blr entry: %vecins1 = shufflevector <4 x i32> %a, <4 x i32> <i32 undef, i32 566, i32 undef, i32 566>, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ret <4 x i32> %vecins1 @@ -24,15 +16,10 @@ ; Function Attrs: norecurse nounwind readnone define <4 x i32> @test_xxsplti32dx_2(<4 x i32> %a) { -; CHECK-LE-LABEL: test_xxsplti32dx_2: -; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxsplti32dx vs34, 1, 33 -; CHECK-LE-NEXT: blr -; -; CHECK-BE-LABEL: test_xxsplti32dx_2: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 33 -; CHECK-BE-NEXT: blr +; CHECK-LABEL: test_xxsplti32dx_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 1, 33 +; CHECK-NEXT: blr entry: %vecins1 = shufflevector <4 x i32> <i32 33, i32 undef, i32 33, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ret <4 x i32> 
%vecins1 @@ -40,15 +27,10 @@ ; Function Attrs: norecurse nounwind readnone define <4 x i32> @test_xxsplti32dx_3(<4 x i32> %a) { -; CHECK-LE-LABEL: test_xxsplti32dx_3: -; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxsplti32dx vs34, 0, 12 -; CHECK-LE-NEXT: blr -; -; CHECK-BE-LABEL: test_xxsplti32dx_3: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsplti32dx vs34, 1, 12 -; CHECK-BE-NEXT: blr +; CHECK-LABEL: test_xxsplti32dx_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 0, 12 +; CHECK-NEXT: blr entry: %vecins1 = shufflevector <4 x i32> %a, <4 x i32> <i32 undef, i32 12, i32 undef, i32 12>, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ret <4 x i32> %vecins1 @@ -56,15 +38,10 @@ ; Function Attrs: norecurse nounwind readnone define <4 x i32> @test_xxsplti32dx_4(<4 x i32> %a) { -; CHECK-LE-LABEL: test_xxsplti32dx_4: -; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxsplti32dx vs34, 1, -683 -; CHECK-LE-NEXT: blr -; -; CHECK-BE-LABEL: test_xxsplti32dx_4: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsplti32dx vs34, 0, -683 -; CHECK-BE-NEXT: blr +; CHECK-LABEL: test_xxsplti32dx_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 1, -683 +; CHECK-NEXT: blr entry: %vecins1 = shufflevector <4 x i32> <i32 -683, i32 undef, i32 -683, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ret <4 x i32> %vecins1 @@ -72,15 +49,10 @@ ; Function Attrs: nounwind define <4 x float> @test_xxsplti32dx_5(<4 x float> %vfa) { -; CHECK-LE-LABEL: test_xxsplti32dx_5: -; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxsplti32dx vs34, 0, 1065353216 -; CHECK-LE-NEXT: blr -; -; CHECK-BE-LABEL: test_xxsplti32dx_5: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsplti32dx vs34, 1, 1065353216 -; CHECK-BE-NEXT: blr +; CHECK-LABEL: test_xxsplti32dx_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 0, 1065353216 +; CHECK-NEXT: blr entry: %vecins3.i = shufflevector <4 x float> %vfa, <4 x float> <float undef, float 1.000000e+00, float undef, float 
1.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ret <4 x float> %vecins3.i @@ -88,15 +60,10 @@ ; Function Attrs: nounwind define <4 x float> @test_xxsplti32dx_6(<4 x float> %vfa) { -; CHECK-LE-LABEL: test_xxsplti32dx_6: -; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxsplti32dx vs34, 1, 1073741824 -; CHECK-LE-NEXT: blr -; -; CHECK-BE-LABEL: test_xxsplti32dx_6: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 1073741824 -; CHECK-BE-NEXT: blr +; CHECK-LABEL: test_xxsplti32dx_6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 1, 1073741824 +; CHECK-NEXT: blr entry: %vecins3.i = shufflevector <4 x float> <float 2.000000e+00, float undef, float 2.000000e+00, float undef>, <4 x float> %vfa, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ret <4 x float> %vecins3.i @@ -105,16 +72,31 @@ ; Function Attrs: norecurse nounwind readnone ; Test to illustrate when the splat is narrower than 32-bits. define dso_local <4 x i32> @test_xxsplti32dx_7(<4 x i32> %a) local_unnamed_addr #0 { -; CHECK-LE-LABEL: test_xxsplti32dx_7: -; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxsplti32dx vs34, 1, -1414812757 -; CHECK-LE-NEXT: blr -; -; CHECK-BE-LABEL: test_xxsplti32dx_7: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsplti32dx vs34, 0, -1414812757 -; CHECK-BE-NEXT: blr +; CHECK-LABEL: test_xxsplti32dx_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 1, -1414812757 +; CHECK-NEXT: blr entry: %vecins1 = shufflevector <4 x i32> <i32 -1414812757, i32 undef, i32 -1414812757, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ret <4 x i32> %vecins1 } + +define dso_local <2 x double> @test_xxsplti32dx_8() { +; CHECK-LABEL: test_xxsplti32dx_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 0, 1082660167 +; CHECK-NEXT: xxsplti32dx vs34, 1, -1374389535 +; CHECK-NEXT: blr +entry: + ret <2 x double> <double 0x40881547AE147AE1, double 0x40881547AE147AE1> +} + +define dso_local <8 x i16> @test_xxsplti32dx_9() { +; 
CHECK-LABEL: test_xxsplti32dx_9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 0, 23855277 +; CHECK-NEXT: xxsplti32dx vs34, 1, 65827 +; CHECK-NEXT: blr +entry: + ret <8 x i16> <i16 291, i16 undef, i16 undef, i16 364, i16 undef, i16 1, i16 173, i16 undef> +} Index: llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll =================================================================== --- llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll +++ llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll @@ -1,114 +1,216 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ -; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s +; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s --check-prefixes=CHECK-LE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ ; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s \ -; RUN: --check-prefix=CHECK-NOPCREL +; RUN: --check-prefixes=CHECK-NOPCREL-BE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ ; RUN: -mattr=-pcrelative-memops -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-NOPCREL +; RUN: FileCheck %s --check-prefixes=CHECK-NOPCREL-LE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ ; RUN: -mattr=-prefix-instrs -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-NOPCREL +; RUN: FileCheck %s --check-prefixes=CHECK-NOPREFIX ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ ; RUN: -ppc-asm-full-reg-names -target-abi=elfv2 -mcpu=pwr10 < %s | \ -; RUN: FileCheck %s +; RUN: FileCheck %s --check-prefixes=CHECK-BE define dso_local <2 x double> @testDoubleToDoubleFail() local_unnamed_addr { -; CHECK-LABEL: testDoubleToDoubleFail: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv vs34, .LCPI0_0@PCREL(0), 1 -; 
CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testDoubleToDoubleFail: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3 -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testDoubleToDoubleFail: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxsplti32dx vs34, 0, 1081435463 +; CHECK-LE-NEXT: xxsplti32dx vs34, 1, -1374389535 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testDoubleToDoubleFail: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, 1081435463 +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 1, -1374389535 +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testDoubleToDoubleFail: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 0, 1081435463 +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, -1374389535 +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testDoubleToDoubleFail: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-NOPREFIX-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-NOPREFIX-NEXT: lxvx vs34, 0, r3 +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testDoubleToDoubleFail: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 1081435463 +; CHECK-BE-NEXT: xxsplti32dx vs34, 1, -1374389535 +; CHECK-BE-NEXT: blr entry: ret <2 x double> <double 3.423300e+02, double 3.423300e+02> } define dso_local <2 x double> @testFloatDenormToDouble() local_unnamed_addr { -; CHECK-LABEL: testFloatDenormToDouble: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv vs34, .LCPI1_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testFloatDenormToDouble: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3 -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: 
testFloatDenormToDouble: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxsplti32dx vs34, 0, 940259579 +; CHECK-LE-NEXT: xxsplti32dx vs34, 1, -2147483648 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDouble: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, 940259579 +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 1, -2147483648 +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDouble: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 0, 940259579 +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, -2147483648 +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testFloatDenormToDouble: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-NOPREFIX-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-NOPREFIX-NEXT: lxvx vs34, 0, r3 +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testFloatDenormToDouble: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 940259579 +; CHECK-BE-NEXT: xxsplti32dx vs34, 1, -2147483648 +; CHECK-BE-NEXT: blr entry: ret <2 x double> <double 0x380B38FB80000000, double 0x380B38FB80000000> } define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr { -; CHECK-LABEL: testDoubleToDoubleNaNFail: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv vs34, .LCPI2_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testDoubleToDoubleNaNFail: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3 -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testDoubleToDoubleNaNFail: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxsplti32dx vs34, 0, -1 +; CHECK-LE-NEXT: xxsplti32dx vs34, 1, -16 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testDoubleToDoubleNaNFail: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: 
xxsplti32dx vs34, 0, -1 +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 1, -16 +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testDoubleToDoubleNaNFail: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 0, -1 +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, -16 +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testDoubleToDoubleNaNFail: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-NOPREFIX-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-NOPREFIX-NEXT: lxvx vs34, 0, r3 +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testDoubleToDoubleNaNFail: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsplti32dx vs34, 0, -1 +; CHECK-BE-NEXT: xxsplti32dx vs34, 1, -16 +; CHECK-BE-NEXT: blr entry: ret <2 x double> <double 0xFFFFFFFFFFFFFFF0, double 0xFFFFFFFFFFFFFFF0> } define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr { -; CHECK-LABEL: testDoubleNonRepresentableScalar: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plfd f1, .LCPI3_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testDoubleNonRepresentableScalar: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-NOPCREL-NEXT: lfd f1, .LCPI3_0@toc@l(r3) -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testDoubleNonRepresentableScalar: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: plfd f1, .LCPI3_0@PCREL(0), 1 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testDoubleNonRepresentableScalar: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-NOPCREL-BE-NEXT: lfd f1, .LCPI3_0@toc@l(r3) +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testDoubleNonRepresentableScalar: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-NOPCREL-LE-NEXT: lfd f1, .LCPI3_0@toc@l(r3) +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: 
testDoubleNonRepresentableScalar: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-NOPREFIX-NEXT: lfd f1, .LCPI3_0@toc@l(r3) +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testDoubleNonRepresentableScalar: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: plfd f1, .LCPI3_0@PCREL(0), 1 +; CHECK-BE-NEXT: blr entry: ret double 3.423300e+02 } define dso_local float @testFloatDenormScalar() local_unnamed_addr { -; CHECK-LABEL: testFloatDenormScalar: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plfs f1, .LCPI4_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testFloatDenormScalar: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-NOPCREL-NEXT: lfs f1, .LCPI4_0@toc@l(r3) -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testFloatDenormScalar: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: plfs f1, .LCPI4_0@PCREL(0), 1 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testFloatDenormScalar: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-NOPCREL-BE-NEXT: lfs f1, .LCPI4_0@toc@l(r3) +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testFloatDenormScalar: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-NOPCREL-LE-NEXT: lfs f1, .LCPI4_0@toc@l(r3) +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testFloatDenormScalar: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-NOPREFIX-NEXT: lfs f1, .LCPI4_0@toc@l(r3) +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testFloatDenormScalar: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: plfs f1, .LCPI4_0@PCREL(0), 1 +; CHECK-BE-NEXT: blr entry: ret float 0x380B38FB80000000 } define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr { -; CHECK-LABEL: testFloatDenormToDoubleScalar: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plfs f1, 
.LCPI5_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testFloatDenormToDoubleScalar: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-NOPCREL-NEXT: lfs f1, .LCPI5_0@toc@l(r3) -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testFloatDenormToDoubleScalar: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: plfs f1, .LCPI5_0@PCREL(0), 1 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDoubleScalar: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-NOPCREL-BE-NEXT: lfs f1, .LCPI5_0@toc@l(r3) +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDoubleScalar: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-NOPCREL-LE-NEXT: lfs f1, .LCPI5_0@toc@l(r3) +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testFloatDenormToDoubleScalar: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-NOPREFIX-NEXT: lfs f1, .LCPI5_0@toc@l(r3) +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testFloatDenormToDoubleScalar: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: plfs f1, .LCPI5_0@PCREL(0), 1 +; CHECK-BE-NEXT: blr entry: ret double 0x380B38FB80000000 } Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -2533,6 +2533,9 @@ def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)), (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>; + + def : Pat <(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)), + (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>; } let Predicates = [IsISA3_1, HasVSX] in { Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ 
llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8605,13 +8605,41 @@ // If it is a splat of a double, check if we can shrink it to a 32 bit // non-denormal float which when converted back to double gives us the same // double. This is to exploit the XXSPLTIDP instruction. - if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() && - (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) && - convertToNonDenormSingle(APSplatBits)) { - SDValue SplatNode = DAG.getNode( - PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64, - DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32)); - return DAG.getBitcast(Op.getValueType(), SplatNode); + // If we lose precision, we use XXSPLTI32DX. + if (BVNIsConstantSplat && (SplatBitSize == 64) && + Subtarget.hasPrefixInstrs()) { + if (convertToNonDenormSingle(APSplatBits) && + (Op->getValueType(0) == MVT::v2f64)) { + SDValue SplatNode = DAG.getNode( + PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64, + DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32)); + return DAG.getBitcast(Op.getValueType(), SplatNode); + } else { // We may lose precision, so we have to use XXSPLTI32DX. + + uint32_t Hi = + (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32); + uint32_t Lo = + (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF); + SDValue SplatNode = DAG.getUNDEF(MVT::v2i64); + + if (!Hi || !Lo) + // If either load is 0, then we should generate XXLXOR to set to 0. + SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64); + + if (Hi) + SplatNode = DAG.getNode( + PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode, + DAG.getTargetConstant(0, dl, MVT::i32), + DAG.getTargetConstant(Hi, dl, MVT::i32)); + + if (Lo) + SplatNode = + DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode, + DAG.getTargetConstant(1, dl, MVT::i32), + DAG.getTargetConstant(Lo, dl, MVT::i32)); + + return DAG.getBitcast(Op.getValueType(), SplatNode); + } } if (!BVNIsConstantSplat || SplatBitSize > 32) {
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits