[PATCH] D90173: [PowerPC] Exploit splat instruction xxsplti32dx in Power10

Albion Fung via Phabricator via cfe-commits Mon, 18 Jan 2021 07:55:46 -0800

Conanap updated this revision to Diff 317355.
Conanap marked 2 inline comments as done.
Conanap added a comment.


The Hi word is now loaded before the Lo word; the implicit cast has been removed.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90173/new/

https://reviews.llvm.org/D90173

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
  llvm/test/CodeGen/PowerPC/p10-splatImm32.ll

Index: llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
+++ llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
@@ -1,22 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
 ; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN:     FileCheck --check-prefix=CHECK-LE %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
-; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN:     FileCheck --check-prefix=CHECK-BE %s
+; RUN:     FileCheck %s
 
 ; Function Attrs: norecurse nounwind readnone
 define  <4 x i32> @test_xxsplti32dx_1(<4 x i32> %a) {
-; CHECK-LE-LABEL: test_xxsplti32dx_1:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 566
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_1:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, 566
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 566
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> %a, <4 x i32> <i32 undef, i32 566, i32 undef, i32 566>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
@@ -24,15 +16,10 @@
 
 ; Function Attrs: norecurse nounwind readnone
 define  <4 x i32> @test_xxsplti32dx_2(<4 x i32> %a) {
-; CHECK-LE-LABEL: test_xxsplti32dx_2:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, 33
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_2:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, 33
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 1, 33
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> <i32 33, i32 undef, i32 33, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
@@ -40,15 +27,10 @@
 
 ; Function Attrs: norecurse nounwind readnone
 define  <4 x i32> @test_xxsplti32dx_3(<4 x i32> %a) {
-; CHECK-LE-LABEL: test_xxsplti32dx_3:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 12
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_3:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, 12
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 12
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> %a, <4 x i32> <i32 undef, i32 12, i32 undef, i32 12>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
@@ -56,15 +38,10 @@
 
 ; Function Attrs: norecurse nounwind readnone
 define  <4 x i32> @test_xxsplti32dx_4(<4 x i32> %a) {
-; CHECK-LE-LABEL: test_xxsplti32dx_4:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -683
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_4:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, -683
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 1, -683
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> <i32 -683, i32 undef, i32 -683, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
@@ -72,15 +49,10 @@
 
 ; Function Attrs: nounwind
 define  <4 x float> @test_xxsplti32dx_5(<4 x float> %vfa) {
-; CHECK-LE-LABEL: test_xxsplti32dx_5:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 1065353216
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_5:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, 1065353216
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 1065353216
+; CHECK-NEXT:    blr
 entry:
   %vecins3.i = shufflevector <4 x float> %vfa, <4 x float> <float undef, float 1.000000e+00, float undef, float 1.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x float> %vecins3.i
@@ -88,15 +60,10 @@
 
 ; Function Attrs: nounwind
 define  <4 x float> @test_xxsplti32dx_6(<4 x float> %vfa) {
-; CHECK-LE-LABEL: test_xxsplti32dx_6:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, 1073741824
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_6:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, 1073741824
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 1, 1073741824
+; CHECK-NEXT:    blr
 entry:
   %vecins3.i = shufflevector <4 x float> <float 2.000000e+00, float undef, float 2.000000e+00, float undef>, <4 x float> %vfa, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x float> %vecins3.i
@@ -105,16 +72,31 @@
 ; Function Attrs: norecurse nounwind readnone
 ; Test to illustrate when the splat is narrower than 32-bits.
 define dso_local <4 x i32> @test_xxsplti32dx_7(<4 x i32> %a) local_unnamed_addr #0 {
-; CHECK-LE-LABEL: test_xxsplti32dx_7:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -1414812757
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_7:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, -1414812757
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 1, -1414812757
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> <i32 -1414812757, i32 undef, i32 -1414812757, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
 }
+
+define dso_local <2 x double> @test_xxsplti32dx_8() {
+; CHECK-LABEL: test_xxsplti32dx_8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 1082660167
+; CHECK-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-NEXT:    blr
+entry:
+  ret <2 x double> <double 0x40881547AE147AE1, double 0x40881547AE147AE1>
+}
+
+define dso_local <8 x i16> @test_xxsplti32dx_9() {
+; CHECK-LABEL: test_xxsplti32dx_9:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 23855277
+; CHECK-NEXT:    xxsplti32dx vs34, 1, 65827
+; CHECK-NEXT:    blr
+entry:
+  ret <8 x i16> <i16 291, i16 undef, i16 undef, i16 364, i16 undef, i16 1, i16 173, i16 undef>
+}
Index: llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -1,114 +1,216 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
-; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
+; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s --check-prefixes=CHECK-LE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
 ; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s \
-; RUN:     --check-prefix=CHECK-NOPCREL
+; RUN:     --check-prefixes=CHECK-NOPCREL-BE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
 ; RUN:     -mattr=-pcrelative-memops -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN:     FileCheck %s --check-prefix=CHECK-NOPCREL
+; RUN:     FileCheck %s --check-prefixes=CHECK-NOPCREL-LE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
 ; RUN:     -mattr=-prefix-instrs -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN:     FileCheck %s --check-prefix=CHECK-NOPCREL
+; RUN:     FileCheck %s --check-prefixes=CHECK-NOPREFIX
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
 ; RUN:     -ppc-asm-full-reg-names -target-abi=elfv2 -mcpu=pwr10 < %s | \
-; RUN:     FileCheck %s
+; RUN:     FileCheck %s --check-prefixes=CHECK-BE
 
 define dso_local <2 x double> @testDoubleToDoubleFail() local_unnamed_addr {
-; CHECK-LABEL: testDoubleToDoubleFail:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plxv vs34, .LCPI0_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testDoubleToDoubleFail:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testDoubleToDoubleFail:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 1081435463
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testDoubleToDoubleFail:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 0, 1081435463
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testDoubleToDoubleFail:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 0, 1081435463
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testDoubleToDoubleFail:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-NOPREFIX-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testDoubleToDoubleFail:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, 1081435463
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-BE-NEXT:    blr
 entry:
   ret <2 x double> <double 3.423300e+02, double 3.423300e+02>
 }
 
 define dso_local <2 x double> @testFloatDenormToDouble() local_unnamed_addr {
-; CHECK-LABEL: testFloatDenormToDouble:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plxv vs34, .LCPI1_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testFloatDenormToDouble:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testFloatDenormToDouble:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 940259579
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -2147483648
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDouble:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 0, 940259579
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 1, -2147483648
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDouble:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 0, 940259579
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 1, -2147483648
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testFloatDenormToDouble:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; CHECK-NOPREFIX-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testFloatDenormToDouble:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, 940259579
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, -2147483648
+; CHECK-BE-NEXT:    blr
 entry:
   ret <2 x double> <double 0x380B38FB80000000, double 0x380B38FB80000000>
 }
 
 define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr {
-; CHECK-LABEL: testDoubleToDoubleNaNFail:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plxv vs34, .LCPI2_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testDoubleToDoubleNaNFail:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, -1
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -16
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 0, -1
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 1, -16
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 0, -1
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 1, -16
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; CHECK-NOPREFIX-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, -1
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, -16
+; CHECK-BE-NEXT:    blr
 entry:
   ret <2 x double> <double 0xFFFFFFFFFFFFFFF0, double 0xFFFFFFFFFFFFFFF0>
 }
 
 define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr {
-; CHECK-LABEL: testDoubleNonRepresentableScalar:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testDoubleNonRepresentableScalar:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NOPCREL-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-NOPCREL-BE-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-NOPCREL-LE-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-BE-NEXT:    blr
 entry:
   ret double 3.423300e+02
 }
 
 define dso_local float @testFloatDenormScalar() local_unnamed_addr {
-; CHECK-LABEL: testFloatDenormScalar:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testFloatDenormScalar:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-NOPCREL-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testFloatDenormScalar:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testFloatDenormScalar:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-NOPCREL-BE-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testFloatDenormScalar:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-NOPCREL-LE-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testFloatDenormScalar:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testFloatDenormScalar:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
+; CHECK-BE-NEXT:    blr
 entry:
   ret float 0x380B38FB80000000
 }
 
 define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr {
-; CHECK-LABEL: testFloatDenormToDoubleScalar:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testFloatDenormToDoubleScalar:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-NOPCREL-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-NOPCREL-BE-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-NOPCREL-LE-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-BE-NEXT:    blr
 entry:
   ret double 0x380B38FB80000000
 }
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2533,6 +2533,9 @@
 
   def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
             (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
+
+  def : Pat <(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)),
+             (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>;
 }
 
 let Predicates = [IsISA3_1, HasVSX] in {
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8605,13 +8605,41 @@
   // If it is a splat of a double, check if we can shrink it to a 32 bit
   // non-denormal float which when converted back to double gives us the same
   // double. This is to exploit the XXSPLTIDP instruction.
-  if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
-      (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
-      convertToNonDenormSingle(APSplatBits)) {
-    SDValue SplatNode = DAG.getNode(
-        PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
-        DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
-    return DAG.getBitcast(Op.getValueType(), SplatNode);
+  // If we lose precision, we use XXSPLTI32DX.
+  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
+      Subtarget.hasPrefixInstrs()) {
+    if (convertToNonDenormSingle(APSplatBits) &&
+        (Op->getValueType(0) == MVT::v2f64)) {
+      SDValue SplatNode = DAG.getNode(
+          PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
+          DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
+      return DAG.getBitcast(Op.getValueType(), SplatNode);
+    } else { // We may lose precision, so we have to use XXSPLTI32DX.
+
+      uint32_t Hi =
+          (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
+      uint32_t Lo =
+          (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
+      SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
+
+      if (!Hi || !Lo)
+        // If either half is 0, generate XXLXOR to zero the vector first.
+        SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
+
+      if (Hi)
+        SplatNode = DAG.getNode(
+            PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
+            DAG.getTargetConstant(0, dl, MVT::i32),
+            DAG.getTargetConstant(Hi, dl, MVT::i32));
+
+      if (Lo)
+        SplatNode =
+            DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
+                        DAG.getTargetConstant(1, dl, MVT::i32),
+                        DAG.getTargetConstant(Lo, dl, MVT::i32));
+
+      return DAG.getBitcast(Op.getValueType(), SplatNode);
+    }
   }
 
   if (!BVNIsConstantSplat || SplatBitSize > 32) {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D90173: [PowerPC] Exploit splat instruction xxsplti32dx in Power10

Reply via email to