[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

2021-03-24 Thread Albion Fung via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGe29bb074c62c: [PowerPC] Exploit xxsplti32dx (constant 
materialization) for scalars (authored by Conanap).

Changed prior to commit:
  https://reviews.llvm.org/D95458?vs=329015&id=333101#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95458/new/

https://reviews.llvm.org/D95458

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/constant-pool.ll
  llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
  llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
  llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
  llvm/test/CodeGen/PowerPC/pcrel.ll

Index: llvm/test/CodeGen/PowerPC/pcrel.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel.ll
+++ llvm/test/CodeGen/PowerPC/pcrel.ll
@@ -8,13 +8,14 @@
 
 ; Constant Pool Index.
 ; CHECK-S-LABEL: ConstPool
-; CHECK-S:   plfd f1, .LCPI0_0@PCREL(0), 1
+; CHECK-S:   xxsplti32dx vs1, 0, 1081002676
+; CHECK-S-NEXT:   xxsplti32dx vs1, 1, 962072674
 ; CHECK-S:   blr
 
 ; CHECK-O-LABEL: ConstPool
-; CHECK-O:   plfd 1, 0(0), 1
-; CHECK-O-NEXT:  R_PPC64_PCREL34  .rodata.cst8
-; CHECK-O:   blr
+; CHECK-O:   xxsplti32dx 1, 0, 1081002676
+; CHECK-O-NEXT:  xxsplti32dx 1, 1, 962072674
+; CHECK-O-NEXT:  blr
 define dso_local double @ConstPool() local_unnamed_addr {
   entry:
 ret double 0x406ECAB439581062
Index: llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
@@ -35,6 +35,9 @@
 @FuncPtrOut = external local_unnamed_addr global void (...)*, align 8
 
 define dso_local void @ReadWrite8() local_unnamed_addr #0 {
+; In this test the stb r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lbz r3, 0(r3)
+; which is defined between the pld and the stb.
 ; CHECK-LABEL: ReadWrite8:
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, input8@got@pcrel(0), 1
@@ -44,9 +47,6 @@
 ; CHECK-NEXT:lbz r3, 0(r3)
 ; CHECK-NEXT:stb r3, 0(r4)
 ; CHECK-NEXT:blr
-; In this test the stb r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lbz r3, 0(r3)
-; which is defined between the pld and the stb.
 entry:
   %0 = load i8, i8* @input8, align 1
   store i8 %0, i8* @output8, align 1
@@ -54,6 +54,9 @@
 }
 
 define dso_local void @ReadWrite16() local_unnamed_addr #0 {
+; In this test the sth r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lhz r3, 0(r3)
+; which is defined between the pld and the sth.
 ; CHECK-LABEL: ReadWrite16:
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, input16@got@pcrel(0), 1
@@ -63,9 +66,6 @@
 ; CHECK-NEXT:lhz r3, 0(r3)
 ; CHECK-NEXT:sth r3, 0(r4)
 ; CHECK-NEXT:blr
-; In this test the sth r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lhz r3, 0(r3)
-; which is defined between the pld and the sth.
 entry:
   %0 = load i16, i16* @input16, align 2
   store i16 %0, i16* @output16, align 2
@@ -144,7 +144,8 @@
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, inputf64@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel5:
-; CHECK-NEXT:plfd f1, .LCPI6_0@PCREL(0), 1
+; CHECK-NEXT:xxsplti32dx vs1, 0, 1075524403
+; CHECK-NEXT:xxsplti32dx vs1, 1, 858993459
 ; CHECK-NEXT:.reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
 ; CHECK-NEXT:lfd f0, 0(r3)
 ; CHECK-NEXT:pld r3, outputf64@got@pcrel(0), 1
Index: llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
@@ -173,7 +173,9 @@
 ; CHECK-LARGE: add r2, r2, r12
 ; CHECK-S-NOT:   .localentry
 ; CHECK-ALL:   # %bb.0: # %entry
-; CHECK-S-NEXT:plfd f1, .LCPI7_0@PCREL(0), 1
+; CHECK-S-NEXT:xxsplti32dx vs1, 0, 1078011044
+; CHECK-S-NEXT:xxsplti32dx vs1, 1, -337824948
+; CHECK-S-NEXT:# kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-S-NEXT:blr
 entry:
   ret double 0x404124A4EBDD334C
Index: llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
===
--- llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -122,19 +122,23 @@
 define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr {
 ; CHECK-LE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-LE:   # %bb.0: # %entry
-; 

[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

2021-03-12 Thread Stefan Pintilie via Phabricator via cfe-commits
stefanp accepted this revision.
stefanp added a comment.
This revision is now accepted and ready to land.

Thank you for adding this!
Other than one minor nit I think this LGTM.

Feel free to address nits on commits.




Comment at: llvm/lib/Target/PowerPC/PPCISelLowering.cpp:8834
+   );
+  bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
+

nit:
You can just inline this. It is only used in one place.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95458/new/

https://reviews.llvm.org/D95458

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

2021-03-10 Thread Amy Kwan via Phabricator via cfe-commits
amyk added inline comments.



Comment at: llvm/lib/Target/PowerPC/PPCISelLowering.cpp:16134
 if (Subtarget.hasPrefixInstrs()) {
-  // With prefixed instructions, we can materialize anything that can be
-  // represented with a 32-bit immediate, not just positive zero.
-  APFloat APFloatOfImm = Imm;
-  return convertToNonDenormSingle(APFloatOfImm);
+  // we can materialize all imms via XXSPLTI32dDX and XXSPLTIDP
+  return true;

Minor nit on comment.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95458/new/

https://reviews.llvm.org/D95458

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

2021-03-08 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 329015.
Conanap marked 3 inline comments as done.
Conanap added a comment.

Updated some comments.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95458/new/

https://reviews.llvm.org/D95458

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/constant-pool.ll
  llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
  llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
  llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
  llvm/test/CodeGen/PowerPC/pcrel.ll

Index: llvm/test/CodeGen/PowerPC/pcrel.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel.ll
+++ llvm/test/CodeGen/PowerPC/pcrel.ll
@@ -8,13 +8,14 @@
 
 ; Constant Pool Index.
 ; CHECK-S-LABEL: ConstPool
-; CHECK-S:   plfd f1, .LCPI0_0@PCREL(0), 1
+; CHECK-S:   xxsplti32dx vs1, 0, 1081002676
+; CHECK-S-NEXT:   xxsplti32dx vs1, 1, 962072674
 ; CHECK-S:   blr
 
 ; CHECK-O-LABEL: ConstPool
-; CHECK-O:   plfd 1, 0(0), 1
-; CHECK-O-NEXT:  R_PPC64_PCREL34  .rodata.cst8
-; CHECK-O:   blr
+; CHECK-O:   xxsplti32dx 1, 0, 1081002676
+; CHECK-O-NEXT:  xxsplti32dx 1, 1, 962072674
+; CHECK-O-NEXT:  blr
 define dso_local double @ConstPool() local_unnamed_addr {
   entry:
 ret double 0x406ECAB439581062
Index: llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
@@ -35,6 +35,9 @@
 @FuncPtrOut = external local_unnamed_addr global void (...)*, align 8
 
 define dso_local void @ReadWrite8() local_unnamed_addr #0 {
+; In this test the stb r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lbz r3, 0(r3)
+; which is defined between the pld and the stb.
 ; CHECK-LABEL: ReadWrite8:
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, input8@got@pcrel(0), 1
@@ -44,9 +47,6 @@
 ; CHECK-NEXT:lbz r3, 0(r3)
 ; CHECK-NEXT:stb r3, 0(r4)
 ; CHECK-NEXT:blr
-; In this test the stb r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lbz r3, 0(r3)
-; which is defined between the pld and the stb.
 entry:
   %0 = load i8, i8* @input8, align 1
   store i8 %0, i8* @output8, align 1
@@ -54,6 +54,9 @@
 }
 
 define dso_local void @ReadWrite16() local_unnamed_addr #0 {
+; In this test the sth r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lhz r3, 0(r3)
+; which is defined between the pld and the sth.
 ; CHECK-LABEL: ReadWrite16:
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, input16@got@pcrel(0), 1
@@ -63,9 +66,6 @@
 ; CHECK-NEXT:lhz r3, 0(r3)
 ; CHECK-NEXT:sth r3, 0(r4)
 ; CHECK-NEXT:blr
-; In this test the sth r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lhz r3, 0(r3)
-; which is defined between the pld and the sth.
 entry:
   %0 = load i16, i16* @input16, align 2
   store i16 %0, i16* @output16, align 2
@@ -144,7 +144,8 @@
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, inputf64@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel5:
-; CHECK-NEXT:plfd f1, .LCPI6_0@PCREL(0), 1
+; CHECK-NEXT:xxsplti32dx vs1, 0, 1075524403
+; CHECK-NEXT:xxsplti32dx vs1, 1, 858993459
 ; CHECK-NEXT:.reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
 ; CHECK-NEXT:lfd f0, 0(r3)
 ; CHECK-NEXT:pld r3, outputf64@got@pcrel(0), 1
Index: llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
@@ -173,7 +173,9 @@
 ; CHECK-LARGE: add r2, r2, r12
 ; CHECK-S-NOT:   .localentry
 ; CHECK-ALL:   # %bb.0: # %entry
-; CHECK-S-NEXT:plfd f1, .LCPI7_0@PCREL(0), 1
+; CHECK-S-NEXT:xxsplti32dx vs1, 0, 1078011044
+; CHECK-S-NEXT:xxsplti32dx vs1, 1, -337824948
+; CHECK-S-NEXT:# kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-S-NEXT:blr
 entry:
   ret double 0x404124A4EBDD334C
Index: llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
===
--- llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -122,19 +122,23 @@
 define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr {
 ; CHECK-LE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-LE:   # %bb.0: # %entry
-; CHECK-LE-NEXT:plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-LE-NEXT:xxsplti32dx vs1, 0, 1081435463
+; CHECK-LE-NEXT:xxsplti32dx vs1, 1, -1374389535
+; CHECK-LE-NEXT:# kill: def $f1 

[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

2021-03-05 Thread Amy Kwan via Phabricator via cfe-commits
amyk added inline comments.



Comment at: llvm/lib/Target/PowerPC/PPCInstrPrefix.td:1885
 
+let isReMaterializable = 1, isMoveImm = 1, Predicates = [PrefixInstrs] in {
+  def XXSPLTI32DX :

I think it might be good to add a comment explaining why the `XXSPLTI32DX` 
instruction needs to be split out like this from the other instructions.



Comment at: llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll:41
+; which is defined between the pld and the stb.
+; In this test the stb r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lbz r3, 0(r3)

nit: Remove duplicate comment.



Comment at: llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll:63
+; which is defined between the pld and the sth.
+; In this test the sth r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lhz r3, 0(r3)

nit: Remove duplicate comment.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95458/new/

https://reviews.llvm.org/D95458

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

2021-03-02 Thread Albion Fung via Phabricator via cfe-commits
Conanap marked 2 inline comments as done.
Conanap added inline comments.



Comment at: llvm/lib/Target/PowerPC/PPCISelLowering.cpp:8593
+  return !convertToNonDenormSingle(ArgAPFloat);
+}
+

stefanp wrote:
> I'm wondering if it would not be better to just inline this. It's just "not" 
> of another call. That would simplify the patch a little.
so in the process of removing this, I thought I might as well just write a 
non-destructive test in its place. The tests didn't have any problems with just 
`checkConvertToNonDenormSingle` but might as well be on the safer side of 
things.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95458/new/

https://reviews.llvm.org/D95458

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

2021-03-02 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 327537.
Conanap added a comment.

Addressed Stefan's comments, converted the check to a mirror of the original 
function for XXSPLTIDP except non-destructive.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95458/new/

https://reviews.llvm.org/D95458

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/constant-pool.ll
  llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
  llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
  llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
  llvm/test/CodeGen/PowerPC/pcrel.ll

Index: llvm/test/CodeGen/PowerPC/pcrel.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel.ll
+++ llvm/test/CodeGen/PowerPC/pcrel.ll
@@ -8,13 +8,14 @@
 
 ; Constant Pool Index.
 ; CHECK-S-LABEL: ConstPool
-; CHECK-S:   plfd f1, .LCPI0_0@PCREL(0), 1
+; CHECK-S:   xxsplti32dx vs1, 0, 1081002676
+; CHECK-S-NEXT:   xxsplti32dx vs1, 1, 962072674
 ; CHECK-S:   blr
 
 ; CHECK-O-LABEL: ConstPool
-; CHECK-O:   plfd 1, 0(0), 1
-; CHECK-O-NEXT:  R_PPC64_PCREL34  .rodata.cst8
-; CHECK-O:   blr
+; CHECK-O:   xxsplti32dx 1, 0, 1081002676
+; CHECK-O-NEXT:  xxsplti32dx 1, 1, 962072674
+; CHECK-O-NEXT:  blr
 define dso_local double @ConstPool() local_unnamed_addr {
   entry:
 ret double 0x406ECAB439581062
Index: llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
@@ -35,6 +35,12 @@
 @FuncPtrOut = external local_unnamed_addr global void (...)*, align 8
 
 define dso_local void @ReadWrite8() local_unnamed_addr #0 {
+; In this test the stb r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lbz r3, 0(r3)
+; which is defined between the pld and the stb.
+; In this test the stb r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lbz r3, 0(r3)
+; which is defined between the pld and the stb.
 ; CHECK-LABEL: ReadWrite8:
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, input8@got@pcrel(0), 1
@@ -44,9 +50,6 @@
 ; CHECK-NEXT:lbz r3, 0(r3)
 ; CHECK-NEXT:stb r3, 0(r4)
 ; CHECK-NEXT:blr
-; In this test the stb r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lbz r3, 0(r3)
-; which is defined between the pld and the stb.
 entry:
   %0 = load i8, i8* @input8, align 1
   store i8 %0, i8* @output8, align 1
@@ -54,6 +57,12 @@
 }
 
 define dso_local void @ReadWrite16() local_unnamed_addr #0 {
+; In this test the sth r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lhz r3, 0(r3)
+; which is defined between the pld and the sth.
+; In this test the sth r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lhz r3, 0(r3)
+; which is defined between the pld and the sth.
 ; CHECK-LABEL: ReadWrite16:
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, input16@got@pcrel(0), 1
@@ -63,9 +72,6 @@
 ; CHECK-NEXT:lhz r3, 0(r3)
 ; CHECK-NEXT:sth r3, 0(r4)
 ; CHECK-NEXT:blr
-; In this test the sth r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lhz r3, 0(r3)
-; which is defined between the pld and the sth.
 entry:
   %0 = load i16, i16* @input16, align 2
   store i16 %0, i16* @output16, align 2
@@ -144,7 +150,8 @@
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, inputf64@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel5:
-; CHECK-NEXT:plfd f1, .LCPI6_0@PCREL(0), 1
+; CHECK-NEXT:xxsplti32dx vs1, 0, 1075524403
+; CHECK-NEXT:xxsplti32dx vs1, 1, 858993459
 ; CHECK-NEXT:.reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
 ; CHECK-NEXT:lfd f0, 0(r3)
 ; CHECK-NEXT:pld r3, outputf64@got@pcrel(0), 1
Index: llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
@@ -173,7 +173,9 @@
 ; CHECK-LARGE: add r2, r2, r12
 ; CHECK-S-NOT:   .localentry
 ; CHECK-ALL:   # %bb.0: # %entry
-; CHECK-S-NEXT:plfd f1, .LCPI7_0@PCREL(0), 1
+; CHECK-S-NEXT:xxsplti32dx vs1, 0, 1078011044
+; CHECK-S-NEXT:xxsplti32dx vs1, 1, -337824948
+; CHECK-S-NEXT:# kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-S-NEXT:blr
 entry:
   ret double 0x404124A4EBDD334C
Index: llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
===
--- llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ 

[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

2021-03-01 Thread Stefan Pintilie via Phabricator via cfe-commits
stefanp added a comment.

Comments relate to just cleaning up the patch a little.




Comment at: llvm/lib/Target/PowerPC/PPCISelLowering.cpp:8593
+  return !convertToNonDenormSingle(ArgAPFloat);
+}
+

I'm wondering if it would not be better to just inline this. It's just "not" of 
another call. That would simplify the patch a little.



Comment at: llvm/lib/Target/PowerPC/PPCISelLowering.cpp:15874
+  return convertToNonDenormSingle(APFloatOfImm) ||
+ checkNonDenormCannotConvertToSingle(APFloatOfImm);
 }

Isn't this just :
```
return convertToNonDenormSingle(APFloatOfImm) ||
   !convertToNonDenormSingle(APFloatOfImm);
```
Which is always true?

Basically the logic is that we can now materialize without a load any `f32` or 
any `f64`.



Comment at: llvm/lib/Target/PowerPC/PPCISelLowering.h:1321
   bool convertToNonDenormSingle(APFloat &ArgAPFloat);
+  bool checkNonDenormCannotConvertToSingle(APInt &ArgAPInt);
+  bool checkNonDenormCannotConvertToSingle(APFloat &ArgAPFloat);

Conanap wrote:
> stefanp wrote:
> > Is the APInt version of this function used anywhere?
> > 
> Hm I don't think so, although I implemented it for consistency with 
> `XXSPLTIDP` (`convertToNonDenormSingle`). I'll remove this if that is 
> preferred.
nit:
Ok, unless other reviewers disagree, just remove it.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95458/new/

https://reviews.llvm.org/D95458

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

2021-02-24 Thread Albion Fung via Phabricator via cfe-commits
Conanap added inline comments.



Comment at: llvm/lib/Target/PowerPC/PPCISelLowering.h:1321
   bool convertToNonDenormSingle(APFloat &ArgAPFloat);
+  bool checkNonDenormCannotConvertToSingle(APInt &ArgAPInt);
+  bool checkNonDenormCannotConvertToSingle(APFloat &ArgAPFloat);

stefanp wrote:
> Is the APInt version of this function used anywhere?
> 
Hm I don't think so, although I implemented it for consistency with `XXSPLTIDP` 
(`convertToNonDenormSingle`). I'll remove this if that is preferred.



Comment at: llvm/test/CodeGen/PowerPC/constant-pool.ll:363
+; CHECK-NEXT:stxv vs3, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT:xxsplti32dx vs3, 1, -343597384
+; CHECK-NEXT:# kill: def $f3 killed $f3 killed $vsl3

stefanp wrote:
> What is going on here?
> It almost looks like we are spilling `vs3` half way through materializing a 
> constant.
This is fixed now, thank you for spotting this.



Comment at: llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll:147
 ; CHECK-NEXT:  .Lpcrel5:
-; CHECK-NEXT:plfd f1, .LCPI6_0@PCREL(0), 1
+; CHECK-NEXT:xxsplti32dx vs1, 0, 1075524403
+; CHECK-NEXT:xxsplti32dx vs1, 1, 858993459

Just a heads up - the tests in this file are autogenerated, hence some 
unrelated changes.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95458/new/

https://reviews.llvm.org/D95458

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

2021-02-24 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 326000.
Conanap marked 6 inline comments as done.
Conanap added a comment.

Addressed some nits and a problem where sometimes the compiler would spill half 
way through materialization.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95458/new/

https://reviews.llvm.org/D95458

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/constant-pool.ll
  llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
  llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
  llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
  llvm/test/CodeGen/PowerPC/pcrel.ll

Index: llvm/test/CodeGen/PowerPC/pcrel.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel.ll
+++ llvm/test/CodeGen/PowerPC/pcrel.ll
@@ -8,13 +8,14 @@
 
 ; Constant Pool Index.
 ; CHECK-S-LABEL: ConstPool
-; CHECK-S:   plfd f1, .LCPI0_0@PCREL(0), 1
+; CHECK-S:   xxsplti32dx vs1, 0, 1081002676
+; CHECK-S-NEXT:   xxsplti32dx vs1, 1, 962072674
 ; CHECK-S:   blr
 
 ; CHECK-O-LABEL: ConstPool
-; CHECK-O:   plfd 1, 0(0), 1
-; CHECK-O-NEXT:  R_PPC64_PCREL34  .rodata.cst8
-; CHECK-O:   blr
+; CHECK-O:   xxsplti32dx 1, 0, 1081002676
+; CHECK-O-NEXT:  xxsplti32dx 1, 1, 962072674
+; CHECK-O-NEXT:  blr
 define dso_local double @ConstPool() local_unnamed_addr {
   entry:
 ret double 0x406ECAB439581062
Index: llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
@@ -35,6 +35,9 @@
 @FuncPtrOut = external local_unnamed_addr global void (...)*, align 8
 
 define dso_local void @ReadWrite8() local_unnamed_addr #0 {
+; In this test the stb r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lbz r3, 0(r3)
+; which is defined between the pld and the stb.
 ; CHECK-LABEL: ReadWrite8:
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, input8@got@pcrel(0), 1
@@ -42,9 +45,6 @@
 ; CHECK-NEXT:pld r4, output8@got@pcrel(0), 1
 ; CHECK-NEXT:.reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
 ; CHECK-NEXT:lbz r3, 0(r3)
-; In this test the stb r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lbz r3, 0(r3)
-; which is defined between the pld and the stb.
 ; CHECK-NEXT:stb r3, 0(r4)
 ; CHECK-NEXT:blr
 entry:
@@ -54,6 +54,9 @@
 }
 
 define dso_local void @ReadWrite16() local_unnamed_addr #0 {
+; In this test the sth r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lhz r3, 0(r3)
+; which is defined between the pld and the sth.
 ; CHECK-LABEL: ReadWrite16:
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, input16@got@pcrel(0), 1
@@ -61,9 +64,6 @@
 ; CHECK-NEXT:pld r4, output16@got@pcrel(0), 1
 ; CHECK-NEXT:.reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
 ; CHECK-NEXT:lhz r3, 0(r3)
-; In this test the sth r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lhz r3, 0(r3)
-; which is defined between the pld and the sth.
 ; CHECK-NEXT:sth r3, 0(r4)
 ; CHECK-NEXT:blr
 entry:
@@ -144,7 +144,8 @@
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, inputf64@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel5:
-; CHECK-NEXT:plfd f1, .LCPI6_0@PCREL(0), 1
+; CHECK-NEXT:xxsplti32dx vs1, 0, 1075524403
+; CHECK-NEXT:xxsplti32dx vs1, 1, 858993459
 ; CHECK-NEXT:.reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
 ; CHECK-NEXT:lfd f0, 0(r3)
 ; CHECK-NEXT:pld r3, outputf64@got@pcrel(0), 1
@@ -286,8 +287,7 @@
 
 define dso_local void @FuncPtrCall() local_unnamed_addr #0 {
 ; CHECK-LABEL: FuncPtrCall:
-; CHECK: .localentry FuncPtrCall, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, FuncPtrIn@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel10:
 ; CHECK-NEXT:.reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
@@ -317,8 +317,7 @@
 
 define dso_local signext i32 @VecMultiUse() local_unnamed_addr #0 {
 ; CHECK-LABEL: VecMultiUse:
-; CHECK: .localentry VecMultiUse, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:mflr r0
 ; CHECK-NEXT:std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:std r30, -16(r1) # 8-byte Folded Spill
@@ -355,8 +354,7 @@
 
 define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr #0 {
 ; CHECK-LABEL: UseAddr:
-; CHECK: .localentry UseAddr, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:mflr r0
 ; CHECK-NEXT:std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:std r0, 16(r1)
Index: 

[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

2021-02-16 Thread Amy Kwan via Phabricator via cfe-commits
amyk added a comment.

In addition to the nit comments, I also have the same question as Stefan for 
`getFPAs64BitIntHi`/`getFPAs64BitIntLo`.




Comment at: llvm/lib/Target/PowerPC/PPCISelLowering.cpp:8591
+  // Only convert if it loses info, since XXSPLTIDP should
+  // handle the other case
+  return !ArgAPFloat.isDenormal() &&

nit: end with period.



Comment at: llvm/lib/Target/PowerPC/PPCInstrPrefix.td:2609
 
+// To replace constant pool with XXSPLI32DX for scalars.
+def : Pat<(f32 nzFPImmAsi64





Comment at: llvm/lib/Target/PowerPC/PPCInstrPrefix.td:2611
+def : Pat<(f32 nzFPImmAsi64
+   : $A),
+  (COPY_TO_REGCLASS(XXSPLTI32DX(XXSPLTI32DX(IMPLICIT_DEF), 0,

Have this `: $A` on the line above? Same as the one below.
Also a minor nit, but add a space to separate the `(`.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95458/new/

https://reviews.llvm.org/D95458

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

2021-02-16 Thread Stefan Pintilie via Phabricator via cfe-commits
stefanp requested changes to this revision.
stefanp added inline comments.
This revision now requires changes to proceed.



Comment at: llvm/lib/Target/PowerPC/PPCISelLowering.h:1321
   bool convertToNonDenormSingle(APFloat &ArgAPFloat);
+  bool checkNonDenormCannotConvertToSingle(APInt &ArgAPInt);
+  bool checkNonDenormCannotConvertToSingle(APFloat &ArgAPFloat);

Is the APInt version of this function used anywhere?




Comment at: llvm/lib/Target/PowerPC/PPCInstrInfo.td:412
+  APFloat APFloatOfN = N->getValueAPF();
+  checkNonDenormCannotConvertToSingle(APFloatOfN);
+  uint32_t Hi = (uint32_t)((APFloatOfN.bitcastToAPInt().getZExtValue() &

Why are we running this here?
We don't check the return of the function so we must assume that it returns 
true.
In that case the value of `APFloatOfN` won't change because 
`convertToNonDenormSingle` will only change the value of the parameter if it 
returns true. But `checkNonDenormCannotConvertToSingle` only returns true if 
`convertToNonDenormSingle` returns false.



Comment at: llvm/test/CodeGen/PowerPC/constant-pool.ll:44
+; CHECK-NEXT:xxsplti32dx vs1, 0, 56623104
+; CHECK-NEXT:xxsplti32dx vs1, 1, -609716532
+; CHECK-NEXT:# kill: def $f1 killed $f1 killed $vsl1

I'm looking to understand this test case.
We are trying to materialize a special PowerPC long double (double-double). It 
seems that we have materialized one half of it and not the other half.

Is it because the first half is a denormal?
Why are we avoiding denormals anyway? It seems like we can completely specify a 
64 bit double with two `xxsplti32dx` instructions. 



Comment at: llvm/test/CodeGen/PowerPC/constant-pool.ll:363
+; CHECK-NEXT:stxv vs3, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT:xxsplti32dx vs3, 1, -343597384
+; CHECK-NEXT:# kill: def $f3 killed $f3 killed $vsl3

What is going on here?
It almost looks like we are spilling `vs3` half way through materializing a 
constant.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95458/new/

https://reviews.llvm.org/D95458

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

2021-02-03 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 321184.
Conanap added a comment.

Updated to ensure the short-circuit protects against the destructive function.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95458/new/

https://reviews.llvm.org/D95458

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/constant-pool.ll
  llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
  llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
  llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
  llvm/test/CodeGen/PowerPC/pcrel.ll

Index: llvm/test/CodeGen/PowerPC/pcrel.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel.ll
+++ llvm/test/CodeGen/PowerPC/pcrel.ll
@@ -8,13 +8,14 @@
 
 ; Constant Pool Index.
 ; CHECK-S-LABEL: ConstPool
-; CHECK-S:   plfd f1, .LCPI0_0@PCREL(0), 1
+; CHECK-S:   xxsplti32dx vs1, 0, 1081002676
+; CHECK-S-NEXT:   xxsplti32dx vs1, 1, 962072674
 ; CHECK-S:   blr
 
 ; CHECK-O-LABEL: ConstPool
-; CHECK-O:   plfd 1, 0(0), 1
-; CHECK-O-NEXT:  R_PPC64_PCREL34  .rodata.cst8
-; CHECK-O:   blr
+; CHECK-O:   xxsplti32dx 1, 0, 1081002676
+; CHECK-O-NEXT:  xxsplti32dx 1, 1, 962072674
+; CHECK-O-NEXT:  blr
 define dso_local double @ConstPool() local_unnamed_addr {
   entry:
 ret double 0x406ECAB439581062
Index: llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
@@ -35,6 +35,9 @@
 @FuncPtrOut = external local_unnamed_addr global void (...)*, align 8
 
 define dso_local void @ReadWrite8() local_unnamed_addr #0 {
+; In this test the stb r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lbz r3, 0(r3)
+; which is defined between the pld and the stb.
 ; CHECK-LABEL: ReadWrite8:
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, input8@got@pcrel(0), 1
@@ -42,9 +45,6 @@
 ; CHECK-NEXT:pld r4, output8@got@pcrel(0), 1
 ; CHECK-NEXT:.reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
 ; CHECK-NEXT:lbz r3, 0(r3)
-; In this test the stb r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lbz r3, 0(r3)
-; which is defined between the pld and the stb.
 ; CHECK-NEXT:stb r3, 0(r4)
 ; CHECK-NEXT:blr
 entry:
@@ -54,6 +54,9 @@
 }
 
 define dso_local void @ReadWrite16() local_unnamed_addr #0 {
+; In this test the sth r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lhz r3, 0(r3)
+; which is defined between the pld and the sth.
 ; CHECK-LABEL: ReadWrite16:
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, input16@got@pcrel(0), 1
@@ -61,9 +64,6 @@
 ; CHECK-NEXT:pld r4, output16@got@pcrel(0), 1
 ; CHECK-NEXT:.reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
 ; CHECK-NEXT:lhz r3, 0(r3)
-; In this test the sth r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lhz r3, 0(r3)
-; which is defined between the pld and the sth.
 ; CHECK-NEXT:sth r3, 0(r4)
 ; CHECK-NEXT:blr
 entry:
@@ -144,7 +144,8 @@
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, inputf64@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel5:
-; CHECK-NEXT:plfd f1, .LCPI6_0@PCREL(0), 1
+; CHECK-NEXT:xxsplti32dx vs1, 0, 1075524403
+; CHECK-NEXT:xxsplti32dx vs1, 1, 858993459
 ; CHECK-NEXT:.reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
 ; CHECK-NEXT:lfd f0, 0(r3)
 ; CHECK-NEXT:pld r3, outputf64@got@pcrel(0), 1
@@ -286,8 +287,7 @@
 
 define dso_local void @FuncPtrCall() local_unnamed_addr #0 {
 ; CHECK-LABEL: FuncPtrCall:
-; CHECK: .localentry FuncPtrCall, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, FuncPtrIn@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel10:
 ; CHECK-NEXT:.reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
@@ -317,8 +317,7 @@
 
 define dso_local signext i32 @VecMultiUse() local_unnamed_addr #0 {
 ; CHECK-LABEL: VecMultiUse:
-; CHECK: .localentry VecMultiUse, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:mflr r0
 ; CHECK-NEXT:std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:std r30, -16(r1) # 8-byte Folded Spill
@@ -355,8 +354,7 @@
 
 define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr #0 {
 ; CHECK-LABEL: UseAddr:
-; CHECK: .localentry UseAddr, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:mflr r0
 ; CHECK-NEXT:std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:std r0, 16(r1)
Index: llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
===

[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

2021-01-26 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Conanap added reviewers: nemanjai, saghir, PowerPC.
Conanap added projects: LLVM, clang, PowerPC.
Herald added a subscriber: kbarton.
Conanap requested review of this revision.

A related earlier differential (exploiting xxsplti32dx for vectors) is here:
https://reviews.llvm.org/D90173

This patch exploits the xxsplti32dx instruction, available on Power10, to
materialize scalar constants in place of constant pool loads in cases where
xxspltidp cannot be used, usually because the immediate cannot fit into 32 bits.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D95458

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/constant-pool.ll
  llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
  llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
  llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
  llvm/test/CodeGen/PowerPC/pcrel.ll

Index: llvm/test/CodeGen/PowerPC/pcrel.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel.ll
+++ llvm/test/CodeGen/PowerPC/pcrel.ll
@@ -8,13 +8,14 @@
 
 ; Constant Pool Index.
 ; CHECK-S-LABEL: ConstPool
-; CHECK-S:   plfd f1, .LCPI0_0@PCREL(0), 1
+; CHECK-S:   xxsplti32dx vs1, 0, 1081002676
+; CHECK-S-NEXT:   xxsplti32dx vs1, 1, 962072674
 ; CHECK-S:   blr
 
 ; CHECK-O-LABEL: ConstPool
-; CHECK-O:   plfd 1, 0(0), 1
-; CHECK-O-NEXT:  R_PPC64_PCREL34  .rodata.cst8
-; CHECK-O:   blr
+; CHECK-O:   xxsplti32dx 1, 0, 1081002676
+; CHECK-O-NEXT:  xxsplti32dx 1, 1, 962072674
+; CHECK-O-NEXT:  blr
 define dso_local double @ConstPool() local_unnamed_addr {
   entry:
 ret double 0x406ECAB439581062
Index: llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
===
--- llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
@@ -35,6 +35,9 @@
 @FuncPtrOut = external local_unnamed_addr global void (...)*, align 8
 
 define dso_local void @ReadWrite8() local_unnamed_addr #0 {
+; In this test the stb r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lbz r3, 0(r3)
+; which is defined between the pld and the stb.
 ; CHECK-LABEL: ReadWrite8:
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, input8@got@pcrel(0), 1
@@ -42,9 +45,6 @@
 ; CHECK-NEXT:pld r4, output8@got@pcrel(0), 1
 ; CHECK-NEXT:.reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
 ; CHECK-NEXT:lbz r3, 0(r3)
-; In this test the stb r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lbz r3, 0(r3)
-; which is defined between the pld and the stb.
 ; CHECK-NEXT:stb r3, 0(r4)
 ; CHECK-NEXT:blr
 entry:
@@ -54,6 +54,9 @@
 }
 
 define dso_local void @ReadWrite16() local_unnamed_addr #0 {
+; In this test the sth r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lhz r3, 0(r3)
+; which is defined between the pld and the sth.
 ; CHECK-LABEL: ReadWrite16:
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, input16@got@pcrel(0), 1
@@ -61,9 +64,6 @@
 ; CHECK-NEXT:pld r4, output16@got@pcrel(0), 1
 ; CHECK-NEXT:.reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
 ; CHECK-NEXT:lhz r3, 0(r3)
-; In this test the sth r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lhz r3, 0(r3)
-; which is defined between the pld and the sth.
 ; CHECK-NEXT:sth r3, 0(r4)
 ; CHECK-NEXT:blr
 entry:
@@ -144,7 +144,8 @@
 ; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, inputf64@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel5:
-; CHECK-NEXT:plfd f1, .LCPI6_0@PCREL(0), 1
+; CHECK-NEXT:xxsplti32dx vs1, 0, 1075524403
+; CHECK-NEXT:xxsplti32dx vs1, 1, 858993459
 ; CHECK-NEXT:.reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
 ; CHECK-NEXT:lfd f0, 0(r3)
 ; CHECK-NEXT:pld r3, outputf64@got@pcrel(0), 1
@@ -286,8 +287,7 @@
 
 define dso_local void @FuncPtrCall() local_unnamed_addr #0 {
 ; CHECK-LABEL: FuncPtrCall:
-; CHECK: .localentry FuncPtrCall, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:pld r3, FuncPtrIn@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel10:
 ; CHECK-NEXT:.reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
@@ -317,8 +317,7 @@
 
 define dso_local signext i32 @VecMultiUse() local_unnamed_addr #0 {
 ; CHECK-LABEL: VecMultiUse:
-; CHECK: .localentry VecMultiUse, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:   # %bb.0: # %entry
 ; CHECK-NEXT:mflr r0
 ; CHECK-NEXT:std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:std r30, -16(r1) # 8-byte Folded Spill
@@ -355,8 +354,7 @@
 
 define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr #0 {
 ; CHECK-LABEL: UseAddr:
-; CHECK: