[PATCH] D26520: [PowerPC] Implement BE VSX load/store builtins - llvm portion.

Tony Jiang via cfe-commits Thu, 10 Nov 2016 13:11:01 -0800

jtony created this revision.
jtony added reviewers: nemanjai, sfertile, syzaara, lei, kbarton, amehsan.
jtony added subscribers: llvm-commits, cfe-commits, echristo.


This patch implements all the overloads for vec_xl_be and vec_xst_be. On BE,
they behaves exactly the same with vec_xl and vec_xst, therefore they are
simply implemented by defining a matching macro. On LE, they are implemented
by defining new builtins and intrinsics. For int/float/long long/double, it
is just a load (lxvw4x/lxvd2x) or store(stxvw4x/stxvd2x). For char/char/short,
we also need some extra shuffling before or after call the builtins to get the
desired BE order. For int128, simply call vec_xl or vec_xst.

Signatures:
vector signed char vec_xl_be (signed long long, signed char *);
vector unsigned char vec_xl_be (signed long long, unsigned char *);
vector signed int vec_xl_be (signed long long, signed int *);
vector unsigned int vec_xl_be (signed long long, unsigned int *);
vector signed int128 vec_xl_be (signed long long, signed int128 *);
vector unsigned int128 vec_xl_be (signed long long, unsigned int128 *);
vector signed long long vec_xl_be (signed long long, signed long long *);
vector unsigned long long vec_xl_be (signed long long, unsigned long long *);
vector signed short vec_xl_be (signed long long, signed short *);
vector unsigned short vec_xl_be (signed long long, unsigned short *);
vector double vec_xl_be (signed long long, double *);
vector float vec_xl_be (signed long long, float *);
void vec_xst_be (vector signed char, signed long long, signed char *);
void vec_xst_be (vector unsigned char, signed long long, unsigned char *);
void vec_xst_be (vector signed int, signed long long, signed int *);
void vec_xst_be (vector unsigned int, signed long long, unsigned int *);
void vec_xst_be (vector signed int128, signed long long, signed int128 *);
void vec_xst_be (vector unsigned int128, signed long long, unsigned int128 *);
void vec_xst_be (vector signed long long, signed long long, signed long long *);
void vec_xst_be (vector unsigned long long, signed long long, unsigned long 
long *);
void vec_xst_be (vector signed short, signed long long, signed short *);
void vec_xst_be (vector unsigned short, signed long long, unsigned short *);
void vec_xst_be (vector double, signed long long, double *);
void vec_xst_be (vector float, signed long long, float *);


https://reviews.llvm.org/D26520

Files:
  include/llvm/IR/IntrinsicsPowerPC.td
  lib/Target/PowerPC/PPCISelLowering.cpp
  lib/Target/PowerPC/PPCInstrVSX.td
  test/CodeGen/PowerPC/vsx.ll

Index: test/CodeGen/PowerPC/vsx.ll
===================================================================
--- test/CodeGen/PowerPC/vsx.ll
+++ test/CodeGen/PowerPC/vsx.ll
@@ -1190,3 +1190,51 @@
 ; CHECK-LE: xscmpudp [[REG:[0-9]+]], 3, 4
 ; CHECK-LE: beqlr [[REG]]
 }
+
+; Function Attrs: nounwind readnone
+define <4 x i32> @test83(i8* %a) {
+  entry:
+    %0 = tail call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %a)
+      ret <4 x i32> %0
+      ; CHECK-LABEL: test83
+      ; CHECK: lxvw4x 34, 0, 3
+      ; CHECK: blr
+}
+; Function Attrs: nounwind readnone
+declare <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8*)
+
+; Function Attrs: nounwind readnone
+define <2 x double> @test84(i8* %a) {
+  entry:
+    %0 = tail call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %a)
+      ret <2 x double> %0
+      ; CHECK-LABEL: test84
+      ; CHECK: lxvd2x 34, 0, 3
+      ; CHECK: blr
+}
+; Function Attrs: nounwind readnone
+declare <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8*)
+
+; Function Attrs: nounwind readnone
+define void @test85(<4 x i32> %a, i8* %b) {
+  entry:
+    tail call void @llvm.ppc.vsx.stxvw4x.be(<4 x i32> %a, i8* %b)
+    ret void
+      ; CHECK-LABEL: test85
+      ; CHECK: stxvw4x 34, 0, 5
+      ; CHECK: blr
+}
+; Function Attrs: nounwind readnone
+declare void @llvm.ppc.vsx.stxvw4x.be(<4 x i32>, i8*)
+
+; Function Attrs: nounwind readnone
+define void @test86(<2 x double> %a, i8* %b) {
+  entry:
+    tail call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %a, i8* %b)
+    ret void
+      ; CHECK-LABEL: test86
+      ; CHECK: stxvd2x 34, 0, 5
+      ; CHECK: blr
+}
+; Function Attrs: nounwind readnone
+declare void @llvm.ppc.vsx.stxvd2x.be(<2 x double>, i8*)
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -1012,6 +1012,10 @@
             (STXVD2X $rS, xoaddr:$dst)>;
   def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
             (STXVW4X $rS, xoaddr:$dst)>;
+  def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
+            (STXVD2X $rS, xoaddr:$dst)>;
+  def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
+            (STXVW4X $rS, xoaddr:$dst)>;
   def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
 }
 let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
@@ -1840,6 +1844,9 @@
   def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
             (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
 
+  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+  def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+
 let Predicates = [IsLittleEndian, HasDirectMove] in {
   // v16i8 scalar <-> vector conversions (LE)
   def : Pat<(v16i8 (scalar_to_vector i32:$A)),
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9783,9 +9783,11 @@
     case Intrinsic::ppc_altivec_lvx:
     case Intrinsic::ppc_altivec_lvxl:
     case Intrinsic::ppc_vsx_lxvw4x:
+    case Intrinsic::ppc_vsx_lxvw4x_be:
       VT = MVT::v4i32;
       break;
     case Intrinsic::ppc_vsx_lxvd2x:
+    case Intrinsic::ppc_vsx_lxvd2x_be:
       VT = MVT::v2f64;
       break;
     case Intrinsic::ppc_altivec_lvebx:
@@ -9832,6 +9834,12 @@
     case Intrinsic::ppc_vsx_stxvd2x:
       VT = MVT::v2f64;
       break;
+    case Intrinsic::ppc_vsx_stxvw4x_be:
+      VT = MVT::v4i32;
+      break;
+    case Intrinsic::ppc_vsx_stxvd2x_be:
+      VT = MVT::v2f64;
+      break;
     case Intrinsic::ppc_altivec_stvebx:
       VT = MVT::i8;
       break;
Index: include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- include/llvm/IR/IntrinsicsPowerPC.td
+++ include/llvm/IR/IntrinsicsPowerPC.td
@@ -721,13 +721,20 @@
       Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
 def int_ppc_vsx_lxvd2x :
       Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
+def int_ppc_vsx_lxvw4x_be :
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
+def int_ppc_vsx_lxvd2x_be :
+      Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
 
 // Vector store.
 def int_ppc_vsx_stxvw4x :
       Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], [IntrArgMemOnly]>;
 def int_ppc_vsx_stxvd2x :
       Intrinsic<[], [llvm_v2f64_ty, llvm_ptr_ty], [IntrArgMemOnly]>;
-
+def int_ppc_vsx_stxvw4x_be :
+      Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], [IntrArgMemOnly]>;
+def int_ppc_vsx_stxvd2x_be :
+      Intrinsic<[], [llvm_v2f64_ty, llvm_ptr_ty], [IntrArgMemOnly]>;
 // Vector and scalar maximum.
 def int_ppc_vsx_xvmaxdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvmaxdp">;
 def int_ppc_vsx_xvmaxsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvmaxsp">;

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D26520: [PowerPC] Implement BE VSX load/store builtins - llvm portion.

Reply via email to