[PATCH] D26546: [PPC] Add vec_insert4b/vec_extract4b to altivec.h

Sean Fertile via cfe-commits Mon, 21 Nov 2016 12:58:17 -0800

sfertile updated this revision to Diff 78760.
sfertile added a comment.

Moved the endian related massaging from altivec.h into Clang codegen and 
clamped the input index into the valid range [0, 12].



Repository:
  rL LLVM

https://reviews.llvm.org/D26546

Files:
  include/clang/Basic/BuiltinsPPC.def
  lib/CodeGen/CGBuiltin.cpp
  lib/Headers/altivec.h
  test/CodeGen/builtins-ppc-p9vector.c

Index: test/CodeGen/builtins-ppc-p9vector.c
===================================================================
--- test/CodeGen/builtins-ppc-p9vector.c
+++ test/CodeGen/builtins-ppc-p9vector.c
@@ -1180,3 +1180,27 @@
 // CHECK-LE-NEXT: ret <4 x float>
   return vec_extract_fp32_from_shortl(vusa);
 }
+vector unsigned char test116(void) {
+// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 7)
+// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8>
+// CHECK-LE: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> <i32 1, i32 0>
+// CHECK-LE-NEXT: [[T2:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> [[T1]], i32 5)
+// CHECK-LE-NEXT: bitcast <4 x i32> T2 to <16 x i8>
+  return vec_insert4b(vuia, vuca, 7);
+}
+vector unsigned char test117(void) {
+// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 5)
+// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8>
+// CHECK-LE: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> <i32 1, i32 0>
+// CHECK-LE-NEXT: [[T2:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> [[T1]], i32 7)
+// CHECK-LE-NEXT: bitcast <4 x i32> T2 to <16 x i8>
+  return vec_insert4b(vsia, vuca, 5);
+}
+vector unsigned long long test118(void) {
+// CHECK-BE: call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 11)
+// CHECK-BE-NEXT: ret <2 x i64>
+// CHECK-LE: [[T1:%.+]] = call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 1)
+// CHECK-LE-NEXT: shufflevector <2 x i64> [[T1]], <2 x i64> [[T1]], <2 x i32> <i32 1, i32 0>
+// CHECK-LE-NEXT: ret <2 x i64>
+  return vec_extract4b(vuca, 11);
+}
Index: lib/Headers/altivec.h
===================================================================
--- lib/Headers/altivec.h
+++ lib/Headers/altivec.h
@@ -12456,6 +12456,9 @@
 
 #ifdef __POWER9_VECTOR__
 
+#define vec_insert4b __builtin_vsx_insertword
+#define vec_extract4b __builtin_vsx_extractuword
+
 /* vec_extract_exp */
 
 static __inline__ vector unsigned int __ATTRS_o_ai
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -35,6 +35,11 @@
 using namespace CodeGen;
 using namespace llvm;
 
+static
+int64_t clamp(int64_t value, int64_t low, int64_t high) {
+  return std::min(high, std::max(low, value));
+}
+
 /// getBuiltinLibFunction - Given a builtin id for a function like
 /// "__builtin_fabsf", return a Function* for "fabsf".
 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
@@ -8168,6 +8173,67 @@
     llvm_unreachable("Unknown FMA operation");
     return nullptr; // Suppress no-return warning
   }
+  case PPC::BI__builtin_vsx_insertword: {
+    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
+
+    // Third argument is a compile time constant int. It must be clamped to
+    // to the range [0, 12].
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI);
+    int64_t index = clamp(ArgCI->getSExtValue(), 0, 12);
+
+    // Need to cast the second argument from a vector of cahr to a vector
+    // of long long.
+    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
+
+    if(getTarget().isLittleEndian()) {
+      // Create a shuffle mask of (1, 0)
+      Constant *ShuffleElts[2];
+      ShuffleElts[0] = ConstantInt::get(Int32Ty, 1);
+      ShuffleElts[1] = ConstantInt::get(Int32Ty, 0);
+      Constant* ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
+      // Reverse the double words in the second argument.
+      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], ShuffleMask);
+
+      // reverse the index
+      index = 12 - index;
+    }
+
+    Ops[2] = ConstantInt::getSigned(Int32Ty, index);
+    return Builder.CreateCall(F, Ops);
+  }
+  case PPC::BI__builtin_vsx_extractuword: {
+    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
+
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
+
+    // The second argument is a compile time constant int that needs to
+    // be clamped to the range [0, 12].
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
+    assert(ArgCI);
+    int64_t index = clamp(ArgCI->getSExtValue(), 0, 12);
+
+    if(getTarget().isLittleEndian()) {
+      // Reverse the index
+      index = 12 - index;
+      Ops[1] = ConstantInt::getSigned(Int32Ty, index);
+
+      // Emit the call, then reverse the double words of the results vector.
+      Value *Call = Builder.CreateCall(F, Ops);
+
+      // Create a shuffle mask of (1, 0)
+      Constant *ShuffleElts[2];
+      ShuffleElts[0] = ConstantInt::get(Int32Ty, 1);
+      ShuffleElts[1] = ConstantInt::get(Int32Ty, 0);
+      Constant* ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
+
+      Value* ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
+      return ShuffleCall;
+    } else {
+      Ops[1] = ConstantInt::getSigned(Int32Ty, index);
+      return Builder.CreateCall(F, Ops);
+    }
+  }
   }
 }
 
Index: include/clang/Basic/BuiltinsPPC.def
===================================================================
--- include/clang/Basic/BuiltinsPPC.def
+++ include/clang/Basic/BuiltinsPPC.def
@@ -417,6 +417,9 @@
 BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "")
 BUILTIN(__builtin_vsx_xvtstdcsp, "V4UiV4fIi", "")
 
+BUILTIN(__builtin_vsx_insertword, "V16UcV4UiV16UcIi", "")
+BUILTIN(__builtin_vsx_extractuword, "V2ULLiV16UcIi", "")
+
 // HTM builtins
 BUILTIN(__builtin_tbegin, "UiUIi", "")
 BUILTIN(__builtin_tend, "UiUIi", "")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D26546: [PPC] Add vec_insert4b/vec_extract4b to altivec.h

Reply via email to