This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG6b136b19cbe4: [Power10] Implement custom codegen for the vec_replace_elt and… (authored by amyk).

Changed prior to commit:
  https://reviews.llvm.org/D83500?vs=293779&id=293936#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83500/new/

https://reviews.llvm.org/D83500

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/altivec.h
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-p10vector.c
  clang/test/CodeGen/builtins-ppc-vec-ins-error.c

Index: clang/test/CodeGen/builtins-ppc-vec-ins-error.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-vec-ins-error.c
@@ -0,0 +1,81 @@
+// REQUIRES: powerpc-registered-target
+
+// RUN: %clang_cc1 -target-feature +vsx -target-cpu pwr10 \
+// RUN:   -triple powerpc64le-unknown-unknown -fsyntax-only %s -verify
+// RUN: %clang_cc1 -target-feature +vsx -target-cpu pwr10 \
+// RUN:   -triple powerpc64-unknown-unknown -fsyntax-only %s -verify
+
+#include <altivec.h>
+
+vector signed int vsia;
+vector unsigned int vuia;
+vector signed long long vslla;
+vector unsigned long long vulla;
+vector float vfa;
+vector double vda;
+signed int sia;
+unsigned int uia;
+signed long long slla;
+unsigned long long ulla;
+float fa;
+double da;
+
+vector signed int test_vec_replace_elt_si(void) {
+  return vec_replace_elt(vsia, sia, 13); // expected-error {{argument value 13 is outside the valid range [0, 12]}}
+}
+
+vector unsigned int test_vec_replace_elt_ui(void) {
+  return vec_replace_elt(vuia, sia, 1); // expected-error {{arguments are of different types ('unsigned int' vs 'int')}}
+}
+
+vector float test_vec_replace_elt_f(void) {
+  return vec_replace_elt(vfa, fa, 20); // expected-error {{argument value 20 is outside the valid range [0, 12]}}
+}
+
+vector float test_vec_replace_elt_f_2(void) {
+  return vec_replace_elt(vfa, da, 0); // expected-error {{arguments are of different types ('float' vs 'double')}}
+}
+
+vector signed long long test_vec_replace_elt_sll(void) {
+  return vec_replace_elt(vslla, slla, 9); // expected-error {{argument value 9 is outside the valid range [0, 8]}}
+}
+
+vector unsigned long long test_vec_replace_elt_ull(void) {
+  return vec_replace_elt(vulla, vda, 0); // expected-error {{arguments are of different types ('unsigned long long' vs '__vector double' (vector of 2 'double' values))}}
+}
+
+vector unsigned long long test_vec_replace_elt_ull_2(void) {
+  return vec_replace_elt(vulla, vulla, vsia); // expected-error {{argument to '__builtin_altivec_vec_replace_elt' must be a constant integer}}
+}
+
+vector double test_vec_replace_elt_d(void) {
+  return vec_replace_elt(vda, da, 33); // expected-error {{argument value 33 is outside the valid range [0, 8]}}
+}
+
+vector unsigned char test_vec_replace_unaligned_si(void) {
+  return vec_replace_unaligned(vsia, da, 6); // expected-error {{arguments are of different types ('int' vs 'double')}}
+}
+
+vector unsigned char test_vec_replace_unaligned_ui(void) {
+  return vec_replace_unaligned(vuia, uia, 14); // expected-error {{argument value 14 is outside the valid range [0, 12]}}
+}
+
+vector unsigned char test_vec_replace_unaligned_f(void) {
+  return vec_replace_unaligned(vfa, fa, 19); // expected-error {{argument value 19 is outside the valid range [0, 12]}}
+}
+
+vector unsigned char test_vec_replace_unaligned_sll(void) {
+  return vec_replace_unaligned(vslla, fa, 0); // expected-error {{arguments are of different types ('long long' vs 'float')}}
+}
+
+vector unsigned char test_vec_replace_unaligned_ull(void) {
+  return vec_replace_unaligned(vulla, ulla, 12); // expected-error {{argument value 12 is outside the valid range [0, 8]}}
+}
+
+vector unsigned char test_vec_replace_unaligned_d(void) {
+  return vec_replace_unaligned(vda, fa, 8); // expected-error {{arguments are of different types ('double' vs 'float')}}
+}
+
+vector unsigned char test_vec_replace_unaligned_d_2(void) {
+  return vec_replace_unaligned(vda, vda, da); // expected-error {{argument to '__builtin_altivec_vec_replace_unaligned' must be a constant integer}}
+}
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -21,6 +21,9 @@
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
+float fa;
+double da;
+signed int sia;
 signed int *iap;
 unsigned int uia, uib, *uiap;
 signed char *cap;
@@ -1011,6 +1014,126 @@
   return vec_blendv(vda, vdb, vullc);
 }
 
+vector signed int test_vec_replace_elt_si(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <4 x i32>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
+  // CHECK-LE-NEXT: ret <4 x i32>
+  return vec_replace_elt(vsia, sia, 0);
+}
+
+vector unsigned int test_vec_replace_elt_ui(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-BE-NEXT: ret <4 x i32>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-LE-NEXT: ret <4 x i32>
+  return vec_replace_elt(vuia, uia, 1);
+}
+
+vector float test_vec_replace_elt_f(void) {
+  // CHECK-BE: bitcast float %{{.+}} to i32
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-BE-NEXT: ret <4 x float>
+  // CHECK-LE: bitcast float %{{.+}} to i32
+  // CHECK-LE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-LE-NEXT: ret <4 x float>
+  return vec_replace_elt(vfa, fa, 2);
+}
+
+vector signed long long test_vec_replace_elt_sll(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-LE-NEXT: ret <2 x i64>
+  return vec_replace_elt(vslla, llb, 0);
+}
+
+vector unsigned long long test_vec_replace_elt_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-LE-NEXT: ret <2 x i64>
+  return vec_replace_elt(vulla, ulla, 0);
+}
+
+vector double test_vec_replace_elt_d(void) {
+  // CHECK-BE: bitcast double %{{.+}} to i64
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-BE-NEXT: ret <2 x double>
+  // CHECK-LE: bitcast double %{{.+}} to i64
+  // CHECK-LE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-LE-NEXT: ret <2 x double>
+  return vec_replace_elt(vda, da, 1);
+}
+
+vector unsigned char test_vec_replace_unaligned_si(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vsia, sia, 6);
+}
+
+vector unsigned char test_vec_replace_unaligned_ui(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vuia, uia, 8);
+}
+
+vector unsigned char test_vec_replace_unaligned_f(void) {
+  // CHECK-BE: bitcast float %{{.+}} to i32
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: bitcast float %{{.+}} to i32
+  // CHECK-LE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vfa, fa, 12);
+}
+
+vector unsigned char test_vec_replace_unaligned_sll(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 6
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 2
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vslla, llb, 6);
+}
+
+vector unsigned char test_vec_replace_unaligned_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 7
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 1
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vulla, ulla, 7);
+}
+
+vector unsigned char test_vec_replace_unaligned_d(void) {
+  // CHECK-BE: bitcast double %{{.+}} to i64
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: bitcast double %{{.+}} to i64
+  // CHECK-LE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vda, da, 8);
+}
+
 vector unsigned char test_vec_insertl_uc(void) {
   // CHECK-BE: @llvm.ppc.altivec.vinsblx(<16 x i8> %{{.+}}, i32 %{{.+}}, i32
   // CHECK-BE-NEXT: ret <16 x i8>
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -2570,6 +2570,17 @@
           dyn_cast<ArraySubscriptExpr>(Arg->IgnoreParens()));
 }
 
+static bool isEltOfVectorTy(ASTContext &Context, CallExpr *Call, Sema &S,
+                            QualType VectorTy, QualType EltTy) {
+  QualType VectorEltTy = VectorTy->castAs<VectorType>()->getElementType();
+  if (!Context.hasSameType(VectorEltTy, EltTy)) {
+    S.Diag(Call->getBeginLoc(), diag::err_typecheck_call_different_arg_types)
+        << Call->getSourceRange() << VectorEltTy << EltTy;
+    return false;
+  }
+  return true;
+}
+
 static bool isValidBPFPreserveTypeInfoArg(Expr *Arg) {
   QualType ArgType = Arg->getType();
   if (ArgType->getAsPlaceholderType())
@@ -3222,6 +3233,14 @@
     return SemaVSXCheck(TheCall);
   case PPC::BI__builtin_altivec_vgnb:
      return SemaBuiltinConstantArgRange(TheCall, 1, 2, 7);
+  case PPC::BI__builtin_altivec_vec_replace_elt:
+  case PPC::BI__builtin_altivec_vec_replace_unaligned: {
+    QualType VecTy = TheCall->getArg(0)->getType();
+    QualType EltTy = TheCall->getArg(1)->getType();
+    unsigned Width = Context.getIntWidth(EltTy);
+    return SemaBuiltinConstantArgRange(TheCall, 2, 0, Width == 32 ? 12 : 8) ||
+           !isEltOfVectorTy(Context, TheCall, *this, VecTy, EltTy);
+  }
   case PPC::BI__builtin_vsx_xxeval:
      return SemaBuiltinConstantArgRange(TheCall, 3, 0, 255);
   case PPC::BI__builtin_altivec_vsldbi:
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -17837,6 +17837,14 @@
   return __builtin_vsx_xxblendvd(__a, __b, __c);
 }
 
+/* vec_replace_elt */
+
+#define vec_replace_elt __builtin_altivec_vec_replace_elt
+
+/* vec_replace_unaligned */
+
+#define vec_replace_unaligned __builtin_altivec_vec_replace_unaligned
+
 /* vec_splati */
 
 #define vec_splati(__a)                                                        \
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14224,6 +14224,63 @@
     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
     return Builder.CreateCall(F, {X, Undef});
   }
+  case PPC::BI__builtin_altivec_vec_replace_elt:
+  case PPC::BI__builtin_altivec_vec_replace_unaligned: {
+    // The third argument of vec_replace_elt and vec_replace_unaligned must
+    // be a compile-time constant and is emitted as the immediate operand of
+    // either the vinsw or the vinsd instruction.
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI &&
+           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
+    llvm::Type *ResultType = ConvertType(E->getType());
+    llvm::Function *F = nullptr;
+    Value *Call = nullptr;
+    int64_t ConstArg = ArgCI->getSExtValue();
+    unsigned ArgWidth = Ops[1]->getType()->getPrimitiveSizeInBits();
+    bool Is32Bit = false;
+    assert((ArgWidth == 32 || ArgWidth == 64) && "Invalid argument width");
+    // The input to vec_replace_elt is an element index; scale it to a byte offset.
+    if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt)
+      ConstArg *= ArgWidth / 8;
+    if (ArgWidth == 32) {
+      Is32Bit = true;
+      // When the second argument is 32 bits, it can either be an integer or
+      // a float. The vinsw intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw);
+      // Fix the constant according to endianness.
+      if (getTarget().isLittleEndian())
+        ConstArg = 12 - ConstArg;
+    } else {
+      // When the second argument is 64 bits, it can either be a long long or
+      // a double. The vinsd intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd);
+      // Fix the constant for little endian.
+      if (getTarget().isLittleEndian())
+        ConstArg = 8 - ConstArg;
+    }
+    Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+    // If the replacement value is a float or a double, bitcast the vector to
+    // the corresponding integer vector type and the scalar to a 32-bit or
+    // 64-bit integer before calling the intrinsic.
+    if (!Ops[1]->getType()->isIntegerTy(ArgWidth)) {
+      Ops[0] = Builder.CreateBitCast(
+          Ops[0], Is32Bit ? llvm::FixedVectorType::get(Int32Ty, 4)
+                          : llvm::FixedVectorType::get(Int64Ty, 2));
+      Ops[1] = Builder.CreateBitCast(Ops[1], Is32Bit ? Int32Ty : Int64Ty);
+    }
+    // Emit the call to vinsw or vinsd.
+    Call = Builder.CreateCall(F, Ops);
+    // Depending on the builtin, bitcast to the appropriate result type.
+    if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
+        !Ops[1]->getType()->isIntegerTy())
+      return Builder.CreateBitCast(Call, ResultType);
+    else if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
+             Ops[1]->getType()->isIntegerTy())
+      return Call;
+    else
+      return Builder.CreateBitCast(Call,
+                                   llvm::FixedVectorType::get(Int8Ty, 16));
+  }
   case PPC::BI__builtin_altivec_vpopcntb:
   case PPC::BI__builtin_altivec_vpopcnth:
   case PPC::BI__builtin_altivec_vpopcntw:
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -409,6 +409,8 @@
 BUILTIN(__builtin_altivec_vinshvrx, "V8UsV8UsUiV8Us", "")
 BUILTIN(__builtin_altivec_vinswvlx, "V4UiV4UiUiV4Ui", "")
 BUILTIN(__builtin_altivec_vinswvrx, "V4UiV4UiUiV4Ui", "")
+BUILTIN(__builtin_altivec_vec_replace_elt, "V4UiV4UiUiIi", "t")
+BUILTIN(__builtin_altivec_vec_replace_unaligned, "V4UiV4UiUiIi", "t")
 
 // P10 Vector Extract built-ins.
 BUILTIN(__builtin_altivec_vextdubvlx, "V2ULLiV16UcV16UcUi", "")
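
For reference, a minimal usage sketch of the two new interfaces (illustrative
only, not part of the patch; the function names are made up and a Power10
target such as -mcpu=pwr10 is assumed). vec_replace_elt takes an element
index, which the custom codegen scales to a byte offset and mirrors on
little-endian targets (12 - offset for word inserts, 8 - offset for
doubleword inserts), while vec_replace_unaligned takes a byte offset directly
and always returns vector unsigned char:

  #include <altivec.h>

  // Replace word element 1 of v with x; per the CodeGen tests above this is
  // lowered to vinsw with byte offset 4 on big-endian targets and 8 on
  // little-endian targets.
  vector unsigned int replace_word(vector unsigned int v, unsigned int x) {
    return vec_replace_elt(v, x, 1);
  }

  // Replace a doubleword of v at byte offset 4 with d; the operands are
  // bitcast to <2 x i64>/i64, the call is lowered to vinsd, and the result
  // comes back as vector unsigned char.
  vector unsigned char replace_bytes(vector double v, double d) {
    return vec_replace_unaligned(v, d, 4);
  }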