[PATCH] D107647: [PowerPC] MMA - Add __builtin_vsx_build_pair and __builtin_mma_build_acc builtins

Ahsan Saghir via Phabricator via cfe-commits Mon, 27 Sep 2021 17:49:44 -0700

saghir updated this revision to Diff 375443.
saghir added a comment.

Addressed review comments.



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107647/new/

https://reviews.llvm.org/D107647

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-build-pair-mma.c
  clang/test/Sema/ppc-pair-mma-types.c
  clang/test/SemaCXX/ppc-pair-mma-types.cpp

Index: clang/test/SemaCXX/ppc-pair-mma-types.cpp
===================================================================
--- clang/test/SemaCXX/ppc-pair-mma-types.cpp
+++ clang/test/SemaCXX/ppc-pair-mma-types.cpp
@@ -370,6 +370,7 @@
     return *vpp; // expected-error {{invalid use of PPC MMA type}}
   };
   auto f3 = [](vector unsigned char vc) { __vector_pair vp; __builtin_vsx_assemble_pair(&vp, vc, vc); return vp; }; // expected-error {{invalid use of PPC MMA type}}
+  auto f4 = [](vector unsigned char vc) { __vector_pair vp; __builtin_vsx_build_pair(&vp, vc, vc); return vp; }; // expected-error {{invalid use of PPC MMA type}}
 }
 
 // cast
Index: clang/test/Sema/ppc-pair-mma-types.c
===================================================================
--- clang/test/Sema/ppc-pair-mma-types.c
+++ clang/test/Sema/ppc-pair-mma-types.c
@@ -249,6 +249,7 @@
   __vector_pair vp1 = *vpp;
   __vector_pair vp2;
   __builtin_vsx_assemble_pair(&vp2, vc, vc);
+  __builtin_vsx_build_pair(&vp2, vc, vc);
   __vector_pair vp3;
   __vector_quad vq;
   __builtin_mma_xvf64ger(&vq, vp3, vc);
Index: clang/test/CodeGen/builtins-ppc-build-pair-mma.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-build-pair-mma.c
@@ -0,0 +1,51 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
+// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE
+// RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \
+// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-BE
+
+// CHECK-LE-LABEL: @test1(
+// CHECK-LE-NEXT:  entry:
+// CHECK-LE-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC4:%.*]], <16 x i8> [[VC3:%.*]], <16 x i8> [[VC2:%.*]], <16 x i8> [[VC1:%.*]])
+// CHECK-LE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
+// CHECK-LE-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-LE-NEXT:    ret void
+//
+// CHECK-BE-LABEL: @test1(
+// CHECK-BE-NEXT:  entry:
+// CHECK-BE-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC1:%.*]], <16 x i8> [[VC2:%.*]], <16 x i8> [[VC3:%.*]], <16 x i8> [[VC4:%.*]])
+// CHECK-BE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
+// CHECK-BE-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-BE-NEXT:    ret void
+//
+void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vector unsigned char vc2,
+            vector unsigned char vc3, vector unsigned char vc4, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __vector_quad res;
+  __builtin_mma_build_acc(&res, vc1, vc2, vc3, vc4);
+  *((__vector_quad *)resp) = res;
+}
+
+// CHECK-LE-LABEL: @test2(
+// CHECK-LE-NEXT:  entry:
+// CHECK-LE-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC2:%.*]], <16 x i8> [[VC1:%.*]])
+// CHECK-LE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>*
+// CHECK-LE-NEXT:    store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-LE-NEXT:    ret void
+//
+// CHECK-BE-LABEL: @test2(
+// CHECK-BE-NEXT:  entry:
+// CHECK-BE-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC1:%.*]], <16 x i8> [[VC2:%.*]])
+// CHECK-BE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>*
+// CHECK-BE-NEXT:    store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-BE-NEXT:    ret void
+//
+void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1,
+            vector unsigned char vc2, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __vector_pair res;
+  __builtin_vsx_build_pair(&res, vc1, vc2);
+  *((__vector_pair *)resp) = res;
+}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -15910,6 +15910,17 @@
       }
       return Call;
     }
+    if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
+        BuiltinID == PPC::BI__builtin_mma_build_acc) {
+      // Reverse the order of the operands for LE, so the
+      // same builtin call can be used on both LE and BE
+      // without the need for the programmer to swap operands.
+      // The operands are reversed starting from the second argument,
+      // the first operand is the pointer to the pair/accumulator
+      // that is being built.
+      if (getTarget().isLittleEndian())
+        std::reverse(Ops.begin() + 1, Ops.end());
+    }
     bool Accumulate;
     switch (BuiltinID) {
   #define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -817,6 +817,8 @@
 CUSTOM_BUILTIN(mma_stxvp, vsx_stxvp, "vW256SLiW256C*", false)
 CUSTOM_BUILTIN(mma_assemble_pair, vsx_assemble_pair, "vW256*VV", false)
 CUSTOM_BUILTIN(mma_disassemble_pair, vsx_disassemble_pair, "vv*W256*", false)
+CUSTOM_BUILTIN(vsx_build_pair, vsx_assemble_pair,  "vW256*VV", false)
+CUSTOM_BUILTIN(mma_build_acc, mma_assemble_acc, "vW512*VVVV", false)
 
 // UNALIASED_CUSTOM_BUILTIN macro is used for built-ins that have
 // the same name as that of the intrinsic they generate, i.e. the

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D107647: [PowerPC] MMA - Add __builtin_vsx_build_pair and __builtin_mma_build_acc builtins

Reply via email to