[PATCH] D81748: [PowerPC] Add MMA builtin decoding and definitions

2020-11-03 Thread Baptiste Saleil via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGdaa127d77eab: [PowerPC] Add MMA builtin decoding and 
definitions (authored by bsaleil).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81748/new/

https://reviews.llvm.org/D81748

Files:
  clang/include/clang/AST/ASTContext.h
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/ASTContext.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-mma.c

Index: clang/test/CodeGen/builtins-ppc-mma.c
===
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-mma.c
@@ -0,0 +1,1038 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: @test1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT:[[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
+// CHECK-NEXT:store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT:ret void
+//
+void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __vector_quad res;
+  __builtin_mma_assemble_acc(, vc, vc, vc, vc);
+  *((__vector_quad *)resp) = res;
+}
+
+// CHECK-LABEL: @test2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
+// CHECK-NEXT:[[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64
+// CHECK-NEXT:[[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP1]])
+// CHECK-NEXT:[[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
+// CHECK-NEXT:[[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 0
+// CHECK-NEXT:store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
+// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 1
+// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
+// CHECK-NEXT:[[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
+// CHECK-NEXT:store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16
+// CHECK-NEXT:[[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 2
+// CHECK-NEXT:[[TMP9:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 32
+// CHECK-NEXT:[[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>*
+// CHECK-NEXT:store <16 x i8> [[TMP8]], <16 x i8>* [[TMP10]], align 16
+// CHECK-NEXT:[[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 3
+// CHECK-NEXT:[[TMP12:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 48
+// CHECK-NEXT:[[TMP13:%.*]] = bitcast i8* [[TMP12]] to <16 x i8>*
+// CHECK-NEXT:store <16 x i8> [[TMP11]], <16 x i8>* [[TMP13]], align 16
+// CHECK-NEXT:ret void
+//
+void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __builtin_mma_disassemble_acc(resp, (__vector_quad*)vqp);
+}
+
+// CHECK-LABEL: @test3(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
+// CHECK-NEXT:[[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>*
+// CHECK-NEXT:store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa !6
+// CHECK-NEXT:ret void
+//
+void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __vector_pair res;
+  __builtin_mma_assemble_pair(, vc, vc);
+  *((__vector_pair *)resp) = res;
+}
+
+// CHECK-LABEL: @test4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
+// CHECK-NEXT:[[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32
+// CHECK-NEXT:[[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> [[TMP1]])
+// CHECK-NEXT:[[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
+// CHECK-NEXT:[[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0
+// CHECK-NEXT:store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
+// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 1
+// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
+// CHECK-NEXT:[[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
+// 

[PATCH] D81748: [PowerPC] Add MMA builtin decoding and definitions

2020-11-02 Thread Amy Kwan via Phabricator via cfe-commits
amyk accepted this revision as: amyk.
amyk added a comment.
This revision is now accepted and ready to land.

Thanks Baptiste. LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81748/new/

https://reviews.llvm.org/D81748

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D81748: [PowerPC] Add MMA builtin decoding and definitions

2020-10-28 Thread Baptiste Saleil via Phabricator via cfe-commits
bsaleil updated this revision to Diff 301437.
bsaleil added a comment.

Fix typo and add comment to explain why we add the `ASTContext::DecodeTypeStr` 
function.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81748/new/

https://reviews.llvm.org/D81748

Files:
  clang/include/clang/AST/ASTContext.h
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/ASTContext.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-mma.c

Index: clang/test/CodeGen/builtins-ppc-mma.c
===
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-mma.c
@@ -0,0 +1,1038 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: @test1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT:[[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
+// CHECK-NEXT:store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT:ret void
+//
+void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __vector_quad res;
+  __builtin_mma_assemble_acc(, vc, vc, vc, vc);
+  *((__vector_quad *)resp) = res;
+}
+
+// CHECK-LABEL: @test2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
+// CHECK-NEXT:[[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64
+// CHECK-NEXT:[[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP1]])
+// CHECK-NEXT:[[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
+// CHECK-NEXT:[[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 0
+// CHECK-NEXT:store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
+// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 1
+// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
+// CHECK-NEXT:[[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
+// CHECK-NEXT:store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16
+// CHECK-NEXT:[[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 2
+// CHECK-NEXT:[[TMP9:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 32
+// CHECK-NEXT:[[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>*
+// CHECK-NEXT:store <16 x i8> [[TMP8]], <16 x i8>* [[TMP10]], align 16
+// CHECK-NEXT:[[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 3
+// CHECK-NEXT:[[TMP12:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 48
+// CHECK-NEXT:[[TMP13:%.*]] = bitcast i8* [[TMP12]] to <16 x i8>*
+// CHECK-NEXT:store <16 x i8> [[TMP11]], <16 x i8>* [[TMP13]], align 16
+// CHECK-NEXT:ret void
+//
+void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __builtin_mma_disassemble_acc(resp, (__vector_quad*)vqp);
+}
+
+// CHECK-LABEL: @test3(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
+// CHECK-NEXT:[[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>*
+// CHECK-NEXT:store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa !6
+// CHECK-NEXT:ret void
+//
+void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __vector_pair res;
+  __builtin_mma_assemble_pair(, vc, vc);
+  *((__vector_pair *)resp) = res;
+}
+
+// CHECK-LABEL: @test4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
+// CHECK-NEXT:[[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32
+// CHECK-NEXT:[[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> [[TMP1]])
+// CHECK-NEXT:[[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
+// CHECK-NEXT:[[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0
+// CHECK-NEXT:store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
+// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 1
+// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
+// CHECK-NEXT:[[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
+// CHECK-NEXT:store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16
+// 

[PATCH] D81748: [PowerPC] Add MMA builtin decoding and definitions

2020-10-28 Thread Amy Kwan via Phabricator via cfe-commits
amyk added inline comments.



Comment at: clang/include/clang/AST/ASTContext.h:2050
 
+  QualType DecodeTypeStr(const char *, const ASTContext ,
+ ASTContext::GetBuiltinTypeError ,

I think it might be good to add a small comment here/in `ASTContext.cpp` for 
this function (in how we use it to fall back to default type handling).



Comment at: clang/include/clang/Basic/BuiltinsPPC.def:674
+// The third argument is set to true if the builtin accumulates its result into
+// its given accumulator accumulator.
+

Remove the extra `accumulator`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81748/new/

https://reviews.llvm.org/D81748

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D81748: [PowerPC] Add MMA builtin decoding and definitions

2020-10-26 Thread Baptiste Saleil via Phabricator via cfe-commits
bsaleil updated this revision to Diff 300805.
bsaleil added a comment.
Herald added a subscriber: dexonsmith.

Rebase patch


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81748/new/

https://reviews.llvm.org/D81748

Files:
  clang/include/clang/AST/ASTContext.h
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/ASTContext.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-mma.c

Index: clang/test/CodeGen/builtins-ppc-mma.c
===
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-mma.c
@@ -0,0 +1,1038 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: @test1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]])
+// CHECK-NEXT:[[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
+// CHECK-NEXT:store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT:ret void
+//
+void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __vector_quad res;
+  __builtin_mma_assemble_acc(, vc, vc, vc, vc);
+  *((__vector_quad *)resp) = res;
+}
+
+// CHECK-LABEL: @test2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
+// CHECK-NEXT:[[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64
+// CHECK-NEXT:[[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP1]])
+// CHECK-NEXT:[[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
+// CHECK-NEXT:[[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 0
+// CHECK-NEXT:store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
+// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 1
+// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
+// CHECK-NEXT:[[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
+// CHECK-NEXT:store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16
+// CHECK-NEXT:[[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 2
+// CHECK-NEXT:[[TMP9:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 32
+// CHECK-NEXT:[[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>*
+// CHECK-NEXT:store <16 x i8> [[TMP8]], <16 x i8>* [[TMP10]], align 16
+// CHECK-NEXT:[[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 3
+// CHECK-NEXT:[[TMP12:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 48
+// CHECK-NEXT:[[TMP13:%.*]] = bitcast i8* [[TMP12]] to <16 x i8>*
+// CHECK-NEXT:store <16 x i8> [[TMP11]], <16 x i8>* [[TMP13]], align 16
+// CHECK-NEXT:ret void
+//
+void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __builtin_mma_disassemble_acc(resp, (__vector_quad*)vqp);
+}
+
+// CHECK-LABEL: @test3(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
+// CHECK-NEXT:[[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>*
+// CHECK-NEXT:store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa !6
+// CHECK-NEXT:ret void
+//
+void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __vector_pair res;
+  __builtin_mma_assemble_pair(, vc, vc);
+  *((__vector_pair *)resp) = res;
+}
+
+// CHECK-LABEL: @test4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
+// CHECK-NEXT:[[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32
+// CHECK-NEXT:[[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> [[TMP1]])
+// CHECK-NEXT:[[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
+// CHECK-NEXT:[[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0
+// CHECK-NEXT:store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
+// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 1
+// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
+// CHECK-NEXT:[[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
+// CHECK-NEXT:store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16
+// CHECK-NEXT:ret void
+//
+void