[PATCH] D81748: [PowerPC] Add MMA builtin decoding and definitions
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rGdaa127d77eab: [PowerPC] Add MMA builtin decoding and definitions (authored by bsaleil). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D81748/new/ https://reviews.llvm.org/D81748 Files: clang/include/clang/AST/ASTContext.h clang/include/clang/Basic/BuiltinsPPC.def clang/include/clang/Sema/Sema.h clang/lib/AST/ASTContext.cpp clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Sema/SemaChecking.cpp clang/test/CodeGen/builtins-ppc-mma.c Index: clang/test/CodeGen/builtins-ppc-mma.c === --- /dev/null +++ clang/test/CodeGen/builtins-ppc-mma.c @@ -0,0 +1,1038 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: @test1( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-NEXT:store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT:ret void +// +void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = *((__vector_pair *)vpp); + __vector_quad res; + __builtin_mma_assemble_acc(&res, vc, vc, vc, vc); + *((__vector_quad *)resp) = res; +} + +// CHECK-LABEL: @test2( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-NEXT:[[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64 +// CHECK-NEXT:[[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP1]]) +// CHECK-NEXT:[[TMP3:%.*]] = bitcast i8* 
[[RESP:%.*]] to <16 x i8>* +// CHECK-NEXT:[[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 0 +// CHECK-NEXT:store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16 +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 1 +// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16 +// CHECK-NEXT:[[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>* +// CHECK-NEXT:store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16 +// CHECK-NEXT:[[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 2 +// CHECK-NEXT:[[TMP9:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 32 +// CHECK-NEXT:[[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>* +// CHECK-NEXT:store <16 x i8> [[TMP8]], <16 x i8>* [[TMP10]], align 16 +// CHECK-NEXT:[[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 3 +// CHECK-NEXT:[[TMP12:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 48 +// CHECK-NEXT:[[TMP13:%.*]] = bitcast i8* [[TMP12]] to <16 x i8>* +// CHECK-NEXT:store <16 x i8> [[TMP11]], <16 x i8>* [[TMP13]], align 16 +// CHECK-NEXT:ret void +// +void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __builtin_mma_disassemble_acc(resp, (__vector_quad*)vqp); +} + +// CHECK-LABEL: @test3( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>* +// CHECK-NEXT:store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa !6 +// CHECK-NEXT:ret void +// +void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = *((__vector_pair *)vpp); + __vector_pair res; + __builtin_mma_assemble_pair(&res, vc, vc); + *((__vector_pair *)resp) = res; +} + +// 
CHECK-LABEL: @test4( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* +// CHECK-NEXT:[[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32 +// CHECK-NEXT:[[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> [[TMP1]]) +// CHECK-NEXT:[[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>* +// CHECK-NEXT:[[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0 +// CHECK-NEXT:store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16 +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 1 +// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16 +// CHECK-NEXT:[[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>* +//
[PATCH] D81748: [PowerPC] Add MMA builtin decoding and definitions
amyk accepted this revision as: amyk. amyk added a comment. This revision is now accepted and ready to land. Thanks Baptiste. LGTM. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D81748/new/ https://reviews.llvm.org/D81748 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D81748: [PowerPC] Add MMA builtin decoding and definitions
bsaleil updated this revision to Diff 301437. bsaleil added a comment. Fix typo and add comment to explain why we add the `ASTContext::DecodeTypeStr` function. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D81748/new/ https://reviews.llvm.org/D81748 Files: clang/include/clang/AST/ASTContext.h clang/include/clang/Basic/BuiltinsPPC.def clang/include/clang/Sema/Sema.h clang/lib/AST/ASTContext.cpp clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Sema/SemaChecking.cpp clang/test/CodeGen/builtins-ppc-mma.c Index: clang/test/CodeGen/builtins-ppc-mma.c === --- /dev/null +++ clang/test/CodeGen/builtins-ppc-mma.c @@ -0,0 +1,1038 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: @test1( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-NEXT:store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT:ret void +// +void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = *((__vector_pair *)vpp); + __vector_quad res; + __builtin_mma_assemble_acc(&res, vc, vc, vc, vc); + *((__vector_quad *)resp) = res; +} + +// CHECK-LABEL: @test2( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-NEXT:[[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64 +// CHECK-NEXT:[[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP1]]) +// CHECK-NEXT:[[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>* +// CHECK-NEXT:[[TMP4:%.*]] = extractvalue { <16 x i8>, 
<16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 0 +// CHECK-NEXT:store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16 +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 1 +// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16 +// CHECK-NEXT:[[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>* +// CHECK-NEXT:store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16 +// CHECK-NEXT:[[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 2 +// CHECK-NEXT:[[TMP9:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 32 +// CHECK-NEXT:[[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>* +// CHECK-NEXT:store <16 x i8> [[TMP8]], <16 x i8>* [[TMP10]], align 16 +// CHECK-NEXT:[[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 3 +// CHECK-NEXT:[[TMP12:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 48 +// CHECK-NEXT:[[TMP13:%.*]] = bitcast i8* [[TMP12]] to <16 x i8>* +// CHECK-NEXT:store <16 x i8> [[TMP11]], <16 x i8>* [[TMP13]], align 16 +// CHECK-NEXT:ret void +// +void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __builtin_mma_disassemble_acc(resp, (__vector_quad*)vqp); +} + +// CHECK-LABEL: @test3( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>* +// CHECK-NEXT:store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa !6 +// CHECK-NEXT:ret void +// +void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = *((__vector_pair *)vpp); + __vector_pair res; + __builtin_mma_assemble_pair(&res, vc, vc); + *((__vector_pair *)resp) = res; +} + +// CHECK-LABEL: @test4( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i8* 
[[VPP:%.*]] to <256 x i1>* +// CHECK-NEXT:[[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32 +// CHECK-NEXT:[[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> [[TMP1]]) +// CHECK-NEXT:[[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>* +// CHECK-NEXT:[[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0 +// CHECK-NEXT:store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16 +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 1 +// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16 +// CHECK-NEXT:[[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>* +// CHECK-NEXT:store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16 +//
[PATCH] D81748: [PowerPC] Add MMA builtin decoding and definitions
amyk added inline comments. Comment at: clang/include/clang/AST/ASTContext.h:2050 + QualType DecodeTypeStr(const char *&Str, const ASTContext &Context, + ASTContext::GetBuiltinTypeError &Error, I think it might be good to add a small comment here/in `ASTContext.cpp` for this function (in how we use it to fall back to default type handling). Comment at: clang/include/clang/Basic/BuiltinsPPC.def:674 +// The third argument is set to true if the builtin accumulates its result into +// its given accumulator accumulator. + Remove the extra `accumulator`. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D81748/new/ https://reviews.llvm.org/D81748 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D81748: [PowerPC] Add MMA builtin decoding and definitions
bsaleil updated this revision to Diff 300805. bsaleil added a comment. Herald added a subscriber: dexonsmith. Rebase patch Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D81748/new/ https://reviews.llvm.org/D81748 Files: clang/include/clang/AST/ASTContext.h clang/include/clang/Basic/BuiltinsPPC.def clang/include/clang/Sema/Sema.h clang/lib/AST/ASTContext.cpp clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Sema/SemaChecking.cpp clang/test/CodeGen/builtins-ppc-mma.c Index: clang/test/CodeGen/builtins-ppc-mma.c === --- /dev/null +++ clang/test/CodeGen/builtins-ppc-mma.c @@ -0,0 +1,1038 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: @test1( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-NEXT:store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT:ret void +// +void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = *((__vector_pair *)vpp); + __vector_quad res; + __builtin_mma_assemble_acc(&res, vc, vc, vc, vc); + *((__vector_quad *)resp) = res; +} + +// CHECK-LABEL: @test2( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-NEXT:[[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64 +// CHECK-NEXT:[[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP1]]) +// CHECK-NEXT:[[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>* +// CHECK-NEXT:[[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } 
[[TMP2]], 0 +// CHECK-NEXT:store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16 +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 1 +// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16 +// CHECK-NEXT:[[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>* +// CHECK-NEXT:store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16 +// CHECK-NEXT:[[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 2 +// CHECK-NEXT:[[TMP9:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 32 +// CHECK-NEXT:[[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>* +// CHECK-NEXT:store <16 x i8> [[TMP8]], <16 x i8>* [[TMP10]], align 16 +// CHECK-NEXT:[[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 3 +// CHECK-NEXT:[[TMP12:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 48 +// CHECK-NEXT:[[TMP13:%.*]] = bitcast i8* [[TMP12]] to <16 x i8>* +// CHECK-NEXT:store <16 x i8> [[TMP11]], <16 x i8>* [[TMP13]], align 16 +// CHECK-NEXT:ret void +// +void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __builtin_mma_disassemble_acc(resp, (__vector_quad*)vqp); +} + +// CHECK-LABEL: @test3( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>* +// CHECK-NEXT:store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa !6 +// CHECK-NEXT:ret void +// +void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = *((__vector_pair *)vpp); + __vector_pair res; + __builtin_mma_assemble_pair(&res, vc, vc); + *((__vector_pair *)resp) = res; +} + +// CHECK-LABEL: @test4( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* +// 
CHECK-NEXT:[[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32 +// CHECK-NEXT:[[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> [[TMP1]]) +// CHECK-NEXT:[[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>* +// CHECK-NEXT:[[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0 +// CHECK-NEXT:store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16 +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 1 +// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16 +// CHECK-NEXT:[[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>* +// CHECK-NEXT:store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16 +// CHECK-NEXT:ret void +// +void