https://github.com/maryammo updated https://github.com/llvm/llvm-project/pull/185961
>From f4bd1e17350954aea1327b885033abfa6b3cecb3 Mon Sep 17 00:00:00 2001 From: Maryam Moghadas <[email protected]> Date: Wed, 11 Mar 2026 19:39:45 +0000 Subject: [PATCH 1/2] [Clang][PowerPC] Add DMF crypto builtins for extended mnemonics --- clang/include/clang/Basic/BuiltinsPPC.def | 30 +- clang/lib/CodeGen/TargetBuiltins/PPC.cpp | 77 ++- clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c | 420 ++++++++++--- .../CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c | 32 +- llvm/include/llvm/IR/IntrinsicsPowerPC.td | 6 +- llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td | 6 +- llvm/test/CodeGen/PowerPC/dmrp-spill.ll | 6 +- llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll | 554 +++++++++++++++++- 8 files changed, 1028 insertions(+), 103 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 75d7d92c4f9d4..baa7e057a777d 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -1125,11 +1125,35 @@ UNALIASED_CUSTOM_BUILTIN(mma_disassemble_dmr, "vv*W1024*", false, UNALIASED_CUSTOM_BUILTIN(mma_build_dmr, "vW1024*VVVVVVVV", false, "mma,isa-future-instructions") -UNALIASED_CUSTOM_BUILTIN(mma_dmsha2hash, "vW1024*W1024*Ii", true, +UNALIASED_CUSTOM_BUILTIN(dmsha2hash, "vW1024*W1024*Ii", true, "mma,isa-future-instructions") -UNALIASED_CUSTOM_BUILTIN(mma_dmsha3hash, "vW2048*Ii", true, +UNALIASED_CUSTOM_BUILTIN(dmsha3hash, "vW2048*Ii", true, "mma,isa-future-instructions") -UNALIASED_CUSTOM_BUILTIN(mma_dmxxshapad, "vW1024*VIiIiIi", true, +UNALIASED_CUSTOM_BUILTIN(dmxxshapad, "vW1024*VIiIiIi", true, + "mma,isa-future-instructions") +CUSTOM_BUILTIN(dmsha256hash, dmsha2hash, "vW1024*W1024*", true, + "mma,isa-future-instructions") +CUSTOM_BUILTIN(dmsha512hash, dmsha2hash, "vW1024*W1024*", true, + "mma,isa-future-instructions") +CUSTOM_BUILTIN(dmsha3dw, dmsha3hash, "vW2048*", true, + "mma,isa-future-instructions") +CUSTOM_BUILTIN(dmcryshash, dmsha3hash, "vW2048*", true, + 
"mma,isa-future-instructions") +CUSTOM_BUILTIN(dmxxsha3512pad, dmxxshapad, "vW1024*VIi", true, + "mma,isa-future-instructions") +CUSTOM_BUILTIN(dmxxsha3384pad, dmxxshapad, "vW1024*VIi", true, + "mma,isa-future-instructions") +CUSTOM_BUILTIN(dmxxsha3256pad, dmxxshapad, "vW1024*VIi", true, + "mma,isa-future-instructions") +CUSTOM_BUILTIN(dmxxsha3224pad, dmxxshapad, "vW1024*VIi", true, + "mma,isa-future-instructions") +CUSTOM_BUILTIN(dmxxshake256pad, dmxxshapad, "vW1024*VIi", true, + "mma,isa-future-instructions") +CUSTOM_BUILTIN(dmxxshake128pad, dmxxshapad, "vW1024*VIi", true, + "mma,isa-future-instructions") +CUSTOM_BUILTIN(dmxxsha384512pad, dmxxshapad, "vW1024*V", true, + "mma,isa-future-instructions") +CUSTOM_BUILTIN(dmxxsha224256pad, dmxxshapad, "vW1024*V", true, "mma,isa-future-instructions") // MMA builtins with positive/negative multiply/accumulate. diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp index 6568959351a5d..e915c1bd4d27a 100644 --- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp @@ -1154,10 +1154,85 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, if (BuiltinID == PPC::BI__builtin_mma_dmmr || BuiltinID == PPC::BI__builtin_mma_dmxor || BuiltinID == PPC::BI__builtin_mma_disassemble_dmr || - BuiltinID == PPC::BI__builtin_mma_dmsha2hash) { + BuiltinID == PPC::BI__builtin_dmsha2hash) { Address Addr = EmitPointerWithAlignment(E->getArg(1)); Ops[1] = Builder.CreateLoad(Addr); } + if (BuiltinID == PPC::BI__builtin_dmsha256hash || + BuiltinID == PPC::BI__builtin_dmsha512hash) { + Address Addr = EmitPointerWithAlignment(E->getArg(1)); + Ops[1] = Builder.CreateLoad(Addr); + int Imm = (BuiltinID == PPC::BI__builtin_dmsha256hash) ? 0 : 1; + Ops.push_back(llvm::ConstantInt::get(Int32Ty, Imm)); + } + if (BuiltinID == PPC::BI__builtin_dmsha3dw || + BuiltinID == PPC::BI__builtin_dmcryshash) { + int Imm = (BuiltinID == PPC::BI__builtin_dmsha3dw) ? 
0 : 12; + Ops.push_back(llvm::ConstantInt::get(Int32Ty, Imm)); + } + if (BuiltinID == PPC::BI__builtin_dmxxsha3512pad || + BuiltinID == PPC::BI__builtin_dmxxsha3384pad || + BuiltinID == PPC::BI__builtin_dmxxsha3256pad || + BuiltinID == PPC::BI__builtin_dmxxsha3224pad || + BuiltinID == PPC::BI__builtin_dmxxshake256pad || + BuiltinID == PPC::BI__builtin_dmxxshake128pad || + BuiltinID == PPC::BI__builtin_dmxxsha384512pad || + BuiltinID == PPC::BI__builtin_dmxxsha224256pad) { + int ID, BL; + bool hasE; + switch (BuiltinID) { + case PPC::BI__builtin_dmxxsha3512pad: + ID = 0; + BL = 0; + hasE = true; + break; + case PPC::BI__builtin_dmxxsha3384pad: + ID = 0; + BL = 1; + hasE = true; + break; + case PPC::BI__builtin_dmxxsha3256pad: + ID = 0; + BL = 2; + hasE = true; + break; + case PPC::BI__builtin_dmxxsha3224pad: + ID = 0; + BL = 3; + hasE = true; + break; + case PPC::BI__builtin_dmxxshake256pad: + ID = 1; + BL = 0; + hasE = true; + break; + case PPC::BI__builtin_dmxxshake128pad: + ID = 1; + BL = 1; + hasE = true; + break; + case PPC::BI__builtin_dmxxsha384512pad: + ID = 2; + BL = 0; + hasE = false; + break; + case PPC::BI__builtin_dmxxsha224256pad: + ID = 3; + BL = 0; + hasE = false; + break; + } + if (hasE) { + Value *E_val = Ops[2]; + Ops[2] = ConstantInt::get(Int32Ty, ID); + Ops.push_back(E_val); + Ops.push_back(ConstantInt::get(Int32Ty, BL)); + } else { + Ops.push_back(ConstantInt::get(Int32Ty, ID)); + Ops.push_back(ConstantInt::get(Int32Ty, 0)); + Ops.push_back(ConstantInt::get(Int32Ty, 0)); + } + } if (BuiltinID == PPC::BI__builtin_mma_disassemble_dmr) return Builder.CreateAlignedStore(Ops[1], Ops[0], MaybeAlign()); for (unsigned i=1; i<Ops.size(); i++) diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c index 585d8bac57181..c9274988b2e5d 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c @@ -8,17 +8,17 @@ // CHECK-LABEL: define dso_local void 
@test_dmxvi8gerx4( // CHECK-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8:![0-9]+]] +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7:![0-9]+]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_dmxvi8gerx4( // AIX-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]] +// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5:![0-9]+]] // AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]]) -// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8:![0-9]+]] +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7:![0-9]+]] // AIX-NEXT: ret void // void test_dmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -31,17 +31,17 @@ void test_dmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned // CHECK-LABEL: define 
dso_local void @test_pmdmxvi8gerx4( // CHECK-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_pmdmxvi8gerx4( // AIX-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]] // AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] // AIX-NEXT: ret void // void test_pmdmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -54,19 +54,19 @@ void test_pmdmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigne // CHECK-LABEL: define dso_local void 
@test_dmxvi8gerx4pp( // CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_dmxvi8gerx4pp( // AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] -// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]] // AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) -// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa 
[[__DMR1024_TBAA8]] +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] // AIX-NEXT: ret void // void test_dmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -79,19 +79,19 @@ void test_dmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigne // CHECK-LABEL: define dso_local void @test_pmdmxvi8gerx4pp( // CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_pmdmxvi8gerx4pp( // AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] -// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] 
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]] // AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] // AIX-NEXT: ret void // void test_pmdmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -104,19 +104,19 @@ void test_pmdmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsig // CHECK-LABEL: define dso_local void @test_dmxvi8gerx4spp( // CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_dmxvi8gerx4spp( // AIX-SAME: ptr noundef readonly captures(none) 
[[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] -// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]] // AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) -// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] // AIX-NEXT: ret void // void test_dmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -129,19 +129,19 @@ void test_dmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsign // CHECK-LABEL: define dso_local void @test_pmdmxvi8gerx4spp( // CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> 
@llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_pmdmxvi8gerx4spp( // AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] -// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]] // AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] // AIX-NEXT: ret void // void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -185,7 +185,7 @@ void test_dmf_basic(char *p, char *res1, char *res2) { // CHECK-LABEL: define dso_local void @test_dmf_basic2( // CHECK-SAME: ptr noundef readonly captures(none) [[P1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES2:%.*]], ptr noundef readonly captures(none) [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA10:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA9:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]]) // CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2]], align 128 // CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1]], align 128 @@ -195,7 +195,7 @@ void test_dmf_basic(char *p, char *res1, char *res2) { // AIX-LABEL: define void @test_dmf_basic2( // AIX-SAME: ptr noundef readonly captures(none) [[P1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES2:%.*]], ptr noundef readonly captures(none) [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA10:![0-9]+]] +// AIX-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA9:![0-9]+]] // AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]]) // AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2]], align 128 // AIX-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1]], align 128 @@ -212,89 +212,359 @@ void test_dmf_basic2(char *p1, char *res1, char *res2, // CHECK-LABEL: define dso_local void @test_dmsha2hash( // CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP1:%.*]], ptr noundef readonly captures(none) [[VDMRP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr 
[[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA8]] -// CHECK-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA8]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 1) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 1) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_dmsha2hash( // AIX-SAME: ptr noundef readonly captures(none) [[VDMRP1:%.*]], ptr noundef readonly captures(none) [[VDMRP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA8]] -// AIX-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA8]] -// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 1) -// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 1) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] // AIX-NEXT: ret void // void 
test_dmsha2hash(unsigned char *vdmrp1, unsigned char *vdmrp2, unsigned char *resp) { __dmr1024 vdmr1 = *((__dmr1024 *)vdmrp1); __dmr1024 vdmr2 = *((__dmr1024 *)vdmrp2); - __builtin_mma_dmsha2hash(&vdmr1, &vdmr2, 1); + __builtin_dmsha2hash(&vdmr1, &vdmr2, 1); *((__dmr1024 *)resp) = vdmr1; } // CHECK-LABEL: define dso_local void @test_dmsha3hash( // CHECK-SAME: ptr noundef readonly captures(none) [[VDMRPP:%.*]], ptr noundef writeonly captures(none) initializes((0, 256)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA11:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> [[TMP0]], i32 4) -// CHECK-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA10:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> [[TMP0]], i32 4) +// CHECK-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA10]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_dmsha3hash( // AIX-SAME: ptr noundef readonly captures(none) [[VDMRPP:%.*]], ptr noundef writeonly captures(none) initializes((0, 256)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA11:![0-9]+]] -// AIX-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> [[TMP0]], i32 4) -// AIX-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA11]] +// AIX-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA10:![0-9]+]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> [[TMP0]], i32 4) +// AIX-NEXT: store <2048 x i1> [[TMP1]], 
ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA10]] // AIX-NEXT: ret void // void test_dmsha3hash(unsigned char *vdmrpp, unsigned char *resp) { __dmr2048 vdmrp = *((__dmr2048 *)vdmrpp); - __builtin_mma_dmsha3hash(&vdmrp, 4); + __builtin_dmsha3hash(&vdmrp, 4); *((__dmr2048 *)resp) = vdmrp; } // CHECK-LABEL: define dso_local void @test_dmxxshapad( // CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 2, i32 1, i32 5) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 2, i32 1, i32 3) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_dmxxshapad( // AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] -// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 2, i32 1, i32 5) -// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = 
tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 2, i32 1, i32 3) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] // AIX-NEXT: ret void // void test_dmxxshapad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); - __builtin_mma_dmxxshapad(&vdmr, vc, 2, 1, 5); + __builtin_dmxxshapad(&vdmr, vc, 2, 1, 3); + *((__dmr1024 *)resp) = vdmr; +} + +// CHECK-LABEL: define dso_local void @test_dmsha256hash( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP1:%.*]], ptr noundef readonly captures(none) [[VDMRP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_dmsha256hash( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP1:%.*]], ptr noundef readonly captures(none) [[VDMRP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: ret void +// +void 
test_dmsha256hash(unsigned char *vdmrp1, unsigned char *vdmrp2, unsigned char *resp) { + __dmr1024 vdmr1 = *((__dmr1024 *)vdmrp1); + __dmr1024 vdmr2 = *((__dmr1024 *)vdmrp2); + __builtin_dmsha256hash(&vdmr1, &vdmr2); + *((__dmr1024 *)resp) = vdmr1; +} + +// CHECK-LABEL: define dso_local void @test_dmsha512hash( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP1:%.*]], ptr noundef readonly captures(none) [[VDMRP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 1) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_dmsha512hash( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP1:%.*]], ptr noundef readonly captures(none) [[VDMRP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 1) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: ret void +// +void test_dmsha512hash(unsigned char *vdmrp1, unsigned char *vdmrp2, unsigned char *resp) { + __dmr1024 vdmr1 = *((__dmr1024 *)vdmrp1); + __dmr1024 vdmr2 = *((__dmr1024 *)vdmrp2); + __builtin_dmsha512hash(&vdmr1, &vdmr2); + *((__dmr1024 
*)resp) = vdmr1; +} + +// CHECK-LABEL: define dso_local void @test_dmsha3dw( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRPP:%.*]], ptr noundef writeonly captures(none) initializes((0, 256)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA10]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> [[TMP0]], i32 0) +// CHECK-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA10]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_dmsha3dw( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRPP:%.*]], ptr noundef writeonly captures(none) initializes((0, 256)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA10]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> [[TMP0]], i32 0) +// AIX-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA10]] +// AIX-NEXT: ret void +// +void test_dmsha3dw(unsigned char *vdmrpp, unsigned char *resp) { + __dmr2048 vdmrp = *((__dmr2048 *)vdmrpp); + __builtin_dmsha3dw(&vdmrp); + *((__dmr2048 *)resp) = vdmrp; +} + +// CHECK-LABEL: define dso_local void @test_dmcryshash( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRPP:%.*]], ptr noundef writeonly captures(none) initializes((0, 256)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA10]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> [[TMP0]], i32 12) +// CHECK-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA10]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_dmcryshash( +// 
AIX-SAME: ptr noundef readonly captures(none) [[VDMRPP:%.*]], ptr noundef writeonly captures(none) initializes((0, 256)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA10]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> [[TMP0]], i32 12) +// AIX-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA10]] +// AIX-NEXT: ret void +// +void test_dmcryshash(unsigned char *vdmrpp, unsigned char *resp) { + __dmr2048 vdmrp = *((__dmr2048 *)vdmrpp); + __builtin_dmcryshash(&vdmrp); + *((__dmr2048 *)resp) = vdmrp; +} + +// CHECK-LABEL: define dso_local void @test_dmxxsha3512pad( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_dmxxsha3512pad( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: ret void +// +void 
test_dmxxsha3512pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) { + __dmr1024 vdmr = *((__dmr1024 *)vdmrp); + __builtin_dmxxsha3512pad(&vdmr, vc, 0); + *((__dmr1024 *)resp) = vdmr; +} + +// CHECK-LABEL: define dso_local void @test_dmxxsha3384pad( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 1, i32 1) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_dmxxsha3384pad( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 1, i32 1) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: ret void +// +void test_dmxxsha3384pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) { + __dmr1024 vdmr = *((__dmr1024 *)vdmrp); + __builtin_dmxxsha3384pad(&vdmr, vc, 1); + *((__dmr1024 *)resp) = vdmr; +} + +// CHECK-LABEL: define dso_local void @test_dmxxsha3256pad( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 2) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_dmxxsha3256pad( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 2) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: ret void +// +void test_dmxxsha3256pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) { + __dmr1024 vdmr = *((__dmr1024 *)vdmrp); + __builtin_dmxxsha3256pad(&vdmr, vc, 0); + *((__dmr1024 *)resp) = vdmr; +} + +// CHECK-LABEL: define dso_local void @test_dmxxsha3224pad( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 1, i32 3) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_dmxxsha3224pad( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> 
noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 1, i32 3) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: ret void +// +void test_dmxxsha3224pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) { + __dmr1024 vdmr = *((__dmr1024 *)vdmrp); + __builtin_dmxxsha3224pad(&vdmr, vc, 1); + *((__dmr1024 *)resp) = vdmr; +} + +// CHECK-LABEL: define dso_local void @test_dmxxshake256pad( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 1, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_dmxxshake256pad( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 1, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: ret 
void +// +void test_dmxxshake256pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) { + __dmr1024 vdmr = *((__dmr1024 *)vdmrp); + __builtin_dmxxshake256pad(&vdmr, vc, 0); + *((__dmr1024 *)resp) = vdmr; +} + +// CHECK-LABEL: define dso_local void @test_dmxxshake128pad( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 1, i32 1, i32 1) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_dmxxshake128pad( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 1, i32 1, i32 1) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: ret void +// +void test_dmxxshake128pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) { + __dmr1024 vdmr = *((__dmr1024 *)vdmrp); + __builtin_dmxxshake128pad(&vdmr, vc, 1); + *((__dmr1024 *)resp) = vdmr; +} + +// CHECK-LABEL: define dso_local void @test_dmxxsha384512pad( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 2, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_dmxxsha384512pad( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 2, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: ret void +// +void test_dmxxsha384512pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) { + __dmr1024 vdmr = *((__dmr1024 *)vdmrp); + __builtin_dmxxsha384512pad(&vdmr, vc); + *((__dmr1024 *)resp) = vdmr; +} + +// CHECK-LABEL: define dso_local void @test_dmxxsha224256pad( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 3, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_dmxxsha224256pad( +// AIX-SAME: ptr noundef readonly captures(none) 
[[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 3, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]] +// AIX-NEXT: ret void +// +void test_dmxxsha224256pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) { + __dmr1024 vdmr = *((__dmr1024 *)vdmrp); + __builtin_dmxxsha224256pad(&vdmr, vc); *((__dmr1024 *)resp) = vdmr; } //. -// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} -// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK: [[META7]] = !{!"__vector_pair", [[META4]], i64 0} -// CHECK: [[__DMR1024_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} -// CHECK: [[META9]] = !{!"__dmr1024", [[META4]], i64 0} -// CHECK: [[CHAR_TBAA10]] = !{[[META4]], [[META4]], i64 0} -// CHECK: [[__DMR2048_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} -// CHECK: [[META12]] = !{!"__dmr2048", [[META4]], i64 0} +// CHECK: [[META3:![0-9]+]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[__VECTOR_PAIR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +// CHECK: [[META6]] = !{!"__vector_pair", [[META3]], i64 0} +// CHECK: [[__DMR1024_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// CHECK: [[META8]] = !{!"__dmr1024", [[META3]], i64 0} +// CHECK: [[CHAR_TBAA9]] = !{[[META3]], [[META3]], i64 0} +// CHECK: [[__DMR2048_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} +// CHECK: [[META11]] = !{!"__dmr2048", [[META3]], i64 0} //. 
-// AIX: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} -// AIX: [[META5]] = !{!"Simple C/C++ TBAA"} -// AIX: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// AIX: [[META7]] = !{!"__vector_pair", [[META4]], i64 0} -// AIX: [[__DMR1024_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} -// AIX: [[META9]] = !{!"__dmr1024", [[META4]], i64 0} -// AIX: [[CHAR_TBAA10]] = !{[[META4]], [[META4]], i64 0} -// AIX: [[__DMR2048_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} -// AIX: [[META12]] = !{!"__dmr2048", [[META4]], i64 0} +// AIX: [[META3:![0-9]+]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// AIX: [[META4]] = !{!"Simple C/C++ TBAA"} +// AIX: [[__VECTOR_PAIR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +// AIX: [[META6]] = !{!"__vector_pair", [[META3]], i64 0} +// AIX: [[__DMR1024_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// AIX: [[META8]] = !{!"__dmr1024", [[META3]], i64 0} +// AIX: [[CHAR_TBAA9]] = !{[[META3]], [[META3]], i64 0} +// AIX: [[__DMR2048_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} +// AIX: [[META11]] = !{!"__dmr2048", [[META3]], i64 0} //. 
diff --git a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c index 66b9d797c65d3..a13ce3e0abc28 100644 --- a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c +++ b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c @@ -25,9 +25,21 @@ void test_mma(unsigned char *vdmrpp, unsigned char *vdmrp, unsigned char *vpp, v __builtin_mma_dmxor(&vdmr, (__dmr1024*)vpp); __builtin_mma_build_dmr(&vdmr, vc, vc, vc, vc, vc, vc, vc, vc); __builtin_mma_disassemble_dmr(vdmrp, &vdmr); - __builtin_mma_dmsha2hash(&vdmr, &vdmr, 0); - __builtin_mma_dmsha3hash(&vdmrpair, 0); - __builtin_mma_dmxxshapad(&vdmr, vc, 0, 0, 0); + __builtin_dmsha2hash(&vdmr, &vdmr, 0); + __builtin_dmsha3hash(&vdmrpair, 0); + __builtin_dmxxshapad(&vdmr, vc, 0, 0, 0); + __builtin_dmsha256hash(&vdmr, &vdmr); + __builtin_dmsha512hash(&vdmr, &vdmr); + __builtin_dmsha3dw(&vdmrpair); + __builtin_dmcryshash(&vdmrpair); + __builtin_dmxxsha3512pad(&vdmr, vc, 0); + __builtin_dmxxsha3384pad(&vdmr, vc, 0); + __builtin_dmxxsha3256pad(&vdmr, vc, 0); + __builtin_dmxxsha3224pad(&vdmr, vc, 0); + __builtin_dmxxshake256pad(&vdmr, vc, 0); + __builtin_dmxxshake128pad(&vdmr, vc, 0); + __builtin_dmxxsha384512pad(&vdmr, vc); + __builtin_dmxxsha224256pad(&vdmr, vc); // CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops // CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops @@ -40,9 +52,17 @@ void test_mma(unsigned char *vdmrpp, unsigned char *vdmrp, unsigned char *vpp, v // ISA_FUTURE: error: '__builtin_mma_dmxor' needs target feature mma,isa-future-instructions // ISA_FUTURE: error: '__builtin_mma_build_dmr' needs target feature mma,isa-future-instructions // ISA_FUTURE: error: '__builtin_mma_disassemble_dmr' needs target feature mma,isa-future-instructions -// CHECK: error: '__builtin_mma_dmsha2hash' needs target feature mma,isa-future-instructions -// CHECK: error: '__builtin_mma_dmsha3hash' needs
target feature mma,isa-future-instructions -// CHECK: error: '__builtin_mma_dmxxshapad' needs target feature mma,isa-future-instructions +// CHECK: error: '__builtin_dmsha2hash' needs target feature mma,isa-future-instructions +// CHECK: error: '__builtin_dmsha3hash' needs target feature mma,isa-future-instructions +// CHECK: error: '__builtin_dmxxshapad' needs target feature mma,isa-future-instructions +// CHECK: error: '__builtin_dmxxsha3512pad' needs target feature mma,isa-future-instructions +// CHECK: error: '__builtin_dmxxsha3384pad' needs target feature mma,isa-future-instructions +// CHECK: error: '__builtin_dmxxsha3256pad' needs target feature mma,isa-future-instructions +// CHECK: error: '__builtin_dmxxsha3224pad' needs target feature mma,isa-future-instructions +// CHECK: error: '__builtin_dmxxshake256pad' needs target feature mma,isa-future-instructions +// CHECK: error: '__builtin_dmxxshake128pad' needs target feature mma,isa-future-instructions +// CHECK: error: '__builtin_dmxxsha384512pad' needs target feature mma,isa-future-instructions +// CHECK: error: '__builtin_dmxxsha224256pad' needs target feature mma,isa-future-instructions // DMF VSX Vector bfloat16 GER 2x builtins. 
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index ec33af88c72d9..1fd74745e84e1 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1860,16 +1860,16 @@ let TargetPrefix = "ppc" in { defm int_ppc_mma_pmdmxvf16gerx2 : PowerPC_MMA_DMR_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>; - def int_ppc_mma_dmsha2hash : + def int_ppc_dmsha2hash : DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty, llvm_v1024i1_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; - def int_ppc_mma_dmsha3hash : + def int_ppc_dmsha3hash : DefaultAttrsIntrinsic<[llvm_v2048i1_ty], [llvm_v2048i1_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; - def int_ppc_mma_dmxxshapad : + def int_ppc_dmxxshapad : DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>, diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td index 1b4b58f724bc8..5df334b271549 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td @@ -511,14 +511,14 @@ let Predicates = [MMA, IsISAFuture] in { : XForm_AT3_T1_AB3< 31, 14, 177, (outs dmr:$AT), (ins dmr:$ATi, dmr:$AB, u1imm:$T), "dmsha2hash $AT, $AB, $T", - [(set v1024i1:$AT, (int_ppc_mma_dmsha2hash v1024i1:$ATi, + [(set v1024i1:$AT, (int_ppc_dmsha2hash v1024i1:$ATi, v1024i1:$AB, u1imm_timm:$T))]>, RegConstraint<"$ATi = $AT">; def DMSHA3HASH : XForm_ATp2_SR5<31, 15, 177, (outs dmrp:$ATp), (ins dmrp:$ATpi, u5imm:$SR), "dmsha3hash $ATp, $SR", [(set v2048i1:$ATp, - (int_ppc_mma_dmsha3hash v2048i1:$ATpi, + (int_ppc_dmsha3hash v2048i1:$ATpi, u5imm_timm:$SR))]>, RegConstraint<"$ATpi = $ATp">; def DMXXSHAPAD @@ -593,7 +593,7 @@ let Predicates = [MMA, IsISAFuture] in { (DMXVF16GERX2NN $ATi, $XAp, RCCp.BToVSRC)>; // Cryptography Intrinsic - def : 
Pat<(v1024i1 (int_ppc_mma_dmxxshapad v1024i1:$ATi, v16i8:$XB, + def : Pat<(v1024i1 (int_ppc_dmxxshapad v1024i1:$ATi, v16i8:$XB, u2imm_timm:$ID, u1imm_timm:$E, u2imm_timm:$BL)), (DMXXSHAPAD $ATi, RCCp.BToVSRC, $ID, $E, $BL)>; } diff --git a/llvm/test/CodeGen/PowerPC/dmrp-spill.ll b/llvm/test/CodeGen/PowerPC/dmrp-spill.ll index 7a26c49b89df5..88afec18f7b1d 100644 --- a/llvm/test/CodeGen/PowerPC/dmrp-spill.ll +++ b/llvm/test/CodeGen/PowerPC/dmrp-spill.ll @@ -10,7 +10,7 @@ ; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX32 declare void @dummy_func() -declare <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1>, i32) +declare <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1>, i32) define dso_local void @test_dmsha3hash(ptr %vopp, ptr %resp) nounwind { ; CHECK-LABEL: test_dmsha3hash: @@ -205,9 +205,9 @@ define dso_local void @test_dmsha3hash(ptr %vopp, ptr %resp) nounwind { ; AIX32-NEXT: blr entry: %0 = load <2048 x i1>, ptr %vopp, align 64 - %2 = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> %0, i32 5) + %2 = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> %0, i32 5) tail call void @dummy_func() - %3 = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> %0, i32 5) + %3 = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> %0, i32 5) store <2048 x i1> %2, ptr %resp, align 64 ret void } diff --git a/llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll b/llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll index ab2324a4646b3..157baabb93014 100644 --- a/llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll +++ b/llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll @@ -6,10 +6,10 @@ ; RUN: -mcpu=future -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE -declare <1024 x i1> @llvm.ppc.mma.dmsha2hash(<1024 x i1>, <1024 x i1>, i32) +declare <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1>, <1024 x i1>, i32) -define dso_local void @test_dmsha2hash(ptr %vop, ptr %vinp, ptr %resp) { -; CHECK-LABEL: test_dmsha2hash: +define 
dso_local void @test_dmsha256hash(ptr %vop, ptr %vinp, ptr %resp) { +; CHECK-LABEL: test_dmsha256hash: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxvp vsp34, 0(r3) ; CHECK-NEXT: lxvp vsp36, 32(r3) @@ -32,7 +32,7 @@ define dso_local void @test_dmsha2hash(ptr %vop, ptr %vinp, ptr %resp) { ; CHECK-NEXT: stxvp vsp36, 0(r5) ; CHECK-NEXT: blr ; -; CHECK-BE-LABEL: test_dmsha2hash: +; CHECK-BE-LABEL: test_dmsha256hash: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxvp vsp34, 96(r3) ; CHECK-BE-NEXT: lxvp vsp36, 64(r3) @@ -57,12 +57,12 @@ define dso_local void @test_dmsha2hash(ptr %vop, ptr %vinp, ptr %resp) { entry: %0 = load <1024 x i1>, ptr %vop, align 64 %1 = load <1024 x i1>, ptr %vinp, align 64 - %3 = tail call <1024 x i1> @llvm.ppc.mma.dmsha2hash(<1024 x i1> %0, <1024 x i1> %1, i32 0) + %3 = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> %0, <1024 x i1> %1, i32 0) store <1024 x i1> %3, ptr %resp, align 64 ret void } -declare <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1>, i32) +declare <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1>, i32) define dso_local void @test_dmsha3hash(ptr %vopp, ptr %resp) { ; CHECK-LABEL: test_dmsha3hash: @@ -124,12 +124,12 @@ define dso_local void @test_dmsha3hash(ptr %vopp, ptr %resp) { ; CHECK-BE-NEXT: blr entry: %0 = load <2048 x i1>, ptr %vopp, align 64 - %2 = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> %0, i32 5) + %2 = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> %0, i32 5) store <2048 x i1> %2, ptr %resp, align 64 ret void } -declare <1024 x i1> @llvm.ppc.mma.dmxxshapad(<1024 x i1>, <16 x i8>, i32, i32, i32) +declare <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1>, <16 x i8>, i32, i32, i32) define dso_local void @test_dmxxshapad(ptr %vopp, ptr %vcp, ptr %resp) { ; CHECK-LABEL: test_dmxxshapad: @@ -170,7 +170,543 @@ define dso_local void @test_dmxxshapad(ptr %vopp, ptr %vcp, ptr %resp) { entry: %0 = load <1024 x i1>, ptr %vopp, align 64 %1 = load <16 x i8>, ptr %vcp, align 64 - %2 = tail call <1024 x i1> 
@llvm.ppc.mma.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 2, i32 1, i32 3) + %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 2, i32 1, i32 3) + store <1024 x i1> %2, ptr %resp, align 64 + ret void +} + +define dso_local void @test_dmsha512hash(ptr %vop, ptr %vinp, ptr %resp) { +; CHECK-LABEL: test_dmsha512hash: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: lxvp vsp32, 64(r3) +; CHECK-NEXT: lxvp vsp38, 96(r3) +; CHECK-NEXT: lxvp vsp40, 0(r4) +; CHECK-NEXT: lxvp vsp42, 32(r4) +; CHECK-NEXT: lxvp vsp44, 64(r4) +; CHECK-NEXT: lxvp vsp46, 96(r4) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp42, vsp40, 1 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp46, vsp44, 0 +; CHECK-NEXT: dmsha512hash dmr0, dmr1 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmsha512hash: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: lxvp vsp32, 32(r3) +; CHECK-BE-NEXT: lxvp vsp38, 0(r3) +; CHECK-BE-NEXT: lxvp vsp40, 96(r4) +; CHECK-BE-NEXT: lxvp vsp42, 64(r4) +; CHECK-BE-NEXT: lxvp vsp44, 32(r4) +; CHECK-BE-NEXT: lxvp vsp46, 0(r4) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp42, vsp40, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp46, vsp44, 0 +; CHECK-BE-NEXT: dmsha512hash dmr0, dmr1 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vop, align 64 + %1 = load <1024 x i1>, ptr %vinp, align 64 + %3 = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> %0, <1024 x i1> %1, i32 1) + store <1024 x i1> %3, ptr %resp, align 64 + ret void +} + +define dso_local void @test_dmsha3dw(ptr %vopp, ptr %resp) { +; CHECK-LABEL: test_dmsha3dw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: lxvp vsp32, 64(r3) +; CHECK-NEXT: lxvp vsp38, 96(r3) +; CHECK-NEXT: lxvp vsp40, 128(r3) +; CHECK-NEXT: lxvp vsp42, 160(r3) +; CHECK-NEXT: lxvp vsp44, 192(r3) +; CHECK-NEXT: lxvp vsp46, 224(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1 +; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp38, vsp32, 0 +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp42, vsp40, 1 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp46, vsp44, 0 +; CHECK-NEXT: dmsha3dw dmrp0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 224(r4) +; CHECK-NEXT: stxvp vsp36, 192(r4) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 160(r4) +; CHECK-NEXT: stxvp vsp36, 128(r4) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-NEXT: stxvp vsp34, 96(r4) +; CHECK-NEXT: stxvp vsp36, 64(r4) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1 +; CHECK-NEXT: stxvp vsp34, 32(r4) +; CHECK-NEXT: stxvp vsp36, 0(r4) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmsha3dw: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 224(r3) +; CHECK-BE-NEXT: lxvp vsp36, 192(r3) +; CHECK-BE-NEXT: lxvp vsp32, 160(r3) +; CHECK-BE-NEXT: lxvp vsp38, 128(r3) +; CHECK-BE-NEXT: lxvp vsp40, 96(r3) +; CHECK-BE-NEXT: lxvp vsp42, 64(r3) +; CHECK-BE-NEXT: lxvp vsp44, 32(r3) +; CHECK-BE-NEXT: lxvp vsp46, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp38, vsp32, 0 +; 
CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp42, vsp40, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp46, vsp44, 0 +; CHECK-BE-NEXT: dmsha3dw dmrp0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1 +; CHECK-BE-NEXT: stxvp vsp36, 224(r4) +; CHECK-BE-NEXT: stxvp vsp34, 192(r4) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-BE-NEXT: stxvp vsp36, 160(r4) +; CHECK-BE-NEXT: stxvp vsp34, 128(r4) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r4) +; CHECK-BE-NEXT: stxvp vsp34, 64(r4) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r4) +; CHECK-BE-NEXT: stxvp vsp34, 0(r4) +; CHECK-BE-NEXT: blr +entry: + %0 = load <2048 x i1>, ptr %vopp, align 64 + %2 = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> %0, i32 0) + store <2048 x i1> %2, ptr %resp, align 64 + ret void +} + +define dso_local void @test_dmcryshash(ptr %vopp, ptr %resp) { +; CHECK-LABEL: test_dmcryshash: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: lxvp vsp32, 64(r3) +; CHECK-NEXT: lxvp vsp38, 96(r3) +; CHECK-NEXT: lxvp vsp40, 128(r3) +; CHECK-NEXT: lxvp vsp42, 160(r3) +; CHECK-NEXT: lxvp vsp44, 192(r3) +; CHECK-NEXT: lxvp vsp46, 224(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1 +; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp38, vsp32, 0 +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp42, vsp40, 1 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp46, vsp44, 0 +; CHECK-NEXT: dmcryshash dmrp0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 224(r4) +; CHECK-NEXT: stxvp vsp36, 192(r4) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 160(r4) +; CHECK-NEXT: stxvp vsp36, 128(r4) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-NEXT: stxvp vsp34, 96(r4) +; CHECK-NEXT: stxvp vsp36, 64(r4) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1 +; CHECK-NEXT: stxvp vsp34, 
32(r4) +; CHECK-NEXT: stxvp vsp36, 0(r4) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmcryshash: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 224(r3) +; CHECK-BE-NEXT: lxvp vsp36, 192(r3) +; CHECK-BE-NEXT: lxvp vsp32, 160(r3) +; CHECK-BE-NEXT: lxvp vsp38, 128(r3) +; CHECK-BE-NEXT: lxvp vsp40, 96(r3) +; CHECK-BE-NEXT: lxvp vsp42, 64(r3) +; CHECK-BE-NEXT: lxvp vsp44, 32(r3) +; CHECK-BE-NEXT: lxvp vsp46, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp38, vsp32, 0 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp42, vsp40, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp46, vsp44, 0 +; CHECK-BE-NEXT: dmcryshash dmrp0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1 +; CHECK-BE-NEXT: stxvp vsp36, 224(r4) +; CHECK-BE-NEXT: stxvp vsp34, 192(r4) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-BE-NEXT: stxvp vsp36, 160(r4) +; CHECK-BE-NEXT: stxvp vsp34, 128(r4) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r4) +; CHECK-BE-NEXT: stxvp vsp34, 64(r4) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r4) +; CHECK-BE-NEXT: stxvp vsp34, 0(r4) +; CHECK-BE-NEXT: blr +entry: + %0 = load <2048 x i1>, ptr %vopp, align 64 + %2 = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> %0, i32 12) + store <2048 x i1> %2, ptr %resp, align 64 + ret void +} + +define dso_local void @test_dmxxsha3512pad(ptr %vopp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxxsha3512pad: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: lxvp vsp32, 64(r3) +; CHECK-NEXT: lxvp vsp38, 96(r3) +; CHECK-NEXT: lxv vs0, 0(r4) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-NEXT: dmxxsha3512pad dmr0, vs0, 1 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; 
CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxxsha3512pad: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: lxvp vsp32, 32(r3) +; CHECK-BE-NEXT: lxvp vsp38, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-BE-NEXT: dmxxsha3512pad dmr0, vs0, 1 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vopp, align 64 + %1 = load <16 x i8>, ptr %vcp, align 64 + %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 0, i32 1, i32 0) + store <1024 x i1> %2, ptr %resp, align 64 + ret void +} + +define dso_local void @test_dmxxsha3384pad(ptr %vopp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxxsha3384pad: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: lxvp vsp32, 64(r3) +; CHECK-NEXT: lxvp vsp38, 96(r3) +; CHECK-NEXT: lxv vs0, 0(r4) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-NEXT: dmxxsha3384pad dmr0, vs0, 1 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxxsha3384pad: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp 
vsp36, 64(r3) +; CHECK-BE-NEXT: lxvp vsp32, 32(r3) +; CHECK-BE-NEXT: lxvp vsp38, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-BE-NEXT: dmxxsha3384pad dmr0, vs0, 1 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vopp, align 64 + %1 = load <16 x i8>, ptr %vcp, align 64 + %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 0, i32 1, i32 1) + store <1024 x i1> %2, ptr %resp, align 64 + ret void +} + +define dso_local void @test_dmxxsha3256pad(ptr %vopp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxxsha3256pad: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: lxvp vsp32, 64(r3) +; CHECK-NEXT: lxvp vsp38, 96(r3) +; CHECK-NEXT: lxv vs0, 0(r4) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-NEXT: dmxxsha3256pad dmr0, vs0, 1 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxxsha3256pad: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: lxvp vsp32, 32(r3) +; CHECK-BE-NEXT: lxvp vsp38, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-BE-NEXT: dmxxsha3256pad dmr0, vs0, 1 +; CHECK-BE-NEXT: dmxxextfdmr512 
vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vopp, align 64 + %1 = load <16 x i8>, ptr %vcp, align 64 + %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 0, i32 1, i32 2) + store <1024 x i1> %2, ptr %resp, align 64 + ret void +} + +define dso_local void @test_dmxxsha3224pad(ptr %vopp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxxsha3224pad: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: lxvp vsp32, 64(r3) +; CHECK-NEXT: lxvp vsp38, 96(r3) +; CHECK-NEXT: lxv vs0, 0(r4) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-NEXT: dmxxsha3224pad dmr0, vs0, 1 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxxsha3224pad: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: lxvp vsp32, 32(r3) +; CHECK-BE-NEXT: lxvp vsp38, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-BE-NEXT: dmxxsha3224pad dmr0, vs0, 1 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vopp, align 64 + %1 
= load <16 x i8>, ptr %vcp, align 64 + %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 0, i32 1, i32 3) + store <1024 x i1> %2, ptr %resp, align 64 + ret void +} + +define dso_local void @test_dmxxshake256pad(ptr %vopp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxxshake256pad: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: lxvp vsp32, 64(r3) +; CHECK-NEXT: lxvp vsp38, 96(r3) +; CHECK-NEXT: lxv vs0, 0(r4) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-NEXT: dmxxshake256pad dmr0, vs0, 1 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxxshake256pad: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: lxvp vsp32, 32(r3) +; CHECK-BE-NEXT: lxvp vsp38, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-BE-NEXT: dmxxshake256pad dmr0, vs0, 1 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vopp, align 64 + %1 = load <16 x i8>, ptr %vcp, align 64 + %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 1, i32 1, i32 0) + store <1024 x i1> %2, ptr %resp, align 64 + ret void +} + +define dso_local void @test_dmxxshake128pad(ptr %vopp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: 
test_dmxxshake128pad: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: lxvp vsp32, 64(r3) +; CHECK-NEXT: lxvp vsp38, 96(r3) +; CHECK-NEXT: lxv vs0, 0(r4) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-NEXT: dmxxshake128pad dmr0, vs0, 1 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxxshake128pad: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: lxvp vsp32, 32(r3) +; CHECK-BE-NEXT: lxvp vsp38, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-BE-NEXT: dmxxshake128pad dmr0, vs0, 1 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vopp, align 64 + %1 = load <16 x i8>, ptr %vcp, align 64 + %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 1, i32 1, i32 1) + store <1024 x i1> %2, ptr %resp, align 64 + ret void +} + +define dso_local void @test_dmxxsha384512pad(ptr %vopp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxxsha384512pad: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: lxvp vsp32, 64(r3) +; CHECK-NEXT: lxvp vsp38, 96(r3) +; CHECK-NEXT: lxv vs0, 0(r4) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: dmxxinstdmr512 
wacc0, vsp38, vsp32, 0 +; CHECK-NEXT: dmxxsha384512pad dmr0, vs0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxxsha384512pad: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: lxvp vsp32, 32(r3) +; CHECK-BE-NEXT: lxvp vsp38, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-BE-NEXT: dmxxsha384512pad dmr0, vs0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vopp, align 64 + %1 = load <16 x i8>, ptr %vcp, align 64 + %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 2, i32 0, i32 0) + store <1024 x i1> %2, ptr %resp, align 64 + ret void +} + +define dso_local void @test_dmxxsha224256pad(ptr %vopp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxxsha224256pad: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: lxvp vsp32, 64(r3) +; CHECK-NEXT: lxvp vsp38, 96(r3) +; CHECK-NEXT: lxv vs0, 0(r4) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-NEXT: dmxxsha224256pad dmr0, vs0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 
0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxxsha224256pad: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: lxvp vsp32, 32(r3) +; CHECK-BE-NEXT: lxvp vsp38, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-BE-NEXT: dmxxsha224256pad dmr0, vs0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vopp, align 64 + %1 = load <16 x i8>, ptr %vcp, align 64 + %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 3, i32 0, i32 0) store <1024 x i1> %2, ptr %resp, align 64 ret void } >From 053e37c47da8aef891f1050e5c89257171a5afb6 Mon Sep 17 00:00:00 2001 From: Maryam Moghadas <[email protected]> Date: Thu, 19 Mar 2026 16:34:29 +0000 Subject: [PATCH 2/2] Address review comments --- clang/include/clang/Basic/BuiltinsPPC.def | 18 +++++----- clang/test/Sema/builtins-ppc-crypto.c | 43 +++++++++++++++++++++++ 2 files changed, 52 insertions(+), 9 deletions(-) create mode 100644 clang/test/Sema/builtins-ppc-crypto.c diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index baa7e057a777d..7996a5a87e648 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -1125,11 +1125,11 @@ UNALIASED_CUSTOM_BUILTIN(mma_disassemble_dmr, "vv*W1024*", false, UNALIASED_CUSTOM_BUILTIN(mma_build_dmr, "vW1024*VVVVVVVV", false, "mma,isa-future-instructions") -UNALIASED_CUSTOM_BUILTIN(dmsha2hash, "vW1024*W1024*Ii", true, +UNALIASED_CUSTOM_BUILTIN(dmsha2hash, "vW1024*W1024*i1", true, "mma,isa-future-instructions") 
-UNALIASED_CUSTOM_BUILTIN(dmsha3hash, "vW2048*Ii", true, +UNALIASED_CUSTOM_BUILTIN(dmsha3hash, "vW2048*i31", true, "mma,isa-future-instructions") -UNALIASED_CUSTOM_BUILTIN(dmxxshapad, "vW1024*VIiIiIi", true, +UNALIASED_CUSTOM_BUILTIN(dmxxshapad, "vW1024*Vi3i1i3", true, "mma,isa-future-instructions") CUSTOM_BUILTIN(dmsha256hash, dmsha2hash, "vW1024*W1024*", true, "mma,isa-future-instructions") @@ -1139,17 +1139,17 @@ CUSTOM_BUILTIN(dmsha3dw, dmsha3hash, "vW2048*", true, "mma,isa-future-instructions") CUSTOM_BUILTIN(dmcryshash, dmsha3hash, "vW2048*", true, "mma,isa-future-instructions") -CUSTOM_BUILTIN(dmxxsha3512pad, dmxxshapad, "vW1024*VIi", true, +CUSTOM_BUILTIN(dmxxsha3512pad, dmxxshapad, "vW1024*Vi1", true, "mma,isa-future-instructions") -CUSTOM_BUILTIN(dmxxsha3384pad, dmxxshapad, "vW1024*VIi", true, +CUSTOM_BUILTIN(dmxxsha3384pad, dmxxshapad, "vW1024*Vi1", true, "mma,isa-future-instructions") -CUSTOM_BUILTIN(dmxxsha3256pad, dmxxshapad, "vW1024*VIi", true, +CUSTOM_BUILTIN(dmxxsha3256pad, dmxxshapad, "vW1024*Vi1", true, "mma,isa-future-instructions") -CUSTOM_BUILTIN(dmxxsha3224pad, dmxxshapad, "vW1024*VIi", true, +CUSTOM_BUILTIN(dmxxsha3224pad, dmxxshapad, "vW1024*Vi1", true, "mma,isa-future-instructions") -CUSTOM_BUILTIN(dmxxshake256pad, dmxxshapad, "vW1024*VIi", true, +CUSTOM_BUILTIN(dmxxshake256pad, dmxxshapad, "vW1024*Vi1", true, "mma,isa-future-instructions") -CUSTOM_BUILTIN(dmxxshake128pad, dmxxshapad, "vW1024*VIi", true, +CUSTOM_BUILTIN(dmxxshake128pad, dmxxshapad, "vW1024*Vi1", true, "mma,isa-future-instructions") CUSTOM_BUILTIN(dmxxsha384512pad, dmxxshapad, "vW1024*V", true, "mma,isa-future-instructions") diff --git a/clang/test/Sema/builtins-ppc-crypto.c b/clang/test/Sema/builtins-ppc-crypto.c new file mode 100644 index 0000000000000..83543aafcf80e --- /dev/null +++ b/clang/test/Sema/builtins-ppc-crypto.c @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -target-cpu future \ +// RUN: -fsyntax-only -verify %s +// RUN: %clang_cc1 
-triple powerpc64le-unknown-unknown -target-cpu future \ +// RUN: -fsyntax-only -verify %s + + +void test_crypto(unsigned char *vdmrpp, unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc) { + __dmr2048 vdmrpair = *((__dmr2048 *)vdmrpp); + __dmr1024 vdmr = *((__dmr1024 *)vdmrp); + __vector_pair vp = *((__vector_pair *)vpp); + int ia; + + __builtin_dmsha2hash(&vdmr, &vdmr, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + __builtin_dmsha2hash(&vdmr, &vdmr, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}} + __builtin_dmsha2hash(&vdmr, &vdmr, ia); // expected-error {{argument to '__builtin_dmsha2hash' must be a constant integer}} + + __builtin_dmsha3hash(&vdmrpair, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + __builtin_dmsha3hash(&vdmrpair, -2); // expected-error {{argument value -2 is outside the valid range [0, 31]}} + __builtin_dmsha3hash(&vdmrpair, ia); // expected-error {{argument to '__builtin_dmsha3hash' must be a constant integer}} + + __builtin_dmxxshapad(&vdmr, vc, 4, 0, 3); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + __builtin_dmxxshapad(&vdmr, vc, 3, 2, 3); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + __builtin_dmxxshapad(&vdmr, vc, 3, 1, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} + __builtin_dmxxshapad(&vdmr, vc, ia, 1, -1); // expected-error {{argument to '__builtin_dmxxshapad' must be a constant integer}} + + __builtin_dmxxsha3512pad(&vdmr, vc, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + __builtin_dmxxsha3512pad(&vdmr, vc, ia); // expected-error {{argument to '__builtin_dmxxsha3512pad' must be a constant integer}} + + __builtin_dmxxsha3384pad(&vdmr, vc, 3); // expected-error {{argument value 3 is outside the valid range [0, 1]}} + __builtin_dmxxsha3384pad(&vdmr, vc, ia); // expected-error {{argument to 
'__builtin_dmxxsha3384pad' must be a constant integer}} + + __builtin_dmxxsha3256pad(&vdmr, vc, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}} + __builtin_dmxxsha3256pad(&vdmr, vc, ia); // expected-error {{argument to '__builtin_dmxxsha3256pad' must be a constant integer}} + + __builtin_dmxxsha3224pad(&vdmr, vc, 4); // expected-error {{argument value 4 is outside the valid range [0, 1]}} + __builtin_dmxxsha3224pad(&vdmr, vc, ia); // expected-error {{argument to '__builtin_dmxxsha3224pad' must be a constant integer}} + + __builtin_dmxxshake256pad(&vdmr, vc, -2); // expected-error {{argument value -2 is outside the valid range [0, 1]}} + __builtin_dmxxshake256pad(&vdmr, vc, ia); // expected-error {{argument to '__builtin_dmxxshake256pad' must be a constant integer}} + + __builtin_dmxxshake128pad(&vdmr, vc, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + __builtin_dmxxshake128pad(&vdmr, vc, ia); // expected-error {{argument to '__builtin_dmxxshake128pad' must be a constant integer}} +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
