Author: Lukacma
Date: 2026-01-06T11:27:37+01:00
New Revision: ef86355945601c4349d669360365d22a0412d5c8

URL: 
https://github.com/llvm/llvm-project/commit/ef86355945601c4349d669360365d22a0412d5c8
DIFF: 
https://github.com/llvm/llvm-project/commit/ef86355945601c4349d669360365d22a0412d5c8.diff

LOG: [AArch64] Add intrinsics for Armv9.6 crypto instructions (#165545)

This patch adds intrinsics for the crypto instructions defined in the ACLE
proposal at https://github.com/ARM-software/acle/pull/411
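
As a rough illustration (not part of the patch), the new intrinsics can be
used as below. This is a minimal sketch assuming a non-streaming build with
+sve-aes2; the wrapper function names are made up for the example, while the
intrinsic names and types follow the added CodeGen tests:

  #include <arm_sve.h>

  // One AES single-round decryption over a pair of SVE vectors, using the
  // 128-bit key held in lane 0 of 'key'. The lane index must be a constant
  // in the range [0, 3].
  svuint8x2_t decrypt_round(svuint8x2_t state, svuint8_t key) {
    return svaesd_lane_u8_x2(state, key, 0);
  }

  // Widening 64x64 -> 128-bit polynomial multiply, returning the products
  // as a pair of vectors.
  svuint64x2_t poly_mul(svuint64_t a, svuint64_t b) {
    return svpmull_pair_u64_x2(a, b);
  }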

Added: 
    clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll

Modified: 
    clang/include/clang/Basic/arm_sve.td
    clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
    clang/utils/TableGen/SveEmitter.cpp
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index b68dab4757581..93c077df46552 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1873,6 +1873,23 @@ def SVPMULLT_PAIR_U64   : SInst<"svpmullt_pair[_{d}]",   "ddd", "Ul", MergeNone,
 def SVPMULLT_PAIR_N_U64 : SInst<"svpmullt_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullt_pair", [VerifyRuntimeMode]>;
 }
 
+let SVETargetGuard = "sve-aes2", SMETargetGuard = "sve-aes2,ssve-aes" in {
+def SVAESD_X2   : SInst<"svaesd_lane[_{d}_x2]",   "22di", "Uc", MergeNone, "aarch64_sve_aesd_lane_x2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
+def SVAESDIMC_X2 : SInst<"svaesdimc_lane[_{d}_x2]", "22di",  "Uc", MergeNone, "aarch64_sve_aesdimc_lane_x2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
+def SVAESE_X2   : SInst<"svaese_lane[_{d}_x2]",   "22di", "Uc", MergeNone, "aarch64_sve_aese_lane_x2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
+def SVAESEMC_X2  : SInst<"svaesemc_lane[_{d}_x2]",  "22di",  "Uc", MergeNone, "aarch64_sve_aesemc_lane_x2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
+
+def SVAESD_X4   : SInst<"svaesd_lane[_{d}_x4]",   "44di", "Uc", MergeNone, "aarch64_sve_aesd_lane_x4", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
+def SVAESDIMC_X4 : SInst<"svaesdimc_lane[_{d}_x4]", "44di",  "Uc", MergeNone, "aarch64_sve_aesdimc_lane_x4", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
+def SVAESE_X4   : SInst<"svaese_lane[_{d}_x4]",   "44di", "Uc", MergeNone, "aarch64_sve_aese_lane_x4", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
+def SVAESEMC_X4  : SInst<"svaesemc_lane[_{d}_x4]",  "44di",  "Uc", MergeNone, "aarch64_sve_aesemc_lane_x4", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
+
+def SVPMULL_PAIR_U64   : SInst<"svpmull_pair[_{d}_x2]",   "2dd", "Ul", MergeNone, "aarch64_sve_pmull_pair_x2", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVPMULL_PAIR_N_U64 : SInst<"svpmull_pair[_n_{d}_x2]", "2da", "Ul", MergeNone, "aarch64_sve_pmull_pair_x2", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVPMLAL_PAIR_U64   : SInst<"svpmlal_pair[_{d}_x2]",   "22dd", "Ul", MergeNone, "aarch64_sve_pmlal_pair_x2", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVPMLAL_PAIR_N_U64 : SInst<"svpmlal_pair[_n_{d}_x2]", "22da", "Ul", MergeNone, "aarch64_sve_pmlal_pair_x2", [IsOverloadNone, VerifyRuntimeMode]>;
+}
+
 let SVETargetGuard = "sve-sha3", SMETargetGuard = "sme2p1,sve-sha3" in {
 def SVRAX1 : SInst<"svrax1[_{d}]",   "ddd", "lUl", MergeNone, "aarch64_sve_rax1", [IsOverloadNone, VerifyRuntimeMode]>;
 }

diff  --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c
new file mode 100644
index 0000000000000..43126d2ea0cd8
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c
@@ -0,0 +1,217 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: @test_svaesd_u8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.lane.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svaesd_u8_x211svuint8x2_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.lane.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x2_t test_svaesd_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaesd_lane,_u8_x2)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svaesdimc_u8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.lane.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svaesdimc_u8_x211svuint8x2_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.lane.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x2_t test_svaesdimc_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaesdimc_lane,_u8_x2)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svaese_u8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.lane.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svaese_u8_x211svuint8x2_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.lane.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x2_t test_svaese_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaese_lane,_u8_x2)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svaesemc_u8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.lane.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svaesemc_u8_x211svuint8x2_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.lane.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x2_t test_svaesemc_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaesemc_lane,_u8_x2)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svaesd_u8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.lane.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svaesd_u8_x411svuint8x4_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.lane.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x4_t test_svaesd_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaesd_lane,_u8_x4)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svaesdimc_u8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.lane.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svaesdimc_u8_x411svuint8x4_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.lane.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x4_t test_svaesdimc_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaesdimc_lane,_u8_x4)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svaese_u8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.lane.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svaese_u8_x411svuint8x4_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.lane.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x4_t test_svaese_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaese_lane,_u8_x4)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svaesemc_u8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.lane.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svaesemc_u8_x411svuint8x4_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.lane.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x4_t test_svaesemc_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaesemc_lane,_u8_x4)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svpmull_u64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.pair.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svpmull_u64_x2u12__SVUint64_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.pair.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+svuint64x2_t test_svpmull_u64_x2(svuint64_t op1, svuint64_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svpmull_pair,_u64_x2)(op1, op2);
+}
+
+// CHECK-LABEL: @test_svpmull_n_u64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.pair.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svpmull_n_u64_x2u12__SVUint64_tm(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
+// CPP-CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.pair.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+svuint64x2_t test_svpmull_n_u64_x2(svuint64_t op1, uint64_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svpmull_pair,_n_u64_x2)(op1, op2);
+}
+
+// CHECK-LABEL: @test_svpmlal_u64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.pair.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svpmlal_u64_x212svuint64x2_tu12__SVUint64_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.pair.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+svuint64x2_t test_svpmlal_u64_x2(svuint64x2_t op1, svuint64_t op2, svuint64_t op3) STREAMING
+{
+  return SVE_ACLE_FUNC(svpmlal_pair,_u64_x2)(op1, op2, op3);
+}
+
+// CHECK-LABEL: @test_svpmlal_n_u64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP3:%.*]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.pair.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svpmlal_n_u64_x212svuint64x2_tu12__SVUint64_tm(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP3:%.*]], i64 0
+// CPP-CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.pair.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+svuint64x2_t test_svpmlal_n_u64_x2(svuint64x2_t op1, svuint64_t op2, uint64_t op3) STREAMING
+{
+  return SVE_ACLE_FUNC(svpmlal_pair,_n_u64_x2)(op1, op2, op3);
+}

diff  --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
index ac7586e202b96..15f89838fee8e 100644
--- a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
+++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
@@ -254,4 +254,30 @@ void test_svdup_laneq(){
   svdup_laneq_f32(zn_f32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
   svdup_laneq_f64(zn_f64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
   svdup_laneq_bf16(zn_bf16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+}
+
+__attribute__((target("+sve-aes2")))
+void test_aes_x2_imm_0_3(svuint8x2_t op1, svuint8_t op2) {
+  svaesd_lane(op1, op2, -1);  // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svaesdimc_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svaese_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svaesemc_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+
+  svaesd_lane(op1, op2, 4);  // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svaesdimc_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svaese_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svaesemc_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+}
+
+__attribute__((target("+sve-aes2")))
+void test_aes_x4_imm_0_3(svuint8x4_t op1, svuint8_t op2) {
+  svaesd_lane(op1, op2, -1);  // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svaesdimc_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svaese_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svaesemc_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+
+  svaesd_lane(op1, op2, 4);  // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svaesdimc_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svaese_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svaesemc_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
 }
\ No newline at end of file

diff  --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index b1e94e0ce0975..accb7b240288f 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -267,14 +267,21 @@ class Intrinsic {
   unsigned getSplatIdx() const {
     unsigned I = 1, Param = 0;
     for (; I < Proto.size(); ++I, ++Param) {
+      assert(Proto[I] != '4' &&
+             "Handling for '4' prototype modifier not implemented");
       if (Proto[I] == 'a' || Proto[I] == 'j' || Proto[I] == 'f' ||
           Proto[I] == 'r' || Proto[I] == 'K' || Proto[I] == 'L' ||
           Proto[I] == 'R' || Proto[I] == '@' || Proto[I] == '!')
         break;
 
+      if (Proto[I] == '2')
+        Param += 1;
+
       // Multivector modifier can be skipped
-      if (Proto[I] == '.')
+      if (Proto[I] == '.') {
+        Param -= 1; // Adjust for the increment at the top of the loop
         I += 2;
+      }
     }
     assert(I != Proto.size() && "Prototype has no splat operand");
     return Param;

diff  --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 10e8703e9debe..31cf260ee0e29 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -4241,4 +4241,31 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2  : SME_FP8_ZA_LANE_VGx2_Intrinsic;
   def int_aarch64_sme_fp8_fvdotb_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
   def int_aarch64_sme_fp8_fvdott_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
+  
+  // AES2
+  class SVE2_Crypto_LANE_X2_Intrinsic
+  : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty],
+      [llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
+      [ImmArg<ArgIndex<3>>, IntrNoMem]>;
+  class SVE2_Crypto_LANE_X4_Intrinsic
+  : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
+      [llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, 
+       llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
+      [ImmArg<ArgIndex<5>>, IntrNoMem]>;
+
+  def int_aarch64_sve_aesd_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
+  def int_aarch64_sve_aesdimc_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
+  def int_aarch64_sve_aese_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
+  def int_aarch64_sve_aesemc_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
+
+  def int_aarch64_sve_aesd_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
+  def int_aarch64_sve_aesdimc_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
+  def int_aarch64_sve_aese_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
+  def int_aarch64_sve_aesemc_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
+
+  def int_aarch64_sve_pmull_pair_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
+      [llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>;
+  def int_aarch64_sve_pmlal_pair_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
+      [llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>;
 }
+

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index dc207e0cbba95..7b566e432b6bc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1979,27 +1979,28 @@ void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
 
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
-  unsigned FirstVecIdx = HasPred ? 2 : 1;
+  SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
+  SmallVector<SDValue, 4> Ops;
 
-  auto GetMultiVecOperand = [=](unsigned StartIdx) {
-    SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
+  auto GetMultiVecOperand = [&]() {
+    SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
+    OpsIter += NumVecs;
     return createZMulTuple(Regs);
   };
 
-  SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
+  if (HasPred)
+    Ops.push_back(*OpsIter++);
 
-  SDValue Zm;
+  Ops.push_back(GetMultiVecOperand());
   if (IsZmMulti)
-    Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
+    Ops.push_back(GetMultiVecOperand());
   else
-    Zm = N->getOperand(NumVecs + FirstVecIdx);
+    Ops.push_back(*OpsIter++);
 
+  // Append any remaining operands.
+  Ops.append(OpsIter, N->op_end());
   SDNode *Intrinsic;
-  if (HasPred)
-    Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
-                                       N->getOperand(1), Zdn, Zm);
-  else
-    Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
+  Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
   SDValue SuperReg = SDValue(Intrinsic, 0);
   for (unsigned i = 0; i < NumVecs; ++i)
     ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
@@ -6254,6 +6255,46 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
                AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
       return;
+    case Intrinsic::aarch64_sve_aese_lane_x2:
+      SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B);
+      return;
+    case Intrinsic::aarch64_sve_aesd_lane_x2:
+      SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B);
+      return;
+    case Intrinsic::aarch64_sve_aesemc_lane_x2:
+      SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B);
+      return;
+    case Intrinsic::aarch64_sve_aesdimc_lane_x2:
+      SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B);
+      return;
+    case Intrinsic::aarch64_sve_aese_lane_x4:
+      SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B);
+      return;
+    case Intrinsic::aarch64_sve_aesd_lane_x4:
+      SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B);
+      return;
+    case Intrinsic::aarch64_sve_aesemc_lane_x4:
+      SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B);
+      return;
+    case Intrinsic::aarch64_sve_aesdimc_lane_x4:
+      SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
+      return;
+    case Intrinsic::aarch64_sve_pmlal_pair_x2:
+      SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
+      return;
+    case Intrinsic::aarch64_sve_pmull_pair_x2: {
+      SDLoc DL(Node);
+      SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
+      SDNode *Res =
+          CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs);
+      SDValue SuperReg = SDValue(Res, 0);
+      for (unsigned I = 0; I < 2; I++)
+        ReplaceUses(SDValue(Node, I),
+                    CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
+                                                   SuperReg));
+      CurDAG->RemoveDeadNode(Node);
+      return;
+    }
     case Intrinsic::aarch64_sve_fscale_x4:
       SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
       return;

diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c69ab1b74e71b..c9fc13950e221 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4271,12 +4271,12 @@ let Predicates = [HasSVEAES2, HasNonStreamingSVE_or_SSVE_AES] in {
   def AESE_2ZZI_B    : sve_crypto_binary_multi2<0b000, "aese">;
   def AESD_2ZZI_B    : sve_crypto_binary_multi2<0b010, "aesd">;
   def AESEMC_2ZZI_B  : sve_crypto_binary_multi2<0b100, "aesemc">;
-  def AESDMIC_2ZZI_B : sve_crypto_binary_multi2<0b110, "aesdimc">;
+  def AESDIMC_2ZZI_B : sve_crypto_binary_multi2<0b110, "aesdimc">;
   // SVE_AES2 multi-vector instructions (x4)
   def AESE_4ZZI_B    : sve_crypto_binary_multi4<0b0000, "aese">;
   def AESD_4ZZI_B    : sve_crypto_binary_multi4<0b0100, "aesd">;
   def AESEMC_4ZZI_B  : sve_crypto_binary_multi4<0b1000, "aesemc">;
-  def AESDMIC_4ZZI_B : sve_crypto_binary_multi4<0b1100, "aesdimc">;
+  def AESDIMC_4ZZI_B : sve_crypto_binary_multi4<0b1100, "aesdimc">;
 
   // SVE_AES2 multi-vector polynomial multiply
   def PMLAL_2ZZZ_Q : sve_crypto_pmlal_multi<"pmlal">;

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll
new file mode 100644
index 0000000000000..952e0ecfb5343
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll
@@ -0,0 +1,155 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sve-aes2 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve-aes2,+ssve-aes -force-streaming < %s | FileCheck %s
+
+;
+; AESE
+;
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aese_x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: aese_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    aese { z0.b, z1.b }, { z0.b, z1.b }, z2.q[0]
+; CHECK-NEXT:    ret
+  %out = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.lane.x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, i32 0)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %out
+}
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @aese_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
+; CHECK-LABEL: aese_x4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    aese { z0.b - z3.b }, { z0.b - z3.b }, z2.q[0]
+; CHECK-NEXT:    ret
+  %out= call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }  @llvm.aarch64.sve.aese.lane.x4(<vscale x 16 x i8> %a,
+                                                                                                 <vscale x 16 x i8> %b, <vscale x 16 x i8> %c,
+                                                                                                 <vscale x 16 x i8> %d, <vscale x 16 x i8> %c , i32 0)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %out
+}
+
+;
+; AESD
+;
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aesd_x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: aesd_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    aesd { z0.b, z1.b }, { z0.b, z1.b }, z2.q[0]
+; CHECK-NEXT:    ret
+  %out= call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.lane.x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, i32 0)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %out
+}
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @aesd_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
+; CHECK-LABEL: aesd_x4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    aesd { z0.b - z3.b }, { z0.b - z3.b }, z2.q[0]
+; CHECK-NEXT:    ret
+  %out= call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }  @llvm.aarch64.sve.aesd.lane.x4(<vscale x 16 x i8> %a,
+                                                                                                 <vscale x 16 x i8> %b, <vscale x 16 x i8> %c,
+                                                                                                 <vscale x 16 x i8> %d, <vscale x 16 x i8> %c , i32 0)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %out
+}
+
+
+;
+; AESEMC
+;
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aesemc_x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: aesemc_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    aesemc { z0.b, z1.b }, { z0.b, z1.b }, z2.q[0]
+; CHECK-NEXT:    ret
+  %out= call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.lane.x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, i32 0)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %out
+}
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @aesemc_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
+; CHECK-LABEL: aesemc_x4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    aesemc { z0.b - z3.b }, { z0.b - z3.b }, z2.q[0]
+; CHECK-NEXT:    ret
+  %out= call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }  @llvm.aarch64.sve.aesemc.lane.x4(<vscale x 16 x i8> %a,
+                                                                                                 <vscale x 16 x i8> %b, <vscale x 16 x i8> %c,
+                                                                                                 <vscale x 16 x i8> %d, <vscale x 16 x i8> %c , i32 0)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %out
+}
+
+;
+; AESDIMC
+;
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aesdimc_x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: aesdimc_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    aesdimc { z0.b, z1.b }, { z0.b, z1.b }, z2.q[0]
+; CHECK-NEXT:    ret
+  %out= call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.lane.x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, i32 0)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %out
+}
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @aesdimc_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
+; CHECK-LABEL: aesdimc_x4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    aesdimc { z0.b - z3.b }, { z0.b - z3.b }, z2.q[0]
+; CHECK-NEXT:    ret
+  %out= call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }  @llvm.aarch64.sve.aesdimc.lane.x4(<vscale x 16 x i8> %a,
+                                                                                                 <vscale x 16 x i8> %b, <vscale x 16 x i8> %c,
+                                                                                                 <vscale x 16 x i8> %d, <vscale x 16 x i8> %c , i32 0)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %out
+}
+
+;
+; PMULL
+;
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64> } @pmull_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: pmull_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pmull { z0.q, z1.q }, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %out = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.pair.x2(<vscale x 2 x i64> %a,
+                                                                                         <vscale x 2 x i64> %b)
+  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %out
+}
+
+
+;
+; PMLAL
+;
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64> } @pmlal_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) {
+; CHECK-LABEL: pmlal_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    pmlal { z0.q, z1.q }, z2.d, z3.d
+; CHECK-NEXT:    ret
+  %out = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.pair.x2(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c,
+                                                                                         <vscale x 2 x i64> %d)
+  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %out
+}


        