[clang] [llvm] [AArch64] Add intrinsics support for SVE2p2 instructions (PR #163575)

via cfe-commits Mon, 01 Dec 2025 08:24:24 -0800

https://github.com/Lukacma updated 
https://github.com/llvm/llvm-project/pull/163575


>From e7a2489497e4c896d1a2a425356f2c1dac2de1d4 Mon Sep 17 00:00:00 2001
From: Marian Lukac <[email protected]>
Date: Wed, 15 Oct 2025 15:09:35 +0000
Subject: [PATCH 1/6] [AArch64] Add intrinsics support for SVE2p2 instructions

---
 clang/include/clang/Basic/arm_sve.td          |  12 +-
 .../AArch64/sve-intrinsics/acle_sve_compact.c |   6 +
 .../sve2p2-intriniscs/acle_sve2p2_compact.c   | 142 ++++++++++
 .../sve2p2-intriniscs/acle_sve2p2_expand.c    | 243 ++++++++++++++++++
 .../sve2p2-intriniscs/acle_sve2p2_firstp.c    | 101 ++++++++
 .../sve2p2-intriniscs/acle_sve2p2_lastp.c     | 101 ++++++++
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |   3 +
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |   6 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  18 +-
 .../test/CodeGen/AArch64/sve2p2-intrinsics.ll | 173 +++++++++++++
 10 files changed, 793 insertions(+), 12 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c
 create mode 100644 
clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c
 create mode 100644 
clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c
 create mode 100644 
clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index d2b7b78b9970f..716c2cd68ffcc 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -984,6 +984,11 @@ let SMETargetGuard = "sme2p2" in {
 def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd",  "ilUiUlfd", MergeNone, 
"aarch64_sve_compact", [VerifyRuntimeMode]>;
 }
 
+let SVETargetGuard = "sve2p2|sme2p2",  SMETargetGuard = "sme2p2" in {
+def SVCOMPACT_BH : SInst<"svcompact[_{d}]", "dPd",  "cUcsUsmbh", MergeNone, 
"aarch64_sve_compact", [VerifyRuntimeMode]>;
+def SVEXPAND  : SInst<"svexpand[_{d}]",  "dPd",  "cUcsUsiUilUlmbhfd", 
MergeNone, "aarch64_sve_expand",  [VerifyRuntimeMode]>;
+}
+
 // Note: svdup_lane is implemented using the intrinsic for TBL to represent a
 // splat of any possible lane. It is upto LLVM to pick a more efficient
 // instruction such as DUP (indexed) if the lane index fits the range of the
@@ -1111,6 +1116,11 @@ def SVCNTD : SInst<"svcntd", "nv", "", MergeNone, 
"aarch64_sve_cntd", [IsAppendS
 def SVCNTP : SInst<"svcntp_{d}",  "nPP", "PcPsPiPl",         MergeNone, 
"aarch64_sve_cntp", [VerifyRuntimeMode]>;
 def SVLEN  : SInst<"svlen[_{d}]", "nd",  "csilUcUsUiUlhfdb", MergeNone, "", 
[VerifyRuntimeMode]>;
 
+let SVETargetGuard = "sve2p2|sme2p2",  SMETargetGuard = "sve2p2|sme2p2" in {
+  def SVFIRSTP  : SInst<"svfirstp_{d}", "lPP", "PcPsPiPl", MergeNone, 
"aarch64_sve_firstp", [VerifyRuntimeMode], []>;
+  def SVLASTP  : SInst<"svlastp_{d}", "lPP", "PcPsPiPl", MergeNone, 
"aarch64_sve_lastp", [VerifyRuntimeMode], []>;
+}
+
 
////////////////////////////////////////////////////////////////////////////////
 // Saturating scalar arithmetic
 
@@ -2388,4 +2398,4 @@ let SVETargetGuard = "sve2,fp8fma", SMETargetGuard = 
"ssve-fp8fma" in {
   def SVFMLALLBT_LANE : SInst<"svmlallbt_lane[_f32_mf8]", "dd~~i>", "f", 
MergeNone, "aarch64_sve_fp8_fmlallbt_lane", [VerifyRuntimeMode], [ImmCheck<3, 
ImmCheck0_7>]>;
   def SVFMLALLTB_LANE : SInst<"svmlalltb_lane[_f32_mf8]", "dd~~i>", "f", 
MergeNone, "aarch64_sve_fp8_fmlalltb_lane", [VerifyRuntimeMode], [ImmCheck<3, 
ImmCheck0_7>]>;
   def SVFMLALLTT_LANE : SInst<"svmlalltt_lane[_f32_mf8]", "dd~~i>", "f", 
MergeNone, "aarch64_sve_fp8_fmlalltt_lane", [VerifyRuntimeMode], [ImmCheck<3, 
ImmCheck0_7>]>;
-}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c 
b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c
index 4c18969e78f0c..75ee18cb134d7 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c
@@ -14,6 +14,12 @@
 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif
 
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
 // CHECK-LABEL: @test_svcompact_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c 
b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c
new file mode 100644
index 0000000000000..8bee2ed1121a6
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c
@@ -0,0 +1,142 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p2  -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#ifdef __ARM_FEATURE_SME
+#include "arm_sme.h"
+#else
+#include "arm_sve.h"
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// Overloaded forms use only A1 and A3; four slots fit any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: @test_svcompact_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svcompact_s8u10__SVBool_tu10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svcompact_s8(svbool_t pg, svint8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_s8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
i16> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcompact_s16u10__SVBool_tu11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
i16> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svcompact_s16(svbool_t pg, svint16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_s16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svcompact_u8u10__SVBool_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svcompact_u8(svbool_t pg, svuint8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_u8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
i16> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcompact_u16u10__SVBool_tu12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
i16> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svcompact_u16(svbool_t pg, svuint16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_u16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcompact_mf8u10__SVBool_tu13__SVMfloat8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svcompact_mf8(svbool_t pg, svmfloat8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_mf8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.compact.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
half> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcompact_f16u10__SVBool_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.compact.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
half> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svcompact_f16(svbool_t pg, svfloat16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_f16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.compact.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
bfloat> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svcompact_bf16u10__SVBool_tu14__SVBfloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.compact.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
bfloat> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svcompact_bf16(svbool_t pg, svbfloat16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_bf16,,)(pg, op);
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c 
b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c
new file mode 100644
index 0000000000000..ece0ce795df39
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c
@@ -0,0 +1,243 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p2  -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+#ifdef __ARM_FEATURE_SME
+#include "arm_sme.h"
+#else
+#include "arm_sve.h"
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// Overloaded forms use only A1 and A3; four slots fit any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: @test_svexpand_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svexpand_s8u10__SVBool_tu10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svexpand_s8(svbool_t pg, svint8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_s8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.expand.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> 
[[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_s16u10__SVBool_tu11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.expand.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> 
[[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svexpand_s16(svbool_t pg, svint16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_s16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svexpand_u8u10__SVBool_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svexpand_u8(svbool_t pg, svuint8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_u8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.expand.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> 
[[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_u16u10__SVBool_tu12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.expand.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> 
[[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svexpand_u16(svbool_t pg, svuint16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_u16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_mf8u10__SVBool_tu13__SVMfloat8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svexpand_mf8(svbool_t pg, svmfloat8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_mf8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.expand.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
half> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_f16u10__SVBool_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.expand.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
half> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svexpand_f16(svbool_t pg, svfloat16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_f16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.expand.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
bfloat> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svexpand_bf16u10__SVBool_tu14__SVBfloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.expand.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
bfloat> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svexpand_bf16(svbool_t pg, svbfloat16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_bf16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> 
@llvm.aarch64.sve.expand.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> 
[[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_s32u10__SVBool_tu11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> 
@llvm.aarch64.sve.expand.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> 
[[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svexpand_s32(svbool_t pg, svint32_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_s32,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> 
@llvm.aarch64.sve.expand.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> 
[[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_s64u10__SVBool_tu11__SVInt64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> 
@llvm.aarch64.sve.expand.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> 
[[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svexpand_s64(svbool_t pg, svint64_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_s64,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> 
@llvm.aarch64.sve.expand.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> 
[[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> 
@llvm.aarch64.sve.expand.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> 
[[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svexpand_u32(svbool_t pg, svuint32_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_u32,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> 
@llvm.aarch64.sve.expand.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> 
[[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> 
@llvm.aarch64.sve.expand.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> 
[[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svexpand_u64(svbool_t pg, svuint64_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_u64,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.expand.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x 
float> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_f32u10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.expand.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x 
float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svexpand_f32(svbool_t pg, svfloat32_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_f32,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.expand.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x 
double> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_f64u10__SVBool_tu13__SVFloat64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.expand.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x 
double> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svexpand_f64(svbool_t pg, svfloat64_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_f64,,)(pg, op);
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c 
b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c
new file mode 100644
index 0000000000000..1656f10a83a90
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c
@@ -0,0 +1,101 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#ifdef __ARM_FEATURE_SME
+#include "arm_sme.h"
+#else
+#include "arm_sve.h"
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local i64 @test_svfirstp_b8(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 
@llvm.aarch64.sve.firstp.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> 
[[OP]])
+// CHECK-NEXT:    ret i64 [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local noundef i64 
@_Z16test_svfirstp_b8u10__SVBool_tS_(
+// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> 
[[OP:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 
@llvm.aarch64.sve.firstp.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> 
[[OP]])
+// CPP-CHECK-NEXT:    ret i64 [[TMP0]]
+//
+int64_t test_svfirstp_b8(svbool_t pg, svbool_t op) STREAMING
+{
+  return svfirstp_b8(pg, op);
+}
+
+// CHECK-LABEL: define dso_local i64 @test_svfirstp_b16(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 
@llvm.aarch64.sve.firstp.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> 
[[TMP1]])
+// CHECK-NEXT:    ret i64 [[TMP2]]
+//
+// CPP-CHECK-LABEL: define dso_local noundef i64 
@_Z17test_svfirstp_b16u10__SVBool_tS_(
+// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> 
[[OP:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 
@llvm.aarch64.sve.firstp.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> 
[[TMP1]])
+// CPP-CHECK-NEXT:    ret i64 [[TMP2]]
+//
+int64_t test_svfirstp_b16(svbool_t pg, svbool_t op) STREAMING
+{
+  return svfirstp_b16(pg, op);
+}
+
+// CHECK-LABEL: define dso_local i64 @test_svfirstp_b32(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 
@llvm.aarch64.sve.firstp.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> 
[[TMP1]])
+// CHECK-NEXT:    ret i64 [[TMP2]]
+//
+// CPP-CHECK-LABEL: define dso_local noundef i64 
@_Z17test_svfirstp_b32u10__SVBool_tS_(
+// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> 
[[OP:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 
@llvm.aarch64.sve.firstp.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> 
[[TMP1]])
+// CPP-CHECK-NEXT:    ret i64 [[TMP2]]
+//
+int64_t test_svfirstp_b32(svbool_t pg, svbool_t op) STREAMING
+{
+  return svfirstp_b32(pg, op);
+}
+
+// CHECK-LABEL: define dso_local i64 @test_svfirstp_b64(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 
@llvm.aarch64.sve.firstp.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> 
[[TMP1]])
+// CHECK-NEXT:    ret i64 [[TMP2]]
+//
+// CPP-CHECK-LABEL: define dso_local noundef i64 
@_Z17test_svfirstp_b64u10__SVBool_tS_(
+// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> 
[[OP:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 
@llvm.aarch64.sve.firstp.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> 
[[TMP1]])
+// CPP-CHECK-NEXT:    ret i64 [[TMP2]]
+//
+int64_t test_svfirstp_b64(svbool_t pg, svbool_t op) STREAMING
+{
+  return svfirstp_b64(pg, op);
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c 
b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c
new file mode 100644
index 0000000000000..bfe82af07f20c
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c
@@ -0,0 +1,101 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#ifdef __ARM_FEATURE_SME
+#include "arm_sme.h"
+#else
+#include "arm_sve.h"
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local i64 @test_svlastp_b8(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 
@llvm.aarch64.sve.lastp.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> 
[[OP]])
+// CHECK-NEXT:    ret i64 [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local noundef i64 
@_Z15test_svlastp_b8u10__SVBool_tS_(
+// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> 
[[OP:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 
@llvm.aarch64.sve.lastp.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> 
[[OP]])
+// CPP-CHECK-NEXT:    ret i64 [[TMP0]]
+//
+int64_t test_svlastp_b8(svbool_t pg, svbool_t op) STREAMING
+{
+  return svlastp_b8(pg, op);
+}
+
+// CHECK-LABEL: define dso_local i64 @test_svlastp_b16(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 
@llvm.aarch64.sve.lastp.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> 
[[TMP1]])
+// CHECK-NEXT:    ret i64 [[TMP2]]
+//
+// CPP-CHECK-LABEL: define dso_local noundef i64 
@_Z16test_svlastp_b16u10__SVBool_tS_(
+// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> 
[[OP:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 
@llvm.aarch64.sve.lastp.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> 
[[TMP1]])
+// CPP-CHECK-NEXT:    ret i64 [[TMP2]]
+//
+int64_t test_svlastp_b16(svbool_t pg, svbool_t op) STREAMING
+{
+  return svlastp_b16(pg, op);
+}
+
+// CHECK-LABEL: define dso_local i64 @test_svlastp_b32(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 
@llvm.aarch64.sve.lastp.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> 
[[TMP1]])
+// CHECK-NEXT:    ret i64 [[TMP2]]
+//
+// CPP-CHECK-LABEL: define dso_local noundef i64 
@_Z16test_svlastp_b32u10__SVBool_tS_(
+// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> 
[[OP:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 
@llvm.aarch64.sve.lastp.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> 
[[TMP1]])
+// CPP-CHECK-NEXT:    ret i64 [[TMP2]]
+//
+int64_t test_svlastp_b32(svbool_t pg, svbool_t op) STREAMING
+{
+  return svlastp_b32(pg, op);
+}
+
+// CHECK-LABEL: define dso_local i64 @test_svlastp_b64(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 
@llvm.aarch64.sve.lastp.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> 
[[TMP1]])
+// CHECK-NEXT:    ret i64 [[TMP2]]
+//
+// CPP-CHECK-LABEL: define dso_local noundef i64 
@_Z16test_svlastp_b64u10__SVBool_tS_(
+// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> 
[[OP:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 
@llvm.aarch64.sve.lastp.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> 
[[TMP1]])
+// CPP-CHECK-NEXT:    ret i64 [[TMP2]]
+//
+int64_t test_svlastp_b64(svbool_t pg, svbool_t op) STREAMING
+{
+  return svlastp_b64(pg, op);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index b0269eec3347a..af8784ed58c75 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1930,6 +1930,8 @@ def int_aarch64_sve_cntw : AdvSIMD_SVE_CNTB_Intrinsic;
 def int_aarch64_sve_cntd : AdvSIMD_SVE_CNTB_Intrinsic;
 
 def int_aarch64_sve_cntp : AdvSIMD_SVE_CNTP_Intrinsic;
+def int_aarch64_sve_firstp : AdvSIMD_SVE_CNTP_Intrinsic;
+def int_aarch64_sve_lastp  : AdvSIMD_SVE_CNTP_Intrinsic;
 
 //
 // FFR manipulation
@@ -2026,6 +2028,7 @@ def int_aarch64_sve_clasta_n  : AdvSIMD_SVE_ReduceWithInit_Intrinsic;
 def int_aarch64_sve_clastb    : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_clastb_n  : AdvSIMD_SVE_ReduceWithInit_Intrinsic;
 def int_aarch64_sve_compact   : AdvSIMD_Pred1VectorArg_Intrinsic;
+def int_aarch64_sve_expand   : AdvSIMD_Pred1VectorArg_Intrinsic;
 def int_aarch64_sve_dupq_lane : AdvSIMD_SVE_DUPQ_Intrinsic;
 def int_aarch64_sve_dup_laneq : SVE2_1VectorArgIndexed_Intrinsic;
 def int_aarch64_sve_ext       : AdvSIMD_2VectorArgIndexed_Intrinsic;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 98a128e582866..2d9ab2cb033b1 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4604,8 +4604,8 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
   defm UXTW_ZPzZ  : sve_int_un_pred_arit_d_z<0b101, "uxtw", AArch64uxt_mt>;
 
   // SVE predicate count
-  defm FIRSTP_XPP : sve_int_pcount_pred_tmp<0b001, "firstp">;
-  defm LASTP_XPP  : sve_int_pcount_pred_tmp<0b010, "lastp">;
+  defm FIRSTP_XPP : sve_int_pcount_pred<0b001, "firstp", int_aarch64_sve_firstp>;
+  defm LASTP_XPP  : sve_int_pcount_pred<0b010, "lastp", int_aarch64_sve_lastp>;
 
   // SVE reverse within elements, zeroing predicate
   defm RBIT_ZPzZ : sve_int_perm_rev_rbit_z<"rbit", AArch64rbit_mt>;
@@ -4620,7 +4620,7 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
 //===----------------------------------------------------------------------===//
 let Predicates = [HasNonStreamingSVE2p2_or_SME2p2] in {
   // SVE2 EXPAND
-  defm EXPAND_ZPZ : sve2_int_perm_expand<"expand">;
+  defm EXPAND_ZPZ : sve2_int_perm_expand<"expand", int_aarch64_sve_expand>;
   // SVE COMPACT - byte and halfword
   defm COMPACT_ZPZ : sve_int_perm_compact_bh<"compact", int_aarch64_sve_compact>;
 }
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 3cdd505f12116..fe5e6a9e3583a 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1173,13 +1173,6 @@ multiclass sve_int_pcount_pred<bits<3> opc, string asm,
   def : SVE_2_Op_Pat<i64, int_op, nxv4i1,  nxv4i1,  !cast<Instruction>(NAME # _S)>;
   def : SVE_2_Op_Pat<i64, int_op, nxv2i1,  nxv2i1,  !cast<Instruction>(NAME # _D)>;
 }
-
-multiclass sve_int_pcount_pred_tmp<bits<3> opc, string asm> {
-  def _B : sve_int_pcount_pred<0b00, opc, asm, PPR8>;
-  def _H : sve_int_pcount_pred<0b01, opc, asm, PPR16>;
-  def _S : sve_int_pcount_pred<0b10, opc, asm, PPR32>;
-  def _D : sve_int_pcount_pred<0b11, opc, asm, PPR64>;
-}
 //===----------------------------------------------------------------------===//
 // SVE Element Count Group
 //===----------------------------------------------------------------------===//
@@ -7678,11 +7671,20 @@ class sve2_int_perm_expand<bits<2> sz, string asm,
   let hasSideEffects = 0;
 }
 
-multiclass sve2_int_perm_expand<string asm> {
+multiclass sve2_int_perm_expand<string asm, SDPatternOperator op> {
   def _B : sve2_int_perm_expand<0b00, asm, ZPR8>;
   def _H : sve2_int_perm_expand<0b01, asm, ZPR16>;
   def _S : sve2_int_perm_expand<0b10, asm, ZPR32>;
   def _D : sve2_int_perm_expand<0b11, asm, ZPR64>;
+
+  def : SVE_2_Op_Pat<nxv16i8,  op, nxv16i1, nxv16i8,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<nxv8i16,  op, nxv8i1,  nxv8i16,  !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv8f16,  op, nxv8i1,  nxv8f16,  !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv8bf16, op, nxv8i1,  nxv8bf16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i32,  op, nxv4i1,  nxv4i32,  !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv4f32,  op, nxv4i1,  nxv4f32,  !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i64,  op, nxv2i1,  nxv2i64,  !cast<Instruction>(NAME # _D)>;
+  def : SVE_2_Op_Pat<nxv2f64,  op, nxv2i1,  nxv2f64,  !cast<Instruction>(NAME # _D)>;
 }
 
 class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll b/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll
index 6017070b114a5..e4d25407add4a 100644
--- a/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll
@@ -87,6 +87,179 @@ define <vscale x 8 x bfloat> @compact_bf16(<vscale x 8 x 
i1> %pg, <vscale x 8 x
   ret <vscale x 8 x bfloat> %out
 }
 
+;
+; EXPAND
+;
+
+define <vscale x 16 x i8> @expand_i8(<vscale x 16 x i1> %pg, <vscale x 16 x 
i8> %a) {
+; CHECK-LABEL: expand_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    expand z0.b, p0, z0.b
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 
x i1> %pg,
+                                                                   <vscale x 
16 x i8> %a)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @expand_i16(<vscale x 8 x i1> %pg, <vscale x 8 x 
i16> %a) {
+; CHECK-LABEL: expand_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    expand z0.h, p0, z0.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.expand.nxv8i16(<vscale x 8 
x i1> %pg,
+                                                                   <vscale x 8 
x i16> %a)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @expand_i32(<vscale x 4 x i1> %pg, <vscale x 4 x 
i32> %a) {
+; CHECK-LABEL: expand_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    expand z0.s, p0, z0.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.expand.nxv4i32(<vscale x 4 
x i1> %pg,
+                                                                   <vscale x 4 
x i32> %a)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @expand_i64(<vscale x 2 x i1> %pg, <vscale x 2 x 
i64> %a) {
+; CHECK-LABEL: expand_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    expand z0.d, p0, z0.d
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.expand.nxv2i64(<vscale x 2 
x i1> %pg,
+                                                                   <vscale x 2 
x i64> %a)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 8 x half> @expand_f16(<vscale x 8 x i1> %pg, <vscale x 8 x 
half> %a) {
+; CHECK-LABEL: expand_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    expand z0.h, p0, z0.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.expand.nxv8f16(<vscale x 8 
x i1> %pg,
+                                                                    <vscale x 
8 x half> %a)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @expand_f32(<vscale x 4 x i1> %pg, <vscale x 4 x 
float> %a) {
+; CHECK-LABEL: expand_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    expand z0.s, p0, z0.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.expand.nxv4f32(<vscale x 
4 x i1> %pg,
+                                                                     <vscale x 
4 x float> %a)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @expand_f64(<vscale x 2 x i1> %pg, <vscale x 2 x 
double> %a) {
+; CHECK-LABEL: expand_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    expand z0.d, p0, z0.d
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.expand.nxv2f64(<vscale x 
2 x i1> %pg,
+                                                                      <vscale 
x 2 x double> %a)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x bfloat> @expand_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x 
bfloat> %a) {
+; CHECK-LABEL: expand_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    expand z0.h, p0, z0.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.expand.nxv8bf16(<vscale 
x 8 x i1> %pg,
+                                                                       <vscale 
x 8 x bfloat> %a)
+  ret <vscale x 8 x bfloat> %out
+}
+
+;
+; FIRSTP
+;
+
+define i64 @firstp_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
+; CHECK-LABEL: firstp_b8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    firstp x0, p0, p1.b
+; CHECK-NEXT:    ret
+  %out = call i64 @llvm.aarch64.sve.firstp.nxv16i1(<vscale x 16 x i1> %pg,
+                                                 <vscale x 16 x i1> %a)
+  ret i64 %out
+}
+
+define i64 @firstp_b16(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %a) {
+; CHECK-LABEL: firstp_b16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    firstp x0, p0, p1.h
+; CHECK-NEXT:    ret
+  %out = call i64 @llvm.aarch64.sve.firstp.nxv8i1(<vscale x 8 x i1> %pg,
+                                                <vscale x 8 x i1> %a)
+  ret i64 %out
+}
+
+define i64 @firstp_b32(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %a) {
+; CHECK-LABEL: firstp_b32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    firstp x0, p0, p1.s
+; CHECK-NEXT:    ret
+  %out = call i64 @llvm.aarch64.sve.firstp.nxv4i1(<vscale x 4 x i1> %pg,
+                                                <vscale x 4 x i1> %a)
+  ret i64 %out
+}
+
+define i64 @firstp_b64(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %a) {
+; CHECK-LABEL: firstp_b64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    firstp x0, p0, p1.d
+; CHECK-NEXT:    ret
+  %out = call i64 @llvm.aarch64.sve.firstp.nxv2i1(<vscale x 2 x i1> %pg,
+                                                <vscale x 2 x i1> %a)
+  ret i64 %out
+}
+
+;
+; LASTP
+;
+
+define i64 @lastp_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
+; CHECK-LABEL: lastp_b8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lastp x0, p0, p1.b
+; CHECK-NEXT:    ret
+  %out = call i64 @llvm.aarch64.sve.lastp.nxv16i1(<vscale x 16 x i1> %pg,
+                                                 <vscale x 16 x i1> %a)
+  ret i64 %out
+}
+
+define i64 @lastp_b16(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %a) {
+; CHECK-LABEL: lastp_b16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lastp x0, p0, p1.h
+; CHECK-NEXT:    ret
+  %out = call i64 @llvm.aarch64.sve.lastp.nxv8i1(<vscale x 8 x i1> %pg,
+                                                <vscale x 8 x i1> %a)
+  ret i64 %out
+}
+
+define i64 @lastp_b32(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %a) {
+; CHECK-LABEL: lastp_b32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lastp x0, p0, p1.s
+; CHECK-NEXT:    ret
+  %out = call i64 @llvm.aarch64.sve.lastp.nxv4i1(<vscale x 4 x i1> %pg,
+                                                <vscale x 4 x i1> %a)
+  ret i64 %out
+}
+
+define i64 @lastp_b64(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %a) {
+; CHECK-LABEL: lastp_b64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lastp x0, p0, p1.d
+; CHECK-NEXT:    ret
+  %out = call i64 @llvm.aarch64.sve.lastp.nxv2i1(<vscale x 2 x i1> %pg,
+                                                <vscale x 2 x i1> %a)
+  ret i64 %out
+}
+
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x 
i1>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x 
i1>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x 
i1>, <vscale x 4 x i32>)

>From 3672fdf4296e997b6b9ec14bbe3b235d4e9c9b39 Mon Sep 17 00:00:00 2001
From: Lukacma <[email protected]>
Date: Thu, 16 Oct 2025 15:35:26 +0100
Subject: [PATCH 2/6] Apply suggestion from @Lukacma

---
 llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index af8784ed58c75..2a06867cb31a7 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2028,7 +2028,7 @@ def int_aarch64_sve_clasta_n  : 
AdvSIMD_SVE_ReduceWithInit_Intrinsic;
 def int_aarch64_sve_clastb    : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_clastb_n  : AdvSIMD_SVE_ReduceWithInit_Intrinsic;
 def int_aarch64_sve_compact   : AdvSIMD_Pred1VectorArg_Intrinsic;
-def int_aarch64_sve_expand   : AdvSIMD_Pred1VectorArg_Intrinsic;
+def int_aarch64_sve_expand    : AdvSIMD_Pred1VectorArg_Intrinsic;
 def int_aarch64_sve_dupq_lane : AdvSIMD_SVE_DUPQ_Intrinsic;
 def int_aarch64_sve_dup_laneq : SVE2_1VectorArgIndexed_Intrinsic;
 def int_aarch64_sve_ext       : AdvSIMD_2VectorArgIndexed_Intrinsic;

>From d2923dd30058038750fe3af02f85749eeae980d2 Mon Sep 17 00:00:00 2001
From: Lukacma <[email protected]>
Date: Fri, 14 Nov 2025 10:52:00 +0000
Subject: [PATCH 3/6] Update
 clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c

Co-authored-by: Kerry McLaughlin <[email protected]>
---
 .../CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c 
b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c
index 8bee2ed1121a6..882ee36e69a2b 100644
--- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c
@@ -17,9 +17,9 @@
 
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
 #else
-#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
 #endif
 
 #ifdef __ARM_FEATURE_SME

>From d79baadb91744b9c2b054b1b68f75abf5af127ca Mon Sep 17 00:00:00 2001
From: Marian Lukac <[email protected]>
Date: Fri, 14 Nov 2025 12:22:50 +0000
Subject: [PATCH 4/6] Address Review comments

---
 .../sve2p2-intriniscs/acle_sve2p2_compact.c   | 22 +++++-------
 .../sve2p2-intriniscs/acle_sve2p2_expand.c    | 34 ++++++++-----------
 .../sve2p2-intriniscs/acle_sve2p2_firstp.c    |  7 ++--
 .../sve2p2-intriniscs/acle_sve2p2_lastp.c     |  7 ++--
 4 files changed, 28 insertions(+), 42 deletions(-)

diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c 
b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c
index 882ee36e69a2b..8a7cafd94b64e 100644
--- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c
@@ -11,8 +11,10 @@
 
 #ifdef __ARM_FEATURE_SME
 #include "arm_sme.h"
+#define STREAMING __arm_streaming
 #else
 #include "arm_sve.h"
+#define STREAMING
 #endif
 
 #ifdef SVE_OVERLOADED_FORMS
@@ -22,12 +24,6 @@
 #define SVE_ACLE_FUNC(A1,A2) A1##A2
 #endif
 
-#ifdef __ARM_FEATURE_SME
-#define STREAMING __arm_streaming
-#else
-#define STREAMING
-#endif
-
 // CHECK-LABEL: @test_svcompact_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
@@ -40,7 +36,7 @@
 //
 svint8_t test_svcompact_s8(svbool_t pg, svint8_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svcompact,_s8,,)(pg, op);
+  return SVE_ACLE_FUNC(svcompact,_s8)(pg, op);
 }
 
 // CHECK-LABEL: @test_svcompact_s16(
@@ -57,7 +53,7 @@ svint8_t test_svcompact_s8(svbool_t pg, svint8_t op) STREAMING
 //
 svint16_t test_svcompact_s16(svbool_t pg, svint16_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svcompact,_s16,,)(pg, op);
+  return SVE_ACLE_FUNC(svcompact,_s16)(pg, op);
 }
 
 // CHECK-LABEL: @test_svcompact_u8(
@@ -72,7 +68,7 @@ svint16_t test_svcompact_s16(svbool_t pg, svint16_t op) 
STREAMING
 //
 svuint8_t test_svcompact_u8(svbool_t pg, svuint8_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svcompact,_u8,,)(pg, op);
+  return SVE_ACLE_FUNC(svcompact,_u8)(pg, op);
 }
 
 // CHECK-LABEL: @test_svcompact_u16(
@@ -89,7 +85,7 @@ svuint8_t test_svcompact_u8(svbool_t pg, svuint8_t op) 
STREAMING
 //
 svuint16_t test_svcompact_u16(svbool_t pg, svuint16_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svcompact,_u16,,)(pg, op);
+  return SVE_ACLE_FUNC(svcompact,_u16)(pg, op);
 }
 
 // CHECK-LABEL: @test_svcompact_mf8(
@@ -104,7 +100,7 @@ svuint16_t test_svcompact_u16(svbool_t pg, svuint16_t op) 
STREAMING
 //
 svmfloat8_t test_svcompact_mf8(svbool_t pg, svmfloat8_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svcompact,_mf8,,)(pg, op);
+  return SVE_ACLE_FUNC(svcompact,_mf8)(pg, op);
 }
 
 // CHECK-LABEL: @test_svcompact_f16(
@@ -121,7 +117,7 @@ svmfloat8_t test_svcompact_mf8(svbool_t pg, svmfloat8_t op) 
STREAMING
 //
 svfloat16_t test_svcompact_f16(svbool_t pg, svfloat16_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svcompact,_f16,,)(pg, op);
+  return SVE_ACLE_FUNC(svcompact,_f16)(pg, op);
 }
 
 // CHECK-LABEL: @test_svcompact_bf16(
@@ -138,5 +134,5 @@ svfloat16_t test_svcompact_f16(svbool_t pg, svfloat16_t op) 
STREAMING
 //
 svbfloat16_t test_svcompact_bf16(svbool_t pg, svbfloat16_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svcompact,_bf16,,)(pg, op);
+  return SVE_ACLE_FUNC(svcompact,_bf16)(pg, op);
 }
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c 
b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c
index ece0ce795df39..c89fd657bc6b7 100644
--- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c
@@ -10,8 +10,10 @@
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 #ifdef __ARM_FEATURE_SME
 #include "arm_sme.h"
+#define STREAMING __arm_streaming
 #else
 #include "arm_sve.h"
+#define STREAMING
 #endif
 
 #ifdef SVE_OVERLOADED_FORMS
@@ -21,12 +23,6 @@
 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif
 
-#ifdef __ARM_FEATURE_SME
-#define STREAMING __arm_streaming
-#else
-#define STREAMING
-#endif
-
 // CHECK-LABEL: @test_svexpand_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
@@ -39,7 +35,7 @@
 //
 svint8_t test_svexpand_s8(svbool_t pg, svint8_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svexpand,_s8,,)(pg, op);
+  return SVE_ACLE_FUNC(svexpand,_s8)(pg, op);
 }
 
 // CHECK-LABEL: @test_svexpand_s16(
@@ -56,7 +52,7 @@ svint8_t test_svexpand_s8(svbool_t pg, svint8_t op) STREAMING
 //
 svint16_t test_svexpand_s16(svbool_t pg, svint16_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svexpand,_s16,,)(pg, op);
+  return SVE_ACLE_FUNC(svexpand,_s16)(pg, op);
 }
 
 // CHECK-LABEL: @test_svexpand_u8(
@@ -71,7 +67,7 @@ svint16_t test_svexpand_s16(svbool_t pg, svint16_t op) 
STREAMING
 //
 svuint8_t test_svexpand_u8(svbool_t pg, svuint8_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svexpand,_u8,,)(pg, op);
+  return SVE_ACLE_FUNC(svexpand,_u8)(pg, op);
 }
 
 // CHECK-LABEL: @test_svexpand_u16(
@@ -88,7 +84,7 @@ svuint8_t test_svexpand_u8(svbool_t pg, svuint8_t op) 
STREAMING
 //
 svuint16_t test_svexpand_u16(svbool_t pg, svuint16_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svexpand,_u16,,)(pg, op);
+  return SVE_ACLE_FUNC(svexpand,_u16)(pg, op);
 }
 
 // CHECK-LABEL: @test_svexpand_mf8(
@@ -103,7 +99,7 @@ svuint16_t test_svexpand_u16(svbool_t pg, svuint16_t op) 
STREAMING
 //
 svmfloat8_t test_svexpand_mf8(svbool_t pg, svmfloat8_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svexpand,_mf8,,)(pg, op);
+  return SVE_ACLE_FUNC(svexpand,_mf8)(pg, op);
 }
 
 // CHECK-LABEL: @test_svexpand_f16(
@@ -120,7 +116,7 @@ svmfloat8_t test_svexpand_mf8(svbool_t pg, svmfloat8_t op) 
STREAMING
 //
 svfloat16_t test_svexpand_f16(svbool_t pg, svfloat16_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svexpand,_f16,,)(pg, op);
+  return SVE_ACLE_FUNC(svexpand,_f16)(pg, op);
 }
 
 // CHECK-LABEL: @test_svexpand_bf16(
@@ -137,7 +133,7 @@ svfloat16_t test_svexpand_f16(svbool_t pg, svfloat16_t op) 
STREAMING
 //
 svbfloat16_t test_svexpand_bf16(svbool_t pg, svbfloat16_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svexpand,_bf16,,)(pg, op);
+  return SVE_ACLE_FUNC(svexpand,_bf16)(pg, op);
 }
 
 // CHECK-LABEL: @test_svexpand_s32(
@@ -154,7 +150,7 @@ svbfloat16_t test_svexpand_bf16(svbool_t pg, svbfloat16_t 
op) STREAMING
 //
 svint32_t test_svexpand_s32(svbool_t pg, svint32_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svexpand,_s32,,)(pg, op);
+  return SVE_ACLE_FUNC(svexpand,_s32)(pg, op);
 }
 
 // CHECK-LABEL: @test_svexpand_s64(
@@ -171,7 +167,7 @@ svint32_t test_svexpand_s32(svbool_t pg, svint32_t op) 
STREAMING
 //
 svint64_t test_svexpand_s64(svbool_t pg, svint64_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svexpand,_s64,,)(pg, op);
+  return SVE_ACLE_FUNC(svexpand,_s64)(pg, op);
 }
 
 // CHECK-LABEL: @test_svexpand_u32(
@@ -188,7 +184,7 @@ svint64_t test_svexpand_s64(svbool_t pg, svint64_t op) 
STREAMING
 //
 svuint32_t test_svexpand_u32(svbool_t pg, svuint32_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svexpand,_u32,,)(pg, op);
+  return SVE_ACLE_FUNC(svexpand,_u32)(pg, op);
 }
 
 // CHECK-LABEL: @test_svexpand_u64(
@@ -205,7 +201,7 @@ svuint32_t test_svexpand_u32(svbool_t pg, svuint32_t op) 
STREAMING
 //
 svuint64_t test_svexpand_u64(svbool_t pg, svuint64_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svexpand,_u64,,)(pg, op);
+  return SVE_ACLE_FUNC(svexpand,_u64)(pg, op);
 }
 
 // CHECK-LABEL: @test_svexpand_f32(
@@ -222,7 +218,7 @@ svuint64_t test_svexpand_u64(svbool_t pg, svuint64_t op) 
STREAMING
 //
 svfloat32_t test_svexpand_f32(svbool_t pg, svfloat32_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svexpand,_f32,,)(pg, op);
+  return SVE_ACLE_FUNC(svexpand,_f32)(pg, op);
 }
 
 // CHECK-LABEL: @test_svexpand_f64(
@@ -239,5 +235,5 @@ svfloat32_t test_svexpand_f32(svbool_t pg, svfloat32_t op) 
STREAMING
 //
 svfloat64_t test_svexpand_f64(svbool_t pg, svfloat64_t op) STREAMING
 {
-  return SVE_ACLE_FUNC(svexpand,_f64,,)(pg, op);
+  return SVE_ACLE_FUNC(svexpand,_f64)(pg, op);
 }
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c 
b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c
index 1656f10a83a90..b72b34d778099 100644
--- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c
@@ -10,16 +10,13 @@
 
 #ifdef __ARM_FEATURE_SME
 #include "arm_sme.h"
-#else
-#include "arm_sve.h"
-#endif
-
-#ifdef __ARM_FEATURE_SME
 #define STREAMING __arm_streaming
 #else
+#include "arm_sve.h"
 #define STREAMING
 #endif
 
+
 // CHECK-LABEL: define dso_local i64 @test_svfirstp_b8(
 // CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) 
#[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c 
b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c
index bfe82af07f20c..cca33e00d374d 100644
--- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c
@@ -10,16 +10,13 @@
 
 #ifdef __ARM_FEATURE_SME
 #include "arm_sme.h"
-#else
-#include "arm_sve.h"
-#endif
-
-#ifdef __ARM_FEATURE_SME
 #define STREAMING __arm_streaming
 #else
+#include "arm_sve.h"
 #define STREAMING
 #endif
 
+
 // CHECK-LABEL: define dso_local i64 @test_svlastp_b8(
 // CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]

>From e09d738c41fe4ec87254c7ac186c3f986c4726b6 Mon Sep 17 00:00:00 2001
From: Marian Lukac <[email protected]>
Date: Fri, 14 Nov 2025 13:13:31 +0000
Subject: [PATCH 5/6] test fix

---
 .../CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c
index c89fd657bc6b7..1b90822d6b8d4 100644
--- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c
@@ -18,9 +18,9 @@
 
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
 #else
-#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
 #endif
 
 // CHECK-LABEL: @test_svexpand_s8(

>From e2f1d96ed126b82651d2fbfc9c203a9ef9dfaefc Mon Sep 17 00:00:00 2001
From: Marian Lukac <[email protected]>
Date: Mon, 1 Dec 2025 16:23:43 +0000
Subject: [PATCH 6/6] Address review comments

---
 .../test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c  | 6 ------
 .../CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c | 1 +
 .../CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c  | 2 ++
 .../CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c  | 1 +
 .../CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c   | 1 +
 5 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c
index 75ee18cb134d7..4c18969e78f0c 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c
@@ -14,12 +14,6 @@
 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif
 
-#ifdef __ARM_FEATURE_SME
-#define STREAMING __arm_streaming
-#else
-#define STREAMING
-#endif
-
 // CHECK-LABEL: @test_svcompact_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c
index 8a7cafd94b64e..5d0886f78258d 100644
--- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c
@@ -8,6 +8,7 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2  -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #ifdef __ARM_FEATURE_SME
 #include "arm_sme.h"
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c
index 1b90822d6b8d4..1fc30930c4de3 100644
--- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c
@@ -8,6 +8,8 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2  -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #ifdef __ARM_FEATURE_SME
 #include "arm_sme.h"
 #define STREAMING __arm_streaming
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c
index b72b34d778099..b052592e44dc4 100644
--- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c
@@ -7,6 +7,7 @@
 
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #ifdef __ARM_FEATURE_SME
 #include "arm_sme.h"
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c
index cca33e00d374d..8e53f2f8193f0 100644
--- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c
@@ -7,6 +7,7 @@
 
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #ifdef __ARM_FEATURE_SME
 #include "arm_sme.h"

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AArch64] Add intrinsics support for SVE2p2 instructions (PR #163575)

Reply via email to