https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/163575
>From e7a2489497e4c896d1a2a425356f2c1dac2de1d4 Mon Sep 17 00:00:00 2001 From: Marian Lukac <[email protected]> Date: Wed, 15 Oct 2025 15:09:35 +0000 Subject: [PATCH 1/6] [AArch64] Add intrinsics support for SVE2p2 instructions --- clang/include/clang/Basic/arm_sve.td | 12 +- .../AArch64/sve-intrinsics/acle_sve_compact.c | 6 + .../sve2p2-intriniscs/acle_sve2p2_compact.c | 142 ++++++++++ .../sve2p2-intriniscs/acle_sve2p2_expand.c | 243 ++++++++++++++++++ .../sve2p2-intriniscs/acle_sve2p2_firstp.c | 101 ++++++++ .../sve2p2-intriniscs/acle_sve2p2_lastp.c | 101 ++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 6 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 18 +- .../test/CodeGen/AArch64/sve2p2-intrinsics.ll | 173 +++++++++++++ 10 files changed, 793 insertions(+), 12 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c create mode 100644 clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c create mode 100644 clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c create mode 100644 clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index d2b7b78b9970f..716c2cd68ffcc 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -984,6 +984,11 @@ let SMETargetGuard = "sme2p2" in { def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact", [VerifyRuntimeMode]>; } +let SVETargetGuard = "sve2p2|sme2p2", SMETargetGuard = "sme2p2" in { +def SVCOMPACT_BH : SInst<"svcompact[_{d}]", "dPd", "cUcsUsmbh", MergeNone, "aarch64_sve_compact", [VerifyRuntimeMode]>; +def SVEXPAND : SInst<"svexpand[_{d}]", "dPd", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_expand", [VerifyRuntimeMode]>; +} + // Note: svdup_lane is implemented using the intrinsic for TBL to represent a // splat of any possible lane. It is upto LLVM to pick a more efficient // instruction such as DUP (indexed) if the lane index fits the range of the @@ -1111,6 +1116,11 @@ def SVCNTD : SInst<"svcntd", "nv", "", MergeNone, "aarch64_sve_cntd", [IsAppendS def SVCNTP : SInst<"svcntp_{d}", "nPP", "PcPsPiPl", MergeNone, "aarch64_sve_cntp", [VerifyRuntimeMode]>; def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfdb", MergeNone, "", [VerifyRuntimeMode]>; +let SVETargetGuard = "sve2p2|sme2p2", SMETargetGuard = "sve2p2|sme2p2" in { + def SVFIRSTP : SInst<"svfirstp_{d}", "lPP", "PcPsPiPl", MergeNone, "aarch64_sve_firstp", [VerifyRuntimeMode], []>; + def SVLASTP : SInst<"svlastp_{d}", "lPP", "PcPsPiPl", MergeNone, "aarch64_sve_lastp", [VerifyRuntimeMode], []>; +} + //////////////////////////////////////////////////////////////////////////////// // Saturating scalar arithmetic @@ -2388,4 +2398,4 @@ let SVETargetGuard = "sve2,fp8fma", SMETargetGuard = "ssve-fp8fma" in { def SVFMLALLBT_LANE : SInst<"svmlallbt_lane[_f32_mf8]", "dd~~i>", "f", MergeNone, "aarch64_sve_fp8_fmlallbt_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; def SVFMLALLTB_LANE : SInst<"svmlalltb_lane[_f32_mf8]", "dd~~i>", "f", MergeNone, "aarch64_sve_fp8_fmlalltb_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; def SVFMLALLTT_LANE : SInst<"svmlalltt_lane[_f32_mf8]", "dd~~i>", "f", MergeNone, "aarch64_sve_fp8_fmlalltt_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; -} +} \ No newline at end of file diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c index 4c18969e78f0c..75ee18cb134d7 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c @@ -14,6 +14,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifdef __ARM_FEATURE_SME +#define STREAMING __arm_streaming +#else +#define STREAMING +#endif + // CHECK-LABEL: @test_svcompact_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c new file mode 100644 index 0000000000000..8bee2ed1121a6 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c @@ -0,0 +1,142 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s + +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#ifdef __ARM_FEATURE_SME +#include "arm_sme.h" +#else +#include "arm_sve.h" +#endif + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +#ifdef __ARM_FEATURE_SME +#define STREAMING __arm_streaming +#else +#define STREAMING +#endif + +// CHECK-LABEL: @test_svcompact_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svcompact_s8u10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svint8_t test_svcompact_s8(svbool_t pg, svint8_t op) STREAMING +{ + return SVE_ACLE_FUNC(svcompact,_s8,,)(pg, op); +} + +// CHECK-LABEL: @test_svcompact_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z18test_svcompact_s16u10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] +// +svint16_t test_svcompact_s16(svbool_t pg, svint16_t op) STREAMING +{ + return SVE_ACLE_FUNC(svcompact,_s16,,)(pg, op); +} + +// CHECK-LABEL: @test_svcompact_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svcompact_u8u10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svuint8_t test_svcompact_u8(svbool_t pg, svuint8_t op) STREAMING +{ + return SVE_ACLE_FUNC(svcompact,_u8,,)(pg, op); +} + +// CHECK-LABEL: @test_svcompact_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z18test_svcompact_u16u10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] +// +svuint16_t test_svcompact_u16(svbool_t pg, svuint16_t op) STREAMING +{ + return SVE_ACLE_FUNC(svcompact,_u16,,)(pg, op); +} + +// CHECK-LABEL: @test_svcompact_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcompact_mf8u10__SVBool_tu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_svcompact_mf8(svbool_t pg, svmfloat8_t op) STREAMING +{ + return SVE_ACLE_FUNC(svcompact,_mf8,,)(pg, op); +} + +// CHECK-LABEL: @test_svcompact_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.compact.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z18test_svcompact_f16u10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.compact.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +svfloat16_t test_svcompact_f16(svbool_t pg, svfloat16_t op) STREAMING +{ + return SVE_ACLE_FUNC(svcompact,_f16,,)(pg, op); +} + +// CHECK-LABEL: @test_svcompact_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.compact.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svcompact_bf16u10__SVBool_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.compact.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] +// +svbfloat16_t test_svcompact_bf16(svbool_t pg, svbfloat16_t op) STREAMING +{ + return SVE_ACLE_FUNC(svcompact,_bf16,,)(pg, op); +} diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c new file mode 100644 index 0000000000000..ece0ce795df39 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c @@ -0,0 +1,243 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s + +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#ifdef __ARM_FEATURE_SME +#include "arm_sme.h" +#else +#include "arm_sve.h" +#endif + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +#ifdef __ARM_FEATURE_SME +#define STREAMING __arm_streaming +#else +#define STREAMING +#endif + +// CHECK-LABEL: @test_svexpand_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svexpand_s8u10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svint8_t test_svexpand_s8(svbool_t pg, svint8_t op) STREAMING +{ + return SVE_ACLE_FUNC(svexpand,_s8,,)(pg, op); +} + +// CHECK-LABEL: @test_svexpand_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.expand.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svexpand_s16u10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.expand.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] +// +svint16_t test_svexpand_s16(svbool_t pg, svint16_t op) STREAMING +{ + return SVE_ACLE_FUNC(svexpand,_s16,,)(pg, op); +} + +// CHECK-LABEL: @test_svexpand_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svexpand_u8u10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svuint8_t test_svexpand_u8(svbool_t pg, svuint8_t op) STREAMING +{ + return SVE_ACLE_FUNC(svexpand,_u8,,)(pg, op); +} + +// CHECK-LABEL: @test_svexpand_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.expand.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svexpand_u16u10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.expand.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] +// +svuint16_t test_svexpand_u16(svbool_t pg, svuint16_t op) STREAMING +{ + return SVE_ACLE_FUNC(svexpand,_u16,,)(pg, op); +} + +// CHECK-LABEL: @test_svexpand_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svexpand_mf8u10__SVBool_tu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_svexpand_mf8(svbool_t pg, svmfloat8_t op) STREAMING +{ + return SVE_ACLE_FUNC(svexpand,_mf8,,)(pg, op); +} + +// CHECK-LABEL: @test_svexpand_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.expand.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svexpand_f16u10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.expand.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +svfloat16_t test_svexpand_f16(svbool_t pg, svfloat16_t op) STREAMING +{ + return SVE_ACLE_FUNC(svexpand,_f16,,)(pg, op); +} + +// CHECK-LABEL: @test_svexpand_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.expand.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z18test_svexpand_bf16u10__SVBool_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.expand.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] +// +svbfloat16_t test_svexpand_bf16(svbool_t pg, svbfloat16_t op) STREAMING +{ + return SVE_ACLE_FUNC(svexpand,_bf16,,)(pg, op); +} + +// CHECK-LABEL: @test_svexpand_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.expand.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svexpand_s32u10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.expand.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] +// +svint32_t test_svexpand_s32(svbool_t pg, svint32_t op) STREAMING +{ + return SVE_ACLE_FUNC(svexpand,_s32,,)(pg, op); +} + +// CHECK-LABEL: @test_svexpand_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.expand.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svexpand_s64u10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.expand.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] +// +svint64_t test_svexpand_s64(svbool_t pg, svint64_t op) STREAMING +{ + return SVE_ACLE_FUNC(svexpand,_s64,,)(pg, op); +} + +// CHECK-LABEL: @test_svexpand_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.expand.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svexpand_u32u10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.expand.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] +// +svuint32_t test_svexpand_u32(svbool_t pg, svuint32_t op) STREAMING +{ + return SVE_ACLE_FUNC(svexpand,_u32,,)(pg, op); +} + +// CHECK-LABEL: @test_svexpand_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.expand.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svexpand_u64u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.expand.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] +// +svuint64_t test_svexpand_u64(svbool_t pg, svuint64_t op) STREAMING +{ + return SVE_ACLE_FUNC(svexpand,_u64,,)(pg, op); +} + +// CHECK-LABEL: @test_svexpand_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.expand.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svexpand_f32u10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.expand.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +svfloat32_t test_svexpand_f32(svbool_t pg, svfloat32_t op) STREAMING +{ + return SVE_ACLE_FUNC(svexpand,_f32,,)(pg, op); +} + +// CHECK-LABEL: @test_svexpand_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.expand.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svexpand_f64u10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.expand.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +svfloat64_t test_svexpand_f64(svbool_t pg, svfloat64_t op) STREAMING +{ + return SVE_ACLE_FUNC(svexpand,_f64,,)(pg, op); +} diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c new file mode 100644 index 0000000000000..1656f10a83a90 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c @@ -0,0 +1,101 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s + +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#ifdef __ARM_FEATURE_SME +#include "arm_sme.h" +#else +#include "arm_sve.h" +#endif + +#ifdef __ARM_FEATURE_SME +#define STREAMING __arm_streaming +#else +#define STREAMING +#endif + +// CHECK-LABEL: define dso_local i64 @test_svfirstp_b8( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.firstp.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[OP]]) +// CHECK-NEXT: ret i64 [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local noundef i64 @_Z16test_svfirstp_b8u10__SVBool_tS_( +// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.firstp.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[OP]]) +// CPP-CHECK-NEXT: ret i64 [[TMP0]] +// +int64_t test_svfirstp_b8(svbool_t pg, svbool_t op) STREAMING +{ + return svfirstp_b8(pg, op); +} + +// CHECK-LABEL: define dso_local i64 @test_svfirstp_b16( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.firstp.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]]) +// CHECK-NEXT: ret i64 [[TMP2]] +// +// CPP-CHECK-LABEL: define dso_local noundef i64 @_Z17test_svfirstp_b16u10__SVBool_tS_( +// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.firstp.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]]) +// CPP-CHECK-NEXT: ret i64 [[TMP2]] +// +int64_t test_svfirstp_b16(svbool_t pg, svbool_t op) STREAMING +{ + return svfirstp_b16(pg, op); +} + +// CHECK-LABEL: define dso_local i64 @test_svfirstp_b32( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.firstp.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]]) +// CHECK-NEXT: ret i64 [[TMP2]] +// +// CPP-CHECK-LABEL: define dso_local noundef i64 @_Z17test_svfirstp_b32u10__SVBool_tS_( +// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.firstp.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]]) +// CPP-CHECK-NEXT: ret i64 [[TMP2]] +// +int64_t test_svfirstp_b32(svbool_t pg, svbool_t op) STREAMING +{ + return svfirstp_b32(pg, op); +} + +// CHECK-LABEL: define dso_local i64 @test_svfirstp_b64( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.firstp.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]]) +// CHECK-NEXT: ret i64 [[TMP2]] +// +// CPP-CHECK-LABEL: define dso_local noundef i64 @_Z17test_svfirstp_b64u10__SVBool_tS_( +// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.firstp.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]]) +// CPP-CHECK-NEXT: ret i64 [[TMP2]] +// +int64_t test_svfirstp_b64(svbool_t pg, svbool_t op) STREAMING +{ + return svfirstp_b64(pg, op); +} diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c new file mode 100644 index 0000000000000..bfe82af07f20c --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c @@ -0,0 +1,101 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s + +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#ifdef __ARM_FEATURE_SME +#include "arm_sme.h" +#else +#include "arm_sve.h" +#endif + +#ifdef __ARM_FEATURE_SME +#define STREAMING __arm_streaming +#else +#define STREAMING +#endif + +// CHECK-LABEL: define dso_local i64 @test_svlastp_b8( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.lastp.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[OP]]) +// CHECK-NEXT: ret i64 [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local noundef i64 @_Z15test_svlastp_b8u10__SVBool_tS_( +// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.lastp.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[OP]]) +// CPP-CHECK-NEXT: ret i64 [[TMP0]] +// +int64_t test_svlastp_b8(svbool_t pg, svbool_t op) STREAMING +{ + return svlastp_b8(pg, op); +} + +// CHECK-LABEL: define dso_local i64 @test_svlastp_b16( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.lastp.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]]) +// CHECK-NEXT: ret i64 [[TMP2]] +// +// CPP-CHECK-LABEL: define dso_local noundef i64 @_Z16test_svlastp_b16u10__SVBool_tS_( +// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.lastp.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]]) +// CPP-CHECK-NEXT: ret i64 [[TMP2]] +// +int64_t test_svlastp_b16(svbool_t pg, svbool_t op) STREAMING +{ + return svlastp_b16(pg, op); +} + +// CHECK-LABEL: define dso_local i64 @test_svlastp_b32( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.lastp.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]]) +// CHECK-NEXT: ret i64 [[TMP2]] +// +// CPP-CHECK-LABEL: define dso_local noundef i64 @_Z16test_svlastp_b32u10__SVBool_tS_( +// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.lastp.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]]) +// CPP-CHECK-NEXT: ret i64 [[TMP2]] +// +int64_t test_svlastp_b32(svbool_t pg, svbool_t op) STREAMING +{ + return svlastp_b32(pg, op); +} + +// CHECK-LABEL: define dso_local i64 @test_svlastp_b64( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.lastp.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]]) +// CHECK-NEXT: ret i64 [[TMP2]] +// +// CPP-CHECK-LABEL: define dso_local noundef i64 @_Z16test_svlastp_b64u10__SVBool_tS_( +// CPP-CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.lastp.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]]) +// CPP-CHECK-NEXT: ret i64 [[TMP2]] +// +int64_t test_svlastp_b64(svbool_t pg, svbool_t op) STREAMING +{ + return svlastp_b64(pg, op); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index b0269eec3347a..af8784ed58c75 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1930,6 +1930,8 @@ def int_aarch64_sve_cntw : AdvSIMD_SVE_CNTB_Intrinsic; def int_aarch64_sve_cntd : AdvSIMD_SVE_CNTB_Intrinsic; def int_aarch64_sve_cntp : AdvSIMD_SVE_CNTP_Intrinsic; +def int_aarch64_sve_firstp : AdvSIMD_SVE_CNTP_Intrinsic; +def int_aarch64_sve_lastp : AdvSIMD_SVE_CNTP_Intrinsic; // // FFR manipulation @@ -2026,6 +2028,7 @@ def int_aarch64_sve_clasta_n : AdvSIMD_SVE_ReduceWithInit_Intrinsic; def int_aarch64_sve_clastb : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_clastb_n : AdvSIMD_SVE_ReduceWithInit_Intrinsic; def int_aarch64_sve_compact : AdvSIMD_Pred1VectorArg_Intrinsic; +def int_aarch64_sve_expand : AdvSIMD_Pred1VectorArg_Intrinsic; def int_aarch64_sve_dupq_lane : AdvSIMD_SVE_DUPQ_Intrinsic; def int_aarch64_sve_dup_laneq : SVE2_1VectorArgIndexed_Intrinsic; def int_aarch64_sve_ext : AdvSIMD_2VectorArgIndexed_Intrinsic; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 98a128e582866..2d9ab2cb033b1 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4604,8 +4604,8 @@ let Predicates = [HasSVE2p2_or_SME2p2] in { defm UXTW_ZPzZ : sve_int_un_pred_arit_d_z<0b101, "uxtw", AArch64uxt_mt>; // SVE predicate count - defm FIRSTP_XPP : sve_int_pcount_pred_tmp<0b001, "firstp">; - defm LASTP_XPP : sve_int_pcount_pred_tmp<0b010, "lastp">; + defm FIRSTP_XPP : sve_int_pcount_pred<0b001, "firstp", int_aarch64_sve_firstp>; + defm LASTP_XPP : sve_int_pcount_pred<0b010, "lastp", int_aarch64_sve_lastp>; // SVE reverse within elements, zeroing predicate defm RBIT_ZPzZ : sve_int_perm_rev_rbit_z<"rbit", AArch64rbit_mt>; @@ -4620,7 +4620,7 @@ let Predicates = [HasSVE2p2_or_SME2p2] in { //===----------------------------------------------------------------------===// let Predicates = [HasNonStreamingSVE2p2_or_SME2p2] in { // SVE2 EXPAND - defm EXPAND_ZPZ : sve2_int_perm_expand<"expand">; + defm EXPAND_ZPZ : sve2_int_perm_expand<"expand", int_aarch64_sve_expand>; // SVE COMPACT - byte and halfword defm COMPACT_ZPZ : sve_int_perm_compact_bh<"compact", int_aarch64_sve_compact>; } diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 3cdd505f12116..fe5e6a9e3583a 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1173,13 +1173,6 @@ multiclass sve_int_pcount_pred<bits<3> opc, string asm, def : SVE_2_Op_Pat<i64, int_op, nxv4i1, nxv4i1, !cast<Instruction>(NAME # _S)>; def : SVE_2_Op_Pat<i64, int_op, nxv2i1, nxv2i1, !cast<Instruction>(NAME # _D)>; } - -multiclass sve_int_pcount_pred_tmp<bits<3> opc, string asm> { - def _B : sve_int_pcount_pred<0b00, opc, asm, PPR8>; - def _H : sve_int_pcount_pred<0b01, opc, asm, PPR16>; - def _S : sve_int_pcount_pred<0b10, opc, asm, PPR32>; - def _D : sve_int_pcount_pred<0b11, opc, asm, PPR64>; -} //===----------------------------------------------------------------------===// // SVE Element Count Group //===----------------------------------------------------------------------===// @@ -7678,11 +7671,20 @@ class sve2_int_perm_expand<bits<2> sz, string asm, let hasSideEffects = 0; } -multiclass sve2_int_perm_expand<string asm> { +multiclass sve2_int_perm_expand<string asm, SDPatternOperator op> { def _B : sve2_int_perm_expand<0b00, asm, ZPR8>; def _H : sve2_int_perm_expand<0b01, asm, ZPR16>; def _S : sve2_int_perm_expand<0b10, asm, ZPR32>; def _D : sve2_int_perm_expand<0b11, asm, ZPR64>; + + def : SVE_2_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; + def : SVE_2_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>; } class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm, diff --git a/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll b/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll index 6017070b114a5..e4d25407add4a 100644 --- a/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll +++ b/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll @@ -87,6 +87,179 @@ define <vscale x 8 x bfloat> @compact_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x ret <vscale x 8 x bfloat> %out } +; +; EXPAND +; + +define <vscale x 16 x i8> @expand_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) { +; CHECK-LABEL: expand_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: expand z0.b, p0, z0.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @expand_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) { +; CHECK-LABEL: expand_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: expand z0.h, p0, z0.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.expand.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @expand_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) { +; CHECK-LABEL: expand_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: expand z0.s, p0, z0.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.expand.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @expand_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) { +; CHECK-LABEL: expand_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: expand z0.d, p0, z0.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.expand.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a) + ret <vscale x 2 x i64> %out +} + +define <vscale x 8 x half> @expand_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) { +; CHECK-LABEL: expand_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: expand z0.h, p0, z0.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x half> @llvm.aarch64.sve.expand.nxv8f16(<vscale x 8 x i1> %pg, + <vscale x 8 x half> %a) + ret <vscale x 8 x half> %out +} + +define <vscale x 4 x float> @expand_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) { +; CHECK-LABEL: expand_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: expand z0.s, p0, z0.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x float> @llvm.aarch64.sve.expand.nxv4f32(<vscale x 4 x i1> %pg, + <vscale x 4 x float> %a) + ret <vscale x 4 x float> %out +} + +define <vscale x 2 x double> @expand_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) { +; CHECK-LABEL: expand_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: expand z0.d, p0, z0.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x double> @llvm.aarch64.sve.expand.nxv2f64(<vscale x 2 x i1> %pg, + <vscale x 2 x double> %a) + ret <vscale x 2 x double> %out +} + +define <vscale x 8 x bfloat> @expand_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) { +; CHECK-LABEL: expand_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: expand z0.h, p0, z0.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.expand.nxv8bf16(<vscale x 8 x i1> %pg, + <vscale x 8 x bfloat> %a) + ret <vscale x 8 x bfloat> %out +} + +; +; FIRSTP +; + +define i64 @firstp_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) { +; CHECK-LABEL: firstp_b8: +; CHECK: // %bb.0: +; CHECK-NEXT: firstp x0, p0, p1.b +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.firstp.nxv16i1(<vscale x 16 x i1> %pg, + <vscale x 16 x i1> %a) + ret i64 %out +} + +define i64 @firstp_b16(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %a) { +; CHECK-LABEL: firstp_b16: +; CHECK: // %bb.0: +; CHECK-NEXT: firstp x0, p0, p1.h +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.firstp.nxv8i1(<vscale x 8 x i1> %pg, + <vscale x 8 x i1> %a) + ret i64 %out +} + +define i64 @firstp_b32(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %a) { +; CHECK-LABEL: firstp_b32: +; CHECK: // %bb.0: +; CHECK-NEXT: firstp x0, p0, p1.s +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.firstp.nxv4i1(<vscale x 4 x i1> %pg, + <vscale x 4 x i1> %a) + ret i64 %out +} + +define i64 @firstp_b64(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %a) { +; CHECK-LABEL: firstp_b64: +; CHECK: // %bb.0: +; CHECK-NEXT: firstp x0, p0, p1.d +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.firstp.nxv2i1(<vscale x 2 x i1> %pg, + <vscale x 2 x i1> %a) + ret i64 %out +} + +; +; LASTP +; + +define i64 @lastp_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) { +; CHECK-LABEL: lastp_b8: +; CHECK: // %bb.0: +; CHECK-NEXT: lastp x0, p0, p1.b +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.lastp.nxv16i1(<vscale x 16 x i1> %pg, + <vscale x 16 x i1> %a) + ret i64 %out +} + +define i64 @lastp_b16(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %a) { +; CHECK-LABEL: lastp_b16: +; CHECK: // %bb.0: +; CHECK-NEXT: lastp x0, p0, p1.h +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.lastp.nxv8i1(<vscale x 8 x i1> %pg, + <vscale x 8 x i1> %a) + ret i64 %out +} + +define i64 @lastp_b32(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %a) { +; CHECK-LABEL: lastp_b32: +; CHECK: // %bb.0: +; CHECK-NEXT: lastp x0, p0, p1.s +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.lastp.nxv4i1(<vscale x 4 x i1> %pg, + <vscale x 4 x i1> %a) + ret i64 %out +} + +define i64 @lastp_b64(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %a) { +; CHECK-LABEL: lastp_b64: +; CHECK: // %bb.0: +; CHECK-NEXT: lastp x0, p0, p1.d +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.lastp.nxv2i1(<vscale x 2 x i1> %pg, + <vscale x 2 x i1> %a) + ret i64 %out +} + + declare <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>) declare <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>) declare <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>) >From 3672fdf4296e997b6b9ec14bbe3b235d4e9c9b39 Mon Sep 17 00:00:00 2001 From: Lukacma <[email protected]> Date: Thu, 16 Oct 2025 15:35:26 +0100 Subject: [PATCH 2/6] Apply suggestion from @Lukacma --- llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index af8784ed58c75..2a06867cb31a7 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2028,7 +2028,7 @@ def int_aarch64_sve_clasta_n : AdvSIMD_SVE_ReduceWithInit_Intrinsic; def int_aarch64_sve_clastb : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_clastb_n : AdvSIMD_SVE_ReduceWithInit_Intrinsic; def int_aarch64_sve_compact : AdvSIMD_Pred1VectorArg_Intrinsic; -def int_aarch64_sve_expand : AdvSIMD_Pred1VectorArg_Intrinsic; +def int_aarch64_sve_expand : AdvSIMD_Pred1VectorArg_Intrinsic; def int_aarch64_sve_dupq_lane : AdvSIMD_SVE_DUPQ_Intrinsic; def int_aarch64_sve_dup_laneq : SVE2_1VectorArgIndexed_Intrinsic; def int_aarch64_sve_ext : AdvSIMD_2VectorArgIndexed_Intrinsic; >From d2923dd30058038750fe3af02f85749eeae980d2 Mon Sep 17 00:00:00 2001 From: Lukacma <[email protected]> Date: Fri, 14 Nov 2025 10:52:00 +0000 Subject: [PATCH 3/6] Update clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c Co-authored-by: Kerry McLaughlin <[email protected]> --- .../CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c index 8bee2ed1121a6..882ee36e69a2b 100644 --- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c +++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c @@ -17,9 +17,9 @@ #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 #else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#define SVE_ACLE_FUNC(A1,A2) A1##A2 #endif #ifdef __ARM_FEATURE_SME >From d79baadb91744b9c2b054b1b68f75abf5af127ca Mon Sep 17 00:00:00 2001 From: Marian Lukac <[email protected]> Date: Fri, 14 Nov 2025 12:22:50 +0000 Subject: [PATCH 4/6] Address Review comments --- .../sve2p2-intriniscs/acle_sve2p2_compact.c | 22 +++++------- .../sve2p2-intriniscs/acle_sve2p2_expand.c | 34 ++++++++----------- .../sve2p2-intriniscs/acle_sve2p2_firstp.c | 7 ++-- .../sve2p2-intriniscs/acle_sve2p2_lastp.c | 7 ++-- 4 files changed, 28 insertions(+), 42 deletions(-) diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c index 882ee36e69a2b..8a7cafd94b64e 100644 --- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c +++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c @@ -11,8 +11,10 @@ #ifdef __ARM_FEATURE_SME #include "arm_sme.h" +#define STREAMING __arm_streaming #else #include "arm_sve.h" +#define STREAMING #endif #ifdef SVE_OVERLOADED_FORMS @@ -22,12 +24,6 @@ #define SVE_ACLE_FUNC(A1,A2) A1##A2 #endif -#ifdef __ARM_FEATURE_SME -#define STREAMING __arm_streaming -#else -#define STREAMING -#endif - // CHECK-LABEL: @test_svcompact_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) @@ -40,7 +36,7 @@ // svint8_t test_svcompact_s8(svbool_t pg, svint8_t op) STREAMING { - return SVE_ACLE_FUNC(svcompact,_s8,,)(pg, op); + return SVE_ACLE_FUNC(svcompact,_s8)(pg, op); } // CHECK-LABEL: @test_svcompact_s16( @@ -57,7 +53,7 @@ svint8_t test_svcompact_s8(svbool_t pg, svint8_t op) STREAMING // svint16_t test_svcompact_s16(svbool_t pg, svint16_t op) STREAMING { - return SVE_ACLE_FUNC(svcompact,_s16,,)(pg, op); + return SVE_ACLE_FUNC(svcompact,_s16)(pg, op); } // CHECK-LABEL: @test_svcompact_u8( @@ -72,7 +68,7 @@ svint16_t test_svcompact_s16(svbool_t pg, svint16_t op) STREAMING // svuint8_t test_svcompact_u8(svbool_t pg, svuint8_t op) STREAMING { - return SVE_ACLE_FUNC(svcompact,_u8,,)(pg, op); + return SVE_ACLE_FUNC(svcompact,_u8)(pg, op); } // CHECK-LABEL: @test_svcompact_u16( @@ -89,7 +85,7 @@ svuint8_t test_svcompact_u8(svbool_t pg, svuint8_t op) STREAMING // svuint16_t test_svcompact_u16(svbool_t pg, svuint16_t op) STREAMING { - return SVE_ACLE_FUNC(svcompact,_u16,,)(pg, op); + return SVE_ACLE_FUNC(svcompact,_u16)(pg, op); } // CHECK-LABEL: @test_svcompact_mf8( @@ -104,7 +100,7 @@ svuint16_t test_svcompact_u16(svbool_t pg, svuint16_t op) STREAMING // svmfloat8_t test_svcompact_mf8(svbool_t pg, svmfloat8_t op) STREAMING { - return SVE_ACLE_FUNC(svcompact,_mf8,,)(pg, op); + return SVE_ACLE_FUNC(svcompact,_mf8)(pg, op); } // CHECK-LABEL: @test_svcompact_f16( @@ -121,7 +117,7 @@ svmfloat8_t test_svcompact_mf8(svbool_t pg, svmfloat8_t op) STREAMING // svfloat16_t test_svcompact_f16(svbool_t pg, svfloat16_t op) STREAMING { - return SVE_ACLE_FUNC(svcompact,_f16,,)(pg, op); + return SVE_ACLE_FUNC(svcompact,_f16)(pg, op); } // CHECK-LABEL: @test_svcompact_bf16( @@ -138,5 +134,5 @@ svfloat16_t test_svcompact_f16(svbool_t pg, svfloat16_t op) STREAMING // svbfloat16_t test_svcompact_bf16(svbool_t pg, svbfloat16_t op) STREAMING { - return SVE_ACLE_FUNC(svcompact,_bf16,,)(pg, op); + return SVE_ACLE_FUNC(svcompact,_bf16)(pg, op); } diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c index ece0ce795df39..c89fd657bc6b7 100644 --- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c +++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c @@ -10,8 +10,10 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #ifdef __ARM_FEATURE_SME #include "arm_sme.h" +#define STREAMING __arm_streaming #else #include "arm_sve.h" +#define STREAMING #endif #ifdef SVE_OVERLOADED_FORMS @@ -21,12 +23,6 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif -#ifdef __ARM_FEATURE_SME -#define STREAMING __arm_streaming -#else -#define STREAMING -#endif - // CHECK-LABEL: @test_svexpand_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) @@ -39,7 +35,7 @@ // svint8_t test_svexpand_s8(svbool_t pg, svint8_t op) STREAMING { - return SVE_ACLE_FUNC(svexpand,_s8,,)(pg, op); + return SVE_ACLE_FUNC(svexpand,_s8)(pg, op); } // CHECK-LABEL: @test_svexpand_s16( @@ -56,7 +52,7 @@ svint8_t test_svexpand_s8(svbool_t pg, svint8_t op) STREAMING // svint16_t test_svexpand_s16(svbool_t pg, svint16_t op) STREAMING { - return SVE_ACLE_FUNC(svexpand,_s16,,)(pg, op); + return SVE_ACLE_FUNC(svexpand,_s16)(pg, op); } // CHECK-LABEL: @test_svexpand_u8( @@ -71,7 +67,7 @@ svint16_t test_svexpand_s16(svbool_t pg, svint16_t op) STREAMING // svuint8_t test_svexpand_u8(svbool_t pg, svuint8_t op) STREAMING { - return SVE_ACLE_FUNC(svexpand,_u8,,)(pg, op); + return SVE_ACLE_FUNC(svexpand,_u8)(pg, op); } // CHECK-LABEL: @test_svexpand_u16( @@ -88,7 +84,7 @@ svuint8_t test_svexpand_u8(svbool_t pg, svuint8_t op) STREAMING // svuint16_t test_svexpand_u16(svbool_t pg, svuint16_t op) STREAMING { - return SVE_ACLE_FUNC(svexpand,_u16,,)(pg, op); + return SVE_ACLE_FUNC(svexpand,_u16)(pg, op); } // CHECK-LABEL: @test_svexpand_mf8( @@ -103,7 +99,7 @@ svuint16_t test_svexpand_u16(svbool_t pg, svuint16_t op) STREAMING // svmfloat8_t test_svexpand_mf8(svbool_t pg, svmfloat8_t op) STREAMING { - return SVE_ACLE_FUNC(svexpand,_mf8,,)(pg, op); + return SVE_ACLE_FUNC(svexpand,_mf8)(pg, op); } // CHECK-LABEL: @test_svexpand_f16( @@ -120,7 +116,7 @@ svmfloat8_t test_svexpand_mf8(svbool_t pg, svmfloat8_t op) STREAMING // svfloat16_t test_svexpand_f16(svbool_t pg, svfloat16_t op) STREAMING { - return SVE_ACLE_FUNC(svexpand,_f16,,)(pg, op); + return SVE_ACLE_FUNC(svexpand,_f16)(pg, op); } // CHECK-LABEL: @test_svexpand_bf16( @@ -137,7 +133,7 @@ svfloat16_t test_svexpand_f16(svbool_t pg, svfloat16_t op) STREAMING // svbfloat16_t test_svexpand_bf16(svbool_t pg, svbfloat16_t op) STREAMING { - return SVE_ACLE_FUNC(svexpand,_bf16,,)(pg, op); + return SVE_ACLE_FUNC(svexpand,_bf16)(pg, op); } // CHECK-LABEL: @test_svexpand_s32( @@ -154,7 +150,7 @@ svbfloat16_t test_svexpand_bf16(svbool_t pg, svbfloat16_t op) STREAMING // svint32_t test_svexpand_s32(svbool_t pg, svint32_t op) STREAMING { - return SVE_ACLE_FUNC(svexpand,_s32,,)(pg, op); + return SVE_ACLE_FUNC(svexpand,_s32)(pg, op); } // CHECK-LABEL: @test_svexpand_s64( @@ -171,7 +167,7 @@ svint32_t test_svexpand_s32(svbool_t pg, svint32_t op) STREAMING // svint64_t test_svexpand_s64(svbool_t pg, svint64_t op) STREAMING { - return SVE_ACLE_FUNC(svexpand,_s64,,)(pg, op); + return SVE_ACLE_FUNC(svexpand,_s64)(pg, op); } // CHECK-LABEL: @test_svexpand_u32( @@ -188,7 +184,7 @@ svint64_t test_svexpand_s64(svbool_t pg, svint64_t op) STREAMING // svuint32_t test_svexpand_u32(svbool_t pg, svuint32_t op) STREAMING { - return SVE_ACLE_FUNC(svexpand,_u32,,)(pg, op); + return SVE_ACLE_FUNC(svexpand,_u32)(pg, op); } // CHECK-LABEL: @test_svexpand_u64( @@ -205,7 +201,7 @@ svuint32_t test_svexpand_u32(svbool_t pg, svuint32_t op) STREAMING // svuint64_t test_svexpand_u64(svbool_t pg, svuint64_t op) STREAMING { - return SVE_ACLE_FUNC(svexpand,_u64,,)(pg, op); + return SVE_ACLE_FUNC(svexpand,_u64)(pg, op); } // CHECK-LABEL: @test_svexpand_f32( @@ -222,7 +218,7 @@ svuint64_t test_svexpand_u64(svbool_t pg, svuint64_t op) STREAMING // svfloat32_t test_svexpand_f32(svbool_t pg, svfloat32_t op) STREAMING { - return SVE_ACLE_FUNC(svexpand,_f32,,)(pg, op); + return SVE_ACLE_FUNC(svexpand,_f32)(pg, op); } // CHECK-LABEL: @test_svexpand_f64( @@ -239,5 +235,5 @@ svfloat32_t test_svexpand_f32(svbool_t pg, svfloat32_t op) STREAMING // svfloat64_t test_svexpand_f64(svbool_t pg, svfloat64_t op) STREAMING { - return SVE_ACLE_FUNC(svexpand,_f64,,)(pg, op); + return SVE_ACLE_FUNC(svexpand,_f64)(pg, op); } diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c index 1656f10a83a90..b72b34d778099 100644 --- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c +++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c @@ -10,16 +10,13 @@ #ifdef __ARM_FEATURE_SME #include "arm_sme.h" -#else -#include "arm_sve.h" -#endif - -#ifdef __ARM_FEATURE_SME #define STREAMING __arm_streaming #else +#include "arm_sve.h" #define STREAMING #endif + // CHECK-LABEL: define dso_local i64 @test_svfirstp_b8( // CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c index bfe82af07f20c..cca33e00d374d 100644 --- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c +++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c @@ -10,16 +10,13 @@ #ifdef __ARM_FEATURE_SME #include "arm_sme.h" -#else -#include "arm_sve.h" -#endif - -#ifdef __ARM_FEATURE_SME #define STREAMING __arm_streaming #else +#include "arm_sve.h" #define STREAMING #endif + // CHECK-LABEL: define dso_local i64 @test_svlastp_b8( // CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] >From e09d738c41fe4ec87254c7ac186c3f986c4726b6 Mon Sep 17 00:00:00 2001 From: Marian Lukac <[email protected]> Date: Fri, 14 Nov 2025 13:13:31 +0000 Subject: [PATCH 5/6] test fix --- .../CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c index c89fd657bc6b7..1b90822d6b8d4 100644 --- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c +++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c @@ -18,9 +18,9 @@ #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 #else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#define SVE_ACLE_FUNC(A1,A2) A1##A2 #endif // CHECK-LABEL: @test_svexpand_s8( >From e2f1d96ed126b82651d2fbfc9c203a9ef9dfaefc Mon Sep 17 00:00:00 2001 From: Marian Lukac <[email protected]> Date: Mon, 1 Dec 2025 16:23:43 +0000 Subject: [PATCH 6/6] Address review comments --- .../test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c | 6 ------ .../CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c | 1 + .../CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c | 2 ++ .../CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c | 1 + .../CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c | 1 + 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c index 75ee18cb134d7..4c18969e78f0c 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c @@ -14,12 +14,6 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif -#ifdef __ARM_FEATURE_SME -#define STREAMING __arm_streaming -#else -#define STREAMING -#endif - // CHECK-LABEL: @test_svcompact_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c index 8a7cafd94b64e..5d0886f78258d 100644 --- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c +++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c @@ -8,6 +8,7 @@ // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #ifdef __ARM_FEATURE_SME #include "arm_sme.h" diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c index 1b90822d6b8d4..1fc30930c4de3 100644 --- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c +++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c @@ -8,6 +8,8 @@ // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + #ifdef __ARM_FEATURE_SME #include "arm_sme.h" #define STREAMING __arm_streaming diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c index b72b34d778099..b052592e44dc4 100644 --- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c +++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c @@ -7,6 +7,7 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #ifdef __ARM_FEATURE_SME #include "arm_sme.h" diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c index cca33e00d374d..8e53f2f8193f0 100644 --- a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c +++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c @@ -7,6 +7,7 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #ifdef __ARM_FEATURE_SME #include "arm_sme.h" _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
