https://github.com/banach-space created https://github.com/llvm/llvm-project/pull/174433
This PR adds CIR lowering support for unpredicated `svdup` SVE builtins. The corresponding ACLE intrinsics are documented at:

* https://developer.arm.com/architectures/instruction-sets/intrinsics (search for svdup).

Since LLVM provides a direct intrinsic for `svdup` with a 1:1 mapping, CIR lowers these builtins by emitting a call to the corresponding LLVM intrinsic.

DESIGN NOTES
------------
With this change, any ACLE intrinsic that has a corresponding LLVM intrinsic can, in principle, be lowered successfully by CIR. This improves code reuse by avoiding duplication of intrinsic definitions and instead reusing LLVM's intrinsic metadata. One consequence of this approach is that CIR will no longer emit NYI diagnostics for such intrinsics: if a mapping exists, the intrinsic will be silently lowered.

IMPLEMENTATION NOTES
--------------------
* Intrinsic discovery logic mirrors the approach in CodeGen/TargetBuiltins/ARM.cpp, but is simplified since CIR only requires the intrinsic name.
* Test inputs are copied from the existing svdup tests: tests/CodeGen/AArch64/sve-intrinsics/acle_sve_dup.c.
* The LLVM IR produced _with_ and _without_ `-fclangir` is identical, modulo basic block labels, SROA, and function attributes.

EXAMPLE LOWERING
----------------
Input:
```C
svint8_t test_svdup_n_s8(int8_t op) {
  return svdup_n_s8(op);
}
```

OUTPUT 1 (default):
```llvm
define dso_local <vscale x 16 x i8> @test_svdup_n_s8(i8 noundef %op) #0 {
entry:
  %op.addr = alloca i8, align 1
  store i8 %op, ptr %op.addr, align 1
  %0 = load i8, ptr %op.addr, align 1
  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %0)
  ret <vscale x 16 x i8> %1
}
```

OUTPUT 2 (via `-fclangir`):
```llvm
define dso_local <vscale x 16 x i8> @test_svdup_n_s8(i8 %0) #0 {
  %2 = alloca i8, i64 1, align 1
  %3 = alloca <vscale x 16 x i8>, i64 1, align 16
  store i8 %0, ptr %2, align 1
  %4 = load i8, ptr %2, align 1
  %5 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %4)
  store <vscale x 16 x i8> %5, ptr %3, align 16
  %6 = load <vscale x 16 x i8>, ptr %3, align 16
  ret <vscale x 16 x i8> %6
}
```
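As background for the implementation notes above: the discovery logic is a binary search over a table of (builtin ID, LLVM intrinsic ID) pairs that is sorted by builtin ID. The snippet below is a minimal, self-contained sketch of that lookup scheme only; the table entries, IDs, and names are hypothetical placeholders, not the TableGen-generated GET_SVE_LLVM_INTRINSIC_MAP data that the patch actually includes.

```C++
// A minimal sketch of a sorted builtin-to-intrinsic table searched with
// std::lower_bound. The entries below are hypothetical placeholders.
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iterator>

struct BuiltinInfo {
  unsigned builtinID;     // hypothetical clang builtin ID
  unsigned llvmIntrinsic; // hypothetical LLVM intrinsic ID; 0 means "no 1:1 mapping"
};

// The table must be sorted by builtinID for the binary search to be valid.
static const BuiltinInfo builtinMap[] = {
    {100, 7}, // e.g. an svdup-style builtin with a direct intrinsic
    {101, 7},
    {205, 0}, // a builtin that still requires custom lowering
};

static const BuiltinInfo *findInMap(unsigned builtinID) {
  assert(std::is_sorted(std::begin(builtinMap), std::end(builtinMap),
                        [](const BuiltinInfo &lhs, const BuiltinInfo &rhs) {
                          return lhs.builtinID < rhs.builtinID;
                        }));

  const BuiltinInfo *it =
      std::lower_bound(std::begin(builtinMap), std::end(builtinMap), builtinID,
                       [](const BuiltinInfo &entry, unsigned id) {
                         return entry.builtinID < id;
                       });
  if (it != std::end(builtinMap) && it->builtinID == builtinID)
    return it;
  return nullptr; // builtin not present in the table at all
}

int main() {
  const BuiltinInfo *info = findInMap(101);
  if (info && info->llvmIntrinsic)
    std::printf("lower by calling LLVM intrinsic %u directly\n", info->llvmIntrinsic);
  else
    std::printf("fall back to custom lowering (or report NYI)\n");
  return 0;
}
```

An entry whose intrinsic ID is 0 stands for a builtin that still needs custom lowering; in the patch itself that role is played by the SVEMAP2 entries and the existing switch-based NYI path.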
From 19b1297728da91187905f6592053ed7acc84670f Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <[email protected]>
Date: Mon, 5 Jan 2026 09:29:53 +0000
Subject: [PATCH] [CIR][AArch64] Add lowering for unpredicated svdup builtins
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR adds CIR lowering support for unpredicated `svdup` SVE builtins. The corresponding ACLE intrinsics are documented at:

* https://developer.arm.com/architectures/instruction-sets/intrinsics (search for svdup).

Since LLVM provides a direct intrinsic for `svdup` with a 1:1 mapping, CIR lowers these builtins by emitting a call to the corresponding LLVM intrinsic.

DESIGN NOTES
------------
With this change, any ACLE intrinsic that has a corresponding LLVM intrinsic can, in principle, be lowered successfully by CIR. This improves code reuse by avoiding duplication of intrinsic definitions and instead reusing LLVM's intrinsic metadata. One consequence of this approach is that CIR will no longer emit NYI diagnostics for such intrinsics: if a mapping exists, the intrinsic will be silently lowered.

IMPLEMENTATION NOTES
--------------------
* Intrinsic discovery logic mirrors the approach in CodeGen/TargetBuiltins/ARM.cpp, but is simplified since CIR only requires the intrinsic name.
* Test inputs are copied from the existing svdup tests: tests/CodeGen/AArch64/sve-intrinsics/acle_sve_dup.c.
* The LLVM IR produced _with_ and _without_ `-fclangir` is identical, modulo basic block labels, SROA, and function attributes.

EXAMPLE LOWERING
----------------
Input:
```C
svint8_t test_svdup_n_s8(int8_t op) {
  return svdup_n_s8(op);
}
```

OUTPUT 1 (default):
```llvm
define dso_local <vscale x 16 x i8> @test_svdup_n_s8(i8 noundef %op) #0 {
entry:
  %op.addr = alloca i8, align 1
  store i8 %op, ptr %op.addr, align 1
  %0 = load i8, ptr %op.addr, align 1
  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %0)
  ret <vscale x 16 x i8> %1
}
```

OUTPUT 2 (via `-fclangir`):
```llvm
define dso_local <vscale x 16 x i8> @test_svdup_n_s8(i8 %0) #0 {
  %2 = alloca i8, i64 1, align 1
  %3 = alloca <vscale x 16 x i8>, i64 1, align 16
  store i8 %0, ptr %2, align 1
  %4 = load i8, ptr %2, align 1
  %5 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %4)
  store <vscale x 16 x i8> %5, ptr %3, align 16
  %6 = load <vscale x 16 x i8>, ptr %3, align 16
  ret <vscale x 16 x i8> %6
}
```
---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  |  71 ++++++
 .../CodeGenBuiltins/AArch64/acle_sve_dup.c    | 211 ++++++++++++++++++
 2 files changed, 282 insertions(+)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index e28b3c6cdc2ff..f2e448917aae9 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -52,6 +52,51 @@ static mlir::Value genVscaleTimesFactor(mlir::Location loc,
                                builder.getUInt64(scalingFactor, loc));
 }
 
+static bool aarch64SVEIntrinsicsProvenSorted = false;
+
+namespace {
+struct aarch64BuiltinInfo {
+  unsigned builtinID;
+  unsigned LLVMIntrinsic;
+
+  bool operator<(unsigned RHSbuiltinID) const {
+    return builtinID < RHSbuiltinID;
+  }
+  bool operator<(const aarch64BuiltinInfo &TE) const {
+    return builtinID < TE.builtinID;
+  }
+};
+} // end anonymous namespace
+
+#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
+  {SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic}
+
+#define SVEMAP2(NameBase, TypeModifier) {SVE::BI__builtin_sve_##NameBase, 0}
+static const aarch64BuiltinInfo aarch64SVEIntrinsicMap[] = {
+#define GET_SVE_LLVM_INTRINSIC_MAP
+#include "clang/Basic/arm_sve_builtin_cg.inc"
+#undef GET_SVE_LLVM_INTRINSIC_MAP
+};
+
+static const aarch64BuiltinInfo *
+findARMVectorIntrinsicInMap(ArrayRef<aarch64BuiltinInfo> intrinsicMap,
+                            unsigned builtinID, bool &mapProvenSorted) {
+
+#ifndef NDEBUG
+  if (!mapProvenSorted) {
+    assert(llvm::is_sorted(intrinsicMap));
+    mapProvenSorted = true;
+  }
+#endif
+
+  const aarch64BuiltinInfo *info = llvm::lower_bound(intrinsicMap, builtinID);
+
+  if (info != intrinsicMap.end() && info->builtinID == builtinID)
+    return info;
+
+  return nullptr;
+}
+
 std::optional<mlir::Value>
 CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
                                           const CallExpr *expr) {
@@ -65,7 +110,26 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
 
   assert(!cir::MissingFeatures::aarch64SVEIntrinsics());
 
+  auto *builtinIntrInfo = findARMVectorIntrinsicInMap(
+      aarch64SVEIntrinsicMap, builtinID, aarch64SVEIntrinsicsProvenSorted);
+
+  // The operands of the builtin call
+  llvm::SmallVector<mlir::Value> ops;
+
+  for (const auto *argExpr : expr->arguments())
+    ops.push_back(emitScalarExpr(argExpr));
+
   mlir::Location loc = getLoc(expr->getExprLoc());
+  if (builtinIntrInfo && builtinIntrInfo->LLVMIntrinsic) {
+    std::string llvmIntrName(Intrinsic::getBaseName(
+        (llvm::Intrinsic::ID)builtinIntrInfo->LLVMIntrinsic));
+
+    llvmIntrName.erase(0, /*std::strlen("llvm.")=*/5);
+
+    return emitIntrinsicCallOp(builder, loc, llvmIntrName,
+                               convertType(expr->getType()),
+                               mlir::ValueRange{ops});
+  }
 
   switch (builtinID) {
   default:
@@ -103,10 +167,12 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
   case SVE::BI__builtin_sve_svpmullb_u64:
   case SVE::BI__builtin_sve_svpmullb_n_u16:
   case SVE::BI__builtin_sve_svpmullb_n_u64:
+
   case SVE::BI__builtin_sve_svdup_n_b8:
   case SVE::BI__builtin_sve_svdup_n_b16:
   case SVE::BI__builtin_sve_svdup_n_b32:
   case SVE::BI__builtin_sve_svdup_n_b64:
+
   case SVE::BI__builtin_sve_svdupq_n_b8:
   case SVE::BI__builtin_sve_svdupq_n_b16:
   case SVE::BI__builtin_sve_svdupq_n_b32:
@@ -129,22 +195,27 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
                  std::string("unimplemented AArch64 builtin call: ") +
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
+
   case SVE::BI__builtin_sve_svlen_u8:
   case SVE::BI__builtin_sve_svlen_s8:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 16);
+
   case SVE::BI__builtin_sve_svlen_u16:
   case SVE::BI__builtin_sve_svlen_s16:
   case SVE::BI__builtin_sve_svlen_f16:
   case SVE::BI__builtin_sve_svlen_bf16:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 8);
+
   case SVE::BI__builtin_sve_svlen_u32:
   case SVE::BI__builtin_sve_svlen_s32:
   case SVE::BI__builtin_sve_svlen_f32:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 4);
+
   case SVE::BI__builtin_sve_svlen_u64:
   case SVE::BI__builtin_sve_svlen_s64:
   case SVE::BI__builtin_sve_svlen_f64:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 2);
+
   case SVE::BI__builtin_sve_svtbl2_u8:
   case SVE::BI__builtin_sve_svtbl2_s8:
   case SVE::BI__builtin_sve_svtbl2_u16:
diff --git a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
new file mode 100644
index 0000000000000..3e0a892d6b368
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
@@ -0,0 +1,211 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=ALL,CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=ALL,CIR
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+#include <arm_sve.h>
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// ALL-LABEL: @test_svdup_n_s8
+svint8_t test_svdup_n_s8(int8_t op) MODE_ATTR
+{
+// CIR-SAME: %[[OP:.*]]: !s8i {{.*}} -> !cir.vector<[16] x !s8i>
+// CIR: %[[ALLOCA:.*]] = cir.alloca
+// CIR: cir.store %[[OP]], %[[ALLOCA]]
+// CIR: %[[LOAD:.*]] = cir.load align(1) %[[ALLOCA]]
+// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s8i) -> !cir.vector<[16] x !s8i>
+
+// LLVM_OGCG_CIR-SAME: i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} align 1
+// LLVM_OGCG_CIR: store i8 [[OP]], ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i8, ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s8,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_s16
+svint16_t test_svdup_n_s16(int16_t op) MODE_ATTR
+{
+// CIR-SAME: %[[OP:.*]]: !s16i {{.*}} -> !cir.vector<[8] x !s16i>
+// CIR: %[[ALLOCA:.*]] = cir.alloca
+// CIR: cir.store %[[OP]], %[[ALLOCA]]
+// CIR: %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
+// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s16i) -> !cir.vector<[8] x !s16i>
+
+// LLVM_OGCG_CIR-SAME: i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} align 2
+// LLVM_OGCG_CIR: store i16 [[OP]], ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i16, ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s16,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_s32
+svint32_t test_svdup_n_s32(int32_t op) MODE_ATTR
+{
+// CIR-SAME: %[[OP:.*]]: !s32i {{.*}} -> !cir.vector<[4] x !s32i>
+// CIR: %[[ALLOCA:.*]] = cir.alloca
+// CIR: cir.store %[[OP]], %[[ALLOCA]]
+// CIR: %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
+// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s32i) -> !cir.vector<[4] x !s32i>
+
+// LLVM_OGCG_CIR-SAME: i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} align 4
+// LLVM_OGCG_CIR: store i32 [[OP]], ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i32, ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s32,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_s64
+svint64_t test_svdup_n_s64(int64_t op) MODE_ATTR
+{
+// CIR-SAME: %[[OP:.*]]: !s64i {{.*}} -> !cir.vector<[2] x !s64i>
+// CIR: %[[ALLOCA:.*]] = cir.alloca
+// CIR: cir.store %[[OP]], %[[ALLOCA]]
+// CIR: %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
+// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s64i) -> !cir.vector<[2] x !s64i>
+
+// LLVM_OGCG_CIR-SAME: i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} align 8
+// LLVM_OGCG_CIR: store i64 [[OP]], ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i64, ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s64,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u8
+svuint8_t test_svdup_n_u8(uint8_t op) MODE_ATTR
+{
+// CIR-SAME: %[[OP:.*]]: !u8i {{.*}} -> !cir.vector<[16] x !u8i>
+// CIR: %[[ALLOCA:.*]] = cir.alloca
+// CIR: cir.store %[[OP]], %[[ALLOCA]]
+// CIR: %[[LOAD:.*]] = cir.load align(1) %[[ALLOCA]]
+// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u8i) -> !cir.vector<[16] x !u8i>
+
+// LLVM_OGCG_CIR-SAME: i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} align 1
+// LLVM_OGCG_CIR: store i8 [[OP]], ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i8, ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u8,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u16
+svuint16_t test_svdup_n_u16(uint16_t op) MODE_ATTR
+{
+// CIR-SAME: %[[OP:.*]]: !u16i {{.*}} -> !cir.vector<[8] x !u16i>
+// CIR: %[[ALLOCA:.*]] = cir.alloca
+// CIR: cir.store %[[OP]], %[[ALLOCA]]
+// CIR: %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
+// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u16i) -> !cir.vector<[8] x !u16i>
+
+// LLVM_OGCG_CIR-SAME: i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} align 2
+// LLVM_OGCG_CIR: store i16 [[OP]], ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i16, ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u16,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u32
+svuint32_t test_svdup_n_u32(uint32_t op) MODE_ATTR
+{
+// CIR-SAME: %[[OP:.*]]: !u32i {{.*}} -> !cir.vector<[4] x !u32i>
+// CIR: %[[ALLOCA:.*]] = cir.alloca
+// CIR: cir.store %[[OP]], %[[ALLOCA]]
+// CIR: %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
+// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u32i) -> !cir.vector<[4] x !u32i>
+
+// LLVM_OGCG_CIR-SAME: i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} align 4
+// LLVM_OGCG_CIR: store i32 [[OP]], ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i32, ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u32,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u64
+svuint64_t test_svdup_n_u64(uint64_t op) MODE_ATTR
+{
+// CIR-SAME: %[[OP:.*]]: !u64i {{.*}} -> !cir.vector<[2] x !u64i>
+// CIR: %[[ALLOCA:.*]] = cir.alloca
+// CIR: cir.store %[[OP]], %[[ALLOCA]]
+// CIR: %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
+// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u64i) -> !cir.vector<[2] x !u64i>
+
+// LLVM_OGCG_CIR-SAME: i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} align 8
+// LLVM_OGCG_CIR: store i64 [[OP]], ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i64, ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u64,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_f16
+svfloat16_t test_svdup_n_f16(float16_t op) MODE_ATTR
+{
+// CIR-SAME: %[[OP:.*]]: !cir.f16 {{.*}} -> !cir.vector<[8] x !cir.f16>
+// CIR: %[[ALLOCA:.*]] = cir.alloca
+// CIR: cir.store %[[OP]], %[[ALLOCA]]
+// CIR: %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
+// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!cir.f16) -> !cir.vector<[8] x !cir.f16>
+
+// LLVM_OGCG_CIR-SAME: half {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca half,{{([[:space:]]?i64 1,)?}} align 2
+// LLVM_OGCG_CIR: store half [[OP]], ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load half, ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_f16,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_f32
+svfloat32_t test_svdup_n_f32(float32_t op) MODE_ATTR
+{
+// CIR-SAME: %[[OP:.*]]: !cir.float {{.*}} -> !cir.vector<[4] x !cir.float>
+// CIR: %[[ALLOCA:.*]] = cir.alloca
+// CIR: cir.store %[[OP]], %[[ALLOCA]]
+// CIR: %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
+// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!cir.float) -> !cir.vector<[4] x !cir.float>
+
+// LLVM_OGCG_CIR-SAME: float {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca float,{{([[:space:]]?i64 1,)?}} align 4
+// LLVM_OGCG_CIR: store float [[OP]], ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load float, ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_f32,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_f64
+svfloat64_t test_svdup_n_f64(float64_t op) MODE_ATTR
+{
+// CIR-SAME: %[[OP:.*]]: !cir.double {{.*}} -> !cir.vector<[2] x !cir.double>
+// CIR: %[[ALLOCA:.*]] = cir.alloca
+// CIR: cir.store %[[OP]], %[[ALLOCA]]
+// CIR: %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
+// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!cir.double) -> !cir.vector<[2] x !cir.double>
+
+// LLVM_OGCG_CIR-SAME: double {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca double,{{([[:space:]]?i64 1,)?}} align 8
+// LLVM_OGCG_CIR: store double [[OP]], ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load double, ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_f64,)(op);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
