llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-mlir Author: Andrzej Warzyński (banach-space) <details> <summary>Changes</summary> - **[mlir] Fix alignment for predicate (i1) vectors** - **[CIR][AArch64] Add lowering for predicated SVE svdup builtins (zeroing)** --- Patch is 35.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/175976.diff 4 Files Affected: - (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp (+86-9) - (modified) clang/lib/CIR/CodeGen/CIRGenFunction.h (+2) - (modified) clang/lib/CIR/CodeGen/CIRGenTypes.cpp (+4) - (modified) clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c (+472-5) ``````````diff diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 93089eb585aa7..d59d3bebe0bb0 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -126,6 +126,81 @@ bool CIRGenFunction::getAArch64SVEProcessedOperands( return true; } +// Reinterpret the input predicate so that it can be used to correctly isolate +// the elements of the specified datatype. +mlir::Value CIRGenFunction::emitSVEpredicateCast(mlir::Value *pred, + unsigned minNumElts, + mlir::Location loc) { + + // TODO: Handle "aarch64.svcount" once we get round to supporting SME. + + auto retTy = cir::VectorType::get(builder.getUIntNTy(1), minNumElts, + /*is_scalable=*/true); + if (pred->getType() == retTy) + return *pred; + + unsigned intID; + mlir::Type intrinsicTy; + switch (minNumElts) { + default: + llvm_unreachable("unsupported element count!"); + case 1: + case 2: + case 4: + case 8: + intID = Intrinsic::aarch64_sve_convert_from_svbool; + intrinsicTy = retTy; + break; + case 16: + intID = Intrinsic::aarch64_sve_convert_to_svbool; + intrinsicTy = pred->getType(); + break; + } + + std::string llvmIntrName(Intrinsic::getBaseName(intID)); + llvmIntrName.erase(0, /*std::strlen(".llvm")=*/5); + auto call = emitIntrinsicCallOp(builder, loc, llvmIntrName, retTy, + mlir::ValueRange{*pred}); + assert(call.getType() == retTy && "Unexpected return type!"); + return call; +} + +// Return the element count for +static unsigned getSVEMinEltCount(const clang::SVETypeFlags::EltType &sveType) { + switch (sveType) { + default: + llvm_unreachable("Invalid SVETypeFlag!"); + + case SVETypeFlags::EltTyInt8: + return 16; + case SVETypeFlags::EltTyInt16: + return 8; + case SVETypeFlags::EltTyInt32: + return 4; + case SVETypeFlags::EltTyInt64: + return 2; + + case SVETypeFlags::EltTyMFloat8: + return 16; + case SVETypeFlags::EltTyFloat16: + case SVETypeFlags::EltTyBFloat16: + return 8; + case SVETypeFlags::EltTyFloat32: + return 4; + case SVETypeFlags::EltTyFloat64: + return 2; + + case SVETypeFlags::EltTyBool8: + return 16; + case SVETypeFlags::EltTyBool16: + return 8; + case SVETypeFlags::EltTyBool32: + return 4; + case SVETypeFlags::EltTyBool64: + return 2; + } +} + std::optional<mlir::Value> CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID, const CallExpr *expr) { @@ -171,10 +246,12 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID, std::string("unimplemented AArch64 builtin call: ") + getContext().BuiltinInfo.getName(builtinID)); - if (typeFlags.getMergeType() == SVETypeFlags::MergeZeroExp) - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented AArch64 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); + // Zero-ing predication + if (typeFlags.getMergeType() == SVETypeFlags::MergeZeroExp) { + auto null = builder.getNullValue(convertType(expr->getType()), + getLoc(expr->getExprLoc())); + ops.insert(ops.begin(), null); + } if (typeFlags.getMergeType() == SVETypeFlags::MergeAnyExp) cgm.errorNYI(expr->getSourceRange(), @@ -194,11 +271,11 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID, // Predicates must match the main datatype. for (mlir::Value &op : ops) - if (auto predTy = dyn_cast<mlir::VectorType>(op.getType())) - if (predTy.getElementType().isInteger(1)) - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented AArch64 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); + if (auto predTy = dyn_cast<cir::VectorType>(op.getType())) + if (auto cirInt = dyn_cast<cir::IntType>(predTy.getElementType())) + if (cirInt.getWidth() == 1) + op = emitSVEpredicateCast( + &op, getSVEMinEltCount(typeFlags.getEltType()), loc); // Splat scalar operand to vector (intrinsics with _n infix) if (typeFlags.hasSplatOperand()) { diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 5fe1d9a4f2b76..86d2a8c4ac089 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1269,6 +1269,8 @@ class CIRGenFunction : public CIRGenTypeCache { bool getAArch64SVEProcessedOperands(unsigned builtinID, const CallExpr *expr, SmallVectorImpl<mlir::Value> &ops, clang::SVETypeFlags typeFlags); + mlir::Value emitSVEpredicateCast(mlir::Value *pred, unsigned minNumElts, + mlir::Location loc); std::optional<mlir::Value> emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, ReturnValueSlot returnValue, diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp index 985c2901a7b04..f6220c616ed60 100644 --- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp @@ -373,6 +373,10 @@ mlir::Type CIRGenTypes::convertType(QualType type) { resultType = cir::VectorType::get(builder.getDoubleTy(), 2, /*is_scalable=*/true); break; + case BuiltinType::SveBool: + resultType = cir::VectorType::get(builder.getUIntNTy(1), 16, + /*is_scalable=*/true); + break; // Unsigned integral types. case BuiltinType::Char8: diff --git a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c index 3e0a892d6b368..60a2992ab14ad 100644 --- a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c +++ b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c @@ -1,13 +1,13 @@ // REQUIRES: aarch64-registered-target - +// // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=ALL,CIR // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=ALL,CIR -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_VIA_CIR +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_VIA_CIR -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_DIRECT +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_DIRECT #include <arm_sve.h> #if defined __ARM_FEATURE_SME @@ -209,3 +209,470 @@ svfloat64_t test_svdup_n_f64(float64_t op) MODE_ATTR // LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double [[OP_LOAD]]) return SVE_ACLE_FUNC(svdup,_n,_f64,)(op); } + +// ALL-LABEL: @test_svdup_n_s8_z +svint8_t test_svdup_n_s8_z(svbool_t pg, int8_t op) MODE_ATTR +{ +// CIR-SAME: %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>> +// CIR-SAME: %[[OP:.*]]: !s8i +// CIR-SAME: -> !cir.vector<[16] x !s8i> +// CIR: %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x !cir.int<u, 1>> +// CIR: %[[ALLOCA_OP:.*]] = cir.alloca !s8i +// CIR: %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[16] x !s8i> +// CIR: cir.store %[[PG]], %[[ALLOCA_PG]] +// CIR: cir.store %[[OP]], %[[ALLOCA_OP]] +// CIR: %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]] +// CIR: %[[LOAD_OP:.*]] = cir.load align(1) %[[ALLOCA_OP]] +// CIR: %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[16] x !s8i> +// CIR: %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" %[[CONST_0]], %[[LOAD_PG]], %[[LOAD_OP]] +// CIR-SAME: -> !cir.vector<[16] x !s8i> +// CIR: cir.store %[[CONVERT_PG]], %[[ALLOCA_RES]] +// CIR: %[[RES:.*]] = cir.load %[[ALLOCA_RES]] +// CIR: cir.return %[[RES]] + +// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[PG_ADDR:%.*]] = alloca <vscale x 16 x i1>,{{([[:space:]]?i64 1,)?}} align 2 +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} align 1 +// +// LLVM_VIA_CIR: [[RES_ADDR:%.*]] = alloca <vscale x 16 x i8>,{{([[:space:]]?i64 1,)?}} align 16 +// +// LLVM_OGCG_CIR: store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2 +// LLVM_OGCG_CIR: store i8 [[OP]], ptr [[OP_ADDR]], align 1 +// LLVM_OGCG_CIR: [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], align 2 +// LLVM_OGCG_CIR: [[TMP1:%.*]] = load i8, ptr [[OP_ADDR]], align 1 +// LLVM_OGCG_CIR: [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[TMP0]], i8 [[TMP1]]) +// +// LLVM_DIRECT: ret {{.*}} [[TMP2]] +// +// LLVM_VIA_CIR: store {{.*}} [[TMP2]], ptr [[RES_ADDR]] +// LLVM_VIA_CIR: [[RES:%.*]] = load {{.*}} [[RES_ADDR]] +// LLVM_VIA_CIR: ret {{.*}} [[RES]] + return SVE_ACLE_FUNC(svdup,_n,_s8_z,)(pg, op); +} + +// ALL-LABEL: @test_svdup_n_s16_z( +svint16_t test_svdup_n_s16_z(svbool_t pg, int16_t op) MODE_ATTR +{ +// CIR-SAME: %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>> +// CIR-SAME: %[[OP:.*]]: !s16i +// CIR-SAME: -> !cir.vector<[8] x !s16i> +// CIR: %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x !cir.int<u, 1>> +// CIR: %[[ALLOCA_OP:.*]] = cir.alloca !s16i +// CIR: %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[8] x !s16i> +// CIR: cir.store %[[PG]], %[[ALLOCA_PG]] +// CIR: cir.store %[[OP]], %[[ALLOCA_OP]] +// CIR: %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]] +// CIR: %[[LOAD_OP:.*]] = cir.load align(2) %[[ALLOCA_OP]] +// CIR: %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[8] x !s16i> +// CIR: %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic "aarch64.sve.convert.from.svbool" %[[LOAD_PG]] +// CIR-SAME: -> !cir.vector<[8] x !cir.int<u, 1>> +// CIR: %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" %[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]] +// CIR-SAME: -> !cir.vector<[8] x !s16i> +// CIR: cir.store %[[CALL_DUP]], %[[ALLOCA_RES]] +// CIR: %[[RES:.*]] = cir.load %[[ALLOCA_RES]] +// CIR: cir.return %[[RES]] + +// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[PG_ADDR:%.*]] = alloca <vscale x 16 x i1>,{{([[:space:]]?i64 1,)?}} align 2 +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} align 2 +// +// LLVM_VIA_CIR: [[RES_ADDR:%.*]] = alloca <vscale x 8 x i16>,{{([[:space:]]?i64 1,)?}} align 16 +// +// LLVM_OGCG_CIR: store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2 +// LLVM_OGCG_CIR: store i16 [[OP]], ptr [[OP_ADDR]], align 2 +// LLVM_OGCG_CIR: [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], align 2 +// LLVM_OGCG_CIR: [[TMP1:%.*]] = load i16, ptr [[OP_ADDR]], align 2 +// LLVM_OGCG_CIR: [[TMP2:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP0]]) +// LLVM_OGCG_CIR: [[TMP3:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP2]], i16 [[TMP1]]) +// +// LLVM_DIRECT: ret {{.*}} [[TMP3]] +// +// LLVM_VIA_CIR: store {{.*}} [[TMP3]], ptr [[RES_ADDR]] +// LLVM_VIA_CIR: [[RES:%.*]] = load {{.*}} [[RES_ADDR]] +// LLVM_VIA_CIR: ret {{.*}} [[RES]] + return SVE_ACLE_FUNC(svdup,_n,_s16_z,)(pg, op); +} + +// ALL-LABEL: @test_svdup_n_s32_z( +svint32_t test_svdup_n_s32_z(svbool_t pg, int32_t op) MODE_ATTR +{ +// CIR-SAME: %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>> +// CIR-SAME: %[[OP:.*]]: !s32i +// CIR-SAME: -> !cir.vector<[4] x !s32i> +// CIR: %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x !cir.int<u, 1>> +// CIR: %[[ALLOCA_OP:.*]] = cir.alloca !s32i +// CIR: %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[4] x !s32i> +// CIR: cir.store %[[PG]], %[[ALLOCA_PG]] +// CIR: cir.store %[[OP]], %[[ALLOCA_OP]] +// CIR: %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]] +// CIR: %[[LOAD_OP:.*]] = cir.load align(4) %[[ALLOCA_OP]] +// CIR: %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[4] x !s32i> +// CIR: %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic "aarch64.sve.convert.from.svbool" %[[LOAD_PG]] +// CIR-SAME: -> !cir.vector<[4] x !cir.int<u, 1>> +// CIR: %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" %[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]] +// CIR-SAME: -> !cir.vector<[4] x !s32i> +// CIR: cir.store %[[CALL_DUP]], %[[ALLOCA_RES]] +// CIR: %[[RES:.*]] = cir.load %[[ALLOCA_RES]] +// CIR: cir.return %[[RES]] + +// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[PG_ADDR:%.*]] = alloca <vscale x 16 x i1>,{{([[:space:]]?i64 1,)?}} align 2 +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} align 4 +// +// LLVM_VIA_CIR: [[RES_ADDR:%.*]] = alloca <vscale x 4 x i32>,{{([[:space:]]?i64 1,)?}} align 16 +// +// LLVM_OGCG_CIR: store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2 +// LLVM_OGCG_CIR: store i32 [[OP]], ptr [[OP_ADDR]], align 4 +// LLVM_OGCG_CIR: [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], align 2 +// LLVM_OGCG_CIR: [[TMP1:%.*]] = load i32, ptr [[OP_ADDR]], align 4 +// LLVM_OGCG_CIR: [[TMP2:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]]) +// LLVM_OGCG_CIR: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP2]], i32 [[TMP1]]) +// +// LLVM_DIRECT: ret {{.*}} [[TMP3]] +// +// LLVM_VIA_CIR: store {{.*}} [[TMP3]], ptr [[RES_ADDR]] +// LLVM_VIA_CIR: [[RES:%.*]] = load {{.*}} [[RES_ADDR]] +// LLVM_VIA_CIR: ret {{.*}} [[RES]] + return SVE_ACLE_FUNC(svdup,_n,_s32_z,)(pg, op); +} + +// ALL-LABEL: @test_svdup_n_s64_z( +svint64_t test_svdup_n_s64_z(svbool_t pg, int64_t op) MODE_ATTR +{ +// CIR-SAME: %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>> +// CIR-SAME: %[[OP:.*]]: !s64i +// CIR-SAME: -> !cir.vector<[2] x !s64i> +// CIR: %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x !cir.int<u, 1>> +// CIR: %[[ALLOCA_OP:.*]] = cir.alloca !s64i +// CIR: %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[2] x !s64i> +// CIR: cir.store %[[PG]], %[[ALLOCA_PG]] +// CIR: cir.store %[[OP]], %[[ALLOCA_OP]] +// CIR: %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]] +// CIR: %[[LOAD_OP:.*]] = cir.load align(8) %[[ALLOCA_OP]] +// CIR: %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[2] x !s64i> +// CIR: %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic "aarch64.sve.convert.from.svbool" %[[LOAD_PG]] +// CIR-SAME: -> !cir.vector<[2] x !cir.int<u, 1>> +// CIR: %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" %[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]] +// CIR-SAME: -> !cir.vector<[2] x !s64i> +// CIR: cir.store %[[CALL_DUP]], %[[ALLOCA_RES]] +// CIR: %[[RES:.*]] = cir.load %[[ALLOCA_RES]] +// CIR: cir.return %[[RES]] + +// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[PG_ADDR:%.*]] = alloca <vscale x 16 x i1>,{{([[:space:]]?i64 1,)?}} align 2 +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} align 8 +// +// LLVM_VIA_CIR: [[RES_ADDR:%.*]] = alloca <vscale x 2 x i64>,{{([[:space:]]?i64 1,)?}} align 16 +// +// LLVM_OGCG_CIR: store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2 +// LLVM_OGCG_CIR: store i64 [[OP]], ptr [[OP_ADDR]], align 8 +// LLVM_OGCG_CIR: [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], align 2 +// LLVM_OGCG_CIR: [[TMP1:%.*]] = load i64, ptr [[OP_ADDR]], align 8 +// LLVM_OGCG_CIR: [[TMP2:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]]) +// LLVM_OGCG_CIR: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP2]], i64 [[TMP1]]) +// +// LLVM_DIRECT: ret {{.*}} [[TMP3]] +// +// LLVM_VIA_CIR: store {{.*}} [[TMP3]], ptr [[RES_ADDR]] +// LLVM_VIA_CIR: [[RES:%.*]] = load {{.*}} [[RES_ADDR]] +// LLVM_VIA_CIR: ret {{.*}} [[RES]] + return SVE_ACLE_FUNC(svdup,_n,_s64_z,)(pg, op); +} + +// ALL-LABEL: @test_svdup_n_u8_z( +svuint8_t test_svdup_n_u8_z(svbool_t pg, uint8_t op) MODE_ATTR +{ +// CIR-SAME: %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>> +// CIR-SAME: %[[OP:.*]]: !u8i +// CIR-SAME: -> !cir.vector<[16] x !u8i> +// CIR: %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x !cir.int<u, 1>> +// CIR: %[[ALLOCA_OP:.*]] = cir.alloca !u8i +// CIR: %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[16] x !u8i> +// CIR: cir.store %[[PG]], %[[ALLOCA_PG]] +// CIR: cir.store %[[OP]], %[[ALLOCA_OP]] +// CIR: %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]] +// CIR: %[[LOAD_OP:.*]] = cir.load align(1) %[[ALLOCA_OP]] +// CIR: %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[16] x !u8i> +// CIR: %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" %[[CONST_0]], %[[LOAD_PG]], %[[LOAD_OP]] +// CIR-SAME: -> !cir.vector<[16] x !u8i> +// CIR: cir.store %[[CONVERT_PG]], %[[ALLOCA_RES]] +// CIR: %[[RES:.*]] = cir.load %[[ALLOCA_RES]] +// CIR: cir.return %[[RES]] + +// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[PG_ADDR:%.*]] = alloca <vscale x 16 x i1>,{{([[:space:]]?i64 1,)?}} align 2 +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} align 1 +// +// LLVM_VIA_CIR: [[RES_ADDR:%.*]] = alloca <vscale x 16 x i8>,{{([[:space:]]?i64 1,)?}} align 16 +// +// LLVM_OGCG_CIR: store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2 +// LLVM_OGCG_CIR: store i8 [[OP]], ptr [[OP_ADDR]], align 1 +// LLVM_OGCG_CIR: [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], align 2 +// LLVM_OGCG_CIR: [[TMP1:%.*]] = load i8, ptr [[OP_ADDR]], align 1 +// LLVM_OGCG_CIR: [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[TMP0]], i8 [[TMP1]]) +// +// LLVM_DIRECT: ret {{.*}} [[TMP2]] +// +// LLVM_VIA_CIR: store {{.*}} [[TMP2]], ptr [[RES_ADDR]] +// LLVM_VIA_CIR: ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/175976 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
