https://github.com/banach-space created https://github.com/llvm/llvm-project/pull/174433

This PR adds CIR lowering support for unpredicated `svdup` SVE builtins.
The corresponding ACLE intrinsics are documented at:
* https://developer.arm.com/architectures/instruction-sets/intrinsics
  (search for `svdup`).

Since LLVM provides an intrinsic that maps 1:1 onto these svdup
builtins, CIR lowers them by emitting a call to the corresponding LLVM
intrinsic.
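
Concretely, the new path in CIRGenBuiltinAArch64.cpp boils down to a table
lookup followed by emitting a `cir.call_llvm_intrinsic`. Here is a condensed
sketch of that flow (simplified from the patch below; the extra null check on
the lookup result is only illustrative):

```cpp
// Abridged from emitAArch64SVEBuiltinExpr after this change.
// 1. Look the builtin up in the TableGen-generated builtin-to-intrinsic map.
// 2. If it has a 1:1 LLVM intrinsic, derive the intrinsic's base name
//    (e.g. "llvm.aarch64.sve.dup.x" -> "aarch64.sve.dup.x") and emit a
//    cir.call_llvm_intrinsic; otherwise fall through to the existing switch.
const aarc64BuiltinInfo *info = findARMVectorIntrinsicInMap(
    aarch64SVEIntrinsicMap, builtinID, aarch64SVEIntrinsicsProvenSorted);

if (info && info->LLVMIntrinsic) {
  std::string name(
      Intrinsic::getBaseName((llvm::Intrinsic::ID)info->LLVMIntrinsic));
  name.erase(0, /*strlen("llvm.")=*/5); // drop the "llvm." prefix
  return emitIntrinsicCallOp(builder, loc, name,
                             convertType(expr->getType()),
                             mlir::ValueRange{ops});
}
// No direct mapping: fall through to the existing switch, which may still
// report an NYI diagnostic.
```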

DESIGN NOTES
------------
With this change, any ACLE intrinsic that has a corresponding LLVM
intrinsic can, in principle, be lowered successfully by CIR. This
improves code reuse by avoiding duplication of intrinsic definitions and
instead reusing LLVM’s intrinsic metadata.

One consequence of this approach is that CIR will no longer emit NYI
diagnostics for such intrinsics: if a mapping exists, the intrinsic will
be silently lowered.

IMPLEMENTATION NOTES
--------------------
* Intrinsic discovery logic mirrors the approach in
  CodeGen/TargetBuiltins/ARM.cpp, but is simplified since CIR only
  requires the intrinsic name (see the sketch after this list).
* Test inputs are copied from the existing svdup tests:
  clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dup.c.
* The LLVM IR produced _with_ and _without_ `-fclangir` is identical,
  modulo basic block labels, extra temporaries that SROA would remove,
  and function attributes.
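
The map lookup itself is a binary search over a table kept sorted by builtin
ID. Below is a small, self-contained illustration of that technique; it uses
plain `std::lower_bound`, made-up builtin IDs, and a name string instead of an
`llvm::Intrinsic::ID` purely so it runs without LLVM (the real code uses
`llvm::lower_bound` over the generated `aarch64SVEIntrinsicMap`):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iterator>

// Stand-in for the TableGen-generated builtin-to-intrinsic table. The real
// entries pair SVE::BI__builtin_sve_* IDs with llvm::Intrinsic IDs; the IDs
// and name strings below are hypothetical.
struct BuiltinInfo {
  unsigned builtinID;
  const char *intrinsicBaseName; // e.g. "aarch64.sve.dup.x" ("llvm." stripped)
  bool operator<(unsigned rhsBuiltinID) const {
    return builtinID < rhsBuiltinID;
  }
};

// Must stay sorted by builtinID.
static const BuiltinInfo intrinsicMap[] = {
    {100, "aarch64.sve.dup.x"},
    {101, "aarch64.sve.dup.x"},
    {200, "aarch64.sve.tbl"},
};

static const BuiltinInfo *lookup(unsigned builtinID) {
  const BuiltinInfo *it = std::lower_bound(std::begin(intrinsicMap),
                                           std::end(intrinsicMap), builtinID);
  if (it != std::end(intrinsicMap) && it->builtinID == builtinID)
    return it;
  return nullptr; // no 1:1 mapping: caller falls back to the NYI path
}

int main() {
  assert(lookup(101) && lookup(150) == nullptr);
  std::printf("builtin 101 -> %s\n", lookup(101)->intrinsicBaseName);
  return 0;
}
```

In the real code an `assert(llvm::is_sorted(intrinsicMap))`, compiled out in
release builds, guards against the generated table falling out of order.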

EXAMPLE LOWERING
----------------
Input:
```C
svint8_t test_svdup_n_s8(int8_t op)
{
  return svdup_n_s8(op);
}
```

OUTPUT 1 (default):
```llvm
define dso_local <vscale x 16 x i8> @test_svdup_n_s8(i8 noundef %op) #0 {
entry:
  %op.addr = alloca i8, align 1
  store i8 %op, ptr %op.addr, align 1
  %0 = load i8, ptr %op.addr, align 1
  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %0)
  ret <vscale x 16 x i8> %1
}
```

OUTPUT 2 (via `-fclangir`):
```llvm
define dso_local <vscale x 16 x i8> @test_svdup_n_s8(i8 %0) #0 {
  %2 = alloca i8, i64 1, align 1
  %3 = alloca <vscale x 16 x i8>, i64 1, align 16
  store i8 %0, ptr %2, align 1
  %4 = load i8, ptr %2, align 1
  %5 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %4)
  store <vscale x 16 x i8> %5, ptr %3, align 16
  %6 = load <vscale x 16 x i8>, ptr %3, align 16
  ret <vscale x 16 x i8> %6
}
```


From 19b1297728da91187905f6592053ed7acc84670f Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <[email protected]>
Date: Mon, 5 Jan 2026 09:29:53 +0000
Subject: [PATCH] [CIR][AArch64] Add lowering for unpredicated svdup builtins
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR adds CIR lowering support for unpredicated `svdup` SVE builtins.
The corresponding ACLE intrinsics are documented at:
* https://developer.arm.com/architectures/instruction-sets/intrinsics
  (search for `svdup`).

Since LLVM provides an intrinsic that maps 1:1 onto these svdup
builtins, CIR lowers them by emitting a call to the corresponding LLVM
intrinsic.

DESIGN NOTES
------------
With this change, any ACLE intrinsic that has a corresponding LLVM
intrinsic can, in principle, be lowered successfully by CIR. This
improves code reuse by avoiding duplication of intrinsic definitions and
instead reusing LLVM’s intrinsic metadata.

One consequence of this approach is that CIR will no longer emit NYI
diagnostics for such intrinsics: if a mapping exists, the intrinsic will
be silently lowered.

IMPLEMENTATION NOTES
--------------------
* Intrinsic discovery logic mirrors the approach in
  CodeGen/TargetBuiltins/ARM.cpp, but is simplified since CIR only
  requires the intrinsic name.
* Test inputs are copied from the existing svdup tests:
  clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dup.c.
* The LLVM IR produced _with_ and _without_ `-fclangir` is identical,
  modulo basic block labels, extra temporaries that SROA would remove,
  and function attributes.

EXAMPLE LOWERING
----------------
Input:
```C
svint8_t test_svdup_n_s8(int8_t op)
{
  return svdup_n_s8(op);
}
```

OUTPUT 1 (default):
```llvm
define dso_local <vscale x 16 x i8> @test_svdup_n_s8(i8 noundef %op) #0 {
entry:
  %op.addr = alloca i8, align 1
  store i8 %op, ptr %op.addr, align 1
  %0 = load i8, ptr %op.addr, align 1
  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %0)
  ret <vscale x 16 x i8> %1
}
```

OUTPUT 2 (via `-fclangir`):
```llvm
define dso_local <vscale x 16 x i8> @test_svdup_n_s8(i8 %0) #0 {
  %2 = alloca i8, i64 1, align 1
  %3 = alloca <vscale x 16 x i8>, i64 1, align 16
  store i8 %0, ptr %2, align 1
  %4 = load i8, ptr %2, align 1
  %5 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %4)
  store <vscale x 16 x i8> %5, ptr %3, align 16
  %6 = load <vscale x 16 x i8>, ptr %3, align 16
  ret <vscale x 16 x i8> %6
}
```
---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  |  71 ++++++
 .../CodeGenBuiltins/AArch64/acle_sve_dup.c    | 211 ++++++++++++++++++
 2 files changed, 282 insertions(+)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index e28b3c6cdc2ff..f2e448917aae9 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -52,6 +52,51 @@ static mlir::Value genVscaleTimesFactor(mlir::Location loc,
                                builder.getUInt64(scalingFactor, loc));
 }
 
+static bool aarch64SVEIntrinsicsProvenSorted = false;
+
+namespace {
+struct aarc64BuiltinInfo {
+  unsigned builtinID;
+  unsigned LLVMIntrinsic;
+
+  bool operator<(unsigned RHSbuiltinID) const {
+    return builtinID < RHSbuiltinID;
+  }
+  bool operator<(const aarc64BuiltinInfo &TE) const {
+    return builtinID < TE.builtinID;
+  }
+};
+} // end anonymous namespace
+
+#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                        \
+  {SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic}
+
+#define SVEMAP2(NameBase, TypeModifier) {SVE::BI__builtin_sve_##NameBase, 0}
+static const aarc64BuiltinInfo aarch64SVEIntrinsicMap[] = {
+#define GET_SVE_LLVM_INTRINSIC_MAP
+#include "clang/Basic/arm_sve_builtin_cg.inc"
+#undef GET_SVE_LLVM_INTRINSIC_MAP
+};
+
+static const aarc64BuiltinInfo *
+findARMVectorIntrinsicInMap(ArrayRef<aarc64BuiltinInfo> intrinsicMap,
+                            unsigned builtinID, bool &mapProvenSorted) {
+
+#ifndef NDEBUG
+  if (!mapProvenSorted) {
+    assert(llvm::is_sorted(intrinsicMap));
+    mapProvenSorted = true;
+  }
+#endif
+
+  const aarc64BuiltinInfo *info = llvm::lower_bound(intrinsicMap, builtinID);
+
+  if (info != intrinsicMap.end() && info->builtinID == builtinID)
+    return info;
+
+  return nullptr;
+}
+
 std::optional<mlir::Value>
 CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
                                           const CallExpr *expr) {
@@ -65,7 +110,26 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
 
   assert(!cir::MissingFeatures::aarch64SVEIntrinsics());
 
+  auto *builtinIntrInfo = findARMVectorIntrinsicInMap(
+      aarch64SVEIntrinsicMap, builtinID, aarch64SVEIntrinsicsProvenSorted);
+
+  // The operands of the builtin call
+  llvm::SmallVector<mlir::Value> ops;
+
+  for (const auto *argExpr : expr->arguments())
+    ops.push_back(emitScalarExpr(argExpr));
+
   mlir::Location loc = getLoc(expr->getExprLoc());
+  if (builtinIntrInfo->LLVMIntrinsic) {
+    std::string llvmIntrName(Intrinsic::getBaseName(
+        (llvm::Intrinsic::ID)builtinIntrInfo->LLVMIntrinsic));
+
+    llvmIntrName.erase(0, /*std::strlen("llvm.")=*/5);
+
+    return emitIntrinsicCallOp(builder, loc, llvmIntrName,
+                               convertType(expr->getType()),
+                               mlir::ValueRange{ops});
+  }
 
   switch (builtinID) {
   default:
@@ -103,10 +167,12 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
   case SVE::BI__builtin_sve_svpmullb_u64:
   case SVE::BI__builtin_sve_svpmullb_n_u16:
   case SVE::BI__builtin_sve_svpmullb_n_u64:
+
   case SVE::BI__builtin_sve_svdup_n_b8:
   case SVE::BI__builtin_sve_svdup_n_b16:
   case SVE::BI__builtin_sve_svdup_n_b32:
   case SVE::BI__builtin_sve_svdup_n_b64:
+
   case SVE::BI__builtin_sve_svdupq_n_b8:
   case SVE::BI__builtin_sve_svdupq_n_b16:
   case SVE::BI__builtin_sve_svdupq_n_b32:
@@ -129,22 +195,27 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
                  std::string("unimplemented AArch64 builtin call: ") +
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
+
   case SVE::BI__builtin_sve_svlen_u8:
   case SVE::BI__builtin_sve_svlen_s8:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 16);
+
   case SVE::BI__builtin_sve_svlen_u16:
   case SVE::BI__builtin_sve_svlen_s16:
   case SVE::BI__builtin_sve_svlen_f16:
   case SVE::BI__builtin_sve_svlen_bf16:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 8);
+
   case SVE::BI__builtin_sve_svlen_u32:
   case SVE::BI__builtin_sve_svlen_s32:
   case SVE::BI__builtin_sve_svlen_f32:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 4);
+
   case SVE::BI__builtin_sve_svlen_u64:
   case SVE::BI__builtin_sve_svlen_s64:
   case SVE::BI__builtin_sve_svlen_f64:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 2);
+
   case SVE::BI__builtin_sve_svtbl2_u8:
   case SVE::BI__builtin_sve_svtbl2_s8:
   case SVE::BI__builtin_sve_svtbl2_u16:
diff --git a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
new file mode 100644
index 0000000000000..3e0a892d6b368
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
@@ -0,0 +1,211 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=ALL,CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=ALL,CIR
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+#include <arm_sve.h>
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// ALL-LABEL: @test_svdup_n_s8
+svint8_t test_svdup_n_s8(int8_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !s8i {{.*}} -> !cir.vector<[16] x !s8i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(1) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s8i) -> !cir.vector<[16] x !s8i>
+
+// LLVM_OGCG_CIR-SAME: i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} align 1
+// LLVM_OGCG_CIR:    store i8 [[OP]], ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i8, ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s8,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_s16
+svint16_t test_svdup_n_s16(int16_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !s16i {{.*}} -> !cir.vector<[8] x !s16i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s16i) -> !cir.vector<[8] x !s16i>
+
+// LLVM_OGCG_CIR-SAME: i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} align 2
+// LLVM_OGCG_CIR:    store i16 [[OP]], ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i16, ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s16,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_s32
+svint32_t test_svdup_n_s32(int32_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !s32i {{.*}} -> !cir.vector<[4] x !s32i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s32i) -> !cir.vector<[4] x !s32i>
+
+// LLVM_OGCG_CIR-SAME: i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} align 4
+// LLVM_OGCG_CIR:    store i32 [[OP]], ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i32, ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s32,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_s64
+svint64_t test_svdup_n_s64(int64_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !s64i {{.*}} -> !cir.vector<[2] x !s64i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s64i) -> !cir.vector<[2] x !s64i>
+
+// LLVM_OGCG_CIR-SAME: i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} align 8
+// LLVM_OGCG_CIR:    store i64 [[OP]], ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i64, ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s64,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u8
+svuint8_t test_svdup_n_u8(uint8_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !u8i {{.*}} -> !cir.vector<[16] x !u8i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(1) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u8i) -> !cir.vector<[16] x !u8i>
+
+// LLVM_OGCG_CIR-SAME: i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} align 1
+// LLVM_OGCG_CIR:    store i8 [[OP]], ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i8, ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u8,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u16
+svuint16_t test_svdup_n_u16(uint16_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !u16i {{.*}} -> !cir.vector<[8] x !u16i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u16i) -> !cir.vector<[8] x !u16i>
+
+// LLVM_OGCG_CIR-SAME: i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} align 2
+// LLVM_OGCG_CIR:    store i16 [[OP]], ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i16, ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u16,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u32
+svuint32_t test_svdup_n_u32(uint32_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !u32i {{.*}} -> !cir.vector<[4] x !u32i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u32i) -> !cir.vector<[4] x !u32i>
+
+// LLVM_OGCG_CIR-SAME: i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} align 4
+// LLVM_OGCG_CIR:    store i32 [[OP]], ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i32, ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u32,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u64
+svuint64_t test_svdup_n_u64(uint64_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !u64i {{.*}} -> !cir.vector<[2] x !u64i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u64i) -> !cir.vector<[2] x !u64i>
+
+// LLVM_OGCG_CIR-SAME: i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} align 8
+// LLVM_OGCG_CIR:    store i64 [[OP]], ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i64, ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u64,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_f16
+svfloat16_t test_svdup_n_f16(float16_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !cir.f16 {{.*}} -> !cir.vector<[8] x !cir.f16>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!cir.f16) -> !cir.vector<[8] x !cir.f16>
+
+// LLVM_OGCG_CIR-SAME: half {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca half,{{([[:space:]]?i64 1,)?}} align 2
+// LLVM_OGCG_CIR:    store half [[OP]], ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load half, ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_f16,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_f32
+svfloat32_t test_svdup_n_f32(float32_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !cir.float {{.*}} -> !cir.vector<[4] x !cir.float>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!cir.float) -> !cir.vector<[4] x !cir.float>
+
+// LLVM_OGCG_CIR-SAME: float {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca float,{{([[:space:]]?i64 1,)?}} align 4
+// LLVM_OGCG_CIR:    store float [[OP]], ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load float, ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_f32,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_f64
+svfloat64_t test_svdup_n_f64(float64_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !cir.double {{.*}} -> !cir.vector<[2] x !cir.double>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!cir.double) -> !cir.vector<[2] x !cir.double>
+
+// LLVM_OGCG_CIR-SAME: double {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca double,{{([[:space:]]?i64 1,)?}} align 8
+// LLVM_OGCG_CIR:    store double [[OP]], ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load double, ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_f64,)(op);
+}
