[clang] [CIR][Aarch64] upstream scalar & vector intrinsics (FP16) (PR #190310)

Rafe Murray via cfe-commits Wed, 08 Apr 2026 17:57:34 -0700

https://github.com/rafe-murray updated 
https://github.com/llvm/llvm-project/pull/190310


>From 878e98c1a5887e1d8ef0c0b552aff5e5a245bce2 Mon Sep 17 00:00:00 2001
From: Rafe Murray <[email protected]>
Date: Thu, 12 Mar 2026 17:19:27 -0700
Subject: [PATCH 1/5] [CIR][Aarch64] upstream scalar & vector intrinsics (FP16)

---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  | 16 ++++--
 .../CodeGen/AArch64/neon/fp16_intrinsics.c    | 57 +++++++++++++++++++
 2 files changed, 69 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/CodeGen/AArch64/neon/fp16_intrinsics.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index d9d303cd07b92..ce7b3fa9f9877 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2122,14 +2122,22 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vdups_laneq_f32:
   case NEON::BI__builtin_neon_vgetq_lane_f64:
   case NEON::BI__builtin_neon_vdupd_laneq_f64:
-  case NEON::BI__builtin_neon_vaddh_f16:
-  case NEON::BI__builtin_neon_vsubh_f16:
-  case NEON::BI__builtin_neon_vmulh_f16:
-  case NEON::BI__builtin_neon_vdivh_f16:
     cgm.errorNYI(expr->getSourceRange(),
                  std::string("unimplemented AArch64 builtin call: ") +
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
+  case NEON::BI__builtin_neon_vaddh_f16:
+    ops.push_back(emitScalarExpr(expr->getArg(1)));
+    return builder.createFAdd(loc, ops[0], ops[1]);
+  case NEON::BI__builtin_neon_vsubh_f16:
+    ops.push_back(emitScalarExpr(expr->getArg(1)));
+    return builder.createFSub(loc, ops[0], ops[1]);
+  case NEON::BI__builtin_neon_vmulh_f16:
+    ops.push_back(emitScalarExpr(expr->getArg(1)));
+    return builder.createFMul(loc, ops[0], ops[1]);
+  case NEON::BI__builtin_neon_vdivh_f16:
+    ops.push_back(emitScalarExpr(expr->getArg(1)));
+    return builder.createFDiv(loc, ops[0], ops[1]);
   case NEON::BI__builtin_neon_vfmah_f16:
     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
     std::rotate(ops.begin(), ops.begin() + 1, ops.end());
diff --git a/clang/test/CodeGen/AArch64/neon/fp16_intrinsics.c b/clang/test/CodeGen/AArch64/neon/fp16_intrinsics.c
new file mode 100644
index 0000000000000..8077d914cce81
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/neon/fp16_intrinsics.c
@@ -0,0 +1,57 @@
+#include <arm_fp16.h>
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-llvm -o - %s \
+// RUN: | opt -S -passes=mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+// LLVM-LABEL: @test_vaddh_f16(
+// CIR-LABEL: @test_vaddh_f16(
+float16_t test_vaddh_f16(float16_t a, float16_t b) {
+// CIR: {{%.*}} = cir.add {{%.*}}, {{%.*}} : !cir.f16
+
+// LLVM-SAME: half {{.*}} [[A:%.*]], half{{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// LLVM:  [[ADD:%.*]] = fadd half [[A]], [[B]]
+// LLVM:  ret half [[ADD]]
+  return vaddh_f16(a, b);
+}
+
+// LLVM-LABEL: @test_vsubh_f16(
+// CIR-LABEL: @test_vsubh_f16(
+float16_t test_vsubh_f16(float16_t a, float16_t b) {
+// CIR: {{%.*}} = cir.sub {{%.*}}, {{%.*}} : !cir.f16
+
+// LLVM-SAME: half {{.*}} [[A:%.]], half {{.*}} [[B:%.]]) #[[ATTR0:[0-9]+]] {
+// LLVM:  [[SUB:%.*]] = fsub half [[A]], [[B]]
+// LLVM:  ret half [[SUB]]
+  return vsubh_f16(a, b);
+}
+
+// LLVM-LABEL: @test_vmulh_f16(
+// CIR-LABEL: @test_vmulh_f16(
+float16_t test_vmulh_f16(float16_t a, float16_t b) {
+// CIR: {{%.*}} = cir.mul {{%.*}}, {{%.*}} : !cir.f16
+
+// LLVM-SAME: half {{.*}} [[A:%.]], half {{.*}} [[B:%.]]) #[[ATTR0:[0-9]+]] {
+// LLVM:  [[MUL:%.*]] = fmul half [[A]], [[B]]
+// LLVM:  ret half [[MUL]]
+  return vmulh_f16(a, b);
+}
+
+// LLVM-LABEL: @test_vdivh_f16(
+// CIR-LABEL: @test_vdivh_f16(
+float16_t test_vdivh_f16(float16_t a, float16_t b) {
+// CIR: {{%.*}} = cir.div {{%.*}}, {{%.*}} : !cir.f16
+
+// LLVM-SAME: half {{.*}} [[A:%.]], half {{.*}} [[B:%.]]) #[[ATTR0:[0-9]+]] {
+// LLVM:  [[DIV:%.*]] = fdiv half [[A]], [[B]]
+// LLVM:  ret half [[DIV]]
+  return vdivh_f16(a, b);
+}

>From f198c4ff06a6179e6f265ee2f7fa71f17371e8c9 Mon Sep 17 00:00:00 2001
From: Rafe Murray <[email protected]>
Date: Wed, 8 Apr 2026 11:38:56 -0700
Subject: [PATCH 2/5] Remove redundant operand handling

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index ce7b3fa9f9877..62a1fb16c0a44 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2127,16 +2127,12 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
   case NEON::BI__builtin_neon_vaddh_f16:
-    ops.push_back(emitScalarExpr(expr->getArg(1)));
     return builder.createFAdd(loc, ops[0], ops[1]);
   case NEON::BI__builtin_neon_vsubh_f16:
-    ops.push_back(emitScalarExpr(expr->getArg(1)));
     return builder.createFSub(loc, ops[0], ops[1]);
   case NEON::BI__builtin_neon_vmulh_f16:
-    ops.push_back(emitScalarExpr(expr->getArg(1)));
     return builder.createFMul(loc, ops[0], ops[1]);
   case NEON::BI__builtin_neon_vdivh_f16:
-    ops.push_back(emitScalarExpr(expr->getArg(1)));
     return builder.createFDiv(loc, ops[0], ops[1]);
   case NEON::BI__builtin_neon_vfmah_f16:
     // NEON intrinsic puts accumulator first, unlike the LLVM fma.

>From c2152c2c7d972ce60931293f11cfd84f8f10c209 Mon Sep 17 00:00:00 2001
From: Rafe Murray <[email protected]>
Date: Wed, 8 Apr 2026 11:50:02 -0700
Subject: [PATCH 3/5] Move fp16 tests to existing file and add block comments

---
 .../CodeGen/AArch64/neon/fp16_intrinsics.c    | 57 -------------------
 clang/test/CodeGen/AArch64/neon/fullfp16.c    | 56 ++++++++++++++++++
 2 files changed, 56 insertions(+), 57 deletions(-)
 delete mode 100644 clang/test/CodeGen/AArch64/neon/fp16_intrinsics.c

diff --git a/clang/test/CodeGen/AArch64/neon/fp16_intrinsics.c b/clang/test/CodeGen/AArch64/neon/fp16_intrinsics.c
deleted file mode 100644
index 8077d914cce81..0000000000000
--- a/clang/test/CodeGen/AArch64/neon/fp16_intrinsics.c
+++ /dev/null
@@ -1,57 +0,0 @@
-#include <arm_fp16.h>
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 \
-// RUN:    -fclangir -disable-O0-optnone \
-// RUN:  -flax-vector-conversions=none -emit-cir -o %t.cir %s
-// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 \
-// RUN:    -fclangir -disable-O0-optnone \
-// RUN:  -flax-vector-conversions=none -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg,simplifycfg -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
-
-// REQUIRES: aarch64-registered-target || arm-registered-target
-
-// LLVM-LABEL: @test_vaddh_f16(
-// CIR-LABEL: @test_vaddh_f16(
-float16_t test_vaddh_f16(float16_t a, float16_t b) {
-// CIR: {{%.*}} = cir.add {{%.*}}, {{%.*}} : !cir.f16
-
-// LLVM-SAME: half {{.*}} [[A:%.*]], half{{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] {
-// LLVM:  [[ADD:%.*]] = fadd half [[A]], [[B]]
-// LLVM:  ret half [[ADD]]
-  return vaddh_f16(a, b);
-}
-
-// LLVM-LABEL: @test_vsubh_f16(
-// CIR-LABEL: @test_vsubh_f16(
-float16_t test_vsubh_f16(float16_t a, float16_t b) {
-// CIR: {{%.*}} = cir.sub {{%.*}}, {{%.*}} : !cir.f16
-
-// LLVM-SAME: half {{.*}} [[A:%.]], half {{.*}} [[B:%.]]) #[[ATTR0:[0-9]+]] {
-// LLVM:  [[SUB:%.*]] = fsub half [[A]], [[B]]
-// LLVM:  ret half [[SUB]]
-  return vsubh_f16(a, b);
-}
-
-// LLVM-LABEL: @test_vmulh_f16(
-// CIR-LABEL: @test_vmulh_f16(
-float16_t test_vmulh_f16(float16_t a, float16_t b) {
-// CIR: {{%.*}} = cir.mul {{%.*}}, {{%.*}} : !cir.f16
-
-// LLVM-SAME: half {{.*}} [[A:%.]], half {{.*}} [[B:%.]]) #[[ATTR0:[0-9]+]] {
-// LLVM:  [[MUL:%.*]] = fmul half [[A]], [[B]]
-// LLVM:  ret half [[MUL]]
-  return vmulh_f16(a, b);
-}
-
-// LLVM-LABEL: @test_vdivh_f16(
-// CIR-LABEL: @test_vdivh_f16(
-float16_t test_vdivh_f16(float16_t a, float16_t b) {
-// CIR: {{%.*}} = cir.div {{%.*}}, {{%.*}} : !cir.f16
-
-// LLVM-SAME: half {{.*}} [[A:%.]], half {{.*}} [[B:%.]]) #[[ATTR0:[0-9]+]] {
-// LLVM:  [[DIV:%.*]] = fdiv half [[A]], [[B]]
-// LLVM:  ret half [[DIV]]
-  return vdivh_f16(a, b);
-}
diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c
index 619d07538eaaf..dfcd9c1da6545 100644
--- a/clang/test/CodeGen/AArch64/neon/fullfp16.c
+++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c
@@ -33,6 +33,62 @@
 
 #include <arm_fp16.h>
 
+//===------------------------------------------------------===//
+// 2.5.1.1.  Addition
+//===------------------------------------------------------===//
+// LLVM-LABEL: @test_vaddh_f16(
+// CIR-LABEL: @test_vaddh_f16(
+float16_t test_vaddh_f16(float16_t a, float16_t b) {
+// CIR: {{%.*}} = cir.add {{%.*}}, {{%.*}} : !cir.f16
+
+// LLVM-SAME: half {{.*}} [[A:%.*]], half{{.*}} [[B:%.*]]) {{.*}} {
+// LLVM:  [[ADD:%.*]] = fadd half [[A]], [[B]]
+// LLVM:  ret half [[ADD]]
+  return vaddh_f16(a, b);
+}
+
+//===------------------------------------------------------===//
+// 2.5.10.1.  Subtraction
+//===------------------------------------------------------===//
+// LLVM-LABEL: @test_vsubh_f16(
+// CIR-LABEL: @test_vsubh_f16(
+float16_t test_vsubh_f16(float16_t a, float16_t b) {
+// CIR: {{%.*}} = cir.sub {{%.*}}, {{%.*}} : !cir.f16
+
+// LLVM-SAME: half {{.*}} [[A:%.]], half {{.*}} [[B:%.]]) {{.*}} {
+// LLVM:  [[SUB:%.*]] = fsub half [[A]], [[B]]
+// LLVM:  ret half [[SUB]]
+  return vsubh_f16(a, b);
+}
+
+//===------------------------------------------------------===//
+// 2.5.9.1.  Multiplication
+//===------------------------------------------------------===//
+// LLVM-LABEL: @test_vmulh_f16(
+// CIR-LABEL: @test_vmulh_f16(
+float16_t test_vmulh_f16(float16_t a, float16_t b) {
+// CIR: {{%.*}} = cir.mul {{%.*}}, {{%.*}} : !cir.f16
+
+// LLVM-SAME: half {{.*}} [[A:%.]], half {{.*}} [[B:%.]]) {{.*}} {
+// LLVM:  [[MUL:%.*]] = fmul half [[A]], [[B]]
+// LLVM:  ret half [[MUL]]
+  return vmulh_f16(a, b);
+}
+
+//===------------------------------------------------------===//
+// 2.5.1.6.  Division
+//===------------------------------------------------------===//
+// LLVM-LABEL: @test_vdivh_f16(
+// CIR-LABEL: @test_vdivh_f16(
+float16_t test_vdivh_f16(float16_t a, float16_t b) {
+// CIR: {{%.*}} = cir.div {{%.*}}, {{%.*}} : !cir.f16
+
+// LLVM-SAME: half {{.*}} [[A:%.]], half {{.*}} [[B:%.]]) {{.*}} {
+// LLVM:  [[DIV:%.*]] = fdiv half [[A]], [[B]]
+// LLVM:  ret half [[DIV]]
+  return vdivh_f16(a, b);
+}
+
 //===------------------------------------------------------===//
 // 2.5.2.1.  Bitwise equal to zero
 //===------------------------------------------------------===//

>From abfa71cde07f792ee2243e0637b23ba35cebf731 Mon Sep 17 00:00:00 2001
From: Rafe Murray <[email protected]>
Date: Wed, 8 Apr 2026 11:55:44 -0700
Subject: [PATCH 4/5] Remove tests from v8.2a-fp16-intrinsics.c

---
 .../CodeGen/AArch64/v8.2a-fp16-intrinsics.c   | 28 -------------------
 1 file changed, 28 deletions(-)

diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
index c80d9e9d7f759..f1fd42a8ea26c 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
@@ -368,13 +368,6 @@ float16_t test_vsqrth_f16(float16_t a) {
   return vsqrth_f16(a);
 }
 
-// CHECK-LABEL: test_vaddh_f16
-// CHECK:  [[ADD:%.*]] = fadd half %a, %b
-// CHECK:  ret half [[ADD]]
-float16_t test_vaddh_f16(float16_t a, float16_t b) {
-  return vaddh_f16(a, b);
-}
-
 // CHECK-LABEL: test_vabdh_f16
 // CHECK:  [[ABD:%.*]] = call half @llvm.aarch64.sisd.fabd.f16(half %a, half %b)
 // CHECK:  ret half [[ABD]]
@@ -542,13 +535,6 @@ int64_t test_vcvth_n_u64_f16(float16_t a) {
   return vcvth_n_u64_f16(a, 1);
 }
 
-// CHECK-LABEL: test_vdivh_f16
-// CHECK:  [[DIV:%.*]] = fdiv half %a, %b
-// CHECK:  ret half [[DIV]]
-float16_t test_vdivh_f16(float16_t a, float16_t b) {
-  return vdivh_f16(a, b);
-}
-
 // CHECK-LABEL: test_vmaxh_f16
 // CHECK:  [[MAX:%.*]] = call half @llvm.aarch64.neon.fmax.f16(half %a, half %b)
 // CHECK:  ret half [[MAX]]
@@ -577,13 +563,6 @@ float16_t test_vminnmh_f16(float16_t a, float16_t b) {
   return vminnmh_f16(a, b);
 }
 
-// CHECK-LABEL: test_vmulh_f16
-// CHECK:  [[MUL:%.*]] = fmul half %a, %b
-// CHECK:  ret half [[MUL]]
-float16_t test_vmulh_f16(float16_t a, float16_t b) {
-  return vmulh_f16(a, b);
-}
-
 // CHECK-LABEL: test_vmulxh_f16
 // CHECK:  [[MUL:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b)
 // CHECK:  ret half [[MUL]]
@@ -604,10 +583,3 @@ float16_t test_vrecpsh_f16(float16_t a, float16_t b) {
 float16_t test_vrsqrtsh_f16(float16_t a, float16_t b) {
   return vrsqrtsh_f16(a, b);
 }
-
-// CHECK-LABEL: test_vsubh_f16
-// CHECK:  [[SUB:%.*]] = fsub half %a, %b
-// CHECK:  ret half [[SUB]]
-float16_t test_vsubh_f16(float16_t a, float16_t b) {
-  return vsubh_f16(a, b);
-}

>From bf7d1bd77b483ad3bcbcda8a03a68467a281bf86 Mon Sep 17 00:00:00 2001
From: Rafe Murray <[email protected]>
Date: Wed, 8 Apr 2026 17:49:31 -0700
Subject: [PATCH 5/5] Use ALL-LABEL over separate LLVM and CIR labels

---
 clang/test/CodeGen/AArch64/neon/fullfp16.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c
index dfcd9c1da6545..db0b20fe62799 100644
--- a/clang/test/CodeGen/AArch64/neon/fullfp16.c
+++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c
@@ -36,8 +36,7 @@
 //===------------------------------------------------------===//
 // 2.5.1.1.  Addition
 //===------------------------------------------------------===//
-// LLVM-LABEL: @test_vaddh_f16(
-// CIR-LABEL: @test_vaddh_f16(
+// ALL-LABEL: @test_vaddh_f16(
 float16_t test_vaddh_f16(float16_t a, float16_t b) {
 // CIR: {{%.*}} = cir.add {{%.*}}, {{%.*}} : !cir.f16
 
@@ -50,8 +49,7 @@ float16_t test_vaddh_f16(float16_t a, float16_t b) {
 //===------------------------------------------------------===//
 // 2.5.10.1.  Subtraction
 //===------------------------------------------------------===//
-// LLVM-LABEL: @test_vsubh_f16(
-// CIR-LABEL: @test_vsubh_f16(
+// ALL-LABEL: @test_vsubh_f16(
 float16_t test_vsubh_f16(float16_t a, float16_t b) {
 // CIR: {{%.*}} = cir.sub {{%.*}}, {{%.*}} : !cir.f16
 
@@ -64,8 +62,7 @@ float16_t test_vsubh_f16(float16_t a, float16_t b) {
 //===------------------------------------------------------===//
 // 2.5.9.1.  Multiplication
 //===------------------------------------------------------===//
-// LLVM-LABEL: @test_vmulh_f16(
-// CIR-LABEL: @test_vmulh_f16(
+// ALL-LABEL: @test_vmulh_f16(
 float16_t test_vmulh_f16(float16_t a, float16_t b) {
 // CIR: {{%.*}} = cir.mul {{%.*}}, {{%.*}} : !cir.f16
 
@@ -78,8 +75,7 @@ float16_t test_vmulh_f16(float16_t a, float16_t b) {
 //===------------------------------------------------------===//
 // 2.5.1.6.  Division
 //===------------------------------------------------------===//
-// LLVM-LABEL: @test_vdivh_f16(
-// CIR-LABEL: @test_vdivh_f16(
+// ALL-LABEL: @test_vdivh_f16(
 float16_t test_vdivh_f16(float16_t a, float16_t b) {
 // CIR: {{%.*}} = cir.div {{%.*}}, {{%.*}} : !cir.f16
 

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR][Aarch64] upstream scalar & vector intrinsics (FP16) (PR #190310)

Reply via email to