https://github.com/DanAlejandroRodriguez updated https://github.com/llvm/llvm-project/pull/195085
>From e109901c0023af8c9c9968cfc845ef1a206156ae Mon Sep 17 00:00:00 2001 From: IAmCheese1231 <[email protected]> Date: Thu, 23 Apr 2026 22:01:56 -0400 Subject: [PATCH 1/8] implemented vqshrun builtin, added some basic tests --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 4 ++- clang/test/CodeGen/AArch64/neon/vqshrun.c | 28 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGen/AArch64/neon/vqshrun.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index ace8a5737f4bd..52ef0b36a2abe 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2540,11 +2540,13 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vmaxnmq_v: intrName = "aarch64.neon.fmaxnm"; return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); + case NEON::BI__builtin_neon_vqshrun_n_v: + intrName = "aarch64.neon.sqshrun"; + return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); case NEON::BI__builtin_neon_vmaxnmh_f16: case NEON::BI__builtin_neon_vrecpss_f32: case NEON::BI__builtin_neon_vrecpsd_f64: case NEON::BI__builtin_neon_vrecpsh_f16: - case NEON::BI__builtin_neon_vqshrun_n_v: case NEON::BI__builtin_neon_vqrshrun_n_v: case NEON::BI__builtin_neon_vqshrn_n_v: case NEON::BI__builtin_neon_vrshrn_n_v: diff --git a/clang/test/CodeGen/AArch64/neon/vqshrun.c b/clang/test/CodeGen/AArch64/neon/vqshrun.c new file mode 100644 index 0000000000000..016377b845183 --- /dev/null +++ b/clang/test/CodeGen/AArch64/neon/vqshrun.c @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -emit-llvm -o - %s | FileCheck %s --check-prefix=LLVM +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -fclangir -emit-cir -o - %s | FileCheck %s --check-prefix=CIR + +#include <arm_neon.h> + +// LLVM-LABEL: @test_vqshrun_n_s16( +// CIR-LABEL: @test_vqshrun_n_s16( +uint8x8_t test_vqshrun_n_s16(int16x8_t a) { + // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" + // LLVM: call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8 + return vqshrun_n_s16(a, 3); +} + +// LLVM-LABEL: @test_vqshrun_n_s32( +// CIR-LABEL: @test_vqshrun_n_s32( +uint16x4_t test_vqshrun_n_s32(int32x4_t a) { + // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" + // LLVM: call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16 + return vqshrun_n_s32(a, 9); +} + +// LLVM-LABEL: @test_vqshrun_n_s64( +// CIR-LABEL: @test_vqshrun_n_s64( +uint32x2_t test_vqshrun_n_s64(int64x2_t a) { + // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" + // LLVM: call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32 + return vqshrun_n_s64(a, 19); +} \ No newline at end of file >From cd1f96bf9a209784fa589095167996a67f75e2a9 Mon Sep 17 00:00:00 2001 From: Jing Wang <[email protected]> Date: Wed, 29 Apr 2026 12:36:01 -0400 Subject: [PATCH 2/8] [CIR][AArch64] Implement vqshrn_n NEON builtin --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 4 +- clang/test/CodeGen/AArch64/neon/vqshrn.c | 40 +++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGen/AArch64/neon/vqshrn.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 52ef0b36a2abe..1ffad5dc01d8e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2543,12 +2543,14 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vqshrun_n_v: intrName = "aarch64.neon.sqshrun"; return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); + case NEON::BI__builtin_neon_vqshrn_n_v: + intrName = "aarch64.neon.sqshrn"; + return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); case NEON::BI__builtin_neon_vmaxnmh_f16: case NEON::BI__builtin_neon_vrecpss_f32: case NEON::BI__builtin_neon_vrecpsd_f64: case NEON::BI__builtin_neon_vrecpsh_f16: case NEON::BI__builtin_neon_vqrshrun_n_v: - case NEON::BI__builtin_neon_vqshrn_n_v: case NEON::BI__builtin_neon_vrshrn_n_v: case NEON::BI__builtin_neon_vqrshrn_n_v: case NEON::BI__builtin_neon_vrndah_f16: diff --git a/clang/test/CodeGen/AArch64/neon/vqshrn.c b/clang/test/CodeGen/AArch64/neon/vqshrn.c new file mode 100644 index 0000000000000..bcc9f04374cf8 --- /dev/null +++ b/clang/test/CodeGen/AArch64/neon/vqshrn.c @@ -0,0 +1,40 @@ +// REQUIRES: aarch64-registered-target || arm-registered-target +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ +// RUN: -disable-O0-optnone -flax-vector-conversions=none \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=LLVM +// RUN: %if cir-enabled %{ %clang_cc1 -triple arm64-none-linux-gnu \ +// RUN: -target-feature +neon -disable-O0-optnone \ +// RUN: -flax-vector-conversions=none -fclangir -emit-cir \ +// RUN: -o - %s | FileCheck %s --check-prefix=CIR %} + +#include <arm_neon.h> + +// LLVM-LABEL: @test_vqshrn_n_s16( +// CIR-LABEL: @vqshrn_n_s16( +int8x8_t test_vqshrn_n_s16(int16x8_t a) { + // CIR: {{.*}}cir.call_llvm_intrinsic "aarch64.neon.sqshrn" + // CIR-SAME: (!cir.vector<8 x !s16i>) -> !cir.vector<8 x !s8i> + // LLVM: [[RES:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8 + // LLVM-NEXT: ret <8 x i8> [[RES]] + return vqshrn_n_s16(a, 3); +} + +// LLVM-LABEL: @test_vqshrn_n_s32( +// CIR-LABEL: @vqshrn_n_s32( +int16x4_t test_vqshrn_n_s32(int32x4_t a) { + // CIR: {{.*}}cir.call_llvm_intrinsic "aarch64.neon.sqshrn" + // CIR-SAME: (!cir.vector<4 x !s32i>) -> !cir.vector<4 x !s16i> + // LLVM: [[RES:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16 + // LLVM-NEXT: ret <4 x i16> [[RES]] + return vqshrn_n_s32(a, 9); +} + +// LLVM-LABEL: @test_vqshrn_n_s64( +// CIR-LABEL: @vqshrn_n_s64( +int32x2_t test_vqshrn_n_s64(int64x2_t a) { + // CIR: {{.*}}cir.call_llvm_intrinsic "aarch64.neon.sqshrn" + // CIR-SAME: (!cir.vector<2 x !s64i>) -> !cir.vector<2 x !s32i> + // LLVM: [[RES:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32 + // LLVM-NEXT: ret <2 x i32> [[RES]] + return vqshrn_n_s64(a, 19); +} \ No newline at end of file >From cb79049db229dcffcae346335ac9d4c77e1121ee Mon Sep 17 00:00:00 2001 From: Shreyas Hari <[email protected]> Date: Wed, 29 Apr 2026 20:29:03 -0400 Subject: [PATCH 3/8] [CIR][AArch64] Fix vqshrn_n unsigned NEON lowering --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 2 +- clang/test/CodeGen/AArch64/neon/vqshrn.c | 32 ++++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 1ffad5dc01d8e..f268ae94a00dc 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2544,7 +2544,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, intrName = "aarch64.neon.sqshrun"; return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); case NEON::BI__builtin_neon_vqshrn_n_v: - intrName = "aarch64.neon.sqshrn"; + intrName = usgn ? "aarch64.neon.uqshrn" : "aarch64.neon.sqshrn"; return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); case NEON::BI__builtin_neon_vmaxnmh_f16: case NEON::BI__builtin_neon_vrecpss_f32: diff --git a/clang/test/CodeGen/AArch64/neon/vqshrn.c b/clang/test/CodeGen/AArch64/neon/vqshrn.c index bcc9f04374cf8..9d37a7a213075 100644 --- a/clang/test/CodeGen/AArch64/neon/vqshrn.c +++ b/clang/test/CodeGen/AArch64/neon/vqshrn.c @@ -37,4 +37,34 @@ int32x2_t test_vqshrn_n_s64(int64x2_t a) { // LLVM: [[RES:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32 // LLVM-NEXT: ret <2 x i32> [[RES]] return vqshrn_n_s64(a, 19); -} \ No newline at end of file +} + +// LLVM-LABEL: @test_vqshrn_n_u16( +// CIR-LABEL: @vqshrn_n_u16( +uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { + // CIR: {{.*}}cir.call_llvm_intrinsic "aarch64.neon.uqshrn" + // CIR-SAME: (!cir.vector<8 x !u16i>) -> !cir.vector<8 x !u8i> + // LLVM: [[RES:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8 + // LLVM-NEXT: ret <8 x i8> [[RES]] + return vqshrn_n_u16(a, 3); +} + +// LLVM-LABEL: @test_vqshrn_n_u32( +// CIR-LABEL: @vqshrn_n_u32( +uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { + // CIR: {{.*}}cir.call_llvm_intrinsic "aarch64.neon.uqshrn" + // CIR-SAME: (!cir.vector<4 x !u32i>) -> !cir.vector<4 x !u16i> + // LLVM: [[RES:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16 + // LLVM-NEXT: ret <4 x i16> [[RES]] + return vqshrn_n_u32(a, 9); +} + +// LLVM-LABEL: @test_vqshrn_n_u64( +// CIR-LABEL: @vqshrn_n_u64( +uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { + // CIR: {{.*}}cir.call_llvm_intrinsic "aarch64.neon.uqshrn" + // CIR-SAME: (!cir.vector<2 x !u64i>) -> !cir.vector<2 x !u32i> + // LLVM: [[RES:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32 + // LLVM-NEXT: ret <2 x i32> [[RES]] + return vqshrn_n_u64(a, 19); +} >From bc97a2d505fd912ac920c5265d9a6a8a0c8d3981 Mon Sep 17 00:00:00 2001 From: IAmCheese1231 <[email protected]> Date: Thu, 23 Apr 2026 22:01:56 -0400 Subject: [PATCH 4/8] [CIR][AArch64] Implement vqshrun_n NEON builtin --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 27 ++++++++++++- clang/test/CodeGen/AArch64/neon/vqshrun.c | 40 +++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGen/AArch64/neon/vqshrun.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 834f66586833b..9717843c223b3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2594,7 +2594,32 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vrecpss_f32: case NEON::BI__builtin_neon_vrecpsd_f64: case NEON::BI__builtin_neon_vrecpsh_f16: - case NEON::BI__builtin_neon_vqshrun_n_v: + case NEON::BI__builtin_neon_vqshrun_n_v: { + mlir::Type inputTy; + + switch (type.getEltType()) { + case NeonTypeFlags::Int8: + inputTy = getNeonType( + this, NeonTypeFlags(NeonTypeFlags::Int16, false, true), loc); + break; + case NeonTypeFlags::Int16: + inputTy = getNeonType( + this, NeonTypeFlags(NeonTypeFlags::Int32, false, true), loc); + break; + case NeonTypeFlags::Int32: + inputTy = getNeonType( + this, NeonTypeFlags(NeonTypeFlags::Int64, false, true), loc); + break; + default: + llvm_unreachable("unexpected vqshrun element type"); + } + + auto shiftTy = ops[1].getType(); + ops[0] = builder.createBitcast(loc, ops[0], inputTy); + + intrName = "aarch64.neon.sqshrun"; + return emitNeonCall(cgm, builder, {inputTy, shiftTy}, ops, intrName, ty, loc); + } case NEON::BI__builtin_neon_vqrshrun_n_v: case NEON::BI__builtin_neon_vqshrn_n_v: case NEON::BI__builtin_neon_vrshrn_n_v: diff --git a/clang/test/CodeGen/AArch64/neon/vqshrun.c b/clang/test/CodeGen/AArch64/neon/vqshrun.c new file mode 100644 index 0000000000000..04cb61df1987e --- /dev/null +++ b/clang/test/CodeGen/AArch64/neon/vqshrun.c @@ -0,0 +1,40 @@ +// REQUIRES: aarch64-registered-target || arm-registered-target + +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -flax-vector-conversions=none -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa,simplifycfg | FileCheck %s --check-prefixes=LLVM +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa,simplifycfg | FileCheck %s --check-prefixes=LLVM %} +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=CIR %} + +#include <arm_neon.h> + +// LLVM-LABEL: @test_vqshrun_n_s16( +// CIR-LABEL: @test_vqshrun_n_s16( +uint8x8_t test_vqshrun_n_s16(int16x8_t a) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" {{.*}} : (!cir.vector<8 x !s16i>, !s32i) -> !cir.vector<8 x !u8i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[R:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> {{.*}}, i32 3) +// LLVM-NEXT: ret <8 x i8> [[R]] + return vqshrun_n_s16(a, 3); +} + +// LLVM-LABEL: @test_vqshrun_n_s32( +// CIR-LABEL: @test_vqshrun_n_s32( +uint16x4_t test_vqshrun_n_s32(int32x4_t a) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" {{.*}} : (!cir.vector<4 x !s32i>, !s32i) -> !cir.vector<4 x !u16i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[R:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> {{.*}}, i32 9) +// LLVM-NEXT: ret <4 x i16> [[R]] + return vqshrun_n_s32(a, 9); +} + +// LLVM-LABEL: @test_vqshrun_n_s64( +// CIR-LABEL: @test_vqshrun_n_s64( +uint32x2_t test_vqshrun_n_s64(int64x2_t a) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" {{.*}} : (!cir.vector<2 x !s64i>, !s32i) -> !cir.vector<2 x !u32i> + +// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[R:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> {{.*}}, i32 19) +// LLVM-NEXT: ret <2 x i32> [[R]] + return vqshrun_n_s64(a, 19); +} >From 0833c1ce9066822a463fd5a113f9635a61197192 Mon Sep 17 00:00:00 2001 From: Daniel Rodriguez <[email protected]> Date: Thu, 30 Apr 2026 05:28:32 -0400 Subject: [PATCH 5/8] [CIR][AArch64] Fix vqshrn_n type mismatch and strengthen tests --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 27 ++++++- clang/test/CodeGen/AArch64/neon/vqshrn.c | 77 ++++++++++--------- 2 files changed, 65 insertions(+), 39 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index f268ae94a00dc..9557e955fdd25 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2543,9 +2543,32 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vqshrun_n_v: intrName = "aarch64.neon.sqshrun"; return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); - case NEON::BI__builtin_neon_vqshrn_n_v: + case NEON::BI__builtin_neon_vqshrn_n_v: { + mlir::Type inputTy; + + switch (type.getEltType()) { + case NeonTypeFlags::Int8: + inputTy = getNeonType( + this, NeonTypeFlags(NeonTypeFlags::Int16, false, true), loc); + break; + case NeonTypeFlags::Int16: + inputTy = getNeonType( + this, NeonTypeFlags(NeonTypeFlags::Int32, false, true), loc); + break; + case NeonTypeFlags::Int32: + inputTy = getNeonType( + this, NeonTypeFlags(NeonTypeFlags::Int64, false, true), loc); + break; + default: + llvm_unreachable("unexpected vqshrn element type"); + } + + auto shiftTy = ops[1].getType(); + ops[0] = builder.createBitcast(loc, ops[0], inputTy); + intrName = usgn ? "aarch64.neon.uqshrn" : "aarch64.neon.sqshrn"; - return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); + return emitNeonCall(cgm, builder, {inputTy, shiftTy}, ops, intrName, ty, loc); + } case NEON::BI__builtin_neon_vmaxnmh_f16: case NEON::BI__builtin_neon_vrecpss_f32: case NEON::BI__builtin_neon_vrecpsd_f64: diff --git a/clang/test/CodeGen/AArch64/neon/vqshrn.c b/clang/test/CodeGen/AArch64/neon/vqshrn.c index 9d37a7a213075..62f8f569b9d03 100644 --- a/clang/test/CodeGen/AArch64/neon/vqshrn.c +++ b/clang/test/CodeGen/AArch64/neon/vqshrn.c @@ -1,70 +1,73 @@ // REQUIRES: aarch64-registered-target || arm-registered-target -// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ -// RUN: -disable-O0-optnone -flax-vector-conversions=none \ -// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=LLVM -// RUN: %if cir-enabled %{ %clang_cc1 -triple arm64-none-linux-gnu \ -// RUN: -target-feature +neon -disable-O0-optnone \ -// RUN: -flax-vector-conversions=none -fclangir -emit-cir \ -// RUN: -o - %s | FileCheck %s --check-prefix=CIR %} + +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -flax-vector-conversions=none -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa,simplifycfg | FileCheck %s --check-prefixes=LLVM +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa,simplifycfg | FileCheck %s --check-prefixes=LLVM %} +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=CIR %} #include <arm_neon.h> // LLVM-LABEL: @test_vqshrn_n_s16( -// CIR-LABEL: @vqshrn_n_s16( +// CIR-LABEL: @test_vqshrn_n_s16( int8x8_t test_vqshrn_n_s16(int16x8_t a) { - // CIR: {{.*}}cir.call_llvm_intrinsic "aarch64.neon.sqshrn" - // CIR-SAME: (!cir.vector<8 x !s16i>) -> !cir.vector<8 x !s8i> - // LLVM: [[RES:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8 - // LLVM-NEXT: ret <8 x i8> [[RES]] +// CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrn" {{.*}} : (!cir.vector<8 x !s16i>, !s32i) -> !cir.vector<8 x !s8i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[R:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> {{.*}}, i32 3) +// LLVM-NEXT: ret <8 x i8> [[R]] return vqshrn_n_s16(a, 3); } // LLVM-LABEL: @test_vqshrn_n_s32( -// CIR-LABEL: @vqshrn_n_s32( +// CIR-LABEL: @test_vqshrn_n_s32( int16x4_t test_vqshrn_n_s32(int32x4_t a) { - // CIR: {{.*}}cir.call_llvm_intrinsic "aarch64.neon.sqshrn" - // CIR-SAME: (!cir.vector<4 x !s32i>) -> !cir.vector<4 x !s16i> - // LLVM: [[RES:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16 - // LLVM-NEXT: ret <4 x i16> [[RES]] +// CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrn" {{.*}} : (!cir.vector<4 x !s32i>, !s32i) -> !cir.vector<4 x !s16i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[R:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> {{.*}}, i32 9) +// LLVM-NEXT: ret <4 x i16> [[R]] return vqshrn_n_s32(a, 9); } // LLVM-LABEL: @test_vqshrn_n_s64( -// CIR-LABEL: @vqshrn_n_s64( +// CIR-LABEL: @test_vqshrn_n_s64( int32x2_t test_vqshrn_n_s64(int64x2_t a) { - // CIR: {{.*}}cir.call_llvm_intrinsic "aarch64.neon.sqshrn" - // CIR-SAME: (!cir.vector<2 x !s64i>) -> !cir.vector<2 x !s32i> - // LLVM: [[RES:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32 - // LLVM-NEXT: ret <2 x i32> [[RES]] +// CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrn" {{.*}} : (!cir.vector<2 x !s64i>, !s32i) -> !cir.vector<2 x !s32i> + +// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[R:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> {{.*}}, i32 19) +// LLVM-NEXT: ret <2 x i32> [[R]] return vqshrn_n_s64(a, 19); } // LLVM-LABEL: @test_vqshrn_n_u16( -// CIR-LABEL: @vqshrn_n_u16( +// CIR-LABEL: @test_vqshrn_n_u16( uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { - // CIR: {{.*}}cir.call_llvm_intrinsic "aarch64.neon.uqshrn" - // CIR-SAME: (!cir.vector<8 x !u16i>) -> !cir.vector<8 x !u8i> - // LLVM: [[RES:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8 - // LLVM-NEXT: ret <8 x i8> [[RES]] +// CIR: cir.call_llvm_intrinsic "aarch64.neon.uqshrn" {{.*}} : (!cir.vector<8 x !s16i>, !s32i) -> !cir.vector<8 x !u8i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[R:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> {{.*}}, i32 3) +// LLVM-NEXT: ret <8 x i8> [[R]] return vqshrn_n_u16(a, 3); } // LLVM-LABEL: @test_vqshrn_n_u32( -// CIR-LABEL: @vqshrn_n_u32( +// CIR-LABEL: @test_vqshrn_n_u32( uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { - // CIR: {{.*}}cir.call_llvm_intrinsic "aarch64.neon.uqshrn" - // CIR-SAME: (!cir.vector<4 x !u32i>) -> !cir.vector<4 x !u16i> - // LLVM: [[RES:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16 - // LLVM-NEXT: ret <4 x i16> [[RES]] +// CIR: cir.call_llvm_intrinsic "aarch64.neon.uqshrn" {{.*}} : (!cir.vector<4 x !s32i>, !s32i) -> !cir.vector<4 x !u16i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[R:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> {{.*}}, i32 9) +// LLVM-NEXT: ret <4 x i16> [[R]] return vqshrn_n_u32(a, 9); } // LLVM-LABEL: @test_vqshrn_n_u64( -// CIR-LABEL: @vqshrn_n_u64( +// CIR-LABEL: @test_vqshrn_n_u64( uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { - // CIR: {{.*}}cir.call_llvm_intrinsic "aarch64.neon.uqshrn" - // CIR-SAME: (!cir.vector<2 x !u64i>) -> !cir.vector<2 x !u32i> - // LLVM: [[RES:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32 - // LLVM-NEXT: ret <2 x i32> [[RES]] +// CIR: cir.call_llvm_intrinsic "aarch64.neon.uqshrn" {{.*}} : (!cir.vector<2 x !s64i>, !s32i) -> !cir.vector<2 x !u32i> + +// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[R:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> {{.*}}, i32 19) +// LLVM-NEXT: ret <2 x i32> [[R]] return vqshrn_n_u64(a, 19); } >From 305a48cdded3de01705df1825d9f94eda75f342a Mon Sep 17 00:00:00 2001 From: Daniel Rodriguez <[email protected]> Date: Thu, 30 Apr 2026 13:46:36 -0400 Subject: [PATCH 6/8] [CIR][AArch64] Address review: combine vqshrn and vqshrun tests into one file, fix blank lines --- clang/test/CodeGen/AArch64/neon/vqshrn.c | 36 ++++++++++++++++---- clang/test/CodeGen/AArch64/neon/vqshrun.c | 40 ----------------------- 2 files changed, 30 insertions(+), 46 deletions(-) delete mode 100644 clang/test/CodeGen/AArch64/neon/vqshrun.c diff --git a/clang/test/CodeGen/AArch64/neon/vqshrn.c b/clang/test/CodeGen/AArch64/neon/vqshrn.c index 62f8f569b9d03..457e457e61bb5 100644 --- a/clang/test/CodeGen/AArch64/neon/vqshrn.c +++ b/clang/test/CodeGen/AArch64/neon/vqshrn.c @@ -10,7 +10,6 @@ // CIR-LABEL: @test_vqshrn_n_s16( int8x8_t test_vqshrn_n_s16(int16x8_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrn" {{.*}} : (!cir.vector<8 x !s16i>, !s32i) -> !cir.vector<8 x !s8i> - // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> {{.*}}, i32 3) // LLVM-NEXT: ret <8 x i8> [[R]] @@ -21,7 +20,6 @@ int8x8_t test_vqshrn_n_s16(int16x8_t a) { // CIR-LABEL: @test_vqshrn_n_s32( int16x4_t test_vqshrn_n_s32(int32x4_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrn" {{.*}} : (!cir.vector<4 x !s32i>, !s32i) -> !cir.vector<4 x !s16i> - // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> {{.*}}, i32 9) // LLVM-NEXT: ret <4 x i16> [[R]] @@ -32,7 +30,6 @@ int16x4_t test_vqshrn_n_s32(int32x4_t a) { // CIR-LABEL: @test_vqshrn_n_s64( int32x2_t test_vqshrn_n_s64(int64x2_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrn" {{.*}} : (!cir.vector<2 x !s64i>, !s32i) -> !cir.vector<2 x !s32i> - // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> {{.*}}, i32 19) // LLVM-NEXT: ret <2 x i32> [[R]] @@ -43,7 +40,6 @@ int32x2_t test_vqshrn_n_s64(int64x2_t a) { // CIR-LABEL: @test_vqshrn_n_u16( uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.uqshrn" {{.*}} : (!cir.vector<8 x !s16i>, !s32i) -> !cir.vector<8 x !u8i> - // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> {{.*}}, i32 3) // LLVM-NEXT: ret <8 x i8> [[R]] @@ -54,7 +50,6 @@ uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { // CIR-LABEL: @test_vqshrn_n_u32( uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.uqshrn" {{.*}} : (!cir.vector<4 x !s32i>, !s32i) -> !cir.vector<4 x !u16i> - // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> {{.*}}, i32 9) // LLVM-NEXT: ret <4 x i16> [[R]] @@ -65,9 +60,38 @@ uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { // CIR-LABEL: @test_vqshrn_n_u64( uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.uqshrn" {{.*}} : (!cir.vector<2 x !s64i>, !s32i) -> !cir.vector<2 x !u32i> - // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> {{.*}}, i32 19) // LLVM-NEXT: ret <2 x i32> [[R]] return vqshrn_n_u64(a, 19); } + +// LLVM-LABEL: @test_vqshrun_n_s16( +// CIR-LABEL: @test_vqshrun_n_s16( +uint8x8_t test_vqshrun_n_s16(int16x8_t a) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" {{.*}} : (!cir.vector<8 x !s16i>, !s32i) -> !cir.vector<8 x !u8i> +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[R:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> {{.*}}, i32 3) +// LLVM-NEXT: ret <8 x i8> [[R]] + return vqshrun_n_s16(a, 3); +} + +// LLVM-LABEL: @test_vqshrun_n_s32( +// CIR-LABEL: @test_vqshrun_n_s32( +uint16x4_t test_vqshrun_n_s32(int32x4_t a) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" {{.*}} : (!cir.vector<4 x !s32i>, !s32i) -> !cir.vector<4 x !u16i> +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[R:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> {{.*}}, i32 9) +// LLVM-NEXT: ret <4 x i16> [[R]] + return vqshrun_n_s32(a, 9); +} + +// LLVM-LABEL: @test_vqshrun_n_s64( +// CIR-LABEL: @test_vqshrun_n_s64( +uint32x2_t test_vqshrun_n_s64(int64x2_t a) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" {{.*}} : (!cir.vector<2 x !s64i>, !s32i) -> !cir.vector<2 x !u32i> +// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[R:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> {{.*}}, i32 19) +// LLVM-NEXT: ret <2 x i32> [[R]] + return vqshrun_n_s64(a, 19); +} diff --git a/clang/test/CodeGen/AArch64/neon/vqshrun.c b/clang/test/CodeGen/AArch64/neon/vqshrun.c deleted file mode 100644 index 04cb61df1987e..0000000000000 --- a/clang/test/CodeGen/AArch64/neon/vqshrun.c +++ /dev/null @@ -1,40 +0,0 @@ -// REQUIRES: aarch64-registered-target || arm-registered-target - -// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -flax-vector-conversions=none -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa,simplifycfg | FileCheck %s --check-prefixes=LLVM -// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa,simplifycfg | FileCheck %s --check-prefixes=LLVM %} -// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=CIR %} - -#include <arm_neon.h> - -// LLVM-LABEL: @test_vqshrun_n_s16( -// CIR-LABEL: @test_vqshrun_n_s16( -uint8x8_t test_vqshrun_n_s16(int16x8_t a) { -// CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" {{.*}} : (!cir.vector<8 x !s16i>, !s32i) -> !cir.vector<8 x !u8i> - -// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) {{.*}} { -// LLVM: [[R:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> {{.*}}, i32 3) -// LLVM-NEXT: ret <8 x i8> [[R]] - return vqshrun_n_s16(a, 3); -} - -// LLVM-LABEL: @test_vqshrun_n_s32( -// CIR-LABEL: @test_vqshrun_n_s32( -uint16x4_t test_vqshrun_n_s32(int32x4_t a) { -// CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" {{.*}} : (!cir.vector<4 x !s32i>, !s32i) -> !cir.vector<4 x !u16i> - -// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) {{.*}} { -// LLVM: [[R:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> {{.*}}, i32 9) -// LLVM-NEXT: ret <4 x i16> [[R]] - return vqshrun_n_s32(a, 9); -} - -// LLVM-LABEL: @test_vqshrun_n_s64( -// CIR-LABEL: @test_vqshrun_n_s64( -uint32x2_t test_vqshrun_n_s64(int64x2_t a) { -// CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" {{.*}} : (!cir.vector<2 x !s64i>, !s32i) -> !cir.vector<2 x !u32i> - -// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) {{.*}} { -// LLVM: [[R:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> {{.*}}, i32 19) -// LLVM-NEXT: ret <2 x i32> [[R]] - return vqshrun_n_s64(a, 19); -} >From 3db5a2c4e719a56a803fff5546c397cd83d219f4 Mon Sep 17 00:00:00 2001 From: Daniel Rodriguez <[email protected]> Date: Thu, 30 Apr 2026 14:13:15 -0400 Subject: [PATCH 7/8] [CIR][AArch64] Fix accidental fallthrough: keep vmaxnmh/vrecps builtins in NYI group --- clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 0f17c51462b4d..437594bf2c07b 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2594,6 +2594,10 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vrecpss_f32: case NEON::BI__builtin_neon_vrecpsd_f64: case NEON::BI__builtin_neon_vrecpsh_f16: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return mlir::Value{}; case NEON::BI__builtin_neon_vqshrun_n_v: { mlir::Type inputTy; >From c0c62f41b0b53e0718452f327cd98e1c27bd5cf8 Mon Sep 17 00:00:00 2001 From: Daniel Rodriguez <[email protected]> Date: Tue, 26 May 2026 00:55:46 -0400 Subject: [PATCH 8/8] [CIR][AArch64] remove migrated tests from neon-intrinsics.c, fix formatting --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 6 +- clang/test/CodeGen/AArch64/neon-intrinsics.c | 108 ------------------ clang/test/CodeGen/AArch64/neon/vqshrn.c | 9 ++ 3 files changed, 13 insertions(+), 110 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 437594bf2c07b..fb9a0dab44f02 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2622,7 +2622,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, ops[0] = builder.createBitcast(loc, ops[0], inputTy); intrName = "aarch64.neon.sqshrun"; - return emitNeonCall(cgm, builder, {inputTy, shiftTy}, ops, intrName, ty, loc); + return emitNeonCall(cgm, builder, {inputTy, shiftTy}, ops, intrName, ty, + loc); } case NEON::BI__builtin_neon_vqshrn_n_v: { mlir::Type inputTy; @@ -2648,7 +2649,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, ops[0] = builder.createBitcast(loc, ops[0], inputTy); intrName = usgn ? "aarch64.neon.uqshrn" : "aarch64.neon.sqshrn"; - return emitNeonCall(cgm, builder, {inputTy, shiftTy}, ops, intrName, ty, loc); + return emitNeonCall(cgm, builder, {inputTy, shiftTy}, ops, intrName, ty, + loc); } case NEON::BI__builtin_neon_vqrshrun_n_v: case NEON::BI__builtin_neon_vrshrn_n_v: diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index 784d9624823d5..555d20f0c9684 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -6303,42 +6303,6 @@ uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { return vshrn_high_n_u64(a, b, 19); } -// CHECK-LABEL: define dso_local <8 x i8> @test_vqshrun_n_s16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> -// CHECK-NEXT: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK-NEXT: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3) -// CHECK-NEXT: ret <8 x i8> [[VQSHRUN_N1]] -// -uint8x8_t test_vqshrun_n_s16(int16x8_t a) { - return vqshrun_n_s16(a, 3); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vqshrun_n_s32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> -// CHECK-NEXT: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK-NEXT: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9) -// CHECK-NEXT: ret <4 x i16> [[VQSHRUN_N1]] -// -uint16x4_t test_vqshrun_n_s32(int32x4_t a) { - return vqshrun_n_s32(a, 9); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vqshrun_n_s64( -// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> -// CHECK-NEXT: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK-NEXT: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19) -// CHECK-NEXT: ret <2 x i32> [[VQSHRUN_N1]] -// -uint32x2_t test_vqshrun_n_s64(int64x2_t a) { - return vqshrun_n_s64(a, 19); -} - // CHECK-LABEL: define dso_local <16 x i8> @test_vqshrun_high_n_s16( // CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -6603,78 +6567,6 @@ uint32x4_t test_vqrshrun_high_n_s64(uint32x2_t a, int64x2_t b) { return vqrshrun_high_n_s64(a, b, 19); } -// CHECK-LABEL: define dso_local <8 x i8> @test_vqshrn_n_s16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> -// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) -// CHECK-NEXT: ret <8 x i8> [[VQSHRN_N1]] -// -int8x8_t test_vqshrn_n_s16(int16x8_t a) { - return vqshrn_n_s16(a, 3); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vqshrn_n_s32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> -// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) -// CHECK-NEXT: ret <4 x i16> [[VQSHRN_N1]] -// -int16x4_t test_vqshrn_n_s32(int32x4_t a) { - return vqshrn_n_s32(a, 9); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vqshrn_n_s64( -// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> -// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) -// CHECK-NEXT: ret <2 x i32> [[VQSHRN_N1]] -// -int32x2_t test_vqshrn_n_s64(int64x2_t a) { - return vqshrn_n_s64(a, 19); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vqshrn_n_u16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> -// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) -// CHECK-NEXT: ret <8 x i8> [[VQSHRN_N1]] -// -uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { - return vqshrn_n_u16(a, 3); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vqshrn_n_u32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> -// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) -// CHECK-NEXT: ret <4 x i16> [[VQSHRN_N1]] -// -uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { - return vqshrn_n_u32(a, 9); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vqshrn_n_u64( -// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> -// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) -// CHECK-NEXT: ret <2 x i32> [[VQSHRN_N1]] -// -uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { - return vqshrn_n_u64(a, 19); -} - // CHECK-LABEL: define dso_local <16 x i8> @test_vqshrn_high_n_s16( // CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/CodeGen/AArch64/neon/vqshrn.c b/clang/test/CodeGen/AArch64/neon/vqshrn.c index 457e457e61bb5..fb706db02ffc9 100644 --- a/clang/test/CodeGen/AArch64/neon/vqshrn.c +++ b/clang/test/CodeGen/AArch64/neon/vqshrn.c @@ -10,6 +10,7 @@ // CIR-LABEL: @test_vqshrn_n_s16( int8x8_t test_vqshrn_n_s16(int16x8_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrn" {{.*}} : (!cir.vector<8 x !s16i>, !s32i) -> !cir.vector<8 x !s8i> + // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> {{.*}}, i32 3) // LLVM-NEXT: ret <8 x i8> [[R]] @@ -20,6 +21,7 @@ int8x8_t test_vqshrn_n_s16(int16x8_t a) { // CIR-LABEL: @test_vqshrn_n_s32( int16x4_t test_vqshrn_n_s32(int32x4_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrn" {{.*}} : (!cir.vector<4 x !s32i>, !s32i) -> !cir.vector<4 x !s16i> + // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> {{.*}}, i32 9) // LLVM-NEXT: ret <4 x i16> [[R]] @@ -30,6 +32,7 @@ int16x4_t test_vqshrn_n_s32(int32x4_t a) { // CIR-LABEL: @test_vqshrn_n_s64( int32x2_t test_vqshrn_n_s64(int64x2_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrn" {{.*}} : (!cir.vector<2 x !s64i>, !s32i) -> !cir.vector<2 x !s32i> + // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> {{.*}}, i32 19) // LLVM-NEXT: ret <2 x i32> [[R]] @@ -40,6 +43,7 @@ int32x2_t test_vqshrn_n_s64(int64x2_t a) { // CIR-LABEL: @test_vqshrn_n_u16( uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.uqshrn" {{.*}} : (!cir.vector<8 x !s16i>, !s32i) -> !cir.vector<8 x !u8i> + // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> {{.*}}, i32 3) // LLVM-NEXT: ret <8 x i8> [[R]] @@ -50,6 +54,7 @@ uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { // CIR-LABEL: @test_vqshrn_n_u32( uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.uqshrn" {{.*}} : (!cir.vector<4 x !s32i>, !s32i) -> !cir.vector<4 x !u16i> + // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> {{.*}}, i32 9) // LLVM-NEXT: ret <4 x i16> [[R]] @@ -60,6 +65,7 @@ uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { // CIR-LABEL: @test_vqshrn_n_u64( uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.uqshrn" {{.*}} : (!cir.vector<2 x !s64i>, !s32i) -> !cir.vector<2 x !u32i> + // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> {{.*}}, i32 19) // LLVM-NEXT: ret <2 x i32> [[R]] @@ -70,6 +76,7 @@ uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { // CIR-LABEL: @test_vqshrun_n_s16( uint8x8_t test_vqshrun_n_s16(int16x8_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" {{.*}} : (!cir.vector<8 x !s16i>, !s32i) -> !cir.vector<8 x !u8i> + // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> {{.*}}, i32 3) // LLVM-NEXT: ret <8 x i8> [[R]] @@ -80,6 +87,7 @@ uint8x8_t test_vqshrun_n_s16(int16x8_t a) { // CIR-LABEL: @test_vqshrun_n_s32( uint16x4_t test_vqshrun_n_s32(int32x4_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" {{.*}} : (!cir.vector<4 x !s32i>, !s32i) -> !cir.vector<4 x !u16i> + // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> {{.*}}, i32 9) // LLVM-NEXT: ret <4 x i16> [[R]] @@ -90,6 +98,7 @@ uint16x4_t test_vqshrun_n_s32(int32x4_t a) { // CIR-LABEL: @test_vqshrun_n_s64( uint32x2_t test_vqshrun_n_s64(int64x2_t a) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshrun" {{.*}} : (!cir.vector<2 x !s64i>, !s32i) -> !cir.vector<2 x !u32i> + // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) {{.*}} { // LLVM: [[R:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> {{.*}}, i32 19) // LLVM-NEXT: ret <2 x i32> [[R]] _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
