https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/199990
From a89b0a1a1fb35689217150c5d43ae5da504988a3 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski <[email protected]> Date: Wed, 27 May 2026 13:46:42 +0000 Subject: [PATCH 1/2] [clang][CIR][AArch64] Add lowering for conversion intrinsics This PR adds lowering for intrinsics from the following groups: * https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions * https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions-2 It continues the work started in #190961 and #193273. This PR implements conversions from FP to integer types where the bit-width does not change: * vcvt_s64_f64 * vcvt_u64_f64 * vcvt_s32_f32 * vcvtq_s32_f32 * vcvtq_s64_f64 * vcvt_u32_f32 * vcvtq_u32_f32 * vcvtq_u64_f64 * vcvt_s16_f16 * vcvtq_s16_f16 * vcvt_u16_f16 * vcvtq_u16_f16 The corresponding tests are moved from: * clang/test/CodeGen/AArch64/ to: * clang/test/CodeGen/AArch64/neon/ The lowering follows the existing implementation in CodeGen/TargetBuiltins/ARM.cpp. --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 15 ++- clang/test/CodeGen/AArch64/neon-intrinsics.c | 28 ----- clang/test/CodeGen/AArch64/neon-misc.c | 78 ------------ clang/test/CodeGen/AArch64/neon/fullfp16.c | 59 +++++++++ clang/test/CodeGen/AArch64/neon/intrinsics.c | 114 ++++++++++++++++++ .../CodeGen/AArch64/v8.2a-neon-intrinsics.c | 52 -------- 6 files changed, 187 insertions(+), 159 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 5333c1b5c0277..950699d9f0124 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -676,7 +676,20 @@ static mlir::Value emitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vcvtq_s64_v: case NEON::BI__builtin_neon_vcvtq_u64_v: case NEON::BI__builtin_neon_vcvtq_s16_f16: - case NEON::BI__builtin_neon_vcvtq_u16_f16: + case NEON::BI__builtin_neon_vcvtq_u16_f16: { + auto ty = getFloatNeonType(cgf, neonType); + // Undo the 
bitcast inserted by intrinsics that expand to this builtin + // (e.g. vcvt_u32_f32). + // TODO: While the bitcasts eventually cancel each other out, we should + // avoid them altogether. + ops[0] = + cgf.getBuilder().createCast(loc, cir::CastKind::bitcast, ops[0], ty); + assert(!cir::MissingFeatures::emitConstrainedFPCall()); + // AArch64: use fptosi.sat/fptoui.sat unless under strict FP. + llvm::StringRef llvmIntrName = usgn ? "fptoui.sat" : "fptosi.sat"; + return emitNeonCall(cgf.getCIRGenModule(), cgf.getBuilder(), + /*argTypes=*/{ty}, ops, llvmIntrName, vTy, loc); + } case NEON::BI__builtin_neon_vcvta_s16_f16: case NEON::BI__builtin_neon_vcvta_s32_v: case NEON::BI__builtin_neon_vcvta_s64_v: diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index 424d476ad33c9..7fdc80019ba01 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -20007,34 +20007,6 @@ float64x1_t test_vneg_f64(float64x1_t a) { return vneg_f64(a); } -// CHECK-LABEL: define dso_local <1 x i64> @test_vcvt_s64_f64( -// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64 -// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptosi.sat.v1i64.v1f64(<1 x double> [[TMP2]]) -// CHECK-NEXT: ret <1 x i64> [[VCVTZ_I]] -// -int64x1_t test_vcvt_s64_f64(float64x1_t a) { - return vcvt_s64_f64(a); -} - -// CHECK-LABEL: define dso_local <1 x i64> @test_vcvt_u64_f64( -// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64 -// 
CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptoui.sat.v1i64.v1f64(<1 x double> [[TMP2]]) -// CHECK-NEXT: ret <1 x i64> [[VCVTZ_I]] -// -uint64x1_t test_vcvt_u64_f64(float64x1_t a) { - return vcvt_u64_f64(a); -} - // CHECK-LABEL: define dso_local <1 x i64> @test_vcvtn_s64_f64( // CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/CodeGen/AArch64/neon-misc.c b/clang/test/CodeGen/AArch64/neon-misc.c index d4cfcfe4e9bdc..964b0059662d2 100644 --- a/clang/test/CodeGen/AArch64/neon-misc.c +++ b/clang/test/CodeGen/AArch64/neon-misc.c @@ -2839,84 +2839,6 @@ float64x2_t test_vrndiq_f64(float64x2_t a) { return vrndiq_f64(a); } -// CHECK-LABEL: define dso_local <2 x i32> @test_vcvt_s32_f32( -// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[TMP2]]) -// CHECK-NEXT: ret <2 x i32> [[VCVTZ_I]] -// -int32x2_t test_vcvt_s32_f32(float32x2_t a) { - return vcvt_s32_f32(a); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtq_s32_f32( -// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i32> 
@llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP2]]) -// CHECK-NEXT: ret <4 x i32> [[VCVTZ_I]] -// -int32x4_t test_vcvtq_s32_f32(float32x4_t a) { - return vcvtq_s32_f32(a); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtq_s64_f64( -// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> [[TMP2]]) -// CHECK-NEXT: ret <2 x i64> [[VCVTZ_I]] -// -int64x2_t test_vcvtq_s64_f64(float64x2_t a) { - return vcvtq_s64_f64(a); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vcvt_u32_f32( -// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> [[TMP2]]) -// CHECK-NEXT: ret <2 x i32> [[VCVTZ_I]] -// -uint32x2_t test_vcvt_u32_f32(float32x2_t a) { - return vcvt_u32_f32(a); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtq_u32_f32( -// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP2]]) -// CHECK-NEXT: ret <4 x i32> [[VCVTZ_I]] -// -uint32x4_t test_vcvtq_u32_f32(float32x4_t a) { - return vcvtq_u32_f32(a); -} - -// CHECK-LABEL: define dso_local <2 x 
i64> @test_vcvtq_u64_f64( -// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> [[TMP2]]) -// CHECK-NEXT: ret <2 x i64> [[VCVTZ_I]] -// -uint64x2_t test_vcvtq_u64_f64(float64x2_t a) { - return vcvtq_u64_f64(a); -} - // CHECK-LABEL: define dso_local <2 x i32> @test_vcvtn_s32_f32( // CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c index 895e05629abf5..eb301306588eb 100644 --- a/clang/test/CodeGen/AArch64/neon/fullfp16.c +++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c @@ -32,6 +32,7 @@ //============================================================================= #include <arm_fp16.h> +#include <arm_neon.h> //===------------------------------------------------------===// // 2.5.1.1. 
Addition @@ -211,3 +212,61 @@ float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) { // LLVM: ret half [[ADD]] return vfmsh_f16(a, b, c); } + +//===------------------------------------------------------===// +// 2.6.3.1 Convearions +// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions-2 +//===------------------------------------------------------===// + +// CHECK-LABEL: define {{[^@]+}}@test_vcvt_s16_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> +// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[TMP2]]) +// CHECK-NEXT: ret <4 x i16> [[VCVTZ_I]] +// +int16x4_t test_vcvt_s16_f16 (float16x4_t a) { + return vcvt_s16_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_s16_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> +// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[TMP2]]) +// CHECK-NEXT: ret <8 x i16> [[VCVTZ_I]] +// +int16x8_t test_vcvtq_s16_f16 (float16x8_t a) { + return vcvtq_s16_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vcvt_u16_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> +// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> [[TMP2]]) +// CHECK-NEXT: ret 
<4 x i16> [[VCVTZ_I]] +// +uint16x4_t test_vcvt_u16_f16 (float16x4_t a) { + return vcvt_u16_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_u16_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> +// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> [[TMP2]]) +// CHECK-NEXT: ret <8 x i16> [[VCVTZ_I]] +// +uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) { + return vcvtq_u16_f16(a); +} + diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c index 7d3fb8fd7a3ca..9b90f5d3da310 100644 --- a/clang/test/CodeGen/AArch64/neon/intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c @@ -4181,6 +4181,120 @@ uint64_t test_vcvtd_n_u64_f64(float64_t a) { return (uint64_t)vcvtd_n_u64_f64(a, 64); } +// LLVM-LABEL: @test_vcvt_s32_f32 +// CIR-LABEL: @vcvt_s32_f32 +int32x2_t test_vcvt_s32_f32(float32x2_t a) { +// CIR: cir.call_llvm_intrinsic "fptosi.sat + +// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32> +// LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> +// LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> +// LLVM: [[VCVTZ_I:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[TMP2]]) +// LLVM: ret <2 x i32> [[VCVTZ_I]] + return vcvt_s32_f32(a); +} + +// LLVM-LABEL: @test_vcvtq_s32_f32 +// CIR-LABEL: @vcvtq_s32_f32 +int32x4_t test_vcvtq_s32_f32(float32x4_t a) { +// CIR: cir.call_llvm_intrinsic "fptosi.sat + +// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32> +// LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8> +// LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 
+// LLVM: [[VCVTZ_I:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP2]]) +// LLVM: ret <4 x i32> [[VCVTZ_I]] + return vcvtq_s32_f32(a); +} + +// LLVM-LABEL: @test_vcvt_u32_f32 +// CIR-LABEL: @vcvt_u32_f32 +uint32x2_t test_vcvt_u32_f32(float32x2_t a) { +// CIR: cir.call_llvm_intrinsic "fptoui.sat + +// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32> +// LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> +// LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> +// LLVM: [[VCVTZ_I:%.*]] = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> [[TMP2]]) +// LLVM: ret <2 x i32> [[VCVTZ_I]] + return vcvt_u32_f32(a); +} + +// LLVM-LABEL: @test_vcvtq_u32_f32 +// CIR-LABEL: @vcvtq_u32_f32 +uint32x4_t test_vcvtq_u32_f32(float32x4_t a) { +// CIR: cir.call_llvm_intrinsic "fptoui.sat + +// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32> +// LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8> +// LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> +// LLVM: [[VCVTZ_I:%.*]] = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP2]]) +// LLVM: ret <4 x i32> [[VCVTZ_I]] + return vcvtq_u32_f32(a); +} + +// LLVM-LABEL: @test_vcvt_s64_f64 +// CIR-LABEL: @vcvt_s64_f64 +int64x1_t test_vcvt_s64_f64(float64x1_t a) { +// CIR: cir.call_llvm_intrinsic "fptosi.sat + +// LLVM-SAME: <1 x double> {{.*}} [[A:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64 +// LLVM: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0 +// LLVM: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8> +// LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> +// LLVM: [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptosi.sat.v1i64.v1f64(<1 x double> [[TMP2]]) +// LLVM: ret <1 x i64> [[VCVTZ_I]] + return vcvt_s64_f64(a); +} + +// LLVM-LABEL: 
@test_vcvtq_s64_f64 +// CIR-LABEL: @vcvtq_s64_f64 +int64x2_t test_vcvtq_s64_f64(float64x2_t a) { +// CIR: cir.call_llvm_intrinsic "fptosi.sat + +// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64> +// LLVM: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8> +// LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> +// LLVM: [[VCVTZ_I:%.*]] = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> [[TMP2]]) +// LLVM: ret <2 x i64> [[VCVTZ_I]] + return vcvtq_s64_f64(a); +} + +// LLVM-LABEL: @test_vcvt_u64_f64 +// CIR-LABEL: @vcvt_u64_f64 +uint64x1_t test_vcvt_u64_f64(float64x1_t a) { +// CIR: cir.call_llvm_intrinsic "fptoui.sat + +// LLVM-SAME: <1 x double> {{.*}} [[A:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64 +// LLVM: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0 +// LLVM: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8> +// LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> +// LLVM: [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptoui.sat.v1i64.v1f64(<1 x double> [[TMP2]]) +// LLVM: ret <1 x i64> [[VCVTZ_I]] + return vcvt_u64_f64(a); +} + +// LLVM-LABEL: @test_vcvtq_u64_f64 +// CIR-LABEL: @vcvtq_u64_f64 +uint64x2_t test_vcvtq_u64_f64(float64x2_t a) { +// CIR: cir.call_llvm_intrinsic "fptoui.sat + +// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64> +// LLVM: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8> +// LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> +// LLVM: [[VCVTZ_I:%.*]] = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> [[TMP2]]) +// LLVM: ret <2 x i64> [[VCVTZ_I]] + return vcvtq_u64_f64(a); +} + //===------------------------------------------------------===// // 2.1.3.2.4 Vector rounding shift right and accumulate // 
https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#vector-rounding-shift-right-and-accumulate diff --git a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c index ff1c206fc6350..69eb1916d2d6d 100644 --- a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c @@ -222,58 +222,6 @@ float16x8_t test_vcvtq_f16_u16 (uint16x8_t a) { return vcvtq_f16_u16(a); } -// CHECK-LABEL: define {{[^@]+}}@test_vcvt_s16_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[TMP2]]) -// CHECK-NEXT: ret <4 x i16> [[VCVTZ_I]] -// -int16x4_t test_vcvt_s16_f16 (float16x4_t a) { - return vcvt_s16_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_s16_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[TMP2]]) -// CHECK-NEXT: ret <8 x i16> [[VCVTZ_I]] -// -int16x8_t test_vcvtq_s16_f16 (float16x8_t a) { - return vcvtq_s16_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vcvt_u16_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = 
call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> [[TMP2]]) -// CHECK-NEXT: ret <4 x i16> [[VCVTZ_I]] -// -uint16x4_t test_vcvt_u16_f16 (float16x4_t a) { - return vcvt_u16_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_u16_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> [[TMP2]]) -// CHECK-NEXT: ret <8 x i16> [[VCVTZ_I]] -// -uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) { - return vcvtq_u16_f16(a); -} - // CHECK-LABEL: define {{[^@]+}}@test_vcvta_s16_f16 // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: From 8c5a5ea800c2899cdf0fdb4f5d1f869710814584 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski <[email protected]> Date: Fri, 29 May 2026 13:59:35 +0000 Subject: [PATCH 2/2] Move FP16 tests to a dedicated file --- .../AArch64/neon/conversion-fullfp16.c | 87 +++++++++++++++++++ clang/test/CodeGen/AArch64/neon/fullfp16.c | 58 ------------- 2 files changed, 87 insertions(+), 58 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/neon/conversion-fullfp16.c diff --git a/clang/test/CodeGen/AArch64/neon/conversion-fullfp16.c b/clang/test/CodeGen/AArch64/neon/conversion-fullfp16.c new file mode 100644 index 0000000000000..9e0c65c9a77b8 --- /dev/null +++ b/clang/test/CodeGen/AArch64/neon/conversion-fullfp16.c @@ -0,0 +1,87 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1_cg_arm64_neon -target-feature +fullfp16 -emit-llvm %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=LLVM +// RUN: %if cir-enabled %{%clang_cc1_cg_arm64_neon -target-feature +fullfp16 -fclangir -emit-llvm %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck 
%s --check-prefixes=LLVM %} +// RUN: %if cir-enabled %{%clang_cc1_cg_arm64_neon -target-feature +fullfp16 -fclangir -emit-cir %s -disable-O0-optnone | FileCheck %s --check-prefixes=CIR %} + +//============================================================================= +// NOTES +// +// Tests for unconstrained conversion intrinsics that require the fullfp16 extension. +// +// This file contains FP16 tests that were originally located in +// * clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c +// The main difference is the use of RUN lines that enable ClangIR lowering; +// therefore only builtins currently supported by ClangIR are tested here. +// Once ClangIR support is complete, this file is intended to replace the +// original test file. +// +// ACLE section headings based on v2025Q2 of the ACLE specification: +// * https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#bitwise-equal-to-zero +// +//============================================================================= + +#include <arm_fp16.h> +#include <arm_neon.h> + +//===------------------------------------------------------===// +// 2.6.3.1 Conversions +// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions-2 +//===------------------------------------------------------===// + +// LLVM-LABEL: @test_vcvt_s16_f16 +// CIR-LABEL: @vcvt_s16_f16 +int16x4_t test_vcvt_s16_f16 (float16x4_t a) { +// CIR: cir.call_llvm_intrinsic "fptosi.sat" + +// LLVM-SAME: (<4 x half> {{.*}} [[A:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> +// LLVM-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> +// LLVM-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[TMP2]]) +// LLVM-NEXT: ret <4 x i16> [[VCVTZ_I]] + return vcvt_s16_f16(a); +} + +// LLVM-LABEL: @test_vcvtq_s16_f16 +// CIR-LABEL: @vcvtq_s16_f16 +int16x8_t test_vcvtq_s16_f16 (float16x8_t a) { +// CIR: 
cir.call_llvm_intrinsic "fptosi.sat" + +// LLVM-SAME: (<8 x half> {{.*}} [[A:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8> +// LLVM-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> +// LLVM-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[TMP2]]) +// LLVM-NEXT: ret <8 x i16> [[VCVTZ_I]] + return vcvtq_s16_f16(a); +} + +// LLVM-LABEL: @test_vcvt_u16_f16 +// CIR-LABEL: @vcvt_u16_f16 +uint16x4_t test_vcvt_u16_f16 (float16x4_t a) { +// CIR: cir.call_llvm_intrinsic "fptoui.sat" + +// LLVM-SAME: (<4 x half> {{.*}} [[A:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> +// LLVM-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> +// LLVM-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> [[TMP2]]) +// LLVM-NEXT: ret <4 x i16> [[VCVTZ_I]] + return vcvt_u16_f16(a); +} + +// LLVM-LABEL: @test_vcvtq_u16_f16 +// CIR-LABEL: @vcvtq_u16_f16 +uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) { +// CIR: cir.call_llvm_intrinsic "fptoui.sat" + +// LLVM: (<8 x half> {{.*}} [[A:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8> +// LLVM-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> +// LLVM-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> [[TMP2]]) +// LLVM-NEXT: ret <8 x i16> [[VCVTZ_I]] + return vcvtq_u16_f16(a); +} + diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c index eb301306588eb..41544b925b32d 100644 --- a/clang/test/CodeGen/AArch64/neon/fullfp16.c +++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c @@ -212,61 +212,3 @@ float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) { // LLVM: ret half [[ADD]] return vfmsh_f16(a, b, c); } - 
-//===------------------------------------------------------===// -// 2.6.3.1 Convearions -// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions-2 -//===------------------------------------------------------===// - -// CHECK-LABEL: define {{[^@]+}}@test_vcvt_s16_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[TMP2]]) -// CHECK-NEXT: ret <4 x i16> [[VCVTZ_I]] -// -int16x4_t test_vcvt_s16_f16 (float16x4_t a) { - return vcvt_s16_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_s16_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[TMP2]]) -// CHECK-NEXT: ret <8 x i16> [[VCVTZ_I]] -// -int16x8_t test_vcvtq_s16_f16 (float16x8_t a) { - return vcvtq_s16_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vcvt_u16_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> [[TMP2]]) -// CHECK-NEXT: ret <4 x i16> [[VCVTZ_I]] -// -uint16x4_t test_vcvt_u16_f16 (float16x4_t a) { - return vcvt_u16_f16(a); -} - -// CHECK-LABEL: define 
{{[^@]+}}@test_vcvtq_u16_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> -// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> [[TMP2]]) -// CHECK-NEXT: ret <8 x i16> [[VCVTZ_I]] -// -uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) { - return vcvtq_u16_f16(a); -} - _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
