https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/170686
This is the last of the generic instructions created from MVE intrinsics. It was a little more awkward than the others due to it taking a Type as one of the arguments. This creates a new function to create the intrinsic we need. >From 15b50694c5e964f96b4bc36dc1d94108ea51d82f Mon Sep 17 00:00:00 2001 From: David Green <[email protected]> Date: Thu, 4 Dec 2025 16:26:22 +0000 Subject: [PATCH] [ARM] Introduce intrinsics for MVE fp-converts under strict-fp. This is the last of the generic instructions created from MVE intrinsics. It was a little more awkward than the others due to it taking a Type as one of the arguments. This creates a new function to create the intrinsic we need. --- clang/include/clang/Basic/arm_mve.td | 2 +- clang/include/clang/Basic/arm_mve_defs.td | 20 +- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 32 + clang/test/CodeGen/arm-mve-intrinsics/vcvt.c | 958 ++++++++++++------ clang/utils/TableGen/MveEmitter.cpp | 7 +- llvm/include/llvm/IR/IntrinsicsARM.td | 6 + llvm/lib/Target/ARM/ARMInstrMVE.td | 20 +- .../mve-intrinsics/strict-intrinsics.ll | 81 ++ 8 files changed, 808 insertions(+), 318 deletions(-) diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 77531c31538c1..eae3a9f9624ab 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -598,7 +598,7 @@ foreach half = [ "b", "t" ] in { } // params = [f16], pnt = PNT_None } // loop over half = "b", "t" -multiclass float_int_conversions<Type FScalar, Type IScalar, IRBuilderBase ftoi, IRBuilderBase itof> { +multiclass float_int_conversions<Type FScalar, Type IScalar, Builder ftoi, Builder itof> { defvar FVector = VecOf<FScalar>; defvar IVector = VecOf<IScalar>; diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 3210549d0cb56..d228b298a9aa6 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -123,10 +123,10 @@ def fcmp_ule: IRBuilder<"CreateFCmpULE">; def splat: CGHelperFn<"ARMMVEVectorSplat">; def select: IRBuilder<"CreateSelect">; def fneg: IRBuilder<"CreateFNeg">; -def sitofp: IRBuilder<"CreateSIToFP">; -def uitofp: IRBuilder<"CreateUIToFP">; -def fptosi: IRBuilder<"CreateFPToSI">; -def fptoui: IRBuilder<"CreateFPToUI">; +def sitofp_node: IRBuilder<"CreateSIToFP">; +def uitofp_node: IRBuilder<"CreateUIToFP">; +def fptosi_node: IRBuilder<"CreateFPToSI">; +def fptoui_node: IRBuilder<"CreateFPToUI">; def vrev: CGHelperFn<"ARMMVEVectorElementReverse"> { let special_params = [IRBuilderIntParam<1, "unsigned">]; } @@ -215,9 +215,9 @@ def bitsize; // strictFPAlt allows a node to have different code generation under strict-fp. // TODO: The standard node can be IRBuilderBase or IRIntBase. -class strictFPAlt<Builder standard_, IRIntBase strictfp_> : Builder { +class strictFPAlt<Builder standard_, Builder strictfp_> : Builder { Builder standard = standard_; - IRIntBase strictfp = strictfp_; + Builder strictfp = strictfp_; } // If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it @@ -593,6 +593,14 @@ def fminnm : strictFPAlt<IRIntBase<"minnum", [Vector]>, IRInt<"vminnm", [Vector]>>; def fmaxnm : strictFPAlt<IRIntBase<"maxnum", [Vector]>, IRInt<"vmaxnm", [Vector]>>; +def sitofp: strictFPAlt<sitofp_node, + CGFHelperFn<"ARMMVECreateSIToFP">>; +def uitofp: strictFPAlt<uitofp_node, + CGFHelperFn<"ARMMVECreateUIToFP">>; +def fptosi: strictFPAlt<fptosi_node, + CGFHelperFn<"ARMMVECreateFPToSI">>; +def fptoui: strictFPAlt<fptoui_node, + CGFHelperFn<"ARMMVECreateFPToUI">>; // ----------------------------------------------------------------------------- // Convenience lists of parameter types. 'T' is just a container record, so you diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index d4b0b81d3d87f..744cd1b0a324a 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -3512,6 +3512,38 @@ static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder, return Builder.CreateShuffleVector(V, Indices); } +static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder, + CodeGenFunction *CGF, llvm::Value *V, + llvm::Type *Ty) { + return Builder.CreateCall( + CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}), + {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)}); +} + +static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder, + CodeGenFunction *CGF, llvm::Value *V, + llvm::Type *Ty) { + return Builder.CreateCall( + CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}), + {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)}); +} + +static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder, + CodeGenFunction *CGF, llvm::Value *V, + llvm::Type *Ty) { + return Builder.CreateCall( + CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}), + {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)}); +} + +static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder, + CodeGenFunction *CGF, llvm::Value *V, + llvm::Type *Ty) { + return Builder.CreateCall( + CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}), + {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)}); +} + Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c index b2a6d0c1ea668..14a9116208b87 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c @@ -1,15 +1,22 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -// RUN: %clang_cc1 -DPOLYMORPHIC -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -DPOLYMORPHIC -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -DPOLYMORPHIC -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target #include <arm_mve.h> -// CHECK-LABEL: @test_vcvtq_f16_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = sitofp <8 x i16> [[A:%.*]] to <8 x half> -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_f16_s16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = sitofp <8 x i16> [[A:%.*]] to <8 x half> +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_f16_s16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.v8f16.v8i16(<8 x i16> [[A:%.*]], i32 0) #[[ATTR2:[0-9]+]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vcvtq_f16_s16(int16x8_t a) { @@ -20,10 +27,15 @@ float16x8_t test_vcvtq_f16_s16(int16x8_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_f16_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = uitofp <8 x i16> [[A:%.*]] to <8 x half> -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_f16_u16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = uitofp <8 x i16> [[A:%.*]] to <8 x half> +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_f16_u16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.v8f16.v8i16(<8 x i16> [[A:%.*]], i32 1) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vcvtq_f16_u16(uint16x8_t a) { @@ -34,10 +46,15 @@ float16x8_t test_vcvtq_f16_u16(uint16x8_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_f32_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_f32_s32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_f32_s32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.v4f32.v4i32(<4 x i32> [[A:%.*]], i32 0) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vcvtq_f32_s32(int32x4_t a) { @@ -48,10 +65,15 @@ float32x4_t test_vcvtq_f32_s32(int32x4_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_f32_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_f32_u32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_f32_u32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.v4f32.v4i32(<4 x i32> [[A:%.*]], i32 1) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vcvtq_f32_u32(uint32x4_t a) { @@ -62,52 +84,79 @@ float32x4_t test_vcvtq_f32_u32(uint32x4_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_s16_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = fptosi <8 x half> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_s16_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fptosi <8 x half> [[A:%.*]] to <8 x i16> +// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_s16_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.int.fp.v8i16.v8f16(<8 x half> [[A:%.*]], i32 0) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vcvtq_s16_f16(float16x8_t a) { return vcvtq_s16_f16(a); } -// CHECK-LABEL: @test_vcvtq_s32_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = fptosi <4 x float> [[A:%.*]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_s32_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fptosi <4 x float> [[A:%.*]] to <4 x i32> +// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_s32_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.int.fp.v4i32.v4f32(<4 x float> [[A:%.*]], i32 0) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vcvtq_s32_f32(float32x4_t a) { return vcvtq_s32_f32(a); } -// CHECK-LABEL: @test_vcvtq_u16_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = fptoui <8 x half> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_u16_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fptoui <8 x half> [[A:%.*]] to <8 x i16> +// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_u16_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.int.fp.v8i16.v8f16(<8 x half> [[A:%.*]], i32 1) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vcvtq_u16_f16(float16x8_t a) { return vcvtq_u16_f16(a); } -// CHECK-LABEL: @test_vcvtq_u32_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = fptoui <4 x float> [[A:%.*]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_u32_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fptoui <4 x float> [[A:%.*]] to <4 x i32> +// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_u32_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.int.fp.v4i32.v4f32(<4 x float> [[A:%.*]], i32 1) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vcvtq_u32_f32(float32x4_t a) { return vcvtq_u32_f32(a); } -// CHECK-LABEL: @test_vcvtq_m_f16_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_f16_s16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_f16_s16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vcvtq_m_f16_s16(float16x8_t inactive, int16x8_t a, mve_pred16_t p) { @@ -118,12 +167,19 @@ float16x8_t test_vcvtq_m_f16_s16(float16x8_t inactive, int16x8_t a, mve_pred16_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_m_f16_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_f16_u16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_f16_u16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vcvtq_m_f16_u16(float16x8_t inactive, uint16x8_t a, mve_pred16_t p) { @@ -134,12 +190,19 @@ float16x8_t test_vcvtq_m_f16_u16(float16x8_t inactive, uint16x8_t a, mve_pred16_ #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_m_f32_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_f32_s32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_f32_s32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vcvtq_m_f32_s32(float32x4_t inactive, int32x4_t a, mve_pred16_t p) { @@ -150,12 +213,19 @@ float32x4_t test_vcvtq_m_f32_s32(float32x4_t inactive, int32x4_t a, mve_pred16_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_m_f32_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_f32_u32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_f32_u32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vcvtq_m_f32_u32(float32x4_t inactive, uint32x4_t a, mve_pred16_t p) { @@ -166,12 +236,19 @@ float32x4_t test_vcvtq_m_f32_u32(float32x4_t inactive, uint32x4_t a, mve_pred16_ #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_m_s16_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_s16_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_s16_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vcvtq_m_s16_f16(int16x8_t inactive, float16x8_t a, mve_pred16_t p) { @@ -182,12 +259,19 @@ int16x8_t test_vcvtq_m_s16_f16(int16x8_t inactive, float16x8_t a, mve_pred16_t p #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_m_s32_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_s32_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_s32_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vcvtq_m_s32_f32(int32x4_t inactive, float32x4_t a, mve_pred16_t p) { @@ -198,12 +282,19 @@ int32x4_t test_vcvtq_m_s32_f32(int32x4_t inactive, float32x4_t a, mve_pred16_t p #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_m_u16_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_u16_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_u16_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vcvtq_m_u16_f16(uint16x8_t inactive, float16x8_t a, mve_pred16_t p) { @@ -214,12 +305,19 @@ uint16x8_t test_vcvtq_m_u16_f16(uint16x8_t inactive, float16x8_t a, mve_pred16_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_m_u32_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_u32_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_u32_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vcvtq_m_u32_f32(uint32x4_t inactive, float32x4_t a, mve_pred16_t p) { @@ -230,12 +328,19 @@ uint32x4_t test_vcvtq_m_u32_f32(uint32x4_t inactive, float32x4_t a, mve_pred16_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_x_f16_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_f16_s16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_f16_s16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vcvtq_x_f16_s16(int16x8_t a, mve_pred16_t p) { @@ -246,12 +351,19 @@ float16x8_t test_vcvtq_x_f16_s16(int16x8_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_x_f16_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_f16_u16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_f16_u16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vcvtq_x_f16_u16(uint16x8_t a, mve_pred16_t p) { @@ -262,12 +374,19 @@ float16x8_t test_vcvtq_x_f16_u16(uint16x8_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_x_f32_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_f32_s32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_f32_s32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vcvtq_x_f32_s32(int32x4_t a, mve_pred16_t p) { @@ -278,12 +397,19 @@ float32x4_t test_vcvtq_x_f32_s32(int32x4_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_x_f32_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_f32_u32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_f32_u32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vcvtq_x_f32_u32(uint32x4_t a, mve_pred16_t p) { @@ -294,80 +420,125 @@ float32x4_t test_vcvtq_x_f32_u32(uint32x4_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vcvtq_x_s16_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_s16_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_s16_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vcvtq_x_s16_f16(float16x8_t a, mve_pred16_t p) { return vcvtq_x_s16_f16(a, p); } -// CHECK-LABEL: @test_vcvtq_x_s32_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_s32_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_s32_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vcvtq_x_s32_f32(float32x4_t a, mve_pred16_t p) { return vcvtq_x_s32_f32(a, p); } -// CHECK-LABEL: @test_vcvtq_x_u16_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_u16_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_u16_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vcvtq_x_u16_f16(float16x8_t a, mve_pred16_t p) { return vcvtq_x_u16_f16(a, p); } -// CHECK-LABEL: @test_vcvtq_x_u32_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_u32_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_u32_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vcvtq_x_u32_f32(float32x4_t a, mve_pred16_t p) { return vcvtq_x_u32_f32(a, p); } -// CHECK-LABEL: @test_vcvttq_f16_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvttq_f16_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvttq_f16_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vcvttq_f16_f32(float16x8_t a, float32x4_t b) { return vcvttq_f16_f32(a, b); } -// CHECK-LABEL: @test_vcvttq_m_f16_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1, <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvttq_m_f16_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1, <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvttq_m_f16_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1, <4 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vcvttq_m_f16_f32(float16x8_t a, float32x4_t b, mve_pred16_t p) { return vcvttq_m_f16_f32(a, b, p); } -// CHECK-LABEL: @test_vcvtq_n_f16_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32 0, <8 x i16> [[A:%.*]], i32 1) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_n_f16_s16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32 0, <8 x i16> [[A:%.*]], i32 1) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_n_f16_s16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32 0, <8 x i16> [[A:%.*]], i32 1) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vcvtq_n_f16_s16(int16x8_t a) { @@ -378,10 +549,15 @@ float16x8_t test_vcvtq_n_f16_s16(int16x8_t a) #endif } -// CHECK-LABEL: @test_vcvtq_n_f16_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32 1, <8 x i16> [[A:%.*]], i32 2) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_n_f16_u16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32 1, <8 x i16> [[A:%.*]], i32 2) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_n_f16_u16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32 1, <8 x i16> [[A:%.*]], i32 2) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vcvtq_n_f16_u16(uint16x8_t a) { @@ -392,10 +568,15 @@ float16x8_t test_vcvtq_n_f16_u16(uint16x8_t a) #endif } -// CHECK-LABEL: @test_vcvtq_n_f32_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32 0, <4 x i32> [[A:%.*]], i32 3) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_n_f32_s32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32 0, <4 x i32> [[A:%.*]], i32 3) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_n_f32_s32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32 0, <4 x i32> [[A:%.*]], i32 3) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) { @@ -406,10 +587,15 @@ float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) #endif } -// CHECK-LABEL: @test_vcvtq_n_f32_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32 1, <4 x i32> [[A:%.*]], i32 32) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_n_f32_u32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32 1, <4 x i32> [[A:%.*]], i32 32) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_n_f32_u32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32 1, <4 x i32> [[A:%.*]], i32 32) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) { @@ -420,52 +606,79 @@ float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) #endif } -// CHECK-LABEL: @test_vcvtq_n_s16_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32 0, <8 x half> [[A:%.*]], i32 1) -// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_n_s16_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32 0, <8 x half> [[A:%.*]], i32 1) +// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_n_s16_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32 0, <8 x half> [[A:%.*]], i32 1) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vcvtq_n_s16_f16(float16x8_t a) { return vcvtq_n_s16_f16(a, 1); } -// CHECK-LABEL: @test_vcvtq_n_u16_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32 1, <8 x half> [[A:%.*]], i32 2) -// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_n_u16_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32 1, <8 x half> [[A:%.*]], i32 2) +// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_n_u16_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32 1, <8 x half> [[A:%.*]], i32 2) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vcvtq_n_u16_f16(float16x8_t a) { return vcvtq_n_u16_f16(a, 2); } -// CHECK-LABEL: @test_vcvtq_n_s32_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32 0, <4 x float> [[A:%.*]], i32 3) -// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_n_s32_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32 0, <4 x float> [[A:%.*]], i32 3) +// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_n_s32_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32 0, <4 x float> [[A:%.*]], i32 3) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) { return vcvtq_n_s32_f32(a, 3); } -// CHECK-LABEL: @test_vcvtq_n_u32_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32 1, <4 x float> [[A:%.*]], i32 32) -// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_n_u32_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32 1, <4 x float> [[A:%.*]], i32 32) +// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_n_u32_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32 1, <4 x float> [[A:%.*]], i32 32) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) { return vcvtq_n_u32_f32(a, 32); } -// CHECK-LABEL: @test_vcvtq_m_n_f16_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 0, <8 x half> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_n_f16_s16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 0, <8 x half> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_n_f16_s16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 0, <8 x half> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vcvtq_m_n_f16_s16(float16x8_t inactive, int16x8_t a, mve_pred16_t p) { @@ -476,12 +689,19 @@ float16x8_t test_vcvtq_m_n_f16_s16(float16x8_t inactive, int16x8_t a, mve_pred16 #endif } -// CHECK-LABEL: @test_vcvtq_m_n_f16_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 1, <8 x half> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], i32 2, <8 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_n_f16_u16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 1, <8 x half> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], i32 2, <8 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_n_f16_u16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 1, <8 x half> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], i32 2, <8 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vcvtq_m_n_f16_u16(float16x8_t inactive, uint16x8_t a, mve_pred16_t p) { @@ -492,12 +712,19 @@ float16x8_t test_vcvtq_m_n_f16_u16(float16x8_t inactive, uint16x8_t a, mve_pred1 #endif } -// CHECK-LABEL: @test_vcvtq_m_n_f32_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 0, <4 x float> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], i32 3, <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_n_f32_s32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 0, <4 x float> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], i32 3, <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_n_f32_s32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 0, <4 x float> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], i32 3, <4 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vcvtq_m_n_f32_s32(float32x4_t inactive, int32x4_t a, mve_pred16_t p) { @@ -508,12 +735,19 @@ float32x4_t test_vcvtq_m_n_f32_s32(float32x4_t inactive, int32x4_t a, mve_pred16 #endif } -// CHECK-LABEL: @test_vcvtq_m_n_f32_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 1, <4 x float> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], i32 32, <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_n_f32_u32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 1, <4 x float> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], i32 32, <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_n_f32_u32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 1, <4 x float> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], i32 32, <4 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vcvtq_m_n_f32_u32(float32x4_t inactive, uint32x4_t a, mve_pred16_t p) { @@ -524,12 +758,19 @@ float32x4_t test_vcvtq_m_n_f32_u32(float32x4_t inactive, uint32x4_t a, mve_pred1 #endif } -// CHECK-LABEL: @test_vcvtq_m_n_s16_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_n_s16_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_n_s16_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vcvtq_m_n_s16_f16(int16x8_t inactive, float16x8_t a, mve_pred16_t p) { @@ -540,12 +781,19 @@ int16x8_t test_vcvtq_m_n_s16_f16(int16x8_t inactive, float16x8_t a, mve_pred16_t #endif } -// CHECK-LABEL: @test_vcvtq_m_n_u16_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 2, <8 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_n_u16_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 2, <8 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_n_u16_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 2, <8 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vcvtq_m_n_u16_f16(uint16x8_t inactive, float16x8_t a, mve_pred16_t p) { @@ -556,12 +804,19 @@ uint16x8_t test_vcvtq_m_n_u16_f16(uint16x8_t inactive, float16x8_t a, mve_pred16 #endif } -// CHECK-LABEL: @test_vcvtq_m_n_s32_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> [[INACTIVE:%.*]], <4 x float> [[A:%.*]], i32 3, <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_n_s32_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> [[INACTIVE:%.*]], <4 x float> [[A:%.*]], i32 3, <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_n_s32_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> [[INACTIVE:%.*]], <4 x float> [[A:%.*]], i32 3, <4 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vcvtq_m_n_s32_f32(int32x4_t inactive, float32x4_t a, mve_pred16_t p) { @@ -572,12 +827,19 @@ int32x4_t test_vcvtq_m_n_s32_f32(int32x4_t inactive, float32x4_t a, mve_pred16_t #endif } -// CHECK-LABEL: @test_vcvtq_m_n_u32_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> [[INACTIVE:%.*]], <4 x float> [[A:%.*]], i32 32, <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_n_u32_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> [[INACTIVE:%.*]], <4 x float> [[A:%.*]], i32 32, <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_m_n_u32_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> [[INACTIVE:%.*]], <4 x float> [[A:%.*]], i32 32, <4 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vcvtq_m_n_u32_f32(uint32x4_t inactive, float32x4_t a, mve_pred16_t p) { @@ -588,12 +850,19 @@ uint32x4_t test_vcvtq_m_n_u32_f32(uint32x4_t inactive, float32x4_t a, mve_pred16 #endif } -// CHECK-LABEL: @test_vcvtq_x_n_f16_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 0, <8 x half> undef, <8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_n_f16_s16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 0, <8 x half> undef, <8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_n_f16_s16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 0, <8 x half> undef, <8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vcvtq_x_n_f16_s16(int16x8_t a, mve_pred16_t p) { @@ -604,12 +873,19 @@ float16x8_t test_vcvtq_x_n_f16_s16(int16x8_t a, mve_pred16_t p) #endif } -// CHECK-LABEL: @test_vcvtq_x_n_f16_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 1, <8 x half> undef, <8 x i16> [[A:%.*]], i32 2, <8 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_n_f16_u16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 1, <8 x half> undef, <8 x i16> [[A:%.*]], i32 2, <8 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_n_f16_u16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 1, <8 x half> undef, <8 x i16> [[A:%.*]], i32 2, <8 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vcvtq_x_n_f16_u16(uint16x8_t a, mve_pred16_t p) { @@ -620,12 +896,19 @@ float16x8_t test_vcvtq_x_n_f16_u16(uint16x8_t a, mve_pred16_t p) #endif } -// CHECK-LABEL: @test_vcvtq_x_n_f32_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 0, <4 x float> undef, <4 x i32> [[A:%.*]], i32 3, <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_n_f32_s32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 0, <4 x float> undef, <4 x i32> [[A:%.*]], i32 3, <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_n_f32_s32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 0, <4 x float> undef, <4 x i32> [[A:%.*]], i32 3, <4 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vcvtq_x_n_f32_s32(int32x4_t a, mve_pred16_t p) { @@ -636,12 +919,19 @@ float32x4_t test_vcvtq_x_n_f32_s32(int32x4_t a, mve_pred16_t p) #endif } -// CHECK-LABEL: @test_vcvtq_x_n_f32_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 1, <4 x float> undef, <4 x i32> [[A:%.*]], i32 32, <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_n_f32_u32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 1, <4 x float> undef, <4 x i32> [[A:%.*]], i32 32, <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_n_f32_u32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 1, <4 x float> undef, <4 x i32> [[A:%.*]], i32 32, <4 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vcvtq_x_n_f32_u32(uint32x4_t a, mve_pred16_t p) { @@ -652,118 +942,186 @@ float32x4_t test_vcvtq_x_n_f32_u32(uint32x4_t a, mve_pred16_t p) #endif } -// CHECK-LABEL: @test_vcvtq_x_n_s16_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> undef, <8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_n_s16_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> undef, <8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_n_s16_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> undef, <8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vcvtq_x_n_s16_f16(float16x8_t a, mve_pred16_t p) { return vcvtq_x_n_s16_f16(a, 1, p); } -// CHECK-LABEL: @test_vcvtq_x_n_u16_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> undef, <8 x half> [[A:%.*]], i32 2, <8 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_n_u16_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> undef, <8 x half> [[A:%.*]], i32 2, <8 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_n_u16_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> undef, <8 x half> [[A:%.*]], i32 2, <8 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vcvtq_x_n_u16_f16(float16x8_t a, mve_pred16_t p) { return vcvtq_x_n_u16_f16(a, 2, p); } -// CHECK-LABEL: @test_vcvtq_x_n_s32_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> undef, <4 x float> [[A:%.*]], i32 3, <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_n_s32_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> undef, <4 x float> [[A:%.*]], i32 3, <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_n_s32_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> undef, <4 x float> [[A:%.*]], i32 3, <4 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vcvtq_x_n_s32_f32(float32x4_t a, mve_pred16_t p) { return vcvtq_x_n_s32_f32(a, 3, p); } -// CHECK-LABEL: @test_vcvtq_x_n_u32_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> undef, <4 x float> [[A:%.*]], i32 32, <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtq_x_n_u32_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> undef, <4 x float> [[A:%.*]], i32 32, <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtq_x_n_u32_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> undef, <4 x float> [[A:%.*]], i32 32, <4 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vcvtq_x_n_u32_f32(float32x4_t a, mve_pred16_t p) { return vcvtq_x_n_u32_f32(a, 32, p); } -// CHECK-LABEL: @test_vcvtbq_f32_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> [[A:%.*]], i32 0) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvtbq_f32_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> [[A:%.*]], i32 0) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvtbq_f32_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> [[A:%.*]], i32 0) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vcvtbq_f32_f16(float16x8_t a) { return vcvtbq_f32_f16(a); } -// CHECK-LABEL: @test_vcvttq_f32_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> [[A:%.*]], i32 1) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vcvttq_f32_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> [[A:%.*]], i32 1) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vcvttq_f32_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> [[A:%.*]], i32 1) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vcvttq_f32_f16(float16x8_t a) { return vcvttq_f32_f16(a); } -// CHECK-LABEL: @test_vcvtbq_m_f32_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 0, <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtbq_m_f32_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 0, <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtbq_m_f32_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 0, <4 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vcvtbq_m_f32_f16(float32x4_t inactive, float16x8_t a, mve_pred16_t p) { return vcvtbq_m_f32_f16(inactive, a, p); } -// CHECK-LABEL: @test_vcvttq_m_f32_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 1, <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvttq_m_f32_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 1, <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvttq_m_f32_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 1, <4 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vcvttq_m_f32_f16(float32x4_t inactive, float16x8_t a, mve_pred16_t p) { return vcvttq_m_f32_f16(inactive, a, p); } -// CHECK-LABEL: @test_vcvtbq_x_f32_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> undef, <8 x half> [[A:%.*]], i32 0, <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvtbq_x_f32_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> undef, <8 x half> [[A:%.*]], i32 0, <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvtbq_x_f32_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> undef, <8 x half> [[A:%.*]], i32 0, <4 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vcvtbq_x_f32_f16(float16x8_t a, mve_pred16_t p) { return vcvtbq_x_f32_f16(a, p); } -// CHECK-LABEL: @test_vcvttq_x_f32_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> undef, <8 x half> [[A:%.*]], i32 1, <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vcvttq_x_f32_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> undef, <8 x half> [[A:%.*]], i32 1, <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vcvttq_x_f32_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> undef, <8 x half> [[A:%.*]], i32 1, <4 x i1> [[TMP1]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vcvttq_x_f32_f16(float16x8_t a, mve_pred16_t p) { return vcvttq_x_f32_f16(a, p); } +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp index 8fde56a0bb5ec..6bade9d2373ec 100644 --- a/clang/utils/TableGen/MveEmitter.cpp +++ b/clang/utils/TableGen/MveEmitter.cpp @@ -1294,10 +1294,13 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D, return GenIRIntBase(Op); } else if (Op->isSubClassOf("strictFPAlt")) { auto StardardBuilder = Op->getValueAsDef("standard"); - Result::Ptr Standard = StardardBuilder->isSubClassOf("IRBuilder") + Result::Ptr Standard = StardardBuilder->isSubClassOf("IRBuilderBase") ? GenIRBuilderBase(StardardBuilder) : GenIRIntBase(StardardBuilder); - Result::Ptr StrictFp = GenIRIntBase(Op->getValueAsDef("strictfp")); + auto StrictBuilder = Op->getValueAsDef("strictfp"); + Result::Ptr StrictFp = StrictBuilder->isSubClassOf("IRBuilderBase") + ? GenIRBuilderBase(StrictBuilder) + : GenIRIntBase(StrictBuilder); return std::make_shared<StrictFpAltResult>(Standard, StrictFp); } else { PrintFatalError("Unsupported dag node " + Op->getName()); diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 3b475c8d5614d..773ab9bbb5b68 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -1304,6 +1304,12 @@ defm int_arm_mve_vcvt_fix: MVEMXPredicated< [llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */], LLVMMatchType<0>, llvm_anyvector_ty>; +def int_arm_mve_vcvt_fp_int: DefaultAttrsIntrinsic< + [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */], + [IntrNoMem]>; +def int_arm_mve_vcvt_int_fp: DefaultAttrsIntrinsic< + [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */], + [IntrNoMem]>; def int_arm_mve_vcvt_fp_int_predicated: DefaultAttrsIntrinsic< [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */, llvm_anyvector_ty /* predicate */, LLVMMatchType<0> /* inactive */], diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 097318711d137..1b10310d1efe8 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -4045,7 +4045,7 @@ class MVE_VCVT_fp_int<string suffix, bits<2> size, bit toint, bit unsigned, } multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src, - SDNode unpred_op> { + SDNode unpred_op, SDPatternOperator unpred_intrinsic> { defvar Unsigned = !or(!eq(Dest.SuffixLetter,"u"), !eq(Src.SuffixLetter,"u")); defvar ToInt = !eq(Src.SuffixLetter,"f"); @@ -4056,6 +4056,8 @@ multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src, let Predicates = [HasMVEFloat] in { def : Pat<(Dest.Vec (unpred_op (Src.Vec MQPR:$src))), (Dest.Vec (Inst (Src.Vec MQPR:$src)))>; + def : Pat<(Dest.Vec (unpred_intrinsic (Src.Vec MQPR:$src), (i32 Unsigned))), + (Dest.Vec (Inst (Src.Vec MQPR:$src)))>; def : Pat<(Dest.Vec (int_arm_mve_vcvt_fp_int_predicated (Src.Vec MQPR:$src), (i32 Unsigned), (Src.Pred VCCR:$mask), (Dest.Vec MQPR:$inactive))), @@ -4066,15 +4068,15 @@ multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src, } // The unsuffixed VCVT for float->int implicitly rounds toward zero, // which I reflect here in the llvm instruction names -defm MVE_VCVTs16f16z : MVE_VCVT_fp_int_m<MVE_v8s16, MVE_v8f16, fp_to_sint>; -defm MVE_VCVTu16f16z : MVE_VCVT_fp_int_m<MVE_v8u16, MVE_v8f16, fp_to_uint>; -defm MVE_VCVTs32f32z : MVE_VCVT_fp_int_m<MVE_v4s32, MVE_v4f32, fp_to_sint>; -defm MVE_VCVTu32f32z : MVE_VCVT_fp_int_m<MVE_v4u32, MVE_v4f32, fp_to_uint>; +defm MVE_VCVTs16f16z : MVE_VCVT_fp_int_m<MVE_v8s16, MVE_v8f16, fp_to_sint, int_arm_mve_vcvt_int_fp>; +defm MVE_VCVTu16f16z : MVE_VCVT_fp_int_m<MVE_v8u16, MVE_v8f16, fp_to_uint, int_arm_mve_vcvt_int_fp>; +defm MVE_VCVTs32f32z : MVE_VCVT_fp_int_m<MVE_v4s32, MVE_v4f32, fp_to_sint, int_arm_mve_vcvt_int_fp>; +defm MVE_VCVTu32f32z : MVE_VCVT_fp_int_m<MVE_v4u32, MVE_v4f32, fp_to_uint, int_arm_mve_vcvt_int_fp>; // Whereas VCVT for int->float rounds to nearest -defm MVE_VCVTf16s16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8s16, sint_to_fp>; -defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8u16, uint_to_fp>; -defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4s32, sint_to_fp>; -defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4u32, uint_to_fp>; +defm MVE_VCVTf16s16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8s16, sint_to_fp, int_arm_mve_vcvt_fp_int>; +defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8u16, uint_to_fp, int_arm_mve_vcvt_fp_int>; +defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4s32, sint_to_fp, int_arm_mve_vcvt_fp_int>; +defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4u32, uint_to_fp, int_arm_mve_vcvt_fp_int>; let Predicates = [HasMVEFloat] in { def : Pat<(v4i32 (fp_to_sint_sat v4f32:$src, i32)), diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll index 9e42f3984c24d..708e18e4f2389 100644 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll @@ -329,4 +329,85 @@ entry: ret <4 x float> %2 } + +define arm_aapcs_vfpcc <8 x half> @test_vcvtq_f16_s16(<8 x i16> noundef %a) #0 { +; CHECK-LABEL: test_vcvtq_f16_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcvt.f16.s16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.v8f16.v8i16(<8 x i16> %a, i32 0) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <8 x half> @test_vcvtq_f16_u16(<8 x i16> noundef %a) #0 { +; CHECK-LABEL: test_vcvtq_f16_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcvt.f16.u16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.v8f16.v8i16(<8 x i16> %a, i32 1) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vcvtq_f32_s32(<4 x i32> noundef %a) #0 { +; CHECK-LABEL: test_vcvtq_f32_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcvt.f32.s32 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.v4f32.v4i32(<4 x i32> %a, i32 0) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vcvtq_f32_u32(<4 x i32> noundef %a) #0 { +; CHECK-LABEL: test_vcvtq_f32_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcvt.f32.u32 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.v4f32.v4i32(<4 x i32> %a, i32 1) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_s16_f16(<8 x half> noundef %a) #0 { +; CHECK-LABEL: test_vcvtq_s16_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcvt.s16.f16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x i16> @llvm.arm.mve.vcvt.int.fp.v8i16.v8f16(<8 x half> %a, i32 0) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_s32_f32(<4 x float> noundef %a) #0 { +; CHECK-LABEL: test_vcvtq_s32_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcvt.s32.f32 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x i32> @llvm.arm.mve.vcvt.int.fp.v4i32.v4f32(<4 x float> %a, i32 0) + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_u16_f16(<8 x half> noundef %a) #0 { +; CHECK-LABEL: test_vcvtq_u16_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcvt.u16.f16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x i16> @llvm.arm.mve.vcvt.int.fp.v8i16.v8f16(<8 x half> %a, i32 1) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_u32_f32(<4 x float> noundef %a) #0 { +; CHECK-LABEL: test_vcvtq_u32_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcvt.u32.f32 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x i32> @llvm.arm.mve.vcvt.int.fp.v4i32.v4f32(<4 x float> %a, i32 1) + ret <4 x i32> %0 +} + attributes #0 = { strictfp } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
