https://github.com/davemgreen updated 
https://github.com/llvm/llvm-project/pull/169771

>From b4db38a6e5ff3cf4267c0a98f7e6d7d20a22005a Mon Sep 17 00:00:00 2001
From: David Green <[email protected]>
Date: Thu, 27 Nov 2025 09:19:23 +0000
Subject: [PATCH] [ARM] Introduce intrinsics for MVE fma under strict-fp.

Similar to #169156, this adds an @arm.mve.fma intrinsic for strict-fp. A
Builder class is added to act as the common subclass of IRBuilder and IRInt.
---
 clang/include/clang/Basic/arm_mve.td          |    6 +-
 clang/include/clang/Basic/arm_mve_defs.td     |    9 +-
 .../test/CodeGen/arm-mve-intrinsics/ternary.c | 1012 +++++++++++------
 clang/utils/TableGen/MveEmitter.cpp           |   15 +-
 llvm/include/llvm/IR/IntrinsicsARM.td         |    3 +
 llvm/lib/Target/ARM/ARMInstrMVE.td            |   12 +
 .../mve-intrinsics/strict-intrinsics.ll       |   98 ++
 7 files changed, 825 insertions(+), 330 deletions(-)

diff --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 2e5e1d93be096..51b7dd16e5195 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -167,7 +167,9 @@ multiclass FMA<bit add> {
   // second multiply input.
   defvar m2_cg = !if(add, (id $m2), (fneg $m2));
 
-  defvar unpred_cg = (IRIntBase<"fma", [Vector]> $m1, m2_cg, $addend);
+  defvar fma = strictFPAlt<IRIntBase<"fma", [Vector]>,
+                           IRInt<"fma", [Vector]>>;
+  defvar unpred_cg = (fma $m1, m2_cg, $addend);
   defvar pred_cg   = (IRInt<"fma_predicated", [Vector, Predicate]>
                           $m1, m2_cg, $addend, $pred);
 
@@ -723,7 +725,7 @@ multiclass compare_with_pred<string condname, dag arguments,
        NameOverride<"vcmp" # condname # "q_m" # suffix>;
 }
 
-multiclass compare<string condname, IRBuilder cmpop> {
+multiclass compare<string condname, Builder cmpop> {
   // Make all four variants of a comparison: the vector/vector and
   // vector/scalar forms, each using compare_with_pred to make a
   // predicated and unpredicated version.
diff --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index eeca9153dd742..3714262898476 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -34,7 +34,8 @@ class IRBuilderAddrParam<int index_> : IRBuilderParam<index_>;
 class IRBuilderIntParam<int index_, string type_> : IRBuilderParam<index_> {
   string type = type_;
 }
-class IRBuilderBase {
+class Builder {}
+class IRBuilderBase : Builder {
   // The prefix of the function call, including an open parenthesis.
   string prefix;
 
@@ -166,7 +167,7 @@ def address;
 // Another node class you can use in the codegen dag. This one corresponds to
 // an IR intrinsic function, which has to be specialized to a particular list
 // of types.
-class IRIntBase<string name_, list<Type> params_ = [], bit appendKind_ = 0> {
+class IRIntBase<string name_, list<Type> params_ = [], bit appendKind_ = 0> : 
Builder {
   string intname = name_;       // base name of the intrinsic
   list<Type> params = params_;  // list of parameter types
 
@@ -214,8 +215,8 @@ def bitsize;
 
 // strictFPAlt allows a node to have different code generation under strict-fp.
 // TODO: The standard node can be IRBuilderBase or IRIntBase.
-class strictFPAlt<IRBuilderBase standard_, IRIntBase strictfp_> {
-  IRBuilderBase standard = standard_;
+class strictFPAlt<Builder standard_, IRIntBase strictfp_> : Builder {
+  Builder standard = standard_;
   IRIntBase strictfp = strictfp_;
 }
 
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c 
b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c
index 768d397cb5611..3ab84459e0515 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c
@@ -1,15 +1,22 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S 
-passes=sroa | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - 
%s | opt -S -passes=sroa | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S 
-passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - 
%s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math 
-fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=sroa | 
FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math 
-fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S 
-passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
 
 // REQUIRES: aarch64-registered-target || arm-registered-target
 
 #include <arm_mve.h>
 
-// CHECK-LABEL: @test_vfmaq_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> 
[[B:%.*]], <8 x half> [[C:%.*]], <8 x half> [[A:%.*]])
-// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vfmaq_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x 
half> [[B:%.*]], <8 x half> [[C:%.*]], <8 x half> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vfmaq_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <8 x half> 
@llvm.arm.mve.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[C:%.*]], <8 x half> 
[[A:%.*]]) #[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP0]]
 //
 float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
 #ifdef POLYMORPHIC
@@ -19,10 +26,15 @@ float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, 
float16x8_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmaq_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> 
[[B:%.*]], <4 x float> [[C:%.*]], <4 x float> [[A:%.*]])
-// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vfmaq_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 
x float> [[B:%.*]], <4 x float> [[C:%.*]], <4 x float> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vfmaq_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x float> 
@llvm.arm.mve.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[C:%.*]], <4 x 
float> [[A:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP0]]
 //
 float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
 #ifdef POLYMORPHIC
@@ -32,12 +44,19 @@ float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, 
float32x4_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmaq_n_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, 
half [[C:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> 
[[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]])
-// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vfmaq_n_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> 
poison, half [[C:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x 
half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vfmaq_n_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> 
poison, half [[C:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <8 x half> 
@llvm.arm.mve.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x 
half> [[A:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP0]]
 //
 float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
 #ifdef POLYMORPHIC
@@ -47,12 +66,19 @@ float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, 
float16_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmaq_n_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, 
float [[C:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> 
[[B:%.*]], <4 x float> [[DOTSPLAT]], <4 x float> [[A:%.*]])
-// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vfmaq_n_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> 
poison, float [[C:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 
x float> [[B:%.*]], <4 x float> [[DOTSPLAT]], <4 x float> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vfmaq_n_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> 
poison, float [[C:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x float> 
@llvm.arm.mve.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]], <4 x 
float> [[A:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP0]]
 //
 float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
 #ifdef POLYMORPHIC
@@ -62,12 +88,19 @@ float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, 
float32_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmasq_n_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, 
half [[C:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> 
[[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]])
-// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vfmasq_n_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> 
poison, half [[C:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x 
half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vfmasq_n_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> 
poison, half [[C:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <8 x half> 
@llvm.arm.mve.fma.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> 
[[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP0]]
 //
 float16x8_t test_vfmasq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
 #ifdef POLYMORPHIC
@@ -77,12 +110,19 @@ float16x8_t test_vfmasq_n_f16(float16x8_t a, float16x8_t 
b, float16_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmasq_n_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, 
float [[C:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> 
[[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]])
-// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vfmasq_n_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> 
poison, float [[C:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 
x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vfmasq_n_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> 
poison, float [[C:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x float> 
@llvm.arm.mve.fma.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x 
float> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP0]]
 //
 float32x4_t test_vfmasq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
 #ifdef POLYMORPHIC
@@ -92,11 +132,17 @@ float32x4_t test_vfmasq_n_f32(float32x4_t a, float32x4_t 
b, float32_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmsq_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = fneg <8 x half> [[C:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> 
[[B:%.*]], <8 x half> [[TMP0]], <8 x half> [[A:%.*]])
-// CHECK-NEXT:    ret <8 x half> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vfmsq_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fneg <8 x half> [[C:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x 
half> [[B:%.*]], <8 x half> [[TMP0]], <8 x half> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vfmsq_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = fneg <8 x half> [[C:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x half> 
@llvm.arm.mve.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[TMP0]], <8 x half> 
[[A:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP1]]
 //
 float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
 #ifdef POLYMORPHIC
@@ -106,11 +152,17 @@ float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, 
float16x8_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmsq_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = fneg <4 x float> [[C:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> 
[[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[A:%.*]])
-// CHECK-NEXT:    ret <4 x float> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vfmsq_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fneg <4 x float> [[C:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 
x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vfmsq_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = fneg <4 x float> [[C:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x float> 
@llvm.arm.mve.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x 
float> [[A:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP1]]
 //
 float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
 #ifdef POLYMORPHIC
@@ -312,11 +364,17 @@ uint32x4_t test_vmlasq_n_u32(uint32x4_t a, uint32x4_t b, 
uint32_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqdmlahq_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlah.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 
[[TMP0]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlahq_n_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlah.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 
[[TMP0]])
+// CHECK-NOSTRICT-NEXT:    ret <16 x i8> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlahq_n_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlah.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 
[[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <16 x i8> [[TMP1]]
 //
 int8x16_t test_vqdmlahq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
 #ifdef POLYMORPHIC
@@ -326,11 +384,17 @@ int8x16_t test_vqdmlahq_n_s8(int8x16_t a, int8x16_t b, 
int8_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqdmlahq_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlah.v8i16(<8 
x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlahq_n_s16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 
[[TMP0]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlahq_n_s16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 
[[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x i16> [[TMP1]]
 //
 int16x8_t test_vqdmlahq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
 #ifdef POLYMORPHIC
@@ -340,10 +404,15 @@ int16x8_t test_vqdmlahq_n_s16(int16x8_t a, int16x8_t b, 
int16_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqdmlahq_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlah.v4i32(<4 
x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlahq_n_s32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 
[[C:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlahq_n_s32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 
[[C:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x i32> [[TMP0]]
 //
 int32x4_t test_vqdmlahq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
 #ifdef POLYMORPHIC
@@ -353,11 +422,17 @@ int32x4_t test_vqdmlahq_n_s32(int32x4_t a, int32x4_t b, 
int32_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqdmlashq_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlash.v16i8(<16 x i8> [[M1:%.*]], <16 x i8> [[M2:%.*]], i32 
[[TMP0]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlashq_n_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlash.v16i8(<16 x i8> [[M1:%.*]], <16 x i8> [[M2:%.*]], i32 
[[TMP0]])
+// CHECK-NOSTRICT-NEXT:    ret <16 x i8> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlashq_n_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlash.v16i8(<16 x i8> [[M1:%.*]], <16 x i8> [[M2:%.*]], i32 
[[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <16 x i8> [[TMP1]]
 //
 int8x16_t test_vqdmlashq_n_s8(int8x16_t m1, int8x16_t m2, int8_t add) {
 #ifdef POLYMORPHIC
@@ -367,11 +442,17 @@ int8x16_t test_vqdmlashq_n_s8(int8x16_t m1, int8x16_t m2, 
int8_t add) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqdmlashq_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[ADD:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqdmlash.v8i16(<8 x i16> [[M1:%.*]], <8 x i16> [[M2:%.*]], i32 
[[TMP0]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlashq_n_s16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[ADD:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqdmlash.v8i16(<8 x i16> [[M1:%.*]], <8 x i16> [[M2:%.*]], i32 
[[TMP0]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlashq_n_s16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[ADD:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqdmlash.v8i16(<8 x i16> [[M1:%.*]], <8 x i16> [[M2:%.*]], i32 
[[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x i16> [[TMP1]]
 //
 int16x8_t test_vqdmlashq_n_s16(int16x8_t m1, int16x8_t m2, int16_t add) {
 #ifdef POLYMORPHIC
@@ -381,10 +462,15 @@ int16x8_t test_vqdmlashq_n_s16(int16x8_t m1, int16x8_t 
m2, int16_t add) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqdmlashq_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqdmlash.v4i32(<4 x i32> [[M1:%.*]], <4 x i32> [[M2:%.*]], i32 
[[ADD:%.*]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlashq_n_s32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqdmlash.v4i32(<4 x i32> [[M1:%.*]], <4 x i32> [[M2:%.*]], i32 
[[ADD:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlashq_n_s32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqdmlash.v4i32(<4 x i32> [[M1:%.*]], <4 x i32> [[M2:%.*]], i32 
[[ADD:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x i32> [[TMP0]]
 //
 int32x4_t test_vqdmlashq_n_s32(int32x4_t m1, int32x4_t m2, int32_t add) {
 #ifdef POLYMORPHIC
@@ -394,11 +480,17 @@ int32x4_t test_vqdmlashq_n_s32(int32x4_t m1, int32x4_t 
m2, int32_t add) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqrdmlahq_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqrdmlah.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 
[[TMP0]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vqrdmlahq_n_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqrdmlah.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 
[[TMP0]])
+// CHECK-NOSTRICT-NEXT:    ret <16 x i8> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vqrdmlahq_n_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqrdmlah.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 
[[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <16 x i8> [[TMP1]]
 //
 int8x16_t test_vqrdmlahq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
 #ifdef POLYMORPHIC
@@ -408,11 +500,17 @@ int8x16_t test_vqrdmlahq_n_s8(int8x16_t a, int8x16_t b, 
int8_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqrdmlahq_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 
[[TMP0]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vqrdmlahq_n_s16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 
[[TMP0]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vqrdmlahq_n_s16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 
[[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x i16> [[TMP1]]
 //
 int16x8_t test_vqrdmlahq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
 #ifdef POLYMORPHIC
@@ -422,10 +520,15 @@ int16x8_t test_vqrdmlahq_n_s16(int16x8_t a, int16x8_t b, 
int16_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqrdmlahq_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 
[[C:%.*]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vqrdmlahq_n_s32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 
[[C:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vqrdmlahq_n_s32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 
[[C:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x i32> [[TMP0]]
 //
 int32x4_t test_vqrdmlahq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
 #ifdef POLYMORPHIC
@@ -435,11 +538,17 @@ int32x4_t test_vqrdmlahq_n_s32(int32x4_t a, int32x4_t b, 
int32_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqrdmlashq_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqrdmlash.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 
[[TMP0]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vqrdmlashq_n_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqrdmlash.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 
[[TMP0]])
+// CHECK-NOSTRICT-NEXT:    ret <16 x i8> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vqrdmlashq_n_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqrdmlash.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 
[[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <16 x i8> [[TMP1]]
 //
 int8x16_t test_vqrdmlashq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
 #ifdef POLYMORPHIC
@@ -449,11 +558,17 @@ int8x16_t test_vqrdmlashq_n_s8(int8x16_t a, int8x16_t b, 
int8_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqrdmlashq_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqrdmlash.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 
[[TMP0]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+// CHECK-NOSTRICT-LABEL: @test_vqrdmlashq_n_s16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqrdmlash.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 
[[TMP0]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CHECK-STRICT-LABEL: @test_vqrdmlashq_n_s16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqrdmlash.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 
[[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x i16> [[TMP1]]
 //
 int16x8_t test_vqrdmlashq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
 #ifdef POLYMORPHIC
@@ -463,10 +578,15 @@ int16x8_t test_vqrdmlashq_n_s16(int16x8_t a, int16x8_t b, 
int16_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqrdmlashq_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqrdmlash.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 
[[C:%.*]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vqrdmlashq_n_s32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqrdmlash.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 
[[C:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vqrdmlashq_n_s32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqrdmlash.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 
[[C:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x i32> [[TMP0]]
 //
 int32x4_t test_vqrdmlashq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
 #ifdef POLYMORPHIC
@@ -476,12 +596,19 @@ int32x4_t test_vqrdmlashq_n_s32(int32x4_t a, int32x4_t b, 
int32_t c) {
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmaq_m_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> 
[[C:%.*]], <8 x half> [[A:%.*]], <8 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vfmaq_m_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> 
[[C:%.*]], <8 x half> [[A:%.*]], <8 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vfmaq_m_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> 
[[C:%.*]], <8 x half> [[A:%.*]], <8 x i1> [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vfmaq_m_f16(float16x8_t a, float16x8_t b, float16x8_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -491,12 +618,19 @@ float16x8_t test_vfmaq_m_f16(float16x8_t a, float16x8_t 
b, float16x8_t c, mve_pr
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmaq_m_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> [[B:%.*]], <4 x float> 
[[C:%.*]], <4 x float> [[A:%.*]], <4 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vfmaq_m_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> [[B:%.*]], <4 x float> 
[[C:%.*]], <4 x float> [[A:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vfmaq_m_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> [[B:%.*]], <4 x float> 
[[C:%.*]], <4 x float> [[A:%.*]], <4 x i1> [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP2]]
 //
 float32x4_t test_vfmaq_m_f32(float32x4_t a, float32x4_t b, float32x4_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -506,14 +640,23 @@ float32x4_t test_vfmaq_m_f32(float32x4_t a, float32x4_t 
b, float32x4_t c, mve_pr
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmaq_m_n_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, 
half [[C:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> 
[[DOTSPLAT]], <8 x half> [[A:%.*]], <8 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vfmaq_m_n_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> 
poison, half [[C:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> 
[[DOTSPLAT]], <8 x half> [[A:%.*]], <8 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vfmaq_m_n_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> 
poison, half [[C:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> 
[[DOTSPLAT]], <8 x half> [[A:%.*]], <8 x i1> [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vfmaq_m_n_f16(float16x8_t a, float16x8_t b, float16_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -523,14 +666,23 @@ float16x8_t test_vfmaq_m_n_f16(float16x8_t a, float16x8_t 
b, float16_t c, mve_pr
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmaq_m_n_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, 
float [[C:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> [[B:%.*]], <4 x float> 
[[DOTSPLAT]], <4 x float> [[A:%.*]], <4 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vfmaq_m_n_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> 
poison, float [[C:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> [[B:%.*]], <4 x float> 
[[DOTSPLAT]], <4 x float> [[A:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vfmaq_m_n_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> 
poison, float [[C:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> [[B:%.*]], <4 x float> 
[[DOTSPLAT]], <4 x float> [[A:%.*]], <4 x i1> [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP2]]
 //
 float32x4_t test_vfmaq_m_n_f32(float32x4_t a, float32x4_t b, float32_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -540,15 +692,25 @@ float32x4_t test_vfmaq_m_n_f32(float32x4_t a, float32x4_t 
b, float32_t c, mve_pr
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmasq_m_n_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, 
half [[C:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> 
[[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]])
-// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x half> 
[[TMP2]], <8 x half> [[A]]
-// CHECK-NEXT:    ret <8 x half> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vfmasq_m_n_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> 
poison, half [[C:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x 
half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x half> 
[[TMP2]], <8 x half> [[A]]
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vfmasq_m_n_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> 
poison, half [[C:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.fma.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> 
[[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x half> 
[[TMP2]], <8 x half> [[A]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP3]]
 //
 float16x8_t test_vfmasq_m_n_f16(float16x8_t a, float16x8_t b, float16_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -558,15 +720,25 @@ float16x8_t test_vfmasq_m_n_f16(float16x8_t a, 
float16x8_t b, float16_t c, mve_p
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmasq_m_n_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, 
float [[C:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> 
[[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]])
-// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x float> 
[[TMP2]], <4 x float> [[A]]
-// CHECK-NEXT:    ret <4 x float> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vfmasq_m_n_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> 
poison, float [[C:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 
x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x 
float> [[TMP2]], <4 x float> [[A]]
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vfmasq_m_n_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> 
poison, float [[C:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.fma.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x 
float> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x float> 
[[TMP2]], <4 x float> [[A]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP3]]
 //
 float32x4_t test_vfmasq_m_n_f32(float32x4_t a, float32x4_t b, float32_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -576,13 +748,21 @@ float32x4_t test_vfmasq_m_n_f32(float32x4_t a, 
float32x4_t b, float32_t c, mve_p
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmsq_m_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = fneg <8 x half> [[C:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x half> 
@llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> 
[[TMP0]], <8 x half> [[A:%.*]], <8 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <8 x half> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vfmsq_m_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fneg <8 x half> [[C:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <8 x half> 
@llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> 
[[TMP0]], <8 x half> [[A:%.*]], <8 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vfmsq_m_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = fneg <8 x half> [[C:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <8 x half> 
@llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> 
[[TMP0]], <8 x half> [[A:%.*]], <8 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP3]]
 //
 float16x8_t test_vfmsq_m_f16(float16x8_t a, float16x8_t b, float16x8_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -592,13 +772,21 @@ float16x8_t test_vfmsq_m_f16(float16x8_t a, float16x8_t 
b, float16x8_t c, mve_pr
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vfmsq_m_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = fneg <4 x float> [[C:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> 
@llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> [[B:%.*]], <4 x float> 
[[TMP0]], <4 x float> [[A:%.*]], <4 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <4 x float> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vfmsq_m_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fneg <4 x float> [[C:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <4 x float> 
@llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> [[B:%.*]], <4 x float> 
[[TMP0]], <4 x float> [[A:%.*]], <4 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vfmsq_m_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = fneg <4 x float> [[C:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <4 x float> 
@llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> [[B:%.*]], <4 x float> 
[[TMP0]], <4 x float> [[A:%.*]], <4 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP3]]
 //
 float32x4_t test_vfmsq_m_f32(float32x4_t a, float32x4_t b, float32x4_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -608,13 +796,21 @@ float32x4_t test_vfmsq_m_f32(float32x4_t a, float32x4_t 
b, float32x4_t c, mve_pr
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmlaq_m_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vmlaq_m_n_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <16 x i8> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vmlaq_m_n_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <16 x i8> [[TMP3]]
 //
 int8x16_t test_vmlaq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t 
p) {
 #ifdef POLYMORPHIC
@@ -624,13 +820,21 @@ int8x16_t test_vmlaq_m_n_s8(int8x16_t a, int8x16_t b, 
int8_t c, mve_pred16_t p)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmlaq_m_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vmlaq_m_n_s16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x i16> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vmlaq_m_n_s16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x i16> [[TMP3]]
 //
 int16x8_t test_vmlaq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t 
p) {
 #ifdef POLYMORPHIC
@@ -640,12 +844,19 @@ int16x8_t test_vmlaq_m_n_s16(int16x8_t a, int16x8_t b, 
int16_t c, mve_pred16_t p
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmlaq_m_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vmlaq_m_n_s32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x i32> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmlaq_m_n_s32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x i32> [[TMP2]]
 //
 int32x4_t test_vmlaq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t 
p) {
 #ifdef POLYMORPHIC
@@ -655,13 +866,21 @@ int32x4_t test_vmlaq_m_n_s32(int32x4_t a, int32x4_t b, 
int32_t c, mve_pred16_t p
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmlaq_m_n_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vmlaq_m_n_u8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <16 x i8> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vmlaq_m_n_u8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <16 x i8> [[TMP3]]
 //
 uint8x16_t test_vmlaq_m_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -671,13 +890,21 @@ uint8x16_t test_vmlaq_m_n_u8(uint8x16_t a, uint8x16_t b, 
uint8_t c, mve_pred16_t
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmlaq_m_n_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vmlaq_m_n_u16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x i16> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vmlaq_m_n_u16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x i16> [[TMP3]]
 //
 uint16x8_t test_vmlaq_m_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -687,12 +914,19 @@ uint16x8_t test_vmlaq_m_n_u16(uint16x8_t a, uint16x8_t b, 
uint16_t c, mve_pred16
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmlaq_m_n_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vmlaq_m_n_u32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x i32> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmlaq_m_n_u32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x i32> [[TMP2]]
 //
 uint32x4_t test_vmlaq_m_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -702,13 +936,21 @@ uint32x4_t test_vmlaq_m_n_u32(uint32x4_t a, uint32x4_t b, 
uint32_t c, mve_pred16
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmlasq_m_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vmlasq_m_n_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <16 x i8> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vmlasq_m_n_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <16 x i8> [[TMP3]]
 //
 int8x16_t test_vmlasq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t 
p) {
 #ifdef POLYMORPHIC
@@ -718,13 +960,21 @@ int8x16_t test_vmlasq_m_n_s8(int8x16_t a, int8x16_t b, 
int8_t c, mve_pred16_t p)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmlasq_m_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vmlasq_m_n_s16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x i16> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vmlasq_m_n_s16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x i16> [[TMP3]]
 //
 int16x8_t test_vmlasq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -734,12 +984,19 @@ int16x8_t test_vmlasq_m_n_s16(int16x8_t a, int16x8_t b, 
int16_t c, mve_pred16_t
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmlasq_m_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vmlasq_m_n_s32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x i32> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmlasq_m_n_s32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x i32> [[TMP2]]
 //
 int32x4_t test_vmlasq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -749,13 +1006,21 @@ int32x4_t test_vmlasq_m_n_s32(int32x4_t a, int32x4_t b, 
int32_t c, mve_pred16_t
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmlasq_m_n_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vmlasq_m_n_u8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <16 x i8> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vmlasq_m_n_u8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <16 x i8> [[TMP3]]
 //
 uint8x16_t test_vmlasq_m_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -765,13 +1030,21 @@ uint8x16_t test_vmlasq_m_n_u8(uint8x16_t a, uint8x16_t 
b, uint8_t c, mve_pred16_
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmlasq_m_n_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vmlasq_m_n_u16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x i16> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vmlasq_m_n_u16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x i16> [[TMP3]]
 //
 uint16x8_t test_vmlasq_m_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -781,12 +1054,19 @@ uint16x8_t test_vmlasq_m_n_u16(uint16x8_t a, uint16x8_t 
b, uint16_t c, mve_pred1
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmlasq_m_n_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vmlasq_m_n_u32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x i32> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmlasq_m_n_u32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x i32> [[TMP2]]
 //
 uint32x4_t test_vmlasq_m_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -796,13 +1076,21 @@ uint32x4_t test_vmlasq_m_n_u32(uint32x4_t a, uint32x4_t 
b, uint32_t c, mve_pred1
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqdmlahq_m_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlah.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlahq_m_n_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlah.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <16 x i8> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlahq_m_n_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlah.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <16 x i8> [[TMP3]]
 //
 int8x16_t test_vqdmlahq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -812,13 +1100,21 @@ int8x16_t test_vqdmlahq_m_n_s8(int8x16_t a, int8x16_t b, 
int8_t c, mve_pred16_t
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqdmlahq_m_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqdmlah.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlahq_m_n_s16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqdmlah.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x i16> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlahq_m_n_s16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqdmlah.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x i16> [[TMP3]]
 //
 int16x8_t test_vqdmlahq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -828,12 +1124,19 @@ int16x8_t test_vqdmlahq_m_n_s16(int16x8_t a, int16x8_t 
b, int16_t c, mve_pred16_
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqdmlahq_m_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqdmlah.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlahq_m_n_s32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqdmlah.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x i32> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlahq_m_n_s32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqdmlah.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x i32> [[TMP2]]
 //
 int32x4_t test_vqdmlahq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -843,13 +1146,21 @@ int32x4_t test_vqdmlahq_m_n_s32(int32x4_t a, int32x4_t 
b, int32_t c, mve_pred16_
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqdmlashq_m_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1(<16 x i8> [[M1:%.*]], <16 x i8> 
[[M2:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlashq_m_n_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1(<16 x i8> [[M1:%.*]], <16 x i8> 
[[M2:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <16 x i8> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlashq_m_n_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1(<16 x i8> [[M1:%.*]], <16 x i8> 
[[M2:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <16 x i8> [[TMP3]]
 //
 int8x16_t test_vqdmlashq_m_n_s8(int8x16_t m1, int8x16_t m2, int8_t add, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -859,13 +1170,21 @@ int8x16_t test_vqdmlashq_m_n_s8(int8x16_t m1, int8x16_t 
m2, int8_t add, mve_pred
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqdmlashq_m_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[ADD:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1(<8 x i16> [[M1:%.*]], <8 x i16> 
[[M2:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlashq_m_n_s16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[ADD:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1(<8 x i16> [[M1:%.*]], <8 x i16> 
[[M2:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x i16> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlashq_m_n_s16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[ADD:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1(<8 x i16> [[M1:%.*]], <8 x i16> 
[[M2:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x i16> [[TMP3]]
 //
 int16x8_t test_vqdmlashq_m_n_s16(int16x8_t m1, int16x8_t m2, int16_t add, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -875,12 +1194,19 @@ int16x8_t test_vqdmlashq_m_n_s16(int16x8_t m1, int16x8_t 
m2, int16_t add, mve_pr
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqdmlashq_m_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1(<4 x i32> [[M1:%.*]], <4 x i32> 
[[M2:%.*]], i32 [[ADD:%.*]], <4 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vqdmlashq_m_n_s32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1(<4 x i32> [[M1:%.*]], <4 x i32> 
[[M2:%.*]], i32 [[ADD:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x i32> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vqdmlashq_m_n_s32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1(<4 x i32> [[M1:%.*]], <4 x i32> 
[[M2:%.*]], i32 [[ADD:%.*]], <4 x i1> [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x i32> [[TMP2]]
 //
 int32x4_t test_vqdmlashq_m_n_s32(int32x4_t m1, int32x4_t m2, int32_t add, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -890,13 +1216,21 @@ int32x4_t test_vqdmlashq_m_n_s32(int32x4_t m1, int32x4_t 
m2, int32_t add, mve_pr
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqrdmlahq_m_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqrdmlah.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vqrdmlahq_m_n_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqrdmlah.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <16 x i8> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vqrdmlahq_m_n_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqrdmlah.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <16 x i8> [[TMP3]]
 //
 int8x16_t test_vqrdmlahq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -906,13 +1240,21 @@ int8x16_t test_vqrdmlahq_m_n_s8(int8x16_t a, int8x16_t 
b, int8_t c, mve_pred16_t
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqrdmlahq_m_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqrdmlah.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vqrdmlahq_m_n_s16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqrdmlah.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x i16> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vqrdmlahq_m_n_s16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqrdmlah.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x i16> [[TMP3]]
 //
 int16x8_t test_vqrdmlahq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -922,12 +1264,19 @@ int16x8_t test_vqrdmlahq_m_n_s16(int16x8_t a, int16x8_t 
b, int16_t c, mve_pred16
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqrdmlahq_m_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqrdmlah.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vqrdmlahq_m_n_s32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqrdmlah.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x i32> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vqrdmlahq_m_n_s32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqrdmlah.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x i32> [[TMP2]]
 //
 int32x4_t test_vqrdmlahq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -937,13 +1286,21 @@ int32x4_t test_vqrdmlahq_m_n_s32(int32x4_t a, int32x4_t 
b, int32_t c, mve_pred16
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqrdmlashq_m_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqrdmlash.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vqrdmlashq_m_n_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqrdmlash.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <16 x i8> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vqrdmlashq_m_n_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqrdmlash.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <16 x i8> [[TMP3]]
 //
 int8x16_t test_vqrdmlashq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -953,13 +1310,21 @@ int8x16_t test_vqrdmlashq_m_n_s8(int8x16_t a, int8x16_t 
b, int8_t c, mve_pred16_
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqrdmlashq_m_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqrdmlash.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+// CHECK-NOSTRICT-LABEL: @test_vqrdmlashq_m_n_s16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqrdmlash.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x i16> [[TMP3]]
+//
+// CHECK-STRICT-LABEL: @test_vqrdmlashq_m_n_s16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqrdmlash.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x i16> [[TMP3]]
 //
 int16x8_t test_vqrdmlashq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
@@ -969,12 +1334,19 @@ int16x8_t test_vqrdmlashq_m_n_s16(int16x8_t a, int16x8_t 
b, int16_t c, mve_pred1
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vqrdmlashq_m_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqrdmlash.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vqrdmlashq_m_n_s32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqrdmlash.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x i32> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vqrdmlashq_m_n_s32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqrdmlash.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> 
[[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x i32> [[TMP2]]
 //
 int32x4_t test_vqrdmlashq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, 
mve_pred16_t p) {
 #ifdef POLYMORPHIC
diff --git a/clang/utils/TableGen/MveEmitter.cpp 
b/clang/utils/TableGen/MveEmitter.cpp
index 7681213d9675a..8fde56a0bb5ec 100644
--- a/clang/utils/TableGen/MveEmitter.cpp
+++ b/clang/utils/TableGen/MveEmitter.cpp
@@ -1260,7 +1260,9 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D,
     for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i)
       Args.push_back(getCodeForDagArg(D, i, Scope, Param));
 
-    auto GenIRBuilderBase = [&](const Record *Op) {
+    auto GenIRBuilderBase = [&](const Record *Op) -> Result::Ptr {
+      assert(Op->isSubClassOf("IRBuilderBase") &&
+             "Expected IRBuilderBase in GenIRBuilderBase\n");
       std::set<unsigned> AddressArgs;
       std::map<unsigned, std::string> IntegerArgs;
       for (const Record *sp : Op->getValueAsListOfDefs("special_params")) {
@@ -1274,7 +1276,9 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D,
       return std::make_shared<IRBuilderResult>(Op->getValueAsString("prefix"),
                                                Args, AddressArgs, IntegerArgs);
     };
-    auto GenIRIntBase = [&](const Record *Op) {
+    auto GenIRIntBase = [&](const Record *Op) -> Result::Ptr {
+      assert(Op->isSubClassOf("IRIntBase") &&
+             "Expected IRIntBase in GenIRIntBase\n");
       std::vector<const Type *> ParamTypes;
       for (const Record *RParam : Op->getValueAsListOfDefs("params"))
         ParamTypes.push_back(getType(RParam, Param));
@@ -1289,8 +1293,11 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D,
     } else if (Op->isSubClassOf("IRIntBase")) {
       return GenIRIntBase(Op);
     } else if (Op->isSubClassOf("strictFPAlt")) {
-      auto Standard = GenIRBuilderBase(Op->getValueAsDef("standard"));
-      auto StrictFp = GenIRIntBase(Op->getValueAsDef("strictfp"));
+      auto StardardBuilder = Op->getValueAsDef("standard");
+      Result::Ptr Standard = StardardBuilder->isSubClassOf("IRBuilder")
+                                 ? GenIRBuilderBase(StardardBuilder)
+                                 : GenIRIntBase(StardardBuilder);
+      Result::Ptr StrictFp = GenIRIntBase(Op->getValueAsDef("strictfp"));
       return std::make_shared<StrictFpAltResult>(Standard, StrictFp);
     } else {
       PrintFatalError("Unsupported dag node " + Op->getName());
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td 
b/llvm/include/llvm/IR/IntrinsicsARM.td
index ecadb235bec36..3787e2591a4c1 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -1362,6 +1362,9 @@ def int_arm_mve_vqmovn_predicated: 
DefaultAttrsIntrinsic<[llvm_anyvector_ty],
     llvm_i32_ty /* unsigned output */, llvm_i32_ty /* unsigned input */,
     llvm_i32_ty /* top half */, llvm_anyvector_ty /* pred */], [IntrNoMem]>;
 
+def int_arm_mve_fma: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+   [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */,
+    LLVMMatchType<0> /* addend */], [IntrNoMem]>;
 // fma_predicated returns the add operand for disabled lanes.
 def int_arm_mve_fma_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
    [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */,
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td 
b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 6da04c4ac6f18..f9aaacb7f5250 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3723,6 +3723,10 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, 
MVEVectorVTInfo VTI> {
     if fms then {
       def : Pat<(VTI.Vec (fma (fneg m1), m2, add)),
                 (Inst $add, $m1, $m2)>;
+      def : Pat<(VTI.Vec (int_arm_mve_fma (fneg m1), m2, add)),
+                (Inst $add, $m1, $m2)>;
+      def : Pat<(VTI.Vec (int_arm_mve_fma m1, (fneg m2), add)),
+                (Inst $add, $m1, $m2)>;
       def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
                                   (VTI.Vec (fma (fneg m1), m2, add)),
                                   add)),
@@ -3734,6 +3738,8 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, 
MVEVectorVTInfo VTI> {
     } else {
       def : Pat<(VTI.Vec (fma m1, m2, add)),
                 (Inst $add, $m1, $m2)>;
+      def : Pat<(VTI.Vec (int_arm_mve_fma m1, m2, add)),
+                (Inst $add, $m1, $m2)>;
       def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
                                   (VTI.Vec (fma m1, m2, add)),
                                   add)),
@@ -5672,6 +5678,8 @@ multiclass MVE_VFMA_qr_multi<string iname, 
MVEVectorVTInfo VTI,
     if scalar_addend then {
       def : Pat<(VTI.Vec (fma v1, v2, vs)),
                 (VTI.Vec (Inst v1, v2, is))>;
+      def : Pat<(VTI.Vec (int_arm_mve_fma v1, v2, vs)),
+                (VTI.Vec (Inst v1, v2, is))>;
       def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
                                   (VTI.Vec (fma v1, v2, vs)),
                                   v1)),
@@ -5681,6 +5689,10 @@ multiclass MVE_VFMA_qr_multi<string iname, 
MVEVectorVTInfo VTI,
                 (VTI.Vec (Inst v2, v1, is))>;
       def : Pat<(VTI.Vec (fma vs, v1, v2)),
                 (VTI.Vec (Inst v2, v1, is))>;
+      def : Pat<(VTI.Vec (int_arm_mve_fma v1, vs, v2)),
+                (VTI.Vec (Inst v2, v1, is))>;
+      def : Pat<(VTI.Vec (int_arm_mve_fma vs, v1, v2)),
+                (VTI.Vec (Inst v2, v1, is))>;
       def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
                                   (VTI.Vec (fma vs, v2, v1)),
                                   v1)),
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll 
b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll
index 9c3a921ba2540..91d80925b6264 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll
@@ -141,3 +141,101 @@ entry:
   %0 = tail call <4 x float> @llvm.arm.mve.vmul.v4f32(<4 x float> %a, <4 x 
float> %s)
   ret <4 x float> %0
 }
+
+define arm_aapcs_vfpcc <4 x float> @fma_v4f32(<4 x float> %dst, <4 x float> 
%s1, <4 x float> %s2) {
+; CHECK-LABEL: fma_v4f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vfma.f32 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> %s1, <4 x 
float> %s2, <4 x float> %dst)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @fma_v8f16(<8 x half> %dst, <8 x half> %s1, 
<8 x half> %s2) {
+; CHECK-LABEL: fma_v8f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vfma.f16 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> %s1, <8 x half> 
%s2, <8 x half> %dst)
+  ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @fma_n_v8f16(<4 x float> %s1, <4 x float> 
%s2, float %s3) {
+; CHECK-LABEL: fma_n_v8f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r0, s8
+; CHECK-NEXT:    vfma.f32 q0, q1, r0
+; CHECK-NEXT:    bx lr
+entry:
+  %i = insertelement <4 x float> poison, float %s3, i32 0
+  %sp = shufflevector <4 x float> %i, <4 x float> poison, <4 x i32> 
zeroinitializer
+  %0 = tail call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> %s2, <4 x 
float> %sp, <4 x float> %s1)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @fma_n_v4f32(<8 x half> %s1, <8 x half> %s2, 
half %s3) {
+; CHECK-LABEL: fma_n_v4f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.f16 r0, s8
+; CHECK-NEXT:    vfma.f16 q0, q1, r0
+; CHECK-NEXT:    bx lr
+entry:
+  %i = insertelement <8 x half> poison, half %s3, i32 0
+  %sp = shufflevector <8 x half> %i, <8 x half> poison, <8 x i32> 
zeroinitializer
+  %0 = tail call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> %s2, <8 x half> 
%sp, <8 x half> %s1)
+  ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @fms_v4f32(<4 x float> %dst, <4 x float> 
%s1, <4 x float> %s2) {
+; CHECK-LABEL: fms_v4f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vfms.f32 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = fneg <4 x float> %s1
+  %0 = tail call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> %c, <4 x 
float> %s2, <4 x float> %dst)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @fms_v8f16(<8 x half> %dst, <8 x half> %s1, 
<8 x half> %s2) {
+; CHECK-LABEL: fms_v8f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vfms.f16 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = fneg <8 x half> %s1
+  %0 = tail call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> %c, <8 x half> 
%s2, <8 x half> %dst)
+  ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @fms_n_v8f16(<4 x float> %s1, <4 x float> 
%s2, float %s3) {
+; CHECK-LABEL: fms_n_v8f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r0, s8
+; CHECK-NEXT:    vdup.32 q2, r0
+; CHECK-NEXT:    vfms.f32 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = fneg <4 x float> %s2
+  %i = insertelement <4 x float> poison, float %s3, i32 0
+  %sp = shufflevector <4 x float> %i, <4 x float> poison, <4 x i32> 
zeroinitializer
+  %0 = tail call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> %c, <4 x 
float> %sp, <4 x float> %s1)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @fms_n_v4f32(<8 x half> %s1, <8 x half> %s2, 
half %s3) {
+; CHECK-LABEL: fms_n_v4f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.f16 r0, s8
+; CHECK-NEXT:    vdup.16 q2, r0
+; CHECK-NEXT:    vfms.f16 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = fneg <8 x half> %s2
+  %i = insertelement <8 x half> poison, half %s3, i32 0
+  %sp = shufflevector <8 x half> %i, <8 x half> poison, <8 x i32> 
zeroinitializer
+  %0 = tail call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> %c, <8 x half> 
%sp, <8 x half> %s1)
+  ret <8 x half> %0
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to