[llvm-branch-commits] [llvm] AMDGPU: Fix src2_modifiers for v_dot2_f32_f16/bf16 on gfx11+ (PR #179224)

Petar Avramovic via llvm-branch-commits Wed, 18 Mar 2026 05:21:26 -0700

https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/179224


>From e42fd5ee0c5dfac3fe9f5114cb2f43bce4049d16 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <[email protected]>
Date: Wed, 18 Mar 2026 12:32:12 +0100
Subject: [PATCH] AMDGPU: Fix src2_modifiers for v_dot2_f32_f16/bf16

---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td         |  4 +++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  9 +++++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |  1 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp      | 16 +++++++++
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |  2 ++
 llvm/lib/Target/AMDGPU/SIInstrInfo.td         |  2 ++
 llvm/lib/Target/AMDGPU/VOP3PInstructions.td   | 28 ++++++++++++---
 llvm/lib/Target/AMDGPU/VOPInstructions.td     |  6 ++--
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll      | 15 ++++----
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 35 ++++++++-----------
 10 files changed, 81 insertions(+), 37 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 84c0348c1d611..de8722841d3fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,6 +51,10 @@ def gi_vop3pmodsdot :
     GIComplexOperandMatcher<s32, "selectVOP3PModsDOT">,
     GIComplexPatternEquiv<VOP3PModsDOT>;
 
+def gi_vop3pmodsf32 :
+    GIComplexOperandMatcher<s32, "selectVOP3PModsF32">,
+    GIComplexPatternEquiv<VOP3PModsF32>;
+
 def gi_wmmaopselvop3pmods :
     GIComplexOperandMatcher<s32, "selectWMMAOpSelVOP3PMods">,
     GIComplexPatternEquiv<WMMAOpSelVOP3PMods>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 749450aaf0344..613dcfeb646a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,15 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
+                                            SDValue &SrcMods) const {
+  SelectVOP3Mods(In, Src, SrcMods);
+  unsigned Mods = SISrcMods::OP_SEL_1;
+  Mods |= cast<ConstantSDNode>(SrcMods)->getZExtValue();
+  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+  return true;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
                                                   SDValue &Src) const {
   const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index ffeb6dfdb3f90..8b12d1d2a800f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,6 +233,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
                        bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index eb0b05a45d47d..32487094efab3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -5269,6 +5269,22 @@ AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
   return selectVOP3PRetHelper(Root, true);
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PModsF32(MachineOperand &Root) const {
+  Register Src = Root.getReg();
+  unsigned Mods = SISrcMods::OP_SEL_1;
+  if (Subtarget->isGFX11Plus()) {
+    unsigned ModsImpl;
+    std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Root.getReg());
+    Mods |= ModsImpl;
+  }
+
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+  }};
+}
+
 InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
     MachineOperand &Root) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index cc121632e101d..2c9ecc207d8bd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -200,6 +200,8 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
 
   InstructionSelector::ComplexRendererFns
   selectVOP3PModsDOT(MachineOperand &Root) const;
+  InstructionSelector::ComplexRendererFns
+  selectVOP3PModsF32(MachineOperand &Root) const;
 
   InstructionSelector::ComplexRendererFns
   selectWMMAOpSelVOP3PMods(MachineOperand &Root) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index d9b40beaf7318..229cac30d4165 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1706,6 +1706,8 @@ def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
 def VOP3PMods  : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
 
 def VOP3PModsDOT  : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
+def VOP3PModsF32  : ComplexPattern<untyped, 2, "SelectVOP3PModsF32">;
+
 def WMMAOpSelVOP3PMods  : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;
 
 def WMMAModsF32NegAbs  : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 333240e0f7ac2..cc08710a299a3 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -86,6 +86,21 @@ multiclass VOP3PInst<string OpName, VOPProfile P,
   } // end SubtargetPredicate = isGFX11Plus
 }
 
+multiclass VOP3PInstDotWithDual<string OpName, VOPProfile P,
+                                SDPatternOperator node = null_frag> {
+  def NAME : VOP3P_Pseudo<OpName, P,
+                          getVOP3PModPat<P, node,
+                                         1 /*HasExplicitClamp*/, 1/*IsDOT*/,
+                                         VOP3PModsDOT, VOP3PModsF32>.ret>;
+  let SubtargetPredicate = isGFX11Plus in {
+  if P.HasExtVOP3DPP then
+    def _dpp : VOP3_DPP_Pseudo<OpName, P> {
+      let VOP3P = 1;
+      let PseudoInstr = OpName #"_dpp";
+    }
+  } // end SubtargetPredicate = isGFX11Plus
+}
+
 // Non-packed instructions that use the VOP3P encoding.
 // VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed.
 multiclass VOP3_VOP3PInst<string OpName, VOP3P_Mix_Profile P> {
@@ -598,9 +613,11 @@ defm V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16",
 } // End OtherPredicates = [HasDot2Insts]
 
 let OtherPredicates = [HasDot10Insts] in
-defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
-  VOP3P_Profile<VOP_F32_V2F16_V2F16_F32, VOP3_REGULAR, /*HasDPP*/ 1>,
-  AMDGPUfdot2, 1/*ExplicitClamp*/>;
+defm V_DOT2_F32_F16 :
+  VOP3PInstDotWithDual<"v_dot2_f32_f16",
+                       VOP3P_Profile<VOP_F32_V2F16_V2F16_F32, VOP3_REGULAR,
+                                     /*HasDPP*/ 1>,
+                       AMDGPUfdot2>;
 
 let OtherPredicates = [HasDot7Insts] in {
 defm V_DOT4_U32_U8  : VOP3PInst<"v_dot4_u32_u8",
@@ -623,8 +640,9 @@ def DOT2_BF16_Profile
 
 let SubtargetPredicate = HasDot12Insts  in {
 
-defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile,
-  int_amdgcn_fdot2_f32_bf16, 1>;
+defm V_DOT2_F32_BF16 :
+  VOP3PInstDotWithDual<"v_dot2_f32_bf16", DOT2_BF16_Profile,
+                       int_amdgcn_fdot2_f32_bf16>;
 
 } // End SubtargetPredicate = HasDot12Insts
 
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 9d56aa4ad5cb0..82545a472cf17 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1380,10 +1380,12 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
 
 class getVOP3PModPat<VOPProfile P, SDPatternOperator node, bit HasExplicitClamp,
                      bit IsDOT = 0,
-                     ComplexPattern SrcPat = !if(IsDOT, VOP3PModsDOT, VOP3PMods)> {
+                     ComplexPattern SrcPat = !if(IsDOT, VOP3PModsDOT,
+                                                        VOP3PMods),
+                     ComplexPattern Src2Pat = SrcPat> {
   dag src0_dag = (P.Src0VT (SrcPat P.Src0VT:$src0, i32:$src0_modifiers));
   dag src1_dag = (P.Src1VT (SrcPat P.Src1VT:$src1, i32:$src1_modifiers));
-  dag src2_dag = (P.Src2VT (SrcPat P.Src2VT:$src2, i32:$src2_modifiers));
+  dag src2_dag = (P.Src2VT (Src2Pat P.Src2VT:$src2, i32:$src2_modifiers));
   dag clamp_dag = (i1 timm:$clamp);
 
   list<dag> ret3 = [(set P.DstVT:$vdst,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll
index ce5de94117210..6cfa02501adc5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll
@@ -165,7 +165,7 @@ define float @v_fdot2_f32_bf16_neg_c(<2 x bfloat> %a, <2 x bfloat> %b, float %c)
 ;
 ; GFX11PLUS-LABEL: v_fdot2_f32_bf16_neg_c:
 ; GFX11PLUS:  ; %bb.0:
-; GFX11PLUS:    v_dot2_f32_bf16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11PLUS:    v_dot2_f32_bf16 v0, v0, v1, v2 neg_lo:[0,0,1]
   %neg.c = fneg float %c
   %r = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> %a, <2 x bfloat> %b, float %neg.c, i1 false)
   ret float %r
@@ -180,8 +180,7 @@ define float @v_fdot2_f32_bf16_abs_c(<2 x bfloat> %a, <2 x bfloat> %b, float %c)
 ;
 ; GFX11PLUS-LABEL: v_fdot2_f32_bf16_abs_c:
 ; GFX11PLUS:  ; %bb.0:
-; GFX11PLUS:    v_and_b32_e32 v2, 0x7fffffff, v2
-; GFX11PLUS:    v_dot2_f32_bf16 v0, v0, v1, v2
+; GFX11PLUS:    v_dot2_f32_bf16 v0, v0, v1, v2 neg_hi:[0,0,1]
   %abs.c = call float @llvm.fabs.f32(float %c)
   %r = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> %a, <2 x bfloat> %b, float %abs.c, i1 false)
   ret float %r
@@ -344,7 +343,7 @@ define float @v_fdot2_f32_bf16_neg_b_clamp(<2 x bfloat> %a, <2 x bfloat> %b, flo
 define float @v_fdot2_f32_bf16_neg_c_clamp(<2 x bfloat> %a, <2 x bfloat> %b, float %c) {
 ; GCN-LABEL: v_fdot2_f32_bf16_neg_c_clamp:
 ; GCN:  ; %bb.0:
-; GCN:    v_dot2_f32_bf16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] clamp
+; GCN:    v_dot2_f32_bf16 v0, v0, v1, v2 neg_lo:[0,0,1] clamp
   %neg.c = fneg float %c
   %r = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> %a, <2 x bfloat> %b, float %neg.c, i1 true)
   ret float %r
@@ -353,8 +352,7 @@ define float @v_fdot2_f32_bf16_neg_c_clamp(<2 x bfloat> %a, <2 x bfloat> %b, flo
 define float @v_fdot2_f32_bf16_abs_c_clamp(<2 x bfloat> %a, <2 x bfloat> %b, float %c) {
 ; GCN-LABEL: v_fdot2_f32_bf16_abs_c_clamp:
 ; GCN:  ; %bb.0:
-; GCN:    v_and_b32_e32 v2, 0x7fffffff, v2
-; GCN:    v_dot2_f32_bf16 v0, v0, v1, v2 clamp
+; GCN:    v_dot2_f32_bf16 v0, v0, v1, v2 neg_hi:[0,0,1] clamp
   %abs.c = call float @llvm.fabs.f32(float %c)
   %r = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> %a, <2 x bfloat> %b, float %abs.c, i1 true)
   ret float %r
@@ -682,7 +680,7 @@ define float @v_fdot2_f32_bf16_neg_c_dual(<2 x bfloat> %a, <2 x bfloat> %b, floa
 ;
 ; GFX11PLUS-LABEL: v_fdot2_f32_bf16_neg_c_dual:
 ; GFX11PLUS:  ; %bb.0:
-; GFX11PLUS:    v_dot2_f32_bf16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11PLUS:    v_dot2_f32_bf16 v0, v0, v1, v2 neg_lo:[0,0,1]
 ; GFX11PLUS:    v_dot2_f32_bf16 v1, v3, v4, v5
 ; GFX11PLUS:    v_add_f32_e32 v0, v0, v1
   %neg.c = fneg float %c
@@ -702,8 +700,7 @@ define float @v_fdot2_f32_bf16_abs_c_dual(<2 x bfloat> %a, <2 x bfloat> %b, floa
 ;
 ; GFX11PLUS-LABEL: v_fdot2_f32_bf16_abs_c_dual:
 ; GFX11PLUS:  ; %bb.0:
-; GFX11PLUS:    v_and_b32_e32 v2, 0x7fffffff, v2
-; GFX11PLUS:    v_dot2_f32_bf16 v0, v0, v1, v2
+; GFX11PLUS:    v_dot2_f32_bf16 v0, v0, v1, v2 neg_hi:[0,0,1]
 ; GFX11PLUS:    v_dot2_f32_bf16 v1, v3, v4, v5
 ; GFX11PLUS:    v_add_f32_e32 v0, v0, v1
   %abs.c = call float @llvm.fabs.f32(float %c)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
index 3312f29470066..c0f1240e4ef05 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
@@ -290,7 +290,7 @@ define float @v_fdot2_neg_b_hi(<2 x half> %a, <2 x half> %b, float %c) {
 define float @v_fdot2_neg_c(<2 x half> %a, <2 x half> %b, float %c) {
 ; GFX906-LABEL: v_fdot2_neg_c:
 ; GFX906:  ; %bb.0:
-; GFX906:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX906:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1]
 ;
 ; GFX950-LABEL: v_fdot2_neg_c:
 ; GFX950:  ; %bb.0:
@@ -312,11 +312,11 @@ define float @v_fdot2_neg_c(<2 x half> %a, <2 x half> %b, float %c) {
 ;
 ; GFX1170-LABEL: v_fdot2_neg_c:
 ; GFX1170:  ; %bb.0:
-; GFX1170:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX1170:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1]
 ;
 ; GFX12-LABEL: v_fdot2_neg_c:
 ; GFX12:  ; %bb.0:
-; GFX12:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX12:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1]
   %neg.c = fneg float %c
   %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %neg.c, i1 false)
   ret float %r
@@ -325,8 +325,7 @@ define float @v_fdot2_neg_c(<2 x half> %a, <2 x half> %b, float %c) {
 define float @v_fdot2_abs_c(<2 x half> %a, <2 x half> %b, float %c) {
 ; GFX906-LABEL: v_fdot2_abs_c:
 ; GFX906:  ; %bb.0:
-; GFX906:    v_and_b32_e32 v2, 0x7fffffff, v2
-; GFX906:    v_dot2_f32_f16 v0, v0, v1, v2
+; GFX906:    v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1]
 ;
 ; GFX950-LABEL: v_fdot2_abs_c:
 ; GFX950:  ; %bb.0:
@@ -348,13 +347,11 @@ define float @v_fdot2_abs_c(<2 x half> %a, <2 x half> %b, float %c) {
 ;
 ; GFX1170-LABEL: v_fdot2_abs_c:
 ; GFX1170:  ; %bb.0:
-; GFX1170:    v_and_b32_e32 v2, 0x7fffffff, v2
-; GFX1170:    v_dot2_f32_f16 v0, v0, v1, v2
+; GFX1170:    v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1]
 ;
 ; GFX12-LABEL: v_fdot2_abs_c:
 ; GFX12:  ; %bb.0:
-; GFX12:    v_and_b32_e32 v2, 0x7fffffff, v2
-; GFX12:    v_dot2_f32_f16 v0, v0, v1, v2
+; GFX12:    v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1]
   %abs.c = call float @llvm.fabs.f32(float %c)
   %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %abs.c, i1 false)
   ret float %r
@@ -637,7 +634,7 @@ define float @v_fdot2_neg_b_clamp(<2 x half> %a, <2 x half> %b, float %c) {
 define float @v_fdot2_neg_c_clamp(<2 x half> %a, <2 x half> %b, float %c) {
 ; GCN-LABEL: v_fdot2_neg_c_clamp:
 ; GCN:  ; %bb.0:
-; GCN:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] clamp
+; GCN:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1] clamp
   %neg.c = fneg float %c
   %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %neg.c, i1 true)
   ret float %r
@@ -646,8 +643,7 @@ define float @v_fdot2_neg_c_clamp(<2 x half> %a, <2 x half> %b, float %c) {
 define float @v_fdot2_abs_c_clamp(<2 x half> %a, <2 x half> %b, float %c) {
 ; GCN-LABEL: v_fdot2_abs_c_clamp:
 ; GCN:  ; %bb.0:
-; GCN:    v_and_b32_e32 v2, 0x7fffffff, v2
-; GCN:    v_dot2_f32_f16 v0, v0, v1, v2 clamp
+; GCN:    v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1] clamp
   %abs.c = call float @llvm.fabs.f32(float %c)
   %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %abs.c, i1 true)
   ret float %r
@@ -1191,7 +1187,7 @@ define float @v_fdot2_neg_b_hi_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x
 define float @v_fdot2_neg_c_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
 ; GFX906-LABEL: v_fdot2_neg_c_dual:
 ; GFX906:  ; %bb.0:
-; GFX906:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX906:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1]
 ; GFX906:    v_dot2_f32_f16 v1, v3, v4, v5
 ; GFX906:    v_add_f32_e32 v0, v0, v1
 ;
@@ -1217,13 +1213,13 @@ define float @v_fdot2_neg_c_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x ha
 ;
 ; GFX1170-LABEL: v_fdot2_neg_c_dual:
 ; GFX1170:  ; %bb.0:
-; GFX1170:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX1170:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1]
 ; GFX1170:    v_dot2_f32_f16 v1, v3, v4, v5
 ; GFX1170:    v_add_f32_e32 v0, v0, v1
 ;
 ; GFX12-LABEL: v_fdot2_neg_c_dual:
 ; GFX12:  ; %bb.0:
-; GFX12:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX12:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1]
 ; GFX12:    v_dot2_f32_f16 v1, v3, v4, v5
 ; GFX12:    v_add_f32_e32 v0, v0, v1
   %neg.c = fneg float %c
@@ -1236,8 +1232,7 @@ define float @v_fdot2_neg_c_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x ha
 define float @v_fdot2_abs_c_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
 ; GFX906-LABEL: v_fdot2_abs_c_dual:
 ; GFX906:  ; %bb.0:
-; GFX906:    v_and_b32_e32 v2, 0x7fffffff, v2
-; GFX906:    v_dot2_f32_f16 v0, v0, v1, v2
+; GFX906:    v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1]
 ; GFX906:    v_dot2_f32_f16 v1, v3, v4, v5
 ; GFX906:    v_add_f32_e32 v0, v0, v1
 ;
@@ -1263,15 +1258,13 @@ define float @v_fdot2_abs_c_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x ha
 ;
 ; GFX1170-LABEL: v_fdot2_abs_c_dual:
 ; GFX1170:  ; %bb.0:
-; GFX1170:    v_and_b32_e32 v2, 0x7fffffff, v2
-; GFX1170:    v_dot2_f32_f16 v0, v0, v1, v2
+; GFX1170:    v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1]
 ; GFX1170:    v_dot2_f32_f16 v1, v3, v4, v5
 ; GFX1170:    v_add_f32_e32 v0, v0, v1
 ;
 ; GFX12-LABEL: v_fdot2_abs_c_dual:
 ; GFX12:  ; %bb.0:
-; GFX12:    v_and_b32_e32 v2, 0x7fffffff, v2
-; GFX12:    v_dot2_f32_f16 v0, v0, v1, v2
+; GFX12:    v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1]
 ; GFX12:    v_dot2_f32_f16 v1, v3, v4, v5
 ; GFX12:    v_add_f32_e32 v0, v0, v1
   %abs.c = call float @llvm.fabs.f32(float %c)

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Fix src2_modifiers for v_dot2_f32_f16/bf16 on gfx11+ (PR #179224)

Reply via email to