llvmorg-github-actions[bot] wrote:

<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-amdgpu

Author: Krzysztof Drewniak (krzysz00)

<details>
<summary>Changes</summary>

Bitcasts preserve undef/poison status, but vector bitcasts can change
which source lanes cover a demanded result lane. Map the demanded
element mask through fixed-length vector bitcasts before checking the
source where possible.

AI note: an LLM generated the code and the test; I've read them both.

Co-Authored-By: OpenAI Codex <codex@openai.com>

---

<sub>Stack created with <a
href="https://github.com/github/gh-stack">GitHub Stacks CLI</a> •
<a href="https://gh.io/stacks-feedback">Give Feedback
💬</a></sub>

---
Full diff: https://github.com/llvm/llvm-project/pull/200933.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+41) 
- (added) llvm/test/CodeGen/AMDGPU/dagcombine-freeze-bitcast-demanded-elts.ll 
(+102) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 748520a28ffae..072a918115bbd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5654,6 +5654,47 @@ bool 
SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
   case ISD::UNDEF:
     return !includesUndef(Kind);
 
+  case ISD::BITCAST: {
+    if (!DemandedElts)
+      return true;
+
+    SDValue Src = Op.getOperand(0);
+    EVT SrcVT = Src.getValueType();
+    EVT DstVT = Op.getValueType();
+
+    if (!SrcVT.isFixedLengthVector() || !DstVT.isFixedLengthVector())
+      return isGuaranteedNotToBeUndefOrPoison(Src, Kind, Depth + 1);
+
+    unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
+    unsigned DstEltBits = DstVT.getScalarSizeInBits();
+    unsigned NumSrcElts = SrcVT.getVectorNumElements();
+    unsigned NumDstElts = DstVT.getVectorNumElements();
+
+    if (SrcEltBits == DstEltBits)
+      return isGuaranteedNotToBeUndefOrPoison(Src, DemandedElts, Kind,
+                                              Depth + 1);
+
+    if (SrcEltBits < DstEltBits) {
+      if (DstEltBits % SrcEltBits != 0)
+        return isGuaranteedNotToBeUndefOrPoison(Src, Kind, Depth + 1);
+
+      assert(NumSrcElts == NumDstElts * (DstEltBits / SrcEltBits) &&
+             "Unexpected fixed-width vector bitcast");
+      APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
+      return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, Kind,
+                                              Depth + 1);
+    }
+
+    if (SrcEltBits % DstEltBits != 0)
+      return isGuaranteedNotToBeUndefOrPoison(Src, Kind, Depth + 1);
+
+    assert(NumDstElts == NumSrcElts * (SrcEltBits / DstEltBits) &&
+           "Unexpected fixed-width vector bitcast");
+    APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
+    return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, Kind,
+                                            Depth + 1);
+  }
+
   case ISD::BUILD_VECTOR:
     // NOTE: BUILD_VECTOR has implicit truncation of wider scalar elements -
     // this shouldn't affect the result.
diff --git 
a/llvm/test/CodeGen/AMDGPU/dagcombine-freeze-bitcast-demanded-elts.ll 
b/llvm/test/CodeGen/AMDGPU/dagcombine-freeze-bitcast-demanded-elts.ll
new file mode 100644
index 0000000000000..27527cfd9eeff
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-freeze-bitcast-demanded-elts.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 6
+; RUN: llc -O2 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 \
+; RUN:   -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck %s \
+; RUN:   --check-prefix=COMBINE \
+; RUN:   --implicit-check-not=V_ADD_U64_PSEUDO 
--implicit-check-not=REG_SEQUENCE
+; RUN: llc -O2 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 \
+; RUN:   -verify-machineinstrs -combiner-disabled -stop-after=amdgpu-isel < %s 
\
+; RUN:   | FileCheck %s --check-prefix=NOCOMBINE
+
+declare <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32>, i64 immarg)
+
+define <4 x i32> @freeze_extract_bitcast_demanded(<2 x i64> %a, <2 x i64> %b) 
nounwind {
+  ; COMBINE-LABEL: name: freeze_extract_bitcast_demanded
+  ; COMBINE: bb.0 (%ir-block.0):
+  ; COMBINE-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; COMBINE-NEXT: {{  $}}
+  ; COMBINE-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; COMBINE-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; COMBINE-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; COMBINE-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; COMBINE-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; COMBINE-NEXT:   [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 
[[S_MOV_B32_]], [[COPY3]], implicit $exec
+  ; COMBINE-NEXT:   [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = 
V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[COPY2]], implicit $exec
+  ; COMBINE-NEXT:   [[V_LSHRREV_B32_e64_2:%[0-9]+]]:vgpr_32 = 
V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+  ; COMBINE-NEXT:   [[V_LSHRREV_B32_e64_3:%[0-9]+]]:vgpr_32 = 
V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+  ; COMBINE-NEXT:   $vgpr0 = COPY [[V_LSHRREV_B32_e64_]]
+  ; COMBINE-NEXT:   $vgpr1 = COPY [[V_LSHRREV_B32_e64_1]]
+  ; COMBINE-NEXT:   $vgpr2 = COPY [[V_LSHRREV_B32_e64_2]]
+  ; COMBINE-NEXT:   $vgpr3 = COPY [[V_LSHRREV_B32_e64_3]]
+  ; COMBINE-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3
+  ;
+  ; NOCOMBINE-LABEL: name: freeze_extract_bitcast_demanded
+  ; NOCOMBINE: bb.0 (%ir-block.0):
+  ; NOCOMBINE-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, 
$vgpr6, $vgpr7
+  ; NOCOMBINE-NEXT: {{  $}}
+  ; NOCOMBINE-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr7
+  ; NOCOMBINE-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr6
+  ; NOCOMBINE-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+  ; NOCOMBINE-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+  ; NOCOMBINE-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; NOCOMBINE-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; NOCOMBINE-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; NOCOMBINE-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; NOCOMBINE-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE 
[[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+  ; NOCOMBINE-NEXT:   [[COPY8:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE]].sub1
+  ; NOCOMBINE-NEXT:   [[COPY9:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE]].sub0
+  ; NOCOMBINE-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE 
[[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; NOCOMBINE-NEXT:   [[COPY10:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; NOCOMBINE-NEXT:   [[COPY11:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; NOCOMBINE-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE 
[[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+  ; NOCOMBINE-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+  ; NOCOMBINE-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+  ; NOCOMBINE-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed 
[[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
+  ; NOCOMBINE-NEXT:   [[V_ADD_U:%[0-9]+]]:vreg_64_align2 = nsw 
V_ADD_U64_PSEUDO killed [[REG_SEQUENCE2]], [[REG_SEQUENCE3]], implicit-def dead 
$vcc, implicit $exec
+  ; NOCOMBINE-NEXT:   [[COPY12:%[0-9]+]]:av_32 = COPY [[V_ADD_U]].sub1
+  ; NOCOMBINE-NEXT:   [[COPY13:%[0-9]+]]:av_32 = COPY [[V_ADD_U]].sub0
+  ; NOCOMBINE-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE 
[[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; NOCOMBINE-NEXT:   [[V_ADD_U1:%[0-9]+]]:vreg_64_align2 = nsw 
V_ADD_U64_PSEUDO killed [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], implicit-def dead 
$vcc, implicit $exec
+  ; NOCOMBINE-NEXT:   [[COPY14:%[0-9]+]]:av_32 = COPY [[V_ADD_U1]].sub1
+  ; NOCOMBINE-NEXT:   [[COPY15:%[0-9]+]]:av_32 = COPY [[V_ADD_U1]].sub0
+  ; NOCOMBINE-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:av_256_align2 = REG_SEQUENCE 
killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed 
[[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3, killed [[COPY15]], 
%subreg.sub4, killed [[COPY14]], %subreg.sub5, killed [[COPY13]], %subreg.sub6, 
killed [[COPY12]], %subreg.sub7
+  ; NOCOMBINE-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub7
+  ; NOCOMBINE-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; NOCOMBINE-NEXT:   [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = 
V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY16]], implicit $exec
+  ; NOCOMBINE-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub6
+  ; NOCOMBINE-NEXT:   [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = 
V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY17]], implicit $exec
+  ; NOCOMBINE-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub5
+  ; NOCOMBINE-NEXT:   [[V_LSHRREV_B32_e64_2:%[0-9]+]]:vgpr_32 = 
V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY18]], implicit $exec
+  ; NOCOMBINE-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub4
+  ; NOCOMBINE-NEXT:   [[V_LSHRREV_B32_e64_3:%[0-9]+]]:vgpr_32 = 
V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY19]], implicit $exec
+  ; NOCOMBINE-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub3
+  ; NOCOMBINE-NEXT:   [[V_LSHRREV_B32_e64_4:%[0-9]+]]:vgpr_32 = 
V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY20]], implicit $exec
+  ; NOCOMBINE-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub2
+  ; NOCOMBINE-NEXT:   [[V_LSHRREV_B32_e64_5:%[0-9]+]]:vgpr_32 = 
V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY21]], implicit $exec
+  ; NOCOMBINE-NEXT:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub1
+  ; NOCOMBINE-NEXT:   [[V_LSHRREV_B32_e64_6:%[0-9]+]]:vgpr_32 = 
V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY22]], implicit $exec
+  ; NOCOMBINE-NEXT:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub0
+  ; NOCOMBINE-NEXT:   [[V_LSHRREV_B32_e64_7:%[0-9]+]]:vgpr_32 = 
V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY23]], implicit $exec
+  ; NOCOMBINE-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:vreg_256_align2 = REG_SEQUENCE 
killed [[V_LSHRREV_B32_e64_7]], %subreg.sub0, killed [[V_LSHRREV_B32_e64_6]], 
%subreg.sub1, killed [[V_LSHRREV_B32_e64_5]], %subreg.sub2, killed 
[[V_LSHRREV_B32_e64_4]], %subreg.sub3, killed [[V_LSHRREV_B32_e64_3]], 
%subreg.sub4, killed [[V_LSHRREV_B32_e64_2]], %subreg.sub5, killed 
[[V_LSHRREV_B32_e64_1]], %subreg.sub6, killed [[V_LSHRREV_B32_e64_]], 
%subreg.sub7
+  ; NOCOMBINE-NEXT:   [[COPY24:%[0-9]+]]:av_256_align2 = COPY killed 
[[REG_SEQUENCE6]]
+  ; NOCOMBINE-NEXT:   [[COPY25:%[0-9]+]]:av_32 = COPY [[COPY24]].sub3
+  ; NOCOMBINE-NEXT:   [[COPY26:%[0-9]+]]:av_32 = COPY [[COPY24]].sub2
+  ; NOCOMBINE-NEXT:   [[COPY27:%[0-9]+]]:av_32 = COPY [[COPY24]].sub1
+  ; NOCOMBINE-NEXT:   [[COPY28:%[0-9]+]]:av_32 = COPY [[COPY24]].sub0
+  ; NOCOMBINE-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:av_128_align2 = REG_SEQUENCE 
killed [[COPY28]], %subreg.sub0, killed [[COPY27]], %subreg.sub1, killed 
[[COPY26]], %subreg.sub2, killed [[COPY25]], %subreg.sub3
+  ; NOCOMBINE-NEXT:   [[COPY29:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE7]].sub0
+  ; NOCOMBINE-NEXT:   [[COPY30:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE7]].sub1
+  ; NOCOMBINE-NEXT:   [[COPY31:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE7]].sub2
+  ; NOCOMBINE-NEXT:   [[COPY32:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE7]].sub3
+  ; NOCOMBINE-NEXT:   $vgpr0 = COPY [[COPY29]]
+  ; NOCOMBINE-NEXT:   $vgpr1 = COPY [[COPY30]]
+  ; NOCOMBINE-NEXT:   $vgpr2 = COPY [[COPY31]]
+  ; NOCOMBINE-NEXT:   $vgpr3 = COPY [[COPY32]]
+  ; NOCOMBINE-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3
+  %poisonable = add nsw <2 x i64> %b, <i64 9223372036854775807, i64 
9223372036854775807>
+  %wide = shufflevector <2 x i64> %a, <2 x i64> %poisonable, <4 x i32> <i32 0, 
i32 1, i32 2, i32 3>
+  %bc = bitcast <4 x i64> %wide to <8 x i32>
+  %shifted = lshr <8 x i32> %bc, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, 
i32 1, i32 1>
+  %fr = freeze <8 x i32> %shifted
+  %ext = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %fr, i64 0)
+  ret <4 x i32> %ext
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/200933
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to