llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Krzysztof Drewniak (krzysz00) <details> <summary>Changes</summary> Teach isGuaranteedNotToBeUndefOrPoison to distribute fixed-length demanded element masks across CONCAT_VECTORS operands. This is part of the series of fixes needed to resolve a SelectionDAG hang by making it possible to prove certain values don't need to be frozen. AI note: an LLM generated the code and the test; I've read them. Co-Authored-By: OpenAI Codex <codex@<!-- -->openai.com> --- <sub>Stack created with <a href="https://github.com/github/gh-stack">GitHub Stacks CLI</a> • <a href="https://gh.io/stacks-feedback">Give Feedback 💬</a></sub> --- Full diff: https://github.com/llvm/llvm-project/pull/200932.diff 2 Files Affected: - (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+26) - (added) llvm/test/CodeGen/AMDGPU/dagcombine-freeze-demanded-elts.ll (+80) ``````````diff diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 75d550801315b..748520a28ffae 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5665,6 +5665,32 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, } return true; + case ISD::CONCAT_VECTORS: { + if (!DemandedElts) + return true; + + EVT VT = Op.getValueType(); + if (!VT.isFixedLengthVector()) + return all_of(Op->ops(), [&](SDValue V) { + return isGuaranteedNotToBeUndefOrPoison(V, Kind, Depth + 1); + }); + + assert(DemandedElts.getBitWidth() == VT.getVectorNumElements() && + "Unexpected demanded element mask width"); + + EVT SubVT = Op.getOperand(0).getValueType(); + unsigned NumSubElts = SubVT.getVectorNumElements(); + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + APInt DemandedSubElts = + DemandedElts.extractBits(NumSubElts, i * NumSubElts); + if (!!DemandedSubElts && + !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(i), 
DemandedSubElts, Kind, Depth + 1)) + return false; + } + return true; + } + case ISD::EXTRACT_SUBVECTOR: { SDValue Src = Op.getOperand(0); if (Src.getValueType().isScalableVector()) diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-freeze-demanded-elts.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-freeze-demanded-elts.ll new file mode 100644 index 0000000000000..dd9edb94e5b5a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-freeze-demanded-elts.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -O2 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 \ +; RUN: -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck %s \ +; RUN: --check-prefix=COMBINE \ +; RUN: --implicit-check-not=V_ADD_U32 --implicit-check-not=REG_SEQUENCE +; RUN: llc -O2 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 \ +; RUN: -verify-machineinstrs -combiner-disabled -stop-after=amdgpu-isel < %s \ +; RUN: | FileCheck %s --check-prefix=NOCOMBINE + +declare <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32>, i64 immarg) + +define <4 x i32> @freeze_lshr_extract_concat_poisonable(<4 x i32> %a, <4 x i32> %b) nounwind { + ; COMBINE-LABEL: name: freeze_lshr_extract_concat_poisonable + ; COMBINE: bb.0 (%ir-block.0): + ; COMBINE-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; COMBINE-NEXT: {{ $}} + ; COMBINE-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; COMBINE-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; COMBINE-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; COMBINE-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; COMBINE-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; COMBINE-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[COPY3]], implicit $exec + ; COMBINE-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[COPY2]], implicit $exec + ; COMBINE-NEXT: [[V_LSHRREV_B32_e64_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 
[[S_MOV_B32_]], [[COPY1]], implicit $exec + ; COMBINE-NEXT: [[V_LSHRREV_B32_e64_3:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; COMBINE-NEXT: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]] + ; COMBINE-NEXT: $vgpr1 = COPY [[V_LSHRREV_B32_e64_1]] + ; COMBINE-NEXT: $vgpr2 = COPY [[V_LSHRREV_B32_e64_2]] + ; COMBINE-NEXT: $vgpr3 = COPY [[V_LSHRREV_B32_e64_3]] + ; COMBINE-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; + ; NOCOMBINE-LABEL: name: freeze_lshr_extract_concat_poisonable + ; NOCOMBINE: bb.0 (%ir-block.0): + ; NOCOMBINE-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; NOCOMBINE-NEXT: {{ $}} + ; NOCOMBINE-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr7 + ; NOCOMBINE-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; NOCOMBINE-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; NOCOMBINE-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; NOCOMBINE-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; NOCOMBINE-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; NOCOMBINE-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; NOCOMBINE-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; NOCOMBINE-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; NOCOMBINE-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 [[COPY3]], [[S_MOV_B32_]], 0, implicit $exec + ; NOCOMBINE-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 [[COPY2]], [[S_MOV_B32_]], 0, implicit $exec + ; NOCOMBINE-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 [[COPY1]], [[S_MOV_B32_]], 0, implicit $exec + ; NOCOMBINE-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 [[COPY]], [[S_MOV_B32_]], 0, implicit $exec + ; NOCOMBINE-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_1]], [[COPY4]], implicit $exec + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = 
V_LSHRREV_B32_e64 [[S_MOV_B32_1]], [[COPY5]], implicit $exec + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_1]], [[COPY6]], implicit $exec + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_3:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_1]], [[COPY7]], implicit $exec + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_4:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_1]], killed [[V_ADD_U32_e64_3]], implicit $exec + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_5:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_1]], killed [[V_ADD_U32_e64_2]], implicit $exec + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_6:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_1]], killed [[V_ADD_U32_e64_1]], implicit $exec + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_7:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_1]], killed [[V_ADD_U32_e64_]], implicit $exec + ; NOCOMBINE-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_256_align2 = REG_SEQUENCE killed [[V_LSHRREV_B32_e64_3]], %subreg.sub0, killed [[V_LSHRREV_B32_e64_2]], %subreg.sub1, killed [[V_LSHRREV_B32_e64_1]], %subreg.sub2, killed [[V_LSHRREV_B32_e64_]], %subreg.sub3, killed [[V_LSHRREV_B32_e64_7]], %subreg.sub4, killed [[V_LSHRREV_B32_e64_6]], %subreg.sub5, killed [[V_LSHRREV_B32_e64_5]], %subreg.sub6, killed [[V_LSHRREV_B32_e64_4]], %subreg.sub7 + ; NOCOMBINE-NEXT: [[COPY8:%[0-9]+]]:av_256_align2 = COPY killed [[REG_SEQUENCE]] + ; NOCOMBINE-NEXT: [[COPY9:%[0-9]+]]:av_32 = COPY [[COPY8]].sub3 + ; NOCOMBINE-NEXT: [[COPY10:%[0-9]+]]:av_32 = COPY [[COPY8]].sub2 + ; NOCOMBINE-NEXT: [[COPY11:%[0-9]+]]:av_32 = COPY [[COPY8]].sub1 + ; NOCOMBINE-NEXT: [[COPY12:%[0-9]+]]:av_32 = COPY [[COPY8]].sub0 + ; NOCOMBINE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:av_128_align2 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3 + ; NOCOMBINE-NEXT: [[COPY13:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub0 + ; NOCOMBINE-NEXT: [[COPY14:%[0-9]+]]:av_32 = COPY 
[[REG_SEQUENCE1]].sub1 + ; NOCOMBINE-NEXT: [[COPY15:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub2 + ; NOCOMBINE-NEXT: [[COPY16:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub3 + ; NOCOMBINE-NEXT: $vgpr0 = COPY [[COPY13]] + ; NOCOMBINE-NEXT: $vgpr1 = COPY [[COPY14]] + ; NOCOMBINE-NEXT: $vgpr2 = COPY [[COPY15]] + ; NOCOMBINE-NEXT: $vgpr3 = COPY [[COPY16]] + ; NOCOMBINE-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %poisonable = add nsw <4 x i32> %b, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> + %wide = shufflevector <4 x i32> %a, <4 x i32> %poisonable, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %shifted = lshr <8 x i32> %wide, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + %fr = freeze <8 x i32> %shifted + %ext = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %fr, i64 0) + ret <4 x i32> %ext +} `````````` </details> https://github.com/llvm/llvm-project/pull/200932 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
