llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-selectiondag Author: Krzysztof Drewniak (krzysz00) <details> <summary>Changes</summary> Generalize the extract_subvector-of-extract_subvector fold to compose nonzero indices instead of only handling an outer index of zero. AI note: an LLM generated the code and the test, I've read them Co-Authored-By: OpenAI Codex <codex@<!-- -->openai.com> --- <sub>Stack created with <a href="https://github.com/github/gh-stack">GitHub Stacks CLI</a> • <a href="https://gh.io/stacks-feedback">Give Feedback 💬</a></sub> --- Full diff: https://github.com/llvm/llvm-project/pull/200935.diff 2 Files Affected: - (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+8-5) - (added) llvm/test/CodeGen/AMDGPU/dagcombine-extract-extract.ll (+43) ``````````diff diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0c9820fb64de9..dd74e63744f2e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -27559,17 +27559,20 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { return NarrowLoad; // Combine an extract of an extract into a single extract_subvector. - // ext (ext X, C), 0 --> ext X, C - if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) { + // ext (ext X, C1), C2 --> ext X, C1 + C2 + if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) { // Both indices must have the same scaling factor and C has to be a // multiple of the new result type's known minimum vector length. 
+ uint64_t InnerExtIdx = V.getConstantOperandVal(1); + uint64_t NewExtIdx = InnerExtIdx + ExtIdx; if (V.getValueType().isScalableVector() == NVT.isScalableVector() && - V.getConstantOperandVal(1) % NVT.getVectorMinNumElements() == 0 && + NewExtIdx % NVT.getVectorMinNumElements() == 0 && TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(), - V.getConstantOperandVal(1)) && + NewExtIdx) && TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) { + SDValue NewIndex = DAG.getVectorIdxConstant(NewExtIdx, DL); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0), - V.getOperand(1)); + NewIndex); } } diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-extract-extract.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-extract-extract.ll new file mode 100644 index 0000000000000..aa5b2f80542c7 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-extract-extract.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -O2 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 \ +; RUN: -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck %s \ +; RUN: --check-prefix=COMBINE --implicit-check-not=REG_SEQUENCE +; RUN: llc -O2 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 \ +; RUN: -verify-machineinstrs -combiner-disabled -stop-after=amdgpu-isel < %s \ +; RUN: | FileCheck %s --check-prefix=NOCOMBINE + +declare <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32>, i64 immarg) +declare <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32>, i64 immarg) + +define <2 x i32> @extract_of_extract_nonzero(<8 x i32> %x) nounwind { + ; COMBINE-LABEL: name: extract_of_extract_nonzero + ; COMBINE: bb.0 (%ir-block.0): + ; COMBINE-NEXT: liveins: $vgpr6, $vgpr7 + ; COMBINE-NEXT: {{ $}} + ; COMBINE-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr7 + ; COMBINE-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; COMBINE-NEXT: $vgpr0 = COPY [[COPY1]] + ; COMBINE-NEXT: $vgpr1 = COPY [[COPY]] + ; 
COMBINE-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; + ; NOCOMBINE-LABEL: name: extract_of_extract_nonzero + ; NOCOMBINE: bb.0 (%ir-block.0): + ; NOCOMBINE-NEXT: liveins: $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; NOCOMBINE-NEXT: {{ $}} + ; NOCOMBINE-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr7 + ; NOCOMBINE-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; NOCOMBINE-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; NOCOMBINE-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; NOCOMBINE-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3 + ; NOCOMBINE-NEXT: [[COPY4:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE]].sub3 + ; NOCOMBINE-NEXT: [[COPY5:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE]].sub2 + ; NOCOMBINE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:av_64_align2 = REG_SEQUENCE killed [[COPY5]], %subreg.sub0, killed [[COPY4]], %subreg.sub1 + ; NOCOMBINE-NEXT: [[COPY6:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub0 + ; NOCOMBINE-NEXT: [[COPY7:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub1 + ; NOCOMBINE-NEXT: $vgpr0 = COPY [[COPY6]] + ; NOCOMBINE-NEXT: $vgpr1 = COPY [[COPY7]] + ; NOCOMBINE-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + %mid = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %x, i64 4) + %out = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %mid, i64 2) + ret <2 x i32> %out +} `````````` </details> https://github.com/llvm/llvm-project/pull/200935 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
