[llvm-branch-commits] [llvm] [AArch64] Improve cost model for legal subvec insert/extract (PR #81135)
https://github.com/huntergr-arm edited https://github.com/llvm/llvm-project/pull/81135 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#80296 (PR #80408)
https://github.com/huntergr-arm approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/80408 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Add support for linear arguments for vector function variants (PR #73941)
https://github.com/huntergr-arm updated https://github.com/llvm/llvm-project/pull/73941 >From 79dd9b74ec3bab7108cc595a94d10222e376bcb1 Mon Sep 17 00:00:00 2001 From: Graham Hunter Date: Wed, 11 Oct 2023 17:06:09 +0100 Subject: [PATCH] [LV] Add support for linear arguments for vector function variants If we have vectorized variants of a function which take linear parameters, we should be able to vectorize assuming the strides match. --- .../Transforms/Vectorize/LoopVectorize.cpp| 24 ++ .../AArch64/vector-call-linear-args.ll| 44 --- 2 files changed, 53 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 09a6e01226ab6..4b6eac56597c2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7035,6 +7035,30 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { ParamsOk = false; break; } + case VFParamKind::OMP_Linear: { +Value *ScalarParam = CI->getArgOperand(Param.ParamPos); +// Find the stride for the scalar parameter in this loop and see if +// it matches the stride for the variant. +// TODO: do we need to figure out the cost of an extract to get the +// first lane? Or do we hope that it will be folded away? 
+ScalarEvolution *SE = PSE.getSE(); +const auto *SAR = +dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam)); + +if (!SAR || SAR->getLoop() != TheLoop) { + ParamsOk = false; + break; +} + +const SCEVConstant *Step = +dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE)); + +if (!Step || +Step->getAPInt().getSExtValue() != Param.LinearStepOrPos) + ParamsOk = false; + +break; + } case VFParamKind::GlobalPredicate: UsesMask = true; break; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll index cd133371f66ce..876d58131bd7a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call" --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(foo|bar|baz|quux)" --version 2 ; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=NEON ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=SVE_OR_NEON ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF @@ -10,15 +10,18 @@ target triple = "aarch64-unknown-linux-gnu" define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) { ; NEON-LABEL: define void @test_linear8 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { +; NEON:[[TMP3:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP2:%.*]]) ; NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear8 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; 
SVE_OR_NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] +; SVE_OR_NEON:[[TMP13:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_nomask_sve(ptr [[TMP12:%.*]]) +; SVE_OR_NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear8 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; SVE_TF:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] +; SVE_TF:[[TMP19:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr [[TMP18:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]]) +; SVE_TF:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; entry: br label %for.body @@ -40,15 +43,17 @@ for.cond.cleanup: define void @test_vector_linear4(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) { ; NEON-LABEL: define void @test_vector_linear4 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { +; NEON:[[TMP5:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) ; NEON:[[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_vector_linear4 ; SVE_OR_NEON-SAME: (ptr noalia
[llvm-branch-commits] [llvm] [LV] Add support for linear arguments for vector function variants (PR #73941)
huntergr-arm wrote: Rebased after test changes. https://github.com/llvm/llvm-project/pull/73941 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Add support for linear arguments for vector function variants (PR #73941)
https://github.com/huntergr-arm updated https://github.com/llvm/llvm-project/pull/73941 >From a3f46f46483b2d83a5b38c197caebf7f68af8d56 Mon Sep 17 00:00:00 2001 From: Graham Hunter Date: Wed, 11 Oct 2023 17:06:09 +0100 Subject: [PATCH] [LV] Add support for linear arguments for vector function variants If we have vectorized variants of a function which take linear parameters, we should be able to vectorize assuming the strides match. --- .../Transforms/Vectorize/LoopVectorize.cpp| 24 ++ .../AArch64/vector-call-linear-args.ll| 44 --- 2 files changed, 53 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 09a6e01226ab68c..4b6eac56597c232 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7035,6 +7035,30 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { ParamsOk = false; break; } + case VFParamKind::OMP_Linear: { +Value *ScalarParam = CI->getArgOperand(Param.ParamPos); +// Find the stride for the scalar parameter in this loop and see if +// it matches the stride for the variant. +// TODO: do we need to figure out the cost of an extract to get the +// first lane? Or do we hope that it will be folded away? 
+ScalarEvolution *SE = PSE.getSE(); +const auto *SAR = +dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam)); + +if (!SAR || SAR->getLoop() != TheLoop) { + ParamsOk = false; + break; +} + +const SCEVConstant *Step = +dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE)); + +if (!Step || +Step->getAPInt().getSExtValue() != Param.LinearStepOrPos) + ParamsOk = false; + +break; + } case VFParamKind::GlobalPredicate: UsesMask = true; break; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll index ba9d57e1e4a16fd..ee7f243d5b3734c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call" --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(foo|bar|baz|quux)" --version 2 ; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=NEON ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=SVE_OR_NEON ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF @@ -10,15 +10,18 @@ target triple = "aarch64-unknown-linux-gnu" define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) { ; NEON-LABEL: define void @test_linear ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { +; NEON:[[TMP3:%.*]] = call <2 x i64> @neon_foo_linear(ptr [[TMP2:%.*]]) ; NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; 
SVE_OR_NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] +; SVE_OR_NEON:[[TMP13:%.*]] = call <vscale x 2 x i64> @sve_foo_linear_nomask(ptr [[TMP12:%.*]]) +; SVE_OR_NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; SVE_TF:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] +; SVE_TF:[[TMP19:%.*]] = call <vscale x 2 x i64> @sve_foo_linear(ptr [[TMP18:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]]) +; SVE_TF:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; entry: br label %for.body @@ -40,15 +43,17 @@ for.cond.cleanup: define void @test_linear_with_vector(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) { ; NEON-LABEL: define void @test_linear_with_vector ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { +; NEON:[[TMP5:%.*]] = call <4 x i32> @neon_baz_vector_and_linear(<4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) ; NEON:[[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear_with_vector ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readn
[llvm-branch-commits] [llvm] a3f46f4 - [LV] Add support for linear arguments for vector function variants
Author: Graham Hunter Date: 2023-12-01T16:35:34Z New Revision: a3f46f46483b2d83a5b38c197caebf7f68af8d56 URL: https://github.com/llvm/llvm-project/commit/a3f46f46483b2d83a5b38c197caebf7f68af8d56 DIFF: https://github.com/llvm/llvm-project/commit/a3f46f46483b2d83a5b38c197caebf7f68af8d56.diff LOG: [LV] Add support for linear arguments for vector function variants If we have vectorized variants of a function which take linear parameters, we should be able to vectorize assuming the strides match. Added: Modified: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll Removed: diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 09a6e01226ab68c..4b6eac56597c232 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7035,6 +7035,30 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { ParamsOk = false; break; } + case VFParamKind::OMP_Linear: { +Value *ScalarParam = CI->getArgOperand(Param.ParamPos); +// Find the stride for the scalar parameter in this loop and see if +// it matches the stride for the variant. +// TODO: do we need to figure out the cost of an extract to get the +// first lane? Or do we hope that it will be folded away? 
+ScalarEvolution *SE = PSE.getSE(); +const auto *SAR = +dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam)); + +if (!SAR || SAR->getLoop() != TheLoop) { + ParamsOk = false; + break; +} + +const SCEVConstant *Step = +dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE)); + +if (!Step || +Step->getAPInt().getSExtValue() != Param.LinearStepOrPos) + ParamsOk = false; + +break; + } case VFParamKind::GlobalPredicate: UsesMask = true; break; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll index ba9d57e1e4a16fd..ee7f243d5b3734c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call" --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(foo|bar|baz|quux)" --version 2 ; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=NEON ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=SVE_OR_NEON ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF @@ -10,15 +10,18 @@ target triple = "aarch64-unknown-linux-gnu" define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) { ; NEON-LABEL: define void @test_linear ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { +; NEON:[[TMP3:%.*]] = call <2 x i64> @neon_foo_linear(ptr [[TMP2:%.*]]) ; NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; 
SVE_OR_NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] +; SVE_OR_NEON:[[TMP13:%.*]] = call <vscale x 2 x i64> @sve_foo_linear_nomask(ptr [[TMP12:%.*]]) +; SVE_OR_NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; SVE_TF:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] +; SVE_TF:[[TMP19:%.*]] = call <vscale x 2 x i64> @sve_foo_linear(ptr [[TMP18:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]]) +; SVE_TF:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; entry: br label %for.body @@ -40,15 +43,17 @@ for.cond.cleanup: define void @test_linear_with_vector(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) { ; NEON-LABEL: define void @test_linear_with_vector ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { +; NEON:[[TMP5:%.*]] = call <4 x i32> @neon_baz_vector_and_linear(<4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) ; NEON:[[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-
[llvm-branch-commits] [llvm] 6232394 - More run lines, neon mappings, negative stride test
Author: Graham Hunter Date: 2023-12-01T16:06:25Z New Revision: 62323944c4a6447dab25145de7dd816a54e499c4 URL: https://github.com/llvm/llvm-project/commit/62323944c4a6447dab25145de7dd816a54e499c4 DIFF: https://github.com/llvm/llvm-project/commit/62323944c4a6447dab25145de7dd816a54e499c4.diff LOG: More run lines, neon mappings, negative stride test Added: Modified: llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll Removed: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll index ef6b8e1d83f3811..ba9d57e1e4a16fd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll @@ -1,26 +1,24 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt < %s -passes=loop-vectorize,instsimplify -force-vector-interleave=1 -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call" --version 2 +; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=NEON +; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=SVE_OR_NEON +; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF target triple = "aarch64-unknown-linux-gnu" ; A call whose argument can remain a scalar because it's sequential and only the ; starting value is required. 
-define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) #0 { -; CHECK-LABEL: define void @test_linear -; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT:br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT:[[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT:[[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR1:[0-9]+]] -; CHECK-NEXT:[[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT:store i64 [[CALL]], ptr [[GEPA]], align 8 -; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT:[[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT:br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT:ret void +define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) { +; NEON-LABEL: define void @test_linear +; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { +; NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]] +; +; SVE_OR_NEON-LABEL: define void @test_linear +; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; SVE_OR_NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] +; +; SVE_TF-LABEL: define void @test_linear +; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; SVE_TF:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] ; entry: br label %for.body @@ -28,9 +26,9 @@ entry: for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %gepb = getelementptr i64, ptr %b, i64 %indvars.iv - %call = call i64 @foo(ptr %gepb) #1 + %data = call i64 @foo(ptr %gepb) #0 %gepa = getelementptr inbounds i64, ptr %a, i64 
%indvars.iv - store i64 %call, ptr %gepa + store i64 %data, ptr %gepa %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -39,32 +37,30 @@ for.cond.cleanup: ret void } -define void @test_linear_with_mask(ptr noalias %a, ptr readnone %b, i64 %n) #0 { -; CHECK-LABEL: define void @test_linear_with_mask -; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT:br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT:[[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT:[[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR2:[0-9]+]] -; CHECK-NEXT:[[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT:store i64 [[CALL]], ptr [[GEPA]], align 8 -; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_
[llvm-branch-commits] [llvm] [LV] Add support for linear arguments for vector function variants (PR #73941)
huntergr-arm wrote: Stacked PR on top of https://github.com/llvm/llvm-project/pull/73936 https://github.com/llvm/llvm-project/pull/73941 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Add support for linear arguments for vector function variants (PR #73941)
https://github.com/huntergr-arm created https://github.com/llvm/llvm-project/pull/73941 If we have vectorized variants of a function which take linear parameters, we should be able to vectorize assuming the strides match. >From 0cc0f46e6626d73b3a7cc107ddb128ec060ea0ea Mon Sep 17 00:00:00 2001 From: Graham Hunter Date: Wed, 11 Oct 2023 17:06:09 +0100 Subject: [PATCH] [LV] Add support for linear arguments for vector function variants If we have vectorized variants of a function which take linear parameters, we should be able to vectorize assuming the strides match. --- .../Transforms/Vectorize/LoopVectorize.cpp| 24 ++ .../AArch64/vector-call-linear-args.ll| 247 -- 2 files changed, 252 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 09a6e01226ab68c..4b6eac56597c232 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7035,6 +7035,30 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { ParamsOk = false; break; } + case VFParamKind::OMP_Linear: { +Value *ScalarParam = CI->getArgOperand(Param.ParamPos); +// Find the stride for the scalar parameter in this loop and see if +// it matches the stride for the variant. +// TODO: do we need to figure out the cost of an extract to get the +// first lane? Or do we hope that it will be folded away? 
+ScalarEvolution *SE = PSE.getSE(); +const auto *SAR = +dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam)); + +if (!SAR || SAR->getLoop() != TheLoop) { + ParamsOk = false; + break; +} + +const SCEVConstant *Step = +dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE)); + +if (!Step || +Step->getAPInt().getSExtValue() != Param.LinearStepOrPos) + ParamsOk = false; + +break; + } case VFParamKind::GlobalPredicate: UsesMask = true; break; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll index ef6b8e1d83f3811..c6faa812187c07f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll @@ -9,16 +9,50 @@ define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) #0 { ; CHECK-LABEL: define void @test_linear ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: +; CHECK-NEXT:[[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT:[[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT:[[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT:br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT:[[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT:[[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT:[[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT:[[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT:[[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT:[[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT:[[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer +; CHECK-NEXT:br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT:[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT:[[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT:[[TMP7:%.*]] = getelementptr i64, ptr [[B]], <vscale x 2 x i64> [[VEC_IND]] +; CHECK-NEXT:[[TMP8:%.*]] = extractelement <vscale x 2 x ptr> [[TMP7]], i32 0 +; CHECK-NEXT:[[TMP9:%.*]] = call <vscale x 2 x i64> @foo_linear_nomask(ptr [[TMP8]]) +; CHECK-NEXT:[[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT:store <vscale x 2 x i64> [[TMP9]], ptr [[TMP10]], align 8 +; CHECK-NEXT:[[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT:[[TMP12:%.*]] = mul i64 [[TMP11]], 2 +; CHECK-NEXT:[[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT:[[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT:[[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT:br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT:[[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT:br i1 [[CMP_N]], label [[FOR_COND_
[llvm-branch-commits] [llvm] 0cc0f46 - [LV] Add support for linear arguments for vector function variants
Author: Graham Hunter Date: 2023-11-30T13:39:06Z New Revision: 0cc0f46e6626d73b3a7cc107ddb128ec060ea0ea URL: https://github.com/llvm/llvm-project/commit/0cc0f46e6626d73b3a7cc107ddb128ec060ea0ea DIFF: https://github.com/llvm/llvm-project/commit/0cc0f46e6626d73b3a7cc107ddb128ec060ea0ea.diff LOG: [LV] Add support for linear arguments for vector function variants If we have vectorized variants of a function which take linear parameters, we should be able to vectorize assuming the strides match. Added: Modified: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll Removed: diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 09a6e01226ab68c..4b6eac56597c232 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7035,6 +7035,30 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { ParamsOk = false; break; } + case VFParamKind::OMP_Linear: { +Value *ScalarParam = CI->getArgOperand(Param.ParamPos); +// Find the stride for the scalar parameter in this loop and see if +// it matches the stride for the variant. +// TODO: do we need to figure out the cost of an extract to get the +// first lane? Or do we hope that it will be folded away? 
+ScalarEvolution *SE = PSE.getSE(); +const auto *SAR = +dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam)); + +if (!SAR || SAR->getLoop() != TheLoop) { + ParamsOk = false; + break; +} + +const SCEVConstant *Step = +dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE)); + +if (!Step || +Step->getAPInt().getSExtValue() != Param.LinearStepOrPos) + ParamsOk = false; + +break; + } case VFParamKind::GlobalPredicate: UsesMask = true; break; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll index ef6b8e1d83f3811..c6faa812187c07f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll @@ -9,16 +9,50 @@ define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) #0 { ; CHECK-LABEL: define void @test_linear ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: +; CHECK-NEXT:[[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT:[[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT:[[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT:br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT:[[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT:[[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT:[[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT:[[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT:[[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT:[[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT:[[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer +; CHECK-NEXT:br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT:[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT:[[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT:[[TMP7:%.*]] = getelementptr i64, ptr [[B]], <vscale x 2 x i64> [[VEC_IND]] +; CHECK-NEXT:[[TMP8:%.*]] = extractelement <vscale x 2 x ptr> [[TMP7]], i32 0 +; CHECK-NEXT:[[TMP9:%.*]] = call <vscale x 2 x i64> @foo_linear_nomask(ptr [[TMP8]]) +; CHECK-NEXT:[[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT:store <vscale x 2 x i64> [[TMP9]], ptr [[TMP10]], align 8 +; CHECK-NEXT:[[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT:[[TMP12:%.*]] = mul i64 [[TMP11]], 2 +; CHECK-NEXT:[[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT:[[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT:[[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT:br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT:[[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT:br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:
[llvm-branch-commits] [llvm] 557b422 - [LV] Linear argument tests for vectorization of function calls
Author: Graham Hunter Date: 2023-11-30T13:18:06Z New Revision: 557b422bbcb5c2f2051c806a99c8d2e249717525 URL: https://github.com/llvm/llvm-project/commit/557b422bbcb5c2f2051c806a99c8d2e249717525 DIFF: https://github.com/llvm/llvm-project/commit/557b422bbcb5c2f2051c806a99c8d2e249717525.diff LOG: [LV] Linear argument tests for vectorization of function calls Added: llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll Modified: Removed: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll new file mode 100644 index 000..ef6b8e1d83f3811 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll @@ -0,0 +1,275 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -passes=loop-vectorize,instsimplify -force-vector-interleave=1 -S | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; A call whose argument can remain a scalar because it's sequential and only the +; starting value is required. 
+define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) #0 { +; CHECK-LABEL: define void @test_linear +; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT:br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT:[[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT:[[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT:[[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT:store i64 [[CALL]], ptr [[GEPA]], align 8 +; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT:[[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-NEXT:br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT:ret void +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepb = getelementptr i64, ptr %b, i64 %indvars.iv + %call = call i64 @foo(ptr %gepb) #1 + %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + store i64 %call, ptr %gepa + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +define void @test_linear_with_mask(ptr noalias %a, ptr readnone %b, i64 %n) #0 { +; CHECK-LABEL: define void @test_linear_with_mask +; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT:br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT:[[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT:[[CALL:%.*]] = call i64 
@foo(ptr [[GEPB]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT:[[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT:store i64 [[CALL]], ptr [[GEPA]], align 8 +; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT:[[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-NEXT:br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT:ret void +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepb = getelementptr i64, ptr %b, i64 %indvars.iv + %call = call i64 @foo(ptr %gepb) #2 + %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + store i64 %call, ptr %gepa + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +define void @test_linear_with_vector(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) #0 { +; CHECK-LABEL: define void @test_linear_with_vector +; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT:br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT:[[GEPC:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDVARS_IV]] +; CHECK-NEXT:[[DATA:%.*]] = load i32, ptr [[GEPC]], align 8 +; CHECK-NEXT:[[GEPB:%.*]] = geteleme