[llvm-branch-commits] [llvm] [AArch64] Improve cost model for legal subvec insert/extract (PR #81135)

2024-02-12 Thread Graham Hunter via llvm-branch-commits

https://github.com/huntergr-arm edited 
https://github.com/llvm/llvm-project/pull/81135


[llvm-branch-commits] [llvm] PR for llvm/llvm-project#80296 (PR #80408)

2024-02-02 Thread Graham Hunter via llvm-branch-commits

https://github.com/huntergr-arm approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/80408


[llvm-branch-commits] [llvm] [LV] Add support for linear arguments for vector function variants (PR #73941)

2023-12-05 Thread Graham Hunter via llvm-branch-commits

https://github.com/huntergr-arm updated 
https://github.com/llvm/llvm-project/pull/73941

From 79dd9b74ec3bab7108cc595a94d10222e376bcb1 Mon Sep 17 00:00:00 2001
From: Graham Hunter 
Date: Wed, 11 Oct 2023 17:06:09 +0100
Subject: [PATCH] [LV] Add support for linear arguments for vector function
 variants

If we have vectorized variants of a function which take linear
parameters, we should be able to vectorize assuming the strides
match.
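
For context, a "linear" parameter advances by a fixed stride on each loop
iteration, typically declared via OpenMP "declare simd". A minimal sketch of
the kind of source this enables (illustrative only; the names and the
declaration below are not taken from this patch):

// foo's pointer argument is linear: it advances by one i64 (8 bytes) per
// iteration, so a vector variant only needs the first lane's pointer.
#pragma omp declare simd linear(p : 1) notinbranch
long foo(long *p);

void caller(long *a, long *b, long n) {
  for (long i = 0; i < n; ++i)
    a[i] = foo(&b[i]); // &b[i] strides by 8 bytes each iteration.
}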
---
 .../Transforms/Vectorize/LoopVectorize.cpp| 24 ++
 .../AArch64/vector-call-linear-args.ll| 44 ---
 2 files changed, 53 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 09a6e01226ab6..4b6eac56597c2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7035,6 +7035,30 @@ void 
LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
   ParamsOk = false;
 break;
   }
+  case VFParamKind::OMP_Linear: {
+Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
+// Find the stride for the scalar parameter in this loop and see if
+// it matches the stride for the variant.
+// TODO: do we need to figure out the cost of an extract to get the
+// first lane? Or do we hope that it will be folded away?
+ScalarEvolution *SE = PSE.getSE();
+const auto *SAR =
+dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam));
+
+if (!SAR || SAR->getLoop() != TheLoop) {
+  ParamsOk = false;
+  break;
+}
+
+const SCEVConstant *Step =
+dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE));
+
+if (!Step ||
+Step->getAPInt().getSExtValue() != Param.LinearStepOrPos)
+  ParamsOk = false;
+
+break;
+  }
   case VFParamKind::GlobalPredicate:
 UsesMask = true;
 break;
diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
index cd133371f66ce..876d58131bd7a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --filter "call" --version 2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --filter "call.*(foo|bar|baz|quux)" --version 2
 ; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | 
FileCheck %s --check-prefixes=NEON
 ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 
-S | FileCheck %s --check-prefixes=SVE_OR_NEON
 ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 
-S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s 
--check-prefixes=SVE_TF
@@ -10,15 +10,18 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) {
 ; NEON-LABEL: define void @test_linear8
 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
+; NEON:[[TMP3:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr 
[[TMP2:%.*]])
 ; NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear8
 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 
[[N:%.*]]) #[[ATTR0:[0-9]+]] {
-; SVE_OR_NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) 
#[[ATTR1:[0-9]+]]
+; SVE_OR_NEON:[[TMP13:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_nomask_sve(ptr [[TMP12:%.*]])
+; SVE_OR_NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) 
#[[ATTR2:[0-9]+]]
 ;
 ; SVE_TF-LABEL: define void @test_linear8
 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0:[0-9]+]] {
-; SVE_TF:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
+; SVE_TF:[[TMP19:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr [[TMP18:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE_TF:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
 ;
 entry:
   br label %for.body
@@ -40,15 +43,17 @@ for.cond.cleanup:
 define void @test_vector_linear4(ptr noalias %a, ptr readnone %b, ptr readonly 
%c, i64 %n) {
 ; NEON-LABEL: define void @test_vector_linear4
 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly 
[[C:%.*]], i64 [[N:%.*]]) {
+; NEON:[[TMP5:%.*]] = call <4 x i32> 
@vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD:%.*]], ptr 
[[TMP4:%.*]])
 ; NEON:[[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) 
#[[ATTR1:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_vector_linear4
 ; SVE_OR_NEON-SAME: (ptr noalia

[llvm-branch-commits] [llvm] [LV] Add support for linear arguments for vector function variants (PR #73941)

2023-12-01 Thread Graham Hunter via llvm-branch-commits

huntergr-arm wrote:

Rebased after test changes.

https://github.com/llvm/llvm-project/pull/73941


[llvm-branch-commits] [llvm] [LV] Add support for linear arguments for vector function variants (PR #73941)

2023-12-01 Thread Graham Hunter via llvm-branch-commits

https://github.com/huntergr-arm updated 
https://github.com/llvm/llvm-project/pull/73941

From a3f46f46483b2d83a5b38c197caebf7f68af8d56 Mon Sep 17 00:00:00 2001
From: Graham Hunter 
Date: Wed, 11 Oct 2023 17:06:09 +0100
Subject: [PATCH] [LV] Add support for linear arguments for vector function
 variants

If we have vectorized variants of a function which take linear
parameters, we should be able to vectorize assuming the strides
match.
---
 .../Transforms/Vectorize/LoopVectorize.cpp| 24 ++
 .../AArch64/vector-call-linear-args.ll| 44 ---
 2 files changed, 53 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 09a6e01226ab68c..4b6eac56597c232 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7035,6 +7035,30 @@ void 
LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
   ParamsOk = false;
 break;
   }
+  case VFParamKind::OMP_Linear: {
+Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
+// Find the stride for the scalar parameter in this loop and see if
+// it matches the stride for the variant.
+// TODO: do we need to figure out the cost of an extract to get the
+// first lane? Or do we hope that it will be folded away?
+ScalarEvolution *SE = PSE.getSE();
+const auto *SAR =
+dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam));
+
+if (!SAR || SAR->getLoop() != TheLoop) {
+  ParamsOk = false;
+  break;
+}
+
+const SCEVConstant *Step =
+dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE));
+
+if (!Step ||
+Step->getAPInt().getSExtValue() != Param.LinearStepOrPos)
+  ParamsOk = false;
+
+break;
+  }
   case VFParamKind::GlobalPredicate:
 UsesMask = true;
 break;
diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
index ba9d57e1e4a16fd..ee7f243d5b3734c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --filter "call" --version 2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --filter "call.*(foo|bar|baz|quux)" --version 2
 ; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | 
FileCheck %s --check-prefixes=NEON
 ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 
-S | FileCheck %s --check-prefixes=SVE_OR_NEON
 ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 
-S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s 
--check-prefixes=SVE_TF
@@ -10,15 +10,18 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) {
 ; NEON-LABEL: define void @test_linear
 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
+; NEON:[[TMP3:%.*]] = call <2 x i64> @neon_foo_linear(ptr [[TMP2:%.*]])
 ; NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear
 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 
[[N:%.*]]) #[[ATTR0:[0-9]+]] {
-; SVE_OR_NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) 
#[[ATTR1:[0-9]+]]
+; SVE_OR_NEON:[[TMP13:%.*]] = call <vscale x 2 x i64> @sve_foo_linear_nomask(ptr [[TMP12:%.*]])
+; SVE_OR_NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) 
#[[ATTR2:[0-9]+]]
 ;
 ; SVE_TF-LABEL: define void @test_linear
 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0:[0-9]+]] {
-; SVE_TF:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
+; SVE_TF:[[TMP19:%.*]] = call <vscale x 2 x i64> @sve_foo_linear(ptr [[TMP18:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE_TF:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
 ;
 entry:
   br label %for.body
@@ -40,15 +43,17 @@ for.cond.cleanup:
 define void @test_linear_with_vector(ptr noalias %a, ptr readnone %b, ptr 
readonly %c, i64 %n) {
 ; NEON-LABEL: define void @test_linear_with_vector
 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly 
[[C:%.*]], i64 [[N:%.*]]) {
+; NEON:[[TMP5:%.*]] = call <4 x i32> @neon_baz_vector_and_linear(<4 x i32> 
[[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
 ; NEON:[[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) 
#[[ATTR1:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear_with_vector
 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readn

[llvm-branch-commits] [llvm] a3f46f4 - [LV] Add support for linear arguments for vector function variants

2023-12-01 Thread Graham Hunter via llvm-branch-commits

Author: Graham Hunter
Date: 2023-12-01T16:35:34Z
New Revision: a3f46f46483b2d83a5b38c197caebf7f68af8d56

URL: 
https://github.com/llvm/llvm-project/commit/a3f46f46483b2d83a5b38c197caebf7f68af8d56
DIFF: 
https://github.com/llvm/llvm-project/commit/a3f46f46483b2d83a5b38c197caebf7f68af8d56.diff

LOG: [LV] Add support for linear arguments for vector function variants

If we have vectorized variants of a function which take linear
parameters, we should be able to vectorize assuming the strides
match.
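
As background, the variant names matched here follow the Vector Function ABI
name mangling. A small standalone sketch of that layout (the field values
below are assumptions for illustration, not taken from this commit):

#include <iostream>
#include <string>

// _ZGV<isa><mask><vlen><params>_<scalar-name>; "l8" denotes a linear
// parameter with a constant step of 8 bytes, the value the new OMP_Linear
// case compares against the loop's pointer stride.
int main() {
  std::string Isa = "n";     // AdvSIMD (NEON)
  std::string Mask = "N";    // unmasked variant
  std::string Vlen = "2";    // two lanes
  std::string Params = "l8"; // one linear parameter, step 8
  std::cout << "_ZGV" << Isa << Mask << Vlen << Params << "_foo\n";
  return 0;
}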

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 09a6e01226ab68c..4b6eac56597c232 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7035,6 +7035,30 @@ void 
LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
   ParamsOk = false;
 break;
   }
+  case VFParamKind::OMP_Linear: {
+Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
+// Find the stride for the scalar parameter in this loop and see if
+// it matches the stride for the variant.
+// TODO: do we need to figure out the cost of an extract to get the
+// first lane? Or do we hope that it will be folded away?
+ScalarEvolution *SE = PSE.getSE();
+const auto *SAR =
+dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam));
+
+if (!SAR || SAR->getLoop() != TheLoop) {
+  ParamsOk = false;
+  break;
+}
+
+const SCEVConstant *Step =
+dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE));
+
+if (!Step ||
+Step->getAPInt().getSExtValue() != Param.LinearStepOrPos)
+  ParamsOk = false;
+
+break;
+  }
   case VFParamKind::GlobalPredicate:
 UsesMask = true;
 break;

diff  --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
index ba9d57e1e4a16fd..ee7f243d5b3734c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --filter "call" --version 2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --filter "call.*(foo|bar|baz|quux)" --version 2
 ; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | 
FileCheck %s --check-prefixes=NEON
 ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 
-S | FileCheck %s --check-prefixes=SVE_OR_NEON
 ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 
-S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s 
--check-prefixes=SVE_TF
@@ -10,15 +10,18 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) {
 ; NEON-LABEL: define void @test_linear
 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
+; NEON:[[TMP3:%.*]] = call <2 x i64> @neon_foo_linear(ptr [[TMP2:%.*]])
 ; NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear
 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 
[[N:%.*]]) #[[ATTR0:[0-9]+]] {
-; SVE_OR_NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) 
#[[ATTR1:[0-9]+]]
+; SVE_OR_NEON:[[TMP13:%.*]] = call <vscale x 2 x i64> @sve_foo_linear_nomask(ptr [[TMP12:%.*]])
+; SVE_OR_NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) 
#[[ATTR2:[0-9]+]]
 ;
 ; SVE_TF-LABEL: define void @test_linear
 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0:[0-9]+]] {
-; SVE_TF:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
+; SVE_TF:[[TMP19:%.*]] = call <vscale x 2 x i64> @sve_foo_linear(ptr [[TMP18:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE_TF:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
 ;
 entry:
   br label %for.body
@@ -40,15 +43,17 @@ for.cond.cleanup:
 define void @test_linear_with_vector(ptr noalias %a, ptr readnone %b, ptr 
readonly %c, i64 %n) {
 ; NEON-LABEL: define void @test_linear_with_vector
 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly 
[[C:%.*]], i64 [[N:%.*]]) {
+; NEON:[[TMP5:%.*]] = call <4 x i32> @neon_baz_vector_and_linear(<4 x i32> 
[[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
 ; NEON:[[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) 
#[[ATTR1:[0-

[llvm-branch-commits] [llvm] 6232394 - More run lines, neon mappings, negative stride test

2023-12-01 Thread Graham Hunter via llvm-branch-commits

Author: Graham Hunter
Date: 2023-12-01T16:06:25Z
New Revision: 62323944c4a6447dab25145de7dd816a54e499c4

URL: 
https://github.com/llvm/llvm-project/commit/62323944c4a6447dab25145de7dd816a54e499c4
DIFF: 
https://github.com/llvm/llvm-project/commit/62323944c4a6447dab25145de7dd816a54e499c4.diff

LOG: More run lines, neon mappings, negative stride test

Added: 


Modified: 
llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll

Removed: 




diff  --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
index ef6b8e1d83f3811..ba9d57e1e4a16fd 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -1,26 +1,24 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 2
-; RUN: opt < %s -passes=loop-vectorize,instsimplify -force-vector-interleave=1 
-S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --filter "call" --version 2
+; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | 
FileCheck %s --check-prefixes=NEON
+; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 
-S | FileCheck %s --check-prefixes=SVE_OR_NEON
+; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 
-S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s 
--check-prefixes=SVE_TF
 
 target triple = "aarch64-unknown-linux-gnu"
 
 ; A call whose argument can remain a scalar because it's sequential and only the
 ; starting value is required.
-define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) #0 {
-; CHECK-LABEL: define void @test_linear
-; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:br label [[FOR_BODY:%.*]]
-; CHECK:   for.body:
-; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 
[[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:[[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 
[[INDVARS_IV]]
-; CHECK-NEXT:[[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR1:[0-9]+]]
-; CHECK-NEXT:[[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 
[[INDVARS_IV]]
-; CHECK-NEXT:store i64 [[CALL]], ptr [[GEPA]], align 8
-; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:[[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT:br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label 
[[FOR_BODY]]
-; CHECK:   for.cond.cleanup:
-; CHECK-NEXT:ret void
+define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) {
+; NEON-LABEL: define void @test_linear
+; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
+; NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
+;
+; SVE_OR_NEON-LABEL: define void @test_linear
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 
[[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; SVE_OR_NEON:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) 
#[[ATTR1:[0-9]+]]
+;
+; SVE_TF-LABEL: define void @test_linear
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0:[0-9]+]] {
+; SVE_TF:[[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
 ;
 entry:
   br label %for.body
@@ -28,9 +26,9 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
-  %call = call i64 @foo(ptr %gepb) #1
+  %data = call i64 @foo(ptr %gepb) #0
   %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
-  store i64 %call, ptr %gepa
+  store i64 %data, ptr %gepa
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -39,32 +37,30 @@ for.cond.cleanup:
   ret void
 }
 
-define void @test_linear_with_mask(ptr noalias %a, ptr readnone %b, i64 %n) #0 
{
-; CHECK-LABEL: define void @test_linear_with_mask
-; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:br label [[FOR_BODY:%.*]]
-; CHECK:   for.body:
-; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 
[[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:[[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 
[[INDVARS_IV]]
-; CHECK-NEXT:[[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT:[[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 
[[INDVARS_IV]]
-; CHECK-NEXT:store i64 [[CALL]], ptr [[GEPA]], align 8
-; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_

[llvm-branch-commits] [llvm] [LV] Add support for linear arguments for vector function variants (PR #73941)

2023-11-30 Thread Graham Hunter via llvm-branch-commits

huntergr-arm wrote:

Stacked PR on top of https://github.com/llvm/llvm-project/pull/73936

https://github.com/llvm/llvm-project/pull/73941


[llvm-branch-commits] [llvm] [LV] Add support for linear arguments for vector function variants (PR #73941)

2023-11-30 Thread Graham Hunter via llvm-branch-commits

https://github.com/huntergr-arm created 
https://github.com/llvm/llvm-project/pull/73941

If we have vectorized variants of a function which take linear parameters, we 
should be able to vectorize assuming the strides match.
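
A condensed sketch of the added check (simplified from the diff below; the
free-standing helper and its name are mine, not the patch's): the argument
must be an add-recurrence in the loop being vectorized whose constant step
equals the variant's declared linear step.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Returns true if ScalarParam strides through TheLoop by exactly
// DeclaredStep -- the precondition for calling a linear-parameter variant.
static bool strideMatchesVariant(ScalarEvolution &SE, const Loop *TheLoop,
                                 Value *ScalarParam, int64_t DeclaredStep) {
  const auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(ScalarParam));
  if (!AR || AR->getLoop() != TheLoop)
    return false; // Not an induction in this loop: stride unknown.
  const auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE));
  return Step && Step->getAPInt().getSExtValue() == DeclaredStep;
}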

From 0cc0f46e6626d73b3a7cc107ddb128ec060ea0ea Mon Sep 17 00:00:00 2001
From: Graham Hunter 
Date: Wed, 11 Oct 2023 17:06:09 +0100
Subject: [PATCH] [LV] Add support for linear arguments for vector function
 variants

If we have vectorized variants of a function which take linear
parameters, we should be able to vectorize assuming the strides
match.
---
 .../Transforms/Vectorize/LoopVectorize.cpp|  24 ++
 .../AArch64/vector-call-linear-args.ll| 247 --
 2 files changed, 252 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 09a6e01226ab68c..4b6eac56597c232 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7035,6 +7035,30 @@ void 
LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
   ParamsOk = false;
 break;
   }
+  case VFParamKind::OMP_Linear: {
+Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
+// Find the stride for the scalar parameter in this loop and see if
+// it matches the stride for the variant.
+// TODO: do we need to figure out the cost of an extract to get the
+// first lane? Or do we hope that it will be folded away?
+ScalarEvolution *SE = PSE.getSE();
+const auto *SAR =
+dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam));
+
+if (!SAR || SAR->getLoop() != TheLoop) {
+  ParamsOk = false;
+  break;
+}
+
+const SCEVConstant *Step =
+dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE));
+
+if (!Step ||
+Step->getAPInt().getSExtValue() != Param.LinearStepOrPos)
+  ParamsOk = false;
+
+break;
+  }
   case VFParamKind::GlobalPredicate:
 UsesMask = true;
 break;
diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
index ef6b8e1d83f3811..c6faa812187c07f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -9,16 +9,50 @@ define void @test_linear(ptr noalias %a, ptr readnone %b, i64 
%n) #0 {
 ; CHECK-LABEL: define void @test_linear
 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:[[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT:br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label 
[[VECTOR_PH:%.*]]
+; CHECK:   vector.ph:
+; CHECK-NEXT:[[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT:[[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT:[[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:[[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:[[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
+; CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:br label [[VECTOR_BODY:%.*]]
+; CHECK:   vector.body:
+; CHECK-NEXT:[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:[[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:[[TMP7:%.*]] = getelementptr i64, ptr [[B]], <vscale x 2 x i64> [[VEC_IND]]
+; CHECK-NEXT:[[TMP8:%.*]] = extractelement <vscale x 2 x ptr> [[TMP7]], i32 0
+; CHECK-NEXT:[[TMP9:%.*]] = call <vscale x 2 x i64> @foo_linear_nomask(ptr [[TMP8]])
+; CHECK-NEXT:[[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:store <vscale x 2 x i64> [[TMP9]], ptr [[TMP10]], align 8
+; CHECK-NEXT:[[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP12:%.*]] = mul i64 [[TMP11]], 2
+; CHECK-NEXT:[[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT:[[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT:[[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:   middle.block:
+; CHECK-NEXT:[[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:br i1 [[CMP_N]], label [[FOR_COND_

[llvm-branch-commits] [llvm] 0cc0f46 - [LV] Add support for linear arguments for vector function variants

2023-11-30 Thread Graham Hunter via llvm-branch-commits

Author: Graham Hunter
Date: 2023-11-30T13:39:06Z
New Revision: 0cc0f46e6626d73b3a7cc107ddb128ec060ea0ea

URL: 
https://github.com/llvm/llvm-project/commit/0cc0f46e6626d73b3a7cc107ddb128ec060ea0ea
DIFF: 
https://github.com/llvm/llvm-project/commit/0cc0f46e6626d73b3a7cc107ddb128ec060ea0ea.diff

LOG: [LV] Add support for linear arguments for vector function variants

If we have vectorized variants of a function which take linear
parameters, we should be able to vectorize assuming the strides
match.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 09a6e01226ab68c..4b6eac56597c232 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7035,6 +7035,30 @@ void 
LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
   ParamsOk = false;
 break;
   }
+  case VFParamKind::OMP_Linear: {
+Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
+// Find the stride for the scalar parameter in this loop and see if
+// it matches the stride for the variant.
+// TODO: do we need to figure out the cost of an extract to get the
+// first lane? Or do we hope that it will be folded away?
+ScalarEvolution *SE = PSE.getSE();
+const auto *SAR =
+dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam));
+
+if (!SAR || SAR->getLoop() != TheLoop) {
+  ParamsOk = false;
+  break;
+}
+
+const SCEVConstant *Step =
+dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE));
+
+if (!Step ||
+Step->getAPInt().getSExtValue() != Param.LinearStepOrPos)
+  ParamsOk = false;
+
+break;
+  }
   case VFParamKind::GlobalPredicate:
 UsesMask = true;
 break;

diff  --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
index ef6b8e1d83f3811..c6faa812187c07f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -9,16 +9,50 @@ define void @test_linear(ptr noalias %a, ptr readnone %b, i64 
%n) #0 {
 ; CHECK-LABEL: define void @test_linear
 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:[[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT:br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label 
[[VECTOR_PH:%.*]]
+; CHECK:   vector.ph:
+; CHECK-NEXT:[[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT:[[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT:[[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:[[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:[[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
+; CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:br label [[VECTOR_BODY:%.*]]
+; CHECK:   vector.body:
+; CHECK-NEXT:[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:[[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:[[TMP7:%.*]] = getelementptr i64, ptr [[B]], <vscale x 2 x i64> [[VEC_IND]]
+; CHECK-NEXT:[[TMP8:%.*]] = extractelement <vscale x 2 x ptr> [[TMP7]], i32 0
+; CHECK-NEXT:[[TMP9:%.*]] = call <vscale x 2 x i64> @foo_linear_nomask(ptr [[TMP8]])
+; CHECK-NEXT:[[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:store <vscale x 2 x i64> [[TMP9]], ptr [[TMP10]], align 8
+; CHECK-NEXT:[[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP12:%.*]] = mul i64 [[TMP11]], 2
+; CHECK-NEXT:[[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT:[[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT:[[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:   middle.block:
+; CHECK-NEXT:[[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:

[llvm-branch-commits] [llvm] 557b422 - [LV] Linear argument tests for vectorization of function calls

2023-11-30 Thread Graham Hunter via llvm-branch-commits

Author: Graham Hunter
Date: 2023-11-30T13:18:06Z
New Revision: 557b422bbcb5c2f2051c806a99c8d2e249717525

URL: 
https://github.com/llvm/llvm-project/commit/557b422bbcb5c2f2051c806a99c8d2e249717525
DIFF: 
https://github.com/llvm/llvm-project/commit/557b422bbcb5c2f2051c806a99c8d2e249717525.diff

LOG: [LV] Linear argument tests for vectorization of function calls

Added: 
llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll

Modified: 


Removed: 




diff  --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
new file mode 100644
index 000..ef6b8e1d83f3811
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -0,0 +1,275 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 2
+; RUN: opt < %s -passes=loop-vectorize,instsimplify -force-vector-interleave=1 
-S | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; A call whose argument can remain a scalar because it's sequential and only the
+; starting value is required.
+define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) #0 {
+; CHECK-LABEL: define void @test_linear
+; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:br label [[FOR_BODY:%.*]]
+; CHECK:   for.body:
+; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 
[[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:[[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 
[[INDVARS_IV]]
+; CHECK-NEXT:[[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:[[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 
[[INDVARS_IV]]
+; CHECK-NEXT:store i64 [[CALL]], ptr [[GEPA]], align 8
+; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:[[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label 
[[FOR_BODY]]
+; CHECK:   for.cond.cleanup:
+; CHECK-NEXT:ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
+  %call = call i64 @foo(ptr %gepb) #1
+  %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  store i64 %call, ptr %gepa
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_linear_with_mask(ptr noalias %a, ptr readnone %b, i64 %n) #0 
{
+; CHECK-LABEL: define void @test_linear_with_mask
+; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:br label [[FOR_BODY:%.*]]
+; CHECK:   for.body:
+; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 
[[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:[[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 
[[INDVARS_IV]]
+; CHECK-NEXT:[[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:[[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 
[[INDVARS_IV]]
+; CHECK-NEXT:store i64 [[CALL]], ptr [[GEPA]], align 8
+; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:[[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label 
[[FOR_BODY]]
+; CHECK:   for.cond.cleanup:
+; CHECK-NEXT:ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
+  %call = call i64 @foo(ptr %gepb) #2
+  %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  store i64 %call, ptr %gepa
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_linear_with_vector(ptr noalias %a, ptr readnone %b, ptr 
readonly %c, i64 %n) #0 {
+; CHECK-LABEL: define void @test_linear_with_vector
+; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly 
[[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:br label [[FOR_BODY:%.*]]
+; CHECK:   for.body:
+; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 
[[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:[[GEPC:%.*]] = getelementptr i32, ptr [[C]], i64 
[[INDVARS_IV]]
+; CHECK-NEXT:[[DATA:%.*]] = load i32, ptr [[GEPC]], align 8
+; CHECK-NEXT:[[GEPB:%.*]] = geteleme