llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-flang-openmp Author: Michael Kruse (Meinersbur) <details> <summary>Changes</summary> The emitted code tests whether the current tile should executing the remainder iterations by checking the logical iteration number is the one after the floor iterations that execute the non-remainder iterations. There are two counts of how many iterations there are: Those of non-remainder iterations (simply rounded-down division of tripcount and tile size), and those including an additional floor iteration for the remainder iterations. The code was used the wrong one that caused the condition to never match. --- Full diff: https://github.com/llvm/llvm-project/pull/154999.diff 6 Files Affected: - (modified) clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c (+1-1) - (modified) clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c (+1-1) - (modified) clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c (+1-1) - (modified) clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c (+1-1) - (modified) clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c (+1-1) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+6-5) ``````````diff diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c index 8780d51de8a36..a9514e1e7d145 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c @@ -79,7 +79,7 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d) // CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] // CHECK: omp_floor0.body: // CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]] -// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[TMP4]] // CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 13 // CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] // CHECK: omp_tile0.preheader: diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c index 728f67ccf2843..8ca000a05792f 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c @@ -85,7 +85,7 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float * // CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] // CHECK: omp_floor0.body: // CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]] -// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[TMP3]] // CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 4 // CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] // CHECK: omp_tile0.preheader: diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c index f41f1fe5ce719..5fbcf8f2d030c 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c @@ -87,7 +87,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f // CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] // CHECK: omp_floor0.body: // CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]] -// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[TMP4]] // CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 4 // CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] // CHECK: omp_tile0.preheader: diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c index 3c2407851e5a5..9a28c0c1bf713 100644 --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c @@ -75,7 +75,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { // CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] // CHECK: omp_floor0.body: // CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]] -// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[TMP3]] // CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 2 // CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] // CHECK: omp_tile0.preheader: diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c index a03bd47ca8b8f..24d42d265d6a6 100644 --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c +++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c @@ -59,7 +59,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) { // CHECK-NEXT: [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], [[OMP_FLOOR0_TRIPCOUNT]] // CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] // CHECK: omp_floor0.body: -// CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], [[TMP3]] // CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP4]], i32 8 // CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] // CHECK: omp_tile0.preheader: diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 989bcf45e0006..606f2e03821a3 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -5582,13 +5582,13 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, // Compute the trip counts of the floor loops. Builder.SetCurrentDebugLocation(DL); Builder.restoreIP(OutermostLoop->getPreheaderIP()); - SmallVector<Value *, 4> FloorCount, FloorRems; + SmallVector<Value *, 4> FloorCompleteCount, FloorCount, FloorRems; for (int i = 0; i < NumLoops; ++i) { Value *TileSize = TileSizes[i]; Value *OrigTripCount = OrigTripCounts[i]; Type *IVType = OrigTripCount->getType(); - Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize); + Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount, TileSize); Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize); // 0 if tripcount divides the tilesize, 1 otherwise. @@ -5602,11 +5602,12 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0)); FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType); - FloorTripCount = - Builder.CreateAdd(FloorTripCount, FloorTripOverflow, + Value *FloorTripCount = + Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow, "omp_floor" + Twine(i) + ".tripcount", true); // Remember some values for later use. + FloorCompleteCount.push_back(FloorCompleteTripCount); FloorCount.push_back(FloorTripCount); FloorRems.push_back(FloorTripRem); } @@ -5661,7 +5662,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, Value *TileSize = TileSizes[i]; Value *FloorIsEpilogue = - Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]); + Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]); Value *TileTripCount = Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize); `````````` </details> https://github.com/llvm/llvm-project/pull/154999 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits