Author: Michael Kruse Date: 2022-01-31T17:28:52-06:00 New Revision: 8a9e4f245b66b90839bdf34e91cb0901e3260dad
URL: https://github.com/llvm/llvm-project/commit/8a9e4f245b66b90839bdf34e91cb0901e3260dad DIFF: https://github.com/llvm/llvm-project/commit/8a9e4f245b66b90839bdf34e91cb0901e3260dad.diff LOG: [Clang][OpenMPIRBuilder] Fix off-by-one error when dividing by stepsize. When the stepsize does not evenly divide the range's end, round up to ensure that the last multiple of the stepsize before reaching the upper bound is reached. For instance, the trip count of for (int i = 0; i < 7; i+=5) is two (i=0 and i=5), not (7-0)/5 == 1. Reviewed By: peixin Differential Revision: https://reviews.llvm.org/D118542 Added: clang/test/OpenMP/irbuilder_for_unsigned_down.c Modified: clang/lib/Sema/SemaOpenMP.cpp clang/test/OpenMP/irbuilder_for_unsigned.c clang/test/OpenMP/irbuilder_unroll_full.c clang/test/OpenMP/irbuilder_unroll_heuristic.c clang/test/OpenMP/irbuilder_unroll_partial_factor.c clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c Removed: ################################################################################ diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index ae91a6470471a..a500ad4f02209 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5327,6 +5327,8 @@ static CapturedStmt *buildDistanceFunc(Sema &Actions, QualType LogicalTy, IntegerLiteral *Zero = IntegerLiteral::Create( Ctx, llvm::APInt(Ctx.getIntWidth(LogicalTy), 0), LogicalTy, {}); + IntegerLiteral *One = IntegerLiteral::Create( + Ctx, llvm::APInt(Ctx.getIntWidth(LogicalTy), 1), LogicalTy, {}); Expr *Dist; if (Rel == BO_NE) { // When using a != comparison, the increment can be +1 or -1. 
This can be @@ -5381,18 +5383,25 @@ static CapturedStmt *buildDistanceFunc(Sema &Actions, QualType LogicalTy, if (Rel == BO_LE || Rel == BO_GE) { // Add one to the range if the relational operator is inclusive. - Range = AssertSuccess(Actions.BuildBinOp( - nullptr, {}, BO_Add, Range, - Actions.ActOnIntegerConstant(SourceLocation(), 1).get())); + Range = + AssertSuccess(Actions.BuildBinOp(nullptr, {}, BO_Add, Range, One)); } - // Divide by the absolute step amount. + // Divide by the absolute step amount. If the range is not a multiple of + // the step size, rounding-up the effective upper bound ensures that the + // last iteration is included. + // Note that the rounding-up may cause an overflow in a temporry that + // could be avoided, but would have occured in a C-style for-loop as well. Expr *Divisor = BuildVarRef(NewStep); if (Rel == BO_GE || Rel == BO_GT) Divisor = AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_Minus, Divisor)); + Expr *DivisorMinusOne = + AssertSuccess(Actions.BuildBinOp(nullptr, {}, BO_Sub, Divisor, One)); + Expr *RangeRoundUp = AssertSuccess( + Actions.BuildBinOp(nullptr, {}, BO_Add, Range, DivisorMinusOne)); Dist = AssertSuccess( - Actions.BuildBinOp(nullptr, {}, BO_Div, Range, Divisor)); + Actions.BuildBinOp(nullptr, {}, BO_Div, RangeRoundUp, Divisor)); // If there is not at least one iteration, the range contains garbage. Fix // to zero in this case. 
diff --git a/clang/test/OpenMP/irbuilder_for_unsigned.c b/clang/test/OpenMP/irbuilder_for_unsigned.c index a7e70391c027b..928d99b9bfc72 100644 --- a/clang/test/OpenMP/irbuilder_for_unsigned.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned.c @@ -123,14 +123,17 @@ extern "C" void workshareloop_unsigned(float *a, float *b, float *c, float *d) { // CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTSTART]], align 4 // CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP6]], [[TMP7]] // CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTSTEP]], align 4 -// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], [[TMP8]] +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]] // CHECK-NEXT: br label [[COND_END:%.*]] // CHECK: cond.false: // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 [[COND]], i32* [[TMP9]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], i32* [[TMP10]], align 4 // CHECK-NEXT: ret void // // diff --git a/clang/test/OpenMP/irbuilder_for_unsigned_down.c b/clang/test/OpenMP/irbuilder_for_unsigned_down.c new file mode 100644 index 0000000000000..7ef48b9ec900d --- /dev/null +++ b/clang/test/OpenMP/irbuilder_for_unsigned_down.c @@ -0,0 +1,158 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs +// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +// CHECK-LABEL: define {{.*}}@workshareloop_unsigned_down( 
+// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 +// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 +// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 +// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 +// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 +// CHECK-NEXT: store i32 32000000, i32* %[[I]], align 4 +// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 +// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) +// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: +// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 +// CHECK-NEXT: %[[TMP3:.+]] = sub i32 %[[DOTCOUNT]], 1 +// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: %[[TMP4:.+]] = load i32, 
i32* %[[P_LOWERBOUND]], align 4 +// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 +// CHECK-NEXT: %[[TMP6:.+]] = sub i32 %[[TMP5]], %[[TMP4]] +// CHECK-NEXT: %[[TMP7:.+]] = add i32 %[[TMP6]], 1 +// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_HEADER]]: +// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_COND]]: +// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[TMP7]] +// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_BODY]]: +// CHECK-NEXT: %[[TMP8:.+]] = add i32 %[[OMP_LOOP_IV]], %[[TMP4]] +// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP8]], %struct.anon.0* %[[AGG_CAPTURED1]]) +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[CONV:.+]] = uitofp i32 %[[TMP9]] to float +// CHECK-NEXT: %[[TMP10:.+]] = load float*, float** %[[A_ADDR]], align 8 +// CHECK-NEXT: %[[TMP11:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM:.+]] = zext i32 %[[TMP11]] to i64 +// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP10]], i64 %[[IDXPROM]] +// CHECK-NEXT: store float %[[CONV]], float* %[[ARRAYIDX]], align 4 +// CHECK-NEXT: br label %[[OMP_LOOP_INC]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_INC]]: +// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_EXIT]]: +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM2:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 
%[[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_AFTER]]: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + +extern "C" void workshareloop_unsigned_down(float *a) { +#pragma omp for + for (unsigned i = 32000000; i > 33; i -= 7) { + a[i] = i; + } +} + +#endif // HEADER +// +// +// +// +// + +// CHECK-LABEL: define {{.*}}@__captured_stmt( +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 +// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 +// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0 +// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 +// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 +// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 +// CHECK-NEXT: store i32 33, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 -7, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 +// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: %[[CMP:.+]] = icmp ugt i32 %[[TMP4]], %[[TMP5]] +// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[COND_TRUE]]: +// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTART]], align 4 +// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: %[[SUB:.+]] = sub i32 %[[TMP6]], %[[TMP7]] +// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* 
%[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[SUB1:.+]] = sub nsw i32 0, %[[TMP8]] +// CHECK-NEXT: %[[SUB2:.+]] = sub i32 %[[SUB1]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB2]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[SUB3:.+]] = sub nsw i32 0, %[[TMP9]] +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[SUB3]] +// CHECK-NEXT: br label %[[COND_END:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[COND_FALSE]]: +// CHECK-NEXT: br label %[[COND_END]] +// CHECK-EMPTY: +// CHECK-NEXT: [[COND_END]]: +// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@__captured_stmt.1( +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 +// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 +// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 +// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 +// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: %[[MUL:.+]] = mul i32 -7, %[[TMP3]] +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] +// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// 
CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} +// CHECK: ![[META2:[0-9]+]] = diff --git a/clang/test/OpenMP/irbuilder_unroll_full.c b/clang/test/OpenMP/irbuilder_unroll_full.c index 79d29568f53c0..93f232600833e 100644 --- a/clang/test/OpenMP/irbuilder_unroll_full.c +++ b/clang/test/OpenMP/irbuilder_unroll_full.c @@ -111,7 +111,10 @@ void unroll_full(float *a, float *b, float *c, float *d) { // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -119,8 +122,8 @@ void unroll_full(float *a, float *b, float *c, float *d) { // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_heuristic.c index 4c590b24db178..20a657cc4dc21 100644 --- a/clang/test/OpenMP/irbuilder_unroll_heuristic.c +++ b/clang/test/OpenMP/irbuilder_unroll_heuristic.c @@ -112,7 +112,10 @@ void unroll_heuristic(float *a, float *b, float *c, float *d) { // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 // 
CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -120,8 +123,8 @@ void unroll_heuristic(float *a, float *b, float *c, float *d) { // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor.c index 254dcc57d9206..3aca672f7247a 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor.c @@ -111,7 +111,10 @@ void unroll_partial_factor(float *a, float *b, float *c, float *d) { // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ 
-119,8 +122,8 @@ void unroll_partial_factor(float *a, float *b, float *c, float *d) { // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c index 5452bf07eca7e..7bcfeab076c3e 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c @@ -180,7 +180,10 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d) // CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]] // CHECK-NEXT: %[[TMP11:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP11]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP11]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP12:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP12]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -188,8 +191,8 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d) // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP12:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP12]], align 4 +// CHECK-NEXT: %[[TMP13:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store 
i32 %[[COND]], i32* %[[TMP13]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c index 15f4cedefd067..942b1b841a3ea 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c @@ -111,7 +111,10 @@ void unroll_partial_heuristic(float *a, float *b, float *c, float *d) { // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -119,8 +122,8 @@ void unroll_partial_heuristic(float *a, float *b, float *c, float *d) { // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c index fa25aa717de05..34dd745aa855f 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c @@ -201,7 +201,10 @@ void unroll_partial_heuristic_constant_for(float 
*a, float *b, float *c, float * // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -209,8 +212,8 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float * // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c index 6a1bbf5fad003..f17c52ee68d10 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c @@ -206,7 +206,10 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f // CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]] // CHECK-NEXT: %[[TMP11:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP11]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP11]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: 
%[[TMP12:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP12]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -214,8 +217,8 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP12:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP12]], align 4 +// CHECK-NEXT: %[[TMP13:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP13]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c index 5bb8911c3f094..09f64f86915bc 100644 --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c @@ -173,7 +173,10 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -181,8 +184,8 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = 
load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c index ebc3df4c90cd3..6c18d5c6a4b00 100644 --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c +++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c @@ -154,7 +154,10 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) { // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -162,8 +165,8 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) { // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits