[PATCH] D56413: [OpenMP] Avoid remainder operations for loop index values on a collapsed loop nest.
This revision was automatically updated to reflect the committed changes. Closed by commit rC350759: [OpenMP] Avoid remainder operations for loop index values on a collapsed loop… (authored by gbercea, committed by ). Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D56413/new/ https://reviews.llvm.org/D56413 Files: lib/Sema/SemaOpenMP.cpp test/OpenMP/for_codegen.cpp test/OpenMP/for_simd_codegen.cpp test/OpenMP/parallel_for_simd_codegen.cpp test/OpenMP/simd_codegen.cpp Index: test/OpenMP/simd_codegen.cpp === --- test/OpenMP/simd_codegen.cpp +++ test/OpenMP/simd_codegen.cpp @@ -278,8 +278,11 @@ // CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 // CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.access.group // CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group -// CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4 -// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 +// CHECK: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group +// CHECK-NEXT: [[J_1_DIV1:%.+]] = sdiv i64 [[IV2_1]], 4 +// CHECK-NEXT: [[J_1_MUL1:%.+]] = mul nsw i64 [[J_1_DIV1]], 4 +// CHECK-NEXT: [[J_1_SUB0:%.+]] = sub nsw i64 [[IV2]], [[J_1_MUL1]] +// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1_SUB0]], 2 // CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] // CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.access.group // simd.for.inc: @@ -393,22 +396,70 @@ // CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1 // CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]] // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]] + // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group -// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20 -// CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3 +// CHECK: [[IV1_2_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group +// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2_1]], 60 +// CHECK-NEXT: [[MUL_1:%.+]] = mul i32 
[[CALC_J_1]], 60 +// CHECK-NEXT: [[SUB_3:%.+]] = sub i32 [[IV1_2]], [[MUL_1]] +// CHECK-NEXT: [[CALC_J_2:%.+]] = udiv i32 [[SUB_3]], 20 // CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1 // CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]] // CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]] + // CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group -// CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5 -// CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4 -// CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1 -// CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]] -// CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]] -// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group -// CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5 -// CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1 -// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]] +// CHECK: [[IV1_3_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group +// CHECK-NEXT: [[DIV_1:%.+]] = udiv i32 [[IV1_3_1]], 60 +// CHECK-NEXT: [[MUL_2:%.+]] = mul i32 [[DIV_1]], 60 +// CHECK-NEXT: [[ADD_3:%.+]] = sub i32 [[IV1_3]], [[MUL_2]] + +// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_4_1:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK-NEXT: [[DIV_2:%.+]] = udiv i32 [[IV1_4_1]], 60 +// CHECK-NEXT: [[MUL_3:%.+]] = mul i32 [[DIV_2]], 60 +// CHECK-NEXT: [[SUB_6:%.+]] = sub i32 [[IV1_4]], [[MUL_3]] +// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[SUB_6]], 20 +// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 20 +// CHECK-NEXT: [[ADD_5:%.+]] = sub i32 [[ADD_3]], [[MUL_4]] +// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[ADD_5]], 5 +// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 1 +// CHECK-NEXT: [[ADD_6:%.+]] = add i32 3, [[MUL_5]] +// CHECK-NEXT: store i32 [[ADD_6]], i32* [[LC_K:.+]] + +// CHECK: [[IV1_5:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group +// CHECK: [[IV1_5_1:%.+]] = load i32, i32* 
[[OMP_IV]]{{.+}}!llvm.access.group +// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[IV1_5_1]], 60 +// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 60 +// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_5]], [[MUL_6]] + +// CHECK: [[IV1_6:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_6_1:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK-NEXT: [[DIV_6:%.+]] = udiv i32 [[IV1_6_1]], 60 +// CHECK-NEXT: [[MUL_7:%.+]] = mul i32 [[DIV_6]], 60 +// CHECK-NEXT: [[SUB_10:%.+]] = sub i32 [[IV1_6]], [[MUL_7]] +// CHECK-NEXT: [[DIV_7:%.+]] = udiv i32 [[SUB_10]], 20 +// CHECK-NEXT: [[MUL_8:%.+]] = mul i32 [[DIV_7]], 20 +// CHECK-NEXT: [[SUB_11:%.+]] = sub i32 [[SUB_7]], [[MUL_8]] + +// CHECK: [[IV1_7:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_7_1:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK-NEXT: [[DIV_8:%.+]] = udiv i32 [[IV1_7_1]], 60 +// CHECK-NEXT: [[MUL_9:%.+]] = mul i32 [[DIV_8]], 60 +// CHECK-NEXT:
[PATCH] D56413: [OpenMP] Avoid remainder operations for loop index values on a collapsed loop nest.
gtbercea updated this revision to Diff 180722. gtbercea added a comment. Fix. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D56413/new/ https://reviews.llvm.org/D56413 Files: lib/Sema/SemaOpenMP.cpp test/OpenMP/for_codegen.cpp test/OpenMP/for_simd_codegen.cpp test/OpenMP/parallel_for_simd_codegen.cpp test/OpenMP/simd_codegen.cpp Index: test/OpenMP/simd_codegen.cpp === --- test/OpenMP/simd_codegen.cpp +++ test/OpenMP/simd_codegen.cpp @@ -278,8 +278,11 @@ // CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 // CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.access.group // CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group -// CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4 -// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 +// CHECK: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group +// CHECK-NEXT: [[J_1_DIV1:%.+]] = sdiv i64 [[IV2_1]], 4 +// CHECK-NEXT: [[J_1_MUL1:%.+]] = mul nsw i64 [[J_1_DIV1]], 4 +// CHECK-NEXT: [[J_1_SUB0:%.+]] = sub nsw i64 [[IV2]], [[J_1_MUL1]] +// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1_SUB0]], 2 // CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] // CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.access.group // simd.for.inc: @@ -393,22 +396,70 @@ // CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1 // CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]] // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]] + // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group -// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20 -// CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3 +// CHECK: [[IV1_2_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group +// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2_1]], 60 +// CHECK-NEXT: [[MUL_1:%.+]] = mul i32 [[CALC_J_1]], 60 +// CHECK-NEXT: [[SUB_3:%.+]] = sub i32 [[IV1_2]], [[MUL_1]] +// CHECK-NEXT: [[CALC_J_2:%.+]] = udiv i32 [[SUB_3]], 20 // 
CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1 // CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]] // CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]] + // CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group -// CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5 -// CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4 -// CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1 -// CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]] -// CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]] -// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group -// CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5 -// CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1 -// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]] +// CHECK: [[IV1_3_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group +// CHECK-NEXT: [[DIV_1:%.+]] = udiv i32 [[IV1_3_1]], 60 +// CHECK-NEXT: [[MUL_2:%.+]] = mul i32 [[DIV_1]], 60 +// CHECK-NEXT: [[ADD_3:%.+]] = sub i32 [[IV1_3]], [[MUL_2]] + +// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_4_1:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK-NEXT: [[DIV_2:%.+]] = udiv i32 [[IV1_4_1]], 60 +// CHECK-NEXT: [[MUL_3:%.+]] = mul i32 [[DIV_2]], 60 +// CHECK-NEXT: [[SUB_6:%.+]] = sub i32 [[IV1_4]], [[MUL_3]] +// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[SUB_6]], 20 +// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 20 +// CHECK-NEXT: [[ADD_5:%.+]] = sub i32 [[ADD_3]], [[MUL_4]] +// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[ADD_5]], 5 +// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 1 +// CHECK-NEXT: [[ADD_6:%.+]] = add i32 3, [[MUL_5]] +// CHECK-NEXT: store i32 [[ADD_6]], i32* [[LC_K:.+]] + +// CHECK: [[IV1_5:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group +// CHECK: [[IV1_5_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group +// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[IV1_5_1]], 60 +// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 
[[DIV_5]], 60 +// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_5]], [[MUL_6]] + +// CHECK: [[IV1_6:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_6_1:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK-NEXT: [[DIV_6:%.+]] = udiv i32 [[IV1_6_1]], 60 +// CHECK-NEXT: [[MUL_7:%.+]] = mul i32 [[DIV_6]], 60 +// CHECK-NEXT: [[SUB_10:%.+]] = sub i32 [[IV1_6]], [[MUL_7]] +// CHECK-NEXT: [[DIV_7:%.+]] = udiv i32 [[SUB_10]], 20 +// CHECK-NEXT: [[MUL_8:%.+]] = mul i32 [[DIV_7]], 20 +// CHECK-NEXT: [[SUB_11:%.+]] = sub i32 [[SUB_7]], [[MUL_8]] + +// CHECK: [[IV1_7:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_7_1:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK-NEXT: [[DIV_8:%.+]] = udiv i32 [[IV1_7_1]], 60 +// CHECK-NEXT: [[MUL_9:%.+]] = mul i32 [[DIV_8]], 60 +// CHECK-NEXT: [[SUB_12:%.+]] = sub i32 [[IV1_7]], [[MUL_9]] + +// CHECK: [[IV1_8:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_8_1:%.+]] = load i32, i32*
[PATCH] D56413: [OpenMP] Avoid remainder operations for loop index values on a collapsed loop nest.
ABataev accepted this revision. ABataev added a comment. This revision is now accepted and ready to land. LG Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D56413/new/ https://reviews.llvm.org/D56413 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D56413: [OpenMP] Avoid remainder operations for loop index values on a collapsed loop nest.
gtbercea updated this revision to Diff 180713. gtbercea added a comment. Remove redundant initialization. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D56413/new/ https://reviews.llvm.org/D56413 Files: lib/Sema/SemaOpenMP.cpp test/OpenMP/for_codegen.cpp test/OpenMP/for_simd_codegen.cpp test/OpenMP/parallel_for_simd_codegen.cpp test/OpenMP/simd_codegen.cpp Index: test/OpenMP/simd_codegen.cpp === --- test/OpenMP/simd_codegen.cpp +++ test/OpenMP/simd_codegen.cpp @@ -278,8 +278,11 @@ // CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 // CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] // CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] -// CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4 -// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 +// CHECK: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK-NEXT: [[I_1_DIV1:%.+]] = sdiv i64 [[IV2_1]], 4 +// CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1_DIV1]], 4 +// CHECK-NEXT: [[I_1_SUB0:%.+]] = sub nsw i64 [[IV2]], [[I_1_MUL1]] +// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[I_1_SUB0]], 2 // CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] // CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] // simd.for.inc: @@ -393,22 +396,70 @@ // CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1 // CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]] // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]] + // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] -// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20 -// CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3 +// CHECK: [[IV1_2_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2_1]], 60 +// 
CHECK-NEXT: [[MUL_1:%.+]] = mul i32 [[CALC_J_1]], 60 +// CHECK-NEXT: [[SUB_3:%.+]] = sub i32 [[IV1_2]], [[MUL_1]] +// CHECK-NEXT: [[CALC_J_2:%.+]] = udiv i32 [[SUB_3]], 20 // CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1 // CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]] // CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]] + // CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] -// CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5 -// CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4 -// CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1 -// CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]] -// CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]] -// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] -// CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5 -// CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1 -// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]] +// CHECK: [[IV1_3_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK-NEXT: [[DIV_1:%.+]] = udiv i32 [[IV1_3_1]], 60 +// CHECK-NEXT: [[MUL_2:%.+]] = mul i32 [[DIV_1]], 60 +// CHECK-NEXT: [[ADD_3:%.+]] = sub i32 [[IV1_3]], [[MUL_2]] + +// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_4_1:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK-NEXT: [[DIV_2:%.+]] = udiv i32 [[IV1_4_1]], 60 +// CHECK-NEXT: [[MUL_3:%.+]] = mul i32 [[DIV_2]], 60 +// CHECK-NEXT: [[SUB_6:%.+]] = sub i32 [[IV1_4]], [[MUL_3]] +// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[SUB_6]], 20 +// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 20 +// CHECK-NEXT: [[ADD_5:%.+]] = sub i32 [[ADD_3]], [[MUL_4]] +// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[ADD_5]], 5 +// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 1 +// CHECK-NEXT: [[ADD_6:%.+]] = add i32 3, [[MUL_5]] +// CHECK-NEXT: store i32 [[ADD_6]], i32* 
[[LC_K:.+]] + +// CHECK: [[IV1_5:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV1_5_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[IV1_5_1]], 60 +// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 60 +// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_5]], [[MUL_6]] + +// CHECK: [[IV1_6:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_6_1:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK-NEXT: [[DIV_6:%.+]] = udiv i32 [[IV1_6_1]], 60 +// CHECK-NEXT: [[MUL_7:%.+]] = mul i32 [[DIV_6]], 60 +// CHECK-NEXT: [[SUB_10:%.+]] = sub i32 [[IV1_6]], [[MUL_7]] +// CHECK-NEXT: [[DIV_7:%.+]] = udiv i32 [[SUB_10]], 20 +// CHECK-NEXT: [[MUL_8:%.+]] = mul i32 [[DIV_7]], 20 +// CHECK-NEXT: [[SUB_11:%.+]] = sub i32 [[SUB_7]], [[MUL_8]] + +// CHECK: [[IV1_7:%.+]] = load i32, i32*
[PATCH] D56413: [OpenMP] Avoid remainder operations for loop index values on a collapsed loop nest.
ABataev added inline comments. Comment at: lib/Sema/SemaOpenMP.cpp:5523 SourceLocation UpdLoc = IS.IncSrcRange.getBegin(); - // Build: Iter = (IV / Div) % IS.NumIters - // where Div is product of previous iterations' IS.NumIters. - ExprResult Iter; - if (Div.isUsable()) { -Iter = -SemaRef.BuildBinOp(CurScope, UpdLoc, BO_Div, IV.get(), Div.get()); - } else { -Iter = IV; -assert((Cnt == (int)NestedLoopCount - 1) && - "unusable div expected on first iteration only"); - } - - if (Cnt != 0 && Iter.isUsable()) -Iter = SemaRef.BuildBinOp(CurScope, UpdLoc, BO_Rem, Iter.get(), - IS.NumIterations); + ExprResult Iter = IV; + Do you need to initialize `Iter` here? Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D56413/new/ https://reviews.llvm.org/D56413 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D56413: [OpenMP] Avoid remainder operations for loop index values on a collapsed loop nest.
gtbercea updated this revision to Diff 180711. gtbercea added a comment. Fix update. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D56413/new/ https://reviews.llvm.org/D56413 Files: lib/Sema/SemaOpenMP.cpp test/OpenMP/for_codegen.cpp test/OpenMP/for_simd_codegen.cpp test/OpenMP/parallel_for_simd_codegen.cpp test/OpenMP/simd_codegen.cpp Index: test/OpenMP/simd_codegen.cpp === --- test/OpenMP/simd_codegen.cpp +++ test/OpenMP/simd_codegen.cpp @@ -278,8 +278,11 @@ // CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 // CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] // CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] -// CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4 -// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 +// CHECK: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK-NEXT: [[I_1_DIV1:%.+]] = sdiv i64 [[IV2_1]], 4 +// CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1_DIV1]], 4 +// CHECK-NEXT: [[I_1_SUB0:%.+]] = sub nsw i64 [[IV2]], [[I_1_MUL1]] +// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[I_1_SUB0]], 2 // CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] // CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] // simd.for.inc: @@ -393,22 +396,70 @@ // CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1 // CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]] // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]] + // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] -// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20 -// CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3 +// CHECK: [[IV1_2_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2_1]], 60 +// CHECK-NEXT: 
[[MUL_1:%.+]] = mul i32 [[CALC_J_1]], 60 +// CHECK-NEXT: [[SUB_3:%.+]] = sub i32 [[IV1_2]], [[MUL_1]] +// CHECK-NEXT: [[CALC_J_2:%.+]] = udiv i32 [[SUB_3]], 20 // CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1 // CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]] // CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]] + // CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] -// CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5 -// CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4 -// CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1 -// CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]] -// CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]] -// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] -// CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5 -// CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1 -// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]] +// CHECK: [[IV1_3_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK-NEXT: [[DIV_1:%.+]] = udiv i32 [[IV1_3_1]], 60 +// CHECK-NEXT: [[MUL_2:%.+]] = mul i32 [[DIV_1]], 60 +// CHECK-NEXT: [[ADD_3:%.+]] = sub i32 [[IV1_3]], [[MUL_2]] + +// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_4_1:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK-NEXT: [[DIV_2:%.+]] = udiv i32 [[IV1_4_1]], 60 +// CHECK-NEXT: [[MUL_3:%.+]] = mul i32 [[DIV_2]], 60 +// CHECK-NEXT: [[SUB_6:%.+]] = sub i32 [[IV1_4]], [[MUL_3]] +// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[SUB_6]], 20 +// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 20 +// CHECK-NEXT: [[ADD_5:%.+]] = sub i32 [[ADD_3]], [[MUL_4]] +// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[ADD_5]], 5 +// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 1 +// CHECK-NEXT: [[ADD_6:%.+]] = add i32 3, [[MUL_5]] +// CHECK-NEXT: store i32 [[ADD_6]], i32* [[LC_K:.+]] + +// 
CHECK: [[IV1_5:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV1_5_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[IV1_5_1]], 60 +// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 60 +// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_5]], [[MUL_6]] + +// CHECK: [[IV1_6:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_6_1:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK-NEXT: [[DIV_6:%.+]] = udiv i32 [[IV1_6_1]], 60 +// CHECK-NEXT: [[MUL_7:%.+]] = mul i32 [[DIV_6]], 60 +// CHECK-NEXT: [[SUB_10:%.+]] = sub i32 [[IV1_6]], [[MUL_7]] +// CHECK-NEXT: [[DIV_7:%.+]] = udiv i32 [[SUB_10]], 20 +// CHECK-NEXT: [[MUL_8:%.+]] = mul i32 [[DIV_7]], 20 +// CHECK-NEXT: [[SUB_11:%.+]] = sub i32 [[SUB_7]], [[MUL_8]] + +// CHECK: [[IV1_7:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK:
[PATCH] D56413: [OpenMP] Avoid remainder operations for loop index values on a collapsed loop nest.
gtbercea updated this revision to Diff 180709. gtbercea added a comment. Invert accumulation direction. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D56413/new/ https://reviews.llvm.org/D56413 Files: docs/OpenMPSupport.rst include/clang/Basic/LangOptions.def include/clang/Driver/Options.td lib/Driver/ToolChains/Clang.cpp lib/Frontend/CompilerInvocation.cpp lib/Sema/SemaOpenMP.cpp test/OpenMP/for_codegen.cpp test/OpenMP/for_simd_codegen.cpp test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp test/OpenMP/parallel_for_simd_codegen.cpp test/OpenMP/simd_codegen.cpp Index: test/OpenMP/simd_codegen.cpp === --- test/OpenMP/simd_codegen.cpp +++ test/OpenMP/simd_codegen.cpp @@ -278,8 +278,11 @@ // CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 // CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] // CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] -// CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4 -// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 +// CHECK: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK-NEXT: [[I_1_DIV1:%.+]] = sdiv i64 [[IV2_1]], 4 +// CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1_DIV1]], 4 +// CHECK-NEXT: [[I_1_SUB0:%.+]] = sub nsw i64 [[IV2]], [[I_1_MUL1]] +// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[I_1_SUB0]], 2 // CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] // CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] // simd.for.inc: @@ -393,22 +396,70 @@ // CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1 // CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]] // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]] + // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] -// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20 -// 
CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3 +// CHECK: [[IV1_2_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2_1]], 60 +// CHECK-NEXT: [[MUL_1:%.+]] = mul i32 [[CALC_J_1]], 60 +// CHECK-NEXT: [[SUB_3:%.+]] = sub i32 [[IV1_2]], [[MUL_1]] +// CHECK-NEXT: [[CALC_J_2:%.+]] = udiv i32 [[SUB_3]], 20 // CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1 // CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]] // CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]] + // CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] -// CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5 -// CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4 -// CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1 -// CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]] -// CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]] -// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] -// CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5 -// CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1 -// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]] +// CHECK: [[IV1_3_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK-NEXT: [[DIV_1:%.+]] = udiv i32 [[IV1_3_1]], 60 +// CHECK-NEXT: [[MUL_2:%.+]] = mul i32 [[DIV_1]], 60 +// CHECK-NEXT: [[ADD_3:%.+]] = sub i32 [[IV1_3]], [[MUL_2]] + +// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_4_1:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK-NEXT: [[DIV_2:%.+]] = udiv i32 [[IV1_4_1]], 60 +// CHECK-NEXT: [[MUL_3:%.+]] = mul i32 [[DIV_2]], 60 +// CHECK-NEXT: [[SUB_6:%.+]] = sub i32 [[IV1_4]], [[MUL_3]] +// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[SUB_6]], 20 +// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 20 +// CHECK-NEXT: [[ADD_5:%.+]] = sub 
i32 [[ADD_3]], [[MUL_4]] +// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[ADD_5]], 5 +// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 1 +// CHECK-NEXT: [[ADD_6:%.+]] = add i32 3, [[MUL_5]] +// CHECK-NEXT: store i32 [[ADD_6]], i32* [[LC_K:.+]] + +// CHECK: [[IV1_5:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV1_5_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[IV1_5_1]], 60 +// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 60 +// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_5]], [[MUL_6]] + +// CHECK: [[IV1_6:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_6_1:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK-NEXT: [[DIV_6:%.+]] = udiv i32 [[IV1_6_1]], 60 +// CHECK-NEXT: [[MUL_7:%.+]] = mul i32 [[DIV_6]], 60 +// CHECK-NEXT: [[SUB_10:%.+]] = sub i32
[PATCH] D56413: [OpenMP] Avoid remainder operations for loop index values on a collapsed loop nest.
ABataev added inline comments. Comment at: lib/Sema/SemaOpenMP.cpp:5520 +ExprResult Acc = + SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get(); +for (unsigned int Cnt = 0; Cnt < NestedLoopCount; Cnt++) { No need for `.get()` here Comment at: lib/Sema/SemaOpenMP.cpp:5521 + SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get(); +for (unsigned int Cnt = 0; Cnt < NestedLoopCount; Cnt++) { LoopIterationSpace = IterSpaces[Cnt]; Use preincrement Comment at: lib/Sema/SemaOpenMP.cpp:5529 + SemaRef.ActOnIntegerConstant(SourceLocation(), 1).get(); + for (unsigned int K = Cnt+1; K < NestedLoopCount; K++) +Prod = 1. Preincrement. 2. Format. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D56413/new/ https://reviews.llvm.org/D56413 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D56413: [OpenMP] Avoid remainder operations for loop index values on a collapsed loop nest.
gtbercea created this revision. gtbercea added reviewers: ABataev, caomhin. Herald added subscribers: cfe-commits, arphaman, guansong. Change the strategy for computing loop index variables after collapsing a loop nest via the collapse clause by replacing the expensive remainder operation with multiplications and additions. Repository: rC Clang https://reviews.llvm.org/D56413 Files: lib/Sema/SemaOpenMP.cpp test/OpenMP/for_codegen.cpp test/OpenMP/for_simd_codegen.cpp test/OpenMP/parallel_for_simd_codegen.cpp test/OpenMP/simd_codegen.cpp Index: test/OpenMP/simd_codegen.cpp === --- test/OpenMP/simd_codegen.cpp +++ test/OpenMP/simd_codegen.cpp @@ -278,7 +278,11 @@ // CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 // CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] // CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] -// CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4 +// CHECK: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] +// CHECK-NEXT: [[DIV_1:%.+]] = sdiv i64 [[IV2_1]], 4 +// CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[DIV_1]], 4 +// CHECK-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]] +// CHECK-NEXT: [[J_1:%.+]] = sub nsw i64 [[IV2]], [[I_1_ADD0]] // CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 // CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] // CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]] @@ -393,22 +397,77 @@ // CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1 // CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]] // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]] + // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] -// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20 -// CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3 +// CHECK: [[IV1_2_1:%.+]] = load i32, i32* 
[[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2_1]], 60 +// CHECK-NEXT: [[MUL_1:%.+]] = mul i32 [[CALC_J_1]], 60 +// CHECK-NEXT: [[ADD_2:%.+]] = add i32 0, [[MUL_1]] +// CHECK-NEXT: [[SUB_3:%.+]] = sub i32 [[IV1_2]], [[ADD_2]] +// CHECK-NEXT: [[CALC_J_2:%.+]] = udiv i32 [[SUB_3]], 20 // CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1 // CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]] // CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]] + // CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] -// CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5 -// CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4 -// CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1 -// CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]] -// CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]] -// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] -// CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5 -// CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1 -// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]] +// CHECK: [[IV1_3_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK-NEXT: [[DIV_1:%.+]] = udiv i32 [[IV1_3_1]], 60 +// CHECK-NEXT: [[MUL_2:%.+]] = mul i32 [[DIV_1]], 60 +// CHECK-NEXT: [[ADD_3:%.+]] = add i32 0, [[MUL_2]] + +// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_4_1:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK-NEXT: [[DIV_2:%.+]] = udiv i32 [[IV1_4_1]], 60 +// CHECK-NEXT: [[MUL_3:%.+]] = mul i32 [[DIV_2]], 60 +// CHECK-NEXT: [[ADD_4:%.+]] = add i32 0, [[MUL_3]] +// CHECK-NEXT: [[SUB_6:%.+]] = sub i32 [[IV1_4]], [[ADD_4]] +// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[SUB_6]], 20 +// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 20 +// CHECK-NEXT: [[ADD_5:%.+]] = add i32 
[[ADD_3]], [[MUL_4]] +// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_3]], [[ADD_5]] +// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[SUB_7]], 5 +// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 1 +// CHECK-NEXT: [[ADD_6:%.+]] = add i32 3, [[MUL_5]] +// CHECK-NEXT: store i32 [[ADD_6]], i32* [[LC_K:.+]] + +// CHECK: [[IV1_5:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK: [[IV1_5_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]] +// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[IV1_5_1]], 60 +// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 60 +// CHECK-NEXT: [[ADD_7:%.+]] = add i32 0, [[MUL_6]] + +// CHECK: [[IV1_6:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK: [[IV1_6_1:%.+]] = load i32, i32* [[OMP_IV]] +// CHECK-NEXT: [[DIV_6:%.+]] = udiv i32 [[IV1_6_1]], 60 +// CHECK-NEXT: [[MUL_7:%.+]] = mul i32 [[DIV_6]], 60