https://github.com/adelejjeh created https://github.com/llvm/llvm-project/pull/180961
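This change adds a `-force-unroll-pragma` flag. When it is set, Clang attaches `llvm.loop.unroll.runtime.force` metadata to loops that carry an unroll pragma, and the loop unroller then sets `AllowExpensiveTripCount` for pragma-enabled loops, so runtime unrolling is no longer blocked when computing the trip count is considered expensive.

A minimal usage sketch (the function and loop below are illustrative, not taken from the patch; the flag and metadata names come from the diff):

```c
// saxpy_strided.c
// The runtime, non-unit step makes the trip-count computation "expensive",
// which normally prevents runtime unrolling even with #pragma unroll.
void saxpy_strided(int n, int step, float a, const float *x, float *y) {
  #pragma unroll
  for (int i = 0; i < n; i += step)
    y[i] += a * x[i];
}
```

Compiled as in the new test's RUN lines (`%clang_cc1 -O2 -force-unroll-pragma -emit-llvm`), a loop like this should be runtime-unrolled with a prologue handling the remainder iterations; without the flag it keeps its `llvm.loop.unroll.enable` metadata but stays rolled, as the CHECK-NOPRAGMA/CHECK-PRAGMA prefixes in `clang/test/CodeGen/force-unroll-pragma.c` show.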
Co-authored-by: Carlo Bertolli [email protected] >From dfc667b64e3c59e3235c09651d0afab89f86b1c4 Mon Sep 17 00:00:00 2001 From: Adel Ejjeh <[email protected]> Date: Wed, 11 Feb 2026 09:42:23 -0600 Subject: [PATCH] Add flag to enforce loop unroll pragma regardless of expensive trip count Co-authored-by: Carlo Bertolli [email protected] --- clang/include/clang/Basic/CodeGenOptions.def | 1 + clang/include/clang/Options/Options.td | 2 + clang/lib/CodeGen/CGLoopInfo.cpp | 14 + clang/lib/CodeGen/CGLoopInfo.h | 3 + clang/lib/Frontend/CompilerInvocation.cpp | 5 +- clang/test/CodeGen/force-unroll-pragma.c | 339 +++++++++++++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 17 +- .../LoopUnroll/expensive-tripcount.ll | 474 ++++++++++++++++++ 8 files changed, 850 insertions(+), 5 deletions(-) create mode 100644 clang/test/CodeGen/force-unroll-pragma.c create mode 100644 llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 8c056bb690690..1ff70ca69da23 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -339,6 +339,7 @@ VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500, Benign) ///< Minimum time granul CODEGENOPT(InterchangeLoops , 1, 0, Benign) ///< Run loop-interchange. CODEGENOPT(FuseLoops , 1, 0, Benign) ///< Run loop-fusion. CODEGENOPT(UnrollLoops , 1, 0, Benign) ///< Control whether loops are unrolled. +CODEGENOPT(ForceUnrollPragma , 1, 0, Benign) ///< Force unroll runtime loops when pragma provided. CODEGENOPT(RerollLoops , 1, 0, Benign) ///< Control whether loops are rerolled. CODEGENOPT(NoUseJumpTables , 1, 0, Benign) ///< Set when -fno-jump-tables is enabled. VALUE_CODEGENOPT(UnwindTables, 2, 0, Benign) ///< Unwind tables (1, Benign) or asynchronous unwind tables (2, Benign) diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 155f19fb00bd8..09a4219d0f378 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -4492,6 +4492,8 @@ def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>, HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>, HelpText<"Turn off loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; +def force_unroll_pragma : Flag<["-"], "force-unroll-pragma">, Group<f_Group>, + HelpText<"Force unroll runtime loops when an unroll pragma is provided">, Visibility<[ClangOption, CC1Option]>; def ffinite_loops: Flag<["-"], "ffinite-loops">, Group<f_Group>, HelpText<"Assume all non-trivial loops are finite.">, Visibility<[ClangOption, CC1Option]>; def fno_finite_loops: Flag<["-"], "fno-finite-loops">, Group<f_Group>, diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp index b2b569a43038c..93486a65de22d 100644 --- a/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/clang/lib/CodeGen/CGLoopInfo.cpp @@ -122,6 +122,13 @@ LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs, Args.push_back(MDNode::get(Ctx, Vals)); } + // Emit metadata to allow expensive trip count if ForceUnrollPragma is set + // This applies when unroll pragma is specified without an explicit count + if (Attrs.ForceUnrollPragma) { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll.runtime.force")}; + Args.push_back(MDNode::get(Ctx, Vals)); + } + if (FollowupHasTransforms) Args.push_back( 
createFollowupMetadata("llvm.loop.unroll.followup_all", Followup)); @@ -821,6 +828,13 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, StagedAttrs.UnrollCount == 0)) setUnrollState(LoopAttributes::Disable); + // Set ForceUnrollPragma flag if the flag is enabled and there's an unroll + // pragma without an explicit count (pragmas with explicit counts already + // enable expensive trip count) + if (CGOpts.ForceUnrollPragma) { + StagedAttrs.ForceUnrollPragma = true; + } + /// Stage the attributes. push(Header, StartLoc, EndLoc); } diff --git a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h index 3c57124f4137c..e8ec8af55a616 100644 --- a/clang/lib/CodeGen/CGLoopInfo.h +++ b/clang/lib/CodeGen/CGLoopInfo.h @@ -84,6 +84,9 @@ struct LoopAttributes { /// Value for whether the loop is required to make progress. bool MustProgress; + + /// Value for whether to force unroll pragma even with expensive trip count. + bool ForceUnrollPragma = false; }; /// Information used when generating a structured loop. diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 6aa2afb6f5918..005d1ae47b1a5 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1603,7 +1603,8 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts, GenerateArg(Consumer, OPT_funroll_loops); else if (!Opts.UnrollLoops && Opts.OptimizationLevel > 1) GenerateArg(Consumer, OPT_fno_unroll_loops); - + if (Opts.ForceUnrollPragma) + GenerateArg(Consumer, OPT_force_unroll_pragma); if (Opts.InterchangeLoops) GenerateArg(Consumer, OPT_floop_interchange); else @@ -1921,6 +1922,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, Opts.UnrollLoops = Args.hasFlag(OPT_funroll_loops, OPT_fno_unroll_loops, (Opts.OptimizationLevel > 1)); + Opts.ForceUnrollPragma = Args.hasFlag( + OPT_force_unroll_pragma, /*OPT_fno_force_unroll_pragma*/ {}, false); Opts.InterchangeLoops = Args.hasFlag(OPT_floop_interchange, OPT_fno_loop_interchange, false); Opts.FuseLoops = Args.hasFlag(OPT_fexperimental_loop_fusion, diff --git a/clang/test/CodeGen/force-unroll-pragma.c b/clang/test/CodeGen/force-unroll-pragma.c new file mode 100644 index 0000000000000..8c79d5b7a1f5d --- /dev/null +++ b/clang/test/CodeGen/force-unroll-pragma.c @@ -0,0 +1,339 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOPRAGMA +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -force-unroll-pragma %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-PRAGMA + +const int output_vec_size = 4; +struct ArgVec { + float v[output_vec_size]; +}; + +// CHECK-LABEL: define dso_local i32 @calc_offset( +// CHECK-SAME: i32 noundef [[INPUT_OFFSET:%.*]], i32 noundef [[OFF1:%.*]], i32 noundef [[OFF2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[OFF1]], [[INPUT_OFFSET]] +// CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[OFF2]] +// CHECK-NEXT: ret i32 [[ADD1]] +// +int calc_offset(int input_offset, int off1, int off2) { + return input_offset + off1 + off2; +} + +// CHECK-NOPRAGMA-LABEL: define dso_local void @complex_loop( +// CHECK-NOPRAGMA-SAME: i32 noundef [[INPUT_OFFSET:%.*]], i32 noundef [[STEP:%.*]], i32 noundef [[N:%.*]], i32 noundef [[OFF1:%.*]], i32 noundef 
[[OFF2:%.*]], ptr noundef readonly captures(none) [[REDUCE_BUFFER:%.*]], ptr noundef captures(none) [[VALUE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-NOPRAGMA-NEXT: [[ENTRY:.*:]] +// CHECK-NOPRAGMA-NEXT: [[CMP23:%.*]] = icmp slt i32 [[INPUT_OFFSET]], [[N]] +// CHECK-NOPRAGMA-NEXT: br i1 [[CMP23]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END14:.*]] +// CHECK-NOPRAGMA: [[FOR_BODY_LR_PH]]: +// CHECK-NOPRAGMA-NEXT: [[ADD_I:%.*]] = add i32 [[OFF2]], [[OFF1]] +// CHECK-NOPRAGMA-NEXT: [[TMP0:%.*]] = sext i32 [[INPUT_OFFSET]] to i64 +// CHECK-NOPRAGMA-NEXT: [[TMP1:%.*]] = sext i32 [[STEP]] to i64 +// CHECK-NOPRAGMA-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 +// CHECK-NOPRAGMA-NEXT: [[DOTPRE:%.*]] = load float, ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6:![0-9]+]] +// CHECK-NOPRAGMA-NEXT: [[ARRAYIDX5_1_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 4 +// CHECK-NOPRAGMA-NEXT: [[DOTPRE27:%.*]] = load float, ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-NOPRAGMA-NEXT: [[ARRAYIDX5_2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 8 +// CHECK-NOPRAGMA-NEXT: [[DOTPRE28:%.*]] = load float, ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-NOPRAGMA-NEXT: [[ARRAYIDX5_3_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 12 +// CHECK-NOPRAGMA-NEXT: [[DOTPRE29:%.*]] = load float, ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-NOPRAGMA-NEXT: br label %[[FOR_BODY:.*]] +// CHECK-NOPRAGMA: [[FOR_BODY]]: +// CHECK-NOPRAGMA-NEXT: [[TMP3:%.*]] = phi float [ [[DOTPRE29]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_3:%.*]], %[[FOR_BODY]] ] +// CHECK-NOPRAGMA-NEXT: [[TMP4:%.*]] = phi float [ [[DOTPRE28]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_2:%.*]], %[[FOR_BODY]] ] +// CHECK-NOPRAGMA-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE27]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_1:%.*]], %[[FOR_BODY]] ] +// CHECK-NOPRAGMA-NEXT: [[TMP6:%.*]] = phi float [ [[DOTPRE]], %[[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] +// CHECK-NOPRAGMA-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], %[[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +// CHECK-NOPRAGMA-NEXT: [[TMP7:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32 +// CHECK-NOPRAGMA-NEXT: [[ADD1_I:%.*]] = add i32 [[ADD_I]], [[TMP7]] +// CHECK-NOPRAGMA-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD1_I]] to i64 +// CHECK-NOPRAGMA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC:%.*]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM]] +// CHECK-NOPRAGMA-NEXT: [[NEXT_SROA_0_0_COPYLOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NOPRAGMA-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4 +// CHECK-NOPRAGMA-NEXT: [[NEXT_SROA_4_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX]], align 4 +// CHECK-NOPRAGMA-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8 +// CHECK-NOPRAGMA-NEXT: [[NEXT_SROA_5_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX]], align 4 +// CHECK-NOPRAGMA-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12 +// CHECK-NOPRAGMA-NEXT: [[NEXT_SROA_6_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX]], align 4, !tbaa [[CHAR_TBAA8:![0-9]+]] +// CHECK-NOPRAGMA-NEXT: [[ADD]] = fadd float [[TMP6]], [[NEXT_SROA_0_0_COPYLOAD]] +// CHECK-NOPRAGMA-NEXT: store float [[ADD]], ptr [[VALUE]], align 
4, !tbaa [[FLOAT_TBAA6]] +// CHECK-NOPRAGMA-NEXT: [[ADD_1]] = fadd float [[TMP5]], [[NEXT_SROA_4_0_COPYLOAD]] +// CHECK-NOPRAGMA-NEXT: store float [[ADD_1]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-NOPRAGMA-NEXT: [[ADD_2]] = fadd float [[TMP4]], [[NEXT_SROA_5_0_COPYLOAD]] +// CHECK-NOPRAGMA-NEXT: store float [[ADD_2]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-NOPRAGMA-NEXT: [[ADD_3]] = fadd float [[TMP3]], [[NEXT_SROA_6_0_COPYLOAD]] +// CHECK-NOPRAGMA-NEXT: store float [[ADD_3]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-NOPRAGMA-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP1]] +// CHECK-NOPRAGMA-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP2]] +// CHECK-NOPRAGMA-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END14]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK-NOPRAGMA: [[FOR_END14]]: +// CHECK-NOPRAGMA-NEXT: ret void +// +// CHECK-PRAGMA-LABEL: define dso_local void @complex_loop( +// CHECK-PRAGMA-SAME: i32 noundef [[INPUT_OFFSET:%.*]], i32 noundef [[STEP:%.*]], i32 noundef [[N:%.*]], i32 noundef [[OFF1:%.*]], i32 noundef [[OFF2:%.*]], ptr noundef readonly captures(none) [[REDUCE_BUFFER:%.*]], ptr noundef captures(none) [[VALUE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-PRAGMA-NEXT: [[ENTRY:.*:]] +// CHECK-PRAGMA-NEXT: [[CMP23:%.*]] = icmp slt i32 [[INPUT_OFFSET]], [[N]] +// CHECK-PRAGMA-NEXT: br i1 [[CMP23]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END14:.*]] +// CHECK-PRAGMA: [[FOR_BODY_LR_PH]]: +// CHECK-PRAGMA-NEXT: [[ADD_I:%.*]] = add i32 [[OFF2]], [[OFF1]] +// CHECK-PRAGMA-NEXT: [[TMP0:%.*]] = sext i32 [[INPUT_OFFSET]] to i64 +// CHECK-PRAGMA-NEXT: [[TMP1:%.*]] = sext i32 [[STEP]] to i64 +// CHECK-PRAGMA-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 +// CHECK-PRAGMA-NEXT: [[DOTPRE:%.*]] = load float, ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6:![0-9]+]] +// CHECK-PRAGMA-NEXT: [[ARRAYIDX5_1_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 4 +// CHECK-PRAGMA-NEXT: [[DOTPRE27:%.*]] = load float, ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ARRAYIDX5_2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 8 +// CHECK-PRAGMA-NEXT: [[DOTPRE28:%.*]] = load float, ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ARRAYIDX5_3_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 12 +// CHECK-PRAGMA-NEXT: [[DOTPRE29:%.*]] = load float, ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP1]], [[TMP0]] +// CHECK-PRAGMA-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP3]], i64 [[TMP2]]) +// CHECK-PRAGMA-NEXT: [[TMP4:%.*]] = icmp slt i64 [[TMP3]], [[TMP2]] +// CHECK-PRAGMA-NEXT: [[UMIN:%.*]] = zext i1 [[TMP4]] to i64 +// CHECK-PRAGMA-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP3]], [[UMIN]] +// CHECK-PRAGMA-NEXT: [[TMP6:%.*]] = sub i64 [[SMAX]], [[TMP5]] +// CHECK-PRAGMA-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP6]], [[TMP1]] +// CHECK-PRAGMA-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[UMIN]] +// CHECK-PRAGMA-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 1 +// CHECK-PRAGMA-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP9]], 7 +// CHECK-PRAGMA-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0 +// CHECK-PRAGMA-NEXT: br i1 [[LCMP_MOD_NOT]], label %[[FOR_BODY_PROL_LOOPEXIT:.*]], label %[[FOR_BODY_PROL:.*]] +// CHECK-PRAGMA: 
[[FOR_BODY_PROL]]: +// CHECK-PRAGMA-NEXT: [[TMP10:%.*]] = phi float [ [[ADD_3_PROL:%.*]], %[[FOR_BODY_PROL]] ], [ [[DOTPRE29]], %[[FOR_BODY_LR_PH]] ] +// CHECK-PRAGMA-NEXT: [[TMP11:%.*]] = phi float [ [[ADD_2_PROL:%.*]], %[[FOR_BODY_PROL]] ], [ [[DOTPRE28]], %[[FOR_BODY_LR_PH]] ] +// CHECK-PRAGMA-NEXT: [[TMP12:%.*]] = phi float [ [[ADD_1_PROL:%.*]], %[[FOR_BODY_PROL]] ], [ [[DOTPRE27]], %[[FOR_BODY_LR_PH]] ] +// CHECK-PRAGMA-NEXT: [[TMP13:%.*]] = phi float [ [[ADD_PROL:%.*]], %[[FOR_BODY_PROL]] ], [ [[DOTPRE]], %[[FOR_BODY_LR_PH]] ] +// CHECK-PRAGMA-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], %[[FOR_BODY_PROL]] ], [ [[TMP0]], %[[FOR_BODY_LR_PH]] ] +// CHECK-PRAGMA-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], %[[FOR_BODY_PROL]] ], [ 0, %[[FOR_BODY_LR_PH]] ] +// CHECK-PRAGMA-NEXT: [[TMP14:%.*]] = trunc nsw i64 [[INDVARS_IV_PROL]] to i32 +// CHECK-PRAGMA-NEXT: [[ADD1_I_PROL:%.*]] = add i32 [[ADD_I]], [[TMP14]] +// CHECK-PRAGMA-NEXT: [[IDXPROM_PROL:%.*]] = sext i32 [[ADD1_I_PROL]] to i64 +// CHECK-PRAGMA-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC:%.*]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_PROL]] +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_0_0_COPYLOAD_PROL:%.*]] = load float, ptr [[ARRAYIDX_PROL]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_PROL]], i64 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_COPYLOAD_PROL:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_PROL]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_PROL]], i64 8 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_COPYLOAD_PROL:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_PROL]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_PROL]], i64 12 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_COPYLOAD_PROL:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_PROL]], align 4, !tbaa [[CHAR_TBAA8:![0-9]+]] +// CHECK-PRAGMA-NEXT: [[ADD_PROL]] = fadd float [[TMP13]], [[NEXT_SROA_0_0_COPYLOAD_PROL]] +// CHECK-PRAGMA-NEXT: store float [[ADD_PROL]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_1_PROL]] = fadd float [[TMP12]], [[NEXT_SROA_4_0_COPYLOAD_PROL]] +// CHECK-PRAGMA-NEXT: store float [[ADD_1_PROL]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_2_PROL]] = fadd float [[TMP11]], [[NEXT_SROA_5_0_COPYLOAD_PROL]] +// CHECK-PRAGMA-NEXT: store float [[ADD_2_PROL]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_3_PROL]] = fadd float [[TMP10]], [[NEXT_SROA_6_0_COPYLOAD_PROL]] +// CHECK-PRAGMA-NEXT: store float [[ADD_3_PROL]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nsw i64 [[INDVARS_IV_PROL]], [[TMP1]] +// CHECK-PRAGMA-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +// CHECK-PRAGMA-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +// CHECK-PRAGMA-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label %[[FOR_BODY_PROL_LOOPEXIT]], label %[[FOR_BODY_PROL]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK-PRAGMA: [[FOR_BODY_PROL_LOOPEXIT]]: +// CHECK-PRAGMA-NEXT: [[DOTUNR:%.*]] = phi float [ [[DOTPRE29]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_3_PROL]], %[[FOR_BODY_PROL]] ] +// CHECK-PRAGMA-NEXT: [[DOTUNR30:%.*]] = phi float [ [[DOTPRE28]], 
%[[FOR_BODY_LR_PH]] ], [ [[ADD_2_PROL]], %[[FOR_BODY_PROL]] ] +// CHECK-PRAGMA-NEXT: [[DOTUNR31:%.*]] = phi float [ [[DOTPRE27]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_1_PROL]], %[[FOR_BODY_PROL]] ] +// CHECK-PRAGMA-NEXT: [[DOTUNR32:%.*]] = phi float [ [[DOTPRE]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_PROL]], %[[FOR_BODY_PROL]] ] +// CHECK-PRAGMA-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[TMP0]], %[[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT_PROL]], %[[FOR_BODY_PROL]] ] +// CHECK-PRAGMA-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP8]], 7 +// CHECK-PRAGMA-NEXT: br i1 [[TMP15]], label %[[FOR_END14]], label %[[FOR_BODY:.*]] +// CHECK-PRAGMA: [[FOR_BODY]]: +// CHECK-PRAGMA-NEXT: [[TMP16:%.*]] = phi float [ [[ADD_3_7:%.*]], %[[FOR_BODY]] ], [ [[DOTUNR]], %[[FOR_BODY_PROL_LOOPEXIT]] ] +// CHECK-PRAGMA-NEXT: [[TMP17:%.*]] = phi float [ [[ADD_2_7:%.*]], %[[FOR_BODY]] ], [ [[DOTUNR30]], %[[FOR_BODY_PROL_LOOPEXIT]] ] +// CHECK-PRAGMA-NEXT: [[TMP18:%.*]] = phi float [ [[ADD_1_7:%.*]], %[[FOR_BODY]] ], [ [[DOTUNR31]], %[[FOR_BODY_PROL_LOOPEXIT]] ] +// CHECK-PRAGMA-NEXT: [[TMP19:%.*]] = phi float [ [[ADD_7:%.*]], %[[FOR_BODY]] ], [ [[DOTUNR32]], %[[FOR_BODY_PROL_LOOPEXIT]] ] +// CHECK-PRAGMA-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_UNR]], %[[FOR_BODY_PROL_LOOPEXIT]] ] +// CHECK-PRAGMA-NEXT: [[TMP20:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32 +// CHECK-PRAGMA-NEXT: [[ADD1_I:%.*]] = add i32 [[ADD_I]], [[TMP20]] +// CHECK-PRAGMA-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD1_I]] to i64 +// CHECK-PRAGMA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM]] +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_0_0_COPYLOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX]], align 4, !tbaa [[CHAR_TBAA8]] +// CHECK-PRAGMA-NEXT: [[ADD:%.*]] = fadd float [[TMP19]], [[NEXT_SROA_0_0_COPYLOAD]] +// CHECK-PRAGMA-NEXT: store float [[ADD]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_1:%.*]] = fadd float [[TMP18]], [[NEXT_SROA_4_0_COPYLOAD]] +// CHECK-PRAGMA-NEXT: store float [[ADD_1]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_2:%.*]] = fadd float [[TMP17]], [[NEXT_SROA_5_0_COPYLOAD]] +// CHECK-PRAGMA-NEXT: store float [[ADD_2]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_3:%.*]] = fadd float [[TMP16]], [[NEXT_SROA_6_0_COPYLOAD]] +// CHECK-PRAGMA-NEXT: store float [[ADD_3]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nsw i64 [[INDVARS_IV]], [[TMP1]] +// CHECK-PRAGMA-NEXT: [[TMP21:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT]] to i32 +// CHECK-PRAGMA-NEXT: [[ADD1_I_1:%.*]] = add i32 [[ADD_I]], [[TMP21]] +// CHECK-PRAGMA-NEXT: [[IDXPROM_1:%.*]] = sext i32 
[[ADD1_I_1]] to i64 +// CHECK-PRAGMA-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_1]] +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_0_0_COPYLOAD_1:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_1]], i64 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_COPYLOAD_1:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_1]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_1]], i64 8 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_COPYLOAD_1:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_1]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_1]], i64 12 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_COPYLOAD_1:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_1]], align 4, !tbaa [[CHAR_TBAA8]] +// CHECK-PRAGMA-NEXT: [[ADD_133:%.*]] = fadd float [[ADD]], [[NEXT_SROA_0_0_COPYLOAD_1]] +// CHECK-PRAGMA-NEXT: store float [[ADD_133]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_1_1:%.*]] = fadd float [[ADD_1]], [[NEXT_SROA_4_0_COPYLOAD_1]] +// CHECK-PRAGMA-NEXT: store float [[ADD_1_1]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_2_1:%.*]] = fadd float [[ADD_2]], [[NEXT_SROA_5_0_COPYLOAD_1]] +// CHECK-PRAGMA-NEXT: store float [[ADD_2_1]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_3_1:%.*]] = fadd float [[ADD_3]], [[NEXT_SROA_6_0_COPYLOAD_1]] +// CHECK-PRAGMA-NEXT: store float [[ADD_3_1]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nsw i64 [[INDVARS_IV_NEXT]], [[TMP1]] +// CHECK-PRAGMA-NEXT: [[TMP22:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_1]] to i32 +// CHECK-PRAGMA-NEXT: [[ADD1_I_2:%.*]] = add i32 [[ADD_I]], [[TMP22]] +// CHECK-PRAGMA-NEXT: [[IDXPROM_2:%.*]] = sext i32 [[ADD1_I_2]] to i64 +// CHECK-PRAGMA-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_2]] +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_0_0_COPYLOAD_2:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_2]], i64 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_COPYLOAD_2:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_2]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_2]], i64 8 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_COPYLOAD_2:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_2]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_2]], i64 12 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_COPYLOAD_2:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_2]], align 4, !tbaa [[CHAR_TBAA8]] +// CHECK-PRAGMA-NEXT: [[ADD_234:%.*]] = fadd float [[ADD_133]], [[NEXT_SROA_0_0_COPYLOAD_2]] +// CHECK-PRAGMA-NEXT: store float [[ADD_234]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_1_2:%.*]] = fadd float [[ADD_1_1]], [[NEXT_SROA_4_0_COPYLOAD_2]] +// CHECK-PRAGMA-NEXT: store float [[ADD_1_2]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_2_2:%.*]] = 
fadd float [[ADD_2_1]], [[NEXT_SROA_5_0_COPYLOAD_2]] +// CHECK-PRAGMA-NEXT: store float [[ADD_2_2]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_3_2:%.*]] = fadd float [[ADD_3_1]], [[NEXT_SROA_6_0_COPYLOAD_2]] +// CHECK-PRAGMA-NEXT: store float [[ADD_3_2]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_1]], [[TMP1]] +// CHECK-PRAGMA-NEXT: [[TMP23:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_2]] to i32 +// CHECK-PRAGMA-NEXT: [[ADD1_I_3:%.*]] = add i32 [[ADD_I]], [[TMP23]] +// CHECK-PRAGMA-NEXT: [[IDXPROM_3:%.*]] = sext i32 [[ADD1_I_3]] to i64 +// CHECK-PRAGMA-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_3]] +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_0_0_COPYLOAD_3:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_3]], i64 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_COPYLOAD_3:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_3]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_3]], i64 8 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_COPYLOAD_3:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_3]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_3]], i64 12 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_COPYLOAD_3:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_3]], align 4, !tbaa [[CHAR_TBAA8]] +// CHECK-PRAGMA-NEXT: [[ADD_335:%.*]] = fadd float [[ADD_234]], [[NEXT_SROA_0_0_COPYLOAD_3]] +// CHECK-PRAGMA-NEXT: store float [[ADD_335]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_1_3:%.*]] = fadd float [[ADD_1_2]], [[NEXT_SROA_4_0_COPYLOAD_3]] +// CHECK-PRAGMA-NEXT: store float [[ADD_1_3]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_2_3:%.*]] = fadd float [[ADD_2_2]], [[NEXT_SROA_5_0_COPYLOAD_3]] +// CHECK-PRAGMA-NEXT: store float [[ADD_2_3]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_3_3:%.*]] = fadd float [[ADD_3_2]], [[NEXT_SROA_6_0_COPYLOAD_3]] +// CHECK-PRAGMA-NEXT: store float [[ADD_3_3]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_2]], [[TMP1]] +// CHECK-PRAGMA-NEXT: [[TMP24:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_3]] to i32 +// CHECK-PRAGMA-NEXT: [[ADD1_I_4:%.*]] = add i32 [[ADD_I]], [[TMP24]] +// CHECK-PRAGMA-NEXT: [[IDXPROM_4:%.*]] = sext i32 [[ADD1_I_4]] to i64 +// CHECK-PRAGMA-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_4]] +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_0_0_COPYLOAD_4:%.*]] = load float, ptr [[ARRAYIDX_4]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_4]], i64 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_COPYLOAD_4:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_4]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_4]], i64 8 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_COPYLOAD_4:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_4]], align 4 +// 
CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_4]], i64 12 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_COPYLOAD_4:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_4]], align 4, !tbaa [[CHAR_TBAA8]] +// CHECK-PRAGMA-NEXT: [[ADD_4:%.*]] = fadd float [[ADD_335]], [[NEXT_SROA_0_0_COPYLOAD_4]] +// CHECK-PRAGMA-NEXT: store float [[ADD_4]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_1_4:%.*]] = fadd float [[ADD_1_3]], [[NEXT_SROA_4_0_COPYLOAD_4]] +// CHECK-PRAGMA-NEXT: store float [[ADD_1_4]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_2_4:%.*]] = fadd float [[ADD_2_3]], [[NEXT_SROA_5_0_COPYLOAD_4]] +// CHECK-PRAGMA-NEXT: store float [[ADD_2_4]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_3_4:%.*]] = fadd float [[ADD_3_3]], [[NEXT_SROA_6_0_COPYLOAD_4]] +// CHECK-PRAGMA-NEXT: store float [[ADD_3_4]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_3]], [[TMP1]] +// CHECK-PRAGMA-NEXT: [[TMP25:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_4]] to i32 +// CHECK-PRAGMA-NEXT: [[ADD1_I_5:%.*]] = add i32 [[ADD_I]], [[TMP25]] +// CHECK-PRAGMA-NEXT: [[IDXPROM_5:%.*]] = sext i32 [[ADD1_I_5]] to i64 +// CHECK-PRAGMA-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_5]] +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_0_0_COPYLOAD_5:%.*]] = load float, ptr [[ARRAYIDX_5]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_5]], i64 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_COPYLOAD_5:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_5]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_5]], i64 8 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_COPYLOAD_5:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_5]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_5]], i64 12 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_COPYLOAD_5:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_5]], align 4, !tbaa [[CHAR_TBAA8]] +// CHECK-PRAGMA-NEXT: [[ADD_5:%.*]] = fadd float [[ADD_4]], [[NEXT_SROA_0_0_COPYLOAD_5]] +// CHECK-PRAGMA-NEXT: store float [[ADD_5]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_1_5:%.*]] = fadd float [[ADD_1_4]], [[NEXT_SROA_4_0_COPYLOAD_5]] +// CHECK-PRAGMA-NEXT: store float [[ADD_1_5]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_2_5:%.*]] = fadd float [[ADD_2_4]], [[NEXT_SROA_5_0_COPYLOAD_5]] +// CHECK-PRAGMA-NEXT: store float [[ADD_2_5]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_3_5:%.*]] = fadd float [[ADD_3_4]], [[NEXT_SROA_6_0_COPYLOAD_5]] +// CHECK-PRAGMA-NEXT: store float [[ADD_3_5]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_4]], [[TMP1]] +// CHECK-PRAGMA-NEXT: [[TMP26:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_5]] to i32 +// CHECK-PRAGMA-NEXT: [[ADD1_I_6:%.*]] = add i32 [[ADD_I]], [[TMP26]] +// CHECK-PRAGMA-NEXT: [[IDXPROM_6:%.*]] = sext i32 [[ADD1_I_6]] to i64 +// CHECK-PRAGMA-NEXT: 
[[ARRAYIDX_6:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_6]] +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_0_0_COPYLOAD_6:%.*]] = load float, ptr [[ARRAYIDX_6]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_6]], i64 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_COPYLOAD_6:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_6]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_6]], i64 8 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_COPYLOAD_6:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_6]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_6]], i64 12 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_COPYLOAD_6:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_6]], align 4, !tbaa [[CHAR_TBAA8]] +// CHECK-PRAGMA-NEXT: [[ADD_6:%.*]] = fadd float [[ADD_5]], [[NEXT_SROA_0_0_COPYLOAD_6]] +// CHECK-PRAGMA-NEXT: store float [[ADD_6]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_1_6:%.*]] = fadd float [[ADD_1_5]], [[NEXT_SROA_4_0_COPYLOAD_6]] +// CHECK-PRAGMA-NEXT: store float [[ADD_1_6]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_2_6:%.*]] = fadd float [[ADD_2_5]], [[NEXT_SROA_5_0_COPYLOAD_6]] +// CHECK-PRAGMA-NEXT: store float [[ADD_2_6]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_3_6:%.*]] = fadd float [[ADD_3_5]], [[NEXT_SROA_6_0_COPYLOAD_6]] +// CHECK-PRAGMA-NEXT: store float [[ADD_3_6]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_5]], [[TMP1]] +// CHECK-PRAGMA-NEXT: [[TMP27:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_6]] to i32 +// CHECK-PRAGMA-NEXT: [[ADD1_I_7:%.*]] = add i32 [[ADD_I]], [[TMP27]] +// CHECK-PRAGMA-NEXT: [[IDXPROM_7:%.*]] = sext i32 [[ADD1_I_7]] to i64 +// CHECK-PRAGMA-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_7]] +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_0_0_COPYLOAD_7:%.*]] = load float, ptr [[ARRAYIDX_7]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_7]], i64 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_4_0_COPYLOAD_7:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_7]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_7]], i64 8 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_5_0_COPYLOAD_7:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_7]], align 4 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_7]], i64 12 +// CHECK-PRAGMA-NEXT: [[NEXT_SROA_6_0_COPYLOAD_7:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_7]], align 4, !tbaa [[CHAR_TBAA8]] +// CHECK-PRAGMA-NEXT: [[ADD_7]] = fadd float [[ADD_6]], [[NEXT_SROA_0_0_COPYLOAD_7]] +// CHECK-PRAGMA-NEXT: store float [[ADD_7]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_1_7]] = fadd float [[ADD_1_6]], [[NEXT_SROA_4_0_COPYLOAD_7]] +// CHECK-PRAGMA-NEXT: store float [[ADD_1_7]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_2_7]] = fadd float [[ADD_2_6]], [[NEXT_SROA_5_0_COPYLOAD_7]] 
+// CHECK-PRAGMA-NEXT: store float [[ADD_2_7]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[ADD_3_7]] = fadd float [[ADD_3_6]], [[NEXT_SROA_6_0_COPYLOAD_7]] +// CHECK-PRAGMA-NEXT: store float [[ADD_3_7]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +// CHECK-PRAGMA-NEXT: [[INDVARS_IV_NEXT_7]] = add nsw i64 [[INDVARS_IV_NEXT_6]], [[TMP1]] +// CHECK-PRAGMA-NEXT: [[CMP_7:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_7]], [[TMP2]] +// CHECK-PRAGMA-NEXT: br i1 [[CMP_7]], label %[[FOR_BODY]], label %[[FOR_END14]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK-PRAGMA: [[FOR_END14]]: +// CHECK-PRAGMA-NEXT: ret void +// +void complex_loop(int input_offset, int step, int n, int off1, int off2, const struct ArgVec* reduce_buffer, struct ArgVec* value) { + #pragma unroll + for (; input_offset < n; input_offset += step) { + int idx = calc_offset(input_offset, off1, off2); + struct ArgVec next = reduce_buffer[idx]; + #pragma unroll + for (int i = 0; i < output_vec_size; i++) { + value->v[i] = value->v[i] + next.v[i]; + } + } +} + +//. +// CHECK-NOPRAGMA: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-NOPRAGMA: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK-NOPRAGMA: [[FLOAT_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-NOPRAGMA: [[META7]] = !{!"float", [[META4]], i64 0} +// CHECK-NOPRAGMA: [[CHAR_TBAA8]] = !{[[META4]], [[META4]], i64 0} +// CHECK-NOPRAGMA: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]]} +// CHECK-NOPRAGMA: [[META10]] = !{!"llvm.loop.mustprogress"} +// CHECK-NOPRAGMA: [[META11]] = !{!"llvm.loop.unroll.enable"} +//. +// CHECK-PRAGMA: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-PRAGMA: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK-PRAGMA: [[FLOAT_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-PRAGMA: [[META7]] = !{!"float", [[META4]], i64 0} +// CHECK-PRAGMA: [[CHAR_TBAA8]] = !{[[META4]], [[META4]], i64 0} +// CHECK-PRAGMA: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]]} +// CHECK-PRAGMA: [[META10]] = !{!"llvm.loop.unroll.disable"} +// CHECK-PRAGMA: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]], [[META10]]} +// CHECK-PRAGMA: [[META12]] = !{!"llvm.loop.mustprogress"} +//. diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 6050650eb937c..06d8c2b12c90c 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -330,13 +330,14 @@ struct EstimatedUnrollCost { }; struct PragmaInfo { - PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU) + PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU, bool FPU) : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC), - PragmaEnableUnroll(PEU) {} + PragmaEnableUnroll(PEU), ForcePragmaUnroll(FPU) {} const bool UserUnrollCount; const bool PragmaFullUnroll; const unsigned PragmaCount; const bool PragmaEnableUnroll; + const bool ForcePragmaUnroll; }; } // end anonymous namespace @@ -762,6 +763,12 @@ static bool hasRuntimeUnrollDisablePragma(const Loop *L) { return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable"); } +// Returns true if the loop has a metadata flag to allow expensive trip counts +// when unrolling with a pragma. 
+static bool hasRuntimeForceUnroll(const Loop *L) { + return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.force"); +} + // If loop has an unroll_count pragma return the (necessarily // positive) value from the pragma. Otherwise return 0. static unsigned unrollCountPragmaValue(const Loop *L) { @@ -937,12 +944,13 @@ bool llvm::computeUnrollCount( const bool PragmaFullUnroll = hasUnrollFullPragma(L); const unsigned PragmaCount = unrollCountPragmaValue(L); const bool PragmaEnableUnroll = hasUnrollEnablePragma(L); + const bool ForcePragmaUnroll = hasRuntimeForceUnroll(L); const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll || PragmaEnableUnroll || UserUnrollCount; PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount, - PragmaEnableUnroll); + PragmaEnableUnroll, ForcePragmaUnroll); // Use an explicit peel count that has been specified for testing. In this // case it's not permitted to also specify an explicit unroll count. if (PP.PeelCount) { @@ -1102,7 +1110,8 @@ bool llvm::computeUnrollCount( } if (UP.Count == 0) UP.Count = UP.DefaultUnrollRuntimeCount; - + if (PragmaEnableUnroll && ForcePragmaUnroll) + UP.AllowExpensiveTripCount = true; // Reduce unroll count to be the largest power-of-two factor of // the original count which satisfies the threshold limit. while (UP.Count != 0 && diff --git a/llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll b/llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll new file mode 100644 index 0000000000000..bdae391f2c0f1 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll @@ -0,0 +1,474 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -S -passes=loop-unroll | FileCheck %s +; Checks that loops with expensive trip counts are unrolled when the force-unroll-pragma Metadata is present. +; The first loop should be unrolled, while the second loop should not be unrolled. 
+ +; ModuleID = 'force-unroll-pragma.c' +source_filename = "force-unroll-pragma.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.ArgVec = type { [4 x float] } + +@output_vec_size = local_unnamed_addr constant i32 4, align 4 + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +define dso_local i32 @calc_offset(i32 noundef %input_offset, i32 noundef %off1, i32 noundef %off2) local_unnamed_addr #0 { +; CHECK-LABEL: define dso_local i32 @calc_offset( +; CHECK-SAME: i32 noundef [[INPUT_OFFSET:%.*]], i32 noundef [[OFF1:%.*]], i32 noundef [[OFF2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[OFF1]], [[INPUT_OFFSET]] +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[OFF2]] +; CHECK-NEXT: ret i32 [[ADD1]] +; +entry: + %add = add nsw i32 %off1, %input_offset + %add1 = add nsw i32 %add, %off2 + ret i32 %add1 +} + +; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) +define dso_local void @complex_loop_unroll(i32 noundef %input_offset, i32 noundef %step, i32 noundef %n, i32 noundef %off1, i32 noundef %off2, ptr noundef readonly captures(none) %reduce_buffer, ptr noundef captures(none) %value) local_unnamed_addr #1 { +; CHECK-LABEL: define dso_local void @complex_loop_unroll( +; CHECK-SAME: i32 noundef [[INPUT_OFFSET:%.*]], i32 noundef [[STEP:%.*]], i32 noundef [[N:%.*]], i32 noundef [[OFF1:%.*]], i32 noundef [[OFF2:%.*]], ptr noundef readonly captures(none) [[REDUCE_BUFFER:%.*]], ptr noundef captures(none) [[VALUE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP23:%.*]] = icmp slt i32 [[INPUT_OFFSET]], [[N]] +; CHECK-NEXT: br i1 [[CMP23]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END14:.*]] +; CHECK: [[FOR_BODY_LR_PH]]: +; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[OFF2]], [[OFF1]] +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INPUT_OFFSET]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[STEP]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 +; CHECK-NEXT: [[DOTPRE:%.*]] = load float, ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6:![0-9]+]] +; CHECK-NEXT: [[ARRAYIDX5_1_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 4 +; CHECK-NEXT: [[DOTPRE27:%.*]] = load float, ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ARRAYIDX5_2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 8 +; CHECK-NEXT: [[DOTPRE28:%.*]] = load float, ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ARRAYIDX5_3_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 12 +; CHECK-NEXT: [[DOTPRE29:%.*]] = load float, ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP3]], i64 [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[SMAX]], [[TMP3]] +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 1) +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[SMAX]], [[UMIN]] +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP6]], [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[UMIN]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP9]], 7 +; CHECK-NEXT: 
[[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_BODY_PROL_PREHEADER:.*]], label %[[FOR_BODY_PROL_LOOPEXIT:.*]] +; CHECK: [[FOR_BODY_PROL_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY_PROL:.*]] +; CHECK: [[FOR_BODY_PROL]]: +; CHECK-NEXT: [[TMP10:%.*]] = phi float [ [[DOTPRE29]], %[[FOR_BODY_PROL_PREHEADER]] ], [ [[ADD_3_PROL:%.*]], %[[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi float [ [[DOTPRE28]], %[[FOR_BODY_PROL_PREHEADER]] ], [ [[ADD_2_PROL:%.*]], %[[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[TMP12:%.*]] = phi float [ [[DOTPRE27]], %[[FOR_BODY_PROL_PREHEADER]] ], [ [[ADD_1_PROL:%.*]], %[[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi float [ [[DOTPRE]], %[[FOR_BODY_PROL_PREHEADER]] ], [ [[ADD_PROL:%.*]], %[[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[TMP0]], %[[FOR_BODY_PROL_PREHEADER]] ], [ [[INDVARS_IV_NEXT_PROL:%.*]], %[[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, %[[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[TMP14:%.*]] = trunc nsw i64 [[INDVARS_IV_PROL]] to i32 +; CHECK-NEXT: [[ADD1_I_PROL:%.*]] = add i32 [[ADD_I]], [[TMP14]] +; CHECK-NEXT: [[IDXPROM_PROL:%.*]] = sext i32 [[ADD1_I_PROL]] to i64 +; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC:%.*]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_PROL]] +; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_PROL:%.*]] = load float, ptr [[ARRAYIDX_PROL]], align 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_PROL]], i64 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_PROL:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_PROL]], align 4 +; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_PROL]], i64 8 +; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_PROL:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_PROL]], align 4 +; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_PROL]], i64 12 +; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_PROL:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_PROL]], align 4, !tbaa [[CHAR_TBAA8:![0-9]+]] +; CHECK-NEXT: [[ADD_PROL]] = fadd float [[TMP13]], [[NEXT_SROA_0_0_COPYLOAD_PROL]] +; CHECK-NEXT: store float [[ADD_PROL]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_1_PROL]] = fadd float [[TMP12]], [[NEXT_SROA_4_0_COPYLOAD_PROL]] +; CHECK-NEXT: store float [[ADD_1_PROL]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_2_PROL]] = fadd float [[TMP11]], [[NEXT_SROA_5_0_COPYLOAD_PROL]] +; CHECK-NEXT: store float [[ADD_2_PROL]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_3_PROL]] = fadd float [[TMP10]], [[NEXT_SROA_6_0_COPYLOAD_PROL]] +; CHECK-NEXT: store float [[ADD_3_PROL]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nsw i64 [[INDVARS_IV_PROL]], [[TMP1]] +; CHECK-NEXT: [[CMP_PROL:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_PROL]], [[TMP2]] +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label %[[FOR_BODY_PROL]], label %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]]: +; CHECK-NEXT: 
[[DOTUNR_PH:%.*]] = phi float [ [[ADD_3_PROL]], %[[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[DOTUNR1_PH:%.*]] = phi float [ [[ADD_2_PROL]], %[[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[DOTUNR2_PH:%.*]] = phi float [ [[ADD_1_PROL]], %[[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[DOTUNR3_PH:%.*]] = phi float [ [[ADD_PROL]], %[[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], %[[FOR_BODY_PROL]] ] +; CHECK-NEXT: br label %[[FOR_BODY_PROL_LOOPEXIT]] +; CHECK: [[FOR_BODY_PROL_LOOPEXIT]]: +; CHECK-NEXT: [[DOTUNR:%.*]] = phi float [ [[DOTPRE29]], %[[FOR_BODY_LR_PH]] ], [ [[DOTUNR_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[DOTUNR1:%.*]] = phi float [ [[DOTPRE28]], %[[FOR_BODY_LR_PH]] ], [ [[DOTUNR1_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[DOTUNR2:%.*]] = phi float [ [[DOTPRE27]], %[[FOR_BODY_LR_PH]] ], [ [[DOTUNR2_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[DOTUNR3:%.*]] = phi float [ [[DOTPRE]], %[[FOR_BODY_LR_PH]] ], [ [[DOTUNR3_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[TMP0]], %[[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_UNR_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP8]], 7 +; CHECK-NEXT: br i1 [[TMP15]], label %[[FOR_END14_LOOPEXIT:.*]], label %[[FOR_BODY_LR_PH_NEW:.*]] +; CHECK: [[FOR_BODY_LR_PH_NEW]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[TMP16:%.*]] = phi float [ [[DOTUNR]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3_7:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi float [ [[DOTUNR1]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_2_7:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi float [ [[DOTUNR2]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_1_7:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi float [ [[DOTUNR3]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_7:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP20:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[ADD1_I:%.*]] = add i32 [[ADD_I]], [[TMP20]] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD1_I]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM]] +; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX]], align 4 +; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8 +; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX]], align 4 +; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12 +; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX]], align 4, !tbaa [[CHAR_TBAA8]] +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP19]], [[NEXT_SROA_0_0_COPYLOAD]] +; CHECK-NEXT: store float [[ADD]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_1:%.*]] = fadd float [[TMP18]], [[NEXT_SROA_4_0_COPYLOAD]] +; CHECK-NEXT: store float [[ADD_1]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_2:%.*]] = fadd float [[TMP17]], 
[[NEXT_SROA_5_0_COPYLOAD]] +; CHECK-NEXT: store float [[ADD_2]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_3:%.*]] = fadd float [[TMP16]], [[NEXT_SROA_6_0_COPYLOAD]] +; CHECK-NEXT: store float [[ADD_3]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nsw i64 [[INDVARS_IV]], [[TMP1]] +; CHECK-NEXT: [[TMP21:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[ADD1_I_1:%.*]] = add i32 [[ADD_I]], [[TMP21]] +; CHECK-NEXT: [[IDXPROM_1:%.*]] = sext i32 [[ADD1_I_1]] to i64 +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_1]] +; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_1:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_1]], i64 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_1:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_1]], align 4 +; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_1]], i64 8 +; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_1:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_1]], align 4 +; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_1]], i64 12 +; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_1:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_1]], align 4, !tbaa [[CHAR_TBAA8]] +; CHECK-NEXT: [[ADD_14:%.*]] = fadd float [[ADD]], [[NEXT_SROA_0_0_COPYLOAD_1]] +; CHECK-NEXT: store float [[ADD_14]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_1_1:%.*]] = fadd float [[ADD_1]], [[NEXT_SROA_4_0_COPYLOAD_1]] +; CHECK-NEXT: store float [[ADD_1_1]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_2_1:%.*]] = fadd float [[ADD_2]], [[NEXT_SROA_5_0_COPYLOAD_1]] +; CHECK-NEXT: store float [[ADD_2_1]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_3_1:%.*]] = fadd float [[ADD_3]], [[NEXT_SROA_6_0_COPYLOAD_1]] +; CHECK-NEXT: store float [[ADD_3_1]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nsw i64 [[INDVARS_IV_NEXT]], [[TMP1]] +; CHECK-NEXT: [[TMP22:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_1]] to i32 +; CHECK-NEXT: [[ADD1_I_2:%.*]] = add i32 [[ADD_I]], [[TMP22]] +; CHECK-NEXT: [[IDXPROM_2:%.*]] = sext i32 [[ADD1_I_2]] to i64 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_2]] +; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_2:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_2]], i64 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_2:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_2]], align 4 +; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_2]], i64 8 +; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_2:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_2]], align 4 +; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_2]], i64 12 +; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_2:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_2]], align 4, !tbaa [[CHAR_TBAA8]] +; CHECK-NEXT: [[ADD_25:%.*]] = fadd float [[ADD_14]], [[NEXT_SROA_0_0_COPYLOAD_2]] +; 
CHECK-NEXT: store float [[ADD_25]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_1_2:%.*]] = fadd float [[ADD_1_1]], [[NEXT_SROA_4_0_COPYLOAD_2]] +; CHECK-NEXT: store float [[ADD_1_2]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_2_2:%.*]] = fadd float [[ADD_2_1]], [[NEXT_SROA_5_0_COPYLOAD_2]] +; CHECK-NEXT: store float [[ADD_2_2]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_3_2:%.*]] = fadd float [[ADD_3_1]], [[NEXT_SROA_6_0_COPYLOAD_2]] +; CHECK-NEXT: store float [[ADD_3_2]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_1]], [[TMP1]] +; CHECK-NEXT: [[TMP23:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_2]] to i32 +; CHECK-NEXT: [[ADD1_I_3:%.*]] = add i32 [[ADD_I]], [[TMP23]] +; CHECK-NEXT: [[IDXPROM_3:%.*]] = sext i32 [[ADD1_I_3]] to i64 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_3]] +; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_3:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_3]], i64 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_3:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_3]], align 4 +; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_3]], i64 8 +; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_3:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_3]], align 4 +; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_3]], i64 12 +; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_3:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_3]], align 4, !tbaa [[CHAR_TBAA8]] +; CHECK-NEXT: [[ADD_36:%.*]] = fadd float [[ADD_25]], [[NEXT_SROA_0_0_COPYLOAD_3]] +; CHECK-NEXT: store float [[ADD_36]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_1_3:%.*]] = fadd float [[ADD_1_2]], [[NEXT_SROA_4_0_COPYLOAD_3]] +; CHECK-NEXT: store float [[ADD_1_3]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_2_3:%.*]] = fadd float [[ADD_2_2]], [[NEXT_SROA_5_0_COPYLOAD_3]] +; CHECK-NEXT: store float [[ADD_2_3]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_3_3:%.*]] = fadd float [[ADD_3_2]], [[NEXT_SROA_6_0_COPYLOAD_3]] +; CHECK-NEXT: store float [[ADD_3_3]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_2]], [[TMP1]] +; CHECK-NEXT: [[TMP24:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_3]] to i32 +; CHECK-NEXT: [[ADD1_I_4:%.*]] = add i32 [[ADD_I]], [[TMP24]] +; CHECK-NEXT: [[IDXPROM_4:%.*]] = sext i32 [[ADD1_I_4]] to i64 +; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_4]] +; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_4:%.*]] = load float, ptr [[ARRAYIDX_4]], align 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_4]], i64 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_4:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_4]], align 4 +; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_4]], i64 8 +; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_4:%.*]] = load float, ptr 
[[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_4]], align 4 +; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_4]], i64 12 +; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_4:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_4]], align 4, !tbaa [[CHAR_TBAA8]] +; CHECK-NEXT: [[ADD_4:%.*]] = fadd float [[ADD_36]], [[NEXT_SROA_0_0_COPYLOAD_4]] +; CHECK-NEXT: store float [[ADD_4]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_1_4:%.*]] = fadd float [[ADD_1_3]], [[NEXT_SROA_4_0_COPYLOAD_4]] +; CHECK-NEXT: store float [[ADD_1_4]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_2_4:%.*]] = fadd float [[ADD_2_3]], [[NEXT_SROA_5_0_COPYLOAD_4]] +; CHECK-NEXT: store float [[ADD_2_4]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_3_4:%.*]] = fadd float [[ADD_3_3]], [[NEXT_SROA_6_0_COPYLOAD_4]] +; CHECK-NEXT: store float [[ADD_3_4]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_3]], [[TMP1]] +; CHECK-NEXT: [[TMP25:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_4]] to i32 +; CHECK-NEXT: [[ADD1_I_5:%.*]] = add i32 [[ADD_I]], [[TMP25]] +; CHECK-NEXT: [[IDXPROM_5:%.*]] = sext i32 [[ADD1_I_5]] to i64 +; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_5]] +; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_5:%.*]] = load float, ptr [[ARRAYIDX_5]], align 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_5]], i64 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_5:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_5]], align 4 +; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_5]], i64 8 +; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_5:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_5]], align 4 +; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_5]], i64 12 +; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_5:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_5]], align 4, !tbaa [[CHAR_TBAA8]] +; CHECK-NEXT: [[ADD_5:%.*]] = fadd float [[ADD_4]], [[NEXT_SROA_0_0_COPYLOAD_5]] +; CHECK-NEXT: store float [[ADD_5]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_1_5:%.*]] = fadd float [[ADD_1_4]], [[NEXT_SROA_4_0_COPYLOAD_5]] +; CHECK-NEXT: store float [[ADD_1_5]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_2_5:%.*]] = fadd float [[ADD_2_4]], [[NEXT_SROA_5_0_COPYLOAD_5]] +; CHECK-NEXT: store float [[ADD_2_5]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_3_5:%.*]] = fadd float [[ADD_3_4]], [[NEXT_SROA_6_0_COPYLOAD_5]] +; CHECK-NEXT: store float [[ADD_3_5]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_4]], [[TMP1]] +; CHECK-NEXT: [[TMP26:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_5]] to i32 +; CHECK-NEXT: [[ADD1_I_6:%.*]] = add i32 [[ADD_I]], [[TMP26]] +; CHECK-NEXT: [[IDXPROM_6:%.*]] = sext i32 [[ADD1_I_6]] to i64 +; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_6]] +; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_6:%.*]] = load float, ptr [[ARRAYIDX_6]], align 4 +; CHECK-NEXT: 
[[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_6]], i64 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_6:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_6]], align 4 +; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_6]], i64 8 +; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_6:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_6]], align 4 +; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_6]], i64 12 +; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_6:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_6]], align 4, !tbaa [[CHAR_TBAA8]] +; CHECK-NEXT: [[ADD_6:%.*]] = fadd float [[ADD_5]], [[NEXT_SROA_0_0_COPYLOAD_6]] +; CHECK-NEXT: store float [[ADD_6]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_1_6:%.*]] = fadd float [[ADD_1_5]], [[NEXT_SROA_4_0_COPYLOAD_6]] +; CHECK-NEXT: store float [[ADD_1_6]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_2_6:%.*]] = fadd float [[ADD_2_5]], [[NEXT_SROA_5_0_COPYLOAD_6]] +; CHECK-NEXT: store float [[ADD_2_6]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_3_6:%.*]] = fadd float [[ADD_3_5]], [[NEXT_SROA_6_0_COPYLOAD_6]] +; CHECK-NEXT: store float [[ADD_3_6]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_5]], [[TMP1]] +; CHECK-NEXT: [[TMP27:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_6]] to i32 +; CHECK-NEXT: [[ADD1_I_7:%.*]] = add i32 [[ADD_I]], [[TMP27]] +; CHECK-NEXT: [[IDXPROM_7:%.*]] = sext i32 [[ADD1_I_7]] to i64 +; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_7]] +; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_7:%.*]] = load float, ptr [[ARRAYIDX_7]], align 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_7]], i64 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_7:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_7]], align 4 +; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_7]], i64 8 +; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_7:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_7]], align 4 +; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_7]], i64 12 +; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_7:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_7]], align 4, !tbaa [[CHAR_TBAA8]] +; CHECK-NEXT: [[ADD_7]] = fadd float [[ADD_6]], [[NEXT_SROA_0_0_COPYLOAD_7]] +; CHECK-NEXT: store float [[ADD_7]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_1_7]] = fadd float [[ADD_1_6]], [[NEXT_SROA_4_0_COPYLOAD_7]] +; CHECK-NEXT: store float [[ADD_1_7]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_2_7]] = fadd float [[ADD_2_6]], [[NEXT_SROA_5_0_COPYLOAD_7]] +; CHECK-NEXT: store float [[ADD_2_7]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_3_7]] = fadd float [[ADD_3_6]], [[NEXT_SROA_6_0_COPYLOAD_7]] +; CHECK-NEXT: store float [[ADD_3_7]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nsw i64 [[INDVARS_IV_NEXT_6]], [[TMP1]] +; CHECK-NEXT: [[CMP_7:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_7]], [[TMP2]] +; 
CHECK-NEXT: br i1 [[CMP_7]], label %[[FOR_BODY]], label %[[FOR_END14_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: [[FOR_END14_LOOPEXIT_UNR_LCSSA]]: +; CHECK-NEXT: br label %[[FOR_END14_LOOPEXIT]] +; CHECK: [[FOR_END14_LOOPEXIT]]: +; CHECK-NEXT: br label %[[FOR_END14]] +; CHECK: [[FOR_END14]]: +; CHECK-NEXT: ret void +; +entry: + %cmp23 = icmp slt i32 %input_offset, %n + br i1 %cmp23, label %for.body.lr.ph, label %for.end14 + +for.body.lr.ph: ; preds = %entry + %add.i = add i32 %off2, %off1 + %0 = sext i32 %input_offset to i64 + %1 = sext i32 %step to i64 + %2 = sext i32 %n to i64 + %.pre = load float, ptr %value, align 4, !tbaa !6 + %arrayidx5.1.phi.trans.insert = getelementptr inbounds nuw i8, ptr %value, i64 4 + %.pre27 = load float, ptr %arrayidx5.1.phi.trans.insert, align 4, !tbaa !6 + %arrayidx5.2.phi.trans.insert = getelementptr inbounds nuw i8, ptr %value, i64 8 + %.pre28 = load float, ptr %arrayidx5.2.phi.trans.insert, align 4, !tbaa !6 + %arrayidx5.3.phi.trans.insert = getelementptr inbounds nuw i8, ptr %value, i64 12 + %.pre29 = load float, ptr %arrayidx5.3.phi.trans.insert, align 4, !tbaa !6 + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %3 = phi float [ %.pre29, %for.body.lr.ph ], [ %add.3, %for.body ] + %4 = phi float [ %.pre28, %for.body.lr.ph ], [ %add.2, %for.body ] + %5 = phi float [ %.pre27, %for.body.lr.ph ], [ %add.1, %for.body ] + %6 = phi float [ %.pre, %for.body.lr.ph ], [ %add, %for.body ] + %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %7 = trunc nsw i64 %indvars.iv to i32 + %add1.i = add i32 %add.i, %7 + %idxprom = sext i32 %add1.i to i64 + %arrayidx = getelementptr inbounds %struct.ArgVec, ptr %reduce_buffer, i64 %idxprom + %next.sroa.0.0.copyload = load float, ptr %arrayidx, align 4 + %next.sroa.4.0.arrayidx.sroa_idx = getelementptr inbounds nuw i8, ptr %arrayidx, i64 4 + %next.sroa.4.0.copyload = load float, ptr %next.sroa.4.0.arrayidx.sroa_idx, align 4 + %next.sroa.5.0.arrayidx.sroa_idx = getelementptr inbounds nuw i8, ptr %arrayidx, i64 8 + %next.sroa.5.0.copyload = load float, ptr %next.sroa.5.0.arrayidx.sroa_idx, align 4 + %next.sroa.6.0.arrayidx.sroa_idx = getelementptr inbounds nuw i8, ptr %arrayidx, i64 12 + %next.sroa.6.0.copyload = load float, ptr %next.sroa.6.0.arrayidx.sroa_idx, align 4, !tbaa !8 + %add = fadd float %6, %next.sroa.0.0.copyload + store float %add, ptr %value, align 4, !tbaa !6 + %add.1 = fadd float %5, %next.sroa.4.0.copyload + store float %add.1, ptr %arrayidx5.1.phi.trans.insert, align 4, !tbaa !6 + %add.2 = fadd float %4, %next.sroa.5.0.copyload + store float %add.2, ptr %arrayidx5.2.phi.trans.insert, align 4, !tbaa !6 + %add.3 = fadd float %3, %next.sroa.6.0.copyload + store float %add.3, ptr %arrayidx5.3.phi.trans.insert, align 4, !tbaa !6 + %indvars.iv.next = add nsw i64 %indvars.iv, %1 + %cmp = icmp slt i64 %indvars.iv.next, %2 + br i1 %cmp, label %for.body, label %for.end14, !llvm.loop !9 + +for.end14: ; preds = %for.body, %entry + ret void +} + +; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) +define dso_local void @complex_loop_nounroll(i32 noundef %input_offset, i32 noundef %step, i32 noundef %n, i32 noundef %off1, i32 noundef %off2, ptr noundef readonly captures(none) %reduce_buffer, ptr noundef captures(none) %value) local_unnamed_addr #1 { +; CHECK-LABEL: define dso_local void @complex_loop_nounroll( +; CHECK-SAME: i32 noundef [[INPUT_OFFSET:%.*]], i32 noundef [[STEP:%.*]], i32 noundef [[N:%.*]], i32 noundef 
[[OFF1:%.*]], i32 noundef [[OFF2:%.*]], ptr noundef readonly captures(none) [[REDUCE_BUFFER:%.*]], ptr noundef captures(none) [[VALUE:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP23:%.*]] = icmp slt i32 [[INPUT_OFFSET]], [[N]] +; CHECK-NEXT: br i1 [[CMP23]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END14:.*]] +; CHECK: [[FOR_BODY_LR_PH]]: +; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[OFF2]], [[OFF1]] +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INPUT_OFFSET]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[STEP]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 +; CHECK-NEXT: [[DOTPRE:%.*]] = load float, ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ARRAYIDX5_1_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 4 +; CHECK-NEXT: [[DOTPRE27:%.*]] = load float, ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ARRAYIDX5_2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 8 +; CHECK-NEXT: [[DOTPRE28:%.*]] = load float, ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ARRAYIDX5_3_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 12 +; CHECK-NEXT: [[DOTPRE29:%.*]] = load float, ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[TMP3:%.*]] = phi float [ [[DOTPRE29]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_3:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi float [ [[DOTPRE28]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_2:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE27]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_1:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi float [ [[DOTPRE]], %[[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], %[[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[ADD1_I:%.*]] = add i32 [[ADD_I]], [[TMP7]] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD1_I]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC:%.*]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM]] +; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4 +; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX]], align 4 +; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8 +; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX]], align 4 +; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12 +; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX]], align 4, !tbaa [[CHAR_TBAA8]] +; CHECK-NEXT: [[ADD]] = fadd float [[TMP6]], [[NEXT_SROA_0_0_COPYLOAD]] +; CHECK-NEXT: store float [[ADD]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_1]] = fadd float [[TMP5]], [[NEXT_SROA_4_0_COPYLOAD]] +; CHECK-NEXT: store float [[ADD_1]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_2]] = fadd float [[TMP4]], [[NEXT_SROA_5_0_COPYLOAD]] +; CHECK-NEXT: store float [[ADD_2]], ptr 
[[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[ADD_3]] = fadd float [[TMP3]], [[NEXT_SROA_6_0_COPYLOAD]] +; CHECK-NEXT: store float [[ADD_3]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP1]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP2]] +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END14_LOOPEXIT:.*]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: [[FOR_END14_LOOPEXIT]]: +; CHECK-NEXT: br label %[[FOR_END14]] +; CHECK: [[FOR_END14]]: +; CHECK-NEXT: ret void +; +entry: + %cmp23 = icmp slt i32 %input_offset, %n + br i1 %cmp23, label %for.body.lr.ph, label %for.end14 + +for.body.lr.ph: ; preds = %entry + %add.i = add i32 %off2, %off1 + %0 = sext i32 %input_offset to i64 + %1 = sext i32 %step to i64 + %2 = sext i32 %n to i64 + %.pre = load float, ptr %value, align 4, !tbaa !6 + %arrayidx5.1.phi.trans.insert = getelementptr inbounds nuw i8, ptr %value, i64 4 + %.pre27 = load float, ptr %arrayidx5.1.phi.trans.insert, align 4, !tbaa !6 + %arrayidx5.2.phi.trans.insert = getelementptr inbounds nuw i8, ptr %value, i64 8 + %.pre28 = load float, ptr %arrayidx5.2.phi.trans.insert, align 4, !tbaa !6 + %arrayidx5.3.phi.trans.insert = getelementptr inbounds nuw i8, ptr %value, i64 12 + %.pre29 = load float, ptr %arrayidx5.3.phi.trans.insert, align 4, !tbaa !6 + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %3 = phi float [ %.pre29, %for.body.lr.ph ], [ %add.3, %for.body ] + %4 = phi float [ %.pre28, %for.body.lr.ph ], [ %add.2, %for.body ] + %5 = phi float [ %.pre27, %for.body.lr.ph ], [ %add.1, %for.body ] + %6 = phi float [ %.pre, %for.body.lr.ph ], [ %add, %for.body ] + %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %7 = trunc nsw i64 %indvars.iv to i32 + %add1.i = add i32 %add.i, %7 + %idxprom = sext i32 %add1.i to i64 + %arrayidx = getelementptr inbounds %struct.ArgVec, ptr %reduce_buffer, i64 %idxprom + %next.sroa.0.0.copyload = load float, ptr %arrayidx, align 4 + %next.sroa.4.0.arrayidx.sroa_idx = getelementptr inbounds nuw i8, ptr %arrayidx, i64 4 + %next.sroa.4.0.copyload = load float, ptr %next.sroa.4.0.arrayidx.sroa_idx, align 4 + %next.sroa.5.0.arrayidx.sroa_idx = getelementptr inbounds nuw i8, ptr %arrayidx, i64 8 + %next.sroa.5.0.copyload = load float, ptr %next.sroa.5.0.arrayidx.sroa_idx, align 4 + %next.sroa.6.0.arrayidx.sroa_idx = getelementptr inbounds nuw i8, ptr %arrayidx, i64 12 + %next.sroa.6.0.copyload = load float, ptr %next.sroa.6.0.arrayidx.sroa_idx, align 4, !tbaa !8 + %add = fadd float %6, %next.sroa.0.0.copyload + store float %add, ptr %value, align 4, !tbaa !6 + %add.1 = fadd float %5, %next.sroa.4.0.copyload + store float %add.1, ptr %arrayidx5.1.phi.trans.insert, align 4, !tbaa !6 + %add.2 = fadd float %4, %next.sroa.5.0.copyload + store float %add.2, ptr %arrayidx5.2.phi.trans.insert, align 4, !tbaa !6 + %add.3 = fadd float %3, %next.sroa.6.0.copyload + store float %add.3, ptr %arrayidx5.3.phi.trans.insert, align 4, !tbaa !6 + %indvars.iv.next = add nsw i64 %indvars.iv, %1 + %cmp = icmp slt i64 %indvars.iv.next, %2 + br i1 %cmp, label %for.body, label %for.end14, !llvm.loop !13 + +for.end14: ; preds = %for.body, %entry + ret void +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +attributes #1 = { nofree norecurse nosync nounwind memory(argmem: readwrite) "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!llvm.errno.tbaa = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 23.0.0git (https://github.com/adelejjeh/llvm-project 5e0e389360d569e5b3918e61a615d52328649533)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7, !7, i64 0} +!7 = !{!"float", !4, i64 0} +!8 = !{!4, !4, i64 0} +!9 = distinct !{!9, !10, !11, !12} +!10 = !{!"llvm.loop.mustprogress"} +!11 = !{!"llvm.loop.unroll.enable"} +!12 = !{!"llvm.loop.unroll.runtime.force"} +!13 = distinct !{!13, !10, !11} +;. +; CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +; CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[FLOAT_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +; CHECK: [[META7]] = !{!"float", [[META4]], i64 0} +; CHECK: [[CHAR_TBAA8]] = !{[[META4]], [[META4]], i64 0} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]]} +; CHECK: [[META10]] = !{!"llvm.loop.unroll.disable"} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]], [[META10]]} +; CHECK: [[META12]] = !{!"llvm.loop.mustprogress"} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META12]], [[META14:![0-9]+]]} +; CHECK: [[META14]] = !{!"llvm.loop.unroll.enable"} +;. _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
