[PATCH] D52117: Generate llvm.loop.parallel_accesses instead of llvm.mem.parallel_loop_access metadata.

2018-12-20 Thread Michael Kruse via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC349823: [CodeGen] Generate llvm.loop.parallel_accesses 
instead of llvm.mem.parallel_loop_access metadata. (authored by Meinersbur).

Changed prior to commit:
  https://reviews.llvm.org/D52117?vs=178944&id=179141#toc

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D52117/new/

https://reviews.llvm.org/D52117

Files:
  lib/CodeGen/CGLoopInfo.cpp
  lib/CodeGen/CGLoopInfo.h
  test/CodeGenCXX/pragma-loop-safety-imperfectly_nested.cpp
  test/CodeGenCXX/pragma-loop-safety-nested.cpp
  test/CodeGenCXX/pragma-loop-safety-outer.cpp
  test/CodeGenCXX/pragma-loop-safety.cpp
  test/OpenMP/for_codegen.cpp
  test/OpenMP/for_simd_codegen.cpp
  test/OpenMP/loops_explicit_clauses_codegen.cpp
  test/OpenMP/ordered_codegen.cpp
  test/OpenMP/parallel_for_simd_codegen.cpp
  test/OpenMP/schedule_codegen.cpp
  test/OpenMP/simd_codegen.cpp
  test/OpenMP/simd_metadata.c
  test/OpenMP/target_parallel_for_simd_codegen.cpp
  test/OpenMP/target_simd_codegen.cpp
  test/OpenMP/taskloop_simd_codegen.cpp

Index: lib/CodeGen/CGLoopInfo.h
===
--- lib/CodeGen/CGLoopInfo.h
+++ lib/CodeGen/CGLoopInfo.h
@@ -84,6 +84,9 @@
   /// Get the set of attributes active for this loop.
   const LoopAttributes &getAttributes() const { return Attrs; }
 
+  /// Return this loop's access group or nullptr if it does not have one.
+  llvm::MDNode *getAccessGroup() const { return AccGroup; }
+
 private:
   /// Loop ID metadata.
   llvm::MDNode *LoopID;
@@ -91,6 +94,8 @@
   llvm::BasicBlock *Header;
   /// The attributes for this loop.
   LoopAttributes Attrs;
+  /// The access group for memory accesses parallel to this loop.
+  llvm::MDNode *AccGroup = nullptr;
 };
 
 /// A stack of loop information corresponding to loop nesting levels.
Index: lib/CodeGen/CGLoopInfo.cpp
===
--- lib/CodeGen/CGLoopInfo.cpp
+++ lib/CodeGen/CGLoopInfo.cpp
@@ -21,7 +21,7 @@
 
 static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
   const llvm::DebugLoc &StartLoc,
-  const llvm::DebugLoc &EndLoc) {
+  const llvm::DebugLoc &EndLoc, MDNode *&AccGroup) {
 
   if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 &&
   Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 &&
@@ -122,6 +122,12 @@
 Args.push_back(MDNode::get(Ctx, Vals));
   }
 
+  if (Attrs.IsParallel) {
+AccGroup = MDNode::getDistinct(Ctx, {});
+Args.push_back(MDNode::get(
+Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccGroup}));
+  }
+
   // Set the first operand to itself.
   MDNode *LoopID = MDNode::get(Ctx, Args);
   LoopID->replaceOperandWith(0, LoopID);
@@ -150,7 +156,8 @@
 LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc)
 : LoopID(nullptr), Header(Header), Attrs(Attrs) {
-  LoopID = createMetadata(Header->getContext(), Attrs, StartLoc, EndLoc);
+  LoopID =
+  createMetadata(Header->getContext(), Attrs, StartLoc, EndLoc, AccGroup);
 }
 
 void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc ,
@@ -328,6 +335,21 @@
 }
 
 void LoopInfoStack::InsertHelper(Instruction *I) const {
+  if (I->mayReadOrWriteMemory()) {
+SmallVector<Metadata *, 4> AccessGroups;
+for (const LoopInfo &AL : Active) {
+  // Here we assume that every loop that has an access group is parallel.
+  if (MDNode *Group = AL.getAccessGroup())
+AccessGroups.push_back(Group);
+}
+MDNode *UnionMD = nullptr;
+if (AccessGroups.size() == 1)
+  UnionMD = cast<MDNode>(AccessGroups[0]);
+else if (AccessGroups.size() >= 2)
+  UnionMD = MDNode::get(I->getContext(), AccessGroups);
+I->setMetadata("llvm.access.group", UnionMD);
+  }
+
   if (!hasInfo())
 return;
 
@@ -343,18 +365,4 @@
   }
 return;
   }
-
-  if (I->mayReadOrWriteMemory()) {
-SmallVector<Metadata *, 4> ParallelLoopIDs;
-for (const LoopInfo &AL : Active)
-  if (AL.getAttributes().IsParallel)
-ParallelLoopIDs.push_back(AL.getLoopID());
-
-MDNode *ParallelMD = nullptr;
-if (ParallelLoopIDs.size() == 1)
-  ParallelMD = cast<MDNode>(ParallelLoopIDs[0]);
-else if (ParallelLoopIDs.size() >= 2)
-  ParallelMD = MDNode::get(I->getContext(), ParallelLoopIDs);
-I->setMetadata("llvm.mem.parallel_loop_access", ParallelMD);
-  }
 }
Index: test/OpenMP/target_simd_codegen.cpp
===
--- test/OpenMP/target_simd_codegen.cpp
+++ test/OpenMP/target_simd_codegen.cpp
@@ -342,7 +342,7 @@
 // CHECK-64:[[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
 // CHECK-64:[[AA:%.+]] = load i32, i32* [[AA_CADDR]], align
 // CHECK-32:[[AA:%.+]] = load i32, i32* [[AA_ADDR]], align
-// CHECK:   !llvm.mem.parallel_loop_access
+// CHECK:   !llvm.access.group
 // CHECK:   !llvm.loop
 // 

[PATCH] D52117: Generate llvm.loop.parallel_accesses instead of llvm.mem.parallel_loop_access metadata.

2018-12-19 Thread Michael Kruse via Phabricator via cfe-commits
Meinersbur updated this revision to Diff 178944.
Meinersbur added a comment.

- Fix typo


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D52117/new/

https://reviews.llvm.org/D52117

Files:
  lib/CodeGen/CGLoopInfo.cpp
  lib/CodeGen/CGLoopInfo.h
  test/CodeGenCXX/pragma-loop-safety-imperfectly_nested.cpp
  test/CodeGenCXX/pragma-loop-safety-nested.cpp
  test/CodeGenCXX/pragma-loop-safety-outer.cpp
  test/CodeGenCXX/pragma-loop-safety.cpp
  test/OpenMP/for_codegen.cpp
  test/OpenMP/for_simd_codegen.cpp
  test/OpenMP/loops_explicit_clauses_codegen.cpp
  test/OpenMP/ordered_codegen.cpp
  test/OpenMP/parallel_for_simd_codegen.cpp
  test/OpenMP/schedule_codegen.cpp
  test/OpenMP/simd_codegen.cpp
  test/OpenMP/simd_metadata.c
  test/OpenMP/target_parallel_for_simd_codegen.cpp
  test/OpenMP/target_simd_codegen.cpp
  test/OpenMP/taskloop_simd_codegen.cpp

Index: test/OpenMP/taskloop_simd_codegen.cpp
===
--- test/OpenMP/taskloop_simd_codegen.cpp
+++ test/OpenMP/taskloop_simd_codegen.cpp
@@ -83,17 +83,17 @@
 // CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
 // CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
 // CHECK: br label
-// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP1:!.+]]
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.access.group
 // CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
-// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.access.group
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: store i32 %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
+// CHECK: store i32 %{{.*}}!llvm.access.group
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
-// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: br label %{{.*}}!llvm.loop [[LOOP1]]
+// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.access.group
+// CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
 // CHECK: define internal i32 [[TASK2]](
@@ -113,17 +113,17 @@
 // CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
 // CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
 // CHECK: br label
-// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP2:!.+]]
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.access.group
 // CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
-// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.access.group
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: store i32 %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
+// CHECK: store i32 %{{.*}}!llvm.access.group
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
-// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: br label %{{.*}}!llvm.loop [[LOOP2]]
+// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.access.group
+// CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
 // CHECK: define internal i32 [[TASK3]](
@@ -142,7 +142,7 @@
 // CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
 // CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]],
 // CHECK: br label
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
@@ -192,14 +192,14 @@
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
 // CHECK: load i32, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: store i32 %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: load i32, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
 // CHECK: store i32 %{{.+}}, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
Index: test/OpenMP/target_simd_codegen.cpp
===
--- test/OpenMP/target_simd_codegen.cpp
+++ test/OpenMP/target_simd_codegen.cpp
@@ -342,7 +342,7 @@
 // CHECK-64:

[PATCH] D52117: Generate llvm.loop.parallel_accesses instead of llvm.mem.parallel_loop_access metadata.

2018-12-18 Thread Hal Finkel via Phabricator via cfe-commits
hfinkel accepted this revision.
hfinkel added a comment.

Minor typo noted below, but otherwise, LGTM (to avoid any misunderstanding: 
this should be committed after the LLVM change lands).




Comment at: lib/CodeGen/CGLoopInfo.cpp:341
+for (const LoopInfo &AL : Active) {
+  // Here we assume that ever loop that has an access group is parallel.
+  if (MDNode *Group = AL.getAccessGroup())

ever -> every


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D52117/new/

https://reviews.llvm.org/D52117



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D52117: Generate llvm.loop.parallel_accesses instead of llvm.mem.parallel_loop_access metadata.

2018-12-07 Thread Michael Kruse via Phabricator via cfe-commits
Meinersbur updated this revision to Diff 177324.
Meinersbur added a comment.

- Fix wrong patch upload
- Simplify access group emission. This is possible due to the added possibility 
for instructions to belong to multiple access groups in D52116 
(https://reviews.llvm.org/D52116). However, the number of access groups is not 
minimized anymore.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D52117/new/

https://reviews.llvm.org/D52117

Files:
  lib/CodeGen/CGLoopInfo.cpp
  lib/CodeGen/CGLoopInfo.h
  test/CodeGenCXX/pragma-loop-safety-imperfectly_nested.cpp
  test/CodeGenCXX/pragma-loop-safety-nested.cpp
  test/CodeGenCXX/pragma-loop-safety-outer.cpp
  test/CodeGenCXX/pragma-loop-safety.cpp
  test/OpenMP/for_codegen.cpp
  test/OpenMP/for_simd_codegen.cpp
  test/OpenMP/loops_explicit_clauses_codegen.cpp
  test/OpenMP/ordered_codegen.cpp
  test/OpenMP/parallel_for_simd_codegen.cpp
  test/OpenMP/schedule_codegen.cpp
  test/OpenMP/simd_codegen.cpp
  test/OpenMP/simd_metadata.c
  test/OpenMP/target_parallel_for_simd_codegen.cpp
  test/OpenMP/target_simd_codegen.cpp
  test/OpenMP/taskloop_simd_codegen.cpp

Index: test/OpenMP/taskloop_simd_codegen.cpp
===
--- test/OpenMP/taskloop_simd_codegen.cpp
+++ test/OpenMP/taskloop_simd_codegen.cpp
@@ -83,17 +83,17 @@
 // CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
 // CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
 // CHECK: br label
-// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP1:!.+]]
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.access.group
 // CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
-// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.access.group
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: store i32 %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
+// CHECK: store i32 %{{.*}}!llvm.access.group
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
-// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: br label %{{.*}}!llvm.loop [[LOOP1]]
+// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.access.group
+// CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
 // CHECK: define internal i32 [[TASK2]](
@@ -113,17 +113,17 @@
 // CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
 // CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
 // CHECK: br label
-// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP2:!.+]]
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.access.group
 // CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
-// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.access.group
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: store i32 %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
+// CHECK: store i32 %{{.*}}!llvm.access.group
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
-// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: br label %{{.*}}!llvm.loop [[LOOP2]]
+// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.access.group
+// CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
 // CHECK: define internal i32 [[TASK3]](
@@ -142,7 +142,7 @@
 // CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
 // CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]],
 // CHECK: br label
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
@@ -192,14 +192,14 @@
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
 // CHECK: load i32, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: store i32 %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: load i32, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
 // CHECK: store i32 %{{.+}}, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: br label %{{.*}}!llvm.loop
 // 

[PATCH] D52117: Generate llvm.loop.parallel_accesses instead of llvm.mem.parallel_loop_access metadata.

2018-12-07 Thread Michael Kruse via Phabricator via cfe-commits
Meinersbur updated this revision to Diff 177320.
Meinersbur marked an inline comment as done.
Meinersbur added a comment.

- Allow multiple access groups per instruction, i.e. an instruction can be in 
multiple access groups. This allows a simple 'union' operation that occurs when 
inlining into another function. A memory access is considered parallel when at 
least one access group is listed in llvm.loop.parallel_accesses. This is 
prioritized over the 'intersect' case for combining instructions which would be 
dual. We only do best-effort here.


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D52117/new/

https://reviews.llvm.org/D52117

Files:
  docs/LangRef.rst
  include/llvm/Analysis/LoopInfo.h
  include/llvm/Analysis/LoopInfoImpl.h
  include/llvm/Analysis/VectorUtils.h
  include/llvm/IR/LLVMContext.h
  include/llvm/Transforms/Utils/LoopUtils.h
  lib/Analysis/LoopInfo.cpp
  lib/Analysis/VectorUtils.cpp
  lib/IR/LLVMContext.cpp
  lib/Transforms/InstCombine/InstCombineCalls.cpp
  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
  lib/Transforms/InstCombine/InstCombinePHI.cpp
  lib/Transforms/Scalar/GVNHoist.cpp
  lib/Transforms/Scalar/LoopVersioningLICM.cpp
  lib/Transforms/Scalar/MemCpyOptimizer.cpp
  lib/Transforms/Scalar/SROA.cpp
  lib/Transforms/Scalar/Scalarizer.cpp
  lib/Transforms/Utils/InlineFunction.cpp
  lib/Transforms/Utils/Local.cpp
  lib/Transforms/Utils/LoopUtils.cpp
  lib/Transforms/Utils/SimplifyCFG.cpp
  test/Analysis/LoopInfo/annotated-parallel-complex.ll
  test/Analysis/LoopInfo/annotated-parallel-simple.ll
  test/ThinLTO/X86/lazyload_metadata.ll
  test/Transforms/Inline/parallel-loop-md-callee.ll
  test/Transforms/Inline/parallel-loop-md-merge.ll
  test/Transforms/Inline/parallel-loop-md.ll
  test/Transforms/InstCombine/intersect-accessgroup.ll
  test/Transforms/InstCombine/loadstore-metadata.ll
  test/Transforms/InstCombine/mem-par-metadata-memcpy.ll
  test/Transforms/LoopVectorize/X86/force-ifcvt.ll
  test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
  test/Transforms/LoopVectorize/X86/parallel-loops.ll
  test/Transforms/LoopVectorize/X86/pr34438.ll
  test/Transforms/LoopVectorize/X86/vect.omp.force.ll
  test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
  test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
  test/Transforms/SROA/mem-par-metadata-sroa.ll
  test/Transforms/Scalarizer/basic.ll
  test/Transforms/SimplifyCFG/combine-parallel-mem-md.ll

Index: test/Transforms/SimplifyCFG/combine-parallel-mem-md.ll
===
--- test/Transforms/SimplifyCFG/combine-parallel-mem-md.ll
+++ test/Transforms/SimplifyCFG/combine-parallel-mem-md.ll
@@ -8,39 +8,39 @@
   br label %for.body
 
 ; CHECK-LABEL: @Test
-; CHECK: load i32, i32* {{.*}}, align 4, !llvm.mem.parallel_loop_access !0
-; CHECK: load i32, i32* {{.*}}, align 4, !llvm.mem.parallel_loop_access !0
-; CHECK: store i32 {{.*}}, align 4, !llvm.mem.parallel_loop_access !0
+; CHECK: load i32, i32* {{.*}}, align 4, !llvm.access.group !0
+; CHECK: load i32, i32* {{.*}}, align 4, !llvm.access.group !0
+; CHECK: store i32 {{.*}}, align 4, !llvm.access.group !0
 ; CHECK-NOT: load
 ; CHECK-NOT: store
 
 for.body: ; preds = %cond.end, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cond.end ]
   %arrayidx = getelementptr inbounds i32, i32* %p, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !0
+  %0 = load i32, i32* %arrayidx, align 4, !llvm.access.group !0
   %cmp1 = icmp eq i32 %0, 0
   br i1 %cmp1, label %cond.true, label %cond.false
 
 cond.false:   ; preds = %for.body
   %arrayidx3 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv
-  %v = load i32, i32* %arrayidx3, align 4, !llvm.mem.parallel_loop_access !0
+  %v = load i32, i32* %arrayidx3, align 4, !llvm.access.group !0
   %arrayidx7 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx7, align 4, !llvm.mem.parallel_loop_access !0
+  %1 = load i32, i32* %arrayidx7, align 4, !llvm.access.group !0
   %add = add nsw i32 %1, %v
   br label %cond.end
 
 cond.true:   ; preds = %for.body
   %arrayidx4 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv
-  %w = load i32, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !0
+  %w = load i32, i32* %arrayidx4, align 4, !llvm.access.group !0
   %arrayidx8 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
-  %2 = load i32, i32* %arrayidx8, align 4, !llvm.mem.parallel_loop_access !0
+  %2 = load i32, i32* %arrayidx8, align 4, !llvm.access.group !0
   %add2 = add nsw i32 %2, %w
   br label %cond.end
 
 cond.end: ; preds = %for.body, %cond.false
   %cond = phi i32 [ %add, %cond.false ], [ %add2, %cond.true ]
   %arrayidx9 = getelementptr inbounds i32, i32* 

[PATCH] D52117: Generate llvm.loop.parallel_accesses instead of llvm.mem.parallel_loop_access metadata.

2018-12-03 Thread Hal Finkel via Phabricator via cfe-commits
hfinkel added inline comments.



Comment at: lib/CodeGen/CGLoopInfo.cpp:372
+  if (Active.size() >= 2) {
+LoopInfo &NewFront = reverse(Active).begin()[1];
+NewFront.addAccGroups(Front.getNestedAccGroups());

reverse(Active).begin() looks odd. Can we get the same thing by calling last()?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D52117/new/

https://reviews.llvm.org/D52117



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D52117: Generate llvm.loop.parallel_accesses instead of llvm.mem.parallel_loop_access metadata.

2018-10-04 Thread Michael Kruse via Phabricator via cfe-commits
Meinersbur updated this revision to Diff 168251.
Meinersbur marked 2 inline comments as done.
Meinersbur added a comment.

- Address @pekka.jaaskelainen's review.


Repository:
  rC Clang

https://reviews.llvm.org/D52117

Files:
  lib/CodeGen/CGLoopInfo.cpp
  lib/CodeGen/CGLoopInfo.h
  test/CodeGenCXX/pragma-loop-safety-imperfectly_nested.cpp
  test/CodeGenCXX/pragma-loop-safety-nested.cpp
  test/CodeGenCXX/pragma-loop-safety-outer.cpp
  test/CodeGenCXX/pragma-loop-safety.cpp
  test/OpenMP/for_codegen.cpp
  test/OpenMP/for_simd_codegen.cpp
  test/OpenMP/loops_explicit_clauses_codegen.cpp
  test/OpenMP/ordered_codegen.cpp
  test/OpenMP/parallel_for_simd_codegen.cpp
  test/OpenMP/schedule_codegen.cpp
  test/OpenMP/simd_codegen.cpp
  test/OpenMP/simd_metadata.c
  test/OpenMP/target_parallel_for_simd_codegen.cpp
  test/OpenMP/target_simd_codegen.cpp
  test/OpenMP/taskloop_simd_codegen.cpp

Index: test/OpenMP/taskloop_simd_codegen.cpp
===
--- test/OpenMP/taskloop_simd_codegen.cpp
+++ test/OpenMP/taskloop_simd_codegen.cpp
@@ -83,17 +83,17 @@
 // CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
 // CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
 // CHECK: br label
-// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP1:!.+]]
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.access.group
 // CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
-// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.access.group
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: store i32 %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
+// CHECK: store i32 %{{.*}}!llvm.access.group
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
-// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: br label %{{.*}}!llvm.loop [[LOOP1]]
+// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.access.group
+// CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
 // CHECK: define internal i32 [[TASK2]](
@@ -113,17 +113,17 @@
 // CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
 // CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
 // CHECK: br label
-// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP2:!.+]]
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.access.group
 // CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
-// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.access.group
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: store i32 %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
+// CHECK: store i32 %{{.*}}!llvm.access.group
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
-// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: br label %{{.*}}!llvm.loop [[LOOP2]]
+// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.access.group
+// CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
 // CHECK: define internal i32 [[TASK3]](
@@ -142,7 +142,7 @@
 // CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
 // CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]],
 // CHECK: br label
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
@@ -192,14 +192,14 @@
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
 // CHECK: load i32, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: store i32 %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: load i32, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
 // CHECK: store i32 %{{.+}}, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
Index: test/OpenMP/target_simd_codegen.cpp
===
--- test/OpenMP/target_simd_codegen.cpp
+++ test/OpenMP/target_simd_codegen.cpp
@@ -342,7 +342,7 @@
 // CHECK-64:

[PATCH] D52117: Generate llvm.loop.parallel_accesses instead of llvm.mem.parallel_loop_access metadata.

2018-10-04 Thread Pekka Jääskeläinen via Phabricator via cfe-commits
pekka.jaaskelainen accepted this revision.
pekka.jaaskelainen added a comment.
This revision is now accepted and ready to land.

I glanced over this without spotting anything crucial. My Clang code base 
knowledge is a bit lightweight though, so you might want to wait for another 
reviewer. On the other hand, the semantics seem to be retained so it might be 
safe to commit this in case the tests still pass.

We need to remember to update pocl to produce this format then.




Comment at: lib/CodeGen/CGLoopInfo.cpp:337
+  // llvm.loop.parallel_accesses to include these accesses. At the moment a 
loop
+  // has an access group iff it is parallel such that the last propert already
+  // is a "llvm.loop.parallel_accesses".

typo 'propert'



Comment at: test/CodeGenCXX/pragma-loop-safety-nested.cpp:6
 void vectorize_nested_test(int *List, int Length) {
 #pragma clang loop vectorize(assume_safety) interleave(disable) unroll(disable)
   for (int i = 0; i < Length; ++i) {

Can you add a test case of a nested loop that is not "perfect", that is, has 
accesses also in the outer loop bodies?


Repository:
  rC Clang

https://reviews.llvm.org/D52117



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D52117: Generate llvm.loop.parallel_accesses instead of llvm.mem.parallel_loop_access metadata.

2018-10-03 Thread Michael Kruse via Phabricator via cfe-commits
Meinersbur updated this revision to Diff 168096.
Meinersbur added a comment.

- Upload diff for clang portion (instead of https://reviews.llvm.org/D52116)


Repository:
  rC Clang

https://reviews.llvm.org/D52117

Files:
  lib/CodeGen/CGLoopInfo.cpp
  lib/CodeGen/CGLoopInfo.h
  test/CodeGenCXX/pragma-loop-safety-nested.cpp
  test/CodeGenCXX/pragma-loop-safety-outer.cpp
  test/CodeGenCXX/pragma-loop-safety.cpp
  test/OpenMP/for_codegen.cpp
  test/OpenMP/for_simd_codegen.cpp
  test/OpenMP/loops_explicit_clauses_codegen.cpp
  test/OpenMP/ordered_codegen.cpp
  test/OpenMP/parallel_for_simd_codegen.cpp
  test/OpenMP/schedule_codegen.cpp
  test/OpenMP/simd_codegen.cpp
  test/OpenMP/simd_metadata.c
  test/OpenMP/target_parallel_for_simd_codegen.cpp
  test/OpenMP/target_simd_codegen.cpp
  test/OpenMP/taskloop_simd_codegen.cpp

Index: test/OpenMP/taskloop_simd_codegen.cpp
===
--- test/OpenMP/taskloop_simd_codegen.cpp
+++ test/OpenMP/taskloop_simd_codegen.cpp
@@ -83,17 +83,17 @@
 // CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
 // CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
 // CHECK: br label
-// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP1:!.+]]
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.access.group
 // CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
-// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.access.group
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: store i32 %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
+// CHECK: store i32 %{{.*}}!llvm.access.group
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
-// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: br label %{{.*}}!llvm.loop [[LOOP1]]
+// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.access.group
+// CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
 // CHECK: define internal i32 [[TASK2]](
@@ -113,17 +113,17 @@
 // CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
 // CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
 // CHECK: br label
-// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP2:!.+]]
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.access.group
 // CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
-// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.access.group
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: store i32 %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
+// CHECK: store i32 %{{.*}}!llvm.access.group
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
-// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: br label %{{.*}}!llvm.loop [[LOOP2]]
+// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.access.group
+// CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
 // CHECK: define internal i32 [[TASK3]](
@@ -142,7 +142,7 @@
 // CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
 // CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]],
 // CHECK: br label
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
@@ -192,14 +192,14 @@
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
 // CHECK: load i32, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: store i32 %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: load i32, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
 // CHECK: store i32 %{{.+}}, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
Index: test/OpenMP/target_simd_codegen.cpp
===
--- test/OpenMP/target_simd_codegen.cpp
+++ test/OpenMP/target_simd_codegen.cpp
@@ -342,7 +342,7 @@
 // CHECK-64:[[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
 // CHECK-64:   

[PATCH] D52117: Generate llvm.loop.parallel_accesses instead of llvm.mem.parallel_loop_access metadata.

2018-09-26 Thread Michael Kruse via Phabricator via cfe-commits
Meinersbur updated this revision to Diff 167099.
Meinersbur added a comment.
Herald added subscribers: llvm-commits, dexonsmith, steven_wu, eraman, 
mehdi_amini.

- Rebase
- Use the call's access group if the instruction's access group is not set


Repository:
  rL LLVM

https://reviews.llvm.org/D52117

Files:
  docs/LangRef.rst
  include/llvm/IR/LLVMContext.h
  include/llvm/Transforms/Utils/LoopUtils.h
  lib/Analysis/LoopInfo.cpp
  lib/IR/LLVMContext.cpp
  lib/Transforms/InstCombine/InstCombineCalls.cpp
  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
  lib/Transforms/Scalar/LoopVersioningLICM.cpp
  lib/Transforms/Scalar/SROA.cpp
  lib/Transforms/Scalar/Scalarizer.cpp
  lib/Transforms/Utils/InlineFunction.cpp
  lib/Transforms/Utils/Local.cpp
  lib/Transforms/Utils/LoopUtils.cpp
  lib/Transforms/Utils/SimplifyCFG.cpp
  test/ThinLTO/X86/lazyload_metadata.ll
  test/Transforms/Inline/parallel-loop-md-callee.ll
  test/Transforms/Inline/parallel-loop-md.ll
  test/Transforms/InstCombine/loadstore-metadata.ll
  test/Transforms/InstCombine/mem-par-metadata-memcpy.ll
  test/Transforms/LoopVectorize/X86/force-ifcvt.ll
  test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
  test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
  test/Transforms/LoopVectorize/X86/parallel-loops.ll
  test/Transforms/LoopVectorize/X86/pr34438.ll
  test/Transforms/LoopVectorize/X86/vect.omp.force.ll
  test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
  test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
  test/Transforms/SROA/mem-par-metadata-sroa.ll
  test/Transforms/Scalarizer/basic.ll
  test/Transforms/SimplifyCFG/combine-parallel-mem-md.ll

Index: test/Transforms/SimplifyCFG/combine-parallel-mem-md.ll
===
--- test/Transforms/SimplifyCFG/combine-parallel-mem-md.ll
+++ test/Transforms/SimplifyCFG/combine-parallel-mem-md.ll
@@ -8,39 +8,39 @@
   br label %for.body
 
 ; CHECK-LABEL: @Test
-; CHECK: load i32, i32* {{.*}}, align 4, !llvm.mem.parallel_loop_access !0
-; CHECK: load i32, i32* {{.*}}, align 4, !llvm.mem.parallel_loop_access !0
-; CHECK: store i32 {{.*}}, align 4, !llvm.mem.parallel_loop_access !0
+; CHECK: load i32, i32* {{.*}}, align 4, !llvm.access.group !0
+; CHECK: load i32, i32* {{.*}}, align 4, !llvm.access.group !0
+; CHECK: store i32 {{.*}}, align 4, !llvm.access.group !0
 ; CHECK-NOT: load
 ; CHECK-NOT: store
 
 for.body: ; preds = %cond.end, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cond.end ]
   %arrayidx = getelementptr inbounds i32, i32* %p, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !0
+  %0 = load i32, i32* %arrayidx, align 4, !llvm.access.group !0
   %cmp1 = icmp eq i32 %0, 0
   br i1 %cmp1, label %cond.true, label %cond.false
 
 cond.false:   ; preds = %for.body
   %arrayidx3 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv
-  %v = load i32, i32* %arrayidx3, align 4, !llvm.mem.parallel_loop_access !0
+  %v = load i32, i32* %arrayidx3, align 4, !llvm.access.group !0
   %arrayidx7 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx7, align 4, !llvm.mem.parallel_loop_access !0
+  %1 = load i32, i32* %arrayidx7, align 4, !llvm.access.group !0
   %add = add nsw i32 %1, %v
   br label %cond.end
 
 cond.true:   ; preds = %for.body
   %arrayidx4 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv
-  %w = load i32, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !0
+  %w = load i32, i32* %arrayidx4, align 4, !llvm.access.group !0
   %arrayidx8 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
-  %2 = load i32, i32* %arrayidx8, align 4, !llvm.mem.parallel_loop_access !0
+  %2 = load i32, i32* %arrayidx8, align 4, !llvm.access.group !0
   %add2 = add nsw i32 %2, %w
   br label %cond.end
 
 cond.end: ; preds = %for.body, %cond.false
   %cond = phi i32 [ %add, %cond.false ], [ %add2, %cond.true ]
   %arrayidx9 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv
-  store i32 %cond, i32* %arrayidx9, align 4, !llvm.mem.parallel_loop_access !0
+  store i32 %cond, i32* %arrayidx9, align 4, !llvm.access.group !0
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 16
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
@@ -51,5 +51,6 @@
 
 attributes #0 = { norecurse nounwind uwtable }
 
-!0 = distinct !{!0, !1}
+!0 = distinct !{!0, !1, !{!"llvm.loop.parallel_accesses", !10}}
 !1 = !{!"llvm.loop.vectorize.enable", i1 true}
+!10 = distinct !{}
Index: test/Transforms/Scalarizer/basic.ll
===
--- test/Transforms/Scalarizer/basic.ll
+++ test/Transforms/Scalarizer/basic.ll
@@ -205,28 +205,28 @@
   ret void
 }
 
-; 

[PATCH] D52117: Generate llvm.loop.parallel_accesses instead of llvm.mem.parallel_loop_access metadata.

2018-09-14 Thread Michael Kruse via Phabricator via cfe-commits
Meinersbur created this revision.
Meinersbur added reviewers: hfinkel, amusman, ABataev, tyler.nowicki.
Meinersbur added a dependency: D52116: Introduce llvm.loop.parallel_accesses 
and llvm.access.group metadata.

Instead of generating llvm.mem.parallel_loop_access metadata, generate 
llvm.access.group on instructions and llvm.loop.parallel_accesses on loops. 
Minimize the number of access groups by creating an access group only for 
loops that are parallel.

This is the Clang part of https://reviews.llvm.org/D52116.


Repository:
  rC Clang

https://reviews.llvm.org/D52117

Files:
  lib/CodeGen/CGLoopInfo.cpp
  lib/CodeGen/CGLoopInfo.h
  test/CodeGenCXX/pragma-loop-safety-nested.cpp
  test/CodeGenCXX/pragma-loop-safety-outer.cpp
  test/CodeGenCXX/pragma-loop-safety.cpp
  test/OpenMP/for_codegen.cpp
  test/OpenMP/for_simd_codegen.cpp
  test/OpenMP/loops_explicit_clauses_codegen.cpp
  test/OpenMP/ordered_codegen.cpp
  test/OpenMP/parallel_for_simd_codegen.cpp
  test/OpenMP/schedule_codegen.cpp
  test/OpenMP/simd_codegen.cpp
  test/OpenMP/simd_metadata.c
  test/OpenMP/target_parallel_for_simd_codegen.cpp
  test/OpenMP/target_simd_codegen.cpp
  test/OpenMP/taskloop_simd_codegen.cpp

Index: test/OpenMP/taskloop_simd_codegen.cpp
===
--- test/OpenMP/taskloop_simd_codegen.cpp
+++ test/OpenMP/taskloop_simd_codegen.cpp
@@ -83,17 +83,17 @@
 // CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
 // CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
 // CHECK: br label
-// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP1:!.+]]
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.access.group
 // CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
-// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.access.group
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: store i32 %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
+// CHECK: store i32 %{{.*}}!llvm.access.group
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
-// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
-// CHECK: br label %{{.*}}!llvm.loop [[LOOP1]]
+// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.access.group
+// CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
 // CHECK: define internal i32 [[TASK2]](
@@ -113,17 +113,17 @@
 // CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
 // CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
 // CHECK: br label
-// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP2:!.+]]
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.access.group
 // CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
-// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.access.group
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: store i32 %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
+// CHECK: store i32 %{{.*}}!llvm.access.group
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
-// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
-// CHECK: br label %{{.*}}!llvm.loop [[LOOP2]]
+// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.access.group
+// CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
 // CHECK: define internal i32 [[TASK3]](
@@ -142,7 +142,7 @@
 // CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
 // CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]],
 // CHECK: br label
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: br label %{{.*}}!llvm.loop
 // CHECK: ret i32 0
 
@@ -192,14 +192,14 @@
 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
 // CHECK: load i32, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: store i32 %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: load i32, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 // CHECK: add nsw i32 %{{.+}}, 1
 // CHECK: store i32 %{{.+}}, i32* %
-// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-NOT: !llvm.access.group
 //