Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-03-02 Thread Alexey Bataev via cfe-commits
ABataev accepted this revision.
ABataev added a comment.
This revision is now accepted and ready to land.

LG


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-03-02 Thread Samuel Antao via cfe-commits
sfantao added a comment.

Hi Alexey,

Thanks for the review!



Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:49-51
@@ -48,2 +48,5 @@
 TargetRegion,
+/// \brief Region that do not require function outlining and uses
+/// information from a inner scope.
+InlinedInnerRegion,
   };

ABataev wrote:
> Do we really need this one? I don't think it will be used in codegen for 
> directives, so do not add it.
Ok, I removed it. I was just following what was being done for the other APIs.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:316-318
@@ +315,5 @@
+  static bool classof(const CGCapturedStmtInfo *Info) {
+return CGOpenMPRegionInfo::classof(Info) &&
+   cast(Info)->getRegionKind() ==
+   InlinedInnerRegion;
+  }

ABataev wrote:
> I think it will be enough just to return 'false' here always, it should not 
> be used in any casting operations ever
Ok, done.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:4438-4440
@@ +4437,5 @@
+
+if (NumTeams) {
+  assert(ThreadLimit && "Thread limit expression should be available along "
+"with number of teams.");
+  llvm::Value *OffloadingArgs[] = {

ABataev wrote:
> What if ThreadLimit is 'nullptr'? And why it cannot be 'nullptr' if NumTeams 
> is not 'nullptr'?
Both values should be defined if there is a nested teams directive. If there 
are no num_teams or thread_limit clauses (but we have a teams directive), those 
values will be defined with an int32 constant zero, which is the default value 
for the runtime library. So, no matter the clauses, if there is a teams 
directive both values will be defined. It is therefore safe to assume that the 
two values are either both defined or both null. 

I added a comment to clarify that. 


Comment at: lib/CodeGen/CGStmtOpenMP.cpp:2712
@@ +2711,3 @@
+void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
+  LexicalScope Scope(*this, S.getSourceRange());
+  const CapturedStmt &CS = *cast(S.getAssociatedStmt());

ABataev wrote:
> Use 'OMPLexicalScope Scope(*this, S);' instead.
Done!


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-03-02 Thread Samuel Antao via cfe-commits
sfantao updated this revision to Diff 49712.
sfantao marked 4 inline comments as done.
sfantao added a comment.

- Remove the InlinedInnerRegion kind. Improve comments and make two other minor edits.


http://reviews.llvm.org/D17019

Files:
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGStmtOpenMP.cpp
  test/OpenMP/teams_codegen.cpp

Index: test/OpenMP/teams_codegen.cpp
===
--- /dev/null
+++ test/OpenMP/teams_codegen.cpp
@@ -0,0 +1,210 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+#ifdef CK1
+
+int Gbla;
+long long Gblb;
+int &Gblc = Gbla;
+
+// CK1-LABEL: teams_argument_global_local
+int teams_argument_global_local(int a){
+  int comp = 1;
+
+  int la = 23;
+  float lc = 25.0;
+
+  // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(la)
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams thread_limit(la)
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
+  // CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
+  // CK1-DAG: [[NTB]] = load i32, i32* %{{.+}},
+
+  // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+  // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]]
+  // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64
+  // CK1-DAG: [[TLD]] = load float, float* %{{.+}},
+  // CK1-DAG: [[TLB]] = load i64, i64* @Gblb,
+
+  // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc)
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 {{.+}}, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], 1
+  // CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
+
+  // CK1-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 2
+  // CK1-DAG: [[TLA]] = load i32, i32* @Gbla,
+
+  // CK1: call void @{{.+}}(i{{.+}} {{.+}}
+  #pragma omp target
+  #pragma omp teams num_teams(Gblc+1) thread_limit(Gblc+2)
+  {
+comp += Gblc;
+  }
+
+  return comp;
+}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-un

Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-03-02 Thread Alexey Bataev via cfe-commits
ABataev added inline comments.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:49-51
@@ -48,2 +48,5 @@
 TargetRegion,
+/// \brief Region that do not require function outlining and uses
+/// information from a inner scope.
+InlinedInnerRegion,
   };

Do we really need this one? I don't think it will be used in codegen for 
directives, so do not add it.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:316-318
@@ +315,5 @@
+  static bool classof(const CGCapturedStmtInfo *Info) {
+return CGOpenMPRegionInfo::classof(Info) &&
+   cast(Info)->getRegionKind() ==
+   InlinedInnerRegion;
+  }

I think it will be enough just to return 'false' here always, it should not be 
used in any casting operations ever


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:4438-4440
@@ +4437,5 @@
+
+if (NumTeams) {
+  assert(ThreadLimit && "Thread limit expression should be available along "
+"with number of teams.");
+  llvm::Value *OffloadingArgs[] = {

What if ThreadLimit is 'nullptr'? And why it cannot be 'nullptr' if NumTeams is 
not 'nullptr'?


Comment at: lib/CodeGen/CGStmtOpenMP.cpp:2712
@@ +2711,3 @@
+void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
+  LexicalScope Scope(*this, S.getSourceRange());
+  const CapturedStmt &CS = *cast(S.getAssociatedStmt());

Use 'OMPLexicalScope Scope(*this, S);' instead.


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-03-02 Thread Samuel Antao via cfe-commits
sfantao added inline comments.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:256-296
@@ -255,26 +255,43 @@
 /// \brief RAII for emitting code of OpenMP constructs.
 class InlinedOpenMPRegionRAII {
   CodeGenFunction &CGF;
+  /// \brief Saves the varaibles that were forced to be local in the current
+  /// inlined region.
+  SmallVector ForcedLocalVars;
 
 public:
-  /// \brief Constructs region for combined constructs.
+  /// \brief Constructs inlined region. Mostly used for combined constructs. If
+  /// a captured statement is provided it also ensures the captured variables
+  /// are all defined in the scope of the enclosing function. This is typical
+  /// used for regions that make local instances of global variables, e.g.
+  /// target regions.
   /// \param CodeGen Code generation sequence for combined directives. Includes
   /// a list of functions used for code generation of implicitly inlined
   /// regions.
   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
-  OpenMPDirectiveKind Kind, bool HasCancel)
+  OpenMPDirectiveKind Kind, bool HasCancel,
+  const CapturedStmt *CS)
   : CGF(CGF) {
 // Start emission for the construct.
 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
+
+// Ensures that all the captures are local in the current inlined region.
+if (CS)
+  CGF.StartOpenMPInlinedCapturedRegion(*CS, ForcedLocalVars);
   }
 
   ~InlinedOpenMPRegionRAII() {
+// Restore the local variable information if we have anything forced in this
+// inlined region.
+if (!ForcedLocalVars.empty())
+  CGF.CloseOpenMPInlinedCapturedRegion(ForcedLocalVars);
+
 // Restore original CapturedStmtInfo only if we're done with code emission.
 auto *OldCSI =
 cast(CGF.CapturedStmtInfo)->getOldCSI();
 delete CGF.CapturedStmtInfo;
 CGF.CapturedStmtInfo = OldCSI;
   }
 };
 

ABataev wrote:
> Do not modify this one, add a new one like this:
> ```
> static void EmptyCodeGen(CodeGenFunction &) {
>   llvm_unreachable("No codegen for expressions");
> }
> /// \brief API for generation of expressions captured in OpenMP region in outer
> /// scope.
> class CGOpenMPOuterExprInfo : public CGOpenMPInlinedRegionInfo {
> public:
>   CGOpenMPOuterExprInfo(CodeGenFunction &CGF)
>   : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen, OMPD_unknown,
>   /*HasCancel=*/false),
> CGF(CGF) {}
> 
>   /// \brief Lookup the captured field decl for a variable.
>   const FieldDecl *lookup(const VarDecl *VD) const override {
> if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
>   return FD;
> if (!VD->isLocalVarDeclOrParm() && Mapped.count(VD) == 0) {
>   auto It =
>   PrivateGlobals.insert(new CodeGenFunction::OMPPrivateScope(CGF));
>   DeclRefExpr DRE(const_cast(VD),
>   /*RefersToEnclosingVariableOrCapture=*/false,
>   VD->getType().getNonReferenceType(), VK_LValue,
>   SourceLocation());
>   It.first->addPrivate(
>   VD, [&]() -> Address { return CGF.EmitLValue(&DRE).getAddress(); });
>   (void)It.first->Privatize();
>   Mapped.insert(VD);
> }
> return nullptr;
>   }
> 
>   /// \brief Emit the captured statement body.
>   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
> llvm_unreachable("No body for expressions");
>   }
> 
>   /// \brief Get a variable or parameter for storing global thread id
>   /// inside OpenMP construct.
>   const VarDecl *getThreadIDVariable() const override {
> llvm_unreachable("No thread id for expressions");
>   }
> 
>   /// \brief Get the name of the capture helper.
>   StringRef getHelperName() const override {
> llvm_unreachable("No helper name for expressions");
>   }
> 
>   static bool classof(const CGCapturedStmtInfo *Info) {
> llvm_unreachable("No helper name for expressions");
>   }
> 
>   virtual ~CGOpenMPOuterExprInfo() {
> for (auto *Scope : PrivateGlobals)
>   delete Scope;
> PrivateGlobals.clear();
>   }
> 
> private:
>   CodeGenFunction &CGF;
>   /// Private scopes for each captured global variables.
>   llvm::SmallPtrSet PrivateGlobals;
>   SmallSet Mapped;
> };
> ```
Ok, I adapted the code you pasted above and I am now creating a new inline 
region API. I am naming it `CGOpenMPInnerExprInfo` given that it relates to the 
emission of expressions defined in the inner scope. Also, I am doing the 
privatization in the constructor given that the globals have to be local 
already by the time the expression is emitted. 


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-03-02 Thread Samuel Antao via cfe-commits
sfantao updated this revision to Diff 49653.
sfantao added a comment.

Use new innermost scope API for the emission of the num_teams and thread_limit 
expressions.


http://reviews.llvm.org/D17019

Files:
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGStmtOpenMP.cpp
  test/OpenMP/teams_codegen.cpp

Index: test/OpenMP/teams_codegen.cpp
===
--- /dev/null
+++ test/OpenMP/teams_codegen.cpp
@@ -0,0 +1,210 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+#ifdef CK1
+
+int Gbla;
+long long Gblb;
+int &Gblc = Gbla;
+
+// CK1-LABEL: teams_argument_global_local
+int teams_argument_global_local(int a){
+  int comp = 1;
+
+  int la = 23;
+  float lc = 25.0;
+
+  // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(la)
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams thread_limit(la)
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
+  // CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
+  // CK1-DAG: [[NTB]] = load i32, i32* %{{.+}},
+
+  // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+  // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]]
+  // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64
+  // CK1-DAG: [[TLD]] = load float, float* %{{.+}},
+  // CK1-DAG: [[TLB]] = load i64, i64* @Gblb,
+
+  // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc)
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 {{.+}}, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], 1
+  // CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
+
+  // CK1-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 2
+  // CK1-DAG: [[TLA]] = load i32, i32* @Gbla,
+
+  // CK1: call void @{{.+}}(i{{.+}} {{.+}}
+  #pragma omp target
+  #pragma omp teams num_teams(Gblc+1) thread_limit(Gblc+2)
+  {
+comp += Gblc;
+  }
+
+  return comp;
+}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=

Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-03-02 Thread Alexey Bataev via cfe-commits
ABataev added inline comments.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:256-296
@@ -255,26 +255,43 @@
 /// \brief RAII for emitting code of OpenMP constructs.
 class InlinedOpenMPRegionRAII {
   CodeGenFunction &CGF;
+  /// \brief Saves the varaibles that were forced to be local in the current
+  /// inlined region.
+  SmallVector ForcedLocalVars;
 
 public:
-  /// \brief Constructs region for combined constructs.
+  /// \brief Constructs inlined region. Mostly used for combined constructs. If
+  /// a captured statement is provided it also ensures the captured variables
+  /// are all defined in the scope of the enclosing function. This is typical
+  /// used for regions that make local instances of global variables, e.g.
+  /// target regions.
   /// \param CodeGen Code generation sequence for combined directives. Includes
   /// a list of functions used for code generation of implicitly inlined
   /// regions.
   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
-  OpenMPDirectiveKind Kind, bool HasCancel)
+  OpenMPDirectiveKind Kind, bool HasCancel,
+  const CapturedStmt *CS)
   : CGF(CGF) {
 // Start emission for the construct.
 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
+
+// Ensures that all the captures are local in the current inlined region.
+if (CS)
+  CGF.StartOpenMPInlinedCapturedRegion(*CS, ForcedLocalVars);
   }
 
   ~InlinedOpenMPRegionRAII() {
+// Restore the local variable information if we have anything forced in this
+// inlined region.
+if (!ForcedLocalVars.empty())
+  CGF.CloseOpenMPInlinedCapturedRegion(ForcedLocalVars);
+
 // Restore original CapturedStmtInfo only if we're done with code emission.
 auto *OldCSI =
 cast(CGF.CapturedStmtInfo)->getOldCSI();
 delete CGF.CapturedStmtInfo;
 CGF.CapturedStmtInfo = OldCSI;
   }
 };
 

Do not modify this one, add a new one like this:
```
static void EmptyCodeGen(CodeGenFunction &) {
  llvm_unreachable("No codegen for expressions");
}
/// \brief API for generation of expressions captured in OpenMP region in outer
/// scope.
class CGOpenMPOuterExprInfo : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPOuterExprInfo(CodeGenFunction &CGF)
  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen, OMPD_unknown,
  /*HasCancel=*/false),
CGF(CGF) {}

  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
  return FD;
if (!VD->isLocalVarDeclOrParm() && Mapped.count(VD) == 0) {
  auto It =
  PrivateGlobals.insert(new CodeGenFunction::OMPPrivateScope(CGF));
  DeclRefExpr DRE(const_cast(VD),
  /*RefersToEnclosingVariableOrCapture=*/false,
  VD->getType().getNonReferenceType(), VK_LValue,
  SourceLocation());
  It.first->addPrivate(
  VD, [&]() -> Address { return CGF.EmitLValue(&DRE).getAddress(); });
  (void)It.first->Privatize();
  Mapped.insert(VD);
}
return nullptr;
  }

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
llvm_unreachable("No body for expressions");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
llvm_unreachable("No thread id for expressions");
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
llvm_unreachable("No helper name for expressions");
  }

  virtual ~CGOpenMPOuterExprInfo() {
for (auto *Scope : PrivateGlobals)
  delete Scope;
PrivateGlobals.clear();
  }

private:
  CodeGenFunction &CGF;
  /// Private scopes for each captured global variables.
  llvm::SmallPtrSet PrivateGlobals;
  SmallSet Mapped;
};
```


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-28 Thread Samuel Antao via cfe-commits
sfantao added inline comments.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:4002
@@ +4001,3 @@
+static llvm::Value *
+emitNumTeamsClauseForTargetDirective(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,

ABataev wrote:
> I still don't like the generation of some functions, that, generally 
> speaking, are not required.
> Could you try to add a new 'class CGOpenMPInlinedRegionInfo' like class, that 
> will be able to handle not captured variables in expressions?
Ok. In the new diff I am using the logic for emission of inlined regions 
for num teams and thread limit as well. I still had to add extra logic in 
CodeGenFunction to insert extra entries in the local declaration cache, given 
that, as I discussed above, target regions make captured global variables 
local. Hope this is aligned with what you have in mind.

Thanks! 


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-28 Thread Samuel Antao via cfe-commits
sfantao updated this revision to Diff 49331.
sfantao marked an inline comment as done.
sfantao added a comment.

Emit num teams and thread limit using the inlined directives machinery.


http://reviews.llvm.org/D17019

Files:
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGOpenMPRuntime.h
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/CodeGen/CodeGenFunction.h
  test/OpenMP/teams_codegen.cpp

Index: test/OpenMP/teams_codegen.cpp
===
--- /dev/null
+++ test/OpenMP/teams_codegen.cpp
@@ -0,0 +1,194 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+#ifdef CK1
+
+int Gbla;
+long long Gblb;
+
+// CK1-LABEL: teams_argument_global_local
+int teams_argument_global_local(int a){
+  int comp = 1;
+
+  int la = 23;
+  float lc = 25.0;
+
+  // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(la)
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams thread_limit(la)
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
+  // CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
+  // CK1-DAG: [[NTB]] = load i32, i32* %{{.+}},
+
+  // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+  // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]]
+  // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64
+  // CK1-DAG: [[TLD]] = load float, float* %{{.+}},
+  // CK1-DAG: [[TLB]] = load i64, i64* @Gblb,
+
+  // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc)
+  {
+++comp;
+  }
+
+  return comp;
+}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=

Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-25 Thread Alexey Bataev via cfe-commits
ABataev added inline comments.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:4002
@@ +4001,3 @@
+static llvm::Value *
+emitNumTeamsClauseForTargetDirective(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,

I still don't like the generation of some functions, that, generally speaking, 
are not required.
Could you try to add a new 'class CGOpenMPInlinedRegionInfo' like class, that 
will be able to handle not captured variables in expressions?


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-23 Thread Samuel Antao via cfe-commits
sfantao updated this revision to Diff 48839.
sfantao added a comment.

Rebase.


http://reviews.llvm.org/D17019

Files:
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/CodeGen/CodeGenFunction.h
  test/OpenMP/teams_codegen.cpp

Index: test/OpenMP/teams_codegen.cpp
===
--- /dev/null
+++ test/OpenMP/teams_codegen.cpp
@@ -0,0 +1,211 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+#ifdef CK1
+
+int Gbla;
+long long Gblb;
+
+// CK1-LABEL: teams_argument_global_local
+int teams_argument_global_local(int a){
+  int comp = 1;
+
+  int la = 23;
+  float lc = 25.0;
+
+  // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+  // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32*
+  // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]],
+  // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]],
+  // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32*
+  // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]],
+  // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}},
+
+
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(la)
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+  // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32*
+  // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]],
+  // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]],
+  // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32*
+  // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]],
+  // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}},
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams thread_limit(la)
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
+  // CK1-64-DAG: [[NTB]] = load i32, i32* %c{{.+}},
+  // CK1-64-DAG: [[NTA]] = load i32, i32* %c{{.+}},
+
+  // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+  // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]]
+  // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64
+  // CK1-DAG: [[TLD]] = load float, float* %{{.+}},
+  // CK1-DAG: [[TLB]] = load i64, i64* %{{.+}},
+
+  // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc)
+  {
+++comp;
+  }
+
+  return comp;
+}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp

Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-21 Thread Carlo Bertolli via cfe-commits
carlo.bertolli added a comment.

Just wanted to add that tgt_target_teams needs the values for num_teams and 
thread_limit because, for some accelerators, it is necessary to know those 
values in advance, before the teams region actually gets executed. For 
instance, on Nvidia GPUs we launch one CUDA block for each team. This can only 
be done at kernel launch time, which is performed in the implementation of 
tgt_target_teams.


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-20 Thread Samuel Antao via cfe-commits
sfantao added inline comments.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3799-3806
@@ -3780,4 +3798,10 @@
   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
   DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
 }
 
+/// \brief Emit the num_teams clause of an enclosed teams directive at the
+/// target region scope. If there is no teams directive associated with the
+/// target directive, or if there is no num_teams clause associated with the
+/// enclosed teams directive, return nullptr.
+static llvm::Value *
+emitNumTeamsClauseForTargetDirective(CodeGenFunction &CGF,

ABataev wrote:
> sfantao wrote:
> > ABataev wrote:
> > > sfantao wrote:
> > > > ABataev wrote:
> > > > > I don't understand why global var is not captured in target region. 
> > > > > If it is not implemented yet, it must be implemented. If it is not 
> > > > > captured, it must be captured in Sema. We should not introduce some 
> > > > > function/other objects to find a workaround for 'not implemented' 
> > > > > features.
> > > > Sorry, I was not clear in my comment. It is not that globals are not 
> > > > captured in target regions - they are, we already have Sema doing that. 
> > > > 
> > > > My point is that exactly because we capture globals in target regions 
> > > > the magic that `OMPCapturedExprDecl` introduces does not work for that 
> > > > specific case. So, if we have something like:
> > > > 
> > > > ```
> > > > int Gbl;
> > > > 
> > > > foo() {
> > > >   #pragma omp target
> > > >   #pragma omp teams num_teams(Gbl)
> > > >   {}
> > > > }
> > > > ```
> > > > when the DeclRefExpr for Gbl used in num_teams is emitted in the scope 
> > > > that encloses '#pragma omp target', it will crash because Gbl is not a 
> > > > local and is marked as refer to enclosing capture. 
> > > > 
> > > > If I got it right, a solution based on `OMPCapturedExprDecl` basically 
> > > > makes local declarations whose initializers are the expression we are 
> > > > interested in. In the cases that  `OMPCapturedExprDecl` is currently 
> > > > employed we don't have globals being captured and that is why it works 
> > > > fine.
> > > > 
> > > > It is likely I am missing something here. Let me know if you need me to 
> > > > provide more details.
> > > > 
> > > > Thanks!
> > > It should not crash, because if it is captured, we must use captured 
> > > version of this variable, passed in arguments to outlined function
> > I am afraid I may not be understanding what you want me to do. Going back 
> > to my example:
> > 
> > ```
> > int Gbl;
> > 
> > foo() {
> >   // a) I need to emit num_teams(Gbl) here. DeclRefExpr(Gbl) emission won't 
> > work because it is marked "refer to enclosing capture". 
> >   #pragma omp target
> >   // b) If I emit it here, that's fine because I already have the arguments 
> > of the outlined function, but that is not what I need.
> >   #pragma omp teams num_teams(Gbl)
> >   {}
> > }
> > ```
> > Can you please elaborate on how `OMPCapturedExprDecl` would help me 
> > implement a). Sorry for the trouble.
> > 
> > Thanks!
> OK, why do you want to emit it in a) rather than in b)?
The reason is that the runtime library requires the number of teams and thread 
limit to be passed. So, if we have a target region with an enclosed teams 
region, we have to use `tgt_target_teams` instead of `tgt_target`. 
`tgt_target_teams` takes thread_limit and num_teams as arguments. Therefore, we 
need to get that information from the teams directive given that that 
information is captured in its clauses.

Thanks!


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-19 Thread Alexey Bataev via cfe-commits
ABataev added inline comments.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3799-3806
@@ -3780,4 +3798,10 @@
   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
   DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
 }
 
+/// \brief Emit the num_teams clause of an enclosed teams directive at the
+/// target region scope. If there is no teams directive associated with the
+/// target directive, or if there is no num_teams clause associated with the
+/// enclosed teams directive, return nullptr.
+static llvm::Value *
+emitNumTeamsClauseForTargetDirective(CodeGenFunction &CGF,

sfantao wrote:
> ABataev wrote:
> > sfantao wrote:
> > > ABataev wrote:
> > > > I don't understand why global var is not captured in target region. If 
> > > > it is not implemented yet, it must be implemented. If it is not 
> > > > captured, it must be captured in Sema. We should not introduce some 
> > > > function/other objects to find a workaround for 'not implemented' 
> > > > features.
> > > Sorry, I was not clear in my comment. It is not that globals are not 
> > > captured in target regions - they are, we already have Sema doing that. 
> > > 
> > > My point is that exactly because we capture globals in target regions the 
> > > magic that `OMPCapturedExprDecl` introduces does not work for that 
> > > specific case. So, if we have something like:
> > > 
> > > ```
> > > int Gbl;
> > > 
> > > foo() {
> > >   #pragma omp target
> > >   #pragma omp teams num_teams(Gbl)
> > >   {}
> > > }
> > > ```
> > > when the DeclRefExpr for Gbl used in num_teams is emitted in the scope 
> > > that encloses '#pragma omp target', it will crash because Gbl is not a 
> > > local and is marked as refer to enclosing capture. 
> > > 
> > > If I got it right, a solution based on `OMPCapturedExprDecl` basically 
> > > makes local declarations whose initializers are the expression we are 
> > > interested in. In the cases that  `OMPCapturedExprDecl` is currently 
> > > employed we don't have globals being captured and that is why it works 
> > > fine.
> > > 
> > > It is likely I am missing something here. Let me know if you need me to 
> > > provide more details.
> > > 
> > > Thanks!
> > It should not crash, because if it is captured, we must use captured 
> > version of this variable, passed in arguments to outlined function
> I am afraid I may not be understanding what you want me to do. Going back to 
> my example:
> 
> ```
> int Gbl;
> 
> foo() {
>   // a) I need to emit num_teams(Gbl) here. DeclRefExpr(Gbl) emission won't 
> work because it is marked "refer to enclosing capture". 
>   #pragma omp target
>   // b) If I emit it here, that's fine because I already have the arguments 
> of the outlined function, but that is not what I need.
>   #pragma omp teams num_teams(Gbl)
>   {}
> }
> ```
> Can you please elaborate on how `OMPCapturedExprDecl` would help me implement 
> a). Sorry for the trouble.
> 
> Thanks!
OK, why do you want to emit it in a) rather than in b)?


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-18 Thread Samuel Antao via cfe-commits
sfantao added a comment.

Hi Alexey



Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3799-3806
@@ -3780,4 +3798,10 @@
   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
   DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
 }
 
+/// \brief Emit the num_teams clause of an enclosed teams directive at the
+/// target region scope. If there is no teams directive associated with the
+/// target directive, or if there is no num_teams clause associated with the
+/// enclosed teams directive, return nullptr.
+static llvm::Value *
+emitNumTeamsClauseForTargetDirective(CodeGenFunction &CGF,

ABataev wrote:
> sfantao wrote:
> > ABataev wrote:
> > > I don't understand why global var is not captured in target region. If it 
> > > is not implemented yet, it must be implemented. If it is not captured, it 
> > > must be captured in Sema. We should not introduce some function/other 
> > > objects to find a workaround for 'not implemented' features.
> > Sorry, I was not clear in my comment. It is not that globals are not 
> > captured in target regions - they are, we already have Sema doing that. 
> > 
> > My point is that exactly because we capture globals in target regions the 
> > magic that `OMPCapturedExprDecl` introduces does not work for that specific 
> > case. So, if we have something like:
> > 
> > ```
> > int Gbl;
> > 
> > foo() {
> >   #pragma omp target
> >   #pragma omp teams num_teams(Gbl)
> >   {}
> > }
> > ```
> > when the DeclRefExpr for Gbl used in num_teams is emitted in the scope that 
> > encloses '#pragma omp target', it will crash because Gbl is not a local and 
> > is marked as refer to enclosing capture. 
> > 
> > If I got it right, a solution based on `OMPCapturedExprDecl` basically 
> > makes local declarations whose initializers are the expression we are 
> > interested in. In the cases that  `OMPCapturedExprDecl` is currently 
> > employed we don't have globals being captured and that is why it works fine.
> > 
> > It is likely I am missing something here. Let me know if you need me to 
> > provide more details.
> > 
> > Thanks!
> It should not crash, because if it is captured, we must use captured version 
> of this variable, passed in arguments to outlined function
I am afraid I may not be understanding what you want me to do. Going back to my 
example:

```
int Gbl;

foo() {
  // a) I need to emit num_teams(Gbl) here. DeclRefExpr(Gbl) emission won't
  //    work because it is marked "refer to enclosing capture".
  #pragma omp target
  // b) If I emit it here, that's fine because I already have the arguments of
  //    the outlined function, but that is not what I need.
  #pragma omp teams num_teams(Gbl)
  {}
}
```
Can you please elaborate on how `OMPCapturedExprDecl` would help me implement 
a). Sorry for the trouble.

Thanks!


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-18 Thread Alexey Bataev via cfe-commits
ABataev added inline comments.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3799-3806
@@ -3780,4 +3798,10 @@
   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
   DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
 }
 
+/// \brief Emit the num_teams clause of an enclosed teams directive at the
+/// target region scope. If there is no teams directive associated with the
+/// target directive, or if there is no num_teams clause associated with the
+/// enclosed teams directive, return nullptr.
+static llvm::Value *
+emitNumTeamsClauseForTargetDirective(CodeGenFunction &CGF,

sfantao wrote:
> ABataev wrote:
> > I don't understand why global var is not captured in target region. If it 
> > is not implemented yet, it must be implemented. If it is not captured, it 
> > must be captured in Sema. We should not introduce some function/other 
> > objects to find a workaround for 'not implemented' features.
> Sorry, I was not clear in my comment. It is not that globals are not captured 
> in target regions - they are, we already have Sema doing that. 
> 
> My point is that exactly because we capture globals in target regions the 
> magic that `OMPCapturedExprDecl` introduces does not work for that specific 
> case. So, if we have something like:
> 
> ```
> int Gbl;
> 
> foo() {
>   #pragma omp target
>   #pragma omp teams num_teams(Gbl)
>   {}
> }
> ```
> when the DeclRefExpr for Gbl used in num_teams is emitted in the scope that 
> encloses '#pragma omp target', it will crash because Gbl is not a local and 
> is marked as refer to enclosing capture. 
> 
> If I got it right, a solution based on `OMPCapturedExprDecl` basically makes 
> local declarations whose initializers are the expression we are interested 
> in. In the cases that  `OMPCapturedExprDecl` is currently employed we don't 
> have globals being captured and that is why it works fine.
> 
> It is likely I am missing something here. Let me know if you need me to 
> provide more details.
> 
> Thanks!
It should not crash, because if it is captured, we must use the captured 
version of this variable, passed in the arguments to the outlined function.


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-18 Thread Samuel Antao via cfe-commits
sfantao added a comment.

Hi Alexey,



Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3799-3806
@@ -3780,4 +3798,10 @@
   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
   DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
 }
 
+/// \brief Emit the num_teams clause of an enclosed teams directive at the
+/// target region scope. If there is no teams directive associated with the
+/// target directive, or if there is no num_teams clause associated with the
+/// enclosed teams directive, return nullptr.
+static llvm::Value *
+emitNumTeamsClauseForTargetDirective(CodeGenFunction &CGF,

ABataev wrote:
> I don't understand why global var is not captured in target region. If it is 
> not implemented yet, it must be implemented. If it is not captured, it must 
> be captured in Sema. We should not introduce some function/other objects to 
> find a workaround for 'not implemented' features.
Sorry, I was not clear in my comment. It is not that globals are not captured 
in target regions - they are, we already have Sema doing that. 

My point is that exactly because we capture globals in target regions the magic 
that `OMPCapturedExprDecl` introduces does not work for that specific case. So, 
if we have something like:

```
int Gbl;

foo() {
  #pragma omp target
  #pragma omp teams num_teams(Gbl)
  {}
}
```
when the DeclRefExpr for Gbl used in num_teams is emitted in the scope that 
encloses '#pragma omp target', it will crash because Gbl is not a local and is 
marked as refer to enclosing capture. 

If I got it right, a solution based on `OMPCapturedExprDecl` basically makes 
local declarations whose initializers are the expression we are interested in. 
In the cases where `OMPCapturedExprDecl` is currently employed we don't have 
globals being captured, and that is why it works fine.

It is likely I am missing something here. Let me know if you need me to provide 
more details.

Thanks!


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-18 Thread Alexey Bataev via cfe-commits
ABataev added inline comments.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3799-3806
@@ -3780,4 +3798,10 @@
   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
   DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
 }
 
+/// \brief Emit the num_teams clause of an enclosed teams directive at the
+/// target region scope. If there is no teams directive associated with the
+/// target directive, or if there is no num_teams clause associated with the
+/// enclosed teams directive, return nullptr.
+static llvm::Value *
+emitNumTeamsClauseForTargetDirective(CodeGenFunction &CGF,

I don't understand why global var is not captured in target region. If it is 
not implemented yet, it must be implemented. If it is not captured, it must be 
captured in Sema. We should not introduce some function/other objects to find a 
workaround for 'not implemented' features.


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-17 Thread Samuel Antao via cfe-commits
sfantao added a comment.

Hi Alexey,

Thanks for the review!



Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3811-3818
@@ -3789,1 +3810,10 @@
   DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID);
+
+  // If the current target region has a teams region enclosed, we need to get
+  // the number of teams and thread limit to pass to the runtime function call
+  // later on. This is done through a function that returns the value. This is
+  // required because the expression is captured in the enclosing target
+  // environment when the teams directive is not combined with target. This only
+  // has to be done for the host.
+  //
+  // FIXME: Accommodate other combined directives with teams when they become

ABataev wrote:
> It is better to use OMPCapturedExprDecl for this, just like it is done for 
> schedule clause
I don't think that would completely solve the problem in this case. In my 
understanding, the problem I have here is slightly different from the one 
`OMPCapturedExprDecl` attempts to solve:

I have a clause (num_teams/thread_limit) that is part of an enclosed directive 
(teams) that I need to emit in the outer scope (target). If I create an 
OMPCapturedExprDecl, that would have to go with some dummy clause for the 
target so that the initializer is emitted at the target lexical scope, and 
that emission would only work because in most directives the captures are local 
variables of the enclosing scope, and emission on the locals takes precedence 
over declarations that "refer to enclosing capture". However, the target 
directive is special in the sense that it also captures global variables. So if 
I use OMPCapturedExprDecl on an expression that refers to globals, that will 
cause a crash during the emission of the initializer because the capture of the 
target directive was not created yet. The patch has regression tests exactly to 
test this subtle difference in the target directive.

I am not saying that there are no other ways of doing this, but this approach 
seemed to me the least disruptive, as it is self-contained in the target 
codegen.

Let me know if you disagree.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3822-3857
@@ -3790,1 +3821,38 @@
+  if (!CGM.getLangOpts().OpenMPIsDevice)
+if (auto *TeamsDir = dyn_cast(CS.getCapturedStmt())) {
+  if (auto *NTE = TeamsDir->getSingleClause()) {
+auto &&CodeGen = [NTE](CodeGenFunction &CGF) {
+  auto *V = CGF.EmitScalarExpr(NTE->getNumTeams());
+  CGF.Builder.CreateRet(
+  CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
+  CGF.EmitBlock(CGF.createBasicBlock());
+};
+
+CodeGenFunction CGF(CGM, true);
+CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen,
+".omp_offload.get_num_teams");
+CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+
+NestedNumTeamsFn =
+CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy);
+NestedNumTeamsFn->addFnAttr(llvm::Attribute::AlwaysInline);
+  }
+  if (auto *TLE = TeamsDir->getSingleClause()) {
+auto &&CodeGen = [TLE](CodeGenFunction &CGF) {
+  auto *V = CGF.EmitScalarExpr(TLE->getThreadLimit());
+  CGF.Builder.CreateRet(
+  CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
+  CGF.EmitBlock(CGF.createBasicBlock());
+};
+
+CodeGenFunction CGF(CGM, true);
+CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen,
+".omp_offload.get_thread_limit");
+CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+
+NestedThreadLimitFn =
+CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy);
+NestedThreadLimitFn->addFnAttr(llvm::Attribute::AlwaysInline);
+  }
+}
   return;

ABataev wrote:
> Please, do it in separate functions
I centralized the emission of num_teams and thread_limit in a function.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:4148-4161
@@ +4147,16 @@
+  llvm::Value *ThreadLimit = nullptr;
+  if (TD->getSingleClause()) {
+assert(NestedNumTeamsFn && "Helper function is required to get the "
+   "number of teams of an enclosed teams "
+   "directive.");
+NumTeams = CGF.Builder.CreateCall(NestedNumTeamsFn, BasePointers);
+  } else
+NumTeams = CGF.Builder.getInt32(0);
+  if (TD->getSingleClause()) {
+assert(NestedThreadLimitFn && "Helper function is required to get the "
+  "thread limit of an enclosed teams "
+  "directive.");
+ThreadLimit = CGF.Builder.CreateCall(NestedThreadLimitFn, 
BasePointers);
+  } else
+ThreadLimit = CGF.Builder.getInt32(0);
+
-

Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-17 Thread Samuel Antao via cfe-commits
sfantao updated this revision to Diff 48229.
sfantao marked 4 inline comments as done.
sfantao updated the summary for this revision.
sfantao added a comment.

Separate emission of num_teams and thread_limit into functions.


http://reviews.llvm.org/D17019

Files:
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGOpenMPRuntime.h
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/CodeGen/CodeGenFunction.h
  test/OpenMP/teams_codegen.cpp

Index: test/OpenMP/teams_codegen.cpp
===
--- /dev/null
+++ test/OpenMP/teams_codegen.cpp
@@ -0,0 +1,211 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+#ifdef CK1
+
+int Gbla;
+long long Gblb;
+
+// CK1-LABEL: teams_argument_global_local
+int teams_argument_global_local(int a){
+  int comp = 1;
+
+  int la = 23;
+  float lc = 25.0;
+
+  // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+  // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32*
+  // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]],
+  // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]],
+  // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32*
+  // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]],
+  // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}},
+
+
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(la)
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+  // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32*
+  // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]],
+  // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]],
+  // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32*
+  // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]],
+  // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}},
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams thread_limit(la)
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
+  // CK1-64-DAG: [[NTB]] = load i32, i32* %c{{.+}},
+  // CK1-64-DAG: [[NTA]] = load i32, i32* %c{{.+}},
+
+  // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+  // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]]
+  // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64
+  // CK1-DAG: [[TLD]] = load float, float* %{{.+}},
+  // CK1-DAG: [[TLB]] = load i64, i64* %{{.+}},
+
+  // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc)
+  {
+++comp;
+  }
+
+  return comp;
+}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+//

Re: [PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-16 Thread Alexey Bataev via cfe-commits
ABataev added inline comments.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3811-3818
@@ -3789,1 +3810,10 @@
   DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID);
+
+  // If the current target region has a teams region enclosed, we need to get
+  // the number of teams and thread limit to pass to the runtime function call
+  // later on. This is done through a function that returns the value. This is
+  // required because the expression is captured in the enclosing target
+  // environment when the teams directive is not combined with target. This only
+  // has to be done for the host.
+  //
+  // FIXME: Accommodate other combined directives with teams when they become

It is better to use OMPCapturedExprDecl for this, just like it is done for 
schedule clause


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3822-3857
@@ -3790,1 +3821,38 @@
+  if (!CGM.getLangOpts().OpenMPIsDevice)
+if (auto *TeamsDir = dyn_cast(CS.getCapturedStmt())) {
+  if (auto *NTE = TeamsDir->getSingleClause()) {
+auto &&CodeGen = [NTE](CodeGenFunction &CGF) {
+  auto *V = CGF.EmitScalarExpr(NTE->getNumTeams());
+  CGF.Builder.CreateRet(
+  CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
+  CGF.EmitBlock(CGF.createBasicBlock());
+};
+
+CodeGenFunction CGF(CGM, true);
+CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen,
+".omp_offload.get_num_teams");
+CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+
+NestedNumTeamsFn =
+CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy);
+NestedNumTeamsFn->addFnAttr(llvm::Attribute::AlwaysInline);
+  }
+  if (auto *TLE = TeamsDir->getSingleClause()) {
+auto &&CodeGen = [TLE](CodeGenFunction &CGF) {
+  auto *V = CGF.EmitScalarExpr(TLE->getThreadLimit());
+  CGF.Builder.CreateRet(
+  CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
+  CGF.EmitBlock(CGF.createBasicBlock());
+};
+
+CodeGenFunction CGF(CGM, true);
+CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen,
+".omp_offload.get_thread_limit");
+CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+
+NestedThreadLimitFn =
+CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy);
+NestedThreadLimitFn->addFnAttr(llvm::Attribute::AlwaysInline);
+  }
+}
   return;

Please do this in separate functions.


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3837
@@ +3836,3 @@
+NestedNumTeamsFn =
+CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy);
+NestedNumTeamsFn->addFnAttr(llvm::Attribute::AlwaysInline);

Return type must be Int32Ty, I think


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3854
@@ +3853,3 @@
+NestedThreadLimitFn =
+CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy);
+NestedThreadLimitFn->addFnAttr(llvm::Attribute::AlwaysInline);

Also Int32Ty


Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:4148-4161
@@ +4147,16 @@
+  llvm::Value *ThreadLimit = nullptr;
+  if (TD->getSingleClause()) {
+assert(NestedNumTeamsFn && "Helper function is required to get the "
+   "number of teams of an enclosed teams "
+   "directive.");
+NumTeams = CGF.Builder.CreateCall(NestedNumTeamsFn, BasePointers);
+  } else
+NumTeams = CGF.Builder.getInt32(0);
+  if (TD->getSingleClause()) {
+assert(NestedThreadLimitFn && "Helper function is required to get the "
+  "thread limit of an enclosed teams "
+  "directive.");
+ThreadLimit = CGF.Builder.CreateCall(NestedThreadLimitFn, 
BasePointers);
+  } else
+ThreadLimit = CGF.Builder.getInt32(0);
+

Again, all of this must be done in separate functions.


http://reviews.llvm.org/D17019



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

2016-02-08 Thread Samuel Antao via cfe-commits
sfantao created this revision.
sfantao added reviewers: ABataev, hfinkel, carlo.bertolli, arpith-jacob, kkwli0.
sfantao added subscribers: fraggamuffin, caomhin, cfe-commits.

This patch implements the launching of a target region in the presence of a 
nested teams region, i.e., it calls __tgt_target_teams with the required 
arguments gathered from the enclosed teams directive.

The actual codegen of the region enclosed by the teams construct will be 
contributed in a separate patch. 

http://reviews.llvm.org/D17019

Files:
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGOpenMPRuntime.h
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/CodeGen/CodeGenFunction.h
  test/OpenMP/teams_codegen.cpp

Index: test/OpenMP/teams_codegen.cpp
===
--- /dev/null
+++ test/OpenMP/teams_codegen.cpp
@@ -0,0 +1,211 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+#ifdef CK1
+
+int Gbla;
+long long Gblb;
+
+// CK1-LABEL: teams_argument_global_local
+int teams_argument_global_local(int a){
+  int comp = 1;
+
+  int la = 23;
+  float lc = 25.0;
+
+  // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+  // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32*
+  // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]],
+  // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]],
+  // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32*
+  // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]],
+  // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}},
+
+
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(la)
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+  // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32*
+  // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]],
+  // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]],
+  // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32*
+  // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]],
+  // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}},
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams thread_limit(la)
+  {
+++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
+  // CK1-64-DAG: [[NTB]] = load i32, i32* %c{{.+}},
+  // CK1-64-DAG: [[NTA]] = load i32, i32* %c{{.+}},
+
+  // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+  // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]]
+  // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64
+  // CK1-DAG: [[TLD]] = load float, float* %{{.+}},
+  // CK1-DAG: [[TLB]] = load i64, i64* %{{.+}},
+
+  // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc)
+  {
+++comp;
+  }
+
+  re