Author: abataev
Date: Tue Jun 26 10:24:03 2018
New Revision: 335632

URL: http://llvm.org/viewvc/llvm-project?rev=335632&view=rev
Log:
[OPENMP, NVPTX] Reduce the number of the globalized variables.

Patch tries to make better analysis of the variables that should be
globalized. From now, instead of all parallel directives it will check
only distribute parallel .. directives and check only for
firstprivte/lastprivate variables if they must be globalized.

Added:
    cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
Modified:
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=335632&r1=335631&r2=335632&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Tue Jun 26 10:24:03 2018
@@ -187,7 +187,7 @@ class CheckVarsEscapingDeclContext final
   RecordDecl *GlobalizedRD = nullptr;
   llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
   bool AllEscaped = false;
-  bool IsForParallelRegion = false;
+  bool IsForCombinedParallelRegion = false;
 
   static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
   isDeclareTargetDeclaration(const ValueDecl *VD) {
@@ -210,7 +210,7 @@ class CheckVarsEscapingDeclContext final
       if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
         // Check if need to capture the variable that was already captured by
         // value in the outer region.
-        if (!IsForParallelRegion) {
+        if (!IsForCombinedParallelRegion) {
           if (!FD->hasAttrs())
             return;
           const auto *Attr = FD->getAttr<OMPCaptureKindAttr>();
@@ -225,13 +225,13 @@ class CheckVarsEscapingDeclContext final
           assert(!VD->getType()->isVariablyModifiedType() &&
                  "Parameter captured by value with variably modified type");
           EscapedParameters.insert(VD);
-        } else if (!IsForParallelRegion) {
+        } else if (!IsForCombinedParallelRegion) {
           return;
         }
       }
     }
     if ((!CGF.CapturedStmtInfo ||
-         (IsForParallelRegion && CGF.CapturedStmtInfo)) &&
+         (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&
         VD->getType()->isReferenceType())
       // Do not globalize variables with reference type.
       return;
@@ -253,18 +253,49 @@ class CheckVarsEscapingDeclContext final
       }
     }
   }
-  void VisitOpenMPCapturedStmt(const CapturedStmt *S, bool IsParallelRegion) {
+  void VisitOpenMPCapturedStmt(const CapturedStmt *S,
+                               ArrayRef<OMPClause *> Clauses,
+                               bool IsCombinedParallelRegion) {
     if (!S)
       return;
     for (const CapturedStmt::Capture &C : S->captures()) {
       if (C.capturesVariable() && !C.capturesVariableByCopy()) {
         const ValueDecl *VD = C.getCapturedVar();
-        bool SavedIsParallelRegion = IsForParallelRegion;
-        IsForParallelRegion = IsParallelRegion;
+        bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;
+        if (IsCombinedParallelRegion) {
+          // Check if the variable is privatized in the combined construct and
+          // those private copies must be shared in the inner parallel
+          // directive.
+          IsForCombinedParallelRegion = false;
+          for (const OMPClause *C : Clauses) {
+            if (!isOpenMPPrivate(C->getClauseKind()) ||
+                C->getClauseKind() == OMPC_reduction ||
+                C->getClauseKind() == OMPC_linear ||
+                C->getClauseKind() == OMPC_private)
+              continue;
+            ArrayRef<const Expr *> Vars;
+            if (const auto *PC = dyn_cast<OMPFirstprivateClause>(C))
+              Vars = PC->getVarRefs();
+            else if (const auto *PC = dyn_cast<OMPLastprivateClause>(C))
+              Vars = PC->getVarRefs();
+            else
+              llvm_unreachable("Unexpected clause.");
+            for (const auto *E : Vars) {
+              const Decl *D =
+                  cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl();
+              if (D == VD->getCanonicalDecl()) {
+                IsForCombinedParallelRegion = true;
+                break;
+              }
+            }
+            if (IsForCombinedParallelRegion)
+              break;
+          }
+        }
         markAsEscaped(VD);
         if (isa<OMPCapturedExprDecl>(VD))
           VisitValueDecl(VD);
-        IsForParallelRegion = SavedIsParallelRegion;
+        IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;
       }
     }
   }
@@ -341,7 +372,10 @@ public:
         VisitStmt(S->getCapturedStmt());
         return;
       }
-      VisitOpenMPCapturedStmt(S, CaptureRegions.back() == OMPD_parallel);
+      VisitOpenMPCapturedStmt(
+          S, D->clauses(),
+          CaptureRegions.back() == OMPD_parallel &&
+              isOpenMPDistributeDirective(D->getDirectiveKind()));
     }
   }
   void VisitCapturedStmt(const CapturedStmt *S) {

Added: cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp?rev=335632&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp 
(added)
+++ cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp 
Tue Jun 26 10:24:03 2018
@@ -0,0 +1,51 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple 
powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc 
%s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple 
nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s 
-fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck 
%s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple 
i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o 
%t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple 
nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s 
-fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck 
%s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions 
-fcxx-exceptions -x c++ -triple nvptx-unknown-unknown 
-fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device 
-fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix 
CHECK --check-prefix CHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+int a;
+
+int foo(int *a);
+
+int main(int argc, char **argv) {
+  int b[10], c[10], d[10];
+#pragma omp target teams map(tofrom:a)
+#pragma omp distribute parallel for firstprivate(b) lastprivate(c) if(a)
+  for (int i= 0; i < argc; ++i)
+    a = foo(&i) + foo(&a) + foo(&b[i]) + foo(&c[i]) + foo(&d[i]);
+  return 0;
+}
+
+// CHECK: @__omp_offloading_{{.*}}_main_l17_exec_mode = weak constant i8 1
+
+// CHECK-LABEL: define internal void @__omp_offloading_{{.*}}_main_l17_worker(
+
+// CHECK: define weak void @__omp_offloading_{{.*}}_main_l17([10 x i32]* 
dereferenceable(40) %{{.+}}, [10 x i32]* dereferenceable(40) %{{.+}}, i32* 
dereferenceable(4) %{{.+}}, i{{64|32}} %{{.+}}, [10 x i32]* dereferenceable(40) 
%{{.+}})
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 
84, i16 0)
+// CHECK: [[STACK:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty*
+// CHECK: [[ARGC:%.+]] = load i32, i32* %{{.+}}, align
+// CHECK: [[ARGC_ADDR:%.+]] = getelementptr inbounds 
%struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], 
i{{32|64}} 0, i{{32|64}} 0
+// CHECK: store i32 [[ARGC]], i32* [[ARGC_ADDR]],
+// CHECK: getelementptr inbounds %struct._globalized_locals_ty, 
%struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 1
+// CHECK: getelementptr inbounds %struct._globalized_locals_ty, 
%struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 2
+// CHECK: call void @__kmpc_for_static_init_4(
+
+// CHECK: call void @__kmpc_serialized_parallel(
+// CHECK: call void [[PARALLEL:@.+]](
+// CHECK: call void @__kmpc_end_serialized_parallel(
+
+// CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* @
+
+// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[PTR]])
+
+// CHECK: define internal void [[PARALLEL]](
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, 
i16 0)
+
+// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[PTR]])
+
+#endif


_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to