Hahnfeld created this revision. Hahnfeld added reviewers: ABataev, hfinkel, kkwli0, rjmccall. Hahnfeld added a subscriber: cfe-commits.
`#pragma omp parallel` needs an implicit barrier at the end of the region, which is currently emitted as an explicit call to `__kmpc_barrier`. However, the runtime already ensures a barrier in `__kmpc_fork_call`, which currently leads to two barriers per region per thread. Are there any corner cases where the explicit barrier is still needed? I couldn't come up with a test case that requires it... http://reviews.llvm.org/D15561 Files: lib/CodeGen/CGStmtOpenMP.cpp Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -879,10 +879,7 @@ (void)PrivateScope.Privatize(); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S); - // Emit implicit barrier at the end of the 'parallel' directive. - CGF.CGM.getOpenMPRuntime().emitBarrierCall( - CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); + // Implicit barrier is done by runtime in __kmpc_fork_call. }; emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); } @@ -1847,12 +1844,7 @@ (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitOMPWorksharingLoop(S); - // Emit implicit barrier at the end of parallel region, but this barrier - // is at the end of 'for' directive, so emit it as the implicit barrier for - // this 'for' directive. - CGF.CGM.getOpenMPRuntime().emitBarrierCall( - CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); + // Implicit barrier is done by runtime in __kmpc_fork_call. 
}; emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); } @@ -1865,12 +1857,7 @@ (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitOMPWorksharingLoop(S); - // Emit implicit barrier at the end of parallel region, but this barrier - // is at the end of 'for' directive, so emit it as the implicit barrier for - // this 'for' directive. - CGF.CGM.getOpenMPRuntime().emitBarrierCall( - CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); + // Implicit barrier is done by runtime in __kmpc_fork_call. }; emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); } @@ -1882,10 +1869,7 @@ LexicalScope Scope(*this, S.getSourceRange()); auto &&CodeGen = [&S](CodeGenFunction &CGF) { (void)CGF.EmitSections(S); - // Emit implicit barrier at the end of parallel region. - CGF.CGM.getOpenMPRuntime().emitBarrierCall( - CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); + // Implicit barrier is done by runtime in __kmpc_fork_call. }; emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); }
Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -879,10 +879,7 @@ (void)PrivateScope.Privatize(); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S); - // Emit implicit barrier at the end of the 'parallel' directive. - CGF.CGM.getOpenMPRuntime().emitBarrierCall( - CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); + // Implicit barrier is done by runtime in __kmpc_fork_call. }; emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); } @@ -1847,12 +1844,7 @@ (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitOMPWorksharingLoop(S); - // Emit implicit barrier at the end of parallel region, but this barrier - // is at the end of 'for' directive, so emit it as the implicit barrier for - // this 'for' directive. - CGF.CGM.getOpenMPRuntime().emitBarrierCall( - CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); + // Implicit barrier is done by runtime in __kmpc_fork_call. }; emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); } @@ -1865,12 +1857,7 @@ (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitOMPWorksharingLoop(S); - // Emit implicit barrier at the end of parallel region, but this barrier - // is at the end of 'for' directive, so emit it as the implicit barrier for - // this 'for' directive. - CGF.CGM.getOpenMPRuntime().emitBarrierCall( - CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); + // Implicit barrier is done by runtime in __kmpc_fork_call. 
}; emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); } @@ -1882,10 +1869,7 @@ LexicalScope Scope(*this, S.getSourceRange()); auto &&CodeGen = [&S](CodeGenFunction &CGF) { (void)CGF.EmitSections(S); - // Emit implicit barrier at the end of parallel region. - CGF.CGM.getOpenMPRuntime().emitBarrierCall( - CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); + // Implicit barrier is done by runtime in __kmpc_fork_call. }; emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits