Author: Johannes Doerfert
Date: 2023-11-29T14:49:13-08:00
New Revision: fae233c63f93b4b6f9693685abe6c7d24393682f
URL: https://github.com/llvm/llvm-project/commit/fae233c63f93b4b6f9693685abe6c7d24393682f
DIFF: https://github.com/llvm/llvm-project/commit/fae233c63f93b4b6f9693685abe6c7d24393682f.diff

LOG: [OpenMP] Avoid initializing the KernelLaunchEnvironment if possible (#73864)

If we don't have a team reduction we don't need a kernel launch
environment (for now). In that case we can avoid the cost.

Added:

Modified:
    clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
    openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp

Removed:

################################################################################
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 7ddc67e8a04ab64..5b9dbbf7e83a968 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -804,7 +804,9 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
       CGM.getTypes().ConvertTypeForMem(StaticTy);
   const auto &DL = CGM.getModule().getDataLayout();
   uint64_t ReductionDataSize =
-      DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();
+      TeamsReductions.empty()
+          ? 0
+          : DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();
   CGBuilderTy &Bld = CGF.Builder;
   OMPBuilder.createTargetDeinit(Bld, ReductionDataSize,
                                 C.getLangOpts().OpenMPCUDAReductionBufNum);
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 477e0cad06fd50a..2ba9aca9e141a4d 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -464,6 +464,10 @@ GenericKernelTy::getKernelLaunchEnvironment(
   if (isCtorOrDtor() || RecordReplay.isReplaying())
     return nullptr;
 
+  if (!KernelEnvironment.Configuration.ReductionDataSize ||
+      !KernelEnvironment.Configuration.ReductionBufferLength)
+    return reinterpret_cast<KernelLaunchEnvironmentTy *>(~0);
+
   // TODO: Check if the kernel needs a launch environment.
   auto AllocOrErr = GenericDevice.dataAlloc(sizeof(KernelLaunchEnvironmentTy),
                                             /*HostPtr=*/nullptr,
@@ -478,8 +482,7 @@ GenericKernelTy::getKernelLaunchEnvironment(
   /// async data transfer.
   auto &LocalKLE = (*AsyncInfoWrapper).KernelLaunchEnvironment;
   LocalKLE = KernelLaunchEnvironment;
-  if (KernelEnvironment.Configuration.ReductionDataSize &&
-      KernelEnvironment.Configuration.ReductionBufferLength) {
+  {
     auto AllocOrErr = GenericDevice.dataAlloc(
         KernelEnvironment.Configuration.ReductionDataSize *
             KernelEnvironment.Configuration.ReductionBufferLength,

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits