Author: Johannes Doerfert Date: 2022-08-15T09:31:09+02:00 New Revision: 6ce43697482ea0c841f0cf614fcda09ceeb325a9
URL: https://github.com/llvm/llvm-project/commit/6ce43697482ea0c841f0cf614fcda09ceeb325a9 DIFF: https://github.com/llvm/llvm-project/commit/6ce43697482ea0c841f0cf614fcda09ceeb325a9.diff LOG: [OpenMP][FIX] Ensure __kmpc_kernel_parallel is reachable The problem is we create the call to __kmpc_kernel_parallel in the openmp-opt pass but while we optimize the code, the call is not there yet. Thus, we assume we never reach it from __kmpc_target_deinit. That allows us to remove the store in there (`ParallelRegionFn = nullptr`), which leads to bad results later on. This is a stopgap solution until we come up with something better. Fixes https://github.com/llvm/llvm-project/issues/57064 (cherry picked from commit a8cda3290944687b4fd0138e63cd980ea497a438) Added: Modified: openmp/libomptarget/DeviceRTL/src/Kernel.cpp Removed: ################################################################################ diff --git a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp index 74c22a61f3b88..d682652830a03 100644 --- a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp @@ -35,7 +35,7 @@ static void genericStateMachine(IdentTy *Ident) { uint32_t TId = mapping::getThreadIdInBlock(); do { - ParallelRegionFnTy WorkFn = 0; + ParallelRegionFnTy WorkFn = nullptr; // Wait for the signal that we have a new work function. synchronize::threads(); @@ -100,8 +100,20 @@ int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode, // doing any work. mapping::getBlockSize() does not include any of the main // thread's warp, so none of its threads can ever be active worker threads. if (UseGenericStateMachine && - mapping::getThreadIdInBlock() < mapping::getBlockSize(IsSPMD)) + mapping::getThreadIdInBlock() < mapping::getBlockSize(IsSPMD)) { genericStateMachine(Ident); + } else { + // Retrieve the work function just to ensure we always call + // __kmpc_kernel_parallel even if a custom state machine is used.
+ // TODO: this is not super pretty. The problem is we create the call to + // __kmpc_kernel_parallel in the openmp-opt pass but while we optimize it is + // not there yet. Thus, we assume we never reach it from + // __kmpc_target_deinit. That allows us to remove the store in there to + // ParallelRegionFn, which leads to bad results later on. + ParallelRegionFnTy WorkFn = nullptr; + __kmpc_kernel_parallel(&WorkFn); + ASSERT(WorkFn == nullptr); + } return mapping::getThreadIdInBlock(); } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits