tra created this revision. tra added reviewers: nikic, fhahn. Herald added subscribers: bixia, hiraditya, yaxunl. tra requested review of this revision. Herald added projects: clang, LLVM.
An attempt to enable MemCpyOpt unconditionally in D104801 <https://reviews.llvm.org/D104801> uncovered the fact that there are users that do not expect LLVM to materialize the `memset` intrinsic. While other passes can do that, too, MemCpyOpt triggers it more frequently and breaks sanitizers and some downstream users. For now, introduce a flag to force-enable the pass and opt in only CUDA compilation with the NVPTX back-end. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D106401 Files: clang/lib/Driver/ToolChains/Cuda.cpp llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -67,6 +67,10 @@ #define DEBUG_TYPE "memcpyopt" +static cl::opt<bool> EnableMemCpyOptWithoutLibcalls( + "-enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden, + cl::desc("Enable memcpyopt even when libcalls are disabled")); + static cl::opt<bool> EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(true), cl::Hidden, cl::desc("Use MemorySSA-backed MemCpyOpt.")); @@ -1757,7 +1761,16 @@ // If we don't have at least memset and memcpy, there is little point of doing // anything here. These are required by a freestanding implementation, so if // even they are disabled, there is no point in trying hard. - if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy)) + bool ShoudRunMemCpyOpt = + (TLI->has(LibFunc_memset) && TLI->has(LibFunc_memcpy)) + // Some targets have libcalls disabled, but still want to run the pass. + // TODO: the pass should not depend on libcall availability as the targets + // should provide alternative ways of lowering standard LLVM + // intrinsics. Alas, there are existing users that can't currently handle + // `memset` intrinsic materialized by this pass. See + // https://reviews.llvm.org/D104801 for the details. 
+ || EnableMemCpyOptWithoutLibcalls; + if (!ShoudRunMemCpyOpt) return false; while (true) { Index: clang/lib/Driver/ToolChains/Cuda.cpp =================================================================== --- clang/lib/Driver/ToolChains/Cuda.cpp +++ clang/lib/Driver/ToolChains/Cuda.cpp @@ -686,7 +686,8 @@ "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs."); if (DeviceOffloadingKind == Action::OFK_Cuda) { - CC1Args.push_back("-fcuda-is-device"); + CC1Args.append( + {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"}); if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals, options::OPT_fno_cuda_approx_transcendentals, false))
Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -67,6 +67,10 @@ #define DEBUG_TYPE "memcpyopt" +static cl::opt<bool> EnableMemCpyOptWithoutLibcalls( + "-enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden, + cl::desc("Enable memcpyopt even when libcalls are disabled")); + static cl::opt<bool> EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(true), cl::Hidden, cl::desc("Use MemorySSA-backed MemCpyOpt.")); @@ -1757,7 +1761,16 @@ // If we don't have at least memset and memcpy, there is little point of doing // anything here. These are required by a freestanding implementation, so if // even they are disabled, there is no point in trying hard. - if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy)) + bool ShoudRunMemCpyOpt = + (TLI->has(LibFunc_memset) && TLI->has(LibFunc_memcpy)) + // Some targets have libcalls disabled, but still want to run the pass. + // TODO: the pass should not depend on libcall availability as the targets + // should provide alternative ways of lowering standard LLVM + // intrinsics. Alas, there are existing users that can't currently handle + // `memset` intrinsic materialized by this pass. See + // https://reviews.llvm.org/D104801 for the details. 
+ || EnableMemCpyOptWithoutLibcalls; + if (!ShoudRunMemCpyOpt) return false; while (true) { Index: clang/lib/Driver/ToolChains/Cuda.cpp =================================================================== --- clang/lib/Driver/ToolChains/Cuda.cpp +++ clang/lib/Driver/ToolChains/Cuda.cpp @@ -686,7 +686,8 @@ "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs."); if (DeviceOffloadingKind == Action::OFK_Cuda) { - CC1Args.push_back("-fcuda-is-device"); + CC1Args.append( + {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"}); if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals, options::OPT_fno_cuda_approx_transcendentals, false))
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits