tra updated this revision to Diff 364653.
tra added a comment.

Updated post D106769 <https://reviews.llvm.org/D106769>
Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106401/new/

https://reviews.llvm.org/D106401

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
  llvm/test/Transforms/MemCpyOpt/no-libcalls.ll


Index: llvm/test/Transforms/MemCpyOpt/no-libcalls.ll
===================================================================
--- llvm/test/Transforms/MemCpyOpt/no-libcalls.ll
+++ llvm/test/Transforms/MemCpyOpt/no-libcalls.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -memcpyopt < %s | FileCheck %s --check-prefixes=CHECK,LIBCALLS
 ; RUN: opt -S -memcpyopt -mtriple=amdgcn-- < %s | FileCheck %s --check-prefixes=CHECK,NO-LIBCALLS
+; RUN: opt -S -memcpyopt -mtriple=amdgcn-- -enable-memcpyopt-without-libcalls < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,LIBCALLS
 ; REQUIRES: amdgpu-registered-target

Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -67,6 +67,10 @@

 #define DEBUG_TYPE "memcpyopt"

+static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
+    "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden,
+    cl::desc("Enable memcpyopt even when libcalls are disabled"));
+
 static cl::opt<bool>
     EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(true), cl::Hidden,
                     cl::desc("Use MemorySSA-backed MemCpyOpt."));
@@ -677,8 +681,9 @@
   // the corresponding libcalls are not available.
   // TODO: We should really distinguish between libcall availability and
   // our ability to introduce intrinsics.
-  if (T->isAggregateType() && TLI->has(LibFunc_memcpy) &&
-      TLI->has(LibFunc_memmove)) {
+  if (T->isAggregateType() &&
+      (EnableMemCpyOptWithoutLibcalls ||
+       (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
     MemoryLocation LoadLoc = MemoryLocation::get(LI);

     // We use alias analysis to check if an instruction may store to
@@ -805,7 +810,7 @@
   // this if the corresponding libfunc is not available.
   // TODO: We should really distinguish between libcall availability and
   // our ability to introduce intrinsics.
-  if (!TLI->has(LibFunc_memset))
+  if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
     return false;

   // There are two cases that are interesting for this code to handle: memcpy
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -685,7 +685,8 @@
          "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");

   if (DeviceOffloadingKind == Action::OFK_Cuda) {
-    CC1Args.push_back("-fcuda-is-device");
+    CC1Args.append(
+        {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});

     if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
                            options::OPT_fno_cuda_approx_transcendentals,
                            false))