On 28 June 2018 at 18:23, Alex Smith <asm...@feralinteractive.com> wrote: > Hi Dave, > > I did a quick test with this on Rise of the Tomb Raider. It reduced the time > taken to create all pipelines for the whole game over 8 threads (with > RADV_DEBUG=nocache) from 12m24s to 11m35s. Nice improvement :)
Oh, good to have some real-world numbers. Thanks for testing, Dave. > > Also didn't see any issues, so: > > Tested-by: Alex Smith <asm...@feralinteractive.com> > > Thanks, > Alex > > On 27 June 2018 at 04:58, Dave Airlie <airl...@gmail.com> wrote: >> >> From: Dave Airlie <airl...@redhat.com> >> >> I'd like to encourage people to test this to see if it helps (like >> does it make app startup better or less hitching in dxvk). >> >> The basic idea is to store a bunch of LLVM-related data structs >> in thread-local storage so we can avoid reinitializing them every time >> we compile a shader. Since we know llvm objects aren't thread-safe, >> it has to be stored using TLS to avoid any collisions. >> >> This should remove all the fixed setup costs of creating >> the pass manager each time. >> >> This takes the time for a demo app to compile the radv meta shaders with nocache >> and exit from 1.7s down to 1s. >> >> TODO: this doesn't work for radeonsi yet, but I'm not sure how TLS >> works if you have radeonsi and radv loaded at the same time, or whether >> they'll magically try and use the same TLS stuff, in which case >> this might explode all over the place. 
>> >> v2: fix llvm6 build, inline emit function, handle multiple targets >> in one thread >> v3: rebase and port onto new structure >> --- >> src/amd/common/ac_llvm_helper.cpp | 120 ++++++++++++++++++++++++++++-- >> src/amd/common/ac_llvm_util.c | 10 +-- >> src/amd/common/ac_llvm_util.h | 9 +++ >> src/amd/vulkan/radv_debug.h | 1 + >> src/amd/vulkan/radv_device.c | 1 + >> src/amd/vulkan/radv_shader.c | 2 + >> 6 files changed, 132 insertions(+), 11 deletions(-) >> >> diff --git a/src/amd/common/ac_llvm_helper.cpp >> b/src/amd/common/ac_llvm_helper.cpp >> index 27403dbe085..f1f1399b3fb 100644 >> --- a/src/amd/common/ac_llvm_helper.cpp >> +++ b/src/amd/common/ac_llvm_helper.cpp >> @@ -31,12 +31,21 @@ >> >> #include "ac_llvm_util.h" >> #include <llvm-c/Core.h> >> -#include <llvm/Target/TargetOptions.h> >> -#include <llvm/ExecutionEngine/ExecutionEngine.h> >> -#include <llvm/IR/Attributes.h> >> -#include <llvm/IR/CallSite.h> >> +#include <llvm/Target/TargetMachine.h> >> #include <llvm/IR/IRBuilder.h> >> #include <llvm/Analysis/TargetLibraryInfo.h> >> +#include <llvm/IR/LegacyPassManager.h> >> + >> +#include <llvm-c/Transforms/IPO.h> >> +#include <llvm-c/Transforms/Scalar.h> >> +#if HAVE_LLVM >= 0x0700 >> +#include <llvm-c/Transforms/Utils.h> >> +#endif >> + >> +#if HAVE_LLVM < 0x0700 >> +#include "llvm/Support/raw_ostream.h" >> +#endif >> +#include <list> >> >> void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes) >> { >> @@ -101,11 +110,110 @@ >> ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info) >> delete reinterpret_cast<llvm::TargetLibraryInfoImpl >> *>(library_info); >> } >> >> +class ac_llvm_per_thread_info { >> +public: >> + ac_llvm_per_thread_info(enum radeon_family arg_family, >> + enum ac_target_machine_options >> arg_tm_options) >> + : family(arg_family), tm_options(arg_tm_options), >> + OStream(CodeString) {} >> + ~ac_llvm_per_thread_info() { >> + ac_llvm_compiler_dispose_internal(&llvm_info); >> + } >> + >> + struct 
ac_llvm_compiler_info llvm_info; >> + enum radeon_family family; >> + enum ac_target_machine_options tm_options; >> + llvm::SmallString<0> CodeString; >> + llvm::raw_svector_ostream OStream; >> + llvm::legacy::PassManager pass; >> +}; >> + >> +/* we have to store a linked list per thread due to the possiblity of >> multiple gpus being required */ >> +static thread_local std::list<ac_llvm_per_thread_info> >> ac_llvm_per_thread_list; >> + >> bool ac_compile_to_memory_buffer(struct ac_llvm_compiler_info *info, >> LLVMModuleRef M, >> char **ErrorMessage, >> LLVMMemoryBufferRef *OutMemBuf) >> { >> - return LLVMTargetMachineEmitToMemoryBuffer(info->tm, M, >> LLVMObjectFile, >> - ErrorMessage, >> OutMemBuf); >> + ac_llvm_per_thread_info *thread_info = nullptr; >> + if (info->thread_stored) { >> + for (auto &I : ac_llvm_per_thread_list) { >> + if (I.llvm_info.tm == info->tm) { >> + thread_info = &I; >> + break; >> + } >> + } >> + >> + if (!thread_info) { >> + assert(0); >> + return false; >> + } >> + } else { >> + return LLVMTargetMachineEmitToMemoryBuffer(info->tm, M, >> LLVMObjectFile, >> + ErrorMessage, >> OutMemBuf); >> + } >> + >> + llvm::TargetMachine *TM = >> reinterpret_cast<llvm::TargetMachine*>(thread_info->llvm_info.tm); >> + llvm::Module *Mod = llvm::unwrap(M); >> + llvm::StringRef Data; >> + >> + Mod->setDataLayout(TM->createDataLayout()); >> + >> + thread_info->pass.run(*Mod); >> + >> + Data = thread_info->OStream.str(); >> + *OutMemBuf = >> LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.data(), Data.size(), ""); >> + thread_info->CodeString = ""; >> + return false; >> +} >> + >> +bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info, >> + bool add_target_library_info, >> + enum radeon_family family, >> + enum ac_target_machine_options tm_options) >> +{ >> + if (tm_options & AC_TM_THREAD_LLVM) { >> + for (auto &I : ac_llvm_per_thread_list) { >> + if (I.family == family && >> + I.tm_options == tm_options) { >> + *info = I.llvm_info; >> + return true; 
>> + } >> + } >> + >> + ac_llvm_per_thread_list.emplace_back(family, tm_options); >> + ac_llvm_per_thread_info &tinfo = >> ac_llvm_per_thread_list.back(); >> + if (!ac_llvm_compiler_init_internal(&tinfo.llvm_info, >> + true, >> + family, >> + tm_options)) >> + return false; >> + >> + tinfo.llvm_info.thread_stored = true; >> + *info = tinfo.llvm_info; >> + >> + llvm::TargetMachine *TM = >> reinterpret_cast<llvm::TargetMachine*>(tinfo.llvm_info.tm); >> + if (TM->addPassesToEmitFile(tinfo.pass, tinfo.OStream, >> +#if HAVE_LLVM >= 0x0700 >> + nullptr, >> +#endif >> + >> llvm::TargetMachine::CGFT_ObjectFile)) { >> + assert(0); >> + return false; >> + } >> + } else { >> + if (!ac_llvm_compiler_init_internal(info, >> + >> add_target_library_info, >> + family, >> + tm_options)) >> + return false; >> + } >> + return true; >> +} >> + >> +void ac_llvm_compiler_dispose(struct ac_llvm_compiler_info *info) >> +{ >> + if (!info->thread_stored) >> + ac_llvm_compiler_dispose_internal(info); >> } >> diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c >> index 7f8c5746b37..d8ec4ee0092 100644 >> --- a/src/amd/common/ac_llvm_util.c >> +++ b/src/amd/common/ac_llvm_util.c >> @@ -188,10 +188,10 @@ static LLVMPassManagerRef >> ac_init_passmgr(LLVMTargetLibraryInfoRef target_librar >> return passmgr; >> } >> >> -bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info, >> - bool add_target_library_info, >> - enum radeon_family family, >> - enum ac_target_machine_options tm_options) >> +bool ac_llvm_compiler_init_internal(struct ac_llvm_compiler_info *info, >> + bool add_target_library_info, >> + enum radeon_family family, >> + enum ac_target_machine_options >> tm_options) >> { >> memset(info, 0, sizeof(*info)); >> info->tm = ac_create_target_machine(family, tm_options, >> &info->triple); >> @@ -223,7 +223,7 @@ fail: >> return false; >> } >> >> -void ac_llvm_compiler_dispose(struct ac_llvm_compiler_info *info) >> +void ac_llvm_compiler_dispose_internal(struct 
ac_llvm_compiler_info >> *info) >> { >> if (info->data_layout) >> LLVMDisposeMessage((char*)info->data_layout); >> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h >> index 1ba972dab87..c579e6a9ec3 100644 >> --- a/src/amd/common/ac_llvm_util.h >> +++ b/src/amd/common/ac_llvm_util.h >> @@ -60,6 +60,7 @@ enum ac_target_machine_options { >> AC_TM_FORCE_DISABLE_XNACK = (1 << 3), >> AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 4), >> AC_TM_CHECK_IR = (1 << 5), >> + AC_TM_THREAD_LLVM = (1 << 6), >> }; >> >> enum ac_float_mode { >> @@ -74,6 +75,7 @@ struct ac_llvm_compiler_info { >> LLVMTargetLibraryInfoRef target_library_info; >> const char *triple; >> const char *data_layout; >> + bool thread_stored; >> }; >> >> const char *ac_get_llvm_processor_name(enum radeon_family family); >> @@ -114,6 +116,12 @@ ac_get_store_intr_attribs(bool writeonly_memory) >> unsigned >> ac_count_scratch_private_memory(LLVMValueRef function); >> >> +bool ac_llvm_compiler_init_internal(struct ac_llvm_compiler_info *info, >> + bool add_target_library_info, /* crash >> workaround */ >> + enum radeon_family family, >> + enum ac_target_machine_options >> tm_options); >> +void ac_llvm_compiler_dispose_internal(struct ac_llvm_compiler_info >> *info); >> + >> bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info, >> bool add_target_library_info, /* crash >> workaround */ >> enum radeon_family family, >> @@ -127,6 +135,7 @@ bool ac_compile_to_memory_buffer(struct >> ac_llvm_compiler_info *info, >> LLVMModuleRef M, >> char **ErrorMessage, >> LLVMMemoryBufferRef *OutMemBuf); >> + >> #ifdef __cplusplus >> } >> #endif >> diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h >> index f1b0dc26a63..9fe4c3b7404 100644 >> --- a/src/amd/vulkan/radv_debug.h >> +++ b/src/amd/vulkan/radv_debug.h >> @@ -49,6 +49,7 @@ enum { >> RADV_DEBUG_ERRORS = 0x80000, >> RADV_DEBUG_STARTUP = 0x100000, >> RADV_DEBUG_CHECKIR = 0x200000, >> + RADV_DEBUG_NOTHREADLLVM = 0x400000, >> }; >> 
>> enum { >> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c >> index ad3465f594e..73c48cef1f0 100644 >> --- a/src/amd/vulkan/radv_device.c >> +++ b/src/amd/vulkan/radv_device.c >> @@ -436,6 +436,7 @@ static const struct debug_control radv_debug_options[] >> = { >> {"errors", RADV_DEBUG_ERRORS}, >> {"startup", RADV_DEBUG_STARTUP}, >> {"checkir", RADV_DEBUG_CHECKIR}, >> + {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM}, >> {NULL, 0} >> }; >> >> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c >> index 3bbb7867640..f7033aff771 100644 >> --- a/src/amd/vulkan/radv_shader.c >> +++ b/src/amd/vulkan/radv_shader.c >> @@ -547,6 +547,8 @@ shader_variant_create(struct radv_device *device, >> tm_options |= AC_TM_SISCHED; >> if (options->check_ir) >> tm_options |= AC_TM_CHECK_IR; >> + if (!(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM)) >> + tm_options |= AC_TM_THREAD_LLVM; >> >> radv_init_llvm_once(); >> >> -- >> 2.17.1 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev