Commit: 77c3e67d3d7d8055619491bf09f0e7626afe33f9 Author: Michael Jones Date: Wed Jan 4 14:23:33 2023 +0000 Branches: master https://developer.blender.org/rB77c3e67d3d7d8055619491bf09f0e7626afe33f9
Cycles: Improved render start/stop responsiveness on Metal All kernel specialisation is now performed in the background regardless of kernel type, meaning that the first render will be visible a few seconds sooner. The only exception is during benchmark warm up, in which case we wait for all kernels to be cached. When stopping a render, we call a new `cancel()` method on the device which causes any outstanding compilation work to be cancelled, and we destroy the device in a detached thread so that any stale queued compilations can be safely purge [...] Reviewed By: brecht Differential Revision: https://developer.blender.org/D16371 =================================================================== M intern/cycles/device/device.h M intern/cycles/device/metal/device_impl.h M intern/cycles/device/metal/device_impl.mm M intern/cycles/device/metal/kernel.h M intern/cycles/device/metal/kernel.mm M intern/cycles/device/metal/queue.mm M intern/cycles/integrator/path_trace.cpp M intern/cycles/session/session.cpp =================================================================== diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index b9308dc8949..959939ddbb7 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -167,6 +167,17 @@ class Device { return true; } + /* Request cancellation of any long-running work. */ + virtual void cancel() + { + } + + /* Return true if device is ready for rendering, or report status if not. */ + virtual bool is_ready(string &status) const + { + return true; + } + /* GPU device only functions. * These may not be used on CPU or multi-devices. 
*/ diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h index e57b8628023..526535ff132 100644 --- a/intern/cycles/device/metal/device_impl.h +++ b/intern/cycles/device/metal/device_impl.h @@ -76,7 +76,20 @@ class MetalDevice : public Device { bool use_metalrt = false; MetalPipelineType kernel_specialization_level = PSO_GENERIC; - std::atomic_bool async_compile_and_load = false; + + int device_id = 0; + + static thread_mutex existing_devices_mutex; + static std::map<int, MetalDevice *> active_device_ids; + + static bool is_device_cancelled(int device_id); + + static MetalDevice *get_device_by_ID(int device_idID, + thread_scoped_lock &existing_devices_mutex_lock); + + virtual bool is_ready(string &status) const override; + + virtual void cancel() override; virtual BVHLayoutMask get_bvh_layout_mask() const override; @@ -92,14 +105,12 @@ class MetalDevice : public Device { bool use_adaptive_compilation(); + bool make_source_and_check_if_compile_needed(MetalPipelineType pso_type); + void make_source(MetalPipelineType pso_type, const uint kernel_features); virtual bool load_kernels(const uint kernel_features) override; - void reserve_local_memory(const uint kernel_features); - - void init_host_memory(); - void load_texture_info(); void erase_allocation(device_memory &mem); @@ -112,7 +123,7 @@ class MetalDevice : public Device { virtual void optimize_for_scene(Scene *scene) override; - bool compile_and_load(MetalPipelineType pso_type); + static void compile_and_load(int device_id, MetalPipelineType pso_type); /* ------------------------------------------------------------------ */ /* low-level memory management */ diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm index 95935ce2a3a..a6966bf167d 100644 --- a/intern/cycles/device/metal/device_impl.mm +++ b/intern/cycles/device/metal/device_impl.mm @@ -13,10 +13,32 @@ # include "util/path.h" # include "util/time.h" +# include 
<crt_externs.h> + CCL_NAMESPACE_BEGIN class MetalDevice; +thread_mutex MetalDevice::existing_devices_mutex; +std::map<int, MetalDevice *> MetalDevice::active_device_ids; + +/* Thread-safe device access for async work. Calling code must pass an appropriately scoped lock + * to existing_devices_mutex to safeguard against destruction of the returned instance. */ +MetalDevice *MetalDevice::get_device_by_ID(int ID, thread_scoped_lock &existing_devices_mutex_lock) +{ + auto it = active_device_ids.find(ID); + if (it != active_device_ids.end()) { + return it->second; + } + return nullptr; +} + +bool MetalDevice::is_device_cancelled(int ID) +{ + thread_scoped_lock lock(existing_devices_mutex); + return get_device_by_ID(ID, lock) == nullptr; +} + BVHLayoutMask MetalDevice::get_bvh_layout_mask() const { return use_metalrt ? BVH_LAYOUT_METAL : BVH_LAYOUT_BVH2; @@ -40,6 +62,15 @@ void MetalDevice::set_error(const string &error) MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler) : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL) { + { + /* Assign an ID for this device which we can use to query whether async shader compilation + * requests are still relevant. */ + thread_scoped_lock lock(existing_devices_mutex); + static int existing_devices_counter = 1; + device_id = existing_devices_counter++; + active_device_ids[device_id] = this; + } + mtlDevId = info.num; /* select chosen device */ @@ -57,7 +88,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile if (@available(macos 11.0, *)) { if ([mtlDevice hasUnifiedMemory]) { default_storage_mode = MTLResourceStorageModeShared; - init_host_memory(); } } @@ -181,6 +211,13 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile MetalDevice::~MetalDevice() { + /* Cancel any async shader compilations that are in flight. 
*/ + cancel(); + + /* This lock safeguards against destruction during use (see other uses of + * existing_devices_mutex). */ + thread_scoped_lock lock(existing_devices_mutex); + for (auto &tex : texture_slot_map) { if (tex) { [tex release]; @@ -326,22 +363,67 @@ bool MetalDevice::load_kernels(const uint _kernel_features) * active, but may still need to be rendered without motion blur if that isn't active as well. */ motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION; - bool result = compile_and_load(PSO_GENERIC); + /* Only request generic kernels if they aren't cached in memory. */ + if (make_source_and_check_if_compile_needed(PSO_GENERIC)) { + /* If needed, load them asynchronously in order to responsively message progress to the user. */ + int this_device_id = this->device_id; + auto compile_kernels_fn = ^() { + compile_and_load(this_device_id, PSO_GENERIC); + }; + + dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), + compile_kernels_fn); + } - reserve_local_memory(kernel_features); - return result; + return true; } -bool MetalDevice::compile_and_load(MetalPipelineType pso_type) +bool MetalDevice::make_source_and_check_if_compile_needed(MetalPipelineType pso_type) { - make_source(pso_type, kernel_features); + if (this->source[pso_type].empty()) { + make_source(pso_type, kernel_features); + } + return MetalDeviceKernels::should_load_kernels(this, pso_type); +} + +void MetalDevice::compile_and_load(int device_id, MetalPipelineType pso_type) +{ + /* Thread-safe front-end compilation. Typically the MSL->AIR compilation can take a few seconds, + * so we avoid blocking device teardown if the user cancels a render immediately. + */ + + id<MTLDevice> mtlDevice; + string source; + MetalGPUVendor device_vendor; + + /* Safely gather any state required for the MSL->AIR compilation. 
*/ + { + thread_scoped_lock lock(existing_devices_mutex); - if (!MetalDeviceKernels::should_load_kernels(this, pso_type)) { - /* We already have a full set of matching pipelines which are cached or queued. */ - metal_printf("%s kernels already requested\n", kernel_type_as_string(pso_type)); - return true; + /* Check whether the device still exists. */ + MetalDevice *instance = get_device_by_ID(device_id, lock); + if (!instance) { + metal_printf("Ignoring %s compilation request - device no longer exists\n", + kernel_type_as_string(pso_type)); + return; + } + + if (!instance->make_source_and_check_if_compile_needed(pso_type)) { + /* We already have a full set of matching pipelines which are cached or queued. Return early + * to avoid redundant MTLLibrary compilation. */ + metal_printf("Ignoreing %s compilation request - kernels already requested\n", + kernel_type_as_string(pso_type)); + return; + } + + mtlDevice = instance->mtlDevice; + device_vendor = instance->device_vendor; + source = instance->source[pso_type]; } + /* Perform the actual compilation using our cached context. The MetalDevice can safely destruct + * in this time. 
*/ + MTLCompileOptions *options = [[MTLCompileOptions alloc] init]; # if defined(MAC_OS_VERSION_13_0) @@ -359,20 +441,15 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type) if (getenv("CYCLES_METAL_PROFILING") || getenv("CYCLES_METAL_DEBUG")) { path_write_text(path_cache_get(string_printf("%s.metal", kernel_type_as_string(pso_type))), - source[pso_type]); + source); } const double starttime = time_dt(); NSError *error = NULL; - mtlLibrary[pso_type] = [mtlDevice newLibraryWithSource:@(source[pso_type].c_str()) - options:options - error:&error]; - - if (!mtlLibrary[pso_type]) { - NSString *err = [error localizedDescription]; - set_error(string_printf("Failed to compile library:\n%s", [err UTF8String])); - } + id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str()) + options:options + error:&error]; metal_printf("Front-end compilation finished in %.1f seconds (%s)\n", time_dt() - starttime, @@ -380,17 +457,21 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type) [options release]; - return MetalDeviceKernels::load(this, pso_type); -} - -void MetalDevice::reserve_local_memory(const uint kernel_features) -{ - /* METAL_WIP - implement this */ -} - -void MetalDevice::init_host_memory() -{ - /* METAL_WIP - implement this */ + /* Save the compiled MTLLibrary and trigger the AIR->PSO builds (if the MetalDevice still + * exists). 
*/ + { + thread_scoped_lock lock(existing_devices_mutex); + if (MetalDevice *instance = get_device_by_ID(device_id, lock)) { + if (mtlLibrary) { + instance->mtlLibrary[pso_type] = mtlLibrary; + MetalDeviceKernels::load(instance, pso_type); + } + else { + NSString *err = [error localizedDescription]; + instance->set_error(string_printf("Failed to compile library:\n%s", [err UTF8String])); + } + } + } } void MetalDevice::load_texture_info() @@ -700,55 +781,74 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz return 0; } -void MetalDevice::optimize_ @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs