Commit: f2d39b810b4902bb5accbac7c5b2e8ec1e60c679 Author: Michael Jones Date: Wed May 11 14:52:49 2022 +0100 Branches: temp-pbvh-split https://developer.blender.org/rBf2d39b810b4902bb5accbac7c5b2e8ec1e60c679
Enable inlining on Apple Silicon. Use new process-wide ShaderCache in order to safely re-enable binary archives. This patch is the same as D14763, but with a fix for unit test failures caused by ShaderCache fetch logic not working in the non-MetalRT case: ``` diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index ad268ae7057..6aa1a56056e 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -203,9 +203,12 @@ bool kernel_has_intersection(DeviceKernel device_kernel) /* metalrt options */ request.pipeline->use_metalrt = device->use_metalrt; - request.pipeline->metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR; - request.pipeline->metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK; - request.pipeline->metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD; + request.pipeline->metalrt_hair = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_HAIR); + request.pipeline->metalrt_hair_thick = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_HAIR_THICK); + request.pipeline->metalrt_pointcloud = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_POINTCLOUD); { thread_scoped_lock lock(cache_mutex); @@ -225,9 +228,9 @@ bool kernel_has_intersection(DeviceKernel device_kernel) /* metalrt options */ bool use_metalrt = device->use_metalrt; - bool metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR; - bool metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK; - bool metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD; + bool metalrt_hair = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR); + bool metalrt_hair_thick = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR_THICK); + bool metalrt_pointcloud = use_metalrt && (device->kernel_features & KERNEL_FEATURE_POINTCLOUD); MetalKernelPipeline *best_pipeline = nullptr; for (auto 
&pipeline : collection) { ``` Reviewed By: brecht Differential Revision: https://developer.blender.org/D14923 =================================================================== M intern/cycles/device/metal/device_impl.h M intern/cycles/device/metal/device_impl.mm M intern/cycles/device/metal/kernel.h M intern/cycles/device/metal/kernel.mm M intern/cycles/device/metal/queue.mm M intern/cycles/kernel/device/metal/compat.h =================================================================== diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h index 27c58ce6d2f..7506b9b069f 100644 --- a/intern/cycles/device/metal/device_impl.h +++ b/intern/cycles/device/metal/device_impl.h @@ -28,7 +28,8 @@ class MetalDevice : public Device { id<MTLCommandQueue> mtlGeneralCommandQueue = nil; id<MTLArgumentEncoder> mtlAncillaryArgEncoder = nil; /* encoder used for fetching device pointers from MTLBuffers */ - string source_used_for_compile[PSO_NUM]; + string source[PSO_NUM]; + string source_md5[PSO_NUM]; KernelParamsMetal launch_params = {0}; @@ -72,7 +73,6 @@ class MetalDevice : public Device { id<MTLBuffer> texture_bindings_3d = nil; std::vector<id<MTLTexture>> texture_slot_map; - MetalDeviceKernels kernels; bool use_metalrt = false; bool use_function_specialisation = false; @@ -110,6 +110,8 @@ class MetalDevice : public Device { virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override; + id<MTLLibrary> compile(string const &source); + /* ------------------------------------------------------------------ */ /* low-level memory management */ diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm index c01f51fb506..e1438a9d6e2 100644 --- a/intern/cycles/device/metal/device_impl.mm +++ b/intern/cycles/device/metal/device_impl.mm @@ -275,96 +275,44 @@ bool MetalDevice::load_kernels(const uint _kernel_features) * active, but may still need to be rendered without motion blur if that 
isn't active as well. */ motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION; - NSError *error = NULL; + source[PSO_GENERIC] = get_source(kernel_features); + mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]); - for (int i = 0; i < PSO_NUM; i++) { - if (mtlLibrary[i]) { - [mtlLibrary[i] release]; - mtlLibrary[i] = nil; - } - } + MD5Hash md5; + md5.append(source[PSO_GENERIC]); + source_md5[PSO_GENERIC] = md5.get_hex(); + + metal_printf("Front-end compilation finished (generic)\n"); + + bool result = MetalDeviceKernels::load(this, false); + + reserve_local_memory(kernel_features); + + return result; +} +id<MTLLibrary> MetalDevice::compile(string const &source) +{ MTLCompileOptions *options = [[MTLCompileOptions alloc] init]; options.fastMathEnabled = YES; if (@available(macOS 12.0, *)) { options.languageVersion = MTLLanguageVersion2_4; } - else { - return false; - } - string metalsrc; - - /* local helper: dump source to disk and return filepath */ - auto dump_source = [&](int kernel_type) -> string { - string &source = source_used_for_compile[kernel_type]; - string metalsrc = path_cache_get(path_join("kernels", - string_printf("%s.%s.metal", - kernel_type_as_string(kernel_type), - util_md5_string(source).c_str()))); - path_write_text(metalsrc, source); - return metalsrc; - }; - - /* local helper: fetch the kernel source code, adjust it for specific PSO_.. kernel_type flavor, - * then compile it into a MTLLibrary */ - auto fetch_and_compile_source = [&](int kernel_type) { - /* Record the source used to compile this library, for hash building later. 
*/ - string &source = source_used_for_compile[kernel_type]; - - switch (kernel_type) { - case PSO_GENERIC: { - source = get_source(kernel_features); - break; - } - case PSO_SPECIALISED: { - /* PSO_SPECIALISED derives from PSO_GENERIC */ - string &generic_source = source_used_for_compile[PSO_GENERIC]; - if (generic_source.empty()) { - generic_source = get_source(kernel_features); - } - source = "#define __KERNEL_METAL_USE_FUNCTION_SPECIALISATION__\n" + generic_source; - break; - } - default: - assert(0); - } - - /* create MTLLibrary (front-end compilation) */ - mtlLibrary[kernel_type] = [mtlDevice newLibraryWithSource:@(source.c_str()) + NSError *error = NULL; + id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str()) options:options error:&error]; - bool do_source_dump = (getenv("CYCLES_METAL_DUMP_SOURCE") != nullptr); - - if (!mtlLibrary[kernel_type] || do_source_dump) { - string metalsrc = dump_source(kernel_type); - - if (!mtlLibrary[kernel_type]) { - NSString *err = [error localizedDescription]; - set_error(string_printf("Failed to compile library:\n%s", [err UTF8String])); - - return false; - } - } - return true; - }; - - fetch_and_compile_source(PSO_GENERIC); - - if (use_function_specialisation) { - fetch_and_compile_source(PSO_SPECIALISED); + if (!mtlLibrary) { + NSString *err = [error localizedDescription]; + set_error(string_printf("Failed to compile library:\n%s", [err UTF8String])); } - metal_printf("Front-end compilation finished\n"); - - bool result = kernels.load(this, PSO_GENERIC); - [options release]; - reserve_local_memory(kernel_features); - return result; + return mtlLibrary; } void MetalDevice::reserve_local_memory(const uint kernel_features) diff --git a/intern/cycles/device/metal/kernel.h b/intern/cycles/device/metal/kernel.h index b12491d820d..69b2a686ecc 100644 --- a/intern/cycles/device/metal/kernel.h +++ b/intern/cycles/device/metal/kernel.h @@ -54,103 +54,41 @@ enum { const char *kernel_type_as_string(int 
kernel_type); struct MetalKernelPipeline { - void release() - { - if (pipeline) { - [pipeline release]; - pipeline = nil; - if (@available(macOS 11.0, *)) { - for (int i = 0; i < METALRT_TABLE_NUM; i++) { - if (intersection_func_table[i]) { - [intersection_func_table[i] release]; - intersection_func_table[i] = nil; - } - } - } - } - if (function) { - [function release]; - function = nil; - } - if (@available(macOS 11.0, *)) { - for (int i = 0; i < METALRT_TABLE_NUM; i++) { - if (intersection_func_table[i]) { - [intersection_func_table[i] release]; - } - } - } - } - bool loaded = false; - id<MTLFunction> function = nil; - id<MTLComputePipelineState> pipeline = nil; - - API_AVAILABLE(macos(11.0)) - id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil}; -}; - -struct MetalKernelLoadDesc { - int pso_index = 0; - const char *function_name = nullptr; - int kernel_index = 0; - int threads_per_threadgroup = 0; - MTLFunctionConstantValues *constant_values = nullptr; - NSArray *linked_functions = nullptr; - - struct IntersectorFunctions { - NSArray *defaults; - NSArray *shadow; - NSArray *local; - NSArray *operator[](int index) const - { - if (index == METALRT_TABLE_DEFAULT) - return defaults; - if (index == METALRT_TABLE_SHADOW) - return shadow; - return local; - } - } intersector_functions = {nullptr}; -}; - -/* Metal kernel and associate occupancy information. 
*/ -class MetalDeviceKernel { - public: - ~MetalDeviceKernel(); + void compile(); - bool load(MetalDevice *device, MetalKernelLoadDesc const &desc, class MD5Hash const &md5); + id<MTLLibrary> mtlLibrary = nil; + bool scene_specialized; + string source_md5; - void mark_loaded(int pso_index) - { - pso[pso_index].loaded = true; - } + bool use_metalrt; + bool metalrt_hair; + bool metalrt_hair_thick; + bool metalrt_pointcloud; - int get_num_threads_per_block() const - { - return num_threads_per_block; - } - const MetalKernelPipeline &get_pso() const; + int threads_per_threadgroup; - double load_duration = 0.0; + DeviceKernel device_kernel; + bool loaded = false; + id<MTLDevice> mtlDevice = nil; + id<MTLFunction> function = nil; + id<MTLComputePipelineState> pipeline = nil; + int num_threads_per_block = 0; - private: - MetalKernelPipeline pso[PSO_NUM]; + string error_str; - int num_threads_per_block = 0; + API_AVAILABLE(macos(11.0)) + id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil}; + id<MTLFunction> rt_intersection_function[METALRT_FUNC_NUM] = {nil}; }; /* Cache of Metal kernels for each DeviceKernel. 
*/ -class MetalDeviceKernels { - public: - bool load(MetalDevice *device, int kernel_type); - bool available(DeviceKernel kernel) const; - const MetalDeviceKernel &get(DeviceKernel kernel) const; +namespace MetalDeviceKernels { - MetalDeviceKernel kernels_[DEVICE_KERNEL_NUM]; +bool load(MetalDevice *device, bool scene_specialized); +const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel); - id<MTLFunction> rt_intersection_funcs[PSO_NUM][METALRT_FUNC_NUM] = {{nil}}; - - string loaded_md5[PSO_NUM]; -}; +} /* namespace MetalDeviceKernels */ CCL_NAMESPACE_END diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index 9555ca03c8e..fc9a8cecd75 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -9,6 +9,7 @@ # include "util/path.h" # include "util/tbb.h" # include "util/time.h" +# include "util/unique_ptr.h" CCL_NAMESPACE_BEGIN @@ -28,82 +29,376 @@ const char *kernel_type_as_string(int kernel_type) return ""; } -MetalDeviceKernel::~MetalDeviceKernel() +bool kernel_has_intersection(DeviceKernel device_kernel) { - for (int i = 0; i < PSO_NUM; i++) { - pso[i].release(); + return (device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || + device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW || + device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE || + device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK || + device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE); +} + +struct ShaderCache { + ShaderCache(id<MTLDevice> _mtlDevice) : mtlDevice(_mtlDevice) + { + } + ~ShaderCache(); @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs