Commit: 52a5f68562680c0ccd6d4e525098bb5e2af7d0bd Author: Brecht Van Lommel Date: Thu Apr 28 00:46:14 2022 +0200 Branches: master https://developer.blender.org/rB52a5f68562680c0ccd6d4e525098bb5e2af7d0bd
Revert "Cycles: Enable inlining on Apple Silicon for 1.1x speedup" This reverts commit b82de02e7ce857e20b842a074c0068b146a9fd79. It is causing crashes in various regression tests. Ref D14763 =================================================================== M intern/cycles/device/metal/device_impl.h M intern/cycles/device/metal/device_impl.mm M intern/cycles/device/metal/kernel.h M intern/cycles/device/metal/kernel.mm M intern/cycles/device/metal/queue.mm M intern/cycles/kernel/device/metal/compat.h =================================================================== diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h index d7311ee985f..27c58ce6d2f 100644 --- a/intern/cycles/device/metal/device_impl.h +++ b/intern/cycles/device/metal/device_impl.h @@ -28,8 +28,7 @@ class MetalDevice : public Device { id<MTLCommandQueue> mtlGeneralCommandQueue = nil; id<MTLArgumentEncoder> mtlAncillaryArgEncoder = nil; /* encoder used for fetching device pointers from MTLBuffers */ - string source[PSO_NUM]; - string source_md5[PSO_NUM]; + string source_used_for_compile[PSO_NUM]; KernelParamsMetal launch_params = {0}; @@ -111,12 +110,6 @@ class MetalDevice : public Device { virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override; - id<MTLLibrary> compile(string const &source); - - const MetalKernelPipeline &get_best_pipeline(DeviceKernel kernel) const; - - bool kernel_available(DeviceKernel kernel) const; - /* ------------------------------------------------------------------ */ /* low-level memory management */ diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm index 7d1212cb37c..c01f51fb506 100644 --- a/intern/cycles/device/metal/device_impl.mm +++ b/intern/cycles/device/metal/device_impl.mm @@ -275,44 +275,96 @@ bool MetalDevice::load_kernels(const uint _kernel_features) * active, but may still need to be rendered without motion blur if that isn't active as well. */ motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION; - source[PSO_GENERIC] = get_source(kernel_features); - mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]); - - MD5Hash md5; - md5.append(source[PSO_GENERIC]); - source_md5[PSO_GENERIC] = md5.get_hex(); - - metal_printf("Front-end compilation finished (generic)\n"); - - bool result = kernels.load(this, false); - - reserve_local_memory(kernel_features); + NSError *error = NULL; - return result; -} + for (int i = 0; i < PSO_NUM; i++) { + if (mtlLibrary[i]) { + [mtlLibrary[i] release]; + mtlLibrary[i] = nil; + } + } -id<MTLLibrary> MetalDevice::compile(string const &source) -{ MTLCompileOptions *options = [[MTLCompileOptions alloc] init]; options.fastMathEnabled = YES; if (@available(macOS 12.0, *)) { options.languageVersion = MTLLanguageVersion2_4; } + else { + return false; + } - NSError *error = NULL; - id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str()) + string metalsrc; + + /* local helper: dump source to disk and return filepath */ + auto dump_source = [&](int kernel_type) -> string { + string &source = source_used_for_compile[kernel_type]; + string metalsrc = path_cache_get(path_join("kernels", + string_printf("%s.%s.metal", + kernel_type_as_string(kernel_type), + util_md5_string(source).c_str()))); + path_write_text(metalsrc, source); + return metalsrc; + }; + + /* local helper: fetch the kernel source code, adjust it for specific PSO_.. kernel_type flavor, + * then compile it into a MTLLibrary */ + auto fetch_and_compile_source = [&](int kernel_type) { + /* Record the source used to compile this library, for hash building later. */ + string &source = source_used_for_compile[kernel_type]; + + switch (kernel_type) { + case PSO_GENERIC: { + source = get_source(kernel_features); + break; + } + case PSO_SPECIALISED: { + /* PSO_SPECIALISED derives from PSO_GENERIC */ + string &generic_source = source_used_for_compile[PSO_GENERIC]; + if (generic_source.empty()) { + generic_source = get_source(kernel_features); + } + source = "#define __KERNEL_METAL_USE_FUNCTION_SPECIALISATION__\n" + generic_source; + break; + } + default: + assert(0); + } + + /* create MTLLibrary (front-end compilation) */ + mtlLibrary[kernel_type] = [mtlDevice newLibraryWithSource:@(source.c_str()) options:options error:&error]; - if (!mtlLibrary) { - NSString *err = [error localizedDescription]; - set_error(string_printf("Failed to compile library:\n%s", [err UTF8String])); + bool do_source_dump = (getenv("CYCLES_METAL_DUMP_SOURCE") != nullptr); + + if (!mtlLibrary[kernel_type] || do_source_dump) { + string metalsrc = dump_source(kernel_type); + + if (!mtlLibrary[kernel_type]) { + NSString *err = [error localizedDescription]; + set_error(string_printf("Failed to compile library:\n%s", [err UTF8String])); + + return false; + } + } + return true; + }; + + fetch_and_compile_source(PSO_GENERIC); + + if (use_function_specialisation) { + fetch_and_compile_source(PSO_SPECIALISED); } + metal_printf("Front-end compilation finished\n"); + + bool result = kernels.load(this, PSO_GENERIC); + [options release]; + reserve_local_memory(kernel_features); - return mtlLibrary; + return result; } void MetalDevice::reserve_local_memory(const uint kernel_features) @@ -619,11 +671,6 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz return 0; } -const MetalKernelPipeline &MetalDevice::get_best_pipeline(DeviceKernel kernel) const -{ - return kernels.get_best_pipeline(this, kernel); -} - void MetalDevice::const_copy_to(const char *name, void *host, size_t size) { if (strcmp(name, "__data") == 0) { diff --git a/intern/cycles/device/metal/kernel.h b/intern/cycles/device/metal/kernel.h index 7e398d1cf41..b12491d820d 100644 --- a/intern/cycles/device/metal/kernel.h +++ b/intern/cycles/device/metal/kernel.h @@ -54,41 +54,98 @@ enum { const char *kernel_type_as_string(int kernel_type); struct MetalKernelPipeline { + void release() + { + if (pipeline) { + [pipeline release]; + pipeline = nil; + if (@available(macOS 11.0, *)) { + for (int i = 0; i < METALRT_TABLE_NUM; i++) { + if (intersection_func_table[i]) { + [intersection_func_table[i] release]; + intersection_func_table[i] = nil; + } + } + } + } + if (function) { + [function release]; + function = nil; + } + if (@available(macOS 11.0, *)) { + for (int i = 0; i < METALRT_TABLE_NUM; i++) { + if (intersection_func_table[i]) { + [intersection_func_table[i] release]; + } + } + } + } - void compile(); - - id<MTLLibrary> mtlLibrary = nil; - bool scene_specialized; - string source_md5; - - bool use_metalrt; - bool metalrt_hair; - bool metalrt_hair_thick; - bool metalrt_pointcloud; - - int threads_per_threadgroup; - - DeviceKernel device_kernel; bool loaded = false; - id<MTLDevice> mtlDevice = nil; id<MTLFunction> function = nil; id<MTLComputePipelineState> pipeline = nil; - int num_threads_per_block = 0; - - string error_str; API_AVAILABLE(macos(11.0)) id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil}; - id<MTLFunction> rt_intersection_function[METALRT_FUNC_NUM] = {nil}; +}; + +struct MetalKernelLoadDesc { + int pso_index = 0; + const char *function_name = nullptr; + int kernel_index = 0; + int threads_per_threadgroup = 0; + MTLFunctionConstantValues *constant_values = nullptr; + NSArray *linked_functions = nullptr; + + struct IntersectorFunctions { + NSArray *defaults; + NSArray *shadow; + NSArray *local; + NSArray *operator[](int index) const + { + if (index == METALRT_TABLE_DEFAULT) + return defaults; + if (index == METALRT_TABLE_SHADOW) + return shadow; + return local; + } + } intersector_functions = {nullptr}; +}; + +/* Metal kernel and associate occupancy information. */ +class MetalDeviceKernel { + public: + ~MetalDeviceKernel(); + + bool load(MetalDevice *device, MetalKernelLoadDesc const &desc, class MD5Hash const &md5); + + void mark_loaded(int pso_index) + { + pso[pso_index].loaded = true; + } + + int get_num_threads_per_block() const + { + return num_threads_per_block; + } + const MetalKernelPipeline &get_pso() const; + + double load_duration = 0.0; + + private: + MetalKernelPipeline pso[PSO_NUM]; + + int num_threads_per_block = 0; }; /* Cache of Metal kernels for each DeviceKernel. */ class MetalDeviceKernels { public: - bool load(MetalDevice *device, bool scene_specialized); - bool available(const MetalDevice *device, DeviceKernel kernel) const; - const MetalKernelPipeline &get_best_pipeline(const MetalDevice *device, - DeviceKernel kernel) const; + bool load(MetalDevice *device, int kernel_type); + bool available(DeviceKernel kernel) const; + const MetalDeviceKernel &get(DeviceKernel kernel) const; + + MetalDeviceKernel kernels_[DEVICE_KERNEL_NUM]; id<MTLFunction> rt_intersection_funcs[PSO_NUM][METALRT_FUNC_NUM] = {{nil}}; diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index 44a5e23d00f..9555ca03c8e 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -9,7 +9,6 @@ # include "util/path.h" # include "util/tbb.h" # include "util/time.h" -# include "util/unique_ptr.h" CCL_NAMESPACE_BEGIN @@ -29,370 +28,82 @@ const char *kernel_type_as_string(int kernel_type) return ""; } -bool kernel_has_intersection(DeviceKernel device_kernel) +MetalDeviceKernel::~MetalDeviceKernel() { - return (device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || - device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW || - device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE || - device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK || - device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE); -} - -struct ShaderCache { - ShaderCache(id<MTLDevice> _mtlDevice) : mtlDevice(_mtlDevice) - { - } - ~ShaderCache(); - - /* Get the fastest a @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs