Commit: 08b3426df9e5b5dd3c7cc042197bea3ea2398e75 Author: Michael Jones Date: Thu Jan 19 17:55:53 2023 +0000 Branches: master https://developer.blender.org/rB08b3426df9e5b5dd3c7cc042197bea3ea2398e75
Cycles: Occupancy tuning for new higher-end M2 machines This patch adds occupancy tuning for the newly announced high-end M2 machines, giving a 10-15% render speedup over a build without this tuning. Reviewed By: brecht Differential Revision: https://developer.blender.org/D17037 =================================================================== M intern/cycles/device/metal/kernel.mm M intern/cycles/device/metal/queue.mm M intern/cycles/device/metal/util.h M intern/cycles/device/metal/util.mm =================================================================== diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index e4ce5e19f63..48bdf2f0ef1 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -49,6 +49,18 @@ struct ShaderCache { if (MetalInfo::get_device_vendor(mtlDevice) == METAL_GPU_APPLE) { switch (MetalInfo::get_apple_gpu_architecture(mtlDevice)) { default: + case APPLE_M2_BIG: + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES] = {384, 128}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA] = {640, 128}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST] = {1024, 64}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW] = {704, 704}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE] = {640, 32}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY] = {896, 768}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND] = {512, 128}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW] = {32, 32}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] = {768, 576}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY] = {896, 768}; + break; case APPLE_M2: occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES] = {32, 32}; occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA] = {832, 32}; diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm index 
837be0b0c23..f335844c3f9 100644 --- a/intern/cycles/device/metal/queue.mm +++ b/intern/cycles/device/metal/queue.mm @@ -278,7 +278,8 @@ int MetalDeviceQueue::num_concurrent_states(const size_t state_size) const if (metal_device_->device_vendor == METAL_GPU_APPLE) { result *= 4; - if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) == APPLE_M2) { + /* Increasing the state count doesn't notably benefit M1-family systems. */ + if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) != APPLE_M1) { size_t system_ram = system_physical_ram(); size_t allocated_so_far = [metal_device_->mtlDevice currentAllocatedSize]; size_t max_recommended_working_set = [metal_device_->mtlDevice recommendedMaxWorkingSetSize]; diff --git a/intern/cycles/device/metal/util.h b/intern/cycles/device/metal/util.h index a988d01d361..c30c4ccd9bc 100644 --- a/intern/cycles/device/metal/util.h +++ b/intern/cycles/device/metal/util.h @@ -29,6 +29,7 @@ enum AppleGPUArchitecture { APPLE_UNKNOWN, APPLE_M1, APPLE_M2, + APPLE_M2_BIG, }; /* Contains static Metal helper functions. */ diff --git a/intern/cycles/device/metal/util.mm b/intern/cycles/device/metal/util.mm index f47638fac15..984e7a70c76 100644 --- a/intern/cycles/device/metal/util.mm +++ b/intern/cycles/device/metal/util.mm @@ -52,7 +52,7 @@ AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device) return APPLE_M1; } else if (strstr(device_name, "M2")) { - return APPLE_M2; + return get_apple_gpu_core_count(device) <= 10 ? APPLE_M2 : APPLE_M2_BIG; } return APPLE_UNKNOWN; } _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs