Commit: 001f548227c413a4fdbee275744ea8bea886081a Author: Brecht Van Lommel Date: Wed Oct 20 14:21:01 2021 +0200 Branches: master https://developer.blender.org/rB001f548227c413a4fdbee275744ea8bea886081a
Cycles: reduce kernel reserved local memory when not using shader raytracing Ref T87836 =================================================================== M intern/cycles/device/cuda/device_impl.cpp M intern/cycles/device/hip/device_impl.cpp =================================================================== diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index 5e1a63c04df..1c970096801 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -454,7 +454,7 @@ bool CUDADevice::load_kernels(const uint kernel_features) return (result == CUDA_SUCCESS); } -void CUDADevice::reserve_local_memory(const uint /* kernel_features */) +void CUDADevice::reserve_local_memory(const uint kernel_features) { /* Together with CU_CTX_LMEM_RESIZE_TO_MAX, this reserves local memory * needed for kernel launches, so that we can reliably figure out when @@ -468,7 +468,9 @@ void CUDADevice::reserve_local_memory(const uint /* kernel_features */) { /* Use the biggest kernel for estimation. */ - const DeviceKernel test_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE; + const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE : + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE; /* Launch kernel, using just 1 block appears sufficient to reserve memory for all * multiprocessors. It would be good to do this in parallel for the multi GPU case diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index 964783a08bf..583ab8ae208 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -430,7 +430,7 @@ bool HIPDevice::load_kernels(const uint kernel_features) return (result == hipSuccess); } -void HIPDevice::reserve_local_memory(const uint) +void HIPDevice::reserve_local_memory(const uint kernel_features) { /* Together with hipDeviceLmemResizeToMax, this reserves local memory * needed for kernel launches, so that we can reliably figure out when @@ -444,7 +444,9 @@ void HIPDevice::reserve_local_memory(const uint) { /* Use the biggest kernel for estimation. */ - const DeviceKernel test_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE; + const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE : + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE; /* Launch kernel, using just 1 block appears sufficient to reserve memory for all * multiprocessors. It would be good to do this in parallel for the multi GPU case _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs