Commit: 001f548227c413a4fdbee275744ea8bea886081a
Author: Brecht Van Lommel
Date:   Wed Oct 20 14:21:01 2021 +0200
Branches: master
https://developer.blender.org/rB001f548227c413a4fdbee275744ea8bea886081a

Cycles: reduce kernel reserved local memory when not using shader raytracing

Ref T87836

===================================================================

M       intern/cycles/device/cuda/device_impl.cpp
M       intern/cycles/device/hip/device_impl.cpp

===================================================================

diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp
index 5e1a63c04df..1c970096801 100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -454,7 +454,7 @@ bool CUDADevice::load_kernels(const uint kernel_features)
   return (result == CUDA_SUCCESS);
 }
 
-void CUDADevice::reserve_local_memory(const uint /* kernel_features */)
+void CUDADevice::reserve_local_memory(const uint kernel_features)
 {
   /* Together with CU_CTX_LMEM_RESIZE_TO_MAX, this reserves local memory
    * needed for kernel launches, so that we can reliably figure out when
@@ -468,7 +468,9 @@ void CUDADevice::reserve_local_memory(const uint /* kernel_features */)
 
   {
     /* Use the biggest kernel for estimation. */
-    const DeviceKernel test_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE;
+    const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
+                                         DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
+                                         DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
 
     /* Launch kernel, using just 1 block appears sufficient to reserve memory for all
      * multiprocessors. It would be good to do this in parallel for the multi GPU case
diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp
index 964783a08bf..583ab8ae208 100644
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@@ -430,7 +430,7 @@ bool HIPDevice::load_kernels(const uint kernel_features)
   return (result == hipSuccess);
 }
 
-void HIPDevice::reserve_local_memory(const uint)
+void HIPDevice::reserve_local_memory(const uint kernel_features)
 {
   /* Together with hipDeviceLmemResizeToMax, this reserves local memory
    * needed for kernel launches, so that we can reliably figure out when
@@ -444,7 +444,9 @@ void HIPDevice::reserve_local_memory(const uint)
 
   {
     /* Use the biggest kernel for estimation. */
-    const DeviceKernel test_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE;
+    const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
+                                         DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
+                                         DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
 
     /* Launch kernel, using just 1 block appears sufficient to reserve memory for all
      * multiprocessors. It would be good to do this in parallel for the multi GPU case

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to