Commit: 222b64fcdc8166266eca10bb16c1cebb2bda8923 Author: Brecht Van Lommel Date: Wed Nov 30 21:38:57 2022 +0100 Branches: master https://developer.blender.org/rB222b64fcdc8166266eca10bb16c1cebb2bda8923
Fix Cycles CUDA crash when building kernels without optimizations (for debug) In this case the blocksize may not be the one we requested, which was assumed to be the case. Instead get the effective block size from the compiler as was already done for Metal and OneAPI. =================================================================== M intern/cycles/kernel/device/gpu/kernel.h M intern/cycles/kernel/device/gpu/parallel_active_index.h =================================================================== diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index d7d2000775f..a44bd1dece7 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -314,11 +314,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) int kernel_index); ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index; - gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE, - num_states, - indices, - num_indices, - ccl_gpu_kernel_lambda_pass); + gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); } ccl_gpu_kernel_postfix @@ -333,11 +329,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) int kernel_index); ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index; - gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE, - num_states, - indices, - num_indices, - ccl_gpu_kernel_lambda_pass); + gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); } ccl_gpu_kernel_postfix @@ -349,11 +341,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) { ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) != 0); - gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE, - num_states, - indices, - num_indices, - ccl_gpu_kernel_lambda_pass); + gpu_parallel_active_index_array(num_states, indices, num_indices, 
ccl_gpu_kernel_lambda_pass); } ccl_gpu_kernel_postfix @@ -366,11 +354,8 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) { ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) == 0); - gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE, - num_states, - indices + indices_offset, - num_indices, - ccl_gpu_kernel_lambda_pass); + gpu_parallel_active_index_array( + num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass); } ccl_gpu_kernel_postfix @@ -383,11 +368,8 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) { ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0); - gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE, - num_states, - indices + indices_offset, - num_indices, - ccl_gpu_kernel_lambda_pass); + gpu_parallel_active_index_array( + num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass); } ccl_gpu_kernel_postfix @@ -431,11 +413,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) int num_active_paths); ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths; - gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE, - num_states, - indices, - num_indices, - ccl_gpu_kernel_lambda_pass); + gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); } ccl_gpu_kernel_postfix @@ -469,11 +447,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) int num_active_paths); ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths; - gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE, - num_states, - indices, - num_indices, - ccl_gpu_kernel_lambda_pass); + gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); } ccl_gpu_kernel_postfix diff --git a/intern/cycles/kernel/device/gpu/parallel_active_index.h 
b/intern/cycles/kernel/device/gpu/parallel_active_index.h index 38cdcb572eb..1d47211604b 100644 --- a/intern/cycles/kernel/device/gpu/parallel_active_index.h +++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h @@ -56,7 +56,7 @@ void gpu_parallel_active_index_array_impl(const uint num_states, const uint is_active = (state_index < num_states) ? is_active_op(state_index) : 0; #else /* !__KERNEL__ONEAPI__ */ # ifndef __KERNEL_METAL__ -template<uint blocksize, typename IsActiveOp> +template<typename IsActiveOp> __device__ # endif void @@ -79,6 +79,10 @@ __device__ { extern ccl_gpu_shared int warp_offset[]; +# ifndef __KERNEL_METAL__ + const uint blocksize = ccl_gpu_block_dim_x; +# endif + const uint thread_index = ccl_gpu_thread_idx_x; const uint thread_warp = thread_index % ccl_gpu_warp_size; @@ -149,7 +153,7 @@ __device__ #ifdef __KERNEL_METAL__ -# define gpu_parallel_active_index_array(dummy, num_states, indices, num_indices, is_active_op) \ +# define gpu_parallel_active_index_array(num_states, indices, num_indices, is_active_op) \ const uint is_active = (ccl_gpu_global_id_x() < num_states) ? 
\ is_active_op(ccl_gpu_global_id_x()) : \ 0; \ @@ -167,15 +171,13 @@ __device__ simdgroup_offset) #elif defined(__KERNEL_ONEAPI__) -# define gpu_parallel_active_index_array( \ - blocksize, num_states, indices, num_indices, is_active_op) \ +# define gpu_parallel_active_index_array(num_states, indices, num_indices, is_active_op) \ gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op) #else -# define gpu_parallel_active_index_array( \ - blocksize, num_states, indices, num_indices, is_active_op) \ - gpu_parallel_active_index_array_impl<blocksize>(num_states, indices, num_indices, is_active_op) +# define gpu_parallel_active_index_array(num_states, indices, num_indices, is_active_op) \ + gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op) #endif _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs