Commit: 5154eb669ddb9860bf4b3b5fbc91cba8758ba604 Author: varunsundar08 Date: Tue Apr 7 15:25:22 2015 +0530 Branches: cycles_kernel_split https://developer.blender.org/rB5154eb669ddb9860bf4b3b5fbc91cba8758ba604
Add utility function to check if given tile size can be rendered by split kernel

===================================================================

M	intern/cycles/device/device_opencl.cpp

===================================================================

diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 6f2c609..b785d7e 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -47,6 +47,10 @@ CCL_NAMESPACE_BEGIN
 #define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
 
 #ifdef __SPLIT_KERNEL__
+
+#define SPLIT_KERNEL_LOCAL_SIZE_X 64
+#define SPLIT_KERNEL_LOCAL_SIZE_Y 1
+
 /* This value may be tuned according to the scene we are rendering */
 /* modifying PATH_ITER_INC_FACTOR value proportional to number of expected ray-bounces will improve performance */
 #define PATH_ITER_INC_FACTOR 8
@@ -2416,7 +2420,7 @@ public:
 		assert(rayState_size == 1);
 
 		size_t global_size[2];
-		size_t local_size[2] = { 64, 1 };
+		size_t local_size[2] = { SPLIT_KERNEL_LOCAL_SIZE_X, SPLIT_KERNEL_LOCAL_SIZE_Y };
 
 		if(first_tile) {
 
@@ -3483,6 +3487,161 @@ One possible tile size is %zux%zu \n", tile_max_x - local_size[0] , tile_max_y -
 #endif
 	}
 
+	/* Calculates the amount of memory that has to be always
+	 * allocated in order for the split kernel to function.
+	 * This memory is tile/scene-property invariant (meaning,
+	 * the value returned by this function does not depend
+	 * on the user set tile size or scene properties)
+	 */
+	size_t get_invariable_mem_allocated() {
+		size_t total_invariable_mem_allocated = 0;
+		size_t KernelGlobals_size = 0;
+		size_t ShaderData_SOA_size = 0;
+
+		/* Find KernelGlobals size */
+		/* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to fetch its size */
+		typedef struct KernelGlobals {
+			ccl_constant KernelData *data;
+#define KERNEL_TEX(type, ttype, name) \
+	ccl_global type *name;
+#include "kernel_textures.h"
+		} KernelGlobals;
+		KernelGlobals_size = sizeof(KernelGlobals);
+
+		/* Calculate ShaderData_SOA_size: one pointer per SOA member of ShaderData */
+		size_t num_shader_soa_ptr = SD_NUM_FLOAT3 + SD_NUM_INT + SD_NUM_FLOAT
+#ifdef __DPDU__
+			+ SD_NUM_DPDU_FLOAT3
+#endif
+#ifdef __RAY_DIFFERENTIAL__
+			+ SD_NUM_RAY_DIFFERENTIALS_DIFFERENTIAL3
+			+ SD_NUM_DIFFERENTIAL
+#endif
+			+ SD_NUM_RAY_DP_DIFFERENTIAL3;
+		ShaderData_SOA_size = num_shader_soa_ptr * sizeof(void *);
+
+		total_invariable_mem_allocated += KernelGlobals_size; /* KernelGlobals size */
+		total_invariable_mem_allocated += NUM_QUEUES * sizeof(unsigned int); /* Queue index size */
+		total_invariable_mem_allocated += sizeof(char); /* use_queues_flag size */
+		total_invariable_mem_allocated += ShaderData_SOA_size; /* sd size */
+		total_invariable_mem_allocated += ShaderData_SOA_size; /* sd_dl size */
+		total_invariable_mem_allocated += ShaderData_SOA_size; /* sd_shadow size */
+
+		return total_invariable_mem_allocated;
+	}
+
+	/* Calculates the tile-size dependent memory (work pools, output buffer, RNG state) that has-to-be/has-been allocated for the split kernel to function */
+	size_t get_tile_specific_mem_allocated(RenderTile rtile) {
+		size_t tile_specific_mem_allocated = 0;
+
+		/* Get required tile info */
+		cl_int d_w = rtile.w;
+		cl_int d_h = rtile.h;
+		cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
+		cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state);
+		unsigned int user_set_tile_w = rtile.tile_size.x;
+		unsigned int user_set_tile_h = rtile.tile_size.y;
+
+#ifdef __WORK_STEALING__
+		/* Calculate memory to be allocated for work_pools in case of work_stealing */
+		size_t max_global_size[2];
+		size_t max_num_work_pools = 0;
+		max_global_size[0] = (((user_set_tile_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) * SPLIT_KERNEL_LOCAL_SIZE_X;
+		max_global_size[1] = (((user_set_tile_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) * SPLIT_KERNEL_LOCAL_SIZE_Y;
+		max_num_work_pools = (max_global_size[0] * max_global_size[1]) / (SPLIT_KERNEL_LOCAL_SIZE_X * SPLIT_KERNEL_LOCAL_SIZE_Y);
+		tile_specific_mem_allocated += max_num_work_pools * sizeof(unsigned int);
+#endif
+
+		/* Calculate per thread memory output buffer size */
+		size_t output_buffer_size = 0;
+		ciErr = clGetMemObjectInfo(d_buffer, CL_MEM_SIZE, sizeof(output_buffer_size), &output_buffer_size, NULL);
+		assert(ciErr == CL_SUCCESS && "Can't get d_buffer mem object info");
+		/* This value is different when running on AMD and NV */
+		size_t per_pixel_output_buffer_size = output_buffer_size / (d_w * d_h);
+
+		tile_specific_mem_allocated += user_set_tile_w * user_set_tile_h * per_pixel_output_buffer_size; /* accumulate (not overwrite): output buffer */
+		tile_specific_mem_allocated += user_set_tile_w * user_set_tile_h * sizeof(RNG); /* accumulate (not overwrite): one RNG per pixel */
+
+		return tile_specific_mem_allocated;
+	}
+
+	/* Calculates the texture memories that have been allocated */
+	size_t get_scene_specific_mem_allocated(cl_mem d_data) {
+		size_t scene_specific_mem_allocated = 0;
+		/* Calculate texture memories: sum get_tex_size() over every texture listed in kernel_textures.h */
+#define KERNEL_TEX(type, ttype, name) \
+	scene_specific_mem_allocated += get_tex_size(#name);
+#include "kernel_textures.h"
+
+		return scene_specific_mem_allocated;
+	}
+
+	/* Calculate the memory required for one thread in split kernel */
+	size_t get_per_thread_memory() {
+
+		size_t shader_closure_size = 0;
+		size_t shaderdata_volume = 0;
+#ifdef __MULTI_CLOSURE__
+		shader_closure_size = get_shader_closure_size(clos_max);
+#else
+		shader_closure_size = get_shader_closure_size(MAX_CLOSURE);
+#endif
+		shaderdata_volume = get_shader_data_size(shader_closure_size);
+
+		size_t retval = rng_size + throughput_size + L_transparent_size + rayState_size + work_element_size
+			+ ISLamp_size + PathRadiance_size + Ray_size + PathState_size
+			+ Intersection_size                  /* Overall isect */
+			+ Intersection_coop_AO_size          /* Intersection_coop_AO */
+			+ Intersection_coop_DL_size          /* Intersection coop DL */
+			+ shaderdata_volume                  /* Overall ShaderData */
+			+ shaderdata_volume                  /* ShaderData_coop_DL */
+			+ (shaderdata_volume * 2)            /* ShaderData coop shadow */
+			+ LightRay_size + BSDFEval_size + AOAlpha_size + AOBSDF_size + AOLightRay_size
+			+ (sizeof(int)* NUM_QUEUES)
+			+ per_thread_output_buffer_size;
+
+		return retval;
+	}
+
+	/* Considers the total memory available in the device
+	 * and returns the maximum global work size possible
+	 */
+	size_t get_feasible_global_work_size(RenderTile rtile, cl_mem d_data) {
+
+		/* Calculate invariably allocated memory */
+		size_t invariable_mem_allocated = get_invariable_mem_allocated();
+		/* Calculate tile specific allocated memory */
+		size_t tile_specific_mem_allocated = get_tile_specific_mem_allocated(rtile);
+		/* Calculate scene specific allocated memory */
+		size_t scene_specific_mem_allocated = get_scene_specific_mem_allocated(d_data);
+
+		/* Calculate total memory available for the threads in global work size */
+		size_t available_memory = total_allocatable_memory
+			- invariable_mem_allocated
+			- tile_specific_mem_allocated
+			- scene_specific_mem_allocated
+			- DATA_ALLOCATION_MEM_FACTOR;
+
+		size_t per_thread_memory_required = get_per_thread_memory();
+
+		return (available_memory / per_thread_memory_required);
+	}
+
+	/* Checks if the device has enough memory to render the whole tile;
+	 * If not, we should split single tile into multiple tiles of small size
+	 * and process them all
+	 */
+	bool need_to_split_tile(unsigned int d_w, unsigned int d_h, unsigned int feasible_global_work_size) {
+		size_t global_size_estimate[2] = {0, 0};
+		global_size_estimate[0] = (((d_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) * SPLIT_KERNEL_LOCAL_SIZE_X;
+		global_size_estimate[1] = (((d_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) * SPLIT_KERNEL_LOCAL_SIZE_Y;
+		if (global_size_estimate[0] * global_size_estimate[1] > feasible_global_work_size) {
+			return true;
+		} else {
+			return false;
+		}
+	}
+
 	void thread_run(DeviceTask *task)
 	{
 		if(task->type == DeviceTask::FILM_CONVERT) {

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
http://lists.blender.org/mailman/listinfo/bf-blender-cvs