Commit: 0069b484b31b647a19a220761c34db550d6eb5dc Author: varunsundar08 Date: Tue Apr 7 19:30:06 2015 +0530 Branches: cycles_kernel_split https://developer.blender.org/rB0069b484b31b647a19a220761c34db550d6eb5dc
Record buffer and rng_state offsets in RenderTile =================================================================== M intern/cycles/device/device_opencl.cpp M intern/cycles/kernel/kernel_Background_BufferUpdate.cl M intern/cycles/kernel/kernel_DataInit.cl M intern/cycles/kernel/kernel_SumAllRadiance.cl M intern/cycles/render/buffers.h =================================================================== diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 6051782..1fc765c 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -3049,6 +3049,8 @@ One possible tile size is %zux%zu \n", tile_max_x - local_size[0] , tile_max_y - opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(d_h), (void*)&d_h)); opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(d_offset), (void*)&d_offset)); opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(d_stride), (void*)&d_stride)); + opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(rtile.rng_state_offset_x), (void*)&(rtile.rng_state_offset_x))); + opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(rtile.rng_state_offset_y), (void*)&(rtile.rng_state_offset_y))); opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(Queue_data), (void*)&Queue_data)); opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(Queue_index), (void*)&Queue_index)); opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(dQueue_size), (void*)&dQueue_size)); @@ -3125,6 +3127,8 @@ One possible tile size is %zux%zu \n", tile_max_x - local_size[0] , tile_max_y - opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(d_x), (void*)&d_x)); opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(d_y), (void*)&d_y)); opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(d_stride), (void*)&d_stride)); + opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(rtile.rng_state_offset_x), (void*)&(rtile.rng_state_offset_x))); + opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(rtile.rng_state_offset_y), (void*)&(rtile.rng_state_offset_y))); opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(work_array), (void*)&work_array)); opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(Queue_data), (void*)&Queue_data)); opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(Queue_index), (void*)&Queue_index)); @@ -3266,6 +3270,8 @@ One possible tile size is %zux%zu \n", tile_max_x - local_size[0] , tile_max_y - opencl_assert(clSetKernelArg(ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL, narg++, sizeof(d_w), (void *)&d_w)); opencl_assert(clSetKernelArg(ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL, narg++, sizeof(d_h), (void *)&d_h)); opencl_assert(clSetKernelArg(ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL, narg++, sizeof(d_stride), (void *)&d_stride)); + opencl_assert(clSetKernelArg(ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL, narg++, sizeof(rtile.buffer_offset_x), (void *)&(rtile.buffer_offset_x))); + opencl_assert(clSetKernelArg(ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL, narg++, sizeof(rtile.buffer_offset_y), (void *)&(rtile.buffer_offset_y))); opencl_assert(clSetKernelArg(ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL, narg++, sizeof(start_sample), (void*)&start_sample)); /* Enqueue ckPathTraceKernel_DataInit_SPLIT_KERNEL kernel */ @@ -3684,6 +3690,10 @@ One possible tile size is %zux%zu \n", tile_max_x - local_size[0] , tile_max_y - for (int tile_iter_x = 0; tile_iter_x < num_tiles_x; tile_iter_x++) { int rtile_index = tile_iter_y * num_tiles_x + tile_iter_x; + to_path_trace_rtile[rtile_index].rng_state_offset_x = tile_iter_x * render_feasible_tile_size.x; + to_path_trace_rtile[rtile_index].rng_state_offset_y = tile_iter_y * render_feasible_tile_size.y; + to_path_trace_rtile[rtile_index].buffer_offset_x = tile_iter_x * render_feasible_tile_size.x; + to_path_trace_rtile[rtile_index].buffer_offset_y = tile_iter_y * render_feasible_tile_size.y; to_path_trace_rtile[rtile_index].start_sample = rtile.start_sample; to_path_trace_rtile[rtile_index].num_samples = rtile.num_samples; to_path_trace_rtile[rtile_index].sample = rtile.sample; @@ -3723,6 +3733,11 @@ One possible tile size is %zux%zu \n", tile_max_x - local_size[0] , tile_max_y - while(task->acquire_tile(this, tile)) { #ifdef __SPLIT_KERNEL__ + tile.buffer_offset_x = 0; + tile.buffer_offset_y = 0; + tile.rng_state_offset_x = 0; + tile.rng_state_offset_y = 0; + /* The second argument is dummy */ path_trace(tile, 0); tile.sample = tile.start_sample + tile.num_samples; diff --git a/intern/cycles/kernel/kernel_Background_BufferUpdate.cl b/intern/cycles/kernel/kernel_Background_BufferUpdate.cl index 7c9999b..bb32791 100644 --- a/intern/cycles/kernel/kernel_Background_BufferUpdate.cl +++ b/intern/cycles/kernel/kernel_Background_BufferUpdate.cl @@ -109,6 +109,8 @@ __kernel void kernel_ocl_path_trace_Background_BufferUpdate_SPLIT_KERNEL( ccl_global float *L_transparent_coop, /* Required for background hit processing and buffer Update */ ccl_global char *ray_state, /* Stores information on the current state of a ray */ int sw, int sh, int sx, int sy, int stride, + int rng_state_offset_x, + int rng_state_offset_y, ccl_global unsigned int *work_array, /* Denotes work of each ray */ ccl_global int *Queue_data, /* Queues memory */ ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ @@ -187,7 +189,7 @@ __kernel void kernel_ocl_path_trace_Background_BufferUpdate_SPLIT_KERNEL( tile_y = tile_index / sw; my_sample_tile = ray_index - (tile_index * parallel_samples); #endif - rng_state += tile_x + tile_y * stride; + rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * stride; per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride; if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { @@ -249,7 +251,7 @@ __kernel void kernel_ocl_path_trace_Background_BufferUpdate_SPLIT_KERNEL( my_sample_tile = 0; /* Remap rng_state according to the current work */ - rng_state = initial_rng + (tile_x + tile_y * stride); + rng_state = initial_rng + ((rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * stride); /* Remap per_sample_output_buffers according to the current work */ per_sample_output_buffers = initial_per_sample_output_buffers + (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride; diff --git a/intern/cycles/kernel/kernel_DataInit.cl b/intern/cycles/kernel/kernel_DataInit.cl index f1c9001..9e86b9b 100644 --- a/intern/cycles/kernel/kernel_DataInit.cl +++ b/intern/cycles/kernel/kernel_DataInit.cl @@ -204,6 +204,8 @@ __kernel void kernel_ocl_path_trace_data_initialization_SPLIT_KERNEL( #include "kernel_textures.h" int start_sample, int sx, int sy, int sw, int sh, int offset, int stride, + int rng_state_offset_x, + int rng_state_offset_y, ccl_global int *Queue_data, /* Memory for queues */ ccl_global int *Queue_index, /* Tracks the number of elements in queues */ int queuesize, /* size (capacity) of the queue */ @@ -417,7 +419,7 @@ __kernel void kernel_ocl_path_trace_data_initialization_SPLIT_KERNEL( pixel_y = sy + tile_y; #endif // __WORK_STEALING__ - rng_state += tile_x + tile_y * stride; + rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * stride; /* Initialise per_sample_output_buffers to all zeros */ per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + (my_sample_tile)) * kernel_data.film.pass_stride; diff --git a/intern/cycles/kernel/kernel_SumAllRadiance.cl b/intern/cycles/kernel/kernel_SumAllRadiance.cl index 440bc6f..a9a43ed 100644 --- a/intern/cycles/kernel/kernel_SumAllRadiance.cl +++ b/intern/cycles/kernel/kernel_SumAllRadiance.cl @@ -30,13 +30,15 @@ __kernel void kernel_ocl_path_trace_SumAllRadiance_SPLIT_KERNEL( ccl_global float *buffer, /* Output buffer of RenderTile */ ccl_global float *per_sample_output_buffer, /* Radiance contributed by all samples */ int parallel_samples, int sw, int sh, int stride, + int buffer_offset_x, + int buffer_offset_y, int start_sample) { int x = get_global_id(0); int y = get_global_id(1); if(x < sw && y < sh) { - buffer += (x + y * stride) * (data->film.pass_stride); + buffer += ((buffer_offset_x + x) + (buffer_offset_y + y) * stride) * (data->film.pass_stride); per_sample_output_buffer += ((x + y * stride) * parallel_samples) * (data->film.pass_stride); int sample_stride = (data->film.pass_stride); diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index afff012..8f224f1 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -141,6 +141,11 @@ public: /* user set tile-size */ int2 tile_size; + /* Used in split kernel */ + int buffer_offset_x; + int buffer_offset_y; + int rng_state_offset_x; + int rng_state_offset_y; device_ptr buffer; device_ptr rng_state; _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org http://lists.blender.org/mailman/listinfo/bf-blender-cvs