Commit: 562392cc85c1d3f44da29d67294e9a1d91153cfc
Author: Lukas Stockner
Date:   Mon Jun 20 22:44:39 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB562392cc85c1d3f44da29d67294e9a1d91153cfc

Cycles: Implement tile overscan for GPU denoising

This commit adds support for tile overscan - rendering a larger tile internally
and only showing its center area. That is needed for GPU denoising since the
regular approach of keeping the neighbor tiles in memory would require far too
much memory. Since tiles are generally quite large on GPUs, the added overhead
isn't too large.

===================================================================

M       intern/cycles/blender/blender_session.cpp
M       intern/cycles/device/device_cpu.cpp
M       intern/cycles/kernel/kernel_filter.h
M       intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M       intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M       intern/cycles/render/buffers.cpp
M       intern/cycles/render/buffers.h
M       intern/cycles/render/session.cpp
M       intern/cycles/render/session.h
M       intern/cycles/render/tile.cpp
M       intern/cycles/render/tile.h

===================================================================

diff --git a/intern/cycles/blender/blender_session.cpp 
b/intern/cycles/blender/blender_session.cpp
index f538f57..ebda63f 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -390,10 +390,10 @@ static void add_pass(BL::RenderEngine& b_engine,
 void BlenderSession::do_write_update_render_tile(RenderTile& rtile, bool 
do_update_only, bool highlight)
 {
        BufferParams& params = rtile.buffers->params;
-       int x = params.full_x - session->tile_manager.params.full_x;
-       int y = params.full_y - session->tile_manager.params.full_y;
-       int w = params.width;
-       int h = params.height;
+       int x = params.full_x + params.overscan - 
session->tile_manager.params.full_x;
+       int y = params.full_y + params.overscan - 
session->tile_manager.params.full_y;
+       int w = params.final_width;
+       int h = params.final_height;
 
        /* get render result */
        BL::RenderResult b_rr = begin_render_result(b_engine, x, y, w, h, 
b_rlay_name.c_str(), b_rview_name.c_str());
@@ -502,7 +502,8 @@ void BlenderSession::render()
 
                buffer_params.passes = passes;
                buffer_params.denoising_passes = 
b_layer_iter->keep_denoise_data() || b_layer_iter->denoise_result();
-               session->tile_manager.denoise = b_layer_iter->denoise_result();
+               session->tile_manager.schedule_denoising = 
b_layer_iter->denoise_result();
+               session->params.denoise_result = b_layer_iter->denoise_result();
                scene->film->denoising_passes = buffer_params.denoising_passes;
                scene->film->denoise_flags = 0;
                if(b_layer_iter->denoise_diffuse_direct()) 
scene->film->denoise_flags |= DENOISE_DIFFUSE_DIR;
@@ -759,7 +760,7 @@ void 
BlenderSession::do_write_update_render_result(BL::RenderResult& b_rr,
        BufferParams& params = buffers->params;
        float exposure = scene->film->exposure;
 
-       vector<float> pixels(params.width*params.height*4);
+       vector<float> pixels(params.final_width*params.final_height*4);
 
        /* Adjust absolute sample number to the range. */
        int sample = rtile.sample;
diff --git a/intern/cycles/device/device_cpu.cpp 
b/intern/cycles/device/device_cpu.cpp
index 0c4fed1..0ea6973 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -222,8 +222,8 @@ public:
                RenderTile tile;
 
                void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*, 
int, int, int, int, int);
-               void(*filter_estimate_params_kernel)(KernelGlobals*, int, 
float**, int, int, int*, int*, int*, int*, void*);
-               void(*filter_final_pass_kernel)(KernelGlobals*, int, float**, 
int, int, int*, int*, int*, int*, void*);
+               void(*filter_estimate_params_kernel)(KernelGlobals*, int, 
float**, int, int, int*, int*, int*, int*, void*, int4);
+               void(*filter_final_pass_kernel)(KernelGlobals*, int, float**, 
int, int, int*, int*, int*, int*, void*, int4);
 
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
                if(system_cpu_support_avx2()) {
@@ -314,14 +314,15 @@ public:
                                int tile_y[4] = {rtiles[0].y, rtiles[3].y, 
rtiles[6].y, rtiles[6].y+rtiles[6].h};
                                FilterStorage *storages = new 
FilterStorage[tile.w*tile.h];
 
-                               for(int y = tile.y; y < tile.y + tile.h; y++) {
-                                       for(int x = tile.x; x < tile.x + 
tile.w; x++) {
-                                               
filter_estimate_params_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, 
offsets, strides, storages);
+                               int4 filter_rect = make_int4(tile.x, tile.y, 
tile.x + tile.w, tile.y + tile.h);
+                               for(int y = filter_rect.y; y < filter_rect.w; 
y++) {
+                                       for(int x = filter_rect.x; x < 
filter_rect.z; x++) {
+                                               
filter_estimate_params_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, 
offsets, strides, storages, filter_rect);
                                        }
                                }
-                               for(int y = tile.y; y < tile.y + tile.h; y++) {
-                                       for(int x = tile.x; x < tile.x + 
tile.w; x++) {
-                                               filter_final_pass_kernel(&kg, 
sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages);
+                               for(int y = filter_rect.y; y < filter_rect.w; 
y++) {
+                                       for(int x = filter_rect.x; x < 
filter_rect.z; x++) {
+                                               filter_final_pass_kernel(&kg, 
sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, filter_rect);
                                        }
                                }
                        }
diff --git a/intern/cycles/kernel/kernel_filter.h 
b/intern/cycles/kernel/kernel_filter.h
index 4971f0d..3dc2ee0 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -123,9 +123,9 @@ ccl_device_inline bool filter_firefly_rejection(float3 
pixel_color, float pixel_
  * - Start of the next upper/right neighbor (not accessed)
  * buffers contains the nine buffer pointers (y-major ordering, starting with 
the lower left tile), offset and stride the respective parameters of the tile.
  */
-ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, 
float **buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int 
*stride, FilterStorage *storage)
+ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, 
float **buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int 
*stride, FilterStorage *storage, int4 filter_rect)
 {
-       storage += (y - tile_y[1])*(tile_y[2] - tile_y[1]) + (x - tile_x[1]);
+       storage += (y-filter_rect.y)*(filter_rect.z-filter_rect.x) + 
(x-filter_rect.x);
 
        /* Temporary storage, used in different steps of the algorithm. */
        float tempmatrix[(2*DENOISE_FEATURES+1)*(2*DENOISE_FEATURES+1)], 
tempvector[2*DENOISE_FEATURES+1];
@@ -350,9 +350,9 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals 
*kg, int sample, flo
 
 
 
-ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float 
**buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int *stride, 
FilterStorage *storage)
+ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float 
**buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int *stride, 
FilterStorage *storage, int4 filter_rect)
 {
-       storage += (y - tile_y[1])*(tile_y[2] - tile_y[1]) + (x - tile_x[1]);
+       storage += (y-filter_rect.y)*(filter_rect.z-filter_rect.x) + 
(x-filter_rect.x);
        float *buffer, features[DENOISE_FEATURES];
 
        /* === Get center pixel. === */
@@ -372,9 +372,9 @@ ccl_device void kernel_filter_final_pass(KernelGlobals *kg, 
int sample, float **
        /* Apply a median filter to the 3x3 window aroung the current pixel. */
        int sort_idx = 0;
        float global_bandwidths[9];
-       for(int py = max(y-1, tile_y[1]); py < min(y+2, tile_y[2]); py++) {
-               for(int px = max(x-1, tile_x[1]); px < min(x+2, tile_x[2]); 
px++) {
-                       int ofs = (py-y)*(tile_y[2] - tile_y[1]) + (px-x);
+       for(int py = max(y-1, filter_rect.y); py < min(y+2, filter_rect.w); 
py++) {
+               for(int px = max(x-1, filter_rect.x); px < min(x+2, 
filter_rect.z); px++) {
+                       int ofs = (py-y)*(filter_rect.z - filter_rect.x) + 
(px-x);
                        if(storage[ofs].rank != rank) continue;
                        global_bandwidths[sort_idx++] = 
storage[ofs].global_bandwidth;
                }
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h 
b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index b719a79..fd0ebfa 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -58,7 +58,8 @@ void 
KERNEL_FUNCTION_FULL_NAME(filter_estimate_params)(KernelGlobals *kg,
                                                        int *tile_y,
                                                        int *offset,
                                                        int *stride,
-                                                       void *storage);
+                                                       void *storage,
+                                                       int4 filter_rect);
 
 void KERNEL_FUNCTION_FULL_NAME(filter_final_pass)(KernelGlobals *kg,
                                                   int sample,
@@ -69,6 +70,7 @@ void 
KERNEL_FUNCTION_FULL_NAME(filter_final_pass)(KernelGlobals *kg,
                                                   int *tile_y,
                                                   int *offset,
                                                   int *stride,
-                                                  void *storage);
+                                                  void *storage,
+                                                  int4 filter_rect);
 
 #undef KERNEL_ARCH
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h 
b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 9e468b4..26fc871 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -140,23 +140,25 @@ void 
KERNEL_FUNCTION_FULL_NAME(filter_estimate_params)(KernelGlobals *kg,
                                                        int *tile_y,
                                                        int *offset,
                                                        int *stride,
-                                                       void *storage)
+                                                       void *storage,
+                                                       int4 filter_rect)
 {
-       kernel_filter_estimate_params(kg, sample, buffers, x, y, tile_x, 
tile_y, offset, stride, (FilterStorage*) storage);
+       kernel_filter_estimate_params(kg, sample, buffers, x, y, tile_x, 
tile_y, offset, stride, (FilterStorage*) storage, filter_rect);
 }
 
 void KERNEL_FUNCTION_FULL_NAME(filter_final_pass)(KernelGlobals *kg,
-                                                       int sample,
-                                                       float** buffers,
-                                                       int x,
-                                                       int y,
-                                                       int *tile_x,
-                                                       int *tile_y,
-                                                       int *offset,
-                                                       int *stride,
-                                                       void *storage)
+                                                  int sample,
+                                                  float** buffers,
+                                                  int x,
+                                                  int y,
+                                                  int *tile_x,
+                        

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to