Commit: fccf506ed7fd96f8a8f5edda7b99f564a386321a
Author: Lukas Stockner
Date:   Wed Feb 6 14:19:20 2019 +0100
Branches: blender2.7
https://developer.blender.org/rBfccf506ed7fd96f8a8f5edda7b99f564a386321a

Cycles: animation denoising support in the kernel.

This is the internal implementation, not available from the API or
interface yet. The algorithm takes into account past and future frames,
both to get more coherent animation and to reduce noise.

Ref D3889.

===================================================================

M       intern/cycles/device/device_cpu.cpp
M       intern/cycles/device/device_cuda.cpp
M       intern/cycles/device/device_denoising.cpp
M       intern/cycles/device/device_denoising.h
M       intern/cycles/device/device_task.h
M       intern/cycles/device/opencl/opencl.h
M       intern/cycles/device/opencl/opencl_base.cpp
M       intern/cycles/kernel/filter/filter_defines.h
M       intern/cycles/kernel/filter/filter_features.h
M       intern/cycles/kernel/filter/filter_features_sse.h
M       intern/cycles/kernel/filter/filter_nlm_cpu.h
M       intern/cycles/kernel/filter/filter_nlm_gpu.h
M       intern/cycles/kernel/filter/filter_reconstruction.h
M       intern/cycles/kernel/filter/filter_transform.h
M       intern/cycles/kernel/filter/filter_transform_gpu.h
M       intern/cycles/kernel/filter/filter_transform_sse.h
M       intern/cycles/kernel/kernels/cpu/filter_cpu.h
M       intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
M       intern/cycles/kernel/kernels/cuda/filter.cu
M       intern/cycles/kernel/kernels/opencl/filter.cl

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 6668acc9cbe..93c63b92a55 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -186,15 +186,15 @@ public:
        KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, 
int)>                               filter_detect_outliers_kernel;
        KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, 
int)>                               filter_combine_halves_kernel;
 
-       KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, 
int, int, float, float)>      filter_nlm_calc_difference_kernel;
+       KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, 
int, int, int, float, float)> filter_nlm_calc_difference_kernel;
        KernelFunctions<void(*)(float*, float*, int*, int, int)>                
                              filter_nlm_blur_kernel;
        KernelFunctions<void(*)(float*, float*, int*, int, int)>                
                              filter_nlm_calc_weight_kernel;
        KernelFunctions<void(*)(int, int, float*, float*, float*, float*, 
float*, int*, int, int, int)>       filter_nlm_update_output_kernel;
        KernelFunctions<void(*)(float*, float*, int*, int)>                     
                              filter_nlm_normalize_kernel;
 
-       KernelFunctions<void(*)(float*, int, int, int, float*, int*, int*, int, 
int, float)>                         filter_construct_transform_kernel;
-       KernelFunctions<void(*)(int, int, float*, float*, float*, int*, float*, 
float3*, int*, int*, int, int, int)> filter_nlm_construct_gramian_kernel;
-       KernelFunctions<void(*)(int, int, int, float*, int*, float*, float3*, 
int*, int)>                            filter_finalize_kernel;
+       KernelFunctions<void(*)(float*, TileInfo*, int, int, int, float*, int*, 
int*, int, int, bool, int, float)>                   
filter_construct_transform_kernel;
+       KernelFunctions<void(*)(int, int, int, float*, float*, float*, int*, 
float*, float3*, int*, int*, int, int, int, int, bool)> 
filter_nlm_construct_gramian_kernel;
+       KernelFunctions<void(*)(int, int, int, float*, int*, float*, float3*, 
int*, int)>                                            filter_finalize_kernel;
 
        KernelFunctions<void(*)(KernelGlobals *, ccl_constant KernelData*, 
ccl_global void*, int, ccl_global char*,
                               int, int, int, int, int, int, int, int, 
ccl_global int*, int,
@@ -512,7 +512,7 @@ public:
                                                            difference,
                                                            local_rect,
                                                            w, channel_offset,
-                                                           a, k_2);
+                                                           0, a, k_2);
 
                        filter_nlm_blur_kernel()       (difference, 
blurDifference, local_rect, w, f);
                        filter_nlm_calc_weight_kernel()(blurDifference, 
difference, local_rect, w, f);
@@ -542,6 +542,7 @@ public:
                for(int y = 0; y < task->filter_area.w; y++) {
                        for(int x = 0; x < task->filter_area.z; x++) {
                                filter_construct_transform_kernel()((float*) 
task->buffer.mem.device_pointer,
+                                                                   
task->tile_info,
                                                                    x + 
task->filter_area.x,
                                                                    y + 
task->filter_area.y,
                                                                    
y*task->filter_area.z + x,
@@ -549,6 +550,8 @@ public:
                                                                    (int*)   
task->storage.rank.device_pointer,
                                                                    
&task->rect.x,
                                                                    
task->buffer.pass_stride,
+                                                                   
task->buffer.frame_stride,
+                                                                   
task->buffer.use_time,
                                                                    
task->radius,
                                                                    
task->pca_threshold);
                        }
@@ -559,6 +562,7 @@ public:
        bool denoising_accumulate(device_ptr color_ptr,
                                  device_ptr color_variance_ptr,
                                  device_ptr scale_ptr,
+                                 int frame,
                                  DenoisingTask *task)
        {
                ProfilingHelper profiling(task->profiler, 
PROFILING_DENOISING_RECONSTRUCT);
@@ -568,6 +572,7 @@ public:
                float *blurDifference = temporary_mem + 
task->buffer.pass_stride;
 
                int r = task->radius;
+               int frame_offset = frame * task->buffer.frame_stride;
                for(int i = 0; i < (2*r+1)*(2*r+1); i++) {
                        int dy = i / (2*r+1) - r;
                        int dx = i % (2*r+1) - r;
@@ -583,12 +588,14 @@ public:
                                                            local_rect,
                                                            task->buffer.stride,
                                                            
task->buffer.pass_stride,
+                                                           frame_offset,
                                                            1.0f,
                                                            task->nlm_k_2);
                        filter_nlm_blur_kernel()(difference, blurDifference, 
local_rect, task->buffer.stride, 4);
                        filter_nlm_calc_weight_kernel()(blurDifference, 
difference, local_rect, task->buffer.stride, 4);
                        filter_nlm_blur_kernel()(difference, blurDifference, 
local_rect, task->buffer.stride, 4);
                        filter_nlm_construct_gramian_kernel()(dx, dy,
+                                                             
task->tile_info->frames[frame],
                                                              blurDifference,
                                                              (float*)  
task->buffer.mem.device_pointer,
                                                              (float*)  
task->storage.transform.device_pointer,
@@ -599,7 +606,9 @@ public:
                                                              
&task->reconstruction_state.filter_window.x,
                                                              
task->buffer.stride,
                                                              4,
-                                                             
task->buffer.pass_stride);
+                                                             
task->buffer.pass_stride,
+                                                             frame_offset,
+                                                             
task->buffer.use_time);
                }
 
                return true;
@@ -787,7 +796,7 @@ public:
                tile.sample = tile.start_sample + tile.num_samples;
 
                denoising.functions.construct_transform = 
function_bind(&CPUDevice::denoising_construct_transform, this, &denoising);
-               denoising.functions.accumulate = 
function_bind(&CPUDevice::denoising_accumulate, this, _1, _2, _3, &denoising);
+               denoising.functions.accumulate = 
function_bind(&CPUDevice::denoising_accumulate, this, _1, _2, _3, _4, 
&denoising);
                denoising.functions.solve = 
function_bind(&CPUDevice::denoising_solve, this, _1, &denoising);
                denoising.functions.divide_shadow = 
function_bind(&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, 
&denoising);
                denoising.functions.non_local_means = 
function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, 
&denoising);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index cb7d8bbb224..e21d974ebbe 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1301,6 +1301,7 @@ public:
                int pass_stride = task->buffer.pass_stride;
                int num_shifts = (2*r+1)*(2*r+1);
                int channel_offset = task->nlm_state.is_color? 
task->buffer.pass_stride : 0;
+               int frame_offset = 0;
 
                if(have_error())
                        return false;
@@ -1327,7 +1328,7 @@ public:
 
                        CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w*h, 
num_shifts);
 
-                       void *calc_difference_args[] = {&guide_ptr, 
&variance_ptr, &scale_ptr, &difference, &w, &h, &stride, &pass_stride, &r, 
&channel_offset, &a, &k_2};
+                       void *calc_difference_args[] = {&guide_ptr, 
&variance_ptr, &scale_ptr, &difference, &w, &h, &stride, &pass_stride, &r, 
&channel_offset, &frame_offset, &a, &k_2};
                        void *blur_args[]            = {&difference, 
&blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
                        void *calc_weight_args[]     = {&blurDifference, 
&difference, &w, &h, &stride, &pass_stride, &r, &f};
                        void *update_output_args[]   = {&blurDifference, 
&image_ptr, &out_ptr, &weightAccum, &w, &h, &stride, &pass_stride, 
&channel_offset, &r, &f};
@@ -1367,13 +1368,16 @@ public:
                                   task->storage.h);
 
                void *args[] = {&task->buffer.mem.device_pointer,
+                               &task->tile_info_mem.device_pointer,
                                &task->storage.transform.device_pointer,
                                &task->storage.rank.device_pointer,
                                &task->filter_area,
                                &task->rect,
                                &task->radius,
                                &task->pca_threshold,
-                               &task->buffer.pass_stride};
+                               &task->buffer.pass_stride,
+                               &task->buffer.frame_stride,
+                               &task->buffer.use_time};
                CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args);
                cuda_assert(cuCtxSynchronize());
 
@@ -1383,6 +1387,7 @@ public:
        bool denoising_accumulate(device_ptr color_ptr,
                                  device_ptr color_variance_ptr,
                                  device_ptr scale_ptr,
+                                 int frame,
                                  DenoisingTask *task)
        {
                if(have_error())
@@ -1398,6 +1403,8 @@ public:
                int w = task->reconstruction_state.source_w;
                int h = task->reconstruction_state.source_h;
                int stride = task->buffer.stride;
+               int frame_offset = frame * task->buffer.frame_stride;
+               int t = task->tile_info->frames[frame];
 
                int pass_stride = task->buffer.pass_stride;
                int num_shifts = (2*r+1)*(2*r+1);
@@ -1430,10 +1437,12 @@ public:
                                                &w, &h,
                                                &stride, &pass_stride,
                                                &r, &pass_stride,
+                                               &frame_offset,
                                                &a, &k_2};
                void *blur_args[]            = {&difference, &blurDifference, 
&w, &h, &stride, &pass_stride, &r, &f};
                void *calc_weight_args[]     = {&blurDifference, &difference, 
&w, &h, &stride, &pass_stride, &r, &f};
-               void *construct_gramian_args[] = {&blurDifference,
+               void *construct_gramian_args[] = {&t,
+                                                 &blurDifference,
                                                  
&task->buffer.mem.device_pointer,
                                                  
&task->storage.transform.device_pointer,
                        

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to