Commit: fccf506ed7fd96f8a8f5edda7b99f564a386321a
Author: Lukas Stockner
Date: Wed Feb 6 14:19:20 2019 +0100
Branches: blender2.7
https://developer.blender.org/rBfccf506ed7fd96f8a8f5edda7b99f564a386321a
Cycles: animation denoising support in the kernel. This is the internal implementation, not available from the API or interface yet. The algorithm takes into account past and future frames, both to get more coherent animation and reduce noise. Ref D3889. =================================================================== M intern/cycles/device/device_cpu.cpp M intern/cycles/device/device_cuda.cpp M intern/cycles/device/device_denoising.cpp M intern/cycles/device/device_denoising.h M intern/cycles/device/device_task.h M intern/cycles/device/opencl/opencl.h M intern/cycles/device/opencl/opencl_base.cpp M intern/cycles/kernel/filter/filter_defines.h M intern/cycles/kernel/filter/filter_features.h M intern/cycles/kernel/filter/filter_features_sse.h M intern/cycles/kernel/filter/filter_nlm_cpu.h M intern/cycles/kernel/filter/filter_nlm_gpu.h M intern/cycles/kernel/filter/filter_reconstruction.h M intern/cycles/kernel/filter/filter_transform.h M intern/cycles/kernel/filter/filter_transform_gpu.h M intern/cycles/kernel/filter/filter_transform_sse.h M intern/cycles/kernel/kernels/cpu/filter_cpu.h M intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h M intern/cycles/kernel/kernels/cuda/filter.cu M intern/cycles/kernel/kernels/opencl/filter.cl =================================================================== diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 6668acc9cbe..93c63b92a55 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -186,15 +186,15 @@ public: KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel; KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel; - KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel; + KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int, int, 
float, float)> filter_nlm_calc_difference_kernel; KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_blur_kernel; KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_calc_weight_kernel; KernelFunctions<void(*)(int, int, float*, float*, float*, float*, float*, int*, int, int, int)> filter_nlm_update_output_kernel; KernelFunctions<void(*)(float*, float*, int*, int)> filter_nlm_normalize_kernel; - KernelFunctions<void(*)(float*, int, int, int, float*, int*, int*, int, int, float)> filter_construct_transform_kernel; - KernelFunctions<void(*)(int, int, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int)> filter_nlm_construct_gramian_kernel; - KernelFunctions<void(*)(int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel; + KernelFunctions<void(*)(float*, TileInfo*, int, int, int, float*, int*, int*, int, int, bool, int, float)> filter_construct_transform_kernel; + KernelFunctions<void(*)(int, int, int, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int, int, bool)> filter_nlm_construct_gramian_kernel; + KernelFunctions<void(*)(int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel; KernelFunctions<void(*)(KernelGlobals *, ccl_constant KernelData*, ccl_global void*, int, ccl_global char*, int, int, int, int, int, int, int, int, ccl_global int*, int, @@ -512,7 +512,7 @@ public: difference, local_rect, w, channel_offset, - a, k_2); + 0, a, k_2); filter_nlm_blur_kernel() (difference, blurDifference, local_rect, w, f); filter_nlm_calc_weight_kernel()(blurDifference, difference, local_rect, w, f); @@ -542,6 +542,7 @@ public: for(int y = 0; y < task->filter_area.w; y++) { for(int x = 0; x < task->filter_area.z; x++) { filter_construct_transform_kernel()((float*) task->buffer.mem.device_pointer, + task->tile_info, x + task->filter_area.x, y + task->filter_area.y, y*task->filter_area.z + x, @@ -549,6 +550,8 @@ public: (int*) 
task->storage.rank.device_pointer, &task->rect.x, task->buffer.pass_stride, + task->buffer.frame_stride, + task->buffer.use_time, task->radius, task->pca_threshold); } @@ -559,6 +562,7 @@ public: bool denoising_accumulate(device_ptr color_ptr, device_ptr color_variance_ptr, device_ptr scale_ptr, + int frame, DenoisingTask *task) { ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_RECONSTRUCT); @@ -568,6 +572,7 @@ public: float *blurDifference = temporary_mem + task->buffer.pass_stride; int r = task->radius; + int frame_offset = frame * task->buffer.frame_stride; for(int i = 0; i < (2*r+1)*(2*r+1); i++) { int dy = i / (2*r+1) - r; int dx = i % (2*r+1) - r; @@ -583,12 +588,14 @@ public: local_rect, task->buffer.stride, task->buffer.pass_stride, + frame_offset, 1.0f, task->nlm_k_2); filter_nlm_blur_kernel()(difference, blurDifference, local_rect, task->buffer.stride, 4); filter_nlm_calc_weight_kernel()(blurDifference, difference, local_rect, task->buffer.stride, 4); filter_nlm_blur_kernel()(difference, blurDifference, local_rect, task->buffer.stride, 4); filter_nlm_construct_gramian_kernel()(dx, dy, + task->tile_info->frames[frame], blurDifference, (float*) task->buffer.mem.device_pointer, (float*) task->storage.transform.device_pointer, @@ -599,7 +606,9 @@ public: &task->reconstruction_state.filter_window.x, task->buffer.stride, 4, - task->buffer.pass_stride); + task->buffer.pass_stride, + frame_offset, + task->buffer.use_time); } return true; @@ -787,7 +796,7 @@ public: tile.sample = tile.start_sample + tile.num_samples; denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising); - denoising.functions.accumulate = function_bind(&CPUDevice::denoising_accumulate, this, _1, _2, _3, &denoising); + denoising.functions.accumulate = function_bind(&CPUDevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising); denoising.functions.solve = function_bind(&CPUDevice::denoising_solve, this, _1, 
&denoising); denoising.functions.divide_shadow = function_bind(&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index cb7d8bbb224..e21d974ebbe 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1301,6 +1301,7 @@ public: int pass_stride = task->buffer.pass_stride; int num_shifts = (2*r+1)*(2*r+1); int channel_offset = task->nlm_state.is_color? task->buffer.pass_stride : 0; + int frame_offset = 0; if(have_error()) return false; @@ -1327,7 +1328,7 @@ public: CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w*h, num_shifts); - void *calc_difference_args[] = {&guide_ptr, &variance_ptr, &scale_ptr, &difference, &w, &h, &stride, &pass_stride, &r, &channel_offset, &a, &k_2}; + void *calc_difference_args[] = {&guide_ptr, &variance_ptr, &scale_ptr, &difference, &w, &h, &stride, &pass_stride, &r, &channel_offset, &frame_offset, &a, &k_2}; void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f}; void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f}; void *update_output_args[] = {&blurDifference, &image_ptr, &out_ptr, &weightAccum, &w, &h, &stride, &pass_stride, &channel_offset, &r, &f}; @@ -1367,13 +1368,16 @@ public: task->storage.h); void *args[] = {&task->buffer.mem.device_pointer, + &task->tile_info_mem.device_pointer, &task->storage.transform.device_pointer, &task->storage.rank.device_pointer, &task->filter_area, &task->rect, &task->radius, &task->pca_threshold, - &task->buffer.pass_stride}; + &task->buffer.pass_stride, + &task->buffer.frame_stride, + &task->buffer.use_time}; CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args); cuda_assert(cuCtxSynchronize()); @@ -1383,6 +1387,7 @@ public: bool 
denoising_accumulate(device_ptr color_ptr, device_ptr color_variance_ptr, device_ptr scale_ptr, + int frame, DenoisingTask *task) { if(have_error()) @@ -1398,6 +1403,8 @@ public: int w = task->reconstruction_state.source_w; int h = task->reconstruction_state.source_h; int stride = task->buffer.stride; + int frame_offset = frame * task->buffer.frame_stride; + int t = task->tile_info->frames[frame]; int pass_stride = task->buffer.pass_stride; int num_shifts = (2*r+1)*(2*r+1); @@ -1430,10 +1437,12 @@ public: &w, &h, &stride, &pass_stride, &r, &pass_stride, + &frame_offset, &a, &k_2}; void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f}; void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f}; - void *construct_gramian_args[] = {&blurDifference, + void *construct_gramian_args[] = {&t, + &blurDifference, &task->buffer.mem.device_pointer, &task->storage.transform.device_pointer, @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs