Commit: 005d83a0bb076d280837f16f9bc435c4d6b4b1de Author: Lukas Stockner Date: Thu Jul 7 20:12:35 2016 +0200 Branches: soc-2016-cycles_denoising https://developer.blender.org/rB005d83a0bb076d280837f16f9bc435c4d6b4b1de
Cycles: Add additional debugging output containing the estimated rMSE reduction per sample
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/kernel/kernel_filter.h
M intern/cycles/kernel/kernel_passes.h
M intern/cycles/kernel/kernel_types.h
M intern/cycles/util/util_math_matrix.h
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index c296c17..1f7e750 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -345,17 +345,21 @@ public:
 			}
 		}
 #ifdef WITH_CYCLES_DEBUG_FILTER
+		/* Dump one FilterStorage field of every pixel to debug_<x>x<y>_<name>.pfm; the macro appends the ".pfm" extension itself, so pass the bare name. */
+#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", tile.x, tile.y, name).c_str(), &storages[0].var, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w)
 		for(int i = 0; i < DENOISE_FEATURES; i++) {
-			debug_write_pfm(string_printf("debug_%dx%d_mean_%d.pfm", tile.x, tile.y, i).c_str(), &storages[0].means[i], tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
-			debug_write_pfm(string_printf("debug_%dx%d_scale_%d.pfm", tile.x, tile.y, i).c_str(), &storages[0].scales[i], tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
-			debug_write_pfm(string_printf("debug_%dx%d_singular_%d.pfm", tile.x, tile.y, i).c_str(), &storages[0].singular[i], tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
-			debug_write_pfm(string_printf("debug_%dx%d_bandwidth_%d.pfm", tile.x, tile.y, i).c_str(), &storages[0].bandwidth[i], tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
+			WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
+			WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
+			WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
+			WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]);
 		}
-		debug_write_pfm(string_printf("debug_%dx%d_singular_threshold.pfm", tile.x, tile.y).c_str(), &storages[0].singular_threshold, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
-		debug_write_pfm(string_printf("debug_%dx%d_feature_matrix_norm.pfm", tile.x, tile.y).c_str(), &storages[0].feature_matrix_norm, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
-		debug_write_pfm(string_printf("debug_%dx%d_global_bandwidth.pfm", tile.x, tile.y).c_str(), &storages[0].global_bandwidth, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
-		debug_write_pfm(string_printf("debug_%dx%d_filtered_global_bandwidth.pfm", tile.x, tile.y).c_str(), &storages[0].filtered_global_bandwidth, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
-		debug_write_pfm(string_printf("debug_%dx%d_sum_weight.pfm", tile.x, tile.y).c_str(), &storages[0].sum_weight, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
+		WRITE_DEBUG("singular_threshold", singular_threshold);
+		WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
+		WRITE_DEBUG("global_bandwidth", global_bandwidth);
+		WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
+		WRITE_DEBUG("sum_weight", sum_weight);
+		WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
+#undef WRITE_DEBUG
 #endif
 		tile.sample = sample;
 	}
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 912465b..c0bf5c1 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -831,13 +831,22 @@ public:
 #ifdef WITH_CYCLES_DEBUG_FILTER
 		FilterStorage *host_storage = new FilterStorage[filter_w*filter_h];
 		cuda_assert(cuMemcpyDtoH(host_storage, d_storage, sizeof(FilterStorage)*filter_w*filter_h));
-		std::string prefix = string_printf("debug_%dx%d_cuda", rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan);
-		for(int i = 0; i < DENOISE_FEATURES; i++)
-			debug_write_pfm(string_printf("%s_bandwidth_%d.pfm", prefix.c_str(), i).c_str(), &host_storage[0].bandwidth[i], filter_w, filter_h, sizeof(FilterStorage)/sizeof(float), filter_w);
-		debug_write_pfm(string_printf("%s_global_bandwidth.pfm", prefix.c_str()).c_str(), &host_storage[0].global_bandwidth, filter_w, filter_h, sizeof(FilterStorage)/sizeof(float), filter_w);
-		debug_write_pfm(string_printf("%s_filtered_global_bandwidth.pfm", prefix.c_str()).c_str(), &host_storage[0].filtered_global_bandwidth, filter_w, filter_h, sizeof(FilterStorage)/sizeof(float), filter_w);
-		debug_write_pfm(string_printf("%s_sum_weight.pfm", prefix.c_str()).c_str(), &host_storage[0].sum_weight, filter_w, filter_h, sizeof(FilterStorage)/sizeof(float), filter_w);
+		/* Dump one field of the copied-back FilterStorage array to debug_<x>x<y>_cuda_<name>.pfm; the macro appends the ".pfm" extension itself, so pass the bare name. */
+#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_cuda_%s.pfm", rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, name).c_str(), &host_storage[0].var, filter_w, filter_h, sizeof(FilterStorage)/sizeof(float), filter_w)
+		for(int i = 0; i < DENOISE_FEATURES; i++) {
+			WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
+			WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
+			WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
+			WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]);
+		}
+		WRITE_DEBUG("singular_threshold", singular_threshold);
+		WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
+		WRITE_DEBUG("global_bandwidth", global_bandwidth);
+		WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
+		WRITE_DEBUG("sum_weight", sum_weight);
+		WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
 		delete[] host_storage;
+#undef WRITE_DEBUG
 #endif
 
 		cuda_assert(cuMemFree(d_storage));
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index 00c6509..ad3d256
100644 --- a/intern/cycles/kernel/kernel_filter.h +++ b/intern/cycles/kernel/kernel_filter.h @@ -363,12 +363,21 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo /* === Estimate optimal global bandwidth. === */ - double bias_coef = math_lsq_solve(lsq_bias); - double variance_coef = math_lsq_solve(lsq_variance); + double bias_coef = math_lsq_solve(lsq_bias, NULL); + double variance_zeroth; + double variance_coef = math_lsq_solve(lsq_variance, &variance_zeroth); + if(variance_coef < 0.0) { + variance_coef = -variance_coef; + variance_zeroth = 0.0; + } float optimal_bw = (float) pow((rank * variance_coef) / (4.0 * bias_coef*bias_coef * sample), 1.0 / (rank + 4)); - - +#ifdef WITH_CYCLES_DEBUG_FILTER + double h2 = ((double) optimal_bw) * ((double) optimal_bw); + double bias = bias_coef*h2; + double variance = (variance_zeroth + variance_coef*pow(optimal_bw, -rank)) / sample; + storage->log_rmse_per_sample = ( (float) log(max(bias*bias + variance, 1e-20)) - 4.0f*logf(sample)/(rank + 4) ); +#endif /* === Store the calculated data for the second kernel. 
=== */ storage->rank = rank; @@ -504,6 +513,10 @@ ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float ** center_buffer[0] = final_color.x; center_buffer[1] = final_color.y; center_buffer[2] = final_color.z; + +#ifdef WITH_CYCLES_DEBUG_FILTER + storage->log_rmse_per_sample -= 2.0f * logf(linear_rgb_to_gray(final_color) + 0.001f); +#endif } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index b80bdbc..18db922 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -161,7 +161,7 @@ ccl_device_inline void kernel_write_denoising_passes(KernelGlobals *kg, ccl_glob else { kernel_write_pass_float3_var(buffer, sample, make_float3(0.0f, 0.0f, 0.0f)); kernel_write_pass_float3_var(buffer + 6, sample, world_albedo); - kernel_write_pass_float_var(buffer + 12, sample, 1e10f); + kernel_write_pass_float_var(buffer + 12, sample, 0.0f); } state->flag |= PATH_RAY_DENOISING_PASS_DONE; diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 15dbcae..7ad03d8 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -1286,6 +1286,7 @@ typedef struct FilterStorage { float sum_weight; float means[DENOISE_FEATURES], scales[DENOISE_FEATURES], singular[DENOISE_FEATURES]; float singular_threshold, feature_matrix_norm; + float log_rmse_per_sample; #endif } FilterStorage; diff --git a/intern/cycles/util/util_math_matrix.h b/intern/cycles/util/util_math_matrix.h index cb426dc..ba81e9f 100644 --- a/intern/cycles/util/util_math_matrix.h +++ b/intern/cycles/util/util_math_matrix.h @@ -252,9 +252,11 @@ ccl_device_inline void math_lsq_add(double *lsq, double x, double y) } /* Returns the first-order coefficient a of the fitted function. 
*/
-ccl_device_inline double math_lsq_solve(double *lsq)
+ccl_device_inline double math_lsq_solve(double *lsq, double *zeroth)
 {
 	double inv_det = 1.0 / (lsq[0]*lsq[2] - lsq[1]*lsq[1] + 1e-4);
+	if(zeroth) /* Optional output: zeroth-order coefficient, i.e. the other unknown of the same 2x2 system (Cramer's rule). */
+		*zeroth = (lsq[2]*lsq[3] - lsq[1]*lsq[4]) * inv_det;
 	return (lsq[0]*lsq[4] - lsq[1]*lsq[3]) * inv_det;
 }
 
_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs