Commit: 3830bf4bbc812aea436102e45970f13f417a2174 Author: Lukas Stockner Date: Thu Jul 7 00:50:45 2016 +0200 Branches: soc-2016-cycles_denoising https://developer.blender.org/rB3830bf4bbc812aea436102e45970f13f417a2174
Cycles: Add experimental alternative SVD-threshold norm based on power iteration One of the first steps of the algorithm is to apply a truncated SVD to a matrix formed by the features in order to reduce the dimensionality of the problem and decorrelate dimensions. The truncation threshold, according to the paper, is defined by twice the spectral norm of the matrix that is formed like the first one, but from the variances of the features. The reference implementation doesn't compute the spectral norm, but instead computes the Frobenius norm and multiplies it with sqrt(Rank)/2. That makes sense since it's guaranteed that the Frobenius norm lies somewhere between the one and sqrt(Rank) times the spectral norm. However, it's still an approximation. Therefore, in this commit I've tried to directly compute the spectral norm since the runtime performance is currently mainly limited by the per-pixel loops, so a small constant increase shouldn't matter too much. In order to compute it, the code just constructs the Gramian matrix of the variance matrix and computes the square root of its largest eigenvalue, which is found via power iteration. I haven't tested the code too much yet, but it seems that the improvement is quite negligible and not really worth the effort. Still, it might be interesting to tweak it further. =================================================================== M intern/cycles/device/device_cpu.cpp M intern/cycles/kernel/kernel_filter.h A intern/cycles/kernel/kernel_filter.h.orig A intern/cycles/kernel/kernel_filter.h.rej M intern/cycles/kernel/kernel_types.h A intern/cycles/render/denoising.cpp A intern/cycles/render/denoising.h M intern/cycles/util/util_math_matrix.h =================================================================== diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 83ef715..7e5162f 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -345,8 +345,14 @@ public: } } #ifdef WITH_CYCLES_DEBUG_FILTER - for(int i = 0; i < DENOISE_FEATURES; i++) + for(int i = 0; i < DENOISE_FEATURES; i++) { + debug_write_pfm(string_printf("debug_%dx%d_mean_%d.pfm", tile.x, tile.y, i).c_str(), &storages[0].means[i], tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w); + debug_write_pfm(string_printf("debug_%dx%d_scale_%d.pfm", tile.x, tile.y, i).c_str(), &storages[0].scales[i], tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w); + debug_write_pfm(string_printf("debug_%dx%d_singular_%d.pfm", tile.x, tile.y, i).c_str(), &storages[0].singular[i], tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w); debug_write_pfm(string_printf("debug_%dx%d_bandwidth_%d.pfm", tile.x, tile.y, i).c_str(), &storages[0].bandwidth[i], tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w); + } + debug_write_pfm(string_printf("debug_%dx%d_singular_threshold.pfm", tile.x, tile.y).c_str(), &storages[0].singular_threshold, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w); + debug_write_pfm(string_printf("debug_%dx%d_feature_matrix_norm.pfm", tile.x, tile.y).c_str(), &storages[0].feature_matrix_norm, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w); debug_write_pfm(string_printf("debug_%dx%d_global_bandwidth.pfm", tile.x, tile.y).c_str(), &storages[0].global_bandwidth, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w); debug_write_pfm(string_printf("debug_%dx%d_filtered_global_bandwidth.pfm", tile.x, tile.y).c_str(), &storages[0].filtered_global_bandwidth, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w); debug_write_pfm(string_printf("debug_%dx%d_sum_weight.pfm", tile.x, tile.y).c_str(), &storages[0].sum_weight, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w); diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h index b6984a5..00c6509 100644 --- a/intern/cycles/kernel/kernel_filter.h +++ b/intern/cycles/kernel/kernel_filter.h @@ -128,7 +128,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo storage += (y-filter_rect.y)*(filter_rect.z-filter_rect.x) + (x-filter_rect.x); /* Temporary storage, used in different steps of the algorithm. */ - float tempmatrix[(2*DENOISE_FEATURES+1)*(2*DENOISE_FEATURES+1)], tempvector[2*DENOISE_FEATURES+1]; + float tempmatrix[(2*DENOISE_FEATURES+1)*(2*DENOISE_FEATURES+1)], tempvector[4*DENOISE_FEATURES+1]; float *buffer, features[DENOISE_FEATURES]; /* === Get center pixel color and variance. === */ @@ -187,6 +187,10 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo * which generally has fewer dimensions. This mainly helps to prevent overfitting. */ float* feature_matrix = tempmatrix, feature_matrix_norm = 0.0f; math_matrix_zero_lower(feature_matrix, DENOISE_FEATURES); +#ifdef FULL_EIGENVALUE_NORM + float *perturbation_matrix = tempmatrix + DENOISE_FEATURES*DENOISE_FEATURES; + math_matrix_zero_lower(perturbation_matrix, DENOISE_FEATURES); +#endif FOR_PIXEL_WINDOW { filter_get_features(px, py, buffer, sample, features, feature_means); for(int i = 0; i < FEATURE_PASSES; i++) @@ -194,14 +198,26 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo math_add_gramian(feature_matrix, DENOISE_FEATURES, features, 1.0f); filter_get_feature_variance(px, py, buffer, sample, features, feature_scale); +#ifdef FULL_EIGENVALUE_NORM + math_add_gramian(perturbation_matrix, DENOISE_FEATURES, features, 1.0f); +#else for(int i = 0; i < FEATURE_PASSES; i++) feature_matrix_norm += features[i]; +#endif } END_FOR_PIXEL_WINDOW math_lower_tri_to_full(feature_matrix, DENOISE_FEATURES); float *feature_transform = &storage->transform[0], *singular = tempvector + DENOISE_FEATURES; int rank = svd(feature_matrix, feature_transform, singular, DENOISE_FEATURES); + +#ifdef FULL_EIGENVALUE_NORM + float *eigenvector_guess = tempvector + 2*DENOISE_FEATURES; + for(int i = 0; i < DENOISE_FEATURES; i++) + eigenvector_guess[i] = 1.0f; + float singular_threshold = 0.01f + 2.0f * sqrtf(math_largest_eigenvalue(perturbation_matrix, DENOISE_FEATURES, eigenvector_guess, tempvector + 3*DENOISE_FEATURES)); +#else float singular_threshold = 0.01f + 2.0f * (sqrtf(feature_matrix_norm) / (sqrtf(rank) * 0.5f)); +#endif rank = 0; for(int i = 0; i < DENOISE_FEATURES; i++, rank++) { @@ -213,6 +229,16 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo feature_transform[rank*DENOISE_FEATURES + j] *= feature_scale[j]; } +#ifdef WITH_CYCLES_DEBUG_FILTER + storage->feature_matrix_norm = feature_matrix_norm; + storage->singular_threshold = singular_threshold; + for(int i = 0; i < DENOISE_FEATURES; i++) { + storage->means[i] = feature_means[i]; + storage->scales[i] = feature_scale[i]; + storage->singular[i] = sqrtf(singular[i]); + } +#endif + /* From here on, the mean of the features will be shifted to the central pixel's values. */ filter_get_features(x, y, center_buffer, sample, feature_means, NULL); diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h.orig similarity index 93% copy from intern/cycles/kernel/kernel_filter.h copy to intern/cycles/kernel/kernel_filter.h.orig index b6984a5..7209431 100644 --- a/intern/cycles/kernel/kernel_filter.h +++ b/intern/cycles/kernel/kernel_filter.h.orig @@ -185,8 +185,9 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo /* === Generate the feature transformation. === * This transformation maps the DENOISE_FEATURES-dimentional feature space to a reduced feature (r-feature) space * which generally has fewer dimensions. This mainly helps to prevent overfitting. */ - float* feature_matrix = tempmatrix, feature_matrix_norm = 0.0f; + float *feature_matrix = tempmatrix, *perturbation_matrix = tempmatrix + DENOISE_FEATURES*DENOISE_FEATURES; math_matrix_zero_lower(feature_matrix, DENOISE_FEATURES); + math_matrix_zero_lower(perturbation_matrix, DENOISE_FEATURES); FOR_PIXEL_WINDOW { filter_get_features(px, py, buffer, sample, features, feature_means); for(int i = 0; i < FEATURE_PASSES; i++) @@ -194,14 +195,26 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo math_add_gramian(feature_matrix, DENOISE_FEATURES, features, 1.0f); filter_get_feature_variance(px, py, buffer, sample, features, feature_scale); - for(int i = 0; i < FEATURE_PASSES; i++) - feature_matrix_norm += features[i]; + math_add_gramian(perturbation_matrix, DENOISE_FEATURES, features, 1.0f); } END_FOR_PIXEL_WINDOW math_lower_tri_to_full(feature_matrix, DENOISE_FEATURES); float *feature_transform = &storage->transform[0], *singular = tempvector + DENOISE_FEATURES; int rank = svd(feature_matrix, feature_transform, singular, DENOISE_FEATURES); - float singular_threshold = 0.01f + 2.0f * (sqrtf(feature_matrix_norm) / (sqrtf(rank) * 0.5f)); + + float *eigenvector_guess = tempvector + DENOISE_FEATURES; + for(int i = 0; i < DENOISE_FEATURES; i++) + eigenvector_guess[i] = 1.0f; + float singular_threshold = 0.01f + 2.0f * sqrtf(math_largest_eigenvalue(perturbation_matrix, DENOISE_FEATURES, eigenvector_guess, tempvector + 2*DENOISE_FEATURES)); + if (x%100== 0 && y%100 == 0) { + for(int r = 0; r < DENOISE_FEATURES; r++) { + int c; + for(c = 0; c <= r; c++) printf("%f ", (double) perturbation_matrix[r*DENOISE_FEATURES+c]); + for(; c < DENOISE_FEATURES; c++) printf("%f ", (double) perturbation_matrix[c*DENOISE_FEATURES+r]); + printf("\n"); + } + printf("Singular val: %f\n", (double) (0.5f*(singular_threshold - 0.01f))); + } rank = 0; for(int i = 0; i < DENOISE_FEATURES; i++, rank++) { @@ -213,6 +226,16 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo feature_transform[rank*DENOISE_FEATURES + j] *= feature_scale[j]; } +#ifdef WITH_CYCLES_DEBUG_FILTER + storage->feature_matrix_norm = 0.0f;//feature_matrix_norm; + storage->singular_threshold = singular_threshold; + for(int i = 0; i < DENOISE_FEATURES; i++) { + storage->means[i] = feature_means[i]; + storage->scales[i] = feature_scale[i]; + storage->singular[i] = sqrtf(singular[i]); + } +#endif + /* From here on, the mean of the features will be shifted to the central pixel's values. */ filter_get_features(x, y, center_buffer, sample, feature_means, NULL); diff --git a/intern/cycles/kernel/kernel_filter.h.rej b/intern/cycles/kernel/kernel_filter.h.rej new file mode 100644 index 0000000..0646560 --- /dev/null +++ b/intern/cycles/kernel/kernel_filter.h.rej @@ -0,0 +1,43 @@ +--- intern/cycles/kernel/kernel_filter.h ++++ intern/cycles/kernel/kernel_filter.h +@@ -185,9 +185,8 @@ + /* === Generate the feature transformation. === + * This transformation maps the DENOISE_FEATURES-dimentional feature space to a reduced feature (r-feature) space + * which generally has fewer dimensions. This mainly helps to prevent overfitting. */ +- float *feature_matrix = tempmatrix, *perturbation_matrix = tempmatrix + DENOISE_FEATURES*DENOISE_FEATURES; ++ float* feature_matrix = tempmatrix, feature_matrix_norm = 0.0f; + math_matrix_zero_lower(feature_matrix, DENOISE_FEATURES); +- math_matrix_zero_lower(perturbation_matrix, DENOISE_FEATURES); + FOR_PIXEL_WINDOW { + filter_get_features(px, py, buffer, sample, features, feature_means); + for(int i = 0; i < FEATURE_PASSES; i++) +@@ -195,26 +194,14 @@ + math_add_gramian(feature_matrix, DENOISE_FEATURES, features, 1.0f); + + filter_get_feature_variance(px, py, buffer, sample, features, feature_scale); +- math_add_gramian(perturbation_matrix, DENOISE_FEATURES, features, 1.0f); ++ for(int i = 0; i < FEATURE_PASSES; i++) ++ feature_matrix_norm += features[i]; @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs