Commit: 3830bf4bbc812aea436102e45970f13f417a2174
Author: Lukas Stockner
Date:   Thu Jul 7 00:50:45 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB3830bf4bbc812aea436102e45970f13f417a2174

Cycles: Add experimental alternative SVD-threshold norm based on power iteration

One of the first steps of the algorithm is to apply a truncated SVD to a
matrix formed by the features in order to reduce the dimensionality of the
problem and decorrelate dimensions.

The truncation threshold, according to the paper, is defined by twice the
spectral norm of the matrix that is formed like the first one, but from the
variances of the features.

The reference implementation doesn't compute the spectral norm, but instead
computes the Frobenius norm and multiplies it with sqrt(Rank)/2.
That makes sense since it's guaranteed that the Frobenius norm lies somewhere
between the one and sqrt(Rank) times the spectral norm.

However, it's still an approximation. Therefore, in this commit I've tried
to directly compute the spectral norm since the runtime performance is currently
mainly limited by the per-pixel loops, so a small constant increase shouldn't 
matter too much.
In order to compute it, the code just constructs the Gramian matrix of the 
variance matrix
and computes the square root of its largest eigenvalue, which is found via 
power iteration.

I haven't tested the code too much yet, but it seems that the improvement is 
quite negligible
and not really worth the effort. Still, it might be interesting to tweak it 
further.

===================================================================

M       intern/cycles/device/device_cpu.cpp
M       intern/cycles/kernel/kernel_filter.h
A       intern/cycles/kernel/kernel_filter.h.orig
A       intern/cycles/kernel/kernel_filter.h.rej
M       intern/cycles/kernel/kernel_types.h
A       intern/cycles/render/denoising.cpp
A       intern/cycles/render/denoising.h
M       intern/cycles/util/util_math_matrix.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp 
b/intern/cycles/device/device_cpu.cpp
index 83ef715..7e5162f 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -345,8 +345,14 @@ public:
                                        }
                                }
 #ifdef WITH_CYCLES_DEBUG_FILTER
-                               for(int i = 0; i < DENOISE_FEATURES; i++)
+                               for(int i = 0; i < DENOISE_FEATURES; i++) {
+                                       
debug_write_pfm(string_printf("debug_%dx%d_mean_%d.pfm", tile.x, tile.y, 
i).c_str(), &storages[0].means[i], tile.w, tile.h, 
sizeof(FilterStorage)/sizeof(float), tile.w);
+                                       
debug_write_pfm(string_printf("debug_%dx%d_scale_%d.pfm", tile.x, tile.y, 
i).c_str(), &storages[0].scales[i], tile.w, tile.h, 
sizeof(FilterStorage)/sizeof(float), tile.w);
+                                       
debug_write_pfm(string_printf("debug_%dx%d_singular_%d.pfm", tile.x, tile.y, 
i).c_str(), &storages[0].singular[i], tile.w, tile.h, 
sizeof(FilterStorage)/sizeof(float), tile.w);
                                        
debug_write_pfm(string_printf("debug_%dx%d_bandwidth_%d.pfm", tile.x, tile.y, 
i).c_str(), &storages[0].bandwidth[i], tile.w, tile.h, 
sizeof(FilterStorage)/sizeof(float), tile.w);
+                               }
+                               
debug_write_pfm(string_printf("debug_%dx%d_singular_threshold.pfm", tile.x, 
tile.y).c_str(), &storages[0].singular_threshold, tile.w, tile.h, 
sizeof(FilterStorage)/sizeof(float), tile.w);
+                               
debug_write_pfm(string_printf("debug_%dx%d_feature_matrix_norm.pfm", tile.x, 
tile.y).c_str(), &storages[0].feature_matrix_norm, tile.w, tile.h, 
sizeof(FilterStorage)/sizeof(float), tile.w);
                                
debug_write_pfm(string_printf("debug_%dx%d_global_bandwidth.pfm", tile.x, 
tile.y).c_str(), &storages[0].global_bandwidth, tile.w, tile.h, 
sizeof(FilterStorage)/sizeof(float), tile.w);
                                
debug_write_pfm(string_printf("debug_%dx%d_filtered_global_bandwidth.pfm", 
tile.x, tile.y).c_str(), &storages[0].filtered_global_bandwidth, tile.w, 
tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
                                
debug_write_pfm(string_printf("debug_%dx%d_sum_weight.pfm", tile.x, 
tile.y).c_str(), &storages[0].sum_weight, tile.w, tile.h, 
sizeof(FilterStorage)/sizeof(float), tile.w);
diff --git a/intern/cycles/kernel/kernel_filter.h 
b/intern/cycles/kernel/kernel_filter.h
index b6984a5..00c6509 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -128,7 +128,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals 
*kg, int sample, flo
        storage += (y-filter_rect.y)*(filter_rect.z-filter_rect.x) + 
(x-filter_rect.x);
 
        /* Temporary storage, used in different steps of the algorithm. */
-       float tempmatrix[(2*DENOISE_FEATURES+1)*(2*DENOISE_FEATURES+1)], 
tempvector[2*DENOISE_FEATURES+1];
+       float tempmatrix[(2*DENOISE_FEATURES+1)*(2*DENOISE_FEATURES+1)], 
tempvector[4*DENOISE_FEATURES+1];
        float *buffer, features[DENOISE_FEATURES];
 
        /* === Get center pixel color and variance. === */
@@ -187,6 +187,10 @@ ccl_device void 
kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
         * which generally has fewer dimensions. This mainly helps to prevent 
overfitting. */
        float* feature_matrix = tempmatrix, feature_matrix_norm = 0.0f;
        math_matrix_zero_lower(feature_matrix, DENOISE_FEATURES);
+#ifdef FULL_EIGENVALUE_NORM
+       float *perturbation_matrix = tempmatrix + 
DENOISE_FEATURES*DENOISE_FEATURES;
+       math_matrix_zero_lower(perturbation_matrix, DENOISE_FEATURES);
+#endif
        FOR_PIXEL_WINDOW {
                filter_get_features(px, py, buffer, sample, features, 
feature_means);
                for(int i = 0; i < FEATURE_PASSES; i++)
@@ -194,14 +198,26 @@ ccl_device void 
kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
                math_add_gramian(feature_matrix, DENOISE_FEATURES, features, 
1.0f);
 
                filter_get_feature_variance(px, py, buffer, sample, features, 
feature_scale);
+#ifdef FULL_EIGENVALUE_NORM
+               math_add_gramian(perturbation_matrix, DENOISE_FEATURES, 
features, 1.0f);
+#else
                for(int i = 0; i < FEATURE_PASSES; i++)
                        feature_matrix_norm += features[i];
+#endif
        } END_FOR_PIXEL_WINDOW
        math_lower_tri_to_full(feature_matrix, DENOISE_FEATURES);
 
        float *feature_transform = &storage->transform[0], *singular = 
tempvector + DENOISE_FEATURES;
        int rank = svd(feature_matrix, feature_transform, singular, 
DENOISE_FEATURES);
+
+#ifdef FULL_EIGENVALUE_NORM
+       float *eigenvector_guess = tempvector + 2*DENOISE_FEATURES;
+       for(int i = 0; i < DENOISE_FEATURES; i++)
+               eigenvector_guess[i] = 1.0f;
+       float singular_threshold = 0.01f + 2.0f * 
sqrtf(math_largest_eigenvalue(perturbation_matrix, DENOISE_FEATURES, 
eigenvector_guess, tempvector + 3*DENOISE_FEATURES));
+#else
        float singular_threshold = 0.01f + 2.0f * (sqrtf(feature_matrix_norm) / 
(sqrtf(rank) * 0.5f));
+#endif
 
        rank = 0;
        for(int i = 0; i < DENOISE_FEATURES; i++, rank++) {
@@ -213,6 +229,16 @@ ccl_device void 
kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
                        feature_transform[rank*DENOISE_FEATURES + j] *= 
feature_scale[j];
        }
 
+#ifdef WITH_CYCLES_DEBUG_FILTER
+       storage->feature_matrix_norm = feature_matrix_norm;
+       storage->singular_threshold = singular_threshold;
+       for(int i = 0; i < DENOISE_FEATURES; i++) {
+               storage->means[i] = feature_means[i];
+               storage->scales[i] = feature_scale[i];
+               storage->singular[i] = sqrtf(singular[i]);
+       }
+#endif
+
        /* From here on, the mean of the features will be shifted to the 
central pixel's values. */
        filter_get_features(x, y, center_buffer, sample, feature_means, NULL);
 
diff --git a/intern/cycles/kernel/kernel_filter.h 
b/intern/cycles/kernel/kernel_filter.h.orig
similarity index 93%
copy from intern/cycles/kernel/kernel_filter.h
copy to intern/cycles/kernel/kernel_filter.h.orig
index b6984a5..7209431 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h.orig
@@ -185,8 +185,9 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals 
*kg, int sample, flo
        /* === Generate the feature transformation. ===
         * This transformation maps the DENOISE_FEATURES-dimentional feature 
space to a reduced feature (r-feature) space
         * which generally has fewer dimensions. This mainly helps to prevent 
overfitting. */
-       float* feature_matrix = tempmatrix, feature_matrix_norm = 0.0f;
+       float *feature_matrix = tempmatrix, *perturbation_matrix = tempmatrix + 
DENOISE_FEATURES*DENOISE_FEATURES;
        math_matrix_zero_lower(feature_matrix, DENOISE_FEATURES);
+       math_matrix_zero_lower(perturbation_matrix, DENOISE_FEATURES);
        FOR_PIXEL_WINDOW {
                filter_get_features(px, py, buffer, sample, features, 
feature_means);
                for(int i = 0; i < FEATURE_PASSES; i++)
@@ -194,14 +195,26 @@ ccl_device void 
kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
                math_add_gramian(feature_matrix, DENOISE_FEATURES, features, 
1.0f);
 
                filter_get_feature_variance(px, py, buffer, sample, features, 
feature_scale);
-               for(int i = 0; i < FEATURE_PASSES; i++)
-                       feature_matrix_norm += features[i];
+               math_add_gramian(perturbation_matrix, DENOISE_FEATURES, 
features, 1.0f);
        } END_FOR_PIXEL_WINDOW
        math_lower_tri_to_full(feature_matrix, DENOISE_FEATURES);
 
        float *feature_transform = &storage->transform[0], *singular = 
tempvector + DENOISE_FEATURES;
        int rank = svd(feature_matrix, feature_transform, singular, 
DENOISE_FEATURES);
-       float singular_threshold = 0.01f + 2.0f * (sqrtf(feature_matrix_norm) / 
(sqrtf(rank) * 0.5f));
+
+       float *eigenvector_guess = tempvector + DENOISE_FEATURES;
+       for(int i = 0; i < DENOISE_FEATURES; i++)
+               eigenvector_guess[i] = 1.0f;
+       float singular_threshold = 0.01f + 2.0f * 
sqrtf(math_largest_eigenvalue(perturbation_matrix, DENOISE_FEATURES, 
eigenvector_guess, tempvector + 2*DENOISE_FEATURES));
+       if (x%100== 0 && y%100 == 0) {
+               for(int r = 0; r < DENOISE_FEATURES; r++) {
+                       int c;
+                       for(c = 0; c <= r; c++) printf("%f ", (double) 
perturbation_matrix[r*DENOISE_FEATURES+c]);
+                       for(; c < DENOISE_FEATURES; c++) printf("%f ", (double) 
perturbation_matrix[c*DENOISE_FEATURES+r]);
+                       printf("\n");
+               }
+               printf("Singular val: %f\n", (double) (0.5f*(singular_threshold 
- 0.01f)));
+       }
 
        rank = 0;
        for(int i = 0; i < DENOISE_FEATURES; i++, rank++) {
@@ -213,6 +226,16 @@ ccl_device void 
kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
                        feature_transform[rank*DENOISE_FEATURES + j] *= 
feature_scale[j];
        }
 
+#ifdef WITH_CYCLES_DEBUG_FILTER
+       storage->feature_matrix_norm = 0.0f;//feature_matrix_norm;
+       storage->singular_threshold = singular_threshold;
+       for(int i = 0; i < DENOISE_FEATURES; i++) {
+               storage->means[i] = feature_means[i];
+               storage->scales[i] = feature_scale[i];
+               storage->singular[i] = sqrtf(singular[i]);
+       }
+#endif
+
        /* From here on, the mean of the features will be shifted to the 
central pixel's values. */
        filter_get_features(x, y, center_buffer, sample, feature_means, NULL);
 
diff --git a/intern/cycles/kernel/kernel_filter.h.rej 
b/intern/cycles/kernel/kernel_filter.h.rej
new file mode 100644
index 0000000..0646560
--- /dev/null
+++ b/intern/cycles/kernel/kernel_filter.h.rej
@@ -0,0 +1,43 @@
+--- intern/cycles/kernel/kernel_filter.h
++++ intern/cycles/kernel/kernel_filter.h
+@@ -185,9 +185,8 @@
+        /* === Generate the feature transformation. ===
+         * This transformation maps the DENOISE_FEATURES-dimentional feature 
space to a reduced feature (r-feature) space
+         * which generally has fewer dimensions. This mainly helps to prevent 
overfitting. */
+-       float *feature_matrix = tempmatrix, *perturbation_matrix = tempmatrix 
+ DENOISE_FEATURES*DENOISE_FEATURES;
++       float* feature_matrix = tempmatrix, feature_matrix_norm = 0.0f;
+        math_matrix_zero_lower(feature_matrix, DENOISE_FEATURES);
+-       math_matrix_zero_lower(perturbation_matrix, DENOISE_FEATURES);
+        FOR_PIXEL_WINDOW {
+                filter_get_features(px, py, buffer, sample, features, 
feature_means);
+                for(int i = 0; i < FEATURE_PASSES; i++)
+@@ -195,26 +194,14 @@
+                math_add_gramian(feature_matrix, DENOISE_FEATURES, features, 
1.0f);
+ 
+                filter_get_feature_variance(px, py, buffer, sample, features, 
feature_scale);
+-               math_add_gramian(perturbation_matrix, DENOISE_FEATURES, 
features, 1.0f);
++               for(int i = 0; i < FEATURE_PASSES; i++)
++                       feature_matrix_norm += features[i];


@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to