Your message dated Mon, 03 Feb 2020 20:45:11 +0000
with message-id <[email protected]>
and subject line Bug#832095: fixed in zita-resampler 1.6.2-1
has caused the Debian Bug report #832095,
regarding patch for SSE-optimizing resampling of stereo signals
to be marked as done.

This means that you claim that the problem has been dealt with.
If this is not the case it is now your responsibility to reopen the
Bug report if necessary, and/or fix the problem forthwith.

(NB: If you are a system administrator and have no idea what this
message is talking about, this may indicate a serious mail system
misconfiguration somewhere. Please contact [email protected]
immediately.)


-- 
832095: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=832095
Debian Bug Tracking System
Contact [email protected] with problems
--- Begin Message ---
Package: libzita-resampler1
Version: 1.3.0-2
Severity: wishlist
Tags: upstream patch

Hi,

Please find attached a patch for SSE-optimizing resampling of stereo signals;
it makes this more or less three times as fast on Intel systems, without
sacrificing any quality.

I talked to upstream about this patch back in the day, and he seemed happy to
accept it, but somehow just stopped answering -- I guess he got busy with other
things in life. It would be nice if we could get it into Debian nevertheless
(I want it for reducing CPU used in my realtime video mixer).

The patch should be attributed to Steinar H. Gunderson <[email protected]>
(i.e., written with my work hat on at my former job), and it is licensed under
the same terms as zita-resampler itself (i.e., GPLv3+).

-- System Information:
Debian Release: stretch/sid
  APT prefers unstable
  APT policy: (500, 'unstable'), (500, 'testing'), (1, 'experimental')
Architecture: amd64 (x86_64)
Foreign Architectures: i386, armhf

Kernel: Linux 4.7.0-rc7 (SMP w/4 CPU cores)
Locale: LANG=nb_NO.utf8, LC_CTYPE=nb_NO.utf8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)

Versions of packages libzita-resampler1 depends on:
ii  libc6              2.23-2
ii  libgcc1            1:6.1.1-9
ii  libstdc++6         6.1.1-9
ii  multiarch-support  2.23-2

libzita-resampler1 recommends no packages.

libzita-resampler1 suggests no packages.

-- no debconf information
diff -ur orig/zita-resampler-1.3.0/libs/resampler.cc zita-resampler-1.3.0/libs/resampler.cc
--- orig/zita-resampler-1.3.0/libs/resampler.cc	2012-10-26 22:58:55.000000000 +0200
+++ zita-resampler-1.3.0/libs/resampler.cc	2015-11-15 12:27:42.764591015 +0100
@@ -24,6 +24,10 @@
 #include <math.h>
 #include <zita-resampler/resampler.h>
 
+#ifdef __SSE2__
+#include <xmmintrin.h>
+#endif
+
 
 static unsigned int gcd (unsigned int a, unsigned int b)
 {
@@ -47,6 +51,45 @@
     return 1; 
 }
 
+#ifdef __SSE2__
+
+static inline void calc_stereo_sample_sse (unsigned int hl,
+                                           float *c1,
+                                           float *c2,
+                                           float *q1,
+                                           float *q2,
+                                           float *out_data)
+{
+    unsigned int   i;
+    __m128         denorm, s, w1, w2;
+
+    denorm = _mm_set1_ps (1e-20f);
+    s = denorm;
+    for (i = 0; i < hl; i += 4)
+    {
+	q2 -= 8;
+
+	// Scalar equivalent, per channel: s += *q1 * c1 [i]. Four coefficients per pass; unpacklo/unpackhi repeat each one in two adjacent lanes.
+	w1 = _mm_loadu_ps (&c1 [i]);
+	s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1),     _mm_unpacklo_ps (w1, w1)));
+	s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1 + 4), _mm_unpackhi_ps (w1, w1)));
+
+	// Scalar equivalent, per channel: s += *q2 * c2 [i]. q2 moves backwards, so the shuffles pair q2 + 4 with c2 [i + 1], c2 [i] and q2 with c2 [i + 3], c2 [i + 2].
+	w2 = _mm_loadu_ps (&c2 [i]);
+	s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2 + 4), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (0, 0, 1, 1))));
+	s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2),     _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (2, 2, 3, 3))));
+
+	q1 += 8;
+    }
+    s = _mm_sub_ps (s, denorm);
+    s = _mm_add_ps (s, _mm_shuffle_ps (s, s, _MM_SHUFFLE (1, 0, 3, 2)));
+
+    // Stores four floats, two more than the two sums we want (lanes 2-3 repeat lanes 0-1); the caller only takes this path when out_count >= 2.
+    _mm_storeu_ps (out_data, s);
+}
+
+#endif
+
 
 Resampler::Resampler (void) :
     _table (0),
@@ -213,18 +256,28 @@
 		{
 		    float *c1 = _table->_ctab + hl * ph;
 		    float *c2 = _table->_ctab + hl * (np - ph);
-		    for (c = 0; c < _nchan; c++)
+#ifdef __SSE2__
+		    if ((hl % 4) == 0 && _nchan == 2 && out_count >= 2)
 		    {
-			float *q1 = p1 + c;
-			float *q2 = p2 + c;
-			float s = 1e-20f;
-			for (i = 0; i < hl; i++)
+			calc_stereo_sample_sse (hl, c1, c2, p1, p2, out_data);
+			out_data += 2;
+		    }
+		    else
+#endif
+                    {
+			for (c = 0; c < _nchan; c++)
 			{
-			    q2 -= _nchan;
-			    s += *q1 * c1 [i] + *q2 * c2 [i];
-			    q1 += _nchan;
+			    float *q1 = p1 + c;
+			    float *q2 = p2 + c;
+			    float s = 1e-20f;
+			    for (i = 0; i < hl; i++)
+			    {
+				q2 -= _nchan;
+				s += *q1 * c1 [i] + *q2 * c2 [i];
+				q1 += _nchan;
+			    }
+			    *out_data++ = s - 1e-20f;
 			}
-			*out_data++ = s - 1e-20f;
 		    }
 		}
 		else
@@ -260,4 +313,3 @@
     return 0;
 }
 
-
diff -ur orig/zita-resampler-1.3.0/libs/vresampler.cc zita-resampler-1.3.0/libs/vresampler.cc
--- orig/zita-resampler-1.3.0/libs/vresampler.cc	2012-10-26 22:58:55.000000000 +0200
+++ zita-resampler-1.3.0/libs/vresampler.cc	2015-11-15 12:27:58.424544882 +0100
@@ -25,6 +25,58 @@
 #include <zita-resampler/vresampler.h>
 
 
+#ifdef __SSE2__
+
+#include <xmmintrin.h>
+
+static inline void calc_stereo_sample_sse (int hl,
+                                           float b,
+                                           float *p1,
+                                           float *p2,
+                                           float *q1,
+                                           float *q2,
+                                           float *out_data)
+{
+    int            i;
+    __m128         denorm, bs, s, c1, c2, w1, w2;
+
+    denorm = _mm_set1_ps (1e-25f);
+    bs = _mm_set1_ps (b);
+    s = denorm;
+    for (i = 0; i < hl; i += 4)
+    {
+	p2 -= 8;
+
+	// _c1 [i] = q1 [i] + b * (q1 [i + hl] - q1 [i]);  i.e. blend q1 [i] towards q1 [i + hl] by weight b, four coefficients at a time.
+	w1 = _mm_loadu_ps (&q1 [i]);
+	w2 = _mm_loadu_ps (&q1 [i + hl]);
+	c1 = _mm_add_ps (w1, _mm_mul_ps(bs, _mm_sub_ps (w2, w1)));
+
+	// _c2 [i] = q2 [i] + b * (q2 [i - hl] - q2 [i]);  same blend, but towards the entry hl floats before q2 [i].
+	w1 = _mm_loadu_ps (&q2 [i]);
+	w2 = _mm_loadu_ps (&q2 [i - hl]);
+	c2 = _mm_add_ps (w1, _mm_mul_ps(bs, _mm_sub_ps (w2, w1)));
+
+	// s += *p1 * _c1 [i];  unpacklo/unpackhi repeat each blended coefficient in two adjacent lanes.
+	s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1),     _mm_unpacklo_ps (c1, c1)));
+	s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1 + 4), _mm_unpackhi_ps (c1, c1)));
+
+	// s += *p2 * _c2 [i];  p2 moves backwards, so the shuffles pair p2 + 4 with c2 lanes 1, 0 and p2 with c2 lanes 3, 2.
+	s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2 + 4), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (0, 0, 1, 1))));
+	s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2),     _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (2, 2, 3, 3))));
+
+	p1 += 8;
+    }
+    s = _mm_sub_ps (s, denorm);
+    s = _mm_add_ps (s, _mm_shuffle_ps (s, s, _MM_SHUFFLE (1, 0, 3, 2)));
+
+    // Stores four floats, two more than the two sums we want (lanes 2-3 repeat lanes 0-1); the caller only takes this path when out_count >= 2.
+    _mm_storeu_ps (out_data, s);
+}
+
+#endif
+
+
 VResampler::VResampler (void) :
     _table (0),
     _nchan (0),
@@ -212,23 +264,33 @@
 		    a = 1.0f - b;
 		    q1 = _table->_ctab + hl * k;
 		    q2 = _table->_ctab + hl * (np - k);
-     		    for (i = 0; i < hl; i++)
+#ifdef __SSE2__
+		    if ((hl % 4) == 0 && _nchan == 2 && out_count >= 2)
 		    {
-                        _c1 [i] = a * q1 [i] + b * q1 [i + hl];
-    		        _c2 [i] = a * q2 [i] + b * q2 [i - hl];
+			calc_stereo_sample_sse (hl, b, p1, p2, q1, q2, out_data);
+			out_data += 2;
 		    }
-		    for (c = 0; c < _nchan; c++)
+		    else
+#endif
 		    {
-			q1 = p1 + c;
-			q2 = p2 + c;
-			a = 1e-25f;
-			for (i = 0; i < hl; i++)
-			{
-			    q2 -= _nchan;
-			    a += *q1 * _c1 [i] + *q2 * _c2 [i];
-			    q1 += _nchan;
-			}
-			*out_data++ = a - 1e-25f;
+		        for (i = 0; i < hl; i++)
+		        {
+		            _c1 [i] = a * q1 [i] + b * q1 [i + hl];
+		            _c2 [i] = a * q2 [i] + b * q2 [i - hl];
+		        }
+		        for (c = 0; c < _nchan; c++)
+		        {
+		            q1 = p1 + c;
+		            q2 = p2 + c;
+		            a = 1e-25f;
+		            for (i = 0; i < hl; i++)
+		            {
+		                q2 -= _nchan;
+		                a += *q1 * _c1 [i] + *q2 * _c2 [i];
+		                q1 += _nchan;
+		            }
+		            *out_data++ = a - 1e-25f;
+		        }
 		    }
 		}
 		else

--- End Message ---
--- Begin Message ---
Source: zita-resampler
Source-Version: 1.6.2-1

We believe that the bug you reported is fixed in the latest version of
zita-resampler, which is due to be installed in the Debian FTP archive.

A summary of the changes between this version and the previous one is
attached.

Thank you for reporting the bug, which will now be closed.  If you
have further comments please address them to [email protected],
and the maintainer will reopen the bug report if appropriate.

Debian distribution maintenance software
pp.
Dennis Braun <[email protected]> (supplier of updated zita-resampler package)

(This message was generated automatically at their request; if you
believe that there is a problem with it please contact the archive
administrators by mailing [email protected])


-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA512

Format: 1.8
Date: Mon, 03 Feb 2020 20:45:07 +0100
Source: zita-resampler
Architecture: source
Version: 1.6.2-1
Distribution: unstable
Urgency: medium
Maintainer: Debian Multimedia Maintainers <[email protected]>
Changed-By: Dennis Braun <[email protected]>
Closes: 832095
Changes:
 zita-resampler (1.6.2-1) unstable; urgency=medium
 .
   * Team upload
 .
   [ Felipe Sateler ]
   * d/control: change maintainer address to [email protected]
 .
   [ Ondřej Nový ]
   * d/control: Set Vcs-* to salsa.debian.org
   * d/copyright: Use https protocol in Format field
 .
   [ Dennis Braun ]
   * New upstream version 1.6.2
   * d/control:
     + Use debhelper-compat instead of debian/compat
     + Bump dh-compat to 12
     + Bump Standards-Version to 4.5.0
     + Set RRR: no
     + Use https protocol for homepage
     + Update dithering URL in description
     + Set Multi-Arch: foreign for libzita-resampler-doc
   * Drop d/*.1 and install the ones from upstream
   * d/copyright: Update year, http > https & add myself
   * Fix libzita-resampler-doc installation
   * d/patches: Update patchset
   * d/rules:
     + Add more exports to make the build work
     + Update installation
     + Replace libs/ with source/
     + Add Steinar's patch to enable SSE-optimizing resampling of stereo
       signals (Closes: #832095)
   * d/source/local-options: Drop, obsolete
   * d/watch: Use https protocol
Checksums-Sha1:
 9cce2904e21487e7c97778cba1c6b09d706e7f5f 2270 zita-resampler_1.6.2-1.dsc
 34a65adeb51d266094c00541e4dd7bc6bb032e36 125539 
zita-resampler_1.6.2.orig.tar.bz2
 a2b8a1b8e98a83daadfb0229c8c97dcb49b86b8d 6776 
zita-resampler_1.6.2-1.debian.tar.xz
Checksums-Sha256:
 483e40b9113c5fd35ad440b81c222d3675ea00add711f184091d178856542782 2270 
zita-resampler_1.6.2-1.dsc
 233baefee297094514bfc9063e47f848e8138dc7c959d9cd957b36019b98c5d7 125539 
zita-resampler_1.6.2.orig.tar.bz2
 d965a06565142bd9815f248ad80206a8b5e5c3f67c36202dec893839ee39ff2e 6776 
zita-resampler_1.6.2-1.debian.tar.xz
Files:
 a6ef3e295dd9bdf266940e6d67b85670 2270 sound optional zita-resampler_1.6.2-1.dsc
 9b2cff7fa419febbca3a13435b2a24b3 125539 sound optional 
zita-resampler_1.6.2.orig.tar.bz2
 0f01365270c2f34ad182cc497d7f9539 6776 sound optional 
zita-resampler_1.6.2-1.debian.tar.xz

-----BEGIN PGP SIGNATURE-----

iQIzBAEBCgAdFiEE94y6B4F7sUmhHTOQafL8UW6nGZMFAl44edAACgkQafL8UW6n
GZPGVRAAk5Y9H7vtLW2FwgPaAm03oNda+7HVLuxfU4xKVHNMfRhoODNEbe0H+JSo
sVIZE45MkAJ8uFxJbyeGpaTXXARmG2JFNgRs0TKGF0He42Oa9Hygwi1OkcvsFS4D
BeQs3OI6ZWTYTdtkus8i7AUgNUSo6hrwqckMk/SM7bRlQLPL2Qhq2QpIWewnxGx+
/9ndRu58mn3eKr+zm3e+ytid1I9w2L4DWuH9Tip/60qnT3xOIgXq5PgP2JG3W14X
M+/sG8Wh/m01v72HIkQbQ7CrH8wPELDV6MarcV6XIBgcWHrS8XQjmt//ys5hK75G
jjT/OIVD+kRH3o1TqRT4j12xgKwbsyLyTmaXu9GnIOc8nLBG6HDJge63LzxR44Qs
Z57Hu8/wj9Iijac8RlTv1KJDfLZnGs+OscGx4x0cfAUUHpWU0YwnL6KoeQ6ZhGwb
pbCli1Feb8+N5Z3KXYtqIjucxpGHw+x5Gtw4qgXAQGiRDOpLWKlxmhMdMQAU2dIE
ivNDQ8W/ZniHp0KGsYczSLmqMaeCUWer5gItlAop2BbHB0X2iRJ/L5+T4fs2x7zB
ix7rEjsyuqBdpseNIJfBVQJUbZzyfiV1IWrt0f0phGifzUGViMhN/59XW9a/tuIx
45KyROuYu5mJsC+9Gl4E0CBULSUCdAB9nmHQUvv/G2Kyx0+Mt7g=
=ti1q
-----END PGP SIGNATURE-----

--- End Message ---

Reply via email to