On Thu, Jun 22, 2017 at 12:53:52PM -0300, James Almer wrote:
> On 6/22/2017 9:56 AM, Matthieu Bouron wrote:
> > ---
> > 
> > The following patchset applies on top of Clément's aacpsdsp patchset.
> > 
> > ---
> >  tests/checkasm/Makefile   |   3 +-
> >  tests/checkasm/checkasm.c |   1 +
> >  tests/checkasm/checkasm.h |   1 +
> >  tests/checkasm/sbrdsp.c   | 297 ++++++++++++++++++++++++++++++++++++++++++++++
> >  4 files changed, 301 insertions(+), 1 deletion(-)
> >  create mode 100644 tests/checkasm/sbrdsp.c
> > 
> > diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
> > index 638e811931..60e80ab738 100644
> > --- a/tests/checkasm/Makefile
> > +++ b/tests/checkasm/Makefile
> > @@ -13,7 +13,8 @@ AVCODECOBJS-$(CONFIG_VP8DSP)            += vp8dsp.o
> >  AVCODECOBJS-$(CONFIG_VIDEODSP)          += videodsp.o
> >  
> >  # decoders/encoders
> > -AVCODECOBJS-$(CONFIG_AAC_DECODER)       += aacpsdsp.o
> > +AVCODECOBJS-$(CONFIG_AAC_DECODER)       += aacpsdsp.o \
> > +                                           sbrdsp.o
> >  AVCODECOBJS-$(CONFIG_ALAC_DECODER)      += alacdsp.o
> >  AVCODECOBJS-$(CONFIG_DCA_DECODER)       += synth_filter.o
> >  AVCODECOBJS-$(CONFIG_JPEG2000_DECODER)  += jpeg2000dsp.o
> > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> > index e66744b162..29f201b1b3 100644
> > --- a/tests/checkasm/checkasm.c
> > +++ b/tests/checkasm/checkasm.c
> > @@ -67,6 +67,7 @@ static const struct {
> >  #if CONFIG_AVCODEC
> >      #if CONFIG_AAC_DECODER
> >          { "aacpsdsp", checkasm_check_aacpsdsp },
> > +        { "sbrdsp",   checkasm_check_sbrdsp },
> >      #endif
> >      #if CONFIG_ALAC_DECODER
> >          { "alacdsp", checkasm_check_alacdsp },
> > diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> > index dfb0ce561c..fa51e71e4b 100644
> > --- a/tests/checkasm/checkasm.h
> > +++ b/tests/checkasm/checkasm.h
> > @@ -50,6 +50,7 @@ void checkasm_check_hevc_idct(void);
> >  void checkasm_check_jpeg2000dsp(void);
> >  void checkasm_check_llviddsp(void);
> >  void checkasm_check_pixblockdsp(void);
> > +void checkasm_check_sbrdsp(void);
> >  void checkasm_check_synth_filter(void);
> >  void checkasm_check_v210enc(void);
> >  void checkasm_check_vp8dsp(void);
> > diff --git a/tests/checkasm/sbrdsp.c b/tests/checkasm/sbrdsp.c
> > new file mode 100644
> > index 0000000000..8333510c6b
> > --- /dev/null
> > +++ b/tests/checkasm/sbrdsp.c
> > @@ -0,0 +1,297 @@
> > +/*
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License along
> > + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> > + */
> > +
> > +#include "libavcodec/sbrdsp.h"
> > +
> > +#include "checkasm.h"
> > +
> > +#define randomize(buf, len) do {                                \
> > +    int i;                                                      \
> > +    for (i = 0; i < len; i++) {                                 \
> > +        const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;          \
> > +        (buf)[i] = f;                                           \
> > +    }                                                           \
> > +} while (0)
> > +
> > +#define EPS 0.0001
> > +
> > +static void test_sum64x5(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64 + 256]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64 + 256]);
> > +
> > +    declare_func(void, INTFLOAT *z);
> > +
> > +    randomize((INTFLOAT *)dst0, 64 + 256);
> > +    memcpy(dst1, dst0, (64 + 256) * sizeof(INTFLOAT));
> > +    call_ref(dst0);
> > +    call_new(dst1);
> > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64 + 256))
> > +        fail();
> > +    bench_new(dst1);
> > +}
> > +
> > +static void test_sum_square(void)
> > +{
> > +    INTFLOAT res0;
> > +    INTFLOAT res1;
> > +    LOCAL_ALIGNED_16(INTFLOAT, src, [256], [2]);
> > +
> > +    declare_func(INTFLOAT, INTFLOAT (*x)[2], int n);
> > +
> > +    randomize((INTFLOAT *)src, 256 * 2);
> > +    res0 = call_ref(src, 256);
> > +    res1 = call_new(src, 256);
> 
> This one is failing on x86 because the second argument has garbage in
> the upper 32 bits of the GPR (general-purpose register).
> 
> The solution is to either change n from int to ptrdiff_t, or zero the
> upper bits of n in ff_sbr_sum_square_sse(), which can be done implicitly
> in the first instruction.
> 
> > +    if (!float_near_abs_eps(res0, res1, EPS))
> > +        fail();
> > +    bench_new(src, 256);
> > +}
> > +
> > +static void test_neg_odd_64(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
> > +
> > +    declare_func(void, INTFLOAT *x);
> > +
> > +    randomize((INTFLOAT *)dst0, 64);
> > +    memcpy(dst1, dst0, (64) * sizeof(INTFLOAT));
> > +    call_ref(dst0);
> > +    call_new(dst1);
> > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
> > +        fail();
> > +    bench_new(dst1);
> > +}
> > +
> > +static void test_qmf_pre_shuffle(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
> > +
> > +    declare_func(void, INTFLOAT *z);
> > +
> > +    randomize((INTFLOAT *)dst0, 128);
> > +    memcpy(dst1, dst0, (128) * sizeof(INTFLOAT));
> > +    call_ref(dst0);
> > +    call_new(dst1);
> > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
> > +        fail();
> > +    bench_new(dst1);
> > +}
> > +
> > +static void test_qmf_post_shuffle(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [32], [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [32], [2]);
> > +
> > +    declare_func(void, INTFLOAT W[32][2], const INTFLOAT *z);
> > +
> > +    randomize((INTFLOAT *)src, 64);
> > +    call_ref(dst0, src);
> > +    call_new(dst1, src);
> > +    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 
> > 64))
> > +        fail();
> > +    bench_new(dst1, src);
> > +}
> > +
> > +static void test_qmf_deint_neg(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
> > +
> > +    declare_func(void, INTFLOAT *v, const INTFLOAT *src);
> > +
> > +    randomize((INTFLOAT *)src, 64);
> > +    call_ref(dst0, src);
> > +    call_new(dst1, src);
> > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
> > +        fail();
> > +    bench_new(dst1, src);
> > +}
> > +
> > +static void test_qmf_deint_bfly(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, src0, [64]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, src1, [64]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
> > +
> > +    declare_func(void, INTFLOAT *v, const INTFLOAT *src0, const INTFLOAT 
> > *src1);
> > +
> > +    memset(dst0, 0, 128 * sizeof(INTFLOAT));
> > +    memset(dst1, 0, 128 * sizeof(INTFLOAT));
> > +
> > +    randomize((INTFLOAT *)src0, 64);
> > +    randomize((INTFLOAT *)src1, 64);
> > +    call_ref(dst0, src0, src1);
> > +    call_new(dst1, src0, src1);
> > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
> > +        fail();
> > +    bench_new(dst1, src0, src1);
> > +}
> > +
> > +static void test_autocorrelate(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, src, [40], [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [3], [2][2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [3], [2][2]);
> > +
> > +    declare_func(void, const INTFLOAT x[40][2], INTFLOAT phi[3][2][2]);
> > +
> > +    memset(dst0, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
> > +    memset(dst1, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
> > +
> > +    randomize((INTFLOAT *)src, 80);
> > +    call_ref(src, dst0);
> > +    call_new(src, dst1);
> > +    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 
> > 3 * 2 * 2))
> > +        fail();
> > +    bench_new(src, dst1);
> > +}
> > +
> > +static void test_hf_gen(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, low, [128], [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, alpha0, [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, alpha1, [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
> > +    INTFLOAT bw = (INTFLOAT)rnd() / UINT_MAX;
> > +    int i;
> > +
> > +    declare_func(void, INTFLOAT (*X_high)[2], const INTFLOAT (*X_low)[2],
> > +                       const INTFLOAT alpha0[2], const INTFLOAT alpha1[2],
> > +                       INTFLOAT bw, int start, int end);
> > +
> > +    randomize((INTFLOAT *)low, 128 * 2);
> > +    randomize((INTFLOAT *)alpha0, 2);
> > +    randomize((INTFLOAT *)alpha1, 2);
> > +    for (i = 2; i < 64; i += 2) {
> > +        memset(dst0, 0, 128 * 2 * sizeof(INTFLOAT));
> > +        memset(dst1, 0, 128 * 2 * sizeof(INTFLOAT));
> > +        call_ref(dst0, low, alpha0, alpha1, 0.0, i, 128);
> > +        call_new(dst1, low, alpha0, alpha1, 0.0, i, 128);
> > +        if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, 
> > EPS, 128 * 2))
> > +            fail();
> > +        bench_new(dst1, low, alpha0, alpha1, bw, i, 128);
> > +    }
> > +}
> > +
> > +static void test_hf_g_filt(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, high, [128], [40][2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, g_filt, [128]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
> > +
> > +    declare_func(void, INTFLOAT (*Y)[2], const INTFLOAT (*X_high)[40][2],
> > +                       const INTFLOAT *g_filt, int m_max, intptr_t ixh);
> > +
> > +    randomize((INTFLOAT *)high, 128 * 40 * 2);
> > +    randomize((INTFLOAT *)g_filt, 128);
> > +
> > +    call_ref(dst0, high, g_filt, 128, 20);
> > +    call_new(dst1, high, g_filt, 128, 20);
> > +    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 
> > 128 * 2))
> > +        fail();
> > +    bench_new(dst1, high, g_filt, 128, 20);
> > +}
> > +
> > +static void test_hf_apply_noise(void)
> > +{
> > +    LOCAL_ALIGNED_16(AAC_FLOAT, s_m, [128]);
> > +    LOCAL_ALIGNED_16(AAC_FLOAT, q_filt, [128]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, ref, [128], [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
> > +    int noise = 0x2a;
> > +    int i;
> > +
> > +    declare_func(void, INTFLOAT (*Y)[2], const AAC_FLOAT *s_m,
> > +                       const AAC_FLOAT *q_filt, int noise,
> > +                       int kx, int m_max);
> > +
> > +    randomize((INTFLOAT *)ref, 128 * 2);
> > +    randomize((INTFLOAT *)s_m, 128);
> > +    randomize((INTFLOAT *)q_filt, 128);
> > +    for (i = 0; i < 2; i++) {
> > +        memcpy(dst0, ref, 128 * 2 * sizeof(INTFLOAT));
> > +        memcpy(dst1, ref, 128 * 2 * sizeof(INTFLOAT));
> > +        call_ref(dst0, s_m, q_filt, noise, i, 128);
> > +        call_new(dst1, s_m, q_filt, noise, i, 128);
> > +        if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, 
> > EPS, 128 * 2))
> > +            fail();
> > +        bench_new(dst1, s_m, q_filt, noise, i, 128);
> > +    }
> > +}
> > +
> > +void checkasm_check_sbrdsp(void)
> > +{
> > +    int i;
> > +    SBRDSPContext sbrdsp;
> > +
> > +    ff_sbrdsp_init(&sbrdsp);
> > +
> > +    if (check_func(sbrdsp.sum64x5, "sum64x5"))
> > +        test_sum64x5();
> > +    report("sum64x5");
> > +
> > +    if (check_func(sbrdsp.sum_square, "sum_square"))
> > +        test_sum_square();
> > +    report("sum_square");
> > +
> > +    if (check_func(sbrdsp.neg_odd_64, "neg_odd_64"))
> > +        test_neg_odd_64();
> > +    report("neg_odd_64");
> > +
> > +    if (check_func(sbrdsp.qmf_pre_shuffle, "qmf_pre_shuffle"))
> > +        test_qmf_pre_shuffle();
> > +    report("qmf_pre_shuffle");
> > +
> > +    if (check_func(sbrdsp.qmf_post_shuffle, "qmf_post_shuffle"))
> > +        test_qmf_post_shuffle();
> > +    report("qmf_post_shuffle");
> > +
> > +    if (check_func(sbrdsp.qmf_deint_neg, "qmf_deint_neg"))
> > +        test_qmf_deint_neg();
> > +    report("qmf_deint_neg");
> > +
> > +    if (check_func(sbrdsp.qmf_deint_bfly, "qmf_deint_bfly"))
> > +        test_qmf_deint_bfly();
> > +    report("qmf_deint_bfly");
> > +
> > +    if (check_func(sbrdsp.autocorrelate, "autocorrelate"))
> > +        test_autocorrelate();
> > +    report("autocorrelate");
> > +
> > +    if (check_func(sbrdsp.hf_gen, "hf_gen"))
> > +        test_hf_gen();
> > +    report("hf_gen");
> > +
> > +    if (check_func(sbrdsp.hf_g_filt, "hf_g_filt"))
> > +        test_hf_g_filt();
> > +    report("hf_g_filt");
> > +
> > +    for (i = 0; i < 4; i++) {
> 
> You could instead add this loop inside test_hf_apply_noise(). It would
> save you a bunch of unnecessary calls to randomize().

Done in the following version of the patch.

Thanks.

-- 
Matthieu B.
>From 749b74d2146cd7ac4dd8e71bcf2a789b901590d8 Mon Sep 17 00:00:00 2001
From: Matthieu Bouron <matthieu.bou...@gmail.com>
Date: Fri, 9 Jun 2017 09:34:12 +0000
Subject: [PATCH 1/3] checkasm: add sbrdsp tests

---
 tests/checkasm/Makefile   |   3 +-
 tests/checkasm/checkasm.c |   1 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/sbrdsp.c   | 298 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 302 insertions(+), 1 deletion(-)
 create mode 100644 tests/checkasm/sbrdsp.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 638e811931..60e80ab738 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -13,7 +13,8 @@ AVCODECOBJS-$(CONFIG_VP8DSP)            += vp8dsp.o
 AVCODECOBJS-$(CONFIG_VIDEODSP)          += videodsp.o
 
 # decoders/encoders
-AVCODECOBJS-$(CONFIG_AAC_DECODER)       += aacpsdsp.o
+AVCODECOBJS-$(CONFIG_AAC_DECODER)       += aacpsdsp.o \
+                                           sbrdsp.o
 AVCODECOBJS-$(CONFIG_ALAC_DECODER)      += alacdsp.o
 AVCODECOBJS-$(CONFIG_DCA_DECODER)       += synth_filter.o
 AVCODECOBJS-$(CONFIG_JPEG2000_DECODER)  += jpeg2000dsp.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index e66744b162..29f201b1b3 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -67,6 +67,7 @@ static const struct {
 #if CONFIG_AVCODEC
     #if CONFIG_AAC_DECODER
         { "aacpsdsp", checkasm_check_aacpsdsp },
+        { "sbrdsp",   checkasm_check_sbrdsp },
     #endif
     #if CONFIG_ALAC_DECODER
         { "alacdsp", checkasm_check_alacdsp },
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index dfb0ce561c..fa51e71e4b 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -50,6 +50,7 @@ void checkasm_check_hevc_idct(void);
 void checkasm_check_jpeg2000dsp(void);
 void checkasm_check_llviddsp(void);
 void checkasm_check_pixblockdsp(void);
+void checkasm_check_sbrdsp(void);
 void checkasm_check_synth_filter(void);
 void checkasm_check_v210enc(void);
 void checkasm_check_vp8dsp(void);
diff --git a/tests/checkasm/sbrdsp.c b/tests/checkasm/sbrdsp.c
new file mode 100644
index 0000000000..038318e021
--- /dev/null
+++ b/tests/checkasm/sbrdsp.c
@@ -0,0 +1,298 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "libavcodec/sbrdsp.h"
+
+#include "checkasm.h"
+
+/* Fill buf[0..len-1] with (INTFLOAT)rnd() / UINT_MAX; idx_ avoids shadowing a caller's i. */
+#define randomize(buf, len) do {                                \
+    int idx_;                                                   \
+    for (idx_ = 0; idx_ < (len); idx_++) {                      \
+        (buf)[idx_] = (INTFLOAT)rnd() / UINT_MAX;               \
+    }                                                           \
+} while (0)
+/* Absolute tolerance passed to the float_near_abs_eps*() comparisons below. */
+#define EPS 0.0001
+
+static void test_sum64x5(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64 + 256]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64 + 256]);
+    /* z is non-const and pre-filled, so it is treated as an in/out buffer here. */
+    declare_func(void, INTFLOAT *z);
+    /* Give the reference call (dst0) and the new call (dst1) identical random data. */
+    randomize((INTFLOAT *)dst0, 64 + 256);
+    memcpy(dst1, dst0, (64 + 256) * sizeof(INTFLOAT));
+    call_ref(dst0);
+    call_new(dst1);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64 + 256))
+        fail();
+    bench_new(dst1);
+}
+
+static void test_sum_square(void)
+{
+    INTFLOAT res0;
+    INTFLOAT res1;
+    LOCAL_ALIGNED_16(INTFLOAT, src, [256], [2]);
+    /* NOTE(review): n is a plain int; review of an earlier revision reported the x86 asm seeing garbage in its upper 32 bits -- confirm that is fixed. */
+    declare_func(INTFLOAT, INTFLOAT (*x)[2], int n);
+    /* src holds 256 pairs (512 INTFLOAT values); the scalar results are compared with EPS. */
+    randomize((INTFLOAT *)src, 256 * 2);
+    res0 = call_ref(src, 256);
+    res1 = call_new(src, 256);
+    if (!float_near_abs_eps(res0, res1, EPS))
+        fail();
+    bench_new(src, 256);
+}
+
+static void test_neg_odd_64(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
+    /* x is non-const and pre-filled, so it is treated as an in/out buffer here. */
+    declare_func(void, INTFLOAT *x);
+    /* Start the reference call and the new call from the same 64 random values. */
+    randomize((INTFLOAT *)dst0, 64);
+    memcpy(dst1, dst0, (64) * sizeof(INTFLOAT));
+    call_ref(dst0);
+    call_new(dst1);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
+        fail();
+    bench_new(dst1);
+}
+
+static void test_qmf_pre_shuffle(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
+    /* z is non-const and pre-filled, so it is treated as an in/out buffer here. */
+    declare_func(void, INTFLOAT *z);
+    /* Start the reference call and the new call from the same 128 random values. */
+    randomize((INTFLOAT *)dst0, 128);
+    memcpy(dst1, dst0, (128) * sizeof(INTFLOAT));
+    call_ref(dst0);
+    call_new(dst1);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
+        fail();
+    bench_new(dst1);
+}
+
+static void test_qmf_post_shuffle(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [32], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [32], [2]);
+    /* W (32 pairs) is the output; z is the read-only input. */
+    declare_func(void, INTFLOAT W[32][2], const INTFLOAT *z);
+    /* NOTE(review): dst0/dst1 are not initialised; assumes all 64 output values get written -- confirm. */
+    randomize((INTFLOAT *)src, 64);
+    call_ref(dst0, src);
+    call_new(dst1, src);
+    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 64))
+        fail();
+    bench_new(dst1, src);
+}
+
+static void test_qmf_deint_neg(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
+    /* v is the output; src is the read-only input. */
+    declare_func(void, INTFLOAT *v, const INTFLOAT *src);
+    /* NOTE(review): dst0/dst1 are not initialised; assumes all 64 output values get written -- confirm. */
+    randomize((INTFLOAT *)src, 64);
+    call_ref(dst0, src);
+    call_new(dst1, src);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
+        fail();
+    bench_new(dst1, src);
+}
+
+static void test_qmf_deint_bfly(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, src0, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, src1, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
+    /* v is the output; src0 and src1 are read-only inputs. */
+    declare_func(void, INTFLOAT *v, const INTFLOAT *src0, const INTFLOAT *src1);
+    /* Zero both outputs so any element a call leaves untouched still compares equal. */
+    memset(dst0, 0, 128 * sizeof(INTFLOAT));
+    memset(dst1, 0, 128 * sizeof(INTFLOAT));
+    /* Two separately randomized 64-value inputs, shared by the ref and new calls. */
+    randomize((INTFLOAT *)src0, 64);
+    randomize((INTFLOAT *)src1, 64);
+    call_ref(dst0, src0, src1);
+    call_new(dst1, src0, src1);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
+        fail();
+    bench_new(dst1, src0, src1);
+}
+
+static void test_autocorrelate(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, src, [40], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [3], [2][2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [3], [2][2]);
+    /* x (40 pairs) is the read-only input; phi (3x2x2 values) is the output. */
+    declare_func(void, const INTFLOAT x[40][2], INTFLOAT phi[3][2][2]);
+    /* Zero both outputs so any element a call leaves untouched still compares equal. */
+    memset(dst0, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
+    memset(dst1, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
+    /* 80 = 40 * 2 values, i.e. the whole src array. */
+    randomize((INTFLOAT *)src, 80);
+    call_ref(src, dst0);
+    call_new(src, dst1);
+    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 3 * 2 * 2))
+        fail();
+    bench_new(src, dst1);
+}
+
+static void test_hf_gen(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, low, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, alpha0, [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, alpha1, [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
+    INTFLOAT bw = (INTFLOAT)rnd() / UINT_MAX;
+    int i;
+    /* X_high is the output; X_low, alpha0, alpha1 and bw are inputs. */
+    declare_func(void, INTFLOAT (*X_high)[2], const INTFLOAT (*X_low)[2],
+                       const INTFLOAT alpha0[2], const INTFLOAT alpha1[2],
+                       INTFLOAT bw, int start, int end);
+    /* Inputs are randomized once and reused for every start value in the loop. */
+    randomize((INTFLOAT *)low, 128 * 2);
+    randomize((INTFLOAT *)alpha0, 2);
+    randomize((INTFLOAT *)alpha1, 2);
+    for (i = 2; i < 64; i += 2) { /* even start values 2..62; end is fixed at 128 */
+        memset(dst0, 0, 128 * 2 * sizeof(INTFLOAT));
+        memset(dst1, 0, 128 * 2 * sizeof(INTFLOAT));
+        call_ref(dst0, low, alpha0, alpha1, bw, i, 128); /* compare with the random bw, */
+        call_new(dst1, low, alpha0, alpha1, bw, i, 128); /* the same value that is benchmarked */
+        if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
+            fail();
+        bench_new(dst1, low, alpha0, alpha1, bw, i, 128);
+    }
+}
+
+static void test_hf_g_filt(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, high, [128], [40][2]);
+    LOCAL_ALIGNED_16(INTFLOAT, g_filt, [128]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
+    /* Y is the output; X_high and g_filt are read-only inputs. */
+    declare_func(void, INTFLOAT (*Y)[2], const INTFLOAT (*X_high)[40][2],
+                       const INTFLOAT *g_filt, int m_max, intptr_t ixh);
+    /* NOTE(review): dst0/dst1 are not initialised; assumes all 128 output pairs get written -- confirm. */
+    randomize((INTFLOAT *)high, 128 * 40 * 2);
+    randomize((INTFLOAT *)g_filt, 128);
+    /* Called with m_max = 128 (matches the buffer lengths) and ixh = 20 (below the 40-entry dimension). */
+    call_ref(dst0, high, g_filt, 128, 20);
+    call_new(dst1, high, g_filt, 128, 20);
+    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
+        fail();
+    bench_new(dst1, high, g_filt, 128, 20);
+}
+
+static void test_hf_apply_noise(const SBRDSPContext *sbrdsp)
+{
+    LOCAL_ALIGNED_16(AAC_FLOAT, s_m, [128]);
+    LOCAL_ALIGNED_16(AAC_FLOAT, q_filt, [128]);
+    LOCAL_ALIGNED_16(INTFLOAT, ref, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
+    int noise = 0x2a;
+    int i, j;
+    /* One prototype is declared for all four sbrdsp->hf_apply_noise[] entries checked below. */
+    declare_func(void, INTFLOAT (*Y)[2], const AAC_FLOAT *s_m,
+                       const AAC_FLOAT *q_filt, int noise,
+                       int kx, int m_max);
+    /* Randomized once, outside the variant loop; ref is the pristine copy that Y is reset from. */
+    randomize((INTFLOAT *)ref, 128 * 2);
+    randomize((INTFLOAT *)s_m, 128);
+    randomize((INTFLOAT *)q_filt, 128);
+    /* NOTE(review): s_m/q_filt are AAC_FLOAT but filled through an INTFLOAT pointer; assumes the two types are identical here -- confirm. */
+    for (i = 0; i < 4; i++) {
+        if (check_func(sbrdsp->hf_apply_noise[i], "hf_apply_noise_%d", i)) {
+            for (j = 0; j < 2; j++) { /* j is passed as kx */
+                memcpy(dst0, ref, 128 * 2 * sizeof(INTFLOAT));
+                memcpy(dst1, ref, 128 * 2 * sizeof(INTFLOAT));
+                call_ref(dst0, s_m, q_filt, noise, j, 128);
+                call_new(dst1, s_m, q_filt, noise, j, 128);
+                if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
+                    fail();
+                bench_new(dst1, s_m, q_filt, noise, j, 128);
+            }
+        }
+    }
+}
+
+void checkasm_check_sbrdsp(void)
+{
+    SBRDSPContext sbrdsp;
+    /* Initialise the context whose function pointers are read by the checks below. */
+    ff_sbrdsp_init(&sbrdsp);
+    /* Each entry point is tested only when check_func() accepts it, then reported under its own name. */
+    if (check_func(sbrdsp.sum64x5, "sum64x5"))
+        test_sum64x5();
+    report("sum64x5");
+
+    if (check_func(sbrdsp.sum_square, "sum_square"))
+        test_sum_square();
+    report("sum_square");
+
+    if (check_func(sbrdsp.neg_odd_64, "neg_odd_64"))
+        test_neg_odd_64();
+    report("neg_odd_64");
+
+    if (check_func(sbrdsp.qmf_pre_shuffle, "qmf_pre_shuffle"))
+        test_qmf_pre_shuffle();
+    report("qmf_pre_shuffle");
+
+    if (check_func(sbrdsp.qmf_post_shuffle, "qmf_post_shuffle"))
+        test_qmf_post_shuffle();
+    report("qmf_post_shuffle");
+
+    if (check_func(sbrdsp.qmf_deint_neg, "qmf_deint_neg"))
+        test_qmf_deint_neg();
+    report("qmf_deint_neg");
+
+    if (check_func(sbrdsp.qmf_deint_bfly, "qmf_deint_bfly"))
+        test_qmf_deint_bfly();
+    report("qmf_deint_bfly");
+
+    if (check_func(sbrdsp.autocorrelate, "autocorrelate"))
+        test_autocorrelate();
+    report("autocorrelate");
+
+    if (check_func(sbrdsp.hf_gen, "hf_gen"))
+        test_hf_gen();
+    report("hf_gen");
+
+    if (check_func(sbrdsp.hf_g_filt, "hf_g_filt"))
+        test_hf_g_filt();
+    report("hf_g_filt");
+    /* hf_apply_noise is an array of variants; the helper calls check_func() on each entry itself. */
+    test_hf_apply_noise(&sbrdsp);
+    report("hf_apply_noise");
+}
-- 
2.13.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to