vf_bwdif: Add neon for filter_edge

Martin Storsjö Sat, 01 Jul 2023 14:40:26 -0700

On Thu, 29 Jun 2023, John Cox wrote:

Signed-off-by: John Cox <j...@kynesim.co.uk>
---
libavfilter/aarch64/vf_bwdif_init_aarch64.c |  20 ++++
libavfilter/aarch64/vf_bwdif_neon.S         | 104 ++++++++++++++++++++
2 files changed, 124 insertions(+)


diff --git a/libavfilter/aarch64/vf_bwdif_init_aarch64.c 
b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
index 3ffaa07ab3..e75cf2f204 100644
--- a/libavfilter/aarch64/vf_bwdif_init_aarch64.c
+++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
@@ -24,10 +24,29 @@
#include "libavfilter/bwdif.h"
#include "libavutil/aarch64/cpu.h"

+void ff_bwdif_filter_edge_neon(void *dst1, void *prev1, void *cur1, void 
*next1,
+                               int w, int prefs, int mrefs, int prefs2, int 
mrefs2,
+                               int parity, int clip_max, int spat);
+
void ff_bwdif_filter_intra_neon(void *dst1, void *cur1, int w, int prefs, int 
mrefs,
                                int prefs3, int mrefs3, int parity, int 
clip_max);


+static void filter_edge_helper(void *dst1, void *prev1, void *cur1, void 
*next1,
+                               int w, int prefs, int mrefs, int prefs2, int 
mrefs2,
+                               int parity, int clip_max, int spat)
+{
+    const int w0 = clip_max != 255 ? 0 : w & ~15;
+
+    ff_bwdif_filter_edge_neon(dst1, prev1, cur1, next1, w0, prefs, mrefs, 
prefs2, mrefs2,
+                              parity, clip_max, spat);
+
+    if (w0 < w)
+        ff_bwdif_filter_edge_c((char *)dst1 + w0, (char *)prev1 + w0, (char 
*)cur1 + w0, (char *)next1 + w0,
+                               w - w0, prefs, mrefs, prefs2, mrefs2,
+                               parity, clip_max, spat);
+}
+
static void filter_intra_helper(void *dst1, void *cur1, int w, int prefs, int 
mrefs,
                                int prefs3, int mrefs3, int parity, int 
clip_max)
{
@@ -52,5 +71,6 @@ ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth)
        return;

    s->filter_intra = filter_intra_helper;
+    s->filter_edge  = filter_edge_helper;
}

diff --git a/libavfilter/aarch64/vf_bwdif_neon.S 
b/libavfilter/aarch64/vf_bwdif_neon.S
index 6c5d1598f4..a33b235882 100644
--- a/libavfilter/aarch64/vf_bwdif_neon.S
+++ b/libavfilter/aarch64/vf_bwdif_neon.S
@@ -128,6 +128,110 @@ coeffs:
        .hword          5570, 3801, 1016, -3801         // hf[0] = v0.h[2], 
-hf[1] = v0.h[5]
        .hword          5077, 981                       // sp[0] = v0.h[6]

+// ============================================================================
+//
+// void ff_bwdif_filter_edge_neon(
+//      void *dst1,     // x0
+//      void *prev1,    // x1
+//      void *cur1,     // x2
+//      void *next1,    // x3
+//      int w,          // w4
+//      int prefs,      // w5
+//      int mrefs,      // w6
+//      int prefs2,     // w7
+//      int mrefs2,     // [sp, #0]
+//      int parity,     // [sp, #8]
+//      int clip_max,   // [sp, #16]  unused
+//      int spat);      // [sp, #24]

This doesn't hold for macOS targets (and the checkasm tests fail on that platform).

On macOS, arguments that aren't passed in registers but on the stack are tightly packed. So since parity is 32 bit and mrefs2 also was 32 bit, parity is available at [sp, #4].

Therefore, it's usually simplest for portability reasons to pass any arguments after the first 8 as intptr_t or ptrdiff_t, as that makes them consistent across platforms.


// Martin

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 08/15] avfilter/vf_bwdif: Add neon for filter_edge

Reply via email to