Signed-off-by: John Cox <j...@kynesim.co.uk>
---
libavfilter/aarch64/vf_bwdif_init_aarch64.c | 20 ++++
libavfilter/aarch64/vf_bwdif_neon.S | 104 ++++++++++++++++++++
2 files changed, 124 insertions(+)
diff --git a/libavfilter/aarch64/vf_bwdif_init_aarch64.c
b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
index 3ffaa07ab3..e75cf2f204 100644
--- a/libavfilter/aarch64/vf_bwdif_init_aarch64.c
+++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
@@ -24,10 +24,29 @@
#include "libavfilter/bwdif.h"
#include "libavutil/aarch64/cpu.h"
+void ff_bwdif_filter_edge_neon(void *dst1, void *prev1, void *cur1, void
*next1,
+ int w, int prefs, int mrefs, int prefs2, int
mrefs2,
+ int parity, int clip_max, int spat);
+
void ff_bwdif_filter_intra_neon(void *dst1, void *cur1, int w, int prefs, int
mrefs,
int prefs3, int mrefs3, int parity, int
clip_max);
+static void filter_edge_helper(void *dst1, void *prev1, void *cur1, void
*next1,
+ int w, int prefs, int mrefs, int prefs2, int
mrefs2,
+ int parity, int clip_max, int spat)
+{ /* Splits one row: the NEON edge filter gets the first w0 samples, the C edge filter gets the remaining w - w0. */
+ const int w0 = clip_max != 255 ? 0 : w & ~15; /* w rounded down to a multiple of 16 when clip_max == 255, otherwise 0 */
+
+ ff_bwdif_filter_edge_neon(dst1, prev1, cur1, next1, w0, prefs, mrefs,
prefs2, mrefs2,
+ parity, clip_max, spat); /* NOTE(review): called unconditionally, including with w0 == 0 - confirm the asm tolerates a zero width */
+
+ if (w0 < w) /* leftover samples, or the whole row when w0 == 0, go to the C implementation */
+ ff_bwdif_filter_edge_c((char *)dst1 + w0, (char *)prev1 + w0, (char
*)cur1 + w0, (char *)next1 + w0,
+ w - w0, prefs, mrefs, prefs2, mrefs2,
+ parity, clip_max, spat); /* pointers advance by w0 bytes; w0 is non-zero only when clip_max == 255, presumably 1 byte per sample - TODO confirm */
+}
+
static void filter_intra_helper(void *dst1, void *cur1, int w, int prefs, int
mrefs,
int prefs3, int mrefs3, int parity, int
clip_max)
{
@@ -52,5 +71,6 @@ ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth)
return;
s->filter_intra = filter_intra_helper;
+ s->filter_edge = filter_edge_helper;
}
diff --git a/libavfilter/aarch64/vf_bwdif_neon.S
b/libavfilter/aarch64/vf_bwdif_neon.S
index 6c5d1598f4..a33b235882 100644
--- a/libavfilter/aarch64/vf_bwdif_neon.S
+++ b/libavfilter/aarch64/vf_bwdif_neon.S
@@ -128,6 +128,110 @@ coeffs:
.hword 5570, 3801, 1016, -3801 // hf[0] = v0.h[2],
-hf[1] = v0.h[5]
.hword 5077, 981 // sp[0] = v0.h[6]
+// ============================================================================
+//
+// void ff_bwdif_filter_edge_neon(
+// void *dst1, // x0
+// void *prev1, // x1
+// void *cur1, // x2
+// void *next1, // x3
+// int w, // w4
+// int prefs, // w5
+// int mrefs, // w6
+// int prefs2, // w7
+// int mrefs2, // [sp, #0]
+// int parity, // [sp, #8]
+// int clip_max, // [sp, #16] unused
+// int spat); // [sp, #24]