Add macros for dual scalar half->single multiply and accumulate
Add macro for shift, saturate and shorten single to byte
Add filter constants
Signed-off-by: John Cox <j...@kynesim.co.uk>
---
libavfilter/aarch64/vf_bwdif_neon.S | 46 +++++++++++++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/libavfilter/aarch64/vf_bwdif_neon.S
b/libavfilter/aarch64/vf_bwdif_neon.S
index 639ab22998..a8f0ed525a 100644
--- a/libavfilter/aarch64/vf_bwdif_neon.S
+++ b/libavfilter/aarch64/vf_bwdif_neon.S
@@ -23,3 +23,49 @@
#include "libavutil/aarch64/asm.S"
+.macro SQSHRUNN b, s0, s1, s2, s3, n // b.16b = unsigned-saturated bytes of the 16 signed 32-bit lanes in s0..s3, shifted right by n in total; overwrites s0 and s1
+ sqshrun \s0\().4h, \s0\().4s, #\n - 8 // s0 low half: s0 lanes >> (n - 8), saturated to unsigned 16 bits (immediate must be 1..16, so n is 9..24)
+ sqshrun2 \s0\().8h, \s1\().4s, #\n - 8 // s0 high half: same narrowing applied to s1
+ sqshrun \s1\().4h, \s2\().4s, #\n - 8 // s1 low half: same narrowing applied to s2 (old s1 was consumed by the line above)
+ sqshrun2 \s1\().8h, \s3\().4s, #\n - 8 // s1 high half: same narrowing applied to s3
+ uzp2 \b\().16b, \s0\().16b, \s1\().16b // keep the odd-numbered bytes, i.e. bits 15:8 of every halfword: the remaining >> 8
+.endm
+
+.macro SMULL4K a0, a1, a2, a3, s0, s1, k // signed widening multiply: a0..a3 (4 x 32-bit each) = the 16 halfwords of s0, s1 times k
+ smull \a0\().4s, \s0\().4h, \k // a0 = low 4 halfwords of s0 * k; k is pasted verbatim as the operand (presumably a by-element scalar such as v0.h[0] - confirm at call sites)
+ smull2 \a1\().4s, \s0\().8h, \k // a1 = high 4 halfwords of s0 * k
+ smull \a2\().4s, \s1\().4h, \k // a2 = low 4 halfwords of s1 * k
+ smull2 \a3\().4s, \s1\().8h, \k // a3 = high 4 halfwords of s1 * k
+.endm
+
+.macro UMULL4K a0, a1, a2, a3, s0, s1, k // unsigned widening multiply: a0..a3 (4 x 32-bit each) = the 16 halfwords of s0, s1 times k
+ umull \a0\().4s, \s0\().4h, \k // a0 = low 4 halfwords of s0 * k; k is pasted verbatim as the operand (presumably a by-element scalar such as v0.h[0] - confirm at call sites)
+ umull2 \a1\().4s, \s0\().8h, \k // a1 = high 4 halfwords of s0 * k
+ umull \a2\().4s, \s1\().4h, \k // a2 = low 4 halfwords of s1 * k
+ umull2 \a3\().4s, \s1\().8h, \k // a3 = high 4 halfwords of s1 * k
+.endm
+
+.macro UMLAL4K a0, a1, a2, a3, s0, s1, k // unsigned widening multiply-accumulate: a0..a3 (4 x 32-bit each) += the 16 halfwords of s0, s1 times k
+ umlal \a0\().4s, \s0\().4h, \k // a0 += low 4 halfwords of s0 * k; k is pasted verbatim as the operand (presumably a by-element scalar such as v0.h[0] - confirm at call sites)
+ umlal2 \a1\().4s, \s0\().8h, \k // a1 += high 4 halfwords of s0 * k
+ umlal \a2\().4s, \s1\().4h, \k // a2 += low 4 halfwords of s1 * k
+ umlal2 \a3\().4s, \s1\().8h, \k // a3 += high 4 halfwords of s1 * k
+.endm
+
+.macro UMLSL4K a0, a1, a2, a3, s0, s1, k // unsigned widening multiply-subtract: a0..a3 (4 x 32-bit each) -= the 16 halfwords of s0, s1 times k
+ umlsl \a0\().4s, \s0\().4h, \k // a0 -= low 4 halfwords of s0 * k; k is pasted verbatim as the operand (presumably a by-element scalar such as v0.h[0] - confirm at call sites)
+ umlsl2 \a1\().4s, \s0\().8h, \k // a1 -= high 4 halfwords of s0 * k
+ umlsl \a2\().4s, \s1\().4h, \k // a2 -= low 4 halfwords of s1 * k
+ umlsl2 \a3\().4s, \s1\().8h, \k // a3 -= high 4 halfwords of s1 * k
+.endm
+
+// static const uint16_t coef_lf[2] = { 4309, 213 };
+// static const uint16_t coef_hf[3] = { 5570, 3801, 1016 };
+// static const uint16_t coef_sp[2] = { 5077, 981 };
+
+ .align 4 // 16-byte alignment: on AArch64 .align takes a power-of-two exponent, so ".align 16" would request 64 KiB