Thanks: Rafal Dabrowa
---
 libavcodec/aarch64/Makefile               |    3 +-
 libavcodec/aarch64/hevcdsp_epel_neon.S    | 2501 +
 libavcodec/aarch64/hevcdsp_init_aarch64.c |   52 +
 3 files changed, 2555 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/aarch64/hevcdsp_epel_neon.S
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index 8592692479..ebedc03bfa 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -61,7 +61,8 @@ NEON-OBJS-$(CONFIG_VP9_DECODER)         += aarch64/vp9itxfm_16bpp_neon.o       \
                                            aarch64/vp9lpf_neon.o               \
                                            aarch64/vp9mc_16bpp_neon.o          \
                                            aarch64/vp9mc_neon.o
-NEON-OBJS-$(CONFIG_HEVC_DECODER)        += aarch64/hevcdsp_idct_neon.o         \
+NEON-OBJS-$(CONFIG_HEVC_DECODER)        += aarch64/hevcdsp_epel_neon.o         \
+                                           aarch64/hevcdsp_idct_neon.o         \
                                            aarch64/hevcdsp_init_aarch64.o      \
                                            aarch64/hevcdsp_qpel_neon.o         \
                                            aarch64/hevcdsp_sao_neon.o
diff --git a/libavcodec/aarch64/hevcdsp_epel_neon.S b/libavcodec/aarch64/hevcdsp_epel_neon.S
new file mode 100644
index 0000000000..bbf93c3d6a
--- /dev/null
+++ b/libavcodec/aarch64/hevcdsp_epel_neon.S
@@ -0,0 +1,2501 @@
+/* -*-arm64-*-
+ * vim: syntax=arm64asm
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+#define MAX_PB_SIZE 64
+
+function ff_hevc_put_hevc_pel_pixels4_8_neon, export=1  // per row: 4 bytes at x1 -> 4 halfwords (value << 6) at x0; w3 = row count
+        mov             x7, #(MAX_PB_SIZE * 2)          // x7 = byte step applied to x0 after each row
+1:      ld1             {v0.s}[0], [x1], x2             // read 4 bytes into lane s[0]; x1 += x2 (source row step)
+        ushll           v4.8h, v0.8b, #6                // zero-extend bytes to halfwords and shift left by 6
+        subs            w3, w3, #1                      // one row consumed; flags are tested by b.ne below
+        st1             {v4.d}[0], [x0], x7             // write halfwords 0-3 (the only valid lanes); x0 += x7
+        b.ne            1b                              // repeat until w3 reaches zero
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_pixels6_8_neon, export=1  // per row: 6 bytes at x1 -> 6 halfwords (value << 6) at x0; w3 = row count
+        mov             x7, #(MAX_PB_SIZE * 2 - 8)      // row step for x0 minus the 8 bytes the first store already advances
+1:      ld1             {v0.8b}, [x1], x2               // reads 8 bytes (2 more than are stored); x1 += x2 (source row step)
+        ushll           v4.8h, v0.8b, #6                // zero-extend bytes to halfwords and shift left by 6
+        st1             {v4.d}[0], [x0], #8             // write halfwords 0-3; x0 += 8
+        subs            w3, w3, #1                      // one row consumed; flags are tested by b.ne below
+        st1             {v4.s}[2], [x0], x7             // write halfwords 4-5; x0 += x7 to reach the next row
+        b.ne            1b                              // repeat until w3 reaches zero
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_pixels8_8_neon, export=1  // per row: 8 bytes at x1 -> 8 halfwords (value << 6) at x0; w3 = row count
+        mov             x7, #(MAX_PB_SIZE * 2)          // x7 = byte step applied to x0 after each row
+1:      ld1             {v0.8b}, [x1], x2               // read 8 bytes; x1 += x2 (source row step)
+        ushll           v4.8h, v0.8b, #6                // zero-extend bytes to halfwords and shift left by 6
+        subs            w3, w3, #1                      // one row consumed; flags are tested by b.ne below
+        st1             {v4.8h}, [x0], x7               // write all 8 halfwords; x0 += x7
+        b.ne            1b                              // repeat until w3 reaches zero
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_pixels12_8_neon, export=1 // per row: 12 bytes at x1 -> 12 halfwords (value << 6) at x0; w3 = row count
+        mov             x7, #(MAX_PB_SIZE * 2 - 16)     // row step for x0 minus the 16 bytes the first store already advances
+1:      ld1             {v0.8b, v1.8b}, [x1], x2        // reads 16 bytes (4 more than are stored); x1 += x2 (source row step)
+        ushll           v4.8h, v0.8b, #6                // bytes 0-7: zero-extend to halfwords and shift left by 6
+        st1             {v4.8h}, [x0], #16              // write halfwords 0-7; x0 += 16
+        ushll           v5.8h, v1.8b, #6                // bytes 8-15: same widening and shift
+        subs            w3, w3, #1                      // one row consumed; flags are tested by b.ne below
+        st1             {v5.d}[0], [x0], x7             // write halfwords 8-11 only; x0 += x7 to reach the next row
+        b.ne            1b                              // repeat until w3 reaches zero
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_pixels16_8_neon, export=1 // per row: 16 bytes at x1 -> 16 halfwords (value << 6) at x0; w3 = row count
+        mov             x7, #(MAX_PB_SIZE * 2)          // x7 = byte step applied to x0 after each row
+1:      ld1             {v0.8b, v1.8b}, [x1], x2        // read 16 bytes into two registers; x1 += x2 (source row step)
+        ushll           v4.8h, v0.8b, #6                // bytes 0-7: zero-extend to halfwords and shift left by 6
+        ushll           v5.8h, v1.8b, #6                // bytes 8-15: same widening and shift
+        subs            w3, w3, #1                      // one row consumed; flags are tested by b.ne below
+        st1             {v4.8h, v5.8h}, [x0], x7        // write all 16 halfwords; x0 += x7
+        b.ne            1b                              // repeat until w3 reaches zero
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_pixels24_8_neon, export=1 // per row: 24 bytes at x1 -> 24 halfwords (value << 6) at x0; w3 = row count
+        mov             x7, #(MAX_PB_SIZE * 2)          // x7 = byte step applied to x0 after each row
+1:      ld1             {v0.8b-v2.8b}, [x1], x2         // read 24 bytes into three registers; x1 += x2 (source row step)
+        ushll           v4.8h, v0.8b, #6                // bytes 0-7: zero-extend to halfwords and shift left by 6
+        ushll           v5.8h, v1.8b, #6                // bytes 8-15: same widening and shift
+        ushll           v6.8h, v2.8b, #6                // bytes 16-23: same widening and shift
+        subs            w3, w3, #1                      // one row consumed; flags are tested by b.ne below
+        st1             {v4.8h-v6.8h}, [x0], x7         // write all 24 halfwords; x0 += x7
+        b.ne            1b                              // repeat until w3 reaches zero
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_pixels32_8_neon, export=1
+mov x7, #(MAX_PB_SIZE * 2)
+1: ld1{v0.8b-v3.8b}, [x1], x2
+ushll v4.8h, v0.8b, #6
+ushll v5.8h, v1.8b, #6
+ushll v6.8h, v2.8b, #6
+ushll v7.8h, v3.8b, #6
+subsw3, w3, #1
+