---
libavcodec/arm/h264dsp_neon.S | 41 +--------------------------
libavcodec/arm/neon_transpose.S | 59 +++++++++++++++++++++++++++++++++++++++
libavcodec/arm/vp8dsp_neon.S | 26 +++--------------
3 files changed, 65 insertions(+), 61 deletions(-)
create mode 100644 libavcodec/arm/neon_transpose.S
diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S
index 0fa4a6b..71d8336 100644
--- a/libavcodec/arm/h264dsp_neon.S
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -19,46 +19,7 @@
*/
#include "asm.S"
-
- .macro transpose_8x8 r0 r1 r2 r3 r4 r5 r6 r7
- vtrn.32 \r0, \r4
- vtrn.32 \r1, \r5
- vtrn.32 \r2, \r6
- vtrn.32 \r3, \r7
- vtrn.16 \r0, \r2
- vtrn.16 \r1, \r3
- vtrn.16 \r4, \r6
- vtrn.16 \r5, \r7
- vtrn.8 \r0, \r1
- vtrn.8 \r2, \r3
- vtrn.8 \r4, \r5
- vtrn.8 \r6, \r7
- .endm
-
- .macro transpose_4x4 r0 r1 r2 r3
- vtrn.16 \r0, \r2
- vtrn.16 \r1, \r3
- vtrn.8 \r0, \r1
- vtrn.8 \r2, \r3
- .endm
-
- .macro swap4 r0 r1 r2 r3 r4 r5 r6 r7
- vswp \r0, \r4
- vswp \r1, \r5
- vswp \r2, \r6
- vswp \r3, \r7
- .endm
-
- .macro transpose16_4x4 r0 r1 r2 r3 r4 r5 r6 r7
- vtrn.32 \r0, \r2
- vtrn.32 \r1, \r3
- vtrn.32 \r4, \r6
- vtrn.32 \r5, \r7
- vtrn.16 \r0, \r1
- vtrn.16 \r2, \r3
- vtrn.16 \r4, \r5
- vtrn.16 \r6, \r7
- .endm
+#include "neon_transpose.S"
/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
.macro h264_chroma_mc8 type
diff --git a/libavcodec/arm/neon_transpose.S b/libavcodec/arm/neon_transpose.S
new file mode 100644
index 0000000..7bf3d86
--- /dev/null
+++ b/libavcodec/arm/neon_transpose.S
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <[email protected]>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ .macro transpose_8x8 r0 r1 r2 r3 r4 r5 r6 r7 /* in-place transpose over eight NEON registers; every operand is overwritten */
+ vtrn.32 \r0, \r4 @ stage 1: trade 32-bit elements between registers four rows apart
+ vtrn.32 \r1, \r5
+ vtrn.32 \r2, \r6
+ vtrn.32 \r3, \r7
+ vtrn.16 \r0, \r2 @ stage 2: trade 16-bit elements between registers two rows apart
+ vtrn.16 \r1, \r3
+ vtrn.16 \r4, \r6
+ vtrn.16 \r5, \r7
+ vtrn.8 \r0, \r1 @ stage 3: trade 8-bit elements between adjacent registers
+ vtrn.8 \r2, \r3
+ vtrn.8 \r4, \r5
+ vtrn.8 \r6, \r7 @ with D operands the net effect is an 8x8 byte transpose; with Q operands each 64-bit half is handled independently
+ .endm
+
+ .macro transpose_4x4 r0 r1 r2 r3 /* in-place transpose of 4x4 byte groups over four NEON registers; every operand is overwritten */
+ vtrn.16 \r0, \r2 @ stage 1: trade 16-bit elements between registers two rows apart
+ vtrn.16 \r1, \r3
+ vtrn.8 \r0, \r1 @ stage 2: trade 8-bit elements between adjacent registers
+ vtrn.8 \r2, \r3 @ each 32-bit lane is processed independently, so one register may carry several 4-byte rows side by side
+ .endm
+
+ .macro swap4 r0 r1 r2 r3 r4 r5 r6 r7 /* exchange the full contents of four register pairs: first operand group with second group */
+ vswp \r0, \r4 @ operand 1 <-> operand 5
+ vswp \r1, \r5 @ operand 2 <-> operand 6
+ vswp \r2, \r6 @ operand 3 <-> operand 7
+ vswp \r3, \r7 @ operand 4 <-> operand 8
+ .endm
+
+ .macro transpose16_4x4 r0 r1 r2 r3 r4 r5 r6 r7 /* in-place 4x4 transpose of 16-bit elements, done separately on operands 1-4 and operands 5-8 */
+ vtrn.32 \r0, \r2 @ stage 1: trade 32-bit elements between registers two rows apart (first group)
+ vtrn.32 \r1, \r3
+ vtrn.32 \r4, \r6 @ same stage for the second group
+ vtrn.32 \r5, \r7
+ vtrn.16 \r0, \r1 @ stage 2: trade 16-bit elements between adjacent registers (first group)
+ vtrn.16 \r2, \r3
+ vtrn.16 \r4, \r5 @ same stage for the second group
+ vtrn.16 \r6, \r7 @ no instruction mixes the two groups, so they stay independent
+ .endm
diff --git a/libavcodec/arm/vp8dsp_neon.S b/libavcodec/arm/vp8dsp_neon.S
index 1b9f24e..34bfd0e 100644
--- a/libavcodec/arm/vp8dsp_neon.S
+++ b/libavcodec/arm/vp8dsp_neon.S
@@ -22,6 +22,7 @@
*/
#include "asm.S"
+#include "neon_transpose.S"
function ff_vp8_luma_dc_wht_neon, export=1
vld1.16 {q0-q1}, [r1,:128]
@@ -454,23 +455,6 @@ endfunc
.endif
.endm
-.macro transpose8x16matrix
- vtrn.32 q0, q4
- vtrn.32 q1, q5
- vtrn.32 q2, q6
- vtrn.32 q3, q7
-
- vtrn.16 q0, q2
- vtrn.16 q1, q3
- vtrn.16 q4, q6
- vtrn.16 q5, q7
-
- vtrn.8 q0, q1
- vtrn.8 q2, q3
- vtrn.8 q4, q5
- vtrn.8 q6, q7
-.endm
-
.macro vp8_v_loop_filter16 name, inner=0, simple=0
function ff_vp8_v_loop_filter16\name\()_neon, export=1
vpush {q4-q7}
@@ -605,7 +589,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
vld1.8 {d13}, [r0], r1
vld1.8 {d15}, [r0], r1
- transpose8x16matrix
+ transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
vdup.8 q14, r2 @ flim_E
.if !\simple
@@ -616,7 +600,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
sub r0, r0, r1, lsl #4 @ backup 16 rows
- transpose8x16matrix
+ transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
@ Store pixels:
vst1.8 {d0}, [r0], r1
@@ -670,7 +654,7 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
vld1.8 {d14}, [r0], r2
vld1.8 {d15}, [r1], r2
- transpose8x16matrix
+ transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
vdup.8 q14, r3 @ flim_E
vdup.8 q15, r12 @ flim_I
@@ -681,7 +665,7 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
sub r0, r0, r2, lsl #3 @ backup u 8 rows
sub r1, r1, r2, lsl #3 @ backup v 8 rows
- transpose8x16matrix
+ transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
@ Store pixels:
vst1.8 {d0}, [r0], r2
--
1.7.7
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel