From: Matthieu Bouron <matthieu.bou...@stupeflix.com> --- libswscale/arm/yuv2rgb_neon.S | 115 ++++++++++++++---------------------------- 1 file changed, 39 insertions(+), 76 deletions(-)
diff --git a/libswscale/arm/yuv2rgb_neon.S b/libswscale/arm/yuv2rgb_neon.S index 8abb986..f77f534 100644 --- a/libswscale/arm/yuv2rgb_neon.S +++ b/libswscale/arm/yuv2rgb_neon.S @@ -105,7 +105,7 @@ compute_16px r2, d14, d15, \ofmt .endm -.macro load_args_nvx +.macro load_args_nv12 push {r4-r12, lr} vpush {q4-q7} ldr r4, [sp, #104] @ r4 = srcY @@ -122,6 +122,10 @@ sub r7, r7, r0 @ r7 = linesizeC - width (paddingC) .endm +.macro load_args_nv21 + load_args_nv12 +.endm + .macro load_args_yuv420p push {r4-r12, lr} vpush {q4-q7} @@ -146,113 +150,72 @@ load_args_yuv420p .endm -.macro declare_func ifmt ofmt -function ff_\ifmt\()_to_\ofmt\()_neon, export=1 - -.ifc \ifmt,nv12 - load_args_nvx -.endif - -.ifc \ifmt,nv21 - load_args_nvx -.endif - -.ifc \ifmt,yuv420p - load_args_yuv420p -.endif - - -.ifc \ifmt,yuv422p - load_args_yuv422p -.endif - -1: - mov r8, r0 @ r8 = width -2: - pld [r6, #64*3] - pld [r4, #64*3] - - vmov.i8 d10, #128 - -.ifc \ifmt,nv12 +.macro load_chroma_nv12 vld2.8 {d2, d3}, [r6]! @ q1: interleaved chroma line vsubl.u8 q14, d2, d10 @ q14 = U - 128 vsubl.u8 q15, d3, d10 @ q15 = V - 128 +.endm - process_1l_16px \ofmt -.endif - -.ifc \ifmt,nv21 +.macro load_chroma_nv21 vld2.8 {d2, d3}, [r6]! @ q1: interleaved chroma line vsubl.u8 q14, d3, d10 @ q14 = U - 128 vsubl.u8 q15, d2, d10 @ q15 = V - 128 +.endm - process_1l_16px \ofmt -.endif - -.ifc \ifmt,yuv420p - pld [r10, #64*3] - - vld1.8 d2, [r6]! @ d2: chroma red line - vld1.8 d3, [r10]! @ d3: chroma blue line - vsubl.u8 q14, d2, d10 @ q14 = U - 128 - vsubl.u8 q15, d3, d10 @ q15 = V - 128 - - process_1l_16px \ofmt -.endif - -.ifc \ifmt,yuv422p +.macro load_chroma_yuv420p pld [r10, #64*3] vld1.8 d2, [r6]! @ d2: chroma red line vld1.8 d3, [r10]! @ d3: chroma blue line vsubl.u8 q14, d2, d10 @ q14 = U - 128 vsubl.u8 q15, d3, d10 @ q15 = V - 128 +.endm - process_1l_16px \ofmt -.endif - - subs r8, r8, #16 @ width -= 16 - bgt 2b - - add r2, r2, r3 @ dst += padding - add r4, r4, r5 @ srcY += paddingY - -.ifc \ifmt,nv12 - tst r1, #1 - subeq r6, r6, r0 @ if (height % 2 == 0) paddingU -= width - addne r6, r7 @ else paddingU += linesizeU - width - - subs r1, r1, #1 @ height -= 1 -.endif +.macro load_chroma_yuv422p + load_chroma_yuv420p +.endm -.ifc \ifmt,nv21 +.macro increment_nv12 tst r1, #1 subeq r6, r6, r0 @ if (height % 2 == 0) paddingU -= width addne r6, r7 @ else paddingU += linesizeU - width +.endm - subs r1, r1, #1 @ height -= 1 -.endif +.macro increment_nv21 + increment_nv12 +.endm -.ifc \ifmt,yuv420p +.macro increment_yuv420p tst r1, #1 subeq r6, r6, r0, lsr #1 @ if (height % 2 == 0) paddingU -= (width / 2) addne r6, r7 @ else paddingU += linesizeU - (width / 2) subeq r10, r10, r0, lsr #1 @ if (height % 2 == 0) paddingU -= (width / 2) addne r10, r12 @ else paddingV = linesizeV - (width / 2) +.endm - subs r1, r1, #1 @ height -= 1 -.endif - -.ifc \ifmt,yuv422p +.macro increment_yuv422p add r6, r6, r7 @ srcU += paddingU add r10,r10,r12 @ srcV += paddingV +.endm +.macro declare_func ifmt ofmt +function ff_\ifmt\()_to_\ofmt\()_neon, export=1 + load_args_\ifmt +1: + mov r8, r0 @ r8 = width +2: + pld [r6, #64*3] + pld [r4, #64*3] + vmov.i8 d10, #128 + load_chroma_\ifmt + process_1l_16px \ofmt + subs r8, r8, #16 @ width -= 16 + bgt 2b + add r2, r2, r3 @ dst += padding + add r4, r4, r5 @ srcY += paddingY + increment_\ifmt subs r1, r1, #1 @ height -= 1 -.endif - bgt 1b - vpop {q4-q7} pop {r4-r12, lr} mov pc, lr -- 2.7.4 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel