From: Matthieu Bouron <matthieu.bou...@stupeflix.com> --- libswscale/arm/yuv2rgb_neon.S | 89 ++++++++++++------------------------------- 1 file changed, 24 insertions(+), 65 deletions(-)
diff --git a/libswscale/arm/yuv2rgb_neon.S b/libswscale/arm/yuv2rgb_neon.S index ef7b0a6..8abb986 100644 --- a/libswscale/arm/yuv2rgb_neon.S +++ b/libswscale/arm/yuv2rgb_neon.S @@ -105,16 +105,6 @@ compute_16px r2, d14, d15, \ofmt .endm -.macro process_2l_16px ofmt - compute_premult d28, d29, d30, d31 - - vld1.8 {q7}, [r4]! @ first line of luma - compute_16px r2, d14, d15, \ofmt - - vld1.8 {q7}, [r12]! @ second line of luma - compute_16px r11, d14, d15, \ofmt -.endm - .macro load_args_nvx push {r4-r12, lr} vpush {q4-q7} @@ -127,13 +117,9 @@ ldr r10,[sp, #128] @ r10 = y_coeff vdup.16 d0, r10 @ d0 = y_coeff vld1.16 {d1}, [r8] @ d1 = *table - add r11, r2, r3 @ r11 = dst + linesize (dst2) - add r12, r4, r5 @ r12 = srcY + linesizeY (srcY2) - lsl r3, r3, #1 - lsl r5, r5, #1 - sub r3, r3, r0, lsl #2 @ r3 = linesize * 2 - width * 4 (padding) - sub r5, r5, r0 @ r5 = linesizeY * 2 - width (paddingY) - sub r7, r7, r0 @ r7 = linesizeC - width (paddingC) + sub r3, r3, r0, lsl #2 @ r3 = linesize - width * 4 (padding) + sub r5, r5, r0 @ r5 = linesizeY - width (paddingY) + sub r7, r7, r0 @ r7 = linesizeC - width (paddingC) .endm .macro load_args_yuv420p @@ -142,26 +128,6 @@ ldr r4, [sp, #104] @ r4 = srcY ldr r5, [sp, #108] @ r5 = linesizeY ldr r6, [sp, #112] @ r6 = srcU - ldr r8, [sp, #128] @ r8 = table - ldr r9, [sp, #132] @ r9 = y_offset - ldr r10,[sp, #136] @ r10 = y_coeff - vdup.16 d0, r10 @ d0 = y_coeff - vld1.16 {d1}, [r8] @ d1 = *table - add r11, r2, r3 @ r11 = dst + linesize (dst2) - add r12, r4, r5 @ r12 = srcY + linesizeY (srcY2) - lsl r3, r3, #1 - lsl r5, r5, #1 - sub r3, r3, r0, lsl #2 @ r3 = linesize * 2 - width * 4 (padding) - sub r5, r5, r0 @ r5 = linesizeY * 2 - width (paddingY) - ldr r10,[sp, #120] @ r10 = srcV -.endm - -.macro load_args_yuv422p - push {r4-r12, lr} - vpush {q4-q7} - ldr r4, [sp, #104] @ r4 = srcY - ldr r5, [sp, #108] @ r5 = linesizeY - ldr r6, [sp, #112] @ r6 = srcU ldr r7, [sp, #116] @ r7 = linesizeU ldr r12,[sp, #124] @ r12 = linesizeV ldr r8, 
[sp, #128] @ r8 = table @@ -176,6 +142,10 @@ ldr r10,[sp, #120] @ r10 = srcV .endm +.macro load_args_yuv422p + load_args_yuv420p +.endm + .macro declare_func ifmt ofmt function ff_\ifmt\()_to_\ofmt\()_neon, export=1 @@ -205,35 +175,30 @@ function ff_\ifmt\()_to_\ofmt\()_neon, export=1 vmov.i8 d10, #128 .ifc \ifmt,nv12 - pld [r12, #64*3] - vld2.8 {d2, d3}, [r6]! @ q1: interleaved chroma line vsubl.u8 q14, d2, d10 @ q14 = U - 128 vsubl.u8 q15, d3, d10 @ q15 = V - 128 - process_2l_16px \ofmt + process_1l_16px \ofmt .endif .ifc \ifmt,nv21 - pld [r12, #64*3] - vld2.8 {d2, d3}, [r6]! @ q1: interleaved chroma line vsubl.u8 q14, d3, d10 @ q14 = U - 128 vsubl.u8 q15, d2, d10 @ q15 = V - 128 - process_2l_16px \ofmt + process_1l_16px \ofmt .endif .ifc \ifmt,yuv420p pld [r10, #64*3] - pld [r12, #64*3] vld1.8 d2, [r6]! @ d2: chroma red line vld1.8 d3, [r10]! @ d3: chroma blue line vsubl.u8 q14, d2, d10 @ q14 = U - 128 vsubl.u8 q15, d3, d10 @ q15 = V - 128 - process_2l_16px \ofmt + process_1l_16px \ofmt .endif .ifc \ifmt,yuv422p @@ -254,35 +219,29 @@ function ff_\ifmt\()_to_\ofmt\()_neon, export=1 add r4, r4, r5 @ srcY += paddingY .ifc \ifmt,nv12 - add r11, r11, r3 @ dst2 += padding - add r12, r12, r5 @ srcY2 += paddingY - - add r6, r6, r7 @ srcC += paddingC + tst r1, #1 + subeq r6, r6, r0 @ if (height % 2 == 0) paddingU -= width + addne r6, r7 @ else paddingU += linesizeU - width - subs r1, r1, #2 @ height -= 2 + subs r1, r1, #1 @ height -= 1 .endif .ifc \ifmt,nv21 - add r11, r11, r3 @ dst2 += padding - add r12, r12, r5 @ srcY2 += paddingY + tst r1, #1 + subeq r6, r6, r0 @ if (height % 2 == 0) paddingU -= width + addne r6, r7 @ else paddingU += linesizeU - width - add r6, r6, r7 @ srcC += paddingC - subs r1, r1, #2 @ height -= 2 + subs r1, r1, #1 @ height -= 1 .endif .ifc \ifmt,yuv420p - add r11, r11, r3 @ dst2 += padding - add r12, r12, r5 @ srcY2 += paddingY + tst r1, #1 + subeq r6, r6, r0, lsr #1 @ if (height % 2 == 0) paddingU -= (width / 2) + addne r6, r7 @ else paddingU 
+= linesizeU - (width / 2) + subeq r10, r10, r0, lsr #1 @ if (height % 2 == 0) paddingV -= (width / 2) + addne r10, r12 @ else paddingV += linesizeV - (width / 2) - ldr r7, [sp, #116] @ r7 = linesizeU - sub r7, r7, r0, lsr #1 @ r7 = linesizeU - width / 2 (paddingU) - add r6, r6, r7 @ srcU += paddingU - - ldr r7, [sp, #124] @ r7 = linesizeV - sub r7, r7, r0, lsr #1 @ r7 = linesizeV - width / 2 (paddingV) - add r10, r10, r7 @ srcV += paddingV - - subs r1, r1, #2 @ height -= 2 + subs r1, r1, #1 @ height -= 1 .endif .ifc \ifmt,yuv422p -- 2.7.4 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel