[FFmpeg-cvslog] build: Add an option for passing linker flags to the shared library build
ffmpeg | branch: master | Janne Grunau | Tue Jun 20 19:25:43 2017 +0200| [857e26b655a769e5a56bada1a0d9adb44cc176b7] | committer: Diego Biurrun build: Add an option for passing linker flags to the shared library build Also employ this mechanism to pass $libdir to the runtime library search path if rpath is enabled. This fixes underlinking of some test binaries on some systems. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=857e26b655a769e5a56bada1a0d9adb44cc176b7 --- avbuild/library.mak | 2 +- configure | 11 ++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/avbuild/library.mak b/avbuild/library.mak index 2095f61d80..30757546fd 100644 --- a/avbuild/library.mak +++ b/avbuild/library.mak @@ -48,7 +48,7 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR) $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SUBDIR)lib$(NAME).ver $(DEP_LIBS) $(SLIB_CREATE_DEF_CMD) - $$(LD) $(SHFLAGS) $(LDFLAGS) $$(LD_O) $$(filter %.o,$$^) $(FFEXTRALIBS) + $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter %.o,$$^) $(FFEXTRALIBS) $(SLIB_EXTRA_CMD) clean:: diff --git a/configure b/configure index bbed2258b9..ce0f6c919e 100755 --- a/configure +++ b/configure @@ -283,6 +283,7 @@ Toolchain options: --extra-objcflags=FLAGS add FLAGS to OBJCFLAGS [$CFLAGS] --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS [$LDFLAGS] --extra-ldexeflags=ELDFLAGS add ELDFLAGS to LDEXEFLAGS [$LDEXEFLAGS] + --extra-ldsoflags=ELDFLAGS add ELDFLAGS to LDSOFLAGS [$LDSOFLAGS] --extra-libs=ELIBS add ELIBS [$ELIBS] --extra-version=STRING version string suffix [] --optflags=OPTFLAGS override optimization-related compiler flags @@ -740,6 +741,10 @@ add_ldexeflags(){ append LDEXEFLAGS $($ldflags_filter "$@") } +add_ldsoflags(){ +append LDSOFLAGS $($ldflags_filter "$@") +} + add_stripflags(){ append STRIPFLAGS "$@" } @@ -2802,6 +2807,9 @@ for opt do --extra-ldexeflags=*) add_ldexeflags $optval ;; +--extra-ldsoflags=*) +add_ldsoflags $optval +;; --extra-libs=*) add_extralibs $optval ;; @@ 
-4950,7 +4958,7 @@ EOF # add some linker flags check_ldflags -Wl,--warn-common check_ldflags -Wl,-rpath-link=libswscale:libavfilter:libavdevice:libavformat:libavcodec:libavutil:libavresample -enabled rpath && add_ldexeflags -Wl,-rpath,$libdir +enabled rpath && add_ldexeflags -Wl,-rpath,$libdir && add_ldsoflags -Wl,-rpath,$libdir test_ldflags -Wl,-Bsymbolic && append SHFLAGS -Wl,-Bsymbolic # add some strip flags @@ -5410,6 +5418,7 @@ LD_PATH=$LD_PATH DLLTOOL=$dlltool LDFLAGS=$LDFLAGS LDEXEFLAGS=$LDEXEFLAGS +LDSOFLAGS=$LDSOFLAGS SHFLAGS=$(echo $($ldflags_filter $SHFLAGS)) STRIPFLAGS=$STRIPFLAGS X86ASMFLAGS=$X86ASMFLAGS ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] Revert "mmaldec: autodetect by default" since it breaks linking on systems without mmal libraries
ffmpeg | branch: master | Janne Grunau | Sat Apr 30 16:05:32 2016 +0200| [c26741332165a049717e6da84db13a24ee8edade] | committer: Janne Grunau Revert "mmaldec: autodetect by default" since it breaks linking on systems without mmal libraries This reverts commit 33ac77e850efdfd0e8835950c3d947baffd4df45. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c26741332165a049717e6da84db13a24ee8edade --- configure |7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/configure b/configure index 3d236f5..e395e38 100755 --- a/configure +++ b/configure @@ -142,7 +142,7 @@ Hardware accelerators: Hardware-accelerated decoding/encoding: --enable-cudaenable dynamically linked CUDA [no] --enable-libmfx enable HW acceleration through libmfx - --enable-mmalenable decoding via MMAL [auto] + --enable-mmalenable decoding via MMAL [no] --enable-nvenc enable encoding via NVENC [no] --enable-omx enable encoding via OpenMAX IL [no] --enable-omx-rpi enable encoding via OpenMAX IL for Raspberry Pi [no] @@ -1252,7 +1252,6 @@ EXTERNAL_LIBRARY_LIST=" libxcb_shm libxcb_xfixes libxvid -mmal openssl x11grab zlib @@ -4623,13 +4622,13 @@ enabled libx265 && require_pkg_config x265 x265.h x265_api_get && die "ERROR: libx265 version must be >= 57."; } enabled libxavs && require libxavs xavs.h xavs_encoder_encode -lxavs enabled libxvid && require libxvid xvid.h xvid_global -lxvidcore -disabled mmal || enable mmal && { check_lib interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host || +enabled mmal && { check_lib interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host || { ! 
enabled cross_compile && { add_cflags -isystem/opt/vc/include/ -isystem/opt/vc/include/interface/vmcs_host/linux -isystem/opt/vc/include/interface/vcos/pthreads -fgnu89-inline ; add_extralibs -L/opt/vc/lib/ -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host ; check_lib interface/mmal/mmal.h mmal_port_connect ; } check_lib interface/mmal/mmal.h mmal_port_connect ; } || - disable mmal; } + die "ERROR: mmal not found"; } enabled mmal && check_func_headers interface/mmal/mmal.h "MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS" enabled omx_rpi && enable omx enabled omx && { check_header OMX_Core.h || ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] fate: Add --ignore-tests configure option for omitting specific FATE tests
ffmpeg | branch: master | Janne Grunau | Wed Dec 28 00:19:49 2016 +0200| [35d1f726eb9fdd376ab900587fb02122b72f2b9a] | committer: Diego Biurrun fate: Add --ignore-tests configure option for omitting specific FATE tests This can be useful to filter out noise in known-broken scenarios like miscompilation by legacy compilers and similar. Originally based on a patch by Diego Biurrun. Signed-off-by: Diego Biurrun > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=35d1f726eb9fdd376ab900587fb02122b72f2b9a --- configure | 11 +++ doc/fate.texi | 1 + tests/Makefile| 4 +++- tests/fate-run.sh | 9 +++-- tests/fate.sh | 1 + 5 files changed, 23 insertions(+), 3 deletions(-) diff --git a/configure b/configure index 09674aad4d..d8b59e2548 100755 --- a/configure +++ b/configure @@ -348,6 +348,8 @@ Developer options (useful when working on Libav itself): --random-seed=VALUE seed value for --enable/disable-random --disable-valgrind-backtrace do not print a backtrace under Valgrind (only applies to --disable-optimizations builds) + --ignore-tests=TESTS comma-separated list (without "fate-" prefix + in the name) of tests whose result is ignored NOTE: Object files are built at the place where configure is launched. 
EOF @@ -1815,6 +1817,7 @@ CMDLINE_SET=" host_ld host_ldflags host_os +ignore_tests ld logfile malloc_prefix @@ -5192,6 +5195,13 @@ for type in decoder encoder hwaccel parser demuxer muxer protocol filter bsf ind echo done +if test -n "$ignore_tests"; then +ignore_tests=$(echo $ignore_tests | tr ',' ' ') +echo "Ignored FATE tests:" +echo $ignore_tests | print_3_columns +echo +fi + license="LGPL version 2.1 or later" if enabled nonfree; then license="nonfree and unredistributable" @@ -5316,6 +5326,7 @@ SLIB_INSTALL_EXTRA_LIB=${SLIB_INSTALL_EXTRA_LIB} SLIB_INSTALL_EXTRA_SHLIB=${SLIB_INSTALL_EXTRA_SHLIB} VERSION_SCRIPT_POSTPROCESS_CMD=${VERSION_SCRIPT_POSTPROCESS_CMD} SAMPLES:=${samples:-\$(LIBAV_SAMPLES)} +IGNORE_TESTS=$ignore_tests EOF map 'eval echo "${v}_FFLIBS=\$${v}_deps" >> avbuild/config.mak' $LIBRARY_LIST diff --git a/doc/fate.texi b/doc/fate.texi index 9e654e79a1..b1bfa2e7ed 100644 --- a/doc/fate.texi +++ b/doc/fate.texi @@ -139,6 +139,7 @@ workdir=# directory in which to do all the work fate_recv="ssh -T fate@@fate.libav.org" # command to submit report comment=# optional description build_only= # set to "yes" for a compile-only instance that skips tests +ignore_tests= # the following are optional and map to configure options arch= diff --git a/tests/Makefile b/tests/Makefile index 0e475a2836..30e06e8fdd 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -143,11 +143,13 @@ endif FATE_UTILS = base64 tiny_psnr +$(addprefix fate-, $(IGNORE_TESTS)): REPORT=ignore + fate: $(FATE) $(FATE): $(FATE_UTILS:%=tests/%$(HOSTEXESUF)) @echo "TEST$(@:fate-%=%)" - $(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)' '$(CPUFLAGS)' '$(CMP_SHIFT)' '$(CMP_TARGET)' '$(SIZE_TOLERANCE)' '$(CMP_UNIT)' '$(GEN)' '$(HWACCEL)' + $(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)' '$(CPUFLAGS)' 
'$(CMP_SHIFT)' '$(CMP_TARGET)' '$(SIZE_TOLERANCE)' '$(CMP_UNIT)' '$(GEN)' '$(HWACCEL)' '$(REPORT)' fate-list: @printf '%s\n' $(sort $(FATE)) diff --git a/tests/fate-run.sh b/tests/fate-run.sh index b1b299a055..27cd6261a5 100755 --- a/tests/fate-run.sh +++ b/tests/fate-run.sh @@ -24,6 +24,7 @@ size_tolerance=${14:-0} cmp_unit=${15:-2} gen=${16:-no} hwaccel=${17:-none} +report_type=${18:-standard} outdir="tests/data/fate" outfile="${outdir}/${test}" @@ -212,13 +213,17 @@ if test -e "$ref" || test $cmp = "oneline" ; then esac cmperr=$? test $err = 0 && err=$cmperr -test $err = 0 || cat $cmpfile +if [ "$report_type" = "ignore" ]; then +test $err = 0 || echo "IGNORE fate-${test}" && err=0 +else +test $err = 0 || cat $cmpfile +fi else echo "reference file '$ref' not found" err=1
[FFmpeg-cvslog] build: remove hardcoded name of version header
ffmpeg | branch: master | Janne Grunau | Mon Sep 12 21:52:01 2016 +0200| [15fcf6292ed79be274c824fedb099c2665f4cc15] | committer: Janne Grunau build: remove hardcoded name of version header Fixes an oversight in 1316df7aa98c4. CC: libav-sta...@libav.org > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=15fcf6292ed79be274c824fedb099c2665f4cc15 --- version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.sh b/version.sh index 6f72b2c..4689627 100755 --- a/version.sh +++ b/version.sh @@ -18,7 +18,7 @@ if [ -z "$2" ]; then fi NEW_REVISION="#define LIBAV_VERSION \"$version\"" -OLD_REVISION=$(cat version.h 2> /dev/null) +OLD_REVISION=$(cat "$2" 2> /dev/null) # Update version.h only on revision changes to avoid spurious rebuilds if test "$NEW_REVISION" != "$OLD_REVISION"; then ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] aarch64: vp9: loop_filter: fix typo in skip flatout8 check
ffmpeg | branch: master | Janne Grunau | Mon Nov 14 01:16:00 2016 +0100| [31756abe29eb039a11c59a42cb12e0cc2aef3b97] | committer: Martin Storsjö aarch64: vp9: loop_filter: fix typo in skip flatout8 check The 16_16 loop filter functions could miss an early exit before flatout8. Signed-off-by: Martin Storsjö > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=31756abe29eb039a11c59a42cb12e0cc2aef3b97 --- libavcodec/aarch64/vp9lpf_neon.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S index 995a97d..c1b0c88 100644 --- a/libavcodec/aarch64/vp9lpf_neon.S +++ b/libavcodec/aarch64/vp9lpf_neon.S @@ -413,7 +413,7 @@ cbz x5, 7f mov x5, v7.d[0] .ifc \sz, .16b -mov x6, v2.d[1] +mov x6, v7.d[1] orr x5, x5, x6 .endif // If no pixels need flat8out, jump to a writeout of the inner 6 pixels ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] aarch64: vp9: use alternative returns in the core loop filter function
ffmpeg | branch: master | Janne Grunau | Mon Nov 14 00:13:34 2016 +0100| [d7595de0b25e7064fd9e06dea5d0425536cef6dc] | committer: Janne Grunau aarch64: vp9: use alternative returns in the core loop filter function Since aarch64 has enough free general purpose registers use them to branch to the appropriate storage code. 1-2 cycles faster for the functions using loop_filter 8/16, ... on a cortex-a53. Mixed results (up to 2 cycles faster/slower) on a cortex-a57. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d7595de0b25e7064fd9e06dea5d0425536cef6dc --- libavcodec/aarch64/vp9lpf_neon.S | 48 +++- 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S index c1b0c88..392794b 100644 --- a/libavcodec/aarch64/vp9lpf_neon.S +++ b/libavcodec/aarch64/vp9lpf_neon.S @@ -410,15 +410,19 @@ .endif // If no pixels needed flat8in nor flat8out, jump to a // writeout of the inner 4 pixels -cbz x5, 7f +cbnzx5, 1f +br x14 +1: mov x5, v7.d[0] .ifc \sz, .16b mov x6, v7.d[1] orr x5, x5, x6 .endif // If no pixels need flat8out, jump to a writeout of the inner 6 pixels -cbz x5, 8f +cbnzx5, 1f +br x15 +1: // flat8out // This writes all outputs into v2-v17 (skipping v6 and v16). 
// If this part is skipped, the output is read from v21-v26 (which is the input @@ -549,35 +553,24 @@ endfunc function vp9_loop_filter_8 loop_filter 8, .8b, 0,v16, v17, v18, v19, v28, v29, v30, v31 -mov x5, #0 ret 6: -mov x5, #6 -ret +br x13 9: br x10 endfunc function vp9_loop_filter_8_16b_mix loop_filter 8, .16b, 88, v16, v17, v18, v19, v28, v29, v30, v31 -mov x5, #0 ret 6: -mov x5, #6 -ret +br x13 9: br x10 endfunc function vp9_loop_filter_16 loop_filter 16, .8b, 0,v8, v9, v10, v11, v12, v13, v14, v15 -mov x5, #0 -ret -7: -mov x5, #7 -ret -8: -mov x5, #8 ret 9: ldp d8, d9, [sp], 0x10 @@ -589,13 +582,6 @@ endfunc function vp9_loop_filter_16_16b loop_filter 16, .16b, 0,v8, v9, v10, v11, v12, v13, v14, v15 -mov x5, #0 -ret -7: -mov x5, #7 -ret -8: -mov x5, #8 ret 9: ldp d8, d9, [sp], 0x10 @@ -614,11 +600,14 @@ endfunc .endm .macro loop_filter_8 +// calculate alternative 'return' targets +adr x13, 6f bl vp9_loop_filter_8 -cbnzx5, 6f .endm .macro loop_filter_8_16b_mix mix +// calculate alternative 'return' targets +adr x13, 6f .if \mix == 48 mov x11, #0x .elseif \mix == 84 @@ -627,21 +616,20 @@ endfunc mov x11, #0x .endif bl vp9_loop_filter_8_16b_mix -cbnzx5, 6f .endm .macro loop_filter_16 +// calculate alternative 'return' targets +adr x14, 7f +adr x15, 8f bl vp9_loop_filter_16 -cmp x5, 7 -b.gt8f -b.eq7f .endm .macro loop_filter_16_16b +// calculate alternative 'return' targets +adr x14, 7f +adr x15, 8f bl vp9_loop_filter_16_16b -cmp x5, 7 -b.gt8f -b.eq7f .endm ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] aarch64: vp9: loop filter: replace 'orr; cbn?z' with 'adds; b.{eq,ne};'
ffmpeg | branch: master | Janne Grunau | Mon Nov 14 22:35:13 2016 +0100| [e7ae8f7a715843a5089d18e033afb3ee19ab3057] | committer: Janne Grunau aarch64: vp9: loop filter: replace 'orr; cbn?z' with 'adds; b.{eq,ne}; The latter is 1 cycle faster on a cortex-53 and since the operands are bytewise (or larger) bitmask (impossible to overflow to zero) both are equivalent. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e7ae8f7a715843a5089d18e033afb3ee19ab3057 --- libavcodec/aarch64/vp9lpf_neon.S | 31 --- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S index 392794b..e9c7d9e 100644 --- a/libavcodec/aarch64/vp9lpf_neon.S +++ b/libavcodec/aarch64/vp9lpf_neon.S @@ -218,13 +218,15 @@ xtn_sz v5, v6.8h, v7.8h, \sz and v4\sz, v4\sz, v5\sz // fm +// If no pixels need filtering, just exit as soon as possible mov x5, v4.d[0] .ifc \sz, .16b mov x6, v4.d[1] -orr x5, x5, x6 -.endif -// If no pixels need filtering, just exit as soon as possible +addsx5, x5, x6 +b.eq9f +.else cbz x5, 9f +.endif .if \wd >= 8 moviv0\sz, #1 @@ -344,15 +346,17 @@ bit v22\sz, v0\sz, v5\sz // if (!hev && fm && !flat8in) bit v25\sz, v2\sz, v5\sz +// If no pixels need flat8in, jump to flat8out +// (or to a writeout of the inner 4 pixels, for wd=8) .if \wd >= 8 mov x5, v6.d[0] .ifc \sz, .16b mov x6, v6.d[1] -orr x5, x5, x6 -.endif -// If no pixels need flat8in, jump to flat8out -// (or to a writeout of the inner 4 pixels, for wd=8) +addsx5, x5, x6 +b.eq6f +.else cbz x5, 6f +.endif // flat8in uaddl_sz\tmp1\().8h, \tmp2\().8h, v20, v21, \sz @@ -406,20 +410,25 @@ mov x5, v2.d[0] .ifc \sz, .16b mov x6, v2.d[1] -orr x5, x5, x6 +adds x5, x5, x6 +b.ne1f +.else +cbnzx5, 1f .endif // If no pixels needed flat8in nor flat8out, jump to a // writeout of the inner 4 pixels -cbnzx5, 1f br x14 1: + mov x5, v7.d[0] .ifc \sz, .16b mov x6, v7.d[1] -orr x5, x5, x6 +adds x5, x5, x6 +b.ne1f +.else +cbnzx5, 1f .endif // If no pixels need 
flat8out, jump to a writeout of the inner 6 pixels -cbnzx5, 1f br x15 1: ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] arm: vp9itxfm: Simplify the stack alignment code
ffmpeg | branch: master | Janne Grunau | Fri Nov 18 09:36:59 2016 +0200| [e5b0fc170f85b00f7dd0ac514918fb5c95253d39] | committer: Martin Storsjö arm: vp9itxfm: Simplify the stack alignment code This is one instruction less for thumb, and only have got 1/2 arm/thumb specific instructions. Signed-off-by: Martin Storsjö > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e5b0fc170f85b00f7dd0ac514918fb5c95253d39 --- libavcodec/arm/vp9itxfm_neon.S | 28 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S index cdb43b5..5d73d84 100644 --- a/libavcodec/arm/vp9itxfm_neon.S +++ b/libavcodec/arm/vp9itxfm_neon.S @@ -791,15 +791,13 @@ function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1 .ifnc \txfm1\()_\txfm2,idct_idct vpush {q4-q7} .endif -mov r7, sp @ Align the stack, allocate a temp buffer -T mov r12, sp -T bic r12, r12, #15 -T sub r12, r12, #512 -T mov sp, r12 -A bic sp, sp, #15 -A sub sp, sp, #512 +T mov r7, sp +T and r7, r7, #15 +A and r7, sp, #15 +add r7, r7, #512 +sub sp, sp, r7 mov r4, r0 mov r5, r1 @@ -828,7 +826,7 @@ A sub sp, sp, #512 bl \txfm2\()16_1d_4x16_pass2_neon .endr -mov sp, r7 +add sp, sp, r7 .ifnc \txfm1\()_\txfm2,idct_idct vpop{q4-q7} .endif @@ -1117,15 +1115,13 @@ function ff_vp9_idct_idct_32x32_add_neon, export=1 beq idct32x32_dc_add_neon push{r4-r7,lr} vpush {q4-q7} -mov r7, sp @ Align the stack, allocate a temp buffer -T mov r12, sp -T bic r12, r12, #15 -T sub r12, r12, #2048 -T mov sp, r12 -A bic sp, sp, #15 -A sub sp, sp, #2048 +T mov r7, sp +T and r7, r7, #15 +A and r7, sp, #15 +add r7, r7, #2048 +sub sp, sp, r7 mov r4, r0 mov r5, r1 @@ -1143,7 +1139,7 @@ A sub sp, sp, #2048 bl idct32_1d_4x32_pass2_neon .endr -mov sp, r7 +add sp, sp, r7 vpop{q4-q7} pop {r4-r7,pc} endfunc ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] arm: warn/error on movrelx usage problematic with PIC on ELF
ffmpeg | branch: master | Janne Grunau | Fri Nov 18 21:06:40 2016 +0100| [6a1ea4ec932f4fc9fdc00ec51ee070b298ddb35f] | committer: Janne Grunau arm: warn/error on movrelx usage problematic with PIC on ELF The warning has false positives but our asm does not trigger it. For new code false positives can only be avoided by changing the register allocation. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6a1ea4ec932f4fc9fdc00ec51ee070b298ddb35f --- libavutil/arm/asm.S | 9 + 1 file changed, 9 insertions(+) diff --git a/libavutil/arm/asm.S b/libavutil/arm/asm.S index 4ac0ea2..a791e80 100644 --- a/libavutil/arm/asm.S +++ b/libavutil/arm/asm.S @@ -184,6 +184,15 @@ T ldr \rd, [\rd] .endm .macro movrelx rd, val, gp +.ifc \rd,\gp +.error "movrelx needs two distinct registers" +.endif +.ifc \rd\()_\gp,r12_ +.warning"movrelx rd=\rd without explicit set gp" +.endif +.ifc \rd\()_\gp,ip_ +.warning"movrelx rd=\rd without explicit set gp" +.endif #if CONFIG_PIC && defined(__ELF__) .ifnb \gp .if .Lpic_gp ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] arm64: replace 'bic' with immediate with 'and' with inverted immediate
ffmpeg | branch: master | Janne Grunau | Thu Dec 8 20:40:34 2016 +0100| [2425d7329fdccfa9954faba748f3865151354f0c] | committer: Janne Grunau arm64: replace 'bic' with immediate with 'and' with inverted immediate The former is not an official pseudo instruction although gas and llvm's internal assembler support it. Fixes a build error with xcode 6.2 reported by Memphiz on github. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2425d7329fdccfa9954faba748f3865151354f0c --- libavcodec/aarch64/synth_filter_neon.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/aarch64/synth_filter_neon.S b/libavcodec/aarch64/synth_filter_neon.S index 9551bff8e3..b001c737da 100644 --- a/libavcodec/aarch64/synth_filter_neon.S +++ b/libavcodec/aarch64/synth_filter_neon.S @@ -50,7 +50,7 @@ function ff_synth_filter_float_neon, export=1 add x1, x1, x7, lsl #2 // synth_buf sub w8, w7, #32 stp x5, x1, [sp, #16] -bic x7, x7, #63 +and x7, x7, #~63 and w8, w8, #511 stp x7, x30, [sp, #32] str w8, [x2] ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] vp8/armv6: mc: avoid boolean expression in calculation
ffmpeg | branch: master | Janne Grunau | Sat Jul 9 15:30:34 2016 +0200| [5f74bd31a9bd1ac7655103b11743c12d38e0419f] | committer: Janne Grunau vp8/armv6: mc: avoid boolean expression in calculation GNU as evaluates true as '-1' while Apple's variant and llvm's internal assembler evaluate it as '1'. The best way to avoid this madness is to eliminate boolean expressions instead of trying to fix it with preprocessor directives. Use a direct formula to calculate the required temporary space on the stack in ff_put_vp8_{epel,bilin}{4,8,16}_h[246]v[246]_armv6(). Fixes a checkasm segfault in vp8dsp.mc when using llvm's internal assembler for a non-Apple target. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5f74bd31a9bd1ac7655103b11743c12d38e0419f --- libavcodec/arm/vp8dsp_armv6.S | 9 ++--- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/libavcodec/arm/vp8dsp_armv6.S b/libavcodec/arm/vp8dsp_armv6.S index 4e83fe1..565361e 100644 --- a/libavcodec/arm/vp8dsp_armv6.S +++ b/libavcodec/arm/vp8dsp_armv6.S @@ -1226,13 +1226,8 @@ vp8_mc_1bilin, 8, v vp8_mc_1bilin, 4, h vp8_mc_1bilin, 4, v -/* True relational expressions have the value -1 in the GNU assembler, - +1 in Apple's. */ -#ifdef __APPLE__ -# define TMPSIZE \size * (8 + 8*(\size > 4) + \ytaps - 1) -#else -# define TMPSIZE \size * (8 - 8*(\size > 4) + \ytaps - 1) -#endif +@ 4 and 8 pixel wide mc blocks might have height of 8 or 16 lines +#define TMPSIZE \size * (16 / ((16 / \size + 1) / 2) + \ytaps - 1) .macro vp8_mc_hv name, size, h, v, ytaps function ff_put_vp8_\name\size\()_\h\v\()_armv6, export=1 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] doc: escape left brace in texi2pod.pl regex
ffmpeg | branch: master | Janne Grunau | Fri May 6 13:32:06 2016 +0200| [fc5cdc0d5372f5103c71d5dede296734fe71ead2] | committer: Janne Grunau doc: escape left brace in texi2pod.pl regex Unescaped literal left braces are deprecated and a warning was added in Perl 5.22. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fc5cdc0d5372f5103c71d5dede296734fe71ead2 --- doc/texi2pod.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/texi2pod.pl b/doc/texi2pod.pl index e4eb61c..934c1a2 100755 --- a/doc/texi2pod.pl +++ b/doc/texi2pod.pl @@ -380,7 +380,7 @@ sub postprocess # @* is also impossible in .pod; we discard it and any newline that # follows it. Similarly, our macro @gol must be discarded. -s/\@anchor{(?:[^\}]*)\}//g; +s/\@anchor\{(?:[^\}]*)\}//g; s/\(?\@xref\{(?:[^\}]*)\}(?:[^.<]|(?:<[^<>]*>))*\.\)?//g; s/\s+\(\@pxref\{(?:[^\}]*)\}\)//g; s/;\s+\@pxref\{(?:[^\}]*)\}//g; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] checkasm: vp8: mc: test unequal width/height for partitions
ffmpeg | branch: master | Janne Grunau | Sun Jul 10 00:32:12 2016 +0200| [ec32574209f36467ef0d22c21a7e811ba98c15b6] | committer: Janne Grunau checkasm: vp8: mc: test unequal width/height for partitions > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ec32574209f36467ef0d22c21a7e811ba98c15b6 --- tests/checkasm/vp8dsp.c | 16 +--- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/checkasm/vp8dsp.c b/tests/checkasm/vp8dsp.c index 3fe09c5..9733ff7 100644 --- a/tests/checkasm/vp8dsp.c +++ b/tests/checkasm/vp8dsp.c @@ -268,15 +268,17 @@ static void check_mc(void) LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16]); LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16]); VP8DSPContext d; -int type, hsize, dx, dy; +int type, k, dx, dy; declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, uint8_t *, ptrdiff_t, int, int, int); ff_vp78dsp_init(&d); for (type = 0; type < 2; type++) { vp8_mc_func (*tab)[3][3] = type ? d.put_vp8_bilinear_pixels_tab : d.put_vp8_epel_pixels_tab; -for (hsize = 0; hsize < 3; hsize++) { -int size = 16 >> hsize; +for (k = 1; k < 8; k++) { +int hsize = k / 3; +int size = 16 >> hsize; +int height = (size << 1) >> (k % 3); for (dy = 0; dy < 3; dy++) { for (dx = 0; dx < 3; dx++) { char str[100]; @@ -309,11 +311,11 @@ static void check_mc(void) src[i ] = val; src[i * SRC_BUF_STRIDE] = val; } -call_ref(dst0, size, src, SRC_BUF_STRIDE, size, mx, my); -call_new(dst1, size, src, SRC_BUF_STRIDE, size, mx, my); -if (memcmp(dst0, dst1, size * size)) +call_ref(dst0, size, src, SRC_BUF_STRIDE, height, mx, my); +call_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my); +if (memcmp(dst0, dst1, size * height)) fail(); -bench_new(dst1, size, src, SRC_BUF_STRIDE, size, mx, my); +bench_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my); } } } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] checkasm/arm: align the clobber check data properly for ldrd
ffmpeg | branch: master | Janne Grunau | Sun Jul 10 13:23:13 2016 +0200| [8c816c0c9b12fdefd9046415e97df299880bc9b8] | committer: Janne Grunau checkasm/arm: align the clobber check data properly for ldrd Should fix the SIGBUS in the armv7-linux-clang-3.7 fate target. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8c816c0c9b12fdefd9046415e97df299880bc9b8 --- tests/checkasm/arm/checkasm.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S index 160dba4..199c96a 100644 --- a/tests/checkasm/arm/checkasm.S +++ b/tests/checkasm/arm/checkasm.S @@ -22,7 +22,7 @@ #include "libavutil/arm/asm.S" -const register_init +const register_init, align=3 .quad 0x21f86d66c8ca00ce .quad 0x75b6ba21077c48ad .quad 0xed56bb2dcb3c7736 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] h2645_parse: handle embedded Annex B NAL units in size prefixed NAL units
ffmpeg | branch: master | Janne Grunau | Thu Jul 7 20:33:01 2016 +0200| [17c99b6158f2c6720af74e81ee727ee50d2e7e96] | committer: Janne Grunau h2645_parse: handle embedded Annex B NAL units in size prefixed NAL units Fixes a regression in ca2f19b9cc3 with some mov/mp4 files. The files have several NAL units in the supposed single NAL unit after the size field. Annex B start code prefixes are used to separate them. The first NAL unit is correctly parsed but the buffer does not point to the next size field. Instead semi random data (it seems to be the rbsp_stop_one_bit and the start code prefix) is then parsed as length and will exceed the remaining length of the buffer. Patch based on the code in h264's decode_nal_units() and a similar patch by Hendrik Leppkes in FFmpeg (a9bb4cf87d1). Bug-Id: ffmpeg/trac5529 Reported-By: Vittorio Giovara > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=17c99b6158f2c6720af74e81ee727ee50d2e7e96 --- libavcodec/h2645_parse.c | 63 +++- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/libavcodec/h2645_parse.c b/libavcodec/h2645_parse.c index defe001..e252efa 100644 --- a/libavcodec/h2645_parse.c +++ b/libavcodec/h2645_parse.c @@ -195,11 +195,27 @@ static int h264_parse_nal_header(H2645NAL *nal, void *logctx) return 1; } +static int find_next_start_code(const uint8_t *buf, const uint8_t *next_avc) +{ +int i = 0; + +if (buf + 3 >= next_avc) +return next_avc - buf; + +while (buf + i + 3 < next_avc) { +if (buf[i] == 0 && buf[i + 1] == 0 && buf[i + 2] == 1) +break; +i++; +} +return i + 3; +} + int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length, void *logctx, int is_nalff, int nal_length_size, enum AVCodecID codec_id) { int consumed, ret = 0; +const uint8_t *next_avc = buf + (is_nalff ? 
0 : length); pkt->nb_nals = 0; while (length >= 4) { @@ -207,29 +223,52 @@ int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length, int extract_length = 0; int skip_trailing_zeros = 1; -if (is_nalff) { +/* + * Only parse an AVC1 length field if one is expected at the current + * buffer position. There are unfortunately streams with multiple + * NAL units covered by the length field. Those NAL units are delimited + * by Annex B start code prefixes. ff_h2645_extract_rbsp() detects it + * correctly and consumes only the first NAL unit. The additional NAL + * units are handled here in the Annex B parsing code. + */ +if (buf == next_avc) { int i; for (i = 0; i < nal_length_size; i++) extract_length = (extract_length << 8) | buf[i]; -buf+= nal_length_size; -length -= nal_length_size; if (extract_length > length) { av_log(logctx, AV_LOG_ERROR, "Invalid NAL unit size.\n"); return AVERROR_INVALIDDATA; } +buf += nal_length_size; +length -= nal_length_size; +// keep track of the next AVC1 length field +next_avc = buf + extract_length; } else { -if (buf[2] == 0) { -length--; -buf++; +/* + * expected to return immediately except for streams with mixed + * NAL unit coding + */ +int buf_index = find_next_start_code(buf, next_avc); + +buf+= buf_index; +length -= buf_index; + +/* + * break if an AVC1 length field is expected at the current buffer + * position + */ +if (buf == next_avc) continue; -} -if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) -return AVERROR_INVALIDDATA; -buf += 3; -length-= 3; -extract_length = length; +if (length > 0) { +extract_length = length; +} else if (pkt->nb_nals == 0) { +av_log(logctx, AV_LOG_ERROR, "No NAL unit found\n"); +return AVERROR_INVALIDDATA; +} else { +break; +} } if (pkt->nals_allocated < pkt->nb_nals + 1) { ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] checkasm: vp8.mc: initialize the full src buffer after ec32574209f
ffmpeg | branch: master | Janne Grunau | Tue Jul 12 21:31:57 2016 +0200| [80fbb7becae530167373fe5178966b7d7604306e] | committer: Janne Grunau checkasm: vp8.mc: initialize the full src buffer after ec32574209f Fixes "Use of uninitialised value" valgrind warnings in checkasm. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=80fbb7becae530167373fe5178966b7d7604306e --- tests/checkasm/vp8dsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/checkasm/vp8dsp.c b/tests/checkasm/vp8dsp.c index 9733ff7..0260d63 100644 --- a/tests/checkasm/vp8dsp.c +++ b/tests/checkasm/vp8dsp.c @@ -247,7 +247,7 @@ static void check_luma_dc_wht(void) } #define SRC_BUF_STRIDE 32 -#define SRC_BUF_SIZE ((size + 5) * SRC_BUF_STRIDE) +#define SRC_BUF_SIZE (((size << (size < 16)) + 5) * SRC_BUF_STRIDE) // The mc subpixel interpolation filter needs the 2 previous pixels in either // direction, the +1 is to make sure the actual load addresses always are // unaligned. ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] checkasm/arm: preserve the stack alignment in checkasm_checked_call
ffmpeg | branch: master | Janne Grunau | Tue Jul 12 22:46:43 2016 +0200| [7b1ae0e73ab7f7c5eabc70dbe2e579127c6e154f] | committer: Janne Grunau checkasm/arm: preserve the stack alignment in checkasm_checked_call The stack used by checkasm_checked_call_vfp was a multiple of 4 when the checked function is called. AAPCS requires a double word (8 byte) aligned stack at public interfaces. Since both calls are public interfaces the stack is misaligned when the checked function is called. Might fix the SIGBUS error in the armv7-linux-clang-3.7 fate config. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7b1ae0e73ab7f7c5eabc70dbe2e579127c6e154f --- tests/checkasm/arm/checkasm.S | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S index 199c96a..f004af3 100644 --- a/tests/checkasm/arm/checkasm.S +++ b/tests/checkasm/arm/checkasm.S @@ -42,6 +42,9 @@ endconst #define ARG_STACK 4*(MAX_ARGS - 2) +@ align the used stack space to 8 to preserve the stack alignment +#define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed) + .macro clobbercheck variant .equ pushed, 4*9 function checkasm_checked_call_\variant, export=1 @@ -59,10 +62,10 @@ function checkasm_checked_call_\variant, export=1 .endif ldm r12, {r4-r11} -sub sp, sp, #ARG_STACK +sub sp, sp, #ARG_STACK_A .equ pos, 0 .rept MAX_ARGS-2 -ldr r12, [sp, #ARG_STACK + pushed + 8 + pos] +ldr r12, [sp, #ARG_STACK_A + pushed + 8 + pos] str r12, [sp, #pos] .equ pos, pos + 4 .endr @@ -70,9 +73,9 @@ function checkasm_checked_call_\variant, export=1 mov r12, r0 mov r0, r2 mov r1, r3 -ldrdr2, r3, [sp, #ARG_STACK + pushed] +ldrdr2, r3, [sp, #ARG_STACK_A + pushed] blx r12 -add sp, sp, #ARG_STACK +add sp, sp, #ARG_STACK_A push{r0, r1} movrel r12, register_init ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] aarch64: vp9: loop filter: replace 'orr; cbn?z' with 'adds; b.{eq,ne}'
ffmpeg | branch: master | Janne Grunau | Tue Jan 10 00:15:08 2017 +0200| [cb220eeef9bfe889769dc4e08248b0a59d24e2a9] | committer: Michael Niedermayer aarch64: vp9: loop filter: replace 'orr; cbn?z' with 'adds; b.{eq,ne}; The latter is 1 cycle faster on a cortex-53 and since the operands are bytewise (or larger) bitmask (impossible to overflow to zero) both are equivalent. This is cherrypicked from libav commit e7ae8f7a715843a5089d18e033afb3ee19ab3057. Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cb220eeef9bfe889769dc4e08248b0a59d24e2a9 --- libavcodec/aarch64/vp9lpf_neon.S | 31 --- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S index 78aae61..55e1964 100644 --- a/libavcodec/aarch64/vp9lpf_neon.S +++ b/libavcodec/aarch64/vp9lpf_neon.S @@ -218,13 +218,15 @@ xtn_sz v5, v6.8h, v7.8h, \sz and v4\sz, v4\sz, v5\sz // fm +// If no pixels need filtering, just exit as soon as possible mov x5, v4.d[0] .ifc \sz, .16b mov x6, v4.d[1] -orr x5, x5, x6 -.endif -// If no pixels need filtering, just exit as soon as possible +addsx5, x5, x6 +b.eq9f +.else cbz x5, 9f +.endif .if \wd >= 8 moviv0\sz, #1 @@ -344,15 +346,17 @@ bit v22\sz, v0\sz, v5\sz // if (!hev && fm && !flat8in) bit v25\sz, v2\sz, v5\sz +// If no pixels need flat8in, jump to flat8out +// (or to a writeout of the inner 4 pixels, for wd=8) .if \wd >= 8 mov x5, v6.d[0] .ifc \sz, .16b mov x6, v6.d[1] -orr x5, x5, x6 -.endif -// If no pixels need flat8in, jump to flat8out -// (or to a writeout of the inner 4 pixels, for wd=8) +addsx5, x5, x6 +b.eq6f +.else cbz x5, 6f +.endif // flat8in uaddl_sz\tmp1\().8h, \tmp2\().8h, v20, v21, \sz @@ -406,20 +410,25 @@ mov x5, v2.d[0] .ifc \sz, .16b mov x6, v2.d[1] -orr x5, x5, x6 +adds x5, x5, x6 +b.ne1f +.else +cbnzx5, 1f .endif // If no pixels needed flat8in nor flat8out, jump to a // writeout of the inner 4 pixels -cbnzx5, 1f br x14 1: + mov x5, v7.d[0] 
.ifc \sz, .16b mov x6, v7.d[1] -orr x5, x5, x6 +adds x5, x5, x6 +b.ne1f +.else +cbnzx5, 1f .endif // If no pixels need flat8out, jump to a writeout of the inner 6 pixels -cbnzx5, 1f br x15 1: ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] aarch64: vp9: use alternative returns in the core loop filter function
ffmpeg | branch: master | Janne Grunau | Tue Jan 10 00:15:07 2017 +0200| [62ea07d797c503bc4b727e56d9c0f914a93c8ef6] | committer: Michael Niedermayer aarch64: vp9: use alternative returns in the core loop filter function Since aarch64 has enough free general purpose registers use them to branch to the appropriate storage code. 1-2 cycles faster for the functions using loop_filter 8/16, ... on a cortex-a53. Mixed results (up to 2 cycles faster/slower) on a cortex-a57. This is cherrypicked from libav commit d7595de0b25e7064fd9e06dea5d0425536cef6dc. Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=62ea07d797c503bc4b727e56d9c0f914a93c8ef6 --- libavcodec/aarch64/vp9lpf_neon.S | 48 +++- 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S index e727a4d..78aae61 100644 --- a/libavcodec/aarch64/vp9lpf_neon.S +++ b/libavcodec/aarch64/vp9lpf_neon.S @@ -410,15 +410,19 @@ .endif // If no pixels needed flat8in nor flat8out, jump to a // writeout of the inner 4 pixels -cbz x5, 7f +cbnzx5, 1f +br x14 +1: mov x5, v7.d[0] .ifc \sz, .16b mov x6, v7.d[1] orr x5, x5, x6 .endif // If no pixels need flat8out, jump to a writeout of the inner 6 pixels -cbz x5, 8f +cbnzx5, 1f +br x15 +1: // flat8out // This writes all outputs into v2-v17 (skipping v6 and v16). 
// If this part is skipped, the output is read from v21-v26 (which is the input @@ -549,35 +553,24 @@ endfunc function vp9_loop_filter_8 loop_filter 8, .8b, 0,v16, v17, v18, v19, v28, v29, v30, v31 -mov x5, #0 ret 6: -mov x5, #6 -ret +br x13 9: br x10 endfunc function vp9_loop_filter_8_16b_mix loop_filter 8, .16b, 88, v16, v17, v18, v19, v28, v29, v30, v31 -mov x5, #0 ret 6: -mov x5, #6 -ret +br x13 9: br x10 endfunc function vp9_loop_filter_16 loop_filter 16, .8b, 0,v8, v9, v10, v11, v12, v13, v14, v15 -mov x5, #0 -ret -7: -mov x5, #7 -ret -8: -mov x5, #8 ret 9: ldp d8, d9, [sp], 0x10 @@ -589,13 +582,6 @@ endfunc function vp9_loop_filter_16_16b loop_filter 16, .16b, 0,v8, v9, v10, v11, v12, v13, v14, v15 -mov x5, #0 -ret -7: -mov x5, #7 -ret -8: -mov x5, #8 ret 9: ldp d8, d9, [sp], 0x10 @@ -614,11 +600,14 @@ endfunc .endm .macro loop_filter_8 +// calculate alternative 'return' targets +adr x13, 6f bl vp9_loop_filter_8 -cbnzx5, 6f .endm .macro loop_filter_8_16b_mix mix +// calculate alternative 'return' targets +adr x13, 6f .if \mix == 48 mov x11, #0x .elseif \mix == 84 @@ -627,21 +616,20 @@ endfunc mov x11, #0x .endif bl vp9_loop_filter_8_16b_mix -cbnzx5, 6f .endm .macro loop_filter_16 +// calculate alternative 'return' targets +adr x14, 7f +adr x15, 8f bl vp9_loop_filter_16 -cmp x5, 7 -b.gt8f -b.eq7f .endm .macro loop_filter_16_16b +// calculate alternative 'return' targets +adr x14, 7f +adr x15, 8f bl vp9_loop_filter_16_16b -cmp x5, 7 -b.gt8f -b.eq7f .endm ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] arm: vp9itxfm: Simplify the stack alignment code
ffmpeg | branch: master | Janne Grunau | Tue Jan 10 00:15:09 2017 +0200| [a71cd8439fd32fd83b7a9b9ac8d6f861846770c7] | committer: Michael Niedermayer arm: vp9itxfm: Simplify the stack alignment code This is one instruction less for thumb, and only have got 1/2 arm/thumb specific instructions. This is cherrypicked from libav commit e5b0fc170f85b00f7dd0ac514918fb5c95253d39. Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a71cd8439fd32fd83b7a9b9ac8d6f861846770c7 --- libavcodec/arm/vp9itxfm_neon.S | 28 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S index 06470a3..d7a2654 100644 --- a/libavcodec/arm/vp9itxfm_neon.S +++ b/libavcodec/arm/vp9itxfm_neon.S @@ -791,15 +791,13 @@ function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1 .ifnc \txfm1\()_\txfm2,idct_idct vpush {q4-q7} .endif -mov r7, sp @ Align the stack, allocate a temp buffer -T mov r12, sp -T bic r12, r12, #15 -T sub r12, r12, #512 -T mov sp, r12 -A bic sp, sp, #15 -A sub sp, sp, #512 +T mov r7, sp +T and r7, r7, #15 +A and r7, sp, #15 +add r7, r7, #512 +sub sp, sp, r7 mov r4, r0 mov r5, r1 @@ -828,7 +826,7 @@ A sub sp, sp, #512 bl \txfm2\()16_1d_4x16_pass2_neon .endr -mov sp, r7 +add sp, sp, r7 .ifnc \txfm1\()_\txfm2,idct_idct vpop{q4-q7} .endif @@ -1117,15 +1115,13 @@ function ff_vp9_idct_idct_32x32_add_neon, export=1 beq idct32x32_dc_add_neon push{r4-r7,lr} vpush {q4-q7} -mov r7, sp @ Align the stack, allocate a temp buffer -T mov r12, sp -T bic r12, r12, #15 -T sub r12, r12, #2048 -T mov sp, r12 -A bic sp, sp, #15 -A sub sp, sp, #2048 +T mov r7, sp +T and r7, r7, #15 +A and r7, sp, #15 +add r7, r7, #2048 +sub sp, sp, r7 mov r4, r0 mov r5, r1 @@ -1143,7 +1139,7 @@ A sub sp, sp, #2048 bl idct32_1d_4x32_pass2_neon .endr -mov sp, r7 +add sp, sp, r7 vpop{q4-q7} pop {r4-r7,pc} endfunc ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org 
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] checkasm: arm: report the first clobbered register in checkasm_checked_call
ffmpeg | branch: master | Janne Grunau | Thu Jul 14 23:16:14 2016 +0200| [71a0472114574993df7035f4de9aa007e03817b8] | committer: Janne Grunau checkasm: arm: report the first clobbered register in checkasm_checked_call > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=71a0472114574993df7035f4de9aa007e03817b8 --- tests/checkasm/arm/checkasm.S | 64 ++- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S index f004af3..989f613 100644 --- a/tests/checkasm/arm/checkasm.S +++ b/tests/checkasm/arm/checkasm.S @@ -33,8 +33,12 @@ const register_init, align=3 .quad 0x249214109d5d1c88 endconst -const error_message -.asciz "failed to preserve register" +const error_message_fpscr +.asciz "failed to preserve register FPSCR" +const error_message_gpr +.asciz "failed to preserve register r%d" +const error_message_vfp +.asciz "failed to preserve register d%d" endconst @ max number of args used by any asm function. @@ -79,39 +83,42 @@ function checkasm_checked_call_\variant, export=1 push{r0, r1} movrel r12, register_init -mov r3, #0 .ifc \variant, vfp -.macro check_reg_vfp, dreg, inc=8 -ldrdr0, r1, [r12], #\inc -vmovr2, lr, \dreg -eor r0, r0, r2 -eor r1, r1, lr -orr r3, r3, r0 -orr r3, r3, r1 +.macro check_reg_vfp, dreg, offset +vldrd0, [r12, #8 * (\offset)] +veord0, d0, \dreg +vmovr2, r3, d0 +orrsr2, r2, r3 +bne 4f .endm -.irp n, 8, 9, 10, 11, 12, 13, 14 -check_reg_vfp d\n +.irp n, 8, 9, 10, 11, 12, 13, 14, 15 +@ keep track of the checked double/SIMD register +mov r1, #\n +check_reg_vfp d\n, \n-8 .endr -check_reg_vfp d15, -56 .purgem check_reg_vfp fmrxr0, FPSCR -ldr r1, [sp, #8] -eor r0, r0, r1 +ldr r3, [sp, #8] +eor r0, r0, r3 @ Ignore changes in the topmost 5 bits -lsl r0, r0, #5 -orr r3, r3, r0 +lslsr0, r0, #5 +bne 3f .endif +@ keep track of the checked GPR +mov r1, #4 .macro check_reg reg1, reg2= -ldrdr0, r1, [r12], #8 -eor r0, r0, \reg1 -orrsr3, r3, r0 +ldrdr2, r3, [r12], #8 +eorsr2, r2, \reg1 
+bne 2f +add r1, r1, #1 .ifnb \reg2 -eor r1, r1, \reg2 -orrsr3, r3, r1 +eorsr3, r3, \reg2 +bne 2f .endif +add r1, r1, #1 .endm check_reg r4, r5 check_reg r6, r7 @@ -124,9 +131,16 @@ function checkasm_checked_call_\variant, export=1 check_reg r10, r11 .purgem check_reg -beq 0f - -movrel r0, error_message +b 0f +4: +movrel r0, error_message_vfp +b 1f +3: +movrel r0, error_message_fpscr +b 1f +2: +movrel r0, error_message_gpr +1: blx X(checkasm_fail_func) 0: pop {r0, r1} ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] checkasm/arm: remove NEON instructions from checkasm_checked_call_vfp
ffmpeg | branch: master | Janne Grunau | Sun Jul 17 10:43:10 2016 +0200| [59aeed93e4e928b884be72b8c267ff6b2785ab66] | committer: Janne Grunau checkasm/arm: remove NEON instructions from checkasm_checked_call_vfp Fixes AS error on non-NEON builds introduced in 71a04721145. Also set the fpu directly to vfp in checkasm.S to cause build errors on NEON builds. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=59aeed93e4e928b884be72b8c267ff6b2785ab66 --- tests/checkasm/arm/checkasm.S | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S index 5aa92c5..2768bb3 100644 --- a/tests/checkasm/arm/checkasm.S +++ b/tests/checkasm/arm/checkasm.S @@ -22,6 +22,12 @@ #include "libavutil/arm/asm.S" +/* override fpu so that NEON instructions are rejected */ +#if HAVE_VFP +.fpuvfp +ELF .eabi_attribute 10, 0 @ suppress Tag_FP_arch +#endif + const register_init, align=3 .quad 0x21f86d66c8ca00ce .quad 0x75b6ba21077c48ad @@ -85,9 +91,10 @@ function checkasm_checked_call_\variant, export=1 movrel r12, register_init .ifc \variant, vfp .macro check_reg_vfp, dreg, offset -vldrd0, [r12, #8 * (\offset)] -veord0, d0, \dreg -vmovr2, r3, d0 +ldrdr2, r3, [r12, #8 * (\offset)] +vmovr0, lr, \dreg +eor r2, r2, r0 +eor r3, r3, lr orrsr2, r2, r3 bne 4f .endm ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] h264/aarch64: sign extend int stride in loop filter asm
ffmpeg | branch: master | Janne Grunau | Tue Jan 1 18:16:33 2019 +0100| [bb515e3a735f526ccb1068031e289eb5aeb69e22] | committer: Janne Grunau h264/aarch64: sign extend int stride in loop filter asm > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bb515e3a735f526ccb1068031e289eb5aeb69e22 --- libavcodec/aarch64/h264dsp_neon.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libavcodec/aarch64/h264dsp_neon.S b/libavcodec/aarch64/h264dsp_neon.S index 9b4610a4d4..60ffa24500 100644 --- a/libavcodec/aarch64/h264dsp_neon.S +++ b/libavcodec/aarch64/h264dsp_neon.S @@ -130,6 +130,7 @@ endfunc function ff_h264_h_loop_filter_luma_neon, export=1 h264_loop_filter_start +sxtwx1, w1 sub x0, x0, #4 ld1 {v6.8B}, [x0], x1 @@ -210,6 +211,7 @@ endfunc function ff_h264_v_loop_filter_chroma_neon, export=1 h264_loop_filter_start +sxtwx1, w1 sub x0, x0, x1, lsl #1 ld1 {v18.8B}, [x0], x1 @@ -228,6 +230,7 @@ endfunc function ff_h264_h_loop_filter_chroma_neon, export=1 h264_loop_filter_start +sxtwx1, w1 sub x0, x0, #2 ld1 {v18.S}[0], [x0], x1 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] checkasm/h264: add loop filter tests
ffmpeg | branch: master | Janne Grunau | Tue Jan 1 18:25:55 2019 +0100| [d7f4f5c4a18a0c9e62635cfa6fe8a9302b413c01] | committer: Janne Grunau checkasm/h264: add loop filter tests > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d7f4f5c4a18a0c9e62635cfa6fe8a9302b413c01 --- tests/checkasm/h264dsp.c | 124 +++ 1 file changed, 124 insertions(+) diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c index f355a72a74..706fc79397 100644 --- a/tests/checkasm/h264dsp.c +++ b/tests/checkasm/h264dsp.c @@ -28,6 +28,7 @@ #include "libavutil/intreadwrite.h" static const uint32_t pixel_mask[3] = { 0x, 0x01ff01ff, 0x03ff03ff }; +static const uint32_t pixel_mask_lf[3] = { 0xff0fff0f, 0x01ff000f, 0x03ff000f }; #define SIZEOF_PIXEL ((bit_depth + 7) / 8) #define SIZEOF_COEF (2 * ((bit_depth + 7) / 8)) @@ -312,9 +313,132 @@ static void check_idct_multiple(void) } } + +static void check_loop_filter(void) +{ +LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]); +LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]); +LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]); +H264DSPContext h; +int bit_depth; +int alphas[36], betas[36]; +int8_t tc0[36][4]; + +declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, int stride, + int alpha, int beta, int8_t *tc0); + +for (bit_depth = 8; bit_depth <= 10; bit_depth++) { +int i, j, a, c; +uint32_t mask = pixel_mask_lf[bit_depth - 8]; +ff_h264dsp_init(&h, bit_depth, 1); +for (i = 35, a = 255, c = 250; i >= 0; i--) { +alphas[i] = a << (bit_depth - 8); +betas[i] = (i + 1) / 2 << (bit_depth - 8); +tc0[i][0] = tc0[i][3] = (c + 6) / 10; +tc0[i][1] = (c + 7) / 15; +tc0[i][2] = (c + 9) / 20; +a = a*9/10; +c = c*9/10; +} + +#define CHECK_LOOP_FILTER(name, align, ...) 
\ +do {\ +if (check_func(h.name, #name "_%dbpp", bit_depth)) {\ +for (j = 0; j < 36; j++) { \ +intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \ +for (i = 0; i < 1024; i+=4) { \ +AV_WN32A(dst + i, rnd() & mask);\ +} \ +memcpy(dst0, dst, 32 * 16 * 2); \ +memcpy(dst1, dst, 32 * 16 * 2); \ +\ +call_ref(dst0 + off, 32, alphas[j], betas[j], tc0[j]); \ +call_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \ +if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \ +fprintf(stderr, #name ": j:%d, alpha:%d beta:%d " \ +"tc0:{%d,%d,%d,%d}\n", j, alphas[j], betas[j], \ +tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \ +fail(); \ +} \ +bench_new(dst1, 32, alphas[j], betas[j], tc0[j]); \ +} \ +} \ +} while (0) + +CHECK_LOOP_FILTER(h264_v_loop_filter_luma, 1); +CHECK_LOOP_FILTER(h264_h_loop_filter_luma, 0); +CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff, 0); +CHECK_LOOP_FILTER(h264_v_loop_filter_chroma, 1); +CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0); +CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0); +#undef CHECK_LOOP_FILTER +} +} + +static void check_loop_filter_intra(void) +{ +LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]); +LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]); +LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]); +H264DSPContext h; +int bit_depth; +int alphas[36], betas[36]; + +declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, int stride, + int alpha, int beta); + +for (bit_depth = 8; bit_depth <= 10; bit_depth++) { +int i, j, a; +uint32_t mask = pixel_mask_lf[bit_depth - 8]; +ff_h264dsp_init(&h, bit_depth, 1); +for (i = 35, a = 255; i >= 0; i--) { +alphas[i] = a << (bit_depth - 8); +betas[i] = (i + 1) / 2 << (bit_depth - 8); +a = a*9/10; +} + +#define CHECK_LOOP_FILTER(name, align) \ +do {
[FFmpeg-cvslog] h264/aarch64: optimize neon loop filter
ffmpeg | branch: master | Janne Grunau | Tue Jan 1 22:37:11 2019 +0100| [846c3d6aca5484904e60946c4fe8b8833bc07f92] | committer: Janne Grunau h264/aarch64: optimize neon loop filter Exit as soon as possible if no filtering will be done. Improves the checkasm --bench cycle count on a Snapdragon 820e: h264_h_loop_filter_luma_8bpp_c: 72.4 -> 72.5 h264_h_loop_filter_luma_8bpp_neon: 97.1 -> 56.3 h264_v_loop_filter_luma_8bpp_c: 174.0 -> 173.5 h264_v_loop_filter_luma_8bpp_neon: 62.9 -> 60.9 h264_h_loop_filter_chroma_8bpp_c:30.2 -> 30.3 h264_h_loop_filter_chroma_8bpp_neon: 51.6 -> 25.7 h264_v_loop_filter_chroma_8bpp_c:57.3 -> 57.3 h264_v_loop_filter_chroma_8bpp_neon: 28.0 -> 24.0 > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=846c3d6aca5484904e60946c4fe8b8833bc07f92 --- libavcodec/aarch64/h264dsp_neon.S | 33 +++-- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/libavcodec/aarch64/h264dsp_neon.S b/libavcodec/aarch64/h264dsp_neon.S index 60ffa24500..b649f1d018 100644 --- a/libavcodec/aarch64/h264dsp_neon.S +++ b/libavcodec/aarch64/h264dsp_neon.S @@ -54,9 +54,12 @@ uabdv17.16B, v20.16B, v16.16B // abs(p2 - p0) and v21.16B, v21.16B, v28.16B uabdv19.16B, v4.16B, v0.16B // abs(q2 - q0) +and v21.16B, v21.16B, v30.16B // < beta +shrnv30.8b, v21.8h, #4 +mov x7, v30.d[0] cmhiv17.16B, v22.16B, v17.16B // < beta -and v21.16B, v21.16B, v30.16B cmhiv19.16B, v22.16B, v19.16B // < beta +cbz x7, 9f and v17.16B, v17.16B, v21.16B and v19.16B, v19.16B, v21.16B and v24.16B, v24.16B, v21.16B @@ -124,7 +127,7 @@ function ff_h264_v_loop_filter_luma_neon, export=1 st1 {v16.16B}, [x0], x1 st1 {v0.16B}, [x0], x1 st1 {v19.16B}, [x0] - +9: ret endfunc @@ -174,32 +177,34 @@ function ff_h264_h_loop_filter_luma_neon, export=1 st1 {v16.S}[3], [x0], x1 st1 {v0.S}[3], [x0], x1 st1 {v19.S}[3], [x0], x1 - +9: ret endfunc .macro h264_loop_filter_chroma dup v22.8B, w2 // alpha +dup v23.8B, w3 // beta uxtlv24.8H, v24.8B uabdv26.8B, v16.8B, v0.8B // abs(p0 - q0) -uxtlv4.8H, v0.8B 
uabdv28.8B, v18.8B, v16.8B // abs(p1 - p0) +uabdv30.8B, v2.8B, v0.8B // abs(q1 - q0) +cmhiv26.8B, v22.8B, v26.8B // < alpha +cmhiv28.8B, v23.8B, v28.8B // < beta +cmhiv30.8B, v23.8B, v30.8B // < beta +uxtlv4.8H, v0.8B +and v26.8B, v26.8B, v28.8B usubw v4.8H, v4.8H, v16.8B -sli v24.8H, v24.8H, #8 +and v26.8B, v26.8B, v30.8B shl v4.8H, v4.8H, #2 -uabdv30.8B, v2.8B, v0.8B // abs(q1 - q0) +mov x2, v26.d[0] +sli v24.8H, v24.8H, #8 uaddw v4.8H, v4.8H, v18.8B -cmhiv26.8B, v22.8B, v26.8B // < alpha +cbz x2, 9f usubw v4.8H, v4.8H, v2.8B -dup v22.8B, w3 // beta rshrn v4.8B, v4.8H, #3 -cmhiv28.8B, v22.8B, v28.8B // < beta -cmhiv30.8B, v22.8B, v30.8B // < beta sminv4.8B, v4.8B, v24.8B neg v25.8B, v24.8B -and v26.8B, v26.8B, v28.8B smaxv4.8B, v4.8B, v25.8B -and v26.8B, v26.8B, v30.8B uxtlv22.8H, v0.8B and v4.8B, v4.8B, v26.8B uxtlv28.8H, v16.8B @@ -224,7 +229,7 @@ function ff_h264_v_loop_filter_chroma_neon, export=1 sub x0, x0, x1, lsl #1 st1 {v16.8B}, [x0], x1 st1 {v0.8B}, [x0], x1 - +9: ret endfunc @@ -257,7 +262,7 @@ function ff_h264_h_loop_filter_chroma_neon, export=1 st1 {v16.S}[1], [x0], x1 st1 {v0.S}[1], [x0], x1 st1 {v2.S}[1], [x0], x1 - +9: ret endfunc ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] h264/aarch64: add intra loop filter neon asm
ffmpeg | branch: master | Janne Grunau | Mon Aug 13 20:43:19 2018 +0200| [28a8b5413b64b831dfb8650208bccd8b78360484] | committer: Janne Grunau h264/aarch64: add intra loop filter neon asm Add my neon asm from x264 relicensed under the LGPL 2.1 or later. Ported (x264 uses nv12 chroma) and optimized. Cycle count for checkasm --bench on a Snapdragon 820e: h264_h_loop_filter_luma_intra_8bpp_c: 60.0 h264_h_loop_filter_luma_intra_8bpp_neon: 54.2 h264_v_loop_filter_luma_intra_8bpp_c: 148.3 h264_v_loop_filter_luma_intra_8bpp_neon: 73.8 h264_h_loop_filter_chroma_intra_8bpp_c: 27.8 h264_h_loop_filter_chroma_intra_8bpp_neon: 21.4 h264_h_loop_filter_chroma_mbaff_intra_8bpp_c: 15.8 h264_h_loop_filter_chroma_mbaff_intra_8bpp_neon: 15.7 h264_v_loop_filter_chroma_intra_8bpp_c: 45.8 h264_v_loop_filter_chroma_intra_8bpp_neon: 17.3 > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=28a8b5413b64b831dfb8650208bccd8b78360484 --- libavcodec/aarch64/h264dsp_init_aarch64.c | 16 ++ libavcodec/aarch64/h264dsp_neon.S | 297 ++ 2 files changed, 313 insertions(+) diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c index b106f11134..07bda2ff07 100644 --- a/libavcodec/aarch64/h264dsp_init_aarch64.c +++ b/libavcodec/aarch64/h264dsp_init_aarch64.c @@ -29,10 +29,20 @@ void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); +void ff_h264_v_loop_filter_luma_intra_neon(uint8_t *pix, int stride, int alpha, + int beta); +void ff_h264_h_loop_filter_luma_intra_neon(uint8_t *pix, int stride, int alpha, + int beta); void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); +void ff_h264_v_loop_filter_chroma_intra_neon(uint8_t *pix, int stride, + int alpha, int beta); +void 
ff_h264_h_loop_filter_chroma_intra_neon(uint8_t *pix, int stride, + int alpha, int beta); +void ff_h264_h_loop_filter_chroma_mbaff_intra_neon(uint8_t *pix, int stride, + int alpha, int beta); void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height, int log2_den, int weight, int offset); @@ -77,8 +87,14 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth, if (have_neon(cpu_flags) && bit_depth == 8) { c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon; +c->h264_v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon; +c->h264_h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon; + c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; +c->h264_v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon; +c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon; +c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon; c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon; c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon; diff --git a/libavcodec/aarch64/h264dsp_neon.S b/libavcodec/aarch64/h264dsp_neon.S index b649f1d018..448e575b8c 100644 --- a/libavcodec/aarch64/h264dsp_neon.S +++ b/libavcodec/aarch64/h264dsp_neon.S @@ -1,6 +1,7 @@ /* * Copyright (c) 2008 Mans Rullgard * Copyright (c) 2013 Janne Grunau + * Copyright (c) 2014 Janne Grunau * * This file is part of Libav. 
* @@ -181,6 +182,203 @@ function ff_h264_h_loop_filter_luma_neon, export=1 ret endfunc + +.macro h264_loop_filter_start_intra +orr w4, w2, w3 +cbnzw4, 1f +ret +1: +sxtwx1, w1 +dup v30.16b, w2// alpha +dup v31.16b, w3// beta +.endm + +.macro h264_loop_filter_luma_intra +uabdv16.16b, v7.16b, v0.16b// abs(p0 - q0) +uabdv17.16b, v6.16b, v7.16b// abs(p1 - p0) +uabdv18.16b, v1.16b, v0.16b// abs(q1 - q0) +cmhiv19.16b, v30.16b, v16.16b // < alpha +cmhiv17.16b, v31.16b, v17.16b // < beta +cmhiv18.16b, v31.16b, v18.
[FFmpeg-cvslog] h264/x86: sign extend int stride in deblock functions
ffmpeg | branch: master | Janne Grunau | Sun Jan 27 11:06:34 2019 +0100| [156ea66c91b1986a87916f187216978d686725f6] | committer: Janne Grunau h264/x86: sign extend int stride in deblock functions Fixes checkasm errors after adding the h264 deblock tests. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=156ea66c91b1986a87916f187216978d686725f6 --- libavcodec/x86/h264_deblock.asm | 8 libavcodec/x86/h264_deblock_10bit.asm | 9 + 2 files changed, 17 insertions(+) diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm index 33fd5a9dd7..4b9cf85d16 100644 --- a/libavcodec/x86/h264_deblock.asm +++ b/libavcodec/x86/h264_deblock.asm @@ -288,6 +288,7 @@ cextern pb_3 ;- %macro DEBLOCK_LUMA 0 cglobal deblock_v_luma_8, 5,5,10 +movsxdifnidn r1, r1d movdm8, [r4] ; tc0 lea r4, [r1*3] dec r2d; alpha-1 @@ -335,6 +336,7 @@ cglobal deblock_v_luma_8, 5,5,10 INIT_MMX cpuname cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64 movsxd r7, r1d +movsxdifnidn r1, r1d lear8, [r7+r7*2] lear6, [r0-4] lear5, [r0-4+r8] @@ -395,6 +397,7 @@ DEBLOCK_LUMA ; int8_t *tc0) ;- cglobal deblock_%1_luma_8, 5,5,8,2*%2 +movsxdifnidn r1, r1d lea r4, [r1*3] dec r2 ; alpha-1 neg r4 @@ -445,6 +448,7 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2 ;- INIT_MMX cpuname cglobal deblock_h_luma_8, 0,5,8,0x60+12 +movsxdifnidn r1, r1d movr0, r0mp movr3, r1m lear4, [r3*3] @@ -646,6 +650,7 @@ cglobal deblock_%1_luma_intra_8, 4,6,16,0x10 %else cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50 %endif +movsxdifnidn r1, r1d lea r4, [r1*4] lea r5, [r1*3] ; 3*stride dec r2d; alpha-1 @@ -703,6 +708,7 @@ INIT_MMX cpuname ;- cglobal deblock_h_luma_intra_8, 4,9,0,0x80 movsxd r7, r1d +movsxdifnidn r1, r1d lear8, [r7*3] lear6, [r0-4] lear5, [r0-4+r8] @@ -782,6 +788,7 @@ DEBLOCK_LUMA_INTRA v8 INIT_MMX mmxext %macro CHROMA_V_START 0 +movsxdifnidn r1, r1d decr2d ; alpha-1 decr3d ; beta-1 movt5, r0 @@ -790,6 +797,7 @@ INIT_MMX mmxext %endmacro %macro CHROMA_H_START 0 +movsxdifnidn r1, r1d decr2d decr3d 
subr0, 2 diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm index d049c62bf2..1a424b7f43 100644 --- a/libavcodec/x86/h264_deblock_10bit.asm +++ b/libavcodec/x86/h264_deblock_10bit.asm @@ -162,6 +162,7 @@ cglobal deblock_v_luma_10, 5,5,8*(mmsize/16) %define ms2 [rsp+mmsize*2] %define am [rsp+mmsize*3] %define bm [rsp+mmsize*4] +movsxdifnidn r1, r1d SUBrsp, pad shlr2d, 2 shlr3d, 2 @@ -219,6 +220,7 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16) %define p2m [rsp+mmsize*4] %define am [rsp+mmsize*5] %define bm [rsp+mmsize*6] +movsxdifnidn r1, r1d SUBrsp, pad shlr2d, 2 shlr3d, 2 @@ -349,6 +351,7 @@ cglobal deblock_v_luma_10, 5,5,15 %define mask0 m7 %define mask1 m10 %define mask2 m11 +movsxdifnidn r1, r1d shlr2d, 2 shlr3d, 2 LOAD_ABm12, m13, r2d, r3d @@ -377,6 +380,7 @@ cglobal deblock_v_luma_10, 5,5,15 REP_RET cglobal deblock_h_luma_10, 5,7,15 +movsxdifnidn r1, r1d shlr2d, 2 shlr3d, 2 LOAD_ABm12, m13, r2d, r3d @@ -492,6 +496,7 @@ DEBLOCK_LUMA_64 CAT_XDEFINE t, i, [rsp+mmsize*(i-4)] %assign i i+1 %endrep +movsxdifnidn r1, r1d SUBrsp, pad %endmacro @@ -615,6 +620,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16 %define q2 m13 %define aa m5 %define bb m14 +movsxdifnidn r1, r1d lea r4, [r1*4] lea r5, [r1*3] ; 3*stride neg r4 @@ -668,6 +674,7 @@ cglobal deblock_h_luma_intra_10, 4,7,16 %define p3 m4 %define spill [rsp] %assign pad 24-(stack_offset&15) +movsxdifnidn r1, r1d SUB rsp, pad lea r4, [r1*4] lea r5, [r1*3] ; 3*stride @@ -852,6 +859,7 @@ DEBLOCK_LUMA_INTRA ; int8_t *tc0) ;- cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16) +movsxdifnidn r1, r1d mov r5, r0 sub r0, r1 sub r0, r1 @@ -887,6 +895,7 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16) ; i
[FFmpeg-cvslog] h264/arm64: implement missing 4:2:2 chroma loop filter neon functions
ffmpeg | branch: master | Janne Grunau | Wed Feb 27 21:51:27 2019 +0100| [186bd30aa3b6c2b29b4dbf18278700b572068b1e] | committer: Janne Grunau h264/arm64: implement missing 4:2:2 chroma loop filter neon functions > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=186bd30aa3b6c2b29b4dbf18278700b572068b1e --- libavcodec/aarch64/h264dsp_init_aarch64.c | 18 +--- libavcodec/aarch64/h264dsp_neon.S | 36 ++- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c index 07bda2ff07..85fea8e040 100644 --- a/libavcodec/aarch64/h264dsp_init_aarch64.c +++ b/libavcodec/aarch64/h264dsp_init_aarch64.c @@ -37,10 +37,14 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); +void ff_h264_h_loop_filter_chroma422_neon(uint8_t *pix, int stride, int alpha, + int beta, int8_t *tc0); void ff_h264_v_loop_filter_chroma_intra_neon(uint8_t *pix, int stride, int alpha, int beta); void ff_h264_h_loop_filter_chroma_intra_neon(uint8_t *pix, int stride, int alpha, int beta); +void ff_h264_h_loop_filter_chroma422_intra_neon(uint8_t *pix, int stride, +int alpha, int beta); void ff_h264_h_loop_filter_chroma_mbaff_intra_neon(uint8_t *pix, int stride, int alpha, int beta); @@ -91,10 +95,18 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth, c->h264_h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon; c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; -c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; c->h264_v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon; -c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon; -c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon; + +if (chroma_format_idc <= 1) { 
+c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; +c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon; +c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon; +} else { +c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon; +c->h264_h_loop_filter_chroma_mbaff = ff_h264_h_loop_filter_chroma_neon; +c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma422_intra_neon; +c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_intra_neon; +} c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon; c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon; diff --git a/libavcodec/aarch64/h264dsp_neon.S b/libavcodec/aarch64/h264dsp_neon.S index 448e575b8c..bcce7e7da5 100644 --- a/libavcodec/aarch64/h264dsp_neon.S +++ b/libavcodec/aarch64/h264dsp_neon.S @@ -28,9 +28,9 @@ ldr w6, [x4] ccmpw3, #0, #0, ne mov v24.S[0], w6 -and w6, w6, w6, lsl #16 +and w8, w6, w6, lsl #16 b.eq1f -andsw6, w6, w6, lsl #8 +andsw8, w8, w8, lsl #8 b.ge2f 1: ret @@ -394,10 +394,10 @@ endfunc usubw v4.8H, v4.8H, v16.8B and v26.8B, v26.8B, v30.8B shl v4.8H, v4.8H, #2 -mov x2, v26.d[0] +mov x8, v26.d[0] sli v24.8H, v24.8H, #8 uaddw v4.8H, v4.8H, v18.8B -cbz x2, 9f +cbz x8, 9f usubw v4.8H, v4.8H, v2.8B rshrn v4.8B, v4.8H, #3 sminv4.8B, v4.8B, v24.8B @@ -436,6 +436,7 @@ function ff_h264_h_loop_filter_chroma_neon, export=1 sxtwx1, w1 sub x0, x0, #2 +h_loop_filter_chroma420: ld1 {v18.S}[0], [x0], x1 ld1 {v16.S}[0], [x0], x1 ld1 {v0.S}[0], [x0], x1 @@ -464,6 +465,19 @@ function ff_h264_h_loop_filter_chroma_neon, export=1 ret endfunc +function ff_h264_h_loop_filter_chroma422_neon, export=1 +sxtwx1, w1 +h264_loop_filter_start +add x5, x0, x1
[FFmpeg-cvslog] checkasm/h264: test 4:2:2 chroma loop filter functions
ffmpeg | branch: master | Janne Grunau | Wed Feb 27 20:51:48 2019 +0100| [f8abf7d4dfa0504f7f65e4f1fd9d22e01cb371cc] | committer: Janne Grunau checkasm/h264: test 4:2:2 chroma loop filter functions > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f8abf7d4dfa0504f7f65e4f1fd9d22e01cb371cc --- tests/checkasm/h264dsp.c | 44 ++-- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c index 706fc79397..ee07121ab4 100644 --- a/tests/checkasm/h264dsp.c +++ b/tests/checkasm/h264dsp.c @@ -341,9 +341,9 @@ static void check_loop_filter(void) c = c*9/10; } -#define CHECK_LOOP_FILTER(name, align, ...) \ +#define CHECK_LOOP_FILTER(name, align, idc) \ do {\ -if (check_func(h.name, #name "_%dbpp", bit_depth)) {\ +if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \ for (j = 0; j < 36; j++) { \ intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \ for (i = 0; i < 1024; i+=4) { \ @@ -355,7 +355,7 @@ static void check_loop_filter(void) call_ref(dst0 + off, 32, alphas[j], betas[j], tc0[j]); \ call_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \ if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \ -fprintf(stderr, #name ": j:%d, alpha:%d beta:%d " \ +fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d " \ "tc0:{%d,%d,%d,%d}\n", j, alphas[j], betas[j], \ tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \ fail(); \ @@ -365,12 +365,16 @@ static void check_loop_filter(void) } \ } while (0) -CHECK_LOOP_FILTER(h264_v_loop_filter_luma, 1); -CHECK_LOOP_FILTER(h264_h_loop_filter_luma, 0); -CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff, 0); -CHECK_LOOP_FILTER(h264_v_loop_filter_chroma, 1); -CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0); -CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0); +CHECK_LOOP_FILTER(h264_v_loop_filter_luma, 1,); +CHECK_LOOP_FILTER(h264_h_loop_filter_luma, 0,); +CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff, 0,); +CHECK_LOOP_FILTER(h264_v_loop_filter_chroma, 1,); 
+CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0,); +CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0,); + +ff_h264dsp_init(&h, bit_depth, 2); +CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0, 422); +CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0, 422); #undef CHECK_LOOP_FILTER } } @@ -397,9 +401,9 @@ static void check_loop_filter_intra(void) a = a*9/10; } -#define CHECK_LOOP_FILTER(name, align) \ +#define CHECK_LOOP_FILTER(name, align, idc) \ do {\ -if (check_func(h.name, #name "_%dbpp", bit_depth)) {\ +if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \ for (j = 0; j < 36; j++) { \ intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \ for (i = 0; i < 1024; i+=4) { \ @@ -411,7 +415,7 @@ static void check_loop_filter_intra(void) call_ref(dst0 + off, 32, alphas[j], betas[j]); \ call_new(dst1 + off, 32, alphas[j], betas[j]); \ if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \ -fprintf(stderr, #name ": j:%d, alpha:%d beta:%d\n", \ +fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d\n", \ j, alphas[j], betas[j]);\ fail(); \ } \ @@ -420,12 +424,16 @@ static void check_loop_filter_intra(void) } \ } while (0) -CHECK_LOOP_FILTER(h264_v_loop_filter_luma_intra, 1); -CHECK_LOOP_FILTER(h264_h_loop_filter_luma_intra, 0); -CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff_intra, 0); -CHECK_LOOP_FILTER(h264_v_loop_filter_chroma_intra, 1); -CHECK
[FFmpeg-cvslog] avcodec/arm64: fix inverted register order in transpose_4x4H
ffmpeg | branch: master | Janne Grunau | Fri Dec 18 11:27:05 2015 +0100| [2dba0407fdb855bbe44c888232d58ddb2fd3a412] | committer: Michael Niedermayer avcodec/arm64: fix inverted register order in transpose_4x4H Fix related register order issue in ff_h264_idct_add_neon. Found-by: zjh8890 <243186...@qq.com> Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2dba0407fdb855bbe44c888232d58ddb2fd3a412 --- libavcodec/aarch64/h264idct_neon.S |4 ++-- libavcodec/aarch64/neon.S |4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libavcodec/aarch64/h264idct_neon.S b/libavcodec/aarch64/h264idct_neon.S index 04b5a47..91f1e77 100644 --- a/libavcodec/aarch64/h264idct_neon.S +++ b/libavcodec/aarch64/h264idct_neon.S @@ -37,8 +37,8 @@ function ff_h264_idct_add_neon, export=1 sub v7.4H, v16.4H, v3.4H add v0.4H, v4.4H, v6.4H add v1.4H, v5.4H, v7.4H -sub v2.4H, v4.4H, v6.4H -sub v3.4H, v5.4H, v7.4H +sub v3.4H, v4.4H, v6.4H +sub v2.4H, v5.4H, v7.4H transpose_4x4H v0, v1, v2, v3, v4, v5, v6, v7 diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S index 619aec6..a227cbd 100644 --- a/libavcodec/aarch64/neon.S +++ b/libavcodec/aarch64/neon.S @@ -107,8 +107,8 @@ .macro transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7 trn1\r4\().4H, \r0\().4H, \r1\().4H trn2\r5\().4H, \r0\().4H, \r1\().4H -trn1\r7\().4H, \r3\().4H, \r2\().4H -trn2\r6\().4H, \r3\().4H, \r2\().4H +trn1\r7\().4H, \r2\().4H, \r3\().4H +trn2\r6\().4H, \r2\().4H, \r3\().4H trn1\r0\().2S, \r4\().2S, \r7\().2S trn2\r3\().2S, \r4\().2S, \r7\().2S trn1\r1\().2S, \r5\().2S, \r6\().2S ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] libavutil: move FFALIGN macro from common.h to macros.h
ffmpeg | branch: master | Janne Grunau | Thu Dec 10 21:49:30 2015 +0100| [50078c1c8070dd8d1c329e8117ff30ec72489039] | committer: Janne Grunau libavutil: move FFALIGN macro from common.h to macros.h Include macros.h explicitly in common.h so that external code using FFALIGN does not break. It was already implicitly included through version.h. Include macros.h in lls.h and internal.h for FFALIGN. lls.h was including common.h only for FFALIGN and internal.h was missing the include for FFALIGN. `make checkheaders` did not catch it because it's an internal header. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=50078c1c8070dd8d1c329e8117ff30ec72489039 --- libavutil/common.h |2 +- libavutil/internal.h |1 + libavutil/lls.c |2 ++ libavutil/lls.h |2 +- libavutil/macros.h |2 ++ 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/libavutil/common.h b/libavutil/common.h index 56556e7..7a43ccf 100644 --- a/libavutil/common.h +++ b/libavutil/common.h @@ -36,6 +36,7 @@ #include #include "attributes.h" +#include "macros.h" #include "version.h" #include "libavutil/avconfig.h" @@ -59,7 +60,6 @@ #define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0) #define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0])) -#define FFALIGN(x, a) (((x)+(a)-1)&~((a)-1)) /* misc math functions */ diff --git a/libavutil/internal.h b/libavutil/internal.h index 4264c4e..b9be333 100644 --- a/libavutil/internal.h +++ b/libavutil/internal.h @@ -37,6 +37,7 @@ #include "config.h" #include "attributes.h" #include "dict.h" +#include "macros.h" #include "pixfmt.h" #if ARCH_X86 diff --git a/libavutil/lls.c b/libavutil/lls.c index 1298946..60d2b64 100644 --- a/libavutil/lls.c +++ b/libavutil/lls.c @@ -29,6 +29,8 @@ #include #include "attributes.h" +#include "config.h" +#include "internal.h" #include "version.h" #include "lls.h" diff --git a/libavutil/lls.h b/libavutil/lls.h index 9b2b3a4..3977e97 100644 --- a/libavutil/lls.h +++ b/libavutil/lls.h @@ -23,7 +23,7 @@ #ifndef 
AVUTIL_LLS_H #define AVUTIL_LLS_H -#include "common.h" +#include "macros.h" #include "mem.h" #include "version.h" diff --git a/libavutil/macros.h b/libavutil/macros.h index bf3eb9b..3e7b005 100644 --- a/libavutil/macros.h +++ b/libavutil/macros.h @@ -45,4 +45,6 @@ #define AV_PRAGMA(s) _Pragma(#s) +#define FFALIGN(x, a) (((x)+(a)-1)&~((a)-1)) + #endif /* AVUTIL_MACROS_H */ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] arm64: add cycle counter support
ffmpeg | branch: master | Janne Grunau | Thu Dec 3 00:12:39 2015 +0100| [64034849dad8410bedbe1def4c533490fb85cc4a] | committer: Janne Grunau arm64: add cycle counter support The ISB (instruction synchronization barrier) might be too heavy for START/STOPTIMER use but should be more accurate in checkasm where the timing overhead is subtracted. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=64034849dad8410bedbe1def4c533490fb85cc4a --- libavutil/aarch64/timer.h | 44 libavutil/timer.h |4 +++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/libavutil/aarch64/timer.h b/libavutil/aarch64/timer.h new file mode 100644 index 000..382cfd9 --- /dev/null +++ b/libavutil/aarch64/timer.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2015 Janne Grunau + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_AARCH64_TIMER_H +#define AVUTIL_AARCH64_TIMER_H + +#include +#include "config.h" + +#if HAVE_INLINE_ASM + +#define AV_READ_TIME read_time + +static inline uint64_t read_time(void) +{ +uint64_t cycle_counter; +__asm__ volatile( +"isb \t\n" +"mrs %0, pmccntr_el0 " +: "=r"(cycle_counter) :: "memory" ); + +return cycle_counter; +} + +#endif /* HAVE_INLINE_ASM */ + +#endif /* AVUTIL_AARCH64_TIMER_H */ diff --git a/libavutil/timer.h b/libavutil/timer.h index 0d93d7c..cc4c5a5 100644 --- a/libavutil/timer.h +++ b/libavutil/timer.h @@ -38,7 +38,9 @@ #include "log.h" -#if ARCH_ARM +#if ARCH_AARCH64 +# include "aarch64/timer.h" +#elif ARCH_ARM # include "arm/timer.h" #elif ARCH_BFIN # include "bfin/timer.h" ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] x86_64: int32_to_float_fmul_scalar sign extend integer length
ffmpeg | branch: master | Janne Grunau | Tue Dec 8 16:20:19 2015 +0100| [5dfe4edad63971d669ae456b0bc40ef9364cca80] | committer: Janne Grunau x86_64: int32_to_float_fmul_scalar sign extend integer length > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5dfe4edad63971d669ae456b0bc40ef9364cca80 --- libavcodec/x86/fmtconvert.asm |3 +++ 1 file changed, 3 insertions(+) diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm index 727daa9..b9a78a5 100644 --- a/libavcodec/x86/fmtconvert.asm +++ b/libavcodec/x86/fmtconvert.asm @@ -38,6 +38,9 @@ cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, len %elif ARCH_X86_32 movss m0, mulm %endif +%if ARCH_X86_64 +movsxd lenq, lend +%endif SPLATD m0 shl lenq, 2 add srcq, lenq ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] arm: add a cpu flag for the VFPv2 vector mode
ffmpeg | branch: master | Janne Grunau | Wed Dec 9 22:28:36 2015 +0100| [e2710e790c09e49e86baa58c6063af0097cc8cb0] | committer: Janne Grunau arm: add a cpu flag for the VFPv2 vector mode The vector mode was deprecated in ARMv7-A/VFPv3 and various cpu implementations do not support it in hardware. Vector mode code will, depending on the OS, either be emulated in software or result in an illegal instruction on cpus which do not support it. This was not really a problem in practice since NEON implementations of the same functions are preferred. It will however become a problem for checkasm which tests every cpu flag separately. Since this is a cpu feature that newer cpus no longer support, the behaviour of this flag differs from the other flags. It can only be activated by runtime cpu feature selection. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e2710e790c09e49e86baa58c6063af0097cc8cb0 --- libavcodec/arm/dcadsp_init_arm.c |4 ++-- libavcodec/arm/fft_init_arm.c|2 +- libavcodec/arm/fmtconvert_init_arm.c |2 +- libavutil/arm/cpu.c |4 libavutil/arm/cpu.h |5 + libavutil/cpu.c |2 ++ libavutil/cpu.h |1 + libavutil/version.h |2 +- tests/checkasm/checkasm.c|1 + 9 files changed, 18 insertions(+), 5 deletions(-) diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c index 5400484..252f4ae 100644 --- a/libavcodec/arm/dcadsp_init_arm.c +++ b/libavcodec/arm/dcadsp_init_arm.c @@ -59,7 +59,7 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s) { int cpu_flags = av_get_cpu_flags(); -if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) { +if (have_vfp_vm(cpu_flags)) { s->lfe_fir[0] = ff_dca_lfe_fir32_vfp; s->lfe_fir[1] = ff_dca_lfe_fir64_vfp; s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp; @@ -75,7 +75,7 @@ av_cold void ff_synth_filter_init_arm(SynthFilterContext *s) { int cpu_flags = av_get_cpu_flags(); -if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) +if (have_vfp_vm(cpu_flags)) s->synth_filter_float = ff_synth_filter_float_vfp; if 
(have_neon(cpu_flags)) s->synth_filter_float = ff_synth_filter_float_neon; diff --git a/libavcodec/arm/fft_init_arm.c b/libavcodec/arm/fft_init_arm.c index bc143c1..6d6fa22 100644 --- a/libavcodec/arm/fft_init_arm.c +++ b/libavcodec/arm/fft_init_arm.c @@ -40,7 +40,7 @@ av_cold void ff_fft_init_arm(FFTContext *s) { int cpu_flags = av_get_cpu_flags(); -if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) { +if (have_vfp_vm(cpu_flags)) { s->fft_calc = ff_fft_calc_vfp; #if CONFIG_MDCT s->imdct_half = ff_imdct_half_vfp; diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c index 27d3c88..6a80bfb 100644 --- a/libavcodec/arm/fmtconvert_init_arm.c +++ b/libavcodec/arm/fmtconvert_init_arm.c @@ -38,7 +38,7 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx { int cpu_flags = av_get_cpu_flags(); -if (have_vfp(cpu_flags)) { +if (have_vfp_vm(cpu_flags)) { if (!have_vfpv3(cpu_flags)) { c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp; c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp; diff --git a/libavutil/arm/cpu.c b/libavutil/arm/cpu.c index 8bdaa88..2effb72 100644 --- a/libavutil/arm/cpu.c +++ b/libavutil/arm/cpu.c @@ -131,6 +131,10 @@ int ff_get_cpu_flags_arm(void) if (flags & AV_CPU_FLAG_ARMV6T2) flags |= AV_CPU_FLAG_ARMV6; +/* set the virtual VFPv2 vector mode flag */ +if ((flags & AV_CPU_FLAG_VFP) && !(flags & (AV_CPU_FLAG_VFPV3 | AV_CPU_FLAG_NEON))) +flags |= AV_CPU_FLAG_VFP_VM; + return flags; } diff --git a/libavutil/arm/cpu.h b/libavutil/arm/cpu.h index 224409a..5563fc1 100644 --- a/libavutil/arm/cpu.h +++ b/libavutil/arm/cpu.h @@ -30,6 +30,11 @@ #define have_vfpv3(flags) CPUEXT(flags, VFPV3) #define have_neon(flags)CPUEXT(flags, NEON) +/* some functions use the VFPv2 vector mode which is deprecated in ARMv7-A + * and might trap on such CPU depending on the OS configuration */ +#define have_vfp_vm(flags) \ +(have_armv6(flags) && ((flags) & AV_CPU_FLAG_VFP_VM)) + /* Some 
functions use the 'setend' instruction which is deprecated on ARMv8 * and serializing on some ARMv7 cores. This macro ensures such functions * are only enabled on ARMv6. */ diff --git a/libavutil/cpu.c b/libavutil/cpu.c index e24b9dd..5f04461 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -124,6 +124,7 @@ int av_parse_cpu_flags(const char *s) { "armv6",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6
[FFmpeg-cvslog] arm64: convert dcadsp neon asm from arm
ffmpeg | branch: master | Janne Grunau | Sat Nov 28 15:23:52 2015 +0100| [c33c1fa8af2b2e82418a06901b6ad17b3d61b73e] | committer: Janne Grunau arm64: convert dcadsp neon asm from arm ~2% faster dts decoding overall. cortex-a57 cortex-a53 dca_decode_hf_c:474.81659.9 dca_decode_hf_neon: 225.2 301.1 dca_lfe_fir0_c: 913.21537.7 dca_lfe_fir0_neon: 286.8 451.9 dca_lfe_fir1_c: 848.71711.5 dca_lfe_fir1_neon: 387.1 506.4 > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c33c1fa8af2b2e82418a06901b6ad17b3d61b73e --- libavcodec/aarch64/Makefile |2 + libavcodec/aarch64/dcadsp_init.c | 51 libavcodec/aarch64/dcadsp_neon.S | 169 ++ libavcodec/dcadsp.c |2 + libavcodec/dcadsp.h |1 + 5 files changed, 225 insertions(+) diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile index d001b34..0b614a3 100644 --- a/libavcodec/aarch64/Makefile +++ b/libavcodec/aarch64/Makefile @@ -1,3 +1,4 @@ +OBJS-$(CONFIG_DCA_DECODER) += aarch64/dcadsp_init.o OBJS-$(CONFIG_FFT) += aarch64/fft_init_aarch64.o OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o @@ -15,6 +16,7 @@ OBJS-$(CONFIG_VORBIS_DECODER) += aarch64/vorbisdsp_init.o ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o +NEON-OBJS-$(CONFIG_DCA_DECODER) += aarch64/dcadsp_neon.o NEON-OBJS-$(CONFIG_FFT) += aarch64/fft_neon.o NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \ diff --git a/libavcodec/aarch64/dcadsp_init.c b/libavcodec/aarch64/dcadsp_init.c new file mode 100644 index 000..ad91070 --- /dev/null +++ b/libavcodec/aarch64/dcadsp_init.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2010 Mans Rullgard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "libavutil/aarch64/cpu.h" +#include "libavutil/attributes.h" +#include "libavcodec/dcadsp.h" + +void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs); +void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs); + +void ff_synth_filter_float_neon(FFTContext *imdct, +float *synth_buf_ptr, int *synth_buf_offset, +float synth_buf2[32], const float window[512], +float out[32], const float in[32], +float scale); + +void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8], + const int32_t vq_num[DCA_SUBBANDS], + const int8_t hf_vq[1024][32], intptr_t vq_offset, + int32_t scale[DCA_SUBBANDS][2], + intptr_t start, intptr_t end); + +av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s) +{ +int cpu_flags = av_get_cpu_flags(); + +if (have_neon(cpu_flags)) { +s->lfe_fir[0] = ff_dca_lfe_fir0_neon; +s->lfe_fir[1] = ff_dca_lfe_fir1_neon; +s->decode_hf = ff_decode_hf_neon; +} +} diff --git a/libavcodec/aarch64/dcadsp_neon.S b/libavcodec/aarch64/dcadsp_neon.S new file mode 100644 index 000..73196d9 --- /dev/null +++ b/libavcodec/aarch64/dcadsp_neon.S @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2010 Mans Rullgard + * Copyright (c) 2015 Janne Grunau + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License
[FFmpeg-cvslog] arm64: port synth_filter_float_neon from arm
ffmpeg | branch: master | Janne Grunau | Tue Dec 1 13:37:41 2015 +0100| [705f5e5e155f6f280a360af220fc5b30cfcee702] | committer: Janne Grunau arm64: port synth_filter_float_neon from arm ~25% faster dts decoding overall. The checkasm CPU cycles numbers are not that useful since synth_filter_float() calls FFTContext.imdct_half(). cortex-a57 cortex-a53 synth_filter_float_c:1866.2 3490.9 synth_filter_float_neon: 915.0 1531.5 With fftc.imdct_half forced to imdct_half_neon: cortex-a57 cortex-a53 synth_filter_float_c:1718.4 3025.3 synth_filter_float_neon: 926.2 1530.1 > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=705f5e5e155f6f280a360af220fc5b30cfcee702 --- libavcodec/aarch64/Makefile|3 +- libavcodec/aarch64/asm-offsets.h |3 + libavcodec/aarch64/dcadsp_init.c | 16 + libavcodec/aarch64/synth_filter_neon.S | 119 libavcodec/synth_filter.c |8 ++- libavcodec/synth_filter.h |1 + 6 files changed, 147 insertions(+), 3 deletions(-) diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile index 0b614a3..2175578 100644 --- a/libavcodec/aarch64/Makefile +++ b/libavcodec/aarch64/Makefile @@ -16,7 +16,8 @@ OBJS-$(CONFIG_VORBIS_DECODER) += aarch64/vorbisdsp_init.o ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o -NEON-OBJS-$(CONFIG_DCA_DECODER) += aarch64/dcadsp_neon.o +NEON-OBJS-$(CONFIG_DCA_DECODER) += aarch64/dcadsp_neon.o \ + aarch64/synth_filter_neon.o NEON-OBJS-$(CONFIG_FFT) += aarch64/fft_neon.o NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \ diff --git a/libavcodec/aarch64/asm-offsets.h b/libavcodec/aarch64/asm-offsets.h index 45b5c40..60e32dd 100644 --- a/libavcodec/aarch64/asm-offsets.h +++ b/libavcodec/aarch64/asm-offsets.h @@ -27,4 +27,7 @@ #define CELT_TMP0x10 #define CELT_TWIDDLE(CELT_TMP + 0x8)// loaded as pair +/* FFTContext */ +#define IMDCT_HALF 0x48 + #endif /* AVCODEC_AARCH64_ASM_OFFSETS_H */ diff --git a/libavcodec/aarch64/dcadsp_init.c b/libavcodec/aarch64/dcadsp_init.c 
index ad91070..c66ec3f 100644 --- a/libavcodec/aarch64/dcadsp_init.c +++ b/libavcodec/aarch64/dcadsp_init.c @@ -22,7 +22,15 @@ #include "libavutil/aarch64/cpu.h" #include "libavutil/attributes.h" +#include "libavutil/internal.h" #include "libavcodec/dcadsp.h" +#include "libavcodec/fft.h" + +#include "asm-offsets.h" + +#if HAVE_NEON || HAVE_VFP +AV_CHECK_OFFSET(FFTContext, imdct_half, IMDCT_HALF); +#endif void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs); void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs); @@ -49,3 +57,11 @@ av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s) s->decode_hf = ff_decode_hf_neon; } } + +av_cold void ff_synth_filter_init_aarch64(SynthFilterContext *s) +{ +int cpu_flags = av_get_cpu_flags(); + +if (have_neon(cpu_flags)) +s->synth_filter_float = ff_synth_filter_float_neon; +} diff --git a/libavcodec/aarch64/synth_filter_neon.S b/libavcodec/aarch64/synth_filter_neon.S new file mode 100644 index 000..9551bff --- /dev/null +++ b/libavcodec/aarch64/synth_filter_neon.S @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2010 Mans Rullgard + * Copyright (c) 2015 Janne Grunau + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "asm-offsets.h" + +#include "libavutil/aarch64/asm.S" + +.macro inner_loop +ld1 {v29.4s}, [x9], x15 +ld1 {v28.4s}, [x8], x15 +ld1 {v30.4s}, [x10], x15 +ld1 {v31.4s}, [x11], x15 +rev64 v28.4s, v28.4s +ld1 {v24.4s}, [x4], x15 +ld1 {v25.4s}, [x5], x15 +rev64 v31.4s, v31.4s +ld1 {v26.4s}, [x6], x1
[FFmpeg-cvslog] arm64: int32_to_float_fmul neon asm
ffmpeg | branch: master | Janne Grunau | Thu Dec 3 11:04:29 2015 +0100| [a0fc780a2093784e8664f88205ee1b215e109cee] | committer: Janne Grunau arm64: int32_to_float_fmul neon asm 3% faster dts decoding on a cortex-a57. cortex-a57 cortex-a53 int32_to_float_fmul_array8_c:1270.9 4475.6 int32_to_float_fmul_array8_neon: 328.6569.2 int32_to_float_fmul_scalar_c: 928.5 4119.6 int32_to_float_fmul_scalar_neon: 309.1524.1 > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a0fc780a2093784e8664f88205ee1b215e109cee --- libavcodec/aarch64/Makefile |2 + libavcodec/aarch64/fmtconvert_init.c | 43 +++ libavcodec/aarch64/fmtconvert_neon.S | 76 ++ libavcodec/fmtconvert.c | 11 +++-- libavcodec/fmtconvert.h |1 + 5 files changed, 130 insertions(+), 3 deletions(-) diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile index 2175578..022ed84 100644 --- a/libavcodec/aarch64/Makefile +++ b/libavcodec/aarch64/Makefile @@ -1,5 +1,6 @@ OBJS-$(CONFIG_DCA_DECODER) += aarch64/dcadsp_init.o OBJS-$(CONFIG_FFT) += aarch64/fft_init_aarch64.o +OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_init.o OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o @@ -19,6 +20,7 @@ ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o NEON-OBJS-$(CONFIG_DCA_DECODER) += aarch64/dcadsp_neon.o \ aarch64/synth_filter_neon.o NEON-OBJS-$(CONFIG_FFT) += aarch64/fft_neon.o +NEON-OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_neon.o NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \ aarch64/h264idct_neon.o diff --git a/libavcodec/aarch64/fmtconvert_init.c b/libavcodec/aarch64/fmtconvert_init.c new file mode 100644 index 000..0a55a1b --- /dev/null +++ b/libavcodec/aarch64/fmtconvert_init.c @@ -0,0 +1,43 @@ +/* + * ARM optimized Format Conversion Utils + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "libavutil/attributes.h" +#include "libavutil/aarch64/cpu.h" +#include "libavcodec/avcodec.h" +#include "libavcodec/fmtconvert.h" + +void ff_int32_to_float_fmul_array8_neon(FmtConvertContext *c, float *dst, +const int32_t *src, const float *mul, +int len); +void ff_int32_to_float_fmul_scalar_neon(float *dst, const int32_t *src, +float mul, int len); + +av_cold void ff_fmt_convert_init_aarch64(FmtConvertContext *c, + AVCodecContext *avctx) +{ +int cpu_flags = av_get_cpu_flags(); + +if (have_neon(cpu_flags)) { +c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_neon; +c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_neon; +} +} diff --git a/libavcodec/aarch64/fmtconvert_neon.S b/libavcodec/aarch64/fmtconvert_neon.S new file mode 100644 index 000..3b33c87 --- /dev/null +++ b/libavcodec/aarch64/fmtconvert_neon.S @@ -0,0 +1,76 @@ +/* + * ARM NEON optimised Format Conversion Utils + * Copyright (c) 2008 Mans Rullgard + * Copyright (c) 2015 Janne Grunau + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You shou
[FFmpeg-cvslog] arm: add ff_int32_to_float_fmul_array8_neon
ffmpeg | branch: master | Janne Grunau | Thu Dec 3 16:17:32 2015 +0100| [90b1b9350c0a97c4065ae9054b83e57f48a0de1f] | committer: Janne Grunau arm: add ff_int32_to_float_fmul_array8_neon Quite a bit faster than int32_to_float_fmul_array8_c calling ff_int32_to_float_fmul_scalar_neon through FmtConvertContext. Number of cycles per int32_to_float_fmul_array8 call while decoding padded.dts on exynos5422: before after change cortex-a7: 1270 951 -25% cortex-a15: 434 285 -34% checkasm --bench cycle counts: cortex-a15 cortex-a7 int32_to_float_fmul_array8_c: 1730.4 4384.5 int32_to_float_fmul_array8_neon_c: 571.5 1694.3 int32_to_float_fmul_array8_neon: 374.0 1448.8 Interesting are the differences between int32_to_float_fmul_array8_neon_c and int32_to_float_fmul_array8_neon. The former is the current behaviour of calling ff_int32_to_float_fmul_scalar_neon repeatedly from the C function. The raw numbers differ since checkasm uses different lengths than the dca decoder. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=90b1b9350c0a97c4065ae9054b83e57f48a0de1f --- libavcodec/arm/fmtconvert_init_arm.c |4 libavcodec/arm/fmtconvert_neon.S | 37 ++ 2 files changed, 41 insertions(+) diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c index 6a80bfb..11396e8 100644 --- a/libavcodec/arm/fmtconvert_init_arm.c +++ b/libavcodec/arm/fmtconvert_init_arm.c @@ -25,6 +25,9 @@ #include "libavcodec/avcodec.h" #include "libavcodec/fmtconvert.h" +void ff_int32_to_float_fmul_array8_neon(FmtConvertContext *c, float *dst, +const int32_t *src, const float *mul, +int len); void ff_int32_to_float_fmul_scalar_neon(float *dst, const int32_t *src, float mul, int len); @@ -46,6 +49,7 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx } if (have_neon(cpu_flags)) { +c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_neon; c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_neon; } } diff --git 
a/libavcodec/arm/fmtconvert_neon.S b/libavcodec/arm/fmtconvert_neon.S index 5e0ac68..5d48e3d 100644 --- a/libavcodec/arm/fmtconvert_neon.S +++ b/libavcodec/arm/fmtconvert_neon.S @@ -1,6 +1,7 @@ /* * ARM NEON optimised Format Conversion Utils * Copyright (c) 2008 Mans Rullgard + * Copyright (c) 2015 Janne Grunau b * * This file is part of Libav. * @@ -49,3 +50,39 @@ NOVFP len .reqr3 bx lr .unreq len endfunc + +function ff_int32_to_float_fmul_array8_neon, export=1 +ldr r0, [sp] +lsr r0, r0, #3 +subsr0, r0, #1 +beq 1f +2: +vld1.32 {q0-q1}, [r2,:128]! +vld1.32 {q2-q3}, [r2,:128]! +vld1.32 {d20}, [r3]! +subsr0, r0, #2 +vcvt.f32.s32q0, q0 +vcvt.f32.s32q1, q1 +vdup.32 q8, d20[0] +vcvt.f32.s32q2, q2 +vcvt.f32.s32q3, q3 +vmul.f32q0, q0, q8 +vdup.32 q9, d20[1] +vmul.f32q1, q1, q8 +vmul.f32q2, q2, q9 +vmul.f32q3, q3, q9 +vst1.32 {q0-q1}, [r1,:128]! +vst1.32 {q2-q3}, [r1,:128]! +bgt 2b +it lt +bxltlr +1: +vld1.32 {q0-q1}, [r2,:128] +vld1.32 {d16[],d17[]}, [r3] +vcvt.f32.s32q0, q0 +vcvt.f32.s32q1, q1 +vmul.f32q0, q0, q8 +vmul.f32q1, q1, q8 +vst1.32 {q0-q1}, [r1,:128] +bx lr +endfunc ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] arm64: fix inverted register order in transpose_4x4H
ffmpeg | branch: master | Janne Grunau | Fri Dec 18 11:23:22 2015 +0100| [cc29d96d5a379dbcf2649947d884c202c2a52767] | committer: Janne Grunau arm64: fix inverted register order in transpose_4x4H Fix related register order issue in ff_h264_idct_add_neon. Found-by: zjh8890 <243186...@qq.com> > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cc29d96d5a379dbcf2649947d884c202c2a52767 --- libavcodec/aarch64/h264idct_neon.S |4 ++-- libavcodec/aarch64/neon.S |4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libavcodec/aarch64/h264idct_neon.S b/libavcodec/aarch64/h264idct_neon.S index 99c2cb5..78f780a 100644 --- a/libavcodec/aarch64/h264idct_neon.S +++ b/libavcodec/aarch64/h264idct_neon.S @@ -37,8 +37,8 @@ function ff_h264_idct_add_neon, export=1 sub v7.4H, v16.4H, v3.4H add v0.4H, v4.4H, v6.4H add v1.4H, v5.4H, v7.4H -sub v2.4H, v4.4H, v6.4H -sub v3.4H, v5.4H, v7.4H +sub v3.4H, v4.4H, v6.4H +sub v2.4H, v5.4H, v7.4H transpose_4x4H v0, v1, v2, v3, v4, v5, v6, v7 diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S index f1072b7..767bc9d 100644 --- a/libavcodec/aarch64/neon.S +++ b/libavcodec/aarch64/neon.S @@ -107,8 +107,8 @@ .macro transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7 trn1\r4\().4H, \r0\().4H, \r1\().4H trn2\r5\().4H, \r0\().4H, \r1\().4H -trn1\r7\().4H, \r3\().4H, \r2\().4H -trn2\r6\().4H, \r3\().4H, \r2\().4H +trn1\r7\().4H, \r2\().4H, \r3\().4H +trn2\r6\().4H, \r2\().4H, \r3\().4H trn1\r0\().2S, \r4\().2S, \r7\().2S trn2\r3\().2S, \r4\().2S, \r7\().2S trn1\r1\().2S, \r5\().2S, \r6\().2S ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] x86: checkasm: check for or handle missing cleanup after MMX instructions
ffmpeg | branch: master | Janne Grunau | Fri Dec 11 14:06:38 2015 +0100| [711781d7a1714ea4eb0217eb1ba04811978c43d1] | committer: Janne Grunau x86: checkasm: check for or handle missing cleanup after MMX instructions Not every asm routine is expected to clear the MMX state after returning. It is, however, a prerequisite for testing floating point code in checkasm. Annotate functions requiring cleanup with declare_func_emms() and issue emms after the call. The remaining functions are checked for having a cleared MMX state after return. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=711781d7a1714ea4eb0217eb1ba04811978c43d1 --- tests/checkasm/checkasm.h | 16 +++- tests/checkasm/h264pred.c |8 ++-- tests/checkasm/h264qpel.c |2 +- tests/checkasm/x86/checkasm.asm | 78 +-- 4 files changed, 78 insertions(+), 26 deletions(-) diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index c1206e7..6fc30ca 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -26,6 +26,7 @@ #include #include "config.h" #include "libavutil/avstring.h" +#include "libavutil/cpu.h" #include "libavutil/lfg.h" #include "libavutil/timer.h" @@ -54,6 +55,7 @@ static av_unused void *func_ref, *func_new; /* Declare the function prototype. The first argument is the return value, the remaining * arguments are the function parameters. Naming parameters is optional. */ #define declare_func(ret, ...) declare_new(ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__) +#define declare_func_emms(cpu_flags, ret, ...) declare_new_emms(cpu_flags, ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__) /* Indicate that the current test has failed */ #define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__) @@ -65,8 +67,12 @@ static av_unused void *func_ref, *func_new; #define call_ref(...) 
((func_type *)func_ref)(__VA_ARGS__) #if ARCH_X86 && HAVE_YASM -/* Verifies that clobbered callee-saved registers are properly saved and restored */ +/* Verifies that clobbered callee-saved registers are properly saved and restored + * and that either no MMX registers are touched or emms is issued */ void checkasm_checked_call(void *func, ...); +/* Verifies that clobbered callee-saved registers are properly saved and restored + * and issues emms for asm functions which are not required to do so */ +void checkasm_checked_call_emms(void *func, ...); #if ARCH_X86_64 /* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit. @@ -81,16 +87,24 @@ void checkasm_checked_call(void *func, ...); void checkasm_stack_clobber(uint64_t clobber, ...); #define declare_new(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__)\ = (void *)checkasm_checked_call; +#define declare_new_emms(cpu_flags, ret, ...) \ +ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__) = \ +((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \ + (void *)checkasm_checked_call; #define CLOB (UINT64_C(0xdeadbeefdeadbeef)) #define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\ CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\ checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__)) #elif ARCH_X86_32 #define declare_new(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call; +#define declare_new_emms(cpu_flags, ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = \ +((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms :\ + (void *)checkasm_checked_call; #define call_new(...) checked_call(func_new, __VA_ARGS__) #endif #else #define declare_new(ret, ...) +#define declare_new_emms(cpu_flags, ret, ...) /* Call the function */ #define call_new(...) 
((func_type *)func_new)(__VA_ARGS__) #endif diff --git a/tests/checkasm/h264pred.c b/tests/checkasm/h264pred.c index a1ee720..6dffa34 100644 --- a/tests/checkasm/h264pred.c +++ b/tests/checkasm/h264pred.c @@ -144,7 +144,7 @@ static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, if (chroma_format == 1) { uint8_t *topright = buf0 + 2*16; int pred_mode; -declare_func(void, uint8_t *src, const uint8_t *topright, ptrdiff_t stride); +declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, const uint8_t *topright, ptrdiff_t stride); for (pred_mode = 0; pred_mode < 15; pred_mode++) { if (check_pred_func(h->pred4x4[pred_mode], "4x4", pred4x4_modes[codec][pred_mode])) { @@ -163,7 +163,7 @@ static void check_pred8
[FFmpeg-cvslog] checkasm: add float comparison util functions
ffmpeg | branch: master | Janne Grunau | Mon Dec 7 16:14:46 2015 +0100| [9d218d573f8088c606d873e80df572582e6773ef] | committer: Janne Grunau checkasm: add float comparison util functions > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9d218d573f8088c606d873e80df572582e6773ef --- tests/checkasm/checkasm.c | 73 + tests/checkasm/checkasm.h | 11 +++ 2 files changed, 84 insertions(+) diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 37bc139..becfe35 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -27,6 +27,7 @@ #include "checkasm.h" #include "libavutil/common.h" #include "libavutil/cpu.h" +#include "libavutil/intfloat.h" #include "libavutil/random_seed.h" #if HAVE_IO_H @@ -151,6 +152,78 @@ static struct { /* PRNG state */ AVLFG checkasm_lfg; +/* float compare support code */ +static int is_negative(union av_intfloat32 u) +{ +return u.i >> 31; +} + +int float_near_ulp(float a, float b, unsigned max_ulp) +{ +union av_intfloat32 x, y; + +x.f = a; +y.f = b; + +if (is_negative(x) != is_negative(y)) { +// handle -0.0 == +0.0 +return a == b; +} + +if (abs(x.i - y.i) <= max_ulp) +return 1; + +return 0; +} + +int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp, + unsigned len) +{ +unsigned i; + +for (i = 0; i < len; i++) { +if (!float_near_ulp(a[i], b[i], max_ulp)) +return 0; +} +return 1; +} + +int float_near_abs_eps(float a, float b, float eps) +{ +float abs_diff = fabsf(a - b); + +return abs_diff < eps; +} + +int float_near_abs_eps_array(const float *a, const float *b, float eps, + unsigned len) +{ +unsigned i; + +for (i = 0; i < len; i++) { +if (!float_near_abs_eps(a[i], b[i], eps)) +return 0; +} +return 1; +} + +int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp) +{ +return float_near_ulp(a, b, max_ulp) || float_near_abs_eps(a, b, eps); +} + +int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps, + unsigned max_ulp, unsigned len) +{ +unsigned i; + 
+for (i = 0; i < len; i++) { +if (!float_near_abs_eps_ulp(a[i], b[i], eps, max_ulp)) +return 0; +} +return 1; +} + /* Print colored text to stderr if the terminal supports it */ static void color_printf(int color, const char *fmt, ...) { diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 6fc30ca..4a4cce4 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -42,6 +42,17 @@ void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2); void checkasm_update_bench(int iterations, uint64_t cycles); void checkasm_report(const char *name, ...) av_printf_format(1, 2); +/* float compare utilities */ +int float_near_ulp(float a, float b, unsigned max_ulp); +int float_near_abs_eps(float a, float b, float eps); +int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp); +int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp, + unsigned len); +int float_near_abs_eps_array(const float *a, const float *b, float eps, + unsigned len); +int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps, + unsigned max_ulp, unsigned len); + extern AVLFG checkasm_lfg; #define rnd() av_lfg_get(&checkasm_lfg) ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] checkasm: add tests for dcadsp
ffmpeg | branch: master | Janne Grunau | Mon Dec 7 01:23:47 2015 +0100| [e71b747e9dc56cb84f8a06ec8214d5f3bd98bb6d] | committer: Janne Grunau checkasm: add tests for dcadsp > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e71b747e9dc56cb84f8a06ec8214d5f3bd98bb6d --- tests/checkasm/Makefile |1 + tests/checkasm/checkasm.c |3 + tests/checkasm/checkasm.h |1 + tests/checkasm/dcadsp.c | 137 + 4 files changed, 142 insertions(+) diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 75c9a18..a7d13d5 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -1,5 +1,6 @@ # libavcodec tests AVCODECOBJS-$(CONFIG_BSWAPDSP) += bswapdsp.o +AVCODECOBJS-$(CONFIG_DCA_DECODER) += dcadsp.o AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o AVCODECOBJS-$(CONFIG_H264QPEL) += h264qpel.o AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_mc.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index becfe35..a563eaf 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -61,6 +61,9 @@ static const struct { #if CONFIG_BSWAPDSP { "bswapdsp", checkasm_check_bswapdsp }, #endif +#if CONFIG_DCA_DECODER +{ "dcadsp", checkasm_check_dcadsp }, +#endif #if CONFIG_H264PRED { "h264pred", checkasm_check_h264pred }, #endif diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 4a4cce4..eb8b6dd 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -31,6 +31,7 @@ #include "libavutil/timer.h" void checkasm_check_bswapdsp(void); +void checkasm_check_dcadsp(void); void checkasm_check_h264pred(void); void checkasm_check_h264qpel(void); void checkasm_check_hevc_mc(void); diff --git a/tests/checkasm/dcadsp.c b/tests/checkasm/dcadsp.c new file mode 100644 index 000..1665cbb --- /dev/null +++ b/tests/checkasm/dcadsp.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2015 Janne Grunau + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with Libav; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include + +#include "libavutil/internal.h" +#include "libavutil/intfloat.h" +#include "libavcodec/dca.h" +#include "libavcodec/dcadsp.h" +#include "libavcodec/dcadata.h" + +#include "checkasm.h" + +#define randomize_lfe_fir(size) \ +do {\ +int i; \ +for (i = 0; i < size; i++) {\ +float f = (float)rnd() / (UINT_MAX >> 1) - 1.0f;\ +in[i] = f; \ +} \ +for (i = 0; i < 256; i++) { \ +float f = (float)rnd() / (UINT_MAX >> 1) - 1.0f;\ +coeffs[i] = f; \ +} \ +} while (0) + +#define check_lfe_fir(decifactor, eps) \ +do {\ +LOCAL_ALIGNED_16(float, in, [256 / decifactor]);\ +LOCAL_ALIGNED_16(float, out0, [decifactor * 2]); \ +LOCAL_ALIGNED_16(float, out1, [decifactor * 2]); \ +LOCAL_ALIGNED_16(float, coeffs, [256]); \ +int i; \ +const float * in_ptr = in + (256 / decifactor) - 1; \ +declare_func(void, float *out, const float *in, const float *coeffs); \ +/* repeat the test several times */ \ +for (i = 0; i < 32; i++) { \ +int j; \ +memset(out0,0, sizeof(*out0) * 2 * decifactor); \ +memset(out1, 0xFF, sizeof(*out1) * 2 * decifactor); \ +randomize_lfe_fir(256 / decifactor);\ +call_ref(out0, in_ptr, coeffs);
[FFmpeg-cvslog] checkasm: add synth_filter test
ffmpeg | branch: master | Janne Grunau | Mon Dec 7 23:38:46 2015 +0100| [568a4323fbde03665b2b23a98068d02b39121812] | committer: Janne Grunau checkasm: add synth_filter test > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=568a4323fbde03665b2b23a98068d02b39121812 --- tests/checkasm/Makefile |2 +- tests/checkasm/checkasm.c |1 + tests/checkasm/checkasm.h |1 + tests/checkasm/synth_filter.c | 121 + 4 files changed, 124 insertions(+), 1 deletion(-) diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index a7d13d5..9bd13ac 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -1,6 +1,6 @@ # libavcodec tests AVCODECOBJS-$(CONFIG_BSWAPDSP) += bswapdsp.o -AVCODECOBJS-$(CONFIG_DCA_DECODER) += dcadsp.o +AVCODECOBJS-$(CONFIG_DCA_DECODER) += dcadsp.o synth_filter.o AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o AVCODECOBJS-$(CONFIG_H264QPEL) += h264qpel.o AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_mc.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index a563eaf..c61e4d4 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -63,6 +63,7 @@ static const struct { #endif #if CONFIG_DCA_DECODER { "dcadsp", checkasm_check_dcadsp }, +{ "synth_filter", checkasm_check_synth_filter }, #endif #if CONFIG_H264PRED { "h264pred", checkasm_check_h264pred }, diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index eb8b6dd..a599dba 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -35,6 +35,7 @@ void checkasm_check_dcadsp(void); void checkasm_check_h264pred(void); void checkasm_check_h264qpel(void); void checkasm_check_hevc_mc(void); +void checkasm_check_synth_filter(void); void checkasm_check_v210enc(void); void *checkasm_check_func(void *func, const char *name, ...) 
av_printf_format(2, 3); diff --git a/tests/checkasm/synth_filter.c b/tests/checkasm/synth_filter.c new file mode 100644 index 000..157400b --- /dev/null +++ b/tests/checkasm/synth_filter.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2015 Janne Grunau + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with Libav; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include + +#include "libavutil/internal.h" +#include "libavutil/intfloat.h" +#include "libavcodec/dcadata.h" +#include "libavcodec/synth_filter.h" + +#include "checkasm.h" + +#define BUF_SIZE 32 + +#define randomize_input() \ +do {\ +int i; \ +for (i = 0; i < BUF_SIZE; i++) {\ +float f = (float)rnd() / (UINT_MAX >> 5) - 16.0f; \ +in[i] = f; \ +} \ +} while (0) + +void checkasm_check_synth_filter(void) +{ +FFTContext imdct; +SynthFilterContext synth; + +ff_mdct_init(&imdct, 6, 1, 1.0); +ff_synth_filter_init(&synth); + +if (check_func(synth.synth_filter_float, "synth_filter_float")) { +LOCAL_ALIGNED(32, float, out0, [BUF_SIZE]); +LOCAL_ALIGNED(32, float, out1, [BUF_SIZE]); +LOCAL_ALIGNED(32, float, out_b, [BUF_SIZE]); +LOCAL_ALIGNED(32, float, in, [BUF_SIZE]); +LOCAL_ALIGNED(32, float, buf2_0, [BUF_SIZE]); +LOCAL_ALIGNED(32, float, buf2_1, [BUF_SIZE]); +LOCAL_ALIGNED(32, float, buf2_b, [BUF_SIZE]); +LOCAL_ALIGNED(32, float, buf0, [512]); 
+LOCAL_ALIGNED(32, float, buf1, [512]); +LOCAL_ALIGNED(32, float, buf_b, [512]); +float scale = 1.0f; +int i, offset0 = 0, offset1 = 0, offset_b = 0; + +declare_func(void, FFTContext *, float *, int *, float[32], const float[512], + float[32], float[32], float); + +memset(buf2_0, 0, sizeof(*buf2_0) * BUF_SIZE); +memset(buf2_1, 0, sizeof(*buf2_1) * BUF_SIZE); +memset(buf2_b, 0, sizeof(*buf2_b) * BUF_SIZE); +memset(buf0, 0, sizeof(*buf2_0) * 512); +memse
[FFmpeg-cvslog] checkasm: add fmtconvert tests
ffmpeg | branch: master | Janne Grunau | Tue Dec 8 16:24:57 2015 +0100| [489e6add4478b0f5717dbf644234c6f3a3baf02c] | committer: Janne Grunau checkasm: add fmtconvert tests > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=489e6add4478b0f5717dbf644234c6f3a3baf02c --- tests/checkasm/Makefile |1 + tests/checkasm/checkasm.c |3 ++ tests/checkasm/checkasm.h |1 + tests/checkasm/fmtconvert.c | 105 +++ 4 files changed, 110 insertions(+) diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 9bd13ac..a66fc73 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -1,6 +1,7 @@ # libavcodec tests AVCODECOBJS-$(CONFIG_BSWAPDSP) += bswapdsp.o AVCODECOBJS-$(CONFIG_DCA_DECODER) += dcadsp.o synth_filter.o +AVCODECOBJS-$(CONFIG_FMTCONVERT) += fmtconvert.o AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o AVCODECOBJS-$(CONFIG_H264QPEL) += h264qpel.o AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_mc.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index c61e4d4..d6f8ffc 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -65,6 +65,9 @@ static const struct { { "dcadsp", checkasm_check_dcadsp }, { "synth_filter", checkasm_check_synth_filter }, #endif +#if CONFIG_FMTCONVERT +{ "fmtconvert", checkasm_check_fmtconvert }, +#endif #if CONFIG_H264PRED { "h264pred", checkasm_check_h264pred }, #endif diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index a599dba..0bc66b9 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -32,6 +32,7 @@ void checkasm_check_bswapdsp(void); void checkasm_check_dcadsp(void); +void checkasm_check_fmtconvert(void); void checkasm_check_h264pred(void); void checkasm_check_h264qpel(void); void checkasm_check_hevc_mc(void); diff --git a/tests/checkasm/fmtconvert.c b/tests/checkasm/fmtconvert.c new file mode 100644 index 000..1a843b0 --- /dev/null +++ b/tests/checkasm/fmtconvert.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2015 Janne Grunau + * + * This file is 
part of Libav. + * + * Libav is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with Libav; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include + +#include "libavutil/internal.h" +#include "libavutil/common.h" +#include "libavcodec/fmtconvert.h" + +#include "checkasm.h" + +#define BUF_SIZE 1024 + +#define randomize_input(len)\ +do {\ +int k; \ +for (k = 0; k < len; k++) { \ +in[k] = rnd() - INT32_MAX; \ +} \ +for ( ; k < BUF_SIZE; k++) {\ +in[k] = INT32_MAX; \ +} \ +} while (0) + +void checkasm_check_fmtconvert(void) +{ +FmtConvertContext c; +LOCAL_ALIGNED(32, float, dst0, [BUF_SIZE]); +LOCAL_ALIGNED(32, float, dst1, [BUF_SIZE]); +LOCAL_ALIGNED(32, int32_t, in, [BUF_SIZE]); +float scale_arr[128]; +int length[] = {8, 16, 24, 56, 72, 128, 512, 520, 656, 768, 992}; +int i, j; + +for (i = 0; i < FF_ARRAY_ELEMS(scale_arr); i++) +scale_arr[i] = (FF_ARRAY_ELEMS(scale_arr) - FF_ARRAY_ELEMS(scale_arr) / 2) / 13; + +ff_fmt_convert_init(&c, NULL); + +memset(dst0, 0, sizeof(*dst0) * BUF_SIZE); +memset(dst1, 0, sizeof(*dst1) * BUF_SIZE); + +if (check_func(c.int32_to_float_fmul_scalar, "int32_to_float_fmul_scalar")) { +declare_func(void, float *, const int32_t *, float, int); + +for (i = 0; i < FF_ARRAY_ELEMS(scale_arr); i++) { +for (j = 0; j < FF_ARRAY_ELEMS(length); j++) { + +randomize_input(length[j]); + +call_ref(dst0, in, scale_arr[i], length[j]); +call_new(dst1, 
in, scale_arr[i], length[j]); + +if (!float_near_ulp_array(dst0, dst1, 3, length[j])) { +fail(); +break; +} + +
[FFmpeg-cvslog] x86: zero extend the 32-bit length in int32_to_float_fmul_scalar implicitly
ffmpeg | branch: master | Janne Grunau | Tue Dec 22 22:45:42 2015 +0100| [f4f27e4cf1013c55b2c7df359ce8d58ee922662c] | committer: Janne Grunau x86: zero extend the 32-bit length in int32_to_float_fmul_scalar implicitly This reverts commit 5dfe4edad63971d669ae456b0bc40ef9364cca80. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f4f27e4cf1013c55b2c7df359ce8d58ee922662c --- libavcodec/x86/fmtconvert.asm |5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm index b9a78a5..0383322 100644 --- a/libavcodec/x86/fmtconvert.asm +++ b/libavcodec/x86/fmtconvert.asm @@ -38,11 +38,8 @@ cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, len %elif ARCH_X86_32 movss m0, mulm %endif -%if ARCH_X86_64 -movsxd lenq, lend -%endif SPLATD m0 -shl lenq, 2 +shl lend, 2 add srcq, lenq add dstq, lenq neg lenq ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] checkasm: x86: post commit review fixes
ffmpeg | branch: master | Janne Grunau | Tue Dec 22 22:51:55 2015 +0100| [f0f54117c8f206e8045d301c2eb975b26e9f263d] | committer: Janne Grunau checkasm: x86: post commit review fixes Check the full FPU tag word instead of only the lower half and simplify the comparison. Use upper-case function base name as macro name to instantiate both checked_call variants. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f0f54117c8f206e8045d301c2eb975b26e9f263d --- tests/checkasm/x86/checkasm.asm | 20 +--- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/tests/checkasm/x86/checkasm.asm b/tests/checkasm/x86/checkasm.asm index 147d7a7..52d10ae 100644 --- a/tests/checkasm/x86/checkasm.asm +++ b/tests/checkasm/x86/checkasm.asm @@ -98,7 +98,7 @@ cglobal stack_clobber, 1,2 ; void checkasm_checked_call(void *func, ...) ;- INIT_XMM -%macro check_call 0-1 +%macro CHECKED_CALL 0-1 cglobal checked_call%1, 2,15,16,max_args*8+8 mov t0, r0 @@ -171,9 +171,8 @@ cglobal checked_call%1, 2,15,16,max_args*8+8 .clobber_ok: %ifnid %1, _emms fstenv [rsp] -mov r9h, [rsp + 8] -add r9h, 1 -jz .emms_ok +cmp word [rsp + 8], 0x +je .emms_ok report_fail error_message_emms emms .emms_ok: @@ -201,7 +200,7 @@ cglobal checked_call%1, 2,15,16,max_args*8+8 mov eax, r3 %endmacro -%macro check_call 0-1 +%macro CHECKED_CALL 0-1 ;- ; void checkasm_checked_call(void *func, ...) ;- @@ -225,10 +224,9 @@ cglobal checked_call%1, 1,7 report_fail error_message .clobber_ok: %ifnid %1, _emms -fstenv [rsp] -mov r3h, [rsp + 8] -add r3h, 1 -jz .emms_ok +fstenv [esp] +cmp word [esp + 8], 0x +je .emms_ok report_fail error_message_emms emms .emms_ok: @@ -241,5 +239,5 @@ cglobal checked_call%1, 1,7 %endif ; ARCH_X86_64 -check_call -check_call _emms +CHECKED_CALL +CHECKED_CALL _emms ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] x86: use emms after ff_int32_to_float_fmul_scalar_sse
ffmpeg | branch: master | Janne Grunau | Tue Dec 29 12:08:38 2015 +0100| [8563f9887194b07c972c3475d6b51592d77f73f7] | committer: Janne Grunau x86: use emms after ff_int32_to_float_fmul_scalar_sse Intel's Instruction Set Reference (as of September 2015) clearly states that cvtpi2ps switches to MMX state. Actual CPUs do not switch if the source is a memory location. The Instruction Set Reference from 1999 (Order Number 243191) describes this behaviour but all later versions I've seen make no distinction whether MMX registers or memory is used as source. The documentation for the matching SSE2 instruction to convert to double (cvtpi2pd) was fixed (see the valgrind bug https://bugs.kde.org/show_bug.cgi?id=210264). It will take time to get a clarification and fixes in place. In the meantime it makes sense to change ff_int32_to_float_fmul_scalar_sse to be correct according to the documentation. The vast majority of users will have SSE2 so a change to the SSE version has little effect. Fixes fate-checkasm on x86 valgrind targets. Valgrind 'bug' reported as https://bugs.kde.org/show_bug.cgi?id=357059 > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8563f9887194b07c972c3475d6b51592d77f73f7 --- libavcodec/x86/fmtconvert.asm |9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm index 0383322..2a3e4a5 100644 --- a/libavcodec/x86/fmtconvert.asm +++ b/libavcodec/x86/fmtconvert.asm @@ -61,7 +61,14 @@ cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, len mova [dstq+lenq+16], m2 add lenq, 32 jl .loop -REP_RET +%if notcpuflag(sse2) +;; cvtpi2ps switches to MMX even if the source is a memory location +;; possible an error in documentation since every tested CPU disagrees with +;; that. 
Use emms anyway since the vast majority of machines will use the +;; SSE2 variant +emms +%endif +RET %endmacro INIT_XMM sse ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] mpjpgdec: free AVIOContext leak on early probe fail
ffmpeg | branch: master | Janne Grunau | Mon Jun 8 13:31:04 2015 +0200| [caf7be30b11288c498fae67be4741bfbf083d977] | committer: Janne Grunau mpjpgdec: free AVIOContext leak on early probe fail > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=caf7be30b11288c498fae67be4741bfbf083d977 --- libavformat/mpjpegdec.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavformat/mpjpegdec.c b/libavformat/mpjpegdec.c index 354278c..72891e7 100644 --- a/libavformat/mpjpegdec.c +++ b/libavformat/mpjpegdec.c @@ -88,7 +88,7 @@ static int mpjpeg_read_probe(AVProbeData *p) return AVERROR(ENOMEM); if (p->buf_size < 2 || p->buf[0] != '-' || p->buf[1] != '-') -return 0; +goto end; while (!pb->eof_reached) { ret = get_line(pb, line, sizeof(line)); @@ -101,7 +101,7 @@ static int mpjpeg_read_probe(AVProbeData *p) break; } } - +end: av_free(pb); return ret; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] aac_parser: add required padding for GetBitContext buffer
ffmpeg | branch: master | Janne Grunau | Mon Jun 8 14:45:12 2015 +0200| [fb1473080223a634b8ac2cca48a632d037a0a69d] | committer: Janne Grunau aac_parser: add required padding for GetBitContext buffer Fixes stack buffer overflow errors detected by address sanitizer in various fate tests. CC: libav-sta...@libav.org > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fb1473080223a634b8ac2cca48a632d037a0a69d --- libavcodec/aac_parser.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/aac_parser.c b/libavcodec/aac_parser.c index fdaa5f8..acb05d4 100644 --- a/libavcodec/aac_parser.c +++ b/libavcodec/aac_parser.c @@ -34,7 +34,7 @@ static int aac_sync(uint64_t state, AACAC3ParseContext *hdr_info, int size; union { uint64_t u64; -uint8_t u8[8]; +uint8_t u8[8 + FF_INPUT_BUFFER_PADDING_SIZE]; } tmp; tmp.u64 = av_be2ne64(state); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] ac3_parser: add required padding for GetBitContext buffer
ffmpeg | branch: master | Janne Grunau | Mon Jun 8 14:48:26 2015 +0200| [09447f2b0fafac6d9565aab82a4c5f16fc99ee5e] | committer: Janne Grunau ac3_parser: add required padding for GetBitContext buffer Fixes stack buffer overflow errors detected by address sanitizer in various fate tests. CC: libav-sta...@libav.org > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=09447f2b0fafac6d9565aab82a4c5f16fc99ee5e --- libavcodec/ac3_parser.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/ac3_parser.c b/libavcodec/ac3_parser.c index 5ea09f8..69d88c1 100644 --- a/libavcodec/ac3_parser.c +++ b/libavcodec/ac3_parser.c @@ -150,7 +150,7 @@ static int ac3_sync(uint64_t state, AACAC3ParseContext *hdr_info, int err; union { uint64_t u64; -uint8_t u8[8]; +uint8_t u8[8 + FF_INPUT_BUFFER_PADDING_SIZE]; } tmp = { av_be2ne64(state) }; AC3HeaderInfo hdr; GetBitContext gbc; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] imc: add required padding for GetBitContext buffer
ffmpeg | branch: master | Janne Grunau | Mon Jun 8 14:48:54 2015 +0200| [210921722bf828b3b895ebcbc34374e6c4452c6f] | committer: Janne Grunau imc: add required padding for GetBitContext buffer Fixes stack buffer overflow errors detected by address sanitizer in fate-imc. CC: libav-sta...@libav.org > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=210921722bf828b3b895ebcbc34374e6c4452c6f --- libavcodec/imc.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/imc.c b/libavcodec/imc.c index 411bc85..c60fc7a 100644 --- a/libavcodec/imc.c +++ b/libavcodec/imc.c @@ -997,7 +997,7 @@ static int imc_decode_frame(AVCodecContext *avctx, void *data, IMCContext *q = avctx->priv_data; -LOCAL_ALIGNED_16(uint16_t, buf16, [IMC_BLOCK_SIZE / 2]); +LOCAL_ALIGNED_16(uint16_t, buf16, [(IMC_BLOCK_SIZE + FF_INPUT_BUFFER_PADDING_SIZE) / 2]); if (buf_size < IMC_BLOCK_SIZE * avctx->channels) { av_log(avctx, AV_LOG_ERROR, "frame too small!\n"); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] movenc: fixes a questionable valgrind uninitialized value warning
ffmpeg | branch: master | Janne Grunau | Tue Jun 9 12:27:47 2015 +0200| [529c05698e88b057f0bea61e0d85f2b42925b5ea] | committer: Janne Grunau movenc: fixes a questionable valgrind uninitialized value warning display_matrix_size is only initialized when av_stream_get_side_data() returns a side data pointer. The code is safe since the only effect this has is setting the display_matrix pointer to NULL which it was already anyway. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=529c05698e88b057f0bea61e0d85f2b42925b5ea --- libavformat/movenc.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/movenc.c b/libavformat/movenc.c index 30d397a..761c3e8 100644 --- a/libavformat/movenc.c +++ b/libavformat/movenc.c @@ -1518,7 +1518,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov, display_matrix = (uint32_t*)av_stream_get_side_data(st, AV_PKT_DATA_DISPLAYMATRIX, &display_matrix_size); -if (display_matrix_size < 9 * sizeof(*display_matrix)) +if (display_matrix && display_matrix_size < 9 * sizeof(*display_matrix)) display_matrix = NULL; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] arm64: constify src in h264qpel dsp function definitions
ffmpeg | branch: master | Janne Grunau | Sun Jun 14 20:53:56 2015 +0200| [c2de2cf0d2927f3f584dab6d54276fbda92a0a71] | committer: Janne Grunau arm64: constify src in h264qpel dsp function definitions > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c2de2cf0d2927f3f584dab6d54276fbda92a0a71 --- libavcodec/aarch64/h264qpel_init_aarch64.c | 128 ++-- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/libavcodec/aarch64/h264qpel_init_aarch64.c b/libavcodec/aarch64/h264qpel_init_aarch64.c index 4beb11b..74088b2 100644 --- a/libavcodec/aarch64/h264qpel_init_aarch64.c +++ b/libavcodec/aarch64/h264qpel_init_aarch64.c @@ -27,73 +27,73 @@ #include "libavutil/aarch64/cpu.h" #include "libavcodec/h264qpel.h" -void ff_put_h264_qpel16_mc00_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc10_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc20_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc30_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc01_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc11_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc21_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc31_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc02_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc12_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc22_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc32_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc03_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc13_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc23_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel16_mc33_neon(uint8_t 
*dst, uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc10_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc20_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc30_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc01_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc11_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc21_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc31_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc02_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc12_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc22_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc32_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc03_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc13_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc23_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel16_mc33_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc00_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc10_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc20_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc30_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc01_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc11_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc21_neon(uint8_t *dst, uint8_t *src, 
ptrdiff_t stride); -void ff_put_h264_qpel8_mc31_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc02_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc12_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc22_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc32_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc03_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc13_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc23_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); -void ff_put_h264_qpel8_mc33_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel8_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_h264_qpel8_mc10_neon(uint8_t *dst, const uint8_t *src, pt
[FFmpeg-cvslog] libvpx: fix test for VPX_IMAGE_ABI_VERSION
ffmpeg | branch: master | Janne Grunau | Wed Jun 24 08:39:40 2015 +0200| [41740ef8be6ec409f7eff3932ddba9a9eeec27b1] | committer: Janne Grunau libvpx: fix test for VPX_IMAGE_ABI_VERSION > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=41740ef8be6ec409f7eff3932ddba9a9eeec27b1 --- libavcodec/libvpx.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavcodec/libvpx.c b/libavcodec/libvpx.c index 4cc050d..49f966d 100644 --- a/libavcodec/libvpx.c +++ b/libavcodec/libvpx.c @@ -40,7 +40,7 @@ enum AVPixelFormat ff_vpx_imgfmt_to_pixfmt(vpx_img_fmt_t img) case VPX_IMG_FMT_I422: return AV_PIX_FMT_YUV422P; case VPX_IMG_FMT_I444: return AV_PIX_FMT_YUV444P; case VPX_IMG_FMT_444A: return AV_PIX_FMT_YUVA444P; -#ifdef VPX_IMAGE_ABI_VERSION >= 3 +#if VPX_IMAGE_ABI_VERSION >= 3 case VPX_IMG_FMT_I440: return AV_PIX_FMT_YUV440P; case VPX_IMG_FMT_I42016:return AV_PIX_FMT_YUV420P16BE; case VPX_IMG_FMT_I42216:return AV_PIX_FMT_YUV422P16BE; @@ -68,7 +68,7 @@ vpx_img_fmt_t ff_vpx_pixfmt_to_imgfmt(enum AVPixelFormat pix) case AV_PIX_FMT_YUV422P: return VPX_IMG_FMT_I422; case AV_PIX_FMT_YUV444P: return VPX_IMG_FMT_I444; case AV_PIX_FMT_YUVA444P: return VPX_IMG_FMT_444A; -#ifdef VPX_IMAGE_ABI_VERSION >= 3 +#if VPX_IMAGE_ABI_VERSION >= 3 case AV_PIX_FMT_YUV440P: return VPX_IMG_FMT_I440; case AV_PIX_FMT_YUV420P16BE: return VPX_IMG_FMT_I42016; case AV_PIX_FMT_YUV422P16BE: return VPX_IMG_FMT_I42216; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec: add missing CODEC_CAP_DR1 to codecs using get_buffer()
ffmpeg | branch: master | Janne Grunau | Sun Jun 28 14:58:47 2015 +0200| [007e27d363ba7d994019dc897dc9c39071bb204a] | committer: Janne Grunau avcodec: add missing CODEC_CAP_DR1 to codecs using get_buffer() > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=007e27d363ba7d994019dc897dc9c39071bb204a --- libavcodec/atrac3plusdec.c |1 + libavcodec/jpeg2000dec.c |2 +- libavcodec/sp5xdec.c |1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/libavcodec/atrac3plusdec.c b/libavcodec/atrac3plusdec.c index ddbfb53..3e061af 100644 --- a/libavcodec/atrac3plusdec.c +++ b/libavcodec/atrac3plusdec.c @@ -387,6 +387,7 @@ AVCodec ff_atrac3p_decoder = { .long_name= NULL_IF_CONFIG_SMALL("ATRAC3+ (Adaptive TRansform Acoustic Coding 3+)"), .type = AVMEDIA_TYPE_AUDIO, .id = AV_CODEC_ID_ATRAC3P, +.capabilities = CODEC_CAP_DR1, .priv_data_size = sizeof(ATRAC3PContext), .init = atrac3p_decode_init, .init_static_data = ff_atrac3p_init_vlcs, diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index 5135297..69dc566 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -1470,7 +1470,7 @@ AVCodec ff_jpeg2000_decoder = { .long_name= NULL_IF_CONFIG_SMALL("JPEG 2000"), .type = AVMEDIA_TYPE_VIDEO, .id = AV_CODEC_ID_JPEG2000, -.capabilities = CODEC_CAP_FRAME_THREADS, +.capabilities = CODEC_CAP_FRAME_THREADS | CODEC_CAP_DR1, .priv_data_size = sizeof(Jpeg2000DecoderContext), .init_static_data = jpeg2000_init_static_data, .init = jpeg2000_decode_init, diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c index 08bdbc0..ddf728f 100644 --- a/libavcodec/sp5xdec.c +++ b/libavcodec/sp5xdec.c @@ -116,5 +116,6 @@ AVCodec ff_amv_decoder = { .init = ff_mjpeg_decode_init, .close = ff_mjpeg_decode_end, .decode = sp5x_decode_frame, +.capabilities = CODEC_CAP_DR1, .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE, }; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] g2meet: use an unsigned type for the djb hash
ffmpeg | branch: master | Janne Grunau | Wed Jul 1 13:34:50 2015 +0200| [4d6c40a6d0ce85e96a6e37f558236e2a6a75] | committer: Janne Grunau g2meet: use an unsigned type for the djb hash > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4d6c40a6d0ce85e96a6e37f558236e2a6a75 --- libavcodec/g2meet.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/g2meet.c b/libavcodec/g2meet.c index d0b36f0..600e2b2 100644 --- a/libavcodec/g2meet.c +++ b/libavcodec/g2meet.c @@ -399,7 +399,7 @@ static inline int log2_ceil(uint32_t x) /* improved djb2 hash from http://www.cse.yorku.ca/~oz/hash.html */ static int djb2_hash(uint32_t key) { -int h = 5381; +uint32_t h = 5381; h = (h * 33) ^ ((key >> 24) & 0xFF); // xxx: probably not needed at all h = (h * 33) ^ ((key >> 16) & 0xFF); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] g2meet: use av_ceil_log2 instead of a custom function
ffmpeg | branch: master | Janne Grunau | Wed Jul 1 13:58:34 2015 +0200| [9eec23b8a7fd0f91827bbc3ed0792c39a8cc9a8a] | committer: Janne Grunau g2meet: use av_ceil_log2 instead of a custom function > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9eec23b8a7fd0f91827bbc3ed0792c39a8cc9a8a --- libavcodec/g2meet.c | 12 +--- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/libavcodec/g2meet.c b/libavcodec/g2meet.c index 600e2b2..af27a5d 100644 --- a/libavcodec/g2meet.c +++ b/libavcodec/g2meet.c @@ -386,16 +386,6 @@ static int jpg_decode_data(JPGContext *c, int width, int height, #define G_shift 8 #define B_shift 0 -static inline int log2_ceil(uint32_t x) -{ -int c = 0; - -for (--x; x > 0; x >>= 1) -c++; - -return c; -} - /* improved djb2 hash from http://www.cse.yorku.ca/~oz/hash.html */ static int djb2_hash(uint32_t key) { @@ -701,7 +691,7 @@ static int epic_decode_run_length(ePICContext *dc, int x, int y, int tile_width, if (!(above_row[pos] == pix)) break; run = pos - start_pos - 1; -idx = log2_ceil(run); +idx = av_ceil_log2(run); if (ff_els_decode_bit(&dc->els_ctx, &dc->prev_row_rung[idx])) *pRun += run; else { ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] g2meet: force simple idct for identical results over all fate configs
ffmpeg | branch: master | Janne Grunau | Wed Jul 1 15:33:20 2015 +0200| [f91fe24e9bd6912c29bbb03d8afe878e045f9721] | committer: Janne Grunau g2meet: force simple idct for identical results over all fate configs > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f91fe24e9bd6912c29bbb03d8afe878e045f9721 --- tests/fate/screen.mak |6 +++--- tests/ref/fate/g2m2 | 24 ++--- tests/ref/fate/g2m3 | 40 +-- tests/ref/fate/g2m4 | 56 - 4 files changed, 63 insertions(+), 63 deletions(-) diff --git a/tests/fate/screen.mak b/tests/fate/screen.mak index 14c5089..cc29d22 100644 --- a/tests/fate/screen.mak +++ b/tests/fate/screen.mak @@ -30,13 +30,13 @@ FATE_SAMPLES_AVCONV-$(call DEMDEC, AVI, FRAPS) += $(FATE_FRAPS) fate-fraps: $(FATE_FRAPS) FATE_G2M += fate-g2m2 -fate-g2m2: CMD = framecrc -i $(TARGET_SAMPLES)/g2m/g2m2.asf -an +fate-g2m2: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m2.asf -an FATE_G2M += fate-g2m3 -fate-g2m3: CMD = framecrc -i $(TARGET_SAMPLES)/g2m/g2m3.asf -frames:v 20 +fate-g2m3: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m3.asf -frames:v 20 FATE_G2M += fate-g2m4 -fate-g2m4: CMD = framecrc -i $(TARGET_SAMPLES)/g2m/g2m4.asf +fate-g2m4: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m4.asf FATE_SAMPLES_AVCONV-$(call DEMDEC, ASF, G2M) += $(FATE_G2M) fate-g2m: $(FATE_G2M) diff --git a/tests/ref/fate/g2m2 b/tests/ref/fate/g2m2 index f117b48..710dbd1 100644 --- a/tests/ref/fate/g2m2 +++ b/tests/ref/fate/g2m2 @@ -147,15 +147,15 @@ 0, 8531, 8531,0, 2359296, 0x47874e4f 0, 8547, 8547,0, 2359296, 0xdead4e4f 0, 8562, 8562,0, 2359296, 0x847e4e4f -0, 9344, 9344,0, 2359296, 0x614ce46d -0, 9345, 9345,0, 2359296, 0x8dece312 -0, 9876, 9876,0, 2359296, 0xbdf9e34e -0, 9922, 9922,0, 2359296, 0x3e21e50a -0, 9938, 9938,0, 2359296, 0xf348e4a4 -0, 9954, 9954,0, 2359296, 0x8178e415 -0, 9955, 9955,0, 2359296, 0xf0b5e199 -0, 9969, 9969,0, 2359296, 0x5a33e00e -0, 9970, 9970,0, 2359296, 0xaceddf05 -0, 9985, 9985,0, 2359296, 0xca09e023 -0, 9986, 9986,0, 
2359296, 0xeb8be0c0 -0, 10001, 10001,0, 2359296, 0x6a0fdf28 +0, 9344, 9344,0, 2359296, 0x1a13e47c +0, 9345, 9345,0, 2359296, 0x46b3e321 +0, 9876, 9876,0, 2359296, 0x76c0e35d +0, 9922, 9922,0, 2359296, 0xf6d9e519 +0, 9938, 9938,0, 2359296, 0xac0fe4b3 +0, 9954, 9954,0, 2359296, 0x3a3fe424 +0, 9955, 9955,0, 2359296, 0xa97ce1a8 +0, 9969, 9969,0, 2359296, 0x12fae01d +0, 9970, 9970,0, 2359296, 0x65b4df14 +0, 9985, 9985,0, 2359296, 0x82d0e032 +0, 9986, 9986,0, 2359296, 0xa452e0cf +0, 10001, 10001,0, 2359296, 0x22d6df37 diff --git a/tests/ref/fate/g2m3 b/tests/ref/fate/g2m3 index 9956710..8866a7e 100644 --- a/tests/ref/fate/g2m3 +++ b/tests/ref/fate/g2m3 @@ -1,25 +1,25 @@ #tb 0: 1/1000 #tb 1: 1/44100 -0, 0, 0,0, 3824640, 0xf8cf3d18 +0, 0, 0,0, 3824640, 0x9a253d29 1, 0, 0,16384,32768, 0x6b41078a 1, 14288, 14288,16384,32768, 0x96f7bfa2 -0,499,499,0, 3824640, 0x29dc2af5 -0,624,624,0, 3824640, 0x0ef5287b -0,625,625,0, 3824640, 0x84b5283d -0,626,626,0, 3824640, 0xcde31cda -0,627,627,0, 3824640, 0x61cf2454 -0,628,628,0, 3824640, 0xb8e32127 +0,499,499,0, 3824640, 0xcb232b06 +0,624,624,0, 3824640, 0xb03c288c +0,625,625,0, 3824640, 0x260b284e +0,626,626,0, 3824640, 0x6f391ceb +0,627,627,0, 3824640, 0x03252465 +0,628,628,0, 3824640, 0x5a392138 1, 30650, 30650,14336,28672, 0xfafb3922 -0,749,749,0, 3824640, 0xb2972f1f -0,750,750,0, 3824640, 0x5f59333c -0,751,751,0, 3824640, 0x8d3529ea -0,752,752,0, 3824640, 0xceb4385f -0,753,753,0, 3824640, 0xb93139f8 -0,754,754,0, 3824640, 0xfb802d6f -0,874,874,0, 3824640, 0x94643ee6 -0,875,875,0, 3824640, 0x646c4e89 -0,876,876,0, 3824640, 0xa65d5e7a -0,877,877,0, 3824
[FFmpeg-cvslog] fate-g2m3: disable the audio stream
ffmpeg | branch: master | Janne Grunau | Wed Jul 1 17:35:51 2015 +0200| [a31c4b2cbef9aee15910fc3df52519aef46760de] | committer: Janne Grunau fate-g2m3: disable the audio stream The audio decoder is not in fate-g2m3 dependencies and the wma2 decoder is probably not bit-exact since it is float based. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a31c4b2cbef9aee15910fc3df52519aef46760de --- tests/fate/screen.mak |2 +- tests/ref/fate/g2m3 |4 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/fate/screen.mak b/tests/fate/screen.mak index cc29d22..26e6736 100644 --- a/tests/fate/screen.mak +++ b/tests/fate/screen.mak @@ -33,7 +33,7 @@ FATE_G2M += fate-g2m2 fate-g2m2: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m2.asf -an FATE_G2M += fate-g2m3 -fate-g2m3: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m3.asf -frames:v 20 +fate-g2m3: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m3.asf -frames:v 20 -an FATE_G2M += fate-g2m4 fate-g2m4: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m4.asf diff --git a/tests/ref/fate/g2m3 b/tests/ref/fate/g2m3 index 8866a7e..5a6ff64 100644 --- a/tests/ref/fate/g2m3 +++ b/tests/ref/fate/g2m3 @@ -1,15 +1,11 @@ #tb 0: 1/1000 -#tb 1: 1/44100 0, 0, 0,0, 3824640, 0x9a253d29 -1, 0, 0,16384,32768, 0x6b41078a -1, 14288, 14288,16384,32768, 0x96f7bfa2 0,499,499,0, 3824640, 0xcb232b06 0,624,624,0, 3824640, 0xb03c288c 0,625,625,0, 3824640, 0x260b284e 0,626,626,0, 3824640, 0x6f391ceb 0,627,627,0, 3824640, 0x03252465 0,628,628,0, 3824640, 0x5a392138 -1, 30650, 30650,14336,28672, 0xfafb3922 0,749,749,0, 3824640, 0x53ed2f30 0,750,750,0, 3824640, 0x00af334d 0,751,751,0, 3824640, 0x2e8b29fb ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] h264: arm: use intra pred8x8 functions only for chroma_format_idc <= 1
ffmpeg | branch: master | Janne Grunau | Sun Jul 12 17:03:13 2015 +0200| [256ef19844892c6cf8e0386e3287bae970ec6320] | committer: Janne Grunau h264: arm: use intra pred8x8 functions only for chroma_format_idc <= 1 > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=256ef19844892c6cf8e0386e3287bae970ec6320 --- libavcodec/arm/h264pred_init_arm.c | 30 -- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c index bbfe63f..a445d4d 100644 --- a/libavcodec/arm/h264pred_init_arm.c +++ b/libavcodec/arm/h264pred_init_arm.c @@ -54,20 +54,22 @@ static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id, if (high_depth) return; -h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vert_neon; -h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_hor_neon; -if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8) -h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon; -h->pred8x8[DC_128_PRED8x8 ] = ff_pred8x8_128_dc_neon; -if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_ID_VP7 && -codec_id != AV_CODEC_ID_VP8) { -h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_neon; -h->pred8x8[LEFT_DC_PRED8x8] = ff_pred8x8_left_dc_neon; -h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_neon; -h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = ff_pred8x8_l0t_dc_neon; -h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = ff_pred8x8_0lt_dc_neon; -h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon; -h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon; +if (chroma_format_idc <= 1) { +h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vert_neon; +h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_hor_neon; +if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8) +h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon; +h->pred8x8[DC_128_PRED8x8 ] = ff_pred8x8_128_dc_neon; +if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_ID_VP7 && +codec_id != AV_CODEC_ID_VP8) { +h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_neon; +h->pred8x8[LEFT_DC_PRED8x8] = 
ff_pred8x8_left_dc_neon; +h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_neon; +h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = ff_pred8x8_l0t_dc_neon; +h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = ff_pred8x8_0lt_dc_neon; +h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon; +h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon; +} } h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_neon; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] checkasm: test all architectures with optimisations
ffmpeg | branch: master | Janne Grunau | Sun Jul 12 16:41:42 2015 +0200| [82e6ac85ff9aa7631b8c01521b3d6b5ca0bc8014] | committer: Janne Grunau checkasm: test all architectures with optimisations > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=82e6ac85ff9aa7631b8c01521b3d6b5ca0bc8014 --- tests/checkasm/checkasm.c | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 826cd35..e6cf3d7 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -72,7 +72,21 @@ static const struct { const char *suffix; int flag; } cpus[] = { -#if ARCH_X86 +#if ARCH_AARCH64 +{ "ARMV8","armv8",AV_CPU_FLAG_ARMV8 }, +{ "NEON", "neon", AV_CPU_FLAG_NEON }, +#elif ARCH_ARM +{ "ARMV5TE", "armv5te", AV_CPU_FLAG_ARMV5TE }, +{ "ARMV6","armv6",AV_CPU_FLAG_ARMV6 }, +{ "ARMV6T2", "armv6t2", AV_CPU_FLAG_ARMV6T2 }, +{ "VFP", "vfp", AV_CPU_FLAG_VFP }, +{ "VFPV3","vfp3", AV_CPU_FLAG_VFPV3 }, +{ "NEON", "neon", AV_CPU_FLAG_NEON }, +#elif ARCH_PPC +{ "ALTIVEC", "altivec", AV_CPU_FLAG_ALTIVEC }, +{ "VSX", "vsx", AV_CPU_FLAG_VSX }, +{ "POWER8", "power8", AV_CPU_FLAG_POWER8 }, +#elif ARCH_X86 { "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV }, { "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT }, { "3DNOW","3dnow",AV_CPU_FLAG_3DNOW }, ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] fate: add checkasm target
ffmpeg | branch: master | Janne Grunau | Sun Jul 12 17:35:21 2015 +0200| [c9f8cfb6d9b34f3c51f1b7152c4dc3f2f8724dc4] | committer: Janne Grunau fate: add checkasm target > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c9f8cfb6d9b34f3c51f1b7152c4dc3f2f8724dc4 --- tests/Makefile |1 + tests/fate/checkasm.mak |5 + 2 files changed, 6 insertions(+) diff --git a/tests/Makefile b/tests/Makefile index fa83ba4..d7a229c 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -72,6 +72,7 @@ include $(SRC_PATH)/tests/fate/atrac.mak include $(SRC_PATH)/tests/fate/audio.mak include $(SRC_PATH)/tests/fate/bmp.mak include $(SRC_PATH)/tests/fate/cdxl.mak +include $(SRC_PATH)/tests/fate/checkasm.mak include $(SRC_PATH)/tests/fate/cover-art.mak include $(SRC_PATH)/tests/fate/demux.mak include $(SRC_PATH)/tests/fate/dfa.mak diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak new file mode 100644 index 000..daefe69 --- /dev/null +++ b/tests/fate/checkasm.mak @@ -0,0 +1,5 @@ +fate-checkasm: tests/checkasm/checkasm$(EXESUF) +fate-checkasm: CMD = run tests/checkasm/checkasm +fate-checkasm: REF = /dev/null + +FATE += fate-checkasm ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] Revert "tiff: support reading gray+alpha at 8 bits"
ffmpeg | branch: master | Janne Grunau | Mon Jul 21 08:35:32 2014 +0200| [a9f3f5fadb57bae3f3ff0be69e56b2c6014f2513] | committer: Janne Grunau Revert "tiff: support reading gray+alpha at 8 bits" This reverts commit b31d76e45fc3c6529dd7109e721676f3ec376d00 as it uses an unknown pixel format. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a9f3f5fadb57bae3f3ff0be69e56b2c6014f2513 --- libavcodec/tiff.c |3 --- 1 file changed, 3 deletions(-) diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c index 69d55c6..6c72dc8 100644 --- a/libavcodec/tiff.c +++ b/libavcodec/tiff.c @@ -262,9 +262,6 @@ static int init_image(TiffContext *s, AVFrame *frame) case 161: s->avctx->pix_fmt = s->le ? AV_PIX_FMT_GRAY16LE : AV_PIX_FMT_GRAY16BE; break; -case 162: -s->avctx->pix_fmt = AV_PIX_FMT_GRAY8A; -break; case 324: s->avctx->pix_fmt = AV_PIX_FMT_RGBA; break; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] configure: add support for neon intrinsics
ffmpeg | branch: master | Janne Grunau | Tue May 13 23:03:28 2014 +0200| [d45ebd4876ab8fc07736a644de07e1b1d11a8e5d] | committer: Janne Grunau configure: add support for neon intrinsics > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d45ebd4876ab8fc07736a644de07e1b1d11a8e5d --- Makefile |1 + configure | 12 2 files changed, 13 insertions(+) diff --git a/Makefile b/Makefile index 99d3240..cc016b3 100644 --- a/Makefile +++ b/Makefile @@ -130,6 +130,7 @@ $(foreach V,$(SUBDIR_VARS),$(eval $(call RESET,$(V SUBDIR := $(1)/ include $(SRC_PATH)/$(1)/Makefile -include $(SRC_PATH)/$(1)/$(ARCH)/Makefile +-include $(SRC_PATH)/$(1)/$(INTRINSICS)/Makefile include $(SRC_PATH)/library.mak endef diff --git a/configure b/configure index c74e433..1cc2265 100755 --- a/configure +++ b/configure @@ -1397,6 +1397,10 @@ HEADERS_LIST=" winsock2_h " +INTRINSICS_LIST=" +intrinsics_neon +" + MATH_FUNCS=" atanf atan2f @@ -1512,6 +1516,7 @@ HAVE_LIST=" $HAVE_LIST_CMDLINE $HAVE_LIST_PUB $HEADERS_LIST +$INTRINSICS_LIST $MATH_FUNCS $SYSTEM_FUNCS $THREADS_LIST @@ -1651,6 +1656,7 @@ armv6_deps="arm" armv6t2_deps="arm" armv8_deps="aarch64" neon_deps_any="aarch64 arm" +intrinsics_neon_deps="neon" vfp_deps_any="aarch64 arm" vfpv3_deps="vfp" @@ -2196,6 +2202,7 @@ nogas=":" # machine arch_default=$(uname -m) cpu="generic" +intrinsics="none" # OS target_os_default=$(tolower $(uname -s)) @@ -3928,6 +3935,8 @@ EOF fi +check_code cc arm_neon.h "int64x2_t test" && enable intrinsics_neon + check_ldflags -Wl,--as-needed if check_func dlopen; then @@ -4423,6 +4432,8 @@ enabled_all dxva2 CoTaskMemFree && ! 
enabled_any memalign posix_memalign aligned_malloc && enabled_any $need_memalign && enable memalign_hack +map 'enabled $v && intrinsics=${v#intrinsics_}' $INTRINSICS_LIST + for thread in $THREADS_LIST; do if enabled $thread; then test -n "$thread_type" && @@ -4555,6 +4566,7 @@ MANDIR=\$(DESTDIR)$mandir SRC_PATH=$source_path CC_IDENT=$cc_ident ARCH=$arch +INTRINSICS=$intrinsics CC=$cc AS=$as LD=$ld ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] fate: support testing of release branches
ffmpeg | branch: master | Janne Grunau | Sat Jul 26 23:29:46 2014 +0200| [42eb9154a83e9a7aedb1168b2f1112af765cf2b5] | committer: Janne Grunau fate: support testing of release branches Adding 'branch=release/10' to the fate config file will check the release/10 branch instead of master. If no branch is specified it will use 'master' so that existing config are still valid. The server side changes are already deployed, see https://fate.libav.org/v10/ for an example. The server supports only the release/* branches. The server enforces that a single slot tests always the same branch. Please append "-v$RELEASE" to the slot of release branch configs or make the slot otherwise unique. A different fate samples dir is needed for each release branch. make fate-rsync has the correct URL in each branch. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=42eb9154a83e9a7aedb1168b2f1112af765cf2b5 --- tests/fate.sh |8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/fate.sh b/tests/fate.sh index 6e0c0c6..af0f6c0 100755 --- a/tests/fate.sh +++ b/tests/fate.sh @@ -19,6 +19,8 @@ test -n "$slot"|| die "slot not specified" test -n "$repo"|| die "repo not specified" test -d "$samples" || die "samples location not specified" +: ${branch:=master} + lock(){ lock=$1/fate.lock (set -C; exec >$lock) 2>/dev/null || return @@ -28,14 +30,14 @@ lock(){ checkout(){ case "$repo" in file:*|/*) src="${repo#file:}" ;; -git:*) git clone --quiet "$repo" "$src" ;; +git:*) git clone --quiet --branch "$branch" "$repo" "$src" ;; esac } update()( cd ${src} || return case "$repo" in -git:*) git fetch --force; git reset --hard origin/master ;; +git:*) git fetch --force; git reset --hard "origin/$branch" ;; esac ) @@ -79,7 +81,7 @@ clean(){ report(){ date=$(date -u +%Y%m%d%H%M%S) -echo "fate:0:${date}:${slot}:${version}:$1:$2:${comment}" >report +echo "fate:1:${date}:${slot}:${version}:$1:$2:${branch}:${comment}" >report cat ${build}/config.fate ${build}/tests/data/fate/*.rep 
>>report test -n "$fate_recv" && $tar report *.log | gzip | $fate_recv } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] fate: support testing of release branches
ffmpeg | branch: release/2.3 | Janne Grunau | Sat Jul 26 23:29:46 2014 +0200| [6a250c858ebbb9d5111c5b14d8d46d41bd08e218] | committer: Michael Niedermayer fate: support testing of release branches Adding 'branch=release/2.3' to the fate config file will check the release/2.3 branch instead of master. If no branch is specified it will use 'master' so that existing config are still valid. (cherry picked from commit 42eb9154a83e9a7aedb1168b2f1112af765cf2b5) Conflicts: tests/fate.sh > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6a250c858ebbb9d5111c5b14d8d46d41bd08e218 --- tests/fate.sh |7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/fate.sh b/tests/fate.sh index ca3caa3..5a78018 100755 --- a/tests/fate.sh +++ b/tests/fate.sh @@ -19,6 +19,8 @@ test -n "$slot"|| die "slot not specified" test -n "$repo"|| die "repo not specified" test -d "$samples" || die "samples location not specified" +: ${branch:=master} + lock(){ lock=$1/fate.lock (set -C; exec >$lock) 2>/dev/null || return @@ -28,14 +30,14 @@ lock(){ checkout(){ case "$repo" in file:*|/*) src="${repo#file:}" ;; -git:*) git clone --quiet "$repo" "$src" ;; +git:*) git clone --quiet --branch "$branch" "$repo" "$src" ;; esac } update()( cd ${src} || return case "$repo" in -git:*) git fetch --force && git reset --hard FETCH_HEAD ;; +git:*) git fetch --force && git reset --hard "origin/$branch" ;; esac ) @@ -82,6 +84,7 @@ clean(){ report(){ date=$(date -u +%Y%m%d%H%M%S) echo "fate:0:${date}:${slot}:${version}:$1:$2:${comment}" >report +#echo "fate:1:${date}:${slot}:${version}:$1:$2:${branch}:${comment}" >report cat ${build}/config.fate >>report cat ${build}/tests/data/fate/*.rep >>report || for i in ${build}/tests/data/fate/*.rep ; do cat "$i" >>report ; done test -n "$fate_recv" && $tar report *.log | gzip | $fate_recv ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] fate: add informative cpu test
ffmpeg | branch: master | Janne Grunau | Thu Jan 30 13:48:35 2014 +0100| [07d8fa58121be8fe315bd51ab760547fe209a745] | committer: Janne Grunau fate: add informative cpu test libavutil/cpu-test prints raw and effective cpu flags to STDERR. Detected cpu flags can be useful for debugging fate errors. No comparison of the result against a expected result since that would require fate config specific references. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=07d8fa58121be8fe315bd51ab760547fe209a745 --- libavutil/cpu.c | 49 -- tests/fate/libavutil.mak |5 + 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/libavutil/cpu.c b/libavutil/cpu.c index e755d15..20f0fe1 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -181,6 +181,10 @@ int av_cpu_count(void) #include +#if !HAVE_GETOPT +#include "compat/getopt.c" +#endif + static const struct { int flag; const char *name; @@ -224,17 +228,50 @@ static const struct { { 0 } }; -int main(void) +static void print_cpu_flags(int cpu_flags, const char *type) { -int cpu_flags = av_get_cpu_flags(); int i; -printf("cpu_flags = 0x%08X\n", cpu_flags); -printf("cpu_flags ="); +fprintf(stderr, "cpu_flags(%s) = 0x%08X\n", type, cpu_flags); +fprintf(stderr, "cpu_flags_str(%s) =", type); for (i = 0; cpu_flag_tab[i].flag; i++) if (cpu_flags & cpu_flag_tab[i].flag) -printf(" %s", cpu_flag_tab[i].name); -printf("\n"); +fprintf(stderr, " %s", cpu_flag_tab[i].name); +fprintf(stderr, "\n"); +} + + +int main(int argc, char **argv) +{ +int cpu_flags_raw = av_get_cpu_flags(); +int cpu_flags_eff; + +if (cpu_flags_raw < 0) +return 1; + +for (;;) { +int c = getopt(argc, argv, "c:"); +if (c == -1) +break; +switch (c) { +case 'c': +{ +int cpuflags = av_parse_cpu_flags(optarg); +if (cpuflags < 0) +return 2; +av_set_cpu_flags_mask(cpuflags); +break; +} +} +} + +cpu_flags_eff = av_get_cpu_flags(); + +if (cpu_flags_eff < 0) +return 3; + +print_cpu_flags(cpu_flags_raw, "raw"); +print_cpu_flags(cpu_flags_eff, "effective"); return 0; } 
diff --git a/tests/fate/libavutil.mak b/tests/fate/libavutil.mak index 81d0571..44d88c4 100644 --- a/tests/fate/libavutil.mak +++ b/tests/fate/libavutil.mak @@ -25,6 +25,11 @@ FATE_LIBAVUTIL += fate-blowfish fate-blowfish: libavutil/blowfish-test$(EXESUF) fate-blowfish: CMD = run libavutil/blowfish-test +FATE_LIBAVUTIL += fate-cpu +fate-cpu: libavutil/cpu-test$(EXESUF) +fate-cpu: CMD = run libavutil/cpu-test $(CPUFLAGS:%=-c%) +fate-cpu: REF = /dev/null + FATE_LIBAVUTIL += fate-crc fate-crc: libavutil/crc-test$(EXESUF) fate-crc: CMD = run libavutil/crc-test ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] aarch64: add ', ' between assembler macro arguments where missing
ffmpeg | branch: master | Janne Grunau | Thu Jul 24 14:50:46 2014 +0200| [ac6b95dbc0b53b3ea461bd5e5e7f7f31d2983733] | committer: Janne Grunau aarch64: add ',' between assembler macro arguments where missing llvm's integrated assembler does not accept spaces as macro argument delimiter when targeting darwin. Using a explicit delimiter is a good idea in principle since it makes case like 'macro 4 -2' vs 'macro 4 - 2' clear. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ac6b95dbc0b53b3ea461bd5e5e7f7f31d2983733 --- libavcodec/aarch64/fft_neon.S |2 +- libavcodec/aarch64/mpegaudiodsp_neon.S |2 +- libavcodec/aarch64/opus_imdct_neon.S |8 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libavcodec/aarch64/fft_neon.S b/libavcodec/aarch64/fft_neon.S index 9802349..5f88bed 100644 --- a/libavcodec/aarch64/fft_neon.S +++ b/libavcodec/aarch64/fft_neon.S @@ -336,7 +336,7 @@ function fft_pass_neon endfunc .macro def_fft n, n2, n4 -function fft\n\()_neon align=6 +function fft\n\()_neon, align=6 sub sp, sp, #16 stp x28, x30, [sp] add x28, x0, #\n4*2*8 diff --git a/libavcodec/aarch64/mpegaudiodsp_neon.S b/libavcodec/aarch64/mpegaudiodsp_neon.S index 39875fe..808576a 100644 --- a/libavcodec/aarch64/mpegaudiodsp_neon.S +++ b/libavcodec/aarch64/mpegaudiodsp_neon.S @@ -24,7 +24,7 @@ #define WFRAC_BITS 16 // fractional bits for window #define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15) -const tbl_rev128.s align=4 +const tbl_rev128.s, align=4 .byte 12, 13, 14, 15 .byte8, 9, 10, 11 .byte4, 5, 6, 7 diff --git a/libavcodec/aarch64/opus_imdct_neon.S b/libavcodec/aarch64/opus_imdct_neon.S index 6b06396..5f6c502 100644 --- a/libavcodec/aarch64/opus_imdct_neon.S +++ b/libavcodec/aarch64/opus_imdct_neon.S @@ -23,7 +23,7 @@ #include "asm-offsets.h" .macro shuffle a, b, c, d -const shuffle_\a\b\c\d align=4 +const shuffle_\a\b\c\d, align=4 .byte (\a * 4), (\a * 4 + 1), (\a * 4 + 2), (\a * 4 + 3) .byte (\b * 4), (\b * 4 + 1), (\b * 4 + 2), (\b * 4 + 3) .byte (\c * 4), (\c * 4 + 
1), (\c * 4 + 2), (\c * 4 + 3) @@ -344,7 +344,7 @@ function fft15_pass ret endfunc -function fft30_neon align=6 +function fft30_neon, align=6 sub sp, sp, #0x20 stp x20, x21, [sp] stp x22, x30, [sp, #0x10] @@ -372,7 +372,7 @@ function fft30_neon align=6 endfunc .macro def_fft n, n2 -function fft\n\()_neon align=6 +function fft\n\()_neon, align=6 sub sp, sp, #0x30 stp x20, x21, [sp] stp x22, x30, [sp, #0x10] @@ -641,7 +641,7 @@ function ff_celt_imdct_half_neon, export=1 endfunc // [0] = exp(2 * i * pi / 5), [1] = exp(2 * i * pi * 2 / 5) -const fact5 align=4 +const fact5, align=4 .float 0.30901699437494745, 0.95105651629515353 .float -0.80901699437494734, 0.58778525229247325 endconst ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] cpu-test: test av_cpu_count
ffmpeg | branch: master | Janne Grunau | Sun Aug 3 13:03:13 2014 +0200| [caf5ef852bf71984d3322bbeaf48cfb04ac8255f] | committer: Janne Grunau cpu-test: test av_cpu_count Add CPU count and number threads as informative values for fate. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=caf5ef852bf71984d3322bbeaf48cfb04ac8255f --- libavutil/cpu.c | 14 +- tests/fate/libavutil.mak |2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 20f0fe1..a7e5f7f 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -180,6 +180,7 @@ int av_cpu_count(void) #ifdef TEST #include +#include "avstring.h" #if !HAVE_GETOPT #include "compat/getopt.c" @@ -245,12 +246,14 @@ int main(int argc, char **argv) { int cpu_flags_raw = av_get_cpu_flags(); int cpu_flags_eff; +int cpu_count = av_cpu_count(); +char threads[5] = "auto"; if (cpu_flags_raw < 0) return 1; for (;;) { -int c = getopt(argc, argv, "c:"); +int c = getopt(argc, argv, "c:t:"); if (c == -1) break; switch (c) { @@ -262,6 +265,14 @@ int main(int argc, char **argv) av_set_cpu_flags_mask(cpuflags); break; } +case 't': +{ +int len = av_strlcpy(threads, optarg, sizeof(threads)); +if (len >= sizeof(threads)) { +fprintf(stderr, "Invalid thread count '%s'\n", optarg); +return 2; +} +} } } @@ -272,6 +283,7 @@ int main(int argc, char **argv) print_cpu_flags(cpu_flags_raw, "raw"); print_cpu_flags(cpu_flags_eff, "effective"); +fprintf(stderr, "threads = %s (cpu_count = %d)\n", threads, cpu_count); return 0; } diff --git a/tests/fate/libavutil.mak b/tests/fate/libavutil.mak index 44d88c4..7f3329b 100644 --- a/tests/fate/libavutil.mak +++ b/tests/fate/libavutil.mak @@ -27,7 +27,7 @@ fate-blowfish: CMD = run libavutil/blowfish-test FATE_LIBAVUTIL += fate-cpu fate-cpu: libavutil/cpu-test$(EXESUF) -fate-cpu: CMD = run libavutil/cpu-test $(CPUFLAGS:%=-c%) +fate-cpu: CMD = run libavutil/cpu-test $(CPUFLAGS:%=-c%) $(THREADS:%=-t%) fate-cpu: REF = /dev/null FATE_LIBAVUTIL += fate-crc ___ 
ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] aarch64: use MACH-O const data asm directive in const macro
ffmpeg | branch: master | Janne Grunau | Wed Jul 23 10:06:15 2014 +0200| [a238b83b13640e3192d7d4aaad2242f13a9a84a1] | committer: Janne Grunau aarch64: use MACH-O const data asm directive in const macro > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a238b83b13640e3192d7d4aaad2242f13a9a84a1 --- libavutil/aarch64/asm.S |4 1 file changed, 4 insertions(+) diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S index 850f16b..b766e67 100644 --- a/libavutil/aarch64/asm.S +++ b/libavutil/aarch64/asm.S @@ -57,7 +57,11 @@ FUNC.func \name ELF .size \name, . - \name .purgem endconst .endm +#ifndef __MACH__ .section.rodata +#else +.const_data +#endif .align \align \name: .endm ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] fate: explicitly set the default THREADS value
ffmpeg | branch: master | Janne Grunau | Mon Aug 4 10:04:08 2014 +0200| [23dfa00b88fc927d4c1854ab4fc60f5c6398f3ac] | committer: Janne Grunau fate: explicitly set the default THREADS value This makes the default of '1' more explicit than defaulting to '1' in fate-run.sh and regression-funcs.sh if THREADS is not set. Fixes the reported thread count in fate-cpu if THREADS is not set. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=23dfa00b88fc927d4c1854ab4fc60f5c6398f3ac --- tests/Makefile |1 + 1 file changed, 1 insertion(+) diff --git a/tests/Makefile b/tests/Makefile index 6a0def9..5fad291 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,3 +1,4 @@ +THREADS = 1 VREF = tests/vsynth1/00.pgm AREF = tests/data/asynth1.sw ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] fate: generate tests/pixfmts.mak for all targets requiring it
ffmpeg | branch: master | Janne Grunau | Tue Aug 5 09:39:00 2014 +0200| [d395895cdb2ac8c95bd488549e7f893bd4dcc248] | committer: Janne Grunau fate: generate tests/pixfmts.mak for all targets requiring it All subtargets which should run the fate-filter-pixdesc% need to generate and include tests/pixfmts.mak. The most notable missing target was fate itself. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d395895cdb2ac8c95bd488549e7f893bd4dcc248 --- tests/fate/filter-video.mak |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak index cd90507..42fb063 100644 --- a/tests/fate/filter-video.mak +++ b/tests/fate/filter-video.mak @@ -122,7 +122,7 @@ tests/pixfmts.mak: avconv$(EXESUF) $(Q)$(TARGET_EXEC) $(TARGET_PATH)/$< -pix_fmts list 2> /dev/null | awk 'NR > 8 && /^IO/ { printf $$2 " " }' >> $@ $(Q)printf "\n" >> $@ -RUNNING_PIXFMTS_TESTS := $(filter check fate-list fate-filter-pixdesc%,$(MAKECMDGOALS)) +RUNNING_PIXFMTS_TESTS := $(filter check fate fate-list fate-filter fate-vfilter fate-filter-pixdesc%,$(MAKECMDGOALS)) ifneq (,$(RUNNING_PIXFMTS_TESTS)) -include tests/pixfmts.mak ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] configure: check $as first before using $gas as GNU as
ffmpeg | branch: master | Janne Grunau | Tue Aug 5 12:08:09 2014 +0200| [15201e256035a3e8f9d3d7b96fc327467e1a8ead] | committer: Janne Grunau configure: check $as first before using $gas as GNU as llvm's integrated assembler supports the AArch64 asm on darwin since August 2014. So check $as first before using gas-preprocessor.pl via $gas. Makes the checks specific to what the architecture-specific asm needs. PPC Altivec and AArch64 need only ':vararg' for macro arguments. Arm additionally needs the '.altmacro' directive. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=15201e256035a3e8f9d3d7b96fc327467e1a8ead --- configure | 32 +--- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/configure b/configure index b2eb0c8..22c78e7 100755 --- a/configure +++ b/configure @@ -3760,22 +3760,32 @@ unsigned int endian = 'B' << 24 | 'I' << 16 | 'G' << 8 | 'E'; EOF od -t x1 $TMPO | grep -q '42 *49 *47 *45' && enable bigendian - -if enabled asm; then -enabled arm && nogas=die -enabled_all ppc altivec && nogas=warn -as=${gas:=$as} -check_as <http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] h264: slice-mt: check master context for valid current_picture_ptr
ffmpeg | branch: release/0.10 | Janne Grunau | Wed Dec 5 20:08:01 2012 +0100| [3e60501f311c50bf234033f206c19d34d889df01] | committer: Diego Biurrun h264: slice-mt: check master context for valid current_picture_ptr Fixes errors in slice based multithreading introduced in 0b300daad2f5. CC: libav-sta...@libav.org (cherry picked from commit 5945c7b35d9169caf9ecef1c419eebdebb909e60) Signed-off-by: Diego Biurrun > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3e60501f311c50bf234033f206c19d34d889df01 --- libavcodec/h264.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/h264.c b/libavcodec/h264.c index 547cf3d..c485325 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -2952,7 +2952,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ s->picture_structure = last_pic_structure; s->dropable = last_pic_dropable; return AVERROR_INVALIDDATA; -} else if (!s->current_picture_ptr) { +} else if (!s0->current_picture_ptr) { av_log(s->avctx, AV_LOG_ERROR, "unset current_picture_ptr on %d. slice\n", h0->current_slice + 1); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] rv34: use ff_mpeg_update_thread_context only when decoder is fully initialized
ffmpeg | branch: master | Janne Grunau | Thu Aug 21 13:26:33 2014 +0200| [dc4b2e7d33903a6b9380e8a84b22b3a20facbb08] | committer: Janne Grunau rv34: use ff_mpeg_update_thread_context only when decoder is fully initialized MpegEncContext based decoders are only fully initialized after the first ff_thread_get_buffer() call. The RV30/40 decoders may fail before a frame buffer was requested. ff_mpeg_update_thread_context() fails on half initialized MpegEncContexts. Since this can only happen before the first frame was decoded, there is no need to call ff_mpeg_update_thread_context(). Based on patches by John Stebbins and tested by John Stebbins. CC: libav-sta...@libav.org > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=dc4b2e7d33903a6b9380e8a84b22b3a20facbb08 --- libavcodec/rv34.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c index 4ed2a33..26ab7e4 100644 --- a/libavcodec/rv34.c +++ b/libavcodec/rv34.c @@ -1555,16 +1555,18 @@ int ff_rv34_decode_update_thread_context(AVCodecContext *dst, const AVCodecConte return err; } -if ((err = ff_mpeg_update_thread_context(dst, src))) -return err; - r->cur_pts = r1->cur_pts; r->last_pts = r1->last_pts; r->next_pts = r1->next_pts; memset(&r->si, 0, sizeof(r->si)); -return 0; +// Do no call ff_mpeg_update_thread_context on a partially initialized +// decoder context. +if (!s1->linesize) +return 0; + +return ff_mpeg_update_thread_context(dst, src); } static int get_slice_offset(AVCodecContext *avctx, const uint8_t *buf, int n) ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] fate-vc1_ilaced_twomv: use -flags +bitexact
ffmpeg | branch: master | Janne Grunau | Sat Oct 4 11:19:09 2014 +0200| [36f3aec3630f27df64f4ff2b52a1c9ced760eb52] | committer: Janne Grunau fate-vc1_ilaced_twomv: use -flags +bitexact Also updates the reference since it was generated by the non-bitexact x86 specific code. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=36f3aec3630f27df64f4ff2b52a1c9ced760eb52 --- tests/fate/microsoft.mak|2 +- tests/ref/fate/vc1_ilaced_twomv |4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/fate/microsoft.mak b/tests/fate/microsoft.mak index c1cedea..10bbb30 100644 --- a/tests/fate/microsoft.mak +++ b/tests/fate/microsoft.mak @@ -54,7 +54,7 @@ FATE_VC1-$(CONFIG_VC1_DEMUXER) += fate-vc1_sa20021 fate-vc1_sa20021: CMD = framecrc -i $(TARGET_SAMPLES)/vc1/SA20021.vc1 FATE_VC1-$(CONFIG_VC1_DEMUXER) += fate-vc1_ilaced_twomv -fate-vc1_ilaced_twomv: CMD = framecrc -i $(TARGET_SAMPLES)/vc1/ilaced_twomv.vc1 +fate-vc1_ilaced_twomv: CMD = framecrc -flags +bitexact -i $(TARGET_SAMPLES)/vc1/ilaced_twomv.vc1 FATE_VC1-$(CONFIG_MOV_DEMUXER) += fate-vc1-ism fate-vc1-ism: CMD = framecrc -i $(TARGET_SAMPLES)/isom/vc1-wmapro.ism -an diff --git a/tests/ref/fate/vc1_ilaced_twomv b/tests/ref/fate/vc1_ilaced_twomv index 9a5d391..abfd507 100644 --- a/tests/ref/fate/vc1_ilaced_twomv +++ b/tests/ref/fate/vc1_ilaced_twomv @@ -1,12 +1,12 @@ #tb 0: 1/25 0, 0, 0,1, 3110400, 0x764f8856 -0, 2, 2,1, 3110400, 0x5b6680fa +0, 2, 2,1, 3110400, 0x1dc5592c 0, 3, 3,1, 3110400, 0x8ee86a47 0, 4, 4,1, 3110400, 0xc1ca8532 0, 5, 5,1, 3110400, 0x53efd0f9 0, 6, 6,1, 3110400, 0xa9605bc9 0, 7, 7,1, 3110400, 0xbaa9aede -0, 8, 8,1, 3110400, 0x6035644c +0, 8, 8,1, 3110400, 0x7191bcf4 0, 9, 9,1, 3110400, 0x1d6aff98 0, 10, 10,1, 3110400, 0x7b047286 0, 11, 11,1, 3110400, 0xa7cb2f84 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] fate: add mpeg4 tests for frame size changes
ffmpeg | branch: master | Janne Grunau | Tue Sep 18 16:03:08 2012 +0200| [f29c226af0ecc34c417b646580a9acd7d8bbaf48] | committer: Janne Grunau fate: add mpeg4 tests for frame size changes > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f29c226af0ecc34c417b646580a9acd7d8bbaf48 --- tests/Makefile |1 + tests/fate/mpeg4.mak |9 ++ tests/ref/fate/mpeg4-resolution-change-down-down | 151 ++ tests/ref/fate/mpeg4-resolution-change-down-up | 151 ++ tests/ref/fate/mpeg4-resolution-change-up-down | 151 ++ tests/ref/fate/mpeg4-resolution-change-up-up | 151 ++ 6 files changed, 614 insertions(+) diff --git a/tests/Makefile b/tests/Makefile index 5fad291..a2ee79b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -96,6 +96,7 @@ include $(SRC_PATH)/tests/fate/microsoft.mak include $(SRC_PATH)/tests/fate/monkeysaudio.mak include $(SRC_PATH)/tests/fate/mp3.mak include $(SRC_PATH)/tests/fate/mpc.mak +include $(SRC_PATH)/tests/fate/mpeg4.mak include $(SRC_PATH)/tests/fate/opus.mak include $(SRC_PATH)/tests/fate/pcm.mak include $(SRC_PATH)/tests/fate/probe.mak diff --git a/tests/fate/mpeg4.mak b/tests/fate/mpeg4.mak new file mode 100644 index 000..1565247 --- /dev/null +++ b/tests/fate/mpeg4.mak @@ -0,0 +1,9 @@ + +MPEG4_RESOLUTION_CHANGE = down-down down-up up-down up-up + +fate-mpeg4-resolution-change-%: CMD = framemd5 -flags +bitexact -idct simple -i $(SAMPLES)/mpeg4/resize_$(@:fate-mpeg4-resolution-change-%=%).h263 + +FATE_MPEG4-$(call DEMDEC, H263, H263) := $(addprefix fate-mpeg4-resolution-change-, $(MPEG4_RESOLUTION_CHANGE)) + +FATE_SAMPLES_AVCONV += $(FATE_MPEG4-yes) +fate-mpeg4: $(FATE_MPEG4-yes) diff --git a/tests/ref/fate/mpeg4-resolution-change-down-down b/tests/ref/fate/mpeg4-resolution-change-down-down new file mode 100644 index 000..137575a --- /dev/null +++ b/tests/ref/fate/mpeg4-resolution-change-down-down @@ -0,0 +1,151 @@ +#tb 0: 1/25 +0, 0, 0,1, 460800, d65fcc79c7eb9ebd9d88dca3ebb15bf4 +0, 1, 1,1, 460800, 6c86b8c7e8eae3d63b21342f233fb44e +0, 2, 2,1, 460800, 
7fea65fd8ee4d3fcec722f721d05ef45 +0, 3, 3,1, 460800, 2a1d943211f8c1995cc250586f105991 +0, 4, 4,1, 460800, 0430fa1da6a968c0936fc60a425c3b9f +0, 5, 5,1, 460800, 1593684b29c0f394176c9fce83ebe4a3 +0, 6, 6,1, 460800, ff8c25a20ced839a1ce33ef25d04f342 +0, 7, 7,1, 460800, 08869a31b677080f3fb1b12e3178c1f8 +0, 8, 8,1, 460800, 7e8d4c417698e434508663dfd851e95d +0, 9, 9,1, 460800, 97488eafaa5db813bc9fbb13a4204240 +0, 10, 10,1, 460800, aac8d92f678f077b560f24a74427ef33 +0, 11, 11,1, 460800, dbb953e70f356c528e232dd90b21af99 +0, 12, 12,1, 460800, 2f7b6c9f006ca733c159aadb78958621 +0, 13, 13,1, 460800, 938142d358a298df924da648ff9542e2 +0, 14, 14,1, 460800, 93cae8797e91f0ecb94782fd614ba477 +0, 15, 15,1, 460800, 4644a98dbd46865c6c4e1ebe168b5095 +0, 16, 16,1, 460800, cc932f281bf90456c508f58fda085658 +0, 17, 17,1, 460800, 9280ef543c11446d7005a098d19b74a3 +0, 18, 18,1, 460800, f5c91502bd600e1cfcd9ff7a5e683ce4 +0, 19, 19,1, 460800, 4eda3b1d48ca986eeb14c90af947b6af +0, 20, 20,1, 460800, 84e8398c333e76412bd310c207e131d3 +0, 21, 21,1, 460800, 7abe283b322cc4c9aaeb56e4b2e03597 +0, 22, 22,1, 460800, b4daa2055782c6f3769bf71cb1534124 +0, 23, 23,1, 460800, 3deb3e2f41ef4549da3b0d89031eaa42 +0, 24, 24,1, 460800, 73b9efcd2714b3cd65b1d8aee953cd38 +0, 25, 25,1, 460800, 493ee5aab3a0ca22887b2b673d871efd +0, 26, 26,1, 460800, 5fe537734707bbc050290df8c0095d0f +0, 27, 27,1, 460800, ca337619639144e0aea0fe226e9dad63 +0, 28, 28,1, 460800, 8fa2e0ff609d1593d34722058e56b19a +0, 29, 29,1, 460800, be0950c431591485ed4de678f8f17187 +0, 30, 30,1, 460800, 0ef4b6a8d2e3d455d697deaf730cf402 +0, 31, 31,1, 460800, f74302190c8e47120b9597073525b08e +0, 32, 32,1, 460800, 129fb2cc916aa16f8fee053ae89c31b3 +0, 33, 33,1, 460800, 0186eacb73263bb0ae02c20f827dd650 +0, 34, 34,1, 460800, f3c0245c28ded8d497665c87e66531de +0, 35, 35,1, 460800, e550cae2b446a5460a7201ef20ad74fd +0,
[FFmpeg-cvslog] fate-mpeg4: use TARGET_SAMPLES for resize tests
ffmpeg | branch: master | Janne Grunau | Tue Oct 21 09:56:23 2014 +0200| [04d8af5f17961b9b7076b8c974e360feb08787c2] | committer: Janne Grunau fate-mpeg4: use TARGET_SAMPLES for resize tests > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=04d8af5f17961b9b7076b8c974e360feb08787c2 --- tests/fate/mpeg4.mak |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fate/mpeg4.mak b/tests/fate/mpeg4.mak index 1565247..f9b94e6 100644 --- a/tests/fate/mpeg4.mak +++ b/tests/fate/mpeg4.mak @@ -1,7 +1,7 @@ MPEG4_RESOLUTION_CHANGE = down-down down-up up-down up-up -fate-mpeg4-resolution-change-%: CMD = framemd5 -flags +bitexact -idct simple -i $(SAMPLES)/mpeg4/resize_$(@:fate-mpeg4-resolution-change-%=%).h263 +fate-mpeg4-resolution-change-%: CMD = framemd5 -flags +bitexact -idct simple -i $(TARGET_SAMPLES)/mpeg4/resize_$(@:fate-mpeg4-resolution-change-%=%).h263 FATE_MPEG4-$(call DEMDEC, H263, H263) := $(addprefix fate-mpeg4-resolution-change-, $(MPEG4_RESOLUTION_CHANGE)) ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] arm: make ff_mlp_filter_channel_arm and ff_mlp_rematrix_channel_arm position independent
ffmpeg | branch: master | Janne Grunau | Mon Dec 8 13:25:55 2014 +0100| [581c7f0e12b1fa39f73d683e54d6ecda0772c5a9] | committer: Janne Grunau arm: make ff_mlp_filter_channel_arm and ff_mlp_rematrix_channel_arm position independent No significant difference in used cpu cycles on a cortex-a9. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=581c7f0e12b1fa39f73d683e54d6ecda0772c5a9 --- libavcodec/arm/mlpdsp_armv5te.S | 23 +-- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/libavcodec/arm/mlpdsp_armv5te.S b/libavcodec/arm/mlpdsp_armv5te.S index cf7d367..fecbe29 100644 --- a/libavcodec/arm/mlpdsp_armv5te.S +++ b/libavcodec/arm/mlpdsp_armv5te.S @@ -338,22 +338,23 @@ T orr AC0, AC0, AC1 .endm .macro switch_on_fir_taps mask_minus1, shift_0, shift_8, iir_taps -A ldr pc, [pc, a3, lsl #2] // firorder is in range 0-(8-iir_taps) +A ldr CO0, [pc, a3, lsl #2] // firorder is in range 0-(8-iir_taps) +A add pc, pc, CO0 T tbh [pc, a3, lsl #1] 0: -A .word 0, 70f, 71f, 72f, 73f, 74f +A .word (70f - 0b) - 4, (71f - 0b) - 4, (72f - 0b) - 4, (73f - 0b) - 4, (74f - 0b) - 4 T .hword (70f - 0b) / 2, (71f - 0b) / 2, (72f - 0b) / 2, (73f - 0b) / 2, (74f - 0b) / 2 .if \iir_taps <= 3 -A .word 75f +A .word (75f - 0b) - 4 T .hword (75f - 0b) / 2 .if \iir_taps <= 2 -A .word 76f +A .word (76f - 0b) - 4 T .hword (76f - 0b) / 2 .if \iir_taps <= 1 -A .word 77f +A .word (77f - 0b) - 4 T .hword (77f - 0b) / 2 .if \iir_taps == 0 -A .word 78f +A .word (78f - 0b) - 4 T .hword (78f - 0b) / 2 .endif .endif @@ -379,10 +380,11 @@ T .hword (78f - 0b) / 2 .endm .macro switch_on_iir_taps mask_minus1, shift_0, shift_8 -A ldr pc, [pc, a4, lsl #2] // irorder is in range 0-4 +A ldr CO0, [pc, a4, lsl #2] // irorder is in range 0-4 +A add pc, pc, CO0 T tbh [pc, a4, lsl #1] 0: -A .word 0, 60f, 61f, 62f, 63f, 64f +A .word (60f - 0b) - 4, (61f - 0b) - 4, (62f - 0b) - 4, (63f - 0b) - 4, (64f - 0b) - 4 T .hword (60f - 0b) / 2, (61f - 0b) / 2, (62f - 0b) / 2, (63f - 0b) / 2, (64f - 0b) / 2 60: 
switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 0 61: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 1 @@ -604,10 +606,11 @@ function ff_mlp_rematrix_channel_arm, export=1 cmp v5, #1 beq 11f blo 10f -A ldr pc, [pc, v5, lsl #2] +A ldr v5, [pc, v5, lsl #2] +A add pc, pc, v5 T tbh [pc, v5, lsl #1] 0: -A .word 0, 0, 0, 12f, 13f, 14f, 15f, 16f, 17f, 18f, 19f, 20f, 21f, 22f, 23f, 24f, 25f +A .word 0, 0, (12f - 0b) - 4, (13f - 0b) - 4, (14f - 0b) - 4, (15f - 0b) - 4, (16f - 0b) - 4, (17f - 0b) - 4, (18f - 0b) - 4, (19f - 0b) - 4, (20f - 0b) - 4, (21f - 0b) - 4, (22f - 0b) - 4, (23f - 0b) - 4, (24f - 0b) - 4, (25f - 0b) - 4 T .hword 0, 0, (12f - 0b) / 2, (13f - 0b) / 2, (14f - 0b) / 2, (15f - 0b) / 2 T .hword (16f - 0b) / 2, (17f - 0b) / 2, (18f - 0b) / 2, (19f - 0b) / 2 T .hword (20f - 0b) / 2, (21f - 0b) / 2, (22f - 0b) / 2, (23f - 0b) / 2, (24f - 0b) / 2, (25f - 0b) / 2 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] arm: mlpdsp: handle pic offset calculation in a macro
ffmpeg | branch: master | Janne Grunau | Mon Dec 8 14:10:18 2014 +0100| [4c81613df499ba81d64ea102b38d0c6686cc304c] | committer: Janne Grunau arm: mlpdsp: handle pic offset calculation in a macro Makes the code easier to read since it hides different offset calculations for arm and thumb mode. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4c81613df499ba81d64ea102b38d0c6686cc304c --- libavcodec/arm/mlpdsp_armv5te.S | 36 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/libavcodec/arm/mlpdsp_armv5te.S b/libavcodec/arm/mlpdsp_armv5te.S index fecbe29..4272dae 100644 --- a/libavcodec/arm/mlpdsp_armv5te.S +++ b/libavcodec/arm/mlpdsp_armv5te.S @@ -43,6 +43,14 @@ I .reqip PSAMP .reqlr +.macro branch_pic_label first, remainder:vararg +A .word \first - 4 +T .hword (\first) / 2 +.ifnb \remainder +branch_pic_label \remainder +.endif +.endm + // Some macros that do loads/multiplies where the register number is determined // from an assembly-time expression. Boy is GNU assembler's syntax ugly... 
@@ -342,20 +350,16 @@ A ldr CO0, [pc, a3, lsl #2] // firorder is in range 0-(8-iir_taps) A add pc, pc, CO0 T tbh [pc, a3, lsl #1] 0: -A .word (70f - 0b) - 4, (71f - 0b) - 4, (72f - 0b) - 4, (73f - 0b) - 4, (74f - 0b) - 4 -T .hword (70f - 0b) / 2, (71f - 0b) / 2, (72f - 0b) / 2, (73f - 0b) / 2, (74f - 0b) / 2 +branch_pic_label (70f - 0b), (71f - 0b), (72f - 0b), (73f - 0b) +branch_pic_label (74f - 0b) .if \iir_taps <= 3 -A .word (75f - 0b) - 4 -T .hword (75f - 0b) / 2 +branch_pic_label (75f - 0b) .if \iir_taps <= 2 -A .word (76f - 0b) - 4 -T .hword (76f - 0b) / 2 +branch_pic_label (76f - 0b) .if \iir_taps <= 1 -A .word (77f - 0b) - 4 -T .hword (77f - 0b) / 2 +branch_pic_label (77f - 0b) .if \iir_taps == 0 -A .word (78f - 0b) - 4 -T .hword (78f - 0b) / 2 +branch_pic_label (78f - 0b) .endif .endif .endif @@ -384,8 +388,8 @@ A ldr CO0, [pc, a4, lsl #2] // irorder is in range 0-4 A add pc, pc, CO0 T tbh [pc, a4, lsl #1] 0: -A .word (60f - 0b) - 4, (61f - 0b) - 4, (62f - 0b) - 4, (63f - 0b) - 4, (64f - 0b) - 4 -T .hword (60f - 0b) / 2, (61f - 0b) / 2, (62f - 0b) / 2, (63f - 0b) / 2, (64f - 0b) / 2 +branch_pic_label (60f - 0b), (61f - 0b), (62f - 0b), (63f - 0b) +branch_pic_label (64f - 0b) 60: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 0 61: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 1 62: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 2 @@ -610,10 +614,10 @@ A ldr v5, [pc, v5, lsl #2] A add pc, pc, v5 T tbh [pc, v5, lsl #1] 0: -A .word 0, 0, (12f - 0b) - 4, (13f - 0b) - 4, (14f - 0b) - 4, (15f - 0b) - 4, (16f - 0b) - 4, (17f - 0b) - 4, (18f - 0b) - 4, (19f - 0b) - 4, (20f - 0b) - 4, (21f - 0b) - 4, (22f - 0b) - 4, (23f - 0b) - 4, (24f - 0b) - 4, (25f - 0b) - 4 -T .hword 0, 0, (12f - 0b) / 2, (13f - 0b) / 2, (14f - 0b) / 2, (15f - 0b) / 2 -T .hword (16f - 0b) / 2, (17f - 0b) / 2, (18f - 0b) / 2, (19f - 0b) / 2 -T .hword (20f - 0b) / 2, (21f - 0b) / 2, (22f - 0b) / 2, (23f - 0b) / 2, (24f - 0b) / 2, (25f - 0b) / 2 +branch_pic_label 0, 0, (12f - 
0b), (13f - 0b) +branch_pic_label (14f - 0b), (15f - 0b), (16f - 0b), (17f - 0b) +branch_pic_label (18f - 0b), (19f - 0b), (20f - 0b), (21f - 0b) +branch_pic_label (22f - 0b), (23f - 0b), (24f - 0b), (25f - 0b) 10: switch_on_au_size 0 11: switch_on_au_size 1 12: switch_on_au_size 2 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] fate: add dolby true hd tests
ffmpeg | branch: master | Janne Grunau | Mon Dec 8 16:21:43 2014 +0100| [d2f1d42b18787e4fcb28864d9d9f701dd64a5747] | committer: Janne Grunau fate: add dolby true hd tests The existing meridian audio test does not test ff_mlp_rematrix_channel_arm. This sample (first 640k of https://samples.libav.org/A-codecs/TrueHD/TrueHD.raw) uses ff_mlp_rematrix_channel_arm. Since this sample has 5.1 channels it also allows testing the integrated downmixing. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d2f1d42b18787e4fcb28864d9d9f701dd64a5747 --- tests/fate/lossless-audio.mak |6 ++ tests/ref/fate/lossless-truehd-5.1 |1 + tests/ref/fate/lossless-truehd-5.1-downmix-2.0 |1 + 3 files changed, 8 insertions(+) diff --git a/tests/fate/lossless-audio.mak b/tests/fate/lossless-audio.mak index 3638f17..751c721 100644 --- a/tests/fate/lossless-audio.mak +++ b/tests/fate/lossless-audio.mak @@ -15,6 +15,12 @@ fate-lossless-tak: CMD = md5 -i $(TARGET_SAMPLES)/lossless-audio/luckynight-part fate-lossless-tak: CMP = oneline fate-lossless-tak: REF = a28d4e5f2192057f7d4bece870f40bd0 +FATE_TRUEHD = fate-lossless-truehd-5.1 fate-lossless-truehd-5.1-downmix-2.0 +fate-lossless-truehd-5.1: CMD = md5 -f truehd -i $(TARGET_SAMPLES)/lossless-audio/truehd_5.1.raw -f s32le +fate-lossless-truehd-5.1-downmix-2.0: CMD = md5 -f truehd -request_channel_layout 2 -i $(TARGET_SAMPLES)/lossless-audio/truehd_5.1.raw -f s32le +fate-lossless-truehd: $(FATE_TRUEHD) +FATE_SAMPLES_AVCONV-$(call DEMDEC, TRUEHD, TRUEHD) += $(FATE_TRUEHD) + FATE_SAMPLES_AVCONV-$(call DEMDEC, TTA, TTA) += fate-lossless-tta fate-lossless-tta: CMD = crc -i $(TARGET_SAMPLES)/lossless-audio/inside.tta diff --git a/tests/ref/fate/lossless-truehd-5.1 b/tests/ref/fate/lossless-truehd-5.1 new file mode 100644 index 000..373b917 --- /dev/null +++ b/tests/ref/fate/lossless-truehd-5.1 @@ -0,0 +1 @@ +95d8aac39dd9f0d7fb83dc7b6f88df35 diff --git a/tests/ref/fate/lossless-truehd-5.1-downmix-2.0 b/tests/ref/fate/lossless-truehd-5.1-downmix-2.0 
new file mode 100644 index 000..f4afbc1 --- /dev/null +++ b/tests/ref/fate/lossless-truehd-5.1-downmix-2.0 @@ -0,0 +1 @@ +a269aee0051d4400c9117136f08c9767 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] doc: fate: remove outdated SSH key fingerprint
ffmpeg | branch: master | Janne Grunau | Mon Mar 9 23:19:55 2015 +0100| [a9d60c390f35f3954821bd635fd31bbb5036b29d] | committer: Janne Grunau doc: fate: remove outdated SSH key fingerprint > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a9d60c390f35f3954821bd635fd31bbb5036b29d --- doc/fate.texi |1 - 1 file changed, 1 deletion(-) diff --git a/doc/fate.texi b/doc/fate.texi index 0185d87..1d6d1d1 100644 --- a/doc/fate.texi +++ b/doc/fate.texi @@ -165,4 +165,3 @@ through @command{ssh}. @section Submitting Reports In order to send reports you need to create an @command{ssh} key and send it to @email{root@@libav.org}. -The current server fingerprint is @var{a4:99:d7:d3:1c:92:0d:56:d6:d5:61:be:01:ae:7d:e6} ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] fate: add explicit support for the toolchain configure option
ffmpeg | branch: master | Janne Grunau | Thu Feb 26 22:04:12 2015 +0100| [f01c77157789b8e3a59ed2c9646faf8299e41641] | committer: Janne Grunau fate: add explicit support for the toolchain configure option > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f01c77157789b8e3a59ed2c9646faf8299e41641 --- tests/fate.sh |1 + 1 file changed, 1 insertion(+) diff --git a/tests/fate.sh b/tests/fate.sh index af0f6c0..f9f8b95 100755 --- a/tests/fate.sh +++ b/tests/fate.sh @@ -49,6 +49,7 @@ configure()( --enable-gpl\ ${arch:+--arch=$arch} \ ${cpu:+--cpu="$cpu"}\ +${toolchain:+--toolchain="$toolchain"} \ ${cross_prefix:+--cross-prefix="$cross_prefix"} \ ${as:+--as="$as"} \ ${cc:+--cc="$cc"} \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] configure: handle Apple's armv7s in probe_arm_arch()
ffmpeg | branch: master | Janne Grunau | Wed Feb 25 18:16:52 2015 +0100| [3e1b5cbc9ab0a61c9bec08a1df1404b9da6ed7ea] | committer: Janne Grunau configure: handle Apple's armv7s in probe_arm_arch() > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3e1b5cbc9ab0a61c9bec08a1df1404b9da6ed7ea --- configure |1 + 1 file changed, 1 insertion(+) diff --git a/configure b/configure index c237979..5eef9f1 100755 --- a/configure +++ b/configure @@ -3198,6 +3198,7 @@ elif enabled arm; then elif check_arm_arch 6T2; then echo armv6t2; elif check_arm_arch 7;then echo armv7; elif check_arm_arch 7A 7_A; then echo armv7-a; +elif check_arm_arch 7S; then echo armv7-a; elif check_arm_arch 7R 7_R; then echo armv7-r; elif check_arm_arch 7M 7_M; then echo armv7-m; elif check_arm_arch 7EM 7E_M; then echo armv7-m; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] configure: move cross_compile checks after the toolchain section
ffmpeg | branch: master | Janne Grunau | Thu Feb 26 21:54:55 2015 +0100| [faab8f9fcb4ffeb967dc6872c0f1e9da719106ce] | committer: Janne Grunau configure: move cross_compile checks after the toolchain section > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=faab8f9fcb4ffeb967dc6872c0f1e9da719106ce --- configure | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/configure b/configure index 5eef9f1..7340844 100755 --- a/configure +++ b/configure @@ -2494,13 +2494,6 @@ disabled logging && logfile=/dev/null echo "# $0 $LIBAV_CONFIGURATION" > $logfile set >> $logfile -test -n "$cross_prefix" && enable cross_compile - -if enabled cross_compile; then -test -n "$arch" && test -n "$target_os" || -die "Must specify target arch and OS when cross-compiling" -fi - case "$toolchain" in clang-asan) cc_default="clang" @@ -2585,6 +2578,13 @@ case "$toolchain" in ;; esac +test -n "$cross_prefix" && enable cross_compile + +if enabled cross_compile; then +test -n "$arch" && test -n "$target_os" || +die "Must specify target arch and OS when cross-compiling" +fi + ar_default="${cross_prefix}${ar_default}" cc_default="${cross_prefix}${cc_default}" nm_default="${cross_prefix}${nm_default}" ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] h264: aarch64: intra prediction optimisations
ffmpeg | branch: master | Janne Grunau | Sun Jul 12 18:30:09 2015 +0200| [f56d8d8dd72b1ab52aa814c5a0fccabf8040ef68] | committer: Janne Grunau h264: aarch64: intra prediction optimisations > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f56d8d8dd72b1ab52aa814c5a0fccabf8040ef68 --- libavcodec/aarch64/Makefile|2 + libavcodec/aarch64/h264pred_init.c | 93 ++ libavcodec/aarch64/h264pred_neon.S | 361 libavcodec/h264pred.c |8 +- libavcodec/h264pred.h |3 + 5 files changed, 465 insertions(+), 2 deletions(-) diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile index 2afff29..d001b34 100644 --- a/libavcodec/aarch64/Makefile +++ b/libavcodec/aarch64/Makefile @@ -1,6 +1,7 @@ OBJS-$(CONFIG_FFT) += aarch64/fft_init_aarch64.o OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o +OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_init_aarch64.o OBJS-$(CONFIG_IMDCT15) += aarch64/imdct15_init.o @@ -18,6 +19,7 @@ NEON-OBJS-$(CONFIG_FFT) += aarch64/fft_neon.o NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \ aarch64/h264idct_neon.o +NEON-OBJS-$(CONFIG_H264PRED)+= aarch64/h264pred_neon.o NEON-OBJS-$(CONFIG_H264QPEL)+= aarch64/h264qpel_neon.o \ aarch64/hpeldsp_neon.o NEON-OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_neon.o diff --git a/libavcodec/aarch64/h264pred_init.c b/libavcodec/aarch64/h264pred_init.c new file mode 100644 index 000..8f912cb --- /dev/null +++ b/libavcodec/aarch64/h264pred_init.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2009 Mans Rullgard + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "libavutil/attributes.h" +#include "libavutil/aarch64/cpu.h" +#include "libavcodec/avcodec.h" +#include "libavcodec/h264pred.h" + +void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred16x16_hor_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred16x16_plane_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred16x16_dc_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred16x16_128_dc_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred16x16_left_dc_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred16x16_top_dc_neon(uint8_t *src, ptrdiff_t stride); + +void ff_pred8x8_vert_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_hor_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_plane_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_dc_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_128_dc_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_left_dc_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_top_dc_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_l0t_dc_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t 
stride); + +static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id, +const int bit_depth, +const int chroma_format_idc) +{ +const int high_depth = bit_depth > 8; + +if (high_depth) +return; + +if (chroma_format_idc <= 1) { +h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vert_neon; +h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_hor_neon; +if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8) +h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon; +h->pred8x8[DC_128_PRED8x8 ] = ff_pred8x8_128_dc_neon; +if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_
[FFmpeg-cvslog] arm: use a local label instead of the function symbol in ff_prefetch_arm
ffmpeg | branch: master | Janne Grunau | Mon Jul 20 10:46:15 2015 +0200| [9ed6f9a17cc1f7d3699a1223783dadc1ee222069] | committer: Janne Grunau arm: use a local label instead of the function symbol in ff_prefetch_arm Avoids a relocation which might end up out of range for thumb2. Reported-By: Ludovic Fauvet Bug-Id: https://bugs.webkit.org/show_bug.cgi?id=137022 CC: libav-sta...@libav.org > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9ed6f9a17cc1f7d3699a1223783dadc1ee222069 --- libavcodec/arm/videodsp_armv5te.S |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/arm/videodsp_armv5te.S b/libavcodec/arm/videodsp_armv5te.S index bbd0a61..0510019 100644 --- a/libavcodec/arm/videodsp_armv5te.S +++ b/libavcodec/arm/videodsp_armv5te.S @@ -23,9 +23,10 @@ #include "libavutil/arm/asm.S" function ff_prefetch_arm, export=1 +1: subsr2, r2, #1 pld [r0] add r0, r0, r1 -bne X(ff_prefetch_arm) +bne 1b bx lr endfunc ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] checkasm: remove empty array initializer list in h264pred test
ffmpeg | branch: master | Janne Grunau | Mon Jul 20 23:17:57 2015 +0200| [e605bf3b590d295f215fcc9fd58eb11be55b68cb] | committer: Janne Grunau checkasm: remove empty array initializer list in h264pred test Fixes MSVC compilation. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e605bf3b590d295f215fcc9fd58eb11be55b68cb --- tests/checkasm/h264pred.c |3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/checkasm/h264pred.c b/tests/checkasm/h264pred.c index a5eb033..40e949a 100644 --- a/tests/checkasm/h264pred.c +++ b/tests/checkasm/h264pred.c @@ -87,8 +87,7 @@ static const char * const pred8x8_modes[4][11] = { [LEFT_DC_PRED8x8] = "left_dc_rv40", [TOP_DC_PRED8x8 ] = "top_dc_rv40", }, -{ /* SVQ3 */ -}, +/* nothing for SVQ3 */ }; static const char * const pred16x16_modes[4][9] = { ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] ac3_parser: add required padding for GetBitContext buffer
ffmpeg | branch: release/2.4 | Janne Grunau | Mon Jun 8 14:48:26 2015 +0200| [a9f108bd78e842a47ade2f7c8b22a1764d01d4e6] | committer: Janne Grunau ac3_parser: add required padding for GetBitContext buffer Fixes stack buffer overflow errors detected by address sanitizer in various fate tests. CC: libav-sta...@libav.org > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a9f108bd78e842a47ade2f7c8b22a1764d01d4e6 --- libavcodec/ac3_parser.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/ac3_parser.c b/libavcodec/ac3_parser.c index 5ea09f8..69d88c1 100644 --- a/libavcodec/ac3_parser.c +++ b/libavcodec/ac3_parser.c @@ -150,7 +150,7 @@ static int ac3_sync(uint64_t state, AACAC3ParseContext *hdr_info, int err; union { uint64_t u64; -uint8_t u8[8]; +uint8_t u8[8 + FF_INPUT_BUFFER_PADDING_SIZE]; } tmp = { av_be2ne64(state) }; AC3HeaderInfo hdr; GetBitContext gbc; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] aac_parser: add required padding for GetBitContext buffer
ffmpeg | branch: release/2.4 | Janne Grunau | Mon Jun 8 14:45:12 2015 +0200| [02477323b92aacdabe0a2d129eeb0c15fbd1ec9e] | committer: Janne Grunau aac_parser: add required padding for GetBitContext buffer Fixes stack buffer overflow errors detected by address sanitizer in various fate tests. CC: libav-sta...@libav.org > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=02477323b92aacdabe0a2d129eeb0c15fbd1ec9e --- libavcodec/aac_parser.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/aac_parser.c b/libavcodec/aac_parser.c index fdaa5f8..acb05d4 100644 --- a/libavcodec/aac_parser.c +++ b/libavcodec/aac_parser.c @@ -34,7 +34,7 @@ static int aac_sync(uint64_t state, AACAC3ParseContext *hdr_info, int size; union { uint64_t u64; -uint8_t u8[8]; +uint8_t u8[8 + FF_INPUT_BUFFER_PADDING_SIZE]; } tmp; tmp.u64 = av_be2ne64(state); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] imc: add required padding for GetBitContext buffer
ffmpeg | branch: release/2.4 | Janne Grunau | Mon Jun 8 14:48:54 2015 +0200| [aa3ec219e1a5cc0e96ddec6ea83312ec780448f5] | committer: Janne Grunau imc: add required padding for GetBitContext buffer Fixes stack buffer overflow errors detected by address sanitizer in fate-imc. CC: libav-sta...@libav.org > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=aa3ec219e1a5cc0e96ddec6ea83312ec780448f5 --- libavcodec/imc.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/imc.c b/libavcodec/imc.c index 500f564..26fbcd4 100644 --- a/libavcodec/imc.c +++ b/libavcodec/imc.c @@ -997,7 +997,7 @@ static int imc_decode_frame(AVCodecContext *avctx, void *data, IMCContext *q = avctx->priv_data; -LOCAL_ALIGNED_16(uint16_t, buf16, [IMC_BLOCK_SIZE / 2]); +LOCAL_ALIGNED_16(uint16_t, buf16, [(IMC_BLOCK_SIZE + FF_INPUT_BUFFER_PADDING_SIZE) / 2]); if (buf_size < IMC_BLOCK_SIZE * avctx->channels) { av_log(avctx, AV_LOG_ERROR, "frame too small!\n"); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] fate: test only demuxing in asf-repldata
ffmpeg | branch: master | Janne Grunau | Thu Aug 20 14:46:42 2015 +0200| [faa3f17a76333b672ce4a40cf80f678ab68bdbae] | committer: Janne Grunau fate: test only demuxing in asf-repldata > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=faa3f17a76333b672ce4a40cf80f678ab68bdbae --- tests/fate/microsoft.mak|6 ++ tests/ref/fate/asf-repldata | 28 ++-- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/tests/fate/microsoft.mak b/tests/fate/microsoft.mak index 6f83d2e..30bd35a 100644 --- a/tests/fate/microsoft.mak +++ b/tests/fate/microsoft.mak @@ -62,7 +62,5 @@ fate-vc1-ism: CMD = framecrc -i $(TARGET_SAMPLES)/isom/vc1-wmapro.ism -an FATE_SAMPLES_AVCONV-$(CONFIG_VC1_DECODER) += $(FATE_VC1-yes) fate-vc1: $(FATE_VC1-yes) -FATE_ASF_REPLDATA += fate-asf-repldata -fate-asf-repldata: CMD = framecrc -i $(TARGET_SAMPLES)/asf/bug821-2.asf - -FATE_SAMPLES_AVCONV-$(call DEMDEC, ASF, MPEG4) += $(FATE_ASF_REPLDATA) +FATE_SAMPLES_AVCONV-$(CONFIG_ASF_DEMUXER) += fate-asf-repldata +fate-asf-repldata: CMD = framecrc -i $(TARGET_SAMPLES)/asf/bug821-2.asf -c copy diff --git a/tests/ref/fate/asf-repldata b/tests/ref/fate/asf-repldata index 6a1dbae..5abc9da 100644 --- a/tests/ref/fate/asf-repldata +++ b/tests/ref/fate/asf-repldata @@ -1,15 +1,15 @@ #tb 0: 1/1000 -0, 0, 0,0, 460800, 0x85b053af -0,122,122,0, 460800, 0xfbdbb291 -0,245,245,0, 460800, 0x1389b3d9 -0,367,367,0, 460800, 0xc81e8326 -0,490,490,0, 460800, 0xb0028333 -0,612,612,0, 460800, 0x002b8619 -0,735,735,0, 460800, 0xeb808d70 -0,857,857,0, 460800, 0xe8288d27 -0,980,980,0, 460800, 0xcfbe8bcc -0, 1102, 1102,0, 460800, 0x682b8d38 -0, 1224, 1224,0, 460800, 0x5879cd04 -0, 1347, 1347,0, 460800, 0x88a6a3e6 -0, 1469, 1469,0, 460800, 0xc491db72 -0, 1592, 1592,0, 460800, 0xb779d5cc +0, 0, 0,0,23374, 0x8725b3b8 +0,122,122,0,13732, 0x3ac8531a +0,245,245,0, 615, 0xd31641b4 +0,367,367,0, 6361, 0xf263af54 +0,490,490,0, 320, 0xd6f2d6b8 +0,612,612,0, 3750, 0xfcf1d501 +0,735,735,0, 2541, 0xd9fc04f9 +0,857,857,0, 205, 0x4d38a947 
+0,980,980,0, 2166, 0x2f1e7d74 +0, 1102, 1102,0, 1667, 0x0cd84b61 +0, 1224, 1224,0,13645, 0x543bd032 +0, 1347, 1347,0, 5953, 0xc3037c73 +0, 1469, 1469,0,36169, 0xca9f716d +0, 1592, 1592,0, 3030, 0x9aba5683 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog