[FFmpeg-cvslog] build: Add an option for passing linker flags to the shared library build

2017-11-06 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Jun 20 
19:25:43 2017 +0200| [857e26b655a769e5a56bada1a0d9adb44cc176b7] | committer: 
Diego Biurrun

build: Add an option for passing linker flags to the shared library build

Also employ this mechanism to pass $libdir to the runtime library search
path if rpath is enabled. This fixes underlinking of some test binaries
on some systems.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=857e26b655a769e5a56bada1a0d9adb44cc176b7
---

 avbuild/library.mak |  2 +-
 configure   | 11 ++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/avbuild/library.mak b/avbuild/library.mak
index 2095f61d80..30757546fd 100644
--- a/avbuild/library.mak
+++ b/avbuild/library.mak
@@ -48,7 +48,7 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR)
 
 $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SUBDIR)lib$(NAME).ver $(DEP_LIBS)
$(SLIB_CREATE_DEF_CMD)
-   $$(LD) $(SHFLAGS) $(LDFLAGS) $$(LD_O) $$(filter %.o,$$^) $(FFEXTRALIBS)
+   $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter %.o,$$^) 
$(FFEXTRALIBS)
$(SLIB_EXTRA_CMD)
 
 clean::
diff --git a/configure b/configure
index bbed2258b9..ce0f6c919e 100755
--- a/configure
+++ b/configure
@@ -283,6 +283,7 @@ Toolchain options:
   --extra-objcflags=FLAGS  add FLAGS to OBJCFLAGS [$CFLAGS]
   --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS [$LDFLAGS]
   --extra-ldexeflags=ELDFLAGS add ELDFLAGS to LDEXEFLAGS [$LDEXEFLAGS]
+  --extra-ldsoflags=ELDFLAGS add ELDFLAGS to LDSOFLAGS [$LDSOFLAGS]
   --extra-libs=ELIBS   add ELIBS [$ELIBS]
   --extra-version=STRING   version string suffix []
   --optflags=OPTFLAGS  override optimization-related compiler flags
@@ -740,6 +741,10 @@ add_ldexeflags(){
 append LDEXEFLAGS $($ldflags_filter "$@")
 }
 
+add_ldsoflags(){
+append LDSOFLAGS $($ldflags_filter "$@")
+}
+
 add_stripflags(){
 append STRIPFLAGS "$@"
 }
@@ -2802,6 +2807,9 @@ for opt do
 --extra-ldexeflags=*)
 add_ldexeflags $optval
 ;;
+--extra-ldsoflags=*)
+add_ldsoflags $optval
+;;
 --extra-libs=*)
 add_extralibs $optval
 ;;
@@ -4950,7 +4958,7 @@ EOF
 # add some linker flags
 check_ldflags -Wl,--warn-common
 check_ldflags 
-Wl,-rpath-link=libswscale:libavfilter:libavdevice:libavformat:libavcodec:libavutil:libavresample
-enabled rpath && add_ldexeflags -Wl,-rpath,$libdir
+enabled rpath && add_ldexeflags -Wl,-rpath,$libdir && add_ldsoflags 
-Wl,-rpath,$libdir
 test_ldflags -Wl,-Bsymbolic && append SHFLAGS -Wl,-Bsymbolic
 
 # add some strip flags
@@ -5410,6 +5418,7 @@ LD_PATH=$LD_PATH
 DLLTOOL=$dlltool
 LDFLAGS=$LDFLAGS
 LDEXEFLAGS=$LDEXEFLAGS
+LDSOFLAGS=$LDSOFLAGS
 SHFLAGS=$(echo $($ldflags_filter $SHFLAGS))
 STRIPFLAGS=$STRIPFLAGS
 X86ASMFLAGS=$X86ASMFLAGS

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] Revert "mmaldec: autodetect by default" since it breaks linking on systems without mmal libraries

2016-06-21 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sat Apr 30 
16:05:32 2016 +0200| [c26741332165a049717e6da84db13a24ee8edade] | committer: 
Janne Grunau

Revert "mmaldec: autodetect by default" since it breaks linking on systems 
without mmal libraries

This reverts commit 33ac77e850efdfd0e8835950c3d947baffd4df45.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c26741332165a049717e6da84db13a24ee8edade
---

 configure |7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/configure b/configure
index 3d236f5..e395e38 100755
--- a/configure
+++ b/configure
@@ -142,7 +142,7 @@ Hardware accelerators:
 Hardware-accelerated decoding/encoding:
   --enable-cudaenable dynamically linked CUDA [no]
   --enable-libmfx  enable HW acceleration through libmfx
-  --enable-mmalenable decoding via MMAL [auto]
+  --enable-mmalenable decoding via MMAL [no]
   --enable-nvenc   enable encoding via NVENC [no]
   --enable-omx enable encoding via OpenMAX IL [no]
   --enable-omx-rpi enable encoding via OpenMAX IL for Raspberry Pi [no]
@@ -1252,7 +1252,6 @@ EXTERNAL_LIBRARY_LIST="
 libxcb_shm
 libxcb_xfixes
 libxvid
-mmal
 openssl
 x11grab
 zlib
@@ -4623,13 +4622,13 @@ enabled libx265   && require_pkg_config x265 
x265.h x265_api_get &&
die "ERROR: libx265 version must be >= 57."; }
 enabled libxavs   && require libxavs xavs.h xavs_encoder_encode -lxavs
 enabled libxvid   && require libxvid xvid.h xvid_global -lxvidcore
-disabled mmal || enable mmal && { check_lib interface/mmal/mmal.h 
mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host ||
+enabled mmal  && { check_lib interface/mmal/mmal.h 
mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host ||
 { ! enabled cross_compile && {
 add_cflags -isystem/opt/vc/include/ 
-isystem/opt/vc/include/interface/vmcs_host/linux 
-isystem/opt/vc/include/interface/vcos/pthreads -fgnu89-inline ;
 add_extralibs -L/opt/vc/lib/ -lmmal_core 
-lmmal_util -lmmal_vc_client -lbcm_host ;
 check_lib interface/mmal/mmal.h 
mmal_port_connect ; }
 check_lib interface/mmal/mmal.h 
mmal_port_connect ; } ||
-   disable mmal; }
+   die "ERROR: mmal not found"; }
 enabled mmal && check_func_headers interface/mmal/mmal.h 
"MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS"
 enabled omx_rpi && enable omx
 enabled omx   && { check_header OMX_Core.h ||

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] fate: Add --ignore-tests configure option for omitting specific FATE tests

2017-05-05 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Wed Dec 28 
00:19:49 2016 +0200| [35d1f726eb9fdd376ab900587fb02122b72f2b9a] | committer: 
Diego Biurrun

fate: Add --ignore-tests configure option for omitting specific FATE tests

This can be useful to filter out noise in known-broken scenarios like
miscompilation by legacy compilers and similar.

Originally based on a patch by Diego Biurrun.

Signed-off-by: Diego Biurrun 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=35d1f726eb9fdd376ab900587fb02122b72f2b9a
---

 configure | 11 +++
 doc/fate.texi |  1 +
 tests/Makefile|  4 +++-
 tests/fate-run.sh |  9 +++--
 tests/fate.sh |  1 +
 5 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/configure b/configure
index 09674aad4d..d8b59e2548 100755
--- a/configure
+++ b/configure
@@ -348,6 +348,8 @@ Developer options (useful when working on Libav itself):
   --random-seed=VALUE  seed value for --enable/disable-random
   --disable-valgrind-backtrace do not print a backtrace under Valgrind
(only applies to --disable-optimizations builds)
+  --ignore-tests=TESTS comma-separated list (without "fate-" prefix
+   in the name) of tests whose result is ignored
 
 NOTE: Object files are built at the place where configure is launched.
 EOF
@@ -1815,6 +1817,7 @@ CMDLINE_SET="
 host_ld
 host_ldflags
 host_os
+ignore_tests
 ld
 logfile
 malloc_prefix
@@ -5192,6 +5195,13 @@ for type in decoder encoder hwaccel parser demuxer muxer 
protocol filter bsf ind
 echo
 done
 
+if test -n "$ignore_tests"; then
+ignore_tests=$(echo $ignore_tests | tr ',' ' ')
+echo "Ignored FATE tests:"
+echo $ignore_tests | print_3_columns
+echo
+fi
+
 license="LGPL version 2.1 or later"
 if enabled nonfree; then
 license="nonfree and unredistributable"
@@ -5316,6 +5326,7 @@ SLIB_INSTALL_EXTRA_LIB=${SLIB_INSTALL_EXTRA_LIB}
 SLIB_INSTALL_EXTRA_SHLIB=${SLIB_INSTALL_EXTRA_SHLIB}
 VERSION_SCRIPT_POSTPROCESS_CMD=${VERSION_SCRIPT_POSTPROCESS_CMD}
 SAMPLES:=${samples:-\$(LIBAV_SAMPLES)}
+IGNORE_TESTS=$ignore_tests
 EOF
 
 map 'eval echo "${v}_FFLIBS=\$${v}_deps" >> avbuild/config.mak' $LIBRARY_LIST
diff --git a/doc/fate.texi b/doc/fate.texi
index 9e654e79a1..b1bfa2e7ed 100644
--- a/doc/fate.texi
+++ b/doc/fate.texi
@@ -139,6 +139,7 @@ workdir=# directory in 
which to do all the work
 fate_recv="ssh -T fate@@fate.libav.org"  # command to submit report
 comment=# optional description
 build_only= # set to "yes" for a compile-only instance that skips tests
+ignore_tests=
 
 # the following are optional and map to configure options
 arch=
diff --git a/tests/Makefile b/tests/Makefile
index 0e475a2836..30e06e8fdd 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -143,11 +143,13 @@ endif
 
 FATE_UTILS = base64 tiny_psnr
 
+$(addprefix fate-, $(IGNORE_TESTS)): REPORT=ignore
+
 fate: $(FATE)
 
 $(FATE): $(FATE_UTILS:%=tests/%$(HOSTEXESUF))
@echo "TEST$(@:fate-%=%)"
-   $(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(SAMPLES)" "$(TARGET_EXEC)" 
"$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' 
'$(THREAD_TYPE)' '$(CPUFLAGS)' '$(CMP_SHIFT)' '$(CMP_TARGET)' 
'$(SIZE_TOLERANCE)' '$(CMP_UNIT)' '$(GEN)' '$(HWACCEL)'
+   $(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(SAMPLES)" "$(TARGET_EXEC)" 
"$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' 
'$(THREAD_TYPE)' '$(CPUFLAGS)' '$(CMP_SHIFT)' '$(CMP_TARGET)' 
'$(SIZE_TOLERANCE)' '$(CMP_UNIT)' '$(GEN)' '$(HWACCEL)' '$(REPORT)'
 
 fate-list:
@printf '%s\n' $(sort $(FATE))
diff --git a/tests/fate-run.sh b/tests/fate-run.sh
index b1b299a055..27cd6261a5 100755
--- a/tests/fate-run.sh
+++ b/tests/fate-run.sh
@@ -24,6 +24,7 @@ size_tolerance=${14:-0}
 cmp_unit=${15:-2}
 gen=${16:-no}
 hwaccel=${17:-none}
+report_type=${18:-standard}
 
 outdir="tests/data/fate"
 outfile="${outdir}/${test}"
@@ -212,13 +213,17 @@ if test -e "$ref" || test $cmp = "oneline" ; then
 esac
 cmperr=$?
 test $err = 0 && err=$cmperr
-test $err = 0 || cat $cmpfile
+if [ "$report_type" = "ignore" ]; then
+test $err = 0 || echo "IGNORE  fate-${test}" && err=0
+else
+test $err = 0 || cat $cmpfile
+fi
 else
 echo "reference file '$ref' not found"
 err=1

[FFmpeg-cvslog] build: remove hardcoded name of version header

2017-03-20 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Sep 12 
21:52:01 2016 +0200| [15fcf6292ed79be274c824fedb099c2665f4cc15] | committer: 
Janne Grunau

build: remove hardcoded name of version header

Fixes an oversight in 1316df7aa98c4.

CC: libav-sta...@libav.org

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=15fcf6292ed79be274c824fedb099c2665f4cc15
---

 version.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/version.sh b/version.sh
index 6f72b2c..4689627 100755
--- a/version.sh
+++ b/version.sh
@@ -18,7 +18,7 @@ if [ -z "$2" ]; then
 fi
 
 NEW_REVISION="#define LIBAV_VERSION \"$version\""
-OLD_REVISION=$(cat version.h 2> /dev/null)
+OLD_REVISION=$(cat "$2" 2> /dev/null)
 
 # Update version.h only on revision changes to avoid spurious rebuilds
 if test "$NEW_REVISION" != "$OLD_REVISION"; then

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] aarch64: vp9: loop_filter: fix typo in skip flatout8 check

2017-03-31 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Nov 14 
01:16:00 2016 +0100| [31756abe29eb039a11c59a42cb12e0cc2aef3b97] | committer: 
Martin Storsjö

aarch64: vp9: loop_filter: fix typo in skip flatout8 check

The 16_16 loop filter functions could miss an early exit before
flatout8.

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=31756abe29eb039a11c59a42cb12e0cc2aef3b97
---

 libavcodec/aarch64/vp9lpf_neon.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S
index 995a97d..c1b0c88 100644
--- a/libavcodec/aarch64/vp9lpf_neon.S
+++ b/libavcodec/aarch64/vp9lpf_neon.S
@@ -413,7 +413,7 @@
 cbz x5,  7f
 mov x5,  v7.d[0]
 .ifc \sz, .16b
-mov x6,  v2.d[1]
+mov x6,  v7.d[1]
 orr x5,  x5,  x6
 .endif
 // If no pixels need flat8out, jump to a writeout of the inner 6 pixels

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] aarch64: vp9: use alternative returns in the core loop filter function

2017-04-01 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Nov 14 
00:13:34 2016 +0100| [d7595de0b25e7064fd9e06dea5d0425536cef6dc] | committer: 
Janne Grunau

aarch64: vp9: use alternative returns in the core loop filter function

Since aarch64 has enough free general purpose registers use them to
branch to the appropriate storage code. 1-2 cycles faster for the
functions using loop_filter 8/16, ... on a cortex-a53. Mixed results
(up to 2 cycles faster/slower) on a cortex-a57.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d7595de0b25e7064fd9e06dea5d0425536cef6dc
---

 libavcodec/aarch64/vp9lpf_neon.S | 48 +++-
 1 file changed, 18 insertions(+), 30 deletions(-)

diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S
index c1b0c88..392794b 100644
--- a/libavcodec/aarch64/vp9lpf_neon.S
+++ b/libavcodec/aarch64/vp9lpf_neon.S
@@ -410,15 +410,19 @@
 .endif
 // If no pixels needed flat8in nor flat8out, jump to a
 // writeout of the inner 4 pixels
-cbz x5,  7f
+cbnzx5,  1f
+br  x14
+1:
 mov x5,  v7.d[0]
 .ifc \sz, .16b
 mov x6,  v7.d[1]
 orr x5,  x5,  x6
 .endif
 // If no pixels need flat8out, jump to a writeout of the inner 6 pixels
-cbz x5,  8f
+cbnzx5,  1f
+br  x15
 
+1:
 // flat8out
 // This writes all outputs into v2-v17 (skipping v6 and v16).
 // If this part is skipped, the output is read from v21-v26 (which is 
the input
@@ -549,35 +553,24 @@ endfunc
 
 function vp9_loop_filter_8
 loop_filter 8,  .8b,  0,v16, v17, v18, v19, v28, v29, v30, v31
-mov x5,  #0
 ret
 6:
-mov x5,  #6
-ret
+br  x13
 9:
 br  x10
 endfunc
 
 function vp9_loop_filter_8_16b_mix
 loop_filter 8,  .16b, 88,   v16, v17, v18, v19, v28, v29, v30, v31
-mov x5,  #0
 ret
 6:
-mov x5,  #6
-ret
+br  x13
 9:
 br  x10
 endfunc
 
 function vp9_loop_filter_16
 loop_filter 16, .8b,  0,v8,  v9,  v10, v11, v12, v13, v14, v15
-mov x5,  #0
-ret
-7:
-mov x5,  #7
-ret
-8:
-mov x5,  #8
 ret
 9:
 ldp d8,  d9,  [sp], 0x10
@@ -589,13 +582,6 @@ endfunc
 
 function vp9_loop_filter_16_16b
 loop_filter 16, .16b, 0,v8,  v9,  v10, v11, v12, v13, v14, v15
-mov x5,  #0
-ret
-7:
-mov x5,  #7
-ret
-8:
-mov x5,  #8
 ret
 9:
 ldp d8,  d9,  [sp], 0x10
@@ -614,11 +600,14 @@ endfunc
 .endm
 
 .macro loop_filter_8
+// calculate alternative 'return' targets
+adr x13, 6f
 bl  vp9_loop_filter_8
-cbnzx5,  6f
 .endm
 
 .macro loop_filter_8_16b_mix mix
+// calculate alternative 'return' targets
+adr x13, 6f
 .if \mix == 48
 mov x11, #0x
 .elseif \mix == 84
@@ -627,21 +616,20 @@ endfunc
 mov x11, #0x
 .endif
 bl  vp9_loop_filter_8_16b_mix
-cbnzx5,  6f
 .endm
 
 .macro loop_filter_16
+// calculate alternative 'return' targets
+adr x14, 7f
+adr x15, 8f
 bl  vp9_loop_filter_16
-cmp x5,  7
-b.gt8f
-b.eq7f
 .endm
 
 .macro loop_filter_16_16b
+// calculate alternative 'return' targets
+adr x14, 7f
+adr x15, 8f
 bl  vp9_loop_filter_16_16b
-cmp x5,  7
-b.gt8f
-b.eq7f
 .endm
 
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] aarch64: vp9: loop filter: replace 'orr; cbn?z' with 'adds; b.{eq,ne}'

2017-04-01 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Nov 14 
22:35:13 2016 +0100| [e7ae8f7a715843a5089d18e033afb3ee19ab3057] | committer: 
Janne Grunau

aarch64: vp9: loop filter: replace 'orr; cbn?z' with 'adds; b.{eq,ne}'

The latter is 1 cycle faster on a cortex-a53 and since the operands are
bytewise (or larger) bitmasks (impossible to overflow to zero) both are
equivalent.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e7ae8f7a715843a5089d18e033afb3ee19ab3057
---

 libavcodec/aarch64/vp9lpf_neon.S | 31 ---
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S
index 392794b..e9c7d9e 100644
--- a/libavcodec/aarch64/vp9lpf_neon.S
+++ b/libavcodec/aarch64/vp9lpf_neon.S
@@ -218,13 +218,15 @@
 xtn_sz  v5, v6.8h,  v7.8h,  \sz
 and v4\sz,  v4\sz,  v5\sz // fm
 
+// If no pixels need filtering, just exit as soon as possible
 mov x5,  v4.d[0]
 .ifc \sz, .16b
 mov x6,  v4.d[1]
-orr x5,  x5,  x6
-.endif
-// If no pixels need filtering, just exit as soon as possible
+addsx5,  x5,  x6
+b.eq9f
+.else
 cbz x5,  9f
+.endif
 
 .if \wd >= 8
 moviv0\sz,  #1
@@ -344,15 +346,17 @@
 bit v22\sz, v0\sz,  v5\sz   // if (!hev && fm && 
!flat8in)
 bit v25\sz, v2\sz,  v5\sz
 
+// If no pixels need flat8in, jump to flat8out
+// (or to a writeout of the inner 4 pixels, for wd=8)
 .if \wd >= 8
 mov x5,  v6.d[0]
 .ifc \sz, .16b
 mov x6,  v6.d[1]
-orr x5,  x5,  x6
-.endif
-// If no pixels need flat8in, jump to flat8out
-// (or to a writeout of the inner 4 pixels, for wd=8)
+addsx5,  x5,  x6
+b.eq6f
+.else
 cbz x5,  6f
+.endif
 
 // flat8in
 uaddl_sz\tmp1\().8h, \tmp2\().8h,  v20, v21, \sz
@@ -406,20 +410,25 @@
 mov x5,  v2.d[0]
 .ifc \sz, .16b
 mov x6,  v2.d[1]
-orr x5,  x5,  x6
+adds x5,  x5,  x6
+b.ne1f
+.else
+cbnzx5,  1f
 .endif
 // If no pixels needed flat8in nor flat8out, jump to a
 // writeout of the inner 4 pixels
-cbnzx5,  1f
 br  x14
 1:
+
 mov x5,  v7.d[0]
 .ifc \sz, .16b
 mov x6,  v7.d[1]
-orr x5,  x5,  x6
+adds x5,  x5,  x6
+b.ne1f
+.else
+cbnzx5,  1f
 .endif
 // If no pixels need flat8out, jump to a writeout of the inner 6 pixels
-cbnzx5,  1f
 br  x15
 
 1:

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] arm: vp9itxfm: Simplify the stack alignment code

2017-04-03 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Fri Nov 18 
09:36:59 2016 +0200| [e5b0fc170f85b00f7dd0ac514918fb5c95253d39] | committer: 
Martin Storsjö

arm: vp9itxfm: Simplify the stack alignment code

This is one instruction less for thumb, and leaves only one arm-specific
and two thumb-specific instructions.

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e5b0fc170f85b00f7dd0ac514918fb5c95253d39
---

 libavcodec/arm/vp9itxfm_neon.S | 28 
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index cdb43b5..5d73d84 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -791,15 +791,13 @@ function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, 
export=1
 .ifnc \txfm1\()_\txfm2,idct_idct
 vpush   {q4-q7}
 .endif
-mov r7,  sp
 
 @ Align the stack, allocate a temp buffer
-T   mov r12, sp
-T   bic r12, r12, #15
-T   sub r12, r12, #512
-T   mov sp,  r12
-A   bic sp,  sp,  #15
-A   sub sp,  sp,  #512
+T   mov r7,  sp
+T   and r7,  r7,  #15
+A   and r7,  sp,  #15
+add r7,  r7,  #512
+sub sp,  sp,  r7
 
 mov r4,  r0
 mov r5,  r1
@@ -828,7 +826,7 @@ A   sub sp,  sp,  #512
 bl  \txfm2\()16_1d_4x16_pass2_neon
 .endr
 
-mov sp,  r7
+add sp,  sp,  r7
 .ifnc \txfm1\()_\txfm2,idct_idct
 vpop{q4-q7}
 .endif
@@ -1117,15 +1115,13 @@ function ff_vp9_idct_idct_32x32_add_neon, export=1
 beq idct32x32_dc_add_neon
 push{r4-r7,lr}
 vpush   {q4-q7}
-mov r7,  sp
 
 @ Align the stack, allocate a temp buffer
-T   mov r12, sp
-T   bic r12, r12, #15
-T   sub r12, r12, #2048
-T   mov sp,  r12
-A   bic sp,  sp,  #15
-A   sub sp,  sp,  #2048
+T   mov r7,  sp
+T   and r7,  r7,  #15
+A   and r7,  sp,  #15
+add r7,  r7,  #2048
+sub sp,  sp,  r7
 
 mov r4,  r0
 mov r5,  r1
@@ -1143,7 +1139,7 @@ A   sub sp,  sp,  #2048
 bl  idct32_1d_4x32_pass2_neon
 .endr
 
-mov sp,  r7
+add sp,  sp,  r7
 vpop{q4-q7}
 pop {r4-r7,pc}
 endfunc

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] arm: warn/error on movrelx usage problematic with PIC on ELF

2017-04-04 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Fri Nov 18 
21:06:40 2016 +0100| [6a1ea4ec932f4fc9fdc00ec51ee070b298ddb35f] | committer: 
Janne Grunau

arm: warn/error on movrelx usage problematic with PIC on ELF

The warning has false positives but our asm does not trigger it. For
new code false positives can only be avoided by changing the register
allocation.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6a1ea4ec932f4fc9fdc00ec51ee070b298ddb35f
---

 libavutil/arm/asm.S | 9 +
 1 file changed, 9 insertions(+)

diff --git a/libavutil/arm/asm.S b/libavutil/arm/asm.S
index 4ac0ea2..a791e80 100644
--- a/libavutil/arm/asm.S
+++ b/libavutil/arm/asm.S
@@ -184,6 +184,15 @@ T   ldr \rd, [\rd]
 .endm
 
 .macro  movrelx rd,  val, gp
+.ifc \rd,\gp
+.error  "movrelx needs two distinct registers"
+.endif
+.ifc \rd\()_\gp,r12_
+.warning"movrelx rd=\rd without explicit set gp"
+.endif
+.ifc \rd\()_\gp,ip_
+.warning"movrelx rd=\rd without explicit set gp"
+.endif
 #if CONFIG_PIC && defined(__ELF__)
 .ifnb \gp
   .if .Lpic_gp

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] arm64: replace 'bic' with immediate with 'and' with inverted immediate

2017-04-26 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Thu Dec  8 
20:40:34 2016 +0100| [2425d7329fdccfa9954faba748f3865151354f0c] | committer: 
Janne Grunau

arm64: replace 'bic' with immediate with 'and' with inverted immediate

The former is not an official pseudo instruction although gas and llvm's
internal assembler support it. Fixes a build error with xcode 6.2
reported by Memphiz on github.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2425d7329fdccfa9954faba748f3865151354f0c
---

 libavcodec/aarch64/synth_filter_neon.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/aarch64/synth_filter_neon.S 
b/libavcodec/aarch64/synth_filter_neon.S
index 9551bff8e3..b001c737da 100644
--- a/libavcodec/aarch64/synth_filter_neon.S
+++ b/libavcodec/aarch64/synth_filter_neon.S
@@ -50,7 +50,7 @@ function ff_synth_filter_float_neon, export=1
 add x1,  x1,  x7,  lsl #2   // synth_buf
 sub w8,  w7,  #32
 stp x5,  x1,  [sp, #16]
-bic x7,  x7,  #63
+and x7,  x7,  #~63
 and w8,  w8,  #511
 stp x7,  x30, [sp, #32]
 str w8,  [x2]

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] vp8/armv6: mc: avoid boolean expression in calculation

2016-11-17 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sat Jul  9 
15:30:34 2016 +0200| [5f74bd31a9bd1ac7655103b11743c12d38e0419f] | committer: 
Janne Grunau

vp8/armv6: mc: avoid boolean expression in calculation

GNU as evaluates true as '-1' while Apple's variant and llvm's internal
assembler evaluate it as '1'. The best way to avoid this madness is to
eliminate boolean expressions instead of trying to fix it with
preprocessor directives. Use a direct formula to calculate the
required temporary space on the stack in
ff_put_vp8_{epel,bilin}{4,8,16}_h[246]v[246]_armv6().

Fixes a checkasm segfault in vp8dsp.mc when using llvm's internal
assembler for a non-Apple target.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5f74bd31a9bd1ac7655103b11743c12d38e0419f
---

 libavcodec/arm/vp8dsp_armv6.S | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/libavcodec/arm/vp8dsp_armv6.S b/libavcodec/arm/vp8dsp_armv6.S
index 4e83fe1..565361e 100644
--- a/libavcodec/arm/vp8dsp_armv6.S
+++ b/libavcodec/arm/vp8dsp_armv6.S
@@ -1226,13 +1226,8 @@ vp8_mc_1bilin,  8, v
 vp8_mc_1bilin,  4, h
 vp8_mc_1bilin,  4, v
 
-/* True relational expressions have the value -1 in the GNU assembler,
-   +1 in Apple's. */
-#ifdef __APPLE__
-#   define TMPSIZE \size * (8 + 8*(\size > 4) + \ytaps - 1)
-#else
-#   define TMPSIZE \size * (8 - 8*(\size > 4) + \ytaps - 1)
-#endif
+@ 4 and 8 pixel wide mc blocks might have height of 8 or 16 lines
+#define TMPSIZE \size * (16 / ((16 / \size + 1) / 2) + \ytaps - 1)
 
 .macro  vp8_mc_hv   name, size, h, v, ytaps
 function ff_put_vp8_\name\size\()_\h\v\()_armv6, export=1

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] doc: escape left brace in texi2pod.pl regex

2016-11-17 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Fri May  6 
13:32:06 2016 +0200| [fc5cdc0d5372f5103c71d5dede296734fe71ead2] | committer: 
Janne Grunau

doc: escape left brace in texi2pod.pl regex

Unescaped literal left braces are deprecated and a warning was added in
Perl 5.22.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fc5cdc0d5372f5103c71d5dede296734fe71ead2
---

 doc/texi2pod.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/texi2pod.pl b/doc/texi2pod.pl
index e4eb61c..934c1a2 100755
--- a/doc/texi2pod.pl
+++ b/doc/texi2pod.pl
@@ -380,7 +380,7 @@ sub postprocess
 # @* is also impossible in .pod; we discard it and any newline that
 # follows it.  Similarly, our macro @gol must be discarded.
 
-s/\@anchor{(?:[^\}]*)\}//g;
+s/\@anchor\{(?:[^\}]*)\}//g;
 s/\(?\@xref\{(?:[^\}]*)\}(?:[^.<]|(?:<[^<>]*>))*\.\)?//g;
 s/\s+\(\@pxref\{(?:[^\}]*)\}\)//g;
 s/;\s+\@pxref\{(?:[^\}]*)\}//g;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] checkasm: vp8: mc: test unequal width/height for partitions

2016-11-17 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sun Jul 10 
00:32:12 2016 +0200| [ec32574209f36467ef0d22c21a7e811ba98c15b6] | committer: 
Janne Grunau

checkasm: vp8: mc: test unequal width/height for partitions

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ec32574209f36467ef0d22c21a7e811ba98c15b6
---

 tests/checkasm/vp8dsp.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/tests/checkasm/vp8dsp.c b/tests/checkasm/vp8dsp.c
index 3fe09c5..9733ff7 100644
--- a/tests/checkasm/vp8dsp.c
+++ b/tests/checkasm/vp8dsp.c
@@ -268,15 +268,17 @@ static void check_mc(void)
 LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16]);
 LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16]);
 VP8DSPContext d;
-int type, hsize, dx, dy;
+int type, k, dx, dy;
 declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, uint8_t *, 
ptrdiff_t, int, int, int);
 
 ff_vp78dsp_init(&d);
 
 for (type = 0; type < 2; type++) {
 vp8_mc_func (*tab)[3][3] = type ? d.put_vp8_bilinear_pixels_tab : 
d.put_vp8_epel_pixels_tab;
-for (hsize = 0; hsize < 3; hsize++) {
-int size = 16 >> hsize;
+for (k = 1; k < 8; k++) {
+int hsize  = k / 3;
+int size   = 16 >> hsize;
+int height = (size << 1) >> (k % 3);
 for (dy = 0; dy < 3; dy++) {
 for (dx = 0; dx < 3; dx++) {
 char str[100];
@@ -309,11 +311,11 @@ static void check_mc(void)
 src[i ] = val;
 src[i * SRC_BUF_STRIDE] = val;
 }
-call_ref(dst0, size, src, SRC_BUF_STRIDE, size, mx, 
my);
-call_new(dst1, size, src, SRC_BUF_STRIDE, size, mx, 
my);
-if (memcmp(dst0, dst1, size * size))
+call_ref(dst0, size, src, SRC_BUF_STRIDE, height, mx, 
my);
+call_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, 
my);
+if (memcmp(dst0, dst1, size * height))
 fail();
-bench_new(dst1, size, src, SRC_BUF_STRIDE, size, mx, 
my);
+bench_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, 
my);
 }
 }
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] checkasm/arm: align the clobber check data properly for ldrd

2016-11-17 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sun Jul 10 
13:23:13 2016 +0200| [8c816c0c9b12fdefd9046415e97df299880bc9b8] | committer: 
Janne Grunau

checkasm/arm: align the clobber check data properly for ldrd

Should fix the SIGBUS in the armv7-linux-clang-3.7 fate target.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8c816c0c9b12fdefd9046415e97df299880bc9b8
---

 tests/checkasm/arm/checkasm.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S
index 160dba4..199c96a 100644
--- a/tests/checkasm/arm/checkasm.S
+++ b/tests/checkasm/arm/checkasm.S
@@ -22,7 +22,7 @@
 
 #include "libavutil/arm/asm.S"
 
-const register_init
+const register_init, align=3
 .quad 0x21f86d66c8ca00ce
 .quad 0x75b6ba21077c48ad
 .quad 0xed56bb2dcb3c7736

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] h2645_parse: handle embedded Annex B NAL units in size prefixed NAL units

2016-11-17 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Thu Jul  7 
20:33:01 2016 +0200| [17c99b6158f2c6720af74e81ee727ee50d2e7e96] | committer: 
Janne Grunau

h2645_parse: handle embedded Annex B NAL units in size prefixed NAL units

Fixes a regression in ca2f19b9cc3 with some mov/mp4 files. The files have
several NAL units in the supposed single NAL unit after the size field.
Annex B start code prefixes are used to separate them. The first NAL unit
is correctly parsed but the buffer does not point to the next size field.
Instead semi random data (it seems to be the rbsp_stop_one_bit and the
start code prefix) is then parsed as length and will exceed the
remaining length of the buffer.

Patch based on the code in h264's decode_nal_units() and a similar
patch by Hendrik Leppkes in FFmpeg (a9bb4cf87d1).

Bug-Id: ffmpeg/trac5529
Reported-By: Vittorio Giovara

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=17c99b6158f2c6720af74e81ee727ee50d2e7e96
---

 libavcodec/h2645_parse.c | 63 +++-
 1 file changed, 51 insertions(+), 12 deletions(-)

diff --git a/libavcodec/h2645_parse.c b/libavcodec/h2645_parse.c
index defe001..e252efa 100644
--- a/libavcodec/h2645_parse.c
+++ b/libavcodec/h2645_parse.c
@@ -195,11 +195,27 @@ static int h264_parse_nal_header(H2645NAL *nal, void 
*logctx)
 return 1;
 }
 
+static int find_next_start_code(const uint8_t *buf, const uint8_t *next_avc)
+{
+int i = 0;
+
+if (buf + 3 >= next_avc)
+return next_avc - buf;
+
+while (buf + i + 3 < next_avc) {
+if (buf[i] == 0 && buf[i + 1] == 0 && buf[i + 2] == 1)
+break;
+i++;
+}
+return i + 3;
+}
+
 int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length,
   void *logctx, int is_nalff, int nal_length_size,
   enum AVCodecID codec_id)
 {
 int consumed, ret = 0;
+const uint8_t *next_avc = buf + (is_nalff ? 0 : length);
 
 pkt->nb_nals = 0;
 while (length >= 4) {
@@ -207,29 +223,52 @@ int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t 
*buf, int length,
 int extract_length = 0;
 int skip_trailing_zeros = 1;
 
-if (is_nalff) {
+/*
+ * Only parse an AVC1 length field if one is expected at the current
+ * buffer position. There are unfortunately streams with multiple
+ * NAL units covered by the length field. Those NAL units are delimited
+ * by Annex B start code prefixes. ff_h2645_extract_rbsp() detects it
+ * correctly and consumes only the first NAL unit. The additional NAL
+ * units are handled here in the Annex B parsing code.
+ */
+if (buf == next_avc) {
 int i;
 for (i = 0; i < nal_length_size; i++)
 extract_length = (extract_length << 8) | buf[i];
-buf+= nal_length_size;
-length -= nal_length_size;
 
 if (extract_length > length) {
 av_log(logctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
 return AVERROR_INVALIDDATA;
 }
+buf += nal_length_size;
+length  -= nal_length_size;
+// keep track of the next AVC1 length field
+next_avc = buf + extract_length;
 } else {
-if (buf[2] == 0) {
-length--;
-buf++;
+/*
+ * expected to return immediately except for streams with mixed
+ * NAL unit coding
+ */
+int buf_index = find_next_start_code(buf, next_avc);
+
+buf+= buf_index;
+length -= buf_index;
+
+/*
+ * break if an AVC1 length field is expected at the current buffer
+ * position
+ */
+if (buf == next_avc)
 continue;
-}
-if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1)
-return AVERROR_INVALIDDATA;
 
-buf   += 3;
-length-= 3;
-extract_length = length;
+if (length > 0) {
+extract_length = length;
+} else if (pkt->nb_nals == 0) {
+av_log(logctx, AV_LOG_ERROR, "No NAL unit found\n");
+return AVERROR_INVALIDDATA;
+} else {
+break;
+}
 }
 
 if (pkt->nals_allocated < pkt->nb_nals + 1) {

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] checkasm: vp8.mc: initialize the full src buffer after ec32574209f

2016-11-17 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Jul 12 
21:31:57 2016 +0200| [80fbb7becae530167373fe5178966b7d7604306e] | committer: 
Janne Grunau

checkasm: vp8.mc: initialize the full src buffer after ec32574209f

Fixes "Use of uninitialised value" valgrind warnings in checkasm.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=80fbb7becae530167373fe5178966b7d7604306e
---

 tests/checkasm/vp8dsp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/checkasm/vp8dsp.c b/tests/checkasm/vp8dsp.c
index 9733ff7..0260d63 100644
--- a/tests/checkasm/vp8dsp.c
+++ b/tests/checkasm/vp8dsp.c
@@ -247,7 +247,7 @@ static void check_luma_dc_wht(void)
 }
 
 #define SRC_BUF_STRIDE 32
-#define SRC_BUF_SIZE ((size + 5) * SRC_BUF_STRIDE)
+#define SRC_BUF_SIZE (((size << (size < 16)) + 5) * SRC_BUF_STRIDE)
 // The mc subpixel interpolation filter needs the 2 previous pixels in either
 // direction, the +1 is to make sure the actual load addresses always are
 // unaligned.

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] checkasm/arm: preserve the stack alignment checkasm_checked_call

2016-11-17 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Jul 12 
22:46:43 2016 +0200| [7b1ae0e73ab7f7c5eabc70dbe2e579127c6e154f] | committer: 
Janne Grunau

checkasm/arm: preserve the stack alignment checkasm_checked_call

The stack used by checkasm_checked_call_vfp was a multiple of 4 when the
checked function is called. AAPCS requires a double word (8 byte)
aligned stack at public interfaces. Since both calls are public interfaces,
the stack is misaligned when the checked function is called.

Might fix the SIGBUS error in the armv7-linux-clang-3.7 fate config.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7b1ae0e73ab7f7c5eabc70dbe2e579127c6e154f
---

 tests/checkasm/arm/checkasm.S | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S
index 199c96a..f004af3 100644
--- a/tests/checkasm/arm/checkasm.S
+++ b/tests/checkasm/arm/checkasm.S
@@ -42,6 +42,9 @@ endconst
 
 #define ARG_STACK 4*(MAX_ARGS - 2)
 
+@ align the used stack space to 8 to preserve the stack alignment
+#define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed)
+
 .macro clobbercheck variant
 .equ pushed, 4*9
 function checkasm_checked_call_\variant, export=1
@@ -59,10 +62,10 @@ function checkasm_checked_call_\variant, export=1
 .endif
 ldm r12, {r4-r11}
 
-sub sp,  sp,  #ARG_STACK
+sub sp,  sp,  #ARG_STACK_A
 .equ pos, 0
 .rept MAX_ARGS-2
-ldr r12, [sp, #ARG_STACK + pushed + 8 + pos]
+ldr r12, [sp, #ARG_STACK_A + pushed + 8 + pos]
 str r12, [sp, #pos]
 .equ pos, pos + 4
 .endr
@@ -70,9 +73,9 @@ function checkasm_checked_call_\variant, export=1
 mov r12, r0
 mov r0,  r2
 mov r1,  r3
-ldrdr2,  r3,  [sp, #ARG_STACK + pushed]
+ldrdr2,  r3,  [sp, #ARG_STACK_A + pushed]
 blx r12
-add sp,  sp,  #ARG_STACK
+add sp,  sp,  #ARG_STACK_A
 
 push{r0, r1}
 movrel  r12, register_init

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] aarch64: vp9: loop filter: replace 'orr; cbn?z' with 'adds; b.{eq,ne};

2017-01-14 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Jan 10 
00:15:08 2017 +0200| [cb220eeef9bfe889769dc4e08248b0a59d24e2a9] | committer: 
Michael Niedermayer

aarch64: vp9: loop filter: replace 'orr; cbn?z' with 'adds; b.{eq,ne};

The latter is 1 cycle faster on a cortex-a53, and since the operands are
bytewise (or larger) bitmasks (impossible to overflow to zero), both are
equivalent.

This is cherrypicked from libav commit
e7ae8f7a715843a5089d18e033afb3ee19ab3057.

Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cb220eeef9bfe889769dc4e08248b0a59d24e2a9
---

 libavcodec/aarch64/vp9lpf_neon.S | 31 ---
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S
index 78aae61..55e1964 100644
--- a/libavcodec/aarch64/vp9lpf_neon.S
+++ b/libavcodec/aarch64/vp9lpf_neon.S
@@ -218,13 +218,15 @@
 xtn_sz  v5, v6.8h,  v7.8h,  \sz
 and v4\sz,  v4\sz,  v5\sz // fm
 
+// If no pixels need filtering, just exit as soon as possible
 mov x5,  v4.d[0]
 .ifc \sz, .16b
 mov x6,  v4.d[1]
-orr x5,  x5,  x6
-.endif
-// If no pixels need filtering, just exit as soon as possible
+addsx5,  x5,  x6
+b.eq9f
+.else
 cbz x5,  9f
+.endif
 
 .if \wd >= 8
 moviv0\sz,  #1
@@ -344,15 +346,17 @@
 bit v22\sz, v0\sz,  v5\sz   // if (!hev && fm && 
!flat8in)
 bit v25\sz, v2\sz,  v5\sz
 
+// If no pixels need flat8in, jump to flat8out
+// (or to a writeout of the inner 4 pixels, for wd=8)
 .if \wd >= 8
 mov x5,  v6.d[0]
 .ifc \sz, .16b
 mov x6,  v6.d[1]
-orr x5,  x5,  x6
-.endif
-// If no pixels need flat8in, jump to flat8out
-// (or to a writeout of the inner 4 pixels, for wd=8)
+addsx5,  x5,  x6
+b.eq6f
+.else
 cbz x5,  6f
+.endif
 
 // flat8in
 uaddl_sz\tmp1\().8h, \tmp2\().8h,  v20, v21, \sz
@@ -406,20 +410,25 @@
 mov x5,  v2.d[0]
 .ifc \sz, .16b
 mov x6,  v2.d[1]
-orr x5,  x5,  x6
+adds x5,  x5,  x6
+b.ne1f
+.else
+cbnzx5,  1f
 .endif
 // If no pixels needed flat8in nor flat8out, jump to a
 // writeout of the inner 4 pixels
-cbnzx5,  1f
 br  x14
 1:
+
 mov x5,  v7.d[0]
 .ifc \sz, .16b
 mov x6,  v7.d[1]
-orr x5,  x5,  x6
+adds x5,  x5,  x6
+b.ne1f
+.else
+cbnzx5,  1f
 .endif
 // If no pixels need flat8out, jump to a writeout of the inner 6 pixels
-cbnzx5,  1f
 br  x15
 
 1:

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] aarch64: vp9: use alternative returns in the core loop filter function

2017-01-14 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Jan 10 
00:15:07 2017 +0200| [62ea07d797c503bc4b727e56d9c0f914a93c8ef6] | committer: 
Michael Niedermayer

aarch64: vp9: use alternative returns in the core loop filter function

Since aarch64 has enough free general purpose registers use them to
branch to the appropriate storage code. 1-2 cycles faster for the
functions using loop_filter 8/16, ... on a cortex-a53. Mixed results
(up to 2 cycles faster/slower) on a cortex-a57.

This is cherrypicked from libav commit
d7595de0b25e7064fd9e06dea5d0425536cef6dc.

Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=62ea07d797c503bc4b727e56d9c0f914a93c8ef6
---

 libavcodec/aarch64/vp9lpf_neon.S | 48 +++-
 1 file changed, 18 insertions(+), 30 deletions(-)

diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S
index e727a4d..78aae61 100644
--- a/libavcodec/aarch64/vp9lpf_neon.S
+++ b/libavcodec/aarch64/vp9lpf_neon.S
@@ -410,15 +410,19 @@
 .endif
 // If no pixels needed flat8in nor flat8out, jump to a
 // writeout of the inner 4 pixels
-cbz x5,  7f
+cbnzx5,  1f
+br  x14
+1:
 mov x5,  v7.d[0]
 .ifc \sz, .16b
 mov x6,  v7.d[1]
 orr x5,  x5,  x6
 .endif
 // If no pixels need flat8out, jump to a writeout of the inner 6 pixels
-cbz x5,  8f
+cbnzx5,  1f
+br  x15
 
+1:
 // flat8out
 // This writes all outputs into v2-v17 (skipping v6 and v16).
 // If this part is skipped, the output is read from v21-v26 (which is 
the input
@@ -549,35 +553,24 @@ endfunc
 
 function vp9_loop_filter_8
 loop_filter 8,  .8b,  0,v16, v17, v18, v19, v28, v29, v30, v31
-mov x5,  #0
 ret
 6:
-mov x5,  #6
-ret
+br  x13
 9:
 br  x10
 endfunc
 
 function vp9_loop_filter_8_16b_mix
 loop_filter 8,  .16b, 88,   v16, v17, v18, v19, v28, v29, v30, v31
-mov x5,  #0
 ret
 6:
-mov x5,  #6
-ret
+br  x13
 9:
 br  x10
 endfunc
 
 function vp9_loop_filter_16
 loop_filter 16, .8b,  0,v8,  v9,  v10, v11, v12, v13, v14, v15
-mov x5,  #0
-ret
-7:
-mov x5,  #7
-ret
-8:
-mov x5,  #8
 ret
 9:
 ldp d8,  d9,  [sp], 0x10
@@ -589,13 +582,6 @@ endfunc
 
 function vp9_loop_filter_16_16b
 loop_filter 16, .16b, 0,v8,  v9,  v10, v11, v12, v13, v14, v15
-mov x5,  #0
-ret
-7:
-mov x5,  #7
-ret
-8:
-mov x5,  #8
 ret
 9:
 ldp d8,  d9,  [sp], 0x10
@@ -614,11 +600,14 @@ endfunc
 .endm
 
 .macro loop_filter_8
+// calculate alternative 'return' targets
+adr x13, 6f
 bl  vp9_loop_filter_8
-cbnzx5,  6f
 .endm
 
 .macro loop_filter_8_16b_mix mix
+// calculate alternative 'return' targets
+adr x13, 6f
 .if \mix == 48
 mov x11, #0x
 .elseif \mix == 84
@@ -627,21 +616,20 @@ endfunc
 mov x11, #0x
 .endif
 bl  vp9_loop_filter_8_16b_mix
-cbnzx5,  6f
 .endm
 
 .macro loop_filter_16
+// calculate alternative 'return' targets
+adr x14, 7f
+adr x15, 8f
 bl  vp9_loop_filter_16
-cmp x5,  7
-b.gt8f
-b.eq7f
 .endm
 
 .macro loop_filter_16_16b
+// calculate alternative 'return' targets
+adr x14, 7f
+adr x15, 8f
 bl  vp9_loop_filter_16_16b
-cmp x5,  7
-b.gt8f
-b.eq7f
 .endm
 
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] arm: vp9itxfm: Simplify the stack alignment code

2017-01-14 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Jan 10 
00:15:09 2017 +0200| [a71cd8439fd32fd83b7a9b9ac8d6f861846770c7] | committer: 
Michael Niedermayer

arm: vp9itxfm: Simplify the stack alignment code

This is one instruction less for thumb, and there are now only
1/2 arm/thumb specific instructions.

This is cherrypicked from libav commit
e5b0fc170f85b00f7dd0ac514918fb5c95253d39.

Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a71cd8439fd32fd83b7a9b9ac8d6f861846770c7
---

 libavcodec/arm/vp9itxfm_neon.S | 28 
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index 06470a3..d7a2654 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -791,15 +791,13 @@ function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, 
export=1
 .ifnc \txfm1\()_\txfm2,idct_idct
 vpush   {q4-q7}
 .endif
-mov r7,  sp
 
 @ Align the stack, allocate a temp buffer
-T   mov r12, sp
-T   bic r12, r12, #15
-T   sub r12, r12, #512
-T   mov sp,  r12
-A   bic sp,  sp,  #15
-A   sub sp,  sp,  #512
+T   mov r7,  sp
+T   and r7,  r7,  #15
+A   and r7,  sp,  #15
+add r7,  r7,  #512
+sub sp,  sp,  r7
 
 mov r4,  r0
 mov r5,  r1
@@ -828,7 +826,7 @@ A   sub sp,  sp,  #512
 bl  \txfm2\()16_1d_4x16_pass2_neon
 .endr
 
-mov sp,  r7
+add sp,  sp,  r7
 .ifnc \txfm1\()_\txfm2,idct_idct
 vpop{q4-q7}
 .endif
@@ -1117,15 +1115,13 @@ function ff_vp9_idct_idct_32x32_add_neon, export=1
 beq idct32x32_dc_add_neon
 push{r4-r7,lr}
 vpush   {q4-q7}
-mov r7,  sp
 
 @ Align the stack, allocate a temp buffer
-T   mov r12, sp
-T   bic r12, r12, #15
-T   sub r12, r12, #2048
-T   mov sp,  r12
-A   bic sp,  sp,  #15
-A   sub sp,  sp,  #2048
+T   mov r7,  sp
+T   and r7,  r7,  #15
+A   and r7,  sp,  #15
+add r7,  r7,  #2048
+sub sp,  sp,  r7
 
 mov r4,  r0
 mov r5,  r1
@@ -1143,7 +1139,7 @@ A   sub sp,  sp,  #2048
 bl  idct32_1d_4x32_pass2_neon
 .endr
 
-mov sp,  r7
+add sp,  sp,  r7
 vpop{q4-q7}
 pop {r4-r7,pc}
 endfunc

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] checkasm: arm: report the first clobbered register in checkasm_checked_call

2017-01-24 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Thu Jul 14 
23:16:14 2016 +0200| [71a0472114574993df7035f4de9aa007e03817b8] | committer: 
Janne Grunau

checkasm: arm: report the first clobbered register in checkasm_checked_call

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=71a0472114574993df7035f4de9aa007e03817b8
---

 tests/checkasm/arm/checkasm.S | 64 ++-
 1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S
index f004af3..989f613 100644
--- a/tests/checkasm/arm/checkasm.S
+++ b/tests/checkasm/arm/checkasm.S
@@ -33,8 +33,12 @@ const register_init, align=3
 .quad 0x249214109d5d1c88
 endconst
 
-const error_message
-.asciz "failed to preserve register"
+const error_message_fpscr
+.asciz "failed to preserve register FPSCR"
+const error_message_gpr
+.asciz "failed to preserve register r%d"
+const error_message_vfp
+.asciz "failed to preserve register d%d"
 endconst
 
 @ max number of args used by any asm function.
@@ -79,39 +83,42 @@ function checkasm_checked_call_\variant, export=1
 
 push{r0, r1}
 movrel  r12, register_init
-mov r3,  #0
 .ifc \variant, vfp
-.macro check_reg_vfp, dreg, inc=8
-ldrdr0,  r1,  [r12], #\inc
-vmovr2,  lr,  \dreg
-eor r0,  r0,  r2
-eor r1,  r1,  lr
-orr r3,  r3,  r0
-orr r3,  r3,  r1
+.macro check_reg_vfp, dreg, offset
+vldrd0,  [r12, #8 * (\offset)]
+veord0,  d0,  \dreg
+vmovr2,  r3,  d0
+orrsr2,  r2,  r3
+bne 4f
 .endm
 
-.irp n, 8, 9, 10, 11, 12, 13, 14
-check_reg_vfp d\n
+.irp n, 8, 9, 10, 11, 12, 13, 14, 15
+@ keep track of the checked double/SIMD register
+mov r1,  #\n
+check_reg_vfp d\n, \n-8
 .endr
-check_reg_vfp d15, -56
 .purgem check_reg_vfp
 
 fmrxr0,  FPSCR
-ldr r1,  [sp, #8]
-eor r0,  r0,  r1
+ldr r3,  [sp, #8]
+eor r0,  r0,  r3
 @ Ignore changes in the topmost 5 bits
-lsl r0,  r0,  #5
-orr r3,  r3,  r0
+lslsr0,  r0,  #5
+bne 3f
 .endif
 
+@ keep track of the checked GPR
+mov r1,  #4
 .macro check_reg reg1, reg2=
-ldrdr0,  r1,  [r12], #8
-eor r0,  r0,  \reg1
-orrsr3,  r3,  r0
+ldrdr2,  r3,  [r12], #8
+eorsr2,  r2,  \reg1
+bne 2f
+add r1,  r1,  #1
 .ifnb \reg2
-eor r1,  r1,  \reg2
-orrsr3,  r3,  r1
+eorsr3,  r3,  \reg2
+bne 2f
 .endif
+add r1,  r1,  #1
 .endm
 check_reg   r4,  r5
 check_reg   r6,  r7
@@ -124,9 +131,16 @@ function checkasm_checked_call_\variant, export=1
 check_reg   r10, r11
 .purgem check_reg
 
-beq 0f
-
-movrel  r0, error_message
+b   0f
+4:
+movrel  r0, error_message_vfp
+b   1f
+3:
+movrel  r0, error_message_fpscr
+b   1f
+2:
+movrel  r0, error_message_gpr
+1:
 blx X(checkasm_fail_func)
 0:
 pop {r0, r1}

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] cheackasm/arm: remove NEON instructions from checkasm_checked_call_vfp

2017-01-24 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sun Jul 17 
10:43:10 2016 +0200| [59aeed93e4e928b884be72b8c267ff6b2785ab66] | committer: 
Janne Grunau

cheackasm/arm: remove NEON instructions from checkasm_checked_call_vfp

Fixes an assembler error on non-NEON builds introduced in 71a04721145.
Also set the fpu directly to vfp in checkasm.S so that NEON instructions
are rejected and cause build errors on NEON builds as well.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=59aeed93e4e928b884be72b8c267ff6b2785ab66
---

 tests/checkasm/arm/checkasm.S | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S
index 5aa92c5..2768bb3 100644
--- a/tests/checkasm/arm/checkasm.S
+++ b/tests/checkasm/arm/checkasm.S
@@ -22,6 +22,12 @@
 
 #include "libavutil/arm/asm.S"
 
+/* override fpu so that NEON instructions are rejected */
+#if HAVE_VFP
+.fpuvfp
+ELF .eabi_attribute 10, 0   @ suppress Tag_FP_arch
+#endif
+
 const register_init, align=3
 .quad 0x21f86d66c8ca00ce
 .quad 0x75b6ba21077c48ad
@@ -85,9 +91,10 @@ function checkasm_checked_call_\variant, export=1
 movrel  r12, register_init
 .ifc \variant, vfp
 .macro check_reg_vfp, dreg, offset
-vldrd0,  [r12, #8 * (\offset)]
-veord0,  d0,  \dreg
-vmovr2,  r3,  d0
+ldrdr2,  r3,  [r12, #8 * (\offset)]
+vmovr0,  lr,  \dreg
+eor r2,  r2,  r0
+eor r3,  r3,  lr
 orrsr2,  r2,  r3
 bne 4f
 .endm

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] h264/aarch64: sign extend int stride in loop filter asm

2019-02-20 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Jan  1 
18:16:33 2019 +0100| [bb515e3a735f526ccb1068031e289eb5aeb69e22] | committer: 
Janne Grunau

h264/aarch64: sign extend int stride in loop filter asm

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bb515e3a735f526ccb1068031e289eb5aeb69e22
---

 libavcodec/aarch64/h264dsp_neon.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavcodec/aarch64/h264dsp_neon.S 
b/libavcodec/aarch64/h264dsp_neon.S
index 9b4610a4d4..60ffa24500 100644
--- a/libavcodec/aarch64/h264dsp_neon.S
+++ b/libavcodec/aarch64/h264dsp_neon.S
@@ -130,6 +130,7 @@ endfunc
 
 function ff_h264_h_loop_filter_luma_neon, export=1
 h264_loop_filter_start
+sxtwx1,  w1
 
 sub x0,  x0,  #4
 ld1 {v6.8B},  [x0], x1
@@ -210,6 +211,7 @@ endfunc
 
 function ff_h264_v_loop_filter_chroma_neon, export=1
 h264_loop_filter_start
+sxtwx1,  w1
 
 sub x0,  x0,  x1, lsl #1
 ld1 {v18.8B}, [x0], x1
@@ -228,6 +230,7 @@ endfunc
 
 function ff_h264_h_loop_filter_chroma_neon, export=1
 h264_loop_filter_start
+sxtwx1,  w1
 
 sub x0,  x0,  #2
 ld1 {v18.S}[0], [x0], x1

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] checkasm/h264: add loop filter tests

2019-02-20 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Jan  1 
18:25:55 2019 +0100| [d7f4f5c4a18a0c9e62635cfa6fe8a9302b413c01] | committer: 
Janne Grunau

checkasm/h264: add loop filter tests

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d7f4f5c4a18a0c9e62635cfa6fe8a9302b413c01
---

 tests/checkasm/h264dsp.c | 124 +++
 1 file changed, 124 insertions(+)

diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c
index f355a72a74..706fc79397 100644
--- a/tests/checkasm/h264dsp.c
+++ b/tests/checkasm/h264dsp.c
@@ -28,6 +28,7 @@
 #include "libavutil/intreadwrite.h"
 
 static const uint32_t pixel_mask[3] = { 0x, 0x01ff01ff, 0x03ff03ff };
+static const uint32_t pixel_mask_lf[3] = { 0xff0fff0f, 0x01ff000f, 0x03ff000f 
};
 
 #define SIZEOF_PIXEL ((bit_depth + 7) / 8)
 #define SIZEOF_COEF  (2 * ((bit_depth + 7) / 8))
@@ -312,9 +313,132 @@ static void check_idct_multiple(void)
 }
 }
 
+
+static void check_loop_filter(void)
+{
+LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
+LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
+LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
+H264DSPContext h;
+int bit_depth;
+int alphas[36], betas[36];
+int8_t tc0[36][4];
+
+declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, int stride,
+  int alpha, int beta, int8_t *tc0);
+
+for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+int i, j, a, c;
+uint32_t mask = pixel_mask_lf[bit_depth - 8];
+ff_h264dsp_init(&h, bit_depth, 1);
+for (i = 35, a = 255, c = 250; i >= 0; i--) {
+alphas[i] = a << (bit_depth - 8);
+betas[i]  = (i + 1) / 2 << (bit_depth - 8);
+tc0[i][0] = tc0[i][3] = (c + 6) / 10;
+tc0[i][1] = (c + 7) / 15;
+tc0[i][2] = (c + 9) / 20;
+a = a*9/10;
+c = c*9/10;
+}
+
+#define CHECK_LOOP_FILTER(name, align, ...) \
+do {\
+if (check_func(h.name, #name "_%dbpp", bit_depth)) {\
+for (j = 0; j < 36; j++) {  \
+intptr_t off = 8 * 32 + (j & 15) * 4 * !align;  \
+for (i = 0; i < 1024; i+=4) {   \
+AV_WN32A(dst + i, rnd() & mask);\
+}   \
+memcpy(dst0, dst, 32 * 16 * 2); \
+memcpy(dst1, dst, 32 * 16 * 2); \
+\
+call_ref(dst0 + off, 32, alphas[j], betas[j], tc0[j]); \
+call_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \
+if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) {   \
+fprintf(stderr, #name ": j:%d, alpha:%d beta:%d " \
+"tc0:{%d,%d,%d,%d}\n", j, alphas[j], betas[j], 
\
+tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \
+fail(); \
+}   \
+bench_new(dst1, 32, alphas[j], betas[j], tc0[j]);   \
+}   \
+}   \
+} while (0)
+
+CHECK_LOOP_FILTER(h264_v_loop_filter_luma, 1);
+CHECK_LOOP_FILTER(h264_h_loop_filter_luma, 0);
+CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff, 0);
+CHECK_LOOP_FILTER(h264_v_loop_filter_chroma, 1);
+CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0);
+CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0);
+#undef CHECK_LOOP_FILTER
+}
+}
+
+static void check_loop_filter_intra(void)
+{
+LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
+LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
+LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
+H264DSPContext h;
+int bit_depth;
+int alphas[36], betas[36];
+
+declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, int stride,
+  int alpha, int beta);
+
+for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+int i, j, a;
+uint32_t mask = pixel_mask_lf[bit_depth - 8];
+ff_h264dsp_init(&h, bit_depth, 1);
+for (i = 35, a = 255; i >= 0; i--) {
+alphas[i] = a << (bit_depth - 8);
+betas[i]  = (i + 1) / 2 << (bit_depth - 8);
+a = a*9/10;
+}
+
+#define CHECK_LOOP_FILTER(name, align)  \
+do {  

[FFmpeg-cvslog] h264/aarch64: optimize neon loop filter

2019-02-20 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Jan  1 
22:37:11 2019 +0100| [846c3d6aca5484904e60946c4fe8b8833bc07f92] | committer: 
Janne Grunau

h264/aarch64: optimize neon loop filter

Exit as soon as possible if no filtering will be done.

Improves the checkasm --bench cycle count on a Snapdragon 820e:
h264_h_loop_filter_luma_8bpp_c:  72.4 ->  72.5
h264_h_loop_filter_luma_8bpp_neon:   97.1 ->  56.3
h264_v_loop_filter_luma_8bpp_c: 174.0 -> 173.5
h264_v_loop_filter_luma_8bpp_neon:   62.9 ->  60.9
h264_h_loop_filter_chroma_8bpp_c:30.2 ->  30.3
h264_h_loop_filter_chroma_8bpp_neon: 51.6 ->  25.7
h264_v_loop_filter_chroma_8bpp_c:57.3 ->  57.3
h264_v_loop_filter_chroma_8bpp_neon: 28.0 ->  24.0

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=846c3d6aca5484904e60946c4fe8b8833bc07f92
---

 libavcodec/aarch64/h264dsp_neon.S | 33 +++--
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/libavcodec/aarch64/h264dsp_neon.S 
b/libavcodec/aarch64/h264dsp_neon.S
index 60ffa24500..b649f1d018 100644
--- a/libavcodec/aarch64/h264dsp_neon.S
+++ b/libavcodec/aarch64/h264dsp_neon.S
@@ -54,9 +54,12 @@
 uabdv17.16B, v20.16B, v16.16B   // abs(p2 - p0)
 and v21.16B, v21.16B, v28.16B
 uabdv19.16B,  v4.16B,  v0.16B   // abs(q2 - q0)
+and v21.16B, v21.16B, v30.16B  // < beta
+shrnv30.8b,  v21.8h,  #4
+mov x7, v30.d[0]
 cmhiv17.16B, v22.16B, v17.16B   // < beta
-and v21.16B, v21.16B, v30.16B
 cmhiv19.16B, v22.16B, v19.16B   // < beta
+cbz x7,  9f
 and v17.16B, v17.16B, v21.16B
 and v19.16B, v19.16B, v21.16B
 and v24.16B, v24.16B, v21.16B
@@ -124,7 +127,7 @@ function ff_h264_v_loop_filter_luma_neon, export=1
 st1 {v16.16B}, [x0], x1
 st1 {v0.16B},  [x0], x1
 st1 {v19.16B}, [x0]
-
+9:
 ret
 endfunc
 
@@ -174,32 +177,34 @@ function ff_h264_h_loop_filter_luma_neon, export=1
 st1 {v16.S}[3], [x0], x1
 st1 {v0.S}[3],  [x0], x1
 st1 {v19.S}[3], [x0], x1
-
+9:
 ret
 endfunc
 
 .macro  h264_loop_filter_chroma
 dup v22.8B, w2  // alpha
+dup v23.8B, w3  // beta
 uxtlv24.8H, v24.8B
 uabdv26.8B, v16.8B, v0.8B   // abs(p0 - q0)
-uxtlv4.8H,  v0.8B
 uabdv28.8B, v18.8B, v16.8B  // abs(p1 - p0)
+uabdv30.8B, v2.8B,  v0.8B   // abs(q1 - q0)
+cmhiv26.8B, v22.8B, v26.8B  // < alpha
+cmhiv28.8B, v23.8B, v28.8B  // < beta
+cmhiv30.8B, v23.8B, v30.8B  // < beta
+uxtlv4.8H,  v0.8B
+and v26.8B, v26.8B, v28.8B
 usubw   v4.8H,  v4.8H,  v16.8B
-sli v24.8H, v24.8H, #8
+and v26.8B, v26.8B, v30.8B
 shl v4.8H,  v4.8H,  #2
-uabdv30.8B, v2.8B,  v0.8B   // abs(q1 - q0)
+mov x2,  v26.d[0]
+sli v24.8H, v24.8H, #8
 uaddw   v4.8H,  v4.8H,  v18.8B
-cmhiv26.8B, v22.8B, v26.8B  // < alpha
+cbz x2,  9f
 usubw   v4.8H,  v4.8H,  v2.8B
-dup v22.8B, w3  // beta
 rshrn   v4.8B,  v4.8H,  #3
-cmhiv28.8B, v22.8B, v28.8B  // < beta
-cmhiv30.8B, v22.8B, v30.8B  // < beta
 sminv4.8B,  v4.8B,  v24.8B
 neg v25.8B, v24.8B
-and v26.8B, v26.8B, v28.8B
 smaxv4.8B,  v4.8B,  v25.8B
-and v26.8B, v26.8B, v30.8B
 uxtlv22.8H, v0.8B
 and v4.8B,  v4.8B,  v26.8B
 uxtlv28.8H, v16.8B
@@ -224,7 +229,7 @@ function ff_h264_v_loop_filter_chroma_neon, export=1
 sub x0,  x0,  x1, lsl #1
 st1 {v16.8B}, [x0], x1
 st1 {v0.8B},  [x0], x1
-
+9:
 ret
 endfunc
 
@@ -257,7 +262,7 @@ function ff_h264_h_loop_filter_chroma_neon, export=1
 st1 {v16.S}[1], [x0], x1
 st1 {v0.S}[1],  [x0], x1
 st1 {v2.S}[1],  [x0], x1
-
+9:
 ret
 endfunc
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] h264/aarch64: add intra loop filter neon asm

2019-02-20 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Aug 13 
20:43:19 2018 +0200| [28a8b5413b64b831dfb8650208bccd8b78360484] | committer: 
Janne Grunau

h264/aarch64: add intra loop filter neon asm

Add my neon asm from x264 relicensed under the LGPL 2.1 or later. Ported
(x264 uses nv12 chroma) and optimized.

Cycle count for checkasm --bench on a Snapdragon 820e:
h264_h_loop_filter_luma_intra_8bpp_c: 60.0
h264_h_loop_filter_luma_intra_8bpp_neon: 54.2
h264_v_loop_filter_luma_intra_8bpp_c: 148.3
h264_v_loop_filter_luma_intra_8bpp_neon: 73.8
h264_h_loop_filter_chroma_intra_8bpp_c: 27.8
h264_h_loop_filter_chroma_intra_8bpp_neon: 21.4
h264_h_loop_filter_chroma_mbaff_intra_8bpp_c: 15.8
h264_h_loop_filter_chroma_mbaff_intra_8bpp_neon: 15.7
h264_v_loop_filter_chroma_intra_8bpp_c: 45.8
h264_v_loop_filter_chroma_intra_8bpp_neon: 17.3

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=28a8b5413b64b831dfb8650208bccd8b78360484
---

 libavcodec/aarch64/h264dsp_init_aarch64.c |  16 ++
 libavcodec/aarch64/h264dsp_neon.S | 297 ++
 2 files changed, 313 insertions(+)

diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c 
b/libavcodec/aarch64/h264dsp_init_aarch64.c
index b106f11134..07bda2ff07 100644
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
@@ -29,10 +29,20 @@ void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int 
stride, int alpha,
  int beta, int8_t *tc0);
 void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
  int beta, int8_t *tc0);
+void ff_h264_v_loop_filter_luma_intra_neon(uint8_t *pix, int stride, int alpha,
+   int beta);
+void ff_h264_h_loop_filter_luma_intra_neon(uint8_t *pix, int stride, int alpha,
+   int beta);
 void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
 void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
+void ff_h264_v_loop_filter_chroma_intra_neon(uint8_t *pix, int stride,
+ int alpha, int beta);
+void ff_h264_h_loop_filter_chroma_intra_neon(uint8_t *pix, int stride,
+ int alpha, int beta);
+void ff_h264_h_loop_filter_chroma_mbaff_intra_neon(uint8_t *pix, int stride,
+   int alpha, int beta);
 
 void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height,
int log2_den, int weight, int offset);
@@ -77,8 +87,14 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, 
const int bit_depth,
 if (have_neon(cpu_flags) && bit_depth == 8) {
 c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
 c->h264_h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon;
+c->h264_v_loop_filter_luma_intra= 
ff_h264_v_loop_filter_luma_intra_neon;
+c->h264_h_loop_filter_luma_intra= 
ff_h264_h_loop_filter_luma_intra_neon;
+
 c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
 c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+c->h264_v_loop_filter_chroma_intra = 
ff_h264_v_loop_filter_chroma_intra_neon;
+c->h264_h_loop_filter_chroma_intra = 
ff_h264_h_loop_filter_chroma_intra_neon;
+c->h264_h_loop_filter_chroma_mbaff_intra = 
ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
 
 c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
 c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
diff --git a/libavcodec/aarch64/h264dsp_neon.S 
b/libavcodec/aarch64/h264dsp_neon.S
index b649f1d018..448e575b8c 100644
--- a/libavcodec/aarch64/h264dsp_neon.S
+++ b/libavcodec/aarch64/h264dsp_neon.S
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2008 Mans Rullgard 
  * Copyright (c) 2013 Janne Grunau 
+ * Copyright (c) 2014 Janne Grunau 
  *
  * This file is part of Libav.
  *
@@ -181,6 +182,203 @@ function ff_h264_h_loop_filter_luma_neon, export=1
 ret
 endfunc
 
+
+.macro h264_loop_filter_start_intra
+orr w4,  w2,  w3
+cbnzw4,  1f
+ret
+1:
+sxtwx1,  w1
+dup v30.16b, w2// alpha
+dup v31.16b, w3// beta
+.endm
+
+.macro h264_loop_filter_luma_intra
+uabdv16.16b, v7.16b,  v0.16b// abs(p0 - q0)
+uabdv17.16b, v6.16b,  v7.16b// abs(p1 - p0)
+uabdv18.16b, v1.16b,  v0.16b// abs(q1 - q0)
+cmhiv19.16b, v30.16b, v16.16b   // < alpha
+cmhiv17.16b, v31.16b, v17.16b   // < beta
+cmhiv18.16b, v31.16b, v18.

[FFmpeg-cvslog] h264/x86: sign extend int stride in deblock functions

2019-02-20 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sun Jan 27 
11:06:34 2019 +0100| [156ea66c91b1986a87916f187216978d686725f6] | committer: 
Janne Grunau

h264/x86: sign extend int stride in deblock functions

Fixes checkasm errors after adding the h264 deblock tests.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=156ea66c91b1986a87916f187216978d686725f6
---

 libavcodec/x86/h264_deblock.asm   | 8 
 libavcodec/x86/h264_deblock_10bit.asm | 9 +
 2 files changed, 17 insertions(+)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 33fd5a9dd7..4b9cf85d16 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -288,6 +288,7 @@ cextern pb_3
 ;-
 %macro DEBLOCK_LUMA 0
 cglobal deblock_v_luma_8, 5,5,10
+movsxdifnidn  r1, r1d
 movdm8, [r4] ; tc0
 lea r4, [r1*3]
 dec r2d; alpha-1
@@ -335,6 +336,7 @@ cglobal deblock_v_luma_8, 5,5,10
 INIT_MMX cpuname
 cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
 movsxd r7,  r1d
+movsxdifnidn  r1, r1d
 lear8,  [r7+r7*2]
 lear6,  [r0-4]
 lear5,  [r0-4+r8]
@@ -395,6 +397,7 @@ DEBLOCK_LUMA
 ; int8_t *tc0)
 ;-
 cglobal deblock_%1_luma_8, 5,5,8,2*%2
+movsxdifnidn  r1, r1d
 lea r4, [r1*3]
 dec r2 ; alpha-1
 neg r4
@@ -445,6 +448,7 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2
 ;-
 INIT_MMX cpuname
 cglobal deblock_h_luma_8, 0,5,8,0x60+12
+movsxdifnidn  r1, r1d
 movr0, r0mp
 movr3, r1m
 lear4, [r3*3]
@@ -646,6 +650,7 @@ cglobal deblock_%1_luma_intra_8, 4,6,16,0x10
 %else
 cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
 %endif
+movsxdifnidn  r1, r1d
 lea r4, [r1*4]
 lea r5, [r1*3] ; 3*stride
 dec r2d; alpha-1
@@ -703,6 +708,7 @@ INIT_MMX cpuname
 ;-
 cglobal deblock_h_luma_intra_8, 4,9,0,0x80
 movsxd r7,  r1d
+movsxdifnidn  r1, r1d
 lear8,  [r7*3]
 lear6,  [r0-4]
 lear5,  [r0-4+r8]
@@ -782,6 +788,7 @@ DEBLOCK_LUMA_INTRA v8
 INIT_MMX mmxext
 
 %macro CHROMA_V_START 0
+movsxdifnidn  r1, r1d
 decr2d  ; alpha-1
 decr3d  ; beta-1
 movt5, r0
@@ -790,6 +797,7 @@ INIT_MMX mmxext
 %endmacro
 
 %macro CHROMA_H_START 0
+movsxdifnidn  r1, r1d
 decr2d
 decr3d
 subr0, 2
diff --git a/libavcodec/x86/h264_deblock_10bit.asm 
b/libavcodec/x86/h264_deblock_10bit.asm
index d049c62bf2..1a424b7f43 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -162,6 +162,7 @@ cglobal deblock_v_luma_10, 5,5,8*(mmsize/16)
 %define ms2 [rsp+mmsize*2]
 %define am  [rsp+mmsize*3]
 %define bm  [rsp+mmsize*4]
+movsxdifnidn  r1, r1d
 SUBrsp, pad
 shlr2d, 2
 shlr3d, 2
@@ -219,6 +220,7 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
 %define p2m [rsp+mmsize*4]
 %define am  [rsp+mmsize*5]
 %define bm  [rsp+mmsize*6]
+movsxdifnidn  r1, r1d
 SUBrsp, pad
 shlr2d, 2
 shlr3d, 2
@@ -349,6 +351,7 @@ cglobal deblock_v_luma_10, 5,5,15
 %define mask0 m7
 %define mask1 m10
 %define mask2 m11
+movsxdifnidn  r1, r1d
 shlr2d, 2
 shlr3d, 2
 LOAD_ABm12, m13, r2d, r3d
@@ -377,6 +380,7 @@ cglobal deblock_v_luma_10, 5,5,15
 REP_RET
 
 cglobal deblock_h_luma_10, 5,7,15
+movsxdifnidn  r1, r1d
 shlr2d, 2
 shlr3d, 2
 LOAD_ABm12, m13, r2d, r3d
@@ -492,6 +496,7 @@ DEBLOCK_LUMA_64
 CAT_XDEFINE t, i, [rsp+mmsize*(i-4)]
 %assign i i+1
 %endrep
+movsxdifnidn  r1, r1d
 SUBrsp, pad
 %endmacro
 
@@ -615,6 +620,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16
 %define q2 m13
 %define aa m5
 %define bb m14
+movsxdifnidn  r1, r1d
 lea r4, [r1*4]
 lea r5, [r1*3] ; 3*stride
 neg r4
@@ -668,6 +674,7 @@ cglobal deblock_h_luma_intra_10, 4,7,16
 %define p3 m4
 %define spill [rsp]
 %assign pad 24-(stack_offset&15)
+movsxdifnidn  r1, r1d
 SUB rsp, pad
 lea r4, [r1*4]
 lea r5, [r1*3] ; 3*stride
@@ -852,6 +859,7 @@ DEBLOCK_LUMA_INTRA
 ; int8_t *tc0)
 ;-
 cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
+movsxdifnidn  r1, r1d
 mov r5, r0
 sub r0, r1
 sub r0, r1
@@ -887,6 +895,7 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
 ;   i

[FFmpeg-cvslog] h264/arm64: implement missing 4:2:2 chroma loop filter neon functions

2019-03-14 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Wed Feb 27 
21:51:27 2019 +0100| [186bd30aa3b6c2b29b4dbf18278700b572068b1e] | committer: 
Janne Grunau

h264/arm64: implement missing 4:2:2 chroma loop filter neon functions

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=186bd30aa3b6c2b29b4dbf18278700b572068b1e
---

 libavcodec/aarch64/h264dsp_init_aarch64.c | 18 +---
 libavcodec/aarch64/h264dsp_neon.S | 36 ++-
 2 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c 
b/libavcodec/aarch64/h264dsp_init_aarch64.c
index 07bda2ff07..85fea8e040 100644
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
@@ -37,10 +37,14 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int 
stride, int alpha,
int beta, int8_t *tc0);
 void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_chroma422_neon(uint8_t *pix, int stride, int alpha,
+  int beta, int8_t *tc0);
 void ff_h264_v_loop_filter_chroma_intra_neon(uint8_t *pix, int stride,
  int alpha, int beta);
 void ff_h264_h_loop_filter_chroma_intra_neon(uint8_t *pix, int stride,
  int alpha, int beta);
+void ff_h264_h_loop_filter_chroma422_intra_neon(uint8_t *pix, int stride,
+int alpha, int beta);
 void ff_h264_h_loop_filter_chroma_mbaff_intra_neon(uint8_t *pix, int stride,
int alpha, int beta);
 
@@ -91,10 +95,18 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, 
const int bit_depth,
 c->h264_h_loop_filter_luma_intra= 
ff_h264_h_loop_filter_luma_intra_neon;
 
 c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
-c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
 c->h264_v_loop_filter_chroma_intra = 
ff_h264_v_loop_filter_chroma_intra_neon;
-c->h264_h_loop_filter_chroma_intra = 
ff_h264_h_loop_filter_chroma_intra_neon;
-c->h264_h_loop_filter_chroma_mbaff_intra = 
ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
+
+if (chroma_format_idc <= 1) {
+c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+c->h264_h_loop_filter_chroma_intra = 
ff_h264_h_loop_filter_chroma_intra_neon;
+c->h264_h_loop_filter_chroma_mbaff_intra = 
ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
+} else {
+c->h264_h_loop_filter_chroma = 
ff_h264_h_loop_filter_chroma422_neon;
+c->h264_h_loop_filter_chroma_mbaff = 
ff_h264_h_loop_filter_chroma_neon;
+c->h264_h_loop_filter_chroma_intra = 
ff_h264_h_loop_filter_chroma422_intra_neon;
+c->h264_h_loop_filter_chroma_mbaff_intra = 
ff_h264_h_loop_filter_chroma_intra_neon;
+}
 
 c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
 c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
diff --git a/libavcodec/aarch64/h264dsp_neon.S 
b/libavcodec/aarch64/h264dsp_neon.S
index 448e575b8c..bcce7e7da5 100644
--- a/libavcodec/aarch64/h264dsp_neon.S
+++ b/libavcodec/aarch64/h264dsp_neon.S
@@ -28,9 +28,9 @@
 ldr w6,  [x4]
 ccmpw3,  #0, #0, ne
 mov v24.S[0], w6
-and w6,  w6,  w6,  lsl #16
+and w8,  w6,  w6,  lsl #16
 b.eq1f
-andsw6,  w6,  w6,  lsl #8
+andsw8,  w8,  w8,  lsl #8
 b.ge2f
 1:
 ret
@@ -394,10 +394,10 @@ endfunc
 usubw   v4.8H,  v4.8H,  v16.8B
 and v26.8B, v26.8B, v30.8B
 shl v4.8H,  v4.8H,  #2
-mov x2,  v26.d[0]
+mov x8,  v26.d[0]
 sli v24.8H, v24.8H, #8
 uaddw   v4.8H,  v4.8H,  v18.8B
-cbz x2,  9f
+cbz x8,  9f
 usubw   v4.8H,  v4.8H,  v2.8B
 rshrn   v4.8B,  v4.8H,  #3
 sminv4.8B,  v4.8B,  v24.8B
@@ -436,6 +436,7 @@ function ff_h264_h_loop_filter_chroma_neon, export=1
 sxtwx1,  w1
 
 sub x0,  x0,  #2
+h_loop_filter_chroma420:
 ld1 {v18.S}[0], [x0], x1
 ld1 {v16.S}[0], [x0], x1
 ld1 {v0.S}[0],  [x0], x1
@@ -464,6 +465,19 @@ function ff_h264_h_loop_filter_chroma_neon, export=1
 ret
 endfunc
 
+function ff_h264_h_loop_filter_chroma422_neon, export=1
+sxtwx1,  w1
+h264_loop_filter_start
+add x5,  x0,  x1

[FFmpeg-cvslog] checkasm/h264: test 4:2:2 chroma loop filter functions

2019-03-14 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Wed Feb 27 
20:51:48 2019 +0100| [f8abf7d4dfa0504f7f65e4f1fd9d22e01cb371cc] | committer: 
Janne Grunau

checkasm/h264: test 4:2:2 chroma loop filter functions

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f8abf7d4dfa0504f7f65e4f1fd9d22e01cb371cc
---

 tests/checkasm/h264dsp.c | 44 ++--
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c
index 706fc79397..ee07121ab4 100644
--- a/tests/checkasm/h264dsp.c
+++ b/tests/checkasm/h264dsp.c
@@ -341,9 +341,9 @@ static void check_loop_filter(void)
 c = c*9/10;
 }
 
-#define CHECK_LOOP_FILTER(name, align, ...) \
+#define CHECK_LOOP_FILTER(name, align, idc) \
 do {\
-if (check_func(h.name, #name "_%dbpp", bit_depth)) {\
+if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) {   \
 for (j = 0; j < 36; j++) {  \
 intptr_t off = 8 * 32 + (j & 15) * 4 * !align;  \
 for (i = 0; i < 1024; i+=4) {   \
@@ -355,7 +355,7 @@ static void check_loop_filter(void)
 call_ref(dst0 + off, 32, alphas[j], betas[j], tc0[j]); \
 call_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \
 if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) {   \
-fprintf(stderr, #name ": j:%d, alpha:%d beta:%d " \
+fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d " 
\
 "tc0:{%d,%d,%d,%d}\n", j, alphas[j], betas[j], 
\
 tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \
 fail(); \
@@ -365,12 +365,16 @@ static void check_loop_filter(void)
 }   \
 } while (0)
 
-CHECK_LOOP_FILTER(h264_v_loop_filter_luma, 1);
-CHECK_LOOP_FILTER(h264_h_loop_filter_luma, 0);
-CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff, 0);
-CHECK_LOOP_FILTER(h264_v_loop_filter_chroma, 1);
-CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0);
-CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0);
+CHECK_LOOP_FILTER(h264_v_loop_filter_luma, 1,);
+CHECK_LOOP_FILTER(h264_h_loop_filter_luma, 0,);
+CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff, 0,);
+CHECK_LOOP_FILTER(h264_v_loop_filter_chroma, 1,);
+CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0,);
+CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0,);
+
+ff_h264dsp_init(&h, bit_depth, 2);
+CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0, 422);
+CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0, 422);
 #undef CHECK_LOOP_FILTER
 }
 }
@@ -397,9 +401,9 @@ static void check_loop_filter_intra(void)
 a = a*9/10;
 }
 
-#define CHECK_LOOP_FILTER(name, align)  \
+#define CHECK_LOOP_FILTER(name, align, idc) \
 do {\
-if (check_func(h.name, #name "_%dbpp", bit_depth)) {\
+if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) {   \
 for (j = 0; j < 36; j++) {  \
 intptr_t off = 8 * 32 + (j & 15) * 4 * !align;  \
 for (i = 0; i < 1024; i+=4) {   \
@@ -411,7 +415,7 @@ static void check_loop_filter_intra(void)
 call_ref(dst0 + off, 32, alphas[j], betas[j]);  \
 call_new(dst1 + off, 32, alphas[j], betas[j]);  \
 if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) {   \
-fprintf(stderr, #name ": j:%d, alpha:%d beta:%d\n", \
+fprintf(stderr, #name #idc ": j:%d, alpha:%d 
beta:%d\n", \
 j, alphas[j], betas[j]);\
 fail(); \
 }   \
@@ -420,12 +424,16 @@ static void check_loop_filter_intra(void)
 }   \
 } while (0)
 
-CHECK_LOOP_FILTER(h264_v_loop_filter_luma_intra, 1);
-CHECK_LOOP_FILTER(h264_h_loop_filter_luma_intra, 0);
-CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff_intra, 0);
-CHECK_LOOP_FILTER(h264_v_loop_filter_chroma_intra, 1);
-CHECK

[FFmpeg-cvslog] avcodec/arm64: fix inverted register order in transpose_4x4H

2015-12-18 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Fri Dec 18 
11:27:05 2015 +0100| [2dba0407fdb855bbe44c888232d58ddb2fd3a412] | committer: 
Michael Niedermayer

avcodec/arm64: fix inverted register order in transpose_4x4H

Fix related register order issue in ff_h264_idct_add_neon.

Found-by: zjh8890 <243186...@qq.com>

Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2dba0407fdb855bbe44c888232d58ddb2fd3a412
---

 libavcodec/aarch64/h264idct_neon.S |4 ++--
 libavcodec/aarch64/neon.S  |4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/aarch64/h264idct_neon.S 
b/libavcodec/aarch64/h264idct_neon.S
index 04b5a47..91f1e77 100644
--- a/libavcodec/aarch64/h264idct_neon.S
+++ b/libavcodec/aarch64/h264idct_neon.S
@@ -37,8 +37,8 @@ function ff_h264_idct_add_neon, export=1
 sub v7.4H,  v16.4H, v3.4H
 add v0.4H,  v4.4H,  v6.4H
 add v1.4H,  v5.4H,  v7.4H
-sub v2.4H,  v4.4H,  v6.4H
-sub v3.4H,  v5.4H,  v7.4H
+sub v3.4H,  v4.4H,  v6.4H
+sub v2.4H,  v5.4H,  v7.4H
 
 transpose_4x4H  v0, v1, v2, v3, v4, v5, v6, v7
 
diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S
index 619aec6..a227cbd 100644
--- a/libavcodec/aarch64/neon.S
+++ b/libavcodec/aarch64/neon.S
@@ -107,8 +107,8 @@
 .macro  transpose_4x4H  r0, r1, r2, r3, r4, r5, r6, r7
 trn1\r4\().4H,  \r0\().4H,  \r1\().4H
 trn2\r5\().4H,  \r0\().4H,  \r1\().4H
-trn1\r7\().4H,  \r3\().4H,  \r2\().4H
-trn2\r6\().4H,  \r3\().4H,  \r2\().4H
+trn1\r7\().4H,  \r2\().4H,  \r3\().4H
+trn2\r6\().4H,  \r2\().4H,  \r3\().4H
 trn1\r0\().2S,  \r4\().2S,  \r7\().2S
 trn2\r3\().2S,  \r4\().2S,  \r7\().2S
 trn1\r1\().2S,  \r5\().2S,  \r6\().2S

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] libavutil: move FFALIGN macro from common.h to macros.h

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Thu Dec 10 
21:49:30 2015 +0100| [50078c1c8070dd8d1c329e8117ff30ec72489039] | committer: 
Janne Grunau

libavutil: move FFALIGN macro from common.h to macros.h

Include macros.h explicitly in common.h so that external code using
FFALIGN does not break. It was already implicitly included through
version.h. Include macros.h in lls.h and internal.h for FFALIGN.
lls.h was including common.h only for FFALIGN and internal.h was
missing the include for FFALIGN. `make checkheaders` did not catch it
because it's an internal header.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=50078c1c8070dd8d1c329e8117ff30ec72489039
---

 libavutil/common.h   |2 +-
 libavutil/internal.h |1 +
 libavutil/lls.c  |2 ++
 libavutil/lls.h  |2 +-
 libavutil/macros.h   |2 ++
 5 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavutil/common.h b/libavutil/common.h
index 56556e7..7a43ccf 100644
--- a/libavutil/common.h
+++ b/libavutil/common.h
@@ -36,6 +36,7 @@
 #include 
 
 #include "attributes.h"
+#include "macros.h"
 #include "version.h"
 #include "libavutil/avconfig.h"
 
@@ -59,7 +60,6 @@
 
 #define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0)
 #define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
-#define FFALIGN(x, a) (((x)+(a)-1)&~((a)-1))
 
 /* misc math functions */
 
diff --git a/libavutil/internal.h b/libavutil/internal.h
index 4264c4e..b9be333 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -37,6 +37,7 @@
 #include "config.h"
 #include "attributes.h"
 #include "dict.h"
+#include "macros.h"
 #include "pixfmt.h"
 
 #if ARCH_X86
diff --git a/libavutil/lls.c b/libavutil/lls.c
index 1298946..60d2b64 100644
--- a/libavutil/lls.c
+++ b/libavutil/lls.c
@@ -29,6 +29,8 @@
 #include 
 
 #include "attributes.h"
+#include "config.h"
+#include "internal.h"
 #include "version.h"
 #include "lls.h"
 
diff --git a/libavutil/lls.h b/libavutil/lls.h
index 9b2b3a4..3977e97 100644
--- a/libavutil/lls.h
+++ b/libavutil/lls.h
@@ -23,7 +23,7 @@
 #ifndef AVUTIL_LLS_H
 #define AVUTIL_LLS_H
 
-#include "common.h"
+#include "macros.h"
 #include "mem.h"
 #include "version.h"
 
diff --git a/libavutil/macros.h b/libavutil/macros.h
index bf3eb9b..3e7b005 100644
--- a/libavutil/macros.h
+++ b/libavutil/macros.h
@@ -45,4 +45,6 @@
 
 #define AV_PRAGMA(s) _Pragma(#s)
 
+#define FFALIGN(x, a) (((x)+(a)-1)&~((a)-1))
+
 #endif /* AVUTIL_MACROS_H */

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] arm64: add cycle counter support

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Thu Dec  3 
00:12:39 2015 +0100| [64034849dad8410bedbe1def4c533490fb85cc4a] | committer: 
Janne Grunau

arm64: add cycle counter support

The ISB (instruction synchronization barrier) might be too heavy for
START/STOPTIMER use but should be more accurate in checkasm where the
timing overhead is subtracted.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=64034849dad8410bedbe1def4c533490fb85cc4a
---

 libavutil/aarch64/timer.h |   44 
 libavutil/timer.h |4 +++-
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/libavutil/aarch64/timer.h b/libavutil/aarch64/timer.h
new file mode 100644
index 000..382cfd9
--- /dev/null
+++ b/libavutil/aarch64/timer.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2015 Janne Grunau 
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_AARCH64_TIMER_H
+#define AVUTIL_AARCH64_TIMER_H
+
+#include 
+#include "config.h"
+
+#if HAVE_INLINE_ASM
+
+#define AV_READ_TIME read_time
+
+static inline uint64_t read_time(void)
+{
+uint64_t cycle_counter;
+__asm__ volatile(
+"isb   \t\n"
+"mrs %0, pmccntr_el0   "
+: "=r"(cycle_counter) :: "memory" );
+
+return cycle_counter;
+}
+
+#endif /* HAVE_INLINE_ASM */
+
+#endif /* AVUTIL_AARCH64_TIMER_H */
diff --git a/libavutil/timer.h b/libavutil/timer.h
index 0d93d7c..cc4c5a5 100644
--- a/libavutil/timer.h
+++ b/libavutil/timer.h
@@ -38,7 +38,9 @@
 
 #include "log.h"
 
-#if   ARCH_ARM
+#if   ARCH_AARCH64
+#   include "aarch64/timer.h"
+#elif ARCH_ARM
 #   include "arm/timer.h"
 #elif ARCH_BFIN
 #   include "bfin/timer.h"

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] x86_64: int32_to_float_fmul_scalar sign extend integer length

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Dec  8 
16:20:19 2015 +0100| [5dfe4edad63971d669ae456b0bc40ef9364cca80] | committer: 
Janne Grunau

x86_64: int32_to_float_fmul_scalar sign extend integer length

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5dfe4edad63971d669ae456b0bc40ef9364cca80
---

 libavcodec/x86/fmtconvert.asm |3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 727daa9..b9a78a5 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -38,6 +38,9 @@ cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, 
len
 %elif ARCH_X86_32
 movss   m0, mulm
 %endif
+%if ARCH_X86_64
+movsxd  lenq, lend
+%endif
 SPLATD  m0
 shl lenq, 2
 add srcq, lenq

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] arm: add a cpu flag for the VFPv2 vector mode

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Wed Dec  9 
22:28:36 2015 +0100| [e2710e790c09e49e86baa58c6063af0097cc8cb0] | committer: 
Janne Grunau

arm: add a cpu flag for the VFPv2 vector mode

The vector mode was deprecated in ARMv7-A/VFPv3 and various cpu
implementations do not support it in hardware. Depending on the OS,
vector mode code will either be emulated in software or result in an
illegal instruction on cpus which do not support it. This was not really
a problem in practice since NEON implementations of the same functions
are preferred. It will however become a problem for checkasm, which
tests every cpu flag separately.

Since this is a cpu feature that newer cpus no longer support, the
behaviour of this flag differs from the other flags. It can only be
activated by runtime cpu feature selection.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e2710e790c09e49e86baa58c6063af0097cc8cb0
---

 libavcodec/arm/dcadsp_init_arm.c |4 ++--
 libavcodec/arm/fft_init_arm.c|2 +-
 libavcodec/arm/fmtconvert_init_arm.c |2 +-
 libavutil/arm/cpu.c  |4 
 libavutil/arm/cpu.h  |5 +
 libavutil/cpu.c  |2 ++
 libavutil/cpu.h  |1 +
 libavutil/version.h  |2 +-
 tests/checkasm/checkasm.c|1 +
 9 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
index 5400484..252f4ae 100644
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/dcadsp_init_arm.c
@@ -59,7 +59,7 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) {
+if (have_vfp_vm(cpu_flags)) {
 s->lfe_fir[0]  = ff_dca_lfe_fir32_vfp;
 s->lfe_fir[1]  = ff_dca_lfe_fir64_vfp;
 s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp;
@@ -75,7 +75,7 @@ av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags))
+if (have_vfp_vm(cpu_flags))
 s->synth_filter_float = ff_synth_filter_float_vfp;
 if (have_neon(cpu_flags))
 s->synth_filter_float = ff_synth_filter_float_neon;
diff --git a/libavcodec/arm/fft_init_arm.c b/libavcodec/arm/fft_init_arm.c
index bc143c1..6d6fa22 100644
--- a/libavcodec/arm/fft_init_arm.c
+++ b/libavcodec/arm/fft_init_arm.c
@@ -40,7 +40,7 @@ av_cold void ff_fft_init_arm(FFTContext *s)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) {
+if (have_vfp_vm(cpu_flags)) {
 s->fft_calc = ff_fft_calc_vfp;
 #if CONFIG_MDCT
 s->imdct_half   = ff_imdct_half_vfp;
diff --git a/libavcodec/arm/fmtconvert_init_arm.c 
b/libavcodec/arm/fmtconvert_init_arm.c
index 27d3c88..6a80bfb 100644
--- a/libavcodec/arm/fmtconvert_init_arm.c
+++ b/libavcodec/arm/fmtconvert_init_arm.c
@@ -38,7 +38,7 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, 
AVCodecContext *avctx
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (have_vfp(cpu_flags)) {
+if (have_vfp_vm(cpu_flags)) {
 if (!have_vfpv3(cpu_flags)) {
 c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp;
 c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp;
diff --git a/libavutil/arm/cpu.c b/libavutil/arm/cpu.c
index 8bdaa88..2effb72 100644
--- a/libavutil/arm/cpu.c
+++ b/libavutil/arm/cpu.c
@@ -131,6 +131,10 @@ int ff_get_cpu_flags_arm(void)
 if (flags & AV_CPU_FLAG_ARMV6T2)
 flags |= AV_CPU_FLAG_ARMV6;
 
+/* set the virtual VFPv2 vector mode flag */
+if ((flags & AV_CPU_FLAG_VFP) && !(flags & (AV_CPU_FLAG_VFPV3 | 
AV_CPU_FLAG_NEON)))
+flags |= AV_CPU_FLAG_VFP_VM;
+
 return flags;
 }
 
diff --git a/libavutil/arm/cpu.h b/libavutil/arm/cpu.h
index 224409a..5563fc1 100644
--- a/libavutil/arm/cpu.h
+++ b/libavutil/arm/cpu.h
@@ -30,6 +30,11 @@
 #define have_vfpv3(flags)   CPUEXT(flags, VFPV3)
 #define have_neon(flags)CPUEXT(flags, NEON)
 
+/* some functions use the VFPv2 vector mode which is deprecated in ARMv7-A
+ * and might trap on such CPU depending on the OS configuration */
+#define have_vfp_vm(flags)  \
+(have_armv6(flags) && ((flags) & AV_CPU_FLAG_VFP_VM))
+
 /* Some functions use the 'setend' instruction which is deprecated on ARMv8
  * and serializing on some ARMv7 cores. This macro ensures such functions
  * are only enabled on ARMv6. */
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index e24b9dd..5f04461 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -124,6 +124,7 @@ int av_parse_cpu_flags(const char *s)
 { "armv6",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6   

[FFmpeg-cvslog] arm64: convert dcadsp neon asm from arm

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sat Nov 28 
15:23:52 2015 +0100| [c33c1fa8af2b2e82418a06901b6ad17b3d61b73e] | committer: 
Janne Grunau

arm64: convert dcadsp neon asm from arm

~2% faster dts decoding overall.

                     cortex-a57   cortex-a53
dca_decode_hf_c:          474.8       1659.9
dca_decode_hf_neon:       225.2        301.1
dca_lfe_fir0_c:           913.2       1537.7
dca_lfe_fir0_neon:        286.8        451.9
dca_lfe_fir1_c:           848.7       1711.5
dca_lfe_fir1_neon:        387.1        506.4

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c33c1fa8af2b2e82418a06901b6ad17b3d61b73e
---

 libavcodec/aarch64/Makefile  |2 +
 libavcodec/aarch64/dcadsp_init.c |   51 
 libavcodec/aarch64/dcadsp_neon.S |  169 ++
 libavcodec/dcadsp.c  |2 +
 libavcodec/dcadsp.h  |1 +
 5 files changed, 225 insertions(+)

diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index d001b34..0b614a3 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -1,3 +1,4 @@
+OBJS-$(CONFIG_DCA_DECODER)  += aarch64/dcadsp_init.o
 OBJS-$(CONFIG_FFT)  += aarch64/fft_init_aarch64.o
 OBJS-$(CONFIG_H264CHROMA)   += aarch64/h264chroma_init_aarch64.o
 OBJS-$(CONFIG_H264DSP)  += aarch64/h264dsp_init_aarch64.o
@@ -15,6 +16,7 @@ OBJS-$(CONFIG_VORBIS_DECODER)   += 
aarch64/vorbisdsp_init.o
 
 ARMV8-OBJS-$(CONFIG_VIDEODSP)   += aarch64/videodsp.o
 
+NEON-OBJS-$(CONFIG_DCA_DECODER) += aarch64/dcadsp_neon.o
 NEON-OBJS-$(CONFIG_FFT) += aarch64/fft_neon.o
 NEON-OBJS-$(CONFIG_H264CHROMA)  += aarch64/h264cmc_neon.o
 NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o  
\
diff --git a/libavcodec/aarch64/dcadsp_init.c b/libavcodec/aarch64/dcadsp_init.c
new file mode 100644
index 000..ad91070
--- /dev/null
+++ b/libavcodec/aarch64/dcadsp_init.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2010 Mans Rullgard 
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/aarch64/cpu.h"
+#include "libavutil/attributes.h"
+#include "libavcodec/dcadsp.h"
+
+void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs);
+void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs);
+
+void ff_synth_filter_float_neon(FFTContext *imdct,
+float *synth_buf_ptr, int *synth_buf_offset,
+float synth_buf2[32], const float window[512],
+float out[32], const float in[32],
+float scale);
+
+void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8],
+   const int32_t vq_num[DCA_SUBBANDS],
+   const int8_t hf_vq[1024][32], intptr_t vq_offset,
+   int32_t scale[DCA_SUBBANDS][2],
+   intptr_t start, intptr_t end);
+
+av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
+{
+int cpu_flags = av_get_cpu_flags();
+
+if (have_neon(cpu_flags)) {
+s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
+s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
+s->decode_hf  = ff_decode_hf_neon;
+}
+}
diff --git a/libavcodec/aarch64/dcadsp_neon.S b/libavcodec/aarch64/dcadsp_neon.S
new file mode 100644
index 000..73196d9
--- /dev/null
+++ b/libavcodec/aarch64/dcadsp_neon.S
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2010 Mans Rullgard 
+ * Copyright (c) 2015 Janne Grunau 
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License 

[FFmpeg-cvslog] arm64: port synth_filter_float_neon from arm

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Dec  1 
13:37:41 2015 +0100| [705f5e5e155f6f280a360af220fc5b30cfcee702] | committer: 
Janne Grunau

arm64: port synth_filter_float_neon from arm

~25% faster dts decoding overall. The checkasm CPU cycles numbers are
not that useful since synth_filter_float() calls FFTContext.imdct_half().

 cortex-a57   cortex-a53
synth_filter_float_c:1866.2   3490.9
synth_filter_float_neon:  915.0   1531.5

With fftc.imdct_half forced to imdct_half_neon:
 cortex-a57   cortex-a53
synth_filter_float_c:1718.4   3025.3
synth_filter_float_neon:  926.2   1530.1

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=705f5e5e155f6f280a360af220fc5b30cfcee702
---

 libavcodec/aarch64/Makefile|3 +-
 libavcodec/aarch64/asm-offsets.h   |3 +
 libavcodec/aarch64/dcadsp_init.c   |   16 +
 libavcodec/aarch64/synth_filter_neon.S |  119 
 libavcodec/synth_filter.c  |8 ++-
 libavcodec/synth_filter.h  |1 +
 6 files changed, 147 insertions(+), 3 deletions(-)

diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index 0b614a3..2175578 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -16,7 +16,8 @@ OBJS-$(CONFIG_VORBIS_DECODER)   += 
aarch64/vorbisdsp_init.o
 
 ARMV8-OBJS-$(CONFIG_VIDEODSP)   += aarch64/videodsp.o
 
-NEON-OBJS-$(CONFIG_DCA_DECODER) += aarch64/dcadsp_neon.o
+NEON-OBJS-$(CONFIG_DCA_DECODER) += aarch64/dcadsp_neon.o   
\
+   aarch64/synth_filter_neon.o
 NEON-OBJS-$(CONFIG_FFT) += aarch64/fft_neon.o
 NEON-OBJS-$(CONFIG_H264CHROMA)  += aarch64/h264cmc_neon.o
 NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o  
\
diff --git a/libavcodec/aarch64/asm-offsets.h b/libavcodec/aarch64/asm-offsets.h
index 45b5c40..60e32dd 100644
--- a/libavcodec/aarch64/asm-offsets.h
+++ b/libavcodec/aarch64/asm-offsets.h
@@ -27,4 +27,7 @@
 #define CELT_TMP0x10
 #define CELT_TWIDDLE(CELT_TMP + 0x8)// loaded as pair
 
+/* FFTContext */
+#define IMDCT_HALF  0x48
+
 #endif /* AVCODEC_AARCH64_ASM_OFFSETS_H */
diff --git a/libavcodec/aarch64/dcadsp_init.c b/libavcodec/aarch64/dcadsp_init.c
index ad91070..c66ec3f 100644
--- a/libavcodec/aarch64/dcadsp_init.c
+++ b/libavcodec/aarch64/dcadsp_init.c
@@ -22,7 +22,15 @@
 
 #include "libavutil/aarch64/cpu.h"
 #include "libavutil/attributes.h"
+#include "libavutil/internal.h"
 #include "libavcodec/dcadsp.h"
+#include "libavcodec/fft.h"
+
+#include "asm-offsets.h"
+
+#if HAVE_NEON || HAVE_VFP
+AV_CHECK_OFFSET(FFTContext, imdct_half, IMDCT_HALF);
+#endif
 
 void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs);
 void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs);
@@ -49,3 +57,11 @@ av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
 s->decode_hf  = ff_decode_hf_neon;
 }
 }
+
+av_cold void ff_synth_filter_init_aarch64(SynthFilterContext *s)
+{
+int cpu_flags = av_get_cpu_flags();
+
+if (have_neon(cpu_flags))
+s->synth_filter_float = ff_synth_filter_float_neon;
+}
diff --git a/libavcodec/aarch64/synth_filter_neon.S 
b/libavcodec/aarch64/synth_filter_neon.S
new file mode 100644
index 000..9551bff
--- /dev/null
+++ b/libavcodec/aarch64/synth_filter_neon.S
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2010 Mans Rullgard 
+ * Copyright (c) 2015 Janne Grunau 
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm-offsets.h"
+
+#include "libavutil/aarch64/asm.S"
+
+.macro inner_loop
+ld1 {v29.4s},  [x9],  x15
+ld1 {v28.4s},  [x8],  x15
+ld1 {v30.4s},  [x10], x15
+ld1 {v31.4s},  [x11], x15
+rev64   v28.4s, v28.4s
+ld1 {v24.4s},  [x4],  x15
+ld1 {v25.4s},  [x5],  x15
+rev64   v31.4s, v31.4s
+ld1 {v26.4s},  [x6],  x1

[FFmpeg-cvslog] arm64: int32_to_float_fmul neon asm

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Thu Dec  3 
11:04:29 2015 +0100| [a0fc780a2093784e8664f88205ee1b215e109cee] | committer: 
Janne Grunau

arm64: int32_to_float_fmul neon asm

3% faster dts decoding on a cortex-a57.

                                  cortex-a57   cortex-a53
int32_to_float_fmul_array8_c:         1270.9       4475.6
int32_to_float_fmul_array8_neon:       328.6        569.2
int32_to_float_fmul_scalar_c:          928.5       4119.6
int32_to_float_fmul_scalar_neon:       309.1        524.1

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a0fc780a2093784e8664f88205ee1b215e109cee
---

 libavcodec/aarch64/Makefile  |2 +
 libavcodec/aarch64/fmtconvert_init.c |   43 +++
 libavcodec/aarch64/fmtconvert_neon.S |   76 ++
 libavcodec/fmtconvert.c  |   11 +++--
 libavcodec/fmtconvert.h  |1 +
 5 files changed, 130 insertions(+), 3 deletions(-)

diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index 2175578..022ed84 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -1,5 +1,6 @@
 OBJS-$(CONFIG_DCA_DECODER)  += aarch64/dcadsp_init.o
 OBJS-$(CONFIG_FFT)  += aarch64/fft_init_aarch64.o
+OBJS-$(CONFIG_FMTCONVERT)   += aarch64/fmtconvert_init.o
 OBJS-$(CONFIG_H264CHROMA)   += aarch64/h264chroma_init_aarch64.o
 OBJS-$(CONFIG_H264DSP)  += aarch64/h264dsp_init_aarch64.o
 OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
@@ -19,6 +20,7 @@ ARMV8-OBJS-$(CONFIG_VIDEODSP)   += aarch64/videodsp.o
 NEON-OBJS-$(CONFIG_DCA_DECODER) += aarch64/dcadsp_neon.o   
\
aarch64/synth_filter_neon.o
 NEON-OBJS-$(CONFIG_FFT) += aarch64/fft_neon.o
+NEON-OBJS-$(CONFIG_FMTCONVERT)  += aarch64/fmtconvert_neon.o
 NEON-OBJS-$(CONFIG_H264CHROMA)  += aarch64/h264cmc_neon.o
 NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o  
\
aarch64/h264idct_neon.o
diff --git a/libavcodec/aarch64/fmtconvert_init.c 
b/libavcodec/aarch64/fmtconvert_init.c
new file mode 100644
index 000..0a55a1b
--- /dev/null
+++ b/libavcodec/aarch64/fmtconvert_init.c
@@ -0,0 +1,43 @@
+/*
+ * ARM optimized Format Conversion Utils
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include 
+
+#include "libavutil/attributes.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/fmtconvert.h"
+
+void ff_int32_to_float_fmul_array8_neon(FmtConvertContext *c, float *dst,
+const int32_t *src, const float *mul,
+int len);
+void ff_int32_to_float_fmul_scalar_neon(float *dst, const int32_t *src,
+float mul, int len);
+
+av_cold void ff_fmt_convert_init_aarch64(FmtConvertContext *c,
+ AVCodecContext *avctx)
+{
+int cpu_flags = av_get_cpu_flags();
+
+if (have_neon(cpu_flags)) {
+c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_neon;
+c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_neon;
+}
+}
diff --git a/libavcodec/aarch64/fmtconvert_neon.S 
b/libavcodec/aarch64/fmtconvert_neon.S
new file mode 100644
index 000..3b33c87
--- /dev/null
+++ b/libavcodec/aarch64/fmtconvert_neon.S
@@ -0,0 +1,76 @@
+/*
+ * ARM NEON optimised Format Conversion Utils
+ * Copyright (c) 2008 Mans Rullgard 
+ * Copyright (c) 2015 Janne Grunau  
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You shou

[FFmpeg-cvslog] arm: add ff_int32_to_float_fmul_array8_neon

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Thu Dec  3 
16:17:32 2015 +0100| [90b1b9350c0a97c4065ae9054b83e57f48a0de1f] | committer: 
Janne Grunau

arm: add ff_int32_to_float_fmul_array8_neon

Quite a bit faster than int32_to_float_fmul_array8_c calling
ff_int32_to_float_fmul_scalar_neon through FmtConvertContext.
Number of cycles per int32_to_float_fmul_array8 call while decoding
padded.dts on exynos5422:

             before  after   change
cortex-a7:     1270    951     -25%
cortex-a15:     434    285     -34%

checkasm --bench cycle counts:       cortex-a15   cortex-a7
int32_to_float_fmul_array8_c:            1730.4      4384.5
int32_to_float_fmul_array8_neon_c:        571.5      1694.3
int32_to_float_fmul_array8_neon:          374.0      1448.8

The differences between int32_to_float_fmul_array8_neon_c and
int32_to_float_fmul_array8_neon are interesting.
The former is the current behaviour of calling
ff_int32_to_float_fmul_scalar_neon repeatedly from the C function.
The raw numbers differ since checkasm uses different lengths than the
dca decoder.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=90b1b9350c0a97c4065ae9054b83e57f48a0de1f
---

 libavcodec/arm/fmtconvert_init_arm.c |4 
 libavcodec/arm/fmtconvert_neon.S |   37 ++
 2 files changed, 41 insertions(+)

diff --git a/libavcodec/arm/fmtconvert_init_arm.c 
b/libavcodec/arm/fmtconvert_init_arm.c
index 6a80bfb..11396e8 100644
--- a/libavcodec/arm/fmtconvert_init_arm.c
+++ b/libavcodec/arm/fmtconvert_init_arm.c
@@ -25,6 +25,9 @@
 #include "libavcodec/avcodec.h"
 #include "libavcodec/fmtconvert.h"
 
+void ff_int32_to_float_fmul_array8_neon(FmtConvertContext *c, float *dst,
+const int32_t *src, const float *mul,
+int len);
 void ff_int32_to_float_fmul_scalar_neon(float *dst, const int32_t *src,
 float mul, int len);
 
@@ -46,6 +49,7 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, 
AVCodecContext *avctx
 }
 
 if (have_neon(cpu_flags)) {
+c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_neon;
 c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_neon;
 }
 }
diff --git a/libavcodec/arm/fmtconvert_neon.S b/libavcodec/arm/fmtconvert_neon.S
index 5e0ac68..5d48e3d 100644
--- a/libavcodec/arm/fmtconvert_neon.S
+++ b/libavcodec/arm/fmtconvert_neon.S
@@ -1,6 +1,7 @@
 /*
  * ARM NEON optimised Format Conversion Utils
  * Copyright (c) 2008 Mans Rullgard 
+ * Copyright (c) 2015 Janne Grunau  b
  *
  * This file is part of Libav.
  *
@@ -49,3 +50,39 @@ NOVFP   len .reqr3
 bx  lr
 .unreq  len
 endfunc
+
+function ff_int32_to_float_fmul_array8_neon, export=1
+ldr r0,  [sp]
+lsr r0,  r0,  #3
+subsr0,  r0,  #1
+beq 1f
+2:
+vld1.32 {q0-q1},   [r2,:128]!
+vld1.32 {q2-q3},   [r2,:128]!
+vld1.32 {d20}, [r3]!
+subsr0,  r0,  #2
+vcvt.f32.s32q0,  q0
+vcvt.f32.s32q1,  q1
+vdup.32 q8,  d20[0]
+vcvt.f32.s32q2,  q2
+vcvt.f32.s32q3,  q3
+vmul.f32q0,  q0,  q8
+vdup.32 q9,  d20[1]
+vmul.f32q1,  q1,  q8
+vmul.f32q2,  q2,  q9
+vmul.f32q3,  q3,  q9
+vst1.32 {q0-q1},   [r1,:128]!
+vst1.32 {q2-q3},   [r1,:128]!
+bgt 2b
+it  lt
+bxltlr
+1:
+vld1.32 {q0-q1},   [r2,:128]
+vld1.32 {d16[],d17[]},  [r3]
+vcvt.f32.s32q0,  q0
+vcvt.f32.s32q1,  q1
+vmul.f32q0,  q0,  q8
+vmul.f32q1,  q1,  q8
+vst1.32 {q0-q1},   [r1,:128]
+bx  lr
+endfunc

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] arm64: fix inverted register order in transpose_4x4H

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Fri Dec 18 
11:23:22 2015 +0100| [cc29d96d5a379dbcf2649947d884c202c2a52767] | committer: 
Janne Grunau

arm64: fix inverted register order in transpose_4x4H

Fix related register order issue in ff_h264_idct_add_neon.

Found-by: zjh8890 <243186...@qq.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cc29d96d5a379dbcf2649947d884c202c2a52767
---

 libavcodec/aarch64/h264idct_neon.S |4 ++--
 libavcodec/aarch64/neon.S  |4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/aarch64/h264idct_neon.S 
b/libavcodec/aarch64/h264idct_neon.S
index 99c2cb5..78f780a 100644
--- a/libavcodec/aarch64/h264idct_neon.S
+++ b/libavcodec/aarch64/h264idct_neon.S
@@ -37,8 +37,8 @@ function ff_h264_idct_add_neon, export=1
 sub v7.4H,  v16.4H, v3.4H
 add v0.4H,  v4.4H,  v6.4H
 add v1.4H,  v5.4H,  v7.4H
-sub v2.4H,  v4.4H,  v6.4H
-sub v3.4H,  v5.4H,  v7.4H
+sub v3.4H,  v4.4H,  v6.4H
+sub v2.4H,  v5.4H,  v7.4H
 
 transpose_4x4H  v0, v1, v2, v3, v4, v5, v6, v7
 
diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S
index f1072b7..767bc9d 100644
--- a/libavcodec/aarch64/neon.S
+++ b/libavcodec/aarch64/neon.S
@@ -107,8 +107,8 @@
 .macro  transpose_4x4H  r0, r1, r2, r3, r4, r5, r6, r7
 trn1\r4\().4H,  \r0\().4H,  \r1\().4H
 trn2\r5\().4H,  \r0\().4H,  \r1\().4H
-trn1\r7\().4H,  \r3\().4H,  \r2\().4H
-trn2\r6\().4H,  \r3\().4H,  \r2\().4H
+trn1\r7\().4H,  \r2\().4H,  \r3\().4H
+trn2\r6\().4H,  \r2\().4H,  \r3\().4H
 trn1\r0\().2S,  \r4\().2S,  \r7\().2S
 trn2\r3\().2S,  \r4\().2S,  \r7\().2S
 trn1\r1\().2S,  \r5\().2S,  \r6\().2S

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] x86: checkasm: check for or handle missing cleanup after MMX instructions

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Fri Dec 11 
14:06:38 2015 +0100| [711781d7a1714ea4eb0217eb1ba04811978c43d1] | committer: 
Janne Grunau

x86: checkasm: check for or handle missing cleanup after MMX instructions

Not every asm routine is expected to clear the MMX state after returning.
It is, however, a requisite for testing floating point code in checkasm.
Annotate functions requiring cleanup with declare_func_emms() and issue
emms after the call. The remaining functions are checked for having a
cleared MMX state after return.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=711781d7a1714ea4eb0217eb1ba04811978c43d1
---

 tests/checkasm/checkasm.h   |   16 +++-
 tests/checkasm/h264pred.c   |8 ++--
 tests/checkasm/h264qpel.c   |2 +-
 tests/checkasm/x86/checkasm.asm |   78 +--
 4 files changed, 78 insertions(+), 26 deletions(-)

diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index c1206e7..6fc30ca 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -26,6 +26,7 @@
 #include 
 #include "config.h"
 #include "libavutil/avstring.h"
+#include "libavutil/cpu.h"
 #include "libavutil/lfg.h"
 #include "libavutil/timer.h"
 
@@ -54,6 +55,7 @@ static av_unused void *func_ref, *func_new;
 /* Declare the function prototype. The first argument is the return value, the 
remaining
  * arguments are the function parameters. Naming parameters is optional. */
 #define declare_func(ret, ...) declare_new(ret, __VA_ARGS__) typedef ret 
func_type(__VA_ARGS__)
+#define declare_func_emms(cpu_flags, ret, ...) declare_new_emms(cpu_flags, 
ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__)
 
 /* Indicate that the current test has failed */
 #define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__)
@@ -65,8 +67,12 @@ static av_unused void *func_ref, *func_new;
 #define call_ref(...) ((func_type *)func_ref)(__VA_ARGS__)
 
 #if ARCH_X86 && HAVE_YASM
-/* Verifies that clobbered callee-saved registers are properly saved and 
restored */
+/* Verifies that clobbered callee-saved registers are properly saved and 
restored
+ * and that either no MMX registers are touched or emms is issued */
 void checkasm_checked_call(void *func, ...);
+/* Verifies that clobbered callee-saved registers are properly saved and 
restored
+ * and issues emms for asm functions which are not required to do so */
+void checkasm_checked_call_emms(void *func, ...);
 
 #if ARCH_X86_64
 /* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended 
to 64-bit.
@@ -81,16 +87,24 @@ void checkasm_checked_call(void *func, ...);
 void checkasm_stack_clobber(uint64_t clobber, ...);
 #define declare_new(ret, ...) ret (*checked_call)(void *, int, int, int, int, 
int, __VA_ARGS__)\
   = (void *)checkasm_checked_call;
+#define declare_new_emms(cpu_flags, ret, ...) \
+ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__) = \
+((cpu_flags) & av_get_cpu_flags()) ? (void 
*)checkasm_checked_call_emms : \
+ (void *)checkasm_checked_call;
 #define CLOB (UINT64_C(0xdeadbeefdeadbeef))
 #define call_new(...) 
(checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\
   
CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\
   checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__))
 #elif ARCH_X86_32
 #define declare_new(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void 
*)checkasm_checked_call;
+#define declare_new_emms(cpu_flags, ret, ...) ret (*checked_call)(void *, 
__VA_ARGS__) = \
+((cpu_flags) & av_get_cpu_flags()) ? (void 
*)checkasm_checked_call_emms :\
+ (void *)checkasm_checked_call;
 #define call_new(...) checked_call(func_new, __VA_ARGS__)
 #endif
 #else
 #define declare_new(ret, ...)
+#define declare_new_emms(cpu_flags, ret, ...)
 /* Call the function */
 #define call_new(...) ((func_type *)func_new)(__VA_ARGS__)
 #endif
diff --git a/tests/checkasm/h264pred.c b/tests/checkasm/h264pred.c
index a1ee720..6dffa34 100644
--- a/tests/checkasm/h264pred.c
+++ b/tests/checkasm/h264pred.c
@@ -144,7 +144,7 @@ static void check_pred4x4(H264PredContext *h, uint8_t 
*buf0, uint8_t *buf1,
 if (chroma_format == 1) {
 uint8_t *topright = buf0 + 2*16;
 int pred_mode;
-declare_func(void, uint8_t *src, const uint8_t *topright, ptrdiff_t 
stride);
+declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t 
*src, const uint8_t *topright, ptrdiff_t stride);
 
 for (pred_mode = 0; pred_mode < 15; pred_mode++) {
 if (check_pred_func(h->pred4x4[pred_mode], "4x4", 
pred4x4_modes[codec][pred_mode])) {
@@ -163,7 +163,7 @@ static void check_pred8

[FFmpeg-cvslog] checkasm: add float comparison util functions

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Dec  7 
16:14:46 2015 +0100| [9d218d573f8088c606d873e80df572582e6773ef] | committer: 
Janne Grunau

checkasm: add float comparison util functions

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9d218d573f8088c606d873e80df572582e6773ef
---

 tests/checkasm/checkasm.c |   73 +
 tests/checkasm/checkasm.h |   11 +++
 2 files changed, 84 insertions(+)

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 37bc139..becfe35 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -27,6 +27,7 @@
 #include "checkasm.h"
 #include "libavutil/common.h"
 #include "libavutil/cpu.h"
+#include "libavutil/intfloat.h"
 #include "libavutil/random_seed.h"
 
 #if HAVE_IO_H
@@ -151,6 +152,78 @@ static struct {
 /* PRNG state */
 AVLFG checkasm_lfg;
 
+/* float compare support code */
+static int is_negative(union av_intfloat32 u)
+{
+return u.i >> 31;
+}
+
+int float_near_ulp(float a, float b, unsigned max_ulp)
+{
+union av_intfloat32 x, y;
+
+x.f = a;
+y.f = b;
+
+if (is_negative(x) != is_negative(y)) {
+// handle -0.0 == +0.0
+return a == b;
+}
+
+if (abs(x.i - y.i) <= max_ulp)
+return 1;
+
+return 0;
+}
+
+int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
+ unsigned len)
+{
+unsigned i;
+
+for (i = 0; i < len; i++) {
+if (!float_near_ulp(a[i], b[i], max_ulp))
+return 0;
+}
+return 1;
+}
+
+int float_near_abs_eps(float a, float b, float eps)
+{
+float abs_diff = fabsf(a - b);
+
+return abs_diff < eps;
+}
+
+int float_near_abs_eps_array(const float *a, const float *b, float eps,
+ unsigned len)
+{
+unsigned i;
+
+for (i = 0; i < len; i++) {
+if (!float_near_abs_eps(a[i], b[i], eps))
+return 0;
+}
+return 1;
+}
+
+int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp)
+{
+return float_near_ulp(a, b, max_ulp) || float_near_abs_eps(a, b, eps);
+}
+
+int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
+ unsigned max_ulp, unsigned len)
+{
+unsigned i;
+
+for (i = 0; i < len; i++) {
+if (!float_near_abs_eps_ulp(a[i], b[i], eps, max_ulp))
+return 0;
+}
+return 1;
+}
+
 /* Print colored text to stderr if the terminal supports it */
 static void color_printf(int color, const char *fmt, ...)
 {
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 6fc30ca..4a4cce4 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -42,6 +42,17 @@ void checkasm_fail_func(const char *msg, ...) 
av_printf_format(1, 2);
 void checkasm_update_bench(int iterations, uint64_t cycles);
 void checkasm_report(const char *name, ...) av_printf_format(1, 2);
 
+/* float compare utilities */
+int float_near_ulp(float a, float b, unsigned max_ulp);
+int float_near_abs_eps(float a, float b, float eps);
+int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp);
+int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
+ unsigned len);
+int float_near_abs_eps_array(const float *a, const float *b, float eps,
+ unsigned len);
+int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
+ unsigned max_ulp, unsigned len);
+
 extern AVLFG checkasm_lfg;
 #define rnd() av_lfg_get(&checkasm_lfg)
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] checkasm: add tests for dcadsp

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Dec  7 
01:23:47 2015 +0100| [e71b747e9dc56cb84f8a06ec8214d5f3bd98bb6d] | committer: 
Janne Grunau

checkasm: add tests for dcadsp

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e71b747e9dc56cb84f8a06ec8214d5f3bd98bb6d
---

 tests/checkasm/Makefile   |1 +
 tests/checkasm/checkasm.c |3 +
 tests/checkasm/checkasm.h |1 +
 tests/checkasm/dcadsp.c   |  137 +
 4 files changed, 142 insertions(+)

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 75c9a18..a7d13d5 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -1,5 +1,6 @@
 # libavcodec tests
 AVCODECOBJS-$(CONFIG_BSWAPDSP) += bswapdsp.o
+AVCODECOBJS-$(CONFIG_DCA_DECODER) += dcadsp.o
 AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o
 AVCODECOBJS-$(CONFIG_H264QPEL) += h264qpel.o
 AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_mc.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index becfe35..a563eaf 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -61,6 +61,9 @@ static const struct {
 #if CONFIG_BSWAPDSP
 { "bswapdsp", checkasm_check_bswapdsp },
 #endif
+#if CONFIG_DCA_DECODER
+{ "dcadsp", checkasm_check_dcadsp },
+#endif
 #if CONFIG_H264PRED
 { "h264pred", checkasm_check_h264pred },
 #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 4a4cce4..eb8b6dd 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -31,6 +31,7 @@
 #include "libavutil/timer.h"
 
 void checkasm_check_bswapdsp(void);
+void checkasm_check_dcadsp(void);
 void checkasm_check_h264pred(void);
 void checkasm_check_h264qpel(void);
 void checkasm_check_hevc_mc(void);
diff --git a/tests/checkasm/dcadsp.c b/tests/checkasm/dcadsp.c
new file mode 100644
index 000..1665cbb
--- /dev/null
+++ b/tests/checkasm/dcadsp.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Libav; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include 
+#include 
+#include 
+
+#include "libavutil/internal.h"
+#include "libavutil/intfloat.h"
+#include "libavcodec/dca.h"
+#include "libavcodec/dcadsp.h"
+#include "libavcodec/dcadata.h"
+
+#include "checkasm.h"
+
+#define randomize_lfe_fir(size) \
+do {\
+int i;  \
+for (i = 0; i < size; i++) {\
+float f = (float)rnd() / (UINT_MAX >> 1) - 1.0f;\
+in[i] = f;  \
+}   \
+for (i = 0; i < 256; i++) { \
+float f = (float)rnd() / (UINT_MAX >> 1) - 1.0f;\
+coeffs[i] = f;  \
+}   \
+} while (0)
+
+#define check_lfe_fir(decifactor, eps)  \
+do {\
+LOCAL_ALIGNED_16(float, in, [256 / decifactor]);\
+LOCAL_ALIGNED_16(float, out0,   [decifactor * 2]);  \
+LOCAL_ALIGNED_16(float, out1,   [decifactor * 2]);  \
+LOCAL_ALIGNED_16(float, coeffs, [256]); \
+int i;  \
+const float * in_ptr = in + (256 / decifactor) - 1; \
+declare_func(void, float *out, const float *in, const float *coeffs); \
+/* repeat the test several times */ \
+for (i = 0; i < 32; i++) {  \
+int j;  \
+memset(out0,0, sizeof(*out0) * 2 * decifactor); \
+memset(out1, 0xFF, sizeof(*out1) * 2 * decifactor); \
+randomize_lfe_fir(256 / decifactor);\
+call_ref(out0, in_ptr, coeffs); 

[FFmpeg-cvslog] checkasm: add synth_filter test

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Dec  7 
23:38:46 2015 +0100| [568a4323fbde03665b2b23a98068d02b39121812] | committer: 
Janne Grunau

checkasm: add synth_filter test

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=568a4323fbde03665b2b23a98068d02b39121812
---

 tests/checkasm/Makefile   |2 +-
 tests/checkasm/checkasm.c |1 +
 tests/checkasm/checkasm.h |1 +
 tests/checkasm/synth_filter.c |  121 +
 4 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index a7d13d5..9bd13ac 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -1,6 +1,6 @@
 # libavcodec tests
 AVCODECOBJS-$(CONFIG_BSWAPDSP) += bswapdsp.o
-AVCODECOBJS-$(CONFIG_DCA_DECODER) += dcadsp.o
+AVCODECOBJS-$(CONFIG_DCA_DECODER) += dcadsp.o synth_filter.o
 AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o
 AVCODECOBJS-$(CONFIG_H264QPEL) += h264qpel.o
 AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_mc.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index a563eaf..c61e4d4 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -63,6 +63,7 @@ static const struct {
 #endif
 #if CONFIG_DCA_DECODER
 { "dcadsp", checkasm_check_dcadsp },
+{ "synth_filter", checkasm_check_synth_filter },
 #endif
 #if CONFIG_H264PRED
 { "h264pred", checkasm_check_h264pred },
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index eb8b6dd..a599dba 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -35,6 +35,7 @@ void checkasm_check_dcadsp(void);
 void checkasm_check_h264pred(void);
 void checkasm_check_h264qpel(void);
 void checkasm_check_hevc_mc(void);
+void checkasm_check_synth_filter(void);
 void checkasm_check_v210enc(void);
 
 void *checkasm_check_func(void *func, const char *name, ...) 
av_printf_format(2, 3);
diff --git a/tests/checkasm/synth_filter.c b/tests/checkasm/synth_filter.c
new file mode 100644
index 000..157400b
--- /dev/null
+++ b/tests/checkasm/synth_filter.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Libav; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "libavutil/internal.h"
+#include "libavutil/intfloat.h"
+#include "libavcodec/dcadata.h"
+#include "libavcodec/synth_filter.h"
+
+#include "checkasm.h"
+
+#define BUF_SIZE 32
+
+#define randomize_input()   \
+do {\
+int i;  \
+for (i = 0; i < BUF_SIZE; i++) {\
+float f = (float)rnd() / (UINT_MAX >> 5) - 16.0f;   \
+in[i] = f;  \
+}   \
+} while (0)
+
+void checkasm_check_synth_filter(void)
+{
+FFTContext imdct;
+SynthFilterContext synth;
+
+ff_mdct_init(&imdct, 6, 1, 1.0);
+ff_synth_filter_init(&synth);
+
+if (check_func(synth.synth_filter_float, "synth_filter_float")) {
+LOCAL_ALIGNED(32, float,   out0,   [BUF_SIZE]);
+LOCAL_ALIGNED(32, float,   out1,   [BUF_SIZE]);
+LOCAL_ALIGNED(32, float,   out_b,  [BUF_SIZE]);
+LOCAL_ALIGNED(32, float,   in, [BUF_SIZE]);
+LOCAL_ALIGNED(32, float,   buf2_0, [BUF_SIZE]);
+LOCAL_ALIGNED(32, float,   buf2_1, [BUF_SIZE]);
+LOCAL_ALIGNED(32, float,   buf2_b, [BUF_SIZE]);
+LOCAL_ALIGNED(32, float,   buf0,   [512]);
+LOCAL_ALIGNED(32, float,   buf1,   [512]);
+LOCAL_ALIGNED(32, float,   buf_b,  [512]);
+float scale = 1.0f;
+int i, offset0 = 0, offset1 = 0, offset_b = 0;
+
+declare_func(void, FFTContext *, float *, int *, float[32], const 
float[512],
+ float[32], float[32], float);
+
+memset(buf2_0, 0, sizeof(*buf2_0) * BUF_SIZE);
+memset(buf2_1, 0, sizeof(*buf2_1) * BUF_SIZE);
+memset(buf2_b, 0, sizeof(*buf2_b) * BUF_SIZE);
+memset(buf0, 0, sizeof(*buf2_0) * 512);
+memse

[FFmpeg-cvslog] checkasm: add fmtconvert tests

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Dec  8 
16:24:57 2015 +0100| [489e6add4478b0f5717dbf644234c6f3a3baf02c] | committer: 
Janne Grunau

checkasm: add fmtconvert tests

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=489e6add4478b0f5717dbf644234c6f3a3baf02c
---

 tests/checkasm/Makefile |1 +
 tests/checkasm/checkasm.c   |3 ++
 tests/checkasm/checkasm.h   |1 +
 tests/checkasm/fmtconvert.c |  105 +++
 4 files changed, 110 insertions(+)

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 9bd13ac..a66fc73 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -1,6 +1,7 @@
 # libavcodec tests
 AVCODECOBJS-$(CONFIG_BSWAPDSP) += bswapdsp.o
 AVCODECOBJS-$(CONFIG_DCA_DECODER) += dcadsp.o synth_filter.o
+AVCODECOBJS-$(CONFIG_FMTCONVERT)   += fmtconvert.o
 AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o
 AVCODECOBJS-$(CONFIG_H264QPEL) += h264qpel.o
 AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_mc.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index c61e4d4..d6f8ffc 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -65,6 +65,9 @@ static const struct {
 { "dcadsp", checkasm_check_dcadsp },
 { "synth_filter", checkasm_check_synth_filter },
 #endif
+#if CONFIG_FMTCONVERT
+{ "fmtconvert", checkasm_check_fmtconvert },
+#endif
 #if CONFIG_H264PRED
 { "h264pred", checkasm_check_h264pred },
 #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index a599dba..0bc66b9 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -32,6 +32,7 @@
 
 void checkasm_check_bswapdsp(void);
 void checkasm_check_dcadsp(void);
+void checkasm_check_fmtconvert(void);
 void checkasm_check_h264pred(void);
 void checkasm_check_h264qpel(void);
 void checkasm_check_hevc_mc(void);
diff --git a/tests/checkasm/fmtconvert.c b/tests/checkasm/fmtconvert.c
new file mode 100644
index 000..1a843b0
--- /dev/null
+++ b/tests/checkasm/fmtconvert.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Libav; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include 
+#include 
+#include 
+
+#include "libavutil/internal.h"
+#include "libavutil/common.h"
+#include "libavcodec/fmtconvert.h"
+
+#include "checkasm.h"
+
+#define BUF_SIZE 1024
+
+#define randomize_input(len)\
+do {\
+int k;  \
+for (k = 0; k < len; k++) { \
+in[k] = rnd() - INT32_MAX; \
+}   \
+for ( ; k < BUF_SIZE; k++) {\
+in[k] = INT32_MAX;  \
+}   \
+} while (0)
+
+void checkasm_check_fmtconvert(void)
+{
+FmtConvertContext c;
+LOCAL_ALIGNED(32, float,   dst0, [BUF_SIZE]);
+LOCAL_ALIGNED(32, float,   dst1, [BUF_SIZE]);
+LOCAL_ALIGNED(32, int32_t, in,   [BUF_SIZE]);
+float scale_arr[128];
+int length[] = {8, 16, 24, 56, 72, 128, 512, 520, 656, 768, 992};
+int i, j;
+
+for (i = 0; i < FF_ARRAY_ELEMS(scale_arr); i++)
+scale_arr[i] = (FF_ARRAY_ELEMS(scale_arr) - FF_ARRAY_ELEMS(scale_arr) 
/ 2) / 13;
+
+ff_fmt_convert_init(&c, NULL);
+
+memset(dst0, 0, sizeof(*dst0) * BUF_SIZE);
+memset(dst1, 0, sizeof(*dst1) * BUF_SIZE);
+
+if (check_func(c.int32_to_float_fmul_scalar, 
"int32_to_float_fmul_scalar")) {
+declare_func(void, float *, const int32_t *, float, int);
+
+for (i = 0; i < FF_ARRAY_ELEMS(scale_arr); i++) {
+for (j = 0; j < FF_ARRAY_ELEMS(length); j++) {
+
+randomize_input(length[j]);
+
+call_ref(dst0, in, scale_arr[i], length[j]);
+call_new(dst1, in, scale_arr[i], length[j]);
+
+if (!float_near_ulp_array(dst0, dst1, 3, length[j])) {
+fail();
+break;
+}
+
+  

[FFmpeg-cvslog] x86: zero extend the 32-bit length in int32_to_float_fmul_scalar implicitly

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Dec 22 
22:45:42 2015 +0100| [f4f27e4cf1013c55b2c7df359ce8d58ee922662c] | committer: 
Janne Grunau

x86: zero extend the 32-bit length in int32_to_float_fmul_scalar implicitly

This reverts commit 5dfe4edad63971d669ae456b0bc40ef9364cca80.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f4f27e4cf1013c55b2c7df359ce8d58ee922662c
---

 libavcodec/x86/fmtconvert.asm |5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index b9a78a5..0383322 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -38,11 +38,8 @@ cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, 
len
 %elif ARCH_X86_32
 movss   m0, mulm
 %endif
-%if ARCH_X86_64
-movsxd  lenq, lend
-%endif
 SPLATD  m0
-shl lenq, 2
+shl lend, 2
 add srcq, lenq
 add dstq, lenq
 neg lenq

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] checkasm: x86: post commit review fixes

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Dec 22 
22:51:55 2015 +0100| [f0f54117c8f206e8045d301c2eb975b26e9f263d] | committer: 
Janne Grunau

checkasm: x86: post commit review fixes

Check the full FPU tag word instead of only the lower half and simplify
the comparison.
Use upper-case function base name as macro name to instantiate both
checked_call variants.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f0f54117c8f206e8045d301c2eb975b26e9f263d
---

 tests/checkasm/x86/checkasm.asm |   20 +---
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/tests/checkasm/x86/checkasm.asm b/tests/checkasm/x86/checkasm.asm
index 147d7a7..52d10ae 100644
--- a/tests/checkasm/x86/checkasm.asm
+++ b/tests/checkasm/x86/checkasm.asm
@@ -98,7 +98,7 @@ cglobal stack_clobber, 1,2
 ; void checkasm_checked_call(void *func, ...)
 ;-
 INIT_XMM
-%macro check_call 0-1
+%macro CHECKED_CALL 0-1
 cglobal checked_call%1, 2,15,16,max_args*8+8
 mov  t0, r0
 
@@ -171,9 +171,8 @@ cglobal checked_call%1, 2,15,16,max_args*8+8
 .clobber_ok:
 %ifnid %1, _emms
 fstenv [rsp]
-mov  r9h, [rsp + 8]
-add  r9h, 1
-jz   .emms_ok
+cmp  word [rsp + 8], 0x
+je   .emms_ok
 report_fail error_message_emms
 emms
 .emms_ok:
@@ -201,7 +200,7 @@ cglobal checked_call%1, 2,15,16,max_args*8+8
 mov  eax, r3
 %endmacro
 
-%macro check_call 0-1
+%macro CHECKED_CALL 0-1
 ;-
 ; void checkasm_checked_call(void *func, ...)
 ;-
@@ -225,10 +224,9 @@ cglobal checked_call%1, 1,7
 report_fail error_message
 .clobber_ok:
 %ifnid %1, _emms
-fstenv [rsp]
-mov  r3h, [rsp + 8]
-add  r3h, 1
-jz   .emms_ok
+fstenv [esp]
+cmp  word [esp + 8], 0x
+je   .emms_ok
 report_fail error_message_emms
 emms
 .emms_ok:
@@ -241,5 +239,5 @@ cglobal checked_call%1, 1,7
 
 %endif ; ARCH_X86_64
 
-check_call
-check_call _emms
+CHECKED_CALL
+CHECKED_CALL _emms

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] x86: use emms after ff_int32_to_float_fmul_scalar_sse

2016-01-02 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Dec 29 
12:08:38 2015 +0100| [8563f9887194b07c972c3475d6b51592d77f73f7] | committer: 
Janne Grunau

x86: use emms after ff_int32_to_float_fmul_scalar_sse

Intel's Instruction Set Reference (as of September 2015) clearly states
that cvtpi2ps switches to MMX state. Actual CPUs do not switch if the
source is a memory location. The Instruction Set Reference from 1999
(Order Number 243191) describes this behaviour but all later versions
I've seen make no distinction whether MMX registers or memory is
used as source.
The documentation for the matching SSE2 instruction to convert to double
(cvtpi2pd) was fixed (see the valgrind bug
https://bugs.kde.org/show_bug.cgi?id=210264).

It will take time to get a clarification and fixes in place. In the
meantime it makes sense to change ff_int32_to_float_fmul_scalar_sse to
be correct according to the documentation. The vast majority of users
will have SSE2 so a change to the SSE version has little effect.

Fixes fate-checkasm on x86 valgrind targets.

Valgrind 'bug' reported as https://bugs.kde.org/show_bug.cgi?id=357059

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8563f9887194b07c972c3475d6b51592d77f73f7
---

 libavcodec/x86/fmtconvert.asm |9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 0383322..2a3e4a5 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -61,7 +61,14 @@ cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, 
len
 mova  [dstq+lenq+16], m2
 add lenq, 32
 jl .loop
-REP_RET
+%if notcpuflag(sse2)
+;; cvtpi2ps switches to MMX even if the source is a memory location
+;; possible an error in documentation since every tested CPU disagrees with
+;; that. Use emms anyway since the vast majority of machines will use the
+;; SSE2 variant
+emms
+%endif
+RET
 %endmacro
 
 INIT_XMM sse

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] mpjpgdec: free AVIOContext leak on early probe fail

2015-06-08 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Jun  8 
13:31:04 2015 +0200| [caf7be30b11288c498fae67be4741bfbf083d977] | committer: 
Janne Grunau

mpjpgdec: free AVIOContext leak on early probe fail

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=caf7be30b11288c498fae67be4741bfbf083d977
---

 libavformat/mpjpegdec.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavformat/mpjpegdec.c b/libavformat/mpjpegdec.c
index 354278c..72891e7 100644
--- a/libavformat/mpjpegdec.c
+++ b/libavformat/mpjpegdec.c
@@ -88,7 +88,7 @@ static int mpjpeg_read_probe(AVProbeData *p)
 return AVERROR(ENOMEM);
 
 if (p->buf_size < 2 || p->buf[0] != '-' || p->buf[1] != '-')
-return 0;
+goto end;
 
 while (!pb->eof_reached) {
 ret = get_line(pb, line, sizeof(line));
@@ -101,7 +101,7 @@ static int mpjpeg_read_probe(AVProbeData *p)
 break;
 }
 }
-
+end:
 av_free(pb);
 
 return ret;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] aac_parser: add required padding for GetBitContext buffer

2015-06-09 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Jun  8 
14:45:12 2015 +0200| [fb1473080223a634b8ac2cca48a632d037a0a69d] | committer: 
Janne Grunau

aac_parser: add required padding for GetBitContext buffer

Fixes stack buffer overflow errors detected by address sanitizer in
various fate tests.

CC: libav-sta...@libav.org

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fb1473080223a634b8ac2cca48a632d037a0a69d
---

 libavcodec/aac_parser.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/aac_parser.c b/libavcodec/aac_parser.c
index fdaa5f8..acb05d4 100644
--- a/libavcodec/aac_parser.c
+++ b/libavcodec/aac_parser.c
@@ -34,7 +34,7 @@ static int aac_sync(uint64_t state, AACAC3ParseContext 
*hdr_info,
 int size;
 union {
 uint64_t u64;
-uint8_t  u8[8];
+uint8_t  u8[8 + FF_INPUT_BUFFER_PADDING_SIZE];
 } tmp;
 
 tmp.u64 = av_be2ne64(state);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] ac3_parser: add required padding for GetBitContext buffer

2015-06-09 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Jun  8 
14:48:26 2015 +0200| [09447f2b0fafac6d9565aab82a4c5f16fc99ee5e] | committer: 
Janne Grunau

ac3_parser: add required padding for GetBitContext buffer

Fixes stack buffer overflow errors detected by address sanitizer in
various fate tests.

CC: libav-sta...@libav.org

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=09447f2b0fafac6d9565aab82a4c5f16fc99ee5e
---

 libavcodec/ac3_parser.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/ac3_parser.c b/libavcodec/ac3_parser.c
index 5ea09f8..69d88c1 100644
--- a/libavcodec/ac3_parser.c
+++ b/libavcodec/ac3_parser.c
@@ -150,7 +150,7 @@ static int ac3_sync(uint64_t state, AACAC3ParseContext 
*hdr_info,
 int err;
 union {
 uint64_t u64;
-uint8_t  u8[8];
+uint8_t  u8[8 + FF_INPUT_BUFFER_PADDING_SIZE];
 } tmp = { av_be2ne64(state) };
 AC3HeaderInfo hdr;
 GetBitContext gbc;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] imc: add required padding for GetBitContext buffer

2015-06-09 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Jun  8 
14:48:54 2015 +0200| [210921722bf828b3b895ebcbc34374e6c4452c6f] | committer: 
Janne Grunau

imc: add required padding for GetBitContext buffer

Fixes stack buffer overflow errors detected by address sanitizer in
fate-imc.

CC: libav-sta...@libav.org

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=210921722bf828b3b895ebcbc34374e6c4452c6f
---

 libavcodec/imc.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 411bc85..c60fc7a 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -997,7 +997,7 @@ static int imc_decode_frame(AVCodecContext *avctx, void 
*data,
 
 IMCContext *q = avctx->priv_data;
 
-LOCAL_ALIGNED_16(uint16_t, buf16, [IMC_BLOCK_SIZE / 2]);
+LOCAL_ALIGNED_16(uint16_t, buf16, [(IMC_BLOCK_SIZE + 
FF_INPUT_BUFFER_PADDING_SIZE) / 2]);
 
 if (buf_size < IMC_BLOCK_SIZE * avctx->channels) {
 av_log(avctx, AV_LOG_ERROR, "frame too small!\n");

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] movenc: fixes a questionable valgrind uninitialized value warning

2015-06-10 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Jun  9 
12:27:47 2015 +0200| [529c05698e88b057f0bea61e0d85f2b42925b5ea] | committer: 
Janne Grunau

movenc: fixes a questionable valgrind uninitialized value warning

display_matrix_size is only initialized when av_stream_get_side_data()
returns a side data pointer. The code is safe since the only effect this
has is setting the display_matrix pointer to NULL which it was already
anyway.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=529c05698e88b057f0bea61e0d85f2b42925b5ea
---

 libavformat/movenc.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 30d397a..761c3e8 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1518,7 +1518,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, 
MOVMuxContext *mov,
 
 display_matrix = (uint32_t*)av_stream_get_side_data(st, 
AV_PKT_DATA_DISPLAYMATRIX,
 
&display_matrix_size);
-if (display_matrix_size < 9 * sizeof(*display_matrix))
+if (display_matrix && display_matrix_size < 9 * 
sizeof(*display_matrix))
 display_matrix = NULL;
 }
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] arm64: constify src in h264qpel dsp function definitions

2015-06-24 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sun Jun 14 
20:53:56 2015 +0200| [c2de2cf0d2927f3f584dab6d54276fbda92a0a71] | committer: 
Janne Grunau

arm64: constify src in h264qpel dsp function definitions

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c2de2cf0d2927f3f584dab6d54276fbda92a0a71
---

 libavcodec/aarch64/h264qpel_init_aarch64.c |  128 ++--
 1 file changed, 64 insertions(+), 64 deletions(-)

diff --git a/libavcodec/aarch64/h264qpel_init_aarch64.c 
b/libavcodec/aarch64/h264qpel_init_aarch64.c
index 4beb11b..74088b2 100644
--- a/libavcodec/aarch64/h264qpel_init_aarch64.c
+++ b/libavcodec/aarch64/h264qpel_init_aarch64.c
@@ -27,73 +27,73 @@
 #include "libavutil/aarch64/cpu.h"
 #include "libavcodec/h264qpel.h"
 
-void ff_put_h264_qpel16_mc00_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc10_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc20_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc30_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc01_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc11_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc21_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc31_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc02_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc12_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc22_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc32_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc03_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc13_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc23_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
-void ff_put_h264_qpel16_mc33_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc10_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc20_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc30_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc01_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc11_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc21_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc31_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc02_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc12_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc22_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc32_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc03_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc13_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc23_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel16_mc33_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
 
-void ff_put_h264_qpel8_mc00_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc10_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc20_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc30_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc01_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc11_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc21_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc31_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc02_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc12_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc22_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc32_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc03_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc13_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc23_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_h264_qpel8_mc33_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t 
stride);
+void ff_put_h264_qpel8_mc10_neon(uint8_t *dst, const uint8_t *src, pt

[FFmpeg-cvslog] libvpx: fix test for VPX_IMAGE_ABI_VERSION

2015-06-24 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Wed Jun 24 
08:39:40 2015 +0200| [41740ef8be6ec409f7eff3932ddba9a9eeec27b1] | committer: 
Janne Grunau

libvpx: fix test for VPX_IMAGE_ABI_VERSION

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=41740ef8be6ec409f7eff3932ddba9a9eeec27b1
---

 libavcodec/libvpx.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/libvpx.c b/libavcodec/libvpx.c
index 4cc050d..49f966d 100644
--- a/libavcodec/libvpx.c
+++ b/libavcodec/libvpx.c
@@ -40,7 +40,7 @@ enum AVPixelFormat ff_vpx_imgfmt_to_pixfmt(vpx_img_fmt_t img)
 case VPX_IMG_FMT_I422:  return AV_PIX_FMT_YUV422P;
 case VPX_IMG_FMT_I444:  return AV_PIX_FMT_YUV444P;
 case VPX_IMG_FMT_444A:  return AV_PIX_FMT_YUVA444P;
-#ifdef VPX_IMAGE_ABI_VERSION >= 3
+#if VPX_IMAGE_ABI_VERSION >= 3
 case VPX_IMG_FMT_I440:  return AV_PIX_FMT_YUV440P;
 case VPX_IMG_FMT_I42016:return AV_PIX_FMT_YUV420P16BE;
 case VPX_IMG_FMT_I42216:return AV_PIX_FMT_YUV422P16BE;
@@ -68,7 +68,7 @@ vpx_img_fmt_t ff_vpx_pixfmt_to_imgfmt(enum AVPixelFormat pix)
 case AV_PIX_FMT_YUV422P:  return VPX_IMG_FMT_I422;
 case AV_PIX_FMT_YUV444P:  return VPX_IMG_FMT_I444;
 case AV_PIX_FMT_YUVA444P: return VPX_IMG_FMT_444A;
-#ifdef VPX_IMAGE_ABI_VERSION >= 3
+#if VPX_IMAGE_ABI_VERSION >= 3
 case AV_PIX_FMT_YUV440P:  return VPX_IMG_FMT_I440;
 case AV_PIX_FMT_YUV420P16BE:  return VPX_IMG_FMT_I42016;
 case AV_PIX_FMT_YUV422P16BE:  return VPX_IMG_FMT_I42216;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avcodec: add missing CODEC_CAP_DR1 to codecs using get_buffer()

2015-07-01 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sun Jun 28 
14:58:47 2015 +0200| [007e27d363ba7d994019dc897dc9c39071bb204a] | committer: 
Janne Grunau

avcodec: add missing CODEC_CAP_DR1 to codecs using get_buffer()

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=007e27d363ba7d994019dc897dc9c39071bb204a
---

 libavcodec/atrac3plusdec.c |1 +
 libavcodec/jpeg2000dec.c   |2 +-
 libavcodec/sp5xdec.c   |1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavcodec/atrac3plusdec.c b/libavcodec/atrac3plusdec.c
index ddbfb53..3e061af 100644
--- a/libavcodec/atrac3plusdec.c
+++ b/libavcodec/atrac3plusdec.c
@@ -387,6 +387,7 @@ AVCodec ff_atrac3p_decoder = {
 .long_name= NULL_IF_CONFIG_SMALL("ATRAC3+ (Adaptive TRansform 
Acoustic Coding 3+)"),
 .type = AVMEDIA_TYPE_AUDIO,
 .id   = AV_CODEC_ID_ATRAC3P,
+.capabilities = CODEC_CAP_DR1,
 .priv_data_size   = sizeof(ATRAC3PContext),
 .init = atrac3p_decode_init,
 .init_static_data = ff_atrac3p_init_vlcs,
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 5135297..69dc566 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -1470,7 +1470,7 @@ AVCodec ff_jpeg2000_decoder = {
 .long_name= NULL_IF_CONFIG_SMALL("JPEG 2000"),
 .type = AVMEDIA_TYPE_VIDEO,
 .id   = AV_CODEC_ID_JPEG2000,
-.capabilities = CODEC_CAP_FRAME_THREADS,
+.capabilities = CODEC_CAP_FRAME_THREADS | CODEC_CAP_DR1,
 .priv_data_size   = sizeof(Jpeg2000DecoderContext),
 .init_static_data = jpeg2000_init_static_data,
 .init = jpeg2000_decode_init,
diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
index 08bdbc0..ddf728f 100644
--- a/libavcodec/sp5xdec.c
+++ b/libavcodec/sp5xdec.c
@@ -116,5 +116,6 @@ AVCodec ff_amv_decoder = {
 .init   = ff_mjpeg_decode_init,
 .close  = ff_mjpeg_decode_end,
 .decode = sp5x_decode_frame,
+.capabilities   = CODEC_CAP_DR1,
 .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
 };

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] g2meet: use an unsigned type for the djb hash

2015-07-01 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Wed Jul  1 
13:34:50 2015 +0200| [4d6c40a6d0ce85e96a6e37f558236e2a6a75] | committer: 
Janne Grunau

g2meet: use an unsigned type for the djb hash

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4d6c40a6d0ce85e96a6e37f558236e2a6a75
---

 libavcodec/g2meet.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/g2meet.c b/libavcodec/g2meet.c
index d0b36f0..600e2b2 100644
--- a/libavcodec/g2meet.c
+++ b/libavcodec/g2meet.c
@@ -399,7 +399,7 @@ static inline int log2_ceil(uint32_t x)
 /* improved djb2 hash from http://www.cse.yorku.ca/~oz/hash.html */
 static int djb2_hash(uint32_t key)
 {
-int h = 5381;
+uint32_t h = 5381;
 
 h = (h * 33) ^ ((key >> 24) & 0xFF); // xxx: probably not needed at all
 h = (h * 33) ^ ((key >> 16) & 0xFF);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] g2meet: use av_ceil_log2 instead of a custom function

2015-07-01 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Wed Jul  1 
13:58:34 2015 +0200| [9eec23b8a7fd0f91827bbc3ed0792c39a8cc9a8a] | committer: 
Janne Grunau

g2meet: use av_ceil_log2 instead of a custom function

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9eec23b8a7fd0f91827bbc3ed0792c39a8cc9a8a
---

 libavcodec/g2meet.c |   12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/libavcodec/g2meet.c b/libavcodec/g2meet.c
index 600e2b2..af27a5d 100644
--- a/libavcodec/g2meet.c
+++ b/libavcodec/g2meet.c
@@ -386,16 +386,6 @@ static int jpg_decode_data(JPGContext *c, int width, int 
height,
 #define G_shift  8
 #define B_shift  0
 
-static inline int log2_ceil(uint32_t x)
-{
-int c = 0;
-
-for (--x; x > 0; x >>= 1)
-c++;
-
-return c;
-}
-
 /* improved djb2 hash from http://www.cse.yorku.ca/~oz/hash.html */
 static int djb2_hash(uint32_t key)
 {
@@ -701,7 +691,7 @@ static int epic_decode_run_length(ePICContext *dc, int x, 
int y, int tile_width,
 if (!(above_row[pos] == pix))
 break;
 run = pos - start_pos - 1;
-idx = log2_ceil(run);
+idx = av_ceil_log2(run);
 if (ff_els_decode_bit(&dc->els_ctx, &dc->prev_row_rung[idx]))
 *pRun += run;
 else {

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] g2meet: force simple idct for identical results over all fate configs

2015-07-01 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Wed Jul  1 
15:33:20 2015 +0200| [f91fe24e9bd6912c29bbb03d8afe878e045f9721] | committer: 
Janne Grunau

g2meet: force simple idct for identical results over all fate configs

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f91fe24e9bd6912c29bbb03d8afe878e045f9721
---

 tests/fate/screen.mak |6 +++---
 tests/ref/fate/g2m2   |   24 ++---
 tests/ref/fate/g2m3   |   40 +--
 tests/ref/fate/g2m4   |   56 -
 4 files changed, 63 insertions(+), 63 deletions(-)

diff --git a/tests/fate/screen.mak b/tests/fate/screen.mak
index 14c5089..cc29d22 100644
--- a/tests/fate/screen.mak
+++ b/tests/fate/screen.mak
@@ -30,13 +30,13 @@ FATE_SAMPLES_AVCONV-$(call DEMDEC, AVI, FRAPS) += 
$(FATE_FRAPS)
 fate-fraps: $(FATE_FRAPS)
 
 FATE_G2M += fate-g2m2
-fate-g2m2: CMD = framecrc -i $(TARGET_SAMPLES)/g2m/g2m2.asf -an
+fate-g2m2: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m2.asf -an
 
 FATE_G2M += fate-g2m3
-fate-g2m3: CMD = framecrc -i $(TARGET_SAMPLES)/g2m/g2m3.asf -frames:v 20
+fate-g2m3: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m3.asf 
-frames:v 20
 
 FATE_G2M += fate-g2m4
-fate-g2m4: CMD = framecrc -i $(TARGET_SAMPLES)/g2m/g2m4.asf
+fate-g2m4: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m4.asf
 
 FATE_SAMPLES_AVCONV-$(call DEMDEC, ASF, G2M) += $(FATE_G2M)
 fate-g2m: $(FATE_G2M)
diff --git a/tests/ref/fate/g2m2 b/tests/ref/fate/g2m2
index f117b48..710dbd1 100644
--- a/tests/ref/fate/g2m2
+++ b/tests/ref/fate/g2m2
@@ -147,15 +147,15 @@
 0,   8531,   8531,0,  2359296, 0x47874e4f
 0,   8547,   8547,0,  2359296, 0xdead4e4f
 0,   8562,   8562,0,  2359296, 0x847e4e4f
-0,   9344,   9344,0,  2359296, 0x614ce46d
-0,   9345,   9345,0,  2359296, 0x8dece312
-0,   9876,   9876,0,  2359296, 0xbdf9e34e
-0,   9922,   9922,0,  2359296, 0x3e21e50a
-0,   9938,   9938,0,  2359296, 0xf348e4a4
-0,   9954,   9954,0,  2359296, 0x8178e415
-0,   9955,   9955,0,  2359296, 0xf0b5e199
-0,   9969,   9969,0,  2359296, 0x5a33e00e
-0,   9970,   9970,0,  2359296, 0xaceddf05
-0,   9985,   9985,0,  2359296, 0xca09e023
-0,   9986,   9986,0,  2359296, 0xeb8be0c0
-0,  10001,  10001,0,  2359296, 0x6a0fdf28
+0,   9344,   9344,0,  2359296, 0x1a13e47c
+0,   9345,   9345,0,  2359296, 0x46b3e321
+0,   9876,   9876,0,  2359296, 0x76c0e35d
+0,   9922,   9922,0,  2359296, 0xf6d9e519
+0,   9938,   9938,0,  2359296, 0xac0fe4b3
+0,   9954,   9954,0,  2359296, 0x3a3fe424
+0,   9955,   9955,0,  2359296, 0xa97ce1a8
+0,   9969,   9969,0,  2359296, 0x12fae01d
+0,   9970,   9970,0,  2359296, 0x65b4df14
+0,   9985,   9985,0,  2359296, 0x82d0e032
+0,   9986,   9986,0,  2359296, 0xa452e0cf
+0,  10001,  10001,0,  2359296, 0x22d6df37
diff --git a/tests/ref/fate/g2m3 b/tests/ref/fate/g2m3
index 9956710..8866a7e 100644
--- a/tests/ref/fate/g2m3
+++ b/tests/ref/fate/g2m3
@@ -1,25 +1,25 @@
 #tb 0: 1/1000
 #tb 1: 1/44100
-0,  0,  0,0,  3824640, 0xf8cf3d18
+0,  0,  0,0,  3824640, 0x9a253d29
 1,  0,  0,16384,32768, 0x6b41078a
 1,  14288,  14288,16384,32768, 0x96f7bfa2
-0,499,499,0,  3824640, 0x29dc2af5
-0,624,624,0,  3824640, 0x0ef5287b
-0,625,625,0,  3824640, 0x84b5283d
-0,626,626,0,  3824640, 0xcde31cda
-0,627,627,0,  3824640, 0x61cf2454
-0,628,628,0,  3824640, 0xb8e32127
+0,499,499,0,  3824640, 0xcb232b06
+0,624,624,0,  3824640, 0xb03c288c
+0,625,625,0,  3824640, 0x260b284e
+0,626,626,0,  3824640, 0x6f391ceb
+0,627,627,0,  3824640, 0x03252465
+0,628,628,0,  3824640, 0x5a392138
 1,  30650,  30650,14336,28672, 0xfafb3922
-0,749,749,0,  3824640, 0xb2972f1f
-0,750,750,0,  3824640, 0x5f59333c
-0,751,751,0,  3824640, 0x8d3529ea
-0,752,752,0,  3824640, 0xceb4385f
-0,753,753,0,  3824640, 0xb93139f8
-0,754,754,0,  3824640, 0xfb802d6f
-0,874,874,0,  3824640, 0x94643ee6
-0,875,875,0,  3824640, 0x646c4e89
-0,876,876,0,  3824640, 0xa65d5e7a
-0,877,877,0,  3824

[FFmpeg-cvslog] fate-g2m3: disable the audio stream

2015-07-01 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Wed Jul  1 
17:35:51 2015 +0200| [a31c4b2cbef9aee15910fc3df52519aef46760de] | committer: 
Janne Grunau

fate-g2m3: disable the audio stream

The audio decoder is not in fate-g2m3 dependencies and the wma2 decoder
is probably not bit-exact since it is float based.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a31c4b2cbef9aee15910fc3df52519aef46760de
---

 tests/fate/screen.mak |2 +-
 tests/ref/fate/g2m3   |4 
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/tests/fate/screen.mak b/tests/fate/screen.mak
index cc29d22..26e6736 100644
--- a/tests/fate/screen.mak
+++ b/tests/fate/screen.mak
@@ -33,7 +33,7 @@ FATE_G2M += fate-g2m2
 fate-g2m2: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m2.asf -an
 
 FATE_G2M += fate-g2m3
-fate-g2m3: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m3.asf 
-frames:v 20
+fate-g2m3: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m3.asf 
-frames:v 20 -an
 
 FATE_G2M += fate-g2m4
 fate-g2m4: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m4.asf
diff --git a/tests/ref/fate/g2m3 b/tests/ref/fate/g2m3
index 8866a7e..5a6ff64 100644
--- a/tests/ref/fate/g2m3
+++ b/tests/ref/fate/g2m3
@@ -1,15 +1,11 @@
 #tb 0: 1/1000
-#tb 1: 1/44100
 0,  0,  0,0,  3824640, 0x9a253d29
-1,  0,  0,16384,32768, 0x6b41078a
-1,  14288,  14288,16384,32768, 0x96f7bfa2
 0,499,499,0,  3824640, 0xcb232b06
 0,624,624,0,  3824640, 0xb03c288c
 0,625,625,0,  3824640, 0x260b284e
 0,626,626,0,  3824640, 0x6f391ceb
 0,627,627,0,  3824640, 0x03252465
 0,628,628,0,  3824640, 0x5a392138
-1,  30650,  30650,14336,28672, 0xfafb3922
 0,749,749,0,  3824640, 0x53ed2f30
 0,750,750,0,  3824640, 0x00af334d
 0,751,751,0,  3824640, 0x2e8b29fb

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] h264: arm: use intra pred8x8 functions only for chroma_format_idc <= 1

2015-07-17 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sun Jul 12 
17:03:13 2015 +0200| [256ef19844892c6cf8e0386e3287bae970ec6320] | committer: 
Janne Grunau

h264: arm: use intra pred8x8 functions only for chroma_format_idc <= 1

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=256ef19844892c6cf8e0386e3287bae970ec6320
---

 libavcodec/arm/h264pred_init_arm.c |   30 --
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/libavcodec/arm/h264pred_init_arm.c 
b/libavcodec/arm/h264pred_init_arm.c
index bbfe63f..a445d4d 100644
--- a/libavcodec/arm/h264pred_init_arm.c
+++ b/libavcodec/arm/h264pred_init_arm.c
@@ -54,20 +54,22 @@ static av_cold void h264_pred_init_neon(H264PredContext *h, 
int codec_id,
 if (high_depth)
 return;
 
-h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vert_neon;
-h->pred8x8[HOR_PRED8x8  ] = ff_pred8x8_hor_neon;
-if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8)
-h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon;
-h->pred8x8[DC_128_PRED8x8   ] = ff_pred8x8_128_dc_neon;
-if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_ID_VP7 &&
-codec_id != AV_CODEC_ID_VP8) {
-h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_neon;
-h->pred8x8[LEFT_DC_PRED8x8] = ff_pred8x8_left_dc_neon;
-h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_neon;
-h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = ff_pred8x8_l0t_dc_neon;
-h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = ff_pred8x8_0lt_dc_neon;
-h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon;
-h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon;
+if (chroma_format_idc <= 1) {
+h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vert_neon;
+h->pred8x8[HOR_PRED8x8  ] = ff_pred8x8_hor_neon;
+if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8)
+h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon;
+h->pred8x8[DC_128_PRED8x8   ] = ff_pred8x8_128_dc_neon;
+if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_ID_VP7 &&
+codec_id != AV_CODEC_ID_VP8) {
+h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_neon;
+h->pred8x8[LEFT_DC_PRED8x8] = ff_pred8x8_left_dc_neon;
+h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_neon;
+h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = ff_pred8x8_l0t_dc_neon;
+h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = ff_pred8x8_0lt_dc_neon;
+h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon;
+h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon;
+}
 }
 
 h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_neon;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] checkasm: test all architectures with optimisations

2015-07-17 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sun Jul 12 
16:41:42 2015 +0200| [82e6ac85ff9aa7631b8c01521b3d6b5ca0bc8014] | committer: 
Janne Grunau

checkasm: test all architectures with optimisations

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=82e6ac85ff9aa7631b8c01521b3d6b5ca0bc8014
---

 tests/checkasm/checkasm.c |   16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 826cd35..e6cf3d7 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -72,7 +72,21 @@ static const struct {
 const char *suffix;
 int flag;
 } cpus[] = {
-#if ARCH_X86
+#if   ARCH_AARCH64
+{ "ARMV8","armv8",AV_CPU_FLAG_ARMV8 },
+{ "NEON", "neon", AV_CPU_FLAG_NEON },
+#elif ARCH_ARM
+{ "ARMV5TE",  "armv5te",  AV_CPU_FLAG_ARMV5TE },
+{ "ARMV6","armv6",AV_CPU_FLAG_ARMV6 },
+{ "ARMV6T2",  "armv6t2",  AV_CPU_FLAG_ARMV6T2 },
+{ "VFP",  "vfp",  AV_CPU_FLAG_VFP },
+{ "VFPV3","vfp3", AV_CPU_FLAG_VFPV3 },
+{ "NEON", "neon", AV_CPU_FLAG_NEON },
+#elif ARCH_PPC
+{ "ALTIVEC",  "altivec",  AV_CPU_FLAG_ALTIVEC },
+{ "VSX",  "vsx",  AV_CPU_FLAG_VSX },
+{ "POWER8",   "power8",   AV_CPU_FLAG_POWER8 },
+#elif ARCH_X86
 { "MMX",  "mmx",  AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
 { "MMXEXT",   "mmxext",   AV_CPU_FLAG_MMXEXT },
 { "3DNOW","3dnow",AV_CPU_FLAG_3DNOW },

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] fate: add checkasm target

2015-07-17 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sun Jul 12 
17:35:21 2015 +0200| [c9f8cfb6d9b34f3c51f1b7152c4dc3f2f8724dc4] | committer: 
Janne Grunau

fate: add checkasm target

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c9f8cfb6d9b34f3c51f1b7152c4dc3f2f8724dc4
---

 tests/Makefile  |1 +
 tests/fate/checkasm.mak |5 +
 2 files changed, 6 insertions(+)

diff --git a/tests/Makefile b/tests/Makefile
index fa83ba4..d7a229c 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -72,6 +72,7 @@ include $(SRC_PATH)/tests/fate/atrac.mak
 include $(SRC_PATH)/tests/fate/audio.mak
 include $(SRC_PATH)/tests/fate/bmp.mak
 include $(SRC_PATH)/tests/fate/cdxl.mak
+include $(SRC_PATH)/tests/fate/checkasm.mak
 include $(SRC_PATH)/tests/fate/cover-art.mak
 include $(SRC_PATH)/tests/fate/demux.mak
 include $(SRC_PATH)/tests/fate/dfa.mak
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
new file mode 100644
index 000..daefe69
--- /dev/null
+++ b/tests/fate/checkasm.mak
@@ -0,0 +1,5 @@
+fate-checkasm: tests/checkasm/checkasm$(EXESUF)
+fate-checkasm: CMD = run tests/checkasm/checkasm
+fate-checkasm: REF = /dev/null
+
+FATE += fate-checkasm

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] Revert "tiff: support reading gray+alpha at 8 bits"

2014-07-21 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Jul 21 
08:35:32 2014 +0200| [a9f3f5fadb57bae3f3ff0be69e56b2c6014f2513] | committer: 
Janne Grunau

Revert "tiff: support reading gray+alpha at 8 bits"

This reverts commit b31d76e45fc3c6529dd7109e721676f3ec376d00 as it
uses an unknown pixel format.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a9f3f5fadb57bae3f3ff0be69e56b2c6014f2513
---

 libavcodec/tiff.c |3 ---
 1 file changed, 3 deletions(-)

diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index 69d55c6..6c72dc8 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -262,9 +262,6 @@ static int init_image(TiffContext *s, AVFrame *frame)
 case 161:
 s->avctx->pix_fmt = s->le ? AV_PIX_FMT_GRAY16LE : AV_PIX_FMT_GRAY16BE;
 break;
-case 162:
-s->avctx->pix_fmt = AV_PIX_FMT_GRAY8A;
-break;
 case 324:
 s->avctx->pix_fmt = AV_PIX_FMT_RGBA;
 break;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] configure: add support for neon intrinsics

2014-07-21 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue May 13 
23:03:28 2014 +0200| [d45ebd4876ab8fc07736a644de07e1b1d11a8e5d] | committer: 
Janne Grunau

configure: add support for neon intrinsics

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d45ebd4876ab8fc07736a644de07e1b1d11a8e5d
---

 Makefile  |1 +
 configure |   12 
 2 files changed, 13 insertions(+)

diff --git a/Makefile b/Makefile
index 99d3240..cc016b3 100644
--- a/Makefile
+++ b/Makefile
@@ -130,6 +130,7 @@ $(foreach V,$(SUBDIR_VARS),$(eval $(call RESET,$(V
 SUBDIR := $(1)/
 include $(SRC_PATH)/$(1)/Makefile
 -include $(SRC_PATH)/$(1)/$(ARCH)/Makefile
+-include $(SRC_PATH)/$(1)/$(INTRINSICS)/Makefile
 include $(SRC_PATH)/library.mak
 endef
 
diff --git a/configure b/configure
index c74e433..1cc2265 100755
--- a/configure
+++ b/configure
@@ -1397,6 +1397,10 @@ HEADERS_LIST="
 winsock2_h
 "
 
+INTRINSICS_LIST="
+intrinsics_neon
+"
+
 MATH_FUNCS="
 atanf
 atan2f
@@ -1512,6 +1516,7 @@ HAVE_LIST="
 $HAVE_LIST_CMDLINE
 $HAVE_LIST_PUB
 $HEADERS_LIST
+$INTRINSICS_LIST
 $MATH_FUNCS
 $SYSTEM_FUNCS
 $THREADS_LIST
@@ -1651,6 +1656,7 @@ armv6_deps="arm"
 armv6t2_deps="arm"
 armv8_deps="aarch64"
 neon_deps_any="aarch64 arm"
+intrinsics_neon_deps="neon"
 vfp_deps_any="aarch64 arm"
 vfpv3_deps="vfp"
 
@@ -2196,6 +2202,7 @@ nogas=":"
 # machine
 arch_default=$(uname -m)
 cpu="generic"
+intrinsics="none"
 
 # OS
 target_os_default=$(tolower $(uname -s))
@@ -3928,6 +3935,8 @@ EOF
 
 fi
 
+check_code cc arm_neon.h "int64x2_t test" && enable intrinsics_neon
+
 check_ldflags -Wl,--as-needed
 
 if check_func dlopen; then
@@ -4423,6 +4432,8 @@ enabled_all dxva2 CoTaskMemFree &&
 ! enabled_any memalign posix_memalign aligned_malloc &&
 enabled_any $need_memalign && enable memalign_hack
 
+map 'enabled $v && intrinsics=${v#intrinsics_}' $INTRINSICS_LIST
+
 for thread in $THREADS_LIST; do
 if enabled $thread; then
 test -n "$thread_type" &&
@@ -4555,6 +4566,7 @@ MANDIR=\$(DESTDIR)$mandir
 SRC_PATH=$source_path
 CC_IDENT=$cc_ident
 ARCH=$arch
+INTRINSICS=$intrinsics
 CC=$cc
 AS=$as
 LD=$ld

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] fate: support testing of release branches

2014-07-27 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sat Jul 26 
23:29:46 2014 +0200| [42eb9154a83e9a7aedb1168b2f1112af765cf2b5] | committer: 
Janne Grunau

fate: support testing of release branches

Adding 'branch=release/10' to the fate config file will check the
release/10 branch instead of master. If no branch is specified it will
use 'master' so that existing configs are still valid.

The server side changes are already deployed, see
https://fate.libav.org/v10/ for an example. The server supports only the
release/* branches.

The server enforces that a single slot always tests the same branch.
Please append "-v$RELEASE" to the slot of release branch configs or make
the slot otherwise unique.

A different fate samples dir is needed for each release branch. make
fate-rsync has the correct URL in each branch.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=42eb9154a83e9a7aedb1168b2f1112af765cf2b5
---

 tests/fate.sh |8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/fate.sh b/tests/fate.sh
index 6e0c0c6..af0f6c0 100755
--- a/tests/fate.sh
+++ b/tests/fate.sh
@@ -19,6 +19,8 @@ test -n "$slot"|| die "slot not specified"
 test -n "$repo"|| die "repo not specified"
 test -d "$samples" || die "samples location not specified"
 
+: ${branch:=master}
+
 lock(){
 lock=$1/fate.lock
 (set -C; exec >$lock) 2>/dev/null || return
@@ -28,14 +30,14 @@ lock(){
 checkout(){
 case "$repo" in
 file:*|/*) src="${repo#file:}"  ;;
-git:*) git clone --quiet "$repo" "$src" ;;
+git:*) git clone --quiet --branch "$branch" "$repo" "$src" ;;
 esac
 }
 
 update()(
 cd ${src} || return
 case "$repo" in
-git:*) git fetch --force; git reset --hard origin/master ;;
+git:*) git fetch --force; git reset --hard "origin/$branch" ;;
 esac
 )
 
@@ -79,7 +81,7 @@ clean(){
 
 report(){
 date=$(date -u +%Y%m%d%H%M%S)
-echo "fate:0:${date}:${slot}:${version}:$1:$2:${comment}" >report
+echo "fate:1:${date}:${slot}:${version}:$1:$2:${branch}:${comment}" >report
 cat ${build}/config.fate ${build}/tests/data/fate/*.rep >>report
 test -n "$fate_recv" && $tar report *.log | gzip | $fate_recv
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] fate: support testing of release branches

2014-07-30 Thread Janne Grunau
ffmpeg | branch: release/2.3 | Janne Grunau  | Sat Jul 
26 23:29:46 2014 +0200| [6a250c858ebbb9d5111c5b14d8d46d41bd08e218] | committer: 
Michael Niedermayer

fate: support testing of release branches

Adding 'branch=release/2.3' to the fate config file will check the
release/2.3 branch instead of master. If no branch is specified it will
use 'master' so that existing configs are still valid.

(cherry picked from commit 42eb9154a83e9a7aedb1168b2f1112af765cf2b5)

Conflicts:

tests/fate.sh

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6a250c858ebbb9d5111c5b14d8d46d41bd08e218
---

 tests/fate.sh |7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/fate.sh b/tests/fate.sh
index ca3caa3..5a78018 100755
--- a/tests/fate.sh
+++ b/tests/fate.sh
@@ -19,6 +19,8 @@ test -n "$slot"|| die "slot not specified"
 test -n "$repo"|| die "repo not specified"
 test -d "$samples" || die "samples location not specified"
 
+: ${branch:=master}
+
 lock(){
 lock=$1/fate.lock
 (set -C; exec >$lock) 2>/dev/null || return
@@ -28,14 +30,14 @@ lock(){
 checkout(){
 case "$repo" in
 file:*|/*) src="${repo#file:}"  ;;
-git:*) git clone --quiet "$repo" "$src" ;;
+git:*) git clone --quiet --branch "$branch" "$repo" "$src" ;;
 esac
 }
 
 update()(
 cd ${src} || return
 case "$repo" in
-git:*) git fetch --force && git reset --hard FETCH_HEAD ;;
+git:*) git fetch --force && git reset --hard "origin/$branch" ;;
 esac
 )
 
@@ -82,6 +84,7 @@ clean(){
 report(){
 date=$(date -u +%Y%m%d%H%M%S)
 echo "fate:0:${date}:${slot}:${version}:$1:$2:${comment}" >report
+#echo "fate:1:${date}:${slot}:${version}:$1:$2:${branch}:${comment}" 
>report
 cat ${build}/config.fate >>report
 cat ${build}/tests/data/fate/*.rep >>report || for i in 
${build}/tests/data/fate/*.rep ; do cat "$i" >>report ; done
 test -n "$fate_recv" && $tar report *.log | gzip | $fate_recv

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] fate: add informative cpu test

2014-08-03 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Thu Jan 30 
13:48:35 2014 +0100| [07d8fa58121be8fe315bd51ab760547fe209a745] | committer: 
Janne Grunau

fate: add informative cpu test

libavutil/cpu-test prints raw and effective cpu flags to STDERR. Detected
cpu flags can be useful for debugging fate errors.

No comparison of the result against an expected result since that would
require fate config specific references.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=07d8fa58121be8fe315bd51ab760547fe209a745
---

 libavutil/cpu.c  |   49 --
 tests/fate/libavutil.mak |5 +
 2 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index e755d15..20f0fe1 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -181,6 +181,10 @@ int av_cpu_count(void)
 
 #include 
 
+#if !HAVE_GETOPT
+#include "compat/getopt.c"
+#endif
+
 static const struct {
 int flag;
 const char *name;
@@ -224,17 +228,50 @@ static const struct {
 { 0 }
 };
 
-int main(void)
+static void print_cpu_flags(int cpu_flags, const char *type)
 {
-int cpu_flags = av_get_cpu_flags();
 int i;
 
-printf("cpu_flags = 0x%08X\n", cpu_flags);
-printf("cpu_flags =");
+fprintf(stderr, "cpu_flags(%s) = 0x%08X\n", type, cpu_flags);
+fprintf(stderr, "cpu_flags_str(%s) =", type);
 for (i = 0; cpu_flag_tab[i].flag; i++)
 if (cpu_flags & cpu_flag_tab[i].flag)
-printf(" %s", cpu_flag_tab[i].name);
-printf("\n");
+fprintf(stderr, " %s", cpu_flag_tab[i].name);
+fprintf(stderr, "\n");
+}
+
+
+int main(int argc, char **argv)
+{
+int cpu_flags_raw = av_get_cpu_flags();
+int cpu_flags_eff;
+
+if (cpu_flags_raw < 0)
+return 1;
+
+for (;;) {
+int c = getopt(argc, argv, "c:");
+if (c == -1)
+break;
+switch (c) {
+case 'c':
+{
+int cpuflags = av_parse_cpu_flags(optarg);
+if (cpuflags < 0)
+return 2;
+av_set_cpu_flags_mask(cpuflags);
+break;
+}
+}
+}
+
+cpu_flags_eff = av_get_cpu_flags();
+
+if (cpu_flags_eff < 0)
+return 3;
+
+print_cpu_flags(cpu_flags_raw, "raw");
+print_cpu_flags(cpu_flags_eff, "effective");
 
 return 0;
 }
diff --git a/tests/fate/libavutil.mak b/tests/fate/libavutil.mak
index 81d0571..44d88c4 100644
--- a/tests/fate/libavutil.mak
+++ b/tests/fate/libavutil.mak
@@ -25,6 +25,11 @@ FATE_LIBAVUTIL += fate-blowfish
 fate-blowfish: libavutil/blowfish-test$(EXESUF)
 fate-blowfish: CMD = run libavutil/blowfish-test
 
+FATE_LIBAVUTIL += fate-cpu
+fate-cpu: libavutil/cpu-test$(EXESUF)
+fate-cpu: CMD = run libavutil/cpu-test $(CPUFLAGS:%=-c%)
+fate-cpu: REF = /dev/null
+
 FATE_LIBAVUTIL += fate-crc
 fate-crc: libavutil/crc-test$(EXESUF)
 fate-crc: CMD = run libavutil/crc-test

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] aarch64: add ', ' between assembler macro arguments where missing

2014-08-03 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Thu Jul 24 
14:50:46 2014 +0200| [ac6b95dbc0b53b3ea461bd5e5e7f7f31d2983733] | committer: 
Janne Grunau

aarch64: add ',' between assembler macro arguments where missing

llvm's integrated assembler does not accept spaces as macro argument
delimiters when targeting darwin. Using an explicit delimiter is a good
idea in principle since it makes cases like 'macro 4 -2' vs 'macro 4 - 2'
clear.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ac6b95dbc0b53b3ea461bd5e5e7f7f31d2983733
---

 libavcodec/aarch64/fft_neon.S  |2 +-
 libavcodec/aarch64/mpegaudiodsp_neon.S |2 +-
 libavcodec/aarch64/opus_imdct_neon.S   |8 
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/aarch64/fft_neon.S b/libavcodec/aarch64/fft_neon.S
index 9802349..5f88bed 100644
--- a/libavcodec/aarch64/fft_neon.S
+++ b/libavcodec/aarch64/fft_neon.S
@@ -336,7 +336,7 @@ function fft_pass_neon
 endfunc
 
 .macro  def_fft n, n2, n4
-function fft\n\()_neon  align=6
+function fft\n\()_neon, align=6
 sub sp,  sp,  #16
 stp x28, x30, [sp]
 add x28, x0,  #\n4*2*8
diff --git a/libavcodec/aarch64/mpegaudiodsp_neon.S 
b/libavcodec/aarch64/mpegaudiodsp_neon.S
index 39875fe..808576a 100644
--- a/libavcodec/aarch64/mpegaudiodsp_neon.S
+++ b/libavcodec/aarch64/mpegaudiodsp_neon.S
@@ -24,7 +24,7 @@
 #define WFRAC_BITS  16   // fractional bits for window
 #define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
 
-const   tbl_rev128.s align=4
+const   tbl_rev128.s, align=4
 .byte   12, 13, 14, 15
 .byte8,  9, 10, 11
 .byte4,  5,  6,  7
diff --git a/libavcodec/aarch64/opus_imdct_neon.S 
b/libavcodec/aarch64/opus_imdct_neon.S
index 6b06396..5f6c502 100644
--- a/libavcodec/aarch64/opus_imdct_neon.S
+++ b/libavcodec/aarch64/opus_imdct_neon.S
@@ -23,7 +23,7 @@
 #include "asm-offsets.h"
 
 .macro shuffle a, b, c, d
-const shuffle_\a\b\c\d align=4
+const shuffle_\a\b\c\d, align=4
 .byte (\a * 4), (\a * 4 + 1), (\a * 4 + 2), (\a * 4 + 3)
 .byte (\b * 4), (\b * 4 + 1), (\b * 4 + 2), (\b * 4 + 3)
 .byte (\c * 4), (\c * 4 + 1), (\c * 4 + 2), (\c * 4 + 3)
@@ -344,7 +344,7 @@ function fft15_pass
 ret
 endfunc
 
-function fft30_neon  align=6
+function fft30_neon, align=6
 sub sp,  sp,  #0x20
 stp x20, x21, [sp]
 stp x22, x30, [sp, #0x10]
@@ -372,7 +372,7 @@ function fft30_neon  align=6
 endfunc
 
 .macro  def_fft n, n2
-function fft\n\()_neon  align=6
+function fft\n\()_neon, align=6
 sub sp,  sp,  #0x30
 stp x20, x21, [sp]
 stp x22, x30, [sp, #0x10]
@@ -641,7 +641,7 @@ function ff_celt_imdct_half_neon, export=1
 endfunc
 
 // [0] = exp(2 * i * pi / 5), [1] = exp(2 * i * pi * 2 / 5)
-const   fact5   align=4
+const   fact5,  align=4
 .float   0.30901699437494745, 0.95105651629515353
 .float  -0.80901699437494734, 0.58778525229247325
 endconst

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] cpu-test: test av_cpu_count

2014-08-03 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sun Aug  3 
13:03:13 2014 +0200| [caf5ef852bf71984d3322bbeaf48cfb04ac8255f] | committer: 
Janne Grunau

cpu-test: test av_cpu_count

Add CPU count and number of threads as informative values for fate.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=caf5ef852bf71984d3322bbeaf48cfb04ac8255f
---

 libavutil/cpu.c  |   14 +-
 tests/fate/libavutil.mak |2 +-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 20f0fe1..a7e5f7f 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -180,6 +180,7 @@ int av_cpu_count(void)
 #ifdef TEST
 
 #include 
+#include "avstring.h"
 
 #if !HAVE_GETOPT
 #include "compat/getopt.c"
@@ -245,12 +246,14 @@ int main(int argc, char **argv)
 {
 int cpu_flags_raw = av_get_cpu_flags();
 int cpu_flags_eff;
+int cpu_count = av_cpu_count();
+char threads[5] = "auto";
 
 if (cpu_flags_raw < 0)
 return 1;
 
 for (;;) {
-int c = getopt(argc, argv, "c:");
+int c = getopt(argc, argv, "c:t:");
 if (c == -1)
 break;
 switch (c) {
@@ -262,6 +265,14 @@ int main(int argc, char **argv)
 av_set_cpu_flags_mask(cpuflags);
 break;
 }
+case 't':
+{
+int len = av_strlcpy(threads, optarg, sizeof(threads));
+if (len >= sizeof(threads)) {
+fprintf(stderr, "Invalid thread count '%s'\n", optarg);
+return 2;
+}
+}
 }
 }
 
@@ -272,6 +283,7 @@ int main(int argc, char **argv)
 
 print_cpu_flags(cpu_flags_raw, "raw");
 print_cpu_flags(cpu_flags_eff, "effective");
+fprintf(stderr, "threads = %s (cpu_count = %d)\n", threads, cpu_count);
 
 return 0;
 }
diff --git a/tests/fate/libavutil.mak b/tests/fate/libavutil.mak
index 44d88c4..7f3329b 100644
--- a/tests/fate/libavutil.mak
+++ b/tests/fate/libavutil.mak
@@ -27,7 +27,7 @@ fate-blowfish: CMD = run libavutil/blowfish-test
 
 FATE_LIBAVUTIL += fate-cpu
 fate-cpu: libavutil/cpu-test$(EXESUF)
-fate-cpu: CMD = run libavutil/cpu-test $(CPUFLAGS:%=-c%)
+fate-cpu: CMD = run libavutil/cpu-test $(CPUFLAGS:%=-c%) $(THREADS:%=-t%)
 fate-cpu: REF = /dev/null
 
 FATE_LIBAVUTIL += fate-crc

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] aarch64: use MACH-O const data asm directive in const macro

2014-08-03 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Wed Jul 23 
10:06:15 2014 +0200| [a238b83b13640e3192d7d4aaad2242f13a9a84a1] | committer: 
Janne Grunau

aarch64: use MACH-O const data asm directive in const macro

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a238b83b13640e3192d7d4aaad2242f13a9a84a1
---

 libavutil/aarch64/asm.S |4 
 1 file changed, 4 insertions(+)

diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
index 850f16b..b766e67 100644
--- a/libavutil/aarch64/asm.S
+++ b/libavutil/aarch64/asm.S
@@ -57,7 +57,11 @@ FUNC.func   \name
 ELF .size   \name, . - \name
 .purgem endconst
 .endm
+#ifndef __MACH__
 .section.rodata
+#else
+.const_data
+#endif
 .align  \align
 \name:
 .endm

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] fate: explicitly set the default THREADS value

2014-08-04 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Aug  4 
10:04:08 2014 +0200| [23dfa00b88fc927d4c1854ab4fc60f5c6398f3ac] | committer: 
Janne Grunau

fate: explicitly set the default THREADS value

This makes the default of '1' more explicit than defaulting to '1' in
fate-run.sh and regression-funcs.sh if THREADS is not set.
Fixes the reported thread count in fate-cpu if THREADS is not set.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=23dfa00b88fc927d4c1854ab4fc60f5c6398f3ac
---

 tests/Makefile |1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/Makefile b/tests/Makefile
index 6a0def9..5fad291 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,3 +1,4 @@
+THREADS = 1
 VREF = tests/vsynth1/00.pgm
 AREF = tests/data/asynth1.sw
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] fate: generate tests/pixfmts.mak for all targets requiring it

2014-08-05 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Aug  5 
09:39:00 2014 +0200| [d395895cdb2ac8c95bd488549e7f893bd4dcc248] | committer: 
Janne Grunau

fate: generate tests/pixfmts.mak for all targets requiring it

All subtargets which should run the fate-filter-pixdesc% tests need to
generate and include tests/pixfmts.mak. The most notable missing target was
fate itself.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d395895cdb2ac8c95bd488549e7f893bd4dcc248
---

 tests/fate/filter-video.mak |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
index cd90507..42fb063 100644
--- a/tests/fate/filter-video.mak
+++ b/tests/fate/filter-video.mak
@@ -122,7 +122,7 @@ tests/pixfmts.mak: avconv$(EXESUF)
$(Q)$(TARGET_EXEC) $(TARGET_PATH)/$< -pix_fmts list 2> /dev/null | awk 
'NR > 8 && /^IO/ { printf $$2 " " }' >> $@
$(Q)printf "\n" >> $@
 
-RUNNING_PIXFMTS_TESTS := $(filter check fate-list 
fate-filter-pixdesc%,$(MAKECMDGOALS))
+RUNNING_PIXFMTS_TESTS := $(filter check fate fate-list fate-filter 
fate-vfilter fate-filter-pixdesc%,$(MAKECMDGOALS))
 
 ifneq (,$(RUNNING_PIXFMTS_TESTS))
 -include tests/pixfmts.mak

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] configure: check $as first before using $gas as GNU as

2014-08-05 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Aug  5 12:08:09 
2014 +0200| [15201e256035a3e8f9d3d7b96fc327467e1a8ead] | committer: Janne Grunau

configure: check $as first before using $gas as GNU as

llvm's integrated assembler has supported the AArch64 asm on darwin since
August 2014, so check $as first before using gas-preprocessor.pl via
$gas. Make the checks specific to what the architecture-specific asm
needs: PPC Altivec and AArch64 need only ':vararg' for macro arguments;
Arm needs the '.altmacro' directive in addition.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=15201e256035a3e8f9d3d7b96fc327467e1a8ead
---

 configure |   32 +---
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/configure b/configure
index b2eb0c8..22c78e7 100755
--- a/configure
+++ b/configure
@@ -3760,22 +3760,32 @@ unsigned int endian = 'B' << 24 | 'I' << 16 | 'G' << 8 
| 'E';
 EOF
 od -t x1 $TMPO | grep -q '42 *49 *47 *45' && enable bigendian
 
-
-if enabled asm; then
-enabled arm && nogas=die
-enabled_all ppc altivec && nogas=warn
-as=${gas:=$as}
-check_as <http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] h264: slice-mt: check master context for valid current_picture_ptr

2014-08-09 Thread Janne Grunau
ffmpeg | branch: release/0.10 | Janne Grunau  | Wed Dec 
 5 20:08:01 2012 +0100| [3e60501f311c50bf234033f206c19d34d889df01] | committer: 
Diego Biurrun

h264: slice-mt: check master context for valid current_picture_ptr

Fixes errors in slice based multithreading introduced in 0b300daad2f5.

CC: libav-sta...@libav.org
(cherry picked from commit 5945c7b35d9169caf9ecef1c419eebdebb909e60)
Signed-off-by: Diego Biurrun 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3e60501f311c50bf234033f206c19d34d889df01
---

 libavcodec/h264.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 547cf3d..c485325 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2952,7 +2952,7 @@ static int decode_slice_header(H264Context *h, 
H264Context *h0){
 s->picture_structure = last_pic_structure;
 s->dropable  = last_pic_dropable;
 return AVERROR_INVALIDDATA;
-} else if (!s->current_picture_ptr) {
+} else if (!s0->current_picture_ptr) {
 av_log(s->avctx, AV_LOG_ERROR,
"unset current_picture_ptr on %d. slice\n",
h0->current_slice + 1);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] rv34: use ff_mpeg_update_thread_context only when decoder is fully initialized

2014-08-22 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Thu Aug 21 
13:26:33 2014 +0200| [dc4b2e7d33903a6b9380e8a84b22b3a20facbb08] | committer: 
Janne Grunau

rv34: use ff_mpeg_update_thread_context only when decoder is fully initialized

MpegEncContext based decoders are only fully initialized after the first
ff_thread_get_buffer() call. The RV30/40 decoders may fail before a frame
buffer was requested. ff_mpeg_update_thread_context() fails on half
initialized MpegEncContexts. Since this can only happen before the
first frame was decoded, there is no need to call
ff_mpeg_update_thread_context().

Based on patches by John Stebbins and tested by John Stebbins.

CC: libav-sta...@libav.org

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=dc4b2e7d33903a6b9380e8a84b22b3a20facbb08
---

 libavcodec/rv34.c |   10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index 4ed2a33..26ab7e4 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -1555,16 +1555,18 @@ int ff_rv34_decode_update_thread_context(AVCodecContext 
*dst, const AVCodecConte
 return err;
 }
 
-if ((err = ff_mpeg_update_thread_context(dst, src)))
-return err;
-
 r->cur_pts  = r1->cur_pts;
 r->last_pts = r1->last_pts;
 r->next_pts = r1->next_pts;
 
 memset(&r->si, 0, sizeof(r->si));
 
-return 0;
+// Do no call ff_mpeg_update_thread_context on a partially initialized
+// decoder context.
+if (!s1->linesize)
+return 0;
+
+return ff_mpeg_update_thread_context(dst, src);
 }
 
 static int get_slice_offset(AVCodecContext *avctx, const uint8_t *buf, int n)

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] fate-vc1_ilaced_twomv: use -flags +bitexact

2014-10-04 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sat Oct  4 
11:19:09 2014 +0200| [36f3aec3630f27df64f4ff2b52a1c9ced760eb52] | committer: 
Janne Grunau

fate-vc1_ilaced_twomv: use -flags +bitexact

Also updates the reference since it was generated by the non-bitexact
x86 specific code.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=36f3aec3630f27df64f4ff2b52a1c9ced760eb52
---

 tests/fate/microsoft.mak|2 +-
 tests/ref/fate/vc1_ilaced_twomv |4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/fate/microsoft.mak b/tests/fate/microsoft.mak
index c1cedea..10bbb30 100644
--- a/tests/fate/microsoft.mak
+++ b/tests/fate/microsoft.mak
@@ -54,7 +54,7 @@ FATE_VC1-$(CONFIG_VC1_DEMUXER) += fate-vc1_sa20021
 fate-vc1_sa20021: CMD = framecrc -i $(TARGET_SAMPLES)/vc1/SA20021.vc1
 
 FATE_VC1-$(CONFIG_VC1_DEMUXER) += fate-vc1_ilaced_twomv
-fate-vc1_ilaced_twomv: CMD = framecrc -i $(TARGET_SAMPLES)/vc1/ilaced_twomv.vc1
+fate-vc1_ilaced_twomv: CMD = framecrc -flags +bitexact -i 
$(TARGET_SAMPLES)/vc1/ilaced_twomv.vc1
 
 FATE_VC1-$(CONFIG_MOV_DEMUXER) += fate-vc1-ism
 fate-vc1-ism: CMD = framecrc -i $(TARGET_SAMPLES)/isom/vc1-wmapro.ism -an
diff --git a/tests/ref/fate/vc1_ilaced_twomv b/tests/ref/fate/vc1_ilaced_twomv
index 9a5d391..abfd507 100644
--- a/tests/ref/fate/vc1_ilaced_twomv
+++ b/tests/ref/fate/vc1_ilaced_twomv
@@ -1,12 +1,12 @@
 #tb 0: 1/25
 0,  0,  0,1,  3110400, 0x764f8856
-0,  2,  2,1,  3110400, 0x5b6680fa
+0,  2,  2,1,  3110400, 0x1dc5592c
 0,  3,  3,1,  3110400, 0x8ee86a47
 0,  4,  4,1,  3110400, 0xc1ca8532
 0,  5,  5,1,  3110400, 0x53efd0f9
 0,  6,  6,1,  3110400, 0xa9605bc9
 0,  7,  7,1,  3110400, 0xbaa9aede
-0,  8,  8,1,  3110400, 0x6035644c
+0,  8,  8,1,  3110400, 0x7191bcf4
 0,  9,  9,1,  3110400, 0x1d6aff98
 0, 10, 10,1,  3110400, 0x7b047286
 0, 11, 11,1,  3110400, 0xa7cb2f84

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] fate: add mpeg4 tests for frame size changes

2014-10-17 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Sep 18 
16:03:08 2012 +0200| [f29c226af0ecc34c417b646580a9acd7d8bbaf48] | committer: 
Janne Grunau

fate: add mpeg4 tests for frame size changes

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f29c226af0ecc34c417b646580a9acd7d8bbaf48
---

 tests/Makefile   |1 +
 tests/fate/mpeg4.mak |9 ++
 tests/ref/fate/mpeg4-resolution-change-down-down |  151 ++
 tests/ref/fate/mpeg4-resolution-change-down-up   |  151 ++
 tests/ref/fate/mpeg4-resolution-change-up-down   |  151 ++
 tests/ref/fate/mpeg4-resolution-change-up-up |  151 ++
 6 files changed, 614 insertions(+)

diff --git a/tests/Makefile b/tests/Makefile
index 5fad291..a2ee79b 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -96,6 +96,7 @@ include $(SRC_PATH)/tests/fate/microsoft.mak
 include $(SRC_PATH)/tests/fate/monkeysaudio.mak
 include $(SRC_PATH)/tests/fate/mp3.mak
 include $(SRC_PATH)/tests/fate/mpc.mak
+include $(SRC_PATH)/tests/fate/mpeg4.mak
 include $(SRC_PATH)/tests/fate/opus.mak
 include $(SRC_PATH)/tests/fate/pcm.mak
 include $(SRC_PATH)/tests/fate/probe.mak
diff --git a/tests/fate/mpeg4.mak b/tests/fate/mpeg4.mak
new file mode 100644
index 000..1565247
--- /dev/null
+++ b/tests/fate/mpeg4.mak
@@ -0,0 +1,9 @@
+
+MPEG4_RESOLUTION_CHANGE = down-down down-up up-down up-up
+
+fate-mpeg4-resolution-change-%: CMD = framemd5 -flags +bitexact -idct simple 
-i $(SAMPLES)/mpeg4/resize_$(@:fate-mpeg4-resolution-change-%=%).h263
+
+FATE_MPEG4-$(call DEMDEC, H263, H263) := $(addprefix 
fate-mpeg4-resolution-change-, $(MPEG4_RESOLUTION_CHANGE))
+
+FATE_SAMPLES_AVCONV += $(FATE_MPEG4-yes)
+fate-mpeg4: $(FATE_MPEG4-yes)
diff --git a/tests/ref/fate/mpeg4-resolution-change-down-down 
b/tests/ref/fate/mpeg4-resolution-change-down-down
new file mode 100644
index 000..137575a
--- /dev/null
+++ b/tests/ref/fate/mpeg4-resolution-change-down-down
@@ -0,0 +1,151 @@
+#tb 0: 1/25
+0,  0,  0,1,   460800, d65fcc79c7eb9ebd9d88dca3ebb15bf4
+0,  1,  1,1,   460800, 6c86b8c7e8eae3d63b21342f233fb44e
+0,  2,  2,1,   460800, 7fea65fd8ee4d3fcec722f721d05ef45
+0,  3,  3,1,   460800, 2a1d943211f8c1995cc250586f105991
+0,  4,  4,1,   460800, 0430fa1da6a968c0936fc60a425c3b9f
+0,  5,  5,1,   460800, 1593684b29c0f394176c9fce83ebe4a3
+0,  6,  6,1,   460800, ff8c25a20ced839a1ce33ef25d04f342
+0,  7,  7,1,   460800, 08869a31b677080f3fb1b12e3178c1f8
+0,  8,  8,1,   460800, 7e8d4c417698e434508663dfd851e95d
+0,  9,  9,1,   460800, 97488eafaa5db813bc9fbb13a4204240
+0, 10, 10,1,   460800, aac8d92f678f077b560f24a74427ef33
+0, 11, 11,1,   460800, dbb953e70f356c528e232dd90b21af99
+0, 12, 12,1,   460800, 2f7b6c9f006ca733c159aadb78958621
+0, 13, 13,1,   460800, 938142d358a298df924da648ff9542e2
+0, 14, 14,1,   460800, 93cae8797e91f0ecb94782fd614ba477
+0, 15, 15,1,   460800, 4644a98dbd46865c6c4e1ebe168b5095
+0, 16, 16,1,   460800, cc932f281bf90456c508f58fda085658
+0, 17, 17,1,   460800, 9280ef543c11446d7005a098d19b74a3
+0, 18, 18,1,   460800, f5c91502bd600e1cfcd9ff7a5e683ce4
+0, 19, 19,1,   460800, 4eda3b1d48ca986eeb14c90af947b6af
+0, 20, 20,1,   460800, 84e8398c333e76412bd310c207e131d3
+0, 21, 21,1,   460800, 7abe283b322cc4c9aaeb56e4b2e03597
+0, 22, 22,1,   460800, b4daa2055782c6f3769bf71cb1534124
+0, 23, 23,1,   460800, 3deb3e2f41ef4549da3b0d89031eaa42
+0, 24, 24,1,   460800, 73b9efcd2714b3cd65b1d8aee953cd38
+0, 25, 25,1,   460800, 493ee5aab3a0ca22887b2b673d871efd
+0, 26, 26,1,   460800, 5fe537734707bbc050290df8c0095d0f
+0, 27, 27,1,   460800, ca337619639144e0aea0fe226e9dad63
+0, 28, 28,1,   460800, 8fa2e0ff609d1593d34722058e56b19a
+0, 29, 29,1,   460800, be0950c431591485ed4de678f8f17187
+0, 30, 30,1,   460800, 0ef4b6a8d2e3d455d697deaf730cf402
+0, 31, 31,1,   460800, f74302190c8e47120b9597073525b08e
+0, 32, 32,1,   460800, 129fb2cc916aa16f8fee053ae89c31b3
+0, 33, 33,1,   460800, 0186eacb73263bb0ae02c20f827dd650
+0, 34, 34,1,   460800, f3c0245c28ded8d497665c87e66531de
+0, 35, 35,1,   460800, e550cae2b446a5460a7201ef20ad74fd
+0,

[FFmpeg-cvslog] fate-mpeg4: use TARGET_SAMPLES for resize tests

2014-10-21 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Tue Oct 21 
09:56:23 2014 +0200| [04d8af5f17961b9b7076b8c974e360feb08787c2] | committer: 
Janne Grunau

fate-mpeg4: use TARGET_SAMPLES for resize tests

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=04d8af5f17961b9b7076b8c974e360feb08787c2
---

 tests/fate/mpeg4.mak |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/fate/mpeg4.mak b/tests/fate/mpeg4.mak
index 1565247..f9b94e6 100644
--- a/tests/fate/mpeg4.mak
+++ b/tests/fate/mpeg4.mak
@@ -1,7 +1,7 @@
 
 MPEG4_RESOLUTION_CHANGE = down-down down-up up-down up-up
 
-fate-mpeg4-resolution-change-%: CMD = framemd5 -flags +bitexact -idct simple 
-i $(SAMPLES)/mpeg4/resize_$(@:fate-mpeg4-resolution-change-%=%).h263
+fate-mpeg4-resolution-change-%: CMD = framemd5 -flags +bitexact -idct simple 
-i $(TARGET_SAMPLES)/mpeg4/resize_$(@:fate-mpeg4-resolution-change-%=%).h263
 
 FATE_MPEG4-$(call DEMDEC, H263, H263) := $(addprefix 
fate-mpeg4-resolution-change-, $(MPEG4_RESOLUTION_CHANGE))
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] arm: make ff_mlp_filter_channel_arm and ff_mlp_rematrix_channel_arm position independent

2014-12-09 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Dec  8 
13:25:55 2014 +0100| [581c7f0e12b1fa39f73d683e54d6ecda0772c5a9] | committer: 
Janne Grunau

arm: make ff_mlp_filter_channel_arm and ff_mlp_rematrix_channel_arm position 
independent

No significant difference in used cpu cycles on a cortex-a9.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=581c7f0e12b1fa39f73d683e54d6ecda0772c5a9
---

 libavcodec/arm/mlpdsp_armv5te.S |   23 +--
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/libavcodec/arm/mlpdsp_armv5te.S b/libavcodec/arm/mlpdsp_armv5te.S
index cf7d367..fecbe29 100644
--- a/libavcodec/arm/mlpdsp_armv5te.S
+++ b/libavcodec/arm/mlpdsp_armv5te.S
@@ -338,22 +338,23 @@ T   orr AC0, AC0, AC1
 .endm
 
 .macro switch_on_fir_taps  mask_minus1, shift_0, shift_8, iir_taps
-A   ldr pc, [pc, a3, lsl #2] // firorder is in range 0-(8-iir_taps)
+A   ldr CO0, [pc, a3, lsl #2]   // firorder is in range 0-(8-iir_taps)
+A   add pc,  pc,  CO0
 T   tbh [pc, a3, lsl #1]
 0:
-A   .word   0, 70f, 71f, 72f, 73f, 74f
+A   .word   (70f - 0b) - 4, (71f - 0b) - 4, (72f - 0b) - 4, (73f - 0b) - 
4, (74f - 0b) - 4
 T   .hword  (70f - 0b) / 2, (71f - 0b) / 2, (72f - 0b) / 2, (73f - 0b) / 
2, (74f - 0b) / 2
  .if \iir_taps <= 3
-A   .word   75f
+A   .word   (75f - 0b) - 4
 T   .hword  (75f - 0b) / 2
   .if \iir_taps <= 2
-A   .word   76f
+A   .word   (76f - 0b) - 4
 T   .hword  (76f - 0b) / 2
.if \iir_taps <= 1
-A   .word   77f
+A   .word   (77f - 0b) - 4
 T   .hword  (77f - 0b) / 2
 .if \iir_taps == 0
-A   .word   78f
+A   .word   (78f - 0b) - 4
 T   .hword  (78f - 0b) / 2
 .endif
.endif
@@ -379,10 +380,11 @@ T   .hword  (78f - 0b) / 2
 .endm
 
 .macro switch_on_iir_taps  mask_minus1, shift_0, shift_8
-A   ldr pc, [pc, a4, lsl #2] // irorder is in range 0-4
+A   ldr CO0, [pc, a4, lsl #2]   // irorder is in range 0-4
+A   add pc,  pc,  CO0
 T   tbh [pc, a4, lsl #1]
 0:
-A   .word   0, 60f, 61f, 62f, 63f, 64f
+A   .word   (60f - 0b) - 4, (61f - 0b) - 4, (62f - 0b) - 4, (63f - 0b) - 
4, (64f - 0b) - 4
 T   .hword  (60f - 0b) / 2, (61f - 0b) / 2, (62f - 0b) / 2, (63f - 0b) / 
2, (64f - 0b) / 2
 60: switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 0
 61: switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 1
@@ -604,10 +606,11 @@ function ff_mlp_rematrix_channel_arm, export=1
 cmp v5, #1
 beq 11f
 blo 10f
-A   ldr pc, [pc, v5, lsl #2]
+A   ldr v5,  [pc,  v5,  lsl #2]
+A   add pc,  pc,  v5
 T   tbh [pc, v5, lsl #1]
 0:
-A   .word   0, 0, 0, 12f, 13f, 14f, 15f, 16f, 17f, 18f, 19f, 20f, 21f, 
22f, 23f, 24f, 25f
+A   .word   0, 0, (12f - 0b) - 4, (13f - 0b) - 4, (14f - 0b) - 4, (15f - 
0b) - 4, (16f - 0b) - 4, (17f - 0b) - 4, (18f - 0b) - 4, (19f - 0b) - 4, (20f - 
0b) - 4, (21f - 0b) - 4, (22f - 0b) - 4, (23f - 0b) - 4, (24f - 0b) - 4, (25f - 
0b) - 4
 T   .hword  0, 0, (12f - 0b) / 2, (13f - 0b) / 2, (14f - 0b) / 2, (15f - 
0b) / 2
 T   .hword  (16f - 0b) / 2, (17f - 0b) / 2, (18f - 0b) / 2, (19f - 0b) / 2
 T   .hword  (20f - 0b) / 2, (21f - 0b) / 2, (22f - 0b) / 2, (23f - 0b) / 
2, (24f - 0b) / 2, (25f - 0b) / 2

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] arm: mlpdsp: handle pic offset calculation in a macro

2014-12-09 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Dec  8 
14:10:18 2014 +0100| [4c81613df499ba81d64ea102b38d0c6686cc304c] | committer: 
Janne Grunau

arm: mlpdsp: handle pic offset calculation in a macro

Makes the code easier to read since it hides different offset
calculations for arm and thumb mode.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4c81613df499ba81d64ea102b38d0c6686cc304c
---

 libavcodec/arm/mlpdsp_armv5te.S |   36 
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/libavcodec/arm/mlpdsp_armv5te.S b/libavcodec/arm/mlpdsp_armv5te.S
index fecbe29..4272dae 100644
--- a/libavcodec/arm/mlpdsp_armv5te.S
+++ b/libavcodec/arm/mlpdsp_armv5te.S
@@ -43,6 +43,14 @@ I   .reqip
 PSAMP   .reqlr
 
 
+.macro branch_pic_label first, remainder:vararg
+A   .word   \first   - 4
+T   .hword  (\first) / 2
+.ifnb   \remainder
+branch_pic_label \remainder
+.endif
+.endm
+
 // Some macros that do loads/multiplies where the register number is determined
 // from an assembly-time expression. Boy is GNU assembler's syntax ugly...
 
@@ -342,20 +350,16 @@ A   ldr CO0, [pc, a3, lsl #2]   // firorder is in 
range 0-(8-iir_taps)
 A   add pc,  pc,  CO0
 T   tbh [pc, a3, lsl #1]
 0:
-A   .word   (70f - 0b) - 4, (71f - 0b) - 4, (72f - 0b) - 4, (73f - 0b) - 
4, (74f - 0b) - 4
-T   .hword  (70f - 0b) / 2, (71f - 0b) / 2, (72f - 0b) / 2, (73f - 0b) / 
2, (74f - 0b) / 2
+branch_pic_label (70f - 0b), (71f - 0b), (72f - 0b), (73f - 0b)
+branch_pic_label (74f - 0b)
  .if \iir_taps <= 3
-A   .word   (75f - 0b) - 4
-T   .hword  (75f - 0b) / 2
+branch_pic_label (75f - 0b)
   .if \iir_taps <= 2
-A   .word   (76f - 0b) - 4
-T   .hword  (76f - 0b) / 2
+branch_pic_label (76f - 0b)
.if \iir_taps <= 1
-A   .word   (77f - 0b) - 4
-T   .hword  (77f - 0b) / 2
+branch_pic_label (77f - 0b)
 .if \iir_taps == 0
-A   .word   (78f - 0b) - 4
-T   .hword  (78f - 0b) / 2
+branch_pic_label (78f - 0b)
 .endif
.endif
   .endif
@@ -384,8 +388,8 @@ A   ldr CO0, [pc, a4, lsl #2]   // irorder is in 
range 0-4
 A   add pc,  pc,  CO0
 T   tbh [pc, a4, lsl #1]
 0:
-A   .word   (60f - 0b) - 4, (61f - 0b) - 4, (62f - 0b) - 4, (63f - 0b) - 
4, (64f - 0b) - 4
-T   .hword  (60f - 0b) / 2, (61f - 0b) / 2, (62f - 0b) / 2, (63f - 0b) / 
2, (64f - 0b) / 2
+branch_pic_label (60f - 0b), (61f - 0b), (62f - 0b), (63f - 0b)
+branch_pic_label (64f - 0b)
 60: switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 0
 61: switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 1
 62: switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 2
@@ -610,10 +614,10 @@ A   ldr v5,  [pc,  v5,  lsl #2]
 A   add pc,  pc,  v5
 T   tbh [pc, v5, lsl #1]
 0:
-A   .word   0, 0, (12f - 0b) - 4, (13f - 0b) - 4, (14f - 0b) - 4, (15f - 
0b) - 4, (16f - 0b) - 4, (17f - 0b) - 4, (18f - 0b) - 4, (19f - 0b) - 4, (20f - 
0b) - 4, (21f - 0b) - 4, (22f - 0b) - 4, (23f - 0b) - 4, (24f - 0b) - 4, (25f - 
0b) - 4
-T   .hword  0, 0, (12f - 0b) / 2, (13f - 0b) / 2, (14f - 0b) / 2, (15f - 
0b) / 2
-T   .hword  (16f - 0b) / 2, (17f - 0b) / 2, (18f - 0b) / 2, (19f - 0b) / 2
-T   .hword  (20f - 0b) / 2, (21f - 0b) / 2, (22f - 0b) / 2, (23f - 0b) / 
2, (24f - 0b) / 2, (25f - 0b) / 2
+branch_pic_label  0,  0, (12f - 0b), (13f - 0b)
+branch_pic_label (14f - 0b), (15f - 0b), (16f - 0b), (17f - 0b)
+branch_pic_label (18f - 0b), (19f - 0b), (20f - 0b), (21f - 0b)
+branch_pic_label (22f - 0b), (23f - 0b), (24f - 0b), (25f - 0b)
 10: switch_on_au_size  0
 11: switch_on_au_size  1
 12: switch_on_au_size  2

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] fate: add dolby true hd tests

2014-12-09 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Dec  8 
16:21:43 2014 +0100| [d2f1d42b18787e4fcb28864d9d9f701dd64a5747] | committer: 
Janne Grunau

fate: add dolby true hd tests

The existing meridian audio test does not test
ff_mlp_rematrix_channel_arm. This sample (first 640k of
https://samples.libav.org/A-codecs/TrueHD/TrueHD.raw) uses
ff_mlp_rematrix_channel_arm. Since this sample has 5.1 channels it also
allows testing the integrated downmixing.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d2f1d42b18787e4fcb28864d9d9f701dd64a5747
---

 tests/fate/lossless-audio.mak  |6 ++
 tests/ref/fate/lossless-truehd-5.1 |1 +
 tests/ref/fate/lossless-truehd-5.1-downmix-2.0 |1 +
 3 files changed, 8 insertions(+)

diff --git a/tests/fate/lossless-audio.mak b/tests/fate/lossless-audio.mak
index 3638f17..751c721 100644
--- a/tests/fate/lossless-audio.mak
+++ b/tests/fate/lossless-audio.mak
@@ -15,6 +15,12 @@ fate-lossless-tak: CMD = md5 -i 
$(TARGET_SAMPLES)/lossless-audio/luckynight-part
 fate-lossless-tak: CMP = oneline
 fate-lossless-tak: REF = a28d4e5f2192057f7d4bece870f40bd0
 
+FATE_TRUEHD = fate-lossless-truehd-5.1 fate-lossless-truehd-5.1-downmix-2.0
+fate-lossless-truehd-5.1: CMD = md5 -f truehd -i $(TARGET_SAMPLES)/lossless-audio/truehd_5.1.raw -f s32le
+fate-lossless-truehd-5.1-downmix-2.0: CMD = md5 -f truehd -request_channel_layout 2 -i $(TARGET_SAMPLES)/lossless-audio/truehd_5.1.raw -f s32le
+fate-lossless-truehd: $(FATE_TRUEHD)
+FATE_SAMPLES_AVCONV-$(call DEMDEC, TRUEHD, TRUEHD) += $(FATE_TRUEHD)
+
 FATE_SAMPLES_AVCONV-$(call DEMDEC, TTA, TTA) += fate-lossless-tta
 fate-lossless-tta: CMD = crc -i $(TARGET_SAMPLES)/lossless-audio/inside.tta
 
diff --git a/tests/ref/fate/lossless-truehd-5.1 
b/tests/ref/fate/lossless-truehd-5.1
new file mode 100644
index 0000000..373b917
--- /dev/null
+++ b/tests/ref/fate/lossless-truehd-5.1
@@ -0,0 +1 @@
+95d8aac39dd9f0d7fb83dc7b6f88df35
diff --git a/tests/ref/fate/lossless-truehd-5.1-downmix-2.0 
b/tests/ref/fate/lossless-truehd-5.1-downmix-2.0
new file mode 100644
index 0000000..f4afbc1
--- /dev/null
+++ b/tests/ref/fate/lossless-truehd-5.1-downmix-2.0
@@ -0,0 +1 @@
+a269aee0051d4400c9117136f08c9767

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] doc: fate: remove outdated SSH key fingerprint

2015-03-16 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Mar  9 
23:19:55 2015 +0100| [a9d60c390f35f3954821bd635fd31bbb5036b29d] | committer: 
Janne Grunau

doc: fate: remove outdated SSH key fingerprint

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a9d60c390f35f3954821bd635fd31bbb5036b29d
---

 doc/fate.texi |1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/fate.texi b/doc/fate.texi
index 0185d87..1d6d1d1 100644
--- a/doc/fate.texi
+++ b/doc/fate.texi
@@ -165,4 +165,3 @@ through @command{ssh}.
 @section Submitting Reports
 In order to send reports you need to create an @command{ssh} key and send it
 to @email{root@@libav.org}.
-The current server fingerprint is 
@var{a4:99:d7:d3:1c:92:0d:56:d6:d5:61:be:01:ae:7d:e6}

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] fate: add explicit support for the toolchain configure option

2015-03-16 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Thu Feb 26 22:04:12 
2015 +0100| [f01c77157789b8e3a59ed2c9646faf8299e41641] | committer: Janne Grunau

fate: add explicit support for the toolchain configure option

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f01c77157789b8e3a59ed2c9646faf8299e41641
---

 tests/fate.sh |1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/fate.sh b/tests/fate.sh
index af0f6c0..f9f8b95 100755
--- a/tests/fate.sh
+++ b/tests/fate.sh
@@ -49,6 +49,7 @@ configure()(
 --enable-gpl\
 ${arch:+--arch=$arch}   \
 ${cpu:+--cpu="$cpu"}\
+${toolchain:+--toolchain="$toolchain"}  \
 ${cross_prefix:+--cross-prefix="$cross_prefix"} \
 ${as:+--as="$as"}   \
 ${cc:+--cc="$cc"}   \

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] configure: handle Apple's armv7s in probe_arm_arch()

2015-03-16 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Wed Feb 25 18:16:52 
2015 +0100| [3e1b5cbc9ab0a61c9bec08a1df1404b9da6ed7ea] | committer: Janne Grunau

configure: handle Apple's armv7s in probe_arm_arch()

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3e1b5cbc9ab0a61c9bec08a1df1404b9da6ed7ea
---

 configure |1 +
 1 file changed, 1 insertion(+)

diff --git a/configure b/configure
index c237979..5eef9f1 100755
--- a/configure
+++ b/configure
@@ -3198,6 +3198,7 @@ elif enabled arm; then
 elif check_arm_arch 6T2;  then echo armv6t2;
 elif check_arm_arch 7;then echo armv7;
 elif check_arm_arch 7A  7_A;  then echo armv7-a;
+elif check_arm_arch 7S;   then echo armv7-a;
 elif check_arm_arch 7R  7_R;  then echo armv7-r;
 elif check_arm_arch 7M  7_M;  then echo armv7-m;
 elif check_arm_arch 7EM 7E_M; then echo armv7-m;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] configure: move cross_compile checks after the toolchain section

2015-03-16 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Thu Feb 26 21:54:55 
2015 +0100| [faab8f9fcb4ffeb967dc6872c0f1e9da719106ce] | committer: Janne Grunau

configure: move cross_compile checks after the toolchain section

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=faab8f9fcb4ffeb967dc6872c0f1e9da719106ce
---

 configure |   14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/configure b/configure
index 5eef9f1..7340844 100755
--- a/configure
+++ b/configure
@@ -2494,13 +2494,6 @@ disabled logging && logfile=/dev/null
 echo "# $0 $LIBAV_CONFIGURATION" > $logfile
 set >> $logfile
 
-test -n "$cross_prefix" && enable cross_compile
-
-if enabled cross_compile; then
-test -n "$arch" && test -n "$target_os" ||
-die "Must specify target arch and OS when cross-compiling"
-fi
-
 case "$toolchain" in
 clang-asan)
 cc_default="clang"
@@ -2585,6 +2578,13 @@ case "$toolchain" in
 ;;
 esac
 
+test -n "$cross_prefix" && enable cross_compile
+
+if enabled cross_compile; then
+test -n "$arch" && test -n "$target_os" ||
+die "Must specify target arch and OS when cross-compiling"
+fi
+
 ar_default="${cross_prefix}${ar_default}"
 cc_default="${cross_prefix}${cc_default}"
 nm_default="${cross_prefix}${nm_default}"

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] h264: aarch64: intra prediction optimisations

2015-07-20 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Sun Jul 12 
18:30:09 2015 +0200| [f56d8d8dd72b1ab52aa814c5a0fccabf8040ef68] | committer: 
Janne Grunau

h264: aarch64: intra prediction optimisations

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f56d8d8dd72b1ab52aa814c5a0fccabf8040ef68
---

 libavcodec/aarch64/Makefile|2 +
 libavcodec/aarch64/h264pred_init.c |   93 ++
 libavcodec/aarch64/h264pred_neon.S |  361 
 libavcodec/h264pred.c  |8 +-
 libavcodec/h264pred.h  |3 +
 5 files changed, 465 insertions(+), 2 deletions(-)

diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index 2afff29..d001b34 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -1,6 +1,7 @@
 OBJS-$(CONFIG_FFT)  += aarch64/fft_init_aarch64.o
 OBJS-$(CONFIG_H264CHROMA)   += aarch64/h264chroma_init_aarch64.o
 OBJS-$(CONFIG_H264DSP)  += aarch64/h264dsp_init_aarch64.o
+OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
 OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
 OBJS-$(CONFIG_HPELDSP)  += aarch64/hpeldsp_init_aarch64.o
 OBJS-$(CONFIG_IMDCT15)  += aarch64/imdct15_init.o
@@ -18,6 +19,7 @@ NEON-OBJS-$(CONFIG_FFT) += aarch64/fft_neon.o
 NEON-OBJS-$(CONFIG_H264CHROMA)  += aarch64/h264cmc_neon.o
 NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o  \
                                aarch64/h264idct_neon.o
+NEON-OBJS-$(CONFIG_H264PRED)+= aarch64/h264pred_neon.o
 NEON-OBJS-$(CONFIG_H264QPEL)+= aarch64/h264qpel_neon.o \
                                aarch64/hpeldsp_neon.o
 NEON-OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_neon.o
diff --git a/libavcodec/aarch64/h264pred_init.c 
b/libavcodec/aarch64/h264pred_init.c
new file mode 100644
index 0000000..8f912cb
--- /dev/null
+++ b/libavcodec/aarch64/h264pred_init.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard 
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/h264pred.h"
+
+void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_hor_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_plane_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_128_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_left_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_top_dc_neon(uint8_t *src, ptrdiff_t stride);
+
+void ff_pred8x8_vert_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_hor_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_plane_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_128_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_left_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_top_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l0t_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride);
+
+static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
+const int bit_depth,
+const int chroma_format_idc)
+{
+const int high_depth = bit_depth > 8;
+
+if (high_depth)
+return;
+
+if (chroma_format_idc <= 1) {
+h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vert_neon;
+h->pred8x8[HOR_PRED8x8  ] = ff_pred8x8_hor_neon;
+if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8)
+h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon;
+h->pred8x8[DC_128_PRED8x8   ] = ff_pred8x8_128_dc_neon;
+if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_

[FFmpeg-cvslog] arm: use a local label instead of the function symbol in ff_prefetch_arm

2015-07-20 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Jul 20 
10:46:15 2015 +0200| [9ed6f9a17cc1f7d3699a1223783dadc1ee222069] | committer: 
Janne Grunau

arm: use a local label instead of the function symbol in ff_prefetch_arm

Avoids a relocation which might end up out of range for thumb2.

Reported-By: Ludovic Fauvet 
Bug-Id: https://bugs.webkit.org/show_bug.cgi?id=137022
CC: libav-sta...@libav.org

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9ed6f9a17cc1f7d3699a1223783dadc1ee222069
---

 libavcodec/arm/videodsp_armv5te.S |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/arm/videodsp_armv5te.S 
b/libavcodec/arm/videodsp_armv5te.S
index bbd0a61..0510019 100644
--- a/libavcodec/arm/videodsp_armv5te.S
+++ b/libavcodec/arm/videodsp_armv5te.S
@@ -23,9 +23,10 @@
 #include "libavutil/arm/asm.S"
 
 function ff_prefetch_arm, export=1
+1:
 subs    r2,  r2,  #1
 pld [r0]
 add r0,  r0,  r1
-bne X(ff_prefetch_arm)
+bne 1b
 bx  lr
 endfunc

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] checkasm: remove empty array initializer list in h264pred test

2015-07-22 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Mon Jul 20 
23:17:57 2015 +0200| [e605bf3b590d295f215fcc9fd58eb11be55b68cb] | committer: 
Janne Grunau

checkasm: remove empty array initializer list in h264pred test

Fixes MSVC compilation.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e605bf3b590d295f215fcc9fd58eb11be55b68cb
---

 tests/checkasm/h264pred.c |3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/checkasm/h264pred.c b/tests/checkasm/h264pred.c
index a5eb033..40e949a 100644
--- a/tests/checkasm/h264pred.c
+++ b/tests/checkasm/h264pred.c
@@ -87,8 +87,7 @@ static const char * const pred8x8_modes[4][11] = {
 [LEFT_DC_PRED8x8] = "left_dc_rv40",
 [TOP_DC_PRED8x8 ] = "top_dc_rv40",
 },
-{ /* SVQ3 */
-},
+/* nothing for SVQ3 */
 };
 
 static const char * const pred16x16_modes[4][9] = {

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] ac3_parser: add required padding for GetBitContext buffer

2015-08-20 Thread Janne Grunau
ffmpeg | branch: release/2.4 | Janne Grunau  | Mon Jun  
8 14:48:26 2015 +0200| [a9f108bd78e842a47ade2f7c8b22a1764d01d4e6] | committer: 
Janne Grunau

ac3_parser: add required padding for GetBitContext buffer

Fixes stack buffer overflow errors detected by address sanitizer in
various fate tests.

CC: libav-sta...@libav.org

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a9f108bd78e842a47ade2f7c8b22a1764d01d4e6
---

 libavcodec/ac3_parser.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/ac3_parser.c b/libavcodec/ac3_parser.c
index 5ea09f8..69d88c1 100644
--- a/libavcodec/ac3_parser.c
+++ b/libavcodec/ac3_parser.c
@@ -150,7 +150,7 @@ static int ac3_sync(uint64_t state, AACAC3ParseContext 
*hdr_info,
 int err;
 union {
 uint64_t u64;
-uint8_t  u8[8];
+uint8_t  u8[8 + FF_INPUT_BUFFER_PADDING_SIZE];
 } tmp = { av_be2ne64(state) };
 AC3HeaderInfo hdr;
 GetBitContext gbc;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] aac_parser: add required padding for GetBitContext buffer

2015-08-20 Thread Janne Grunau
ffmpeg | branch: release/2.4 | Janne Grunau  | Mon Jun  
8 14:45:12 2015 +0200| [02477323b92aacdabe0a2d129eeb0c15fbd1ec9e] | committer: 
Janne Grunau

aac_parser: add required padding for GetBitContext buffer

Fixes stack buffer overflow errors detected by address sanitizer in
various fate tests.

CC: libav-sta...@libav.org

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=02477323b92aacdabe0a2d129eeb0c15fbd1ec9e
---

 libavcodec/aac_parser.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/aac_parser.c b/libavcodec/aac_parser.c
index fdaa5f8..acb05d4 100644
--- a/libavcodec/aac_parser.c
+++ b/libavcodec/aac_parser.c
@@ -34,7 +34,7 @@ static int aac_sync(uint64_t state, AACAC3ParseContext 
*hdr_info,
 int size;
 union {
 uint64_t u64;
-uint8_t  u8[8];
+uint8_t  u8[8 + FF_INPUT_BUFFER_PADDING_SIZE];
 } tmp;
 
 tmp.u64 = av_be2ne64(state);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] imc: add required padding for GetBitContext buffer

2015-08-20 Thread Janne Grunau
ffmpeg | branch: release/2.4 | Janne Grunau  | Mon Jun  
8 14:48:54 2015 +0200| [aa3ec219e1a5cc0e96ddec6ea83312ec780448f5] | committer: 
Janne Grunau

imc: add required padding for GetBitContext buffer

Fixes stack buffer overflow errors detected by address sanitizer in
fate-imc.

CC: libav-sta...@libav.org

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=aa3ec219e1a5cc0e96ddec6ea83312ec780448f5
---

 libavcodec/imc.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 500f564..26fbcd4 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -997,7 +997,7 @@ static int imc_decode_frame(AVCodecContext *avctx, void 
*data,
 
 IMCContext *q = avctx->priv_data;
 
-LOCAL_ALIGNED_16(uint16_t, buf16, [IMC_BLOCK_SIZE / 2]);
+LOCAL_ALIGNED_16(uint16_t, buf16, [(IMC_BLOCK_SIZE + FF_INPUT_BUFFER_PADDING_SIZE) / 2]);
 
 if (buf_size < IMC_BLOCK_SIZE * avctx->channels) {
 av_log(avctx, AV_LOG_ERROR, "frame too small!\n");

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] fate: test only demuxing in asf-repldata

2015-08-24 Thread Janne Grunau
ffmpeg | branch: master | Janne Grunau  | Thu Aug 20 
14:46:42 2015 +0200| [faa3f17a76333b672ce4a40cf80f678ab68bdbae] | committer: 
Janne Grunau

fate: test only demuxing in asf-repldata

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=faa3f17a76333b672ce4a40cf80f678ab68bdbae
---

 tests/fate/microsoft.mak|6 ++
 tests/ref/fate/asf-repldata |   28 ++--
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/tests/fate/microsoft.mak b/tests/fate/microsoft.mak
index 6f83d2e..30bd35a 100644
--- a/tests/fate/microsoft.mak
+++ b/tests/fate/microsoft.mak
@@ -62,7 +62,5 @@ fate-vc1-ism: CMD = framecrc -i 
$(TARGET_SAMPLES)/isom/vc1-wmapro.ism -an
 FATE_SAMPLES_AVCONV-$(CONFIG_VC1_DECODER) += $(FATE_VC1-yes)
 fate-vc1: $(FATE_VC1-yes)
 
-FATE_ASF_REPLDATA += fate-asf-repldata
-fate-asf-repldata: CMD = framecrc -i $(TARGET_SAMPLES)/asf/bug821-2.asf
-
-FATE_SAMPLES_AVCONV-$(call DEMDEC, ASF, MPEG4) += $(FATE_ASF_REPLDATA)
+FATE_SAMPLES_AVCONV-$(CONFIG_ASF_DEMUXER) += fate-asf-repldata
+fate-asf-repldata: CMD = framecrc -i $(TARGET_SAMPLES)/asf/bug821-2.asf -c copy
diff --git a/tests/ref/fate/asf-repldata b/tests/ref/fate/asf-repldata
index 6a1dbae..5abc9da 100644
--- a/tests/ref/fate/asf-repldata
+++ b/tests/ref/fate/asf-repldata
@@ -1,15 +1,15 @@
 #tb 0: 1/1000
-0,  0,  0,0,   460800, 0x85b053af
-0,122,122,0,   460800, 0xfbdbb291
-0,245,245,0,   460800, 0x1389b3d9
-0,367,367,0,   460800, 0xc81e8326
-0,490,490,0,   460800, 0xb0028333
-0,612,612,0,   460800, 0x002b8619
-0,735,735,0,   460800, 0xeb808d70
-0,857,857,0,   460800, 0xe8288d27
-0,980,980,0,   460800, 0xcfbe8bcc
-0,   1102,   1102,0,   460800, 0x682b8d38
-0,   1224,   1224,0,   460800, 0x5879cd04
-0,   1347,   1347,0,   460800, 0x88a6a3e6
-0,   1469,   1469,0,   460800, 0xc491db72
-0,   1592,   1592,0,   460800, 0xb779d5cc
+0,  0,  0,0,23374, 0x8725b3b8
+0,122,122,0,13732, 0x3ac8531a
+0,245,245,0,  615, 0xd31641b4
+0,367,367,0, 6361, 0xf263af54
+0,490,490,0,  320, 0xd6f2d6b8
+0,612,612,0, 3750, 0xfcf1d501
+0,735,735,0, 2541, 0xd9fc04f9
+0,857,857,0,  205, 0x4d38a947
+0,980,980,0, 2166, 0x2f1e7d74
+0,   1102,   1102,0, 1667, 0x0cd84b61
+0,   1224,   1224,0,13645, 0x543bd032
+0,   1347,   1347,0, 5953, 0xc3037c73
+0,   1469,   1469,0,36169, 0xca9f716d
+0,   1592,   1592,0, 3030, 0x9aba5683

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog