Re: [libav-devel] [PATCH 1/1] x86: h264qpel: add cpu flag checks for init function

2013-01-24 Thread Diego Biurrun
On Thu, Jan 24, 2013 at 07:04:20PM +0100, Janne Grunau wrote:
> The code was copied from the per-CPU-extension init functions, so the checks
> for supported extensions were overlooked.
> ---
>  libavcodec/x86/h264_qpel.c | 147 +++--
>  1 file changed, 75 insertions(+), 72 deletions(-)

LGTM

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 1/1] x86: h264qpel: add cpu flag checks for init function

2013-01-24 Thread Janne Grunau
The code was copied from the per-CPU-extension init functions, so the checks
for supported extensions were overlooked.
---
 libavcodec/x86/h264_qpel.c | 147 +++--
 1 file changed, 75 insertions(+), 72 deletions(-)

diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c
index bebf5a5..9157223 100644
--- a/libavcodec/x86/h264_qpel.c
+++ b/libavcodec/x86/h264_qpel.c
@@ -21,6 +21,7 @@
 
 #include "libavutil/cpu.h"
 #include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
 #include "libavcodec/dsputil.h"
 #include "libavcodec/h264qpel.h"
 #include "libavcodec/mpegvideo.h"
@@ -530,89 +531,91 @@ QPEL16(mmxext)
 
 void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
 {
+#if HAVE_YASM
 int high_bit_depth = bit_depth > 8;
 int mm_flags = av_get_cpu_flags();
 
-#if HAVE_MMXEXT_EXTERNAL
-if (!high_bit_depth) {
-SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
-SET_QPEL_FUNCS(put_h264_qpel, 1,  8, mmxext, );
-SET_QPEL_FUNCS(put_h264_qpel, 2,  4, mmxext, );
-SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, );
-SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, mmxext, );
-SET_QPEL_FUNCS(avg_h264_qpel, 2,  4, mmxext, );
-} else if (bit_depth == 10) {
+if (EXTERNAL_MMXEXT(mm_flags)) {
+if (!high_bit_depth) {
+SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
+SET_QPEL_FUNCS(put_h264_qpel, 1,  8, mmxext, );
+SET_QPEL_FUNCS(put_h264_qpel, 2,  4, mmxext, );
+SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, );
+SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, mmxext, );
+SET_QPEL_FUNCS(avg_h264_qpel, 2,  4, mmxext, );
+} else if (bit_depth == 10) {
 #if !ARCH_X86_64
-SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
-SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
-SET_QPEL_FUNCS(put_h264_qpel, 1,  8, 10_mmxext, ff_);
-SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, 10_mmxext, ff_);
-#endif
-SET_QPEL_FUNCS(put_h264_qpel, 2, 4,  10_mmxext, ff_);
-SET_QPEL_FUNCS(avg_h264_qpel, 2, 4,  10_mmxext, ff_);
-}
+SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
+SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
+SET_QPEL_FUNCS(put_h264_qpel, 1,  8, 10_mmxext, ff_);
+SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, 10_mmxext, ff_);
 #endif
-
-#if HAVE_SSE2_EXTERNAL
-if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) {
-// these functions are slower than mmx on AMD, but faster on Intel
-H264_QPEL_FUNCS(0, 0, sse2);
-}
-
-if (!high_bit_depth) {
-H264_QPEL_FUNCS(0, 1, sse2);
-H264_QPEL_FUNCS(0, 2, sse2);
-H264_QPEL_FUNCS(0, 3, sse2);
-H264_QPEL_FUNCS(1, 1, sse2);
-H264_QPEL_FUNCS(1, 2, sse2);
-H264_QPEL_FUNCS(1, 3, sse2);
-H264_QPEL_FUNCS(2, 1, sse2);
-H264_QPEL_FUNCS(2, 2, sse2);
-H264_QPEL_FUNCS(2, 3, sse2);
-H264_QPEL_FUNCS(3, 1, sse2);
-H264_QPEL_FUNCS(3, 2, sse2);
-H264_QPEL_FUNCS(3, 3, sse2);
+SET_QPEL_FUNCS(put_h264_qpel, 2, 4,  10_mmxext, ff_);
+SET_QPEL_FUNCS(avg_h264_qpel, 2, 4,  10_mmxext, ff_);
+}
 }
 
-if (bit_depth == 10) {
-SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
-SET_QPEL_FUNCS(put_h264_qpel, 1,  8, 10_sse2, ff_);
-SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
-SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, 10_sse2, ff_);
-H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
-H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
-H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
+if (EXTERNAL_SSE2(mm_flags)) {
+if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) {
+// these functions are slower than mmx on AMD, but faster on Intel
+H264_QPEL_FUNCS(0, 0, sse2);
+}
+
+if (!high_bit_depth) {
+H264_QPEL_FUNCS(0, 1, sse2);
+H264_QPEL_FUNCS(0, 2, sse2);
+H264_QPEL_FUNCS(0, 3, sse2);
+H264_QPEL_FUNCS(1, 1, sse2);
+H264_QPEL_FUNCS(1, 2, sse2);
+H264_QPEL_FUNCS(1, 3, sse2);
+H264_QPEL_FUNCS(2, 1, sse2);
+H264_QPEL_FUNCS(2, 2, sse2);
+H264_QPEL_FUNCS(2, 3, sse2);
+H264_QPEL_FUNCS(3, 1, sse2);
+H264_QPEL_FUNCS(3, 2, sse2);
+H264_QPEL_FUNCS(3, 3, sse2);
+}
+
+if (bit_depth == 10) {
+SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
+SET_QPEL_FUNCS(put_h264_qpel, 1,  8, 10_sse2, ff_);
+SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
+SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, 10_sse2, ff_);
+H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
+H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
+H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
+}
 }
-#endif
 
-#if HAVE_SSSE3_EXTERNAL
-if (!h

Re: [libav-devel] [PATCH 1/1] x86: h264qpel: add cpu flag checks for init function

2013-01-24 Thread Diego Biurrun
On Thu, Jan 24, 2013 at 06:41:38PM +0100, Janne Grunau wrote:
> The code was copied from the per-CPU-extension init functions, so the checks
> for supported extensions were overlooked.
> ---
>  libavcodec/x86/h264_qpel.c | 139 -
>  1 file changed, 74 insertions(+), 65 deletions(-)
> 
> --- a/libavcodec/x86/h264_qpel.c
> +++ b/libavcodec/x86/h264_qpel.c
> @@ -534,85 +535,93 @@ void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
>  int mm_flags = av_get_cpu_flags();
>  
>  #if HAVE_MMXEXT_EXTERNAL
> -if (!high_bit_depth) {
> -SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
> -SET_QPEL_FUNCS(put_h264_qpel, 1,  8, mmxext, );
> -} else if (bit_depth == 10) {
> +if (EXTERNAL_MMXEXT(mm_flags)) {
> +if (!high_bit_depth) {
> +SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
> +SET_QPEL_FUNCS(put_h264_qpel, 1,  8, mmxext, );

All those #ifs are unneeded; you just need to wrap the body of the
function in #if HAVE_YASM.  Patch coming up ...

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 1/1] x86: h264qpel: add cpu flag checks for init function

2013-01-24 Thread Ronald S. Bultje
Hi,

On Thu, Jan 24, 2013 at 9:41 AM, Janne Grunau wrote:
> The code was copied from the per-CPU-extension init functions, so the checks
> for supported extensions were overlooked.
> ---
>  libavcodec/x86/h264_qpel.c | 139 -
>  1 file changed, 74 insertions(+), 65 deletions(-)

Thanks, lgtm.

Ronald
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 1/1] x86: h264qpel: add cpu flag checks for init function

2013-01-24 Thread Janne Grunau
The code was copied from the per-CPU-extension init functions, so the checks
for supported extensions were overlooked.
---
 libavcodec/x86/h264_qpel.c | 139 -
 1 file changed, 74 insertions(+), 65 deletions(-)

diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c
index bebf5a5..a1c35bf 100644
--- a/libavcodec/x86/h264_qpel.c
+++ b/libavcodec/x86/h264_qpel.c
@@ -21,6 +21,7 @@
 
 #include "libavutil/cpu.h"
 #include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
 #include "libavcodec/dsputil.h"
 #include "libavcodec/h264qpel.h"
 #include "libavcodec/mpegvideo.h"
@@ -534,85 +535,93 @@ void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
 int mm_flags = av_get_cpu_flags();
 
 #if HAVE_MMXEXT_EXTERNAL
-if (!high_bit_depth) {
-SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
-SET_QPEL_FUNCS(put_h264_qpel, 1,  8, mmxext, );
-SET_QPEL_FUNCS(put_h264_qpel, 2,  4, mmxext, );
-SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, );
-SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, mmxext, );
-SET_QPEL_FUNCS(avg_h264_qpel, 2,  4, mmxext, );
-} else if (bit_depth == 10) {
+if (EXTERNAL_MMXEXT(mm_flags)) {
+if (!high_bit_depth) {
+SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
+SET_QPEL_FUNCS(put_h264_qpel, 1,  8, mmxext, );
+SET_QPEL_FUNCS(put_h264_qpel, 2,  4, mmxext, );
+SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, );
+SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, mmxext, );
+SET_QPEL_FUNCS(avg_h264_qpel, 2,  4, mmxext, );
+} else if (bit_depth == 10) {
 #if !ARCH_X86_64
-SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
-SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
-SET_QPEL_FUNCS(put_h264_qpel, 1,  8, 10_mmxext, ff_);
-SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, 10_mmxext, ff_);
+SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
+SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
+SET_QPEL_FUNCS(put_h264_qpel, 1,  8, 10_mmxext, ff_);
+SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, 10_mmxext, ff_);
 #endif
-SET_QPEL_FUNCS(put_h264_qpel, 2, 4,  10_mmxext, ff_);
-SET_QPEL_FUNCS(avg_h264_qpel, 2, 4,  10_mmxext, ff_);
+SET_QPEL_FUNCS(put_h264_qpel, 2, 4,  10_mmxext, ff_);
+SET_QPEL_FUNCS(avg_h264_qpel, 2, 4,  10_mmxext, ff_);
+}
 }
 #endif
 
 #if HAVE_SSE2_EXTERNAL
-if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) {
-// these functions are slower than mmx on AMD, but faster on Intel
-H264_QPEL_FUNCS(0, 0, sse2);
-}
-
-if (!high_bit_depth) {
-H264_QPEL_FUNCS(0, 1, sse2);
-H264_QPEL_FUNCS(0, 2, sse2);
-H264_QPEL_FUNCS(0, 3, sse2);
-H264_QPEL_FUNCS(1, 1, sse2);
-H264_QPEL_FUNCS(1, 2, sse2);
-H264_QPEL_FUNCS(1, 3, sse2);
-H264_QPEL_FUNCS(2, 1, sse2);
-H264_QPEL_FUNCS(2, 2, sse2);
-H264_QPEL_FUNCS(2, 3, sse2);
-H264_QPEL_FUNCS(3, 1, sse2);
-H264_QPEL_FUNCS(3, 2, sse2);
-H264_QPEL_FUNCS(3, 3, sse2);
-}
-
-if (bit_depth == 10) {
-SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
-SET_QPEL_FUNCS(put_h264_qpel, 1,  8, 10_sse2, ff_);
-SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
-SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, 10_sse2, ff_);
-H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
-H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
-H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
+if (EXTERNAL_SSE2(mm_flags)) {
+if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) {
+// these functions are slower than mmx on AMD, but faster on Intel
+H264_QPEL_FUNCS(0, 0, sse2);
+}
+
+if (!high_bit_depth) {
+H264_QPEL_FUNCS(0, 1, sse2);
+H264_QPEL_FUNCS(0, 2, sse2);
+H264_QPEL_FUNCS(0, 3, sse2);
+H264_QPEL_FUNCS(1, 1, sse2);
+H264_QPEL_FUNCS(1, 2, sse2);
+H264_QPEL_FUNCS(1, 3, sse2);
+H264_QPEL_FUNCS(2, 1, sse2);
+H264_QPEL_FUNCS(2, 2, sse2);
+H264_QPEL_FUNCS(2, 3, sse2);
+H264_QPEL_FUNCS(3, 1, sse2);
+H264_QPEL_FUNCS(3, 2, sse2);
+H264_QPEL_FUNCS(3, 3, sse2);
+}
+
+if (bit_depth == 10) {
+SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
+SET_QPEL_FUNCS(put_h264_qpel, 1,  8, 10_sse2, ff_);
+SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
+SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, 10_sse2, ff_);
+H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
+H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
+H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
+}
 }
 #endif
 
 #if HAVE_SSSE3_EXTERNAL
-if (!high_bit_depth) {
-H264_QPEL_FUNCS(1, 0, ssse3);
-H264_QPEL_F