Re: [libav-devel] [PATCH 1/1] x86: h264qpel: add cpu flag checks for init function
On Thu, Jan 24, 2013 at 07:04:20PM +0100, Janne Grunau wrote: > The code was copied from per cpu extension init function so the checks > for supported extensions was overlooked. > --- > libavcodec/x86/h264_qpel.c | 147 > +++-- > 1 file changed, 75 insertions(+), 72 deletions(-) LGTM Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/1] x86: h264qpel: add cpu flag checks for init function
The code was copied from per cpu extension init function so the checks for supported extensions was overlooked. --- libavcodec/x86/h264_qpel.c | 147 +++-- 1 file changed, 75 insertions(+), 72 deletions(-) diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index bebf5a5..9157223 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -21,6 +21,7 @@ #include "libavutil/cpu.h" #include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" #include "libavcodec/dsputil.h" #include "libavcodec/h264qpel.h" #include "libavcodec/mpegvideo.h" @@ -530,89 +531,91 @@ QPEL16(mmxext) void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) { +#if HAVE_YASM int high_bit_depth = bit_depth > 8; int mm_flags = av_get_cpu_flags(); -#if HAVE_MMXEXT_EXTERNAL -if (!high_bit_depth) { -SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); -SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); -SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); -SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, ); -SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, ); -SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); -} else if (bit_depth == 10) { +if (EXTERNAL_MMXEXT(mm_flags)) { +if (!high_bit_depth) { +SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); +SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); +SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); +SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, ); +SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, ); +SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); +} else if (bit_depth == 10) { #if !ARCH_X86_64 -SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); -SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); -SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); -SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); -#endif -SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); -SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); -} +SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); +SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); 
+SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); +SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); #endif - -#if HAVE_SSE2_EXTERNAL -if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) { -// these functions are slower than mmx on AMD, but faster on Intel -H264_QPEL_FUNCS(0, 0, sse2); -} - -if (!high_bit_depth) { -H264_QPEL_FUNCS(0, 1, sse2); -H264_QPEL_FUNCS(0, 2, sse2); -H264_QPEL_FUNCS(0, 3, sse2); -H264_QPEL_FUNCS(1, 1, sse2); -H264_QPEL_FUNCS(1, 2, sse2); -H264_QPEL_FUNCS(1, 3, sse2); -H264_QPEL_FUNCS(2, 1, sse2); -H264_QPEL_FUNCS(2, 2, sse2); -H264_QPEL_FUNCS(2, 3, sse2); -H264_QPEL_FUNCS(3, 1, sse2); -H264_QPEL_FUNCS(3, 2, sse2); -H264_QPEL_FUNCS(3, 3, sse2); +SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); +SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); +} } -if (bit_depth == 10) { -SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); -SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); -SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); -SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); -H264_QPEL_FUNCS_10(1, 0, sse2_cache64); -H264_QPEL_FUNCS_10(2, 0, sse2_cache64); -H264_QPEL_FUNCS_10(3, 0, sse2_cache64); +if (EXTERNAL_SSE2(mm_flags)) { +if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) { +// these functions are slower than mmx on AMD, but faster on Intel +H264_QPEL_FUNCS(0, 0, sse2); +} + +if (!high_bit_depth) { +H264_QPEL_FUNCS(0, 1, sse2); +H264_QPEL_FUNCS(0, 2, sse2); +H264_QPEL_FUNCS(0, 3, sse2); +H264_QPEL_FUNCS(1, 1, sse2); +H264_QPEL_FUNCS(1, 2, sse2); +H264_QPEL_FUNCS(1, 3, sse2); +H264_QPEL_FUNCS(2, 1, sse2); +H264_QPEL_FUNCS(2, 2, sse2); +H264_QPEL_FUNCS(2, 3, sse2); +H264_QPEL_FUNCS(3, 1, sse2); +H264_QPEL_FUNCS(3, 2, sse2); +H264_QPEL_FUNCS(3, 3, sse2); +} + +if (bit_depth == 10) { +SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); +SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); +SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); +SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); 
+H264_QPEL_FUNCS_10(1, 0, sse2_cache64); +H264_QPEL_FUNCS_10(2, 0, sse2_cache64); +H264_QPEL_FUNCS_10(3, 0, sse2_cache64); +} } -#endif -#if HAVE_SSSE3_EXTERNAL -if (!h
Re: [libav-devel] [PATCH 1/1] x86: h264qpel: add cpu flag checks for init function
On Thu, Jan 24, 2013 at 06:41:38PM +0100, Janne Grunau wrote: > The code was copied from per cpu extension init function so the checks > for supported extensions was overlooked. > --- > libavcodec/x86/h264_qpel.c | 139 > - > 1 file changed, 74 insertions(+), 65 deletions(-) > > --- a/libavcodec/x86/h264_qpel.c > +++ b/libavcodec/x86/h264_qpel.c > @@ -534,85 +535,93 @@ void ff_h264qpel_init_x86(H264QpelContext *c, int > bit_depth) > int mm_flags = av_get_cpu_flags(); > > #if HAVE_MMXEXT_EXTERNAL > -if (!high_bit_depth) { > -SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); > -SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); > -} else if (bit_depth == 10) { > +if (EXTERNAL_MMXEXT(mm_flags)) { > +if (!high_bit_depth) { > +SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); > +SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); All those #if blocks are unneeded; you just need to wrap the body of the function in #if HAVE_YASM. Patch coming up ... Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 1/1] x86: h264qpel: add cpu flag checks for init function
Hi, On Thu, Jan 24, 2013 at 9:41 AM, Janne Grunau wrote: > The code was copied from per cpu extension init function so the checks > for supported extensions was overlooked. > --- > libavcodec/x86/h264_qpel.c | 139 > - > 1 file changed, 74 insertions(+), 65 deletions(-) Thanks, lgtm. Ronald ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/1] x86: h264qpel: add cpu flag checks for init function
The code was copied from per cpu extension init function so the checks for supported extensions was overlooked. --- libavcodec/x86/h264_qpel.c | 139 - 1 file changed, 74 insertions(+), 65 deletions(-) diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index bebf5a5..a1c35bf 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -21,6 +21,7 @@ #include "libavutil/cpu.h" #include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" #include "libavcodec/dsputil.h" #include "libavcodec/h264qpel.h" #include "libavcodec/mpegvideo.h" @@ -534,85 +535,93 @@ void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) int mm_flags = av_get_cpu_flags(); #if HAVE_MMXEXT_EXTERNAL -if (!high_bit_depth) { -SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); -SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); -SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); -SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, ); -SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, ); -SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); -} else if (bit_depth == 10) { +if (EXTERNAL_MMXEXT(mm_flags)) { +if (!high_bit_depth) { +SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); +SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); +SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); +SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, ); +SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, ); +SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); +} else if (bit_depth == 10) { #if !ARCH_X86_64 -SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); -SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); -SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); -SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); +SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); +SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); +SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); +SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); #endif -SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); -SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 
10_mmxext, ff_); +SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); +SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); +} } #endif #if HAVE_SSE2_EXTERNAL -if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) { -// these functions are slower than mmx on AMD, but faster on Intel -H264_QPEL_FUNCS(0, 0, sse2); -} - -if (!high_bit_depth) { -H264_QPEL_FUNCS(0, 1, sse2); -H264_QPEL_FUNCS(0, 2, sse2); -H264_QPEL_FUNCS(0, 3, sse2); -H264_QPEL_FUNCS(1, 1, sse2); -H264_QPEL_FUNCS(1, 2, sse2); -H264_QPEL_FUNCS(1, 3, sse2); -H264_QPEL_FUNCS(2, 1, sse2); -H264_QPEL_FUNCS(2, 2, sse2); -H264_QPEL_FUNCS(2, 3, sse2); -H264_QPEL_FUNCS(3, 1, sse2); -H264_QPEL_FUNCS(3, 2, sse2); -H264_QPEL_FUNCS(3, 3, sse2); -} - -if (bit_depth == 10) { -SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); -SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); -SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); -SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); -H264_QPEL_FUNCS_10(1, 0, sse2_cache64); -H264_QPEL_FUNCS_10(2, 0, sse2_cache64); -H264_QPEL_FUNCS_10(3, 0, sse2_cache64); +if (EXTERNAL_SSE2(mm_flags)) { +if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) { +// these functions are slower than mmx on AMD, but faster on Intel +H264_QPEL_FUNCS(0, 0, sse2); +} + +if (!high_bit_depth) { +H264_QPEL_FUNCS(0, 1, sse2); +H264_QPEL_FUNCS(0, 2, sse2); +H264_QPEL_FUNCS(0, 3, sse2); +H264_QPEL_FUNCS(1, 1, sse2); +H264_QPEL_FUNCS(1, 2, sse2); +H264_QPEL_FUNCS(1, 3, sse2); +H264_QPEL_FUNCS(2, 1, sse2); +H264_QPEL_FUNCS(2, 2, sse2); +H264_QPEL_FUNCS(2, 3, sse2); +H264_QPEL_FUNCS(3, 1, sse2); +H264_QPEL_FUNCS(3, 2, sse2); +H264_QPEL_FUNCS(3, 3, sse2); +} + +if (bit_depth == 10) { +SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); +SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); +SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); +SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); +H264_QPEL_FUNCS_10(1, 0, sse2_cache64); +H264_QPEL_FUNCS_10(2, 0, sse2_cache64); +H264_QPEL_FUNCS_10(3, 0, 
sse2_cache64); +} } #endif #if HAVE_SSSE3_EXTERNAL -if (!high_bit_depth) { -H264_QPEL_FUNCS(1, 0, ssse3); -H264_QPEL_F