Re: [FFmpeg-devel] [PATCH 2/2] lavu/intmath.h: Move x86 only msvc/icl functions to x86 specific header.
On 10/16/2015 3:04 AM, Matt Oliver wrote: > New patch attached. The HAVE_FAST_CLZ check is back and the intel functions > now apply to both icl and icc. Should be ok. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 2/2] lavu/intmath.h: Move x86 only msvc/icl functions to x86 specific header.
On 16 October 2015 at 16:13, James Almer wrote: > On 10/15/2015 10:26 PM, Matt Oliver wrote: > > On 16 October 2015 at 01:10, Matt Oliver wrote: > > > >> > Some of the existing msvc/icl optimised versions for ctz and log2 use > >> > intrinsics that are only usable on x86 systems. This patch moves them > from > >> > intmath.h to x86/intmath.h. This is technically where such functions > should > >> > go but this will also fix any issues with trying to use msvc to > compile arm > >> > code. > >> > > > Minor tweaks as per James' suggestions in other thread > > > > > > 0002-lavu-intmath.h-Move-x86-only-msvc-icl-functions-to-x.patch > > > > > > From c4ad534161d985370cd312a1514f04d41bc80432 Mon Sep 17 00:00:00 2001 > > From: Matt Oliver > > Date: Fri, 16 Oct 2015 12:06:38 +1100 > > Subject: [PATCH 2/2] lavu/intmath.h: Move x86 only msvc/icl functions to > x86 > > specific header. > > > > Signed-off-by: Matt Oliver > > --- > > libavutil/intmath.h | 34 ++ > > libavutil/x86/intmath.h | 23 +++ > > 2 files changed, 25 insertions(+), 32 deletions(-) > > > > diff --git a/libavutil/intmath.h b/libavutil/intmath.h > > index 5a55123..5a10d02 100644 > > --- a/libavutil/intmath.h > > +++ b/libavutil/intmath.h > > @@ -34,14 +34,7 @@ > > #endif > > > > #if HAVE_FAST_CLZ > > -#if defined( __INTEL_COMPILER ) > > -#ifndef ff_log2 > > -# define ff_log2(x) (_bit_scan_reverse((x)|1)) > > -# ifndef ff_log2_16bit > > -# define ff_log2_16bit av_log2 > > -# endif > > -#endif /* ff_log2 */ > > -#elif AV_GCC_VERSION_AT_LEAST(3,4) > > +#if AV_GCC_VERSION_AT_LEAST(3,4) > > #ifndef ff_log2 > > # define ff_log2(x) (31 - __builtin_clz((x)|1)) > > # ifndef ff_log2_16bit > > @@ -55,7 +48,6 @@ extern const uint8_t ff_log2_tab[256]; > > > > #ifndef ff_log2 > > #define ff_log2 ff_log2_c > > -#if !defined( _MSC_VER ) > > static av_always_inline av_const int ff_log2_c(unsigned int v) > > { > > int n = 0; > > @@ -71,15 +63,6 @@ static av_always_inline av_const int > ff_log2_c(unsigned int v) > > > > return n; > 
> } > > -#else > > -static av_always_inline av_const int ff_log2_c(unsigned int v) > > -{ > > -unsigned long n; > > -_BitScanReverse(&n, v|1); > > -return n; > > -} > > -#define ff_log2_16bit av_log2 > > -#endif > > #endif > > > > #ifndef ff_log2_16bit > > @@ -106,11 +89,7 @@ static av_always_inline av_const int > ff_log2_16bit_c(unsigned int v) > > */ > > > > #if HAVE_FAST_CLZ > > -#if defined( __INTEL_COMPILER ) > > -#ifndef ff_ctz > > -#define ff_ctz(v) _bit_scan_forward(v) > > -#endif > > -#elif AV_GCC_VERSION_AT_LEAST(3,4) > > +#if AV_GCC_VERSION_AT_LEAST(3,4) > > #ifndef ff_ctz > > #define ff_ctz(v) __builtin_ctz(v) > > #endif > > @@ -128,7 +107,6 @@ static av_always_inline av_const int > ff_log2_16bit_c(unsigned int v) > > * @param v input value. If v is 0, the result is undefined. > > * @return the number of trailing 0-bits > > */ > > -#if !defined( _MSC_VER ) > > /* We use the De-Bruijn method outlined in: > > * http://supertech.csail.mit.edu/papers/debruijn.pdf. */ > > static av_always_inline av_const int ff_ctz_c(int v) > > @@ -139,14 +117,6 @@ static av_always_inline av_const int ff_ctz_c(int v) > > }; > > return debruijn_ctz32[(uint32_t)((v & -v) * 0x077CB531U) >> 27]; > > } > > -#else > > -static av_always_inline av_const int ff_ctz_c( int v ) > > -{ > > -unsigned long c; > > -_BitScanForward(&c, v); > > -return c; > > -} > > -#endif > > #endif > > > > #ifndef ff_ctzll > > diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h > > index f7fb9b6..c7e6d38 100644 > > --- a/libavutil/x86/intmath.h > > +++ b/libavutil/x86/intmath.h > > @@ -61,6 +61,29 @@ static av_always_inline av_const unsigned > av_mod_uintp2_bmi2(unsigned a, unsigne > > #endif /* __BMI2__ */ > > > > #elif defined(_MSC_VER) > > +#ifdef __INTEL_COMPILER > > +#define ff_log2(x) (_bit_scan_reverse((x)|1)) > > This used to apply to both ICC and ICL, but now it will only apply to ICL > since it's inside the _MSC_VER check. 
> Seeing that ff_log2 using the gcc builtin is a bit more complex than using > the Intel bit_scan_reverse intrinsic, it would be nice to let ICC keep > using > the latter. > > You should also keep the HAVE_FAST_CLZ check. > > > +#else > > +#define ff_log2 ff_log2_x86 > > +static av_always_inline av_const int ff_log2_x86(unsigned int v) { > > +unsigned long n; > > +_BitScanReverse(&n, v | 1); > > +return n; > > +} > > +#endif > > +#define ff_log2_16bit av_log2 > > + > > +#ifdef __INTEL_COMPILER > > +#define ff_ctz(v) _bit_scan_forward(v) > > +#else > > +#define ff_ctz ff_ctz_x86 > > +static av_always_inline av_const int ff_ctz_x86(int v) { > > +unsigned long c; > > +_BitScanForward(&c, v); > > +return c; > > +} > > +#endif > > + > > #define ff_ctzll ff_ctzll_x86 > > static
[FFmpeg-devel] [PATCH 2/2] lavu/intmath.h: Move x86 only msvc/icl functions to x86 specific header.
Some of the existing msvc/icl optimised versions for ctz and log2 use intrinsics that are only usable on x86 systems. This patch moves them from intmath.h to x86/intmath.h. This is technically where such functions should go but this will also fix any issues with trying to use msvc to compile arm code. 0002-lavu-intmath.h-Move-x86-only-msvc-icl-functions-to-x.patch Description: Binary data ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 2/2] lavu/intmath.h: Move x86 only msvc/icl functions to x86 specific header.
On 16 October 2015 at 01:10, Matt Oliver wrote: > Some of the existing msvc/icl optimised versions for ctz and log2 use > intrinsics that are only usable on x86 systems. This patch moves them from > intmath.h to x86/intmath.h. This is technically where such functions should > go but this will also fix any issues with trying to use msvc to compile arm > code. > Minor tweaks as per James' suggestions in other thread 0002-lavu-intmath.h-Move-x86-only-msvc-icl-functions-to-x.patch Description: Binary data ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 2/2] lavu/intmath.h: Move x86 only msvc/icl functions to x86 specific header.
On 10/15/2015 10:26 PM, Matt Oliver wrote: > On 16 October 2015 at 01:10, Matt Oliver wrote: > >> > Some of the existing msvc/icl optimised versions for ctz and log2 use >> > intrinsics that are only usable on x86 systems. This patch moves them from >> > intmath.h to x86/intmath.h. This is technically where such functions should >> > go but this will also fix any issues with trying to use msvc to compile arm >> > code. >> > > Minor tweaks as per James' suggestions in other thread > > > 0002-lavu-intmath.h-Move-x86-only-msvc-icl-functions-to-x.patch > > > From c4ad534161d985370cd312a1514f04d41bc80432 Mon Sep 17 00:00:00 2001 > From: Matt Oliver > Date: Fri, 16 Oct 2015 12:06:38 +1100 > Subject: [PATCH 2/2] lavu/intmath.h: Move x86 only msvc/icl functions to x86 > specific header. > > Signed-off-by: Matt Oliver > --- > libavutil/intmath.h | 34 ++ > libavutil/x86/intmath.h | 23 +++ > 2 files changed, 25 insertions(+), 32 deletions(-) > > diff --git a/libavutil/intmath.h b/libavutil/intmath.h > index 5a55123..5a10d02 100644 > --- a/libavutil/intmath.h > +++ b/libavutil/intmath.h > @@ -34,14 +34,7 @@ > #endif > > #if HAVE_FAST_CLZ > -#if defined( __INTEL_COMPILER ) > -#ifndef ff_log2 > -# define ff_log2(x) (_bit_scan_reverse((x)|1)) > -# ifndef ff_log2_16bit > -# define ff_log2_16bit av_log2 > -# endif > -#endif /* ff_log2 */ > -#elif AV_GCC_VERSION_AT_LEAST(3,4) > +#if AV_GCC_VERSION_AT_LEAST(3,4) > #ifndef ff_log2 > # define ff_log2(x) (31 - __builtin_clz((x)|1)) > # ifndef ff_log2_16bit > @@ -55,7 +48,6 @@ extern const uint8_t ff_log2_tab[256]; > > #ifndef ff_log2 > #define ff_log2 ff_log2_c > -#if !defined( _MSC_VER ) > static av_always_inline av_const int ff_log2_c(unsigned int v) > { > int n = 0; > @@ -71,15 +63,6 @@ static av_always_inline av_const int ff_log2_c(unsigned > int v) > > return n; > } > -#else > -static av_always_inline av_const int ff_log2_c(unsigned int v) > -{ > -unsigned long n; > -_BitScanReverse(&n, v|1); > -return n; > -} > -#define ff_log2_16bit 
av_log2 > -#endif > #endif > > #ifndef ff_log2_16bit > @@ -106,11 +89,7 @@ static av_always_inline av_const int > ff_log2_16bit_c(unsigned int v) > */ > > #if HAVE_FAST_CLZ > -#if defined( __INTEL_COMPILER ) > -#ifndef ff_ctz > -#define ff_ctz(v) _bit_scan_forward(v) > -#endif > -#elif AV_GCC_VERSION_AT_LEAST(3,4) > +#if AV_GCC_VERSION_AT_LEAST(3,4) > #ifndef ff_ctz > #define ff_ctz(v) __builtin_ctz(v) > #endif > @@ -128,7 +107,6 @@ static av_always_inline av_const int > ff_log2_16bit_c(unsigned int v) > * @param v input value. If v is 0, the result is undefined. > * @return the number of trailing 0-bits > */ > -#if !defined( _MSC_VER ) > /* We use the De-Bruijn method outlined in: > * http://supertech.csail.mit.edu/papers/debruijn.pdf. */ > static av_always_inline av_const int ff_ctz_c(int v) > @@ -139,14 +117,6 @@ static av_always_inline av_const int ff_ctz_c(int v) > }; > return debruijn_ctz32[(uint32_t)((v & -v) * 0x077CB531U) >> 27]; > } > -#else > -static av_always_inline av_const int ff_ctz_c( int v ) > -{ > -unsigned long c; > -_BitScanForward(&c, v); > -return c; > -} > -#endif > #endif > > #ifndef ff_ctzll > diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h > index f7fb9b6..c7e6d38 100644 > --- a/libavutil/x86/intmath.h > +++ b/libavutil/x86/intmath.h > @@ -61,6 +61,29 @@ static av_always_inline av_const unsigned > av_mod_uintp2_bmi2(unsigned a, unsigne > #endif /* __BMI2__ */ > > #elif defined(_MSC_VER) > +#ifdef __INTEL_COMPILER > +#define ff_log2(x) (_bit_scan_reverse((x)|1)) This used to apply to both ICC and ICL, but now it will only apply to ICL since it's inside the _MSC_VER check. Seeing that ff_log2 using the gcc builtin is a bit more complex than using the Intel bit_scan_reverse intrinsic, it would be nice to let ICC keep using the latter. You should also keep the HAVE_FAST_CLZ check. 
> +#else > +#define ff_log2 ff_log2_x86 > +static av_always_inline av_const int ff_log2_x86(unsigned int v) { > +unsigned long n; > +_BitScanReverse(&n, v | 1); > +return n; > +} > +#endif > +#define ff_log2_16bit av_log2 > + > +#ifdef __INTEL_COMPILER > +#define ff_ctz(v) _bit_scan_forward(v) > +#else > +#define ff_ctz ff_ctz_x86 > +static av_always_inline av_const int ff_ctz_x86(int v) { > +unsigned long c; > +_BitScanForward(&c, v); > +return c; > +} > +#endif > + > #define ff_ctzll ff_ctzll_x86 > static av_always_inline av_const int ff_ctzll_x86(long long v) > { > -- 1.9.5.github.0 > > > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org