Re: [FFmpeg-devel] [PATCH 2/2] lavu/intmath.h: Move x86 only msvc/icl functions to x86 specific header.

2015-10-16 Thread James Almer
On 10/16/2015 3:04 AM, Matt Oliver wrote:
> New patch attached. The HAVE_FAST_CLZ check is back and the intel functions
> now apply to both icl and icc.

Should be ok.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/2] lavu/intmath.h: Move x86 only msvc/icl functions to x86 specific header.

2015-10-16 Thread Matt Oliver
On 16 October 2015 at 16:13, James Almer  wrote:

> On 10/15/2015 10:26 PM, Matt Oliver wrote:
> > On 16 October 2015 at 01:10, Matt Oliver  wrote:
> >
> >> > Some of the existing msvc/icl optimised versions for ctz and log2 use
> >> > intrinsics that are only usable on x86 systems. This patch moves them
> from
> >> > intmath.h to x86/intmath.h. This is technically where such functions
> should
> >> > go but this will also fix any issues with trying to use msvc to
> compile arm
> >> > code.
> >> >
> > Minor tweaks as per James' suggestions in other thread
> >
> >
> > 0002-lavu-intmath.h-Move-x86-only-msvc-icl-functions-to-x.patch
> >
> >
> > From c4ad534161d985370cd312a1514f04d41bc80432 Mon Sep 17 00:00:00 2001
> > From: Matt Oliver 
> > Date: Fri, 16 Oct 2015 12:06:38 +1100
> > Subject: [PATCH 2/2] lavu/intmath.h: Move x86 only msvc/icl functions to
> x86
> >  specific header.
> >
> > Signed-off-by: Matt Oliver 
> > ---
> >  libavutil/intmath.h | 34 ++
> >  libavutil/x86/intmath.h | 23 +++
> >  2 files changed, 25 insertions(+), 32 deletions(-)
> >
> > diff --git a/libavutil/intmath.h b/libavutil/intmath.h
> > index 5a55123..5a10d02 100644
> > --- a/libavutil/intmath.h
> > +++ b/libavutil/intmath.h
> > @@ -34,14 +34,7 @@
> >  #endif
> >
> >  #if HAVE_FAST_CLZ
> > -#if defined( __INTEL_COMPILER )
> > -#ifndef ff_log2
> > -#   define ff_log2(x) (_bit_scan_reverse((x)|1))
> > -#   ifndef ff_log2_16bit
> > -#  define ff_log2_16bit av_log2
> > -#   endif
> > -#endif /* ff_log2 */
> > -#elif AV_GCC_VERSION_AT_LEAST(3,4)
> > +#if AV_GCC_VERSION_AT_LEAST(3,4)
> >  #ifndef ff_log2
> >  #   define ff_log2(x) (31 - __builtin_clz((x)|1))
> >  #   ifndef ff_log2_16bit
> > @@ -55,7 +48,6 @@ extern const uint8_t ff_log2_tab[256];
> >
> >  #ifndef ff_log2
> >  #define ff_log2 ff_log2_c
> > -#if !defined( _MSC_VER )
> >  static av_always_inline av_const int ff_log2_c(unsigned int v)
> >  {
> >  int n = 0;
> > @@ -71,15 +63,6 @@ static av_always_inline av_const int
> ff_log2_c(unsigned int v)
> >
> >  return n;
> >  }
> > -#else
> > -static av_always_inline av_const int ff_log2_c(unsigned int v)
> > -{
> > -unsigned long n;
> > -_BitScanReverse(&n, v|1);
> > -return n;
> > -}
> > -#define ff_log2_16bit av_log2
> > -#endif
> >  #endif
> >
> >  #ifndef ff_log2_16bit
> > @@ -106,11 +89,7 @@ static av_always_inline av_const int
> ff_log2_16bit_c(unsigned int v)
> >   */
> >
> >  #if HAVE_FAST_CLZ
> > -#if defined( __INTEL_COMPILER )
> > -#ifndef ff_ctz
> > -#define ff_ctz(v) _bit_scan_forward(v)
> > -#endif
> > -#elif AV_GCC_VERSION_AT_LEAST(3,4)
> > +#if AV_GCC_VERSION_AT_LEAST(3,4)
> >  #ifndef ff_ctz
> >  #define ff_ctz(v) __builtin_ctz(v)
> >  #endif
> > @@ -128,7 +107,6 @@ static av_always_inline av_const int
> ff_log2_16bit_c(unsigned int v)
> >   * @param v  input value. If v is 0, the result is undefined.
> >   * @return   the number of trailing 0-bits
> >   */
> > -#if !defined( _MSC_VER )
> >  /* We use the De-Bruijn method outlined in:
> >   * http://supertech.csail.mit.edu/papers/debruijn.pdf. */
> >  static av_always_inline av_const int ff_ctz_c(int v)
> > @@ -139,14 +117,6 @@ static av_always_inline av_const int ff_ctz_c(int v)
> >  };
> >  return debruijn_ctz32[(uint32_t)((v & -v) * 0x077CB531U) >> 27];
> >  }
> > -#else
> > -static av_always_inline av_const int ff_ctz_c( int v )
> > -{
> > -unsigned long c;
> > -_BitScanForward(&c, v);
> > -return c;
> > -}
> > -#endif
> >  #endif
> >
> >  #ifndef ff_ctzll
> > diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h
> > index f7fb9b6..c7e6d38 100644
> > --- a/libavutil/x86/intmath.h
> > +++ b/libavutil/x86/intmath.h
> > @@ -61,6 +61,29 @@ static av_always_inline av_const unsigned
> av_mod_uintp2_bmi2(unsigned a, unsigne
> >  #endif /* __BMI2__ */
> >
> >  #elif defined(_MSC_VER)
> > +#ifdef __INTEL_COMPILER
> > +#define ff_log2(x) (_bit_scan_reverse((x)|1))
>
> This used to apply to both ICC and ICL, but now it will only apply to ICL
> since it's inside the _MSC_VER check.
> Seeing that ff_log2 using the gcc builtin is a bit more complex than using
> the Intel bit_scan_reverse intrinsic, it would be nice to let ICC keep
> using
> the latter.
>
> Also, you should keep the HAVE_FAST_CLZ check.
>
> > +#else
> > +#define ff_log2 ff_log2_x86
> > +static av_always_inline av_const int ff_log2_x86(unsigned int v) {
> > +unsigned long n;
> > +_BitScanReverse(&n, v | 1);
> > +return n;
> > +}
> > +#endif
> > +#define ff_log2_16bit av_log2
> > +
> > +#ifdef __INTEL_COMPILER
> > +#define ff_ctz(v) _bit_scan_forward(v)
> > +#else
> > +#define ff_ctz ff_ctz_x86
> > +static av_always_inline av_const int ff_ctz_x86(int v) {
> > +unsigned long c;
> > +_BitScanForward(&c, v);
> > +return c;
> > +}
> > +#endif
> > +
> >  #define ff_ctzll ff_ctzll_x86
> >  static 

[FFmpeg-devel] [PATCH 2/2] lavu/intmath.h: Move x86 only msvc/icl functions to x86 specific header.

2015-10-15 Thread Matt Oliver
Some of the existing msvc/icl optimised versions for ctz and log2 use
intrinsics that are only usable on x86 systems. This patch moves them from
intmath.h to x86/intmath.h. This is technically where such functions should
go, and it will also fix any issues with trying to use msvc to compile arm
code.


0002-lavu-intmath.h-Move-x86-only-msvc-icl-functions-to-x.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/2] lavu/intmath.h: Move x86 only msvc/icl functions to x86 specific header.

2015-10-15 Thread Matt Oliver
On 16 October 2015 at 01:10, Matt Oliver  wrote:

> Some of the existing msvc/icl optimised versions for ctz and log2 use
> intrinsics that are only usable on x86 systems. This patch moves them from
> intmath.h to x86/intmath.h. This is technically where such functions should
> go but this will also fix any issues with trying to use msvc to compile arm
> code.
>

Minor tweaks as per James' suggestions in other thread


0002-lavu-intmath.h-Move-x86-only-msvc-icl-functions-to-x.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/2] lavu/intmath.h: Move x86 only msvc/icl functions to x86 specific header.

2015-10-15 Thread James Almer
On 10/15/2015 10:26 PM, Matt Oliver wrote:
> On 16 October 2015 at 01:10, Matt Oliver  wrote:
> 
>> > Some of the existing msvc/icl optimised versions for ctz and log2 use
>> > intrinsics that are only usable on x86 systems. This patch moves them from
>> > intmath.h to x86/intmath.h. This is technically where such functions should
>> > go but this will also fix any issues with trying to use msvc to compile arm
>> > code.
>> >
> Minor tweaks as per James' suggestions in other thread
> 
> 
> 0002-lavu-intmath.h-Move-x86-only-msvc-icl-functions-to-x.patch
> 
> 
> From c4ad534161d985370cd312a1514f04d41bc80432 Mon Sep 17 00:00:00 2001
> From: Matt Oliver 
> Date: Fri, 16 Oct 2015 12:06:38 +1100
> Subject: [PATCH 2/2] lavu/intmath.h: Move x86 only msvc/icl functions to x86
>  specific header.
> 
> Signed-off-by: Matt Oliver 
> ---
>  libavutil/intmath.h | 34 ++
>  libavutil/x86/intmath.h | 23 +++
>  2 files changed, 25 insertions(+), 32 deletions(-)
> 
> diff --git a/libavutil/intmath.h b/libavutil/intmath.h
> index 5a55123..5a10d02 100644
> --- a/libavutil/intmath.h
> +++ b/libavutil/intmath.h
> @@ -34,14 +34,7 @@
>  #endif
>  
>  #if HAVE_FAST_CLZ
> -#if defined( __INTEL_COMPILER )
> -#ifndef ff_log2
> -#   define ff_log2(x) (_bit_scan_reverse((x)|1))
> -#   ifndef ff_log2_16bit
> -#  define ff_log2_16bit av_log2
> -#   endif
> -#endif /* ff_log2 */
> -#elif AV_GCC_VERSION_AT_LEAST(3,4)
> +#if AV_GCC_VERSION_AT_LEAST(3,4)
>  #ifndef ff_log2
>  #   define ff_log2(x) (31 - __builtin_clz((x)|1))
>  #   ifndef ff_log2_16bit
> @@ -55,7 +48,6 @@ extern const uint8_t ff_log2_tab[256];
>  
>  #ifndef ff_log2
>  #define ff_log2 ff_log2_c
> -#if !defined( _MSC_VER )
>  static av_always_inline av_const int ff_log2_c(unsigned int v)
>  {
>  int n = 0;
> @@ -71,15 +63,6 @@ static av_always_inline av_const int ff_log2_c(unsigned 
> int v)
>  
>  return n;
>  }
> -#else
> -static av_always_inline av_const int ff_log2_c(unsigned int v)
> -{
> -unsigned long n;
> -_BitScanReverse(&n, v|1);
> -return n;
> -}
> -#define ff_log2_16bit av_log2
> -#endif
>  #endif
>  
>  #ifndef ff_log2_16bit
> @@ -106,11 +89,7 @@ static av_always_inline av_const int 
> ff_log2_16bit_c(unsigned int v)
>   */
>  
>  #if HAVE_FAST_CLZ
> -#if defined( __INTEL_COMPILER )
> -#ifndef ff_ctz
> -#define ff_ctz(v) _bit_scan_forward(v)
> -#endif
> -#elif AV_GCC_VERSION_AT_LEAST(3,4)
> +#if AV_GCC_VERSION_AT_LEAST(3,4)
>  #ifndef ff_ctz
>  #define ff_ctz(v) __builtin_ctz(v)
>  #endif
> @@ -128,7 +107,6 @@ static av_always_inline av_const int 
> ff_log2_16bit_c(unsigned int v)
>   * @param v  input value. If v is 0, the result is undefined.
>   * @return   the number of trailing 0-bits
>   */
> -#if !defined( _MSC_VER )
>  /* We use the De-Bruijn method outlined in:
>   * http://supertech.csail.mit.edu/papers/debruijn.pdf. */
>  static av_always_inline av_const int ff_ctz_c(int v)
> @@ -139,14 +117,6 @@ static av_always_inline av_const int ff_ctz_c(int v)
>  };
>  return debruijn_ctz32[(uint32_t)((v & -v) * 0x077CB531U) >> 27];
>  }
> -#else
> -static av_always_inline av_const int ff_ctz_c( int v )
> -{
> -unsigned long c;
> -_BitScanForward(&c, v);
> -return c;
> -}
> -#endif
>  #endif
>  
>  #ifndef ff_ctzll
> diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h
> index f7fb9b6..c7e6d38 100644
> --- a/libavutil/x86/intmath.h
> +++ b/libavutil/x86/intmath.h
> @@ -61,6 +61,29 @@ static av_always_inline av_const unsigned 
> av_mod_uintp2_bmi2(unsigned a, unsigne
>  #endif /* __BMI2__ */
>  
>  #elif defined(_MSC_VER)
> +#ifdef __INTEL_COMPILER
> +#define ff_log2(x) (_bit_scan_reverse((x)|1))

This used to apply to both ICC and ICL, but now it will only apply to ICL
since it's inside the _MSC_VER check.
Seeing that ff_log2 using the gcc builtin is a bit more complex than using
the Intel bit_scan_reverse intrinsic, it would be nice to let ICC keep using
the latter.

Also, you should keep the HAVE_FAST_CLZ check.

> +#else
> +#define ff_log2 ff_log2_x86
> +static av_always_inline av_const int ff_log2_x86(unsigned int v) {
> +unsigned long n;
> +_BitScanReverse(&n, v | 1);
> +return n;
> +}
> +#endif
> +#define ff_log2_16bit av_log2
> +
> +#ifdef __INTEL_COMPILER
> +#define ff_ctz(v) _bit_scan_forward(v)
> +#else
> +#define ff_ctz ff_ctz_x86
> +static av_always_inline av_const int ff_ctz_x86(int v) {
> +unsigned long c;
> +_BitScanForward(&c, v);
> +return c;
> +}
> +#endif
> +
>  #define ff_ctzll ff_ctzll_x86
>  static av_always_inline av_const int ff_ctzll_x86(long long v)
>  {
> -- 1.9.5.github.0
> 
> 
> 
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org