Alan Kelly: > Broadwell and later have fast gather instructions. > --- > This is so that the avx2 version of ff_hscale8to15X which uses gather > instructions is only selected on machines where it will actually be > faster. > libavutil/cpu.c | 6 ++++++ > libavutil/cpu.h | 6 ++++++ > libavutil/cpu_internal.h | 1 + > libavutil/x86/cpu.c | 18 ++++++++++++++++++ > 4 files changed, 31 insertions(+) > > diff --git a/libavutil/cpu.c b/libavutil/cpu.c > index 8960415d00..0a723eeb7a 100644 > --- a/libavutil/cpu.c > +++ b/libavutil/cpu.c > @@ -49,6 +49,12 @@ > > static atomic_int cpu_flags = ATOMIC_VAR_INIT(-1); > > +int av_cpu_has_fast_gather(void){ > + if (ARCH_X86) > + return ff_cpu_has_fast_gather(); > + return 0; > +} > + > static int get_cpu_flags(void) > { > if (ARCH_MIPS) > diff --git a/libavutil/cpu.h b/libavutil/cpu.h > index b555422dae..faf3a221f4 100644 > --- a/libavutil/cpu.h > +++ b/libavutil/cpu.h > @@ -72,6 +72,7 @@ > #define AV_CPU_FLAG_MMI (1 << 0) > #define AV_CPU_FLAG_MSA (1 << 1) > > +int av_cpu_has_fast_gather(void); > /** > * Return the flags which specify extensions supported by the CPU. > * The returned value is affected by av_force_cpu_flags() if that was used > @@ -107,6 +108,11 @@ int av_cpu_count(void); > * av_set_cpu_flags_mask(), then this function will behave as if AVX is not > * present. > */ > + > +/** > + * Returns true if the cpu has fast gather instructions. > + * Broadwell and later cpus have fast gather > + */
You added the documentation to av_cpu_max_align(), not av_cpu_has_fast_gather(). > size_t av_cpu_max_align(void); > > #endif /* AVUTIL_CPU_H */ > diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h > index 889764320b..92525df0c1 100644 > --- a/libavutil/cpu_internal.h > +++ b/libavutil/cpu_internal.h > @@ -46,6 +46,7 @@ int ff_get_cpu_flags_aarch64(void); > int ff_get_cpu_flags_arm(void); > int ff_get_cpu_flags_ppc(void); > int ff_get_cpu_flags_x86(void); > +int ff_cpu_has_fast_gather(void); > > size_t ff_get_cpu_max_align_mips(void); > size_t ff_get_cpu_max_align_aarch64(void); > diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c > index bcd41a50a2..9724e0017b 100644 > --- a/libavutil/x86/cpu.c > +++ b/libavutil/x86/cpu.c > @@ -270,3 +270,21 @@ size_t ff_get_cpu_max_align_x86(void) > > return 8; > } > + > +int ff_cpu_has_fast_gather(void){ > + int eax, ebx, ecx; > + int max_std_level, std_caps = 0; > + int family = 0, model = 0; > + cpuid(0, max_std_level, ebx, ecx, std_caps); > + > + if (max_std_level >= 1) { > + cpuid(1, eax, ebx, ecx, std_caps); > + family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); > + model = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0); > + // Broadwell and later > + if(family == 6 && model >= 70){ > + return 1; > + } > + } > + return 0; > +} > The usual way to signal that a processor supports a feature but executes it slowly is a CPU flag; see AV_CPU_FLAG_(AVX|SSE2|SSE3)SLOW. That way one also avoids adding a new public function that is completely useless when not on x86. - Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".