Hi, On Sep 18, 2015 3:55 AM, "James Almer" <jamr...@gmail.com> wrote: > > Signed-off-by: James Almer <jamr...@gmail.com> > --- > Unbenched as i lack the hardware to do so. > > libavcodec/x86/vp9dsp_init_16bpp_template.c | 22 ++++++++++++++++++++++ > libavcodec/x86/vp9mc_16bpp.asm | 6 ++++++ > 2 files changed, 28 insertions(+) > > diff --git a/libavcodec/x86/vp9dsp_init_16bpp_template.c b/libavcodec/x86/vp9dsp_init_16bpp_template.c > index 3e2737b..a1ce212 100644 > --- a/libavcodec/x86/vp9dsp_init_16bpp_template.c > +++ b/libavcodec/x86/vp9dsp_init_16bpp_template.c > @@ -33,16 +33,31 @@ extern const int16_t ff_filters_16bpp[3][15][4][16]; > > decl_mc_funcs(4, sse2, int16_t, 16, BPC); > decl_mc_funcs(8, sse2, int16_t, 16, BPC); > +decl_mc_funcs(16, avx2, int16_t, 16, BPC); > > mc_rep_funcs(16, 8, 16, sse2, int16_t, 16, BPC); > mc_rep_funcs(32, 16, 32, sse2, int16_t, 16, BPC); > mc_rep_funcs(64, 32, 64, sse2, int16_t, 16, BPC); > +mc_rep_funcs(32, 16, 32, avx2, int16_t, 16, BPC); > +mc_rep_funcs(64, 32, 64, avx2, int16_t, 16, BPC); > > filters_8tap_2d_fn2(put, 16, BPC, 2, sse2, sse2, 16bpp) > filters_8tap_2d_fn2(avg, 16, BPC, 2, sse2, sse2, 16bpp) > +filters_8tap_2d_fn(put, 64, 32, BPC, 2, avx2, 16bpp) > +filters_8tap_2d_fn(avg, 64, 32, BPC, 2, avx2, 16bpp) > +filters_8tap_2d_fn(put, 32, 32, BPC, 2, avx2, 16bpp) > +filters_8tap_2d_fn(avg, 32, 32, BPC, 2, avx2, 16bpp) > +filters_8tap_2d_fn(put, 16, 32, BPC, 2, avx2, 16bpp) > +filters_8tap_2d_fn(avg, 16, 32, BPC, 2, avx2, 16bpp) > > filters_8tap_1d_fn3(put, BPC, sse2, sse2, 16bpp) > filters_8tap_1d_fn3(avg, BPC, sse2, sse2, 16bpp) > +filters_8tap_1d_fn2(put, 64, BPC, avx2, 16bpp) > +filters_8tap_1d_fn2(avg, 64, BPC, avx2, 16bpp) > +filters_8tap_1d_fn2(put, 32, BPC, avx2, 16bpp) > +filters_8tap_1d_fn2(avg, 32, BPC, avx2, 16bpp) > +filters_8tap_1d_fn2(put, 16, BPC, avx2, 16bpp) > +filters_8tap_1d_fn2(avg, 16, BPC, avx2, 16bpp) > > #endif /* HAVE_YASM */ > > @@ -56,6 +71,13 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp) > 
init_subpel3(1, avg, BPC, sse2); > } > > + if (EXTERNAL_AVX2(cpu_flags)) { > + init_subpel3_32_64(0, put, BPC, avx2); > + init_subpel3_32_64(1, avg, BPC, avx2); > + init_subpel2(2, 0, 16, put, BPC, avx2); > + init_subpel2(2, 1, 16, avg, BPC, avx2); > + } > + > #endif /* HAVE_YASM */ > > ff_vp9dsp_init_16bpp_x86(dsp); > diff --git a/libavcodec/x86/vp9mc_16bpp.asm b/libavcodec/x86/vp9mc_16bpp.asm > index 52fc5ee..d66da55 100644 > --- a/libavcodec/x86/vp9mc_16bpp.asm > +++ b/libavcodec/x86/vp9mc_16bpp.asm > @@ -201,6 +201,9 @@ cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _12, 6, 6, %2, dst, dstride, src, sstride, > INIT_XMM sse2 > filter_h_fn put > filter_h_fn avg > +INIT_YMM avx2 > +filter_h_fn put > +filter_h_fn avg > > %macro filter_v4_fn 1-2 12 > %if ARCH_X86_64 > @@ -419,3 +422,6 @@ cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _12, 4, 7, %2, dst, dstride, src, sstride, > INIT_XMM sse2 > filter_v_fn put > filter_v_fn avg > +INIT_YMM avx2 > +filter_v_fn put > +filter_v_fn avg > -- > 2.5.2
Cool, I was hoping that would work, but I don't have Intel's emulator; thanks for testing, and LGTM. Ronald _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel