On Mon, Oct 19, 2015 at 4:00 PM, Timothy Gu <timothyg...@gmail.com> wrote: > About 16% faster on large clips (>1200px width), more than 2x slower on small > clips (352px). So using a heuristic to select which one to use.
What system, what compiler, etc? Without any such information, numbers are meaningless. Please either give them in full, or not at all - particularly here since there is this "voodoo" threshold that needs to be picked. > --- > libavcodec/huffyuvenc.c | 6 +++--- > libavcodec/huffyuvencdsp.c | 4 ++-- > libavcodec/huffyuvencdsp.h | 4 ++-- > libavcodec/pngenc.c | 2 +- > libavcodec/utvideoenc.c | 2 +- > libavcodec/x86/huffyuvencdsp.asm | 5 +++++ > libavcodec/x86/huffyuvencdsp_mmx.c | 9 ++++++++- > 7 files changed, 22 insertions(+), 10 deletions(-) > > diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c > index 49d711a..7e133b5 100644 > --- a/libavcodec/huffyuvenc.c > +++ b/libavcodec/huffyuvenc.c > @@ -60,12 +60,12 @@ static inline int sub_left_prediction(HYuvContext *s, > uint8_t *dst, > } > return left; > } else { > - for (i = 0; i < 16; i++) { > + for (i = 0; i < 32; i++) { > const int temp = src[i]; > dst[i] = temp - left; > left = temp; > } > - s->hencdsp.diff_bytes(dst + 16, src + 16, src + 15, w - 16); > + s->hencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32); > return src[w-1]; > } > } else { > @@ -217,7 +217,7 @@ static av_cold int encode_init(AVCodecContext *avctx) > const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt); > > ff_huffyuv_common_init(avctx); > - ff_huffyuvencdsp_init(&s->hencdsp); > + ff_huffyuvencdsp_init(&s->hencdsp, s->width); > > avctx->extradata = av_mallocz(3*MAX_N + 4); > if (s->flags&AV_CODEC_FLAG_PASS1) { > diff --git a/libavcodec/huffyuvencdsp.c b/libavcodec/huffyuvencdsp.c > index fdcd0b0..08bfd63 100644 > --- a/libavcodec/huffyuvencdsp.c > +++ b/libavcodec/huffyuvencdsp.c > @@ -74,11 +74,11 @@ static void sub_hfyu_median_pred_c(uint8_t *dst, const > uint8_t *src1, > *left_top = lt; > } > > -av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c) > +av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, int w) > { > c->diff_bytes = diff_bytes_c; > c->sub_hfyu_median_pred = 
sub_hfyu_median_pred_c; > > if (ARCH_X86) > - ff_huffyuvencdsp_init_x86(c); > + ff_huffyuvencdsp_init_x86(c, w); > } > diff --git a/libavcodec/huffyuvencdsp.h b/libavcodec/huffyuvencdsp.h > index 9d09095..d66590b 100644 > --- a/libavcodec/huffyuvencdsp.h > +++ b/libavcodec/huffyuvencdsp.h > @@ -35,7 +35,7 @@ typedef struct HuffYUVEncDSPContext { > int *left, int *left_top); > } HuffYUVEncDSPContext; > > -void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c); > -void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c); > +void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, int w); > +void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, int w); > > #endif /* AVCODEC_HUFFYUVENCDSP_H */ > diff --git a/libavcodec/pngenc.c b/libavcodec/pngenc.c > index 4204df2..26cde92 100644 > --- a/libavcodec/pngenc.c > +++ b/libavcodec/pngenc.c > @@ -981,7 +981,7 @@ FF_DISABLE_DEPRECATION_WARNINGS > FF_ENABLE_DEPRECATION_WARNINGS > #endif > > - ff_huffyuvencdsp_init(&s->hdsp); > + ff_huffyuvencdsp_init(&s->hdsp, avctx->width); > > s->filter_type = av_clip(avctx->prediction_method, > PNG_FILTER_VALUE_NONE, > diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c > index b8e1cc3..4753cfa 100644 > --- a/libavcodec/utvideoenc.c > +++ b/libavcodec/utvideoenc.c > @@ -109,7 +109,7 @@ static av_cold int utvideo_encode_init(AVCodecContext > *avctx) > } > > ff_bswapdsp_init(&c->bdsp); > - ff_huffyuvencdsp_init(&c->hdsp); > + ff_huffyuvencdsp_init(&c->hdsp, avctx->width); > > /* Check the prediction method, and error out if unsupported */ > if (avctx->prediction_method < 0 || avctx->prediction_method > 4) { > diff --git a/libavcodec/x86/huffyuvencdsp.asm > b/libavcodec/x86/huffyuvencdsp.asm > index 9625fbe..85a6616 100644 > --- a/libavcodec/x86/huffyuvencdsp.asm > +++ b/libavcodec/x86/huffyuvencdsp.asm > @@ -65,3 +65,8 @@ DIFF_BYTES > > INIT_XMM sse2 > DIFF_BYTES > + > +%if HAVE_AVX2_EXTERNAL > +INIT_YMM avx2 > +DIFF_BYTES > +%endif > diff --git a/libavcodec/x86/huffyuvencdsp_mmx.c > 
b/libavcodec/x86/huffyuvencdsp_mmx.c > index 9af5305..3eda0ba 100644 > --- a/libavcodec/x86/huffyuvencdsp_mmx.c > +++ b/libavcodec/x86/huffyuvencdsp_mmx.c > @@ -33,6 +33,8 @@ void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, > const uint8_t *src2, > intptr_t w); > void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t > *src2, > intptr_t w); > +void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t > *src2, > + intptr_t w); > > #if HAVE_INLINE_ASM > > @@ -78,7 +80,7 @@ static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const > uint8_t *src1, > > #endif /* HAVE_INLINE_ASM */ > > -av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c) > +av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, int w) > { > av_unused int cpu_flags = av_get_cpu_flags(); > > @@ -93,4 +95,9 @@ av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext > *c) > if (EXTERNAL_SSE2(cpu_flags)) { > c->diff_bytes = ff_diff_bytes_sse2; > } > + > + // avx2 version only faster than sse2 when width is sufficiently large > + if (EXTERNAL_AVX2(cpu_flags) && w > 1200) { > + c->diff_bytes = ff_diff_bytes_avx2; > + } > } > -- > 1.9.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel