On Mon, Oct 19, 2015 at 4:00 PM, Timothy Gu wrote:
> About 16% faster on large clips (>1200px width), more than 2x slower on small
> clips
> (352px). So using a heuristic to select with one to use.
What system, what compiler, etc? Without any such information, numbers
are meaningless. Please either give them in full, or not at all -
particularly here since there is this "voodoo" threshold that needs to
be picked.
> ---
> libavcodec/huffyuvenc.c| 6 +++---
> libavcodec/huffyuvencdsp.c | 4 ++--
> libavcodec/huffyuvencdsp.h | 4 ++--
> libavcodec/pngenc.c| 2 +-
> libavcodec/utvideoenc.c| 2 +-
> libavcodec/x86/huffyuvencdsp.asm | 5 +
> libavcodec/x86/huffyuvencdsp_mmx.c | 9 -
> 7 files changed, 22 insertions(+), 10 deletions(-)
>
> diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c
> index 49d711a..7e133b5 100644
> --- a/libavcodec/huffyuvenc.c
> +++ b/libavcodec/huffyuvenc.c
> @@ -60,12 +60,12 @@ static inline int sub_left_prediction(HYuvContext *s,
> uint8_t *dst,
> }
> return left;
> } else {
> -for (i = 0; i < 16; i++) {
> +for (i = 0; i < 32; i++) {
> const int temp = src[i];
> dst[i] = temp - left;
> left = temp;
> }
> -s->hencdsp.diff_bytes(dst + 16, src + 16, src + 15, w - 16);
> +s->hencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32);
> return src[w-1];
> }
> } else {
> @@ -217,7 +217,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
> const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
>
> ff_huffyuv_common_init(avctx);
> -ff_huffyuvencdsp_init(>hencdsp);
> +ff_huffyuvencdsp_init(>hencdsp, s->width);
>
> avctx->extradata = av_mallocz(3*MAX_N + 4);
> if (s->flags_CODEC_FLAG_PASS1) {
> diff --git a/libavcodec/huffyuvencdsp.c b/libavcodec/huffyuvencdsp.c
> index fdcd0b0..08bfd63 100644
> --- a/libavcodec/huffyuvencdsp.c
> +++ b/libavcodec/huffyuvencdsp.c
> @@ -74,11 +74,11 @@ static void sub_hfyu_median_pred_c(uint8_t *dst, const
> uint8_t *src1,
> *left_top = lt;
> }
>
> -av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c)
> +av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, int w)
> {
> c->diff_bytes = diff_bytes_c;
> c->sub_hfyu_median_pred = sub_hfyu_median_pred_c;
>
> if (ARCH_X86)
> -ff_huffyuvencdsp_init_x86(c);
> +ff_huffyuvencdsp_init_x86(c, w);
> }
> diff --git a/libavcodec/huffyuvencdsp.h b/libavcodec/huffyuvencdsp.h
> index 9d09095..d66590b 100644
> --- a/libavcodec/huffyuvencdsp.h
> +++ b/libavcodec/huffyuvencdsp.h
> @@ -35,7 +35,7 @@ typedef struct HuffYUVEncDSPContext {
> int *left, int *left_top);
> } HuffYUVEncDSPContext;
>
> -void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c);
> -void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c);
> +void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, int w);
> +void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, int w);
>
> #endif /* AVCODEC_HUFFYUVENCDSP_H */
> diff --git a/libavcodec/pngenc.c b/libavcodec/pngenc.c
> index 4204df2..26cde92 100644
> --- a/libavcodec/pngenc.c
> +++ b/libavcodec/pngenc.c
> @@ -981,7 +981,7 @@ FF_DISABLE_DEPRECATION_WARNINGS
> FF_ENABLE_DEPRECATION_WARNINGS
> #endif
>
> -ff_huffyuvencdsp_init(>hdsp);
> +ff_huffyuvencdsp_init(>hdsp, avctx->width);
>
> s->filter_type = av_clip(avctx->prediction_method,
> PNG_FILTER_VALUE_NONE,
> diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c
> index b8e1cc3..4753cfa 100644
> --- a/libavcodec/utvideoenc.c
> +++ b/libavcodec/utvideoenc.c
> @@ -109,7 +109,7 @@ static av_cold int utvideo_encode_init(AVCodecContext
> *avctx)
> }
>
> ff_bswapdsp_init(>bdsp);
> -ff_huffyuvencdsp_init(>hdsp);
> +ff_huffyuvencdsp_init(>hdsp, avctx->width);
>
> /* Check the prediction method, and error out if unsupported */
> if (avctx->prediction_method < 0 || avctx->prediction_method > 4) {
> diff --git a/libavcodec/x86/huffyuvencdsp.asm
> b/libavcodec/x86/huffyuvencdsp.asm
> index 9625fbe..85a6616 100644
> --- a/libavcodec/x86/huffyuvencdsp.asm
> +++ b/libavcodec/x86/huffyuvencdsp.asm
> @@ -65,3 +65,8 @@ DIFF_BYTES
>
> INIT_XMM sse2
> DIFF_BYTES
> +
> +%if HAVE_AVX2_EXTERNAL
> +INIT_YMM avx2
> +DIFF_BYTES
> +%endif
> diff --git a/libavcodec/x86/huffyuvencdsp_mmx.c
> b/libavcodec/x86/huffyuvencdsp_mmx.c
> index 9af5305..3eda0ba 100644
> --- a/libavcodec/x86/huffyuvencdsp_mmx.c
> +++ b/libavcodec/x86/huffyuvencdsp_mmx.c
> @@ -33,6 +33,8 @@ void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1,
> const uint8_t *src2,
> intptr_t w);
> void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t
> *src2,
>