Re: [FFmpeg-devel] [PATCH V2 2/3] libavcodec/vaapi_encode: Change the way to call async to increase performance

2022-02-06 Thread Chen, Wenbin
> On Wed, 2022-01-05 at 10:48 +0800, Wenbin Chen wrote:
> > Fix: #7706. After commit 5fdcf85bbffe7451c2, the vaapi encoder's performance
> > decreased. The reason is that vaRenderPicture() and vaSyncBuffer() are
> > called at the same time (vaRenderPicture() is always followed by a
> > vaSyncBuffer()). When we encode a stream with B frames, we need a buffer to
> > reorder frames, so we can send several frames to HW at once to increase
> > performance. Now I changed them to be called in an asynchronous way, which
> > will make better use of hardware. 1080p transcoding increases about 17%
> > fps on my environment.
> >
> > This change relies on vaSyncBuffer(), so if the driver does not support
> > vaSyncBuffer(), the previous behavior is kept.
> >
> > Signed-off-by: Wenbin Chen 
> > ---
> >  libavcodec/vaapi_encode.c | 64 -
> --
> >  libavcodec/vaapi_encode.h |  5 +++
> >  2 files changed, 58 insertions(+), 11 deletions(-)
> >
> > diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> > index b87b58a42b..9a3b3ba4ad 100644
> > --- a/libavcodec/vaapi_encode.c
> > +++ b/libavcodec/vaapi_encode.c
> > @@ -984,8 +984,10 @@ static int
> vaapi_encode_pick_next(AVCodecContext *avctx,
> >  if (!pic && ctx->end_of_stream) {
> >  --b_counter;
> >  pic = ctx->pic_end;
> > -if (pic->encode_issued)
> > +if (pic->encode_complete)
> >  return AVERROR_EOF;
> > +else if (pic->encode_issued)
> > +return AVERROR(EAGAIN);
> >  }
> >
> >  if (!pic) {
> > @@ -1210,18 +1212,45 @@ int
> ff_vaapi_encode_receive_packet(AVCodecContext
> > *avctx, AVPacket *pkt)
> >  return AVERROR(EAGAIN);
> >  }
> >
> > -pic = NULL;
> > -err = vaapi_encode_pick_next(avctx, &pic);
> > -if (err < 0)
> > -return err;
> > -av_assert0(pic);
> > +#if VA_CHECK_VERSION(1, 9, 0)
> > +if (ctx->has_sync_buffer_func) {
> > +while (av_fifo_size(ctx->encode_fifo) <=
> > +   MAX_PICTURE_REFERENCES * sizeof(VAAPIEncodePicture *)) {
> > +pic = NULL;
> > +err = vaapi_encode_pick_next(avctx, &pic);
> > +if (err < 0)
> > +break;
> > +
> > +av_assert0(pic);
> > +pic->encode_order = ctx->encode_order +
> > +(av_fifo_size(ctx->encode_fifo) / sizeof(VAAPIEncodePicture
> > *));
> > +err = vaapi_encode_issue(avctx, pic);
> > +if (err < 0) {
> > +av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> > +return err;
> > +}
> > +av_fifo_generic_write(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> > +}
> > +if (!av_fifo_size(ctx->encode_fifo))
> > +return err;
> > +av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> > +ctx->encode_order = pic->encode_order + 1;
> > +} else
> > +#endif
> > +{
> > +pic = NULL;
> > +err = vaapi_encode_pick_next(avctx, &pic);
> > +if (err < 0)
> > +return err;
> > +av_assert0(pic);
> >
> > -pic->encode_order = ctx->encode_order++;
> > +pic->encode_order = ctx->encode_order++;
> >
> > -err = vaapi_encode_issue(avctx, pic);
> > -if (err < 0) {
> > -av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> > -return err;
> > +err = vaapi_encode_issue(avctx, pic);
> > +if (err < 0) {
> > +av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> > +return err;
> > +}
> >  }
> >
> >  err = vaapi_encode_output(avctx, pic, pkt);
> > @@ -2555,6 +2584,18 @@ av_cold int
> ff_vaapi_encode_init(AVCodecContext *avctx)
> >  }
> >  }
> >
> > +#if VA_CHECK_VERSION(1, 9, 0)
> > +//check vaSyncBuffer function
> > +vas = vaSyncBuffer(ctx->hwctx->display, 0, 0);
> > +if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
> > +ctx->has_sync_buffer_func = 1;
> > +ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
> > +sizeof(VAAPIEncodePicture *));
> > +if (!ctx->encode_fifo)
> > +return AVERROR(ENOMEM);
> > +}
> > +#endif
> > +
> >  return 0;
> >
> >  fail:
> > @@ -2592,6 +2633,7 @@ av_cold int
> ff_vaapi_encode_close(AVCodecContext *avctx)
> >
> >  av_freep(&ctx->codec_sequence_params);
> >  av_freep(&ctx->codec_picture_params);
> > +av_fifo_freep(&ctx->encode_fifo);
> >
> >  av_buffer_unref(&ctx->recon_frames_ref);
> >  av_buffer_unref(&ctx->input_frames_ref);
> > diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> > index b41604a883..560a1c42a9 100644
> > --- a/libavcodec/vaapi_encode.h
> > +++ b/libavcodec/vaapi_encode.h
> > @@ -29,6 +29,7 @@
> >
> >  #include "libavutil/hwcontext.h"
> >  #include "libavutil/hwcontext_vaapi.h"
> > +#include "libavutil/fifo.h"
> >
> >  #include "avcodec.h"
> >  #include "hwconfig.h"
> > 

Re: [FFmpeg-devel] [PATCH V2 2/3] libavcodec/vaapi_encode: Change the way to call async to increase performance

2022-02-06 Thread Xiang, Haihao
On Wed, 2022-01-05 at 10:48 +0800, Wenbin Chen wrote:
> Fix: #7706. After commit 5fdcf85bbffe7451c2, the vaapi encoder's performance
> decreased. The reason is that vaRenderPicture() and vaSyncBuffer() are
> called at the same time (vaRenderPicture() is always followed by a
> vaSyncBuffer()). When we encode a stream with B frames, we need a buffer to
> reorder frames, so we can send several frames to HW at once to increase
> performance. Now I changed them to be called in an asynchronous way, which
> will make better use of hardware. 1080p transcoding increases about 17%
> fps on my environment.
> 
> This change relies on vaSyncBuffer(), so if the driver does not support
> vaSyncBuffer(), the previous behavior is kept.
> 
> Signed-off-by: Wenbin Chen 
> ---
>  libavcodec/vaapi_encode.c | 64 ---
>  libavcodec/vaapi_encode.h |  5 +++
>  2 files changed, 58 insertions(+), 11 deletions(-)
> 
> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> index b87b58a42b..9a3b3ba4ad 100644
> --- a/libavcodec/vaapi_encode.c
> +++ b/libavcodec/vaapi_encode.c
> @@ -984,8 +984,10 @@ static int vaapi_encode_pick_next(AVCodecContext *avctx,
>  if (!pic && ctx->end_of_stream) {
>  --b_counter;
>  pic = ctx->pic_end;
> -if (pic->encode_issued)
> +if (pic->encode_complete)
>  return AVERROR_EOF;
> +else if (pic->encode_issued)
> +return AVERROR(EAGAIN);
>  }
>  
>  if (!pic) {
> @@ -1210,18 +1212,45 @@ int ff_vaapi_encode_receive_packet(AVCodecContext
> *avctx, AVPacket *pkt)
>  return AVERROR(EAGAIN);
>  }
>  
> -pic = NULL;
> -err = vaapi_encode_pick_next(avctx, &pic);
> -if (err < 0)
> -return err;
> -av_assert0(pic);
> +#if VA_CHECK_VERSION(1, 9, 0)
> +if (ctx->has_sync_buffer_func) {
> +while (av_fifo_size(ctx->encode_fifo) <=
> +   MAX_PICTURE_REFERENCES * sizeof(VAAPIEncodePicture *)) {
> +pic = NULL;
> +err = vaapi_encode_pick_next(avctx, &pic);
> +if (err < 0)
> +break;
> +
> +av_assert0(pic);
> +pic->encode_order = ctx->encode_order +
> +(av_fifo_size(ctx->encode_fifo) / sizeof(VAAPIEncodePicture
> *));
> +err = vaapi_encode_issue(avctx, pic);
> +if (err < 0) {
> +av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> +return err;
> +}
> +av_fifo_generic_write(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> +}
> +if (!av_fifo_size(ctx->encode_fifo))
> +return err;
> +av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> +ctx->encode_order = pic->encode_order + 1;
> +} else
> +#endif
> +{
> +pic = NULL;
> +err = vaapi_encode_pick_next(avctx, &pic);
> +if (err < 0)
> +return err;
> +av_assert0(pic);
>  
> -pic->encode_order = ctx->encode_order++;
> +pic->encode_order = ctx->encode_order++;
>  
> -err = vaapi_encode_issue(avctx, pic);
> -if (err < 0) {
> -av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> -return err;
> +err = vaapi_encode_issue(avctx, pic);
> +if (err < 0) {
> +av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> +return err;
> +}
>  }
>  
>  err = vaapi_encode_output(avctx, pic, pkt);
> @@ -2555,6 +2584,18 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
>  }
>  }
>  
> +#if VA_CHECK_VERSION(1, 9, 0)
> +//check vaSyncBuffer function
> +vas = vaSyncBuffer(ctx->hwctx->display, 0, 0);
> +if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
> +ctx->has_sync_buffer_func = 1;
> +ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
> +sizeof(VAAPIEncodePicture *));
> +if (!ctx->encode_fifo)
> +return AVERROR(ENOMEM);
> +}
> +#endif
> +
>  return 0;
>  
>  fail:
> @@ -2592,6 +2633,7 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
>  
>  av_freep(&ctx->codec_sequence_params);
>  av_freep(&ctx->codec_picture_params);
> +av_fifo_freep(&ctx->encode_fifo);
>  
>  av_buffer_unref(&ctx->recon_frames_ref);
>  av_buffer_unref(&ctx->input_frames_ref);
> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> index b41604a883..560a1c42a9 100644
> --- a/libavcodec/vaapi_encode.h
> +++ b/libavcodec/vaapi_encode.h
> @@ -29,6 +29,7 @@
>  
>  #include "libavutil/hwcontext.h"
>  #include "libavutil/hwcontext_vaapi.h"
> +#include "libavutil/fifo.h"
>  
>  #include "avcodec.h"
>  #include "hwconfig.h"
> @@ -345,6 +346,10 @@ typedef struct VAAPIEncodeContext {
>  int roi_warned;
>  
>  AVFrame *frame;
> +//Store buffered pic
> +AVFifoBuffer *encode_fifo;
> +//Whether the driver support vaSyncBuffer

Could you update 

Re: [FFmpeg-devel] [PATCH V2 2/3] libavcodec/vaapi_encode: Change the way to call async to increase performance

2022-01-06 Thread Andreas Rheinhardt
Chen, Wenbin:
>> Wenbin Chen:
>>> Fix: #7706. After commit 5fdcf85bbffe7451c2, the vaapi encoder's performance
>>> decreased. The reason is that vaRenderPicture() and vaSyncBuffer() are
>>> called at the same time (vaRenderPicture() is always followed by a
>>> vaSyncBuffer()). When we encode a stream with B frames, we need a buffer to
>>> reorder frames, so we can send several frames to HW at once to increase
>>> performance. Now I changed them to be called in an asynchronous way, which
>>> will make better use of hardware. 1080p transcoding increases about 17%
>>> fps on my environment.
>>>
>>> This change relies on vaSyncBuffer(), so if the driver does not support
>>> vaSyncBuffer(), the previous behavior is kept.
>>>
>>> Signed-off-by: Wenbin Chen 
>>> ---
>>>  libavcodec/vaapi_encode.c | 64 -
>> --
>>>  libavcodec/vaapi_encode.h |  5 +++
>>>  2 files changed, 58 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
>>> index b87b58a42b..9a3b3ba4ad 100644
>>> --- a/libavcodec/vaapi_encode.c
>>> +++ b/libavcodec/vaapi_encode.c
>>> @@ -984,8 +984,10 @@ static int
>> vaapi_encode_pick_next(AVCodecContext *avctx,
>>>  if (!pic && ctx->end_of_stream) {
>>>  --b_counter;
>>>  pic = ctx->pic_end;
>>> -if (pic->encode_issued)
>>> +if (pic->encode_complete)
>>>  return AVERROR_EOF;
>>> +else if (pic->encode_issued)
>>> +return AVERROR(EAGAIN);
>>>  }
>>>
>>>  if (!pic) {
>>> @@ -1210,18 +1212,45 @@ int
>> ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
>>>  return AVERROR(EAGAIN);
>>>  }
>>>
>>> -pic = NULL;
>>> -err = vaapi_encode_pick_next(avctx, &pic);
>>> -if (err < 0)
>>> -return err;
>>> -av_assert0(pic);
>>> +#if VA_CHECK_VERSION(1, 9, 0)
>>> +if (ctx->has_sync_buffer_func) {
>>> +while (av_fifo_size(ctx->encode_fifo) <=
>>> +   MAX_PICTURE_REFERENCES * sizeof(VAAPIEncodePicture *)) {
>>> +pic = NULL;
>>> +err = vaapi_encode_pick_next(avctx, &pic);
>>> +if (err < 0)
>>> +break;
>>> +
>>> +av_assert0(pic);
>>> +pic->encode_order = ctx->encode_order +
>>> +(av_fifo_size(ctx->encode_fifo) / 
>>> sizeof(VAAPIEncodePicture *));
>>> +err = vaapi_encode_issue(avctx, pic);
>>> +if (err < 0) {
>>> +av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
>>> +return err;
>>> +}
>>> +av_fifo_generic_write(ctx->encode_fifo, &pic, sizeof(pic), NULL);
>>> +}
>>> +if (!av_fifo_size(ctx->encode_fifo))
>>> +return err;
>>> +av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
>>> +ctx->encode_order = pic->encode_order + 1;
>>> +} else
>>> +#endif
>>> +{
>>> +pic = NULL;
>>> +err = vaapi_encode_pick_next(avctx, &pic);
>>> +if (err < 0)
>>> +return err;
>>> +av_assert0(pic);
>>>
>>> -pic->encode_order = ctx->encode_order++;
>>> +pic->encode_order = ctx->encode_order++;
>>>
>>> -err = vaapi_encode_issue(avctx, pic);
>>> -if (err < 0) {
>>> -av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
>>> -return err;
>>> +err = vaapi_encode_issue(avctx, pic);
>>> +if (err < 0) {
>>> +av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
>>> +return err;
>>> +}
>>>  }
>>>
>>>  err = vaapi_encode_output(avctx, pic, pkt);
>>> @@ -2555,6 +2584,18 @@ av_cold int
>> ff_vaapi_encode_init(AVCodecContext *avctx)
>>>  }
>>>  }
>>>
>>> +#if VA_CHECK_VERSION(1, 9, 0)
>>> +//check vaSyncBuffer function
>>> +vas = vaSyncBuffer(ctx->hwctx->display, 0, 0);
>>> +if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
>>> +ctx->has_sync_buffer_func = 1;
>>> +ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
>>> +sizeof(VAAPIEncodePicture *));
>>> +if (!ctx->encode_fifo)
>>> +return AVERROR(ENOMEM);
>>> +}
>>> +#endif
>>> +
>>>  return 0;
>>>
>>>  fail:
>>> @@ -2592,6 +2633,7 @@ av_cold int
>> ff_vaapi_encode_close(AVCodecContext *avctx)
>>>
>>>  av_freep(&ctx->codec_sequence_params);
>>>  av_freep(&ctx->codec_picture_params);
>>> +av_fifo_freep(&ctx->encode_fifo);
>>
>> Is it guaranteed that the fifo is empty at this point? I don't think so.
> 
> I don't check the fifo size, because in ff_vaapi_encode_close() all pics
> are already freed and encode_fifo only buffers pointers to those pics.
> ```
> for (pic = ctx->pic_start; pic; pic = next) {
> next = pic->next;
> vaapi_encode_free(avctx, pic);
> }
> ```
> 

Ok, seems like the FIFO does not have ownership of the pics. Alright then.

>>
>>>
>>>  av_buffer_unref(&ctx->recon_frames_ref);
>>>  

Re: [FFmpeg-devel] [PATCH V2 2/3] libavcodec/vaapi_encode: Change the way to call async to increase performance

2022-01-04 Thread Chen, Wenbin
> Wenbin Chen:
> > Fix: #7706. After commit 5fdcf85bbffe7451c2, the vaapi encoder's performance
> > decreased. The reason is that vaRenderPicture() and vaSyncBuffer() are
> > called at the same time (vaRenderPicture() is always followed by a
> > vaSyncBuffer()). When we encode a stream with B frames, we need a buffer to
> > reorder frames, so we can send several frames to HW at once to increase
> > performance. Now I changed them to be called in an asynchronous way, which
> > will make better use of hardware. 1080p transcoding increases about 17%
> > fps on my environment.
> >
> > This change relies on vaSyncBuffer(), so if the driver does not support
> > vaSyncBuffer(), the previous behavior is kept.
> >
> > Signed-off-by: Wenbin Chen 
> > ---
> >  libavcodec/vaapi_encode.c | 64 -
> --
> >  libavcodec/vaapi_encode.h |  5 +++
> >  2 files changed, 58 insertions(+), 11 deletions(-)
> >
> > diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> > index b87b58a42b..9a3b3ba4ad 100644
> > --- a/libavcodec/vaapi_encode.c
> > +++ b/libavcodec/vaapi_encode.c
> > @@ -984,8 +984,10 @@ static int
> vaapi_encode_pick_next(AVCodecContext *avctx,
> >  if (!pic && ctx->end_of_stream) {
> >  --b_counter;
> >  pic = ctx->pic_end;
> > -if (pic->encode_issued)
> > +if (pic->encode_complete)
> >  return AVERROR_EOF;
> > +else if (pic->encode_issued)
> > +return AVERROR(EAGAIN);
> >  }
> >
> >  if (!pic) {
> > @@ -1210,18 +1212,45 @@ int
> ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
> >  return AVERROR(EAGAIN);
> >  }
> >
> > -pic = NULL;
> > -err = vaapi_encode_pick_next(avctx, &pic);
> > -if (err < 0)
> > -return err;
> > -av_assert0(pic);
> > +#if VA_CHECK_VERSION(1, 9, 0)
> > +if (ctx->has_sync_buffer_func) {
> > +while (av_fifo_size(ctx->encode_fifo) <=
> > +   MAX_PICTURE_REFERENCES * sizeof(VAAPIEncodePicture *)) {
> > +pic = NULL;
> > +err = vaapi_encode_pick_next(avctx, &pic);
> > +if (err < 0)
> > +break;
> > +
> > +av_assert0(pic);
> > +pic->encode_order = ctx->encode_order +
> > +(av_fifo_size(ctx->encode_fifo) / 
> > sizeof(VAAPIEncodePicture *));
> > +err = vaapi_encode_issue(avctx, pic);
> > +if (err < 0) {
> > +av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> > +return err;
> > +}
> > +av_fifo_generic_write(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> > +}
> > +if (!av_fifo_size(ctx->encode_fifo))
> > +return err;
> > +av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> > +ctx->encode_order = pic->encode_order + 1;
> > +} else
> > +#endif
> > +{
> > +pic = NULL;
> > +err = vaapi_encode_pick_next(avctx, &pic);
> > +if (err < 0)
> > +return err;
> > +av_assert0(pic);
> >
> > -pic->encode_order = ctx->encode_order++;
> > +pic->encode_order = ctx->encode_order++;
> >
> > -err = vaapi_encode_issue(avctx, pic);
> > -if (err < 0) {
> > -av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> > -return err;
> > +err = vaapi_encode_issue(avctx, pic);
> > +if (err < 0) {
> > +av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> > +return err;
> > +}
> >  }
> >
> >  err = vaapi_encode_output(avctx, pic, pkt);
> > @@ -2555,6 +2584,18 @@ av_cold int
> ff_vaapi_encode_init(AVCodecContext *avctx)
> >  }
> >  }
> >
> > +#if VA_CHECK_VERSION(1, 9, 0)
> > +//check vaSyncBuffer function
> > +vas = vaSyncBuffer(ctx->hwctx->display, 0, 0);
> > +if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
> > +ctx->has_sync_buffer_func = 1;
> > +ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
> > +sizeof(VAAPIEncodePicture *));
> > +if (!ctx->encode_fifo)
> > +return AVERROR(ENOMEM);
> > +}
> > +#endif
> > +
> >  return 0;
> >
> >  fail:
> > @@ -2592,6 +2633,7 @@ av_cold int
> ff_vaapi_encode_close(AVCodecContext *avctx)
> >
> >  av_freep(&ctx->codec_sequence_params);
> >  av_freep(&ctx->codec_picture_params);
> > +av_fifo_freep(&ctx->encode_fifo);
> 
> Is it guaranteed that the fifo is empty at this point? I don't think so.

I don't check the fifo size, because in ff_vaapi_encode_close() all pics
are already freed and encode_fifo only buffers pointers to those pics.
```
for (pic = ctx->pic_start; pic; pic = next) {
next = pic->next;
vaapi_encode_free(avctx, pic);
}
```

> 
> >
> >  av_buffer_unref(&ctx->recon_frames_ref);
> >  av_buffer_unref(&ctx->input_frames_ref);
> > diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> > index 

Re: [FFmpeg-devel] [PATCH V2 2/3] libavcodec/vaapi_encode: Change the way to call async to increase performance

2022-01-04 Thread Andreas Rheinhardt
Wenbin Chen:
> Fix: #7706. After commit 5fdcf85bbffe7451c2, the vaapi encoder's performance
> decreased. The reason is that vaRenderPicture() and vaSyncBuffer() are
> called at the same time (vaRenderPicture() is always followed by a
> vaSyncBuffer()). When we encode a stream with B frames, we need a buffer to
> reorder frames, so we can send several frames to HW at once to increase
> performance. Now I changed them to be called in an asynchronous way, which
> will make better use of hardware. 1080p transcoding increases about 17%
> fps on my environment.
> 
> This change relies on vaSyncBuffer(), so if the driver does not support
> vaSyncBuffer(), the previous behavior is kept.
> 
> Signed-off-by: Wenbin Chen 
> ---
>  libavcodec/vaapi_encode.c | 64 ---
>  libavcodec/vaapi_encode.h |  5 +++
>  2 files changed, 58 insertions(+), 11 deletions(-)
> 
> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> index b87b58a42b..9a3b3ba4ad 100644
> --- a/libavcodec/vaapi_encode.c
> +++ b/libavcodec/vaapi_encode.c
> @@ -984,8 +984,10 @@ static int vaapi_encode_pick_next(AVCodecContext *avctx,
>  if (!pic && ctx->end_of_stream) {
>  --b_counter;
>  pic = ctx->pic_end;
> -if (pic->encode_issued)
> +if (pic->encode_complete)
>  return AVERROR_EOF;
> +else if (pic->encode_issued)
> +return AVERROR(EAGAIN);
>  }
>  
>  if (!pic) {
> @@ -1210,18 +1212,45 @@ int ff_vaapi_encode_receive_packet(AVCodecContext 
> *avctx, AVPacket *pkt)
>  return AVERROR(EAGAIN);
>  }
>  
> -pic = NULL;
> -err = vaapi_encode_pick_next(avctx, &pic);
> -if (err < 0)
> -return err;
> -av_assert0(pic);
> +#if VA_CHECK_VERSION(1, 9, 0)
> +if (ctx->has_sync_buffer_func) {
> +while (av_fifo_size(ctx->encode_fifo) <=
> +   MAX_PICTURE_REFERENCES * sizeof(VAAPIEncodePicture *)) {
> +pic = NULL;
> +err = vaapi_encode_pick_next(avctx, &pic);
> +if (err < 0)
> +break;
> +
> +av_assert0(pic);
> +pic->encode_order = ctx->encode_order +
> +(av_fifo_size(ctx->encode_fifo) / sizeof(VAAPIEncodePicture 
> *));
> +err = vaapi_encode_issue(avctx, pic);
> +if (err < 0) {
> +av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> +return err;
> +}
> +av_fifo_generic_write(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> +}
> +if (!av_fifo_size(ctx->encode_fifo))
> +return err;
> +av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> +ctx->encode_order = pic->encode_order + 1;
> +} else
> +#endif
> +{
> +pic = NULL;
> +err = vaapi_encode_pick_next(avctx, &pic);
> +if (err < 0)
> +return err;
> +av_assert0(pic);
>  
> -pic->encode_order = ctx->encode_order++;
> +pic->encode_order = ctx->encode_order++;
>  
> -err = vaapi_encode_issue(avctx, pic);
> -if (err < 0) {
> -av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> -return err;
> +err = vaapi_encode_issue(avctx, pic);
> +if (err < 0) {
> +av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> +return err;
> +}
>  }
>  
>  err = vaapi_encode_output(avctx, pic, pkt);
> @@ -2555,6 +2584,18 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
>  }
>  }
>  
> +#if VA_CHECK_VERSION(1, 9, 0)
> +//check vaSyncBuffer function
> +vas = vaSyncBuffer(ctx->hwctx->display, 0, 0);
> +if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
> +ctx->has_sync_buffer_func = 1;
> +ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
> +sizeof(VAAPIEncodePicture *));
> +if (!ctx->encode_fifo)
> +return AVERROR(ENOMEM);
> +}
> +#endif
> +
>  return 0;
>  
>  fail:
> @@ -2592,6 +2633,7 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
>  
>  av_freep(&ctx->codec_sequence_params);
>  av_freep(&ctx->codec_picture_params);
> +av_fifo_freep(&ctx->encode_fifo);

Is it guaranteed that the fifo is empty at this point? I don't think so.

>  
>  av_buffer_unref(&ctx->recon_frames_ref);
>  av_buffer_unref(&ctx->input_frames_ref);
> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> index b41604a883..560a1c42a9 100644
> --- a/libavcodec/vaapi_encode.h
> +++ b/libavcodec/vaapi_encode.h
> @@ -29,6 +29,7 @@
>  
>  #include "libavutil/hwcontext.h"
>  #include "libavutil/hwcontext_vaapi.h"
> +#include "libavutil/fifo.h"
>  
>  #include "avcodec.h"
>  #include "hwconfig.h"
> @@ -345,6 +346,10 @@ typedef struct VAAPIEncodeContext {
>  int roi_warned;
>  
>  AVFrame *frame;
> +//Store buffered pic
> +AVFifoBuffer *encode_fifo;
> +//Whether the driver