On Fri, Dec 09, 2011 at 02:46:19PM +0100, Janne Grunau wrote:
> Hi,
> 
> it has still at least one hard to reproduce timing bug. I can reproduce
> it only with framecrc and 4 threads. I can't reproduce it when doing
> anything more time consuming than crc computation. Frame data and even
> framemd5 are identical.
> 
> Janne
> ---8<--
> Statistics for bourne.rmvb -an -f null
> 
> 1 thread:  37.12s user 0.03s system  99% cpu 37.174 total
> 2 threads: 47.63s user 0.24s system 185% cpu 25.807 total
> 4 threads: 41.21s user 0.30s system 327% cpu 12.674 total
> ---
>  libavcodec/rv30.c |    4 ++-
>  libavcodec/rv34.c |   91 
> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  libavcodec/rv34.h |    2 +
>  libavcodec/rv40.c |    4 ++-
>  4 files changed, 99 insertions(+), 2 deletions(-)
> 
> diff --git a/libavcodec/rv30.c b/libavcodec/rv30.c
> index 26708db..4828e98 100644
> --- a/libavcodec/rv30.c
> +++ b/libavcodec/rv30.c
> @@ -275,8 +275,10 @@ AVCodec ff_rv30_decoder = {
>      .init           = rv30_decode_init,
>      .close          = ff_rv34_decode_end,
>      .decode         = ff_rv34_decode_frame,
> -    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
> +    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | 
> CODEC_CAP_FRAME_THREADS,
>      .flush          = ff_mpeg_flush,
>      .long_name      = NULL_IF_CONFIG_SMALL("RealVideo 3.0"),
>      .pix_fmts       = ff_pixfmt_list_420,
> +    .init_thread_copy      = 
> ONLY_IF_THREADS_ENABLED(ff_rv34_decode_init_thread_copy),
> +    .update_thread_context = 
> ONLY_IF_THREADS_ENABLED(ff_rv34_decode_update_thread_context),
>  };
> diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
> index 295a633..ea558e1 100644
> --- a/libavcodec/rv34.c
> +++ b/libavcodec/rv34.c
> @@ -24,12 +24,16 @@
>   * RV30/40 decoder common data
>   */
>  
> +#include "libavutil/internal.h"
> +
>  #include "avcodec.h"
>  #include "dsputil.h"
>  #include "mpegvideo.h"
>  #include "golomb.h"
> +#include "internal.h"
>  #include "mathops.h"
>  #include "rectangle.h"
> +#include "thread.h"
>  
>  #include "rv34vlc.h"
>  #include "rv34data.h"
> @@ -679,6 +683,14 @@ static inline void rv34_mc(RV34DecContext *r, const int 
> block_type,
>          if(uvmx == 6 && uvmy == 6)
>              uvmx = uvmy = 4;
>      }
> +
> +    if (HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME)) {
> +        int offset = (yoff + my + 15)>>4;

give shift some space to breathe

> +        AVFrame *f = dir ? &s->next_picture_ptr->f : &s->last_picture_ptr->f;
> +        /* wait for the current mb row to be finished */
> +        ff_thread_await_progress(f, s->mb_y + offset + 1, 0);
> +    }
> +
>      dxy = ly*4 + lx;
>      srcY = dir ? s->next_picture_ptr->f.data[0] : 
> s->last_picture_ptr->f.data[0];
>      srcU = dir ? s->next_picture_ptr->f.data[1] : 
> s->last_picture_ptr->f.data[1];
> @@ -833,6 +845,10 @@ static int rv34_decode_mv(RV34DecContext *r, int 
> block_type)
>          }
>      case RV34_MB_B_DIRECT:
>          //surprisingly, it uses motion scheme from next reference frame
> +        /* wait for the current mb row to be finished */
> +        if (HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
> +         ff_thread_await_progress(&s->next_picture_ptr->f, s->mb_y + 1, 0);
> +

weird indentation

>          next_bt = s->next_picture_ptr->f.mb_type[s->mb_x + s->mb_y * 
> s->mb_stride];
>          if(IS_INTRA(next_bt) || IS_SKIP(next_bt)){
>              ZERO8x2(s->current_picture_ptr->f.motion_val[0][s->mb_x * 2 + 
> s->mb_y * 2 * s->b8_stride], s->b8_stride);
> @@ -1260,6 +1276,7 @@ static int rv34_decode_slice(RV34DecContext *r, int 
> end, const uint8_t* buf, int
>              }
>          }
>          s->mb_x = s->mb_y = 0;
> +        ff_thread_finish_setup(s->avctx);
>      } else {
>          int slice_type = r->si.type ? r->si.type : AV_PICTURE_TYPE_I;
>  
> @@ -1305,6 +1322,11 @@ static int rv34_decode_slice(RV34DecContext *r, int 
> end, const uint8_t* buf, int
>  
>              if(r->loop_filter && s->mb_y >= 2)
>                  r->loop_filter(r, s->mb_y - 2);
> +
> +            if (HAVE_THREADS && (s->avctx->active_thread_type & 
> FF_THREAD_FRAME))
> +                ff_thread_report_progress(&s->current_picture_ptr->f,
> +                                          s->mb_y-2, 0);

give subtraction some space to breathe

> +
>          }
>          if(s->mb_x == s->resync_mb_x)
>              s->first_slice_line=0;
> @@ -1370,6 +1392,71 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx)
>      return 0;
>  }
>  
> +int ff_rv34_decode_init_thread_copy(AVCodecContext *avctx)
> +{
> +    RV34DecContext *r = avctx->priv_data;
> +
> +    r->s.avctx = avctx;
> +
> +    if (avctx->internal->is_copy) {
> +        FF_ALLOC_OR_GOTO(avctx, r->intra_types_hist, r->intra_types_stride *
> +                         4 * 2 * sizeof(*r->intra_types_hist),       
> err_mem1);
> +        FF_ALLOC_OR_GOTO(avctx, r->mb_type,          r->s.mb_stride *
> +                         r->s.mb_height * sizeof(*r->mb_type),       
> err_mem2);
> +        FF_ALLOC_OR_GOTO(avctx, r->cbp_luma,         r->s.mb_stride *
> +                         r->s.mb_height * sizeof(*r->cbp_luma),      
> err_mem3);
> +        FF_ALLOC_OR_GOTO(avctx, r->cbp_chroma,       r->s.mb_stride *
> +                         r->s.mb_height * sizeof(*r->cbp_chroma),    
> err_mem4);
> +        FF_ALLOC_OR_GOTO(avctx, r->deblock_coefs,    r->s.mb_stride *
> +                         r->s.mb_height * sizeof(*r->deblock_coefs), 
> err_mem5);
> +
> +        r->intra_types      = r->intra_types_hist + r->intra_types_stride * 
> 4;
> +        r->tmp_b_block_base = NULL;
> +
> +        memset(r->mb_type, 0,  r->s.mb_stride * r->s.mb_height *
> +               sizeof(*r->mb_type));
> +
> +        MPV_common_init(&r->s);
> +    }
> +    return 0;
> +err_mem5:
> +    av_freep(&r->cbp_chroma);
> +err_mem4:
> +    av_freep(&r->cbp_luma);
> +err_mem3:
> +    av_freep(&r->mb_type);
> +err_mem2:
> +    av_freep(&r->intra_types_hist);
> +    r->intra_types = NULL;
> +err_mem1:
> +    return AVERROR(ENOMEM);
> +}
> +
> +int ff_rv34_decode_update_thread_context(AVCodecContext *dst, const 
> AVCodecContext *src)
> +{
> +    RV34DecContext *r= dst->priv_data, *r1= src->priv_data;

this formatting is not for you, s/=/ =/g

> +    MpegEncContext * const s = &r->s, * const s1 = &r1->s;
> +    int err;
> +
> +    if (dst == src || !s1->context_initialized)
> +        return 0;
> +
> +    if ((err = ff_mpeg_update_thread_context(dst, src)))
> +        return err;
> +
> +    r->cur_pts  = r1->cur_pts;
> +    r->last_pts = r1->last_pts;
> +    r->next_pts = r1->next_pts;
> +
> +    memset(&r->si, 0, sizeof(r->si));
> +
> +    /* necessary since it is it the condition checked for in decode_slice
> +     * to call MPV_frame_start. cmp. comment at the end of decode_frame */
> +    r->s.current_picture_ptr = NULL;
> +
> +    return 0;
> +}
> +
>  static int get_slice_offset(AVCodecContext *avctx, const uint8_t *buf, int n)
>  {
>      if(avctx->slice_count) return avctx->slice_offset[n];
> @@ -1483,6 +1570,10 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
>              *data_size = sizeof(AVFrame);
>              ff_print_debug_info(s, pict);
>          }
> +
> +        if (HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
> +            ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 
> 0);
> +
>          s->current_picture_ptr = NULL; //so we can detect if frame_end wasnt 
> called (find some nicer solution...)
>      }
>      return avpkt->size;
> diff --git a/libavcodec/rv34.h b/libavcodec/rv34.h
> index 12607fb..ac80814 100644
> --- a/libavcodec/rv34.h
> +++ b/libavcodec/rv34.h
> @@ -134,5 +134,7 @@ int ff_rv34_get_start_offset(GetBitContext *gb, int 
> blocks);
>  int ff_rv34_decode_init(AVCodecContext *avctx);
>  int ff_rv34_decode_frame(AVCodecContext *avctx, void *data, int *data_size, 
> AVPacket *avpkt);
>  int ff_rv34_decode_end(AVCodecContext *avctx);
> +int ff_rv34_decode_init_thread_copy(AVCodecContext *avctx);
> +int ff_rv34_decode_update_thread_context(AVCodecContext *dst, const 
> AVCodecContext *src);
>  
>  #endif /* AVCODEC_RV34_H */
> diff --git a/libavcodec/rv40.c b/libavcodec/rv40.c
> index 3216e83..9a00eba 100644
> --- a/libavcodec/rv40.c
> +++ b/libavcodec/rv40.c
> @@ -534,8 +534,10 @@ AVCodec ff_rv40_decoder = {
>      .init           = rv40_decode_init,
>      .close          = ff_rv34_decode_end,
>      .decode         = ff_rv34_decode_frame,
> -    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
> +    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | 
> CODEC_CAP_FRAME_THREADS,
>      .flush          = ff_mpeg_flush,
>      .long_name      = NULL_IF_CONFIG_SMALL("RealVideo 4.0"),
>      .pix_fmts       = ff_pixfmt_list_420,
> +    .init_thread_copy      = 
> ONLY_IF_THREADS_ENABLED(ff_rv34_decode_init_thread_copy),
> +    .update_thread_context = 
> ONLY_IF_THREADS_ENABLED(ff_rv34_decode_update_thread_context),
>  };
> -- 

not that I know much about multithreaded decoding but the patch looks sane to
me
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to