Re: [libav-devel] [PATCH] pgssubdec: handle more complex PGS scenarios

John Stebbins Fri, 13 Jun 2014 08:00:04 -0700

On 06/13/2014 07:56 AM, John Stebbins wrote:
> Add ability to handle multiple palettes and objects simultaneously.
> Each simultaneous object is given its own AVSubtitleRect.
> Note that there can be up to 64 currently valid objects, but only
> 2 at any one time can be "presented".
> ---
>  libavcodec/pgssubdec.c | 349 
> +++++++++++++++++++++++++++++++++++--------------
>  1 file changed, 249 insertions(+), 100 deletions(-)
>
> diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c
> index 4f175a3..e1df4cc 100644
> --- a/libavcodec/pgssubdec.c
> +++ b/libavcodec/pgssubdec.c
> @@ -33,38 +33,107 @@
>  #include "libavutil/imgutils.h"
>  
>  #define RGBA(r,g,b,a) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
> +#define MAX_EPOCH_PALETTES 8   // Max 8 allowed per PGS epoch
> +#define MAX_EPOCH_OBJECTS  64  // Max 64 allowed per PGS epoch
> +#define MAX_OBJECT_REFS    2   // Max objects per display set
>  
>  enum SegmentType {
>      PALETTE_SEGMENT      = 0x14,
> -    PICTURE_SEGMENT      = 0x15,
> +    OBJECT_SEGMENT       = 0x15,
>      PRESENTATION_SEGMENT = 0x16,
>      WINDOW_SEGMENT       = 0x17,
>      DISPLAY_SEGMENT      = 0x80,
>  };
>  
> +typedef struct PGSSubObjectRef {
> +    int     id;
> +    int     window_id;
> +    uint8_t composition_flag;
> +    int     x;
> +    int     y;
> +    int     crop_x;
> +    int     crop_y;
> +    int     crop_w;
> +    int     crop_h;
> +} PGSSubObjectRef;
> +
>  typedef struct PGSSubPresentation {
> -    int x;
> -    int y;
>      int id_number;
> -    int object_number;
> -    uint8_t composition_flag;
> +    int palette_id;
> +    int object_count;
> +    PGSSubObjectRef objects[MAX_OBJECT_REFS];
>      int64_t pts;
>  } PGSSubPresentation;
>  
> -typedef struct PGSSubPicture {
> +typedef struct PGSSubObject {
> +    int          id;
>      int          w;
>      int          h;
>      uint8_t      *rle;
>      unsigned int rle_buffer_size, rle_data_len;
>      unsigned int rle_remaining_len;
> -} PGSSubPicture;
> +} PGSSubObject;
> +
> +typedef struct PGSSubObjects {
> +    int          count;
> +    PGSSubObject object[MAX_EPOCH_OBJECTS];
> +} PGSSubObjects;
> +
> +typedef struct PGSSubPalette {
> +    int         id;
> +    uint32_t    clut[256];
> +} PGSSubPalette;
> +
> +typedef struct PGSSubPalettes {
> +    int           count;
> +    PGSSubPalette palette[MAX_EPOCH_PALETTES];
> +} PGSSubPalettes;
>  
>  typedef struct PGSSubContext {
>      PGSSubPresentation presentation;
> -    uint32_t           clut[256];
> -    PGSSubPicture      picture;
> +    PGSSubPalettes     palettes;
> +    PGSSubObjects      objects;
>  } PGSSubContext;
>  
> +static void close_objects(PGSSubObjects *objects)
> +{
> +    int i;
> +
> +    for (i = 0; i < objects->count; i++) {
> +        av_freep(&objects->object[i].rle);
> +        objects->object[i].rle_buffer_size  = 0;
> +        objects->object[i].rle_remaining_len  = 0;
> +    }
> +    objects->count = 0;
> +}
> +
> +static void close_palettes(PGSSubPalettes *palettes)
> +{
> +    palettes->count = 0;
> +}
> +
> +static PGSSubObject * find_object(int id, PGSSubObjects *objects)
> +{
> +    int i;
> +
> +    for (i = 0; i < objects->count; i++) {
> +        if (objects->object[i].id == id)
> +            return &objects->object[i];
> +    }
> +    return NULL;
> +}
> +
> +static PGSSubPalette * find_palette(int id, PGSSubPalettes *palettes)
> +{
> +    int i;
> +
> +    for (i = 0; i < palettes->count; i++) {
> +        if (palettes->palette[i].id == id)
> +            return &palettes->palette[i];
> +    }
> +    return NULL;
> +}
> +
>  static av_cold int init_decoder(AVCodecContext *avctx)
>  {
>      avctx->pix_fmt = AV_PIX_FMT_PAL8;
> @@ -76,8 +145,8 @@ static av_cold int close_decoder(AVCodecContext *avctx)
>  {
>      PGSSubContext *ctx = avctx->priv_data;
>  
> -    av_freep(&ctx->picture.rle);
> -    ctx->picture.rle_buffer_size  = 0;
> +    close_objects(&ctx->objects);
> +    close_palettes(&ctx->palettes);
>  
>      return 0;
>  }
> @@ -92,7 +161,7 @@ static av_cold int close_decoder(AVCodecContext *avctx)
>   * @param buf pointer to the RLE data to process
>   * @param buf_size size of the RLE data to process
>   */
> -static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,
> +static int decode_rle(AVCodecContext *avctx, AVSubtitleRect *rect,
>                        const uint8_t *buf, unsigned int buf_size)
>  {
>      const uint8_t *rle_bitmap_end;
> @@ -100,15 +169,15 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitle 
> *sub,
>  
>      rle_bitmap_end = buf + buf_size;
>  
> -    sub->rects[0]->pict.data[0] = av_malloc(sub->rects[0]->w * 
> sub->rects[0]->h);
> +    rect->pict.data[0] = av_malloc(rect->w * rect->h);
>  
> -    if (!sub->rects[0]->pict.data[0])
> +    if (!rect->pict.data[0])
>          return -1;
>  
>      pixel_count = 0;
>      line_count  = 0;
>  
> -    while (buf < rle_bitmap_end && line_count < sub->rects[0]->h) {
> +    while (buf < rle_bitmap_end && line_count < rect->h) {
>          uint8_t flags, color;
>          int run;
>  
> @@ -123,27 +192,27 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitle 
> *sub,
>              color = flags & 0x80 ? bytestream_get_byte(&buf) : 0;
>          }
>  
> -        if (run > 0 && pixel_count + run <= sub->rects[0]->w * 
> sub->rects[0]->h) {
> -            memset(sub->rects[0]->pict.data[0] + pixel_count, color, run);
> +        if (run > 0 && pixel_count + run <= rect->w * rect->h) {
> +            memset(rect->pict.data[0] + pixel_count, color, run);
>              pixel_count += run;
>          } else if (!run) {
>              /*
>               * New Line. Check if correct pixels decoded, if not display 
> warning
>               * and adjust bitmap pointer to correct new line position.
>               */
> -            if (pixel_count % sub->rects[0]->w > 0)
> +            if (pixel_count % rect->w > 0)
>                  av_log(avctx, AV_LOG_ERROR, "Decoded %d pixels, when line 
> should be %d pixels\n",
> -                       pixel_count % sub->rects[0]->w, sub->rects[0]->w);
> +                       pixel_count % rect->w, rect->w);
>              line_count++;
>          }
>      }
>  
> -    if (pixel_count < sub->rects[0]->w * sub->rects[0]->h) {
> +    if (pixel_count < rect->w * rect->h) {
>          av_log(avctx, AV_LOG_ERROR, "Insufficient RLE data for subtitle\n");
>          return -1;
>      }
>  
> -    av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count, 
> sub->rects[0]->w * sub->rects[0]->h);
> +    av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count, rect->w * 
> rect->h);
>  
>      return 0;
>  }
> @@ -159,32 +228,45 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitle 
> *sub,
>   * @param buf_size size of packet to process
>   * @todo TODO: Enable support for RLE data over multiple packets
>   */
> -static int parse_picture_segment(AVCodecContext *avctx,
> +static int parse_object_segment(AVCodecContext *avctx,
>                                    const uint8_t *buf, int buf_size)
>  {
>      PGSSubContext *ctx = avctx->priv_data;
> +    PGSSubObject *object;
>  
>      uint8_t sequence_desc;
>      unsigned int rle_bitmap_len, width, height;
> +    int id;
>  
>      if (buf_size <= 4)
>          return -1;
>      buf_size -= 4;
>  
> -    /* skip 3 unknown bytes: Object ID (2 bytes), Version Number */
> -    buf += 3;
> +    id = bytestream_get_be16(&buf);
> +    object = find_object(id, &ctx->objects);
> +    if (!object) {
> +        if (ctx->objects.count >= MAX_EPOCH_OBJECTS) {
> +            av_log(avctx, AV_LOG_ERROR, "Too many objects in epoch\n");
> +            return -1;
> +        }
> +        object = &ctx->objects.object[ctx->objects.count++];
> +        object->id = id;
> +    }
> +
> +    /* skip object version number */
> +    buf += 1;
>  
>      /* Read the Sequence Description to determine if start of RLE data or 
> appended to previous RLE */
>      sequence_desc = bytestream_get_byte(&buf);
>  
>      if (!(sequence_desc & 0x80)) {
>          /* Additional RLE data */
> -        if (buf_size > ctx->picture.rle_remaining_len)
> +        if (buf_size > object->rle_remaining_len)
>              return -1;
>  
> -        memcpy(ctx->picture.rle + ctx->picture.rle_data_len, buf, buf_size);
> -        ctx->picture.rle_data_len += buf_size;
> -        ctx->picture.rle_remaining_len -= buf_size;
> +        memcpy(object->rle + object->rle_data_len, buf, buf_size);
> +        object->rle_data_len += buf_size;
> +        object->rle_remaining_len -= buf_size;
>  
>          return 0;
>      }
> @@ -206,17 +288,17 @@ static int parse_picture_segment(AVCodecContext *avctx,
>          return -1;
>      }
>  
> -    ctx->picture.w = width;
> -    ctx->picture.h = height;
> +    object->w = width;
> +    object->h = height;
>  
> -    av_fast_malloc(&ctx->picture.rle, &ctx->picture.rle_buffer_size, 
> rle_bitmap_len);
> +    av_fast_malloc(&object->rle, &object->rle_buffer_size, rle_bitmap_len);
>  
> -    if (!ctx->picture.rle)
> +    if (!object->rle)
>          return -1;
>  
> -    memcpy(ctx->picture.rle, buf, buf_size);
> -    ctx->picture.rle_data_len = buf_size;
> -    ctx->picture.rle_remaining_len = rle_bitmap_len - buf_size;
> +    memcpy(object->rle, buf, buf_size);
> +    object->rle_data_len = buf_size;
> +    object->rle_remaining_len = rle_bitmap_len - buf_size;
>  
>      return 0;
>  }
> @@ -231,19 +313,32 @@ static int parse_picture_segment(AVCodecContext *avctx,
>   * @param buf pointer to the packet to process
>   * @param buf_size size of packet to process
>   */
> -static void parse_palette_segment(AVCodecContext *avctx,
> +static int parse_palette_segment(AVCodecContext *avctx,
>                                    const uint8_t *buf, int buf_size)
>  {
>      PGSSubContext *ctx = avctx->priv_data;
> +    PGSSubPalette *palette;
>  
>      const uint8_t *buf_end = buf + buf_size;
>      const uint8_t *cm      = ff_crop_tab + MAX_NEG_CROP;
>      int color_id;
>      int y, cb, cr, alpha;
>      int r, g, b, r_add, g_add, b_add;
> +    int id;
>  
> -    /* Skip two null bytes */
> -    buf += 2;
> +    id  = bytestream_get_byte(&buf);
> +    palette = find_palette(id, &ctx->palettes);
> +    if (!palette) {
> +        if (ctx->palettes.count >= MAX_EPOCH_PALETTES) {
> +            av_log(avctx, AV_LOG_ERROR, "Too many palettes in epoch\n");
> +            return -1;
> +        }
> +        palette = &ctx->palettes.palette[ctx->palettes.count++];
> +        palette->id  = id;
> +    }
> +
> +    /* Skip palette version */
> +    buf += 1;
>  
>      while (buf < buf_end) {
>          color_id  = bytestream_get_byte(&buf);
> @@ -258,8 +353,9 @@ static void parse_palette_segment(AVCodecContext *avctx,
>          av_dlog(avctx, "Color %d := (%d,%d,%d,%d)\n", color_id, r, g, b, 
> alpha);
>  
>          /* Store color in palette */
> -        ctx->clut[color_id] = RGBA(r,g,b,alpha);
> +        palette->clut[color_id] = RGBA(r,g,b,alpha);
>      }
> +    return 0;
>  }
>  
>  /**
> @@ -280,8 +376,9 @@ static int parse_presentation_segment(AVCodecContext 
> *avctx,
>  {
>      PGSSubContext *ctx = avctx->priv_data;
>  
> -    int x, y, ret;
> +    int i, state, ret;
>  
> +    // Video descriptor
>      int w = bytestream_get_be16(&buf);
>      int h = bytestream_get_be16(&buf);
>  
> @@ -293,49 +390,70 @@ static int parse_presentation_segment(AVCodecContext 
> *avctx,
>      if (ret < 0)
>          return ret;
>  
> -    /* Skip 1 bytes of unknown, frame rate? */
> +    /* Skip 1 bytes of unknown, frame rate */
>      buf++;
>  
> +    // Composition descriptor
>      ctx->presentation.id_number = bytestream_get_be16(&buf);
> -
>      /*
> -     * Skip 3 bytes of unknown:
> -     *     state
> -     *     palette_update_flag (0x80),
> -     *     palette_id_to_use,
> +     * state is a 2 bit field that defines pgs epoch boundaries
> +     * 00 - Normal, previously defined objects and palettes are still valid
> +     * 01 - Acquisition point, previous objects and palettes can be released
> +     * 10 - Epoch start, previous objects and palettes can be released
> +     * 11 - Epoch continue, previous objects and palettes can be released
> +     *
> +     * reserved 6 bits discarded
>       */
> -    buf += 3;
> -
> -    ctx->presentation.object_number = bytestream_get_byte(&buf);
> -    ctx->presentation.composition_flag = 0;
> -    if (!ctx->presentation.object_number)
> -        return 0;
> +    state = bytestream_get_byte(&buf) >> 6;
> +    if (state != 0) {
> +        close_objects(&ctx->objects);
> +        close_palettes(&ctx->palettes);
> +    }
>  
>      /*
> -     * Skip 3 bytes of unknown:
> -     *     object_id_ref (2 bytes),
> -     *     window_id_ref,
> +     * skip palette_update_flag (0x80),
>       */
> -    buf += 3;
> -    ctx->presentation.composition_flag = bytestream_get_byte(&buf);
> -
> -    x = bytestream_get_be16(&buf);
> -    y = bytestream_get_be16(&buf);
> -
> -    /* TODO If cropping, cropping_x, cropping_y, cropping_width, 
> cropping_height (all 2 bytes).*/
> +    buf += 1;
> +    ctx->presentation.palette_id = bytestream_get_byte(&buf);
> +    ctx->presentation.object_count = bytestream_get_byte(&buf);
> +    if (ctx->presentation.object_count > MAX_OBJECT_REFS) {
> +        av_log(avctx, AV_LOG_ERROR,
> +               "Invalid number of presentation objects %d\n",
> +               ctx->presentation.object_count);
> +        ctx->presentation.object_count = 2;
> +    }
>  
> -    av_dlog(avctx, "Subtitle Placement x=%d, y=%d\n", x, y);
> +    for (i = 0; i < ctx->presentation.object_count; i++)
> +    {
> +        ctx->presentation.objects[i].id = bytestream_get_be16(&buf);
> +        ctx->presentation.objects[i].window_id = bytestream_get_byte(&buf);
> +        ctx->presentation.objects[i].composition_flag = 
> bytestream_get_byte(&buf);
> +
> +        ctx->presentation.objects[i].x = bytestream_get_be16(&buf);
> +        ctx->presentation.objects[i].y = bytestream_get_be16(&buf);
> +
> +        // If cropping
> +        if (ctx->presentation.objects[i].composition_flag & 0x80) {
> +            ctx->presentation.objects[i].crop_x = bytestream_get_be16(&buf);
> +            ctx->presentation.objects[i].crop_y = bytestream_get_be16(&buf);
> +            ctx->presentation.objects[i].crop_w = bytestream_get_be16(&buf);
> +            ctx->presentation.objects[i].crop_h = bytestream_get_be16(&buf);
> +        }
>  
> -    if (x > avctx->width || y > avctx->height) {
> -        av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x = %d, y 
> = %d, video width = %d, video height = %d.\n",
> -               x, y, avctx->width, avctx->height);
> -        x = 0; y = 0;
> +        av_dlog(avctx, "Subtitle Placement x=%d, y=%d\n",
> +                ctx->presentation.objects[i].x, 
> ctx->presentation.objects[i].y);
> +
> +        if (ctx->presentation.objects[i].x > avctx->width ||
> +            ctx->presentation.objects[i].y > avctx->height) {
> +            av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x = 
> %d, y = %d, video width = %d, video height = %d.\n",
> +                   ctx->presentation.objects[i].x,
> +                   ctx->presentation.objects[i].y,
> +                    avctx->width, avctx->height);
> +            ctx->presentation.objects[i].x = 0;
> +            ctx->presentation.objects[i].y = 0;
> +        }
>      }
>  
> -    /* Fill in dimensions */
> -    ctx->presentation.x = x;
> -    ctx->presentation.y = y;
> -
>      return 0;
>  }
>  
> @@ -359,6 +477,8 @@ static int display_end_segment(AVCodecContext *avctx, 
> void *data,
>  {
>      AVSubtitle    *sub = data;
>      PGSSubContext *ctx = avctx->priv_data;
> +    PGSSubPalette *palette;
> +    int i;
>  
>      /*
>       *      The end display time is a timeout value and is only reached
> @@ -369,43 +489,72 @@ static int display_end_segment(AVCodecContext *avctx, 
> void *data,
>      memset(sub, 0, sizeof(*sub));
>      sub->pts = ctx->presentation.pts;
>  
> -    // Blank if last object_number was 0.
> +    // Blank if last object_count was 0.
>      // Note that this may be wrong for more complex subtitles.
> -    if (!ctx->presentation.object_number)
> +    if (!ctx->presentation.object_count)
>          return 1;
>      sub->start_display_time = 0;
>      sub->end_display_time   = 20000;
>      sub->format             = 0;
>  
> -    sub->rects     = av_mallocz(sizeof(*sub->rects));
> -    sub->rects[0]  = av_mallocz(sizeof(*sub->rects[0]));
> -    sub->num_rects = 1;
> -
> -    if (ctx->presentation.composition_flag & 0x40)
> -        sub->rects[0]->flags |= AV_SUBTITLE_FLAG_FORCED;
> +    sub->num_rects = ctx->presentation.object_count;
> +    sub->rects     = av_mallocz(sizeof(*sub->rects) * sub->num_rects);
> +    if (!sub->rects) {
> +        return -1;
> +    }
> +    palette = find_palette(ctx->presentation.palette_id, &ctx->palettes);
> +    if (!palette) {
> +        // Missing palette.  Should only happen with damaged streams.
> +        // TODO: use a default palette?
> +        av_log(avctx, AV_LOG_ERROR, "Invalid palette id %d\n",
> +               ctx->presentation.palette_id);
> +        sub->num_rects = 0;
> +        av_freep(&sub->rects);
> +        return -1;
> +    }


I changed the code above from the previous iteration of this patch because
the earlier version left the AVSubtitle in a bad state, which could lead to
a crash.

> +    for (i = 0; i < ctx->presentation.object_count; i++) {
> +        PGSSubObject *object;
>  
> -    sub->rects[0]->x    = ctx->presentation.x;
> -    sub->rects[0]->y    = ctx->presentation.y;
> -    sub->rects[0]->w    = ctx->picture.w;
> -    sub->rects[0]->h    = ctx->picture.h;
> -    sub->rects[0]->type = SUBTITLE_BITMAP;
> +        sub->rects[i]  = av_mallocz(sizeof(*sub->rects[0]));
> +        if (!sub->rects[i]) {
> +            sub->num_rects = i;
> +            return -1;
> +        }
> +        sub->rects[i]->type = SUBTITLE_BITMAP;
> +
> +        /* Process bitmap */
> +        object = find_object(ctx->presentation.objects[i].id, &ctx->objects);
> +        if (!object) {
> +            // Missing object.  Should only happen with damaged streams.
> +            av_log(avctx, AV_LOG_ERROR, "Invalid object id %d\n",
> +                   ctx->presentation.objects[i].id);
> +            // Leaves rect empty with 0 width and height.
> +            continue;
> +        }

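Ditto for the missing-object handling above: it was changed from the previous
iteration of this patch for the same reason.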

> +        if (ctx->presentation.objects[i].composition_flag & 0x40)
> +            sub->rects[i]->flags |= AV_SUBTITLE_FLAG_FORCED;
> +
> +        sub->rects[i]->x    = ctx->presentation.objects[i].x;
> +        sub->rects[i]->y    = ctx->presentation.objects[i].y;
> +        sub->rects[i]->w    = object->w;
> +        sub->rects[i]->h    = object->h;
> +
> +        sub->rects[i]->pict.linesize[0] = object->w;
> +
> +        if (object->rle) {
> +            if (object->rle_remaining_len)
> +                av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes 
> shorter than expected\n",
> +                       object->rle_data_len, object->rle_remaining_len);
> +            if(decode_rle(avctx, sub->rects[i], object->rle, 
> object->rle_data_len) < 0)
> +                return 0;
> +        }
> +        /* Allocate memory for colors */
> +        sub->rects[i]->nb_colors    = 256;
> +        sub->rects[i]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
>  
> -    /* Process bitmap */
> -    sub->rects[0]->pict.linesize[0] = ctx->picture.w;
> +        memcpy(sub->rects[i]->pict.data[1], palette->clut, 
> sub->rects[i]->nb_colors * sizeof(uint32_t));
>  
> -    if (ctx->picture.rle) {
> -        if (ctx->picture.rle_remaining_len)
> -            av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes 
> shorter than expected\n",
> -                   ctx->picture.rle_data_len, 
> ctx->picture.rle_remaining_len);
> -        if(decode_rle(avctx, sub, ctx->picture.rle, 
> ctx->picture.rle_data_len) < 0)
> -            return 0;
>      }
> -    /* Allocate memory for colors */
> -    sub->rects[0]->nb_colors    = 256;
> -    sub->rects[0]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
> -
> -    memcpy(sub->rects[0]->pict.data[1], ctx->clut, sub->rects[0]->nb_colors 
> * sizeof(uint32_t));
> -
>      return 1;
>  }
>  
> @@ -453,8 +602,8 @@ static int decode(AVCodecContext *avctx, void *data, int 
> *data_size,
>          case PALETTE_SEGMENT:
>              parse_palette_segment(avctx, buf, segment_length);
>              break;
> -        case PICTURE_SEGMENT:
> -            parse_picture_segment(avctx, buf, segment_length);
> +        case OBJECT_SEGMENT:
> +            parse_object_segment(avctx, buf, segment_length);
>              break;
>          case PRESENTATION_SEGMENT:
>              ret = parse_presentation_segment(avctx, buf, segment_length, 
> avpkt->pts);

-- 
John      GnuPG fingerprint: D0EC B3DB C372 D1F1 0B01  83F0 49F1 D7B2 60D4 D0F7

signature.asc
Description: OpenPGP digital signature

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] pgssubdec: handle more complex PGS scenarios

Reply via email to