Re: [libav-devel] [PATCH 14/14] avcodec/cbs_h2645: use AVBufferRef to store list of active parameter sets

2019-05-13 Thread Alexandra Hájková
On Sat, May 4, 2019 at 2:16 PM Luca Barbato  wrote:
>
> From: James Almer 
>
> Removes unnecessary data copies, and partially fixes potential issues
> with dangling references held in said lists.
>
> Reviewed-by: Mark Thompson 
> Signed-off-by: James Almer 
> ---
>  libavcodec/cbs_h264.h  |  2 ++
>  libavcodec/cbs_h2645.c | 46 ++
>  libavcodec/cbs_h265.h  |  3 +++
>  3 files changed, 31 insertions(+), 20 deletions(-)
>
> diff --git a/libavcodec/cbs_h264.h b/libavcodec/cbs_h264.h
> index 5a7dc27698..8e68595614 100644
> --- a/libavcodec/cbs_h264.h
> +++ b/libavcodec/cbs_h264.h
> @@ -421,6 +421,8 @@ typedef struct CodedBitstreamH264Context {
>
>  // All currently available parameter sets.  These are updated when
>  // any parameter set NAL unit is read/written with this context.
> +AVBufferRef *sps_ref[H264_MAX_SPS_COUNT];
> +AVBufferRef *pps_ref[H264_MAX_PPS_COUNT];
>  H264RawSPS *sps[H264_MAX_SPS_COUNT];
>  H264RawPPS *pps[H264_MAX_PPS_COUNT];
>
> diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c
> index fab8bb7749..c05b347b1c 100644
> --- a/libavcodec/cbs_h2645.c
> +++ b/libavcodec/cbs_h2645.c
> @@ -677,9 +677,10 @@ static int 
> cbs_h2645_split_fragment(CodedBitstreamContext *ctx,
>
>  #define cbs_h2645_replace_ps(h26n, ps_name, ps_var, id_element) \
>  static int cbs_h26 ## h26n ## _replace_ ## ps_var(CodedBitstreamContext 
> *ctx, \
> -  const H26 ## h26n ## Raw 
> ## ps_name *ps_var)  \
> +  CodedBitstreamUnit *unit)  
> \
>  { \
>  CodedBitstreamH26 ## h26n ## Context *priv = ctx->priv_data; \
> +H26 ## h26n ## Raw ## ps_name *ps_var = unit->content; \
>  unsigned int id = ps_var->id_element; \
>  if (id > FF_ARRAY_ELEMS(priv->ps_var)) { \
>  av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid " #ps_name \
> @@ -688,11 +689,16 @@ static int cbs_h26 ## h26n ## _replace_ ## 
> ps_var(CodedBitstreamContext *ctx, \
>  } \
>  if (priv->ps_var[id] == priv->active_ ## ps_var) \
>  priv->active_ ## ps_var = NULL ; \
> -av_freep(&priv->ps_var[id]); \
> -priv->ps_var[id] = av_malloc(sizeof(*ps_var)); \
> -if (!priv->ps_var[id]) \
> +av_buffer_unref(&priv->ps_var ## _ref[id]); \
> +if (unit->content_ref) \
> +priv->ps_var ## _ref[id] = av_buffer_ref(unit->content_ref); \
> +else \
> +priv->ps_var ## _ref[id] = av_buffer_alloc(sizeof(*ps_var)); \
> +if (!priv->ps_var ## _ref[id]) \
>  return AVERROR(ENOMEM); \
> -memcpy(priv->ps_var[id], ps_var, sizeof(*ps_var)); \
> +priv->ps_var[id] = (H26 ## h26n ## Raw ## ps_name *)priv->ps_var ## 
> _ref[id]->data; \
> +if (!unit->content_ref) \
> +memcpy(priv->ps_var[id], ps_var, sizeof(*ps_var)); \
>  return 0; \
>  }
>
> @@ -726,7 +732,7 @@ static int cbs_h264_read_nal_unit(CodedBitstreamContext 
> *ctx,
>  if (err < 0)
>  return err;
>
> -err = cbs_h264_replace_sps(ctx, sps);
> +err = cbs_h264_replace_sps(ctx, unit);
>  if (err < 0)
>  return err;
>  }
> @@ -760,7 +766,7 @@ static int cbs_h264_read_nal_unit(CodedBitstreamContext 
> *ctx,
>  if (err < 0)
>  return err;
>
> -err = cbs_h264_replace_pps(ctx, pps);
> +err = cbs_h264_replace_pps(ctx, unit);
>  if (err < 0)
>  return err;
>  }
> @@ -873,7 +879,7 @@ static int cbs_h265_read_nal_unit(CodedBitstreamContext 
> *ctx,
>  if (err < 0)
>  return err;
>
> -err = cbs_h265_replace_vps(ctx, vps);
> +err = cbs_h265_replace_vps(ctx, unit);
>  if (err < 0)
>  return err;
>  }
> @@ -892,7 +898,7 @@ static int cbs_h265_read_nal_unit(CodedBitstreamContext 
> *ctx,
>  if (err < 0)
>  return err;
>
> -err = cbs_h265_replace_sps(ctx, sps);
> +err = cbs_h265_replace_sps(ctx, unit);
>  if (err < 0)
>  return err;
>  }
> @@ -912,7 +918,7 @@ static int cbs_h265_read_nal_unit(CodedBitstreamContext 
> *ctx,
>  if (err < 0)
>  return err;
>
> -err = cbs_h265_replace_pps(ctx, pps);
> +err = cbs_h265_replace_pps(ctx, unit);
>  if (err < 0)
>  return err;
>  }
> @@ -1002,7 +1008,7 @@ static int 
> cbs_h264_write_nal_unit(CodedBitstreamContext *ctx,
>  if (err < 0)
>  return err;
>
> -err = cbs_h264_replace_sps(ctx, sps);
> +err = cbs_h264_replace_sps(ctx, unit);
>  if (err < 0)
>  return err;
>  }
> @@ -1026,7 +1032,7 @@ static int 
> cbs_h264_write_nal_unit(CodedBitstreamContext *ctx,
>  if (err < 0)
>  return err;
>
> - 

Re: [libav-devel] [PATCH 13/14] cbs_h264: Need [] in the name when subscript is required

2019-05-13 Thread Alexandra Hájková
On Sat, May 4, 2019 at 2:15 PM Luca Barbato  wrote:
>
> From: Haihao Xiang 
>
> Otherwise it will hit an assert in the function
> ff_cbs_trace_syntax_element() in cbs.c, line 400.
>
> Signed-off-by: Haihao Xiang 
> ---
>  libavcodec/cbs_h264_syntax_template.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/libavcodec/cbs_h264_syntax_template.c 
> b/libavcodec/cbs_h264_syntax_template.c
> index 35ff85f653..28840f0bfe 100644
> --- a/libavcodec/cbs_h264_syntax_template.c
> +++ b/libavcodec/cbs_h264_syntax_template.c
> @@ -763,7 +763,7 @@ static int FUNC(sei_payload)(CodedBitstreamContext *ctx, 
> RWContext *rw,
>  {
>  allocate(current->payload.other.data, current->payload_size);
>  for (i = 0; i < current->payload_size; i++)
> -xu(8, payload_byte, current->payload.other.data[i], 0, 255, 
> 1, i);
> +xu(8, payload_byte[i], current->payload.other.data[i], 0, 
> 255, 1, i);
>  }
>  }
>
> --
> 2.12.2
>
> ___
> libav-devel mailing list
> libav-devel@libav.org
> https://lists.libav.org/mailman/listinfo/libav-devel

OK
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 12/14] cbs: Add support for array subscripts in trace output

2019-05-13 Thread Alexandra Hájková
On Sat, May 4, 2019 at 2:15 PM Luca Barbato  wrote:
>
> From: Mark Thompson 
>
> This makes the trace output for arrays significantly nicer.
> ---
>  libavcodec/cbs.c   |  44 ++--
>  libavcodec/cbs_h2645.c |  83 ---
>  libavcodec/cbs_h264_syntax_template.c  | 101 +--
>  libavcodec/cbs_h265_syntax_template.c  | 179 
> +
>  libavcodec/cbs_internal.h  |  10 +-
>  libavcodec/cbs_mpeg2.c |  25 ++---
>  libavcodec/cbs_mpeg2_syntax_template.c |  25 ++---
>  7 files changed, 261 insertions(+), 206 deletions(-)
>
> diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c
> index 907471956b..1329caeb78 100644
> --- a/libavcodec/cbs.c
> +++ b/libavcodec/cbs.c
> @@ -357,17 +357,43 @@ void ff_cbs_trace_header(CodedBitstreamContext *ctx,
>  }
>
>  void ff_cbs_trace_syntax_element(CodedBitstreamContext *ctx, int position,
> - const char *name, const char *bits,
> - int64_t value)
> + const char *str, const int *subscripts,
> + const char *bits, int64_t value)
>  {
> +char name[256];
>  size_t name_len, bits_len;
> -int pad;
> +int pad, subs, i, j, k, n;
>
>  if (!ctx->trace_enable)
>  return;
>
>  av_assert0(value >= INT_MIN && value <= UINT32_MAX);
>
> +subs = subscripts ? subscripts[0] : 0;
> +n = 0;
> +for (i = j = 0; str[i];) {
> +if (str[i] == '[') {
> +if (n < subs) {
> +++n;
> +k = snprintf(name + j, sizeof(name) - j, "[%d", 
> subscripts[n]);
> +av_assert0(k > 0 && j + k < sizeof(name));
> +j += k;
> +for (++i; str[i] && str[i] != ']'; i++);
> +av_assert0(str[i] == ']');
> +} else {
> +while (str[i] && str[i] != ']')
> +name[j++] = str[i++];
> +av_assert0(str[i] == ']');
> +}
> +} else {
> +av_assert0(j + 1 < sizeof(name));
> +name[j++] = str[i++];
> +}
> +}
> +av_assert0(j + 1 < sizeof(name));
> +name[j] = 0;
> +av_assert0(n == subs);
> +
>  name_len = strlen(name);
>  bits_len = strlen(bits);
>
> @@ -381,7 +407,8 @@ void ff_cbs_trace_syntax_element(CodedBitstreamContext 
> *ctx, int position,
>  }
>
>  int ff_cbs_read_unsigned(CodedBitstreamContext *ctx, BitstreamContext *bc,
> - int width, const char *name, uint32_t *write_to,
> + int width, const char *name,
> + const int *subscripts, uint32_t *write_to,
>   uint32_t range_min, uint32_t range_max)
>  {
>  uint32_t value;
> @@ -407,7 +434,8 @@ int ff_cbs_read_unsigned(CodedBitstreamContext *ctx, 
> BitstreamContext *bc,
>  bits[i] = value >> (width - i - 1) & 1 ? '1' : '0';
>  bits[i] = 0;
>
> -ff_cbs_trace_syntax_element(ctx, position, name, bits, value);
> +ff_cbs_trace_syntax_element(ctx, position, name, subscripts,
> +bits, value);
>  }
>
>  if (value < range_min || value > range_max) {
> @@ -422,7 +450,8 @@ int ff_cbs_read_unsigned(CodedBitstreamContext *ctx, 
> BitstreamContext *bc,
>  }
>
>  int ff_cbs_write_unsigned(CodedBitstreamContext *ctx, PutBitContext *pbc,
> -  int width, const char *name, uint32_t value,
> +  int width, const char *name,
> +  const int *subscripts, uint32_t value,
>uint32_t range_min, uint32_t range_max)
>  {
>  av_assert0(width <= 32);
> @@ -444,7 +473,8 @@ int ff_cbs_write_unsigned(CodedBitstreamContext *ctx, 
> PutBitContext *pbc,
>  bits[i] = value >> (width - i - 1) & 1 ? '1' : '0';
>  bits[i] = 0;
>
> -ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc), name, bits, 
> value);
> +ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc),
> +name, subscripts, bits, value);
>  }
>
>  if (width < 32)
> diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c
> index 0509d6d836..fab8bb7749 100644
> --- a/libavcodec/cbs_h2645.c
> +++ b/libavcodec/cbs_h2645.c
> @@ -32,7 +32,8 @@
>
>
>  static int cbs_read_ue_golomb(CodedBitstreamContext *ctx, BitstreamContext 
> *bc,
> -  const char *name, uint32_t *write_to,
> +  const char *name, const int *subscripts,
> +  uint32_t *write_to,
>uint32_t range_min, uint32_t range_max)
>  {
>  uint32_t value;
> @@ -68,7 +69,8 @@ static int cbs_read_ue_golomb(CodedBitstreamContext *ctx, 
> BitstreamContext *bc,
>  --value;
>
>  if (ctx->trace_enable)
> -

Re: [libav-devel] [PATCH 11/14] cbs_h2645: Simplify representation of fixed values

2019-05-13 Thread Alexandra Hájková
On Sat, May 4, 2019 at 2:15 PM Luca Barbato  wrote:
>
> From: Mark Thompson 
>
> ---
>  libavcodec/cbs_h2645.c|  5 +
>  libavcodec/cbs_h264_syntax_template.c | 30 ---
>  libavcodec/cbs_h265_syntax_template.c | 38 
> +++
>  3 files changed, 34 insertions(+), 39 deletions(-)
>
> diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c
> index 8cd6db3fb4..0509d6d836 100644
> --- a/libavcodec/cbs_h2645.c
> +++ b/libavcodec/cbs_h2645.c
> @@ -239,6 +239,11 @@ static int cbs_write_se_golomb(CodedBitstreamContext 
> *ctx, PutBitContext *pbc,
>  #define FUNC_H264(rw, name) FUNC_NAME(rw, h264, name)
>  #define FUNC_H265(rw, name) FUNC_NAME(rw, h265, name)
>
> +#define fixed(width, name, value) do { \
> +av_unused uint32_t fixed_value = value; \
> +xu(width, name, fixed_value, value, value); \
> +} while (0)
> +
>
>  #define READ
>  #define READWRITE read
> diff --git a/libavcodec/cbs_h264_syntax_template.c 
> b/libavcodec/cbs_h264_syntax_template.c
> index 92c1b67862..82d9d23200 100644
> --- a/libavcodec/cbs_h264_syntax_template.c
> +++ b/libavcodec/cbs_h264_syntax_template.c
> @@ -19,10 +19,10 @@
>  static int FUNC(rbsp_trailing_bits)(CodedBitstreamContext *ctx, RWContext 
> *rw)
>  {
>  int err;
> -av_unused int one = 1, zero = 0;
> -xu(1, rbsp_stop_one_bit, one, 1, 1);
> +
> +fixed(1, rbsp_stop_one_bit, 1);
>  while (byte_alignment(rw) != 0)
> -xu(1, rbsp_alignment_zero_bit, zero, 0, 0);
> +fixed(1, rbsp_alignment_zero_bit, 0);
>
>  return 0;
>  }
> @@ -740,9 +740,8 @@ static int FUNC(sei_payload)(CodedBitstreamContext *ctx, 
> RWContext *rw,
>  break;
>  case H264_SEI_TYPE_FILLER_PAYLOAD:
>  {
> -av_unused int ff_byte = 0xff;
>  for (i = 0; i  < current->payload_size; i++)
> -xu(8, ff_byte, ff_byte, 0xff, 0xff);
> +fixed(8, ff_byte, 0xff);
>  }
>  break;
>  case H264_SEI_TYPE_USER_DATA_REGISTERED:
> @@ -770,10 +769,9 @@ static int FUNC(sei_payload)(CodedBitstreamContext *ctx, 
> RWContext *rw,
>  }
>
>  if (byte_alignment(rw)) {
> -av_unused int one = 1, zero = 0;
> -xu(1, bit_equal_to_one, one, 1, 1);
> +fixed(1, bit_equal_to_one, 1);
>  while (byte_alignment(rw))
> -xu(1, bit_equal_to_zero, zero, 0, 0);
> +fixed(1, bit_equal_to_zero, 0);
>  }
>
>  #ifdef READ
> @@ -810,14 +808,14 @@ static int FUNC(sei)(CodedBitstreamContext *ctx, 
> RWContext *rw,
>  uint32_t tmp;
>
>  while (bitstream_peek(rw, 8) == 0xff) {
> -xu(8, ff_byte, tmp, 0xff, 0xff);
> +fixed(8, ff_byte, 0xff);
>  payload_type += 255;
>  }
>  xu(8, last_payload_type_byte, tmp, 0, 254);
>  payload_type += tmp;
>
>  while (bitstream_peek(rw, 8) == 0xff) {
> -xu(8, ff_byte, tmp, 0xff, 0xff);
> +fixed(8, ff_byte, 0xff);
>  payload_size += 255;
>  }
>  xu(8, last_payload_size_byte, tmp, 0, 254);
> @@ -853,14 +851,14 @@ static int FUNC(sei)(CodedBitstreamContext *ctx, 
> RWContext *rw,
>
>  tmp = current->payload[k].payload_type;
>  while (tmp >= 255) {
> -xu(8, ff_byte, 0xff, 0xff, 0xff);
> +fixed(8, ff_byte, 0xff);
>  tmp -= 255;
>  }
>  xu(8, last_payload_type_byte, tmp, 0, 254);
>
>  tmp = current->payload[k].payload_size;
>  while (tmp >= 255) {
> -xu(8, ff_byte, 0xff, 0xff, 0xff);
> +fixed(8, ff_byte, 0xff);
>  tmp -= 255;
>  }
>  xu(8, last_payload_size_byte, tmp, 0, 254);
> @@ -1240,9 +1238,8 @@ static int FUNC(slice_header)(CodedBitstreamContext 
> *ctx, RWContext *rw,
>  }
>
>  if (pps->entropy_coding_mode_flag) {
> -av_unused int one = 1;
>  while (byte_alignment(rw))
> -xu(1, cabac_alignment_one_bit, one, 1, 1);
> +fixed(1, cabac_alignment_one_bit, 1);
>  }
>
>  return 0;
> @@ -1251,7 +1248,6 @@ static int FUNC(slice_header)(CodedBitstreamContext 
> *ctx, RWContext *rw,
>  static int FUNC(filler)(CodedBitstreamContext *ctx, RWContext *rw,
>  H264RawFiller *current)
>  {
> -av_unused int ff_byte = 0xff;
>  int err;
>
>  HEADER("Filler Data");
> @@ -1261,14 +1257,14 @@ static int FUNC(filler)(CodedBitstreamContext *ctx, 
> RWContext *rw,
>
>  #ifdef READ
>  while (bitstream_peek(rw, 8) == 0xff) {
> -xu(8, ff_byte, ff_byte, 0xff, 0xff);
> +fixed(8, ff_byte, 0xff);
>  ++current->filler_size;
>  }
>  #else
>  {
>  uint32_t i;
>  for (i = 0; i < current->filler_size; i++)
> -xu(8, ff_byte, ff_byte, 0xff, 0xff);
> +fixed(8, ff_byte, 0xff);
>  }
>  #endif
>
> 

Re: [libav-devel] [PATCH 09/14] avcodec/cbs_mpeg2: create a reference to the existing buffer when decomposing slice units

2019-05-13 Thread Alexandra Hájková
On Sat, May 4, 2019 at 2:15 PM Luca Barbato  wrote:
>
> From: James Almer 
>
> Reviewed-by: Mark Thompson 
> Signed-off-by: James Almer 
> ---
>  libavcodec/cbs_mpeg2.c | 9 ++---
>  1 file changed, 2 insertions(+), 7 deletions(-)
>
> diff --git a/libavcodec/cbs_mpeg2.c b/libavcodec/cbs_mpeg2.c
> index 8974bcabac..d65d84bd3d 100644
> --- a/libavcodec/cbs_mpeg2.c
> +++ b/libavcodec/cbs_mpeg2.c
> @@ -191,16 +191,11 @@ static int cbs_mpeg2_read_unit(CodedBitstreamContext 
> *ctx,
>  len = unit->data_size;
>
>  slice->data_size = len - pos / 8;
> -slice->data_ref  = av_buffer_alloc(slice->data_size +
> -   AV_INPUT_BUFFER_PADDING_SIZE);
> +slice->data_ref  = av_buffer_ref(unit->data_ref);
>  if (!slice->data_ref)
>  return AVERROR(ENOMEM);
> -slice->data = slice->data_ref->data;
> +slice->data = unit->data + pos / 8;
>
> -memcpy(slice->data,
> -   unit->data + pos / 8, slice->data_size);
> -memset(slice->data + slice->data_size, 0,
> -   AV_INPUT_BUFFER_PADDING_SIZE);
>  slice->data_bit_start = pos % 8;
>
>  } else {
> --
> 2.12.2
>
> ___
> libav-devel mailing list
> libav-devel@libav.org
> https://lists.libav.org/mailman/listinfo/libav-devel

OK
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 10/14] cbs: Fragment/unit data is always reference counted

2019-05-13 Thread Alexandra Hájková
On Sat, May 4, 2019 at 2:15 PM Luca Barbato  wrote:
>
> From: Mark Thompson 
>
> Make this clear in the documentation and add some asserts to ensure
> that it is always true.
> ---
>  libavcodec/cbs.c | 19 ---
>  libavcodec/cbs.h | 10 ++
>  2 files changed, 18 insertions(+), 11 deletions(-)
>
> diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c
> index dc8d94eedc..907471956b 100644
> --- a/libavcodec/cbs.c
> +++ b/libavcodec/cbs.c
> @@ -140,26 +140,30 @@ static int 
> cbs_read_fragment_content(CodedBitstreamContext *ctx,
>  int err, i, j;
>
>  for (i = 0; i < frag->nb_units; i++) {
> +CodedBitstreamUnit *unit = &frag->units[i];
> +
>  if (ctx->decompose_unit_types) {
>  for (j = 0; j < ctx->nb_decompose_unit_types; j++) {
> -if (ctx->decompose_unit_types[j] == frag->units[i].type)
> +if (ctx->decompose_unit_types[j] == unit->type)
>  break;
>  }
>  if (j >= ctx->nb_decompose_unit_types)
>  continue;
>  }
>
> -av_buffer_unref(&frag->units[i].content_ref);
> -frag->units[i].content = NULL;
> +av_buffer_unref(&unit->content_ref);
> +unit->content = NULL;
> +
> +av_assert0(unit->data && unit->data_ref);
>
> -err = ctx->codec->read_unit(ctx, &frag->units[i]);
> +err = ctx->codec->read_unit(ctx, unit);
>  if (err == AVERROR(ENOSYS)) {
>  av_log(ctx->log_ctx, AV_LOG_VERBOSE,
> "Decomposition unimplemented for unit %d "
> -   "(type %"PRIu32").\n", i, frag->units[i].type);
> +   "(type %"PRIu32").\n", i, unit->type);
>  } else if (err < 0) {
>  av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to read unit %d "
> -   "(type %"PRIu32").\n", i, frag->units[i].type);
> +   "(type %"PRIu32").\n", i, unit->type);
>  return err;
>  }
>  }
> @@ -278,6 +282,7 @@ int ff_cbs_write_fragment_data(CodedBitstreamContext *ctx,
> "(type %"PRIu32").\n", i, unit->type);
>  return err;
>  }
> +av_assert0(unit->data && unit->data_ref);
>  }
>
>  av_buffer_unref(&frag->data_ref);
> @@ -288,6 +293,7 @@ int ff_cbs_write_fragment_data(CodedBitstreamContext *ctx,
>  av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to assemble fragment.\n");
>  return err;
>  }
> +av_assert0(frag->data && frag->data_ref);
>
>  return 0;
>  }
> @@ -328,7 +334,6 @@ int ff_cbs_write_packet(CodedBitstreamContext *ctx,
>  if (err < 0)
>  return err;
>
> -av_assert0(frag->data_ref);
>  buf = av_buffer_ref(frag->data_ref);
>  if (!buf)
>  return AVERROR(ENOMEM);
> diff --git a/libavcodec/cbs.h b/libavcodec/cbs.h
> index 1f26be7b36..c38edc539a 100644
> --- a/libavcodec/cbs.h
> +++ b/libavcodec/cbs.h
> @@ -84,8 +84,9 @@ typedef struct CodedBitstreamUnit {
>   */
>  size_t   data_bit_padding;
>  /**
> - * If data is reference counted, a reference to the buffer containing
> - * data.  Null if data is not reference counted.
> + * A reference to the buffer containing data.
> + *
> + * Must be set if data is not NULL.
>   */
>  AVBufferRef *data_ref;
>
> @@ -130,8 +131,9 @@ typedef struct CodedBitstreamFragment {
>   */
>  size_t data_bit_padding;
>  /**
> - * If data is reference counted, a reference to the buffer containing
> - * data.  Null if data is not reference counted.
> + * A reference to the buffer containing data.
> + *
> + * Must be set if data is not NULL.
>   */
>  AVBufferRef *data_ref;
>
> --
> 2.12.2
>
> ___
> libav-devel mailing list
> libav-devel@libav.org
> https://lists.libav.org/mailman/listinfo/libav-devel

OK
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 08/14] avcodec/cbs_h2645: create a reference to the existing buffer when decomposing slice units

2019-05-13 Thread Alexandra Hájková
On Sat, May 4, 2019 at 2:15 PM Luca Barbato  wrote:
>
> From: James Almer 
>
> Reviewed-by: Mark Thompson 
> Signed-off-by: James Almer 
> ---
>  libavcodec/cbs_h2645.c | 18 --
>  1 file changed, 4 insertions(+), 14 deletions(-)
>
> diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c
> index bf16343db5..8cd6db3fb4 100644
> --- a/libavcodec/cbs_h2645.c
> +++ b/libavcodec/cbs_h2645.c
> @@ -776,15 +776,10 @@ static int cbs_h264_read_nal_unit(CodedBitstreamContext 
> *ctx,
>  }
>
>  slice->data_size = len - pos / 8;
> -slice->data_ref  = av_buffer_alloc(slice->data_size +
> -   AV_INPUT_BUFFER_PADDING_SIZE);
> +slice->data_ref  = av_buffer_ref(unit->data_ref);
>  if (!slice->data_ref)
>  return AVERROR(ENOMEM);
> -slice->data = slice->data_ref->data;
> -memcpy(slice->data,
> -   unit->data + pos / 8, slice->data_size);
> -memset(slice->data + slice->data_size, 0,
> -   AV_INPUT_BUFFER_PADDING_SIZE);
> +slice->data = unit->data + pos / 8;
>  slice->data_bit_start = pos % 8;
>  }
>  break;
> @@ -946,15 +941,10 @@ static int cbs_h265_read_nal_unit(CodedBitstreamContext 
> *ctx,
>  }
>
>  slice->data_size = len - pos / 8;
> -slice->data_ref  = av_buffer_alloc(slice->data_size +
> -   AV_INPUT_BUFFER_PADDING_SIZE);
> +slice->data_ref  = av_buffer_ref(unit->data_ref);
>  if (!slice->data_ref)
>  return AVERROR(ENOMEM);
> -slice->data = slice->data_ref->data;
> -memcpy(slice->data,
> -   unit->data + pos / 8, slice->data_size);
> -memset(slice->data + slice->data_size, 0,
> -   AV_INPUT_BUFFER_PADDING_SIZE);
> +slice->data = unit->data + pos / 8;
>  slice->data_bit_start = pos % 8;
>  }
>  break;
> --
> 2.12.2
>
OK
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 07/14] avcodec/cbs_mpeg2: use existing buffer reference when splitting fragments

2019-05-13 Thread Alexandra Hájková
On Sat, May 4, 2019 at 2:15 PM Luca Barbato  wrote:
>
> From: James Almer 
>
> Reviewed-by: Mark Thompson 
> Signed-off-by: James Almer 
> ---
>  libavcodec/cbs_mpeg2.c | 12 +++-
>  1 file changed, 3 insertions(+), 9 deletions(-)
>
> diff --git a/libavcodec/cbs_mpeg2.c b/libavcodec/cbs_mpeg2.c
> index 7f484267fa..8974bcabac 100644
> --- a/libavcodec/cbs_mpeg2.c
> +++ b/libavcodec/cbs_mpeg2.c
> @@ -146,18 +146,12 @@ static int 
> cbs_mpeg2_split_fragment(CodedBitstreamContext *ctx,
>  unit_size = (end - 4) - (start - 1);
>  }
>
> -unit_data = av_malloc(unit_size + AV_INPUT_BUFFER_PADDING_SIZE);
> -if (!unit_data)
> -return AVERROR(ENOMEM);
> -memcpy(unit_data, start - 1, unit_size);
> -memset(unit_data + unit_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
> +unit_data = (uint8_t *)start - 1;
>
>  err = ff_cbs_insert_unit_data(ctx, frag, i, unit_type,
> -  unit_data, unit_size, NULL);
> -if (err < 0) {
> -av_freep(&unit_data);
> +  unit_data, unit_size, frag->data_ref);
> +if (err < 0)
>  return err;
> -}
>
>  if (end == frag->data + frag->data_size)
>  break;
> --
> 2.12.2
>
OK
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 06/14] avcodec/cbs_mpeg2: use memcpy when assembling fragments

2019-05-13 Thread Alexandra Hájková
On Sat, May 4, 2019 at 2:15 PM Luca Barbato  wrote:
>
> From: James Almer 
>
> before:
> 419022 decicycles in assemble_fragment,2047 runs,  1 skips
>
> after:
> 104621 decicycles in assemble_fragment,2045 runs,  3 skips
>
> Benched with a 2 minutes long 720x480 DVD mpeg2 sample.
>
> Reviewed-by: Michael Niedermayer 
> Signed-off-by: James Almer 
> ---
>  libavcodec/cbs_mpeg2.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/libavcodec/cbs_mpeg2.c b/libavcodec/cbs_mpeg2.c
> index 7fa13c64fa..7f484267fa 100644
> --- a/libavcodec/cbs_mpeg2.c
> +++ b/libavcodec/cbs_mpeg2.c
> @@ -362,7 +362,7 @@ static int 
> cbs_mpeg2_assemble_fragment(CodedBitstreamContext *ctx,
> CodedBitstreamFragment *frag)
>  {
>  uint8_t *data;
> -size_t size, dp, sp;
> +size_t size, dp;
>  int i;
>
>  size = 0;
> @@ -382,8 +382,8 @@ static int 
> cbs_mpeg2_assemble_fragment(CodedBitstreamContext *ctx,
>  data[dp++] = 0;
>  data[dp++] = 1;
>
> -for (sp = 0; sp < unit->data_size; sp++)
> -data[dp++] = unit->data[sp];
> +memcpy(data + dp, unit->data, unit->data_size);
> +dp += unit->data_size;
>  }
>
>  av_assert0(dp == size);
> --
> 2.12.2
>

OK
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 04/14] avcodec/cbs: use a reference to the assembled CodedBitstreamFragment buffer when writing packets

2019-05-13 Thread Alexandra Hájková
On Sat, May 4, 2019 at 2:15 PM Luca Barbato  wrote:
>
> From: James Almer 
>
> This saves one malloc + memcpy per packet
>
> The CodedBitstreamFragment buffer is padded to follow the requirements
> of AVPacket.
>
> Reviewed-by: jkqxz
> Signed-off-by: James Almer 
> ---
>  libavcodec/cbs.c   | 12 
>  libavcodec/cbs_h2645.c |  8 +---
>  libavcodec/cbs_mpeg2.c |  3 ++-
>  3 files changed, 15 insertions(+), 8 deletions(-)
>
> diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c
> index c5c5b98d24..dc8d94eedc 100644
> --- a/libavcodec/cbs.c
> +++ b/libavcodec/cbs.c
> @@ -321,17 +321,21 @@ int ff_cbs_write_packet(CodedBitstreamContext *ctx,
>  AVPacket *pkt,
>  CodedBitstreamFragment *frag)
>  {
> +AVBufferRef *buf;
>  int err;
>
>  err = ff_cbs_write_fragment_data(ctx, frag);
>  if (err < 0)
>  return err;
>
> -err = av_new_packet(pkt, frag->data_size);
> -if (err < 0)
> -return err;
> +av_assert0(frag->data_ref);
> +buf = av_buffer_ref(frag->data_ref);
> +if (!buf)
> +return AVERROR(ENOMEM);
>
> -memcpy(pkt->data, frag->data, frag->data_size);
> +av_init_packet(pkt);
> +pkt->buf  = buf;
> +pkt->data = frag->data;
>  pkt->size = frag->data_size;
>
>  return 0;
> diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c
> index d6131a13e5..bf16343db5 100644
> --- a/libavcodec/cbs_h2645.c
> +++ b/libavcodec/cbs_h2645.c
> @@ -1298,7 +1298,7 @@ static int 
> cbs_h2645_assemble_fragment(CodedBitstreamContext *ctx,
>  max_size += 3 + frag->units[i].data_size * 3 / 2;
>  }
>
> -data = av_malloc(max_size);
> +data = av_malloc(max_size + AV_INPUT_BUFFER_PADDING_SIZE);
>  if (!data)
>  return AVERROR(ENOMEM);
>
> @@ -1349,11 +1349,13 @@ static int 
> cbs_h2645_assemble_fragment(CodedBitstreamContext *ctx,
>  }
>
>  av_assert0(dp <= max_size);
> -err = av_reallocp(&data, dp);
> +err = av_reallocp(&data, dp + AV_INPUT_BUFFER_PADDING_SIZE);
>  if (err)
>  return err;
> +memset(data + dp, 0, AV_INPUT_BUFFER_PADDING_SIZE);
>
> -frag->data_ref = av_buffer_create(data, dp, NULL, NULL, 0);
> +frag->data_ref = av_buffer_create(data, dp + 
> AV_INPUT_BUFFER_PADDING_SIZE,
> +  NULL, NULL, 0);
>  if (!frag->data_ref) {
>  av_freep(&data);
>  return AVERROR(ENOMEM);
> diff --git a/libavcodec/cbs_mpeg2.c b/libavcodec/cbs_mpeg2.c
> index 3db10c5152..7fa13c64fa 100644
> --- a/libavcodec/cbs_mpeg2.c
> +++ b/libavcodec/cbs_mpeg2.c
> @@ -369,7 +369,7 @@ static int 
> cbs_mpeg2_assemble_fragment(CodedBitstreamContext *ctx,
>  for (i = 0; i < frag->nb_units; i++)
>  size += 3 + frag->units[i].data_size;
>
> -frag->data_ref = av_buffer_alloc(size);
> +frag->data_ref = av_buffer_alloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
>  if (!frag->data_ref)
>  return AVERROR(ENOMEM);
>  data = frag->data_ref->data;
> @@ -388,6 +388,7 @@ static int 
> cbs_mpeg2_assemble_fragment(CodedBitstreamContext *ctx,
>
>  av_assert0(dp == size);
>
> +memset(data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
>  frag->data  = data;
>  frag->data_size = size;
>
> --
> 2.12.2
>

OK
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 05/14] cbs_h265: Use helper macro for maximum values of fixed-width elements

2019-05-13 Thread Alexandra Hájková
On Sat, May 4, 2019 at 2:15 PM Luca Barbato  wrote:
>
> From: Mark Thompson 
>
> Apply the same logic as the previous patch to H.265.  There are no cases
> which currently overflow here, but this is still more consistent.
> ---
>  libavcodec/cbs_h265_syntax_template.c | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/libavcodec/cbs_h265_syntax_template.c 
> b/libavcodec/cbs_h265_syntax_template.c
> index a194887d76..20b0479891 100644
> --- a/libavcodec/cbs_h265_syntax_template.c
> +++ b/libavcodec/cbs_h265_syntax_template.c
> @@ -665,7 +665,7 @@ static int FUNC(sps_scc_extension)(CodedBitstreamContext 
> *ctx, RWContext *rw,
>: current->bit_depth_chroma_minus8 
> + 8;
>  for (i = 0; i <= 
> current->sps_num_palette_predictor_initializer_minus1; i++)
>  u(bit_depth, sps_palette_predictor_initializers[comp][i],
> -  0, (1 << bit_depth) - 1);
> +  0, MAX_UINT_BITS(bit_depth));
>  }
>  }
>  }
> @@ -827,7 +827,7 @@ static int FUNC(sps)(CodedBitstreamContext *ctx, 
> RWContext *rw,
>  for (i = 0; i < current->num_long_term_ref_pics_sps; i++) {
>  u(current->log2_max_pic_order_cnt_lsb_minus4 + 4,
>lt_ref_pic_poc_lsb_sps[i],
> -  0, (1 << (current->log2_max_pic_order_cnt_lsb_minus4 + 4)) - 
> 1);
> +  0, MAX_UINT_BITS(current->log2_max_pic_order_cnt_lsb_minus4 + 
> 4));
>  flag(used_by_curr_pic_lt_sps_flag[i]);
>  }
>  }
> @@ -845,7 +845,7 @@ static int FUNC(sps)(CodedBitstreamContext *ctx, 
> RWContext *rw,
>  flag(sps_multilayer_extension_flag);
>  flag(sps_3d_extension_flag);
>  flag(sps_scc_extension_flag);
> -u(4, sps_extension_4bits, 0, (1 << 4) - 1);
> +u(4, sps_extension_4bits, 0, MAX_UINT_BITS(4));
>  }
>
>  if (current->sps_range_extension_flag)
> @@ -925,7 +925,7 @@ static int FUNC(pps_scc_extension)(CodedBitstreamContext 
> *ctx, RWContext *rw,
>: 
> current->chroma_bit_depth_entry_minus8 + 8;
>  for (i = 0; i < 
> current->pps_num_palette_predictor_initializer; i++)
>  u(bit_depth, pps_palette_predictor_initializers[comp][i],
> -  0, (1 << bit_depth) - 1);
> +  0, MAX_UINT_BITS(bit_depth));
>  }
>  }
>  }
> @@ -1038,7 +1038,7 @@ static int FUNC(pps)(CodedBitstreamContext *ctx, 
> RWContext *rw,
>  flag(pps_multilayer_extension_flag);
>  flag(pps_3d_extension_flag);
>  flag(pps_scc_extension_flag);
> -u(4, pps_extension_4bits, 0, (1 << 4) - 1);
> +u(4, pps_extension_4bits, 0, MAX_UINT_BITS(4));
>  }
>  if (current->pps_range_extension_flag)
>  CHECK(FUNC(pps_range_extension)(ctx, rw, current));
> @@ -1274,7 +1274,7 @@ static int 
> FUNC(slice_segment_header)(CodedBitstreamContext *ctx, RWContext *rw,
>  const H265RawSTRefPicSet *rps;
>
>  u(sps->log2_max_pic_order_cnt_lsb_minus4 + 4, 
> slice_pic_order_cnt_lsb,
> -  0, (1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 4)) - 1);
> +  0, MAX_UINT_BITS(sps->log2_max_pic_order_cnt_lsb_minus4 + 4));
>
>  flag(short_term_ref_pic_set_sps_flag);
>  if (!current->short_term_ref_pic_set_sps_flag) {
> @@ -1321,7 +1321,7 @@ static int 
> FUNC(slice_segment_header)(CodedBitstreamContext *ctx, RWContext *rw,
>  ++num_pic_total_curr;
>  } else {
>  u(sps->log2_max_pic_order_cnt_lsb_minus4 + 4, 
> poc_lsb_lt[i],
> -  0, (1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 
> 4)) - 1);
> +  0, 
> MAX_UINT_BITS(sps->log2_max_pic_order_cnt_lsb_minus4 + 4));
>  flag(used_by_curr_pic_lt_flag[i]);
>  if (current->used_by_curr_pic_lt_flag[i])
>  ++num_pic_total_curr;
> @@ -1487,7 +1487,7 @@ static int 
> FUNC(slice_segment_header)(CodedBitstreamContext *ctx, RWContext *rw,
>  ue(offset_len_minus1, 0, 31);
>  for (i = 0; i < current->num_entry_point_offsets; i++)
>  u(current->offset_len_minus1 + 1, 
> entry_point_offset_minus1[i],
> -  0, (1 << (current->offset_len_minus1 + 1)) - 1);
> +  0, MAX_UINT_BITS(current->offset_len_minus1 + 1));
>  }
>  }
>
> --
> 2.12.2
>

OK
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 03/14] cbs: Add a table of all supported codec IDs

2019-05-13 Thread Alexandra Hájková
On Sat, May 4, 2019 at 2:14 PM Luca Barbato  wrote:
>
> From: Mark Thompson 
>
> Use it as the set of codec IDs supported by the trace_headers BSF.
> ---
>  configure  |  2 +-
>  libavcodec/cbs.c   | 13 +
>  libavcodec/cbs.h   |  8 
>  libavcodec/trace_headers_bsf.c |  9 +
>  4 files changed, 23 insertions(+), 9 deletions(-)
>
> diff --git a/configure b/configure
> index 8c46a870c4..7b5df4df29 100755
> --- a/configure
> +++ b/configure
> @@ -2365,7 +2365,7 @@ h264_redundant_pps_bsf_select="cbs_h264"
>  hevc_metadata_bsf_select="cbs_h265"
>  mjpeg2jpeg_bsf_select="jpegtables"
>  mpeg2_metadata_bsf_select="cbs_mpeg2"
> -trace_headers_bsf_select="cbs_h264 cbs_h265 cbs_mpeg2"
> +trace_headers_bsf_select="cbs"
>
>  # external libraries
>  avisynth_deps="LoadLibrary"
> diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c
> index 04ad2dfc41..c5c5b98d24 100644
> --- a/libavcodec/cbs.c
> +++ b/libavcodec/cbs.c
> @@ -40,6 +40,19 @@ static const CodedBitstreamType *cbs_type_table[] = {
>  #endif
>  };
>
> +const enum AVCodecID ff_cbs_all_codec_ids[] = {
> +#if CONFIG_CBS_H264
> +AV_CODEC_ID_H264,
> +#endif
> +#if CONFIG_CBS_H265
> +AV_CODEC_ID_HEVC,
> +#endif
> +#if CONFIG_CBS_MPEG2
> +AV_CODEC_ID_MPEG2VIDEO,
> +#endif
> +AV_CODEC_ID_NONE
> +};
> +
>  int ff_cbs_init(CodedBitstreamContext **ctx_ptr,
>  enum AVCodecID codec_id, void *log_ctx)
>  {
> diff --git a/libavcodec/cbs.h b/libavcodec/cbs.h
> index 6505386708..1f26be7b36 100644
> --- a/libavcodec/cbs.h
> +++ b/libavcodec/cbs.h
> @@ -202,6 +202,14 @@ typedef struct CodedBitstreamContext {
>
>
>  /**
> + * Table of all supported codec IDs.
> + *
> + * Terminated by AV_CODEC_ID_NONE.
> + */
> +extern const enum AVCodecID ff_cbs_all_codec_ids[];
> +
> +
> +/**
>   * Create and initialise a new context for the given codec.
>   */
>  int ff_cbs_init(CodedBitstreamContext **ctx,
> diff --git a/libavcodec/trace_headers_bsf.c b/libavcodec/trace_headers_bsf.c
> index 9c97dd4cea..4494bd4553 100644
> --- a/libavcodec/trace_headers_bsf.c
> +++ b/libavcodec/trace_headers_bsf.c
> @@ -109,18 +109,11 @@ static int trace_headers(AVBSFContext *bsf, AVPacket 
> *out)
>  return 0;
>  }
>
> -static const enum AVCodecID trace_headers_codec_ids[] = {
> -AV_CODEC_ID_H264,
> -AV_CODEC_ID_HEVC,
> -AV_CODEC_ID_MPEG2VIDEO,
> -AV_CODEC_ID_NONE,
> -};
> -
>  const AVBitStreamFilter ff_trace_headers_bsf = {
>  .name   = "trace_headers",
>  .priv_data_size = sizeof(TraceHeadersContext),
>  .init   = &trace_headers_init,
>  .close  = &trace_headers_close,
>  .filter = &trace_headers,
> -.codec_ids  = trace_headers_codec_ids,
> +.codec_ids  = ff_cbs_all_codec_ids,
>  };
> --
> 2.12.2
>

OK
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 02/14] cbs_h264: Fix overflow in shifts

2019-05-13 Thread Alexandra Hájková
On Sat, May 4, 2019 at 2:14 PM Luca Barbato  wrote:
>
> From: Mark Thompson 
>
> The type of the result of a shift operation is unaffected by the type of
> the right operand, so some existing code overflows with undefined behaviour
> when the element length is 32.  Add a helper macro to calculate the maximum
> value correctly and then use it everywhere this pattern appears.
>
> Found-by: Andreas Rheinhardt 
> ---
>  libavcodec/cbs_h264_syntax_template.c | 22 +++---
>  libavcodec/cbs_internal.h |  4 
>  2 files changed, 15 insertions(+), 11 deletions(-)
>
> diff --git a/libavcodec/cbs_h264_syntax_template.c 
> b/libavcodec/cbs_h264_syntax_template.c
> index 1aa7888584..92c1b67862 100644
> --- a/libavcodec/cbs_h264_syntax_template.c
> +++ b/libavcodec/cbs_h264_syntax_template.c
> @@ -342,8 +342,8 @@ static int FUNC(sps_extension)(CodedBitstreamContext 
> *ctx, RWContext *rw,
>  flag(alpha_incr_flag);
>
>  bits = current->bit_depth_aux_minus8 + 9;
> -u(bits, alpha_opaque_value,  0, (1 << bits) - 1);
> -u(bits, alpha_transparent_value, 0, (1 << bits) - 1);
> +u(bits, alpha_opaque_value,  0, MAX_UINT_BITS(bits));
> +u(bits, alpha_transparent_value, 0, MAX_UINT_BITS(bits));
>  }
>
>  flag(additional_extension_flag);
> @@ -483,10 +483,10 @@ static int 
> FUNC(sei_buffering_period)(CodedBitstreamContext *ctx, RWContext *rw,
>  length = 
> sps->vui.nal_hrd_parameters.initial_cpb_removal_delay_length_minus1 + 1;
>  xu(length, initial_cpb_removal_delay[SchedSelIdx],
> current->nal.initial_cpb_removal_delay[i],
> -   0, (1 << (uint64_t)length) - 1);
> +   1, MAX_UINT_BITS(length));
>  xu(length, initial_cpb_removal_delay_offset[SchedSelIdx],
> current->nal.initial_cpb_removal_delay_offset[i],
> -   0, (1 << (uint64_t)length) - 1);
> +   0, MAX_UINT_BITS(length));
>  }
>  }
>
> @@ -495,10 +495,10 @@ static int 
> FUNC(sei_buffering_period)(CodedBitstreamContext *ctx, RWContext *rw,
>  length = 
> sps->vui.vcl_hrd_parameters.initial_cpb_removal_delay_length_minus1 + 1;
>  xu(length, initial_cpb_removal_delay[SchedSelIdx],
> current->vcl.initial_cpb_removal_delay[i],
> -   0, (1 << (uint64_t)length) - 1);
> +   1, MAX_UINT_BITS(length));
>  xu(length, initial_cpb_removal_delay_offset[SchedSelIdx],
> current->vcl.initial_cpb_removal_delay_offset[i],
> -   0, (1 << (uint64_t)length) - 1);
> +   0, MAX_UINT_BITS(length));
>  }
>  }
>
> @@ -548,7 +548,7 @@ static int FUNC(sei_pic_timestamp)(CodedBitstreamContext 
> *ctx, RWContext *rw,
>
>  if (time_offset_length > 0)
>  u(time_offset_length, time_offset,
> -  0, (1 << (uint64_t)time_offset_length) - 1);
> +  0, MAX_UINT_BITS(time_offset_length));
>  else
>  infer(time_offset, 0);
>
> @@ -600,9 +600,9 @@ static int FUNC(sei_pic_timing)(CodedBitstreamContext 
> *ctx, RWContext *rw,
>  }
>
>  u(hrd->cpb_removal_delay_length_minus1 + 1, cpb_removal_delay,
> -  0, (1 << (uint64_t)hrd->cpb_removal_delay_length_minus1) + 1);
> +  0, MAX_UINT_BITS(hrd->cpb_removal_delay_length_minus1 + 1));
>  u(hrd->dpb_output_delay_length_minus1 + 1, dpb_output_delay,
> -  0, (1 << (uint64_t)hrd->dpb_output_delay_length_minus1) + 1);
> +  0, MAX_UINT_BITS(hrd->dpb_output_delay_length_minus1 + 1));
>  }
>
>  if (sps->vui.pic_struct_present_flag) {
> @@ -1123,7 +1123,7 @@ static int FUNC(slice_header)(CodedBitstreamContext 
> *ctx, RWContext *rw,
>  u(2, colour_plane_id, 0, 2);
>
>  u(sps->log2_max_frame_num_minus4 + 4, frame_num,
> -  0, (1 << (sps->log2_max_frame_num_minus4 + 4)) - 1);
> +  0, MAX_UINT_BITS(sps->log2_max_frame_num_minus4 + 4));
>
>  if (!sps->frame_mbs_only_flag) {
>  flag(field_pic_flag);
> @@ -1141,7 +1141,7 @@ static int FUNC(slice_header)(CodedBitstreamContext 
> *ctx, RWContext *rw,
>
>  if (sps->pic_order_cnt_type == 0) {
>  u(sps->log2_max_pic_order_cnt_lsb_minus4 + 4, pic_order_cnt_lsb,
> -  0, (1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 4)) - 1);
> +  0, MAX_UINT_BITS(sps->log2_max_pic_order_cnt_lsb_minus4 + 4));
>  if (pps->bottom_field_pic_order_in_frame_present_flag &&
>  !current->field_pic_flag)
>  se(delta_pic_order_cnt_bottom, INT32_MIN + 1, INT32_MAX);
> diff --git a/libavcodec/cbs_internal.h b/libavcodec/cbs_internal.h
> index 4c6f421d19..54265d8e0e 100644
> --- a/libavcodec/cbs_internal.h
> +++ b/libavcodec/cbs_internal.h
> @@ -79,6 +79,10 @@ int ff_cbs_write_unsigned(CodedBitstreamContext *ctx, 
> PutBitContext *pbc,
>int width, const char *name, uint32_t value,
>

Re: [libav-devel] [PATCH 01/14] maint: Ignore dot dirs

2019-05-13 Thread Alexandra Hájková
On Sat, May 4, 2019 at 2:14 PM Luca Barbato  wrote:
>
> They are usually created by tools and editors.
> ---
>  .gitignore | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/.gitignore b/.gitignore
> index 1a08fd15c5..693fa5636a 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -29,3 +29,4 @@
>  /coverage.info
>  /lcov/
>  /mapfile
> +.*/
> --
> 2.12.2
>

OK
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] hevc: Add hevc_get_pixel_4/8/12/16/24/32/48/64

2017-12-03 Thread Alexandra Hájková
Checkasm timings:
block size bitdepth  C   NEON
4   8 bit:146.7   48.7
   10 bit:146.7   52.7
8   8 bit:430.3   84.4
   10 bit:430.4  119.5
12  8 bit:812.8  141.0
   10 bit:812.8  195.0
16  8 bit:   1499.1  268.0
   10 bit:   1498.9  368.4
24  8 bit:   4394.2  574.8
   10 bit:   3696.3  804.8
32  8 bit:   5108.6  568.9
   10 bit:   4249.6  918.8
48  8 bit:  16819.6 2304.9
   10 bit:  13882.0 3178.5
64  8 bit:  13490.8 1799.5
   10 bit:  11018.5 2519.4
---
 libavcodec/arm/Makefile   |   3 +-
 libavcodec/arm/hevc_mc.S  | 381 ++
 libavcodec/arm/hevcdsp_init_arm.c |  67 +++
 3 files changed, 450 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/arm/hevc_mc.S

diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index b48745ad4..49e17ce0d 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -135,7 +135,8 @@ NEON-OBJS-$(CONFIG_AAC_DECODER)+= 
arm/aacpsdsp_neon.o   \
 NEON-OBJS-$(CONFIG_APE_DECODER)+= arm/apedsp_neon.o
 NEON-OBJS-$(CONFIG_DCA_DECODER)+= arm/dcadsp_neon.o \
   arm/synth_filter_neon.o
-NEON-OBJS-$(CONFIG_HEVC_DECODER)   += arm/hevc_idct.o
+NEON-OBJS-$(CONFIG_HEVC_DECODER)   += arm/hevc_idct.o   \
+  arm/hevc_mc.o
 NEON-OBJS-$(CONFIG_RV30_DECODER)   += arm/rv34dsp_neon.o
 NEON-OBJS-$(CONFIG_RV40_DECODER)   += arm/rv34dsp_neon.o\
   arm/rv40dsp_neon.o
diff --git a/libavcodec/arm/hevc_mc.S b/libavcodec/arm/hevc_mc.S
new file mode 100644
index 0..a1274ec71
--- /dev/null
+++ b/libavcodec/arm/hevc_mc.S
@@ -0,0 +1,381 @@
+/*
+ * ARM NEON optimised MC functions for HEVC decoding
+ *
+ * Copyright (c) 2017 Alexandra Hájková
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+.macro get_pixels4 bitdepth
+function ff_hevc_get_pixels_4_\bitdepth\()_neon, export=1
+@r0 dst, r1 dststride, r2 src, r3 srcstride
+ldr r12, [sp] @height
+cmp r12, #0
+bxeqlr
+
+1: .if \bitdepth == 8
+vld1.32 {d0[0]}, [r2], r3
+vld1.32 {d1[0]}, [r2], r3
+vld1.32 {d2[0]}, [r2], r3
+vld1.32 {d3[0]}, [r2], r3
+vshll.u8q8, d0, #6
+vshll.u8q9, d1, #6
+vshll.u8q10, d2, #6
+vshll.u8q11, d3, #6
+  .else
+vld1.16 {d0}, [r2], r3
+vld1.16 {d1}, [r2], r3
+vld1.16 {d2}, [r2], r3
+vld1.16 {d3}, [r2], r3
+vshl.i16d16, d0, #4
+vshl.i16d18, d1, #4
+vshl.i16d20, d2, #4
+vshl.i16d22, d3, #4
+  .endif
+
+vst1.16 {d16}, [r0, :64], r1
+vst1.16 {d18}, [r0, :64], r1
+vst1.16 {d20}, [r0, :64], r1
+vst1.16 {d22}, [r0, :64], r1
+subsr12, #4
+bgt 1b
+
+bx  lr
+endfunc
+.endm
+
+.macro get_pixels8 bitdepth
+function ff_hevc_get_pixels_8_\bitdepth\()_neon, export=1
+@r0 dst, r1 dststride, r2 src, r3 srcstride
+ldr r12, [sp] @height
+cmp r12, #0
+bxeqlr
+
+1: .if \bitdepth == 8
+vld1.8  {d0}, [r2], r3
+vld1.8  {d1}, [r2], r3
+vld1.8  {d2}, [r2], r3
+vld1.8  {d3}, [r2], r3
+vshll.u8q8, d0, #6
+vshll.u8q9, d1, #6
+vshll.u8q10, d2, #6
+vshll.u8q11, d3, #6
+  .else
+vld1.16 {d16-d17}, [r2], r3
+vld1.16 {d18-d19}, [r2], r3
+vld1.16 {d20-d21}, [r2], r3
+vld1.16 {d22-d23}, [r2], r3
+vshl.i16q8, q8, #4
+vshl.i16q9, q9, #4
+vshl.i16q10, q10, #4
+vshl.i16q11, q11, #4
+  .endif
+
+vst1.16 {d16-d17}, [r0

Re: [libav-devel] [PATCH v2] avconv.c: fix calculation of input file duration in seek_to_start()

2017-10-30 Thread Alexandra Hájková
On Mon, Oct 30, 2017 at 12:25 PM, Peter Große  wrote:
> Fixes looping files without audio or when using stream_copy, where
> ist->nb_samples is not set since no decoding is done.
> ---
>  avtools/avconv.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/avtools/avconv.c b/avtools/avconv.c
> index 4e3ffecdef..cee7a7b452 100644
> --- a/avtools/avconv.c
> +++ b/avtools/avconv.c
> @@ -2553,9 +2553,9 @@ static int seek_to_start(InputFile *ifile, 
> AVFormatContext *is)
>  continue;
>  } else {
>  if (ist->framerate.num) {
> -duration = av_rescale_q(1, ist->framerate, 
> ist->st->time_base);
> +duration = FFMAX(av_rescale_q(1, av_inv_q(ist->framerate), 
> ist->st->time_base), 1);
>  } else if (ist->st->avg_frame_rate.num) {
> -duration = av_rescale_q(1, ist->st->avg_frame_rate, 
> ist->st->time_base);
> +duration = FFMAX(av_rescale_q(1, 
> av_inv_q(ist->st->avg_frame_rate), ist->st->time_base), 1);
>  } else duration = 1;
>  }
>  if (!ifile->duration)
> --
> 2.13.6
>
> ___
> libav-devel mailing list
> libav-devel@libav.org
> https://lists.libav.org/mailman/listinfo/libav-devel

OK
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/2] avconv.c: fix calculation of input file duration in seek_to_start()

2017-10-30 Thread Alexandra Hájková
> --- a/avtools/avconv.c
> +++ b/avtools/avconv.c
> @@ -2553,9 +2553,9 @@ static int seek_to_start(InputFile *ifile, 
> AVFormatContext *is)
>  continue;
>  } else {
>  if (ist->framerate.num) {
> -duration = av_rescale_q(1, ist->framerate, 
> ist->st->time_base);
> +duration = av_rescale_q(1, av_inv_q(ist->framerate), 
> ist->st->time_base);
I think it should rather be:
 duration = FFMAX(av_rescale_q(1, av_inv_q(ist->framerate),
ist->st->time_base), 1);
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/2] avconv.c: fix calculation of input file duration in seek_to_start()

2017-10-29 Thread Alexandra Hájková
On Sun, Oct 29, 2017 at 12:48 PM, Peter Große  wrote:
> Fixes looping files without audio or when using stream_copy, where
> ist->nb_samples is not set since no decoding is done.
>
Does the loop discard the last frame because its duration is set to 0?
Could you give me your sample(s), please?

Thank you,
Alexandra
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] avconv: when using -loop option bail out if seek to start fails

2017-06-30 Thread Alexandra Hájková
On Fri, Jun 30, 2017 at 5:28 PM, Peter Große  wrote:
> Fixes an infinite loop when a demuxer fails to seek to the start of the input.
>
> Signed-off-by: Peter Große 
> ---
>  avtools/avconv.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/avtools/avconv.c b/avtools/avconv.c
> index 8dd11bb5fc..4e3ffecdef 100644
> --- a/avtools/avconv.c
> +++ b/avtools/avconv.c
> @@ -2615,9 +2615,11 @@ static int process_input(void)
>  return ret;
>  }
>  if (ret < 0 && ifile->loop) {
> -if ((ret = seek_to_start(ifile, is)) < 0)
> -return ret;
> -ret = get_input_packet(ifile, &pkt);
> +ret = seek_to_start(ifile, is);
> +if(ret < 0)
> +av_log(NULL, AV_LOG_WARNING, "Seek to start failed.\n");
> +else
> +ret = get_input_packet(ifile, &pkt);
>  }
>  if (ret < 0) {
>  if (ret != AVERROR_EOF) {
> --
> 2.13.0
>

Producing unlooped output with avconv -loop -1 and running infinitely
is clearly wrong, and I think this is a reasonable way to fix it.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] Libav Sprint Pelhřimov

2017-06-29 Thread Alexandra Hájková
Hello everyone,

I would like to announce another Libav sprint in the wilds around
Pelhřimov, the date was set to 21-23 July, but the next weekend
(28-30) is also possible. The plans are:
* hacking
* cooking
* enjoying countryside
* eventually others.
Please, reply here, if you're interested.

(The report about the last sprint:
http://sasshkas.blogspot.cz/2016/10/another-libav-sprint.html)
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/2] hevc: Add NEON 32x32 IDCT

2017-05-04 Thread Alexandra Hájková
---
 libavcodec/arm/hevc_idct.S| 311 +++---
 libavcodec/arm/hevcdsp_init_arm.c |   4 +
 2 files changed, 294 insertions(+), 21 deletions(-)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index eeb81e3..79799b2 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -28,6 +28,10 @@ const trans, align=4
 .short 89, 75, 50, 18
 .short 90, 87, 80, 70
 .short 57, 43, 25, 9
+.short 90, 90, 88, 85
+.short 82, 78, 73, 67
+.short 61, 54, 46, 38
+.short 31, 22, 13, 4
 endconst
 
 .macro clip10 in1, in2, c1, c2
@@ -509,7 +513,7 @@ endfunc
 vsub.s32\tmp_m, \e, \o
 .endm
 
-.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7
+.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7, offset
 tr_4x4_8\in0, \in2, \in4, \in6, q8, q9, q10, q11, q12, q13, 
q14, q15
 
 vmull.s16   q12, \in1, \in0[0]
@@ -535,7 +539,7 @@ endfunc
 butterfly   q9,  q13, q1, q6
 butterfly   q10, q14, q2, q5
 butterfly   q11, q15, q3, q4
-add r4,  sp,  #512
+add r4,  sp,  #\offset
 vst1.s32{q0-q1}, [r4, :128]!
 vst1.s32{q2-q3}, [r4, :128]!
 vst1.s32{q4-q5}, [r4, :128]!
@@ -575,15 +579,15 @@ endfunc
 vsub.s32\in6, \in6, \in7
 .endm
 
-.macro store16 in0, in1, in2, in3, in4, in5, in6, in7
+.macro store16 in0, in1, in2, in3, in4, in5, in6, in7, rx
 vst1.s16\in0, [r1, :64], r2
-vst1.s16\in1, [r3, :64], r4
+vst1.s16\in1, [r3, :64], \rx
 vst1.s16\in2, [r1, :64], r2
-vst1.s16\in3, [r3, :64], r4
+vst1.s16\in3, [r3, :64], \rx
 vst1.s16\in4, [r1, :64], r2
-vst1.s16\in5, [r3, :64], r4
+vst1.s16\in5, [r3, :64], \rx
 vst1.s16\in6, [r1, :64], r2
-vst1.s16\in7, [r3, :64], r4
+vst1.s16\in7, [r3, :64], \rx
 .endm
 
 .macro scale out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, 
in3, in4, in5, in6, in7, shift
@@ -597,19 +601,35 @@ endfunc
 vqrshrn.s32 \out7, \in7, \shift
 .endm
 
-.macro tr_16x4 name, shift
+@stores in0, in2, in4, in6 ascending from off1 and
+@stores in1, in3, in5, in7 descending from off2
+.macro store_to_stack off1, off2, in0, in2, in4, in6, in7, in5, in3, in1
+add r1, sp, #\off1
+add r3, sp, #\off2
+mov r2, #-16
+vst1.s32{\in0}, [r1, :128]!
+vst1.s32{\in1}, [r3, :128], r2
+vst1.s32{\in2}, [r1, :128]!
+vst1.s32{\in3}, [r3, :128], r2
+vst1.s32{\in4}, [r1, :128]!
+vst1.s32{\in5}, [r3, :128], r2
+vst1.s32{\in6}, [r1, :128]
+vst1.s32{\in7}, [r3, :128]
+.endm
+
+.macro tr_16x4 name, shift, offset, step
 function func_tr_16x4_\name
 mov r1,  r5
-add r3,  r5, #64
-mov r2,  #128
+add r3, r5, #(\step * 64)
+mov r2, #(\step * 128)
 load16  d0, d1, d2, d3, d4, d5, d6, d7
 movrel  r1, trans
 
-tr16_8x4d0, d1, d2, d3, d4, d5, d6, d7
+tr16_8x4d0, d1, d2, d3, d4, d5, d6, d7, \offset
 
-add r1,  r5, #32
-add r3,  r5, #(64 + 32)
-mov r2,  #128
+add r1,  r5, #(\step * 32)
+add r3,  r5, #(\step * 3 *32)
+mov r2,  #(\step * 128)
 load16  d8, d9, d2, d3, d4, d5, d6, d7
 movrel  r1, trans + 16
 vld1.s16{q0}, [r1, :128]
@@ -630,11 +650,12 @@ function func_tr_16x4_\name
 add_member  d6, d1[2], d0[3], d0[0], d0[2], d1[1], d1[3], d1[0], 
d0[1], +, -, +, -, +, +, -, +
 add_member  d7, d1[3], d1[2], d1[1], d1[0], d0[3], d0[2], d0[1], 
d0[0], +, -, +, -, +, -, +, -
 
-add r4, sp, #512
+add r4, sp, #\offset
 vld1.s32{q0-q1}, [r4, :128]!
 vld1.s32{q2-q3}, [r4, :128]!
 
 butterfly16 q0, q5, q1, q6, q2, q7, q3, q8
+.if \shift > 0
 scale   d26, d27, d28, d29, d30, d31, d16, d17, q4, q0, q5, 
q1, q6, q2, q7, q3, \shift
 transpose8_4x4  d26, d28, d30, d16
 transpose8_4x4  d17, d31, d29, d27
@@ -642,12 +663,16 @@ function func_tr_16x4_\name
 add r3, r6, #(24 +3*32)
 mov r2, #32
 mov r4, #-32
-store16 d26, d27, d28, d29, d30, d31, d16, d17
+store16 d26, d27, d28, d29, d30, d31, d16, d17, r4
+.else
+store_to_stack  \offset, (\offset + 240), q4, q5, q6, q7, q3, q2, q1, 
q0
+.endif
 
-add   

[libav-devel] [PATCH 1/2] hevc: 16x16 NEON idct: Use the right element size for stores.

2017-05-04 Thread Alexandra Hájková
This doesn't change the actual behaviour of the code but improves
readability.
---
 libavcodec/arm/hevc_idct.S | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index fac5758..eeb81e3 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -536,10 +536,10 @@ endfunc
 butterfly   q10, q14, q2, q5
 butterfly   q11, q15, q3, q4
 add r4,  sp,  #512
-vst1.s16{q0-q1}, [r4, :128]!
-vst1.s16{q2-q3}, [r4, :128]!
-vst1.s16{q4-q5}, [r4, :128]!
-vst1.s16{q6-q7}, [r4, :128]
+vst1.s32{q0-q1}, [r4, :128]!
+vst1.s32{q2-q3}, [r4, :128]!
+vst1.s32{q4-q5}, [r4, :128]!
+vst1.s32{q6-q7}, [r4, :128]
 .endm
 
 .macro load16 in0, in1, in2, in3, in4, in5, in6, in7
@@ -631,8 +631,8 @@ function func_tr_16x4_\name
 add_member  d7, d1[3], d1[2], d1[1], d1[0], d0[3], d0[2], d0[1], 
d0[0], +, -, +, -, +, -, +, -
 
 add r4, sp, #512
-vld1.s16{q0-q1}, [r4, :128]!
-vld1.s16{q2-q3}, [r4, :128]!
+vld1.s32{q0-q1}, [r4, :128]!
+vld1.s32{q2-q3}, [r4, :128]!
 
 butterfly16 q0, q5, q1, q6, q2, q7, q3, q8
 scale   d26, d27, d28, d29, d30, d31, d16, d17, q4, q0, q5, 
q1, q6, q2, q7, q3, \shift
@@ -645,8 +645,8 @@ function func_tr_16x4_\name
 store16 d26, d27, d28, d29, d30, d31, d16, d17
 
 add r4, sp, #576
-vld1.s16{q0-q1}, [r4, :128]!
-vld1.s16{q2-q3}, [r4, :128]
+vld1.s32{q0-q1}, [r4, :128]!
+vld1.s32{q2-q3}, [r4, :128]
 butterfly16 q0, q9, q1, q10, q2, q11, q3, q12
 scale   d26, d27, d28, d29, d30, d31, d8, d9, q4, q0, q9, q1, 
q10, q2, q11, q3, \shift
 transpose8_4x4  d26, d28, d30, d8
-- 
2.10.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 1/2] hevc: 16x16 NEON idct: store 32 bit elements correctly

2017-05-03 Thread Alexandra Hájková
---
 libavcodec/arm/hevc_idct.S | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index fac5758..4814c86 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -536,10 +536,10 @@ endfunc
 butterfly   q10, q14, q2, q5
 butterfly   q11, q15, q3, q4
 add r4,  sp,  #512
-vst1.s16{q0-q1}, [r4, :128]!
-vst1.s16{q2-q3}, [r4, :128]!
-vst1.s16{q4-q5}, [r4, :128]!
-vst1.s16{q6-q7}, [r4, :128]
+vst1.s32{q0-q1}, [r4, :128]!
+vst1.s32{q2-q3}, [r4, :128]!
+vst1.s32{q4-q5}, [r4, :128]!
+vst1.s32{q6-q7}, [r4, :128]
 .endm
 
 .macro load16 in0, in1, in2, in3, in4, in5, in6, in7
-- 
2.10.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/2] hevc: Add NEON 32x32 IDCT

2017-05-03 Thread Alexandra Hájková
---
 libavcodec/arm/hevc_idct.S| 319 ++
 libavcodec/arm/hevcdsp_init_arm.c |   4 +
 2 files changed, 297 insertions(+), 26 deletions(-)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 4814c86..3a512b4 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -28,6 +28,10 @@ const trans, align=4
 .short 89, 75, 50, 18
 .short 90, 87, 80, 70
 .short 57, 43, 25, 9
+.short 90, 90, 88, 85
+.short 82, 78, 73, 67
+.short 61, 54, 46, 38
+.short 31, 22, 13, 4
 endconst
 
 .macro clip10 in1, in2, c1, c2
@@ -509,7 +513,7 @@ endfunc
 vsub.s32\tmp_m, \e, \o
 .endm
 
-.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7
+.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7, offset
 tr_4x4_8\in0, \in2, \in4, \in6, q8, q9, q10, q11, q12, q13, 
q14, q15
 
 vmull.s16   q12, \in1, \in0[0]
@@ -535,7 +539,7 @@ endfunc
 butterfly   q9,  q13, q1, q6
 butterfly   q10, q14, q2, q5
 butterfly   q11, q15, q3, q4
-add r4,  sp,  #512
+add r4,  sp,  #\offset
 vst1.s32{q0-q1}, [r4, :128]!
 vst1.s32{q2-q3}, [r4, :128]!
 vst1.s32{q4-q5}, [r4, :128]!
@@ -575,15 +579,15 @@ endfunc
 vsub.s32\in6, \in6, \in7
 .endm
 
-.macro store16 in0, in1, in2, in3, in4, in5, in6, in7
+.macro store16 in0, in1, in2, in3, in4, in5, in6, in7, rx
 vst1.s16\in0, [r1, :64], r2
-vst1.s16\in1, [r3, :64], r4
+vst1.s16\in1, [r3, :64], \rx
 vst1.s16\in2, [r1, :64], r2
-vst1.s16\in3, [r3, :64], r4
+vst1.s16\in3, [r3, :64], \rx
 vst1.s16\in4, [r1, :64], r2
-vst1.s16\in5, [r3, :64], r4
+vst1.s16\in5, [r3, :64], \rx
 vst1.s16\in6, [r1, :64], r2
-vst1.s16\in7, [r3, :64], r4
+vst1.s16\in7, [r3, :64], \rx
 .endm
 
 .macro scale out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, 
in3, in4, in5, in6, in7, shift
@@ -597,19 +601,33 @@ endfunc
 vqrshrn.s32 \out7, \in7, \shift
 .endm
 
-.macro tr_16x4 name, shift
+.macro store_to_stack off1, off2, in0, in2, in4, in6, in7, in5, in3, in1
+add r1, sp, #\off1
+add r3, sp, #\off2
+mov r2, #-16
+vst1.s32{\in0}, [r1, :128]!
+vst1.s32{\in1}, [r3, :128], r2
+vst1.s32{\in2}, [r1, :128]!
+vst1.s32{\in3}, [r3, :128], r2
+vst1.s32{\in4}, [r1, :128]!
+vst1.s32{\in5}, [r3, :128], r2
+vst1.s32{\in6}, [r1, :128]
+vst1.s32{\in7}, [r3, :128]
+.endm
+
+.macro tr_16x4 name, shift, offset, step
 function func_tr_16x4_\name
 mov r1,  r5
-add r3,  r5, #64
-mov r2,  #128
+add r3, r5, #(\step * 64)
+mov r2,  #(\step * 128)
 load16  d0, d1, d2, d3, d4, d5, d6, d7
 movrel  r1, trans
 
-tr16_8x4d0, d1, d2, d3, d4, d5, d6, d7
+tr16_8x4d0, d1, d2, d3, d4, d5, d6, d7, \offset
 
-add r1,  r5, #32
-add r3,  r5, #(64 + 32)
-mov r2,  #128
+add r1,  r5, #(\step * 32)
+add r3,  r5, #(\step * 3 *32)
+mov r2,  #(\step * 128)
 load16  d8, d9, d2, d3, d4, d5, d6, d7
 movrel  r1, trans + 16
 vld1.s16{q0}, [r1, :128]
@@ -630,11 +648,12 @@ function func_tr_16x4_\name
 add_member  d6, d1[2], d0[3], d0[0], d0[2], d1[1], d1[3], d1[0], 
d0[1], +, -, +, -, +, +, -, +
 add_member  d7, d1[3], d1[2], d1[1], d1[0], d0[3], d0[2], d0[1], 
d0[0], +, -, +, -, +, -, +, -
 
-add r4, sp, #512
-vld1.s16{q0-q1}, [r4, :128]!
-vld1.s16{q2-q3}, [r4, :128]!
+add r4, sp, #\offset
+vld1.s32{q0-q1}, [r4, :128]!
+vld1.s32{q2-q3}, [r4, :128]!
 
 butterfly16 q0, q5, q1, q6, q2, q7, q3, q8
+.if \shift > 0
 scale   d26, d27, d28, d29, d30, d31, d16, d17, q4, q0, q5, 
q1, q6, q2, q7, q3, \shift
 transpose8_4x4  d26, d28, d30, d16
 transpose8_4x4  d17, d31, d29, d27
@@ -642,12 +661,16 @@ function func_tr_16x4_\name
 add r3, r6, #(24 +3*32)
 mov r2, #32
 mov r4, #-32
-store16 d26, d27, d28, d29, d30, d31, d16, d17
-
-add r4, sp, #576
-vld1.s16{q0-q1}, [r4, :128]!
-vld1.s16{q2-q3}, [r4, :128]
+store16 d26, d27, d28, d29, d30, d31, d16, d17, r4

[libav-devel] [PATCH] hevc: Add NEON add_residual for bitdepth 10

2017-05-01 Thread Alexandra Hájková
---
 libavcodec/arm/hevc_idct.S| 90 +++
 libavcodec/arm/hevcdsp_init_arm.c | 13 ++
 2 files changed, 103 insertions(+)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index b3ce00b..5d400c2 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -30,6 +30,13 @@ const trans, align=4
 .short 57, 43, 25, 9
 endconst
 
+.macro clip10 in1, in2, c1, c2
+vmax.s16\in1, \in1, \c1
+vmax.s16\in2, \in2, \c1
+vmin.s16\in1, \in1, \c2
+vmin.s16\in2, \in2, \c2
+.endm
+
 function ff_hevc_add_residual_4x4_8_neon, export=1
 vld1.16 {q0-q1}, [r1, :128]
 vld1.32 d4[0], [r0, :32], r2
@@ -50,6 +57,25 @@ function ff_hevc_add_residual_4x4_8_neon, export=1
 bx  lr
 endfunc
 
+function ff_hevc_add_residual_4x4_10_neon, export=1
+mov r12, r0
+vld1.16 {q0-q1}, [r1, :128]
+vld1.16 d4, [r12, :64], r2
+vld1.16 d5, [r12, :64], r2
+vld1.16 d6, [r12, :64], r2
+vqadd.s16   q0, q2
+vld1.16 d7, [r12, :64], r2
+vmov.s16q12, #0
+vqadd.s16   q1, q3
+vmov.s16q13, #0x3FF
+clip10  q0, q1, q12, q13
+vst1.16 d0, [r0, :64], r2
+vst1.16 d1, [r0, :64], r2
+vst1.16 d2, [r0, :64], r2
+vst1.16 d3, [r0, :64], r2
+bx  lr
+endfunc
+
 function ff_hevc_add_residual_8x8_8_neon, export=1
 add r12, r0, r2
 add r2,  r2, r2
@@ -70,6 +96,25 @@ function ff_hevc_add_residual_8x8_8_neon, export=1
 bx  lr
 endfunc
 
+function ff_hevc_add_residual_8x8_10_neon, export=1
+add r12, r0, r2
+add r2,  r2, r2
+mov r3,  #8
+vmov.s16q12, #0
+vmov.s16q13, #0x3FF
+1:  subsr3,  #2
+vld1.16 {q0-q1}, [r1, :128]!
+vld1.16 {q8},[r0, :128]
+vqadd.s16   q0, q8
+vld1.16 {q9},[r12, :128]
+vqadd.s16   q1, q9
+clip10  q0, q1, q12, q13
+vst1.16 {q0}, [r0, :128], r2
+vst1.16 {q1}, [r12, :128], r2
+bne 1b
+bx  lr
+endfunc
+
 function ff_hevc_add_residual_16x16_8_neon, export=1
 mov r3,  #16
 add r12, r0, r2
@@ -97,6 +142,29 @@ function ff_hevc_add_residual_16x16_8_neon, export=1
 bx  lr
 endfunc
 
+function ff_hevc_add_residual_16x16_10_neon, export=1
+mov r3,  #16
+vmov.s16q12, #0
+vmov.s16q13, #0x3FF
+add r12, r0, r2
+add r2,  r2, r2
+1:  subsr3,  #2
+vld1.16 {q8-q9},   [r0, :128]
+vld1.16 {q0, q1},  [r1, :128]!
+vqadd.s16   q0, q8
+vld1.16 {q10-q11}, [r12, :128]
+vqadd.s16   q1, q9
+vld1.16 {q2, q3},  [r1, :128]!
+vqadd.s16   q2, q10
+vqadd.s16   q3, q11
+clip10  q0, q1, q12, q13
+clip10  q2, q3, q12, q13
+vst1.16 {q0-q1},   [r0, :128], r2
+vst1.16 {q2-q3},   [r12, :128], r2
+bne 1b
+bx  lr
+endfunc
+
 function ff_hevc_add_residual_32x32_8_neon, export=1
 vpush   {q4-q7}
 add r12, r0, r2
@@ -137,6 +205,28 @@ function ff_hevc_add_residual_32x32_8_neon, export=1
 bx  lr
 endfunc
 
+function ff_hevc_add_residual_32x32_10_neon, export=1
+mov r3,  #32
+vmov.s16q12, #0
+vmov.s16q13, #0x3FF
+1:  subsr3,  #1
+vldmr1!, {q0-q3}
+vld1.16 {q8, q9},   [r0, :128]
+add r12, r0, #32
+vld1.16 {q10, q11}, [r12, :128]
+vqadd.s16   q0, q8
+vqadd.s16   q1, q9
+vqadd.s16   q2, q10
+vqadd.s16   q3, q11
+clip10  q0, q1, q12, q13
+clip10  q2, q3, q12, q13
+vst1.16 {q0-q1},   [r0, :128]
+vst1.16 {q2-q3},   [r12, :128]
+add r0, r2
+bne 1b
+bx  lr
+endfunc
+
 .macro idct_4x4_dc bitdepth
 function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
diff --git a/libavcodec/arm/hevcdsp_init_arm.c 
b/libavcodec/arm/hevcdsp_init_arm.c
index 817c157..e3d4e4e 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c
@@ -28,12 +28,20 @@
 
 void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, int16_t *coeffs,
 

Re: [libav-devel] [PATCH] arm: hevc_idct: Tune the add_res_8x8 and add_res_32x32 functions

2017-04-28 Thread Alexandra Hájková
On Thu, Apr 27, 2017 at 11:38 PM, Martin Storsjö  wrote:
> Before:                     Cortex A7      A8      A9     A53
> hevc_add_res_8x8_8_neon:        116.0    58.7    80.2    90.7
> hevc_add_res_32x32_8_neon:     1230.0   737.5  1187.5   974.4
> After:
> hevc_add_res_8x8_8_neon:         97.7    57.0    73.7    80.0
> hevc_add_res_32x32_8_neon:     1216.0   698.7  1127.5   827.1

Looks great.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] hevc: Add NEON add_residual for bitdepth 10

2017-04-27 Thread Alexandra Hájková
---
 libavcodec/arm/hevc_idct.S| 89 +++
 libavcodec/arm/hevcdsp_init_arm.c | 13 ++
 2 files changed, 102 insertions(+)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 3966e93..14af40f 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -50,6 +50,32 @@ function ff_hevc_add_residual_4x4_8_neon, export=1
 bx  lr
 endfunc
 
+.macro clip10 in1, in2, c1, c2
+vmax.s16\in1, \in1, q12
+vmax.s16\in2, \in2, q12
+vmin.s16\in1, \in1, q13
+vmin.s16\in2, \in2, q13
+.endm
+
+function ff_hevc_add_residual_4x4_10_neon, export=1
+vld1.16 {q0-q1}, [r1, :128]
+mov r12, r0
+vld1.16 d4, [r12, :64], r2
+vld1.16 d5, [r12, :64], r2
+vld1.16 d6, [r12, :64], r2
+vld1.16 d7, [r12, :64], r2
+vqadd.s16   q0, q2
+vqadd.s16   q1, q3
+vmov.s16q12, #0
+vmov.s16q13, #0x3FF
+clip10  q0, q1
+vst1.16 d0, [r0, :64], r2
+vst1.16 d1, [r0, :64], r2
+vst1.16 d2, [r0, :64], r2
+vst1.16 d3, [r0, :64], r2
+bx  lr
+endfunc
+
 function ff_hevc_add_residual_8x8_8_neon, export=1
 mov r3,   #8
 1:  subsr3,   #2
@@ -69,6 +95,24 @@ function ff_hevc_add_residual_8x8_8_neon, export=1
 bx  lr
 endfunc
 
+function ff_hevc_add_residual_8x8_10_neon, export=1
+mov r3,  #8
+vmov.s16q12, #0
+vmov.s16q13, #0x3FF
+1:  subsr3,  #2
+vld1.16 {q0-q1}, [r1, :128]!
+vld1.16 {q8},[r0, :128]
+add r12, r0, r2
+vld1.16 {q9},[r12, :128]
+vqadd.s16   q0, q8
+vqadd.s16   q1, q9
+clip10  q0, q1
+vst1.16 {q0}, [r0, :128], r2
+vst1.16 {q1}, [r0, :128], r2
+bne 1b
+bx  lr
+endfunc
+
 function ff_hevc_add_residual_16x16_8_neon, export=1
 mov r3,  #16
 add r12, r0, r2
@@ -96,6 +140,29 @@ function ff_hevc_add_residual_16x16_8_neon, export=1
 bx  lr
 endfunc
 
+function ff_hevc_add_residual_16x16_10_neon, export=1
+mov r3,  #16
+vmov.s16q12, #0
+vmov.s16q13, #0x3FF
+add r12, r0, r2
+add r2,  r2, r2
+1:  subsr3,  #2
+vld1.16 {q8-q9},   [r0, :128]
+vld1.16 {q0, q1},  [r1, :128]!
+vld1.16 {q10-q11}, [r12, :128]
+vld1.16 {q2, q3},  [r1, :128]!
+vqadd.s16   q0, q8
+vqadd.s16   q1, q9
+vqadd.s16   q2, q10
+vqadd.s16   q3, q11
+clip10  q0, q1
+clip10  q2, q3
+vst1.16 {q0-q1},   [r0, :128], r2
+vst1.16 {q2-q3},   [r12, :128], r2
+bne 1b
+bx  lr
+endfunc
+
 function ff_hevc_add_residual_32x32_8_neon, export=1
 mov r3,  #32
 1:  subsr3,  #1
@@ -118,6 +185,28 @@ function ff_hevc_add_residual_32x32_8_neon, export=1
 bx  lr
 endfunc
 
+function ff_hevc_add_residual_32x32_10_neon, export=1
+mov r3,  #32
+vmov.s16q12, #0
+vmov.s16q13, #0x3FF
+1:  subsr3,  #1
+vldmr1!, {q0-q3}
+vld1.16 {q8, q9},   [r0, :128]
+add r12, r0, #32
+vld1.16 {q10, q11}, [r12, :128]
+vqadd.s16   q0, q8
+vqadd.s16   q1, q9
+vqadd.s16   q2, q10
+vqadd.s16   q3, q11
+clip10  q0, q1
+clip10  q2, q3
+vst1.16 {q0-q1},   [r0, :128]
+vst1.16 {q2-q3},   [r12, :128]
+add r0, r2
+bne 1b
+bx  lr
+endfunc
+
 .macro idct_4x4_dc bitdepth
 function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c
index 817c157..e3d4e4e 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c
@@ -28,12 +28,20 @@
 
 void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, int16_t *coeffs,
  ptrdiff_t stride);
+void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, int16_t *coeffs,
+  ptrdiff_t stride);
 void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, int16_t *coeffs,
  ptrdiff_t stride);
+void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, 

[libav-devel] [PATCH] hevc: Add NEON add_residual for bitdepth 8

2017-04-27 Thread Alexandra Hájková
From: Seppo Tomperi <seppo.tomp...@vtt.fi>

Optimized by Alexandra Hájková.
---
 libavcodec/arm/hevc_idct.S| 88 +++
 libavcodec/arm/hevcdsp_init_arm.c | 15 +++
 2 files changed, 103 insertions(+)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 41b1b29..3966e93 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -30,6 +30,94 @@ const trans, align=4
 .short 57, 43, 25, 9
 endconst
 
+function ff_hevc_add_residual_4x4_8_neon, export=1
+vld1.16 {q0-q1}, [r1, :128]
+vld1.32 d4[0], [r0, :32], r2
+vld1.32 d4[1], [r0, :32], r2
+vld1.32 d5[0], [r0, :32], r2
+vld1.32 d5[1], [r0, :32], r2
+sub r0, r0, r2, lsl #2
+vmovl.u8q8, d4
+vmovl.u8q9, d5
+vqadd.s16   q0, q0, q8
+vqadd.s16   q1, q1, q9
+vqmovun.s16 d0, q0
+vqmovun.s16 d1, q1
+vst1.32 d0[0], [r0, :32], r2
+vst1.32 d0[1], [r0, :32], r2
+vst1.32 d1[0], [r0, :32], r2
+vst1.32 d1[1], [r0, :32], r2
+bx  lr
+endfunc
+
+function ff_hevc_add_residual_8x8_8_neon, export=1
+mov r3,   #8
+1:  subsr3,   #2
+vld1.16 {q0-q1}, [r1, :128]!
+vld1.8  {d16},   [r0, :64]
+add r12, r0, r2
+vld1.8  {d17},   [r12, :64]
+vmovl.u8q9,   d16
+vmovl.u8q8,   d17
+vqadd.s16   q0,   q9
+vqadd.s16   q1,   q8
+vqmovun.s16 d0,   q0
+vqmovun.s16 d1,   q1
+vst1.8  d0,   [r0, :64], r2
+vst1.8  d1,   [r0, :64], r2
+bne 1b
+bx  lr
+endfunc
+
+function ff_hevc_add_residual_16x16_8_neon, export=1
+mov r3,  #16
+add r12, r0, r2
+add r2,  r2, r2
+1:  subsr3,  #2
+vld1.8  {q8}, [r0, :128]
+vld1.16 {q0, q1}, [r1, :128]!
+vld1.8  {q11},[r12, :128]
+vld1.16 {q2, q3}, [r1, :128]!
+vmovl.u8q9,  d16
+vmovl.u8q10, d17
+vmovl.u8q12, d22
+vmovl.u8q13, d23
+vqadd.s16   q0,  q9
+vqadd.s16   q1,  q10
+vqadd.s16   q2,  q12
+vqadd.s16   q3,  q13
+vqmovun.s16 d0,  q0
+vqmovun.s16 d1,  q1
+vqmovun.s16 d2,  q2
+vqmovun.s16 d3,  q3
+vst1.8  {q0}, [r0, :128], r2
+vst1.8  {q1}, [r12, :128], r2
+bne 1b
+bx  lr
+endfunc
+
+function ff_hevc_add_residual_32x32_8_neon, export=1
+mov r3,  #32
+1:  subsr3,  #1
+vldmr1!, {q0-q3}
+vld1.8  {q8, q9}, [r0, :128]
+vmovl.u8q10, d16
+vmovl.u8q11, d17
+vmovl.u8q12, d18
+vmovl.u8q13, d19
+vqadd.s16   q0,  q10
+vqadd.s16   q1,  q11
+vqadd.s16   q2,  q12
+vqadd.s16   q3,  q13
+vqmovun.s16 d0,  q0
+vqmovun.s16 d1,  q1
+vqmovun.s16 d2,  q2
+vqmovun.s16 d3,  q3
+vst1.8  {q0, q1}, [r0, :128], r2
+bne 1b
+bx  lr
+endfunc
+
 .macro idct_4x4_dc bitdepth
 function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c
index 3d8d06b..817c157 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c
@@ -25,6 +25,16 @@
 
 #include "libavcodec/hevcdsp.h"
 
+
+void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, int16_t *coeffs,
+ ptrdiff_t stride);
+void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, int16_t *coeffs,
+ ptrdiff_t stride);
+void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, int16_t *coeffs,
+   ptrdiff_t stride);
+void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, int16_t *coeffs,
+   ptrdiff_t stride);
+
 void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
@@ -47,6 +57,11 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
 
 if (have_neon(cpu_flags)) {
 if (bit_depth == 8) {
+c->add_residual[0] = ff_hevc_add_residual_4x4_8_neon;
+c->add_residual[1] = ff_hevc_add_residual_8x8_8_neon;
+c->add_residual[2] = ff_hevc_add_residual_16x

[libav-devel] [PATCH] hevc: Add NEON add_residual for bitdepth 8

2017-04-26 Thread Alexandra Hájková
From: Seppo Tomperi <seppo.tomp...@vtt.fi>

Optimized by Alexandra Hájková.
---
 libavcodec/arm/hevc_idct.S| 86 +++
 libavcodec/arm/hevcdsp_init_arm.c | 15 +++
 2 files changed, 101 insertions(+)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 41b1b29..833c3fe 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -30,6 +30,92 @@ const trans, align=4
 .short 57, 43, 25, 9
 endconst
 
+function ff_hevc_add_residual_4x4_8_neon, export=1
+vld1.16 {q0-q1}, [r1, :128]
+vld1.32 d4[0], [r0, :32], r2
+vld1.32 d4[1], [r0, :32], r2
+vld1.32 d5[0], [r0, :32], r2
+vld1.32 d5[1], [r0, :32], r2
+sub r0, r0, r2, lsl #2
+vmovl.u8q8, d4
+vmovl.u8q9, d5
+vqadd.s16   q0, q0, q8
+vqadd.s16   q1, q1, q9
+vqmovun.s16 d0, q0
+vqmovun.s16 d1, q1
+vst1.32 d0[0], [r0], r2
+vst1.32 d0[1], [r0], r2
+vst1.32 d1[0], [r0], r2
+vst1.32 d1[1], [r0], r2
+bx  lr
+endfunc
+
+function ff_hevc_add_residual_8x8_8_neon, export=1
+mov r3,   #8
+1:  subsr3,   #2
+vld1.16 {q0-q1}, [r1, :128]!
+vld1.8  {q8},[r0, :128]
+vmovl.u8q9,   d16
+vmovl.u8q8,   d17
+vqadd.s16   q0,   q9
+vqadd.s16   q1,   q8
+vqmovun.s16 d0,   q0
+vqmovun.s16 d1,   q1
+vst1.8  d0,   [r0, :64], r2
+vst1.8  d1,   [r0, :64], r2
+bne 1b
+bx  lr
+endfunc
+
+function ff_hevc_add_residual_16x16_8_neon, export=1
+push{lr}
+mov r3,   #16
+1:  subsr3,   #2
+vld1.16 {q0, q1}, [r1, :128]!
+vld1.16 {q2, q3}, [r1, :128]!
+vld1.8  {q8}, [r0, :128]
+add lr, r0, r2
+vld1.8  {q11},[lr, :128]
+vmovl.u8q9,  d16
+vmovl.u8q10, d17
+vmovl.u8q12, d22
+vmovl.u8q13, d23
+vqadd.s16   q0,  q9
+vqadd.s16   q1,  q10
+vqadd.s16   q2,  q12
+vqadd.s16   q3,  q13
+vqmovun.s16 d0,  q0
+vqmovun.s16 d1,  q1
+vqmovun.s16 d2,  q2
+vqmovun.s16 d3,  q3
+vst1.8  {q0},   [r0, :128], r2
+vst1.8  {q1},   [r0, :128], r2
+bne 1b
+pop {pc}
+endfunc
+
+function ff_hevc_add_residual_32x32_8_neon, export=1
+mov r3,   #32
+1:  subsr3,   #1
+vldmr1!, {q0-q3}
+vld1.8  {q8, q9},  [r0, :128]
+vmovl.u8q10, d16
+vmovl.u8q11, d17
+vmovl.u8q12, d18
+vmovl.u8q13, d19
+vqadd.s16   q0,  q10
+vqadd.s16   q1,  q11
+vqadd.s16   q2,  q12
+vqadd.s16   q3,  q13
+vqmovun.s16 d0,  q0
+vqmovun.s16 d1,  q1
+vqmovun.s16 d2,  q2
+vqmovun.s16 d3,  q3
+vst1.8  {q0, q1},   [r0, :128], r2
+bne 1b
+bx  lr
+endfunc
+
 .macro idct_4x4_dc bitdepth
 function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c
index 3d8d06b..817c157 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c
@@ -25,6 +25,16 @@
 
 #include "libavcodec/hevcdsp.h"
 
+
+void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, int16_t *coeffs,
+ ptrdiff_t stride);
+void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, int16_t *coeffs,
+ ptrdiff_t stride);
+void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, int16_t *coeffs,
+   ptrdiff_t stride);
+void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, int16_t *coeffs,
+   ptrdiff_t stride);
+
 void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
@@ -47,6 +57,11 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
 
 if (have_neon(cpu_flags)) {
 if (bit_depth == 8) {
+c->add_residual[0] = ff_hevc_add_residual_4x4_8_neon;
+c->add_residual[1] = ff_hevc_add_residual_8x8_8_neon;
+c->add_residual[2] = ff_hevc_add_residual_16x16_8_neon;
+c->add_residual[3] = ff_hevc_add_residual_32x32_8_neon;
+
 c->idct_dc[

[libav-devel] [PATCH 2/2] hevc: Add support for bitdepth 10 for IDCT DC

2017-04-25 Thread Alexandra Hájková
---
 libavcodec/arm/hevc_idct.S| 40 +++
 libavcodec/arm/hevcdsp_init_arm.c |  9 +
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index ceded7a..41b1b29 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -30,26 +30,29 @@ const trans, align=4
 .short 57, 43, 25, 9
 endconst
 
-function ff_hevc_idct_4x4_dc_8_neon, export=1
+.macro idct_4x4_dc bitdepth
+function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
-ldr r2, =0x20
+ldr r2, =(1 << (13 - \bitdepth))
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q0, r1
 vdup.16 q1, r1
 vst1.16 {q0, q1}, [r0, :128]
 bx  lr
 endfunc
+.endm
 
-function ff_hevc_idct_8x8_dc_8_neon, export=1
+.macro idct_8x8_dc bitdepth
+function ff_hevc_idct_8x8_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
-ldr r2, =0x20
+ldr r2, =(1 << (13 - \bitdepth))
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q8, r1
 vdup.16 q9, r1
 vmov.16 q10, q8
@@ -61,14 +64,16 @@ function ff_hevc_idct_8x8_dc_8_neon, export=1
 vstmr0, {q8-q15}
 bx  lr
 endfunc
+.endm
 
-function ff_hevc_idct_16x16_dc_8_neon, export=1
+.macro idct_16x16_dc bitdepth
+function ff_hevc_idct_16x16_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
-ldr r2, =0x20
+ldr r2, =(1 << (13 - \bitdepth))
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q8, r1
 vdup.16 q9, r1
 vmov.16 q10, q8
@@ -83,14 +88,16 @@ function ff_hevc_idct_16x16_dc_8_neon, export=1
 vstmr0, {q8-q15}
 bx  lr
 endfunc
+.endm
 
-function ff_hevc_idct_32x32_dc_8_neon, export=1
+.macro idct_32x32_dc bitdepth
+function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
-ldr r2, =0x20
+ldr r2, =(1 << (13 - \bitdepth))
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 mov r3, #16
 vdup.16 q8, r1
 vdup.16 q9, r1
@@ -105,6 +112,7 @@ function ff_hevc_idct_32x32_dc_8_neon, export=1
 bne 1b
 bx  lr
 endfunc
+.endm
 
 .macro sum_sub out, in, c, op
   .ifc \op, +
@@ -496,8 +504,16 @@ tr_16x4 secondpass_10, 20 - 10
 .ltorg
 
 idct_4x4 8
+idct_4x4_dc 8
 idct_4x4 10
+idct_4x4_dc 10
 idct_8x8 8
+idct_8x8_dc 8
 idct_8x8 10
+idct_8x8_dc 10
 idct_16x16 8
+idct_16x16_dc 8
 idct_16x16 10
+idct_16x16_dc 10
+idct_32x32_dc 8
+idct_32x32_dc 10
diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c
index fa2e7ad..3d8d06b 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c
@@ -29,6 +29,10 @@ void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs);
 
 void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
@@ -53,6 +57,11 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
 c->idct[2] = ff_hevc_idct_16x16_8_neon;
 }
 if (bit_depth == 10) {
+c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_neon;
+c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_neon;
+c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_neon;
+c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_neon;
+
 c->idct[0] = ff_hevc_idct_4x4_10_neon;
 c->idct[1] = ff_hevc_idct_8x8_10_neon;
 c->idct[2] = ff_hevc_idct_16x16_10_neon;
-- 
2.10.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 1/2] hevc: Add NEON IDCT DC functions for bitdepth 8

2017-04-25 Thread Alexandra Hájková
From: Seppo Tomperi <seppo.tomp...@vtt.fi>

Signed-off-by: Alexandra Hájková <alexan...@khirnov.net>
---
 libavcodec/arm/hevc_idct.S| 78 +++
 libavcodec/arm/hevcdsp_init_arm.c | 10 +
 2 files changed, 88 insertions(+)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 156d476..ceded7a 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -1,5 +1,7 @@
 /*
  * ARM NEON optimised IDCT functions for HEVC decoding
+ *
+ * Copyright (c) 2014 Seppo Tomperi <seppo.tomp...@vtt.fi>
  * Copyright (c) 2017 Alexandra Hájková
  *
  * This file is part of Libav.
@@ -28,6 +30,82 @@ const trans, align=4
 .short 57, 43, 25, 9
 endconst
 
+function ff_hevc_idct_4x4_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+vdup.16 q0, r1
+vdup.16 q1, r1
+vst1.16 {q0, q1}, [r0, :128]
+bx  lr
+endfunc
+
+function ff_hevc_idct_8x8_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+vdup.16 q8, r1
+vdup.16 q9, r1
+vmov.16 q10, q8
+vmov.16 q11, q8
+vmov.16 q12, q8
+vmov.16 q13, q8
+vmov.16 q14, q8
+vmov.16 q15, q8
+vstmr0, {q8-q15}
+bx  lr
+endfunc
+
+function ff_hevc_idct_16x16_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+vdup.16 q8, r1
+vdup.16 q9, r1
+vmov.16 q10, q8
+vmov.16 q11, q8
+vmov.16 q12, q8
+vmov.16 q13, q8
+vmov.16 q14, q8
+vmov.16 q15, q8
+vstmr0!, {q8-q15}
+vstmr0!, {q8-q15}
+vstmr0!, {q8-q15}
+vstmr0, {q8-q15}
+bx  lr
+endfunc
+
+function ff_hevc_idct_32x32_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+mov r3, #16
+vdup.16 q8, r1
+vdup.16 q9, r1
+vmov.16 q10, q8
+vmov.16 q11, q8
+vmov.16 q12, q8
+vmov.16 q13, q8
+vmov.16 q14, q8
+vmov.16 q15, q8
+1:  subsr3, #1
+vstmr0!, {q8-q15}
+bne 1b
+bx  lr
+endfunc
+
 .macro sum_sub out, in, c, op
   .ifc \op, +
 vmlal.s16   \out, \in, \c
diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c
index e61587f..fa2e7ad 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c
@@ -25,6 +25,11 @@
 
 #include "libavcodec/hevcdsp.h"
 
+void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
+
 void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
@@ -38,6 +43,11 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
 
 if (have_neon(cpu_flags)) {
 if (bit_depth == 8) {
+c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon;
+c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon;
+c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
+c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
+
 c->idct[0] = ff_hevc_idct_4x4_8_neon;
 c->idct[1] = ff_hevc_idct_8x8_8_neon;
 c->idct[2] = ff_hevc_idct_16x16_8_neon;
-- 
2.10.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/2] hevc: Add support for bitdepth 10 for IDCT DC

2017-04-20 Thread Alexandra Hájková
---
 libavcodec/arm/hevc_idct.S| 42 +++
 libavcodec/arm/hevcdsp_init_arm.c | 22 ++--
 2 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index f0008aa..41b1b29 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -30,26 +30,29 @@ const trans, align=4
 .short 57, 43, 25, 9
 endconst
 
-function ff_hevc_idct_4x4_dc_8_neon, export=1
+.macro idct_4x4_dc bitdepth
+function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
-ldr r2, =0x20
+ldr r2, =(1 << (13 - \bitdepth))
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q0, r1
 vdup.16 q1, r1
 vst1.16 {q0, q1}, [r0, :128]
 bx  lr
 endfunc
+.endm
 
-function ff_hevc_idct_8x8_dc_8_neon, export=1
+.macro idct_8x8_dc bitdepth
+function ff_hevc_idct_8x8_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
-ldr r2, =0x20
+ldr r2, =(1 << (13 - \bitdepth))
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q8, r1
 vdup.16 q9, r1
 vmov.16 q10, q8
@@ -61,14 +64,16 @@ function ff_hevc_idct_8x8_dc_8_neon, export=1
 vstmr0, {q8-q15}
 bx  lr
 endfunc
+.endm
 
-function ff_hevc_idct_16x16_dc_8_neon, export=1
+.macro idct_16x16_dc bitdepth
+function ff_hevc_idct_16x16_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
-ldr r2, =0x20
+ldr r2, =(1 << (13 - \bitdepth))
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q8, r1
 vdup.16 q9, r1
 vmov.16 q10, q8
@@ -83,14 +88,16 @@ function ff_hevc_idct_16x16_dc_8_neon, export=1
 vstmr0, {q8-q15}
 bx  lr
 endfunc
+.endm
 
-function ff_hevc_idct_32x32_dc_8_neon, export=1
+.macro idct_32x32_dc bitdepth
+function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
-ldr r2, =0x20
+ldr r2, =(1 << (13 - \bitdepth))
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 mov r3, #16
 vdup.16 q8, r1
 vdup.16 q9, r1
@@ -103,8 +110,9 @@ function ff_hevc_idct_32x32_dc_8_neon, export=1
 1:  subsr3, #1
 vstmr0!, {q8-q15}
 bne 1b
-bx lr
+bx  lr
 endfunc
+.endm
 
 .macro sum_sub out, in, c, op
   .ifc \op, +
@@ -496,8 +504,16 @@ tr_16x4 secondpass_10, 20 - 10
 .ltorg
 
 idct_4x4 8
+idct_4x4_dc 8
 idct_4x4 10
+idct_4x4_dc 10
 idct_8x8 8
+idct_8x8_dc 8
 idct_8x8 10
+idct_8x8_dc 10
 idct_16x16 8
+idct_16x16_dc 8
 idct_16x16 10
+idct_16x16_dc 10
+idct_32x32_dc 8
+idct_32x32_dc 10
diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c
index 10f60bc..99eff78 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c
@@ -25,13 +25,18 @@
 
 #include "libavcodec/hevcdsp.h"
 
-void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
-void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
-void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs);
+
+void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
@@ -52,9 +57,14 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
 c->idct[2]= ff_hevc_idct_16x16_8_neon;
 }
 if (bit_depth == 10) {
-c->idct[0] = ff_hevc_idct_4x4_10_neon;
-c->idct[1] = 

[libav-devel] [PATCH 1/2] hevc: Add NEON IDCT DC functions for bitdepth 8

2017-04-20 Thread Alexandra Hájková
From: Seppo Tomperi <seppo.tomp...@vtt.fi>

Signed-off-by: Alexandra Hájková <alexan...@khirnov.net>
---
 libavcodec/arm/hevc_idct.S| 78 +++
 libavcodec/arm/hevcdsp_init_arm.c | 15 ++--
 2 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 156d476..f0008aa 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -1,5 +1,7 @@
 /*
  * ARM NEON optimised IDCT functions for HEVC decoding
+ *
+ * Copyright (c) 2014 Seppo Tomperi <seppo.tomp...@vtt.fi>
  * Copyright (c) 2017 Alexandra Hájková
  *
  * This file is part of Libav.
@@ -28,6 +30,82 @@ const trans, align=4
 .short 57, 43, 25, 9
 endconst
 
+function ff_hevc_idct_4x4_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+vdup.16 q0, r1
+vdup.16 q1, r1
+vst1.16 {q0, q1}, [r0, :128]
+bx  lr
+endfunc
+
+function ff_hevc_idct_8x8_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+vdup.16 q8, r1
+vdup.16 q9, r1
+vmov.16 q10, q8
+vmov.16 q11, q8
+vmov.16 q12, q8
+vmov.16 q13, q8
+vmov.16 q14, q8
+vmov.16 q15, q8
+vstmr0, {q8-q15}
+bx  lr
+endfunc
+
+function ff_hevc_idct_16x16_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+vdup.16 q8, r1
+vdup.16 q9, r1
+vmov.16 q10, q8
+vmov.16 q11, q8
+vmov.16 q12, q8
+vmov.16 q13, q8
+vmov.16 q14, q8
+vmov.16 q15, q8
+vstmr0!, {q8-q15}
+vstmr0!, {q8-q15}
+vstmr0!, {q8-q15}
+vstmr0, {q8-q15}
+bx  lr
+endfunc
+
+function ff_hevc_idct_32x32_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+mov r3, #16
+vdup.16 q8, r1
+vdup.16 q9, r1
+vmov.16 q10, q8
+vmov.16 q11, q8
+vmov.16 q12, q8
+vmov.16 q13, q8
+vmov.16 q14, q8
+vmov.16 q15, q8
+1:  subsr3, #1
+vstmr0!, {q8-q15}
+bne 1b
+bx lr
+endfunc
+
 .macro sum_sub out, in, c, op
   .ifc \op, +
 vmlal.s16   \out, \in, \c
diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c
index e61587f..10f60bc 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c
@@ -26,8 +26,12 @@
 #include "libavcodec/hevcdsp.h"
 
 void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
@@ -38,9 +42,14 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
 
 if (have_neon(cpu_flags)) {
 if (bit_depth == 8) {
-c->idct[0] = ff_hevc_idct_4x4_8_neon;
-c->idct[1] = ff_hevc_idct_8x8_8_neon;
-c->idct[2] = ff_hevc_idct_16x16_8_neon;
+c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon;
+c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon;
+c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
+c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
+
+c->idct[0]= ff_hevc_idct_4x4_8_neon;
+c->idct[1]= ff_hevc_idct_8x8_8_neon;
+c->idct[2]= ff_hevc_idct_16x16_8_neon;
 }
 if (bit_depth == 10) {
 c->idct[0] = ff_hevc_idct_4x4_10_neon;
-- 
2.10.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] hevc: Add NEON add_residual for bitdepth 8

2017-04-18 Thread Alexandra Hájková
From: Seppo Tomperi <seppo.tomp...@vtt.fi>

Signed-off-by: Alexandra Hájková <alexan...@khirnov.net>
---
 libavcodec/arm/hevc_idct.S| 71 +++
 libavcodec/arm/hevcdsp_init_arm.c | 15 +
 2 files changed, 86 insertions(+)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 082f832..0e84034 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -30,6 +30,77 @@ const trans, align=4
 .short 57, 43, 25, 9
 endconst
 
+function ff_hevc_add_residual_4x4_8_neon, export=1
+vldmr1, {q0-q1}
+vld1.32 d4[0], [r0], r2
+vld1.32 d4[1], [r0], r2
+vld1.32 d5[0], [r0], r2
+vld1.32 d5[1], [r0], r2
+sub r0, r0, r2, lsl #2
+vmovl.u8q8, d4
+vmovl.u8q9, d5
+vqadd.s16   q0, q0, q8
+vqadd.s16   q1, q1, q9
+vqmovun.s16 d0, q0
+vqmovun.s16 d1, q1
+vst1.32 d0[0], [r0], r2
+vst1.32 d0[1], [r0], r2
+vst1.32 d1[0], [r0], r2
+vst1.32 d1[1], [r0], r2
+bx  lr
+endfunc
+
+function ff_hevc_add_residual_8x8_8_neon, export=1
+mov r3,   #8
+1:  subsr3,   #1
+vld1.16 {q0}, [r1]!
+vld1.8  d16,  [r0]
+vmovl.u8q8,   d16
+vqadd.s16   q0,   q8
+vqmovun.s16 d0,   q0
+vst1.32 d0,   [r0], r2
+bne 1b
+bx  lr
+endfunc
+
+function ff_hevc_add_residual_16x16_8_neon, export=1
+mov r3,   #16
+1:  subsr3,   #1
+vld1.16 {q0, q1}, [r1]!
+vld1.8  {q8},  [r0]
+vmovl.u8q9,  d16
+vmovl.u8q10, d17
+vqadd.s16   q0,  q9
+vqadd.s16   q1,  q10
+vqmovun.s16 d0,  q0
+vqmovun.s16 d1,  q1
+vst1.8  {q0},   [r0], r2
+bne 1b
+bx  lr
+endfunc
+
+function ff_hevc_add_residual_32x32_8_neon, export=1
+mov r3,   #32
+1:  subsr3,   #1
+vldmr1!, {q0-q3}
+vld1.8  {q8, q9},  [r0]
+vmovl.u8q10, d16
+vmovl.u8q11, d17
+vmovl.u8q12, d18
+vmovl.u8q13, d19
+vqadd.s16   q0,  q10
+vqadd.s16   q1,  q11
+vqadd.s16   q2,  q12
+vqadd.s16   q3,  q13
+vqmovun.s16 d0,  q0
+vqmovun.s16 d1,  q1
+vqmovun.s16 d2,  q2
+vqmovun.s16 d3,  q3
+vst1.8  {q0, q1},   [r0], r2
+bne 1b
+bx  lr
+endfunc
+
 .macro idct_4x4_dc bitdepth
 function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c
index febbcc1..a32b7ef 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c
@@ -25,6 +25,16 @@
 
 #include "libavcodec/hevcdsp.h"
 
+
+void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, int16_t *coeffs,
+  ptrdiff_t stride);
+void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, int16_t *coeffs,
+  ptrdiff_t stride);
+void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, int16_t *coeffs,
+  ptrdiff_t stride);
+void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, int16_t *coeffs,
+  ptrdiff_t stride);
+
 void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
@@ -47,6 +57,11 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
 
 if (have_neon(cpu_flags)) {
 if (bit_depth == 8) {
+c->add_residual[0] = ff_hevc_add_residual_4x4_8_neon;
+c->add_residual[1] = ff_hevc_add_residual_8x8_8_neon;
+c->add_residual[2] = ff_hevc_add_residual_16x16_8_neon;
+c->add_residual[3] = ff_hevc_add_residual_32x32_8_neon;
+
 c->idct[0]= ff_hevc_idct_4x4_8_neon;
 c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon;
 c->idct[1]= ff_hevc_idct_8x8_8_neon;
-- 
2.10.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/2] hevc: Add support for bitdepth 10 for IDCT DC

2017-04-17 Thread Alexandra Hájková
---
 libavcodec/arm/hevc_idct.S| 50 ---
 libavcodec/arm/hevcdsp_init_arm.c | 21 +++-
 2 files changed, 56 insertions(+), 15 deletions(-)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 7fdd7cc..082f832 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -30,26 +30,37 @@ const trans, align=4
 .short 57, 43, 25, 9
 endconst
 
-function ff_hevc_idct_4x4_dc_8_neon, export=1
+.macro idct_4x4_dc bitdepth
+function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
+.if \bitdepth == 8
 ldr r2, =0x20
+.else
+ldr r2, =0x8
+.endif
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q0, r1
 vdup.16 q1, r1
 vst1.16 {q0, q1}, [r0]
 bx  lr
 endfunc
+.endm
 
-function ff_hevc_idct_8x8_dc_8_neon, export=1
+.macro idct_8x8_dc bitdepth
+function ff_hevc_idct_8x8_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
+.if \bitdepth == 8
 ldr r2, =0x20
+.else
+ldr r2, =0x8
+.endif
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q8, r1
 vdup.16 q9, r1
 vmov.16 q10, q8
@@ -61,14 +72,20 @@ function ff_hevc_idct_8x8_dc_8_neon, export=1
 vstmr0, {q8-q15}
 bx  lr
 endfunc
+.endm
 
-function ff_hevc_idct_16x16_dc_8_neon, export=1
+.macro idct_16x16_dc bitdepth
+function ff_hevc_idct_16x16_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
+.if \bitdepth == 8
 ldr r2, =0x20
+.else
+ldr r2, =0x8
+.endif
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q8, r1
 vdup.16 q9, r1
 vmov.16 q10, q8
@@ -83,14 +100,20 @@ function ff_hevc_idct_16x16_dc_8_neon, export=1
 vstmr0, {q8-q15}
 bx  lr
 endfunc
+.endm
 
-function ff_hevc_idct_32x32_dc_8_neon, export=1
+.macro idct_32x32_dc bitdepth
+function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
+.if \bitdepth == 8
 ldr r2, =0x20
+.else
+ldr r2, =0x8
+.endif
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 mov r3, #16
 vdup.16 q8, r1
 vdup.16 q9, r1
@@ -103,8 +126,9 @@ function ff_hevc_idct_32x32_dc_8_neon, export=1
 1:  subsr3, #1
 vstmr0!, {q8-q15}
 bne 1b
-bx lr
+bx  lr
 endfunc
+.endm
 
 .macro sum_sub out, in, c, op
   .ifc \op, +
@@ -496,8 +520,16 @@ tr_16x4 secondpass_10, 20 - 10
 .ltorg
 
 idct_4x4 8
+idct_4x4_dc 8
 idct_4x4 10
+idct_4x4_dc 10
 idct_8x8 8
+idct_8x8_dc 8
 idct_8x8 10
+idct_8x8_dc 10
 idct_16x16 8
+idct_16x16_dc 8
 idct_16x16 10
+idct_16x16_dc 10
+idct_32x32_dc 8
+idct_32x32_dc 10
diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c
index b65e2e9..febbcc1 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c
@@ -25,13 +25,18 @@
 
 #include "libavcodec/hevcdsp.h"
 
-void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
-void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
-void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs);
+
+void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
@@ -51,9 +56,13 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
 }
 

Re: [libav-devel] [PATCH 2/2] hevc: Add support for bitdepth 10 for IDCT DC

2017-04-17 Thread Alexandra Hájková
>> -function ff_hevc_idct_32x32_dc_8_neon, export=1
>> +.macro idct_32x32_dc bitdepth
>> +function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1
>>  ldrsh   r1, [r0]
>>  ldr r2, =0x20
>> +.if \bitdepth == 8
>> +ldr r2, =0x20
>> +.else
>> +ldr r2, =0x8
>> +.endif
>
> This doesn't look quite right, shouldn't the new block replace/wrap
> the old ldr instruction, like it does in the 16x16 version (and all
> other sizes)?
>

It's wrong, yes. I'll resend it.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/2] hevc: Add support for bitdepth 10 for IDCT DC

2017-04-17 Thread Alexandra Hájková
---

Indent operands.

 libavcodec/arm/hevc_idct.S| 51 ---
 libavcodec/arm/hevcdsp_init_arm.c | 21 +++-
 2 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 7fdd7cc..f949d80 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -30,26 +30,37 @@ const trans, align=4
 .short 57, 43, 25, 9
 endconst
 
-function ff_hevc_idct_4x4_dc_8_neon, export=1
+.macro idct_4x4_dc bitdepth
+function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
+.if \bitdepth == 8
 ldr r2, =0x20
+.else
+ldr r2, =0x8
+.endif
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q0, r1
 vdup.16 q1, r1
 vst1.16 {q0, q1}, [r0]
 bx  lr
 endfunc
+.endm
 
-function ff_hevc_idct_8x8_dc_8_neon, export=1
+.macro idct_8x8_dc bitdepth
+function ff_hevc_idct_8x8_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
+.if \bitdepth == 8
 ldr r2, =0x20
+.else
+ldr r2, =0x8
+.endif
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q8, r1
 vdup.16 q9, r1
 vmov.16 q10, q8
@@ -61,14 +72,20 @@ function ff_hevc_idct_8x8_dc_8_neon, export=1
 vstm r0, {q8-q15}
 bx  lr
 endfunc
+.endm
 
-function ff_hevc_idct_16x16_dc_8_neon, export=1
+.macro idct_16x16_dc bitdepth
+function ff_hevc_idct_16x16_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
+.if \bitdepth == 8
 ldr r2, =0x20
+.else
+ldr r2, =0x8
+.endif
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q8, r1
 vdup.16 q9, r1
 vmov.16 q10, q8
@@ -83,14 +100,21 @@ function ff_hevc_idct_16x16_dc_8_neon, export=1
 vstm r0, {q8-q15}
 bx  lr
 endfunc
+.endm
 
-function ff_hevc_idct_32x32_dc_8_neon, export=1
+.macro idct_32x32_dc bitdepth
+function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
 ldr r2, =0x20
+.if \bitdepth == 8
+ldr r2, =0x20
+.else
+ldr r2, =0x8
+.endif
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 mov r3, #16
 vdup.16 q8, r1
 vdup.16 q9, r1
@@ -103,8 +127,9 @@ function ff_hevc_idct_32x32_dc_8_neon, export=1
 1:  subs r3, #1
 vstm r0!, {q8-q15}
 bne 1b
-bx lr
+bx  lr
 endfunc
+.endm
 
 .macro sum_sub out, in, c, op
   .ifc \op, +
@@ -496,8 +521,16 @@ tr_16x4 secondpass_10, 20 - 10
 .ltorg
 
 idct_4x4 8
+idct_4x4_dc 8
 idct_4x4 10
+idct_4x4_dc 10
 idct_8x8 8
+idct_8x8_dc 8
 idct_8x8 10
+idct_8x8_dc 10
 idct_16x16 8
+idct_16x16_dc 8
 idct_16x16 10
+idct_16x16_dc 10
+idct_32x32_dc 8
+idct_32x32_dc 10
diff --git a/libavcodec/arm/hevcdsp_init_arm.c 
b/libavcodec/arm/hevcdsp_init_arm.c
index b65e2e9..febbcc1 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c
@@ -25,13 +25,18 @@
 
 #include "libavcodec/hevcdsp.h"
 
-void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
-void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
-void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs);
+
+void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
@@ -51,9 +56,13 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int 
bit_depth)
 c->idct_dc[3] 

[libav-devel] [PATCH 1/2] hevc: Add NEON IDCT DC functions for bitdepth 8

2017-04-17 Thread Alexandra Hájková
From: Seppo Tomperi <seppo.tomp...@vtt.fi>

Signed-off-by: Alexandra Hájková <alexan...@khirnov.net>
---

Indent operands.

 libavcodec/arm/hevc_idct.S| 78 +++
 libavcodec/arm/hevcdsp_init_arm.c | 14 +--
 2 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 156d476..7fdd7cc 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -1,5 +1,7 @@
 /*
  * ARM NEON optimised IDCT functions for HEVC decoding
+ *
+ * Copyright (c) 2014 Seppo Tomperi <seppo.tomp...@vtt.fi>
  * Copyright (c) 2017 Alexandra Hájková
  *
  * This file is part of Libav.
@@ -28,6 +30,82 @@ const trans, align=4
 .short 57, 43, 25, 9
 endconst
 
+function ff_hevc_idct_4x4_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+vdup.16 q0, r1
+vdup.16 q1, r1
+vst1.16 {q0, q1}, [r0]
+bx  lr
+endfunc
+
+function ff_hevc_idct_8x8_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+vdup.16 q8, r1
+vdup.16 q9, r1
+vmov.16 q10, q8
+vmov.16 q11, q8
+vmov.16 q12, q8
+vmov.16 q13, q8
+vmov.16 q14, q8
+vmov.16 q15, q8
+vstmr0, {q8-q15}
+bx  lr
+endfunc
+
+function ff_hevc_idct_16x16_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+vdup.16 q8, r1
+vdup.16 q9, r1
+vmov.16 q10, q8
+vmov.16 q11, q8
+vmov.16 q12, q8
+vmov.16 q13, q8
+vmov.16 q14, q8
+vmov.16 q15, q8
+vstmr0!, {q8-q15}
+vstmr0!, {q8-q15}
+vstmr0!, {q8-q15}
+vstmr0, {q8-q15}
+bx  lr
+endfunc
+
+function ff_hevc_idct_32x32_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+mov r3, #16
+vdup.16 q8, r1
+vdup.16 q9, r1
+vmov.16 q10, q8
+vmov.16 q11, q8
+vmov.16 q12, q8
+vmov.16 q13, q8
+vmov.16 q14, q8
+vmov.16 q15, q8
+1:  subsr3, #1
+vstmr0!, {q8-q15}
+bne 1b
+bx lr
+endfunc
+
 .macro sum_sub out, in, c, op
   .ifc \op, +
 vmlal.s16   \out, \in, \c
diff --git a/libavcodec/arm/hevcdsp_init_arm.c 
b/libavcodec/arm/hevcdsp_init_arm.c
index e61587f..b65e2e9 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c
@@ -26,8 +26,12 @@
 #include "libavcodec/hevcdsp.h"
 
 void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
@@ -38,9 +42,13 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int 
bit_depth)
 
 if (have_neon(cpu_flags)) {
 if (bit_depth == 8) {
-c->idct[0] = ff_hevc_idct_4x4_8_neon;
-c->idct[1] = ff_hevc_idct_8x8_8_neon;
-c->idct[2] = ff_hevc_idct_16x16_8_neon;
+c->idct[0]= ff_hevc_idct_4x4_8_neon;
+c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon;
+c->idct[1]= ff_hevc_idct_8x8_8_neon;
+c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon;
+c->idct[2]= ff_hevc_idct_16x16_8_neon;
+c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
+c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
 }
 if (bit_depth == 10) {
 c->idct[0] = ff_hevc_idct_4x4_10_neon;
-- 
2.10.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 1/2] hevc: Add NEON IDCT DC functions for bitdepth 8

2017-04-16 Thread Alexandra Hájková
From: Seppo Tomperi <seppo.tomp...@vtt.fi>

Signed-off-by: Alexandra Hájková <alexan...@khirnov.net>
---
 libavcodec/arm/hevc_idct.S| 78 +++
 libavcodec/arm/hevcdsp_init_arm.c | 14 +--
 2 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 156d476..f74847b 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -1,5 +1,7 @@
 /*
  * ARM NEON optimised IDCT functions for HEVC decoding
+ *
+ * Copyright (c) 2014 Seppo Tomperi <seppo.tomp...@vtt.fi>
  * Copyright (c) 2017 Alexandra Hájková
  *
  * This file is part of Libav.
@@ -28,6 +30,82 @@ const trans, align=4
 .short 57, 43, 25, 9
 endconst
 
+function ff_hevc_idct_4x4_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+vdup.16 q0, r1
+vdup.16 q1, r1
+vst1.16 {q0, q1}, [r0]
+bx lr
+endfunc
+
+function ff_hevc_idct_8x8_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+vdup.16 q8, r1
+vdup.16 q9, r1
+vmov.16 q10, q8
+vmov.16 q11, q8
+vmov.16 q12, q8
+vmov.16 q13, q8
+vmov.16 q14, q8
+vmov.16 q15, q8
+vstmr0, {q8-q15}
+bx lr
+endfunc
+
+function ff_hevc_idct_16x16_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+vdup.16 q8, r1
+vdup.16 q9, r1
+vmov.16 q10, q8
+vmov.16 q11, q8
+vmov.16 q12, q8
+vmov.16 q13, q8
+vmov.16 q14, q8
+vmov.16 q15, q8
+vstmr0!, {q8-q15}
+vstmr0!, {q8-q15}
+vstmr0!, {q8-q15}
+vstmr0, {q8-q15}
+bx lr
+endfunc
+
+function ff_hevc_idct_32x32_dc_8_neon, export=1
+ldrsh   r1, [r0]
+ldr r2, =0x20
+add r1, #1
+asr r1, #1
+add r1, r2
+asr r1, #6
+mov r3, #16
+vdup.16 q8, r1
+vdup.16 q9, r1
+vmov.16 q10, q8
+vmov.16 q11, q8
+vmov.16 q12, q8
+vmov.16 q13, q8
+vmov.16 q14, q8
+vmov.16 q15, q8
+1:  subsr3, #1
+vstmr0!, {q8-q15}
+bne 1b
+bx lr
+endfunc
+
 .macro sum_sub out, in, c, op
   .ifc \op, +
 vmlal.s16   \out, \in, \c
diff --git a/libavcodec/arm/hevcdsp_init_arm.c 
b/libavcodec/arm/hevcdsp_init_arm.c
index e61587f..b65e2e9 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c
@@ -26,8 +26,12 @@
 #include "libavcodec/hevcdsp.h"
 
 void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
@@ -38,9 +42,13 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int 
bit_depth)
 
 if (have_neon(cpu_flags)) {
 if (bit_depth == 8) {
-c->idct[0] = ff_hevc_idct_4x4_8_neon;
-c->idct[1] = ff_hevc_idct_8x8_8_neon;
-c->idct[2] = ff_hevc_idct_16x16_8_neon;
+c->idct[0]= ff_hevc_idct_4x4_8_neon;
+c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon;
+c->idct[1]= ff_hevc_idct_8x8_8_neon;
+c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon;
+c->idct[2]= ff_hevc_idct_16x16_8_neon;
+c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
+c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
 }
 if (bit_depth == 10) {
 c->idct[0] = ff_hevc_idct_4x4_10_neon;
-- 
2.10.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/2] hevc: Add support for bitdepth 10 for IDCT DC

2017-04-16 Thread Alexandra Hájková
---
 libavcodec/arm/hevc_idct.S| 49 ---
 libavcodec/arm/hevcdsp_init_arm.c | 21 -
 2 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index f74847b..b80d5ff 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -30,26 +30,37 @@ const trans, align=4
 .short 57, 43, 25, 9
 endconst
 
-function ff_hevc_idct_4x4_dc_8_neon, export=1
+.macro idct_4x4_dc bitdepth
+function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
+.if \bitdepth == 8
 ldr r2, =0x20
+.else
+ldr r2, =0x8
+.endif
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q0, r1
 vdup.16 q1, r1
 vst1.16 {q0, q1}, [r0]
 bx lr
 endfunc
+.endm
 
-function ff_hevc_idct_8x8_dc_8_neon, export=1
+.macro idct_8x8_dc bitdepth
+function ff_hevc_idct_8x8_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
+.if \bitdepth == 8
 ldr r2, =0x20
+.else
+ldr r2, =0x8
+.endif
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q8, r1
 vdup.16 q9, r1
 vmov.16 q10, q8
@@ -61,14 +72,20 @@ function ff_hevc_idct_8x8_dc_8_neon, export=1
 vstmr0, {q8-q15}
 bx lr
 endfunc
+.endm
 
-function ff_hevc_idct_16x16_dc_8_neon, export=1
+.macro idct_16x16_dc bitdepth
+function ff_hevc_idct_16x16_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
+.if \bitdepth == 8
 ldr r2, =0x20
+.else
+ldr r2, =0x8
+.endif
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 vdup.16 q8, r1
 vdup.16 q9, r1
 vmov.16 q10, q8
@@ -83,14 +100,21 @@ function ff_hevc_idct_16x16_dc_8_neon, export=1
 vstmr0, {q8-q15}
 bx lr
 endfunc
+.endm
 
-function ff_hevc_idct_32x32_dc_8_neon, export=1
+.macro idct_32x32_dc bitdepth
+function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1
 ldrsh   r1, [r0]
 ldr r2, =0x20
+.if \bitdepth == 8
+ldr r2, =0x20
+.else
+ldr r2, =0x8
+.endif
 add r1, #1
 asr r1, #1
 add r1, r2
-asr r1, #6
+asr r1, #(14 - \bitdepth)
 mov r3, #16
 vdup.16 q8, r1
 vdup.16 q9, r1
@@ -105,6 +129,7 @@ function ff_hevc_idct_32x32_dc_8_neon, export=1
 bne 1b
 bx lr
 endfunc
+.endm
 
 .macro sum_sub out, in, c, op
   .ifc \op, +
@@ -496,8 +521,16 @@ tr_16x4 secondpass_10, 20 - 10
 .ltorg
 
 idct_4x4 8
+idct_4x4_dc 8
 idct_4x4 10
+idct_4x4_dc 10
 idct_8x8 8
+idct_8x8_dc 8
 idct_8x8 10
+idct_8x8_dc 10
 idct_16x16 8
+idct_16x16_dc 8
 idct_16x16 10
+idct_16x16_dc 10
+idct_32x32_dc 8
+idct_32x32_dc 10
diff --git a/libavcodec/arm/hevcdsp_init_arm.c 
b/libavcodec/arm/hevcdsp_init_arm.c
index b65e2e9..febbcc1 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c
@@ -25,13 +25,18 @@
 
 #include "libavcodec/hevcdsp.h"
 
-void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
-void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
-void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
 void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs);
+
+void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
@@ -51,9 +56,13 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int 
bit_depth)
 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
 }
 if (bit_depth == 10) {
-c->idct[0] = ff_hevc_idct_4x4_10_neon;
-c->idct[1] = ff_hevc_idct_8x8_10_neon;
-c->idct[2] = ff_hevc_idct_16x16_10_neon;
+c->idct[0]= ff_hevc_idct_4x4_10_neon;
+c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_neon;
+ 

[libav-devel] [PATCH] hevc: Optimize NEON 8x8 IDCT using col_limit

2017-04-12 Thread Alexandra Hájková
---
 libavcodec/arm/hevc_idct.S | 26 +++---
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 4124fc8..29135ad 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -58,7 +58,7 @@ endconst
 
 .macro tr_4x4_8 in0, in1, in2, in3, out0, out1, out2, out3, tmp0, tmp1, tmp2, 
tmp3
  vshll.s16  \tmp0, \in0, #6
- vld1.s16   {\in0}, [r1, :64]!
+ vld1.s16   {\in0}, [r4, :64]!
  vmov   \tmp1, \tmp0
  vmull.s16  \tmp2, \in1, \in0[1]
  vmull.s16  \tmp3, \in1, \in0[3]
@@ -67,14 +67,14 @@ endconst
  vmlal.s16  \tmp2, \in3, \in0[3] @o0
  vmlsl.s16  \tmp3, \in3, \in0[1] @o1
 
- vld1.s16   {\in0}, [r1, :64]
+ vld1.s16   {\in0}, [r4, :64]
 
  vadd.s32   \out0, \tmp0, \tmp2
  vadd.s32   \out1, \tmp1, \tmp3
  vsub.s32   \out2, \tmp1, \tmp3
  vsub.s32   \out3, \tmp0, \tmp2
 
- sub r1,  r1,  #8
+ sub r4,  r4,  #8
 .endm
 
 @ Do a 4x4 transpose, using q registers for the subtransposes that don't
@@ -166,21 +166,25 @@ endfunc
 .macro idct_8x8 bitdepth
 function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1
 @r0 - coeffs
+push {r4, lr}
 vpush   {q4-q7}
 
-mov r1,  r0
+mov r4,  r0
 mov r2,  #64
 add r3,  r0,  #32
-vld1.s16{q0-q1}, [r1,:128], r2
+vld1.s16{q0-q1}, [r4,:128], r2
 vld1.s16{q2-q3}, [r3,:128], r2
-vld1.s16{q4-q5}, [r1,:128], r2
+vld1.s16{q4-q5}, [r4,:128], r2
 vld1.s16{q6-q7}, [r3,:128], r2
 
-movrel  r1, trans
+movrel  r4, trans
 
 tr_8x4  7, d0, d2, d4, d6, d8, d10, d12, d14
+cmp r1, #4
+blt 1f
 tr_8x4  7, d1, d3, d5, d7, d9, d11, d13, d15
 
+1:
 @ Transpose each 4x4 block, and swap how d4-d7 and d8-d11 are used.
 @ Layout before:
 @ d0  d1
@@ -209,16 +213,16 @@ function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1
 
 transpose_8x8   d0, d2, d4, d6, d8, d10, d12, d14, d1, d3, d5, d7, d9, 
d11, d13, d15
 
-mov r1,  r0
+mov r4,  r0
 mov r2,  #64
 add r3,  r0,  #32
-vst1.s16{q0-q1}, [r1,:128], r2
+vst1.s16{q0-q1}, [r4,:128], r2
 vst1.s16{q2-q3}, [r3,:128], r2
-vst1.s16{q4-q5}, [r1,:128], r2
+vst1.s16{q4-q5}, [r4,:128], r2
 vst1.s16{q6-q7}, [r3,:128], r2
 
 vpop{q4-q7}
-bx  lr
+pop {r4, pc}
 endfunc
 .endm
 
-- 
2.10.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] hevc: Add NEON 16x16 IDCT

2017-04-12 Thread Alexandra Hájková
The speedup vs C code is around 6-13x.
---

Use irp to avoid the repetition.

 libavcodec/arm/hevc_idct.S| 196 ++
 libavcodec/arm/hevcdsp_init_arm.c |   4 +
 2 files changed, 200 insertions(+)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 4124fc8..3608f3a 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -222,7 +222,203 @@ function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1
 endfunc
 .endm
 
+.macro butterfly e, o, tmp_p, tmp_m
+vadd.s32\tmp_p, \e, \o
+vsub.s32\tmp_m, \e, \o
+.endm
+
+.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7
+tr_4x4_8\in0, \in2, \in4, \in6, q8, q9, q10, q11, q12, q13, 
q14, q15
+
+vmull.s16   q12, \in1, \in0[0]
+vmull.s16   q13, \in1, \in0[1]
+vmull.s16   q14, \in1, \in0[2]
+vmull.s16   q15, \in1, \in0[3]
+sum_sub q12, \in3, \in0[1], +
+sum_sub q13, \in3, \in0[3], -
+sum_sub q14, \in3, \in0[0], -
+sum_sub q15, \in3, \in0[2], -
+
+sum_sub q12, \in5, \in0[2], +
+sum_sub q13, \in5, \in0[0], -
+sum_sub q14, \in5, \in0[3], +
+sum_sub q15, \in5, \in0[1], +
+
+sum_sub q12, \in7, \in0[3], +
+sum_sub q13, \in7, \in0[2], -
+sum_sub q14, \in7, \in0[1], +
+sum_sub q15, \in7, \in0[0], -
+
+butterfly   q8,  q12, q0, q7
+butterfly   q9,  q13, q1, q6
+butterfly   q10, q14, q2, q5
+butterfly   q11, q15, q3, q4
+add r4,  sp,  #512
+vst1.s16{q0-q1}, [r4, :128]!
+vst1.s16{q2-q3}, [r4, :128]!
+vst1.s16{q4-q5}, [r4, :128]!
+vst1.s16{q6-q7}, [r4, :128]
+.endm
+
+.macro load16 in0, in1, in2, in3, in4, in5, in6, in7
+vld1.s16{\in0}, [r1, :64], r2
+vld1.s16{\in1}, [r3, :64], r2
+vld1.s16{\in2}, [r1, :64], r2
+vld1.s16{\in3}, [r3, :64], r2
+vld1.s16{\in4}, [r1, :64], r2
+vld1.s16{\in5}, [r3, :64], r2
+vld1.s16{\in6}, [r1, :64], r2
+vld1.s16{\in7}, [r3, :64], r2
+.endm
+
+.macro add_member in, t0, t1, t2, t3, t4, t5, t6, t7, op0, op1, op2, op3, op4, 
op5, op6, op7
+sum_sub q5, \in, \t0, \op0
+sum_sub q6, \in, \t1, \op1
+sum_sub q7, \in, \t2, \op2
+sum_sub q8, \in, \t3, \op3
+sum_sub q9, \in, \t4, \op4
+sum_sub q10,\in, \t5, \op5
+sum_sub q11,\in, \t6, \op6
+sum_sub q12,\in, \t7, \op7
+.endm
+
+.macro butterfly16 in0, in1, in2, in3, in4, in5, in6, in7
+vadd.s32q4, \in0, \in1
+vsub.s32\in0, \in0, \in1
+vadd.s32\in1, \in2, \in3
+vsub.s32\in2, \in2, \in3
+vadd.s32\in3, \in4, \in5
+vsub.s32\in4, \in4, \in5
+vadd.s32\in5, \in6, \in7
+vsub.s32\in6, \in6, \in7
+.endm
+
+.macro store16 in0, in1, in2, in3, in4, in5, in6, in7
+vst1.s16\in0, [r1, :64], r2
+vst1.s16\in1, [r3, :64], r4
+vst1.s16\in2, [r1, :64], r2
+vst1.s16\in3, [r3, :64], r4
+vst1.s16\in4, [r1, :64], r2
+vst1.s16\in5, [r3, :64], r4
+vst1.s16\in6, [r1, :64], r2
+vst1.s16\in7, [r3, :64], r4
+.endm
+
+.macro scale out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, 
in3, in4, in5, in6, in7, shift
+vqrshrn.s32 \out0, \in0, \shift
+vqrshrn.s32 \out1, \in1, \shift
+vqrshrn.s32 \out2, \in2, \shift
+vqrshrn.s32 \out3, \in3, \shift
+vqrshrn.s32 \out4, \in4, \shift
+vqrshrn.s32 \out5, \in5, \shift
+vqrshrn.s32 \out6, \in6, \shift
+vqrshrn.s32 \out7, \in7, \shift
+.endm
+
+.macro tr_16x4 name, shift
+function func_tr_16x4_\name
+mov r1,  r5
+add r3,  r5, #64
+mov r2,  #128
+load16  d0, d1, d2, d3, d4, d5, d6, d7
+movrel  r1, trans
+
+tr16_8x4d0, d1, d2, d3, d4, d5, d6, d7
+
+add r1,  r5, #32
+add r3,  r5, #(64 + 32)
+mov r2,  #128
+load16  d8, d9, d2, d3, d4, d5, d6, d7
+movrel  r1, trans + 16
+vld1.s16{q0}, [r1, :128]
+vmull.s16   q5, d8, d0[0]
+vmull.s16   q6, d8, d0[1]
+vmull.s16   q7, d8, d0[2]
+vmull.s16   q8, d8, d0[3]
+vmull.s16   q9, d8, d1[0]
+vmull.s16   q10, d8, d1[1]
+vmull.s16   q11, d8, d1[2]
+vmull.s16   q12, d8, d1[3]
+
+add_member  d9, 

[libav-devel] [PATCH] hevc: Add NEON 16x16 IDCT

2017-04-11 Thread Alexandra Hájková
The speedup vs C code is around 6-13x.
---
 libavcodec/arm/hevc_idct.S| 210 ++
 libavcodec/arm/hevcdsp_init_arm.c |   4 +
 2 files changed, 214 insertions(+)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 4124fc8..0ea048b 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -222,7 +222,217 @@ function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1
 endfunc
 .endm
 
+.macro butterfly e, o, tmp_p, tmp_m
+vadd.s32\tmp_p, \e, \o
+vsub.s32\tmp_m, \e, \o
+.endm
+
+.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7
+tr_4x4_8\in0, \in2, \in4, \in6, q8, q9, q10, q11, q12, q13, 
q14, q15
+
+vmull.s16   q12, \in1, \in0[0]
+vmull.s16   q13, \in1, \in0[1]
+vmull.s16   q14, \in1, \in0[2]
+vmull.s16   q15, \in1, \in0[3]
+sum_sub q12, \in3, \in0[1], +
+sum_sub q13, \in3, \in0[3], -
+sum_sub q14, \in3, \in0[0], -
+sum_sub q15, \in3, \in0[2], -
+
+sum_sub q12, \in5, \in0[2], +
+sum_sub q13, \in5, \in0[0], -
+sum_sub q14, \in5, \in0[3], +
+sum_sub q15, \in5, \in0[1], +
+
+sum_sub q12, \in7, \in0[3], +
+sum_sub q13, \in7, \in0[2], -
+sum_sub q14, \in7, \in0[1], +
+sum_sub q15, \in7, \in0[0], -
+
+butterfly   q8,  q12, q0, q7
+butterfly   q9,  q13, q1, q6
+butterfly   q10, q14, q2, q5
+butterfly   q11, q15, q3, q4
+add r4,  sp,  #512
+vst1.s16{q0-q1}, [r4, :128]!
+vst1.s16{q2-q3}, [r4, :128]!
+vst1.s16{q4-q5}, [r4, :128]!
+vst1.s16{q6-q7}, [r4, :128]
+.endm
+
+.macro load16 in0, in1, in2, in3, in4, in5, in6, in7
+vld1.s16{\in0}, [r1, :64], r2
+vld1.s16{\in1}, [r3, :64], r2
+vld1.s16{\in2}, [r1, :64], r2
+vld1.s16{\in3}, [r3, :64], r2
+vld1.s16{\in4}, [r1, :64], r2
+vld1.s16{\in5}, [r3, :64], r2
+vld1.s16{\in6}, [r1, :64], r2
+vld1.s16{\in7}, [r3, :64], r2
+.endm
+
+.macro add_member in, t0, t1, t2, t3, t4, t5, t6, t7, op0, op1, op2, op3, op4, 
op5, op6, op7
+sum_sub q5, \in, \t0, \op0
+sum_sub q6, \in, \t1, \op1
+sum_sub q7, \in, \t2, \op2
+sum_sub q8, \in, \t3, \op3
+sum_sub q9, \in, \t4, \op4
+sum_sub q10,\in, \t5, \op5
+sum_sub q11,\in, \t6, \op6
+sum_sub q12,\in, \t7, \op7
+.endm
+
+.macro butterfly16 in0, in1, in2, in3, in4, in5, in6, in7
+vadd.s32q4, \in0, \in1
+vsub.s32\in0, \in0, \in1
+vadd.s32\in1, \in2, \in3
+vsub.s32\in2, \in2, \in3
+vadd.s32\in3, \in4, \in5
+vsub.s32\in4, \in4, \in5
+vadd.s32\in5, \in6, \in7
+vsub.s32\in6, \in6, \in7
+.endm
+
+.macro store16 in0, in1, in2, in3, in4, in5, in6, in7
+vst1.s16\in0, [r1, :64], r2
+vst1.s16\in1, [r3, :64], r4
+vst1.s16\in2, [r1, :64], r2
+vst1.s16\in3, [r3, :64], r4
+vst1.s16\in4, [r1, :64], r2
+vst1.s16\in5, [r3, :64], r4
+vst1.s16\in6, [r1, :64], r2
+vst1.s16\in7, [r3, :64], r4
+.endm
+
+.macro scale out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, 
in3, in4, in5, in6, in7, shift
+vqrshrn.s32 \out0, \in0, \shift
+vqrshrn.s32 \out1, \in1, \shift
+vqrshrn.s32 \out2, \in2, \shift
+vqrshrn.s32 \out3, \in3, \shift
+vqrshrn.s32 \out4, \in4, \shift
+vqrshrn.s32 \out5, \in5, \shift
+vqrshrn.s32 \out6, \in6, \shift
+vqrshrn.s32 \out7, \in7, \shift
+.endm
+
+.macro tr_16x4 name, shift
+function func_tr_16x4_\name
+mov r1,  r5
+add r3,  r5, #64
+mov r2,  #128
+load16  d0, d1, d2, d3, d4, d5, d6, d7
+movrel  r1, trans
+
+tr16_8x4d0, d1, d2, d3, d4, d5, d6, d7
+
+add r1,  r5, #32
+add r3,  r5, #(64 + 32)
+mov r2,  #128
+load16  d8, d9, d2, d3, d4, d5, d6, d7
+movrel  r1, trans + 16
+vld1.s16{q0}, [r1, :128]
+vmull.s16   q5, d8, d0[0]
+vmull.s16   q6, d8, d0[1]
+vmull.s16   q7, d8, d0[2]
+vmull.s16   q8, d8, d0[3]
+vmull.s16   q9, d8, d1[0]
+vmull.s16   q10, d8, d1[1]
+vmull.s16   q11, d8, d1[2]
+vmull.s16   q12, d8, d1[3]
+
+add_member  d9, d0[1], d1[0], d1[3], d1[1], d0[2], 

[libav-devel] [PATCH] hevc: Add NEON 16x16 IDCT

2017-04-05 Thread Alexandra Hájková
The speedup vs C code is around 8x.
---
 libavcodec/arm/hevc_idct.S| 187 ++
 libavcodec/arm/hevcdsp_init_arm.c |   4 +
 2 files changed, 191 insertions(+)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index 4124fc8..b4279db 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -222,7 +222,194 @@ function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1
 endfunc
 .endm
 
+.macro butterfly e, o, tmp_p, tmp_m
+vadd.s32\tmp_p, \e, \o
+vsub.s32\tmp_m, \e, \o
+.endm
+
+.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7
+tr_4x4_8\in0, \in2, \in4, \in6, q8, q9, q10, q11, q12, q13, 
q14, q15
+
+vmull.s16   q12, \in1, \in0[0]
+vmull.s16   q13, \in1, \in0[1]
+vmull.s16   q14, \in1, \in0[2]
+vmull.s16   q15, \in1, \in0[3]
+sum_sub q12, \in3, \in0[1], +
+sum_sub q13, \in3, \in0[3], -
+sum_sub q14, \in3, \in0[0], -
+sum_sub q15, \in3, \in0[2], -
+
+sum_sub q12, \in5, \in0[2], +
+sum_sub q13, \in5, \in0[0], -
+sum_sub q14, \in5, \in0[3], +
+sum_sub q15, \in5, \in0[1], +
+
+sum_sub q12, \in7, \in0[3], +
+sum_sub q13, \in7, \in0[2], -
+sum_sub q14, \in7, \in0[1], +
+sum_sub q15, \in7, \in0[0], -
+
+butterfly   q8,  q12, q0, q7
+butterfly   q9,  q13, q1, q6
+butterfly   q10, q14, q2, q5
+butterfly   q11, q15, q3, q4
+add r4,  sp,  #512
+vst1.s16{q0-q1}, [r4, :128]!
+vst1.s16{q2-q3}, [r4, :128]!
+vst1.s16{q4-q5}, [r4, :128]!
+vst1.s16{q6-q7}, [r4, :128]
+.endm
+
+.macro load16 in0, in1, in2, in3, in4, in5, in6, in7
+vld1.s16{\in0}, [r1, :64], r2
+vld1.s16{\in1}, [r3, :64], r2
+vld1.s16{\in2}, [r1, :64], r2
+vld1.s16{\in3}, [r3, :64], r2
+vld1.s16{\in4}, [r1, :64], r2
+vld1.s16{\in5}, [r3, :64], r2
+vld1.s16{\in6}, [r1, :64], r2
+vld1.s16{\in7}, [r3, :64], r2
+.endm
+
+.macro add_member in, t0, t1, t2, t3, t4, t5, t6, t7, op0, op1, op2, op3, op4, 
op5, op6, op7
+sum_sub q5, \in, \t0, \op0
+sum_sub q6, \in, \t1, \op1
+sum_sub q7, \in, \t2, \op2
+sum_sub q8, \in, \t3, \op3
+sum_sub q9, \in, \t4, \op4
+sum_sub q10,\in, \t5, \op5
+sum_sub q11,\in, \t6, \op6
+sum_sub q12,\in, \t7, \op7
+.endm
+
+.macro butterfly16 in0, in1, in2, in3, in4, in5, in6, in7
+vadd.s32q4, \in0, \in1
+vsub.s32\in0, \in0, \in1
+vadd.s32\in1, \in2, \in3
+vsub.s32\in2, \in2, \in3
+vadd.s32\in3, \in4, \in5
+vsub.s32\in4, \in4, \in5
+vadd.s32\in5, \in6, \in7
+vsub.s32\in6, \in6, \in7
+.endm
+
+.macro store16 in0, in1, in2, in3, in4, in5, in6, in7
+vst1.s16\in0, [r1, :64], r2
+vst1.s16\in1, [r3, :64], r4
+vst1.s16\in2, [r1, :64], r2
+vst1.s16\in3, [r3, :64], r4
+vst1.s16\in4, [r1, :64], r2
+vst1.s16\in5, [r3, :64], r4
+vst1.s16\in6, [r1, :64], r2
+vst1.s16\in7, [r3, :64], r4
+.endm
+
+.macro scale out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, 
in3, in4, in5, in6, in7, shift
+vqrshrn.s32 \out0, \in0, \shift
+vqrshrn.s32 \out1, \in1, \shift
+vqrshrn.s32 \out2, \in2, \shift
+vqrshrn.s32 \out3, \in3, \shift
+vqrshrn.s32 \out4, \in4, \shift
+vqrshrn.s32 \out5, \in5, \shift
+vqrshrn.s32 \out6, \in6, \shift
+vqrshrn.s32 \out7, \in7, \shift
+.endm
+
+.macro tr_16x4 horiz, shift, in, out
+add r1,  \in, \horiz
+add r3,  \in, #(\horiz + 64)
+mov r2,  #128
+load16  d0, d1, d2, d3, d4, d5, d6, d7
+movrel  r1, trans
+
+tr16_8x4d0, d1, d2, d3, d4, d5, d6, d7
+
+add r1,  \in, #(\horiz + 32)
+add r3,  \in, #(\horiz + 64 + 32)
+mov r2,  #128
+load16  d8, d9, d2, d3, d4, d5, d6, d7
+movrel  r1, trans + 16
+vld1.s16{q0}, [r1, :128]
+vmull.s16   q5, d8, d0[0]
+vmull.s16   q6, d8, d0[1]
+vmull.s16   q7, d8, d0[2]
+vmull.s16   q8, d8, d0[3]
+vmull.s16   q9, d8, d1[0]
+vmull.s16   q10, d8, d1[1]
+vmull.s16   q11, d8, d1[2]
+vmull.s16   q12, d8, d1[3]
+
+add_member  d9, d0[1], d1[0], 

[libav-devel] [PATCH] hevc: Add NEON 4x4 and 8x8 IDCT

2017-03-27 Thread Alexandra Hájková
Optimized by Martin Storsjö <mar...@martin.st>.
---
 libavcodec/arm/Makefile   |   2 +
 libavcodec/arm/hevc_idct.S| 228 ++
 libavcodec/arm/hevcdsp_init_arm.c |  47 
 libavcodec/hevcdsp.c  |   2 +
 libavcodec/hevcdsp.h  |   1 +
 5 files changed, 280 insertions(+)
 create mode 100644 libavcodec/arm/hevc_idct.S
 create mode 100644 libavcodec/arm/hevcdsp_init_arm.c

diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 77452b1..555de16 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -134,6 +134,8 @@ NEON-OBJS-$(CONFIG_AAC_DECODER)+= 
arm/aacpsdsp_neon.o   \
 NEON-OBJS-$(CONFIG_APE_DECODER)+= arm/apedsp_neon.o
 NEON-OBJS-$(CONFIG_DCA_DECODER)+= arm/dcadsp_neon.o \
   arm/synth_filter_neon.o
+NEON-OBJS-$(CONFIG_HEVC_DECODER)   += arm/hevc_idct.o   \
+  arm/hevcdsp_init_arm.o
 NEON-OBJS-$(CONFIG_RV30_DECODER)   += arm/rv34dsp_neon.o
 NEON-OBJS-$(CONFIG_RV40_DECODER)   += arm/rv34dsp_neon.o\
   arm/rv40dsp_neon.o
diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
new file mode 100644
index 000..1bb75e7
--- /dev/null
+++ b/libavcodec/arm/hevc_idct.S
@@ -0,0 +1,228 @@
+/*
+ * ARM NEON optimised IDCT functions for HEVC decoding
+ * Copyright (c) 2017 Alexandra Hájková
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+const trans
+.short 64, 83, 64, 36
+.short 89, 75, 50, 18
+.short 90, 87, 80, 70
+.short 57, 43, 25, 9
+endconst
+
+.macro sum_sub out, in, c, op
+  .ifc \op, +
+vmlal.s16   \out, \in, \c
+  .else
+vmlsl.s16   \out, \in, \c
+  .endif
+.endm
+
+.macro tr_4x4 in0, in1, in2, in3, out0, out1, out2, out3, shift, tmp0, tmp1, 
tmp2, tmp3, tmp4
+ vshll.s16  \tmp0, \in0, #6
+ vmull.s16  \tmp2, \in1, d4[1]
+ vmov   \tmp1, \tmp0
+ vmull.s16  \tmp3, \in1, d4[3]
+ vmlal.s16  \tmp0, \in2, d4[0] @e0
+ vmlsl.s16  \tmp1, \in2, d4[0] @e1
+ vmlal.s16  \tmp2, \in3, d4[3] @o0
+ vmlsl.s16  \tmp3, \in3, d4[1] @o1
+
+ vadd.s32   \tmp4, \tmp0, \tmp2
+ vsub.s32   \tmp0, \tmp0, \tmp2
+ vadd.s32   \tmp2, \tmp1, \tmp3
+ vsub.s32   \tmp1, \tmp1, \tmp3
+ vqrshrn.s32\out0, \tmp4, #\shift
+ vqrshrn.s32\out3, \tmp0, #\shift
+ vqrshrn.s32\out1, \tmp2, #\shift
+ vqrshrn.s32\out2, \tmp1, #\shift
+.endm
+
+.macro tr_4x4_8 in0, in1, in2, in3, out0, out1, out2, out3, tmp0, tmp1, tmp2, 
tmp3
+ vshll.s16  \tmp0, \in0, #6
+ vld1.s16   {\in0}, [r1, :64]!
+ vmov   \tmp1, \tmp0
+ vmull.s16  \tmp2, \in1, \in0[1]
+ vmull.s16  \tmp3, \in1, \in0[3]
+ vmlal.s16  \tmp0, \in2, \in0[0] @e0
+ vmlsl.s16  \tmp1, \in2, \in0[0] @e1
+ vmlal.s16  \tmp2, \in3, \in0[3] @o0
+ vmlsl.s16  \tmp3, \in3, \in0[1] @o1
+
+ vld1.s16   {\in0}, [r1, :64]
+
+ vadd.s32   \out0, \tmp0, \tmp2
+ vadd.s32   \out1, \tmp1, \tmp3
+ vsub.s32   \out2, \tmp1, \tmp3
+ vsub.s32   \out3, \tmp0, \tmp2
+
+ subr1,  r1,  #8
+.endm
+
+@ Do a 4x4 transpose, using q registers for the subtransposes that don't
+@ need to address the individual d registers.
+@ r0,r1 == rq0, r2,r3 == rq1
+.macro transpose_4x4 rq0, rq1, r0, r1, r2, r3
+vtrn.32 \rq0, \rq1
+vtrn.16 \r0,  \r1
+vtrn.16 \r2,  \r3
+.endm
+
+.macro idct_4x4 bitdepth
+function ff_hevc_idct_4x4_\bitdepth\()_neon, export=1
+@r0 - coeffs
+vld1.s16{q0-q1}, [r0, :128]
+
+movrel  r1, trans
+vld1.s16{d4}, [r1, :64]
+
+tr_4x4  d0, d1, d2, d3, d16, d17, d18, d19, 7, q10, q11, q12, 
q13, q0
+transpose_4x4   q8, q9, d16, d17, d18, d19
+
+tr_4x4  d16, d17, d18, d19, d0, d1, d2, d3, 20 -

[libav-devel] [PATCH] hevc: Add NEON 4x4 and 8x8 IDCT

2017-03-25 Thread Alexandra Hájková
Optimized by Martin Storsjö <mar...@martin.st>.
---
 libavcodec/arm/Makefile |   2 +
 libavcodec/arm/hevc_idct.S  | 269 
 libavcodec/arm/hevc_idct_init.c |  50 
 libavcodec/hevcdsp.c|   2 +
 libavcodec/hevcdsp.h|   2 +
 5 files changed, 325 insertions(+)
 create mode 100644 libavcodec/arm/hevc_idct.S
 create mode 100644 libavcodec/arm/hevc_idct_init.c

diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 77452b1..0d30a49 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -113,6 +113,8 @@ NEON-OBJS-$(CONFIG_H264DSP)+= 
arm/h264dsp_neon.o\
 NEON-OBJS-$(CONFIG_H264PRED)   += arm/h264pred_neon.o
 NEON-OBJS-$(CONFIG_H264QPEL)   += arm/h264qpel_neon.o   \
   arm/hpeldsp_neon.o
+NEON-OBJS-$(CONFIG_HEVC_DECODER)   += arm/hevc_idct.o   \
+  arm/hevc_idct_init.o
 NEON-OBJS-$(CONFIG_HPELDSP)+= arm/hpeldsp_init_neon.o   \
   arm/hpeldsp_neon.o
 NEON-OBJS-$(CONFIG_IDCTDSP)+= arm/idctdsp_init_neon.o   \
diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
new file mode 100644
index 000..89dbe22
--- /dev/null
+++ b/libavcodec/arm/hevc_idct.S
@@ -0,0 +1,269 @@
+/*
+ * ARM NEON optimised IDCT functions for HEVC decoding
+ * Copyright (c) 2017 Alexandra Hájková
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+const trans
+.short 64, 83, 64, 36
+.short 89, 75, 50, 18
+.short 90, 87, 80, 70
+.short 57, 43, 25, 9
+endconst
+
+.macro sum_sub out, in, c, op
+  .ifc \op, +
+vmlal.s16   \out, \in, \c
+  .else
+vmlsl.s16   \out, \in, \c
+  .endif
+.endm
+
+.macro sum out, in0, in1, in2, in3, c0, c1, c2, c3, op1, op2, op3, tr4
+vmull.s16   \out, \in0, \c0
+sum_sub \out, \in1, \c1, \op1
+sum_sub \out, \in2, \c2, \op2
+sum_sub \out, \in3, \c3, \op3
+.endm
+
+.macro tr_4x4 in0, in1, in2, in3, out0, out1, out2, out3, shift, tmp0, tmp1, 
tmp2, tmp3, tmp4
+ vshll.s16  \tmp0, \in0, #6
+ vmull.s16  \tmp2, \in1, d4[1]
+ vmov   \tmp1, \tmp0
+ vmull.s16  \tmp3, \in1, d4[3]
+ vmlal.s16  \tmp0, \in2, d4[0] @e0
+ vmlsl.s16  \tmp1, \in2, d4[0] @e1
+ vmlal.s16  \tmp2, \in3, d4[3] @o0
+ vmlsl.s16  \tmp3, \in3, d4[1] @o1
+
+ vadd.s32   \tmp4, \tmp0, \tmp2
+ vsub.s32   \tmp0, \tmp0, \tmp2
+ vadd.s32   \tmp2, \tmp1, \tmp3
+ vsub.s32   \tmp1, \tmp1, \tmp3
+ vqrshrn.s32\out0, \tmp4, #\shift
+ vqrshrn.s32\out3, \tmp0, #\shift
+ vqrshrn.s32\out1, \tmp2, #\shift
+ vqrshrn.s32\out2, \tmp1, #\shift
+.endm
+
+.macro tr_4x4_8 in0, in1, in2, in3, out0, out1, out2, out3, tmp0, tmp1, tmp2, 
tmp3
+ vshll.s16  \tmp0, \in0, #6
+ vld1.s16   {\in0}, [r1, :64]!
+ vmov   \tmp1, \tmp0
+ vmull.s16  \tmp2, \in1, \in0[1]
+ vmull.s16  \tmp3, \in1, \in0[3]
+ vmlal.s16  \tmp0, \in2, \in0[0] @e0
+ vmlsl.s16  \tmp1, \in2, \in0[0] @e1
+ vmlal.s16  \tmp2, \in3, \in0[3] @o0
+ vmlsl.s16  \tmp3, \in3, \in0[1] @o1
+
+ vld1.s16   {\in0}, [r1, :64]
+
+ vadd.s32   \out0, \tmp0, \tmp2
+ vadd.s32   \out1, \tmp1, \tmp3
+ vsub.s32   \out2, \tmp1, \tmp3
+ vsub.s32   \out3, \tmp0, \tmp2
+
+ sub    r1,  r1,  #8
+.endm
+
+@ Do a 4x4 transpose, using q registers for the subtransposes that don't
+@ need to address the individual d registers.
+@ r0,r1 == rq0, r2,r3 == rq1
+.macro transpose_4x4 rq0, rq1, r0, r1, r2, r3
+vtrn.32 \rq0, \rq1
+vtrn.16 \r0,  \r1
+vtrn.16 \r2,  \r3
+.endm
+
+.macro idct_4x4 bitdepth
+function ff_hevc_idct_4x4_\bitdepth\()_neon, export=1
+@r0 - coeffs
+vld1.s16{q0-q1}, [r0, :128]
+
+movrel   

[libav-devel] [PATCH] asfdec: Account for different Format Data sizes

2017-02-08 Thread Alexandra Hájková
Some muxers may use the BMP_HEADER Format Data size instead
of the ASF-specific one.

Bug-Id: 1020
---
Use more descriptive variable names.
Update the documentation.
Use better commit message.

 libavformat/asfdec.c  | 12 +++-
 libavformat/avidec.c  |  2 +-
 libavformat/riff.h|  4 ++--
 libavformat/riffdec.c |  6 --
 libavformat/wtv.c |  2 +-
 5 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index d602af8..6fe2524 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -691,20 +691,22 @@ static int asf_read_properties(AVFormatContext *s, const 
GUIDParseTable *g)
 
 static int parse_video_info(AVIOContext *pb, AVStream *st)
 {
-uint16_t size;
+uint16_t size_asf; // ASF specific Format Data size
+uint32_t size_bmp; // BMP_HEADER specific Format Data size
 unsigned int tag;
 
 st->codecpar->width  = avio_rl32(pb);
 st->codecpar->height = avio_rl32(pb);
 avio_skip(pb, 1); // skip reserved flags
-size = avio_rl16(pb); // size of the Format Data
-tag  = ff_get_bmp_header(pb, st);
+size_asf = avio_rl16(pb);
+tag  = ff_get_bmp_header(pb, st, &size_bmp);
 st->codecpar->codec_tag = tag;
 st->codecpar->codec_id  = ff_codec_get_id(ff_codec_bmp_tags, tag);
+size_bmp = FFMAX(size_asf, size_bmp);
 
-if (size > BMP_HEADER_SIZE) {
+if (size_bmp > BMP_HEADER_SIZE) {
 int ret;
-st->codecpar->extradata_size  = size - BMP_HEADER_SIZE;
+st->codecpar->extradata_size  = size_bmp - BMP_HEADER_SIZE;
 if (!(st->codecpar->extradata = av_malloc(st->codecpar->extradata_size 
+
AV_INPUT_BUFFER_PADDING_SIZE))) 
{
 st->codecpar->extradata_size = 0;
diff --git a/libavformat/avidec.c b/libavformat/avidec.c
index 0439c9c..61f81e8 100644
--- a/libavformat/avidec.c
+++ b/libavformat/avidec.c
@@ -613,7 +613,7 @@ static int avi_read_header(AVFormatContext *s)
 avio_skip(pb, size);
 break;
 }
-tag1 = ff_get_bmp_header(pb, st);
+tag1 = ff_get_bmp_header(pb, st, NULL);
 
 if (tag1 == MKTAG('D', 'X', 'S', 'B') ||
 tag1 == MKTAG('D', 'X', 'S', 'A')) {
diff --git a/libavformat/riff.h b/libavformat/riff.h
index a45c7f3..e77552b 100644
--- a/libavformat/riff.h
+++ b/libavformat/riff.h
@@ -40,10 +40,10 @@ void ff_end_tag(AVIOContext *pb, int64_t start);
 
 /**
  * Read BITMAPINFOHEADER structure and set AVStream codec width, height and
- * bits_per_encoded_sample fields. Does not read extradata.
+ * bits_per_encoded_sample fields. Writes the size of BMP file to *size. Does 
not read extradata.
  * @return codec tag
  */
-int ff_get_bmp_header(AVIOContext *pb, AVStream *st);
+int ff_get_bmp_header(AVIOContext *pb, AVStream *st, uint32_t *size);
 
 void ff_put_bmp_header(AVIOContext *pb, AVCodecParameters *par, const 
AVCodecTag *tags, int for_asf);
 int ff_put_wav_header(AVFormatContext *s, AVIOContext *pb, AVCodecParameters 
*par);
diff --git a/libavformat/riffdec.c b/libavformat/riffdec.c
index 8124835..d10ea2b 100644
--- a/libavformat/riffdec.c
+++ b/libavformat/riffdec.c
@@ -180,10 +180,12 @@ enum AVCodecID ff_wav_codec_get_id(unsigned int tag, int 
bps)
 return id;
 }
 
-int ff_get_bmp_header(AVIOContext *pb, AVStream *st)
+int ff_get_bmp_header(AVIOContext *pb, AVStream *st, uint32_t *size)
 {
 int tag1;
-avio_rl32(pb); /* size */
+uint32_t size_ = avio_rl32(pb); /* size */
+if (size)
+*size = size_;
 st->codecpar->width  = avio_rl32(pb);
 st->codecpar->height = (int32_t)avio_rl32(pb);
 avio_rl16(pb); /* planes */
diff --git a/libavformat/wtv.c b/libavformat/wtv.c
index 2cab4e5..272b317 100644
--- a/libavformat/wtv.c
+++ b/libavformat/wtv.c
@@ -586,7 +586,7 @@ static int parse_videoinfoheader2(AVFormatContext *s, 
AVStream *st)
 AVIOContext *pb = wtv->pb;
 
 avio_skip(pb, 72);  // picture aspect ratio is unreliable
-ff_get_bmp_header(pb, st);
+ff_get_bmp_header(pb, st, NULL);
 
 return 72 + 40;
 }
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] asfdec: use the BMP_HEADER specific Format Data size instead of

2017-02-07 Thread Alexandra Hájková
the ASF specific Format Data size. Fixes video decoding problem
part of the bug 1020.
---
 libavformat/asfdec.c  | 8 +---
 libavformat/avidec.c  | 2 +-
 libavformat/riff.h| 2 +-
 libavformat/riffdec.c | 6 --
 libavformat/wtv.c | 2 +-
 5 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index d602af8..10f8644 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -691,16 +691,18 @@ static int asf_read_properties(AVFormatContext *s, const 
GUIDParseTable *g)
 
 static int parse_video_info(AVIOContext *pb, AVStream *st)
 {
-uint16_t size;
+uint16_t size_;
+uint32_t size;
 unsigned int tag;
 
 st->codecpar->width  = avio_rl32(pb);
 st->codecpar->height = avio_rl32(pb);
 avio_skip(pb, 1); // skip reserved flags
-size = avio_rl16(pb); // size of the Format Data
-tag  = ff_get_bmp_header(pb, st);
+size_ = avio_rl16(pb); // size of the Format Data
+tag  = ff_get_bmp_header(pb, st, &size);
 st->codecpar->codec_tag = tag;
 st->codecpar->codec_id  = ff_codec_get_id(ff_codec_bmp_tags, tag);
+size = FFMAX(size_, size);
 
 if (size > BMP_HEADER_SIZE) {
 int ret;
diff --git a/libavformat/avidec.c b/libavformat/avidec.c
index 0439c9c..61f81e8 100644
--- a/libavformat/avidec.c
+++ b/libavformat/avidec.c
@@ -613,7 +613,7 @@ static int avi_read_header(AVFormatContext *s)
 avio_skip(pb, size);
 break;
 }
-tag1 = ff_get_bmp_header(pb, st);
+tag1 = ff_get_bmp_header(pb, st, NULL);
 
 if (tag1 == MKTAG('D', 'X', 'S', 'B') ||
 tag1 == MKTAG('D', 'X', 'S', 'A')) {
diff --git a/libavformat/riff.h b/libavformat/riff.h
index a45c7f3..295b6d0 100644
--- a/libavformat/riff.h
+++ b/libavformat/riff.h
@@ -43,7 +43,7 @@ void ff_end_tag(AVIOContext *pb, int64_t start);
  * bits_per_encoded_sample fields. Does not read extradata.
  * @return codec tag
  */
-int ff_get_bmp_header(AVIOContext *pb, AVStream *st);
+int ff_get_bmp_header(AVIOContext *pb, AVStream *st, uint32_t *size);
 
 void ff_put_bmp_header(AVIOContext *pb, AVCodecParameters *par, const 
AVCodecTag *tags, int for_asf);
 int ff_put_wav_header(AVFormatContext *s, AVIOContext *pb, AVCodecParameters 
*par);
diff --git a/libavformat/riffdec.c b/libavformat/riffdec.c
index 8124835..d10ea2b 100644
--- a/libavformat/riffdec.c
+++ b/libavformat/riffdec.c
@@ -180,10 +180,12 @@ enum AVCodecID ff_wav_codec_get_id(unsigned int tag, int 
bps)
 return id;
 }
 
-int ff_get_bmp_header(AVIOContext *pb, AVStream *st)
+int ff_get_bmp_header(AVIOContext *pb, AVStream *st, uint32_t *size)
 {
 int tag1;
-avio_rl32(pb); /* size */
+uint32_t size_ = avio_rl32(pb); /* size */
+if (size)
+*size = size_;
 st->codecpar->width  = avio_rl32(pb);
 st->codecpar->height = (int32_t)avio_rl32(pb);
 avio_rl16(pb); /* planes */
diff --git a/libavformat/wtv.c b/libavformat/wtv.c
index 2cab4e5..272b317 100644
--- a/libavformat/wtv.c
+++ b/libavformat/wtv.c
@@ -586,7 +586,7 @@ static int parse_videoinfoheader2(AVFormatContext *s, 
AVStream *st)
 AVIOContext *pb = wtv->pb;
 
 avio_skip(pb, 72);  // picture aspect ratio is unreliable
-ff_get_bmp_header(pb, st);
+ff_get_bmp_header(pb, st, NULL);
 
 return 72 + 40;
 }
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] hevc: Add HEVC IDCT 4x4 for PPC

2016-12-11 Thread Alexandra Hájková
From: Alexandra Hajkova 

---
 libavcodec/hevcdsp.c  |   2 +
 libavcodec/hevcdsp.h  |   1 +
 libavcodec/ppc/Makefile   |   1 +
 libavcodec/ppc/hevcdsp.c  | 108 ++
 libavcodec/ppc/hevcdsp_template.c |  48 +
 5 files changed, 160 insertions(+)
 create mode 100644 libavcodec/ppc/hevcdsp.c
 create mode 100644 libavcodec/ppc/hevcdsp_template.c

diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 7c19198..8ae023b 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -245,6 +245,8 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int 
bit_depth)
 break;
 }
 
+if (ARCH_PPC)
+ff_hevc_dsp_init_ppc(hevcdsp, bit_depth);
 if (ARCH_X86)
 ff_hevc_dsp_init_x86(hevcdsp, bit_depth);
 }
diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index 49cb711..2f4ff01 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h
@@ -115,6 +115,7 @@ typedef struct HEVCDSPContext {
 
 void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
 
+void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth);
 void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
 
 extern const int16_t ff_hevc_epel_coeffs[7][16];
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 09eabcb..4b92add 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -25,6 +25,7 @@ OBJS-$(CONFIG_VP8DSP)  += ppc/vp8dsp_altivec.o
 
 # decoders/encoders
 OBJS-$(CONFIG_APE_DECODER) += ppc/apedsp_altivec.o
+OBJS-$(CONFIG_HEVC_DECODER)+= ppc/hevcdsp.o
 OBJS-$(CONFIG_SVQ1_ENCODER)+= ppc/svq1enc_altivec.o
 OBJS-$(CONFIG_VORBIS_DECODER)  += ppc/vorbisdsp_altivec.o
 OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o
diff --git a/libavcodec/ppc/hevcdsp.c b/libavcodec/ppc/hevcdsp.c
new file mode 100644
index 000..9200e27
--- /dev/null
+++ b/libavcodec/ppc/hevcdsp.c
@@ -0,0 +1,108 @@
+/* SIMD-optimized IDCT functions for HEVC decoding
+ * Copyright (c) Alexandra Hajkova
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/cpu.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavutil/ppc/util_altivec.h"
+
+#include "libavcodec/hevcdsp.h"
+
+#if HAVE_ALTIVEC
+static const vector int16_t trans4[4] = {
+{ 64,  64, 64,  64, 64,  64, 64,  64 },
+{ 83,  36, 83,  36, 83,  36, 83,  36 },
+{ 64, -64, 64, -64, 64, -64, 64, -64 },
+{ 36, -83, 36, -83, 36, -83, 36, -83 },
+};
+
+static const vec_u8 mask[2] = {
+{ 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, 0x0B, 
0x12, 0x13, 0x1A, 0x1B },
+{ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 0x0F, 
0x16, 0x17, 0x1E, 0x1F },
+};
+
+static void transform4x4(vector int16_t src_01, vector int16_t src_23,
+ vector int32_t res[4], const int shift, int16_t 
*coeffs)
+{
+vector int16_t src_02, src_13;
+vector int32_t zero = vec_splat_s32(0);
+vector int32_t e0, o0, e1, o1;
+vector int32_t add;
+
+src_13 = vec_mergel(src_01, src_23);
+src_02 = vec_mergeh(src_01, src_23);
+
+e0 = vec_msums(src_02, trans4[0], zero);
+o0 = vec_msums(src_13, trans4[1], zero);
+e1 = vec_msums(src_02, trans4[2], zero);
+o1 = vec_msums(src_13, trans4[3], zero);
+
+add = vec_sl(vec_splat_s32(1), vec_splat_u32(shift - 1));
+e0 = vec_add(e0, add);
+e1 = vec_add(e1, add);
+
+res[0] = vec_add(e0, o0);
+res[1] = vec_add(e1, o1);
+res[2] = vec_sub(e1, o1);
+res[3] = vec_sub(e0, o0);
+}
+
+static void scale(vector int32_t res[4], vector int16_t res_packed[2], int 
shift)
+{
+int i;
+vector unsigned int v_shift = vec_splat_u32(shift);
+
+for (i = 0; i < 4; i++)
+res[i] = vec_sra(res[i], v_shift);
+
+// clip16
+res_packed[0] = vec_packs(res[0], res[1]);
+res_packed[1] = vec_packs(res[2], res[3]);
+}
+
+#define FUNCDECL(a, depth) a ## _ ## depth ## _altivec
+#define FUNC(a, b) FUNCDECL(a, b)
+
+#define BIT_DEPTH 8
+#include "hevcdsp_template.c"
+#undef BIT_DEPTH
+
+#define 

[libav-devel] [PATCH] hevc: Add HEVC IDCT 4x4 for PPC

2016-12-01 Thread Alexandra Hájková
From: Alexandra Hajkova 

---
 libavcodec/hevcdsp.c  |   2 +
 libavcodec/hevcdsp.h  |   1 +
 libavcodec/ppc/Makefile   |   1 +
 libavcodec/ppc/hevcdsp.c  | 110 ++
 libavcodec/ppc/hevcdsp_template.c |  48 +
 5 files changed, 162 insertions(+)
 create mode 100644 libavcodec/ppc/hevcdsp.c
 create mode 100644 libavcodec/ppc/hevcdsp_template.c

diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 7c19198..8ae023b 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -245,6 +245,8 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int 
bit_depth)
 break;
 }
 
+if (ARCH_PPC)
+ff_hevc_dsp_init_ppc(hevcdsp, bit_depth);
 if (ARCH_X86)
 ff_hevc_dsp_init_x86(hevcdsp, bit_depth);
 }
diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index 49cb711..2f4ff01 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h
@@ -115,6 +115,7 @@ typedef struct HEVCDSPContext {
 
 void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
 
+void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth);
 void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
 
 extern const int16_t ff_hevc_epel_coeffs[7][16];
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 09eabcb..4b92add 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -25,6 +25,7 @@ OBJS-$(CONFIG_VP8DSP)  += ppc/vp8dsp_altivec.o
 
 # decoders/encoders
 OBJS-$(CONFIG_APE_DECODER) += ppc/apedsp_altivec.o
+OBJS-$(CONFIG_HEVC_DECODER)+= ppc/hevcdsp.o
 OBJS-$(CONFIG_SVQ1_ENCODER)+= ppc/svq1enc_altivec.o
 OBJS-$(CONFIG_VORBIS_DECODER)  += ppc/vorbisdsp_altivec.o
 OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o
diff --git a/libavcodec/ppc/hevcdsp.c b/libavcodec/ppc/hevcdsp.c
new file mode 100644
index 000..c95af67
--- /dev/null
+++ b/libavcodec/ppc/hevcdsp.c
@@ -0,0 +1,110 @@
+/* SIMD-optimized IDCT functions for HEVC decoding
+ * Copyright (c) Alexandra Hajkova
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+
+#include "libavutil/cpu.h"
+#if HAVE_ALTIVEC_H
+#include "libavutil/ppc/cpu.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavutil/ppc/util_altivec.h"
+#endif
+
+#include "libavcodec/hevcdsp.h"
+
+#if HAVE_ALTIVEC
+#define FUNCDECL(a, depth) a ## _ ## depth ## _altivec
+#define FUNC(a, b) FUNCDECL(a, b)
+
+static const vector int16_t trans4[4] = {
+{ 64,  64, 64,  64, 64,  64, 64,  64 },
+{ 83,  36, 83,  36, 83,  36, 83,  36 },
+{ 64, -64, 64, -64, 64, -64, 64, -64 },
+{ 36, -83, 36, -83, 36, -83, 36, -83 },
+};
+
+static const vec_u8 mask[2] = {
+{ 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, 0x0B, 
0x12, 0x13, 0x1A, 0x1B },
+{ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 0x0F, 
0x16, 0x17, 0x1E, 0x1F },
+};
+
+static void transform4x4(vector int16_t src_01, vector int16_t src_23,
+ vector int32_t res[4], const int shift, int16_t 
*coeffs)
+{
+vector int16_t src_02, src_13;
+vector int32_t zero = vec_splat_s32(0);
+vector int32_t e0, o0, e1, o1;
+vector int32_t add;
+
+src_13 = vec_mergel(src_01, src_23);
+src_02 = vec_mergeh(src_01, src_23);
+
+e0 = vec_msums(src_02, trans4[0], zero);
+o0 = vec_msums(src_13, trans4[1], zero);
+e1 = vec_msums(src_02, trans4[2], zero);
+o1 = vec_msums(src_13, trans4[3], zero);
+
+add = vec_sl(vec_splat_s32(1), vec_splat_u32(shift - 1));
+e0 = vec_add(e0, add);
+e1 = vec_add(e1, add);
+
+res[0] = vec_add(e0, o0);
+res[1] = vec_add(e1, o1);
+res[2] = vec_sub(e1, o1);
+res[3] = vec_sub(e0, o0);
+}
+
+static void scale(vector int32_t res[4], vector int16_t res_packed[2], int 
shift)
+{
+int i;
+vector unsigned int v_shift = vec_splat_u32(shift);
+
+for (i = 0; i < 4; i++)
+res[i] = vec_sra(res[i], v_shift);
+
+// clip16
+res_packed[0] = vec_packs(res[0], res[1]);
+res_packed[1] = vec_packs(res[2], res[3]);
+}
+
+#define BIT_DEPTH 8
+#include 

[libav-devel] [PATCH 34/35] qcelp: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/qcelpdec.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libavcodec/qcelpdec.c b/libavcodec/qcelpdec.c
index e9e7347..9d5e13a 100644
--- a/libavcodec/qcelpdec.c
+++ b/libavcodec/qcelpdec.c
@@ -31,9 +31,10 @@
 
 #include "libavutil/channel_layout.h"
 #include "libavutil/float_dsp.h"
+
 #include "avcodec.h"
+#include "bitstream.h"
 #include "internal.h"
-#include "get_bits.h"
 #include "qcelpdata.h"
 #include "celp_filters.h"
 #include "acelp_filters.h"
@@ -53,7 +54,7 @@ typedef enum {
 } qcelp_packet_rate;
 
 typedef struct QCELPContext {
-GetBitContext gb;
+BitstreamContext  bc;
 qcelp_packet_rate bitrate;
 QCELPFrameframe;/**< unpacked data frame */
 
@@ -718,12 +719,12 @@ static int qcelp_decode_frame(AVCodecContext *avctx, void 
*data,
  
qcelp_unpacking_bitmaps_lengths[q->bitrate];
 uint8_t *unpacked_data = (uint8_t *)&q->frame;
 
-init_get_bits(&q->gb, buf, 8 * buf_size);
+bitstream_init(&q->bc, buf, 8 * buf_size);
 
 memset(&q->frame, 0, sizeof(QCELPFrame));
 
 for (; bitmaps < bitmaps_end; bitmaps++)
-unpacked_data[bitmaps->index] |= get_bits(&q->gb, bitmaps->bitlen) 
<< bitmaps->bitpos;
+unpacked_data[bitmaps->index] |= bitstream_read(&q->bc, 
bitmaps->bitlen) << bitmaps->bitpos;
 
 // Check for erasures/blanks on rates 1, 1/4 and 1/8.
 if (q->frame.reserved) {
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 32/35] opus: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/opus.h| 6 +++---
 libavcodec/opusdec.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/opus.h b/libavcodec/opus.h
index 55c91fa..fbf67c9 100644
--- a/libavcodec/opus.h
+++ b/libavcodec/opus.h
@@ -32,7 +32,7 @@
 #include "libavresample/avresample.h"
 
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 
 #define MAX_FRAME_SIZE   1275
 #define MAX_FRAMES   48
@@ -92,7 +92,7 @@ typedef struct RawBitsContext {
 } RawBitsContext;
 
 typedef struct OpusRangeCoder {
-GetBitContext gb;
+BitstreamContext bc;
 RawBitsContext rb;
 unsigned int range;
 unsigned int value;
@@ -196,7 +196,7 @@ typedef struct OpusContext {
 static av_always_inline void opus_rc_normalize(OpusRangeCoder *rc)
 {
 while (rc->range <= 1<<23) {
-rc->value = ((rc->value << 8) | (get_bits(&rc->gb, 8) ^ 0xFF)) & ((1u 
<< 31) - 1);
+rc->value = ((rc->value << 8) | (bitstream_read(&rc->bc, 8) ^ 0xFF)) & 
((1u << 31) - 1);
 rc->range  <<= 8;
 rc->total_read_bits += 8;
 }
diff --git a/libavcodec/opusdec.c b/libavcodec/opusdec.c
index 92e651c..163f0d5 100644
--- a/libavcodec/opusdec.c
+++ b/libavcodec/opusdec.c
@@ -43,9 +43,9 @@
 #include "libavresample/avresample.h"
 
 #include "avcodec.h"
+#include "bitstream.h"
 #include "celp_filters.h"
 #include "fft.h"
-#include "get_bits.h"
 #include "internal.h"
 #include "mathops.h"
 #include "opus.h"
@@ -80,12 +80,12 @@ static int get_silk_samplerate(int config)
  */
 static int opus_rc_init(OpusRangeCoder *rc, const uint8_t *data, int size)
 {
-int ret = init_get_bits8(&rc->gb, data, size);
+int ret = bitstream_init8(&rc->bc, data, size);
 if (ret < 0)
 return ret;
 
 rc->range = 128;
-rc->value = 127 - get_bits(&rc->gb, 7);
+rc->value = 127 - bitstream_read(&rc->bc, 7);
 rc->total_read_bits = 9;
 opus_rc_normalize(rc);
 
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 28/35] hq_hqa: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/hq_hqa.c | 48 
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/libavcodec/hq_hqa.c b/libavcodec/hq_hqa.c
index 98bd596..0d03e59 100644
--- a/libavcodec/hq_hqa.c
+++ b/libavcodec/hq_hqa.c
@@ -24,8 +24,8 @@
 #include "libavutil/intreadwrite.h"
 
 #include "avcodec.h"
+#include "bitstream.h"
 #include "canopus.h"
-#include "get_bits.h"
 #include "internal.h"
 
 #include "hq_hqa.h"
@@ -59,7 +59,7 @@ static inline void put_blocks(HQContext *c, AVFrame *pic,
  pic->linesize[plane] << ilace, block1);
 }
 
-static int hq_decode_block(HQContext *c, GetBitContext *gb, int16_t block[64],
+static int hq_decode_block(HQContext *c, BitstreamContext *bc, int16_t 
block[64],
int qsel, int is_chroma, int is_hqa)
 {
 const int32_t *q;
@@ -68,15 +68,15 @@ static int hq_decode_block(HQContext *c, GetBitContext *gb, 
int16_t block[64],
 memset(block, 0, 64 * sizeof(*block));
 
 if (!is_hqa) {
-block[0] = get_sbits(gb, 9) << 6;
-q = ff_hq_quants[qsel][is_chroma][get_bits(gb, 2)];
+block[0] = bitstream_read_signed(bc, 9) << 6;
+q = ff_hq_quants[qsel][is_chroma][bitstream_read(bc, 2)];
 } else {
-q = ff_hq_quants[qsel][is_chroma][get_bits(gb, 2)];
-block[0] = get_sbits(gb, 9) << 6;
+q = ff_hq_quants[qsel][is_chroma][bitstream_read(bc, 2)];
+block[0] = bitstream_read_signed(bc, 9) << 6;
 }
 
 for (;;) {
-val = get_vlc2(gb, c->hq_ac_vlc.table, 9, 2);
+val = bitstream_read_vlc(bc, c->hq_ac_vlc.table, 9, 2);
 if (val < 0)
 return AVERROR_INVALIDDATA;
 
@@ -91,16 +91,16 @@ static int hq_decode_block(HQContext *c, GetBitContext *gb, 
int16_t block[64],
 }
 
 static int hq_decode_mb(HQContext *c, AVFrame *pic,
-GetBitContext *gb, int x, int y)
+BitstreamContext *bc, int x, int y)
 {
 int qgroup, flag;
 int i, ret;
 
-qgroup = get_bits(gb, 4);
-flag = get_bits1(gb);
+qgroup = bitstream_read(bc, 4);
+flag   = bitstream_read_bit(bc);
 
 for (i = 0; i < 8; i++) {
-ret = hq_decode_block(c, gb, c->block[i], qgroup, i >= 4, 0);
+ret = hq_decode_block(c, bc, c->block[i], qgroup, i >= 4, 0);
 if (ret < 0)
 return ret;
 }
@@ -117,7 +117,7 @@ static int hq_decode_frame(HQContext *ctx, AVFrame *pic,
int prof_num, size_t data_size)
 {
 const HQProfile *profile;
-GetBitContext gb;
+BitstreamContext bc;
 const uint8_t *perm, *src = ctx->gbc.buffer;
 uint32_t slice_off[21];
 int slice, start_off, next_off, i, ret;
@@ -160,11 +160,11 @@ static int hq_decode_frame(HQContext *ctx, AVFrame *pic,
"Invalid slice size %zu.\n", data_size);
 break;
 }
-init_get_bits(&gb, src + slice_off[slice],
-  (slice_off[slice + 1] - slice_off[slice]) * 8);
+bitstream_init(&bc, src + slice_off[slice],
+   (slice_off[slice + 1] - slice_off[slice]) * 8);
 
 for (i = 0; i < (next_off - start_off) * profile->tab_w; i++) {
-ret = hq_decode_mb(ctx, pic, &gb, perm[0] * 16, perm[1] * 16);
+ret = hq_decode_mb(ctx, pic, &bc, perm[0] * 16, perm[1] * 16);
 if (ret < 0) {
 av_log(ctx->avctx, AV_LOG_ERROR,
"Error decoding macroblock %d at slice %d.\n", i, 
slice);
@@ -178,12 +178,12 @@ static int hq_decode_frame(HQContext *ctx, AVFrame *pic,
 }
 
 static int hqa_decode_mb(HQContext *c, AVFrame *pic, int qgroup,
- GetBitContext *gb, int x, int y)
+ BitstreamContext *bc, int x, int y)
 {
 int flag = 0;
 int i, ret, cbp;
 
-cbp = get_vlc2(gb, c->hqa_cbp_vlc.table, 5, 1);
+cbp = bitstream_read_vlc(bc, c->hqa_cbp_vlc.table, 5, 1);
 
 for (i = 0; i < 12; i++)
 memset(c->block[i], 0, sizeof(*c->block));
@@ -191,7 +191,7 @@ static int hqa_decode_mb(HQContext *c, AVFrame *pic, int 
qgroup,
 c->block[i][0] = -128 * (1 << 6);
 
 if (cbp) {
-flag = get_bits1(gb);
+flag = bitstream_read_bit(bc);
 
 cbp |= cbp << 4;
 if (cbp & 0x3)
@@ -201,7 +201,7 @@ static int hqa_decode_mb(HQContext *c, AVFrame *pic, int 
qgroup,
 for (i = 0; i < 12; i++) {
 if (!(cbp & (1 << i)))
 continue;
-ret = hq_decode_block(c, gb, c->block[i], qgroup, i >= 8, 1);
+ret = hq_decode_block(c, bc, c->block[i], qgroup, i >= 8, 1);
 if (ret < 0)
 return ret;
 }
@@ -217,7 +217,7 @@ static int hqa_decode_mb(HQContext *c, AVFrame *pic, int 
qgroup,
 return 0;
 }
 
-static int hqa_decode_slice(HQContext *ctx, AVFrame *pic, GetBitContext *gb,
+static int hqa_decode_slice(HQContext *ctx, 

[libav-devel] [PATCH 35/35] qdm2: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/qdm2.c | 200 +++---
 1 file changed, 100 insertions(+), 100 deletions(-)

diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index 7a7c149..781999a 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -39,7 +39,7 @@
 
 #define BITSTREAM_READER_LE
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "internal.h"
 #include "mpegaudio.h"
 #include "mpegaudiodsp.h"
@@ -361,31 +361,31 @@ static av_cold void qdm2_init_vlc(void)
  INIT_VLC_USE_NEW_STATIC | INIT_VLC_LE);
 }
 
-static int qdm2_get_vlc(GetBitContext *gb, VLC *vlc, int flag, int depth)
+static int qdm2_get_vlc(BitstreamContext *bc, VLC *vlc, int flag, int depth)
 {
 int value;
 
-value = get_vlc2(gb, vlc->table, vlc->bits, depth);
+value = bitstream_read_vlc(bc, vlc->table, vlc->bits, depth);
 
 /* stage-2, 3 bits exponent escape sequence */
 if (value-- == 0)
-value = get_bits(gb, get_bits(gb, 3) + 1);
+value = bitstream_read(bc, bitstream_read(bc, 3) + 1);
 
 /* stage-3, optional */
 if (flag) {
 int tmp = vlc_stage3_values[value];
 
 if ((value & ~3) > 0)
-tmp += get_bits(gb, (value >> 2));
+tmp += bitstream_read(bc, value >> 2);
 value = tmp;
 }
 
 return value;
 }
 
-static int qdm2_get_se_vlc(VLC *vlc, GetBitContext *gb, int depth)
+static int qdm2_get_se_vlc(VLC *vlc, BitstreamContext *bc, int depth)
 {
-int value = qdm2_get_vlc(gb, vlc, 0, depth);
+int value = qdm2_get_vlc(bc, vlc, 0, depth);
 
 return (value & 1) ? ((value + 1) >> 1) : -(value >> 1);
 }
@@ -412,35 +412,35 @@ static uint16_t qdm2_packet_checksum(const uint8_t *data, 
int length, int value)
 /**
  * Fill a QDM2SubPacket structure with packet type, size, and data pointer.
  *
- * @param gbbitreader context
+ * @param bcbitreader context
  * @param sub_packetpacket under analysis
  */
-static void qdm2_decode_sub_packet_header(GetBitContext *gb,
+static void qdm2_decode_sub_packet_header(BitstreamContext *bc,
   QDM2SubPacket *sub_packet)
 {
-sub_packet->type = get_bits(gb, 8);
+sub_packet->type = bitstream_read(bc, 8);
 
 if (sub_packet->type == 0) {
 sub_packet->size = 0;
 sub_packet->data = NULL;
 } else {
-sub_packet->size = get_bits(gb, 8);
+sub_packet->size = bitstream_read(bc, 8);
 
 if (sub_packet->type & 0x80) {
 sub_packet->size <<= 8;
-sub_packet->size  |= get_bits(gb, 8);
+sub_packet->size  |= bitstream_read(bc, 8);
 sub_packet->type  &= 0x7f;
 }
 
 if (sub_packet->type == 0x7f)
-sub_packet->type |= (get_bits(gb, 8) << 8);
+sub_packet->type |= bitstream_read(bc, 8) << 8;
 
 // FIXME: this depends on bitreader-internal data
-sub_packet->data = &gb->buffer[get_bits_count(gb) / 8];
+sub_packet->data = &bc->buffer[bitstream_tell(bc) / 8];
 }
 
 av_log(NULL, AV_LOG_DEBUG, "Subpacket: type=%d size=%d start_offs=%x\n",
-   sub_packet->type, sub_packet->size, get_bits_count(gb) / 8);
+   sub_packet->type, sub_packet->size, bitstream_tell(bc) / 8);
 }
 
 /**
@@ -799,12 +799,12 @@ static void fill_coding_method_array(sb_int8_array 
tone_level_idx,
  * sb 8-sb_used.
  *
  * @param q context
- * @param gbbitreader context
+ * @param bcbitreader context
  * @param lengthpacket length in bits
  * @param sb_minlower subband processed (sb_min included)
  * @param sb_maxhigher subband processed (sb_max excluded)
  */
-static void synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb,
+static void synthfilt_build_sb_samples(QDM2Context *q, BitstreamContext *bc,
int length, int sb_min, int sb_max)
 {
 int sb, j, k, n, ch, run, channels;
@@ -830,12 +830,12 @@ static void synthfilt_build_sb_samples(QDM2Context *q, 
GetBitContext *gb,
 else if (sb >= 24)
 joined_stereo = 1;
 else
-joined_stereo = (get_bits_left(gb) >= 1) ? get_bits1(gb) : 0;
+joined_stereo = (bitstream_bits_left(bc) >= 1) ? 
bitstream_read_bit(bc) : 0;
 
 if (joined_stereo) {
-if (get_bits_left(gb) >= 16)
+if (bitstream_bits_left(bc) >= 16)
 for (j = 0; j < 16; j++)
-sign_bits[j] = get_bits1(gb);
+sign_bits[j] = bitstream_read_bit(bc);
 
 for (j = 0; j < 64; j++)
 if (q->coding_method[1][sb][j] > q->coding_method[0][sb][j])
@@ -851,22 +851,22 @@ static void synthfilt_build_sb_samples(QDM2Context *q, 
GetBitContext *gb,
 
 for (ch = 0; ch < channels; ch++) {
 FIX_NOISE_IDX(q->noise_idx);
-zero_encoding = (get_bits_left(gb) >= 1) ? 

[libav-devel] [PATCH 30/35] jvdec: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/jvdec.c | 52 ++--
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/libavcodec/jvdec.c b/libavcodec/jvdec.c
index c532b75..37a2770 100644
--- a/libavcodec/jvdec.c
+++ b/libavcodec/jvdec.c
@@ -28,8 +28,8 @@
 #include "libavutil/intreadwrite.h"
 
 #include "avcodec.h"
+#include "bitstream.h"
 #include "blockdsp.h"
-#include "get_bits.h"
 #include "internal.h"
 
 typedef struct JvContext {
@@ -62,84 +62,84 @@ static av_cold int decode_init(AVCodecContext *avctx)
 /**
  * Decode 2x2 block
  */
-static inline void decode2x2(GetBitContext *gb, uint8_t *dst, int linesize)
+static inline void decode2x2(BitstreamContext *bc, uint8_t *dst, int linesize)
 {
 int i, j, v[2];
 
-switch (get_bits(gb, 2)) {
+switch (bitstream_read(bc, 2)) {
 case 1:
-v[0] = get_bits(gb, 8);
+v[0] = bitstream_read(bc, 8);
 for (j = 0; j < 2; j++)
 memset(dst + j * linesize, v[0], 2);
 break;
 case 2:
-v[0] = get_bits(gb, 8);
-v[1] = get_bits(gb, 8);
+v[0] = bitstream_read(bc, 8);
+v[1] = bitstream_read(bc, 8);
 for (j = 0; j < 2; j++)
 for (i = 0; i < 2; i++)
-dst[j * linesize + i] = v[get_bits1(gb)];
+dst[j * linesize + i] = v[bitstream_read_bit(bc)];
 break;
 case 3:
 for (j = 0; j < 2; j++)
 for (i = 0; i < 2; i++)
-dst[j * linesize + i] = get_bits(gb, 8);
+dst[j * linesize + i] = bitstream_read(bc, 8);
 }
 }
 
 /**
  * Decode 4x4 block
  */
-static inline void decode4x4(GetBitContext *gb, uint8_t *dst, int linesize)
+static inline void decode4x4(BitstreamContext *bc, uint8_t *dst, int linesize)
 {
 int i, j, v[2];
 
-switch (get_bits(gb, 2)) {
+switch (bitstream_read(bc, 2)) {
 case 1:
-v[0] = get_bits(gb, 8);
+v[0] = bitstream_read(bc, 8);
 for (j = 0; j < 4; j++)
 memset(dst + j * linesize, v[0], 4);
 break;
 case 2:
-v[0] = get_bits(gb, 8);
-v[1] = get_bits(gb, 8);
+v[0] = bitstream_read(bc, 8);
+v[1] = bitstream_read(bc, 8);
 for (j = 2; j >= 0; j -= 2) {
 for (i = 0; i < 4; i++)
-dst[j * linesize + i] = v[get_bits1(gb)];
+dst[j * linesize + i] = v[bitstream_read_bit(bc)];
 for (i = 0; i < 4; i++)
-dst[(j + 1) * linesize + i] = v[get_bits1(gb)];
+dst[(j + 1) * linesize + i] = v[bitstream_read_bit(bc)];
 }
 break;
 case 3:
 for (j = 0; j < 4; j += 2)
 for (i = 0; i < 4; i += 2)
-decode2x2(gb, dst + j * linesize + i, linesize);
+decode2x2(bc, dst + j * linesize + i, linesize);
 }
 }
 
 /**
  * Decode 8x8 block
  */
-static inline void decode8x8(GetBitContext *gb, uint8_t *dst, int linesize,
+static inline void decode8x8(BitstreamContext *bc, uint8_t *dst, int linesize,
  BlockDSPContext *bdsp)
 {
 int i, j, v[2];
 
-switch (get_bits(gb, 2)) {
+switch (bitstream_read(bc, 2)) {
 case 1:
-v[0] = get_bits(gb, 8);
+v[0] = bitstream_read(bc, 8);
 bdsp->fill_block_tab[1](dst, v[0], linesize, 8);
 break;
 case 2:
-v[0] = get_bits(gb, 8);
-v[1] = get_bits(gb, 8);
+v[0] = bitstream_read(bc, 8);
+v[1] = bitstream_read(bc, 8);
 for (j = 7; j >= 0; j--)
 for (i = 0; i < 8; i++)
-dst[j * linesize + i] = v[get_bits1(gb)];
+dst[j * linesize + i] = v[bitstream_read_bit(bc)];
 break;
 case 3:
 for (j = 0; j < 8; j += 4)
 for (i = 0; i < 8; i += 4)
-decode4x4(gb, dst + j * linesize + i, linesize);
+decode4x4(bc, dst + j * linesize + i, linesize);
 }
 }
 
@@ -163,12 +163,12 @@ static int decode_frame(AVCodecContext *avctx, void 
*data, int *got_frame,
 }
 
 if (video_type == 0 || video_type == 1) {
-GetBitContext gb;
-init_get_bits(&gb, buf, 8 * FFMIN(video_size, buf_end - buf));
+BitstreamContext bc;
+bitstream_init(&bc, buf, 8 * FFMIN(video_size, buf_end - buf));
 
 for (j = 0; j < avctx->height; j += 8)
 for (i = 0; i < avctx->width; i += 8)
-decode8x8(&gb,
+decode8x8(&bc,
   s->frame->data[0] + j * s->frame->linesize[0] + 
i,
   s->frame->linesize[0], &s->bdsp);
 
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 27/35] gsm: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/gsmdec.c  | 11 ++-
 libavcodec/gsmdec_template.c | 34 +-
 libavcodec/msgsmdec.c|  9 +
 3 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/libavcodec/gsmdec.c b/libavcodec/gsmdec.c
index a333e58..d727cf9 100644
--- a/libavcodec/gsmdec.c
+++ b/libavcodec/gsmdec.c
@@ -25,8 +25,9 @@
  */
 
 #include "libavutil/channel_layout.h"
+
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "internal.h"
 #include "msgsmdec.h"
 
@@ -67,7 +68,7 @@ static int gsm_decode_frame(AVCodecContext *avctx, void *data,
 {
 AVFrame *frame = data;
 int res;
-GetBitContext gb;
+BitstreamContext bc;
 const uint8_t *buf = avpkt->data;
 int buf_size = avpkt->size;
 int16_t *samples;
@@ -87,10 +88,10 @@ static int gsm_decode_frame(AVCodecContext *avctx, void 
*data,
 
 switch (avctx->codec_id) {
 case AV_CODEC_ID_GSM:
-init_get_bits(&gb, buf, buf_size * 8);
-if (get_bits(&gb, 4) != 0xd)
+bitstream_init(&bc, buf, buf_size * 8);
+if (bitstream_read(&bc, 4) != 0xd)
 av_log(avctx, AV_LOG_WARNING, "Missing GSM magic!\n");
-res = gsm_decode_block(avctx, samples, &gb, GSM_13000);
+res = gsm_decode_block(avctx, samples, &bc, GSM_13000);
 if (res < 0)
 return res;
 break;
diff --git a/libavcodec/gsmdec_template.c b/libavcodec/gsmdec_template.c
index 2794bd1..7437908 100644
--- a/libavcodec/gsmdec_template.c
+++ b/libavcodec/gsmdec_template.c
@@ -24,17 +24,17 @@
  * GSM decoder
  */
 
-#include "get_bits.h"
+#include "bitstream.h"
 #include "gsm.h"
 #include "gsmdec_data.h"
 
-static void apcm_dequant_add(GetBitContext *gb, int16_t *dst, const int 
*frame_bits)
+static void apcm_dequant_add(BitstreamContext *bc, int16_t *dst, const int 
*frame_bits)
 {
 int i, val;
-int maxidx = get_bits(gb, 6);
+int maxidx = bitstream_read(bc, 6);
 const int16_t *tab = ff_gsm_dequant_tab[maxidx];
 for (i = 0; i < 13; i++) {
-val = get_bits(gb, frame_bits[i]);
+val = bitstream_read(bc, frame_bits[i]);
 dst[3 * i] += tab[ff_gsm_requant_tab[frame_bits[i]][val]];
 }
 }
@@ -120,28 +120,28 @@ static int postprocess(int16_t *data, int msr)
 }
 
 static int gsm_decode_block(AVCodecContext *avctx, int16_t *samples,
-GetBitContext *gb, int mode)
+BitstreamContext *bc, int mode)
 {
 GSMContext *ctx = avctx->priv_data;
 int i;
 int16_t *ref_dst = ctx->ref_buf + 120;
 int *lar = ctx->lar[ctx->lar_idx];
-lar[0] = decode_log_area(get_bits(gb, 6), 13107,  1 << 15);
-lar[1] = decode_log_area(get_bits(gb, 6), 13107,  1 << 15);
-lar[2] = decode_log_area(get_bits(gb, 5), 13107, (1 << 14) + 2048*2);
-lar[3] = decode_log_area(get_bits(gb, 5), 13107, (1 << 14) - 2560*2);
-lar[4] = decode_log_area(get_bits(gb, 4), 19223, (1 << 13) +   94*2);
-lar[5] = decode_log_area(get_bits(gb, 4), 17476, (1 << 13) - 1792*2);
-lar[6] = decode_log_area(get_bits(gb, 3), 31454, (1 << 12) -  341*2);
-lar[7] = decode_log_area(get_bits(gb, 3), 29708, (1 << 12) - 1144*2);
+lar[0] = decode_log_area(bitstream_read(bc, 6), 13107,  1 << 15);
+lar[1] = decode_log_area(bitstream_read(bc, 6), 13107,  1 << 15);
+lar[2] = decode_log_area(bitstream_read(bc, 5), 13107, (1 << 14) + 2048 * 
2);
+lar[3] = decode_log_area(bitstream_read(bc, 5), 13107, (1 << 14) - 2560 * 
2);
+lar[4] = decode_log_area(bitstream_read(bc, 4), 19223, (1 << 13) +   94 * 
2);
+lar[5] = decode_log_area(bitstream_read(bc, 4), 17476, (1 << 13) - 1792 * 
2);
+lar[6] = decode_log_area(bitstream_read(bc, 3), 31454, (1 << 12) -  341 * 
2);
+lar[7] = decode_log_area(bitstream_read(bc, 3), 29708, (1 << 12) - 1144 * 
2);
 
 for (i = 0; i < 4; i++) {
-int lag  = get_bits(gb, 7);
-int gain_idx = get_bits(gb, 2);
-int offset   = get_bits(gb, 2);
+int lag  = bitstream_read(bc, 7);
+int gain_idx = bitstream_read(bc, 2);
+int offset   = bitstream_read(bc, 2);
 lag = av_clip(lag, 40, 120);
 long_term_synth(ref_dst, lag, gain_idx);
-apcm_dequant_add(gb, ref_dst + offset, ff_gsm_apcm_bits[mode][i]);
+apcm_dequant_add(bc, ref_dst + offset, ff_gsm_apcm_bits[mode][i]);
 ref_dst += 40;
 }
 memcpy(ctx->ref_buf, ctx->ref_buf + 160, 120 * sizeof(*ctx->ref_buf));
diff --git a/libavcodec/msgsmdec.c b/libavcodec/msgsmdec.c
index 92b5ae6..c26efa9 100644
--- a/libavcodec/msgsmdec.c
+++ b/libavcodec/msgsmdec.c
@@ -21,6 +21,7 @@
 
 #define BITSTREAM_READER_LE
 #include "avcodec.h"
+#include "bitstream.h"
 #include "gsm.h"
 #include "msgsmdec.h"
 
@@ -30,10 +31,10 @@ int ff_msgsm_decode_block(AVCodecContext *avctx, int16_t 
*samples,
   const uint8_t *buf, int mode)
 {
 int res;
-

[libav-devel] [PATCH 33/35] pcx: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/pcx.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libavcodec/pcx.c b/libavcodec/pcx.c
index a2d49b4..ece885e 100644
--- a/libavcodec/pcx.c
+++ b/libavcodec/pcx.c
@@ -23,9 +23,10 @@
  */
 
 #include "libavutil/imgutils.h"
+
 #include "avcodec.h"
+#include "bitstream.h"
 #include "bytestream.h"
-#include "get_bits.h"
 #include "internal.h"
 
 #define PCX_HEADER_SIZE 128
@@ -179,15 +180,15 @@ static int pcx_decode_frame(AVCodecContext *avctx, void 
*data, int *got_frame,
 goto end;
 }
 } else if (nplanes == 1) {   /* all packed formats, max. 16 colors */
-GetBitContext s;
+BitstreamContext s;
 
 for (y = 0; y < h; y++) {
-init_get_bits(&s, scanline, bytes_per_scanline << 3);
+bitstream_init(&s, scanline, bytes_per_scanline << 3);
 
 pcx_rle_decode(&gb, scanline, bytes_per_scanline, compressed);
 
 for (x = 0; x < w; x++)
-ptr[x] = get_bits(&s, bits_per_pixel);
+ptr[x] = bitstream_read(&s, bits_per_pixel);
 ptr += stride;
 }
 } else {/* planar, 4, 8 or 16 colors */
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 26/35] g72x: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/g722dec.c   | 13 -
 libavcodec/g723_1dec.c | 72 +-
 libavcodec/g726.c  | 11 
 3 files changed, 49 insertions(+), 47 deletions(-)

diff --git a/libavcodec/g722dec.c b/libavcodec/g722dec.c
index c4c0ec8..bfd4b42 100644
--- a/libavcodec/g722dec.c
+++ b/libavcodec/g722dec.c
@@ -36,8 +36,9 @@
 
 #include "libavutil/channel_layout.h"
 #include "libavutil/opt.h"
+
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "g722.h"
 #include "internal.h"
 
@@ -92,7 +93,7 @@ static int g722_decode_frame(AVCodecContext *avctx, void 
*data,
 int j, ret;
 const int skip = 8 - c->bits_per_codeword;
 const int16_t *quantizer_table = low_inv_quants[skip];
-GetBitContext gb;
+BitstreamContext bc;
 
 /* get output buffer */
 frame->nb_samples = avpkt->size * 2;
@@ -102,15 +103,15 @@ static int g722_decode_frame(AVCodecContext *avctx, void 
*data,
 }
 out_buf = (int16_t *)frame->data[0];
 
-init_get_bits(&gb, avpkt->data, avpkt->size * 8);
+bitstream_init(&bc, avpkt->data, avpkt->size * 8);
 
 for (j = 0; j < avpkt->size; j++) {
 int ilow, ihigh, rlow, rhigh, dhigh;
 int xout[2];
 
-ihigh = get_bits(&gb, 2);
-ilow = get_bits(&gb, 6 - skip);
-skip_bits(&gb, skip);
+ihigh = bitstream_read(&bc, 2);
+ilow  = bitstream_read(&bc, 6 - skip);
+bitstream_skip(&bc, skip);
 
 rlow = av_clip_intp2((c->band[0].scale_factor * quantizer_table[ilow] 
>> 10)
   + c->band[0].s_predictor, 14);
diff --git a/libavcodec/g723_1dec.c b/libavcodec/g723_1dec.c
index f50bed1..2ea3bbf 100644
--- a/libavcodec/g723_1dec.c
+++ b/libavcodec/g723_1dec.c
@@ -32,8 +32,8 @@
 #define BITSTREAM_READER_LE
 #include "acelp_vectors.h"
 #include "avcodec.h"
+#include "bitstream.h"
 #include "celp_filters.h"
-#include "get_bits.h"
 #include "internal.h"
 #include "g723_1.h"
 
@@ -68,14 +68,14 @@ static av_cold int g723_1_decode_init(AVCodecContext *avctx)
 static int unpack_bitstream(G723_1_Context *p, const uint8_t *buf,
 int buf_size)
 {
-GetBitContext gb;
+BitstreamContext bc;
 int ad_cb_len;
 int temp, info_bits, i;
 
-init_get_bits(&gb, buf, buf_size * 8);
+bitstream_init(&bc, buf, buf_size * 8);
 
 /* Extract frame type and rate info */
-info_bits = get_bits(&gb, 2);
+info_bits = bitstream_read(&bc, 2);
 
 if (info_bits == 3) {
 p->cur_frame_type = UNTRANSMITTED_FRAME;
@@ -83,13 +83,13 @@ static int unpack_bitstream(G723_1_Context *p, const 
uint8_t *buf,
 }
 
 /* Extract 24 bit lsp indices, 8 bit for each band */
-p->lsp_index[2] = get_bits(&gb, 8);
-p->lsp_index[1] = get_bits(&gb, 8);
-p->lsp_index[0] = get_bits(&gb, 8);
+p->lsp_index[2] = bitstream_read(&bc, 8);
+p->lsp_index[1] = bitstream_read(&bc, 8);
+p->lsp_index[0] = bitstream_read(&bc, 8);
 
 if (info_bits == 2) {
 p->cur_frame_type = SID_FRAME;
-p->subframe[0].amp_index = get_bits(&gb, 6);
+p->subframe[0].amp_index = bitstream_read(&bc, 6);
 return 0;
 }
 
@@ -97,23 +97,23 @@ static int unpack_bitstream(G723_1_Context *p, const 
uint8_t *buf,
 p->cur_rate   = info_bits ? RATE_5300 : RATE_6300;
 p->cur_frame_type = ACTIVE_FRAME;
 
-p->pitch_lag[0] = get_bits(&gb, 7);
+p->pitch_lag[0] = bitstream_read(&bc, 7);
 if (p->pitch_lag[0] > 123)   /* test if forbidden code */
 return -1;
 p->pitch_lag[0] += PITCH_MIN;
-p->subframe[1].ad_cb_lag = get_bits(&gb, 2);
+p->subframe[1].ad_cb_lag = bitstream_read(&bc, 2);
 
-p->pitch_lag[1] = get_bits(&gb, 7);
+p->pitch_lag[1] = bitstream_read(&bc, 7);
 if (p->pitch_lag[1] > 123)
 return -1;
 p->pitch_lag[1] += PITCH_MIN;
-p->subframe[3].ad_cb_lag = get_bits(&gb, 2);
+p->subframe[3].ad_cb_lag = bitstream_read(&bc, 2);
 p->subframe[0].ad_cb_lag = 1;
 p->subframe[2].ad_cb_lag = 1;
 
 for (i = 0; i < SUBFRAMES; i++) {
 /* Extract combined gain */
-temp = get_bits(&gb, 12);
+temp = bitstream_read(&bc, 12);
 ad_cb_len = 170;
 p->subframe[i].dirac_train = 0;
 if (p->cur_rate == RATE_6300 && p->pitch_lag[i >> 1] < SUBFRAME_LEN - 
2) {
@@ -130,16 +130,16 @@ static int unpack_bitstream(G723_1_Context *p, const 
uint8_t *buf,
 }
 }
 
-p->subframe[0].grid_index = get_bits(&gb, 1);
-p->subframe[1].grid_index = get_bits(&gb, 1);
-p->subframe[2].grid_index = get_bits(&gb, 1);
-p->subframe[3].grid_index = get_bits(&gb, 1);
+p->subframe[0].grid_index = bitstream_read(&bc, 1);
+p->subframe[1].grid_index = bitstream_read(&bc, 1);
+p->subframe[2].grid_index = bitstream_read(&bc, 1);
+p->subframe[3].grid_index = bitstream_read(&bc, 1);
 
 if (p->cur_rate == RATE_6300) {
-skip_bits(&gb, 1);  /* skip reserved bit */
+bitstream_skip(&bc, 1); /* skip reserved bit */
 
   

[libav-devel] [PATCH 29/35] hqx: Convert to the new bitstream header

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/hqx.c | 64 
 libavcodec/hqx.h |  5 +++--
 2 files changed, 35 insertions(+), 34 deletions(-)

diff --git a/libavcodec/hqx.c b/libavcodec/hqx.c
index 7411d3f..3c359e3 100644
--- a/libavcodec/hqx.c
+++ b/libavcodec/hqx.c
@@ -24,8 +24,8 @@
 #include "libavutil/intreadwrite.h"
 
 #include "avcodec.h"
+#include "bitstream.h"
 #include "canopus.h"
-#include "get_bits.h"
 #include "internal.h"
 
 #include "hqx.h"
@@ -95,23 +95,23 @@ static inline void put_blocks(HQXContext *ctx, int plane,
  lsize * fields, block1, quant);
 }
 
-static inline void hqx_get_ac(GetBitContext *gb, const HQXAC *ac,
+static inline void hqx_get_ac(BitstreamContext *bc, const HQXAC *ac,
   int *run, int *lev)
 {
 int val;
 
-val = show_bits(gb, ac->lut_bits);
+val = bitstream_peek(bc, ac->lut_bits);
 if (ac->lut[val].bits == -1) {
-GetBitContext gb2 = *gb;
-skip_bits(&gb2, ac->lut_bits);
-val = ac->lut[val].lev + show_bits(&gb2, ac->extra_bits);
+BitstreamContext bc2 = *bc;
+bitstream_skip(&bc2, ac->lut_bits);
+val = ac->lut[val].lev + bitstream_peek(&bc2, ac->extra_bits);
 }
 *run = ac->lut[val].run;
 *lev = ac->lut[val].lev;
-skip_bits(gb, ac->lut[val].bits);
+bitstream_skip(bc, ac->lut[val].bits);
 }
 
-static int decode_block(GetBitContext *gb, VLC *vlc,
+static int decode_block(BitstreamContext *bc, VLC *vlc,
 const int *quants, int dcb,
 int16_t block[64], int *last_dc)
 {
@@ -120,14 +120,14 @@ static int decode_block(GetBitContext *gb, VLC *vlc,
 int run, lev, pos = 1;
 
 memset(block, 0, 64 * sizeof(*block));
-dc = get_vlc2(gb, vlc->table, HQX_DC_VLC_BITS, 2);
+dc = bitstream_read_vlc(bc, vlc->table, HQX_DC_VLC_BITS, 2);
 if (dc < 0)
 return AVERROR_INVALIDDATA;
 *last_dc += dc;
 
 block[0] = sign_extend(*last_dc << (12 - dcb), 12);
 
-q = quants[get_bits(gb, 2)];
+q = quants[bitstream_read(bc, 2)];
 if (q >= 128)
 ac_idx = HQX_AC_Q128;
 else if (q >= 64)
@@ -142,7 +142,7 @@ static int decode_block(GetBitContext *gb, VLC *vlc,
 ac_idx = HQX_AC_Q0;
 
 do {
-hqx_get_ac(gb, &ff_hqx_ac[ac_idx], &run, &lev);
+hqx_get_ac(bc, &ff_hqx_ac[ac_idx], &run, &lev);
 pos += run;
 if (pos >= 64)
 break;
@@ -155,24 +155,24 @@ static int decode_block(GetBitContext *gb, VLC *vlc,
 static int hqx_decode_422(HQXContext *ctx, int slice_no, int x, int y)
 {
 HQXSlice *slice = &ctx->slice[slice_no];
-GetBitContext *gb = &ctx->gb;
+BitstreamContext *bc = &ctx->bc;
 const int *quants;
 int flag;
 int last_dc;
 int i, ret;
 
 if (ctx->interlaced)
-flag = get_bits1(gb);
+flag = bitstream_read_bit(bc);
 else
 flag = 0;
 
-quants = hqx_quants[get_bits(gb, 4)];
+quants = hqx_quants[bitstream_read(bc, 4)];
 
 for (i = 0; i < 8; i++) {
 int vlc_index = ctx->dcb - 9;
 if (i == 0 || i == 4 || i == 6)
 last_dc = 0;
-ret = decode_block(gb, &ctx->dc_vlc[vlc_index], quants,
+ret = decode_block(bc, &ctx->dc_vlc[vlc_index], quants,
ctx->dcb, slice->block[i], &last_dc);
 if (ret < 0)
 return ret;
@@ -189,14 +189,14 @@ static int hqx_decode_422(HQXContext *ctx, int slice_no, 
int x, int y)
 static int hqx_decode_422a(HQXContext *ctx, int slice_no, int x, int y)
 {
 HQXSlice *slice = &ctx->slice[slice_no];
-GetBitContext *gb = &ctx->gb;
+BitstreamContext *bc = &ctx->bc;
 const int *quants;
 int flag = 0;
 int last_dc;
 int i, ret;
 int cbp;
 
-cbp = get_vlc2(gb, ctx->cbp_vlc.table, ctx->cbp_vlc.bits, 1);
+cbp = bitstream_read_vlc(bc, ctx->cbp_vlc.table, ctx->cbp_vlc.bits, 1);
 
 for (i = 0; i < 12; i++)
 memset(slice->block[i], 0, sizeof(**slice->block) * 64);
@@ -204,9 +204,9 @@ static int hqx_decode_422a(HQXContext *ctx, int slice_no, 
int x, int y)
 slice->block[i][0] = -0x800;
 if (cbp) {
 if (ctx->interlaced)
-flag = get_bits1(gb);
+flag = bitstream_read_bit(bc);
 
-quants = hqx_quants[get_bits(gb, 4)];
+quants = hqx_quants[bitstream_read(bc, 4)];
 
 cbp |= cbp << 4; // alpha CBP
 if (cbp & 0x3)   // chroma CBP - top
@@ -218,7 +218,7 @@ static int hqx_decode_422a(HQXContext *ctx, int slice_no, 
int x, int y)
 last_dc = 0;
 if (cbp & (1 << i)) {
 int vlc_index = ctx->dcb - 9;
-ret = decode_block(gb, &ctx->dc_vlc[vlc_index], quants,
+ret = decode_block(bc, &ctx->dc_vlc[vlc_index], quants,
ctx->dcb, slice->block[i], &last_dc);
 if (ret < 0)
 return ret;
@@ -239,24 +239,24 @@ static int hqx_decode_422a(HQXContext *ctx, int 

[libav-devel] [PATCH 31/35] nellymoser: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/nellymoserdec.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavcodec/nellymoserdec.c b/libavcodec/nellymoserdec.c
index 355935f..390872c 100644
--- a/libavcodec/nellymoserdec.c
+++ b/libavcodec/nellymoserdec.c
@@ -38,8 +38,8 @@
 
 #define BITSTREAM_READER_LE
 #include "avcodec.h"
+#include "bitstream.h"
 #include "fft.h"
-#include "get_bits.h"
 #include "internal.h"
 #include "nellymoser.h"
 #include "sinewin.h"
@@ -48,7 +48,7 @@
 typedef struct NellyMoserDecodeContext {
 AVCodecContext* avctx;
 AVLFG   random_state;
-GetBitContext   gb;
+BitstreamContext bc;
 float   scale_bias;
 AVFloatDSPContext fdsp;
 FFTContext  imdct_ctx;
@@ -67,14 +67,14 @@ static void nelly_decode_block(NellyMoserDecodeContext *s,
 int bits[NELLY_BUF_LEN];
 unsigned char v;
 
-init_get_bits(&s->gb, block, NELLY_BLOCK_LEN * 8);
+bitstream_init(&s->bc, block, NELLY_BLOCK_LEN * 8);
 
 bptr = buf;
 pptr = pows;
-val = ff_nelly_init_table[get_bits(&s->gb, 6)];
+val = ff_nelly_init_table[bitstream_read(&s->bc, 6)];
 for (i=0 ; i<NELLY_BANDS ; i++) {
 if (i > 0)
-val += ff_nelly_delta_table[get_bits(&s->gb, 5)];
+val += ff_nelly_delta_table[bitstream_read(&s->bc, 5)];
 pval = -pow(2, val/2048) * s->scale_bias;
 for (j = 0; j < ff_nelly_band_sizes_table[i]; j++) {
 *bptr++ = val;
@@ -88,8 +88,8 @@ static void nelly_decode_block(NellyMoserDecodeContext *s,
 for (i = 0; i < 2; i++) {
 aptr = audio + i * NELLY_BUF_LEN;
 
-init_get_bits(&s->gb, block, NELLY_BLOCK_LEN * 8);
-skip_bits_long(&s->gb, NELLY_HEADER_BITS + i*NELLY_DETAIL_BITS);
+bitstream_init(&s->bc, block, NELLY_BLOCK_LEN * 8);
+bitstream_skip(&s->bc, NELLY_HEADER_BITS + i * NELLY_DETAIL_BITS);
 
 for (j = 0; j < NELLY_FILL_LEN; j++) {
 if (bits[j] <= 0) {
@@ -97,7 +97,7 @@ static void nelly_decode_block(NellyMoserDecodeContext *s,
 if (av_lfg_get(&s->random_state) & 1)
 aptr[j] *= -1.0;
 } else {
-v = get_bits(&s->gb, bits[j]);
+v = bitstream_read(&s->bc, bits[j]);
 aptr[j] = 
ff_nelly_dequantization_table[(1<

[libav-devel] [PATCH 21/35] exr: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/exr.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index d10841d..28cee84 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -39,8 +39,8 @@
 #include "libavutil/opt.h"
 
 #include "avcodec.h"
+#include "bitstream.h"
 #include "bytestream.h"
-#include "get_bits.h"
 #include "internal.h"
 #include "mathops.h"
 #include "thread.h"
@@ -379,16 +379,16 @@ static void huf_canonical_code_table(uint64_t *hcode)
 static int huf_unpack_enc_table(GetByteContext *gb,
 int32_t im, int32_t iM, uint64_t *hcode)
 {
-GetBitContext gbit;
-int ret = init_get_bits8(&gbit, gb->buffer, bytestream2_get_bytes_left(gb));
+BitstreamContext bc;
+int ret = bitstream_init8(&bc, gb->buffer, bytestream2_get_bytes_left(gb));
 if (ret < 0)
 return ret;
 
 for (; im <= iM; im++) {
-uint64_t l = hcode[im] = get_bits(&gbit, 6);
+uint64_t l = hcode[im] = bitstream_read(&bc, 6);
 
 if (l == LONG_ZEROCODE_RUN) {
-int zerun = get_bits(&gbit, 8) + SHORTEST_LONG_RUN;
+int zerun = bitstream_read(&bc, 8) + SHORTEST_LONG_RUN;
 
 if (im + zerun > iM + 1)
 return AVERROR_INVALIDDATA;
@@ -410,7 +410,7 @@ static int huf_unpack_enc_table(GetByteContext *gb,
 }
 }
 
-bytestream2_skip(gb, (get_bits_count(&gbit) + 7) / 8);
+bytestream2_skip(gb, (bitstream_tell(&bc) + 7) / 8);
 huf_canonical_code_table(hcode);
 
 return 0;
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 19/35] escape124: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/escape124.c | 85 --
 1 file changed, 41 insertions(+), 44 deletions(-)

diff --git a/libavcodec/escape124.c b/libavcodec/escape124.c
index 6d1b487..879f00a 100644
--- a/libavcodec/escape124.c
+++ b/libavcodec/escape124.c
@@ -21,7 +21,7 @@
 
 #define BITSTREAM_READER_LE
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "internal.h"
 
 typedef union MacroBlock {
@@ -48,8 +48,9 @@ typedef struct Escape124Context {
 CodeBook codebooks[3];
 } Escape124Context;
 
-static int can_safely_read(GetBitContext* gb, int bits) {
-return get_bits_left(gb) >= bits;
+static int can_safely_read(BitstreamContext *bc, int bits)
+{
+return bitstream_bits_left(bc) >= bits;
 }
 
 /**
@@ -86,13 +87,13 @@ static av_cold int escape124_decode_close(AVCodecContext 
*avctx)
 return 0;
 }
 
-static CodeBook unpack_codebook(GetBitContext* gb, unsigned depth,
+static CodeBook unpack_codebook(BitstreamContext *bc, unsigned depth,
  unsigned size)
 {
 unsigned i, j;
 CodeBook cb = { 0 };
 
-if (!can_safely_read(gb, size * 34))
+if (!can_safely_read(bc, size * 34))
 return cb;
 
 if (size >= INT_MAX / sizeof(MacroBlock))
@@ -104,9 +105,9 @@ static CodeBook unpack_codebook(GetBitContext* gb, unsigned 
depth,
 cb.depth = depth;
 cb.size = size;
 for (i = 0; i < size; i++) {
-unsigned mask_bits = get_bits(gb, 4);
-unsigned color0 = get_bits(gb, 15);
-unsigned color1 = get_bits(gb, 15);
+unsigned mask_bits = bitstream_read(bc,  4);
+unsigned color0= bitstream_read(bc, 15);
+unsigned color1= bitstream_read(bc, 15);
 
 for (j = 0; j < 4; j++) {
 if (mask_bits & (1 << j))
@@ -118,47 +119,43 @@ static CodeBook unpack_codebook(GetBitContext* gb, 
unsigned depth,
 return cb;
 }
 
-static unsigned decode_skip_count(GetBitContext* gb)
+static unsigned decode_skip_count(BitstreamContext *bc)
 {
 unsigned value;
 // This function reads a maximum of 23 bits,
 // which is within the padding space
-if (!can_safely_read(gb, 1))
+if (!can_safely_read(bc, 1))
 return -1;
-value = get_bits1(gb);
+value = bitstream_read_bit(bc);
 if (!value)
 return value;
 
-value += get_bits(gb, 3);
+value += bitstream_read(bc, 3);
 if (value != (1 + ((1 << 3) - 1)))
 return value;
 
-value += get_bits(gb, 7);
+value += bitstream_read(bc, 7);
 if (value != (1 + ((1 << 3) - 1)) + ((1 << 7) - 1))
 return value;
 
-return value + get_bits(gb, 12);
+return value + bitstream_read(bc, 12);
 }
 
-static MacroBlock decode_macroblock(Escape124Context* s, GetBitContext* gb,
-int* codebook_index, int superblock_index)
+static MacroBlock decode_macroblock(Escape124Context *s, BitstreamContext *bc,
+int *codebook_index, int superblock_index)
 {
 // This function reads a maximum of 22 bits; the callers
 // guard this function appropriately
 unsigned block_index, depth;
-int value = get_bits1(gb);
+int value = bitstream_read_bit(bc);
 if (value) {
 static const char transitions[3][2] = { {2, 1}, {0, 2}, {1, 0} };
-value = get_bits1(gb);
+value = bitstream_read_bit(bc);
 *codebook_index = transitions[*codebook_index][value];
 }
 
 depth = s->codebooks[*codebook_index].depth;
-
-// depth = 0 means that this shouldn't read any bits;
-// in theory, this is the same as get_bits(gb, 0), but
-// that doesn't actually work.
-block_index = get_bitsz(gb, depth);
+block_index = bitstream_read(bc, depth);
 
 if (*codebook_index == 1) {
 block_index += superblock_index << s->codebooks[1].depth;
@@ -208,7 +205,7 @@ static int escape124_decode_frame(AVCodecContext *avctx,
 Escape124Context *s = avctx->priv_data;
 AVFrame *frame = data;
 
-GetBitContext gb;
+BitstreamContext bc;
 unsigned frame_flags, frame_size;
 unsigned i;
 
@@ -220,15 +217,15 @@ static int escape124_decode_frame(AVCodecContext *avctx,
 unsigned old_stride, new_stride;
 int ret;
 
-init_get_bits(&gb, buf, buf_size * 8);
+bitstream_init(&bc, buf, buf_size * 8);
 
 // This call also guards the potential depth reads for the
 // codebook unpacking.
-if (!can_safely_read(&gb, 64))
+if (!can_safely_read(&bc, 64))
 return -1;
 
-frame_flags = get_bits_long(&gb, 32);
-frame_size  = get_bits_long(&gb, 32);
+frame_flags = bitstream_read(&bc, 32);
+frame_size  = bitstream_read(&bc, 32);
 
 // Leave last frame unchanged
 // FIXME: Is this necessary?  I haven't seen it in any real samples
@@ -251,10 +248,10 @@ static int escape124_decode_frame(AVCodecContext *avctx,
 if (i == 2) {
 // This 

[libav-devel] [PATCH 25/35] g2meet: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/g2meet.c | 30 +++---
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/libavcodec/g2meet.c b/libavcodec/g2meet.c
index 7e90916..4a7f5a3 100644
--- a/libavcodec/g2meet.c
+++ b/libavcodec/g2meet.c
@@ -31,10 +31,10 @@
 #include "libavutil/intreadwrite.h"
 
 #include "avcodec.h"
+#include "bitstream.h"
 #include "blockdsp.h"
 #include "bytestream.h"
 #include "elsdec.h"
-#include "get_bits.h"
 #include "idctdsp.h"
 #include "internal.h"
 #include "jpegtables.h"
@@ -236,7 +236,7 @@ static void jpg_unescape(const uint8_t *src, int src_size,
 *dst_size = dst - dst_start;
 }
 
-static int jpg_decode_block(JPGContext *c, GetBitContext *gb,
+static int jpg_decode_block(JPGContext *c, BitstreamContext *bc,
 int plane, int16_t *block)
 {
 int dc, val, pos;
@@ -244,18 +244,18 @@ static int jpg_decode_block(JPGContext *c, GetBitContext 
*gb,
 const uint8_t *qmat = is_chroma ? chroma_quant : luma_quant;
 
 c->bdsp.clear_block(block);
-dc = get_vlc2(gb, c->dc_vlc[is_chroma].table, 9, 3);
+dc = bitstream_read_vlc(bc, c->dc_vlc[is_chroma].table, 9, 3);
 if (dc < 0)
 return AVERROR_INVALIDDATA;
 if (dc)
-dc = get_xbits(gb, dc);
+dc = bitstream_read_xbits(bc, dc);
 dc= dc * qmat[0] + c->prev_dc[plane];
 block[0]  = dc;
 c->prev_dc[plane] = dc;
 
 pos = 0;
 while (pos < 63) {
-val = get_vlc2(gb, c->ac_vlc[is_chroma].table, 9, 3);
+val = bitstream_read_vlc(bc, c->ac_vlc[is_chroma].table, 9, 3);
 if (val < 0)
 return AVERROR_INVALIDDATA;
 pos += val >> 4;
@@ -265,7 +265,7 @@ static int jpg_decode_block(JPGContext *c, GetBitContext 
*gb,
 if (val) {
 int nbits = val;
 
-val = get_xbits(gb, nbits);
+val = bitstream_read_xbits(bc, 
nbits);
 val*= qmat[ff_zigzag_direct[pos]];
 block[c->scantable.permutated[pos]] = val;
 }
@@ -286,7 +286,7 @@ static int jpg_decode_data(JPGContext *c, int width, int 
height,
const uint8_t *mask, int mask_stride, int num_mbs,
int swapuv)
 {
-GetBitContext gb;
+BitstreamContext bc;
 int mb_w, mb_h, mb_x, mb_y, i, j;
 int bx, by;
 int unesc_size;
@@ -298,7 +298,7 @@ static int jpg_decode_data(JPGContext *c, int width, int 
height,
 return ret;
 jpg_unescape(src, src_size, c->buf, &unesc_size);
 memset(c->buf + unesc_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
-init_get_bits(&gb, c->buf, unesc_size * 8);
+bitstream_init(&bc, c->buf, unesc_size * 8);
 
 width = FFALIGN(width, 16);
 mb_w  =  width>> 4;
@@ -325,14 +325,14 @@ static int jpg_decode_data(JPGContext *c, int width, int 
height,
 if (mask && !mask[mb_x * 2 + i + j * mask_stride])
 continue;
 num_mbs--;
-if ((ret = jpg_decode_block(c, &gb, 0,
+if ((ret = jpg_decode_block(c, &bc, 0,
 c->block[i + j * 2])) != 0)
 return ret;
 c->idsp.idct(c->block[i + j * 2]);
 }
 }
 for (i = 1; i < 3; i++) {
-if ((ret = jpg_decode_block(c, &gb, i, c->block[i + 3])) != 0)
+if ((ret = jpg_decode_block(c, &bc, i, c->block[i + 3])) != 0)
 return ret;
 c->idsp.idct(c->block[i + 3]);
 }
@@ -1011,11 +1011,11 @@ static void kempf_restore_buf(const uint8_t *src, int 
len,
   int width, int height,
   const uint8_t *pal, int npal, int tidx)
 {
-GetBitContext gb;
+BitstreamContext bc;
 int i, j, nb, col;
 int align_width = FFALIGN(width, 16);
 
-init_get_bits(&gb, src, len * 8);
+bitstream_init(&bc, src, len * 8);
 
 if (npal <= 2)   nb = 1;
 else if (npal <= 4)  nb = 2;
@@ -1023,16 +1023,16 @@ static void kempf_restore_buf(const uint8_t *src, int 
len,
 else nb = 8;
 
 for (j = 0; j < height; j++, dst += stride, jpeg_tile += tile_stride) {
-if (get_bits(&gb, 8))
+if (bitstream_read(&bc, 8))
 continue;
 for (i = 0; i < width; i++) {
-col = get_bits(&gb, nb);
+col = bitstream_read(&bc, nb);
 if (col != tidx)
 memcpy(dst + i * 3, pal + col * 3, 3);
 else
 memcpy(dst + i * 3, jpeg_tile + i * 3, 3);
 }
-skip_bits_long(&gb, nb * (align_width - width));
+bitstream_skip(&bc, nb * (align_width - width));
 }
 }
 
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org

[libav-devel] [PATCH 24/35] fraps: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/fraps.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libavcodec/fraps.c b/libavcodec/fraps.c
index 55051ff..2237991 100644
--- a/libavcodec/fraps.c
+++ b/libavcodec/fraps.c
@@ -32,7 +32,7 @@
  */
 
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "huffman.h"
 #include "bytestream.h"
 #include "bswapdsp.h"
@@ -94,7 +94,7 @@ static int fraps2_decode_plane(FrapsContext *s, uint8_t *dst, 
int stride, int w,
const int step)
 {
 int i, j, ret;
-GetBitContext gb;
+BitstreamContext bc;
 VLC vlc;
 Node nodes[512];
 
@@ -111,10 +111,10 @@ static int fraps2_decode_plane(FrapsContext *s, uint8_t 
*dst, int stride, int w,
 s->bdsp.bswap_buf((uint32_t *) s->tmpbuf,
   (const uint32_t *) src, size >> 2);
 
-init_get_bits(&gb, s->tmpbuf, size * 8);
+bitstream_init(&bc, s->tmpbuf, size * 8);
 for (j = 0; j < h; j++) {
 for (i = 0; i < w*step; i += step) {
-dst[i] = get_vlc2(&gb, vlc.table, VLC_BITS, 3);
+dst[i] = bitstream_read_vlc(&bc, vlc.table, VLC_BITS, 3);
 /* lines are stored as deltas between previous lines
  * and we need to add 0x80 to the first lines of chroma planes
  */
@@ -122,7 +122,7 @@ static int fraps2_decode_plane(FrapsContext *s, uint8_t 
*dst, int stride, int w,
 dst[i] += dst[i - stride];
 else if (Uoff)
 dst[i] += 0x80;
-if (get_bits_left(&gb) < 0) {
+if (bitstream_bits_left(&bc) < 0) {
 ff_free_vlc(&vlc);
 return AVERROR_INVALIDDATA;
 }
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 23/35] flashsv: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/flashsv.c | 57 ++--
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/libavcodec/flashsv.c b/libavcodec/flashsv.c
index 2cf8f3f..20fa7bc 100644
--- a/libavcodec/flashsv.c
+++ b/libavcodec/flashsv.c
@@ -38,9 +38,10 @@
 #include <zlib.h>
 
 #include "libavutil/intreadwrite.h"
+
 #include "avcodec.h"
+#include "bitstream.h"
 #include "bytestream.h"
-#include "get_bits.h"
 #include "internal.h"
 
 typedef struct BlockInfo {
@@ -175,7 +176,7 @@ static int flashsv2_prime(FlashSVContext *s, uint8_t *src, 
int size)
 }
 
 static int flashsv_decode_block(AVCodecContext *avctx, AVPacket *avpkt,
-GetBitContext *gb, int block_size,
+BitstreamContext *bc, int block_size,
 int width, int height, int x_pos, int y_pos,
 int blk_idx)
 {
@@ -194,7 +195,7 @@ static int flashsv_decode_block(AVCodecContext *avctx, 
AVPacket *avpkt,
 if (ret < 0)
 return ret;
 }
-s->zstream.next_in   = avpkt->data + get_bits_count(gb) / 8;
+s->zstream.next_in   = avpkt->data + bitstream_tell(bc) / 8;
 s->zstream.avail_in  = block_size;
 s->zstream.next_out  = s->tmpblock;
 s->zstream.avail_out = s->block_size * 3;
@@ -210,7 +211,7 @@ static int flashsv_decode_block(AVCodecContext *avctx, 
AVPacket *avpkt,
 }
 
 if (s->is_keyframe) {
-s->blocks[blk_idx].pos  = s->keyframedata + (get_bits_count(gb) / 8);
+s->blocks[blk_idx].pos  = s->keyframedata + (bitstream_tell(bc) / 8);
 s->blocks[blk_idx].size = block_size;
 }
 
@@ -233,7 +234,7 @@ static int flashsv_decode_block(AVCodecContext *avctx, 
AVPacket *avpkt,
   x_pos, s->diff_height, width,
   s->frame->linesize[0], s->pal);
 }
-skip_bits_long(gb, 8 * block_size); /* skip the consumed bits */
+bitstream_skip(bc, 8 * block_size); /* skip the consumed bits */
 return 0;
 }
 
@@ -259,7 +260,7 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void 
*data,
 int buf_size = avpkt->size;
 FlashSVContext *s = avctx->priv_data;
 int h_blocks, v_blocks, h_part, v_part, i, j, ret;
-GetBitContext gb;
+BitstreamContext bc;
 
 /* no supplementary picture */
 if (buf_size == 0)
@@ -267,21 +268,21 @@ static int flashsv_decode_frame(AVCodecContext *avctx, 
void *data,
 if (buf_size < 4)
 return -1;
 
-init_get_bits(&gb, avpkt->data, buf_size * 8);
+bitstream_init(&bc, avpkt->data, buf_size * 8);
 
 /* start to parse the bitstream */
-s->block_width  = 16 * (get_bits(&gb, 4) + 1);
-s->image_width  = get_bits(&gb, 12);
-s->block_height = 16 * (get_bits(&gb, 4) + 1);
-s->image_height = get_bits(&gb, 12);
+s->block_width  = 16 * (bitstream_read(&bc, 4) + 1);
+s->image_width  = bitstream_read(&bc, 12);
+s->block_height = 16 * (bitstream_read(&bc, 4) + 1);
+s->image_height = bitstream_read(&bc, 12);
 
 if (s->ver == 2) {
-skip_bits(&gb, 6);
-if (get_bits1(&gb)) {
+bitstream_skip(&bc, 6);
+if (bitstream_read_bit(&bc)) {
 avpriv_request_sample(avctx, "iframe");
 return AVERROR_PATCHWELCOME;
 }
-if (get_bits1(&gb)) {
+if (bitstream_read_bit(&bc)) {
 avpriv_request_sample(avctx, "Custom palette");
 return AVERROR_PATCHWELCOME;
 }
@@ -371,7 +372,7 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void 
*data,
 int has_diff = 0;
 
 /* get the size of the compressed zlib chunk */
-int size = get_bits(&gb, 16);
+int size = bitstream_read(&bc, 16);
 
 s->color_depth= 0;
 s->zlibprime_curr = 0;
@@ -379,17 +380,17 @@ static int flashsv_decode_frame(AVCodecContext *avctx, 
void *data,
 s->diff_start = 0;
 s->diff_height= cur_blk_height;
 
-if (8 * size > get_bits_left(&gb)) {
+if (8 * size > bitstream_bits_left(&bc)) {
 av_frame_unref(s->frame);
 return AVERROR_INVALIDDATA;
 }
 
 if (s->ver == 2 && size) {
-skip_bits(&gb, 3);
-s->color_depth= get_bits(&gb, 2);
-has_diff  = get_bits1(&gb);
-s->zlibprime_curr = get_bits1(&gb);
-s->zlibprime_prev = get_bits1(&gb);
+bitstream_skip(&bc, 3);
+s->color_depth= bitstream_read(&bc, 2);
+has_diff  = bitstream_read_bit(&bc);
+s->zlibprime_curr = bitstream_read_bit(&bc);
+s->zlibprime_prev = bitstream_read_bit(&bc);
 
 if (s->color_depth != 0 && s->color_depth != 2) {
 av_log(avctx, AV_LOG_ERROR,
@@ -404,8 +405,8 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void 
*data,
   

[libav-devel] [PATCH 22/35] faxcompr: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/faxcompr.c | 34 +-
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/libavcodec/faxcompr.c b/libavcodec/faxcompr.c
index 4cbda3f..8a9010d 100644
--- a/libavcodec/faxcompr.c
+++ b/libavcodec/faxcompr.c
@@ -25,7 +25,7 @@
  * @author Konstantin Shishkov
  */
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "put_bits.h"
 #include "faxcompr.h"
 
@@ -123,7 +123,7 @@ av_cold void ff_ccitt_unpack_init(void)
 }
 
 
-static int decode_group3_1d_line(AVCodecContext *avctx, GetBitContext *gb,
+static int decode_group3_1d_line(AVCodecContext *avctx, BitstreamContext *bc,
  unsigned int pix_left, int *runs,
  const int *runend)
 {
@@ -131,7 +131,7 @@ static int decode_group3_1d_line(AVCodecContext *avctx, 
GetBitContext *gb,
 unsigned int run = 0;
 unsigned int t;
 for (;;) {
-t= get_vlc2(gb, ccitt_vlc[mode].table, 9, 2);
+t= bitstream_read_vlc(bc, ccitt_vlc[mode].table, 9, 2);
 run += t;
 if (t < 64) {
 *runs++ = run;
@@ -157,7 +157,7 @@ static int decode_group3_1d_line(AVCodecContext *avctx, 
GetBitContext *gb,
 return 0;
 }
 
-static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb,
+static int decode_group3_2d_line(AVCodecContext *avctx, BitstreamContext *bc,
  unsigned int width, int *runs,
  const int *runend, const int *ref)
 {
@@ -168,7 +168,7 @@ static int decode_group3_2d_line(AVCodecContext *avctx, 
GetBitContext *gb,
 runend--; // for the last written 0
 
 while (offs < width) {
-int cmode = get_vlc2(gb, ccitt_group3_2d_vlc.table, 9, 1);
+int cmode = bitstream_read_vlc(bc, ccitt_group3_2d_vlc.table, 9, 1);
 if (cmode == -1) {
 av_log(avctx, AV_LOG_ERROR, "Incorrect mode VLC\n");
 return AVERROR_INVALIDDATA;
@@ -188,7 +188,7 @@ static int decode_group3_2d_line(AVCodecContext *avctx, 
GetBitContext *gb,
 for (k = 0; k < 2; k++) {
 run = 0;
 for (;;) {
-t = get_vlc2(gb, ccitt_vlc[mode].table, 9, 2);
+t = bitstream_read_vlc(bc, ccitt_vlc[mode].table, 9, 2);
 if (t == -1) {
 av_log(avctx, AV_LOG_ERROR, "Incorrect code\n");
 return AVERROR_INVALIDDATA;
@@ -258,12 +258,12 @@ static void put_line(uint8_t *dst, int size, int width, 
const int *runs)
 flush_put_bits();
 }
 
-static int find_group3_syncmarker(GetBitContext *gb, int srcsize)
+static int find_group3_syncmarker(BitstreamContext *bc, int srcsize)
 {
 unsigned int state = -1;
-srcsize -= get_bits_count(gb);
+srcsize -= bitstream_tell(bc);
 while (srcsize-- > 0) {
-state += state + get_bits1(gb);
+state += state + bitstream_read_bit(bc);
 if ((state & 0xFFF) == 1)
 return 0;
 }
@@ -275,7 +275,7 @@ int ff_ccitt_unpack(AVCodecContext *avctx, const uint8_t 
*src, int srcsize,
 enum TiffCompr compr, int opts)
 {
 int j;
-GetBitContext gb;
+BitstreamContext bc;
 int *runs, *ref = NULL, *runend;
 int ret;
 int runsize = avctx->width + 2;
@@ -289,27 +289,27 @@ int ff_ccitt_unpack(AVCodecContext *avctx, const uint8_t 
*src, int srcsize,
 ref[0] = avctx->width;
 ref[1] = 0;
 ref[2] = 0;
-init_get_bits(&gb, src, srcsize * 8);
+bitstream_init(&bc, src, srcsize * 8);
 for (j = 0; j < height; j++) {
 runend = runs + runsize;
 if (compr == TIFF_G4) {
-ret = decode_group3_2d_line(avctx, &gb, avctx->width, runs, runend,
+ret = decode_group3_2d_line(avctx, &bc, avctx->width, runs, runend,
 ref);
 if (ret < 0)
 goto fail;
 } else {
 int g3d1 = (compr == TIFF_G3) && !(opts & 1);
 if (compr != TIFF_CCITT_RLE &&
-find_group3_syncmarker(&gb, srcsize * 8) < 0)
+find_group3_syncmarker(&bc, srcsize * 8) < 0)
 break;
-if (compr == TIFF_CCITT_RLE || g3d1 || get_bits1(&gb))
-ret = decode_group3_1d_line(avctx, &gb, avctx->width, runs,
+if (compr == TIFF_CCITT_RLE || g3d1 || bitstream_read_bit(&bc))
+ret = decode_group3_1d_line(avctx, &bc, avctx->width, runs,
 runend);
 else
-ret = decode_group3_2d_line(avctx, &gb, avctx->width, runs,
+ret = decode_group3_2d_line(avctx, &bc, avctx->width, runs,
 runend, ref);
 if (compr == TIFF_CCITT_RLE)
-align_get_bits(&gb);
+bitstream_align(&bc);
 }
 if (avctx->err_recognition & 

[libav-devel] [PATCH 20/35] escape130: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/escape130.c | 46 +++---
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/libavcodec/escape130.c b/libavcodec/escape130.c
index bfc1f3f..544f36d 100644
--- a/libavcodec/escape130.c
+++ b/libavcodec/escape130.c
@@ -24,7 +24,7 @@
 
 #define BITSTREAM_READER_LE
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "internal.h"
 
 typedef struct Escape130Context {
@@ -163,23 +163,23 @@ static av_cold int escape130_decode_close(AVCodecContext 
*avctx)
 return 0;
 }
 
-static int decode_skip_count(GetBitContext* gb)
+static int decode_skip_count(BitstreamContext *bc)
 {
 int value;
 
-value = get_bits1(gb);
+value = bitstream_read_bit(bc);
 if (value)
 return 0;
 
-value = get_bits(gb, 3);
+value = bitstream_read(bc, 3);
 if (value)
 return value;
 
-value = get_bits(gb, 8);
+value = bitstream_read(bc, 8);
 if (value)
 return value + 7;
 
-value = get_bits(gb, 15);
+value = bitstream_read(bc, 15);
 if (value)
 return value + 262;
 
@@ -193,7 +193,7 @@ static int escape130_decode_frame(AVCodecContext *avctx, 
void *data,
 int buf_size= avpkt->size;
 Escape130Context *s = avctx->priv_data;
 AVFrame *pic= data;
-GetBitContext gb;
+BitstreamContext bc;
 int ret;
 
 uint8_t *old_y, *old_cb, *old_cr,
@@ -216,7 +216,7 @@ static int escape130_decode_frame(AVCodecContext *avctx, 
void *data,
 if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
 return ret;
 
-init_get_bits(&gb, buf + 16, (buf_size - 16) * 8);
+bitstream_init(&bc, buf + 16, (buf_size - 16) * 8);
 
 new_y  = s->new_y;
 new_cb = s->new_u;
@@ -235,7 +235,7 @@ static int escape130_decode_frame(AVCodecContext *avctx, 
void *data,
 // Note that this call will make us skip the rest of the blocks
 // if the frame ends prematurely.
 if (skip == -1)
-skip = decode_skip_count(&gb);
+skip = decode_skip_count(&bc);
 if (skip == -1) {
 av_log(avctx, AV_LOG_ERROR, "Error decoding skip value\n");
 return AVERROR_INVALIDDATA;
@@ -250,31 +250,31 @@ static int escape130_decode_frame(AVCodecContext *avctx, 
void *data,
 cb = old_cb[0];
 cr = old_cr[0];
 } else {
-if (get_bits1(&gb)) {
-unsigned sign_selector   = get_bits(&gb, 6);
-unsigned difference_selector = get_bits(&gb, 2);
-y_avg = 2 * get_bits(&gb, 5);
+if (bitstream_read_bit(&bc)) {
+unsigned sign_selector   = bitstream_read(&bc, 6);
+unsigned difference_selector = bitstream_read(&bc, 2);
+y_avg = 2 * bitstream_read(&bc, 5);
 for (i = 0; i < 4; i++) {
 y[i] = av_clip(y_avg + offset_table[difference_selector] *
sign_table[sign_selector][i], 0, 63);
 }
-} else if (get_bits1(&gb)) {
-if (get_bits1(&gb)) {
-y_avg = get_bits(&gb, 6);
+} else if (bitstream_read_bit(&bc)) {
+if (bitstream_read_bit(&bc)) {
+y_avg = bitstream_read(&bc, 6);
 } else {
-unsigned adjust_index = get_bits(&gb, 3);
+unsigned adjust_index = bitstream_read(&bc, 3);
 y_avg = (y_avg + luma_adjust[adjust_index]) & 63;
 }
 for (i = 0; i < 4; i++)
 y[i] = y_avg;
 }
 
-if (get_bits1(&gb)) {
-if (get_bits1(&gb)) {
-cb = get_bits(&gb, 5);
-cr = get_bits(&gb, 5);
+if (bitstream_read_bit(&bc)) {
+if (bitstream_read_bit(&bc)) {
+cb = bitstream_read(&bc, 5);
+cr = bitstream_read(&bc, 5);
 } else {
-unsigned adjust_index = get_bits(&gb, 3);
+unsigned adjust_index = bitstream_read(&bc, 3);
 cb = (cb + chroma_adjust[0][adjust_index]) & 31;
 cr = (cr + chroma_adjust[1][adjust_index]) & 31;
 }
@@ -333,7 +333,7 @@ static int escape130_decode_frame(AVCodecContext *avctx, 
void *data,
 }
 
 ff_dlog(avctx, "Frame data: provided %d bytes, used %d bytes\n",
-buf_size, get_bits_count(&gb) >> 3);
+buf_size, bitstream_tell(&bc) >> 3);
 
 FFSWAP(uint8_t*, s->old_y, s->new_y);
 FFSWAP(uint8_t*, s->old_u, s->new_u);
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 09/35] atrac: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/atrac1.c | 34 ++-
 libavcodec/atrac3.c | 97 +++--
 2 files changed, 67 insertions(+), 64 deletions(-)

diff --git a/libavcodec/atrac1.c b/libavcodec/atrac1.c
index e938976..60be853 100644
--- a/libavcodec/atrac1.c
+++ b/libavcodec/atrac1.c
@@ -33,8 +33,9 @@
 #include 
 
 #include "libavutil/float_dsp.h"
+
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "fft.h"
 #include "internal.h"
 #include "sinewin.h"
@@ -164,30 +165,31 @@ static int at1_imdct_block(AT1SUCtx* su, AT1Ctx *q)
  * Parse the block size mode byte
  */
 
-static int at1_parse_bsm(GetBitContext* gb, int log2_block_cnt[AT1_QMF_BANDS])
+static int at1_parse_bsm(BitstreamContext *bc,
+ int log2_block_cnt[AT1_QMF_BANDS])
 {
 int log2_block_count_tmp, i;
 
 for (i = 0; i < 2; i++) {
 /* low and mid band */
-log2_block_count_tmp = get_bits(gb, 2);
+log2_block_count_tmp = bitstream_read(bc, 2);
 if (log2_block_count_tmp & 1)
 return AVERROR_INVALIDDATA;
 log2_block_cnt[i] = 2 - log2_block_count_tmp;
 }
 
 /* high band */
-log2_block_count_tmp = get_bits(gb, 2);
+log2_block_count_tmp = bitstream_read(bc, 2);
 if (log2_block_count_tmp != 0 && log2_block_count_tmp != 3)
 return AVERROR_INVALIDDATA;
 log2_block_cnt[IDX_HIGH_BAND] = 3 - log2_block_count_tmp;
 
-skip_bits(gb, 2);
+bitstream_skip(bc, 2);
 return 0;
 }
 
 
-static int at1_unpack_dequant(GetBitContext* gb, AT1SUCtx* su,
+static int at1_unpack_dequant(BitstreamContext *bc, AT1SUCtx *su,
   float spec[AT1_SU_SAMPLES])
 {
 int bits_used, band_num, bfu_num, i;
@@ -195,22 +197,22 @@ static int at1_unpack_dequant(GetBitContext* gb, 
AT1SUCtx* su,
 uint8_t idsfs[AT1_MAX_BFU]; ///< the scalefactor indexes 
for each BFU
 
 /* parse the info byte (2nd byte) telling how much BFUs were coded */
-su->num_bfus = bfu_amount_tab1[get_bits(gb, 3)];
+su->num_bfus = bfu_amount_tab1[bitstream_read(bc, 3)];
 
 /* calc number of consumed bits:
 num_BFUs * (idwl(4bits) + idsf(6bits)) + log2_block_count(8bits) + 
info_byte(8bits)
 + info_byte_copy(8bits) + log2_block_count_copy(8bits) */
 bits_used = su->num_bfus * 10 + 32 +
-bfu_amount_tab2[get_bits(gb, 2)] +
-(bfu_amount_tab3[get_bits(gb, 3)] << 1);
+bfu_amount_tab2[bitstream_read(bc, 2)] +
+(bfu_amount_tab3[bitstream_read(bc, 3)] << 1);
 
 /* get word length index (idwl) for each BFU */
 for (i = 0; i < su->num_bfus; i++)
-idwls[i] = get_bits(gb, 4);
+idwls[i] = bitstream_read(bc, 4);
 
 /* get scalefactor index (idsf) for each BFU */
 for (i = 0; i < su->num_bfus; i++)
-idsfs[i] = get_bits(gb, 6);
+idsfs[i] = bitstream_read(bc, 6);
 
 /* zero idwl/idsf for empty BFUs */
 for (i = su->num_bfus; i < AT1_MAX_BFU; i++)
@@ -240,7 +242,7 @@ static int at1_unpack_dequant(GetBitContext* gb, AT1SUCtx* 
su,
 /* read in a quantized spec and convert it to
  * signed int and then inverse quantization
  */
-spec[pos+i] = get_sbits(gb, word_len) * scale_factor * 
max_quant;
+spec[pos+i] = bitstream_read_signed(bc, word_len) * 
scale_factor * max_quant;
 }
 } else { /* word_len = 0 -> empty BFU, zero all specs in the emty 
BFU */
 memset(&spec[pos], 0, num_specs * sizeof(float));
@@ -277,7 +279,7 @@ static int atrac1_decode_frame(AVCodecContext *avctx, void 
*data,
 int buf_size   = avpkt->size;
 AT1Ctx *q  = avctx->priv_data;
 int ch, ret;
-GetBitContext gb;
+BitstreamContext bc;
 
 
 if (buf_size < 212 * avctx->channels) {
@@ -295,14 +297,14 @@ static int atrac1_decode_frame(AVCodecContext *avctx, 
void *data,
 for (ch = 0; ch < avctx->channels; ch++) {
 AT1SUCtx* su = &q->SUs[ch];
 
-init_get_bits(&gb, &buf[212 * ch], 212 * 8);
+bitstream_init(&bc, &buf[212 * ch], 212 * 8);
 
 /* parse block_size_mode, 1st byte */
-ret = at1_parse_bsm(&gb, su->log2_block_count);
+ret = at1_parse_bsm(&bc, su->log2_block_count);
 if (ret < 0)
 return ret;
 
-ret = at1_unpack_dequant(&gb, su, q->spec);
+ret = at1_unpack_dequant(&bc, su, q->spec);
 if (ret < 0)
 return ret;
 
diff --git a/libavcodec/atrac3.c b/libavcodec/atrac3.c
index 2e1fd3c..be32a0e 100644
--- a/libavcodec/atrac3.c
+++ b/libavcodec/atrac3.c
@@ -38,10 +38,11 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/float_dsp.h"
+
 #include "avcodec.h"
+#include "bitstream.h"
 #include "bytestream.h"
 #include "fft.h"
-#include "get_bits.h"
 #include "internal.h"
 
 #include "atrac.h"
@@ 

[libav-devel] [PATCH 14/35] cdxl: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/cdxl.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/libavcodec/cdxl.c b/libavcodec/cdxl.c
index 99e96eb..4c0410d 100644
--- a/libavcodec/cdxl.c
+++ b/libavcodec/cdxl.c
@@ -21,8 +21,9 @@
 
 #include "libavutil/intreadwrite.h"
 #include "libavutil/imgutils.h"
+
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "internal.h"
 
 #define BIT_PLANAR   0x00
@@ -69,30 +70,30 @@ static void import_palette(CDXLVideoContext *c, uint32_t 
*new_palette)
 
 static void bitplanar2chunky(CDXLVideoContext *c, int linesize, uint8_t *out)
 {
-GetBitContext gb;
+BitstreamContext bc;
 int x, y, plane;
 
-init_get_bits(&gb, c->video, c->video_size * 8);
+bitstream_init(&bc, c->video, c->video_size * 8);
 for (plane = 0; plane < c->bpp; plane++) {
 for (y = 0; y < c->avctx->height; y++) {
 for (x = 0; x < c->avctx->width; x++)
-out[linesize * y + x] |= get_bits1(&gb) << plane;
-skip_bits(&gb, c->padded_bits);
+out[linesize * y + x] |= bitstream_read_bit(&bc) << plane;
+bitstream_skip(&bc, c->padded_bits);
 }
 }
 }
 
 static void bitline2chunky(CDXLVideoContext *c, int linesize, uint8_t *out)
 {
-GetBitContext  gb;
+BitstreamContext bc;
 int x, y, plane;
 
-init_get_bits(&gb, c->video, c->video_size * 8);
+bitstream_init(&bc, c->video, c->video_size * 8);
 for (y = 0; y < c->avctx->height; y++) {
 for (plane = 0; plane < c->bpp; plane++) {
 for (x = 0; x < c->avctx->width; x++)
-out[linesize * y + x] |= get_bits1(&gb) << plane;
-skip_bits(&gb, c->padded_bits);
+out[linesize * y + x] |= bitstream_read_bit(&bc) << plane;
+bitstream_skip(&bc, c->padded_bits);
 }
 }
 }
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 06/35] 4xm: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/4xm.c | 27 ++-
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/libavcodec/4xm.c b/libavcodec/4xm.c
index b2d4db2..ee9d020 100644
--- a/libavcodec/4xm.c
+++ b/libavcodec/4xm.c
@@ -29,11 +29,12 @@
 #include "libavutil/frame.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/intreadwrite.h"
+
 #include "avcodec.h"
+#include "bitstream.h"
 #include "blockdsp.h"
 #include "bswapdsp.h"
 #include "bytestream.h"
-#include "get_bits.h"
 #include "internal.h"
 
 #define BLOCK_TYPE_VLC_BITS 5
@@ -136,8 +137,8 @@ typedef struct FourXContext {
 BswapDSPContext bbdsp;
 uint16_t *frame_buffer;
 uint16_t *last_frame_buffer;
-GetBitContext pre_gb;  ///< ac/dc prefix
-GetBitContext gb;
+BitstreamContext pre_bc;// ac/dc prefix
+BitstreamContext bc;
 GetByteContext g;
 GetByteContext g2;
 int mv[256];
@@ -352,8 +353,8 @@ static int decode_p_block(FourXContext *f, uint16_t *dst, 
uint16_t *src,
 return AVERROR_INVALIDDATA;
 
 h = 1 << log2h;
-code  = get_vlc2(&f->gb, block_type_vlc[1 - (f->version > 1)][index].table,
- BLOCK_TYPE_VLC_BITS, 1);
+code  = bitstream_read_vlc(&f->bc, block_type_vlc[1 - (f->version > 
1)][index].table,
+   BLOCK_TYPE_VLC_BITS, 1);
 if (code < 0 || code > 6)
 return AVERROR_INVALIDDATA;
 
@@ -453,7 +454,7 @@ static int decode_p_frame(FourXContext *f, const uint8_t 
*buf, int length)
bitstream_size / 4);
 memset((uint8_t*)f->bitstream_buffer + bitstream_size,
0, AV_INPUT_BUFFER_PADDING_SIZE);
-init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size);
+bitstream_init(&f->bc, f->bitstream_buffer, 8 * bitstream_size);
 
 wordstream_offset = extra + bitstream_size;
 bytestream_offset = extra + bitstream_size + wordstream_size;
@@ -484,19 +485,19 @@ static int decode_i_block(FourXContext *f, int16_t *block)
 int code, i, j, level, val;
 
 /* DC coef */
-val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
+val = bitstream_read_vlc(&f->pre_bc, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 if (val >> 4)
 av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 
 if (val)
-val = get_xbits(&f->gb, val);
+val = bitstream_read_xbits(&f->bc, val);
 
 val= val * dequant_table[0] + f->last_dc;
 f->last_dc = block[0] = val;
 /* AC coefs */
 i = 1;
 for (;;) {
-code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
+code = bitstream_read_vlc(&f->pre_bc, f->pre_vlc.table, ACDC_VLC_BITS, 
3);
 
 /* EOB */
 if (code == 0)
@@ -504,7 +505,7 @@ static int decode_i_block(FourXContext *f, int16_t *block)
 if (code == 0xf0) {
 i += 16;
 } else {
-level = get_xbits(&f->gb, code & 0xf);
+level = bitstream_read_xbits(&f->bc, code & 0xf);
 i+= code >> 4;
 if (i >= 64) {
 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
@@ -764,7 +765,7 @@ static int decode_i_frame(FourXContext *f, const uint8_t 
*buf, int length)
 return AVERROR_INVALIDDATA;
 }
 
-init_get_bits(&f->gb, buf + 4, 8 * bitstream_size);
+bitstream_init(&f->bc, buf + 4, 8 * bitstream_size);
 
 prestream_size = length + buf - prestream;
 
@@ -776,7 +777,7 @@ static int decode_i_frame(FourXContext *f, const uint8_t 
*buf, int length)
prestream_size / 4);
 memset((uint8_t*)f->bitstream_buffer + prestream_size,
0, AV_INPUT_BUFFER_PADDING_SIZE);
-init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size);
+bitstream_init(&f->pre_bc, f->bitstream_buffer, 8 * prestream_size);
 
 f->last_dc = 0 * 128 * 8 * 8;
 
@@ -789,7 +790,7 @@ static int decode_i_frame(FourXContext *f, const uint8_t 
*buf, int length)
 }
 }
 
-if (get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
+if (bitstream_read_vlc(&f->pre_bc, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 
256)
 av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 
 return 0;
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 03/35] eamad: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/eamad.c | 42 +++---
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/libavcodec/eamad.c b/libavcodec/eamad.c
index 070cfdb..7509c2d 100644
--- a/libavcodec/eamad.c
+++ b/libavcodec/eamad.c
@@ -29,16 +29,17 @@
  */
 
 #include "avcodec.h"
+#include "bitstream.h"
 #include "blockdsp.h"
 #include "bytestream.h"
 #include "bswapdsp.h"
-#include "get_bits.h"
 #include "aandcttab.h"
 #include "eaidct.h"
 #include "idctdsp.h"
 #include "internal.h"
 #include "mpeg12data.h"
 #include "mpeg12vlc.h"
+#include "vlc.h"
 
 #define EA_PREAMBLE_SIZE8
 #define MADk_TAG MKTAG('M', 'A', 'D', 'k')/* MAD I-frame */
@@ -51,7 +52,7 @@ typedef struct MadContext {
 BswapDSPContext bbdsp;
 IDCTDSPContext idsp;
 AVFrame *last_frame;
-GetBitContext gb;
+BitstreamContext bc;
 void *bitstream_buf;
 unsigned int bitstream_buf_size;
 DECLARE_ALIGNED(16, int16_t, block)[64];
@@ -129,17 +130,15 @@ static inline void decode_block_intra(MadContext *s, 
int16_t * block)
 const uint8_t *scantable = s->scantable.permutated;
 int16_t *quant_matrix = s->quant_matrix;
 
-block[0] = (128 + get_sbits(&s->gb, 8)) * quant_matrix[0];
+block[0] = (128 + bitstream_read_signed(&s->bc, 8)) * quant_matrix[0];
 
 /* The RL decoder is derived from mpeg1_decode_block_intra;
Escaped level and run values a decoded differently */
 i = 0;
 {
-OPEN_READER(re, &s->gb);
 /* now quantify & encode AC coefficients */
 for (;;) {
-UPDATE_CACHE(re, &s->gb);
-GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 
0);
+BITSTREAM_RL_VLC(level, run, &s->bc, rl->rl_vlc[0], TEX_VLC_BITS, 
2);
 
 if (level == 127) {
 break;
@@ -153,15 +152,12 @@ static inline void decode_block_intra(MadContext *s, 
int16_t * block)
 j = scantable[i];
 level = (level*quant_matrix[j]) >> 4;
 level = (level-1)|1;
-level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, 
&s->gb, 1);
-LAST_SKIP_BITS(re, &s->gb, 1);
+level = bitstream_apply_sign(&s->bc, level);
 } else {
 /* escape */
-UPDATE_CACHE(re, &s->gb);
-level = SHOW_SBITS(re, &s->gb, 10); SKIP_BITS(re, &s->gb, 10);
+level = bitstream_read_signed(&s->bc, 10);
 
-UPDATE_CACHE(re, &s->gb);
-run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 
6);
+run = bitstream_read(&s->bc, 6) + 1;
 
 i += run;
 if (i > 63) {
@@ -183,17 +179,17 @@ static inline void decode_block_intra(MadContext *s, 
int16_t * block)
 
 block[j] = level;
 }
-CLOSE_READER(re, &s->gb);
 }
 }
 
-static int decode_motion(GetBitContext *gb)
+static int decode_motion(BitstreamContext *bc)
 {
 int value = 0;
-if (get_bits1(gb)) {
-if (get_bits1(gb))
+
+if (bitstream_read_bit(bc)) {
+if (bitstream_read_bit(bc))
 value = -17;
-value += get_bits(gb, 4) + 1;
+value += bitstream_read(bc, 4) + 1;
 }
 return value;
 }
@@ -205,11 +201,11 @@ static void decode_mb(MadContext *s, AVFrame *frame, int 
inter)
 int j;
 
 if (inter) {
-int v = decode210(&s->gb);
+int v = bitstream_decode210(&s->bc);
 if (v < 2) {
-mv_map = v ? get_bits(&s->gb, 6) : 63;
-mv_x = decode_motion(&s->gb);
-mv_y = decode_motion(&s->gb);
+mv_map = v ? bitstream_read(&s->bc, 6) : 63;
+mv_x = decode_motion(&s->bc);
+mv_y = decode_motion(&s->bc);
 } else {
 mv_map = 0;
 }
@@ -217,7 +213,7 @@ static void decode_mb(MadContext *s, AVFrame *frame, int 
inter)
 
 for (j=0; j<6; j++) {
 if (mv_map & (1<<j)) {
-int add = 2*decode_motion(&s->gb);
+int add = 2 * decode_motion(&s->bc);
 comp_block(s, frame, s->mb_x, s->mb_y, j, mv_x, mv_y, add);
 } else {
 s->bdsp.clear_block(s->block);
@@ -299,7 +295,7 @@ static int decode_frame(AVCodecContext *avctx,
 return AVERROR(ENOMEM);
 s->bbdsp.bswap16_buf(s->bitstream_buf, (const uint16_t *)(buf + 
bytestream2_tell()),
  bytestream2_get_bytes_left() / 2);
-init_get_bits(&s->gb, s->bitstream_buf, 
8*(bytestream2_get_bytes_left()));
+bitstream_init8(&s->bc, s->bitstream_buf, bytestream2_get_bytes_left());
 
 for (s->mb_y=0; s->mb_y < (avctx->height+15)/16; s->mb_y++)
 for (s->mb_x=0; s->mb_x < (avctx->width +15)/16; s->mb_x++)
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 18/35] dvdsubdec: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/dvdsubdec.c | 34 +-
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/libavcodec/dvdsubdec.c b/libavcodec/dvdsubdec.c
index 86c2873..b02bb6b 100644
--- a/libavcodec/dvdsubdec.c
+++ b/libavcodec/dvdsubdec.c
@@ -20,7 +20,7 @@
  */
 
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "internal.h"
 
 #include "libavutil/attributes.h"
@@ -50,13 +50,13 @@ static void yuv_a_to_rgba(const uint8_t *ycbcr, const 
uint8_t *alpha, uint32_t *
 }
 }
 
-static int decode_run_2bit(GetBitContext *gb, int *color)
+static int decode_run_2bit(BitstreamContext *bc, int *color)
 {
 unsigned int v, t;
 
 v = 0;
 for (t = 1; v < t && t <= 0x40; t <<= 2)
-v = (v << 4) | get_bits(gb, 4);
+v = (v << 4) | bitstream_read(bc, 4);
 *color = v & 3;
 if (v < 4) { /* Code for fill rest of line */
 return INT_MAX;
@@ -64,23 +64,23 @@ static int decode_run_2bit(GetBitContext *gb, int *color)
 return v >> 2;
 }
 
-static int decode_run_8bit(GetBitContext *gb, int *color)
+static int decode_run_8bit(BitstreamContext *bc, int *color)
 {
 int len;
-int has_run = get_bits1(gb);
-if (get_bits1(gb))
-*color = get_bits(gb, 8);
+int has_run = bitstream_read_bit(bc);
+if (bitstream_read_bit(bc))
+*color = bitstream_read(bc, 8);
 else
-*color = get_bits(gb, 2);
+*color = bitstream_read(bc, 2);
 if (has_run) {
-if (get_bits1(gb)) {
-len = get_bits(gb, 7);
+if (bitstream_read_bit(bc)) {
+len = bitstream_read(bc, 7);
 if (len == 0)
 len = INT_MAX;
 else
 len += 9;
 } else
-len = get_bits(gb, 3) + 2;
+len = bitstream_read(bc, 3) + 2;
 } else
 len = 1;
 return len;
@@ -89,24 +89,24 @@ static int decode_run_8bit(GetBitContext *gb, int *color)
 static int decode_rle(uint8_t *bitmap, int linesize, int w, int h,
   const uint8_t *buf, int start, int buf_size, int is_8bit)
 {
-GetBitContext gb;
+BitstreamContext bc;
 int bit_len;
 int x, y, len, color;
 uint8_t *d;
 
 bit_len = (buf_size - start) * 8;
-init_get_bits(&gb, buf + start, bit_len);
+bitstream_init(&bc, buf + start, bit_len);
 
 x = 0;
 y = 0;
 d = bitmap;
 for(;;) {
-if (get_bits_count(&gb) > bit_len)
+if (bitstream_tell(&bc) > bit_len)
 return -1;
 if (is_8bit)
-len = decode_run_8bit(&gb, &color);
+len = decode_run_8bit(&bc, &color);
 else
-len = decode_run_2bit(&gb, &color);
+len = decode_run_2bit(&bc, &color);
 len = FFMIN(len, w - x);
 memset(d + x, color, len);
 x += len;
@@ -117,7 +117,7 @@ static int decode_rle(uint8_t *bitmap, int linesize, int w, 
int h,
 d += linesize;
 x = 0;
 /* byte align */
-align_get_bits(&gb);
+bitstream_align(&bc);
 }
 }
 return 0;
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 05/35] on2avc: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/on2avc.c | 64 +++--
 1 file changed, 33 insertions(+), 31 deletions(-)

diff --git a/libavcodec/on2avc.c b/libavcodec/on2avc.c
index 2a528c6..1b81980 100644
--- a/libavcodec/on2avc.c
+++ b/libavcodec/on2avc.c
@@ -22,11 +22,13 @@
 
 #include "libavutil/channel_layout.h"
 #include "libavutil/float_dsp.h"
+
 #include "avcodec.h"
+#include "bitstream.h"
 #include "bytestream.h"
 #include "fft.h"
-#include "get_bits.h"
 #include "internal.h"
+#include "vlc.h"
 
 #include "on2avcdata.h"
 
@@ -84,11 +86,11 @@ typedef struct On2AVCContext {
 DECLARE_ALIGNED(32, float, short_win)[ON2AVC_SUBFRAME_SIZE / 8];
 } On2AVCContext;
 
-static void on2avc_read_ms_info(On2AVCContext *c, GetBitContext *gb)
+static void on2avc_read_ms_info(On2AVCContext *c, BitstreamContext *bc)
 {
 int w, b, band_off = 0;
 
-c->ms_present = get_bits1(gb);
+c->ms_present = bitstream_read_bit(bc);
 if (!c->ms_present)
 return;
 for (w = 0; w < c->num_windows; w++) {
@@ -100,12 +102,12 @@ static void on2avc_read_ms_info(On2AVCContext *c, 
GetBitContext *gb)
 continue;
 }
 for (b = 0; b < c->num_bands; b++)
-c->ms_info[band_off++] = get_bits1(gb);
+c->ms_info[band_off++] = bitstream_read_bit(bc);
 }
 }
 
 // do not see Table 17 in ISO/IEC 13818-7
-static int on2avc_decode_band_types(On2AVCContext *c, GetBitContext *gb)
+static int on2avc_decode_band_types(On2AVCContext *c, BitstreamContext *bc)
 {
 int bits_per_sect = c->is_long ? 5 : 3;
 int esc_val = (1 << bits_per_sect) - 1;
@@ -113,10 +115,10 @@ static int on2avc_decode_band_types(On2AVCContext *c, 
GetBitContext *gb)
 int band = 0, i, band_type, run_len, run;
 
 while (band < num_bands) {
-band_type = get_bits(gb, 4);
+band_type = bitstream_read(bc, 4);
 run_len   = 1;
 do {
-run = get_bits(gb, bits_per_sect);
+run = bitstream_read(bc, bits_per_sect);
 run_len += run;
 } while (run == esc_val);
 if (band + run_len > num_bands) {
@@ -135,7 +137,7 @@ static int on2avc_decode_band_types(On2AVCContext *c, 
GetBitContext *gb)
 
 // completely not like Table 18 in ISO/IEC 13818-7
 // (no intensity stereo, different coding for the first coefficient)
-static int on2avc_decode_band_scales(On2AVCContext *c, GetBitContext *gb)
+static int on2avc_decode_band_scales(On2AVCContext *c, BitstreamContext *bc)
 {
 int w, w2, b, scale, first = 1;
 int band_off = 0;
@@ -165,10 +167,10 @@ static int on2avc_decode_band_scales(On2AVCContext *c, 
GetBitContext *gb)
 }
 }
 if (first) {
-scale = get_bits(gb, 7);
+scale = bitstream_read(bc, 7);
 first = 0;
 } else {
-scale += get_vlc2(gb, c->scale_diff.table, 9, 3) - 60;
+scale += bitstream_read_vlc(bc, c->scale_diff.table, 9, 3) - 
60;
 }
 if (scale < 0 || scale > 127) {
 av_log(c->avctx, AV_LOG_ERROR, "Invalid scale value %d\n",
@@ -188,13 +190,13 @@ static inline float on2avc_scale(int v, float scale)
 }
 
 // spectral data is coded completely differently - there are no unsigned 
codebooks
-static int on2avc_decode_quads(On2AVCContext *c, GetBitContext *gb, float *dst,
+static int on2avc_decode_quads(On2AVCContext *c, BitstreamContext *bc, float 
*dst,
int dst_size, int type, float band_scale)
 {
 int i, j, val, val1;
 
 for (i = 0; i < dst_size; i += 4) {
-val = get_vlc2(gb, c->cb_vlc[type].table, 9, 3);
+val = bitstream_read_vlc(bc, c->cb_vlc[type].table, 9, 3);
 
 for (j = 0; j < 4; j++) {
 val1 = sign_extend((val >> (12 - j * 4)) & 0xF, 4);
@@ -205,11 +207,11 @@ static int on2avc_decode_quads(On2AVCContext *c, 
GetBitContext *gb, float *dst,
 return 0;
 }
 
-static inline int get_egolomb(GetBitContext *gb)
+static inline int get_egolomb(BitstreamContext *bc)
 {
 int v = 4;
 
-while (get_bits1(gb)) {
+while (bitstream_read_bit(bc)) {
 v++;
 if (v > 30) {
 av_log(NULL, AV_LOG_WARNING, "Too large golomb code in 
get_egolomb.\n");
@@ -218,27 +220,27 @@ static inline int get_egolomb(GetBitContext *gb)
 }
 }
 
-return (1 << v) + get_bits_long(gb, v);
+return (1 << v) + bitstream_read(bc, v);
 }
 
-static int on2avc_decode_pairs(On2AVCContext *c, GetBitContext *gb, float *dst,
+static int on2avc_decode_pairs(On2AVCContext *c, BitstreamContext *bc, float 
*dst,
int dst_size, int type, float band_scale)
 {
 int i, val, val1, val2, sign;
 
 for (i = 0; i < dst_size; i += 2) {
-val = get_vlc2(gb, c->cb_vlc[type].table, 9, 3);
+val = bitstream_read_vlc(bc, c->cb_vlc[type].table, 9, 3);
 
 val1 = 

[libav-devel] [PATCH 12/35] bink: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/bink.c | 264 ++
 1 file changed, 128 insertions(+), 136 deletions(-)

diff --git a/libavcodec/bink.c b/libavcodec/bink.c
index 7433697..74db80a 100644
--- a/libavcodec/bink.c
+++ b/libavcodec/bink.c
@@ -28,8 +28,8 @@
 #include "avcodec.h"
 #include "binkdata.h"
 #include "binkdsp.h"
+#include "bitstream.h"
 #include "blockdsp.h"
-#include "get_bits.h"
 #include "hpeldsp.h"
 #include "internal.h"
 #include "mathops.h"
@@ -93,8 +93,9 @@ typedef struct Tree {
 uint8_t syms[16]; ///< leaf value to symbol mapping
 } Tree;
 
-#define GET_HUFF(gb, tree)  (tree).syms[get_vlc2(gb, 
bink_trees[(tree).vlc_num].table,\
- 
bink_trees[(tree).vlc_num].bits, 1)]
+#define GET_HUFF(bc, tree)\
+(tree).syms[bitstream_read_vlc(bc, bink_trees[(tree).vlc_num].table,  \
+   bink_trees[(tree).vlc_num].bits, 1)]
 
 /**
  * data structure used for decoding single Bink data type
@@ -204,18 +205,18 @@ static av_cold void free_bundles(BinkContext *c)
 /**
  * Merge two consequent lists of equal size depending on bits read.
  *
- * @param gb   context for reading bits
+ * @param bc   context for reading bits
  * @param dst  buffer where merged list will be written to
  * @param src  pointer to the head of the first list (the second lists starts 
at src+size)
  * @param size input lists size
  */
-static void merge(GetBitContext *gb, uint8_t *dst, uint8_t *src, int size)
+static void merge(BitstreamContext *bc, uint8_t *dst, uint8_t *src, int size)
 {
 uint8_t *src2 = src + size;
 int size2 = size;
 
 do {
-if (!get_bits1(gb)) {
+if (!bitstream_read_bit(bc)) {
 *dst++ = *src++;
 size--;
 } else {
@@ -233,37 +234,37 @@ static void merge(GetBitContext *gb, uint8_t *dst, 
uint8_t *src, int size)
 /**
  * Read information about Huffman tree used to decode data.
  *
- * @param gb   context for reading bits
+ * @param bc   context for reading bits
  * @param tree pointer for storing tree data
  */
-static void read_tree(GetBitContext *gb, Tree *tree)
+static void read_tree(BitstreamContext *bc, Tree *tree)
 {
 uint8_t tmp1[16] = { 0 }, tmp2[16], *in = tmp1, *out = tmp2;
 int i, t, len;
 
-tree->vlc_num = get_bits(gb, 4);
+tree->vlc_num = bitstream_read(bc, 4);
 if (!tree->vlc_num) {
 for (i = 0; i < 16; i++)
 tree->syms[i] = i;
 return;
 }
-if (get_bits1(gb)) {
-len = get_bits(gb, 3);
+if (bitstream_read_bit(bc)) {
+len = bitstream_read(bc, 3);
 for (i = 0; i <= len; i++) {
-tree->syms[i] = get_bits(gb, 4);
+tree->syms[i] = bitstream_read(bc, 4);
 tmp1[tree->syms[i]] = 1;
 }
 for (i = 0; i < 16 && len < 16 - 1; i++)
 if (!tmp1[i])
 tree->syms[++len] = i;
 } else {
-len = get_bits(gb, 2);
+len = bitstream_read(bc, 2);
 for (i = 0; i < 16; i++)
 in[i] = i;
 for (i = 0; i <= len; i++) {
 int size = 1 << i;
 for (t = 0; t < 16; t += size << 1)
-merge(gb, out + t, in + t, size);
+merge(bc, out + t, in + t, size);
 FFSWAP(uint8_t*, in, out);
 }
 memcpy(tree->syms, in, 16);
@@ -273,21 +274,21 @@ static void read_tree(GetBitContext *gb, Tree *tree)
 /**
  * Prepare bundle for decoding data.
  *
- * @param gb  context for reading bits
+ * @param bc  context for reading bits
  * @param c   decoder context
  * @param bundle_num  number of the bundle to initialize
  */
-static void read_bundle(GetBitContext *gb, BinkContext *c, int bundle_num)
+static void read_bundle(BitstreamContext *bc, BinkContext *c, int bundle_num)
 {
 int i;
 
 if (bundle_num == BINK_SRC_COLORS) {
 for (i = 0; i < 16; i++)
-read_tree(gb, &c->col_high[i]);
+read_tree(bc, &c->col_high[i]);
 c->col_lastval = 0;
 }
 if (bundle_num != BINK_SRC_INTRA_DC && bundle_num != BINK_SRC_INTER_DC)
-read_tree(gb, &c->bundle[bundle_num].tree);
+read_tree(bc, &c->bundle[bundle_num].tree);
 c->bundle[bundle_num].cur_dec =
 c->bundle[bundle_num].cur_ptr = c->bundle[bundle_num].data;
 }
@@ -295,66 +296,64 @@ static void read_bundle(GetBitContext *gb, BinkContext 
*c, int bundle_num)
 /**
  * common check before starting decoding bundle data
  *
- * @param gb context for reading bits
+ * @param bc context for reading bits
  * @param b  bundle
  * @param t  variable where number of elements to decode will be stored
  */
-#define CHECK_READ_VAL(gb, b, t) \
+#define CHECK_READ_VAL(bc, b, t) \
 if (!b->cur_dec || (b->cur_dec > b->cur_ptr)) \
 return 0; \
-t = get_bits(gb, b->len); \
+t = 

[libav-devel] [PATCH 17/35] dss_sp: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/dss_sp.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/libavcodec/dss_sp.c b/libavcodec/dss_sp.c
index 20b0528..44d98d8 100644
--- a/libavcodec/dss_sp.c
+++ b/libavcodec/dss_sp.c
@@ -25,7 +25,7 @@
 #include "libavutil/opt.h"
 
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "internal.h"
 
 #define SUBFRAMES 4
@@ -302,7 +302,7 @@ static av_cold int dss_sp_decode_init(AVCodecContext *avctx)
 
 static void dss_sp_unpack_coeffs(DssSpContext *p, const uint8_t *src)
 {
-GetBitContext gb;
+BitstreamContext bc;
 DssSpFrame *fparam = &p->fparam;
 int i;
 int subframe_idx;
@@ -315,24 +315,24 @@ static void dss_sp_unpack_coeffs(DssSpContext *p, const 
uint8_t *src)
 p->bits[i + 1] = src[i];
 }
 
-init_get_bits(&gb, p->bits, DSS_SP_FRAME_SIZE * 8);
+bitstream_init(&bc, p->bits, DSS_SP_FRAME_SIZE * 8);
 
 for (i = 0; i < 2; i++)
-fparam->filter_idx[i] = get_bits(&gb, 5);
+fparam->filter_idx[i] = bitstream_read(&bc, 5);
 for (; i < 8; i++)
-fparam->filter_idx[i] = get_bits(&gb, 4);
+fparam->filter_idx[i] = bitstream_read(&bc, 4);
 for (; i < 14; i++)
-fparam->filter_idx[i] = get_bits(&gb, 3);
+fparam->filter_idx[i] = bitstream_read(&bc, 3);
 
 for (subframe_idx = 0; subframe_idx < 4; subframe_idx++) {
-fparam->sf_adaptive_gain[subframe_idx] = get_bits(&gb, 5);
+fparam->sf_adaptive_gain[subframe_idx] = bitstream_read(&bc, 5);
 
-fparam->sf[subframe_idx].combined_pulse_pos = get_bits_long(&gb, 31);
+fparam->sf[subframe_idx].combined_pulse_pos = bitstream_read(&bc, 31);
 
-fparam->sf[subframe_idx].gain = get_bits(&gb, 6);
+fparam->sf[subframe_idx].gain = bitstream_read(&bc, 6);
 
 for (i = 0; i < 7; i++)
-fparam->sf[subframe_idx].pulse_val[i] = get_bits(&gb, 3);
+fparam->sf[subframe_idx].pulse_val[i] = bitstream_read(&bc, 3);
 }
 
 for (subframe_idx = 0; subframe_idx < 4; subframe_idx++) {
@@ -394,7 +394,7 @@ static void dss_sp_unpack_coeffs(DssSpContext *p, const 
uint8_t *src)
 }
 }
 
-combined_pitch = get_bits(&gb, 24);
+combined_pitch = bitstream_read(&bc, 24);
 
 fparam->pitch_lag[0] = (combined_pitch % 151) + 36;
 
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 15/35] cljrdec: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/cljrdec.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/libavcodec/cljrdec.c b/libavcodec/cljrdec.c
index 33d8023..833707b 100644
--- a/libavcodec/cljrdec.c
+++ b/libavcodec/cljrdec.c
@@ -25,7 +25,7 @@
  */
 
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "internal.h"
 
 static int decode_frame(AVCodecContext *avctx,
@@ -34,7 +34,7 @@ static int decode_frame(AVCodecContext *avctx,
 {
 const uint8_t *buf = avpkt->data;
 int buf_size   = avpkt->size;
-GetBitContext gb;
+BitstreamContext bc;
 AVFrame * const p = data;
 int x, y, ret;
 
@@ -56,20 +56,20 @@ static int decode_frame(AVCodecContext *avctx,
 p->pict_type = AV_PICTURE_TYPE_I;
 p->key_frame = 1;
 
-init_get_bits(&gb, buf, buf_size * 8);
+bitstream_init(&bc, buf, buf_size * 8);
 
 for (y = 0; y < avctx->height; y++) {
 uint8_t *luma = &p->data[0][y * p->linesize[0]];
 uint8_t *cb   = &p->data[1][y * p->linesize[1]];
 uint8_t *cr   = &p->data[2][y * p->linesize[2]];
 for (x = 0; x < avctx->width; x += 4) {
-luma[3] = get_bits(&gb, 5) << 3;
-luma[2] = get_bits(&gb, 5) << 3;
-luma[1] = get_bits(&gb, 5) << 3;
-luma[0] = get_bits(&gb, 5) << 3;
+luma[3] = bitstream_read(&bc, 5) << 3;
+luma[2] = bitstream_read(&bc, 5) << 3;
+luma[1] = bitstream_read(&bc, 5) << 3;
+luma[0] = bitstream_read(&bc, 5) << 3;
 luma += 4;
-*(cb++) = get_bits(&gb, 6) << 2;
-*(cr++) = get_bits(&gb, 6) << 2;
+*(cb++) = bitstream_read(&bc, 6) << 2;
+*(cr++) = bitstream_read(&bc, 6) << 2;
 }
 }
 
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 10/35] atrac3plus: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/atrac3plus.c| 525 +++--
 libavcodec/atrac3plus.h|   7 +-
 libavcodec/atrac3plusdec.c |  15 +-
 3 files changed, 275 insertions(+), 272 deletions(-)

diff --git a/libavcodec/atrac3plus.c b/libavcodec/atrac3plus.c
index 076fb84..2731a80 100644
--- a/libavcodec/atrac3plus.c
+++ b/libavcodec/atrac3plus.c
@@ -26,8 +26,9 @@
  */
 
 #include "libavutil/avassert.h"
+
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "atrac3plus.h"
 #include "atrac3plus_data.h"
 
@@ -212,20 +213,20 @@ av_cold void ff_atrac3p_init_vlcs(AVCodec *codec)
 /**
  * Decode number of coded quantization units.
  *
- * @param[in] gbthe GetBit context
+ * @param[in] bcthe Bitstream context
  * @param[in,out] chan  ptr to the channel parameters
  * @param[in,out] ctx   ptr to the channel unit context
  * @param[in] avctx ptr to the AVCodecContext
  * @return result code: 0 = OK, otherwise - error code
  */
-static int num_coded_units(GetBitContext *gb, Atrac3pChanParams *chan,
+static int num_coded_units(BitstreamContext *bc, Atrac3pChanParams *chan,
Atrac3pChanUnitCtx *ctx, AVCodecContext *avctx)
 {
-chan->fill_mode = get_bits(gb, 2);
+chan->fill_mode = bitstream_read(bc, 2);
 if (!chan->fill_mode) {
 chan->num_coded_vals = ctx->num_quant_units;
 } else {
-chan->num_coded_vals = get_bits(gb, 5);
+chan->num_coded_vals = bitstream_read(bc, 5);
 if (chan->num_coded_vals > ctx->num_quant_units) {
 av_log(avctx, AV_LOG_ERROR,
"Invalid number of transmitted units!\n");
@@ -233,7 +234,7 @@ static int num_coded_units(GetBitContext *gb, 
Atrac3pChanParams *chan,
 }
 
 if (chan->fill_mode == 3)
-chan->split_point = get_bits(gb, 2) + (chan->ch_num << 1) + 1;
+chan->split_point = bitstream_read(bc, 2) + (chan->ch_num << 1) + 
1;
 }
 
 return 0;
@@ -318,21 +319,21 @@ static inline void unpack_vq_shape(int start_val, const 
int8_t *shape_vec,
 }
 }
 
-#define UNPACK_SF_VQ_SHAPE(gb, dst, num_vals)\
-start_val = get_bits((gb), 6);   \
-unpack_vq_shape(start_val, &atrac3p_sf_shapes[get_bits((gb), 6)][0], \
+#define UNPACK_SF_VQ_SHAPE(bc, dst, num_vals)  
\
+start_val = bitstream_read((bc), 6);   
\
+unpack_vq_shape(start_val, &atrac3p_sf_shapes[bitstream_read((bc), 6)][0], 
\
 (dst), (num_vals))
 
 /**
  * Decode word length for each quantization unit of a channel.
  *
- * @param[in] gbthe GetBit context
+ * @param[in] bcthe Bitstream context
  * @param[in,out] ctx   ptr to the channel unit context
  * @param[in] ch_numchannel to process
  * @param[in] avctx ptr to the AVCodecContext
  * @return result code: 0 = OK, otherwise - error code
  */
-static int decode_channel_wordlen(GetBitContext *gb, Atrac3pChanUnitCtx *ctx,
+static int decode_channel_wordlen(BitstreamContext *bc, Atrac3pChanUnitCtx 
*ctx,
   int ch_num, AVCodecContext *avctx)
 {
 int i, weight_idx = 0, delta, diff, pos, delta_bits, min_val, flag,
@@ -343,107 +344,107 @@ static int decode_channel_wordlen(GetBitContext *gb, 
Atrac3pChanUnitCtx *ctx,
 
 chan->fill_mode = 0;
 
-switch (get_bits(gb, 2)) { /* switch according to coding mode */
+switch (bitstream_read(bc, 2)) { /* switch according to coding mode */
 case 0: /* coded using constant number of bits */
 for (i = 0; i < ctx->num_quant_units; i++)
-chan->qu_wordlen[i] = get_bits(gb, 3);
+chan->qu_wordlen[i] = bitstream_read(bc, 3);
 break;
 case 1:
 if (ch_num) {
-if ((ret = num_coded_units(gb, chan, ctx, avctx)) < 0)
+if ((ret = num_coded_units(bc, chan, ctx, avctx)) < 0)
 return ret;
 
 if (chan->num_coded_vals) {
-vlc_tab = &wl_vlc_tabs[get_bits(gb, 2)];
+vlc_tab = &wl_vlc_tabs[bitstream_read(bc, 2)];
 
 for (i = 0; i < chan->num_coded_vals; i++) {
-delta = get_vlc2(gb, vlc_tab->table, vlc_tab->bits, 1);
+delta = bitstream_read_vlc(bc, vlc_tab->table, 
vlc_tab->bits, 1);
 chan->qu_wordlen[i] = (ref_chan->qu_wordlen[i] + delta) & 
7;
 }
 }
 } else {
-weight_idx = get_bits(gb, 2);
-if ((ret = num_coded_units(gb, chan, ctx, avctx)) < 0)
+weight_idx = bitstream_read(bc, 2);
+if ((ret = num_coded_units(bc, chan, ctx, avctx)) < 0)
 return ret;
 
 if (chan->num_coded_vals) {
-pos = get_bits(gb, 5);

[libav-devel] [PATCH 02/35] cllc: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/cllc.c | 88 +++
 1 file changed, 36 insertions(+), 52 deletions(-)

diff --git a/libavcodec/cllc.c b/libavcodec/cllc.c
index cdbed74..bac2b73 100644
--- a/libavcodec/cllc.c
+++ b/libavcodec/cllc.c
@@ -23,11 +23,13 @@
 #include <inttypes.h>
 
 #include "libavutil/intreadwrite.h"
+
+#include "bitstream.h"
 #include "bswapdsp.h"
 #include "canopus.h"
-#include "get_bits.h"
 #include "avcodec.h"
 #include "internal.h"
+#include "vlc.h"
 
 typedef struct CLLCContext {
 AVCodecContext *avctx;
@@ -37,7 +39,7 @@ typedef struct CLLCContext {
 int  swapped_buf_size;
 } CLLCContext;
 
-static int read_code_table(CLLCContext *ctx, GetBitContext *gb, VLC *vlc)
+static int read_code_table(CLLCContext *ctx, BitstreamContext *bc, VLC *vlc)
 {
 uint8_t symbols[256];
 uint8_t bits[256];
@@ -49,10 +51,10 @@ static int read_code_table(CLLCContext *ctx, GetBitContext 
*gb, VLC *vlc)
 count = 0;
 num_codes_sum = 0;
 
-num_lens = get_bits(gb, 5);
+num_lens = bitstream_read(bc, 5);
 
 for (i = 0; i < num_lens; i++) {
-num_codes  = get_bits(gb, 9);
+num_codes  = bitstream_read(bc, 9);
 num_codes_sum += num_codes;
 
 if (num_codes_sum > 256) {
@@ -64,7 +66,7 @@ static int read_code_table(CLLCContext *ctx, GetBitContext 
*gb, VLC *vlc)
 }
 
 for (j = 0; j < num_codes; j++) {
-symbols[count] = get_bits(gb, 8);
+symbols[count] = bitstream_read(bc, 8);
 bits[count]= i + 1;
 codes[count]   = prefix++;
 
@@ -82,7 +84,7 @@ static int read_code_table(CLLCContext *ctx, GetBitContext 
*gb, VLC *vlc)
  * Unlike the RGB24 read/restore, which reads in a component at a time,
  * ARGB read/restore reads in ARGB quads.
  */
-static int read_argb_line(CLLCContext *ctx, GetBitContext *gb, int *top_left,
+static int read_argb_line(CLLCContext *ctx, BitstreamContext *bc, int 
*top_left,
   VLC *vlc, uint8_t *outbuf)
 {
 uint8_t *dst;
@@ -90,8 +92,6 @@ static int read_argb_line(CLLCContext *ctx, GetBitContext 
*gb, int *top_left,
 int code;
 int i;
 
-OPEN_READER(bits, gb);
-
 dst = outbuf;
 pred[0] = top_left[0];
 pred[1] = top_left[1];
@@ -100,8 +100,7 @@ static int read_argb_line(CLLCContext *ctx, GetBitContext 
*gb, int *top_left,
 
 for (i = 0; i < ctx->avctx->width; i++) {
 /* Always get the alpha component */
-UPDATE_CACHE(bits, gb);
-GET_VLC(code, bits, gb, vlc[0].table, 7, 2);
+code = bitstream_read_vlc(bc, vlc[0].table, 7, 2);
 
 pred[0] += code;
 dst[0]   = pred[0];
@@ -109,22 +108,19 @@ static int read_argb_line(CLLCContext *ctx, GetBitContext 
*gb, int *top_left,
 /* Skip the components if they are  entirely transparent */
 if (dst[0]) {
 /* Red */
-UPDATE_CACHE(bits, gb);
-GET_VLC(code, bits, gb, vlc[1].table, 7, 2);
+code = bitstream_read_vlc(bc, vlc[1].table, 7, 2);
 
 pred[1] += code;
 dst[1]   = pred[1];
 
 /* Green */
-UPDATE_CACHE(bits, gb);
-GET_VLC(code, bits, gb, vlc[2].table, 7, 2);
+code = bitstream_read_vlc(bc, vlc[2].table, 7, 2);
 
 pred[2] += code;
 dst[2]   = pred[2];
 
 /* Blue */
-UPDATE_CACHE(bits, gb);
-GET_VLC(code, bits, gb, vlc[3].table, 7, 2);
+code = bitstream_read_vlc(bc, vlc[3].table, 7, 2);
 
 pred[3] += code;
 dst[3]   = pred[3];
@@ -137,8 +133,6 @@ static int read_argb_line(CLLCContext *ctx, GetBitContext 
*gb, int *top_left,
 dst += 4;
 }
 
-CLOSE_READER(bits, gb);
-
 top_left[0]  = outbuf[0];
 
 /* Only stash components if they are not transparent */
@@ -151,65 +145,55 @@ static int read_argb_line(CLLCContext *ctx, GetBitContext 
*gb, int *top_left,
 return 0;
 }
 
-static int read_rgb24_component_line(CLLCContext *ctx, GetBitContext *gb,
+static int read_rgb24_component_line(CLLCContext *ctx, BitstreamContext *bc,
  int *top_left, VLC *vlc, uint8_t *outbuf)
 {
 uint8_t *dst;
 int pred, code;
 int i;
 
-OPEN_READER(bits, gb);
-
 dst  = outbuf;
 pred = *top_left;
 
 /* Simultaneously read and restore the line */
 for (i = 0; i < ctx->avctx->width; i++) {
-UPDATE_CACHE(bits, gb);
-GET_VLC(code, bits, gb, vlc->table, 7, 2);
+code = bitstream_read_vlc(bc, vlc->table, 7, 2);
 
 pred  += code;
 dst[0] = pred;
 dst   += 3;
 }
 
-CLOSE_READER(bits, gb);
-
 /* Stash the first pixel */
 *top_left = outbuf[0];
 
 return 0;
 }
 
-static int read_yuv_component_line(CLLCContext *ctx, GetBitContext *gb,
+static int read_yuv_component_line(CLLCContext *ctx, 

[libav-devel] [PATCH 07/35] adpcm: Convert to the new bitstream header

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/adpcm.c | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/libavcodec/adpcm.c b/libavcodec/adpcm.c
index 3ab16dd..fe51c0d 100644
--- a/libavcodec/adpcm.c
+++ b/libavcodec/adpcm.c
@@ -29,8 +29,9 @@
  * License along with Libav; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
+
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "put_bits.h"
 #include "bytestream.h"
 #include "adpcm.h"
@@ -366,32 +367,33 @@ static int xa_decode(AVCodecContext *avctx, int16_t 
*out0, int16_t *out1,
 static void adpcm_swf_decode(AVCodecContext *avctx, const uint8_t *buf, int 
buf_size, int16_t *samples)
 {
 ADPCMDecodeContext *c = avctx->priv_data;
-GetBitContext gb;
+BitstreamContext bc;
 const int *table;
 int k0, signmask, nb_bits, count;
 int size = buf_size*8;
 int i;
 
-init_get_bits(&gb, buf, size);
+bitstream_init(&bc, buf, size);
 
 //read bits & initial values
-nb_bits = get_bits(&gb, 2)+2;
+nb_bits = bitstream_read(&bc, 2)+2;
 table = swf_index_tables[nb_bits-2];
 k0 = 1 << (nb_bits-2);
 signmask = 1 << (nb_bits-1);
 
-while (get_bits_count(&gb) <= size - 22*avctx->channels) {
+while (bitstream_tell(&bc) <= size - 22 * avctx->channels) {
 for (i = 0; i < avctx->channels; i++) {
-*samples++ = c->status[i].predictor = get_sbits(&gb, 16);
-c->status[i].step_index = get_bits(&gb, 6);
+*samples++  =
+c->status[i].predictor  = bitstream_read_signed(&bc, 16);
+c->status[i].step_index = bitstream_read(&bc, 6);
 }
 
-for (count = 0; get_bits_count(&gb) <= size - nb_bits*avctx->channels 
&& count < 4095; count++) {
+for (count = 0; bitstream_tell(&bc) <= size - nb_bits * 
avctx->channels && count < 4095; count++) {
 int i;
 
 for (i = 0; i < avctx->channels; i++) {
 // similar to IMA adpcm
-int delta = get_bits(&gb, nb_bits);
+int delta = bitstream_read(&bc, nb_bits);
 int step = ff_adpcm_step_table[c->status[i].step_index];
 long vpdiff = 0; // vpdiff = (delta+0.5)*step/4
 int k = k0;
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 08/35] asvdec: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/asv.h|  4 ++--
 libavcodec/asvdec.c | 54 ++---
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/libavcodec/asv.h b/libavcodec/asv.h
index 18f7a95..7c4e4fd 100644
--- a/libavcodec/asv.h
+++ b/libavcodec/asv.h
@@ -31,11 +31,11 @@
 #include "libavutil/mem.h"
 
 #include "avcodec.h"
+#include "bitstream.h"
 #include "blockdsp.h"
 #include "bswapdsp.h"
 #include "fdctdsp.h"
 #include "idctdsp.h"
-#include "get_bits.h"
 #include "pixblockdsp.h"
 #include "put_bits.h"
 
@@ -47,7 +47,7 @@ typedef struct ASV1Context {
 IDCTDSPContext idsp;
 PixblockDSPContext pdsp;
 PutBitContext pb;
-GetBitContext gb;
+BitstreamContext bc;
 ScanTable scantable;
 int inv_qscale;
 int mb_width;
diff --git a/libavcodec/asvdec.c b/libavcodec/asvdec.c
index f17f064..cbda63d 100644
--- a/libavcodec/asvdec.c
+++ b/libavcodec/asvdec.c
@@ -70,27 +70,27 @@ static av_cold void init_vlcs(ASV1Context *a)
 }
 
 // FIXME write a reversed bitstream reader to avoid the double reverse
-static inline int asv2_get_bits(GetBitContext *gb, int n)
+static inline int asv2_get_bits(BitstreamContext *bc, int n)
 {
-return ff_reverse[get_bits(gb, n) << (8 - n)];
+return ff_reverse[bitstream_read(bc, n) << (8 - n)];
 }
 
-static inline int asv1_get_level(GetBitContext *gb)
+static inline int asv1_get_level(BitstreamContext *bc)
 {
-int code = get_vlc2(gb, level_vlc.table, VLC_BITS, 1);
+int code = bitstream_read_vlc(bc, level_vlc.table, VLC_BITS, 1);
 
 if (code == 3)
-return get_sbits(gb, 8);
+return bitstream_read_signed(bc, 8);
 else
 return code - 3;
 }
 
-static inline int asv2_get_level(GetBitContext *gb)
+static inline int asv2_get_level(BitstreamContext *bc)
 {
-int code = get_vlc2(gb, asv2_level_vlc.table, ASV2_LEVEL_VLC_BITS, 1);
+int code = bitstream_read_vlc(bc, asv2_level_vlc.table, 
ASV2_LEVEL_VLC_BITS, 1);
 
 if (code == 31)
-return (int8_t) asv2_get_bits(gb, 8);
+return (int8_t) asv2_get_bits(bc, 8);
 else
 return code - 31;
 }
@@ -99,10 +99,10 @@ static inline int asv1_decode_block(ASV1Context *a, int16_t 
block[64])
 {
 int i;
 
-block[0] = 8 * get_bits(&a->gb, 8);
+block[0] = 8 * bitstream_read(&a->bc, 8);
 
 for (i = 0; i < 11; i++) {
-const int ccp = get_vlc2(&a->gb, ccp_vlc.table, VLC_BITS, 1);
+const int ccp = bitstream_read_vlc(&a->bc, ccp_vlc.table, VLC_BITS, 1);
 
 if (ccp) {
 if (ccp == 16)
@@ -113,13 +113,13 @@ static inline int asv1_decode_block(ASV1Context *a, 
int16_t block[64])
 }
 
 if (ccp & 8)
-block[a->scantable.permutated[4 * i + 0]] = 
(asv1_get_level(&a->gb) * a->intra_matrix[4 * i + 0]) >> 4;
+block[a->scantable.permutated[4 * i + 0]] = 
(asv1_get_level(&a->bc) * a->intra_matrix[4 * i + 0]) >> 4;
 if (ccp & 4)
-block[a->scantable.permutated[4 * i + 1]] = 
(asv1_get_level(&a->gb) * a->intra_matrix[4 * i + 1]) >> 4;
+block[a->scantable.permutated[4 * i + 1]] = 
(asv1_get_level(&a->bc) * a->intra_matrix[4 * i + 1]) >> 4;
 if (ccp & 2)
-block[a->scantable.permutated[4 * i + 2]] = 
(asv1_get_level(&a->gb) * a->intra_matrix[4 * i + 2]) >> 4;
+block[a->scantable.permutated[4 * i + 2]] = 
(asv1_get_level(&a->bc) * a->intra_matrix[4 * i + 2]) >> 4;
 if (ccp & 1)
-block[a->scantable.permutated[4 * i + 3]] = 
(asv1_get_level(&a->gb) * a->intra_matrix[4 * i + 3]) >> 4;
+block[a->scantable.permutated[4 * i + 3]] = 
(asv1_get_level(&a->bc) * a->intra_matrix[4 * i + 3]) >> 4;
 }
 }
 
@@ -130,32 +130,32 @@ static inline int asv2_decode_block(ASV1Context *a, 
int16_t block[64])
 {
 int i, count, ccp;
 
-count = asv2_get_bits(&a->gb, 4);
+count = asv2_get_bits(&a->bc, 4);
 
-block[0] = 8 * asv2_get_bits(&a->gb, 8);
+block[0] = 8 * asv2_get_bits(&a->bc, 8);
 
-ccp = get_vlc2(&a->gb, dc_ccp_vlc.table, VLC_BITS, 1);
+ccp = bitstream_read_vlc(&a->bc, dc_ccp_vlc.table, VLC_BITS, 1);
 if (ccp) {
 if (ccp & 4)
-block[a->scantable.permutated[1]] = (asv2_get_level(&a->gb) * 
a->intra_matrix[1]) >> 4;
+block[a->scantable.permutated[1]] = (asv2_get_level(&a->bc) * 
a->intra_matrix[1]) >> 4;
 if (ccp & 2)
-block[a->scantable.permutated[2]] = (asv2_get_level(&a->gb) * 
a->intra_matrix[2]) >> 4;
+block[a->scantable.permutated[2]] = (asv2_get_level(&a->bc) * 
a->intra_matrix[2]) >> 4;
 if (ccp & 1)
-block[a->scantable.permutated[3]] = (asv2_get_level(&a->gb) * 
a->intra_matrix[3]) >> 4;
+block[a->scantable.permutated[3]] = (asv2_get_level(&a->bc) * 
a->intra_matrix[3]) >> 4;
 }
 
 for (i = 1; i < count + 1; i++) {
-const int ccp = get_vlc2(&a->gb, ac_ccp_vlc.table, VLC_BITS, 1);
+   

[libav-devel] [PATCH 11/35] avs: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/avs.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libavcodec/avs.c b/libavcodec/avs.c
index 0d127f8..bea01a2 100644
--- a/libavcodec/avs.c
+++ b/libavcodec/avs.c
@@ -20,7 +20,7 @@
  */
 
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "internal.h"
 
 typedef struct AvsContext {
@@ -57,7 +57,7 @@ avs_decode_frame(AVCodecContext * avctx,
 int i, j, x, y, stride, ret, vect_w = 3, vect_h = 3;
 AvsVideoSubType sub_type;
 AvsBlockType type;
-GetBitContext change_map;
+BitstreamContext change_map;
 
 if ((ret = ff_reget_buffer(avctx, p)) < 0) {
 av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
@@ -125,13 +125,13 @@ avs_decode_frame(AVCodecContext * avctx,
 int map_size = ((318 / vect_w + 7) / 8) * (198 / vect_h);
 if (buf_end - table < map_size)
 return AVERROR_INVALIDDATA;
-init_get_bits(&change_map, table, map_size * 8);
+bitstream_init(&change_map, table, map_size * 8);
 table += map_size;
 }
 
 for (y=0; y<198; y+=vect_h) {
 for (x=0; x<318; x+=vect_w) {
-if (sub_type == AVS_I_FRAME || get_bits1(&change_map)) {
+if (sub_type == AVS_I_FRAME || bitstream_read_bit(&change_map)) {
 if (buf_end - table < 1)
 return AVERROR_INVALIDDATA;
 vect = &buf[*table++ * (vect_w * vect_h)];
@@ -145,7 +145,7 @@ avs_decode_frame(AVCodecContext * avctx,
 }
 }
 if (sub_type != AVS_I_FRAME)
-align_get_bits(&change_map);
+bitstream_align(&change_map);
 }
 
 if ((ret = av_frame_ref(picture, p)) < 0)
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 13/35] binkaudio: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/binkaudio.c | 59 +-
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/libavcodec/binkaudio.c b/libavcodec/binkaudio.c
index 2638eb2..cf61135 100644
--- a/libavcodec/binkaudio.c
+++ b/libavcodec/binkaudio.c
@@ -33,8 +33,8 @@
 
 #define BITSTREAM_READER_LE
 #include "avcodec.h"
+#include "bitstream.h"
 #include "dct.h"
-#include "get_bits.h"
 #include "internal.h"
 #include "rdft.h"
 #include "wma_freqs.h"
@@ -45,7 +45,7 @@ static float quant_table[96];
 #define BINK_BLOCK_MAX_SIZE (MAX_CHANNELS << 11)
 
 typedef struct BinkAudioContext {
-GetBitContext gb;
+BitstreamContext bc;
 int version_b;  ///< Bink version 'b'
 int first;
 int channels;
@@ -143,11 +143,11 @@ static av_cold int decode_init(AVCodecContext *avctx)
 return 0;
 }
 
-static float get_float(GetBitContext *gb)
+static float get_float(BitstreamContext *bc)
 {
-int power = get_bits(gb, 5);
-float f = ldexpf(get_bits_long(gb, 23), power - 23);
-if (get_bits1(gb))
+int power = bitstream_read(bc, 5);
+float f = ldexpf(bitstream_read(bc, 23), power - 23);
+if (bitstream_read_bit(bc))
 f = -f;
 return f;
 }
@@ -166,30 +166,30 @@ static int decode_block(BinkAudioContext *s, float **out, 
int use_dct)
 int ch, i, j, k;
 float q, quant[25];
 int width, coeff;
-GetBitContext *gb = &s->gb;
+BitstreamContext *bc = &s->bc;
 
 if (use_dct)
-skip_bits(gb, 2);
+bitstream_skip(bc, 2);
 
 for (ch = 0; ch < s->channels; ch++) {
 FFTSample *coeffs = out[ch];
 
 if (s->version_b) {
-if (get_bits_left(gb) < 64)
+if (bitstream_bits_left(bc) < 64)
 return AVERROR_INVALIDDATA;
-coeffs[0] = av_int2float(get_bits_long(gb, 32)) * s->root;
-coeffs[1] = av_int2float(get_bits_long(gb, 32)) * s->root;
+coeffs[0] = av_int2float(bitstream_read(bc, 32)) * s->root;
+coeffs[1] = av_int2float(bitstream_read(bc, 32)) * s->root;
 } else {
-if (get_bits_left(gb) < 58)
+if (bitstream_bits_left(bc) < 58)
 return AVERROR_INVALIDDATA;
-coeffs[0] = get_float(gb) * s->root;
-coeffs[1] = get_float(gb) * s->root;
+coeffs[0] = get_float(bc) * s->root;
+coeffs[1] = get_float(bc) * s->root;
 }
 
-if (get_bits_left(gb) < s->num_bands * 8)
+if (bitstream_bits_left(bc) < s->num_bands * 8)
 return AVERROR_INVALIDDATA;
 for (i = 0; i < s->num_bands; i++) {
-int value = get_bits(gb, 8);
+int value = bitstream_read(bc, 8);
 quant[i]  = quant_table[FFMIN(value, 95)];
 }
 
@@ -202,9 +202,9 @@ static int decode_block(BinkAudioContext *s, float **out, 
int use_dct)
 if (s->version_b) {
 j = i + 16;
 } else {
-int v = get_bits1(gb);
+int v = bitstream_read_bit(bc);
 if (v) {
-v = get_bits(gb, 4);
+v = bitstream_read(bc, 4);
 j = i + rle_length_tab[v] * 8;
 } else {
 j = i + 8;
@@ -213,7 +213,7 @@ static int decode_block(BinkAudioContext *s, float **out, 
int use_dct)
 
 j = FFMIN(j, s->frame_len);
 
-width = get_bits(gb, 4);
+width = bitstream_read(bc, 4);
 if (width == 0) {
 memset(coeffs + i, 0, (j - i) * sizeof(*coeffs));
 i = j;
@@ -223,10 +223,10 @@ static int decode_block(BinkAudioContext *s, float **out, 
int use_dct)
 while (i < j) {
 if (s->bands[k] == i)
 q = quant[k++];
-coeff = get_bits(gb, width);
+coeff = bitstream_read(bc, width);
 if (coeff) {
 int v;
-v = get_bits1(gb);
+v = bitstream_read_bit(bc);
 if (v)
 coeffs[i] = -q * coeff;
 else
@@ -278,10 +278,11 @@ static av_cold int decode_end(AVCodecContext *avctx)
 return 0;
 }
 
-static void get_bits_align32(GetBitContext *s)
+static void get_bits_align32(BitstreamContext *s)
 {
-int n = (-get_bits_count(s)) & 31;
-if (n) skip_bits(s, n);
+int n = (-bitstream_tell(s)) & 31;
+if (n)
+bitstream_skip(s, n);
 }
 
 static int decode_frame(AVCodecContext *avctx, void *data,
@@ -289,10 +290,10 @@ static int decode_frame(AVCodecContext *avctx, void *data,
 {
 BinkAudioContext *s = avctx->priv_data;
 AVFrame *frame  = data;
-GetBitContext *gb = &s->gb;
+BitstreamContext *bc = &s->bc;
 int ret, consumed = 0;
 
-if (!get_bits_left(gb)) {
+if 

[libav-devel] [PATCH 01/35] lavc: add a new bitstream reader to replace get_bits

2016-11-14 Thread Alexandra Hájková
The new bit reader features a simpler API and an implementation without
stacks of nested macros.
---
 libavcodec/bitstream.h | 387 +
 1 file changed, 387 insertions(+)
 create mode 100644 libavcodec/bitstream.h

diff --git a/libavcodec/bitstream.h b/libavcodec/bitstream.h
new file mode 100644
index 000..996e32e
--- /dev/null
+++ b/libavcodec/bitstream.h
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2016 Alexandra Hájková
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * functions for reading bits from a buffer
+ */
+
+#ifndef AVCODEC_BITSTREAM_H
+#define AVCODEC_BITSTREAM_H
+
+#include <stdint.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/log.h"
+
+#include "mathops.h"
+#include "vlc.h"
+
+typedef struct BitstreamContext {
+uint64_t bits;  // stores bits read from the buffer
+const uint8_t *buffer, *buffer_end;
+const uint8_t *ptr; // position inside a buffer
+unsigned bits_left; // number of bits left in bits field
+unsigned size_in_bits;
+} BitstreamContext;
+
+static inline void refill_64(BitstreamContext *bc)
+{
+if (bc->ptr >= bc->buffer_end)
+return;
+
+#ifdef BITSTREAM_READER_LE
+bc->bits   = AV_RL64(bc->ptr);
+#else
+bc->bits   = AV_RB64(bc->ptr);
+#endif
+bc->ptr   += 8;
+bc->bits_left  = 64;
+}
+
+static inline void refill_32(BitstreamContext *bc)
+{
+if (bc->ptr >= bc->buffer_end)
+return;
+
+#ifdef BITSTREAM_READER_LE
+bc->bits   = (uint64_t)AV_RL32(bc->ptr) << bc->bits_left | bc->bits;
+#else
+bc->bits   = bc->bits | (uint64_t)AV_RB32(bc->ptr) << (32 - 
bc->bits_left);
+#endif
+bc->ptr   += 4;
+bc->bits_left += 32;
+}
+
+/* Initialize BitstreamContext. Input buffer must have an additional zero
+ * padding of AV_INPUT_BUFFER_PADDING_SIZE bytes at the end. */
+static inline int bitstream_init(BitstreamContext *bc, const uint8_t *buffer,
+ unsigned bit_size)
+{
+unsigned buffer_size;
+
+if (bit_size > INT_MAX - 7 || !buffer) {
+buffer=
+bc->buffer=
+bc->ptr   = NULL;
+bc->bits_left = 0;
+return AVERROR_INVALIDDATA;
+}
+
+buffer_size = (bit_size + 7) >> 3;
+
+bc->buffer   = buffer;
+bc->buffer_end   = buffer + buffer_size;
+bc->ptr  = bc->buffer;
+bc->size_in_bits = bit_size;
+bc->bits_left= 0;
+bc->bits = 0;
+
+refill_64(bc);
+
+return 0;
+}
+
+/* Initialize BitstreamContext with buffer size in bytes instead of bits. */
+static inline int bitstream_init8(BitstreamContext *bc, const uint8_t *buffer,
+  unsigned byte_size)
+{
+if (byte_size > INT_MAX / 8)
+return AVERROR_INVALIDDATA;
+return bitstream_init(bc, buffer, byte_size * 8);
+}
+
+/* Return number of bits already read. */
+static inline int bitstream_tell(const BitstreamContext *bc)
+{
+return (bc->ptr - bc->buffer) * 8 - bc->bits_left;
+}
+
+/* Return buffer size in bits. */
+static inline int bitstream_tell_size(const BitstreamContext *bc)
+{
+return bc->size_in_bits;
+}
+
+/* Return the number of the bits left in a buffer. */
+static inline int bitstream_bits_left(const BitstreamContext *bc)
+{
+return (bc->buffer - bc->ptr) * 8 + bc->size_in_bits + bc->bits_left;
+}
+
+static inline uint64_t get_val(BitstreamContext *bc, unsigned n)
+{
+#ifdef BITSTREAM_READER_LE
+uint64_t ret = bc->bits & ((UINT64_C(1) << n) - 1);
+bc->bits >>= n;
+#else
+uint64_t ret = bc->bits >> (64 - n);
+bc->bits <<= n;
+#endif
+bc->bits_left -= n;
+
+return ret;
+}
+
+/* Return one bit from the buffer. */
+static inline unsigned bitstream_read_bit(BitstreamContext *bc)
+{
+if (!bc->bits_left)
+refill_64(bc);
+
+return get_val(bc, 1);
+}
+
+/* Return n bits from the buffer. n has to be in the 0-63 range. */
+static inline uint64_t bitstream_read_63(Bi

[libav-devel] [PATCH 04/35] ea: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/eatgq.c | 34 +-
 libavcodec/eatgv.c | 22 +++---
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/libavcodec/eatgq.c b/libavcodec/eatgq.c
index 8355471..9abedcc 100644
--- a/libavcodec/eatgq.c
+++ b/libavcodec/eatgq.c
@@ -31,9 +31,9 @@
 #define BITSTREAM_READER_LE
 #include "aandcttab.h"
 #include "avcodec.h"
+#include "bitstream.h"
 #include "bytestream.h"
 #include "eaidct.h"
-#include "get_bits.h"
 #include "idctdsp.h"
 #include "internal.h"
 
@@ -58,44 +58,44 @@ static av_cold int tgq_decode_init(AVCodecContext *avctx)
 return 0;
 }
 
-static void tgq_decode_block(TgqContext *s, int16_t block[64], GetBitContext 
*gb)
+static void tgq_decode_block(TgqContext *s, int16_t block[64], 
BitstreamContext *bc)
 {
 uint8_t *perm = s->scantable.permutated;
 int i, j, value;
-block[0] = get_sbits(gb, 8) * s->qtable[0];
+block[0] = bitstream_read_signed(bc, 8) * s->qtable[0];
 for (i = 1; i < 64;) {
-switch (show_bits(gb, 3)) {
+switch (bitstream_peek(bc, 3)) {
 case 4:
 block[perm[i++]] = 0;
 case 0:
 block[perm[i++]] = 0;
-skip_bits(gb, 3);
+bitstream_skip(bc, 3);
 break;
 case 5:
 case 1:
-skip_bits(gb, 2);
-value = get_bits(gb, 6);
+bitstream_skip(bc, 2);
+value = bitstream_read(bc, 6);
 for (j = 0; j < value; j++)
 block[perm[i++]] = 0;
 break;
 case 6:
-skip_bits(gb, 3);
+bitstream_skip(bc, 3);
 block[perm[i]] = -s->qtable[perm[i]];
 i++;
 break;
 case 2:
-skip_bits(gb, 3);
+bitstream_skip(bc, 3);
 block[perm[i]] = s->qtable[perm[i]];
 i++;
 break;
 case 7: // 111b
 case 3: // 011b
-skip_bits(gb, 2);
-if (show_bits(gb, 6) == 0x3F) {
-skip_bits(gb, 6);
-block[perm[i]] = get_sbits(gb, 8) * s->qtable[perm[i]];
+bitstream_skip(bc, 2);
+if (bitstream_peek(bc, 6) == 0x3F) {
+bitstream_skip(bc, 6);
+block[perm[i]] = bitstream_read_signed(bc, 8) * 
s->qtable[perm[i]];
 } else {
-block[perm[i]] = get_sbits(gb, 6) * s->qtable[perm[i]];
+block[perm[i]] = bitstream_read_signed(bc, 6) * 
s->qtable[perm[i]];
 }
 i++;
 break;
@@ -156,10 +156,10 @@ static void tgq_decode_mb(TgqContext *s, AVFrame *frame, 
int mb_y, int mb_x)
 
 mode = bytestream2_get_byte(&s->gb);
 if (mode > 12) {
-GetBitContext gb;
-init_get_bits(&gb, s->gb.buffer, FFMIN(s->gb.buffer_end - 
s->gb.buffer, mode) * 8);
+BitstreamContext bc;
+bitstream_init(&bc, s->gb.buffer, FFMIN(s->gb.buffer_end - 
s->gb.buffer, mode) * 8);
 for (i = 0; i < 6; i++)
-tgq_decode_block(s, s->block[i], &gb);
+tgq_decode_block(s, s->block[i], &bc);
 tgq_idct_put_mb(s, s->block, frame, mb_x, mb_y);
 bytestream2_skip(&s->gb, mode);
 } else {
diff --git a/libavcodec/eatgv.c b/libavcodec/eatgv.c
index 7a50d01..549b5b6 100644
--- a/libavcodec/eatgv.c
+++ b/libavcodec/eatgv.c
@@ -33,7 +33,7 @@
 
 #define BITSTREAM_READER_LE
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "internal.h"
 
 #define EA_PREAMBLE_SIZE8
@@ -153,7 +153,7 @@ static int tgv_decode_inter(TgvContext *s, AVFrame *frame,
 int num_blocks_packed;
 int vector_bits;
 int i,j,x,y;
-GetBitContext gb;
+BitstreamContext bc;
 int mvbits;
 const uint8_t *blocks_raw;
 
@@ -166,7 +166,7 @@ static int tgv_decode_inter(TgvContext *s, AVFrame *frame,
 vector_bits   = AV_RL16(&buf[6]);
 buf += 12;
 
-if (vector_bits > MIN_CACHE_BITS || !vector_bits) {
+if (vector_bits > 32 || !vector_bits) {
 av_log(s->avctx, AV_LOG_ERROR,
"Invalid value for motion vector bits: %d\n", vector_bits);
 return AVERROR_INVALIDDATA;
@@ -195,10 +195,10 @@ static int tgv_decode_inter(TgvContext *s, AVFrame *frame,
 if (buf + (mvbits >> 3) + 16 * num_blocks_raw + 8 * num_blocks_packed > 
buf_end)
 return AVERROR_INVALIDDATA;
 
-init_get_bits(&gb, buf, mvbits);
+bitstream_init(&bc, buf, mvbits);
 for (i = 0; i < num_mvs; i++) {
-s->mv_codebook[i][0] = get_sbits(&gb, 10);
-s->mv_codebook[i][1] = get_sbits(&gb, 10);
+s->mv_codebook[i][0] = bitstream_read_signed(&bc, 10);
+s->mv_codebook[i][1] = bitstream_read_signed(&bc, 10);
 }
 buf += mvbits >> 3;
 
@@ -207,23 +207,23 @@ static int tgv_decode_inter(TgvContext *s, AVFrame *frame,
 buf   += num_blocks_raw * 16;
 
 /* read compressed blocks */
-init_get_bits(&gb, buf, (buf_end - buf) << 3);

[libav-devel] [PATCH 16/35] cook: Convert to the new bitstream reader

2016-11-14 Thread Alexandra Hájková
Signed-off-by: Anton Khirnov 
---
 libavcodec/cook.c | 53 +++--
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index c990333..c3304ea 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -47,7 +47,7 @@
 
 #include "audiodsp.h"
 #include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
 #include "bytestream.h"
 #include "fft.h"
 #include "internal.h"
@@ -124,7 +124,7 @@ typedef struct cook {
 
 AVCodecContext* avctx;
 AudioDSPContext adsp;
-GetBitContext   gb;
+BitstreamContextbc;
 /* stream data */
 int num_vectors;
 int samples_per_channel;
@@ -325,23 +325,23 @@ static av_cold int cook_decode_close(AVCodecContext 
*avctx)
 /**
  * Fill the gain array for the timedomain quantization.
  *
- * @param gb  pointer to the GetBitContext
+ * @param bc  pointer to the BitstreamContext
  * @param gaininfoarray[9] of gain indexes
  */
-static void decode_gain_info(GetBitContext *gb, int *gaininfo)
+static void decode_gain_info(BitstreamContext *bc, int *gaininfo)
 {
 int i, n;
 
-while (get_bits1(gb)) {
+while (bitstream_read_bit(bc)) {
 /* NOTHING */
 }
 
-n = get_bits_count(gb) - 1; // amount of elements*2 to update
+n = bitstream_tell(bc) - 1; // amount of elements * 2 to update
 
 i = 0;
 while (n--) {
-int index = get_bits(gb, 3);
-int gain = get_bits1(gb) ? get_bits(gb, 4) - 7 : -1;
+int index = bitstream_read(bc, 3);
+int gain = bitstream_read_bit(bc) ? bitstream_read(bc, 4) - 7 : -1;
 
 while (i <= index)
 gaininfo[i++] = gain;
@@ -361,7 +361,7 @@ static int decode_envelope(COOKContext *q, COOKSubpacket *p,
 {
 int i, j, vlc_index;
 
-quant_index_table[0] = get_bits(&q->gb, 6) - 6; // This is used later in 
categorize
+quant_index_table[0] = bitstream_read(&q->bc, 6) - 6; // This is used 
later in categorize
 
 for (i = 1; i < p->total_subbands; i++) {
 vlc_index = i;
@@ -375,8 +375,8 @@ static int decode_envelope(COOKContext *q, COOKSubpacket *p,
 if (vlc_index > 13)
 vlc_index = 13; // the VLC tables >13 are identical to No. 13
 
-j = get_vlc2(&q->gb, q->envelope_quant_index[vlc_index - 1].table,
- q->envelope_quant_index[vlc_index - 1].bits, 2);
+j = bitstream_read_vlc(&q->bc, q->envelope_quant_index[vlc_index - 
1].table,
+   q->envelope_quant_index[vlc_index - 1].bits, 2);
 quant_index_table[i] = quant_index_table[i - 1] + j - 12; // 
differential encoding
 if (quant_index_table[i] > 63 || quant_index_table[i] < -63) {
 av_log(q->avctx, AV_LOG_ERROR,
@@ -408,7 +408,7 @@ static void categorize(COOKContext *q, COOKSubpacket *p, 
int *quant_index_table,
 int tmp_categorize_array1_idx = p->numvector_size;
 int tmp_categorize_array2_idx = p->numvector_size;
 
-bits_left = p->bits_per_subpacket - get_bits_count(&q->gb);
+bits_left = p->bits_per_subpacket - bitstream_tell(&q->bc);
 
 if (bits_left > q->samples_per_channel)
 bits_left = q->samples_per_channel +
@@ -554,8 +554,8 @@ static int unpack_SQVH(COOKContext *q, COOKSubpacket *p, 
int category,
 vd = vd_tab[category];
 result = 0;
 for (i = 0; i < vpr_tab[category]; i++) {
-vlc = get_vlc2(&q->gb, q->sqvh[category].table, 
q->sqvh[category].bits, 3);
-if (p->bits_per_subpacket < get_bits_count(&q->gb)) {
+vlc = bitstream_read_vlc(&q->bc, q->sqvh[category].table, 
q->sqvh[category].bits, 3);
+if (p->bits_per_subpacket < bitstream_tell(&q->bc)) {
 vlc = 0;
 result = 1;
 }
@@ -566,8 +566,8 @@ static int unpack_SQVH(COOKContext *q, COOKSubpacket *p, 
int category,
 }
 for (j = 0; j < vd; j++) {
 if (subband_coef_index[i * vd + j]) {
-if (get_bits_count(&q->gb) < p->bits_per_subpacket) {
-subband_coef_sign[i * vd + j] = get_bits1(&q->gb);
+if (bitstream_tell(&q->bc) < p->bits_per_subpacket) {
+subband_coef_sign[i * vd + j] = bitstream_read_bit(&q->bc);
 } else {
 result = 1;
 subband_coef_sign[i * vd + j] = 0;
@@ -634,7 +634,7 @@ static int mono_decode(COOKContext *q, COOKSubpacket *p, 
float *mlt_buffer)
 
 if ((res = decode_envelope(q, p, quant_index_table)) < 0)
 return res;
-q->num_vectors = get_bits(&q->gb, p->log2_numvector_size);
+q->num_vectors = bitstream_read(&q->bc, p->log2_numvector_size);
 categorize(q, p, quant_index_table, category, category_index);
 expand_category(q, category, category_index);
 decode_vectors(q, p, category, quant_index_table, mlt_buffer);
@@ -739,7 +739,7 @@ static void imlt_gain(COOKContext *q, float *inbuffer,
 static void 

[libav-devel] [PATCH] hevc: Add HEVC IDCT 4x4 for PPC

2016-11-11 Thread Alexandra Hájková
From: Alexandra Hajkova 

---
 libavcodec/hevcdsp.c  |   2 +
 libavcodec/hevcdsp.h  |   1 +
 libavcodec/ppc/Makefile   |   1 +
 libavcodec/ppc/hevcdsp.c  | 107 ++
 libavcodec/ppc/hevcdsp_template.c |  54 +++
 5 files changed, 165 insertions(+)
 create mode 100644 libavcodec/ppc/hevcdsp.c
 create mode 100644 libavcodec/ppc/hevcdsp_template.c

diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 7c19198..8ae023b 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -245,6 +245,8 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int 
bit_depth)
 break;
 }
 
+if (ARCH_PPC)
+ff_hevc_dsp_init_ppc(hevcdsp, bit_depth);
 if (ARCH_X86)
 ff_hevc_dsp_init_x86(hevcdsp, bit_depth);
 }
diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index 49cb711..2f4ff01 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h
@@ -115,6 +115,7 @@ typedef struct HEVCDSPContext {
 
 void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
 
+void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth);
 void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
 
 extern const int16_t ff_hevc_epel_coeffs[7][16];
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 09eabcb..4b92add 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -25,6 +25,7 @@ OBJS-$(CONFIG_VP8DSP)  += ppc/vp8dsp_altivec.o
 
 # decoders/encoders
 OBJS-$(CONFIG_APE_DECODER) += ppc/apedsp_altivec.o
+OBJS-$(CONFIG_HEVC_DECODER)+= ppc/hevcdsp.o
 OBJS-$(CONFIG_SVQ1_ENCODER)+= ppc/svq1enc_altivec.o
 OBJS-$(CONFIG_VORBIS_DECODER)  += ppc/vorbisdsp_altivec.o
 OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o
diff --git a/libavcodec/ppc/hevcdsp.c b/libavcodec/ppc/hevcdsp.c
new file mode 100644
index 000..7cf7e97
--- /dev/null
+++ b/libavcodec/ppc/hevcdsp.c
@@ -0,0 +1,107 @@
+/* SIMD-optimized IDCT functions for HEVC decoding
+ * Copyright (c) Alexandra Hajkova
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#undef pixel
+#endif
+
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/cpu.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavutil/ppc/util_altivec.h"
+
+#include "libavcodec/hevcdsp.h"
+
+static const vector int16_t trans4[4] = {
+{ 64,  64, 64,  64, 64,  64, 64,  64 },
+{ 83,  36, 83,  36, 83,  36, 83,  36 },
+{ 64, -64, 64, -64, 64, -64, 64, -64 },
+{ 36, -83, 36, -83, 36, -83, 36, -83 },
+};
+
+static const vec_u8 mask[2] = {
+{ 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, 0x0B, 
0x12, 0x13, 0x1A, 0x1B },
+{ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 0x0F, 
0x16, 0x17, 0x1E, 0x1F },
+};
+
+#if HAVE_ALTIVEC
+static void transform4x4(vector int16_t src_01, vector int16_t src_23,
+ vector int32_t res[4], const int shift, int16_t 
*coeffs)
+{
+vector int16_t src_02, src_13;
+vector int32_t zero = vec_splat_s32(0);
+vector int32_t e0, o0, e1, o1;
+vector int32_t add;
+
+src_13 = vec_mergel(src_01, src_23);
+src_02 = vec_mergeh(src_01, src_23);
+
+e0 = vec_msums(src_02, trans4[0], zero);
+o0 = vec_msums(src_13, trans4[1], zero);
+e1 = vec_msums(src_02, trans4[2], zero);
+o1 = vec_msums(src_13, trans4[3], zero);
+
+add = vec_sl(vec_splat_s32(1), vec_splat_u32(shift - 1));
+e0 = vec_add(e0, add);
+e1 = vec_add(e1, add);
+
+res[0] = vec_add(e0, o0);
+res[1] = vec_add(e1, o1);
+res[2] = vec_sub(e1, o1);
+res[3] = vec_sub(e0, o0);
+}
+
+static void scale(vector int32_t res[4], vector int16_t res_packed[2], int 
shift)
+{
+int i;
+vector unsigned int v_shift = vec_splat_u32(shift);
+
+for (i = 0; i < 4; i++)
+res[i] = vec_sra(res[i], v_shift);
+
+// clip16
+res_packed[0] = vec_packs(res[0], res[1]);
+res_packed[1] = vec_packs(res[2], res[3]);
+}
+#endif /* HAVE_ALTIVEC */
+
+#define BIT_DEPTH 8
+#include "libavcodec/ppc/hevcdsp_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include 

[libav-devel] [PATCH] hevc: Add HEVC IDCT 4x4 for PPC

2016-11-10 Thread Alexandra Hájková
From: Alexandra Hajkova 

---
 libavcodec/hevcdsp.c  |   2 +
 libavcodec/hevcdsp.h  |   1 +
 libavcodec/ppc/Makefile   |   1 +
 libavcodec/ppc/hevcdsp.c  | 107 ++
 libavcodec/ppc/hevcdsp_template.c |  50 ++
 5 files changed, 161 insertions(+)
 create mode 100644 libavcodec/ppc/hevcdsp.c
 create mode 100644 libavcodec/ppc/hevcdsp_template.c

diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 7c19198..8ae023b 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -245,6 +245,8 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int 
bit_depth)
 break;
 }
 
+if (ARCH_PPC)
+ff_hevc_dsp_init_ppc(hevcdsp, bit_depth);
 if (ARCH_X86)
 ff_hevc_dsp_init_x86(hevcdsp, bit_depth);
 }
diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index 49cb711..2f4ff01 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h
@@ -115,6 +115,7 @@ typedef struct HEVCDSPContext {
 
 void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
 
+void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth);
 void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
 
 extern const int16_t ff_hevc_epel_coeffs[7][16];
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 09eabcb..4b92add 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -25,6 +25,7 @@ OBJS-$(CONFIG_VP8DSP)  += ppc/vp8dsp_altivec.o
 
 # decoders/encoders
 OBJS-$(CONFIG_APE_DECODER) += ppc/apedsp_altivec.o
+OBJS-$(CONFIG_HEVC_DECODER)+= ppc/hevcdsp.o
 OBJS-$(CONFIG_SVQ1_ENCODER)+= ppc/svq1enc_altivec.o
 OBJS-$(CONFIG_VORBIS_DECODER)  += ppc/vorbisdsp_altivec.o
 OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o
diff --git a/libavcodec/ppc/hevcdsp.c b/libavcodec/ppc/hevcdsp.c
new file mode 100644
index 000..b46d895
--- /dev/null
+++ b/libavcodec/ppc/hevcdsp.c
@@ -0,0 +1,107 @@
+/* SIMD-optimized IDCT functions for HEVC decoding
+ * Copyright (c) Alexandra Hajkova
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#undef pixel
+#endif
+
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/cpu.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavutil/ppc/util_altivec.h"
+
+#include "libavcodec/hevcdsp.h"
+
+static const vector int16_t trans4[4] = {
+{ 64,  64, 64,  64, 64,  64, 64,  64 },
+{ 83,  36, 83,  36, 83,  36, 83,  36 },
+{ 64, -64, 64, -64, 64, -64, 64, -64 },
+{ 36, -83, 36, -83, 36, -83, 36, -83 },
+};
+
+static const vec_u8 mask[2] = {
+{ 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, 0x0B, 
0x12, 0x13, 0x1A, 0x1B },
+{ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 0x0F, 
0x16, 0x17, 0x1E, 0x1F },
+};
+
+#if HAVE_ALTIVEC
+static void transform4x4(vector int16_t src_01, vector int16_t src_23,
+ vector int32_t res[4], const int shift, int16_t 
*coeffs)
+{
+vector int16_t src_02, src_13;
+vector int32_t zero = vec_splat_s32(0);
+vector int32_t e0, o0, e1, o1;
+vector int32_t add;
+
+src_13 = vec_mergel(src_01, src_23);
+src_02 = vec_mergeh(src_01, src_23);
+
+e0 = vec_msums(src_02, trans4[0], zero);
+o0 = vec_msums(src_13, trans4[1], zero);
+e1 = vec_msums(src_02, trans4[2], zero);
+o1 = vec_msums(src_13, trans4[3], zero);
+
+add = vec_sl(vec_splat_s32(1), vec_splat_u32(shift - 1));
+e0 = vec_add(e0, add);
+e1 = vec_add(e1, add);
+
+res[0] = vec_add(e0, o0);
+res[1] = vec_add(e1, o1);
+res[2] = vec_sub(e1, o1);
+res[3] = vec_sub(e0, o0);
+}
+
+static void scale(vector int32_t res[4], vector int16_t res_packed[2], int 
shift)
+{
+int i;
+vector unsigned int v_shift = vec_splat_u32(shift);
+
+for (i = 0; i < 4; i++)
+res[i] = vec_sra(res[i], v_shift);
+
+// clip16
+res_packed[0] = vec_packs(res[0], res[1]);
+res_packed[1] = vec_packs(res[2], res[3]);
+}
+#endif /* HAVE_ALTIVEC */
+
+#define BIT_DEPTH 8
+#include "libavcodec/ppc/hevcdsp_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include 

[libav-devel] [PATCH] hevc: Add HEVC IDCT 4x4 for PPC

2016-11-05 Thread Alexandra Hájková
---
Applied review comments as discussed.
Tested on both LE and BE.

 libavcodec/hevcdsp.c  |   2 +
 libavcodec/hevcdsp.h  |   1 +
 libavcodec/ppc/Makefile   |   1 +
 libavcodec/ppc/hevcdsp.c  | 107 ++
 libavcodec/ppc/hevcdsp_template.c |  50 ++
 5 files changed, 161 insertions(+)
 create mode 100644 libavcodec/ppc/hevcdsp.c
 create mode 100644 libavcodec/ppc/hevcdsp_template.c

diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index e370254..5d63eea 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -245,6 +245,8 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int 
bit_depth)
 break;
 }
 
+if (ARCH_PPC)
+ff_hevc_dsp_init_ppc(hevcdsp, bit_depth);
 if (ARCH_X86)
 ff_hevc_dsp_init_x86(hevcdsp, bit_depth);
 if (ARCH_PPC)
diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index 3827f50..bb490c8 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h
@@ -115,6 +115,7 @@ typedef struct HEVCDSPContext {
 
 void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
 
+void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth);
 void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
 void ff_hevc_dsp_init_altivec(HEVCDSPContext *c, const int bit_depth);
 
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 9234e77..6bc056c 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -25,6 +25,7 @@ OBJS-$(CONFIG_VP8DSP)  += ppc/vp8dsp_altivec.o
 
 # decoders/encoders
 OBJS-$(CONFIG_APE_DECODER) += ppc/apedsp_altivec.o
+OBJS-$(CONFIG_HEVC_DECODER)+= ppc/hevcdsp.o
 OBJS-$(CONFIG_SVQ1_ENCODER)+= ppc/svq1enc_altivec.o
 OBJS-$(CONFIG_VORBIS_DECODER)  += ppc/vorbisdsp_altivec.o
 OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o
diff --git a/libavcodec/ppc/hevcdsp.c b/libavcodec/ppc/hevcdsp.c
new file mode 100644
index 000..b6e5461
--- /dev/null
+++ b/libavcodec/ppc/hevcdsp.c
@@ -0,0 +1,107 @@
+/* SIMD-optimized IDCT functions for HEVC decoding
+ * Copyright (c) Alexandra Hájková
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#undef pixel
+#endif
+
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/cpu.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavutil/ppc/util_altivec.h"
+
+#include "libavcodec/hevcdsp.h"
+
+static const vector int16_t trans4[4] = {
+{ 64,  64, 64,  64, 64,  64, 64,  64 },
+{ 83,  36, 83,  36, 83,  36, 83,  36 },
+{ 64, -64, 64, -64, 64, -64, 64, -64 },
+{ 36, -83, 36, -83, 36, -83, 36, -83 },
+};
+
+static const vec_u8 mask[2] = {
+{ 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, 0x0B, 
0x12, 0x13, 0x1A, 0x1B },
+{ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 0x0F, 
0x16, 0x17, 0x1E, 0x1F },
+};
+
+#if HAVE_ALTIVEC
+static void transform4x4(vector int16_t src_01, vector int16_t src_23,
+ vector int32_t res[4], const int shift, int16_t 
*coeffs)
+{
+vector int16_t src_02, src_13;
+vector int32_t zero = vec_splat_s32(0);
+vector int32_t e0, o0, e1, o1;
+vector int32_t add;
+
+src_13 = vec_mergel(src_01, src_23);
+src_02 = vec_mergeh(src_01, src_23);
+
+e0 = vec_msums(src_02, trans4[0], zero);
+o0 = vec_msums(src_13, trans4[1], zero);
+e1 = vec_msums(src_02, trans4[2], zero);
+o1 = vec_msums(src_13, trans4[3], zero);
+
+add = vec_sl(vec_splat_s32(1), vec_splat_u32(shift - 1));
+e0 = vec_add(e0, add);
+e1 = vec_add(e1, add);
+
+res[0] = vec_add(e0, o0);
+res[1] = vec_add(e1, o1);
+res[2] = vec_sub(e1, o1);
+res[3] = vec_sub(e0, o0);
+}
+
+static void scale(vector int32_t res[4], vector int16_t res_packed[2], int 
shift)
+{
+int i;
+vector unsigned int v_shift = vec_splat_u32(shift);
+
+for (i = 0; i < 4; i++)
+res[i] = vec_sra(res[i], v_shift);
+
+// clip16
+res_packed[0] = vec_packs(res[0], res[1]);
+res_packed[1] = vec_packs(res[2], res[3]);
+}
+#endif /* HAVE_ALTIVEC */
+
+#define BI

Re: [libav-devel] [PATCH] hevc: Add HEVC IDCT 4x4 for PPC

2016-11-01 Thread Alexandra Hájková
>> +static const vector int16_t trans4[4] = {
>> +{ 64,  64, 64,  64, 64,  64, 64,  64 },
>> +{ 83,  36, 83,  36, 83,  36, 83,  36 },
>> +{ 64, -64, 64, -64, 64, -64, 64, -64 },
>> +{ 36, -83, 36, -83, 36, -83, 36, -83 },
>> +};
>
> This fits in int8_t, is there a reason to have it int16_t?

The reason is that I need to multiply vectors of the same type.
>
>> +static const vec_u8 mask[2] = {
>> +{ 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, 
>> 0x0B, 0x12, 0x13, 0x1A, 0x1B },
>> +{ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 
>> 0x0F, 0x16, 0x17, 0x1E, 0x1F },
>> +};
>
> Where do these tables come from? I would expect them to be shared
> across arches.

This is a permutation mask used by vec_perm, and it is specific to this
case (which is matrix transposition).

Alexandra
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH] hevc: Add HEVC IDCT 4x4 for PPC

2016-11-01 Thread Alexandra Hájková
From: Alexandra Hajkova 

---
 libavcodec/hevcdsp.c  |   2 +
 libavcodec/hevcdsp.h  |   1 +
 libavcodec/ppc/Makefile   |   1 +
 libavcodec/ppc/hevcdsp_ppc.c  | 111 ++
 libavcodec/ppc/hevcdsp_template.c |  52 ++
 5 files changed, 167 insertions(+)
 create mode 100644 libavcodec/ppc/hevcdsp_ppc.c
 create mode 100644 libavcodec/ppc/hevcdsp_template.c

diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 7c19198..e370254 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -247,4 +247,6 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int 
bit_depth)
 
 if (ARCH_X86)
 ff_hevc_dsp_init_x86(hevcdsp, bit_depth);
+if (ARCH_PPC)
+ff_hevc_dsp_init_altivec(hevcdsp, bit_depth);
 }
diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index 49cb711..d50551a 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h
@@ -116,6 +116,7 @@ typedef struct HEVCDSPContext {
 void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
 
 void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
+void ff_hevc_dsp_init_altivec(HEVCDSPContext *c, const int bit_depth);
 
 extern const int16_t ff_hevc_epel_coeffs[7][16];
 extern const int8_t ff_hevc_epel_coeffs8[7][16];
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 09eabcb..1c71df6 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -25,6 +25,7 @@ OBJS-$(CONFIG_VP8DSP)  += ppc/vp8dsp_altivec.o
 
 # decoders/encoders
 OBJS-$(CONFIG_APE_DECODER) += ppc/apedsp_altivec.o
+OBJS-$(CONFIG_HEVC_DECODER)+= ppc/hevcdsp_ppc.o
 OBJS-$(CONFIG_SVQ1_ENCODER)+= ppc/svq1enc_altivec.o
 OBJS-$(CONFIG_VORBIS_DECODER)  += ppc/vorbisdsp_altivec.o
 OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o
diff --git a/libavcodec/ppc/hevcdsp_ppc.c b/libavcodec/ppc/hevcdsp_ppc.c
new file mode 100644
index 000..a2cb25a
--- /dev/null
+++ b/libavcodec/ppc/hevcdsp_ppc.c
@@ -0,0 +1,111 @@
+
+/*
+ * Copyright (c) Alexandra Hajkova
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#undef pixel
+#endif
+
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/cpu.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavutil/ppc/util_altivec.h"
+
+#include "libavcodec/hevcdsp.h"
+
+static const vector int16_t trans4[4] = {
+{ 64,  64, 64,  64, 64,  64, 64,  64 },
+{ 83,  36, 83,  36, 83,  36, 83,  36 },
+{ 64, -64, 64, -64, 64, -64, 64, -64 },
+{ 36, -83, 36, -83, 36, -83, 36, -83 },
+};
+
+static const vec_u8 mask[2] = {
+{ 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, 0x0B, 
0x12, 0x13, 0x1A, 0x1B },
+{ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 0x0F, 
0x16, 0x17, 0x1E, 0x1F },
+};
+
+#if HAVE_ALTIVEC
+static void transform4x4(vector int16_t src_01, vector int16_t src_23,
+ vector int32_t res[4], const int shift, int16_t 
*coeffs)
+{
+vector int16_t src_02, src_13;
+vector int32_t zero = vec_splat_s32(0);
+vector int32_t e0, o0, e1, o1;
+vector int32_t add;
+
+src_13 = vec_mergel(src_01, src_23);
+src_02 = vec_mergeh(src_01, src_23);
+
+e0 = vec_msums(src_02, trans4[0], zero);
+o0 = vec_msums(src_13, trans4[1], zero);
+e1 = vec_msums(src_02, trans4[2], zero);
+o1 = vec_msums(src_13, trans4[3], zero);
+
+// if is not used by the other transform
+add = vec_sl(vec_splat_s32(1), vec_splat_u32(shift - 1));
+e0 = vec_add(e0, add);
+e1 = vec_add(e1, add);
+
+res[0] = vec_add(e0, o0);
+res[1] = vec_add(e1, o1);
+res[2] = vec_sub(e1, o1);
+res[3] = vec_sub(e0, o0);
+}
+
+static void scale(vector int32_t res[4], vector int16_t res_packed[2], int 
shift)
+{
+int i;
+vector unsigned int v_shift = vec_splat_u32(shift);
+
+for (i = 0; i < 4; i++)
+res[i] = vec_sra(res[i], v_shift);
+
+// clip16
+res_packed[0] = vec_packs(res[0], res[1]);
+res_packed[1] = vec_packs(res[2], res[3]);
+}
+#endif /* HAVE_ALTIVEC */
+
+#define BIT_DEPTH 8
+#include "libavcodec/ppc/hevcdsp_template.c"
+#undef 

[libav-devel] [PATCH 1/2] hevc: x86: Add add_residual optimizations

2016-10-21 Thread Alexandra Hájková
From: Pierre Edouard Lepere <pierre-edouard.lep...@insa-rennes.fr>

Initially written by Pierre Edouard Lepere 
<pierre-edouard.lep...@insa-rennes.fr>,
extended by James Almer <jamr...@gmail.com>.

Signed-off-by: Alexandra Hájková <alexan...@khirnov.net>
Signed-off-by: Diego Biurrun <di...@biurrun.de>
---
Applied review comments from Henrik and Diego (more consistent naming).

 libavcodec/x86/Makefile |   7 +-
 libavcodec/x86/hevc_add_res.asm | 371 
 libavcodec/x86/hevcdsp_init.c   |  42 +
 3 files changed, 417 insertions(+), 3 deletions(-)
 create mode 100644 libavcodec/x86/hevc_add_res.asm

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index a38535b..094c1fa 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -115,9 +115,10 @@ YASM-OBJS-$(CONFIG_AAC_DECODER)+= x86/sbrdsp.o
 YASM-OBJS-$(CONFIG_APE_DECODER)+= x86/apedsp.o
 YASM-OBJS-$(CONFIG_DCA_DECODER)+= x86/dcadsp.o
 YASM-OBJS-$(CONFIG_DNXHD_ENCODER)  += x86/dnxhdenc.o
-YASM-OBJS-$(CONFIG_HEVC_DECODER)   += x86/hevc_deblock.o\
-  x86/hevc_mc.o \
-  x86/hevc_idct.o
+YASM-OBJS-$(CONFIG_HEVC_DECODER)   += x86/hevc_add_res.o\
+  x86/hevc_deblock.o\
+  x86/hevc_idct.o   \
+  x86/hevc_mc.o
 YASM-OBJS-$(CONFIG_PNG_DECODER)+= x86/pngdsp.o
 YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
 YASM-OBJS-$(CONFIG_RV40_DECODER)   += x86/rv40dsp.o
diff --git a/libavcodec/x86/hevc_add_res.asm b/libavcodec/x86/hevc_add_res.asm
new file mode 100644
index 000..a1740b5
--- /dev/null
+++ b/libavcodec/x86/hevc_add_res.asm
@@ -0,0 +1,371 @@
+; *
+; * Provide SIMD optimizations for add_residual functions for HEVC decoding
+; * Copyright (c) 2014 Pierre-Edouard LEPERE
+; *
+; * This file is part of Libav.
+; *
+; * Libav is free software; you can redistribute it and/or
+; * modify it under the terms of the GNU Lesser General Public
+; * License as published by the Free Software Foundation; either
+; * version 2.1 of the License, or (at your option) any later version.
+; *
+; * Libav is distributed in the hope that it will be useful,
+; * but WITHOUT ANY WARRANTY; without even the implied warranty of
+; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; * Lesser General Public License for more details.
+; *
+; * You should have received a copy of the GNU Lesser General Public
+; * License along with Libav; if not, write to the Free Software
+; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
USA
+; 
**
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+max_pixels_10:  times 16  dw ((1 << 10)-1)
+
+SECTION .text
+
+; the add_res macros and functions were largely inspired by h264_idct.asm from 
the x264 project
+%macro ADD_RES_MMX_4_8 0
+mova  m0, [r1]
+mova  m2, [r1+8]
+pxor  m1, m1
+pxor  m3, m3
+psubw m1, m0
+psubw m3, m2
+packuswb  m0, m2
+packuswb  m1, m3
+
+movd  m2, [r0]
+movd  m3, [r0+r2]
+punpckldq m2, m3
+paddusb   m0, m2
+psubusbm0, m1
+movd[r0], m0
+psrlq m0, 32
+movd [r0+r2], m0
+%endmacro
+
+
+INIT_MMX mmxext
+; void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, int16_t *res, ptrdiff_t 
stride)
+cglobal hevc_add_residual_4_8, 3, 3, 6
+ADD_RES_MMX_4_8
+add   r1, 16
+lea   r0, [r0+r2*2]
+ADD_RES_MMX_4_8
+RET
+
+%macro ADD_RES_SSE_8_8 0
+pxor  m3, m3
+mova  m4, [r1]
+mova  m6, [r1+16]
+mova  m0, [r1+32]
+mova  m2, [r1+48]
+psubw m5, m3, m4
+psubw m7, m3, m6
+psubw m1, m3, m0
+packuswb  m4, m0
+packuswb  m5, m1
+psubw m3, m2
+packuswb  m6, m2
+packuswb  m7, m3
+
+movq  m0, [r0]
+movq  m1, [r0+r2]
+movhpsm0, [r0+r2*2]
+movhpsm1, [r0+r3]
+paddusb   m0, m4
+paddusb   m1, m6
+psubusb   m0, m5
+psubusb   m1, m7
+movq[r0], m0
+movq [r0+r2], m1
+movhps [r0+2*r2], m0
+movhps   [r0+r3], m1
+%endmacro
+
+%macro ADD_RES_SSE_16_32_8 3
+mova xm2, [r1+%1]
+mova xm6, 

[libav-devel] [PATCH 2/2] checkasm: Add a test for HEVC add_residual

2016-10-21 Thread Alexandra Hájková
---
Applied all of Henrik's and Diego's review comments:
fixed stride and sizes, consistent naming

 tests/checkasm/Makefile   |  2 +-
 tests/checkasm/checkasm.c |  1 +
 tests/checkasm/checkasm.h |  1 +
 tests/checkasm/hevc_add_res.c | 85 +++
 4 files changed, 88 insertions(+), 1 deletion(-)
 create mode 100644 tests/checkasm/hevc_add_res.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 22cf3db..2250f65 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -12,7 +12,7 @@ AVCODECOBJS-$(CONFIG_VP8DSP)+= vp8dsp.o
 
 # decoders/encoders
 AVCODECOBJS-$(CONFIG_DCA_DECODER)   += dcadsp.o synth_filter.o
-AVCODECOBJS-$(CONFIG_HEVC_DECODER)  += hevc_mc.o hevc_idct.o
+AVCODECOBJS-$(CONFIG_HEVC_DECODER)  += hevc_mc.o hevc_idct.o hevc_add_res.o
 AVCODECOBJS-$(CONFIG_V210_ENCODER)  += v210enc.o
 AVCODECOBJS-$(CONFIG_VP9_DECODER)   += vp9dsp.o
 
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 040c4eb..623bbce 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -90,6 +90,7 @@ static const struct {
 { "h264qpel", checkasm_check_h264qpel },
 #endif
 #if CONFIG_HEVC_DECODER
+{ "hevc_add_res", checkasm_check_hevc_add_res },
 { "hevc_mc", checkasm_check_hevc_mc },
 { "hevc_idct", checkasm_check_hevc_idct },
 #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 75aa457..462c908 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -39,6 +39,7 @@ void checkasm_check_fmtconvert(void);
 void checkasm_check_h264dsp(void);
 void checkasm_check_h264pred(void);
 void checkasm_check_h264qpel(void);
+void checkasm_check_hevc_add_res(void);
 void checkasm_check_hevc_idct(void);
 void checkasm_check_hevc_mc(void);
 void checkasm_check_huffyuvdsp(void);
diff --git a/tests/checkasm/hevc_add_res.c b/tests/checkasm/hevc_add_res.c
new file mode 100644
index 000..639e25e
--- /dev/null
+++ b/tests/checkasm/hevc_add_res.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016 Alexandra Hájková
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Libav; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/intreadwrite.h"
+
+#include "libavcodec/hevcdsp.h"
+
+#include "checkasm.h"
+
+#define randomize_buffers(buf, size)\
+do {\
+int j;  \
+for (j = 0; j < size; j++) {\
+int16_t r = rnd();  \
+AV_WN16A(buf + j, r >> 3);  \
+}   \
+} while (0)
+
+#define randomize_buffers2(buf, size) \
+do {  \
+int j;\
+for (j = 0; j < size; j++)\
+AV_WN16A(buf + j * 2, rnd() & 0x3FF); \
+} while (0)
+
+static void check_add_res(HEVCDSPContext h, int bit_depth)
+{
+int i;
+LOCAL_ALIGNED(32, int16_t, res0, [32 * 32]);
+LOCAL_ALIGNED(32, int16_t, res1, [32 * 32]);
+LOCAL_ALIGNED(32, uint8_t, dst0, [32 * 32 * 2]);
+LOCAL_ALIGNED(32, uint8_t, dst1, [32 * 32 * 2]);
+
+for (i = 2; i <= 5; i++) {
+int block_size = 1 << i;
+int size = block_size * block_size;
+ptrdiff_t stride = block_size << (bit_depth > 8);
+declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t 
*coeffs, ptrdiff_t stride);
+
+randomize_buffers(res0, size);
+randomize_buffers2(dst0, size);
+memcpy(res1, res0, sizeof(*res0) * size);
+memcpy(dst1, dst0, size);
+
+if (check_func(h.add_residual[i - 2], "add_res_%dx%d_%d", block_size, 
block_size, bit_depth)) {
+call_ref(dst0, res0, stride);
+call_new(dst1, res1, stride);
+if (memcmp(dst0, dst1, size))
+fail();
+bench_new(dst1, res1, stride);
+}
+}
+}
+
+void checkasm_check_hevc_add_res(void)
+{
+int bit_depth;
+
+for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+HEVCDSPContext h;
+
+

[libav-devel] [PATCH 2/2] checkasm: Add a test for HEVC add_residual

2016-10-13 Thread Alexandra Hájková
---
 tests/checkasm/Makefile   |  2 +-
 tests/checkasm/checkasm.c |  1 +
 tests/checkasm/checkasm.h |  1 +
 tests/checkasm/hevc_add_res.c | 84 +++
 4 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 tests/checkasm/hevc_add_res.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 9b3df55..ac3e97e 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -12,7 +12,7 @@ AVCODECOBJS-$(CONFIG_VP8DSP)+= vp8dsp.o
 
 # decoders/encoders
 AVCODECOBJS-$(CONFIG_DCA_DECODER)   += dcadsp.o synth_filter.o
-AVCODECOBJS-$(CONFIG_HEVC_DECODER)  += hevc_mc.o hevc_idct.o
+AVCODECOBJS-$(CONFIG_HEVC_DECODER)  += hevc_mc.o hevc_idct.o hevc_add_res.o
 AVCODECOBJS-$(CONFIG_V210_ENCODER)  += v210enc.o
 AVCODECOBJS-$(CONFIG_VP9_DECODER)   += vp9dsp.o
 
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 040c4eb..623bbce 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -90,6 +90,7 @@ static const struct {
 { "h264qpel", checkasm_check_h264qpel },
 #endif
 #if CONFIG_HEVC_DECODER
+{ "hevc_add_res", checkasm_check_hevc_add_res },
 { "hevc_mc", checkasm_check_hevc_mc },
 { "hevc_idct", checkasm_check_hevc_idct },
 #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 5a4c056..bacd6f4 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -39,6 +39,7 @@ void checkasm_check_fmtconvert(void);
 void checkasm_check_h264dsp(void);
 void checkasm_check_h264pred(void);
 void checkasm_check_h264qpel(void);
+void checkasm_check_hevc_add_res(void);
 void checkasm_check_hevc_idct(void);
 void checkasm_check_hevc_mc(void);
 void checkasm_check_huffyuvdsp(void);
diff --git a/tests/checkasm/hevc_add_res.c b/tests/checkasm/hevc_add_res.c
new file mode 100644
index 000..c242c8c
--- /dev/null
+++ b/tests/checkasm/hevc_add_res.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Alexandra Hájková
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Libav; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/intreadwrite.h"
+
+#include "libavcodec/hevcdsp.h"
+
+#include "checkasm.h"
+
+#define randomize_buffers(buf, size)\
+do {\
+int j;  \
+for (j = 0; j < size; j++) {\
+int16_t r = rnd();  \
+AV_WN16A(buf + j, r >> 3);  \
+}   \
+} while (0)
+
+#define randomize_buffers2(buf, size) \
+do {  \
+int j;\
+for (j = 0; j < size; j++)\
+AV_WN16A(buf + j * 2, rnd() & 0x3FF); \
+} while (0)
+
+static void check_add_res(HEVCDSPContext h, int bit_depth)
+{
+int i;
+LOCAL_ALIGNED(32, int16_t, res0, [32 * 32]);
+LOCAL_ALIGNED(32, int16_t, res1, [32 * 32]);
+LOCAL_ALIGNED(32, uint8_t, dst0, [32 * 32 * 2]);
+LOCAL_ALIGNED(32, uint8_t, dst1, [32 * 32 * 2]);
+
+for (i = 2; i <= 5; i++) {
+int block_size = 1 << i;
+int size = block_size * block_size;
+declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *res, 
ptrdiff_t stride);
+
+randomize_buffers(res0, size);
+randomize_buffers2(dst0, size * 2);
+memcpy(res1, res0, sizeof(*res0) * size);
+memcpy(dst1, dst0, size * 2);
+
+if (check_func(h.add_residual[i - 2], "add_res_%dx%d_%d", block_size, 
block_size, bit_depth)) {
+call_ref(dst0, res0, block_size * 2);
+call_new(dst1, res1, block_size * 2);
+if (memcmp(dst0, dst1, size * 2))
+fail();
+bench_new(dst1, res1, block_size);
+}
+}
+}
+
+void checkasm_check_hevc_add_res(void)
+{
+int bit_depth;
+
+for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+HEVCDSPContext h;
+
+ff_hevc_dsp_init(&h, bit_depth);
+check_add_res(h, bit_depth);
+}
+report("add_residual");
+}
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 1/2] hevc/x86: Add add_residual

2016-10-13 Thread Alexandra Hájková
From: Pierre Edouard Lepere <pierre-edouard.lep...@insa-rennes.fr>

Initially written by Pierre Edouard Lepere 
<pierre-edouard.lep...@insa-rennes.fr>,
extended by James Almer <jamr...@gmail.com>.

Signed-off-by: Alexandra Hájková <alexan...@khirnov.net>
---
 libavcodec/x86/Makefile |   3 +-
 libavcodec/x86/hevc_res_add.asm | 391 
 libavcodec/x86/hevcdsp_init.c   |  40 
 3 files changed, 433 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/x86/hevc_res_add.asm

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index a38535b..aa93e67 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -117,7 +117,8 @@ YASM-OBJS-$(CONFIG_DCA_DECODER)+= x86/dcadsp.o
 YASM-OBJS-$(CONFIG_DNXHD_ENCODER)  += x86/dnxhdenc.o
 YASM-OBJS-$(CONFIG_HEVC_DECODER)   += x86/hevc_deblock.o\
   x86/hevc_mc.o \
-  x86/hevc_idct.o
+  x86/hevc_idct.o   \
+  x86/hevc_res_add.o
 YASM-OBJS-$(CONFIG_PNG_DECODER)+= x86/pngdsp.o
 YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
 YASM-OBJS-$(CONFIG_RV40_DECODER)   += x86/rv40dsp.o
diff --git a/libavcodec/x86/hevc_res_add.asm b/libavcodec/x86/hevc_res_add.asm
new file mode 100644
index 000..f8d9fd7
--- /dev/null
+++ b/libavcodec/x86/hevc_res_add.asm
@@ -0,0 +1,391 @@
+; *
+; * Provide SIMD optimizations for add_residual functions for HEVC decoding
+; * Copyright (c) 2014 Pierre-Edouard LEPERE
+; *
+; * This file is part of Libav.
+; *
+; * Libav is free software; you can redistribute it and/or
+; * modify it under the terms of the GNU Lesser General Public
+; * License as published by the Free Software Foundation; either
+; * version 2.1 of the License, or (at your option) any later version.
+; *
+; * Libav is distributed in the hope that it will be useful,
+; * but WITHOUT ANY WARRANTY; without even the implied warranty of
+; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; * Lesser General Public License for more details.
+; *
+; * You should have received a copy of the GNU Lesser General Public
+; * License along with Libav; if not, write to the Free Software
+; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
USA
+; 
**
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+max_pixels_10:  times 16  dw ((1 << 10)-1)
+
+SECTION .text
+
+; the add_res macros and functions were largely inspired by x264 project's 
code in the h264_idct.asm file
+%macro ADD_RES_MMX_4_8 0
+mova  m2, [r1]
+mova  m4, [r1+8]
+pxor  m3, m3
+psubw m3, m2
+packuswb  m2, m2
+packuswb  m3, m3
+pxor  m5, m5
+psubw m5, m4
+packuswb  m4, m4
+packuswb  m5, m5
+
+movh  m0, [r0 ]
+movh  m1, [r0+r2  ]
+paddusb   m0, m2
+paddusb   m1, m4
+psubusb   m0, m3
+psubusb   m1, m5
+movh   [r0 ], m0
+movh   [r0+r2  ], m1
+%endmacro
+
+
+INIT_MMX mmxext
+; void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, int16_t *coeffs, 
ptrdiff_t stride)
+cglobal hevc_add_residual_4_8, 3, 4, 6
+ADD_RES_MMX_4_8
+add   r1, 16
+lea   r0, [r0+r2*2]
+ADD_RES_MMX_4_8
+RET
+
+%macro ADD_RES_SSE_8_8 0
+pxor  m3, m3
+mova  m4, [r1]
+mova  m6, [r1+16]
+mova  m0, [r1+32]
+mova  m2, [r1+48]
+psubw m5, m3, m4
+psubw m7, m3, m6
+psubw m1, m3, m0
+packuswb  m4, m0
+packuswb  m5, m1
+psubw m3, m2
+packuswb  m6, m2
+packuswb  m7, m3
+
+movqm0, [r0 ]
+movqm1, [r0+r2  ]
+movhps  m0, [r0+r2*2]
+movhps  m1, [r0+r3  ]
+paddusb m0, m4
+paddusb m1, m6
+psubusb m0, m5
+psubusb m1, m7
+movq [r0 ], m0
+movq [r0+r2  ], m1
+movhps   [r0+2*r2], m0
+movhps   [r0+r3  ], m1
+%endmacro
+
+%macro ADD_RES_SSE_16_32_8 3
+mova xm2, [r1+%1   ]
+mova xm6, [r1+%1+16]
+%if cpuflag(avx2)
+vinserti128   m2, m2, [r1+%1+32], 1
+vinserti128   m6, m6, [r1+%1+48], 1
+%endif
+%if cpuflag(avx)
+psubw m1, m0, m2
+psubw m5, m0, m6
+%else
+mova  m1, m0
+mova  

Re: [libav-devel] [PATCH 2/2] checkasm: Add a test for HEVC add_residual

2016-10-13 Thread Alexandra Hájková
>> +#define randomize_buffers(buf, size)\
>> +do {\
>> +int j;  \
>> +for (j = 0; j < size; j++) {\
>> +int16_t r = rnd();  \
>> +AV_WN16A(buf + j, r >> 3);  \
>> +}   \
>> +} while (0)
>
> We should stop duplicating these between checkasm modules some day.
> You're welcome to help me refactor.

As Martin already pointed out, the randomize_buffers macros are slightly
different in each of the HEVC tests.
>
>> +#define randomize_buffers2(buf, size) \
>> +do { \
>> +int j;   \
>> +for (j = 0; j < size; j++)   \
>> +AV_WN16A(buf + j * 2, (rnd() & 0xFF));   \
>
> pointless (), align the \
>
> What is the reason for writing 16-bits and throwing the upper half away?

I'd better use 0x3FF.
>
>> +void checkasm_check_hevc_add_res(void)
>> +{
>> +int bit_depth;
>> +
>> +for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
>> +HEVCDSPContext h;
>> +
>> +ff_hevc_dsp_init(&h, bit_depth);
>> +check_add_res(h, bit_depth);
>> +}
>
> I didn't see you add 9-bit versions of the assembly functions, why do
> you test 9 bits?
>
There's no 9-bit SIMD function, so nothing is actually tested for 9 bits,
but the code is simpler this way.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 1/2] hevc/x86: Add add_residual

2016-10-12 Thread Alexandra Hájková
From: Pierre Edouard Lepere <pierre-edouard.lep...@insa-rennes.fr>

Initially written by Pierre Edouard Lepere 
<pierre-edouard.lep...@insa-rennes.fr>,
extended by James Almer <jamr...@gmail.com>.

Signed-off-by: Alexandra Hájková <alexan...@khirnov.net>
---
 libavcodec/x86/Makefile |   3 +-
 libavcodec/x86/hevc_res_add.asm | 391 
 libavcodec/x86/hevcdsp_init.c   |  40 
 3 files changed, 433 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/x86/hevc_res_add.asm

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index a38535b..aa93e67 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -117,7 +117,8 @@ YASM-OBJS-$(CONFIG_DCA_DECODER)+= x86/dcadsp.o
 YASM-OBJS-$(CONFIG_DNXHD_ENCODER)  += x86/dnxhdenc.o
 YASM-OBJS-$(CONFIG_HEVC_DECODER)   += x86/hevc_deblock.o\
   x86/hevc_mc.o \
-  x86/hevc_idct.o
+  x86/hevc_idct.o   \
+  x86/hevc_res_add.o
 YASM-OBJS-$(CONFIG_PNG_DECODER)+= x86/pngdsp.o
 YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
 YASM-OBJS-$(CONFIG_RV40_DECODER)   += x86/rv40dsp.o
diff --git a/libavcodec/x86/hevc_res_add.asm b/libavcodec/x86/hevc_res_add.asm
new file mode 100644
index 000..1e3bfc2
--- /dev/null
+++ b/libavcodec/x86/hevc_res_add.asm
@@ -0,0 +1,391 @@
+; /*
+; * Provide SIMD optimizations for add_residual functions for HEVC decoding
+; * Copyright (c) 2014 Pierre-Edouard LEPERE
+; *
+; * This file is part of Libav.
+; *
+; * FFmpeg is free software; you can redistribute it and/or
+; * modify it under the terms of the GNU Lesser General Public
+; * License as published by the Free Software Foundation; either
+; * version 2.1 of the License, or (at your option) any later version.
+; *
+; * FFmpeg is distributed in the hope that it will be useful,
+; * but WITHOUT ANY WARRANTY; without even the implied warranty of
+; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; * Lesser General Public License for more details.
+; *
+; * You should have received a copy of the GNU Lesser General Public
+; * License along with FFmpeg; if not, write to the Free Software
+; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
USA
+; */
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+max_pixels_10:  times 16  dw ((1 << 10)-1)
+
+SECTION .text
+
+; the add_res macros and functions were largely inspired by x264 project's 
code in the h264_idct.asm file
+%macro ADD_RES_MMX_4_8 0
+mova  m2, [r1]
+mova  m4, [r1+8]
+pxor  m3, m3
+psubw m3, m2
+packuswb  m2, m2
+packuswb  m3, m3
+pxor  m5, m5
+psubw m5, m4
+packuswb  m4, m4
+packuswb  m5, m5
+
+movh  m0, [r0 ]
+movh  m1, [r0+r2  ]
+paddusb   m0, m2
+paddusb   m1, m4
+psubusb   m0, m3
+psubusb   m1, m5
+movh   [r0 ], m0
+movh   [r0+r2  ], m1
+%endmacro
+
+
+INIT_MMX mmxext
+; void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, int16_t *coeffs, 
ptrdiff_t stride)
+cglobal hevc_add_residual_4_8, 3, 4, 6
+ADD_RES_MMX_4_8
+add   r1, 16
+lea   r0, [r0+r2*2]
+ADD_RES_MMX_4_8
+RET
+
+%macro ADD_RES_SSE_8_8 0
+pxor  m3, m3
+mova  m4, [r1]
+mova  m6, [r1+16]
+mova  m0, [r1+32]
+mova  m2, [r1+48]
+psubw m5, m3, m4
+psubw m7, m3, m6
+psubw m1, m3, m0
+packuswb  m4, m0
+packuswb  m5, m1
+psubw m3, m2
+packuswb  m6, m2
+packuswb  m7, m3
+
+movqm0, [r0 ]
+movqm1, [r0+r2  ]
+movhps  m0, [r0+r2*2]
+movhps  m1, [r0+r3  ]
+paddusb m0, m4
+paddusb m1, m6
+psubusb m0, m5
+psubusb m1, m7
+movq [r0 ], m0
+movq [r0+r2  ], m1
+movhps   [r0+2*r2], m0
+movhps   [r0+r3  ], m1
+%endmacro
+
+%macro ADD_RES_SSE_16_32_8 3
+mova xm2, [r1+%1   ]
+mova xm6, [r1+%1+16]
+%if cpuflag(avx2)
+vinserti128   m2, m2, [r1+%1+32], 1
+vinserti128   m6, m6, [r1+%1+48], 1
+%endif
+%if cpuflag(avx)
+psubw m1, m0, m2
+psubw m5, m0, m6
+%else
+mova  m1, m0
+mova  m5, m0
+psubw m1, m2
+psubw m5, m6
+%endif
+packuswb  m2, m6
+packuswb  m1, m5
+
+mova   

[libav-devel] [PATCH 2/2] checkasm: Add a test for HEVC add_residual

2016-10-12 Thread Alexandra Hájková
---
 tests/checkasm/Makefile   |  2 +-
 tests/checkasm/checkasm.c |  1 +
 tests/checkasm/checkasm.h |  1 +
 tests/checkasm/hevc_add_res.c | 84 +++
 4 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 tests/checkasm/hevc_add_res.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 9b3df55..ac3e97e 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -12,7 +12,7 @@ AVCODECOBJS-$(CONFIG_VP8DSP)+= vp8dsp.o
 
 # decoders/encoders
 AVCODECOBJS-$(CONFIG_DCA_DECODER)   += dcadsp.o synth_filter.o
-AVCODECOBJS-$(CONFIG_HEVC_DECODER)  += hevc_mc.o hevc_idct.o
+AVCODECOBJS-$(CONFIG_HEVC_DECODER)  += hevc_mc.o hevc_idct.o hevc_add_res.o
 AVCODECOBJS-$(CONFIG_V210_ENCODER)  += v210enc.o
 AVCODECOBJS-$(CONFIG_VP9_DECODER)   += vp9dsp.o
 
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 040c4eb..d0dc525 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -92,6 +92,7 @@ static const struct {
 #if CONFIG_HEVC_DECODER
 { "hevc_mc", checkasm_check_hevc_mc },
 { "hevc_idct", checkasm_check_hevc_idct },
+{ "hevc_add_res", checkasm_check_hevc_add_res },
 #endif
 #if CONFIG_HUFFYUVDSP
 { "huffyuvdsp", checkasm_check_huffyuvdsp },
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 5a4c056..bacd6f4 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -39,6 +39,7 @@ void checkasm_check_fmtconvert(void);
 void checkasm_check_h264dsp(void);
 void checkasm_check_h264pred(void);
 void checkasm_check_h264qpel(void);
+void checkasm_check_hevc_add_res(void);
 void checkasm_check_hevc_idct(void);
 void checkasm_check_hevc_mc(void);
 void checkasm_check_huffyuvdsp(void);
diff --git a/tests/checkasm/hevc_add_res.c b/tests/checkasm/hevc_add_res.c
new file mode 100644
index 000..fcc47c1
--- /dev/null
+++ b/tests/checkasm/hevc_add_res.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Alexandra Hájková
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Libav; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/intreadwrite.h"
+
+#include "libavcodec/hevcdsp.h"
+
+#include "checkasm.h"
+
+#define randomize_buffers(buf, size)\
+do {\
+int j;  \
+for (j = 0; j < size; j++) {\
+int16_t r = rnd();  \
+AV_WN16A(buf + j, r >> 3);  \
+}   \
+} while (0)
+
+#define randomize_buffers2(buf, size) \
+do { \
+int j;   \
+for (j = 0; j < size; j++)   \
+AV_WN16A(buf + j * 2, (rnd() & 0xFF));   \
+} while (0)
+
+static void check_add_res(HEVCDSPContext h, int bit_depth)
+{
+int i;
+LOCAL_ALIGNED(32, int16_t, res0, [32 * 32]);
+LOCAL_ALIGNED(32, int16_t, res1, [32 * 32]);
+LOCAL_ALIGNED(32, uint8_t, dst0, [32 * 32 * 2]);
+LOCAL_ALIGNED(32, uint8_t, dst1, [32 * 32 * 2]);
+
+for (i = 2; i <= 5; i++) {
+int block_size = 1 << i;
+int size = block_size * block_size;
+declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *res, 
ptrdiff_t stride);
+
+randomize_buffers(res0, size);
+randomize_buffers2(dst0, size * 2);
+memcpy(res1, res0, sizeof(*res0) * size);
+memcpy(dst1, dst0, size * 2);
+
+if (check_func(h.add_residual[i - 2], "add_res_%dx%d_%d", block_size, 
block_size, bit_depth)) {
+call_ref(dst0, res0, block_size * 2);
+call_new(dst1, res1, block_size * 2);
+if (memcmp(dst0, dst1, size * 2))
+fail();
+bench_new(dst1, res1, block_size);
+}
+}
+}
+
+void checkasm_check_hevc_add_res(void)
+{
+int bit_depth;
+
+for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+HEVCDSPContext h;
+
+ff_hevc_dsp_init(&h, bit_depth);
+check_add_res(h, bit_depth);
+}
+report("add_residual");
+}
-- 
2.1.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 1/2] hevc: Add SSE2 and AVX IDCT

2016-10-10 Thread Alexandra Hájková
---
 libavcodec/x86/hevc_idct.asm  |  787 +++--
 libavcodec/x86/hevcdsp_init.c |   54 ++-
 2 files changed, 811 insertions(+), 30 deletions(-)

diff --git a/libavcodec/x86/hevc_idct.asm b/libavcodec/x86/hevc_idct.asm
index d662aa9..f397cc1 100644
--- a/libavcodec/x86/hevc_idct.asm
+++ b/libavcodec/x86/hevc_idct.asm
@@ -2,6 +2,7 @@
 ;* SIMD-optimized IDCT functions for HEVC decoding
 ;* Copyright (c) 2014 Pierre-Edouard LEPERE
 ;* Copyright (c) 2014 James Almer
+;* Copyright (c) 2016 Alexandra Hájková
 ;*
 ;* This file is part of Libav.
 ;*
@@ -22,6 +23,217 @@
 
 %include "libavutil/x86/x86util.asm"
 
+SECTION_RODATA
+
+pd_64: times 4 dd 64
+pd_2048: times 4 dd 2048
+pd_512: times 4 dd 512
+
+; 4x4 transform coeffs
+cextern pw_64
+pw_64_m64: times 4 dw 64, -64
+pw_83_36: times 4 dw 83, 36
+pw_36_m83: times 4 dw 36, -83
+
+; 8x8 transform coeffs
+pw_89_75: times 4 dw 89, 75
+pw_50_18: times 4 dw 50, 18
+
+pw_75_m18: times 4 dw 75, -18
+pw_m89_m50: times 4 dw -89, -50
+
+pw_50_m89: times 4 dw 50, -89
+pw_18_75: times 4 dw 18, 75
+
+pw_18_m50: times 4 dw 18, -50
+pw_75_m89: times 4 dw 75, -89
+
+; 16x16 transformation coeffs
+trans_coeffs16: times 4 dw 90, 87
+times 4 dw 80, 70
+times 4 dw 57, 43
+times 4 dw 25, 9
+
+times 4 dw 87, 57
+times 4 dw 9, -43
+times 4 dw -80, -90
+times 4 dw -70, -25
+
+times 4 dw 80, 9
+times 4 dw -70, -87
+times 4 dw -25, 57
+times 4 dw 90, 43
+
+times 4 dw 70, -43
+times 4 dw -87, 9
+times 4 dw 90, 25
+times 4 dw -80, -57
+
+times 4 dw 57, -80
+times 4 dw -25, 90
+times 4 dw -9, -87
+times 4 dw 43, 70
+
+times 4 dw 43, -90
+times 4 dw 57, 25
+times 4 dw -87, 70
+times 4 dw 9, -80
+
+times 4 dw 25, -70
+times 4 dw 90, -80
+times 4 dw 43, 9
+times 4 dw -57, 87
+
+times 4 dw 9, -25
+times 4 dw 43, -57
+times 4 dw 70, -80
+times 4 dw 87, -90
+
+; 32x32 transform coeffs
+trans_coeff32: times 8 dw 90
+times 4 dw 88, 85
+times 4 dw 82, 78
+times 4 dw 73, 67
+times 4 dw 61, 54
+times 4 dw 46, 38
+times 4 dw 31, 22
+times 4 dw 13, 4
+
+times 4 dw 90, 82
+times 4 dw 67, 46
+times 4 dw 22, -4
+times 4 dw -31, -54
+times 4 dw -73, -85
+times 4 dw -90, -88
+times 4 dw -78, -61
+times 4 dw -38, -13
+
+times 4 dw 88, 67
+times 4 dw 31, -13
+times 4 dw -54, -82
+times 4 dw -90, -78
+times 4 dw -46, -4
+times 4 dw 38, 73
+times 4 dw 90, 85
+times 4 dw 61, 22
+
+times 4 dw 85, 46
+times 4 dw -13, -67
+times 4 dw -90, -73
+times 4 dw -22, 38
+times 4 dw 82, 88
+times 4 dw 54, -4
+times 4 dw -61, -90
+times 4 dw -78, -31
+
+times 4 dw 82, 22
+times 4 dw -54, -90
+times 4 dw -61, 13
+times 4 dw 78, 85
+times 4 dw 31, -46
+times 4 dw -90, -67
+times 4 dw 4, 73
+times 4 dw 88, 38
+
+times 4 dw 78, -4
+times 4 dw -82, -73
+times 4 dw 13, 85
+times 4 dw 67, -22
+times 4 dw -88, -61
+times 4 dw 31, 90
+times 4 dw 54, -38
+times 4 dw -90, -46
+
+times 4 dw 73, -31
+times 4 dw -90, -22
+times 4 dw 78, 67
+times 4 dw -38, -90
+times 4 dw -13, 82
+times 4 dw 61, -46
+times 4 dw -88, -4
+times 4 dw 85, 54
+
+times 4 dw 67, -54
+times 4 dw -78, 38
+times 4 dw 85, -22
+times 4 dw -90, 4
+times 4 dw 90, 13
+times 4 dw -88, -31
+times 4 dw 82, 46
+times 4 dw -73, -61
+
+times 4 dw 61, -73
+times 4 dw -46, 82
+times 4 dw 31, -88
+times 4 dw -13, 90
+times 4 dw -4, -90
+times 4 dw 22, 85
+times 4 dw -38, -78
+times 4 dw 54, 67
+
+times 4 dw 54, -85
+times 4 dw -4, 88
+times 4 dw -46, -61
+times 4 dw 82, 13
+times 4 dw -90, 38
+times 4 dw 67, -78
+times 4 dw -22, 90
+times 4 dw -31, -73
+
+times 4 dw 46, -90
+times 4 dw 38, 54
+times 4 dw -90, 31
+times 4 dw 61, -88
+times 4 dw 22, 67
+times 4 dw -85, 13
+times 4 dw 73, -82
+times 4 dw 4, 78
+
+times 4 dw 38, -88
+times 4 dw 73, -4
+times 4 dw -67, 90
+times 4 dw -46, -31
+times 4 dw 85, -78
+times 4 dw 13, 61
+times 4 dw -90, 54
+times 4 dw 22, -82
+
+times 4 dw 31, -78
+times 4 dw 90, -61
+times 4 dw 4, 54
+times 4 dw -88, 82
+times 4 dw -38, -22
+times 4 dw 73, -90
+times 4 dw 67, -13
+times 4 dw -46, 85
+
+times 4 dw 22, -61
+times 4 dw 85, -90
+times 4 dw 73, -38
+times 4 dw -4, 46
+times 4 dw -78, 90
+times 4 dw -82, 54
+times 4 dw -13, -31
+times 4 dw 67, -88
+
+times 4 dw 13, -38
+times 4 dw 61, -78
+times 4 dw 88, -90
+times 4 dw 85, -73
+times 4 dw 54, -31
+times 4 dw 4, 22
+times 4 dw -46, 67
+times 4 dw -82, 90
+
+times 4 dw 4, -13
+times 4 dw 22, -31
+times 4 dw 38, -46
+times 4 dw 54, -61
+times 4 dw 67, -73
+times 4 dw 78, -82
+times 4 dw 85, -88
+times 4 dw 90, -90
+
 section .text
 
 ; void ff_hevc_idctHxW_dc_{8,10}_(int16_t *coeffs)
@@ -74,34 +286,565 @@ cglobal hevc_idct_%1x%1_dc_%2, 1, 2, 1, coeff, tmp
 RET
 %endmacro
 
-; 8-bit
-INIT_MMX mmxext
-IDCT_DC_NL  4,  8
-IDCT_DC 8,  2,  8
+; IDCT 4x4, expects input in m0, m1
+; %1 - shift
+; %2 - 1/0 - SCALE and Transpose or not
+; %3 - 1/0 add constant or not
+%macro TR_4x4 3
+; interleaves src0 with src2 to m0
+; and src1 with scr3 to m2
+; src0: 00 01 02 03 m0: 00 20 01 21 02 22 03 23
+; src1: 10 11 12 13 -->
+ 

[libav-devel] [PATCH 1/2] hevc: Add SSE2 and AVX IDCT

2016-10-08 Thread Alexandra Hájková
---
Apply the review comments from the last review.

 libavcodec/x86/hevc_idct.asm  |  792 +++--
 libavcodec/x86/hevcdsp_init.c |   62 +++-
 2 files changed, 825 insertions(+), 29 deletions(-)

diff --git a/libavcodec/x86/hevc_idct.asm b/libavcodec/x86/hevc_idct.asm
index d662aa9..6606d46 100644
--- a/libavcodec/x86/hevc_idct.asm
+++ b/libavcodec/x86/hevc_idct.asm
@@ -2,6 +2,7 @@
 ;* SIMD-optimized IDCT functions for HEVC decoding
 ;* Copyright (c) 2014 Pierre-Edouard LEPERE
 ;* Copyright (c) 2014 James Almer
+;* Copyright (c) 2016 Alexandra Hájková
 ;*
 ;* This file is part of Libav.
 ;*
@@ -22,6 +23,217 @@
 
 %include "libavutil/x86/x86util.asm"
 
+SECTION_RODATA
+
+pd_64: times 4 dd 64
+pd_2048: times 4 dd 2048
+pd_512: times 4 dd 512
+
+; 4x4 transform coeffs
+cextern pw_64
+pw_64_m64: times 4 dw 64, -64
+pw_83_36: times 4 dw 83, 36
+pw_36_m83: times 4 dw 36, -83
+
+; 8x8 transform coeffs
+pw_89_75: times 4 dw 89, 75
+pw_50_18: times 4 dw 50, 18
+
+pw_75_m18: times 4 dw 75, -18
+pw_m89_m50: times 4 dw -89, -50
+
+pw_50_m89: times 4 dw 50, -89
+pw_18_75: times 4 dw 18, 75
+
+pw_18_m50: times 4 dw 18, -50
+pw_75_m89: times 4 dw 75, -89
+
+; 16x16 transformation coeffs
+trans_coeffs16: times 4 dw 90, 87
+times 4 dw 80, 70
+times 4 dw 57, 43
+times 4 dw 25, 9
+
+times 4 dw 87, 57
+times 4 dw 9, -43
+times 4 dw -80, -90
+times 4 dw -70, -25
+
+times 4 dw 80, 9
+times 4 dw -70, -87
+times 4 dw -25, 57
+times 4 dw 90, 43
+
+times 4 dw 70, -43
+times 4 dw -87, 9
+times 4 dw 90, 25
+times 4 dw -80, -57
+
+times 4 dw 57, -80
+times 4 dw -25, 90
+times 4 dw -9, -87
+times 4 dw 43, 70
+
+times 4 dw 43, -90
+times 4 dw 57, 25
+times 4 dw -87, 70
+times 4 dw 9, -80
+
+times 4 dw 25, -70
+times 4 dw 90, -80
+times 4 dw 43, 9
+times 4 dw -57, 87
+
+times 4 dw 9, -25
+times 4 dw 43, -57
+times 4 dw 70, -80
+times 4 dw 87, -90
+
+; 32x32 transform coeffs
+trans_coeff32: times 8 dw 90
+times 4 dw 88, 85
+times 4 dw 82, 78
+times 4 dw 73, 67
+times 4 dw 61, 54
+times 4 dw 46, 38
+times 4 dw 31, 22
+times 4 dw 13, 4
+
+times 4 dw 90, 82
+times 4 dw 67, 46
+times 4 dw 22, -4
+times 4 dw -31, -54
+times 4 dw -73, -85
+times 4 dw -90, -88
+times 4 dw -78, -61
+times 4 dw -38, -13
+
+times 4 dw 88, 67
+times 4 dw 31, -13
+times 4 dw -54, -82
+times 4 dw -90, -78
+times 4 dw -46, -4
+times 4 dw 38, 73
+times 4 dw 90, 85
+times 4 dw 61, 22
+
+times 4 dw 85, 46
+times 4 dw -13, -67
+times 4 dw -90, -73
+times 4 dw -22, 38
+times 4 dw 82, 88
+times 4 dw 54, -4
+times 4 dw -61, -90
+times 4 dw -78, -31
+
+times 4 dw 82, 22
+times 4 dw -54, -90
+times 4 dw -61, 13
+times 4 dw 78, 85
+times 4 dw 31, -46
+times 4 dw -90, -67
+times 4 dw 4, 73
+times 4 dw 88, 38
+
+times 4 dw 78, -4
+times 4 dw -82, -73
+times 4 dw 13, 85
+times 4 dw 67, -22
+times 4 dw -88, -61
+times 4 dw 31, 90
+times 4 dw 54, -38
+times 4 dw -90, -46
+
+times 4 dw 73, -31
+times 4 dw -90, -22
+times 4 dw 78, 67
+times 4 dw -38, -90
+times 4 dw -13, 82
+times 4 dw 61, -46
+times 4 dw -88, -4
+times 4 dw 85, 54
+
+times 4 dw 67, -54
+times 4 dw -78, 38
+times 4 dw 85, -22
+times 4 dw -90, 4
+times 4 dw 90, 13
+times 4 dw -88, -31
+times 4 dw 82, 46
+times 4 dw -73, -61
+
+times 4 dw 61, -73
+times 4 dw -46, 82
+times 4 dw 31, -88
+times 4 dw -13, 90
+times 4 dw -4, -90
+times 4 dw 22, 85
+times 4 dw -38, -78
+times 4 dw 54, 67
+
+times 4 dw 54, -85
+times 4 dw -4, 88
+times 4 dw -46, -61
+times 4 dw 82, 13
+times 4 dw -90, 38
+times 4 dw 67, -78
+times 4 dw -22, 90
+times 4 dw -31, -73
+
+times 4 dw 46, -90
+times 4 dw 38, 54
+times 4 dw -90, 31
+times 4 dw 61, -88
+times 4 dw 22, 67
+times 4 dw -85, 13
+times 4 dw 73, -82
+times 4 dw 4, 78
+
+times 4 dw 38, -88
+times 4 dw 73, -4
+times 4 dw -67, 90
+times 4 dw -46, -31
+times 4 dw 85, -78
+times 4 dw 13, 61
+times 4 dw -90, 54
+times 4 dw 22, -82
+
+times 4 dw 31, -78
+times 4 dw 90, -61
+times 4 dw 4, 54
+times 4 dw -88, 82
+times 4 dw -38, -22
+times 4 dw 73, -90
+times 4 dw 67, -13
+times 4 dw -46, 85
+
+times 4 dw 22, -61
+times 4 dw 85, -90
+times 4 dw 73, -38
+times 4 dw -4, 46
+times 4 dw -78, 90
+times 4 dw -82, 54
+times 4 dw -13, -31
+times 4 dw 67, -88
+
+times 4 dw 13, -38
+times 4 dw 61, -78
+times 4 dw 88, -90
+times 4 dw 85, -73
+times 4 dw 54, -31
+times 4 dw 4, 22
+times 4 dw -46, 67
+times 4 dw -82, 90
+
+times 4 dw 4, -13
+times 4 dw 22, -31
+times 4 dw 38, -46
+times 4 dw 54, -61
+times 4 dw 67, -73
+times 4 dw 78, -82
+times 4 dw 85, -88
+times 4 dw 90, -90
+
 section .text
 
 ; void ff_hevc_idctHxW_dc_{8,10}_(int16_t *coeffs)
@@ -74,34 +286,572 @@ cglobal hevc_idct_%1x%1_dc_%2, 1, 2, 1, coeff, tmp
 RET
 %endmacro
 
-; 8-bit
-INIT_MMX mmxext
-IDCT_DC_NL  4,  8
-IDCT_DC 8,  2,  8
+; IDCT 4x4, expects input in m0, m1
+; %1 - shift
+; %2 - 1/0 - SCALE and Transpose or not
+; %3 - 1/0 add constant or not
+%macro TR_4x4 3
+; interleaves src0 with src2 to m0
+; and src1 with scr3 to m2
+; src0: 00 01 02 03 m0: 00 20 0

Re: [libav-devel] [PATCH 1/2] hevc: Add SSE2 and AVX IDCT

2016-10-07 Thread Alexandra Hájková
On Fri, Oct 7, 2016 at 12:32 AM, Diego Biurrun <di...@biurrun.de> wrote:
> On Wed, Oct 05, 2016 at 02:04:31PM +0200, Alexandra Hájková wrote:
>> --- a/libavcodec/x86/hevc_idct.asm
>> +++ b/libavcodec/x86/hevc_idct.asm
>> @@ -74,34 +286,578 @@ cglobal hevc_idct_%1x%1_dc_%2, 1, 2, 1, coeff, tmp
>>
>>  INIT_XMM sse2
>> +IDCT_DC_NL  8,  %1
>> +IDCT_DC16,  4,  %1
>> +IDCT_DC32, 16,  %1
>> +%if %1 == 8
>> +TRANSPOSE_8x8
>> +%if ARCH_X86_64
>> +TRANSPOSE_16x16
>> +TRANSPOSE_32x32
>> +%endif
>> +%endif
>> +%define transpose_8x8 hevc_idct_transpose_8x8_sse2
>> +%if ARCH_X86_64
>> +%define transpose_16x16 hevc_idct_transpose_16x16_sse2
>> +%define transpose_32x32 hevc_idct_transpose_32x32_sse2
>> +IDCT_32x32 %1
>> +IDCT_16x16 %1
>> +%endif
>
> There should be no need to redefine the transpose functions, just call
> the right one with the help of the cpuname macro.

The transpose functions are called by the IDCT_size*size macros, and the
macro itself is the same for avx and sse2. I think the only way to avoid
this define is to group the init by SIMD instead of grouping it by
bitdepth, but what to do with the bitdepth then?
So I think it would be better to leave the define as it is.

>
>> --- a/libavcodec/x86/hevcdsp_init.c
>> +++ b/libavcodec/x86/hevcdsp_init.c
>> @@ -329,6 +361,11 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int 
>> bit_depth)
>>  #if HAVE_AVX_EXTERNAL
>>  SET_QPEL_FUNCS(1, 1, 8, avx, hevc_qpel_hv);
>>  SET_EPEL_FUNCS(1, 1, 8, avx, hevc_epel_hv);
>> +
>> +c->idct[0] = ff_hevc_idct_4x4_8_avx;
>> +c->idct[1] = ff_hevc_idct_8x8_8_avx;
>> +c->idct[2] = ff_hevc_idct_16x16_8_avx;
>> +c->idct[3] = ff_hevc_idct_32x32_8_avx;
>>  #endif /* HAVE_AVX_EXTERNAL */
>>  }
>
> Only the parts that are explicitly ifdeffed above within this very file
> should be ifdeffed here. Add these below the ifdef.
>
Ok, will be done.
>> @@ -354,6 +397,12 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int 
>> bit_depth)
>>  SET_EPEL_FUNCS(0, 1, 10, avx, ff_hevc_epel_h);
>>  SET_EPEL_FUNCS(1, 0, 10, avx, ff_hevc_epel_v);
>>  SET_EPEL_FUNCS(1, 1, 10, avx, hevc_epel_hv);
>> +
>> +c->idct[0] = ff_hevc_idct_4x4_10_avx;
>> +c->idct[1] = ff_hevc_idct_8x8_10_avx;
>> +c->idct[2] = ff_hevc_idct_16x16_10_avx;
>> +c->idct[3] = ff_hevc_idct_32x32_10_avx;
>> +
>>  #endif /* HAVE_AVX_EXTERNAL */
>>  }
>>  if (EXTERNAL_AVX2(cpu_flags)) {
>
> same
>
> Diego
> ___
> libav-devel mailing list
> libav-devel@libav.org
> https://lists.libav.org/mailman/listinfo/libav-devel
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

  1   2   3   4   >