Re: [libav-devel] [PATCH 14/14] avcodec/cbs_h2645: use AVBufferRef to store list of active parameter sets
On Sat, May 4, 2019 at 2:16 PM Luca Barbato wrote: > > From: James Almer > > Removes unnecessary data copies, and partially fixes potential issues > with dangling references held in said lists. > > Reviewed-by: Mark Thompson > Signed-off-by: James Almer > --- > libavcodec/cbs_h264.h | 2 ++ > libavcodec/cbs_h2645.c | 46 ++ > libavcodec/cbs_h265.h | 3 +++ > 3 files changed, 31 insertions(+), 20 deletions(-) > > diff --git a/libavcodec/cbs_h264.h b/libavcodec/cbs_h264.h > index 5a7dc27698..8e68595614 100644 > --- a/libavcodec/cbs_h264.h > +++ b/libavcodec/cbs_h264.h > @@ -421,6 +421,8 @@ typedef struct CodedBitstreamH264Context { > > // All currently available parameter sets. These are updated when > // any parameter set NAL unit is read/written with this context. > +AVBufferRef *sps_ref[H264_MAX_SPS_COUNT]; > +AVBufferRef *pps_ref[H264_MAX_PPS_COUNT]; > H264RawSPS *sps[H264_MAX_SPS_COUNT]; > H264RawPPS *pps[H264_MAX_PPS_COUNT]; > > diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c > index fab8bb7749..c05b347b1c 100644 > --- a/libavcodec/cbs_h2645.c > +++ b/libavcodec/cbs_h2645.c > @@ -677,9 +677,10 @@ static int > cbs_h2645_split_fragment(CodedBitstreamContext *ctx, > > #define cbs_h2645_replace_ps(h26n, ps_name, ps_var, id_element) \ > static int cbs_h26 ## h26n ## _replace_ ## ps_var(CodedBitstreamContext > *ctx, \ > - const H26 ## h26n ## Raw > ## ps_name *ps_var) \ > + CodedBitstreamUnit *unit) > \ > { \ > CodedBitstreamH26 ## h26n ## Context *priv = ctx->priv_data; \ > +H26 ## h26n ## Raw ## ps_name *ps_var = unit->content; \ > unsigned int id = ps_var->id_element; \ > if (id > FF_ARRAY_ELEMS(priv->ps_var)) { \ > av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid " #ps_name \ > @@ -688,11 +689,16 @@ static int cbs_h26 ## h26n ## _replace_ ## > ps_var(CodedBitstreamContext *ctx, \ > } \ > if (priv->ps_var[id] == priv->active_ ## ps_var) \ > priv->active_ ## ps_var = NULL ; \ > -av_freep(&priv->ps_var[id]); \ > -priv->ps_var[id] = av_malloc(sizeof(*ps_var)); \ > -if 
(!priv->ps_var[id]) \ > +av_buffer_unref(&priv->ps_var ## _ref[id]); \ > +if (unit->content_ref) \ > +priv->ps_var ## _ref[id] = av_buffer_ref(unit->content_ref); \ > +else \ > +priv->ps_var ## _ref[id] = av_buffer_alloc(sizeof(*ps_var)); \ > +if (!priv->ps_var ## _ref[id]) \ > return AVERROR(ENOMEM); \ > -memcpy(priv->ps_var[id], ps_var, sizeof(*ps_var)); \ > +priv->ps_var[id] = (H26 ## h26n ## Raw ## ps_name *)priv->ps_var ## > _ref[id]->data; \ > +if (!unit->content_ref) \ > +memcpy(priv->ps_var[id], ps_var, sizeof(*ps_var)); \ > return 0; \ > } > > @@ -726,7 +732,7 @@ static int cbs_h264_read_nal_unit(CodedBitstreamContext > *ctx, > if (err < 0) > return err; > > -err = cbs_h264_replace_sps(ctx, sps); > +err = cbs_h264_replace_sps(ctx, unit); > if (err < 0) > return err; > } > @@ -760,7 +766,7 @@ static int cbs_h264_read_nal_unit(CodedBitstreamContext > *ctx, > if (err < 0) > return err; > > -err = cbs_h264_replace_pps(ctx, pps); > +err = cbs_h264_replace_pps(ctx, unit); > if (err < 0) > return err; > } > @@ -873,7 +879,7 @@ static int cbs_h265_read_nal_unit(CodedBitstreamContext > *ctx, > if (err < 0) > return err; > > -err = cbs_h265_replace_vps(ctx, vps); > +err = cbs_h265_replace_vps(ctx, unit); > if (err < 0) > return err; > } > @@ -892,7 +898,7 @@ static int cbs_h265_read_nal_unit(CodedBitstreamContext > *ctx, > if (err < 0) > return err; > > -err = cbs_h265_replace_sps(ctx, sps); > +err = cbs_h265_replace_sps(ctx, unit); > if (err < 0) > return err; > } > @@ -912,7 +918,7 @@ static int cbs_h265_read_nal_unit(CodedBitstreamContext > *ctx, > if (err < 0) > return err; > > -err = cbs_h265_replace_pps(ctx, pps); > +err = cbs_h265_replace_pps(ctx, unit); > if (err < 0) > return err; > } > @@ -1002,7 +1008,7 @@ static int > cbs_h264_write_nal_unit(CodedBitstreamContext *ctx, > if (err < 0) > return err; > > -err = cbs_h264_replace_sps(ctx, sps); > +err = cbs_h264_replace_sps(ctx, unit); > if (err < 0) > return err; > } > @@ -1026,7 +1032,7 @@ static int > 
cbs_h264_write_nal_unit(CodedBitstreamContext *ctx, > if (err < 0) > return err; > > -
Re: [libav-devel] [PATCH 13/14] cbs_h264: Need [] in the name when subscript is required
On Sat, May 4, 2019 at 2:15 PM Luca Barbato wrote: > > From: Haihao Xiang > > Otherwise it will hit an assert in the function > ff_cbs_trace_syntax_element() in cbs.c, line 400. > > Signed-off-by: Haihao Xiang > --- > libavcodec/cbs_h264_syntax_template.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/libavcodec/cbs_h264_syntax_template.c > b/libavcodec/cbs_h264_syntax_template.c > index 35ff85f653..28840f0bfe 100644 > --- a/libavcodec/cbs_h264_syntax_template.c > +++ b/libavcodec/cbs_h264_syntax_template.c > @@ -763,7 +763,7 @@ static int FUNC(sei_payload)(CodedBitstreamContext *ctx, > RWContext *rw, > { > allocate(current->payload.other.data, current->payload_size); > for (i = 0; i < current->payload_size; i++) > -xu(8, payload_byte, current->payload.other.data[i], 0, 255, > 1, i); > +xu(8, payload_byte[i], current->payload.other.data[i], 0, > 255, 1, i); > } > } > > -- > 2.12.2 > > ___ > libav-devel mailing list > libav-devel@libav.org > https://lists.libav.org/mailman/listinfo/libav-devel OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 12/14] cbs: Add support for array subscripts in trace output
On Sat, May 4, 2019 at 2:15 PM Luca Barbato wrote: > > From: Mark Thompson > > This makes the trace output for arrays significantly nicer. > --- > libavcodec/cbs.c | 44 ++-- > libavcodec/cbs_h2645.c | 83 --- > libavcodec/cbs_h264_syntax_template.c | 101 +-- > libavcodec/cbs_h265_syntax_template.c | 179 > + > libavcodec/cbs_internal.h | 10 +- > libavcodec/cbs_mpeg2.c | 25 ++--- > libavcodec/cbs_mpeg2_syntax_template.c | 25 ++--- > 7 files changed, 261 insertions(+), 206 deletions(-) > > diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c > index 907471956b..1329caeb78 100644 > --- a/libavcodec/cbs.c > +++ b/libavcodec/cbs.c > @@ -357,17 +357,43 @@ void ff_cbs_trace_header(CodedBitstreamContext *ctx, > } > > void ff_cbs_trace_syntax_element(CodedBitstreamContext *ctx, int position, > - const char *name, const char *bits, > - int64_t value) > + const char *str, const int *subscripts, > + const char *bits, int64_t value) > { > +char name[256]; > size_t name_len, bits_len; > -int pad; > +int pad, subs, i, j, k, n; > > if (!ctx->trace_enable) > return; > > av_assert0(value >= INT_MIN && value <= UINT32_MAX); > > +subs = subscripts ? 
subscripts[0] : 0; > +n = 0; > +for (i = j = 0; str[i];) { > +if (str[i] == '[') { > +if (n < subs) { > +++n; > +k = snprintf(name + j, sizeof(name) - j, "[%d", > subscripts[n]); > +av_assert0(k > 0 && j + k < sizeof(name)); > +j += k; > +for (++i; str[i] && str[i] != ']'; i++); > +av_assert0(str[i] == ']'); > +} else { > +while (str[i] && str[i] != ']') > +name[j++] = str[i++]; > +av_assert0(str[i] == ']'); > +} > +} else { > +av_assert0(j + 1 < sizeof(name)); > +name[j++] = str[i++]; > +} > +} > +av_assert0(j + 1 < sizeof(name)); > +name[j] = 0; > +av_assert0(n == subs); > + > name_len = strlen(name); > bits_len = strlen(bits); > > @@ -381,7 +407,8 @@ void ff_cbs_trace_syntax_element(CodedBitstreamContext > *ctx, int position, > } > > int ff_cbs_read_unsigned(CodedBitstreamContext *ctx, BitstreamContext *bc, > - int width, const char *name, uint32_t *write_to, > + int width, const char *name, > + const int *subscripts, uint32_t *write_to, > uint32_t range_min, uint32_t range_max) > { > uint32_t value; > @@ -407,7 +434,8 @@ int ff_cbs_read_unsigned(CodedBitstreamContext *ctx, > BitstreamContext *bc, > bits[i] = value >> (width - i - 1) & 1 ? '1' : '0'; > bits[i] = 0; > > -ff_cbs_trace_syntax_element(ctx, position, name, bits, value); > +ff_cbs_trace_syntax_element(ctx, position, name, subscripts, > +bits, value); > } > > if (value < range_min || value > range_max) { > @@ -422,7 +450,8 @@ int ff_cbs_read_unsigned(CodedBitstreamContext *ctx, > BitstreamContext *bc, > } > > int ff_cbs_write_unsigned(CodedBitstreamContext *ctx, PutBitContext *pbc, > - int width, const char *name, uint32_t value, > + int width, const char *name, > + const int *subscripts, uint32_t value, >uint32_t range_min, uint32_t range_max) > { > av_assert0(width <= 32); > @@ -444,7 +473,8 @@ int ff_cbs_write_unsigned(CodedBitstreamContext *ctx, > PutBitContext *pbc, > bits[i] = value >> (width - i - 1) & 1 ? 
'1' : '0'; > bits[i] = 0; > > -ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc), name, bits, > value); > +ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc), > +name, subscripts, bits, value); > } > > if (width < 32) > diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c > index 0509d6d836..fab8bb7749 100644 > --- a/libavcodec/cbs_h2645.c > +++ b/libavcodec/cbs_h2645.c > @@ -32,7 +32,8 @@ > > > static int cbs_read_ue_golomb(CodedBitstreamContext *ctx, BitstreamContext > *bc, > - const char *name, uint32_t *write_to, > + const char *name, const int *subscripts, > + uint32_t *write_to, >uint32_t range_min, uint32_t range_max) > { > uint32_t value; > @@ -68,7 +69,8 @@ static int cbs_read_ue_golomb(CodedBitstreamContext *ctx, > BitstreamContext *bc, > --value; > > if (ctx->trace_enable) > -
Re: [libav-devel] [PATCH 11/14] cbs_h2645: Simplify representation of fixed values
On Sat, May 4, 2019 at 2:15 PM Luca Barbato wrote: > > From: Mark Thompson > > --- > libavcodec/cbs_h2645.c| 5 + > libavcodec/cbs_h264_syntax_template.c | 30 --- > libavcodec/cbs_h265_syntax_template.c | 38 > +++ > 3 files changed, 34 insertions(+), 39 deletions(-) > > diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c > index 8cd6db3fb4..0509d6d836 100644 > --- a/libavcodec/cbs_h2645.c > +++ b/libavcodec/cbs_h2645.c > @@ -239,6 +239,11 @@ static int cbs_write_se_golomb(CodedBitstreamContext > *ctx, PutBitContext *pbc, > #define FUNC_H264(rw, name) FUNC_NAME(rw, h264, name) > #define FUNC_H265(rw, name) FUNC_NAME(rw, h265, name) > > +#define fixed(width, name, value) do { \ > +av_unused uint32_t fixed_value = value; \ > +xu(width, name, fixed_value, value, value); \ > +} while (0) > + > > #define READ > #define READWRITE read > diff --git a/libavcodec/cbs_h264_syntax_template.c > b/libavcodec/cbs_h264_syntax_template.c > index 92c1b67862..82d9d23200 100644 > --- a/libavcodec/cbs_h264_syntax_template.c > +++ b/libavcodec/cbs_h264_syntax_template.c > @@ -19,10 +19,10 @@ > static int FUNC(rbsp_trailing_bits)(CodedBitstreamContext *ctx, RWContext > *rw) > { > int err; > -av_unused int one = 1, zero = 0; > -xu(1, rbsp_stop_one_bit, one, 1, 1); > + > +fixed(1, rbsp_stop_one_bit, 1); > while (byte_alignment(rw) != 0) > -xu(1, rbsp_alignment_zero_bit, zero, 0, 0); > +fixed(1, rbsp_alignment_zero_bit, 0); > > return 0; > } > @@ -740,9 +740,8 @@ static int FUNC(sei_payload)(CodedBitstreamContext *ctx, > RWContext *rw, > break; > case H264_SEI_TYPE_FILLER_PAYLOAD: > { > -av_unused int ff_byte = 0xff; > for (i = 0; i < current->payload_size; i++) > -xu(8, ff_byte, ff_byte, 0xff, 0xff); > +fixed(8, ff_byte, 0xff); > } > break; > case H264_SEI_TYPE_USER_DATA_REGISTERED: > @@ -770,10 +769,9 @@ static int FUNC(sei_payload)(CodedBitstreamContext *ctx, > RWContext *rw, > } > > if (byte_alignment(rw)) { > -av_unused int one = 1, zero = 0; > -xu(1, bit_equal_to_one, one, 1, 
1); > +fixed(1, bit_equal_to_one, 1); > while (byte_alignment(rw)) > -xu(1, bit_equal_to_zero, zero, 0, 0); > +fixed(1, bit_equal_to_zero, 0); > } > > #ifdef READ > @@ -810,14 +808,14 @@ static int FUNC(sei)(CodedBitstreamContext *ctx, > RWContext *rw, > uint32_t tmp; > > while (bitstream_peek(rw, 8) == 0xff) { > -xu(8, ff_byte, tmp, 0xff, 0xff); > +fixed(8, ff_byte, 0xff); > payload_type += 255; > } > xu(8, last_payload_type_byte, tmp, 0, 254); > payload_type += tmp; > > while (bitstream_peek(rw, 8) == 0xff) { > -xu(8, ff_byte, tmp, 0xff, 0xff); > +fixed(8, ff_byte, 0xff); > payload_size += 255; > } > xu(8, last_payload_size_byte, tmp, 0, 254); > @@ -853,14 +851,14 @@ static int FUNC(sei)(CodedBitstreamContext *ctx, > RWContext *rw, > > tmp = current->payload[k].payload_type; > while (tmp >= 255) { > -xu(8, ff_byte, 0xff, 0xff, 0xff); > +fixed(8, ff_byte, 0xff); > tmp -= 255; > } > xu(8, last_payload_type_byte, tmp, 0, 254); > > tmp = current->payload[k].payload_size; > while (tmp >= 255) { > -xu(8, ff_byte, 0xff, 0xff, 0xff); > +fixed(8, ff_byte, 0xff); > tmp -= 255; > } > xu(8, last_payload_size_byte, tmp, 0, 254); > @@ -1240,9 +1238,8 @@ static int FUNC(slice_header)(CodedBitstreamContext > *ctx, RWContext *rw, > } > > if (pps->entropy_coding_mode_flag) { > -av_unused int one = 1; > while (byte_alignment(rw)) > -xu(1, cabac_alignment_one_bit, one, 1, 1); > +fixed(1, cabac_alignment_one_bit, 1); > } > > return 0; > @@ -1251,7 +1248,6 @@ static int FUNC(slice_header)(CodedBitstreamContext > *ctx, RWContext *rw, > static int FUNC(filler)(CodedBitstreamContext *ctx, RWContext *rw, > H264RawFiller *current) > { > -av_unused int ff_byte = 0xff; > int err; > > HEADER("Filler Data"); > @@ -1261,14 +1257,14 @@ static int FUNC(filler)(CodedBitstreamContext *ctx, > RWContext *rw, > > #ifdef READ > while (bitstream_peek(rw, 8) == 0xff) { > -xu(8, ff_byte, ff_byte, 0xff, 0xff); > +fixed(8, ff_byte, 0xff); > ++current->filler_size; > } > #else > { > uint32_t i; > for (i = 0; 
i < current->filler_size; i++) > -xu(8, ff_byte, ff_byte, 0xff, 0xff); > +fixed(8, ff_byte, 0xff); > } > #endif > >
Re: [libav-devel] [PATCH 09/14] avcodec/cbs_mpeg2: create a reference to the existing buffer when decomposing slice units
On Sat, May 4, 2019 at 2:15 PM Luca Barbato wrote: > > From: James Almer > > Reviewed-by: Mark Thompson > Signed-off-by: James Almer > --- > libavcodec/cbs_mpeg2.c | 9 ++--- > 1 file changed, 2 insertions(+), 7 deletions(-) > > diff --git a/libavcodec/cbs_mpeg2.c b/libavcodec/cbs_mpeg2.c > index 8974bcabac..d65d84bd3d 100644 > --- a/libavcodec/cbs_mpeg2.c > +++ b/libavcodec/cbs_mpeg2.c > @@ -191,16 +191,11 @@ static int cbs_mpeg2_read_unit(CodedBitstreamContext > *ctx, > len = unit->data_size; > > slice->data_size = len - pos / 8; > -slice->data_ref = av_buffer_alloc(slice->data_size + > - AV_INPUT_BUFFER_PADDING_SIZE); > +slice->data_ref = av_buffer_ref(unit->data_ref); > if (!slice->data_ref) > return AVERROR(ENOMEM); > -slice->data = slice->data_ref->data; > +slice->data = unit->data + pos / 8; > > -memcpy(slice->data, > - unit->data + pos / 8, slice->data_size); > -memset(slice->data + slice->data_size, 0, > - AV_INPUT_BUFFER_PADDING_SIZE); > slice->data_bit_start = pos % 8; > > } else { > -- > 2.12.2 > > ___ > libav-devel mailing list > libav-devel@libav.org > https://lists.libav.org/mailman/listinfo/libav-devel OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 10/14] cbs: Fragment/unit data is always reference counted
On Sat, May 4, 2019 at 2:15 PM Luca Barbato wrote: > > From: Mark Thompson > > Make this clear in the documentation and add some asserts to ensure > that it is always true. > --- > libavcodec/cbs.c | 19 --- > libavcodec/cbs.h | 10 ++ > 2 files changed, 18 insertions(+), 11 deletions(-) > > diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c > index dc8d94eedc..907471956b 100644 > --- a/libavcodec/cbs.c > +++ b/libavcodec/cbs.c > @@ -140,26 +140,30 @@ static int > cbs_read_fragment_content(CodedBitstreamContext *ctx, > int err, i, j; > > for (i = 0; i < frag->nb_units; i++) { > +CodedBitstreamUnit *unit = &frag->units[i]; > + > if (ctx->decompose_unit_types) { > for (j = 0; j < ctx->nb_decompose_unit_types; j++) { > -if (ctx->decompose_unit_types[j] == frag->units[i].type) > +if (ctx->decompose_unit_types[j] == unit->type) > break; > } > if (j >= ctx->nb_decompose_unit_types) > continue; > } > > -av_buffer_unref(&frag->units[i].content_ref); > -frag->units[i].content = NULL; > +av_buffer_unref(&unit->content_ref); > +unit->content = NULL; > + > +av_assert0(unit->data && unit->data_ref); > > -err = ctx->codec->read_unit(ctx, &frag->units[i]); > +err = ctx->codec->read_unit(ctx, unit); > if (err == AVERROR(ENOSYS)) { > av_log(ctx->log_ctx, AV_LOG_VERBOSE, > "Decomposition unimplemented for unit %d " > - "(type %"PRIu32").\n", i, frag->units[i].type); > + "(type %"PRIu32").\n", i, unit->type); > } else if (err < 0) { > av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to read unit %d " > - "(type %"PRIu32").\n", i, frag->units[i].type); > + "(type %"PRIu32").\n", i, unit->type); > return err; > } > } > @@ -278,6 +282,7 @@ int ff_cbs_write_fragment_data(CodedBitstreamContext *ctx, > "(type %"PRIu32").\n", i, unit->type); > return err; > } > +av_assert0(unit->data && unit->data_ref); > } > > av_buffer_unref(&frag->data_ref); > @@ -288,6 +293,7 @@ int ff_cbs_write_fragment_data(CodedBitstreamContext *ctx, > av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to assemble fragment.\n"); > return err; > } > 
+av_assert0(frag->data && frag->data_ref); > > return 0; > } > @@ -328,7 +334,6 @@ int ff_cbs_write_packet(CodedBitstreamContext *ctx, > if (err < 0) > return err; > > -av_assert0(frag->data_ref); > buf = av_buffer_ref(frag->data_ref); > if (!buf) > return AVERROR(ENOMEM); > diff --git a/libavcodec/cbs.h b/libavcodec/cbs.h > index 1f26be7b36..c38edc539a 100644 > --- a/libavcodec/cbs.h > +++ b/libavcodec/cbs.h > @@ -84,8 +84,9 @@ typedef struct CodedBitstreamUnit { > */ > size_t data_bit_padding; > /** > - * If data is reference counted, a reference to the buffer containing > - * data. Null if data is not reference counted. > + * A reference to the buffer containing data. > + * > + * Must be set if data is not NULL. > */ > AVBufferRef *data_ref; > > @@ -130,8 +131,9 @@ typedef struct CodedBitstreamFragment { > */ > size_t data_bit_padding; > /** > - * If data is reference counted, a reference to the buffer containing > - * data. Null if data is not reference counted. > + * A reference to the buffer containing data. > + * > + * Must be set if data is not NULL. > */ > AVBufferRef *data_ref; > > -- > 2.12.2 > > ___ > libav-devel mailing list > libav-devel@libav.org > https://lists.libav.org/mailman/listinfo/libav-devel OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 08/14] avcodec/cbs_h2645: create a reference to the existing buffer when decomposing slice units
On Sat, May 4, 2019 at 2:15 PM Luca Barbato wrote: > > From: James Almer > > Reviewed-by: Mark Thompson > Signed-off-by: James Almer > --- > libavcodec/cbs_h2645.c | 18 -- > 1 file changed, 4 insertions(+), 14 deletions(-) > > diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c > index bf16343db5..8cd6db3fb4 100644 > --- a/libavcodec/cbs_h2645.c > +++ b/libavcodec/cbs_h2645.c > @@ -776,15 +776,10 @@ static int cbs_h264_read_nal_unit(CodedBitstreamContext > *ctx, > } > > slice->data_size = len - pos / 8; > -slice->data_ref = av_buffer_alloc(slice->data_size + > - AV_INPUT_BUFFER_PADDING_SIZE); > +slice->data_ref = av_buffer_ref(unit->data_ref); > if (!slice->data_ref) > return AVERROR(ENOMEM); > -slice->data = slice->data_ref->data; > -memcpy(slice->data, > - unit->data + pos / 8, slice->data_size); > -memset(slice->data + slice->data_size, 0, > - AV_INPUT_BUFFER_PADDING_SIZE); > +slice->data = unit->data + pos / 8; > slice->data_bit_start = pos % 8; > } > break; > @@ -946,15 +941,10 @@ static int cbs_h265_read_nal_unit(CodedBitstreamContext > *ctx, > } > > slice->data_size = len - pos / 8; > -slice->data_ref = av_buffer_alloc(slice->data_size + > - AV_INPUT_BUFFER_PADDING_SIZE); > +slice->data_ref = av_buffer_ref(unit->data_ref); > if (!slice->data_ref) > return AVERROR(ENOMEM); > -slice->data = slice->data_ref->data; > -memcpy(slice->data, > - unit->data + pos / 8, slice->data_size); > -memset(slice->data + slice->data_size, 0, > - AV_INPUT_BUFFER_PADDING_SIZE); > +slice->data = unit->data + pos / 8; > slice->data_bit_start = pos % 8; > } > break; > -- > 2.12.2 > OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 07/14] avcodec/cbs_mpeg2: use existing buffer reference when splitting fragments
On Sat, May 4, 2019 at 2:15 PM Luca Barbato wrote: > > From: James Almer > > Reviewed-by: Mark Thompson > Signed-off-by: James Almer > --- > libavcodec/cbs_mpeg2.c | 12 +++- > 1 file changed, 3 insertions(+), 9 deletions(-) > > diff --git a/libavcodec/cbs_mpeg2.c b/libavcodec/cbs_mpeg2.c > index 7f484267fa..8974bcabac 100644 > --- a/libavcodec/cbs_mpeg2.c > +++ b/libavcodec/cbs_mpeg2.c > @@ -146,18 +146,12 @@ static int > cbs_mpeg2_split_fragment(CodedBitstreamContext *ctx, > unit_size = (end - 4) - (start - 1); > } > > -unit_data = av_malloc(unit_size + AV_INPUT_BUFFER_PADDING_SIZE); > -if (!unit_data) > -return AVERROR(ENOMEM); > -memcpy(unit_data, start - 1, unit_size); > -memset(unit_data + unit_size, 0, AV_INPUT_BUFFER_PADDING_SIZE); > +unit_data = (uint8_t *)start - 1; > > err = ff_cbs_insert_unit_data(ctx, frag, i, unit_type, > - unit_data, unit_size, NULL); > -if (err < 0) { > -av_freep(&unit_data); > + unit_data, unit_size, frag->data_ref); > +if (err < 0) > return err; > -} > > if (end == frag->data + frag->data_size) > break; > -- > 2.12.2 > OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 06/14] avcodec/cbs_mpeg2: use memcpy when assembling fragments
On Sat, May 4, 2019 at 2:15 PM Luca Barbato wrote: > > From: James Almer > > before: > 419022 decicycles in assemble_fragment,2047 runs, 1 skips > > after: > 104621 decicycles in assemble_fragment,2045 runs, 3 skips > > Benched with a 2 minutes long 720x480 DVD mpeg2 sample. > > Reviewed-by: Michael Niedermayer > Signed-off-by: James Almer > --- > libavcodec/cbs_mpeg2.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/libavcodec/cbs_mpeg2.c b/libavcodec/cbs_mpeg2.c > index 7fa13c64fa..7f484267fa 100644 > --- a/libavcodec/cbs_mpeg2.c > +++ b/libavcodec/cbs_mpeg2.c > @@ -362,7 +362,7 @@ static int > cbs_mpeg2_assemble_fragment(CodedBitstreamContext *ctx, > CodedBitstreamFragment *frag) > { > uint8_t *data; > -size_t size, dp, sp; > +size_t size, dp; > int i; > > size = 0; > @@ -382,8 +382,8 @@ static int > cbs_mpeg2_assemble_fragment(CodedBitstreamContext *ctx, > data[dp++] = 0; > data[dp++] = 1; > > -for (sp = 0; sp < unit->data_size; sp++) > -data[dp++] = unit->data[sp]; > +memcpy(data + dp, unit->data, unit->data_size); > +dp += unit->data_size; > } > > av_assert0(dp == size); > -- > 2.12.2 > OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 04/14] avcodec/cbs: use a reference to the assembled CodedBitstreamFragment buffer when writing packets
On Sat, May 4, 2019 at 2:15 PM Luca Barbato wrote: > > From: James Almer > > This saves one malloc + memcpy per packet > > The CodedBitstreamFragment buffer is padded to follow the requirements > of AVPacket. > > Reviewed-by: jkqxz > Signed-off-by: James Almer > --- > libavcodec/cbs.c | 12 > libavcodec/cbs_h2645.c | 8 +--- > libavcodec/cbs_mpeg2.c | 3 ++- > 3 files changed, 15 insertions(+), 8 deletions(-) > > diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c > index c5c5b98d24..dc8d94eedc 100644 > --- a/libavcodec/cbs.c > +++ b/libavcodec/cbs.c > @@ -321,17 +321,21 @@ int ff_cbs_write_packet(CodedBitstreamContext *ctx, > AVPacket *pkt, > CodedBitstreamFragment *frag) > { > +AVBufferRef *buf; > int err; > > err = ff_cbs_write_fragment_data(ctx, frag); > if (err < 0) > return err; > > -err = av_new_packet(pkt, frag->data_size); > -if (err < 0) > -return err; > +av_assert0(frag->data_ref); > +buf = av_buffer_ref(frag->data_ref); > +if (!buf) > +return AVERROR(ENOMEM); > > -memcpy(pkt->data, frag->data, frag->data_size); > +av_init_packet(pkt); > +pkt->buf = buf; > +pkt->data = frag->data; > pkt->size = frag->data_size; > > return 0; > diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c > index d6131a13e5..bf16343db5 100644 > --- a/libavcodec/cbs_h2645.c > +++ b/libavcodec/cbs_h2645.c > @@ -1298,7 +1298,7 @@ static int > cbs_h2645_assemble_fragment(CodedBitstreamContext *ctx, > max_size += 3 + frag->units[i].data_size * 3 / 2; > } > > -data = av_malloc(max_size); > +data = av_malloc(max_size + AV_INPUT_BUFFER_PADDING_SIZE); > if (!data) > return AVERROR(ENOMEM); > > @@ -1349,11 +1349,13 @@ static int > cbs_h2645_assemble_fragment(CodedBitstreamContext *ctx, > } > > av_assert0(dp <= max_size); > -err = av_reallocp(&data, dp); > +err = av_reallocp(&data, dp + AV_INPUT_BUFFER_PADDING_SIZE); > if (err) > return err; > +memset(data + dp, 0, AV_INPUT_BUFFER_PADDING_SIZE); > > -frag->data_ref = av_buffer_create(data, dp, NULL, NULL, 0); > +frag->data_ref = 
av_buffer_create(data, dp + > AV_INPUT_BUFFER_PADDING_SIZE, > + NULL, NULL, 0); > if (!frag->data_ref) { > av_freep(&data); > return AVERROR(ENOMEM); > diff --git a/libavcodec/cbs_mpeg2.c b/libavcodec/cbs_mpeg2.c > index 3db10c5152..7fa13c64fa 100644 > --- a/libavcodec/cbs_mpeg2.c > +++ b/libavcodec/cbs_mpeg2.c > @@ -369,7 +369,7 @@ static int > cbs_mpeg2_assemble_fragment(CodedBitstreamContext *ctx, > for (i = 0; i < frag->nb_units; i++) > size += 3 + frag->units[i].data_size; > > -frag->data_ref = av_buffer_alloc(size); > +frag->data_ref = av_buffer_alloc(size + AV_INPUT_BUFFER_PADDING_SIZE); > if (!frag->data_ref) > return AVERROR(ENOMEM); > data = frag->data_ref->data; > @@ -388,6 +388,7 @@ static int > cbs_mpeg2_assemble_fragment(CodedBitstreamContext *ctx, > > av_assert0(dp == size); > > +memset(data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE); > frag->data = data; > frag->data_size = size; > > -- > 2.12.2 > OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 05/14] cbs_h265: Use helper macro for maximum values of fixed-width elements
On Sat, May 4, 2019 at 2:15 PM Luca Barbato wrote: > > From: Mark Thompson > > Apply the same logic as the previous patch to H.265. There are no cases > which currently overflow here, but this is still more consistent. > --- > libavcodec/cbs_h265_syntax_template.c | 16 > 1 file changed, 8 insertions(+), 8 deletions(-) > > diff --git a/libavcodec/cbs_h265_syntax_template.c > b/libavcodec/cbs_h265_syntax_template.c > index a194887d76..20b0479891 100644 > --- a/libavcodec/cbs_h265_syntax_template.c > +++ b/libavcodec/cbs_h265_syntax_template.c > @@ -665,7 +665,7 @@ static int FUNC(sps_scc_extension)(CodedBitstreamContext > *ctx, RWContext *rw, >: current->bit_depth_chroma_minus8 > + 8; > for (i = 0; i <= > current->sps_num_palette_predictor_initializer_minus1; i++) > u(bit_depth, sps_palette_predictor_initializers[comp][i], > - 0, (1 << bit_depth) - 1); > + 0, MAX_UINT_BITS(bit_depth)); > } > } > } > @@ -827,7 +827,7 @@ static int FUNC(sps)(CodedBitstreamContext *ctx, > RWContext *rw, > for (i = 0; i < current->num_long_term_ref_pics_sps; i++) { > u(current->log2_max_pic_order_cnt_lsb_minus4 + 4, >lt_ref_pic_poc_lsb_sps[i], > - 0, (1 << (current->log2_max_pic_order_cnt_lsb_minus4 + 4)) - > 1); > + 0, MAX_UINT_BITS(current->log2_max_pic_order_cnt_lsb_minus4 + > 4)); > flag(used_by_curr_pic_lt_sps_flag[i]); > } > } > @@ -845,7 +845,7 @@ static int FUNC(sps)(CodedBitstreamContext *ctx, > RWContext *rw, > flag(sps_multilayer_extension_flag); > flag(sps_3d_extension_flag); > flag(sps_scc_extension_flag); > -u(4, sps_extension_4bits, 0, (1 << 4) - 1); > +u(4, sps_extension_4bits, 0, MAX_UINT_BITS(4)); > } > > if (current->sps_range_extension_flag) > @@ -925,7 +925,7 @@ static int FUNC(pps_scc_extension)(CodedBitstreamContext > *ctx, RWContext *rw, >: > current->chroma_bit_depth_entry_minus8 + 8; > for (i = 0; i < > current->pps_num_palette_predictor_initializer; i++) > u(bit_depth, pps_palette_predictor_initializers[comp][i], > - 0, (1 << bit_depth) - 1); > + 0, 
MAX_UINT_BITS(bit_depth)); > } > } > } > @@ -1038,7 +1038,7 @@ static int FUNC(pps)(CodedBitstreamContext *ctx, > RWContext *rw, > flag(pps_multilayer_extension_flag); > flag(pps_3d_extension_flag); > flag(pps_scc_extension_flag); > -u(4, pps_extension_4bits, 0, (1 << 4) - 1); > +u(4, pps_extension_4bits, 0, MAX_UINT_BITS(4)); > } > if (current->pps_range_extension_flag) > CHECK(FUNC(pps_range_extension)(ctx, rw, current)); > @@ -1274,7 +1274,7 @@ static int > FUNC(slice_segment_header)(CodedBitstreamContext *ctx, RWContext *rw, > const H265RawSTRefPicSet *rps; > > u(sps->log2_max_pic_order_cnt_lsb_minus4 + 4, > slice_pic_order_cnt_lsb, > - 0, (1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 4)) - 1); > + 0, MAX_UINT_BITS(sps->log2_max_pic_order_cnt_lsb_minus4 + 4)); > > flag(short_term_ref_pic_set_sps_flag); > if (!current->short_term_ref_pic_set_sps_flag) { > @@ -1321,7 +1321,7 @@ static int > FUNC(slice_segment_header)(CodedBitstreamContext *ctx, RWContext *rw, > ++num_pic_total_curr; > } else { > u(sps->log2_max_pic_order_cnt_lsb_minus4 + 4, > poc_lsb_lt[i], > - 0, (1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + > 4)) - 1); > + 0, > MAX_UINT_BITS(sps->log2_max_pic_order_cnt_lsb_minus4 + 4)); > flag(used_by_curr_pic_lt_flag[i]); > if (current->used_by_curr_pic_lt_flag[i]) > ++num_pic_total_curr; > @@ -1487,7 +1487,7 @@ static int > FUNC(slice_segment_header)(CodedBitstreamContext *ctx, RWContext *rw, > ue(offset_len_minus1, 0, 31); > for (i = 0; i < current->num_entry_point_offsets; i++) > u(current->offset_len_minus1 + 1, > entry_point_offset_minus1[i], > - 0, (1 << (current->offset_len_minus1 + 1)) - 1); > + 0, MAX_UINT_BITS(current->offset_len_minus1 + 1)); > } > } > > -- > 2.12.2 > OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 03/14] cbs: Add a table of all supported codec IDs
On Sat, May 4, 2019 at 2:14 PM Luca Barbato wrote: > > From: Mark Thompson > > Use it as the set of codec IDs supported by the trace_headers BSF. > --- > configure | 2 +- > libavcodec/cbs.c | 13 + > libavcodec/cbs.h | 8 > libavcodec/trace_headers_bsf.c | 9 + > 4 files changed, 23 insertions(+), 9 deletions(-) > > diff --git a/configure b/configure > index 8c46a870c4..7b5df4df29 100755 > --- a/configure > +++ b/configure > @@ -2365,7 +2365,7 @@ h264_redundant_pps_bsf_select="cbs_h264" > hevc_metadata_bsf_select="cbs_h265" > mjpeg2jpeg_bsf_select="jpegtables" > mpeg2_metadata_bsf_select="cbs_mpeg2" > -trace_headers_bsf_select="cbs_h264 cbs_h265 cbs_mpeg2" > +trace_headers_bsf_select="cbs" > > # external libraries > avisynth_deps="LoadLibrary" > diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c > index 04ad2dfc41..c5c5b98d24 100644 > --- a/libavcodec/cbs.c > +++ b/libavcodec/cbs.c > @@ -40,6 +40,19 @@ static const CodedBitstreamType *cbs_type_table[] = { > #endif > }; > > +const enum AVCodecID ff_cbs_all_codec_ids[] = { > +#if CONFIG_CBS_H264 > +AV_CODEC_ID_H264, > +#endif > +#if CONFIG_CBS_H265 > +AV_CODEC_ID_HEVC, > +#endif > +#if CONFIG_CBS_MPEG2 > +AV_CODEC_ID_MPEG2VIDEO, > +#endif > +AV_CODEC_ID_NONE > +}; > + > int ff_cbs_init(CodedBitstreamContext **ctx_ptr, > enum AVCodecID codec_id, void *log_ctx) > { > diff --git a/libavcodec/cbs.h b/libavcodec/cbs.h > index 6505386708..1f26be7b36 100644 > --- a/libavcodec/cbs.h > +++ b/libavcodec/cbs.h > @@ -202,6 +202,14 @@ typedef struct CodedBitstreamContext { > > > /** > + * Table of all supported codec IDs. > + * > + * Terminated by AV_CODEC_ID_NONE. > + */ > +extern const enum AVCodecID ff_cbs_all_codec_ids[]; > + > + > +/** > * Create and initialise a new context for the given codec. 
> */ > int ff_cbs_init(CodedBitstreamContext **ctx, > diff --git a/libavcodec/trace_headers_bsf.c b/libavcodec/trace_headers_bsf.c > index 9c97dd4cea..4494bd4553 100644 > --- a/libavcodec/trace_headers_bsf.c > +++ b/libavcodec/trace_headers_bsf.c > @@ -109,18 +109,11 @@ static int trace_headers(AVBSFContext *bsf, AVPacket > *out) > return 0; > } > > -static const enum AVCodecID trace_headers_codec_ids[] = { > -AV_CODEC_ID_H264, > -AV_CODEC_ID_HEVC, > -AV_CODEC_ID_MPEG2VIDEO, > -AV_CODEC_ID_NONE, > -}; > - > const AVBitStreamFilter ff_trace_headers_bsf = { > .name = "trace_headers", > .priv_data_size = sizeof(TraceHeadersContext), > .init = &trace_headers_init, > .close = &trace_headers_close, > .filter = &trace_headers, > -.codec_ids = trace_headers_codec_ids, > +.codec_ids = ff_cbs_all_codec_ids, > }; > -- > 2.12.2 > OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 02/14] cbs_h264: Fix overflow in shifts
On Sat, May 4, 2019 at 2:14 PM Luca Barbato wrote: > > From: Mark Thompson > > The type of the result of a shift operation is unaffected by the type of > the right operand, so some existing code overflows with undefined behaviour > when the element length is 32. Add a helper macro to calculate the maximum > value correctly and then use it everywhere this pattern appears. > > Found-by: Andreas Rheinhardt > --- > libavcodec/cbs_h264_syntax_template.c | 22 +++--- > libavcodec/cbs_internal.h | 4 > 2 files changed, 15 insertions(+), 11 deletions(-) > > diff --git a/libavcodec/cbs_h264_syntax_template.c > b/libavcodec/cbs_h264_syntax_template.c > index 1aa7888584..92c1b67862 100644 > --- a/libavcodec/cbs_h264_syntax_template.c > +++ b/libavcodec/cbs_h264_syntax_template.c > @@ -342,8 +342,8 @@ static int FUNC(sps_extension)(CodedBitstreamContext > *ctx, RWContext *rw, > flag(alpha_incr_flag); > > bits = current->bit_depth_aux_minus8 + 9; > -u(bits, alpha_opaque_value, 0, (1 << bits) - 1); > -u(bits, alpha_transparent_value, 0, (1 << bits) - 1); > +u(bits, alpha_opaque_value, 0, MAX_UINT_BITS(bits)); > +u(bits, alpha_transparent_value, 0, MAX_UINT_BITS(bits)); > } > > flag(additional_extension_flag); > @@ -483,10 +483,10 @@ static int > FUNC(sei_buffering_period)(CodedBitstreamContext *ctx, RWContext *rw, > length = > sps->vui.nal_hrd_parameters.initial_cpb_removal_delay_length_minus1 + 1; > xu(length, initial_cpb_removal_delay[SchedSelIdx], > current->nal.initial_cpb_removal_delay[i], > - 0, (1 << (uint64_t)length) - 1); > + 1, MAX_UINT_BITS(length)); > xu(length, initial_cpb_removal_delay_offset[SchedSelIdx], > current->nal.initial_cpb_removal_delay_offset[i], > - 0, (1 << (uint64_t)length) - 1); > + 0, MAX_UINT_BITS(length)); > } > } > > @@ -495,10 +495,10 @@ static int > FUNC(sei_buffering_period)(CodedBitstreamContext *ctx, RWContext *rw, > length = > sps->vui.vcl_hrd_parameters.initial_cpb_removal_delay_length_minus1 + 1; > xu(length, 
initial_cpb_removal_delay[SchedSelIdx], > current->vcl.initial_cpb_removal_delay[i], > - 0, (1 << (uint64_t)length) - 1); > + 1, MAX_UINT_BITS(length)); > xu(length, initial_cpb_removal_delay_offset[SchedSelIdx], > current->vcl.initial_cpb_removal_delay_offset[i], > - 0, (1 << (uint64_t)length) - 1); > + 0, MAX_UINT_BITS(length)); > } > } > > @@ -548,7 +548,7 @@ static int FUNC(sei_pic_timestamp)(CodedBitstreamContext > *ctx, RWContext *rw, > > if (time_offset_length > 0) > u(time_offset_length, time_offset, > - 0, (1 << (uint64_t)time_offset_length) - 1); > + 0, MAX_UINT_BITS(time_offset_length)); > else > infer(time_offset, 0); > > @@ -600,9 +600,9 @@ static int FUNC(sei_pic_timing)(CodedBitstreamContext > *ctx, RWContext *rw, > } > > u(hrd->cpb_removal_delay_length_minus1 + 1, cpb_removal_delay, > - 0, (1 << (uint64_t)hrd->cpb_removal_delay_length_minus1) + 1); > + 0, MAX_UINT_BITS(hrd->cpb_removal_delay_length_minus1 + 1)); > u(hrd->dpb_output_delay_length_minus1 + 1, dpb_output_delay, > - 0, (1 << (uint64_t)hrd->dpb_output_delay_length_minus1) + 1); > + 0, MAX_UINT_BITS(hrd->dpb_output_delay_length_minus1 + 1)); > } > > if (sps->vui.pic_struct_present_flag) { > @@ -1123,7 +1123,7 @@ static int FUNC(slice_header)(CodedBitstreamContext > *ctx, RWContext *rw, > u(2, colour_plane_id, 0, 2); > > u(sps->log2_max_frame_num_minus4 + 4, frame_num, > - 0, (1 << (sps->log2_max_frame_num_minus4 + 4)) - 1); > + 0, MAX_UINT_BITS(sps->log2_max_frame_num_minus4 + 4)); > > if (!sps->frame_mbs_only_flag) { > flag(field_pic_flag); > @@ -1141,7 +1141,7 @@ static int FUNC(slice_header)(CodedBitstreamContext > *ctx, RWContext *rw, > > if (sps->pic_order_cnt_type == 0) { > u(sps->log2_max_pic_order_cnt_lsb_minus4 + 4, pic_order_cnt_lsb, > - 0, (1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 4)) - 1); > + 0, MAX_UINT_BITS(sps->log2_max_pic_order_cnt_lsb_minus4 + 4)); > if (pps->bottom_field_pic_order_in_frame_present_flag && > !current->field_pic_flag) > 
se(delta_pic_order_cnt_bottom, INT32_MIN + 1, INT32_MAX); > diff --git a/libavcodec/cbs_internal.h b/libavcodec/cbs_internal.h > index 4c6f421d19..54265d8e0e 100644 > --- a/libavcodec/cbs_internal.h > +++ b/libavcodec/cbs_internal.h > @@ -79,6 +79,10 @@ int ff_cbs_write_unsigned(CodedBitstreamContext *ctx, > PutBitContext *pbc, >int width, const char *name, uint32_t value, >
Re: [libav-devel] [PATCH 01/14] maint: Ignore dot dirs
On Sat, May 4, 2019 at 2:14 PM Luca Barbato wrote: > > They are usually created by tools and editors. > --- > .gitignore | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/.gitignore b/.gitignore > index 1a08fd15c5..693fa5636a 100644 > --- a/.gitignore > +++ b/.gitignore > @@ -29,3 +29,4 @@ > /coverage.info > /lcov/ > /mapfile > +.*/ > -- > 2.12.2 > OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] hevc: Add hevc_get_pixel_4/8/12/16/24/32/48/64
Checkasm timings: block size bitdepth C NEON 4 8 bit:146.7 48.7 10 bit:146.7 52.7 8 8 bit:430.3 84.4 10 bit:430.4 119.5 12 8 bit:812.8 141.0 10 bit:812.8 195.0 16 8 bit: 1499.1 268.0 10 bit: 1498.9 368.4 24 8 bit: 4394.2 574.8 10 bit: 3696.3 804.8 32 8 bit: 5108.6 568.9 10 bit: 4249.6 918.8 48 8 bit: 16819.6 2304.9 10 bit: 13882.0 3178.5 64 8 bit: 13490.8 1799.5 10 bit: 11018.5 2519.4 --- libavcodec/arm/Makefile | 3 +- libavcodec/arm/hevc_mc.S | 381 ++ libavcodec/arm/hevcdsp_init_arm.c | 67 +++ 3 files changed, 450 insertions(+), 1 deletion(-) create mode 100644 libavcodec/arm/hevc_mc.S diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index b48745ad4..49e17ce0d 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -135,7 +135,8 @@ NEON-OBJS-$(CONFIG_AAC_DECODER)+= arm/aacpsdsp_neon.o \ NEON-OBJS-$(CONFIG_APE_DECODER)+= arm/apedsp_neon.o NEON-OBJS-$(CONFIG_DCA_DECODER)+= arm/dcadsp_neon.o \ arm/synth_filter_neon.o -NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevc_idct.o +NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevc_idct.o \ + arm/hevc_mc.o NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o\ arm/rv40dsp_neon.o diff --git a/libavcodec/arm/hevc_mc.S b/libavcodec/arm/hevc_mc.S new file mode 100644 index 0..a1274ec71 --- /dev/null +++ b/libavcodec/arm/hevc_mc.S @@ -0,0 +1,381 @@ +/* + * ARM NEON optimised MC functions for HEVC decoding + * + * Copyright (c) 2017 Alexandra Hájková + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/arm/asm.S" + +.macro get_pixels4 bitdepth +function ff_hevc_get_pixels_4_\bitdepth\()_neon, export=1 +@r0 dst, r1 dststride, r2 src, r3 srcstride +ldr r12, [sp] @height +cmp r12, #0 +bxeqlr + +1: .if \bitdepth == 8 +vld1.32 {d0[0]}, [r2], r3 +vld1.32 {d1[0]}, [r2], r3 +vld1.32 {d2[0]}, [r2], r3 +vld1.32 {d3[0]}, [r2], r3 +vshll.u8q8, d0, #6 +vshll.u8q9, d1, #6 +vshll.u8q10, d2, #6 +vshll.u8q11, d3, #6 + .else +vld1.16 {d0}, [r2], r3 +vld1.16 {d1}, [r2], r3 +vld1.16 {d2}, [r2], r3 +vld1.16 {d3}, [r2], r3 +vshl.i16d16, d0, #4 +vshl.i16d18, d1, #4 +vshl.i16d20, d2, #4 +vshl.i16d22, d3, #4 + .endif + +vst1.16 {d16}, [r0, :64], r1 +vst1.16 {d18}, [r0, :64], r1 +vst1.16 {d20}, [r0, :64], r1 +vst1.16 {d22}, [r0, :64], r1 +subsr12, #4 +bgt 1b + +bx lr +endfunc +.endm + +.macro get_pixels8 bitdepth +function ff_hevc_get_pixels_8_\bitdepth\()_neon, export=1 +@r0 dst, r1 dststride, r2 src, r3 srcstride +ldr r12, [sp] @height +cmp r12, #0 +bxeqlr + +1: .if \bitdepth == 8 +vld1.8 {d0}, [r2], r3 +vld1.8 {d1}, [r2], r3 +vld1.8 {d2}, [r2], r3 +vld1.8 {d3}, [r2], r3 +vshll.u8q8, d0, #6 +vshll.u8q9, d1, #6 +vshll.u8q10, d2, #6 +vshll.u8q11, d3, #6 + .else +vld1.16 {d16-d17}, [r2], r3 +vld1.16 {d18-d19}, [r2], r3 +vld1.16 {d20-d21}, [r2], r3 +vld1.16 {d22-d23}, [r2], r3 +vshl.i16q8, q8, #4 +vshl.i16q9, q9, #4 +vshl.i16q10, q10, #4 +vshl.i16q11, q11, #4 + .endif + +vst1.16 {d16-d17}, [r0
Re: [libav-devel] [PATCH v2] avconv.c: fix calculation of input file duration in seek_to_start()
On Mon, Oct 30, 2017 at 12:25 PM, Peter Große wrote: > Fixes looping files without audio or when using stream_copy, where > ist->nb_samples is not set since no decoding is done. > --- > avtools/avconv.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/avtools/avconv.c b/avtools/avconv.c > index 4e3ffecdef..cee7a7b452 100644 > --- a/avtools/avconv.c > +++ b/avtools/avconv.c > @@ -2553,9 +2553,9 @@ static int seek_to_start(InputFile *ifile, > AVFormatContext *is) > continue; > } else { > if (ist->framerate.num) { > -duration = av_rescale_q(1, ist->framerate, > ist->st->time_base); > +duration = FFMAX(av_rescale_q(1, av_inv_q(ist->framerate), > ist->st->time_base), 1); > } else if (ist->st->avg_frame_rate.num) { > -duration = av_rescale_q(1, ist->st->avg_frame_rate, > ist->st->time_base); > +duration = FFMAX(av_rescale_q(1, > av_inv_q(ist->st->avg_frame_rate), ist->st->time_base), 1); > } else duration = 1; > } > if (!ifile->duration) > -- > 2.13.6 > > ___ > libav-devel mailing list > libav-devel@libav.org > https://lists.libav.org/mailman/listinfo/libav-devel OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/2] avconv.c: fix calculation of input file duration in seek_to_start()
> --- a/avtools/avconv.c > +++ b/avtools/avconv.c > @@ -2553,9 +2553,9 @@ static int seek_to_start(InputFile *ifile, > AVFormatContext *is) > continue; > } else { > if (ist->framerate.num) { > -duration = av_rescale_q(1, ist->framerate, > ist->st->time_base); > +duration = av_rescale_q(1, av_inv_q(ist->framerate), > ist->st->time_base); I think it should rather be: duration = FFMAX(av_rescale_q(1, av_inv_q(ist->framerate), ist->st->time_base), 1); ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/2] avconv.c: fix calculation of input file duration in seek_to_start()
On Sun, Oct 29, 2017 at 12:48 PM, Peter Große wrote: > Fixes looping files without audio or when using stream_copy, where > ist->nb_samples is not set since no decoding is done. > Does the loop discard the last frame because its duration is set to 0? Could you give me your sample(s), please? Thank you, Alexandra ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] avconv: when using -loop option bail out if seek to start fails
On Fri, Jun 30, 2017 at 5:28 PM, Peter Große wrote: > Fixes an infinite loop when a demuxer fails to seek to the start of the input. > > Signed-off-by: Peter Große > --- > avtools/avconv.c | 8 +--- > 1 file changed, 5 insertions(+), 3 deletions(-) > > diff --git a/avtools/avconv.c b/avtools/avconv.c > index 8dd11bb5fc..4e3ffecdef 100644 > --- a/avtools/avconv.c > +++ b/avtools/avconv.c > @@ -2615,9 +2615,11 @@ static int process_input(void) > return ret; > } > if (ret < 0 && ifile->loop) { > -if ((ret = seek_to_start(ifile, is)) < 0) > -return ret; > -ret = get_input_packet(ifile, &pkt); > +ret = seek_to_start(ifile, is); > +if(ret < 0) > +av_log(NULL, AV_LOG_WARNING, "Seek to start failed.\n"); > +else > +ret = get_input_packet(ifile, &pkt); > } > if (ret < 0) { > if (ret != AVERROR_EOF) { > -- > 2.13.0 > Producing unlooped output with avconv -loop -1 and running infinitely is clearly wrong and I think this is a reasonable way to fix it. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] Libav Sprint Pelhřimov
Hello everyone, I would like to announce another Libav sprint in the wilds around Pelhřimov. The date was set to 21-23 July, but the following weekend (28-30) is also possible. The plans are: * hacking * cooking * enjoying the countryside * possibly others. Please reply here if you're interested. (The report about the last sprint: http://sasshkas.blogspot.cz/2016/10/another-libav-sprint.html) ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] hevc: Add NEON 32x32 IDCT
--- libavcodec/arm/hevc_idct.S| 311 +++--- libavcodec/arm/hevcdsp_init_arm.c | 4 + 2 files changed, 294 insertions(+), 21 deletions(-) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index eeb81e3..79799b2 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -28,6 +28,10 @@ const trans, align=4 .short 89, 75, 50, 18 .short 90, 87, 80, 70 .short 57, 43, 25, 9 +.short 90, 90, 88, 85 +.short 82, 78, 73, 67 +.short 61, 54, 46, 38 +.short 31, 22, 13, 4 endconst .macro clip10 in1, in2, c1, c2 @@ -509,7 +513,7 @@ endfunc vsub.s32\tmp_m, \e, \o .endm -.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7 +.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7, offset tr_4x4_8\in0, \in2, \in4, \in6, q8, q9, q10, q11, q12, q13, q14, q15 vmull.s16 q12, \in1, \in0[0] @@ -535,7 +539,7 @@ endfunc butterfly q9, q13, q1, q6 butterfly q10, q14, q2, q5 butterfly q11, q15, q3, q4 -add r4, sp, #512 +add r4, sp, #\offset vst1.s32{q0-q1}, [r4, :128]! vst1.s32{q2-q3}, [r4, :128]! vst1.s32{q4-q5}, [r4, :128]! 
@@ -575,15 +579,15 @@ endfunc vsub.s32\in6, \in6, \in7 .endm -.macro store16 in0, in1, in2, in3, in4, in5, in6, in7 +.macro store16 in0, in1, in2, in3, in4, in5, in6, in7, rx vst1.s16\in0, [r1, :64], r2 -vst1.s16\in1, [r3, :64], r4 +vst1.s16\in1, [r3, :64], \rx vst1.s16\in2, [r1, :64], r2 -vst1.s16\in3, [r3, :64], r4 +vst1.s16\in3, [r3, :64], \rx vst1.s16\in4, [r1, :64], r2 -vst1.s16\in5, [r3, :64], r4 +vst1.s16\in5, [r3, :64], \rx vst1.s16\in6, [r1, :64], r2 -vst1.s16\in7, [r3, :64], r4 +vst1.s16\in7, [r3, :64], \rx .endm .macro scale out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, in3, in4, in5, in6, in7, shift @@ -597,19 +601,35 @@ endfunc vqrshrn.s32 \out7, \in7, \shift .endm -.macro tr_16x4 name, shift +@stores in1, in2, in4, in6 ascending from off1 and +@stores in1, in3, in5, in7 descending from off2 +.macro store_to_stack off1, off2, in0, in2, in4, in6, in7, in5, in3, in1 +add r1, sp, #\off1 +add r3, sp, #\off2 +mov r2, #-16 +vst1.s32{\in0}, [r1, :128]! +vst1.s32{\in1}, [r3, :128], r2 +vst1.s32{\in2}, [r1, :128]! +vst1.s32{\in3}, [r3, :128], r2 +vst1.s32{\in4}, [r1, :128]! 
+vst1.s32{\in5}, [r3, :128], r2 +vst1.s32{\in6}, [r1, :128] +vst1.s32{\in7}, [r3, :128] +.endm + +.macro tr_16x4 name, shift, offset, step function func_tr_16x4_\name mov r1, r5 -add r3, r5, #64 -mov r2, #128 +add r3, r5, #(\step * 64) +mov r2, #(\step * 128) load16 d0, d1, d2, d3, d4, d5, d6, d7 movrel r1, trans -tr16_8x4d0, d1, d2, d3, d4, d5, d6, d7 +tr16_8x4d0, d1, d2, d3, d4, d5, d6, d7, \offset -add r1, r5, #32 -add r3, r5, #(64 + 32) -mov r2, #128 +add r1, r5, #(\step * 32) +add r3, r5, #(\step * 3 *32) +mov r2, #(\step * 128) load16 d8, d9, d2, d3, d4, d5, d6, d7 movrel r1, trans + 16 vld1.s16{q0}, [r1, :128] @@ -630,11 +650,12 @@ function func_tr_16x4_\name add_member d6, d1[2], d0[3], d0[0], d0[2], d1[1], d1[3], d1[0], d0[1], +, -, +, -, +, +, -, + add_member d7, d1[3], d1[2], d1[1], d1[0], d0[3], d0[2], d0[1], d0[0], +, -, +, -, +, -, +, - -add r4, sp, #512 +add r4, sp, #\offset vld1.s32{q0-q1}, [r4, :128]! vld1.s32{q2-q3}, [r4, :128]! butterfly16 q0, q5, q1, q6, q2, q7, q3, q8 +.if \shift > 0 scale d26, d27, d28, d29, d30, d31, d16, d17, q4, q0, q5, q1, q6, q2, q7, q3, \shift transpose8_4x4 d26, d28, d30, d16 transpose8_4x4 d17, d31, d29, d27 @@ -642,12 +663,16 @@ function func_tr_16x4_\name add r3, r6, #(24 +3*32) mov r2, #32 mov r4, #-32 -store16 d26, d27, d28, d29, d30, d31, d16, d17 +store16 d26, d27, d28, d29, d30, d31, d16, d17, r4 +.else +store_to_stack \offset, (\offset + 240), q4, q5, q6, q7, q3, q2, q1, q0 +.endif -add
[libav-devel] [PATCH 1/2] hevc: 16x16 NEON idct: Use the right element size for stores.
This doesn't change the actual behaviour of the code but improves readability. --- libavcodec/arm/hevc_idct.S | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index fac5758..eeb81e3 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -536,10 +536,10 @@ endfunc butterfly q10, q14, q2, q5 butterfly q11, q15, q3, q4 add r4, sp, #512 -vst1.s16{q0-q1}, [r4, :128]! -vst1.s16{q2-q3}, [r4, :128]! -vst1.s16{q4-q5}, [r4, :128]! -vst1.s16{q6-q7}, [r4, :128] +vst1.s32{q0-q1}, [r4, :128]! +vst1.s32{q2-q3}, [r4, :128]! +vst1.s32{q4-q5}, [r4, :128]! +vst1.s32{q6-q7}, [r4, :128] .endm .macro load16 in0, in1, in2, in3, in4, in5, in6, in7 @@ -631,8 +631,8 @@ function func_tr_16x4_\name add_member d7, d1[3], d1[2], d1[1], d1[0], d0[3], d0[2], d0[1], d0[0], +, -, +, -, +, -, +, - add r4, sp, #512 -vld1.s16{q0-q1}, [r4, :128]! -vld1.s16{q2-q3}, [r4, :128]! +vld1.s32{q0-q1}, [r4, :128]! +vld1.s32{q2-q3}, [r4, :128]! butterfly16 q0, q5, q1, q6, q2, q7, q3, q8 scale d26, d27, d28, d29, d30, d31, d16, d17, q4, q0, q5, q1, q6, q2, q7, q3, \shift @@ -645,8 +645,8 @@ function func_tr_16x4_\name store16 d26, d27, d28, d29, d30, d31, d16, d17 add r4, sp, #576 -vld1.s16{q0-q1}, [r4, :128]! -vld1.s16{q2-q3}, [r4, :128] +vld1.s32{q0-q1}, [r4, :128]! +vld1.s32{q2-q3}, [r4, :128] butterfly16 q0, q9, q1, q10, q2, q11, q3, q12 scale d26, d27, d28, d29, d30, d31, d8, d9, q4, q0, q9, q1, q10, q2, q11, q3, \shift transpose8_4x4 d26, d28, d30, d8 -- 2.10.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/2] hevc: 16x16 NEON idct: store 32 bit elements correctly
--- libavcodec/arm/hevc_idct.S | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index fac5758..4814c86 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -536,10 +536,10 @@ endfunc butterfly q10, q14, q2, q5 butterfly q11, q15, q3, q4 add r4, sp, #512 -vst1.s16{q0-q1}, [r4, :128]! -vst1.s16{q2-q3}, [r4, :128]! -vst1.s16{q4-q5}, [r4, :128]! -vst1.s16{q6-q7}, [r4, :128] +vst1.s32{q0-q1}, [r4, :128]! +vst1.s32{q2-q3}, [r4, :128]! +vst1.s32{q4-q5}, [r4, :128]! +vst1.s32{q6-q7}, [r4, :128] .endm .macro load16 in0, in1, in2, in3, in4, in5, in6, in7 -- 2.10.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] hevc: Add NEON 32x32 IDCT
--- libavcodec/arm/hevc_idct.S| 319 ++ libavcodec/arm/hevcdsp_init_arm.c | 4 + 2 files changed, 297 insertions(+), 26 deletions(-) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 4814c86..3a512b4 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -28,6 +28,10 @@ const trans, align=4 .short 89, 75, 50, 18 .short 90, 87, 80, 70 .short 57, 43, 25, 9 +.short 90, 90, 88, 85 +.short 82, 78, 73, 67 +.short 61, 54, 46, 38 +.short 31, 22, 13, 4 endconst .macro clip10 in1, in2, c1, c2 @@ -509,7 +513,7 @@ endfunc vsub.s32\tmp_m, \e, \o .endm -.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7 +.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7, offset tr_4x4_8\in0, \in2, \in4, \in6, q8, q9, q10, q11, q12, q13, q14, q15 vmull.s16 q12, \in1, \in0[0] @@ -535,7 +539,7 @@ endfunc butterfly q9, q13, q1, q6 butterfly q10, q14, q2, q5 butterfly q11, q15, q3, q4 -add r4, sp, #512 +add r4, sp, #\offset vst1.s32{q0-q1}, [r4, :128]! vst1.s32{q2-q3}, [r4, :128]! vst1.s32{q4-q5}, [r4, :128]! @@ -575,15 +579,15 @@ endfunc vsub.s32\in6, \in6, \in7 .endm -.macro store16 in0, in1, in2, in3, in4, in5, in6, in7 +.macro store16 in0, in1, in2, in3, in4, in5, in6, in7, rx vst1.s16\in0, [r1, :64], r2 -vst1.s16\in1, [r3, :64], r4 +vst1.s16\in1, [r3, :64], \rx vst1.s16\in2, [r1, :64], r2 -vst1.s16\in3, [r3, :64], r4 +vst1.s16\in3, [r3, :64], \rx vst1.s16\in4, [r1, :64], r2 -vst1.s16\in5, [r3, :64], r4 +vst1.s16\in5, [r3, :64], \rx vst1.s16\in6, [r1, :64], r2 -vst1.s16\in7, [r3, :64], r4 +vst1.s16\in7, [r3, :64], \rx .endm .macro scale out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, in3, in4, in5, in6, in7, shift @@ -597,19 +601,33 @@ endfunc vqrshrn.s32 \out7, \in7, \shift .endm -.macro tr_16x4 name, shift +.macro store_to_stack off1, off2, in0, in2, in4, in6, in7, in5, in3, in1 +add r1, sp, #\off1 +add r3, sp, #\off2 +mov r2, #-16 +vst1.s32{\in0}, [r1, :128]! +vst1.s32{\in1}, [r3, :128], r2 +vst1.s32{\in2}, [r1, :128]! 
+vst1.s32{\in3}, [r3, :128], r2 +vst1.s32{\in4}, [r1, :128]! +vst1.s32{\in5}, [r3, :128], r2 +vst1.s32{\in6}, [r1, :128] +vst1.s32{\in7}, [r3, :128] +.endm + +.macro tr_16x4 name, shift, offset, step function func_tr_16x4_\name mov r1, r5 -add r3, r5, #64 -mov r2, #128 +add r3, r5, #(\step * 64) +mov r2, #(\step * 128) load16 d0, d1, d2, d3, d4, d5, d6, d7 movrel r1, trans -tr16_8x4d0, d1, d2, d3, d4, d5, d6, d7 +tr16_8x4d0, d1, d2, d3, d4, d5, d6, d7, \offset -add r1, r5, #32 -add r3, r5, #(64 + 32) -mov r2, #128 +add r1, r5, #(\step * 32) +add r3, r5, #(\step * 3 *32) +mov r2, #(\step * 128) load16 d8, d9, d2, d3, d4, d5, d6, d7 movrel r1, trans + 16 vld1.s16{q0}, [r1, :128] @@ -630,11 +648,12 @@ function func_tr_16x4_\name add_member d6, d1[2], d0[3], d0[0], d0[2], d1[1], d1[3], d1[0], d0[1], +, -, +, -, +, +, -, + add_member d7, d1[3], d1[2], d1[1], d1[0], d0[3], d0[2], d0[1], d0[0], +, -, +, -, +, -, +, - -add r4, sp, #512 -vld1.s16{q0-q1}, [r4, :128]! -vld1.s16{q2-q3}, [r4, :128]! +add r4, sp, #\offset +vld1.s32{q0-q1}, [r4, :128]! +vld1.s32{q2-q3}, [r4, :128]! butterfly16 q0, q5, q1, q6, q2, q7, q3, q8 +.if \shift > 0 scale d26, d27, d28, d29, d30, d31, d16, d17, q4, q0, q5, q1, q6, q2, q7, q3, \shift transpose8_4x4 d26, d28, d30, d16 transpose8_4x4 d17, d31, d29, d27 @@ -642,12 +661,16 @@ function func_tr_16x4_\name add r3, r6, #(24 +3*32) mov r2, #32 mov r4, #-32 -store16 d26, d27, d28, d29, d30, d31, d16, d17 - -add r4, sp, #576 -vld1.s16{q0-q1}, [r4, :128]! -vld1.s16{q2-q3}, [r4, :128] +store16 d26, d27, d28, d29, d30, d31, d16, d17, r4
[libav-devel] [PATCH] hevc: Add NEON add_residual for bitdepth 10
--- libavcodec/arm/hevc_idct.S| 90 +++ libavcodec/arm/hevcdsp_init_arm.c | 13 ++ 2 files changed, 103 insertions(+) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index b3ce00b..5d400c2 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -30,6 +30,13 @@ const trans, align=4 .short 57, 43, 25, 9 endconst +.macro clip10 in1, in2, c1, c2 +vmax.s16\in1, \in1, \c1 +vmax.s16\in2, \in2, \c1 +vmin.s16\in1, \in1, \c2 +vmin.s16\in2, \in2, \c2 +.endm + function ff_hevc_add_residual_4x4_8_neon, export=1 vld1.16 {q0-q1}, [r1, :128] vld1.32 d4[0], [r0, :32], r2 @@ -50,6 +57,25 @@ function ff_hevc_add_residual_4x4_8_neon, export=1 bx lr endfunc +function ff_hevc_add_residual_4x4_10_neon, export=1 +mov r12, r0 +vld1.16 {q0-q1}, [r1, :128] +vld1.16 d4, [r12, :64], r2 +vld1.16 d5, [r12, :64], r2 +vld1.16 d6, [r12, :64], r2 +vqadd.s16 q0, q2 +vld1.16 d7, [r12, :64], r2 +vmov.s16q12, #0 +vqadd.s16 q1, q3 +vmov.s16q13, #0x3FF +clip10 q0, q1, q12, q13 +vst1.16 d0, [r0, :64], r2 +vst1.16 d1, [r0, :64], r2 +vst1.16 d2, [r0, :64], r2 +vst1.16 d3, [r0, :64], r2 +bx lr +endfunc + function ff_hevc_add_residual_8x8_8_neon, export=1 add r12, r0, r2 add r2, r2, r2 @@ -70,6 +96,25 @@ function ff_hevc_add_residual_8x8_8_neon, export=1 bx lr endfunc +function ff_hevc_add_residual_8x8_10_neon, export=1 +add r12, r0, r2 +add r2, r2, r2 +mov r3, #8 +vmov.s16q12, #0 +vmov.s16q13, #0x3FF +1: subsr3, #2 +vld1.16 {q0-q1}, [r1, :128]! 
+vld1.16 {q8},[r0, :128] +vqadd.s16 q0, q8 +vld1.16 {q9},[r12, :128] +vqadd.s16 q1, q9 +clip10 q0, q1, q12, q13 +vst1.16 {q0}, [r0, :128], r2 +vst1.16 {q1}, [r12, :128], r2 +bne 1b +bx lr +endfunc + function ff_hevc_add_residual_16x16_8_neon, export=1 mov r3, #16 add r12, r0, r2 @@ -97,6 +142,29 @@ function ff_hevc_add_residual_16x16_8_neon, export=1 bx lr endfunc +function ff_hevc_add_residual_16x16_10_neon, export=1 +mov r3, #16 +vmov.s16q12, #0 +vmov.s16q13, #0x3FF +add r12, r0, r2 +add r2, r2, r2 +1: subsr3, #2 +vld1.16 {q8-q9}, [r0, :128] +vld1.16 {q0, q1}, [r1, :128]! +vqadd.s16 q0, q8 +vld1.16 {q10-q11}, [r12, :128] +vqadd.s16 q1, q9 +vld1.16 {q2, q3}, [r1, :128]! +vqadd.s16 q2, q10 +vqadd.s16 q3, q11 +clip10 q0, q1, q12, q13 +clip10 q2, q3, q12, q13 +vst1.16 {q0-q1}, [r0, :128], r2 +vst1.16 {q2-q3}, [r12, :128], r2 +bne 1b +bx lr +endfunc + function ff_hevc_add_residual_32x32_8_neon, export=1 vpush {q4-q7} add r12, r0, r2 @@ -137,6 +205,28 @@ function ff_hevc_add_residual_32x32_8_neon, export=1 bx lr endfunc +function ff_hevc_add_residual_32x32_10_neon, export=1 +mov r3, #32 +vmov.s16q12, #0 +vmov.s16q13, #0x3FF +1: subsr3, #1 +vldmr1!, {q0-q3} +vld1.16 {q8, q9}, [r0, :128] +add r12, r0, #32 +vld1.16 {q10, q11}, [r12, :128] +vqadd.s16 q0, q8 +vqadd.s16 q1, q9 +vqadd.s16 q2, q10 +vqadd.s16 q3, q11 +clip10 q0, q1, q12, q13 +clip10 q2, q3, q12, q13 +vst1.16 {q0-q1}, [r0, :128] +vst1.16 {q2-q3}, [r12, :128] +add r0, r2 +bne 1b +bx lr +endfunc + .macro idct_4x4_dc bitdepth function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c index 817c157..e3d4e4e 100644 --- a/libavcodec/arm/hevcdsp_init_arm.c +++ b/libavcodec/arm/hevcdsp_init_arm.c @@ -28,12 +28,20 @@ void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, int16_t *coeffs,
Re: [libav-devel] [PATCH] arm: hevc_idct: Tune the add_res_8x8 and add_res_32x32 functions
On Thu, Apr 27, 2017 at 11:38 PM, Martin Storsjö wrote: > Before: Cortex A7 A8 A9 A53 > hevc_add_res_8x8_8_neon: 116.0 58.7 80.2 90.7 > hevc_add_res_32x32_8_neon: 1230.0 737.5 1187.5 974.4 > After: > hevc_add_res_8x8_8_neon: 97.7 57.0 73.7 80.0 > hevc_add_res_32x32_8_neon: 1216.0 698.7 1127.5 827.1 Looks great. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] hevc: Add NEON add_residual for bitdepth 10
--- libavcodec/arm/hevc_idct.S| 89 +++ libavcodec/arm/hevcdsp_init_arm.c | 13 ++ 2 files changed, 102 insertions(+) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 3966e93..14af40f 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -50,6 +50,32 @@ function ff_hevc_add_residual_4x4_8_neon, export=1 bx lr endfunc +.macro clip10 in1, in2, c1, c2 +vmax.s16\in1, \in1, q12 +vmax.s16\in2, \in2, q12 +vmin.s16\in1, \in1, q13 +vmin.s16\in2, \in2, q13 +.endm + +function ff_hevc_add_residual_4x4_10_neon, export=1 +vld1.16 {q0-q1}, [r1, :128] +mov r12, r0 +vld1.16 d4, [r12, :64], r2 +vld1.16 d5, [r12, :64], r2 +vld1.16 d6, [r12, :64], r2 +vld1.16 d7, [r12, :64], r2 +vqadd.s16 q0, q2 +vqadd.s16 q1, q3 +vmov.s16q12, #0 +vmov.s16q13, #0x3FF +clip10 q0, q1 +vst1.16 d0, [r0, :64], r2 +vst1.16 d1, [r0, :64], r2 +vst1.16 d2, [r0, :64], r2 +vst1.16 d3, [r0, :64], r2 +bx lr +endfunc + function ff_hevc_add_residual_8x8_8_neon, export=1 mov r3, #8 1: subsr3, #2 @@ -69,6 +95,24 @@ function ff_hevc_add_residual_8x8_8_neon, export=1 bx lr endfunc +function ff_hevc_add_residual_8x8_10_neon, export=1 +mov r3, #8 +vmov.s16q12, #0 +vmov.s16q13, #0x3FF +1: subsr3, #2 +vld1.16 {q0-q1}, [r1, :128]! +vld1.16 {q8},[r0, :128] +add r12, r0, r2 +vld1.16 {q9},[r12, :128] +vqadd.s16 q0, q8 +vqadd.s16 q1, q9 +clip10 q0, q1 +vst1.16 {q0}, [r0, :128], r2 +vst1.16 {q1}, [r0, :128], r2 +bne 1b +bx lr +endfunc + function ff_hevc_add_residual_16x16_8_neon, export=1 mov r3, #16 add r12, r0, r2 @@ -96,6 +140,29 @@ function ff_hevc_add_residual_16x16_8_neon, export=1 bx lr endfunc +function ff_hevc_add_residual_16x16_10_neon, export=1 +mov r3, #16 +vmov.s16q12, #0 +vmov.s16q13, #0x3FF +add r12, r0, r2 +add r2, r2, r2 +1: subsr3, #2 +vld1.16 {q8-q9}, [r0, :128] +vld1.16 {q0, q1}, [r1, :128]! +vld1.16 {q10-q11}, [r12, :128] +vld1.16 {q2, q3}, [r1, :128]! 
+vqadd.s16 q0, q8 +vqadd.s16 q1, q9 +vqadd.s16 q2, q10 +vqadd.s16 q3, q11 +clip10 q0, q1 +clip10 q2, q3 +vst1.16 {q0-q1}, [r0, :128], r2 +vst1.16 {q2-q3}, [r12, :128], r2 +bne 1b +bx lr +endfunc + function ff_hevc_add_residual_32x32_8_neon, export=1 mov r3, #32 1: subsr3, #1 @@ -118,6 +185,28 @@ function ff_hevc_add_residual_32x32_8_neon, export=1 bx lr endfunc +function ff_hevc_add_residual_32x32_10_neon, export=1 +mov r3, #32 +vmov.s16q12, #0 +vmov.s16q13, #0x3FF +1: subsr3, #1 +vldmr1!, {q0-q3} +vld1.16 {q8, q9}, [r0, :128] +add r12, r0, #32 +vld1.16 {q10, q11}, [r12, :128] +vqadd.s16 q0, q8 +vqadd.s16 q1, q9 +vqadd.s16 q2, q10 +vqadd.s16 q3, q11 +clip10 q0, q1 +clip10 q2, q3 +vst1.16 {q0-q1}, [r0, :128] +vst1.16 {q2-q3}, [r12, :128] +add r0, r2 +bne 1b +bx lr +endfunc + .macro idct_4x4_dc bitdepth function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c index 817c157..e3d4e4e 100644 --- a/libavcodec/arm/hevcdsp_init_arm.c +++ b/libavcodec/arm/hevcdsp_init_arm.c @@ -28,12 +28,20 @@ void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, int16_t *coeffs, + ptrdiff_t stride); void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst,
[libav-devel] [PATCH] hevc: Add NEON add_residual for bitdepth 8
From: Seppo Tomperi <seppo.tomp...@vtt.fi> Optimized by Alexandra Hájková. --- libavcodec/arm/hevc_idct.S| 88 +++ libavcodec/arm/hevcdsp_init_arm.c | 15 +++ 2 files changed, 103 insertions(+) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 41b1b29..3966e93 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -30,6 +30,94 @@ const trans, align=4 .short 57, 43, 25, 9 endconst +function ff_hevc_add_residual_4x4_8_neon, export=1 +vld1.16 {q0-q1}, [r1, :128] +vld1.32 d4[0], [r0, :32], r2 +vld1.32 d4[1], [r0, :32], r2 +vld1.32 d5[0], [r0, :32], r2 +vld1.32 d5[1], [r0, :32], r2 +sub r0, r0, r2, lsl #2 +vmovl.u8q8, d4 +vmovl.u8q9, d5 +vqadd.s16 q0, q0, q8 +vqadd.s16 q1, q1, q9 +vqmovun.s16 d0, q0 +vqmovun.s16 d1, q1 +vst1.32 d0[0], [r0, :32], r2 +vst1.32 d0[1], [r0, :32], r2 +vst1.32 d1[0], [r0, :32], r2 +vst1.32 d1[1], [r0, :32], r2 +bx lr +endfunc + +function ff_hevc_add_residual_8x8_8_neon, export=1 +mov r3, #8 +1: subsr3, #2 +vld1.16 {q0-q1}, [r1, :128]! +vld1.8 {d16}, [r0, :64] +add r12, r0, r2 +vld1.8 {d17}, [r12, :64] +vmovl.u8q9, d16 +vmovl.u8q8, d17 +vqadd.s16 q0, q9 +vqadd.s16 q1, q8 +vqmovun.s16 d0, q0 +vqmovun.s16 d1, q1 +vst1.8 d0, [r0, :64], r2 +vst1.8 d1, [r0, :64], r2 +bne 1b +bx lr +endfunc + +function ff_hevc_add_residual_16x16_8_neon, export=1 +mov r3, #16 +add r12, r0, r2 +add r2, r2, r2 +1: subsr3, #2 +vld1.8 {q8}, [r0, :128] +vld1.16 {q0, q1}, [r1, :128]! +vld1.8 {q11},[r12, :128] +vld1.16 {q2, q3}, [r1, :128]! 
+vmovl.u8q9, d16 +vmovl.u8q10, d17 +vmovl.u8q12, d22 +vmovl.u8q13, d23 +vqadd.s16 q0, q9 +vqadd.s16 q1, q10 +vqadd.s16 q2, q12 +vqadd.s16 q3, q13 +vqmovun.s16 d0, q0 +vqmovun.s16 d1, q1 +vqmovun.s16 d2, q2 +vqmovun.s16 d3, q3 +vst1.8 {q0}, [r0, :128], r2 +vst1.8 {q1}, [r12, :128], r2 +bne 1b +bx lr +endfunc + +function ff_hevc_add_residual_32x32_8_neon, export=1 +mov r3, #32 +1: subsr3, #1 +vldmr1!, {q0-q3} +vld1.8 {q8, q9}, [r0, :128] +vmovl.u8q10, d16 +vmovl.u8q11, d17 +vmovl.u8q12, d18 +vmovl.u8q13, d19 +vqadd.s16 q0, q10 +vqadd.s16 q1, q11 +vqadd.s16 q2, q12 +vqadd.s16 q3, q13 +vqmovun.s16 d0, q0 +vqmovun.s16 d1, q1 +vqmovun.s16 d2, q2 +vqmovun.s16 d3, q3 +vst1.8 {q0, q1}, [r0, :128], r2 +bne 1b +bx lr +endfunc + .macro idct_4x4_dc bitdepth function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c index 3d8d06b..817c157 100644 --- a/libavcodec/arm/hevcdsp_init_arm.c +++ b/libavcodec/arm/hevcdsp_init_arm.c @@ -25,6 +25,16 @@ #include "libavcodec/hevcdsp.h" + +void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, int16_t *coeffs, + ptrdiff_t stride); +void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, int16_t *coeffs, + ptrdiff_t stride); +void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, int16_t *coeffs, + ptrdiff_t stride); +void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, int16_t *coeffs, + ptrdiff_t stride); + void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs); @@ -47,6 +57,11 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth) if (have_neon(cpu_flags)) { if (bit_depth == 8) { +c->add_residual[0] = ff_hevc_add_residual_4x4_8_neon; +c->add_residual[1] = ff_hevc_add_residual_8x8_8_neon; +c->add_residual[2] = ff_hevc_add_residual_16x
[libav-devel] [PATCH] hevc: Add NEON add_residual for bitdepth 8
From: Seppo Tomperi <seppo.tomp...@vtt.fi> Optimized by Alexandra Hájková. --- libavcodec/arm/hevc_idct.S| 86 +++ libavcodec/arm/hevcdsp_init_arm.c | 15 +++ 2 files changed, 101 insertions(+) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 41b1b29..833c3fe 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -30,6 +30,92 @@ const trans, align=4 .short 57, 43, 25, 9 endconst +function ff_hevc_add_residual_4x4_8_neon, export=1 +vld1.16 {q0-q1}, [r1, :128] +vld1.32 d4[0], [r0, :32], r2 +vld1.32 d4[1], [r0, :32], r2 +vld1.32 d5[0], [r0, :32], r2 +vld1.32 d5[1], [r0, :32], r2 +sub r0, r0, r2, lsl #2 +vmovl.u8q8, d4 +vmovl.u8q9, d5 +vqadd.s16 q0, q0, q8 +vqadd.s16 q1, q1, q9 +vqmovun.s16 d0, q0 +vqmovun.s16 d1, q1 +vst1.32 d0[0], [r0], r2 +vst1.32 d0[1], [r0], r2 +vst1.32 d1[0], [r0], r2 +vst1.32 d1[1], [r0], r2 +bx lr +endfunc + +function ff_hevc_add_residual_8x8_8_neon, export=1 +mov r3, #8 +1: subsr3, #2 +vld1.16 {q0-q1}, [r1, :128]! +vld1.8 {q8},[r0, :128] +vmovl.u8q9, d16 +vmovl.u8q8, d17 +vqadd.s16 q0, q9 +vqadd.s16 q1, q8 +vqmovun.s16 d0, q0 +vqmovun.s16 d1, q1 +vst1.8 d0, [r0, :64], r2 +vst1.8 d1, [r0, :64], r2 +bne 1b +bx lr +endfunc + +function ff_hevc_add_residual_16x16_8_neon, export=1 +push{lr} +mov r3, #16 +1: subsr3, #2 +vld1.16 {q0, q1}, [r1, :128]! +vld1.16 {q2, q3}, [r1, :128]! 
+vld1.8 {q8}, [r0, :128] +add lr, r0, r2 +vld1.8 {q11},[lr, :128] +vmovl.u8q9, d16 +vmovl.u8q10, d17 +vmovl.u8q12, d22 +vmovl.u8q13, d23 +vqadd.s16 q0, q9 +vqadd.s16 q1, q10 +vqadd.s16 q2, q12 +vqadd.s16 q3, q13 +vqmovun.s16 d0, q0 +vqmovun.s16 d1, q1 +vqmovun.s16 d2, q2 +vqmovun.s16 d3, q3 +vst1.8 {q0}, [r0, :128], r2 +vst1.8 {q1}, [r0, :128], r2 +bne 1b +pop {pc} +endfunc + +function ff_hevc_add_residual_32x32_8_neon, export=1 +mov r3, #32 +1: subsr3, #1 +vldmr1!, {q0-q3} +vld1.8 {q8, q9}, [r0, :128] +vmovl.u8q10, d16 +vmovl.u8q11, d17 +vmovl.u8q12, d18 +vmovl.u8q13, d19 +vqadd.s16 q0, q10 +vqadd.s16 q1, q11 +vqadd.s16 q2, q12 +vqadd.s16 q3, q13 +vqmovun.s16 d0, q0 +vqmovun.s16 d1, q1 +vqmovun.s16 d2, q2 +vqmovun.s16 d3, q3 +vst1.8 {q0, q1}, [r0, :128], r2 +bne 1b +bx lr +endfunc + .macro idct_4x4_dc bitdepth function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c index 3d8d06b..817c157 100644 --- a/libavcodec/arm/hevcdsp_init_arm.c +++ b/libavcodec/arm/hevcdsp_init_arm.c @@ -25,6 +25,16 @@ #include "libavcodec/hevcdsp.h" + +void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, int16_t *coeffs, + ptrdiff_t stride); +void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, int16_t *coeffs, + ptrdiff_t stride); +void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, int16_t *coeffs, + ptrdiff_t stride); +void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, int16_t *coeffs, + ptrdiff_t stride); + void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs); @@ -47,6 +57,11 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth) if (have_neon(cpu_flags)) { if (bit_depth == 8) { +c->add_residual[0] = ff_hevc_add_residual_4x4_8_neon; +c->add_residual[1] = ff_hevc_add_residual_8x8_8_neon; +c->add_residual[2] = ff_hevc_add_residual_16x16_8_neon; +c->add_residual[3] = 
ff_hevc_add_residual_32x32_8_neon; + c->idct_dc[
[libav-devel] [PATCH 2/2] hevc: Add support for bitdepth 10 for IDCT DC
--- libavcodec/arm/hevc_idct.S| 40 +++ libavcodec/arm/hevcdsp_init_arm.c | 9 + 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index ceded7a..41b1b29 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -30,26 +30,29 @@ const trans, align=4 .short 57, 43, 25, 9 endconst -function ff_hevc_idct_4x4_dc_8_neon, export=1 +.macro idct_4x4_dc bitdepth +function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] -ldr r2, =0x20 +ldr r2, =(1 << (13 - \bitdepth)) add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q0, r1 vdup.16 q1, r1 vst1.16 {q0, q1}, [r0, :128] bx lr endfunc +.endm -function ff_hevc_idct_8x8_dc_8_neon, export=1 +.macro idct_8x8_dc bitdepth +function ff_hevc_idct_8x8_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] -ldr r2, =0x20 +ldr r2, =(1 << (13 - \bitdepth)) add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q8, r1 vdup.16 q9, r1 vmov.16 q10, q8 @@ -61,14 +64,16 @@ function ff_hevc_idct_8x8_dc_8_neon, export=1 vstmr0, {q8-q15} bx lr endfunc +.endm -function ff_hevc_idct_16x16_dc_8_neon, export=1 +.macro idct_16x16_dc bitdepth +function ff_hevc_idct_16x16_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] -ldr r2, =0x20 +ldr r2, =(1 << (13 - \bitdepth)) add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q8, r1 vdup.16 q9, r1 vmov.16 q10, q8 @@ -83,14 +88,16 @@ function ff_hevc_idct_16x16_dc_8_neon, export=1 vstmr0, {q8-q15} bx lr endfunc +.endm -function ff_hevc_idct_32x32_dc_8_neon, export=1 +.macro idct_32x32_dc bitdepth +function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] -ldr r2, =0x20 +ldr r2, =(1 << (13 - \bitdepth)) add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) mov r3, #16 vdup.16 q8, r1 vdup.16 q9, r1 @@ -105,6 +112,7 @@ function ff_hevc_idct_32x32_dc_8_neon, export=1 bne 1b bx lr endfunc +.endm .macro sum_sub out, 
in, c, op .ifc \op, + @@ -496,8 +504,16 @@ tr_16x4 secondpass_10, 20 - 10 .ltorg idct_4x4 8 +idct_4x4_dc 8 idct_4x4 10 +idct_4x4_dc 10 idct_8x8 8 +idct_8x8_dc 8 idct_8x8 10 +idct_8x8_dc 10 idct_16x16 8 +idct_16x16_dc 8 idct_16x16 10 +idct_16x16_dc 10 +idct_32x32_dc 8 +idct_32x32_dc 10 diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c index fa2e7ad..3d8d06b 100644 --- a/libavcodec/arm/hevcdsp_init_arm.c +++ b/libavcodec/arm/hevcdsp_init_arm.c @@ -29,6 +29,10 @@ void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs); +void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs); void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit); @@ -53,6 +57,11 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth) c->idct[2] = ff_hevc_idct_16x16_8_neon; } if (bit_depth == 10) { +c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_neon; +c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_neon; +c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_neon; +c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_neon; + c->idct[0] = ff_hevc_idct_4x4_10_neon; c->idct[1] = ff_hevc_idct_8x8_10_neon; c->idct[2] = ff_hevc_idct_16x16_10_neon; -- 2.10.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/2] hevc: Add NEON IDCT DC functions for bitdepth 8
From: Seppo Tomperi <seppo.tomp...@vtt.fi> Signed-off-by: Alexandra Hájková <alexan...@khirnov.net> --- libavcodec/arm/hevc_idct.S| 78 +++ libavcodec/arm/hevcdsp_init_arm.c | 10 + 2 files changed, 88 insertions(+) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 156d476..ceded7a 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -1,5 +1,7 @@ /* * ARM NEON optimised IDCT functions for HEVC decoding + * + * Copyright (c) 2014 Seppo Tomperi <seppo.tomp...@vtt.fi> * Copyright (c) 2017 Alexandra Hájková * * This file is part of Libav. @@ -28,6 +30,82 @@ const trans, align=4 .short 57, 43, 25, 9 endconst +function ff_hevc_idct_4x4_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +vdup.16 q0, r1 +vdup.16 q1, r1 +vst1.16 {q0, q1}, [r0, :128] +bx lr +endfunc + +function ff_hevc_idct_8x8_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +vdup.16 q8, r1 +vdup.16 q9, r1 +vmov.16 q10, q8 +vmov.16 q11, q8 +vmov.16 q12, q8 +vmov.16 q13, q8 +vmov.16 q14, q8 +vmov.16 q15, q8 +vstmr0, {q8-q15} +bx lr +endfunc + +function ff_hevc_idct_16x16_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +vdup.16 q8, r1 +vdup.16 q9, r1 +vmov.16 q10, q8 +vmov.16 q11, q8 +vmov.16 q12, q8 +vmov.16 q13, q8 +vmov.16 q14, q8 +vmov.16 q15, q8 +vstmr0!, {q8-q15} +vstmr0!, {q8-q15} +vstmr0!, {q8-q15} +vstmr0, {q8-q15} +bx lr +endfunc + +function ff_hevc_idct_32x32_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +mov r3, #16 +vdup.16 q8, r1 +vdup.16 q9, r1 +vmov.16 q10, q8 +vmov.16 q11, q8 +vmov.16 q12, q8 +vmov.16 q13, q8 +vmov.16 q14, q8 +vmov.16 q15, q8 +1: subsr3, #1 +vstmr0!, {q8-q15} +bne 1b +bx lr +endfunc + .macro sum_sub out, in, c, op .ifc \op, + vmlal.s16 \out, \in, \c diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c 
index e61587f..fa2e7ad 100644 --- a/libavcodec/arm/hevcdsp_init_arm.c +++ b/libavcodec/arm/hevcdsp_init_arm.c @@ -25,6 +25,11 @@ #include "libavcodec/hevcdsp.h" +void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs); +void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs); +void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs); +void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs); + void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit); @@ -38,6 +43,11 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth) if (have_neon(cpu_flags)) { if (bit_depth == 8) { +c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon; +c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon; +c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon; +c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon; + c->idct[0] = ff_hevc_idct_4x4_8_neon; c->idct[1] = ff_hevc_idct_8x8_8_neon; c->idct[2] = ff_hevc_idct_16x16_8_neon; -- 2.10.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] hevc: Add support for bitdepth 10 for IDCT DC
--- libavcodec/arm/hevc_idct.S| 42 +++ libavcodec/arm/hevcdsp_init_arm.c | 22 ++-- 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index f0008aa..41b1b29 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -30,26 +30,29 @@ const trans, align=4 .short 57, 43, 25, 9 endconst -function ff_hevc_idct_4x4_dc_8_neon, export=1 +.macro idct_4x4_dc bitdepth +function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] -ldr r2, =0x20 +ldr r2, =(1 << (13 - \bitdepth)) add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q0, r1 vdup.16 q1, r1 vst1.16 {q0, q1}, [r0, :128] bx lr endfunc +.endm -function ff_hevc_idct_8x8_dc_8_neon, export=1 +.macro idct_8x8_dc bitdepth +function ff_hevc_idct_8x8_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] -ldr r2, =0x20 +ldr r2, =(1 << (13 - \bitdepth)) add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q8, r1 vdup.16 q9, r1 vmov.16 q10, q8 @@ -61,14 +64,16 @@ function ff_hevc_idct_8x8_dc_8_neon, export=1 vstmr0, {q8-q15} bx lr endfunc +.endm -function ff_hevc_idct_16x16_dc_8_neon, export=1 +.macro idct_16x16_dc bitdepth +function ff_hevc_idct_16x16_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] -ldr r2, =0x20 +ldr r2, =(1 << (13 - \bitdepth)) add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q8, r1 vdup.16 q9, r1 vmov.16 q10, q8 @@ -83,14 +88,16 @@ function ff_hevc_idct_16x16_dc_8_neon, export=1 vstmr0, {q8-q15} bx lr endfunc +.endm -function ff_hevc_idct_32x32_dc_8_neon, export=1 +.macro idct_32x32_dc bitdepth +function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] -ldr r2, =0x20 +ldr r2, =(1 << (13 - \bitdepth)) add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) mov r3, #16 vdup.16 q8, r1 vdup.16 q9, r1 @@ -103,8 +110,9 @@ function ff_hevc_idct_32x32_dc_8_neon, export=1 1: subsr3, #1 vstmr0!, {q8-q15} bne 1b -bx lr 
+bx lr endfunc +.endm .macro sum_sub out, in, c, op .ifc \op, + @@ -496,8 +504,16 @@ tr_16x4 secondpass_10, 20 - 10 .ltorg idct_4x4 8 +idct_4x4_dc 8 idct_4x4 10 +idct_4x4_dc 10 idct_8x8 8 +idct_8x8_dc 8 idct_8x8 10 +idct_8x8_dc 10 idct_16x16 8 +idct_16x16_dc 8 idct_16x16 10 +idct_16x16_dc 10 +idct_32x32_dc 8 +idct_32x32_dc 10 diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c index 10f60bc..99eff78 100644 --- a/libavcodec/arm/hevcdsp_init_arm.c +++ b/libavcodec/arm/hevcdsp_init_arm.c @@ -25,13 +25,18 @@ #include "libavcodec/hevcdsp.h" -void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs); -void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs); -void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs); +void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs); + +void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit); @@ -52,9 +57,14 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth) c->idct[2]= ff_hevc_idct_16x16_8_neon; } if (bit_depth == 10) { -c->idct[0] = ff_hevc_idct_4x4_10_neon; -c->idct[1] =
[libav-devel] [PATCH 1/2] hevc: Add NEON IDCT DC functions for bitdepth 8
From: Seppo Tomperi <seppo.tomp...@vtt.fi> Signed-off-by: Alexandra Hájková <alexan...@khirnov.net> --- libavcodec/arm/hevc_idct.S| 78 +++ libavcodec/arm/hevcdsp_init_arm.c | 15 ++-- 2 files changed, 90 insertions(+), 3 deletions(-) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 156d476..f0008aa 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -1,5 +1,7 @@ /* * ARM NEON optimised IDCT functions for HEVC decoding + * + * Copyright (c) 2014 Seppo Tomperi <seppo.tomp...@vtt.fi> * Copyright (c) 2017 Alexandra Hájková * * This file is part of Libav. @@ -28,6 +30,82 @@ const trans, align=4 .short 57, 43, 25, 9 endconst +function ff_hevc_idct_4x4_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +vdup.16 q0, r1 +vdup.16 q1, r1 +vst1.16 {q0, q1}, [r0, :128] +bx lr +endfunc + +function ff_hevc_idct_8x8_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +vdup.16 q8, r1 +vdup.16 q9, r1 +vmov.16 q10, q8 +vmov.16 q11, q8 +vmov.16 q12, q8 +vmov.16 q13, q8 +vmov.16 q14, q8 +vmov.16 q15, q8 +vstmr0, {q8-q15} +bx lr +endfunc + +function ff_hevc_idct_16x16_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +vdup.16 q8, r1 +vdup.16 q9, r1 +vmov.16 q10, q8 +vmov.16 q11, q8 +vmov.16 q12, q8 +vmov.16 q13, q8 +vmov.16 q14, q8 +vmov.16 q15, q8 +vstmr0!, {q8-q15} +vstmr0!, {q8-q15} +vstmr0!, {q8-q15} +vstmr0, {q8-q15} +bx lr +endfunc + +function ff_hevc_idct_32x32_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +mov r3, #16 +vdup.16 q8, r1 +vdup.16 q9, r1 +vmov.16 q10, q8 +vmov.16 q11, q8 +vmov.16 q12, q8 +vmov.16 q13, q8 +vmov.16 q14, q8 +vmov.16 q15, q8 +1: subsr3, #1 +vstmr0!, {q8-q15} +bne 1b +bx lr +endfunc + .macro sum_sub out, in, c, op .ifc \op, + vmlal.s16 \out, \in, \c diff --git a/libavcodec/arm/hevcdsp_init_arm.c 
b/libavcodec/arm/hevcdsp_init_arm.c index e61587f..10f60bc 100644 --- a/libavcodec/arm/hevcdsp_init_arm.c +++ b/libavcodec/arm/hevcdsp_init_arm.c @@ -26,8 +26,12 @@ #include "libavcodec/hevcdsp.h" void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs); +void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit); @@ -38,9 +42,14 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth) if (have_neon(cpu_flags)) { if (bit_depth == 8) { -c->idct[0] = ff_hevc_idct_4x4_8_neon; -c->idct[1] = ff_hevc_idct_8x8_8_neon; -c->idct[2] = ff_hevc_idct_16x16_8_neon; +c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon; +c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon; +c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon; +c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon; + +c->idct[0]= ff_hevc_idct_4x4_8_neon; +c->idct[1]= ff_hevc_idct_8x8_8_neon; +c->idct[2]= ff_hevc_idct_16x16_8_neon; } if (bit_depth == 10) { c->idct[0] = ff_hevc_idct_4x4_10_neon; -- 2.10.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] hevc: Add NEON add_residual for bitdepth 8
From: Seppo Tomperi <seppo.tomp...@vtt.fi> Signed-off-by: Alexandra Hájková <alexan...@khirnov.net> --- libavcodec/arm/hevc_idct.S| 71 +++ libavcodec/arm/hevcdsp_init_arm.c | 15 + 2 files changed, 86 insertions(+) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 082f832..0e84034 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -30,6 +30,77 @@ const trans, align=4 .short 57, 43, 25, 9 endconst +function ff_hevc_add_residual_4x4_8_neon, export=1 +vldmr1, {q0-q1} +vld1.32 d4[0], [r0], r2 +vld1.32 d4[1], [r0], r2 +vld1.32 d5[0], [r0], r2 +vld1.32 d5[1], [r0], r2 +sub r0, r0, r2, lsl #2 +vmovl.u8q8, d4 +vmovl.u8q9, d5 +vqadd.s16 q0, q0, q8 +vqadd.s16 q1, q1, q9 +vqmovun.s16 d0, q0 +vqmovun.s16 d1, q1 +vst1.32 d0[0], [r0], r2 +vst1.32 d0[1], [r0], r2 +vst1.32 d1[0], [r0], r2 +vst1.32 d1[1], [r0], r2 +bx lr +endfunc + +function ff_hevc_add_residual_8x8_8_neon, export=1 +mov r3, #8 +1: subsr3, #1 +vld1.16 {q0}, [r1]! +vld1.8 d16, [r0] +vmovl.u8q8, d16 +vqadd.s16 q0, q8 +vqmovun.s16 d0, q0 +vst1.32 d0, [r0], r2 +bne 1b +bx lr +endfunc + +function ff_hevc_add_residual_16x16_8_neon, export=1 +mov r3, #16 +1: subsr3, #1 +vld1.16 {q0, q1}, [r1]! 
+vld1.8 {q8}, [r0] +vmovl.u8q9, d16 +vmovl.u8q10, d17 +vqadd.s16 q0, q9 +vqadd.s16 q1, q10 +vqmovun.s16 d0, q0 +vqmovun.s16 d1, q1 +vst1.8 {q0}, [r0], r2 +bne 1b +bx lr +endfunc + +function ff_hevc_add_residual_32x32_8_neon, export=1 +mov r3, #32 +1: subsr3, #1 +vldmr1!, {q0-q3} +vld1.8 {q8, q9}, [r0] +vmovl.u8q10, d16 +vmovl.u8q11, d17 +vmovl.u8q12, d18 +vmovl.u8q13, d19 +vqadd.s16 q0, q10 +vqadd.s16 q1, q11 +vqadd.s16 q2, q12 +vqadd.s16 q3, q13 +vqmovun.s16 d0, q0 +vqmovun.s16 d1, q1 +vqmovun.s16 d2, q2 +vqmovun.s16 d3, q3 +vst1.8 {q0, q1}, [r0], r2 +bne 1b +bx lr +endfunc + .macro idct_4x4_dc bitdepth function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c index febbcc1..a32b7ef 100644 --- a/libavcodec/arm/hevcdsp_init_arm.c +++ b/libavcodec/arm/hevcdsp_init_arm.c @@ -25,6 +25,16 @@ #include "libavcodec/hevcdsp.h" + +void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, int16_t *coeffs, + ptrdiff_t stride); +void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, int16_t *coeffs, + ptrdiff_t stride); +void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, int16_t *coeffs, + ptrdiff_t stride); +void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, int16_t *coeffs, + ptrdiff_t stride); + void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs); @@ -47,6 +57,11 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth) if (have_neon(cpu_flags)) { if (bit_depth == 8) { +c->add_residual[0] = ff_hevc_add_residual_4x4_8_neon; +c->add_residual[1] = ff_hevc_add_residual_8x8_8_neon; +c->add_residual[2] = ff_hevc_add_residual_16x16_8_neon; +c->add_residual[3] = ff_hevc_add_residual_32x32_8_neon; + c->idct[0]= ff_hevc_idct_4x4_8_neon; c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon; c->idct[1]= ff_hevc_idct_8x8_8_neon; -- 2.10.2 ___ libav-devel mailing list libav-devel@libav.org 
https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] hevc: Add support for bitdepth 10 for IDCT DC
--- libavcodec/arm/hevc_idct.S| 50 --- libavcodec/arm/hevcdsp_init_arm.c | 21 +++- 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 7fdd7cc..082f832 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -30,26 +30,37 @@ const trans, align=4 .short 57, 43, 25, 9 endconst -function ff_hevc_idct_4x4_dc_8_neon, export=1 +.macro idct_4x4_dc bitdepth +function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] +.if \bitdepth == 8 ldr r2, =0x20 +.else +ldr r2, =0x8 +.endif add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q0, r1 vdup.16 q1, r1 vst1.16 {q0, q1}, [r0] bx lr endfunc +.endm -function ff_hevc_idct_8x8_dc_8_neon, export=1 +.macro idct_8x8_dc bitdepth +function ff_hevc_idct_8x8_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] +.if \bitdepth == 8 ldr r2, =0x20 +.else +ldr r2, =0x8 +.endif add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q8, r1 vdup.16 q9, r1 vmov.16 q10, q8 @@ -61,14 +72,20 @@ function ff_hevc_idct_8x8_dc_8_neon, export=1 vstmr0, {q8-q15} bx lr endfunc +.endm -function ff_hevc_idct_16x16_dc_8_neon, export=1 +.macro idct_16x16_dc bitdepth +function ff_hevc_idct_16x16_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] +.if \bitdepth == 8 ldr r2, =0x20 +.else +ldr r2, =0x8 +.endif add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q8, r1 vdup.16 q9, r1 vmov.16 q10, q8 @@ -83,14 +100,20 @@ function ff_hevc_idct_16x16_dc_8_neon, export=1 vstmr0, {q8-q15} bx lr endfunc +.endm -function ff_hevc_idct_32x32_dc_8_neon, export=1 +.macro idct_32x32_dc bitdepth +function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] +.if \bitdepth == 8 ldr r2, =0x20 +.else +ldr r2, =0x8 +.endif add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) mov r3, #16 vdup.16 q8, r1 vdup.16 q9, r1 @@ -103,8 +126,9 @@ function ff_hevc_idct_32x32_dc_8_neon, 
export=1 1: subsr3, #1 vstmr0!, {q8-q15} bne 1b -bx lr +bx lr endfunc +.endm .macro sum_sub out, in, c, op .ifc \op, + @@ -496,8 +520,16 @@ tr_16x4 secondpass_10, 20 - 10 .ltorg idct_4x4 8 +idct_4x4_dc 8 idct_4x4 10 +idct_4x4_dc 10 idct_8x8 8 +idct_8x8_dc 8 idct_8x8 10 +idct_8x8_dc 10 idct_16x16 8 +idct_16x16_dc 8 idct_16x16 10 +idct_16x16_dc 10 +idct_32x32_dc 8 +idct_32x32_dc 10 diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c index b65e2e9..febbcc1 100644 --- a/libavcodec/arm/hevcdsp_init_arm.c +++ b/libavcodec/arm/hevcdsp_init_arm.c @@ -25,13 +25,18 @@ #include "libavcodec/hevcdsp.h" -void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs); -void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs); -void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs); +void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs); + +void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit); @@ -51,9 +56,13 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth) c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon; }
Re: [libav-devel] [PATCH 2/2] hevc: Add support for bitdepth 10 for IDCT DC
>> -function ff_hevc_idct_32x32_dc_8_neon, export=1
>> +.macro idct_32x32_dc bitdepth
>> +function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1
>>  ldrsh r1, [r0]
>>  ldr r2, =0x20
>> +.if \bitdepth == 8
>> +ldr r2, =0x20
>> +.else
>> +ldr r2, =0x8
>> +.endif
>
> This doesn't look quite right, shouldn't the new block replace/wrap
> the old ldr instruction, like it does in the 16x16 version (and all
> other sizes)?
>

It's wrong, yes. I'll resend it.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] hevc: Add support for bitdepth 10 for IDCT DC
--- Indent operands. libavcodec/arm/hevc_idct.S| 51 --- libavcodec/arm/hevcdsp_init_arm.c | 21 +++- 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 7fdd7cc..f949d80 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -30,26 +30,37 @@ const trans, align=4 .short 57, 43, 25, 9 endconst -function ff_hevc_idct_4x4_dc_8_neon, export=1 +.macro idct_4x4_dc bitdepth +function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] +.if \bitdepth == 8 ldr r2, =0x20 +.else +ldr r2, =0x8 +.endif add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q0, r1 vdup.16 q1, r1 vst1.16 {q0, q1}, [r0] bx lr endfunc +.endm -function ff_hevc_idct_8x8_dc_8_neon, export=1 +.macro idct_8x8_dc bitdepth +function ff_hevc_idct_8x8_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] +.if \bitdepth == 8 ldr r2, =0x20 +.else +ldr r2, =0x8 +.endif add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q8, r1 vdup.16 q9, r1 vmov.16 q10, q8 @@ -61,14 +72,20 @@ function ff_hevc_idct_8x8_dc_8_neon, export=1 vstmr0, {q8-q15} bx lr endfunc +.endm -function ff_hevc_idct_16x16_dc_8_neon, export=1 +.macro idct_16x16_dc bitdepth +function ff_hevc_idct_16x16_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] +.if \bitdepth == 8 ldr r2, =0x20 +.else +ldr r2, =0x8 +.endif add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q8, r1 vdup.16 q9, r1 vmov.16 q10, q8 @@ -83,14 +100,21 @@ function ff_hevc_idct_16x16_dc_8_neon, export=1 vstmr0, {q8-q15} bx lr endfunc +.endm -function ff_hevc_idct_32x32_dc_8_neon, export=1 +.macro idct_32x32_dc bitdepth +function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] ldr r2, =0x20 +.if \bitdepth == 8 +ldr r2, =0x20 +.else +ldr r2, =0x8 +.endif add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) mov r3, #16 vdup.16 q8, r1 vdup.16 q9, r1 @@ -103,8 +127,9 @@ function 
ff_hevc_idct_32x32_dc_8_neon, export=1 1: subsr3, #1 vstmr0!, {q8-q15} bne 1b -bx lr +bx lr endfunc +.endm .macro sum_sub out, in, c, op .ifc \op, + @@ -496,8 +521,16 @@ tr_16x4 secondpass_10, 20 - 10 .ltorg idct_4x4 8 +idct_4x4_dc 8 idct_4x4 10 +idct_4x4_dc 10 idct_8x8 8 +idct_8x8_dc 8 idct_8x8 10 +idct_8x8_dc 10 idct_16x16 8 +idct_16x16_dc 8 idct_16x16 10 +idct_16x16_dc 10 +idct_32x32_dc 8 +idct_32x32_dc 10 diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c index b65e2e9..febbcc1 100644 --- a/libavcodec/arm/hevcdsp_init_arm.c +++ b/libavcodec/arm/hevcdsp_init_arm.c @@ -25,13 +25,18 @@ #include "libavcodec/hevcdsp.h" -void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs); -void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs); -void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs); +void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs); + +void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit); @@ -51,9 +56,13 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth) c->idct_dc[3]
[libav-devel] [PATCH 1/2] hevc: Add NEON IDCT DC functions for bitdepth 8
From: Seppo Tomperi <seppo.tomp...@vtt.fi> Signed-off-by: Alexandra Hájková <alexan...@khirnov.net> --- Indent operands. libavcodec/arm/hevc_idct.S| 78 +++ libavcodec/arm/hevcdsp_init_arm.c | 14 +-- 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 156d476..7fdd7cc 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -1,5 +1,7 @@ /* * ARM NEON optimised IDCT functions for HEVC decoding + * + * Copyright (c) 2014 Seppo Tomperi <seppo.tomp...@vtt.fi> * Copyright (c) 2017 Alexandra Hájková * * This file is part of Libav. @@ -28,6 +30,82 @@ const trans, align=4 .short 57, 43, 25, 9 endconst +function ff_hevc_idct_4x4_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +vdup.16 q0, r1 +vdup.16 q1, r1 +vst1.16 {q0, q1}, [r0] +bx lr +endfunc + +function ff_hevc_idct_8x8_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +vdup.16 q8, r1 +vdup.16 q9, r1 +vmov.16 q10, q8 +vmov.16 q11, q8 +vmov.16 q12, q8 +vmov.16 q13, q8 +vmov.16 q14, q8 +vmov.16 q15, q8 +vstmr0, {q8-q15} +bx lr +endfunc + +function ff_hevc_idct_16x16_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +vdup.16 q8, r1 +vdup.16 q9, r1 +vmov.16 q10, q8 +vmov.16 q11, q8 +vmov.16 q12, q8 +vmov.16 q13, q8 +vmov.16 q14, q8 +vmov.16 q15, q8 +vstmr0!, {q8-q15} +vstmr0!, {q8-q15} +vstmr0!, {q8-q15} +vstmr0, {q8-q15} +bx lr +endfunc + +function ff_hevc_idct_32x32_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +mov r3, #16 +vdup.16 q8, r1 +vdup.16 q9, r1 +vmov.16 q10, q8 +vmov.16 q11, q8 +vmov.16 q12, q8 +vmov.16 q13, q8 +vmov.16 q14, q8 +vmov.16 q15, q8 +1: subsr3, #1 +vstmr0!, {q8-q15} +bne 1b +bx lr +endfunc + .macro sum_sub out, in, c, op .ifc \op, + vmlal.s16 \out, \in, \c diff --git a/libavcodec/arm/hevcdsp_init_arm.c 
b/libavcodec/arm/hevcdsp_init_arm.c index e61587f..b65e2e9 100644 --- a/libavcodec/arm/hevcdsp_init_arm.c +++ b/libavcodec/arm/hevcdsp_init_arm.c @@ -26,8 +26,12 @@ #include "libavcodec/hevcdsp.h" void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs); +void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit); @@ -38,9 +42,13 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth) if (have_neon(cpu_flags)) { if (bit_depth == 8) { -c->idct[0] = ff_hevc_idct_4x4_8_neon; -c->idct[1] = ff_hevc_idct_8x8_8_neon; -c->idct[2] = ff_hevc_idct_16x16_8_neon; +c->idct[0]= ff_hevc_idct_4x4_8_neon; +c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon; +c->idct[1]= ff_hevc_idct_8x8_8_neon; +c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon; +c->idct[2]= ff_hevc_idct_16x16_8_neon; +c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon; +c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon; } if (bit_depth == 10) { c->idct[0] = ff_hevc_idct_4x4_10_neon; -- 2.10.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/2] hevc: Add NEON IDCT DC functions for bitdepth 8
From: Seppo Tomperi <seppo.tomp...@vtt.fi> Signed-off-by: Alexandra Hájková <alexan...@khirnov.net> --- libavcodec/arm/hevc_idct.S| 78 +++ libavcodec/arm/hevcdsp_init_arm.c | 14 +-- 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 156d476..f74847b 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -1,5 +1,7 @@ /* * ARM NEON optimised IDCT functions for HEVC decoding + * + * Copyright (c) 2014 Seppo Tomperi <seppo.tomp...@vtt.fi> * Copyright (c) 2017 Alexandra Hájková * * This file is part of Libav. @@ -28,6 +30,82 @@ const trans, align=4 .short 57, 43, 25, 9 endconst +function ff_hevc_idct_4x4_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +vdup.16 q0, r1 +vdup.16 q1, r1 +vst1.16 {q0, q1}, [r0] +bx lr +endfunc + +function ff_hevc_idct_8x8_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +vdup.16 q8, r1 +vdup.16 q9, r1 +vmov.16 q10, q8 +vmov.16 q11, q8 +vmov.16 q12, q8 +vmov.16 q13, q8 +vmov.16 q14, q8 +vmov.16 q15, q8 +vstmr0, {q8-q15} +bx lr +endfunc + +function ff_hevc_idct_16x16_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +vdup.16 q8, r1 +vdup.16 q9, r1 +vmov.16 q10, q8 +vmov.16 q11, q8 +vmov.16 q12, q8 +vmov.16 q13, q8 +vmov.16 q14, q8 +vmov.16 q15, q8 +vstmr0!, {q8-q15} +vstmr0!, {q8-q15} +vstmr0!, {q8-q15} +vstmr0, {q8-q15} +bx lr +endfunc + +function ff_hevc_idct_32x32_dc_8_neon, export=1 +ldrsh r1, [r0] +ldr r2, =0x20 +add r1, #1 +asr r1, #1 +add r1, r2 +asr r1, #6 +mov r3, #16 +vdup.16 q8, r1 +vdup.16 q9, r1 +vmov.16 q10, q8 +vmov.16 q11, q8 +vmov.16 q12, q8 +vmov.16 q13, q8 +vmov.16 q14, q8 +vmov.16 q15, q8 +1: subsr3, #1 +vstmr0!, {q8-q15} +bne 1b +bx lr +endfunc + .macro sum_sub out, in, c, op .ifc \op, + vmlal.s16 \out, \in, \c diff --git a/libavcodec/arm/hevcdsp_init_arm.c 
b/libavcodec/arm/hevcdsp_init_arm.c index e61587f..b65e2e9 100644 --- a/libavcodec/arm/hevcdsp_init_arm.c +++ b/libavcodec/arm/hevcdsp_init_arm.c @@ -26,8 +26,12 @@ #include "libavcodec/hevcdsp.h" void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs); +void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit); @@ -38,9 +42,13 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth) if (have_neon(cpu_flags)) { if (bit_depth == 8) { -c->idct[0] = ff_hevc_idct_4x4_8_neon; -c->idct[1] = ff_hevc_idct_8x8_8_neon; -c->idct[2] = ff_hevc_idct_16x16_8_neon; +c->idct[0]= ff_hevc_idct_4x4_8_neon; +c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon; +c->idct[1]= ff_hevc_idct_8x8_8_neon; +c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon; +c->idct[2]= ff_hevc_idct_16x16_8_neon; +c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon; +c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon; } if (bit_depth == 10) { c->idct[0] = ff_hevc_idct_4x4_10_neon; -- 2.10.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] hevc: Add support for bitdepth 10 for IDCT DC
--- libavcodec/arm/hevc_idct.S| 49 --- libavcodec/arm/hevcdsp_init_arm.c | 21 - 2 files changed, 56 insertions(+), 14 deletions(-) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index f74847b..b80d5ff 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -30,26 +30,37 @@ const trans, align=4 .short 57, 43, 25, 9 endconst -function ff_hevc_idct_4x4_dc_8_neon, export=1 +.macro idct_4x4_dc bitdepth +function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] +.if \bitdepth == 8 ldr r2, =0x20 +.else +ldr r2, =0x8 +.endif add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q0, r1 vdup.16 q1, r1 vst1.16 {q0, q1}, [r0] bx lr endfunc +.endm -function ff_hevc_idct_8x8_dc_8_neon, export=1 +.macro idct_8x8_dc bitdepth +function ff_hevc_idct_8x8_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] +.if \bitdepth == 8 ldr r2, =0x20 +.else +ldr r2, =0x8 +.endif add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q8, r1 vdup.16 q9, r1 vmov.16 q10, q8 @@ -61,14 +72,20 @@ function ff_hevc_idct_8x8_dc_8_neon, export=1 vstmr0, {q8-q15} bx lr endfunc +.endm -function ff_hevc_idct_16x16_dc_8_neon, export=1 +.macro idct_16x16_dc bitdepth +function ff_hevc_idct_16x16_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] +.if \bitdepth == 8 ldr r2, =0x20 +.else +ldr r2, =0x8 +.endif add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) vdup.16 q8, r1 vdup.16 q9, r1 vmov.16 q10, q8 @@ -83,14 +100,21 @@ function ff_hevc_idct_16x16_dc_8_neon, export=1 vstmr0, {q8-q15} bx lr endfunc +.endm -function ff_hevc_idct_32x32_dc_8_neon, export=1 +.macro idct_32x32_dc bitdepth +function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1 ldrsh r1, [r0] ldr r2, =0x20 +.if \bitdepth == 8 +ldr r2, =0x20 +.else +ldr r2, =0x8 +.endif add r1, #1 asr r1, #1 add r1, r2 -asr r1, #6 +asr r1, #(14 - \bitdepth) mov r3, #16 vdup.16 q8, r1 vdup.16 q9, r1 @@ -105,6 +129,7 @@ function 
ff_hevc_idct_32x32_dc_8_neon, export=1 bne 1b bx lr endfunc +.endm .macro sum_sub out, in, c, op .ifc \op, + @@ -496,8 +521,16 @@ tr_16x4 secondpass_10, 20 - 10 .ltorg idct_4x4 8 +idct_4x4_dc 8 idct_4x4 10 +idct_4x4_dc 10 idct_8x8 8 +idct_8x8_dc 8 idct_8x8 10 +idct_8x8_dc 10 idct_16x16 8 +idct_16x16_dc 8 idct_16x16 10 +idct_16x16_dc 10 +idct_32x32_dc 8 +idct_32x32_dc 10 diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c index b65e2e9..febbcc1 100644 --- a/libavcodec/arm/hevcdsp_init_arm.c +++ b/libavcodec/arm/hevcdsp_init_arm.c @@ -25,13 +25,18 @@ #include "libavcodec/hevcdsp.h" -void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs); -void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs); -void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs); void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs); +void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs); +void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs); + +void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit); @@ -51,9 +56,13 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth) c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon; } if (bit_depth == 10) { -c->idct[0] = ff_hevc_idct_4x4_10_neon; -c->idct[1] = ff_hevc_idct_8x8_10_neon; -c->idct[2] = ff_hevc_idct_16x16_10_neon; +c->idct[0]= ff_hevc_idct_4x4_10_neon; +c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_neon; +
[libav-devel] [PATCH] hevc: Optimize NEON 8x8 IDCT using col_limit
--- libavcodec/arm/hevc_idct.S | 26 +++--- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 4124fc8..29135ad 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -58,7 +58,7 @@ endconst .macro tr_4x4_8 in0, in1, in2, in3, out0, out1, out2, out3, tmp0, tmp1, tmp2, tmp3 vshll.s16 \tmp0, \in0, #6 - vld1.s16 {\in0}, [r1, :64]! + vld1.s16 {\in0}, [r4, :64]! vmov \tmp1, \tmp0 vmull.s16 \tmp2, \in1, \in0[1] vmull.s16 \tmp3, \in1, \in0[3] @@ -67,14 +67,14 @@ endconst vmlal.s16 \tmp2, \in3, \in0[3] @o0 vmlsl.s16 \tmp3, \in3, \in0[1] @o1 - vld1.s16 {\in0}, [r1, :64] + vld1.s16 {\in0}, [r4, :64] vadd.s32 \out0, \tmp0, \tmp2 vadd.s32 \out1, \tmp1, \tmp3 vsub.s32 \out2, \tmp1, \tmp3 vsub.s32 \out3, \tmp0, \tmp2 - subr1, r1, #8 + subr4, r4, #8 .endm @ Do a 4x4 transpose, using q registers for the subtransposes that don't @@ -166,21 +166,25 @@ endfunc .macro idct_8x8 bitdepth function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1 @r0 - coeffs +push{r4, lr} vpush {q4-q7} -mov r1, r0 +mov r4, r0 mov r2, #64 add r3, r0, #32 -vld1.s16{q0-q1}, [r1,:128], r2 +vld1.s16{q0-q1}, [r4,:128], r2 vld1.s16{q2-q3}, [r3,:128], r2 -vld1.s16{q4-q5}, [r1,:128], r2 +vld1.s16{q4-q5}, [r4,:128], r2 vld1.s16{q6-q7}, [r3,:128], r2 -movrel r1, trans +movrel r4, trans tr_8x4 7, d0, d2, d4, d6, d8, d10, d12, d14 +cmp r1, #4 +blt 1f tr_8x4 7, d1, d3, d5, d7, d9, d11, d13, d15 +1: @ Transpose each 4x4 block, and swap how d4-d7 and d8-d11 are used. 
@ Layout before: @ d0 d1 @@ -209,16 +213,16 @@ function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1 transpose_8x8 d0, d2, d4, d6, d8, d10, d12, d14, d1, d3, d5, d7, d9, d11, d13, d15 -mov r1, r0 +mov r4, r0 mov r2, #64 add r3, r0, #32 -vst1.s16{q0-q1}, [r1,:128], r2 +vst1.s16{q0-q1}, [r4,:128], r2 vst1.s16{q2-q3}, [r3,:128], r2 -vst1.s16{q4-q5}, [r1,:128], r2 +vst1.s16{q4-q5}, [r4,:128], r2 vst1.s16{q6-q7}, [r3,:128], r2 vpop{q4-q7} -bx lr +pop {r4, pc} endfunc .endm -- 2.10.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] hevc: Add NEON 16x16 IDCT
The speedup vs C code is around 6-13x. --- Use irp to avoid the repetition. libavcodec/arm/hevc_idct.S| 196 ++ libavcodec/arm/hevcdsp_init_arm.c | 4 + 2 files changed, 200 insertions(+) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 4124fc8..3608f3a 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -222,7 +222,203 @@ function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1 endfunc .endm +.macro butterfly e, o, tmp_p, tmp_m +vadd.s32\tmp_p, \e, \o +vsub.s32\tmp_m, \e, \o +.endm + +.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7 +tr_4x4_8\in0, \in2, \in4, \in6, q8, q9, q10, q11, q12, q13, q14, q15 + +vmull.s16 q12, \in1, \in0[0] +vmull.s16 q13, \in1, \in0[1] +vmull.s16 q14, \in1, \in0[2] +vmull.s16 q15, \in1, \in0[3] +sum_sub q12, \in3, \in0[1], + +sum_sub q13, \in3, \in0[3], - +sum_sub q14, \in3, \in0[0], - +sum_sub q15, \in3, \in0[2], - + +sum_sub q12, \in5, \in0[2], + +sum_sub q13, \in5, \in0[0], - +sum_sub q14, \in5, \in0[3], + +sum_sub q15, \in5, \in0[1], + + +sum_sub q12, \in7, \in0[3], + +sum_sub q13, \in7, \in0[2], - +sum_sub q14, \in7, \in0[1], + +sum_sub q15, \in7, \in0[0], - + +butterfly q8, q12, q0, q7 +butterfly q9, q13, q1, q6 +butterfly q10, q14, q2, q5 +butterfly q11, q15, q3, q4 +add r4, sp, #512 +vst1.s16{q0-q1}, [r4, :128]! +vst1.s16{q2-q3}, [r4, :128]! +vst1.s16{q4-q5}, [r4, :128]! 
+vst1.s16{q6-q7}, [r4, :128] +.endm + +.macro load16 in0, in1, in2, in3, in4, in5, in6, in7 +vld1.s16{\in0}, [r1, :64], r2 +vld1.s16{\in1}, [r3, :64], r2 +vld1.s16{\in2}, [r1, :64], r2 +vld1.s16{\in3}, [r3, :64], r2 +vld1.s16{\in4}, [r1, :64], r2 +vld1.s16{\in5}, [r3, :64], r2 +vld1.s16{\in6}, [r1, :64], r2 +vld1.s16{\in7}, [r3, :64], r2 +.endm + +.macro add_member in, t0, t1, t2, t3, t4, t5, t6, t7, op0, op1, op2, op3, op4, op5, op6, op7 +sum_sub q5, \in, \t0, \op0 +sum_sub q6, \in, \t1, \op1 +sum_sub q7, \in, \t2, \op2 +sum_sub q8, \in, \t3, \op3 +sum_sub q9, \in, \t4, \op4 +sum_sub q10,\in, \t5, \op5 +sum_sub q11,\in, \t6, \op6 +sum_sub q12,\in, \t7, \op7 +.endm + +.macro butterfly16 in0, in1, in2, in3, in4, in5, in6, in7 +vadd.s32q4, \in0, \in1 +vsub.s32\in0, \in0, \in1 +vadd.s32\in1, \in2, \in3 +vsub.s32\in2, \in2, \in3 +vadd.s32\in3, \in4, \in5 +vsub.s32\in4, \in4, \in5 +vadd.s32\in5, \in6, \in7 +vsub.s32\in6, \in6, \in7 +.endm + +.macro store16 in0, in1, in2, in3, in4, in5, in6, in7 +vst1.s16\in0, [r1, :64], r2 +vst1.s16\in1, [r3, :64], r4 +vst1.s16\in2, [r1, :64], r2 +vst1.s16\in3, [r3, :64], r4 +vst1.s16\in4, [r1, :64], r2 +vst1.s16\in5, [r3, :64], r4 +vst1.s16\in6, [r1, :64], r2 +vst1.s16\in7, [r3, :64], r4 +.endm + +.macro scale out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, in3, in4, in5, in6, in7, shift +vqrshrn.s32 \out0, \in0, \shift +vqrshrn.s32 \out1, \in1, \shift +vqrshrn.s32 \out2, \in2, \shift +vqrshrn.s32 \out3, \in3, \shift +vqrshrn.s32 \out4, \in4, \shift +vqrshrn.s32 \out5, \in5, \shift +vqrshrn.s32 \out6, \in6, \shift +vqrshrn.s32 \out7, \in7, \shift +.endm + +.macro tr_16x4 name, shift +function func_tr_16x4_\name +mov r1, r5 +add r3, r5, #64 +mov r2, #128 +load16 d0, d1, d2, d3, d4, d5, d6, d7 +movrel r1, trans + +tr16_8x4d0, d1, d2, d3, d4, d5, d6, d7 + +add r1, r5, #32 +add r3, r5, #(64 + 32) +mov r2, #128 +load16 d8, d9, d2, d3, d4, d5, d6, d7 +movrel r1, trans + 16 +vld1.s16{q0}, [r1, :128] +vmull.s16 q5, d8, d0[0] 
+vmull.s16 q6, d8, d0[1] +vmull.s16 q7, d8, d0[2] +vmull.s16 q8, d8, d0[3] +vmull.s16 q9, d8, d1[0] +vmull.s16 q10, d8, d1[1] +vmull.s16 q11, d8, d1[2] +vmull.s16 q12, d8, d1[3] + +add_member d9,
[libav-devel] [PATCH] hevc: Add NEON 16x16 IDCT
The speedup vs C code is around 6-13x. --- libavcodec/arm/hevc_idct.S| 210 ++ libavcodec/arm/hevcdsp_init_arm.c | 4 + 2 files changed, 214 insertions(+) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 4124fc8..0ea048b 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -222,7 +222,217 @@ function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1 endfunc .endm +.macro butterfly e, o, tmp_p, tmp_m +vadd.s32\tmp_p, \e, \o +vsub.s32\tmp_m, \e, \o +.endm + +.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7 +tr_4x4_8\in0, \in2, \in4, \in6, q8, q9, q10, q11, q12, q13, q14, q15 + +vmull.s16 q12, \in1, \in0[0] +vmull.s16 q13, \in1, \in0[1] +vmull.s16 q14, \in1, \in0[2] +vmull.s16 q15, \in1, \in0[3] +sum_sub q12, \in3, \in0[1], + +sum_sub q13, \in3, \in0[3], - +sum_sub q14, \in3, \in0[0], - +sum_sub q15, \in3, \in0[2], - + +sum_sub q12, \in5, \in0[2], + +sum_sub q13, \in5, \in0[0], - +sum_sub q14, \in5, \in0[3], + +sum_sub q15, \in5, \in0[1], + + +sum_sub q12, \in7, \in0[3], + +sum_sub q13, \in7, \in0[2], - +sum_sub q14, \in7, \in0[1], + +sum_sub q15, \in7, \in0[0], - + +butterfly q8, q12, q0, q7 +butterfly q9, q13, q1, q6 +butterfly q10, q14, q2, q5 +butterfly q11, q15, q3, q4 +add r4, sp, #512 +vst1.s16{q0-q1}, [r4, :128]! +vst1.s16{q2-q3}, [r4, :128]! +vst1.s16{q4-q5}, [r4, :128]! 
+vst1.s16{q6-q7}, [r4, :128] +.endm + +.macro load16 in0, in1, in2, in3, in4, in5, in6, in7 +vld1.s16{\in0}, [r1, :64], r2 +vld1.s16{\in1}, [r3, :64], r2 +vld1.s16{\in2}, [r1, :64], r2 +vld1.s16{\in3}, [r3, :64], r2 +vld1.s16{\in4}, [r1, :64], r2 +vld1.s16{\in5}, [r3, :64], r2 +vld1.s16{\in6}, [r1, :64], r2 +vld1.s16{\in7}, [r3, :64], r2 +.endm + +.macro add_member in, t0, t1, t2, t3, t4, t5, t6, t7, op0, op1, op2, op3, op4, op5, op6, op7 +sum_sub q5, \in, \t0, \op0 +sum_sub q6, \in, \t1, \op1 +sum_sub q7, \in, \t2, \op2 +sum_sub q8, \in, \t3, \op3 +sum_sub q9, \in, \t4, \op4 +sum_sub q10,\in, \t5, \op5 +sum_sub q11,\in, \t6, \op6 +sum_sub q12,\in, \t7, \op7 +.endm + +.macro butterfly16 in0, in1, in2, in3, in4, in5, in6, in7 +vadd.s32q4, \in0, \in1 +vsub.s32\in0, \in0, \in1 +vadd.s32\in1, \in2, \in3 +vsub.s32\in2, \in2, \in3 +vadd.s32\in3, \in4, \in5 +vsub.s32\in4, \in4, \in5 +vadd.s32\in5, \in6, \in7 +vsub.s32\in6, \in6, \in7 +.endm + +.macro store16 in0, in1, in2, in3, in4, in5, in6, in7 +vst1.s16\in0, [r1, :64], r2 +vst1.s16\in1, [r3, :64], r4 +vst1.s16\in2, [r1, :64], r2 +vst1.s16\in3, [r3, :64], r4 +vst1.s16\in4, [r1, :64], r2 +vst1.s16\in5, [r3, :64], r4 +vst1.s16\in6, [r1, :64], r2 +vst1.s16\in7, [r3, :64], r4 +.endm + +.macro scale out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, in3, in4, in5, in6, in7, shift +vqrshrn.s32 \out0, \in0, \shift +vqrshrn.s32 \out1, \in1, \shift +vqrshrn.s32 \out2, \in2, \shift +vqrshrn.s32 \out3, \in3, \shift +vqrshrn.s32 \out4, \in4, \shift +vqrshrn.s32 \out5, \in5, \shift +vqrshrn.s32 \out6, \in6, \shift +vqrshrn.s32 \out7, \in7, \shift +.endm + +.macro tr_16x4 name, shift +function func_tr_16x4_\name +mov r1, r5 +add r3, r5, #64 +mov r2, #128 +load16 d0, d1, d2, d3, d4, d5, d6, d7 +movrel r1, trans + +tr16_8x4d0, d1, d2, d3, d4, d5, d6, d7 + +add r1, r5, #32 +add r3, r5, #(64 + 32) +mov r2, #128 +load16 d8, d9, d2, d3, d4, d5, d6, d7 +movrel r1, trans + 16 +vld1.s16{q0}, [r1, :128] +vmull.s16 q5, d8, d0[0] 
+vmull.s16 q6, d8, d0[1] +vmull.s16 q7, d8, d0[2] +vmull.s16 q8, d8, d0[3] +vmull.s16 q9, d8, d1[0] +vmull.s16 q10, d8, d1[1] +vmull.s16 q11, d8, d1[2] +vmull.s16 q12, d8, d1[3] + +add_member d9, d0[1], d1[0], d1[3], d1[1], d0[2],
[libav-devel] [PATCH] hevc: Add NEON 16x16 IDCT
The speedup vs C code is around 8x. --- libavcodec/arm/hevc_idct.S| 187 ++ libavcodec/arm/hevcdsp_init_arm.c | 4 + 2 files changed, 191 insertions(+) diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S index 4124fc8..b4279db 100644 --- a/libavcodec/arm/hevc_idct.S +++ b/libavcodec/arm/hevc_idct.S @@ -222,7 +222,194 @@ function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1 endfunc .endm +.macro butterfly e, o, tmp_p, tmp_m +vadd.s32\tmp_p, \e, \o +vsub.s32\tmp_m, \e, \o +.endm + +.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7 +tr_4x4_8\in0, \in2, \in4, \in6, q8, q9, q10, q11, q12, q13, q14, q15 + +vmull.s16 q12, \in1, \in0[0] +vmull.s16 q13, \in1, \in0[1] +vmull.s16 q14, \in1, \in0[2] +vmull.s16 q15, \in1, \in0[3] +sum_sub q12, \in3, \in0[1], + +sum_sub q13, \in3, \in0[3], - +sum_sub q14, \in3, \in0[0], - +sum_sub q15, \in3, \in0[2], - + +sum_sub q12, \in5, \in0[2], + +sum_sub q13, \in5, \in0[0], - +sum_sub q14, \in5, \in0[3], + +sum_sub q15, \in5, \in0[1], + + +sum_sub q12, \in7, \in0[3], + +sum_sub q13, \in7, \in0[2], - +sum_sub q14, \in7, \in0[1], + +sum_sub q15, \in7, \in0[0], - + +butterfly q8, q12, q0, q7 +butterfly q9, q13, q1, q6 +butterfly q10, q14, q2, q5 +butterfly q11, q15, q3, q4 +add r4, sp, #512 +vst1.s16{q0-q1}, [r4, :128]! +vst1.s16{q2-q3}, [r4, :128]! +vst1.s16{q4-q5}, [r4, :128]! 
+vst1.s16{q6-q7}, [r4, :128] +.endm + +.macro load16 in0, in1, in2, in3, in4, in5, in6, in7 +vld1.s16{\in0}, [r1, :64], r2 +vld1.s16{\in1}, [r3, :64], r2 +vld1.s16{\in2}, [r1, :64], r2 +vld1.s16{\in3}, [r3, :64], r2 +vld1.s16{\in4}, [r1, :64], r2 +vld1.s16{\in5}, [r3, :64], r2 +vld1.s16{\in6}, [r1, :64], r2 +vld1.s16{\in7}, [r3, :64], r2 +.endm + +.macro add_member in, t0, t1, t2, t3, t4, t5, t6, t7, op0, op1, op2, op3, op4, op5, op6, op7 +sum_sub q5, \in, \t0, \op0 +sum_sub q6, \in, \t1, \op1 +sum_sub q7, \in, \t2, \op2 +sum_sub q8, \in, \t3, \op3 +sum_sub q9, \in, \t4, \op4 +sum_sub q10,\in, \t5, \op5 +sum_sub q11,\in, \t6, \op6 +sum_sub q12,\in, \t7, \op7 +.endm + +.macro butterfly16 in0, in1, in2, in3, in4, in5, in6, in7 +vadd.s32q4, \in0, \in1 +vsub.s32\in0, \in0, \in1 +vadd.s32\in1, \in2, \in3 +vsub.s32\in2, \in2, \in3 +vadd.s32\in3, \in4, \in5 +vsub.s32\in4, \in4, \in5 +vadd.s32\in5, \in6, \in7 +vsub.s32\in6, \in6, \in7 +.endm + +.macro store16 in0, in1, in2, in3, in4, in5, in6, in7 +vst1.s16\in0, [r1, :64], r2 +vst1.s16\in1, [r3, :64], r4 +vst1.s16\in2, [r1, :64], r2 +vst1.s16\in3, [r3, :64], r4 +vst1.s16\in4, [r1, :64], r2 +vst1.s16\in5, [r3, :64], r4 +vst1.s16\in6, [r1, :64], r2 +vst1.s16\in7, [r3, :64], r4 +.endm + +.macro scale out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, in3, in4, in5, in6, in7, shift +vqrshrn.s32 \out0, \in0, \shift +vqrshrn.s32 \out1, \in1, \shift +vqrshrn.s32 \out2, \in2, \shift +vqrshrn.s32 \out3, \in3, \shift +vqrshrn.s32 \out4, \in4, \shift +vqrshrn.s32 \out5, \in5, \shift +vqrshrn.s32 \out6, \in6, \shift +vqrshrn.s32 \out7, \in7, \shift +.endm + +.macro tr_16x4 horiz, shift, in, out +add r1, \in, \horiz +add r3, \in, #(\horiz + 64) +mov r2, #128 +load16 d0, d1, d2, d3, d4, d5, d6, d7 +movrel r1, trans + +tr16_8x4d0, d1, d2, d3, d4, d5, d6, d7 + +add r1, \in, #(\horiz + 32) +add r3, \in, #(\horiz + 64 + 32) +mov r2, #128 +load16 d8, d9, d2, d3, d4, d5, d6, d7 +movrel r1, trans + 16 +vld1.s16{q0}, [r1, :128] 
+vmull.s16 q5, d8, d0[0] +vmull.s16 q6, d8, d0[1] +vmull.s16 q7, d8, d0[2] +vmull.s16 q8, d8, d0[3] +vmull.s16 q9, d8, d1[0] +vmull.s16 q10, d8, d1[1] +vmull.s16 q11, d8, d1[2] +vmull.s16 q12, d8, d1[3] + +add_member d9, d0[1], d1[0],
[libav-devel] [PATCH] hevc: Add NEON 4x4 and 8x8 IDCT
Optimized by Martin Storsjö <mar...@martin.st>. --- libavcodec/arm/Makefile | 2 + libavcodec/arm/hevc_idct.S| 228 ++ libavcodec/arm/hevcdsp_init_arm.c | 47 libavcodec/hevcdsp.c | 2 + libavcodec/hevcdsp.h | 1 + 5 files changed, 280 insertions(+) create mode 100644 libavcodec/arm/hevc_idct.S create mode 100644 libavcodec/arm/hevcdsp_init_arm.c diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index 77452b1..555de16 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -134,6 +134,8 @@ NEON-OBJS-$(CONFIG_AAC_DECODER)+= arm/aacpsdsp_neon.o \ NEON-OBJS-$(CONFIG_APE_DECODER)+= arm/apedsp_neon.o NEON-OBJS-$(CONFIG_DCA_DECODER)+= arm/dcadsp_neon.o \ arm/synth_filter_neon.o +NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevc_idct.o \ + arm/hevcdsp_init_arm.o NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o\ arm/rv40dsp_neon.o diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S new file mode 100644 index 000..1bb75e7 --- /dev/null +++ b/libavcodec/arm/hevc_idct.S @@ -0,0 +1,228 @@ +/* + * ARM NEON optimised IDCT functions for HEVC decoding + * Copyright (c) 2017 Alexandra Hájková + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/arm/asm.S" + +const trans +.short 64, 83, 64, 36 +.short 89, 75, 50, 18 +.short 90, 87, 80, 70 +.short 57, 43, 25, 9 +endconst + +.macro sum_sub out, in, c, op + .ifc \op, + +vmlal.s16 \out, \in, \c + .else +vmlsl.s16 \out, \in, \c + .endif +.endm + +.macro tr_4x4 in0, in1, in2, in3, out0, out1, out2, out3, shift, tmp0, tmp1, tmp2, tmp3, tmp4 + vshll.s16 \tmp0, \in0, #6 + vmull.s16 \tmp2, \in1, d4[1] + vmov \tmp1, \tmp0 + vmull.s16 \tmp3, \in1, d4[3] + vmlal.s16 \tmp0, \in2, d4[0] @e0 + vmlsl.s16 \tmp1, \in2, d4[0] @e1 + vmlal.s16 \tmp2, \in3, d4[3] @o0 + vmlsl.s16 \tmp3, \in3, d4[1] @o1 + + vadd.s32 \tmp4, \tmp0, \tmp2 + vsub.s32 \tmp0, \tmp0, \tmp2 + vadd.s32 \tmp2, \tmp1, \tmp3 + vsub.s32 \tmp1, \tmp1, \tmp3 + vqrshrn.s32\out0, \tmp4, #\shift + vqrshrn.s32\out3, \tmp0, #\shift + vqrshrn.s32\out1, \tmp2, #\shift + vqrshrn.s32\out2, \tmp1, #\shift +.endm + +.macro tr_4x4_8 in0, in1, in2, in3, out0, out1, out2, out3, tmp0, tmp1, tmp2, tmp3 + vshll.s16 \tmp0, \in0, #6 + vld1.s16 {\in0}, [r1, :64]! + vmov \tmp1, \tmp0 + vmull.s16 \tmp2, \in1, \in0[1] + vmull.s16 \tmp3, \in1, \in0[3] + vmlal.s16 \tmp0, \in2, \in0[0] @e0 + vmlsl.s16 \tmp1, \in2, \in0[0] @e1 + vmlal.s16 \tmp2, \in3, \in0[3] @o0 + vmlsl.s16 \tmp3, \in3, \in0[1] @o1 + + vld1.s16 {\in0}, [r1, :64] + + vadd.s32 \out0, \tmp0, \tmp2 + vadd.s32 \out1, \tmp1, \tmp3 + vsub.s32 \out2, \tmp1, \tmp3 + vsub.s32 \out3, \tmp0, \tmp2 + + subr1, r1, #8 +.endm + +@ Do a 4x4 transpose, using q registers for the subtransposes that don't +@ need to address the individual d registers. 
+@ r0,r1 == rq0, r2,r3 == rq1 +.macro transpose_4x4 rq0, rq1, r0, r1, r2, r3 +vtrn.32 \rq0, \rq1 +vtrn.16 \r0, \r1 +vtrn.16 \r2, \r3 +.endm + +.macro idct_4x4 bitdepth +function ff_hevc_idct_4x4_\bitdepth\()_neon, export=1 +@r0 - coeffs +vld1.s16{q0-q1}, [r0, :128] + +movrel r1, trans +vld1.s16{d4}, [r1, :64] + +tr_4x4 d0, d1, d2, d3, d16, d17, d18, d19, 7, q10, q11, q12, q13, q0 +transpose_4x4 q8, q9, d16, d17, d18, d19 + +tr_4x4 d16, d17, d18, d19, d0, d1, d2, d3, 20 -
[libav-devel] [PATCH] hevc: Add NEON 4x4 and 8x8 IDCT
Optimized by Martin Storsjö <mar...@martin.st>. --- libavcodec/arm/Makefile | 2 + libavcodec/arm/hevc_idct.S | 269 libavcodec/arm/hevc_idct_init.c | 50 libavcodec/hevcdsp.c| 2 + libavcodec/hevcdsp.h| 2 + 5 files changed, 325 insertions(+) create mode 100644 libavcodec/arm/hevc_idct.S create mode 100644 libavcodec/arm/hevc_idct_init.c diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index 77452b1..0d30a49 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -113,6 +113,8 @@ NEON-OBJS-$(CONFIG_H264DSP)+= arm/h264dsp_neon.o\ NEON-OBJS-$(CONFIG_H264PRED) += arm/h264pred_neon.o NEON-OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_neon.o \ arm/hpeldsp_neon.o +NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevc_idct.o \ + arm/hevc_idct_init.o NEON-OBJS-$(CONFIG_HPELDSP)+= arm/hpeldsp_init_neon.o \ arm/hpeldsp_neon.o NEON-OBJS-$(CONFIG_IDCTDSP)+= arm/idctdsp_init_neon.o \ diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S new file mode 100644 index 000..89dbe22 --- /dev/null +++ b/libavcodec/arm/hevc_idct.S @@ -0,0 +1,269 @@ +/* + * ARM NEON optimised IDCT functions for HEVC decoding + * Copyright (c) 2017 Alexandra Hájková + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/arm/asm.S" + +const trans +.short 64, 83, 64, 36 +.short 89, 75, 50, 18 +.short 90, 87, 80, 70 +.short 57, 43, 25, 9 +endconst + +.macro sum_sub out, in, c, op + .ifc \op, + +vmlal.s16 \out, \in, \c + .else +vmlsl.s16 \out, \in, \c + .endif +.endm + +.macro sum out, in0, in1, in2, in3, c0, c1, c2, c3, op1, op2, op3, tr4 +vmull.s16 \out, \in0, \c0 +sum_sub \out, \in1, \c1, \op1 +sum_sub \out, \in2, \c2, \op2 +sum_sub \out, \in3, \c3, \op3 +.endm + +.macro tr_4x4 in0, in1, in2, in3, out0, out1, out2, out3, shift, tmp0, tmp1, tmp2, tmp3, tmp4 + vshll.s16 \tmp0, \in0, #6 + vmull.s16 \tmp2, \in1, d4[1] + vmov \tmp1, \tmp0 + vmull.s16 \tmp3, \in1, d4[3] + vmlal.s16 \tmp0, \in2, d4[0] @e0 + vmlsl.s16 \tmp1, \in2, d4[0] @e1 + vmlal.s16 \tmp2, \in3, d4[3] @o0 + vmlsl.s16 \tmp3, \in3, d4[1] @o1 + + vadd.s32 \tmp4, \tmp0, \tmp2 + vsub.s32 \tmp0, \tmp0, \tmp2 + vadd.s32 \tmp2, \tmp1, \tmp3 + vsub.s32 \tmp1, \tmp1, \tmp3 + vqrshrn.s32\out0, \tmp4, #\shift + vqrshrn.s32\out3, \tmp0, #\shift + vqrshrn.s32\out1, \tmp2, #\shift + vqrshrn.s32\out2, \tmp1, #\shift +.endm + +.macro tr_4x4_8 in0, in1, in2, in3, out0, out1, out2, out3, tmp0, tmp1, tmp2, tmp3 + vshll.s16 \tmp0, \in0, #6 + vld1.s16 {\in0}, [r1, :64]! + vmov \tmp1, \tmp0 + vmull.s16 \tmp2, \in1, \in0[1] + vmull.s16 \tmp3, \in1, \in0[3] + vmlal.s16 \tmp0, \in2, \in0[0] @e0 + vmlsl.s16 \tmp1, \in2, \in0[0] @e1 + vmlal.s16 \tmp2, \in3, \in0[3] @o0 + vmlsl.s16 \tmp3, \in3, \in0[1] @o1 + + vld1.s16 {\in0}, [r1, :64] + + vadd.s32 \out0, \tmp0, \tmp2 + vadd.s32 \out1, \tmp1, \tmp3 + vsub.s32 \out2, \tmp1, \tmp3 + vsub.s32 \out3, \tmp0, \tmp2 + + subr1, r1, #8 +.endm + +@ Do a 4x4 transpose, using q registers for the subtransposes that don't +@ need to address the individual d registers. 
+@ r0,r1 == rq0, r2,r3 == rq1 +.macro transpose_4x4 rq0, rq1, r0, r1, r2, r3 +vtrn.32 \rq0, \rq1 +vtrn.16 \r0, \r1 +vtrn.16 \r2, \r3 +.endm + +.macro idct_4x4 bitdepth +function ff_hevc_idct_4x4_\bitdepth\()_neon, export=1 +@r0 - coeffs +vld1.s16{q0-q1}, [r0, :128] + +movrel
[libav-devel] [PATCH] asfdec: Account for different Format Data sizes
Some muxers may use the BMP_HEADER Format Data size instead of the ASF-specific one. Bug-Id: 1020 --- Use more descriptive variable names. Update the documentation. Use better commit message. libavformat/asfdec.c | 12 +++- libavformat/avidec.c | 2 +- libavformat/riff.h| 4 ++-- libavformat/riffdec.c | 6 -- libavformat/wtv.c | 2 +- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c index d602af8..6fe2524 100644 --- a/libavformat/asfdec.c +++ b/libavformat/asfdec.c @@ -691,20 +691,22 @@ static int asf_read_properties(AVFormatContext *s, const GUIDParseTable *g) static int parse_video_info(AVIOContext *pb, AVStream *st) { -uint16_t size; +uint16_t size_asf; // ASF specific Format Data size +uint32_t size_bmp; // BMP_HEADER specific Format Data size unsigned int tag; st->codecpar->width = avio_rl32(pb); st->codecpar->height = avio_rl32(pb); avio_skip(pb, 1); // skip reserved flags -size = avio_rl16(pb); // size of the Format Data -tag = ff_get_bmp_header(pb, st); +size_asf = avio_rl16(pb); +tag = ff_get_bmp_header(pb, st, &size_bmp); st->codecpar->codec_tag = tag; st->codecpar->codec_id = ff_codec_get_id(ff_codec_bmp_tags, tag); +size_bmp = FFMAX(size_asf, size_bmp); -if (size > BMP_HEADER_SIZE) { +if (size_bmp > BMP_HEADER_SIZE) { int ret; -st->codecpar->extradata_size = size - BMP_HEADER_SIZE; +st->codecpar->extradata_size = size_bmp - BMP_HEADER_SIZE; if (!(st->codecpar->extradata = av_malloc(st->codecpar->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE))) { st->codecpar->extradata_size = 0; diff --git a/libavformat/avidec.c b/libavformat/avidec.c index 0439c9c..61f81e8 100644 --- a/libavformat/avidec.c +++ b/libavformat/avidec.c @@ -613,7 +613,7 @@ static int avi_read_header(AVFormatContext *s) avio_skip(pb, size); break; } -tag1 = ff_get_bmp_header(pb, st); +tag1 = ff_get_bmp_header(pb, st, NULL); if (tag1 == MKTAG('D', 'X', 'S', 'B') || tag1 == MKTAG('D', 'X', 'S', 'A')) { diff --git a/libavformat/riff.h 
b/libavformat/riff.h index a45c7f3..e77552b 100644 --- a/libavformat/riff.h +++ b/libavformat/riff.h @@ -40,10 +40,10 @@ void ff_end_tag(AVIOContext *pb, int64_t start); /** * Read BITMAPINFOHEADER structure and set AVStream codec width, height and - * bits_per_encoded_sample fields. Does not read extradata. + * bits_per_encoded_sample fields. Writes the size of BMP file to *size. Does not read extradata. * @return codec tag */ -int ff_get_bmp_header(AVIOContext *pb, AVStream *st); +int ff_get_bmp_header(AVIOContext *pb, AVStream *st, uint32_t *size); void ff_put_bmp_header(AVIOContext *pb, AVCodecParameters *par, const AVCodecTag *tags, int for_asf); int ff_put_wav_header(AVFormatContext *s, AVIOContext *pb, AVCodecParameters *par); diff --git a/libavformat/riffdec.c b/libavformat/riffdec.c index 8124835..d10ea2b 100644 --- a/libavformat/riffdec.c +++ b/libavformat/riffdec.c @@ -180,10 +180,12 @@ enum AVCodecID ff_wav_codec_get_id(unsigned int tag, int bps) return id; } -int ff_get_bmp_header(AVIOContext *pb, AVStream *st) +int ff_get_bmp_header(AVIOContext *pb, AVStream *st, uint32_t *size) { int tag1; -avio_rl32(pb); /* size */ +uint32_t size_ = avio_rl32(pb); /* size */ +if (size) +*size = size_; st->codecpar->width = avio_rl32(pb); st->codecpar->height = (int32_t)avio_rl32(pb); avio_rl16(pb); /* planes */ diff --git a/libavformat/wtv.c b/libavformat/wtv.c index 2cab4e5..272b317 100644 --- a/libavformat/wtv.c +++ b/libavformat/wtv.c @@ -586,7 +586,7 @@ static int parse_videoinfoheader2(AVFormatContext *s, AVStream *st) AVIOContext *pb = wtv->pb; avio_skip(pb, 72); // picture aspect ratio is unreliable -ff_get_bmp_header(pb, st); +ff_get_bmp_header(pb, st, NULL); return 72 + 40; } -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] asfdec: use the BMP_HEADER specific Format Data size instead of
the ASF specific Format Data size. Fixes video decoding problem part of the bug 1020. --- libavformat/asfdec.c | 8 +--- libavformat/avidec.c | 2 +- libavformat/riff.h| 2 +- libavformat/riffdec.c | 6 -- libavformat/wtv.c | 2 +- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c index d602af8..10f8644 100644 --- a/libavformat/asfdec.c +++ b/libavformat/asfdec.c @@ -691,16 +691,18 @@ static int asf_read_properties(AVFormatContext *s, const GUIDParseTable *g) static int parse_video_info(AVIOContext *pb, AVStream *st) { -uint16_t size; +uint16_t size_; +uint32_t size; unsigned int tag; st->codecpar->width = avio_rl32(pb); st->codecpar->height = avio_rl32(pb); avio_skip(pb, 1); // skip reserved flags -size = avio_rl16(pb); // size of the Format Data -tag = ff_get_bmp_header(pb, st); +size_ = avio_rl16(pb); // size of the Format Data +tag = ff_get_bmp_header(pb, st, &size); st->codecpar->codec_tag = tag; st->codecpar->codec_id = ff_codec_get_id(ff_codec_bmp_tags, tag); +size = FFMAX(size_, size); if (size > BMP_HEADER_SIZE) { int ret; diff --git a/libavformat/avidec.c b/libavformat/avidec.c index 0439c9c..61f81e8 100644 --- a/libavformat/avidec.c +++ b/libavformat/avidec.c @@ -613,7 +613,7 @@ static int avi_read_header(AVFormatContext *s) avio_skip(pb, size); break; } -tag1 = ff_get_bmp_header(pb, st); +tag1 = ff_get_bmp_header(pb, st, NULL); if (tag1 == MKTAG('D', 'X', 'S', 'B') || tag1 == MKTAG('D', 'X', 'S', 'A')) { diff --git a/libavformat/riff.h b/libavformat/riff.h index a45c7f3..295b6d0 100644 --- a/libavformat/riff.h +++ b/libavformat/riff.h @@ -43,7 +43,7 @@ void ff_end_tag(AVIOContext *pb, int64_t start); * bits_per_encoded_sample fields. Does not read extradata. 
* @return codec tag */ -int ff_get_bmp_header(AVIOContext *pb, AVStream *st); +int ff_get_bmp_header(AVIOContext *pb, AVStream *st, uint32_t *size); void ff_put_bmp_header(AVIOContext *pb, AVCodecParameters *par, const AVCodecTag *tags, int for_asf); int ff_put_wav_header(AVFormatContext *s, AVIOContext *pb, AVCodecParameters *par); diff --git a/libavformat/riffdec.c b/libavformat/riffdec.c index 8124835..d10ea2b 100644 --- a/libavformat/riffdec.c +++ b/libavformat/riffdec.c @@ -180,10 +180,12 @@ enum AVCodecID ff_wav_codec_get_id(unsigned int tag, int bps) return id; } -int ff_get_bmp_header(AVIOContext *pb, AVStream *st) +int ff_get_bmp_header(AVIOContext *pb, AVStream *st, uint32_t *size) { int tag1; -avio_rl32(pb); /* size */ +uint32_t size_ = avio_rl32(pb); /* size */ +if (size) +*size = size_; st->codecpar->width = avio_rl32(pb); st->codecpar->height = (int32_t)avio_rl32(pb); avio_rl16(pb); /* planes */ diff --git a/libavformat/wtv.c b/libavformat/wtv.c index 2cab4e5..272b317 100644 --- a/libavformat/wtv.c +++ b/libavformat/wtv.c @@ -586,7 +586,7 @@ static int parse_videoinfoheader2(AVFormatContext *s, AVStream *st) AVIOContext *pb = wtv->pb; avio_skip(pb, 72); // picture aspect ratio is unreliable -ff_get_bmp_header(pb, st); +ff_get_bmp_header(pb, st, NULL); return 72 + 40; } -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] hevc: Add HEVC IDCT 4x4 for PPC
From: Alexandra Hajkova--- libavcodec/hevcdsp.c | 2 + libavcodec/hevcdsp.h | 1 + libavcodec/ppc/Makefile | 1 + libavcodec/ppc/hevcdsp.c | 108 ++ libavcodec/ppc/hevcdsp_template.c | 48 + 5 files changed, 160 insertions(+) create mode 100644 libavcodec/ppc/hevcdsp.c create mode 100644 libavcodec/ppc/hevcdsp_template.c diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c index 7c19198..8ae023b 100644 --- a/libavcodec/hevcdsp.c +++ b/libavcodec/hevcdsp.c @@ -245,6 +245,8 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) break; } +if (ARCH_PPC) +ff_hevc_dsp_init_ppc(hevcdsp, bit_depth); if (ARCH_X86) ff_hevc_dsp_init_x86(hevcdsp, bit_depth); } diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h index 49cb711..2f4ff01 100644 --- a/libavcodec/hevcdsp.h +++ b/libavcodec/hevcdsp.h @@ -115,6 +115,7 @@ typedef struct HEVCDSPContext { void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth); +void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth); extern const int16_t ff_hevc_epel_coeffs[7][16]; diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index 09eabcb..4b92add 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -25,6 +25,7 @@ OBJS-$(CONFIG_VP8DSP) += ppc/vp8dsp_altivec.o # decoders/encoders OBJS-$(CONFIG_APE_DECODER) += ppc/apedsp_altivec.o +OBJS-$(CONFIG_HEVC_DECODER)+= ppc/hevcdsp.o OBJS-$(CONFIG_SVQ1_ENCODER)+= ppc/svq1enc_altivec.o OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o diff --git a/libavcodec/ppc/hevcdsp.c b/libavcodec/ppc/hevcdsp.c new file mode 100644 index 000..9200e27 --- /dev/null +++ b/libavcodec/ppc/hevcdsp.c @@ -0,0 +1,108 @@ +/* SIMD-optimized IDCT functions for HEVC decoding + * Copyright (c) Alexandra Hajkova + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#if HAVE_ALTIVEC_H +#include +#endif + +#include "libavutil/cpu.h" +#include "libavutil/ppc/cpu.h" +#include "libavutil/ppc/types_altivec.h" +#include "libavutil/ppc/util_altivec.h" + +#include "libavcodec/hevcdsp.h" + +#if HAVE_ALTIVEC +static const vector int16_t trans4[4] = { +{ 64, 64, 64, 64, 64, 64, 64, 64 }, +{ 83, 36, 83, 36, 83, 36, 83, 36 }, +{ 64, -64, 64, -64, 64, -64, 64, -64 }, +{ 36, -83, 36, -83, 36, -83, 36, -83 }, +}; + +static const vec_u8 mask[2] = { +{ 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, 0x0B, 0x12, 0x13, 0x1A, 0x1B }, +{ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 0x0F, 0x16, 0x17, 0x1E, 0x1F }, +}; + +static void transform4x4(vector int16_t src_01, vector int16_t src_23, + vector int32_t res[4], const int shift, int16_t *coeffs) +{ +vector int16_t src_02, src_13; +vector int32_t zero = vec_splat_s32(0); +vector int32_t e0, o0, e1, o1; +vector int32_t add; + +src_13 = vec_mergel(src_01, src_23); +src_02 = vec_mergeh(src_01, src_23); + +e0 = vec_msums(src_02, trans4[0], zero); +o0 = vec_msums(src_13, trans4[1], zero); +e1 = vec_msums(src_02, trans4[2], zero); +o1 = vec_msums(src_13, trans4[3], zero); + +add = vec_sl(vec_splat_s32(1), 
vec_splat_u32(shift - 1)); +e0 = vec_add(e0, add); +e1 = vec_add(e1, add); + +res[0] = vec_add(e0, o0); +res[1] = vec_add(e1, o1); +res[2] = vec_sub(e1, o1); +res[3] = vec_sub(e0, o0); +} + +static void scale(vector int32_t res[4], vector int16_t res_packed[2], int shift) +{ +int i; +vector unsigned int v_shift = vec_splat_u32(shift); + +for (i = 0; i < 4; i++) +res[i] = vec_sra(res[i], v_shift); + +// clip16 +res_packed[0] = vec_packs(res[0], res[1]); +res_packed[1] = vec_packs(res[2], res[3]); +} + +#define FUNCDECL(a, depth) a ## _ ## depth ## _altivec +#define FUNC(a, b) FUNCDECL(a, b) + +#define BIT_DEPTH 8 +#include "hevcdsp_template.c" +#undef BIT_DEPTH + +#define
[libav-devel] [PATCH] hevc: Add HEVC IDCT 4x4 for PPC
From: Alexandra Hajkova--- libavcodec/hevcdsp.c | 2 + libavcodec/hevcdsp.h | 1 + libavcodec/ppc/Makefile | 1 + libavcodec/ppc/hevcdsp.c | 110 ++ libavcodec/ppc/hevcdsp_template.c | 48 + 5 files changed, 162 insertions(+) create mode 100644 libavcodec/ppc/hevcdsp.c create mode 100644 libavcodec/ppc/hevcdsp_template.c diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c index 7c19198..8ae023b 100644 --- a/libavcodec/hevcdsp.c +++ b/libavcodec/hevcdsp.c @@ -245,6 +245,8 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) break; } +if (ARCH_PPC) +ff_hevc_dsp_init_ppc(hevcdsp, bit_depth); if (ARCH_X86) ff_hevc_dsp_init_x86(hevcdsp, bit_depth); } diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h index 49cb711..2f4ff01 100644 --- a/libavcodec/hevcdsp.h +++ b/libavcodec/hevcdsp.h @@ -115,6 +115,7 @@ typedef struct HEVCDSPContext { void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth); +void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth); extern const int16_t ff_hevc_epel_coeffs[7][16]; diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index 09eabcb..4b92add 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -25,6 +25,7 @@ OBJS-$(CONFIG_VP8DSP) += ppc/vp8dsp_altivec.o # decoders/encoders OBJS-$(CONFIG_APE_DECODER) += ppc/apedsp_altivec.o +OBJS-$(CONFIG_HEVC_DECODER)+= ppc/hevcdsp.o OBJS-$(CONFIG_SVQ1_ENCODER)+= ppc/svq1enc_altivec.o OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o diff --git a/libavcodec/ppc/hevcdsp.c b/libavcodec/ppc/hevcdsp.c new file mode 100644 index 000..c95af67 --- /dev/null +++ b/libavcodec/ppc/hevcdsp.c @@ -0,0 +1,110 @@ +/* SIMD-optimized IDCT functions for HEVC decoding + * Copyright (c) Alexandra Hajkova + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#if HAVE_ALTIVEC_H +#include +#endif + +#include "libavutil/cpu.h" +#if HAVE_ALTIVEC_H +#include "libavutil/ppc/cpu.h" +#include "libavutil/ppc/types_altivec.h" +#include "libavutil/ppc/util_altivec.h" +#endif + +#include "libavcodec/hevcdsp.h" + +#if HAVE_ALTIVEC +#define FUNCDECL(a, depth) a ## _ ## depth ## _altivec +#define FUNC(a, b) FUNCDECL(a, b) + +static const vector int16_t trans4[4] = { +{ 64, 64, 64, 64, 64, 64, 64, 64 }, +{ 83, 36, 83, 36, 83, 36, 83, 36 }, +{ 64, -64, 64, -64, 64, -64, 64, -64 }, +{ 36, -83, 36, -83, 36, -83, 36, -83 }, +}; + +static const vec_u8 mask[2] = { +{ 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, 0x0B, 0x12, 0x13, 0x1A, 0x1B }, +{ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 0x0F, 0x16, 0x17, 0x1E, 0x1F }, +}; + +static void transform4x4(vector int16_t src_01, vector int16_t src_23, + vector int32_t res[4], const int shift, int16_t *coeffs) +{ +vector int16_t src_02, src_13; +vector int32_t zero = vec_splat_s32(0); +vector int32_t e0, o0, e1, o1; +vector int32_t add; + +src_13 = vec_mergel(src_01, src_23); +src_02 = vec_mergeh(src_01, src_23); + +e0 = vec_msums(src_02, trans4[0], zero); +o0 = vec_msums(src_13, trans4[1], zero); +e1 = 
vec_msums(src_02, trans4[2], zero); +o1 = vec_msums(src_13, trans4[3], zero); + +add = vec_sl(vec_splat_s32(1), vec_splat_u32(shift - 1)); +e0 = vec_add(e0, add); +e1 = vec_add(e1, add); + +res[0] = vec_add(e0, o0); +res[1] = vec_add(e1, o1); +res[2] = vec_sub(e1, o1); +res[3] = vec_sub(e0, o0); +} + +static void scale(vector int32_t res[4], vector int16_t res_packed[2], int shift) +{ +int i; +vector unsigned int v_shift = vec_splat_u32(shift); + +for (i = 0; i < 4; i++) +res[i] = vec_sra(res[i], v_shift); + +// clip16 +res_packed[0] = vec_packs(res[0], res[1]); +res_packed[1] = vec_packs(res[2], res[3]); +} + +#define BIT_DEPTH 8 +#include
[libav-devel] [PATCH 34/35] qcelp: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/qcelpdec.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/libavcodec/qcelpdec.c b/libavcodec/qcelpdec.c index e9e7347..9d5e13a 100644 --- a/libavcodec/qcelpdec.c +++ b/libavcodec/qcelpdec.c @@ -31,9 +31,10 @@ #include "libavutil/channel_layout.h" #include "libavutil/float_dsp.h" + #include "avcodec.h" +#include "bitstream.h" #include "internal.h" -#include "get_bits.h" #include "qcelpdata.h" #include "celp_filters.h" #include "acelp_filters.h" @@ -53,7 +54,7 @@ typedef enum { } qcelp_packet_rate; typedef struct QCELPContext { -GetBitContext gb; +BitstreamContext bc; qcelp_packet_rate bitrate; QCELPFrameframe;/**< unpacked data frame */ @@ -718,12 +719,12 @@ static int qcelp_decode_frame(AVCodecContext *avctx, void *data, qcelp_unpacking_bitmaps_lengths[q->bitrate]; uint8_t *unpacked_data = (uint8_t *)&q->frame; -init_get_bits(&q->gb, buf, 8 * buf_size); +bitstream_init(&q->bc, buf, 8 * buf_size); memset(&q->frame, 0, sizeof(QCELPFrame)); for (; bitmaps < bitmaps_end; bitmaps++) -unpacked_data[bitmaps->index] |= get_bits(&q->gb, bitmaps->bitlen) << bitmaps->bitpos; +unpacked_data[bitmaps->index] |= bitstream_read(&q->bc, bitmaps->bitlen) << bitmaps->bitpos; // Check for erasures/blanks on rates 1, 1/4 and 1/8. if (q->frame.reserved) { -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 32/35] opus: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/opus.h| 6 +++--- libavcodec/opusdec.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libavcodec/opus.h b/libavcodec/opus.h index 55c91fa..fbf67c9 100644 --- a/libavcodec/opus.h +++ b/libavcodec/opus.h @@ -32,7 +32,7 @@ #include "libavresample/avresample.h" #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #define MAX_FRAME_SIZE 1275 #define MAX_FRAMES 48 @@ -92,7 +92,7 @@ typedef struct RawBitsContext { } RawBitsContext; typedef struct OpusRangeCoder { -GetBitContext gb; +BitstreamContext bc; RawBitsContext rb; unsigned int range; unsigned int value; @@ -196,7 +196,7 @@ typedef struct OpusContext { static av_always_inline void opus_rc_normalize(OpusRangeCoder *rc) { while (rc->range <= 1<<23) { -rc->value = ((rc->value << 8) | (get_bits(&rc->gb, 8) ^ 0xFF)) & ((1u << 31) - 1); +rc->value = ((rc->value << 8) | (bitstream_read(&rc->bc, 8) ^ 0xFF)) & ((1u << 31) - 1); rc->range <<= 8; rc->total_read_bits += 8; } diff --git a/libavcodec/opusdec.c b/libavcodec/opusdec.c index 92e651c..163f0d5 100644 --- a/libavcodec/opusdec.c +++ b/libavcodec/opusdec.c @@ -43,9 +43,9 @@ #include "libavresample/avresample.h" #include "avcodec.h" +#include "bitstream.h" #include "celp_filters.h" #include "fft.h" -#include "get_bits.h" #include "internal.h" #include "mathops.h" #include "opus.h" @@ -80,12 +80,12 @@ static int get_silk_samplerate(int config) */ static int opus_rc_init(OpusRangeCoder *rc, const uint8_t *data, int size) { -int ret = init_get_bits8(&rc->gb, data, size); +int ret = bitstream_init8(&rc->bc, data, size); if (ret < 0) return ret; rc->range = 128; -rc->value = 127 - get_bits(&rc->gb, 7); +rc->value = 127 - bitstream_read(&rc->bc, 7); rc->total_read_bits = 9; opus_rc_normalize(rc); -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 28/35] hq_hqa: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/hq_hqa.c | 48 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/libavcodec/hq_hqa.c b/libavcodec/hq_hqa.c index 98bd596..0d03e59 100644 --- a/libavcodec/hq_hqa.c +++ b/libavcodec/hq_hqa.c @@ -24,8 +24,8 @@ #include "libavutil/intreadwrite.h" #include "avcodec.h" +#include "bitstream.h" #include "canopus.h" -#include "get_bits.h" #include "internal.h" #include "hq_hqa.h" @@ -59,7 +59,7 @@ static inline void put_blocks(HQContext *c, AVFrame *pic, pic->linesize[plane] << ilace, block1); } -static int hq_decode_block(HQContext *c, GetBitContext *gb, int16_t block[64], +static int hq_decode_block(HQContext *c, BitstreamContext *bc, int16_t block[64], int qsel, int is_chroma, int is_hqa) { const int32_t *q; @@ -68,15 +68,15 @@ static int hq_decode_block(HQContext *c, GetBitContext *gb, int16_t block[64], memset(block, 0, 64 * sizeof(*block)); if (!is_hqa) { -block[0] = get_sbits(gb, 9) << 6; -q = ff_hq_quants[qsel][is_chroma][get_bits(gb, 2)]; +block[0] = bitstream_read_signed(bc, 9) << 6; +q = ff_hq_quants[qsel][is_chroma][bitstream_read(bc, 2)]; } else { -q = ff_hq_quants[qsel][is_chroma][get_bits(gb, 2)]; -block[0] = get_sbits(gb, 9) << 6; +q = ff_hq_quants[qsel][is_chroma][bitstream_read(bc, 2)]; +block[0] = bitstream_read_signed(bc, 9) << 6; } for (;;) { -val = get_vlc2(gb, c->hq_ac_vlc.table, 9, 2); +val = bitstream_read_vlc(bc, c->hq_ac_vlc.table, 9, 2); if (val < 0) return AVERROR_INVALIDDATA; @@ -91,16 +91,16 @@ static int hq_decode_block(HQContext *c, GetBitContext *gb, int16_t block[64], } static int hq_decode_mb(HQContext *c, AVFrame *pic, -GetBitContext *gb, int x, int y) +BitstreamContext *bc, int x, int y) { int qgroup, flag; int i, ret; -qgroup = get_bits(gb, 4); -flag = get_bits1(gb); +qgroup = bitstream_read(bc, 4); +flag = bitstream_read_bit(bc); for (i = 0; i < 8; i++) { -ret = hq_decode_block(c, gb, c->block[i], qgroup, i >= 4, 0); +ret = hq_decode_block(c, bc, c->block[i], qgroup, i >= 4, 0); 
if (ret < 0) return ret; } @@ -117,7 +117,7 @@ static int hq_decode_frame(HQContext *ctx, AVFrame *pic, int prof_num, size_t data_size) { const HQProfile *profile; -GetBitContext gb; +BitstreamContext bc; const uint8_t *perm, *src = ctx->gbc.buffer; uint32_t slice_off[21]; int slice, start_off, next_off, i, ret; @@ -160,11 +160,11 @@ static int hq_decode_frame(HQContext *ctx, AVFrame *pic, "Invalid slice size %zu.\n", data_size); break; } -init_get_bits(&gb, src + slice_off[slice], - (slice_off[slice + 1] - slice_off[slice]) * 8); +bitstream_init(&bc, src + slice_off[slice], + (slice_off[slice + 1] - slice_off[slice]) * 8); for (i = 0; i < (next_off - start_off) * profile->tab_w; i++) { -ret = hq_decode_mb(ctx, pic, &gb, perm[0] * 16, perm[1] * 16); +ret = hq_decode_mb(ctx, pic, &bc, perm[0] * 16, perm[1] * 16); if (ret < 0) { av_log(ctx->avctx, AV_LOG_ERROR, "Error decoding macroblock %d at slice %d.\n", i, slice); @@ -178,12 +178,12 @@ static int hq_decode_frame(HQContext *ctx, AVFrame *pic, } static int hqa_decode_mb(HQContext *c, AVFrame *pic, int qgroup, - GetBitContext *gb, int x, int y) + BitstreamContext *bc, int x, int y) { int flag = 0; int i, ret, cbp; -cbp = get_vlc2(gb, c->hqa_cbp_vlc.table, 5, 1); +cbp = bitstream_read_vlc(bc, c->hqa_cbp_vlc.table, 5, 1); for (i = 0; i < 12; i++) memset(c->block[i], 0, sizeof(*c->block)); @@ -191,7 +191,7 @@ static int hqa_decode_mb(HQContext *c, AVFrame *pic, int qgroup, c->block[i][0] = -128 * (1 << 6); if (cbp) { -flag = get_bits1(gb); +flag = bitstream_read_bit(bc); cbp |= cbp << 4; if (cbp & 0x3) @@ -201,7 +201,7 @@ static int hqa_decode_mb(HQContext *c, AVFrame *pic, int qgroup, for (i = 0; i < 12; i++) { if (!(cbp & (1 << i))) continue; -ret = hq_decode_block(c, gb, c->block[i], qgroup, i >= 8, 1); +ret = hq_decode_block(c, bc, c->block[i], qgroup, i >= 8, 1); if (ret < 0) return ret; } @@ -217,7 +217,7 @@ static int hqa_decode_mb(HQContext *c, AVFrame *pic, int qgroup, return 0; } -static int hqa_decode_slice(HQContext 
*ctx, AVFrame *pic, GetBitContext *gb, +static int hqa_decode_slice(HQContext *ctx,
[libav-devel] [PATCH 35/35] qdm2: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/qdm2.c | 200 +++--- 1 file changed, 100 insertions(+), 100 deletions(-) diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c index 7a7c149..781999a 100644 --- a/libavcodec/qdm2.c +++ b/libavcodec/qdm2.c @@ -39,7 +39,7 @@ #define BITSTREAM_READER_LE #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "internal.h" #include "mpegaudio.h" #include "mpegaudiodsp.h" @@ -361,31 +361,31 @@ static av_cold void qdm2_init_vlc(void) INIT_VLC_USE_NEW_STATIC | INIT_VLC_LE); } -static int qdm2_get_vlc(GetBitContext *gb, VLC *vlc, int flag, int depth) +static int qdm2_get_vlc(BitstreamContext *bc, VLC *vlc, int flag, int depth) { int value; -value = get_vlc2(gb, vlc->table, vlc->bits, depth); +value = bitstream_read_vlc(bc, vlc->table, vlc->bits, depth); /* stage-2, 3 bits exponent escape sequence */ if (value-- == 0) -value = get_bits(gb, get_bits(gb, 3) + 1); +value = bitstream_read(bc, bitstream_read(bc, 3) + 1); /* stage-3, optional */ if (flag) { int tmp = vlc_stage3_values[value]; if ((value & ~3) > 0) -tmp += get_bits(gb, (value >> 2)); +tmp += bitstream_read(bc, value >> 2); value = tmp; } return value; } -static int qdm2_get_se_vlc(VLC *vlc, GetBitContext *gb, int depth) +static int qdm2_get_se_vlc(VLC *vlc, BitstreamContext *bc, int depth) { -int value = qdm2_get_vlc(gb, vlc, 0, depth); +int value = qdm2_get_vlc(bc, vlc, 0, depth); return (value & 1) ? ((value + 1) >> 1) : -(value >> 1); } @@ -412,35 +412,35 @@ static uint16_t qdm2_packet_checksum(const uint8_t *data, int length, int value) /** * Fill a QDM2SubPacket structure with packet type, size, and data pointer. 
* - * @param gbbitreader context + * @param bcbitreader context * @param sub_packetpacket under analysis */ -static void qdm2_decode_sub_packet_header(GetBitContext *gb, +static void qdm2_decode_sub_packet_header(BitstreamContext *bc, QDM2SubPacket *sub_packet) { -sub_packet->type = get_bits(gb, 8); +sub_packet->type = bitstream_read(bc, 8); if (sub_packet->type == 0) { sub_packet->size = 0; sub_packet->data = NULL; } else { -sub_packet->size = get_bits(gb, 8); +sub_packet->size = bitstream_read(bc, 8); if (sub_packet->type & 0x80) { sub_packet->size <<= 8; -sub_packet->size |= get_bits(gb, 8); +sub_packet->size |= bitstream_read(bc, 8); sub_packet->type &= 0x7f; } if (sub_packet->type == 0x7f) -sub_packet->type |= (get_bits(gb, 8) << 8); +sub_packet->type |= bitstream_read(bc, 8) << 8; // FIXME: this depends on bitreader-internal data -sub_packet->data = &gb->buffer[get_bits_count(gb) / 8]; +sub_packet->data = &bc->buffer[bitstream_tell(bc) / 8]; } av_log(NULL, AV_LOG_DEBUG, "Subpacket: type=%d size=%d start_offs=%x\n", - sub_packet->type, sub_packet->size, get_bits_count(gb) / 8); + sub_packet->type, sub_packet->size, bitstream_tell(bc) / 8); } /** @@ -799,12 +799,12 @@ static void fill_coding_method_array(sb_int8_array tone_level_idx, * sb 8-sb_used. * * @param q context - * @param gbbitreader context + * @param bcbitreader context * @param lengthpacket length in bits * @param sb_minlower subband processed (sb_min included) * @param sb_maxhigher subband processed (sb_max excluded) */ -static void synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb, +static void synthfilt_build_sb_samples(QDM2Context *q, BitstreamContext *bc, int length, int sb_min, int sb_max) { int sb, j, k, n, ch, run, channels; @@ -830,12 +830,12 @@ static void synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb, else if (sb >= 24) joined_stereo = 1; else -joined_stereo = (get_bits_left(gb) >= 1) ? get_bits1(gb) : 0; +joined_stereo = (bitstream_bits_left(bc) >= 1) ? 
bitstream_read_bit(bc) : 0; if (joined_stereo) { -if (get_bits_left(gb) >= 16) +if (bitstream_bits_left(bc) >= 16) for (j = 0; j < 16; j++) -sign_bits[j] = get_bits1(gb); +sign_bits[j] = bitstream_read_bit(bc); for (j = 0; j < 64; j++) if (q->coding_method[1][sb][j] > q->coding_method[0][sb][j]) @@ -851,22 +851,22 @@ static void synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb, for (ch = 0; ch < channels; ch++) { FIX_NOISE_IDX(q->noise_idx); -zero_encoding = (get_bits_left(gb) >= 1) ?
[libav-devel] [PATCH 30/35] jvdec: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/jvdec.c | 52 ++-- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/libavcodec/jvdec.c b/libavcodec/jvdec.c index c532b75..37a2770 100644 --- a/libavcodec/jvdec.c +++ b/libavcodec/jvdec.c @@ -28,8 +28,8 @@ #include "libavutil/intreadwrite.h" #include "avcodec.h" +#include "bitstream.h" #include "blockdsp.h" -#include "get_bits.h" #include "internal.h" typedef struct JvContext { @@ -62,84 +62,84 @@ static av_cold int decode_init(AVCodecContext *avctx) /** * Decode 2x2 block */ -static inline void decode2x2(GetBitContext *gb, uint8_t *dst, int linesize) +static inline void decode2x2(BitstreamContext *bc, uint8_t *dst, int linesize) { int i, j, v[2]; -switch (get_bits(gb, 2)) { +switch (bitstream_read(bc, 2)) { case 1: -v[0] = get_bits(gb, 8); +v[0] = bitstream_read(bc, 8); for (j = 0; j < 2; j++) memset(dst + j * linesize, v[0], 2); break; case 2: -v[0] = get_bits(gb, 8); -v[1] = get_bits(gb, 8); +v[0] = bitstream_read(bc, 8); +v[1] = bitstream_read(bc, 8); for (j = 0; j < 2; j++) for (i = 0; i < 2; i++) -dst[j * linesize + i] = v[get_bits1(gb)]; +dst[j * linesize + i] = v[bitstream_read_bit(bc)]; break; case 3: for (j = 0; j < 2; j++) for (i = 0; i < 2; i++) -dst[j * linesize + i] = get_bits(gb, 8); +dst[j * linesize + i] = bitstream_read(bc, 8); } } /** * Decode 4x4 block */ -static inline void decode4x4(GetBitContext *gb, uint8_t *dst, int linesize) +static inline void decode4x4(BitstreamContext *bc, uint8_t *dst, int linesize) { int i, j, v[2]; -switch (get_bits(gb, 2)) { +switch (bitstream_read(bc, 2)) { case 1: -v[0] = get_bits(gb, 8); +v[0] = bitstream_read(bc, 8); for (j = 0; j < 4; j++) memset(dst + j * linesize, v[0], 4); break; case 2: -v[0] = get_bits(gb, 8); -v[1] = get_bits(gb, 8); +v[0] = bitstream_read(bc, 8); +v[1] = bitstream_read(bc, 8); for (j = 2; j >= 0; j -= 2) { for (i = 0; i < 4; i++) -dst[j * linesize + i] = v[get_bits1(gb)]; +dst[j * linesize + i] = v[bitstream_read_bit(bc)]; for (i = 
0; i < 4; i++) -dst[(j + 1) * linesize + i] = v[get_bits1(gb)]; +dst[(j + 1) * linesize + i] = v[bitstream_read_bit(bc)]; } break; case 3: for (j = 0; j < 4; j += 2) for (i = 0; i < 4; i += 2) -decode2x2(gb, dst + j * linesize + i, linesize); +decode2x2(bc, dst + j * linesize + i, linesize); } } /** * Decode 8x8 block */ -static inline void decode8x8(GetBitContext *gb, uint8_t *dst, int linesize, +static inline void decode8x8(BitstreamContext *bc, uint8_t *dst, int linesize, BlockDSPContext *bdsp) { int i, j, v[2]; -switch (get_bits(gb, 2)) { +switch (bitstream_read(bc, 2)) { case 1: -v[0] = get_bits(gb, 8); +v[0] = bitstream_read(bc, 8); bdsp->fill_block_tab[1](dst, v[0], linesize, 8); break; case 2: -v[0] = get_bits(gb, 8); -v[1] = get_bits(gb, 8); +v[0] = bitstream_read(bc, 8); +v[1] = bitstream_read(bc, 8); for (j = 7; j >= 0; j--) for (i = 0; i < 8; i++) -dst[j * linesize + i] = v[get_bits1(gb)]; +dst[j * linesize + i] = v[bitstream_read_bit(bc)]; break; case 3: for (j = 0; j < 8; j += 4) for (i = 0; i < 8; i += 4) -decode4x4(gb, dst + j * linesize + i, linesize); +decode4x4(bc, dst + j * linesize + i, linesize); } } @@ -163,12 +163,12 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, } if (video_type == 0 || video_type == 1) { -GetBitContext gb; -init_get_bits(&gb, buf, 8 * FFMIN(video_size, buf_end - buf)); +BitstreamContext bc; +bitstream_init(&bc, buf, 8 * FFMIN(video_size, buf_end - buf)); for (j = 0; j < avctx->height; j += 8) for (i = 0; i < avctx->width; i += 8) -decode8x8(&gb, +decode8x8(&bc, s->frame->data[0] + j * s->frame->linesize[0] + i, s->frame->linesize[0], &s->bdsp); -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 27/35] gsm: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/gsmdec.c | 11 ++- libavcodec/gsmdec_template.c | 34 +- libavcodec/msgsmdec.c| 9 + 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/libavcodec/gsmdec.c b/libavcodec/gsmdec.c index a333e58..d727cf9 100644 --- a/libavcodec/gsmdec.c +++ b/libavcodec/gsmdec.c @@ -25,8 +25,9 @@ */ #include "libavutil/channel_layout.h" + #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "internal.h" #include "msgsmdec.h" @@ -67,7 +68,7 @@ static int gsm_decode_frame(AVCodecContext *avctx, void *data, { AVFrame *frame = data; int res; -GetBitContext gb; +BitstreamContext bc; const uint8_t *buf = avpkt->data; int buf_size = avpkt->size; int16_t *samples; @@ -87,10 +88,10 @@ static int gsm_decode_frame(AVCodecContext *avctx, void *data, switch (avctx->codec_id) { case AV_CODEC_ID_GSM: -init_get_bits(&gb, buf, buf_size * 8); -if (get_bits(&gb, 4) != 0xd) +bitstream_init(&bc, buf, buf_size * 8); +if (bitstream_read(&bc, 4) != 0xd) av_log(avctx, AV_LOG_WARNING, "Missing GSM magic!\n"); -res = gsm_decode_block(avctx, samples, &gb, GSM_13000); +res = gsm_decode_block(avctx, samples, &bc, GSM_13000); if (res < 0) return res; break; diff --git a/libavcodec/gsmdec_template.c b/libavcodec/gsmdec_template.c index 2794bd1..7437908 100644 --- a/libavcodec/gsmdec_template.c +++ b/libavcodec/gsmdec_template.c @@ -24,17 +24,17 @@ * GSM decoder */ -#include "get_bits.h" +#include "bitstream.h" #include "gsm.h" #include "gsmdec_data.h" -static void apcm_dequant_add(GetBitContext *gb, int16_t *dst, const int *frame_bits) +static void apcm_dequant_add(BitstreamContext *bc, int16_t *dst, const int *frame_bits) { int i, val; -int maxidx = get_bits(gb, 6); +int maxidx = bitstream_read(bc, 6); const int16_t *tab = ff_gsm_dequant_tab[maxidx]; for (i = 0; i < 13; i++) { -val = get_bits(gb, frame_bits[i]); +val = bitstream_read(bc, frame_bits[i]); dst[3 * i] += tab[ff_gsm_requant_tab[frame_bits[i]][val]]; } } @@ -120,28 +120,28 @@ static int 
postprocess(int16_t *data, int msr) } static int gsm_decode_block(AVCodecContext *avctx, int16_t *samples, -GetBitContext *gb, int mode) +BitstreamContext *bc, int mode) { GSMContext *ctx = avctx->priv_data; int i; int16_t *ref_dst = ctx->ref_buf + 120; int *lar = ctx->lar[ctx->lar_idx]; -lar[0] = decode_log_area(get_bits(gb, 6), 13107, 1 << 15); -lar[1] = decode_log_area(get_bits(gb, 6), 13107, 1 << 15); -lar[2] = decode_log_area(get_bits(gb, 5), 13107, (1 << 14) + 2048*2); -lar[3] = decode_log_area(get_bits(gb, 5), 13107, (1 << 14) - 2560*2); -lar[4] = decode_log_area(get_bits(gb, 4), 19223, (1 << 13) + 94*2); -lar[5] = decode_log_area(get_bits(gb, 4), 17476, (1 << 13) - 1792*2); -lar[6] = decode_log_area(get_bits(gb, 3), 31454, (1 << 12) - 341*2); -lar[7] = decode_log_area(get_bits(gb, 3), 29708, (1 << 12) - 1144*2); +lar[0] = decode_log_area(bitstream_read(bc, 6), 13107, 1 << 15); +lar[1] = decode_log_area(bitstream_read(bc, 6), 13107, 1 << 15); +lar[2] = decode_log_area(bitstream_read(bc, 5), 13107, (1 << 14) + 2048 * 2); +lar[3] = decode_log_area(bitstream_read(bc, 5), 13107, (1 << 14) - 2560 * 2); +lar[4] = decode_log_area(bitstream_read(bc, 4), 19223, (1 << 13) + 94 * 2); +lar[5] = decode_log_area(bitstream_read(bc, 4), 17476, (1 << 13) - 1792 * 2); +lar[6] = decode_log_area(bitstream_read(bc, 3), 31454, (1 << 12) - 341 * 2); +lar[7] = decode_log_area(bitstream_read(bc, 3), 29708, (1 << 12) - 1144 * 2); for (i = 0; i < 4; i++) { -int lag = get_bits(gb, 7); -int gain_idx = get_bits(gb, 2); -int offset = get_bits(gb, 2); +int lag = bitstream_read(bc, 7); +int gain_idx = bitstream_read(bc, 2); +int offset = bitstream_read(bc, 2); lag = av_clip(lag, 40, 120); long_term_synth(ref_dst, lag, gain_idx); -apcm_dequant_add(gb, ref_dst + offset, ff_gsm_apcm_bits[mode][i]); +apcm_dequant_add(bc, ref_dst + offset, ff_gsm_apcm_bits[mode][i]); ref_dst += 40; } memcpy(ctx->ref_buf, ctx->ref_buf + 160, 120 * sizeof(*ctx->ref_buf)); diff --git a/libavcodec/msgsmdec.c 
b/libavcodec/msgsmdec.c index 92b5ae6..c26efa9 100644 --- a/libavcodec/msgsmdec.c +++ b/libavcodec/msgsmdec.c @@ -21,6 +21,7 @@ #define BITSTREAM_READER_LE #include "avcodec.h" +#include "bitstream.h" #include "gsm.h" #include "msgsmdec.h" @@ -30,10 +31,10 @@ int ff_msgsm_decode_block(AVCodecContext *avctx, int16_t *samples, const uint8_t *buf, int mode) { int res; -
[libav-devel] [PATCH 33/35] pcx: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov --- libavcodec/pcx.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/libavcodec/pcx.c b/libavcodec/pcx.c index a2d49b4..ece885e 100644 --- a/libavcodec/pcx.c +++ b/libavcodec/pcx.c @@ -23,9 +23,10 @@ */ #include "libavutil/imgutils.h" + #include "avcodec.h" +#include "bitstream.h" #include "bytestream.h" -#include "get_bits.h" #include "internal.h" #define PCX_HEADER_SIZE 128 @@ -179,15 +180,15 @@ static int pcx_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, goto end; } } else if (nplanes == 1) { /* all packed formats, max. 16 colors */ -GetBitContext s; +BitstreamContext s; for (y = 0; y < h; y++) { -init_get_bits(&s, scanline, bytes_per_scanline << 3); +bitstream_init(&s, scanline, bytes_per_scanline << 3); pcx_rle_decode(&gb, scanline, bytes_per_scanline, compressed); for (x = 0; x < w; x++) -ptr[x] = get_bits(&s, bits_per_pixel); +ptr[x] = bitstream_read(&s, bits_per_pixel); ptr += stride; } } else {/* planar, 4, 8 or 16 colors */ -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 26/35] g72x: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/g722dec.c | 13 - libavcodec/g723_1dec.c | 72 +- libavcodec/g726.c | 11 3 files changed, 49 insertions(+), 47 deletions(-) diff --git a/libavcodec/g722dec.c b/libavcodec/g722dec.c index c4c0ec8..bfd4b42 100644 --- a/libavcodec/g722dec.c +++ b/libavcodec/g722dec.c @@ -36,8 +36,9 @@ #include "libavutil/channel_layout.h" #include "libavutil/opt.h" + #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "g722.h" #include "internal.h" @@ -92,7 +93,7 @@ static int g722_decode_frame(AVCodecContext *avctx, void *data, int j, ret; const int skip = 8 - c->bits_per_codeword; const int16_t *quantizer_table = low_inv_quants[skip]; -GetBitContext gb; +BitstreamContext bc; /* get output buffer */ frame->nb_samples = avpkt->size * 2; @@ -102,15 +103,15 @@ static int g722_decode_frame(AVCodecContext *avctx, void *data, } out_buf = (int16_t *)frame->data[0]; -init_get_bits(, avpkt->data, avpkt->size * 8); +bitstream_init(, avpkt->data, avpkt->size * 8); for (j = 0; j < avpkt->size; j++) { int ilow, ihigh, rlow, rhigh, dhigh; int xout[2]; -ihigh = get_bits(, 2); -ilow = get_bits(, 6 - skip); -skip_bits(, skip); +ihigh = bitstream_read(, 2); +ilow = bitstream_read(, 6 - skip); +bitstream_skip(, skip); rlow = av_clip_intp2((c->band[0].scale_factor * quantizer_table[ilow] >> 10) + c->band[0].s_predictor, 14); diff --git a/libavcodec/g723_1dec.c b/libavcodec/g723_1dec.c index f50bed1..2ea3bbf 100644 --- a/libavcodec/g723_1dec.c +++ b/libavcodec/g723_1dec.c @@ -32,8 +32,8 @@ #define BITSTREAM_READER_LE #include "acelp_vectors.h" #include "avcodec.h" +#include "bitstream.h" #include "celp_filters.h" -#include "get_bits.h" #include "internal.h" #include "g723_1.h" @@ -68,14 +68,14 @@ static av_cold int g723_1_decode_init(AVCodecContext *avctx) static int unpack_bitstream(G723_1_Context *p, const uint8_t *buf, int buf_size) { -GetBitContext gb; +BitstreamContext bc; int ad_cb_len; int temp, info_bits, i; -init_get_bits(, buf, 
buf_size * 8); +bitstream_init(, buf, buf_size * 8); /* Extract frame type and rate info */ -info_bits = get_bits(, 2); +info_bits = bitstream_read(, 2); if (info_bits == 3) { p->cur_frame_type = UNTRANSMITTED_FRAME; @@ -83,13 +83,13 @@ static int unpack_bitstream(G723_1_Context *p, const uint8_t *buf, } /* Extract 24 bit lsp indices, 8 bit for each band */ -p->lsp_index[2] = get_bits(, 8); -p->lsp_index[1] = get_bits(, 8); -p->lsp_index[0] = get_bits(, 8); +p->lsp_index[2] = bitstream_read(, 8); +p->lsp_index[1] = bitstream_read(, 8); +p->lsp_index[0] = bitstream_read(, 8); if (info_bits == 2) { p->cur_frame_type = SID_FRAME; -p->subframe[0].amp_index = get_bits(, 6); +p->subframe[0].amp_index = bitstream_read(, 6); return 0; } @@ -97,23 +97,23 @@ static int unpack_bitstream(G723_1_Context *p, const uint8_t *buf, p->cur_rate = info_bits ? RATE_5300 : RATE_6300; p->cur_frame_type = ACTIVE_FRAME; -p->pitch_lag[0] = get_bits(, 7); +p->pitch_lag[0] = bitstream_read(, 7); if (p->pitch_lag[0] > 123) /* test if forbidden code */ return -1; p->pitch_lag[0] += PITCH_MIN; -p->subframe[1].ad_cb_lag = get_bits(, 2); +p->subframe[1].ad_cb_lag = bitstream_read(, 2); -p->pitch_lag[1] = get_bits(, 7); +p->pitch_lag[1] = bitstream_read(, 7); if (p->pitch_lag[1] > 123) return -1; p->pitch_lag[1] += PITCH_MIN; -p->subframe[3].ad_cb_lag = get_bits(, 2); +p->subframe[3].ad_cb_lag = bitstream_read(, 2); p->subframe[0].ad_cb_lag = 1; p->subframe[2].ad_cb_lag = 1; for (i = 0; i < SUBFRAMES; i++) { /* Extract combined gain */ -temp = get_bits(, 12); +temp = bitstream_read(, 12); ad_cb_len = 170; p->subframe[i].dirac_train = 0; if (p->cur_rate == RATE_6300 && p->pitch_lag[i >> 1] < SUBFRAME_LEN - 2) { @@ -130,16 +130,16 @@ static int unpack_bitstream(G723_1_Context *p, const uint8_t *buf, } } -p->subframe[0].grid_index = get_bits(, 1); -p->subframe[1].grid_index = get_bits(, 1); -p->subframe[2].grid_index = get_bits(, 1); -p->subframe[3].grid_index = get_bits(, 1); 
+p->subframe[0].grid_index = bitstream_read(, 1); +p->subframe[1].grid_index = bitstream_read(, 1); +p->subframe[2].grid_index = bitstream_read(, 1); +p->subframe[3].grid_index = bitstream_read(, 1); if (p->cur_rate == RATE_6300) { -skip_bits(, 1); /* skip reserved bit */ +bitstream_skip(, 1); /* skip reserved bit */
[libav-devel] [PATCH 29/35] hqx: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/hqx.c | 64 libavcodec/hqx.h | 5 +++-- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/libavcodec/hqx.c b/libavcodec/hqx.c index 7411d3f..3c359e3 100644 --- a/libavcodec/hqx.c +++ b/libavcodec/hqx.c @@ -24,8 +24,8 @@ #include "libavutil/intreadwrite.h" #include "avcodec.h" +#include "bitstream.h" #include "canopus.h" -#include "get_bits.h" #include "internal.h" #include "hqx.h" @@ -95,23 +95,23 @@ static inline void put_blocks(HQXContext *ctx, int plane, lsize * fields, block1, quant); } -static inline void hqx_get_ac(GetBitContext *gb, const HQXAC *ac, +static inline void hqx_get_ac(BitstreamContext *bc, const HQXAC *ac, int *run, int *lev) { int val; -val = show_bits(gb, ac->lut_bits); +val = bitstream_peek(bc, ac->lut_bits); if (ac->lut[val].bits == -1) { -GetBitContext gb2 = *gb; -skip_bits(, ac->lut_bits); -val = ac->lut[val].lev + show_bits(, ac->extra_bits); +BitstreamContext bc2 = *bc; +bitstream_skip(, ac->lut_bits); +val = ac->lut[val].lev + bitstream_peek(, ac->extra_bits); } *run = ac->lut[val].run; *lev = ac->lut[val].lev; -skip_bits(gb, ac->lut[val].bits); +bitstream_skip(bc, ac->lut[val].bits); } -static int decode_block(GetBitContext *gb, VLC *vlc, +static int decode_block(BitstreamContext *bc, VLC *vlc, const int *quants, int dcb, int16_t block[64], int *last_dc) { @@ -120,14 +120,14 @@ static int decode_block(GetBitContext *gb, VLC *vlc, int run, lev, pos = 1; memset(block, 0, 64 * sizeof(*block)); -dc = get_vlc2(gb, vlc->table, HQX_DC_VLC_BITS, 2); +dc = bitstream_read_vlc(bc, vlc->table, HQX_DC_VLC_BITS, 2); if (dc < 0) return AVERROR_INVALIDDATA; *last_dc += dc; block[0] = sign_extend(*last_dc << (12 - dcb), 12); -q = quants[get_bits(gb, 2)]; +q = quants[bitstream_read(bc, 2)]; if (q >= 128) ac_idx = HQX_AC_Q128; else if (q >= 64) @@ -142,7 +142,7 @@ static int decode_block(GetBitContext *gb, VLC *vlc, ac_idx = HQX_AC_Q0; do { -hqx_get_ac(gb, _hqx_ac[ac_idx], , ); +hqx_get_ac(bc, 
_hqx_ac[ac_idx], , ); pos += run; if (pos >= 64) break; @@ -155,24 +155,24 @@ static int decode_block(GetBitContext *gb, VLC *vlc, static int hqx_decode_422(HQXContext *ctx, int slice_no, int x, int y) { HQXSlice *slice = >slice[slice_no]; -GetBitContext *gb = >gb; +BitstreamContext *bc = >bc; const int *quants; int flag; int last_dc; int i, ret; if (ctx->interlaced) -flag = get_bits1(gb); +flag = bitstream_read_bit(bc); else flag = 0; -quants = hqx_quants[get_bits(gb, 4)]; +quants = hqx_quants[bitstream_read(bc, 4)]; for (i = 0; i < 8; i++) { int vlc_index = ctx->dcb - 9; if (i == 0 || i == 4 || i == 6) last_dc = 0; -ret = decode_block(gb, >dc_vlc[vlc_index], quants, +ret = decode_block(bc, >dc_vlc[vlc_index], quants, ctx->dcb, slice->block[i], _dc); if (ret < 0) return ret; @@ -189,14 +189,14 @@ static int hqx_decode_422(HQXContext *ctx, int slice_no, int x, int y) static int hqx_decode_422a(HQXContext *ctx, int slice_no, int x, int y) { HQXSlice *slice = >slice[slice_no]; -GetBitContext *gb = >gb; +BitstreamContext *bc = >bc; const int *quants; int flag = 0; int last_dc; int i, ret; int cbp; -cbp = get_vlc2(gb, ctx->cbp_vlc.table, ctx->cbp_vlc.bits, 1); +cbp = bitstream_read_vlc(bc, ctx->cbp_vlc.table, ctx->cbp_vlc.bits, 1); for (i = 0; i < 12; i++) memset(slice->block[i], 0, sizeof(**slice->block) * 64); @@ -204,9 +204,9 @@ static int hqx_decode_422a(HQXContext *ctx, int slice_no, int x, int y) slice->block[i][0] = -0x800; if (cbp) { if (ctx->interlaced) -flag = get_bits1(gb); +flag = bitstream_read_bit(bc); -quants = hqx_quants[get_bits(gb, 4)]; +quants = hqx_quants[bitstream_read(bc, 4)]; cbp |= cbp << 4; // alpha CBP if (cbp & 0x3) // chroma CBP - top @@ -218,7 +218,7 @@ static int hqx_decode_422a(HQXContext *ctx, int slice_no, int x, int y) last_dc = 0; if (cbp & (1 << i)) { int vlc_index = ctx->dcb - 9; -ret = decode_block(gb, >dc_vlc[vlc_index], quants, +ret = decode_block(bc, >dc_vlc[vlc_index], quants, ctx->dcb, slice->block[i], _dc); if (ret < 0) 
return ret; @@ -239,24 +239,24 @@ static int hqx_decode_422a(HQXContext *ctx, int
[libav-devel] [PATCH 31/35] nellymoser: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/nellymoserdec.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libavcodec/nellymoserdec.c b/libavcodec/nellymoserdec.c index 355935f..390872c 100644 --- a/libavcodec/nellymoserdec.c +++ b/libavcodec/nellymoserdec.c @@ -38,8 +38,8 @@ #define BITSTREAM_READER_LE #include "avcodec.h" +#include "bitstream.h" #include "fft.h" -#include "get_bits.h" #include "internal.h" #include "nellymoser.h" #include "sinewin.h" @@ -48,7 +48,7 @@ typedef struct NellyMoserDecodeContext { AVCodecContext* avctx; AVLFG random_state; -GetBitContext gb; +BitstreamContext bc; float scale_bias; AVFloatDSPContext fdsp; FFTContext imdct_ctx; @@ -67,14 +67,14 @@ static void nelly_decode_block(NellyMoserDecodeContext *s, int bits[NELLY_BUF_LEN]; unsigned char v; -init_get_bits(>gb, block, NELLY_BLOCK_LEN * 8); +bitstream_init(>bc, block, NELLY_BLOCK_LEN * 8); bptr = buf; pptr = pows; -val = ff_nelly_init_table[get_bits(>gb, 6)]; +val = ff_nelly_init_table[bitstream_read(>bc, 6)]; for (i=0 ; i 0) -val += ff_nelly_delta_table[get_bits(>gb, 5)]; +val += ff_nelly_delta_table[bitstream_read(>bc, 5)]; pval = -pow(2, val/2048) * s->scale_bias; for (j = 0; j < ff_nelly_band_sizes_table[i]; j++) { *bptr++ = val; @@ -88,8 +88,8 @@ static void nelly_decode_block(NellyMoserDecodeContext *s, for (i = 0; i < 2; i++) { aptr = audio + i * NELLY_BUF_LEN; -init_get_bits(>gb, block, NELLY_BLOCK_LEN * 8); -skip_bits_long(>gb, NELLY_HEADER_BITS + i*NELLY_DETAIL_BITS); +bitstream_init(>bc, block, NELLY_BLOCK_LEN * 8); +bitstream_skip(>bc, NELLY_HEADER_BITS + i * NELLY_DETAIL_BITS); for (j = 0; j < NELLY_FILL_LEN; j++) { if (bits[j] <= 0) { @@ -97,7 +97,7 @@ static void nelly_decode_block(NellyMoserDecodeContext *s, if (av_lfg_get(>random_state) & 1) aptr[j] *= -1.0; } else { -v = get_bits(>gb, bits[j]); +v = bitstream_read(>bc, bits[j]); aptr[j] = ff_nelly_dequantization_table[(1<
[libav-devel] [PATCH 21/35] exr: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov --- libavcodec/exr.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libavcodec/exr.c b/libavcodec/exr.c index d10841d..28cee84 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -39,8 +39,8 @@ #include "libavutil/opt.h" #include "avcodec.h" +#include "bitstream.h" #include "bytestream.h" -#include "get_bits.h" #include "internal.h" #include "mathops.h" #include "thread.h" @@ -379,16 +379,16 @@ static void huf_canonical_code_table(uint64_t *hcode) static int huf_unpack_enc_table(GetByteContext *gb, int32_t im, int32_t iM, uint64_t *hcode) { -GetBitContext gbit; -int ret = init_get_bits8(&gbit, gb->buffer, bytestream2_get_bytes_left(gb)); +BitstreamContext bc; +int ret = bitstream_init8(&bc, gb->buffer, bytestream2_get_bytes_left(gb)); if (ret < 0) return ret; for (; im <= iM; im++) { -uint64_t l = hcode[im] = get_bits(&gbit, 6); +uint64_t l = hcode[im] = bitstream_read(&bc, 6); if (l == LONG_ZEROCODE_RUN) { -int zerun = get_bits(&gbit, 8) + SHORTEST_LONG_RUN; +int zerun = bitstream_read(&bc, 8) + SHORTEST_LONG_RUN; if (im + zerun > iM + 1) return AVERROR_INVALIDDATA; @@ -410,7 +410,7 @@ static int huf_unpack_enc_table(GetByteContext *gb, } } -bytestream2_skip(gb, (get_bits_count(&gbit) + 7) / 8); +bytestream2_skip(gb, (bitstream_tell(&bc) + 7) / 8); huf_canonical_code_table(hcode); return 0; -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 19/35] escape124: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/escape124.c | 85 -- 1 file changed, 41 insertions(+), 44 deletions(-) diff --git a/libavcodec/escape124.c b/libavcodec/escape124.c index 6d1b487..879f00a 100644 --- a/libavcodec/escape124.c +++ b/libavcodec/escape124.c @@ -21,7 +21,7 @@ #define BITSTREAM_READER_LE #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "internal.h" typedef union MacroBlock { @@ -48,8 +48,9 @@ typedef struct Escape124Context { CodeBook codebooks[3]; } Escape124Context; -static int can_safely_read(GetBitContext* gb, int bits) { -return get_bits_left(gb) >= bits; +static int can_safely_read(BitstreamContext *bc, int bits) +{ +return bitstream_bits_left(bc) >= bits; } /** @@ -86,13 +87,13 @@ static av_cold int escape124_decode_close(AVCodecContext *avctx) return 0; } -static CodeBook unpack_codebook(GetBitContext* gb, unsigned depth, +static CodeBook unpack_codebook(BitstreamContext *bc, unsigned depth, unsigned size) { unsigned i, j; CodeBook cb = { 0 }; -if (!can_safely_read(gb, size * 34)) +if (!can_safely_read(bc, size * 34)) return cb; if (size >= INT_MAX / sizeof(MacroBlock)) @@ -104,9 +105,9 @@ static CodeBook unpack_codebook(GetBitContext* gb, unsigned depth, cb.depth = depth; cb.size = size; for (i = 0; i < size; i++) { -unsigned mask_bits = get_bits(gb, 4); -unsigned color0 = get_bits(gb, 15); -unsigned color1 = get_bits(gb, 15); +unsigned mask_bits = bitstream_read(bc, 4); +unsigned color0= bitstream_read(bc, 15); +unsigned color1= bitstream_read(bc, 15); for (j = 0; j < 4; j++) { if (mask_bits & (1 << j)) @@ -118,47 +119,43 @@ static CodeBook unpack_codebook(GetBitContext* gb, unsigned depth, return cb; } -static unsigned decode_skip_count(GetBitContext* gb) +static unsigned decode_skip_count(BitstreamContext *bc) { unsigned value; // This function reads a maximum of 23 bits, // which is within the padding space -if (!can_safely_read(gb, 1)) +if (!can_safely_read(bc, 1)) return -1; -value = get_bits1(gb); 
+value = bitstream_read_bit(bc); if (!value) return value; -value += get_bits(gb, 3); +value += bitstream_read(bc, 3); if (value != (1 + ((1 << 3) - 1))) return value; -value += get_bits(gb, 7); +value += bitstream_read(bc, 7); if (value != (1 + ((1 << 3) - 1)) + ((1 << 7) - 1)) return value; -return value + get_bits(gb, 12); +return value + bitstream_read(bc, 12); } -static MacroBlock decode_macroblock(Escape124Context* s, GetBitContext* gb, -int* codebook_index, int superblock_index) +static MacroBlock decode_macroblock(Escape124Context *s, BitstreamContext *bc, +int *codebook_index, int superblock_index) { // This function reads a maximum of 22 bits; the callers // guard this function appropriately unsigned block_index, depth; -int value = get_bits1(gb); +int value = bitstream_read_bit(bc); if (value) { static const char transitions[3][2] = { {2, 1}, {0, 2}, {1, 0} }; -value = get_bits1(gb); +value = bitstream_read_bit(bc); *codebook_index = transitions[*codebook_index][value]; } depth = s->codebooks[*codebook_index].depth; - -// depth = 0 means that this shouldn't read any bits; -// in theory, this is the same as get_bits(gb, 0), but -// that doesn't actually work. -block_index = get_bitsz(gb, depth); +block_index = bitstream_read(bc, depth); if (*codebook_index == 1) { block_index += superblock_index << s->codebooks[1].depth; @@ -208,7 +205,7 @@ static int escape124_decode_frame(AVCodecContext *avctx, Escape124Context *s = avctx->priv_data; AVFrame *frame = data; -GetBitContext gb; +BitstreamContext bc; unsigned frame_flags, frame_size; unsigned i; @@ -220,15 +217,15 @@ static int escape124_decode_frame(AVCodecContext *avctx, unsigned old_stride, new_stride; int ret; -init_get_bits(, buf, buf_size * 8); +bitstream_init(, buf, buf_size * 8); // This call also guards the potential depth reads for the // codebook unpacking. 
-if (!can_safely_read(, 64)) +if (!can_safely_read(, 64)) return -1; -frame_flags = get_bits_long(, 32); -frame_size = get_bits_long(, 32); +frame_flags = bitstream_read(, 32); +frame_size = bitstream_read(, 32); // Leave last frame unchanged // FIXME: Is this necessary? I haven't seen it in any real samples @@ -251,10 +248,10 @@ static int escape124_decode_frame(AVCodecContext *avctx, if (i == 2) { // This
[libav-devel] [PATCH 25/35] g2meet: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/g2meet.c | 30 +++--- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/libavcodec/g2meet.c b/libavcodec/g2meet.c index 7e90916..4a7f5a3 100644 --- a/libavcodec/g2meet.c +++ b/libavcodec/g2meet.c @@ -31,10 +31,10 @@ #include "libavutil/intreadwrite.h" #include "avcodec.h" +#include "bitstream.h" #include "blockdsp.h" #include "bytestream.h" #include "elsdec.h" -#include "get_bits.h" #include "idctdsp.h" #include "internal.h" #include "jpegtables.h" @@ -236,7 +236,7 @@ static void jpg_unescape(const uint8_t *src, int src_size, *dst_size = dst - dst_start; } -static int jpg_decode_block(JPGContext *c, GetBitContext *gb, +static int jpg_decode_block(JPGContext *c, BitstreamContext *bc, int plane, int16_t *block) { int dc, val, pos; @@ -244,18 +244,18 @@ static int jpg_decode_block(JPGContext *c, GetBitContext *gb, const uint8_t *qmat = is_chroma ? chroma_quant : luma_quant; c->bdsp.clear_block(block); -dc = get_vlc2(gb, c->dc_vlc[is_chroma].table, 9, 3); +dc = bitstream_read_vlc(bc, c->dc_vlc[is_chroma].table, 9, 3); if (dc < 0) return AVERROR_INVALIDDATA; if (dc) -dc = get_xbits(gb, dc); +dc = bitstream_read_xbits(bc, dc); dc= dc * qmat[0] + c->prev_dc[plane]; block[0] = dc; c->prev_dc[plane] = dc; pos = 0; while (pos < 63) { -val = get_vlc2(gb, c->ac_vlc[is_chroma].table, 9, 3); +val = bitstream_read_vlc(bc, c->ac_vlc[is_chroma].table, 9, 3); if (val < 0) return AVERROR_INVALIDDATA; pos += val >> 4; @@ -265,7 +265,7 @@ static int jpg_decode_block(JPGContext *c, GetBitContext *gb, if (val) { int nbits = val; -val = get_xbits(gb, nbits); +val = bitstream_read_xbits(bc, nbits); val*= qmat[ff_zigzag_direct[pos]]; block[c->scantable.permutated[pos]] = val; } @@ -286,7 +286,7 @@ static int jpg_decode_data(JPGContext *c, int width, int height, const uint8_t *mask, int mask_stride, int num_mbs, int swapuv) { -GetBitContext gb; +BitstreamContext bc; int mb_w, mb_h, mb_x, mb_y, i, j; int bx, by; int unesc_size; @@ -298,7 
+298,7 @@ static int jpg_decode_data(JPGContext *c, int width, int height, return ret; jpg_unescape(src, src_size, c->buf, _size); memset(c->buf + unesc_size, 0, AV_INPUT_BUFFER_PADDING_SIZE); -init_get_bits(, c->buf, unesc_size * 8); +bitstream_init(, c->buf, unesc_size * 8); width = FFALIGN(width, 16); mb_w = width>> 4; @@ -325,14 +325,14 @@ static int jpg_decode_data(JPGContext *c, int width, int height, if (mask && !mask[mb_x * 2 + i + j * mask_stride]) continue; num_mbs--; -if ((ret = jpg_decode_block(c, , 0, +if ((ret = jpg_decode_block(c, , 0, c->block[i + j * 2])) != 0) return ret; c->idsp.idct(c->block[i + j * 2]); } } for (i = 1; i < 3; i++) { -if ((ret = jpg_decode_block(c, , i, c->block[i + 3])) != 0) +if ((ret = jpg_decode_block(c, , i, c->block[i + 3])) != 0) return ret; c->idsp.idct(c->block[i + 3]); } @@ -1011,11 +1011,11 @@ static void kempf_restore_buf(const uint8_t *src, int len, int width, int height, const uint8_t *pal, int npal, int tidx) { -GetBitContext gb; +BitstreamContext bc; int i, j, nb, col; int align_width = FFALIGN(width, 16); -init_get_bits(, src, len * 8); +bitstream_init(, src, len * 8); if (npal <= 2) nb = 1; else if (npal <= 4) nb = 2; @@ -1023,16 +1023,16 @@ static void kempf_restore_buf(const uint8_t *src, int len, else nb = 8; for (j = 0; j < height; j++, dst += stride, jpeg_tile += tile_stride) { -if (get_bits(, 8)) +if (bitstream_read(, 8)) continue; for (i = 0; i < width; i++) { -col = get_bits(, nb); +col = bitstream_read(, nb); if (col != tidx) memcpy(dst + i * 3, pal + col * 3, 3); else memcpy(dst + i * 3, jpeg_tile + i * 3, 3); } -skip_bits_long(, nb * (align_width - width)); +bitstream_skip(, nb * (align_width - width)); } } -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org
[libav-devel] [PATCH 24/35] fraps: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov --- libavcodec/fraps.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libavcodec/fraps.c b/libavcodec/fraps.c index 55051ff..2237991 100644 --- a/libavcodec/fraps.c +++ b/libavcodec/fraps.c @@ -32,7 +32,7 @@ */ #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "huffman.h" #include "bytestream.h" #include "bswapdsp.h" @@ -94,7 +94,7 @@ static int fraps2_decode_plane(FrapsContext *s, uint8_t *dst, int stride, int w, const int step) { int i, j, ret; -GetBitContext gb; +BitstreamContext bc; VLC vlc; Node nodes[512]; @@ -111,10 +111,10 @@ static int fraps2_decode_plane(FrapsContext *s, uint8_t *dst, int stride, int w, s->bdsp.bswap_buf((uint32_t *) s->tmpbuf, (const uint32_t *) src, size >> 2); -init_get_bits(&gb, s->tmpbuf, size * 8); +bitstream_init(&bc, s->tmpbuf, size * 8); for (j = 0; j < h; j++) { for (i = 0; i < w*step; i += step) { -dst[i] = get_vlc2(&gb, vlc.table, VLC_BITS, 3); +dst[i] = bitstream_read_vlc(&bc, vlc.table, VLC_BITS, 3); /* lines are stored as deltas between previous lines * and we need to add 0x80 to the first lines of chroma planes */ @@ -122,7 +122,7 @@ static int fraps2_decode_plane(FrapsContext *s, uint8_t *dst, int stride, int w, dst[i] += dst[i - stride]; else if (Uoff) dst[i] += 0x80; -if (get_bits_left(&gb) < 0) { +if (bitstream_bits_left(&bc) < 0) { ff_free_vlc(&vlc); return AVERROR_INVALIDDATA; } -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 23/35] flashsv: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/flashsv.c | 57 ++-- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/libavcodec/flashsv.c b/libavcodec/flashsv.c index 2cf8f3f..20fa7bc 100644 --- a/libavcodec/flashsv.c +++ b/libavcodec/flashsv.c @@ -38,9 +38,10 @@ #include #include "libavutil/intreadwrite.h" + #include "avcodec.h" +#include "bitstream.h" #include "bytestream.h" -#include "get_bits.h" #include "internal.h" typedef struct BlockInfo { @@ -175,7 +176,7 @@ static int flashsv2_prime(FlashSVContext *s, uint8_t *src, int size) } static int flashsv_decode_block(AVCodecContext *avctx, AVPacket *avpkt, -GetBitContext *gb, int block_size, +BitstreamContext *bc, int block_size, int width, int height, int x_pos, int y_pos, int blk_idx) { @@ -194,7 +195,7 @@ static int flashsv_decode_block(AVCodecContext *avctx, AVPacket *avpkt, if (ret < 0) return ret; } -s->zstream.next_in = avpkt->data + get_bits_count(gb) / 8; +s->zstream.next_in = avpkt->data + bitstream_tell(bc) / 8; s->zstream.avail_in = block_size; s->zstream.next_out = s->tmpblock; s->zstream.avail_out = s->block_size * 3; @@ -210,7 +211,7 @@ static int flashsv_decode_block(AVCodecContext *avctx, AVPacket *avpkt, } if (s->is_keyframe) { -s->blocks[blk_idx].pos = s->keyframedata + (get_bits_count(gb) / 8); +s->blocks[blk_idx].pos = s->keyframedata + (bitstream_tell(bc) / 8); s->blocks[blk_idx].size = block_size; } @@ -233,7 +234,7 @@ static int flashsv_decode_block(AVCodecContext *avctx, AVPacket *avpkt, x_pos, s->diff_height, width, s->frame->linesize[0], s->pal); } -skip_bits_long(gb, 8 * block_size); /* skip the consumed bits */ +bitstream_skip(bc, 8 * block_size); /* skip the consumed bits */ return 0; } @@ -259,7 +260,7 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data, int buf_size = avpkt->size; FlashSVContext *s = avctx->priv_data; int h_blocks, v_blocks, h_part, v_part, i, j, ret; -GetBitContext gb; +BitstreamContext bc; /* no supplementary picture */ if (buf_size == 0) 
@@ -267,21 +268,21 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data, if (buf_size < 4) return -1; -init_get_bits(, avpkt->data, buf_size * 8); +bitstream_init(, avpkt->data, buf_size * 8); /* start to parse the bitstream */ -s->block_width = 16 * (get_bits(, 4) + 1); -s->image_width = get_bits(, 12); -s->block_height = 16 * (get_bits(, 4) + 1); -s->image_height = get_bits(, 12); +s->block_width = 16 * (bitstream_read(, 4) + 1); +s->image_width = bitstream_read(, 12); +s->block_height = 16 * (bitstream_read(, 4) + 1); +s->image_height = bitstream_read(, 12); if (s->ver == 2) { -skip_bits(, 6); -if (get_bits1()) { +bitstream_skip(, 6); +if (bitstream_read_bit()) { avpriv_request_sample(avctx, "iframe"); return AVERROR_PATCHWELCOME; } -if (get_bits1()) { +if (bitstream_read_bit()) { avpriv_request_sample(avctx, "Custom palette"); return AVERROR_PATCHWELCOME; } @@ -371,7 +372,7 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data, int has_diff = 0; /* get the size of the compressed zlib chunk */ -int size = get_bits(, 16); +int size = bitstream_read(, 16); s->color_depth= 0; s->zlibprime_curr = 0; @@ -379,17 +380,17 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data, s->diff_start = 0; s->diff_height= cur_blk_height; -if (8 * size > get_bits_left()) { +if (8 * size > bitstream_bits_left()) { av_frame_unref(s->frame); return AVERROR_INVALIDDATA; } if (s->ver == 2 && size) { -skip_bits(, 3); -s->color_depth= get_bits(, 2); -has_diff = get_bits1(); -s->zlibprime_curr = get_bits1(); -s->zlibprime_prev = get_bits1(); +bitstream_skip(, 3); +s->color_depth= bitstream_read(, 2); +has_diff = bitstream_read_bit(); +s->zlibprime_curr = bitstream_read_bit(); +s->zlibprime_prev = bitstream_read_bit(); if (s->color_depth != 0 && s->color_depth != 2) { av_log(avctx, AV_LOG_ERROR, @@ -404,8 +405,8 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
[libav-devel] [PATCH 22/35] faxcompr: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/faxcompr.c | 34 +- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/libavcodec/faxcompr.c b/libavcodec/faxcompr.c index 4cbda3f..8a9010d 100644 --- a/libavcodec/faxcompr.c +++ b/libavcodec/faxcompr.c @@ -25,7 +25,7 @@ * @author Konstantin Shishkov */ #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "put_bits.h" #include "faxcompr.h" @@ -123,7 +123,7 @@ av_cold void ff_ccitt_unpack_init(void) } -static int decode_group3_1d_line(AVCodecContext *avctx, GetBitContext *gb, +static int decode_group3_1d_line(AVCodecContext *avctx, BitstreamContext *bc, unsigned int pix_left, int *runs, const int *runend) { @@ -131,7 +131,7 @@ static int decode_group3_1d_line(AVCodecContext *avctx, GetBitContext *gb, unsigned int run = 0; unsigned int t; for (;;) { -t= get_vlc2(gb, ccitt_vlc[mode].table, 9, 2); +t= bitstream_read_vlc(bc, ccitt_vlc[mode].table, 9, 2); run += t; if (t < 64) { *runs++ = run; @@ -157,7 +157,7 @@ static int decode_group3_1d_line(AVCodecContext *avctx, GetBitContext *gb, return 0; } -static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb, +static int decode_group3_2d_line(AVCodecContext *avctx, BitstreamContext *bc, unsigned int width, int *runs, const int *runend, const int *ref) { @@ -168,7 +168,7 @@ static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb, runend--; // for the last written 0 while (offs < width) { -int cmode = get_vlc2(gb, ccitt_group3_2d_vlc.table, 9, 1); +int cmode = bitstream_read_vlc(bc, ccitt_group3_2d_vlc.table, 9, 1); if (cmode == -1) { av_log(avctx, AV_LOG_ERROR, "Incorrect mode VLC\n"); return AVERROR_INVALIDDATA; @@ -188,7 +188,7 @@ static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb, for (k = 0; k < 2; k++) { run = 0; for (;;) { -t = get_vlc2(gb, ccitt_vlc[mode].table, 9, 2); +t = bitstream_read_vlc(bc, ccitt_vlc[mode].table, 9, 2); if (t == -1) { av_log(avctx, AV_LOG_ERROR, "Incorrect 
code\n"); return AVERROR_INVALIDDATA; @@ -258,12 +258,12 @@ static void put_line(uint8_t *dst, int size, int width, const int *runs) flush_put_bits(); } -static int find_group3_syncmarker(GetBitContext *gb, int srcsize) +static int find_group3_syncmarker(BitstreamContext *bc, int srcsize) { unsigned int state = -1; -srcsize -= get_bits_count(gb); +srcsize -= bitstream_tell(bc); while (srcsize-- > 0) { -state += state + get_bits1(gb); +state += state + bitstream_read_bit(bc); if ((state & 0xFFF) == 1) return 0; } @@ -275,7 +275,7 @@ int ff_ccitt_unpack(AVCodecContext *avctx, const uint8_t *src, int srcsize, enum TiffCompr compr, int opts) { int j; -GetBitContext gb; +BitstreamContext bc; int *runs, *ref = NULL, *runend; int ret; int runsize = avctx->width + 2; @@ -289,27 +289,27 @@ int ff_ccitt_unpack(AVCodecContext *avctx, const uint8_t *src, int srcsize, ref[0] = avctx->width; ref[1] = 0; ref[2] = 0; -init_get_bits(, src, srcsize * 8); +bitstream_init(, src, srcsize * 8); for (j = 0; j < height; j++) { runend = runs + runsize; if (compr == TIFF_G4) { -ret = decode_group3_2d_line(avctx, , avctx->width, runs, runend, +ret = decode_group3_2d_line(avctx, , avctx->width, runs, runend, ref); if (ret < 0) goto fail; } else { int g3d1 = (compr == TIFF_G3) && !(opts & 1); if (compr != TIFF_CCITT_RLE && -find_group3_syncmarker(, srcsize * 8) < 0) +find_group3_syncmarker(, srcsize * 8) < 0) break; -if (compr == TIFF_CCITT_RLE || g3d1 || get_bits1()) -ret = decode_group3_1d_line(avctx, , avctx->width, runs, +if (compr == TIFF_CCITT_RLE || g3d1 || bitstream_read_bit()) +ret = decode_group3_1d_line(avctx, , avctx->width, runs, runend); else -ret = decode_group3_2d_line(avctx, , avctx->width, runs, +ret = decode_group3_2d_line(avctx, , avctx->width, runs, runend, ref); if (compr == TIFF_CCITT_RLE) -align_get_bits(); +bitstream_align(); } if (avctx->err_recognition &
[libav-devel] [PATCH 20/35] escape130: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/escape130.c | 46 +++--- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/libavcodec/escape130.c b/libavcodec/escape130.c index bfc1f3f..544f36d 100644 --- a/libavcodec/escape130.c +++ b/libavcodec/escape130.c @@ -24,7 +24,7 @@ #define BITSTREAM_READER_LE #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "internal.h" typedef struct Escape130Context { @@ -163,23 +163,23 @@ static av_cold int escape130_decode_close(AVCodecContext *avctx) return 0; } -static int decode_skip_count(GetBitContext* gb) +static int decode_skip_count(BitstreamContext *bc) { int value; -value = get_bits1(gb); +value = bitstream_read_bit(bc); if (value) return 0; -value = get_bits(gb, 3); +value = bitstream_read(bc, 3); if (value) return value; -value = get_bits(gb, 8); +value = bitstream_read(bc, 8); if (value) return value + 7; -value = get_bits(gb, 15); +value = bitstream_read(bc, 15); if (value) return value + 262; @@ -193,7 +193,7 @@ static int escape130_decode_frame(AVCodecContext *avctx, void *data, int buf_size= avpkt->size; Escape130Context *s = avctx->priv_data; AVFrame *pic= data; -GetBitContext gb; +BitstreamContext bc; int ret; uint8_t *old_y, *old_cb, *old_cr, @@ -216,7 +216,7 @@ static int escape130_decode_frame(AVCodecContext *avctx, void *data, if ((ret = ff_get_buffer(avctx, pic, 0)) < 0) return ret; -init_get_bits(, buf + 16, (buf_size - 16) * 8); +bitstream_init(, buf + 16, (buf_size - 16) * 8); new_y = s->new_y; new_cb = s->new_u; @@ -235,7 +235,7 @@ static int escape130_decode_frame(AVCodecContext *avctx, void *data, // Note that this call will make us skip the rest of the blocks // if the frame ends prematurely. 
if (skip == -1) -skip = decode_skip_count(&gb); +skip = decode_skip_count(&bc); if (skip == -1) { av_log(avctx, AV_LOG_ERROR, "Error decoding skip value\n"); return AVERROR_INVALIDDATA; @@ -250,31 +250,31 @@ static int escape130_decode_frame(AVCodecContext *avctx, void *data, cb = old_cb[0]; cr = old_cr[0]; } else { -if (get_bits1(&gb)) { -unsigned sign_selector = get_bits(&gb, 6); -unsigned difference_selector = get_bits(&gb, 2); -y_avg = 2 * get_bits(&gb, 5); +if (bitstream_read_bit(&bc)) { +unsigned sign_selector = bitstream_read(&bc, 6); +unsigned difference_selector = bitstream_read(&bc, 2); +y_avg = 2 * bitstream_read(&bc, 5); for (i = 0; i < 4; i++) { y[i] = av_clip(y_avg + offset_table[difference_selector] * sign_table[sign_selector][i], 0, 63); } -} else if (get_bits1(&gb)) { -if (get_bits1(&gb)) { -y_avg = get_bits(&gb, 6); +} else if (bitstream_read_bit(&bc)) { +if (bitstream_read_bit(&bc)) { +y_avg = bitstream_read(&bc, 6); } else { -unsigned adjust_index = get_bits(&gb, 3); +unsigned adjust_index = bitstream_read(&bc, 3); y_avg = (y_avg + luma_adjust[adjust_index]) & 63; } for (i = 0; i < 4; i++) y[i] = y_avg; } -if (get_bits1(&gb)) { -if (get_bits1(&gb)) { -cb = get_bits(&gb, 5); -cr = get_bits(&gb, 5); +if (bitstream_read_bit(&bc)) { +if (bitstream_read_bit(&bc)) { +cb = bitstream_read(&bc, 5); +cr = bitstream_read(&bc, 5); } else { -unsigned adjust_index = get_bits(&gb, 3); +unsigned adjust_index = bitstream_read(&bc, 3); cb = (cb + chroma_adjust[0][adjust_index]) & 31; cr = (cr + chroma_adjust[1][adjust_index]) & 31; } @@ -333,7 +333,7 @@ static int escape130_decode_frame(AVCodecContext *avctx, void *data, } ff_dlog(avctx, "Frame data: provided %d bytes, used %d bytes\n", -buf_size, get_bits_count(&gb) >> 3); +buf_size, bitstream_tell(&bc) >> 3); FFSWAP(uint8_t*, s->old_y, s->new_y); FFSWAP(uint8_t*, s->old_u, s->new_u); -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 09/35] atrac: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/atrac1.c | 34 ++- libavcodec/atrac3.c | 97 +++-- 2 files changed, 67 insertions(+), 64 deletions(-) diff --git a/libavcodec/atrac1.c b/libavcodec/atrac1.c index e938976..60be853 100644 --- a/libavcodec/atrac1.c +++ b/libavcodec/atrac1.c @@ -33,8 +33,9 @@ #include #include "libavutil/float_dsp.h" + #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "fft.h" #include "internal.h" #include "sinewin.h" @@ -164,30 +165,31 @@ static int at1_imdct_block(AT1SUCtx* su, AT1Ctx *q) * Parse the block size mode byte */ -static int at1_parse_bsm(GetBitContext* gb, int log2_block_cnt[AT1_QMF_BANDS]) +static int at1_parse_bsm(BitstreamContext *bc, + int log2_block_cnt[AT1_QMF_BANDS]) { int log2_block_count_tmp, i; for (i = 0; i < 2; i++) { /* low and mid band */ -log2_block_count_tmp = get_bits(gb, 2); +log2_block_count_tmp = bitstream_read(bc, 2); if (log2_block_count_tmp & 1) return AVERROR_INVALIDDATA; log2_block_cnt[i] = 2 - log2_block_count_tmp; } /* high band */ -log2_block_count_tmp = get_bits(gb, 2); +log2_block_count_tmp = bitstream_read(bc, 2); if (log2_block_count_tmp != 0 && log2_block_count_tmp != 3) return AVERROR_INVALIDDATA; log2_block_cnt[IDX_HIGH_BAND] = 3 - log2_block_count_tmp; -skip_bits(gb, 2); +bitstream_skip(bc, 2); return 0; } -static int at1_unpack_dequant(GetBitContext* gb, AT1SUCtx* su, +static int at1_unpack_dequant(BitstreamContext *bc, AT1SUCtx *su, float spec[AT1_SU_SAMPLES]) { int bits_used, band_num, bfu_num, i; @@ -195,22 +197,22 @@ static int at1_unpack_dequant(GetBitContext* gb, AT1SUCtx* su, uint8_t idsfs[AT1_MAX_BFU]; ///< the scalefactor indexes for each BFU /* parse the info byte (2nd byte) telling how much BFUs were coded */ -su->num_bfus = bfu_amount_tab1[get_bits(gb, 3)]; +su->num_bfus = bfu_amount_tab1[bitstream_read(bc, 3)]; /* calc number of consumed bits: num_BFUs * (idwl(4bits) + idsf(6bits)) + log2_block_count(8bits) + info_byte(8bits) + info_byte_copy(8bits) + 
log2_block_count_copy(8bits) */ bits_used = su->num_bfus * 10 + 32 + -bfu_amount_tab2[get_bits(gb, 2)] + -(bfu_amount_tab3[get_bits(gb, 3)] << 1); +bfu_amount_tab2[bitstream_read(bc, 2)] + +(bfu_amount_tab3[bitstream_read(bc, 3)] << 1); /* get word length index (idwl) for each BFU */ for (i = 0; i < su->num_bfus; i++) -idwls[i] = get_bits(gb, 4); +idwls[i] = bitstream_read(bc, 4); /* get scalefactor index (idsf) for each BFU */ for (i = 0; i < su->num_bfus; i++) -idsfs[i] = get_bits(gb, 6); +idsfs[i] = bitstream_read(bc, 6); /* zero idwl/idsf for empty BFUs */ for (i = su->num_bfus; i < AT1_MAX_BFU; i++) @@ -240,7 +242,7 @@ static int at1_unpack_dequant(GetBitContext* gb, AT1SUCtx* su, /* read in a quantized spec and convert it to * signed int and then inverse quantization */ -spec[pos+i] = get_sbits(gb, word_len) * scale_factor * max_quant; +spec[pos+i] = bitstream_read_signed(bc, word_len) * scale_factor * max_quant; } } else { /* word_len = 0 -> empty BFU, zero all specs in the emty BFU */ memset([pos], 0, num_specs * sizeof(float)); @@ -277,7 +279,7 @@ static int atrac1_decode_frame(AVCodecContext *avctx, void *data, int buf_size = avpkt->size; AT1Ctx *q = avctx->priv_data; int ch, ret; -GetBitContext gb; +BitstreamContext bc; if (buf_size < 212 * avctx->channels) { @@ -295,14 +297,14 @@ static int atrac1_decode_frame(AVCodecContext *avctx, void *data, for (ch = 0; ch < avctx->channels; ch++) { AT1SUCtx* su = >SUs[ch]; -init_get_bits(, [212 * ch], 212 * 8); +bitstream_init(, [212 * ch], 212 * 8); /* parse block_size_mode, 1st byte */ -ret = at1_parse_bsm(, su->log2_block_count); +ret = at1_parse_bsm(, su->log2_block_count); if (ret < 0) return ret; -ret = at1_unpack_dequant(, su, q->spec); +ret = at1_unpack_dequant(, su, q->spec); if (ret < 0) return ret; diff --git a/libavcodec/atrac3.c b/libavcodec/atrac3.c index 2e1fd3c..be32a0e 100644 --- a/libavcodec/atrac3.c +++ b/libavcodec/atrac3.c @@ -38,10 +38,11 @@ #include "libavutil/attributes.h" #include 
"libavutil/float_dsp.h" + #include "avcodec.h" +#include "bitstream.h" #include "bytestream.h" #include "fft.h" -#include "get_bits.h" #include "internal.h" #include "atrac.h" @@
[libav-devel] [PATCH 14/35] cdxl: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov --- libavcodec/cdxl.c | 19 ++- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/libavcodec/cdxl.c b/libavcodec/cdxl.c index 99e96eb..4c0410d 100644 --- a/libavcodec/cdxl.c +++ b/libavcodec/cdxl.c @@ -21,8 +21,9 @@ #include "libavutil/intreadwrite.h" #include "libavutil/imgutils.h" + #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "internal.h" #define BIT_PLANAR 0x00 @@ -69,30 +70,30 @@ static void import_palette(CDXLVideoContext *c, uint32_t *new_palette) static void bitplanar2chunky(CDXLVideoContext *c, int linesize, uint8_t *out) { -GetBitContext gb; +BitstreamContext bc; int x, y, plane; -init_get_bits(&gb, c->video, c->video_size * 8); +bitstream_init(&bc, c->video, c->video_size * 8); for (plane = 0; plane < c->bpp; plane++) { for (y = 0; y < c->avctx->height; y++) { for (x = 0; x < c->avctx->width; x++) -out[linesize * y + x] |= get_bits1(&gb) << plane; -skip_bits(&gb, c->padded_bits); +out[linesize * y + x] |= bitstream_read_bit(&bc) << plane; +bitstream_skip(&bc, c->padded_bits); } } } static void bitline2chunky(CDXLVideoContext *c, int linesize, uint8_t *out) { -GetBitContext gb; +BitstreamContext bc; int x, y, plane; -init_get_bits(&gb, c->video, c->video_size * 8); +bitstream_init(&bc, c->video, c->video_size * 8); for (y = 0; y < c->avctx->height; y++) { for (plane = 0; plane < c->bpp; plane++) { for (x = 0; x < c->avctx->width; x++) -out[linesize * y + x] |= get_bits1(&gb) << plane; -skip_bits(&gb, c->padded_bits); +out[linesize * y + x] |= bitstream_read_bit(&bc) << plane; +bitstream_skip(&bc, c->padded_bits); } } } -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 06/35] 4xm: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/4xm.c | 27 ++- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/libavcodec/4xm.c b/libavcodec/4xm.c index b2d4db2..ee9d020 100644 --- a/libavcodec/4xm.c +++ b/libavcodec/4xm.c @@ -29,11 +29,12 @@ #include "libavutil/frame.h" #include "libavutil/imgutils.h" #include "libavutil/intreadwrite.h" + #include "avcodec.h" +#include "bitstream.h" #include "blockdsp.h" #include "bswapdsp.h" #include "bytestream.h" -#include "get_bits.h" #include "internal.h" #define BLOCK_TYPE_VLC_BITS 5 @@ -136,8 +137,8 @@ typedef struct FourXContext { BswapDSPContext bbdsp; uint16_t *frame_buffer; uint16_t *last_frame_buffer; -GetBitContext pre_gb; ///< ac/dc prefix -GetBitContext gb; +BitstreamContext pre_bc;// ac/dc prefix +BitstreamContext bc; GetByteContext g; GetByteContext g2; int mv[256]; @@ -352,8 +353,8 @@ static int decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src, return AVERROR_INVALIDDATA; h = 1 << log2h; -code = get_vlc2(>gb, block_type_vlc[1 - (f->version > 1)][index].table, - BLOCK_TYPE_VLC_BITS, 1); +code = bitstream_read_vlc(>bc, block_type_vlc[1 - (f->version > 1)][index].table, + BLOCK_TYPE_VLC_BITS, 1); if (code < 0 || code > 6) return AVERROR_INVALIDDATA; @@ -453,7 +454,7 @@ static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length) bitstream_size / 4); memset((uint8_t*)f->bitstream_buffer + bitstream_size, 0, AV_INPUT_BUFFER_PADDING_SIZE); -init_get_bits(>gb, f->bitstream_buffer, 8 * bitstream_size); +bitstream_init(>bc, f->bitstream_buffer, 8 * bitstream_size); wordstream_offset = extra + bitstream_size; bytestream_offset = extra + bitstream_size + wordstream_size; @@ -484,19 +485,19 @@ static int decode_i_block(FourXContext *f, int16_t *block) int code, i, j, level, val; /* DC coef */ -val = get_vlc2(>pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3); +val = bitstream_read_vlc(>pre_bc, f->pre_vlc.table, ACDC_VLC_BITS, 3); if (val >> 4) av_log(f->avctx, AV_LOG_ERROR, "error dc run != 
0\n"); if (val) -val = get_xbits(>gb, val); +val = bitstream_read_xbits(>bc, val); val= val * dequant_table[0] + f->last_dc; f->last_dc = block[0] = val; /* AC coefs */ i = 1; for (;;) { -code = get_vlc2(>pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3); +code = bitstream_read_vlc(>pre_bc, f->pre_vlc.table, ACDC_VLC_BITS, 3); /* EOB */ if (code == 0) @@ -504,7 +505,7 @@ static int decode_i_block(FourXContext *f, int16_t *block) if (code == 0xf0) { i += 16; } else { -level = get_xbits(>gb, code & 0xf); +level = bitstream_read_xbits(>bc, code & 0xf); i+= code >> 4; if (i >= 64) { av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i); @@ -764,7 +765,7 @@ static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length) return AVERROR_INVALIDDATA; } -init_get_bits(>gb, buf + 4, 8 * bitstream_size); +bitstream_init(>bc, buf + 4, 8 * bitstream_size); prestream_size = length + buf - prestream; @@ -776,7 +777,7 @@ static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length) prestream_size / 4); memset((uint8_t*)f->bitstream_buffer + prestream_size, 0, AV_INPUT_BUFFER_PADDING_SIZE); -init_get_bits(>pre_gb, f->bitstream_buffer, 8 * prestream_size); +bitstream_init(>pre_bc, f->bitstream_buffer, 8 * prestream_size); f->last_dc = 0 * 128 * 8 * 8; @@ -789,7 +790,7 @@ static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length) } } -if (get_vlc2(>pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256) +if (bitstream_read_vlc(>pre_bc, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256) av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n"); return 0; -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 03/35] eamad: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/eamad.c | 42 +++--- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/libavcodec/eamad.c b/libavcodec/eamad.c index 070cfdb..7509c2d 100644 --- a/libavcodec/eamad.c +++ b/libavcodec/eamad.c @@ -29,16 +29,17 @@ */ #include "avcodec.h" +#include "bitstream.h" #include "blockdsp.h" #include "bytestream.h" #include "bswapdsp.h" -#include "get_bits.h" #include "aandcttab.h" #include "eaidct.h" #include "idctdsp.h" #include "internal.h" #include "mpeg12data.h" #include "mpeg12vlc.h" +#include "vlc.h" #define EA_PREAMBLE_SIZE8 #define MADk_TAG MKTAG('M', 'A', 'D', 'k')/* MAD I-frame */ @@ -51,7 +52,7 @@ typedef struct MadContext { BswapDSPContext bbdsp; IDCTDSPContext idsp; AVFrame *last_frame; -GetBitContext gb; +BitstreamContext bc; void *bitstream_buf; unsigned int bitstream_buf_size; DECLARE_ALIGNED(16, int16_t, block)[64]; @@ -129,17 +130,15 @@ static inline void decode_block_intra(MadContext *s, int16_t * block) const uint8_t *scantable = s->scantable.permutated; int16_t *quant_matrix = s->quant_matrix; -block[0] = (128 + get_sbits(>gb, 8)) * quant_matrix[0]; +block[0] = (128 + bitstream_read_signed(>bc, 8)) * quant_matrix[0]; /* The RL decoder is derived from mpeg1_decode_block_intra; Escaped level and run values a decoded differently */ i = 0; { -OPEN_READER(re, >gb); /* now quantify & encode AC coefficients */ for (;;) { -UPDATE_CACHE(re, >gb); -GET_RL_VLC(level, run, re, >gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0); +BITSTREAM_RL_VLC(level, run, >bc, rl->rl_vlc[0], TEX_VLC_BITS, 2); if (level == 127) { break; @@ -153,15 +152,12 @@ static inline void decode_block_intra(MadContext *s, int16_t * block) j = scantable[i]; level = (level*quant_matrix[j]) >> 4; level = (level-1)|1; -level = (level ^ SHOW_SBITS(re, >gb, 1)) - SHOW_SBITS(re, >gb, 1); -LAST_SKIP_BITS(re, >gb, 1); +level = bitstream_apply_sign(>bc, level); } else { /* escape */ -UPDATE_CACHE(re, >gb); -level = SHOW_SBITS(re, >gb, 10); SKIP_BITS(re, >gb, 
10); +level = bitstream_read_signed(>bc, 10); -UPDATE_CACHE(re, >gb); -run = SHOW_UBITS(re, >gb, 6)+1; LAST_SKIP_BITS(re, >gb, 6); +run = bitstream_read(>bc, 6) + 1; i += run; if (i > 63) { @@ -183,17 +179,17 @@ static inline void decode_block_intra(MadContext *s, int16_t * block) block[j] = level; } -CLOSE_READER(re, >gb); } } -static int decode_motion(GetBitContext *gb) +static int decode_motion(BitstreamContext *bc) { int value = 0; -if (get_bits1(gb)) { -if (get_bits1(gb)) + +if (bitstream_read_bit(bc)) { +if (bitstream_read_bit(bc)) value = -17; -value += get_bits(gb, 4) + 1; +value += bitstream_read(bc, 4) + 1; } return value; } @@ -205,11 +201,11 @@ static void decode_mb(MadContext *s, AVFrame *frame, int inter) int j; if (inter) { -int v = decode210(>gb); +int v = bitstream_decode210(>bc); if (v < 2) { -mv_map = v ? get_bits(>gb, 6) : 63; -mv_x = decode_motion(>gb); -mv_y = decode_motion(>gb); +mv_map = v ? bitstream_read(>bc, 6) : 63; +mv_x = decode_motion(>bc); +mv_y = decode_motion(>bc); } else { mv_map = 0; } @@ -217,7 +213,7 @@ static void decode_mb(MadContext *s, AVFrame *frame, int inter) for (j=0; j<6; j++) { if (mv_map & (1< gb); +int add = 2 * decode_motion(>bc); comp_block(s, frame, s->mb_x, s->mb_y, j, mv_x, mv_y, add); } else { s->bdsp.clear_block(s->block); @@ -299,7 +295,7 @@ static int decode_frame(AVCodecContext *avctx, return AVERROR(ENOMEM); s->bbdsp.bswap16_buf(s->bitstream_buf, (const uint16_t *)(buf + bytestream2_tell()), bytestream2_get_bytes_left() / 2); -init_get_bits(>gb, s->bitstream_buf, 8*(bytestream2_get_bytes_left())); +bitstream_init8(>bc, s->bitstream_buf, bytestream2_get_bytes_left()); for (s->mb_y=0; s->mb_y < (avctx->height+15)/16; s->mb_y++) for (s->mb_x=0; s->mb_x < (avctx->width +15)/16; s->mb_x++) -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 18/35] dvdsubdec: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/dvdsubdec.c | 34 +- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/libavcodec/dvdsubdec.c b/libavcodec/dvdsubdec.c index 86c2873..b02bb6b 100644 --- a/libavcodec/dvdsubdec.c +++ b/libavcodec/dvdsubdec.c @@ -20,7 +20,7 @@ */ #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "internal.h" #include "libavutil/attributes.h" @@ -50,13 +50,13 @@ static void yuv_a_to_rgba(const uint8_t *ycbcr, const uint8_t *alpha, uint32_t * } } -static int decode_run_2bit(GetBitContext *gb, int *color) +static int decode_run_2bit(BitstreamContext *bc, int *color) { unsigned int v, t; v = 0; for (t = 1; v < t && t <= 0x40; t <<= 2) -v = (v << 4) | get_bits(gb, 4); +v = (v << 4) | bitstream_read(bc, 4); *color = v & 3; if (v < 4) { /* Code for fill rest of line */ return INT_MAX; @@ -64,23 +64,23 @@ static int decode_run_2bit(GetBitContext *gb, int *color) return v >> 2; } -static int decode_run_8bit(GetBitContext *gb, int *color) +static int decode_run_8bit(BitstreamContext *bc, int *color) { int len; -int has_run = get_bits1(gb); -if (get_bits1(gb)) -*color = get_bits(gb, 8); +int has_run = bitstream_read_bit(bc); +if (bitstream_read_bit(bc)) +*color = bitstream_read(bc, 8); else -*color = get_bits(gb, 2); +*color = bitstream_read(bc, 2); if (has_run) { -if (get_bits1(gb)) { -len = get_bits(gb, 7); +if (bitstream_read_bit(bc)) { +len = bitstream_read(bc, 7); if (len == 0) len = INT_MAX; else len += 9; } else -len = get_bits(gb, 3) + 2; +len = bitstream_read(bc, 3) + 2; } else len = 1; return len; @@ -89,24 +89,24 @@ static int decode_run_8bit(GetBitContext *gb, int *color) static int decode_rle(uint8_t *bitmap, int linesize, int w, int h, const uint8_t *buf, int start, int buf_size, int is_8bit) { -GetBitContext gb; +BitstreamContext bc; int bit_len; int x, y, len, color; uint8_t *d; bit_len = (buf_size - start) * 8; -init_get_bits(, buf + start, bit_len); +bitstream_init(, buf + start, bit_len); x = 0; 
y = 0; d = bitmap; for(;;) { -if (get_bits_count(&gb) > bit_len) +if (bitstream_tell(&bc) > bit_len) return -1; if (is_8bit) -len = decode_run_8bit(&gb, &color); +len = decode_run_8bit(&bc, &color); else -len = decode_run_2bit(&gb, &color); +len = decode_run_2bit(&bc, &color); len = FFMIN(len, w - x); memset(d + x, color, len); x += len; @@ -117,7 +117,7 @@ static int decode_rle(uint8_t *bitmap, int linesize, int w, int h, d += linesize; x = 0; /* byte align */ -align_get_bits(&gb); +bitstream_align(&bc); } } return 0; -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 05/35] on2avc: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/on2avc.c | 64 +++-- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/libavcodec/on2avc.c b/libavcodec/on2avc.c index 2a528c6..1b81980 100644 --- a/libavcodec/on2avc.c +++ b/libavcodec/on2avc.c @@ -22,11 +22,13 @@ #include "libavutil/channel_layout.h" #include "libavutil/float_dsp.h" + #include "avcodec.h" +#include "bitstream.h" #include "bytestream.h" #include "fft.h" -#include "get_bits.h" #include "internal.h" +#include "vlc.h" #include "on2avcdata.h" @@ -84,11 +86,11 @@ typedef struct On2AVCContext { DECLARE_ALIGNED(32, float, short_win)[ON2AVC_SUBFRAME_SIZE / 8]; } On2AVCContext; -static void on2avc_read_ms_info(On2AVCContext *c, GetBitContext *gb) +static void on2avc_read_ms_info(On2AVCContext *c, BitstreamContext *bc) { int w, b, band_off = 0; -c->ms_present = get_bits1(gb); +c->ms_present = bitstream_read_bit(bc); if (!c->ms_present) return; for (w = 0; w < c->num_windows; w++) { @@ -100,12 +102,12 @@ static void on2avc_read_ms_info(On2AVCContext *c, GetBitContext *gb) continue; } for (b = 0; b < c->num_bands; b++) -c->ms_info[band_off++] = get_bits1(gb); +c->ms_info[band_off++] = bitstream_read_bit(bc); } } // do not see Table 17 in ISO/IEC 13818-7 -static int on2avc_decode_band_types(On2AVCContext *c, GetBitContext *gb) +static int on2avc_decode_band_types(On2AVCContext *c, BitstreamContext *bc) { int bits_per_sect = c->is_long ? 
5 : 3; int esc_val = (1 << bits_per_sect) - 1; @@ -113,10 +115,10 @@ static int on2avc_decode_band_types(On2AVCContext *c, GetBitContext *gb) int band = 0, i, band_type, run_len, run; while (band < num_bands) { -band_type = get_bits(gb, 4); +band_type = bitstream_read(bc, 4); run_len = 1; do { -run = get_bits(gb, bits_per_sect); +run = bitstream_read(bc, bits_per_sect); run_len += run; } while (run == esc_val); if (band + run_len > num_bands) { @@ -135,7 +137,7 @@ static int on2avc_decode_band_types(On2AVCContext *c, GetBitContext *gb) // completely not like Table 18 in ISO/IEC 13818-7 // (no intensity stereo, different coding for the first coefficient) -static int on2avc_decode_band_scales(On2AVCContext *c, GetBitContext *gb) +static int on2avc_decode_band_scales(On2AVCContext *c, BitstreamContext *bc) { int w, w2, b, scale, first = 1; int band_off = 0; @@ -165,10 +167,10 @@ static int on2avc_decode_band_scales(On2AVCContext *c, GetBitContext *gb) } } if (first) { -scale = get_bits(gb, 7); +scale = bitstream_read(bc, 7); first = 0; } else { -scale += get_vlc2(gb, c->scale_diff.table, 9, 3) - 60; +scale += bitstream_read_vlc(bc, c->scale_diff.table, 9, 3) - 60; } if (scale < 0 || scale > 127) { av_log(c->avctx, AV_LOG_ERROR, "Invalid scale value %d\n", @@ -188,13 +190,13 @@ static inline float on2avc_scale(int v, float scale) } // spectral data is coded completely differently - there are no unsigned codebooks -static int on2avc_decode_quads(On2AVCContext *c, GetBitContext *gb, float *dst, +static int on2avc_decode_quads(On2AVCContext *c, BitstreamContext *bc, float *dst, int dst_size, int type, float band_scale) { int i, j, val, val1; for (i = 0; i < dst_size; i += 4) { -val = get_vlc2(gb, c->cb_vlc[type].table, 9, 3); +val = bitstream_read_vlc(bc, c->cb_vlc[type].table, 9, 3); for (j = 0; j < 4; j++) { val1 = sign_extend((val >> (12 - j * 4)) & 0xF, 4); @@ -205,11 +207,11 @@ static int on2avc_decode_quads(On2AVCContext *c, GetBitContext *gb, float *dst, return 0; 
} -static inline int get_egolomb(GetBitContext *gb) +static inline int get_egolomb(BitstreamContext *bc) { int v = 4; -while (get_bits1(gb)) { +while (bitstream_read_bit(bc)) { v++; if (v > 30) { av_log(NULL, AV_LOG_WARNING, "Too large golomb code in get_egolomb.\n"); @@ -218,27 +220,27 @@ static inline int get_egolomb(GetBitContext *gb) } } -return (1 << v) + get_bits_long(gb, v); +return (1 << v) + bitstream_read(bc, v); } -static int on2avc_decode_pairs(On2AVCContext *c, GetBitContext *gb, float *dst, +static int on2avc_decode_pairs(On2AVCContext *c, BitstreamContext *bc, float *dst, int dst_size, int type, float band_scale) { int i, val, val1, val2, sign; for (i = 0; i < dst_size; i += 2) { -val = get_vlc2(gb, c->cb_vlc[type].table, 9, 3); +val = bitstream_read_vlc(bc, c->cb_vlc[type].table, 9, 3); val1 =
[libav-devel] [PATCH 12/35] bink: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/bink.c | 264 ++ 1 file changed, 128 insertions(+), 136 deletions(-) diff --git a/libavcodec/bink.c b/libavcodec/bink.c index 7433697..74db80a 100644 --- a/libavcodec/bink.c +++ b/libavcodec/bink.c @@ -28,8 +28,8 @@ #include "avcodec.h" #include "binkdata.h" #include "binkdsp.h" +#include "bitstream.h" #include "blockdsp.h" -#include "get_bits.h" #include "hpeldsp.h" #include "internal.h" #include "mathops.h" @@ -93,8 +93,9 @@ typedef struct Tree { uint8_t syms[16]; ///< leaf value to symbol mapping } Tree; -#define GET_HUFF(gb, tree) (tree).syms[get_vlc2(gb, bink_trees[(tree).vlc_num].table,\ - bink_trees[(tree).vlc_num].bits, 1)] +#define GET_HUFF(bc, tree)\ +(tree).syms[bitstream_read_vlc(bc, bink_trees[(tree).vlc_num].table, \ + bink_trees[(tree).vlc_num].bits, 1)] /** * data structure used for decoding single Bink data type @@ -204,18 +205,18 @@ static av_cold void free_bundles(BinkContext *c) /** * Merge two consequent lists of equal size depending on bits read. * - * @param gb context for reading bits + * @param bc context for reading bits * @param dst buffer where merged list will be written to * @param src pointer to the head of the first list (the second lists starts at src+size) * @param size input lists size */ -static void merge(GetBitContext *gb, uint8_t *dst, uint8_t *src, int size) +static void merge(BitstreamContext *bc, uint8_t *dst, uint8_t *src, int size) { uint8_t *src2 = src + size; int size2 = size; do { -if (!get_bits1(gb)) { +if (!bitstream_read_bit(bc)) { *dst++ = *src++; size--; } else { @@ -233,37 +234,37 @@ static void merge(GetBitContext *gb, uint8_t *dst, uint8_t *src, int size) /** * Read information about Huffman tree used to decode data. 
* - * @param gb context for reading bits + * @param bc context for reading bits * @param tree pointer for storing tree data */ -static void read_tree(GetBitContext *gb, Tree *tree) +static void read_tree(BitstreamContext *bc, Tree *tree) { uint8_t tmp1[16] = { 0 }, tmp2[16], *in = tmp1, *out = tmp2; int i, t, len; -tree->vlc_num = get_bits(gb, 4); +tree->vlc_num = bitstream_read(bc, 4); if (!tree->vlc_num) { for (i = 0; i < 16; i++) tree->syms[i] = i; return; } -if (get_bits1(gb)) { -len = get_bits(gb, 3); +if (bitstream_read_bit(bc)) { +len = bitstream_read(bc, 3); for (i = 0; i <= len; i++) { -tree->syms[i] = get_bits(gb, 4); +tree->syms[i] = bitstream_read(bc, 4); tmp1[tree->syms[i]] = 1; } for (i = 0; i < 16 && len < 16 - 1; i++) if (!tmp1[i]) tree->syms[++len] = i; } else { -len = get_bits(gb, 2); +len = bitstream_read(bc, 2); for (i = 0; i < 16; i++) in[i] = i; for (i = 0; i <= len; i++) { int size = 1 << i; for (t = 0; t < 16; t += size << 1) -merge(gb, out + t, in + t, size); +merge(bc, out + t, in + t, size); FFSWAP(uint8_t*, in, out); } memcpy(tree->syms, in, 16); @@ -273,21 +274,21 @@ static void read_tree(GetBitContext *gb, Tree *tree) /** * Prepare bundle for decoding data. 
* - * @param gb context for reading bits + * @param bc context for reading bits * @param c decoder context * @param bundle_num number of the bundle to initialize */ -static void read_bundle(GetBitContext *gb, BinkContext *c, int bundle_num) +static void read_bundle(BitstreamContext *bc, BinkContext *c, int bundle_num) { int i; if (bundle_num == BINK_SRC_COLORS) { for (i = 0; i < 16; i++) -read_tree(gb, >col_high[i]); +read_tree(bc, >col_high[i]); c->col_lastval = 0; } if (bundle_num != BINK_SRC_INTRA_DC && bundle_num != BINK_SRC_INTER_DC) -read_tree(gb, >bundle[bundle_num].tree); +read_tree(bc, >bundle[bundle_num].tree); c->bundle[bundle_num].cur_dec = c->bundle[bundle_num].cur_ptr = c->bundle[bundle_num].data; } @@ -295,66 +296,64 @@ static void read_bundle(GetBitContext *gb, BinkContext *c, int bundle_num) /** * common check before starting decoding bundle data * - * @param gb context for reading bits + * @param bc context for reading bits * @param b bundle * @param t variable where number of elements to decode will be stored */ -#define CHECK_READ_VAL(gb, b, t) \ +#define CHECK_READ_VAL(bc, b, t) \ if (!b->cur_dec || (b->cur_dec > b->cur_ptr)) \ return 0; \ -t = get_bits(gb, b->len); \ +t =
[libav-devel] [PATCH 17/35] dss_sp: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov --- libavcodec/dss_sp.c | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/libavcodec/dss_sp.c b/libavcodec/dss_sp.c index 20b0528..44d98d8 100644 --- a/libavcodec/dss_sp.c +++ b/libavcodec/dss_sp.c @@ -25,7 +25,7 @@ #include "libavutil/opt.h" #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "internal.h" #define SUBFRAMES 4 @@ -302,7 +302,7 @@ static av_cold int dss_sp_decode_init(AVCodecContext *avctx) static void dss_sp_unpack_coeffs(DssSpContext *p, const uint8_t *src) { -GetBitContext gb; +BitstreamContext bc; DssSpFrame *fparam = &p->fparam; int i; int subframe_idx; @@ -315,24 +315,24 @@ static void dss_sp_unpack_coeffs(DssSpContext *p, const uint8_t *src) p->bits[i + 1] = src[i]; } -init_get_bits(&gb, p->bits, DSS_SP_FRAME_SIZE * 8); +bitstream_init(&bc, p->bits, DSS_SP_FRAME_SIZE * 8); for (i = 0; i < 2; i++) -fparam->filter_idx[i] = get_bits(&gb, 5); +fparam->filter_idx[i] = bitstream_read(&bc, 5); for (; i < 8; i++) -fparam->filter_idx[i] = get_bits(&gb, 4); +fparam->filter_idx[i] = bitstream_read(&bc, 4); for (; i < 14; i++) -fparam->filter_idx[i] = get_bits(&gb, 3); +fparam->filter_idx[i] = bitstream_read(&bc, 3); for (subframe_idx = 0; subframe_idx < 4; subframe_idx++) { -fparam->sf_adaptive_gain[subframe_idx] = get_bits(&gb, 5); +fparam->sf_adaptive_gain[subframe_idx] = bitstream_read(&bc, 5); -fparam->sf[subframe_idx].combined_pulse_pos = get_bits_long(&gb, 31); +fparam->sf[subframe_idx].combined_pulse_pos = bitstream_read(&bc, 31); -fparam->sf[subframe_idx].gain = get_bits(&gb, 6); +fparam->sf[subframe_idx].gain = bitstream_read(&bc, 6); for (i = 0; i < 7; i++) -fparam->sf[subframe_idx].pulse_val[i] = get_bits(&gb, 3); +fparam->sf[subframe_idx].pulse_val[i] = bitstream_read(&bc, 3); } for (subframe_idx = 0; subframe_idx < 4; subframe_idx++) { @@ -394,7 +394,7 @@ static void dss_sp_unpack_coeffs(DssSpContext *p, const uint8_t *src) } } -combined_pitch = get_bits(&gb, 24); +combined_pitch = bitstream_read(&bc, 24);
fparam->pitch_lag[0] = (combined_pitch % 151) + 36; -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 15/35] cljrdec: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov --- libavcodec/cljrdec.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/libavcodec/cljrdec.c b/libavcodec/cljrdec.c index 33d8023..833707b 100644 --- a/libavcodec/cljrdec.c +++ b/libavcodec/cljrdec.c @@ -25,7 +25,7 @@ */ #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "internal.h" static int decode_frame(AVCodecContext *avctx, @@ -34,7 +34,7 @@ static int decode_frame(AVCodecContext *avctx, { const uint8_t *buf = avpkt->data; int buf_size = avpkt->size; -GetBitContext gb; +BitstreamContext bc; AVFrame * const p = data; int x, y, ret; @@ -56,20 +56,20 @@ static int decode_frame(AVCodecContext *avctx, p->pict_type = AV_PICTURE_TYPE_I; p->key_frame = 1; -init_get_bits(&gb, buf, buf_size * 8); +bitstream_init(&bc, buf, buf_size * 8); for (y = 0; y < avctx->height; y++) { uint8_t *luma = &p->data[0][y * p->linesize[0]]; uint8_t *cb = &p->data[1][y * p->linesize[1]]; uint8_t *cr = &p->data[2][y * p->linesize[2]]; for (x = 0; x < avctx->width; x += 4) { -luma[3] = get_bits(&gb, 5) << 3; -luma[2] = get_bits(&gb, 5) << 3; -luma[1] = get_bits(&gb, 5) << 3; -luma[0] = get_bits(&gb, 5) << 3; +luma[3] = bitstream_read(&bc, 5) << 3; +luma[2] = bitstream_read(&bc, 5) << 3; +luma[1] = bitstream_read(&bc, 5) << 3; +luma[0] = bitstream_read(&bc, 5) << 3; luma += 4; -*(cb++) = get_bits(&gb, 6) << 2; -*(cr++) = get_bits(&gb, 6) << 2; +*(cb++) = bitstream_read(&bc, 6) << 2; +*(cr++) = bitstream_read(&bc, 6) << 2; } } -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 10/35] atrac3plus: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/atrac3plus.c| 525 +++-- libavcodec/atrac3plus.h| 7 +- libavcodec/atrac3plusdec.c | 15 +- 3 files changed, 275 insertions(+), 272 deletions(-) diff --git a/libavcodec/atrac3plus.c b/libavcodec/atrac3plus.c index 076fb84..2731a80 100644 --- a/libavcodec/atrac3plus.c +++ b/libavcodec/atrac3plus.c @@ -26,8 +26,9 @@ */ #include "libavutil/avassert.h" + #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "atrac3plus.h" #include "atrac3plus_data.h" @@ -212,20 +213,20 @@ av_cold void ff_atrac3p_init_vlcs(AVCodec *codec) /** * Decode number of coded quantization units. * - * @param[in] gbthe GetBit context + * @param[in] bcthe Bitstream context * @param[in,out] chan ptr to the channel parameters * @param[in,out] ctx ptr to the channel unit context * @param[in] avctx ptr to the AVCodecContext * @return result code: 0 = OK, otherwise - error code */ -static int num_coded_units(GetBitContext *gb, Atrac3pChanParams *chan, +static int num_coded_units(BitstreamContext *bc, Atrac3pChanParams *chan, Atrac3pChanUnitCtx *ctx, AVCodecContext *avctx) { -chan->fill_mode = get_bits(gb, 2); +chan->fill_mode = bitstream_read(bc, 2); if (!chan->fill_mode) { chan->num_coded_vals = ctx->num_quant_units; } else { -chan->num_coded_vals = get_bits(gb, 5); +chan->num_coded_vals = bitstream_read(bc, 5); if (chan->num_coded_vals > ctx->num_quant_units) { av_log(avctx, AV_LOG_ERROR, "Invalid number of transmitted units!\n"); @@ -233,7 +234,7 @@ static int num_coded_units(GetBitContext *gb, Atrac3pChanParams *chan, } if (chan->fill_mode == 3) -chan->split_point = get_bits(gb, 2) + (chan->ch_num << 1) + 1; +chan->split_point = bitstream_read(bc, 2) + (chan->ch_num << 1) + 1; } return 0; @@ -318,21 +319,21 @@ static inline void unpack_vq_shape(int start_val, const int8_t *shape_vec, } } -#define UNPACK_SF_VQ_SHAPE(gb, dst, num_vals)\ -start_val = get_bits((gb), 6); \ -unpack_vq_shape(start_val, _sf_shapes[get_bits((gb), 6)][0], \ 
+#define UNPACK_SF_VQ_SHAPE(bc, dst, num_vals) \ +start_val = bitstream_read((bc), 6); \ +unpack_vq_shape(start_val, _sf_shapes[bitstream_read((bc), 6)][0], \ (dst), (num_vals)) /** * Decode word length for each quantization unit of a channel. * - * @param[in] gbthe GetBit context + * @param[in] bcthe Bitstream context * @param[in,out] ctx ptr to the channel unit context * @param[in] ch_numchannel to process * @param[in] avctx ptr to the AVCodecContext * @return result code: 0 = OK, otherwise - error code */ -static int decode_channel_wordlen(GetBitContext *gb, Atrac3pChanUnitCtx *ctx, +static int decode_channel_wordlen(BitstreamContext *bc, Atrac3pChanUnitCtx *ctx, int ch_num, AVCodecContext *avctx) { int i, weight_idx = 0, delta, diff, pos, delta_bits, min_val, flag, @@ -343,107 +344,107 @@ static int decode_channel_wordlen(GetBitContext *gb, Atrac3pChanUnitCtx *ctx, chan->fill_mode = 0; -switch (get_bits(gb, 2)) { /* switch according to coding mode */ +switch (bitstream_read(bc, 2)) { /* switch according to coding mode */ case 0: /* coded using constant number of bits */ for (i = 0; i < ctx->num_quant_units; i++) -chan->qu_wordlen[i] = get_bits(gb, 3); +chan->qu_wordlen[i] = bitstream_read(bc, 3); break; case 1: if (ch_num) { -if ((ret = num_coded_units(gb, chan, ctx, avctx)) < 0) +if ((ret = num_coded_units(bc, chan, ctx, avctx)) < 0) return ret; if (chan->num_coded_vals) { -vlc_tab = _vlc_tabs[get_bits(gb, 2)]; +vlc_tab = _vlc_tabs[bitstream_read(bc, 2)]; for (i = 0; i < chan->num_coded_vals; i++) { -delta = get_vlc2(gb, vlc_tab->table, vlc_tab->bits, 1); +delta = bitstream_read_vlc(bc, vlc_tab->table, vlc_tab->bits, 1); chan->qu_wordlen[i] = (ref_chan->qu_wordlen[i] + delta) & 7; } } } else { -weight_idx = get_bits(gb, 2); -if ((ret = num_coded_units(gb, chan, ctx, avctx)) < 0) +weight_idx = bitstream_read(bc, 2); +if ((ret = num_coded_units(bc, chan, ctx, avctx)) < 0) return ret; if (chan->num_coded_vals) { -pos = get_bits(gb, 5);
[libav-devel] [PATCH 02/35] cllc: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/cllc.c | 88 +++ 1 file changed, 36 insertions(+), 52 deletions(-) diff --git a/libavcodec/cllc.c b/libavcodec/cllc.c index cdbed74..bac2b73 100644 --- a/libavcodec/cllc.c +++ b/libavcodec/cllc.c @@ -23,11 +23,13 @@ #include #include "libavutil/intreadwrite.h" + +#include "bitstream.h" #include "bswapdsp.h" #include "canopus.h" -#include "get_bits.h" #include "avcodec.h" #include "internal.h" +#include "vlc.h" typedef struct CLLCContext { AVCodecContext *avctx; @@ -37,7 +39,7 @@ typedef struct CLLCContext { int swapped_buf_size; } CLLCContext; -static int read_code_table(CLLCContext *ctx, GetBitContext *gb, VLC *vlc) +static int read_code_table(CLLCContext *ctx, BitstreamContext *bc, VLC *vlc) { uint8_t symbols[256]; uint8_t bits[256]; @@ -49,10 +51,10 @@ static int read_code_table(CLLCContext *ctx, GetBitContext *gb, VLC *vlc) count = 0; num_codes_sum = 0; -num_lens = get_bits(gb, 5); +num_lens = bitstream_read(bc, 5); for (i = 0; i < num_lens; i++) { -num_codes = get_bits(gb, 9); +num_codes = bitstream_read(bc, 9); num_codes_sum += num_codes; if (num_codes_sum > 256) { @@ -64,7 +66,7 @@ static int read_code_table(CLLCContext *ctx, GetBitContext *gb, VLC *vlc) } for (j = 0; j < num_codes; j++) { -symbols[count] = get_bits(gb, 8); +symbols[count] = bitstream_read(bc, 8); bits[count]= i + 1; codes[count] = prefix++; @@ -82,7 +84,7 @@ static int read_code_table(CLLCContext *ctx, GetBitContext *gb, VLC *vlc) * Unlike the RGB24 read/restore, which reads in a component at a time, * ARGB read/restore reads in ARGB quads. 
*/ -static int read_argb_line(CLLCContext *ctx, GetBitContext *gb, int *top_left, +static int read_argb_line(CLLCContext *ctx, BitstreamContext *bc, int *top_left, VLC *vlc, uint8_t *outbuf) { uint8_t *dst; @@ -90,8 +92,6 @@ static int read_argb_line(CLLCContext *ctx, GetBitContext *gb, int *top_left, int code; int i; -OPEN_READER(bits, gb); - dst = outbuf; pred[0] = top_left[0]; pred[1] = top_left[1]; @@ -100,8 +100,7 @@ static int read_argb_line(CLLCContext *ctx, GetBitContext *gb, int *top_left, for (i = 0; i < ctx->avctx->width; i++) { /* Always get the alpha component */ -UPDATE_CACHE(bits, gb); -GET_VLC(code, bits, gb, vlc[0].table, 7, 2); +code = bitstream_read_vlc(bc, vlc[0].table, 7, 2); pred[0] += code; dst[0] = pred[0]; @@ -109,22 +108,19 @@ static int read_argb_line(CLLCContext *ctx, GetBitContext *gb, int *top_left, /* Skip the components if they are entirely transparent */ if (dst[0]) { /* Red */ -UPDATE_CACHE(bits, gb); -GET_VLC(code, bits, gb, vlc[1].table, 7, 2); +code = bitstream_read_vlc(bc, vlc[1].table, 7, 2); pred[1] += code; dst[1] = pred[1]; /* Green */ -UPDATE_CACHE(bits, gb); -GET_VLC(code, bits, gb, vlc[2].table, 7, 2); +code = bitstream_read_vlc(bc, vlc[2].table, 7, 2); pred[2] += code; dst[2] = pred[2]; /* Blue */ -UPDATE_CACHE(bits, gb); -GET_VLC(code, bits, gb, vlc[3].table, 7, 2); +code = bitstream_read_vlc(bc, vlc[3].table, 7, 2); pred[3] += code; dst[3] = pred[3]; @@ -137,8 +133,6 @@ static int read_argb_line(CLLCContext *ctx, GetBitContext *gb, int *top_left, dst += 4; } -CLOSE_READER(bits, gb); - top_left[0] = outbuf[0]; /* Only stash components if they are not transparent */ @@ -151,65 +145,55 @@ static int read_argb_line(CLLCContext *ctx, GetBitContext *gb, int *top_left, return 0; } -static int read_rgb24_component_line(CLLCContext *ctx, GetBitContext *gb, +static int read_rgb24_component_line(CLLCContext *ctx, BitstreamContext *bc, int *top_left, VLC *vlc, uint8_t *outbuf) { uint8_t *dst; int pred, code; int i; 
-OPEN_READER(bits, gb); - dst = outbuf; pred = *top_left; /* Simultaneously read and restore the line */ for (i = 0; i < ctx->avctx->width; i++) { -UPDATE_CACHE(bits, gb); -GET_VLC(code, bits, gb, vlc->table, 7, 2); +code = bitstream_read_vlc(bc, vlc->table, 7, 2); pred += code; dst[0] = pred; dst += 3; } -CLOSE_READER(bits, gb); - /* Stash the first pixel */ *top_left = outbuf[0]; return 0; } -static int read_yuv_component_line(CLLCContext *ctx, GetBitContext *gb, +static int read_yuv_component_line(CLLCContext *ctx,
[libav-devel] [PATCH 07/35] adpcm: Convert to the new bitstream header
Signed-off-by: Anton Khirnov --- libavcodec/adpcm.c | 20 +++- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/libavcodec/adpcm.c b/libavcodec/adpcm.c index 3ab16dd..fe51c0d 100644 --- a/libavcodec/adpcm.c +++ b/libavcodec/adpcm.c @@ -29,8 +29,9 @@ * License along with Libav; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ + #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "put_bits.h" #include "bytestream.h" #include "adpcm.h" @@ -366,32 +367,33 @@ static int xa_decode(AVCodecContext *avctx, int16_t *out0, int16_t *out1, static void adpcm_swf_decode(AVCodecContext *avctx, const uint8_t *buf, int buf_size, int16_t *samples) { ADPCMDecodeContext *c = avctx->priv_data; -GetBitContext gb; +BitstreamContext bc; const int *table; int k0, signmask, nb_bits, count; int size = buf_size*8; int i; -init_get_bits(&gb, buf, size); +bitstream_init(&bc, buf, size); //read bits & initial values -nb_bits = get_bits(&gb, 2)+2; +nb_bits = bitstream_read(&bc, 2)+2; table = swf_index_tables[nb_bits-2]; k0 = 1 << (nb_bits-2); signmask = 1 << (nb_bits-1); -while (get_bits_count(&gb) <= size - 22*avctx->channels) { +while (bitstream_tell(&bc) <= size - 22 * avctx->channels) { for (i = 0; i < avctx->channels; i++) { -*samples++ = c->status[i].predictor = get_sbits(&gb, 16); -c->status[i].step_index = get_bits(&gb, 6); +*samples++ = +c->status[i].predictor = bitstream_read_signed(&bc, 16); +c->status[i].step_index = bitstream_read(&bc, 6); } -for (count = 0; get_bits_count(&gb) <= size - nb_bits*avctx->channels && count < 4095; count++) { +for (count = 0; bitstream_tell(&bc) <= size - nb_bits * avctx->channels && count < 4095; count++) { int i; for (i = 0; i < avctx->channels; i++) { // similar to IMA adpcm -int delta = get_bits(&gb, nb_bits); +int delta = bitstream_read(&bc, nb_bits); int step = ff_adpcm_step_table[c->status[i].step_index]; long vpdiff = 0; // vpdiff = (delta+0.5)*step/4 int k = k0; -- 2.1.4 ___
libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 08/35] asvdec: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/asv.h| 4 ++-- libavcodec/asvdec.c | 54 ++--- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/libavcodec/asv.h b/libavcodec/asv.h index 18f7a95..7c4e4fd 100644 --- a/libavcodec/asv.h +++ b/libavcodec/asv.h @@ -31,11 +31,11 @@ #include "libavutil/mem.h" #include "avcodec.h" +#include "bitstream.h" #include "blockdsp.h" #include "bswapdsp.h" #include "fdctdsp.h" #include "idctdsp.h" -#include "get_bits.h" #include "pixblockdsp.h" #include "put_bits.h" @@ -47,7 +47,7 @@ typedef struct ASV1Context { IDCTDSPContext idsp; PixblockDSPContext pdsp; PutBitContext pb; -GetBitContext gb; +BitstreamContext bc; ScanTable scantable; int inv_qscale; int mb_width; diff --git a/libavcodec/asvdec.c b/libavcodec/asvdec.c index f17f064..cbda63d 100644 --- a/libavcodec/asvdec.c +++ b/libavcodec/asvdec.c @@ -70,27 +70,27 @@ static av_cold void init_vlcs(ASV1Context *a) } // FIXME write a reversed bitstream reader to avoid the double reverse -static inline int asv2_get_bits(GetBitContext *gb, int n) +static inline int asv2_get_bits(BitstreamContext *bc, int n) { -return ff_reverse[get_bits(gb, n) << (8 - n)]; +return ff_reverse[bitstream_read(bc, n) << (8 - n)]; } -static inline int asv1_get_level(GetBitContext *gb) +static inline int asv1_get_level(BitstreamContext *bc) { -int code = get_vlc2(gb, level_vlc.table, VLC_BITS, 1); +int code = bitstream_read_vlc(bc, level_vlc.table, VLC_BITS, 1); if (code == 3) -return get_sbits(gb, 8); +return bitstream_read_signed(bc, 8); else return code - 3; } -static inline int asv2_get_level(GetBitContext *gb) +static inline int asv2_get_level(BitstreamContext *bc) { -int code = get_vlc2(gb, asv2_level_vlc.table, ASV2_LEVEL_VLC_BITS, 1); +int code = bitstream_read_vlc(bc, asv2_level_vlc.table, ASV2_LEVEL_VLC_BITS, 1); if (code == 31) -return (int8_t) asv2_get_bits(gb, 8); +return (int8_t) asv2_get_bits(bc, 8); else return code - 31; } @@ -99,10 +99,10 @@ static inline int 
asv1_decode_block(ASV1Context *a, int16_t block[64]) { int i; -block[0] = 8 * get_bits(>gb, 8); +block[0] = 8 * bitstream_read(>bc, 8); for (i = 0; i < 11; i++) { -const int ccp = get_vlc2(>gb, ccp_vlc.table, VLC_BITS, 1); +const int ccp = bitstream_read_vlc(>bc, ccp_vlc.table, VLC_BITS, 1); if (ccp) { if (ccp == 16) @@ -113,13 +113,13 @@ static inline int asv1_decode_block(ASV1Context *a, int16_t block[64]) } if (ccp & 8) -block[a->scantable.permutated[4 * i + 0]] = (asv1_get_level(>gb) * a->intra_matrix[4 * i + 0]) >> 4; +block[a->scantable.permutated[4 * i + 0]] = (asv1_get_level(>bc) * a->intra_matrix[4 * i + 0]) >> 4; if (ccp & 4) -block[a->scantable.permutated[4 * i + 1]] = (asv1_get_level(>gb) * a->intra_matrix[4 * i + 1]) >> 4; +block[a->scantable.permutated[4 * i + 1]] = (asv1_get_level(>bc) * a->intra_matrix[4 * i + 1]) >> 4; if (ccp & 2) -block[a->scantable.permutated[4 * i + 2]] = (asv1_get_level(>gb) * a->intra_matrix[4 * i + 2]) >> 4; +block[a->scantable.permutated[4 * i + 2]] = (asv1_get_level(>bc) * a->intra_matrix[4 * i + 2]) >> 4; if (ccp & 1) -block[a->scantable.permutated[4 * i + 3]] = (asv1_get_level(>gb) * a->intra_matrix[4 * i + 3]) >> 4; +block[a->scantable.permutated[4 * i + 3]] = (asv1_get_level(>bc) * a->intra_matrix[4 * i + 3]) >> 4; } } @@ -130,32 +130,32 @@ static inline int asv2_decode_block(ASV1Context *a, int16_t block[64]) { int i, count, ccp; -count = asv2_get_bits(>gb, 4); +count = asv2_get_bits(>bc, 4); -block[0] = 8 * asv2_get_bits(>gb, 8); +block[0] = 8 * asv2_get_bits(>bc, 8); -ccp = get_vlc2(>gb, dc_ccp_vlc.table, VLC_BITS, 1); +ccp = bitstream_read_vlc(>bc, dc_ccp_vlc.table, VLC_BITS, 1); if (ccp) { if (ccp & 4) -block[a->scantable.permutated[1]] = (asv2_get_level(>gb) * a->intra_matrix[1]) >> 4; +block[a->scantable.permutated[1]] = (asv2_get_level(>bc) * a->intra_matrix[1]) >> 4; if (ccp & 2) -block[a->scantable.permutated[2]] = (asv2_get_level(>gb) * a->intra_matrix[2]) >> 4; +block[a->scantable.permutated[2]] = 
(asv2_get_level(>bc) * a->intra_matrix[2]) >> 4; if (ccp & 1) -block[a->scantable.permutated[3]] = (asv2_get_level(>gb) * a->intra_matrix[3]) >> 4; +block[a->scantable.permutated[3]] = (asv2_get_level(>bc) * a->intra_matrix[3]) >> 4; } for (i = 1; i < count + 1; i++) { -const int ccp = get_vlc2(>gb, ac_ccp_vlc.table, VLC_BITS, 1); +
[libav-devel] [PATCH 11/35] avs: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov --- libavcodec/avs.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libavcodec/avs.c b/libavcodec/avs.c index 0d127f8..bea01a2 100644 --- a/libavcodec/avs.c +++ b/libavcodec/avs.c @@ -20,7 +20,7 @@ */ #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "internal.h" typedef struct AvsContext { @@ -57,7 +57,7 @@ avs_decode_frame(AVCodecContext * avctx, int i, j, x, y, stride, ret, vect_w = 3, vect_h = 3; AvsVideoSubType sub_type; AvsBlockType type; -GetBitContext change_map; +BitstreamContext change_map; if ((ret = ff_reget_buffer(avctx, p)) < 0) { av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n"); @@ -125,13 +125,13 @@ avs_decode_frame(AVCodecContext * avctx, int map_size = ((318 / vect_w + 7) / 8) * (198 / vect_h); if (buf_end - table < map_size) return AVERROR_INVALIDDATA; -init_get_bits(&change_map, table, map_size * 8); +bitstream_init(&change_map, table, map_size * 8); table += map_size; } for (y=0; y<198; y+=vect_h) { for (x=0; x<318; x+=vect_w) { -if (sub_type == AVS_I_FRAME || get_bits1(&change_map)) { +if (sub_type == AVS_I_FRAME || bitstream_read_bit(&change_map)) { if (buf_end - table < 1) return AVERROR_INVALIDDATA; vect = &buf[*table++ * (vect_w * vect_h)]; @@ -145,7 +145,7 @@ avs_decode_frame(AVCodecContext * avctx, } } if (sub_type != AVS_I_FRAME) -align_get_bits(&change_map); +bitstream_align(&change_map); } if ((ret = av_frame_ref(picture, p)) < 0) -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 13/35] binkaudio: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/binkaudio.c | 59 +- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/libavcodec/binkaudio.c b/libavcodec/binkaudio.c index 2638eb2..cf61135 100644 --- a/libavcodec/binkaudio.c +++ b/libavcodec/binkaudio.c @@ -33,8 +33,8 @@ #define BITSTREAM_READER_LE #include "avcodec.h" +#include "bitstream.h" #include "dct.h" -#include "get_bits.h" #include "internal.h" #include "rdft.h" #include "wma_freqs.h" @@ -45,7 +45,7 @@ static float quant_table[96]; #define BINK_BLOCK_MAX_SIZE (MAX_CHANNELS << 11) typedef struct BinkAudioContext { -GetBitContext gb; +BitstreamContext bc; int version_b; ///< Bink version 'b' int first; int channels; @@ -143,11 +143,11 @@ static av_cold int decode_init(AVCodecContext *avctx) return 0; } -static float get_float(GetBitContext *gb) +static float get_float(BitstreamContext *bc) { -int power = get_bits(gb, 5); -float f = ldexpf(get_bits_long(gb, 23), power - 23); -if (get_bits1(gb)) +int power = bitstream_read(bc, 5); +float f = ldexpf(bitstream_read(bc, 23), power - 23); +if (bitstream_read_bit(bc)) f = -f; return f; } @@ -166,30 +166,30 @@ static int decode_block(BinkAudioContext *s, float **out, int use_dct) int ch, i, j, k; float q, quant[25]; int width, coeff; -GetBitContext *gb = >gb; +BitstreamContext *bc = >bc; if (use_dct) -skip_bits(gb, 2); +bitstream_skip(bc, 2); for (ch = 0; ch < s->channels; ch++) { FFTSample *coeffs = out[ch]; if (s->version_b) { -if (get_bits_left(gb) < 64) +if (bitstream_bits_left(bc) < 64) return AVERROR_INVALIDDATA; -coeffs[0] = av_int2float(get_bits_long(gb, 32)) * s->root; -coeffs[1] = av_int2float(get_bits_long(gb, 32)) * s->root; +coeffs[0] = av_int2float(bitstream_read(bc, 32)) * s->root; +coeffs[1] = av_int2float(bitstream_read(bc, 32)) * s->root; } else { -if (get_bits_left(gb) < 58) +if (bitstream_bits_left(bc) < 58) return AVERROR_INVALIDDATA; -coeffs[0] = get_float(gb) * s->root; -coeffs[1] = get_float(gb) * s->root; +coeffs[0] = get_float(bc) * 
s->root; +coeffs[1] = get_float(bc) * s->root; } -if (get_bits_left(gb) < s->num_bands * 8) +if (bitstream_bits_left(bc) < s->num_bands * 8) return AVERROR_INVALIDDATA; for (i = 0; i < s->num_bands; i++) { -int value = get_bits(gb, 8); +int value = bitstream_read(bc, 8); quant[i] = quant_table[FFMIN(value, 95)]; } @@ -202,9 +202,9 @@ static int decode_block(BinkAudioContext *s, float **out, int use_dct) if (s->version_b) { j = i + 16; } else { -int v = get_bits1(gb); +int v = bitstream_read_bit(bc); if (v) { -v = get_bits(gb, 4); +v = bitstream_read(bc, 4); j = i + rle_length_tab[v] * 8; } else { j = i + 8; @@ -213,7 +213,7 @@ static int decode_block(BinkAudioContext *s, float **out, int use_dct) j = FFMIN(j, s->frame_len); -width = get_bits(gb, 4); +width = bitstream_read(bc, 4); if (width == 0) { memset(coeffs + i, 0, (j - i) * sizeof(*coeffs)); i = j; @@ -223,10 +223,10 @@ static int decode_block(BinkAudioContext *s, float **out, int use_dct) while (i < j) { if (s->bands[k] == i) q = quant[k++]; -coeff = get_bits(gb, width); +coeff = bitstream_read(bc, width); if (coeff) { int v; -v = get_bits1(gb); +v = bitstream_read_bit(bc); if (v) coeffs[i] = -q * coeff; else @@ -278,10 +278,11 @@ static av_cold int decode_end(AVCodecContext *avctx) return 0; } -static void get_bits_align32(GetBitContext *s) +static void get_bits_align32(BitstreamContext *s) { -int n = (-get_bits_count(s)) & 31; -if (n) skip_bits(s, n); +int n = (-bitstream_tell(s)) & 31; +if (n) +bitstream_skip(s, n); } static int decode_frame(AVCodecContext *avctx, void *data, @@ -289,10 +290,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, { BinkAudioContext *s = avctx->priv_data; AVFrame *frame = data; -GetBitContext *gb = >gb; +BitstreamContext *bc = >bc; int ret, consumed = 0; -if (!get_bits_left(gb)) { +if
[libav-devel] [PATCH 01/35] lavc: add a new bitstream reader to replace get_bits
The new bit reader features a simpler API and an implementation without stacks of nested macros. --- libavcodec/bitstream.h | 387 + 1 file changed, 387 insertions(+) create mode 100644 libavcodec/bitstream.h diff --git a/libavcodec/bitstream.h b/libavcodec/bitstream.h new file mode 100644 index 000..996e32e --- /dev/null +++ b/libavcodec/bitstream.h @@ -0,0 +1,387 @@ +/* + * Copyright (c) 2016 Alexandra Hájková + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * functions for reading bits from a buffer + */ + +#ifndef AVCODEC_BITSTREAM_H +#define AVCODEC_BITSTREAM_H + +#include + +#include "libavutil/common.h" +#include "libavutil/intreadwrite.h" +#include "libavutil/log.h" + +#include "mathops.h" +#include "vlc.h" + +typedef struct BitstreamContext { +uint64_t bits; // stores bits read from the buffer +const uint8_t *buffer, *buffer_end; +const uint8_t *ptr; // position inside a buffer +unsigned bits_left; // number of bits left in bits field +unsigned size_in_bits; +} BitstreamContext; + +static inline void refill_64(BitstreamContext *bc) +{ +if (bc->ptr >= bc->buffer_end) +return; + +#ifdef BITSTREAM_READER_LE +bc->bits = AV_RL64(bc->ptr); +#else +bc->bits = AV_RB64(bc->ptr); +#endif +bc->ptr += 8; +bc->bits_left = 64; +} + +static 
inline void refill_32(BitstreamContext *bc) +{ +if (bc->ptr >= bc->buffer_end) +return; + +#ifdef BITSTREAM_READER_LE +bc->bits = (uint64_t)AV_RL32(bc->ptr) << bc->bits_left | bc->bits; +#else +bc->bits = bc->bits | (uint64_t)AV_RB32(bc->ptr) << (32 - bc->bits_left); +#endif +bc->ptr += 4; +bc->bits_left += 32; +} + +/* Initialize BitstreamContext. Input buffer must have an additional zero + * padding of AV_INPUT_BUFFER_PADDING_SIZE bytes at the end. */ +static inline int bitstream_init(BitstreamContext *bc, const uint8_t *buffer, + unsigned bit_size) +{ +unsigned buffer_size; + +if (bit_size > INT_MAX - 7 || !buffer) { +buffer= +bc->buffer= +bc->ptr = NULL; +bc->bits_left = 0; +return AVERROR_INVALIDDATA; +} + +buffer_size = (bit_size + 7) >> 3; + +bc->buffer = buffer; +bc->buffer_end = buffer + buffer_size; +bc->ptr = bc->buffer; +bc->size_in_bits = bit_size; +bc->bits_left= 0; +bc->bits = 0; + +refill_64(bc); + +return 0; +} + +/* Initialize BitstreamContext with buffer size in bytes instead of bits. */ +static inline int bitstream_init8(BitstreamContext *bc, const uint8_t *buffer, + unsigned byte_size) +{ +if (byte_size > INT_MAX / 8) +return AVERROR_INVALIDDATA; +return bitstream_init(bc, buffer, byte_size * 8); +} + +/* Return number of bits already read. */ +static inline int bitstream_tell(const BitstreamContext *bc) +{ +return (bc->ptr - bc->buffer) * 8 - bc->bits_left; +} + +/* Return buffer size in bits. */ +static inline int bitstream_tell_size(const BitstreamContext *bc) +{ +return bc->size_in_bits; +} + +/* Return the number of the bits left in a buffer. 
*/ +static inline int bitstream_bits_left(const BitstreamContext *bc) +{ +return (bc->buffer - bc->ptr) * 8 + bc->size_in_bits + bc->bits_left; +} + +static inline uint64_t get_val(BitstreamContext *bc, unsigned n) +{ +#ifdef BITSTREAM_READER_LE +uint64_t ret = bc->bits & ((UINT64_C(1) << n) - 1); +bc->bits >>= n; +#else +uint64_t ret = bc->bits >> (64 - n); +bc->bits <<= n; +#endif +bc->bits_left -= n; + +return ret; +} + +/* Return one bit from the buffer. */ +static inline unsigned bitstream_read_bit(BitstreamContext *bc) +{ +if (!bc->bits_left) +refill_64(bc); + +return get_val(bc, 1); +} + +/* Return n bits from the buffer. n has to be in the 0-63 range. */ +static inline uint64_t bitstream_read_63(Bi
[libav-devel] [PATCH 04/35] ea: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/eatgq.c | 34 +- libavcodec/eatgv.c | 22 +++--- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/libavcodec/eatgq.c b/libavcodec/eatgq.c index 8355471..9abedcc 100644 --- a/libavcodec/eatgq.c +++ b/libavcodec/eatgq.c @@ -31,9 +31,9 @@ #define BITSTREAM_READER_LE #include "aandcttab.h" #include "avcodec.h" +#include "bitstream.h" #include "bytestream.h" #include "eaidct.h" -#include "get_bits.h" #include "idctdsp.h" #include "internal.h" @@ -58,44 +58,44 @@ static av_cold int tgq_decode_init(AVCodecContext *avctx) return 0; } -static void tgq_decode_block(TgqContext *s, int16_t block[64], GetBitContext *gb) +static void tgq_decode_block(TgqContext *s, int16_t block[64], BitstreamContext *bc) { uint8_t *perm = s->scantable.permutated; int i, j, value; -block[0] = get_sbits(gb, 8) * s->qtable[0]; +block[0] = bitstream_read_signed(bc, 8) * s->qtable[0]; for (i = 1; i < 64;) { -switch (show_bits(gb, 3)) { +switch (bitstream_peek(bc, 3)) { case 4: block[perm[i++]] = 0; case 0: block[perm[i++]] = 0; -skip_bits(gb, 3); +bitstream_skip(bc, 3); break; case 5: case 1: -skip_bits(gb, 2); -value = get_bits(gb, 6); +bitstream_skip(bc, 2); +value = bitstream_read(bc, 6); for (j = 0; j < value; j++) block[perm[i++]] = 0; break; case 6: -skip_bits(gb, 3); +bitstream_skip(bc, 3); block[perm[i]] = -s->qtable[perm[i]]; i++; break; case 2: -skip_bits(gb, 3); +bitstream_skip(bc, 3); block[perm[i]] = s->qtable[perm[i]]; i++; break; case 7: // 111b case 3: // 011b -skip_bits(gb, 2); -if (show_bits(gb, 6) == 0x3F) { -skip_bits(gb, 6); -block[perm[i]] = get_sbits(gb, 8) * s->qtable[perm[i]]; +bitstream_skip(bc, 2); +if (bitstream_peek(bc, 6) == 0x3F) { +bitstream_skip(bc, 6); +block[perm[i]] = bitstream_read_signed(bc, 8) * s->qtable[perm[i]]; } else { -block[perm[i]] = get_sbits(gb, 6) * s->qtable[perm[i]]; +block[perm[i]] = bitstream_read_signed(bc, 6) * s->qtable[perm[i]]; } i++; break; @@ -156,10 +156,10 @@ static void 
tgq_decode_mb(TgqContext *s, AVFrame *frame, int mb_y, int mb_x) mode = bytestream2_get_byte(>gb); if (mode > 12) { -GetBitContext gb; -init_get_bits(, s->gb.buffer, FFMIN(s->gb.buffer_end - s->gb.buffer, mode) * 8); +BitstreamContext bc; +bitstream_init(, s->gb.buffer, FFMIN(s->gb.buffer_end - s->gb.buffer, mode) * 8); for (i = 0; i < 6; i++) -tgq_decode_block(s, s->block[i], ); +tgq_decode_block(s, s->block[i], ); tgq_idct_put_mb(s, s->block, frame, mb_x, mb_y); bytestream2_skip(>gb, mode); } else { diff --git a/libavcodec/eatgv.c b/libavcodec/eatgv.c index 7a50d01..549b5b6 100644 --- a/libavcodec/eatgv.c +++ b/libavcodec/eatgv.c @@ -33,7 +33,7 @@ #define BITSTREAM_READER_LE #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "internal.h" #define EA_PREAMBLE_SIZE8 @@ -153,7 +153,7 @@ static int tgv_decode_inter(TgvContext *s, AVFrame *frame, int num_blocks_packed; int vector_bits; int i,j,x,y; -GetBitContext gb; +BitstreamContext bc; int mvbits; const uint8_t *blocks_raw; @@ -166,7 +166,7 @@ static int tgv_decode_inter(TgvContext *s, AVFrame *frame, vector_bits = AV_RL16([6]); buf += 12; -if (vector_bits > MIN_CACHE_BITS || !vector_bits) { +if (vector_bits > 32 || !vector_bits) { av_log(s->avctx, AV_LOG_ERROR, "Invalid value for motion vector bits: %d\n", vector_bits); return AVERROR_INVALIDDATA; @@ -195,10 +195,10 @@ static int tgv_decode_inter(TgvContext *s, AVFrame *frame, if (buf + (mvbits >> 3) + 16 * num_blocks_raw + 8 * num_blocks_packed > buf_end) return AVERROR_INVALIDDATA; -init_get_bits(, buf, mvbits); +bitstream_init(, buf, mvbits); for (i = 0; i < num_mvs; i++) { -s->mv_codebook[i][0] = get_sbits(, 10); -s->mv_codebook[i][1] = get_sbits(, 10); +s->mv_codebook[i][0] = bitstream_read_signed(, 10); +s->mv_codebook[i][1] = bitstream_read_signed(, 10); } buf += mvbits >> 3; @@ -207,23 +207,23 @@ static int tgv_decode_inter(TgvContext *s, AVFrame *frame, buf += num_blocks_raw * 16; /* read compressed blocks */ -init_get_bits(, buf, 
(buf_end - buf) << 3);
[libav-devel] [PATCH 16/35] cook: Convert to the new bitstream reader
Signed-off-by: Anton Khirnov--- libavcodec/cook.c | 53 +++-- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/libavcodec/cook.c b/libavcodec/cook.c index c990333..c3304ea 100644 --- a/libavcodec/cook.c +++ b/libavcodec/cook.c @@ -47,7 +47,7 @@ #include "audiodsp.h" #include "avcodec.h" -#include "get_bits.h" +#include "bitstream.h" #include "bytestream.h" #include "fft.h" #include "internal.h" @@ -124,7 +124,7 @@ typedef struct cook { AVCodecContext* avctx; AudioDSPContext adsp; -GetBitContext gb; +BitstreamContextbc; /* stream data */ int num_vectors; int samples_per_channel; @@ -325,23 +325,23 @@ static av_cold int cook_decode_close(AVCodecContext *avctx) /** * Fill the gain array for the timedomain quantization. * - * @param gb pointer to the GetBitContext + * @param bc pointer to the BitstreamContext * @param gaininfoarray[9] of gain indexes */ -static void decode_gain_info(GetBitContext *gb, int *gaininfo) +static void decode_gain_info(BitstreamContext *bc, int *gaininfo) { int i, n; -while (get_bits1(gb)) { +while (bitstream_read_bit(bc)) { /* NOTHING */ } -n = get_bits_count(gb) - 1; // amount of elements*2 to update +n = bitstream_tell(bc) - 1; // amount of elements * 2 to update i = 0; while (n--) { -int index = get_bits(gb, 3); -int gain = get_bits1(gb) ? get_bits(gb, 4) - 7 : -1; +int index = bitstream_read(bc, 3); +int gain = bitstream_read_bit(bc) ? bitstream_read(bc, 4) - 7 : -1; while (i <= index) gaininfo[i++] = gain; @@ -361,7 +361,7 @@ static int decode_envelope(COOKContext *q, COOKSubpacket *p, { int i, j, vlc_index; -quant_index_table[0] = get_bits(>gb, 6) - 6; // This is used later in categorize +quant_index_table[0] = bitstream_read(>bc, 6) - 6; // This is used later in categorize for (i = 1; i < p->total_subbands; i++) { vlc_index = i; @@ -375,8 +375,8 @@ static int decode_envelope(COOKContext *q, COOKSubpacket *p, if (vlc_index > 13) vlc_index = 13; // the VLC tables >13 are identical to No. 
13 -j = get_vlc2(>gb, q->envelope_quant_index[vlc_index - 1].table, - q->envelope_quant_index[vlc_index - 1].bits, 2); +j = bitstream_read_vlc(>bc, q->envelope_quant_index[vlc_index - 1].table, + q->envelope_quant_index[vlc_index - 1].bits, 2); quant_index_table[i] = quant_index_table[i - 1] + j - 12; // differential encoding if (quant_index_table[i] > 63 || quant_index_table[i] < -63) { av_log(q->avctx, AV_LOG_ERROR, @@ -408,7 +408,7 @@ static void categorize(COOKContext *q, COOKSubpacket *p, int *quant_index_table, int tmp_categorize_array1_idx = p->numvector_size; int tmp_categorize_array2_idx = p->numvector_size; -bits_left = p->bits_per_subpacket - get_bits_count(>gb); +bits_left = p->bits_per_subpacket - bitstream_tell(>bc); if (bits_left > q->samples_per_channel) bits_left = q->samples_per_channel + @@ -554,8 +554,8 @@ static int unpack_SQVH(COOKContext *q, COOKSubpacket *p, int category, vd = vd_tab[category]; result = 0; for (i = 0; i < vpr_tab[category]; i++) { -vlc = get_vlc2(>gb, q->sqvh[category].table, q->sqvh[category].bits, 3); -if (p->bits_per_subpacket < get_bits_count(>gb)) { +vlc = bitstream_read_vlc(>bc, q->sqvh[category].table, q->sqvh[category].bits, 3); +if (p->bits_per_subpacket < bitstream_tell(>bc)) { vlc = 0; result = 1; } @@ -566,8 +566,8 @@ static int unpack_SQVH(COOKContext *q, COOKSubpacket *p, int category, } for (j = 0; j < vd; j++) { if (subband_coef_index[i * vd + j]) { -if (get_bits_count(>gb) < p->bits_per_subpacket) { -subband_coef_sign[i * vd + j] = get_bits1(>gb); +if (bitstream_tell(>bc) < p->bits_per_subpacket) { +subband_coef_sign[i * vd + j] = bitstream_read_bit(>bc); } else { result = 1; subband_coef_sign[i * vd + j] = 0; @@ -634,7 +634,7 @@ static int mono_decode(COOKContext *q, COOKSubpacket *p, float *mlt_buffer) if ((res = decode_envelope(q, p, quant_index_table)) < 0) return res; -q->num_vectors = get_bits(>gb, p->log2_numvector_size); +q->num_vectors = bitstream_read(>bc, p->log2_numvector_size); categorize(q, p, 
quant_index_table, category, category_index); expand_category(q, category, category_index); decode_vectors(q, p, category, quant_index_table, mlt_buffer); @@ -739,7 +739,7 @@ static void imlt_gain(COOKContext *q, float *inbuffer, static void
[libav-devel] [PATCH] hevc: Add HEVC IDCT 4x4 for PPC
From: Alexandra Hajkova--- libavcodec/hevcdsp.c | 2 + libavcodec/hevcdsp.h | 1 + libavcodec/ppc/Makefile | 1 + libavcodec/ppc/hevcdsp.c | 107 ++ libavcodec/ppc/hevcdsp_template.c | 54 +++ 5 files changed, 165 insertions(+) create mode 100644 libavcodec/ppc/hevcdsp.c create mode 100644 libavcodec/ppc/hevcdsp_template.c diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c index 7c19198..8ae023b 100644 --- a/libavcodec/hevcdsp.c +++ b/libavcodec/hevcdsp.c @@ -245,6 +245,8 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) break; } +if (ARCH_PPC) +ff_hevc_dsp_init_ppc(hevcdsp, bit_depth); if (ARCH_X86) ff_hevc_dsp_init_x86(hevcdsp, bit_depth); } diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h index 49cb711..2f4ff01 100644 --- a/libavcodec/hevcdsp.h +++ b/libavcodec/hevcdsp.h @@ -115,6 +115,7 @@ typedef struct HEVCDSPContext { void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth); +void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth); extern const int16_t ff_hevc_epel_coeffs[7][16]; diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index 09eabcb..4b92add 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -25,6 +25,7 @@ OBJS-$(CONFIG_VP8DSP) += ppc/vp8dsp_altivec.o # decoders/encoders OBJS-$(CONFIG_APE_DECODER) += ppc/apedsp_altivec.o +OBJS-$(CONFIG_HEVC_DECODER)+= ppc/hevcdsp.o OBJS-$(CONFIG_SVQ1_ENCODER)+= ppc/svq1enc_altivec.o OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o diff --git a/libavcodec/ppc/hevcdsp.c b/libavcodec/ppc/hevcdsp.c new file mode 100644 index 000..7cf7e97 --- /dev/null +++ b/libavcodec/ppc/hevcdsp.c @@ -0,0 +1,107 @@ +/* SIMD-optimized IDCT functions for HEVC decoding + * Copyright (c) Alexandra Hajkova + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#if HAVE_ALTIVEC_H +#include +#undef pixel +#endif + +#include "libavutil/cpu.h" +#include "libavutil/ppc/cpu.h" +#include "libavutil/ppc/types_altivec.h" +#include "libavutil/ppc/util_altivec.h" + +#include "libavcodec/hevcdsp.h" + +static const vector int16_t trans4[4] = { +{ 64, 64, 64, 64, 64, 64, 64, 64 }, +{ 83, 36, 83, 36, 83, 36, 83, 36 }, +{ 64, -64, 64, -64, 64, -64, 64, -64 }, +{ 36, -83, 36, -83, 36, -83, 36, -83 }, +}; + +static const vec_u8 mask[2] = { +{ 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, 0x0B, 0x12, 0x13, 0x1A, 0x1B }, +{ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 0x0F, 0x16, 0x17, 0x1E, 0x1F }, +}; + +#if HAVE_ALTIVEC +static void transform4x4(vector int16_t src_01, vector int16_t src_23, + vector int32_t res[4], const int shift, int16_t *coeffs) +{ +vector int16_t src_02, src_13; +vector int32_t zero = vec_splat_s32(0); +vector int32_t e0, o0, e1, o1; +vector int32_t add; + +src_13 = vec_mergel(src_01, src_23); +src_02 = vec_mergeh(src_01, src_23); + +e0 = vec_msums(src_02, trans4[0], zero); +o0 = vec_msums(src_13, trans4[1], zero); +e1 = vec_msums(src_02, trans4[2], zero); +o1 = vec_msums(src_13, trans4[3], zero); + +add = 
vec_sl(vec_splat_s32(1), vec_splat_u32(shift - 1)); +e0 = vec_add(e0, add); +e1 = vec_add(e1, add); + +res[0] = vec_add(e0, o0); +res[1] = vec_add(e1, o1); +res[2] = vec_sub(e1, o1); +res[3] = vec_sub(e0, o0); +} + +static void scale(vector int32_t res[4], vector int16_t res_packed[2], int shift) +{ +int i; +vector unsigned int v_shift = vec_splat_u32(shift); + +for (i = 0; i < 4; i++) +res[i] = vec_sra(res[i], v_shift); + +// clip16 +res_packed[0] = vec_packs(res[0], res[1]); +res_packed[1] = vec_packs(res[2], res[3]); +} +#endif /* HAVE_ALTIVEC */ + +#define BIT_DEPTH 8 +#include "libavcodec/ppc/hevcdsp_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 10 +#include
[libav-devel] [PATCH] hevc: Add HEVC IDCT 4x4 for PPC
From: Alexandra Hajkova--- libavcodec/hevcdsp.c | 2 + libavcodec/hevcdsp.h | 1 + libavcodec/ppc/Makefile | 1 + libavcodec/ppc/hevcdsp.c | 107 ++ libavcodec/ppc/hevcdsp_template.c | 50 ++ 5 files changed, 161 insertions(+) create mode 100644 libavcodec/ppc/hevcdsp.c create mode 100644 libavcodec/ppc/hevcdsp_template.c diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c index 7c19198..8ae023b 100644 --- a/libavcodec/hevcdsp.c +++ b/libavcodec/hevcdsp.c @@ -245,6 +245,8 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) break; } +if (ARCH_PPC) +ff_hevc_dsp_init_ppc(hevcdsp, bit_depth); if (ARCH_X86) ff_hevc_dsp_init_x86(hevcdsp, bit_depth); } diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h index 49cb711..2f4ff01 100644 --- a/libavcodec/hevcdsp.h +++ b/libavcodec/hevcdsp.h @@ -115,6 +115,7 @@ typedef struct HEVCDSPContext { void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth); +void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth); extern const int16_t ff_hevc_epel_coeffs[7][16]; diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index 09eabcb..4b92add 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -25,6 +25,7 @@ OBJS-$(CONFIG_VP8DSP) += ppc/vp8dsp_altivec.o # decoders/encoders OBJS-$(CONFIG_APE_DECODER) += ppc/apedsp_altivec.o +OBJS-$(CONFIG_HEVC_DECODER)+= ppc/hevcdsp.o OBJS-$(CONFIG_SVQ1_ENCODER)+= ppc/svq1enc_altivec.o OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o diff --git a/libavcodec/ppc/hevcdsp.c b/libavcodec/ppc/hevcdsp.c new file mode 100644 index 000..b46d895 --- /dev/null +++ b/libavcodec/ppc/hevcdsp.c @@ -0,0 +1,107 @@ +/* SIMD-optimized IDCT functions for HEVC decoding + * Copyright (c) Alexandra Hajkova + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#if HAVE_ALTIVEC_H +#include +#undef pixel +#endif + +#include "libavutil/cpu.h" +#include "libavutil/ppc/cpu.h" +#include "libavutil/ppc/types_altivec.h" +#include "libavutil/ppc/util_altivec.h" + +#include "libavcodec/hevcdsp.h" + +static const vector int16_t trans4[4] = { +{ 64, 64, 64, 64, 64, 64, 64, 64 }, +{ 83, 36, 83, 36, 83, 36, 83, 36 }, +{ 64, -64, 64, -64, 64, -64, 64, -64 }, +{ 36, -83, 36, -83, 36, -83, 36, -83 }, +}; + +static const vec_u8 mask[2] = { +{ 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, 0x0B, 0x12, 0x13, 0x1A, 0x1B }, +{ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 0x0F, 0x16, 0x17, 0x1E, 0x1F }, +}; + +#if HAVE_ALTIVEC +static void transform4x4(vector int16_t src_01, vector int16_t src_23, + vector int32_t res[4], const int shift, int16_t *coeffs) +{ +vector int16_t src_02, src_13; +vector int32_t zero = vec_splat_s32(0); +vector int32_t e0, o0, e1, o1; +vector int32_t add; + +src_13 = vec_mergel(src_01, src_23); +src_02 = vec_mergeh(src_01, src_23); + +e0 = vec_msums(src_02, trans4[0], zero); +o0 = vec_msums(src_13, trans4[1], zero); +e1 = vec_msums(src_02, trans4[2], zero); +o1 = vec_msums(src_13, trans4[3], zero); + +add = 
vec_sl(vec_splat_s32(1), vec_splat_u32(shift - 1)); +e0 = vec_add(e0, add); +e1 = vec_add(e1, add); + +res[0] = vec_add(e0, o0); +res[1] = vec_add(e1, o1); +res[2] = vec_sub(e1, o1); +res[3] = vec_sub(e0, o0); +} + +static void scale(vector int32_t res[4], vector int16_t res_packed[2], int shift) +{ +int i; +vector unsigned int v_shift = vec_splat_u32(shift); + +for (i = 0; i < 4; i++) +res[i] = vec_sra(res[i], v_shift); + +// clip16 +res_packed[0] = vec_packs(res[0], res[1]); +res_packed[1] = vec_packs(res[2], res[3]); +} +#endif /* HAVE_ALTIVEC */ + +#define BIT_DEPTH 8 +#include "libavcodec/ppc/hevcdsp_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 10 +#include
[libav-devel] [PATCH] hevc: Add HEVC IDCT 4x4 for PPC
--- Applied review comments as discussed. Tested on both LE and BE. libavcodec/hevcdsp.c | 2 + libavcodec/hevcdsp.h | 1 + libavcodec/ppc/Makefile | 1 + libavcodec/ppc/hevcdsp.c | 107 ++ libavcodec/ppc/hevcdsp_template.c | 50 ++ 5 files changed, 161 insertions(+) create mode 100644 libavcodec/ppc/hevcdsp.c create mode 100644 libavcodec/ppc/hevcdsp_template.c diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c index e370254..5d63eea 100644 --- a/libavcodec/hevcdsp.c +++ b/libavcodec/hevcdsp.c @@ -245,6 +245,8 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) break; } +if (ARCH_PPC) +ff_hevc_dsp_init_ppc(hevcdsp, bit_depth); if (ARCH_X86) ff_hevc_dsp_init_x86(hevcdsp, bit_depth); if (ARCH_PPC) diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h index 3827f50..bb490c8 100644 --- a/libavcodec/hevcdsp.h +++ b/libavcodec/hevcdsp.h @@ -115,6 +115,7 @@ typedef struct HEVCDSPContext { void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth); +void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_altivec(HEVCDSPContext *c, const int bit_depth); diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index 9234e77..6bc056c 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -25,6 +25,7 @@ OBJS-$(CONFIG_VP8DSP) += ppc/vp8dsp_altivec.o # decoders/encoders OBJS-$(CONFIG_APE_DECODER) += ppc/apedsp_altivec.o +OBJS-$(CONFIG_HEVC_DECODER)+= ppc/hevcdsp.o OBJS-$(CONFIG_SVQ1_ENCODER)+= ppc/svq1enc_altivec.o OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o diff --git a/libavcodec/ppc/hevcdsp.c b/libavcodec/ppc/hevcdsp.c new file mode 100644 index 000..b6e5461 --- /dev/null +++ b/libavcodec/ppc/hevcdsp.c @@ -0,0 +1,107 @@ +/* SIMD-optimized IDCT functions for HEVC decoding + * Copyright (c) Alexandra Hájková + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#if HAVE_ALTIVEC_H +#include +#undef pixel +#endif + +#include "libavutil/cpu.h" +#include "libavutil/ppc/cpu.h" +#include "libavutil/ppc/types_altivec.h" +#include "libavutil/ppc/util_altivec.h" + +#include "libavcodec/hevcdsp.h" + +static const vector int16_t trans4[4] = { +{ 64, 64, 64, 64, 64, 64, 64, 64 }, +{ 83, 36, 83, 36, 83, 36, 83, 36 }, +{ 64, -64, 64, -64, 64, -64, 64, -64 }, +{ 36, -83, 36, -83, 36, -83, 36, -83 }, +}; + +static const vec_u8 mask[2] = { +{ 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, 0x0B, 0x12, 0x13, 0x1A, 0x1B }, +{ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 0x0F, 0x16, 0x17, 0x1E, 0x1F }, +}; + +#if HAVE_ALTIVEC +static void transform4x4(vector int16_t src_01, vector int16_t src_23, + vector int32_t res[4], const int shift, int16_t *coeffs) +{ +vector int16_t src_02, src_13; +vector int32_t zero = vec_splat_s32(0); +vector int32_t e0, o0, e1, o1; +vector int32_t add; + +src_13 = vec_mergel(src_01, src_23); +src_02 = vec_mergeh(src_01, src_23); + +e0 = vec_msums(src_02, trans4[0], zero); +o0 = vec_msums(src_13, trans4[1], zero); +e1 = vec_msums(src_02, trans4[2], zero); +o1 = vec_msums(src_13, trans4[3], zero); + +add = 
vec_sl(vec_splat_s32(1), vec_splat_u32(shift - 1)); +e0 = vec_add(e0, add); +e1 = vec_add(e1, add); + +res[0] = vec_add(e0, o0); +res[1] = vec_add(e1, o1); +res[2] = vec_sub(e1, o1); +res[3] = vec_sub(e0, o0); +} + +static void scale(vector int32_t res[4], vector int16_t res_packed[2], int shift) +{ +int i; +vector unsigned int v_shift = vec_splat_u32(shift); + +for (i = 0; i < 4; i++) +res[i] = vec_sra(res[i], v_shift); + +// clip16 +res_packed[0] = vec_packs(res[0], res[1]); +res_packed[1] = vec_packs(res[2], res[3]); +} +#endif /* HAVE_ALTIVEC */ + +#define BI
Re: [libav-devel] [PATCH] hevc: Add HEVC IDCT 4x4 for PPC
>> +static const vector int16_t trans4[4] = { >> +{ 64, 64, 64, 64, 64, 64, 64, 64 }, >> +{ 83, 36, 83, 36, 83, 36, 83, 36 }, >> +{ 64, -64, 64, -64, 64, -64, 64, -64 }, >> +{ 36, -83, 36, -83, 36, -83, 36, -83 }, >> +}; > > This fits in int8_t, is there a reason to have it int16_t? The reason is that I need to multiply vectors of the same type. > >> +static const vec_u8 mask[2] = { >> +{ 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, >> 0x0B, 0x12, 0x13, 0x1A, 0x1B }, >> +{ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, >> 0x0F, 0x16, 0x17, 0x1E, 0x1F }, >> +}; > > Where do these tables come from? I would expect them to be shared > across arches. This is a permutation mask used by vec_perm, and it is specific to this case (which is a matrix transposition). Alexandra ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] hevc: Add HEVC IDCT 4x4 for PPC
From: Alexandra Hajkova--- libavcodec/hevcdsp.c | 2 + libavcodec/hevcdsp.h | 1 + libavcodec/ppc/Makefile | 1 + libavcodec/ppc/hevcdsp_ppc.c | 111 ++ libavcodec/ppc/hevcdsp_template.c | 52 ++ 5 files changed, 167 insertions(+) create mode 100644 libavcodec/ppc/hevcdsp_ppc.c create mode 100644 libavcodec/ppc/hevcdsp_template.c diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c index 7c19198..e370254 100644 --- a/libavcodec/hevcdsp.c +++ b/libavcodec/hevcdsp.c @@ -247,4 +247,6 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) if (ARCH_X86) ff_hevc_dsp_init_x86(hevcdsp, bit_depth); +if (ARCH_PPC) +ff_hevc_dsp_init_altivec(hevcdsp, bit_depth); } diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h index 49cb711..d50551a 100644 --- a/libavcodec/hevcdsp.h +++ b/libavcodec/hevcdsp.h @@ -116,6 +116,7 @@ typedef struct HEVCDSPContext { void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth); void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth); +void ff_hevc_dsp_init_altivec(HEVCDSPContext *c, const int bit_depth); extern const int16_t ff_hevc_epel_coeffs[7][16]; extern const int8_t ff_hevc_epel_coeffs8[7][16]; diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index 09eabcb..1c71df6 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -25,6 +25,7 @@ OBJS-$(CONFIG_VP8DSP) += ppc/vp8dsp_altivec.o # decoders/encoders OBJS-$(CONFIG_APE_DECODER) += ppc/apedsp_altivec.o +OBJS-$(CONFIG_HEVC_DECODER)+= ppc/hevcdsp_ppc.o OBJS-$(CONFIG_SVQ1_ENCODER)+= ppc/svq1enc_altivec.o OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o diff --git a/libavcodec/ppc/hevcdsp_ppc.c b/libavcodec/ppc/hevcdsp_ppc.c new file mode 100644 index 000..a2cb25a --- /dev/null +++ b/libavcodec/ppc/hevcdsp_ppc.c @@ -0,0 +1,111 @@ + +/* + * Copyright (c) Alexandra Hajkova + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#if HAVE_ALTIVEC_H +#include +#undef pixel +#endif + +#include "libavutil/cpu.h" +#include "libavutil/ppc/cpu.h" +#include "libavutil/ppc/types_altivec.h" +#include "libavutil/ppc/util_altivec.h" + +#include "libavcodec/hevcdsp.h" + +static const vector int16_t trans4[4] = { +{ 64, 64, 64, 64, 64, 64, 64, 64 }, +{ 83, 36, 83, 36, 83, 36, 83, 36 }, +{ 64, -64, 64, -64, 64, -64, 64, -64 }, +{ 36, -83, 36, -83, 36, -83, 36, -83 }, +}; + +static const vec_u8 mask[2] = { +{ 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, 0x0B, 0x12, 0x13, 0x1A, 0x1B }, +{ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 0x0F, 0x16, 0x17, 0x1E, 0x1F }, +}; + +#if HAVE_ALTIVEC +static void transform4x4(vector int16_t src_01, vector int16_t src_23, + vector int32_t res[4], const int shift, int16_t *coeffs) +{ +vector int16_t src_02, src_13; +vector int32_t zero = vec_splat_s32(0); +vector int32_t e0, o0, e1, o1; +vector int32_t add; + +src_13 = vec_mergel(src_01, src_23); +src_02 = vec_mergeh(src_01, src_23); + +e0 = vec_msums(src_02, trans4[0], zero); +o0 = vec_msums(src_13, trans4[1], zero); +e1 = vec_msums(src_02, trans4[2], zero); +o1 = vec_msums(src_13, trans4[3], zero); + +// if is not used by the 
other transform +add = vec_sl(vec_splat_s32(1), vec_splat_u32(shift - 1)); +e0 = vec_add(e0, add); +e1 = vec_add(e1, add); + +res[0] = vec_add(e0, o0); +res[1] = vec_add(e1, o1); +res[2] = vec_sub(e1, o1); +res[3] = vec_sub(e0, o0); +} + +static void scale(vector int32_t res[4], vector int16_t res_packed[2], int shift) +{ +int i; +vector unsigned int v_shift = vec_splat_u32(shift); + +for (i = 0; i < 4; i++) +res[i] = vec_sra(res[i], v_shift); + +// clip16 +res_packed[0] = vec_packs(res[0], res[1]); +res_packed[1] = vec_packs(res[2], res[3]); +} +#endif /* HAVE_ALTIVEC */ + +#define BIT_DEPTH 8 +#include "libavcodec/ppc/hevcdsp_template.c" +#undef
[libav-devel] [PATCH 1/2] hevc: x86: Add add_residual optimizations
From: Pierre Edouard Lepere <pierre-edouard.lep...@insa-rennes.fr> Initially written by Pierre Edouard Lepere <pierre-edouard.lep...@insa-rennes.fr>, extended by James Almer <jamr...@gmail.com>. Signed-off-by: Alexandra Hájková <alexan...@khirnov.net> Signed-off-by: Diego Biurrun <di...@biurrun.de> --- Applied review comments from Henrink and Diego (more consistent naming). libavcodec/x86/Makefile | 7 +- libavcodec/x86/hevc_add_res.asm | 371 libavcodec/x86/hevcdsp_init.c | 42 + 3 files changed, 417 insertions(+), 3 deletions(-) create mode 100644 libavcodec/x86/hevc_add_res.asm diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index a38535b..094c1fa 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -115,9 +115,10 @@ YASM-OBJS-$(CONFIG_AAC_DECODER)+= x86/sbrdsp.o YASM-OBJS-$(CONFIG_APE_DECODER)+= x86/apedsp.o YASM-OBJS-$(CONFIG_DCA_DECODER)+= x86/dcadsp.o YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o -YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_deblock.o\ - x86/hevc_mc.o \ - x86/hevc_idct.o +YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_add_res.o\ + x86/hevc_deblock.o\ + x86/hevc_idct.o \ + x86/hevc_mc.o YASM-OBJS-$(CONFIG_PNG_DECODER)+= x86/pngdsp.o YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o diff --git a/libavcodec/x86/hevc_add_res.asm b/libavcodec/x86/hevc_add_res.asm new file mode 100644 index 000..a1740b5 --- /dev/null +++ b/libavcodec/x86/hevc_add_res.asm @@ -0,0 +1,371 @@ +; * +; * Provide SIMD optimizations for add_residual functions for HEVC decoding +; * Copyright (c) 2014 Pierre-Edouard LEPERE +; * +; * This file is part of Libav. +; * +; * Libav is free software; you can redistribute it and/or +; * modify it under the terms of the GNU Lesser General Public +; * License as published by the Free Software Foundation; either +; * version 2.1 of the License, or (at your option) any later version. 
+; * +; * Libav is distributed in the hope that it will be useful, +; * but WITHOUT ANY WARRANTY; without even the implied warranty of +; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; * Lesser General Public License for more details. +; * +; * You should have received a copy of the GNU Lesser General Public +; * License along with Libav; if not, write to the Free Software +; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +; ** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA 32 +max_pixels_10: times 16 dw ((1 << 10)-1) + +SECTION .text + +; the add_res macros and functions were largely inspired by h264_idct.asm from the x264 project +%macro ADD_RES_MMX_4_8 0 +mova m0, [r1] +mova m2, [r1+8] +pxor m1, m1 +pxor m3, m3 +psubw m1, m0 +psubw m3, m2 +packuswb m0, m2 +packuswb m1, m3 + +movd m2, [r0] +movd m3, [r0+r2] +punpckldq m2, m3 +paddusb m0, m2 +psubusbm0, m1 +movd[r0], m0 +psrlq m0, 32 +movd [r0+r2], m0 +%endmacro + + +INIT_MMX mmxext +; void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, int16_t *res, ptrdiff_t stride) +cglobal hevc_add_residual_4_8, 3, 3, 6 +ADD_RES_MMX_4_8 +add r1, 16 +lea r0, [r0+r2*2] +ADD_RES_MMX_4_8 +RET + +%macro ADD_RES_SSE_8_8 0 +pxor m3, m3 +mova m4, [r1] +mova m6, [r1+16] +mova m0, [r1+32] +mova m2, [r1+48] +psubw m5, m3, m4 +psubw m7, m3, m6 +psubw m1, m3, m0 +packuswb m4, m0 +packuswb m5, m1 +psubw m3, m2 +packuswb m6, m2 +packuswb m7, m3 + +movq m0, [r0] +movq m1, [r0+r2] +movhpsm0, [r0+r2*2] +movhpsm1, [r0+r3] +paddusb m0, m4 +paddusb m1, m6 +psubusb m0, m5 +psubusb m1, m7 +movq[r0], m0 +movq [r0+r2], m1 +movhps [r0+2*r2], m0 +movhps [r0+r3], m1 +%endmacro + +%macro ADD_RES_SSE_16_32_8 3 +mova xm2, [r1+%1] +mova xm6,
[libav-devel] [PATCH 2/2] checkasm: Add a test for HEVC add_residual
--- Applied all Henrik's and Diego's review comments: fixed stride, and size sizes, consistent naming tests/checkasm/Makefile | 2 +- tests/checkasm/checkasm.c | 1 + tests/checkasm/checkasm.h | 1 + tests/checkasm/hevc_add_res.c | 85 +++ 4 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 tests/checkasm/hevc_add_res.c diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 22cf3db..2250f65 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -12,7 +12,7 @@ AVCODECOBJS-$(CONFIG_VP8DSP)+= vp8dsp.o # decoders/encoders AVCODECOBJS-$(CONFIG_DCA_DECODER) += dcadsp.o synth_filter.o -AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_mc.o hevc_idct.o +AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_mc.o hevc_idct.o hevc_add_res.o AVCODECOBJS-$(CONFIG_V210_ENCODER) += v210enc.o AVCODECOBJS-$(CONFIG_VP9_DECODER) += vp9dsp.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 040c4eb..623bbce 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -90,6 +90,7 @@ static const struct { { "h264qpel", checkasm_check_h264qpel }, #endif #if CONFIG_HEVC_DECODER +{ "hevc_add_res", checkasm_check_hevc_add_res }, { "hevc_mc", checkasm_check_hevc_mc }, { "hevc_idct", checkasm_check_hevc_idct }, #endif diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 75aa457..462c908 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -39,6 +39,7 @@ void checkasm_check_fmtconvert(void); void checkasm_check_h264dsp(void); void checkasm_check_h264pred(void); void checkasm_check_h264qpel(void); +void checkasm_check_hevc_add_res(void); void checkasm_check_hevc_idct(void); void checkasm_check_hevc_mc(void); void checkasm_check_huffyuvdsp(void); diff --git a/tests/checkasm/hevc_add_res.c b/tests/checkasm/hevc_add_res.c new file mode 100644 index 000..639e25e --- /dev/null +++ b/tests/checkasm/hevc_add_res.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016 Alexandra Hájková + * + * This file is part of 
Libav. + * + * Libav is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with Libav; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include + +#include "libavutil/intreadwrite.h" + +#include "libavcodec/hevcdsp.h" + +#include "checkasm.h" + +#define randomize_buffers(buf, size)\ +do {\ +int j; \ +for (j = 0; j < size; j++) {\ +int16_t r = rnd(); \ +AV_WN16A(buf + j, r >> 3); \ +} \ +} while (0) + +#define randomize_buffers2(buf, size) \ +do { \ +int j;\ +for (j = 0; j < size; j++)\ +AV_WN16A(buf + j * 2, rnd() & 0x3FF); \ +} while (0) + +static void check_add_res(HEVCDSPContext h, int bit_depth) +{ +int i; +LOCAL_ALIGNED(32, int16_t, res0, [32 * 32]); +LOCAL_ALIGNED(32, int16_t, res1, [32 * 32]); +LOCAL_ALIGNED(32, uint8_t, dst0, [32 * 32 * 2]); +LOCAL_ALIGNED(32, uint8_t, dst1, [32 * 32 * 2]); + +for (i = 2; i <= 5; i++) { +int block_size = 1 << i; +int size = block_size * block_size; +ptrdiff_t stride = block_size << (bit_depth > 8); +declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); + +randomize_buffers(res0, size); +randomize_buffers2(dst0, size); +memcpy(res1, res0, sizeof(*res0) * size); +memcpy(dst1, dst0, size); + +if (check_func(h.add_residual[i - 2], "add_res_%dx%d_%d", block_size, block_size, bit_depth)) { +call_ref(dst0, res0, stride); +call_new(dst1, res1, stride); +if (memcmp(dst0, dst1, size)) +fail(); +bench_new(dst1, res1, 
stride); +} +} +} + +void checkasm_check_hevc_add_res(void) +{ +int bit_depth; + +for (bit_depth = 8; bit_depth <= 10; bit_depth++) { +HEVCDSPContext h; + +
[libav-devel] [PATCH 2/2] checkasm: Add a test for HEVC add_residual
--- tests/checkasm/Makefile | 2 +- tests/checkasm/checkasm.c | 1 + tests/checkasm/checkasm.h | 1 + tests/checkasm/hevc_add_res.c | 84 +++ 4 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 tests/checkasm/hevc_add_res.c diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 9b3df55..ac3e97e 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -12,7 +12,7 @@ AVCODECOBJS-$(CONFIG_VP8DSP)+= vp8dsp.o # decoders/encoders AVCODECOBJS-$(CONFIG_DCA_DECODER) += dcadsp.o synth_filter.o -AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_mc.o hevc_idct.o +AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_mc.o hevc_idct.o hevc_add_res.o AVCODECOBJS-$(CONFIG_V210_ENCODER) += v210enc.o AVCODECOBJS-$(CONFIG_VP9_DECODER) += vp9dsp.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 040c4eb..623bbce 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -90,6 +90,7 @@ static const struct { { "h264qpel", checkasm_check_h264qpel }, #endif #if CONFIG_HEVC_DECODER +{ "hevc_add_res", checkasm_check_hevc_add_res }, { "hevc_mc", checkasm_check_hevc_mc }, { "hevc_idct", checkasm_check_hevc_idct }, #endif diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 5a4c056..bacd6f4 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -39,6 +39,7 @@ void checkasm_check_fmtconvert(void); void checkasm_check_h264dsp(void); void checkasm_check_h264pred(void); void checkasm_check_h264qpel(void); +void checkasm_check_hevc_add_res(void); void checkasm_check_hevc_idct(void); void checkasm_check_hevc_mc(void); void checkasm_check_huffyuvdsp(void); diff --git a/tests/checkasm/hevc_add_res.c b/tests/checkasm/hevc_add_res.c new file mode 100644 index 000..c242c8c --- /dev/null +++ b/tests/checkasm/hevc_add_res.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016 Alexandra Hájková + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with Libav; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include + +#include "libavutil/intreadwrite.h" + +#include "libavcodec/hevcdsp.h" + +#include "checkasm.h" + +#define randomize_buffers(buf, size)\ +do {\ +int j; \ +for (j = 0; j < size; j++) {\ +int16_t r = rnd(); \ +AV_WN16A(buf + j, r >> 3); \ +} \ +} while (0) + +#define randomize_buffers2(buf, size) \ +do { \ +int j;\ +for (j = 0; j < size; j++)\ +AV_WN16A(buf + j * 2, rnd() & 0x3FF); \ +} while (0) + +static void check_add_res(HEVCDSPContext h, int bit_depth) +{ +int i; +LOCAL_ALIGNED(32, int16_t, res0, [32 * 32]); +LOCAL_ALIGNED(32, int16_t, res1, [32 * 32]); +LOCAL_ALIGNED(32, uint8_t, dst0, [32 * 32 * 2]); +LOCAL_ALIGNED(32, uint8_t, dst1, [32 * 32 * 2]); + +for (i = 2; i <= 5; i++) { +int block_size = 1 << i; +int size = block_size * block_size; +declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *res, ptrdiff_t stride); + +randomize_buffers(res0, size); +randomize_buffers2(dst0, size * 2); +memcpy(res1, res0, sizeof(*res0) * size); +memcpy(dst1, dst0, size * 2); + +if (check_func(h.add_residual[i - 2], "add_res_%dx%d_%d", block_size, block_size, bit_depth)) { +call_ref(dst0, res0, block_size * 2); +call_new(dst1, res1, block_size * 2); +if (memcmp(dst0, dst1, size * 2)) +fail(); +bench_new(dst1, res1, block_size); +} +} +} + +void 
checkasm_check_hevc_add_res(void) +{ +int bit_depth; + +for (bit_depth = 8; bit_depth <= 10; bit_depth++) { +HEVCDSPContext h; + +ff_hevc_dsp_init(&h, bit_depth); +check_add_res(h, bit_depth); +} +report("add_residual"); +} -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/2] hevc/x86: Add add_residual
From: Pierre Edouard Lepere <pierre-edouard.lep...@insa-rennes.fr> Initially written by Pierre Edouard Lepere <pierre-edouard.lep...@insa-rennes.fr>, extended by James Almer <jamr...@gmail.com>. Signed-off-by: Alexandra Hájková <alexan...@khirnov.net> --- libavcodec/x86/Makefile | 3 +- libavcodec/x86/hevc_res_add.asm | 391 libavcodec/x86/hevcdsp_init.c | 40 3 files changed, 433 insertions(+), 1 deletion(-) create mode 100644 libavcodec/x86/hevc_res_add.asm diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index a38535b..aa93e67 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -117,7 +117,8 @@ YASM-OBJS-$(CONFIG_DCA_DECODER)+= x86/dcadsp.o YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_deblock.o\ x86/hevc_mc.o \ - x86/hevc_idct.o + x86/hevc_idct.o \ + x86/hevc_res_add.o YASM-OBJS-$(CONFIG_PNG_DECODER)+= x86/pngdsp.o YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o diff --git a/libavcodec/x86/hevc_res_add.asm b/libavcodec/x86/hevc_res_add.asm new file mode 100644 index 000..f8d9fd7 --- /dev/null +++ b/libavcodec/x86/hevc_res_add.asm @@ -0,0 +1,391 @@ +; * +; * Provide SIMD optimizations for add_residual functions for HEVC decoding +; * Copyright (c) 2014 Pierre-Edouard LEPERE +; * +; * This file is part of Libav. +; * +; * Libav is free software; you can redistribute it and/or +; * modify it under the terms of the GNU Lesser General Public +; * License as published by the Free Software Foundation; either +; * version 2.1 of the License, or (at your option) any later version. +; * +; * Libav is distributed in the hope that it will be useful, +; * but WITHOUT ANY WARRANTY; without even the implied warranty of +; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; * Lesser General Public License for more details. 
+; * +; * You should have received a copy of the GNU Lesser General Public +; * License along with Libav; if not, write to the Free Software +; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +; ** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA 32 +max_pixels_10: times 16 dw ((1 << 10)-1) + +SECTION .text + +; the add_res macros and functions were largely inspired by x264 project's code in the h264_idct.asm file +%macro ADD_RES_MMX_4_8 0 +mova m2, [r1] +mova m4, [r1+8] +pxor m3, m3 +psubw m3, m2 +packuswb m2, m2 +packuswb m3, m3 +pxor m5, m5 +psubw m5, m4 +packuswb m4, m4 +packuswb m5, m5 + +movh m0, [r0 ] +movh m1, [r0+r2 ] +paddusb m0, m2 +paddusb m1, m4 +psubusb m0, m3 +psubusb m1, m5 +movh [r0 ], m0 +movh [r0+r2 ], m1 +%endmacro + + +INIT_MMX mmxext +; void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) +cglobal hevc_add_residual_4_8, 3, 4, 6 +ADD_RES_MMX_4_8 +add r1, 16 +lea r0, [r0+r2*2] +ADD_RES_MMX_4_8 +RET + +%macro ADD_RES_SSE_8_8 0 +pxor m3, m3 +mova m4, [r1] +mova m6, [r1+16] +mova m0, [r1+32] +mova m2, [r1+48] +psubw m5, m3, m4 +psubw m7, m3, m6 +psubw m1, m3, m0 +packuswb m4, m0 +packuswb m5, m1 +psubw m3, m2 +packuswb m6, m2 +packuswb m7, m3 + +movqm0, [r0 ] +movqm1, [r0+r2 ] +movhps m0, [r0+r2*2] +movhps m1, [r0+r3 ] +paddusb m0, m4 +paddusb m1, m6 +psubusb m0, m5 +psubusb m1, m7 +movq [r0 ], m0 +movq [r0+r2 ], m1 +movhps [r0+2*r2], m0 +movhps [r0+r3 ], m1 +%endmacro + +%macro ADD_RES_SSE_16_32_8 3 +mova xm2, [r1+%1 ] +mova xm6, [r1+%1+16] +%if cpuflag(avx2) +vinserti128 m2, m2, [r1+%1+32], 1 +vinserti128 m6, m6, [r1+%1+48], 1 +%endif +%if cpuflag(avx) +psubw m1, m0, m2 +psubw m5, m0, m6 +%else +mova m1, m0 +mova
Re: [libav-devel] [PATCH 2/2] checkasm: Add a test for HEVC add_residual
>> +#define randomize_buffers(buf, size)\ >> +do {\ >> +int j; \ >> +for (j = 0; j < size; j++) {\ >> +int16_t r = rnd(); \ >> +AV_WN16A(buf + j, r >> 3); \ >> +} \ >> +} while (0) > > We should stop duplicating these between checkasm modules some day. > You're welcome to help me refactor. As Martin already pointed out, the randomize_buffers macros are slightly different in each of the hevc tests. > >> +#define randomize_buffers2(buf, size) \ >> +do { \ >> +int j; \ >> +for (j = 0; j < size; j++) \ >> +AV_WN16A(buf + j * 2, (rnd() & 0xFF)); \ > > pointless (), align the \ > > What is the reason for writing 16-bits and throwing the upper half away? I'd better use 0x3FF. > >> +void checkasm_check_hevc_add_res(void) >> +{ >> +int bit_depth; >> + >> +for (bit_depth = 8; bit_depth <= 10; bit_depth++) { >> +HEVCDSPContext h; >> + >> +ff_hevc_dsp_init(&h, bit_depth); >> +check_add_res(h, bit_depth); >> +} > > I didn't see you add 9-bit versions of the assembly functions, why do > you test 9 bits? > Because there's no 9-bit SIMD function, it's not tested, but the code looks simpler this way. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/2] hevc/x86: Add add_residual
From: Pierre Edouard Lepere <pierre-edouard.lep...@insa-rennes.fr> Initially written by Pierre Edouard Lepere <pierre-edouard.lep...@insa-rennes.fr>, extended by James Almer <jamr...@gmail.com>. Signed-off-by: Alexandra Hájková <alexan...@khirnov.net> --- libavcodec/x86/Makefile | 3 +- libavcodec/x86/hevc_res_add.asm | 391 libavcodec/x86/hevcdsp_init.c | 40 3 files changed, 433 insertions(+), 1 deletion(-) create mode 100644 libavcodec/x86/hevc_res_add.asm diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index a38535b..aa93e67 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -117,7 +117,8 @@ YASM-OBJS-$(CONFIG_DCA_DECODER)+= x86/dcadsp.o YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_deblock.o\ x86/hevc_mc.o \ - x86/hevc_idct.o + x86/hevc_idct.o \ + x86/hevc_res_add.o YASM-OBJS-$(CONFIG_PNG_DECODER)+= x86/pngdsp.o YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o diff --git a/libavcodec/x86/hevc_res_add.asm b/libavcodec/x86/hevc_res_add.asm new file mode 100644 index 000..1e3bfc2 --- /dev/null +++ b/libavcodec/x86/hevc_res_add.asm @@ -0,0 +1,391 @@ +; /* +; * Provide SIMD optimizations for add_residual functions for HEVC decoding +; * Copyright (c) 2014 Pierre-Edouard LEPERE +; * +; * This file is part of Libav. +; * +; * FFmpeg is free software; you can redistribute it and/or +; * modify it under the terms of the GNU Lesser General Public +; * License as published by the Free Software Foundation; either +; * version 2.1 of the License, or (at your option) any later version. +; * +; * FFmpeg is distributed in the hope that it will be useful, +; * but WITHOUT ANY WARRANTY; without even the implied warranty of +; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; * Lesser General Public License for more details. 
+; * +; * You should have received a copy of the GNU Lesser General Public +; * License along with FFmpeg; if not, write to the Free Software +; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +; */ +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA 32 +max_pixels_10: times 16 dw ((1 << 10)-1) + +SECTION .text + +; the add_res macros and functions were largely inspired by x264 project's code in the h264_idct.asm file +%macro ADD_RES_MMX_4_8 0 +mova m2, [r1] +mova m4, [r1+8] +pxor m3, m3 +psubw m3, m2 +packuswb m2, m2 +packuswb m3, m3 +pxor m5, m5 +psubw m5, m4 +packuswb m4, m4 +packuswb m5, m5 + +movh m0, [r0 ] +movh m1, [r0+r2 ] +paddusb m0, m2 +paddusb m1, m4 +psubusb m0, m3 +psubusb m1, m5 +movh [r0 ], m0 +movh [r0+r2 ], m1 +%endmacro + + +INIT_MMX mmxext +; void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) +cglobal hevc_add_residual_4_8, 3, 4, 6 +ADD_RES_MMX_4_8 +add r1, 16 +lea r0, [r0+r2*2] +ADD_RES_MMX_4_8 +RET + +%macro ADD_RES_SSE_8_8 0 +pxor m3, m3 +mova m4, [r1] +mova m6, [r1+16] +mova m0, [r1+32] +mova m2, [r1+48] +psubw m5, m3, m4 +psubw m7, m3, m6 +psubw m1, m3, m0 +packuswb m4, m0 +packuswb m5, m1 +psubw m3, m2 +packuswb m6, m2 +packuswb m7, m3 + +movqm0, [r0 ] +movqm1, [r0+r2 ] +movhps m0, [r0+r2*2] +movhps m1, [r0+r3 ] +paddusb m0, m4 +paddusb m1, m6 +psubusb m0, m5 +psubusb m1, m7 +movq [r0 ], m0 +movq [r0+r2 ], m1 +movhps [r0+2*r2], m0 +movhps [r0+r3 ], m1 +%endmacro + +%macro ADD_RES_SSE_16_32_8 3 +mova xm2, [r1+%1 ] +mova xm6, [r1+%1+16] +%if cpuflag(avx2) +vinserti128 m2, m2, [r1+%1+32], 1 +vinserti128 m6, m6, [r1+%1+48], 1 +%endif +%if cpuflag(avx) +psubw m1, m0, m2 +psubw m5, m0, m6 +%else +mova m1, m0 +mova m5, m0 +psubw m1, m2 +psubw m5, m6 +%endif +packuswb m2, m6 +packuswb m1, m5 + +mova
[libav-devel] [PATCH 2/2] checkasm: Add a test for HEVC add_residual
--- tests/checkasm/Makefile | 2 +- tests/checkasm/checkasm.c | 1 + tests/checkasm/checkasm.h | 1 + tests/checkasm/hevc_add_res.c | 84 +++ 4 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 tests/checkasm/hevc_add_res.c diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 9b3df55..ac3e97e 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -12,7 +12,7 @@ AVCODECOBJS-$(CONFIG_VP8DSP)+= vp8dsp.o # decoders/encoders AVCODECOBJS-$(CONFIG_DCA_DECODER) += dcadsp.o synth_filter.o -AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_mc.o hevc_idct.o +AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_mc.o hevc_idct.o hevc_add_res.o AVCODECOBJS-$(CONFIG_V210_ENCODER) += v210enc.o AVCODECOBJS-$(CONFIG_VP9_DECODER) += vp9dsp.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 040c4eb..d0dc525 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -92,6 +92,7 @@ static const struct { #if CONFIG_HEVC_DECODER { "hevc_mc", checkasm_check_hevc_mc }, { "hevc_idct", checkasm_check_hevc_idct }, +{ "hevc_add_res", checkasm_check_hevc_add_res }, #endif #if CONFIG_HUFFYUVDSP { "huffyuvdsp", checkasm_check_huffyuvdsp }, diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 5a4c056..bacd6f4 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -39,6 +39,7 @@ void checkasm_check_fmtconvert(void); void checkasm_check_h264dsp(void); void checkasm_check_h264pred(void); void checkasm_check_h264qpel(void); +void checkasm_check_hevc_add_res(void); void checkasm_check_hevc_idct(void); void checkasm_check_hevc_mc(void); void checkasm_check_huffyuvdsp(void); diff --git a/tests/checkasm/hevc_add_res.c b/tests/checkasm/hevc_add_res.c new file mode 100644 index 000..fcc47c1 --- /dev/null +++ b/tests/checkasm/hevc_add_res.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016 Alexandra Hájková + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with Libav; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include + +#include "libavutil/intreadwrite.h" + +#include "libavcodec/hevcdsp.h" + +#include "checkasm.h" + +#define randomize_buffers(buf, size)\ +do {\ +int j; \ +for (j = 0; j < size; j++) {\ +int16_t r = rnd(); \ +AV_WN16A(buf + j, r >> 3); \ +} \ +} while (0) + +#define randomize_buffers2(buf, size) \ +do { \ +int j; \ +for (j = 0; j < size; j++) \ +AV_WN16A(buf + j * 2, (rnd() & 0xFF)); \ +} while (0) + +static void check_add_res(HEVCDSPContext h, int bit_depth) +{ +int i; +LOCAL_ALIGNED(32, int16_t, res0, [32 * 32]); +LOCAL_ALIGNED(32, int16_t, res1, [32 * 32]); +LOCAL_ALIGNED(32, uint8_t, dst0, [32 * 32 * 2]); +LOCAL_ALIGNED(32, uint8_t, dst1, [32 * 32 * 2]); + +for (i = 2; i <= 5; i++) { +int block_size = 1 << i; +int size = block_size * block_size; +declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *res, ptrdiff_t stride); + +randomize_buffers(res0, size); +randomize_buffers2(dst0, size * 2); +memcpy(res1, res0, sizeof(*res0) * size); +memcpy(dst1, dst0, size * 2); + +if (check_func(h.add_residual[i - 2], "add_res_%dx%d_%d", block_size, block_size, bit_depth)) { +call_ref(dst0, res0, block_size * 2); +call_new(dst1, res1, block_size * 2); +if (memcmp(dst0, dst1, size * 2)) +fail(); +bench_new(dst1, res1, block_size); +} +} +} + +void 
checkasm_check_hevc_add_res(void) +{ +int bit_depth; + +for (bit_depth = 8; bit_depth <= 10; bit_depth++) { +HEVCDSPContext h; + +ff_hevc_dsp_init(&h, bit_depth); +check_add_res(h, bit_depth); +} +report("add_residual"); +} -- 2.1.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/2] hevc: Add SSE2 and AVX IDCT
--- libavcodec/x86/hevc_idct.asm | 787 +++-- libavcodec/x86/hevcdsp_init.c | 54 ++- 2 files changed, 811 insertions(+), 30 deletions(-) diff --git a/libavcodec/x86/hevc_idct.asm b/libavcodec/x86/hevc_idct.asm index d662aa9..f397cc1 100644 --- a/libavcodec/x86/hevc_idct.asm +++ b/libavcodec/x86/hevc_idct.asm @@ -2,6 +2,7 @@ ;* SIMD-optimized IDCT functions for HEVC decoding ;* Copyright (c) 2014 Pierre-Edouard LEPERE ;* Copyright (c) 2014 James Almer +;* Copyright (c) 2016 Alexandra Hájková ;* ;* This file is part of Libav. ;* @@ -22,6 +23,217 @@ %include "libavutil/x86/x86util.asm" +SECTION_RODATA + +pd_64: times 4 dd 64 +pd_2048: times 4 dd 2048 +pd_512: times 4 dd 512 + +; 4x4 transform coeffs +cextern pw_64 +pw_64_m64: times 4 dw 64, -64 +pw_83_36: times 4 dw 83, 36 +pw_36_m83: times 4 dw 36, -83 + +; 8x8 transform coeffs +pw_89_75: times 4 dw 89, 75 +pw_50_18: times 4 dw 50, 18 + +pw_75_m18: times 4 dw 75, -18 +pw_m89_m50: times 4 dw -89, -50 + +pw_50_m89: times 4 dw 50, -89 +pw_18_75: times 4 dw 18, 75 + +pw_18_m50: times 4 dw 18, -50 +pw_75_m89: times 4 dw 75, -89 + +; 16x16 transformation coeffs +trans_coeffs16: times 4 dw 90, 87 +times 4 dw 80, 70 +times 4 dw 57, 43 +times 4 dw 25, 9 + +times 4 dw 87, 57 +times 4 dw 9, -43 +times 4 dw -80, -90 +times 4 dw -70, -25 + +times 4 dw 80, 9 +times 4 dw -70, -87 +times 4 dw -25, 57 +times 4 dw 90, 43 + +times 4 dw 70, -43 +times 4 dw -87, 9 +times 4 dw 90, 25 +times 4 dw -80, -57 + +times 4 dw 57, -80 +times 4 dw -25, 90 +times 4 dw -9, -87 +times 4 dw 43, 70 + +times 4 dw 43, -90 +times 4 dw 57, 25 +times 4 dw -87, 70 +times 4 dw 9, -80 + +times 4 dw 25, -70 +times 4 dw 90, -80 +times 4 dw 43, 9 +times 4 dw -57, 87 + +times 4 dw 9, -25 +times 4 dw 43, -57 +times 4 dw 70, -80 +times 4 dw 87, -90 + +; 32x32 transform coeffs +trans_coeff32: times 8 dw 90 +times 4 dw 88, 85 +times 4 dw 82, 78 +times 4 dw 73, 67 +times 4 dw 61, 54 +times 4 dw 46, 38 +times 4 dw 31, 22 +times 4 dw 13, 4 + +times 4 dw 90, 82 +times 4 dw 
67, 46 +times 4 dw 22, -4 +times 4 dw -31, -54 +times 4 dw -73, -85 +times 4 dw -90, -88 +times 4 dw -78, -61 +times 4 dw -38, -13 + +times 4 dw 88, 67 +times 4 dw 31, -13 +times 4 dw -54, -82 +times 4 dw -90, -78 +times 4 dw -46, -4 +times 4 dw 38, 73 +times 4 dw 90, 85 +times 4 dw 61, 22 + +times 4 dw 85, 46 +times 4 dw -13, -67 +times 4 dw -90, -73 +times 4 dw -22, 38 +times 4 dw 82, 88 +times 4 dw 54, -4 +times 4 dw -61, -90 +times 4 dw -78, -31 + +times 4 dw 82, 22 +times 4 dw -54, -90 +times 4 dw -61, 13 +times 4 dw 78, 85 +times 4 dw 31, -46 +times 4 dw -90, -67 +times 4 dw 4, 73 +times 4 dw 88, 38 + +times 4 dw 78, -4 +times 4 dw -82, -73 +times 4 dw 13, 85 +times 4 dw 67, -22 +times 4 dw -88, -61 +times 4 dw 31, 90 +times 4 dw 54, -38 +times 4 dw -90, -46 + +times 4 dw 73, -31 +times 4 dw -90, -22 +times 4 dw 78, 67 +times 4 dw -38, -90 +times 4 dw -13, 82 +times 4 dw 61, -46 +times 4 dw -88, -4 +times 4 dw 85, 54 + +times 4 dw 67, -54 +times 4 dw -78, 38 +times 4 dw 85, -22 +times 4 dw -90, 4 +times 4 dw 90, 13 +times 4 dw -88, -31 +times 4 dw 82, 46 +times 4 dw -73, -61 + +times 4 dw 61, -73 +times 4 dw -46, 82 +times 4 dw 31, -88 +times 4 dw -13, 90 +times 4 dw -4, -90 +times 4 dw 22, 85 +times 4 dw -38, -78 +times 4 dw 54, 67 + +times 4 dw 54, -85 +times 4 dw -4, 88 +times 4 dw -46, -61 +times 4 dw 82, 13 +times 4 dw -90, 38 +times 4 dw 67, -78 +times 4 dw -22, 90 +times 4 dw -31, -73 + +times 4 dw 46, -90 +times 4 dw 38, 54 +times 4 dw -90, 31 +times 4 dw 61, -88 +times 4 dw 22, 67 +times 4 dw -85, 13 +times 4 dw 73, -82 +times 4 dw 4, 78 + +times 4 dw 38, -88 +times 4 dw 73, -4 +times 4 dw -67, 90 +times 4 dw -46, -31 +times 4 dw 85, -78 +times 4 dw 13, 61 +times 4 dw -90, 54 +times 4 dw 22, -82 + +times 4 dw 31, -78 +times 4 dw 90, -61 +times 4 dw 4, 54 +times 4 dw -88, 82 +times 4 dw -38, -22 +times 4 dw 73, -90 +times 4 dw 67, -13 +times 4 dw -46, 85 + +times 4 dw 22, -61 +times 4 dw 85, -90 +times 4 dw 73, -38 +times 4 dw -4, 46 +times 4 dw -78, 
90 +times 4 dw -82, 54 +times 4 dw -13, -31 +times 4 dw 67, -88 + +times 4 dw 13, -38 +times 4 dw 61, -78 +times 4 dw 88, -90 +times 4 dw 85, -73 +times 4 dw 54, -31 +times 4 dw 4, 22 +times 4 dw -46, 67 +times 4 dw -82, 90 + +times 4 dw 4, -13 +times 4 dw 22, -31 +times 4 dw 38, -46 +times 4 dw 54, -61 +times 4 dw 67, -73 +times 4 dw 78, -82 +times 4 dw 85, -88 +times 4 dw 90, -90 + section .text ; void ff_hevc_idctHxW_dc_{8,10}_(int16_t *coeffs) @@ -74,34 +286,565 @@ cglobal hevc_idct_%1x%1_dc_%2, 1, 2, 1, coeff, tmp RET %endmacro -; 8-bit -INIT_MMX mmxext -IDCT_DC_NL 4, 8 -IDCT_DC 8, 2, 8 +; IDCT 4x4, expects input in m0, m1 +; %1 - shift +; %2 - 1/0 - SCALE and Transpose or not +; %3 - 1/0 add constant or not +%macro TR_4x4 3 +; interleaves src0 with src2 to m0 +; and src1 with scr3 to m2 +; src0: 00 01 02 03 m0: 00 20 01 21 02 22 03 23 +; src1: 10 11 12 13 --> +
[libav-devel] [PATCH 1/2] hevc: Add SSE2 and AVX IDCT
--- Apply the review comments from the last review. libavcodec/x86/hevc_idct.asm | 792 +++-- libavcodec/x86/hevcdsp_init.c | 62 +++- 2 files changed, 825 insertions(+), 29 deletions(-) diff --git a/libavcodec/x86/hevc_idct.asm b/libavcodec/x86/hevc_idct.asm index d662aa9..6606d46 100644 --- a/libavcodec/x86/hevc_idct.asm +++ b/libavcodec/x86/hevc_idct.asm @@ -2,6 +2,7 @@ ;* SIMD-optimized IDCT functions for HEVC decoding ;* Copyright (c) 2014 Pierre-Edouard LEPERE ;* Copyright (c) 2014 James Almer +;* Copyright (c) 2016 Alexandra Hájková ;* ;* This file is part of Libav. ;* @@ -22,6 +23,217 @@ %include "libavutil/x86/x86util.asm" +SECTION_RODATA + +pd_64: times 4 dd 64 +pd_2048: times 4 dd 2048 +pd_512: times 4 dd 512 + +; 4x4 transform coeffs +cextern pw_64 +pw_64_m64: times 4 dw 64, -64 +pw_83_36: times 4 dw 83, 36 +pw_36_m83: times 4 dw 36, -83 + +; 8x8 transform coeffs +pw_89_75: times 4 dw 89, 75 +pw_50_18: times 4 dw 50, 18 + +pw_75_m18: times 4 dw 75, -18 +pw_m89_m50: times 4 dw -89, -50 + +pw_50_m89: times 4 dw 50, -89 +pw_18_75: times 4 dw 18, 75 + +pw_18_m50: times 4 dw 18, -50 +pw_75_m89: times 4 dw 75, -89 + +; 16x16 transformation coeffs +trans_coeffs16: times 4 dw 90, 87 +times 4 dw 80, 70 +times 4 dw 57, 43 +times 4 dw 25, 9 + +times 4 dw 87, 57 +times 4 dw 9, -43 +times 4 dw -80, -90 +times 4 dw -70, -25 + +times 4 dw 80, 9 +times 4 dw -70, -87 +times 4 dw -25, 57 +times 4 dw 90, 43 + +times 4 dw 70, -43 +times 4 dw -87, 9 +times 4 dw 90, 25 +times 4 dw -80, -57 + +times 4 dw 57, -80 +times 4 dw -25, 90 +times 4 dw -9, -87 +times 4 dw 43, 70 + +times 4 dw 43, -90 +times 4 dw 57, 25 +times 4 dw -87, 70 +times 4 dw 9, -80 + +times 4 dw 25, -70 +times 4 dw 90, -80 +times 4 dw 43, 9 +times 4 dw -57, 87 + +times 4 dw 9, -25 +times 4 dw 43, -57 +times 4 dw 70, -80 +times 4 dw 87, -90 + +; 32x32 transform coeffs +trans_coeff32: times 8 dw 90 +times 4 dw 88, 85 +times 4 dw 82, 78 +times 4 dw 73, 67 +times 4 dw 61, 54 +times 4 dw 46, 38 +times 4 dw 31, 22 
+times 4 dw 13, 4 + +times 4 dw 90, 82 +times 4 dw 67, 46 +times 4 dw 22, -4 +times 4 dw -31, -54 +times 4 dw -73, -85 +times 4 dw -90, -88 +times 4 dw -78, -61 +times 4 dw -38, -13 + +times 4 dw 88, 67 +times 4 dw 31, -13 +times 4 dw -54, -82 +times 4 dw -90, -78 +times 4 dw -46, -4 +times 4 dw 38, 73 +times 4 dw 90, 85 +times 4 dw 61, 22 + +times 4 dw 85, 46 +times 4 dw -13, -67 +times 4 dw -90, -73 +times 4 dw -22, 38 +times 4 dw 82, 88 +times 4 dw 54, -4 +times 4 dw -61, -90 +times 4 dw -78, -31 + +times 4 dw 82, 22 +times 4 dw -54, -90 +times 4 dw -61, 13 +times 4 dw 78, 85 +times 4 dw 31, -46 +times 4 dw -90, -67 +times 4 dw 4, 73 +times 4 dw 88, 38 + +times 4 dw 78, -4 +times 4 dw -82, -73 +times 4 dw 13, 85 +times 4 dw 67, -22 +times 4 dw -88, -61 +times 4 dw 31, 90 +times 4 dw 54, -38 +times 4 dw -90, -46 + +times 4 dw 73, -31 +times 4 dw -90, -22 +times 4 dw 78, 67 +times 4 dw -38, -90 +times 4 dw -13, 82 +times 4 dw 61, -46 +times 4 dw -88, -4 +times 4 dw 85, 54 + +times 4 dw 67, -54 +times 4 dw -78, 38 +times 4 dw 85, -22 +times 4 dw -90, 4 +times 4 dw 90, 13 +times 4 dw -88, -31 +times 4 dw 82, 46 +times 4 dw -73, -61 + +times 4 dw 61, -73 +times 4 dw -46, 82 +times 4 dw 31, -88 +times 4 dw -13, 90 +times 4 dw -4, -90 +times 4 dw 22, 85 +times 4 dw -38, -78 +times 4 dw 54, 67 + +times 4 dw 54, -85 +times 4 dw -4, 88 +times 4 dw -46, -61 +times 4 dw 82, 13 +times 4 dw -90, 38 +times 4 dw 67, -78 +times 4 dw -22, 90 +times 4 dw -31, -73 + +times 4 dw 46, -90 +times 4 dw 38, 54 +times 4 dw -90, 31 +times 4 dw 61, -88 +times 4 dw 22, 67 +times 4 dw -85, 13 +times 4 dw 73, -82 +times 4 dw 4, 78 + +times 4 dw 38, -88 +times 4 dw 73, -4 +times 4 dw -67, 90 +times 4 dw -46, -31 +times 4 dw 85, -78 +times 4 dw 13, 61 +times 4 dw -90, 54 +times 4 dw 22, -82 + +times 4 dw 31, -78 +times 4 dw 90, -61 +times 4 dw 4, 54 +times 4 dw -88, 82 +times 4 dw -38, -22 +times 4 dw 73, -90 +times 4 dw 67, -13 +times 4 dw -46, 85 + +times 4 dw 22, -61 +times 4 dw 85, -90 
+times 4 dw 73, -38 +times 4 dw -4, 46 +times 4 dw -78, 90 +times 4 dw -82, 54 +times 4 dw -13, -31 +times 4 dw 67, -88 + +times 4 dw 13, -38 +times 4 dw 61, -78 +times 4 dw 88, -90 +times 4 dw 85, -73 +times 4 dw 54, -31 +times 4 dw 4, 22 +times 4 dw -46, 67 +times 4 dw -82, 90 + +times 4 dw 4, -13 +times 4 dw 22, -31 +times 4 dw 38, -46 +times 4 dw 54, -61 +times 4 dw 67, -73 +times 4 dw 78, -82 +times 4 dw 85, -88 +times 4 dw 90, -90 + section .text ; void ff_hevc_idctHxW_dc_{8,10}_(int16_t *coeffs) @@ -74,34 +286,572 @@ cglobal hevc_idct_%1x%1_dc_%2, 1, 2, 1, coeff, tmp RET %endmacro -; 8-bit -INIT_MMX mmxext -IDCT_DC_NL 4, 8 -IDCT_DC 8, 2, 8 +; IDCT 4x4, expects input in m0, m1 +; %1 - shift +; %2 - 1/0 - SCALE and Transpose or not +; %3 - 1/0 add constant or not +%macro TR_4x4 3 +; interleaves src0 with src2 to m0 +; and src1 with scr3 to m2 +; src0: 00 01 02 03 m0: 00 20 0
Re: [libav-devel] [PATCH 1/2] hevc: Add SSE2 and AVX IDCT
On Fri, Oct 7, 2016 at 12:32 AM, Diego Biurrun <di...@biurrun.de> wrote: > On Wed, Oct 05, 2016 at 02:04:31PM +0200, Alexandra Hájková wrote: >> --- a/libavcodec/x86/hevc_idct.asm >> +++ b/libavcodec/x86/hevc_idct.asm >> @@ -74,34 +286,578 @@ cglobal hevc_idct_%1x%1_dc_%2, 1, 2, 1, coeff, tmp >> >> INIT_XMM sse2 >> +IDCT_DC_NL 8, %1 >> +IDCT_DC16, 4, %1 >> +IDCT_DC32, 16, %1 >> +%if %1 == 8 >> +TRANSPOSE_8x8 >> +%if ARCH_X86_64 >> +TRANSPOSE_16x16 >> +TRANSPOSE_32x32 >> +%endif >> +%endif >> +%define transpose_8x8 hevc_idct_transpose_8x8_sse2 >> +%if ARCH_X86_64 >> +%define transpose_16x16 hevc_idct_transpose_16x16_sse2 >> +%define transpose_32x32 hevc_idct_transpose_32x32_sse2 >> +IDCT_32x32 %1 >> +IDCT_16x16 %1 >> +%endif > > There should be no need to redefine the transpose functions, just call > the right one with the help of the cpuname macro. The transpose functions are called by IDCT_size*size macros and the macro itself is the same for avx and sse2. I think the only way to avoid this define is to group the init by SIMD instead of grouping it by bitdepth, but what to do with the bitdepth then? So I think it would be better to leave the define as it is. > >> --- a/libavcodec/x86/hevcdsp_init.c >> +++ b/libavcodec/x86/hevcdsp_init.c >> @@ -329,6 +361,11 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int >> bit_depth) >> #if HAVE_AVX_EXTERNAL >> SET_QPEL_FUNCS(1, 1, 8, avx, hevc_qpel_hv); >> SET_EPEL_FUNCS(1, 1, 8, avx, hevc_epel_hv); >> + >> +c->idct[0] = ff_hevc_idct_4x4_8_avx; >> +c->idct[1] = ff_hevc_idct_8x8_8_avx; >> +c->idct[2] = ff_hevc_idct_16x16_8_avx; >> +c->idct[3] = ff_hevc_idct_32x32_8_avx; >> #endif /* HAVE_AVX_EXTERNAL */ >> } > > Only the parts that are explicitly ifdeffed above within this very file > should be ifdeffed here. Add these below the ifdef. > Ok, will be done. 
>> @@ -354,6 +397,12 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int >> bit_depth) >> SET_EPEL_FUNCS(0, 1, 10, avx, ff_hevc_epel_h); >> SET_EPEL_FUNCS(1, 0, 10, avx, ff_hevc_epel_v); >> SET_EPEL_FUNCS(1, 1, 10, avx, hevc_epel_hv); >> + >> +c->idct[0] = ff_hevc_idct_4x4_10_avx; >> +c->idct[1] = ff_hevc_idct_8x8_10_avx; >> +c->idct[2] = ff_hevc_idct_16x16_10_avx; >> +c->idct[3] = ff_hevc_idct_32x32_10_avx; >> + >> #endif /* HAVE_AVX_EXTERNAL */ >> } >> if (EXTERNAL_AVX2(cpu_flags)) { > > same > > Diego > ___ > libav-devel mailing list > libav-devel@libav.org > https://lists.libav.org/mailman/listinfo/libav-devel ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel