encode: restructure the core encoding code

James Almer Thu, 26 Mar 2020 13:08:23 -0700

This commit follows the same logic as 061a0c14bb, but for the encode API: The
new public encoding API will no longer be a wrapper around the old deprecated
one, and the internal API used by the encoders now consists of a single
receive_packet() callback that pulls frames as required.


Signed-off-by: James Almer <[email protected]>
---
 libavcodec/avcodec.h  |  12 +-
 libavcodec/decode.c   |   1 -
 libavcodec/encode.c   | 282 ++++++++++++++++++++++++++++++++----------
 libavcodec/encode.h   |  39 ++++++
 libavcodec/internal.h |   7 +-
 libavcodec/utils.c    |  10 +-
 6 files changed, 272 insertions(+), 79 deletions(-)
 create mode 100644 libavcodec/encode.h

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 78c483c25c..9e41321772 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -3653,14 +3653,10 @@ typedef struct AVCodec {
     int (*decode)(AVCodecContext *, void *outdata, int *outdata_size, AVPacket 
*avpkt);
     int (*close)(AVCodecContext *);
     /**
-     * Encode API with decoupled packet/frame dataflow. The API is the
-     * same as the avcodec_ prefixed APIs (avcodec_send_frame() etc.), except
-     * that:
-     * - never called if the codec is closed or the wrong type,
-     * - if AV_CODEC_CAP_DELAY is not set, drain frames are never sent,
-     * - only one drain frame is ever passed down,
-     */
-    int (*send_frame)(AVCodecContext *avctx, const AVFrame *frame);
+     * Encode API with decoupled frame/packet dataflow. This function is called
+     * to get one output packet. It should call ff_encode_get_frame() to obtain
+     * input data.
+     */
     int (*receive_packet)(AVCodecContext *avctx, AVPacket *avpkt);
 
     /**
diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index af6bb3f952..22f0f8acb0 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -2039,7 +2039,6 @@ void avcodec_flush_buffers(AVCodecContext *avctx)
     av_frame_unref(avci->buffer_frame);
     av_frame_unref(avci->compat_decode_frame);
     av_packet_unref(avci->buffer_pkt);
-    avci->buffer_pkt_valid = 0;
 
     av_packet_unref(avci->ds.in_pkt);
 
diff --git a/libavcodec/encode.c b/libavcodec/encode.c
index 9ed2cf0f59..4856f41635 100644
--- a/libavcodec/encode.c
+++ b/libavcodec/encode.c
@@ -26,6 +26,7 @@
 #include "libavutil/samplefmt.h"
 
 #include "avcodec.h"
+#include "encode.h"
 #include "frame_thread_encoder.h"
 #include "internal.h"
 
@@ -78,14 +79,10 @@ int ff_alloc_packet(AVPacket *avpkt, int size)
 /**
  * Pad last frame with silence.
  */
-static int pad_last_frame(AVCodecContext *s, AVFrame **dst, const AVFrame *src)
+static int pad_last_frame(AVCodecContext *s, AVFrame *frame, const AVFrame 
*src)
 {
-    AVFrame *frame = NULL;
     int ret;
 
-    if (!(frame = av_frame_alloc()))
-        return AVERROR(ENOMEM);
-
     frame->format         = src->format;
     frame->channel_layout = src->channel_layout;
     frame->channels       = src->channels;
@@ -106,12 +103,10 @@ static int pad_last_frame(AVCodecContext *s, AVFrame 
**dst, const AVFrame *src)
                                       s->channels, s->sample_fmt)) < 0)
         goto fail;
 
-    *dst = frame;
-
     return 0;
 
 fail:
-    av_frame_free(&frame);
+    av_frame_unref(frame);
     return ret;
 }
 
@@ -182,7 +177,11 @@ int attribute_align_arg 
avcodec_encode_audio2(AVCodecContext *avctx,
             }
 
             if (frame->nb_samples < avctx->frame_size) {
-                ret = pad_last_frame(avctx, &padded_frame, frame);
+                if (!(padded_frame = av_frame_alloc())) {
+                    ret = AVERROR(ENOMEM);
+                    goto end;
+                }
+                ret = pad_last_frame(avctx, padded_frame, frame);
                 if (ret < 0)
                     goto end;
 
@@ -359,101 +358,248 @@ int avcodec_encode_subtitle(AVCodecContext *avctx, 
uint8_t *buf, int buf_size,
     return ret;
 }
 
-static int do_encode(AVCodecContext *avctx, const AVFrame *frame, int 
*got_packet)
+int ff_encode_get_frame(AVCodecContext *avctx, AVFrame *frame)
 {
+    AVCodecInternal *avci = avctx->internal;
+
+    if (avci->draining)
+        return AVERROR_EOF;
+
+    if (!avci->buffer_frame->buf[0])
+        return AVERROR(EAGAIN);
+
+    av_frame_move_ref(frame, avci->buffer_frame);
+
+    return 0;
+}
+
+static int encode_simple_internal(AVCodecContext *avctx, AVPacket *avpkt)
+{
+    AVCodecInternal   *avci = avctx->internal;
+    EncodeSimpleContext *es = &avci->es;
+    AVFrame          *frame = es->in_frame;
+    int got_packet;
     int ret;
-    *got_packet = 0;
 
-    av_packet_unref(avctx->internal->buffer_pkt);
-    avctx->internal->buffer_pkt_valid = 0;
+    if (avci->draining_done)
+        return AVERROR_EOF;
 
-    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
-        ret = avcodec_encode_video2(avctx, avctx->internal->buffer_pkt,
-                                    frame, got_packet);
-    } else if (avctx->codec_type == AVMEDIA_TYPE_AUDIO) {
-        ret = avcodec_encode_audio2(avctx, avctx->internal->buffer_pkt,
-                                    frame, got_packet);
-    } else {
-        ret = AVERROR(EINVAL);
+    if (!frame->buf[0] && !avci->draining) {
+        av_frame_unref(frame);
+        ret = ff_encode_get_frame(avctx, frame);
+        if (ret < 0 && ret != AVERROR_EOF)
+            return ret;
+    }
+
+    if (!frame->buf[0]) {
+        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
+              avctx->active_thread_type & FF_THREAD_FRAME))
+            return AVERROR_EOF;
+
+        // Flushing is signaled with a NULL frame
+        frame = NULL;
+    }
+
+    got_packet = 0;
+
+    av_assert0(avctx->codec->encode2);
+
+    if (CONFIG_FRAME_THREAD_ENCODER &&
+        avci->frame_thread_encoder && 
(avctx->active_thread_type&FF_THREAD_FRAME))
+        ret = ff_thread_video_encode_frame(avctx, avpkt, frame, &got_packet);
+    else
+        ret = avctx->codec->encode2(avctx, avpkt, frame, &got_packet);
+
+    av_assert0(ret <= 0);
+
+    emms_c();
+
+    if (!ret && got_packet) {
+        if (avpkt->data) {
+            ret = av_packet_make_refcounted(avpkt);
+            if (ret < 0)
+                goto end;
+        }
+
+        if (frame && !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
+                       avctx->active_thread_type & FF_THREAD_FRAME)) {
+            if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+                avpkt->pts = avpkt->dts = frame->pts;
+            } else if (avctx->codec->type == AVMEDIA_TYPE_AUDIO) {
+                if (avpkt->pts == AV_NOPTS_VALUE)
+                    avpkt->pts = frame->pts;
+                if (!avpkt->duration)
+                    avpkt->duration = ff_samples_to_time_base(avctx,
+                                                              
frame->nb_samples);
+            }
+        }
+        if (avctx->codec->type == AVMEDIA_TYPE_AUDIO) {
+            /* NOTE: if we add any audio encoders which output non-keyframe 
packets,
+             *       this needs to be moved to the encoders, but for now we 
can do it
+             *       here to simplify things */
+            avpkt->flags |= AV_PKT_FLAG_KEY;
+            avpkt->dts = avpkt->pts;
+        }
+    }
+
+    if (avci->draining && !got_packet)
+        avci->draining_done = 1;
+
+end:
+    if (ret < 0 || !got_packet)
+        av_packet_unref(avpkt);
+
+    if (frame) {
+        if (!ret)
+            avctx->frame_number++;
+        av_frame_unref(frame);
     }
 
-    if (ret >= 0 && *got_packet) {
+    if (got_packet)
         // Encoders must always return ref-counted buffers.
         // Side-data only packets have no data and can be not ref-counted.
-        av_assert0(!avctx->internal->buffer_pkt->data || 
avctx->internal->buffer_pkt->buf);
-        avctx->internal->buffer_pkt_valid = 1;
-        ret = 0;
-    } else {
-        av_packet_unref(avctx->internal->buffer_pkt);
+        av_assert0(!avpkt->data || avpkt->buf);
+
+    return ret;
+}
+
+static int encode_simple_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
+{
+    int ret;
+
+    while (!avpkt->data && !avpkt->side_data) {
+        ret = encode_simple_internal(avctx, avpkt);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+static int encode_receive_packet_internal(AVCodecContext *avctx, AVPacket 
*avpkt)
+{
+    AVCodecInternal *avci = avctx->internal;
+    int ret;
+
+    av_assert0(!avpkt->data && !avpkt->side_data);
+
+    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+        if ((avctx->flags & AV_CODEC_FLAG_PASS1) && avctx->stats_out)
+            avctx->stats_out[0] = '\0';
+        if (av_image_check_size2(avctx->width, avctx->height, 
avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx))
+            return AVERROR(EINVAL);
     }
 
+    if (avctx->codec->receive_packet) {
+        ret = avctx->codec->receive_packet(avctx, avpkt);
+        if (!ret)
+            // Encoders must always return ref-counted buffers.
+            // Side-data only packets have no data and can be not ref-counted.
+            av_assert0(!avpkt->data || avpkt->buf);
+    } else
+        ret = encode_simple_receive_packet(avctx, avpkt);
+
+    if (ret == AVERROR_EOF)
+        avci->draining_done = 1;
+
     return ret;
 }
 
+static int encode_send_frame_internal(AVCodecContext *avctx, const AVFrame 
*src)
+{
+    AVCodecInternal *avci = avctx->internal;
+    AVFrame *dst = avci->buffer_frame;
+    int ret;
+
+    if (avctx->codec->type == AVMEDIA_TYPE_AUDIO) {
+        /* extract audio service type metadata */
+        AVFrameSideData *sd = av_frame_get_side_data(src, 
AV_FRAME_DATA_AUDIO_SERVICE_TYPE);
+        if (sd && sd->size >= sizeof(enum AVAudioServiceType))
+            avctx->audio_service_type = *(enum AVAudioServiceType*)sd->data;
+
+        /* check for valid frame size */
+        if (avctx->codec->capabilities & AV_CODEC_CAP_SMALL_LAST_FRAME) {
+            if (src->nb_samples > avctx->frame_size) {
+                av_log(avctx, AV_LOG_ERROR, "more samples than frame size\n");
+                return AVERROR(EINVAL);
+            }
+        } else if (!(avctx->codec->capabilities & 
AV_CODEC_CAP_VARIABLE_FRAME_SIZE)) {
+            /* if we already got an undersized frame, that must have been the 
last */
+            if (avctx->internal->last_audio_frame) {
+                av_log(avctx, AV_LOG_ERROR, "frame_size (%d) was not respected 
for a non-last frame\n", avctx->frame_size);
+                return AVERROR(EINVAL);
+            }
+
+            if (src->nb_samples < avctx->frame_size) {
+                ret = pad_last_frame(avctx, dst, src);
+                if (ret < 0)
+                    return ret;
+
+                avctx->internal->last_audio_frame = 1;
+            } else if (src->nb_samples > avctx->frame_size) {
+                av_log(avctx, AV_LOG_ERROR, "nb_samples (%d) != frame_size 
(%d)\n", dst->nb_samples, avctx->frame_size);
+                return AVERROR(EINVAL);
+            }
+        }
+    }
+
+    if (!dst->data[0]) {
+        ret = av_frame_ref(dst, src);
+        if (ret < 0)
+             return ret;
+    }
+
+    return 0;
+}
+
 int attribute_align_arg avcodec_send_frame(AVCodecContext *avctx, const 
AVFrame *frame)
 {
+    AVCodecInternal *avci = avctx->internal;
+    int ret;
+
     if (!avcodec_is_open(avctx) || !av_codec_is_encoder(avctx->codec))
         return AVERROR(EINVAL);
 
-    if (avctx->internal->draining)
+    if (avci->draining)
         return AVERROR_EOF;
 
-    if (!frame) {
-        avctx->internal->draining = 1;
+    if (avci->buffer_frame->data[0])
+        return AVERROR(EAGAIN);
 
-        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
-            return 0;
+    if (!frame) {
+        avci->draining = 1;
+    } else {
+        ret = encode_send_frame_internal(avctx, frame);
+        if (ret < 0)
+            return ret;
     }
 
-    if (avctx->codec->send_frame)
-        return avctx->codec->send_frame(avctx, frame);
-
-    // Emulation via old API. Do it here instead of avcodec_receive_packet, 
because:
-    // 1. if the AVFrame is not refcounted, the copying will be much more
-    //    expensive than copying the packet data
-    // 2. assume few users use non-refcounted AVPackets, so usually no copy is
-    //    needed
-
-    if (avctx->internal->buffer_pkt_valid)
-        return AVERROR(EAGAIN);
+    if (!avci->buffer_pkt->data && !avci->buffer_pkt->side_data) {
+        ret = encode_receive_packet_internal(avctx, avci->buffer_pkt);
+        if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
+            return ret;
+    }
 
-    return do_encode(avctx, frame, &(int){0});
+    return 0;
 }
 
 int attribute_align_arg avcodec_receive_packet(AVCodecContext *avctx, AVPacket 
*avpkt)
 {
+    AVCodecInternal *avci = avctx->internal;
+    int ret;
+
     av_packet_unref(avpkt);
 
     if (!avcodec_is_open(avctx) || !av_codec_is_encoder(avctx->codec))
         return AVERROR(EINVAL);
 
-    if (avctx->codec->receive_packet) {
-        int ret;
-        if (avctx->internal->draining && !(avctx->codec->capabilities & 
AV_CODEC_CAP_DELAY))
-            return AVERROR_EOF;
-        ret = avctx->codec->receive_packet(avctx, avpkt);
-        if (!ret)
-            // Encoders must always return ref-counted buffers.
-            // Side-data only packets have no data and can be not ref-counted.
-            av_assert0(!avpkt->data || avpkt->buf);
-        return ret;
-    }
-
-    // Emulation via old API.
-
-    if (!avctx->internal->buffer_pkt_valid) {
-        int got_packet;
-        int ret;
-        if (!avctx->internal->draining)
-            return AVERROR(EAGAIN);
-        ret = do_encode(avctx, NULL, &got_packet);
+    if (avci->buffer_pkt->data || avci->buffer_pkt->side_data) {
+        av_packet_move_ref(avpkt, avci->buffer_pkt);
+    } else {
+        ret = encode_receive_packet_internal(avctx, avpkt);
         if (ret < 0)
             return ret;
-        if (ret >= 0 && !got_packet)
-            return AVERROR_EOF;
     }
 
-    av_packet_move_ref(avpkt, avctx->internal->buffer_pkt);
-    avctx->internal->buffer_pkt_valid = 0;
     return 0;
 }
diff --git a/libavcodec/encode.h b/libavcodec/encode.h
new file mode 100644
index 0000000000..dfa9cb2d97
--- /dev/null
+++ b/libavcodec/encode.h
@@ -0,0 +1,39 @@
+/*
+ * generic encoding-related code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ENCODE_H
+#define AVCODEC_ENCODE_H
+
+#include "libavutil/frame.h"
+
+#include "avcodec.h"
+
+/**
+ * Called by encoders to get the next frame for encoding.
+ *
+ * @param frame An empty frame to be filled with data.
+ * @return 0 if a new reference has been successfully written to frame
+ *         AVERROR(EAGAIN) if no data is currently available
+ *         AVERROR_EOF if end of stream has been reached, so no more data
+ *                     will be available
+ */
+int ff_encode_get_frame(AVCodecContext *avctx, AVFrame *frame);
+
+#endif /* AVCODEC_ENCODE_H */
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index bccd9222d4..8b97e08e9f 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -132,6 +132,10 @@ typedef struct DecodeFilterContext {
     int         nb_bsfs;
 } DecodeFilterContext;
 
+typedef struct EncodeSimpleContext {
+    AVFrame *in_frame;
+} EncodeSimpleContext;
+
 typedef struct AVCodecInternal {
     /**
      * Whether the parent AVCodecContext is a copy of the context which had
@@ -171,6 +175,8 @@ typedef struct AVCodecInternal {
     DecodeSimpleContext ds;
     DecodeFilterContext filter;
 
+    EncodeSimpleContext es;
+
     /**
      * Properties (timestamps+side data) extracted from the last packet passed
      * for decoding.
@@ -204,7 +210,6 @@ typedef struct AVCodecInternal {
      * buffers for using new encode/decode API through legacy API
      */
     AVPacket *buffer_pkt;
-    int buffer_pkt_valid; // encoding: packet without data can be valid
     AVFrame *buffer_frame;
     int draining_done;
     /* set to 1 when the caller is using the old decoding API */
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 4ab8cff4f5..704c185248 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -93,7 +93,7 @@ void av_fast_padded_mallocz(void *ptr, unsigned int *size, 
size_t min_size)
 
 int av_codec_is_encoder(const AVCodec *codec)
 {
-    return codec && (codec->encode_sub || codec->encode2 ||codec->send_frame);
+    return codec && (codec->encode_sub || codec->encode2 || 
codec->receive_packet);
 }
 
 int av_codec_is_decoder(const AVCodec *codec)
@@ -607,6 +607,12 @@ int attribute_align_arg avcodec_open2(AVCodecContext 
*avctx, const AVCodec *code
         goto free_and_end;
     }
 
+    avci->es.in_frame = av_frame_alloc();
+    if (!avctx->internal->es.in_frame) {
+        ret = AVERROR(ENOMEM);
+        goto free_and_end;
+    }
+
     avci->buffer_pkt = av_packet_alloc();
     if (!avci->buffer_pkt) {
         ret = AVERROR(ENOMEM);
@@ -1074,6 +1080,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
         av_packet_free(&avci->last_pkt_props);
 
         av_packet_free(&avci->ds.in_pkt);
+        av_frame_free(&avci->es.in_frame);
         ff_decode_bsfs_uninit(avctx);
 
         av_freep(&avci->pool);
@@ -1129,6 +1136,7 @@ av_cold int avcodec_close(AVCodecContext *avctx)
         av_packet_free(&avctx->internal->last_pkt_props);
 
         av_packet_free(&avctx->internal->ds.in_pkt);
+        av_frame_free(&avctx->internal->es.in_frame);
 
         for (i = 0; i < FF_ARRAY_ELEMS(pool->pools); i++)
             av_buffer_pool_uninit(&pool->pools[i]);
-- 
2.25.2

_______________________________________________
ffmpeg-devel mailing list
[email protected]
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
[email protected] with subject "unsubscribe".

[FFmpeg-devel] [PATCH 1/2 v3] avcodec/encode: restructure the core encoding code

Reply via email to