This patch is based on the stream assignment code in webmdashenc. Additional changes: * Default to one AdaptationSet per stream
Previously all mapped streams of a media type (video, audio) were assigned to a single AdaptationSet. Using the DASH live profile it is mandatory that the segments of all representations are aligned, which is currently not enforced. This leads to problems when using video streams with different key frame intervals. So, to play it safe, default to one AdaptationSet per stream, unless overridden by an explicit assignment * Make sure all streams are assigned to exactly one AdaptationSet Signed-off-by: Peter Große <pe...@friiks.de> --- libavformat/dashenc.c | 193 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 162 insertions(+), 31 deletions(-) diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c index 1b12d4f..c561ad1 100644 --- a/libavformat/dashenc.c +++ b/libavformat/dashenc.c @@ -24,6 +24,7 @@ #include <unistd.h> #endif +#include "libavutil/avutil.h" #include "libavutil/avstring.h" #include "libavutil/eval.h" #include "libavutil/intreadwrite.h" @@ -58,9 +59,15 @@ typedef struct Segment { int n; } Segment; +typedef struct AdaptationSet { + char id[10]; + enum AVMediaType media_type; + AVDictionary *metadata; +} AdaptationSet; + typedef struct OutputStream { AVFormatContext *ctx; - int ctx_inited; + int ctx_inited, as_idx; uint8_t iobuf[32768]; AVIOContext *out; int duration_written; @@ -79,6 +86,9 @@ typedef struct OutputStream { typedef struct DASHContext { const AVClass *class; /* Class for private options. 
*/ + char *adaptation_sets; + AdaptationSet *as; + int nb_as; int window_size; int extra_window_size; int min_seg_duration; @@ -87,7 +97,7 @@ typedef struct DASHContext { int use_timeline; int single_file; OutputStream *streams; - int has_video, has_audio; + int has_video; int64_t last_duration; int64_t total_duration; char availability_start_time[100]; @@ -176,6 +186,16 @@ static void dash_free(AVFormatContext *s) { DASHContext *c = s->priv_data; int i, j; + + if (c->as) { + for (i = 0; i < c->nb_as; i++) { + if (&c->as[i].metadata) + av_dict_free(&c->as[i].metadata); + } + av_freep(&c->as); + c->nb_as = 0; + } + if (!c->streams) return; for (i = 0; i < s->nb_streams; i++) { @@ -436,12 +456,144 @@ static void format_date_now(char *buf, int size) } } +static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_index) +{ + DASHContext *c = s->priv_data; + AdaptationSet *as = &c->as[as_index]; + int i; + + avio_printf(out, "\t\t<AdaptationSet id=\"%s\" contentType=\"%s\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n", + as->id, as->media_type == AVMEDIA_TYPE_VIDEO ? 
"video" : "audio"); + + for (i = 0; i < s->nb_streams; i++) { + OutputStream *os = &c->streams[i]; + + if (os->as_idx - 1 != as_index) + continue; + + if (as->media_type == AVMEDIA_TYPE_VIDEO) { + avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/mp4\" codecs=\"%s\"%s width=\"%d\" height=\"%d\">\n", + i, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->width, s->streams[i]->codecpar->height); + } else { + avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/mp4\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n", + i, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->sample_rate); + avio_printf(out, "\t\t\t\t<AudioChannelConfiguration schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\" value=\"%d\" />\n", + s->streams[i]->codecpar->channels); + } + output_segment_list(os, out, c); + avio_printf(out, "\t\t\t</Representation>\n"); + } + avio_printf(out, "\t\t</AdaptationSet>\n"); + + return 0; +} + +static int parse_adaptation_sets(AVFormatContext *s) +{ + DASHContext *c = s->priv_data; + char *p = c->adaptation_sets; + char *q; + enum { new_set, parse_id, parsing_streams } state; + int i; + + /* default: one AdaptationSet for each stream */ + if (!p) { + void *mem = av_mallocz(sizeof(*c->as) * s->nb_streams); + if (!mem) + return AVERROR(ENOMEM); + c->as = mem; + c->nb_as = s->nb_streams; + + for (i = 0; i < s->nb_streams; i++) { + AdaptationSet *as = &c->as[i]; + OutputStream *os = &c->streams[i]; + snprintf(as->id, sizeof(as->id), "%d", i); + as->metadata = NULL; + as->media_type = s->streams[i]->codecpar->codec_type; + os->as_idx = i + 1; + } + return 0; + } + + /* syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on */ + state = new_set; + while (p < c->adaptation_sets + strlen(c->adaptation_sets)) { + if (*p == ' ') { + continue; + } else if (state == new_set && !strncmp(p, "id=", 3)) { + AdaptationSet *as; + void *mem = av_realloc(c->as, sizeof(*c->as) * (c->nb_as + 1)); + if (!mem) + return 
AVERROR(ENOMEM); + c->as = mem; + ++c->nb_as; + + as = &c->as[c->nb_as - 1]; + as->metadata = NULL; + as->media_type = AVMEDIA_TYPE_UNKNOWN; + + p += 3; // consume "id=" + q = as->id; + while (*p != ',') *q++ = *p++; + *q = 0; + p++; + state = parse_id; + } else if (state == parse_id && !strncmp(p, "streams=", 8)) { + p += 8; // consume "streams=" + state = parsing_streams; + } else if (state == parsing_streams) { + struct AdaptationSet *as = &c->as[c->nb_as - 1]; + OutputStream *os; + char *stream_identifier; + + q = p; + while (*q != '\0' && *q != ',' && *q != ' ') q++; + stream_identifier = av_strndup(p, q - p); + + i = strtol(stream_identifier, NULL, 10); + if (i < 0 || i >= s->nb_streams) { + av_log(s, AV_LOG_ERROR, "Selected stream \"%s\" not found!\n", stream_identifier); + return -1; + } + + os = &c->streams[i]; + if (as->media_type == AVMEDIA_TYPE_UNKNOWN) { + as->media_type = s->streams[i]->codecpar->codec_type; + } else if (as->media_type != s->streams[i]->codecpar->codec_type) { + av_log(s, AV_LOG_ERROR, "Mixing codec types within an AdaptationSet is not allowed\n"); + return -1; + } else if (os->as_idx) { + av_log(s, AV_LOG_ERROR, "Assigning a stream to more than one AdaptationSet is not allowed\n"); + return -1; + } + os->as_idx = c->nb_as; + av_free(stream_identifier); + + if (*q == '\0') break; + if (*q == ' ') state = new_set; + p = ++q; + } else { + return -1; + } + } + + /* check for unassigned streams */ + for (i = 0; i < s->nb_streams; i++) { + OutputStream *os = &c->streams[i]; + if (!os->as_idx) { + av_log(s, AV_LOG_ERROR, "Stream %d is not mapped to an AdaptationSet\n", i); + return -1; + } + } + return 0; +} + static int write_manifest(AVFormatContext *s, int final) { DASHContext *c = s->priv_data; AVIOContext *out; char temp_filename[1024]; - int ret, i, as_id = 0; + int ret, i; AVDictionaryEntry *title = av_dict_get(s->metadata, "title", NULL, 0); snprintf(temp_filename, sizeof(temp_filename), "%s.tmp", s->filename); @@ -506,32 +658,9 @@ 
static int write_manifest(AVFormatContext *s, int final) avio_printf(out, "\t<Period id=\"0\" start=\"PT0.0S\">\n"); } - if (c->has_video) { - avio_printf(out, "\t\t<AdaptationSet id=\"%d\" contentType=\"video\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n", as_id++); - for (i = 0; i < s->nb_streams; i++) { - AVStream *st = s->streams[i]; - OutputStream *os = &c->streams[i]; - if (st->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) - continue; - avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/mp4\" codecs=\"%s\"%s width=\"%d\" height=\"%d\">\n", i, os->codec_str, os->bandwidth_str, st->codecpar->width, st->codecpar->height); - output_segment_list(&c->streams[i], out, c); - avio_printf(out, "\t\t\t</Representation>\n"); - } - avio_printf(out, "\t\t</AdaptationSet>\n"); - } - if (c->has_audio) { - avio_printf(out, "\t\t<AdaptationSet id=\"%d\" contentType=\"audio\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n", as_id++); - for (i = 0; i < s->nb_streams; i++) { - AVStream *st = s->streams[i]; - OutputStream *os = &c->streams[i]; - if (st->codecpar->codec_type != AVMEDIA_TYPE_AUDIO) - continue; - avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/mp4\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n", i, os->codec_str, os->bandwidth_str, st->codecpar->sample_rate); - avio_printf(out, "\t\t\t\t<AudioChannelConfiguration schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\" value=\"%d\" />\n", st->codecpar->channels); - output_segment_list(&c->streams[i], out, c); - avio_printf(out, "\t\t\t</Representation>\n"); - } - avio_printf(out, "\t\t</AdaptationSet>\n"); + for (i = 0; i < c->nb_as; i++) { + if ((ret = write_adaptation_set(s, out, i)) < 0) + return ret; } avio_printf(out, "\t</Period>\n"); avio_printf(out, "</MPD>\n"); @@ -579,6 +708,9 @@ static int dash_write_header(AVFormatContext *s) goto fail; } + if ((ret = parse_adaptation_sets(s)) < 0) + goto fail; + for (i = 0; i < s->nb_streams; i++) { 
OutputStream *os = &c->streams[i]; AVFormatContext *ctx; @@ -669,8 +801,6 @@ static int dash_write_header(AVFormatContext *s) s->avoid_negative_ts = ctx->avoid_negative_ts; if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) c->has_video = 1; - else if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) - c->has_audio = 1; set_codec_str(s, st->codecpar, os->codec_str, sizeof(os->codec_str)); os->first_pts = AV_NOPTS_VALUE; @@ -997,6 +1127,7 @@ static int dash_write_trailer(AVFormatContext *s) #define OFFSET(x) offsetof(DASHContext, x) #define E AV_OPT_FLAG_ENCODING_PARAM static const AVOption options[] = { + { "adaptation_sets", "Adaptation sets. Syntax: id=0,streams=0,1,2 id=1,streams=3,4 and so on", OFFSET(adaptation_sets), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, { "window_size", "number of segments kept in the manifest", OFFSET(window_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, E }, { "extra_window_size", "number of segments kept outside of the manifest before removing from disk", OFFSET(extra_window_size), AV_OPT_TYPE_INT, { .i64 = 5 }, 0, INT_MAX, E }, { "min_seg_duration", "minimum segment duration (in microseconds)", OFFSET(min_seg_duration), AV_OPT_TYPE_INT64, { .i64 = 5000000 }, 0, INT_MAX, E }, -- 2.10.2 _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel