From 4241d3dedc6a69d14edf928b599b1b36982fc232 Mon Sep 17 00:00:00 2001
From: davemevans <dave.evans@m2amedia.tv>
Date: Mon, 12 Oct 2020 16:48:09 +0100
Subject: [PATCH] avformat/webvttdec,enc: correctly decode files containing
 STYLE, REGION blocks

Add the ability to extract cues from files containing one or more WebVTT
region definition block or WebVTT style block. Previously the decoder would
misinterpret these and fail to parse any cues at all. Also add the ability to
write these, or alternative header information, back out in the encoder using
the codec extradata.
---
 libavformat/webvttdec.c                   | 16 ++++++++++++++--
 libavformat/webvttenc.c                   | 12 ++++++++++++
 tests/fate/subtitles.mak                  |  3 +++
 tests/ref/fate/sub-webvtt-styleandregions | 19 +++++++++++++++++++
 4 files changed, 48 insertions(+), 2 deletions(-)
 create mode 100644 tests/ref/fate/sub-webvtt-styleandregions

diff --git a/libavformat/webvttdec.c b/libavformat/webvttdec.c
index 8d2fdfed37..c6cc367383 100644
--- a/libavformat/webvttdec.c
+++ b/libavformat/webvttdec.c
@@ -60,7 +60,7 @@ static int64_t read_ts(const char *s)
 static int webvtt_read_header(AVFormatContext *s)
 {
     WebVTTContext *webvtt = s->priv_data;
-    AVBPrint cue;
+    AVBPrint cue, header;
     int res = 0;
     AVStream *st = avformat_new_stream(s, NULL);
 
@@ -72,6 +72,7 @@ static int webvtt_read_header(AVFormatContext *s)
     st->disposition |= webvtt->kind;
 
     av_bprint_init(&cue,    0, AV_BPRINT_SIZE_UNLIMITED);
+    av_bprint_init(&header, 0, AV_BPRINT_SIZE_UNLIMITED);
 
     for (;;) {
         int i;
@@ -89,12 +90,18 @@ static int webvtt_read_header(AVFormatContext *s)
         p = identifier = cue.str;
         pos = avio_tell(s->pb);
 
-        /* ignore header chunk */
+        /* ignore the magic word and any comments */
         if (!strncmp(p, "\xEF\xBB\xBFWEBVTT", 9) ||
             !strncmp(p, "WEBVTT", 6) ||
             !strncmp(p, "NOTE", 4))
             continue;
 
+        /* store the style and region blocks from the header */
+        if (!strncmp(p, "STYLE", 5) || !strncmp(p, "REGION", 6)) {
+            av_bprintf(&header, "%s%s", header.len ? "\n\n" : "", p);
+            continue;
+        }
+
         /* optional cue identifier (can be a number like in SRT or some kind of
          * chaptering id) */
         for (i = 0; p[i] && p[i] != '\n' && p[i] != '\r'; i++) {
@@ -161,12 +168,17 @@ static int webvtt_read_header(AVFormatContext *s)
         SET_SIDE_DATA(settings,   AV_PKT_DATA_WEBVTT_SETTINGS);
     }
 
+    res = ff_bprint_to_codecpar_extradata(st->codecpar, &header);
+    if (res < 0)
+        goto end;
+
     ff_subtitles_queue_finalize(s, &webvtt->q);
 
 end:
     if (res < 0)
         ff_subtitles_queue_clean(&webvtt->q);
     av_bprint_finalize(&cue,    NULL);
+    av_bprint_finalize(&header, NULL);
     return res;
 }
 
diff --git a/libavformat/webvttenc.c b/libavformat/webvttenc.c
index cbd989dcb6..fcbd3ee10a 100644
--- a/libavformat/webvttenc.c
+++ b/libavformat/webvttenc.c
@@ -58,6 +58,18 @@ static int webvtt_write_header(AVFormatContext *ctx)
 
     avio_printf(pb, "WEBVTT\n");
 
+    if (par->extradata_size > 0) {
+        size_t header_size = par->extradata_size;
+
+        if (par->extradata[0] != '\n')
+            avio_printf(pb, "\n");
+
+        avio_write(pb, par->extradata, header_size);
+
+        if (par->extradata[header_size - 1] != '\n')
+            avio_printf(pb, "\n");
+    }
+
     return 0;
 }
 
diff --git a/tests/fate/subtitles.mak b/tests/fate/subtitles.mak
index 6323d0f93d..41b496e46f 100644
--- a/tests/fate/subtitles.mak
+++ b/tests/fate/subtitles.mak
@@ -91,6 +91,9 @@ fate-sub-webvtt: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/WebVTT_capability_
 FATE_SUBTITLES_ASS-$(call DEMDEC, WEBVTT, WEBVTT) += fate-sub-webvtt2
 fate-sub-webvtt2: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/WebVTT_extended_tester.vtt
 
+FATE_SUBTITLES_ASS-$(call DEMDEC, WEBVTT, WEBVTT) += fate-sub-webvtt-styleandregions
+fate-sub-webvtt-styleandregions: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/webvtt_style_and_regions.vtt
+
 FATE_SUBTITLES-$(call ALLYES, SRT_DEMUXER SUBRIP_DECODER WEBVTT_ENCODER WEBVTT_MUXER) += fate-sub-webvttenc
 fate-sub-webvttenc: CMD = fmtstdout webvtt -i $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt
 
diff --git a/tests/ref/fate/sub-webvtt-styleandregions b/tests/ref/fate/sub-webvtt-styleandregions
new file mode 100644
index 0000000000..ca3516fe02
--- /dev/null
+++ b/tests/ref/fate/sub-webvtt-styleandregions
@@ -0,0 +1,19 @@
+[Script Info]
+; Script generated by FFmpeg/Lavc
+ScriptType: v4.00+
+PlayResX: 384
+PlayResY: 288
+ScaledBorderAndShadow: yes
+
+[V4+ Styles]
+Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
+Style: Default,Arial,16,&Hffffff,&Hffffff,&H0,&H0,0,0,0,0,100,100,0,0,1,1,0,2,10,10,10,0
+
+[Events]
+Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
+Dialogue: 0,0:00:10.00,0:00:25.00,Default,,0,0,0,,Can I tell you a joke, Dad?
+Dialogue: 0,0:00:12.50,0:00:27.50,Default,,0,0,0,,Sure, I could do with a laugh.
+Dialogue: 0,0:00:15.00,0:00:30.00,Default,,0,0,0,,Where do sheep go to get their hair cut?
+Dialogue: 0,0:00:17.50,0:00:32.50,Default,,0,0,0,,I don't know, son. Where do sheep go to get their hair cut?
+Dialogue: 0,0:00:20.00,0:00:35.00,Default,,0,0,0,,To the baa-baa shop!
+Dialogue: 0,0:00:22.50,0:00:37.50,Default,,0,0,0,,{\i1}[facepalms]{\i0}
-- 
2.17.1

