This allows handling matroska files with errors.
Fixes test4.mkv and test7.mkv from the official Matroska test suite,
and by extension Bugzilla #62.

Based on a patch by Reimar Doffinger <[email protected]>
---
 libavformat/matroskadec.c |   50 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 744f7c0..f0dd134 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -556,6 +556,36 @@ static EbmlSyntax matroska_clusters_incremental[] = {
 
 static const char *const matroska_doctypes[] = { "matroska", "webm" };
 
+static int matroska_resync(MatroskaDemuxContext *matroska, int64_t last_pos)
+{
+    AVIOContext *pb = matroska->ctx->pb;
+    uint32_t id;
+    matroska->current_id = 0;
+    matroska->num_levels = 0;
+
+    // seek to next position to resync from
+    if (avio_seek(pb, last_pos + 1, SEEK_SET) < 0 || avio_tell(pb) <= last_pos)
+        goto eof;
+
+    id = avio_rb32(pb);
+
+    // try to find a toplevel element
+    while (!url_feof(pb)) {
+        if (id == MATROSKA_ID_INFO || id == MATROSKA_ID_TRACKS ||
+            id == MATROSKA_ID_CUES || id == MATROSKA_ID_TAGS ||
+            id == MATROSKA_ID_SEEKHEAD || id == MATROSKA_ID_ATTACHMENTS ||
+            id == MATROSKA_ID_CLUSTER || id == MATROSKA_ID_CHAPTERS)
+        {
+            matroska->current_id = id;
+            return 0;
+        }
+        id = (id << 8) | avio_r8(pb);
+    }
+eof:
+    matroska->done = 1;
+    return AVERROR_EOF;
+}
+
 /*
  * Return: Whether we reached the end of a level in the hierarchy or not.
  */
@@ -1361,6 +1391,7 @@ static int matroska_read_header(AVFormatContext *s)
     MatroskaChapter *chapters;
     MatroskaTrack *tracks;
     uint64_t max_start = 0;
+    int64_t pos;
     Ebml ebml = { 0 };
     AVStream *st;
     int i, j, res;
@@ -1391,8 +1422,16 @@ static int matroska_read_header(AVFormatContext *s)
     ebml_free(ebml_syntax, &ebml);
 
     /* The next thing is a segment. */
-    if ((res = ebml_parse(matroska, matroska_segments, matroska)) < 0)
-        return res;
+    pos = avio_tell(matroska->ctx->pb);
+    res = ebml_parse(matroska, matroska_segments, matroska);
+    // try resyncing until we find a EBML_STOP type element.
+    while (res != 1) {
+        res = matroska_resync(matroska, pos);
+        if (res < 0)
+            return res;
+        pos = avio_tell(matroska->ctx->pb);
+        res = ebml_parse(matroska, matroska_segment, matroska);
+    }
     matroska_execute_seekhead(matroska);
 
     if (!matroska->time_scale)
@@ -2013,7 +2052,7 @@ static int matroska_parse_block(MatroskaDemuxContext 
*matroska, uint8_t *data,
 
     if ((n = matroska_ebmlnum_uint(matroska, data, size, &num)) < 0) {
         av_log(matroska->ctx, AV_LOG_ERROR, "EBML block data error\n");
-        return n;
+        return AVERROR_INVALIDDATA;
     }
     data += n;
     size -= n;
@@ -2187,7 +2226,6 @@ static int matroska_parse_cluster(MatroskaDemuxContext 
*matroska)
                                      pos);
         }
     ebml_free(matroska_cluster, &cluster);
-    if (res < 0)  matroska->done = 1;
     return res;
 }
 
@@ -2197,9 +2235,11 @@ static int matroska_read_packet(AVFormatContext *s, 
AVPacket *pkt)
     int ret = 0;
 
     while (!ret && matroska_deliver_packet(matroska, pkt)) {
+        int64_t pos = avio_tell(matroska->ctx->pb);
         if (matroska->done)
             return AVERROR_EOF;
-        ret = matroska_parse_cluster(matroska);
+        if (matroska_parse_cluster(matroska) < 0)
+            ret = matroska_resync(matroska, pos);
     }
 
     if (ret == AVERROR_INVALIDDATA && pkt->data) {
-- 
1.7.9.2

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to