From: Bhargava Shastry <bshas...@sect.tu-berlin.de>

This patch adds a libFuzzer style fuzzer test harness to xz. The test
harness is accompanied by a Makefile that should be used from upstream
oss-fuzz, and a dictionary file.

Shout out to @pdknsk over at GitHub for initiating this work.

---
 tests/ossfuzz/Makefile         |   4 +
 tests/ossfuzz/config/fuzz.dict |   2 +
 tests/ossfuzz/fuzz.c           | 248 +++++++++++++++++++++++++++++++++
 3 files changed, 254 insertions(+)
 create mode 100644 tests/ossfuzz/Makefile
 create mode 100644 tests/ossfuzz/config/fuzz.dict
 create mode 100644 tests/ossfuzz/fuzz.c

diff --git a/tests/ossfuzz/Makefile b/tests/ossfuzz/Makefile
new file mode 100644
index 0000000..242625b
--- /dev/null
+++ b/tests/ossfuzz/Makefile
@@ -0,0 +1,4 @@
+fuzz: fuzz.c  # needs CC, CFLAGS, CXX, CXXFLAGS and OUT to be set
+	$(CC) $(CFLAGS) -c fuzz.c -I ../../src/liblzma/api/
+	$(CXX) $(CXXFLAGS) -lFuzzingEngine fuzz.o -o $(OUT)/fuzz \
+	      ../../src/liblzma/.libs/liblzma.a
diff --git a/tests/ossfuzz/config/fuzz.dict b/tests/ossfuzz/config/fuzz.dict
new file mode 100644
index 0000000..932d67c
--- /dev/null
+++ b/tests/ossfuzz/config/fuzz.dict
@@ -0,0 +1,2 @@
+"\xFD7zXZ\x00"
+"YZ"
diff --git a/tests/ossfuzz/fuzz.c b/tests/ossfuzz/fuzz.c
new file mode 100644
index 0000000..1697523
--- /dev/null
+++ b/tests/ossfuzz/fuzz.c
@@ -0,0 +1,248 @@
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <lzma.h>
+
+static bool
+init_decoder(lzma_stream *strm)
+{
+    // Set up *strm as a .xz stream decoder for the fuzz harness.
+    // Returns true when lzma_stream_decoder() reports LZMA_OK and
+    // false otherwise; when __DEBUG__ is defined, the failure reason
+    // is also printed to stderr.
+    //
+    // The decoder takes a memory usage limit and a set of flags.
+    //
+    // The memory usage of the decompressor depends on the settings used
+    // to compress a .xz file. It can vary from less than a megabyte to
+    // a few gigabytes, but in practice (at least for now) it rarely
+    // exceeds 65 MiB because that's how much memory is required to
+    // decompress files created with "xz -9".
+    //
+    // A memory usage limit is useful if it is important that the
+    // decompressor won't consume gigabytes of memory. The need
+    // for limiting depends on the application. In this harness,
+    // no memory usage limiting is used. This is done by setting
+    // the limit to UINT64_MAX.
+    //
+    // The .xz format allows concatenating compressed files as is:
+    //
+    //     echo foo | xz > foobar.xz
+    //     echo bar | xz >> foobar.xz
+    //
+    // LZMA_CONCATENATED makes the decoder continue past the first
+    // .xz stream so that such concatenated input is decoded too.
+    // Without the flag the decoder would stop after the first
+    // stream.
+    //
+    // Flags other than LZMA_CONCATENATED are supported too, and can
+    // be combined with bitwise-or. See lzma/container.h
+    // (src/liblzma/api/lzma/container.h in the source package)
+    // for details.
+    lzma_ret ret = lzma_stream_decoder(
+            strm, UINT64_MAX, LZMA_CONCATENATED);
+
+    // Return successfully if the initialization went fine.
+    if (ret == LZMA_OK)
+        return true;
+
+#ifdef __DEBUG__
+    // Something went wrong. The possible errors are documented in
+    // lzma/container.h (src/liblzma/api/lzma/container.h in the source
+    // package or e.g. /usr/include/lzma/container.h depending on the
+    // install prefix).
+    //
+    // Note that LZMA_MEMLIMIT_ERROR is never possible here. If you
+    // specify a very tiny limit, the error will be delayed until
+    // the first headers have been parsed by a call to lzma_code().
+    const char *msg;
+    switch (ret) {
+    case LZMA_MEM_ERROR:
+        msg = "Memory allocation failed";
+        break;
+
+    case LZMA_OPTIONS_ERROR:
+        msg = "Unsupported decompressor flags";
+        break;
+
+    default:
+        // This is most likely LZMA_PROG_ERROR indicating a bug in
+        // this program or in liblzma. It is inconvenient to have a
+        // separate error message for errors that should be impossible
+        // to occur, but knowing the error code is important for
+        // debugging. That's why it is good to print the error code
+        // at least when there is no good error message to show.
+        msg = "Unknown error, possibly a bug";
+        break;
+    }
+
+    fprintf(stderr, "Error initializing the decoder: %s (error code %u)\n",
+            msg, ret);
+#endif
+    return false;
+}
+
+static bool
+decompress(lzma_stream *strm, const uint8_t *inbuf, size_t inlen,
+           uint8_t *outbuf, size_t outlen)
+{
+    size_t remainlen = inlen;
+
+    // Decode inlen bytes from inbuf with strm, using outbuf (outlen
+    // bytes) as scratch space for output that is then discarded.
+    // Returns true if lzma_code() ends with LZMA_STREAM_END and
+    // false on any other non-LZMA_OK result.
+    //
+    // Because the decoder was initialized with LZMA_CONCATENATED,
+    // lzma_code() must be told when there will be no more input.
+    // This is done by switching action from LZMA_RUN to LZMA_FINISH
+    // once the last chunk of inbuf has been handed over, in the
+    // same way as it is done when encoding.
+    lzma_action action = LZMA_RUN;
+
+    strm->next_in = NULL;
+    strm->avail_in = 0;
+    strm->next_out = outbuf;
+    strm->avail_out = outlen;
+
+    // Feed the decoder at most BUFSIZ bytes of inbuf at a time
+    while (true) {
+
+        // Refill from the not-yet-consumed tail of inbuf. Once
+        // remainlen reaches 0 no new input is supplied and
+        // lzma_code() keeps running with LZMA_FINISH.
+        if (strm->avail_in == 0 && remainlen != 0) {
+            strm->next_in = inbuf + (inlen - remainlen);
+            strm->avail_in = (remainlen > BUFSIZ) ? BUFSIZ : remainlen;
+            remainlen -= strm->avail_in;
+
+            // Once the end of the input file has been reached,
+            // we need to tell lzma_code() that no more input
+            // will be coming. As said before, this isn't required
+            // if the LZMA_CONCATENATED flag isn't used when
+            // initializing the decoder.
+            if (remainlen == 0)
+                action = LZMA_FINISH;
+        }
+
+        lzma_ret ret = lzma_code(strm, action);
+
+        // The decoded data is not kept: whenever outbuf is full (or
+        // a stream has ended) rewind to its start and overwrite it.
+        if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
+            strm->next_out = outbuf;
+            strm->avail_out = outlen;
+        }
+
+        if (ret != LZMA_OK) {
+            // Once everything has been decoded successfully, the
+            // return value of lzma_code() will be LZMA_STREAM_END.
+            //
+            // It is important to check for LZMA_STREAM_END. Do not
+            // assume that getting ret != LZMA_OK would mean that
+            // everything has gone well or that when you aren't
+            // getting more output it must have successfully
+            // decoded everything.
+            if (ret == LZMA_STREAM_END)
+                return true;
+
+#ifdef __DEBUG__
+            // It's not LZMA_OK nor LZMA_STREAM_END,
+            // so it must be an error code. See lzma/base.h
+            // (src/liblzma/api/lzma/base.h in the source package
+            // or e.g. /usr/include/lzma/base.h depending on the
+            // install prefix) for the list and documentation of
+            // possible values. Many values listed in lzma_ret
+            // enumeration aren't possible in this example, but
+            // can be made possible by enabling memory usage limit
+            // or adding flags to the decoder initialization.
+            const char *msg;
+            switch (ret) {
+            case LZMA_MEM_ERROR:
+                msg = "Memory allocation failed";
+                break;
+
+            case LZMA_FORMAT_ERROR:
+                // .xz magic bytes weren't found.
+                msg = "The input is not in the .xz format";
+                break;
+
+            case LZMA_OPTIONS_ERROR:
+                // For example, the headers specify a filter
+                // that isn't supported by this liblzma
+                // version (or it hasn't been enabled when
+                // building liblzma, but no-one sane does
+                // that unless building liblzma for an
+                // embedded system). Upgrading to a newer
+                // liblzma might help.
+                //
+                // Note that it is unlikely that the file has
+                // accidentally became corrupt if you get this
+                // error. The integrity of the .xz headers is
+                // always verified with a CRC32, so
+                // unintentionally corrupt files can be
+                // distinguished from unsupported files.
+                msg = "Unsupported compression options";
+                break;
+
+            case LZMA_DATA_ERROR:
+                msg = "Compressed file is corrupt";
+                break;
+
+            case LZMA_BUF_ERROR:
+                // Typically this error means that a valid
+                // file has got truncated, but it might also
+                // be a damaged part in the file that makes
+                // the decoder think the file is truncated.
+                // If you prefer, you can use the same error
+                // message for this as for LZMA_DATA_ERROR.
+                msg = "Compressed file is truncated or "
+                        "otherwise corrupt";
+                break;
+
+            default:
+                // This is most likely LZMA_PROG_ERROR.
+                msg = "Unknown error, possibly a bug";
+                break;
+            }
+
+            fprintf(stderr, "Decoder error: "
+                    "%s (error code %u)\n",
+                    msg, (unsigned int)ret);
+#endif
+            return false;
+        }
+    }
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+    // libFuzzer entry point: run `size` bytes at `data` through a
+    // fresh .xz stream decoder and throw the decoded output away.
+    // The return value is always 0, even if decoding fails.
+    lzma_stream decoder = LZMA_STREAM_INIT;
+    uint8_t scratch[BUFSIZ];
+
+    // Empty input is uninteresting; a failed decoder init is not
+    // worth retrying. Either way there is nothing more to do.
+    if (size == 0 || !init_decoder(&decoder)) {
+        return 0;
+    }
+
+    // Decode the whole input, reusing `scratch` for the output.
+    const bool decoded = decompress(&decoder, data, size,
+                                    scratch, sizeof(scratch));
+#ifdef __DEBUG__
+    if (!decoded) {
+        fprintf(stderr, "Decode failure\n");
+    }
+#else
+    (void)decoded;
+#endif
+
+    // Release everything the decoder allocated.
+    lzma_end(&decoder);
+    return 0;
+}
-- 
2.17.1


Reply via email to