durin42 created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Alex Gaynor suggested we should probably copy the left and right sides
  of diffs to new blocks so we can detect over-reads in the diffing
  code, and I agree. Once I got into that, I realized we should do
  things with C++17 idioms rather than keep using malloc() and
  free(). This change is the result. I tried to split it more than this
  and failed.
  
  Everything still compiles and works in the oss-fuzz container, so I
  think we can count on C++17 being available!

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3675

AFFECTED FILES
  contrib/fuzz/Makefile
  contrib/fuzz/bdiff.cc
  contrib/fuzz/fuzzutil.cc
  contrib/fuzz/fuzzutil.h
  contrib/fuzz/xdiff.cc

CHANGE DETAILS

diff --git a/contrib/fuzz/xdiff.cc b/contrib/fuzz/xdiff.cc
--- a/contrib/fuzz/xdiff.cc
+++ b/contrib/fuzz/xdiff.cc
@@ -10,6 +10,8 @@
 #include <inttypes.h>
 #include <stdlib.h>
 
+#include "fuzzutil.h"
+
 extern "C" {
 
 int hunk_consumer(long a1, long a2, long b1, long b2, void *priv)
@@ -20,21 +22,17 @@
 
 int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
 {
-       if (!Size) {
+       auto maybe_inputs = SplitInputs(Data, Size);
+       if (!maybe_inputs) {
                return 0;
        }
-       // figure out a random point in [0, Size] to split our input.
-       size_t split = Data[0] / 255.0 * Size;
-
+       auto inputs = std::move(maybe_inputs.value());
        mmfile_t a, b;
 
-       // `a` input to diff is data[1:split]
-       a.ptr = (char *)Data + 1;
-       // which has len split-1
-       a.size = split - 1;
-       // `b` starts at the next byte after `a` ends
-       b.ptr = a.ptr + a.size;
-       b.size = Size - split;
+       a.ptr = inputs.left.get();
+       a.size = inputs.left_size;
+       b.ptr = inputs.right.get();
+       b.size = inputs.right_size;
        xpparam_t xpp = {
            XDF_INDENT_HEURISTIC, /* flags */
        };
diff --git a/contrib/fuzz/fuzzutil.h b/contrib/fuzz/fuzzutil.h
new file mode 100644
--- /dev/null
+++ b/contrib/fuzz/fuzzutil.h
@@ -0,0 +1,24 @@
+#ifndef CONTRIB_FUZZ_FUZZUTIL_H
+#define CONTRIB_FUZZ_FUZZUTIL_H
+#include <iostream>
+#include <memory>
+#include <optional>
+#include <stdint.h>
+
+/* set DEBUG to 1 for a few debugging prints, or 2 for a lot */
+#define DEBUG 0
+/* LOG(n) << ... streams to std::cout only when n <= DEBUG. The empty
+ * if-arm keeps a caller's own `else` from binding to this macro. */
+#define LOG(level) if ((level) > DEBUG) {} else std::cout
+
+struct two_inputs {
+       std::unique_ptr<char[]> right; /* owned copy of the right side */
+       size_t right_size;             /* number of bytes in right */
+       std::unique_ptr<char[]> left;  /* owned copy of the left side */
+       size_t left_size;              /* number of bytes in left */
+};
+
+/* Split Data into two inputs; yields std::nullopt when Size is 0. */
+std::optional<two_inputs> SplitInputs(const uint8_t *Data, size_t Size);
+
+#endif /* CONTRIB_FUZZ_FUZZUTIL_H */
diff --git a/contrib/fuzz/fuzzutil.cc b/contrib/fuzz/fuzzutil.cc
new file mode 100644
--- /dev/null
+++ b/contrib/fuzz/fuzzutil.cc
@@ -0,0 +1,27 @@
+#include "fuzzutil.h"
+
+#include <cstring>
+#include <utility>
+
+/* Split Data into two owned buffers; Data[0] selects the split point. */
+std::optional<two_inputs> SplitInputs(const uint8_t *Data, size_t Size)
+{
+       if (!Size) {
+               return std::nullopt; // no selector byte to read
+       }
+       // Data[0] scales the Size - 1 payload bytes into a left length in
+       // [0, Size - 1]; everything after the left part is the right part.
+       const size_t left_size = (Data[0] / 255.0) * (Size - 1);
+       const size_t right_size = Size - (left_size + 1);
+
+       // Copy each side into its own exactly-sized allocation so that an
+       // over-read in the diff code is caught by AddressSanitizer.
+       std::unique_ptr<char[]> left(new char[left_size]);
+       std::memcpy(left.get(), Data + 1, left_size);
+       std::unique_ptr<char[]> right(new char[right_size]);
+       std::memcpy(right.get(), Data + 1 + left_size, right_size);
+       LOG(2) << "inputs are  " << left_size << " and " << right_size
+              << " bytes" << std::endl;
+       return two_inputs{std::move(right), right_size, std::move(left),
+                         left_size};
+}
diff --git a/contrib/fuzz/bdiff.cc b/contrib/fuzz/bdiff.cc
--- a/contrib/fuzz/bdiff.cc
+++ b/contrib/fuzz/bdiff.cc
@@ -6,30 +6,25 @@
  * This software may be used and distributed according to the terms of
  * the GNU General Public License, incorporated herein by reference.
  */
+#include <memory>
 #include <stdlib.h>
 
+#include "fuzzutil.h"
+
 extern "C" {
 #include "bdiff.h"
 
 int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
 {
-       if (!Size) {
+       auto maybe_inputs = SplitInputs(Data, Size);
+       if (!maybe_inputs) {
                return 0;
        }
-       // figure out a random point in [0, Size] to split our input.
-       size_t split = Data[0] / 255.0 * Size;
-
-       // left input to diff is data[1:split]
-       const uint8_t *left = Data + 1;
-       // which has len split-1
-       size_t left_size = split - 1;
-       // right starts at the next byte after left ends
-       const uint8_t *right = left + left_size;
-       size_t right_size = Size - split;
+       auto inputs = std::move(maybe_inputs.value());
 
        struct bdiff_line *a, *b;
-       int an = bdiff_splitlines((const char *)left, split - 1, &a);
-       int bn = bdiff_splitlines((const char *)right, right_size, &b);
+       int an = bdiff_splitlines(inputs.left.get(), inputs.left_size, &a);
+       int bn = bdiff_splitlines(inputs.right.get(), inputs.right_size, &b);
        struct bdiff_hunk l;
        bdiff_diff(a, an, b, bn, &l);
        free(a);
diff --git a/contrib/fuzz/Makefile b/contrib/fuzz/Makefile
--- a/contrib/fuzz/Makefile
+++ b/contrib/fuzz/Makefile
@@ -1,36 +1,42 @@
+fuzzutil.o: fuzzutil.cc fuzzutil.h
+       $$CXX $$CXXFLAGS -g -O1 -fsanitize=fuzzer-no-link,address \
+         -std=c++17 \
+         -I../../mercurial -c -o fuzzutil.o fuzzutil.cc
+
 bdiff.o: ../../mercurial/bdiff.c
-       clang -g -O1 -fsanitize=fuzzer-no-link,address -c -o bdiff.o \
+       $$CC $$CFLAGS -fsanitize=fuzzer-no-link,address -c -o bdiff.o \
          ../../mercurial/bdiff.c
 
-bdiff: bdiff.cc bdiff.o
-       clang -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \
-         -I../../mercurial bdiff.cc bdiff.o -o bdiff
+bdiff: bdiff.cc bdiff.o fuzzutil.o
+       $$CXX $$CXXFLAGS -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \
+         -std=c++17 \
+         -I../../mercurial bdiff.cc bdiff.o fuzzutil.o -o bdiff
 
 bdiff-oss-fuzz.o: ../../mercurial/bdiff.c
        $$CC $$CFLAGS -c -o bdiff-oss-fuzz.o ../../mercurial/bdiff.c
 
-bdiff_fuzzer: bdiff.cc bdiff-oss-fuzz.o
-       $$CXX $$CXXFLAGS -std=c++11 -I../../mercurial bdiff.cc \
-         bdiff-oss-fuzz.o -lFuzzingEngine -o $$OUT/bdiff_fuzzer
+bdiff_fuzzer: bdiff.cc bdiff-oss-fuzz.o fuzzutil.o
+       $$CXX $$CXXFLAGS -std=c++17 -I../../mercurial bdiff.cc \
+         bdiff-oss-fuzz.o fuzzutil.o -lFuzzingEngine -o $$OUT/bdiff_fuzzer
 
 x%.o: ../../mercurial/thirdparty/xdiff/x%.c ../../mercurial/thirdparty/xdiff/*.h
-       clang -g -O1 -fsanitize=fuzzer-no-link,address -c \
+       $$CC -g -O1 -fsanitize=fuzzer-no-link,address -c \
          -o $@ \
          $<
 
-xdiff: xdiff.cc xdiffi.o xprepare.o  xutils.o
-       clang -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \
-         -I../../mercurial xdiff.cc \
-         xdiffi.o xprepare.o xutils.o -o xdiff
+xdiff: xdiff.cc xdiffi.o xprepare.o xutils.o fuzzutil.o
+       $$CXX $$CXXFLAGS -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \
+         -std=c++17 \
+         -I../../mercurial xdiff.cc xdiffi.o xprepare.o xutils.o fuzzutil.o -o xdiff
 
 fuzz-x%.o: ../../mercurial/thirdparty/xdiff/x%.c ../../mercurial/thirdparty/xdiff/*.h
        $$CC $$CFLAGS -c \
          -o $@ \
          $<
 
-xdiff_fuzzer: xdiff.cc fuzz-xdiffi.o fuzz-xprepare.o  fuzz-xutils.o
-       $$CXX $$CXXFLAGS -std=c++11 -I../../mercurial xdiff.cc \
-         fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o \
+xdiff_fuzzer: xdiff.cc fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil.o
+       $$CXX $$CXXFLAGS -std=c++17 -I../../mercurial xdiff.cc \
+         fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil.o \
          -lFuzzingEngine -o $$OUT/xdiff_fuzzer
 
 all: bdiff xdiff



To: durin42, #hg-reviewers
Cc: mercurial-devel
_______________________________________________
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Reply via email to