Git should never generate packs with duplicate objects.
However, we may see such packs due to bugs in Git or other
implementations (e.g., JGit had such a bug a few years ago).

In theory, such packs should not be a problem for us (we
will simply find one of the instances of the object when
looking in the pack). However, the JGit bug report mentioned
possible infinite loops during repacking due to cycles in
the delta chain.  Though this problem hasn't specifically
been reproduced on modern git, there is no reason not to be
careful with incoming packs, given that only buggy
implementations should be producing such packs, anyway.

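This patch introduces the pack.indexDuplicates option to
allow or reject such packs from index-pack. When it is set
to false, index-pack dies upon finding a duplicate object
while writing the index. The default remains to allow such
packs.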

Signed-off-by: Jeff King <p...@peff.net>
Acked-by: Nicolas Pitre <n...@fluxnic.net>
---
 builtin/index-pack.c              |  4 ++++
 pack-write.c                      | 24 ++++++++++++++++++++++++
 pack.h                            |  2 ++
 t/t5308-pack-detect-duplicates.sh |  8 ++++++++
 4 files changed, 38 insertions(+)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 79dfe47..72e19a0 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1364,6 +1364,10 @@ static int git_index_pack_config(const char *k, const char *v, void *cb)
 #endif
                return 0;
        }
+       if (!strcmp(k, "pack.indexduplicates")) {
+               opts->allow_duplicates = git_config_bool(k, v);
+               return 0;
+       }
        return git_default_config(k, v, cb);
 }
 
diff --git a/pack-write.c b/pack-write.c
index ca9e63b..da946a7 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -7,6 +7,7 @@ void reset_pack_idx_option(struct pack_idx_option *opts)
        memset(opts, 0, sizeof(*opts));
        opts->version = 2;
        opts->off32_limit = 0x7fffffff;
+       opts->allow_duplicates = 1;
 }
 
 static int sha1_compare(const void *_a, const void *_b)
@@ -37,6 +38,19 @@ static int need_large_offset(off_t offset, const struct pack_idx_option *opts)
                         sizeof(ofsval), cmp_uint32);
 }
 
+static void *find_duplicate(void *vbase, size_t n, size_t size,
+                           int (*cmp)(const void *, const void *))
+{
+       unsigned char *base = vbase;
+       while (n > 1) {
+               if (!cmp(base, base + size))
+                       return base;
+               base += size;
+               n--;
+       }
+       return NULL;
+}
+
 /*
  * On entry *sha1 contains the pack content SHA1 hash, on exit it is
  * the SHA1 hash of sorted object names. The objects array passed in
@@ -68,6 +82,16 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
        else
                sorted_by_sha = list = last = NULL;
 
+       if (!opts->allow_duplicates) {
+               struct pack_idx_entry **dup;
+
+               dup = find_duplicate(sorted_by_sha, nr_objects,
+                                    sizeof(*sorted_by_sha), sha1_compare);
+               if (dup)
+                       die("pack has duplicate entries for %s",
+                           sha1_to_hex((*dup)->sha1));
+       }
+
        if (opts->flags & WRITE_IDX_VERIFY) {
                assert(index_name);
                f = sha1fd_check(index_name);
diff --git a/pack.h b/pack.h
index aa6ee7d..45217b6 100644
--- a/pack.h
+++ b/pack.h
@@ -44,6 +44,8 @@ struct pack_idx_option {
        uint32_t version;
        uint32_t off32_limit;
 
+       int allow_duplicates;
+
        /*
         * List of offsets that would fit within off32_limit but
         * need to be written out as 64-bit entity for byte-for-byte
diff --git a/t/t5308-pack-detect-duplicates.sh b/t/t5308-pack-detect-duplicates.sh
index 04fe242..97ce2e0 100755
--- a/t/t5308-pack-detect-duplicates.sh
+++ b/t/t5308-pack-detect-duplicates.sh
@@ -70,4 +70,12 @@ test_expect_success 'lookup in duplicated pack (GIT_USE_LOOKUP)' '
        test_cmp expect actual
 '
 
+test_expect_success 'index-pack can reject packs with duplicates' '
+       clear_packs &&
+       create_pack dups.pack 2 &&
+       test_must_fail \
+               git -c pack.indexDuplicates=0 index-pack --stdin <dups.pack &&
+       test_expect_code 1 git cat-file -e $LO_SHA1
+'
+
 test_done
-- 
1.8.4.rc2.28.g6bb5f3f

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to