v8 changes
- prefer BUG() over die()
- do "1U <<" instead of "1 << " to avoid undefined behavior with
signed shifting.
- add more comments based on Jeff's feedback
- plug a leak in try_delta() when delta_size is too large
- be kind and set depth/cache_max_small_delta_size to max limit
instead of dying when the user gives a value over limit
- make travis execute pack-objects uncommon code
- use git_env_*() instead of manually handling getenv() values
- fallback code for when a new pack is added when pack-objects is
running
- Compressed cached delta size limit is increased from 64k to 1MB
- Cached delta size limit is decreased from 2G to 1MB
Interdiff
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index c774821930..b5bba2c228 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1439,7 +1439,7 @@ static void check_object(struct object_entry *entry)
goto give_up;
if (type < 0)
- die("BUG: invalid type %d", type);
+ BUG("invalid type %d", type);
entry->in_pack_type = type;
/*
@@ -1861,6 +1861,11 @@ static pthread_mutex_t progress_mutex;
#endif
+/*
+ * Return the size of the object without doing any delta
+ * reconstruction (so non-deltas are true object sizes, but deltas
+ * return the size of the delta data).
+ */
unsigned long oe_get_size_slow(struct packing_data *pack,
const struct object_entry *e)
{
@@ -1881,7 +1886,7 @@ unsigned long oe_get_size_slow(struct packing_data *pack,
p = oe_in_pack(pack, e);
if (!p)
- die("BUG: when e->type is a delta, it must belong to a pack");
+ BUG("when e->type is a delta, it must belong to a pack");
read_lock();
w_curs = NULL;
@@ -2006,8 +2011,10 @@ static int try_delta(struct unpacked *trg, struct
unpacked *src,
delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size,
max_size);
if (!delta_buf)
return 0;
- if (delta_size >= (1 << OE_DELTA_SIZE_BITS))
+ if (delta_size >= (1U << OE_DELTA_SIZE_BITS)) {
+ free(delta_buf);
return 0;
+ }
if (DELTA(trg_entry)) {
/* Prefer only shallower same-sized deltas. */
@@ -2163,7 +2170,7 @@ static void find_deltas(struct object_entry **list,
unsigned *list_size,
unsigned long size;
size = do_compress(&entry->delta_data,
DELTA_SIZE(entry));
- if (size < (1 << OE_Z_DELTA_BITS)) {
+ if (size < (1U << OE_Z_DELTA_BITS)) {
entry->z_delta_size = size;
cache_lock();
delta_cache_size -= DELTA_SIZE(entry);
@@ -3131,7 +3138,7 @@ int cmd_pack_objects(int argc, const char **argv, const
char *prefix)
};
if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS))
- die("BUG: too many dfs states, increase OE_DFS_STATE_BITS");
+ BUG("too many dfs states, increase OE_DFS_STATE_BITS");
check_replace_refs = 0;
@@ -3149,12 +3156,16 @@ int cmd_pack_objects(int argc, const char **argv, const
char *prefix)
if (pack_to_stdout != !base_name || argc)
usage_with_options(pack_usage, pack_objects_options);
- if (depth >= (1 << OE_DEPTH_BITS))
- die(_("delta chain depth %d is greater than maximum limit %d"),
- depth, (1 << OE_DEPTH_BITS) - 1);
- if (cache_max_small_delta_size >= (1 << OE_Z_DELTA_BITS))
- die(_("pack.deltaCacheLimit is greater than maximum limit %d"),
- (1 << OE_Z_DELTA_BITS) - 1);
+ if (depth >= (1 << OE_DEPTH_BITS)) {
+ warning(_("delta chain depth %d is too deep, forcing %d"),
+ depth, (1 << OE_DEPTH_BITS) - 1);
+ depth = (1 << OE_DEPTH_BITS) - 1;
+ }
+ if (cache_max_small_delta_size >= (1U << OE_Z_DELTA_BITS)) {
+ warning(_("pack.deltaCacheLimit is too high, forcing %d"),
+ (1U << OE_Z_DELTA_BITS) - 1);
+ cache_max_small_delta_size = (1U << OE_Z_DELTA_BITS) - 1;
+ }
argv_array_push(&rp, "pack-objects");
if (thin) {
@@ -3274,6 +3285,8 @@ int cmd_pack_objects(int argc, const char **argv, const
char *prefix)
}
}
+ prepare_packing_data(&to_pack);
+
if (progress)
progress_state = start_progress(_("Counting objects"), 0);
if (!use_internal_rev_list)
diff --git a/ci/run-tests.sh b/ci/run-tests.sh
index 73e273fac7..857d144ee8 100755
--- a/ci/run-tests.sh
+++ b/ci/run-tests.sh
@@ -10,7 +10,10 @@ ln -s "$cache_dir/.prove" t/.prove
make --quiet test
if test "$jobname" = "linux-gcc"
then
- GIT_TEST_SPLIT_INDEX=YesPlease make --quiet test
+ export GIT_TEST_SPLIT_INDEX=YesPlease
+ export GIT_TEST_FULL_IN_PACK_ARRAY=true
+ export GIT_TEST_OE_SIZE=10
+ make --quiet test
fi
check_unignored_build_artifacts
diff --git a/pack-objects.c b/pack-objects.c
index 59c6e40a02..bf2e0a808d 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -3,6 +3,7 @@
#include "pack.h"
#include "pack-objects.h"
#include "packfile.h"
+#include "config.h"
static uint32_t locate_object_entry_hash(struct packing_data *pdata,
const unsigned char *sha1,
@@ -90,18 +91,14 @@ struct object_entry *packlist_find(struct packing_data
*pdata,
static void prepare_in_pack_by_idx(struct packing_data *pdata)
{
struct packed_git **mapping, *p;
- int cnt = 0, nr = 1 << OE_IN_PACK_BITS;
-
- if (getenv("GIT_TEST_FULL_IN_PACK_ARRAY")) {
- /*
- * leave in_pack_by_idx NULL to force in_pack[] to be
- * used instead
- */
- return;
- }
+ int cnt = 0, nr = 1U << OE_IN_PACK_BITS;
ALLOC_ARRAY(mapping, nr);
- mapping[cnt++] = NULL; /* zero index must be mapped to NULL */
+ /*
+ * oe_in_pack() on an all-zero'd object_entry
+ * (i.e. in_pack_idx also zero) should return NULL.
+ */
+ mapping[cnt++] = NULL;
prepare_packed_git();
for (p = packed_git; p; p = p->next, cnt++) {
if (cnt == nr) {
@@ -114,21 +111,50 @@ static void prepare_in_pack_by_idx(struct packing_data
*pdata)
pdata->in_pack_by_idx = mapping;
}
+/*
+ * A new pack appears after prepare_in_pack_by_idx() has been
+ * run. This is likely a race.
+ *
+ * We could map this new pack to in_pack_by_idx[] array, but then we
+ * have to deal with full array anyway. And since it's hard to test
+ * this fall back code, just stay simple and fall back to using
+ * in_pack[] array.
+ */
+void oe_map_new_pack(struct packing_data *pack,
+ struct packed_git *p)
+{
+ uint32_t i;
+
+ REALLOC_ARRAY(pack->in_pack, pack->nr_alloc);
+
+ for (i = 0; i < pack->nr_objects; i++)
+ pack->in_pack[i] = oe_in_pack(pack, pack->objects + i);
+
+ FREE_AND_NULL(pack->in_pack_by_idx);
+}
+
+/* assume pdata is already zero'd by caller */
+void prepare_packing_data(struct packing_data *pdata)
+{
+ if (git_env_bool("GIT_TEST_FULL_IN_PACK_ARRAY", 0)) {
+ /*
+ * do not initialize in_pack_by_idx[] to force the
+ * slow path in oe_in_pack()
+ */
+ } else {
+ prepare_in_pack_by_idx(pdata);
+ }
+
+ pdata->oe_size_limit = git_env_ulong("GIT_TEST_OE_SIZE",
+ 1U << OE_SIZE_BITS);
+}
+
struct object_entry *packlist_alloc(struct packing_data *pdata,
const unsigned char *sha1,
uint32_t index_pos)
{
struct object_entry *new_entry;
- if (!pdata->nr_objects) {
- prepare_in_pack_by_idx(pdata);
- if (getenv("GIT_TEST_OE_SIZE_BITS")) {
- int bits = atoi(getenv("GIT_TEST_OE_SIZE_BITS"));;
- pdata->oe_size_limit = 1 << bits;
- }
- if (!pdata->oe_size_limit)
- pdata->oe_size_limit = 1 << OE_SIZE_BITS;
- }
if (pdata->nr_objects >= pdata->nr_alloc) {
pdata->nr_alloc = (pdata->nr_alloc + 1024) * 3 / 2;
REALLOC_ARRAY(pdata->objects, pdata->nr_alloc);
diff --git a/pack-objects.h b/pack-objects.h
index c20f67e25b..60192cce1f 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -4,9 +4,13 @@
#define OE_DFS_STATE_BITS 2
#define OE_DEPTH_BITS 12
#define OE_IN_PACK_BITS 10
-#define OE_Z_DELTA_BITS 16
+#define OE_Z_DELTA_BITS 20
+/*
+ * Note that oe_set_size() becomes expensive when the given size is
+ * above this limit. Don't lower it too much.
+ */
#define OE_SIZE_BITS 31
-#define OE_DELTA_SIZE_BITS 31
+#define OE_DELTA_SIZE_BITS 20
/*
* State flags for depth-first search used for analyzing delta cycles.
@@ -36,7 +40,7 @@ enum dfs_state {
*
* "size" is the uncompressed object size. Compressed size of the raw
* data for an object in a pack is not stored anywhere but is computed
- * and made available when reverse .idx is made. Note that when an
+ * and made available when reverse .idx is made. Note that when a
* delta is reused, "size" is the uncompressed _delta_ size, not the
* canonical one after the delta has been applied.
*
@@ -77,15 +81,15 @@ struct object_entry {
void *delta_data; /* cached delta (uncompressed) */
off_t in_pack_offset;
uint32_t hash; /* name hint hash */
- uint32_t size_:OE_SIZE_BITS;
+ unsigned size_:OE_SIZE_BITS;
unsigned size_valid:1;
uint32_t delta_idx; /* delta base object */
uint32_t delta_child_idx; /* deltified objects who bases me */
uint32_t delta_sibling_idx; /* other deltified objects who
* uses the same base as me
*/
- uint32_t delta_size_:OE_DELTA_SIZE_BITS; /* delta data size
(uncompressed) */
- uint32_t delta_size_valid:1;
+ unsigned delta_size_:OE_DELTA_SIZE_BITS; /* delta data size
(uncompressed) */
+ unsigned delta_size_valid:1;
unsigned in_pack_idx:OE_IN_PACK_BITS; /* already in pack */
unsigned z_delta_size:OE_Z_DELTA_BITS;
unsigned type_valid:1;
@@ -103,7 +107,15 @@ struct object_entry {
unsigned char in_pack_header_size;
unsigned depth:OE_DEPTH_BITS;
- /* size: 80, bit_padding: 20 bits, holes: 1 bit */
+ /*
+ * pahole results on 64-bit linux (gcc and clang)
+ *
+ * size: 80, bit_padding: 20 bits, holes: 8 bits
+ *
+ * and on 32-bit (gcc)
+ *
+ * size: 76, bit_padding: 20 bits, holes: 8 bits
+ */
};
struct packing_data {
@@ -127,6 +139,7 @@ struct packing_data {
uintmax_t oe_size_limit;
};
+void prepare_packing_data(struct packing_data *pdata);
struct object_entry *packlist_alloc(struct packing_data *pdata,
const unsigned char *sha1,
uint32_t index_pos);
@@ -164,7 +177,7 @@ static inline void oe_set_type(struct object_entry *e,
enum object_type type)
{
if (type >= OBJ_ANY)
- die("BUG: OBJ_ANY cannot be set in pack-objects code");
+ BUG("OBJ_ANY cannot be set in pack-objects code");
e->type_valid = type >= OBJ_NONE;
e->type_ = (unsigned)type;
@@ -190,21 +203,20 @@ static inline struct packed_git *oe_in_pack(const struct
packing_data *pack,
return pack->in_pack_by_idx[e->in_pack_idx];
else
return pack->in_pack[e - pack->objects];
-
}
+void oe_map_new_pack(struct packing_data *pack,
+ struct packed_git *p);
static inline void oe_set_in_pack(struct packing_data *pack,
struct object_entry *e,
struct packed_git *p)
{
- if (pack->in_pack_by_idx) {
- if (p->index <= 0)
- die("BUG: found_pack should be NULL "
- "instead of having non-positive index");
+ if (!p->index)
+ oe_map_new_pack(pack, p);
+ if (pack->in_pack_by_idx)
e->in_pack_idx = p->index;
- } else
+ else
pack->in_pack[e - pack->objects] = p;
-
}
static inline struct object_entry *oe_delta(
@@ -307,7 +319,7 @@ static inline void oe_set_size(struct packing_data *pack,
} else {
e->size_valid = 0;
if (oe_get_size_slow(pack, e) != size)
- die("BUG: 'size' is supposed to be the object size!");
+ BUG("'size' is supposed to be the object size!");
}
}
@@ -326,7 +338,7 @@ static inline void oe_set_delta_size(struct packing_data
*pack,
e->delta_size_ = size;
e->delta_size_valid = e->delta_size_ == size;
if (!e->delta_size_valid && size != oe_size(pack, e))
- die("BUG: this can only happen in check_object() "
+ BUG("this can only happen in check_object() "
"where delta size is the same as entry size");
}
diff --git a/t/README b/t/README
index 02bfb3fed5..c01d210c15 100644
--- a/t/README
+++ b/t/README
@@ -291,16 +291,26 @@ expect the rest to function correctly.
and know what setup is needed for it. Or when you want to run
everything up to a certain test.
+
+Running tests with special setups
+---------------------------------
+
+The whole test suite could be run to test some special features
+that cannot be easily covered by a few specific test cases. These
+could be enabled by running the test suite with correct GIT_TEST_
+environment set.
+
+GIT_TEST_SPLIT_INDEX forces split-index mode on the whole test suite.
+
GIT_TEST_FULL_IN_PACK_ARRAY exercises the uncommon pack-objects code
path where there are more than 1024 packs even if the actual number of
packs in repository is below this limit.
-GIT_TEST_OE_SIZE_BITS=<bits> exercises the uncommon pack-objects
-code path where we do not cache objecct size in memory and read it
-from existing packs on demand. This normally only happens when the
-object size is over 2GB. This variable forces the code path on any
-object larger than 2^<bits> bytes.
-
+GIT_TEST_OE_SIZE=<n> exercises the uncommon pack-objects code path
+where we do not cache object size in memory and read it from existing
+packs on demand. This normally only happens when the object size is
+over 2GB. This variable forces the code path on any object larger than
+<n> bytes.
Naming Tests
------------
Nguyễn Thái Ngọc Duy (15):
t/README: mention about running the test suite in special modes
pack-objects: a bit of document about struct object_entry
pack-objects: turn type and in_pack_type to bitfields
pack-objects: use bitfield for object_entry::dfs_state
pack-objects: use bitfield for object_entry::depth
pack-objects: move in_pack_pos out of struct object_entry
pack-objects: move in_pack out of struct object_entry
pack-objects: refer to delta objects by index instead of pointer
pack-objects: shrink z_delta_size field in struct object_entry
pack-objects: don't check size when the object is bad
pack-objects: clarify the use of object_entry::size
pack-objects: shrink size field in struct object_entry
pack-objects: shrink delta_size field in struct object_entry
pack-objects: reorder members to shrink struct object_entry
ci: exercise the whole test suite with uncommon code in pack-objects
Documentation/config.txt | 4 +-
Documentation/git-pack-objects.txt | 4 +-
Documentation/git-repack.txt | 4 +-
builtin/pack-objects.c | 366 +++++++++++++++++++----------
cache.h | 3 +
ci/run-tests.sh | 5 +-
object.h | 1 -
pack-bitmap-write.c | 14 +-
pack-bitmap.c | 2 +-
pack-bitmap.h | 4 +-
pack-objects.c | 69 ++++++
pack-objects.h | 312 ++++++++++++++++++++++--
t/README | 20 ++
t/t5300-pack-object.sh | 5 +
14 files changed, 650 insertions(+), 163 deletions(-)
--
2.17.0.rc2.515.g4feb9b7923