[PATCH/RFC v3 00/12] Reduce pack-objects memory footprint

2018-03-08 Thread Nguyễn Thái Ngọc Duy
v3 cleans up a bit to avoid the horrible macros that v2 adds, and
adds some more documentation for this struct since it's getting harder
to just look at the struct and see what field is related to what.

v3 also adds three more patches and reduces another 16 bytes (total
struct reduction now is 41%). After this there's hardly anything else I
could do. Two 64-bit fields left, but even if I shrink them, I'd lose
it to padding. There's still one possibility to share in_pack_offset
with idx.offset, but it's risky.

These three patches optimize for the common case; the uncommon cases
will suffer some performance loss:

- 10/12 limits the cached compressed delta size to 64k (the configured
  cache limit defaults to just 1000 bytes anyway). If you normally have
  lots of huge deltas, you're going to take a hit because more deltas
  must be recreated at the writing phase. Note that it does not stop
  pack-objects from creating deltas larger than 64k.

- 11/12 reduces the representable uncompressed object size to 4GB.
  Whenever we need the size of an object larger than that, we read the
  pack again to retrieve it, which is much slower than accessing a piece
  of memory (see the sketch after this list). Again, I'm assuming these
  giant blobs are rare enough that the performance hit won't matter.

- 12/12 is similar to 11/12 and reduces the representable uncompressed
  delta size to 4GB. Frankly a 4GB delta is still ridiculous, but I
  don't think we gain more by shrinking it further. If your packs
  contain one of those giant deltas, it still works; delta_size is
  simply read back from the pack again.
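
To illustrate the idea behind 11/12 and 12/12, here is a rough sketch of
what an accessor for such a shrunken size field could look like. The
field names (size_, size_valid) and the helper name oe_size() are made
up for illustration; sha1_object_info() is the existing single-object
lookup:

    /*
     * Illustrative sketch only: cache the uncompressed size in 32 bits
     * when it fits; otherwise mark it invalid and re-read it from the
     * object store on demand (the rare, slow path).
     */
    static inline unsigned long oe_size(const struct object_entry *e)
    {
            unsigned long size;

            if (e->size_valid)
                    return e->size_;        /* common case */
            /* giant object: ask the object store again */
            sha1_object_info(e->idx.oid.hash, &size);
            return size;
    }

    static inline void oe_set_size(struct object_entry *e,
                                   unsigned long size)
    {
            e->size_ = size;                        /* 32-bit field */
            e->size_valid = (e->size_ == size);     /* survived truncation? */
    }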

The following interdiff does _NOT_ cover the new patches, just the
first nine that v2 has.

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 55f19a1f18..82a4a95888 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -29,19 +29,13 @@
 #include "list.h"
 #include "packfile.h"
 
-#define DELTA(obj) \
-   ((obj)->delta_idx ? &to_pack.objects[(obj)->delta_idx - 1] : NULL)
-#define DELTA_CHILD(obj) \
-   ((obj)->delta_child_idx ? &to_pack.objects[(obj)->delta_child_idx - 1] : NULL)
-#define DELTA_SIBLING(obj) \
-   ((obj)->delta_sibling_idx ? &to_pack.objects[(obj)->delta_sibling_idx - 1] : NULL)
-
-#define CLEAR_DELTA(obj) (obj)->delta_idx = 0
-#define CLEAR_DELTA_CHILD(obj) (obj)->delta_child_idx = 0
-#define CLEAR_DELTA_SIBLING(obj) (obj)->delta_sibling_idx = 0
-
-#define SET_DELTA(obj, val) (obj)->delta_idx = ((val) - to_pack.objects) + 1
-#define SET_DELTA_CHILD(obj, val) (obj)->delta_child_idx = ((val) - to_pack.objects) + 1
+#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
+#define DELTA(obj) oe_delta(&to_pack, obj)
+#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
+#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
+#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
+#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
+#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
 
 static const char *pack_usage[] = {
N_("git pack-objects --stdout [...] [<  | < 
]"),
@@ -381,7 +375,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
unsigned long limit, int usable_delta)
 {
-   struct packed_git *p = IN_PACK(&to_pack, entry);
+   struct packed_git *p = IN_PACK(entry);
struct pack_window *w_curs = NULL;
struct revindex_entry *revidx;
off_t offset;
@@ -492,7 +486,7 @@ static off_t write_object(struct hashfile *f,
 
if (!reuse_object)
to_reuse = 0;   /* explicit */
-   else if (!IN_PACK(&to_pack, entry))
+   else if (!IN_PACK(entry))
to_reuse = 0;   /* can't reuse what we don't have */
else if (entry->type == OBJ_REF_DELTA || entry->type == OBJ_OFS_DELTA)
/* check_object() decided it for us ... */
@@ -557,7 +551,7 @@ static enum write_one_status write_one(struct hashfile *f,
switch (write_one(f, DELTA(e), offset)) {
case WRITE_ONE_RECURSIVE:
/* we cannot depend on this one */
-   CLEAR_DELTA(e);
+   SET_DELTA(e, NULL);
break;
default:
break;
@@ -672,8 +666,8 @@ static struct object_entry **compute_write_order(void)
for (i = 0; i < to_pack.nr_objects; i++) {
objects[i].tagged = 0;
objects[i].filled = 0;
-   CLEAR_DELTA_CHILD(&objects[i]);
-   CLEAR_DELTA_SIBLING(&objects[i]);
+   SET_DELTA_CHILD(&objects[i], NULL);
+   SET_DELTA_SIBLING(&objects[i], NULL);
}
 
/*
@@ -1067,19 +1061,8 @@ static int want_object_in_pack(const struct object_id *oid,
 
want = 1;
 done:
-   if (want && *found_pack && !(*found_pack)->index) {
-   struct packed_g

Re: [PATCH/RFC v2 0/9] Reduce pack-objects memory footprint

2018-03-05 Thread Duy Nguyen
On Sat, Mar 03, 2018 at 09:46:57AM +0700, Nguyễn Thái Ngọc Duy wrote:
> The array of object_entry in pack-objects can take a lot of memory
> when pack-objects is run in "pack everything" mode. On linux-2.6.git,
> this array alone takes roughly 800MB.
> 
> This series reorders some fields and reduces field size... to keep
> this struct smaller. Its size goes from 136 bytes to 96 bytes (29%) on
> 64-bit linux and saves 260MB on linux-2.6.git.

And I continue to push this until someone screams "enough is enough!".
This patch saves 4 more bytes. The trade-off is that processing objects
with offsets beyond 4 GB will be slower, but I think that is a
reasonable trade-off.

The same trick could be done for the "size" field in this struct
(i.e. uncompressed object sizes that do not fit in 32 bits must be read
back from disk). Interestingly though, "size" is an unsigned long, which
is 32 bits on Windows, and nobody has complained about that so far; we
could even just unconditionally reject objects larger than 4GB.
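
To make the trade-off concrete before the patch, here is a rough sketch
of what the accessor pair in pack-objects.h could look like (the actual
patch may differ). The field names (in_pack_location, offset_valid) are
made up for illustration; nth_packed_object_offset() and
find_pack_revindex() are the existing pack index helpers, and the
in-pack lookup comes from earlier in the series:

    /*
     * Illustrative sketch only: offsets below 4GB are stored directly in
     * a 32-bit slot; larger offsets store the object's position in the
     * pack index instead, and the real offset is recovered through
     * nth_packed_object_offset() when needed.
     */
    static inline off_t oe_in_pack_offset(struct packing_data *pack,
                                          const struct object_entry *e)
    {
            if (e->offset_valid)
                    return e->in_pack_location;     /* common case */
            return nth_packed_object_offset(oe_in_pack(pack, e),
                                            e->in_pack_location);
    }

    static inline void oe_set_in_pack_offset(struct packing_data *pack,
                                             struct object_entry *e,
                                             off_t offset)
    {
            if (offset < (1ULL << 32)) {
                    e->in_pack_location = offset;
                    e->offset_valid = 1;
            } else {
                    /* store the .idx position instead of the offset */
                    struct packed_git *p = oe_in_pack(pack, e);
                    e->in_pack_location = find_pack_revindex(p, offset)->nr;
                    e->offset_valid = 0;
            }
    }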

-- 8< --
Subject: [PATCH] pack-objects: shrink in_pack_offset for 4GB pack files

If a pack file is smaller than 4GB, its pack offsets fit within 32
bits. If not (which should not be a common case), the field
in_pack_location stores the object index instead (which still fits in 32
bits), and getting the pack offset requires an extra lookup through the
nth_packed_object_offset() function.

This saves us 4 bytes, but we lose it to padding until the struct is
shrunk further.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/pack-objects.c | 26 ++---
 pack-objects.h | 44 --
 2 files changed, 57 insertions(+), 13 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 55f19a1f18..57c04b277b 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -43,6 +43,8 @@
 #define SET_DELTA(obj, val) (obj)->delta_idx = ((val) - to_pack.objects) + 1
#define SET_DELTA_CHILD(obj, val) (obj)->delta_child_idx = ((val) - to_pack.objects) + 1
 
+#define IN_PACK_OFFSET(obj) oe_in_pack_offset(&to_pack, obj)
+
 static const char *pack_usage[] = {
N_("git pack-objects --stdout [...] [<  | < 
]"),
N_("git pack-objects [...]  [<  | < 
]"),
@@ -397,7 +399,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
hdrlen = encode_in_pack_object_header(header, sizeof(header),
  type, entry->size);
 
-   offset = entry->in_pack_offset;
+   offset = IN_PACK_OFFSET(entry);
revidx = find_pack_revindex(p, offset);
datalen = revidx[1].offset - offset;
if (!pack_to_stdout && p->index_version > 1 &&
@@ -1107,7 +1109,7 @@ static void create_object_entry(const struct object_id *oid,
if (found_pack->index <= 0)
die("BUG: found_pack should be NULL instead of having 
non-positive index");
entry->in_pack_idx = found_pack->index;
-   entry->in_pack_offset = found_offset;
+   oe_set_in_pack_offset(&to_pack, entry, found_offset);
}
 
entry->no_try_delta = no_try_delta;
@@ -1442,7 +1444,7 @@ static void check_object(struct object_entry *entry)
unsigned char *buf, c;
enum object_type type;
 
-   buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
+   buf = use_pack(p, &w_curs, IN_PACK_OFFSET(entry), &avail);
 
/*
 * We want in_pack_type even if we do not reuse delta
@@ -1475,12 +1477,12 @@ static void check_object(struct object_entry *entry)
case OBJ_REF_DELTA:
if (reuse_delta && !entry->preferred_base)
base_ref = use_pack(p, &w_curs,
-   entry->in_pack_offset + used, NULL);
+   IN_PACK_OFFSET(entry) + used, NULL);
entry->in_pack_header_size = used + 20;
break;
case OBJ_OFS_DELTA:
buf = use_pack(p, &w_curs,
-  entry->in_pack_offset + used, NULL);
+  IN_PACK_OFFSET(entry) + used, NULL);
used_0 = 0;
c = buf[used_0++];
ofs = c & 127;
@@ -1494,8 +1496,8 @@ static void check_object(struct object_entry *entry)
c = buf[used_0++];
ofs = (ofs << 7) + (c & 127);
}
-   ofs = entry->in_pack_offset - ofs;
-   if (ofs <= 0 || ofs >= entry->in_pack_offset) {
+   ofs = IN_PACK_OFFSET(entry) - ofs;
+   if (ofs <= 0 || ofs >= IN_PACK_OFFSET(entry)) {
error("delta base offset out of bound for %s",

[PATCH/RFC v2 0/9] Reduce pack-objects memory footprint

2018-03-02 Thread Nguyễn Thái Ngọc Duy
The array of object_entry in pack-objects can take a lot of memory
when pack-objects is run in "pack everything" mode. On linux-2.6.git,
this array alone takes roughly 800MB.

This series reorders some fields and reduces field size... to keep
this struct smaller. Its size goes from 136 bytes to 96 bytes (29%) on
64-bit linux and saves 260MB on linux-2.6.git.

v2 only differs in limits, documentation and a new escape hatch for
the pack file limit.

Now the bad side:

- the number of pack files pack-objects can handle is reduced to 16k
  (previously unlimited; v1 had it at 4k)
- max delta chain is also limited to 4096 (previously practically
  unlimited), same as v1
- some patches are quite invasive (e.g. replacing pointers with
  uint32_t) and reduce readability a bit.
- it may be trickier to add more data to object_entry in the future.

In v1, if the total pack count is above the 4k limit, pack-objects
dies. v2 is a bit smarter and only counts packs that do not have a
companion .keep file. This allows users with 16k+ pack files to keep
using pack-objects by first adding .keep files to reduce the pack
count, repacking, removing (some) .keep files, then repacking again...

While this process could be automated, at least by 'git repack', given
the unbelievably high 16k limit I don't think it's worth doing.

Interdiff

diff --git a/Documentation/config.txt b/Documentation/config.txt
index f57e9cf10c..9bd3f5a789 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2412,6 +2412,7 @@ pack.window::
 pack.depth::
The maximum delta depth used by linkgit:git-pack-objects[1] when no
maximum depth is given on the command line. Defaults to 50.
+   Maximum value is 4095.
 
 pack.windowMemory::
The maximum size of memory that is consumed by each thread
diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt
index 81bc490ac5..b8d936ccf5 100644
--- a/Documentation/git-pack-objects.txt
+++ b/Documentation/git-pack-objects.txt
@@ -96,7 +96,9 @@ base-name::
it too deep affects the performance on the unpacker
side, because delta data needs to be applied that many
times to get to the necessary object.
-   The default value for --window is 10 and --depth is 50.
++
+The default value for --window is 10 and --depth is 50. The maximum
+depth is 4095.
 
--window-memory=<n>::
This option provides an additional limit on top of `--window`;
@@ -267,6 +269,15 @@ Unexpected missing object will raise an error.
locally created objects [without .promisor] and objects from the
promisor remote [with .promisor].)  This is used with partial clone.
 
+LIMITATIONS
+---
+
+This command could only handle 16384 existing pack files at a time.
+If you have more than this, you need to exclude some pack files with
+".keep" file and --honor-pack-keep option, to combine 16k pack files
+in one, then remove these .keep files and run pack-objects one more
+time.
+
 SEE ALSO
 
 linkgit:git-rev-list[1]
diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
index ae750e9e11..25c83c4927 100644
--- a/Documentation/git-repack.txt
+++ b/Documentation/git-repack.txt
@@ -90,7 +90,9 @@ other objects in that pack they already have locally.
space. `--depth` limits the maximum delta depth; making it too deep
affects the performance on the unpacker side, because delta data needs
to be applied that many times to get to the necessary object.
-   The default value for --window is 10 and --depth is 50.
++
+The default value for --window is 10 and --depth is 50. The maximum
+depth is 4095.
 
--threads=<n>::
This option is passed through to `git pack-objects`.
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 45076f2523..55f19a1f18 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1038,7 +1038,7 @@ static int want_object_in_pack(const struct object_id *oid,
if (*found_pack) {
want = want_found_object(exclude, *found_pack);
if (want != -1)
-   return want;
+   goto done;
}
 
list_for_each(pos, &packed_git_mru) {
@@ -1061,11 +1061,27 @@ static int want_object_in_pack(const struct object_id *oid,
if (!exclude && want > 0)
list_move(&p->mru, &packed_git_mru);
if (want != -1)
-   return want;
+   goto done;
}
}
 
-   return 1;
+   want = 1;
+done:
+   if (want && *found_pack && !(*found_pack)->index) {
+   struct packed_git *p = *found_pack;
+
+   if (to_pack.in_pack_count >= (1 << OE_IN_PACK_BITS))
+   die(_("too many packs to handle in one go. "
+ "Please add .keep files to exclude\n"
+ "some pack files an

Re: Reduce pack-objects memory footprint?

2018-03-02 Thread Duy Nguyen
On Fri, Mar 2, 2018 at 5:54 PM, Jeff King  wrote:
> On Fri, Mar 02, 2018 at 05:18:45PM +0700, Duy Nguyen wrote:
>
>> On Wed, Feb 28, 2018 at 4:27 PM, Duy Nguyen  wrote:
>> > linux-2.6.git current has 6483999 objects. "git gc" on my poor laptop
>> > consumes 1.7G out of 4G RAM, pushing lots of data to swap and making
>> > all apps nearly unusable (granted the problem is partly Linux I/O
>> > scheduler too). So I wonder if we can reduce pack-objects memory
>> > footprint a bit.
>>
>> Next low hanging fruit item:
>>
>> struct revindex_entry {
>> off_t offset;
>> unsigned int nr;
>> };
>>
>> We need one entry per object, so 6.5M objects * 16 bytes = 104 MB. If
>> we break this struct apart and store two arrays of offset and nr in
>> struct packed_git, we save 4 bytes per struct, 26 MB total.
>>
>> It's getting low but every megabyte counts for me, and it does not
>> look like breaking this struct will make horrible code (we recreate
>> the struct at find_pack_revindex()) so I'm going to do this too unless
>> someone objects. There will be slight performance regression due to
>> cache effects, but hopefully it's ok.
>
> Maybe you will prove me wrong, but I don't think splitting them is going
> to work. The point of the revindex_entry is that we sort the (offset,nr)
> tuple as a unit.
>
> Or are you planning to sort it, and then copy the result into two
> separate arrays? I think that would work, but it sounds kind of nasty
> (arcane code, and extra CPU work for systems that don't care about the
> 26MB).


How about two level lookups? We have

struct revindex_entry_l2 {
uint32_t offset; /* the lowest 32 bits */
uint32_t nr;
};

struct revindex {
struct revindex_entry_l2 *level1[256]; /* indexed by the 8 high bits */
};

This limits us to 1024GB pack files, which should give us some time
before we have to worry about it again, and most of the time we'll need
just one or two entries in level1[], so cache effects are not that bad.
Preparing/sorting this could be a problem though.
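
For concreteness, recovering a full offset from that layout could look
something like this (untested sketch; revindex_offset is a made-up
name):

    static off_t revindex_offset(const struct revindex *rix,
                                 unsigned l1, uint32_t l2)
    {
            /* the 8 high bits come from the level-1 slot,
               the low 32 bits from the entry itself */
            return ((off_t)l1 << 32) | rix->level1[l1][l2].offset;
    }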
-- 
Duy


Re: [PATCH 00/11] Reduce pack-objects memory footprint

2018-03-02 Thread Jeff King
On Fri, Mar 02, 2018 at 07:14:01AM +0700, Duy Nguyen wrote:

> > We have a big repo, and this gets repacked on 6-8GB of memory on dev
> > KVMs, so we're under a fair bit of memory pressure. git-gc slows things
> > down a lot.
> >
> > It would be really nice to have something that made it use drastically
> > less memory at the cost of less efficient packs. Is the property that
> 
> Ahh.. less efficient. You may be more interested in [1] then. It
> avoids rewriting the base pack. Without the base pack, book keeping
> becomes much much cheaper.
> 
> We still read every single byte in all packs though (I think, unless
> you use pack-bitmap) and this amount of I/O affect the rest of the
> system too. Perhaps reducing core.packedgitwindowsize might make it
> friendlier to the OS, I don't know.

Yes, the ".keep" thing is actually quite expensive. We still do a
complete rev-list to find all the objects we want, and then for each
object say "is this in a pack with .keep?". And worse, the mru doesn't
help there because even if we find it in the first pack, we have to keep
looking to see if it's in _another_ pack.

There are probably some low-hanging optimizations there (e.g., only
looking in the .keep packs if that's all we're looking for; we may even
do that already).

But I think fundamentally you'd do much better to generate the partial
list of objects outside of pack-objects entirely, and then just feed it
to pack-objects without using "--revs".

-Peff


Re: Reduce pack-objects memory footprint?

2018-03-02 Thread Duy Nguyen
On Fri, Mar 2, 2018 at 5:54 PM, Jeff King  wrote:
> On Fri, Mar 02, 2018 at 05:18:45PM +0700, Duy Nguyen wrote:
>
>> On Wed, Feb 28, 2018 at 4:27 PM, Duy Nguyen  wrote:
>> > linux-2.6.git current has 6483999 objects. "git gc" on my poor laptop
>> > consumes 1.7G out of 4G RAM, pushing lots of data to swap and making
>> > all apps nearly unusable (granted the problem is partly Linux I/O
>> > scheduler too). So I wonder if we can reduce pack-objects memory
>> > footprint a bit.
>>
>> Next low hanging fruit item:
>>
>> struct revindex_entry {
>> off_t offset;
>> unsigned int nr;
>> };
>>
>> We need one entry per object, so 6.5M objects * 16 bytes = 104 MB. If
>> we break this struct apart and store two arrays of offset and nr in
>> struct packed_git, we save 4 bytes per struct, 26 MB total.
>>
>> It's getting low but every megabyte counts for me, and it does not
>> look like breaking this struct will make horrible code (we recreate
>> the struct at find_pack_revindex()) so I'm going to do this too unless
>> someone objects. There will be slight performance regression due to
>> cache effects, but hopefully it's ok.
>
> Maybe you will prove me wrong, but I don't think splitting them is going
> to work. The point of the revindex_entry is that we sort the (offset,nr)
> tuple as a unit.
>
> Or are you planning to sort it, and then copy the result into two
> separate arrays?

Yep.

> I think that would work, but it sounds kind of nasty

Yeah :(

> (arcane code, and extra CPU work for systems that don't care about the
> 26MB).
-- 
Duy


Re: Reduce pack-objects memory footprint?

2018-03-02 Thread Jeff King
On Fri, Mar 02, 2018 at 05:18:45PM +0700, Duy Nguyen wrote:

> On Wed, Feb 28, 2018 at 4:27 PM, Duy Nguyen  wrote:
> > linux-2.6.git current has 6483999 objects. "git gc" on my poor laptop
> > consumes 1.7G out of 4G RAM, pushing lots of data to swap and making
> > all apps nearly unusable (granted the problem is partly Linux I/O
> > scheduler too). So I wonder if we can reduce pack-objects memory
> > footprint a bit.
> 
> Next low hanging fruit item:
> 
> struct revindex_entry {
> off_t offset;
> unsigned int nr;
> };
> 
> We need one entry per object, so 6.5M objects * 16 bytes = 104 MB. If
> we break this struct apart and store two arrays of offset and nr in
> struct packed_git, we save 4 bytes per struct, 26 MB total.
> 
> It's getting low but every megabyte counts for me, and it does not
> look like breaking this struct will make horrible code (we recreate
> the struct at find_pack_revindex()) so I'm going to do this too unless
> someone objects. There will be slight performance regression due to
> cache effects, but hopefully it's ok.

Maybe you will prove me wrong, but I don't think splitting them is going
to work. The point of the revindex_entry is that we sort the (offset,nr)
tuple as a unit.

Or are you planning to sort it, and then copy the result into two
separate arrays? I think that would work, but it sounds kind of nasty
(arcane code, and extra CPU work for systems that don't care about the
26MB).

-Peff


Re: Reduce pack-objects memory footprint?

2018-03-02 Thread Eric Wong
Duy Nguyen  wrote:
> struct revindex_entry {
> off_t offset;
> unsigned int nr;
> };
> 
> We need one entry per object, so 6.5M objects * 16 bytes = 104 MB. If
> we break this struct apart and store two arrays of offset and nr in
> struct packed_git, we save 4 bytes per struct, 26 MB total.

Can the offset array be a union which stores
int32_t/uint32_t instead of off_t for projects which never
exceed 2/4GB?

Likewise, places in object_entry where "unsigned long" and off_t
are 64-bit could benefit from being 32-bit.  Testing/maintenance
overhead could be bad for those, though.


Re: Reduce pack-objects memory footprint?

2018-03-02 Thread Duy Nguyen
On Wed, Feb 28, 2018 at 4:27 PM, Duy Nguyen  wrote:
> linux-2.6.git current has 6483999 objects. "git gc" on my poor laptop
> consumes 1.7G out of 4G RAM, pushing lots of data to swap and making
> all apps nearly unusable (granted the problem is partly Linux I/O
> scheduler too). So I wonder if we can reduce pack-objects memory
> footprint a bit.

Next low hanging fruit item:

struct revindex_entry {
off_t offset;
unsigned int nr;
};

We need one entry per object, so 6.5M objects * 16 bytes = 104 MB. If
we break this struct apart and store two arrays of offset and nr in
struct packed_git, we save 4 bytes per struct, 26 MB total.

It's getting low but every megabyte counts for me, and it does not
look like breaking this struct will make horrible code (we recreate
the struct at find_pack_revindex()) so I'm going to do this too unless
someone objects. There will be slight performance regression due to
cache effects, but hopefully it's ok.
-- 
Duy


Re: [PATCH 00/11] Reduce pack-objects memory footprint

2018-03-01 Thread Duy Nguyen
On Thu, Mar 1, 2018 at 8:33 PM, Ævar Arnfjörð Bjarmason
 wrote:
>
> On Thu, Mar 01 2018, Nguyễn Thái Ngọc Duy jotted:
>
>> The array of object_entry in pack-objects can take a lot of memory
>> when pack-objects is run in "pack everything" mode. On linux-2.6.git,
>> this array alone takes roughly 800MB.
>>
>> This series reorders some fields and reduces field size... to keep
>> this struct smaller. Its size goes from 136 bytes to 96 bytes (29%) on
>> 64-bit linux and saves 260MB on linux-2.6.git.
>
> I'm very interested in this patch series. I don't have time to test this
> one right now (have to run), but with your previous RFC patch memory use
> (in the ~4GB range) on a big in-house repo went down by a bit over 3%,
> and it's ~5% faster.
>
> Before/after RSS 4440812 / 429 & runtime 172.73 / 162.45. This is
> after having already done a full git gc before, data via /usr/bin/time
> -v.

Jeff correctly pointed out elsewhere in this thread that RSS covers
both heap (this is what I try to reduce) and some file cache (we mmap
the whole pack file just to ease the reading), so RSS might not be a
good indicator of memory reduction. Any newly freed memory should be
reused for cache, which raises RSS back up. I think the RssAnon field
in /proc/<pid>/status shows it better.

> So not huge, but respectable.
>
> We have a big repo, and this gets repacked on 6-8GB of memory on dev
> KVMs, so we're under a fair bit of memory pressure. git-gc slows things
> down a lot.
>
> It would be really nice to have something that made it use drastically
> less memory at the cost of less efficient packs. Is the property that

Ahh.. less efficient. You may be more interested in [1] then. It
avoids rewriting the base pack. Without the base pack, book keeping
becomes much much cheaper.

We still read every single byte in all packs though (I think, unless
you use pack-bitmap) and this amount of I/O affect the rest of the
system too. Perhaps reducing core.packedgitwindowsize might make it
friendlier to the OS, I don't know.

> you need to spend, give or take, the size of .git/objects in memory
> something inherent, or just a limitation of the current implementation?
> I.e. could we do a first pass to pick some objects based on some
> heuristic, then repack them N at a time, and finally delete the
> now-obsolete packs?
>
> Another thing I've dealt with is that on these machines their
> NFS-mounted storage gets exhausted (I'm told) due to some pathological
> operations git does during repack, I/O tends to get 5-6x slower. Of
> course ionice doesn't help because the local kernel doesn't know
> anything about how harmful it is.

[1] https://public-inbox.org/git/20180301092046.2769-1-pclo...@gmail.com/T/#u
-- 
Duy


Re: [PATCH 00/11] Reduce pack-objects memory footprint

2018-03-01 Thread Ævar Arnfjörð Bjarmason

On Thu, Mar 01 2018, Nguyễn Thái Ngọc Duy jotted:

> The array of object_entry in pack-objects can take a lot of memory
> when pack-objects is run in "pack everything" mode. On linux-2.6.git,
> this array alone takes roughly 800MB.
>
> This series reorders some fields and reduces field size... to keep
> this struct smaller. Its size goes from 136 bytes to 96 bytes (29%) on
> 64-bit linux and saves 260MB on linux-2.6.git.

I'm very interested in this patch series. I don't have time to test this
one right now (have to run), but with your previous RFC patch memory use
(in the ~4GB range) on a big in-house repo went down by a bit over 3%,
and it's ~5% faster.

Before/after RSS 4440812 / 429 & runtime 172.73 / 162.45. This is
after having already done a full git gc before, data via /usr/bin/time
-v.

So not huge, but respectable.

We have a big repo, and this gets repacked on 6-8GB of memory on dev
KVMs, so we're under a fair bit of memory pressure. git-gc slows things
down a lot.

It would be really nice to have something that made it use drastically
less memory at the cost of less efficient packs. Is the property that
you need to spend, give or take, the size of .git/objects in memory
something inherent, or just a limitation of the current implementation?
I.e. could we do a first pass to pick some objects based on some
heuristic, then repack them N at a time, and finally delete the
now-obsolete packs?

Another thing I've dealt with is that on these machines their
NFS-mounted storage gets exhausted (I'm told) due to some pathological
operations git does during repack, I/O tends to get 5-6x slower. Of
course ionice doesn't help because the local kernel doesn't know
anything about how harmful it is.


[PATCH 00/11] Reduce pack-objects memory footprint

2018-03-01 Thread Nguyễn Thái Ngọc Duy
The array of object_entry in pack-objects can take a lot of memory
when pack-objects is run in "pack everything" mode. On linux-2.6.git,
this array alone takes roughly 800MB.

This series reorders some fields and reduces field size... to keep
this struct smaller. Its size goes from 136 bytes to 96 bytes (29%) on
64-bit linux and saves 260MB on linux-2.6.git.

Now the bad side:

- the number of pack files pack-objects can handle is reduced to 4096
  (previously unlimited)
- max delta chain is also limited to 4096 (previously practically
  unlimited)
- some patches are quite invasive (e.g. replacing pointer with
  uint32_t) and reduces readability a bit.
- it may be tricker to add more data in object_entry in the future.

Nguyễn Thái Ngọc Duy (11):
  pack-objects: document holes in struct object_entry.h
  pack-objects: turn type and in_pack_type to bitfields
  pack-objects: use bitfield for object_entry::dfs_state
  pack-objects: use bitfield for object_entry::depth
  pack-objects: note about in_pack_header_size
  pack-objects: move in_pack_pos out of struct object_entry
  pack-objects: move in_pack out of struct object_entry
  pack-objects: faster reverse packed_git lookup
  pack-objects: refer to delta objects by index instead of pointer
  pack-objects: reorder 'hash' to pack struct object_entry
  pack-objects: increase pack file limit to 4096

 builtin/pack-objects.c | 189 ++---
 cache.h|   3 +
 object.h   |   1 -
 pack-bitmap-write.c|   8 +-
 pack-bitmap.c  |   2 +-
 pack-bitmap.h  |   4 +-
 pack-objects.h |  70 ++-
 7 files changed, 180 insertions(+), 97 deletions(-)

-- 
2.16.1.435.g8f24da2e1a



Re: Reduce pack-objects memory footprint?

2018-03-01 Thread Duy Nguyen
On Wed, Feb 28, 2018 at 06:22:33PM +, Eric Wong wrote:
> Duy Nguyen  wrote:
> > which saves 12 bytes (or another 74 MB). 222 MB total is plenty of
> > space to keep some file cache from being evicted.
> 
> Nice!  I can definitely benefit from lower memory usage when
> packing.  Fwiw, I use pahole with other projects to help find
> packing opportunities:
> 
>   git://git.kernel.org/pub/scm/devel/pahole/pahole.git

Yes it's a wonderful tool.

> > @@ -14,11 +26,10 @@ struct object_entry {
> > void *delta_data;   /* cached delta (uncompressed) */
> > unsigned long delta_size;   /* delta data size (uncompressed) */
> > unsigned long z_delta_size; /* delta data size (compressed) */
> > -   enum object_type type;
> > -   enum object_type in_pack_type;  /* could be delta */
> > uint32_t hash;  /* name hint hash */
> > -   unsigned int in_pack_pos;
> > unsigned char in_pack_header_size;
> > +   unsigned type:3; /* enum object_type */
> > +   unsigned in_pack_type:3; /* enum object_type - could be delta */
> 
> For C99 compilers, enums can be bitfields.  I introduced the
> following macro into Ruby a few weeks ago to remain compatible
> with non-C99 compilers:
> 
> /*
>  * For declaring bitfields out of non-unsigned int types:
>  *   struct date {
>  *  BITFIELD(enum months) month:4;
>  *  ...
>  *   };
>  */
> #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
> # define BITFIELD(type) type
> #else
> # define BITFIELD(type) unsigned int
> #endif

I tried this and got

In file included from builtin/pack-objects.c:20:0:
./pack-objects.h:49:19: error: 'type' is narrower than values of its type [-Werror]
  enum object_type type:TYPE_BITS;
   ^~~~

The compiler is not wrong. What it does not realize is that the
pack-objects code never uses the out-of-range values (OBJ_BAD and
OBJ_ANY), but I don't see how I could suppress this warning. So I went
back to non-enum bitfields.
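
For reference, a minimal self-contained reproduction of why the
enum-typed bitfield warns (the enum values below are git's actual object
types, reduced to the relevant parts):

    enum object_type {
            OBJ_BAD = -1,
            OBJ_NONE = 0,
            OBJ_COMMIT = 1,
            OBJ_TREE = 2,
            OBJ_BLOB = 3,
            OBJ_TAG = 4,
            /* 5 is reserved */
            OBJ_OFS_DELTA = 6,
            OBJ_REF_DELTA = 7,
            OBJ_ANY,
            OBJ_MAX
    };

    struct with_enum {
            /* OBJ_BAD (-1) and OBJ_ANY (8) do not fit in 3 unsigned bits,
             * hence "narrower than values of its type" under -Werror */
            enum object_type type : 3;
    };

    struct with_unsigned {
            unsigned type : 3;      /* what pack-objects uses instead */
    };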

--
Duy


Re: Reduce pack-objects memory footprint?

2018-02-28 Thread Eric Wong
Duy Nguyen  wrote:
> which saves 12 bytes (or another 74 MB). 222 MB total is plenty of
> space to keep some file cache from being evicted.

Nice!  I can definitely benefit from lower memory usage when
packing.  Fwiw, I use pahole with other projects to help find
packing opportunities:

git://git.kernel.org/pub/scm/devel/pahole/pahole.git

> @@ -14,11 +26,10 @@ struct object_entry {
>   void *delta_data;   /* cached delta (uncompressed) */
>   unsigned long delta_size;   /* delta data size (uncompressed) */
>   unsigned long z_delta_size; /* delta data size (compressed) */
> - enum object_type type;
> - enum object_type in_pack_type;  /* could be delta */
>   uint32_t hash;  /* name hint hash */
> - unsigned int in_pack_pos;
>   unsigned char in_pack_header_size;
> + unsigned type:3; /* enum object_type */
> + unsigned in_pack_type:3; /* enum object_type - could be delta */

For C99 compilers, enums can be bitfields.  I introduced the
following macro into Ruby a few weeks ago to remain compatible
with non-C99 compilers:

/*
 * For declaring bitfields out of non-unsigned int types:
 *   struct date {
 *  BITFIELD(enum months) month:4;
 *  ...
 *   };
 */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
# define BITFIELD(type) type
#else
# define BITFIELD(type) unsigned int
#endif


Re: Reduce pack-objects memory footprint?

2018-02-28 Thread Duy Nguyen
On Wed, Feb 28, 2018 at 6:11 PM, Jeff King  wrote:
>> > The torvalds/linux fork network has ~23 million objects,
>> > so it's probably 7-8 GB of book-keeping. Which is gross, but 64GB in a
>> > server isn't uncommon these days.
>>
>> I wonder if we could just do book keeping for some but not all objects
>> because all objects simply do not scale. Say we have a big pack of
>> many GBs, could we keep the 80% of its bottom untouched, register the
>> top 20% (mostly non-blobs, and some more blobs as delta base) for
>> repack? We copy the bottom part to the new pack byte-by-byte, then
>> pack-objects rebuilds the top part with objects from other sources.
>
> Yes, though I think it would take a fair bit of surgery to do
> internally. And some features (like bitmap generation) just wouldn't
> work at all.
>
> I suspect you could simulate it, though, by just packing your subset
> with pack-objects (feeding it directly without using "--revs") and then
> catting the resulting packfiles together with a fixed-up header.
>
> At one point I played with a "fast pack" that would just cat packfiles
> together. My goal was to make cases with 10,000 packs workable by
> creating one lousy pack, and then repacking that lousy pack with a
> "real" repack. In the end I abandoned it in favor of fixing the
> performance problems from trying to make a real pack of 10,000 packs. :)
>
> But I might be able to dig it up if you want to experiment in that
> direction.

Naah it's ok. I'll go similar direction, but I'd repack those pack
files too except the big one. Let's see how it turns out.

>> They are 32 bytes per entry, so it should take less than object_entry.
>> I briefly wondered if we should fall back to external rev-list too,
>> just to free that memory.
>>
>> So about 200 MB for those objects (or maybe more for commits). Add 256
>> MB delta cache on top, it's still a bit far from 1.7G. There's
>> something I'm still missing.
>
> Are you looking at RSS or heap? Keep in mind that you're mmap-ing what's
> probably a 1GB packfile on disk. If you're under memory pressure that
> won't all stay resident, but some of it will be counted in RSS.

Interesting. It was RSS.

>> Pity we can't do the same for 'struct object'. Most of the time we
>> have a giant .idx file with most hashes. We could look up in both
>> places: the hash table in object.c, and the idx file, to find an
>> object. Then those objects that are associated with an .idx file will not
>> need the "oid" field (needed as the key for the hash table). But I see no
>> way to make that change.
>
> Yeah, that would be pretty invasive, I think. I also wonder if it would
> perform worse due to cache effects.

It should be better because of cache effects, I think. I mean, hash
map is the least cache-friendly lookup. Moving most objects out of the
hash table shrinks it, which is even nicer to the cache. But we also
lose O(1) when we do a binary search on the .idx file (after failing to
find the same object in the hash table).
-- 
Duy


Re: Reduce pack-objects memory footprint?

2018-02-28 Thread Jeff King
On Wed, Feb 28, 2018 at 05:58:50PM +0700, Duy Nguyen wrote:

> > Yeah, the per object memory footprint is not great. Around 100 million
> > objects it becomes pretty ridiculous. I started to dig into it a year or
> > three ago when I saw such a case, but it turned out to be something that
> > we could prune.
> 
> We could? What could we prune?

Sorry, I just meant that my 100 million-object case turned out not to
need all those objects, and I was able to prune it down. No code fixes
came out of it. ;)

> > The torvalds/linux fork network has ~23 million objects,
> > so it's probably 7-8 GB of book-keeping. Which is gross, but 64GB in a
> > server isn't uncommon these days.
> 
> I wonder if we could just do book keeping for some but not all objects
> because all objects simply do not scale. Say we have a big pack of
> many GBs, could we keep the 80% of its bottom untouched, register the
> top 20% (mostly non-blobs, and some more blobs as delta base) for
> repack? We copy the bottom part to the new pack byte-by-byte, then
> pack-objects rebuilds the top part with objects from other sources.

Yes, though I think it would take a fair bit of surgery to do
internally. And some features (like bitmap generation) just wouldn't
work at all.

I suspect you could simulate it, though, by just packing your subset
with pack-objects (feeding it directly without using "--revs") and then
catting the resulting packfiles together with a fixed-up header.

At one point I played with a "fast pack" that would just cat packfiles
together. My goal was to make cases with 10,000 packs workable by
creating one lousy pack, and then repacking that lousy pack with a
"real" repack. In the end I abandoned it in favor of fixing the
performance problems from trying to make a real pack of 10,000 packs. :)

But I might be able to dig it up if you want to experiment in that
direction.

> They are 32 bytes per entry, so it should take less than object_entry.
> I briefly wondered if we should fall back to external rev-list too,
> just to free that memory.
> 
> So about 200 MB for those objects (or maybe more for commits). Add 256
> MB delta cache on top, it's still a bit far from 1.7G. There's
> something I'm still missing.

Are you looking at RSS or heap? Keep in mind that you're mmap-ing what's
probably a 1GB packfile on disk. If you're under memory pressure that
won't all stay resident, but some of it will be counted in RSS.

> Pity we can't do the same for 'struct object'. Most of the time we
> have a giant .idx file with most hashes. We could look up in both
> places: the hash table in object.c, and the idx file, to find an
> object. Then those objects that are associated with an .idx file will not
> need the "oid" field (needed as the key for the hash table). But I see no
> way to make that change.

Yeah, that would be pretty invasive, I think. I also wonder if it would
perform worse due to cache effects.

-Peff


Re: Reduce pack-objects memory footprint?

2018-02-28 Thread Duy Nguyen
On Wed, Feb 28, 2018 at 5:17 PM, Jeff King  wrote:
> On Wed, Feb 28, 2018 at 04:27:22PM +0700, Duy Nguyen wrote:
>
>> linux-2.6.git current has 6483999 objects. "git gc" on my poor laptop
>> consumes 1.7G out of 4G RAM, pushing lots of data to swap and making
>> all apps nearly unusable (granted the problem is partly Linux I/O
>> scheduler too). So I wonder if we can reduce pack-objects memory
>> footprint a bit.
>
> Yeah, the per object memory footprint is not great. Around 100 million
> objects it becomes pretty ridiculous. I started to dig into it a year or
> three ago when I saw such a case, but it turned out to be something that
> we could prune.

We could? What could we prune?

> The torvalds/linux fork network has ~23 million objects,
> so it's probably 7-8 GB of book-keeping. Which is gross, but 64GB in a
> server isn't uncommon these days.

I wonder if we could just do book keeping for some but not all objects
because all objects simply do not scale. Say we have a big pack of
many GBs, could we keep the 80% of its bottom untouched, register the
top 20% (mostly non-blobs, and some more blobs as delta base) for
repack? We copy the bottom part to the new pack byte-by-byte, then
pack-objects rebuilds the top part with objects from other sources.

That would of course be less optimal because we can't make delta
against those objects at the bottom. And this makes the assumption
that these packs are generated using the same heuristics that we use.

> I think laptops repacking the kernel are probably one of the worst cases
> (leaving aside the awful Windows repository, but my impression is that
> they simply can't do a full repack at all there).

Yeah. My fear is "git gc --auto" kicking in. Even on a laptop we
should support working on a repo as big as linux-2.6 (or as small as
one, from a facebook/microsoft perspective). I will probably add a mode
that keeps the largest pack alone and just packs the rest into a second
pack in "git gc --auto". That might help (I haven't really checked the
details yet, but object_entry bookkeeping should go down at least).

>> This demonstration patch (probably breaks some tests) would reduce the
>> size of struct object_entry from from 136 down to 112 bytes on
>> x86-64. There are 6483999 of these objects, so the saving is 17% or
>> 148 MB.
>
> 136 x 6.5M objects is only about 800MB. I suspect a big chunk of the
> rest is going to the object structs we create when doing the internal
> rev-list traversal. And those duplicate the 20-byte sha1s at the very
> least.

They are 32 bytes per entry, so it should take less than object_entry.
I briefly wondered if we should fall back to external rev-list too,
just to free that memory.

So about 200 MB for those objects (or maybe more for commits). Add 256
MB delta cache on top, it's still a bit far from 1.7G. There's
something I'm still missing.

> Another option would be to somehow replace the pack_idx_entry with a
> reference to a "struct object". That would let us at least avoid storing
> the 20-byte oid twice.

20 bytes saving? /me drools.

Pity we can't do the same for 'struct object'. Most of the time we
have a giant .idx file with most hashes. We could look up in both
places: the hash table in object.c, and the idx file, to find an
object. Then those objects that are associated with an .idx file will not
need the "oid" field (needed as the key for the hash table). But I see no
way to make that change.

>> If we go further, notice that nr_objects is uint32_t, we could convert
>> the three pointers
>>
>>   struct object_entry *delta;
>>   struct object_entry *delta_child;
>>   struct object_entry *delta_sibling;
>>
>> to
>>
>>   uint32_t delta;
>>   uint32_t delta_child;
>>   uint32_t delta_sibling;
>>
>> which saves 12 bytes (or another 74 MB). 222 MB total is plenty of
>> space to keep some file cache from being evicted.
>
> Yeah, that seems like low-hanging fruit. I'd also note that we don't
> actually use all of the fields during the whole process. I think some of
> those delta fields are only used for a short time. So we might be able
> to reduce peak memory if there are some mutually exclusive bits of each
> entry (and even if there's some overlap, we'd reduce the length of time
> we'd need to be at peak).

Yeah it looks that way to me too. Now that we have packing_data,
splitting object_entry[] into multiple arrays (each only needed at
some point) may be very nice, roughly as sketched below.
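
Something like this (names illustrative): a field that is only needed
during one phase moves out of struct object_entry into packing_data,
indexed by the entry's position, and gets allocated only when that
phase actually runs:

    struct packing_data {
            struct object_entry *objects;
            uint32_t nr_objects, nr_alloc;

            /* allocated lazily, e.g. only when writing a bitmap index */
            uint32_t *in_pack_pos;
    };

    static inline void oe_set_in_pack_pos(struct packing_data *pack,
                                          const struct object_entry *e,
                                          uint32_t pos)
    {
            pack->in_pack_pos[e - pack->objects] = pos;
    }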

>> Is it worth doing this? The struct packing makes it harder to read
>> (and more fragile too). I added some more artificial limits like max
>> depth of 2^11. But I think 409

Re: Reduce pack-objects memory footprint?

2018-02-28 Thread Jeff King
On Wed, Feb 28, 2018 at 04:27:22PM +0700, Duy Nguyen wrote:

> linux-2.6.git current has 6483999 objects. "git gc" on my poor laptop
> consumes 1.7G out of 4G RAM, pushing lots of data to swap and making
> all apps nearly unusable (granted the problem is partly Linux I/O
> scheduler too). So I wonder if we can reduce pack-objects memory
> footprint a bit.

Yeah, the per object memory footprint is not great. Around 100 million
objects it becomes pretty ridiculous. I started to dig into it a year or
three ago when I saw such a case, but it turned out to be something that
we could prune. The torvalds/linux fork network has ~23 million objects,
so it's probably 7-8 GB of book-keeping. Which is gross, but 64GB in a
server isn't uncommon these days.

I think laptops repacking the kernel are probably one of the worst cases
(leaving aside the awful Windows repository, but my impression is that
they simply can't do a full repack at all there).

> This demonstration patch (probably breaks some tests) would reduce the
> size of struct object_entry from from 136 down to 112 bytes on
> x86-64. There are 6483999 of these objects, so the saving is 17% or
> 148 MB.

136 x 6.5M objects is only about 800MB. I suspect a big chunk of the
rest is going to the object structs we create when doing the internal
rev-list traversal. And those duplicate the 20-byte sha1s at the very
least.

I don't know if it would be a good idea to free them after the
traversal, though. We do use them again later in the bitmap case. On the
other hand, we could probably do so for the non-bitmap case. And even
for the bitmap case, the primary value in keeping them around is that
the parent pointers will already be cached. So it might make sense to
free the blobs and trees (though it might be tricky; the commits have
pointers to the trees).

It also doesn't help with peak memory usage, because you'll have the
full to_pack list and all of the "struct object" in memory together at
one point.

Another option would be to somehow replace the pack_idx_entry with a
reference to a "struct object". That would let us at least avoid storing
the 20-byte oid twice.

> If we go further, notice that nr_objects is uint32_t, we could convert
> the three pointers
> 
>   struct object_entry *delta;
>   struct object_entry *delta_child;
>   struct object_entry *delta_sibling;
> 
> to
> 
>   uint32_t delta;
>   uint32_t delta_child;
>   uint32_t delta_sibling;
> 
> which saves 12 bytes (or another 74 MB). 222 MB total is plenty of
> space to keep some file cache from being evicted.

Yeah, that seems like low-hanging fruit. I'd also note that we don't
actually use all of the fields during the whole process. I think some of
those delta fields are only used for a short time. So we might be able
to reduce peak memory if there are some mutually exclusive bits of each
entry (and even if there's some overlap, we'd reduce the length of time
we'd need to be at peak).

> Is it worth doing this? The struct packing makes it harder to read
> (and more fragile too). I added some more artificial limits like max
> depth of 2^11. But I think 4096+ depth is getting unreasonable.

I'm OK with a limit like 4096, as long as we notice when we hit the
limit and behave reasonably. I think the algorithm in
break_delta_chains() may temporarily store up to uint32_t depth. But I
think we won't ever write anything into cur->depth larger than the
max-depth limit. So it would probably be sufficient to just check that
the --depth argument is reasonably sized and complain otherwise.
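
Something like this at option parsing time would probably do (sketch
only; OE_DEPTH_BITS is a made-up name for however many bits the field
ends up with):

    if (depth >= (1 << OE_DEPTH_BITS)) {
            warning(_("delta chain depth %d is too deep, forcing %d"),
                    depth, (1 << OE_DEPTH_BITS) - 1);
            depth = (1 << OE_DEPTH_BITS) - 1;
    }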

I do agree this makes things a bit harder to read, but I think the
benefits are pretty measurable. And may make a real usability difference
on a large repository.

-Peff


Reduce pack-objects memory footprint?

2018-02-28 Thread Duy Nguyen
linux-2.6.git current has 6483999 objects. "git gc" on my poor laptop
consumes 1.7G out of 4G RAM, pushing lots of data to swap and making
all apps nearly unusable (granted the problem is partly Linux I/O
scheduler too). So I wonder if we can reduce pack-objects memory
footprint a bit.

This demonstration patch (probably breaks some tests) would reduce the
size of struct object_entry from from 136 down to 112 bytes on
x86-64. There are 6483999 of these objects, so the saving is 17% or
148 MB.

If we go further, notice that nr_objects is uint32_t, we could convert
the three pointers

struct object_entry *delta;
struct object_entry *delta_child;
struct object_entry *delta_sibling;

to

uint32_t delta;
uint32_t delta_child;
uint32_t delta_sibling;

which saves 12 bytes (or another 74 MB). 222 MB total is plenty of
space to keep some file cache from being evicted.
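
For what it's worth, the conversion can stay fairly mechanical if we
reserve 0 for "no delta" and store (array index + 1), so a
zero-initialized entry still reads as NULL. A sketch, with "objects"
standing for the to_pack.objects array:

    static inline struct object_entry *oe_delta(struct object_entry *objects,
                                                const struct object_entry *e)
    {
            return e->delta_idx ? &objects[e->delta_idx - 1] : NULL;
    }

    static inline void oe_set_delta(struct object_entry *objects,
                                    struct object_entry *e,
                                    const struct object_entry *delta)
    {
            e->delta_idx = delta ? (uint32_t)(delta - objects) + 1 : 0;
    }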

Is it worth doing this? The struct packing makes it harder to read
(and more fragile too). I added some more artificial limits like max
depth of 2^11. But I think 4096+ depth is getting unreasonable.

-- 8< --
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 5c674b2843..6a9804daec 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -877,8 +877,11 @@ static void write_pack_file(void)
strbuf_addf(&tmpname, "%s-", base_name);
 
if (write_bitmap_index) {
+   ALLOC_ARRAY(to_pack.in_pack_pos, to_pack.nr_objects);
bitmap_writer_set_checksum(oid.hash);
-   bitmap_writer_build_type_index(written_list, nr_written);
+   bitmap_writer_build_type_index(written_list,
+  nr_written,
+  &to_pack);
}
 
finish_tmp_packfile(&tmpname, pack_tmp_name,
@@ -1407,6 +1410,7 @@ static void check_object(struct object_entry *entry)
unsigned long avail;
off_t ofs;
unsigned char *buf, c;
+   enum object_type type;
 
buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
 
@@ -1415,8 +1419,9 @@ static void check_object(struct object_entry *entry)
 * since non-delta representations could still be reused.
 */
used = unpack_object_header_buffer(buf, avail,
-  &entry->in_pack_type,
+  &type,
   &entry->size);
+   entry->in_pack_type = type;
if (used == 0)
goto give_up;
 
@@ -1559,6 +1564,7 @@ static void drop_reused_delta(struct object_entry *entry)
 {
struct object_entry **p = &entry->delta->delta_child;
struct object_info oi = OBJECT_INFO_INIT;
+   enum object_type type;
 
while (*p) {
if (*p == entry)
@@ -1570,7 +1576,7 @@ static void drop_reused_delta(struct object_entry *entry)
entry->depth = 0;
 
oi.sizep = &entry->size;
-   oi.typep = &entry->type;
+   oi.typep = &type;
if (packed_object_info(entry->in_pack, entry->in_pack_offset, &oi) < 0) {
/*
 * We failed to get the info from this pack for some reason;
@@ -1580,7 +1586,8 @@ static void drop_reused_delta(struct object_entry *entry)
 */
entry->type = sha1_object_info(entry->idx.oid.hash,
   &entry->size);
-   }
+   } else
+   entry->type = type;
 }
 
 /*
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index e01f992884..5c9957a095 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -49,7 +49,8 @@ void bitmap_writer_show_progress(int show)
  * Build the initial type index for the packfile
  */
 void bitmap_writer_build_type_index(struct pack_idx_entry **index,
-   uint32_t index_nr)
+   uint32_t index_nr,
+   struct packing_data *to_pack)
 {
uint32_t i;
 
@@ -62,7 +63,7 @@ void bitmap_writer_build_type_index(struct pack_idx_entry **index,
struct object_entry *entry = (struct object_entry *)index[i];
enum object_type real_type;
 
-   entry->in_pack_pos = i;
+   to_pack->in_pack_pos[entry - to_pack->objects] = i;
 
switch (entry->type) {
case OBJ_COMMIT:
@@ -147,7 +148,7 @@ static uint32_t find_object_pos(const unsigned char *sha1)
&q