Re: [PATCH 2/2] index-pack: kill union delta_base to save memory

2015-07-05 Thread Junio C Hamano
Duy Nguyen pclo...@gmail.com writes:

 @@ -1282,28 +1344,25 @@ static void fix_unresolved_deltas(struct sha1file 
 *f, int nr_unresolved)
* resolving deltas in the same order as their position in the pack.
*/
   sorted_by_pos = xmalloc(nr_unresolved * sizeof(*sorted_by_pos));
 - for (i = 0; i  nr_deltas; i++) {
 - if (objects[deltas[i].obj_no].real_type != OBJ_REF_DELTA)
 - continue;
 - sorted_by_pos[n++] = deltas[i];
 - }
 + for (i = 0; i  nr_ref_deltas; i++)
 + sorted_by_pos[n++] = ref_deltas[i];
   qsort(sorted_by_pos, n, sizeof(*sorted_by_pos), delta_pos_compare);

 You allocate an array of nr_unresolved (which is the sum of
 nr_ref_deltas and nr_ofs_deltas in the new world order with patch)
 entries, fill only the first nr_ref_deltas entries of it, and then
 sort that first n (= nr_ref_deltas) entries.  The qsort and the
 subsequent loop only looks at the first n entries, so nothing is
 filling or reading these nr_ofs_deltas entres at the end.

 I do not see any wrong behaviour other than temporary wastage of
 nr_ofs_deltas pointers that would be caused by this, but this
 allocation is misleading.

 Also, the old code before this change had to use 'i' and 'n' because
 some of the things we see in the (old) deltas[] array we scanned
 with 'i' would not make it into the sorted_by_pos[] array in the old
 world order, but now because you have only ref delta in a separate
 ref_deltas[] array, they increment lockstep.  That also puzzled me
 while re-reading this code.

 Perhaps something like this is needed?

 Yeah I can see the confusion when reading the code without checking
 its history. You probably want to kill the argument nr_unresolved too
 because it's not needed anymore after your patch.

 So what's the bug you mentioned? All I got from the above was wastage
 and confusion, no bug.

Actually, the above is not analyzed correctly.  I thought
nr_unresolved was ref + ofs, but look at the caller in the
fix_thin_pack codepath:

int nr_unresolved = nr_ofs_deltas + nr_ref_deltas - nr_resolved_deltas;
int nr_objects_initial = nr_objects;
if (nr_unresolved = 0)
die(_(confusion beyond insanity));
REALLOC_ARRAY(objects, nr_objects + nr_unresolved + 1);
memset(objects + nr_objects + 1, 0,
   nr_unresolved * sizeof(*objects));
f = sha1fd(output_fd, curr_pack);
fix_unresolved_deltas(f, nr_unresolved);

If there were tons of nr_resolved_deltas and only small number of
nr_ofs_deltas, then the allocation in question may actually be
under-allocating.

So...
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] index-pack: kill union delta_base to save memory

2015-07-03 Thread Duy Nguyen
On Fri, Jul 3, 2015 at 11:51 PM, Junio C Hamano gits...@pobox.com wrote:
 Nguyễn Thái Ngọc Duy  pclo...@gmail.com writes:

 Once we know the number of objects in the input pack, we allocate an
 array of nr_objects of struct delta_entry. On x86-64, this struct is
 32 bytes long. The union delta_base, which is part of struct
 delta_entry, provides enough space to store either ofs-delta (8 bytes)
 or ref-delta (20 bytes).

 Sorry for responding to a patch 7000+ messages ago, but some kind
 folks at Google were puzzled by this code, and I think they found a
 small bug.

  static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved)
  {
 - struct delta_entry **sorted_by_pos;
 + struct ref_delta_entry **sorted_by_pos;
   int i, n = 0;

   /*
 @@ -1282,28 +1344,25 @@ static void fix_unresolved_deltas(struct sha1file 
 *f, int nr_unresolved)
* resolving deltas in the same order as their position in the pack.
*/
   sorted_by_pos = xmalloc(nr_unresolved * sizeof(*sorted_by_pos));
 - for (i = 0; i  nr_deltas; i++) {
 - if (objects[deltas[i].obj_no].real_type != OBJ_REF_DELTA)
 - continue;
 - sorted_by_pos[n++] = deltas[i];
 - }
 + for (i = 0; i  nr_ref_deltas; i++)
 + sorted_by_pos[n++] = ref_deltas[i];
   qsort(sorted_by_pos, n, sizeof(*sorted_by_pos), delta_pos_compare);

 You allocate an array of nr_unresolved (which is the sum of
 nr_ref_deltas and nr_ofs_deltas in the new world order with patch)
 entries, fill only the first nr_ref_deltas entries of it, and then
 sort that first n (= nr_ref_deltas) entries.  The qsort and the
 subsequent loop only looks at the first n entries, so nothing is
 filling or reading these nr_ofs_deltas entres at the end.

 I do not see any wrong behaviour other than temporary wastage of
 nr_ofs_deltas pointers that would be caused by this, but this
 allocation is misleading.

 Also, the old code before this change had to use 'i' and 'n' because
 some of the things we see in the (old) deltas[] array we scanned
 with 'i' would not make it into the sorted_by_pos[] array in the old
 world order, but now because you have only ref delta in a separate
 ref_deltas[] array, they increment lockstep.  That also puzzled me
 while re-reading this code.

 Perhaps something like this is needed?

Yeah I can see the confusion when reading the code without checking
its history. You probably want to kill the argument nr_unresolved too
because it's not needed anymore after your patch.

So what's the bug you mentioned? All I got from the above was wastage
and confusion, no bug.
-- 
Duy
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] index-pack: kill union delta_base to save memory

2015-07-03 Thread Junio C Hamano
Nguyễn Thái Ngọc Duy  pclo...@gmail.com writes:

 Once we know the number of objects in the input pack, we allocate an
 array of nr_objects of struct delta_entry. On x86-64, this struct is
 32 bytes long. The union delta_base, which is part of struct
 delta_entry, provides enough space to store either ofs-delta (8 bytes)
 or ref-delta (20 bytes).

Sorry for responding to a patch 7000+ messages ago, but some kind
folks at Google were puzzled by this code, and I think they found a
small bug.

  static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved)
  {
 - struct delta_entry **sorted_by_pos;
 + struct ref_delta_entry **sorted_by_pos;
   int i, n = 0;
  
   /*
 @@ -1282,28 +1344,25 @@ static void fix_unresolved_deltas(struct sha1file *f, 
 int nr_unresolved)
* resolving deltas in the same order as their position in the pack.
*/
   sorted_by_pos = xmalloc(nr_unresolved * sizeof(*sorted_by_pos));
 - for (i = 0; i  nr_deltas; i++) {
 - if (objects[deltas[i].obj_no].real_type != OBJ_REF_DELTA)
 - continue;
 - sorted_by_pos[n++] = deltas[i];
 - }
 + for (i = 0; i  nr_ref_deltas; i++)
 + sorted_by_pos[n++] = ref_deltas[i];
   qsort(sorted_by_pos, n, sizeof(*sorted_by_pos), delta_pos_compare);

You allocate an array of nr_unresolved (which is the sum of
nr_ref_deltas and nr_ofs_deltas in the new world order with patch)
entries, fill only the first nr_ref_deltas entries of it, and then
sort that first n (= nr_ref_deltas) entries.  The qsort and the
subsequent loop only looks at the first n entries, so nothing is
filling or reading these nr_ofs_deltas entres at the end.

I do not see any wrong behaviour other than temporary wastage of
nr_ofs_deltas pointers that would be caused by this, but this
allocation is misleading.

Also, the old code before this change had to use 'i' and 'n' because
some of the things we see in the (old) deltas[] array we scanned
with 'i' would not make it into the sorted_by_pos[] array in the old
world order, but now because you have only ref delta in a separate
ref_deltas[] array, they increment lockstep.  That also puzzled me
while re-reading this code.

Perhaps something like this is needed?


 builtin/index-pack.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 48fa472..d6c48cd 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1334,7 +1334,7 @@ static int delta_pos_compare(const void *_a, const void 
*_b)
 static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved)
 {
struct ref_delta_entry **sorted_by_pos;
-   int i, n = 0;
+   int i;
 
/*
 * Since many unresolved deltas may well be themselves base objects
@@ -1346,12 +1346,12 @@ static void fix_unresolved_deltas(struct sha1file *f, 
int nr_unresolved)
 * before deltas depending on them, a good heuristic is to start
 * resolving deltas in the same order as their position in the pack.
 */
-   sorted_by_pos = xmalloc(nr_unresolved * sizeof(*sorted_by_pos));
+   sorted_by_pos = xmalloc(nr_ref_deltas * sizeof(*sorted_by_pos));
for (i = 0; i  nr_ref_deltas; i++)
-   sorted_by_pos[n++] = ref_deltas[i];
-   qsort(sorted_by_pos, n, sizeof(*sorted_by_pos), delta_pos_compare);
+   sorted_by_pos[i] = ref_deltas[i];
+   qsort(sorted_by_pos, nr_ref_deltas, sizeof(*sorted_by_pos), 
delta_pos_compare);
 
-   for (i = 0; i  n; i++) {
+   for (i = 0; i  nr_ref_deltas; i++) {
struct ref_delta_entry *d = sorted_by_pos[i];
enum object_type type;
struct base_data *base_obj = alloc_base_data();

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] index-pack: kill union delta_base to save memory

2015-04-18 Thread Nguyễn Thái Ngọc Duy
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).

Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.

In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.

This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.

The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.

A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.

Brought-up-by: Matthew Sporleder msporle...@gmail.com
Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 builtin/index-pack.c | 260 +++
 1 file changed, 160 insertions(+), 100 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 9d854fb..3ed53e3 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -28,11 +28,6 @@ struct object_stat {
int base_object_no;
 };
 
-union delta_base {
-   unsigned char sha1[20];
-   off_t offset;
-};
-
 struct base_data {
struct base_data *base;
struct base_data *child;
@@ -52,26 +47,28 @@ struct thread_local {
int pack_fd;
 };
 
-/*
- * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
- * to memcmp() only the first 20 bytes.
- */
-#define UNION_BASE_SZ  20
-
 #define FLAG_LINK (1u20)
 #define FLAG_CHECKED (1u21)
 
-struct delta_entry {
-   union delta_base base;
+struct ofs_delta_entry {
+   off_t offset;
+   int obj_no;
+};
+
+struct ref_delta_entry {
+   unsigned char sha1[20];
int obj_no;
 };
 
 static struct object_entry *objects;
 static struct object_stat *obj_stat;
-static struct delta_entry *deltas;
+static struct ofs_delta_entry *ofs_deltas;
+static struct ref_delta_entry *ref_deltas;
 static struct thread_local nothread_data;
 static int nr_objects;
-static int nr_deltas;
+static int nr_ofs_deltas;
+static int nr_ref_deltas;
+static int ref_deltas_alloc;
 static int nr_resolved_deltas;
 static int nr_threads;
 
@@ -480,7 +477,8 @@ static void *unpack_entry_data(unsigned long offset, 
unsigned long size,
 }
 
 static void *unpack_raw_entry(struct object_entry *obj,
- union delta_base *delta_base,
+ off_t *ofs_offset,
+ unsigned char *ref_sha1,
  unsigned char *sha1)
 {
unsigned char *p;
@@ -509,11 +507,10 @@ static void *unpack_raw_entry(struct object_entry *obj,
 
switch (obj-type) {
case OBJ_REF_DELTA:
-   hashcpy(delta_base-sha1, fill(20));
+   hashcpy(ref_sha1, fill(20));
use(20);
break;
case OBJ_OFS_DELTA:
-   memset(delta_base, 0, sizeof(*delta_base));
p = fill(1);
c = *p;
use(1);
@@ -527,8 +524,8 @@ static void *unpack_raw_entry(struct object_entry *obj,
use(1);
base_offset = (base_offset  7) + (c  127);
}
-   delta_base-offset = obj-idx.offset - base_offset;
-   if (delta_base-offset = 0 || delta_base-offset = 
obj-idx.offset)
+   *ofs_offset = obj-idx.offset - base_offset;
+   if (*ofs_offset = 0 || *ofs_offset = obj-idx.offset)
bad_object(obj-idx.offset, _(delta base offset is out 
of bound));
break;