[PATCH 13/23] midx: write object id fanout chunk

2018-06-07 Thread Derrick Stolee
Signed-off-by: Derrick Stolee 
---
 Documentation/technical/pack-format.txt |  5 +++
 builtin/midx.c  |  4 +-
 midx.c  | 53 +++--
 object-store.h  |  1 +
 t/t5319-midx.sh | 18 +
 5 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/Documentation/technical/pack-format.txt 
b/Documentation/technical/pack-format.txt
index de9ac778b6..77e88f85e4 100644
--- a/Documentation/technical/pack-format.txt
+++ b/Documentation/technical/pack-format.txt
@@ -307,6 +307,11 @@ CHUNK DATA:
name. This is the only chunk not guaranteed to be a multiple of four
bytes in length, so should be the last chunk for alignment reasons.
 
+   OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes)
+   The ith entry, F[i], stores the number of OIDs with first
+   byte at most i. Thus F[255] stores the total
+   number of objects (N).
+
OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes)
The OIDs for all objects in the MIDX are stored in lexicographic
order in this chunk.
diff --git a/builtin/midx.c b/builtin/midx.c
index 86edd30174..e1fd0e0de4 100644
--- a/builtin/midx.c
+++ b/builtin/midx.c
@@ -35,10 +35,12 @@ static int read_midx_file(const char *object_dir)
printf(" pack_lookup");
if (m->chunk_pack_names)
printf(" pack_names");
+   if (m->chunk_oid_fanout)
+   printf(" oid_fanout");
if (m->chunk_oid_lookup)
printf(" oid_lookup");
 
-   printf("\n");
+   printf("\nnum_objects: %d\n", m->num_objects);
 
printf("packs:\n");
for (i = 0; i < m->num_packs; i++)
diff --git a/midx.c b/midx.c
index d06bc6876a..9458ced208 100644
--- a/midx.c
+++ b/midx.c
@@ -14,12 +14,14 @@
 #define MIDX_HASH_LEN 20
 #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN)
 
-#define MIDX_MAX_CHUNKS 3
+#define MIDX_MAX_CHUNKS 4
 #define MIDX_CHUNK_ALIGNMENT 4
 #define MIDX_CHUNKID_PACKLOOKUP 0x504c4f4f /* "PLOO" */
 #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
+#define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
 #define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
 #define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t))
+#define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256)
 
 static char *get_midx_filename(const char *object_dir)
 {
@@ -96,6 +98,10 @@ struct midxed_git *load_midxed_git(const char *object_dir)
m->chunk_pack_names = m->data + chunk_offset;
break;
 
+   case MIDX_CHUNKID_OIDFANOUT:
+   m->chunk_oid_fanout = (uint32_t *)(m->data + 
chunk_offset);
+   break;
+
case MIDX_CHUNKID_OIDLOOKUP:
m->chunk_oid_lookup = m->data + chunk_offset;
break;
@@ -117,9 +123,13 @@ struct midxed_git *load_midxed_git(const char *object_dir)
die("MIDX missing required pack lookup chunk");
if (!m->chunk_pack_names)
die("MIDX missing required pack-name chunk");
+   if (!m->chunk_oid_fanout)
+   die("MIDX missing required OID fanout chunk");
if (!m->chunk_oid_lookup)
die("MIDX missing required OID lookup chunk");
 
+   m->num_objects = ntohl(m->chunk_oid_fanout[255]);
+
m->pack_names = xcalloc(m->num_packs, sizeof(const char *));
for (i = 0; i < m->num_packs; i++) {
if (i) {
@@ -377,6 +387,35 @@ static size_t write_midx_pack_names(struct hashfile *f,
return written;
 }
 
+static size_t write_midx_oid_fanout(struct hashfile *f,
+   struct pack_midx_entry *objects,
+   uint32_t nr_objects)
+{
+   struct pack_midx_entry *list = objects;
+   struct pack_midx_entry *last = objects + nr_objects;
+   uint32_t count = 0;
+   uint32_t i;
+
+   /*
+   * Write the first-level table (the list is sorted,
+   * but we use a 256-entry lookup to be able to avoid
+   * having to do eight extra binary search iterations).
+   */
+   for (i = 0; i < 256; i++) {
+   struct pack_midx_entry *next = list;
+
+   while (next < last && next->oid.hash[0] == i) {
+   count++;
+   next++;
+   }
+
+   hashwrite_be32(f, count);
+   list = next;
+   }
+
+   return MIDX_CHUNK_FANOUT_SIZE;
+}
+
 static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len,
struct pack_midx_entry *objects,
uint32_t nr_objects)
@@ -489,7 +528,7 @@ int write_midx_file(const char *object_dir)
FREE_AND_NULL(midx_name);
 
  

Re: [PATCH 13/23] midx: write object id fanout chunk

2018-06-09 Thread Duy Nguyen
On Thu, Jun 7, 2018 at 4:06 PM Derrick Stolee  wrote:
> @@ -117,9 +123,13 @@ struct midxed_git *load_midxed_git(const char 
> *object_dir)
> die("MIDX missing required pack lookup chunk");
> if (!m->chunk_pack_names)
> die("MIDX missing required pack-name chunk");
> +   if (!m->chunk_oid_fanout)
> +   die("MIDX missing required OID fanout chunk");

_()

> @@ -501,9 +540,13 @@ int write_midx_file(const char *object_dir)
> chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_packs * 
> sizeof(uint32_t);
>
> cur_chunk++;
> -   chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP;
> +   chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDFANOUT;

Err.. mistake?

> chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + 
> pack_name_concat_len;
>
> +   cur_chunk++;
> +   chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP;

Same here.

> +   chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + 
> MIDX_CHUNK_FANOUT_SIZE;
> +
> cur_chunk++;
> chunk_ids[cur_chunk] = 0;
> chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries 
> * MIDX_HASH_LEN;
>
-- 
Duy


Re: [PATCH 13/23] midx: write object id fanout chunk

2018-06-21 Thread Derrick Stolee

On 6/9/2018 1:28 PM, Duy Nguyen wrote:

On Thu, Jun 7, 2018 at 4:06 PM Derrick Stolee  wrote:

@@ -117,9 +123,13 @@ struct midxed_git *load_midxed_git(const char *object_dir)
 die("MIDX missing required pack lookup chunk");
 if (!m->chunk_pack_names)
 die("MIDX missing required pack-name chunk");
+   if (!m->chunk_oid_fanout)
+   die("MIDX missing required OID fanout chunk");

_()


@@ -501,9 +540,13 @@ int write_midx_file(const char *object_dir)
 chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_packs * 
sizeof(uint32_t);

 cur_chunk++;
-   chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP;
+   chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDFANOUT;

Err.. mistake?


Not a mistake, just a side-effect of inserting the fanout before the lookup.

The commits are in this order because we need to construct the list 
before we build the fanout, but it makes sense to have the smaller 
fanout chunk before the lookup chunk.





 chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + 
pack_name_concat_len;

+   cur_chunk++;
+   chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP;

Same here.


+   chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + 
MIDX_CHUNK_FANOUT_SIZE;
+
 cur_chunk++;
 chunk_ids[cur_chunk] = 0;
 chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * 
MIDX_HASH_LEN;