Re: [PATCH 10/23] midx: write a lookup into the pack names chunk
On 6/9/2018 12:43 PM, Duy Nguyen wrote: On Thu, Jun 7, 2018 at 7:01 PM Derrick Stolee wrote: Signed-off-by: Derrick Stolee --- Documentation/technical/pack-format.txt | 5 +++ builtin/midx.c | 7 midx.c | 56 +++-- object-store.h | 2 + t/t5319-midx.sh | 11 +++-- 5 files changed, 75 insertions(+), 6 deletions(-) diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt index 2b37be7b33..29bf87283a 100644 --- a/Documentation/technical/pack-format.txt +++ b/Documentation/technical/pack-format.txt @@ -296,6 +296,11 @@ CHUNK LOOKUP: CHUNK DATA: + Packfile Name Lookup (ID: {'P', 'L', 'O', 'O'}) (P * 4 bytes) + P * 4 bytes storing the offset in the packfile name chunk for + the null-terminated string containing the filename for the + ith packfile. + Commit message is too light on this one. Why does this need to be stored? Isn't the cost of rebuilding this in-core cheap? Adding this chunk on disk in my opinion only adds more burden. Now you have to verify that these offsets actually point to the right place. This is a very good point. I'll drop the chunk and just read the names directly to construct the array of strings. Thanks, -Stolee
Re: [PATCH 10/23] midx: write a lookup into the pack names chunk
On Thu, Jun 7, 2018 at 7:01 PM Derrick Stolee wrote: > > Signed-off-by: Derrick Stolee > --- > Documentation/technical/pack-format.txt | 5 +++ > builtin/midx.c | 7 > midx.c | 56 +++-- > object-store.h | 2 + > t/t5319-midx.sh | 11 +++-- > 5 files changed, 75 insertions(+), 6 deletions(-) > > diff --git a/Documentation/technical/pack-format.txt > b/Documentation/technical/pack-format.txt > index 2b37be7b33..29bf87283a 100644 > --- a/Documentation/technical/pack-format.txt > +++ b/Documentation/technical/pack-format.txt > @@ -296,6 +296,11 @@ CHUNK LOOKUP: > > CHUNK DATA: > > + Packfile Name Lookup (ID: {'P', 'L', 'O', 'O'}) (P * 4 bytes) > + P * 4 bytes storing the offset in the packfile name chunk for > + the null-terminated string containing the filename for the > + ith packfile. > + Commit message is too light on this one. Why does this need to be stored? Isn't the cost of rebuilding this in-core cheap? Adding this chunk on disk in my opinion only adds more burden. Now you have to verify that these offsets actually point to the right place. > Packfile Names (ID: {'P', 'N', 'A', 'M'}) > Stores the packfile names as concatenated, null-terminated > strings. > Packfiles must be listed in lexicographic order for fast lookups > by > diff --git a/builtin/midx.c b/builtin/midx.c > index fe56560853..3a261e9bbf 100644 > --- a/builtin/midx.c > +++ b/builtin/midx.c > @@ -16,6 +16,7 @@ static struct opts_midx { > > static int read_midx_file(const char *object_dir) > { > + uint32_t i; > struct midxed_git *m = load_midxed_git(object_dir); > > if (!m) > @@ -30,11 +31,17 @@ static int read_midx_file(const char *object_dir) > > printf("chunks:"); > > + if (m->chunk_pack_lookup) > + printf(" pack_lookup"); > if (m->chunk_pack_names) > printf(" pack_names"); > > printf("\n"); > > + printf("packs:\n"); > + for (i = 0; i < m->num_packs; i++) > + printf("%s\n", m->pack_names[i]); > + > printf("object_dir: %s\n", m->object_dir); > > return 0; > diff --git a/midx.c b/midx.c > index d4f4a01a51..923acda72e 100644 > --- a/midx.c > +++ b/midx.c > @@ -13,8 +13,9 @@ > #define MIDX_HASH_LEN 20 > #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN) > > -#define MIDX_MAX_CHUNKS 1 > +#define MIDX_MAX_CHUNKS 2 > #define MIDX_CHUNK_ALIGNMENT 4 > +#define MIDX_CHUNKID_PACKLOOKUP 0x504c4f4f /* "PLOO" */ > #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */ > #define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t)) > > @@ -85,6 +86,10 @@ struct midxed_git *load_midxed_git(const char *object_dir) > uint64_t chunk_offset = get_be64(m->data + 16 + > MIDX_CHUNKLOOKUP_WIDTH * i); > > switch (chunk_id) { > + case MIDX_CHUNKID_PACKLOOKUP: > + m->chunk_pack_lookup = (uint32_t *)(m->data + > chunk_offset); > + break; > + > case MIDX_CHUNKID_PACKNAMES: > m->chunk_pack_names = m->data + chunk_offset; > break; > @@ -102,9 +107,32 @@ struct midxed_git *load_midxed_git(const char > *object_dir) > } > } > > + if (!m->chunk_pack_lookup) > + die("MIDX missing required pack lookup chunk"); > if (!m->chunk_pack_names) > die("MIDX missing required pack-name chunk"); > > + m->pack_names = xcalloc(m->num_packs, sizeof(const char *)); > + for (i = 0; i < m->num_packs; i++) { > + if (i) { > + if (ntohl(m->chunk_pack_lookup[i]) <= > ntohl(m->chunk_pack_lookup[i - 1])) { > + error("MIDX pack lookup value %d before %d", > + ntohl(m->chunk_pack_lookup[i - 1]), > + ntohl(m->chunk_pack_lookup[i])); > + goto cleanup_fail; > + } > + } > + > + m->pack_names[i] = (const char *)(m->chunk_pack_names + > ntohl(m->chunk_pack_lookup[i])); > + > + if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) > { > + error("MIDX pack names out of order: '%s' before > '%s'", > + m->pack_names[i - 1], > + m->pack_names[i]); > + goto cleanup_fail; > + } > + } > + > return m; > > cleanup_fail: > @@ -162,6 +190,20 @@ static void sort_packs_by_name(char **pack_names, > uint32_t nr_packs, uint32_t *p > } > } > > +static size_t write_midx_pack_lookup(struct hashfile *f, > +
[PATCH 10/23] midx: write a lookup into the pack names chunk
Signed-off-by: Derrick Stolee --- Documentation/technical/pack-format.txt | 5 +++ builtin/midx.c | 7 midx.c | 56 +++-- object-store.h | 2 + t/t5319-midx.sh | 11 +++-- 5 files changed, 75 insertions(+), 6 deletions(-) diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt index 2b37be7b33..29bf87283a 100644 --- a/Documentation/technical/pack-format.txt +++ b/Documentation/technical/pack-format.txt @@ -296,6 +296,11 @@ CHUNK LOOKUP: CHUNK DATA: + Packfile Name Lookup (ID: {'P', 'L', 'O', 'O'}) (P * 4 bytes) + P * 4 bytes storing the offset in the packfile name chunk for + the null-terminated string containing the filename for the + ith packfile. + Packfile Names (ID: {'P', 'N', 'A', 'M'}) Stores the packfile names as concatenated, null-terminated strings. Packfiles must be listed in lexicographic order for fast lookups by diff --git a/builtin/midx.c b/builtin/midx.c index fe56560853..3a261e9bbf 100644 --- a/builtin/midx.c +++ b/builtin/midx.c @@ -16,6 +16,7 @@ static struct opts_midx { static int read_midx_file(const char *object_dir) { + uint32_t i; struct midxed_git *m = load_midxed_git(object_dir); if (!m) @@ -30,11 +31,17 @@ static int read_midx_file(const char *object_dir) printf("chunks:"); + if (m->chunk_pack_lookup) + printf(" pack_lookup"); if (m->chunk_pack_names) printf(" pack_names"); printf("\n"); + printf("packs:\n"); + for (i = 0; i < m->num_packs; i++) + printf("%s\n", m->pack_names[i]); + printf("object_dir: %s\n", m->object_dir); return 0; diff --git a/midx.c b/midx.c index d4f4a01a51..923acda72e 100644 --- a/midx.c +++ b/midx.c @@ -13,8 +13,9 @@ #define MIDX_HASH_LEN 20 #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN) -#define MIDX_MAX_CHUNKS 1 +#define MIDX_MAX_CHUNKS 2 #define MIDX_CHUNK_ALIGNMENT 4 +#define MIDX_CHUNKID_PACKLOOKUP 0x504c4f4f /* "PLOO" */ #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */ #define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t)) @@ -85,6 +86,10 @@ struct midxed_git *load_midxed_git(const char *object_dir) uint64_t chunk_offset = get_be64(m->data + 16 + MIDX_CHUNKLOOKUP_WIDTH * i); switch (chunk_id) { + case MIDX_CHUNKID_PACKLOOKUP: + m->chunk_pack_lookup = (uint32_t *)(m->data + chunk_offset); + break; + case MIDX_CHUNKID_PACKNAMES: m->chunk_pack_names = m->data + chunk_offset; break; @@ -102,9 +107,32 @@ struct midxed_git *load_midxed_git(const char *object_dir) } } + if (!m->chunk_pack_lookup) + die("MIDX missing required pack lookup chunk"); if (!m->chunk_pack_names) die("MIDX missing required pack-name chunk"); + m->pack_names = xcalloc(m->num_packs, sizeof(const char *)); + for (i = 0; i < m->num_packs; i++) { + if (i) { + if (ntohl(m->chunk_pack_lookup[i]) <= ntohl(m->chunk_pack_lookup[i - 1])) { + error("MIDX pack lookup value %d before %d", + ntohl(m->chunk_pack_lookup[i - 1]), + ntohl(m->chunk_pack_lookup[i])); + goto cleanup_fail; + } + } + + m->pack_names[i] = (const char *)(m->chunk_pack_names + ntohl(m->chunk_pack_lookup[i])); + + if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) { + error("MIDX pack names out of order: '%s' before '%s'", + m->pack_names[i - 1], + m->pack_names[i]); + goto cleanup_fail; + } + } + return m; cleanup_fail: @@ -162,6 +190,20 @@ static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *p } } +static size_t write_midx_pack_lookup(struct hashfile *f, +char **pack_names, +uint32_t nr_packs) +{ + uint32_t i, cur_len = 0; + + for (i = 0; i < nr_packs; i++) { + hashwrite_be32(f, cur_len); + cur_len += strlen(pack_names[i]) + 1; + } + + return sizeof(uint32_t) * (size_t)nr_packs; +} + static size_t write_midx_pack_names(struct hashfile *f, char **pack_names, uint32_t num_packs) @@ -275,13 +317,17 @@ int write_midx_file(