[PATCH 05/27] object-store: move packed_git and packed_git_mru to object store
From: Stefan BellerIn a process with multiple repositories open, packfile accessors should be associated to a single repository and not shared globally. Move packed_git and packed_git_mru into the_repository and adjust callers to reflect this. [nd: while at there, wrap access to these two fields in get_packed_git() and get_packed_git_mru(). This allows us to lazily initialize these fields without caller doing that explicitly] Signed-off-by: Stefan Beller Signed-off-by: Jonathan Nieder Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/count-objects.c | 5 ++-- builtin/fsck.c | 6 +++-- builtin/gc.c | 4 +++- builtin/index-pack.c | 1 + builtin/pack-objects.c | 21 ++--- builtin/pack-redundant.c | 6 +++-- cache.h | 29 --- fast-import.c| 8 +-- http-backend.c | 6 +++-- http-push.c | 1 + http-walker.c| 1 + http.c | 1 + object-store.h | 34 +++ object.c | 7 ++ pack-bitmap.c| 4 +++- pack-check.c | 1 + pack-revindex.c | 1 + packfile.c | 51 packfile.h | 3 +++ reachable.c | 1 + server-info.c| 6 +++-- sha1_name.c | 5 ++-- 22 files changed, 128 insertions(+), 74 deletions(-) diff --git a/builtin/count-objects.c b/builtin/count-objects.c index ced8958e43..b28ff00be2 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -7,6 +7,7 @@ #include "cache.h" #include "config.h" #include "dir.h" +#include "repository.h" #include "builtin.h" #include "parse-options.h" #include "quote.h" @@ -121,9 +122,9 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) struct strbuf loose_buf = STRBUF_INIT; struct strbuf pack_buf = STRBUF_INIT; struct strbuf garbage_buf = STRBUF_INIT; - if (!packed_git) + if (!get_packed_git(the_repository)) prepare_packed_git(); - for (p = packed_git; p; p = p->next) { + for (p = get_packed_git(the_repository); p; p = p->next) { if (!p->pack_local) continue; if (open_pack_index(p)) diff --git a/builtin/fsck.c b/builtin/fsck.c index c736a10a11..7707407275 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -732,7 +732,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) prepare_packed_git(); if (show_progress) { - for (p = packed_git; p; p = p->next) { + for (p = get_packed_git(the_repository); p; +p = p->next) { if (open_pack_index(p)) continue; total += p->num_objects; @@ -740,7 +741,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) progress = start_progress(_("Checking objects"), total); } - for (p = packed_git; p; p = p->next) { + for (p = get_packed_git(the_repository); p; +p = p->next) { /* verify gives error messages itself */ if (verify_pack(p, fsck_obj_buffer, progress, count)) diff --git a/builtin/gc.c b/builtin/gc.c index 77fa720bd0..b00238cd5d 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -11,6 +11,7 @@ */ #include "builtin.h" +#include "repository.h" #include "config.h" #include "tempfile.h" #include "lockfile.h" @@ -20,6 +21,7 @@ #include "argv-array.h" #include "commit.h" #include "packfile.h" +#include "object-store.h" #define FAILED_RUN "failed to run %s" @@ -173,7 +175,7 @@ static int too_many_packs(void) return 0; prepare_packed_git(); - for (cnt = 0, p = packed_git; p; p = p->next) { + for (cnt = 0, p = get_packed_git(the_repository); p; p = p->next) { if (!p->pack_local) continue; if (p->pack_keep) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 5ebd370c56..1d6bc87b76 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -13,6 +13,7 @@ #include "streaming.h" #include "thread-utils.h" #include "packfile.h" +#include "object-store.h" static const char index_pack_usage[] = "git index-pack [-v] [-o ] [--keep | --keep=] [--verify] [--strict] ( | --stdin [--fix-thin] [])"; diff --git a/builtin/pack-objects.c
Re: [PATCH 05/27] object-store: move packed_git and packed_git_mru to object store
>> 2. Applying the semantic patch >> contrib/coccinelle/refactoring/packed_git.cocci to adjust callers. >> This semantic patch is placed in a sub directory of the coccinelle >> contrib dir, as this semantic patch is not expected to be of general >> usefulness; it is only useful during developing this series and >> merging it with other topics in flight. At a later date, just >> delete that semantic patch. > > Can the semantic patch go in the commit message instead? It is very > brief. done > > Actually, I don't see this semantic patch in the diffstat. Is the > commit message stale? > >> 3. Applying line wrapping fixes from "make style" to break the >> resulting long lines. >> >> 4. Adding missing #includes of repository.h and object-store.h >> where needed. > > Is there a way to automate this step? (I'm asking for my own > reference when writing future patches, not because of any concern > about the correctness of this one.) no, for several reasons. (a) I don't know how to write it in coccinelle, because (b) I have not figured out the order in which we include headers, apart from "cache.h goes first", the rest of the includes sometimes looks like a random order, because different patches add new includes at different places. I have the impression, that (1) some add the include after all other existing includes or (2) try to figure out where to add the include to make sense in the existing include order or (3) sort alphabetically or (4) put it randomly, so the chance for a merge conflict with other series in flight decreases. (c) I did it in a semi automated fashion: while make fails: add another include The mess with including repository or object-store comes from the fact that I had v2 based on object-store, not the repository and cherry-picked this patch over to v3. Fixed all of the includes now. >> @@ -59,10 +83,25 @@ struct raw_object_store { >>*/ >> char *objectdir; >> >> + struct packed_git *packed_git; >> + /* >> + * A most-recently-used ordered version of the packed_git list, which >> can >> + * be iterated instead of packed_git (and marked via mru_mark). >> + */ >> + struct list_head packed_git_mru; > > I don't understand the new part of the comment. Can you explain here, > for me? cherrypicking error, fixed. >> -#define RAW_OBJECT_STORE_INIT { NULL, NULL, NULL } >> + >> +/* >> + * The mru list_head is supposed to be initialized using >> + * the LIST_HEAD macro, assigning prev/next to itself. >> + * However this doesn't work in this case as some compilers dislike >> + * that macro on member variables. Use NULL instead as that is defined >> + * and accepted, deferring the real init to prepare_packed_git_mru(). */ > > style nit: '*/' should be on its own line. > > More importantly, we can avoid such an issue as described by Junio. :) See reply to Junio, I am not quite sure I like that.
Re: [PATCH 05/27] object-store: move packed_git and packed_git_mru to object store
On Wed, Feb 21, 2018 at 1:51 PM, Junio C Hamanowrote: > Stefan Beller writes: > >> + >> +/* >> + * The mru list_head is supposed to be initialized using >> + * the LIST_HEAD macro, assigning prev/next to itself. >> + * However this doesn't work in this case as some compilers dislike >> + * that macro on member variables. Use NULL instead as that is defined >> + * and accepted, deferring the real init to prepare_packed_git_mru(). */ >> +#define __MRU_INIT { NULL, NULL } >> +#define RAW_OBJECT_STORE_INIT { NULL, NULL, __MRU_INIT, NULL, NULL } > > I do not think it has to be this way to abuse two NULLs, if you > faithfully mimicked how LIST_HEAD() macro is constructed. The > reason why it does not try to introduce > > struct list_head x = LIST_HEAD_INIT; > > and instead, uses > > LIST_HEAD(x); > > is because of the need for self referral. If we learn from it, we > can do the same, i.e. instead of doing > > struct raw_object_store x = RAW_OBJECT_STORE_INIT; > > we can do > > RAW_OBJECT_STORE(x); > > that expands to > > struct raw_object_store x = { > ... > { _git_mru, _git_mru }, > ... > }; > > no? Then we do not need such a lengthy comment that reads only like > an excuse ;-) We cannot do this, because the object store is directly embedded into the the_repository (not as a pointer), which is a global on its own. So we'd have to do this at the repository level, which I would not want. I'd want to have the #defines where the structs are declared. Any guidance on how to do that correctly with self referential definitions without making the object store a pointer then?
Re: [PATCH 05/27] object-store: move packed_git and packed_git_mru to object store
Hi, Stefan Beller wrote: > In a process with multiple repositories open, packfile accessors > should be associated to a single repository and not shared globally. > Move packed_git and packed_git_mru into the_repository and adjust > callers to reflect this. > > Patch generated by > > 1. Moving the struct packed_git declaration to object-store.h > and packed_git, packed_git_mru globals to struct object_store. > > 2. Applying the semantic patch > contrib/coccinelle/refactoring/packed_git.cocci to adjust callers. > This semantic patch is placed in a sub directory of the coccinelle > contrib dir, as this semantic patch is not expected to be of general > usefulness; it is only useful during developing this series and > merging it with other topics in flight. At a later date, just > delete that semantic patch. Can the semantic patch go in the commit message instead? It is very brief. Actually, I don't see this semantic patch in the diffstat. Is the commit message stale? > 3. Applying line wrapping fixes from "make style" to break the > resulting long lines. > > 4. Adding missing #includes of repository.h and object-store.h > where needed. Is there a way to automate this step? (I'm asking for my own reference when writing future patches, not because of any concern about the correctness of this one.) > > 5. As the packfiles are now owned by an objectstore/repository, which > is ephemeral unlike globals, we introduce memory leaks. So address > them in raw_object_store_clear(). The compound words are confusing me. What is an objectstore/repository? Are these referring to particular identifiers or something else? Would some wording like the following work? 5. Freeing packed_git and packed_git_mru in raw_object_store_clear to avoid a per-repository memory leak. Previously they were global singletons, so code to free them did not exist. [...] > --- a/builtin/index-pack.c > +++ b/builtin/index-pack.c > @@ -12,6 +12,7 @@ > #include "exec_cmd.h" > #include "streaming.h" > #include "thread-utils.h" > +#include "object-store.h" > #include "packfile.h" > > static const char index_pack_usage[] = Change from a different patch leaked into this one? [...] > +++ b/builtin/pack-objects.c [...] > @@ -1044,7 +1045,7 @@ static int want_object_in_pack(const struct object_id > *oid, > } > want = want_found_object(exclude, p); > if (!exclude && want > 0) > - list_move(>mru, _git_mru); > + list_move(>mru, > _repository->objects.packed_git_mru); Long line. Can "make style" catch this? [...] > +++ b/builtin/receive-pack.c > @@ -7,6 +7,7 @@ > #include "sideband.h" > #include "run-command.h" > #include "exec_cmd.h" > +#include "object-store.h" > #include "commit.h" > #include "object.h" > #include "remote.h" Another change leaked in? [...] > --- a/cache.h > +++ b/cache.h > @@ -1585,35 +1585,6 @@ struct pack_window { > unsigned int inuse_cnt; > }; > > -extern struct packed_git { [...] > -} *packed_git; Move detecting diff confirms that this wasn't modified. Thanks for creating it. [...] > +++ b/fast-import.c [...] > @@ -1110,7 +1112,7 @@ static int store_object( > if (e->idx.offset) { > duplicate_count_by_type[type]++; > return 1; > - } else if (find_sha1_pack(oid.hash, packed_git)) { > + } else if (find_sha1_pack(oid.hash, > the_repository->objects.packed_git)) { Long line. (I'll refrain from commenting about any further ones.) [...] > +++ b/http-push.c > @@ -1,4 +1,5 @@ > #include "cache.h" > +#include "object-store.h" > #include "commit.h" > #include "tag.h" > #include "blob.h" Stray change? > diff --git a/http-walker.c b/http-walker.c > index 07c2b1af82..8bb5d991bb 100644 > --- a/http-walker.c > +++ b/http-walker.c > @@ -4,6 +4,7 @@ > #include "http.h" > #include "list.h" > #include "transport.h" > +#include "object-store.h" > #include "packfile.h" > > struct alt_base { Same question. > diff --git a/http.c b/http.c > index 31755023a4..a4a9e583c7 100644 > --- a/http.c > +++ b/http.c > @@ -1,6 +1,7 @@ > #include "git-compat-util.h" > #include "http.h" > #include "config.h" > +#include "object-store.h" > #include "pack.h" > #include "sideband.h" > #include "run-command.h" Likewise. > diff --git a/object-store.h b/object-store.h > index e78eea1dde..1de9e07102 100644 > --- a/object-store.h > +++ b/object-store.h > @@ -52,6 +52,30 @@ void add_to_alternates_memory(const char *dir); > */ > struct strbuf *alt_scratch_buf(struct alternate_object_database *alt); > > +struct packed_git { > + struct packed_git *next; > + struct list_head mru; > + struct pack_window *windows; > + off_t pack_size; > + const void *index_data; > + size_t index_size; > + uint32_t num_objects; > + uint32_t num_bad_objects; > +
Re: [PATCH 05/27] object-store: move packed_git and packed_git_mru to object store
Stefan Bellerwrites: > + > +/* > + * The mru list_head is supposed to be initialized using > + * the LIST_HEAD macro, assigning prev/next to itself. > + * However this doesn't work in this case as some compilers dislike > + * that macro on member variables. Use NULL instead as that is defined > + * and accepted, deferring the real init to prepare_packed_git_mru(). */ > +#define __MRU_INIT { NULL, NULL } > +#define RAW_OBJECT_STORE_INIT { NULL, NULL, __MRU_INIT, NULL, NULL } I do not think it has to be this way to abuse two NULLs, if you faithfully mimicked how LIST_HEAD() macro is constructed. The reason why it does not try to introduce struct list_head x = LIST_HEAD_INIT; and instead, uses LIST_HEAD(x); is because of the need for self referral. If we learn from it, we can do the same, i.e. instead of doing struct raw_object_store x = RAW_OBJECT_STORE_INIT; we can do RAW_OBJECT_STORE(x); that expands to struct raw_object_store x = { ... { _git_mru, _git_mru }, ... }; no? Then we do not need such a lengthy comment that reads only like an excuse ;-)
[PATCH 05/27] object-store: move packed_git and packed_git_mru to object store
In a process with multiple repositories open, packfile accessors should be associated to a single repository and not shared globally. Move packed_git and packed_git_mru into the_repository and adjust callers to reflect this. Patch generated by 1. Moving the struct packed_git declaration to object-store.h and packed_git, packed_git_mru globals to struct object_store. 2. Applying the semantic patch contrib/coccinelle/refactoring/packed_git.cocci to adjust callers. This semantic patch is placed in a sub directory of the coccinelle contrib dir, as this semantic patch is not expected to be of general usefulness; it is only useful during developing this series and merging it with other topics in flight. At a later date, just delete that semantic patch. 3. Applying line wrapping fixes from "make style" to break the resulting long lines. 4. Adding missing #includes of repository.h and object-store.h where needed. 5. As the packfiles are now owned by an objectstore/repository, which is ephemeral unlike globals, we introduce memory leaks. So address them in raw_object_store_clear(). Signed-off-by: Stefan BellerSigned-off-by: Jonathan Nieder --- builtin/count-objects.c | 6 -- builtin/fsck.c | 7 +-- builtin/gc.c | 4 +++- builtin/index-pack.c | 1 + builtin/pack-objects.c | 20 +++- builtin/pack-redundant.c | 6 -- builtin/receive-pack.c | 1 + cache.h | 29 fast-import.c| 6 -- http-backend.c | 6 -- http-push.c | 1 + http-walker.c| 1 + http.c | 1 + object-store.h | 41 +++- object.c | 7 +++ pack-bitmap.c| 4 +++- pack-check.c | 1 + pack-revindex.c | 1 + packfile.c | 39 +++--- reachable.c | 1 + repository.c | 2 ++ server-info.c| 6 -- sha1_name.c | 5 +++-- 23 files changed, 122 insertions(+), 74 deletions(-) diff --git a/builtin/count-objects.c b/builtin/count-objects.c index 33343818c8..9334648dcc 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -7,6 +7,8 @@ #include "cache.h" #include "config.h" #include "dir.h" +#include "repository.h" +#include "object-store.h" #include "builtin.h" #include "parse-options.h" #include "quote.h" @@ -120,9 +122,9 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) struct strbuf loose_buf = STRBUF_INIT; struct strbuf pack_buf = STRBUF_INIT; struct strbuf garbage_buf = STRBUF_INIT; - if (!packed_git) + if (!the_repository->objects.packed_git) prepare_packed_git(); - for (p = packed_git; p; p = p->next) { + for (p = the_repository->objects.packed_git; p; p = p->next) { if (!p->pack_local) continue; if (open_pack_index(p)) diff --git a/builtin/fsck.c b/builtin/fsck.c index 908e4f092a..2e99e02128 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -2,6 +2,7 @@ #include "cache.h" #include "repository.h" #include "config.h" +#include "object-store.h" #include "commit.h" #include "tree.h" #include "blob.h" @@ -729,7 +730,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) prepare_packed_git(); if (show_progress) { - for (p = packed_git; p; p = p->next) { + for (p = the_repository->objects.packed_git; p; +p = p->next) { if (open_pack_index(p)) continue; total += p->num_objects; @@ -737,7 +739,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) progress = start_progress(_("Checking objects"), total); } - for (p = packed_git; p; p = p->next) { + for (p = the_repository->objects.packed_git; p; +p = p->next) { /* verify gives error messages itself */ if (verify_pack(p, fsck_obj_buffer, progress, count)) diff --git a/builtin/gc.c b/builtin/gc.c index 77fa720bd0..96ff790867 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -11,6 +11,7 @@ */ #include "builtin.h" +#include "repository.h" #include "config.h" #include "tempfile.h" #include "lockfile.h" @@ -18,6 +19,7 @@ #include "run-command.h"