Re: [PATCH 07/18] sha1_file: support lazily fetching missing objects
On Thu, Oct 12, 2017 at 4:42 PM, Christian Couderwrote: > > Instead of adding labels and gotos, I would suggest adding a new > function like the following does on top of your changes: Sorry for the usual Gmail related problems. You can find the patch and a further refactoring that adds an object_info_from_pack_entry() function there: https://github.com/chriscool/git/commits/pu-partial-clone-lazy-fetch-improved
Re: [PATCH 07/18] sha1_file: support lazily fetching missing objects
On Fri, Sep 29, 2017 at 10:11 PM, Jonathan Tanwrote: > diff --git a/sha1_file.c b/sha1_file.c > index b4a67bb83..77aa0ffdf 100644 > --- a/sha1_file.c > +++ b/sha1_file.c > @@ -29,6 +29,7 @@ > #include "mergesort.h" > #include "quote.h" > #include "packfile.h" > +#include "fetch-object.h" > > const unsigned char null_sha1[GIT_MAX_RAWSZ]; > const struct object_id null_oid; > @@ -1149,6 +1150,8 @@ static int sha1_loose_object_info(const unsigned char > *sha1, > return (status < 0) ? status : 0; > } > > +int fetch_if_missing = 1; > + > int sha1_object_info_extended(const unsigned char *sha1, struct object_info > *oi, unsigned flags) > { > static struct object_info blank_oi = OBJECT_INFO_INIT; > @@ -1157,6 +1160,7 @@ int sha1_object_info_extended(const unsigned char > *sha1, struct object_info *oi, > const unsigned char *real = (flags & OBJECT_INFO_LOOKUP_REPLACE) ? > lookup_replace_object(sha1) : > sha1; > + int already_retried = 0; > > if (!oi) > oi = _oi; > @@ -1181,28 +1185,36 @@ int sha1_object_info_extended(const unsigned char > *sha1, struct object_info *oi, > } > } > > - if (!find_pack_entry(real, )) { > - /* Most likely it's a loose object. */ > - if (!sha1_loose_object_info(real, oi, flags)) > - return 0; > +retry: > + if (find_pack_entry(real, )) > + goto found_packed; > > - /* Not a loose object; someone else may have just packed it. > */ > - if (flags & OBJECT_INFO_QUICK) { > - return -1; > - } else { > - reprepare_packed_git(); > - if (!find_pack_entry(real, )) > - return -1; > - } > + /* Most likely it's a loose object. */ > + if (!sha1_loose_object_info(real, oi, flags)) > + return 0; > + > + /* Not a loose object; someone else may have just packed it. */ > + reprepare_packed_git(); > + if (find_pack_entry(real, )) > + goto found_packed; > + > + /* Check if it is a missing object */ > + if (fetch_if_missing && repository_format_partial_clone && > + !already_retried) { > + fetch_object(repository_format_partial_clone, real); > + already_retried = 1; > + goto retry; > } > > + return -1; > + > +found_packed: > if (oi == _oi) > /* > * We know that the caller doesn't actually need the > * information below, so return early. > */ > return 0; > - > rtype = packed_object_info(e.p, e.offset, oi); > if (rtype < 0) { > mark_bad_packed_object(e.p, real); Instead of adding labels and gotos, I would suggest adding a new function like the following does on top of your changes: diff --git a/sha1_file.c b/sha1_file.c index cc1aa0bd7f..02a6ed1e9b 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1171,18 +1171,40 @@ static int sha1_loose_object_info(const unsigned char *sha1, return (status < 0) ? status : 0; } +int try_find_packed_entry_or_loose_object(const unsigned char *real, struct object_info *oi, + unsigned flags, struct pack_entry *e, int retry) +{ + if (find_pack_entry(real, e)) + return 1; + + /* Most likely it's a loose object. */ + if (!sha1_loose_object_info(real, oi, flags)) + return 0; + + /* Not a loose object; someone else may have just packed it. */ + reprepare_packed_git(); + if (find_pack_entry(real, e)) + return 1; + + /* Check if it is a missing object */ + if (fetch_if_missing && repository_format_partial_clone && retry) { + fetch_object(repository_format_partial_clone, real); + return try_find_packed_entry_or_loose_object(real, oi, flags, e, 0); + } + + return -1; +} + int fetch_if_missing = 1; int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, unsigned flags) { static struct object_info blank_oi = OBJECT_INFO_INIT; struct pack_entry e; - int rtype; + int rtype, res; const unsigned char *real = (flags & OBJECT_INFO_LOOKUP_REPLACE) ? lookup_replace_object(sha1) : sha1; - int already_retried = 0; - if (!oi) oi = _oi; @@ -1206,30 +1228,10 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, } } -retry: - if (find_pack_entry(real, )) - goto found_packed; - - /* Most likely it's a loose object. */ -
[PATCH 07/18] sha1_file: support lazily fetching missing objects
Teach sha1_file to fetch objects from the remote configured in extensions.partialClone whenever an object is requested but missing. The fetching of objects can be suppressed through a global variable. This is used by fsck and index-pack. However, by default, such fetching is not suppressed. This is meant as a temporary measure to ensure that all Git commands work in such a situation. Future patches will update some commands to either tolerate missing objects (without fetching them) or be more efficient in fetching them. In order to determine the code changes in sha1_file.c necessary, I investigated the following: (1) functions in sha1_file.c that take in a hash, without the user regarding how the object is stored (loose or packed) (2) functions in packfile.c (because I need to check callers that know about the loose/packed distinction and operate on both differently, and ensure that they can handle the concept of objects that are neither loose nor packed) (1) is handled by the modification to sha1_object_info_extended(). For (2), I looked at for_each_packed_object and others. For for_each_packed_object, the callers either already work or are fixed in this patch: - reachable - only to find recent objects - builtin/fsck - already knows about missing objects - builtin/cat-file - warning message added in this commit Callers of the other functions do not need to be changed: - parse_pack_index - http - indirectly from http_get_info_packs - find_pack_entry_one - this searches a single pack that is provided as an argument; the caller already knows (through other means) that the sought object is in a specific pack - find_sha1_pack - fast-import - appears to be an optimization to not store a file if it is already in a pack - http-walker - to search through a struct alt_base - http-push - to search through remote packs - has_sha1_pack - builtin/fsck - already knows about promisor objects - builtin/count-objects - informational purposes only (check if loose object is also packed) - builtin/prune-packed - check if object to be pruned is packed (if not, don't prune it) - revision - used to exclude packed objects if requested by user - diff - just for optimization Signed-off-by: Jonathan Tan--- builtin/cat-file.c | 2 ++ builtin/fetch-pack.c | 2 ++ builtin/fsck.c | 3 +++ builtin/index-pack.c | 6 ++ cache.h | 8 fetch-object.c | 3 +++ sha1_file.c | 38 t/t0410-partial-clone.sh | 51 8 files changed, 100 insertions(+), 13 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 4ccbfaac3..7aa1159ce 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -474,6 +474,8 @@ static int batch_objects(struct batch_options *opt) for_each_loose_object(batch_loose_object, , 0); for_each_packed_object(batch_packed_object, , 0); + if (repository_format_partial_clone) + warning("This repository has extensions.partialClone set. Some objects may not be loaded."); cb.opt = opt; cb.expand = diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 9f303cf98..9a7ebf6e9 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -53,6 +53,8 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) struct oid_array shallow = OID_ARRAY_INIT; struct string_list deepen_not = STRING_LIST_INIT_DUP; + fetch_if_missing = 0; + packet_trace_identity("fetch-pack"); memset(, 0, sizeof(args)); diff --git a/builtin/fsck.c b/builtin/fsck.c index f6cb4d755..155ca663b 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -683,6 +683,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) int i; struct alternate_object_database *alt; + /* fsck knows how to handle missing promisor objects */ + fetch_if_missing = 0; + errors_found = 0; check_replace_refs = 0; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 14ebb2f17..440558046 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1657,6 +1657,12 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) unsigned foreign_nr = 1;/* zero is a "good" value, assume bad */ int report_end_of_input = 0; + /* +* index-pack never needs to fetch missing objects, since it only +* accesses the repo to do hash collision checks +*/ + fetch_if_missing = 0; + if (argc == 2 && !strcmp(argv[1], "-h")) usage(index_pack_usage); diff --git a/cache.h b/cache.h index 437206d06..58d3f8986 100644 --- a/cache.h +++ b/cache.h @@ -1721,6 +1721,14 @@ struct object_info {