Current code peaks into the transfered pack's header, if the number of
objects is under a limit, unpack-objects is called to handle the rest,
otherwise index-pack is. This patch makes fetch-pack use index-pack
unconditionally, then turn objects loose and remove the pack at the
end. unpack-objects is deprecated and may be removed in future.

There are a few reasons for this:

 - down to two code paths to maintain regarding pack reading
   (sha1_file.c and index-pack.c). When .pack v4 comes, we don't need
   to duplicate work in index-pack and unpack-objects.

 - the number of objects should not be the only indicator for
   unpacking. If there are a few large objects in the pack, the pack
   should be kept anyway. Keeping the pack lets us examine the whole
   pack later and make a better decision.

 - by going through index-pack first, then unpack, we pay extra cost
   for completing a thin pack into a full one. But compared to fetch's
   total time, it should not be noticeable because unpack-objects is
   only called when the pack contains a small number of objects.

 - unpack-objects does not support streaming large blobs. Granted
   force_object_loose(), which is used by this patch, does not either.
   But it'll be easier to do so because sha1_file.c interface supports
   streaming.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclo...@gmail.com>
---
 builtin/receive-pack.c | 120 +++++++++++++------------------------------------
 cache.h                |   1 +
 fetch-pack.c           |  77 +++++++++++--------------------
 sha1_file.c            |  70 ++++++++++++++++++++++++++++-
 4 files changed, 128 insertions(+), 140 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index e3eb5fc..6eb4ffb 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -792,105 +792,49 @@ static struct command *read_head_info(void)
        return commands;
 }
 
-static const char *parse_pack_header(struct pack_header *hdr)
-{
-       switch (read_pack_header(0, hdr)) {
-       case PH_ERROR_EOF:
-               return "eof before pack header was fully read";
-
-       case PH_ERROR_PACK_SIGNATURE:
-               return "protocol error (pack signature mismatch detected)";
-
-       case PH_ERROR_PROTOCOL:
-               return "protocol error (pack version unsupported)";
-
-       default:
-               return "unknown error in parse_pack_header";
-
-       case 0:
-               return NULL;
-       }
-}
-
 static const char *pack_lockfile;
 
 static const char *unpack(int err_fd)
 {
-       struct pack_header hdr;
-       const char *hdr_err;
-       char hdr_arg[38];
        int fsck_objects = (receive_fsck_objects >= 0
                            ? receive_fsck_objects
                            : transfer_fsck_objects >= 0
                            ? transfer_fsck_objects
                            : 0);
 
-       hdr_err = parse_pack_header(&hdr);
-       if (hdr_err) {
-               if (err_fd > 0)
-                       close(err_fd);
-               return hdr_err;
-       }
-       snprintf(hdr_arg, sizeof(hdr_arg),
-                       "--pack_header=%"PRIu32",%"PRIu32,
-                       ntohl(hdr.hdr_version), ntohl(hdr.hdr_entries));
-
-       if (ntohl(hdr.hdr_entries) < unpack_limit) {
-               int code, i = 0;
-               struct child_process child;
-               const char *unpacker[5];
-               unpacker[i++] = "unpack-objects";
-               if (quiet)
-                       unpacker[i++] = "-q";
-               if (fsck_objects)
-                       unpacker[i++] = "--strict";
-               unpacker[i++] = hdr_arg;
-               unpacker[i++] = NULL;
-               memset(&child, 0, sizeof(child));
-               child.argv = unpacker;
-               child.no_stdout = 1;
-               child.err = err_fd;
-               child.git_cmd = 1;
-               code = run_command(&child);
-               if (!code)
-                       return NULL;
-               return "unpack-objects abnormal exit";
-       } else {
-               const char *keeper[7];
-               int s, status, i = 0;
-               char keep_arg[256];
-               struct child_process ip;
-
-               s = sprintf(keep_arg, "--keep=receive-pack %"PRIuMAX" on ", 
(uintmax_t) getpid());
-               if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
-                       strcpy(keep_arg + s, "localhost");
-
-               keeper[i++] = "index-pack";
-               keeper[i++] = "--stdin";
-               if (fsck_objects)
-                       keeper[i++] = "--strict";
-               keeper[i++] = "--fix-thin";
-               keeper[i++] = hdr_arg;
-               keeper[i++] = keep_arg;
-               keeper[i++] = NULL;
-               memset(&ip, 0, sizeof(ip));
-               ip.argv = keeper;
-               ip.out = -1;
-               ip.err = err_fd;
-               ip.git_cmd = 1;
-               status = start_command(&ip);
-               if (status) {
-                       return "index-pack fork failed";
-               }
-               pack_lockfile = index_pack_lockfile(ip.out);
-               close(ip.out);
-               status = finish_command(&ip);
-               if (!status) {
-                       reprepare_packed_git();
-                       return NULL;
-               }
+       const char *keeper[7];
+       int s, status, i = 0;
+       char keep_arg[256];
+       struct child_process ip;
+
+       s = sprintf(keep_arg, "--keep=receive-pack %"PRIuMAX" on ", (uintmax_t) 
getpid());
+       if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
+               strcpy(keep_arg + s, "localhost");
+
+       keeper[i++] = "index-pack";
+       keeper[i++] = "--stdin";
+       if (fsck_objects)
+               keeper[i++] = "--strict";
+       keeper[i++] = "--fix-thin";
+       keeper[i++] = keep_arg;
+       keeper[i++] = NULL;
+       memset(&ip, 0, sizeof(ip));
+       ip.argv = keeper;
+       ip.out = -1;
+       ip.err = err_fd;
+       ip.git_cmd = 1;
+       status = start_command(&ip);
+       if (status) {
+               return "index-pack fork failed";
+       }
+       pack_lockfile = index_pack_lockfile(ip.out);
+       close(ip.out);
+       status = finish_command(&ip);
+       if (status)
                return "index-pack abnormal exit";
-       }
+       if (!maybe_unpack_objects(pack_lockfile, unpack_limit))
+               pack_lockfile = NULL;
+       return NULL;
 }
 
 static const char *unpack_with_sideband(void)
diff --git a/cache.h b/cache.h
index 85b544f..0fff958 100644
--- a/cache.h
+++ b/cache.h
@@ -789,6 +789,7 @@ extern int hash_sha1_file(const void *buf, unsigned long 
len, const char *type,
 extern int write_sha1_file(const void *buf, unsigned long len, const char 
*type, unsigned char *return_sha1);
 extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned 
char *);
 extern int force_object_loose(const unsigned char *sha1, time_t mtime);
+extern int maybe_unpack_objects(const char *pack_lockfile, int limit);
 extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size);
 extern int unpack_sha1_header(git_zstream *stream, unsigned char *map, 
unsigned long mapsize, void *buffer, unsigned long bufsiz);
 extern int parse_sha1_header(const char *hdr, unsigned long *sizep);
diff --git a/fetch-pack.c b/fetch-pack.c
index f5d99c1..9c81a15 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -687,9 +687,7 @@ static int get_pack(struct fetch_pack_args *args,
        struct async demux;
        const char *argv[22];
        char keep_arg[256];
-       char hdr_arg[256];
        const char **av;
-       int do_keep = args->keep_pack;
        struct child_process cmd;
        int ret;
 
@@ -711,54 +709,29 @@ static int get_pack(struct fetch_pack_args *args,
 
        memset(&cmd, 0, sizeof(cmd));
        cmd.argv = argv;
+       cmd.out = -1;
        av = argv;
-       *hdr_arg = 0;
-       if (!args->keep_pack && unpack_limit) {
-               struct pack_header header;
-
-               if (read_pack_header(demux.out, &header))
-                       die("protocol error: bad pack header");
-               snprintf(hdr_arg, sizeof(hdr_arg),
-                        "--pack_header=%"PRIu32",%"PRIu32,
-                        ntohl(header.hdr_version), ntohl(header.hdr_entries));
-               if (ntohl(header.hdr_entries) < unpack_limit)
-                       do_keep = 0;
-               else
-                       do_keep = 1;
-       }
 
        if (alternate_shallow_file) {
                *av++ = "--shallow-file";
                *av++ = alternate_shallow_file;
        }
 
-       if (do_keep) {
-               if (pack_lockfile)
-                       cmd.out = -1;
-               *av++ = "index-pack";
-               *av++ = "--stdin";
-               if (!args->quiet && !args->no_progress)
-                       *av++ = "-v";
-               if (args->use_thin_pack)
-                       *av++ = "--fix-thin";
-               if (args->lock_pack || unpack_limit) {
-                       int s = sprintf(keep_arg,
-                                       "--keep=fetch-pack %"PRIuMAX " on ", 
(uintmax_t) getpid());
-                       if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
-                               strcpy(keep_arg + s, "localhost");
-                       *av++ = keep_arg;
-               }
-               if (args->check_self_contained_and_connected)
-                       *av++ = "--check-self-contained-and-connected";
-       }
-       else {
-               *av++ = "unpack-objects";
-               if (args->quiet || args->no_progress)
-                       *av++ = "-q";
-               args->check_self_contained_and_connected = 0;
-       }
-       if (*hdr_arg)
-               *av++ = hdr_arg;
+       *av++ = "index-pack";
+       *av++ = "--stdin";
+       if (!args->quiet && !args->no_progress)
+               *av++ = "-v";
+       if (args->use_thin_pack)
+               *av++ = "--fix-thin";
+       if (args->lock_pack || unpack_limit) {
+               int s = sprintf(keep_arg,
+                               "--keep=fetch-pack %"PRIuMAX " on ", 
(uintmax_t) getpid());
+               if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
+                       strcpy(keep_arg + s, "localhost");
+               *av++ = keep_arg;
+       }
+       if (args->check_self_contained_and_connected)
+               *av++ = "--check-self-contained-and-connected";
        if (fetch_fsck_objects >= 0
            ? fetch_fsck_objects
            : transfer_fsck_objects >= 0
@@ -771,10 +744,8 @@ static int get_pack(struct fetch_pack_args *args,
        cmd.git_cmd = 1;
        if (start_command(&cmd))
                die("fetch-pack: unable to fork off %s", argv[0]);
-       if (do_keep && pack_lockfile) {
-               *pack_lockfile = index_pack_lockfile(cmd.out);
-               close(cmd.out);
-       }
+       *pack_lockfile = index_pack_lockfile(cmd.out);
+       close(cmd.out);
 
        ret = finish_command(&cmd);
        if (!ret || (args->check_self_contained_and_connected && ret == 1))
@@ -820,11 +791,12 @@ static struct ref *do_fetch_pack(struct fetch_pack_args 
*args,
                                 int fd[2],
                                 const struct ref *orig_ref,
                                 struct ref **sought, int nr_sought,
-                                char **pack_lockfile)
+                                char **pack_lockfile_p)
 {
        struct ref *ref = copy_ref_list(orig_ref);
        unsigned char sha1[20];
        const char *agent_feature;
+       char *pack_lockfile = NULL;
        int agent_len;
 
        sort_ref_list(&ref, ref_compare_name);
@@ -899,9 +871,15 @@ static struct ref *do_fetch_pack(struct fetch_pack_args 
*args,
                setup_alternate_shallow();
        else
                alternate_shallow_file = NULL;
-       if (get_pack(args, fd, pack_lockfile))
+       if (get_pack(args, fd, &pack_lockfile))
                die("git fetch-pack: fetch failed.");
 
+       if (!maybe_unpack_objects(pack_lockfile,
+                                 args->keep_pack ? 0 : unpack_limit))
+               pack_lockfile = NULL;
+       if (pack_lockfile_p)
+               *pack_lockfile_p = pack_lockfile;
+
  all_done:
        return ref;
 }
@@ -997,6 +975,5 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
                        commit_lock_file(&shallow_lock);
        }
 
-       reprepare_packed_git();
        return ref_cpy;
 }
diff --git a/sha1_file.c b/sha1_file.c
index 8e27db1..a0cdeae 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -61,6 +61,8 @@ static struct cached_object empty_tree = {
 };
 
 static struct packed_git *last_found_pack;
+/* temporarily skip one pack in find_pack_entry() */
+static struct packed_git *skip_this_pack;
 
 static struct cached_object *find_cached_object(const unsigned char *sha1)
 {
@@ -2376,11 +2378,15 @@ static int find_pack_entry(const unsigned char *sha1, 
struct pack_entry *e)
        if (!packed_git)
                return 0;
 
-       if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
+       if (last_found_pack &&
+           last_found_pack != skip_this_pack &&
+           fill_pack_entry(sha1, e, last_found_pack))
                return 1;
 
        for (p = packed_git; p; p = p->next) {
-               if (p == last_found_pack || !fill_pack_entry(sha1, e, p))
+               if (p == last_found_pack ||
+                   p == skip_this_pack ||
+                   !fill_pack_entry(sha1, e, p))
                        continue;
 
                last_found_pack = p;
@@ -3133,3 +3139,63 @@ void assert_sha1_type(const unsigned char *sha1, enum 
object_type expect)
                die("%s is not a valid '%s' object", sha1_to_hex(sha1),
                    typename(expect));
 }
+
+static int has_sha1_except_in(struct packed_git *p,
+                             const unsigned char *sha1)
+{
+       int has;
+       skip_this_pack = p;
+       has = has_sha1_file(sha1);
+       skip_this_pack = NULL;
+       return has;
+}
+
+/*
+ * Unpack objects if the number of objects in the pack is lower than
+ * specified limit. Otherwise make sure the new pack is registered
+ * (including when pack_lockfile is NULL). Return 0 is the pack is
+ * removed.
+ */
+int maybe_unpack_objects(const char *pack_lockfile, int limit)
+{
+       const char *ext[] = { ".pack", ".idx", ".keep", NULL };
+       struct packed_git *p;
+       char *path;
+       int len, ret = 0;
+       uint32_t i;
+
+       reprepare_packed_git();
+       if (!pack_lockfile)
+               return -1;
+
+       /* must be enough for any extensions in ext[] */
+       path = xstrdup(pack_lockfile);
+       len = strlen(pack_lockfile) - strlen(".keep");
+       strcpy(path + len, ".pack");
+       for (p = packed_git; p; p = p->next)
+               if (!strcmp(p->pack_name, path))
+                       break;
+       if (!p || open_pack_index(p))
+               die("unable to find pack %s", path);
+
+       if (p->num_objects >= limit) {
+               free(path);
+               return -1;
+       }
+
+       for (i = 0; i < p->num_objects; i++) {
+               const unsigned char *sha1 = nth_packed_object_sha1(p, i);
+               if (!has_sha1_except_in(p, sha1)) /* skip --fix-thin objects */
+                       ret |= force_object_loose(sha1, p->mtime);
+       }
+
+       if (!ret) {
+               free_pack_by_name(p->pack_name);
+               for (i = 0; ext[i]; i++) {
+                       strcpy(path + len, ext[i]);
+                       unlink_or_warn(path);
+               }
+       }
+       free(path);
+       return ret;
+}
-- 
1.8.2.83.gc99314b

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to