As explained in the previous commit, the current aggressive settings
--depth=250 --window=250 could slow down repository access
significantly. Since people usually work on recent history only,
we could keep recent history more loosely packed, so that repo access
is fast most of the time while the pack file remains small.

Three more configuration variables are used to make that happen. The
first one, gc.aggressiveCommitLimits, covers the old history part,
which will be tightly packed. The remaining part will be packed with
gc.lessAggressiveWindow and gc.lessAggressiveDepth. If
gc.aggressiveCommitLimits is empty, everything will be tightly packed
as before.

The repack process becomes:

 - repack -adf on old history (e.g. the default --before=1.year.ago)
   and mark the resulting pack to be kept

 - repack a second time with the lessAggressive settings; the kept pack
   should be left untouched.

 - remove .keep file and repack the final time, reusing all deltas

This process takes more time, but produces a more efficient pack. It is
assumed that people who do "gc --aggressive" do not do this often and
do not mind if it takes a bit longer.

git.git is not a great repo to test this on because its size is modest,
but so are my laptop's CPU and memory, so here are the timings and pack
sizes:

            size  time
old aggr.   36MB  5m51
new aggr.   37MB  6m13
repack -adf 48MB  1m12

Signed-off-by: Nguyễn Thái Ngọc Duy <pclo...@gmail.com>
---
 Documentation/config.txt |  19 +++++++++
 builtin/gc.c             | 109 +++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 124 insertions(+), 4 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 5ce7f9a..47979dc 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1161,6 +1161,25 @@ gc.aggressiveWindow::
        algorithm used by 'git gc --aggressive'.  This defaults
        to 250.
 
+gc.aggressiveCommitLimits::
+       This is a parameter passed to linkgit:git-rev-list[1] to select
+       commits that are repacked with gc.aggressiveDepth and
+       gc.aggressiveWindow, while the remaining commits are repacked
+       with gc.lessAggressiveDepth and gc.lessAggressiveWindow.
++
+If this is an empty string, everything will be repacked with
+gc.aggressiveWindow and gc.aggressiveDepth.
+
+gc.lessAggressiveDepth::
+       The depth parameter used in the delta compression
+       algorithm used by 'git gc --aggressive' when
+       gc.aggressiveCommitLimits is set.  This defaults to 50.
+
+gc.lessAggressiveWindow::
+       The window size parameter used in the delta compression
+       algorithm used by 'git gc --aggressive' when
+       gc.aggressiveCommitLimits is set.  This defaults to 250.
+
 gc.auto::
        When there are approximately more than this many loose
        objects in the repository, `git gc --auto` will pack them.
diff --git a/builtin/gc.c b/builtin/gc.c
index 72aa912..37fc603 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -28,10 +28,14 @@ static const char * const builtin_gc_usage[] = {
 static int pack_refs = 1;
 static int aggressive_depth = 250;
 static int aggressive_window = 250;
+static const char *aggressive_rev_list = "--before=1.year.ago";
+static int less_aggressive_depth = 50;
+static int less_aggressive_window = 250;
 static int gc_auto_threshold = 6700;
 static int gc_auto_pack_limit = 50;
 static int detach_auto = 1;
 static const char *prune_expire = "2.weeks.ago";
+static int delta_base_offset = 1;
 
 static struct argv_array pack_refs_cmd = ARGV_ARRAY_INIT;
 static struct argv_array reflog = ARGV_ARRAY_INIT;
@@ -39,10 +43,13 @@ static struct argv_array repack = ARGV_ARRAY_INIT;
 static struct argv_array prune = ARGV_ARRAY_INIT;
 static struct argv_array rerere = ARGV_ARRAY_INIT;
 
+static char *keep_file;
 static char *pidfile;
 
 static void remove_pidfile(void)
 {
+       if (keep_file)
+               unlink_or_warn(keep_file);
        if (pidfile)
                unlink(pidfile);
 }
@@ -54,6 +61,63 @@ static void remove_pidfile_on_signal(int signo)
        raise(signo);
 }
 
+static void pack_old_history(int quiet)
+{
+       struct child_process pack_objects;
+       struct child_process rev_list;
+       struct argv_array av_po = ARGV_ARRAY_INIT;
+       struct argv_array av_rl = ARGV_ARRAY_INIT;
+       char sha1[41];
+
+       argv_array_pushl(&av_rl, "rev-list", "--all", "--objects",
+                        "--reflog", NULL);
+       argv_array_push(&av_rl, aggressive_rev_list);
+
+       memset(&rev_list, 0, sizeof(rev_list));
+       rev_list.no_stdin = 1;
+       rev_list.out = -1;
+       rev_list.git_cmd = 1;
+       rev_list.argv = av_rl.argv;
+
+       if (start_command(&rev_list))
+               die(_("gc: unable to fork git-rev-list"));
+
+       argv_array_pushl(&av_po, "pack-objects", "--keep-true-parents",
+                        "--honor-pack-keep", "--non-empty", "--no-reuse-delta",
+                        "--keep", "--local", NULL);
+       if (delta_base_offset)
+               argv_array_push(&av_po,  "--delta-base-offset");
+       if (quiet)
+               argv_array_push(&av_po, "-q");
+       if (aggressive_window)
+               argv_array_pushf(&av_po, "--window=%d", aggressive_window);
+       if (aggressive_depth)
+               argv_array_pushf(&av_po, "--depth=%d", aggressive_depth);
+       argv_array_push(&av_po, git_path("objects/pack/pack"));
+
+       memset(&pack_objects, 0, sizeof(pack_objects));
+       pack_objects.in = rev_list.out;
+       pack_objects.out = -1;
+       pack_objects.git_cmd = 1;
+       pack_objects.argv = av_po.argv;
+
+       if (start_command(&pack_objects))
+               die(_("gc: unable to fork git-pack-objects"));
+
+       if (read_in_full(pack_objects.out, sha1, 41) != 41 ||
+           sha1[40] != '\n')
+               die_errno(_("gc: pack-objects did not return the new pack's SHA-1"));
+       sha1[40] = '\0';
+       keep_file = git_pathdup("objects/pack/pack-%s.keep", sha1);
+       close(pack_objects.out);
+
+       if (finish_command(&rev_list))
+               die(_("gc: git-rev-list died with error"));
+
+       if (finish_command(&pack_objects))
+               die(_("gc: git-pack-objects died with error"));
+}
+
 static int gc_config(const char *var, const char *value, void *cb)
 {
        if (!strcmp(var, "gc.packrefs")) {
@@ -71,6 +135,22 @@ static int gc_config(const char *var, const char *value, void *cb)
                aggressive_depth = git_config_int(var, value);
                return 0;
        }
+       if (!strcmp(var, "gc.aggressivecommitlimits")) {
+               aggressive_rev_list = value && *value ? xstrdup(value) : NULL;
+               return 0;
+       }
+       if (!strcmp(var, "gc.lessaggressivewindow")) {
+               less_aggressive_window = git_config_int(var, value);
+               return 0;
+       }
+       if (!strcmp(var, "gc.lessaggressivedepth")) {
+               less_aggressive_depth = git_config_int(var, value);
+               return 0;
+       }
+       if (!strcmp(var, "repack.usedeltabaseoffset")) {
+               delta_base_offset = git_config_bool(var, value);
+               return 0;
+       }
        if (!strcmp(var, "gc.auto")) {
                gc_auto_threshold = git_config_int(var, value);
                return 0;
@@ -298,11 +378,19 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
                usage_with_options(builtin_gc_usage, builtin_gc_options);
 
        if (aggressive) {
+               int depth, window;
+               if (aggressive_rev_list) {
+                       depth = less_aggressive_depth;
+                       window = less_aggressive_window;
+               } else {
+                       depth = aggressive_depth;
+                       window = aggressive_window;
+               }
                argv_array_push(&repack, "-f");
-               if (aggressive_depth > 0)
-                       argv_array_pushf(&repack, "--depth=%d", aggressive_depth);
-               if (aggressive_window > 0)
-                       argv_array_pushf(&repack, "--window=%d", aggressive_window);
+               if (depth > 0)
+                       argv_array_pushf(&repack, "--depth=%d", depth);
+               if (window > 0)
+                       argv_array_pushf(&repack, "--window=%d", window);
        }
        if (quiet)
                argv_array_push(&repack, "-q");
@@ -343,9 +431,22 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
        if (run_command_v_opt(reflog.argv, RUN_GIT_CMD))
                return error(FAILED_RUN, reflog.argv[0]);
 
+       if (aggressive && aggressive_rev_list)
+               pack_old_history(quiet);
+
        if (run_command_v_opt(repack.argv, RUN_GIT_CMD))
                return error(FAILED_RUN, repack.argv[0]);
 
+       if (aggressive && aggressive_rev_list) {
+               if (keep_file)
+                       unlink_or_warn(keep_file);
+               argv_array_clear(&repack);
+               argv_array_pushl(&repack, "repack", "-d", "-l", NULL);
+               add_repack_all_option();
+               if (run_command_v_opt(repack.argv, RUN_GIT_CMD))
+                       return error(FAILED_RUN, repack.argv[0]);
+       }
+
        if (prune_expire) {
                argv_array_push(&prune, prune_expire);
                if (quiet)
-- 
1.9.0.40.gaa8c3ea

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to