On Fri, Feb 21, 2014 at 06:35:06AM +0700, Duy Nguyen wrote:
> On the other hand, the size reduction is really nice (320MB vs 500MB).
> I don't know if we can do this, but does it make sense to apply
> --depth=250 for old commits only and shallow depth for recent commits?
> 
> For old projects, commits older than 1-2 years is probably less often
> accessed and could use some aggressive packing. This still hits
> git-blame badly. We could even make sure all objects "on the blame
> surface" have short delta chain. But that may be pushing pack-objects
> too much.

We can have a "moderately aggressive" mode like this. With the patch
below, first you repack all and remove all loose objects. Then replay
your favourite use cases with GIT_LOOSE_THEM=1. For example, if I'm
most interested in commits from a year ago:

$ GIT_LOOSE_THEM=1 ../git log --raw --since=1.year.ago >/dev/null

all relevant trees will be unpacked. Put --stat there too if you want
to unpack blobs. blame-heavy users may want to blame a few (or all)
files here too to unpack more. Now we can repack aggressively all
non-loose objects:

$ git repack -adf --exclude-loose --depth=250

and repack again, this time with normal depth, which would only affect
loose objects

$ git repack -ad

The end result is a pack with ancient history with potentially long
delta chains, tightly packed, and nearer history with shorter
chains. You will not notice any performance degradation (unless you go
back past one year of history, in my case). And the resulting pack of
git.git is 39M rather than 64M with standard depth.

The use of loose objects to mark recent objects is not efficient (but
it was quick to implement for this prototype). We could store an SHA-1
map instead.

-- 8< --
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 541667f..0e9dc8c 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -82,6 +82,7 @@ static int num_preferred_base;
 static struct progress *progress_state;
 static int pack_compression_level = Z_DEFAULT_COMPRESSION;
 static int pack_compression_seen;
+static int no_loose;
 
 static unsigned long delta_cache_size = 0;
 static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
@@ -2204,7 +2205,12 @@ static void show_object(struct object *obj,
                        const struct name_path *path, const char *last,
                        void *data)
 {
-       char *name = path_name(path, last);
+       char *name;
+
+       if (no_loose && has_loose_object(obj->sha1))
+               return;
+
+       name = path_name(path, last);
 
        add_preferred_base_object(name);
        add_object_entry(obj->sha1, obj->type, name, 0);
@@ -2487,6 +2493,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
                { OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL,
                  N_("include objects referred by reflog entries"),
                  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
+               OPT_BOOL(0, "exclude-loose", &no_loose, ""),
                OPT_BOOL(0, "stdout", &pack_to_stdout,
                         N_("output pack to stdout")),
                OPT_BOOL(0, "include-tag", &include_tag,
diff --git a/builtin/repack.c b/builtin/repack.c
index bb2314c..9b8bb35 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -137,6 +137,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
        int no_update_server_info = 0;
        int quiet = 0;
        int local = 0;
+       int no_loose = 0;
 
        struct option builtin_repack_options[] = {
                OPT_BIT('a', NULL, &pack_everything,
@@ -152,6 +153,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
                                N_("pass --no-reuse-object to git-pack-objects")),
                OPT_BOOL('n', NULL, &no_update_server_info,
                                N_("do not run git-update-server-info")),
+               OPT_BOOL(0, "exclude-loose", &no_loose, ""),
                OPT__QUIET(&quiet, N_("be quiet")),
                OPT_BOOL('l', "local", &local,
                                N_("pass --local to git-pack-objects")),
@@ -184,6 +186,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
        argv_array_push(&cmd_args, "--non-empty");
        argv_array_push(&cmd_args, "--all");
        argv_array_push(&cmd_args, "--reflog");
+       if (no_loose)
+               argv_array_push(&cmd_args, "--exclude-loose");
        if (window)
                argv_array_pushf(&cmd_args, "--window=%s", window);
        if (window_memory)
diff --git a/sha1_file.c b/sha1_file.c
index 6e8c05d..d0988f2 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -454,7 +454,7 @@ int has_loose_object_nonlocal(const unsigned char *sha1)
        return 0;
 }
 
-static int has_loose_object(const unsigned char *sha1)
+int has_loose_object(const unsigned char *sha1)
 {
        return has_loose_object_local(sha1) ||
               has_loose_object_nonlocal(sha1);
@@ -2114,6 +2114,11 @@ struct unpack_entry_stack_ent {
        unsigned long size;
 };
 
+static void write_sha1_file_prepare(const void *buf, unsigned long len,
+                                   const char *type, unsigned char *sha1,
+                                   char *hdr, int *hdrlen);
+static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
+                             const void *buf, unsigned long len, time_t mtime);
 void *unpack_entry(struct packed_git *p, off_t obj_offset,
                   enum object_type *final_type, unsigned long *final_size)
 {
@@ -2126,6 +2131,7 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
        struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
        int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
        int base_from_cache = 0;
+       static int let_them_loose = -1;
 
        if (log_pack_access != no_log_pack_access)
                write_pack_access_log(p, obj_offset);
@@ -2288,6 +2294,17 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
        *final_type = type;
        *final_size = size;
 
+       if (let_them_loose == -1)
+               let_them_loose = getenv("GIT_LOOSE_THEM") != NULL;
+       if (let_them_loose && (type == OBJ_TREE || type == OBJ_BLOB)) {
+               unsigned char sha1[20];
+               char hdr[32];
+               int hdrlen;
+               write_sha1_file_prepare(data, size, typename(type), sha1, hdr, &hdrlen);
+               if (!has_loose_object(sha1))
+                       write_loose_object(sha1, hdr, hdrlen, data, size, 0);
+       }
+
        unuse_pack(&w_curs);
        return data;
 }
-- 8< --

--
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to