Add a config option, pack.graphcompression, similar to pack.compression.
It applies to non-blob objects and, if unspecified, falls back to
pack.compression.

Objects compressed with level 0 can be identified by their leading bytes.
Use this to force recompression when an existing object's compression
(level 0 or not) does not match the target level.
This check is applied only when pack.graphcompression is set.

Signed-off-by: David Michael Barr <b...@rr-dav.id.au>
---
 builtin/pack-objects.c | 49 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 4 deletions(-)

 I started working on this just before taking a vacation,
 so it's been a little while coming.

 The intent is to allow selective recompression of pack data.
 For small objects/deltas the overhead of deflate is significant.
 This may improve read performance for the object graph.

 I ran some unscientific experiments with the chromium repository.
 With pack.graphcompression = 0, there was a 2.7% increase in pack size.
 On git log --raw, I saw a 35% improvement with cold caches and 43% with
 warm caches.

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index f069462..9518daf 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -40,6 +40,7 @@ struct object_entry {
        unsigned long z_delta_size;     /* delta data size (compressed) */
        unsigned int hash;      /* name hint hash */
        enum object_type type;
+       enum object_type actual_type;
        enum object_type in_pack_type;  /* could be delta */
        unsigned char in_pack_header_size;
        unsigned char preferred_base; /* we do not pack this, but is available
@@ -81,6 +82,8 @@ static int num_preferred_base;
 static struct progress *progress_state;
 static int pack_compression_level = Z_DEFAULT_COMPRESSION;
 static int pack_compression_seen;
+static int pack_graph_compression_level = Z_DEFAULT_COMPRESSION;
+static int pack_graph_compression_seen;
 
 static unsigned long delta_cache_size = 0;
 static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
@@ -125,14 +128,14 @@ static void *get_delta(struct object_entry *entry)
        return delta_buf;
 }
 
-static unsigned long do_compress(void **pptr, unsigned long size)
+static unsigned long do_compress(void **pptr, unsigned long size, int level)
 {
        git_zstream stream;
        void *in, *out;
        unsigned long maxsize;
 
        memset(&stream, 0, sizeof(stream));
-       git_deflate_init(&stream, pack_compression_level);
+       git_deflate_init(&stream, level);
        maxsize = git_deflate_bound(&stream, size);
 
        in = *pptr;
@@ -191,6 +194,18 @@ static unsigned long write_large_blob_data(struct 
git_istream *st, struct sha1fi
        return olen;
 }
 
+static int check_pack_compressed(struct packed_git *p,
+               struct pack_window **w_curs,
+               off_t offset)
+{
+       unsigned long avail; /* filled in by use_pack() */
+       int compressed = 0; /* result when fewer than 3 bytes are available */
+       unsigned char *in = use_pack(p, w_curs, offset, &avail);
+       if (avail >= 3) /* in[2] is only read when at least 3 bytes are there */
+               compressed = !!(in[2] & 0x6); /* bits 1-2 of third byte; presumably deflate BTYPE after the 2-byte zlib header (0 = stored) -- TODO confirm */
+       return compressed; /* 1 if either tested bit is set, else 0 */
+}
+
 /*
  * we are going to reuse the existing object data as is.  make
  * sure it is not corrupt.
@@ -240,6 +255,8 @@ static void copy_pack_data(struct sha1file *f,
        }
 }
 
+#define compression_level(type) ((type) && (type) != OBJ_BLOB ? 
pack_graph_compression_level : pack_compression_level)
+
 /* Return 0 if we will bust the pack-size limit */
 static unsigned long write_no_reuse_object(struct sha1file *f, struct 
object_entry *entry,
                                           unsigned long limit, int 
usable_delta)
@@ -286,7 +303,7 @@ static unsigned long write_no_reuse_object(struct sha1file 
*f, struct object_ent
        else if (entry->z_delta_size)
                datalen = entry->z_delta_size;
        else
-               datalen = do_compress(&buf, size);
+               datalen = do_compress(&buf, size, 
compression_level(entry->actual_type));
 
        /*
         * The object header is a byte of 'type' followed by zero or
@@ -379,6 +396,13 @@ static unsigned long write_reuse_object(struct sha1file 
*f, struct object_entry
        offset += entry->in_pack_header_size;
        datalen -= entry->in_pack_header_size;
 
+       if (!pack_to_stdout &&
+           pack_graph_compression_seen &&
+           check_pack_compressed(p, &w_curs, offset) != 
!!compression_level(entry->actual_type)) {
+               unuse_pack(&w_curs);
+               return write_no_reuse_object(f, entry, limit, usable_delta);
+       }
+
        if (!pack_to_stdout && p->index_version == 1 &&
            check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) {
                error("corrupt packed object for %s", 
sha1_to_hex(entry->idx.sha1));
@@ -955,6 +979,8 @@ static int add_object_entry(const unsigned char *sha1, enum 
object_type type,
        memset(entry, 0, sizeof(*entry));
        hashcpy(entry->idx.sha1, sha1);
        entry->hash = hash;
+       if (pack_graph_compression_seen)
+               entry->actual_type = sha1_object_info(sha1, NULL);
        if (type)
                entry->type = type;
        if (exclude)
@@ -1758,7 +1784,8 @@ static void find_deltas(struct object_entry **list, 
unsigned *list_size,
                 */
                if (entry->delta_data && !pack_to_stdout) {
                        entry->z_delta_size = do_compress(&entry->delta_data,
-                                                         entry->delta_size);
+                                                         entry->delta_size,
+                                                         
compression_level(entry->actual_type));
                        cache_lock();
                        delta_cache_size -= entry->delta_size;
                        delta_cache_size += entry->z_delta_size;
@@ -2159,6 +2186,16 @@ static int git_pack_config(const char *k, const char *v, 
void *cb)
                            pack_idx_opts.version);
                return 0;
        }
+       if (!strcmp(k, "pack.graphcompression")) {
+               int level = git_config_int(k, v);
+               if (level == -1)
+                       level = Z_DEFAULT_COMPRESSION;
+               else if (level < 0 || level > Z_BEST_COMPRESSION)
+                       die("bad pack graph compression level %d", level);
+               pack_graph_compression_level = level;
+               pack_graph_compression_seen = 1;
+               return 0;
+       }
        return git_default_config(k, v, cb);
 }
 
@@ -2519,6 +2556,10 @@ int cmd_pack_objects(int argc, const char **argv, const 
char *prefix)
        argc = parse_options(argc, argv, prefix, pack_objects_options,
                             pack_usage, 0);
 
+       /* Fall back after option parsing to catch --compression */
+       if (!pack_graph_compression_seen)
+               pack_graph_compression_level = pack_compression_level;
+
        if (argc) {
                base_name = argv[0];
                argc--;
-- 
1.8.0

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to