Re: [PATCH 4/4] core.abbrev: raise the default abbreviation to 12 hexdigits

Linus Torvalds Fri, 30 Sep 2016 11:22:07 -0700

On Fri, Sep 30, 2016 at 10:54 AM, Linus Torvalds
<[email protected]> wrote:
>
>> So IMHO, the best combination is the init_default_abbrev() you posted in
>> [1], but initialized at the top of find_unique_abbrev(). And cached
>> there, obviously, in a similar way.
>
> That's certainly possible, but I'm really not happy with how the
> counting function looks.  And nobody actually stood up to say "yeah,
> that gets alternate loose objects right" or "if you have tons of those
> alternate loose objects you have other issues anyway". I think
> somebody would have to "own" that counting function, the advantage of
> just putting it into disambiguate_state is that we just get the
> counting for free..


Side note: maybe we can mix the two approaches, and keep the counting
in the disambiguation state, and just make the counting function do

    init_object_disambiguation();
    find_short_object_filename(&ds);
    find_short_packed_object(&ds);
    finish_object_disambiguation(&ds, sha1);

and then just use "ds.nrobjects". So the counting would still be done
by the disambiguation code, it just wouldn't be in get_short_sha1().

So here's another version that takes that approach. And if somebody
(hint hint) wants to do the counting differently, they can perhaps
send an incremental patch to do that.

(This patch also contains the few setup issues Junio found with the
new "default_abbrev is negative" model)

              Linus

 builtin/rev-parse.c |  5 +++--
 diff.c              |  2 +-
 environment.c       |  2 +-
 sha1_name.c         | 39 ++++++++++++++++++++++++++++++++++++++-
 4 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c
index 4da1f1da2..cfb0f1510 100644
--- a/builtin/rev-parse.c
+++ b/builtin/rev-parse.c
@@ -671,8 +671,9 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix)
                                filter &= ~(DO_FLAGS|DO_NOREV);
                                verify = 1;
                                abbrev = DEFAULT_ABBREV;
-                               if (arg[7] == '=')
-                                       abbrev = strtoul(arg + 8, NULL, 10);
+                               if (!arg[7])
+                                       continue;
+                               abbrev = strtoul(arg + 8, NULL, 10);
                                if (abbrev < MINIMUM_ABBREV)
                                        abbrev = MINIMUM_ABBREV;
                                else if (40 <= abbrev)
diff --git a/diff.c b/diff.c
index 59920747d..c6d445915 100644
--- a/diff.c
+++ b/diff.c
@@ -3421,7 +3421,7 @@ void diff_setup_done(struct diff_options *options)
                         */
                        read_cache();
        }
-       if (options->abbrev <= 0 || 40 < options->abbrev)
+       if (options->abbrev > 40)
                options->abbrev = 40; /* full */
 
        /*
diff --git a/environment.c b/environment.c
index c1442df9a..fd6681e46 100644
--- a/environment.c
+++ b/environment.c
@@ -16,7 +16,7 @@ int trust_executable_bit = 1;
 int trust_ctime = 1;
 int check_stat = 1;
 int has_symlinks = 1;
-int minimum_abbrev = 4, default_abbrev = 7;
+int minimum_abbrev = 4, default_abbrev = -1;
 int ignore_case;
 int assume_unchanged;
 int prefer_symlink_refs;
diff --git a/sha1_name.c b/sha1_name.c
index 3b647fd7c..684b36dba 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -15,6 +15,7 @@ typedef int (*disambiguate_hint_fn)(const unsigned char *, void *);
 
 struct disambiguate_state {
        int len; /* length of prefix in hex chars */
+       unsigned int nrobjects;
        char hex_pfx[GIT_SHA1_HEXSZ + 1];
        unsigned char bin_pfx[GIT_SHA1_RAWSZ];
 
@@ -118,6 +119,12 @@ static void find_short_object_filename(struct disambiguate_state *ds)
 
                        if (strlen(de->d_name) != 38)
                                continue;
+
+                       // We only look at the one subdirectory, and we assume
+                       // each subdirectory is roughly similar, so each object
+                       // we find probably has 255 other objects in the other
+                       // fan-out directories
+                       ds->nrobjects += 256;
                        if (memcmp(de->d_name, ds->hex_pfx + 2, ds->len - 2))
                                continue;
                        memcpy(hex + 2, de->d_name, 38);
@@ -151,6 +158,7 @@ static void unique_in_pack(struct packed_git *p,
 
        open_pack_index(p);
        num = p->num_objects;
+       ds->nrobjects += num;
        last = num;
        while (first < last) {
                uint32_t mid = (first + last) / 2;
@@ -455,17 +463,46 @@ int for_each_abbrev(const char *prefix, each_abbrev_fn fn, void *cb_data)
        return ret;
 }
 
+static int get_automatic_abbrev(const char *hex)
+{
+       static int len;
+       struct disambiguate_state ds;
+
+       if (init_object_disambiguation(hex, 7, &ds) < 0)
+               return 7;
+
+       find_short_object_filename(&ds);
+       find_short_packed_object(&ds);
+
+       for (len = 7; len < 16; len++) {
+               unsigned int expect_collision = 1 << (len * 2);
+               if (ds.nrobjects < expect_collision)
+                       break;
+       }
+       return len;
+}
+
 int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
 {
        int status, exists;
+       int flags = GET_SHA1_QUIETLY;
 
        sha1_to_hex_r(hex, sha1);
        if (len == 40 || !len)
                return 40;
+
+       if (len < 0) {
+               static int automatic_abbrev = -1;
+
+               if (automatic_abbrev < 0)
+                       automatic_abbrev = get_automatic_abbrev(hex);
+               len = automatic_abbrev;
+       }
+
        exists = has_sha1_file(sha1);
        while (len < 40) {
                unsigned char sha1_ret[20];
-               status = get_short_sha1(hex, len, sha1_ret, GET_SHA1_QUIETLY);
+               status = get_short_sha1(hex, len, sha1_ret, flags);
                if (exists
                    ? !status
                    : status == SHORT_NAME_NOT_FOUND) {

Re: [PATCH 4/4] core.abbrev: raise the default abbreviation to 12 hexdigits

Reply via email to