Re: [PATCH 4/9] get_short_oid: sort ambiguous objects by type, then SHA-1

Derrick Stolee Tue, 01 May 2018 04:12:07 -0700

On 4/30/2018 6:07 PM, Ævar Arnfjörð Bjarmason wrote:

Change the output emitted when an ambiguous object is encountered so
that we show tags first, then commits, followed by trees, and finally
blobs. Within each type we show objects in hashcmp() order. Before this
change the objects were only ordered by hashcmp().


The reason for doing this is that the output looks better as a result,
e.g. the v2.17.0 tag before this change on "git show e8f2" would
display:

     hint: The candidates are:
     hint:   e8f2093055 tree
     hint:   e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached 
HEAD abbreviated object name
     hint:   e8f21d02f7 blob
     hint:   e8f21d577c blob
     hint:   e8f25a3a50 tree
     hint:   e8f26250fa commit 2017-02-03 - Merge pull request #996 from 
jeffhostetler/jeffhostetler/register_rename_src
     hint:   e8f2650052 tag v2.17.0
     hint:   e8f2867228 blob
     hint:   e8f28d537c tree
     hint:   e8f2a35526 blob
     hint:   e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for 
multiple remote.url entries
     hint:   e8f2cf6ec0 tree

Now we'll instead show:

     hint:   e8f2650052 tag v2.17.0
     hint:   e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached 
HEAD abbreviated object name
     hint:   e8f26250fa commit 2017-02-03 - Merge pull request #996 from 
jeffhostetler/jeffhostetler/register_rename_src
     hint:   e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for 
multiple remote.url entries
     hint:   e8f2093055 tree
     hint:   e8f25a3a50 tree
     hint:   e8f28d537c tree
     hint:   e8f2cf6ec0 tree
     hint:   e8f21d02f7 blob
     hint:   e8f21d577c blob
     hint:   e8f2867228 blob
     hint:   e8f2a35526 blob

Since we show the commit data in the output that's nicely aligned once
we sort by object type. The decision to show tags before commits is
pretty arbitrary, but it's much less likely that we'll display a tag,
so if there is one it makes sense to show it first.

Here's a non-arbitrary reason: the object types are ordered topologically (ignoring self-references):


tag -> commit, tree, blob
commit -> tree
tree -> blob

Signed-off-by: Ævar Arnfjörð Bjarmason <ava...@gmail.com>
---
  sha1-array.c | 15 +++++++++++++++
  sha1-array.h |  3 +++
  sha1-name.c  | 37 ++++++++++++++++++++++++++++++++++++-
  3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/sha1-array.c b/sha1-array.c
index 838b3bf847..48bd9e9230 100644
--- a/sha1-array.c
+++ b/sha1-array.c
@@ -41,6 +41,21 @@ void oid_array_clear(struct oid_array *array)
        array->sorted = 0;
  }

+

+int oid_array_for_each(struct oid_array *array,
+                      for_each_oid_fn fn,
+                      void *data)
+{
+       int i;
+
+       for (i = 0; i < array->nr; i++) {
+               int ret = fn(array->oid + i, data);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+
  int oid_array_for_each_unique(struct oid_array *array,
                                for_each_oid_fn fn,
                                void *data)
diff --git a/sha1-array.h b/sha1-array.h
index 1e1d24b009..232bf95017 100644
--- a/sha1-array.h
+++ b/sha1-array.h
@@ -16,6 +16,9 @@ void oid_array_clear(struct oid_array *array);

typedef int (*for_each_oid_fn)(const struct object_id *oid,

                               void *data);
+int oid_array_for_each(struct oid_array *array,
+                      for_each_oid_fn fn,
+                      void *data);
  int oid_array_for_each_unique(struct oid_array *array,
                              for_each_oid_fn fn,
                              void *data);
diff --git a/sha1-name.c b/sha1-name.c
index 9d7bbd3e96..46d8b1afa6 100644
--- a/sha1-name.c
+++ b/sha1-name.c
@@ -378,6 +378,34 @@ static int collect_ambiguous(const struct object_id *oid, 
void *data)
        return 0;
  }

+static int sort_ambiguous(const void *a, const void *b)

+{
+       int a_type = oid_object_info(a, NULL);
+       int b_type = oid_object_info(b, NULL);
+       int a_type_sort;
+       int b_type_sort;
+
+       /*
+        * Sorts by hash within the same object type, just as
+        * oid_array_for_each_unique() would do.
+        */
+       if (a_type == b_type)
+               return oidcmp(a, b);
+
+       /*
+        * Between object types show tags, then commits, and finally
+        * trees and blobs.
+        *
+        * The object_type enum is commit, tree, blob, tag, but we
+        * want tag, commit, tree blob. Cleverly (perhaps too
+        * cleverly) do that with modulus, since the enum assigns 1 to
+        * commit, so tag becomes 0.
+        */


I appreciate this comment. Clever things should be marked as such.

+       a_type_sort = a_type % 4;
+       b_type_sort = b_type % 4;
+       return a_type_sort > b_type_sort ? 1 : -1;
+}
+
  static int get_short_oid(const char *name, int len, struct object_id *oid,
                          unsigned flags)
  {
@@ -409,6 +437,8 @@ static int get_short_oid(const char *name, int len, struct 
object_id *oid,
        status = finish_object_disambiguation(&ds, oid);

if (!quietly && (status == SHORT_NAME_AMBIGUOUS)) {

+               struct oid_array collect = OID_ARRAY_INIT;
+
                error(_("short SHA1 %s is ambiguous"), ds.hex_pfx);

/*

@@ -421,7 +451,12 @@ static int get_short_oid(const char *name, int len, struct 
object_id *oid,
                        ds.fn = NULL;

advise(_("The candidates are:"));

-               for_each_abbrev(ds.hex_pfx, show_ambiguous_object, &ds);
+               for_each_abbrev(ds.hex_pfx, collect_ambiguous, &collect);
+               QSORT(collect.oid, collect.nr, sort_ambiguous);

I was wondering how the old code sorted by SHA even when the ambiguous objects were loaded from different sources (multiple pack-files, loose objects). Turns out that for_each_abbrev() does its own sort after collecting the SHAs and then calls the given function pointer only once per distinct object. This avoids multiple instances of the same object, which may appear multiple times across pack-files.

I only ask because now we are doing two sorts. I wonder if it would be more elegant to provide your sorting algorithm to for_each_abbrev() and let it call show_ambiguous_object as before.

Another question is if we should use this sort generally for all calls to for_each_abbrev(). The only other case I see is in builtin/rev-parse.c.

+
+               if (oid_array_for_each(&collect, show_ambiguous_object, &ds))
+                       BUG("show_ambiguous_object shouldn't return non-zero");
+               oid_array_clear(&collect);
        }

return status;

Re: [PATCH 4/9] get_short_oid: sort ambiguous objects by type, then SHA-1

Reply via email to