Nguyễn Thái Ngọc Duy  <pclo...@gmail.com> writes:

> prepare_packed_git_one() is modified to allow count-objects to hook a
> report function to so we don't need to duplicate the pack searching
> logic in count-objects.c. When report_pack_garbage is NULL, the
> overhead is insignificant.
>
> The garbage is reported with warning() instead of error() in packed
> garbage case because it's not an error to have garbage. Loose garbage
> is still reported as errors and will be converted to warnings later.
>
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclo...@gmail.com>
> ---

Thanks.

Tests look good and the series is getting much closer.

> diff --git a/sha1_file.c b/sha1_file.c
> index 239bee7..5bedf78 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -21,6 +21,7 @@
>  #include "sha1-lookup.h"
>  #include "bulk-checkin.h"
>  #include "streaming.h"
> +#include "dir.h"
>  
>  #ifndef O_NOATIME
>  #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
> @@ -1000,6 +1001,57 @@ void install_packed_git(struct packed_git *pack)
>       packed_git = pack;
>  }
>  
> +void (*report_garbage)(const char *desc, const char *path);
> +
> +static void report_helper(const struct string_list *list,
> +                       int seen_bits, int first, int last)
> +{
> +     const char *msg;
> +     switch (seen_bits) {
> +     case 0: msg = "no corresponding .idx nor .pack"; break;
> +     case 1: msg = "no corresponding .idx"; break;
> +     case 2: msg = "no corresponding .pack"; break;

That's dense.

> +     default:
> +             return;
> +     }
> +     for (; first <= last; first++)

This looks odd.  If you use the usual last+1 convention between the
caller and callee, you do not have to do this, or call this function
with "i - 1" and "list->nr -1" as the last parameter.

> +static void report_pack_garbage(struct string_list *list)
> +{
> +     int i, baselen = -1, first = 0, seen_bits = 0;
> +
> +     if (!report_garbage)
> +             return;
> +
> +     sort_string_list(list);
> +
> +     for (i = 0; i < list->nr; i++) {
> +             const char *path = list->items[i].string;
> +             if (baselen != -1 &&
> +                 strncmp(path, list->items[first].string, baselen)) {
> +                     report_helper(list, seen_bits, first, i - 1);
> +                     baselen = -1;
> +                     seen_bits = 0;
> +             }
> +             if (baselen == -1) {
> +                     const char *dot = strrchr(path, '.');
> +                     if (!dot) {
> +                             report_garbage("garbage found", path);
> +                             continue;
> +                     }
> +                     baselen = dot - path + 1;
> +                     first = i;
> +             }
> +             if (!strcmp(path + baselen, "pack"))
> +                     seen_bits |= 1;
> +             else if (!strcmp(path + baselen, "idx"))
> +                     seen_bits |= 2;
> +     }
> +     report_helper(list, seen_bits, first, list->nr - 1);
> +}

> @@ -1009,6 +1061,7 @@ static void prepare_packed_git_one(char *objdir, int 
> local)
>       int len;
>       DIR *dir;
>       struct dirent *de;
> +     struct string_list garbage = STRING_LIST_INIT_DUP;
>  
>       sprintf(path, "%s/pack", objdir);
>       len = strlen(path);
> ...
> @@ -1043,8 +1106,20 @@ static void prepare_packed_git_one(char *objdir, int 
> local)
>                           (p = add_packed_git(path, len + namelen, local)) != 
> NULL)
>                               install_packed_git(p);
>               }
> +
> +             if (!report_garbage)
> +                     continue;
> +
> +             if (has_extension(de->d_name, ".idx") ||
> +                 has_extension(de->d_name, ".pack") ||
> +                 has_extension(de->d_name, ".keep"))
> +                     string_list_append(&garbage, path);

It might be OK to put .pack and .keep in the same "if (A || B)" as
it may happen to be that they do not need any special treatment
right now, but I do not think this is a good idea in general.

You would want to do things differently for ".idx", e.g.

diff --git a/sha1_file.c b/sha1_file.c
index 5bedf78..450521f 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1076,6 +1076,7 @@ static void prepare_packed_git_one(char *objdir, int 
local)
        while ((de = readdir(dir)) != NULL) {
                int namelen = strlen(de->d_name);
                struct packed_git *p;
+               int is_a_bad_idx = 0;
 
                if (len + namelen + 1 > sizeof(path)) {
                        if (report_garbage) {
@@ -1105,12 +1106,14 @@ static void prepare_packed_git_one(char *objdir, int 
local)
                             */
                            (p = add_packed_git(path, len + namelen, local)) != 
NULL)
                                install_packed_git(p);
+                       else
+                               is_a_bad_idx = 1;
                }
 
                if (!report_garbage)
                        continue;
 
-               if (has_extension(de->d_name, ".idx") ||
+               if ((has_extension(de->d_name, ".idx") && !is_a_bad_idx) ||
                    has_extension(de->d_name, ".pack") ||
                    has_extension(de->d_name, ".keep"))
                        string_list_append(&garbage, path);


so that you can say something about .pack/.keep files that do not
have a working .idx file.  In the above example, the only special
thing you would do for .idx is just to check if it is a bad one, but
in later patches you may have to do different things in the body
(i.e. something else in addition to string_list_append(&garbage))
not just in the condition.  Collapsing these into a condition to a
single "if (A||B||C)" may be suffering from a lack of foresight.

> diff --git a/t/t5304-prune.sh b/t/t5304-prune.sh
> index d645328..e4bb3a1 100755
> --- a/t/t5304-prune.sh
> +++ b/t/t5304-prune.sh
> @@ -195,4 +195,30 @@ test_expect_success 'gc: prune old objects after local 
> clone' '
>       )
>  '
>  
> +test_expect_success 'garbage report in count-objects -v' '
> +     : >.git/objects/pack/foo &&
> +     : >.git/objects/pack/foo.bar &&
> +     : >.git/objects/pack/foo.keep &&
> +     : >.git/objects/pack/foo.pack &&
> +     : >.git/objects/pack/fake.bar &&
> +     : >.git/objects/pack/fake.keep &&
> +     : >.git/objects/pack/fake.pack &&
> +     : >.git/objects/pack/fake.idx &&
> +     : >.git/objects/pack/fake2.keep &&
> +     : >.git/objects/pack/fake3.idx &&
> +     git count-objects -v 2>stderr &&
> +     grep "index file .git/objects/pack/fake.idx is too small" stderr &&

The above suggested change will make a difference to
fake.{pack,keep} because of this breakage, I think.

> +     grep "^warning:" stderr | sort >actual &&
> +     cat >expected <<\EOF &&
> +warning: garbage found: .git/objects/pack/fake.bar
> +warning: garbage found: .git/objects/pack/foo
> +warning: garbage found: .git/objects/pack/foo.bar
> +warning: no corresponding .idx nor .pack: .git/objects/pack/fake2.keep
> +warning: no corresponding .idx: .git/objects/pack/foo.keep
> +warning: no corresponding .idx: .git/objects/pack/foo.pack
> +warning: no corresponding .pack: .git/objects/pack/fake3.idx
> +EOF
> +     test_cmp expected actual
> +'
> +
>  test_done
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to