Am Donnerstag, den 29.05.2014, 19:57 +0700 schrieb Nguyễn Thái Ngọc Duy:

Hi,

sorry for chiming in so late.

I've just played around with patch 3 and 4 of that series.
And I like it very much as I work often with large files so any further 
enhancement in that area is really nice.

(see comments below)

> Too large files may lead to failure to allocate memory. If it happens
> here, it could impact quite a few commands that involve
> diff. Moreover, too large files are inefficient to compare anyway (and
> most likely non-text), so mark them binary and skip looking at their
> content.
> 
> Noticed-by: Dale R. Worley <wor...@alum.mit.edu>
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclo...@gmail.com>
> ---
>  diff.c           | 26 ++++++++++++++++++--------
>  diffcore.h       |  1 +
>  t/t1050-large.sh |  4 ++++
>  3 files changed, 23 insertions(+), 8 deletions(-)
> 
> diff --git a/diff.c b/diff.c
> index 54281cb..0a2f865 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -2185,8 +2185,8 @@ int diff_filespec_is_binary(struct diff_filespec *one)
>                       one->is_binary = one->driver->binary;
>               else {
>                       if (!one->data && DIFF_FILE_VALID(one))
> -                             diff_populate_filespec(one, 0);
> -                     if (one->data)
> +                             diff_populate_filespec(one, 
> DIFF_POPULATE_IS_BINARY);
> +                     if (one->is_binary == -1 && one->data)
>                               one->is_binary = buffer_is_binary(one->data,
>                                               one->size);
>                       if (one->is_binary == -1)
> @@ -2721,6 +2721,11 @@ int diff_populate_filespec(struct diff_filespec *s, 
> unsigned int flags)
>               }
>               if (size_only)
>                       return 0;
> +             if ((flags & DIFF_POPULATE_IS_BINARY) &&
> +                 s->size > big_file_threshold && s->is_binary == -1) {
> +                     s->is_binary = 1;
> +                     return 0;
> +             }

Why do you check for s->is_binary == -1 here? I think it does not matter
what s_is_binary says here.

>               fd = open(s->path, O_RDONLY);
>               if (fd < 0)
>                       goto err_empty;
> @@ -2742,16 +2747,21 @@ int diff_populate_filespec(struct diff_filespec *s, 
> unsigned int flags)
>       }
>       else {
>               enum object_type type;
> -             if (size_only) {
> +             if (size_only || (flags & DIFF_POPULATE_IS_BINARY)) {
>                       type = sha1_object_info(s->sha1, &s->size);
>                       if (type < 0)
>                               die("unable to read %s", sha1_to_hex(s->sha1));
> -             } else {
> -                     s->data = read_sha1_file(s->sha1, &type, &s->size);
> -                     if (!s->data)
> -                             die("unable to read %s", sha1_to_hex(s->sha1));
> -                     s->should_free = 1;
> +                     if (size_only)
> +                             return 0;
> +                     if (s->size > big_file_threshold && s->is_binary == -1) 
> {
same as above.
> +                             s->is_binary = 1;
> +                             return 0;
> +                     }
>               }
> +             s->data = read_sha1_file(s->sha1, &type, &s->size);
> +             if (!s->data)
> +                     die("unable to read %s", sha1_to_hex(s->sha1));
> +             s->should_free = 1;
>       }
>       return 0;
>  }
> diff --git a/diffcore.h b/diffcore.h
> index a186d7c..e7760d9 100644
> --- a/diffcore.h
> +++ b/diffcore.h
> @@ -56,6 +56,7 @@ extern void fill_filespec(struct diff_filespec *, const 
> unsigned char *,
>                         int, unsigned short);
>  
>  #define DIFF_POPULATE_SIZE_ONLY 1
> +#define DIFF_POPULATE_IS_BINARY 2
>  extern int diff_populate_filespec(struct diff_filespec *, unsigned int);
>  extern void diff_free_filespec_data(struct diff_filespec *);
>  extern void diff_free_filespec_blob(struct diff_filespec *);
> diff --git a/t/t1050-large.sh b/t/t1050-large.sh
> index 333909b..4d922e2 100755
> --- a/t/t1050-large.sh
> +++ b/t/t1050-large.sh
> @@ -112,6 +112,10 @@ test_expect_success 'diff --raw' '
>       git diff --raw HEAD^
>  '
>  
> +test_expect_success 'diff --stat' '
> +     git diff --stat HEAD^ HEAD
> +'
> +
>  test_expect_success 'hash-object' '
>       git hash-object large1
>  '

I would also add a note to the documentation e. g:

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 9f467d3..7a2f27d 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -499,7 +499,8 @@ core.bigFileThreshold::
        Files larger than this size are stored deflated, without
        attempting delta compression.  Storing large files without
        delta compression avoids excessive memory usage, at the
-       slight expense of increased disk usage.
+       slight expense of increased disk usage.  Additionally files
+       larger than this size are allways treated as binary.
 +
 Default is 512 MiB on all platforms.  This should be reasonable
 for most projects as source code and other text files can still

Thomas

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to