Re: [PATCH 30/33] tree-diff: convert diff_tree_paths to struct object_id

2017-07-15 Thread brian m. carlson
On Sat, Jul 15, 2017 at 07:18:51PM +0200, René Scharfe wrote:
> -- >8 --
> Subject: [PATCH] tree-diff: don't access hash of NULL object_id pointer
> 
> The object_id pointers can be NULL for invalid entries.  Don't try to
> dereference them and pass NULL along to fill_tree_descriptor() instead,
> which handles them just fine.
> 
> Found with Clang's UBSan.
> 
> Signed-off-by: Rene Scharfe 
> ---
> fill_tree_descriptor() can easily be converted to object_id, by the
> way, which would get us rid of the extra check introduced here, but
> this patch is meant as a minimal fix.
> 
>  tree-diff.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/tree-diff.c b/tree-diff.c
> index bd6d65a409..2357f72899 100644
> --- a/tree-diff.c
> +++ b/tree-diff.c
> @@ -421,8 +421,9 @@ static struct combine_diff_path *ll_diff_tree_paths(
>*   diff_tree_oid(parent, commit) )
>*/
>   for (i = 0; i < nparent; ++i)
> - tptree[i] = fill_tree_descriptor([i], parents_oid[i]->hash);
> - ttree = fill_tree_descriptor(, oid->hash);
> + tptree[i] = fill_tree_descriptor([i],
> + parents_oid[i] ? parents_oid[i]->hash : NULL);
> + ttree = fill_tree_descriptor(, oid ? oid->hash : NULL);

Good catch.  This seems obviously correct.
-- 
brian m. carlson / brian with sandals: Houston, Texas, US
https://www.crustytoothpaste.net/~bmc | My opinion only
OpenPGP: https://keybase.io/bk2204


signature.asc
Description: PGP signature


Re: [PATCH 30/33] tree-diff: convert diff_tree_paths to struct object_id

2017-07-15 Thread René Scharfe
Am 30.05.2017 um 19:31 schrieb Brandon Williams:
> @@ -273,21 +274,20 @@ static struct combine_diff_path *emit_path(struct 
> combine_diff_path *p,
>   }
>   
>   if (recurse) {
> - const unsigned char **parents_sha1;
> + const struct object_id **parents_oid;
>   
> - FAST_ARRAY_ALLOC(parents_sha1, nparent);
> + FAST_ARRAY_ALLOC(parents_oid, nparent);
>   for (i = 0; i < nparent; ++i) {
>   /* same rule as in emitthis */
>   int tpi_valid = tp && !(tp[i].entry.mode & 
> S_IFXMIN_NEQ);
>   
> - parents_sha1[i] = tpi_valid ? tp[i].entry.oid->hash
> - : NULL;
> + parents_oid[i] = tpi_valid ? tp[i].entry.oid : NULL;
>   }

So elements of parents_oid can be NULL...

>   
>   strbuf_add(base, path, pathlen);
>   strbuf_addch(base, '/');
> - p = ll_diff_tree_paths(p, sha1, parents_sha1, nparent, base, 
> opt);
> - FAST_ARRAY_FREE(parents_sha1, nparent);
> + p = ll_diff_tree_paths(p, oid, parents_oid, nparent, base, opt);

... and we pass that array to ll_diff_tree_paths()...

> + FAST_ARRAY_FREE(parents_oid, nparent);
>   }
>   
>   strbuf_setlen(base, old_baselen);
> @@ -312,7 +312,7 @@ static void skip_uninteresting(struct tree_desc *t, 
> struct strbuf *base,
>   
>   
>   /*
> - * generate paths for combined diff D(sha1,parents_sha1[])
> + * generate paths for combined diff D(sha1,parents_oid[])
>*
>* Resulting paths are appended to combine_diff_path linked list, and also, 
> are
>* emitted on the go via opt->pathchange() callback, so it is possible to
> @@ -404,8 +404,8 @@ static inline void update_tp_entries(struct tree_desc 
> *tp, int nparent)
>   }
>   
>   static struct combine_diff_path *ll_diff_tree_paths(
> - struct combine_diff_path *p, const unsigned char *sha1,
> - const unsigned char **parents_sha1, int nparent,
> + struct combine_diff_path *p, const struct object_id *oid,
> + const struct object_id **parents_oid, int nparent,
>   struct strbuf *base, struct diff_options *opt)
>   {
>   struct tree_desc t, *tp;
> @@ -422,8 +422,8 @@ static struct combine_diff_path *ll_diff_tree_paths(
>*   diff_tree_oid(parent, commit) )
>*/
>   for (i = 0; i < nparent; ++i)
> - tptree[i] = fill_tree_descriptor([i], parents_sha1[i]);
> - ttree = fill_tree_descriptor(, sha1);
> + tptree[i] = fill_tree_descriptor([i], parents_oid[i]->hash);

... and here we are in that function, dereferencing a pointer that may
be NULL.

> + ttree = fill_tree_descriptor(, oid->hash);

oid here can also be NULL, but I think that's not as easy to see.

-- >8 --
Subject: [PATCH] tree-diff: don't access hash of NULL object_id pointer

The object_id pointers can be NULL for invalid entries.  Don't try to
dereference them and pass NULL along to fill_tree_descriptor() instead,
which handles them just fine.

Found with Clang's UBSan.

Signed-off-by: Rene Scharfe 
---
fill_tree_descriptor() can easily be converted to object_id, by the
way, which would get us rid of the extra check introduced here, but
this patch is meant as a minimal fix.

 tree-diff.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tree-diff.c b/tree-diff.c
index bd6d65a409..2357f72899 100644
--- a/tree-diff.c
+++ b/tree-diff.c
@@ -421,8 +421,9 @@ static struct combine_diff_path *ll_diff_tree_paths(
 *   diff_tree_oid(parent, commit) )
 */
for (i = 0; i < nparent; ++i)
-   tptree[i] = fill_tree_descriptor([i], parents_oid[i]->hash);
-   ttree = fill_tree_descriptor(, oid->hash);
+   tptree[i] = fill_tree_descriptor([i],
+   parents_oid[i] ? parents_oid[i]->hash : NULL);
+   ttree = fill_tree_descriptor(, oid ? oid->hash : NULL);
 
/* Enable recursion indefinitely */
opt->pathspec.recursive = DIFF_OPT_TST(opt, RECURSIVE);
-- 
2.13.3


Re: [PATCH 30/33] tree-diff: convert diff_tree_paths to struct object_id

2017-06-01 Thread Junio C Hamano
Brandon Williams  writes:

> @@ -220,7 +221,7 @@ static struct combine_diff_path *emit_path(struct 
> combine_diff_path *p,
>   if (emitthis) {
>   int keep;
>   struct combine_diff_path *pprev = p;
> - p = path_appendnew(p, nparent, base, path, pathlen, mode, sha1);
> + p = path_appendnew(p, nparent, base, path, pathlen, mode, oid ? 
> oid->hash : NULL);

This is a correct conversion, but it shows that "struct oid" that
has "hash" sometimes makes things a bit cumbersome.  Perhaps
path_appendnew() and friends need to learn to take a pointer to
"struct oid" next.



Re: [PATCH 30/33] tree-diff: convert diff_tree_paths to struct object_id

2017-05-31 Thread Jeff King
On Wed, May 31, 2017 at 05:29:20PM -0400, Jeff King wrote:

> Or did you mean that diff_tree_paths() could now take an actual
> array-of-struct rather than an array-of-pointer-to-struct? That would
> drop the "parents_oid" array entirely. I think that's actually
> orthogonal to this change (the same could have been done with the
> original sha1 array), but would be a nice cleanup on top.

I took a quick look at this, but it doesn't work. This caller
(find_paths_multitree) would be happy to just pass the existing
"parents" array.

But if we change the interface to diff_tree_paths(), we also have to
change ll_diff_tree_paths() to match. And that function is called from
emit_path(), which really does assemble a list of pointers to
non-adjacent bits of memory. So it would still have to allocate, and
would now have to copy whole oids rather than just pointers.

But even worse, it seems to leave some entries as NULL. We'd need some
sentinel value (like the null sha1) to replace that.

So I think the code is already in its simplest form.

-Peff


Re: [PATCH 30/33] tree-diff: convert diff_tree_paths to struct object_id

2017-05-31 Thread Jeff King
On Wed, May 31, 2017 at 11:24:33AM -0700, Stefan Beller wrote:

> On Tue, May 30, 2017 at 10:31 AM, Brandon Williams  wrote:
> >
> > Signed-off-by: Brandon Williams 
> > ---
> >  combine-diff.c | 10 +-
> >  diff.h |  4 ++--
> >  tree-diff.c| 63 
> > +-
> >  3 files changed, 39 insertions(+), 38 deletions(-)
> >
> > diff --git a/combine-diff.c b/combine-diff.c
> > index 04c4ae856..ec9d93044 100644
> > --- a/combine-diff.c
> > +++ b/combine-diff.c
> > @@ -1364,22 +1364,22 @@ static struct combine_diff_path 
> > *find_paths_multitree(
> > struct diff_options *opt)
> >  {
> > int i, nparent = parents->nr;
> > -   const unsigned char **parents_sha1;
> > +   const struct object_id **parents_oid;
> > struct combine_diff_path paths_head;
> > struct strbuf base;
> >
> > -   ALLOC_ARRAY(parents_sha1, nparent);
> > +   ALLOC_ARRAY(parents_oid, nparent);
> > for (i = 0; i < nparent; i++)
> > -   parents_sha1[i] = parents->oid[i].hash;
> > +   parents_oid[i] = >oid[i];
> 
> I have the impression that we could get away with one layer less
> of indirection. Previously we had a heap allocated array (*) of (char*),
> now we'd have a an array (*) of pointers(*) of the oid struct, that
> is a (char[]) essentially. Maybe I am just confused?

I don't think so. We always could have allocated the original as an
array of 20-byte chunks. It's syntactically less awkward in C when that
20-byte chunk is wrapped in a struct like object_id. But fundamentally
I think we don't need to be making a copy of the oid. We're pointing to
the existing copy in the "parents" array.

Or did you mean that diff_tree_paths() could now take an actual
array-of-struct rather than an array-of-pointer-to-struct? That would
drop the "parents_oid" array entirely. I think that's actually
orthogonal to this change (the same could have been done with the
original sha1 array), but would be a nice cleanup on top.

-Peff


Re: [PATCH 30/33] tree-diff: convert diff_tree_paths to struct object_id

2017-05-31 Thread Stefan Beller
On Tue, May 30, 2017 at 10:31 AM, Brandon Williams  wrote:
>
> Signed-off-by: Brandon Williams 
> ---
>  combine-diff.c | 10 +-
>  diff.h |  4 ++--
>  tree-diff.c| 63 
> +-
>  3 files changed, 39 insertions(+), 38 deletions(-)
>
> diff --git a/combine-diff.c b/combine-diff.c
> index 04c4ae856..ec9d93044 100644
> --- a/combine-diff.c
> +++ b/combine-diff.c
> @@ -1364,22 +1364,22 @@ static struct combine_diff_path *find_paths_multitree(
> struct diff_options *opt)
>  {
> int i, nparent = parents->nr;
> -   const unsigned char **parents_sha1;
> +   const struct object_id **parents_oid;
> struct combine_diff_path paths_head;
> struct strbuf base;
>
> -   ALLOC_ARRAY(parents_sha1, nparent);
> +   ALLOC_ARRAY(parents_oid, nparent);
> for (i = 0; i < nparent; i++)
> -   parents_sha1[i] = parents->oid[i].hash;
> +   parents_oid[i] = >oid[i];

I have the impression that we could get away with one layer less
of indirection. Previously we had a heap allocated array (*) of (char*),
now we'd have a an array (*) of pointers(*) of the oid struct, that
is a (char[]) essentially. Maybe I am just confused?


[PATCH 30/33] tree-diff: convert diff_tree_paths to struct object_id

2017-05-30 Thread Brandon Williams

Signed-off-by: Brandon Williams 
---
 combine-diff.c | 10 +-
 diff.h |  4 ++--
 tree-diff.c| 63 +-
 3 files changed, 39 insertions(+), 38 deletions(-)

diff --git a/combine-diff.c b/combine-diff.c
index 04c4ae856..ec9d93044 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -1364,22 +1364,22 @@ static struct combine_diff_path *find_paths_multitree(
struct diff_options *opt)
 {
int i, nparent = parents->nr;
-   const unsigned char **parents_sha1;
+   const struct object_id **parents_oid;
struct combine_diff_path paths_head;
struct strbuf base;
 
-   ALLOC_ARRAY(parents_sha1, nparent);
+   ALLOC_ARRAY(parents_oid, nparent);
for (i = 0; i < nparent; i++)
-   parents_sha1[i] = parents->oid[i].hash;
+   parents_oid[i] = >oid[i];
 
/* fake list head, so worker can assume it is non-NULL */
paths_head.next = NULL;
 
strbuf_init(, PATH_MAX);
-   diff_tree_paths(_head, oid->hash, parents_sha1, nparent, , 
opt);
+   diff_tree_paths(_head, oid, parents_oid, nparent, , opt);
 
strbuf_release();
-   free(parents_sha1);
+   free(parents_oid);
return paths_head.next;
 }
 
diff --git a/diff.h b/diff.h
index e0b503460..0d0c14f28 100644
--- a/diff.h
+++ b/diff.h
@@ -210,8 +210,8 @@ const char *diff_line_prefix(struct diff_options *);
 extern const char mime_boundary_leader[];
 
 extern struct combine_diff_path *diff_tree_paths(
-   struct combine_diff_path *p, const unsigned char *sha1,
-   const unsigned char **parent_sha1, int nparent,
+   struct combine_diff_path *p, const struct object_id *oid,
+   const struct object_id **parents_oid, int nparent,
struct strbuf *base, struct diff_options *opt);
 extern int diff_tree_oid(const struct object_id *old_oid,
 const struct object_id *new_oid,
diff --git a/tree-diff.c b/tree-diff.c
index 29e3f6144..6a960f569 100644
--- a/tree-diff.c
+++ b/tree-diff.c
@@ -26,11 +26,12 @@
 } while(0)
 
 static struct combine_diff_path *ll_diff_tree_paths(
-   struct combine_diff_path *p, const unsigned char *sha1,
-   const unsigned char **parents_sha1, int nparent,
+   struct combine_diff_path *p, const struct object_id *oid,
+   const struct object_id **parents_oid, int nparent,
struct strbuf *base, struct diff_options *opt);
-static int ll_diff_tree_sha1(const unsigned char *old, const unsigned char 
*new,
-struct strbuf *base, struct diff_options *opt);
+static int ll_diff_tree_oid(const struct object_id *old_oid,
+   const struct object_id *new_oid,
+   struct strbuf *base, struct diff_options *opt);
 
 /*
  * Compare two tree entries, taking into account only path/S_ISDIR(mode),
@@ -183,7 +184,7 @@ static struct combine_diff_path *emit_path(struct 
combine_diff_path *p,
 {
unsigned mode;
const char *path;
-   const unsigned char *sha1;
+   const struct object_id *oid;
int pathlen;
int old_baselen = base->len;
int i, isdir, recurse = 0, emitthis = 1;
@@ -193,7 +194,7 @@ static struct combine_diff_path *emit_path(struct 
combine_diff_path *p,
 
if (t) {
/* path present in resulting tree */
-   sha1 = tree_entry_extract(t, , )->hash;
+   oid = tree_entry_extract(t, , );
pathlen = tree_entry_len(>entry);
isdir = S_ISDIR(mode);
} else {
@@ -208,7 +209,7 @@ static struct combine_diff_path *emit_path(struct 
combine_diff_path *p,
pathlen = tree_entry_len([imin].entry);
 
isdir = S_ISDIR(mode);
-   sha1 = NULL;
+   oid = NULL;
mode = 0;
}
 
@@ -220,7 +221,7 @@ static struct combine_diff_path *emit_path(struct 
combine_diff_path *p,
if (emitthis) {
int keep;
struct combine_diff_path *pprev = p;
-   p = path_appendnew(p, nparent, base, path, pathlen, mode, sha1);
+   p = path_appendnew(p, nparent, base, path, pathlen, mode, oid ? 
oid->hash : NULL);
 
for (i = 0; i < nparent; ++i) {
/*
@@ -229,7 +230,7 @@ static struct combine_diff_path *emit_path(struct 
combine_diff_path *p,
 */
int tpi_valid = tp && !(tp[i].entry.mode & 
S_IFXMIN_NEQ);
 
-   const unsigned char *sha1_i;
+   const struct object_id *oid_i;
unsigned mode_i;
 
p->parent[i].status =
@@ -239,16 +240,16 @@ static struct combine_diff_path *emit_path(struct 
combine_diff_path *p,
DIFF_STATUS_ADDED;
 
if (tpi_valid) {
-