Re: [PATCH 56/67] avoid sprintf and strcpy with flex arrays

2015-09-21 Thread Jeff King
On Mon, Sep 21, 2015 at 01:11:09PM -0400, Eric Sunshine wrote:

> >> > -   p = xcalloc(1, sizeof(*p) + strlen(tmp_file) + 2);
> >> > -   strcpy(p->pack_name, tmp_file);
> >> > +   namelen = strlen(tmp_file) + 2;
> >>
> >> You mentioned this specially in the commit message, but from a brief
> >> read of the code, it's still not obvious (to me) why this is +2 rather
> >> than +1. Since you're touching the code anyhow, perhaps add an in-code
> >> comment explaining it?
> >
> > To be honest, I'm not sure what's going on with the "+ 2" here.
> >
> > In many cases with packed_git we allocate with "foo.idx" and want to be
> > able to later write "foo.pack" into the same buffer. But here we are
> > putting in a tmpfile name. This comes from 8455e48, but I don't see any
> > clue there. I wonder if the "+2" was simply cargo-culted from other
> > instances.
> 
> Ah, ok. I guess I misunderstood the commit message to mean or imply
> that the +2 was correct and sensible and well-understood.

I think it was more that I looked at other instances of packed_git, and
realized they could not be safely converted. I think "struct
alternate_object_database" has similar problems.

-Peff

PS As I mentioned earlier, I did end up adding a FLEX_ALLOC() macro in
   another series that builds on top of this. I haven't posted it yet,
   but check out:

 https://github.com/peff/git/commit/ba491c527572c763286b4b9519aef3c30482c2d1

   and

 https://github.com/peff/git/commit/d88444d5ba00bd875ef5291dca3b71dd046186dc

   if you are curious.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 56/67] avoid sprintf and strcpy with flex arrays

2015-09-21 Thread Eric Sunshine
On Mon, Sep 21, 2015 at 11:15 AM, Jeff King  wrote:
> On Sun, Sep 20, 2015 at 06:48:32PM -0400, Eric Sunshine wrote:
>> > diff --git a/archive.c b/archive.c
>> > index 01b0899..4ac86c8 100644
>> > --- a/archive.c
>> > +++ b/archive.c
>> > @@ -171,13 +171,14 @@ static void queue_directory(const unsigned char 
>> > *sha1,
>> > unsigned mode, int stage, struct archiver_context *c)
>> >  {
>> > struct directory *d;
>> > -   d = xmallocz(sizeof(*d) + base->len + 1 + strlen(filename));
>> > +   size_t len = base->len + 1 + strlen(filename) + 1;
>> > +   d = xmalloc(sizeof(*d) + len);
>>
>> Mental note: The new code makes this one longer than the original code...
>>
>> > d->up  = c->bottom;
>> > d->baselen = base->len;
>> > d->mode= mode;
>> > d->stage   = stage;
>> > c->bottom  = d;
>> > -   d->len = sprintf(d->path, "%.*s%s/", (int)base->len, base->buf, 
>> > filename);
>> > +   d->len = xsnprintf(d->path, len, "%.*s%s/", (int)base->len, 
>> > base->buf, filename);
>>
>> Considering that we need space for the '/' and the NUL, the new code
>> seems to be correct, and the old code was under-allocating, right?
>
> Not quite. The original uses xmallocz, which handles the NUL itself. But
> the purpose of this patch is to pull the total length into a variable
> that we can use both for the malloc and for the xsnprintf, so we have
> to account for it ourselves.

Makes sense. I missed the "z" when reading the old code. Thanks for
the explanation.

>> > -   p = xcalloc(1, sizeof(*p) + strlen(tmp_file) + 2);
>> > -   strcpy(p->pack_name, tmp_file);
>> > +   namelen = strlen(tmp_file) + 2;
>>
>> You mentioned this specially in the commit message, but from a brief
>> read of the code, it's still not obvious (to me) why this is +2 rather
>> than +1. Since you're touching the code anyhow, perhaps add an in-code
>> comment explaining it?
>
> To be honest, I'm not sure what's going on with the "+ 2" here.
>
> In many cases with packed_git we allocate with "foo.idx" and want to be
> able to later write "foo.pack" into the same buffer. But here we are
> putting in a tmpfile name. This comes from 8455e48, but I don't see any
> clue there. I wonder if the "+2" was simply cargo-culted from other
> instances.

Ah, ok. I guess I misunderstood the commit message to mean or imply
that the +2 was correct and sensible and well-understood.

> I'm loath to change it in the middle of this patch, because it would be
> hard to see amidst the other changes. I'd rather make this a straight
> conversion, and worry about it in a separate patch.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 56/67] avoid sprintf and strcpy with flex arrays

2015-09-21 Thread Jeff King
On Sun, Sep 20, 2015 at 06:48:32PM -0400, Eric Sunshine wrote:

> > diff --git a/archive.c b/archive.c
> > index 01b0899..4ac86c8 100644
> > --- a/archive.c
> > +++ b/archive.c
> > @@ -171,13 +171,14 @@ static void queue_directory(const unsigned char *sha1,
> > unsigned mode, int stage, struct archiver_context *c)
> >  {
> > struct directory *d;
> > -   d = xmallocz(sizeof(*d) + base->len + 1 + strlen(filename));
> > +   size_t len = base->len + 1 + strlen(filename) + 1;
> > +   d = xmalloc(sizeof(*d) + len);
> 
> Mental note: The new code makes this one longer than the original code...
>
> > d->up  = c->bottom;
> > d->baselen = base->len;
> > d->mode= mode;
> > d->stage   = stage;
> > c->bottom  = d;
> > -   d->len = sprintf(d->path, "%.*s%s/", (int)base->len, base->buf, 
> > filename);
> > +   d->len = xsnprintf(d->path, len, "%.*s%s/", (int)base->len, 
> > base->buf, filename);
> 
> Considering that we need space for the '/' and the NUL, the new code
> seems to be correct, and the old code was under-allocating, right?

Not quite. The original uses xmallocz, which handles the NUL itself. But
the purpose of this patch is to pull the total length into a variable
that we can use both for the malloc and for the xsnprintf, so we have
to account for it ourselves.

We do lose the setting of the final byte to '\0' that xmallocz does, but
it doesn't matter here because xsnprintf will add the NUL itself.

> > diff --git a/fast-import.c b/fast-import.c
> > index d0c2502..895c6b4 100644
> > --- a/fast-import.c
> > +++ b/fast-import.c
> > @@ -863,13 +863,15 @@ static void start_packfile(void)
> >  {
> > static char tmp_file[PATH_MAX];
> > struct packed_git *p;
> > +   int namelen;
> > struct pack_header hdr;
> > int pack_fd;
> >
> > pack_fd = odb_mkstemp(tmp_file, sizeof(tmp_file),
> >   "pack/tmp_pack_XX");
> > -   p = xcalloc(1, sizeof(*p) + strlen(tmp_file) + 2);
> > -   strcpy(p->pack_name, tmp_file);
> > +   namelen = strlen(tmp_file) + 2;
> 
> You mentioned this specially in the commit message, but from a brief
> read of the code, it's still not obvious (to me) why this is +2 rather
> than +1. Since you're touching the code anyhow, perhaps add an in-code
> comment explaining it?

To be honest, I'm not sure what's going on with the "+ 2" here.

In many cases with packed_git we allocate with "foo.idx" and want to be
able to later write "foo.pack" into the same buffer. But here we are
putting in a tmpfile name. This comes from 8455e48, but I don't see any
clue there. I wonder if the "+2" was simply cargo-culted from other
instances.

I'm loath to change it in the middle of this patch, because it would be
hard to see amidst the other changes. I'd rather make this a straight
conversion, and worry about it in a separate patch.

> > -   strcpy((char *)update->refname, refname);
> > +   memcpy((char *)update->refname, refname, len); /* includese NUL */
> 
> s/includese/includes/

Thanks, fixed.

-Peff
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 56/67] avoid sprintf and strcpy with flex arrays

2015-09-20 Thread Eric Sunshine
On Tue, Sep 15, 2015 at 12:09 PM, Jeff King  wrote:
> When we are allocating a struct with a FLEX_ARRAY member, we
> generally compute the size of the array and then sprintf or
> strcpy into it. Normally we could improve a dynamic allocation
> like this by using xstrfmt, but it doesn't work here; we
> have to account for the size of the rest of the struct.
>
> But we can improve things a bit by storing the length that
> we use for the allocation, and then feeding it to xsnprintf
> or memcpy, which makes it more obvious that we are not
> writing more than the allocated number of bytes.
>
> It would be nice if we had some kind of helper for
> allocating generic flex arrays, but it doesn't work that
> well:
>
>  - the call signature is a little bit unwieldy:
>
>   d = flex_struct(sizeof(*d), offsetof(d, path), fmt, ...);
>
>You need offsetof here instead of just writing to the
>end of the base size, because we don't know how the
>struct is packed (partially this is because FLEX_ARRAY
>might not be zero, though we can account for that; but
>the size of the struct may actually be rounded up for
>alignment, and we can't know that).
>
>  - some sites do clever things, like over-allocating because
>they know they will write larger things into the buffer
>later (e.g., struct packed_git here).
>
> So we're better off to just write out each allocation (or
> add type-specific helpers, though many of these are one-off
> allocations anyway).
>
> Signed-off-by: Jeff King 
> ---
> diff --git a/archive.c b/archive.c
> index 01b0899..4ac86c8 100644
> --- a/archive.c
> +++ b/archive.c
> @@ -171,13 +171,14 @@ static void queue_directory(const unsigned char *sha1,
> unsigned mode, int stage, struct archiver_context *c)
>  {
> struct directory *d;
> -   d = xmallocz(sizeof(*d) + base->len + 1 + strlen(filename));
> +   size_t len = base->len + 1 + strlen(filename) + 1;
> +   d = xmalloc(sizeof(*d) + len);

Mental note: The new code makes this one longer than the original code...

> d->up  = c->bottom;
> d->baselen = base->len;
> d->mode= mode;
> d->stage   = stage;
> c->bottom  = d;
> -   d->len = sprintf(d->path, "%.*s%s/", (int)base->len, base->buf, 
> filename);
> +   d->len = xsnprintf(d->path, len, "%.*s%s/", (int)base->len, 
> base->buf, filename);

Considering that we need space for the '/' and the NUL, the new code
seems to be correct, and the old code was under-allocating, right?

> hashcpy(d->oid.hash, sha1);
>  }
>
> diff --git a/fast-import.c b/fast-import.c
> index d0c2502..895c6b4 100644
> --- a/fast-import.c
> +++ b/fast-import.c
> @@ -863,13 +863,15 @@ static void start_packfile(void)
>  {
> static char tmp_file[PATH_MAX];
> struct packed_git *p;
> +   int namelen;
> struct pack_header hdr;
> int pack_fd;
>
> pack_fd = odb_mkstemp(tmp_file, sizeof(tmp_file),
>   "pack/tmp_pack_XX");
> -   p = xcalloc(1, sizeof(*p) + strlen(tmp_file) + 2);
> -   strcpy(p->pack_name, tmp_file);
> +   namelen = strlen(tmp_file) + 2;

You mentioned this specially in the commit message, but from a brief
read of the code, it's still not obvious (to me) why this is +2 rather
than +1. Since you're touching the code anyhow, perhaps add an in-code
comment explaining it?

> +   p = xcalloc(1, sizeof(*p) + namelen);
> +   xsnprintf(p->pack_name, namelen, "%s", tmp_file);
> p->pack_fd = pack_fd;
> p->do_not_close = 1;
> pack_file = sha1fd(pack_fd, p->pack_name);
> diff --git a/refs.c b/refs.c
> index c2709de..df6c41a 100644
> --- a/refs.c
> +++ b/refs.c
> @@ -3984,10 +3984,10 @@ void ref_transaction_free(struct ref_transaction 
> *transaction)
>  static struct ref_update *add_update(struct ref_transaction *transaction,
>  const char *refname)
>  {
> -   size_t len = strlen(refname);
> -   struct ref_update *update = xcalloc(1, sizeof(*update) + len + 1);
> +   size_t len = strlen(refname) + 1;
> +   struct ref_update *update = xcalloc(1, sizeof(*update) + len);
>
> -   strcpy((char *)update->refname, refname);
> +   memcpy((char *)update->refname, refname, len); /* includese NUL */

s/includese/includes/

> ALLOC_GROW(transaction->updates, transaction->nr + 1, 
> transaction->alloc);
> transaction->updates[transaction->nr++] = update;
> return update;
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 56/67] avoid sprintf and strcpy with flex arrays

2015-09-15 Thread Jeff King
When we are allocating a struct with a FLEX_ARRAY member, we
generally compute the size of the array and then sprintf or
strcpy into it. Normally we could improve a dynamic allocation
like this by using xstrfmt, but it doesn't work here; we
have to account for the size of the rest of the struct.

But we can improve things a bit by storing the length that
we use for the allocation, and then feeding it to xsnprintf
or memcpy, which makes it more obvious that we are not
writing more than the allocated number of bytes.

It would be nice if we had some kind of helper for
allocating generic flex arrays, but it doesn't work that
well:

 - the call signature is a little bit unwieldy:

  d = flex_struct(sizeof(*d), offsetof(d, path), fmt, ...);

   You need offsetof here instead of just writing to the
   end of the base size, because we don't know how the
   struct is packed (partially this is because FLEX_ARRAY
   might not be zero, though we can account for that; but
   the size of the struct may actually be rounded up for
   alignment, and we can't know that).

 - some sites do clever things, like over-allocating because
   they know they will write larger things into the buffer
   later (e.g., struct packed_git here).

So we're better off to just write out each allocation (or
add type-specific helpers, though many of these are one-off
allocations anyway).

Signed-off-by: Jeff King 
---
Actually, I have a malloc-hardening series (which I'll post after this),
in which I _do_ break down and add a FLEX_ALLOC() macro. But we still
cannot use it for these cases anyway, because we don't assume all
platforms support variadic macros.

 archive.c   | 5 +++--
 builtin/blame.c | 5 +++--
 fast-import.c   | 6 --
 refs.c  | 8 
 sha1_file.c | 5 +++--
 submodule.c | 6 --
 6 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/archive.c b/archive.c
index 01b0899..4ac86c8 100644
--- a/archive.c
+++ b/archive.c
@@ -171,13 +171,14 @@ static void queue_directory(const unsigned char *sha1,
unsigned mode, int stage, struct archiver_context *c)
 {
struct directory *d;
-   d = xmallocz(sizeof(*d) + base->len + 1 + strlen(filename));
+   size_t len = base->len + 1 + strlen(filename) + 1;
+   d = xmalloc(sizeof(*d) + len);
d->up  = c->bottom;
d->baselen = base->len;
d->mode= mode;
d->stage   = stage;
c->bottom  = d;
-   d->len = sprintf(d->path, "%.*s%s/", (int)base->len, base->buf, 
filename);
+   d->len = xsnprintf(d->path, len, "%.*s%s/", (int)base->len, base->buf, 
filename);
hashcpy(d->oid.hash, sha1);
 }
 
diff --git a/builtin/blame.c b/builtin/blame.c
index f8ec7ff..dbec516 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -459,12 +459,13 @@ static void queue_blames(struct scoreboard *sb, struct 
origin *porigin,
 static struct origin *make_origin(struct commit *commit, const char *path)
 {
struct origin *o;
-   o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
+   size_t pathlen = strlen(path) + 1;
+   o = xcalloc(1, sizeof(*o) + pathlen);
o->commit = commit;
o->refcnt = 1;
o->next = commit->util;
commit->util = o;
-   strcpy(o->path, path);
+   memcpy(o->path, path, pathlen); /* includes NUL */
return o;
 }
 
diff --git a/fast-import.c b/fast-import.c
index d0c2502..895c6b4 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -863,13 +863,15 @@ static void start_packfile(void)
 {
static char tmp_file[PATH_MAX];
struct packed_git *p;
+   int namelen;
struct pack_header hdr;
int pack_fd;
 
pack_fd = odb_mkstemp(tmp_file, sizeof(tmp_file),
  "pack/tmp_pack_XX");
-   p = xcalloc(1, sizeof(*p) + strlen(tmp_file) + 2);
-   strcpy(p->pack_name, tmp_file);
+   namelen = strlen(tmp_file) + 2;
+   p = xcalloc(1, sizeof(*p) + namelen);
+   xsnprintf(p->pack_name, namelen, "%s", tmp_file);
p->pack_fd = pack_fd;
p->do_not_close = 1;
pack_file = sha1fd(pack_fd, p->pack_name);
diff --git a/refs.c b/refs.c
index c2709de..df6c41a 100644
--- a/refs.c
+++ b/refs.c
@@ -2695,7 +2695,7 @@ static int pack_if_possible_fn(struct ref_entry *entry, 
void *cb_data)
int namelen = strlen(entry->name) + 1;
struct ref_to_prune *n = xcalloc(1, sizeof(*n) + namelen);
hashcpy(n->sha1, entry->u.value.oid.hash);
-   strcpy(n->name, entry->name);
+   memcpy(n->name, entry->name, namelen); /* includes NUL */
n->next = cb->ref_to_prune;
cb->ref_to_prune = n;
}
@@ -3984,10 +3984,10 @@ void ref_transaction_free(struct ref_transaction 
*transaction)
 static struct ref_update *add_update(struct ref_transaction *transaction,
 const char *refname)
 {
-   size_t len =