Re: [PATCH 15/15] commit: record buffer length in cache

2014-06-10 Thread Christian Couder
From: Jeff King 
Subject: Re: [PATCH 15/15] commit: record buffer length in cache
Date: Tue, 10 Jun 2014 16:33:49 -0400

> On Tue, Jun 10, 2014 at 01:27:13AM -0400, Jeff King wrote:
> 
>> > I find the above strange. I would have done something like:
>> > 
>> > -  set_commit_buffer(commit, strbuf_detach(&msg, NULL));
>> > +  size_t size;
>> > +  char *buf = strbuf_detach(&msg, &size);
>> > +  set_commit_buffer(commit, buf, size);
>> 
>> It is a little strange. You can't do:
>> 
>>   set_commit_buffer(commit, strbuf_detach(&msg, NULL), msg.len);
>> 
>> because the second argument resets msg.len as a side effect. Doing it
>> your way is longer, but perhaps is a bit more canonical. In general, one
>> should call strbuf_detach to ensure that the buffer is allocated (and
>> does not point to slopbuf). That's guaranteed here, because we just put
>> contents into the buffer, but it's probably more hygienic to use the
>> more verbose form.
> 
> I was trying to avoid cluttering up the function with the extra variable
> definitions. I ended up with the change below, which I think is clear,
> correct, and pushes the complexity outside of the function.

Yeah, great!
 
> diff --git a/builtin/blame.c b/builtin/blame.c
> index cde19eb..53f43ab 100644
> --- a/builtin/blame.c
> +++ b/builtin/blame.c
> @@ -2266,6 +2266,18 @@ static void append_merge_parents(struct commit_list 
> **tail)
>  }
>  
>  /*
> + * This isn't as simple as passing sb->buf and sb->len, because we
> + * want to transfer ownership of the buffer to the commit (so we
> + * must use detach).
> + */
> +static void set_commit_buffer_from_strbuf(struct commit *c, struct strbuf 
> *sb)
> +{
> + size_t len;
> + void *buf = strbuf_detach(sb, &len);
> + set_commit_buffer(c, buf, len);
> +}
> +
> +/*
>   * Prepare a dummy commit that represents the work tree (or staged) item.
>   * Note that annotating work tree item never works in the reverse.
>   */
> @@ -2313,7 +2325,7 @@ static struct commit *fake_working_tree_commit(struct 
> diff_options *opt,
>   ident, ident, path,
>   (!contents_from ? path :
>(!strcmp(contents_from, "-") ? "standard input" : 
> contents_from)));
> - set_commit_buffer(commit, strbuf_detach(&msg, NULL));
> + set_commit_buffer_from_strbuf(commit, &msg);
>  
>   if (!contents_from || strcmp("-", contents_from)) {
>   struct stat st;

Thanks,
Christian. 
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 15/15] commit: record buffer length in cache

2014-06-10 Thread Jeff King
On Tue, Jun 10, 2014 at 01:27:13AM -0400, Jeff King wrote:

> > I find the above strange. I would have done something like:
> > 
> > -   set_commit_buffer(commit, strbuf_detach(&msg, NULL));
> > +   size_t size;
> > +   char *buf = strbuf_detach(&msg, &size);
> > +   set_commit_buffer(commit, buf, size);
> 
> It is a little strange. You can't do:
> 
>   set_commit_buffer(commit, strbuf_detach(&msg, NULL), msg.len);
> 
> because the second argument resets msg.len as a side effect. Doing it
> your way is longer, but perhaps is a bit more canonical. In general, one
> should call strbuf_detach to ensure that the buffer is allocated (and
> does not point to slopbuf). That's guaranteed here, because we just put
> contents into the buffer, but it's probably more hygienic to use the
> more verbose form.

I was trying to avoid cluttering up the function with the extra variable
definitions. I ended up with the change below, which I think is clear,
correct, and pushes the complexity outside of the function.

diff --git a/builtin/blame.c b/builtin/blame.c
index cde19eb..53f43ab 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -2266,6 +2266,18 @@ static void append_merge_parents(struct commit_list 
**tail)
 }
 
 /*
+ * This isn't as simple as passing sb->buf and sb->len, because we
+ * want to transfer ownership of the buffer to the commit (so we
+ * must use detach).
+ */
+static void set_commit_buffer_from_strbuf(struct commit *c, struct strbuf *sb)
+{
+   size_t len;
+   void *buf = strbuf_detach(sb, &len);
+   set_commit_buffer(c, buf, len);
+}
+
+/*
  * Prepare a dummy commit that represents the work tree (or staged) item.
  * Note that annotating work tree item never works in the reverse.
  */
@@ -2313,7 +2325,7 @@ static struct commit *fake_working_tree_commit(struct 
diff_options *opt,
ident, ident, path,
(!contents_from ? path :
 (!strcmp(contents_from, "-") ? "standard input" : 
contents_from)));
-   set_commit_buffer(commit, strbuf_detach(&msg, NULL));
+   set_commit_buffer_from_strbuf(commit, &msg);
 
if (!contents_from || strcmp("-", contents_from)) {
struct stat st;


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 15/15] commit: record buffer length in cache

2014-06-09 Thread Jeff King
On Tue, Jun 10, 2014 at 07:12:37AM +0200, Christian Couder wrote:

> From: Jeff King 
> >
> > --- a/builtin/blame.c
> > +++ b/builtin/blame.c
> > @@ -2313,7 +2313,7 @@ static struct commit *fake_working_tree_commit(struct 
> > diff_options *opt,
> > ident, ident, path,
> > (!contents_from ? path :
> >  (!strcmp(contents_from, "-") ? "standard input" : 
> > contents_from)));
> > -   set_commit_buffer(commit, strbuf_detach(&msg, NULL));
> > +   set_commit_buffer(commit, msg.buf, msg.len);
> 
> I find the above strange. I would have done something like:
> 
> - set_commit_buffer(commit, strbuf_detach(&msg, NULL));
> + size_t size;
> + char *buf = strbuf_detach(&msg, &size);
> + set_commit_buffer(commit, buf, size);

It is a little strange. You can't do:

  set_commit_buffer(commit, strbuf_detach(&msg, NULL), msg.len);

because the second argument resets msg.len as a side effect. Doing it
your way is longer, but perhaps is a bit more canonical. In general, one
should call strbuf_detach to ensure that the buffer is allocated (and
does not point to slopbuf). That's guaranteed here, because we just put
contents into the buffer, but it's probably more hygienic to use the
more verbose form.

-Peff
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 15/15] commit: record buffer length in cache

2014-06-09 Thread Christian Couder
From: Jeff King 
>
> --- a/builtin/blame.c
> +++ b/builtin/blame.c
> @@ -2313,7 +2313,7 @@ static struct commit *fake_working_tree_commit(struct 
> diff_options *opt,
>   ident, ident, path,
>   (!contents_from ? path :
>(!strcmp(contents_from, "-") ? "standard input" : 
> contents_from)));
> - set_commit_buffer(commit, strbuf_detach(&msg, NULL));
> + set_commit_buffer(commit, msg.buf, msg.len);

I find the above strange. I would have done something like:

-   set_commit_buffer(commit, strbuf_detach(&msg, NULL));
+   size_t size;
+   char *buf = strbuf_detach(&msg, &size);
+   set_commit_buffer(commit, buf, size);

Thanks,
Christian.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 15/15] commit: record buffer length in cache

2014-06-09 Thread Jeff King
Most callsites which use the commit buffer try to use the
cached version attached to the commit, rather than
re-reading from disk. Unfortunately, that interface provides
only a pointer to the NUL-terminated buffer, with no
indication of the original length.

For the most part, this doesn't matter. People do not put
NULs in their commit messages, and the log code is happy to
treat it all as a NUL-terminated string. However, some code
paths do care. For example, when checking signatures, we
want to be very careful that we verify all the bytes to
avoid malicious trickery.

This patch just adds an optional "size" out-pointer to
get_commit_buffer and friends. The existing callers all pass
NULL (there did not seem to be any obvious sites where we
could avoid an immediate strlen() call, though perhaps with
some further refactoring we could).

Signed-off-by: Jeff King 
---
 builtin/blame.c |  2 +-
 builtin/fast-export.c   |  2 +-
 builtin/fmt-merge-msg.c |  2 +-
 builtin/index-pack.c|  2 +-
 builtin/log.c   |  2 +-
 builtin/rev-list.c  |  2 +-
 commit.c| 54 -
 commit.h|  8 
 fsck.c  |  2 +-
 log-tree.c  |  2 +-
 merge-recursive.c   |  2 +-
 notes-merge.c   |  2 +-
 object.c|  4 ++--
 pretty.c|  4 ++--
 sequencer.c |  2 +-
 sha1_name.c |  2 +-
 16 files changed, 56 insertions(+), 38 deletions(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index cde19eb..58a2c54 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -2313,7 +2313,7 @@ static struct commit *fake_working_tree_commit(struct 
diff_options *opt,
ident, ident, path,
(!contents_from ? path :
 (!strcmp(contents_from, "-") ? "standard input" : 
contents_from)));
-   set_commit_buffer(commit, strbuf_detach(&msg, NULL));
+   set_commit_buffer(commit, msg.buf, msg.len);
 
if (!contents_from || strcmp("-", contents_from)) {
struct stat st;
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 7ee5e08..05d161f 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -289,7 +289,7 @@ static void handle_commit(struct commit *commit, struct 
rev_info *rev)
rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
 
parse_commit_or_die(commit);
-   commit_buffer = get_commit_buffer(commit);
+   commit_buffer = get_commit_buffer(commit, NULL);
author = strstr(commit_buffer, "\nauthor ");
if (!author)
die ("Could not find author in commit %s",
diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c
index 01f6d59..ef8b254 100644
--- a/builtin/fmt-merge-msg.c
+++ b/builtin/fmt-merge-msg.c
@@ -236,7 +236,7 @@ static void record_person(int which, struct string_list 
*people,
const char *field;
 
field = (which == 'a') ? "\nauthor " : "\ncommitter ";
-   buffer = get_commit_buffer(commit);
+   buffer = get_commit_buffer(commit, NULL);
name = strstr(buffer, field);
if (!name)
return;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 995df39..25e3e9b 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -786,7 +786,7 @@ static void sha1_object(const void *data, struct 
object_entry *obj_entry,
}
if (obj->type == OBJ_COMMIT) {
struct commit *commit = (struct commit *) obj;
-   detach_commit_buffer(commit);
+   detach_commit_buffer(commit, NULL);
}
obj->flags |= FLAG_CHECKED;
}
diff --git a/builtin/log.c b/builtin/log.c
index 2c74260..c599eac 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -919,7 +919,7 @@ static void make_cover_letter(struct rev_info *rev, int 
use_stdout,
&need_8bit_cte);
 
for (i = 0; !need_8bit_cte && i < nr; i++) {
-   const char *buf = get_commit_buffer(list[i]);
+   const char *buf = get_commit_buffer(list[i], NULL);
if (has_non_ascii(buf))
need_8bit_cte = 1;
unuse_commit_buffer(list[i], buf);
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 3fcbf21..ff84a82 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -106,7 +106,7 @@ static void show_commit(struct commit *commit, void *data)
else
putchar('\n');
 
-   if (revs->verbose_header && get_cached_commit_buffer(commit)) {
+   if (revs->verbose_header && get_cached_commit_buffer(commit, NULL)) {
struct strbuf buf = STRBUF_INIT;
struct pretty_print_context ctx = {0};
ctx.abbrev = revs->abbrev;
diff --git a/commit.c b/commit.c
index 1b344bd..b833