[PATCH v5 5/5] pretty: user format ignores i18n.logOutputEncoding setting

2013-06-20 Thread Alexey Shumkin
The following two commands are expected to give the same output to a terminal:

$ git log --oneline --no-color
$ git log --pretty=format:'%h %s'

However, the former pays attention to i18n.logOutputEncoding
configuration, while the latter does not when it format %s.
Log messages written in an encoding i18n.commitEncoding which differs
from terminal encoding are shown corrupted with the latter even
when i18n.logOutputEncoding and terminal encoding are the same.

The same corruption is true for
$ git diff --submodule=log
and
$ git rev-list --pretty=format:%s HEAD
and
$ git reset --hard

Signed-off-by: Alexey Shumkin alex.crez...@gmail.com
---
 builtin/reset.c  |  8 ++--
 builtin/rev-list.c   |  1 +
 builtin/shortlog.c   |  1 +
 log-tree.c   |  1 +
 submodule.c  |  3 +++
 t/t4041-diff-submodule-option.sh | 10 +-
 t/t4205-log-pretty-formats.sh| 34 +-
 t/t6006-rev-list-format.sh   |  8 
 t/t7102-reset.sh |  2 +-
 9 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/builtin/reset.c b/builtin/reset.c
index 6032131..b23ed63 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -92,11 +92,15 @@ static int reset_index(const unsigned char *sha1, int 
reset_type, int quiet)
 
 static void print_new_head_line(struct commit *commit)
 {
-   const char *hex, *body;
+   const char *hex, *body, *encoding;
 
hex = find_unique_abbrev(commit-object.sha1, DEFAULT_ABBREV);
printf(_(HEAD is now at %s), hex);
-   body = strstr(commit-buffer, \n\n);
+   encoding = get_log_output_encoding();
+   body = logmsg_reencode(commit, NULL, encoding);
+   if (!body)
+   body = commit-buffer;
+   body = strstr(body, \n\n);
if (body) {
const char *eol;
size_t len;
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 67701be..a5ec30d 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -111,6 +111,7 @@ static void show_commit(struct commit *commit, void *data)
ctx.date_mode = revs-date_mode;
ctx.date_mode_explicit = revs-date_mode_explicit;
ctx.fmt = revs-commit_format;
+   ctx.output_encoding = get_log_output_encoding();
pretty_print_commit(ctx, commit, buf);
if (revs-graph) {
if (buf.len) {
diff --git a/builtin/shortlog.c b/builtin/shortlog.c
index 1fd6f8a..1434f8f 100644
--- a/builtin/shortlog.c
+++ b/builtin/shortlog.c
@@ -137,6 +137,7 @@ void shortlog_add_commit(struct shortlog *log, struct 
commit *commit)
ctx.subject = ;
ctx.after_subject = ;
ctx.date_mode = DATE_NORMAL;
+   ctx.output_encoding = get_log_output_encoding();
pretty_print_commit(ctx, commit, ufbuf);
buffer = ufbuf.buf;
} else if (*buffer) {
diff --git a/log-tree.c b/log-tree.c
index 1946e9c..5277d3e 100644
--- a/log-tree.c
+++ b/log-tree.c
@@ -616,6 +616,7 @@ void show_log(struct rev_info *opt)
ctx.fmt = opt-commit_format;
ctx.mailmap = opt-mailmap;
ctx.color = opt-diffopt.use_color;
+   ctx.output_encoding = get_log_output_encoding();
pretty_print_commit(ctx, commit, msgbuf);
 
if (opt-add_signoff)
diff --git a/submodule.c b/submodule.c
index 1821a5b..baa8669 100644
--- a/submodule.c
+++ b/submodule.c
@@ -222,10 +222,13 @@ static void print_submodule_summary(struct rev_info *rev, 
FILE *f,
static const char format[] =   %m %s;
struct strbuf sb = STRBUF_INIT;
struct commit *commit;
+   const char *log_output_encoding;
 
+   log_output_encoding = get_log_output_encoding();
while ((commit = get_revision(rev))) {
struct pretty_print_context ctx = {0};
ctx.date_mode = rev-date_mode;
+   ctx.output_encoding = log_output_encoding;
strbuf_setlen(sb, 0);
strbuf_addstr(sb, line_prefix);
if (commit-object.flags  SYMMETRIC_LEFT) {
diff --git a/t/t4041-diff-submodule-option.sh b/t/t4041-diff-submodule-option.sh
index 22bf4df..ce192b0 100755
--- a/t/t4041-diff-submodule-option.sh
+++ b/t/t4041-diff-submodule-option.sh
@@ -94,7 +94,7 @@ test_expect_success 'diff.submodule does not affect plumbing' 
'
 commit_file sm1 
 head2=$(add_file sm1 foo3)
 
-test_expect_failure 'modified submodule(forward)' '
+test_expect_success 'modified submodule(forward)' '
git diff-index -p --submodule=log HEAD actual 
cat expected -EOF 
Submodule sm1 $head1..$head2:
@@ -103,7 +103,7 @@ test_expect_failure 'modified submodule(forward)' '
test_cmp expected actual
 '
 
-test_expect_failure 'modified submodule(forward)' '
+test_expect_success 'modified submodule(forward)' '

Re: [PATCH v5 5/5] pretty: user format ignores i18n.logOutputEncoding setting

2013-06-20 Thread Junio C Hamano
Alexey Shumkin alex.crez...@gmail.com writes:

Subject: Re: [PATCH v5 5/5] pretty: user format ignores i18n.logOutputEncoding 
setting

That is a statement of fact, and does not tell much to the reader.

I think you are saying that in the current implementation,
logoutputencoding is not honored in user format, that it is a bug
that needs to be fixed (as opposed to that is by design, the
scripts that read from --format='' is responsible for doing the
conversion), and that this patch fixes it.

So

pretty: --format output should honor logOutputEncoding

or something?  At least it says what is the _desired_ outcome with
should, hints that the status-quo is different from that desired
outcome and implies that this is a patch to fix it.

The Subject line is very important as that is the only thing we
see in many summarizing output format, e.g. shortlog, cover letter
and merge message.

 The following two commands are expected to give the same output to a terminal:

   $ git log --oneline --no-color
   $ git log --pretty=format:'%h %s'

 However, the former pays attention to i18n.logOutputEncoding
 configuration, while the latter does not when it format %s.
 Log messages written in an encoding i18n.commitEncoding which differs
 from terminal encoding are shown corrupted with the latter even
 when i18n.logOutputEncoding and terminal encoding are the same.

 The same corruption is true for
   $ git diff --submodule=log
 and
   $ git rev-list --pretty=format:%s HEAD
 and
   $ git reset --hard

 Signed-off-by: Alexey Shumkin alex.crez...@gmail.com
 ---
  builtin/reset.c  |  8 ++--
  builtin/rev-list.c   |  1 +
  builtin/shortlog.c   |  1 +
  log-tree.c   |  1 +
  submodule.c  |  3 +++
  t/t4041-diff-submodule-option.sh | 10 +-
  t/t4205-log-pretty-formats.sh| 34 +-
  t/t6006-rev-list-format.sh   |  8 
  t/t7102-reset.sh |  2 +-
  9 files changed, 39 insertions(+), 29 deletions(-)

 diff --git a/builtin/reset.c b/builtin/reset.c
 index 6032131..b23ed63 100644
 --- a/builtin/reset.c
 +++ b/builtin/reset.c
 @@ -92,11 +92,15 @@ static int reset_index(const unsigned char *sha1, int 
 reset_type, int quiet)
  
  static void print_new_head_line(struct commit *commit)
  {
 - const char *hex, *body;
 + const char *hex, *body, *encoding;
  
   hex = find_unique_abbrev(commit-object.sha1, DEFAULT_ABBREV);
   printf(_(HEAD is now at %s), hex);
 - body = strstr(commit-buffer, \n\n);
 + encoding = get_log_output_encoding();
 + body = logmsg_reencode(commit, NULL, encoding);


 + if (!body)
 + body = commit-buffer;

Does this happen?  I thought body, without an error, can be the same
as commit-buffer.

 + body = strstr(body, \n\n);
   if (body) {
   const char *eol;
   size_t len;

Do we have a leak here?  body may point at a new piece of memory
logmsg_reencode() have allocated to hold the UTF-8 version of your
8859-5 message in commit-buffer.

It would be more like this, no?

diff --git a/builtin/reset.c b/builtin/reset.c
index 6032131..8d22ffe 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -92,11 +92,13 @@ static int reset_index(const unsigned char *sha1, int 
reset_type, int quiet)
 
 static void print_new_head_line(struct commit *commit)
 {
-   const char *hex, *body;
+   const char *hex, *body, *msg, *encoding; 
 
hex = find_unique_abbrev(commit-object.sha1, DEFAULT_ABBREV);
printf(_(HEAD is now at %s), hex);
-   body = strstr(commit-buffer, \n\n);
+
+   msg = logmsg_reencode(commit, NULL, get_log_output_encoding());
+   body = strstr(msg, \n\n);
if (body) {
const char *eol;
size_t len;
@@ -107,6 +109,7 @@ static void print_new_head_line(struct commit *commit)
}
else
printf(\n);
+   logmsg_free(msg, commit);
 }
 
 static void update_index_from_diff(struct diff_queue_struct *q,


--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html