[PATCH v5 5/5] pretty: user format ignores i18n.logOutputEncoding setting
The following two commands are expected to give the same output to a terminal: $ git log --oneline --no-color $ git log --pretty=format:'%h %s' However, the former pays attention to i18n.logOutputEncoding configuration, while the latter does not when it format %s. Log messages written in an encoding i18n.commitEncoding which differs from terminal encoding are shown corrupted with the latter even when i18n.logOutputEncoding and terminal encoding are the same. The same corruption is true for $ git diff --submodule=log and $ git rev-list --pretty=format:%s HEAD and $ git reset --hard Signed-off-by: Alexey Shumkin alex.crez...@gmail.com --- builtin/reset.c | 8 ++-- builtin/rev-list.c | 1 + builtin/shortlog.c | 1 + log-tree.c | 1 + submodule.c | 3 +++ t/t4041-diff-submodule-option.sh | 10 +- t/t4205-log-pretty-formats.sh| 34 +- t/t6006-rev-list-format.sh | 8 t/t7102-reset.sh | 2 +- 9 files changed, 39 insertions(+), 29 deletions(-) diff --git a/builtin/reset.c b/builtin/reset.c index 6032131..b23ed63 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -92,11 +92,15 @@ static int reset_index(const unsigned char *sha1, int reset_type, int quiet) static void print_new_head_line(struct commit *commit) { - const char *hex, *body; + const char *hex, *body, *encoding; hex = find_unique_abbrev(commit-object.sha1, DEFAULT_ABBREV); printf(_(HEAD is now at %s), hex); - body = strstr(commit-buffer, \n\n); + encoding = get_log_output_encoding(); + body = logmsg_reencode(commit, NULL, encoding); + if (!body) + body = commit-buffer; + body = strstr(body, \n\n); if (body) { const char *eol; size_t len; diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 67701be..a5ec30d 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -111,6 +111,7 @@ static void show_commit(struct commit *commit, void *data) ctx.date_mode = revs-date_mode; ctx.date_mode_explicit = revs-date_mode_explicit; ctx.fmt = revs-commit_format; + ctx.output_encoding = get_log_output_encoding(); pretty_print_commit(ctx, commit, buf); if (revs-graph) { if (buf.len) { diff --git a/builtin/shortlog.c b/builtin/shortlog.c index 1fd6f8a..1434f8f 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -137,6 +137,7 @@ void shortlog_add_commit(struct shortlog *log, struct commit *commit) ctx.subject = ; ctx.after_subject = ; ctx.date_mode = DATE_NORMAL; + ctx.output_encoding = get_log_output_encoding(); pretty_print_commit(ctx, commit, ufbuf); buffer = ufbuf.buf; } else if (*buffer) { diff --git a/log-tree.c b/log-tree.c index 1946e9c..5277d3e 100644 --- a/log-tree.c +++ b/log-tree.c @@ -616,6 +616,7 @@ void show_log(struct rev_info *opt) ctx.fmt = opt-commit_format; ctx.mailmap = opt-mailmap; ctx.color = opt-diffopt.use_color; + ctx.output_encoding = get_log_output_encoding(); pretty_print_commit(ctx, commit, msgbuf); if (opt-add_signoff) diff --git a/submodule.c b/submodule.c index 1821a5b..baa8669 100644 --- a/submodule.c +++ b/submodule.c @@ -222,10 +222,13 @@ static void print_submodule_summary(struct rev_info *rev, FILE *f, static const char format[] = %m %s; struct strbuf sb = STRBUF_INIT; struct commit *commit; + const char *log_output_encoding; + log_output_encoding = get_log_output_encoding(); while ((commit = get_revision(rev))) { struct pretty_print_context ctx = {0}; ctx.date_mode = rev-date_mode; + ctx.output_encoding = log_output_encoding; strbuf_setlen(sb, 0); strbuf_addstr(sb, line_prefix); if (commit-object.flags SYMMETRIC_LEFT) { diff --git a/t/t4041-diff-submodule-option.sh b/t/t4041-diff-submodule-option.sh index 22bf4df..ce192b0 100755 --- a/t/t4041-diff-submodule-option.sh +++ b/t/t4041-diff-submodule-option.sh @@ -94,7 +94,7 @@ test_expect_success 'diff.submodule does not affect plumbing' ' commit_file sm1 head2=$(add_file sm1 foo3) -test_expect_failure 'modified submodule(forward)' ' +test_expect_success 'modified submodule(forward)' ' git diff-index -p --submodule=log HEAD actual cat expected -EOF Submodule sm1 $head1..$head2: @@ -103,7 +103,7 @@ test_expect_failure 'modified submodule(forward)' ' test_cmp expected actual ' -test_expect_failure 'modified submodule(forward)' ' +test_expect_success 'modified submodule(forward)' '
Re: [PATCH v5 5/5] pretty: user format ignores i18n.logOutputEncoding setting
Alexey Shumkin alex.crez...@gmail.com writes: Subject: Re: [PATCH v5 5/5] pretty: user format ignores i18n.logOutputEncoding setting That is a statement of fact, and does not tell much to the reader. I think you are saying that in the current implementation, logoutputencoding is not honored in user format, that it is a bug that needs to be fixed (as opposed to that is by design, the scripts that read from --format='' is responsible for doing the conversion), and that this patch fixes it. So pretty: --format output should honor logOutputEncoding or something? At least it says what is the _desired_ outcome with should, hints that the status-quo is different from that desired outcome and implies that this is a patch to fix it. The Subject line is very important as that is the only thing we see in many summarizing output format, e.g. shortlog, cover letter and merge message. The following two commands are expected to give the same output to a terminal: $ git log --oneline --no-color $ git log --pretty=format:'%h %s' However, the former pays attention to i18n.logOutputEncoding configuration, while the latter does not when it format %s. Log messages written in an encoding i18n.commitEncoding which differs from terminal encoding are shown corrupted with the latter even when i18n.logOutputEncoding and terminal encoding are the same. The same corruption is true for $ git diff --submodule=log and $ git rev-list --pretty=format:%s HEAD and $ git reset --hard Signed-off-by: Alexey Shumkin alex.crez...@gmail.com --- builtin/reset.c | 8 ++-- builtin/rev-list.c | 1 + builtin/shortlog.c | 1 + log-tree.c | 1 + submodule.c | 3 +++ t/t4041-diff-submodule-option.sh | 10 +- t/t4205-log-pretty-formats.sh| 34 +- t/t6006-rev-list-format.sh | 8 t/t7102-reset.sh | 2 +- 9 files changed, 39 insertions(+), 29 deletions(-) diff --git a/builtin/reset.c b/builtin/reset.c index 6032131..b23ed63 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -92,11 +92,15 @@ static int reset_index(const unsigned char *sha1, int reset_type, int quiet) static void print_new_head_line(struct commit *commit) { - const char *hex, *body; + const char *hex, *body, *encoding; hex = find_unique_abbrev(commit-object.sha1, DEFAULT_ABBREV); printf(_(HEAD is now at %s), hex); - body = strstr(commit-buffer, \n\n); + encoding = get_log_output_encoding(); + body = logmsg_reencode(commit, NULL, encoding); + if (!body) + body = commit-buffer; Does this happen? I thought body, without an error, can be the same as commit-buffer. + body = strstr(body, \n\n); if (body) { const char *eol; size_t len; Do we have a leak here? body may point at a new piece of memory logmsg_reencode() have allocated to hold the UTF-8 version of your 8859-5 message in commit-buffer. It would be more like this, no? diff --git a/builtin/reset.c b/builtin/reset.c index 6032131..8d22ffe 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -92,11 +92,13 @@ static int reset_index(const unsigned char *sha1, int reset_type, int quiet) static void print_new_head_line(struct commit *commit) { - const char *hex, *body; + const char *hex, *body, *msg, *encoding; hex = find_unique_abbrev(commit-object.sha1, DEFAULT_ABBREV); printf(_(HEAD is now at %s), hex); - body = strstr(commit-buffer, \n\n); + + msg = logmsg_reencode(commit, NULL, get_log_output_encoding()); + body = strstr(msg, \n\n); if (body) { const char *eol; size_t len; @@ -107,6 +109,7 @@ static void print_new_head_line(struct commit *commit) } else printf(\n); + logmsg_free(msg, commit); } static void update_index_from_diff(struct diff_queue_struct *q, -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html