[PATCH 7/8] diff.c: compute reverse locally in emit_line_0

2018-07-27 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---
 diff.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/diff.c b/diff.c
index fdad7ffdd77..f565a2c0c2b 100644
--- a/diff.c
+++ b/diff.c
@@ -576,11 +576,12 @@ static void check_blank_at_eof(mmfile_t *mf1, mmfile_t 
*mf2,
 }
 
 static void emit_line_0(struct diff_options *o,
-   const char *set_sign, const char *set, unsigned 
reverse, const char *reset,
+   const char *set_sign, const char *set, const char 
*reset,
int first, const char *line, int len)
 {
int has_trailing_newline, has_trailing_carriage_return;
int nofirst;
+   int reverse = !!set && !!set_sign;
FILE *file = o->file;
 
fputs(diff_line_prefix(o), file);
@@ -625,7 +626,7 @@ static void emit_line_0(struct diff_options *o,
 static void emit_line(struct diff_options *o, const char *set, const char 
*reset,
  const char *line, int len)
 {
-   emit_line_0(o, set, NULL, 0, reset, line[0], line+1, len-1);
+   emit_line_0(o, set, NULL, reset, line[0], line+1, len-1);
 }
 
 enum diff_symbol {
@@ -998,15 +999,15 @@ static void emit_line_ws_markup(struct diff_options *o,
}
 
if (!ws && !set_sign)
-   emit_line_0(o, set, NULL, 0, reset, sign, line, len);
+   emit_line_0(o, set, NULL, reset, sign, line, len);
else if (!ws) {
-   emit_line_0(o, set_sign, set, !!set_sign, reset, sign, line, 
len);
+   emit_line_0(o, set_sign, set, reset, sign, line, len);
} else if (blank_at_eof)
/* Blank line at EOF - paint '+' as well */
-   emit_line_0(o, ws, NULL, 0, reset, sign, line, len);
+   emit_line_0(o, ws, NULL, reset, sign, line, len);
else {
/* Emit just the prefix, then the rest. */
-   emit_line_0(o, set_sign, set, !!set_sign, reset, sign, "", 0);
+   emit_line_0(o, set_sign, set, reset, sign, "", 0);
ws_check_emit(line, len, ws_rule,
  o->file, set, reset, ws);
}
@@ -1029,7 +1030,7 @@ static void emit_diff_symbol_from_struct(struct 
diff_options *o,
context = diff_get_color_opt(o, DIFF_CONTEXT);
reset = diff_get_color_opt(o, DIFF_RESET);
putc('\n', o->file);
-   emit_line_0(o, context, NULL, 0, reset, '\\',
+   emit_line_0(o, context, NULL, reset, '\\',
nneof, strlen(nneof));
break;
case DIFF_SYMBOL_SUBMODULE_HEADER:
-- 
2.18.0.345.g5c9ce644c3-goog



[PATCH 8/8] diff.c: rewrite emit_line_0 more understandably

2018-07-27 Thread Stefan Beller
emit_line_0 has no nested conditions, but puts out all its arguments
(if set) in order. There is still a slight confusion with set
and set_sign, but let's defer that to a later patch.

'first' used be output always no matter if it was 0, but that got lost
got lost at e8c285c4f9c (diff: add an internal option to dual-color
diffs of diffs, 2018-07-21), as there we broadened the meaning of 'first'
to also signal an early return.

The change in 'emit_line' makes sure that 'first' is never content, but
always under our control, a sign or special character in the beginning
of the line (or 0, in which case we ignore it).

Signed-off-by: Stefan Beller 
---
 diff.c | 73 +-
 1 file changed, 41 insertions(+), 32 deletions(-)

diff --git a/diff.c b/diff.c
index f565a2c0c2b..2bd4d3d6839 100644
--- a/diff.c
+++ b/diff.c
@@ -580,43 +580,52 @@ static void emit_line_0(struct diff_options *o,
int first, const char *line, int len)
 {
int has_trailing_newline, has_trailing_carriage_return;
-   int nofirst;
int reverse = !!set && !!set_sign;
+   int needs_reset = 0;
+
FILE *file = o->file;
 
fputs(diff_line_prefix(o), file);
 
-   if (len == 0) {
-   has_trailing_newline = (first == '\n');
-   has_trailing_carriage_return = (!has_trailing_newline &&
-   (first == '\r'));
-   nofirst = has_trailing_newline || has_trailing_carriage_return;
-   } else {
-   has_trailing_newline = (len > 0 && line[len-1] == '\n');
-   if (has_trailing_newline)
-   len--;
-   has_trailing_carriage_return = (len > 0 && line[len-1] == '\r');
-   if (has_trailing_carriage_return)
-   len--;
-   nofirst = 0;
-   }
-
-   if (len || !nofirst) {
-   if (reverse && want_color(o->use_color))
-   fputs(GIT_COLOR_REVERSE, file);
-   if (set_sign || set)
-   fputs(set_sign ? set_sign : set, file);
-   if (first && !nofirst)
-   fputc(first, file);
-   if (len) {
-   if (set_sign && set && set != set_sign)
-   fputs(reset, file);
-   if (set)
-   fputs(set, file);
-   fwrite(line, len, 1, file);
-   }
-   fputs(reset, file);
+   has_trailing_newline = (len > 0 && line[len-1] == '\n');
+   if (has_trailing_newline)
+   len--;
+
+   has_trailing_carriage_return = (len > 0 && line[len-1] == '\r');
+   if (has_trailing_carriage_return)
+   len--;
+
+   if (!len && !first)
+   goto end_of_line;
+
+   if (reverse && want_color(o->use_color)) {
+   fputs(GIT_COLOR_REVERSE, file);
+   needs_reset = 1;
+   }
+
+   if (set_sign || set) {
+   fputs(set_sign ? set_sign : set, file);
+   needs_reset = 1;
}
+
+   if (first)
+   fputc(first, file);
+
+   if (!len)
+   goto end_of_line;
+
+   if (set) {
+   if (set_sign && set != set_sign)
+   fputs(reset, file);
+   fputs(set, file);
+   needs_reset = 1;
+   }
+   fwrite(line, len, 1, file);
+   needs_reset |= len > 0;
+
+end_of_line:
+   if (needs_reset)
+   fputs(reset, file);
if (has_trailing_carriage_return)
fputc('\r', file);
if (has_trailing_newline)
@@ -626,7 +635,7 @@ static void emit_line_0(struct diff_options *o,
 static void emit_line(struct diff_options *o, const char *set, const char 
*reset,
  const char *line, int len)
 {
-   emit_line_0(o, set, NULL, reset, line[0], line+1, len-1);
+   emit_line_0(o, set, NULL, reset, 0, line, len);
 }
 
 enum diff_symbol {
-- 
2.18.0.345.g5c9ce644c3-goog



[PATCH 5/8] diff.c: add set_sign to emit_line_0

2018-07-27 Thread Stefan Beller
For now just change the signature, we'll reason about the actual
change in a follow up patch.

Pass set_sign (which is output before the sign) and set that is setting
the color after the sign. Hence, promote any 'set's to set_sign as
we want to have color before the sign for now.

Signed-off-by: Stefan Beller 
---
 diff.c | 21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/diff.c b/diff.c
index 8fd2171d808..a36ed92c54c 100644
--- a/diff.c
+++ b/diff.c
@@ -576,7 +576,7 @@ static void check_blank_at_eof(mmfile_t *mf1, mmfile_t *mf2,
 }
 
 static void emit_line_0(struct diff_options *o,
-   const char *set, unsigned reverse, const char *reset,
+   const char *set_sign, const char *set, unsigned 
reverse, const char *reset,
int first, const char *line, int len)
 {
int has_trailing_newline, has_trailing_carriage_return;
@@ -606,9 +606,12 @@ static void emit_line_0(struct diff_options *o,
if (len || !nofirst) {
if (reverse && want_color(o->use_color))
fputs(GIT_COLOR_REVERSE, file);
-   fputs(set, file);
+   if (set_sign && set_sign[0])
+   fputs(set_sign, file);
if (first && !nofirst)
fputc(first, file);
+   if (set)
+   fputs(set, file);
fwrite(line, len, 1, file);
fputs(reset, file);
}
@@ -621,7 +624,7 @@ static void emit_line_0(struct diff_options *o,
 static void emit_line(struct diff_options *o, const char *set, const char 
*reset,
  const char *line, int len)
 {
-   emit_line_0(o, set, 0, reset, line[0], line+1, len-1);
+   emit_line_0(o, set, NULL, 0, reset, line[0], line+1, len-1);
 }
 
 enum diff_symbol {
@@ -994,17 +997,17 @@ static void emit_line_ws_markup(struct diff_options *o,
}
 
if (!ws && !set_sign)
-   emit_line_0(o, set, 0, reset, sign, line, len);
+   emit_line_0(o, set, NULL, 0, reset, sign, line, len);
else if (!ws) {
/* Emit just the prefix, then the rest. */
-   emit_line_0(o, set_sign, !!set_sign, reset, sign, "", 0);
-   emit_line_0(o, set, 0, reset, 0, line, len);
+   emit_line_0(o, set_sign, NULL, !!set_sign, reset, sign, "", 0);
+   emit_line_0(o, set, NULL, 0, reset, 0, line, len);
} else if (blank_at_eof)
/* Blank line at EOF - paint '+' as well */
-   emit_line_0(o, ws, 0, reset, sign, line, len);
+   emit_line_0(o, ws, NULL, 0, reset, sign, line, len);
else {
/* Emit just the prefix, then the rest. */
-   emit_line_0(o, set_sign ? set_sign : set, !!set_sign, reset,
+   emit_line_0(o, set_sign ? set_sign : set, NULL, !!set_sign, 
reset,
sign, "", 0);
ws_check_emit(line, len, ws_rule,
  o->file, set, reset, ws);
@@ -1028,7 +1031,7 @@ static void emit_diff_symbol_from_struct(struct 
diff_options *o,
context = diff_get_color_opt(o, DIFF_CONTEXT);
reset = diff_get_color_opt(o, DIFF_RESET);
putc('\n', o->file);
-   emit_line_0(o, context, 0, reset, '\\',
+   emit_line_0(o, context, NULL, 0, reset, '\\',
nneof, strlen(nneof));
break;
case DIFF_SYMBOL_SUBMODULE_HEADER:
-- 
2.18.0.345.g5c9ce644c3-goog



[PATCH 6/8] diff: use emit_line_0 once per line

2018-07-27 Thread Stefan Beller
All lines that use emit_line_0 multiple times per line, are combined
into a single call to emit_line_0, making use of the 'set' argument.

Signed-off-by: Stefan Beller 
---
 diff.c | 26 --
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/diff.c b/diff.c
index a36ed92c54c..fdad7ffdd77 100644
--- a/diff.c
+++ b/diff.c
@@ -583,10 +583,7 @@ static void emit_line_0(struct diff_options *o,
int nofirst;
FILE *file = o->file;
 
-   if (first)
-   fputs(diff_line_prefix(o), file);
-   else if (!len)
-   return;
+   fputs(diff_line_prefix(o), file);
 
if (len == 0) {
has_trailing_newline = (first == '\n');
@@ -606,13 +603,17 @@ static void emit_line_0(struct diff_options *o,
if (len || !nofirst) {
if (reverse && want_color(o->use_color))
fputs(GIT_COLOR_REVERSE, file);
-   if (set_sign && set_sign[0])
-   fputs(set_sign, file);
+   if (set_sign || set)
+   fputs(set_sign ? set_sign : set, file);
if (first && !nofirst)
fputc(first, file);
-   if (set)
-   fputs(set, file);
-   fwrite(line, len, 1, file);
+   if (len) {
+   if (set_sign && set && set != set_sign)
+   fputs(reset, file);
+   if (set)
+   fputs(set, file);
+   fwrite(line, len, 1, file);
+   }
fputs(reset, file);
}
if (has_trailing_carriage_return)
@@ -999,16 +1000,13 @@ static void emit_line_ws_markup(struct diff_options *o,
if (!ws && !set_sign)
emit_line_0(o, set, NULL, 0, reset, sign, line, len);
else if (!ws) {
-   /* Emit just the prefix, then the rest. */
-   emit_line_0(o, set_sign, NULL, !!set_sign, reset, sign, "", 0);
-   emit_line_0(o, set, NULL, 0, reset, 0, line, len);
+   emit_line_0(o, set_sign, set, !!set_sign, reset, sign, line, 
len);
} else if (blank_at_eof)
/* Blank line at EOF - paint '+' as well */
emit_line_0(o, ws, NULL, 0, reset, sign, line, len);
else {
/* Emit just the prefix, then the rest. */
-   emit_line_0(o, set_sign ? set_sign : set, NULL, !!set_sign, 
reset,
-   sign, "", 0);
+   emit_line_0(o, set_sign, set, !!set_sign, reset, sign, "", 0);
ws_check_emit(line, len, ws_rule,
  o->file, set, reset, ws);
}
-- 
2.18.0.345.g5c9ce644c3-goog



[PATCH 4/8] diff.c: reorder arguments for emit_line_ws_markup

2018-07-27 Thread Stefan Beller
The order shall be all colors first, then the content, flags at the end.
The colors are in order.

Signed-off-by: Stefan Beller 
---
 diff.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/diff.c b/diff.c
index f7251c89cbb..8fd2171d808 100644
--- a/diff.c
+++ b/diff.c
@@ -980,9 +980,9 @@ static void dim_moved_lines(struct diff_options *o)
 }
 
 static void emit_line_ws_markup(struct diff_options *o,
-   const char *set, const char *reset,
-   const char *line, int len,
-   const char *set_sign, char sign,
+   const char *set_sign, const char *set,
+   const char *reset,
+   char sign, const char *line, int len,
unsigned ws_rule, int blank_at_eof)
 {
const char *ws = NULL;
@@ -1066,7 +1066,7 @@ static void emit_diff_symbol_from_struct(struct 
diff_options *o,
else if (c == '-')
set = diff_get_color_opt(o, DIFF_FILE_OLD);
}
-   emit_line_ws_markup(o, set, reset, line, len, set_sign, ' ',
+   emit_line_ws_markup(o, set_sign, set, reset, ' ', line, len,
flags & (DIFF_SYMBOL_CONTENT_WS_MASK), 0);
break;
case DIFF_SYMBOL_PLUS:
@@ -1109,7 +1109,7 @@ static void emit_diff_symbol_from_struct(struct 
diff_options *o,
set = diff_get_color_opt(o, DIFF_CONTEXT_BOLD);
flags |= WS_IGNORE_FIRST_SPACE;
}
-   emit_line_ws_markup(o, set, reset, line, len, set_sign, '+',
+   emit_line_ws_markup(o, set_sign, set, reset, '+', line, len,
flags & DIFF_SYMBOL_CONTENT_WS_MASK,
flags & DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF);
break;
@@ -1152,7 +1152,7 @@ static void emit_diff_symbol_from_struct(struct 
diff_options *o,
else
set = diff_get_color_opt(o, DIFF_CONTEXT_DIM);
}
-   emit_line_ws_markup(o, set, reset, line, len, set_sign, '-',
+   emit_line_ws_markup(o, set_sign, set, reset, '-', line, len,
flags & DIFF_SYMBOL_CONTENT_WS_MASK, 0);
break;
case DIFF_SYMBOL_WORDS_PORCELAIN:
-- 
2.18.0.345.g5c9ce644c3-goog



[PATCH 3/8] diff.c: simplify caller of emit_line_0

2018-07-27 Thread Stefan Beller
Due to the previous condition we know "set_sign != NULL" at that point.

Signed-off-by: Stefan Beller 
---
 diff.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 272b0b93834..f7251c89cbb 100644
--- a/diff.c
+++ b/diff.c
@@ -997,8 +997,7 @@ static void emit_line_ws_markup(struct diff_options *o,
emit_line_0(o, set, 0, reset, sign, line, len);
else if (!ws) {
/* Emit just the prefix, then the rest. */
-   emit_line_0(o, set_sign ? set_sign : set, !!set_sign, reset,
-   sign, "", 0);
+   emit_line_0(o, set_sign, !!set_sign, reset, sign, "", 0);
emit_line_0(o, set, 0, reset, 0, line, len);
} else if (blank_at_eof)
/* Blank line at EOF - paint '+' as well */
-- 
2.18.0.345.g5c9ce644c3-goog



[PATCH 2/8] t3206: add color test for range-diff --dual-color

2018-07-27 Thread Stefan Beller
The 'expect'ed outcome is taken by running the 'range-diff |decode';
it is not meant as guidance, rather as a documentation of the current
situation.

Signed-off-by: Stefan Beller 
---
 t/t3206-range-diff.sh | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/t/t3206-range-diff.sh b/t/t3206-range-diff.sh
index 2237c7f4af9..ef652865cd7 100755
--- a/t/t3206-range-diff.sh
+++ b/t/t3206-range-diff.sh
@@ -142,4 +142,43 @@ test_expect_success 'changed message' '
test_cmp expected actual
 '
 
+test_expect_success 'simple coloring' '
+   q_to_tab >expect <<-EOF &&
+   1:  a4b = 1:  f686024 s/5/A/
+   2:  f51d370 ! 2:  
4ab067d s/4/A/
+   @@ -2,6 +2,8 @@
+
+s/4/A/
+
+   +Also a silly comment here!
+   +
+diff --git a/file b/file
+--- a/file
++++ b/file
+   3:  0559556 ! 3:  
b9cb956 s/11/B/
+   @@ -10,7 +10,7 @@
+ 9
+ 10
+-11
+   -+BB
+   ++B
+ 12
+ 13
+ 14
+   4:  d966c5c ! 4:  
8add5f1 s/12/B/
+   @@ -8,7 +8,7 @@
+@@
+ 9
+ 10
+   - BB
+   + B
+-12
++B
+ 13
+   EOF
+   git range-diff changed...changed-message --color --dual-color 
>actual.raw &&
+   test_decode_color >actual 

[PATCH 1/8] test_decode_color: understand FAINT and ITALIC

2018-07-27 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---
 t/test-lib-functions.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 2b2181dca09..be8244c47b5 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -42,6 +42,8 @@ test_decode_color () {
function name(n) {
if (n == 0) return "RESET";
if (n == 1) return "BOLD";
+   if (n == 2) return "FAINT";
+   if (n == 3) return "ITALIC";
if (n == 7) return "REVERSE";
if (n == 30) return "BLACK";
if (n == 31) return "RED";
-- 
2.18.0.345.g5c9ce644c3-goog



[PATCH 0/8] Add color test for range-diff, simplify diff.c

2018-07-27 Thread Stefan Beller
This is based on origin/js/range-diff (c255a588bcd) and is also available
via
  git fetch https://github.com/stefanbeller/git ws_cleanup-ontop-range-diff-2 

This adds some color testing to range-diff and then attempts to make the code
in diff.c around emit_line_0 more readable.

I think we can go further bit more (by e.g. cherry-picking 
"ws: do not reset and set color twice" found at [1]), but that would be feature
work. This series doesn't change any existing tests!

[1] https://github.com/stefanbeller/git/tree/ws_cleanup-ontop-range-diff

Thanks,
Stefan

Stefan Beller (8):
  test_decode_color: understand FAINT and ITALIC
  t3206: add color test for range-diff --dual-color
  diff.c: simplify caller of emit_line_0
  diff.c: reorder arguments for emit_line_ws_markup
  diff.c: add set_sign to emit_line_0
  diff: use emit_line_0 once per line
  diff.c: compute reverse locally in emit_line_0
  diff.c: rewrite emit_line_0 more understandably

 diff.c  | 94 +++--
 t/t3206-range-diff.sh   | 39 +
 t/test-lib-functions.sh |  2 +
 3 files changed, 93 insertions(+), 42 deletions(-)

-- 
2.18.0.345.g5c9ce644c3-goog



Re: [PATCH] config: fix case sensitive subsection names on writing

2018-07-27 Thread Stefan Beller
I cc'd the wrong peff. Sorry about that.

On Fri, Jul 27, 2018 at 4:36 PM Stefan Beller  wrote:



[PATCH] config: fix case sensitive subsection names on writing

2018-07-27 Thread Stefan Beller
A use reported a submodule issue regarding strange case indentation
issues, but it could be boiled down to the following test case:

  $ git init test  && cd test
  $ git config foo."Bar".key test
  $ git config foo."bar".key test
  $ tail -n 3 .git/config
  [foo "Bar"]
key = test
key = test

Sub sections are case sensitive and we have a test for correctly reading
them. However we do not have a test for writing out config correctly with
case sensitive subsection names, which is why this went unnoticed in
6ae996f2acf (git_config_set: make use of the config parser's event
stream, 2018-04-09)

Make the subsection case sensitive and provide a test for writing.
The test for reading is just above this test.

Reported-by: JP Sugarbroad 
Signed-off-by: Stefan Beller 
---

I really appreciate the work by DScho (and Peff as I recall him as an active
reviewer there) on 4f4d0b42bae (Merge branch 'js/empty-config-section-fix',
2018-05-08), as the corner cases are all correct, modulo the one line fix
in this patch.

Thanks,
Stefan

 config.c  |  2 +-
 t/t1300-config.sh | 57 +++
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/config.c b/config.c
index 7968ef7566a..de646d2c56f 100644
--- a/config.c
+++ b/config.c
@@ -2362,7 +2362,7 @@ static int store_aux_event(enum config_event_t type,
store->is_keys_section =
store->parsed[store->parsed_nr].is_keys_section =
cf->var.len - 1 == store->baselen &&
-   !strncasecmp(cf->var.buf, store->key, store->baselen);
+   !strncmp(cf->var.buf, store->key, store->baselen);
if (store->is_keys_section) {
store->section_seen = 1;
ALLOC_GROW(store->seen, store->seen_nr + 1,
diff --git a/t/t1300-config.sh b/t/t1300-config.sh
index 03c223708eb..710e2b04ad8 100755
--- a/t/t1300-config.sh
+++ b/t/t1300-config.sh
@@ -1218,6 +1218,63 @@ test_expect_success 'last one wins: three level vars' '
test_cmp expect actual
 '
 
+test_expect_success 'old-fashioned settings are case insensitive' '
+   test_when_finished "rm -f testConfig testConfig_expect 
testConfig_actual" &&
+
+   cat >testConfig <<-EOF &&
+   # insensitive:
+   [V.A]
+   r = value1
+   EOF
+   q_to_tab >testConfig_expect <<-EOF &&
+   # insensitive:
+   [V.A]
+   Qr = value2
+   EOF
+
+   for key in "v.a.r" "V.A.r" "v.A.r" "V.a.r"
+   do
+   cp testConfig testConfig_actual &&
+   git config -f testConfig_actual v.a.r value2 &&
+   test_cmp testConfig_expect testConfig_actual
+   done
+'
+
+test_expect_success 'setting different case sensitive subsections ' '
+   test_when_finished "rm -f testConfig testConfig_expect 
testConfig_actual" &&
+
+   cat >testConfig <<-EOF &&
+   # V insensitive A sensitive:
+   [V "A"]
+   r = value1
+   # same as above:
+   [V "a"]
+   r = value2
+   EOF
+
+   git config -f testConfig v.a.r value3 &&
+   q_to_tab >testConfig_expect <<-EOF &&
+   # V insensitive A sensitive:
+   [V "A"]
+   r = value1
+   # same as above:
+   [V "a"]
+   Qr = value3
+   EOF
+   test_cmp testConfig_expect testConfig &&
+
+   git config -f testConfig v.A.r value4 &&
+   q_to_tab >testConfig_expect <<-EOF &&
+   # V insensitive A sensitive:
+   [V "A"]
+   Qr = value4
+   # same as above:
+   [V "a"]
+   Qr = value3
+   EOF
+   test_cmp testConfig_expect testConfig
+'
+
 for VAR in a .a a. a.0b a."b c". a."b c".0d
 do
test_expect_success "git -c $VAR=VAL rejects invalid '$VAR'" '
-- 
2.18.0.345.g5c9ce644c3-goog



Re: [PATCH] config: fix case sensitive subsection names on writing

2018-07-27 Thread Stefan Beller
On Fri, Jul 27, 2018 at 2:39 PM Junio C Hamano  wrote:
>
> Brandon Williams  writes:
>
> > Either way you're probably going to need to be careful about how you do
> > string comparison against the different parts.
>
> Good suggestion.

The suggestion is a rabit hole and was a waste of time.

However I did some more manual testing and inspected the code
with trace_printf debugging, and it turns out the strings compared
are brought into the correct form already.

> >> +# v.a.r and v.A.r are not the same variable, as the middle
> >> +# level of a three-level configuration variable name is
> >> +# case sensitive.
>
> In other words, perhaps add
>
> # "V.a.r" and "v.a.R" are the same variable, though
>
> and corresponding test here?

I removed that section and went for a shorter,
more concise expression.

patch to follow.


[PATCH] xdiff: reduce indent heuristic overhead

2018-07-27 Thread Stefan Beller
Skip searching for better indentation heuristics if we'd slide a hunk more
than its size. This is the easiest fix proposed in the analysis[1] in
response to a patch that mercurial took for xdiff to limit searching
by a constant. Using a performance test as:

 #!python
 open('a', 'w').write(" \n" * 100)
 open('b', 'w').write(" \n" * 101)

This patch reduces the execution of "git diff --no-index a b" from
0.70s to 0.31s. However limiting the sliding to the size of the diff hunk,
which was proposed as a solution (that I found easiest to implement for
now) is not optimal for cases like

 open('a', 'w').write(" \n" * 100)
 open('b', 'w').write(" \n" * 200)

as then we'd still slide 100 times.

In addition to limiting the sliding to size of the hunk, also limit by a
constant. Choose 100 lines as the constant as that fits more than a screen,
which really means that the diff sliding is probably not providing a lot
of benefit anyway.

[1] 
https://public-inbox.org/git/72ac1ac2-f567-f241-41d6-d0f83072e...@alum.mit.edu/

Reported-by: Jun Wu 
Analysis-by: Michael Haggerty 
Signed-off-by: Stefan Beller 
---

> Plus, it should always give the right answer.

I was tempted to do just that, but I caved. The diff is correct,
and the hunk sliding is purely to appease the visual aspect of
humans looking at diffs. If your diff can slide more than a
monitor height, you're not interested in the best slided diff,
but something else is going on.

> There are cases where it will be
> more expensive than a fixed `MAX_BORING`, but I bet on average it will
> be faster.

So I did both, settling for performance as the utmost desire. ;-)

 xdiff/xdiffi.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 0de1ef463bf..91e98ee9869 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -591,6 +591,11 @@ static void measure_split(const xdfile_t *xdf, long split,
  */
 #define INDENT_WEIGHT 60
 
+/*
+ * How far do we slide a hunk at most?
+ */
+#define INDENT_HEURISTIC_MAX_SLIDING 100
+
 /*
  * Compute a badness score for the hypothetical split whose measurements are
  * stored in m. The weight factors were determined empirically using the tools 
and
@@ -903,7 +908,12 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long 
flags) {
long shift, best_shift = -1;
struct split_score best_score;
 
-   for (shift = earliest_end; shift <= g.end; shift++) {
+   shift = earliest_end;
+   if (g.end - groupsize - 1 > shift)
+   shift = g.end - groupsize - 1;
+   if (g.end - INDENT_HEURISTIC_MAX_SLIDING > shift)
+   shift = g.end - INDENT_HEURISTIC_MAX_SLIDING;
+   for (; shift <= g.end; shift++) {
struct split_measurement m;
struct split_score score = {0, 0};
 
-- 
2.18.0.345.g5c9ce644c3-goog



[PATCH] config: fix case sensitive subsection names on writing

2018-07-27 Thread Stefan Beller
A use reported a submodule issue regarding strange case indentation
issues, but it could be boiled down to the following test case:

  $ git init test  && cd test
  $ git config foo."Bar".key test
  $ git config foo."bar".key test
  $ tail -n 3 .git/config
  [foo "Bar"]
key = test
key = test

Sub sections are case sensitive and we have a test for correctly reading
them. However we do not have a test for writing out config correctly with
case sensitive subsection names, which is why this went unnoticed in
6ae996f2acf (git_config_set: make use of the config parser's event
stream, 2018-04-09)

Make the subsection case sensitive and provide a test for both reading
and writing.

Reported-by: JP Sugarbroad 
Signed-off-by: Stefan Beller 
---
 config.c  |  2 +-
 t/t1300-config.sh | 18 ++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/config.c b/config.c
index 3aacddfec4b..3ded92b678b 100644
--- a/config.c
+++ b/config.c
@@ -2374,7 +2374,7 @@ static int store_aux_event(enum config_event_t type,
store->is_keys_section =
store->parsed[store->parsed_nr].is_keys_section =
cf->var.len - 1 == store->baselen &&
-   !strncasecmp(cf->var.buf, store->key, store->baselen);
+   !strncmp(cf->var.buf, store->key, store->baselen);
if (store->is_keys_section) {
store->section_seen = 1;
ALLOC_GROW(store->seen, store->seen_nr + 1,
diff --git a/t/t1300-config.sh b/t/t1300-config.sh
index 03c223708eb..8325d4495f4 100755
--- a/t/t1300-config.sh
+++ b/t/t1300-config.sh
@@ -1218,6 +1218,24 @@ test_expect_success 'last one wins: three level vars' '
test_cmp expect actual
 '
 
+test_expect_success 'setting different case subsections ' '
+   test_when_finished "rm -f caseSens caseSens_actual caseSens_expect" &&
+
+   # v.a.r and v.A.r are not the same variable, as the middle
+   # level of a three-level configuration variable name is
+   # case sensitive.
+   git config -f caseSens v."A".r VAL &&
+   git config -f caseSens v."a".r val &&
+
+   echo VAL >caseSens_expect &&
+   git config -f caseSens v."A".r >caseSens_actual &&
+   test_cmp caseSens_expect caseSens_actual &&
+
+   echo val >caseSens_expect &&
+   git config -f caseSens v."a".r >caseSens_actual &&
+   test_cmp caseSens_expect caseSens_actual
+'
+
 for VAR in a .a a. a.0b a."b c". a."b c".0d
 do
test_expect_success "git -c $VAR=VAL rejects invalid '$VAR'" '
-- 
2.18.0.345.g5c9ce644c3-goog



Re: [PATCH 2/3] refs: introduce new API, wrap old API shallowly around new API

2018-07-27 Thread Stefan Beller
On Fri, Jul 27, 2018 at 10:19 AM Brandon Williams  wrote:
>
> On 07/27, Duy Nguyen wrote:
> > On Fri, Jul 27, 2018 at 2:40 AM Stefan Beller  wrote:
> > >
> > > Currently the refs API takes a 'ref_store' as an argument to specify
> > > which ref store to iterate over; however it is more useful to specify
> > > the repository instead (or later a specific worktree of a repository).
> >
> > There is no 'later'. worktrees.c already passes a worktree specific
> > ref store. If you make this move you have to also design a way to give
> > a specific ref store now.
> >
> > Frankly I still dislike the decision to pass repo everywhere,
> > especially when refs code already has a nice ref-store abstraction.
> > Some people frown upon back pointers. But I think adding a back
> > pointer in ref-store, pointing back to the repository is the right
> > move.
>
> I don't quite understand why the refs code would need a whole repository
> and not just the ref-store it self.  I thought the refs code was self
> contained enough that all its state was based on the passed in
> ref-store.  If its not, then we've done a terrible job at avoiding
> layering violations (well actually we're really really bad at this in
> general, and I *think* we're trying to make this better though the
> object store/index refactoring).
>
> If anything I would expect that the actual ref-store code would remain
> untouched by any refactoring and that instead the higher-level API that
> hasn't already been converted to explicitly use a ref-store (and instead
> just calls the underlying impl with get_main_ref_store()).  Am I missing
> something here?

Then I think we might want to go with the original in Stolees proposal
https://github.com/gitgitgadget/git/pull/11/commits/300db80140dacc927db0d46c804ca0ef4dcc1be1
but there the call to for_each_replace_ref just looks ugly, as it takes the
repository as both the repository where to obtain the ref store from
as well as the back pointer.

I anticipate that we need to have a lot of back pointers to the repository
in question, hence I think we should have the repository pointer promoted
to not just a back pointer.


Re: [RFC PATCH 3/5] pack-objects: add delta-islands support

2018-07-27 Thread Stefan Beller
On Fri, Jul 27, 2018 at 6:13 AM Jeff King  wrote:
>
> On Tue, Jul 24, 2018 at 10:20:05AM -0700, Stefan Beller wrote:
>
> > So in my understanding we have a "common base pack" and specific
> > packs on top for each "island".
>
> Sort of. This is another hacky part. The islands themselves are
> generally just about forbidding deltas, and not any particular kind of
> layering.
>
> But there's some magic layering only for the "core" island, which gets
> to go first (and makes a sort of pseudo-pack at the front of the one
> pack). And then everything else is written willy nilly. This is a hack
> to try to make the "blit the pack bytes out" code path for cloning fast.

yup, I do understand its purpose; we had the same discussions here
for the JGit based hosting.

So you are saying island hopping is disallowed, but the core island
has an airport using the spokes system to reach all other islands?
(I described the core island as sea bed before). Sounds reasonable.

> So no, we don't really layer in any sane way. If pack-objects were fed
> the topological relationships between the forks, in theory we could
> create a layered packfile that respects that.
>
> But even that is not quite enough. At the time of forking, you might
> imagine that torvalds/linux has the base pack, and then somebody forks
> from them and contains all of those objects plus more, and somebody
> forks from them, and so on. But that's just a snapshot. Later
> torvalds/linux will get a bunch of new objects pushed to it. And some of
> its forks will merge those objects, too. But some of them will just rot,
> abandoned, as nobody ever touches them again.
>
> So I don't think there's much to be gained by paying attention to the
> external forking relationships. We have to discover afresh the
> relationships between objects, and which refs (and thus which islands)
> point to them.
>
> One thing I don't think we ever tried was doubling down on the
> islandCore concept and making the "root" fork as tightly packed as it
> could be (with the assumption that _most_ people grab that). And then
> just respect the islands for all the other objects (remember this is an
> optimization, so the worst case is somebody asks for an object during a
> fetch and we have to throw away its on-disk delta).
>
> That would solve the problem that fetching torvalds/linux from GitHub
> yields a bigger pack than fetching it from kernel.org. But again, it's
> making that root fork weirdly magical. People who fetch solely from
> other forks won't get any benefit (and may even see worse packs).

Thanks for the explanation.
I think this discussion just hints at me being dense reading the
documentation. After all I like the concept.

Thanks,
Stefan


[RFC PATCH 0/3] Migrate the refs API to take the repository argument

2018-07-26 Thread Stefan Beller
The second patch is the real API proposal.
Unlike the lookup_* series, which caused a lot of integration pain to Junio,
I plan to structure this in a different way, by having multiple steps:

 (1) in this (later to be non-RFC) series, add the new API that passes thru
 the repository; for now do not replace refs_store argument by
 struct repository.
 (2) the last patch is a demo of converting one of the callers over
 to the new API; this would need to be done for all of them
 
 (3) After some time do a cleanup series to remove callers of the
 old API fromly introduced series that are currently in flight.
 (4) Remove the old API.

 (5) Introduce the final API removing the refs_store
 (6) convert all callers to the final API, using this same dual step approach
 (7) remove this API
 
Steps 1,2 will be done in this series (2 is done only as demo here
for one function, but the non-RFC would do it all)

Steps 3,4 would be done once there are no more series in flight using
the old API.

Before continuing on step (2), I would want to ask for your thoughts
of (1).

Also note that after step (1) before (4) refs.h looks messy as well as
between (5) and (7).

Thanks,
Stefan


Stefan Beller (3):
  refs.c: migrate internal ref iteration to pass thru repository
argument
  refs: introduce new API, wrap old API shallowly around new API
  replace: migrate to for_each_replace_repo_ref

 builtin/replace.c|   9 +-
 refs.c   | 187 ++
 refs.h   | 362 ++-
 refs/iterator.c  |   6 +-
 refs/refs-internal.h |   5 +-
 replace-object.c |   7 +-
 6 files changed, 464 insertions(+), 112 deletions(-)

-- 
2.18.0.345.g5c9ce644c3-goog



[PATCH 2/3] refs: introduce new API, wrap old API shallowly around new API

2018-07-26 Thread Stefan Beller
Currently the refs API takes a 'ref_store' as an argument to specify
which ref store to iterate over; however it is more useful to specify
the repository instead (or later a specific worktree of a repository).

Introduce a new API, that takes a repository struct instead of a ref store;
the repository argument is also passed through to the callback, which is
of type 'each_repo_ref_fn' that is introduced in a previous patch and is
an extension of the 'each_ref_fn' type with the additional repository
argument.

We wrap the old API as in a very shallow way around the new API,
by wrapping the callback and the callback data into a new callback
to translate between the 'each_ref_fn' and 'each_repo_ref_fn' type.

The wrapping implementation could be done either in refs.c or as presented
in this patch as a 'static inline' in the header file itself. This has the
advantage that the line of the old API is changed (and not just its
implementation in refs.c), such that it will show up in git-blame.

The new API is not perfect yet, as some of them take both a 'repository'
and 'ref_store' argument. This is done for an easy migration:
If the ref_store argument is non-NULL, prefer it over the repository
to compute which refs to iterate over. That way we can ensure that this
step of API migration doesn't confuse which ref store to work on.

Once all callers have migrated to this newly introduced API, we can
get rid of the old API; a second migration step in the future will remove
the then useless ref_store argument

Signed-off-by: Stefan Beller 
---
 refs.c | 158 +++---
 refs.h | 352 +++--
 2 files changed, 407 insertions(+), 103 deletions(-)

diff --git a/refs.c b/refs.c
index 2513f77acb3..27e3772fca9 100644
--- a/refs.c
+++ b/refs.c
@@ -217,7 +217,7 @@ char *resolve_refdup(const char *refname, int resolve_flags,
 /* The argument to filter_refs */
 struct ref_filter {
const char *pattern;
-   each_ref_fn *fn;
+   each_repo_ref_fn *fn;
void *cb_data;
 };
 
@@ -289,14 +289,15 @@ int ref_filter_match(const char *refname,
return 1;
 }
 
-static int filter_refs(const char *refname, const struct object_id *oid,
-  int flags, void *data)
+static int filter_refs(struct repository *r,
+  const char *refname, const struct object_id *oid,
+  int flags, void *data)
 {
struct ref_filter *filter = (struct ref_filter *)data;
 
if (wildmatch(filter->pattern, refname, 0))
return 0;
-   return filter->fn(refname, oid, flags, filter->cb_data);
+   return filter->fn(r, refname, oid, flags, filter->cb_data);
 }
 
 enum peel_status peel_object(const struct object_id *name, struct object_id 
*oid)
@@ -371,46 +372,50 @@ void warn_dangling_symrefs(FILE *fp, const char *msg_fmt, 
const struct string_li
for_each_rawref(warn_if_dangling_symref, );
 }
 
-int refs_for_each_tag_ref(struct ref_store *refs, each_ref_fn fn, void 
*cb_data)
+int refs_for_each_tag_repo_ref(struct repository *r, each_repo_ref_fn fn, void 
*cb_data)
 {
-   return refs_for_each_ref_in(refs, "refs/tags/", fn, cb_data);
+   return refs_for_each_repo_ref_in(r, NULL, "refs/tags/", fn, cb_data);
 }
 
-int for_each_tag_ref(each_ref_fn fn, void *cb_data)
+int for_each_tag_repo_ref(struct repository *r, each_repo_ref_fn fn, void 
*cb_data)
 {
-   return refs_for_each_tag_ref(get_main_ref_store(the_repository), fn, 
cb_data);
+   return refs_for_each_tag_repo_ref(r, fn, cb_data);
 }
 
-int refs_for_each_branch_ref(struct ref_store *refs, each_ref_fn fn, void 
*cb_data)
+int refs_for_each_branch_repo_ref(struct repository *r, struct ref_store *refs,
+ each_repo_ref_fn fn, void *cb_data)
 {
-   return refs_for_each_ref_in(refs, "refs/heads/", fn, cb_data);
+   return refs_for_each_repo_ref_in(r, refs, "refs/heads/", fn, cb_data);
 }
 
-int for_each_branch_ref(each_ref_fn fn, void *cb_data)
+int for_each_branch_repo_ref(struct repository *r, each_repo_ref_fn fn, void 
*cb_data)
 {
-   return refs_for_each_branch_ref(get_main_ref_store(the_repository), fn, 
cb_data);
+   return refs_for_each_branch_repo_ref(r, NULL, fn, cb_data);
 }
 
-int refs_for_each_remote_ref(struct ref_store *refs, each_ref_fn fn, void 
*cb_data)
+int refs_for_each_remote_repo_ref(struct repository *r, struct ref_store *refs,
+ each_repo_ref_fn fn, void *cb_data)
 {
-   return refs_for_each_ref_in(refs, "refs/remotes/", fn, cb_data);
+   return refs_for_each_repo_ref_in(r, refs, "refs/remotes/", fn, cb_data);
 }
 
-int for_each_remote_ref(each_ref_fn fn, void *cb_data)
+int for_each_remote_repo_ref(struct repository *r, each_repo_ref_fn fn, void 
*cb_data)
 {
-   return refs_for_each_remote_ref(get_main_ref_store(t

[PATCH 3/3] replace: migrate to for_each_replace_repo_ref

2018-07-26 Thread Stefan Beller
By upgrading the replace mechanism works well for all repositories

Signed-off-by: Stefan Beller 
---
 builtin/replace.c | 9 +
 replace-object.c  | 7 ---
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/builtin/replace.c b/builtin/replace.c
index ef22d724bbc..fd8a935eb77 100644
--- a/builtin/replace.c
+++ b/builtin/replace.c
@@ -39,7 +39,8 @@ struct show_data {
enum replace_format format;
 };
 
-static int show_reference(const char *refname, const struct object_id *oid,
+static int show_reference(struct repository *r, const char *refname,
+ const struct object_id *oid,
  int flag, void *cb_data)
 {
struct show_data *data = cb_data;
@@ -56,9 +57,9 @@ static int show_reference(const char *refname, const struct 
object_id *oid,
if (get_oid(refname, ))
return error("Failed to resolve '%s' as a valid 
ref.", refname);
 
-   obj_type = oid_object_info(the_repository, ,
+   obj_type = oid_object_info(r, ,
   NULL);
-   repl_type = oid_object_info(the_repository, oid, NULL);
+   repl_type = oid_object_info(r, oid, NULL);
 
printf("%s (%s) -> %s (%s)\n", refname, 
type_name(obj_type),
   oid_to_hex(oid), type_name(repl_type));
@@ -87,7 +88,7 @@ static int list_replace_refs(const char *pattern, const char 
*format)
 "valid formats are 'short', 'medium' and 'long'\n",
 format);
 
-   for_each_replace_ref(the_repository, show_reference, (void *));
+   for_each_replace_repo_ref(the_repository, show_reference, (void 
*));
 
return 0;
 }
diff --git a/replace-object.c b/replace-object.c
index 801b5c16789..c0457b8048c 100644
--- a/replace-object.c
+++ b/replace-object.c
@@ -6,7 +6,8 @@
 #include "repository.h"
 #include "commit.h"
 
-static int register_replace_ref(const char *refname,
+static int register_replace_ref(struct repository *r,
+   const char *refname,
const struct object_id *oid,
int flag, void *cb_data)
 {
@@ -25,7 +26,7 @@ static int register_replace_ref(const char *refname,
oidcpy(_obj->replacement, oid);
 
/* Register new object */
-   if (oidmap_put(the_repository->objects->replace_map, repl_obj))
+   if (oidmap_put(r->objects->replace_map, repl_obj))
die("duplicate replace ref: %s", refname);
 
return 0;
@@ -40,7 +41,7 @@ static void prepare_replace_object(struct repository *r)
xmalloc(sizeof(*r->objects->replace_map));
oidmap_init(r->objects->replace_map, 0);
 
-   for_each_replace_ref(r, register_replace_ref, NULL);
+   for_each_replace_repo_ref(r, register_replace_ref, NULL);
 }
 
 /* We allow "recursive" replacement. Only within reason, though */
-- 
2.18.0.345.g5c9ce644c3-goog



[PATCH 1/3] refs.c: migrate internal ref iteration to pass thru repository argument

2018-07-26 Thread Stefan Beller
In 60ce76d3581 (refs: add repository argument to for_each_replace_ref,
2018-04-11) and 0d296c57aec (refs: allow for_each_replace_ref to handle
arbitrary repositories, 2018-04-11), for_each_replace_ref learned how
to iterate over refs by a given arbitrary repository.
New attempts in the object store conversion have shown that it is useful
to have the repository handle available that the refs iteration is
currently iterating over.

To achieve this goal we will need to add a repository argument to
each_ref_fn in refs.h. However as many callers rely on the signature
such a patch would be too large.

So convert the internals of the ref subsystem first to pass through a
repository argument without exposing the change to the user. Assume
the_repository for the passed through repository, although it is not
used anywhere yet.

Signed-off-by: Stefan Beller 
---
 refs.c   | 39 +--
 refs.h   | 10 ++
 refs/iterator.c  |  6 +++---
 refs/refs-internal.h |  5 +++--
 4 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/refs.c b/refs.c
index fcfd3171e83..2513f77acb3 100644
--- a/refs.c
+++ b/refs.c
@@ -1390,17 +1390,50 @@ struct ref_iterator *refs_ref_iterator_begin(
  * non-zero value, stop the iteration and return that value;
  * otherwise, return 0.
  */
+static int do_for_each_repo_ref(struct repository *r, const char *prefix,
+   each_repo_ref_fn fn, int trim, int flags,
+   void *cb_data)
+{
+   struct ref_iterator *iter;
+   struct ref_store *refs = get_main_ref_store(r);
+
+   if (!refs)
+   return 0;
+
+   iter = refs_ref_iterator_begin(refs, prefix, trim, flags);
+
+   return do_for_each_repo_ref_iterator(r, iter, fn, cb_data);
+}
+
+struct do_for_each_ref_help {
+   each_ref_fn *fn;
+   void *cb_data;
+};
+
+static int do_for_each_ref_helper(struct repository *r,
+ const char *refname,
+ const struct object_id *oid,
+ int flags,
+ void *cb_data)
+{
+   struct do_for_each_ref_help *hp = cb_data;
+
+   return hp->fn(refname, oid, flags, hp->cb_data);
+}
+
 static int do_for_each_ref(struct ref_store *refs, const char *prefix,
   each_ref_fn fn, int trim, int flags, void *cb_data)
 {
struct ref_iterator *iter;
+   struct do_for_each_ref_help hp = { fn, cb_data };
 
if (!refs)
return 0;
 
iter = refs_ref_iterator_begin(refs, prefix, trim, flags);
 
-   return do_for_each_ref_iterator(iter, fn, cb_data);
+   return do_for_each_repo_ref_iterator(the_repository, iter,
+   do_for_each_ref_helper, );
 }
 
 int refs_for_each_ref(struct ref_store *refs, each_ref_fn fn, void *cb_data)
@@ -2029,10 +2062,12 @@ int refs_verify_refname_available(struct ref_store 
*refs,
 int refs_for_each_reflog(struct ref_store *refs, each_ref_fn fn, void *cb_data)
 {
struct ref_iterator *iter;
+   struct do_for_each_ref_help hp = { fn, cb_data };
 
iter = refs->be->reflog_iterator_begin(refs);
 
-   return do_for_each_ref_iterator(iter, fn, cb_data);
+   return do_for_each_repo_ref_iterator(the_repository, iter,
+do_for_each_ref_helper, );
 }
 
 int for_each_reflog(each_ref_fn fn, void *cb_data)
diff --git a/refs.h b/refs.h
index cc2fb4c68c0..80eec8bbc68 100644
--- a/refs.h
+++ b/refs.h
@@ -274,6 +274,16 @@ struct ref_transaction;
 typedef int each_ref_fn(const char *refname,
const struct object_id *oid, int flags, void *cb_data);
 
+/*
+ * The same as each_ref_fn, but also with a repository argument that
+ * contains the repository associated with the callback.
+ */
+typedef int each_repo_ref_fn(struct repository *r,
+const char *refname,
+const struct object_id *oid,
+int flags,
+void *cb_data);
+
 /*
  * The following functions invoke the specified callback function for
  * each reference indicated.  If the function ever returns a nonzero
diff --git a/refs/iterator.c b/refs/iterator.c
index 2ac91ac3401..629e00a122a 100644
--- a/refs/iterator.c
+++ b/refs/iterator.c
@@ -407,15 +407,15 @@ struct ref_iterator *prefix_ref_iterator_begin(struct 
ref_iterator *iter0,
 
 struct ref_iterator *current_ref_iter = NULL;
 
-int do_for_each_ref_iterator(struct ref_iterator *iter,
-each_ref_fn fn, void *cb_data)
+int do_for_each_repo_ref_iterator(struct repository *r, struct ref_iterator 
*iter,
+ each_repo_ref_fn fn, void *cb_data)
 {
int retval = 0, ok;
struct ref_iterator *old_ref_iter = current_ref_iter;
 
current_ref_iter = iter;
 

Re: [PATCH v2 0/6] git-submodule.sh: convert part of cmd_update to C

2018-07-26 Thread Stefan Beller
>
> To allow me to protest in a timely manner, I wanted to teach GitGitGadget
> (which is the main reason I work on range-diff, as you undoubtedly suspect
> by now) to warn me about such instances.

I did not suspect that GGG is the prime motivation for range diff; as it proves
useful (a) on its own, e.g. looking at the updates in pu and (b) is helping the
current workflow very much.


Re: [PATCH] RFC Highlight keywords in remote sideband output.

2018-07-26 Thread Stefan Beller
On Thu, Jul 26, 2018 at 10:18 AM Han-Wen Nienhuys  wrote:
>
> Supported keywords are "error", "warning", "hint" and "success".

Thanks for taking this upstream. :-)

>
> TODO:
>  * make the coloring optional? What variable to use?

This is the natural extension of the topic merged at a56fb3dcc09
(Merge branch 'js/colored-push-errors', 2018-05-08), which was merged
rather recently, so I'd think extending the color.transport option would be
useful here. (cc'd the authors of that series)

>  * doc for the coloring option.



>  * how to test?

I think the best way to get started with a test is to be inspired by
8301266afa4 (push: test to verify that push errors are colored,
2018-04-21) from that series.

> Signed-off-by: Han-Wen Nienhuys 
> Change-Id: I090412a1288bc2caef0916447e28c2d0199da47d

We do not do Change-Ids in git upstream. :/
As different workflows have different edges, everyone has their own
little script to deal with those. For example Brandon has
https://github.com/bmwill/dotfiles/blob/master/bin/check-patch
that contains a section to remove change ids

# Remove Change-Id from patch
sed -i "/Change-Id:/d" "$f"

> +void emit_sideband(struct strbuf *dest, const char *src, int n) {

Coding style: we start the brace in the next line for new functions
(but not after if/while/for)

Also I did not think hard enough in the internal review, as this function
is not emitting to the sideband. that is solely done by the xwrite
call in recv_sideband. So maybe prepare_sideband would be a better
name?


> +// NOSUBMIT - maybe use transport.color property?

Yes, that would be my suggestion (note that we do not use // comments)

> +int want_color = want_color_stderr(GIT_COLOR_AUTO);
> +
> +if (!want_color) {
> +strbuf_add(dest, src, n);
> +return;
> +}
> +
> +struct kwtable {
> +const char* keyword;
> +const char* color;
> +} keywords[] = {
> +{"hint", GIT_COLOR_YELLOW},
> +{"warning", GIT_COLOR_BOLD_YELLOW},
> +{"success", GIT_COLOR_BOLD_GREEN},
> +{"error", GIT_COLOR_BOLD_RED},
> +{},
> +};
> +
> +while (isspace(*src)) {
> +strbuf_addch(dest, *src);
> +src++;
> +n--;
> +}
> +
> +for (struct kwtable* p = keywords; p->keyword; p++) {
> +int len = strlen(p->keyword);
> +if (!strncasecmp(p->keyword, src, len) && 
> !isalnum(src[len])) {
> +strbuf_addstr(dest, p->color);
> +strbuf_add(dest, src, len);
> +strbuf_addstr(dest, GIT_COLOR_RESET);
> +n -= len;
> +src += len;
> +break;
> +}
> +}
> +
> +strbuf_add(dest, src, n);
> +}
> +
>
>  /*
>   * Receive multiplexed output stream over git native protocol.
> @@ -48,8 +95,10 @@ int recv_sideband(const char *me, int in_stream, int out)
> len--;
> switch (band) {
> case 3:
> -   strbuf_addf(, "%s%s%s", outbuf.len ? "\n" : "",
> -   DISPLAY_PREFIX, buf + 1);
> +   strbuf_addf(, "%s%s", outbuf.len ? "\n" : "",
> +   DISPLAY_PREFIX);
> +emit_sideband(, buf+1, len);
> +
> retval = SIDEBAND_REMOTE_ERROR;
> break;
> case 2:
> @@ -69,20 +118,22 @@ int recv_sideband(const char *me, int in_stream, int out)
> if (!outbuf.len)
> strbuf_addstr(, 
> DISPLAY_PREFIX);
> if (linelen > 0) {
> -   strbuf_addf(, "%.*s%s%c",
> -   linelen, b, suffix, *brk);
> -   } else {
> -   strbuf_addch(, *brk);
> +emit_sideband(, b, linelen);
> +strbuf_addstr(, suffix);
> }
> +
> +strbuf_addch(, *brk);
> xwrite(2, outbuf.buf, outbuf.len);
> strbuf_reset();
>
> b = brk + 1;
> }
>
> -   if (*b)
> -   strbuf_addf(, "%s%s", outbuf.len ?
> -   "" : DISPLAY_PREFIX, b);
> +   if (*b) {
> +   strbuf_addstr(, outbuf.len ?
> +   "" : DISPLAY_PREFIX);
> +emit_sideband(, b, strlen(b));
> +   

Re: What's cooking in git.git (Jul 2018, #03; Wed, 25)

2018-07-25 Thread Stefan Beller
On Wed, Jul 25, 2018 at 3:13 PM Junio C Hamano  wrote:
>
> Here are the topics that have been cooking.  Commits prefixed with
> '-' are only in 'pu' (proposed updates) while commits prefixed with
> '+' are in 'next'.  The ones marked with '.' do not appear in any of
> the integration branches, but I am still holding onto them.
>
> Many topics have moved to 'master' and 'next' from 'next' to 'pu'
> respectively.
>
> You can find the changes described here in the integration branches
> of the repositories listed at

(using an alias to fetch all using protocol v2)
Fetching o
>From https://github.com/gitster/git
 - [deleted] (none) -> origin/ab/fetch-tags-noclobber
 - [deleted] (none) -> origin/nd/use-the-index-compat-less
 - [deleted] (none) -> origin/sb/diff-color-move
remote: Counting objects: 830, done.
remote: Compressing objects: 100% (379/379), done.
remote: Total 830 (delta 404), reused 777 (delta 404), pack-reused 47
Receiving objects: 100% (830/830), 470.12 KiB | 9.04 MiB/s, done.
Resolving deltas: 100% (404/404), completed with 58 local objects.
fatal: Couldn't find remote ref refs/notes/amlog
error: Could not fetch o
...
(goes and pastes this into the email, what if I try again?)

$ git -c protocol.version=0 fetch --all
Fetching o
>From https://github.com/gitster/git
 * [new branch]  ab/newhash-is-sha256   ->
origin/ab/newhash-is-sha256
 + 6da893ab657...729b3925ed9 bb/make-developer-pedantic ->
origin/bb/make-developer-pedantic  (forced update)
 * [new branch]  bb/redecl-enum-fix ->
origin/bb/redecl-enum-fix
 * [new branch]  es/diff-color-move-fix ->
origin/es/diff-color-move-fix
 + 165e30f8529...d63a0b5e393 es/format-patch-rangediff  ->
origin/es/format-patch-rangediff  (forced update)
 + 1ac17a46fab...bb4a134ae84 jch-> origin/jch
(forced update)
 * [new branch]  jh/structured-logging  ->
origin/jh/structured-logging
 + 816325b2109...c255a588bcd js/range-diff  ->
origin/js/range-diff  (forced update)
   b7bd9486b05..ffc6fa0e396  master -> origin/master
   5c9ce644c39..a71716f1ade  next   -> origin/next
 + 8a589a4fc91...838143aa5c0 pu -> origin/pu
(forced update)
 + 0041456f5f1...16d09ff91a2 refs/notes/amlog   ->
refs/notes/amlog  (forced update)
Fetching googlesource
...


> * jh/structured-logging (2018-07-25) 25 commits
[...]
>  - structured-logging: design document
>  (this branch uses jh/json-writer.)
>
>  X-Gah.

I am not sure what to make of this comment?
Does it need review or is the design/intent to be
discussed?


> [Cooking]
[...]
> * sb/submodule-update-in-c (2018-07-18) 6 commits
>  - submodule--helper: introduce new update-module-mode helper
>  - builtin/submodule--helper: factor out method to update a single submodule
>  - builtin/submodule--helper: store update_clone information in a struct
>  - builtin/submodule--helper: factor out submodule updating
>  - git-submodule.sh: rename unused variables
>  - git-submodule.sh: align error reporting for update mode to use path
>
>  "git submodule update" is getting rewritten piece-by-piece into C.
>
>  It seems to pass its own self-tests standalone, but seems to break
>  horribly when merged to 'pu'.

I need to redo this, please eject from pu if that is easier for us.

> * ag/rebase-i-in-c (2018-07-10) 13 commits
[...]
>
>  Piecemeal rewrite of the remaining "rebase -i" machinery in C.
>
>  A reroll (which is rumored to be quite good) exists, but hasn't
>  been picked up yet.

Forgot to state so on either the mailing list (or Github or IRC),
but I read the tip of the reroll and I think it is good.

> * sb/object-store-lookup (2018-06-29) 33 commits
[...]
>
>  Will merge to 'master'.

Finally. Hooray! I am currently writing its successor in a
"less confrontational (but needing more work)"-kind-of way,
which converts the ref store to take repository objects.

Given this series (and how it was done/reviewed and yet
caused so much trouble), I'd like to spark a discussion on
how the community wants to see large scale refactorings
and eventually document it.

> * js/range-diff (2018-07-25) 21 commits
[...]
>  (this branch is used by es/format-patch-rangediff.)
>
>  "git tbdiff" that lets us compare individual patches in two
>  iterations of a topic has been rewritten and made into a built-in
>  command.
>
>  Undecided.
>
>  Many "The feature is useful" comments without much real review; we
>  know the feature is great as this mimicks tbdiff already so that is
>  not news.

It has also seen reviews regarding its non-coloring side in previous rounds,
which I think is mature by now. I suggested to submit the range-diff
without coloring on IRC, but it was not quite well received as colors
were seen as "the main benefit of range-diff" by DScho.
(http://colabti.org/irclogger/irclogger_log/git-devel?date=2018-07-25#l72)


Re: [GSoC][PATCH v4 00/20] rebase -i: rewrite in C

2018-07-25 Thread Stefan Beller
On Tue, Jul 24, 2018 at 9:34 AM Alban Gruin  wrote:
>
> This patch series rewrite the interactive rebase from shell to C.
>
> It is based on master (as of 2018-07-24).
>
> Changes since v3:
>
>  - The `--verbose` option is stored directly into opts.verbose
>
>  - Drop includes in rebase-interactive.h
>
>  - skip_unnecessary_picks() now returns an object_id instead of a string
>
>  - Add a test case to ensure interactive rebase aborts when the todo
>list only has commented-out commands
>
>  - complete_action() aborts when the todo list only has commented-out
>commands
>
>  - Drop the `keep_empty` parameter of complete_action()
>
>  - Don’t remove the modes `--shorten-oids` and `--expand-oids` from
>git-rebase--helper
>
>  - Replace `ret = !!x(); return ret` by `ret = x(); return !!ret`
>
>  - Fail if `--make-script` is provided with two arguments instead of
>one
>
>  - Rewrite write_basic_state() and init_basic_state() in C
>
>  - Rewrite git-rebase--interactive.sh as a builtin
>

I gave that series a read and I think it looks good.

Thanks,
Stefan


Re: [PATCH v4 00/21] Add `range-diff`, a `tbdiff` lookalike

2018-07-25 Thread Stefan Beller
On Mon, Jul 23, 2018 at 2:49 PM Junio C Hamano  wrote:
>
> Stefan Beller  writes:
>
> > On Sat, Jul 21, 2018 at 3:04 PM Johannes Schindelin via GitGitGadget
> >  wrote:
> >
> >> Side note: I work on implementing range-diff not only to make life easier 
> >> for reviewers who have to suffer through v2, v3, ... of my patch series, 
> >> but also to verify my changes before submitting a new iteration. And also, 
> >> maybe even more importantly, I plan to use it to verify my merging-rebases 
> >> of Git for
> >> Windows (for which I previously used to redirect the 
> >> pre-rebase/post-rebase diffs vs upstream and then compare them using `git 
> >> diff --no-index`). And of course any interested person can see what 
> >> changes were necessary e.g. in the merging-rebase of Git for Windows onto 
> >> v2.17.0 by running a command like:
> >
> > Thanks for making tools that makes the life of a Git developer easier!
> > (Just filed https://github.com/gitgitgadget/gitgitgadget/issues/26
> > which asks to break lines for this cover letter)
>
> Thanks.  These cover letters are unreadable without W Q
> (gnus-article-fill-long-lines)

While I had some comments on how I dislike some aspects of the
implementation, I think it proves its usefulness by my usage, so I
would suggest to merge it down to next as-is (and as soon as possible);
deferring the issues in the implementation to later.

I found running the range-diff on origin/pu to be a pleasant
experience, although that still highlights the issues of
in-exact coloring (the colors are chosen by the first two
characters of the diff, which leads to mis-coloring of
diff headers of the inner diff in the outer diff.

But despite the imperfection, I strongly urge to consider
the series as-is as good enough for inclusion.

Thanks,
Stefan

Thanks,
Stefan


Re: [PATCH] diff: --color-moved: rename "dimmed_zebra" to "dimmed-zebra"

2018-07-24 Thread Stefan Beller
On Tue, Jul 24, 2018 at 3:09 PM Jonathan Nieder  wrote:
> Hm.  Looks like dimmed_zebra was introduced in v2.15.0-rc0~16^2~2
> (2017-06-30), so it has been around for a while (about a year).  But I
> would like to be able to simplify by getting rid of it.
>
> https://codesearch.debian.net/search?q=dimmed_zebra doesn't find any
> instances of it being used outside Git itself.
>
> https://twitter.com/kornelski/status/982247477020508162 (found with a
> web search) shows that some people may have it in configuration.

Thanks for the searches.

>
> I don't have any good ideas about deprecating more aggressively, and
> the patch looks good, so
>
> Reviewed-by: Jonathan Nieder 

Same here. I wonder if we actually ever want to deprecate it further than
this patch, as the additional code to support it is only 2 lines.
So the effort to deprecate it (more than this patch) is more than
keeping the misnamed version around forever.

Thanks!
Stefan


Re: [PATCH] Documentation/diff-options: explain different diff algorithms

2018-07-24 Thread Stefan Beller
On Mon, Jul 23, 2018 at 9:41 PM Jonathan Nieder  wrote:
>
> Hi,
>
> Stefan Beller wrote:
>
> > As a user I wondered what the diff algorithms are about. Offer at least
> > a basic explanation on the differences of the diff algorithms.
>
> Interesting.  Let's see.
>
> [...]
> > --- a/Documentation/diff-options.txt
> > +++ b/Documentation/diff-options.txt
> > @@ -94,16 +94,34 @@ diff" algorithm internally.
> >   Choose a diff algorithm. The variants are as follows:
> >  +
> >  --
> > -`default`, `myers`;;
> > - The basic greedy diff algorithm. Currently, this is the default.
> >  `minimal`;;
> > + A diff as produced by the basic greedy algorithm described in
>
> Why this reordering?

because Myers (below) is constructed from minimal + heuristic.
Before this patch we say Myers is myers and minimal "spends
extra cycles", which is true if you read the code, as it just is

for (...) {
test_path
if (need_min)
continue;
if (heuristic_good_enough())
break;
}

https://github.com/git/git/blob/master/xdiff/xdiffi.c#L152

So the "minimal" algorithm is the basic myers algorithm,
and the "myers" algorithm is the extended version (extended
with a heuristic to be fast in corner cases, still producing good
enough diffs).

>
> > + link:http://www.xmailserver.org/diff2.pdf[An O(ND) Difference 
> > Algorithm and its Variations]
>
> This URL doesn't seem likely to stay stable.  Are appearances
> deceiving?  (Maybe so, since that's the same host as libxdiff's
> homepage <http://www.xmailserver.org/xdiff-lib.html>.)  It might be
> worth mentioning the author and date just in case.

I though about it and did not mention it, as the name
"An O(ND) Difference Algorithm and its Variations" is already
enough to find that paper quickly.

> > - Spend extra time to make sure the smallest possible diff is
> > - produced.
> > +`default`, `myers`;;
> > + The same algorithm as `minimal`, extended with a heuristic to
> > + reduce extensive searches. Currently, this is the default.
>
> Amusing --- this means that the Myers diff is spelled as "minimal"
> instead of "myers".
>
> >  `patience`;;
> > - Use "patience diff" algorithm when generating patches.
> > + Use "patience diff" algorithm when generating patches. This
> > + matches the longest common subsequence of unique lines on
> > + both sides, recursively. It obtained its name by the way the
> > + longest subsequence is found, as that is a byproduct of the
> > + patience sorting algorithm. If there are no unique lines left
> > + it falls back to `myers`. Empirically this algorithm produces
> > + a more readable output for code, but it does not garantuee
> > + the shortest output.
>
> Probably also worth mentioning that this was invented by Bram Cohen
> for the bazaar vcs.

I tried to balance the part that reads like a history lesson and the
immediately useful part (why is this better, when should I use it?)

Mentioning bazaar probably makes sense. Not sure about the author,
but I'll include him in a reroll of this patch.

> >  `histogram`;;
> > - This algorithm extends the patience algorithm to "support
> > - low-occurrence common elements".
> > + This algorithm re-implements the `patience` algorithm with
>
> What does reimplements mean here?  Do you mean that it is a variation
> on patience?

It is supposed to be a variation of patience, but as it comes with its
own implementation which I would not fully trust (as it was ported
from JGit, and reading the comments over there, a misunderstanding
of LCS made it possible to have only one midpoint before recursing)

So it is not just taking the patience algorithm and adding support for
"low-occurrence common elements"  (what does that even mean?
patience already distinguishes uniques and non-uniques), but
its own implementation, hence it is not bug-for-bug compatible.

> > + "support of low-occurrence common elements" and only picks
> > + one element of the LCS for the recursion. It is often the
>
> Does LCS mean longest common subsequence or longest common substring
> here?  Probably best to spell it out to avoid confusion.

When writing the patch I meant to refer to longest common subsequence
from above, but by picking just one element, this algorithm understands it
as longest common string.


>
> > + fastest, but in cornercases (when there are many longest
>
> s/cornercases/corner cases/
>
> > + common subsequences of the same length) it produces bad
&

Re: [RFC PATCH 3/5] pack-objects: add delta-islands support

2018-07-24 Thread Stefan Beller
On Tue, Jul 24, 2018 at 2:58 AM Jeff King  wrote:
>
> On Mon, Jul 23, 2018 at 11:52:49AM -0700, Stefan Beller wrote:
>
> > > +DELTA ISLANDS
> > > +-
> > [...]
> >
> > I had to read all of this [background information] to understand the
> > concept and I think it is misnamed, as my gut instinct first told me
> > to have deltas only "within an island and no island hopping is allowed".
> > (This message reads a bit like a commit message, not as documentation
> > as it is long winded, too).
>
> I'm not sure if I'm parsing your sentence correctly, but the reason I
> called them "islands" is exactly that you'd have deltas within an island
> and want to forbid island hopping. So I wasn't sure if you were saying
> "that's what I think, but not how the feature works".

Yeah, you want to avoid island hopping, but still want the sea bed to be
useful for all islands (i.e. to have the largest possible base pack, that all
islands can share without being overexposed with objects they should
not see). And that is the main feature IMHO. It is not so much about
island separation (as you could use its own physically separated repo
for these separation tasks), but the main selling point of this feature
is that it enables a base pack that can be shared across all islands
without violating ACLs for example or making one island "too big"
(having unneeded objects on the island).

So metaphorically speaking I assumed the sea bed to support
all islands, which themselves are independent from each other.

> There _is_ a tricky thing, which is that a given object is going to
> appear in many islands. So the rule is really "you cannot hop to a base
> that is not in all of your islands".

Yes, if islands were numbers, we'd be looking for the Greatest common
common divisor for all of them, as all islands have to have access to
all objects in the base pack.

>
> > What about renaming this feature to
> >
> > [pack]
> > excludePartialReach = refs/virtual/[0-9]]+/tags/
> >
> >   "By setting `pack.excludePartialReach`, object deltafication is
> >   prohibited for objects that are not reachable from all
> >   manifestations of the given regex"
> >
> > Cryptic, but it explains it in my mind in a shorter, more concise way. ;-)
>
> So I'm hopelessly biased at this point, having developed and worked with
> the feature under the "island" name for several years. But I find your
> name and explanation pretty confusing. :)

Ok.

>
> Worse, though, it does not have any noun to refer to the reachable set.
> The regex capture and the island names are an important part of the
> feature, because it lets you make a single island out of:
>
>   refs/virtual/([0-9]+)/heads
>   refs/virtual/([0-9]+)/tags
>
> but exclude:
>
>   refs/virtual/([0-9]+)/(foo)
>
> which goes into its own island ("123-foo" instead of "123"). So what's
> the equivalent nomenclature to "island name"?

So in my understanding we have a "common base pack" and specific
packs on top for each "island".

Regarding naming I find islands interesting and well fitting here, I don't
know of any better name.

I was just confused in the beginning as the name indicates that we care
about the islands, but we rather care about *not* hopping them.

Do you envision to have "groups of islands" (an atoll) for say all
open source clones of linux.git, such that you can layer the packs?
You would not just have the base pack + island pack, but have one
pack that is common to most islands?

Sorry if the initial email came off as a rant; I think the islands
metaphor is very good.

Thanks,
Stefan


[PATCH] Documentation/diff-options: explain different diff algorithms

2018-07-23 Thread Stefan Beller
As a user I wondered what the diff algorithms are about. Offer at least
a basic explanation on the differences of the diff algorithms.

Signed-off-by: Stefan Beller 
---
 Documentation/diff-options.txt | 32 +---
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index f466600972f..0d765482027 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -94,16 +94,34 @@ diff" algorithm internally.
Choose a diff algorithm. The variants are as follows:
 +
 --
-`default`, `myers`;;
-   The basic greedy diff algorithm. Currently, this is the default.
 `minimal`;;
-   Spend extra time to make sure the smallest possible diff is
-   produced.
+   A diff as produced by the basic greedy algorithm described in
+   link:http://www.xmailserver.org/diff2.pdf[An O(ND) Difference Algorithm 
and its Variations]
+`default`, `myers`;;
+   The same algorithm as `minimal`, extended with a heuristic to
+   reduce extensive searches. Currently, this is the default.
 `patience`;;
-   Use "patience diff" algorithm when generating patches.
+   Use "patience diff" algorithm when generating patches. This
+   matches the longest common subsequence of unique lines on
+   both sides, recursively. It obtained its name by the way the
+   longest subsequence is found, as that is a byproduct of the
+   patience sorting algorithm. If there are no unique lines left
+   it falls back to `myers`. Empirically this algorithm produces
+   a more readable output for code, but it does not garantuee
+   the shortest output.
 `histogram`;;
-   This algorithm extends the patience algorithm to "support
-   low-occurrence common elements".
+   This algorithm re-implements the `patience` algorithm with
+   "support of low-occurrence common elements" and only picks
+   one element of the LCS for the recursion. It is often the
+   fastest, but in cornercases (when there are many longest
+   common subsequences of the same length) it produces bad
+   results as seen in:
+
+   seq 1 100 >one
+   echo 99 > two
+   seq 1 2 98 >>two
+   git diff --no-index --histogram one two
+
 --
 +
 For instance, if you configured diff.algorithm variable to a
-- 
2.18.0.345.g5c9ce644c3-goog



Re: [PATCH 07/14] range-diff: respect diff_option.file rather than assuming 'stdout'

2018-07-23 Thread Stefan Beller
On Sun, Jul 22, 2018 at 2:58 AM Eric Sunshine  wrote:
>
> The actual diffs output by range-diff respect diff_option.file, which
> range-diff passes down the call-chain, thus are destination-agnostic.
> However, output_pair_header() is hard-coded to emit to 'stdout'. Fix
> this by making output_pair_header() respect diff_option.file, as well.
>
> Signed-off-by: Eric Sunshine 

Depending how much the range diff series has progressed
already, it might make sense to squash it there?

Thanks,
Stefan

> ---
>  range-diff.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/range-diff.c b/range-diff.c
> index 347b4a79f2..76e053c2b2 100644
> --- a/range-diff.c
> +++ b/range-diff.c
> @@ -328,7 +328,7 @@ static void output_pair_header(struct diff_options 
> *diffopt,
> }
> strbuf_addf(buf, "%s\n", color_reset);
>
> -   fwrite(buf->buf, buf->len, 1, stdout);
> +   fwrite(buf->buf, buf->len, 1, diffopt->file);
>  }
>
>  static struct userdiff_driver no_func_name = {
> --
> 2.18.0.345.g5c9ce644c3
>


Re: [PATCH v4 14/21] diff: add an internal option to dual-color diffs of diffs

2018-07-23 Thread Stefan Beller
> > - fputs(diff_line_prefix(o), file);
> > + if (first)
> > + fputs(diff_line_prefix(o), file);
> > + else if (!len)
> > + return;
>
> Can you explain this hunk in the log message?  I am not sure how the
> description in the log message relates to this change.  Is the idea
> of this change essentially "all the existing callers that aren't
> doing the diff-of-diffs send a non-NUL first character, and for them
> this change is a no-op.  New callers share most of the remainder of
> emit_line_0() logic but do not want to show the prefix, so the
> support for it is piggy-backing by a special case where first could
> be NUL"?

All but two caller have 'reverse' set to 0, using the arguments as before.

The other two callers are using the function twice to get the prefix
and set sign going, and then the second call to get the rest of the
line going (which needs to omit the prefix as that was done in the
first call) :

+   /* Emit just the prefix, then the rest. */
+   emit_line_0(o, set_sign ? set_sign : set, !!set_sign, reset,
+   sign, "", 0);
+   emit_line_0(o, set, 0, reset, 0, line, len);

I attempted to clean it up on top, but likely got it wrong as we have
no tests for colored range diffs, yet.
https://public-inbox.org/git/20180710174552.30123-3-sbel...@google.com/
My suggestion would be to first clarify emit_line_0 and have its arguments
and its execution map better to each other, (and as a result only needing to
have one call of emit_line_0 instead of two)

That is my understanding of the situation.

Thanks,
Stefan


Re: [PATCH v4 16/21] range-diff --dual-color: fix bogus white-space warning

2018-07-23 Thread Stefan Beller
On Sat, Jul 21, 2018 at 3:05 PM Johannes Schindelin via GitGitGadget
 wrote:
>
> From: Johannes Schindelin 
>
> When displaying a diff of diffs, it is possible that there is an outer
> `+` before a context line. That happens when the context changed between
> old and new commit. When that context line starts with a tab (after the
> space that marks it as context line), our diff machinery spits out a
> white-space error (space before tab), but in this case, that is
> incorrect.
>
> Fix this by adding a specific whitespace flag that simply ignores the
> first space in the output.

That sounds like a simple (not easy) solution, which sounds acceptable
to me here.

I guess you dropped all ideas that I originally proposed for the cleanup
regarding ws. that is fine, I can roll the cleanup on top of your patches
here.

> Of course, this flag is *really* specific to the "diff of diffs" use
> case. The original idea was to simply skip the space from the output,
> but that workaround was rejected by the Git maintainer as causing
> headaches.

By that you mean
https://public-inbox.org/git/xmqqr2kb9jk2@gitster-ct.c.googlers.com/
?

> Note: as the original code did not leave any space in the bit mask
> before the WSEH_* bits, the diff of this commit looks unnecessarily
> involved: the diff is dominated by making room for one more bit to be
> used by the whitespace rules.

It took me some minutes, but I am reasonably convinced this patch
is correct (and doesn't collide with other series in flight, sb/diff-color-more
adds another flag to move detection in another bit field at (1<<23))

Thanks for writing this patch instead of the other, though I'll leave
it to Junio to weigh in if this approach is the best design.

Stefan

>
> Signed-off-by: Johannes Schindelin 
> ---
>  cache.h |  3 ++-
>  diff.c  | 15 ---
>  diff.h  |  6 +++---
>  ws.c| 11 ++-
>  4 files changed, 23 insertions(+), 12 deletions(-)
>
> diff --git a/cache.h b/cache.h
> index 8b447652a..8abfbeb73 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -1681,11 +1681,12 @@ void shift_tree_by(const struct object_id *, const 
> struct object_id *, struct ob
>  #define WS_CR_AT_EOL   01000
>  #define WS_BLANK_AT_EOF02000
>  #define WS_TAB_IN_INDENT   04000
> +#define WS_IGNORE_FIRST_SPACE 01
>  #define WS_TRAILING_SPACE  (WS_BLANK_AT_EOL|WS_BLANK_AT_EOF)
>  #define WS_DEFAULT_RULE (WS_TRAILING_SPACE|WS_SPACE_BEFORE_TAB|8)
>  #define WS_TAB_WIDTH_MASK077
>  /* All WS_* -- when extended, adapt diff.c emit_symbol */
> -#define WS_RULE_MASK   0
> +#define WS_RULE_MASK   01
>  extern unsigned whitespace_rule_cfg;
>  extern unsigned whitespace_rule(const char *);
>  extern unsigned parse_whitespace_rule(const char *);
> diff --git a/diff.c b/diff.c
> index e163bc8a3..03ed235c7 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -650,14 +650,14 @@ enum diff_symbol {
>  };
>  /*
>   * Flags for content lines:
> - * 0..12 are whitespace rules
> - * 13-15 are WSEH_NEW | WSEH_OLD | WSEH_CONTEXT
> - * 16 is marking if the line is blank at EOF
> + * 0..14 are whitespace rules
> + * 14-16 are WSEH_NEW | WSEH_OLD | WSEH_CONTEXT
> + * 17 is marking if the line is blank at EOF
>   */
> -#define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<16)
> -#define DIFF_SYMBOL_MOVED_LINE (1<<17)
> -#define DIFF_SYMBOL_MOVED_LINE_ALT (1<<18)
> -#define DIFF_SYMBOL_MOVED_LINE_UNINTERESTING   (1<<19)
> +#define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<17)
> +#define DIFF_SYMBOL_MOVED_LINE (1<<18)
> +#define DIFF_SYMBOL_MOVED_LINE_ALT (1<<19)
> +#define DIFF_SYMBOL_MOVED_LINE_UNINTERESTING   (1<<20)
>  #define DIFF_SYMBOL_CONTENT_WS_MASK (WSEH_NEW | WSEH_OLD | WSEH_CONTEXT | 
> WS_RULE_MASK)
>
>  /*
> @@ -1094,6 +1094,7 @@ static void emit_diff_symbol_from_struct(struct 
> diff_options *o,
> set = diff_get_color_opt(o, DIFF_FRAGINFO);
> else if (c != '+')
> set = diff_get_color_opt(o, DIFF_CONTEXT);
> +   flags |= WS_IGNORE_FIRST_SPACE;
> }
> emit_line_ws_markup(o, set, reset, line, len, set_sign, '+',
> flags & DIFF_SYMBOL_CONTENT_WS_MASK,
> diff --git a/diff.h b/diff.h
> index 79beb6eea..892416a14 100644
> --- a/diff.h
> +++ b/diff.h
> @@ -160,9 +160,9 @@ struct diff_options {
> int abbrev;
> int ita_invisible_in_index;
>  /* white-space error highlighting */
> -#define WSEH_NEW (1<<12)
> -#define WSEH_CONTEXT (1<<13)
> -#define WSEH_OLD (1<<14)
> +#define WSEH_NEW (1<<13)
> +#define WSEH_CONTEXT (1<<14)
> +#define WSEH_OLD (1<<15)
> unsigned ws_error_highlight;
> const char *prefix;
> int prefix_length;
> diff --git a/ws.c b/ws.c
> index a07caedd5..e02365a6a 100644
> --- a/ws.c
> +++ b/ws.c
> @@ -20,6 +20,7 @@ static struct whitespace_rule {
> { 

Re: [PATCH v4 11/21] range-diff: add tests

2018-07-23 Thread Stefan Beller
> +test_expect_success 'simple B...C (unmodified)' '
> +   git range-diff --no-color

I wonder if we want to have tests for colors, too, eventually.
(Feel free to push back on it or put it into the left over hashtag.
But given how much time we (or I) spent discussing colors,
this would be a welcome extension for the tests)

Stefan


Re: [PATCH v4 00/21] Add `range-diff`, a `tbdiff` lookalike

2018-07-23 Thread Stefan Beller
On Sat, Jul 21, 2018 at 3:04 PM Johannes Schindelin via GitGitGadget
 wrote:

> Side note: I work on implementing range-diff not only to make life easier for 
> reviewers who have to suffer through v2, v3, ... of my patch series, but also 
> to verify my changes before submitting a new iteration. And also, maybe even 
> more importantly, I plan to use it to verify my merging-rebases of Git for
> Windows (for which I previously used to redirect the pre-rebase/post-rebase 
> diffs vs upstream and then compare them using `git diff --no-index`). And of 
> course any interested person can see what changes were necessary e.g. in the 
> merging-rebase of Git for Windows onto v2.17.0 by running a command like:

Thanks for making tools that makes the life of a Git developer easier!
(Just filed https://github.com/gitgitgadget/gitgitgadget/issues/26
which asks to break lines for this cover letter)

> base-commit: b7bd9486b055c3f967a870311e704e3bb0654e4f
> Published-As: 
> https://github.com/gitgitgadget/git/releases/tags/pr-1%2Fdscho%2Fbranch-diff-v4
> Fetch-It-Via: git fetch https://github.com/gitgitgadget/git 
> pr-1/dscho/branch-diff-v4
> Pull-Request: https://github.com/gitgitgadget/git/pull/1

I just realized that if I had ideal memory of the previous review,
I could contain my whole review answer to this email only.

>
> Range-diff vs v3:
>
>   1:  39272eefc !  1:  f7e70689e linear-assignment: a function to solve 
> least-cost assignment problems
>  @@ -223,9 +223,7 @@
>   + BUG("negative j: %d", j);
>   + i = pred[j];
>   + column2row[j] = i;
>  -+ k = j;
>  -+ j = row2column[i];
>  -+ row2column[i] = k;
>  ++ SWAP(j, row2column[i]);

The dual color option (as a default) really helps here. Thanks for that!
Does it have to be renamed though? (It's more than two colors; originally
it was inverting the beginning signs)

Maybe --color=emphasize-later
assuming there will be other modes for coloring, such as "diff-only",
which would correspond with --no-dual-color, or "none" that will correspond
would be --no-color. I imagine there could be more fancy things, hence I would
propose a mode rather than a switch.
(Feel free to push back if discussing naming here feels like bike shedding)


2:  7f15b26d4ea !  82:  88134121d2a Introduce `range-diff` to compare
iterations of a topic branch
[...]
>   diff --git a/Makefile b/Makefile
>   --- a/Makefile
>   +++ b/Makefile

The line starting with --- is red (default removed color) and the line
with +++ is green (default add color).

Ideally these two lines and the third line above starting with "diff --git"
would render in GIT_COLOR_BOLD ("METAINFO").

>   3:  076e1192d !  3:  4e3fb47a1 range-diff: first rudimentary implementation
>  @@ -4,7 +4,7 @@
>
>   At this stage, `git range-diff` can determine corresponding commits
>   of two related commit ranges. This makes use of the recently 
> introduced
>  -implementation of the Hungarian algorithm.
>  +implementation of the linear assignment algorithm.
>
>   The core of this patch is a straight port of the ideas of tbdiff, 
> the
>   apparently dormant project at https://github.com/trast/tbdiff.
>  @@ -51,19 +51,17 @@
>   + int res = 0;
>   + struct strbuf range1 = STRBUF_INIT, range2 = STRBUF_INIT;
>
>  -- argc = parse_options(argc, argv, NULL, options,
>  --  builtin_range_diff_usage, 0);
>  -+ argc = parse_options(argc, argv, NULL, options, 
> builtin_range_diff_usage,
>  -+  0);
>  +  argc = parse_options(argc, argv, NULL, options,
>  +   builtin_range_diff_usage, 0);

This is really nice in colors when viewed locally.

>  16:  dfa7b1e71 <  -:  - range-diff --dual-color: work around bogus 
> white-space warning
>   -:  - > 16:  f4252f2b2 range-diff --dual-color: fix bogus 
> white-space warning

Ah; here my initial assumption of only reviewing the range-diff breaks down now.
I'll dig into patch 16 separately.

Maybe it is worth having an option to expand all "new" patches.
(Given that the range-diff pr-1/dscho/branch-diff-v3...pr-1/dscho/branch-diff-v4
told me you used a different base, this is a hard problem, as I certainly
would want to skip over all new base commits, but this one is interesting
to look at. An easy way out: Maybe an option to expand any new
commits/patches after the first expansion? Asking for opinions rather
than implementing it)

>  19:  144363006 <  -:  - range-diff: left-pad patch numbers
>   -:  - > 19:  07ec215e8 range-diff: left-pad patch numbers

>   -:  - > 21:  d8498fb32 range-diff: use dim/bold cues to improve 
> dual color mode

Those are interesting, I'll look at them separately, too.

Thanks,
Stefan


Re: [PATCH 1/1] t7406: avoid failures solely due to timing issues

2018-07-23 Thread Stefan Beller
On Mon, Jul 23, 2018 at 12:14 PM Junio C Hamano  wrote:
>
> "Johannes Schindelin via GitGitGadget" 
> writes:
>
> > To prevent such false positives from unnecessarily costing time when
> > investigating test failures, let's take the exact order of the lines out
> > of the equation by sorting them before comparing them.
> > ...
> >  cat  > +Cloning into '$pwd/recursivesuper/super/merging'...
> > +Cloning into '$pwd/recursivesuper/super/none'...
> > +Cloning into '$pwd/recursivesuper/super/rebasing'...
> > +Cloning into '$pwd/recursivesuper/super/submodule'...
> >  Submodule 'merging' ($pwd/merging) registered for path '../super/merging'
> >  Submodule 'none' ($pwd/none) registered for path '../super/none'
> >  Submodule 'rebasing' ($pwd/rebasing) registered for path 
> > '../super/rebasing'
> >  Submodule 'submodule' ($pwd/submodule) registered for path 
> > '../super/submodule'
>
> Thanks.  It obviously goes in the right direction to loosen the
> ordering requirement that fundamentally cannot be met, as the log
> message cleanly analizes.
>
> Do we know that the write(2)s that show these lines are atomic, or
> are we merely lucky that we don't see them intermixed in the middle
> of lines (sbeller cc'ed).  I _think_ they are but just wanted to
> double check.

Not just the lines, but the whole message per submodule (i.e.
the "Cloning... " lione and its accompanying "done") is one
atomic unit. These messages are from processes invoked via
run_processes_parallel (in run-command.h), which buffers
all output per process.

This test was last touched in c66410ed32a (submodule init:
redirect stdout to stderr, 2016-05-02), merged as f2c96ceb57a
(Merge branch 'sb/submodule-init', 2016-05-17)

The parallelizing effort was made before that
(2335b870fa7 (submodule update: expose parallelism to the
user, 2016-02-29) via bdebbeb3346 (Merge branch
'sb/submodule-parallel-update', 2016-04-06)), but did not
review existing tests for the newly introduced raciness.

Thanks for the fix!
Stefan


Re: [RFC PATCH 3/5] pack-objects: add delta-islands support

2018-07-23 Thread Stefan Beller
On Sat, Jul 21, 2018 at 10:49 PM Christian Couder
 wrote:
>
> From: Jeff King 
>
> Implement support for delta islands in git pack-objects
> and document how delta islands work in
> "Documentation/git-pack-objects.txt".
>
> Signed-off-by: Jeff King 
> Signed-off-by: Christian Couder 
> ---
>  Documentation/git-pack-objects.txt |  88 +++
>  builtin/pack-objects.c | 130 -
>  2 files changed, 177 insertions(+), 41 deletions(-)
>
> diff --git a/Documentation/git-pack-objects.txt 
> b/Documentation/git-pack-objects.txt
> index d95b472d16..7b7a36056f 100644
> --- a/Documentation/git-pack-objects.txt
> +++ b/Documentation/git-pack-objects.txt
> @@ -289,6 +289,94 @@ Unexpected missing object will raise an error.
>  --unpack-unreachable::
> Keep unreachable objects in loose form. This implies `--revs`.
>
> +--delta-islands::
> +   Restrict delta matches based on "islands". See DELTA ISLANDS
> +   below.
> +
> +
> +DELTA ISLANDS
> +-
> +
> +When possible, `pack-objects` tries to reuse existing on-disk deltas to
> +avoid having to search for new ones on the fly. This is an important
> +optimization for serving fetches, because it means the server can avoid
> +inflating most objects at all and just send the bytes directly from
> +disk. This optimization can't work when an object is stored as a delta
> +against a base which the receiver does not have (and which we are not
> +already sending). In that case the server "breaks" the delta and has to
> +find a new one, which has a high CPU cost. Therefore it's important for
> +performance that the set of objects in on-disk delta relationships match
> +what a client would fetch.
> +
> +In a normal repository, this tends to work automatically. The objects
> +are mostly reachable from the branches and tags, and that's what clients
> +fetch. Any deltas we find on the server are likely to be between objects
> +the client has or will have.
> +
> +But in some repository setups, you may have several related but separate
> +groups of ref tips, with clients tending to fetch those groups
> +independently. For example, imagine that you are hosting several "forks"
> +of a repository in a single shared object store, and letting clients
> +view them as separate repositories through `GIT_NAMESPACE` or separate
> +repos using the alternates mechanism. A naive repack may find that the
> +optimal delta for an object is against a base that is only found in
> +another fork. But when a client fetches, they will not have the base
> +object, and we'll have to find a new delta on the fly.
> +
> +A similar situation may exist if you have many refs outside of
> +`refs/heads/` and `refs/tags/` that point to related objects (e.g.,
> +`refs/pull` or `refs/changes` used by some hosting providers). By
> +default, clients fetch only heads and tags, and deltas against objects
> +found only in those other groups cannot be sent as-is.
> +
> +Delta islands solve this problem by allowing you to group your refs into
> +distinct "islands". Pack-objects computes which objects are reachable
> +from which islands, and refuses to make a delta from an object `A`
> +against a base which is not present in all of `A`'s islands. This
> +results in slightly larger packs (because we miss some delta
> +opportunities), but guarantees that a fetch of one island will not have
> +to recompute deltas on the fly due to crossing island boundaries.
> +
> +Islands are configured via the `pack.island` option, which can be
> +specified multiple times. Each value is a left-anchored regular
> +expressions matching refnames. For example:
> +
> +---
> +[pack]
> +island = refs/heads/
> +island = refs/tags/
> +---
> +
> +puts heads and tags into an island (whose name is the empty string; see
> +below for more on naming). Any refs which do not match those regular
> +expressions (e.g., `refs/pull/123`) is not in any island. Any object
> +which is reachable only from `refs/pull/` (but not heads or tags) is
> +therefore not a candidate to be used as a base for `refs/heads/`.
> +
> +Refs are grouped into islands based on their "names", and two regexes
> +that produce the same name are considered to be in the same island. The
> +names are computed from the regexes by concatenating any capture groups
> +from the regex (and if there are none, then the name is the empty
> +string, as in the above example). This allows you to create arbitrary
> +numbers of islands. For example, imagine you store the refs for each
> +fork in `refs/virtual/ID`, where `ID` is a numeric identifier. You might
> +then configure:
> +
> +---
> +[pack]
> +island = refs/virtual/([0-9]+)/heads/
> +island = refs/virtual/([0-9]+)/tags/
> +island = refs/virtual/([0-9]+)/(pull)/
> +---
> +
> +That puts the heads and tags for each fork in 

Re: [PATCH] fetch-pack: mark die strings for translation

2018-07-23 Thread Stefan Beller
On Mon, Jul 23, 2018 at 10:56 AM Brandon Williams  wrote:
>

fetch-pack is listed as a plumbing command, which means its prime consumer
is supposedly a machine; But fetch-pack is also used by git-fetch that
is invoked
by a human, who prefers translations.

.. goes reads code...

This translates protocol v2 messages, and p0 messages are translated already,
so this just aligns the new protocol to the old protocol w.r.t. i18n.

Sounds good,

Thanks,
Stefan

[ This message is a gentle hint for better commit messages. ;-) ]

> Signed-off-by: Brandon Williams 
> ---
>  fetch-pack.c | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/fetch-pack.c b/fetch-pack.c
> index 0b4a9f288f..51abee6181 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -1245,13 +1245,13 @@ static int process_section_header(struct 
> packet_reader *reader,
> int ret;
>
> if (packet_reader_peek(reader) != PACKET_READ_NORMAL)
> -   die("error reading section header '%s'", section);
> +   die(_("error reading section header '%s'"), section);
>
> ret = !strcmp(reader->line, section);
>
> if (!peek) {
> if (!ret)
> -   die("expected '%s', received '%s'",
> +   die(_("expected '%s', received '%s'"),
> section, reader->line);
> packet_reader_read(reader);
> }
> @@ -1289,12 +1289,12 @@ static int process_acks(struct packet_reader *reader, 
> struct oidset *common)
> continue;
> }
>
> -   die("unexpected acknowledgment line: '%s'", reader->line);
> +   die(_("unexpected acknowledgment line: '%s'"), reader->line);
> }
>
> if (reader->status != PACKET_READ_FLUSH &&
> reader->status != PACKET_READ_DELIM)
> -   die("error processing acks: %d", reader->status);
> +   die(_("error processing acks: %d"), reader->status);
>
> /* return 0 if no common, 1 if there are common, or 2 if ready */
> return received_ready ? 2 : (received_ack ? 1 : 0);
> @@ -1331,7 +1331,7 @@ static void receive_shallow_info(struct fetch_pack_args 
> *args,
>
> if (reader->status != PACKET_READ_FLUSH &&
> reader->status != PACKET_READ_DELIM)
> -   die("error processing shallow info: %d", reader->status);
> +   die(_("error processing shallow info: %d"), reader->status);
>
> setup_alternate_shallow(_lock, _shallow_file, NULL);
> args->deepen = 1;
> @@ -1346,7 +1346,7 @@ static void receive_wanted_refs(struct packet_reader 
> *reader, struct ref *refs)
> struct ref *r = NULL;
>
> if (parse_oid_hex(reader->line, , ) || *end++ != ' ')
> -   die("expected wanted-ref, got '%s'", reader->line);
> +   die(_("expected wanted-ref, got '%s'"), reader->line);
>
> for (r = refs; r; r = r->next) {
> if (!strcmp(end, r->name)) {
> @@ -1356,11 +1356,11 @@ static void receive_wanted_refs(struct packet_reader 
> *reader, struct ref *refs)
> }
>
> if (!r)
> -   die("unexpected wanted-ref: '%s'", reader->line);
> +   die(_("unexpected wanted-ref: '%s'"), reader->line);
> }
>
> if (reader->status != PACKET_READ_DELIM)
> -   die("error processing wanted refs: %d", reader->status);
> +   die(_("error processing wanted refs: %d"), reader->status);
>  }
>
>  enum fetch_state {
> --
> 2.18.0.233.g985f88cf7e-goog
>


Re: Hash algorithm analysis

2018-07-23 Thread Stefan Beller
On Mon, Jul 23, 2018 at 5:41 AM demerphq  wrote:
>
> On Sun, 22 Jul 2018 at 01:59, brian m. carlson
>  wrote:
> > I will admit that I don't love making this decision by myself, because
> > right now, whatever I pick, somebody is going to be unhappy.  I want to
> > state, unambiguously, that I'm trying to make a decision that is in the
> > interests of the Git Project, the community, and our users.
> >
> > I'm happy to wait a few more days to see if a consensus develops; if so,
> > I'll follow it.  If we haven't come to one by, say, Wednesday, I'll make
> > a decision and write my patches accordingly.  The community is free, as
> > always, to reject my patches if taking them is not in the interest of
> > the project.
>
> Hi Brian.
>
> I do not envy you this decision.
>
> Personally I would aim towards pushing this decision out to the git
> user base and facilitating things so we can choose whatever hash
> function (and config) we wish, including ones not invented yet.

By Git user base you actually mean millions of people?
(And they'll have different opinions and needs)

One of the goals of the hash transition is to pick a hash function
such that git repositories are compatible.
If users were to pick their own hashes, we would need to not just give
a SHA-1 ->  transition plan, but we'd have to make sure the
full matrix of possible hashes is interchangeable as we have no idea
of what the user would think of "safer". For example one server operator
might decide to settle on SHA2 and another would settle on blake2,
whereas a user that uses both servers as remotes settles with k12.

Then there would be a whole lot of conversion going on (you cannot talk
natively to a remote with a different hash; checking pgp signatures is
also harder as you have an abstraction layer in between).

I would rather just have the discussion now and then provide only one
conversion tool which might be easy to adapt, but after the majority
converted, it is rather left to bitrot instead of needing to support ongoing
conversions.

On the other hand, even if we'd provide a "different hashes are fine"
solution, I would think the network effect would make sure that eventually
most people end up with one hash.

One example of using different hashes successfully are transports,
like TLS, SSH. The difference there is that it is a point-to-point communication
whereas a git repository needs to be read by many parties involved; also
a communication over TLS/SSH is ephemeral unlike objects in Git.

> Failing that I would aim towards a hashing strategy which has the most
> flexibility. Keccak for instance has the interesting property that its
> security level is tunable, and that it can produce aribitrarily long
> hashes.  Leaving aside other concerns raised elsewhere in this thread,
> these two features alone seem to make it a superior choice for an
> initial implementation. You can find bugs by selecting unusual hash
> sizes, including very long ones, and you can provide ways to tune the
> function to peoples security and speed preferences.  Someone really
> paranoid can specify an unusually large round count and a very long
> hash.

I do not object to this in theory, but I would rather not want to burden
the need to write code for this on the community.

> I am not a cryptographer.

Same here. My personal preference would be blake2b
as that is the fastest IIRC.

Re-reading brians initial mail, I think we should settle on
SHA-256, as that is a conservative choice for security and
the winner in HW accelerated setups, and not to shabby in
a software implementation; it is also widely available.

Stefan


Re: t7406-submodule-update shaky ?

2018-07-23 Thread Stefan Beller
On Sun, Jul 22, 2018 at 2:05 PM Torsten Bögershausen  wrote:
>
> It seems that t7406 is sometimes shaky - when checking stderr in test
> case 4.
>
> The order of the submodules may vary, sorting the stderr output makes it
>
> more reliable (and somewhat funny to read).
>
> Does anybody have a better idea ?

DScho just posted a fix for this, see:
https://public-inbox.org/git/pull.12.git.gitgitgad...@gmail.com/


Re: [PATCH v2 00/18] Consolidate reachability logic

2018-07-20 Thread Stefan Beller
Hi Derrick,

> Sure! It's on my fork [1]
>
> [1] https://github.com/derrickstolee/git/tree/reach/refactor
>

Thanks!

> >> * Use single rev-parse commands in test output, and pipe the OIDs through 
> >> 'sort'

Why do we need to sort them? The order of the answers given by rev-parse
is the same as the input given and we did not need to sort it before, i.e.
the unit under test would not give sorted output but some deterministic(?)
order, which we can replicate as input to rev-parse.
Am I missing the obvious?

> >> * Check output of parse_commit()
> >>
> >> * Update flag documentation in object.h
> >>
> >> * Add tests for commit_contains() including both algorithms.
> >>
> >> * Reduce size of "mixed-mode" commit-graph to ensure we start commit walks
> >>'above' the graph and then walk into the commits with generation 
> >> numbers.

Overall I like the series as-is, and have found
no further issues in a quick read.

Thanks,
Stefan


[PATCH] Documentation/git-interpret-trailers: explain possible values

2018-07-20 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---

Maybe we rather want to refer to the options that are described further
down in the document?

 Documentation/git-interpret-trailers.txt | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-interpret-trailers.txt 
b/Documentation/git-interpret-trailers.txt
index 9111c47a1bf..b8fafb1e8bd 100644
--- a/Documentation/git-interpret-trailers.txt
+++ b/Documentation/git-interpret-trailers.txt
@@ -88,7 +88,8 @@ OPTIONS
Specify where all new trailers will be added.  A setting
provided with '--where' overrides all configuration variables
and applies to all '--trailer' options until the next occurrence of
-   '--where' or '--no-where'.
+   '--where' or '--no-where'. Possible values are `after`, `before`,
+   `end` or `start`.
 
 --if-exists ::
 --no-if-exists::
@@ -96,7 +97,8 @@ OPTIONS
least one trailer with the same  in the message.  A setting
provided with '--if-exists' overrides all configuration variables
and applies to all '--trailer' options until the next occurrence of
-   '--if-exists' or '--no-if-exists'.
+   '--if-exists' or '--no-if-exists'. Possible actions are 
`addIfDifferent`,
+   `addIfDifferentNeighbor`, `add`, `replace` and `doNothing`.
 
 --if-missing ::
 --no-if-missing::
@@ -104,7 +106,8 @@ OPTIONS
trailer with the same  in the message.  A setting
provided with '--if-missing' overrides all configuration variables
and applies to all '--trailer' options until the next occurrence of
-   '--if-missing' or '--no-if-missing'.
+   '--if-missing' or '--no-if-missing'. Possible actions are `doNothing`
+   or `add`.
 
 --only-trailers::
Output only the trailers, not any other parts of the input.
-- 
2.18.0.233.g985f88cf7e-goog



Re: refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems

2018-07-20 Thread Stefan Beller
+cc list
On Fri, Jul 20, 2018 at 2:29 PM Junio C Hamano  wrote:
> ... which means that it does not matter if I have an elaborate rewrite hook
> that constantly updates the reverse mapping or if the reverse mapping is
> made immediately before I push out. You wouldn't even be able to tell any
> difference.
>
> And for that matter, it could even be made on the receiving end by you
> after you fetch from me before you need the reverse map information.


Re: refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems

2018-07-20 Thread Stefan Beller
On Fri, Jul 20, 2018 at 12:35 PM Junio C Hamano  wrote:

> It is not like anybody (including me) needs realtime up-to-date

I thought the same for a long time, but contributing to other projects
showed me that this is not necessarily the case. Having a real time
update, even if it would be just "your patch is labeled 'under discussion'"
is beneficial as I would know where it is "in the system".

In a way I'd compare our contribution process to having an
incredible fine grained paper map. Most of the world moved
on to digital maps, that zoom in on-demand.
(C.f. spelling out "See banned.h for banned functions" in
Documentation/CodingGuidelines is a fine grained detail
that is not relevant for *most* of the contributions, but just
burdens the bearer of the paper map with weight; if this hint
is given dynamically by the compiler or build system at relevant
times, it is much better;

Regarding the real time aspect here, it is also very good
comparison to maps: While I know how to read paper maps
(or offline maps) and how to navigate my way, it sure is easier
to just follow the online up-to-date navigation service, that
tells me what to do. )


FYI: histogram diff bug

2018-07-20 Thread Stefan Beller
  seq 1 100 >one
  echo 99 > two
  seq 1 2 98 >>two
  git diff --no-index --histogram one two

In addition to the recent patches[1], I found another bug in the
histogram algorithm,
which can be produced with the instructions given above. At least I
think it is a bug as
the diff looks terrible to me. (It is a correct diff, but the output
is just bad.)

[1] https://public-inbox.org/git/20180719221942.179565-1-sbel...@google.com/

And I think the issue is in the algorithm, which is basically as follows:

1) build up information about one side ("scanA()"), by putting each
line into a hashmap (and counting its occurrences, such that you
can make the histogram)

2) "find_LCS" on the other side (B) by trying low occurrence lines
  and seeing how long the common consequential string match is.
  (I think this LCS refers to [2], not to be confused with [3])

3) The whole mental model of the difference
between both sides now look like:

  stuff before the LCS
  LCS
  stuff after the LCS

As the LCS has no output associated with it, just call recursively
do_histogram on the remainders before and after.

This is my rough understanding of the algorithm so far.
(I am unsure about the exact find_LCS part how and why to
pick certain candidates for finding the LCS).

The big issue however is the count of LCS, so far we assumed
there is only one! In the example given above there are many,
i.e. lots of "longest" common continuous substrings of length 1.
We are unfortunate to pick "99" as the LCS and recurse before
and after; the other LCSs would be a better pick.

So I think we would need to find "all LCS", which there can be
more than one at the same time, and then fill the gaps between
them using recursion.
As the order of LCSs can be different in the two sides,
we actually have to produce a diff of LCSs and those which are
out of order need to be emitted as full edits (deletes or creates).
In the example above we'd want to have "99" to be a full create
and delete instead of being *the* anchor; all other LCSs ought to
be anchors for the first (zero'th?) level of recursion.

This bug is also present in JGit which this algorithm was ported
from, hence jgit-dev is cc'd (and furthers my suspicion that the
issue is not in the port but in the algorithm itself).

[2] https://en.wikipedia.org/wiki/Longest_common_substring_problem
[3] https://en.wikipedia.org/wiki/Longest_common_subsequence_problem

I'll think about this and see if I can fix it properly,
Thoughts or Opinions?

Stefan


Re: [PATCH v3 09/20] range-diff: adjust the output of the commit pairs

2018-07-20 Thread Stefan Beller
> 1. To roll again.
>
> A player who rolls two sixes can reroll the dice for an additional
> turn.

This is where I had my AHA moment!
(Consider my software development process as chaotic as a dice roll
So rerolling is really just rolling the dice again to "get my patch
accepted" ;-)

> 2. (programming) To convert (an unrolled instruction sequence) back into
>a loop. quotations ▼

We do not have unrolled loops?
This was good back in the day where the cost of each instruction weighted
heavy on the CPU, such that the JMPs that are needed (and the loop
variable check that might have had a bad branch prediction) for the loop were
slowing down the execution.

Nowadays (when I was studying 5 years ago) the branch prediction and individual
instruction execution are really good, but the bottleneck that I measured
(when I had a lot of time at my disposal and attending a class/project on micro
architectures), was the CPU instruction cache size, i.e. loop unrolling made the
code *slower* than keeping tight loops loaded in memory.
https://stackoverflow.com/questions/24196076/is-gcc-loop-unrolling-flag-really-effective

> Noun
>
> reroll (plural rerolls)
>
> (dice games) A situation in the rules of certain dice games where a
> player is given the option to reroll an undesirable roll of the dice.
>
>
> You will notice how this does not list *any* hint at referring to
> something that Junio calls "reroll".

We have undesirable patches that were 'rolled' onto the mailing list,
so they have to be rerolled?

> Footnote *1*: https://en.wiktionary.org/wiki/commit#Noun does not even
> bother to acknowledge our use of referring to a snapshot of a source code
> base as a "commit".

When Git was a content addressable file system, a commit was precisely
"a database transaction, [...] making it a permanent change."

Side note:
I was just giving a talk to my colleagues about diff aglorithms
(and eventually describing a bug in the histogram diff algorithm)
and we got really riled up with "Longest Common Subsequence",
as the mathematical definition is different than what the code
or I (after studying the code) had in mind.

Naming things is hard, and sometimes the collective wisdom got
it wrong, but changing it would be very costly in the short/medium
term.

Another note about "rolling things": At $DAYJOB I review changes
that are committed to the another revision control system w.r.t. its
compliance of open source licenses (hence I am exposed to a lot
of different projects), and some of those changes are titled
"Roll up to version $X" which I found strange, but knew
what was meant.

Stefan


Re: [PATCH v2 00/18] Consolidate reachability logic

2018-07-20 Thread Stefan Beller
Hi Derrick,

> V2 Update: The biggest material change in this version is that we drop the
> method declarations from commit.h, which requires adding a lot of references
> to commit-reach.h across the codebase. This change is in a commit on its own.
> In addition, we have the following smaller changes:

Is there a remote available to get this series from?

> * Use 'unsigned int' for the flag variables.
>
> * Properly align the here-doc test input data.
>
> * Use single rev-parse commands in test output, and pipe the OIDs through 
> 'sort'
>
> * Check output of parse_commit()
>
> * Update flag documentation in object.h
>
> * Add tests for commit_contains() including both algorithms.
>
> * Reduce size of "mixed-mode" commit-graph to ensure we start commit walks
>   'above' the graph and then walk into the commits with generation numbers.

A range diff would be nice (though I can just look at all patches again
even if it takes longer).

I notice this is not sent via the GGG, but via git send-email?

Thanks,
Stefan


Re: [PATCH 5/8] commit-graph: not compatible with replace objects

2018-07-20 Thread Stefan Beller
> Thanks, Peff, for improving the check_replace_refs interaction [1].
>
> Since this series now has two dependencies (including Stefan's ref-store
> fix [2] that I had included in my v1), I'll let those topics settle
> before I send a new v2.

FYI: I have not been working on, nor plan to work on (in the short term)
on the ref store fix series, which needs another abstraction layer IIUC to
make it easier to integrate such a thing as well as extend the series to all
functions in the refstore.

Feel free to take over that series partially (and defer the extension for
all functions in the ref store until later) ?

Stefan


Re: Find commit that referenced a blob first

2018-07-19 Thread Stefan Beller
On Thu, Jul 19, 2018 at 3:38 PM Junio C Hamano  wrote:

> > If the given object refers to a blob, it will be described as
> > :,
> > such that the blob can be found at  in the , which itself
> > describes the first commit in which this blob occurs in a reverse
> > revision walk from HEAD.
>
> You walk from the latest to earlier commit (because there by
> definition can be is no reverse pointer from older to newer commit),

but a "reverse walk from HEAD" produces the commits to us in an order
as if we were walking from earlier to latest?


[PATCH] xdiff/histogram: remove tail recursion

2018-07-19 Thread Stefan Beller
When running the same reproduction script as the previous patch,
it turns out the stack is too small, which can be easily avoided.

Signed-off-by: Stefan Beller 
---
 xdiff/xhistogram.c | 20 ++--
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index fc2d3cd95d9..ec85f5992bd 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -318,7 +318,9 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t 
*env,
 {
struct region lcs;
int lcs_found;
-   int result = -1;
+   int result;
+redo:
+   result = -1;
 
if (count1 <= 0 && count2 <= 0)
return 0;
@@ -355,11 +357,17 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t 
*env,
line2, lcs.begin2 - line2);
if (result)
goto out;
-   result = histogram_diff(xpp, env,
-   lcs.end1 + 1, LINE_END(1) - 
lcs.end1,
-   lcs.end2 + 1, LINE_END(2) - 
lcs.end2);
-   if (result)
-   goto out;
+   /*
+* result = histogram_diff(xpp, env,
+*lcs.end1 + 1, LINE_END(1) - lcs.end1,
+*lcs.end2 + 1, LINE_END(2) - lcs.end2);
+* but let's optimize tail recursion ourself:
+   */
+   count1 = LINE_END(1) - lcs.end1;
+   line1 = lcs.end1 + 1;
+   count2 = LINE_END(2) - lcs.end2;
+   line2 = lcs.end2 + 1;
+   goto redo;
}
}
 out:
-- 
2.18.0.233.g985f88cf7e-goog



Re: [PATCH 1/2] introduce "banned function" list

2018-07-19 Thread Stefan Beller
On Thu, Jul 19, 2018 at 2:32 PM Jeff King  wrote:
>
> On Thu, Jul 19, 2018 at 02:15:54PM -0700, Stefan Beller wrote:
>
> > >  Documentation/CodingGuidelines |  3 +++
> >
> > I'd prefer to not add more text to our documentation
> > (It is already long and in case you run into this problem
> > the error message is clear enough IMHO)
>
> I'm fine with that too. I just wondered if somebody would complain in
> the opposite way: your patch enforces this, but we never made it an
> "official" guideline.
>
> But that may be overly paranoid.  Once upon a time there was some rules
> lawyering around CodingGuidelines, but I think that was successfully
> shut down and hasn't reared its head for several years.

Heh; My preference would be to keep docs as short and concise as
possible (and lawyering sounds like "verbosity is a feature" :-P) but
still giving advice on controversial (i.e. not enforced by a machine in
a deterministic fashion) things such as having braces around one
statement for example or leaving them out.

> > Regarding the introduction of the functions to this list,
> > I would imagine people would find the commit that introduced
> > a function to the ban list to look for a reason why.
> > Can we include a link[1] to explain why we discourage
> > strcpy and sprintf in this commit?
>
> I hoped that it was mostly common knowledge, but that's probably not a
> good assumption. I agree if there's a good reference, we should link to
> it.

In this case I am just an advocate for a better history. Countless
times I wanted
to know *why* something is the way it is and mailing list and history are not
very good at that, as my specific question was not addressed there.

Either it was obvious to all people involved at the time or not written down
why that solution (which -- in hindsight -- sucks), was superior to the other
solution (which may or may not have been known at the time).

So maybe I would have rather asked, why we start out with these two
functions. And you seem to call them "obviously bad", and you take both
of them because they need to be handled differently due to the variadic macros.
(And those two are "obviously worse" than strcat, as they are used more often.
But strcat, being on MS ban list[1], would do just as fine)

(I agree that) Any other function can be added on top with its own
commit message on *why* that function. Over time I would expect that the
reason is that we got bitten by it, or some other project got famously bitten
by it or that some smart people came up with a list[1]. The exception is
this one, which basically says: "Here is the mechanism how to do it and
$X is the obvious thing to put in first", which I agree with the mechanism
as well as that $X seems bad.

[1] https://msdn.microsoft.com/en-us/library/bb288454.aspx

Now that I am deep down the rathole of discussing a tangent, I just found
[2], when searching for "how much common knowledge is wrong", do you
know if there is such a list for (CS) security related things?

[2] http://www.marcandangel.com/2008/06/12/60-popular-pieces-of-false-knowledge/

>
> > [1] https://www.thegeekstuff.com/2013/06/buffer-overflow/?utm_source=feedly
> >   This is the best I found. So not sure if it worth it.
>
> Yeah, this one is so-so, because it goes into a lot more detail. I think
> we can assume that people know that overflowing a buffer is bad. Maybe
> just a short paragraph like:
>
>   We'll include strcpy() and sprintf() in the initial list of banned
>   functions. While these _can_ be used carefully by surrounding them
>   with extra code, there's no inherent check that the size of the
>   destination buffer is big enough, it's very easy to overflow the
>   buffer.

Sounds good to me, maybe even add "We've had problems with that
in the past, see 'git log -S strcpy'", but that may be too much again.

Thanks,
Stefan


Re: Find commit that referenced a blob first

2018-07-19 Thread Stefan Beller
On Thu, Jul 19, 2018 at 2:02 PM Lars Schneider  wrote:
>
> Hi,
>
> I have a blob hash and I would like to know what commit referenced
> this blob first in a given Git repo.

git describe 

If the given object refers to a blob, it will be described as
:,
such that the blob can be found at  in the , which itself
describes the first commit in which this blob occurs in a reverse
revision walk from HEAD.

Since
644eb60bd01 (builtin/describe.c: describe a blob, 2017-11-15)
(included first in 2.16.0

You're welcome,
Stefan


Re: [PATCH 1/2] introduce "banned function" list

2018-07-19 Thread Stefan Beller
On Thu, Jul 19, 2018 at 1:39 PM Jeff King  wrote:
>
> There are a few standard C functions (like strcpy) which are
> easy to misuse. We generally discourage these in reviews,
> but we haven't put advice in CodingGuidelines, nor provided
> any automated enforcement. The latter is especially
> important because it's more consistent, and it can often
> save a round-trip of review.

Thanks for writing these patches. I would think this is a
good idea to have as I certainly would use banned functions
if not told by the compiler not to.

>  Documentation/CodingGuidelines |  3 +++

I'd prefer to not add more text to our documentation
(It is already long and in case you run into this problem
the error message is clear enough IMHO)


> @@ -0,0 +1,19 @@
> +#ifndef BANNED_H
> +#define BANNED_H
> +
> +/*
> + * This header lists functions that have been banned from our code base,
> + * because they're too easy to misuse (and even if used correctly,
> + * complicate audits). Including this header turns them into compile-time
> + * errors.
> + */
> +
> +#define BANNED(func) sorry_##func##_is_a_banned_function()
> +
> +#define strcpy(x,y) BANNED(strcpy)
> +
> +#ifdef HAVE_VARIADIC_MACROS

In a split second I thought you forgot sprintf that was
mentioned in the commit message, but then I kept on reading
just to find it here. I wonder if we want put this #ifdef at the
beginning of the file (after the guard) as then we can have
a uncluttered list of banned functions here. The downside is that
the use of strcpy would not be banned in case you have no
variadic macros, but we'd still catch it quickly as most people
have them. Undecided.

Regarding the introduction of the functions to this list,
I would imagine people would find the commit that introduced
a function to the ban list to look for a reason why.
Can we include a link[1] to explain why we discourage
strcpy and sprintf in this commit?


[1] https://www.thegeekstuff.com/2013/06/buffer-overflow/?utm_source=feedly
  This is the best I found. So not sure if it worth it.

Thanks,
Stefan


[PATCH 1/3] xdiff/xhistogram: pass arguments directly to fall_back_to_classic_diff

2018-07-19 Thread Stefan Beller
By passing the 'xpp' and 'env' argument directly to the function
'fall_back_to_classic_diff', we eliminate an occurrence of the 'index'
in histogram_diff, which will prove useful in a bit.

While at it, move it up in the file. This will make the diff of
one of the next patches more legible.

Signed-off-by: Stefan Beller 
---
 xdiff/xhistogram.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index 73210cb6f3f..6e20f75fe85 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -233,6 +233,16 @@ static int try_lcs(struct histindex *index, struct region 
*lcs, int b_ptr,
return b_next;
 }
 
+static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env,
+   int line1, int count1, int line2, int count2)
+{
+   xpparam_t xpparam;
+   xpparam.flags = xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;
+
+   return xdl_fall_back_diff(env, ,
+ line1, count1, line2, count2);
+}
+
 static int find_lcs(struct histindex *index, struct region *lcs,
int line1, int count1, int line2, int count2) {
int b_ptr;
@@ -248,16 +258,6 @@ static int find_lcs(struct histindex *index, struct region 
*lcs,
return index->has_common && index->max_chain_length < index->cnt;
 }
 
-static int fall_back_to_classic_diff(struct histindex *index,
-   int line1, int count1, int line2, int count2)
-{
-   xpparam_t xpp;
-   xpp.flags = index->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;
-
-   return xdl_fall_back_diff(index->env, ,
- line1, count1, line2, count2);
-}
-
 static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
int line1, int count1, int line2, int count2)
 {
@@ -320,7 +320,7 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t 
*env,
 
memset(, 0, sizeof(lcs));
if (find_lcs(, , line1, count1, line2, count2))
-   result = fall_back_to_classic_diff(, line1, count1, 
line2, count2);
+   result = fall_back_to_classic_diff(xpp, env, line1, count1, 
line2, count2);
else {
if (lcs.begin1 == 0 && lcs.begin2 == 0) {
while (count1--)
-- 
2.18.0.233.g985f88cf7e-goog



[PATCH 0/3] histogram diff: Fix memory ballooning in corner case

2018-07-19 Thread Stefan Beller
Currently if you run

seq 1   10 >one
seq 1 4 10 >two
git diff --no-index --histogram one two

you might run into memory issues. After applying the last patch of this series
you only have to wait a bit (as it doesn't fix run time complexity), but
computes the result with less memory pressure.

Thanks,
Stefan

Stefan Beller (3):
  xdiff/xhistogram: pass arguments directly to fall_back_to_classic_diff
  xdiff/xhistogram: factor out memory cleanup into free_index()
  xdiff/xhistogram: move index allocation into find_lcs

 xdiff/xhistogram.c | 117 +
 1 file changed, 66 insertions(+), 51 deletions(-)

-- 
2.18.0.233.g985f88cf7e-goog



[PATCH 2/3] xdiff/xhistogram: factor out memory cleanup into free_index()

2018-07-19 Thread Stefan Beller
This will be useful in the next patch as we'll introduce multiple
callers.

Signed-off-by: Stefan Beller 
---
 xdiff/xhistogram.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index 6e20f75fe85..5098b6c5021 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -243,6 +243,14 @@ static int fall_back_to_classic_diff(xpparam_t const *xpp, 
xdfenv_t *env,
  line1, count1, line2, count2);
 }
 
+static inline void free_index(struct histindex *index)
+{
+   xdl_free(index->records);
+   xdl_free(index->line_map);
+   xdl_free(index->next_ptrs);
+   xdl_cha_free(>rcha);
+}
+
 static int find_lcs(struct histindex *index, struct region *lcs,
int line1, int count1, int line2, int count2) {
int b_ptr;
@@ -343,10 +351,7 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t 
*env,
}
 
 cleanup:
-   xdl_free(index.records);
-   xdl_free(index.line_map);
-   xdl_free(index.next_ptrs);
-   xdl_cha_free();
+   free_index();
 
return result;
 }
-- 
2.18.0.233.g985f88cf7e-goog



[PATCH 3/3] xdiff/xhistogram: move index allocation into find_lcs

2018-07-19 Thread Stefan Beller
This fixes a memory issue when recursing a lot, which can be reproduced as

seq 1   10 >one
seq 1 4 10 >two
git diff --no-index --histogram one two

Before this patch, histogram_diff would call itself recursively before
calling free_index, which would mean a lot of memory is allocated during
the recursion and only freed afterwards. By moving the memory allocation
(and its free call) into find_lcs, the memory is free'd before we recurse,
such that memory is reused in the next step of the recursion instead of
using new memory.

This addresses only the memory pressure, not the run time complexity,
that is also awful for the corner case outlined above.

Helpful in understanding the code (in addition to the sparse history of
this file), was https://stackoverflow.com/a/32367597 which reproduces
most of the code comments of the JGit implementation.

Signed-off-by: Stefan Beller 
---
 xdiff/xhistogram.c | 96 +-
 1 file changed, 53 insertions(+), 43 deletions(-)

diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index 5098b6c5021..fc2d3cd95d9 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -251,44 +251,13 @@ static inline void free_index(struct histindex *index)
xdl_cha_free(>rcha);
 }
 
-static int find_lcs(struct histindex *index, struct region *lcs,
-   int line1, int count1, int line2, int count2) {
-   int b_ptr;
-
-   if (scanA(index, line1, count1))
-   return -1;
-
-   index->cnt = index->max_chain_length + 1;
-
-   for (b_ptr = line2; b_ptr <= LINE_END(2); )
-   b_ptr = try_lcs(index, lcs, b_ptr, line1, count1, line2, 
count2);
-
-   return index->has_common && index->max_chain_length < index->cnt;
-}
-
-static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
-   int line1, int count1, int line2, int count2)
+static int find_lcs(xpparam_t const *xpp, xdfenv_t *env,
+   struct region *lcs,
+   int line1, int count1, int line2, int count2)
 {
+   int b_ptr;
+   int sz, ret = -1;
struct histindex index;
-   struct region lcs;
-   int sz;
-   int result = -1;
-
-   if (count1 <= 0 && count2 <= 0)
-   return 0;
-
-   if (LINE_END(1) >= MAX_PTR)
-   return -1;
-
-   if (!count1) {
-   while(count2--)
-   env->xdf2.rchg[line2++ - 1] = 1;
-   return 0;
-   } else if (!count2) {
-   while(count1--)
-   env->xdf1.rchg[line1++ - 1] = 1;
-   return 0;
-   }
 
memset(, 0, sizeof(index));
 
@@ -326,8 +295,52 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t 
*env,
index.ptr_shift = line1;
index.max_chain_length = 64;
 
+   if (scanA(, line1, count1))
+   goto cleanup;
+
+   index.cnt = index.max_chain_length + 1;
+
+   for (b_ptr = line2; b_ptr <= LINE_END(2); )
+   b_ptr = try_lcs(, lcs, b_ptr, line1, count1, line2, 
count2);
+
+   if (index.has_common && index.max_chain_length < index.cnt)
+   ret = 1;
+   else
+   ret = 0;
+
+cleanup:
+   free_index();
+   return ret;
+}
+
+static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
+   int line1, int count1, int line2, int count2)
+{
+   struct region lcs;
+   int lcs_found;
+   int result = -1;
+
+   if (count1 <= 0 && count2 <= 0)
+   return 0;
+
+   if (LINE_END(1) >= MAX_PTR)
+   return -1;
+
+   if (!count1) {
+   while(count2--)
+   env->xdf2.rchg[line2++ - 1] = 1;
+   return 0;
+   } else if (!count2) {
+   while(count1--)
+   env->xdf1.rchg[line1++ - 1] = 1;
+   return 0;
+   }
+
memset(, 0, sizeof(lcs));
-   if (find_lcs(, , line1, count1, line2, count2))
+   lcs_found = find_lcs(xpp, env, , line1, count1, line2, count2);
+   if (lcs_found < 0)
+   goto out;
+   else if (lcs_found)
result = fall_back_to_classic_diff(xpp, env, line1, count1, 
line2, count2);
else {
if (lcs.begin1 == 0 && lcs.begin2 == 0) {
@@ -341,18 +354,15 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t 
*env,
line1, lcs.begin1 - line1,
line2, lcs.begin2 - line2);
if (result)
-   goto cleanup;
+   goto out;
result = histogram_diff(xpp, env,
lcs.end1 + 1, LINE_END(1) - 
lcs.end1,
lcs.end2 + 

Re: [PATCH] diff.c: offer config option to control ws handling in move detection

2018-07-19 Thread Stefan Beller
On Thu, Jul 19, 2018 at 10:32 AM Junio C Hamano  wrote:
>
> Stefan Beller  writes:
>
> > "git format-patch HEAD^ --color" produces red/green output
> > (like git log would), so I do not see why --color-moved should impact
> > format-patch differently. (i.e. if the user requests format-patch with
> > color-moved we can do it, otherwise, when colors are off, we do not
> > spend cycles to compute the moves)
> >
> > Maybe I did not understand the gist of your question, still?
>
> "format-patch --color" done by end-user, especially when combined
> with "--stdout", would be useful to show coloured output.  That is
> what you are talking about in the above, but that is not what I was
> asking about.
>
> The question was specifically about configuration.  You may say
> "diff.colorwhatever = on", but "git format-patch" with no command
> line override wouldn't want to be affected by that and produce a
> patch that won't apply, I would think.

The options introduced here (even the command line arguments)
do *nothing* if no color is on, i.e.

git diff --no-color --color-moved=blocks \
--color-moved-ws=allow-indentation-change

is the same as

git diff --no-color

and as format-patch doesn't use colors by default giving
color-moved settings (even via config) is a no-op, too?

Stefan


Re: [PATCH] diff.c: offer config option to control ws handling in move detection

2018-07-19 Thread Stefan Beller
On Thu, Jul 19, 2018 at 9:29 AM Junio C Hamano  wrote:
>
> Stefan Beller  writes:
>
> > On Wed, Jul 18, 2018 at 10:45 AM Junio C Hamano  wrote:
> >>
> >> Stefan Beller  writes:
> >>
> >> > diff --git a/Documentation/diff-options.txt 
> >> > b/Documentation/diff-options.txt
> >> > index 143acd9417e..8da7fed4e22 100644
> >> > --- a/Documentation/diff-options.txt
> >> > +++ b/Documentation/diff-options.txt
> >> > @@ -294,8 +294,11 @@ dimmed_zebra::
> >> >
> >> >  --color-moved-ws=::
> >> >   This configures how white spaces are ignored when performing the
> >> > - move detection for `--color-moved`. These modes can be given
> >> > - as a comma separated list:
> >> > + move detection for `--color-moved`.
> >> > +ifdef::git-diff[]
> >> > + It can be set by the `diff.colorMovedWS` configuration setting.
> >> > +endif::git-diff[]
> >>
> >> The patch to diff.c::git_diff_ui_config() we see below does not seem
> >> to make any effort to make sure that this new configuration applies
> >> only to "git diff" and that other commands like "git log" that call
> >> git_diff_ui_config() are not affected.
> >
> > That is as intended. (We want to have it in git-log)
>
> Ah, I think what is going on here, and I think I asked a wrong
> question.
>
>  * diff-options.txt is used by the family of diff plumbing commands
>(which actively do not want to participate in the coloring game
>and do not call git_diff_ui_config()) as well as log family of
>commands (which do pay attention to the config).
>
>  * "git grep '^:git-diff:'" hits only Documentation/git-diff.txt.
>
> What the system currently does (which may not match what it should
> do) is that Porcelain "diff", "log", etc. pay attention to the
> configuration while plumbing "diff-{files,index,tree}" don't, and
> use of ifdef/endif achieves only half of that (i.e. excludes the
> sentence from plumbing manual pages).  It excludes too much and does
> not say "log", "show", etc. also honor the configuration.
>
> I think the set of asciidoc attrs diff-options.txt files uses need
> some serious clean-up.  For example, it defines :git-diff-core: that
> is only used once, whose intent seems to be "we are describing diff
> plumbing".  However, the way it does so is by excluding known
> Porcelain; if we ever add new diff porcelain (e.g. "range-diff"),
> that way of 'definition by exclusion' would break.  The scheme is
> already broken by forcing git-show.txt to define 'git-log' just like
> git-log.txt does, meaning that it is not possible to make "show" to
> be described differently from "log".  But let's leave that outside
> this topic.

Then let's call this #leftoverbits ?

> Back to a more on-topic tangent.
>
> How does this patch (and all the recent "color" options you added
> recently) avoid spending unnecessary cycles and contaminating "git
> format-patch" output, by the way?  builtin/log.c::cmd_format_patch()
> seems to eventually call into git_log_config() that ends with a call
> to diff_ui_config().

The color options are only using CPU cycles when we actually need to
color things (if no-color is set, then the move detection is off).

"git format-patch HEAD^ --color" produces red/green output
(like git log would), so I do not see why --color-moved should impact
format-patch differently. (i.e. if the user requests format-patch with
color-moved we can do it, otherwise, when colors are off, we do not
spend cycles to compute the moves)

Maybe I did not understand the gist of your question, still?
Stefan


Re: What's cooking in git.git (Jul 2018, #02; Wed, 18)

2018-07-18 Thread Stefan Beller
On Wed, Jul 18, 2018 at 3:04 PM Junio C Hamano  wrote:

> Many topics have moved to 'master' and 'next' from 'next' to 'pu'
> respectively.  As I needed to re-resolve semantic merge conflicts
> while reordering some topics (i.e. some that were merged earlier in
> 'pu' are left in 'pu' while another topic that had interactions with
> them and required evil merge to resolve semantic conflicts
> leapfrogged to 'next'---the semantic conflict resolution then need
> to happen at a different merge than done earlier in 'pu' while
> rebuilding today's integration), I didn't have enough time to spend
> on new topics posted to the list in the past few days.
>

> * sb/submodule-update-in-c (2018-07-17) 6 commits
>  - submodule--helper: introduce new update-module-mode helper
>  - builtin/submodule--helper: factor out method to update a single submodule
>  - builtin/submodule--helper: store update_clone information in a struct
>  - builtin/submodule--helper: factor out submodule updating
>  - git-submodule.sh: rename unused variables
>  - git-submodule.sh: align error reporting for update mode to use path
>
>  "git submodule update" is getting rewritten piece-by-piece into C.
>
>  It seems to pass its own self-tests standalone, but seems to break
>  horribly when merged to 'pu'.

It actually breaks combined with sb/submodule-core-worktree  :-(
as that introduces another user of $name in git-submodule.sh
which we eliminate in this series.

I'll build on top of that and send a fixed version.

Sorry about being confused and breaking so much lately,

Stefan


Re: [PATCH 0/6] Resend of origin/sb/submodule-update-in-c

2018-07-18 Thread Stefan Beller
On Wed, Jul 18, 2018 at 3:24 PM Junio C Hamano  wrote:
>
> Stefan Beller  writes:
>
> > This fixes the compilation issue raised by SZEDER and Stolee,
> >
> >
> > 6:  3c156c79ae7 ! 6:  f82f24e73b6 submodule--helper: introduce new 
> > update-module-mode helper
> > @@ -10,7 +10,6 @@
> >  for arbitrary repositories.
> >
> >  Signed-off-by: Stefan Beller 
> > -Signed-off-by: Junio C Hamano 
> >
> >  diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> >  --- a/builtin/submodule--helper.c
> > @@ -74,7 +73,7 @@
> >  +determine_submodule_update_strategy(the_repository,
> >  +just_cloned, path, update,
> >  +_strategy);
> > -+fprintf(stdout, 
> > submodule_strategy_to_string(_strategy));
> > ++fputs(submodule_strategy_to_string(_strategy), stdout);
> >  +
> >  +return 0;
> >  +}
> >
> > Thanks,
> > Stefan
>
> Thanks.
>
> This time not from GGG?

GGG = GitGitGadget?
I have yet to try out that tool, I am still stuck on git-send-email.
(old habits die hard)

It is even based off your branch (with all your sign offs).

I just cut down aggressively on the range diff (and short stat),
as I want to highlight the important part.

>  I don't particularly care which direction
> patches come from, though ;-)

Cool.

Thanks,
Stefan


Re: [PATCH v1 0/3] [RFC] Speeding up checkout (and merge, rebase, etc)

2018-07-18 Thread Stefan Beller
> don�t

The encoding seems to be broken somehow (also on)
https://public-inbox.org/git/20180718204458.20936-1-benpe...@microsoft.com/


> When I brought up this idea with some other git contributors they mentioned
> that multi threading unpack_trees() had been discussed a few years ago on

https://public-inbox.org/git/cacsjy8a0kuyxk_2namh+da9yithzm5d68rhqevze3ncmxin...@mail.gmail.com/
https://public-inbox.org/git/20160415095139.GA3985@lanh/


> the list but that the idea was discarded.  They couldn�t remember exactly
> why it was discarded and none of us have been able to find the email threads
> from that earlier discussion. As a result, I decided to write up this RFC
> and see if the greater git community has ideas, suggestions, or more
> background/history on whether this is a reasonable path to pursue or if
> there are other/better ideas on how to speed up checkout especially on large
> repos.

If you want more than a bare bones threaded queue, see
https://public-inbox.org/git/1440724495-708-5-git-send-email-sbel...@google.com/
for inspiration.

Stefan


Re: [PATCH v1 1/3] add unbounded Multi-Producer-Multi-Consumer queue

2018-07-18 Thread Stefan Beller
On Wed, Jul 18, 2018 at 1:45 PM Ben Peart  wrote:
>

Did you have any further considerations that are worth recording here?
(memory, performance, CPU execution, threading, would all come to mind)

> Signed-off-by: Ben Peart 

> +/*
> + * Initializes a mpmcq structure.
> + */

I'd find the name mpmcq a bit troubling if I were just stumbling upon it
in the code without the knowledge of this review (and its abbreviation),
maybe just 'threadsafe_queue' ?

> +extern void mpmcq_init(struct mpmcq *queue);

We prefer no extern keyword these days
c.f. Documentation/CodingGuidelines:
 - Variables and functions local to a given source file should be marked
   with "static". Variables that are visible to other source files
   must be declared with "extern" in header files. However, function
   declarations should not use "extern", as that is already the default.


Re: [PATCH] add core.usereplacerefs config option

2018-07-18 Thread Stefan Beller
On Wed, Jul 18, 2018 at 1:31 PM Jeff King  wrote:
>
> On Wed, Jul 18, 2018 at 04:23:20PM -0400, Derrick Stolee wrote:
>
> > This patch looks good to me. The only thing I saw was when I ran 'git grep
> > check_replace_refs' and saw the following in environment.c:
> >
> > int check_replace_refs = 1; /* NEEDSWORK: rename to read_replace_refs */
> >
> > This does help me feel confident that the case where the config value is
> > missing will default to 'yes, please check replace refs', but also the
> > NEEDSWORK could be something to either (1) do, or (2) remove the comment.
> > Neither needs to happen as part of this patch.
>
> Yeah, it was actually that comment that led me to Stefan's recent
> c3c36d7de2 (replace-object: check_replace_refs is safe in multi repo
> environment, 2018-04-11).
>
> And ironically, back when I originally wrote this patch, it _was_ called
> read_replace_refs. That changed in afc711b8e1 (rename read_replace_refs
> to check_replace_refs, 2014-02-18), which was in turn picking up a
> leftover from ecef23 (inline lookup_replace_object() calls,
> 2011-05-15).
>
> Since Stefan's patch logically undoes ecef23, I think that's why he
> put in the comment to move back to the old name.

I think so, too

> Personally, I do not find one name any more informative than the other,
> and would be happy to leave it as-is (dropping the comment).
>
> But I'm also fine with following through on the "do". According to
> c3c36d7de2, that was waiting for a calmer time in the code base. I guess
> the best way to find out is to write the patch and see how terribly it
> conflicts with pu. :)

As someone burned by coming up with renaming (or rather lack thereof),
I'd happily watch from the sideline this time.

I think this patch is good; ship it. :-)

Thanks,
Stefan


[PATCH 3/6] builtin/submodule--helper: factor out submodule updating

2018-07-18 Thread Stefan Beller
Separate the command line parsing from the actual execution of the command
within the repository. For now there is not a lot of execution as
most of it is still in git-submodule.sh.

Signed-off-by: Stefan Beller 
---
 builtin/submodule--helper.c | 59 +
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index ebcfe85bfa9..96929ba1096 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -1472,6 +1472,8 @@ struct submodule_update_clone {
/* failed clones to be retried again */
const struct cache_entry **failed_clones;
int failed_clones_nr, failed_clones_alloc;
+
+   int max_jobs;
 };
 #define SUBMODULE_UPDATE_CLONE_INIT {0, MODULE_LIST_INIT, 0, \
SUBMODULE_UPDATE_STRATEGY_INIT, 0, 0, -1, STRING_LIST_INIT_DUP, 0, \
@@ -1714,11 +1716,36 @@ static int gitmodules_update_clone_config(const char 
*var, const char *value,
return 0;
 }
 
+static int update_submodules(struct submodule_update_clone *suc)
+{
+   struct string_list_item *item;
+
+   run_processes_parallel(suc->max_jobs,
+  update_clone_get_next_task,
+  update_clone_start_failure,
+  update_clone_task_finished,
+  suc);
+
+   /*
+* We saved the output and put it out all at once now.
+* That means:
+* - the listener does not have to interleave their (checkout)
+*   work with our fetching.  The writes involved in a
+*   checkout involve more straightforward sequential I/O.
+* - the listener can avoid doing any work if fetching failed.
+*/
+   if (suc->quickstop)
+   return 1;
+
+   for_each_string_list_item(item, >projectlines)
+   fprintf(stdout, "%s", item->string);
+
+   return 0;
+}
+
 static int update_clone(int argc, const char **argv, const char *prefix)
 {
const char *update = NULL;
-   int max_jobs = 1;
-   struct string_list_item *item;
struct pathspec pathspec;
struct submodule_update_clone suc = SUBMODULE_UPDATE_CLONE_INIT;
 
@@ -1740,7 +1767,7 @@ static int update_clone(int argc, const char **argv, 
const char *prefix)
OPT_STRING(0, "depth", , "",
   N_("Create a shallow clone truncated to the "
  "specified number of revisions")),
-   OPT_INTEGER('j', "jobs", _jobs,
+   OPT_INTEGER('j', "jobs", _jobs,
N_("parallel jobs")),
OPT_BOOL(0, "recommend-shallow", _shallow,
N_("whether the initial clone should follow the 
shallow recommendation")),
@@ -1756,8 +1783,8 @@ static int update_clone(int argc, const char **argv, 
const char *prefix)
};
suc.prefix = prefix;
 
-   config_from_gitmodules(gitmodules_update_clone_config, _jobs);
-   git_config(gitmodules_update_clone_config, _jobs);
+   config_from_gitmodules(gitmodules_update_clone_config, _jobs);
+   git_config(gitmodules_update_clone_config, _jobs);
 
argc = parse_options(argc, argv, prefix, module_update_clone_options,
 git_submodule_helper_usage, 0);
@@ -1772,27 +1799,7 @@ static int update_clone(int argc, const char **argv, 
const char *prefix)
if (pathspec.nr)
suc.warn_if_uninitialized = 1;
 
-   run_processes_parallel(max_jobs,
-  update_clone_get_next_task,
-  update_clone_start_failure,
-  update_clone_task_finished,
-  );
-
-   /*
-* We saved the output and put it out all at once now.
-* That means:
-* - the listener does not have to interleave their (checkout)
-*   work with our fetching.  The writes involved in a
-*   checkout involve more straightforward sequential I/O.
-* - the listener can avoid doing any work if fetching failed.
-*/
-   if (suc.quickstop)
-   return 1;
-
-   for_each_string_list_item(item, )
-   fprintf(stdout, "%s", item->string);
-
-   return 0;
+   return update_submodules();
 }
 
 static int resolve_relative_path(int argc, const char **argv, const char 
*prefix)
-- 
2.18.0.233.g985f88cf7e-goog



[PATCH 2/6] git-submodule.sh: rename unused variables

2018-07-18 Thread Stefan Beller
The 'mode' variable is not used in cmd_update for its original purpose,
rename it to 'dummy' as it only serves the purpose to abort quickly
documenting this knowledge.

The variable 'stage' is also not used any more in cmd_update, so remove it.

This went unnoticed as first each function used the commonly used
submodule listing, which was converted in 74703a1e4df (submodule: rewrite
`module_list` shell function in C, 2015-09-02). When cmd_update was
using its own function starting in 48308681b07 (git submodule update:
have a dedicated helper for cloning, 2016-02-29), its removal was missed.

A later patch in this series also touches the communication between
the submodule helper and git-submodule.sh, but let's have this as
a preparatory patch, as it eases the next patch, which stores the
raw data instead of the line printed for this communication.

Signed-off-by: Stefan Beller 
---
 builtin/submodule--helper.c | 5 ++---
 git-submodule.sh| 4 ++--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 20ae9191ca3..ebcfe85bfa9 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -1571,9 +1571,8 @@ static int prepare_to_clone_next_submodule(const struct 
cache_entry *ce,
needs_cloning = !file_exists(sb.buf);
 
strbuf_reset();
-   strbuf_addf(, "%06o %s %d %d\t%s\n", ce->ce_mode,
-   oid_to_hex(>oid), ce_stage(ce),
-   needs_cloning, ce->name);
+   strbuf_addf(, "dummy %s %d\t%s\n",
+   oid_to_hex(>oid), needs_cloning, ce->name);
string_list_append(>projectlines, sb.buf);
 
if (!needs_cloning)
diff --git a/git-submodule.sh b/git-submodule.sh
index 8a611865397..56588aa304d 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -531,9 +531,9 @@ cmd_update()
"$@" || echo "#unmatched" $?
} | {
err=
-   while read -r mode sha1 stage just_cloned sm_path
+   while read -r quickabort sha1 just_cloned sm_path
do
-   die_if_unmatched "$mode" "$sha1"
+   die_if_unmatched "$quickabort" "$sha1"
 
name=$(git submodule--helper name "$sm_path") || exit
if ! test -z "$update"
-- 
2.18.0.233.g985f88cf7e-goog



[PATCH 5/6] builtin/submodule--helper: factor out method to update a single submodule

2018-07-18 Thread Stefan Beller
In a later patch we'll find this method handy.

Signed-off-by: Stefan Beller 
---
 builtin/submodule--helper.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index fb54936efc7..034ba1bb2e0 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -1725,10 +1725,17 @@ static int gitmodules_update_clone_config(const char 
*var, const char *value,
return 0;
 }
 
+static void update_submodule(struct update_clone_data *ucd)
+{
+   fprintf(stdout, "dummy %s %d\t%s\n",
+   oid_to_hex(>oid),
+   ucd->just_cloned,
+   ucd->sub->path);
+}
+
 static int update_submodules(struct submodule_update_clone *suc)
 {
int i;
-   struct strbuf sb = STRBUF_INIT;
 
run_processes_parallel(suc->max_jobs,
   update_clone_get_next_task,
@@ -1747,16 +1754,9 @@ static int update_submodules(struct 
submodule_update_clone *suc)
if (suc->quickstop)
return 1;
 
-   for (i = 0; i < suc->update_clone_nr; i++) {
-   strbuf_addf(, "dummy %s %d\t%s\n",
-   oid_to_hex(>update_clone[i].oid),
-   suc->update_clone[i].just_cloned,
-   suc->update_clone[i].sub->path);
-   fprintf(stdout, "%s", sb.buf);
-   strbuf_reset();
-   }
+   for (i = 0; i < suc->update_clone_nr; i++)
+   update_submodule(>update_clone[i]);
 
-   strbuf_release();
return 0;
 }
 
-- 
2.18.0.233.g985f88cf7e-goog



[PATCH 6/6] submodule--helper: introduce new update-module-mode helper

2018-07-18 Thread Stefan Beller
This chews off a bit of the shell part of the update command in
git-submodule.sh. When writing the C code, keep in mind that the
submodule--helper part will go away eventually and we want to have
a C function that is able to determine the submodule update strategy,
it as a nicety, make determine_submodule_update_strategy accessible
for arbitrary repositories.

Signed-off-by: Stefan Beller 
---
 builtin/submodule--helper.c | 61 +
 git-submodule.sh| 16 +-
 2 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 034ba1bb2e0..c7de5d1e0a0 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -1444,6 +1444,66 @@ static int module_clone(int argc, const char **argv, 
const char *prefix)
return 0;
 }
 
+static void determine_submodule_update_strategy(struct repository *r,
+   int just_cloned,
+   const char *path,
+   const char *update,
+   struct 
submodule_update_strategy *out)
+{
+   const struct submodule *sub = submodule_from_path(r, _oid, path);
+   char *key;
+   const char *val;
+
+   key = xstrfmt("submodule.%s.update", sub->name);
+
+   if (update) {
+   trace_printf("parsing update");
+   if (parse_submodule_update_strategy(update, out) < 0)
+   die(_("Invalid update mode '%s' for submodule path 
'%s'"),
+   update, path);
+   } else if (!repo_config_get_string_const(r, key, )) {
+   if (parse_submodule_update_strategy(val, out) < 0)
+   die(_("Invalid update mode '%s' configured for 
submodule path '%s'"),
+   val, path);
+   } else if (sub->update_strategy.type != SM_UPDATE_UNSPECIFIED) {
+   trace_printf("loaded thing");
+   out->type = sub->update_strategy.type;
+   out->command = sub->update_strategy.command;
+   } else
+   out->type = SM_UPDATE_CHECKOUT;
+
+   if (just_cloned &&
+   (out->type == SM_UPDATE_MERGE ||
+out->type == SM_UPDATE_REBASE ||
+out->type == SM_UPDATE_NONE))
+   out->type = SM_UPDATE_CHECKOUT;
+
+   free(key);
+}
+
+static int module_update_module_mode(int argc, const char **argv, const char 
*prefix)
+{
+   const char *path, *update = NULL;
+   int just_cloned;
+   struct submodule_update_strategy update_strategy = { .type = 
SM_UPDATE_CHECKOUT };
+
+   if (argc < 3 || argc > 4)
+   die("submodule--helper update-module-clone expects 
  []");
+
+   just_cloned = git_config_int("just_cloned", argv[1]);
+   path = argv[2];
+
+   if (argc == 4)
+   update = argv[3];
+
+   determine_submodule_update_strategy(the_repository,
+   just_cloned, path, update,
+   _strategy);
+   fputs(submodule_strategy_to_string(_strategy), stdout);
+
+   return 0;
+}
+
 struct update_clone_data {
const struct submodule *sub;
struct object_id oid;
@@ -2039,6 +2099,7 @@ static struct cmd_struct commands[] = {
{"list", module_list, 0},
{"name", module_name, 0},
{"clone", module_clone, 0},
+   {"update-module-mode", module_update_module_mode, 0},
{"update-clone", update_clone, 0},
{"relative-path", resolve_relative_path, 0},
{"resolve-relative-url", resolve_relative_url, 0},
diff --git a/git-submodule.sh b/git-submodule.sh
index 56588aa304d..215760898ce 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -535,27 +535,13 @@ cmd_update()
do
die_if_unmatched "$quickabort" "$sha1"
 
-   name=$(git submodule--helper name "$sm_path") || exit
-   if ! test -z "$update"
-   then
-   update_module=$update
-   else
-   update_module=$(git config submodule."$name".update)
-   if test -z "$update_module"
-   then
-   update_module="checkout"
-   fi
-   fi
+   update_module=$(git submodule--helper update-module-mode 
$just_cloned "$sm_path" $update)
 
displaypath=$(git submodule--helper relative-path 
"$prefix$sm_path" "$wt_prefix")
 
if test $just_cloned -e

[PATCH 4/6] builtin/submodule--helper: store update_clone information in a struct

2018-07-18 Thread Stefan Beller
The information that is printed for update_submodules in
'submodule--helper update-clone' and consumed by 'git submodule update'
is stored as a string per submodule. This made sense at the time of
48308681b07 (git submodule update: have a dedicated helper for cloning,
2016-02-29), but as we want to migrate the rest of the submodule update
into C, we're better off having access to the raw information in a helper
struct.

Signed-off-by: Stefan Beller 
---
 builtin/submodule--helper.c | 37 +++--
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 96929ba1096..fb54936efc7 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -1444,6 +1444,12 @@ static int module_clone(int argc, const char **argv, 
const char *prefix)
return 0;
 }
 
+struct update_clone_data {
+   const struct submodule *sub;
+   struct object_id oid;
+   unsigned just_cloned;
+};
+
 struct submodule_update_clone {
/* index into 'list', the list of submodules to look into for cloning */
int current;
@@ -1463,8 +1469,9 @@ struct submodule_update_clone {
const char *recursive_prefix;
const char *prefix;
 
-   /* Machine-readable status lines to be consumed by git-submodule.sh */
-   struct string_list projectlines;
+   /* to be consumed by git-submodule.sh */
+   struct update_clone_data *update_clone;
+   int update_clone_nr; int update_clone_alloc;
 
/* If we want to stop as fast as possible and return an error */
unsigned quickstop : 1;
@@ -1478,7 +1485,7 @@ struct submodule_update_clone {
 #define SUBMODULE_UPDATE_CLONE_INIT {0, MODULE_LIST_INIT, 0, \
SUBMODULE_UPDATE_STRATEGY_INIT, 0, 0, -1, STRING_LIST_INIT_DUP, 0, \
NULL, NULL, NULL, \
-   STRING_LIST_INIT_DUP, 0, NULL, 0, 0}
+   NULL, 0, 0, 0, NULL, 0, 0, 0}
 
 
 static void next_submodule_warn_missing(struct submodule_update_clone *suc,
@@ -1572,10 +1579,12 @@ static int prepare_to_clone_next_submodule(const struct 
cache_entry *ce,
strbuf_addf(, "%s/.git", ce->name);
needs_cloning = !file_exists(sb.buf);
 
-   strbuf_reset();
-   strbuf_addf(, "dummy %s %d\t%s\n",
-   oid_to_hex(>oid), needs_cloning, ce->name);
-   string_list_append(>projectlines, sb.buf);
+   ALLOC_GROW(suc->update_clone, suc->update_clone_nr + 1,
+  suc->update_clone_alloc);
+   oidcpy(>update_clone[suc->update_clone_nr].oid, >oid);
+   suc->update_clone[suc->update_clone_nr].just_cloned = needs_cloning;
+   suc->update_clone[suc->update_clone_nr].sub = sub;
+   suc->update_clone_nr++;
 
if (!needs_cloning)
goto cleanup;
@@ -1718,7 +1727,8 @@ static int gitmodules_update_clone_config(const char 
*var, const char *value,
 
 static int update_submodules(struct submodule_update_clone *suc)
 {
-   struct string_list_item *item;
+   int i;
+   struct strbuf sb = STRBUF_INIT;
 
run_processes_parallel(suc->max_jobs,
   update_clone_get_next_task,
@@ -1737,9 +1747,16 @@ static int update_submodules(struct 
submodule_update_clone *suc)
if (suc->quickstop)
return 1;
 
-   for_each_string_list_item(item, >projectlines)
-   fprintf(stdout, "%s", item->string);
+   for (i = 0; i < suc->update_clone_nr; i++) {
+   strbuf_addf(, "dummy %s %d\t%s\n",
+   oid_to_hex(>update_clone[i].oid),
+   suc->update_clone[i].just_cloned,
+   suc->update_clone[i].sub->path);
+   fprintf(stdout, "%s", sb.buf);
+   strbuf_reset();
+   }
 
+   strbuf_release();
return 0;
 }
 
-- 
2.18.0.233.g985f88cf7e-goog



[PATCH 0/6] Resend of origin/sb/submodule-update-in-c

2018-07-18 Thread Stefan Beller
This fixes the compilation issue raised by SZEDER and Stolee,


6:  3c156c79ae7 ! 6:  f82f24e73b6 submodule--helper: introduce new 
update-module-mode helper
@@ -10,7 +10,6 @@
 for arbitrary repositories.
 
 Signed-off-by: Stefan Beller 
-Signed-off-by: Junio C Hamano 
 
 diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
 --- a/builtin/submodule--helper.c
@@ -74,7 +73,7 @@
 +  determine_submodule_update_strategy(the_repository,
 +  just_cloned, path, update,
 +  _strategy);
-+  fprintf(stdout, submodule_strategy_to_string(_strategy));
++  fputs(submodule_strategy_to_string(_strategy), stdout);
 +
 +  return 0;
 +}

Thanks,
Stefan


[PATCH 1/6] git-submodule.sh: align error reporting for update mode to use path

2018-07-18 Thread Stefan Beller
All other error messages in cmd_update are reporting the submodule based
on its path, so let's do that for invalid update modes, too.

Signed-off-by: Stefan Beller 
---
 git-submodule.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/git-submodule.sh b/git-submodule.sh
index 5f9d9f6ea37..8a611865397 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -627,7 +627,7 @@ cmd_update()
must_die_on_failure=yes
;;
*)
-   die "$(eval_gettext "Invalid update mode 
'$update_module' for submodule '$name'")"
+   die "$(eval_gettext "Invalid update mode 
'$update_module' for submodule path '$path'")"
esac
 
if (sanitize_submodule_env; cd "$sm_path" && $command 
"$sha1")
-- 
2.18.0.233.g985f88cf7e-goog



Re: [PATCH 6/6] submodule--helper: introduce new update-module-mode helper

2018-07-18 Thread Stefan Beller
On Wed, Jul 18, 2018 at 8:00 AM Derrick Stolee  wrote:
>
> On 7/12/2018 3:47 PM, Stefan Beller wrote:
> > + fprintf(stdout, submodule_strategy_to_string(_strategy));
>
> This line is causing build failures on 'pu' for certain setups:
>

yeah, will fix in a resend.
originally reported at
https://public-inbox.org/git/20180717075959.30594-1-szeder@gmail.com/


Re: [PATCH v2 0/6] git-submodule.sh: convert part of cmd_update to C

2018-07-18 Thread Stefan Beller
Hi Eric,
On Tue, Jul 17, 2018 at 11:59 AM Eric Sunshine  wrote:
>
> On Tue, Jul 17, 2018 at 2:53 PM Stefan Beller  wrote:
> > > A tangent.
> > >
> > > Because this "-- " is a conventional signature separator, MUAs like
> > > Emacs message-mode seems to omit everything below it from the quote
> > > while responding, making it cumbersome to comment on the tbdiff.
> > >
> > > Something to think about if somebody is contemplating on adding more
> > > to format-patch's cover letter.
> >
> > +cc Eric who needs to think about this tangent, then.
> > https://public-inbox.org/git/20180530080325.37520-1-sunsh...@sunshineco.com/
>
> The "git-format-patch --range-diff" option implemented by that patch
> series (and its upcoming re-roll) place the range-diff before the "--
> " signature line, so this isn't a problem.
>
> I think Junio's tangent was targeted more at humans blindly plopping
> the copy/pasted range-diff at the end of the cover letter without
> taking the "-- " signature line into account. (Indeed, Gmail hides
> everything below the "-- " line by default, as well.)

Awesome thanks! (I missed that hint given by Junio)

Now that I grow more accustomed to range-diffs, I wonder
if we want to have them even *before* the short stat in the
cover letter. (I usually scroll over the short stat just to take it
as a FYI on what to expect, whereas the range-diff can already
be reviewed, hence seems more useful that the stats)

Thanks,
Stefan


[PATCH 08/10] diff.c: factor advance_or_nullify out of mark_color_as_moved

2018-07-18 Thread Stefan Beller
This moves the part of code that checks if we're still in a block
into its own function.  We'll need a different approach on advancing
the blocks in a later patch, so having it as a separate function will
prove useful.

While at it rename the variable `p` to `prev` to indicate that it refers
to the previous line. This is as pmb[i] was assigned in the last iteration
of the outmost for loop.

Further rename `pnext` to `cur` to indicate that this should match up with
the current line of the outmost for loop.

Also replace the advancement of pmb[i] to reuse `cur` instead of
using `p->next` (which is how the name for pnext could be explained.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 32 
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/diff.c b/diff.c
index 70eeb40c5fd..4963819e530 100644
--- a/diff.c
+++ b/diff.c
@@ -801,6 +801,25 @@ static void add_lines_to_move_detection(struct 
diff_options *o,
}
 }
 
+static void pmb_advance_or_null(struct diff_options *o,
+   struct moved_entry *match,
+   struct hashmap *hm,
+   struct moved_entry **pmb,
+   int pmb_nr)
+{
+   int i;
+   for (i = 0; i < pmb_nr; i++) {
+   struct moved_entry *prev = pmb[i];
+   struct moved_entry *cur = (prev && prev->next_line) ?
+   prev->next_line : NULL;
+   if (cur && !hm->cmpfn(o, cur, match, NULL)) {
+   pmb[i] = cur;
+   } else {
+   pmb[i] = NULL;
+   }
+   }
+}
+
 static int shrink_potential_moved_blocks(struct moved_entry **pmb,
 int pmb_nr)
 {
@@ -875,7 +894,6 @@ static void mark_color_as_moved(struct diff_options *o,
struct moved_entry *key;
struct moved_entry *match = NULL;
struct emitted_diff_symbol *l = >emitted_symbols->buf[n];
-   int i;
 
switch (l->s) {
case DIFF_SYMBOL_PLUS:
@@ -906,17 +924,7 @@ static void mark_color_as_moved(struct diff_options *o,
if (o->color_moved == COLOR_MOVED_PLAIN)
continue;
 
-   /* Check any potential block runs, advance each or nullify */
-   for (i = 0; i < pmb_nr; i++) {
-   struct moved_entry *p = pmb[i];
-   struct moved_entry *pnext = (p && p->next_line) ?
-   p->next_line : NULL;
-   if (pnext && !hm->cmpfn(o, pnext, match, NULL)) {
-   pmb[i] = p->next_line;
-   } else {
-   pmb[i] = NULL;
-   }
-   }
+   pmb_advance_or_null(o, match, hm, pmb, pmb_nr);
 
pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr);
 
-- 
2.18.0.233.g985f88cf7e-goog



[PATCH 02/10] xdiff/xdiffi.c: remove unneeded function declarations

2018-07-18 Thread Stefan Beller
There is no need to forward-declare these functions, as they are used
after their implementation only.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 xdiff/xdiffi.c | 17 -
 1 file changed, 17 deletions(-)

diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 0de1ef463bf..3e8aff92bc4 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -22,34 +22,17 @@
 
 #include "xinclude.h"
 
-
-
 #define XDL_MAX_COST_MIN 256
 #define XDL_HEUR_MIN_COST 256
 #define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1)
 #define XDL_SNAKE_CNT 20
 #define XDL_K_HEUR 4
 
-
-
 typedef struct s_xdpsplit {
long i1, i2;
int min_lo, min_hi;
 } xdpsplit_t;
 
-
-
-
-static long xdl_split(unsigned long const *ha1, long off1, long lim1,
- unsigned long const *ha2, long off2, long lim2,
- long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
- xdalgoenv_t *xenv);
-static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long 
chg1, long chg2);
-
-
-
-
-
 /*
  * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
  * Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
-- 
2.18.0.233.g985f88cf7e-goog



[PATCH 05/10] diff.c: adjust hash function signature to match hashmap expectation

2018-07-18 Thread Stefan Beller
This makes the follow up patch easier.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/diff.c b/diff.c
index ce7bedc1b92..d1bae900cdc 100644
--- a/diff.c
+++ b/diff.c
@@ -707,11 +707,15 @@ struct moved_entry {
struct moved_entry *next_line;
 };
 
-static int moved_entry_cmp(const struct diff_options *diffopt,
-  const struct moved_entry *a,
-  const struct moved_entry *b,
+static int moved_entry_cmp(const void *hashmap_cmp_fn_data,
+  const void *entry,
+  const void *entry_or_key,
   const void *keydata)
 {
+   const struct diff_options *diffopt = hashmap_cmp_fn_data;
+   const struct moved_entry *a = entry;
+   const struct moved_entry *b = entry_or_key;
+
return !xdiff_compare_lines(a->es->line, a->es->len,
b->es->line, b->es->len,
diffopt->xdl_opts);
@@ -5534,10 +5538,8 @@ static void diff_flush_patch_all_file_pairs(struct 
diff_options *o)
if (o->color_moved) {
struct hashmap add_lines, del_lines;
 
-   hashmap_init(_lines,
-(hashmap_cmp_fn)moved_entry_cmp, o, 0);
-   hashmap_init(_lines,
-(hashmap_cmp_fn)moved_entry_cmp, o, 0);
+   hashmap_init(_lines, moved_entry_cmp, o, 0);
+   hashmap_init(_lines, moved_entry_cmp, o, 0);
 
add_lines_to_move_detection(o, _lines, _lines);
mark_color_as_moved(o, _lines, _lines);
-- 
2.18.0.233.g985f88cf7e-goog



[PATCH 03/10] t4015: avoid git as a pipe input

2018-07-18 Thread Stefan Beller
In t4015 we have a pattern of

git diff [] |
grep -v "index" |
test_decode_color >actual &&

to produce output that we want to test against. This pattern was introduced
in 86b452e2769 (diff.c: add dimming to moved line detection, 2017-06-30)
as then the focus on getting the colors right. However the pattern used
is not best practice as we do care about the exit code of Git. So let's
not have Git as the upstream of a pipe. Piping the output of grep to
some function is fine as we assume grep to be un-flawed in our test suite.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 t/t4015-diff-whitespace.sh | 50 +++---
 1 file changed, 20 insertions(+), 30 deletions(-)

diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh
index 17df491a3ab..ddbc3901385 100755
--- a/t/t4015-diff-whitespace.sh
+++ b/t/t4015-diff-whitespace.sh
@@ -1271,9 +1271,8 @@ test_expect_success 'detect permutations inside moved 
code -- dimmed_zebra' '
test_config color.diff.newMovedDimmed "normal cyan" &&
test_config color.diff.oldMovedAlternativeDimmed "normal blue" &&
test_config color.diff.newMovedAlternativeDimmed "normal yellow" &&
-   git diff HEAD --no-renames --color-moved=dimmed_zebra --color |
-   grep -v "index" |
-   test_decode_color >actual &&
+   git diff HEAD --no-renames --color-moved=dimmed_zebra --color 
>actual.raw &&
+   grep -v "index" actual.raw | test_decode_color >actual &&
cat <<-\EOF >expected &&
diff --git a/lines.txt b/lines.txt
--- a/lines.txt
@@ -1315,9 +1314,8 @@ test_expect_success 'cmd option assumes configured 
colored-moved' '
test_config color.diff.oldMovedAlternativeDimmed "normal blue" &&
test_config color.diff.newMovedAlternativeDimmed "normal yellow" &&
test_config diff.colorMoved zebra &&
-   git diff HEAD --no-renames --color-moved --color |
-   grep -v "index" |
-   test_decode_color >actual &&
+   git diff HEAD --no-renames --color-moved --color >actual.raw &&
+   grep -v "index" actual.raw | test_decode_color >actual &&
cat <<-\EOF >expected &&
diff --git a/lines.txt b/lines.txt
--- a/lines.txt
@@ -1395,9 +1393,8 @@ test_expect_success 'move detection ignoring whitespace ' 
'
line 4
line 5
EOF
-   git diff HEAD --no-renames --color-moved --color |
-   grep -v "index" |
-   test_decode_color >actual &&
+   git diff HEAD --no-renames --color-moved --color >actual.raw &&
+   grep -v "index" actual.raw | test_decode_color >actual &&
cat <<-\EOF >expected &&
diff --git a/lines.txt b/lines.txt
--- a/lines.txt
@@ -1419,9 +1416,8 @@ test_expect_success 'move detection ignoring whitespace ' 
'
EOF
test_cmp expected actual &&
 
-   git diff HEAD --no-renames -w --color-moved --color |
-   grep -v "index" |
-   test_decode_color >actual &&
+   git diff HEAD --no-renames -w --color-moved --color >actual.raw &&
+   grep -v "index" actual.raw | test_decode_color >actual &&
cat <<-\EOF >expected &&
diff --git a/lines.txt b/lines.txt
--- a/lines.txt
@@ -1459,9 +1455,8 @@ test_expect_success 'move detection ignoring whitespace 
changes' '
line 5
EOF
 
-   git diff HEAD --no-renames --color-moved --color |
-   grep -v "index" |
-   test_decode_color >actual &&
+   git diff HEAD --no-renames --color-moved --color >actual.raw &&
+   grep -v "index" actual.raw | test_decode_color >actual &&
cat <<-\EOF >expected &&
diff --git a/lines.txt b/lines.txt
--- a/lines.txt
@@ -1483,9 +1478,8 @@ test_expect_success 'move detection ignoring whitespace 
changes' '
EOF
test_cmp expected actual &&
 
-   git diff HEAD --no-renames -b --color-moved --color |
-   grep -v "index" |
-   test_decode_color >actual &&
+   git diff HEAD --no-renames -b --color-moved --color >actual.raw &&
+   grep -v "index" actual.raw | test_decode_color >actual &&
cat <<-\EOF >expected &&
diff --git a/lines.txt b/lines.txt
--- a/lines.txt
@@ -1526,9 +1520,8 @@ test_expect_success 'move detection ignoring whitespace 
at eol' '
# avoid clutt

[PATCH 04/10] diff.c: do not pass diff options as keydata to hashmap

2018-07-18 Thread Stefan Beller
When we initialize the hashmap, we give it a pointer to the
diff_options, which it then passes along to each call of the
hashmap_cmp_fn function. There's no need to pass it a second time as
the "keydata" parameter, and our comparison functions never look at
keydata.

This was a mistake left over from an earlier round of 2e2d5ac184
(diff.c: color moved lines differently, 2017-06-30), before hashmap
learned to pass the data pointer for us.

Explanation-by: Jeff King 
Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 1289df4b1f9..ce7bedc1b92 100644
--- a/diff.c
+++ b/diff.c
@@ -842,13 +842,13 @@ static void mark_color_as_moved(struct diff_options *o,
case DIFF_SYMBOL_PLUS:
hm = del_lines;
key = prepare_entry(o, n);
-   match = hashmap_get(hm, key, o);
+   match = hashmap_get(hm, key, NULL);
free(key);
break;
case DIFF_SYMBOL_MINUS:
hm = add_lines;
key = prepare_entry(o, n);
-   match = hashmap_get(hm, key, o);
+   match = hashmap_get(hm, key, NULL);
free(key);
break;
default:
-- 
2.18.0.233.g985f88cf7e-goog



[PATCH 09/10] diff.c: add white space mode to move detection that allows indent changes

2018-07-18 Thread Stefan Beller
The option of --color-moved has proven to be useful as observed on the
mailing list. However when refactoring sometimes the indentation changes,
for example when partitioning a functions into smaller helper functions
the code usually mostly moved around except for a decrease in indentation.

To just review the moved code ignoring the change in indentation, a mode
to ignore spaces in the move detection as implemented in a previous patch
would be enough.  However the whole move coloring as motivated in commit
2e2d5ac (diff.c: color moved lines differently, 2017-06-30), brought
up the notion of the reviewer being able to trust the move of a "block".

As there are languages such as python, which depend on proper relative
indentation for the control flow of the program, ignoring any white space
change in a block would not uphold the promises of 2e2d5ac that allows
reviewers to pay less attention to the inside of a block, as inside
the reviewer wants to assume the same program flow.

This new mode of white space ignorance will take this into account and will
only allow the same white space changes per line in each block. This patch
even allows only for the same change at the beginning of the lines.

As this is a white space mode, it is made exclusive to other white space
modes in the move detection.

This patch brings some challenges, related to the detection of blocks.
We need a wide net to catch the possible moved lines, but then need to
narrow down to check if the blocks are still intact. Consider this
example (ignoring block sizes):

 - A
 - B
 - C
 +A
 +B
 +C

At the beginning of a block when checking if there is a counterpart
for A, we have to ignore all space changes. However at the following
lines we have to check if the indent change stayed the same.

Checking if the indentation change did stay the same, is done by computing
the indentation change by the difference in line length, and then assume
the change is only in the beginning of the longer line, the common tail
is the same. That is why the test contains lines like:

 -  A
 ...
 + A 
 ...

As the first line starting a block is caught using a compare function that
ignores white spaces unlike the rest of the block, where the white space
delta is taken into account for the comparison, we also have to think about
the following situation:

 - A
 - B
 -   A
 -   B
 +A
 +B
 +  A
 +  B

When checking if the first A (both in the + and - lines) is a start of
a block, we have to check all 'A' and record all the white space deltas
such that we can find the example above to be just one block that is
indented.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 Documentation/diff-options.txt |   5 ++
 diff.c | 158 -
 diff.h |   3 +
 t/t4015-diff-whitespace.sh |  88 ++
 4 files changed, 252 insertions(+), 2 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 80e29e39854..143acd9417e 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -307,6 +307,11 @@ ignore-space-change::
 ignore-all-space::
Ignore whitespace when comparing lines. This ignores differences
even if one line has whitespace where the other line has none.
+allow-indentation-change::
+   Initially ignore any white spaces in the move detection, then
+   group the moved code blocks only into a block if the change in
+   whitespace is the same per line. This is incompatible with the
+   other modes.
 --
 
 --word-diff[=]::
diff --git a/diff.c b/diff.c
index 4963819e530..7810a4733f8 100644
--- a/diff.c
+++ b/diff.c
@@ -302,12 +302,18 @@ static int parse_color_moved_ws(const char *arg)
ret |= XDF_IGNORE_WHITESPACE_AT_EOL;
else if (!strcmp(sb.buf, "ignore-all-space"))
ret |= XDF_IGNORE_WHITESPACE;
+   else if (!strcmp(sb.buf, "allow-indentation-change"))
+   ret |= COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE;
else
error(_("ignoring unknown color-moved-ws mode '%s'"), 
sb.buf);
 
strbuf_release();
}
 
+   if ((ret & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) &&
+   (ret & XDF_WHITESPACE_FLAGS))
+   die(_("color-moved-ws: allow-indentation-change cannot be 
combined with other white space modes"));
+
string_list_clear(, 0);
 
return ret;
@@ -737,7 +743,91 @@ struct moved_entry {
struct hashmap_entry ent;
const struct emitted_diff_symbol *es;
struct moved_entry *next_line;
+   struct ws_delta *wsd;
+};
+
+/**
+ * The struct ws_delta holds white space differences between moved lines, i.e.
+ * between '+' and '-' lines that have been detected to be a move.
+ * The string co

[PATCH 07/10] diff.c: decouple white space treatment from move detection algorithm

2018-07-18 Thread Stefan Beller
In the original implementation of the move detection logic the choice for
ignoring white space changes is the same for the move detection as it is
for the regular diff.  Some cases came up where different treatment would
have been nice.

Allow the user to specify that white space should be ignored differently
during detection of moved lines than during generation of added and removed
lines. This is done by providing analogs to the --ignore-space-at-eol,
-b, and -w options by introducing the option --color-moved-ws=
with the modes named "ignore-space-at-eol", "ignore-space-change" and
"ignore-all-space", which is used only during the move detection phase.

As we change the default, we'll adjust the tests.

For now we do not infer any options to treat white spaces in the move
detection from the generic white space options given to diff.
This can be tuned later to reasonable default.

As we plan on adding more white space related options in a later patch,
that interferes with the current white space options, use a flag field
and clamp it down to  XDF_WHITESPACE_FLAGS, as that (a) allows to easily
check at parse time if we give invalid combinations and (b) can reuse
parts of this patch.

By having the white space treatment in its own option, we'll also
make it easier for a later patch to have an config option for
spaces in the move detection.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 Documentation/diff-options.txt | 17 ++
 diff.c | 39 +++--
 diff.h |  1 +
 t/t4015-diff-whitespace.sh | 62 +++---
 4 files changed, 113 insertions(+), 6 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index ba56169de31..80e29e39854 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -292,6 +292,23 @@ dimmed_zebra::
blocks are considered interesting, the rest is uninteresting.
 --
 
+--color-moved-ws=::
+   This configures how white spaces are ignored when performing the
+   move detection for `--color-moved`. These modes can be given
+   as a comma separated list:
++
+--
+ignore-space-at-eol::
+   Ignore changes in whitespace at EOL.
+ignore-space-change::
+   Ignore changes in amount of whitespace.  This ignores whitespace
+   at line end, and considers all other sequences of one or
+   more whitespace characters to be equivalent.
+ignore-all-space::
+   Ignore whitespace when comparing lines. This ignores differences
+   even if one line has whitespace where the other line has none.
+--
+
 --word-diff[=]::
Show a word diff, using the  to delimit changed words.
By default, words are delimited by whitespace; see
diff --git a/diff.c b/diff.c
index 95c51c0b7df..70eeb40c5fd 100644
--- a/diff.c
+++ b/diff.c
@@ -283,6 +283,36 @@ static int parse_color_moved(const char *arg)
return error(_("color moved setting must be one of 'no', 
'default', 'blocks', 'zebra', 'dimmed_zebra', 'plain'"));
 }
 
+static int parse_color_moved_ws(const char *arg)
+{
+   int ret = 0;
+   struct string_list l = STRING_LIST_INIT_DUP;
+   struct string_list_item *i;
+
+   string_list_split(, arg, ',', -1);
+
+   for_each_string_list_item(i, ) {
+   struct strbuf sb = STRBUF_INIT;
+   strbuf_addstr(, i->string);
+   strbuf_trim();
+
+   if (!strcmp(sb.buf, "ignore-space-change"))
+   ret |= XDF_IGNORE_WHITESPACE_CHANGE;
+   else if (!strcmp(sb.buf, "ignore-space-at-eol"))
+   ret |= XDF_IGNORE_WHITESPACE_AT_EOL;
+   else if (!strcmp(sb.buf, "ignore-all-space"))
+   ret |= XDF_IGNORE_WHITESPACE;
+   else
+   error(_("ignoring unknown color-moved-ws mode '%s'"), 
sb.buf);
+
+   strbuf_release();
+   }
+
+   string_list_clear(, 0);
+
+   return ret;
+}
+
 int git_diff_ui_config(const char *var, const char *value, void *cb)
 {
if (!strcmp(var, "diff.color") || !strcmp(var, "color.diff")) {
@@ -717,10 +747,12 @@ static int moved_entry_cmp(const void 
*hashmap_cmp_fn_data,
const struct diff_options *diffopt = hashmap_cmp_fn_data;
const struct moved_entry *a = entry;
const struct moved_entry *b = entry_or_key;
+   unsigned flags = diffopt->color_moved_ws_handling
+& XDF_WHITESPACE_FLAGS;
 
return !xdiff_compare_lines(a->es->line, a->es->len,
b->es->line, b->es->len,
-   diffopt->xdl_opts);
+   flags);
 }
 
 static struct moved_entry *prepare_entry(struct diff_options *o,
@@ -7

[PATCH 06/10] diff.c: add a blocks mode for moved code detection

2018-07-18 Thread Stefan Beller
The new "blocks" mode provides a middle ground between plain and zebra.
It is as intuitive (few colors) as plain, but still has the requirement
for a minimum of lines/characters to count a block as moved.

Suggested-by: Ævar Arnfjörð Bjarmason 
 (https://public-inbox.org/git/87o9j0uljo@evledraar.gmail.com/)
Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 Documentation/diff-options.txt |  8 --
 diff.c |  6 +++--
 diff.h |  5 ++--
 t/t4015-diff-whitespace.sh | 49 --
 4 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index e3a44f03cdc..ba56169de31 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -276,10 +276,14 @@ plain::
that are added somewhere else in the diff. This mode picks up any
moved line, but it is not very useful in a review to determine
if a block of code was moved without permutation.
-zebra::
+blocks::
Blocks of moved text of at least 20 alphanumeric characters
are detected greedily. The detected blocks are
-   painted using either the 'color.diff.{old,new}Moved' color or
+   painted using either the 'color.diff.{old,new}Moved' color.
+   Adjacent blocks cannot be told apart.
+zebra::
+   Blocks of moved text are detected as in 'blocks' mode. The blocks
+   are painted using either the 'color.diff.{old,new}Moved' color or
'color.diff.{old,new}MovedAlternative'. The change between
the two colors indicates that a new block was detected.
 dimmed_zebra::
diff --git a/diff.c b/diff.c
index d1bae900cdc..95c51c0b7df 100644
--- a/diff.c
+++ b/diff.c
@@ -271,6 +271,8 @@ static int parse_color_moved(const char *arg)
return COLOR_MOVED_NO;
else if (!strcmp(arg, "plain"))
return COLOR_MOVED_PLAIN;
+   else if (!strcmp(arg, "blocks"))
+   return COLOR_MOVED_BLOCKS;
else if (!strcmp(arg, "zebra"))
return COLOR_MOVED_ZEBRA;
else if (!strcmp(arg, "default"))
@@ -278,7 +280,7 @@ static int parse_color_moved(const char *arg)
else if (!strcmp(arg, "dimmed_zebra"))
return COLOR_MOVED_ZEBRA_DIM;
else
-   return error(_("color moved setting must be one of 'no', 
'default', 'zebra', 'dimmed_zebra', 'plain'"));
+   return error(_("color moved setting must be one of 'no', 
'default', 'blocks', 'zebra', 'dimmed_zebra', 'plain'"));
 }
 
 int git_diff_ui_config(const char *var, const char *value, void *cb)
@@ -903,7 +905,7 @@ static void mark_color_as_moved(struct diff_options *o,
 
block_length++;
 
-   if (flipped_block)
+   if (flipped_block && o->color_moved != COLOR_MOVED_BLOCKS)
l->flags |= DIFF_SYMBOL_MOVED_LINE_ALT;
}
adjust_last_block(o, n, block_length);
diff --git a/diff.h b/diff.h
index d29560f822c..7bd4f182c33 100644
--- a/diff.h
+++ b/diff.h
@@ -208,8 +208,9 @@ struct diff_options {
enum {
COLOR_MOVED_NO = 0,
COLOR_MOVED_PLAIN = 1,
-   COLOR_MOVED_ZEBRA = 2,
-   COLOR_MOVED_ZEBRA_DIM = 3,
+   COLOR_MOVED_BLOCKS = 2,
+   COLOR_MOVED_ZEBRA = 3,
+   COLOR_MOVED_ZEBRA_DIM = 4,
} color_moved;
#define COLOR_MOVED_DEFAULT COLOR_MOVED_ZEBRA
#define COLOR_MOVED_MIN_ALNUM_COUNT 20
diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh
index ddbc3901385..e54529f026d 100755
--- a/t/t4015-diff-whitespace.sh
+++ b/t/t4015-diff-whitespace.sh
@@ -1223,7 +1223,7 @@ test_expect_success 'plain moved code, inside file' '
test_cmp expected actual
 '
 
-test_expect_success 'detect permutations inside moved code -- dimmed_zebra' '
+test_expect_success 'detect blocks of moved code' '
git reset --hard &&
cat <<-\EOF >lines.txt &&
long line 1
@@ -1271,6 +1271,50 @@ test_expect_success 'detect permutations inside moved 
code -- dimmed_zebra' '
test_config color.diff.newMovedDimmed "normal cyan" &&
test_config color.diff.oldMovedAlternativeDimmed "normal blue" &&
test_config color.diff.newMovedAlternativeDimmed "normal yellow" &&
+   git diff HEAD --no-renames --color-moved=blocks --color >actual.raw &&
+   grep -v "index" actual.raw | test_decode_color >actual &&
+   cat <<-\EOF >expected &&
+   diff --git a/lines.txt b/lines.txt
+   --- a/lines.txt
+   +++ b/lines.txt
+   @@ -1,16 +1,16 @@
+   -long line 1
+   -long line 2
+   -long line 3
+line 4
+  

[PATCH 10/10] diff.c: offer config option to control ws handling in move detection

2018-07-18 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---
 Documentation/config.txt   | 5 +
 Documentation/diff-options.txt | 7 +--
 diff.c | 9 +
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 2659153cb37..6ca7118b018 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1122,6 +1122,11 @@ diff.colorMoved::
true the default color mode will be used. When set to false,
moved lines are not colored.
 
+diff.colorMovedWS::
+   When moved lines are colored using e.g. the `diff.colorMoved` setting,
+   this option controls the `` how spaces are treated
+   for details of valid modes see '--color-moved-ws' in 
linkgit:git-diff[1].
+
 color.diff.::
Use customized color for diff colorization.  `` specifies
which part of the patch to use the specified color, and is one
diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 143acd9417e..8da7fed4e22 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -294,8 +294,11 @@ dimmed_zebra::
 
 --color-moved-ws=::
This configures how white spaces are ignored when performing the
-   move detection for `--color-moved`. These modes can be given
-   as a comma separated list:
+   move detection for `--color-moved`.
+ifdef::git-diff[]
+   It can be set by the `diff.colorMovedWS` configuration setting.
+endif::git-diff[]
+   These modes can be given as a comma separated list:
 +
 --
 ignore-space-at-eol::
diff --git a/diff.c b/diff.c
index 7810a4733f8..5089c6eb3a4 100644
--- a/diff.c
+++ b/diff.c
@@ -35,6 +35,7 @@ static int diff_rename_limit_default = 400;
 static int diff_suppress_blank_empty;
 static int diff_use_color_default = -1;
 static int diff_color_moved_default;
+static int diff_color_moved_ws_default;
 static int diff_context_default = 3;
 static int diff_interhunk_context_default;
 static const char *diff_word_regex_cfg;
@@ -332,6 +333,13 @@ int git_diff_ui_config(const char *var, const char *value, 
void *cb)
diff_color_moved_default = cm;
return 0;
}
+   if (!strcmp(var, "diff.colormovedws")) {
+   int cm = parse_color_moved_ws(value);
+   if (cm < 0)
+   return -1;
+   diff_color_moved_ws_default = cm;
+   return 0;
+   }
if (!strcmp(var, "diff.context")) {
diff_context_default = git_config_int(var, value);
if (diff_context_default < 0)
@@ -4327,6 +4335,7 @@ void diff_setup(struct diff_options *options)
}
 
options->color_moved = diff_color_moved_default;
+   options->color_moved_ws_handling = diff_color_moved_ws_default;
 }
 
 void diff_setup_done(struct diff_options *options)
-- 
2.18.0.233.g985f88cf7e-goog



[PATCHv6 00/10] Reroll of sb/diff-color-move-more

2018-07-18 Thread Stefan Beller
v6:
* fixed issues hinted at by Andrei, thanks! (range-diff below)
* incorporates the new config option, sent separately previously.

v5:
This is a resend of sb/diff-color-move-more
https://public-inbox.org/git/20180629001958.85143-1-sbel...@google.com/
that fixes an errornous squashing within the series; the end result is
the same. range diff is below. (As the latest cooking email said
this series is going to land in next soon, I hope this is not too late
of a resend; otherwise just ignore it as the end result is the same)

Thanks,
Stefan


Stefan Beller (10):
  xdiff/xdiff.h: remove unused flags
  xdiff/xdiffi.c: remove unneeded function declarations
  t4015: avoid git as a pipe input
  diff.c: do not pass diff options as keydata to hashmap
  diff.c: adjust hash function signature to match hashmap expectation
  diff.c: add a blocks mode for moved code detection
  diff.c: decouple white space treatment from move detection algorithm
  diff.c: factor advance_or_nullify out of mark_color_as_moved
  diff.c: add white space mode to move detection that allows indent
changes
  diff.c: offer config option to control ws handling in move detection

 Documentation/config.txt   |   5 +
 Documentation/diff-options.txt |  33 -
 diff.c | 262 +
 diff.h |   9 +-
 t/t4015-diff-whitespace.sh | 243 +-
 xdiff/xdiff.h  |   8 -
 xdiff/xdiffi.c |  17 ---
 7 files changed, 489 insertions(+), 88 deletions(-)


1:  a512cd40cae ! 1:  aabbc4e8aff diff.c: add white space mode to move 
detection that allows indent changes
@@ -27,8 +27,8 @@
 modes in the move detection.
 
 This patch brings some challenges, related to the detection of blocks.
-We need a white net the catch the possible moved lines, but then need 
to
-narrow down to check if the blocks are still in tact. Consider this
+We need a wide net to catch the possible moved lines, but then need to
+narrow down to check if the blocks are still intact. Consider this
 example (ignoring block sizes):
 
  - A
@@ -254,7 +254,7 @@
 +  }
 +  }
 +
-+  for (i = 0; i next_line->wsd = pmb[i]->wsd;
-:  --- > 2:  f80fbe78d9b diff.c: offer config option to control ws 
handling in move detection


[PATCH 01/10] xdiff/xdiff.h: remove unused flags

2018-07-18 Thread Stefan Beller
These flags were there since the beginning (3443546f6e (Use a *real*
built-in diff generator, 2006-03-24), but were never used. Remove them.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 xdiff/xdiff.h | 8 
 1 file changed, 8 deletions(-)

diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index c1937a29112..2356da5f784 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -52,14 +52,6 @@ extern "C" {
 #define XDL_EMIT_FUNCNAMES (1 << 0)
 #define XDL_EMIT_FUNCCONTEXT (1 << 2)
 
-#define XDL_MMB_READONLY (1 << 0)
-
-#define XDL_MMF_ATOMIC (1 << 0)
-
-#define XDL_BDOP_INS 1
-#define XDL_BDOP_CPY 2
-#define XDL_BDOP_INSB 3
-
 /* merge simplification levels */
 #define XDL_MERGE_MINIMAL 0
 #define XDL_MERGE_EAGER 1
-- 
2.18.0.233.g985f88cf7e-goog



Re: [PATCH 2/8] refs.c: upgrade for_each_replace_ref to be a each_repo_ref_fn callback

2018-07-18 Thread Stefan Beller
> > @@ -317,7 +317,7 @@ int for_each_fullref_in(const char *prefix, each_ref_fn 
> > fn, void *cb_data,
> >  int for_each_tag_ref(each_ref_fn fn, void *cb_data);
> >  int for_each_branch_ref(each_ref_fn fn, void *cb_data);
> >  int for_each_remote_ref(each_ref_fn fn, void *cb_data);
> > -int for_each_replace_ref(struct repository *r, each_ref_fn fn, void 
> > *cb_data);
> > +int for_each_replace_ref(struct repository *r, each_repo_ref_fn fn, void 
> > *cb_data);
>
> With a signature change like this, any change that introduces new
> call to for_each_replace_ref() using eac_ref_fn() would get
> compilation error, so this is minimally correct.
>
> Two things that bothersome are that
>
>  - for_each_tag/branch/remote_ref() and for_each_replace_ref() now
>work and look quite differently.

Yes, this series is not finished; we need to convert/upgrade for_each_tag
et al, too.

>  - existing users of for_each_replace_ref() who were all happy
>working in the_repository have to pass it explicitly, even
>thought they do not have any need to.

All callbacks that are passed to for_each_replace_ref now
operate on 'r' instead of the_repository, and that actually fixes
a bug (commit message is lacking), but the cover letter hints at:

While building this series, I got some test failures in the
non-the_repository tests. These issues are related to missing
references to an arbitrary repository (instead of the_repository)
and some uninitialized values in the tests. Stefan already sent
a patch to address this [2], and I've included those commits
(along with a small tweak [3]). These are only included for
convenience.

> In this case, even if you introduced for_each_replace_ref_in_repo(),
> making for_each_replace_ref() a thin wrapper that always uses
> the_repository is a bit more cumbersome than just a simple macro.

Yes, but such a callback would do the *wrong* subtle thing in some cases
as you really want to work in the correct repository for e.g. looking up
commits.

> But it *is* doable (you'd need to use a wrapping structure around
> cb_data), and a developer who case about maintainability during API
> transition would have taken pains to do so.  A bit dissapointing.

My original patches were RFC-ish and a trade off as for the reflog only
there is nothing in flight to care about.

Given that we would want to upgrade all the ref callbacks, we have to
take this route, I think.

Thanks,
Stefan


Re: [PATCH 9/9] diff.c: add white space mode to move detection that allows indent changes

2018-07-18 Thread Stefan Beller
On Wed, Jul 18, 2018 at 11:25 AM Andrei Rybak  wrote:
>
> On 2018-07-17 01:05, Stefan Beller wrote:
> >
> > This patch brings some challenges, related to the detection of blocks.
> > We need a white net the catch the possible moved lines, but then need to
>
> The s/white/wide/ was already suggested by Brandon Williams in previous
> iteration, but it seems this also needs s/the catch/to catch/
>

Both fixed locally, thanks!

I'll resend this series later.


Re: [PATCH] diff.c: offer config option to control ws handling in move detection

2018-07-18 Thread Stefan Beller
On Wed, Jul 18, 2018 at 10:45 AM Junio C Hamano  wrote:
>
> Stefan Beller  writes:
>
> > diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
> > index 143acd9417e..8da7fed4e22 100644
> > --- a/Documentation/diff-options.txt
> > +++ b/Documentation/diff-options.txt
> > @@ -294,8 +294,11 @@ dimmed_zebra::
> >
> >  --color-moved-ws=::
> >   This configures how white spaces are ignored when performing the
> > - move detection for `--color-moved`. These modes can be given
> > - as a comma separated list:
> > + move detection for `--color-moved`.
> > +ifdef::git-diff[]
> > + It can be set by the `diff.colorMovedWS` configuration setting.
> > +endif::git-diff[]
>
> The patch to diff.c::git_diff_ui_config() we see below does not seem
> to make any effort to make sure that this new configuration applies
> only to "git diff" and that other commands like "git log" that call
> git_diff_ui_config() are not affected.

That is as intended. (We want to have it in git-log)

> And I do not see a strong reason why "git log --color-moved" should
> not honor this setting, either, so I am not quite sure why we want
> this ifdef/endif pair to hide it from "git log --help".
>
> Or am I totally misunderstanding the reason why we want ifdef/endif
> here?
>
> Puzzled...

I am somewhat puzzled, too, by the use of ifdefs in
Documentation/diff-options.txt, but I rationalized it as
"man git diff" has all the details, whereas the other pages are
a bit shorter and concise:

$ git grep -A 2 "ifdef::git-diff" Documentation/diff-options.txt
ifdef::git-diff[]
 This is the default.
endif::git-diff[]

ifdef::git-diff-core[]
 This is the default.
endif::git-diff-core[]

ifdef::git-diff[]
 It can be changed by the `color.ui` and `color.diff`
 configuration settings.

ifdef::git-diff[]
 This can be used to override configuration settings.
endif::git-diff[]

ifdef::git-diff[]
 It can be changed by the `diff.colorMoved` configuration setting.
endif::git-diff[]

ifdef::git-diff[]
 It can be set by the `diff.colorMovedWS` configuration setting.
endif::git-diff[]

So I followed the style that was already there, specifically
61e89eaae88 (diff: document the new --color-moved setting, 2017-06-30)
which followed the style of
6999c54029b (config.txt,diff-options.txt: porcelain vs. plumbing for
color.diff, 2011-04-27)

So I might have picked up a bad habit there or misunderstood the original?

Stefan


Re: [RFC] push: add documentation on push v2

2018-07-18 Thread Stefan Beller
> > Given the example above for "rebase-on-push" though
> > it is better to first send the packfile (as that is assumed to
> > take longer) and then send the ref updates, such that the
> > rebasing could be faster and has no bottleneck.
>
> I don't really follow this logic.  I don't think it would change
> anything much considering the ref-updates section would usually be
> much smaller than the packfile itself, course I don't have any data so
> idk.

The server would need to serialize all incoming requests and apply
them in order. The receiving of the packfile and the response to the client
are not part of the critical section that needs to happen serialized but
can be spread out to threads. So for that use case it would make
sense to allow sending the packfile first.

> > > +update = txn_id SP action SP refname SP old_oid SP new_oid
> > > +force-update = txn_id SP "force" SP action SP refname SP new_oid
> >
> > So we insert "force" after the transaction id if we want to force it.
> > When adding the atomic capability later we could imagine another insert here
> >
> >   1 atomic create refs/heads/new-ref <0-hash> 
> >   1 atomic delete refs/heads/old-ref  <0-hash>
> >
> > which would look like a "rename" that we could also add instead.
> > The transaction numbers are an interesting concept, how do you
> > envision them to be used? In the example I put them both in the same
> > transaction to demonstrate the "atomic-ness", but one could also
> > imagine different transactions numbers per ref (i.e. exactly one
> > ref per txn_id) to have a better understanding of what the server did
> > to each individual ref.
>
> I believe I outlined their use later.  Basically if you give the server
> free reign to do what it wants with the updates you send it, then you
> need a way for the client to be able to map the result back to what it
> requested.  Since now i could push to "master" but instead of updating
> master the server creates a refs/changes/1 ref and puts my changes there
> instead of updating master.  The client needs to know that the ref
> update it requested to master is what caused the creation of the
> refs/changes/1 ref.

understood, the question was more related to how you envision what
the client/server SHOULD be doing here, (and I think a one txn_id per
ref is what SHOULD be done is how this is best to implement the
thoughts above, also the client is ALLOWED to put many refs in one
txn, or would we just disallow that already at this stage to not confuse
the server?)

>
> >
> > Are new capabilities attached to ref updates or transactions?
> > Unlike the example above, stating "atomic" on each line, you could just
> > say "transaction 1 should be atomic" in another line, that would address
> > all refs in that transaction.
>
> I haven't thought through "atomic" so i have no idea what you'd want
> that to look like.

Yeah I have not really thought about them either, I just see two ways:
(A) adding more keywords in each ref-update (like "force") or
(B) adding new subsections somewhere where we talk about the capabilities
  instead.

Depending on why way we want to go this might have impact on the
design how to write the code.

> > > +   * Normal ref-updates require that the old value of a ref is 
> > > supplied so
> > > + that the server can verify that the reference that is being 
> > > updated
> > > + hasn't changed while the request was being processed.
> >
> > create/delete assume <00..00> for either old or new ? (We could also
> > omit the second hash for create delete, which is more appealing to me)
>
> Well that depends, in the case of a create you want to ensure that no
> ref with that name exists and would want it to fail if one already
> existed.  If you want to force it then you don't care if one existed or
> not, you just want the ref to have a certain value.

What I was trying to say is to have

update = txn_id SP (modifier SP) action
modifier = "force" | "atomic"
action = (create | delete | update)
create = "create" SP 
update = "update" SP  SP 
delete = "delete" SP 

i.e. only one hash for the create and delete action.
(I added the "atomic" modifier to demonstrate (A) from above, not needed here)

> >
> > > +   * Forced ref-updates only include the new value of a ref as we 
> > > don't
> > > + care what the old value was.
> >
> > How are you implementing force-with-lease then?
>
> Currently force-with-lease/force is implemented 100% on the client side,

Uh? That would be bad. Reading 631b5ef219c (push --force-with-lease: tie
it all together, 2013-07-08) I think that send-pack is done server-side?

> this proposal extends these two to be implemented on the server as well.
> non-forced variant are basically the "with-lease" case and "force" now
> actually forces an update.

I think we have 3 modes:
(1) standard push, where both client and server check for a fast-forward
(2) "force" that blindly overwrites the ref, but as that 

Re: Receiving console output from GIT 10mins after abort/termination?

2018-07-18 Thread Stefan Beller
On Tue, Jul 17, 2018 at 11:38 PM Frank Wolf  wrote:
>
> Hi @ll,
>
> I hope I'm posting to the right group (not sure if it's Windows related) but 
> I've got
> a weird problem using GIT:
>
> By accident I've tried to push a repository (containing an already
> commited but not yet pushed submodule reference).
> This fails immediately with an error of course BUT
>
> after 10 mins I get an output on the console though the command exited!?
> (... $Received disconnect from : User session has timed out 
> idling after 600 ms)

This sounds like it is better suited for
https://github.com/git-for-windows/git/issues/new
as the error message suggests it comes from some software
bundled with GfW to make Git work on Windows.

(Gits source code doesn't contain such an error message)


Re: [PATCH 2/2] refs.c: upgrade for_each_replace_ref to be a each_repo_ref_fn callback

2018-07-18 Thread Stefan Beller
Hi Derrick,

> Overall, I think this is the right approach. The only problem is that
> you're missing a few 'the_repository' to 'r' replacements in the bodies
> of show_reference and register_replace_ref.

Thanks.

Originally I had another approach, which was to convert all
callbacks to take a repository argument, and only if the refs
backend learned to propagate the correct repository the repo
argument would be the specific repo, NULL otherwise.

With that approach it was not possible to replace all occurrences
of the_repository with 'r', and it would also be confusing.

But as I pursued that approach at the time of writing this patch...
I missed the conversions needed.

Thanks for catching them (and fixing them in the reroll that
you just sent out via gitgitgadget)!

Thanks,
Stefan


Re: [RFC] push: add documentation on push v2

2018-07-18 Thread Stefan Beller
On Wed, Jul 18, 2018 at 6:31 AM Derrick Stolee  wrote:
>
> On 7/17/2018 7:25 PM, Stefan Beller wrote:
> > On Tue, Jul 17, 2018 at 2:09 PM Brandon Williams  wrote:
> >> Signed-off-by: Brandon Williams 
> >> ---
> >>
> >> Since introducing protocol v2 and enabling fetch I've been thinking
> >> about what its inverse 'push' would look like.  After talking with a
> >> number of people I have a longish list of things that could be done to
> >> improve push and I think I've been able to distill the core features we
> >> want in push v2.
> > It would be nice to know which things you want to improve.
>
> Hopefully we can also get others to chime in with things they don't like
> about the existing protocol. What pain points exist, and what can we do
> to improve at the transport layer before considering new functionality?

Another thing that I realized last night was the possibility to chunk requests.
The web of today is driven by lots of small http(s) requests. I know our server
team fights with the internal tools all the time because the communication
involved in git-fetch is usually a large http request (large packfile).
So it would be nice to have the possibility of chunking the request.
But I think that can be added as a capability? (Not sure how)

> >>   Thankfully (due to the capability system) most of the
> >> other features/improvements can be added later with ease.
> >>
> >> What I've got now is a rough design for a more flexible push, more
> >> flexible because it allows for the server to do what it wants with the
> >> refs that are pushed and has the ability to communicate back what was
> >> done to the client.  The main motivation for this is to work around
> >> issues when working with Gerrit and other code-review systems where you
> >> need to have Change-Ids in the commit messages (now the server can just
> >> insert them for you and send back new commits) and you need to push to
> >> magic refs to get around various limitations (now a Gerrit server should
> >> be able to communicate that pushing to 'master' doesn't update master
> >> but instead creates a refs/changes/ ref).
> > Well Gerrit is our main motivation, but this allows for other workflows as 
> > well.
> > For example Facebook uses hg internally and they have a
> > "rebase-on-the-server-after-push" workflow IIRC as pushing to a single repo
> > brings up quite some contention. The protocol outlined below would allow
> > for such a workflow as well? (This might be an easier sell to the Git
> > community as most are not quite familiar with Gerrit)
>
> I'm also curious how this "change commits on push" would be helpful to
> other scenarios.
>
> Since I'm not familiar with Gerrit: what is preventing you from having a
> commit hook that inserts (or requests) a Change-Id when not present?

That is how you do it normally. But what if you just get started or want to
send a one-off to the server (I wanted to upload a git patch to our internal
Gerrit once, and as my repository is configured to work with upstream Git
which doesn't carry change ids, I ran into this problem. I had to manually
add it to have the server accept it)

> How
> can the server identify the Change-Id automatically when it isn't present?

The change id is just a randomly assigned id, which can be made up,
but should stay consistent in further revisions. (Put another way:
change ids solve the 'linear assignment problem' of range-diff at scale)

So once the protocol support is in, the client would need to get some UX
update to replace its commits just pushed with the answer from the server
to work well with server side generated change ids.

But as said I am not sure how much we want to discuss in that direction,
but rather see if we could have other use cases:
Instead of just rebasing to solve the contention problem server side,
we could also offer a "coding helper as a service" - server. That would
work similar as the change id workflow lines out above:
You push to the server, the server performs some action (style formatting
your code for example, linting) and you download it back and have it locallly
again.

I think that would be pretty cool actually.

Thanks,
Stefan


[PATCH 2/8] refs.c: upgrade for_each_replace_ref to be a each_repo_ref_fn callback

2018-07-18 Thread Stefan Beller via GitGitGadget
From: Stefan Beller 

Signed-off-by: Stefan Beller 
Signed-off-by: Derrick Stolee 
---
 builtin/replace.c | 8 
 refs.c| 9 -
 refs.h| 2 +-
 replace-object.c  | 5 +++--
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/builtin/replace.c b/builtin/replace.c
index ef22d724b..d0b1cdb06 100644
--- a/builtin/replace.c
+++ b/builtin/replace.c
@@ -39,7 +39,8 @@ struct show_data {
enum replace_format format;
 };
 
-static int show_reference(const char *refname, const struct object_id *oid,
+static int show_reference(struct repository *r, const char *refname,
+ const struct object_id *oid,
  int flag, void *cb_data)
 {
struct show_data *data = cb_data;
@@ -56,9 +57,8 @@ static int show_reference(const char *refname, const struct 
object_id *oid,
if (get_oid(refname, ))
return error("Failed to resolve '%s' as a valid 
ref.", refname);
 
-   obj_type = oid_object_info(the_repository, ,
-  NULL);
-   repl_type = oid_object_info(the_repository, oid, NULL);
+   obj_type = oid_object_info(r, , NULL);
+   repl_type = oid_object_info(r, oid, NULL);
 
printf("%s (%s) -> %s (%s)\n", refname, 
type_name(obj_type),
   oid_to_hex(oid), type_name(repl_type));
diff --git a/refs.c b/refs.c
index 2513f77ac..5700cd468 100644
--- a/refs.c
+++ b/refs.c
@@ -1478,12 +1478,11 @@ int refs_for_each_fullref_in(struct ref_store *refs, 
const char *prefix,
return do_for_each_ref(refs, prefix, fn, 0, flag, cb_data);
 }
 
-int for_each_replace_ref(struct repository *r, each_ref_fn fn, void *cb_data)
+int for_each_replace_ref(struct repository *r, each_repo_ref_fn fn, void 
*cb_data)
 {
-   return do_for_each_ref(get_main_ref_store(r),
-  git_replace_ref_base, fn,
-  strlen(git_replace_ref_base),
-  DO_FOR_EACH_INCLUDE_BROKEN, cb_data);
+   return do_for_each_repo_ref(r, git_replace_ref_base, fn,
+   strlen(git_replace_ref_base),
+   DO_FOR_EACH_INCLUDE_BROKEN, cb_data);
 }
 
 int for_each_namespaced_ref(each_ref_fn fn, void *cb_data)
diff --git a/refs.h b/refs.h
index 80eec8bbc..a0a18223a 100644
--- a/refs.h
+++ b/refs.h
@@ -317,7 +317,7 @@ int for_each_fullref_in(const char *prefix, each_ref_fn fn, 
void *cb_data,
 int for_each_tag_ref(each_ref_fn fn, void *cb_data);
 int for_each_branch_ref(each_ref_fn fn, void *cb_data);
 int for_each_remote_ref(each_ref_fn fn, void *cb_data);
-int for_each_replace_ref(struct repository *r, each_ref_fn fn, void *cb_data);
+int for_each_replace_ref(struct repository *r, each_repo_ref_fn fn, void 
*cb_data);
 int for_each_glob_ref(each_ref_fn fn, const char *pattern, void *cb_data);
 int for_each_glob_ref_in(each_ref_fn fn, const char *pattern,
 const char *prefix, void *cb_data);
diff --git a/replace-object.c b/replace-object.c
index 801b5c167..017f02f8e 100644
--- a/replace-object.c
+++ b/replace-object.c
@@ -6,7 +6,8 @@
 #include "repository.h"
 #include "commit.h"
 
-static int register_replace_ref(const char *refname,
+static int register_replace_ref(struct repository *r,
+   const char *refname,
const struct object_id *oid,
int flag, void *cb_data)
 {
@@ -25,7 +26,7 @@ static int register_replace_ref(const char *refname,
oidcpy(_obj->replacement, oid);
 
/* Register new object */
-   if (oidmap_put(the_repository->objects->replace_map, repl_obj))
+   if (oidmap_put(r->objects->replace_map, repl_obj))
die("duplicate replace ref: %s", refname);
 
return 0;
-- 
gitgitgadget



[PATCH 1/8] refs.c: migrate internal ref iteration to pass thru repository argument

2018-07-18 Thread Stefan Beller via GitGitGadget
From: Stefan Beller 

In 60ce76d3581 (refs: add repository argument to for_each_replace_ref,
2018-04-11) and 0d296c57aec (refs: allow for_each_replace_ref to handle
arbitrary repositories, 2018-04-11), for_each_replace_ref learned how
to iterate over refs by a given arbitrary repository.
New attempts in the object store conversion have shown that it is useful
to have the repository handle available that the refs iteration is
currently iterating over.

To achieve this goal we will need to add a repository argument to
each_ref_fn in refs.h. However as many callers rely on the signature
such a patch would be too large.

So convert the internals of the ref subsystem first to pass through a
repository argument without exposing the change to the user. Assume
the_repository for the passed through repository, although it is not
used anywhere yet.

Signed-off-by: Stefan Beller 
Signed-off-by: Derrick Stolee 
---
 refs.c   | 39 +--
 refs.h   | 10 ++
 refs/iterator.c  |  6 +++---
 refs/refs-internal.h |  5 +++--
 4 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/refs.c b/refs.c
index fcfd3171e..2513f77ac 100644
--- a/refs.c
+++ b/refs.c
@@ -1390,17 +1390,50 @@ struct ref_iterator *refs_ref_iterator_begin(
  * non-zero value, stop the iteration and return that value;
  * otherwise, return 0.
  */
+static int do_for_each_repo_ref(struct repository *r, const char *prefix,
+   each_repo_ref_fn fn, int trim, int flags,
+   void *cb_data)
+{
+   struct ref_iterator *iter;
+   struct ref_store *refs = get_main_ref_store(r);
+
+   if (!refs)
+   return 0;
+
+   iter = refs_ref_iterator_begin(refs, prefix, trim, flags);
+
+   return do_for_each_repo_ref_iterator(r, iter, fn, cb_data);
+}
+
+struct do_for_each_ref_help {
+   each_ref_fn *fn;
+   void *cb_data;
+};
+
+static int do_for_each_ref_helper(struct repository *r,
+ const char *refname,
+ const struct object_id *oid,
+ int flags,
+ void *cb_data)
+{
+   struct do_for_each_ref_help *hp = cb_data;
+
+   return hp->fn(refname, oid, flags, hp->cb_data);
+}
+
 static int do_for_each_ref(struct ref_store *refs, const char *prefix,
   each_ref_fn fn, int trim, int flags, void *cb_data)
 {
struct ref_iterator *iter;
+   struct do_for_each_ref_help hp = { fn, cb_data };
 
if (!refs)
return 0;
 
iter = refs_ref_iterator_begin(refs, prefix, trim, flags);
 
-   return do_for_each_ref_iterator(iter, fn, cb_data);
+   return do_for_each_repo_ref_iterator(the_repository, iter,
+   do_for_each_ref_helper, );
 }
 
 int refs_for_each_ref(struct ref_store *refs, each_ref_fn fn, void *cb_data)
@@ -2029,10 +2062,12 @@ cleanup:
 int refs_for_each_reflog(struct ref_store *refs, each_ref_fn fn, void *cb_data)
 {
struct ref_iterator *iter;
+   struct do_for_each_ref_help hp = { fn, cb_data };
 
iter = refs->be->reflog_iterator_begin(refs);
 
-   return do_for_each_ref_iterator(iter, fn, cb_data);
+   return do_for_each_repo_ref_iterator(the_repository, iter,
+do_for_each_ref_helper, );
 }
 
 int for_each_reflog(each_ref_fn fn, void *cb_data)
diff --git a/refs.h b/refs.h
index cc2fb4c68..80eec8bbc 100644
--- a/refs.h
+++ b/refs.h
@@ -274,6 +274,16 @@ struct ref_transaction;
 typedef int each_ref_fn(const char *refname,
const struct object_id *oid, int flags, void *cb_data);
 
+/*
+ * The same as each_ref_fn, but also with a repository argument that
+ * contains the repository associated with the callback.
+ */
+typedef int each_repo_ref_fn(struct repository *r,
+const char *refname,
+const struct object_id *oid,
+int flags,
+void *cb_data);
+
 /*
  * The following functions invoke the specified callback function for
  * each reference indicated.  If the function ever returns a nonzero
diff --git a/refs/iterator.c b/refs/iterator.c
index 2ac91ac34..629e00a12 100644
--- a/refs/iterator.c
+++ b/refs/iterator.c
@@ -407,15 +407,15 @@ struct ref_iterator *prefix_ref_iterator_begin(struct 
ref_iterator *iter0,
 
 struct ref_iterator *current_ref_iter = NULL;
 
-int do_for_each_ref_iterator(struct ref_iterator *iter,
-each_ref_fn fn, void *cb_data)
+int do_for_each_repo_ref_iterator(struct repository *r, struct ref_iterator 
*iter,
+ each_repo_ref_fn fn, void *cb_data)
 {
int retval = 0, ok;
struct ref_iterator *old_ref_iter = current_ref_iter;
 
current_ref_iter = iter;
  

[PATCH] diff.c: offer config option to control ws handling in move detection

2018-07-17 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---

This is the cherry on the cake named sb/diff-color-move-more.

 Documentation/config.txt   | 5 +
 Documentation/diff-options.txt | 7 +--
 diff.c | 9 +
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 2659153cb37..6ca7118b018 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1122,6 +1122,11 @@ diff.colorMoved::
true the default color mode will be used. When set to false,
moved lines are not colored.
 
+diff.colorMovedWS::
+   When moved lines are colored using e.g. the `diff.colorMoved` setting,
+   this option controls the `` how spaces are treated
+   for details of valid modes see '--color-moved-ws' in 
linkgit:git-diff[1].
+
 color.diff.::
Use customized color for diff colorization.  `` specifies
which part of the patch to use the specified color, and is one
diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 143acd9417e..8da7fed4e22 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -294,8 +294,11 @@ dimmed_zebra::
 
 --color-moved-ws=::
This configures how white spaces are ignored when performing the
-   move detection for `--color-moved`. These modes can be given
-   as a comma separated list:
+   move detection for `--color-moved`.
+ifdef::git-diff[]
+   It can be set by the `diff.colorMovedWS` configuration setting.
+endif::git-diff[]
+   These modes can be given as a comma separated list:
 +
 --
 ignore-space-at-eol::
diff --git a/diff.c b/diff.c
index f51f0ac32f4..9de917108d8 100644
--- a/diff.c
+++ b/diff.c
@@ -35,6 +35,7 @@ static int diff_rename_limit_default = 400;
 static int diff_suppress_blank_empty;
 static int diff_use_color_default = -1;
 static int diff_color_moved_default;
+static int diff_color_moved_ws_default;
 static int diff_context_default = 3;
 static int diff_interhunk_context_default;
 static const char *diff_word_regex_cfg;
@@ -332,6 +333,13 @@ int git_diff_ui_config(const char *var, const char *value, 
void *cb)
diff_color_moved_default = cm;
return 0;
}
+   if (!strcmp(var, "diff.colormovedws")) {
+   int cm = parse_color_moved_ws(value);
+   if (cm < 0)
+   return -1;
+   diff_color_moved_ws_default = cm;
+   return 0;
+   }
if (!strcmp(var, "diff.context")) {
diff_context_default = git_config_int(var, value);
if (diff_context_default < 0)
@@ -4327,6 +4335,7 @@ void diff_setup(struct diff_options *options)
}
 
options->color_moved = diff_color_moved_default;
+   options->color_moved_ws_handling = diff_color_moved_ws_default;
 }
 
 void diff_setup_done(struct diff_options *options)
-- 
2.18.0.233.g985f88cf7e-goog



Re: [RFC] push: add documentation on push v2

2018-07-17 Thread Stefan Beller
On Tue, Jul 17, 2018 at 2:09 PM Brandon Williams  wrote:
>
> Signed-off-by: Brandon Williams 
> ---
>
> Since introducing protocol v2 and enabling fetch I've been thinking
> about what its inverse 'push' would look like.  After talking with a
> number of people I have a longish list of things that could be done to
> improve push and I think I've been able to distill the core features we
> want in push v2.

It would be nice to know which things you want to improve.

>  Thankfully (due to the capability system) most of the
> other features/improvements can be added later with ease.
>
> What I've got now is a rough design for a more flexible push, more
> flexible because it allows for the server to do what it wants with the
> refs that are pushed and has the ability to communicate back what was
> done to the client.  The main motivation for this is to work around
> issues when working with Gerrit and other code-review systems where you
> need to have Change-Ids in the commit messages (now the server can just
> insert them for you and send back new commits) and you need to push to
> magic refs to get around various limitations (now a Gerrit server should
> be able to communicate that pushing to 'master' doesn't update master
> but instead creates a refs/changes/ ref).

Well Gerrit is our main motivation, but this allows for other workflows as well.
For example Facebook uses hg internally and they have a
"rebase-on-the-server-after-push" workflow IIRC as pushing to a single repo
brings up quite some contention. The protocol outlined below would allow
for such a workflow as well? (This might be an easier sell to the Git
community as most are not quite familiar with Gerrit)

> Before actually moving to write any code I'm hoping to get some feedback
> on if we think this is an acceptable base design for push (other
> features like atomic-push, signed-push, etc can be added as
> capabilities), so any comments are appreciated.
>
>  Documentation/technical/protocol-v2.txt | 76 +
>  1 file changed, 76 insertions(+)
>
> diff --git a/Documentation/technical/protocol-v2.txt 
> b/Documentation/technical/protocol-v2.txt
> index 49bda76d23..16c1ce60dd 100644
> --- a/Documentation/technical/protocol-v2.txt
> +++ b/Documentation/technical/protocol-v2.txt
> @@ -403,6 +403,82 @@ header.
> 2 - progress messages
> 3 - fatal error message just before stream aborts
>
> + push
> +~~
> +
> +`push` is the command used to push ref-updates and a packfile to a remote
> +server in v2.
> +
> +Additional features not supported in the base command will be advertised
> +as the value of the command in the capability advertisement in the form
> +of a space separated list of features: "= "
> +
> +The format of a push request is as follows:
> +
> +request = *section
> +section = (ref-updates | packfile)

This reads as if a request consists of sections, which
each can be a "ref-updates" or a packfile, no order given,
such that multiple ref-update sections mixed with packfiles
are possible.

I would assume we'd only want to allow for ref-updates
followed by the packfile.

Given the example above for "rebase-on-push" though
it is better to first send the packfile (as that is assumed to
take longer) and then send the ref updates, such that the
rebasing could be faster and has no bottleneck.

> +  (delim-pkt | flush-pkt)



> +
> +ref-updates = PKT-LINE("ref-updates" LF)
> + *PKT-Line(update/force-update LF)
> +
> +update = txn_id SP action SP refname SP old_oid SP new_oid
> +force-update = txn_id SP "force" SP action SP refname SP new_oid

So we insert "force" after the transaction id if we want to force it.
When adding the atomic capability later we could imagine another insert here

  1 atomic create refs/heads/new-ref <0-hash> 
  1 atomic delete refs/heads/old-ref  <0-hash>

which would look like a "rename" that we could also add instead.
The transaction numbers are an interesting concept, how do you
envision them to be used? In the example I put them both in the same
transaction to demonstrate the "atomic-ness", but one could also
imagine different transactions numbers per ref (i.e. exactly one
ref per txn_id) to have a better understanding of what the server did
to each individual ref.

> +action = ("create" | "delete" | "update")
> +txn_id = 1*DIGIT
> +
> +packfile = PKT-LINE("packfile" LF)
> +  *PKT-LINE(*%x00-ff)
> +
> +ref-updates section
> +   * Transaction id's allow for mapping what was requested to what the
> + server actually did with the ref-update.

this would imply the client ought to have at most one ref per transaction id.
Is the client allowed to put multiple refs per id?

Are new capabilities attached to ref updates or transactions?
Unlike the example above, stating "atomic" on each line, you could just
say "transaction 1 should be atomic" in another line, that would address
all refs 

[PATCH 0/2] RFC ref store to repository migration

2018-07-17 Thread Stefan Beller
Stolee said (privately):

I also ran into an issue where some of the "arbitrary repository"
patches don't fully connect. Jonathan's test demonstrated this
issue when I connected some things in an in-process patch 
Work in progress: https://github.com/gitgitgadget/git/pull/11
Specifically: 
https://github.com/gitgitgadget/git/pull/11/commits/287ec6c1cd4bf4da76c05698373aee15749d011a

And I dislike the approach taken in
https://github.com/gitgitgadget/git/pull/11/commits/287ec6c1cd4bf4da76c05698373aee15749d011a
and would prefer another approach shown at
https://github.com/stefanbeller/git/tree/object-store-convert-refstore-partial
or in this series.

This approach doesn't have the ugliness of having the repository around twice,
e.g.

int register_replace_ref(const char *refname, ...
{
  struct repository *r = cb_data;
  ...
}

...  

for_each_replace_ref(r, register_replace_ref, r);

which would iterate over the refs of the first "r" and have "r" as a call back
data point for register_replace_ref.

This approach also takes the gentle hint of Junio to not replace well used 
functions
throughout the whole code base (using coccinelle), but for now exposes just
one example in the second patch.

These patches have been developed on top of jt/commit-graph-per-object-store.

Opinions?

Thanks,
Stefan   

Stefan Beller (2):
  refs.c: migrate internal ref iteration to pass thru repository
argument
  refs.c: upgrade for_each_replace_ref to be a each_repo_ref_fn callback

 builtin/replace.c|  3 ++-
 refs.c   | 48 +---
 refs.h   | 12 ++-
 refs/iterator.c  |  6 +++---
 refs/refs-internal.h |  5 +++--
 replace-object.c |  3 ++-
 6 files changed, 62 insertions(+), 15 deletions(-)

-- 
2.18.0.233.g985f88cf7e-goog



[PATCH 2/2] refs.c: upgrade for_each_replace_ref to be a each_repo_ref_fn callback

2018-07-17 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---
 builtin/replace.c | 3 ++-
 refs.c| 9 -
 refs.h| 2 +-
 replace-object.c  | 3 ++-
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/builtin/replace.c b/builtin/replace.c
index ef22d724bbc..5f34659071f 100644
--- a/builtin/replace.c
+++ b/builtin/replace.c
@@ -39,7 +39,8 @@ struct show_data {
enum replace_format format;
 };
 
-static int show_reference(const char *refname, const struct object_id *oid,
+static int show_reference(struct repository *r, const char *refname,
+ const struct object_id *oid,
  int flag, void *cb_data)
 {
struct show_data *data = cb_data;
diff --git a/refs.c b/refs.c
index 2513f77acb3..5700cd4683f 100644
--- a/refs.c
+++ b/refs.c
@@ -1478,12 +1478,11 @@ int refs_for_each_fullref_in(struct ref_store *refs, 
const char *prefix,
return do_for_each_ref(refs, prefix, fn, 0, flag, cb_data);
 }
 
-int for_each_replace_ref(struct repository *r, each_ref_fn fn, void *cb_data)
+int for_each_replace_ref(struct repository *r, each_repo_ref_fn fn, void 
*cb_data)
 {
-   return do_for_each_ref(get_main_ref_store(r),
-  git_replace_ref_base, fn,
-  strlen(git_replace_ref_base),
-  DO_FOR_EACH_INCLUDE_BROKEN, cb_data);
+   return do_for_each_repo_ref(r, git_replace_ref_base, fn,
+   strlen(git_replace_ref_base),
+   DO_FOR_EACH_INCLUDE_BROKEN, cb_data);
 }
 
 int for_each_namespaced_ref(each_ref_fn fn, void *cb_data)
diff --git a/refs.h b/refs.h
index 80eec8bbc68..a0a18223a14 100644
--- a/refs.h
+++ b/refs.h
@@ -317,7 +317,7 @@ int for_each_fullref_in(const char *prefix, each_ref_fn fn, 
void *cb_data,
 int for_each_tag_ref(each_ref_fn fn, void *cb_data);
 int for_each_branch_ref(each_ref_fn fn, void *cb_data);
 int for_each_remote_ref(each_ref_fn fn, void *cb_data);
-int for_each_replace_ref(struct repository *r, each_ref_fn fn, void *cb_data);
+int for_each_replace_ref(struct repository *r, each_repo_ref_fn fn, void 
*cb_data);
 int for_each_glob_ref(each_ref_fn fn, const char *pattern, void *cb_data);
 int for_each_glob_ref_in(each_ref_fn fn, const char *pattern,
 const char *prefix, void *cb_data);
diff --git a/replace-object.c b/replace-object.c
index 801b5c16789..01a5a59a35a 100644
--- a/replace-object.c
+++ b/replace-object.c
@@ -6,7 +6,8 @@
 #include "repository.h"
 #include "commit.h"
 
-static int register_replace_ref(const char *refname,
+static int register_replace_ref(struct repository *r,
+   const char *refname,
const struct object_id *oid,
int flag, void *cb_data)
 {
-- 
2.18.0.233.g985f88cf7e-goog



[PATCH 1/2] refs.c: migrate internal ref iteration to pass thru repository argument

2018-07-17 Thread Stefan Beller
In 60ce76d3581 (refs: add repository argument to for_each_replace_ref,
2018-04-11) and 0d296c57aec (refs: allow for_each_replace_ref to handle
arbitrary repositories, 2018-04-11), for_each_replace_ref learned how
to iterate over refs by a given arbitrary repository.
New attempts in the object store conversion have shown that it is useful
to have the repository handle available that the refs iteration is
currently iterating over.

To achieve this goal we will need to add a repository argument to
each_ref_fn in refs.h. However as many callers rely on the signature
such a patch would be too large.

So convert the internals of the ref subsystem first to pass through a
repository argument without exposing the change to the user. Assume
the_repository for the passed through repository, although it is not
used anywhere yet.

Signed-off-by: Stefan Beller 
---
 refs.c   | 39 +--
 refs.h   | 10 ++
 refs/iterator.c  |  6 +++---
 refs/refs-internal.h |  5 +++--
 4 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/refs.c b/refs.c
index fcfd3171e83..2513f77acb3 100644
--- a/refs.c
+++ b/refs.c
@@ -1390,17 +1390,50 @@ struct ref_iterator *refs_ref_iterator_begin(
  * non-zero value, stop the iteration and return that value;
  * otherwise, return 0.
  */
+static int do_for_each_repo_ref(struct repository *r, const char *prefix,
+   each_repo_ref_fn fn, int trim, int flags,
+   void *cb_data)
+{
+   struct ref_iterator *iter;
+   struct ref_store *refs = get_main_ref_store(r);
+
+   if (!refs)
+   return 0;
+
+   iter = refs_ref_iterator_begin(refs, prefix, trim, flags);
+
+   return do_for_each_repo_ref_iterator(r, iter, fn, cb_data);
+}
+
+struct do_for_each_ref_help {
+   each_ref_fn *fn;
+   void *cb_data;
+};
+
+static int do_for_each_ref_helper(struct repository *r,
+ const char *refname,
+ const struct object_id *oid,
+ int flags,
+ void *cb_data)
+{
+   struct do_for_each_ref_help *hp = cb_data;
+
+   return hp->fn(refname, oid, flags, hp->cb_data);
+}
+
 static int do_for_each_ref(struct ref_store *refs, const char *prefix,
   each_ref_fn fn, int trim, int flags, void *cb_data)
 {
struct ref_iterator *iter;
+   struct do_for_each_ref_help hp = { fn, cb_data };
 
if (!refs)
return 0;
 
iter = refs_ref_iterator_begin(refs, prefix, trim, flags);
 
-   return do_for_each_ref_iterator(iter, fn, cb_data);
+   return do_for_each_repo_ref_iterator(the_repository, iter,
+   do_for_each_ref_helper, );
 }
 
 int refs_for_each_ref(struct ref_store *refs, each_ref_fn fn, void *cb_data)
@@ -2029,10 +2062,12 @@ int refs_verify_refname_available(struct ref_store 
*refs,
 int refs_for_each_reflog(struct ref_store *refs, each_ref_fn fn, void *cb_data)
 {
struct ref_iterator *iter;
+   struct do_for_each_ref_help hp = { fn, cb_data };
 
iter = refs->be->reflog_iterator_begin(refs);
 
-   return do_for_each_ref_iterator(iter, fn, cb_data);
+   return do_for_each_repo_ref_iterator(the_repository, iter,
+do_for_each_ref_helper, );
 }
 
 int for_each_reflog(each_ref_fn fn, void *cb_data)
diff --git a/refs.h b/refs.h
index cc2fb4c68c0..80eec8bbc68 100644
--- a/refs.h
+++ b/refs.h
@@ -274,6 +274,16 @@ struct ref_transaction;
 typedef int each_ref_fn(const char *refname,
const struct object_id *oid, int flags, void *cb_data);
 
+/*
+ * The same as each_ref_fn, but also with a repository argument that
+ * contains the repository associated with the callback.
+ */
+typedef int each_repo_ref_fn(struct repository *r,
+const char *refname,
+const struct object_id *oid,
+int flags,
+void *cb_data);
+
 /*
  * The following functions invoke the specified callback function for
  * each reference indicated.  If the function ever returns a nonzero
diff --git a/refs/iterator.c b/refs/iterator.c
index 2ac91ac3401..629e00a122a 100644
--- a/refs/iterator.c
+++ b/refs/iterator.c
@@ -407,15 +407,15 @@ struct ref_iterator *prefix_ref_iterator_begin(struct 
ref_iterator *iter0,
 
 struct ref_iterator *current_ref_iter = NULL;
 
-int do_for_each_ref_iterator(struct ref_iterator *iter,
-each_ref_fn fn, void *cb_data)
+int do_for_each_repo_ref_iterator(struct repository *r, struct ref_iterator 
*iter,
+ each_repo_ref_fn fn, void *cb_data)
 {
int retval = 0, ok;
struct ref_iterator *old_ref_iter = current_ref_iter;
 
current_ref_iter = iter;
 

Re: [PATCH v2 0/6] git-submodule.sh: convert part of cmd_update to C

2018-07-17 Thread Stefan Beller
On Tue, Jul 17, 2018 at 12:52 PM Junio C Hamano  wrote:

> > (A) This sign off is inherent to the workflow. So we could
> > change the workflow, i.e. you pull series instead of applying them.
> > I think this "more in git, less in email" workflow would find supporters,
> > such as DScho (cc'd).
>
> Sign-off is inherent to the project, in the sense that we want to
> see how the change flowed recorded in the commits.
>
> With a pull-request based workflow, until Git is somehow improved so
> that a "pull" becomes "fetch and rebase what got fetched on top of
> my stuff, and add my sign-off while at it" (which is the opposite of
> the usual "pull --rebase"),

and here is where our thoughts did not align.
I imagined a git-pull as of today (fetch + merge), where you in the role
of the maintainer tells me in the role of an author to base my series
on top of $X, such that there is no need for rebase on your end,
(you would also not sign off each commit, but only the resulting merge)
Even merge conflicts could be handed off to the authors instead of
burdening you.

>  we would lose the ability to fully "use"
> Git to run this project, as we would lose most sign-offs, unlike the
> e-mail based workflow, which we can use Git fully to have it do its
> job of recording necessary information.

I think all needed information would still be there, but there would be an
actual change as authors would be committers (as they commit locally
and we keep it all in Git to get it to you and you keep it in Git to put it
into the blessed repository)

> And much more importantly, when "pull-request" based workflow is
> improved enough so that your original without my sign-off (and you
> shouldn't, unless you are relaying my changes) becomes what I
> pulled, which does have my sign-off, range-diff that compares both
> histories does need to deal with a pair of commits with only one
> side having a sign-off.  So switching the tool is not a proper
> solution to work around the "sign-off noise" we observed.

I do not view it as work around, but "another proper workflow that
has advantages and disadvantages, one of the advantages is that it
would enable us to work with this tool".

>  One side
> having a sign-off while the other side does not is inherent to what
> we actively want,

[in the current workflow that has proven good for 10 years]

> and you are letting your tail wag your dog by
> suggesting to discard it, which is disappointing.

I am suggesting to continue thinking about workflows in general, as there
are many; all of them having advantages and disadvantages.
I am not sure if workflows can be adapted easily via improving the current
workflow continually or if sometimes a workflow has to be rethought to to
changes in the landscape of available tools.

When the Git project started, an email based workflow was chosen,
precisely because Git was not available.

Now that it has gained wide spread adoption (among its developers at least)
the workflow could adapt to that.

> > The other (2) downside is that everyone else (authors, reviewers) have
> > to adapt as well. For authors this might be easy to adapt (push instead
> > of sending email sounds like a win).
>
> As I most often edit the log message and material below three-dash
> lines (long) _after_ format-patch produced files, I do not think it
> is a win to force me to push and ask to pull

Ah, that is an interesting workflow. Do you keep patch files/emails
around locally, only to (long after) add a message and resend it?
I try to keep any contribution of mine in Git as long as possible as that
helps me tracking and fixing errors in my code and log messages.

> > (B) The other point of view that I can offer is that we teach range-diff
> > to ignore certain patterns. Maybe in combination with interpret-trailers
> > this can be an easy configurable thing, or even a default to ignore
> > all sign offs?
>
> That indicates the same direction as I alluded to in the message you
> are responding to, I guess, which is a good thing.

Yes, I imagined this is the approach we'll be taking.
I think we would want to give %(trailers:none) or rather
"ignore-sign-offs" to the inner diffs.

Thanks,
Stefan


Re: [PATCH v2 0/6] git-submodule.sh: convert part of cmd_update to C

2018-07-17 Thread Stefan Beller
> A tangent.
>
> Because this "-- " is a conventional signature separator, MUAs like
> Emacs message-mode seems to omit everything below it from the quote
> while responding, making it cumbersome to comment on the tbdiff.
>
> Something to think about if somebody is contemplating on adding more
> to format-patch's cover letter.

+cc Eric who needs to think about this tangent, then.
https://public-inbox.org/git/20180530080325.37520-1-sunsh...@sunshineco.com/

>
> >> 2.18.0.203.gfac676dfb9-goog
> >>
> >> 1:  d4e1ec45740 ! 1:  bbc8697a8ca git-submodule.sh: align error reporting 
> >> for update mode to use path
> >> @@ -6,7 +6,6 @@
> >>  on its path, so let's do that for invalid update modes, too.
> >>
> >>  Signed-off-by: Stefan Beller 
> >> -Signed-off-by: Junio C Hamano 
> >>
> >>  diff --git a/git-submodule.sh b/git-submodule.sh
> >>  --- a/git-submodule.sh
>
> This is quite unfortunate.  I wonder if it is easy to tell
> range-diff that certain differences in the log message are to be
> ignored so that we can show that the first patch is unchanged in a
> case like this.  This series has 4 really changed ones with 2
> otherwise unchanged ones shown all as changed, which is not too bad,
> but for a series like sb/diff-colro-move-more reroll that has 9
> patches, out of only two have real updated patches, showing
> otherwise unchanged 7 as changed like this hunk does would make the
> cover letter useless.  It is a shame that adding range-diff to the
> cover does have so much potential.

Actually I thought it was really cool, i.e. when using your queued branch
instead of my last sent branch, I can see any edits *you* did
(including fixing up typos or applying at slightly different bases).

The sign offs are a bit unfortunate as they are repetitive.
I have two conflicting points of view on that:

(A) This sign off is inherent to the workflow. So we could
change the workflow, i.e. you pull series instead of applying them.
I think this "more in git, less in email" workflow would find supporters,
such as DScho (cc'd).

The downside is that (1) you'd have to change your
workflow, i.e. instead of applying the patches at the base you think is
best for maintenance you'd have to tell the author "please rebase to $X";
but that also has upsides, such as "If you want to have your series integrated
please merge with $Y and $Z" (looking at the object store stuff).

The other (2) downside is that everyone else (authors, reviewers) have
to adapt as well. For authors this might be easy to adapt (push instead
of sending email sounds like a win). For reviewers we'd need to have
an easy way to review things "stored in git" and not exposed via email,
which is not obvious how to do.

(B) The other point of view that I can offer is that we teach range-diff
to ignore certain patterns. Maybe in combination with interpret-trailers
this can be an easy configurable thing, or even a default to ignore
all sign offs?

Thanks,
Stefan


Re: Are clone/checkout operations deterministic?

2018-07-17 Thread Stefan Beller
On Tue, Jul 17, 2018 at 2:48 AM Ævar Arnfjörð Bjarmason
 wrote:
>
>
> On Tue, Jul 17 2018, J. Paul Reed wrote:
>
> > Hey Git Devs,
> >
> > I have a bit of an odd question: do git clone/checkout operations have a
> > deterministic ordering?
> >
> > That is: are files guaranteed to be laid down onto disk in any specific
> > (and deterministic) order as a clone and/or checkout operations occurs?
> > (And if so, is it alphabetical order? Depth-first? Something else?)
> >
> > In case the answer is different (and I'd guess that it might be?), I'm
> > mostly interested in the initial clone case... but it would be great to
> > know if, indeed, the answer is different for just-checkouts too.
> >
> > I did some cursory googling, but nothing hopped out at me as an answer to
> > this question.
>
> In practice I think clone, checkout, reset etc. always work in the same
> order you see with `git ls-tree -r --name-only HEAD`, but as far as I
> know this has never been guaranteed or documented, and shouldn't be
> relied on.

The transmission of packfiles is non-deterministic, as the packfiles
(which are packed for each clone separately when using core git as
a server) are not packed in a deterministic fashion, but in a threaded
environment which allows different packing orders.

If you clone from a server that gives you exactly the same pack at
all times (assuming the remote repo doesn't change refs), then
checkout is currently deterministic in unpacking files to the working tree.

>
> E.g. there's probably cases where writing files in parallel is going to
> be faster than writing them sequentially. We don't have such a mode just
> because nobody's written a patch for it, but having that patch would
> break any assumptions of our current order.

+cc Ben who is looking into that, but hasn't spoken up on the mailing list yet.


<    3   4   5   6   7   8   9   10   11   12   >