[PATCH v4 14/22] sha1_file: make check_and_freshen_file() non static

2017-02-27 Thread Christian Couder
This function will be used in a commit soon, so let's make
it available globally.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 cache.h | 3 +++
 sha1_file.c | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index 955e80913e..6b25b50aab 100644
--- a/cache.h
+++ b/cache.h
@@ -1229,6 +1229,9 @@ extern int has_pack_index(const unsigned char *sha1);
 
 extern void assert_sha1_type(const unsigned char *sha1, enum object_type 
expect);
 
+/* Helper to check and "touch" a file */
+extern int check_and_freshen_file(const char *fn, int freshen);
+
 extern const signed char hexval_table[256];
 static inline unsigned int hexval(unsigned char c)
 {
diff --git a/sha1_file.c b/sha1_file.c
index ec957db5e1..192073ea95 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -593,7 +593,7 @@ static int freshen_file(const char *fn)
  * either does not exist on disk, or has a stale mtime and may be subject to
  * pruning).
  */
-static int check_and_freshen_file(const char *fn, int freshen)
+int check_and_freshen_file(const char *fn, int freshen)
 {
if (access(fn, F_OK))
return 0;
-- 
2.12.0.22.g0672473d40



[PATCH v4 12/22] t1700: add tests for splitIndex.maxPercentChange

2017-02-27 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/t1700-split-index.sh | 72 ++
 1 file changed, 72 insertions(+)

diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index df19b812fd..21f43903f8 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -238,4 +238,76 @@ test_expect_success 'set core.splitIndex config variable 
to false' '
test_cmp expect actual
 '
 
+test_expect_success 'set core.splitIndex config variable to true' '
+   git config core.splitIndex true &&
+   : >three &&
+   git update-index --add three &&
+   BASE=$(test-dump-split-index .git/index | grep "^base") &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <<-EOF &&
+   $BASE
+   replacements:
+   deletions:
+   EOF
+   test_cmp expect actual &&
+   : >four &&
+   git update-index --add four &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <<-EOF &&
+   $BASE
+   100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0   four
+   replacements:
+   deletions:
+   EOF
+   test_cmp expect actual
+'
+
+test_expect_success 'check behavior with splitIndex.maxPercentChange unset' '
+   git config --unset splitIndex.maxPercentChange &&
+   : >five &&
+   git update-index --add five &&
+   BASE=$(test-dump-split-index .git/index | grep "^base") &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <<-EOF &&
+   $BASE
+   replacements:
+   deletions:
+   EOF
+   test_cmp expect actual &&
+   : >six &&
+   git update-index --add six &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <<-EOF &&
+   $BASE
+   100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0   six
+   replacements:
+   deletions:
+   EOF
+   test_cmp expect actual
+'
+
+test_expect_success 'check splitIndex.maxPercentChange set to 0' '
+   git config splitIndex.maxPercentChange 0 &&
+   : >seven &&
+   git update-index --add seven &&
+   BASE=$(test-dump-split-index .git/index | grep "^base") &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <<-EOF &&
+   $BASE
+   replacements:
+   deletions:
+   EOF
+   test_cmp expect actual &&
+   : >eight &&
+   git update-index --add eight &&
+   BASE=$(test-dump-split-index .git/index | grep "^base") &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <<-EOF &&
+   $BASE
+   replacements:
+   deletions:
+   EOF
+   test_cmp expect actual
+'
+
 test_done
-- 
2.12.0.22.g0672473d40



[PATCH v4 11/22] read-cache: regenerate shared index if necessary

2017-02-27 Thread Christian Couder
When writing a new split-index and there is a big number of cache
entries in the split-index compared to the shared index, it is a
good idea to regenerate the shared index.

By default when the ratio reaches 20%, we will push back all
the entries from the split-index into a new shared index file
instead of just creating a new split-index file.

The threshold can be configured using the
"splitIndex.maxPercentChange" config variable.

We need to adjust the existing tests in t1700 by setting
"splitIndex.maxPercentChange" to 100 at the beginning of t1700,
as the existing tests are assuming that the shared index is
regenerated only when `git update-index --split-index` is used.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 read-cache.c   | 32 
 t/t1700-split-index.sh |  1 +
 2 files changed, 33 insertions(+)

diff --git a/read-cache.c b/read-cache.c
index 99bc274b8d..aeb413a508 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2212,6 +2212,36 @@ static int write_shared_index(struct index_state *istate,
return ret;
 }
 
+static const int default_max_percent_split_change = 20;
+
+static int too_many_not_shared_entries(struct index_state *istate)
+{
+   int i, not_shared = 0;
+   int max_split = git_config_get_max_percent_split_change();
+
+   switch (max_split) {
+   case -1:
+   /* not or badly configured: use the default value */
+   max_split = default_max_percent_split_change;
+   break;
+   case 0:
+   return 1; /* 0% means always write a new shared index */
+   case 100:
+   return 0; /* 100% means never write a new shared index */
+   default:
+   break; /* just use the configured value */
+   }
+
+   /* Count not shared entries */
+   for (i = 0; i < istate->cache_nr; i++) {
+   struct cache_entry *ce = istate->cache[i];
+   if (!ce->index)
+   not_shared++;
+   }
+
+   return (int64_t)istate->cache_nr * max_split < (int64_t)not_shared * 
100;
+}
+
 int write_locked_index(struct index_state *istate, struct lock_file *lock,
   unsigned flags)
 {
@@ -2229,6 +2259,8 @@ int write_locked_index(struct index_state *istate, struct 
lock_file *lock,
if ((v & 15) < 6)
istate->cache_changed |= SPLIT_INDEX_ORDERED;
}
+   if (too_many_not_shared_entries(istate))
+   istate->cache_changed |= SPLIT_INDEX_ORDERED;
if (istate->cache_changed & SPLIT_INDEX_ORDERED) {
int ret = write_shared_index(istate, lock, flags);
if (ret)
diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index 1659986d8d..df19b812fd 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -8,6 +8,7 @@ test_description='split index mode tests'
 sane_unset GIT_TEST_SPLIT_INDEX
 
 test_expect_success 'enable split index' '
+   git config splitIndex.maxPercentChange 100 &&
git update-index --split-index &&
test-dump-split-index .git/index >actual &&
indexversion=$(test-index-version <.git/index) &&
-- 
2.12.0.22.g0672473d40



[PATCH v4 00/22] Add configuration options for split-index

2017-02-27 Thread Christian Couder
  The main change is that the new freshen_shared_index() will now
  warn if the split-index file has been written but the shared
  index file could't be freshened.

  Another change is that in 17/22 the new
  "splitIndex.sharedIndexExpire" config variable now defaults to
  "2.weeks.ago" instead of "one.week.ago" in v3.

- Patch 18/22 adds a few tests for the new feature. It is changed
  a bit to account for the change in 17/22.

  Also some flakyness in one of the tests has been fixed by using
  a 5 second, instead of 1 second, delay, thanks to Ramsay and
  Peff, see
  
http://public-inbox.org/git/818851a6-c3ef-618e-4146-518fbe6bd...@ramsayjones.plus.com/

- Patches 19/22 and 20/22 were new patches in v3. They update the
  mtime of the shared index file when a split index based on the
  shared index file is read from. 19/22 is a refactoring to make
  the actual change in 20/22 easier.

  Patch 20/22 has been changed a little bit since v3 to take into
  account changes in 17/22.

- Patches 21/22 and 22/22 add some documentation for the new
  feature. Patch 21/22 has been changed to avoid using "mtime" as
  suggested by Junio.

Links
~

This patch series is also available here:

  https://github.com/chriscool/git/commits/config-split-index

The previous versions were:

  RFC: https://github.com/chriscool/git/commits/config-split-index7
  v1:  https://github.com/chriscool/git/commits/config-split-index72
  v2:  https://github.com/chriscool/git/commits/config-split-index99
  v3:  https://github.com/chriscool/git/commits/config-split-index102

On the mailing list the related patch series and discussions were:

  RFC: 
https://public-inbox.org/git/20160711172254.13439-1-chrisc...@tuxfamily.org/
  v1:  
https://public-inbox.org/git/20161023092648.12086-1-chrisc...@tuxfamily.org/
  v2:  
https://public-inbox.org/git/20161217145547.11748-1-chrisc...@tuxfamily.org/
  v3:  
https://public-inbox.org/git/2016122610.17150-1-chrisc...@tuxfamily.org/

Christian Couder (22):
  config: mark an error message up for translation
  t1700: change here document style
  config: add git_config_get_split_index()
  split-index: add {add,remove}_split_index() functions
  read-cache: add and then use tweak_split_index()
  update-index: warn in case of split-index incoherency
  t1700: add tests for core.splitIndex
  Documentation/config: add information for core.splitIndex
  Documentation/git-update-index: talk about core.splitIndex config var
  config: add git_config_get_max_percent_split_change()
  read-cache: regenerate shared index if necessary
  t1700: add tests for splitIndex.maxPercentChange
  Documentation/config: add splitIndex.maxPercentChange
  sha1_file: make check_and_freshen_file() non static
  read-cache: touch shared index files when used
  config: add git_config_get_expiry() from gc.c
  read-cache: unlink old sharedindex files
  t1700: test shared index file expiration
  read-cache: refactor read_index_from()
  read-cache: use freshen_shared_index() in read_index_from()
  Documentation/config: add splitIndex.sharedIndexExpire
  Documentation/git-update-index: explain splitIndex.*

 Documentation/config.txt   |  29 
 Documentation/git-update-index.txt |  43 -
 builtin/gc.c   |  15 +-
 builtin/update-index.c |  25 +--
 cache.h|   8 +
 config.c   |  42 -
 read-cache.c   | 157 --
 sha1_file.c|   2 +-
 split-index.c  |  22 +++
 split-index.h  |   2 +
 t/t1700-split-index.sh | 324 +++--
 11 files changed, 539 insertions(+), 130 deletions(-)

-- 
2.12.0.22.g0672473d40



[PATCH v4 02/22] t1700: change here document style

2017-02-27 Thread Christian Couder
This improves test indentation by getting rid of the outdated
here document style.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/t1700-split-index.sh | 170 -
 1 file changed, 85 insertions(+), 85 deletions(-)

diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index 292a0720fc..cb68b8dc1e 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -19,12 +19,12 @@ test_expect_success 'enable split index' '
own=8299b0bcd1ac364e5f1d7768efb62fa2da79a339
base=39d890139ee5356c7ef572216cebcd27aa41f9df
fi &&
-   cat >expect <expect <<-EOF &&
+   own $own
+   base $base
+   replacements:
+   deletions:
+   EOF
test_cmp expect actual
 '
 
@@ -32,51 +32,51 @@ test_expect_success 'add one file' '
: >one &&
git update-index --add one &&
git ls-files --stage >ls-files.actual &&
-   cat >ls-files.expect <ls-files.expect <<-EOF &&
+   100644 $EMPTY_BLOB 0one
+   EOF
test_cmp ls-files.expect ls-files.actual &&
 
test-dump-split-index .git/index | sed "/^own/d" >actual &&
-   cat >expect <expect <<-EOF &&
+   base $base
+   100644 $EMPTY_BLOB 0one
+   replacements:
+   deletions:
+   EOF
test_cmp expect actual
 '
 
 test_expect_success 'disable split index' '
git update-index --no-split-index &&
git ls-files --stage >ls-files.actual &&
-   cat >ls-files.expect <ls-files.expect <<-EOF &&
+   100644 $EMPTY_BLOB 0one
+   EOF
test_cmp ls-files.expect ls-files.actual &&
 
BASE=$(test-dump-split-index .git/index | grep "^own" | sed 
"s/own/base/") &&
test-dump-split-index .git/index | sed "/^own/d" >actual &&
-   cat >expect <expect <<-EOF &&
+   not a split index
+   EOF
test_cmp expect actual
 '
 
 test_expect_success 'enable split index again, "one" now belongs to base 
index"' '
git update-index --split-index &&
git ls-files --stage >ls-files.actual &&
-   cat >ls-files.expect <ls-files.expect <<-EOF &&
+   100644 $EMPTY_BLOB 0one
+   EOF
test_cmp ls-files.expect ls-files.actual &&
 
test-dump-split-index .git/index | sed "/^own/d" >actual &&
-   cat >expect <expect <<-EOF &&
+   $BASE
+   replacements:
+   deletions:
+   EOF
test_cmp expect actual
 '
 
@@ -84,18 +84,18 @@ test_expect_success 'modify original file, base index 
untouched' '
echo modified >one &&
git update-index one &&
git ls-files --stage >ls-files.actual &&
-   cat >ls-files.expect <ls-files.expect <<-EOF &&
+   100644 2e0996000b7e9019eabcad29391bf0f5c7702f0b 0   one
+   EOF
test_cmp ls-files.expect ls-files.actual &&
 
test-dump-split-index .git/index | sed "/^own/d" >actual &&
-   q_to_tab >expect <expect <<-EOF &&
+   $BASE
+   100644 2e0996000b7e9019eabcad29391bf0f5c7702f0b 0Q
+   replacements: 0
+   deletions:
+   EOF
test_cmp expect actual
 '
 
@@ -103,54 +103,54 @@ test_expect_success 'add another file, which stays index' 
'
: >two &&
git update-index --add two &&
git ls-files --stage >ls-files.actual &&
-   cat >ls-files.expect <ls-files.expect <<-EOF &&
+   100644 2e0996000b7e9019eabcad29391bf0f5c7702f0b 0   one
+   100644 $EMPTY_BLOB 0two
+   EOF
test_cmp ls-files.expect ls-files.actual &&
 
test-dump-split-index .git/index | sed "/^own/d" >actual &&
-   q_to_tab >expect <expect <<-EOF &&
+   $BASE
+   100644 2e0996000b7e9019eabcad29391bf0f5c7702f0b 0Q
+   100644 $EMPTY_BLOB 0two
+   replacements: 0
+   deletions:
+   EOF
test_cmp expect actual
 '
 
 test_expect_success 'remove file not in base index' '
git update-index --force-remove two &&
git ls-files --stage >ls-files.actual &&
-   cat >ls-files.expect <ls-files.expect <<-EOF &&
+   100644 2e0996000b7e9019eabcad29391bf0f5c7702f0b 0   one
+   EOF
test_cmp ls-files.expect ls-files.actual &&
 
test-dump-split-index .git/index | sed "/^own/d" >actual &&
-   q_to_tab >expect <expect <<-EOF &&
+   $BASE
+   10

[PATCH v4 10/22] config: add git_config_get_max_percent_split_change()

2017-02-27 Thread Christian Couder
This new function will be used in a following commit to get the
value of the "splitIndex.maxPercentChange" config variable.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 cache.h  |  1 +
 config.c | 15 +++
 2 files changed, 16 insertions(+)

diff --git a/cache.h b/cache.h
index 014aa7ea11..955e80913e 100644
--- a/cache.h
+++ b/cache.h
@@ -1883,6 +1883,7 @@ extern int git_config_get_maybe_bool(const char *key, int 
*dest);
 extern int git_config_get_pathname(const char *key, const char **dest);
 extern int git_config_get_untracked_cache(void);
 extern int git_config_get_split_index(void);
+extern int git_config_get_max_percent_split_change(void);
 
 /*
  * This is a hack for test programs like test-dump-untracked-cache to
diff --git a/config.c b/config.c
index 2a97696be7..35b6f02960 100644
--- a/config.c
+++ b/config.c
@@ -1746,6 +1746,21 @@ int git_config_get_split_index(void)
return -1; /* default value */
 }
 
+int git_config_get_max_percent_split_change(void)
+{
+   int val = -1;
+
+   if (!git_config_get_int("splitindex.maxpercentchange", )) {
+   if (0 <= val && val <= 100)
+   return val;
+
+   return error(_("splitIndex.maxPercentChange value '%d' "
+  "should be between 0 and 100"), val);
+   }
+
+   return -1; /* default value */
+}
+
 NORETURN
 void git_die_config_linenr(const char *key, const char *filename, int linenr)
 {
-- 
2.12.0.22.g0672473d40



[PATCH v4 06/22] update-index: warn in case of split-index incoherency

2017-02-27 Thread Christian Couder
When users are using `git update-index --(no-)split-index`, they
may expect the split-index feature to be used or not according to
the option they just used, but this might not be the case if the
new "core.splitIndex" config variable has been set. In this case
let's warn about what will happen and why.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 builtin/update-index.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index 24fdadfa4b..d74d72cc7f 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -1099,12 +1099,21 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
}
 
if (split_index > 0) {
+   if (git_config_get_split_index() == 0)
+   warning(_("core.splitIndex is set to false; "
+ "remove or change it, if you really want to "
+ "enable split index"));
if (the_index.split_index)
the_index.cache_changed |= SPLIT_INDEX_ORDERED;
else
add_split_index(_index);
-   } else if (!split_index)
+   } else if (!split_index) {
+   if (git_config_get_split_index() == 1)
+   warning(_("core.splitIndex is set to true; "
+ "remove or change it, if you really want to "
+ "disable split index"));
remove_split_index(_index);
+   }
 
switch (untracked_cache) {
case UC_UNSPECIFIED:
-- 
2.12.0.22.g0672473d40



[PATCH v4 05/22] read-cache: add and then use tweak_split_index()

2017-02-27 Thread Christian Couder
This will make us use the split-index feature or not depending
on the value of the "core.splitIndex" config variable.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 read-cache.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/read-cache.c b/read-cache.c
index 9054369dd0..99bc274b8d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1558,10 +1558,27 @@ static void tweak_untracked_cache(struct index_state 
*istate)
}
 }
 
+static void tweak_split_index(struct index_state *istate)
+{
+   switch (git_config_get_split_index()) {
+   case -1: /* unset: do nothing */
+   break;
+   case 0: /* false */
+   remove_split_index(istate);
+   break;
+   case 1: /* true */
+   add_split_index(istate);
+   break;
+   default: /* unknown value: do nothing */
+   break;
+   }
+}
+
 static void post_read_index_from(struct index_state *istate)
 {
check_ce_order(istate);
tweak_untracked_cache(istate);
+   tweak_split_index(istate);
 }
 
 /* remember to discard_cache() before reading a different cache! */
-- 
2.12.0.22.g0672473d40



[PATCH v4 08/22] Documentation/config: add information for core.splitIndex

2017-02-27 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 Documentation/config.txt | 4 
 1 file changed, 4 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 015346c417..61a863adeb 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -334,6 +334,10 @@ core.trustctime::
crawlers and some backup systems).
See linkgit:git-update-index[1]. True by default.
 
+core.splitIndex::
+   If true, the split-index feature of the index will be used.
+   See linkgit:git-update-index[1]. False by default.
+
 core.untrackedCache::
Determines what to do about the untracked cache feature of the
index. It will be kept, if this variable is unset or set to
-- 
2.12.0.22.g0672473d40



[PATCH v4 07/22] t1700: add tests for core.splitIndex

2017-02-27 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/t1700-split-index.sh | 37 +
 1 file changed, 37 insertions(+)

diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index cb68b8dc1e..1659986d8d 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -200,4 +200,41 @@ test_expect_success 'unify index, two files remain' '
test_cmp expect actual
 '
 
+test_expect_success 'set core.splitIndex config variable to true' '
+   git config core.splitIndex true &&
+   : >three &&
+   git update-index --add three &&
+   git ls-files --stage >ls-files.actual &&
+   cat >ls-files.expect <<-EOF &&
+   100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0   one
+   100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0   three
+   100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0   two
+   EOF
+   test_cmp ls-files.expect ls-files.actual &&
+   BASE=$(test-dump-split-index .git/index | grep "^base") &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <<-EOF &&
+   $BASE
+   replacements:
+   deletions:
+   EOF
+   test_cmp expect actual
+'
+
+test_expect_success 'set core.splitIndex config variable to false' '
+   git config core.splitIndex false &&
+   git update-index --force-remove three &&
+   git ls-files --stage >ls-files.actual &&
+   cat >ls-files.expect <<-EOF &&
+   100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0   one
+   100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0   two
+   EOF
+   test_cmp ls-files.expect ls-files.actual &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <<-EOF &&
+   not a split index
+   EOF
+   test_cmp expect actual
+'
+
 test_done
-- 
2.12.0.22.g0672473d40



[PATCH v4 09/22] Documentation/git-update-index: talk about core.splitIndex config var

2017-02-27 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 Documentation/git-update-index.txt | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/git-update-index.txt 
b/Documentation/git-update-index.txt
index 7386c93162..e091b2a409 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -171,6 +171,12 @@ may not support it yet.
given again, all changes in $GIT_DIR/index are pushed back to
the shared index file. This mode is designed for very large
indexes that take a significant amount of time to read or write.
++
+These options take effect whatever the value of the `core.splitIndex`
+configuration variable (see linkgit:git-config[1]). But a warning is
+emitted when the change goes against the configured value, as the
+configured value will take effect next time the index is read and this
+will remove the intended effect of the option.
 
 --untracked-cache::
 --no-untracked-cache::
-- 
2.12.0.22.g0672473d40



[PATCH v4 04/22] split-index: add {add,remove}_split_index() functions

2017-02-27 Thread Christian Couder
Also use the functions in cmd_update_index() in
builtin/update-index.c.

These functions will be used in a following commit to tweak
our use of the split-index feature depending on the setting
of a configuration variable.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 builtin/update-index.c | 18 ++
 split-index.c  | 22 ++
 split-index.h  |  2 ++
 3 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index d530e89368..24fdadfa4b 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -1099,18 +1099,12 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
}
 
if (split_index > 0) {
-   init_split_index(_index);
-   the_index.cache_changed |= SPLIT_INDEX_ORDERED;
-   } else if (!split_index && the_index.split_index) {
-   /*
-* can't discard_split_index(_index); because that
-* will destroy split_index->base->cache[], which may
-* be shared with the_index.cache[]. So yeah we're
-* leaking a bit here.
-*/
-   the_index.split_index = NULL;
-   the_index.cache_changed |= SOMETHING_CHANGED;
-   }
+   if (the_index.split_index)
+   the_index.cache_changed |= SPLIT_INDEX_ORDERED;
+   else
+   add_split_index(_index);
+   } else if (!split_index)
+   remove_split_index(_index);
 
switch (untracked_cache) {
case UC_UNSPECIFIED:
diff --git a/split-index.c b/split-index.c
index 615f4cac05..f519e60f87 100644
--- a/split-index.c
+++ b/split-index.c
@@ -317,3 +317,25 @@ void replace_index_entry_in_base(struct index_state 
*istate,
istate->split_index->base->cache[new->index - 1] = new;
}
 }
+
+void add_split_index(struct index_state *istate)
+{
+   if (!istate->split_index) {
+   init_split_index(istate);
+   istate->cache_changed |= SPLIT_INDEX_ORDERED;
+   }
+}
+
+void remove_split_index(struct index_state *istate)
+{
+   if (istate->split_index) {
+   /*
+* can't discard_split_index(_index); because that
+* will destroy split_index->base->cache[], which may
+* be shared with the_index.cache[]. So yeah we're
+* leaking a bit here.
+*/
+   istate->split_index = NULL;
+   istate->cache_changed |= SOMETHING_CHANGED;
+   }
+}
diff --git a/split-index.h b/split-index.h
index c1324f521a..df91c1bda8 100644
--- a/split-index.h
+++ b/split-index.h
@@ -31,5 +31,7 @@ void merge_base_index(struct index_state *istate);
 void prepare_to_write_split_index(struct index_state *istate);
 void finish_writing_split_index(struct index_state *istate);
 void discard_split_index(struct index_state *istate);
+void add_split_index(struct index_state *istate);
+void remove_split_index(struct index_state *istate);
 
 #endif
-- 
2.12.0.22.g0672473d40



[PATCH v4 01/22] config: mark an error message up for translation

2017-02-27 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 config.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config.c b/config.c
index c6b874a7bf..2ac1aa19b0 100644
--- a/config.c
+++ b/config.c
@@ -1728,8 +1728,8 @@ int git_config_get_untracked_cache(void)
if (!strcasecmp(v, "keep"))
return -1;
 
-   error("unknown core.untrackedCache value '%s'; "
- "using 'keep' default value", v);
+   error(_("unknown core.untrackedCache value '%s'; "
+   "using 'keep' default value"), v);
return -1;
}
 
-- 
2.12.0.22.g0672473d40



[PATCH v4 03/22] config: add git_config_get_split_index()

2017-02-27 Thread Christian Couder
This new function will be used in a following commit to know
if we want to use the split index feature or not.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 cache.h  |  1 +
 config.c | 10 ++
 2 files changed, 11 insertions(+)

diff --git a/cache.h b/cache.h
index 61fc86e6d7..014aa7ea11 100644
--- a/cache.h
+++ b/cache.h
@@ -1882,6 +1882,7 @@ extern int git_config_get_bool_or_int(const char *key, 
int *is_bool, int *dest);
 extern int git_config_get_maybe_bool(const char *key, int *dest);
 extern int git_config_get_pathname(const char *key, const char **dest);
 extern int git_config_get_untracked_cache(void);
+extern int git_config_get_split_index(void);
 
 /*
  * This is a hack for test programs like test-dump-untracked-cache to
diff --git a/config.c b/config.c
index 2ac1aa19b0..2a97696be7 100644
--- a/config.c
+++ b/config.c
@@ -1736,6 +1736,16 @@ int git_config_get_untracked_cache(void)
return -1; /* default value */
 }
 
+int git_config_get_split_index(void)
+{
+   int val;
+
+   if (!git_config_get_maybe_bool("core.splitindex", ))
+   return val;
+
+   return -1; /* default value */
+}
+
 NORETURN
 void git_die_config_linenr(const char *key, const char *filename, int linenr)
 {
-- 
2.12.0.22.g0672473d40



[ANNOUNCE] Git Rev News edition 24

2017-02-23 Thread Christian Couder
Hi everyone,

The 24th edition of Git Rev News is now published:

https://git.github.io/rev_news/2017/02/22/edition-24/

Thanks a lot to all the contributors and helpers!

Enjoy,
Christian, Thomas, Jakub and Markus.


Draft of Git Rev News edition 24

2017-02-19 Thread Christian Couder
Hi,

A draft of a new Git Rev News edition is available here:

  https://github.com/git/git.github.io/blob/master/rev_news/drafts/edition-24.md

Everyone is welcome to contribute in any section either by editing the
above page on GitHub and sending a pull request, or by commenting on
this GitHub issue:

  https://github.com/git/git.github.io/issues/221

You can also reply to this email.

In general all kinds of contribution, for example proofreading,
suggestions for articles or links, help on the issues in GitHub, and
so on, are very much appreciated.

I tried to cc everyone who appears in this edition, but maybe I missed
some people, sorry about that.

Thomas, Jakub, Markus and myself plan to publish this edition on
Wednesday February 22.

Thanks,
Christian.


Re: Git bisect does not find commit introducing the bug

2017-02-19 Thread Christian Couder
On Sun, Feb 19, 2017 at 12:32 PM, Alex Hoffman  wrote:
>> At the end of the git-bisect man page (in the SEE ALSO section) there
>> is a link to 
>> https://github.com/git/git/blob/master/Documentation/git-bisect-lk2009.txt
>> which has a lot of details about how bisect works.
>
> Thanks for pointing out the SEE ALSO section. I think it makes sense
> to include/describe the entire algorithm in the man page itself,
> although I am not sure whether the graphs would be always correctly
> visually represented in the man page format.

It would possibly be very long to describe the entire algorithm, as it
can be quite complex in some cases and it is difficult to understand
without graphs. Maybe we could describe it, or some parts of it, in a
separate document and provide links at different places in the man
page.
Anyway feel free to send patches.

>> The goal is to find the first bad commit, which is a commit that has
>> only good parents.
>
> OK, bisect's mission is more exact than I thought, which is good. M

Good that you seem to agree with this goal.

>> As o1 is an ancestor of G, then o1 is considered good by the bisect 
>> algorithm.
>> If it was bad, it would means that there is a transition from bad to
>> good between o1 and G.
>> But when a good commit is an ancestor of the bad commit, git bisect
>> makes the assumption that there is no transition from bad to good in
>> the graph.
>
> The assumption that there is no transition from bad to good in the
> graph did not hold in my example and it does not hold when a feature
> was recently introduced and gets broken relative shortly afterwards.
> But I consider it is easy to change the algorithm not to assume, but
> rather to check it.

I don't think the default algorithm will change soon, but there have
been discussions for a long time about adding options to use different
algorithms.

For example people have been discussing a "--first-parent" option for
many years as well as recently. It would bisect only along the first
parents of the involved commits, and it could help find the merge
commit that introduced a bug in the mainline.

>> git bisect makes some assumptions that are true most of the time, so
>> in practice it works well most of the time.
>
> Whatever the definition of "most of the time" everyone might have, I
> think there is room for improvement.

So feel free to send patches that would implement an option with the
improvements you want.

> Below I am trying to make a small
> change to the current algorithm in order to deal with the assumption
> that sometimes does not hold (e.g in my example), by explicitly
> validating the check.
>
>> --o1--o2--o3--G--X1
>> \\
>>  x1--x2--x3--x4--X2--B--
>>   \  /
>>y1--y2--y3
>
> Step 1a. (Unchanged) keep only the commits that:
>
> a) are ancestor of the "bad" commit (including the "bad" commit 
> itself),
> b) are not ancestor of a "good" commit (excluding the "good" commits).
>
> The following graph results:
>   x1--x2--x3--x4--X2--B--
>\  /
> y1--y2--y3

I would say that the above graph is missing X1.

> Step 1b. (New) Mark all root commits of the resulting graph (i.e
> commits without parents) as unconfirmed (unconfirmed=node that has
> only bad parents). Remove all root commits that user already confirmed
> (e.g if user already marked its parent as good right before starting
> bisect run). For every unconfirmed root commit check if it has any
> good parents. In the example above check whether x1 has good parents.

I think I understand the above...

>  If the current root element has any parents and none of them is
> good, we can delete all paths from it until to the next commit that
> has a parent in the ancestors of GOOD. In the example above to delete
> the path x1-x3 and x1-y3. Also add new resulting root commits to the
> list of unconfirmed commits (commit x4).
>  Otherwise mark it as confirmed.

... but I don't understand the logic of the above. If the root element
has a bad parent, then it means that the "first bad commit" is either
the bad parent or one of its ancestors, so it is not logical to delete
it. In your example if x1 has one bad parent, this bad parent and its
ancestors should be included in the search of the first bad commit.

Otherwise the goal is not any more to find the first bad commit.

PS: I saw that you have just sent another version of the algorithm,
but I don't want to take a look at it right now. Anyway I am keeping
my above comments as they might still be useful.

> Step2. Continue the existing algorithm.
>
>
> If this improvement works (i.e you do not find any bugs in it and it
> is feasible to implement, which seems to me)

As you describe it, I don't think it is compatible with the goal of
finding the first bad commit.
Also there are many things that are feasible to implement, but it
doesn't mean that someone will soon make the effort to implement them
in a 

Re: Git bisect does not find commit introducing the bug

2017-02-18 Thread Christian Couder
On Sat, Feb 18, 2017 at 7:36 PM, Alex Hoffman  wrote:
>> But this is not how Git works. Git computes graph differences, i.e., it
>> subtracts from the commits reachable from v.bad those that are reachable
>> from v.good. This leaves more than just those on some path from v.good to
>> v.bad. And it should work this way. Consider this history:
>>
>> --o--o--o--G--X
>>\   \
>> x--x--x--x--X--B--
>>
>> When you find B bad and G good, than any one of the x or X may have
>> introduced the breakage, not just one of the X.
>>
>
> Thank you for clarifying how git bisect works. How did you find that
> out? Did you check the source code? If that is not documented in the
> man page may be it worth documenting it in order to avoid future
> confusion for other users.

At the end of the git-bisect man page (in the SEE ALSO section) there
is a link to 
https://github.com/git/git/blob/master/Documentation/git-bisect-lk2009.txt
which has a lot of details about how bisect works.

> Let's consider your example with distinct names for nodes:
>
> --o1--o2--o3--G--X1
> \\
>  x1--x2--x3--x4--X1--B--
>
> It makes sense that git bisect is expecting a single transition, as
> this is a precondition for a binary search to work. My definition of
> "the transition" is a commit with at least one of its parents as a
> good version, but the commit itself a bad version.

What if a commit C has one good parent A and one bad parent B?
Isn't it more likely that the first bad commit is B instead of C?

> I hope we agree
> that git bisect's mission is to search for this transition (as I
> suppose that most of people need such a functionality from git, if not
> exactly from git bisect).

The goal is to find the first bad commit, which is a commit that has
only good parents.

> How could be x1 or x3 be the transition, if
> chances are that o1 is not a good version?

As o1 is an ancestor of G, then o1 is considered good by the bisect algorithm.
If it was bad, it would means that there is a transition from bad to
good between o1 and G.
But when a good commit is an ancestor of the bad commit, git bisect
makes the assumption that there is no transition from bad to good in
the graph.

> Of course it would make
> sense to me if bisect would check o1 whether good and only then to
> check also x1-x3, but this is not what git makes (at least not in my
> initial example).
>
> If you consider that git bisect's mission is different from finding
> the transition, could you please explain what exact commit git bisect
> is supposed to return (ideally with terms from the graph theory) and
> when it makes sense to return that? Because I do not see any sense in
> looking in the path x1-x3 without knowing that those commits may be a
> transition.

I hope it makes sense with the above explanations.

>> Oh, IMO git bisect was well thought through. If it considered just paths
>> from good to bad, it would not given the correct answer. See the example
>> history above. Bisect authors would not have deemed that sufficiently good
>
> You definitely convinced me that git MUST search more than only in the
> paths between good and bad commits, as the good commit G does not have
> to be the first good commit (thank you for that). My problem/confusion
> is that it returns something that does not make sense to me, because
> it does not make sure it returns a transition.

git bisect makes some assumptions that are true most of the time, so
in practice it works well most of the time.


Re: [PATCH] show-branch: fix crash with long ref name

2017-02-16 Thread Christian Couder
On Wed, Feb 15, 2017 at 10:40 PM, Jeff King <p...@peff.net> wrote:
> On Tue, Feb 14, 2017 at 10:29:46PM +0100, Christian Couder wrote:
>
>> > I notice Christian's patch added a few tests. I don't know if we'd want
>> > to squash them in (I didn't mean to override his patch at all; I was
>> > about to send mine out when I noticed his, and I wondered if we wanted
>> > to combine the two efforts).
>>
>> I think it would be nice to have at least one test. Feel free to
>> squash mine if you want.
>
> I started to add some tests, but I had second thoughts. It _is_ nice
> to show off the fix, but as far as regressions go, this specific case is
> unlikely to come up again. What would be more valuable, I think, is a
> test script which set up a very long refname (not just 150 bytes or
> whatever) and ran it through a series of git commands.

I agree that a test script running through a series of command with
long refnames would be great.

But I think the refname should not necesarily be too long. As I wrote
in the commit message of my patch, if the ref name had been much
longer the crash would not have happened because the ref could not
have been created in the first place.

So the best would be to run through a series of commands with a
refname ranging from let's say 80 chars to 300 chars.

That would have a chance to catch crashes due to legacy code using for
example things like `char stuff[128]` or `char stuff[256]`.

Implementing those tests could have started with something like the
test case I sent, but as it would in the end be about many different
commands, one can see it as part of a different topic.

> But then you run into all sorts of portability annoyances with pathname
> restrictions (you can hack around creation by writing the refname
> directly into packed-refs, but most manipulations will want to take the
> .lock in the filesystem).

Yeah, but if a crash doesn't happen because we die() as the ref is too
long for the file system, we could detect that and make the test
succeed.

> So I dunno. It seems like being thorough is a
> lot of hassle for not much gain. Being not-thorough is easy, but is
> mostly a token that is unlikely to find any real bugs.

Yeah, if we really care, it might be better to start using a fuzzer or
a property based testing tool instead of bothering with these kind of
tests by ourselves, which is also a different topic.

> So I punted, at least for now.

Ok, no problem.


Re: [git-for-windows] Re: Continuous Testing of Git on Windows

2017-02-14 Thread Christian Couder
On Tue, Feb 14, 2017 at 10:08 PM, Junio C Hamano  wrote:
> Johannes Schindelin  writes:
>
>> On Mon, 13 Feb 2017, Junio C Hamano wrote:
>>
>>> Johannes Schindelin  writes:
>>>
>>> > That is why I taught the Git for Windows CI job that tests the four
>>> > upstream Git integration branches to *also* bisect test breakages and
>>> > then upload comments to the identified commit on GitHub
>>>
>>> Good.  I do not think it is useful to try 'pu' as an aggregate and
>>> expect it to always build and work [*1*], but your "bisect and
>>> pinpoint" approach makes it useful to identify individual topic that
>>> brings in a breakage.
>>
>> Sadly the many different merge bases[*1*] between `next` and `pu` (which
>> are the obvious good/bad points for bisecting automatically) bring my
>> build agents to its knees. I may have to disable the bisecting feature as
>> a consequence.

Yeah, this is a bug in the bisect algorithm. Fixing it is in the GSoC
2017 Ideas.

> Probably a less resource intensive approach is to find the tips of
> the topics not in 'next' but in 'pu' and test them.  That would give
> you which topic(s) are problematic, which is a better starting point
> than "Oh, 'pu' does not build".  After identifying which branch is
> problematic, bisection of individual topic would be of more manageable
> size.

It is still probably more resource intensive than it couls be.

[...]

> This is one of these times I wish "git bisect --first-parent" were
> available.

Implementing "git bisect --first-parent" is also part of the GSoC 2017 Ideas.

By the way it should not be very difficult as a patch to do this and
more was proposed a long time ago:

https://public-inbox.org/git/4d3cddf9.6080...@intel.com/

> The above "log" gives me 27 merges right now, which
> should be bisectable within 5 rounds to identify a single broken
> topic (if there is only one breakage, that is).


Re: [PATCH] show-branch: fix crash with long ref name

2017-02-14 Thread Christian Couder
On Tue, Feb 14, 2017 at 8:55 PM, Jeff King  wrote:
> On Tue, Feb 14, 2017 at 11:35:48AM -0800, Junio C Hamano wrote:
>
>> Jeff King  writes:
>>
>> > This fixes the problem, but I think we can simplify it quite a bit by
>> > using resolve_refdup(). Here's the patch series I ended up with:
>> >
>> >   [1/3]: show-branch: drop head_len variable
>> >   [2/3]: show-branch: store resolved head in heap buffer
>> >   [3/3]: show-branch: use skip_prefix to drop magic numbers

Yeah, I noticed there were a number of things that could be improved
in the area, but I didn't want to spend too much time on this, so
thanks for this series.

>> Yes, the whole thing is my fault ;-) and I agree with what these
>> patches do.
>>
>> The second one lacks free(head) but I think that is OK; it is
>> something we allocate in cmd_*() and use pretty much thruout the
>> rest of the program.
>
> Yes, I actually tested the whole thing under ASAN (which was necessary
> to notice the problem), which complained about the leak. I don't mind
> adding a free(head), but there are a bunch of similar "leaks" in that
> function, so I didn't bother.

Yeah, I didn't bother either.

> I notice Christian's patch added a few tests. I don't know if we'd want
> to squash them in (I didn't mean to override his patch at all; I was
> about to send mine out when I noticed his, and I wondered if we wanted
> to combine the two efforts).

I think it would be nice to have at least one test. Feel free to
squash mine if you want.


[PATCH] show-branch: fix crash with long ref name

2017-02-14 Thread Christian Couder
This is a minimum fix for a crash with a long ref name.

Note that if the refname is too long (for example if you
use 100 instead of 50 in the test script) you could get
an error like:

fatal: cannot lock ref 'refs/heads/1_2_3_4_ ... _98_99_100_':
Unable to create '... /.git/refs/heads/1_2_3_4_ ... _98_99_100_.lock':
File name too long

when creating the ref instead of a crash when using
show-branch and that would be ok.

So a simpler fix could have been just something like:

-   char head[128];
+   char head[1024];

But if the filesystem ever allows filenames longer than 1024
characters then the crash could appear again.

Reported by: Maxim Kuvyrkov <maxim.kuvyr...@linaro.org>
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 builtin/show-branch.c  | 14 +++---
 t/t3204-show-branch-refname.sh | 19 +++
 2 files changed, 26 insertions(+), 7 deletions(-)
 create mode 100755 t/t3204-show-branch-refname.sh

diff --git a/builtin/show-branch.c b/builtin/show-branch.c
index 974f3403ab..3c0fe55eec 100644
--- a/builtin/show-branch.c
+++ b/builtin/show-branch.c
@@ -620,7 +620,7 @@ int cmd_show_branch(int ac, const char **av, const char 
*prefix)
int all_heads = 0, all_remotes = 0;
int all_mask, all_revs;
enum rev_sort_order sort_order = REV_SORT_IN_GRAPH_ORDER;
-   char head[128];
+   char *head_cpy;
const char *head_p;
int head_len;
struct object_id head_oid;
@@ -791,11 +791,11 @@ int cmd_show_branch(int ac, const char **av, const char 
*prefix)
head_oid.hash, NULL);
if (head_p) {
head_len = strlen(head_p);
-   memcpy(head, head_p, head_len + 1);
+   head_cpy = xstrdup(head_p);
}
else {
head_len = 0;
-   head[0] = 0;
+   head_cpy = xstrdup("");
}
 
if (with_current_branch && head_p) {
@@ -804,15 +804,15 @@ int cmd_show_branch(int ac, const char **av, const char 
*prefix)
/* We are only interested in adding the branch
 * HEAD points at.
 */
-   if (rev_is_head(head,
+   if (rev_is_head(head_cpy,
head_len,
ref_name[i],
head_oid.hash, NULL))
has_head++;
}
if (!has_head) {
-   int offset = starts_with(head, "refs/heads/") ? 11 : 0;
-   append_one_rev(head + offset);
+   int offset = starts_with(head_cpy, "refs/heads/") ? 11 
: 0;
+   append_one_rev(head_cpy + offset);
}
}
 
@@ -865,7 +865,7 @@ int cmd_show_branch(int ac, const char **av, const char 
*prefix)
if (1 < num_rev || extra < 0) {
for (i = 0; i < num_rev; i++) {
int j;
-   int is_head = rev_is_head(head,
+   int is_head = rev_is_head(head_cpy,
  head_len,
  ref_name[i],
  head_oid.hash,
diff --git a/t/t3204-show-branch-refname.sh b/t/t3204-show-branch-refname.sh
new file mode 100755
index 00..83b64e3032
--- /dev/null
+++ b/t/t3204-show-branch-refname.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+test_description='test show-branch with long refname'
+
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+
+   test_commit first &&
+   long_refname=$(printf "%s_" $(seq 1 50)) &&
+   git checkout -b "$long_refname"
+'
+
+test_expect_success 'show-branch with long refname' '
+
+   git show-branch first
+'
+
+test_done
-- 
2.12.0.rc0



Re: [BUG] Memory corruption crash with "git bisect start"

2017-02-14 Thread Christian Couder
On Tue, Feb 14, 2017 at 12:56 PM, Maxim Kuvyrkov
 wrote:
> I'm seeing the following memory corruption crash on a script-constructed repo 
> when starting git bisect.  I'm seeing this crash both with system git of 
> Ubuntu Xenial and with freshly-compiled git master from the official repo.
>
> The log of the crash is attached.

Yeah, thanks for the report.

I took a look and it looks like git bisect crashes when it runs git
show-branch and it can be reproduced with just `git show-branch
bottom` instead of `git bisect start bottom top`.

> It is possible that the bug is in Xenial glibc, in which case -- please let 
> me know and I will take it up with the glibc developers.

I have Ubuntu GLIBC 2.23-0ubuntu5 and I get a crash too.


Re: [RFH] Request for Git Merge 2017 impressions for Git Rev News

2017-02-13 Thread Christian Couder
Hi,

On Sat, Feb 11, 2017 at 5:33 PM, Jakub Narębski  wrote:
> Hello,
>
> Git Rev News #24 is planned to be released on February 15. It is meant
> to cover what happened during the month of January 2017 (and earely
> February 2017) and the Git Merge 2017 conference that happened on
> February 2nd and 3rd 2017.

Yeah, I would have liked to release Git Rev News #24 on February 15
but I was busy and tired this week end, so let it be on Wednesday
February 22.

> A draft of a new Git Rev News edition is available here:
>
>   
> https://github.com/git/git.github.io/blob/master/rev_news/drafts/edition-24.md
>
> I would like to ask everyone who attended the conference (and the
> GitMerge 2017 Contributors’s Summit day before it), or watched it live
> at http://git-merge.com/watch to write his or her impressions.
>
> You can contribute either by replying to this email, or by editing the
> above page on GitHub and sending a pull request, or by commenting on
> the following GitHub issue about Git Rev News 24:
>
>   https://github.com/git/git.github.io/issues/221
>
> If you prefer to post on your own blog (or if you have did it
> already), please send an URL.

Yeah, any material (link, short impression, article, ...) would be very nice.
Thanks Jakub for the links you already added to the draft, by the way!

> P.S. I wonder if there should be not a separate section on
> https://git.github.io/ about recollection from various Git-related
> events, with Git Merge 2017 as the first one.  This way we can wait
> for later response, and incorporate videos and slides from events, as
> they begin to be available.

Jakub, if you are willing to create and maintain this section, that
would be great!

> P.P.S. Please distribute this information more widely.

Thanks,
Christian.


Re: Git status performance on PS (command prompt)

2017-02-12 Thread Christian Couder
On Sun, Feb 12, 2017 at 5:46 PM, brian m. carlson
 wrote:
> On Sun, Feb 12, 2017 at 04:53:47PM +0100, Mark Gaiser wrote:

[...]

>> I did a bit of profiling in git to figure out where this slowdown comes from.
>> Callgrind tells me that "read_directory_recursive" takes up ~62% of the time.
>> Within that call the function "last_exclude_matching_from_list" takes
>> up 49% of the time it takes to run "git status --porcelain -b".

The core.untrackedCache config option may help.


Re: Non-zero exit code without error

2017-02-11 Thread Christian Couder
On Wed, Feb 8, 2017 at 11:26 AM, Serdar Sahin  wrote:
> Hi Christian,
>
>
> We are using a private repo (Github Enterprise).

Maybe you could try 'git fast-export --anonymize ...' on it.

> Let me give you the
> details you requested.
>
>
> On Git Server: git version 2.6.5.1574.g5e6a493
>
> On my client: git version 2.10.1 (Apple Git-78)
>
>
> I’ve tried to reproduce it with public repos, but couldn’t do so.

You might try using the latest released version (2.11.1).

For example you could install the last version on the client and then
clone from the server with --bare and use this bare repo as if it was
the server.

You could also try `git fsck` to see if there are problems on your repo.

Are there submodules or something a bit special?

In the end it's difficult for us to help if we cannot reproduce, so
your best bet might be to debug yourself using gdb for example.

> If I
> could get an error/log output, that would be sufficient.
>
>
> I am also including the full output below. (also git gc)
>
>
> MacOSX:test serdar$ git clone --mirror --depth 50 --no-single-branch
> g...@git.privateserver.com:Casual/code_repository.git

You could also try with different options above...

> Cloning into bare repository 'code_repository.git'...
>
> remote: Counting objects: 3362, done.
>
> remote: Compressing objects: 100% (1214/1214), done.
>
> remote: Total 3362 (delta 2335), reused 2968 (delta 2094), pack-reused 0
>
> Receiving objects: 100% (3362/3362), 56.77 MiB | 1.83 MiB/s, done.
>
> Resolving deltas: 100% (2335/2335), done.
>
> MacOSX:test serdar$ cd code_repository.git/
>
> MacOSX:code_repository.git serdar$ GIT_CURL_VERBOSE=1 GIT_TRACE=1  git
> fetch --depth 50 origin cc086c96cdffe5c1ac78e6139a7a4b79e7c821ee

... and above.

Also it looks like you use ssh so something like GIT_SSH_COMMAND="ssh
-vv" might help more than GIT_CURL_VERBOSE=1

> 13:23:15.648337 git.c:350   trace: built-in: git 'fetch'
> '--depth' '50' 'origin' 'cc086c96cdffe5c1ac78e6139a7a4b79e7c821ee'
>
> 13:23:15.651127 run-command.c:336   trace: run_command: 'ssh'
> 'g...@git.privateserver.com' 'git-upload-pack
> '\''Casual/code_repository.git'\'''
>
> 13:23:17.750015 run-command.c:336   trace: run_command: 'gc' '--auto'
>
> 13:23:17.750829 exec_cmd.c:189  trace: exec: 'git' 'gc' '--auto'
>
> 13:23:17.753983 git.c:350   trace: built-in: git 'gc' '--auto'
>
> MacOSX:code_repository.git serdar$ echo $?
>
> 1
>
> MacOSX:code_repository.git serdar$ GIT_CURL_VERBOSE=1 GIT_TRACE=1 git gc 
> --auto
>
> 13:23:45.899038 git.c:350   trace: built-in: git 'gc' '--auto'
>
> MacOSX:code_repository.git serdar$ echo $?
>
> 0


Re: GSoC 2017: application open, deadline = February 9, 2017

2017-02-09 Thread Christian Couder
On Thu, Feb 9, 2017 at 1:22 PM, Matthieu Moy
 wrote:
> Matthieu Moy  writes:
>
>> I created a Git organization and invited you + Peff as admins.

Great thanks!
I accepted the invite.

> I'll
>> start cut-and-pasting to show my good faith ;-).
>
> I created this page based on last year's:
>
> https://git.github.io/SoC-2017-Org-Application/
>
> I filled-in the "org profile". "Org application" is still TODO.

I copy pasted the Org application from last year, so I think we are good.


Re: GSoC 2017: application open, deadline = February 9, 2017

2017-02-09 Thread Christian Couder
On Thu, Feb 9, 2017 at 11:28 AM, Siddharth Kannan
 wrote:
> On 9 February 2017 at 15:45, Matthieu Moy  
> wrote:
>>
>> A non-quoted but yet important part of my initial email was:
>>
>> | So, as much as possible, I'd like to avoid being an org admin this
>> | year. It's not a lot of work (much, much less than being a mentor!),
>> | but if I manage to get some time to work for Git, I'd rather do that
>> | on coding and reviewing this year.
>>
>> and for now, no one stepped in to admin.
>
> I would like to point everyone to this reply from Jeff King on the
> original post: [1]
> (In case this was lost in the midst of other emails) It sounds like
> Jeff King is okay
> with taking up the "admin" role.
>
> I do not mind doing the administrative stuff.  But the real work is in
> polishing up the ideas list and microprojects page. So I think the first
> step, if people are interested in GSoC, is not just to say "yes, let's
> do it", but to actually flesh out these pages:

Yeah it was also my impression based on the above that Peff would be
ok to take up the admin role.

Now if he doesn't want for some reason to take it, then I am ok with
us not applying, but again it would have been better to be clearer
about that before the eve of the deadline.


Re: GSoC 2017: application open, deadline = February 9, 2017

2017-02-09 Thread Christian Couder
On Thu, Feb 9, 2017 at 11:15 AM, Matthieu Moy
<matthieu@grenoble-inp.fr> wrote:
> Christian Couder <christian.cou...@gmail.com> writes:
>
>> On Wed, Feb 8, 2017 at 3:54 PM, Matthieu Moy
>> <matthieu@grenoble-inp.fr> wrote:
>>> Jeff King <p...@peff.net> writes:
>>>
>>>> On Mon, Jan 23, 2017 at 04:02:02PM +0100, Matthieu Moy wrote:
>>>>
>>>>> * We need to write the application, i.e. essentially polish and update
>>>>>   the text here: https://git.github.io/SoC-2016-Org-Application/ and
>>>>>   update the list of project ideas and microprojects :
>>>>>   https://git.github.io/SoC-2017-Ideas/
>>>>>   https://git.github.io/SoC-2016-Microprojects/
>>>>
>>>> That can be done incrementally by people who care (especially mentors)
>>>> over the next week or so, and doesn't require any real admin
>>>> coordination. If it happens and the result looks good, then the
>>>> application process is pretty straightforward.
>>>>
>>>> If it doesn't, then we probably ought not to participate in GSoC.
>>>
>>> OK, it seems the last message did not raise a lot of enthousiasm (unless
>>> I missed some off-list discussion at Git-Merge?).
>>
>> I think having 2 possible mentors or co-mentors still shows some
>> enthousiasm even if I agree it's unfortunate there is not more
>> enthousiasm.
>
> A non-quoted but yet important part of my initial email was:
>
> | So, as much as possible, I'd like to avoid being an org admin this
> | year. It's not a lot of work (much, much less than being a mentor!),
> | but if I manage to get some time to work for Git, I'd rather do that
> | on coding and reviewing this year.
>
> and for now, no one stepped in to admin.

Well Peff wrote in reply to your email:

> I did co-admin last year and the year before, but I made Matthieu do all
> the work. :)
>
> I do not mind doing the administrative stuff. But the real work is in
> polishing up the ideas list and microprojects page.

So I thought Peff would be ok to be the admin (do "the administrative stuff").

> Other non-negligible sources of work are reviewing microprojects and
> applications. Having a few more messages in this thread would have been
> a good hint that we had volunteers to do that.

I don't think emails in this thread is what really counts.
I worked on the Idea page starting some months ago, and as I wrote I
reviewed it again and found it not too bad.

>> Someone steps in to do what exactly?
>
> First we need an admin. Then as you said a bit of janitoring work on
> the web pages.

About the janitoring part, as I previously said I am reluctant to do
that as I don't know what Dscho would be ok to mentor.
And I also think it's not absolutely necessary to do it before
applying as an org.

If you just want Peff or someone else to apply, then please just say
it and hopefully Peff will do it and be the admin.


Re: GSoC 2017: application open, deadline = February 9, 2017

2017-02-09 Thread Christian Couder
On Wed, Feb 8, 2017 at 3:54 PM, Matthieu Moy
 wrote:
> Jeff King  writes:
>
>> On Mon, Jan 23, 2017 at 04:02:02PM +0100, Matthieu Moy wrote:
>>
>>> * We need to write the application, i.e. essentially polish and update
>>>   the text here: https://git.github.io/SoC-2016-Org-Application/ and
>>>   update the list of project ideas and microprojects :
>>>   https://git.github.io/SoC-2017-Ideas/
>>>   https://git.github.io/SoC-2016-Microprojects/
>>
>> That can be done incrementally by people who care (especially mentors)
>> over the next week or so, and doesn't require any real admin
>> coordination. If it happens and the result looks good, then the
>> application process is pretty straightforward.
>>
>> If it doesn't, then we probably ought not to participate in GSoC.
>
> OK, it seems the last message did not raise a lot of enthousiasm (unless
> I missed some off-list discussion at Git-Merge?).

I think having 2 possible mentors or co-mentors still shows some
enthousiasm even if I agree it's unfortunate there is not more
enthousiasm.

> The application deadline is tomorrow. I think it's time to admit that we
> won't participate this year, unless someone steps in really soon.

Someone steps in to do what exactly?

I just had a look and the microproject and idea pages for this year are ok.
They are not great sure, but not much worse than the previous years.
What should probably be done is to remove project ideas where is no
"possible mentor" listed.
But I am reluctant to do that as I don't know what Dscho would be ok to mentor.

Also please note that you sent this email just the day before the deadline.
I know that you sent a previous email three weeks ago, but people
easily forget this kind of deadline when they are not often reminded.
(And there is a school vacation is France right now so I am having a
vacation in Alps with unfortunately quite bad Internet access.)

> If we don't participate, I'll add a disclaimer at the top of the
> SoC-related pages on git.github.io to make sure students don't waste
> time preparing an application.

Please submit our application like this.

Thanks,
Christian.


Re: Non-zero exit code without error

2017-02-08 Thread Christian Couder
Hi,

On Tue, Feb 7, 2017 at 12:27 PM, Serdar Sahin  wrote:
> Hi,
>
> When we execute the following lines, the exit code is 1, but it is
> unclear what is the reason of this exit code. Do you have any idea?
>
> git clone --mirror --depth 50 --no-single-branch
> g...@github.hede.com:Casual/hodo-server.git

First, could you tell us the git version you are using on the client
and on the server, and if this a new problem with newer versions?
Also is the repos accessible publicly or is it possible to reproduce
on another repo?
And what happens using other protocols like HTTP/S?

> Cloning into bare repository 'hodo-server.git'...
> remote: Counting objects: 3371, done.
> remote: Compressing objects: 100% (1219/1219), done.
> remote: Total 3371 (delta 2344), reused 2971 (delta 2098), pack-reused 0
> Receiving objects: 100% (3371/3371), 56.77 MiB | 2.18 MiB/s, done.
> Resolving deltas: 100% (2344/2344), done.
>
> echo $?
> 0
>
> cd hodo-server.git/
>
> GIT_CURL_VERBOSE=1 GIT_TRACE=1  git fetch --depth 50 origin
> cc086c96cdffe5c1ac78e6139a7a4b79e7c821ee
> 14:12:35.215889 git.c:350   trace: built-in: git 'fetch'
> '--depth' '50' 'origin' 'cc086c96cdffe5c1ac78e6139a7a4b79e7c821ee'
> 14:12:35.217273 run-command.c:336   trace: run_command: 'ssh'
> 'g...@github.hede.com' 'git-upload-pack '\''Casual/hodo-server.git'\'''
> 14:12:37.301122 run-command.c:336   trace: run_command: 'gc' '--auto'
> 14:12:37.301866 exec_cmd.c:189  trace: exec: 'git' 'gc' '--auto'
> 14:12:37.304473 git.c:350   trace: built-in: git 'gc' '--auto'
>
> echo $?
> 1

What happens if you just run 'git gc --auto' after that?


Re: Git clonebundles

2017-02-05 Thread Christian Couder
On Sat, Feb 4, 2017 at 6:39 PM, Shawn Pearce  wrote:
> On Mon, Jan 30, 2017 at 11:00 PM, Stefan Saasen  wrote:
>>
>> Bitbucket recently added support for Mercurial’s clonebundle extension
>> (http://gregoryszorc.com/blog/2015/10/22/cloning-improvements-in-mercurial-3.6/).
>> Mercurial’s clone bundles allow the Mercurial client to seed a repository 
>> using
>> a bundle file instead of dynamically generating a bundle for the client.
> ...
>> Prior art
>> ~
>>
>> Our proof-of-concept is built on top of ideas that have been
>> circulating for a while. We are aware of a number of proposed changes
>> in this space:
>>
>>
>> * Jeff King's work on network bundles:
>> https://github.com/peff/git/commit/17e2409df37edd0c49ef7d35f47a7695f9608900
>> * Nguyễn Thái Ngọc Duy's work on "[PATCH 0/8] Resumable clone
>> revisited, proof of concept":
>> https://www.spinics.net/lists/git/msg267260.html
>> * Resumable clone work by Kevin Wern:
>> https://public-inbox.org/git/1473984742-12516-1-git-send-email-kevin.m.w...@gmail.com/
>
> I think you missed the most common deployment of prior art, which is
> Android using the git-repo tool[1]. The git-repo tool has had
> clone.bundle support since Sep 2011[2] and the Android Git servers
> have been answering /clone.bundle requests[3] since just before that.
> The bundle files are generated with `git bundle create` on a regular
> schedule by cron.
>
> [1] 
> https://gerrit.googlesource.com/git-repo/+/04071c1c72437a930db017bd4c562ad06087986a/project.py#2091
> [2] 
> https://gerrit.googlesource.com/git-repo/+/f322b9abb4cadc67b991baf6ba1b9f2fbd5d7812
> [3] https://android.googlesource.com/platform/frameworks/base/clone.bundle

There is also Junio's work on Bundle v3 that was unfortunately
recently discarded.
Look for "jc/bundle" in:

http://public-inbox.org/git/xmqq4m0cry60@gitster.mtv.corp.google.com/

and previous "What's cooking in git.git" emails.

I am also working on adding external object database support using
previous work by Peff:

http://public-inbox.org/git/20161130210420.15982-1-chrisc...@tuxfamily.org/

that could be extended to support clone bundles.

[...]


Re: [RFC] Add support for downloading blobs on demand

2017-02-05 Thread Christian Couder
(Sorry for the late reply and thanks to Dscho for pointing me to this thread.)

On Tue, Jan 17, 2017 at 10:50 PM, Ben Peart  wrote:
>> From: Jeff King [mailto:p...@peff.net]
>> On Fri, Jan 13, 2017 at 10:52:53AM -0500, Ben Peart wrote:
>>
>> > Clone and fetch will pass a  --lazy-clone  flag (open to a better name
>> > here) similar to  --depth  that instructs the server to only return
>> > commits and trees and to ignore blobs.
>> >
>> > Later during git operations like checkout, when a blob cannot be found
>> > after checking all the regular places (loose, pack, alternates, etc),
>> > git will download the missing object and place it into the local
>> > object store (currently as a loose object) then resume the operation.
>>
>> Have you looked at the "external odb" patches I wrote a while ago, and
>> which Christian has been trying to resurrect?
>>
>>   https://na01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpublic-
>> inbox.org%2Fgit%2F20161130210420.15982-1-
>> chriscool%40tuxfamily.org%2F=02%7C01%7CBen.Peart%40microsoft.c
>> om%7C9596d3bf32564f123e0c08d43f08a9e1%7C72f988bf86f141af91ab2d7c
>> d011db47%7C1%7C0%7C636202753822020527=a6%2BGOAQoRhjFoxS
>> vftY8JZAVUssmrXuDZ9OBy3xqNZk%3D=0
>>
>> This is a similar approach, though I pushed the policy for "how do you get 
>> the
>> objects" out into an external script. One advantage there is that large 
>> objects
>> could easily be fetched from another source entirely (e.g., S3 or equivalent)
>> rather than the repo itself.
>>
>> The downside is that it makes things more complicated, because a push or a
>> fetch now involves three parties (server, client, and the alternate object
>> store). So questions like "do I have all the objects I need" are hard to 
>> reason
>> about.
>>
>> If you assume that there's going to be _some_ central Git repo which has all
>> of the objects, you might as well fetch from there (and do it over normal git
>> protocols). And that simplifies things a bit, at the cost of being less 
>> flexible.
>
> We looked quite a bit at the external odb patches, as well as lfs and
> even using alternates.  They all share a common downside that you must
> maintain a separate service that contains _some_ of the files.

Pushing the policy for "how do you get the objects" out into an
external helper doesn't mean that the external helper cannot use the
main service.
The external helper is still free to do whatever it wants including
calling the main service if it thinks it's better.

> These
> files must also be versioned, replicated, backed up and the service
> itself scaled out to handle the load.  As you mentioned, having multiple
> services involved increases flexability but it also increases the
> complexity and decreases the reliability of the overall version control
> service.

About reliability, I think it depends a lot on the use case. If you
want to get very big files over an unreliable connection, it can
better if you send those big files over a restartable protocol and
service like HTTP/S on a regular web server.

> For operational simplicity, we opted to go with a design that uses a
> single, central git repo which has _all_ the objects and to focus on
> enhancing it to handle large numbers of files efficiently.  This allows
> us to focus our efforts on a great git service and to avoid having to
> build out these other services.

Ok, but I don't think it prevents you from using at least some of the
same mechanisms that the external odb series is using.
And reducing the number of mechanisms in Git itself is great for its
maintainability and simplicity.

>> > To prevent git from accidentally downloading all missing blobs, some
>> > git operations are updated to be aware of the potential for missing blobs.
>> > The most obvious being check_connected which will return success as if
>> > everything in the requested commits is available locally.
>>
>> Actually, Git is pretty good about trying not to access blobs when it doesn't
>> need to. The important thing is that you know enough about the blobs to
>> fulfill has_sha1_file() and sha1_object_info() requests without actually
>> fetching the data.
>>
>> So the client definitely needs to have some list of which objects exist, and
>> which it _could_ get if it needed to.

Yeah, and the external odb series handles that already, thanks to
Peff's initial work.

>> The one place you'd probably want to tweak things is in the diff code, as a
>> single "git log -Sfoo" would fault in all of the blobs.
>
> It is an interesting idea to explore how we could be smarter about
> preventing blobs from faulting in if we had enough info to fulfill
> has_sha1_file() and sha1_object_info().  Given we also heavily prune the
> working directory using sparse-checkout, this hasn't been our top focus
> but it is certainly something worth looking into.

The external odb series doesn't handle preventing blobs from faulting
in yet, so this could be a common problem.

[...]

>> One big hurdle 

Re: [ANNOUNCE] Git Merge Contributor Summit topic planning

2017-02-01 Thread Christian Couder
On Tue, Jan 31, 2017 at 1:59 AM, Jeff King  wrote:
> On Tue, Jan 31, 2017 at 01:48:05AM +0100, Jeff King wrote:
>
>> The list of topics is totally open. If you're coming and have something
>> you'd like to present or discuss, then propose it here. If you're _not_
>> coming, you may still chime in with input on topics, but please don't
>> suggest a topic unless somebody who is there will agree to lead the
>> discussion.
>
> Here are the two topics I plan on bringing:
>
>   - Git / Software Freedom Conservancy yearly report. I'll plan to give
> a rundown of the past year's activities and financials, along with
> some open questions that could benefit from community input.
>
>   - The git-scm.com website: who runs that thing, anyway? An overview
> of the site, how it's managed, and what it needs.
>
> I plan to send out detailed emails on both topics to the list on
> Wednesday, and then follow-up with a summary of any useful in-person
> discussion (since obviously not everybody will be at the summit).
>
> I'd encourage anybody with a topic to present to consider doing
> something similar.

GitLab people at the Summit (this includes me) would like to spend a
few minutes to introduce https://gitlab.com/gitlab-org/gitaly/ and
answer any questions.


Re: [PATCH v3 14/21] read-cache: touch shared index files when used

2017-01-31 Thread Christian Couder
On Wed, Jan 25, 2017 at 9:52 PM, Junio C Hamano <gits...@pobox.com> wrote:
> Christian Couder <christian.cou...@gmail.com> writes:
>
>> Well, when we cannot freshen a loose file (with
>> freshen_loose_object()), we don't warn or die, we just write the loose
>> file. But here we cannot write the shared index file.
>
> I think that is an excellent point.  Let me make sure I got you
> right.  For loose object files, we may attempt to freshen and when
> it fails we stay silent and do not talk about the failure.  Instead,
> we write the same file again.  That will have two potential outcomes:
>
>  1. write fails and we fail the whole thing.  It is very clear to
> the user that there is something wrong going on.
>
>  2. write succeeds, and because we just wrote it, we know that the
> file is fresh and is protected from gc.
>
> So the "freshen and if fails just write" is sufficient.  It is
> absolutely the right thing to do for loose object files.
>
> When we are forking off a new split index file based on an old
> shared index file, we may attempt to "touch" the old shared one, but
> we cannot write the old shared one again, because other split index
> may be based on that, and we do not have enough information to
> recreate the old one [*1*].  The fallback safety is not available.

Yeah I agree. But note that I was talking about the case where the
shared index file does not exist anymore.
I was just saying that when the shared index file has been removed
behind us, it is equivalent as when
loose files have been removed behind us, except that we cannot write
the removed file to disk.

>> And this doesn't lead to a catastrophic failure right away.
>
> Exactly.
>
>> There
>> could be a catastrophic failure if the shared index file is needed
>> again later, but we are not sure that it's going to be needed later.
>> In fact it may have just been removed because it won't be needed
>> later.
>
> You are listing only the irrelevant cases.  The shared one may be
> used immediately, and the user can keep using it for a while without
> "touching".

Now you are talking about a case where the shared index file can be
used immediately and the user can keep using it.
This is a different case than the case I was talking about (which is
the case when the shared index file does not exist anymore).

> Perhaps the shared one was chowned to "root" while the
> user is looking the other way, and its timestamp is now frozen to
> the time of chown.  It is a ticking time-bomb that will go off when
> your expiry logic kicks in.

In the case I was talking about, the shared index file doesn't exist
anymore. So there is no time ticking bomb, because what could happen,
if we cannot freshen the shared index file, is that the shared index
file could be removed, which wouldn't make things worse as anyway it
does not exist anymore.

So the case when the shared index file doesn't exist is different than
other cases.

Now if you want to talk about the case when the shared index file has
not been removed, but just chowned to "root", then I wonder how is it
different from the case when the file system is read only.
Perhaps the admin just chowned everything to make sure that the repo
could not be changed anymore by the usual processes and users.

>> So I am not sure it's a good idea to anticipate a catastrophic failure
>> that may not happen. Perhaps we could just warn, but I am not sure it
>> will help the user. If the catastrophic failure doesn't happen because
>> the shared index file is not needed, I can't see how the warning could
>> help. And if there is a catastrophic failure, the following will be
>> displayed:
>>
>> fatal: .git/sharedindex.cbfe41352a4623d4d3e9757861fed53f3094e0ac:
>> index file open failed: No such file or directory
>
> That "fatal" is given _ONLY_ after time passes and our failure to
> "touch" the file that is still-in-use left it subject to "gc".  Of
> course, when "fatal" is given, it is too late to warn about ticking
> time bomb.
>
> At the time we notice a ticking time bomb is the only sensible time
> to warn.  Or better yet take a corrective action.

In a previous email you wrote that if the file system is read only, it
is a bad thing if we warn.
So I wonder why it would be a good thing to warn or try to do
something if the admin chowned everything to prevent usual processes
and users to change anything.

> As I expect (and you seem to agree) that a failure to "touch" would
> be a very rare event (like, sysadmin chowning it to "root" by
> mistake),

Yeah, I agree that a failure to "touch" would be a rare event. But the
thing is we often don't

Re: [RFC] Proof of concept: Support multiple authors

2017-01-30 Thread Christian Couder
Hi,

On Sun, Jan 29, 2017 at 7:06 PM, Cornelius Schumacher
 wrote:
> This patch is a proof of concept implementation of support for
> multiple authors. It adds an optional `authors` header to commits
> which is set when there are authors configured in the git config.

I am just wondering if you have read and taken into account the
previous threads on this mailing list about the same subject, like for
example this one:

https://public-inbox.org/git/caovwq4i_hl7xgnxzrvu3osnsbntyxbg8vh6vzi4c1issrre...@mail.gmail.com/

> A new command `git-authors` is used to manage the authors settings.
> Authors are identified by initials and their names and emails are
> set in a `.git_authors_map` file.
>
> Signed-off-by: Cornelius Schumacher 
> ---
>
> When doing pair programming we have to work around the limitation that
> git can only have a single author in each commit. There are some tools
> which help with that such as [git-duet] [1], but there are still some
> limits, because the information about multiple authors is not reflected
> in the native git data model.
>
> Here is a proposal how to change that and implement native support for
> multiple authors in git. It comes with a patch as a proof of concept.
> The patch by no means is finished, it doesn't cover all cases and needs
> more tests and error handling. It's meant as an illustration of the
> concept.
>
> The basic idea is to introduce a new optional `authors` header in
> commits which contains a list of authors. The header is set in each new
> commit when there is an entry `authors.current` in the git config listing
> the current authors. When this config is not there the behavior falls
> back to the current standard behavior.
>
> When the header is there it is treated in the same way as the author
> header. It's preserved on merges and similar operations, is displayed in
> git show, and used to create a list of `From` addresses in `format-patch`.
> Email supports multiple `From` addresses as specified in section 3.6.2 of
> RFC 5322.
>
> When multiple authors are configured, they still write the standard author
> header to keep backwards compatibility. The first author is used as author
> and committer. In the future it might be good to implement something like
> automatic rotation of the order of authors to give credit in a fair way.
>
> To make it easier to work with the authors there is a new command
> `git-authors`. It sets the list of authors using initials as shortcut for
> the full configuration with name and email. The mapping of initials to
> names and email addresses is taken from a file `.git_authors_map` in the
> home directory of the users. This way it's possible to quickly set a list
> of authors by running a command such as `git authors ab cd`. This is
> useful when doing pair programming because the people working together
> usually switch quite frequently and using the command with the intials is
> quicker and less error-prone than editing the configuration with full
> names and emails.
>
> The command also supports setting a single author, setting more than two
> authors or clearing the configuration for multiple authors to go back to
> the standard behavior without the new authors header.
>
> The concept of the command and the mappings file is similar to what
> git-duet does, so that it should be familiar to many people doing pair
> programming. The behavior of git doesn't change when the new feature is
> not used and when it's used it should be backwards compatible so that it
> doesn't break existing functionality. This should make a smooth transition
> for users who choose to make use of it.
>
> Adding support for multiple authors would make the life of developers doing
> pair programming easier. It would be useful in itself, but it would also
> need support by other tools around git to use its full potential.

>From what I recall from previous discussions, the most important
question is: are you sure that it doesn't break any other tool?

> This
> might take a while, but I think it's worth the effort.
>
> I'm willing to continue to work on this and create a patch which is suitable
> for inclusion in git.


Re: Intermittent failure of t1700-split-index.sh

2017-01-26 Thread Christian Couder
On Fri, Jan 27, 2017 at 4:58 AM, Jeff King  wrote:
> On Fri, Jan 27, 2017 at 02:45:15AM +, Ramsay Jones wrote:
>
>> I can't devote any time to looking at this further tonight
>> (it's 2-45am here, I'm off to bed!). Can you reproduce the
>> problem, or is it just me? :)
>
> I can reproduce here with 'pu'. t1700.17 seems to fail reliably with my
> stress script after 5-10 seconds.
>
> It bisects to ff97d10f57c2f99b6d86da8f07c16659979b2780, but take that
> with a moderate grain of salt, as I may have marked a bad commit as
> "good" if it simply got lucky and didn't fail as quickly.

Thanks both for your tests and your report. I will take a look.


[ANNOUNCE] Git Rev News edition 23

2017-01-25 Thread Christian Couder
Hi everyone,

The 23rd edition of Git Rev News is now published:

https://git.github.io/rev_news/2017/01/25/edition-23/

Thanks a lot to all the contributors and helpers!

Enjoy,
Christian, Thomas, Jakub and Markus.


Re: [PATCH v3 14/21] read-cache: touch shared index files when used

2017-01-25 Thread Christian Couder
On Mon, Jan 23, 2017 at 7:53 PM, Junio C Hamano <gits...@pobox.com> wrote:
> Christian Couder <christian.cou...@gmail.com> writes:
>
>> Also in general the split-index mode is useful when you often write
>> new indexes, and in this case shared index files that are used will
>> often be freshened, so the risk of deleting interesting shared index
>> files should be low.
>> ...
>>> Not that I think freshening would actually fail in a repository
>>> where you can actually write into to update the index or its refs to
>>> make a difference (iow, even if we make it die() loudly when shared
>>> index cannot be "touched" because we are paranoid, no real life
>>> usage will trigger that die(), and if a repository does trigger the
>>> die(), I think you would really want to know about it).
>>
>> As I wrote above, I think if we can actually write the shared index
>> file but its freshening fails, it probably means that the shared index
>> file has been removed behind us, and this case is equivalent as when
>> loose files have been removed behind us.
>
> OK, so it is unlikely to happen, and when it happens it leads to a
> catastrophic failure---do we just ignore or do we report an error?

Well, when we cannot freshen a loose file (with
freshen_loose_object()), we don't warn or die, we just write the loose
file. But here we cannot write the shared index file.

And this doesn't lead to a catastrophic failure right away. There
could be a catastrophic failure if the shared index file is needed
again later, but we are not sure that it's going to be needed later.
In fact it may have just been removed because it won't be needed
later.

So I am not sure it's a good idea to anticipate a catastrophic failure
that may not happen. Perhaps we could just warn, but I am not sure it
will help the user. If the catastrophic failure doesn't happen because
the shared index file is not needed, I can't see how the warning could
help. And if there is a catastrophic failure, the following will be
displayed:

fatal: .git/sharedindex.cbfe41352a4623d4d3e9757861fed53f3094e0ac:
index file open failed: No such file or directory

and I don't see how the warning could help on top of that. It could at
most repeat the same thing.


Re: GSoC 2017: application open, deadline = February 9, 2017

2017-01-24 Thread Christian Couder
On Tue, Jan 24, 2017 at 12:28 PM, Johannes Schindelin
 wrote:
> Hi Matthieu,
>
> On Mon, 23 Jan 2017, Matthieu Moy wrote:
>
>> * Who's willing to mentor?
>
> As in the years before, I am willing to mentor.

I am also willing to mentor.

Thanks!


Re: [PATCH v3 14/21] read-cache: touch shared index files when used

2017-01-23 Thread Christian Couder
On Thu, Jan 19, 2017 at 8:00 PM, Junio C Hamano <gits...@pobox.com> wrote:
> Duy Nguyen <pclo...@gmail.com> writes:
>
>> On Mon, Jan 9, 2017 at 9:34 PM, Junio C Hamano <gits...@pobox.com> wrote:
>>> Duy Nguyen <pclo...@gmail.com> writes:
>>>
>>>> On Sun, Jan 8, 2017 at 4:46 AM, Junio C Hamano <gits...@pobox.com> wrote:
>>>>> Christian Couder <christian.cou...@gmail.com> writes:
>>>>>
>>>>>> So what should we do if freshen_file() returns 0 which means that the
>>>>>> freshening failed?
>>>>>
>>>>> You tell me ;-)  as you are the one who is proposing this feature.

Before the above question I already had given my opinion about what we
should do.

There are the following cases:

- Freshening failed because the shared index file does not exist
anymore. In this case it could have been removed for a good reason
(for example maybe the user wants to remove all the shared index
files), or it could be a bug somewhere else. Anyway we cannot know and
the user will get an error if the shared index file that disappeared
is read from afterwards, so I don't think we need to warn or do
anything.

- Freshening failed because the mtime of the shared index cannot be
changed. You already in a previous email wrote that we shoudn't warn
if the file system is read only, and I agree with that, as anyway if
the file system is read only, the shared index file cannot be deleted,
so there is no risk from the current user. Also the split index mode
is useful to speed up index writing at the cost of making index
reading a little slower, so its use in a read only mode should not be
the primary way it is used.

So my opinion is that there are good reasons not to do anything if
freshening fails.

>>>> My answer is, we are not worse than freshening loose objects case
>>>> (especially since I took the idea from there).
>>>
>>> I do not think so, unfortunately.  Loose object files with stale
>>> timestamps are not removed as long as objects are still reachable.

As the current index is read, which freshens its shared index file,
before a new index is created, the number of shared index files
doesn't go below 2. This can be seen in the tests changed in patch
19/21. So the risk of deleting interesting shared index files is quite
low in my opinion.

Also in general the split-index mode is useful when you often write
new indexes, and in this case shared index files that are used will
often be freshened, so the risk of deleting interesting shared index
files should be low.

>> But there are plenty of unreachable loose objects, added in index,
>> then got replaced with new versions. cache-tree can create loose trees
>> too and it's been run more often, behind user's back, to take
>> advantage of the shortcut in unpack-trees.
>
> I am not sure if I follow.  Aren't objects reachable from the
> cache-tree in the index protected from gc?
>
> Not that I think freshening would actually fail in a repository
> where you can actually write into to update the index or its refs to
> make a difference (iow, even if we make it die() loudly when shared
> index cannot be "touched" because we are paranoid, no real life
> usage will trigger that die(), and if a repository does trigger the
> die(), I think you would really want to know about it).

As I wrote above, I think if we can actually write the shared index
file but its freshening fails, it probably means that the shared index
file has been removed behind us, and this case is equivalent as when
loose files have been removed behind us.


Re: [PATCH v3 08/21] Documentation/git-update-index: talk about core.splitIndex config var

2017-01-23 Thread Christian Couder
On Mon, Jan 9, 2017 at 12:18 PM, Duy Nguyen <pclo...@gmail.com> wrote:
> On Sun, Jan 8, 2017 at 4:38 AM, Junio C Hamano <gits...@pobox.com> wrote:
>> Christian Couder <christian.cou...@gmail.com> writes:
>>
>>> It feels strange that when I do things one way, you suggest another
>>> way, and the next time in a similar situation when I do things the way
>>> you suggested previously, then you suggest the way I did it initially
>>> the first time...
>>
>> Perhaps because neither is quite satisfactory and I am being forced
>> to choose between the two unsatifactory designs?  In any case, I
>> mostly am and was pointing out the issues and not saying the other
>> one is the most preferred solution in these threads.
>>
>> I think there should just be one authoritative source of the truth,
>
> Either that, or we make sure all sources of truth are consistent. In
> this case, 'update --split-index' could update core.splitIndex if it
> finds that the config tells a different story. As a plumbing though, I
> rather leave update-index do simple things, even if it means the user
> has to clean up after it (or before it) with "git config -unset
> core.splitIndex". Another option is refuse to execute --split-index in
> the presence of (conflicting) core.splitIndex. We kind of force the
> user to keep all sources of truth consistent this way while leaving a
> back door ("git -c core.splitIndex= update-index") for those who need
> tools to recover from a bad case.
>
>> and I have always thought it should be the bit set in the index file
>> when the command line option is used, because that was the way the
>> feature was introduced first and I am superstitious about end-user
>> inertia.  And from that point of view, no matter how you make this
>> new "config" thing interact with it, it would always give a strange
>> and unsatifactory end-user experience, at least to me.
>>
>> Perhaps we should declare that config will be the one and only way
>> in the future and start deprecating the command line option way.
>> That will remove the need for two to interact with each other.

That would be my preferred solution as I think it is the simplest in
the end for users.
Also, as Duy wrote above, one can always use something like "git -c
core.splitIndex= ...", which by the way can work for any command, not
just "update-index".

Anyway we would have to introduce core.splitIndex first, and it
wouldn't make sense to have a different behavior between
--[no-]split-index and --[no-]untracked-cache in the meantime before
they are deprecated and eventually removed.

So let's just go with the implementation in this series, which is
similar to --[no-]untracked-cache, and let's plan to deprecate
--[no-]split-index and --[no-]untracked-cache in a later patch series.


Re: [RFC] what content should go in https://git-scm.com/doc/ext

2017-01-22 Thread Christian Couder
On Sat, Jan 21, 2017 at 2:55 PM, Jeff King  wrote:
> I'm wondering if anybody has opinions on:
>
>   https://github.com/git/git-scm.com/pull/924
>
> (and I suspect most people in this community do not read pull requests
> there, hence this post).

Yeah, thanks for posting it here.

> Basically, we maintain a list of links to outside documentation, as well
> as to books. Somebody has requested a link to their paid tutorial
> course. How should we handle it?

I think it depends if you are ready and willing to handle more changes.

> If the resource is valuable, then it may make sense to link to it, even
> if it costs money. We already do this with book links, and my policy has
> been to link any relevant book that is requested (I don't read them for
> quality, so I have no opinions).
>
> Should we do the same for tutorial content like this?

It could make sense to link to tutorial content, and then it could
also make sense to link to trainings (online and maybe offline too),
but my fear is that you could then have many people who want to
advertise their tutorials and trainings, and that it might be
difficult to draw a line while keeping the playing field even.

But maybe it's worth trying anyway.


Draft of Git Rev News edition 23

2017-01-21 Thread Christian Couder
Hi,

A draft of a new Git Rev News edition is available here:

  https://github.com/git/git.github.io/blob/master/rev_news/drafts/edition-23.md

Everyone is welcome to contribute in any section either by editing the
above page on GitHub and sending a pull request, or by commenting on
this GitHub issue:

  https://github.com/git/git.github.io/issues/209

You can also reply to this email.

In general all kinds of contribution, for example proofreading,
suggestions for articles or links, help on the issues in GitHub, and
so on, are very much appreciated.

I tried to cc everyone who appears in this edition but maybe I missed
some people, sorry about that.

Thomas, Jakub, Markus and myself plan to publish this edition on
Wednesday January 25.

Thanks,
Christian.


Re: [PATCH] Documentation/bisect: improve on (bad|new) and (good|bad)

2017-01-15 Thread Christian Couder
On Fri, Jan 13, 2017 at 8:14 PM, Junio C Hamano <gits...@pobox.com> wrote:
> Christian Couder <christian.cou...@gmail.com> writes:
>
>> The following part of the description:
>>
>> git bisect (bad|new) []
>> git bisect (good|old) [...]
>>
>> may be a bit confusing, as a reader may wonder if instead it should be:
>>
>> git bisect (bad|good) []
>> git bisect (old|new) [...]
>>
>> Of course the difference between "[]" and "[...]" should hint
>> that there is a good reason for the way it is.
>>
>> But we can further clarify and complete the description by adding
>> "" and "" to the "bad|new" and "good|old"
>> alternatives.
>>
>> Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
>> ---
>>  Documentation/git-bisect.txt | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> Thanks.  The patch looks good.
>
> A related tangent.
>
> Last night, I was trying to think if there is a fundamental reason
> why "bad/new/term-new" cannot take more than one s on the newer
> side of the bisection, and couldn't quite think of any before
> falling asleep.
>
> Currently we keep track of a single bisect/bad, while marking all the
> revs given as good previously as bisect/good-.
>
> Because the next "bad" is typically chosen from the region of the
> commit DAG that is bounded by bad and good commits, i.e. "rev-list
> bisect/bad --not bisect/good-*", the current bisect/bad will always
> be an ancestor of all bad commits that used to be bisect/bad, and
> keeping previous bisect/bad as bisect/bad- won't change the
> region of the commit DAG yet to be explored.
>
> As a reason why we need to use only a single bisect/bad, the above
> description is understandable.  But as a reason why we cannot have
> more than one, it is tautological.  It merely says "if we start from
> only one and dig history to find older culprit, we need only one
> bad".
>
> I fell asleep last night without thinking further than that.
>
> I think the answer to the question "why do we think we need a single
> bisect/bad?" is "because bisection is about assuming that there is
> only one commit that flips the tree state from 'old' to 'new' and
> finding that single commit".  That would mean that even if we had
> bisect/bad-A and bisect/bad-B, e.g.
>
>   o---o---o---bad-A
>  /
> -Good---o---o---o
>  \
>   o---o---o---bad-B
>
>
> where 'o' are all commits whose goodness is not yet known, because
> bisection is valid only when we are hunting for a single commit that
> flips the state from good to bad, that commit MUST be at or before
> the merge base of bad-A and bad-B.  So even if we allowed
>
> $ git bisect bad bad-A bad-B
>
> on the command line, we won't have to set bisect/bad-A and
> bisect/bad-B.  We only need a single bisect/bad that points at the
> merge base of these two.
>
> But what if bad-A and bad-B have more than one merge bases?  We
> won't know which side the badness came from.
>
>   o---o---o---bad-A
>  / \ /
> -Good---o---o---o   /
>  \ / \
>   o---o---o---bad-B
>
> Being able to bisect the region of DAG bound by "^Good bad-A bad-B"
> may have value in such a case.  I dunno.

I agree that there could be improvements in this area. Though from my
experience with special cases, like when a good commit is not an
ancestor of the bad commit (where there are probably bugs still
lurking), I think it could be tricky to implement correctly in all
cases, and it could make it even more difficult, than it sometimes
already is, to explain the resulting behavior to users.


[PATCH] Documentation/bisect: improve on (bad|new) and (good|bad)

2017-01-13 Thread Christian Couder
The following part of the description:

git bisect (bad|new) []
git bisect (good|old) [...]

may be a bit confusing, as a reader may wonder if instead it should be:

git bisect (bad|good) []
git bisect (old|new) [...]

Of course the difference between "[]" and "[...]" should hint
that there is a good reason for the way it is.

But we can further clarify and complete the description by adding
"" and "" to the "bad|new" and "good|old"
alternatives.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 Documentation/git-bisect.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-bisect.txt b/Documentation/git-bisect.txt
index 2bb9a577a2..bdd915a66b 100644
--- a/Documentation/git-bisect.txt
+++ b/Documentation/git-bisect.txt
@@ -18,8 +18,8 @@ on the subcommand:
 
  git bisect start [--term-{old,good}= --term-{new,bad}=]
  [--no-checkout] [ [...]] [--] [...]
- git bisect (bad|new) []
- git bisect (good|old) [...]
+ git bisect (bad|new|) []
+ git bisect (good|old|) [...]
  git bisect terms [--term-good | --term-bad]
  git bisect skip [(|)...]
  git bisect reset []
-- 
2.11.0.313.g11b7cc88e6.dirty



Re: Bug report: Documentation error in git-bisect man description

2017-01-12 Thread Christian Couder
On Fri, Jan 13, 2017 at 12:42 AM, Junio C Hamano  wrote:
> Junio C Hamano  writes:
>
>> Manuel Ullmann  writes:
>>
>> Hmmm, I tend to agree, modulo a minor fix.
>>
>> If the description were in a context inside a paragraph like this:
>>
>>   When you want to tell 'git bisect' that a  belongs to
>>   the newer half of the history, you say
>>
>>   git bisect (bad|new) []
>>
>>   On the other hand, when you want to tell 'git bisect' that a
>>belongs to the older half of the history, you can say
>>
>>   git bisect (good|old) []
>>
>> then the pairing we see in the current text makes quite a lot of
>> sense.
>
> Actually, the above is _exactly_ what was intended.  I misread the
> current documentation when I made the comment, and I think that the
> current one _IS_ correct.  The latter half of the above is not about
> a single rev.  You can paint multiple commits with the "older half"
> color, i.e.
>
> On the other hand, when you want to tell 'git bisect' that
> one or more s  belong to the older half of the history,
> you can say
>
> git bisect (good|old) [...]
>
> In contrast, you can mark only one  as newer (or "already
> bad").  So pairing (bad|good) and (new|old) like you suggested
> breaks the correctness of the command line description.

Yeah, I agree.

> If (bad|new) and (good|old) bothers you because they may mislead the
> readers to think bad is an opposite of new (and good is an opposite
> of old), the only solution I can think of to that problem is to
> expand these two lines into four and list them like this:
>
> git bisect bad []
> git bisect good [...]
> git bisect new []
> git bisect old [...]

Maybe it would be more complete and a bit clearer if it was:

   git bisect (bad|new|) []
   git bisect (good|old|) [...]


Re: [PATCH v3 21/21] Documentation/git-update-index: explain splitIndex.*

2017-01-02 Thread Christian Couder
On Tue, Dec 27, 2016 at 8:13 PM, Junio C Hamano <gits...@pobox.com> wrote:
> Christian Couder <christian.cou...@gmail.com> writes:
>
>>  --split-index::
>>  --no-split-index::
>> - Enable or disable split index mode. If enabled, the index is
>> - split into two files, $GIT_DIR/index and $GIT_DIR/sharedindex..
>> - Changes are accumulated in $GIT_DIR/index while the shared
>> - index file contains all index entries stays unchanged. If
>> - split-index mode is already enabled and `--split-index` is
>> - given again, all changes in $GIT_DIR/index are pushed back to
>> - the shared index file. This mode is designed for very large
>> - indexes that take a significant amount of time to read or write.
>> + Enable or disable split index mode. If split-index mode is
>> + already enabled and `--split-index` is given again, all
>> + changes in $GIT_DIR/index are pushed back to the shared index
>> + file.
>
> In the world after this series that introduced the percentage-based
> auto consolidation, it smells strange, or even illogical, that index
> is un-split after doing this:
>
> $ git update-index --split-index
> $ git update-index --split-index
>
> Before this series, it may have been a quick and dirty way to force
> consolidating the split index without totally disabling the feature,
> i.e. it would have looked more like
>
> $ git update-index --split-index
> ... work work work to accumulate more modifications
> ... consolidate into one --- there was no other way than
> ... disabling it temporarily
> $ git update-index --no-split-index
> ... but the user likes the feature so re-enable it.
> $ git update-index --split-index
>
> which I guess is where this strange behaviour comes from.
>
> It may be something we need to fix to unconfuse the end-users after
> this series lands.  Even though "first disable and then re-enable"
> takes two commands (as opposed to one), it is far more logical.  And
> the percentage-based auto consolidation feature is meant to reduce
> the need for manual consolidation, so it probably makes sense to
> remove this illogical feature.

Yeah, I tend to agree that this feature could be removed later. Though
before removing it, I'd like to hear Duy's opinion on this as he
created the feature in the first place.

>> @@ -394,6 +390,31 @@ Although this bit looks similar to assume-unchanged 
>> bit, its goal is
>>  different from assume-unchanged bit's. Skip-worktree also takes
>>  precedence over assume-unchanged bit when both are set.
>>
>> +Split index
>> +---
>> +
>> +This mode is designed for very large indexes that take a significant
>> +amount of time to read or write.
>
> This is not a new problem, but it probably is incorrect to say "to
> read or write".  It saves time by not rewriting the whole thing but
> instead write out only the updated bits.  You'd still read the whole
> thing while populating the in-core index from the disk, and if
> anything, you'd probably spend _more_ cycles because you'd essentially
> be reading the base and then reading the delta to apply on top.

Ok, then what about:

+This mode is designed for repositories with very large indexes, and aims
+at reducing the time it takes to repeatedly write these indexes.

>> +To avoid deleting a shared index file that is still used, its mtime is
>> +updated to the current time everytime a new split index based on the
>> +shared index file is either created or read from.
>
> The same comment on the mention of "mtime" in another patch applies
> here as well.

Ok, I will use "modification time" instead of "mtime".

Thanks.


Re: [PATCH v3 20/21] Documentation/config: add splitIndex.sharedIndexExpire

2017-01-02 Thread Christian Couder
On Tue, Dec 27, 2016 at 8:11 PM, Junio C Hamano <gits...@pobox.com> wrote:
> Christian Couder <christian.cou...@gmail.com> writes:
>
>> Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
>> ---
>>  Documentation/config.txt | 11 +++
>>  1 file changed, 11 insertions(+)
>>
>> diff --git a/Documentation/config.txt b/Documentation/config.txt
>> index e0f5a77980..24e771d22e 100644
>> --- a/Documentation/config.txt
>> +++ b/Documentation/config.txt
>> @@ -2786,6 +2786,17 @@ splitIndex.maxPercentChange::
>>   than 20 percent of the total number of entries.
>>   See linkgit:git-update-index[1].
>>
>> +splitIndex.sharedIndexExpire::
>> + When the split index feature is used, shared index files with
>> + a mtime older than this time will be removed when a new shared
>
> As end-user facing documentation, it would be much better if we can
> rephrase it for those who do not know what a 'mtime' is, and it
> would be even better if we can do so without losing precision.
>
> I think "shared index files that were not modified since the time
> this variable specifies will be removed" would be understandable and
> correct enough?

Yeah, I agree it is better for end users. I will use what you suggest.

>> + index file is created. The value "now" expires all entries
>> + immediately, and "never" suppresses expiration altogether.
>> + The default value is "one.week.ago".
>> + Note that each time a new split-index file is created, the
>> + mtime of the related shared index file is updated to the
>> + current time.
>
> To match the above suggestion, "Note that a shared index file is
> considered modified (for the purpose of expiration) each time a new
> split-index file is created based on it."?

Yeah, I also agree it is better and will use that.


Re: [PATCH v3 14/21] read-cache: touch shared index files when used

2017-01-02 Thread Christian Couder
On Tue, Dec 27, 2016 at 8:10 PM, Junio C Hamano <gits...@pobox.com> wrote:
> Christian Couder <christian.cou...@gmail.com> writes:
>
>> +/*
>> + * Signal that the shared index is used by updating its mtime.
>> + *
>> + * This way, shared index can be removed if they have not been used
>> + * for some time. It's ok to fail to update the mtime if we are on a
>> + * read only file system.
>> + */
>> +void freshen_shared_index(char *base_sha1_hex)
>> +{
>> + const char *shared_index = git_path("sharedindex.%s", base_sha1_hex);
>> + check_and_freshen_file(shared_index, 1);
>
> What happens when this call fails?  The function returns 0 if the
> file did not even exist.  It also returns 0 if you cannot update its
> timestamp.

Yeah and I don't think it's a problem in either case.
If we cannot update its timestamp, it's not a problem, as we could be
on a read-only file system, and you said in a previous iteration that
we should not even warn in this case.
If the file does not exist, it could be because it has just been
deleted for a good reason, and anyway, if it is a problem that the
shared index file has been deleted, it is better addressed when we
will actually need the shared index file to read something into from
it, rather than just to update its mtime.

> Shouldn't the series be exposing freshen_file() instead _and_ taking
> its error return value seriously?

So what should we do if freshen_file() returns 0 which means that the
freshening failed?

>> +}


Re: [PATCH v3 12/21] Documentation/config: add splitIndex.maxPercentChange

2017-01-02 Thread Christian Couder
On Tue, Dec 27, 2016 at 8:09 PM, Junio C Hamano <gits...@pobox.com> wrote:
> Christian Couder <christian.cou...@gmail.com> writes:
>
>> Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
>> ---
>>  Documentation/config.txt | 13 +
>>  1 file changed, 13 insertions(+)
>>
>> diff --git a/Documentation/config.txt b/Documentation/config.txt
>> index 221c5982c0..e0f5a77980 100644
>> --- a/Documentation/config.txt
>> +++ b/Documentation/config.txt
>> @@ -2773,6 +2773,19 @@ showbranch.default::
>>   The default set of branches for linkgit:git-show-branch[1].
>>   See linkgit:git-show-branch[1].
>>
>> +splitIndex.maxPercentChange::
>> + When the split index feature is used, this specifies the
>> + percent of entries the split index can contain compared to the
>> + whole number of entries in both the split index and the shared
>
> s/whole/total/ to match the last sentence of this section, perhaps?
> "The number of all entries" would also be OK, but "the whole number
> of entries" sounds a bit strange.

Yeah "total" is better than "whole" here. I will use "total".


Re: [PATCH v3 10/21] read-cache: regenerate shared index if necessary

2017-01-02 Thread Christian Couder
On Tue, Dec 27, 2016 at 8:08 PM, Junio C Hamano <gits...@pobox.com> wrote:
> Christian Couder <christian.cou...@gmail.com> writes:
>
>> + case 0:
>> + return 1; /* 0% means always write a new shared index */
>> + case 100:
>> + return 0; /* 100% means never write a new shared index */
>> + default:
>> + ; /* do nothing: just use the configured value */
>> + }
>
> Just like you did in 04/21, write "break" to avoid mistakes made in
> the future, i.e.
>
> default:
> break; /* just use the configured value */

Ok, I will do that.

>> +
>> + /* Count not shared entries */
>> + for (i = 0; i < istate->cache_nr; i++) {
>> + struct cache_entry *ce = istate->cache[i];
>> + if (!ce->index)
>> + not_shared++;
>> + }
>> +
>> + return istate->cache_nr * max_split < not_shared * 100;
>
> On a 32-bit arch with 2G int and more than 20 million paths in the
> index, multiplying by max_split that can come close to 100 can
> theoretically cause integer overflow, but in practice it probably
> does not matter.  Or does it?

>From a cursory look a "struct cache_entry" takes at least 80 bytes
without counting the "char name[FLEX_ARRAY]" on a 32 bit machine, so I
don't think it would be a good idea to work on a repo with 20 million
paths on a 32 bit machine, but maybe theoretically it could be a
problem.

To be safe I think I will use:

return (int64_t)istate->cache_nr * max_split < (int64_t)not_shared * 100;


Re: [PATCH v3 08/21] Documentation/git-update-index: talk about core.splitIndex config var

2017-01-02 Thread Christian Couder
On Tue, Dec 27, 2016 at 8:07 PM, Junio C Hamano <gits...@pobox.com> wrote:
> Christian Couder <christian.cou...@gmail.com> writes:
>
>> Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
>> ---
>>  Documentation/git-update-index.txt | 6 ++
>>  1 file changed, 6 insertions(+)
>>
>> diff --git a/Documentation/git-update-index.txt 
>> b/Documentation/git-update-index.txt
>> index 7386c93162..e091b2a409 100644
>> --- a/Documentation/git-update-index.txt
>> +++ b/Documentation/git-update-index.txt
>> @@ -171,6 +171,12 @@ may not support it yet.
>>   given again, all changes in $GIT_DIR/index are pushed back to
>>   the shared index file. This mode is designed for very large
>>   indexes that take a significant amount of time to read or write.
>> ++
>> +These options take effect whatever the value of the `core.splitIndex`
>> +configuration variable (see linkgit:git-config[1]).
>
> Doesn't the "whatever..." clause in this sentence lack its verb
> (presumably, "is", right after "variable")?

I think that it is ok to have no verb here. My preferred online
dictionary (http://www.wordreference.com/enfr/whatever) gives "We'll
try to free the hostage whatever the cost." as an example with no
verb, though I am not a native speaker.

Also note that I mostly copied what I had already written for
--(no-)untracked-cache:

--untracked-cache::
--no-untracked-cache::
Enable or disable untracked cache feature. Please use
`--test-untracked-cache` before enabling it.
+
These options take effect whatever the value of the `core.untrackedCache`
configuration variable (see linkgit:git-config[1]). But a warning is
emitted when the change goes against the configured value, as the
configured value will take effect next time the index is read and this
will remove the intended effect of the option.

>> +emitted when the change goes against the configured value, as the
>> +configured value will take effect next time the index is read and this
>> +will remove the intended effect of the option.
>
> It feels somewhat strange to see a warning in this case.

We already discussed the warning issue for --(no-)untracked-cache, and
I am just following the conclusions of that previous discussion about
--(no-)untracked-cache. See the end of this message where you
suggested the warning:

https://public-inbox.org/git/xmqqlh8cg9y2@gitster.mtv.corp.google.com/

> If the user configures the system to usually do X, and issues a
> command that explicitly does Y (or "not-X"), we do not warn for the
> command invocation if it is a single-shot operation.  That's the
> usual "command line overrides configuration" an end-user expects.
>
> I think you made this deviate from the usual end-user expectation
> because it is unpredictable when the configuration kicks in the next
> time to undo the effect of this command line request.  And I agree
> that it is a very un-nice thing to do to the users if we did not
> warn here, if we are to keep that unpredictableness.

I also think it would be strange and user unfriendly for
--untracked-cache and --split-index to behave differently, and I am
reluctant at this point to rework the way it works for
--untracked-cache.

> But stepping back a bit, we know the user's intention is that with
> "--split-index" the user wants to use the split index, even though
> the user may have configuration not to use the split index.  Don't
> we want to honor that intention?  In order to honor that intention
> without getting confused by the configured variable to tell us not
> to split, another way is to destroy that unpredictableness.  For
> that, shouldn't we automatically remove the configuration that says
> "do not split" (by perhaps set it to "do split")?  Doing so still
> may need a warning that says "we disabled your previously configured
> setting while at it", but it would be much better than a warning
> message that essentially says "we do it once, but we will disobey
> you at an unpredictable time", I would think.

I wanted to do that for --untracked-cache around one year ago but in
the following message:

https://public-inbox.org/git/xmqqsi3ckadi@gitster.mtv.corp.google.com/

you wrote the following:

"Why is it a good idea to allow an explicit operation from the
command line to muck with the configuration?  If the user wants to
change the configuration permanently, "git config" has been there
for the purpose for all the configuration variables to do so for
almost forever.  Why is it a good idea to make this variable so
special that the user does not have to use "git config" but disable
or enable it in some other way?"

It feels strange that when I do things one way, you suggest another
way, and the next time in a similar situation when I do things the way
you suggested previously, then you suggest the way I did it initially
the first time...


Re: [PATCH v3 06/21] t1700: add tests for core.splitIndex

2017-01-02 Thread Christian Couder
On Tue, Dec 27, 2016 at 8:04 PM, Junio C Hamano <gits...@pobox.com> wrote:
> Christian Couder <christian.cou...@gmail.com> writes:
>
>> +test_expect_success 'set core.splitIndex config variable to true' '
>> + git config core.splitIndex true &&
>> + : >three &&
>> + git update-index --add three &&
>> + git ls-files --stage >ls-files.actual &&
>> + cat >ls-files.expect <> +100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0one
>> +100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0three
>> +100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0two
>> +EOF
>> + test_cmp ls-files.expect ls-files.actual &&
>
> It does not add much value to follow the "existing" outdated style
> like this when you are only adding new tests.  Write these like
>
> cat >ls-files.expect <<-\EOF &&
> 100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0   one
> 100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0   three
> 100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0   two
> EOF
>
> which would give incentive to others (or yourself) to update the
> style of the existing mess ;-).

Ok, I will add a patch to update the style of the existing tests at
the beginning of the series and then use the same new style in the
tests I add in later patches.


Re: [PATCH v2 00/21] Add configuration options for split-index

2016-12-26 Thread Christian Couder
On Mon, Dec 19, 2016 at 1:02 PM, Duy Nguyen <pclo...@gmail.com> wrote:
> On Sat, Dec 17, 2016 at 03:55:26PM +0100, Christian Couder wrote:
>> Goal
>> 
>>
>> We want to make it possible to use the split-index feature
>> automatically by just setting a new "core.splitIndex" configuration
>> variable to true.
>>
>> This can be valuable as split-index can help significantly speed up
>> `git rebase` especially along with the work to libify `git apply`
>> that has been merged to master
>> (see 
>> https://github.com/git/git/commit/81358dc238372793b1590efa149cc1581d1fbd98)
>> and is now in v2.11.
>
> I've read through the series (*) and I think it looks good, just a few
> minor comments here and there.

Thanks for your review.

I think I addressed all the minor points left in the v3 and the emails
I just sent.

> (*) guiltily admit that I only skimmed through tests, not giving them
> as much attention as I should have


Re: [PATCH v3 00/21] Add configuration options for split-index

2016-12-26 Thread Christian Couder
On Mon, Dec 26, 2016 at 11:22 AM, Christian Couder
<christian.cou...@gmail.com> wrote:
>
> Highlevel view of the patches in the series
> ~~~
>
> Except for patch 1/21, there are 3 big steps, one for each new
> configuration variable introduced.
>
> There only a few small differences between this patch series and the
> v2 patch series sent a few weeks ago. Only two commits have been
> changed a little, as suggested by Duy.

Here is the diff compared to v2:

diff --git a/config.c b/config.c
index 5c52cefd78..d6c8f8f3ba 100644
--- a/config.c
+++ b/config.c
@@ -1724,7 +1724,7 @@ int git_config_get_untracked_cache(void)

 int git_config_get_split_index(void)
 {
-   int val = -1;
+   int val;

if (!git_config_get_maybe_bool("core.splitindex", ))
return val;
diff --git a/read-cache.c b/read-cache.c
index 772343ab25..bf0ac1ce61 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2227,18 +2227,17 @@ static unsigned long get_shared_index_expire_date(void)
return shared_index_expire_date;
 }

-static int can_delete_shared_index(const char *shared_sha1_hex)
+static int can_delete_shared_index(const char *shared_index_path)
 {
struct stat st;
unsigned long expiration;
-   const char *shared_index = git_path("sharedindex.%s", shared_sha1_hex);

/* Check timestamp */
expiration = get_shared_index_expire_date();
if (!expiration)
return 0;
-   if (stat(shared_index, ))
-   return error_errno(_("could not stat '%s"), shared_index);
+   if (stat(shared_index_path, ))
+   return error_errno(_("could not stat '%s"), shared_index_path);
if (st.st_mtime > expiration)
return 0;

@@ -2255,13 +2254,15 @@ static int clean_shared_index_files(const char
*current_hex)

while ((de = readdir(dir)) != NULL) {
const char *sha1_hex;
+   const char *shared_index_path;
if (!skip_prefix(de->d_name, "sharedindex.", _hex))
continue;
if (!strcmp(sha1_hex, current_hex))
continue;
-   if (can_delete_shared_index(sha1_hex) > 0 &&
-   unlink(git_path("%s", de->d_name)))
-   error_errno(_("unable to unlink: %s"),
git_path("%s", de->d_name));
+   shared_index_path = git_path("%s", de->d_name);
+   if (can_delete_shared_index(shared_index_path) > 0 &&
+   unlink(shared_index_path))
+   error_errno(_("unable to unlink: %s"),
shared_index_path);
}
closedir(dir);


[PATCH v3 16/21] read-cache: unlink old sharedindex files

2016-12-26 Thread Christian Couder
Everytime split index is turned on, it creates a "sharedindex."
file in the git directory. This change makes sure that shared index
files that haven't been used for a long time are removed when a new
shared index file is created.

The new "splitIndex.sharedIndexExpire" config variable is created
to tell the delay after which an unused shared index file can be
deleted. It defaults to "1.week.ago".

A previous commit made sure that each time a split index file is
created the mtime of the shared index file it references is updated.
This makes sure that recently used shared index file will not be
deleted.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 read-cache.c | 64 +++-
 1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/read-cache.c b/read-cache.c
index 9fbad2044b..e62a6c742d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2207,6 +2207,65 @@ static int write_split_index(struct index_state *istate,
return ret;
 }
 
+static const char *shared_index_expire = "1.week.ago";
+
+static unsigned long get_shared_index_expire_date(void)
+{
+   static unsigned long shared_index_expire_date;
+   static int shared_index_expire_date_prepared;
+
+   if (!shared_index_expire_date_prepared) {
+   git_config_get_expiry("splitindex.sharedindexexpire",
+ _index_expire);
+   shared_index_expire_date = approxidate(shared_index_expire);
+   shared_index_expire_date_prepared = 1;
+   }
+
+   return shared_index_expire_date;
+}
+
+static int can_delete_shared_index(const char *shared_index_path)
+{
+   struct stat st;
+   unsigned long expiration;
+
+   /* Check timestamp */
+   expiration = get_shared_index_expire_date();
+   if (!expiration)
+   return 0;
+   if (stat(shared_index_path, ))
+   return error_errno(_("could not stat '%s"), shared_index_path);
+   if (st.st_mtime > expiration)
+   return 0;
+
+   return 1;
+}
+
+static int clean_shared_index_files(const char *current_hex)
+{
+   struct dirent *de;
+   DIR *dir = opendir(get_git_dir());
+
+   if (!dir)
+   return error_errno(_("unable to open git dir: %s"), 
get_git_dir());
+
+   while ((de = readdir(dir)) != NULL) {
+   const char *sha1_hex;
+   const char *shared_index_path;
+   if (!skip_prefix(de->d_name, "sharedindex.", _hex))
+   continue;
+   if (!strcmp(sha1_hex, current_hex))
+   continue;
+   shared_index_path = git_path("%s", de->d_name);
+   if (can_delete_shared_index(shared_index_path) > 0 &&
+   unlink(shared_index_path))
+   error_errno(_("unable to unlink: %s"), 
shared_index_path);
+   }
+   closedir(dir);
+
+   return 0;
+}
+
 static struct tempfile temporary_sharedindex;
 
 static int write_shared_index(struct index_state *istate,
@@ -2228,8 +2287,11 @@ static int write_shared_index(struct index_state *istate,
}
ret = rename_tempfile(_sharedindex,
  git_path("sharedindex.%s", 
sha1_to_hex(si->base->sha1)));
-   if (!ret)
+   if (!ret) {
hashcpy(si->base_sha1, si->base->sha1);
+   clean_shared_index_files(sha1_to_hex(si->base->sha1));
+   }
+
return ret;
 }
 
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 13/21] sha1_file: make check_and_freshen_file() non static

2016-12-26 Thread Christian Couder
This function will be used in a commit soon, so let's make
it available globally.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 cache.h | 3 +++
 sha1_file.c | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index e15b421b6f..f442f28189 100644
--- a/cache.h
+++ b/cache.h
@@ -1170,6 +1170,9 @@ extern int has_pack_index(const unsigned char *sha1);
 
 extern void assert_sha1_type(const unsigned char *sha1, enum object_type 
expect);
 
+/* Helper to check and "touch" a file */
+extern int check_and_freshen_file(const char *fn, int freshen);
+
 extern const signed char hexval_table[256];
 static inline unsigned int hexval(unsigned char c)
 {
diff --git a/sha1_file.c b/sha1_file.c
index 1173071859..f5303d955a 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -601,7 +601,7 @@ static int freshen_file(const char *fn)
  * either does not exist on disk, or has a stale mtime and may be subject to
  * pruning).
  */
-static int check_and_freshen_file(const char *fn, int freshen)
+int check_and_freshen_file(const char *fn, int freshen)
 {
if (access(fn, F_OK))
return 0;
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 06/21] t1700: add tests for core.splitIndex

2016-12-26 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/t1700-split-index.sh | 37 +
 1 file changed, 37 insertions(+)

diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index 292a0720fc..db8c39f446 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -200,4 +200,41 @@ EOF
test_cmp expect actual
 '
 
+test_expect_success 'set core.splitIndex config variable to true' '
+   git config core.splitIndex true &&
+   : >three &&
+   git update-index --add three &&
+   git ls-files --stage >ls-files.actual &&
+   cat >ls-files.expect <actual &&
+   cat >expect <ls-files.actual &&
+   cat >ls-files.expect <actual &&
+   cat >expect <

[PATCH v3 07/21] Documentation/config: add information for core.splitIndex

2016-12-26 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 Documentation/config.txt | 4 
 1 file changed, 4 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index d51182a060..221c5982c0 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -331,6 +331,10 @@ core.trustctime::
crawlers and some backup systems).
See linkgit:git-update-index[1]. True by default.
 
+core.splitIndex::
+   If true, the split-index feature of the index will be used.
+   See linkgit:git-update-index[1]. False by default.
+
 core.untrackedCache::
Determines what to do about the untracked cache feature of the
index. It will be kept, if this variable is unset or set to
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 18/21] read-cache: refactor read_index_from()

2016-12-26 Thread Christian Couder
It looks better and is simpler to review when we don't compute
the same things many times in the function.

It will also help make the following commit simpler.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 read-cache.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index e62a6c742d..98ef1323d6 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1699,6 +1699,8 @@ int read_index_from(struct index_state *istate, const 
char *path)
 {
struct split_index *split_index;
int ret;
+   char *base_sha1_hex;
+   const char *base_path;
 
/* istate->initialized covers both .git/index and .git/sharedindex.xxx 
*/
if (istate->initialized)
@@ -1716,15 +1718,15 @@ int read_index_from(struct index_state *istate, const 
char *path)
discard_index(split_index->base);
else
split_index->base = xcalloc(1, sizeof(*split_index->base));
-   ret = do_read_index(split_index->base,
-   git_path("sharedindex.%s",
-sha1_to_hex(split_index->base_sha1)), 1);
+
+   base_sha1_hex = sha1_to_hex(split_index->base_sha1);
+   base_path = git_path("sharedindex.%s", base_sha1_hex);
+   ret = do_read_index(split_index->base, base_path, 1);
if (hashcmp(split_index->base_sha1, split_index->base->sha1))
die("broken index, expect %s in %s, got %s",
-   sha1_to_hex(split_index->base_sha1),
-   git_path("sharedindex.%s",
-sha1_to_hex(split_index->base_sha1)),
+   base_sha1_hex, base_path,
sha1_to_hex(split_index->base->sha1));
+
merge_base_index(istate);
post_read_index_from(istate);
return ret;
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 08/21] Documentation/git-update-index: talk about core.splitIndex config var

2016-12-26 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 Documentation/git-update-index.txt | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/git-update-index.txt 
b/Documentation/git-update-index.txt
index 7386c93162..e091b2a409 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -171,6 +171,12 @@ may not support it yet.
given again, all changes in $GIT_DIR/index are pushed back to
the shared index file. This mode is designed for very large
indexes that take a significant amount of time to read or write.
++
+These options take effect whatever the value of the `core.splitIndex`
+configuration variable (see linkgit:git-config[1]). But a warning is
+emitted when the change goes against the configured value, as the
+configured value will take effect next time the index is read and this
+will remove the intended effect of the option.
 
 --untracked-cache::
 --no-untracked-cache::
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 20/21] Documentation/config: add splitIndex.sharedIndexExpire

2016-12-26 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 Documentation/config.txt | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index e0f5a77980..24e771d22e 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2786,6 +2786,17 @@ splitIndex.maxPercentChange::
than 20 percent of the total number of entries.
See linkgit:git-update-index[1].
 
+splitIndex.sharedIndexExpire::
+   When the split index feature is used, shared index files with
+   a mtime older than this time will be removed when a new shared
+   index file is created. The value "now" expires all entries
+   immediately, and "never" suppresses expiration altogether.
+   The default value is "one.week.ago".
+   Note that each time a new split-index file is created, the
+   mtime of the related shared index file is updated to the
+   current time.
+   See linkgit:git-update-index[1].
+
 status.relativePaths::
By default, linkgit:git-status[1] shows paths relative to the
current directory. Setting this variable to `false` shows paths
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 05/21] update-index: warn in case of split-index incoherency

2016-12-26 Thread Christian Couder
When users are using `git update-index --(no-)split-index`, they
may expect the split-index feature to be used or not according to
the option they just used, but this might not be the case if the
new "core.splitIndex" config variable has been set. In this case
let's warn about what will happen and why.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 builtin/update-index.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index 24fdadfa4b..d74d72cc7f 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -1099,12 +1099,21 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
}
 
if (split_index > 0) {
+   if (git_config_get_split_index() == 0)
+   warning(_("core.splitIndex is set to false; "
+ "remove or change it, if you really want to "
+ "enable split index"));
if (the_index.split_index)
the_index.cache_changed |= SPLIT_INDEX_ORDERED;
else
add_split_index(_index);
-   } else if (!split_index)
+   } else if (!split_index) {
+   if (git_config_get_split_index() == 1)
+   warning(_("core.splitIndex is set to true; "
+ "remove or change it, if you really want to "
+ "disable split index"));
remove_split_index(_index);
+   }
 
switch (untracked_cache) {
case UC_UNSPECIFIED:
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 03/21] split-index: add {add,remove}_split_index() functions

2016-12-26 Thread Christian Couder
Also use the functions in cmd_update_index() in
builtin/update-index.c.

These functions will be used in a following commit to tweak
our use of the split-index feature depending on the setting
of a configuration variable.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 builtin/update-index.c | 18 ++
 split-index.c  | 22 ++
 split-index.h  |  2 ++
 3 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index d530e89368..24fdadfa4b 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -1099,18 +1099,12 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
}
 
if (split_index > 0) {
-   init_split_index(_index);
-   the_index.cache_changed |= SPLIT_INDEX_ORDERED;
-   } else if (!split_index && the_index.split_index) {
-   /*
-* can't discard_split_index(_index); because that
-* will destroy split_index->base->cache[], which may
-* be shared with the_index.cache[]. So yeah we're
-* leaking a bit here.
-*/
-   the_index.split_index = NULL;
-   the_index.cache_changed |= SOMETHING_CHANGED;
-   }
+   if (the_index.split_index)
+   the_index.cache_changed |= SPLIT_INDEX_ORDERED;
+   else
+   add_split_index(_index);
+   } else if (!split_index)
+   remove_split_index(_index);
 
switch (untracked_cache) {
case UC_UNSPECIFIED:
diff --git a/split-index.c b/split-index.c
index 615f4cac05..f519e60f87 100644
--- a/split-index.c
+++ b/split-index.c
@@ -317,3 +317,25 @@ void replace_index_entry_in_base(struct index_state 
*istate,
istate->split_index->base->cache[new->index - 1] = new;
}
 }
+
+void add_split_index(struct index_state *istate)
+{
+   if (!istate->split_index) {
+   init_split_index(istate);
+   istate->cache_changed |= SPLIT_INDEX_ORDERED;
+   }
+}
+
+void remove_split_index(struct index_state *istate)
+{
+   if (istate->split_index) {
+   /*
+* can't discard_split_index(_index); because that
+* will destroy split_index->base->cache[], which may
+* be shared with the_index.cache[]. So yeah we're
+* leaking a bit here.
+*/
+   istate->split_index = NULL;
+   istate->cache_changed |= SOMETHING_CHANGED;
+   }
+}
diff --git a/split-index.h b/split-index.h
index c1324f521a..df91c1bda8 100644
--- a/split-index.h
+++ b/split-index.h
@@ -31,5 +31,7 @@ void merge_base_index(struct index_state *istate);
 void prepare_to_write_split_index(struct index_state *istate);
 void finish_writing_split_index(struct index_state *istate);
 void discard_split_index(struct index_state *istate);
+void add_split_index(struct index_state *istate);
+void remove_split_index(struct index_state *istate);
 
 #endif
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 15/21] config: add git_config_get_expiry() from gc.c

2016-12-26 Thread Christian Couder
This function will be used in a following commit to get the expiration
time of the shared index files from the config, and it is generic
enough to be put in "config.c".

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 builtin/gc.c | 15 ++-
 cache.h  |  3 +++
 config.c | 13 +
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index 069950d0b4..1e40d45aa2 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -62,17 +62,6 @@ static void report_pack_garbage(unsigned seen_bits, const 
char *path)
string_list_append(_garbage, path);
 }
 
-static void git_config_date_string(const char *key, const char **output)
-{
-   if (git_config_get_string_const(key, output))
-   return;
-   if (strcmp(*output, "now")) {
-   unsigned long now = approxidate("now");
-   if (approxidate(*output) >= now)
-   git_die_config(key, _("Invalid %s: '%s'"), key, 
*output);
-   }
-}
-
 static void process_log_file(void)
 {
struct stat st;
@@ -111,8 +100,8 @@ static void gc_config(void)
git_config_get_int("gc.auto", _auto_threshold);
git_config_get_int("gc.autopacklimit", _auto_pack_limit);
git_config_get_bool("gc.autodetach", _auto);
-   git_config_date_string("gc.pruneexpire", _expire);
-   git_config_date_string("gc.worktreepruneexpire", 
_worktrees_expire);
+   git_config_get_expiry("gc.pruneexpire", _expire);
+   git_config_get_expiry("gc.worktreepruneexpire", 
_worktrees_expire);
git_config(git_default_config, NULL);
 }
 
diff --git a/cache.h b/cache.h
index f442f28189..279415afbd 100644
--- a/cache.h
+++ b/cache.h
@@ -1827,6 +1827,9 @@ extern int git_config_get_untracked_cache(void);
 extern int git_config_get_split_index(void);
 extern int git_config_get_max_percent_split_change(void);
 
+/* This dies if the configured or default date is in the future */
+extern int git_config_get_expiry(const char *key, const char **output);
+
 /*
  * This is a hack for test programs like test-dump-untracked-cache to
  * ensure that they do not modify the untracked cache when reading it.
diff --git a/config.c b/config.c
index cf212785bb..d6c8f8f3ba 100644
--- a/config.c
+++ b/config.c
@@ -1685,6 +1685,19 @@ int git_config_get_pathname(const char *key, const char 
**dest)
return ret;
 }
 
+int git_config_get_expiry(const char *key, const char **output)
+{
+   int ret = git_config_get_string_const(key, output);
+   if (ret)
+   return ret;
+   if (strcmp(*output, "now")) {
+   unsigned long now = approxidate("now");
+   if (approxidate(*output) >= now)
+   git_die_config(key, _("Invalid %s: '%s'"), key, 
*output);
+   }
+   return ret;
+}
+
 int git_config_get_untracked_cache(void)
 {
int val = -1;
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 09/21] config: add git_config_get_max_percent_split_change()

2016-12-26 Thread Christian Couder
This new function will be used in a following commit to get the
value of the "splitIndex.maxPercentChange" config variable.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 cache.h  |  1 +
 config.c | 15 +++
 2 files changed, 16 insertions(+)

diff --git a/cache.h b/cache.h
index c126fe475e..e15b421b6f 100644
--- a/cache.h
+++ b/cache.h
@@ -1822,6 +1822,7 @@ extern int git_config_get_maybe_bool(const char *key, int 
*dest);
 extern int git_config_get_pathname(const char *key, const char **dest);
 extern int git_config_get_untracked_cache(void);
 extern int git_config_get_split_index(void);
+extern int git_config_get_max_percent_split_change(void);
 
 /*
  * This is a hack for test programs like test-dump-untracked-cache to
diff --git a/config.c b/config.c
index 421e8c9da6..cf212785bb 100644
--- a/config.c
+++ b/config.c
@@ -1719,6 +1719,21 @@ int git_config_get_split_index(void)
return -1; /* default value */
 }
 
+int git_config_get_max_percent_split_change(void)
+{
+   int val = -1;
+
+   if (!git_config_get_int("splitindex.maxpercentchange", )) {
+   if (0 <= val && val <= 100)
+   return val;
+
+   return error(_("splitIndex.maxPercentChange value '%d' "
+  "should be between 0 and 100"), val);
+   }
+
+   return -1; /* default value */
+}
+
 NORETURN
 void git_die_config_linenr(const char *key, const char *filename, int linenr)
 {
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 11/21] t1700: add tests for splitIndex.maxPercentChange

2016-12-26 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/t1700-split-index.sh | 72 ++
 1 file changed, 72 insertions(+)

diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index 507a1dd1ad..f03addf654 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -238,4 +238,76 @@ EOF
test_cmp expect actual
 '
 
+test_expect_success 'set core.splitIndex config variable to true' '
+   git config core.splitIndex true &&
+   : >three &&
+   git update-index --add three &&
+   BASE=$(test-dump-split-index .git/index | grep "^base") &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <four &&
+   git update-index --add four &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <five &&
+   git update-index --add five &&
+   BASE=$(test-dump-split-index .git/index | grep "^base") &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <six &&
+   git update-index --add six &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <seven &&
+   git update-index --add seven &&
+   BASE=$(test-dump-split-index .git/index | grep "^base") &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <eight &&
+   git update-index --add eight &&
+   BASE=$(test-dump-split-index .git/index | grep "^base") &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <

[PATCH v3 14/21] read-cache: touch shared index files when used

2016-12-26 Thread Christian Couder
When a split-index file is created, let's update the mtime of the
shared index file that the split-index file is referencing.

In a following commit we will make shared index file expire
depending on their mtime, so updating the mtime makes sure that
the shared index file will not be deleted soon.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 read-cache.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/read-cache.c b/read-cache.c
index a1aaec5135..9fbad2044b 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1682,6 +1682,19 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
die("index file corrupt");
 }
 
+/*
+ * Signal that the shared index is used by updating its mtime.
+ *
+ * This way, shared index can be removed if they have not been used
+ * for some time. It's ok to fail to update the mtime if we are on a
+ * read only file system.
+ */
+void freshen_shared_index(char *base_sha1_hex)
+{
+   const char *shared_index = git_path("sharedindex.%s", base_sha1_hex);
+   check_and_freshen_file(shared_index, 1);
+}
+
 int read_index_from(struct index_state *istate, const char *path)
 {
struct split_index *split_index;
@@ -2273,6 +2286,8 @@ int write_locked_index(struct index_state *istate, struct 
lock_file *lock,
int ret = write_shared_index(istate, lock, flags);
if (ret)
return ret;
+   } else {
+   freshen_shared_index(sha1_to_hex(si->base_sha1));
}
 
return write_split_index(istate, lock, flags);
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 21/21] Documentation/git-update-index: explain splitIndex.*

2016-12-26 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 Documentation/config.txt   |  6 +++---
 Documentation/git-update-index.txt | 37 +
 2 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 24e771d22e..87a570cf88 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2792,9 +2792,9 @@ splitIndex.sharedIndexExpire::
index file is created. The value "now" expires all entries
immediately, and "never" suppresses expiration altogether.
The default value is "one.week.ago".
-   Note that each time a new split-index file is created, the
-   mtime of the related shared index file is updated to the
-   current time.
+   Note that each time a split index based on a shared index file
+   is either created or read from, the mtime of the shared index
+   file is updated to the current time.
See linkgit:git-update-index[1].
 
 status.relativePaths::
diff --git a/Documentation/git-update-index.txt 
b/Documentation/git-update-index.txt
index e091b2a409..46c953b2f2 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -163,14 +163,10 @@ may not support it yet.
 
 --split-index::
 --no-split-index::
-   Enable or disable split index mode. If enabled, the index is
-   split into two files, $GIT_DIR/index and $GIT_DIR/sharedindex..
-   Changes are accumulated in $GIT_DIR/index while the shared
-   index file contains all index entries stays unchanged. If
-   split-index mode is already enabled and `--split-index` is
-   given again, all changes in $GIT_DIR/index are pushed back to
-   the shared index file. This mode is designed for very large
-   indexes that take a significant amount of time to read or write.
+   Enable or disable split index mode. If split-index mode is
+   already enabled and `--split-index` is given again, all
+   changes in $GIT_DIR/index are pushed back to the shared index
+   file.
 +
 These options take effect whatever the value of the `core.splitIndex`
 configuration variable (see linkgit:git-config[1]). But a warning is
@@ -394,6 +390,31 @@ Although this bit looks similar to assume-unchanged bit, 
its goal is
 different from assume-unchanged bit's. Skip-worktree also takes
 precedence over assume-unchanged bit when both are set.
 
+Split index
+---
+
+This mode is designed for very large indexes that take a significant
+amount of time to read or write.
+
+In this mode, the index is split into two files, $GIT_DIR/index and
+$GIT_DIR/sharedindex.. Changes are accumulated in
+$GIT_DIR/index, the split index, while the shared index file contains
+all index entries and stays unchanged.
+
+All changes in the split index are pushed back to the shared index
+file when the number of entries in the split index reaches a level
+specified by the splitIndex.maxPercentChange config variable (see
+linkgit:git-config[1]).
+
+Each time a new shared index file is created, the old shared index
+files are deleted if their mtime is older than what is specified by
+the splitIndex.sharedIndexExpire config variable (see
+linkgit:git-config[1]).
+
+To avoid deleting a shared index file that is still used, its mtime is
+updated to the current time everytime a new split index based on the
+shared index file is either created or read from.
+
 Untracked cache
 ---
 
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 17/21] t1700: test shared index file expiration

2016-12-26 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/t1700-split-index.sh | 44 
 1 file changed, 44 insertions(+)

diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index f03addf654..f448fc13cd 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -310,4 +310,48 @@ EOF
test_cmp expect actual
 '
 
+test_expect_success 'shared index files expire after 7 days by default' '
+   : >ten &&
+   git update-index --add ten &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   just_under_7_days_ago=$((1-7*86400)) &&
+   test-chmtime =$just_under_7_days_ago .git/sharedindex.* &&
+   : >eleven &&
+   git update-index --add eleven &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   just_over_7_days_ago=$((-1-7*86400)) &&
+   test-chmtime =$just_over_7_days_ago .git/sharedindex.* &&
+   : >twelve &&
+   git update-index --add twelve &&
+   test $(ls .git/sharedindex.* | wc -l) = 1
+'
+
+test_expect_success 'check splitIndex.sharedIndexExpire set to 8 days' '
+   git config splitIndex.sharedIndexExpire "8.days.ago" &&
+   test-chmtime =$just_over_7_days_ago .git/sharedindex.* &&
+   : >thirteen &&
+   git update-index --add thirteen &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   just_over_8_days_ago=$((-1-8*86400)) &&
+   test-chmtime =$just_over_8_days_ago .git/sharedindex.* &&
+   : >fourteen &&
+   git update-index --add fourteen &&
+   test $(ls .git/sharedindex.* | wc -l) = 1
+'
+
+test_expect_success 'check splitIndex.sharedIndexExpire set to "never" and 
"now"' '
+   git config splitIndex.sharedIndexExpire never &&
+   just_10_years_ago=$((-365*10*86400)) &&
+   test-chmtime =$just_10_years_ago .git/sharedindex.* &&
+   : >fifteen &&
+   git update-index --add fifteen &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   git config splitIndex.sharedIndexExpire now &&
+   just_1_second_ago=-1 &&
+   test-chmtime =$just_1_second_ago .git/sharedindex.* &&
+   : >sixteen &&
+   git update-index --add sixteen &&
+   test $(ls .git/sharedindex.* | wc -l) = 1
+'
+
 test_done
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 10/21] read-cache: regenerate shared index if necessary

2016-12-26 Thread Christian Couder
When writing a new split-index and there is a big number of cache
entries in the split-index compared to the shared index, it is a
good idea to regenerate the shared index.

By default when the ratio reaches 20%, we will push back all
the entries from the split-index into a new shared index file
instead of just creating a new split-index file.

The threshold can be configured using the
"splitIndex.maxPercentChange" config variable.

We need to adjust the existing tests in t1700 by setting
"splitIndex.maxPercentChange" to 100 at the beginning of t1700,
as the existing tests are assuming that the shared index is
regenerated only when `git update-index --split-index` is used.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 read-cache.c   | 32 
 t/t1700-split-index.sh |  1 +
 2 files changed, 33 insertions(+)

diff --git a/read-cache.c b/read-cache.c
index 732b7fe611..a1aaec5135 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2220,6 +2220,36 @@ static int write_shared_index(struct index_state *istate,
return ret;
 }
 
+static const int default_max_percent_split_change = 20;
+
+static int too_many_not_shared_entries(struct index_state *istate)
+{
+   int i, not_shared = 0;
+   int max_split = git_config_get_max_percent_split_change();
+
+   switch (max_split) {
+   case -1:
+   /* not or badly configured: use the default value */
+   max_split = default_max_percent_split_change;
+   break;
+   case 0:
+   return 1; /* 0% means always write a new shared index */
+   case 100:
+   return 0; /* 100% means never write a new shared index */
+   default:
+   ; /* do nothing: just use the configured value */
+   }
+
+   /* Count not shared entries */
+   for (i = 0; i < istate->cache_nr; i++) {
+   struct cache_entry *ce = istate->cache[i];
+   if (!ce->index)
+   not_shared++;
+   }
+
+   return istate->cache_nr * max_split < not_shared * 100;
+}
+
 int write_locked_index(struct index_state *istate, struct lock_file *lock,
   unsigned flags)
 {
@@ -2237,6 +2267,8 @@ int write_locked_index(struct index_state *istate, struct 
lock_file *lock,
if ((v & 15) < 6)
istate->cache_changed |= SPLIT_INDEX_ORDERED;
}
+   if (too_many_not_shared_entries(istate))
+   istate->cache_changed |= SPLIT_INDEX_ORDERED;
if (istate->cache_changed & SPLIT_INDEX_ORDERED) {
int ret = write_shared_index(istate, lock, flags);
if (ret)
diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index db8c39f446..507a1dd1ad 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -8,6 +8,7 @@ test_description='split index mode tests'
 sane_unset GIT_TEST_SPLIT_INDEX
 
 test_expect_success 'enable split index' '
+   git config splitIndex.maxPercentChange 100 &&
git update-index --split-index &&
test-dump-split-index .git/index >actual &&
indexversion=$(test-index-version <.git/index) &&
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 04/21] read-cache: add and then use tweak_split_index()

2016-12-26 Thread Christian Couder
This will make us use the split-index feature or not depending
on the value of the "core.splitIndex" config variable.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 read-cache.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/read-cache.c b/read-cache.c
index f92a912dcb..732b7fe611 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1566,10 +1566,27 @@ static void tweak_untracked_cache(struct index_state 
*istate)
}
 }
 
+static void tweak_split_index(struct index_state *istate)
+{
+   switch (git_config_get_split_index()) {
+   case -1: /* unset: do nothing */
+   break;
+   case 0: /* false */
+   remove_split_index(istate);
+   break;
+   case 1: /* true */
+   add_split_index(istate);
+   break;
+   default: /* unknown value: do nothing */
+   break;
+   }
+}
+
 static void post_read_index_from(struct index_state *istate)
 {
check_ce_order(istate);
tweak_untracked_cache(istate);
+   tweak_split_index(istate);
 }
 
 /* remember to discard_cache() before reading a different cache! */
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 19/21] read-cache: use freshen_shared_index() in read_index_from()

2016-12-26 Thread Christian Couder
This way a share index file will not be garbage collected if
we still read from an index it is based from.

As we need to read the current index before creating a new
one, the tests have to be adjusted, so that we don't expect
an old shared index file to be deleted right away when we
create a new one.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 read-cache.c   |  1 +
 t/t1700-split-index.sh | 14 +++---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 98ef1323d6..bf0ac1ce61 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1727,6 +1727,7 @@ int read_index_from(struct index_state *istate, const 
char *path)
base_sha1_hex, base_path,
sha1_to_hex(split_index->base->sha1));
 
+   freshen_shared_index(base_sha1_hex);
merge_base_index(istate);
post_read_index_from(istate);
return ret;
diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index f448fc13cd..800f84a593 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -313,17 +313,17 @@ EOF
 test_expect_success 'shared index files expire after 7 days by default' '
: >ten &&
git update-index --add ten &&
-   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
just_under_7_days_ago=$((1-7*86400)) &&
test-chmtime =$just_under_7_days_ago .git/sharedindex.* &&
: >eleven &&
git update-index --add eleven &&
-   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
just_over_7_days_ago=$((-1-7*86400)) &&
test-chmtime =$just_over_7_days_ago .git/sharedindex.* &&
: >twelve &&
git update-index --add twelve &&
-   test $(ls .git/sharedindex.* | wc -l) = 1
+   test $(ls .git/sharedindex.* | wc -l) -le 2
 '
 
 test_expect_success 'check splitIndex.sharedIndexExpire set to 8 days' '
@@ -331,12 +331,12 @@ test_expect_success 'check splitIndex.sharedIndexExpire 
set to 8 days' '
test-chmtime =$just_over_7_days_ago .git/sharedindex.* &&
: >thirteen &&
git update-index --add thirteen &&
-   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
just_over_8_days_ago=$((-1-8*86400)) &&
test-chmtime =$just_over_8_days_ago .git/sharedindex.* &&
: >fourteen &&
git update-index --add fourteen &&
-   test $(ls .git/sharedindex.* | wc -l) = 1
+   test $(ls .git/sharedindex.* | wc -l) -le 2
 '
 
 test_expect_success 'check splitIndex.sharedIndexExpire set to "never" and 
"now"' '
@@ -345,13 +345,13 @@ test_expect_success 'check splitIndex.sharedIndexExpire 
set to "never" and "now"
test-chmtime =$just_10_years_ago .git/sharedindex.* &&
: >fifteen &&
git update-index --add fifteen &&
-   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
git config splitIndex.sharedIndexExpire now &&
just_1_second_ago=-1 &&
test-chmtime =$just_1_second_ago .git/sharedindex.* &&
: >sixteen &&
git update-index --add sixteen &&
-   test $(ls .git/sharedindex.* | wc -l) = 1
+   test $(ls .git/sharedindex.* | wc -l) -le 2
 '
 
 test_done
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 12/21] Documentation/config: add splitIndex.maxPercentChange

2016-12-26 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 Documentation/config.txt | 13 +
 1 file changed, 13 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 221c5982c0..e0f5a77980 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2773,6 +2773,19 @@ showbranch.default::
The default set of branches for linkgit:git-show-branch[1].
See linkgit:git-show-branch[1].
 
+splitIndex.maxPercentChange::
+   When the split index feature is used, this specifies the
+   percent of entries the split index can contain compared to the
+   whole number of entries in both the split index and the shared
+   index before a new shared index is written.
+   The value should be between 0 and 100. If the value is 0 then
+   a new shared index is always written, if it is 100 a new
+   shared index is never written.
+   By default the value is 20, so a new shared index is written
+   if the number of entries in the split index would be greater
+   than 20 percent of the total number of entries.
+   See linkgit:git-update-index[1].
+
 status.relativePaths::
By default, linkgit:git-status[1] shows paths relative to the
current directory. Setting this variable to `false` shows paths
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 01/21] config: mark an error message up for translation

2016-12-26 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 config.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config.c b/config.c
index 83fdecb1bc..2eaf8ad77a 100644
--- a/config.c
+++ b/config.c
@@ -1701,8 +1701,8 @@ int git_config_get_untracked_cache(void)
if (!strcasecmp(v, "keep"))
return -1;
 
-   error("unknown core.untrackedCache value '%s'; "
- "using 'keep' default value", v);
+   error(_("unknown core.untrackedCache value '%s'; "
+   "using 'keep' default value"), v);
return -1;
}
 
-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 00/21] Add configuration options for split-index

2016-12-26 Thread Christian Couder
new
  feature.

The only change since v2 in this step is that the full shared
index path is passed to the can_delete_shared_index() function in
16/21 as suggested by Duy.

Links
~

This patch series is also available here:

  https://github.com/chriscool/git/commits/config-split-index

The previous versions were:

  RFC: https://github.com/chriscool/git/commits/config-split-index7
  v1:  https://github.com/chriscool/git/commits/config-split-index72
  v2:  https://github.com/chriscool/git/commits/config-split-index99

On the mailing list the related patch series and discussions were:

  RFC: 
https://public-inbox.org/git/20160711172254.13439-1-chrisc...@tuxfamily.org/
  v1:  
https://public-inbox.org/git/20161023092648.12086-1-chrisc...@tuxfamily.org/
  v2:  
https://public-inbox.org/git/20161217145547.11748-1-chrisc...@tuxfamily.org/

Christian Couder (21):
  config: mark an error message up for translation
  config: add git_config_get_split_index()
  split-index: add {add,remove}_split_index() functions
  read-cache: add and then use tweak_split_index()
  update-index: warn in case of split-index incoherency
  t1700: add tests for core.splitIndex
  Documentation/config: add information for core.splitIndex
  Documentation/git-update-index: talk about core.splitIndex config var
  config: add git_config_get_max_percent_split_change()
  read-cache: regenerate shared index if necessary
  t1700: add tests for splitIndex.maxPercentChange
  Documentation/config: add splitIndex.maxPercentChange
  sha1_file: make check_and_freshen_file() non static
  read-cache: touch shared index files when used
  config: add git_config_get_expiry() from gc.c
  read-cache: unlink old sharedindex files
  t1700: test shared index file expiration
  read-cache: refactor read_index_from()
  read-cache: use freshen_shared_index() in read_index_from()
  Documentation/config: add splitIndex.sharedIndexExpire
  Documentation/git-update-index: explain splitIndex.*

 Documentation/config.txt   |  28 +++
 Documentation/git-update-index.txt |  43 +--
 builtin/gc.c   |  15 +---
 builtin/update-index.c |  25 +++---
 cache.h|   8 ++
 config.c   |  42 +-
 read-cache.c   | 143 --
 sha1_file.c|   2 +-
 split-index.c  |  22 ++
 split-index.h  |   2 +
 t/t1700-split-index.sh | 154 +
 11 files changed, 442 insertions(+), 42 deletions(-)

-- 
2.11.0.209.gda91e66374.dirty



[PATCH v3 02/21] config: add git_config_get_split_index()

2016-12-26 Thread Christian Couder
This new function will be used in a following commit to know
if we want to use the split index feature or not.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 cache.h  |  1 +
 config.c | 10 ++
 2 files changed, 11 insertions(+)

diff --git a/cache.h b/cache.h
index a50a61a197..c126fe475e 100644
--- a/cache.h
+++ b/cache.h
@@ -1821,6 +1821,7 @@ extern int git_config_get_bool_or_int(const char *key, 
int *is_bool, int *dest);
 extern int git_config_get_maybe_bool(const char *key, int *dest);
 extern int git_config_get_pathname(const char *key, const char **dest);
 extern int git_config_get_untracked_cache(void);
+extern int git_config_get_split_index(void);
 
 /*
  * This is a hack for test programs like test-dump-untracked-cache to
diff --git a/config.c b/config.c
index 2eaf8ad77a..421e8c9da6 100644
--- a/config.c
+++ b/config.c
@@ -1709,6 +1709,16 @@ int git_config_get_untracked_cache(void)
return -1; /* default value */
 }
 
+int git_config_get_split_index(void)
+{
+   int val;
+
+   if (!git_config_get_maybe_bool("core.splitindex", ))
+   return val;
+
+   return -1; /* default value */
+}
+
 NORETURN
 void git_die_config_linenr(const char *key, const char *filename, int linenr)
 {
-- 
2.11.0.209.gda91e66374.dirty



Re: [PATCH v2 16/21] read-cache: unlink old sharedindex files

2016-12-26 Thread Christian Couder
On Mon, Dec 19, 2016 at 12:58 PM, Duy Nguyen <pclo...@gmail.com> wrote:
> On Sat, Dec 17, 2016 at 03:55:42PM +0100, Christian Couder wrote:
>> +static int clean_shared_index_files(const char *current_hex)
>> +{
>> + struct dirent *de;
>> + DIR *dir = opendir(get_git_dir());
>> +
>> + if (!dir)
>> + return error_errno(_("unable to open git dir: %s"), 
>> get_git_dir());
>> +
>> + while ((de = readdir(dir)) != NULL) {
>> + const char *sha1_hex;
>> + if (!skip_prefix(de->d_name, "sharedindex.", _hex))
>> + continue;
>> + if (!strcmp(sha1_hex, current_hex))
>
> fspathcmp since we're dealing fs paths?
>
> In theory we should be ok using strcmp, even on Windows because case
> is preserved, I think. It's only a problem when we write path 'abc'
> down and read 'ABC' back.
>
> Oh well.. your call because if you go with fspathcmp, skip_prefix can't
> be used either. A lot more changes for a very rare case.

I'd rather keep using skip_prefix() and strcmp().

I could find no place in the code where fspathcmp() is used to compare
dir entries, but a few where other *cmp() functions are used:

$ git grep '>d_name' | grep cmp
builtin/repack.c:   if (strncmp(e->d_name, buf.buf +
dirlen, prefixlen))
dir.c:  if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
sha1_name.c:if (memcmp(de->d_name, ds->hex_pfx +
2, ds->len - 2))

and also a few where skip_prefix() is used:

$ git grep -n '>d_name' | grep prefix
builtin/repack.c:60:if (strncmp(e->d_name, buf.buf +
dirlen, prefixlen))
help.c:147: if (!skip_prefix(de->d_name, prefix, ))

so I think it is ok to use skip_prefix() and strcmp().

>> + continue;
>> + if (can_delete_shared_index(sha1_hex) > 0 &&
>
> Probably shorter to pass full d->name here so you don't have to do
> another git_path() in can_delete_

Yeah, you are right, I will do that.


Re: [PATCH v2 10/21] read-cache: regenerate shared index if necessary

2016-12-26 Thread Christian Couder
On Mon, Dec 19, 2016 at 12:48 PM, Duy Nguyen <pclo...@gmail.com> wrote:
> On Sat, Dec 17, 2016 at 03:55:36PM +0100, Christian Couder wrote:
>> +static const int default_max_percent_split_change = 20;
>> +
>> +static int too_many_not_shared_entries(struct index_state *istate)
>> +{
>> + int i, not_shared = 0;
>> + int max_split = git_config_get_max_percent_split_change();
>> +
>> + switch (max_split) {
>> + case -1:
>> + /* not or badly configured: use the default value */
>> + max_split = default_max_percent_split_change;
>> + break;
>> + case 0:
>> + return 1; /* 0% means always write a new shared index */
>> + case 100:
>> + return 0; /* 100% means never write a new shared index */
>
> I wonder if we really need to special case these here. If I read it
> correctly, the expression at the end of this function will return 1
> when max_split is 0, and 0 when max_split is 100 (not counting the
> case when cache_nr is zero).

It's better for performance if we can avoid computing the number of
unshared entries, which we can in case of 0 or 100.

> Perhaps it's good for documentation purpose.

Yeah, I think it's also good for documentation purpose.

> Though I find it hard to
> see a use case for max_split == 0. Always creating a new shared index
> sounds crazy.

Yeah, but perhaps to test shared index writing performance people
might want to use it. And I don't see any good way or any good reason
to disallow it.


Re: [PATCH v2 04/21] read-cache: add and then use tweak_split_index()

2016-12-26 Thread Christian Couder
On Mon, Dec 19, 2016 at 12:18 PM, Duy Nguyen <pclo...@gmail.com> wrote:
> On Sat, Dec 17, 2016 at 03:55:30PM +0100, Christian Couder wrote:
>> +static void tweak_split_index(struct index_state *istate)
>> +{
>> + switch (git_config_get_split_index()) {
>> + case -1: /* unset: do nothing */
>> + break;
>> + case 0: /* false */
>> + remove_split_index(istate);
>> + break;
>> + case 1: /* true */
>> + add_split_index(istate);
>> + break;
>> + default: /* unknown value: do nothing */
>
> Probably should die("BUG:") here since it looks to me like
> git_config_maybe_bool() (inside git_config_get_split_index) in this
> case has been updated to return more values than we can handle. And we
> need to know about that so we can handle it properly.

If we later add another value to the possible ones, like for example
"auto", we might not want old versions of Git to fail with a "BUG:..."
message when they read config files tweaked for newer Git versions, so
I don't think we should die() here.

A warning would be ok, but given that in tweak_untracked_cache() we
decided to do nothing, I am thinking that it would be more consistent
to just do nothing here too.


Re: [PATCH v2 02/21] config: add git_config_get_split_index()

2016-12-26 Thread Christian Couder
On Mon, Dec 19, 2016 at 12:14 PM, Duy Nguyen <pclo...@gmail.com> wrote:
> On Sat, Dec 17, 2016 at 03:55:28PM +0100, Christian Couder wrote:
>> diff --git a/config.c b/config.c
>> index 2eaf8ad77a..c1343bbb3e 100644
>> --- a/config.c
>> +++ b/config.c
>> @@ -1709,6 +1709,16 @@ int git_config_get_untracked_cache(void)
>>   return -1; /* default value */
>>  }
>>
>> +int git_config_get_split_index(void)
>> +{
>> + int val = -1;
>
> Is it redundant to set default value here because it's not used
> anywhere? The "return val;" will always have the new value from
> git_config_. And you don't use "val" in error case.

Yeah, it is redundant, so I will not set the default value here in the
next version.


Re: [PATCH v2 00/21] Add configuration options for split-index

2016-12-17 Thread Christian Couder
> The previous versions were:
>
>   RFC: https://github.com/chriscool/git/commits/config-split-index7
>   v1:  https://github.com/chriscool/git/commits/config-split-index72

The diff since v1 is:

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 8fbef25cb1..52a3cac4ff 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2782,9 +2782,9 @@ splitIndex.sharedIndexExpire::
 index file is created. The value "now" expires all entries
 immediately, and "never" suppresses expiration altogether.
 The default value is "one.week.ago".
-Note that each time a new split-index file is created, the
-mtime of the related shared index file is updated to the
-current time.
+Note that each time a split index based on a shared index file
+is either created or read from, the mtime of the shared index
+file is updated to the current time.
 See linkgit:git-update-index[1].

 status.relativePaths::
diff --git a/Documentation/git-update-index.txt
b/Documentation/git-update-index.txt
index 635d1574b2..46c953b2f2 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -407,10 +407,14 @@ specified by the splitIndex.maxPercentChange
config variable (see
 linkgit:git-config[1]).

 Each time a new shared index file is created, the old shared index
-files are deleted if they are older than what is specified by the
-splitIndex.sharedIndexExpire config variable (see
+files are deleted if their mtime is older than what is specified by
+the splitIndex.sharedIndexExpire config variable (see
 linkgit:git-config[1]).

+To avoid deleting a shared index file that is still used, its mtime is
+updated to the current time everytime a new split index based on the
+shared index file is either created or read from.
+
 Untracked cache
 ---

diff --git a/builtin/gc.c b/builtin/gc.c
index c1e9602892..1e40d45aa2 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -100,8 +100,8 @@ static void gc_config(void)
 git_config_get_int("gc.auto", _auto_threshold);
 git_config_get_int("gc.autopacklimit", _auto_pack_limit);
 git_config_get_bool("gc.autodetach", _auto);
-git_config_get_date_string("gc.pruneexpire", _expire);
-git_config_get_date_string("gc.worktreepruneexpire",
_worktrees_expire);
+git_config_get_expiry("gc.pruneexpire", _expire);
+git_config_get_expiry("gc.worktreepruneexpire", _worktrees_expire);
 git_config(git_default_config, NULL);
 }

diff --git a/builtin/update-index.c b/builtin/update-index.c
index a14dbf2612..dc1fd0d44d 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -1099,18 +1099,18 @@ int cmd_update_index(int argc, const char
**argv, const char *prefix)

 if (split_index > 0) {
 if (git_config_get_split_index() == 0)
-warning("core.splitIndex is set to false; "
-"remove or change it, if you really want to "
-"enable split index");
+warning(_("core.splitIndex is set to false; "
+  "remove or change it, if you really want to "
+  "enable split index"));
 if (the_index.split_index)
 the_index.cache_changed |= SPLIT_INDEX_ORDERED;
 else
 add_split_index(_index);
 } else if (!split_index) {
 if (git_config_get_split_index() == 1)
-warning("core.splitIndex is set to true; "
-"remove or change it, if you really want to "
-"disable split index");
+warning(_("core.splitIndex is set to true; "
+  "remove or change it, if you really want to "
+  "disable split index"));
 remove_split_index(_index);
 }

diff --git a/cache.h b/cache.h
index 8e26aaf05e..279415afbd 100644
--- a/cache.h
+++ b/cache.h
@@ -1823,11 +1823,13 @@ extern int git_config_get_bool(const char
*key, int *dest);
 extern int git_config_get_bool_or_int(const char *key, int *is_bool,
int *dest);
 extern int git_config_get_maybe_bool(const char *key, int *dest);
 extern int git_config_get_pathname(const char *key, const char **dest);
-extern int git_config_get_date_string(const char *key, const char **output);
 extern int git_config_get_untracked_cache(void);
 extern int git_config_get_split_index(void);
 extern int git_config_get_max_percent_split_change(void);

+/* This dies if the configured or default date is in the future */
+extern int git_config_get_expiry(const char *key, const char **output);
+
 /*
  * This is a hack for test programs like test-dump-untracked-cache to
  * ensure that they do not modify the untracked cache when reading it.
diff --git a/config.c b/config.c
index f88c61bb30..5c52cefd78 100644
--- a/config.c
+++ b/config.c
@@ -1685,7 +1685,7 @@ int git_config_get_pathname(const char *key,
const char **dest)
 return ret;
 }

-int git_config_get_date_string(const char *key, const char **output)
+int git_config_get_expiry(const char *key, const 

[PATCH v2 19/21] read-cache: use freshen_shared_index() in read_index_from()

2016-12-17 Thread Christian Couder
This way a share index file will not be garbage collected if
we still read from an index it is based from.

As we need to read the current index before creating a new
one, the tests have to be adjusted, so that we don't expect
an old shared index file to be deleted right away when we
create a new one.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 read-cache.c   |  1 +
 t/t1700-split-index.sh | 14 +++---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 96e097e341..35377f0a3e 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1730,6 +1730,7 @@ int read_index_from(struct index_state *istate, const 
char *path)
base_sha1_hex, base_path,
sha1_to_hex(split_index->base->sha1));
 
+   freshen_shared_index(base_sha1_hex);
merge_base_index(istate);
post_read_index_from(istate);
return ret;
diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index f448fc13cd..800f84a593 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -313,17 +313,17 @@ EOF
 test_expect_success 'shared index files expire after 7 days by default' '
: >ten &&
git update-index --add ten &&
-   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
just_under_7_days_ago=$((1-7*86400)) &&
test-chmtime =$just_under_7_days_ago .git/sharedindex.* &&
: >eleven &&
git update-index --add eleven &&
-   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
just_over_7_days_ago=$((-1-7*86400)) &&
test-chmtime =$just_over_7_days_ago .git/sharedindex.* &&
: >twelve &&
git update-index --add twelve &&
-   test $(ls .git/sharedindex.* | wc -l) = 1
+   test $(ls .git/sharedindex.* | wc -l) -le 2
 '
 
 test_expect_success 'check splitIndex.sharedIndexExpire set to 8 days' '
@@ -331,12 +331,12 @@ test_expect_success 'check splitIndex.sharedIndexExpire 
set to 8 days' '
test-chmtime =$just_over_7_days_ago .git/sharedindex.* &&
: >thirteen &&
git update-index --add thirteen &&
-   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
just_over_8_days_ago=$((-1-8*86400)) &&
test-chmtime =$just_over_8_days_ago .git/sharedindex.* &&
: >fourteen &&
git update-index --add fourteen &&
-   test $(ls .git/sharedindex.* | wc -l) = 1
+   test $(ls .git/sharedindex.* | wc -l) -le 2
 '
 
 test_expect_success 'check splitIndex.sharedIndexExpire set to "never" and 
"now"' '
@@ -345,13 +345,13 @@ test_expect_success 'check splitIndex.sharedIndexExpire 
set to "never" and "now"
test-chmtime =$just_10_years_ago .git/sharedindex.* &&
: >fifteen &&
git update-index --add fifteen &&
-   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
git config splitIndex.sharedIndexExpire now &&
just_1_second_ago=-1 &&
test-chmtime =$just_1_second_ago .git/sharedindex.* &&
: >sixteen &&
git update-index --add sixteen &&
-   test $(ls .git/sharedindex.* | wc -l) = 1
+   test $(ls .git/sharedindex.* | wc -l) -le 2
 '
 
 test_done
-- 
2.11.0.49.g2414764.dirty



[PATCH v2 10/21] read-cache: regenerate shared index if necessary

2016-12-17 Thread Christian Couder
When writing a new split-index and there is a big number of cache
entries in the split-index compared to the shared index, it is a
good idea to regenerate the shared index.

By default when the ratio reaches 20%, we will push back all
the entries from the split-index into a new shared index file
instead of just creating a new split-index file.

The threshold can be configured using the
"splitIndex.maxPercentChange" config variable.

We need to adjust the existing tests in t1700 by setting
"splitIndex.maxPercentChange" to 100 at the beginning of t1700,
as the existing tests are assuming that the shared index is
regenerated only when `git update-index --split-index` is used.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 read-cache.c   | 32 
 t/t1700-split-index.sh |  1 +
 2 files changed, 33 insertions(+)

diff --git a/read-cache.c b/read-cache.c
index 79aae6bd20..280b01bd6c 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2223,6 +2223,36 @@ static int write_shared_index(struct index_state *istate,
return ret;
 }
 
+static const int default_max_percent_split_change = 20;
+
+static int too_many_not_shared_entries(struct index_state *istate)
+{
+   int i, not_shared = 0;
+   int max_split = git_config_get_max_percent_split_change();
+
+   switch (max_split) {
+   case -1:
+   /* not or badly configured: use the default value */
+   max_split = default_max_percent_split_change;
+   break;
+   case 0:
+   return 1; /* 0% means always write a new shared index */
+   case 100:
+   return 0; /* 100% means never write a new shared index */
+   default:
+   ; /* do nothing: just use the configured value */
+   }
+
+   /* Count not shared entries */
+   for (i = 0; i < istate->cache_nr; i++) {
+   struct cache_entry *ce = istate->cache[i];
+   if (!ce->index)
+   not_shared++;
+   }
+
+   return istate->cache_nr * max_split < not_shared * 100;
+}
+
 int write_locked_index(struct index_state *istate, struct lock_file *lock,
   unsigned flags)
 {
@@ -2240,6 +2270,8 @@ int write_locked_index(struct index_state *istate, struct 
lock_file *lock,
if ((v & 15) < 6)
istate->cache_changed |= SPLIT_INDEX_ORDERED;
}
+   if (too_many_not_shared_entries(istate))
+   istate->cache_changed |= SPLIT_INDEX_ORDERED;
if (istate->cache_changed & SPLIT_INDEX_ORDERED) {
int ret = write_shared_index(istate, lock, flags);
if (ret)
diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index db8c39f446..507a1dd1ad 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -8,6 +8,7 @@ test_description='split index mode tests'
 sane_unset GIT_TEST_SPLIT_INDEX
 
 test_expect_success 'enable split index' '
+   git config splitIndex.maxPercentChange 100 &&
git update-index --split-index &&
test-dump-split-index .git/index >actual &&
indexversion=$(test-index-version <.git/index) &&
-- 
2.11.0.49.g2414764.dirty



[PATCH v2 11/21] t1700: add tests for splitIndex.maxPercentChange

2016-12-17 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/t1700-split-index.sh | 72 ++
 1 file changed, 72 insertions(+)

diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index 507a1dd1ad..f03addf654 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -238,4 +238,76 @@ EOF
test_cmp expect actual
 '
 
+test_expect_success 'set core.splitIndex config variable to true' '
+   git config core.splitIndex true &&
+   : >three &&
+   git update-index --add three &&
+   BASE=$(test-dump-split-index .git/index | grep "^base") &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <four &&
+   git update-index --add four &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <five &&
+   git update-index --add five &&
+   BASE=$(test-dump-split-index .git/index | grep "^base") &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <six &&
+   git update-index --add six &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <seven &&
+   git update-index --add seven &&
+   BASE=$(test-dump-split-index .git/index | grep "^base") &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <eight &&
+   git update-index --add eight &&
+   BASE=$(test-dump-split-index .git/index | grep "^base") &&
+   test-dump-split-index .git/index | sed "/^own/d" >actual &&
+   cat >expect <

[PATCH v2 18/21] read-cache: refactor read_index_from()

2016-12-17 Thread Christian Couder
It looks better and is simpler to review when we don't compute
the same things many times in the function.

It will also help make the following commit simpler.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 read-cache.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 8e99a7c325..96e097e341 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1702,6 +1702,8 @@ int read_index_from(struct index_state *istate, const 
char *path)
 {
struct split_index *split_index;
int ret;
+   char *base_sha1_hex;
+   const char *base_path;
 
/* istate->initialized covers both .git/index and .git/sharedindex.xxx 
*/
if (istate->initialized)
@@ -1719,15 +1721,15 @@ int read_index_from(struct index_state *istate, const 
char *path)
discard_index(split_index->base);
else
split_index->base = xcalloc(1, sizeof(*split_index->base));
-   ret = do_read_index(split_index->base,
-   git_path("sharedindex.%s",
-sha1_to_hex(split_index->base_sha1)), 1);
+
+   base_sha1_hex = sha1_to_hex(split_index->base_sha1);
+   base_path = git_path("sharedindex.%s", base_sha1_hex);
+   ret = do_read_index(split_index->base, base_path, 1);
if (hashcmp(split_index->base_sha1, split_index->base->sha1))
die("broken index, expect %s in %s, got %s",
-   sha1_to_hex(split_index->base_sha1),
-   git_path("sharedindex.%s",
-sha1_to_hex(split_index->base_sha1)),
+   base_sha1_hex, base_path,
sha1_to_hex(split_index->base->sha1));
+
merge_base_index(istate);
post_read_index_from(istate);
return ret;
-- 
2.11.0.49.g2414764.dirty



[PATCH v2 04/21] read-cache: add and then use tweak_split_index()

2016-12-17 Thread Christian Couder
This will make us use the split-index feature or not depending
on the value of the "core.splitIndex" config variable.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 read-cache.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/read-cache.c b/read-cache.c
index db5d910642..79aae6bd20 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1569,10 +1569,27 @@ static void tweak_untracked_cache(struct index_state 
*istate)
}
 }
 
+static void tweak_split_index(struct index_state *istate)
+{
+   switch (git_config_get_split_index()) {
+   case -1: /* unset: do nothing */
+   break;
+   case 0: /* false */
+   remove_split_index(istate);
+   break;
+   case 1: /* true */
+   add_split_index(istate);
+   break;
+   default: /* unknown value: do nothing */
+   break;
+   }
+}
+
 static void post_read_index_from(struct index_state *istate)
 {
check_ce_order(istate);
tweak_untracked_cache(istate);
+   tweak_split_index(istate);
 }
 
 /* remember to discard_cache() before reading a different cache! */
-- 
2.11.0.49.g2414764.dirty



[PATCH v2 14/21] read-cache: touch shared index files when used

2016-12-17 Thread Christian Couder
When a split-index file is created, let's update the mtime of the
shared index file that the split-index file is referencing.

In a following commit we will make shared index file expire
depending on their mtime, so updating the mtime makes sure that
the shared index file will not be deleted soon.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 read-cache.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/read-cache.c b/read-cache.c
index 280b01bd6c..42688261f7 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1685,6 +1685,19 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
die("index file corrupt");
 }
 
+/*
+ * Signal that the shared index is used by updating its mtime.
+ *
+ * This way, shared index can be removed if they have not been used
+ * for some time. It's ok to fail to update the mtime if we are on a
+ * read only file system.
+ */
+void freshen_shared_index(char *base_sha1_hex)
+{
+   const char *shared_index = git_path("sharedindex.%s", base_sha1_hex);
+   check_and_freshen_file(shared_index, 1);
+}
+
 int read_index_from(struct index_state *istate, const char *path)
 {
struct split_index *split_index;
@@ -2276,6 +2289,8 @@ int write_locked_index(struct index_state *istate, struct 
lock_file *lock,
int ret = write_shared_index(istate, lock, flags);
if (ret)
return ret;
+   } else {
+   freshen_shared_index(sha1_to_hex(si->base_sha1));
}
 
return write_split_index(istate, lock, flags);
-- 
2.11.0.49.g2414764.dirty



[PATCH v2 15/21] config: add git_config_get_expiry() from gc.c

2016-12-17 Thread Christian Couder
This function will be used in a following commit to get the expiration
time of the shared index files from the config, and it is generic
enough to be put in "config.c".

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 builtin/gc.c | 15 ++-
 cache.h  |  3 +++
 config.c | 13 +
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index 069950d0b4..1e40d45aa2 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -62,17 +62,6 @@ static void report_pack_garbage(unsigned seen_bits, const 
char *path)
string_list_append(_garbage, path);
 }
 
-static void git_config_date_string(const char *key, const char **output)
-{
-   if (git_config_get_string_const(key, output))
-   return;
-   if (strcmp(*output, "now")) {
-   unsigned long now = approxidate("now");
-   if (approxidate(*output) >= now)
-   git_die_config(key, _("Invalid %s: '%s'"), key, 
*output);
-   }
-}
-
 static void process_log_file(void)
 {
struct stat st;
@@ -111,8 +100,8 @@ static void gc_config(void)
git_config_get_int("gc.auto", _auto_threshold);
git_config_get_int("gc.autopacklimit", _auto_pack_limit);
git_config_get_bool("gc.autodetach", _auto);
-   git_config_date_string("gc.pruneexpire", _expire);
-   git_config_date_string("gc.worktreepruneexpire", 
_worktrees_expire);
+   git_config_get_expiry("gc.pruneexpire", _expire);
+   git_config_get_expiry("gc.worktreepruneexpire", 
_worktrees_expire);
git_config(git_default_config, NULL);
 }
 
diff --git a/cache.h b/cache.h
index f442f28189..279415afbd 100644
--- a/cache.h
+++ b/cache.h
@@ -1827,6 +1827,9 @@ extern int git_config_get_untracked_cache(void);
 extern int git_config_get_split_index(void);
 extern int git_config_get_max_percent_split_change(void);
 
+/* This dies if the configured or default date is in the future */
+extern int git_config_get_expiry(const char *key, const char **output);
+
 /*
  * This is a hack for test programs like test-dump-untracked-cache to
  * ensure that they do not modify the untracked cache when reading it.
diff --git a/config.c b/config.c
index 3e96c223f5..5c52cefd78 100644
--- a/config.c
+++ b/config.c
@@ -1685,6 +1685,19 @@ int git_config_get_pathname(const char *key, const char 
**dest)
return ret;
 }
 
+int git_config_get_expiry(const char *key, const char **output)
+{
+   int ret = git_config_get_string_const(key, output);
+   if (ret)
+   return ret;
+   if (strcmp(*output, "now")) {
+   unsigned long now = approxidate("now");
+   if (approxidate(*output) >= now)
+   git_die_config(key, _("Invalid %s: '%s'"), key, 
*output);
+   }
+   return ret;
+}
+
 int git_config_get_untracked_cache(void)
 {
int val = -1;
-- 
2.11.0.49.g2414764.dirty



[PATCH v2 21/21] Documentation/git-update-index: explain splitIndex.*

2016-12-17 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 Documentation/config.txt   |  6 +++---
 Documentation/git-update-index.txt | 37 +
 2 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 8fbef25cb1..52a3cac4ff 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2782,9 +2782,9 @@ splitIndex.sharedIndexExpire::
index file is created. The value "now" expires all entries
immediately, and "never" suppresses expiration altogether.
The default value is "one.week.ago".
-   Note that each time a new split-index file is created, the
-   mtime of the related shared index file is updated to the
-   current time.
+   Note that each time a split index based on a shared index file
+   is either created or read from, the mtime of the shared index
+   file is updated to the current time.
See linkgit:git-update-index[1].
 
 status.relativePaths::
diff --git a/Documentation/git-update-index.txt 
b/Documentation/git-update-index.txt
index e091b2a409..46c953b2f2 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -163,14 +163,10 @@ may not support it yet.
 
 --split-index::
 --no-split-index::
-   Enable or disable split index mode. If enabled, the index is
-   split into two files, $GIT_DIR/index and $GIT_DIR/sharedindex..
-   Changes are accumulated in $GIT_DIR/index while the shared
-   index file contains all index entries stays unchanged. If
-   split-index mode is already enabled and `--split-index` is
-   given again, all changes in $GIT_DIR/index are pushed back to
-   the shared index file. This mode is designed for very large
-   indexes that take a significant amount of time to read or write.
+   Enable or disable split index mode. If split-index mode is
+   already enabled and `--split-index` is given again, all
+   changes in $GIT_DIR/index are pushed back to the shared index
+   file.
 +
 These options take effect whatever the value of the `core.splitIndex`
 configuration variable (see linkgit:git-config[1]). But a warning is
@@ -394,6 +390,31 @@ Although this bit looks similar to assume-unchanged bit, 
its goal is
 different from assume-unchanged bit's. Skip-worktree also takes
 precedence over assume-unchanged bit when both are set.
 
+Split index
+---
+
+This mode is designed for very large indexes that take a significant
+amount of time to read or write.
+
+In this mode, the index is split into two files, $GIT_DIR/index and
+$GIT_DIR/sharedindex.. Changes are accumulated in
+$GIT_DIR/index, the split index, while the shared index file contains
+all index entries and stays unchanged.
+
+All changes in the split index are pushed back to the shared index
+file when the number of entries in the split index reaches a level
+specified by the splitIndex.maxPercentChange config variable (see
+linkgit:git-config[1]).
+
+Each time a new shared index file is created, the old shared index
+files are deleted if their mtime is older than what is specified by
+the splitIndex.sharedIndexExpire config variable (see
+linkgit:git-config[1]).
+
+To avoid deleting a shared index file that is still used, its mtime is
+updated to the current time everytime a new split index based on the
+shared index file is either created or read from.
+
 Untracked cache
 ---
 
-- 
2.11.0.49.g2414764.dirty



[PATCH v2 07/21] Documentation/config: add information for core.splitIndex

2016-12-17 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 Documentation/config.txt | 4 
 1 file changed, 4 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index a0ab66aae7..dc44d8a417 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -331,6 +331,10 @@ core.trustctime::
crawlers and some backup systems).
See linkgit:git-update-index[1]. True by default.
 
+core.splitIndex::
+   If true, the split-index feature of the index will be used.
+   See linkgit:git-update-index[1]. False by default.
+
 core.untrackedCache::
Determines what to do about the untracked cache feature of the
index. It will be kept, if this variable is unset or set to
-- 
2.11.0.49.g2414764.dirty



[PATCH v2 16/21] read-cache: unlink old sharedindex files

2016-12-17 Thread Christian Couder
Everytime split index is turned on, it creates a "sharedindex."
file in the git directory. This change makes sure that shared index
files that haven't been used for a long time are removed when a new
shared index file is created.

The new "splitIndex.sharedIndexExpire" config variable is created
to tell the delay after which an unused shared index file can be
deleted. It defaults to "1.week.ago".

A previous commit made sure that each time a split index file is
created the mtime of the shared index file it references is updated.
This makes sure that recently used shared index file will not be
deleted.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 read-cache.c | 63 +++-
 1 file changed, 62 insertions(+), 1 deletion(-)

diff --git a/read-cache.c b/read-cache.c
index 42688261f7..8e99a7c325 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2210,6 +2210,64 @@ static int write_split_index(struct index_state *istate,
return ret;
 }
 
+static const char *shared_index_expire = "1.week.ago";
+
+static unsigned long get_shared_index_expire_date(void)
+{
+   static unsigned long shared_index_expire_date;
+   static int shared_index_expire_date_prepared;
+
+   if (!shared_index_expire_date_prepared) {
+   git_config_get_expiry("splitindex.sharedindexexpire",
+ _index_expire);
+   shared_index_expire_date = approxidate(shared_index_expire);
+   shared_index_expire_date_prepared = 1;
+   }
+
+   return shared_index_expire_date;
+}
+
+static int can_delete_shared_index(const char *shared_sha1_hex)
+{
+   struct stat st;
+   unsigned long expiration;
+   const char *shared_index = git_path("sharedindex.%s", shared_sha1_hex);
+
+   /* Check timestamp */
+   expiration = get_shared_index_expire_date();
+   if (!expiration)
+   return 0;
+   if (stat(shared_index, ))
+   return error_errno(_("could not stat '%s"), shared_index);
+   if (st.st_mtime > expiration)
+   return 0;
+
+   return 1;
+}
+
+static int clean_shared_index_files(const char *current_hex)
+{
+   struct dirent *de;
+   DIR *dir = opendir(get_git_dir());
+
+   if (!dir)
+   return error_errno(_("unable to open git dir: %s"), 
get_git_dir());
+
+   while ((de = readdir(dir)) != NULL) {
+   const char *sha1_hex;
+   if (!skip_prefix(de->d_name, "sharedindex.", _hex))
+   continue;
+   if (!strcmp(sha1_hex, current_hex))
+   continue;
+   if (can_delete_shared_index(sha1_hex) > 0 &&
+   unlink(git_path("%s", de->d_name)))
+   error_errno(_("unable to unlink: %s"), git_path("%s", 
de->d_name));
+   }
+   closedir(dir);
+
+   return 0;
+}
+
 static struct tempfile temporary_sharedindex;
 
 static int write_shared_index(struct index_state *istate,
@@ -2231,8 +2289,11 @@ static int write_shared_index(struct index_state *istate,
}
ret = rename_tempfile(_sharedindex,
  git_path("sharedindex.%s", 
sha1_to_hex(si->base->sha1)));
-   if (!ret)
+   if (!ret) {
hashcpy(si->base_sha1, si->base->sha1);
+   clean_shared_index_files(sha1_to_hex(si->base->sha1));
+   }
+
return ret;
 }
 
-- 
2.11.0.49.g2414764.dirty



[PATCH v2 20/21] Documentation/config: add splitIndex.sharedIndexExpire

2016-12-17 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 Documentation/config.txt | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 08f638c65c..8fbef25cb1 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2776,6 +2776,17 @@ splitIndex.maxPercentChange::
than 20 percent of the total number of entries.
See linkgit:git-update-index[1].
 
+splitIndex.sharedIndexExpire::
+   When the split index feature is used, shared index files with
+   a mtime older than this time will be removed when a new shared
+   index file is created. The value "now" expires all entries
+   immediately, and "never" suppresses expiration altogether.
+   The default value is "one.week.ago".
+   Note that each time a new split-index file is created, the
+   mtime of the related shared index file is updated to the
+   current time.
+   See linkgit:git-update-index[1].
+
 status.relativePaths::
By default, linkgit:git-status[1] shows paths relative to the
current directory. Setting this variable to `false` shows paths
-- 
2.11.0.49.g2414764.dirty



[PATCH v2 03/21] split-index: add {add,remove}_split_index() functions

2016-12-17 Thread Christian Couder
Also use the functions in cmd_update_index() in
builtin/update-index.c.

These functions will be used in a following commit to tweak
our use of the split-index feature depending on the setting
of a configuration variable.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 builtin/update-index.c | 18 ++
 split-index.c  | 22 ++
 split-index.h  |  2 ++
 3 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index f3f07e7f1c..b75ea037dd 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -1098,18 +1098,12 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
}
 
if (split_index > 0) {
-   init_split_index(_index);
-   the_index.cache_changed |= SPLIT_INDEX_ORDERED;
-   } else if (!split_index && the_index.split_index) {
-   /*
-* can't discard_split_index(_index); because that
-* will destroy split_index->base->cache[], which may
-* be shared with the_index.cache[]. So yeah we're
-* leaking a bit here.
-*/
-   the_index.split_index = NULL;
-   the_index.cache_changed |= SOMETHING_CHANGED;
-   }
+   if (the_index.split_index)
+   the_index.cache_changed |= SPLIT_INDEX_ORDERED;
+   else
+   add_split_index(_index);
+   } else if (!split_index)
+   remove_split_index(_index);
 
switch (untracked_cache) {
case UC_UNSPECIFIED:
diff --git a/split-index.c b/split-index.c
index 615f4cac05..f519e60f87 100644
--- a/split-index.c
+++ b/split-index.c
@@ -317,3 +317,25 @@ void replace_index_entry_in_base(struct index_state 
*istate,
istate->split_index->base->cache[new->index - 1] = new;
}
 }
+
+void add_split_index(struct index_state *istate)
+{
+   if (!istate->split_index) {
+   init_split_index(istate);
+   istate->cache_changed |= SPLIT_INDEX_ORDERED;
+   }
+}
+
+void remove_split_index(struct index_state *istate)
+{
+   if (istate->split_index) {
+   /*
+* can't discard_split_index(_index); because that
+* will destroy split_index->base->cache[], which may
+* be shared with the_index.cache[]. So yeah we're
+* leaking a bit here.
+*/
+   istate->split_index = NULL;
+   istate->cache_changed |= SOMETHING_CHANGED;
+   }
+}
diff --git a/split-index.h b/split-index.h
index c1324f521a..df91c1bda8 100644
--- a/split-index.h
+++ b/split-index.h
@@ -31,5 +31,7 @@ void merge_base_index(struct index_state *istate);
 void prepare_to_write_split_index(struct index_state *istate);
 void finish_writing_split_index(struct index_state *istate);
 void discard_split_index(struct index_state *istate);
+void add_split_index(struct index_state *istate);
+void remove_split_index(struct index_state *istate);
 
 #endif
-- 
2.11.0.49.g2414764.dirty



[PATCH v2 13/21] sha1_file: make check_and_freshen_file() non static

2016-12-17 Thread Christian Couder
This function will be used in a commit soon, so let's make
it available globally.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 cache.h | 3 +++
 sha1_file.c | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index e15b421b6f..f442f28189 100644
--- a/cache.h
+++ b/cache.h
@@ -1170,6 +1170,9 @@ extern int has_pack_index(const unsigned char *sha1);
 
 extern void assert_sha1_type(const unsigned char *sha1, enum object_type 
expect);
 
+/* Helper to check and "touch" a file */
+extern int check_and_freshen_file(const char *fn, int freshen);
+
 extern const signed char hexval_table[256];
 static inline unsigned int hexval(unsigned char c)
 {
diff --git a/sha1_file.c b/sha1_file.c
index 9c86d1924a..7e5846d4f9 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -576,7 +576,7 @@ static int freshen_file(const char *fn)
  * either does not exist on disk, or has a stale mtime and may be subject to
  * pruning).
  */
-static int check_and_freshen_file(const char *fn, int freshen)
+int check_and_freshen_file(const char *fn, int freshen)
 {
if (access(fn, F_OK))
return 0;
-- 
2.11.0.49.g2414764.dirty



[PATCH v2 17/21] t1700: test shared index file expiration

2016-12-17 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/t1700-split-index.sh | 44 
 1 file changed, 44 insertions(+)

diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index f03addf654..f448fc13cd 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -310,4 +310,48 @@ EOF
test_cmp expect actual
 '
 
+test_expect_success 'shared index files expire after 7 days by default' '
+   : >ten &&
+   git update-index --add ten &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   just_under_7_days_ago=$((1-7*86400)) &&
+   test-chmtime =$just_under_7_days_ago .git/sharedindex.* &&
+   : >eleven &&
+   git update-index --add eleven &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   just_over_7_days_ago=$((-1-7*86400)) &&
+   test-chmtime =$just_over_7_days_ago .git/sharedindex.* &&
+   : >twelve &&
+   git update-index --add twelve &&
+   test $(ls .git/sharedindex.* | wc -l) = 1
+'
+
+test_expect_success 'check splitIndex.sharedIndexExpire set to 8 days' '
+   git config splitIndex.sharedIndexExpire "8.days.ago" &&
+   test-chmtime =$just_over_7_days_ago .git/sharedindex.* &&
+   : >thirteen &&
+   git update-index --add thirteen &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   just_over_8_days_ago=$((-1-8*86400)) &&
+   test-chmtime =$just_over_8_days_ago .git/sharedindex.* &&
+   : >fourteen &&
+   git update-index --add fourteen &&
+   test $(ls .git/sharedindex.* | wc -l) = 1
+'
+
+test_expect_success 'check splitIndex.sharedIndexExpire set to "never" and 
"now"' '
+   git config splitIndex.sharedIndexExpire never &&
+   just_10_years_ago=$((-365*10*86400)) &&
+   test-chmtime =$just_10_years_ago .git/sharedindex.* &&
+   : >fifteen &&
+   git update-index --add fifteen &&
+   test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+   git config splitIndex.sharedIndexExpire now &&
+   just_1_second_ago=-1 &&
+   test-chmtime =$just_1_second_ago .git/sharedindex.* &&
+   : >sixteen &&
+   git update-index --add sixteen &&
+   test $(ls .git/sharedindex.* | wc -l) = 1
+'
+
 test_done
-- 
2.11.0.49.g2414764.dirty



[PATCH v2 05/21] update-index: warn in case of split-index incoherency

2016-12-17 Thread Christian Couder
When users are using `git update-index --(no-)split-index`, they
may expect the split-index feature to be used or not according to
the option they just used, but this might not be the case if the
new "core.splitIndex" config variable has been set. In this case
let's warn about what will happen and why.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 builtin/update-index.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index b75ea037dd..dc1fd0d44d 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -1098,12 +1098,21 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
}
 
if (split_index > 0) {
+   if (git_config_get_split_index() == 0)
+   warning(_("core.splitIndex is set to false; "
+ "remove or change it, if you really want to "
+ "enable split index"));
if (the_index.split_index)
the_index.cache_changed |= SPLIT_INDEX_ORDERED;
else
add_split_index(_index);
-   } else if (!split_index)
+   } else if (!split_index) {
+   if (git_config_get_split_index() == 1)
+   warning(_("core.splitIndex is set to true; "
+ "remove or change it, if you really want to "
+ "disable split index"));
remove_split_index(_index);
+   }
 
switch (untracked_cache) {
case UC_UNSPECIFIED:
-- 
2.11.0.49.g2414764.dirty



[PATCH v2 00/21] Add configuration options for split-index

2016-12-17 Thread Christian Couder
1 and 19/21 are new patches. They update the mtime
  of the shared index file when a split index based on the shared
  index file is read from. 18/21 is a refactoring to make the
  actual change in 19/21 easier.

- Patches 20/21 and 21/21 add some documentation for the new
  feature.

The changes since v1 in this step are:

- a warning was removed from 14/21 as suggested by Junio,
- code to touch the shared index file as been refactored into
  a function in 14/21,
- a function has been renamed git_config_get_expiry() in 15/21
  as suggested by Junio,
- error messages have been marked for translation in 16/21 as
  suggested by Duy,
- "return error_errno(...)" is now used as suggested by Duy in
  16/21,
- patches 18/21 and 19/21 are new following Duy's suggestion,
- patches 20/21 and 21/21 are updated to take changes made in
  19/21 into account.

Links
~

This patch series is also available here:

  https://github.com/chriscool/git/commits/config-split-index

The previous versions were:

  RFC: https://github.com/chriscool/git/commits/config-split-index7
  v1:  https://github.com/chriscool/git/commits/config-split-index72

On the mailing list the related patch series and discussions were:

  RFC: 
https://public-inbox.org/git/20160711172254.13439-1-chrisc...@tuxfamily.org/
  v1:  
https://public-inbox.org/git/20161023092648.12086-1-chrisc...@tuxfamily.org/

Christian Couder (21):
  config: mark an error message up for translation
  config: add git_config_get_split_index()
  split-index: add {add,remove}_split_index() functions
  read-cache: add and then use tweak_split_index()
  update-index: warn in case of split-index incoherency
  t1700: add tests for core.splitIndex
  Documentation/config: add information for core.splitIndex
  Documentation/git-update-index: talk about core.splitIndex config var
  config: add git_config_get_max_percent_split_change()
  read-cache: regenerate shared index if necessary
  t1700: add tests for splitIndex.maxPercentChange
  Documentation/config: add splitIndex.maxPercentChange
  sha1_file: make check_and_freshen_file() non static
  read-cache: touch shared index files when used
  config: add git_config_get_expiry() from gc.c
  read-cache: unlink old sharedindex files
  t1700: test shared index file expiration
  read-cache: refactor read_index_from()
  read-cache: use freshen_shared_index() in read_index_from()
  Documentation/config: add splitIndex.sharedIndexExpire
  Documentation/git-update-index: explain splitIndex.*

 Documentation/config.txt   |  28 +++
 Documentation/git-update-index.txt |  43 +--
 builtin/gc.c   |  15 +---
 builtin/update-index.c |  25 +++---
 cache.h|   8 ++
 config.c   |  42 +-
 read-cache.c   | 142 --
 sha1_file.c|   2 +-
 split-index.c  |  22 ++
 split-index.h  |   2 +
 t/t1700-split-index.sh | 154 +
 11 files changed, 441 insertions(+), 42 deletions(-)

-- 
2.11.0.53.gb263787



[PATCH v2 02/21] config: add git_config_get_split_index()

2016-12-17 Thread Christian Couder
This new function will be used in a following commit to know
if we want to use the split index feature or not.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 cache.h  |  1 +
 config.c | 10 ++
 2 files changed, 11 insertions(+)

diff --git a/cache.h b/cache.h
index a50a61a197..c126fe475e 100644
--- a/cache.h
+++ b/cache.h
@@ -1821,6 +1821,7 @@ extern int git_config_get_bool_or_int(const char *key, 
int *is_bool, int *dest);
 extern int git_config_get_maybe_bool(const char *key, int *dest);
 extern int git_config_get_pathname(const char *key, const char **dest);
 extern int git_config_get_untracked_cache(void);
+extern int git_config_get_split_index(void);
 
 /*
  * This is a hack for test programs like test-dump-untracked-cache to
diff --git a/config.c b/config.c
index 2eaf8ad77a..c1343bbb3e 100644
--- a/config.c
+++ b/config.c
@@ -1709,6 +1709,16 @@ int git_config_get_untracked_cache(void)
return -1; /* default value */
 }
 
+int git_config_get_split_index(void)
+{
+   int val = -1;
+
+   if (!git_config_get_maybe_bool("core.splitindex", ))
+   return val;
+
+   return -1; /* default value */
+}
+
 NORETURN
 void git_die_config_linenr(const char *key, const char *filename, int linenr)
 {
-- 
2.11.0.49.g2414764.dirty



<    5   6   7   8   9   10   11   12   13   14   >