[PATCH v4 0/5] Improving performance of git clean

2015-04-25 Thread Erik Elfström
v3 of the patch can be found here:
http://thread.gmane.org/gmane.comp.version-control.git/267422

Changes in v4:
* changed some tests to use more meaningful dir names.
* fixed performance test by doing "git clean -n" to avoid
  timing setup code. Increased test size to 10 directories
  (~0.5s runtime).
* changed interface of read_gitfile_gently to be able to
  return error code.
* fixed a compiler warning in read_gitfile_gently ("warning:
  ‘dir’ may be used uninitialized in this function").
* added sanity check of git file size in read_gitfile_gently
* updated commit message in [5/5] to more clearly motivate
  remaining behavioral changes of git clean.

Thanks to Junio C Hamano and Jeff King for comments and help on v3.

Erik Elfström (5):
  setup: add gentle version of read_gitfile
  setup: sanity check file size in read_gitfile_gently
  t7300: add tests to document behavior of clean and nested git
  p7300: add performance tests for clean
  clean: improve performance when removing lots of directories

 builtin/clean.c   |  26 +--
 cache.h   |   3 +-
 setup.c   |  88 ---
 t/perf/p7300-clean.sh |  31 +
 t/t7300-clean.sh  | 126 ++
 5 files changed, 252 insertions(+), 22 deletions(-)
 create mode 100755 t/perf/p7300-clean.sh

-- 
2.4.0.rc3.8.g4ebd28d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 1/5] setup: add gentle version of read_gitfile

2015-04-25 Thread Erik Elfström
read_gitfile will die on most error cases. This makes it unsuitable
for speculative calls. Extract the core logic and provide a gentle
version that returns NULL on failure.

The first usecase of the new gentle version will be to probe for
submodules during git clean.

Helped-by: Junio C Hamano 
Helped-by: Jeff King 
Signed-off-by: Erik Elfström 
---
 cache.h |  3 ++-
 setup.c | 82 +++--
 2 files changed, 67 insertions(+), 18 deletions(-)

diff --git a/cache.h b/cache.h
index 3d3244b..6e29068 100644
--- a/cache.h
+++ b/cache.h
@@ -431,7 +431,8 @@ extern int set_git_dir(const char *path);
 extern const char *get_git_namespace(void);
 extern const char *strip_namespace(const char *namespaced_ref);
 extern const char *get_git_work_tree(void);
-extern const char *read_gitfile(const char *path);
+extern const char *read_gitfile_gently(const char *path, int 
*return_error_code);
+#define read_gitfile(path) read_gitfile_gently((path), NULL)
 extern const char *resolve_gitdir(const char *suspect);
 extern void set_git_work_tree(const char *tree);
 
diff --git a/setup.c b/setup.c
index 979b13f..e1897cc 100644
--- a/setup.c
+++ b/setup.c
@@ -335,35 +335,53 @@ static int check_repository_format_gently(const char 
*gitdir, int *nongit_ok)
 /*
  * Try to read the location of the git directory from the .git file,
  * return path to git directory if found.
+ *
+ * On failure, if return_error_code is not NULL, return_error_code
+ * will be set to an error code and NULL will be returned. If
+ * return_error_code is NULL the function will die instead (for most
+ * cases).
  */
-const char *read_gitfile(const char *path)
+const char *read_gitfile_gently(const char *path, int *return_error_code)
 {
-   char *buf;
-   char *dir;
+   int error_code = 0;
+   char *buf = NULL;
+   char *dir = NULL;
const char *slash;
struct stat st;
int fd;
ssize_t len;
 
-   if (stat(path, &st))
-   return NULL;
-   if (!S_ISREG(st.st_mode))
-   return NULL;
+   if (stat(path, &st)) {
+   error_code = 1;
+   goto cleanup_return;
+   }
+   if (!S_ISREG(st.st_mode)) {
+   error_code = 2;
+   goto cleanup_return;
+   }
fd = open(path, O_RDONLY);
-   if (fd < 0)
-   die_errno("Error opening '%s'", path);
+   if (fd < 0) {
+   error_code = 3;
+   goto cleanup_return;
+   }
buf = xmalloc(st.st_size + 1);
len = read_in_full(fd, buf, st.st_size);
close(fd);
-   if (len != st.st_size)
-   die("Error reading %s", path);
+   if (len != st.st_size) {
+   error_code = 4;
+   goto cleanup_return;
+   }
buf[len] = '\0';
-   if (!starts_with(buf, "gitdir: "))
-   die("Invalid gitfile format: %s", path);
+   if (!starts_with(buf, "gitdir: ")) {
+   error_code = 5;
+   goto cleanup_return;
+   }
while (buf[len - 1] == '\n' || buf[len - 1] == '\r')
len--;
-   if (len < 9)
-   die("No path in gitfile: %s", path);
+   if (len < 9) {
+   error_code = 6;
+   goto cleanup_return;
+   }
buf[len] = '\0';
dir = buf + 8;
 
@@ -378,11 +396,41 @@ const char *read_gitfile(const char *path)
buf = dir;
}
 
-   if (!is_git_directory(dir))
-   die("Not a git repository: %s", dir);
+   if (!is_git_directory(dir)) {
+   error_code = 7;
+   goto cleanup_return;
+   }
path = real_path(dir);
 
+cleanup_return:
free(buf);
+
+   if (return_error_code)
+   *return_error_code = error_code;
+
+   if (error_code) {
+   if (return_error_code)
+   return NULL;
+
+   switch (error_code) {
+   case 1: // failed to stat
+   case 2: // not regular file
+   return NULL;
+   case 3:
+   die_errno("Error opening '%s'", path);
+   case 4:
+   die("Error reading %s", path);
+   case 5:
+   die("Invalid gitfile format: %s", path);
+   case 6:
+   die("No path in gitfile: %s", path);
+   case 7:
+   die("Not a git repository: %s", dir);
+   default:
+   assert(0);
+   }
+   }
+
return path;
 }
 
-- 
2.4.0.rc3.8.g4ebd28d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 5/5] clean: improve performance when removing lots of directories

2015-04-25 Thread Erik Elfström
"git clean" uses resolve_gitlink_ref() to check for the presence of
nested git repositories, but it has the drawback of creating a
ref_cache entry for every directory that should potentially be
cleaned. The linear search through the ref_cache list causes a massive
performance hit for large number of directories.

Modify clean.c:remove_dirs to use setup.c:is_git_directory and
setup.c:read_gitfile_gently instead.

Both these functions will open files and parse contents when they find
something that looks like a git repository. This is ok from a
performance standpoint since finding repository candidates should be
comparatively rare.

Using is_git_directory and read_gitfile_gently should give a more
standardized check for what is and what isn't a git repository but
also gives three behavioral changes.

The first change is that we will now detect and avoid cleaning empty
nested git repositories (only init run). This is desirable.

Second, we will no longer die when cleaning a file named ".git" with
garbage content (it will be cleaned instead). This is also desirable.

The last change is that we will detect and avoid cleaning empty bare
repositories that have been placed in a directory named ".git". This
is not desirable but should have no real user impact since we already
fail to clean non-empty bare repositories in the same scenario. This
is thus deemed acceptable.

Update t7300 to reflect these changes in behavior.

The time to clean an untracked directory containing 10 sub
directories went from 61s to 1.7s after this change.

Helped-by: Jeff King 
Signed-off-by: Erik Elfström 
---
 builtin/clean.c  | 26 ++
 t/t7300-clean.sh |  8 +++-
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/builtin/clean.c b/builtin/clean.c
index 98c103f..17a1c13 100644
--- a/builtin/clean.c
+++ b/builtin/clean.c
@@ -10,7 +10,6 @@
 #include "cache.h"
 #include "dir.h"
 #include "parse-options.h"
-#include "refs.h"
 #include "string-list.h"
 #include "quote.h"
 #include "column.h"
@@ -148,6 +147,27 @@ static int exclude_cb(const struct option *opt, const char 
*arg, int unset)
return 0;
 }
 
+/*
+ * Return 1 if the given path is the root of a git repository or
+ * submodule else 0. Will not return 1 for bare repositories with the
+ * exception of creating a bare repository in "foo/.git" and calling
+ * is_git_repository("foo").
+ */
+static int is_git_repository(struct strbuf *path)
+{
+   int ret = 0;
+   int unused_error_code;
+   size_t orig_path_len = path->len;
+   assert(orig_path_len != 0);
+   if (path->buf[orig_path_len - 1] != '/')
+   strbuf_addch(path, '/');
+   strbuf_addstr(path, ".git");
+   if (read_gitfile_gently(path->buf, &unused_error_code) || 
is_git_directory(path->buf))
+   ret = 1;
+   strbuf_setlen(path, orig_path_len);
+   return ret;
+}
+
 static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag,
int dry_run, int quiet, int *dir_gone)
 {
@@ -155,13 +175,11 @@ static int remove_dirs(struct strbuf *path, const char 
*prefix, int force_flag,
struct strbuf quoted = STRBUF_INIT;
struct dirent *e;
int res = 0, ret = 0, gone = 1, original_len = path->len, len;
-   unsigned char submodule_head[20];
struct string_list dels = STRING_LIST_INIT_DUP;
 
*dir_gone = 1;
 
-   if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) &&
-   !resolve_gitlink_ref(path->buf, "HEAD", 
submodule_head)) {
+   if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) && 
is_git_repository(path)) {
if (!quiet) {
quote_path_relative(path->buf, prefix, "ed);
printf(dry_run ?  _(msg_would_skip_git_dir) : 
_(msg_skip_git_dir),
diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh
index 11f3a6d..4d07a4a 100755
--- a/t/t7300-clean.sh
+++ b/t/t7300-clean.sh
@@ -455,7 +455,7 @@ test_expect_success 'nested git work tree' '
! test -d bar
 '
 
-test_expect_failure 'should clean things that almost look like git but are 
not' '
+test_expect_success 'should clean things that almost look like git but are 
not' '
rm -fr almost_git almost_bare_git almost_submodule &&
mkdir -p almost_git/.git/objects &&
mkdir -p almost_git/.git/refs &&
@@ -468,8 +468,6 @@ test_expect_failure 'should clean things that almost look 
like git but are not'
garbage
EOF
test_when_finished "rm -rf almost_*" &&
-   ## This will fail due to die("Invalid gitfile format: %s", path); in
-   ## setup.c:read_gitfile.
git clean -f -d &&
test_path_is_missing almost_git &&
test_path_is_missing almost_bare_git &&
@@ -501,7 +499,7 @@ test_expect_success 'should not clean submodules' '
test_path_is_missing to_clean
 '
 
-test_expect_failure 'nested (empty) git should be kept' '
+test_expect_success 'nested (empty) git should be kept' '
 

[PATCH v4 4/5] p7300: add performance tests for clean

2015-04-25 Thread Erik Elfström
The tests are run in dry-run mode to avoid having to restore the test
directories for each timed iteration. Using dry-run is an acceptable
compromise since we are mostly interested in the initial computation
of what to clean and not so much in the cleaning it self.

Signed-off-by: Erik Elfström 
---
 t/perf/p7300-clean.sh | 31 +++
 1 file changed, 31 insertions(+)
 create mode 100755 t/perf/p7300-clean.sh

diff --git a/t/perf/p7300-clean.sh b/t/perf/p7300-clean.sh
new file mode 100755
index 000..6ae55ec
--- /dev/null
+++ b/t/perf/p7300-clean.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+test_description="Test git-clean performance"
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+test_checkout_worktree
+
+test_expect_success 'setup untracked directory with many sub dirs' '
+   rm -rf 500_sub_dirs 10_sub_dirs clean_test_dir &&
+   mkdir 500_sub_dirs 10_sub_dirs clean_test_dir &&
+   for i in $(test_seq 1 500)
+   do
+   mkdir 500_sub_dirs/dir$i || return $?
+   done &&
+   for i in $(test_seq 1 200)
+   do
+   cp -r 500_sub_dirs 10_sub_dirs/dir$i || return $?
+   done
+'
+
+test_perf 'clean many untracked sub dirs, check for nested git' '
+   git clean -n -q -f -d 10_sub_dirs/
+'
+
+test_perf 'clean many untracked sub dirs, ignore nested git' '
+   git clean -n -q -f -f -d 10_sub_dirs/
+'
+
+test_done
-- 
2.4.0.rc3.8.g4ebd28d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 2/5] setup: sanity check file size in read_gitfile_gently

2015-04-25 Thread Erik Elfström
read_gitfile_gently will allocate a buffer to fit the entire file that
should be read. Add a sanity check of the file size before opening to
avoid allocating a potentially huge amount of memory if we come across
a large file that someone happened to name ".git". The limit is set to
a sufficiently unreasonable size that should never be exceeded by a
genuine .git file.

Signed-off-by: Erik Elfström 
---

I'm not sure about this one but it felt like the safe thing to do.
This patch can be dropped if it is not desired.

I considered testing it using
 "mkdir foo && truncate -s 200G foo/.git && git clean -f -d"
but that feels like a pretty evil test that is likely to cause lots
of problems and not fail in any good way.

 setup.c | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/setup.c b/setup.c
index e1897cc..ed87334 100644
--- a/setup.c
+++ b/setup.c
@@ -364,22 +364,26 @@ const char *read_gitfile_gently(const char *path, int 
*return_error_code)
error_code = 3;
goto cleanup_return;
}
+   if (st.st_size > PATH_MAX * 4) {
+   error_code = 4;
+   goto cleanup_return;
+   }
buf = xmalloc(st.st_size + 1);
len = read_in_full(fd, buf, st.st_size);
close(fd);
if (len != st.st_size) {
-   error_code = 4;
+   error_code = 5;
goto cleanup_return;
}
buf[len] = '\0';
if (!starts_with(buf, "gitdir: ")) {
-   error_code = 5;
+   error_code = 6;
goto cleanup_return;
}
while (buf[len - 1] == '\n' || buf[len - 1] == '\r')
len--;
if (len < 9) {
-   error_code = 6;
+   error_code = 7;
goto cleanup_return;
}
buf[len] = '\0';
@@ -397,7 +401,7 @@ const char *read_gitfile_gently(const char *path, int 
*return_error_code)
}
 
if (!is_git_directory(dir)) {
-   error_code = 7;
+   error_code = 8;
goto cleanup_return;
}
path = real_path(dir);
@@ -419,12 +423,14 @@ cleanup_return:
case 3:
die_errno("Error opening '%s'", path);
case 4:
-   die("Error reading %s", path);
+   die("Too large to be a .git file: '%s'", path);
case 5:
-   die("Invalid gitfile format: %s", path);
+   die("Error reading %s", path);
case 6:
-   die("No path in gitfile: %s", path);
+   die("Invalid gitfile format: %s", path);
case 7:
+   die("No path in gitfile: %s", path);
+   case 8:
die("Not a git repository: %s", dir);
default:
assert(0);
-- 
2.4.0.rc3.8.g4ebd28d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 3/5] t7300: add tests to document behavior of clean and nested git

2015-04-25 Thread Erik Elfström
Signed-off-by: Erik Elfström 
---
 t/t7300-clean.sh | 128 +++
 1 file changed, 128 insertions(+)

diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh
index 99be5d9..11f3a6d 100755
--- a/t/t7300-clean.sh
+++ b/t/t7300-clean.sh
@@ -455,6 +455,134 @@ test_expect_success 'nested git work tree' '
! test -d bar
 '
 
+test_expect_failure 'should clean things that almost look like git but are 
not' '
+   rm -fr almost_git almost_bare_git almost_submodule &&
+   mkdir -p almost_git/.git/objects &&
+   mkdir -p almost_git/.git/refs &&
+   cat >almost_git/.git/HEAD <<-\EOF &&
+   garbage
+   EOF
+   cp -r almost_git/.git/ almost_bare_git &&
+   mkdir almost_submodule/ &&
+   cat >almost_submodule/.git <<-\EOF &&
+   garbage
+   EOF
+   test_when_finished "rm -rf almost_*" &&
+   ## This will fail due to die("Invalid gitfile format: %s", path); in
+   ## setup.c:read_gitfile.
+   git clean -f -d &&
+   test_path_is_missing almost_git &&
+   test_path_is_missing almost_bare_git &&
+   test_path_is_missing almost_submodule
+'
+
+test_expect_success 'should not clean submodules' '
+   rm -fr repo to_clean sub1 sub2 &&
+   mkdir repo to_clean &&
+   (
+   cd repo &&
+   git init &&
+   test_commit msg hello.world
+   ) &&
+   git submodule add ./repo/.git sub1 &&
+   git commit -m "sub1" &&
+   git branch before_sub2 &&
+   git submodule add ./repo/.git sub2 &&
+   git commit -m "sub2" &&
+   git checkout before_sub2 &&
+   >to_clean/should_clean.this &&
+   git clean -f -d &&
+   test_path_is_file repo/.git/index &&
+   test_path_is_file repo/hello.world &&
+   test_path_is_file sub1/.git &&
+   test_path_is_file sub1/hello.world &&
+   test_path_is_file sub2/.git &&
+   test_path_is_file sub2/hello.world &&
+   test_path_is_missing to_clean
+'
+
+test_expect_failure 'nested (empty) git should be kept' '
+   rm -fr empty_repo to_clean &&
+   git init empty_repo &&
+   mkdir to_clean &&
+   >to_clean/should_clean.this &&
+   git clean -f -d &&
+   test_path_is_file empty_repo/.git/HEAD &&
+   test_path_is_missing to_clean
+'
+
+test_expect_success 'nested bare repositories should be cleaned' '
+   rm -fr bare1 bare2 subdir &&
+   git init --bare bare1 &&
+   git clone --local --bare . bare2 &&
+   mkdir subdir &&
+   cp -r bare2 subdir/bare3 &&
+   git clean -f -d &&
+   test_path_is_missing bare1 &&
+   test_path_is_missing bare2 &&
+   test_path_is_missing subdir
+'
+
+test_expect_success 'nested (empty) bare repositories should be cleaned even 
when in .git' '
+   rm -fr strange_bare &&
+   mkdir strange_bare &&
+   git init --bare strange_bare/.git &&
+   git clean -f -d &&
+   test_path_is_missing strange_bare
+'
+
+test_expect_failure 'nested (non-empty) bare repositories should be cleaned 
even when in .git' '
+   rm -fr strange_bare &&
+   mkdir strange_bare &&
+   git clone --local --bare . strange_bare/.git &&
+   git clean -f -d &&
+   test_path_is_missing strange_bare
+'
+
+test_expect_success 'giving path in nested git work tree will remove it' '
+   rm -fr repo &&
+   mkdir repo &&
+   (
+   cd repo &&
+   git init &&
+   mkdir -p bar/baz &&
+   test_commit msg bar/baz/hello.world
+   ) &&
+   git clean -f -d repo/bar/baz &&
+   test_path_is_file repo/.git/HEAD &&
+   test_path_is_dir repo/bar/ &&
+   test_path_is_missing repo/bar/baz
+'
+
+test_expect_success 'giving path to nested .git will not remove it' '
+   rm -fr repo &&
+   mkdir repo untracked &&
+   (
+   cd repo &&
+   git init &&
+   test_commit msg hello.world
+   ) &&
+   git clean -f -d repo/.git &&
+   test_path_is_file repo/.git/HEAD &&
+   test_path_is_dir repo/.git/refs &&
+   test_path_is_dir repo/.git/objects &&
+   test_path_is_dir untracked/
+'
+
+test_expect_success 'giving path to nested .git/ will remove contents' '
+   rm -fr repo untracked &&
+   mkdir repo untracked &&
+   (
+   cd repo &&
+   git init &&
+   test_commit msg hello.world
+   ) &&
+   git clean -f -d repo/.git/ &&
+   test_path_is_dir repo/.git &&
+   test_dir_is_empty repo/.git &&
+   test_path_is_dir untracked/
+'
+
 test_expect_success 'force removal of nested git work tree' '
rm -fr foo bar baz &&
mkdir -p foo bar baz/boo &&
-- 
2.4.0.rc3.8.g4ebd28d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option

2015-04-25 Thread karthik nayak


On 04/22/2015 02:06 AM, Junio C Hamano wrote:

Eric Sunshine  writes:

> It's easy to be blinded into thinking that cat-file's new option
> should be named --literally since it was inspired by the --literally
> option of hash-object, but indeed it may not be the best choice.

Yeah, I wouldn't even say "inspired".  It was envisioned as a
counter-part debugging aid, nothing more.

Is there any other way to make cat-file looser other than accepting
an unknown type name from the future?  If so, then perhaps it may
make sense to give it a generic name that implys that we would
trigger such additional looseness in the future.  But as the
inventor of it as a debugging aid, I would say a name that spells
out the specific way this option is being loose, e.g.

>  --allow-bogus-type

or with s/bogus/unknown/, best describes what it currently does.

Yes this gives the best description, but its large, while we could use something
like --no-strict instead. Is the size worth the trade-off for a better 
description?


Thanks.





--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] t0027: Add repoMIX and LF_nul

2015-04-25 Thread Johannes Schindelin
Hi Torsten,

On 2015-04-25 08:47, Torsten Bögershausen wrote:
> "new safer autocrlf handling":
>   Check if eols in a file are converted at commit, when the file has
>   CR (or CLLF) in the repo (technically speaking in the index).
>   Add a test-file repoMIX with mixed line-endings.
>   When converting LF->CRLF or CRLF->LF: check the warnings
> 
> checkout_files():
>   Checking out CRLF_nul and checking for eol coversion does not
>   make much sense (CRLF will stay CRLF).
>   Use the file LF_nul instead: It is handled a binary in "auto" modes,
>   and when declared as text the LF may be replaced with CRLF, depending
>   on the configuration

Works precisely as advertised on Windows (tested on top of Git for Windows 
2.3.6 release 2).

> Signed-off-by: Torsten Bögershausen 

FWIW Acked-by: Johannes Schindelin 

Ciao,
Dscho
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 2/2] connect: improve check for plink to reduce false positives

2015-04-25 Thread Torsten Bögershausen
On 2015-04-25 00.28, brian m. carlson wrote:
> The git_connect function has code to handle plink and tortoiseplink
> specially, as they require different command line arguments from
> OpenSSH.  However, the match was done by checking for "plink"
> case-insensitively in the string, which led to false positives when
> GIT_SSH contained "uplink".  Improve the check by looking for "plink" or
> "tortoiseplink" (or those names suffixed with ".exe") in the final
> component of the path.
> 
(I'm not sute if the commit message describes the problem deep enough
for readers which are not familar with all the details of the original
report):
A feature implemented for Windows may break things for e.g. Linux users)
The following may read exaggerated, so please read it as a suggestion.

The git_connect function has code to handle plink and tortoiseplink
specially, as they require different command line arguments compared to
OpenSSH (-P instead of -p, tortoiseplink uses -batch), commit 36ad53ffee6ed.
The special handling is only needed for Windows, and a sloppy
case-insensitve search for "plink" will trigger that an the extra
parameter "-batch" is added to the command line.

This was observed on a Linux system where a command line including
"/uplink_deploy/" was used.

There are different ways to improve the situation:
(The following mentions only plink, but assumes that tortoiseplink is handled
 similar)
a) Disable the plink/tortoiseplink special handling on non-Windows systems
b) Tighten the search for plink:
   Allow basename() == plink || !!strcasestr(ssh, "plink.exe")
c) Tighten the search for plink:
   Allow basename() == plink || !!strcasestr(ssh, "plink.exe")
d) Tighten the check for tortoiseplink.
   Today we set "int putty" to true when plink is found, and -batch
   is set when tortoiseplink is not found.
   This fixes the reported bug, but still has the -P problem.
e) Unix users typically use shell scripts and could use GIT_SSH_COMMAND.
   Declare the GIT_SSH as not-well-documented (and to obsolete ?) for 
non-Windows systems,


This patch implements c):
Extract the basename and compare it to plink, plink.exe respective
tortoiseplink/tortoiseplink.exe

Note that there is a slight risk of breakage for Windows users:
Strings like "myplink" or "plink-0.83" are no longer accepted.

-
I would probably vote for a), as Unix/Linux/Mac OS users don't use 
plink/tortoiseplink
at all. 
-
What about adding test-cases in t5601,
this will ease the documentation later.
f:/util/plink
/c/util/plink.exe
f:/util/tortoiseplink
/c/util/tortoiseplink.exe
/usr/local/uplink/sshwrapper.sh


Other opinions, other thoughts ?
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] git-gui: sort entries in tclIndex

2015-04-25 Thread René Scharfe

Looping in Pat (git-gui maintainer).

Am 15.04.2015 um 09:22 schrieb Olaf Hering:

Ping?

On Tue, Feb 10, Olaf Hering wrote:


Ping?

On Mon, Jan 26, Olaf Hering wrote:


ALL_LIBFILES uses wildcard, which provides the result in directory
order. This order depends on the underlying filesystem on the
buildhost. To get reproducible builds it is required to sort such list
before using them.

Signed-off-by: Olaf Hering 
---
  git-gui/Makefile | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/git-gui/Makefile b/git-gui/Makefile
index cde8b2e..7564a18 100644
--- a/git-gui/Makefile
+++ b/git-gui/Makefile
@@ -258,7 +258,7 @@ lib/tclIndex: $(ALL_LIBFILES) GIT-GUI-VARS
 rm -f $@ ; \
 echo '# Autogenerated by git-gui Makefile' >$@ && \
 echo >>$@ && \
-$(foreach p,$(PRELOAD_FILES) $(ALL_LIBFILES),echo '$(subst lib/,,$p)' >>$@ 
&&) \
+$(foreach p,$(PRELOAD_FILES) $(sort $(ALL_LIBFILES)),echo '$(subst lib/,,$p)' 
>>$@ &&) \
 echo >>$@ ; \
fi



--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] t0027: Add repoMIX and LF_nul

2015-04-25 Thread Junio C Hamano
Torsten Bögershausen  writes:

> "new safer autocrlf handling":
>   Check if eols in a file are converted at commit, when the file has
>   CR (or CLLF) in the repo (technically speaking in the index).

s/CLLF/CRLF/?  (no need to resend for this; I'll locally amend)

>   Add a test-file repoMIX with mixed line-endings.
>   When converting LF->CRLF or CRLF->LF: check the warnings
>
> checkout_files():
>   Checking out CRLF_nul and checking for eol coversion does not
>   make much sense (CRLF will stay CRLF).

Hmph, would it still make sense to make sure that CRLF will stay CRLF
so that future changes to break this expectation can be caught?  Not
that such a breakage is likely...

>   Use the file LF_nul instead: It is handled a binary in "auto" modes,
>   and when declared as text the LF may be replaced with CRLF, depending
>   on the configuration

Makes sense.  Thanks.

> Signed-off-by: Torsten Bögershausen 
> ---
> Changes since V1:
> Use TAB for indentation at on place (Thanks Eric)
>  t/t0027-auto-crlf.sh | 157 
> ---
>  1 file changed, 85 insertions(+), 72 deletions(-)
>
> diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh
> index 810934b..1a56e5e 100755
> --- a/t/t0027-auto-crlf.sh
> +++ b/t/t0027-auto-crlf.sh
> @@ -71,12 +71,21 @@ commit_check_warn () {
>   attr=$2
>   lfname=$3
>   crlfname=$4
> - lfmixcrlf=$5
> - lfmixcr=$6
> - crlfnul=$7
> - create_gitattributes "$attr" &&
> + repoMIX=$5
> + lfmixcrlf=$6
> + lfmixcr=$7
> + crlfnul=$8
>   pfx=crlf_${crlf}_attr_${attr}
> - for f in LF CRLF LF_mix_CR CRLF_mix_LF CRLF_nul
> + # Special handling for repoMIX: It should already be in the repo
> + # with CRLF
> + f=repoMIX
> + fname=${pfx}_$f.txt
> + echo >.gitattributes &&
> + cp $f $fname &&
> + git -c core.autocrlf=false add $fname 2>"${pfx}_$f.err" &&
> + git commit -m "repoMIX" &&
> + create_gitattributes "$attr" &&
> + for f in LF CRLF repoMIX LF_mix_CR CRLF_mix_LF LF_nul CRLF_nul
>   do
>   fname=${pfx}_$f.txt &&
>   cp $f $fname &&
> @@ -120,7 +129,7 @@ checkout_files () {
>   git config core.autocrlf $crlf &&
>   pfx=eol_${eol}_crlf_${crlf}_attr_${attr}_ &&
>   src=crlf_false_attr__ &&
> - for f in LF CRLF LF_mix_CR CRLF_mix_LF CRLF_nul
> + for f in LF CRLF LF_mix_CR CRLF_mix_LF LF_nul
>   do
>   rm $src$f.txt &&
>   if test -z "$eol"; then
> @@ -142,8 +151,8 @@ checkout_files () {
>   test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf 
> gitattributes=$attr file=LF_mix_CR" "
>   compare_ws_file $pfx $lfmixcr   ${src}LF_mix_CR.txt
>   "
> - test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf 
> gitattributes=$attr file=CRLF_nul" "
> - compare_ws_file $pfx $crlfnul   ${src}CRLF_nul.txt
> + test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf 
> gitattributes=$attr file=LF_nul" "
> + compare_ws_file $pfx $crlfnul   ${src}LF_nul.txt
>   "
>  }
>  
> @@ -155,6 +164,7 @@ test_expect_success 'setup master' '
>   git commit -m "add .gitattributes" "" &&
>   printf "line1\nline2\nline3" >LF &&
>   printf "line1\r\nline2\r\nline3" >CRLF &&
> + printf "line1\r\nline2\nline3"   >repoMIX &&
>   printf "line1\r\nline2\nline3"   >CRLF_mix_LF &&
>   printf "line1\nline2\rline3" >LF_mix_CR &&
>   printf "line1\r\nline2\rline3"   >CRLF_mix_CR &&
> @@ -181,40 +191,41 @@ else
>   WAMIX=CRLF_LF
>  fi
>  
> +# attr   LFCRLF  repoMIX   CRLFmixLF 
> LFmixCR   CRLFNUL
>  test_expect_success 'commit files empty attr' '
> - commit_check_warn false "" """"""""
> "" &&
> - commit_check_warn true  "" "LF_CRLF" """LF_CRLF" ""
> "" &&
> - commit_check_warn input "" """CRLF_LF" "CRLF_LF" ""
> ""
> + commit_check_warn false "" """"""""
> """" &&
> + commit_check_warn true  "" "LF_CRLF" """LF_CRLF" "LF_CRLF" 
> """" &&
> + commit_check_warn input "" """CRLF_LF" "CRLF_LF" "CRLF_LF" 
> """"
>  '
>  
>  test_expect_success 'commit files attr=auto' '
> - commit_check_warn false "auto" "$WILC"  "$WICL""$WAMIX"  ""
> "" &&
> - commit_check_warn true  "auto" "LF_CRLF" """LF_CRLF" ""
> "" &&
> - commit_check_warn input "auto" """CRLF_LF" "CRLF_LF" ""
> ""
> + commit_check_warn false "auto" "$WILC"   "$WICL"   "$WAMIX"  "$WAMIX"  
> """" &&
> + commit_check_warn true  "auto" "LF_CRLF" """LF_CRLF" "LF_CRLF" 
> """" &&
> + commit_check_warn input "auto" """CRLF_LF" "CRLF_LF" "CRLF_LF" 
> """"
>  '
>  
>  test_expect_success 'commit files attr=text' '
> - commit_check_

Re: [PATCH v4 2/5] setup: sanity check file size in read_gitfile_gently

2015-04-25 Thread Junio C Hamano
Erik Elfström  writes:

> read_gitfile_gently will allocate a buffer to fit the entire file that
> should be read. Add a sanity check of the file size before opening to
> avoid allocating a potentially huge amount of memory if we come across
> a large file that someone happened to name ".git". The limit is set to
> a sufficiently unreasonable size that should never be exceeded by a
> genuine .git file.
>
> Signed-off-by: Erik Elfström 
> ---
>
> I'm not sure about this one but it felt like the safe thing to do.
> This patch can be dropped if it is not desired.

I do not think it is wrong per-se, but the changes in this patch
shows why hardcoded values assigned to error_code without #define is
not a good idea, as these values are now exposed to the callers of
the new function.  After we gain a new caller that does care about
the exact error code (e.g. to react differently to the reason why we
failed to read by giving different error messages) if we decide to
revert this step, or if we decide to add a new error type, for
whatever reason, this organization forces the caller to be updated.

> I considered testing it using
>  "mkdir foo && truncate -s 200G foo/.git && git clean -f -d"
> but that feels like a pretty evil test that is likely to cause lots
> of problems and not fail in any good way.

Amen to that.

>
>  setup.c | 20 +---
>  1 file changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/setup.c b/setup.c
> index e1897cc..ed87334 100644
> --- a/setup.c
> +++ b/setup.c
> @@ -364,22 +364,26 @@ const char *read_gitfile_gently(const char *path, int 
> *return_error_code)
>   error_code = 3;
>   goto cleanup_return;
>   }
> + if (st.st_size > PATH_MAX * 4) {
> + error_code = 4;
> + goto cleanup_return;
> + }
>   buf = xmalloc(st.st_size + 1);
>   len = read_in_full(fd, buf, st.st_size);
>   close(fd);
>   if (len != st.st_size) {
> - error_code = 4;
> + error_code = 5;
>   goto cleanup_return;
>   }
>   buf[len] = '\0';
>   if (!starts_with(buf, "gitdir: ")) {
> - error_code = 5;
> + error_code = 6;
>   goto cleanup_return;
>   }
>   while (buf[len - 1] == '\n' || buf[len - 1] == '\r')
>   len--;
>   if (len < 9) {
> - error_code = 6;
> + error_code = 7;
>   goto cleanup_return;
>   }
>   buf[len] = '\0';
> @@ -397,7 +401,7 @@ const char *read_gitfile_gently(const char *path, int 
> *return_error_code)
>   }
>  
>   if (!is_git_directory(dir)) {
> - error_code = 7;
> + error_code = 8;
>   goto cleanup_return;
>   }
>   path = real_path(dir);
> @@ -419,12 +423,14 @@ cleanup_return:
>   case 3:
>   die_errno("Error opening '%s'", path);
>   case 4:
> - die("Error reading %s", path);
> + die("Too large to be a .git file: '%s'", path);
>   case 5:
> - die("Invalid gitfile format: %s", path);
> + die("Error reading %s", path);
>   case 6:
> - die("No path in gitfile: %s", path);
> + die("Invalid gitfile format: %s", path);
>   case 7:
> + die("No path in gitfile: %s", path);
> + case 8:
>   die("Not a git repository: %s", dir);
>   default:
>   assert(0);
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 1/5] setup: add gentle version of read_gitfile

2015-04-25 Thread Junio C Hamano
Erik Elfström  writes:

> read_gitfile will die on most error cases. This makes it unsuitable
> for speculative calls. Extract the core logic and provide a gentle
> version that returns NULL on failure.
>
> The first usecase of the new gentle version will be to probe for
> submodules during git clean.
>
> Helped-by: Junio C Hamano 
> Helped-by: Jeff King 
> Signed-off-by: Erik Elfström 
> ---
>  cache.h |  3 ++-
>  setup.c | 82 
> +++--
>  2 files changed, 67 insertions(+), 18 deletions(-)
>
> diff --git a/cache.h b/cache.h
> index 3d3244b..6e29068 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -431,7 +431,8 @@ extern int set_git_dir(const char *path);
>  extern const char *get_git_namespace(void);
>  extern const char *strip_namespace(const char *namespaced_ref);
>  extern const char *get_git_work_tree(void);
> -extern const char *read_gitfile(const char *path);
> +extern const char *read_gitfile_gently(const char *path, int 
> *return_error_code);
> +#define read_gitfile(path) read_gitfile_gently((path), NULL)
>  extern const char *resolve_gitdir(const char *suspect);
>  extern void set_git_work_tree(const char *tree);
>  
> diff --git a/setup.c b/setup.c
> index 979b13f..e1897cc 100644
> --- a/setup.c
> +++ b/setup.c
> @@ -335,35 +335,53 @@ static int check_repository_format_gently(const char 
> *gitdir, int *nongit_ok)
>  /*
>   * Try to read the location of the git directory from the .git file,
>   * return path to git directory if found.
> + *
> + * On failure, if return_error_code is not NULL, return_error_code
> + * will be set to an error code and NULL will be returned. If
> + * return_error_code is NULL the function will die instead (for most
> + * cases).
>   */
> -const char *read_gitfile(const char *path)
> +const char *read_gitfile_gently(const char *path, int *return_error_code)
>  {
> - char *buf;
> - char *dir;
> + int error_code = 0;
> + char *buf = NULL;
> + char *dir = NULL;
>   const char *slash;
>   struct stat st;
>   int fd;
>   ssize_t len;
>  
> - if (stat(path, &st))
> - return NULL;
> - if (!S_ISREG(st.st_mode))
> - return NULL;
> + if (stat(path, &st)) {
> + error_code = 1;
> + goto cleanup_return;
> + }
> + if (!S_ISREG(st.st_mode)) {
> + error_code = 2;
> + goto cleanup_return;
> + }
>   fd = open(path, O_RDONLY);
> - if (fd < 0)
> - die_errno("Error opening '%s'", path);
> + if (fd < 0) {
> + error_code = 3;
> + goto cleanup_return;
> + }
>   buf = xmalloc(st.st_size + 1);
>   len = read_in_full(fd, buf, st.st_size);
>   close(fd);
> - if (len != st.st_size)
> - die("Error reading %s", path);
> + if (len != st.st_size) {
> + error_code = 4;
> + goto cleanup_return;
> + }
>   buf[len] = '\0';
> - if (!starts_with(buf, "gitdir: "))
> - die("Invalid gitfile format: %s", path);
> + if (!starts_with(buf, "gitdir: ")) {
> + error_code = 5;
> + goto cleanup_return;
> + }
>   while (buf[len - 1] == '\n' || buf[len - 1] == '\r')
>   len--;
> - if (len < 9)
> - die("No path in gitfile: %s", path);
> + if (len < 9) {
> + error_code = 6;
> + goto cleanup_return;
> + }
>   buf[len] = '\0';
>   dir = buf + 8;
>  
> @@ -378,11 +396,41 @@ const char *read_gitfile(const char *path)
>   buf = dir;
>   }
>  
> - if (!is_git_directory(dir))
> - die("Not a git repository: %s", dir);
> + if (!is_git_directory(dir)) {
> + error_code = 7;
> + goto cleanup_return;
> + }
>   path = real_path(dir);
>  
> +cleanup_return:
>   free(buf);
> +
> + if (return_error_code)
> + *return_error_code = error_code;
> +
> + if (error_code) {
> + if (return_error_code)
> + return NULL;
> +
> + switch (error_code) {
> + case 1: // failed to stat
> + case 2: // not regular file

Please do not use C++ // comments.

> + return NULL;
> + case 3:
> + die_errno("Error opening '%s'", path);
> + case 4:
> + die("Error reading %s", path);
> + case 5:
> + die("Invalid gitfile format: %s", path);
> + case 6:
> + die("No path in gitfile: %s", path);
> + case 7:
> + die("Not a git repository: %s", dir);
> + default:
> + assert(0);
> + }
> + }
> +
>   return path;
>  }
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.

Re: [PATCH v4 1/5] setup: add gentle version of read_gitfile

2015-04-25 Thread Junio C Hamano
Junio C Hamano  writes:

>> +switch (error_code) {
>> +case 1: // failed to stat
>> +case 2: // not regular file
>
> Please do not use C++ // comments.

No need to resend for this; I'll locally amend.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option

2015-04-25 Thread Junio C Hamano
karthik nayak  writes:

>> Is there any other way to make cat-file looser other than accepting
>> an unknown type name from the future?  If not, then perhaps it may
>> make sense to give it a generic name that implies that we would
>> trigger such additional looseness in the future.  But as the
>> inventor of it as a debugging aid, I would say a name that spells
>> out the specific way this option is being loose, e.g.
>>
>> >  --allow-bogus-type
>>
>> or with s/bogus/unknown/, best describes what it currently does.
>
> Yes this gives the best description, but its large, while we could use 
> something
> like --no-strict instead.

We could, if you answered my first question with "no".

By naming this "--no-strict", the first bug report you will receive
may be that "cat-file --no-strict" should parse a zlib deflate that
begins with "blob 1234\n\0" (notice that there are two SPs instead
of the usual one, and length is followed by LF that should not be
there before the NUL) but it does not.

As your option name "--no-strict" signals that you will make the
best effort to parse such nonsense, that would be a valid bug
report, against which you would need to update the code to make it
work.  But is it worth the effort to make such a thing work?  I
dunno.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 2/5] setup: sanity check file size in read_gitfile_gently

2015-04-25 Thread Erik Elfström
On Sat, Apr 25, 2015 at 6:47 PM, Junio C Hamano  wrote:
> I do not think it is wrong per-se, but the changes in this patch
> shows why hardcoded values assigned to error_code without #define is
> not a good idea, as these values are now exposed to the callers of
> the new function.  After we gain a new caller that does care about
> the exact error code (e.g. to react differently to the reason why we
> failed to read by giving different error messages) if we decide to
> revert this step, or if we decide to add a new error type, for
> whatever reason, this organization forces the caller to be updated.

Yes, it was a bit silly of me not to add that. Would something like
this be ok?

---
 cache.h |  9 +
 setup.c | 32 
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/cache.h b/cache.h
index 6e29068..177337a 100644
--- a/cache.h
+++ b/cache.h
@@ -431,6 +431,15 @@ extern int set_git_dir(const char *path);
 extern const char *get_git_namespace(void);
 extern const char *strip_namespace(const char *namespaced_ref);
 extern const char *get_git_work_tree(void);
+
+#define READ_GITFILE_ERR_STAT_FAILED 1
+#define READ_GITFILE_ERR_NOT_A_FILE 2
+#define READ_GITFILE_ERR_OPEN_FAILED 3
+#define READ_GITFILE_ERR_TOO_LARGE 4
+#define READ_GITFILE_ERR_READ_FAILED 5
+#define READ_GITFILE_ERR_INVALID_FORMAT 6
+#define READ_GITFILE_ERR_NO_PATH 7
+#define READ_GITFILE_ERR_NOT_A_REPO 8
 extern const char *read_gitfile_gently(const char *path, int 
*return_error_code);
 #define read_gitfile(path) read_gitfile_gently((path), NULL)
 extern const char *resolve_gitdir(const char *suspect);
diff --git a/setup.c b/setup.c
index ed87334..792c37b 100644
--- a/setup.c
+++ b/setup.c
@@ -352,38 +352,38 @@ const char *read_gitfile_gently(const char *path, int 
*return_error_code)
ssize_t len;
 
if (stat(path, &st)) {
-   error_code = 1;
+   error_code = READ_GITFILE_ERR_STAT_FAILED;
goto cleanup_return;
}
if (!S_ISREG(st.st_mode)) {
-   error_code = 2;
+   error_code = READ_GITFILE_ERR_NOT_A_FILE;
goto cleanup_return;
}
fd = open(path, O_RDONLY);
if (fd < 0) {
-   error_code = 3;
+   error_code = READ_GITFILE_ERR_OPEN_FAILED;
goto cleanup_return;
}
if (st.st_size > PATH_MAX * 4) {
-   error_code = 4;
+   error_code = READ_GITFILE_ERR_TOO_LARGE;
goto cleanup_return;
}
buf = xmalloc(st.st_size + 1);
len = read_in_full(fd, buf, st.st_size);
close(fd);
if (len != st.st_size) {
-   error_code = 5;
+   error_code = READ_GITFILE_ERR_READ_FAILED;
goto cleanup_return;
}
buf[len] = '\0';
if (!starts_with(buf, "gitdir: ")) {
-   error_code = 6;
+   error_code = READ_GITFILE_ERR_INVALID_FORMAT;
goto cleanup_return;
}
while (buf[len - 1] == '\n' || buf[len - 1] == '\r')
len--;
if (len < 9) {
-   error_code = 7;
+   error_code = READ_GITFILE_ERR_NO_PATH;
goto cleanup_return;
}
buf[len] = '\0';
@@ -401,7 +401,7 @@ const char *read_gitfile_gently(const char *path, int 
*return_error_code)
}
 
if (!is_git_directory(dir)) {
-   error_code = 8;
+   error_code = READ_GITFILE_ERR_NOT_A_REPO;
goto cleanup_return;
}
path = real_path(dir);
@@ -417,20 +417,20 @@ cleanup_return:
return NULL;
 
switch (error_code) {
-   case 1: // failed to stat
-   case 2: // not regular file
+   case READ_GITFILE_ERR_STAT_FAILED:
+   case READ_GITFILE_ERR_NOT_A_FILE:
return NULL;
-   case 3:
+   case READ_GITFILE_ERR_OPEN_FAILED:
die_errno("Error opening '%s'", path);
-   case 4:
+   case READ_GITFILE_ERR_TOO_LARGE:
die("Too large to be a .git file: '%s'", path);
-   case 5:
+   case READ_GITFILE_ERR_READ_FAILED:
die("Error reading %s", path);
-   case 6:
+   case READ_GITFILE_ERR_INVALID_FORMAT:
die("Invalid gitfile format: %s", path);
-   case 7:
+   case READ_GITFILE_ERR_NO_PATH:
die("No path in gitfile: %s", path);
-   case 8:
+   case READ_GITFILE_ERR_NOT_A_REPO:
die("Not a git repository: %s", dir);
default:
assert(0);
-- 
2.4.0.rc3.8.g86acfbd.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.

Re: [PATCH 5/5] ref_transaction_commit(): only keep one lockfile open at a time

2015-04-25 Thread Junio C Hamano
Junio C Hamano  writes:

> I am not too worried about "push --atomic", as we can just add a few
> words to Release Notes and documentation saying "this is still an
> experimental broken code that is unusable; don't use the feature in
> production".
>
> I however am more worried about the other one "update-ref --stdin";
> the change will be pure regression for those who want to do many
> updates and do not care if the update is atomic, no?

I should have refrained from touching the keyboard so late at night
X-<.  This regression was done long time ago (even in v2.1.0 I see
that ref_transaction_commit() tries to grab all locks at once).

So it is only "push --atomic".

The choice is between (1) shipping "push --atomic" that is known to
be broken, (2) applying your five-patch series which may (a) fix
both "push --atomic" and "update-ref --stdin", or (b) break other
transaction users including "update-ref -stdin" in unexpected ways.

I dunno.  I am still tempted to go route (2) hoping that it would
result in (2-a) not (2-b).
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/5] Avoid file descriptor exhaustion in ref_transaction_commit()

2015-04-25 Thread Junio C Hamano
Jeff King  writes:

> Stefan's patch is just in pu at this point, right? I do not think there
> is any rushing/release concern. It is too late for either to be in
> v2.4.0, so the only decision is whether to aim for "master" or "maint".
> To me, they both seem to be in the same ballpark as far as risking a
> regression.

The series builds on c1f0ca9 that is on 2.4.0-rc2, so we would need
to wiggle things around to apply to older codebase if we want to fix
the "too many open file descriptor" issue on older maintenance
releases for "update-ref --stdin".

I personally feel that it is OK to ship v2.4.0 without the fix,
leaving "push --atomic" broken, and fix it in v2.4.1, but I kinda
prefer that the final fix to be applicable for older maintenance
releases, at least to 2.3.x track, if not 2.2.x track.

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Cloning or pushing only files that have been updated

2015-04-25 Thread cl
Hi,

I have two sets of files.

A_Old is a large unversioned directory tree containing many files.

A_Updated is a git repository containing the files from A_Old, some of
which have been modified. A_Updated also contains new files.

I am looking for a way of cloning only the new or modified files from
A_Updated to A_Old. I would like A_Old to become a clone of A_Updated,
however I don't want to download the entire archive because of access
speed issues.

Timestamps on all files are accurate.

Any ideas?

- J

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: bug report : 2.3.5 ssh repo not found

2015-04-25 Thread Torsten Bögershausen
On 2015-04-25 07.57, Chris wrote:
> Hello,
> 
> Using git version 2.3.5 with kernel 3.19.3-3-ARCH #1 SMP PREEMPT x86_64 I see 
> the following error message when pulling or cloning a repo over ssh:
> 
> """
> git clone ssh://user@mydomain:/home/user/path/to/project.git
> Cloning into 'project'...
> ssh: Could not resolve hostname mydomain:: Name or service not known
> fatal: Could not read from remote repository.
> 
> Please make sure you have the correct access rights
> and the repository exists.
> """
> 
> Obviously I changed the url to hide credential info
> 
> After ensuring DNS was OK and being able to ssh to that instance directly I 
> tried downgrading git to my distro's last installed version of git version 
> 2.2.2 and now I can clone / pull / push to/from that repo without issue.
> 
> Is this a bug?
> 
> Best,
> Chris

It's a know issue/bug, sorry for the inconvenience.

There are 2 options:
Either:

change the URL
git clone ssh://user@mydomain:/home/user/path/to/project.git
into
git clone ssh://user@mydomain/home/user/path/to/project.git

(In other words, remove the ':' after the hostname)


Or
Compile Git yourself from the latest git.git/master, which has this fix:

commit 9718c7c0c2a92585729d0f2e05ebf2c44b0cc56a
Merge: a59ac46 6b6c5f7
Author: Junio C Hamano 
Date:   Mon Apr 20 15:28:33 2015 -0700

Merge branch 'tb/connect-ipv6-parse-fix'

An earlier update to the parser that disects an address broke an
address, followed by a colon, followed by an empty string (instead
of the port number).

* tb/connect-ipv6-parse-fix:
  connect.c: ignore extra colon after hostname

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] t0027: Add repoMIX and LF_nul

2015-04-25 Thread Torsten Bögershausen
On 2015-04-25 18.41, Junio C Hamano wrote:
> Torsten Bögershausen  writes:
> 
>> "new safer autocrlf handling":
>>   Check if eols in a file are converted at commit, when the file has
>>   CR (or CLLF) in the repo (technically speaking in the index).
> 
> s/CLLF/CRLF/?  (no need to resend for this; I'll locally amend)
> 
>>   Add a test-file repoMIX with mixed line-endings.
>>   When converting LF->CRLF or CRLF->LF: check the warnings
>>
>> checkout_files():
>>   Checking out CRLF_nul and checking for eol coversion does not
>>   make much sense (CRLF will stay CRLF).
> 
> Hmph, would it still make sense to make sure that CRLF will stay CRLF
> so that future changes to break this expectation can be caught?  Not
> that such a breakage is likely...

Thanks for amending.

We have the file CRLF (and CRLFmixLF), where we check that CRLF stays CRLF and 
is not
converted into CRLFLF.

The LF_nul is to test the "auto text detection":
It should not be converted into CRLF_nul in "auto mode",
but should be converted when declared as "text" in .gitattributes.

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Cleaning projects with submodules

2015-04-25 Thread Simon Richter
Hi,

I'm trying to set up a continuous integration build for Boost, which
uses massive amounts of submodules, and keep running into problems when
running "git clean" in the toplevel project.

When I switch to a version where a submodule has been removed (e.g. an
earlier version), "git clean -dx" will not remove the submodule's
directory, because it has its own .git directory. Using a single -f
flag, the process fails (because directories containing .git directories
are no longer skipped), and using -ff removes all submodules (which is
overkill).

Is there a good way to clean a project, leaving valid submodules in
place (these are then switched to the new tip and cleaned in separate
commands) while removing submodules that are no longer referenced?

   Simon



signature.asc
Description: OpenPGP digital signature


Re: Cloning or pushing only files that have been updated

2015-04-25 Thread Johannes Sixt

Am 25.04.2015 um 23:17 schrieb c...@qgenuity.com:

I have two sets of files.

A_Old is a large unversioned directory tree containing many files.

A_Updated is a git repository containing the files from A_Old, some of
which have been modified. A_Updated also contains new files.

I am looking for a way of cloning only the new or modified files from
A_Updated to A_Old. I would like A_Old to become a clone of A_Updated,
however I don't want to download the entire archive because of access
speed issues.

Timestamps on all files are accurate.


1. Create a commit from the files in each of the directories.

2. Use git rev-list --objects --all | sort to generate a sorted list of 
objects in each of the repositories.


3. In A_Old, generate a pack from the objects that are not in A_Updated 
using 'comm' of the objects lists piped into git pack-objects.


4. Transfer this pack from A_old to A_Updated.

5. Use git fast-export --no-data in A_old.

6. Use git fast-import in A_Updated to import the commit that you have 
in A_Old into A_Updated.


7. In A_Updated, git reset --soft the-commit-from-A_Old && git commit. 
Now you have a second commit with the updated state.


8. Use git-remote to connect the two repositories and to push or pull 
the updated state.


-- Hannes

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 2/5] setup: sanity check file size in read_gitfile_gently

2015-04-25 Thread Junio C Hamano
Erik Elfström  writes:

> On Sat, Apr 25, 2015 at 6:47 PM, Junio C Hamano  wrote:
>> I do not think it is wrong per-se, but the changes in this patch
>> shows why hardcoded values assigned to error_code without #define is
>> not a good idea, as these values are now exposed to the callers of
>> the new function.  After we gain a new caller that does care about
>> the exact error code (e.g. to react differently to the reason why we
>> failed to read by giving different error messages) if we decide to
>> revert this step, or if we decide to add a new error type, for
>> whatever reason, this organization forces the caller to be updated.
>
> Yes, it was a bit silly of me not to add that. Would something like
> this be ok?

Yeah, if you used symbolic constants from the get-go, then the
second patch to add the "too-large? it cannot be gitfile" safety
would not have to renumber everything; instead it would be adding a
single constant to the header file and adding a new case to setup.c
that uses the error code.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] t0027: Add repoMIX and LF_nul

2015-04-25 Thread Junio C Hamano
Torsten Bögershausen  writes:

>> Hmph, would it still make sense to make sure that CRLF will stay CRLF
>> so that future changes to break this expectation can be caught?  Not
>> that such a breakage is likely...
>
> Thanks for amending.
>
> We have the file CRLF (and CRLFmixLF), where we check that CRLF stays CRLF 
> and is not
> converted into CRLFLF.
>
> The LF_nul is to test the "auto text detection":
> It should not be converted into CRLF_nul in "auto mode",
> but should be converted when declared as "text" in .gitattributes.

The two paragraphs above may explain why the test would pass with
the current code, but is that sufficient assurance that other people
in future changes would not break the "It should not be" with their
bugs that you do not foresee at this moment?  Whenever you say "it
should not be", I hear "if the code somehow does that, somebody
broke our code", which in turn suggests me that catching that
condition with tests is a good thing.

Or am I hearing you incorrectly?

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/14] merge: simplify code flow

2015-04-25 Thread Junio C Hamano
One of the first things cmd_merge() does is to see if the "--abort"
option is given and run "reset --merge" and exit.  When the control
reaches this point, we know "--abort" was not given.

Signed-off-by: Junio C Hamano 
---
 builtin/merge.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index bebbe5b..8477878 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -1165,15 +1165,15 @@ int cmd_merge(int argc, const char **argv, const char 
*prefix)
option_commit = 0;
}
 
-   if (!abort_current_merge) {
-   if (!argc) {
-   if (default_to_upstream)
-   argc = setup_with_upstream(&argv);
-   else
-   die(_("No commit specified and 
merge.defaultToUpstream not set."));
-   } else if (argc == 1 && !strcmp(argv[0], "-"))
-   argv[0] = "@{-1}";
+   if (!argc) {
+   if (default_to_upstream)
+   argc = setup_with_upstream(&argv);
+   else
+   die(_("No commit specified and merge.defaultToUpstream 
not set."));
+   } else if (argc == 1 && !strcmp(argv[0], "-")) {
+   argv[0] = "@{-1}";
}
+
if (!argc)
usage_with_options(builtin_merge_usage,
builtin_merge_options);
-- 
2.4.0-rc3-238-g36f5934

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 00/14] Teach "git merge FETCH_HEAD" octopus merges

2015-04-25 Thread Junio C Hamano
Junio C Hamano  writes:

> Jeff King  writes:
>> On Mon, Apr 20, 2015 at 11:59:04AM -0700, Junio C Hamano wrote:
>>
>>> Unfortunately, "git merge"'s parsing of FETCH_HEAD forgets that we
>>> may be creating an Octopus.  Otherwise the above should work well.
>>
>> That sounds like a bug we should fix regardless.
>
> But I am not sure how it should behave. "git fetch $there A B C"
> followed by "git merge FETCH_HEAD" merges only A, and I do not know
> if people have come to depend on this behaviour.
>
> I suspect there may be larger fallout from such a change, namely,
> what should "git log FETCH_HEAD" do?  Should it traverse the history
> starting from all things that are not marked as not-for-merge, or
> should it just say "git rev-parse FETCH_HEAD" and use only the first
> one as the starting point?

So, I thought we may want to try this and see how it goes.

Tentatively, I am saying that "FETCH_HEAD" is a magic token
understood by "git merge", like "git merge  HEAD commits..."
syntax was a magic that made "git merge" work differently from "git
merge -m  ..."; no changes to get_sha1() or anything
heavy like that is intended.


Earlier, we thought that it would just be the matter of turning
existing invocation of "git merge  HEAD $commits..." in "git
pull" into "git merge -m  $commits..." to deprecate the ugly
original "merge" command line syntax.

This unfortunately failed for two reasons.

 * "-m " stops the editor from running; recent "git pull"
   encourage the users to justify the merge in the log message,
   and the auto-generated  that comes from format-merge-msg
   should still be shown to the user in the editor to be edited.

 * "-m " also adds auto-generated summary when merge.log
   configuration is enabled, but "git pull" calls "git merge" with
   the message _with_ that log already in there.

Invoking "git merge FETCH_HEAD" (no messages fed by the caller) from
"git pull" almost works.  "git merge" knows how to extract the name
of the repository and the branch from FETCH_HEAD to use in the merge
log message it auto-generates.  However, this is done only for a
single branch; if you did "git pull $there topic-A topic-B", and
then invoked "git merge FETCH_HEAD" from there, we would end up
recording a merge with only one branch, which is not what we want.

This teaches "git merge FETCH_HEAD" that FETCH_HEAD may describe
multiple branches that were fetched for merging.  With that, the
last step eradicates the "merge  HEAD ..." syntax from
our codebase, finally.

This may be rough in a few places and some patches that are done as
preparatory clean-up steps may want to be squashed into the patch
that follows them that implements the real change.

These patches are designed to apply on top of v2.2.2; I'll push them
out on 'pu' later, on 'jc/merge' topic.

Junio C Hamano (14):
  merge: simplify code flow
  t5520: style fixes
  t5520: test pulling an octopus into an unborn branch
  merge: clarify "pulling into void" special case
  merge: do not check argc to determine number of remote heads
  merge: small leakfix and code simplification
  merge: clarify collect_parents() logic
  merge: split reduce_parents() out of collect_parents()
  merge: narrow scope of merge_names
  merge: extract prepare_merge_message() logic out
  merge: make collect_parents() auto-generate the merge message
  merge: decide if we auto-generate the message early in
collect_parents()
  merge: handle FETCH_HEAD internally
  merge: deprecate 'git merge  HEAD ' syntax

 Documentation/git-merge.txt   |   4 +
 builtin/merge.c   | 248 +++---
 git-cvsimport.perl|   2 +-
 git-pull.sh   |   3 +-
 t/t3402-rebase-merge.sh   |   2 +-
 t/t5520-pull.sh   |  31 +++---
 t/t6020-merge-df.sh   |   2 +-
 t/t6021-merge-criss-cross.sh  |   6 +-
 t/t9402-git-cvsserver-refs.sh |   2 +-
 9 files changed, 188 insertions(+), 112 deletions(-)

-- 
2.4.0-rc3-238-g36f5934

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/14] t5520: style fixes

2015-04-25 Thread Junio C Hamano
Fix style funnies in early part of this test script that checks "git
pull" into an unborn branch.  The primary change is that 'chdir' to
a newly created empty test repository is now protected by being done
in a subshell to make it more robust without having to chdir back to
the original place.

Signed-off-by: Junio C Hamano 
---
 t/t5520-pull.sh | 22 ++
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/t/t5520-pull.sh b/t/t5520-pull.sh
index 227d293..5195a21 100755
--- a/t/t5520-pull.sh
+++ b/t/t5520-pull.sh
@@ -9,36 +9,27 @@ modify () {
mv "$2.x" "$2"
 }
 
-D=`pwd`
-
 test_expect_success setup '
-
echo file >file &&
git add file &&
git commit -a -m original
-
 '
 
 test_expect_success 'pulling into void' '
-   mkdir cloned &&
-   cd cloned &&
-   git init &&
-   git pull ..
-'
-
-cd "$D"
-
-test_expect_success 'checking the results' '
+   git init cloned &&
+   (
+   cd cloned &&
+   git pull ..
+   ) &&
test -f file &&
test -f cloned/file &&
test_cmp file cloned/file
 '
 
 test_expect_success 'pulling into void using master:master' '
-   mkdir cloned-uho &&
+   git init cloned-uho &&
(
cd cloned-uho &&
-   git init &&
git pull .. master:master
) &&
test -f file &&
@@ -71,7 +62,6 @@ test_expect_success 'pulling into void does not overwrite 
staged files' '
)
 '
 
-
 test_expect_success 'pulling into void does not remove new staged files' '
git init cloned-staged-new &&
(
-- 
2.4.0-rc3-238-g36f5934

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/14] t5520: test pulling an octopus into an unborn branch

2015-04-25 Thread Junio C Hamano
The code comment for "git merge" in builtin/merge.c, we say

If the merged head is a valid one there is no reason
to forbid "git merge" into a branch yet to be born.
We do the same for "git pull".

and t5520 does have an existing test for that behaviour.  However,
there was no test to make sure that 'git pull' to pull multiple
branches into an unborn branch must fail.

Signed-off-by: Junio C Hamano 
---
 t/t5520-pull.sh | 9 +
 1 file changed, 9 insertions(+)

diff --git a/t/t5520-pull.sh b/t/t5520-pull.sh
index 5195a21..7efd45b 100755
--- a/t/t5520-pull.sh
+++ b/t/t5520-pull.sh
@@ -76,6 +76,15 @@ test_expect_success 'pulling into void does not remove new 
staged files' '
)
 '
 
+test_expect_success 'pulling into void must not create an octopus' '
+   git init cloned-octopus &&
+   (
+   cd cloned-octopus &&
+   test_must_fail git pull .. master master &&
+   ! test -f file
+   )
+'
+
 test_expect_success 'test . as a remote' '
 
git branch copy master &&
-- 
2.4.0-rc3-238-g36f5934

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/14] merge: small leakfix and code simplification

2015-04-25 Thread Junio C Hamano
When parsing a merged object name like "foo~20" to formulate a merge
summary "Merge branch foo (early part)", a temporary strbuf is used,
but we forgot to deallocate it when we failed to find the named
branch.

Signed-off-by: Junio C Hamano 
---
 builtin/merge.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index 1d4fbd3..b2d0332 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -491,8 +491,7 @@ static void merge_name(const char *remote, struct strbuf 
*msg)
}
if (len) {
struct strbuf truname = STRBUF_INIT;
-   strbuf_addstr(&truname, "refs/heads/");
-   strbuf_addstr(&truname, remote);
+   strbuf_addf(&truname, "refs/heads/%s", remote);
strbuf_setlen(&truname, truname.len - len);
if (ref_exists(truname.buf)) {
strbuf_addf(msg,
@@ -503,6 +502,7 @@ static void merge_name(const char *remote, struct strbuf 
*msg)
strbuf_release(&truname);
goto cleanup;
}
+   strbuf_release(&truname);
}
 
if (!strcmp(remote, "FETCH_HEAD") &&
-- 
2.4.0-rc3-238-g36f5934

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 13/14] merge: handle FETCH_HEAD internally

2015-04-25 Thread Junio C Hamano
The collect_parents() function now is responsible for

 1. parsing the commits given on the command line into a list of
commits to be merged;

 2. filtering these parents into independent ones; and

 3. optionally calling fmt_merge_msg() via prepare_merge_message()
to prepare an auto-generated merge log message, using fake
contents that FETCH_HEAD would have had if these commits were
fetched from the current repository with "git pull . $args..."

Make "git merge FETCH_HEAD" to be the same as the traditional

git merge "$(git fmt-merge-msg <.git/FETCH_HEAD)" $commits

invocation of the command in "git pull", where $commits are the ones
that appear in FETCH_HEAD that are not marked as not-for-merge, by
making it do a bit more, specifically:

 - noticing "FETCH_HEAD" is the only "commit" on the command line
   and picking the commits that are not marked as not-for-merge as
   the list of commits to be merged (substitute for step #1 above);

 - letting the resulting list fed to step #2 above;

 - doing the step #3 above, using the contents of the FETCH_HEAD
   instead of fake contents crafted from the list of commits parsed
   in the step #1 above.

Note that this changes the semantics.  "git merge FETCH_HEAD" has
always behaved as if the first commit in the FETCH_HEAD file were
directly specified on the command line, creating a two-way merge
whose auto-generated merge log said "merge commit xyz".  With this
change, if the previous fetch was to grab multiple branches (e.g.
"git fetch $there topic-a topic-b"), the new world order is to
create an octopus, behaving as if "git pull $there topic-a topic-b"
were run.  This is a deliberate change to make that happen.

Signed-off-by: Junio C Hamano 
---
 Documentation/git-merge.txt |   4 ++
 builtin/merge.c | 105 ++--
 2 files changed, 76 insertions(+), 33 deletions(-)

diff --git a/Documentation/git-merge.txt b/Documentation/git-merge.txt
index cf2c374..d9aa6b6 100644
--- a/Documentation/git-merge.txt
+++ b/Documentation/git-merge.txt
@@ -104,6 +104,10 @@ commit or stash your changes before running 'git merge'.
 If no commit is given from the command line, merge the remote-tracking
 branches that the current branch is configured to use as its upstream.
 See also the configuration section of this manual page.
++
+When `FETCH_HEAD` (and no other commit) is specified, the branches
+recorded in the `.git/FETCH_HEAD` file by the previous invocation
+of `git fetch` for merging are merged to the current branch.
 
 
 PRE-MERGE CHECKS
diff --git a/builtin/merge.c b/builtin/merge.c
index c7d9d6e..42f9fcc 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -505,28 +505,6 @@ static void merge_name(const char *remote, struct strbuf 
*msg)
strbuf_release(&truname);
}
 
-   if (!strcmp(remote, "FETCH_HEAD") &&
-   !access(git_path("FETCH_HEAD"), R_OK)) {
-   const char *filename;
-   FILE *fp;
-   struct strbuf line = STRBUF_INIT;
-   char *ptr;
-
-   filename = git_path("FETCH_HEAD");
-   fp = fopen(filename, "r");
-   if (!fp)
-   die_errno(_("could not open '%s' for reading"),
- filename);
-   strbuf_getline(&line, fp, '\n');
-   fclose(fp);
-   ptr = strstr(line.buf, "\tnot-for-merge\t");
-   if (ptr)
-   strbuf_remove(&line, ptr-line.buf+1, 13);
-   strbuf_addbuf(msg, &line);
-   strbuf_release(&line);
-   goto cleanup;
-   }
-
if (remote_head->util) {
struct merge_remote_desc *desc;
desc = merge_remote_util(remote_head);
@@ -1090,6 +1068,60 @@ static void prepare_merge_message(struct strbuf 
*merge_names, struct strbuf *mer
strbuf_setlen(merge_msg, merge_msg->len - 1);
 }
 
+static void handle_fetch_head(struct commit_list **remotes, struct strbuf 
*merge_names)
+{
+   const char *filename;
+   int fd, pos, npos;
+   struct strbuf fetch_head_file = STRBUF_INIT;
+
+   if (!merge_names)
+   merge_names = &fetch_head_file;
+
+   filename = git_path("FETCH_HEAD");
+   fd = open(filename, O_RDONLY);
+   if (fd < 0)
+   die_errno(_("could not open '%s' for reading"), filename);
+
+   if (strbuf_read(merge_names, fd, 0) < 0)
+   die_errno(_("could not read '%s'"), filename);
+   if (close(fd) < 0)
+   die_errno(_("could not close '%s'"), filename);
+
+   for (pos = 0; pos < merge_names->len; pos = npos) {
+   unsigned char sha1[20];
+   char *ptr;
+   struct commit *commit;
+
+   ptr = strchr(merge_names->buf + pos, '\n');
+   if (ptr)
+   npos = ptr - merge_names->buf + 1;
+   else
+

[PATCH 09/14] merge: narrow scope of merge_names

2015-04-25 Thread Junio C Hamano
In order to pass the list of parents to fmt_merge_msg(), cmd_merge()
uses this strbuf to create something that look like FETCH_HEAD that
describes commits that are being merged.  This is necessary only
when we are creating the merge commit message ourselves, but was
done unconditionally.

Move the variable and the logic to populate it to confine them in a
block that needs them.

Signed-off-by: Junio C Hamano 
---
 builtin/merge.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index 054f052..d853c9d 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -1236,8 +1236,6 @@ int cmd_merge(int argc, const char **argv, const char 
*prefix)
argc -= 2;
remoteheads = collect_parents(head_commit, &head_subsumed, 
argc, argv);
} else {
-   struct strbuf merge_names = STRBUF_INIT;
-
/* We are invoked directly as the first-class UI. */
head_arg = "HEAD";
 
@@ -1247,11 +1245,14 @@ int cmd_merge(int argc, const char **argv, const char 
*prefix)
 * to the given message.
 */
remoteheads = collect_parents(head_commit, &head_subsumed, 
argc, argv);
-   for (p = remoteheads; p; p = p->next)
-   merge_name(merge_remote_util(p->item)->name, 
&merge_names);
 
if (!have_message || shortlog_len) {
+   struct strbuf merge_names = STRBUF_INIT;
struct fmt_merge_msg_opts opts;
+
+   for (p = remoteheads; p; p = p->next)
+   merge_name(merge_remote_util(p->item)->name, 
&merge_names);
+
memset(&opts, 0, sizeof(opts));
opts.add_title = !have_message;
opts.shortlog_len = shortlog_len;
@@ -1260,6 +1261,8 @@ int cmd_merge(int argc, const char **argv, const char 
*prefix)
fmt_merge_msg(&merge_names, &merge_msg, &opts);
if (merge_msg.len)
strbuf_setlen(&merge_msg, merge_msg.len - 1);
+
+   strbuf_release(&merge_names);
}
}
 
-- 
2.4.0-rc3-238-g36f5934

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/14] merge: decide if we auto-generate the message early in collect_parents()

2015-04-25 Thread Junio C Hamano
Signed-off-by: Junio C Hamano 
---
 builtin/merge.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index 84ebb22..c7d9d6e 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -1098,6 +1098,10 @@ static struct commit_list *collect_parents(struct commit 
*head_commit,
int i;
struct commit_list *remoteheads = NULL;
struct commit_list **remotes = &remoteheads;
+   struct strbuf merge_names = STRBUF_INIT, *autogen = NULL;
+
+   if (merge_msg && (!have_message || shortlog_len))
+   autogen = &merge_names;
 
if (head_commit)
remotes = &commit_list_insert(head_commit, remotes)->next;
@@ -,14 +1115,12 @@ static struct commit_list *collect_parents(struct 
commit *head_commit,
 
remoteheads = reduce_parents(head_commit, head_subsumed, remoteheads);
 
-   if (merge_msg &&
-   (!have_message || shortlog_len)) {
-   struct strbuf merge_names = STRBUF_INIT;
-
+   if (autogen) {
for (p = remoteheads; p; p = p->next)
-   merge_name(merge_remote_util(p->item)->name, 
&merge_names);
-   prepare_merge_message(&merge_names, merge_msg);
-   strbuf_release(&merge_names);
+   merge_name(merge_remote_util(p->item)->name, autogen);
+
+   prepare_merge_message(autogen, merge_msg);
+   strbuf_release(autogen);
}
 
return remoteheads;
-- 
2.4.0-rc3-238-g36f5934

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/14] merge: deprecate 'git merge HEAD ' syntax

2015-04-25 Thread Junio C Hamano
We had this in "git merge" manual for eternity:

'git merge'  HEAD ...

[This] syntax ( `HEAD` ...) is supported for
historical reasons.  Do not use it from the command line or in
new scripts.  It is the same as `git merge -m  ...`.

With the update to "git merge" to make it understand what is
recorded in FETCH_HEAD directly, including Octopus merge cases, we
now can rewrite the use of this syntax in "git pull" with a simple
"git merge FETCH_HEAD".

Also there are quite a few fallouts in the test scripts, and it
turns out that "git cvsimport" also uses this old syntax to record
a merge.

Judging from this result, I would not be surprised if dropping the
support of the old syntax broke scripts people have written and been
relying on for the past ten years.  But at least we can start the
deprecation process by throwing a warning message when the syntax is
used.

With luck, we might be able to drop the support in a few years.

Signed-off-by: Junio C Hamano 
---
 builtin/merge.c   | 1 +
 git-cvsimport.perl| 2 +-
 git-pull.sh   | 3 +--
 t/t3402-rebase-merge.sh   | 2 +-
 t/t6020-merge-df.sh   | 2 +-
 t/t6021-merge-criss-cross.sh  | 6 +++---
 t/t9402-git-cvsserver-refs.sh | 2 +-
 7 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index 42f9fcc..67fbfaf 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -1299,6 +1299,7 @@ int cmd_merge(int argc, const char **argv, const char 
*prefix)
 */
if (!have_message &&
is_old_style_invocation(argc, argv, head_commit->object.sha1)) {
+   warning("old-style 'git merge  HEAD ' is 
deprecated.");
strbuf_addstr(&merge_msg, argv[0]);
head_arg = argv[1];
argv += 2;
diff --git a/git-cvsimport.perl b/git-cvsimport.perl
index 73d367c..82ecb03 100755
--- a/git-cvsimport.perl
+++ b/git-cvsimport.perl
@@ -1162,7 +1162,7 @@ sub commit {
die "Fast-forward update failed: $?\n" if $?;
}
else {
-   system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
+   system(qw(git merge -m cvsimport), "$remote/$opt_o");
die "Could not merge $opt_o into the current branch.\n" if $?;
}
 } else {
diff --git a/git-pull.sh b/git-pull.sh
index 4d4fc77..15d9431 100755
--- a/git-pull.sh
+++ b/git-pull.sh
@@ -323,7 +323,6 @@ then
fi
 fi
 
-merge_name=$(git fmt-merge-msg $log_arg <"$GIT_DIR/FETCH_HEAD") || exit
 case "$rebase" in
 true)
eval="git-rebase $diffstat $strategy_args $merge_args $rebase_args 
$verbosity"
@@ -334,7 +333,7 @@ true)
eval="git-merge $diffstat $no_commit $verify_signatures $edit $squash 
$no_ff $ff_only"
eval="$eval $log_arg $strategy_args $merge_args $verbosity $progress"
eval="$eval $gpg_sign_args"
-   eval="$eval \"\$merge_name\" HEAD $merge_head"
+   eval="$eval FETCH_HEAD"
;;
 esac
 eval "exec $eval"
diff --git a/t/t3402-rebase-merge.sh b/t/t3402-rebase-merge.sh
index 5a27ec9..8f64505 100755
--- a/t/t3402-rebase-merge.sh
+++ b/t/t3402-rebase-merge.sh
@@ -47,7 +47,7 @@ test_expect_success setup '
 '
 
 test_expect_success 'reference merge' '
-   git merge -s recursive "reference merge" HEAD master
+   git merge -s recursive -m "reference merge" master
 '
 
 PRE_REBASE=$(git rev-parse test-rebase)
diff --git a/t/t6020-merge-df.sh b/t/t6020-merge-df.sh
index 27c3d73..2af1bee 100755
--- a/t/t6020-merge-df.sh
+++ b/t/t6020-merge-df.sh
@@ -24,7 +24,7 @@ test_expect_success 'prepare repository' '
 '
 
 test_expect_success 'Merge with d/f conflicts' '
-   test_expect_code 1 git merge "merge msg" B master
+   test_expect_code 1 git merge -m "merge msg" master
 '
 
 test_expect_success 'F/D conflict' '
diff --git a/t/t6021-merge-criss-cross.sh b/t/t6021-merge-criss-cross.sh
index d15b313..213deec 100755
--- a/t/t6021-merge-criss-cross.sh
+++ b/t/t6021-merge-criss-cross.sh
@@ -48,7 +48,7 @@ echo "1
 " > file &&
 git commit -m "C3" file &&
 git branch C3 &&
-git merge "pre E3 merge" B A &&
+git merge -m "pre E3 merge" A &&
 echo "1
 2
 3 changed in E3, branch B. New file size
@@ -61,7 +61,7 @@ echo "1
 " > file &&
 git commit -m "E3" file &&
 git checkout A &&
-git merge "pre D8 merge" A C3 &&
+git merge -m "pre D8 merge" C3 &&
 echo "1
 2
 3 changed in C3, branch B
@@ -73,7 +73,7 @@ echo "1
 9" > file &&
 git commit -m D8 file'
 
-test_expect_success 'Criss-cross merge' 'git merge "final merge" A B'
+test_expect_success 'Criss-cross merge' 'git merge -m "final merge" B'
 
 cat > file-expect 

[PATCH 10/14] merge: extract prepare_merge_message() logic out

2015-04-25 Thread Junio C Hamano
Signed-off-by: Junio C Hamano 
---
 builtin/merge.c | 26 +++---
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index d853c9d..a972ed6 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -1076,6 +1076,20 @@ static struct commit_list *reduce_parents(struct commit 
*head_commit,
return remoteheads;
 }
 
+static void prepare_merge_message(struct strbuf *merge_names, struct strbuf 
*merge_msg)
+{
+   struct fmt_merge_msg_opts opts;
+
+   memset(&opts, 0, sizeof(opts));
+   opts.add_title = !have_message;
+   opts.shortlog_len = shortlog_len;
+   opts.credit_people = (0 < option_edit);
+
+   fmt_merge_msg(merge_names, merge_msg, &opts);
+   if (merge_msg->len)
+   strbuf_setlen(merge_msg, merge_msg->len - 1);
+}
+
 static struct commit_list *collect_parents(struct commit *head_commit,
   int *head_subsumed,
   int argc, const char **argv)
@@ -1248,20 +1262,10 @@ int cmd_merge(int argc, const char **argv, const char 
*prefix)
 
if (!have_message || shortlog_len) {
struct strbuf merge_names = STRBUF_INIT;
-   struct fmt_merge_msg_opts opts;
 
for (p = remoteheads; p; p = p->next)
merge_name(merge_remote_util(p->item)->name, 
&merge_names);
-
-   memset(&opts, 0, sizeof(opts));
-   opts.add_title = !have_message;
-   opts.shortlog_len = shortlog_len;
-   opts.credit_people = (0 < option_edit);
-
-   fmt_merge_msg(&merge_names, &merge_msg, &opts);
-   if (merge_msg.len)
-   strbuf_setlen(&merge_msg, merge_msg.len - 1);
-
+   prepare_merge_message(&merge_names, &merge_msg);
strbuf_release(&merge_names);
}
}
-- 
2.4.0-rc3-238-g36f5934

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/14] merge: make collect_parents() auto-generate the merge message

2015-04-25 Thread Junio C Hamano
Signed-off-by: Junio C Hamano 
---
 builtin/merge.c | 35 +--
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index a972ed6..84ebb22 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -1092,7 +1092,8 @@ static void prepare_merge_message(struct strbuf 
*merge_names, struct strbuf *mer
 
 static struct commit_list *collect_parents(struct commit *head_commit,
   int *head_subsumed,
-  int argc, const char **argv)
+  int argc, const char **argv,
+  struct strbuf *merge_msg)
 {
int i;
struct commit_list *remoteheads = NULL;
@@ -1108,7 +1109,19 @@ static struct commit_list *collect_parents(struct commit 
*head_commit,
remotes = &commit_list_insert(commit, remotes)->next;
}
 
-   return reduce_parents(head_commit, head_subsumed, remoteheads);
+   remoteheads = reduce_parents(head_commit, head_subsumed, remoteheads);
+
+   if (merge_msg &&
+   (!have_message || shortlog_len)) {
+   struct strbuf merge_names = STRBUF_INIT;
+
+   for (p = remoteheads; p; p = p->next)
+   merge_name(merge_remote_util(p->item)->name, 
&merge_names);
+   prepare_merge_message(&merge_names, merge_msg);
+   strbuf_release(&merge_names);
+   }
+
+   return remoteheads;
 }
 
 int cmd_merge(int argc, const char **argv, const char *prefix)
@@ -1222,7 +1235,8 @@ int cmd_merge(int argc, const char **argv, const char 
*prefix)
if (fast_forward == FF_NO)
die(_("Non-fast-forward commit does not make sense into 
"
"an empty head"));
-   remoteheads = collect_parents(head_commit, &head_subsumed, 
argc, argv);
+   remoteheads = collect_parents(head_commit, &head_subsumed,
+ argc, argv, NULL);
remote_head = remoteheads->item;
if (!remote_head)
die(_("%s - not something we can merge"), argv[0]);
@@ -1248,7 +1262,8 @@ int cmd_merge(int argc, const char **argv, const char 
*prefix)
head_arg = argv[1];
argv += 2;
argc -= 2;
-   remoteheads = collect_parents(head_commit, &head_subsumed, 
argc, argv);
+   remoteheads = collect_parents(head_commit, &head_subsumed,
+ argc, argv, NULL);
} else {
/* We are invoked directly as the first-class UI. */
head_arg = "HEAD";
@@ -1258,16 +1273,8 @@ int cmd_merge(int argc, const char **argv, const char 
*prefix)
 * the standard merge summary message to be appended
 * to the given message.
 */
-   remoteheads = collect_parents(head_commit, &head_subsumed, 
argc, argv);
-
-   if (!have_message || shortlog_len) {
-   struct strbuf merge_names = STRBUF_INIT;
-
-   for (p = remoteheads; p; p = p->next)
-   merge_name(merge_remote_util(p->item)->name, 
&merge_names);
-   prepare_merge_message(&merge_names, &merge_msg);
-   strbuf_release(&merge_names);
-   }
+   remoteheads = collect_parents(head_commit, &head_subsumed,
+ argc, argv, &merge_msg);
}
 
if (!head_commit || !argc)
-- 
2.4.0-rc3-238-g36f5934

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/14] merge: do not check argc to determine number of remote heads

2015-04-25 Thread Junio C Hamano
To reject merging multiple commits into an unborn branch, we check
argc, thinking that collect_parents() that reads the remaining
command line arguments from  will give us the same
number of commits as its input, i.e. argc.

Because what we really care about is the number of commits, let the
function run and then make sure it returns only one commit instead.

Signed-off-by: Junio C Hamano 
---
 builtin/merge.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index 878f82a..1d4fbd3 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -1185,9 +1185,6 @@ int cmd_merge(int argc, const char **argv, const char 
*prefix)
 * to forbid "git merge" into a branch yet to be born.
 * We do the same for "git pull".
 */
-   if (argc != 1)
-   die(_("Can merge only exactly one commit into "
-   "empty head"));
if (squash)
die(_("Squash commit into empty head not supported 
yet"));
if (fast_forward == FF_NO)
@@ -1197,6 +1194,8 @@ int cmd_merge(int argc, const char **argv, const char 
*prefix)
remote_head = remoteheads->item;
if (!remote_head)
die(_("%s - not something we can merge"), argv[0]);
+   if (remoteheads->next)
+   die(_("Can merge only exactly one commit into empty 
head"));
read_empty(remote_head->object.sha1, 0);
update_ref("initial pull", "HEAD", remote_head->object.sha1,
   NULL, 0, UPDATE_REFS_DIE_ON_ERR);
-- 
2.4.0-rc3-238-g36f5934

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/14] merge: clarify "pulling into void" special case

2015-04-25 Thread Junio C Hamano
Instead of having it as one of the three if/elseif/.. case arms,
test the condition and handle this special case upfront.  This makes
it easier to follow the flow of logic.

Signed-off-by: Junio C Hamano 
---
 builtin/merge.c | 35 ++-
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index 8477878..878f82a 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -1178,23 +1178,7 @@ int cmd_merge(int argc, const char **argv, const char 
*prefix)
usage_with_options(builtin_merge_usage,
builtin_merge_options);
 
-   /*
-* This could be traditional "merge  HEAD ..."  and
-* the way we can tell it is to see if the second token is HEAD,
-* but some people might have misused the interface and used a
-* commit-ish that is the same as HEAD there instead.
-* Traditional format never would have "-m" so it is an
-* additional safety measure to check for it.
-*/
-
-   if (!have_message && head_commit &&
-   is_old_style_invocation(argc, argv, head_commit->object.sha1)) {
-   strbuf_addstr(&merge_msg, argv[0]);
-   head_arg = argv[1];
-   argv += 2;
-   argc -= 2;
-   remoteheads = collect_parents(head_commit, &head_subsumed, 
argc, argv);
-   } else if (!head_commit) {
+   if (!head_commit) {
struct commit *remote_head;
/*
 * If the merged head is a valid one there is no reason
@@ -1217,6 +1201,23 @@ int cmd_merge(int argc, const char **argv, const char 
*prefix)
update_ref("initial pull", "HEAD", remote_head->object.sha1,
   NULL, 0, UPDATE_REFS_DIE_ON_ERR);
goto done;
+   }
+
+   /*
+* This could be traditional "merge  HEAD ..."  and
+* the way we can tell it is to see if the second token is HEAD,
+* but some people might have misused the interface and used a
+* commit-ish that is the same as HEAD there instead.
+* Traditional format never would have "-m" so it is an
+* additional safety measure to check for it.
+*/
+   if (!have_message &&
+   is_old_style_invocation(argc, argv, head_commit->object.sha1)) {
+   strbuf_addstr(&merge_msg, argv[0]);
+   head_arg = argv[1];
+   argv += 2;
+   argc -= 2;
+   remoteheads = collect_parents(head_commit, &head_subsumed, 
argc, argv);
} else {
struct strbuf merge_names = STRBUF_INIT;
 
-- 
2.4.0-rc3-238-g36f5934

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/14] merge: clarify collect_parents() logic

2015-04-25 Thread Junio C Hamano
Clarify this small function in three ways.

 - The function initially collects all commits to be merged into a
   commit_list "remoteheads"; the "remotes" pointer always points at
   the tail of this list (either the remoteheads variable itself, or
   the ->next slot of the element at the end of the list) to help
   elongate the list by repeated calls to commit_list_insert().
   Because the new element appended by commit_list_insert() will
   always have its ->next slot NULLed out, there is no need for us
   to assign NULL to *remotes to terminate the list at the end.

 - The variable "head_subsumed" always confused me every time I read
   this code.  What is happening here is that we inspect what the
   caller told us to merge (including the current HEAD) and come up
   with the list of parents to be recorded for the resulting merge
   commit, omitting commits that are ancestor of other commits.
   This filtering may remove the current HEAD from the resulting
   parent list---and we signal that fact with this variable, so that
   we can later record it as the first parent when "--no-ff" is in
   effect.

 - The "parents" list is created for this function by reduce_heads()
   and was not deallocated after its use, even though the loop
   control was written in such a way to allow us to do so by taking
   the "next" element in a separate variable so that it can be used
   in the next-step part of the loop control.

Signed-off-by: Junio C Hamano 
---
 builtin/merge.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index b2d0332..d2e36e2 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -1061,11 +1061,19 @@ static struct commit_list *collect_parents(struct 
commit *head_commit,
 "not something we can merge");
remotes = &commit_list_insert(commit, remotes)->next;
}
-   *remotes = NULL;
 
+   /*
+* Is the current HEAD reachable from another commit being
+* merged?  If so we do not want to record it as a parent of
+* the resulting merge, unless --no-ff is given.  We will flip
+* this variable to 0 when we find HEAD among the independent
+* tips being merged.
+*/
+   *head_subsumed = 1;
+
+   /* Find what parents to record by checking independent ones. */
parents = reduce_heads(remoteheads);
 
-   *head_subsumed = 1; /* we will flip this to 0 when we find it */
for (remoteheads = NULL, remotes = &remoteheads;
 parents;
 parents = next) {
@@ -1075,6 +1083,7 @@ static struct commit_list *collect_parents(struct commit 
*head_commit,
*head_subsumed = 0;
else
remotes = &commit_list_insert(commit, remotes)->next;
+   free(parents);
}
return remoteheads;
 }
-- 
2.4.0-rc3-238-g36f5934

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/14] merge: split reduce_parents() out of collect_parents()

2015-04-25 Thread Junio C Hamano
The latter does two separate things:

 - Parse the list of commits on the command line, and formulate the
   list of commits to be merged (including the current HEAD);

 - Compute the list of parents to be recorded in the resulting merge
   commit.

Split the latter into a separate helper function, so that we can
later supply the list commits to be merged from a different source
(namely, FETCH_HEAD).

Signed-off-by: Junio C Hamano 
---
 builtin/merge.c | 41 +
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index d2e36e2..054f052 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -1044,23 +1044,11 @@ static int default_edit_option(void)
st_stdin.st_mode == st_stdout.st_mode);
 }
 
-static struct commit_list *collect_parents(struct commit *head_commit,
-  int *head_subsumed,
-  int argc, const char **argv)
+static struct commit_list *reduce_parents(struct commit *head_commit,
+ int *head_subsumed,
+ struct commit_list *remoteheads)
 {
-   int i;
-   struct commit_list *remoteheads = NULL, *parents, *next;
-   struct commit_list **remotes = &remoteheads;
-
-   if (head_commit)
-   remotes = &commit_list_insert(head_commit, remotes)->next;
-   for (i = 0; i < argc; i++) {
-   struct commit *commit = get_merge_parent(argv[i]);
-   if (!commit)
-   help_unknown_ref(argv[i], "merge",
-"not something we can merge");
-   remotes = &commit_list_insert(commit, remotes)->next;
-   }
+   struct commit_list *parents, *next, **remotes = &remoteheads;
 
/*
 * Is the current HEAD reachable from another commit being
@@ -1088,6 +1076,27 @@ static struct commit_list *collect_parents(struct commit 
*head_commit,
return remoteheads;
 }
 
+static struct commit_list *collect_parents(struct commit *head_commit,
+  int *head_subsumed,
+  int argc, const char **argv)
+{
+   int i;
+   struct commit_list *remoteheads = NULL;
+   struct commit_list **remotes = &remoteheads;
+
+   if (head_commit)
+   remotes = &commit_list_insert(head_commit, remotes)->next;
+   for (i = 0; i < argc; i++) {
+   struct commit *commit = get_merge_parent(argv[i]);
+   if (!commit)
+   help_unknown_ref(argv[i], "merge",
+"not something we can merge");
+   remotes = &commit_list_insert(commit, remotes)->next;
+   }
+
+   return reduce_parents(head_commit, head_subsumed, remoteheads);
+}
+
 int cmd_merge(int argc, const char **argv, const char *prefix)
 {
unsigned char result_tree[20];
-- 
2.4.0-rc3-238-g36f5934

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 1/5] setup: add gentle version of read_gitfile

2015-04-25 Thread Erik Elfström
read_gitfile will die on most error cases. This makes it unsuitable
for speculative calls. Extract the core logic and provide a gentle
version that returns NULL on failure.

The first usecase of the new gentle version will be to probe for
submodules during git clean.

Helped-by: Junio C Hamano 
Helped-by: Jeff King 
Signed-off-by: Erik Elfström 
---
 cache.h | 11 -
 setup.c | 82 +++--
 2 files changed, 75 insertions(+), 18 deletions(-)

diff --git a/cache.h b/cache.h
index 3d3244b..868e4d3 100644
--- a/cache.h
+++ b/cache.h
@@ -431,7 +431,16 @@ extern int set_git_dir(const char *path);
 extern const char *get_git_namespace(void);
 extern const char *strip_namespace(const char *namespaced_ref);
 extern const char *get_git_work_tree(void);
-extern const char *read_gitfile(const char *path);
+
+#define READ_GITFILE_ERR_STAT_FAILED 1
+#define READ_GITFILE_ERR_NOT_A_FILE 2
+#define READ_GITFILE_ERR_OPEN_FAILED 3
+#define READ_GITFILE_ERR_READ_FAILED 4
+#define READ_GITFILE_ERR_INVALID_FORMAT 5
+#define READ_GITFILE_ERR_NO_PATH 6
+#define READ_GITFILE_ERR_NOT_A_REPO 7
+extern const char *read_gitfile_gently(const char *path, int 
*return_error_code);
+#define read_gitfile(path) read_gitfile_gently((path), NULL)
 extern const char *resolve_gitdir(const char *suspect);
 extern void set_git_work_tree(const char *tree);
 
diff --git a/setup.c b/setup.c
index 979b13f..c4538ca 100644
--- a/setup.c
+++ b/setup.c
@@ -335,35 +335,53 @@ static int check_repository_format_gently(const char 
*gitdir, int *nongit_ok)
 /*
  * Try to read the location of the git directory from the .git file,
  * return path to git directory if found.
+ *
+ * On failure, if return_error_code is not NULL, return_error_code
+ * will be set to an error code and NULL will be returned. If
+ * return_error_code is NULL the function will die instead (for most
+ * cases).
  */
-const char *read_gitfile(const char *path)
+const char *read_gitfile_gently(const char *path, int *return_error_code)
 {
-   char *buf;
-   char *dir;
+   int error_code = 0;
+   char *buf = NULL;
+   char *dir = NULL;
const char *slash;
struct stat st;
int fd;
ssize_t len;
 
-   if (stat(path, &st))
-   return NULL;
-   if (!S_ISREG(st.st_mode))
-   return NULL;
+   if (stat(path, &st)) {
+   error_code = READ_GITFILE_ERR_STAT_FAILED;
+   goto cleanup_return;
+   }
+   if (!S_ISREG(st.st_mode)) {
+   error_code = READ_GITFILE_ERR_NOT_A_FILE;
+   goto cleanup_return;
+   }
fd = open(path, O_RDONLY);
-   if (fd < 0)
-   die_errno("Error opening '%s'", path);
+   if (fd < 0) {
+   error_code = READ_GITFILE_ERR_OPEN_FAILED;
+   goto cleanup_return;
+   }
buf = xmalloc(st.st_size + 1);
len = read_in_full(fd, buf, st.st_size);
close(fd);
-   if (len != st.st_size)
-   die("Error reading %s", path);
+   if (len != st.st_size) {
+   error_code = READ_GITFILE_ERR_READ_FAILED;
+   goto cleanup_return;
+   }
buf[len] = '\0';
-   if (!starts_with(buf, "gitdir: "))
-   die("Invalid gitfile format: %s", path);
+   if (!starts_with(buf, "gitdir: ")) {
+   error_code = READ_GITFILE_ERR_INVALID_FORMAT;
+   goto cleanup_return;
+   }
while (buf[len - 1] == '\n' || buf[len - 1] == '\r')
len--;
-   if (len < 9)
-   die("No path in gitfile: %s", path);
+   if (len < 9) {
+   error_code = READ_GITFILE_ERR_NO_PATH;
+   goto cleanup_return;
+   }
buf[len] = '\0';
dir = buf + 8;
 
@@ -378,11 +396,41 @@ const char *read_gitfile(const char *path)
buf = dir;
}
 
-   if (!is_git_directory(dir))
-   die("Not a git repository: %s", dir);
+   if (!is_git_directory(dir)) {
+   error_code = READ_GITFILE_ERR_NOT_A_REPO;
+   goto cleanup_return;
+   }
path = real_path(dir);
 
+cleanup_return:
free(buf);
+
+   if (return_error_code)
+   *return_error_code = error_code;
+
+   if (error_code) {
+   if (return_error_code)
+   return NULL;
+
+   switch (error_code) {
+   case READ_GITFILE_ERR_STAT_FAILED:
+   case READ_GITFILE_ERR_NOT_A_FILE:
+   return NULL;
+   case READ_GITFILE_ERR_OPEN_FAILED:
+   die_errno("Error opening '%s'", path);
+   case READ_GITFILE_ERR_READ_FAILED:
+   die("Error reading %s", path);
+   case READ_GITFILE_ERR_INVALID_FORMAT:
+   die("Invalid gitfile format: %s", path);
+   case READ_GITFILE_ERR_NO_PATH:
+

[PATCH v5 0/5] Improving performance of git clean

2015-04-25 Thread Erik Elfström
Changes in v5:
* Added defines for read_gitfile_gently error codes. 
  This was a silly mistake, sorry about that.

Erik Elfström (5):
  setup: add gentle version of read_gitfile
  setup: sanity check file size in read_gitfile_gently
  t7300: add tests to document behavior of clean and nested git
  p7300: add performance tests for clean
  clean: improve performance when removing lots of directories

 builtin/clean.c   |  26 +--
 cache.h   |  12 -
 setup.c   |  88 ---
 t/perf/p7300-clean.sh |  31 +
 t/t7300-clean.sh  | 126 ++
 5 files changed, 261 insertions(+), 22 deletions(-)
 create mode 100755 t/perf/p7300-clean.sh

-- 
2.4.0.rc3.8.gbb31afb

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 2/5] setup: sanity check file size in read_gitfile_gently

2015-04-25 Thread Erik Elfström
read_gitfile_gently will allocate a buffer to fit the entire file that
should be read. Add a sanity check of the file size before opening to
avoid allocating a potentially huge amount of memory if we come across
a large file that someone happened to name ".git". The limit is set to
a sufficiently unreasonable size that should never be exceeded by a
genuine .git file.

Signed-off-by: Erik Elfström 
---
 cache.h | 1 +
 setup.c | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/cache.h b/cache.h
index 868e4d3..c9f1f8e 100644
--- a/cache.h
+++ b/cache.h
@@ -439,6 +439,7 @@ extern const char *get_git_work_tree(void);
 #define READ_GITFILE_ERR_INVALID_FORMAT 5
 #define READ_GITFILE_ERR_NO_PATH 6
 #define READ_GITFILE_ERR_NOT_A_REPO 7
+#define READ_GITFILE_ERR_TOO_LARGE 8
 extern const char *read_gitfile_gently(const char *path, int 
*return_error_code);
 #define read_gitfile(path) read_gitfile_gently((path), NULL)
 extern const char *resolve_gitdir(const char *suspect);
diff --git a/setup.c b/setup.c
index c4538ca..792c37b 100644
--- a/setup.c
+++ b/setup.c
@@ -364,6 +364,10 @@ const char *read_gitfile_gently(const char *path, int 
*return_error_code)
error_code = READ_GITFILE_ERR_OPEN_FAILED;
goto cleanup_return;
}
+   if (st.st_size > PATH_MAX * 4) {
+   error_code = READ_GITFILE_ERR_TOO_LARGE;
+   goto cleanup_return;
+   }
buf = xmalloc(st.st_size + 1);
len = read_in_full(fd, buf, st.st_size);
close(fd);
@@ -418,6 +422,8 @@ cleanup_return:
return NULL;
case READ_GITFILE_ERR_OPEN_FAILED:
die_errno("Error opening '%s'", path);
+   case READ_GITFILE_ERR_TOO_LARGE:
+   die("Too large to be a .git file: '%s'", path);
case READ_GITFILE_ERR_READ_FAILED:
die("Error reading %s", path);
case READ_GITFILE_ERR_INVALID_FORMAT:
-- 
2.4.0.rc3.8.gbb31afb

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 4/5] p7300: add performance tests for clean

2015-04-25 Thread Erik Elfström
The tests are run in dry-run mode to avoid having to restore the test
directories for each timed iteration. Using dry-run is an acceptable
compromise since we are mostly interested in the initial computation
of what to clean and not so much in the cleaning it self.

Signed-off-by: Erik Elfström 
---
 t/perf/p7300-clean.sh | 31 +++
 1 file changed, 31 insertions(+)
 create mode 100755 t/perf/p7300-clean.sh

diff --git a/t/perf/p7300-clean.sh b/t/perf/p7300-clean.sh
new file mode 100755
index 000..6ae55ec
--- /dev/null
+++ b/t/perf/p7300-clean.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+test_description="Test git-clean performance"
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+test_checkout_worktree
+
+test_expect_success 'setup untracked directory with many sub dirs' '
+   rm -rf 500_sub_dirs 10_sub_dirs clean_test_dir &&
+   mkdir 500_sub_dirs 10_sub_dirs clean_test_dir &&
+   for i in $(test_seq 1 500)
+   do
+   mkdir 500_sub_dirs/dir$i || return $?
+   done &&
+   for i in $(test_seq 1 200)
+   do
+   cp -r 500_sub_dirs 10_sub_dirs/dir$i || return $?
+   done
+'
+
+test_perf 'clean many untracked sub dirs, check for nested git' '
+   git clean -n -q -f -d 10_sub_dirs/
+'
+
+test_perf 'clean many untracked sub dirs, ignore nested git' '
+   git clean -n -q -f -f -d 10_sub_dirs/
+'
+
+test_done
-- 
2.4.0.rc3.8.gbb31afb

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 3/5] t7300: add tests to document behavior of clean and nested git

2015-04-25 Thread Erik Elfström
Signed-off-by: Erik Elfström 
---
 t/t7300-clean.sh | 128 +++
 1 file changed, 128 insertions(+)

diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh
index 99be5d9..11f3a6d 100755
--- a/t/t7300-clean.sh
+++ b/t/t7300-clean.sh
@@ -455,6 +455,134 @@ test_expect_success 'nested git work tree' '
! test -d bar
 '
 
+test_expect_failure 'should clean things that almost look like git but are 
not' '
+   rm -fr almost_git almost_bare_git almost_submodule &&
+   mkdir -p almost_git/.git/objects &&
+   mkdir -p almost_git/.git/refs &&
+   cat >almost_git/.git/HEAD <<-\EOF &&
+   garbage
+   EOF
+   cp -r almost_git/.git/ almost_bare_git &&
+   mkdir almost_submodule/ &&
+   cat >almost_submodule/.git <<-\EOF &&
+   garbage
+   EOF
+   test_when_finished "rm -rf almost_*" &&
+   ## This will fail due to die("Invalid gitfile format: %s", path); in
+   ## setup.c:read_gitfile.
+   git clean -f -d &&
+   test_path_is_missing almost_git &&
+   test_path_is_missing almost_bare_git &&
+   test_path_is_missing almost_submodule
+'
+
+test_expect_success 'should not clean submodules' '
+   rm -fr repo to_clean sub1 sub2 &&
+   mkdir repo to_clean &&
+   (
+   cd repo &&
+   git init &&
+   test_commit msg hello.world
+   ) &&
+   git submodule add ./repo/.git sub1 &&
+   git commit -m "sub1" &&
+   git branch before_sub2 &&
+   git submodule add ./repo/.git sub2 &&
+   git commit -m "sub2" &&
+   git checkout before_sub2 &&
+   >to_clean/should_clean.this &&
+   git clean -f -d &&
+   test_path_is_file repo/.git/index &&
+   test_path_is_file repo/hello.world &&
+   test_path_is_file sub1/.git &&
+   test_path_is_file sub1/hello.world &&
+   test_path_is_file sub2/.git &&
+   test_path_is_file sub2/hello.world &&
+   test_path_is_missing to_clean
+'
+
+test_expect_failure 'nested (empty) git should be kept' '
+   rm -fr empty_repo to_clean &&
+   git init empty_repo &&
+   mkdir to_clean &&
+   >to_clean/should_clean.this &&
+   git clean -f -d &&
+   test_path_is_file empty_repo/.git/HEAD &&
+   test_path_is_missing to_clean
+'
+
+test_expect_success 'nested bare repositories should be cleaned' '
+   rm -fr bare1 bare2 subdir &&
+   git init --bare bare1 &&
+   git clone --local --bare . bare2 &&
+   mkdir subdir &&
+   cp -r bare2 subdir/bare3 &&
+   git clean -f -d &&
+   test_path_is_missing bare1 &&
+   test_path_is_missing bare2 &&
+   test_path_is_missing subdir
+'
+
+test_expect_success 'nested (empty) bare repositories should be cleaned even 
when in .git' '
+   rm -fr strange_bare &&
+   mkdir strange_bare &&
+   git init --bare strange_bare/.git &&
+   git clean -f -d &&
+   test_path_is_missing strange_bare
+'
+
+test_expect_failure 'nested (non-empty) bare repositories should be cleaned 
even when in .git' '
+   rm -fr strange_bare &&
+   mkdir strange_bare &&
+   git clone --local --bare . strange_bare/.git &&
+   git clean -f -d &&
+   test_path_is_missing strange_bare
+'
+
+test_expect_success 'giving path in nested git work tree will remove it' '
+   rm -fr repo &&
+   mkdir repo &&
+   (
+   cd repo &&
+   git init &&
+   mkdir -p bar/baz &&
+   test_commit msg bar/baz/hello.world
+   ) &&
+   git clean -f -d repo/bar/baz &&
+   test_path_is_file repo/.git/HEAD &&
+   test_path_is_dir repo/bar/ &&
+   test_path_is_missing repo/bar/baz
+'
+
+test_expect_success 'giving path to nested .git will not remove it' '
+   rm -fr repo &&
+   mkdir repo untracked &&
+   (
+   cd repo &&
+   git init &&
+   test_commit msg hello.world
+   ) &&
+   git clean -f -d repo/.git &&
+   test_path_is_file repo/.git/HEAD &&
+   test_path_is_dir repo/.git/refs &&
+   test_path_is_dir repo/.git/objects &&
+   test_path_is_dir untracked/
+'
+
+test_expect_success 'giving path to nested .git/ will remove contents' '
+   rm -fr repo untracked &&
+   mkdir repo untracked &&
+   (
+   cd repo &&
+   git init &&
+   test_commit msg hello.world
+   ) &&
+   git clean -f -d repo/.git/ &&
+   test_path_is_dir repo/.git &&
+   test_dir_is_empty repo/.git &&
+   test_path_is_dir untracked/
+'
+
 test_expect_success 'force removal of nested git work tree' '
rm -fr foo bar baz &&
mkdir -p foo bar baz/boo &&
-- 
2.4.0.rc3.8.gbb31afb

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 5/5] clean: improve performance when removing lots of directories

2015-04-25 Thread Erik Elfström
"git clean" uses resolve_gitlink_ref() to check for the presence of
nested git repositories, but it has the drawback of creating a
ref_cache entry for every directory that should potentially be
cleaned. The linear search through the ref_cache list causes a massive
performance hit for large number of directories.

Modify clean.c:remove_dirs to use setup.c:is_git_directory and
setup.c:read_gitfile_gently instead.

Both these functions will open files and parse contents when they find
something that looks like a git repository. This is ok from a
performance standpoint since finding repository candidates should be
comparatively rare.

Using is_git_directory and read_gitfile_gently should give a more
standardized check for what is and what isn't a git repository but
also gives three behavioral changes.

The first change is that we will now detect and avoid cleaning empty
nested git repositories (only init run). This is desirable.

Second, we will no longer die when cleaning a file named ".git" with
garbage content (it will be cleaned instead). This is also desirable.

The last change is that we will detect and avoid cleaning empty bare
repositories that have been placed in a directory named ".git". This
is not desirable but should have no real user impact since we already
fail to clean non-empty bare repositories in the same scenario. This
is thus deemed acceptable.

Update t7300 to reflect these changes in behavior.

The time to clean an untracked directory containing 10 sub
directories went from 61s to 1.7s after this change.

Helped-by: Jeff King 
Signed-off-by: Erik Elfström 
---
 builtin/clean.c  | 26 ++
 t/t7300-clean.sh |  8 +++-
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/builtin/clean.c b/builtin/clean.c
index 98c103f..17a1c13 100644
--- a/builtin/clean.c
+++ b/builtin/clean.c
@@ -10,7 +10,6 @@
 #include "cache.h"
 #include "dir.h"
 #include "parse-options.h"
-#include "refs.h"
 #include "string-list.h"
 #include "quote.h"
 #include "column.h"
@@ -148,6 +147,27 @@ static int exclude_cb(const struct option *opt, const char 
*arg, int unset)
return 0;
 }
 
+/*
+ * Return 1 if the given path is the root of a git repository or
+ * submodule else 0. Will not return 1 for bare repositories with the
+ * exception of creating a bare repository in "foo/.git" and calling
+ * is_git_repository("foo").
+ */
+static int is_git_repository(struct strbuf *path)
+{
+   int ret = 0;
+   int unused_error_code;
+   size_t orig_path_len = path->len;
+   assert(orig_path_len != 0);
+   if (path->buf[orig_path_len - 1] != '/')
+   strbuf_addch(path, '/');
+   strbuf_addstr(path, ".git");
+   if (read_gitfile_gently(path->buf, &unused_error_code) || 
is_git_directory(path->buf))
+   ret = 1;
+   strbuf_setlen(path, orig_path_len);
+   return ret;
+}
+
 static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag,
int dry_run, int quiet, int *dir_gone)
 {
@@ -155,13 +175,11 @@ static int remove_dirs(struct strbuf *path, const char 
*prefix, int force_flag,
struct strbuf quoted = STRBUF_INIT;
struct dirent *e;
int res = 0, ret = 0, gone = 1, original_len = path->len, len;
-   unsigned char submodule_head[20];
struct string_list dels = STRING_LIST_INIT_DUP;
 
*dir_gone = 1;
 
-   if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) &&
-   !resolve_gitlink_ref(path->buf, "HEAD", 
submodule_head)) {
+   if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) && 
is_git_repository(path)) {
if (!quiet) {
quote_path_relative(path->buf, prefix, "ed);
printf(dry_run ?  _(msg_would_skip_git_dir) : 
_(msg_skip_git_dir),
diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh
index 11f3a6d..4d07a4a 100755
--- a/t/t7300-clean.sh
+++ b/t/t7300-clean.sh
@@ -455,7 +455,7 @@ test_expect_success 'nested git work tree' '
! test -d bar
 '
 
-test_expect_failure 'should clean things that almost look like git but are 
not' '
+test_expect_success 'should clean things that almost look like git but are 
not' '
rm -fr almost_git almost_bare_git almost_submodule &&
mkdir -p almost_git/.git/objects &&
mkdir -p almost_git/.git/refs &&
@@ -468,8 +468,6 @@ test_expect_failure 'should clean things that almost look 
like git but are not'
garbage
EOF
test_when_finished "rm -rf almost_*" &&
-   ## This will fail due to die("Invalid gitfile format: %s", path); in
-   ## setup.c:read_gitfile.
git clean -f -d &&
test_path_is_missing almost_git &&
test_path_is_missing almost_bare_git &&
@@ -501,7 +499,7 @@ test_expect_success 'should not clean submodules' '
test_path_is_missing to_clean
 '
 
-test_expect_failure 'nested (empty) git should be kept' '
+test_expect_success 'nested (empty) git should be kept' '