[PATCH 15/24] read_raw_ref(): manage own scratch space

2016-04-07 Thread David Turner
From: Michael Haggerty 

Instead of creating scratch space in resolve_ref_unsafe() and passing it
down through resolve_ref_1 to read_raw_ref(), teach read_raw_ref() to
manage its own scratch space. This reduces coupling across the functions
at the cost of some extra allocations. Also, when read_raw_ref() is
implemented for different reference backends, the other implementations
might have different scratch space requirements.

Note that we now preserve errno across the calls to strbuf_release(),
which calls free() and can thus theoretically overwrite errno.

Signed-off-by: Michael Haggerty 
---
 refs/files-backend.c | 76 
 1 file changed, 41 insertions(+), 35 deletions(-)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index d51e778..f752568 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1421,17 +1421,20 @@ static int resolve_missing_loose_ref(const char 
*refname,
  *   refname will still be valid and unchanged.
  */
 static int read_raw_ref(const char *refname, unsigned char *sha1,
-   struct strbuf *symref, struct strbuf *sb_path,
-   struct strbuf *sb_contents, int *flags)
+   struct strbuf *symref, int *flags)
 {
+   struct strbuf sb_contents = STRBUF_INIT;
+   struct strbuf sb_path = STRBUF_INIT;
const char *path;
const char *buf;
struct stat st;
int fd;
+   int ret = -1;
+   int save_errno;
 
-   strbuf_reset(sb_path);
-   strbuf_git_path(sb_path, "%s", refname);
-   path = sb_path->buf;
+   strbuf_reset(&sb_path);
+   strbuf_git_path(&sb_path, "%s", refname);
+   path = sb_path.buf;
 
 stat_ref:
/*
@@ -1446,36 +1449,38 @@ stat_ref:
 
if (lstat(path, &st) < 0) {
if (errno != ENOENT)
-   return -1;
+   goto out;
if (resolve_missing_loose_ref(refname, sha1, flags)) {
errno = ENOENT;
-   return -1;
+   goto out;
}
-   return 0;
+   ret = 0;
+   goto out;
}
 
/* Follow "normalized" - ie "refs/.." symlinks by hand */
if (S_ISLNK(st.st_mode)) {
-   strbuf_reset(sb_contents);
-   if (strbuf_readlink(sb_contents, path, 0) < 0) {
+   strbuf_reset(&sb_contents);
+   if (strbuf_readlink(&sb_contents, path, 0) < 0) {
if (errno == ENOENT || errno == EINVAL)
/* inconsistent with lstat; retry */
goto stat_ref;
else
-   return -1;
+   goto out;
}
-   if (starts_with(sb_contents->buf, "refs/") &&
-   !check_refname_format(sb_contents->buf, 0)) {
-   strbuf_swap(sb_contents, symref);
+   if (starts_with(sb_contents.buf, "refs/") &&
+   !check_refname_format(sb_contents.buf, 0)) {
+   strbuf_swap(&sb_contents, symref);
*flags |= REF_ISSYMREF;
-   return 0;
+   ret = 0;
+   goto out;
}
}
 
/* Is it a directory? */
if (S_ISDIR(st.st_mode)) {
errno = EISDIR;
-   return -1;
+   goto out;
}
 
/*
@@ -1488,18 +1493,18 @@ stat_ref:
/* inconsistent with lstat; retry */
goto stat_ref;
else
-   return -1;
+   goto out;
}
-   strbuf_reset(sb_contents);
-   if (strbuf_read(sb_contents, fd, 256) < 0) {
+   strbuf_reset(&sb_contents);
+   if (strbuf_read(&sb_contents, fd, 256) < 0) {
int save_errno = errno;
close(fd);
errno = save_errno;
-   return -1;
+   goto out;
}
close(fd);
-   strbuf_rtrim(sb_contents);
-   buf = sb_contents->buf;
+   strbuf_rtrim(&sb_contents);
+   buf = sb_contents.buf;
if (starts_with(buf, "ref:")) {
buf += 4;
while (isspace(*buf))
@@ -1508,7 +1513,8 @@ stat_ref:
strbuf_reset(symref);
strbuf_addstr(symref, buf);
*flags |= REF_ISSYMREF;
-   return 0;
+   ret = 0;
+   goto out;
}
 
/*
@@ -1519,10 +1525,17 @@ stat_ref:
(buf[40] != '\0' && !isspace(buf[40]))) {
*flags |= REF_ISBROKEN;
errno = EINVAL;
-   return -1;
+   goto out;
}
 
-   return 0;
+   ret = 0;
+
+out:
+   save_errno = errno;
+   strbuf_release(&sb_path);
+ 

[PATCH 02/24] refs: move for_each_*ref* functions into common code

2016-04-07 Thread David Turner
Make do_for_each_ref take a submodule as an argument instead of a
ref_cache.  Since all for_each_*ref* functions are defined in terms of
do_for_each_ref, we can then move them into the common code.

Later, we can simply make do_for_each_ref into a backend function.

Signed-off-by: David Turner 
Signed-off-by: Junio C Hamano 
---
 refs.c   | 52 +++
 refs/files-backend.c | 62 +---
 refs/refs-internal.h |  9 
 3 files changed, 66 insertions(+), 57 deletions(-)

diff --git a/refs.c b/refs.c
index 6b8c16c..f0feff7 100644
--- a/refs.c
+++ b/refs.c
@@ -1103,3 +1103,55 @@ int head_ref(each_ref_fn fn, void *cb_data)
 {
return head_ref_submodule(NULL, fn, cb_data);
 }
+
+int for_each_ref(each_ref_fn fn, void *cb_data)
+{
+   return do_for_each_ref(NULL, "", fn, 0, 0, cb_data);
+}
+
+int for_each_ref_submodule(const char *submodule, each_ref_fn fn, void 
*cb_data)
+{
+   return do_for_each_ref(submodule, "", fn, 0, 0, cb_data);
+}
+
+int for_each_ref_in(const char *prefix, each_ref_fn fn, void *cb_data)
+{
+   return do_for_each_ref(NULL, prefix, fn, strlen(prefix), 0, cb_data);
+}
+
+int for_each_fullref_in(const char *prefix, each_ref_fn fn, void *cb_data, 
unsigned int broken)
+{
+   unsigned int flag = 0;
+
+   if (broken)
+   flag = DO_FOR_EACH_INCLUDE_BROKEN;
+   return do_for_each_ref(NULL, prefix, fn, 0, flag, cb_data);
+}
+
+int for_each_ref_in_submodule(const char *submodule, const char *prefix,
+   each_ref_fn fn, void *cb_data)
+{
+   return do_for_each_ref(submodule, prefix, fn, strlen(prefix), 0, 
cb_data);
+}
+
+int for_each_replace_ref(each_ref_fn fn, void *cb_data)
+{
+   return do_for_each_ref(NULL, git_replace_ref_base, fn,
+  strlen(git_replace_ref_base), 0, cb_data);
+}
+
+int for_each_namespaced_ref(each_ref_fn fn, void *cb_data)
+{
+   struct strbuf buf = STRBUF_INIT;
+   int ret;
+   strbuf_addf(&buf, "%srefs/", get_git_namespace());
+   ret = do_for_each_ref(NULL, buf.buf, fn, 0, 0, cb_data);
+   strbuf_release(&buf);
+   return ret;
+}
+
+int for_each_rawref(each_ref_fn fn, void *cb_data)
+{
+   return do_for_each_ref(NULL, "", fn, 0,
+  DO_FOR_EACH_INCLUDE_BROKEN, cb_data);
+}
diff --git a/refs/files-backend.c b/refs/files-backend.c
index c07dc41..9676ec2 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -513,9 +513,6 @@ static void sort_ref_dir(struct ref_dir *dir)
dir->sorted = dir->nr = i;
 }
 
-/* Include broken references in a do_for_each_ref*() iteration: */
-#define DO_FOR_EACH_INCLUDE_BROKEN 0x01
-
 /*
  * Return true iff the reference described by entry can be resolved to
  * an object in the database.  Emit a warning if the referred-to
@@ -1727,10 +1724,13 @@ static int do_for_each_entry(struct ref_cache *refs, 
const char *base,
  * value, stop the iteration and return that value; otherwise, return
  * 0.
  */
-static int do_for_each_ref(struct ref_cache *refs, const char *base,
-  each_ref_fn fn, int trim, int flags, void *cb_data)
+int do_for_each_ref(const char *submodule, const char *base,
+   each_ref_fn fn, int trim, int flags, void *cb_data)
 {
struct ref_entry_cb data;
+   struct ref_cache *refs;
+
+   refs = get_ref_cache(submodule);
data.base = base;
data.trim = trim;
data.flags = flags;
@@ -1745,58 +1745,6 @@ static int do_for_each_ref(struct ref_cache *refs, const 
char *base,
return do_for_each_entry(refs, base, do_one_ref, &data);
 }
 
-int for_each_ref(each_ref_fn fn, void *cb_data)
-{
-   return do_for_each_ref(&ref_cache, "", fn, 0, 0, cb_data);
-}
-
-int for_each_ref_submodule(const char *submodule, each_ref_fn fn, void 
*cb_data)
-{
-   return do_for_each_ref(get_ref_cache(submodule), "", fn, 0, 0, cb_data);
-}
-
-int for_each_ref_in(const char *prefix, each_ref_fn fn, void *cb_data)
-{
-   return do_for_each_ref(&ref_cache, prefix, fn, strlen(prefix), 0, 
cb_data);
-}
-
-int for_each_fullref_in(const char *prefix, each_ref_fn fn, void *cb_data, 
unsigned int broken)
-{
-   unsigned int flag = 0;
-
-   if (broken)
-   flag = DO_FOR_EACH_INCLUDE_BROKEN;
-   return do_for_each_ref(&ref_cache, prefix, fn, 0, flag, cb_data);
-}
-
-int for_each_ref_in_submodule(const char *submodule, const char *prefix,
-   each_ref_fn fn, void *cb_data)
-{
-   return do_for_each_ref(get_ref_cache(submodule), prefix, fn, 
strlen(prefix), 0, cb_data);
-}
-
-int for_each_replace_ref(each_ref_fn fn, void *cb_data)
-{
-   return do_for_each_ref(&ref_cache, git_replace_ref_base, fn,
-  strlen(git_replace_ref_base), 0, cb_data);
-}
-
-int for

[PATCH 14/24] files-backend: break out ref reading

2016-04-07 Thread David Turner
Refactor resolve_ref_1 in terms of a new function read_raw_ref, which
is responsible for reading ref data from the ref storage.

Later, we will make read_raw_ref a pluggable backend function, and make
resolve_ref_unsafe common.

Signed-off-by: David Turner 
Helped-by: Duy Nguyen 
Signed-off-by: Junio C Hamano 
Signed-off-by: Michael Haggerty 
---
 refs/files-backend.c | 244 ++-
 1 file changed, 145 insertions(+), 99 deletions(-)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index b865ba5..d51e778 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1390,6 +1390,141 @@ static int resolve_missing_loose_ref(const char 
*refname,
return -1;
 }
 
+/*
+ * Read a raw ref from the filesystem or packed refs file.
+ *
+ * If the ref is a sha1, fill in sha1 and return 0.
+ *
+ * If the ref is symbolic, fill in *symref with the referrent
+ * (e.g. "refs/heads/master") and return 0.  The caller is responsible
+ * for validating the referrent.  Set REF_ISSYMREF in flags.
+ *
+ * If the ref doesn't exist, set errno to ENOENT and return -1.
+ *
+ * If the ref exists but is neither a symbolic ref nor a sha1, it is
+ * broken. Set REF_ISBROKEN in flags, set errno to EINVAL, and return
+ * -1.
+ *
+ * If there is another error reading the ref, set errno appropriately and
+ * return -1.
+ *
+ * Backend-specific flags might be set in flags as well, regardless of
+ * outcome.
+ *
+ * sb_path is workspace: the caller should allocate and free it.
+ *
+ * It is OK for refname to point into symref. In this case:
+ * - if the function succeeds with REF_ISSYMREF, symref will be
+ *   overwritten and the memory pointed to by refname might be changed
+ *   or even freed.
+ * - in all other cases, symref will be untouched, and therefore
+ *   refname will still be valid and unchanged.
+ */
+static int read_raw_ref(const char *refname, unsigned char *sha1,
+   struct strbuf *symref, struct strbuf *sb_path,
+   struct strbuf *sb_contents, int *flags)
+{
+   const char *path;
+   const char *buf;
+   struct stat st;
+   int fd;
+
+   strbuf_reset(sb_path);
+   strbuf_git_path(sb_path, "%s", refname);
+   path = sb_path->buf;
+
+stat_ref:
+   /*
+* We might have to loop back here to avoid a race
+* condition: first we lstat() the file, then we try
+* to read it as a link or as a file.  But if somebody
+* changes the type of the file (file <-> directory
+* <-> symlink) between the lstat() and reading, then
+* we don't want to report that as an error but rather
+* try again starting with the lstat().
+*/
+
+   if (lstat(path, &st) < 0) {
+   if (errno != ENOENT)
+   return -1;
+   if (resolve_missing_loose_ref(refname, sha1, flags)) {
+   errno = ENOENT;
+   return -1;
+   }
+   return 0;
+   }
+
+   /* Follow "normalized" - ie "refs/.." symlinks by hand */
+   if (S_ISLNK(st.st_mode)) {
+   strbuf_reset(sb_contents);
+   if (strbuf_readlink(sb_contents, path, 0) < 0) {
+   if (errno == ENOENT || errno == EINVAL)
+   /* inconsistent with lstat; retry */
+   goto stat_ref;
+   else
+   return -1;
+   }
+   if (starts_with(sb_contents->buf, "refs/") &&
+   !check_refname_format(sb_contents->buf, 0)) {
+   strbuf_swap(sb_contents, symref);
+   *flags |= REF_ISSYMREF;
+   return 0;
+   }
+   }
+
+   /* Is it a directory? */
+   if (S_ISDIR(st.st_mode)) {
+   errno = EISDIR;
+   return -1;
+   }
+
+   /*
+* Anything else, just open it and try to use it as
+* a ref
+*/
+   fd = open(path, O_RDONLY);
+   if (fd < 0) {
+   if (errno == ENOENT)
+   /* inconsistent with lstat; retry */
+   goto stat_ref;
+   else
+   return -1;
+   }
+   strbuf_reset(sb_contents);
+   if (strbuf_read(sb_contents, fd, 256) < 0) {
+   int save_errno = errno;
+   close(fd);
+   errno = save_errno;
+   return -1;
+   }
+   close(fd);
+   strbuf_rtrim(sb_contents);
+   buf = sb_contents->buf;
+   if (starts_with(buf, "ref:")) {
+   buf += 4;
+   while (isspace(*buf))
+   buf++;
+
+   strbuf_reset(symref);
+   strbuf_addstr(symref, buf);
+   *flags |

[PATCH 12/24] resolve_ref_1(): reorder code

2016-04-07 Thread David Turner
From: Michael Haggerty 

There is no need to adjust *flags if we're just about to fail.

Signed-off-by: Michael Haggerty 
---
 refs/files-backend.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index 69ec903..60f1493 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1542,13 +1542,13 @@ static const char *resolve_ref_1(const char *refname,
return refname;
}
if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) {
-   *flags |= REF_ISBROKEN;
-
if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) ||
!refname_is_safe(refname)) {
errno = EINVAL;
return NULL;
}
+
+   *flags |= REF_ISBROKEN;
bad_name = 1;
}
}
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 13/24] resolve_ref_1(): eliminate local variable "bad_name"

2016-04-07 Thread David Turner
From: Michael Haggerty 

We can use (*flags & REF_BAD_NAME) for that purpose.

Signed-off-by: Michael Haggerty 
---
 refs/files-backend.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index 60f1493..b865ba5 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1399,19 +1399,17 @@ static const char *resolve_ref_1(const char *refname,
 struct strbuf *sb_path,
 struct strbuf *sb_contents)
 {
-   int bad_name = 0;
int symref_count;
 
*flags = 0;
 
if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) {
-   *flags |= REF_BAD_NAME;
-
if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) ||
!refname_is_safe(refname)) {
errno = EINVAL;
return NULL;
}
+
/*
 * dwim_ref() uses REF_ISBROKEN to distinguish between
 * missing refs and refs that were present but invalid,
@@ -1420,7 +1418,7 @@ static const char *resolve_ref_1(const char *refname,
 * We don't know whether the ref exists, so don't set
 * REF_ISBROKEN yet.
 */
-   bad_name = 1;
+   *flags |= REF_BAD_NAME;
}
 
for (symref_count = 0; symref_count < MAXDEPTH; symref_count++) {
@@ -1452,7 +1450,7 @@ static const char *resolve_ref_1(const char *refname,
}
hashclr(sha1);
}
-   if (bad_name) {
+   if (*flags & REF_BAD_NAME) {
hashclr(sha1);
*flags |= REF_ISBROKEN;
}
@@ -1524,7 +1522,7 @@ static const char *resolve_ref_1(const char *refname,
errno = EINVAL;
return NULL;
}
-   if (bad_name) {
+   if (*flags & REF_BAD_NAME) {
hashclr(sha1);
*flags |= REF_ISBROKEN;
}
@@ -1548,8 +1546,7 @@ static const char *resolve_ref_1(const char *refname,
return NULL;
}
 
-   *flags |= REF_ISBROKEN;
-   bad_name = 1;
+   *flags |= REF_ISBROKEN | REF_BAD_NAME;
}
}
 
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/24] refs: move head_ref{,_submodule} to the common code

2016-04-07 Thread David Turner
These don't use any backend-specific functions.  These were previously
defined in terms of the do_head_ref helper function, but since they
are otherwise identical, we don't need that function.

Signed-off-by: David Turner 
Signed-off-by: Junio C Hamano 
---
 refs.c   | 23 +++
 refs/files-backend.c | 28 
 2 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/refs.c b/refs.c
index b0e6ece..6b8c16c 100644
--- a/refs.c
+++ b/refs.c
@@ -1080,3 +1080,26 @@ int rename_ref_available(const char *oldname, const char 
*newname)
strbuf_release(&err);
return ret;
 }
+
+int head_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data)
+{
+   struct object_id oid;
+   int flag;
+
+   if (submodule) {
+   if (resolve_gitlink_ref(submodule, "HEAD", oid.hash) == 0)
+   return fn("HEAD", &oid, 0, cb_data);
+
+   return 0;
+   }
+
+   if (!read_ref_full("HEAD", RESOLVE_REF_READING, oid.hash, &flag))
+   return fn("HEAD", &oid, flag, cb_data);
+
+   return 0;
+}
+
+int head_ref(each_ref_fn fn, void *cb_data)
+{
+   return head_ref_submodule(NULL, fn, cb_data);
+}
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 81f68f8..c07dc41 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1745,34 +1745,6 @@ static int do_for_each_ref(struct ref_cache *refs, const 
char *base,
return do_for_each_entry(refs, base, do_one_ref, &data);
 }
 
-static int do_head_ref(const char *submodule, each_ref_fn fn, void *cb_data)
-{
-   struct object_id oid;
-   int flag;
-
-   if (submodule) {
-   if (resolve_gitlink_ref(submodule, "HEAD", oid.hash) == 0)
-   return fn("HEAD", &oid, 0, cb_data);
-
-   return 0;
-   }
-
-   if (!read_ref_full("HEAD", RESOLVE_REF_READING, oid.hash, &flag))
-   return fn("HEAD", &oid, flag, cb_data);
-
-   return 0;
-}
-
-int head_ref(each_ref_fn fn, void *cb_data)
-{
-   return do_head_ref(NULL, fn, cb_data);
-}
-
-int head_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data)
-{
-   return do_head_ref(submodule, fn, cb_data);
-}
-
 int for_each_ref(each_ref_fn fn, void *cb_data)
 {
return do_for_each_ref(&ref_cache, "", fn, 0, 0, cb_data);
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: git segfaults on older Solaris releases

2016-04-07 Thread David Turner
On Thu, 2016-04-07 at 11:50 -0700, Junio C Hamano wrote:
> Junio C Hamano  writes:
> 
> > "Tom G. Christensen"  writes:
> > 
> > > The reason for the crash is simple, a null value was passed to
> > > the 's'
> > > format for the *printf family of functions.
> > > ...
> > > Passing a null value to the 's' format is explicitly documented
> > > as
> > > giving undefined results on Solaris, even on Solaris 11(2).
> > 
> > Do you mean
> > 
> > *printf("...%.*s...", ..., 0, NULL, ...)
> > 
> > i.e. you saw a NULL passed only when we use %.*s with width=0?
> 
> So, I've looked at places where we use "%.*s" with "prefix" nearby,
> and it seems that this is the only place.
> 
> The "prefix" being a NULL is a perfectly valid state throughout the
> system and means a different thing than it being an empty string, so
> it is valid for callers of prefix_path() and prefix_path_gently() to
> pass prefix=NULL as long as they pass len=0.

TIOLI: The code below does not reflect the "as long as they pass len=0"
condition.  Maybe add an assert for that?  


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 03/16] index-helper: new daemon for caching index and related stuff

2016-04-07 Thread David Turner
On Thu, 2016-04-07 at 16:14 +0200, Johannes Schindelin wrote:
> Hi,
> 
> On Thu, 7 Apr 2016, Johannes Sixt wrote:
> 
> > Am 07.04.2016 um 00:11 schrieb David Turner:
> > > +static void share_index(struct index_state *istate, struct shm
> > > *is)
> > > +{
> > > + void *new_mmap;
> > > + if (istate->mmap_size <= 20 ||
> > > + hashcmp(istate->sha1,
> > > + (unsigned char *)istate->mmap + istate
> > > ->mmap_size - 20)
> > > > > 
> > > + !hashcmp(istate->sha1, is->sha1) ||
> > > + git_shm_map(O_CREAT | O_EXCL | O_RDWR, 0700, istate
> > > ->mmap_size,
> > > + &new_mmap, PROT_READ | PROT_WRITE,
> > > MAP_SHARED,
> > > + "git-index-%s", sha1_to_hex(istate
> > > ->sha1)) < 0)
> > 
> > Builds which have NO_MMAP set require that MAP_PRIVATE is set. So I
> > would
> > guess that at this point you leave those builds behind. Unless we
> > declare
> > such systems as hopelessly outdated and remove NO_MMAP and
> > compat/mmap.c or
> > you support index-helper only when NO_MMAP is not set.
> 
> I vote for the latter: support index-helper only when NO_MMAP is
> unset.

Will fix, thanks.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 05/16] daemonize(): set a flag before exiting the main process

2016-04-06 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

This allows signal handlers and atexit functions to realize this
situation and not clean up.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/gc.c | 2 +-
 cache.h  | 2 +-
 daemon.c | 2 +-
 setup.c  | 4 +++-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index c583aad..37180de 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -385,7 +385,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
 * failure to daemonize is ok, we'll continue
 * in foreground
 */
-   daemonized = !daemonize();
+   daemonized = !daemonize(NULL);
}
} else
add_repack_all_option();
diff --git a/cache.h b/cache.h
index 4b678e9..0aeb994 100644
--- a/cache.h
+++ b/cache.h
@@ -530,7 +530,7 @@ extern int set_git_dir_init(const char *git_dir, const char 
*real_git_dir, int);
 extern int init_db(const char *template_dir, unsigned int flags);
 
 extern void sanitize_stdfds(void);
-extern int daemonize(void);
+extern int daemonize(int *);
 
 #define alloc_nr(x) (((x)+16)*3/2)
 
diff --git a/daemon.c b/daemon.c
index 8d45c33..a5cf954 100644
--- a/daemon.c
+++ b/daemon.c
@@ -1365,7 +1365,7 @@ int main(int argc, char **argv)
return execute();
 
if (detach) {
-   if (daemonize())
+   if (daemonize(NULL))
die("--detach not supported on this platform");
} else
sanitize_stdfds();
diff --git a/setup.c b/setup.c
index de1a2a7..9adf13f 100644
--- a/setup.c
+++ b/setup.c
@@ -1017,7 +1017,7 @@ void sanitize_stdfds(void)
close(fd);
 }
 
-int daemonize(void)
+int daemonize(int *daemonized)
 {
 #ifdef NO_POSIX_GOODIES
errno = ENOSYS;
@@ -1029,6 +1029,8 @@ int daemonize(void)
case -1:
die_errno("fork failed");
default:
+   if (daemonized)
+   *daemonized = 1;
exit(0);
}
if (setsid() == -1)
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 16/16] read-cache: config for waiting for index-helper

2016-04-06 Thread David Turner
When we poke index-helper, and index-helper is using watchman, we want
to wait for a response (showing that the watchman extension shm has
been prepared).  If it's not using watchman, we don't.

So add a new config, core.waitforindexhelper, with sensible defaults, to
configure this behavior.

Signed-off-by: David Turner 
---
 Documentation/config.txt | 6 ++
 cache.h  | 1 +
 config.c | 5 +
 environment.c| 5 +
 read-cache.c | 5 -
 5 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 8ec8824..f61e6fe 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -670,6 +670,12 @@ Likewise, when the `LV` environment variable is unset, Git 
sets it
 to `-c`.  You can override this setting by exporting `LV` with
 another value or setting `core.pager` to `lv +c`.
 
+core.waitforindexhelper::
+   If true, git status and other commands which use the index
+   will wait for a response from the index-helper before continuing;
+   this gives time for the index-helper to communicate with watchman
+   and give information about modified files.
+
 core.whitespace::
A comma separated list of common whitespace problems to
notice.  'git diff' will use `color.diff.whitespace` to
diff --git a/cache.h b/cache.h
index 5713835..db66451 100644
--- a/cache.h
+++ b/cache.h
@@ -691,6 +691,7 @@ extern char *git_replace_ref_base;
 extern int fsync_object_files;
 extern int core_preload_index;
 extern int core_watchman_sync_timeout;
+extern int wait_for_index_helper;
 extern int core_apply_sparse_checkout;
 extern int precomposed_unicode;
 extern int protect_hfs;
diff --git a/config.c b/config.c
index e6dc141..5f1b8bd 100644
--- a/config.c
+++ b/config.c
@@ -887,6 +887,11 @@ static int git_default_core_config(const char *var, const 
char *value)
return 0;
}
 
+   if (!strcmp(var, "core.waitforindexhelper")) {
+   wait_for_index_helper = git_config_bool(var, value);
+   return 0;
+   }
+
if (!strcmp(var, "core.createobject")) {
if (!strcmp(value, "rename"))
object_creation_mode = OBJECT_CREATION_USES_RENAMES;
diff --git a/environment.c b/environment.c
index 35e03c7..c7fb0a9 100644
--- a/environment.c
+++ b/environment.c
@@ -95,6 +95,11 @@ int core_preload_index = 1;
 int ignore_untracked_cache_config;
 
 int core_watchman_sync_timeout = 300;
+#ifdef USE_WATCHMAN
+int wait_for_index_helper = 1;
+#else
+int wait_for_index_helper = 0;
+#endif
 
 
 /* This is set by setup_git_dir_gently() and/or git_default_config() */
diff --git a/read-cache.c b/read-cache.c
index 470cd7b..23dbb73 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1800,7 +1800,10 @@ static int poke_daemon(struct index_state *istate,
if (refresh_cache) {
ret = write_in_full(fd, "refresh", 8) != 8;
} else {
-   ret = poke_and_wait_for_reply(fd);
+   if (wait_for_index_helper)
+   ret = poke_and_wait_for_reply(fd);
+   else
+   ret = write_in_full(fd, "poke", 5) != 5;
}
 
close(fd);
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 09/16] index-helper: use watchman to avoid refreshing index with lstat()

2016-04-06 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Watchman is hidden behind index-helper. Before git tries to read the
index from shm, it notifies index-helper through the socket and
waits for index-helper to prepare shm. index-helper then contacts
watchman, updates 'WAMA' extension and put it in a separate shm and
wakes git up with a reply to git's socket.

Git uses this extension to not lstat unchanged entries. Git only
trusts the 'WAMA' extension when it's received from the separate shm,
not from disk. Unmarked entries are "clean". Marked entries are dirty
from watchman point of view. If it finds out some entries are
'watchman-dirty', but are really unchanged (e.g. the file was changed,
then reverted back), then Git will clear the marking in 'WAMA' before
writing it down.

Hiding watchman behind index-helper means you need both daemons. You
can't run watchman alone. Not so good. But on the other hand, 'git'
binary is not linked to watchman/json libraries, which is good for
packaging. Core git package will run fine without watchman-related
packages. If they need watchman, they can install git-index-helper and
dependencies.

This also lets us trust anything in the untracked cache that we haven't
marked invalid, saving those stat() calls.

Another reason for tying watchman to index-helper is, when used with
untracked cache, we need to keep track of $GIT_WORK_TREE file
listing. That kind of list can be kept in index-helper.

Signed-off-by: Nguyễn Thái Ngọc Duy 
Signed-off-by: David Turner 
---
 Documentation/git-index-helper.txt |   6 +
 cache.h|   8 ++
 dir.c  |  23 +++-
 dir.h  |   3 +
 index-helper.c | 102 ++---
 read-cache.c   | 218 +
 6 files changed, 319 insertions(+), 41 deletions(-)

diff --git a/Documentation/git-index-helper.txt 
b/Documentation/git-index-helper.txt
index bb344cf..9d1ad0e 100644
--- a/Documentation/git-index-helper.txt
+++ b/Documentation/git-index-helper.txt
@@ -51,6 +51,12 @@ following commands are used to control the daemon:
Let the daemon know the index is to be read. It keeps the
daemon alive longer, unless `--exit-after=0` is used.
 
+"poke ":
+   Like "poke", but replies with "OK".  If watchman is
+   configured, index-helper queries watchman, then prepares a
+   shared memory object with the watchman index extension before
+   replying.
+
 All commands and replies are terminated by a 0 byte.
 
 GIT
diff --git a/cache.h b/cache.h
index 37f211b..e43a6e1 100644
--- a/cache.h
+++ b/cache.h
@@ -558,6 +558,7 @@ extern int daemonize(int *);
 
 /* Initialize and use the cache information */
 struct lock_file;
+extern int verify_index(const struct index_state *);
 extern int read_index(struct index_state *);
 extern int read_index_preload(struct index_state *, const struct pathspec 
*pathspec);
 extern int do_read_index(struct index_state *istate, const char *path,
@@ -565,6 +566,7 @@ extern int do_read_index(struct index_state *istate, const 
char *path,
 extern int read_index_from(struct index_state *, const char *path);
 extern int is_index_unborn(struct index_state *);
 extern int read_index_unmerged(struct index_state *);
+extern void write_watchman_ext(struct strbuf *sb, struct index_state* istate);
 #define COMMIT_LOCK(1 << 0)
 #define CLOSE_LOCK (1 << 1)
 extern int write_locked_index(struct index_state *, struct lock_file *lock, 
unsigned flags);
@@ -1839,4 +1841,10 @@ void sleep_millisec(int millisec);
  */
 void safe_create_dir(const char *dir, int share);
 
+/*
+ * Open a socket to the git index-helper. Return the fd of that
+ * socket, or -1 on error.
+ */
+int connect_to_index_helper(void);
+
 #endif /* CACHE_H */
diff --git a/dir.c b/dir.c
index 69e0be6..5058b29 100644
--- a/dir.c
+++ b/dir.c
@@ -597,9 +597,9 @@ static void trim_trailing_spaces(char *buf)
  *
  * If "name" has the trailing slash, it'll be excluded in the search.
  */
-static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
-   struct untracked_cache_dir 
*dir,
-   const char *name, int len)
+struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
+struct untracked_cache_dir *dir,
+const char *name, int len)
 {
int first, last;
struct untracked_cache_dir *d;
@@ -1726,6 +1726,17 @@ static int valid_cached_dir(struct dir_struct *dir,
if (!untracked)
return 0;
 
+   if (dir->untracked->use_watchman) {
+   /*
+* With watchman, we can trust the untracked cache's
+ 

[PATCH v3 15/16] index-helper: optionally automatically run

2016-04-06 Thread David Turner
Introduce a new config option, indexhelper.autorun, to automatically
run git index-helper before starting up a builtin git command.  This
enables users to keep index-helper running without manual
intervention.

Signed-off-by: David Turner 
---
 Documentation/config.txt |  4 
 git.c| 35 ++-
 t/t7900-index-helper.sh  | 16 
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 2cd6bdd..8ec8824 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1852,6 +1852,10 @@ index.version::
Specify the version with which new index files should be
initialized.  This does not affect existing repositories.
 
+indexhelper.autorun::
+   Automatically run git index-helper when any builtin git
+   command is run inside a repository.
+
 init.templateDir::
Specify the directory from which templates will be copied.
(See the "TEMPLATE DIRECTORY" section of linkgit:git-init[1].)
diff --git a/git.c b/git.c
index 6cc0c07..7d27782 100644
--- a/git.c
+++ b/git.c
@@ -521,6 +521,37 @@ static void strip_extension(const char **argv)
 #define strip_extension(cmd)
 #endif
 
+static int want_auto_index_helper(void)
+{
+   int want = -1;
+   const char *value = NULL;
+   const char *key = "indexhelper.autorun";
+
+   if (git_config_key_is_valid(key) &&
+   !git_config_get_value(key, &value)) {
+   int b = git_config_maybe_bool(key, value);
+   want = b >= 0 ? b : 0;
+   }
+   return want;
+}
+
+static void maybe_run_index_helper(struct cmd_struct *cmd)
+{
+   const char *argv[] = {"git-index-helper", "--detach", "--autorun", 
NULL};
+
+   if (!(cmd->option & NEED_WORK_TREE))
+   return;
+
+   if (want_auto_index_helper() <= 0)
+   return;
+
+   trace_argv_printf(argv, "trace: auto index-helper:");
+
+   if (run_command_v_opt(argv,
+ RUN_SILENT_EXEC_FAILURE | RUN_CLEAN_ON_EXIT))
+   warning(_("You specified indexhelper.autorun, but running 
git-index-helper failed."));
+}
+
 static void handle_builtin(int argc, const char **argv)
 {
const char *cmd;
@@ -536,8 +567,10 @@ static void handle_builtin(int argc, const char **argv)
}
 
builtin = get_builtin(cmd);
-   if (builtin)
+   if (builtin) {
+   maybe_run_index_helper(builtin);
exit(run_builtin(builtin, argc, argv));
+   }
 }
 
 static void execv_dashed_external(const char **argv)
diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh
index 3925ee3..6e11f8c 100755
--- a/t/t7900-index-helper.sh
+++ b/t/t7900-index-helper.sh
@@ -50,4 +50,20 @@ test_expect_success 'index-helper is quiet with --autorun' '
git index-helper --autorun
 '
 
+test_expect_success 'index-helper autorun works' '
+   rm -f .git/index-helper.path &&
+   git status &&
+   test_path_is_missing .git/index-helper.path &&
+   test_config indexhelper.autorun true &&
+   git status &&
+   test -L .git/index-helper.path &&
+   git status 2>err &&
+   test -L .git/index-helper.path &&
+   ! grep -q . err &&
+   git index-helper --kill &&
+   test_config indexhelper.autorun false &&
+   git status &&
+   test_path_is_missing .git/index-helper.path
+'
+
 test_done
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 14/16] index-helper: autorun mode

2016-04-06 Thread David Turner
Soon, we'll want to automatically start index-helper, so we need
a mode that silently exits if it can't start up (either because
it's not in a git dir, or because another one is already running).

Signed-off-by: David Turner 
---
 index-helper.c  | 29 +++--
 t/t7900-index-helper.sh |  8 
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/index-helper.c b/index-helper.c
index b62d805..4d08ff4 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -382,8 +382,9 @@ static void request_kill(void)
 int main(int argc, char **argv)
 {
const char *prefix;
-   int idle_in_seconds = 600, detach = 0, kill = 0;
+   int idle_in_seconds = 600, detach = 0, kill = 0, autorun = 0;
int fd;
+   int nongit;
struct strbuf socket_path = STRBUF_INIT;
struct option options[] = {
OPT_INTEGER(0, "exit-after", &idle_in_seconds,
@@ -392,6 +393,7 @@ int main(int argc, char **argv)
 "verify shared memory after creating"),
OPT_BOOL(0, "detach", &detach, "detach the process"),
OPT_BOOL(0, "kill", &kill, "request that existing index helper 
processes exit"),
+   OPT_BOOL(0, "autorun", &autorun, "this is an automatic run of 
git index-helper, so certain errors can be solved by silently exiting"),
OPT_END()
};
 
@@ -401,7 +403,14 @@ int main(int argc, char **argv)
if (argc == 2 && !strcmp(argv[1], "-h"))
usage_with_options(usage_text, options);
 
-   prefix = setup_git_directory();
+   prefix = setup_git_directory_gently(&nongit);
+   if (nongit) {
+   if (autorun)
+   exit(0);
+   else
+   die(_("not a git repository"));
+   }
+
if (parse_options(argc, (const char **)argv, prefix,
  options, usage_text, 0))
die(_("too many arguments"));
@@ -415,10 +424,18 @@ int main(int argc, char **argv)
 
/* check that no other copy is running */
fd = connect_to_index_helper();
-   if (fd > 0)
-   die(_("Already running"));
-   if (errno != ECONNREFUSED && errno != ENOENT)
-   die_errno(_("Unexpected error checking socket"));
+   if (fd > 0) {
+   if (autorun)
+   exit(0);
+   else
+   die(_("Already running"));
+   }
+   if (errno != ECONNREFUSED && errno != ENOENT) {
+   if (autorun)
+   return 0;
+   else
+   die_errno(_("Unexpected error checking socket"));
+   }
 
atexit(cleanup);
sigchain_push_common(cleanup_on_signal);
diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh
index 3f94d8a..3925ee3 100755
--- a/t/t7900-index-helper.sh
+++ b/t/t7900-index-helper.sh
@@ -42,4 +42,12 @@ test_expect_success 'index-helper will not start if already 
running' '
grep "Already running" err
 '
 
+test_expect_success 'index-helper is quiet with --autorun' '
+   test_when_finished "git index-helper --kill" &&
+   git index-helper --kill &&
+   git index-helper --detach &&
+   test -L .git/index-helper.path &&
+   git index-helper --autorun
+'
+
 test_done
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 11/16] unpack-trees: preserve index extensions

2016-04-06 Thread David Turner
Make git checkout (and other unpack_tree operations) preserve the
untracked cache and watchman status. This is valuable for two reasons:

1. Often, an unpack_tree operation will not touch large parts of the
working tree, and thus most of the untracked cache will continue to be
valid.

2. Even if the untracked cache were entirely invalidated by such an
operation, the user has signaled their intention to have such a cache,
and we don't want to throw it away.

The same logic applies to the watchman state.

Signed-off-by: David Turner 
---
 cache.h   |  1 +
 read-cache.c  |  8 
 t/t7063-status-untracked-cache.sh | 22 ++
 t/test-lib-functions.sh   |  4 
 unpack-trees.c|  1 +
 5 files changed, 36 insertions(+)

diff --git a/cache.h b/cache.h
index e43a6e1..5713835 100644
--- a/cache.h
+++ b/cache.h
@@ -571,6 +571,7 @@ extern void write_watchman_ext(struct strbuf *sb, struct 
index_state* istate);
 #define CLOSE_LOCK (1 << 1)
 extern int write_locked_index(struct index_state *, struct lock_file *lock, 
unsigned flags);
 extern int discard_index(struct index_state *);
+extern void move_index_extensions(struct index_state *dst, struct index_state 
*src);
 extern int unmerged_index(const struct index_state *);
 extern int verify_path(const char *path);
 extern int index_dir_exists(struct index_state *istate, const char *name, int 
namelen);
diff --git a/read-cache.c b/read-cache.c
index b6e9244..470cd7b 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2725,3 +2725,11 @@ void stat_validity_update(struct stat_validity *sv, int 
fd)
fill_stat_data(sv->sd, &st);
}
 }
+
+void move_index_extensions(struct index_state *dst, struct index_state *src)
+{
+   dst->untracked = src->untracked;
+   src->untracked = NULL;
+   dst->last_update = src->last_update;
+   src->last_update = NULL;
+}
diff --git a/t/t7063-status-untracked-cache.sh 
b/t/t7063-status-untracked-cache.sh
index a971884..083516d 100755
--- a/t/t7063-status-untracked-cache.sh
+++ b/t/t7063-status-untracked-cache.sh
@@ -646,4 +646,26 @@ test_expect_success 'test ident field is working' '
test_cmp ../expect ../err
 '
 
+test_expect_success 'untracked cache survives a checkout' '
+   git commit --allow-empty -m empty &&
+   test-dump-untracked-cache >../before &&
+   test_when_finished  "git checkout master" &&
+   git checkout -b other_branch &&
+   test-dump-untracked-cache >../after &&
+   test_cmp ../before ../after &&
+   test_commit test &&
+   test-dump-untracked-cache >../before &&
+   git checkout master &&
+   test-dump-untracked-cache >../after &&
+   test_cmp ../before ../after
+'
+
+test_expect_success 'untracked cache survives a commit' '
+   test-dump-untracked-cache >../before &&
+   git add done/two &&
+   git commit -m commit &&
+   test-dump-untracked-cache >../after &&
+   test_cmp ../before ../after
+'
+
 test_done
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 8d99eb3..e974b5b 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -186,6 +186,10 @@ test_commit () {
test_tick
fi &&
git commit $signoff -m "$1" &&
+   if [ "$(git config core.bare)" = false ]
+   then
+   git update-index --force-untracked-cache
+   fi
git tag "${4:-$1}"
 }
 
diff --git a/unpack-trees.c b/unpack-trees.c
index 9f55cc2..fc90eb3 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1215,6 +1215,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, 
struct unpack_trees_options
  WRITE_TREE_SILENT |
  WRITE_TREE_REPAIR);
}
+   move_index_extensions(&o->result, o->dst_index);
discard_index(o->dst_index);
*o->dst_index = o->result;
} else {
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 13/16] index-helper: don't run if already running

2016-04-06 Thread David Turner
Signed-off-by: David Turner 
---
 index-helper.c  | 7 +++
 t/t7900-index-helper.sh | 9 +
 2 files changed, 16 insertions(+)

diff --git a/index-helper.c b/index-helper.c
index a3f6a74..b62d805 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -413,6 +413,13 @@ int main(int argc, char **argv)
return 0;
}
 
+   /* check that no other copy is running */
+   fd = connect_to_index_helper();
+   if (fd > 0)
+   die(_("Already running"));
+   if (errno != ECONNREFUSED && errno != ENOENT)
+   die_errno(_("Unexpected error checking socket"));
+
atexit(cleanup);
sigchain_push_common(cleanup_on_signal);
 
diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh
index b1fa96e..3f94d8a 100755
--- a/t/t7900-index-helper.sh
+++ b/t/t7900-index-helper.sh
@@ -33,4 +33,13 @@ test_expect_success 'index-helper creates usable path file 
and can be killed' '
test_path_is_missing .git/index-helper.path
 '
 
+test_expect_success 'index-helper will not start if already running' '
+   test_when_finished "git index-helper --kill" &&
+   git index-helper --detach &&
+   test -L .git/index-helper.path &&
+   test_must_fail git index-helper 2>err &&
+   test -L .git/index-helper.path &&
+   grep "Already running" err
+'
+
 test_done
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 12/16] index-helper: kill mode

2016-04-06 Thread David Turner
Add a new command (and command-line arg) to allow index-helpers to
exit cleanly.

This is mainly useful for tests.

Signed-off-by: David Turner 
---
 index-helper.c  | 31 ++-
 t/t7900-index-helper.sh | 13 +
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/index-helper.c b/index-helper.c
index f993ae6..a3f6a74 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -276,6 +276,8 @@ static void loop(int fd, int idle_in_seconds)
 * alive, nothing to do.
 */
}
+   } else if (!strcmp(command.buf, "die")) {
+   break;
} else {
warning("BUG: Bogus command %s", command.buf);
}
@@ -358,10 +360,29 @@ static void make_socket_path(struct strbuf *path)
strbuf_addstr(path, "/s");
 }
 
+static void request_kill(void)
+{
+   int fd = connect_to_index_helper();
+
+   if (fd >= 0) {
+   write_in_full(fd, "die", 4);
+   close(fd);
+   }
+
+   /*
+* The child will try to do this anyway, but we want to be
+* ready to launch a new daemon immediately after this command
+* returns.
+*/
+
+   unlink(git_path("index-helper.path"));
+   return;
+}
+
 int main(int argc, char **argv)
 {
const char *prefix;
-   int idle_in_seconds = 600, detach = 0;
+   int idle_in_seconds = 600, detach = 0, kill = 0;
int fd;
struct strbuf socket_path = STRBUF_INIT;
struct option options[] = {
@@ -370,6 +391,7 @@ int main(int argc, char **argv)
OPT_BOOL(0, "strict", &to_verify,
 "verify shared memory after creating"),
OPT_BOOL(0, "detach", &detach, "detach the process"),
+   OPT_BOOL(0, "kill", &kill, "request that existing index helper 
processes exit"),
OPT_END()
};
 
@@ -384,6 +406,13 @@ int main(int argc, char **argv)
  options, usage_text, 0))
die(_("too many arguments"));
 
+   if (kill) {
+   if (detach)
+   die(_("--kill doesn't want any other options"));
+   request_kill();
+   return 0;
+   }
+
atexit(cleanup);
sigchain_push_common(cleanup_on_signal);
 
diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh
index 6a06094..b1fa96e 100755
--- a/t/t7900-index-helper.sh
+++ b/t/t7900-index-helper.sh
@@ -20,4 +20,17 @@ test_expect_success 'index-helper smoke test' '
test_path_is_missing .git/index-helper.path
 '
 
+test_expect_success 'index-helper creates usable path file and can be killed' '
+   test_when_finished "git index-helper --kill" &&
+   test_path_is_missing .git/index-helper.path &&
+   git index-helper --detach &&
+   test -L .git/index-helper.path &&
+   sock="$(readlink .git/index-helper.path)" &&
+   test -S "$sock" &&
+   dir=$(dirname "$sock") &&
+   ls -ld "$dir" | grep ^drwx...--- &&
+   git index-helper --kill &&
+   test_path_is_missing .git/index-helper.path
+'
+
 test_done
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 04/16] index-helper: add --strict

2016-04-06 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

There are "holes" in the index-helper approach because the shared
memory is not verified again by git. If $USER is compromised, shared
memory could be modified. But anyone who could do this could already
modify $GIT_DIR/index. A more realistic risk is some bugs in
index-helper that produce corrupt shared memory. --strict is added to
avoid that.

Strictly speaking there's still a very small gap where corrupt shared
memory could still be read by git: after we write the trailing SHA-1 in
the shared memory (thus signaling "this shm is ready") and before
verify_shm() detects an error.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Documentation/git-index-helper.txt |  9 +++
 cache.h|  1 +
 index-helper.c | 48 ++
 read-cache.c   |  9 ---
 4 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-index-helper.txt 
b/Documentation/git-index-helper.txt
index 61605e9..b177fb8 100644
--- a/Documentation/git-index-helper.txt
+++ b/Documentation/git-index-helper.txt
@@ -22,6 +22,15 @@ OPTIONS
Exit if the cached index is not accessed for ``
seconds. Specify 0 to wait forever. Default is 600.
 
+--strict::
+--no-strict::
+   Strict mode makes index-helper verify the shared memory after
+   it's created. If the result does not match what's read from
+   $GIT_DIR/index, the shared memory is destroyed. This makes
+   index-helper take more than double the amount of time required
+   for reading an index, but because it will happen in the
+   background, it's not noticable. `--strict` is enabled by default.
+
 NOTES
 -
 $GIT_DIR/index-helper.path is a symlink to a Unix domain socket in
diff --git a/cache.h b/cache.h
index 43fb314..4b678e9 100644
--- a/cache.h
+++ b/cache.h
@@ -336,6 +336,7 @@ struct index_state {
 keep_mmap : 1,
 from_shm : 1,
 to_shm : 1,
+always_verify_trailing_sha1 : 1,
 initialized : 1;
struct hashmap name_hash;
struct hashmap dir_hash;
diff --git a/index-helper.c b/index-helper.c
index 263b066..8288e30 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -15,6 +15,7 @@ struct shm {
 
 static struct shm shm_index;
 static struct shm shm_base_index;
+static int to_verify = 1;
 
 static void release_index_shm(struct shm *is)
 {
@@ -76,11 +77,56 @@ static void share_index(struct index_state *istate, struct 
shm *is)
hashcpy((unsigned char *)new_mmap + istate->mmap_size - 20, is->sha1);
 }
 
+static int verify_shm(void)
+{
+   int i;
+   struct index_state istate;
+   memset(&istate, 0, sizeof(istate));
+   istate.always_verify_trailing_sha1 = 1;
+   istate.to_shm = 1;
+   i = read_index(&istate);
+   if (i != the_index.cache_nr)
+   goto done;
+   for (; i < the_index.cache_nr; i++) {
+   struct cache_entry *base, *ce;
+   /* namelen is checked separately */
+   const unsigned int ondisk_flags =
+   CE_STAGEMASK | CE_VALID | CE_EXTENDED_FLAGS;
+   unsigned int ce_flags, base_flags, ret;
+   base = the_index.cache[i];
+   ce = istate.cache[i];
+   if (ce->ce_namelen != base->ce_namelen ||
+   strcmp(ce->name, base->name)) {
+   warning("mismatch at entry %d", i);
+   break;
+   }
+   ce_flags = ce->ce_flags;
+   base_flags = base->ce_flags;
+   /* only on-disk flags matter */
+   ce->ce_flags   &= ondisk_flags;
+   base->ce_flags &= ondisk_flags;
+   ret = memcmp(&ce->ce_stat_data, &base->ce_stat_data,
+offsetof(struct cache_entry, name) -
+offsetof(struct cache_entry, ce_stat_data));
+   ce->ce_flags = ce_flags;
+   base->ce_flags = base_flags;
+   if (ret) {
+   warning("mismatch at entry %d", i);
+   break;
+   }
+   }
+done:
+   discard_index(&istate);
+   return i == the_index.cache_nr;
+}
+
 static void share_the_index(void)
 {
if (the_index.split_index && the_index.split_index->base)
share_index(the_index.split_index->base, &shm_base_index);
share_index(&the_index, &shm_index);
+   if (to_verify && !verify_shm())
+   cleanup_shm();
discard_index(&the_index);
 }
 
@@ -249,6 +295,8 @@ int main(int argc, char **argv)
struct option options[] = {
OPT_INTEGER(0, "exit-after", &idle_in_seconds,
N_("exit if not used after some seconds")),
+   OPT_BOOL(0, "strict", &to_verify,
+"verify shared memory after creating"),
  

[PATCH v3 08/16] Add watchman support to reduce index refresh cost

2016-04-06 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

The previous patch has the logic to clear bits in 'WAMA' bitmap. This
patch has logic to set bits as told by watchman. The missing bit,
_using_ these bits, are not here yet.

A lot of this code is written by David Turner originally, mostly from
[1]. I'm just copying and polishing it a bit.

[1] http://article.gmane.org/gmane.comp.version-control.git/248006

Signed-off-by: David Turner 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Makefile   |  12 +
 cache.h|   1 +
 config.c   |   5 ++
 configure.ac   |   8 
 environment.c  |   3 ++
 watchman-support.c | 135 +
 watchman-support.h |   7 +++
 7 files changed, 171 insertions(+)
 create mode 100644 watchman-support.c
 create mode 100644 watchman-support.h

diff --git a/Makefile b/Makefile
index 24d6c0e..8ad8e1f 100644
--- a/Makefile
+++ b/Makefile
@@ -453,6 +453,7 @@ MSGFMT = msgfmt
 CURL_CONFIG = curl-config
 PTHREAD_LIBS = -lpthread
 PTHREAD_CFLAGS =
+WATCHMAN_LIBS =
 GCOV = gcov
 
 export TCL_PATH TCLTK_PATH
@@ -1419,6 +1420,13 @@ else
LIB_OBJS += thread-utils.o
 endif
 
+ifdef USE_WATCHMAN
+   LIB_H += watchman-support.h
+   LIB_OBJS += watchman-support.o
+   WATCHMAN_LIBS = -lwatchman
+   BASIC_CFLAGS += -DUSE_WATCHMAN
+endif
+
 ifdef HAVE_PATHS_H
BASIC_CFLAGS += -DHAVE_PATHS_H
 endif
@@ -2030,6 +2038,9 @@ git-remote-testsvn$X: remote-testsvn.o GIT-LDFLAGS 
$(GITLIBS) $(VCSSVN_LIB)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) 
$(LIBS) \
$(VCSSVN_LIB)
 
+git-index-helper$X: index-helper.o GIT-LDFLAGS $(GITLIBS)
+   $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) 
$(LIBS) $(WATCHMAN_LIBS)
+
 $(REMOTE_CURL_ALIASES): $(REMOTE_CURL_PRIMARY)
$(QUIET_LNCP)$(RM) $@ && \
ln $< $@ 2>/dev/null || \
@@ -2169,6 +2180,7 @@ GIT-BUILD-OPTIONS: FORCE
@echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@+
@echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst 
','\'',$(NO_UNIX_SOCKETS)))'\' >>$@+
@echo HAVE_SHM=\''$(subst ','\'',$(subst ','\'',$(HAVE_SHM)))'\' >>$@+
+   @echo USE_WATCHMAN=\''$(subst ','\'',$(subst 
','\'',$(USE_WATCHMAN)))'\' >>$@+
 ifdef TEST_OUTPUT_DIRECTORY
@echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst 
','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@+
 endif
diff --git a/cache.h b/cache.h
index f4f7eef..37f211b 100644
--- a/cache.h
+++ b/cache.h
@@ -687,6 +687,7 @@ extern char *git_replace_ref_base;
 
 extern int fsync_object_files;
 extern int core_preload_index;
+extern int core_watchman_sync_timeout;
 extern int core_apply_sparse_checkout;
 extern int precomposed_unicode;
 extern int protect_hfs;
diff --git a/config.c b/config.c
index 9ba40bc..e6dc141 100644
--- a/config.c
+++ b/config.c
@@ -882,6 +882,11 @@ static int git_default_core_config(const char *var, const 
char *value)
return 0;
}
 
+   if (!strcmp(var, "core.watchmansynctimeout")) {
+   core_watchman_sync_timeout = git_config_int(var, value);
+   return 0;
+   }
+
if (!strcmp(var, "core.createobject")) {
if (!strcmp(value, "rename"))
object_creation_mode = OBJECT_CREATION_USES_RENAMES;
diff --git a/configure.ac b/configure.ac
index 0cd9f46..334d63b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1099,6 +1099,14 @@ AC_COMPILE_IFELSE([BSD_SYSCTL_SRC],
HAVE_BSD_SYSCTL=])
 GIT_CONF_SUBST([HAVE_BSD_SYSCTL])
 
+#
+# Check for watchman client library
+
+AC_CHECK_LIB([watchman], [watchman_connect],
+   [USE_WATCHMAN=YesPlease],
+   [USE_WATCHMAN=])
+GIT_CONF_SUBST([USE_WATCHMAN])
+
 ## Other checks.
 # Define USE_PIC if you need the main git objects to be built with -fPIC
 # in order to build and link perl/Git.so.  x86-64 seems to need this.
diff --git a/environment.c b/environment.c
index 6dec9d0..35e03c7 100644
--- a/environment.c
+++ b/environment.c
@@ -94,6 +94,9 @@ int core_preload_index = 1;
  */
 int ignore_untracked_cache_config;
 
+int core_watchman_sync_timeout = 300;
+
+
 /* This is set by setup_git_dir_gently() and/or git_default_config() */
 char *git_work_tree_cfg;
 static char *work_tree;
diff --git a/watchman-support.c b/watchman-support.c
new file mode 100644
index 000..b168e88
--- /dev/null
+++ b/watchman-support.c
@@ -0,0 +1,135 @@
+#include "cache.h"
+#include "watchman-support.h"
+#include "strbuf.h"
+#include "dir.h"
+#include 
+
+static str

[PATCH v3 07/16] read-cache: add watchman 'WAMA' extension

2016-04-06 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

The extension contains a bitmap, one bit for each entry in the
index. If the n-th bit is zero, the n-th entry is considered
unchanged, we can ce_mark_uptodate() it without refreshing. If the bit
is non-zero and we found out the corresponding file is clean after
refresh, we can clear the bit.

In addition, there's a list of directories in the untracked-cache
to invalidate (because they have new or modified entries).

The 'skipping refresh' bit is not in this patch yet as we would need
watchman. More details in later patches.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 cache.h  |   4 ++
 dir.h|   3 ++
 read-cache.c | 117 ++-
 3 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/cache.h b/cache.h
index 0aeb994..f4f7eef 100644
--- a/cache.h
+++ b/cache.h
@@ -182,6 +182,8 @@ struct cache_entry {
 #define CE_VALID (0x8000)
 #define CE_STAGESHIFT 12
 
+#define CE_WATCHMAN_DIRTY  (0x0001)
+
 /*
  * Range 0x0FFF in ce_flags is divided into
  * two parts: in-memory flags and on-disk ones.
@@ -320,6 +322,7 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define CACHE_TREE_CHANGED (1 << 5)
 #define SPLIT_INDEX_ORDERED(1 << 6)
 #define UNTRACKED_CHANGED  (1 << 7)
+#define WATCHMAN_CHANGED   (1 << 8)
 
 struct split_index;
 struct untracked_cache;
@@ -344,6 +347,7 @@ struct index_state {
struct untracked_cache *untracked;
void *mmap;
size_t mmap_size;
+   char *last_update;
 };
 
 extern struct index_state the_index;
diff --git a/dir.h b/dir.h
index 3ec3fb0..3d540de 100644
--- a/dir.h
+++ b/dir.h
@@ -142,6 +142,9 @@ struct untracked_cache {
int gitignore_invalidated;
int dir_invalidated;
int dir_opened;
+   /* watchman invalidation data */
+   unsigned int use_watchman : 1;
+   struct string_list invalid_untracked;
 };
 
 struct dir_struct {
diff --git a/read-cache.c b/read-cache.c
index 7215a17..59d892e 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -19,6 +19,7 @@
 #include "split-index.h"
 #include "utf8.h"
 #include "shm.h"
+#include "ewah/ewok.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
   unsigned int options);
@@ -41,11 +42,13 @@ static struct cache_entry *refresh_cache_entry(struct 
cache_entry *ce,
 #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
 #define CACHE_EXT_LINK 0x6c696e6b/* "link" */
 #define CACHE_EXT_UNTRACKED 0x554E5452   /* "UNTR" */
+#define CACHE_EXT_WATCHMAN 0x57414D41/* "WAMA" */
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
 CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
-SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED)
+SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED | \
+WATCHMAN_CHANGED)
 
 struct index_state the_index;
 static const char *alternate_index_output;
@@ -1220,8 +1223,13 @@ int refresh_index(struct index_state *istate, unsigned 
int flags,
continue;
 
new = refresh_cache_ent(istate, ce, options, &cache_errno, 
&changed);
-   if (new == ce)
+   if (new == ce) {
+   if (ce->ce_flags & CE_WATCHMAN_DIRTY) {
+   ce->ce_flags  &= ~CE_WATCHMAN_DIRTY;
+   istate->cache_changed |= WATCHMAN_CHANGED;
+   }
continue;
+   }
if (!new) {
const char *fmt;
 
@@ -1365,6 +1373,94 @@ static int verify_hdr(const struct cache_header *hdr, 
unsigned long size)
return 0;
 }
 
+static void mark_no_watchman(size_t pos, void *data)
+{
+   struct index_state *istate = data;
+   assert(pos < istate->cache_nr);
+   istate->cache[pos]->ce_flags |= CE_WATCHMAN_DIRTY;
+}
+
+static int read_watchman_ext(struct index_state *istate, const void *data,
+unsigned long sz)
+{
+   struct ewah_bitmap *bitmap;
+   int ret, len;
+   uint32_t bitmap_size;
+   uint32_t untracked_nr;
+
+   if (memchr(data, 0, sz) == NULL)
+   return error("invalid extension");
+
+   len = strlen(data) + 1;
+   memcpy(&bitmap_size, (const char *)data + len, 4);
+   memcpy(&untracked_nr, (const char *)data + len + 4, 4);
+   untracked_nr = ntohl(untracked_nr);
+   bitmap_size = ntohl(bitmap_size);
+
+   bitmap = ewah_new();
+   ret = ewah_read_mmap(bitmap, (const char *)data + len + 8, bitmap_size);
+   if (ret != bitmap_size) {
+   ewah_free(bitmap);
+   return error("failed to parse ewah bitmap reading watchman 
index extension");
+   }
+   istate->last_update = xstrdup(data);
+   ewah_each_bit(bitmap, mar

[PATCH v3 10/16] update-index: enable/disable watchman support

2016-04-06 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/update-index.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index 1c94ca5..7c08e1c 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -914,6 +914,7 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
 {
int newfd, entries, has_errors = 0, nul_term_line = 0;
enum uc_mode untracked_cache = UC_UNSPECIFIED;
+   int use_watchman = -1;
int read_from_stdin = 0;
int prefix_length = prefix ? strlen(prefix) : 0;
int preferred_index_format = 0;
@@ -1012,6 +1013,8 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
N_("test if the filesystem supports untracked 
cache"), UC_TEST),
OPT_SET_INT(0, "force-untracked-cache", &untracked_cache,
N_("enable untracked cache without testing the 
filesystem"), UC_FORCE),
+   OPT_BOOL(0, "watchman", &use_watchman,
+   N_("use or not use watchman to reduce refresh cost")),
OPT_END()
};
 
@@ -1149,6 +1152,14 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
die("Bug: bad untracked_cache value: %d", untracked_cache);
}
 
+   if (use_watchman > 0) {
+   the_index.last_update= xstrdup("");
+   the_index.cache_changed |= WATCHMAN_CHANGED;
+   } else if (!use_watchman) {
+   the_index.last_update= NULL;
+   the_index.cache_changed |= WATCHMAN_CHANGED;
+   }
+
if (active_cache_changed) {
if (newfd < 0) {
if (refresh_args.flags & REFRESH_QUIET)
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 06/16] index-helper: add --detach

2016-04-06 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

We detach after creating and opening the socket, because otherwise
we might return control to the shell before index-helper is ready to
accept commands.  This might lead to flaky tests.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Documentation/git-index-helper.txt |  3 +++
 index-helper.c | 11 +--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-index-helper.txt 
b/Documentation/git-index-helper.txt
index b177fb8..bb344cf 100644
--- a/Documentation/git-index-helper.txt
+++ b/Documentation/git-index-helper.txt
@@ -31,6 +31,9 @@ OPTIONS
for reading an index, but because it will happen in the
background, it's not noticable. `--strict` is enabled by default.
 
+--detach::
+   Detach from the shell.
+
 NOTES
 -
 $GIT_DIR/index-helper.path is a symlink to a Unix domain socket in
diff --git a/index-helper.c b/index-helper.c
index 8288e30..10f29f5 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -15,7 +15,7 @@ struct shm {
 
 static struct shm shm_index;
 static struct shm shm_base_index;
-static int to_verify = 1;
+static int daemonized, to_verify = 1;
 
 static void release_index_shm(struct shm *is)
 {
@@ -34,6 +34,8 @@ static void cleanup_shm(void)
 
 static void cleanup(void)
 {
+   if (daemonized)
+   return;
unlink(git_path("index-helper.path"));
cleanup_shm();
 }
@@ -289,7 +291,7 @@ static void make_socket_path(struct strbuf *path)
 int main(int argc, char **argv)
 {
const char *prefix;
-   int idle_in_seconds = 600;
+   int idle_in_seconds = 600, detach = 0;
int fd;
struct strbuf socket_path = STRBUF_INIT;
struct option options[] = {
@@ -297,6 +299,7 @@ int main(int argc, char **argv)
N_("exit if not used after some seconds")),
OPT_BOOL(0, "strict", &to_verify,
 "verify shared memory after creating"),
+   OPT_BOOL(0, "detach", &detach, "detach the process"),
OPT_END()
};
 
@@ -326,6 +329,10 @@ int main(int argc, char **argv)
die(_("failed to delete old index-helper.path"));
if (symlink(socket_path.buf, git_path("index-helper.path")))
die(_("failed to symlink socket path into index-helper.path"));
+
+   if (detach && daemonize(&daemonized))
+   die_errno(_("unable to detach"));
+
loop(fd, idle_in_seconds);
 
return 0;
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 00/16] index-helper, watchman

2016-04-06 Thread David Turner
The major change here is to switch from named pipes to sockets.  But somehow
while doing this, I noticed a few other things (and others noticed some too):

Fixed bugs in successful write testing
Added a bit more doco
Improved tests slightly
Memory barriers
.git vs .git/ from watchman
one patch moved/squashed at Duy's suggestion
(some of?) my spelling errors corrected

David Turner (6):
  unpack-trees: preserve index extensions
  index-helper: kill mode
  index-helper: don't run if already running
  index-helper: autorun mode
  index-helper: optionally automatically run
  read-cache: config for waiting for index-helper

Nguyễn Thái Ngọc Duy (10):
  read-cache.c: fix constness of verify_hdr()
  read-cache: allow to keep mmap'd memory after reading
  index-helper: new daemon for caching index and related stuff
  index-helper: add --strict
  daemonize(): set a flag before exiting the main process
  index-helper: add --detach
  read-cache: add watchman 'WAMA' extension
  Add watchman support to reduce index refresh cost
  index-helper: use watchman to avoid refreshing index with lstat()
  update-index: enable/disable watchman support

 .gitignore |   1 +
 Documentation/config.txt   |  10 +
 Documentation/git-index-helper.txt |  64 +
 Makefile   |  22 ++
 builtin/gc.c   |   2 +-
 builtin/update-index.c |  11 +
 cache.h|  23 +-
 config.c   |  10 +
 config.mak.uname   |   1 +
 configure.ac   |   8 +
 daemon.c   |   2 +-
 dir.c  |  23 +-
 dir.h  |   6 +
 environment.c  |   8 +
 git-compat-util.h  |  18 ++
 git.c  |  35 ++-
 index-helper.c | 462 +
 read-cache.c   | 438 +--
 setup.c|   4 +-
 shm.c  |  67 ++
 shm.h  |  23 ++
 t/t7063-status-untracked-cache.sh  |  22 ++
 t/t7900-index-helper.sh|  69 ++
 t/test-lib-functions.sh|   4 +
 unpack-trees.c |   1 +
 watchman-support.c | 135 +++
 watchman-support.h |   7 +
 27 files changed, 1454 insertions(+), 22 deletions(-)
 create mode 100644 Documentation/git-index-helper.txt
 create mode 100644 index-helper.c
 create mode 100644 shm.c
 create mode 100644 shm.h
 create mode 100755 t/t7900-index-helper.sh
 create mode 100644 watchman-support.c
 create mode 100644 watchman-support.h

-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 02/16] read-cache: allow to keep mmap'd memory after reading

2016-04-06 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Later, we will introduce git index-helper to share this memory with
other git processes.

Since the memory will be shared, it will never be unmapped (although
the kernel may of course choose to page it out).

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 cache.h  |  3 +++
 read-cache.c | 13 -
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index b829410..4180e2b 100644
--- a/cache.h
+++ b/cache.h
@@ -333,11 +333,14 @@ struct index_state {
struct split_index *split_index;
struct cache_time timestamp;
unsigned name_hash_initialized : 1,
+keep_mmap : 1,
 initialized : 1;
struct hashmap name_hash;
struct hashmap dir_hash;
unsigned char sha1[20];
struct untracked_cache *untracked;
+   void *mmap;
+   size_t mmap_size;
 };
 
 extern struct index_state the_index;
diff --git a/read-cache.c b/read-cache.c
index 16cc487..7e387e9 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1574,6 +1574,10 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
mmap = xmmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (mmap == MAP_FAILED)
die_errno("unable to map index file");
+   if (istate->keep_mmap) {
+   istate->mmap = mmap;
+   istate->mmap_size = mmap_size;
+   }
close(fd);
 
hdr = mmap;
@@ -1626,10 +1630,12 @@ int do_read_index(struct index_state *istate, const 
char *path, int must_exist)
src_offset += 8;
src_offset += extsize;
}
-   munmap(mmap, mmap_size);
+   if (!istate->keep_mmap)
+   munmap(mmap, mmap_size);
return istate->cache_nr;
 
 unmap:
+   istate->mmap = NULL;
munmap(mmap, mmap_size);
die("index file corrupt");
 }
@@ -1655,6 +1661,7 @@ int read_index_from(struct index_state *istate, const 
char *path)
discard_index(split_index->base);
else
split_index->base = xcalloc(1, sizeof(*split_index->base));
+   split_index->base->keep_mmap = istate->keep_mmap;
ret = do_read_index(split_index->base,
git_path("sharedindex.%s",
 sha1_to_hex(split_index->base_sha1)), 1);
@@ -1698,6 +1705,10 @@ int discard_index(struct index_state *istate)
free(istate->cache);
istate->cache = NULL;
istate->cache_alloc = 0;
+   if (istate->keep_mmap && istate->mmap) {
+   munmap(istate->mmap, istate->mmap_size);
+   istate->mmap = NULL;
+   }
discard_split_index(istate);
free_untracked_cache(istate->untracked);
istate->untracked = NULL;
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 03/16] index-helper: new daemon for caching index and related stuff

2016-04-06 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Instead of reading the index from disk and worrying about disk
corruption, the index is cached in memory (memory bit-flips happen
too, but hopefully less often). The result is faster read. Read time
is reduced by 70%.

The biggest gain is not having to verify the trailing SHA-1, which
takes lots of time especially on large index files. But this also
opens doors for further optimiztions:

 - we could create an in-memory format that's essentially the memory
   dump of the index to eliminate most of parsing/allocation
   overhead. The mmap'd memory can be used straight away. Experiment
   [1] shows we could reduce read time by 88%.

 - we could cache non-index info such as name hash

The shared memory's name folows the template "git--"
where  is the trailing SHA-1 of the index file.  is
"index" for cached index files (and may be "name-hash" for name-hash
cache). If such shared memory exists, it contains the same index
content as on disk. The content is already validated by the daemon and
git won't validate it again (except comparing the trailing SHA-1s).

We keep this daemon's logic as thin as possible. The "brain" stays in
git. So the daemon can read and validate stuff, but that's all it's
allowed to do. It does not add/create new information. It doesn't even
accept direct updates from git.

Git can poke the daemon via unix domain sockets to tell it to refresh
the index cache, or to keep it alive some more minutes. It can't give
any real index data directly to the daemon. Real data goes to disk
first, then the daemon reads and verifies it from there. Poking only
happens for $GIT_DIR/index, not temporary index files.

$GIT_DIR/index-helper.path is a symlink to the socket for the daemon
process. The daemon reads from the socket and executes commands.

Named pipes were considered for portability reasons, but then commands
that need replies from the daemon would have open their own pipes,
since a named pipe should only have one reader.  Unix domain sockets
don't have this problem.

index-helper requires POSIX realtime extension. POSIX shm interface
however is abstracted away so that Windows support can be added later.

On webkit.git with index format v2, duplicating 8 times to 1.4m
entries and 200MB in size:

(vanilla)  0.986986364 s: read_index_from .git/index
(index-helper) 0.267850279 s: read_index_from .git/index

Interestingly with index v4, we get less out of index-helper. It makes
sense as v4 requires more processing after loading the index:

(vanilla)  0.72249 s: read_index_from .git/index
(index-helper) 0.302741500 s: read_index_from .git/index

[1] http://thread.gmane.org/gmane.comp.version-control.git/247268/focus=248771

Signed-off-by: Nguyễn Thái Ngọc Duy 
Signed-off-by: David Turner 
---
 .gitignore |   1 +
 Documentation/git-index-helper.txt |  46 ++
 Makefile   |  10 ++
 cache.h|   2 +
 config.mak.uname   |   1 +
 git-compat-util.h  |  18 +++
 index-helper.c | 284 +
 read-cache.c   | 116 +--
 shm.c  |  67 +
 shm.h  |  23 +++
 t/t7900-index-helper.sh|  23 +++
 11 files changed, 582 insertions(+), 9 deletions(-)
 create mode 100644 Documentation/git-index-helper.txt
 create mode 100644 index-helper.c
 create mode 100644 shm.c
 create mode 100644 shm.h
 create mode 100755 t/t7900-index-helper.sh

diff --git a/.gitignore b/.gitignore
index 5087ce1..b92f122 100644
--- a/.gitignore
+++ b/.gitignore
@@ -71,6 +71,7 @@
 /git-http-fetch
 /git-http-push
 /git-imap-send
+/git-index-helper
 /git-index-pack
 /git-init
 /git-init-db
diff --git a/Documentation/git-index-helper.txt 
b/Documentation/git-index-helper.txt
new file mode 100644
index 000..61605e9
--- /dev/null
+++ b/Documentation/git-index-helper.txt
@@ -0,0 +1,46 @@
+git-index-helper(1)
+===
+
+NAME
+
+git-index-helper - A simple cache daemon for speeding up index file access
+
+SYNOPSIS
+
+[verse]
+'git index-helper' [options]
+
+DESCRIPTION
+---
+Keep the index file in memory for faster access. This daemon is per
+repository.
+
+OPTIONS
+---
+
+--exit-after=::
+   Exit if the cached index is not accessed for ``
+   seconds. Specify 0 to wait forever. Default is 600.
+
+NOTES
+-
+$GIT_DIR/index-helper.path is a symlink to a Unix domain socket in
+$TMPDIR that the daemon reads commands from. At least on Linux, shared
+memory objects are available via /dev/shm with the name pattern
+"git--".  Normally the daemon will clean up shared
+memory objects when it exits.  But if it crashes, some objects could
+remain there and they can be safely deleted with "rm" command.

[PATCH v3 01/16] read-cache.c: fix constness of verify_hdr()

2016-04-06 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 read-cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/read-cache.c b/read-cache.c
index d9fb78b..16cc487 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1345,7 +1345,7 @@ struct ondisk_cache_entry_extended {
ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
ondisk_cache_entry_size(ce_namelen(ce)))
 
-static int verify_hdr(struct cache_header *hdr, unsigned long size)
+static int verify_hdr(const struct cache_header *hdr, unsigned long size)
 {
git_SHA_CTX c;
unsigned char sha1[20];
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/21] replacement for dt/refs-backend-lmdb v7 patch 04/33

2016-04-01 Thread David Turner
On Thu, 2016-03-31 at 18:37 -0700, Stefan Beller wrote:
> On Wed, Mar 30, 2016 at 1:05 PM, David Turner <
> dtur...@twopensource.com> wrote:
> > On Wed, 2016-03-30 at 08:37 +0200, Michael Haggerty wrote:
> > > On 03/29/2016 10:12 PM, David Turner wrote:
> > > > On Sun, 2016-03-27 at 07:22 +0200, Michael Haggerty wrote:
> > > > > On 03/24/2016 07:47 AM, David Turner wrote:
> > > > > > [...]
> > > > > > I incorporated your changes into the lmdb backend.  To make
> > > > > > merging
> > > > > > later more convenient, I rebased on top of pu -- I think
> > > > > > this
> > > > > > mainly
> > > > > > depends on jk/check-repository-format, but I also included
> > > > > > some
> > > > > > fixes
> > > > > > for a couple of tests that had been changed by other
> > > > > > patches.
> > > > > 
> > > > > I think rebasing changes on top of pu is counterproductive. I
> > > > > believe
> > > > > that Junio had extra work rebasing your earlier series onto a
> > > > > merge
> > > > > of
> > > > > the minimum number of topics that it really depended on.
> > > > > There is
> > > > > no
> > > > > way
> > > > > that he could merge the branch in this form because it would
> > > > > imply
> > > > > merging all of pu.
> > > > > 
> > > > > See the zeroth section of SubmittingPatches [1] for the
> > > > > guidelines.
> > > > 
> > > > I'm a bit confused because
> > > > [PATCH 18/21] get_default_remote(): remove unneeded flag
> > > > variable
> > > > 
> > > > doesn't do anything on master -- it depends on some patch in
> > > > pu.
> > > >  And
> > > > we definitely want to pick up jk/check-repository-format (which
> > > > doesn't
> > > > include whatever 18/21 depends on).
> > > > 
> > > > So what do you think our base should be?
> > > 
> > > I think the preference is to base a patch series on the merge of
> > > master
> > > plus the minimum number of topics in pu (ideally, none) that are
> > > "essential" prerequisites of the changes in the patch series. For
> > > example, the version of this patch series that Junio has in his
> > > tree
> > > was
> > > based on master + sb/submodule-parallel-update.
> > > 
> > > Even if there are minor
> > > conflicts with another in-flight topic, it is easier for Junio to
> > > resolve the conflicts when merging the topics together than to
> > > rebase
> > > the patch series over and over as the other patch series evolves.
> > > The
> > > goal of this practice is of course to allow patch series to
> > > evolve
> > > independently of each other as much as possible.
> > > 
> > > Of course if you have insights into nontrivial conflicts between
> > > your
> > > patch series and others, it would be helpful to discuss these in
> > > your
> > > cover letter.
> > 
> > If I am reading this correctly, it looks like your series also has
> > a
> > few more sb submodule patches, e.g. sb/submodule-init, which is
> > responsible for the code that 18/21 depends on.
> > 
> > I think jk/check-repository-format is also  good to get in first,
> > because it changes the startup sequence a bit and it's a bit tricky
> > to
> > figure out what needs to change in dt/refs-backend-lmdb as a result
> > of
> > it.
> > 
> > But I can't just merge jk/check-repository-format on top of
> > 71defe0047
> > -- some function signatures have changed in the run-command stuff
> > and
> > it seems kind of annoying to fix up.
> > 
> > So I propose instead that we just drop 18/21 for now, and use just
> > jk/check-repository-format as the base.
> 
> By 18/21 you mean
> [PATCH 18/21] get_default_remote(): remove unneeded flag variable
> in builtin/submodule--helper.c?
> You could drop that and I'll pick it up in one of the submodule
> series',
> if that is more convenient for you.
> 


Yes, thanks.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/21] replacement for dt/refs-backend-lmdb v7 patch 04/33

2016-03-31 Thread David Turner
On Thu, 2016-03-31 at 18:14 +0200, Michael Haggerty wrote:
> On 03/30/2016 10:05 PM, David Turner wrote:
> > On Wed, 2016-03-30 at 08:37 +0200, Michael Haggerty wrote:
> > > On 03/29/2016 10:12 PM, David Turner wrote:
> > > > On Sun, 2016-03-27 at 07:22 +0200, Michael Haggerty wrote:
> > > > > On 03/24/2016 07:47 AM, David Turner wrote:
> > > > > > [...]
> > > > > > I incorporated your changes into the lmdb backend.  To make
> > > > > > merging
> > > > > > later more convenient, I rebased on top of pu -- I think
> > > > > > this
> > > > > > mainly
> > > > > > depends on jk/check-repository-format, but I also included
> > > > > > some
> > > > > > fixes
> > > > > > for a couple of tests that had been changed by other
> > > > > > patches.
> > > > > 
> > > > > I think rebasing changes on top of pu is counterproductive. I
> > > > > believe
> > > > > that Junio had extra work rebasing your earlier series onto a
> > > > > merge
> > > > > of
> > > > > the minimum number of topics that it really depended on.
> > > > > There is
> > > > > no
> > > > > way
> > > > > that he could merge the branch in this form because it would
> > > > > imply
> > > > > merging all of pu.
> > > > > 
> > > > > See the zeroth section of SubmittingPatches [1] for the
> > > > > guidelines.
> > > > 
> > > > I'm a bit confused because 
> > > > [PATCH 18/21] get_default_remote(): remove unneeded flag
> > > > variable
> > > > 
> > > > doesn't do anything on master -- it depends on some patch in
> > > > pu. 
> > > >  And
> > > > we definitely want to pick up jk/check-repository-format (which
> > > > doesn't
> > > > include whatever 18/21 depends on).
> > > > 
> > > > So what do you think our base should be?
> > > 
> > > I think the preference is to base a patch series on the merge of
> > > master
> > > plus the minimum number of topics in pu (ideally, none) that are
> > > "essential" prerequisites of the changes in the patch series. For
> > > example, the version of this patch series that Junio has in his
> > > tree
> > > was
> > > based on master + sb/submodule-parallel-update. 
> > > 
> > > Even if there are minor
> > > conflicts with another in-flight topic, it is easier for Junio to
> > > resolve the conflicts when merging the topics together than to
> > > rebase
> > > the patch series over and over as the other patch series evolves.
> > > The
> > > goal of this practice is of course to allow patch series to
> > > evolve
> > > independently of each other as much as possible.
> > > 
> > > Of course if you have insights into nontrivial conflicts between
> > > your
> > > patch series and others, it would be helpful to discuss these in
> > > your
> > > cover letter.
> > 
> > If I am reading this correctly, it looks like your series also has
> > a
> > few more sb submodule patches, e.g. sb/submodule-init, which is
> > responsible for the code that 18/21 depends on.  
> > 
> > I think jk/check-repository-format is also  good to get in first,
> > because it changes the startup sequence a bit and it's a bit tricky
> > to
> > figure out what needs to change in dt/refs-backend-lmdb as a result
> > of
> > it. 
> > 
> > But I can't just merge jk/check-repository-format on top of
> > 71defe0047 
> > -- some function signatures have changed in the run-command stuff
> > and
> > it seems kind of annoying to fix up.  
> > 
> > So I propose instead that we just drop 18/21 for now, and use just
> > jk/check-repository-format as the base.
> > 
> > Does this seem reasonable to you?
> 
> Yes, that's fine. Patch 18/21 is just a random cleanup that nothing
> else
> depends on. Will you do the rebasing? If so, please let me know where
> I
> can fetch the result from.

Rebased:

https://github.com/dturner-tw/git.git on branch
dturner/pluggable-backends


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: RFC: New reference iteration paradigm

2016-03-31 Thread David Turner
On Thu, 2016-03-31 at 18:13 +0200, Michael Haggerty wrote:
> Currently the way to iterate over references is via a family of
> for_each_ref()-style functions. You pass some arguments plus a
> callback
> function and cb_data to the function, and your callback is called for
> each reference that is selected.
> 
> This works, but it has two big disadvantages:
> 
> 1. It is cumbersome for callers. The caller's logic has to be split
>into two functions, the one that calls for_each_ref() and the
>callback function. Any data that have to be passed between the
>functions has to be stuck in a separate data structure.
> 
> 2. This interface is not composable. For example, you can't write a
>single function that iterates over references from two sources,
>as is interesting for combining packed plus loose references,
>shared plus worktree-specific references, symbolic plus normal
>references, etc. The current code for combining packed and loose
>references needs to walk the two reference trees in lockstep,
>using intimate knowledge about how references are stored [1,2,3].
> 
> I'm currently working on a patch series to transition the reference
> code
> from using for_each_ref()-style iteration to using proper iterators.
> 
> The main point of this change is to change the base iteration
> paradigm
> that has to be supported by reference backends. So instead of
> 
> > int do_for_each_ref_fn(const char *submodule, const char *base,
> >each_ref_fn fn, int trim, int flags,
> >void *cb_data);
> 
> the backend now has to implement
> 
> > struct ref_iterator *ref_iterator_begin_fn(const char *submodule,
> >const char *prefix,
> >unsigned int flags);
> 
> The ref_iterator itself has to implement two main methods:
> 
> > int iterator_advance_fn(struct ref_iterator *ref_iterator);
> > void iterator_free_fn(struct ref_iterator *ref_iterator);
> 
> A loop over references now looks something like
> 
> > struct ref_iterator *iter = each_ref_in_iterator("refs/tags/");
> > while (ref_iterator_advance(iter)) {
> > /* code using iter->refname, iter->oid, iter->flags */
> > }
> 
> I built quite a bit of ref_iterator infrastructure to make it easy to
> plug things together quite flexibly. For example, there is an
> overlay_ref_iterator which takes two other iterators (e.g., one for
> packed and one for loose refs) and overlays them, presenting the
> result
> via the same iterator interface. But the same overlay_ref_iterator
> can
> be used to overlay any two other iterators on top of each other.

I haven't looked at the code yet, but this makes sense to me.  In
general, the major reason to supply a callback style of API is when
iteration is more complicated than whatever will be consuming the data
(I can't remember where I heard this argument, but I found it pretty
convincing).  It seems like this is increasingly not the case, so we
should move towards the iterator style.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/21] replacement for dt/refs-backend-lmdb v7 patch 04/33

2016-03-30 Thread David Turner
On Wed, 2016-03-30 at 08:37 +0200, Michael Haggerty wrote:
> On 03/29/2016 10:12 PM, David Turner wrote:
> > On Sun, 2016-03-27 at 07:22 +0200, Michael Haggerty wrote:
> > > On 03/24/2016 07:47 AM, David Turner wrote:
> > > > [...]
> > > > I incorporated your changes into the lmdb backend.  To make
> > > > merging
> > > > later more convenient, I rebased on top of pu -- I think this
> > > > mainly
> > > > depends on jk/check-repository-format, but I also included some
> > > > fixes
> > > > for a couple of tests that had been changed by other patches.
> > > 
> > > I think rebasing changes on top of pu is counterproductive. I
> > > believe
> > > that Junio had extra work rebasing your earlier series onto a
> > > merge
> > > of
> > > the minimum number of topics that it really depended on. There is
> > > no
> > > way
> > > that he could merge the branch in this form because it would
> > > imply
> > > merging all of pu.
> > > 
> > > See the zeroth section of SubmittingPatches [1] for the
> > > guidelines.
> > 
> > I'm a bit confused because 
> > [PATCH 18/21] get_default_remote(): remove unneeded flag variable
> > 
> > doesn't do anything on master -- it depends on some patch in pu. 
> >  And
> > we definitely want to pick up jk/check-repository-format (which
> > doesn't
> > include whatever 18/21 depends on).
> > 
> > So what do you think our base should be?
> 
> I think the preference is to base a patch series on the merge of
> master
> plus the minimum number of topics in pu (ideally, none) that are
> "essential" prerequisites of the changes in the patch series. For
> example, the version of this patch series that Junio has in his tree
> was
> based on master + sb/submodule-parallel-update. 
>
> Even if there are minor
> conflicts with another in-flight topic, it is easier for Junio to
> resolve the conflicts when merging the topics together than to rebase
> the patch series over and over as the other patch series evolves. The
> goal of this practice is of course to allow patch series to evolve
> independently of each other as much as possible.
> 
> Of course if you have insights into nontrivial conflicts between your
> patch series and others, it would be helpful to discuss these in your
> cover letter.

If I am reading this correctly, it looks like your series also has a
few more sb submodule patches, e.g. sb/submodule-init, which is
responsible for the code that 18/21 depends on.  

I think jk/check-repository-format is also  good to get in first,
because it changes the startup sequence a bit and it's a bit tricky to
figure out what needs to change in dt/refs-backend-lmdb as a result of
it. 

But I can't just merge jk/check-repository-format on top of 71defe0047 
-- some function signatures have changed in the run-command stuff and
it seems kind of annoying to fix up.  

So I propose instead that we just drop 18/21 for now, and use just
jk/check-repository-format as the base.

Does this seem reasonable to you?
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 03/17] index-helper: new daemon for caching index and related stuff

2016-03-29 Thread David Turner
On Tue, 2016-03-29 at 09:31 +0700, Duy Nguyen wrote:
> On Sat, Mar 19, 2016 at 8:04 AM, David Turner <
> dtur...@twopensource.com> wrote:
> > From: Nguyễn Thái Ngọc Duy 
> > 
> > Instead of reading the index from disk and worrying about disk
> > corruption, the index is cached in memory (memory bit-flips happen
> > too, but hopefully less often). The result is faster read. Read
> > time
> > is reduced by 70%.
> > 
> > The biggest gain is not having to verify the trailing SHA-1, which
> > takes lots of time especially on large index files. But this also
> > opens doors for further optimiztions:
> > 
> >  - we could create an in-memory format that's essentially the
> > memory
> >dump of the index to eliminate most of parsing/allocation
> >overhead. The mmap'd memory can be used straight away.
> > Experiment
> >[1] shows we could reduce read time by 88%.
> 
> This reference [1] is missing (even in my old version). I believe
> it's
> http://thread.gmane.org/gmane.comp.version-control.git/247268/focus=2
> 48771,
> comparing 256.442ms in that mail with v4 number, 2245.113ms in 0/8
> mail from the same thread.
> 
> > Git can poke the daemon via named pipes to tell it to refresh the
> > index cache, or to keep it alive some more minutes. It can't give
> > any
> > real index data directly to the daemon. Real data goes to disk
> > first,
> > then the daemon reads and verifies it from there. Poking only
> > happens
> > for $GIT_DIR/index, not temporary index files.
> 
> I think we should go with unix socket on *nix platform instead of
> named pipe. UNIX named pipe only allows one communication channel at
> a
> time. Windows named pipe is different and allows multiple clients,
> which is the same as unix socket.
> 
> 
> > $GIT_DIR/index-helper.pipe is the named pipe for daemon process.
> > The
> > daemon reads from the pipe and executes commands.  Commands that
> > need
> > replies from the daemon will have to open their own pipe, since a
> > named pipe should only have one reader.  Unix domain sockets don't
> > have this problem, but are less portable.
> 
> Hm..NO_UNIX_SOCKETS is only set for Windows in config.mak.uname and
> Windows will need to be specially tailored anyway, I think unix
> socket
> would be more elegant.

One annoyance with unix sockets is that they must have short paths
(UNIX_PATH_MAX -- about a hundred characters).  This basically means
they should be in $TMPDIR.  I guess we could go back to pid files in
$GIT_DIR, and then have a socket named after the pid.  There's also
some security issues, but it actually looks like there's a simple
enough workaround for them.

I'll change this, but it might take a bit as I'm busy with other things
this week.

> > +static void share_index(struct index_state *istate, struct shm
> > *is)
> > +{
> > +   void *new_mmap;
> > +   if (istate->mmap_size <= 20 ||
> > +   hashcmp(istate->sha1,
> > +   (unsigned char *)istate->mmap + istate
> > ->mmap_size - 20) ||
> > +   !hashcmp(istate->sha1, is->sha1) ||
> > +   git_shm_map(O_CREAT | O_EXCL | O_RDWR, 0700, istate
> > ->mmap_size,
> > +   &new_mmap, PROT_READ | PROT_WRITE,
> > MAP_SHARED,
> > +   "git-index-%s", sha1_to_hex(istate->sha1))
> > < 0)
> > +   return;
> > +
> > +   release_index_shm(is);
> > +   is->size = istate->mmap_size;
> > +   is->shm = new_mmap;
> > +   hashcpy(is->sha1, istate->sha1);
> > +   memcpy(new_mmap, istate->mmap, istate->mmap_size - 20);
> > +
> > +   /*
> > +* The trailing hash must be written last after everything
> > is
> > +* written. It's the indication that the shared memory is
> > now
> > +* ready.
> > +*/
> > +   hashcpy((unsigned char *)new_mmap + istate->mmap_size - 20,
> > is->sha1);
> 
> You commented here [1] a long time ago about memory barrier. I'm not
> entirely sure if compilers dare to reorder function calls, but when
> hashcpy is inlined and memcpy is builtin, I suppose that's
> possible...
> 
> [1] http://article.gmane.org/gmane.comp.version-control.git/280729

Oh, right.  Will fix.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/19] index-helper, watchman

2016-03-29 Thread David Turner
On Tue, 2016-03-29 at 19:09 +0200, Torsten Bögershausen wrote:
> On 2016-03-09 19.36, David Turner wrote:
> > This is a rebase and extension of Duy's work on git index-helper
> > and
> > watchman support.
> > 
> Somewhere we need to tweak something:
> t7900 do all fail under Mac OS, because the index-helper is not
> build.
> 
> The best would be to have a precondition when running the tests ?
> 
> t7900-index-helper.sh   not ok 1 - index-helper smoke test
> t7900-index-helper.sh   not ok 2 - index-helper creates usable pipe
> file and can
> be killed
> t7900-index-helper.sh   not ok 3 - index-helper will not start if
> already running
> t7900-index-helper.sh   not ok 4 - index-helper is quiet with -
> -autorun
> t7900-index-helper.sh   not ok 5 - index-helper autorun works
> 
> 
> The other thing is to enable SHM on other platforms, but first things
> first.

Will fix, thanks.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 08/17] read-cache: invalidate untracked cache data when reading WAMA

2016-03-29 Thread David Turner
On Tue, 2016-03-29 at 09:50 +0700, Duy Nguyen wrote:
> On Sat, Mar 19, 2016 at 8:04 AM, David Turner <
> dtur...@twopensource.com> wrote:
> > @@ -1407,10 +1472,24 @@ static int read_watchman_ext(struct
> > index_state *istate, const void *data,
> > ewah_each_bit(bitmap, mark_no_watchman, istate);
> > ewah_free(bitmap);
> > 
> > -   /*
> > -* TODO: update the untracked cache from the untracked data
> > in this
> > -* extension.
> > -*/
> > +   if (istate->untracked && istate->untracked->root) {
> > +   int i;
> > +   const char *untracked;
> > +
> > +   untracked = data + len + 8 + bitmap_size;
> > +   for (i = 0; i < untracked_nr; ++i) {
> > +   int len = strlen(untracked);
> > +   string_list_append(&istate->untracked
> > ->invalid_untracked,
> > +  untracked);
> > +   untracked += len + 1;
> > +   }
> > +
> > +   for_each_string_list(&istate->untracked
> > ->invalid_untracked,
> > +mark_untracked_invalid, istate
> > ->untracked);
> 
> I think it's a bit early to invalidate untracked cache here. We can
> do
> that in refresh_by_watchman() in 10/17, where ce_mark_uptodate() to
> prevent lstat() is also done.

Will move/squash

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/21] replacement for dt/refs-backend-lmdb v7 patch 04/33

2016-03-29 Thread David Turner
On Sun, 2016-03-27 at 07:22 +0200, Michael Haggerty wrote:
> On 03/24/2016 07:47 AM, David Turner wrote:
> > [...]
> > I incorporated your changes into the lmdb backend.  To make merging
> > later more convenient, I rebased on top of pu -- I think this
> > mainly
> > depends on jk/check-repository-format, but I also included some
> > fixes
> > for a couple of tests that had been changed by other patches.
> 
> I think rebasing changes on top of pu is counterproductive. I believe
> that Junio had extra work rebasing your earlier series onto a merge
> of
> the minimum number of topics that it really depended on. There is no
> way
> that he could merge the branch in this form because it would imply
> merging all of pu.
> 
> See the zeroth section of SubmittingPatches [1] for the guidelines.

I'm a bit confused because 
[PATCH 18/21] get_default_remote(): remove unneeded flag variable

doesn't do anything on master -- it depends on some patch in pu.  And
we definitely want to pick up jk/check-repository-format (which doesn't
include whatever 18/21 depends on).

So what do you think our base should be?

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 1/2] refs: add a new function set_worktree_head_symref

2016-03-28 Thread David Turner
On Mon, 2016-03-28 at 10:48 -0700, Junio C Hamano wrote:
> Kazuki Yamaguchi  writes:
> 
> > Add a new function set_worktree_head_symref, to update HEAD symref
> > for
> > the specified worktree.
> > 
> > To update HEAD of a linked working tree,
> > create_symref("worktrees/$work_tree/HEAD", "refs/heads/$branch",
> > msg)
> > could be used. However when it comes to updating HEAD of the main
> > working tree, it is unusable because it uses $GIT_DIR for
> > worktree-specific symrefs (HEAD).
> > 
> > The new function takes git_dir (real directory) as an argument, and
> > updates HEAD of the working tree. This function will be used when
> > renaming a branch.
> > 
> > Signed-off-by: Kazuki Yamaguchi 
> > ---
> 
> I suspect that this helper function should be in the common layer
> (refs.c) to be redirected to specific backend(s).
> 
> David & Michael, what do you think?

HEAD is a per-worktree ref, so it's always handled by the files
backend.  So this looks fine to me.

> > +/*
> > + * Update HEAD of the specified gitdir.
> > + * Similar to create_symref("relative-git-dir/HEAD", target,
> > NULL), but this is
> > + * able to update the main working tree's HEAD wherever $GIT_DIR
> > points to.


nit: "able to update the main working tree's HEAD regardless of where
GIT_DIR points to" would be clearer.

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 09/33] refs: reduce the visibility of do_for_each_ref()

2016-03-24 Thread David Turner
On Thu, 2016-03-24 at 08:07 +0100, Michael Haggerty wrote:
> > +/*
> > + * The common backend for the for_each_*ref* functions
> > + */
> > +static int do_for_each_ref(const char *submodule, const char
> > *base,
> > +   each_ref_fn fn, int trim, int flags,
> > +   void *cb_data)
> 
> The two lines above are indented incorrectly.

Fixed, thanks.

> > -
> > -int do_for_each_ref(const char *submodule, const char *base,
> > -   each_ref_fn fn, int trim, int flags,
> > -   void *cb_data)
> > -{
> > -   return the_refs_backend->do_for_each_ref(submodule, base,
> > fn, trim,
> > -flags, cb_data);
> > -}
> 
> Nit: in the previous patch, please put the function where you want it
> so
> that you don't have to move it in this patch.
> 
> > [...]
> 
> Michael

Ok.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 14/33] refs: add methods to init refs db

2016-03-24 Thread David Turner
On Thu, 2016-03-24 at 08:28 +0100, Michael Haggerty wrote:
> > if (shared_repository) {
> > adjust_shared_perm(get_git_dir());
> > -   adjust_shared_perm(git_path_buf(&buf, "refs"));
> 
> Given that this function is creating the "refs" directory, it seems
> like
> adjust_shared_perm() should be called for it here, too (rather than
> in
> the backend-specific code).


Good point.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 09/17] Add watchman support to reduce index refresh cost

2016-03-24 Thread David Turner
On Thu, 2016-03-24 at 09:47 -0400, Jeff Hostetler wrote:
> I'm seeing wm->name have value ".git" rather than ".git/" on Linux.
> 
> 
> On 03/18/2016 09:04 PM, David Turner wrote:
> > +   if (!strncmp(wm->name, ".git/", 5) ||
> > +   strstr(wm->name, "/.git/"))
> > +   continue;
> 

Thanks.  I don't think I considered the case of .git itself being
modified, although clearly files are occasionally created/deleted in
there.  I'm going to fix this by ignoring all directories, since we'll
capture untracked-cache changes via files in those dirs.


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/21] replacement for dt/refs-backend-lmdb v7 patch 04/33

2016-03-23 Thread David Turner
On Wed, 2016-03-23 at 11:04 +0100, Michael Haggerty wrote:
> Patch 04/33 in David Turner's refs-backend-lmdb series v7 [1] did way
> too much in a single patch, and in fact got a few minor things wrong.
> Instead of that patch, I suggest this patch series, which
> ...

LGTM.  I think I would squash these patches:
  fsck_head_link(): remove unneeded flag variable
  cmd_merge(): remove unneeded flag variable
  get_default_remote(): remove unneeded flag variable
  checkout_paths(): remove unneeded flag variable

But that's up to you.

I incorporated your changes into the lmdb backend.  To make merging
later more convenient, I rebased on top of pu -- I think this mainly
depends on jk/check-repository-format, but I also included some fixes
for a couple of tests that had been changed by other patches.

The current version can be found here:

https://github.com/dturner-tw/git/tree/dturner/pluggable-backends

I won't resend the full patchset to the list until I hear back on the
rest of the review.

It seems like maybe we should now split this into two patchsets:
everything up to and including "refs: move resolve_ref_unsafe into
common code" does not depend on the backend structure and could go in
earlier.  If you agree, we could send that first series and get it in,
hopefully reducing later merge conflicts.  
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] read-cache: increase write buffer size

2016-03-21 Thread David Turner
On Sat, 2016-03-19 at 18:18 +0700, Duy Nguyen wrote:
> On Sat, Mar 19, 2016 at 8:19 AM, David Turner <
> dtur...@twopensource.com> wrote:
> > Each write() has syscall overhead, and writing a large index
> > entails
> > many such calls.  A larger write buffer reduces the overhead,
> > leading to increased performance.
> > 
> > On my repo, which has an index size of 30m, this saves about 10ms
> > of
> > time writing the index.
> 
> I wonder if split-index does not work as intended. Because if it
> does,
> you should rarely need to write that such big index files (or is this
> 30mb the small part and the base index is even bigger?).
> 
> But I agree with Torsten that we should make this configurable,
> preferably via config file, if not we can still move this define back
> in Makefile, overridable using config.mak

Oh, right, split index.  Yeah, I should just turn that on.  Nevermind.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 18/19] index-helper: autorun

2016-03-19 Thread David Turner
On Thu, 2016-03-17 at 15:43 +0100, Johannes Schindelin wrote:
> Hi Duy,
> 
> On Thu, 17 Mar 2016, Duy Nguyen wrote:
> 
> > On Thu, Mar 17, 2016 at 1:27 AM, Johannes Schindelin
> >  wrote:
> > > I am much more concerned about concurrent accesses and the
> > > communication
> > > between the Git processes and the index-helper. Writing to the
> > > .pid file
> > > sounds very fragile to me, in particular when multiple processes
> > > can poke
> > > the index-helper in succession and some readers are unaware that
> > > the index
> > > is being refreshed.
> > 
> > It's not that bad.
> 
> Well, the way I read the code it is possible that:
> 
> 1. Git process 1 starts, reading the index
> 2. Git process 2 starts, poking the index-helper
> 3. The index-helper updates the .pid file (why not set a bit in the
> shared
>memory?) with a prefix "W"
> 4. Git process 2 reads the .pid file and waits for the "W" to go away
>(what if index-helper is not fast enough to write the "W"?)
> 5. Git process 1 access the index, happily oblivious that it is being
>updated and the data is in an inconsistent state


That's not quite how I understand it.  It's more like MVCC.  Writes to
the index go to a new index file.  Index files are identified by their
SHA.  Reads from the index go into a new shm, identified by SHA.  

The "W" is set only once -- it just means "this index helper knows how
to talk to watchman".  It's a compile-time option.

(I'm going to change this anyway when I switch to named pipes).

The watchman data is shared independently; if it's not ready in time
(whatever that means -- it's 1s in the current code), then read-cache
should fall back to brute-force checking every file.

> > We should have protection in place to deal with this and fall back
> > to
> > reading directly from file when things get suspicious.
> 
> I really want to prevent that. I know of use cases where the index
> weighs
> 300MB, and falling back to reading it directly *really* hurts.





> > But I agree that sending UNIX signals (or PostMessage) is not
> > really
> > good communication.
> 
> Yeah, I really would like two-way communication instead. Named pipes?
> They'd have the advantage that you could use the full path to the
> index as
> identifier.
> 
> The way I read the current code, we would actually create a different
> shared memory every time the index changes because its checksum is
> part of
> the shared memory's "path"...
> 
> Ciao,
> Dscho
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 19/19] hack: watchman/untracked cache mashup

2016-03-19 Thread David Turner
On Thu, 2016-03-17 at 20:06 +0700, Duy Nguyen wrote:
> On Thu, Mar 17, 2016 at 7:56 AM, David Turner <
> dtur...@twopensource.com> wrote:
> > > So if we detect an updated file that's not in the index, we are
> > > prepared to invalidate that path, correct? We may invalidate more
> > > than
> > > necessary if that's true. Imagine a.o is already ignored. If it's
> > > rebuilt, we should not need to update untracked cache.
> > 
> > Yes, that's true.  But it would be true with the mtime system too.
> > This
> > is no worse, even if it's no better.  In-tree builds are a hard
> > case to
> > support, and I'm totally OK with a system that encourages out-of
> > -tree
> > builds.
> > 
> > We could check if it's ignored, but then if someone changes their
> > gitignore, we could be wrong.
> > 
> > Or we could suggest that people make their watchmanignore match
> > their
> > gitignore.
> 
> So your purpose is to reduce stat() on those "quiet" directories?

Yes.  Twitter's repo is perhaps somewhat unusual in that it has a very
"bushy" directory structure -- tens of thousands of directories.


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 18/19] index-helper: autorun

2016-03-19 Thread David Turner
On Fri, 2016-03-18 at 14:44 +0700, Duy Nguyen wrote:
> > So yeah, this is the challenge: to make Git work at real-world
> > scale
> > (didn't we hear a lot about this at the latest Git Merge?)
> 
> I'm all for making Junio cry by using Git for what it is (or was) not
> intended for, but this seems too much. A repo about 500k files or
> less, I think I can deal with,  not those in million range.

Sad news: Facebook's repo was already getting towards that size three
years ago:
http://comments.gmane.org/gmane.comp.version-control.git/189776


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] read-cache: increase write buffer size

2016-03-19 Thread David Turner
Each write() has syscall overhead, and writing a large index entails
many such calls.  A larger write buffer reduces the overhead,
leading to increased performance.

On my repo, which has an index size of 30m, this saves about 10ms of
time writing the index.

Signed-off-by: David Turner 
---
 read-cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/read-cache.c b/read-cache.c
index d9fb78b..09ebe08 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1714,7 +1714,7 @@ int unmerged_index(const struct index_state *istate)
return 0;
 }
 
-#define WRITE_BUFFER_SIZE 8192
+#define WRITE_BUFFER_SIZE 131072
 static unsigned char write_buffer[WRITE_BUFFER_SIZE];
 static unsigned long write_buffer_len;
 
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 19/19] hack: watchman/untracked cache mashup

2016-03-19 Thread David Turner
On Tue, 2016-03-15 at 19:31 +0700, Duy Nguyen wrote:
> On Thu, Mar 10, 2016 at 1:36 AM, David Turner <
> dtur...@twopensource.com> wrote:
> >  static struct watchman_query *make_query(const char *last_update)
> > @@ -60,8 +61,24 @@ static void update_index(struct index_state
> > *istate,
> > continue;
> > 
> > pos = index_name_pos(istate, wm->name, strlen(wm
> > ->name));
> > -   if (pos < 0)
> > +   if (pos < 0) {
> > +   if (istate->untracked) {
> > +   char *name = xstrdup(wm->name);
> > +   char *dname = dirname(name);
> > +
> > +   /*
> > +* dirname() returns '.' for the
> > root,
> > +* but we call it ''.
> > +*/
> > +   if (dname[0] == '.' && dname[1] ==
> > 0)
> > +   string_list_append(&istate
> > ->untracked->invalid_untracked, "");
> > +   else
> > +   string_list_append(&istate
> > ->untracked->invalid_untracked,
> > +  dname);
> > +   free(name);
> > +   }
> > continue;
> > +   }
> 
> So if we detect an updated file that's not in the index, we are
> prepared to invalidate that path, correct? We may invalidate more
> than
> necessary if that's true. Imagine a.o is already ignored. If it's
> rebuilt, we should not need to update untracked cache.

Yes, that's true.  But it would be true with the mtime system too. This
is no worse, even if it's no better.  In-tree builds are a hard case to
support, and I'm totally OK with a system that encourages out-of-tree
builds.

We could check if it's ignored, but then if someone changes their
gitignore, we could be wrong.

Or we could suggest that people make their watchmanignore match their
gitignore.


> What I had in mind (and argued with watchman devs a bit [1]) was
> maintain the file list of each clock and compare the file list of
> different clocks to figure out what files are added or deleted. Then
> we can generate invalidation list more accurately. We need to keep at
> least one file list corresponds to  the clock saved in the index.
> When
> we get the refresh request, we get a new file list (with new clock),
> do a diff then save the invalidation list as an extension. Once we
> notice that new clock is written back in index, we can discard older
> file lists. In theory we should not need to keep too many file lists,
> so even if one list is big, it should not be a big problem.
> 
> I have a note with me about race conditions with this approach, but I
> haven't remembered exactly why or how yet.. My recent thoughts about
> it though, are race conditions when you do "git status" is probably
> tolerable. After all if you're doing some I/O when you do git-status,
> you're guaranteed to miss some updates.
> 
> [1] https://github.com/facebook/watchman/issues/65

I think it would be possible to just check the UNTR extension and only
add a dir to it if that dir doesn't already contain (untracked) the
file that's being modified.  Or is that also racy?

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 18/19] index-helper: autorun

2016-03-19 Thread David Turner
On Tue, 2016-03-15 at 15:26 +0100, Johannes Schindelin wrote:
> Hi Duy,
> 
> On Tue, 15 Mar 2016, Duy Nguyen wrote:
> 
> > On Thu, Mar 10, 2016 at 1:36 AM, David Turner <
> > dtur...@twopensource.com> wrote:
> > > Introduce a new config option, indexhelper.autorun, to
> > > automatically
> > > run git index-helper before starting up a builtin git command. 
> > >  This
> > > enables users to keep index-helper running without manual
> > > intervention.
> > 
> > This could be a problem on Windows because "index-helper --detach"
> > does not work there. I have no idea how "daemons" are managed on
> > Windows and not sure if our design is still good when such a
> > "daemon"
> > is added on Windows. So I'm pulling Johannes in for his opinions.
> > 
> > Background for Johannes. We're adding "git index-helper" daemon
> > (one
> > per repo) to cache the index in memory to speed up index load time
> > (and in future probably name-hash too, I think it's also more often
> > used on Windows because of case-insensitive fs). It also enables
> > watchman (on Windows) for faster refresh. This patch allows to
> > start
> > the daemon automatically if it's not running. But I don't know it
> > will
> > work ok on Windows.
> > 
> > Assuming that "index-helper" service has to be installed and
> > started
> > from system, there can only be one service running right? This
> > clashes
> > with the per-repo daemon design... I think it can stilf work, if
> > the
> > main service just spawns new process, one for each repo. But again
> > I'm
> > not sure.
> 
> If we want to run the process as a Windows service, you are correct,
> there
> really can only be one. Worse: it runs with admin privileges.
> 
> But why not just keep it running as a detached process? We can run
> those
> on Windows, and if we're opening a named pipe whose name reveals the
> one-to-one mapping with the index in question, I think we are fine
> (read:
> we can detect whether the process is running already).
> 
> We can even tell those processes to have a timeout, or to react to
> other
> system events.
> 
> Please note that I am *very* interested in this feature (speeding up
> index
> operations).

I don't understand what a "detached process" is on Windows (I have
never done any real Windows programming). Does that mean "call
daemonize() and it'll take care of it?"  Or something else?  Or should
I just not worry about it and let you take care of it?

Also, I'll figure out how to switch to named pipes. 
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 12/17] unpack-trees: preserve index extensions

2016-03-18 Thread David Turner
Make git checkout (and other unpack_tree operations) preserve the
untracked cache and watchman status. This is valuable for two reasons:

1. Often, an unpack_tree operation will not touch large parts of the
working tree, and thus most of the untracked cache will continue to be
valid.

2. Even if the untracked cache were entirely invalidated by such an
operation, the user has signaled their intention to have such a cache,
and we don't want to throw it away.

The same logic applies to the watchman state.

Signed-off-by: David Turner 
---
 cache.h   |  1 +
 read-cache.c  |  8 
 t/t7063-status-untracked-cache.sh | 23 +++
 t/test-lib-functions.sh   |  4 
 unpack-trees.c|  1 +
 5 files changed, 37 insertions(+)

diff --git a/cache.h b/cache.h
index 9fa339a..4ae7dd0 100644
--- a/cache.h
+++ b/cache.h
@@ -572,6 +572,7 @@ extern void write_watchman_ext(struct strbuf *sb, struct 
index_state* istate);
 #define REFRESH_DAEMON (1 << 2)
 extern int write_locked_index(struct index_state *, struct lock_file *lock, 
unsigned flags);
 extern int discard_index(struct index_state *);
+extern void move_index_extensions(struct index_state *dst, struct index_state 
*src);
 extern int unmerged_index(const struct index_state *);
 extern int verify_path(const char *path);
 extern int index_dir_exists(struct index_state *istate, const char *name, int 
namelen);
diff --git a/read-cache.c b/read-cache.c
index 8e886d1..c141fec 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2742,3 +2742,11 @@ void stat_validity_update(struct stat_validity *sv, int 
fd)
fill_stat_data(sv->sd, &st);
}
 }
+
+void move_index_extensions(struct index_state *dst, struct index_state *src)
+{
+   dst->untracked = src->untracked;
+   src->untracked = NULL;
+   dst->last_update = src->last_update;
+   src->last_update = NULL;
+}
diff --git a/t/t7063-status-untracked-cache.sh 
b/t/t7063-status-untracked-cache.sh
index a971884..a2c8535 100755
--- a/t/t7063-status-untracked-cache.sh
+++ b/t/t7063-status-untracked-cache.sh
@@ -646,4 +646,27 @@ test_expect_success 'test ident field is working' '
test_cmp ../expect ../err
 '
 
+test_expect_success 'untracked cache survives a checkout' '
+   git commit --allow-empty -m empty &&
+   test-dump-untracked-cache >../before &&
+   test_when_finished  "git checkout master" &&
+   git checkout -b other_branch &&
+   test-dump-untracked-cache >../after &&
+   test_cmp ../before ../after &&
+   test_commit test &&
+   test-dump-untracked-cache >../before &&
+   git checkout master &&
+   test-dump-untracked-cache >../after &&
+   test_cmp ../before ../after
+'
+
+
+test_expect_success 'untracked cache survives a commit' '
+   test-dump-untracked-cache >../before &&
+   git add done/two &&
+   git commit -m commit &&
+   test-dump-untracked-cache >../after &&
+   test_cmp ../before ../after
+'
+
 test_done
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 8d99eb3..e974b5b 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -186,6 +186,10 @@ test_commit () {
test_tick
fi &&
git commit $signoff -m "$1" &&
+   if [ "$(git config core.bare)" = false ]
+   then
+   git update-index --force-untracked-cache
+   fi
git tag "${4:-$1}"
 }
 
diff --git a/unpack-trees.c b/unpack-trees.c
index 9f55cc2..fc90eb3 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1215,6 +1215,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, 
struct unpack_trees_options
  WRITE_TREE_SILENT |
  WRITE_TREE_REPAIR);
}
+   move_index_extensions(&o->result, o->dst_index);
discard_index(o->dst_index);
*o->dst_index = o->result;
} else {
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 15/17] index-helper: autorun mode

2016-03-18 Thread David Turner
Soon, we'll want to automatically start index-helper, so we need
a mode that silently exists if it can't start up (either because
it's not in a git dir, or because another one is already running).

Signed-off-by: David Turner 
---
 index-helper.c  | 29 +++--
 t/t7900-index-helper.sh |  8 
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/index-helper.c b/index-helper.c
index 7362abb..ab96ded 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -368,8 +368,9 @@ done:
 int main(int argc, char **argv)
 {
const char *prefix;
-   int idle_in_seconds = 600, detach = 0, kill = 0;
+   int idle_in_seconds = 600, detach = 0, kill = 0, autorun = 0;
int fd;
+   int nongit;
struct strbuf pipe_path = STRBUF_INIT;
struct option options[] = {
OPT_INTEGER(0, "exit-after", &idle_in_seconds,
@@ -378,6 +379,7 @@ int main(int argc, char **argv)
 "verify shared memory after creating"),
OPT_BOOL(0, "detach", &detach, "detach the process"),
OPT_BOOL(0, "kill", &kill, "request that existing index helper 
processes exit"),
+   OPT_BOOL(0, "autorun", &autorun, "this is an automatic run of 
git index-helper, so certain errors can be solved by silently exiting"),
OPT_END()
};
 
@@ -387,7 +389,14 @@ int main(int argc, char **argv)
if (argc == 2 && !strcmp(argv[1], "-h"))
usage_with_options(usage_text, options);
 
-   prefix = setup_git_directory();
+   prefix = setup_git_directory_gently(&nongit);
+   if (nongit) {
+   if (autorun)
+   exit(0);
+   else
+   die("Not a git repository");
+   }
+
if (parse_options(argc, (const char **)argv, prefix,
  options, usage_text, 0))
die(_("too many arguments"));
@@ -402,10 +411,18 @@ int main(int argc, char **argv)
 
/* check that no other copy is running */
fd = open(pipe_path.buf, O_RDONLY | O_NONBLOCK);
-   if (fd > 0)
-   die(_("Already running"));
-   if (errno != ENXIO && errno != ENOENT)
-   die_errno(_("Unexpected error checking pipe"));
+   if (fd > 0) {
+   if (autorun)
+   return 0;
+   else
+   die(_("Already running"));
+   }
+   if (errno != ENXIO && errno != ENOENT) {
+   if (autorun)
+   return 0;
+   else
+   die_errno(_("Unexpected error checking pipe"));
+   }
 
atexit(cleanup);
sigchain_push_common(cleanup_on_signal);
diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh
index ce0cc27..6020fe4 100755
--- a/t/t7900-index-helper.sh
+++ b/t/t7900-index-helper.sh
@@ -33,4 +33,12 @@ test_expect_success 'index-helper will not start if already 
running' '
grep "Already running" err
 '
 
+test_expect_success 'index-helper is quiet with --autorun' '
+   test_when_finished "git index-helper --kill" &&
+   git index-helper --kill &&
+   git index-helper --detach &&
+   test -p .git/index-helper.pipe &&
+   git index-helper --autorun
+'
+
 test_done
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 17/17] read-cache: config for waiting for index-helper

2016-03-18 Thread David Turner
When we poke index-helper, and index-helper is using watchman, we want
to wait for a response (showing that the watchman extension shm has
been prepared).  If it's not using watchman, we don't.

So add a new config, core.waitforindexhelper, with sensible defaults, to
configure this behavior.

Signed-off-by: David Turner 
---
 cache.h   | 1 +
 config.c  | 5 +
 environment.c | 5 +
 read-cache.c  | 5 -
 4 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index 4ae7dd0..f8b8dbf 100644
--- a/cache.h
+++ b/cache.h
@@ -692,6 +692,7 @@ extern char *git_replace_ref_base;
 extern int fsync_object_files;
 extern int core_preload_index;
 extern int core_watchman_sync_timeout;
+extern int wait_for_index_helper;
 extern int core_apply_sparse_checkout;
 extern int precomposed_unicode;
 extern int protect_hfs;
diff --git a/config.c b/config.c
index e6dc141..5f1b8bd 100644
--- a/config.c
+++ b/config.c
@@ -887,6 +887,11 @@ static int git_default_core_config(const char *var, const 
char *value)
return 0;
}
 
+   if (!strcmp(var, "core.waitforindexhelper")) {
+   wait_for_index_helper = git_config_bool(var, value);
+   return 0;
+   }
+
if (!strcmp(var, "core.createobject")) {
if (!strcmp(value, "rename"))
object_creation_mode = OBJECT_CREATION_USES_RENAMES;
diff --git a/environment.c b/environment.c
index 35e03c7..c7fb0a9 100644
--- a/environment.c
+++ b/environment.c
@@ -95,6 +95,11 @@ int core_preload_index = 1;
 int ignore_untracked_cache_config;
 
 int core_watchman_sync_timeout = 300;
+#ifdef USE_WATCHMAN
+int wait_for_index_helper = 1;
+#else
+int wait_for_index_helper = 0;
+#endif
 
 
 /* This is set by setup_git_dir_gently() and/or git_default_config() */
diff --git a/read-cache.c b/read-cache.c
index c141fec..7fd9b2c 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1821,7 +1821,10 @@ static int poke_daemon(struct index_state *istate,
if (refresh_cache) {
ret = write_in_full(fd, "refresh", 8) == 8;
} else {
-   ret = poke_and_wait_for_reply(fd);
+   if (wait_for_index_helper)
+   ret = poke_and_wait_for_reply(fd);
+   else
+   ret = write_in_full(fd, "poke", 5) == 5;
}
 
close(fd);
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 14/17] index-helper: don't run if already running

2016-03-18 Thread David Turner
Signed-off-by: David Turner 
---
 index-helper.c  | 7 +++
 t/t7900-index-helper.sh | 9 +
 2 files changed, 16 insertions(+)

diff --git a/index-helper.c b/index-helper.c
index ffa3c2a..7362abb 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -400,6 +400,13 @@ int main(int argc, char **argv)
return 0;
}
 
+   /* check that no other copy is running */
+   fd = open(pipe_path.buf, O_RDONLY | O_NONBLOCK);
+   if (fd > 0)
+   die(_("Already running"));
+   if (errno != ENXIO && errno != ENOENT)
+   die_errno(_("Unexpected error checking pipe"));
+
atexit(cleanup);
sigchain_push_common(cleanup_on_signal);
 
diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh
index 14b5a6c..ce0cc27 100755
--- a/t/t7900-index-helper.sh
+++ b/t/t7900-index-helper.sh
@@ -24,4 +24,13 @@ test_expect_success 'index-helper creates usable pipe file 
and can be killed' '
test_path_is_missing .git/index-helper.pipe
 '
 
+test_expect_success 'index-helper will not start if already running' '
+   test_when_finished "git index-helper --kill" &&
+   git index-helper --detach &&
+   test -p .git/index-helper.pipe &&
+   test_must_fail git index-helper 2>err &&
+   test -p .git/index-helper.pipe &&
+   grep "Already running" err
+'
+
 test_done
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 06/17] index-helper: add --detach

2016-03-18 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

We detach after creating and opening the named pipe, because otherwise
we might return control to the shell before index-helper is ready to
accept commands.  This might lead to flaky tests.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Documentation/git-index-helper.txt |  3 +++
 index-helper.c | 11 +--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-index-helper.txt 
b/Documentation/git-index-helper.txt
index 7afc5c9..b858a8d 100644
--- a/Documentation/git-index-helper.txt
+++ b/Documentation/git-index-helper.txt
@@ -31,6 +31,9 @@ OPTIONS
for reading an index, but because it will happen in the
background, it's not noticable. `--strict` is enabled by default.
 
+--detach::
+   Detach from the shell.
+
 NOTES
 -
 $GIT_DIR/index-helper.pipe is a named pipe that the daemon reads
diff --git a/index-helper.c b/index-helper.c
index 8d221e0..a854ed8 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -15,7 +15,7 @@ struct shm {
 
 static struct shm shm_index;
 static struct shm shm_base_index;
-static int to_verify = 1;
+static int daemonized, to_verify = 1;
 
 static void release_index_shm(struct shm *is)
 {
@@ -34,6 +34,8 @@ static void cleanup_shm(void)
 
 static void cleanup(void)
 {
+   if (daemonized)
+   return;
unlink(git_path("index-helper.pipe"));
cleanup_shm();
 }
@@ -229,7 +231,7 @@ static const char * const usage_text[] = {
 int main(int argc, char **argv)
 {
const char *prefix;
-   int idle_in_seconds = 600;
+   int idle_in_seconds = 600, detach = 0;
int fd;
struct strbuf pipe_path = STRBUF_INIT;
struct option options[] = {
@@ -237,6 +239,7 @@ int main(int argc, char **argv)
N_("exit if not used after some seconds")),
OPT_BOOL(0, "strict", &to_verify,
 "verify shared memory after creating"),
+   OPT_BOOL(0, "detach", &detach, "detach the process"),
OPT_END()
};
 
@@ -258,6 +261,10 @@ int main(int argc, char **argv)
fd = setup_pipe(pipe_path.buf);
if (fd < 0)
die_errno("Could not set up index-helper.pipe");
+
+   if (detach && daemonize(&daemonized))
+   die_errno("unable to detach");
+
loop(fd, idle_in_seconds);
return 0;
 }
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 05/17] daemonize(): set a flag before exiting the main process

2016-03-18 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

This allows signal handlers and atexit functions to realize this
situation and not clean up.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/gc.c | 2 +-
 cache.h  | 2 +-
 daemon.c | 2 +-
 setup.c  | 4 +++-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index c583aad..37180de 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -385,7 +385,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
 * failure to daemonize is ok, we'll continue
 * in foreground
 */
-   daemonized = !daemonize();
+   daemonized = !daemonize(NULL);
}
} else
add_repack_all_option();
diff --git a/cache.h b/cache.h
index 5805962..d386722 100644
--- a/cache.h
+++ b/cache.h
@@ -530,7 +530,7 @@ extern int set_git_dir_init(const char *git_dir, const char 
*real_git_dir, int);
 extern int init_db(const char *template_dir, unsigned int flags);
 
 extern void sanitize_stdfds(void);
-extern int daemonize(void);
+extern int daemonize(int *);
 
 #define alloc_nr(x) (((x)+16)*3/2)
 
diff --git a/daemon.c b/daemon.c
index 8d45c33..a5cf954 100644
--- a/daemon.c
+++ b/daemon.c
@@ -1365,7 +1365,7 @@ int main(int argc, char **argv)
return execute();
 
if (detach) {
-   if (daemonize())
+   if (daemonize(NULL))
die("--detach not supported on this platform");
} else
sanitize_stdfds();
diff --git a/setup.c b/setup.c
index de1a2a7..9adf13f 100644
--- a/setup.c
+++ b/setup.c
@@ -1017,7 +1017,7 @@ void sanitize_stdfds(void)
close(fd);
 }
 
-int daemonize(void)
+int daemonize(int *daemonized)
 {
 #ifdef NO_POSIX_GOODIES
errno = ENOSYS;
@@ -1029,6 +1029,8 @@ int daemonize(void)
case -1:
die_errno("fork failed");
default:
+   if (daemonized)
+   *daemonized = 1;
exit(0);
}
if (setsid() == -1)
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 03/17] index-helper: new daemon for caching index and related stuff

2016-03-18 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Instead of reading the index from disk and worrying about disk
corruption, the index is cached in memory (memory bit-flips happen
too, but hopefully less often). The result is faster read. Read time
is reduced by 70%.

The biggest gain is not having to verify the trailing SHA-1, which
takes lots of time especially on large index files. But this also
opens doors for further optimiztions:

 - we could create an in-memory format that's essentially the memory
   dump of the index to eliminate most of parsing/allocation
   overhead. The mmap'd memory can be used straight away. Experiment
   [1] shows we could reduce read time by 88%.

 - we could cache non-index info such as name hash

The shared memory's name folows the template "git--"
where  is the trailing SHA-1 of the index file.  is
"index" for cached index files (and may be "name-hash" for name-hash
cache). If such shared memory exists, it contains the same index
content as on disk. The content is already validated by the daemon and
git won't validate it again (except comparing the trailing SHA-1s).

We keep this daemon's logic as thin as possible. The "brain" stays in
git. So the daemon can read and validate stuff, but that's all it's
allowed to do. It does not add/create new information. It doesn't even
accept direct updates from git.

Git can poke the daemon via named pipes to tell it to refresh the
index cache, or to keep it alive some more minutes. It can't give any
real index data directly to the daemon. Real data goes to disk first,
then the daemon reads and verifies it from there. Poking only happens
for $GIT_DIR/index, not temporary index files.

$GIT_DIR/index-helper.pipe is the named pipe for daemon process. The
daemon reads from the pipe and executes commands.  Commands that need
replies from the daemon will have to open their own pipe, since a
named pipe should only have one reader.  Unix domain sockets don't
have this problem, but are less portable.

index-helper requires POSIX realtime extension. POSIX shm interface
however is abstracted away so that Windows support can be added later.

On webkit.git with index format v2, duplicating 8 times to 1.4m
entries and 200MB in size:

(vanilla)  0.986986364 s: read_index_from .git/index
(index-helper) 0.267850279 s: read_index_from .git/index

Interestingly with index v4, we get less out of index-helper. It makes
sense as v4 requires more processing after loading the index:

(vanilla)  0.72249 s: read_index_from .git/index
(index-helper) 0.302741500 s: read_index_from .git/index

Signed-off-by: Nguyễn Thái Ngọc Duy 
Signed-off-by: David Turner 
---
 .gitignore |   1 +
 Documentation/git-index-helper.txt |  46 
 Makefile   |   9 ++
 cache.h|   3 +
 config.mak.uname   |   1 +
 git-compat-util.h  |   1 +
 index-helper.c | 215 +
 read-cache.c   |  99 +++--
 shm.c  |  67 
 shm.h  |  23 
 t/t7900-index-helper.sh|  18 
 11 files changed, 474 insertions(+), 9 deletions(-)
 create mode 100644 Documentation/git-index-helper.txt
 create mode 100644 index-helper.c
 create mode 100644 shm.c
 create mode 100644 shm.h
 create mode 100755 t/t7900-index-helper.sh

diff --git a/.gitignore b/.gitignore
index 5087ce1..b92f122 100644
--- a/.gitignore
+++ b/.gitignore
@@ -71,6 +71,7 @@
 /git-http-fetch
 /git-http-push
 /git-imap-send
+/git-index-helper
 /git-index-pack
 /git-init
 /git-init-db
diff --git a/Documentation/git-index-helper.txt 
b/Documentation/git-index-helper.txt
new file mode 100644
index 000..59a5abc
--- /dev/null
+++ b/Documentation/git-index-helper.txt
@@ -0,0 +1,46 @@
+git-index-helper(1)
+===
+
+NAME
+
+git-index-helper - A simple cache daemon for speeding up index file access
+
+SYNOPSIS
+
+[verse]
+'git index-helper' [options]
+
+DESCRIPTION
+---
+Keep the index file in memory for faster access. This daemon is per
+repository.
+
+OPTIONS
+---
+
+--exit-after=::
+   Exit if the cached index is not accessed for ``
+   minutes. Specify 0 to wait forever. Default is 10.
+
+NOTES
+-
+$GIT_DIR/index-helper.pipe is a named pipe that the daemon reads
+commands from. At least on Linux, shared memory objects are availble
+via /dev/shm with the name pattern "git--".  Normally
+the daemon will clean up shared memory objects when it exits.  But if
+it crashes, some objects could remain there and they can be safely
+deleted with "rm" command. The following commands are used to control
+the daemon:
+
+"refresh"::
+   Reread the index.
+
+"poke":
+   Let the daemon know the index is t

[PATCH v2 00/15] index-helper, watchman

2016-03-18 Thread David Turner
In this version:

I removed the statistics since they're not really a core part of the
series and we can always add them later.

I added a little more testing.

I merged the untracked cache/watchman mashup into the relevant
patches.  Hopefully I didn't screw it up -- I got into the rebase
weeds here and took a while to get out.  I wish I were better at
rebasing.

I moved the index-helper to a named-pipe based IPC mechanism instead
of a signal-based one.  I don't actualy know much about named pipes
other than what I learned while writing this, so someone with clue
should probably take a peek.

The index-helper pid files was kind of a hassle -- there was a
lot of mechanism involved in making sure that they were in fact
pointing to a live index-helper process.  With named pipes, we don't
really need them: if a live index-helper is running, it can simply be
instructed to die, and if it's not, then the pipe can be safely
removed and a new index-helper started.

Because there is no longer a pid file for index-helper, index-helper
has no way to let git know that it should wait for a watchman update
reply when poked.  I worked around this by making the waiting
configurable, and giving a sensible default.  Note that in the
no-watchman case, the wait will be short even if misconfigured,
because the daemon can immediately send an "OK" message.

I updated some commit messages, following Junio and Duy's suggestions.

Johannes Schindelin said that he would work on the Windows side, so
I've dropped the Windows bits (including one patch).  Since I don't
know anything about Windows, it's probably for the best that I'm not
coding for it.

I eliminated the ability to start up an index-helper when there was
already one running because I don't see why you would want to do that,
and because it would lead to multiple readers on the same named pipe,
which has undefined behavior.  Just kill the old one if you're done
with it.

David Turner (7):
  read-cache: invalidate untracked cache data when reading WAMA
  unpack-trees: preserve index extensions
  index-helper: kill mode
  index-helper: don't run if already running
  index-helper: autorun mode
  index-helper: optionally automatically run
  read-cache: config for waiting for index-helper

Nguyễn Thái Ngọc Duy (10):
  read-cache.c: fix constness of verify_hdr()
  read-cache: allow to keep mmap'd memory after reading
  index-helper: new daemon for caching index and related stuff
  index-helper: add --strict
  daemonize(): set a flag before exiting the main process
  index-helper: add --detach
  read-cache: add watchman 'WAMA' extension
  Add watchman support to reduce index refresh cost
  index-helper: use watchman to avoid refreshing index with lstat()
  update-index: enable/disable watchman support

 .gitignore |   1 +
 Documentation/git-index-helper.txt |  64 ++
 Makefile   |  21 ++
 builtin/gc.c   |   2 +-
 builtin/update-index.c |  11 +
 cache.h|  18 +-
 config.c   |  10 +
 config.mak.uname   |   1 +
 configure.ac   |   8 +
 daemon.c   |   2 +-
 dir.c  |  23 +-
 dir.h  |   6 +
 environment.c  |   8 +
 git-compat-util.h  |   1 +
 git.c  |  35 ++-
 index-helper.c | 439 +++
 read-cache.c   | 455 +++--
 setup.c|   4 +-
 shm.c  |  67 ++
 shm.h  |  23 ++
 t/t7063-status-untracked-cache.sh  |  23 ++
 t/t7900-index-helper.sh|  60 +
 t/test-lib-functions.sh|   4 +
 unpack-trees.c |   1 +
 watchman-support.c | 134 +++
 watchman-support.h |   7 +
 26 files changed, 1406 insertions(+), 22 deletions(-)
 create mode 100644 Documentation/git-index-helper.txt
 create mode 100644 index-helper.c
 create mode 100644 shm.c
 create mode 100644 shm.h
 create mode 100755 t/t7900-index-helper.sh
 create mode 100644 watchman-support.c
 create mode 100644 watchman-support.h

-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] merge: refuse to create too cool a merge by default

2016-03-18 Thread David Turner
On Fri, 2016-03-18 at 13:21 -0700, Junio C Hamano wrote:
>  Many tests that are updated by this patch does the
> pass-through manually by turning:

Nit: Should be many tests ... do

Also, I didn't notice a test that shows that "cool" merges without
allow-unrelated-histories are forbidden.

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 16/17] index-helper: optionally automatically run

2016-03-18 Thread David Turner
Introduce a new config option, indexhelper.autorun, to automatically
run git index-helper before starting up a builtin git command.  This
enables users to keep index-helper running without manual
intervention.

Signed-off-by: David Turner 
---
 git.c   | 35 ++-
 t/t7900-index-helper.sh | 16 
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/git.c b/git.c
index 6cc0c07..7d27782 100644
--- a/git.c
+++ b/git.c
@@ -521,6 +521,37 @@ static void strip_extension(const char **argv)
 #define strip_extension(cmd)
 #endif
 
+static int want_auto_index_helper(void)
+{
+   int want = -1;
+   const char *value = NULL;
+   const char *key = "indexhelper.autorun";
+
+   if (git_config_key_is_valid(key) &&
+   !git_config_get_value(key, &value)) {
+   int b = git_config_maybe_bool(key, value);
+   want = b >= 0 ? b : 0;
+   }
+   return want;
+}
+
+static void maybe_run_index_helper(struct cmd_struct *cmd)
+{
+   const char *argv[] = {"git-index-helper", "--detach", "--autorun", 
NULL};
+
+   if (!(cmd->option & NEED_WORK_TREE))
+   return;
+
+   if (want_auto_index_helper() <= 0)
+   return;
+
+   trace_argv_printf(argv, "trace: auto index-helper:");
+
+   if (run_command_v_opt(argv,
+ RUN_SILENT_EXEC_FAILURE | RUN_CLEAN_ON_EXIT))
+   warning(_("You specified indexhelper.autorun, but running 
git-index-helper failed."));
+}
+
 static void handle_builtin(int argc, const char **argv)
 {
const char *cmd;
@@ -536,8 +567,10 @@ static void handle_builtin(int argc, const char **argv)
}
 
builtin = get_builtin(cmd);
-   if (builtin)
+   if (builtin) {
+   maybe_run_index_helper(builtin);
exit(run_builtin(builtin, argc, argv));
+   }
 }
 
 static void execv_dashed_external(const char **argv)
diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh
index 6020fe4..7fe66b7 100755
--- a/t/t7900-index-helper.sh
+++ b/t/t7900-index-helper.sh
@@ -41,4 +41,20 @@ test_expect_success 'index-helper is quiet with --autorun' '
git index-helper --autorun
 '
 
+test_expect_success 'index-helper autorun works' '
+   rm -f .git/index-helper.pipe &&
+   git status &&
+   test_path_is_missing .git/index-helper.pipe &&
+   test_config indexhelper.autorun true &&
+   git status &&
+   test -p .git/index-helper.pipe &&
+   git status 2>err &&
+   test -p .git/index-helper.pipe &&
+   ! grep -q . err &&
+   git index-helper --kill &&
+   test_config indexhelper.autorun false &&
+   git status &&
+   test_path_is_missing .git/index-helper.pipe
+'
+
 test_done
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 13/17] index-helper: kill mode

2016-03-18 Thread David Turner
Add a new command (and command-line arg) to allow index-helpers to
exit cleanly.

This is mainly useful for tests.

Signed-off-by: David Turner 
---
 index-helper.c  | 39 +--
 t/t7900-index-helper.sh |  9 +
 2 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/index-helper.c b/index-helper.c
index 445a0ac..ffa3c2a 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -286,6 +286,8 @@ static void loop(int fd, int idle_in_seconds)
 * alive, nothing to do.
 */
;
+   } else if (!strcmp(command.buf, "die")) {
+   break;
} else {
warning("BUG: Bogus command %s", 
command.buf);
}
@@ -338,10 +340,35 @@ static const char * const usage_text[] = {
NULL
 };
 
+static void request_kill(const char *pipe_path)
+{
+   int fd;
+
+   fd = open(pipe_path, O_WRONLY | O_NONBLOCK);
+   if (fd < 0) {
+   warning("No existing pipe; can't send kill message to old 
process");
+   goto done;
+   }
+
+   write_in_full(fd, "die", 4);
+   close(fd);
+
+done:
+   /*
+* The child will try to do this anyway, but we want to be
+* ready to launch a new daemon immediately after this command
+* returns.
+*/
+
+   unlink(pipe_path);
+   return;
+}
+
+
 int main(int argc, char **argv)
 {
const char *prefix;
-   int idle_in_seconds = 600, detach = 0;
+   int idle_in_seconds = 600, detach = 0, kill = 0;
int fd;
struct strbuf pipe_path = STRBUF_INIT;
struct option options[] = {
@@ -350,6 +377,7 @@ int main(int argc, char **argv)
OPT_BOOL(0, "strict", &to_verify,
 "verify shared memory after creating"),
OPT_BOOL(0, "detach", &detach, "detach the process"),
+   OPT_BOOL(0, "kill", &kill, "request that existing index helper 
processes exit"),
OPT_END()
};
 
@@ -364,10 +392,17 @@ int main(int argc, char **argv)
  options, usage_text, 0))
die(_("too many arguments"));
 
+   strbuf_git_path(&pipe_path, "index-helper.pipe");
+   if (kill) {
+   if (detach)
+   die(_("--kill doesn't want any other options"));
+   request_kill(pipe_path.buf);
+   return 0;
+   }
+
atexit(cleanup);
sigchain_push_common(cleanup_on_signal);
 
-   strbuf_git_path(&pipe_path, "index-helper.pipe");
fd = setup_pipe(pipe_path.buf);
if (fd < 0)
die_errno("Could not set up index-helper.pipe");
diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh
index 7126037..14b5a6c 100755
--- a/t/t7900-index-helper.sh
+++ b/t/t7900-index-helper.sh
@@ -15,4 +15,13 @@ test_expect_success 'index-helper smoke test' '
test_path_is_missing .git/index-helper.pipe
 '
 
+test_expect_success 'index-helper creates usable pipe file and can be killed' '
+   test_when_finished "git index-helper --kill" &&
+   test_path_is_missing .git/index-helper.pipe &&
+   git index-helper --detach &&
+   test -p .git/index-helper.pipe &&
+   git index-helper --kill &&
+   test_path_is_missing .git/index-helper.pipe
+'
+
 test_done
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 10/17] index-helper: use watchman to avoid refreshing index with lstat()

2016-03-18 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Watchman is hidden behind index-helper. Before git tries to read the
index from shm, it notifies index-helper through the named pipe and
waits for index-helper to prepare shm. index-helper then contacts
watchman, updates 'WAMA' extension and put it in a separate shm and
wakes git up with a write to git's named pipe.

Git uses this extension to not lstat unchanged entries. Git only
trusts the 'WAMA' extension when it's received from the separate shm,
not from disk. Unmarked entries are "clean". Marked entries are dirty
from watchman point of view. If it finds out some entries are
'watchman-dirty', but are really unchanged (e.g. the file was changed,
then reverted back), then Git will clear the marking in 'WAMA' before
writing it down.

Hiding watchman behind index-helper means you need both daemons. You
can't run watchman alone. Not so good. But on the other hand, 'git'
binary is not linked to watchman/json libraries, which is good for
packaging. Core git package will run fine without watchman-related
packages. If they need watchman, they can install git-index-helper and
dependencies.

This also lets us trust anything in the untracked cache that we haven't
marked invalid, saving those stat() calls.

Another reason for tying watchman to index-helper is, when used with
untracked cache, we need to keep track of $GIT_WORK_TREE file
listing. That kind of list can be kept in index-helper.

Signed-off-by: Nguyễn Thái Ngọc Duy  gmail.com>
Signed-off-by: David Turner 
---
 Documentation/git-index-helper.txt |   6 ++
 cache.h|   2 +
 dir.c  |  12 
 index-helper.c | 128 ++---
 read-cache.c   | 141 +++--
 5 files changed, 275 insertions(+), 14 deletions(-)

diff --git a/Documentation/git-index-helper.txt 
b/Documentation/git-index-helper.txt
index b858a8d..7a8042e 100644
--- a/Documentation/git-index-helper.txt
+++ b/Documentation/git-index-helper.txt
@@ -51,6 +51,12 @@ the daemon:
Let the daemon know the index is to be read. It keeps the
daemon alive longer, unless `--exit-after=0` is used.
 
+"poke ":
+   Like "poke", but replies with "OK" by opening the named pipe
+   at  and writing the string.  If watchman is configured,
+   index-helper queries watchman, then prepares a shared memory
+   object with the watchman index extension before replying.
+
 All commands and replies are terminated by a 0 byte.
 
 GIT
diff --git a/cache.h b/cache.h
index 95715fd..9fa339a 100644
--- a/cache.h
+++ b/cache.h
@@ -558,6 +558,7 @@ extern int daemonize(int *);
 
 /* Initialize and use the cache information */
 struct lock_file;
+extern int verify_index(const struct index_state *);
 extern int read_index(struct index_state *);
 extern int read_index_preload(struct index_state *, const struct pathspec 
*pathspec);
 extern int do_read_index(struct index_state *istate, const char *path,
@@ -565,6 +566,7 @@ extern int do_read_index(struct index_state *istate, const 
char *path,
 extern int read_index_from(struct index_state *, const char *path);
 extern int is_index_unborn(struct index_state *);
 extern int read_index_unmerged(struct index_state *);
+extern void write_watchman_ext(struct strbuf *sb, struct index_state* istate);
 #define COMMIT_LOCK(1 << 0)
 #define CLOSE_LOCK (1 << 1)
 #define REFRESH_DAEMON (1 << 2)
diff --git a/dir.c b/dir.c
index 9b659e6..5058b29 100644
--- a/dir.c
+++ b/dir.c
@@ -1726,6 +1726,17 @@ static int valid_cached_dir(struct dir_struct *dir,
if (!untracked)
return 0;
 
+   if (dir->untracked->use_watchman) {
+   /*
+* With watchman, we can trust the untracked cache's
+* valid field.
+*/
+   if (untracked->valid)
+   goto skip_stat;
+   else
+   invalidate_directory(dir->untracked, untracked);
+   }
+
if (stat(path->len ? path->buf : ".", &st)) {
invalidate_directory(dir->untracked, untracked);
memset(&untracked->stat_data, 0, sizeof(untracked->stat_data));
@@ -1739,6 +1750,7 @@ static int valid_cached_dir(struct dir_struct *dir,
return 0;
}
 
+skip_stat:
if (untracked->check_only != !!check_only) {
invalidate_directory(dir->untracked, untracked);
return 0;
diff --git a/index-helper.c b/index-helper.c
index a854ed8..445a0ac 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -6,15 +6,18 @@
 #include "split-index.h"
 #include "shm.h"
 #include "lockfile.h"
+#include "watchman-support.h&q

[PATCH v2 11/17] update-index: enable/disable watchman support

2016-03-18 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/update-index.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index 1c94ca5..7c08e1c 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -914,6 +914,7 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
 {
int newfd, entries, has_errors = 0, nul_term_line = 0;
enum uc_mode untracked_cache = UC_UNSPECIFIED;
+   int use_watchman = -1;
int read_from_stdin = 0;
int prefix_length = prefix ? strlen(prefix) : 0;
int preferred_index_format = 0;
@@ -1012,6 +1013,8 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
N_("test if the filesystem supports untracked 
cache"), UC_TEST),
OPT_SET_INT(0, "force-untracked-cache", &untracked_cache,
N_("enable untracked cache without testing the 
filesystem"), UC_FORCE),
+   OPT_BOOL(0, "watchman", &use_watchman,
+   N_("use or not use watchman to reduce refresh cost")),
OPT_END()
};
 
@@ -1149,6 +1152,14 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
die("Bug: bad untracked_cache value: %d", untracked_cache);
}
 
+   if (use_watchman > 0) {
+   the_index.last_update= xstrdup("");
+   the_index.cache_changed |= WATCHMAN_CHANGED;
+   } else if (!use_watchman) {
+   the_index.last_update= NULL;
+   the_index.cache_changed |= WATCHMAN_CHANGED;
+   }
+
if (active_cache_changed) {
if (newfd < 0) {
if (refresh_args.flags & REFRESH_QUIET)
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 07/17] read-cache: add watchman 'WAMA' extension

2016-03-18 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

The extension contains a bitmap, one bit for each entry in the
index. If the n-th bit is zero, the n-th entry is considered
unchanged, we can ce_mark_uptodate() it without refreshing. If the bit
is non-zero and we found out the corresponding file is clean after
refresh, we can clear the bit.

In addition, there's a list of directories in the untracked-cache
to invalidate (because they have new or modified entries).

The 'skipping refresh' bit is not in this patch yet as we would need
watchman. More details in later patches.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 cache.h  |   4 ++
 dir.h|   3 ++
 read-cache.c | 117 ++-
 3 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/cache.h b/cache.h
index d386722..5b10d52 100644
--- a/cache.h
+++ b/cache.h
@@ -182,6 +182,8 @@ struct cache_entry {
 #define CE_VALID (0x8000)
 #define CE_STAGESHIFT 12
 
+#define CE_WATCHMAN_DIRTY  (0x0001)
+
 /*
  * Range 0x0FFF in ce_flags is divided into
  * two parts: in-memory flags and on-disk ones.
@@ -320,6 +322,7 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define CACHE_TREE_CHANGED (1 << 5)
 #define SPLIT_INDEX_ORDERED(1 << 6)
 #define UNTRACKED_CHANGED  (1 << 7)
+#define WATCHMAN_CHANGED   (1 << 8)
 
 struct split_index;
 struct untracked_cache;
@@ -344,6 +347,7 @@ struct index_state {
struct untracked_cache *untracked;
void *mmap;
size_t mmap_size;
+   char *last_update;
 };
 
 extern struct index_state the_index;
diff --git a/dir.h b/dir.h
index 3ec3fb0..3d540de 100644
--- a/dir.h
+++ b/dir.h
@@ -142,6 +142,9 @@ struct untracked_cache {
int gitignore_invalidated;
int dir_invalidated;
int dir_opened;
+   /* watchman invalidation data */
+   unsigned int use_watchman : 1;
+   struct string_list invalid_untracked;
 };
 
 struct dir_struct {
diff --git a/read-cache.c b/read-cache.c
index fe73828..6d5e871 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -19,6 +19,7 @@
 #include "split-index.h"
 #include "utf8.h"
 #include "shm.h"
+#include "ewah/ewok.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
   unsigned int options);
@@ -41,11 +42,13 @@ static struct cache_entry *refresh_cache_entry(struct 
cache_entry *ce,
 #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
 #define CACHE_EXT_LINK 0x6c696e6b/* "link" */
 #define CACHE_EXT_UNTRACKED 0x554E5452   /* "UNTR" */
+#define CACHE_EXT_WATCHMAN 0x57414D41/* "WAMA" */
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
 CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
-SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED)
+SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED | \
+WATCHMAN_CHANGED)
 
 struct index_state the_index;
 static const char *alternate_index_output;
@@ -1220,8 +1223,13 @@ int refresh_index(struct index_state *istate, unsigned 
int flags,
continue;
 
new = refresh_cache_ent(istate, ce, options, &cache_errno, 
&changed);
-   if (new == ce)
+   if (new == ce) {
+   if (ce->ce_flags & CE_WATCHMAN_DIRTY) {
+   ce->ce_flags  &= ~CE_WATCHMAN_DIRTY;
+   istate->cache_changed |= WATCHMAN_CHANGED;
+   }
continue;
+   }
if (!new) {
const char *fmt;
 
@@ -1365,6 +1373,94 @@ static int verify_hdr(const struct cache_header *hdr, 
unsigned long size)
return 0;
 }
 
+static void mark_no_watchman(size_t pos, void *data)
+{
+   struct index_state *istate = data;
+   assert(pos < istate->cache_nr);
+   istate->cache[pos]->ce_flags |= CE_WATCHMAN_DIRTY;
+}
+
+static int read_watchman_ext(struct index_state *istate, const void *data,
+unsigned long sz)
+{
+   struct ewah_bitmap *bitmap;
+   int ret, len;
+   uint32_t bitmap_size;
+   uint32_t untracked_nr;
+
+   if (memchr(data, 0, sz) == NULL)
+   return error("invalid extension");
+
+   len = strlen(data) + 1;
+   memcpy(&bitmap_size, (const char *)data + len, 4);
+   memcpy(&untracked_nr, (const char *)data + len + 4, 4);
+   untracked_nr = ntohl(untracked_nr);
+   bitmap_size = ntohl(bitmap_size);
+
+   bitmap = ewah_new();
+   ret = ewah_read_mmap(bitmap, (const char *)data + len + 8, bitmap_size);
+   if (ret != bitmap_size) {
+   ewah_free(bitmap);
+   return error("failed to parse ewah bitmap reading watchman 
index extension");
+   }
+   istate->last_update = xstrdup(data);
+   ewah_each_bit(bitmap, mar

[PATCH v2 02/17] read-cache: allow to keep mmap'd memory after reading

2016-03-18 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Later, we will introduce git index-helper to share this memory with
other git processes.

Since the memory will be shared, it will never be unmapped (although
the kernel may of course choose to page it out).

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 cache.h  |  3 +++
 read-cache.c | 13 -
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index b829410..4180e2b 100644
--- a/cache.h
+++ b/cache.h
@@ -333,11 +333,14 @@ struct index_state {
struct split_index *split_index;
struct cache_time timestamp;
unsigned name_hash_initialized : 1,
+keep_mmap : 1,
 initialized : 1;
struct hashmap name_hash;
struct hashmap dir_hash;
unsigned char sha1[20];
struct untracked_cache *untracked;
+   void *mmap;
+   size_t mmap_size;
 };
 
 extern struct index_state the_index;
diff --git a/read-cache.c b/read-cache.c
index 16cc487..7e387e9 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1574,6 +1574,10 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
mmap = xmmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (mmap == MAP_FAILED)
die_errno("unable to map index file");
+   if (istate->keep_mmap) {
+   istate->mmap = mmap;
+   istate->mmap_size = mmap_size;
+   }
close(fd);
 
hdr = mmap;
@@ -1626,10 +1630,12 @@ int do_read_index(struct index_state *istate, const 
char *path, int must_exist)
src_offset += 8;
src_offset += extsize;
}
-   munmap(mmap, mmap_size);
+   if (!istate->keep_mmap)
+   munmap(mmap, mmap_size);
return istate->cache_nr;
 
 unmap:
+   istate->mmap = NULL;
munmap(mmap, mmap_size);
die("index file corrupt");
 }
@@ -1655,6 +1661,7 @@ int read_index_from(struct index_state *istate, const 
char *path)
discard_index(split_index->base);
else
split_index->base = xcalloc(1, sizeof(*split_index->base));
+   split_index->base->keep_mmap = istate->keep_mmap;
ret = do_read_index(split_index->base,
git_path("sharedindex.%s",
 sha1_to_hex(split_index->base_sha1)), 1);
@@ -1698,6 +1705,10 @@ int discard_index(struct index_state *istate)
free(istate->cache);
istate->cache = NULL;
istate->cache_alloc = 0;
+   if (istate->keep_mmap && istate->mmap) {
+   munmap(istate->mmap, istate->mmap_size);
+   istate->mmap = NULL;
+   }
discard_split_index(istate);
free_untracked_cache(istate->untracked);
istate->untracked = NULL;
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 04/17] index-helper: add --strict

2016-03-18 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

There are "holes" in the index-helper approach because the shared
memory is not verified again by git. If $USER is compromised, shared
memory could be modified. But anyone who could do this could already
modify $GIT_DIR/index. A more realistic risk is some bugs in
index-helper that produce corrupt shared memory. --strict is added to
avoid that.

Strictly speaking there's still a very small gap where corrupt shared
memory could still be read by git: after we write the trailing SHA-1 in
the shared memory (thus signaling "this shm is ready") and before
verify_shm() detects an error.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Documentation/git-index-helper.txt |  9 +++
 cache.h|  1 +
 index-helper.c | 48 ++
 read-cache.c   |  9 ---
 4 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-index-helper.txt 
b/Documentation/git-index-helper.txt
index 59a5abc..7afc5c9 100644
--- a/Documentation/git-index-helper.txt
+++ b/Documentation/git-index-helper.txt
@@ -22,6 +22,15 @@ OPTIONS
Exit if the cached index is not accessed for ``
minutes. Specify 0 to wait forever. Default is 10.
 
+--strict::
+--no-strict::
+   Strict mode makes index-helper verify the shared memory after
+   it's created. If the result does not match what's read from
+   $GIT_DIR/index, the shared memory is destroyed. This makes
+   index-helper take more than double the amount of time required
+   for reading an index, but because it will happen in the
+   background, it's not noticable. `--strict` is enabled by default.
+
 NOTES
 -
 $GIT_DIR/index-helper.pipe is a named pipe that the daemon reads
diff --git a/cache.h b/cache.h
index c3c6d85..5805962 100644
--- a/cache.h
+++ b/cache.h
@@ -336,6 +336,7 @@ struct index_state {
 keep_mmap : 1,
 from_shm : 1,
 to_shm : 1,
+always_verify_trailing_sha1 : 1,
 initialized : 1;
struct hashmap name_hash;
struct hashmap dir_hash;
diff --git a/index-helper.c b/index-helper.c
index 962c973..8d221e0 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -15,6 +15,7 @@ struct shm {
 
 static struct shm shm_index;
 static struct shm shm_base_index;
+static int to_verify = 1;
 
 static void release_index_shm(struct shm *is)
 {
@@ -73,11 +74,56 @@ static void share_index(struct index_state *istate, struct 
shm *is)
hashcpy((unsigned char *)new_mmap + istate->mmap_size - 20, is->sha1);
 }
 
+static int verify_shm(void)
+{
+   int i;
+   struct index_state istate;
+   memset(&istate, 0, sizeof(istate));
+   istate.always_verify_trailing_sha1 = 1;
+   istate.to_shm = 1;
+   i = read_index(&istate);
+   if (i != the_index.cache_nr)
+   goto done;
+   for (; i < the_index.cache_nr; i++) {
+   struct cache_entry *base, *ce;
+   /* namelen is checked separately */
+   const unsigned int ondisk_flags =
+   CE_STAGEMASK | CE_VALID | CE_EXTENDED_FLAGS;
+   unsigned int ce_flags, base_flags, ret;
+   base = the_index.cache[i];
+   ce = istate.cache[i];
+   if (ce->ce_namelen != base->ce_namelen ||
+   strcmp(ce->name, base->name)) {
+   warning("mismatch at entry %d", i);
+   break;
+   }
+   ce_flags = ce->ce_flags;
+   base_flags = base->ce_flags;
+   /* only on-disk flags matter */
+   ce->ce_flags   &= ondisk_flags;
+   base->ce_flags &= ondisk_flags;
+   ret = memcmp(&ce->ce_stat_data, &base->ce_stat_data,
+offsetof(struct cache_entry, name) -
+offsetof(struct cache_entry, ce_stat_data));
+   ce->ce_flags = ce_flags;
+   base->ce_flags = base_flags;
+   if (ret) {
+   warning("mismatch at entry %d", i);
+   break;
+   }
+   }
+done:
+   discard_index(&istate);
+   return i == the_index.cache_nr;
+}
+
 static void share_the_index(void)
 {
if (the_index.split_index && the_index.split_index->base)
share_index(the_index.split_index->base, &shm_base_index);
share_index(&the_index, &shm_index);
+   if (to_verify && !verify_shm())
+   cleanup_shm();
discard_index(&the_index);
 }
 
@@ -189,6 +235,8 @@ int main(int argc, char **argv)
struct option options[] = {
OPT_INTEGER(0, "exit-after", &idle_in_seconds,
N_("exit if not used after some seconds")),
+   OPT_BOOL(0, "strict", &to_verify,
+"verify shared memory after creating"),
O

[PATCH v2 08/17] read-cache: invalidate untracked cache data when reading WAMA

2016-03-18 Thread David Turner
When reading the watchman extension, invalidate the listed
untracked-cache entries.

We don't clear these entries yet; we can only do that once we know
that they came from a live watchman (rather than from disk).

Signed-off-by: David Turner 
---
 dir.c| 11 +---
 dir.h|  3 ++
 read-cache.c | 89 
 3 files changed, 94 insertions(+), 9 deletions(-)

diff --git a/dir.c b/dir.c
index 69e0be6..9b659e6 100644
--- a/dir.c
+++ b/dir.c
@@ -597,9 +597,9 @@ static void trim_trailing_spaces(char *buf)
  *
  * If "name" has the trailing slash, it'll be excluded in the search.
  */
-static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
-   struct untracked_cache_dir 
*dir,
-   const char *name, int len)
+struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
+struct untracked_cache_dir *dir,
+const char *name, int len)
 {
int first, last;
struct untracked_cache_dir *d;
@@ -2625,8 +2625,10 @@ static void free_untracked(struct untracked_cache_dir 
*ucd)
 
 void free_untracked_cache(struct untracked_cache *uc)
 {
-   if (uc)
+   if (uc) {
free_untracked(uc->root);
+   string_list_clear(&uc->invalid_untracked, 0);
+   }
free(uc);
 }
 
@@ -2775,6 +2777,7 @@ struct untracked_cache *read_untracked_extension(const 
void *data, unsigned long
return NULL;
 
uc = xcalloc(1, sizeof(*uc));
+   string_list_init(&uc->invalid_untracked, 1);
strbuf_init(&uc->ident, ident_len);
strbuf_add(&uc->ident, ident, ident_len);
load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat,
diff --git a/dir.h b/dir.h
index 3d540de..8fd3f9e 100644
--- a/dir.h
+++ b/dir.h
@@ -315,4 +315,7 @@ struct untracked_cache *read_untracked_extension(const void 
*data, unsigned long
 void write_untracked_extension(struct strbuf *out, struct untracked_cache 
*untracked);
 void add_untracked_cache(struct index_state *istate);
 void remove_untracked_cache(struct index_state *istate);
+struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
+struct untracked_cache_dir *dir,
+const char *name, int len);
 #endif
diff --git a/read-cache.c b/read-cache.c
index 6d5e871..b4bd15c 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1373,11 +1373,76 @@ static int verify_hdr(const struct cache_header *hdr, 
unsigned long size)
return 0;
 }
 
+static struct untracked_cache_dir *find_untracked_cache_dir(
+   struct untracked_cache *uc, struct untracked_cache_dir *ucd,
+   const char *name)
+{
+   int component_len;
+   const char *end;
+   struct untracked_cache_dir *dir;
+
+   if (!*name)
+   return ucd;
+
+   end = strchr(name, '/');
+   if (end)
+   component_len = end - name;
+   else
+   component_len = strlen(name);
+
+   dir = lookup_untracked(uc, ucd, name, component_len);
+   if (dir)
+   return find_untracked_cache_dir(uc, dir, name + component_len + 
1);
+
+   return NULL;
+}
+
 static void mark_no_watchman(size_t pos, void *data)
 {
struct index_state *istate = data;
+   struct cache_entry *ce = istate->cache[pos];
+   struct strbuf sb = STRBUF_INIT;
+   char *c;
+   struct untracked_cache_dir *dir;
+
assert(pos < istate->cache_nr);
-   istate->cache[pos]->ce_flags |= CE_WATCHMAN_DIRTY;
+   ce->ce_flags |= CE_WATCHMAN_DIRTY;
+
+   if (!istate->untracked || !istate->untracked->root)
+   return;
+
+   strbuf_add(&sb, ce->name, ce_namelen(ce));
+
+   for (c = sb.buf + sb.len - 1; c > sb.buf; c--) {
+   if (*c == '/') {
+   strbuf_setlen(&sb, c - sb.buf);
+   break;
+   }
+   }
+
+   if (c == sb.buf) {
+   strbuf_setlen(&sb, 0);
+   }
+
+   dir = find_untracked_cache_dir(istate->untracked,
+  istate->untracked->root, sb.buf);
+   if (dir)
+   dir->valid = 0;
+
+   strbuf_release(&sb);
+}
+
+static int mark_untracked_invalid(struct string_list_item *item, void *uc)
+{
+   struct untracked_cache *untracked = uc;
+   struct untracked_cache_dir *dir;
+
+   dir = find_untracked_cache_dir(untracked, untracked->root,
+  item->string);
+   if (dir)
+   dir->valid = 0;
+
+   return 0;
 }
 
 static int read_watchman_ext(struct index

[PATCH v2 09/17] Add watchman support to reduce index refresh cost

2016-03-18 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

The previous patch has the logic to clear bits in 'WAMA' bitmap. This
patch has logic to set bits as told by watchman. The missing bit,
_using_ these bits, are not here yet.

A lot of this code is written by David Turner originally, mostly from
[1]. I'm just copying and polishing it a bit.

[1] http://article.gmane.org/gmane.comp.version-control.git/248006

Signed-off-by: David Turner 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Makefile   |  12 +
 cache.h|   1 +
 config.c   |   5 ++
 configure.ac   |   8 
 environment.c  |   3 ++
 watchman-support.c | 134 +
 watchman-support.h |   7 +++
 7 files changed, 170 insertions(+)
 create mode 100644 watchman-support.c
 create mode 100644 watchman-support.h

diff --git a/Makefile b/Makefile
index 2d72771..8bf705b 100644
--- a/Makefile
+++ b/Makefile
@@ -453,6 +453,7 @@ MSGFMT = msgfmt
 CURL_CONFIG = curl-config
 PTHREAD_LIBS = -lpthread
 PTHREAD_CFLAGS =
+WATCHMAN_LIBS =
 GCOV = gcov
 
 export TCL_PATH TCLTK_PATH
@@ -1419,6 +1420,13 @@ else
LIB_OBJS += thread-utils.o
 endif
 
+ifdef USE_WATCHMAN
+   LIB_H += watchman-support.h
+   LIB_OBJS += watchman-support.o
+   WATCHMAN_LIBS = -lwatchman
+   BASIC_CFLAGS += -DUSE_WATCHMAN
+endif
+
 ifdef HAVE_PATHS_H
BASIC_CFLAGS += -DHAVE_PATHS_H
 endif
@@ -2030,6 +2038,9 @@ git-remote-testsvn$X: remote-testsvn.o GIT-LDFLAGS 
$(GITLIBS) $(VCSSVN_LIB)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) 
$(LIBS) \
$(VCSSVN_LIB)
 
+git-index-helper$X: index-helper.o GIT-LDFLAGS $(GITLIBS)
+   $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) 
$(LIBS) $(WATCHMAN_LIBS)
+
 $(REMOTE_CURL_ALIASES): $(REMOTE_CURL_PRIMARY)
$(QUIET_LNCP)$(RM) $@ && \
ln $< $@ 2>/dev/null || \
@@ -2168,6 +2179,7 @@ GIT-BUILD-OPTIONS: FORCE
@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+
@echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@+
@echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst 
','\'',$(NO_UNIX_SOCKETS)))'\' >>$@+
+   @echo USE_WATCHMAN=\''$(subst ','\'',$(subst 
','\'',$(USE_WATCHMAN)))'\' >>$@+
 ifdef TEST_OUTPUT_DIRECTORY
@echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst 
','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@+
 endif
diff --git a/cache.h b/cache.h
index 5b10d52..95715fd 100644
--- a/cache.h
+++ b/cache.h
@@ -688,6 +688,7 @@ extern char *git_replace_ref_base;
 
 extern int fsync_object_files;
 extern int core_preload_index;
+extern int core_watchman_sync_timeout;
 extern int core_apply_sparse_checkout;
 extern int precomposed_unicode;
 extern int protect_hfs;
diff --git a/config.c b/config.c
index 9ba40bc..e6dc141 100644
--- a/config.c
+++ b/config.c
@@ -882,6 +882,11 @@ static int git_default_core_config(const char *var, const 
char *value)
return 0;
}
 
+   if (!strcmp(var, "core.watchmansynctimeout")) {
+   core_watchman_sync_timeout = git_config_int(var, value);
+   return 0;
+   }
+
if (!strcmp(var, "core.createobject")) {
if (!strcmp(value, "rename"))
object_creation_mode = OBJECT_CREATION_USES_RENAMES;
diff --git a/configure.ac b/configure.ac
index 0cd9f46..334d63b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1099,6 +1099,14 @@ AC_COMPILE_IFELSE([BSD_SYSCTL_SRC],
HAVE_BSD_SYSCTL=])
 GIT_CONF_SUBST([HAVE_BSD_SYSCTL])
 
+#
+# Check for watchman client library
+
+AC_CHECK_LIB([watchman], [watchman_connect],
+   [USE_WATCHMAN=YesPlease],
+   [USE_WATCHMAN=])
+GIT_CONF_SUBST([USE_WATCHMAN])
+
 ## Other checks.
 # Define USE_PIC if you need the main git objects to be built with -fPIC
 # in order to build and link perl/Git.so.  x86-64 seems to need this.
diff --git a/environment.c b/environment.c
index 6dec9d0..35e03c7 100644
--- a/environment.c
+++ b/environment.c
@@ -94,6 +94,9 @@ int core_preload_index = 1;
  */
 int ignore_untracked_cache_config;
 
+int core_watchman_sync_timeout = 300;
+
+
 /* This is set by setup_git_dir_gently() and/or git_default_config() */
 char *git_work_tree_cfg;
 static char *work_tree;
diff --git a/watchman-support.c b/watchman-support.c
new file mode 100644
index 000..b7302b9
--- /dev/null
+++ b/watchman-support.c
@@ -0,0 +1,134 @@
+#include "cache.h"
+#include "watchman-support.h"
+#include "strbuf.h"
+#include "dir.h"
+#include 
+
+static str

[PATCH v2 01/17] read-cache.c: fix constness of verify_hdr()

2016-03-18 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 read-cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/read-cache.c b/read-cache.c
index d9fb78b..16cc487 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1345,7 +1345,7 @@ struct ondisk_cache_entry_extended {
ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
ondisk_cache_entry_size(ce_namelen(ce)))
 
-static int verify_hdr(struct cache_header *hdr, unsigned long size)
+static int verify_hdr(const struct cache_header *hdr, unsigned long size)
 {
git_SHA_CTX c;
unsigned char sha1[20];
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 04/19] index-helper: new daemon for caching index and related stuff

2016-03-10 Thread David Turner
On Thu, 2016-03-10 at 18:17 +0700, Duy Nguyen wrote:
> On Thu, Mar 10, 2016 at 6:21 AM, Junio C Hamano 
> wrote:
> > Junio C Hamano  writes:
> > 
> > > David Turner  writes:
> > > 
> > > > From: Nguyễn Thái Ngọc Duy 
> > > > 
> > > > Instead of reading the index from disk and worrying about disk
> > > > corruption, the index is cached in memory (memory bit-flips
> > > > happen
> > > > too, but hopefully less often). The result is faster read. Read
> > > > time
> > > > is reduced by 70%.
> > > > 
> > > > The biggest gain is not having to verify the trailing SHA-1,
> > > > which
> > > > takes lots of time especially on large index files.
> > 
> > Come to think of it, wouldn't it be far less intrusive change to
> > just put the index on a ramdisk and skip the trailing SHA-1
> > verification, to obtain a similar result with the same trade off
> > (i.e. blindly trusting memory instead of being paranoid)?
> 
> I'm straying off-topic again because this reminded me about lmdb :)
> For the record when I looked at lmdb I thought of using it as an
> index
> format too. It has everything we wanted in index-v5. Good enough that
> we would not need index-helper and split-index to work around current
> index format. Though I finally decided it didn't fit well.
> 
> On the good side, lmdb is b+ trees, we can update directly on disk
> (without rewriting whole file), read directly from mmap'd file and
> not
> worry about corrupting it. There's no SHA-1 verification either (and
> we can do crc32 per entry if we want too). It's good.
> 
> But, it does not fit well the our locking model (but we can change
> this). And we sometimes want to create temporary throw-away indexes
> (e.g. partial commits) which I don't think is easy to do. And the
> reading directly from mmap does not give us much because we have to
> do
> byte endian conversion  anyway.
> 
> Hmm.. on second thought, even though lmdb can't be the default format
> (for a bunch of other limitations), it can still be an option for
> super big worktrees, just like index-helper being an optional
> optimization. Hm.. hm.. if we can support lmdb as index format in
> addition to the current format, bringing back some work from Thomas..
> We may still need a daemon or something to deal with watchman, but
> the
> underlying mechanism is exactly the same... David, Junio, what do you
> think?

LMDB makes writes faster, since we only have to write the stuff that's
changed.  That's good.

Reads (ignoring SHA verification) will be slightly slower (due to the
btree overhead).  If, in general, we only had to read part of the
index, that would be faster. But a fair amount of git code is written
to assume that it is reasonable to iterate over the whole index.  So I
don't see a win from just switching to LMDB without other changes.

(I guess we could parallelize index deserialization more easily with
lmdb, but I don't know 

The original intent of the index was to be a "staging area" to build up
a tree before committing it.  This implies an alternate view of the
index as a diff against some (possibly-empty) tree.  An index diff or
status operation, then, just requires iterating over the changes. 

I haven't really dug into merges to understand whether it would be
reasonable for them to use a representation like this, and what that
would do to speed there.

In general, git takes the commit side of the commit-diff duality. This
is generally a good thing, because commits are simpler to reason about.
But I wonder if we could do better for this specific case.

But I don't think switching to LMDB would replace index-helper.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 04/19] index-helper: new daemon for caching index and related stuff

2016-03-09 Thread David Turner
On Wed, 2016-03-09 at 15:09 -0800, Junio C Hamano wrote:
> David Turner  writes:
> 
> > From: Nguyễn Thái Ngọc Duy 
> > 
> > Instead of reading the index from disk and worrying about disk
> > corruption, the index is cached in memory (memory bit-flips happen
> > too, but hopefully less often). The result is faster read. Read
> > time
> > is reduced by 70%.
> > 
> > The biggest gain is not having to verify the trailing SHA-1, which
> > takes lots of time especially on large index files. But this also
> > opens doors for further optimiztions:
> > 
> >  - we could create an in-memory format that's essentially the
> > memory
> >dump of the index to eliminate most of parsing/allocation
> >overhead. The mmap'd memory can be used straight away.
> > Experiment
> >[1] shows we could reduce read time by 88%.
> > 
> >  - we could cache non-index info such as name hash
> > 
> > The shared memory's name folows the template "git--"
> > where  is the trailing SHA-1 of the index file.  is
> > "index" for cached index files (and may be "name-hash" for name
> > -hash
> > cache). If such shared memory exists, it contains the same index
> > content as on disk. The content is already validated by the daemon
> > and
> > git won't validate it again (except comparing the trailing SHA-1s).
> 
> This indeed is an interesting approach; what is not explained but
> must be is when the on-disk index is updated to reflect the reality
> (if I am reading the explanation and the code right, while the
> daemon is running, its in-core cache becomes the source of truth by
> forcing everybody's read-index-from() to go to the daemon).  The
> explanation could be "this is only for read side, and updating the
> index happens via the traditional 'write a new file and rename it to
> the final place' codepath, at which time the daemon must be told to
> re-read it."

This seems like the explanation (from the current commit message):

"Git can poke the daemon to tell it to refresh the index cache, or to
keep it alive some more minutes via UNIX signals. It can't give any
real index data directly to the daemon. Real data goes to disk first,
then the daemon reads and verifies it from there. Poking only happens
for $GIT_DIR/index, not temporary index files."

I guess this could be rewritten as:

"Index validity is ensured by the following method: When a read is
requested from the index-helper, it checks the SHA1 of its cached index
copy against the on-disk version.  If they differ, index-helper rereads
the index.  In addition, any git process may explicitly suggest a
reread via a UNIX signal, but this is only an optimization and it is
not necessary for correctness.

In addition, Git can signal the daemon with a heartbeat signal, to keep
the daemon alive longer."

How does that sound?


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 03/19] read-cache: allow to keep mmap'd memory after reading

2016-03-09 Thread David Turner
On Wed, 2016-03-09 at 15:02 -0800, Junio C Hamano wrote:
> David Turner  writes:
> 
> > From: Nguyễn Thái Ngọc Duy 
> > 
> > Signed-off-by: Nguyễn Thái Ngọc Duy 
> > ---
> 
> As usual in Duy's patches, this one seriously lacks "why".  And also
> makes the reader wonder if the memory region is ever unmapped() and
> if so under what condition.

How about this:

Later, we will introduce git index-helper to share this memory with
other git processes.

Since the memory will be shared, it will never be unmapped (although
the kernel may of course choose to page it out).


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 01/19] trace.c: add GIT_TRACE_PACK_STATS for pack usage statistics

2016-03-09 Thread David Turner
On Wed, 2016-03-09 at 14:58 -0800, Junio C Hamano wrote:
> David Turner  writes:
...
> > trace_stats() is intended for GIT_TRACE_*_STATS variable group and
> > GIT_TRACE_PACK_STATS is more like an example how new vars can be
> > added.
...

> > +   pack_access_nr++;
> >  }
> 
> This looks rather half-hearted, in that those who are interested in
> this new number can run "wc -l" on the pack-access trace log without
> adding a new "stats" anyway.  It may make the "stats" far more
> useful to keep track of the summary information of what would be
> written to the pack access log and add to the report_pack_stats()
> output things like the average and median distance of seeks
> (i.e. differences in the "obj_offset" into the same pack by two
> adjacent pack accesse) and the variance, for example?

On reflection, I do not think I need this patch; it was in Duy's series
and the index stats patch would need to be rewritten slightly to get
rid of it, but I'm OK with that.  I wanted to make minimal changes to
Duy's patches, but I'd rather make larger changes than do work on
patches I don't need.

So unless Duy objects, I'm going to drop this from the series.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 04/19] index-helper: new daemon for caching index and related stuff

2016-03-09 Thread David Turner
On Wed, 2016-03-09 at 15:21 -0800, Junio C Hamano wrote:
> Junio C Hamano  writes:
> 
> > David Turner  writes:
> > 
> > > From: Nguyễn Thái Ngọc Duy 
> > > 
> > > Instead of reading the index from disk and worrying about disk
> > > corruption, the index is cached in memory (memory bit-flips
> > > happen
> > > too, but hopefully less often). The result is faster read. Read
> > > time
> > > is reduced by 70%.
> > > 
> > > The biggest gain is not having to verify the trailing SHA-1,
> > > which
> > > takes lots of time especially on large index files.
> 
> Come to think of it, wouldn't it be far less intrusive change to
> just put the index on a ramdisk and skip the trailing SHA-1
> verification, to obtain a similar result with the same trade off
> (i.e. blindly trusting memory instead of being paranoid)?
> 

1. If the index were stored on a ramdisk, we would *also* have to write
it to durable storage to avoid losing the user's work when they power
off.  That's more complicated.  

2. Duy notes that it is easier to add further optimizations to this --
for instance, we could share a pre-parsed version of the index.  It
would be harder to add these optimizations to the disk format, because
(a) they would take up more space, and (b) they would probably be
harder to write in a single pass, which is presently how index writing
works.

3. If we wanted to just skip SHA-1 verification, we would not need a
ramdisk; it's almost certain that the index would be in the disk cache
most of the time, so regular storage should be very nearly as fast as a
ramdisk. I think mlock might help ensure that the data remains in the
cache, although I'm not sure what the permissions story is on that.


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 00/19] index-helper, watchman

2016-03-09 Thread David Turner
This is a rebase and extension of Duy's work on git index-helper and
watchman support.

Patches 1-14 and are more-or-less just Duy's patches, rebased. I fixed
up a race condition with signal handling, but otherwise didn't change
much.

Patch 15 preserves the untracked cache and watchman extensions across
checkouts.

Patches 16-18 are new index-helper patches that I wrote to improve the
general usability of index-helper.

And patch 19 is for discussion only -- if the overall concept is
correct, I'll rewrite it into the places where it should go.  I didn't
do it yet in part because I am not sure folks will agree with it and
in part because I didn't want to mess too much with Duy's code before
re-sending it.

I haven't used this code much.  I just wrote patches 16-18 today, in
fact. It's got fewer tests than I would like, in part because it is
somewhat difficult to test since it involves two separate persistent
daemons (index-helper, watchman), and the interaction between them.

At Twitter, we're still using something based on my earlier watchman
patches[1].  We would like to switch to this (especially if this is
the version that mainstream git is going towards).  But we have other
local patches[2], and I haven't fully finished migrating them to 2.8.
My informal testing shows that with the untracked cache and watchman
index extensions, and git index-helper[3], performance is slightly
better than my earlier code.  That's not a surprise, since the
index-helper eliminates index verification time.

[1] https://github.com/dturner-tw/git/tree/dturner/watchman

[2] e.g. something like this:
https://github.com/dturner-tw/git/tree/dturner/journal

[3] In both cases, I'm using a version which has the SSE ref-parsing
and hashmap code; these were rejected due to complexity but they provide
a benefit for us, so we still use them.

David Turner (5):
  unpack-trees: preserve index extensions
  index-helper: rewrite pidfile after daemonizing
  index-helper: process management
  index-helper: autorun
  hack: watchman/untracked cache mashup

Nguyễn Thái Ngọc Duy (14):
  trace.c: add GIT_TRACE_PACK_STATS for pack usage statistics
  read-cache.c: fix constness of verify_hdr()
  read-cache: allow to keep mmap'd memory after reading
  index-helper: new daemon for caching index and related stuff
  trace.c: add GIT_TRACE_INDEX_STATS for index statistics
  index-helper: add --strict
  daemonize(): set a flag before exiting the main process
  index-helper: add --detach
  index-helper: add Windows support
  read-cache: add watchman 'WAMA' extension
  Add watchman support to reduce index refresh cost
  read-cache: allow index-helper to prepare shm before git reads it
  index-helper: use watchman to avoid refreshing index with lstat()
  update-index: enable/disable watchman support

 .gitignore |   1 +
 Documentation/git-index-helper.txt |  56 +
 Documentation/git.txt  |   4 +
 Makefile   |  21 ++
 builtin/gc.c   |   2 +-
 builtin/update-index.c |  11 +
 cache.h|  20 +-
 config.c   |   5 +
 config.mak.uname   |   3 +
 configure.ac   |   8 +
 daemon.c   |   2 +-
 dir.c  |  23 +-
 dir.h  |   6 +
 environment.c  |   3 +
 git-compat-util.h  |   1 +
 git.c  |  37 +++-
 index-helper.c | 437 
 read-cache.c   | 441 +++--
 setup.c|   4 +-
 sha1_file.c|  24 ++
 shm.c  | 163 ++
 shm.h  |  23 ++
 t/t7063-status-untracked-cache.sh  |  23 ++
 t/t7900-index-helper.sh|  23 ++
 t/test-lib-functions.sh|   4 +
 trace.c|  16 ++
 trace.h|   1 +
 unpack-trees.c |   1 +
 watchman-support.c | 134 +++
 watchman-support.h |   7 +
 30 files changed, 1481 insertions(+), 23 deletions(-)
 create mode 100644 Documentation/git-index-helper.txt
 create mode 100644 index-helper.c
 create mode 100644 shm.c
 create mode 100644 shm.h
 create mode 100755 t/t7900-index-helper.sh
 create mode 100644 watchman-support.c
 create mode 100644 watchman-support.h

-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/19] trace.c: add GIT_TRACE_PACK_STATS for pack usage statistics

2016-03-09 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

trace_stats() is intended for GIT_TRACE_*_STATS variable group and
GIT_TRACE_PACK_STATS is more like an example how new vars can be added.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Documentation/git.txt |  3 +++
 cache.h   |  2 ++
 git.c |  1 +
 sha1_file.c   | 24 
 trace.c   | 13 +
 trace.h   |  1 +
 6 files changed, 44 insertions(+)

diff --git a/Documentation/git.txt b/Documentation/git.txt
index 2754af8..794271e 100644
--- a/Documentation/git.txt
+++ b/Documentation/git.txt
@@ -1055,6 +1055,9 @@ of clones and fetches.
time of each Git command.
See 'GIT_TRACE' for available trace output options.
 
+'GIT_TRACE_PACK_STATS'::
+   Print various statistics.
+
 'GIT_TRACE_SETUP'::
Enables trace messages printing the .git, working tree and current
working directory after Git has completed its setup phase.
diff --git a/cache.h b/cache.h
index b829410..7e01403 100644
--- a/cache.h
+++ b/cache.h
@@ -1828,4 +1828,6 @@ void sleep_millisec(int millisec);
  */
 void safe_create_dir(const char *dir, int share);
 
+void report_pack_stats(struct trace_key *key);
+
 #endif /* CACHE_H */
diff --git a/git.c b/git.c
index 6cc0c07..a4f6f71 100644
--- a/git.c
+++ b/git.c
@@ -655,6 +655,7 @@ int main(int argc, char **av)
git_setup_gettext();
 
trace_command_performance(argv);
+   trace_stats();
 
/*
 * "git-" is the same as "git ", but we obviously:
diff --git a/sha1_file.c b/sha1_file.c
index d0f2aa0..14cebdf 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -518,6 +518,7 @@ static unsigned int peak_pack_open_windows;
 static unsigned int pack_open_windows;
 static unsigned int pack_open_fds;
 static unsigned int pack_max_fds;
+static unsigned int pack_access_nr;
 static size_t peak_pack_mapped;
 static size_t pack_mapped;
 struct packed_git *packed_git;
@@ -543,6 +544,28 @@ void pack_report(void)
sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
 }
 
+void report_pack_stats(struct trace_key *key)
+{
+   trace_printf_key(key, "\n"
+"pack_report: getpagesize()= %10" SZ_FMT 
"\n"
+"pack_report: core.packedGitWindowSize = %10" SZ_FMT 
"\n"
+"pack_report: core.packedGitLimit  = %10" SZ_FMT 
"\n"
+"pack_report: pack_used_ctr= %10u\n"
+"pack_report: pack_mmap_calls  = %10u\n"
+"pack_report: pack_open_windows= %10u / %10u\n"
+"pack_report: pack_mapped  = "
+"%10" SZ_FMT " / %10" SZ_FMT "\n"
+"pack_report: pack accesss = %10u\n",
+sz_fmt(getpagesize()),
+sz_fmt(packed_git_window_size),
+sz_fmt(packed_git_limit),
+pack_used_ctr,
+pack_mmap_calls,
+pack_open_windows, peak_pack_open_windows,
+sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped),
+pack_access_nr);
+}
+
 /*
  * Open and mmap the index file at path, perform a couple of
  * consistency checks, then record its information to p.  Return 0 on
@@ -2238,6 +2261,7 @@ static void write_pack_access_log(struct packed_git *p, 
off_t obj_offset)
static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
 p->pack_name, (uintmax_t)obj_offset);
+   pack_access_nr++;
 }
 
 int do_check_packed_object_crc;
diff --git a/trace.c b/trace.c
index 4aeea60..b1d0885 100644
--- a/trace.c
+++ b/trace.c
@@ -432,3 +432,16 @@ void trace_command_performance(const char **argv)
sq_quote_argv(&command_line, argv, 0);
command_start_time = getnanotime();
 }
+
+static struct trace_key trace_pack_stats = TRACE_KEY_INIT(PACK_STATS);
+
+static void print_stats_atexit(void)
+{
+   report_pack_stats(&trace_pack_stats);
+}
+
+void trace_stats(void)
+{
+   if (trace_want(&trace_pack_stats))
+   atexit(print_stats_atexit);
+}
diff --git a/trace.h b/trace.h
index 179b249..52bda4e 100644
--- a/trace.h
+++ b/trace.h
@@ -19,6 +19,7 @@ extern void trace_disable(struct trace_key *key);
 extern uint64_t getnanotime(void);
 extern void trace_command_performance(const char **argv);
 extern void trace_verbatim(struct trace_key *key, const void *buf, unsigned 
len);
+extern void trace_stats(void);
 
 #ifndef HAVE_VARIADIC_MACROS
 
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/19] read-cache.c: fix constness of verify_hdr()

2016-03-09 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 read-cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/read-cache.c b/read-cache.c
index d9fb78b..16cc487 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1345,7 +1345,7 @@ struct ondisk_cache_entry_extended {
ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
ondisk_cache_entry_size(ce_namelen(ce)))
 
-static int verify_hdr(struct cache_header *hdr, unsigned long size)
+static int verify_hdr(const struct cache_header *hdr, unsigned long size)
 {
git_SHA_CTX c;
unsigned char sha1[20];
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/19] read-cache: allow to keep mmap'd memory after reading

2016-03-09 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 cache.h  |  3 +++
 read-cache.c | 13 -
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index 7e01403..c43ef3d 100644
--- a/cache.h
+++ b/cache.h
@@ -333,11 +333,14 @@ struct index_state {
struct split_index *split_index;
struct cache_time timestamp;
unsigned name_hash_initialized : 1,
+keep_mmap : 1,
 initialized : 1;
struct hashmap name_hash;
struct hashmap dir_hash;
unsigned char sha1[20];
struct untracked_cache *untracked;
+   void *mmap;
+   size_t mmap_size;
 };
 
 extern struct index_state the_index;
diff --git a/read-cache.c b/read-cache.c
index 16cc487..7e387e9 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1574,6 +1574,10 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
mmap = xmmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (mmap == MAP_FAILED)
die_errno("unable to map index file");
+   if (istate->keep_mmap) {
+   istate->mmap = mmap;
+   istate->mmap_size = mmap_size;
+   }
close(fd);
 
hdr = mmap;
@@ -1626,10 +1630,12 @@ int do_read_index(struct index_state *istate, const 
char *path, int must_exist)
src_offset += 8;
src_offset += extsize;
}
-   munmap(mmap, mmap_size);
+   if (!istate->keep_mmap)
+   munmap(mmap, mmap_size);
return istate->cache_nr;
 
 unmap:
+   istate->mmap = NULL;
munmap(mmap, mmap_size);
die("index file corrupt");
 }
@@ -1655,6 +1661,7 @@ int read_index_from(struct index_state *istate, const 
char *path)
discard_index(split_index->base);
else
split_index->base = xcalloc(1, sizeof(*split_index->base));
+   split_index->base->keep_mmap = istate->keep_mmap;
ret = do_read_index(split_index->base,
git_path("sharedindex.%s",
 sha1_to_hex(split_index->base_sha1)), 1);
@@ -1698,6 +1705,10 @@ int discard_index(struct index_state *istate)
free(istate->cache);
istate->cache = NULL;
istate->cache_alloc = 0;
+   if (istate->keep_mmap && istate->mmap) {
+   munmap(istate->mmap, istate->mmap_size);
+   istate->mmap = NULL;
+   }
discard_split_index(istate);
free_untracked_cache(istate->untracked);
istate->untracked = NULL;
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 19/19] hack: watchman/untracked cache mashup

2016-03-09 Thread David Turner
DO NOT APPLY THIS

This is a hack to add untracked cache info to the watchman WAMA
extension.  The idea is that we don't have to stat directories if
watchman hasn't told us of any changes in those dirs.

If this is the right idea, I can go back and add it to the relevant
patch(es).  But I want to see if other folks think it's the right idea
first.

Signed-off-by: David Turner 
---
 dir.c  |  23 +++--
 dir.h  |   6 +++
 read-cache.c   | 142 +++--
 watchman-support.c |  21 +++-
 4 files changed, 182 insertions(+), 10 deletions(-)

diff --git a/dir.c b/dir.c
index 69e0be6..5058b29 100644
--- a/dir.c
+++ b/dir.c
@@ -597,9 +597,9 @@ static void trim_trailing_spaces(char *buf)
  *
  * If "name" has the trailing slash, it'll be excluded in the search.
  */
-static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
-   struct untracked_cache_dir 
*dir,
-   const char *name, int len)
+struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
+struct untracked_cache_dir *dir,
+const char *name, int len)
 {
int first, last;
struct untracked_cache_dir *d;
@@ -1726,6 +1726,17 @@ static int valid_cached_dir(struct dir_struct *dir,
if (!untracked)
return 0;
 
+   if (dir->untracked->use_watchman) {
+   /*
+* With watchman, we can trust the untracked cache's
+* valid field.
+*/
+   if (untracked->valid)
+   goto skip_stat;
+   else
+   invalidate_directory(dir->untracked, untracked);
+   }
+
if (stat(path->len ? path->buf : ".", &st)) {
invalidate_directory(dir->untracked, untracked);
memset(&untracked->stat_data, 0, sizeof(untracked->stat_data));
@@ -1739,6 +1750,7 @@ static int valid_cached_dir(struct dir_struct *dir,
return 0;
}
 
+skip_stat:
if (untracked->check_only != !!check_only) {
invalidate_directory(dir->untracked, untracked);
return 0;
@@ -2625,8 +2637,10 @@ static void free_untracked(struct untracked_cache_dir 
*ucd)
 
 void free_untracked_cache(struct untracked_cache *uc)
 {
-   if (uc)
+   if (uc) {
free_untracked(uc->root);
+   string_list_clear(&uc->invalid_untracked, 0);
+   }
free(uc);
 }
 
@@ -2775,6 +2789,7 @@ struct untracked_cache *read_untracked_extension(const 
void *data, unsigned long
return NULL;
 
uc = xcalloc(1, sizeof(*uc));
+   string_list_init(&uc->invalid_untracked, 1);
strbuf_init(&uc->ident, ident_len);
strbuf_add(&uc->ident, ident, ident_len);
load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat,
diff --git a/dir.h b/dir.h
index 3ec3fb0..8fd3f9e 100644
--- a/dir.h
+++ b/dir.h
@@ -142,6 +142,9 @@ struct untracked_cache {
int gitignore_invalidated;
int dir_invalidated;
int dir_opened;
+   /* watchman invalidation data */
+   unsigned int use_watchman : 1;
+   struct string_list invalid_untracked;
 };
 
 struct dir_struct {
@@ -312,4 +315,7 @@ struct untracked_cache *read_untracked_extension(const void 
*data, unsigned long
 void write_untracked_extension(struct strbuf *out, struct untracked_cache 
*untracked);
 void add_untracked_cache(struct index_state *istate);
 void remove_untracked_cache(struct index_state *istate);
+struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
+struct untracked_cache_dir *dir,
+const char *name, int len);
 #endif
diff --git a/read-cache.c b/read-cache.c
index 40d789a..66438d8 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1237,7 +1237,7 @@ int refresh_index(struct index_state *istate, unsigned 
int flags,
if (!new) {
const char *fmt;
 
-   if (really && cache_errno == EINVAL) {
+   if (really || cache_errno == EINVAL) {
/* If we are doing --really-refresh that
 * means the index is not valid anymore.
 */
@@ -1377,11 +1377,82 @@ static int verify_hdr(const struct cache_header *hdr, 
unsigned long size)
return 0;
 }
 
+static struct untracked_cache_dir *find_untracked_cache_dir(
+   struct untracked_cache *uc, struct untracked_cache_dir *ucd,
+   const char *name)
+{
+   int component_len;
+   

[PATCH 15/19] unpack-trees: preserve index extensions

2016-03-09 Thread David Turner
Make git checkout (and other unpack_tree operations) preserve the
untracked cache and watchman status. This is valuable for two reasons:

1. Often, an unpack_tree operation will not touch large parts of the
working tree, and thus most of the untracked cache will continue to be
valid.

2. Even if the untracked cache were entirely invalidated by such an
operation, the user has signaled their intention to have such a cache,
and we don't want to throw it away.

The same logic applies to the watchman state.

Signed-off-by: David Turner 
---
 cache.h   |  1 +
 read-cache.c  |  8 
 t/t7063-status-untracked-cache.sh | 23 +++
 t/test-lib-functions.sh   |  4 
 unpack-trees.c|  1 +
 5 files changed, 37 insertions(+)

diff --git a/cache.h b/cache.h
index 272c928..03c34f1 100644
--- a/cache.h
+++ b/cache.h
@@ -572,6 +572,7 @@ extern void write_watchman_ext(struct strbuf *sb, struct 
index_state* istate);
 #define REFRESH_DAEMON (1 << 2)
 extern int write_locked_index(struct index_state *, struct lock_file *lock, 
unsigned flags);
 extern int discard_index(struct index_state *);
+extern void move_index_extensions(struct index_state *dst, struct index_state 
*src);
 extern int unmerged_index(const struct index_state *);
 extern int verify_path(const char *path);
 extern int index_dir_exists(struct index_state *istate, const char *name, int 
namelen);
diff --git a/read-cache.c b/read-cache.c
index 78f5f0e..40d789a 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2591,3 +2591,11 @@ void report_index_stats(struct trace_key *key)
 "index stats: file writes   = %10u\n",
 nr_read_index, nr_read_shm_index, nr_write_index);
 }
+
+void move_index_extensions(struct index_state *dst, struct index_state *src)
+{
+   dst->untracked = src->untracked;
+   src->untracked = NULL;
+   dst->last_update = src->last_update;
+   src->last_update = NULL;
+}
diff --git a/t/t7063-status-untracked-cache.sh 
b/t/t7063-status-untracked-cache.sh
index a971884..a2c8535 100755
--- a/t/t7063-status-untracked-cache.sh
+++ b/t/t7063-status-untracked-cache.sh
@@ -646,4 +646,27 @@ test_expect_success 'test ident field is working' '
test_cmp ../expect ../err
 '
 
+test_expect_success 'untracked cache survives a checkout' '
+   git commit --allow-empty -m empty &&
+   test-dump-untracked-cache >../before &&
+   test_when_finished  "git checkout master" &&
+   git checkout -b other_branch &&
+   test-dump-untracked-cache >../after &&
+   test_cmp ../before ../after &&
+   test_commit test &&
+   test-dump-untracked-cache >../before &&
+   git checkout master &&
+   test-dump-untracked-cache >../after &&
+   test_cmp ../before ../after
+'
+
+
+test_expect_success 'untracked cache survives a commit' '
+   test-dump-untracked-cache >../before &&
+   git add done/two &&
+   git commit -m commit &&
+   test-dump-untracked-cache >../after &&
+   test_cmp ../before ../after
+'
+
 test_done
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 8d99eb3..e974b5b 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -186,6 +186,10 @@ test_commit () {
test_tick
fi &&
git commit $signoff -m "$1" &&
+   if [ "$(git config core.bare)" = false ]
+   then
+   git update-index --force-untracked-cache
+   fi
git tag "${4:-$1}"
 }
 
diff --git a/unpack-trees.c b/unpack-trees.c
index 9f55cc2..fc90eb3 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1215,6 +1215,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, 
struct unpack_trees_options
  WRITE_TREE_SILENT |
  WRITE_TREE_REPAIR);
}
+   move_index_extensions(&o->result, o->dst_index);
discard_index(o->dst_index);
*o->dst_index = o->result;
} else {
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/19] read-cache: allow index-helper to prepare shm before git reads it

2016-03-09 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

If index-helper puts 'W' before pid in $GIT_DIR/index-helper.pid, then
git will sleep for a while, expecting to be waken up by SIGUSR1 when
index-helper has done shm preparation, or after the timeout.

Signed-off-by: Nguyễn Thái Ngọc Duy 
Signed-off-by: David Turner 
---
 read-cache.c | 36 ++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 85ef15b..57c5df9 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1613,14 +1613,46 @@ static void do_poke(struct strbuf *sb, int 
refresh_cache)
PostMessage(hwnd, refresh_cache ? WM_USER : WM_USER + 1, 0, 0);
 }
 #else
+
+static volatile int done_sleeping;
+
+static void mark_done_sleeping(int sig)
+{
+   done_sleeping = 1;
+}
+
+/*
+ * Send a message to the index-helper to let it know that we're going
+ * to read the index.  If refresh_cache is true, then the index-helper
+ * should re-read the index; otherwise, it should just stay alive.
+ *
+ * If the index-helper supports watchman, it will refresh the index
+ * before it hands it over.  Wait up to one second for a response
+ * indicating that the index has been successfully refreshed.
+ *
+ */
 static void do_poke(struct strbuf *sb, int refresh_cache)
 {
-   char*start = sb->buf;
+   int  wait  = sb->buf[0] == 'W';
+   char*start = wait ? sb->buf + 1 : sb->buf;
char*end   = NULL;
pid_tpid   = strtoul(start, &end, 10);
+   int  ret;
+   int  count = 0;
+
+   done_sleeping = 0;
if (!end || end != sb->buf + sb->len)
return;
-   kill(pid, refresh_cache ? SIGHUP : SIGUSR1);
+   if (!refresh_cache && wait)
+   signal(SIGHUP, mark_done_sleeping);
+   ret = kill(pid, refresh_cache ? SIGHUP : SIGUSR1);
+   if (!refresh_cache && wait) {
+   if (!ret)
+   while (!done_sleeping && count++ < 1000)
+   sleep_millisec(1);
+
+   sigaction(SIGHUP, NULL, NULL);
+   }
 }
 #endif
 
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 17/19] index-helper: process management

2016-03-09 Thread David Turner
Don't start index-helper when it is already running for a repository.

Keep the index-helper pid file's mtime up-to-date.  Before starting
index-helper, check that (a) the pid file's mtime is recent and (b)
there is some process with that pid.  Of course, it's possible that
the mtime is recent but the index-helper has died anyway (and some other
process has taken its pid), but that's pretty unlikely.

Provide index-helper --kill (to kill the running index-helper) and
index-helper --ignore-existing (to start up a new index-helper even
if we believe that there's one already running).

Add a test for index-helper's pid-file writing and for --kill.

Signed-off-by: David Turner 
---
 index-helper.c  | 95 +
 t/t7900-index-helper.sh | 23 
 2 files changed, 111 insertions(+), 7 deletions(-)
 create mode 100755 t/t7900-index-helper.sh

diff --git a/index-helper.c b/index-helper.c
index dc03e1e..a75da60 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -152,6 +152,17 @@ static void refresh(int sig)
 
 #ifdef HAVE_SHM
 
+static void touch_pid_file(void)
+{
+   /*
+* If we fail to update the times on index-helper.pid, we've
+* probably had the repo deleted out from under us or otherwise
+* lost access; might as well give up.
+*/
+   if (utimes(git_path("index-helper.pid"), NULL))
+   exit(0);
+}
+
 #ifdef USE_WATCHMAN
 static void share_watchman(struct index_state *istate,
   struct shm *is, pid_t pid)
@@ -211,9 +222,10 @@ static void prepare_index(int sig, siginfo_t *si, void 
*context)
 
 #endif
 
-static void loop(const char *pid_file, int idle_in_seconds)
+static void loop(const char *pid_file, int idle_in_minutes)
 {
struct sigaction sa;
+   int timeout_count = 0;
 
sigchain_pop(SIGHUP);   /* pushed by sigchain_push_common */
sigchain_push(SIGHUP, refresh);
@@ -225,19 +237,25 @@ static void loop(const char *pid_file, int 
idle_in_seconds)
sigaction(SIGUSR1, &sa, NULL);
 
refresh(0);
-   while (sleep(idle_in_seconds))
-   ; /* do nothing, all is handled by signal handlers already */
+   while (timeout_count < idle_in_minutes) {
+   if (sleep(60) == EINTR)
+   timeout_count = 0;
+   else
+   timeout_count ++;
+   touch_pid_file();
+   }
 }
 
 #elif defined(GIT_WINDOWS_NATIVE)
 
-static void loop(const char *pid_file, int idle_in_seconds)
+static void loop(const char *pid_file, int idle_in_minutes)
 {
HWND hwnd;
UINT_PTR timer = 0;
MSG msg;
HINSTANCE hinst = GetModuleHandle(NULL);
WNDCLASS wc;
+   int timeout_count = 0;
 
/*
 * Emulate UNIX signals by sending WM_USER+x to a
@@ -258,11 +276,18 @@ static void loop(const char *pid_file, int 
idle_in_seconds)
 
refresh(0);
while (1) {
-   timer = SetTimer(hwnd, timer, idle_in_seconds * 1000, NULL);
+   timer = SetTimer(hwnd, timer, 60 * 1000, NULL);
if (!timer)
die(_("no timer!"));
-   if (!GetMessage(&msg, hwnd, 0, 0) || msg.message == WM_TIMER)
+   if (!GetMessage(&msg, hwnd, 0, 0))
break;
+   if (msg.message == WM_TIMER) {
+   timeout_count ++;
+   if (timeout_count == idle_in_minutes)
+   break;
+   }
+   timeout_count = 0;
+   touch_pid_file();
switch (msg.message) {
case WM_USER:
refresh(0);
@@ -288,6 +313,44 @@ static const char * const usage_text[] = {
NULL
 };
 
+static int already_running(void)
+{
+   char contents[20] = {0};
+   char *end;
+   int fd, dead, i = 0;
+   time_t now;
+   pid_t pid;
+   struct stat st;
+
+   fd = open(git_path("index-helper.pid"), O_RDONLY);
+   if (fd < 0)
+   return 0;
+
+   if (fstat(fd, &st) < 0)
+   return 0;
+
+   now = time(&now);
+
+   /*
+* We touch the pid file every minute, so if a pid file hasn't
+* been updated in two minutes, it's out-of-date.
+*/
+   if (st.st_mtime + 120 < now)
+   return 0;
+
+   read_in_full(fd, &contents, sizeof(contents));
+
+   while (!isdigit(contents[i]) && contents[i])
+   i++;
+   pid = strtoll(contents + i, &end, 10);
+   if (contents == end)
+   return 0;
+   dead = kill(pid, 0);
+   if (!dead)
+   return pid;
+   return 0;
+}
+
 static const char *write_pid(void)
 {
static struct strbuf sb = STRBUF_INIT;
@@ -314,6 +377,8 @@ int main(int

[PATCH 14/19] update-index: enable/disable watchman support

2016-03-09 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/update-index.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index 1c94ca5..7c08e1c 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -914,6 +914,7 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
 {
int newfd, entries, has_errors = 0, nul_term_line = 0;
enum uc_mode untracked_cache = UC_UNSPECIFIED;
+   int use_watchman = -1;
int read_from_stdin = 0;
int prefix_length = prefix ? strlen(prefix) : 0;
int preferred_index_format = 0;
@@ -1012,6 +1013,8 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
N_("test if the filesystem supports untracked 
cache"), UC_TEST),
OPT_SET_INT(0, "force-untracked-cache", &untracked_cache,
N_("enable untracked cache without testing the 
filesystem"), UC_FORCE),
+   OPT_BOOL(0, "watchman", &use_watchman,
+   N_("use or not use watchman to reduce refresh cost")),
OPT_END()
};
 
@@ -1149,6 +1152,14 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
die("Bug: bad untracked_cache value: %d", untracked_cache);
}
 
+   if (use_watchman > 0) {
+   the_index.last_update= xstrdup("");
+   the_index.cache_changed |= WATCHMAN_CHANGED;
+   } else if (!use_watchman) {
+   the_index.last_update= NULL;
+   the_index.cache_changed |= WATCHMAN_CHANGED;
+   }
+
if (active_cache_changed) {
if (newfd < 0) {
if (refresh_args.flags & REFRESH_QUIET)
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 18/19] index-helper: autorun

2016-03-09 Thread David Turner
Introduce a new config option, indexhelper.autorun, to automatically
run git index-helper before starting up a builtin git command.  This
enables users to keep index-helper running without manual
intervention.

Signed-off-by: David Turner 
---
 git.c   | 38 +-
 index-helper.c  | 11 ++-
 t/t7900-index-helper.sh | 10 ++
 3 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/git.c b/git.c
index a4f6f71..ccf04ab 100644
--- a/git.c
+++ b/git.c
@@ -521,6 +521,40 @@ static void strip_extension(const char **argv)
 #define strip_extension(cmd)
 #endif
 
+static int want_auto_index_helper(void)
+{
+   int want = -1;
+   const char *value = NULL;
+   const char *key = "indexhelper.autorun";
+
+   if (git_config_key_is_valid(key) &&
+   !git_config_get_value(key, &value)) {
+   int b = git_config_maybe_bool(key, value);
+   want = b >= 0 ? b : 0;
+   }
+   return want;
+}
+
+static void maybe_run_index_helper(struct cmd_struct *cmd)
+{
+   const char *argv[] = {"git-index-helper", "--detach", "--auto", NULL};
+   int status;
+
+   if (!(cmd->option & NEED_WORK_TREE))
+   return;
+
+   if (want_auto_index_helper() <= 0)
+   return;
+
+   trace_argv_printf(argv, "trace: auto index-helper:");
+
+   status = run_command_v_opt(argv, RUN_SILENT_EXEC_FAILURE | 
RUN_CLEAN_ON_EXIT);
+
+   if (status) {
+   warning("You specified indexhelper.autorun, but running 
git-index-helper failed");
+   }
+}
+
 static void handle_builtin(int argc, const char **argv)
 {
const char *cmd;
@@ -536,8 +570,10 @@ static void handle_builtin(int argc, const char **argv)
}
 
builtin = get_builtin(cmd);
-   if (builtin)
+   if (builtin) {
+   maybe_run_index_helper(builtin);
exit(run_builtin(builtin, argc, argv));
+   }
 }
 
 static void execv_dashed_external(const char **argv)
diff --git a/index-helper.c b/index-helper.c
index a75da60..bc5c328 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -379,6 +379,7 @@ int main(int argc, char **argv)
int idle_in_minutes = 10, detach = 0;
int ignore_existing = 0;
int kill_existing = 0;
+   int nongit = 0, autorun = 0;
const char *pid_file;
struct option options[] = {
OPT_INTEGER(0, "exit-after", &idle_in_minutes,
@@ -388,6 +389,7 @@ int main(int argc, char **argv)
OPT_BOOL(0, "detach", &detach, "detach the process"),
OPT_BOOL(0, "ignore-existing", &ignore_existing, "run even if 
another index-helper seems to be running for this repo"),
OPT_BOOL(0, "kill", &kill_existing, "kill any running 
index-helper for this repo"),
+   OPT_BOOL(0, "auto", &autorun, "this is an automatic run of git 
index-helper, so certain errors can be solved by silently exiting"),
OPT_END()
};
 
@@ -397,11 +399,18 @@ int main(int argc, char **argv)
if (argc == 2 && !strcmp(argv[1], "-h"))
usage_with_options(usage_text, options);
 
-   prefix = setup_git_directory();
+   prefix = setup_git_directory_gently(&nongit);
if (parse_options(argc, (const char **)argv, prefix,
  options, usage_text, 0))
die(_("too many arguments"));
 
+   if (nongit) {
+   if (autorun)
+   exit(0);
+   else
+   die("Not a git repository");
+   }
+
if (ignore_existing && kill_existing)
die(_("--ignore-existing and --kill don't make sense 
together"));
 
diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh
index 6708180..e4f9564 100755
--- a/t/t7900-index-helper.sh
+++ b/t/t7900-index-helper.sh
@@ -20,4 +20,14 @@ test_expect_success 'index-helper creates usable pid file 
and can be killed' '
! kill -0 $pid
 '
 
+test_expect_success 'index-helper autorun works' '
+   rm -f .git/index-helper.pid &&
+   git status &&
+   test_path_is_missing .git/index-helper.pid &&
+   test_config indexhelper.autorun true &&
+   git status &&
+   test_path_is_file .git/index-helper.pid &&
+   git index-helper --kill
+'
+
 test_done
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/19] trace.c: add GIT_TRACE_INDEX_STATS for index statistics

2016-03-09 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Documentation/git.txt |  1 +
 cache.h   |  1 +
 read-cache.c  | 16 
 trace.c   |  5 -
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/Documentation/git.txt b/Documentation/git.txt
index 794271e..71a88a8 100644
--- a/Documentation/git.txt
+++ b/Documentation/git.txt
@@ -1056,6 +1056,7 @@ of clones and fetches.
See 'GIT_TRACE' for available trace output options.
 
 'GIT_TRACE_PACK_STATS'::
+'GIT_TRACE_INDEX_STATS'::
Print various statistics.
 
 'GIT_TRACE_SETUP'::
diff --git a/cache.h b/cache.h
index bc2f529..e22296c 100644
--- a/cache.h
+++ b/cache.h
@@ -1835,5 +1835,6 @@ void sleep_millisec(int millisec);
 void safe_create_dir(const char *dir, int share);
 
 void report_pack_stats(struct trace_key *key);
+void report_index_stats(struct trace_key *key);
 
 #endif /* CACHE_H */
diff --git a/read-cache.c b/read-cache.c
index eb4b9b4..7bd3ce4 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -50,6 +50,10 @@ static struct cache_entry *refresh_cache_entry(struct 
cache_entry *ce,
 struct index_state the_index;
 static const char *alternate_index_output;
 
+static unsigned int nr_read_index;
+static unsigned int nr_read_shm_index;
+static unsigned int nr_write_index;
+
 static void set_index_entry(struct index_state *istate, int nr, struct 
cache_entry *ce)
 {
istate->cache[nr] = ce;
@@ -1614,6 +1618,7 @@ static int try_shm(struct index_state *istate)
istate->mmap = new_mmap;
istate->mmap_size = new_size;
istate->from_shm = 1;
+   nr_read_shm_index++;
return 0;
 }
 
@@ -1711,6 +1716,7 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
}
if (!istate->keep_mmap)
munmap(mmap, mmap_size);
+   nr_read_index++;
return istate->cache_nr;
 
 unmap:
@@ -2197,6 +2203,7 @@ static int do_write_index(struct index_state *istate, int 
newfd,
return -1;
istate->timestamp.sec = (unsigned int)st.st_mtime;
istate->timestamp.nsec = ST_MTIME_NSEC(st);
+   nr_write_index++;
return 0;
 }
 
@@ -2423,3 +2430,12 @@ void stat_validity_update(struct stat_validity *sv, int 
fd)
fill_stat_data(sv->sd, &st);
}
 }
+
+void report_index_stats(struct trace_key *key)
+{
+   trace_printf_key(key, "\n"
+"index stats: file reads= %10u\n"
+"index stats: cache reads   = %10u\n"
+"index stats: file writes   = %10u\n",
+nr_read_index, nr_read_shm_index, nr_write_index);
+}
diff --git a/trace.c b/trace.c
index b1d0885..eea1fa8 100644
--- a/trace.c
+++ b/trace.c
@@ -434,14 +434,17 @@ void trace_command_performance(const char **argv)
 }
 
 static struct trace_key trace_pack_stats = TRACE_KEY_INIT(PACK_STATS);
+static struct trace_key trace_index_stats = TRACE_KEY_INIT(INDEX_STATS);
 
 static void print_stats_atexit(void)
 {
report_pack_stats(&trace_pack_stats);
+   report_index_stats(&trace_index_stats);
 }
 
 void trace_stats(void)
 {
-   if (trace_want(&trace_pack_stats))
+   if (trace_want(&trace_pack_stats) ||
+   trace_want(&trace_index_stats))
atexit(print_stats_atexit);
 }
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/19] Add watchman support to reduce index refresh cost

2016-03-09 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

The previous patch has the logic to clear bits in 'WAMA' bitmap. This
patch has logic to set bits as told by watchman. The missing bit,
_using_ these bits, are not here yet.

A lot of this code is written by David Turner originally, mostly from
[1]. I'm just copying and polishing it a bit.

[1] http://article.gmane.org/gmane.comp.version-control.git/248006

Signed-off-by: David Turner 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Makefile   |   7 
 cache.h|   1 +
 config.c   |   5 +++
 configure.ac   |   8 
 environment.c  |   3 ++
 watchman-support.c | 115 +
 watchman-support.h |   7 
 7 files changed, 146 insertions(+)
 create mode 100644 watchman-support.c
 create mode 100644 watchman-support.h

diff --git a/Makefile b/Makefile
index a6c668b..e51331c 100644
--- a/Makefile
+++ b/Makefile
@@ -1416,6 +1416,12 @@ else
LIB_OBJS += thread-utils.o
 endif
 
+ifdef USE_WATCHMAN
+   LIB_H += watchman-support.h
+   LIB_OBJS += watchman-support.o
+   BASIC_CFLAGS += -DUSE_WATCHMAN
+endif
+
 ifdef HAVE_PATHS_H
BASIC_CFLAGS += -DHAVE_PATHS_H
 endif
@@ -2164,6 +2170,7 @@ GIT-BUILD-OPTIONS: FORCE
@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+
@echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@+
@echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst 
','\'',$(NO_UNIX_SOCKETS)))'\' >>$@+
+   @echo USE_WATCHMAN=\''$(subst ','\'',$(subst 
','\'',$(USE_WATCHMAN)))'\' >>$@+
 ifdef TEST_OUTPUT_DIRECTORY
@echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst 
','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@+
 endif
diff --git a/cache.h b/cache.h
index 8f7b4b1..bf20652 100644
--- a/cache.h
+++ b/cache.h
@@ -688,6 +688,7 @@ extern char *git_replace_ref_base;
 
 extern int fsync_object_files;
 extern int core_preload_index;
+extern int core_watchman_sync_timeout;
 extern int core_apply_sparse_checkout;
 extern int precomposed_unicode;
 extern int protect_hfs;
diff --git a/config.c b/config.c
index 9ba40bc..e6dc141 100644
--- a/config.c
+++ b/config.c
@@ -882,6 +882,11 @@ static int git_default_core_config(const char *var, const 
char *value)
return 0;
}
 
+   if (!strcmp(var, "core.watchmansynctimeout")) {
+   core_watchman_sync_timeout = git_config_int(var, value);
+   return 0;
+   }
+
if (!strcmp(var, "core.createobject")) {
if (!strcmp(value, "rename"))
object_creation_mode = OBJECT_CREATION_USES_RENAMES;
diff --git a/configure.ac b/configure.ac
index 89e2590..6f10a15 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1092,6 +1092,14 @@ AC_COMPILE_IFELSE([BSD_SYSCTL_SRC],
HAVE_BSD_SYSCTL=])
 GIT_CONF_SUBST([HAVE_BSD_SYSCTL])
 
+#
+# Check for watchman client library
+
+AC_CHECK_LIB([watchman], [watchman_connect],
+   [USE_WATCHMAN=YesPlease],
+   [USE_WATCHMAN=])
+GIT_CONF_SUBST([USE_WATCHMAN])
+
 ## Other checks.
 # Define USE_PIC if you need the main git objects to be built with -fPIC
 # in order to build and link perl/Git.so.  x86-64 seems to need this.
diff --git a/environment.c b/environment.c
index 6dec9d0..35e03c7 100644
--- a/environment.c
+++ b/environment.c
@@ -94,6 +94,9 @@ int core_preload_index = 1;
  */
 int ignore_untracked_cache_config;
 
+int core_watchman_sync_timeout = 300;
+
+
 /* This is set by setup_git_dir_gently() and/or git_default_config() */
 char *git_work_tree_cfg;
 static char *work_tree;
diff --git a/watchman-support.c b/watchman-support.c
new file mode 100644
index 000..08e37ae
--- /dev/null
+++ b/watchman-support.c
@@ -0,0 +1,115 @@
+#include "cache.h"
+#include "watchman-support.h"
+#include "strbuf.h"
+#include 
+
+static struct watchman_query *make_query(const char *last_update)
+{
+   struct watchman_query *query = watchman_query();
+   watchman_query_set_fields(query, WATCHMAN_FIELD_NAME |
+WATCHMAN_FIELD_EXISTS |
+WATCHMAN_FIELD_NEWER);
+   watchman_query_set_empty_on_fresh(query, 1);
+   query->sync_timeout = core_watchman_sync_timeout;
+   if (*last_update)
+   watchman_query_set_since_oclock(query, last_update);
+   return query;
+}
+
+static struct watchman_query_result* query_watchman(
+   struct index_state *istate, struct watchman_connection *connection,
+   const char *fs_path, const char *last_update)
+{
+   struc

[PATCH 07/19] daemonize(): set a flag before exiting the main process

2016-03-09 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

This allows signal handlers and atexit functions to realize this
situation and not clean up.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/gc.c | 2 +-
 cache.h  | 2 +-
 daemon.c | 2 +-
 setup.c  | 4 +++-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index c583aad..37180de 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -385,7 +385,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
 * failure to daemonize is ok, we'll continue
 * in foreground
 */
-   daemonized = !daemonize();
+   daemonized = !daemonize(NULL);
}
} else
add_repack_all_option();
diff --git a/cache.h b/cache.h
index 1d7e561..cc1f6f5 100644
--- a/cache.h
+++ b/cache.h
@@ -530,7 +530,7 @@ extern int set_git_dir_init(const char *git_dir, const char 
*real_git_dir, int);
 extern int init_db(const char *template_dir, unsigned int flags);
 
 extern void sanitize_stdfds(void);
-extern int daemonize(void);
+extern int daemonize(int *);
 
 #define alloc_nr(x) (((x)+16)*3/2)
 
diff --git a/daemon.c b/daemon.c
index 8d45c33..a5cf954 100644
--- a/daemon.c
+++ b/daemon.c
@@ -1365,7 +1365,7 @@ int main(int argc, char **argv)
return execute();
 
if (detach) {
-   if (daemonize())
+   if (daemonize(NULL))
die("--detach not supported on this platform");
} else
sanitize_stdfds();
diff --git a/setup.c b/setup.c
index de1a2a7..9adf13f 100644
--- a/setup.c
+++ b/setup.c
@@ -1017,7 +1017,7 @@ void sanitize_stdfds(void)
close(fd);
 }
 
-int daemonize(void)
+int daemonize(int *daemonized)
 {
 #ifdef NO_POSIX_GOODIES
errno = ENOSYS;
@@ -1029,6 +1029,8 @@ int daemonize(void)
case -1:
die_errno("fork failed");
default:
+   if (daemonized)
+   *daemonized = 1;
exit(0);
}
if (setsid() == -1)
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 13/19] index-helper: use watchman to avoid refreshing index with lstat()

2016-03-09 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Watchman is hidden behind index-helper. Before git tries to read the
index from shm, it notifies index-helper with SIGHUP and sleep,
waiting for index-helper to prepare shm. index-helper then contacts
watchman, updates 'WAMA' extension and put it in a separate shm and
wakes git up with SIGHUP.

Git uses this extension to not lstat unchanged entries. Git only trust
'WAMA' extension when it's received from the separate shm, not from
disk. Unmarked entries are "clean". Marked entries are dirty from
watchman point of view. If it finds out some entries are
'watchman-dirty', but are really unchanged (e.g. the file was changed,
then reverted back), then Git will clear the marking in 'WAMA' before
writing it down.

Hiding watchman behind index-helper means you need both daemons. You
can't run watchman alone. Not so good. But on the other hand, 'git'
binary is not linked to watchman/json libraries, which is good for
packaging. Core git package will run fine without watchman-related
packages. If they need watchman, they can install git-index-helper and
dependencies.

Another reason for tying watchman to index-helper is, when used with
untracked cache, we need to keep track of $GIT_WORK_TREE file
listing. That kind of list can be kept in index-helper.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Makefile   |  5 
 cache.h|  2 ++
 index-helper.c | 84 +++---
 read-cache.c   | 43 +++---
 4 files changed, 127 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index e51331c..d79fc0c 100644
--- a/Makefile
+++ b/Makefile
@@ -450,6 +450,7 @@ MSGFMT = msgfmt
 CURL_CONFIG = curl-config
 PTHREAD_LIBS = -lpthread
 PTHREAD_CFLAGS =
+WATCHMAN_LIBS =
 GCOV = gcov
 
 export TCL_PATH TCLTK_PATH
@@ -1419,6 +1420,7 @@ endif
 ifdef USE_WATCHMAN
LIB_H += watchman-support.h
LIB_OBJS += watchman-support.o
+   WATCHMAN_LIBS = -lwatchman
BASIC_CFLAGS += -DUSE_WATCHMAN
 endif
 
@@ -2032,6 +2034,9 @@ git-remote-testsvn$X: remote-testsvn.o GIT-LDFLAGS 
$(GITLIBS) $(VCSSVN_LIB)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) 
$(LIBS) \
$(VCSSVN_LIB)
 
+git-index-helper$X: index-helper.o GIT-LDFLAGS $(GITLIBS)
+   $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) 
$(LIBS) $(WATCHMAN_LIBS)
+
 $(REMOTE_CURL_ALIASES): $(REMOTE_CURL_PRIMARY)
$(QUIET_LNCP)$(RM) $@ && \
ln $< $@ 2>/dev/null || \
diff --git a/cache.h b/cache.h
index bf20652..272c928 100644
--- a/cache.h
+++ b/cache.h
@@ -558,6 +558,7 @@ extern int daemonize(int *);
 
 /* Initialize and use the cache information */
 struct lock_file;
+extern int verify_index(const struct index_state *);
 extern int read_index(struct index_state *);
 extern int read_index_preload(struct index_state *, const struct pathspec 
*pathspec);
 extern int do_read_index(struct index_state *istate, const char *path,
@@ -565,6 +566,7 @@ extern int do_read_index(struct index_state *istate, const 
char *path,
 extern int read_index_from(struct index_state *, const char *path);
 extern int is_index_unborn(struct index_state *);
 extern int read_index_unmerged(struct index_state *);
+extern void write_watchman_ext(struct strbuf *sb, struct index_state* istate);
 #define COMMIT_LOCK(1 << 0)
 #define CLOSE_LOCK (1 << 1)
 #define REFRESH_DAEMON (1 << 2)
diff --git a/index-helper.c b/index-helper.c
index cf26da7..7e7ce9b 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -5,15 +5,18 @@
 #include "split-index.h"
 #include "shm.h"
 #include "lockfile.h"
+#include "watchman-support.h"
 
 struct shm {
unsigned char sha1[20];
void *shm;
size_t size;
+   pid_t pid;
 };
 
 static struct shm shm_index;
 static struct shm shm_base_index;
+static struct shm shm_watchman;
 static int daemonized, to_verify = 1;
 
 static void release_index_shm(struct shm *is)
@@ -25,10 +28,21 @@ static void release_index_shm(struct shm *is)
is->shm = NULL;
 }
 
+static void release_watchman_shm(struct shm *is)
+{
+   if (!is->shm)
+   return;
+   munmap(is->shm, is->size);
+   git_shm_unlink("git-watchman-%s-%" PRIuMAX,
+  sha1_to_hex(is->sha1), (uintmax_t)is->pid);
+   is->shm = NULL;
+}
+
 static void cleanup_shm(void)
 {
release_index_shm(&shm_index);
release_index_shm(&shm_base_index);
+   release_watchman_shm(&shm_watchman);
 }
 
 static void cleanup(void)
@@ -120,13 +134,15 @@ static void share_the_index(void)
if (the_index.split_index && the_index.split_index->base)
share_index(the_index.split_index->base, &shm_base_index);
share_index(&the_index, &shm_index);
-   if (to_verify && !verify_shm())
+   if (to_verify && !verify_shm()) {
cleanup_shm();
-   discard_index(&the_index);
+   discard_index(&the_index

[PATCH 10/19] read-cache: add watchman 'WAMA' extension

2016-03-09 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

The extension contains a bitmap, one bit for each entry in the
index. If the n-th bit is zero, the n-th entry is considered
unchanged, we can ce_mark_uptodate() it without refreshing. If the bit
is non-zero and we found out the corresponding file is clean after
refresh, we can clear the bit.

The 'skipping refresh' bit is not in this patch yet as we would need
watchman. More details in later patches.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 cache.h  |  4 
 read-cache.c | 71 ++--
 2 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/cache.h b/cache.h
index cc1f6f5..8f7b4b1 100644
--- a/cache.h
+++ b/cache.h
@@ -182,6 +182,8 @@ struct cache_entry {
 #define CE_VALID (0x8000)
 #define CE_STAGESHIFT 12
 
+#define CE_WATCHMAN_DIRTY  (0x0001)
+
 /*
  * Range 0x0FFF in ce_flags is divided into
  * two parts: in-memory flags and on-disk ones.
@@ -320,6 +322,7 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define CACHE_TREE_CHANGED (1 << 5)
 #define SPLIT_INDEX_ORDERED(1 << 6)
 #define UNTRACKED_CHANGED  (1 << 7)
+#define WATCHMAN_CHANGED   (1 << 8)
 
 struct split_index;
 struct untracked_cache;
@@ -344,6 +347,7 @@ struct index_state {
struct untracked_cache *untracked;
void *mmap;
size_t mmap_size;
+   char *last_update;
 };
 
 extern struct index_state the_index;
diff --git a/read-cache.c b/read-cache.c
index 16fbdf6..85ef15b 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -19,6 +19,7 @@
 #include "split-index.h"
 #include "utf8.h"
 #include "shm.h"
+#include "ewah/ewok.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
   unsigned int options);
@@ -41,11 +42,13 @@ static struct cache_entry *refresh_cache_entry(struct 
cache_entry *ce,
 #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
 #define CACHE_EXT_LINK 0x6c696e6b/* "link" */
 #define CACHE_EXT_UNTRACKED 0x554E5452   /* "UNTR" */
+#define CACHE_EXT_WATCHMAN 0x57414D41/* "WAMA" */
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
 CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
-SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED)
+SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED | \
+WATCHMAN_CHANGED)
 
 struct index_state the_index;
 static const char *alternate_index_output;
@@ -1224,8 +1227,13 @@ int refresh_index(struct index_state *istate, unsigned 
int flags,
continue;
 
new = refresh_cache_ent(istate, ce, options, &cache_errno, 
&changed);
-   if (new == ce)
+   if (new == ce) {
+   if (ce->ce_flags & CE_WATCHMAN_DIRTY) {
+   ce->ce_flags  &= ~CE_WATCHMAN_DIRTY;
+   istate->cache_changed |= WATCHMAN_CHANGED;
+   }
continue;
+   }
if (!new) {
const char *fmt;
 
@@ -1369,6 +1377,48 @@ static int verify_hdr(const struct cache_header *hdr, 
unsigned long size)
return 0;
 }
 
+static void mark_no_watchman(size_t pos, void *data)
+{
+   struct index_state *istate = data;
+   assert(pos < istate->cache_nr);
+   istate->cache[pos]->ce_flags |= CE_WATCHMAN_DIRTY;
+}
+
+static int read_watchman_ext(struct index_state *istate, const void *data,
+unsigned long sz)
+{
+   struct ewah_bitmap *bitmap;
+   int ret, len;
+
+   if (memchr(data, 0, sz) == NULL)
+   return error("invalid extension");
+   len = strlen(data) + 1;
+   bitmap = ewah_new();
+   ret = ewah_read_mmap(bitmap, (const char *)data + len, sz - len);
+   if (ret != sz - len) {
+   ewah_free(bitmap);
+   return error("failed to parse ewah bitmap reading watchman 
index extension");
+   }
+   istate->last_update = xstrdup(data);
+   ewah_each_bit(bitmap, mark_no_watchman, istate);
+   ewah_free(bitmap);
+   return 0;
+}
+
+static void write_watchman_ext(struct strbuf *sb, struct index_state* istate)
+{
+   struct ewah_bitmap *bitmap;
+   int i;
+
+   strbuf_add(sb, istate->last_update, strlen(istate->last_update) + 1);
+   bitmap = ewah_new();
+   for (i = 0; i < istate->cache_nr; i++)
+   if (istate->cache[i]->ce_flags & CE_WATCHMAN_DIRTY)
+   ewah_set(bitmap, i);
+   ewah_serialize_strbuf(bitmap, sb);
+   ewah_free(bitmap);
+}
+
 static int read_index_extension(struct index_state *istate,
const char *ext, void *data, unsigned long sz)
 {
@@ -1386,6 +1436,11 @@ static int read_index_extension(struct index_state 
*istate,
case CACHE_EXT

[PATCH 16/19] index-helper: rewrite pidfile after daemonizing

2016-03-09 Thread David Turner
When we daemonize, our pid changes, so we need to rewrite the pid file.

Signed-off-by: David Turner 
---
 index-helper.c | 48 
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/index-helper.c b/index-helper.c
index 7e7ce9b..dc03e1e 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -288,12 +288,33 @@ static const char * const usage_text[] = {
NULL
 };
 
-int main(int argc, char **argv)
+static const char *write_pid(void)
 {
+   static struct strbuf sb = STRBUF_INIT;
static struct lock_file lock;
-   struct strbuf sb = STRBUF_INIT;
+   int fd;
+
+   strbuf_reset(&sb);
+   fd = hold_lock_file_for_update(&lock,
+  git_path("index-helper.pid"),
+  LOCK_DIE_ON_ERROR);
+#ifdef GIT_WINDOWS_NATIVE
+   strbuf_addstr(&sb, "HWND");
+#elif defined(USE_WATCHMAN)
+   strbuf_addch(&sb, 'W'); /* see poke_daemon() */
+#endif
+   strbuf_addf(&sb, "%" PRIuMAX, (uintmax_t) getpid());
+   write_in_full(fd, sb.buf, sb.len);
+   commit_lock_file(&lock);
+
+   return sb.buf;
+}
+
+int main(int argc, char **argv)
+{
const char *prefix;
-   int fd, idle_in_minutes = 10, detach = 0;
+   int idle_in_minutes = 10, detach = 0;
+   const char *pid_file;
struct option options[] = {
OPT_INTEGER(0, "exit-after", &idle_in_minutes,
N_("exit if not used after some minutes")),
@@ -314,17 +335,7 @@ int main(int argc, char **argv)
  options, usage_text, 0))
die(_("too many arguments"));
 
-   fd = hold_lock_file_for_update(&lock,
-  git_path("index-helper.pid"),
-  LOCK_DIE_ON_ERROR);
-#ifdef GIT_WINDOWS_NATIVE
-   strbuf_addstr(&sb, "HWND");
-#elif defined(USE_WATCHMAN)
-   strbuf_addch(&sb, 'W'); /* see poke_daemon() */
-#endif
-   strbuf_addf(&sb, "%" PRIuMAX, (uintmax_t) getpid());
-   write_in_full(fd, sb.buf, sb.len);
-   commit_lock_file(&lock);
+   write_pid();
 
atexit(cleanup);
sigchain_push_common(cleanup_on_signal);
@@ -332,9 +343,14 @@ int main(int argc, char **argv)
if (detach && daemonize(&daemonized))
die_errno("unable to detach");
 
+   /*
+* Now that we're daemonized, we need to rewrite the PID file
+* because we have a new PID.
+*/
+   pid_file = write_pid();
+
if (!idle_in_minutes)
idle_in_minutes = 0x / 60;
-   loop(sb.buf, idle_in_minutes * 60);
-   strbuf_release(&sb);
+   loop(pid_file, idle_in_minutes * 60);
return 0;
 }
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/19] index-helper: add Windows support

2016-03-09 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Windows supports shared memory, but the semantics is a bit different
than POSIX shm. The most noticeable thing is there's no way to get the
shared memory's size by the reader, and wrapping fstat to do that
would be hell. So the shm size is added near the end, hidden away from
shm users (storing it in headers would cause more problems with munmap,
storing it as a separate shm is even worse).

PostMessage is used instead of UNIX signals for
notification. Lightweight (at least code-wise) on the client side.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 config.mak.uname |  2 ++
 index-helper.c   | 48 
 read-cache.c | 13 
 shm.c| 96 
 4 files changed, 159 insertions(+)

diff --git a/config.mak.uname b/config.mak.uname
index b5108e1..49320c7 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -394,6 +394,7 @@ ifndef DEBUG
 else
BASIC_CFLAGS += -Zi -MDd
 endif
+   PROGRAM_OBJS += index-helper.o
X = .exe
 endif
 ifeq ($(uname_S),Interix)
@@ -574,6 +575,7 @@ else
NO_CURL = YesPlease
endif
 endif
+   PROGRAM_OBJS += index-helper.o
 endif
 ifeq ($(uname_S),QNX)
COMPAT_CFLAGS += -DSA_RESTART=0
diff --git a/index-helper.c b/index-helper.c
index 4dd9656..cf26da7 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -155,6 +155,51 @@ static void loop(const char *pid_file, int idle_in_seconds)
; /* do nothing, all is handled by signal handlers already */
 }
 
+#elif defined(GIT_WINDOWS_NATIVE)
+
+static void loop(const char *pid_file, int idle_in_seconds)
+{
+   HWND hwnd;
+   UINT_PTR timer = 0;
+   MSG msg;
+   HINSTANCE hinst = GetModuleHandle(NULL);
+   WNDCLASS wc;
+
+   /*
+* Emulate UNIX signals by sending WM_USER+x to a
+* window. Register window class and create a new window to
+* catch these messages.
+*/
+   memset(&wc, 0, sizeof(wc));
+   wc.lpfnWndProc   = DefWindowProc;
+   wc.hInstance = hinst;
+   wc.lpszClassName = "git-index-helper";
+   if (!RegisterClass(&wc))
+   die_errno(_("could not register new window class"));
+
+   hwnd = CreateWindow("git-index-helper", pid_file,
+   0, 0, 0, 1, 1, NULL, NULL, hinst, NULL);
+   if (!hwnd)
+   die_errno(_("could not register new window"));
+
+   refresh(0);
+   while (1) {
+   timer = SetTimer(hwnd, timer, idle_in_seconds * 1000, NULL);
+   if (!timer)
+   die(_("no timer!"));
+   if (!GetMessage(&msg, hwnd, 0, 0) || msg.message == WM_TIMER)
+   break;
+   switch (msg.message) {
+   case WM_USER:
+   refresh(0);
+   break;
+   default:
+   /* just reset the timer */
+   break;
+   }
+   }
+}
+
 #else
 
 static void loop(const char *pid_file, int idle_in_seconds)
@@ -198,6 +243,9 @@ int main(int argc, char **argv)
fd = hold_lock_file_for_update(&lock,
   git_path("index-helper.pid"),
   LOCK_DIE_ON_ERROR);
+#ifdef GIT_WINDOWS_NATIVE
+   strbuf_addstr(&sb, "HWND");
+#endif
strbuf_addf(&sb, "%" PRIuMAX, (uintmax_t) getpid());
write_in_full(fd, sb.buf, sb.len);
commit_lock_file(&lock);
diff --git a/read-cache.c b/read-cache.c
index 1a0ab0c..16fbdf6 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1546,6 +1546,18 @@ static void post_read_index_from(struct index_state 
*istate)
tweak_untracked_cache(istate);
 }
 
+#if defined(GIT_WINDOWS_NATIVE)
+static void do_poke(struct strbuf *sb, int refresh_cache)
+{
+   HWND hwnd;
+   if (!starts_with(sb->buf, "HWND"))
+   return;
+   hwnd = FindWindow("git-index-helper", sb->buf);
+   if (!hwnd)
+   return;
+   PostMessage(hwnd, refresh_cache ? WM_USER : WM_USER + 1, 0, 0);
+}
+#else
 static void do_poke(struct strbuf *sb, int refresh_cache)
 {
char*start = sb->buf;
@@ -1555,6 +1567,7 @@ static void do_poke(struct strbuf *sb, int refresh_cache)
return;
kill(pid, refresh_cache ? SIGHUP : SIGUSR1);
 }
+#endif
 
 static void poke_daemon(struct index_state *istate,
const struct stat *st, int refresh_cache)
diff --git a/shm.c b/shm.c
index 4ec1a00..04d8a35 100644
--- a/shm.c
+++ b/shm.c
@@ -52,6 +52,102 @@ void git_shm_unlink(const char *fmt, ...)
shm_unlink(path);
 }
 
+#elif defined(GIT_WINDOWS_NATIVE)
+
+#define SHM_PATH_LEN 82/* a little bit longer than POSIX because of 
"Local\\" */
+
+static ssize_t create_shm_map(int oflag, int perm, ssize_t length,
+ void **mmap, int prot, int flags,
+ co

[PATCH 08/19] index-helper: add --detach

2016-03-09 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Documentation/git-index-helper.txt |  3 +++
 index-helper.c | 10 --
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-index-helper.txt 
b/Documentation/git-index-helper.txt
index ad40366..9ced091 100644
--- a/Documentation/git-index-helper.txt
+++ b/Documentation/git-index-helper.txt
@@ -31,6 +31,9 @@ OPTIONS
for reading an index, but because it will happen in the
background, it's not noticable. `--strict` is enabled by default.
 
+--detach::
+   Detach from the shell.
+
 NOTES
 -
 On UNIX-like systems, $GIT_DIR/index-helper.pid contains the process
diff --git a/index-helper.c b/index-helper.c
index 1140bc0..4dd9656 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -14,7 +14,7 @@ struct shm {
 
 static struct shm shm_index;
 static struct shm shm_base_index;
-static int to_verify = 1;
+static int daemonized, to_verify = 1;
 
 static void release_index_shm(struct shm *is)
 {
@@ -33,6 +33,8 @@ static void cleanup_shm(void)
 
 static void cleanup(void)
 {
+   if (daemonized)
+   return;
unlink(git_path("index-helper.pid"));
cleanup_shm();
 }
@@ -172,12 +174,13 @@ int main(int argc, char **argv)
static struct lock_file lock;
struct strbuf sb = STRBUF_INIT;
const char *prefix;
-   int fd, idle_in_minutes = 10;
+   int fd, idle_in_minutes = 10, detach = 0;
struct option options[] = {
OPT_INTEGER(0, "exit-after", &idle_in_minutes,
N_("exit if not used after some minutes")),
OPT_BOOL(0, "strict", &to_verify,
 "verify shared memory after creating"),
+   OPT_BOOL(0, "detach", &detach, "detach the process"),
OPT_END()
};
 
@@ -202,6 +205,9 @@ int main(int argc, char **argv)
atexit(cleanup);
sigchain_push_common(cleanup_on_signal);
 
+   if (detach && daemonize(&daemonized))
+   die_errno("unable to detach");
+
if (!idle_in_minutes)
idle_in_minutes = 0x / 60;
loop(sb.buf, idle_in_minutes * 60);
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/19] index-helper: new daemon for caching index and related stuff

2016-03-09 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

Instead of reading the index from disk and worrying about disk
corruption, the index is cached in memory (memory bit-flips happen
too, but hopefully less often). The result is faster read. Read time
is reduced by 70%.

The biggest gain is not having to verify the trailing SHA-1, which
takes lots of time especially on large index files. But this also
opens doors for further optimiztions:

 - we could create an in-memory format that's essentially the memory
   dump of the index to eliminate most of parsing/allocation
   overhead. The mmap'd memory can be used straight away. Experiment
   [1] shows we could reduce read time by 88%.

 - we could cache non-index info such as name hash

The shared memory's name folows the template "git--"
where  is the trailing SHA-1 of the index file.  is
"index" for cached index files (and may be "name-hash" for name-hash
cache). If such shared memory exists, it contains the same index
content as on disk. The content is already validated by the daemon and
git won't validate it again (except comparing the trailing SHA-1s).

Git can poke the daemon to tell it to refresh the index cache, or to
keep it alive some more minutes via UNIX signals. It can't give any
real index data directly to the daemon. Real data goes to disk first,
then the daemon reads and verifies it from there. Poking only happens
for $GIT_DIR/index, not temporary index files.

$GIT_DIR/index-helper.pid contains a reference to daemon process (and
it's pid on *nix). The file's mtime is updated every time it's accessed
(or should be updated often enough). Old index-helper.pid is considered
stale and ignored.

index-helper requires POSIX realtime extension. POSIX shm interface
however is abstracted away so that Windows support can be added later.

On webkit.git with index format v2, duplicating 8 times to 1.4m
entries and 200MB in size:

(vanilla)  0.986986364 s: read_index_from .git/index
(index-helper) 0.267850279 s: read_index_from .git/index

Interestingly with index v4, we get less out of index-helper. It makes
sense as v4 requires more processing after loading the index:

(vanilla)  0.72249 s: read_index_from .git/index
(index-helper) 0.302741500 s: read_index_from .git/index

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 .gitignore |   1 +
 Documentation/git-index-helper.txt |  44 ++
 Makefile   |   9 +++
 cache.h|   3 +
 config.mak.uname   |   1 +
 git-compat-util.h  |   1 +
 index-helper.c | 162 +
 read-cache.c   | 106 +---
 shm.c  |  67 +++
 shm.h  |  23 ++
 10 files changed, 408 insertions(+), 9 deletions(-)
 create mode 100644 Documentation/git-index-helper.txt
 create mode 100644 index-helper.c
 create mode 100644 shm.c
 create mode 100644 shm.h

diff --git a/.gitignore b/.gitignore
index 5087ce1..b92f122 100644
--- a/.gitignore
+++ b/.gitignore
@@ -71,6 +71,7 @@
 /git-http-fetch
 /git-http-push
 /git-imap-send
+/git-index-helper
 /git-index-pack
 /git-init
 /git-init-db
diff --git a/Documentation/git-index-helper.txt 
b/Documentation/git-index-helper.txt
new file mode 100644
index 000..9db28cf
--- /dev/null
+++ b/Documentation/git-index-helper.txt
@@ -0,0 +1,44 @@
+git-index-helper(1)
+===
+
+NAME
+
+git-index-helper - A simple cache daemon for speeding up index file access
+
+SYNOPSIS
+
+[verse]
+'git index-helper' [options]
+
+DESCRIPTION
+---
+Keep the index file in memory for faster access. This daemon is per
+repository.
+
+OPTIONS
+---
+
+--exit-after=::
+   Exit if the cached index is not accessed for ``
+   minutes. Specify 0 to wait forever. Default is 10.
+
+NOTES
+-
+On UNIX-like systems, $GIT_DIR/index-helper.pid contains the process
+id of the daemon. At least on Linux, shared memory objects are
+availble via /dev/shm with the name pattern "git--".
+Normally the daemon will clean up shared memory objects when it exits.
+But if it crashes, some objects could remain there and they can be
+safely deleted with "rm" command. The following signals are used to
+control the daemon:
+
+SIGHUP::
+   Reread the index.
+
+SIGUSR1::
+   Let the daemon know the index is to be read. It keeps the
+   daemon alive longer, unless `--exit-after=0` is used.
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Makefile b/Makefile
index 24bef8d..a6c668b 100644
--- a/Makefile
+++ b/Makefile
@@ -367,6 +367,8 @@ all::
 # Define HAVE_BSD_SYSCTL if your platform has a BSD-compatible sysctl function.
 #
 # Define HAVE_GETDELIM if your system has the getdelim() function.
+#
+# Define HAVE_SHM if you platform support shm_* functions in librt.
 
 GIT-VERSION-FILE: FORCE
@$(SHELL_PATH) ./GIT-VERSION-GEN
@@ -802,6 +804,7 @@ LI

[PATCH 06/19] index-helper: add --strict

2016-03-09 Thread David Turner
From: Nguyễn Thái Ngọc Duy 

There's are "holes" in the index-helper approach because the shared
memory is not verified again by git. If $USER is compromised, shared
memory could be modified. But then they can already modify
$GIT_DIR/index. A more realistic risk is some bugs in index-helper
produce corrupt shared memory. --strict is added to avoid that

Strictly speaking there's still a very small gap where corrupt shared
memory could still be read by git: after we write the trailing SHA-1 in
the shared memory (thus signaling "this shm is ready") and before
verify_shm() detects an error.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Documentation/git-index-helper.txt |  9 +++
 cache.h|  1 +
 index-helper.c | 48 ++
 read-cache.c   |  9 ---
 4 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-index-helper.txt 
b/Documentation/git-index-helper.txt
index 9db28cf..ad40366 100644
--- a/Documentation/git-index-helper.txt
+++ b/Documentation/git-index-helper.txt
@@ -22,6 +22,15 @@ OPTIONS
Exit if the cached index is not accessed for ``
minutes. Specify 0 to wait forever. Default is 10.
 
+--strict::
+--no-strict::
+   Strict mode makes index-helper verify the shared memory after
+   it's created. If the result does not match what's read from
+   $GIT_DIR/index, the shared memory is destroyed. This makes
+   index-helper take more than double the amount of time required
+   for reading an index, but because it will happen in the
+   background, it's not noticable. `--strict` is enabled by default.
+
 NOTES
 -
 On UNIX-like systems, $GIT_DIR/index-helper.pid contains the process
diff --git a/cache.h b/cache.h
index e22296c..1d7e561 100644
--- a/cache.h
+++ b/cache.h
@@ -336,6 +336,7 @@ struct index_state {
 keep_mmap : 1,
 from_shm : 1,
 to_shm : 1,
+always_verify_trailing_sha1 : 1,
 initialized : 1;
struct hashmap name_hash;
struct hashmap dir_hash;
diff --git a/index-helper.c b/index-helper.c
index cf2971d..1140bc0 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -14,6 +14,7 @@ struct shm {
 
 static struct shm shm_index;
 static struct shm shm_base_index;
+static int to_verify = 1;
 
 static void release_index_shm(struct shm *is)
 {
@@ -69,11 +70,56 @@ static void share_index(struct index_state *istate, struct 
shm *is)
hashcpy((unsigned char *)new_mmap + istate->mmap_size - 20, is->sha1);
 }
 
+static int verify_shm(void)
+{
+   int i;
+   struct index_state istate;
+   memset(&istate, 0, sizeof(istate));
+   istate.always_verify_trailing_sha1 = 1;
+   istate.to_shm = 1;
+   i = read_index(&istate);
+   if (i != the_index.cache_nr)
+   goto done;
+   for (; i < the_index.cache_nr; i++) {
+   struct cache_entry *base, *ce;
+   /* namelen is checked separately */
+   const unsigned int ondisk_flags =
+   CE_STAGEMASK | CE_VALID | CE_EXTENDED_FLAGS;
+   unsigned int ce_flags, base_flags, ret;
+   base = the_index.cache[i];
+   ce = istate.cache[i];
+   if (ce->ce_namelen != base->ce_namelen ||
+   strcmp(ce->name, base->name)) {
+   warning("mismatch at entry %d", i);
+   break;
+   }
+   ce_flags = ce->ce_flags;
+   base_flags = base->ce_flags;
+   /* only on-disk flags matter */
+   ce->ce_flags   &= ondisk_flags;
+   base->ce_flags &= ondisk_flags;
+   ret = memcmp(&ce->ce_stat_data, &base->ce_stat_data,
+offsetof(struct cache_entry, name) -
+offsetof(struct cache_entry, ce_stat_data));
+   ce->ce_flags = ce_flags;
+   base->ce_flags = base_flags;
+   if (ret) {
+   warning("mismatch at entry %d", i);
+   break;
+   }
+   }
+done:
+   discard_index(&istate);
+   return i == the_index.cache_nr;
+}
+
 static void share_the_index(void)
 {
if (the_index.split_index && the_index.split_index->base)
share_index(the_index.split_index->base, &shm_base_index);
share_index(&the_index, &shm_index);
+   if (to_verify && !verify_shm())
+   cleanup_shm();
discard_index(&the_index);
 }
 
@@ -130,6 +176,8 @@ int main(int argc, char **argv)
struct option options[] = {
OPT_INTEGER(0, "exit-after", &idle_in_minutes,
N_("exit if not used after some minutes")),
+   OPT_BOOL(0, "strict", &to_verify,
+"verify shared memory after creating"),
OPT_END()

Re: [PATCH 0/10] cleaning up check_repository_format_gently

2016-03-01 Thread David Turner

On Tue, 2016-03-01 at 09:35 -0500, Jeff King wrote:
> After the discussion in:
> 
>   http://thread.gmane.org/gmane.comp.version-control.git/287949/focus
> =288002
> 
> I came up with this series to try to address some of the warts in
> check_repository_format_gently().
> 
> I had hoped to come up with something very small that could go at the
> front of the refs-backend series, but as with any time I look at the
> setup code, it managed to snowball into a potpourri of hidden
> assumptions.
> 
> So I hope David isn't _too_ irritated to see this in his inbox.


I commented on two of the patches; the rest look good to me.

I rebased on these (minus the repo_version_string change from 10/10). Wasn't 
too bad.  And it did clean up the submodule check.

I will wait to resend my series until I hear back about your shortlog/mailmap 
fix and your suggestion on a better archive --remote fix.  

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 01/33] setup: call setup_git_directory_gently before accessing refs

2016-03-01 Thread David Turner
On Tue, 2016-03-01 at 18:47 -0500, David Turner wrote:
> On Tue, 2016-03-01 at 03:35 -0500, Jeff King wrote:
> > On Mon, Feb 29, 2016 at 07:52:34PM -0500, David Turner wrote:
> > 
> > > Usually, git calls some form of setup_git_directory at startup. 
> > >  But
> > > sometimes, it doesn't.  Usually, that's OK because it's not
> > > really
> > > using the repository.  But in some cases, it is using the repo. 
> > >  In
> > > those cases, either setup_git_directory_gently must be called, or
> > > the
> > > repository (e.g. the refs) must not be accessed.
> > 
> > It's actually not just setup_git_directory(). We can also use
> > check_repository_format(), which is used by enter_repo() (and hence
> > by
> > things like upload-pack). I think the rule really ought to be: if
> > we
> > didn't have check_repository_format_gently() tell us we have a
> > valid
> > repo, we should not access any repo elements (refs, objects, etc).
> 
> I'll change that commit message to say
> "check_repository_format_gently".
> 
> > > diff --git a/builtin/grep.c b/builtin/grep.
> > [snip: this is a probably-good behavior change]
> 
> Agreed.
> 
> > My fix for this was to teach read_mailmap to avoid looking for
> > HEAD:.mailmap if we are not in a repository, but to continue with
> > the
> > others (.mailmap in the cwd, and the mailmap.file config variable).
> > ...
> > But I do think your patch is a potential regression there, if
> > anybody
> > does do that.
> 
> Your version sounds better.  But I don't see it in the patch set you
> sent earlier? 
> 
> > > diff --git a/git.c b/git.c
> > > index 6cc0c07..51e0508 100644
> > > --- a/git.c
> > > +++ b/git.c
> > > @@ -376,7 +376,7 @@ static struct cmd_struct commands[] = {
> > >   { "am", cmd_am, RUN_SETUP | NEED_WORK_TREE },
> > >   { "annotate", cmd_annotate, RUN_SETUP },
> > >   { "apply", cmd_apply, RUN_SETUP_GENTLY },
> > > - { "archive", cmd_archive },
> > > + { "archive", cmd_archive, RUN_SETUP_GENTLY },
> > >   { "bisect--helper", cmd_bisect__helper, RUN_SETUP },
> > >   { "blame", cmd_blame, RUN_SETUP },
> > >   { "branch", cmd_branch, RUN_SETUP },
> > 
> > I didn't have to touch this case in my experimenting. I wonder if
> > it's
> > because I resolved the "grep" case a little differently.
> > 
> > I taught get_ref_cache() to only assert() that we have a repository
> > when
> > we are looking at the main ref-cache, not a submodule. In theory,
> > we
> > can
> > look at a submodule from inside an outer non-repo (it's not really
> > a
> > submodule then, but just a plain git dir). I don't think there's
> > anything in git right now that says you can't do so, though I think
> > your
> > refs-backend work does introduce that restriction (because it
> > actually
> > requires the submodules to use the same backend).
> > 
> > So with that requirement, I think we do need to require a repo even
> > to
> > access submodule refs. Is that what triggered this change?
> 
> No.  What triggered this change was a test failure with your earlier
> patch on master -- none of my stuff at all.  The failing command was:
> 
> git archive --remote=. HEAD
> 
> When writing my patch, I had assumed that the issue was the
> resolve_ref
> on the HEAD that's an argument -- but it's not.  The actual traceback
> is:
> 
> #0  die (
> err=err@entry=0x57ddb0 "BUG: resolve_ref called without
> initializing repo") at usage.c:99
> #1  0x004f7ed9 in resolve_ref_1 (sb_refname=0x7c4a50
> , 
> sb_contents=0x7fffcfc0, sb_path=0x7fffcfe0,
> flags=0x7fffdaaa, 
> sha1=0x7fffd100 "\b\326\377\377\377\177",
> resolve_flags=5572384, 
> refname=0x2 )
> at refs/files-backend.c:1429
> #2  resolve_ref_unsafe (refname=refname@entry=0x550b3b "HEAD", 
> resolve_flags=resolve_flags@entry=0, 
> sha1=sha1@entry=0x7fffd100 "\b\326\377\377\377\177", 
> flags=flags@entry=0x7fffd0fc) at refs/files-backend.c:1600
> #3  0x004ffe69 in read_config () at remote.c:471
> #4  0x00500235 in read_config () at remote.c:705
> #5  remote_get_1 (name=0x7fffdaaa ".", 
> get_default=get_default@entry=0x4fe230 )
> at remote.c:688
> #6  0x005004ca in remote

Re: [PATCH 10/10] setup: drop GIT_REPO_VERSION constants

2016-03-01 Thread David Turner
On Tue, 2016-03-01 at 09:45 -0500, Jeff King wrote:
> - char repo_version_string[10];
>  
>   /* This forces creation of new config file */
> - xsnprintf(repo_version_string, sizeof(repo_version_string),
> -   "%d", GIT_REPO_VERSION);
> - git_config_set("core.repositoryformatversion",
> repo_version_string);
> + git_config_set("core.repositoryformatversion", "0");

I'm about to use that in my series, so let's not get rid of it here.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 01/33] setup: call setup_git_directory_gently before accessing refs

2016-03-01 Thread David Turner
On Tue, 2016-03-01 at 03:35 -0500, Jeff King wrote:
> On Mon, Feb 29, 2016 at 07:52:34PM -0500, David Turner wrote:
> 
> > Usually, git calls some form of setup_git_directory at startup. 
> >  But
> > sometimes, it doesn't.  Usually, that's OK because it's not really
> > using the repository.  But in some cases, it is using the repo.  In
> > those cases, either setup_git_directory_gently must be called, or
> > the
> > repository (e.g. the refs) must not be accessed.
> 
> It's actually not just setup_git_directory(). We can also use
> check_repository_format(), which is used by enter_repo() (and hence
> by
> things like upload-pack). I think the rule really ought to be: if we
> didn't have check_repository_format_gently() tell us we have a valid
> repo, we should not access any repo elements (refs, objects, etc).

I'll change that commit message to say
"check_repository_format_gently".

> > diff --git a/builtin/grep.c b/builtin/grep.
> [snip: this is a probably-good behavior change]

Agreed.

> My fix for this was to teach read_mailmap to avoid looking for
> HEAD:.mailmap if we are not in a repository, but to continue with the
> others (.mailmap in the cwd, and the mailmap.file config variable).
> ...
> But I do think your patch is a potential regression there, if anybody
> does do that.

Your version sounds better.  But I don't see it in the patch set you
sent earlier? 

> > diff --git a/git.c b/git.c
> > index 6cc0c07..51e0508 100644
> > --- a/git.c
> > +++ b/git.c
> > @@ -376,7 +376,7 @@ static struct cmd_struct commands[] = {
> > { "am", cmd_am, RUN_SETUP | NEED_WORK_TREE },
> > { "annotate", cmd_annotate, RUN_SETUP },
> > { "apply", cmd_apply, RUN_SETUP_GENTLY },
> > -   { "archive", cmd_archive },
> > +   { "archive", cmd_archive, RUN_SETUP_GENTLY },
> > { "bisect--helper", cmd_bisect__helper, RUN_SETUP },
> > { "blame", cmd_blame, RUN_SETUP },
> > { "branch", cmd_branch, RUN_SETUP },
> 
> I didn't have to touch this case in my experimenting. I wonder if
> it's
> because I resolved the "grep" case a little differently.
>
> I taught get_ref_cache() to only assert() that we have a repository
> when
> we are looking at the main ref-cache, not a submodule. In theory, we
> can
> look at a submodule from inside an outer non-repo (it's not really a
> submodule then, but just a plain git dir). I don't think there's
> anything in git right now that says you can't do so, though I think
> your
> refs-backend work does introduce that restriction (because it
> actually
> requires the submodules to use the same backend).
> 
> So with that requirement, I think we do need to require a repo even
> to
> access submodule refs. Is that what triggered this change?

No.  What triggered this change was a test failure with your earlier
patch on master -- none of my stuff at all.  The failing command was:

git archive --remote=. HEAD

When writing my patch, I had assumed that the issue was the resolve_ref
on the HEAD that's an argument -- but it's not.  The actual traceback
is:

#0  die (
err=err@entry=0x57ddb0 "BUG: resolve_ref called without
initializing repo") at usage.c:99
#1  0x004f7ed9 in resolve_ref_1 (sb_refname=0x7c4a50
, 
sb_contents=0x7fffcfc0, sb_path=0x7fffcfe0,
flags=0x7fffdaaa, 
sha1=0x7fffd100 "\b\326\377\377\377\177",
resolve_flags=5572384, 
refname=0x2 )
at refs/files-backend.c:1429
#2  resolve_ref_unsafe (refname=refname@entry=0x550b3b "HEAD", 
resolve_flags=resolve_flags@entry=0, 
sha1=sha1@entry=0x7fffd100 "\b\326\377\377\377\177", 
flags=flags@entry=0x7fffd0fc) at refs/files-backend.c:1600
#3  0x004ffe69 in read_config () at remote.c:471
#4  0x00500235 in read_config () at remote.c:705
#5  remote_get_1 (name=0x7fffdaaa ".", 
get_default=get_default@entry=0x4fe230 )
at remote.c:688
#6  0x005004ca in remote_get (name=) at
remote.c:713
#7  0x004159d8 in run_remote_archiver (name_hint=0x0, 
exec=0x550720 "git-upload-archive", remote=, 
argv=0x7fffd608, argc=2) at builtin/archive.c:35
#8  cmd_archive (argc=2, argv=0x7fffd608, prefix=0x0)
at builtin/archive.c:104
#9  0x00406051 in run_builtin (argv=0x7fffd608, argc=3, 
p=0x7bd7a0 ) at git.c:357
#10 handle_builtin (argc=3, argv=0x7fffd608) at git.c:540
#11 0x0040519a in main (argc=3, av=) at
git.c:671

> I'd think you would need a matching line inside cmd_archive, too. It
> should allow "--remote" wit

Re: [PATCH 06/10] setup: refactor repo format reading and verification

2016-03-01 Thread David Turner
On Tue, 2016-03-01 at 09:42 -0500, Jeff King wrote:
> +/*
> + * Read the repository format characteristics from the config file
> "path". If
> + * the version cannot be extracted from the file for any reason, the
> version
> + * field will be set to -1, and all other fields are undefined.
> + */
> +void read_repository_format(struct repository_format *format, const
> char *path);

Generally speaking, I don't care for this interface; I would prefer to
return -1 and not change the struct.  But I see why it's maybe simpler
in this one case.

+/*
> + * Internally, we need to swap out "fn" here, but we don't want to
> expose
> + * that to the world. Hence a wrapper around this internal function.
> + */

I don't understand this comment.  fn is not being swapped out -- it's
being passed on directly.  

> +static void read_repository_format_1(struct repository_format
> *format,
> +  config_fn_t fn, const char
> *path)

The argument order here is different from git_config_from_file -- is
that for a reason?

> + if (format->version >= 1 && format->unknown_extensions.nr) {
> + int i;
> +
> + for (i = 0; i < format->unknown_extensions.nr; i++)
> + strbuf_addf(err, "unknown repository
> extension: %s",
> + format
> ->unknown_extensions.items[i].string);

newline here or something?  Otherwise multiple unknown extensions will
get jammed together.

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 29/33] setup: configure ref storage on setup

2016-03-01 Thread David Turner
On Tue, 2016-03-01 at 17:18 +, Ramsay Jones wrote:
> 
> On 01/03/16 00:53, David Turner wrote:
> > This sets up the existing backend early, so that other code which
> > reads refs is reading from the right place.
> > 
> > Signed-off-by: David Turner 
> > Signed-off-by: Junio C Hamano 
> > ---
> >  config.c | 1 +
> >  setup.c  | 4 
> >  2 files changed, 5 insertions(+)
> > 
> > diff --git a/config.c b/config.c
> > index 9ba40bc..cca7e28 100644
> > --- a/config.c
> > +++ b/config.c
> > @@ -11,6 +11,7 @@
> >  #include "strbuf.h"
> >  #include "quote.h"
> >  #include "hashmap.h"
> > +#include "refs.h"
> >  #include "string-list.h"
> >  #include "utf8.h"
> >  
> 
> I was just skimming these patches as they passed by, and this
> caught my eye. If this include is required (eg. to fix a compiler
> warning), then it should probably be in an earlier patch.
> Otherwise, it should be in a later patch, no?

Actually, it's cruft from the previous version of this series :(.  I
looked at the patch and didn't notice that it was in config.c instead
of setup.c.  Oops.  Will remove.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 30/33] refs: break out resolve_ref_unsafe_submodule

2016-03-01 Thread David Turner
On Tue, 2016-03-01 at 17:21 +, Ramsay Jones wrote:
> 
> On 01/03/16 00:53, David Turner wrote:
> > It will soon be useful for resolve_ref_unsafe to support
> > submodules.
> > But since it is called from so many places, changing it would have
> > been painful.  Fortunately, it's just a thin wrapper around (the
> > former) resolve_ref_1.  So now resolve_ref_1 becomes
> > resolve_ref_unsafe_submodule, and it passes its submodule argument
> > through to read_raw_ref.
> > 
> > The files backend doesn't need this functionality, but it doesn't
> > hurt.
> > 
> > Signed-off-by: David Turner 
> > Signed-off-by: Junio C Hamano 
> > ---
> >  refs.c   | 41 +---
> > -
> >  refs/files-backend.c |  8 ++--
> >  refs/refs-internal.h | 19 ---
> >  3 files changed, 47 insertions(+), 21 deletions(-)
> > 
> > diff --git a/refs.c b/refs.c
> > index 5fe0bac..d1cf707 100644
> > --- a/refs.c
> > +++ b/refs.c
> > @@ -60,6 +60,9 @@ void register_ref_storage_backends(void)
> >  * entries below when you add a new backend.
> >  */
> > register_ref_storage_backend(&refs_be_files);
> > +#ifdef USE_LIBLMDB
> > +   register_ref_storage_backend(&refs_be_lmdb);
> > +#endif
> 
> Again, just skimming patches, ...
> 
> The lmdb refs backend (hence refs_be_lmdb) is not introduced until
> the next patch [31/33], right?

Yep.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 31/33] refs: add LMDB refs storage backend

2016-02-29 Thread David Turner
On Tue, 2016-03-01 at 08:31 +0700, Duy Nguyen wrote:
> On Tue, Mar 1, 2016 at 7:53 AM, David Turner <
> dtur...@twopensource.com> wrote:
> > +Weaknesses:
> > +---
> > +
> > +The reflog format is somewhat inefficient: a binary format could
> > store
> > +reflog date/time information in somewhat less space.
> 
> This raises an interesting question. What if we want to change lmdb
> format in future (e.g. to address this weakness)? Do we need some
> sort
> of versioning in lmdb database? I suppose you can always add "lmdb2"
> backend that shares most of the code with current lmdb backend. Not
> sure if that's what you had in mind though.

The weakness of versioning inside the LMDB database is that in order to
check the version, you need to already have LMDB.  That's the argument
for "lmdb2". 

I had sort of hoped that this version would be "good enough" that we
could avoid modifying it.  So maybe that means we ought to address the
reflog format.  But I'm not sure that it's that big a deal.  What do
you think?
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 09/33] refs: reduce the visibility of do_for_each_ref()

2016-02-29 Thread David Turner
From: Ramsay Jones 

Now that we have moved do_for_each_ref into refs.c, it no longer needs
to be public.

Signed-off-by: Ramsay Jones 
Signed-off-by: Junio C Hamano 
---
 refs.c   | 19 +++
 refs/refs-internal.h |  6 --
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/refs.c b/refs.c
index dc5682a..cea5997 100644
--- a/refs.c
+++ b/refs.c
@@ -1142,6 +1142,17 @@ int head_ref(each_ref_fn fn, void *cb_data)
return head_ref_submodule(NULL, fn, cb_data);
 }
 
+/*
+ * The common backend for the for_each_*ref* functions
+ */
+static int do_for_each_ref(const char *submodule, const char *base,
+   each_ref_fn fn, int trim, int flags,
+   void *cb_data)
+{
+   return the_refs_backend->do_for_each_ref(submodule, base, fn, trim,
+flags, cb_data);
+}
+
 int for_each_ref(each_ref_fn fn, void *cb_data)
 {
return do_for_each_ref(NULL, "", fn, 0, 0, cb_data);
@@ -1342,11 +1353,3 @@ int resolve_gitlink_ref(const char *path, const char 
*refname,
 {
return the_refs_backend->resolve_gitlink_ref(path, refname, sha1);
 }
-
-int do_for_each_ref(const char *submodule, const char *base,
-   each_ref_fn fn, int trim, int flags,
-   void *cb_data)
-{
-   return the_refs_backend->do_for_each_ref(submodule, base, fn, trim,
-flags, cb_data);
-}
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index c9b6745..3702737 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -203,12 +203,6 @@ int rename_ref_available(const char *oldname, const char 
*newname);
 /* Include broken references in a do_for_each_ref*() iteration: */
 #define DO_FOR_EACH_INCLUDE_BROKEN 0x01
 
-/*
- * The common backend for the for_each_*ref* functions
- */
-int do_for_each_ref(const char *submodule, const char *base,
-   each_ref_fn fn, int trim, int flags, void *cb_data);
-
 /* refs backends */
 typedef int ref_transaction_commit_fn(struct ref_transaction *transaction,
  struct strbuf *err);
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 28/33] refs: register ref storage backends

2016-02-29 Thread David Turner
Add new function register_ref_storage_backends().  This new function
registers all known ref storage backends... once there are any other
than the default.  For now, it just registers the files backend.

Call the function before calling set_ref_storage_backend.

Signed-off-by: David Turner 
Signed-off-by: Junio C Hamano 
---
 builtin/init-db.c |  3 +++
 refs.c| 22 ++
 refs.h|  2 ++
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/builtin/init-db.c b/builtin/init-db.c
index af7fe04..04cc522 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -220,6 +220,7 @@ static int create_default_files(const char *template_path)
  requested_ref_storage_backend);
}
 
+   register_ref_storage_backends();
if (requested_ref_storage_backend)
ref_storage_backend = requested_ref_storage_backend;
if (strcmp(ref_storage_backend, "files")) {
@@ -502,6 +503,8 @@ int cmd_init_db(int argc, const char **argv, const char 
*prefix)
 
argc = parse_options(argc, argv, prefix, init_db_options, 
init_db_usage, 0);
 
+   register_ref_storage_backends();
+
if (requested_ref_storage_backend &&
!ref_storage_backend_exists(requested_ref_storage_backend))
die(_("Unknown ref storage backend %s"),
diff --git a/refs.c b/refs.c
index 5d69179..5fe0bac 100644
--- a/refs.c
+++ b/refs.c
@@ -14,14 +14,11 @@ static const char split_transaction_fail_warning[] = N_(
"transaction succeeded, but then the update to the per-worktree refs "
"failed.  Your repository may be in an inconsistent state.");
 
-/*
- * We always have a files backend and it is the default.
- */
 static struct ref_storage_be *the_refs_backend = &refs_be_files;
 /*
  * List of all available backends
  */
-static struct ref_storage_be *refs_backends = &refs_be_files;
+static struct ref_storage_be *refs_backends;
 
 const char *ref_storage_backend = "files";
 
@@ -48,6 +45,23 @@ int ref_storage_backend_exists(const char *name)
return find_ref_storage_backend(name) != NULL;
 }
 
+static void register_ref_storage_backend(struct ref_storage_be *be)
+{
+   be->next = refs_backends;
+   refs_backends = be;
+}
+
+void register_ref_storage_backends(void)
+{
+   if (refs_backends)
+   return;
+   /*
+* Add register_ref_storage_backend(ptr-to-backend)
+* entries below when you add a new backend.
+*/
+   register_ref_storage_backend(&refs_be_files);
+}
+
 /*
  * How to handle various characters in refnames:
  * 0: An acceptable character for refs
diff --git a/refs.h b/refs.h
index 1888ad8..c0964f5 100644
--- a/refs.h
+++ b/refs.h
@@ -523,4 +523,6 @@ int set_ref_storage_backend(const char *name);
 
 int ref_storage_backend_exists(const char *name);
 
+void register_ref_storage_backends(void);
+
 #endif /* REFS_H */
-- 
2.4.2.767.g62658d5-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 33/33] tests: add ref-storage argument

2016-02-29 Thread David Turner
Add a --ref-storage argument to make it easy to test alternate ref
storage backends.  This means that every test that calls git init or
git clone must use the new ref storage argument.

Modify many tests to work under alternate ref storage backends.

Introduce abstractions for raw ref/reflog reading/writing in tests
instead of directly frobbing the filesystem.

Conditionally skip tests that are not expected to succeed under this
condition. Most of this is straightforward. Of particular note are the
following test changes:

* The rearrangement of commands in t1401 is because without HEAD in
the right place, git doesn't recognized the trash dir as a git repo,
so no git commands work.

* t1430-bad-ref-name specifically blocks lmdb because other alternate
backends might want to keep this test.

Signed-off-by: David Turner 
Signed-off-by: Junio C Hamano 
---
 t/README |  6 +++
 t/lib-submodule-update.sh| 15 +--
 t/lib-t6000.sh   |  7 ++-
 t/t0008-ignores.sh   |  2 +-
 t/t0062-revision-walking.sh  |  6 +++
 t/t1021-rerere-in-workdir.sh |  6 +++
 t/t1200-tutorial.sh  |  8 +++-
 t/t1302-repo-version.sh  |  6 +++
 t/t1305-config-include.sh| 17 ++--
 t/t1400-update-ref.sh|  6 +++
 t/t1401-symbolic-ref.sh  | 17 +---
 t/t1404-update-ref-df-conflicts.sh   |  8 +++-
 t/t1410-reflog.sh|  6 +++
 t/t1430-bad-ref-name.sh  |  6 +++
 t/t1450-fsck.sh  | 12 ++---
 t/t1506-rev-parse-diagnosis.sh   |  4 +-
 t/t2013-checkout-submodule.sh|  2 +-
 t/t2105-update-index-gitfile.sh  |  4 +-
 t/t2107-update-index-basic.sh|  6 +--
 t/t2201-add-update-typechange.sh |  4 +-
 t/t3001-ls-files-others-exclude.sh   |  2 +-
 t/t3010-ls-files-killed-modified.sh  |  4 +-
 t/t3040-subprojects-basic.sh |  4 +-
 t/t3050-subprojects-fetch.sh |  2 +-
 t/t3200-branch.sh| 75 ++--
 t/t3210-pack-refs.sh |  7 +++
 t/t3211-peel-ref.sh  |  6 +++
 t/t3308-notes-merge.sh   |  2 +-
 t/t3404-rebase-interactive.sh|  2 +-
 t/t3600-rm.sh|  2 +-
 t/t3800-mktag.sh |  4 +-
 t/t3903-stash.sh |  2 +-
 t/t4010-diff-pathspec.sh |  2 +-
 t/t4020-diff-external.sh |  2 +-
 t/t4027-diff-submodule.sh|  2 +-
 t/t4035-diff-quiet.sh|  2 +-
 t/t4255-am-submodule.sh  |  2 +-
 t/t5000-tar-tree.sh  |  3 +-
 t/t5304-prune.sh |  2 +-
 t/t5312-prune-corruption.sh  | 11 -
 t/t5500-fetch-pack.sh| 10 ++---
 t/t5510-fetch.sh | 30 ++---
 t/t5526-fetch-submodules.sh  |  4 +-
 t/t5527-fetch-odd-refs.sh|  7 +++
 t/t5537-fetch-shallow.sh |  7 +++
 t/t5700-clone-reference.sh   | 42 +-
 t/t6001-rev-list-graft.sh|  3 +-
 t/t6010-merge-base.sh|  2 +-
 t/t6050-replace.sh   |  4 +-
 t/t6120-describe.sh  |  6 ++-
 t/t6301-for-each-ref-errors.sh   | 12 ++---
 t/t7201-co.sh|  2 +-
 t/t7300-clean.sh | 25 ++-
 t/t7400-submodule-basic.sh   | 22 +-
 t/t7402-submodule-rebase.sh  |  2 +-
 t/t7405-submodule-merge.sh   | 10 ++---
 t/t9104-git-svn-follow-parent.sh |  3 +-
 t/t9115-git-svn-dcommit-funky-renames.sh |  2 +-
 t/t9350-fast-export.sh   |  6 +--
 t/t9902-completion.sh|  4 +-
 t/t9903-bash-prompt.sh   |  2 +-
 t/test-lib-functions.sh  | 53 +-
 t/test-lib.sh| 11 +
 63 files changed, 370 insertions(+), 185 deletions(-)

diff --git a/t/README b/t/README
index 1dc908e..8e047cb 100644
--- a/t/README
+++ b/t/README
@@ -178,6 +178,12 @@ appropriately before running "make".
this feature by setting the GIT_TEST_CHAIN_LINT environment
variable to "1" or "0", respectively.
 
+--ref-storage=::
+   If --ref-storage is set, run tests under an alternate ref
+   backend.  Tests that rely on the original files backend will
+   be skipped.  You may also enable or disable this feature by
+   setting the GIT_TEST_REF_STORAGE environment variable
+
 You can also set the GIT_TEST_INSTALLED environment variable to
 the bindir of an existing git installation to test that installation.
 You still need to have built this git sandbox, fr

<    1   2   3   4   5   6   7   8   9   10   >