[PATCH 15/24] read_raw_ref(): manage own scratch space
From: Michael Haggerty Instead of creating scratch space in resolve_ref_unsafe() and passing it down through resolve_ref_1 to read_raw_ref(), teach read_raw_ref() to manage its own scratch space. This reduces coupling across the functions at the cost of some extra allocations. Also, when read_raw_ref() is implemented for different reference backends, the other implementations might have different scratch space requirements. Note that we now preserve errno across the calls to strbuf_release(), which calls free() and can thus theoretically overwrite errno. Signed-off-by: Michael Haggerty --- refs/files-backend.c | 76 1 file changed, 41 insertions(+), 35 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index d51e778..f752568 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1421,17 +1421,20 @@ static int resolve_missing_loose_ref(const char *refname, * refname will still be valid and unchanged. */ static int read_raw_ref(const char *refname, unsigned char *sha1, - struct strbuf *symref, struct strbuf *sb_path, - struct strbuf *sb_contents, int *flags) + struct strbuf *symref, int *flags) { + struct strbuf sb_contents = STRBUF_INIT; + struct strbuf sb_path = STRBUF_INIT; const char *path; const char *buf; struct stat st; int fd; + int ret = -1; + int save_errno; - strbuf_reset(sb_path); - strbuf_git_path(sb_path, "%s", refname); - path = sb_path->buf; + strbuf_reset(&sb_path); + strbuf_git_path(&sb_path, "%s", refname); + path = sb_path.buf; stat_ref: /* @@ -1446,36 +1449,38 @@ stat_ref: if (lstat(path, &st) < 0) { if (errno != ENOENT) - return -1; + goto out; if (resolve_missing_loose_ref(refname, sha1, flags)) { errno = ENOENT; - return -1; + goto out; } - return 0; + ret = 0; + goto out; } /* Follow "normalized" - ie "refs/.." symlinks by hand */ if (S_ISLNK(st.st_mode)) { - strbuf_reset(sb_contents); - if (strbuf_readlink(sb_contents, path, 0) < 0) { + strbuf_reset(&sb_contents); + if (strbuf_readlink(&sb_contents, path, 0) < 0) { if (errno == ENOENT || errno == EINVAL) /* inconsistent with lstat; retry */ goto stat_ref; else - return -1; + goto out; } - if (starts_with(sb_contents->buf, "refs/") && - !check_refname_format(sb_contents->buf, 0)) { - strbuf_swap(sb_contents, symref); + if (starts_with(sb_contents.buf, "refs/") && + !check_refname_format(sb_contents.buf, 0)) { + strbuf_swap(&sb_contents, symref); *flags |= REF_ISSYMREF; - return 0; + ret = 0; + goto out; } } /* Is it a directory? */ if (S_ISDIR(st.st_mode)) { errno = EISDIR; - return -1; + goto out; } /* @@ -1488,18 +1493,18 @@ stat_ref: /* inconsistent with lstat; retry */ goto stat_ref; else - return -1; + goto out; } - strbuf_reset(sb_contents); - if (strbuf_read(sb_contents, fd, 256) < 0) { + strbuf_reset(&sb_contents); + if (strbuf_read(&sb_contents, fd, 256) < 0) { int save_errno = errno; close(fd); errno = save_errno; - return -1; + goto out; } close(fd); - strbuf_rtrim(sb_contents); - buf = sb_contents->buf; + strbuf_rtrim(&sb_contents); + buf = sb_contents.buf; if (starts_with(buf, "ref:")) { buf += 4; while (isspace(*buf)) @@ -1508,7 +1513,8 @@ stat_ref: strbuf_reset(symref); strbuf_addstr(symref, buf); *flags |= REF_ISSYMREF; - return 0; + ret = 0; + goto out; } /* @@ -1519,10 +1525,17 @@ stat_ref: (buf[40] != '\0' && !isspace(buf[40]))) { *flags |= REF_ISBROKEN; errno = EINVAL; - return -1; + goto out; } - return 0; + ret = 0; + +out: + save_errno = errno; + strbuf_release(&sb_path); +
[PATCH 02/24] refs: move for_each_*ref* functions into common code
Make do_for_each_ref take a submodule as an argument instead of a ref_cache. Since all for_each_*ref* functions are defined in terms of do_for_each_ref, we can then move them into the common code. Later, we can simply make do_for_each_ref into a backend function. Signed-off-by: David Turner Signed-off-by: Junio C Hamano --- refs.c | 52 +++ refs/files-backend.c | 62 +--- refs/refs-internal.h | 9 3 files changed, 66 insertions(+), 57 deletions(-) diff --git a/refs.c b/refs.c index 6b8c16c..f0feff7 100644 --- a/refs.c +++ b/refs.c @@ -1103,3 +1103,55 @@ int head_ref(each_ref_fn fn, void *cb_data) { return head_ref_submodule(NULL, fn, cb_data); } + +int for_each_ref(each_ref_fn fn, void *cb_data) +{ + return do_for_each_ref(NULL, "", fn, 0, 0, cb_data); +} + +int for_each_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data) +{ + return do_for_each_ref(submodule, "", fn, 0, 0, cb_data); +} + +int for_each_ref_in(const char *prefix, each_ref_fn fn, void *cb_data) +{ + return do_for_each_ref(NULL, prefix, fn, strlen(prefix), 0, cb_data); +} + +int for_each_fullref_in(const char *prefix, each_ref_fn fn, void *cb_data, unsigned int broken) +{ + unsigned int flag = 0; + + if (broken) + flag = DO_FOR_EACH_INCLUDE_BROKEN; + return do_for_each_ref(NULL, prefix, fn, 0, flag, cb_data); +} + +int for_each_ref_in_submodule(const char *submodule, const char *prefix, + each_ref_fn fn, void *cb_data) +{ + return do_for_each_ref(submodule, prefix, fn, strlen(prefix), 0, cb_data); +} + +int for_each_replace_ref(each_ref_fn fn, void *cb_data) +{ + return do_for_each_ref(NULL, git_replace_ref_base, fn, + strlen(git_replace_ref_base), 0, cb_data); +} + +int for_each_namespaced_ref(each_ref_fn fn, void *cb_data) +{ + struct strbuf buf = STRBUF_INIT; + int ret; + strbuf_addf(&buf, "%srefs/", get_git_namespace()); + ret = do_for_each_ref(NULL, buf.buf, fn, 0, 0, cb_data); + strbuf_release(&buf); + return ret; +} + +int for_each_rawref(each_ref_fn fn, void *cb_data) +{ + return do_for_each_ref(NULL, "", fn, 0, + DO_FOR_EACH_INCLUDE_BROKEN, cb_data); +} diff --git a/refs/files-backend.c b/refs/files-backend.c index c07dc41..9676ec2 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -513,9 +513,6 @@ static void sort_ref_dir(struct ref_dir *dir) dir->sorted = dir->nr = i; } -/* Include broken references in a do_for_each_ref*() iteration: */ -#define DO_FOR_EACH_INCLUDE_BROKEN 0x01 - /* * Return true iff the reference described by entry can be resolved to * an object in the database. Emit a warning if the referred-to @@ -1727,10 +1724,13 @@ static int do_for_each_entry(struct ref_cache *refs, const char *base, * value, stop the iteration and return that value; otherwise, return * 0. */ -static int do_for_each_ref(struct ref_cache *refs, const char *base, - each_ref_fn fn, int trim, int flags, void *cb_data) +int do_for_each_ref(const char *submodule, const char *base, + each_ref_fn fn, int trim, int flags, void *cb_data) { struct ref_entry_cb data; + struct ref_cache *refs; + + refs = get_ref_cache(submodule); data.base = base; data.trim = trim; data.flags = flags; @@ -1745,58 +1745,6 @@ static int do_for_each_ref(struct ref_cache *refs, const char *base, return do_for_each_entry(refs, base, do_one_ref, &data); } -int for_each_ref(each_ref_fn fn, void *cb_data) -{ - return do_for_each_ref(&ref_cache, "", fn, 0, 0, cb_data); -} - -int for_each_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data) -{ - return do_for_each_ref(get_ref_cache(submodule), "", fn, 0, 0, cb_data); -} - -int for_each_ref_in(const char *prefix, each_ref_fn fn, void *cb_data) -{ - return do_for_each_ref(&ref_cache, prefix, fn, strlen(prefix), 0, cb_data); -} - -int for_each_fullref_in(const char *prefix, each_ref_fn fn, void *cb_data, unsigned int broken) -{ - unsigned int flag = 0; - - if (broken) - flag = DO_FOR_EACH_INCLUDE_BROKEN; - return do_for_each_ref(&ref_cache, prefix, fn, 0, flag, cb_data); -} - -int for_each_ref_in_submodule(const char *submodule, const char *prefix, - each_ref_fn fn, void *cb_data) -{ - return do_for_each_ref(get_ref_cache(submodule), prefix, fn, strlen(prefix), 0, cb_data); -} - -int for_each_replace_ref(each_ref_fn fn, void *cb_data) -{ - return do_for_each_ref(&ref_cache, git_replace_ref_base, fn, - strlen(git_replace_ref_base), 0, cb_data); -} - -int for
[PATCH 14/24] files-backend: break out ref reading
Refactor resolve_ref_1 in terms of a new function read_raw_ref, which is responsible for reading ref data from the ref storage. Later, we will make read_raw_ref a pluggable backend function, and make resolve_ref_unsafe common. Signed-off-by: David Turner Helped-by: Duy Nguyen Signed-off-by: Junio C Hamano Signed-off-by: Michael Haggerty --- refs/files-backend.c | 244 ++- 1 file changed, 145 insertions(+), 99 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index b865ba5..d51e778 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1390,6 +1390,141 @@ static int resolve_missing_loose_ref(const char *refname, return -1; } +/* + * Read a raw ref from the filesystem or packed refs file. + * + * If the ref is a sha1, fill in sha1 and return 0. + * + * If the ref is symbolic, fill in *symref with the referrent + * (e.g. "refs/heads/master") and return 0. The caller is responsible + * for validating the referrent. Set REF_ISSYMREF in flags. + * + * If the ref doesn't exist, set errno to ENOENT and return -1. + * + * If the ref exists but is neither a symbolic ref nor a sha1, it is + * broken. Set REF_ISBROKEN in flags, set errno to EINVAL, and return + * -1. + * + * If there is another error reading the ref, set errno appropriately and + * return -1. + * + * Backend-specific flags might be set in flags as well, regardless of + * outcome. + * + * sb_path is workspace: the caller should allocate and free it. + * + * It is OK for refname to point into symref. In this case: + * - if the function succeeds with REF_ISSYMREF, symref will be + * overwritten and the memory pointed to by refname might be changed + * or even freed. + * - in all other cases, symref will be untouched, and therefore + * refname will still be valid and unchanged. + */ +static int read_raw_ref(const char *refname, unsigned char *sha1, + struct strbuf *symref, struct strbuf *sb_path, + struct strbuf *sb_contents, int *flags) +{ + const char *path; + const char *buf; + struct stat st; + int fd; + + strbuf_reset(sb_path); + strbuf_git_path(sb_path, "%s", refname); + path = sb_path->buf; + +stat_ref: + /* +* We might have to loop back here to avoid a race +* condition: first we lstat() the file, then we try +* to read it as a link or as a file. But if somebody +* changes the type of the file (file <-> directory +* <-> symlink) between the lstat() and reading, then +* we don't want to report that as an error but rather +* try again starting with the lstat(). +*/ + + if (lstat(path, &st) < 0) { + if (errno != ENOENT) + return -1; + if (resolve_missing_loose_ref(refname, sha1, flags)) { + errno = ENOENT; + return -1; + } + return 0; + } + + /* Follow "normalized" - ie "refs/.." symlinks by hand */ + if (S_ISLNK(st.st_mode)) { + strbuf_reset(sb_contents); + if (strbuf_readlink(sb_contents, path, 0) < 0) { + if (errno == ENOENT || errno == EINVAL) + /* inconsistent with lstat; retry */ + goto stat_ref; + else + return -1; + } + if (starts_with(sb_contents->buf, "refs/") && + !check_refname_format(sb_contents->buf, 0)) { + strbuf_swap(sb_contents, symref); + *flags |= REF_ISSYMREF; + return 0; + } + } + + /* Is it a directory? */ + if (S_ISDIR(st.st_mode)) { + errno = EISDIR; + return -1; + } + + /* +* Anything else, just open it and try to use it as +* a ref +*/ + fd = open(path, O_RDONLY); + if (fd < 0) { + if (errno == ENOENT) + /* inconsistent with lstat; retry */ + goto stat_ref; + else + return -1; + } + strbuf_reset(sb_contents); + if (strbuf_read(sb_contents, fd, 256) < 0) { + int save_errno = errno; + close(fd); + errno = save_errno; + return -1; + } + close(fd); + strbuf_rtrim(sb_contents); + buf = sb_contents->buf; + if (starts_with(buf, "ref:")) { + buf += 4; + while (isspace(*buf)) + buf++; + + strbuf_reset(symref); + strbuf_addstr(symref, buf); + *flags |
[PATCH 12/24] resolve_ref_1(): reorder code
From: Michael Haggerty There is no need to adjust *flags if we're just about to fail. Signed-off-by: Michael Haggerty --- refs/files-backend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index 69ec903..60f1493 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1542,13 +1542,13 @@ static const char *resolve_ref_1(const char *refname, return refname; } if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) { - *flags |= REF_ISBROKEN; - if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) || !refname_is_safe(refname)) { errno = EINVAL; return NULL; } + + *flags |= REF_ISBROKEN; bad_name = 1; } } -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 13/24] resolve_ref_1(): eliminate local variable "bad_name"
From: Michael Haggerty We can use (*flags & REF_BAD_NAME) for that purpose. Signed-off-by: Michael Haggerty --- refs/files-backend.c | 13 + 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index 60f1493..b865ba5 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1399,19 +1399,17 @@ static const char *resolve_ref_1(const char *refname, struct strbuf *sb_path, struct strbuf *sb_contents) { - int bad_name = 0; int symref_count; *flags = 0; if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) { - *flags |= REF_BAD_NAME; - if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) || !refname_is_safe(refname)) { errno = EINVAL; return NULL; } + /* * dwim_ref() uses REF_ISBROKEN to distinguish between * missing refs and refs that were present but invalid, @@ -1420,7 +1418,7 @@ static const char *resolve_ref_1(const char *refname, * We don't know whether the ref exists, so don't set * REF_ISBROKEN yet. */ - bad_name = 1; + *flags |= REF_BAD_NAME; } for (symref_count = 0; symref_count < MAXDEPTH; symref_count++) { @@ -1452,7 +1450,7 @@ static const char *resolve_ref_1(const char *refname, } hashclr(sha1); } - if (bad_name) { + if (*flags & REF_BAD_NAME) { hashclr(sha1); *flags |= REF_ISBROKEN; } @@ -1524,7 +1522,7 @@ static const char *resolve_ref_1(const char *refname, errno = EINVAL; return NULL; } - if (bad_name) { + if (*flags & REF_BAD_NAME) { hashclr(sha1); *flags |= REF_ISBROKEN; } @@ -1548,8 +1546,7 @@ static const char *resolve_ref_1(const char *refname, return NULL; } - *flags |= REF_ISBROKEN; - bad_name = 1; + *flags |= REF_ISBROKEN | REF_BAD_NAME; } } -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 01/24] refs: move head_ref{,_submodule} to the common code
These don't use any backend-specific functions. These were previously defined in terms of the do_head_ref helper function, but since they are otherwise identical, we don't need that function. Signed-off-by: David Turner Signed-off-by: Junio C Hamano --- refs.c | 23 +++ refs/files-backend.c | 28 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/refs.c b/refs.c index b0e6ece..6b8c16c 100644 --- a/refs.c +++ b/refs.c @@ -1080,3 +1080,26 @@ int rename_ref_available(const char *oldname, const char *newname) strbuf_release(&err); return ret; } + +int head_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data) +{ + struct object_id oid; + int flag; + + if (submodule) { + if (resolve_gitlink_ref(submodule, "HEAD", oid.hash) == 0) + return fn("HEAD", &oid, 0, cb_data); + + return 0; + } + + if (!read_ref_full("HEAD", RESOLVE_REF_READING, oid.hash, &flag)) + return fn("HEAD", &oid, flag, cb_data); + + return 0; +} + +int head_ref(each_ref_fn fn, void *cb_data) +{ + return head_ref_submodule(NULL, fn, cb_data); +} diff --git a/refs/files-backend.c b/refs/files-backend.c index 81f68f8..c07dc41 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1745,34 +1745,6 @@ static int do_for_each_ref(struct ref_cache *refs, const char *base, return do_for_each_entry(refs, base, do_one_ref, &data); } -static int do_head_ref(const char *submodule, each_ref_fn fn, void *cb_data) -{ - struct object_id oid; - int flag; - - if (submodule) { - if (resolve_gitlink_ref(submodule, "HEAD", oid.hash) == 0) - return fn("HEAD", &oid, 0, cb_data); - - return 0; - } - - if (!read_ref_full("HEAD", RESOLVE_REF_READING, oid.hash, &flag)) - return fn("HEAD", &oid, flag, cb_data); - - return 0; -} - -int head_ref(each_ref_fn fn, void *cb_data) -{ - return do_head_ref(NULL, fn, cb_data); -} - -int head_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data) -{ - return do_head_ref(submodule, fn, cb_data); -} - int for_each_ref(each_ref_fn fn, void *cb_data) { return do_for_each_ref(&ref_cache, "", fn, 0, 0, cb_data); -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: git segfaults on older Solaris releases
On Thu, 2016-04-07 at 11:50 -0700, Junio C Hamano wrote: > Junio C Hamano writes: > > > "Tom G. Christensen" writes: > > > > > The reason for the crash is simple, a null value was passed to > > > the 's' > > > format for the *printf family of functions. > > > ... > > > Passing a null value to the 's' format is explicitly documented > > > as > > > giving undefined results on Solaris, even on Solaris 11(2). > > > > Do you mean > > > > *printf("...%.*s...", ..., 0, NULL, ...) > > > > i.e. you saw a NULL passed only when we use %.*s with width=0? > > So, I've looked at places where we use "%.*s" with "prefix" nearby, > and it seems that this is the only place. > > The "prefix" being a NULL is a perfectly valid state throughout the > system and means a different thing than it being an empty string, so > it is valid for callers of prefix_path() and prefix_path_gently() to > pass prefix=NULL as long as they pass len=0. TIOLI: The code below does not reflect the "as long as they pass len=0" condition. Maybe add an assert for that? -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 03/16] index-helper: new daemon for caching index and related stuff
On Thu, 2016-04-07 at 16:14 +0200, Johannes Schindelin wrote: > Hi, > > On Thu, 7 Apr 2016, Johannes Sixt wrote: > > > Am 07.04.2016 um 00:11 schrieb David Turner: > > > +static void share_index(struct index_state *istate, struct shm > > > *is) > > > +{ > > > + void *new_mmap; > > > + if (istate->mmap_size <= 20 || > > > + hashcmp(istate->sha1, > > > + (unsigned char *)istate->mmap + istate > > > ->mmap_size - 20) > > > > > > > > + !hashcmp(istate->sha1, is->sha1) || > > > + git_shm_map(O_CREAT | O_EXCL | O_RDWR, 0700, istate > > > ->mmap_size, > > > + &new_mmap, PROT_READ | PROT_WRITE, > > > MAP_SHARED, > > > + "git-index-%s", sha1_to_hex(istate > > > ->sha1)) < 0) > > > > Builds which have NO_MMAP set require that MAP_PRIVATE is set. So I > > would > > guess that at this point you leave those builds behind. Unless we > > declare > > such systems as hopelessly outdated and remove NO_MMAP and > > compat/mmap.c or > > you support index-helper only when NO_MMAP is not set. > > I vote for the latter: support index-helper only when NO_MMAP is > unset. Will fix, thanks. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 05/16] daemonize(): set a flag before exiting the main process
From: Nguyễn Thái Ngọc Duy This allows signal handlers and atexit functions to realize this situation and not clean up. Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/gc.c | 2 +- cache.h | 2 +- daemon.c | 2 +- setup.c | 4 +++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index c583aad..37180de 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -385,7 +385,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) * failure to daemonize is ok, we'll continue * in foreground */ - daemonized = !daemonize(); + daemonized = !daemonize(NULL); } } else add_repack_all_option(); diff --git a/cache.h b/cache.h index 4b678e9..0aeb994 100644 --- a/cache.h +++ b/cache.h @@ -530,7 +530,7 @@ extern int set_git_dir_init(const char *git_dir, const char *real_git_dir, int); extern int init_db(const char *template_dir, unsigned int flags); extern void sanitize_stdfds(void); -extern int daemonize(void); +extern int daemonize(int *); #define alloc_nr(x) (((x)+16)*3/2) diff --git a/daemon.c b/daemon.c index 8d45c33..a5cf954 100644 --- a/daemon.c +++ b/daemon.c @@ -1365,7 +1365,7 @@ int main(int argc, char **argv) return execute(); if (detach) { - if (daemonize()) + if (daemonize(NULL)) die("--detach not supported on this platform"); } else sanitize_stdfds(); diff --git a/setup.c b/setup.c index de1a2a7..9adf13f 100644 --- a/setup.c +++ b/setup.c @@ -1017,7 +1017,7 @@ void sanitize_stdfds(void) close(fd); } -int daemonize(void) +int daemonize(int *daemonized) { #ifdef NO_POSIX_GOODIES errno = ENOSYS; @@ -1029,6 +1029,8 @@ int daemonize(void) case -1: die_errno("fork failed"); default: + if (daemonized) + *daemonized = 1; exit(0); } if (setsid() == -1) -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 16/16] read-cache: config for waiting for index-helper
When we poke index-helper, and index-helper is using watchman, we want to wait for a response (showing that the watchman extension shm has been prepared). If it's not using watchman, we don't. So add a new config, core.waitforindexhelper, with sensible defaults, to configure this behavior. Signed-off-by: David Turner --- Documentation/config.txt | 6 ++ cache.h | 1 + config.c | 5 + environment.c| 5 + read-cache.c | 5 - 5 files changed, 21 insertions(+), 1 deletion(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index 8ec8824..f61e6fe 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -670,6 +670,12 @@ Likewise, when the `LV` environment variable is unset, Git sets it to `-c`. You can override this setting by exporting `LV` with another value or setting `core.pager` to `lv +c`. +core.waitforindexhelper:: + If true, git status and other commands which use the index + will wait for a response from the index-helper before continuing; + this gives time for the index-helper to communicate with watchman + and give information about modified files. + core.whitespace:: A comma separated list of common whitespace problems to notice. 'git diff' will use `color.diff.whitespace` to diff --git a/cache.h b/cache.h index 5713835..db66451 100644 --- a/cache.h +++ b/cache.h @@ -691,6 +691,7 @@ extern char *git_replace_ref_base; extern int fsync_object_files; extern int core_preload_index; extern int core_watchman_sync_timeout; +extern int wait_for_index_helper; extern int core_apply_sparse_checkout; extern int precomposed_unicode; extern int protect_hfs; diff --git a/config.c b/config.c index e6dc141..5f1b8bd 100644 --- a/config.c +++ b/config.c @@ -887,6 +887,11 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.waitforindexhelper")) { + wait_for_index_helper = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.createobject")) { if (!strcmp(value, "rename")) object_creation_mode = OBJECT_CREATION_USES_RENAMES; diff --git a/environment.c b/environment.c index 35e03c7..c7fb0a9 100644 --- a/environment.c +++ b/environment.c @@ -95,6 +95,11 @@ int core_preload_index = 1; int ignore_untracked_cache_config; int core_watchman_sync_timeout = 300; +#ifdef USE_WATCHMAN +int wait_for_index_helper = 1; +#else +int wait_for_index_helper = 0; +#endif /* This is set by setup_git_dir_gently() and/or git_default_config() */ diff --git a/read-cache.c b/read-cache.c index 470cd7b..23dbb73 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1800,7 +1800,10 @@ static int poke_daemon(struct index_state *istate, if (refresh_cache) { ret = write_in_full(fd, "refresh", 8) != 8; } else { - ret = poke_and_wait_for_reply(fd); + if (wait_for_index_helper) + ret = poke_and_wait_for_reply(fd); + else + ret = write_in_full(fd, "poke", 5) != 5; } close(fd); -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 09/16] index-helper: use watchman to avoid refreshing index with lstat()
From: Nguyễn Thái Ngọc Duy Watchman is hidden behind index-helper. Before git tries to read the index from shm, it notifies index-helper through the socket and waits for index-helper to prepare shm. index-helper then contacts watchman, updates 'WAMA' extension and put it in a separate shm and wakes git up with a reply to git's socket. Git uses this extension to not lstat unchanged entries. Git only trusts the 'WAMA' extension when it's received from the separate shm, not from disk. Unmarked entries are "clean". Marked entries are dirty from watchman point of view. If it finds out some entries are 'watchman-dirty', but are really unchanged (e.g. the file was changed, then reverted back), then Git will clear the marking in 'WAMA' before writing it down. Hiding watchman behind index-helper means you need both daemons. You can't run watchman alone. Not so good. But on the other hand, 'git' binary is not linked to watchman/json libraries, which is good for packaging. Core git package will run fine without watchman-related packages. If they need watchman, they can install git-index-helper and dependencies. This also lets us trust anything in the untracked cache that we haven't marked invalid, saving those stat() calls. Another reason for tying watchman to index-helper is, when used with untracked cache, we need to keep track of $GIT_WORK_TREE file listing. That kind of list can be kept in index-helper. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: David Turner --- Documentation/git-index-helper.txt | 6 + cache.h| 8 ++ dir.c | 23 +++- dir.h | 3 + index-helper.c | 102 ++--- read-cache.c | 218 + 6 files changed, 319 insertions(+), 41 deletions(-) diff --git a/Documentation/git-index-helper.txt b/Documentation/git-index-helper.txt index bb344cf..9d1ad0e 100644 --- a/Documentation/git-index-helper.txt +++ b/Documentation/git-index-helper.txt @@ -51,6 +51,12 @@ following commands are used to control the daemon: Let the daemon know the index is to be read. It keeps the daemon alive longer, unless `--exit-after=0` is used. +"poke ": + Like "poke", but replies with "OK". If watchman is + configured, index-helper queries watchman, then prepares a + shared memory object with the watchman index extension before + replying. + All commands and replies are terminated by a 0 byte. GIT diff --git a/cache.h b/cache.h index 37f211b..e43a6e1 100644 --- a/cache.h +++ b/cache.h @@ -558,6 +558,7 @@ extern int daemonize(int *); /* Initialize and use the cache information */ struct lock_file; +extern int verify_index(const struct index_state *); extern int read_index(struct index_state *); extern int read_index_preload(struct index_state *, const struct pathspec *pathspec); extern int do_read_index(struct index_state *istate, const char *path, @@ -565,6 +566,7 @@ extern int do_read_index(struct index_state *istate, const char *path, extern int read_index_from(struct index_state *, const char *path); extern int is_index_unborn(struct index_state *); extern int read_index_unmerged(struct index_state *); +extern void write_watchman_ext(struct strbuf *sb, struct index_state* istate); #define COMMIT_LOCK(1 << 0) #define CLOSE_LOCK (1 << 1) extern int write_locked_index(struct index_state *, struct lock_file *lock, unsigned flags); @@ -1839,4 +1841,10 @@ void sleep_millisec(int millisec); */ void safe_create_dir(const char *dir, int share); +/* + * Open a socket to the git index-helper. Return the fd of that + * socket, or -1 on error. + */ +int connect_to_index_helper(void); + #endif /* CACHE_H */ diff --git a/dir.c b/dir.c index 69e0be6..5058b29 100644 --- a/dir.c +++ b/dir.c @@ -597,9 +597,9 @@ static void trim_trailing_spaces(char *buf) * * If "name" has the trailing slash, it'll be excluded in the search. */ -static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc, - struct untracked_cache_dir *dir, - const char *name, int len) +struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc, +struct untracked_cache_dir *dir, +const char *name, int len) { int first, last; struct untracked_cache_dir *d; @@ -1726,6 +1726,17 @@ static int valid_cached_dir(struct dir_struct *dir, if (!untracked) return 0; + if (dir->untracked->use_watchman) { + /* +* With watchman, we can trust the untracked cache's +
[PATCH v3 15/16] index-helper: optionally automatically run
Introduce a new config option, indexhelper.autorun, to automatically run git index-helper before starting up a builtin git command. This enables users to keep index-helper running without manual intervention. Signed-off-by: David Turner --- Documentation/config.txt | 4 git.c| 35 ++- t/t7900-index-helper.sh | 16 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index 2cd6bdd..8ec8824 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1852,6 +1852,10 @@ index.version:: Specify the version with which new index files should be initialized. This does not affect existing repositories. +indexhelper.autorun:: + Automatically run git index-helper when any builtin git + command is run inside a repository. + init.templateDir:: Specify the directory from which templates will be copied. (See the "TEMPLATE DIRECTORY" section of linkgit:git-init[1].) diff --git a/git.c b/git.c index 6cc0c07..7d27782 100644 --- a/git.c +++ b/git.c @@ -521,6 +521,37 @@ static void strip_extension(const char **argv) #define strip_extension(cmd) #endif +static int want_auto_index_helper(void) +{ + int want = -1; + const char *value = NULL; + const char *key = "indexhelper.autorun"; + + if (git_config_key_is_valid(key) && + !git_config_get_value(key, &value)) { + int b = git_config_maybe_bool(key, value); + want = b >= 0 ? b : 0; + } + return want; +} + +static void maybe_run_index_helper(struct cmd_struct *cmd) +{ + const char *argv[] = {"git-index-helper", "--detach", "--autorun", NULL}; + + if (!(cmd->option & NEED_WORK_TREE)) + return; + + if (want_auto_index_helper() <= 0) + return; + + trace_argv_printf(argv, "trace: auto index-helper:"); + + if (run_command_v_opt(argv, + RUN_SILENT_EXEC_FAILURE | RUN_CLEAN_ON_EXIT)) + warning(_("You specified indexhelper.autorun, but running git-index-helper failed.")); +} + static void handle_builtin(int argc, const char **argv) { const char *cmd; @@ -536,8 +567,10 @@ static void handle_builtin(int argc, const char **argv) } builtin = get_builtin(cmd); - if (builtin) + if (builtin) { + maybe_run_index_helper(builtin); exit(run_builtin(builtin, argc, argv)); + } } static void execv_dashed_external(const char **argv) diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh index 3925ee3..6e11f8c 100755 --- a/t/t7900-index-helper.sh +++ b/t/t7900-index-helper.sh @@ -50,4 +50,20 @@ test_expect_success 'index-helper is quiet with --autorun' ' git index-helper --autorun ' +test_expect_success 'index-helper autorun works' ' + rm -f .git/index-helper.path && + git status && + test_path_is_missing .git/index-helper.path && + test_config indexhelper.autorun true && + git status && + test -L .git/index-helper.path && + git status 2>err && + test -L .git/index-helper.path && + ! grep -q . err && + git index-helper --kill && + test_config indexhelper.autorun false && + git status && + test_path_is_missing .git/index-helper.path +' + test_done -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 14/16] index-helper: autorun mode
Soon, we'll want to automatically start index-helper, so we need a mode that silently exits if it can't start up (either because it's not in a git dir, or because another one is already running). Signed-off-by: David Turner --- index-helper.c | 29 +++-- t/t7900-index-helper.sh | 8 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/index-helper.c b/index-helper.c index b62d805..4d08ff4 100644 --- a/index-helper.c +++ b/index-helper.c @@ -382,8 +382,9 @@ static void request_kill(void) int main(int argc, char **argv) { const char *prefix; - int idle_in_seconds = 600, detach = 0, kill = 0; + int idle_in_seconds = 600, detach = 0, kill = 0, autorun = 0; int fd; + int nongit; struct strbuf socket_path = STRBUF_INIT; struct option options[] = { OPT_INTEGER(0, "exit-after", &idle_in_seconds, @@ -392,6 +393,7 @@ int main(int argc, char **argv) "verify shared memory after creating"), OPT_BOOL(0, "detach", &detach, "detach the process"), OPT_BOOL(0, "kill", &kill, "request that existing index helper processes exit"), + OPT_BOOL(0, "autorun", &autorun, "this is an automatic run of git index-helper, so certain errors can be solved by silently exiting"), OPT_END() }; @@ -401,7 +403,14 @@ int main(int argc, char **argv) if (argc == 2 && !strcmp(argv[1], "-h")) usage_with_options(usage_text, options); - prefix = setup_git_directory(); + prefix = setup_git_directory_gently(&nongit); + if (nongit) { + if (autorun) + exit(0); + else + die(_("not a git repository")); + } + if (parse_options(argc, (const char **)argv, prefix, options, usage_text, 0)) die(_("too many arguments")); @@ -415,10 +424,18 @@ int main(int argc, char **argv) /* check that no other copy is running */ fd = connect_to_index_helper(); - if (fd > 0) - die(_("Already running")); - if (errno != ECONNREFUSED && errno != ENOENT) - die_errno(_("Unexpected error checking socket")); + if (fd > 0) { + if (autorun) + exit(0); + else + die(_("Already running")); + } + if (errno != ECONNREFUSED && errno != ENOENT) { + if (autorun) + return 0; + else + die_errno(_("Unexpected error checking socket")); + } atexit(cleanup); sigchain_push_common(cleanup_on_signal); diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh index 3f94d8a..3925ee3 100755 --- a/t/t7900-index-helper.sh +++ b/t/t7900-index-helper.sh @@ -42,4 +42,12 @@ test_expect_success 'index-helper will not start if already running' ' grep "Already running" err ' +test_expect_success 'index-helper is quiet with --autorun' ' + test_when_finished "git index-helper --kill" && + git index-helper --kill && + git index-helper --detach && + test -L .git/index-helper.path && + git index-helper --autorun +' + test_done -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 11/16] unpack-trees: preserve index extensions
Make git checkout (and other unpack_tree operations) preserve the untracked cache and watchman status. This is valuable for two reasons: 1. Often, an unpack_tree operation will not touch large parts of the working tree, and thus most of the untracked cache will continue to be valid. 2. Even if the untracked cache were entirely invalidated by such an operation, the user has signaled their intention to have such a cache, and we don't want to throw it away. The same logic applies to the watchman state. Signed-off-by: David Turner --- cache.h | 1 + read-cache.c | 8 t/t7063-status-untracked-cache.sh | 22 ++ t/test-lib-functions.sh | 4 unpack-trees.c| 1 + 5 files changed, 36 insertions(+) diff --git a/cache.h b/cache.h index e43a6e1..5713835 100644 --- a/cache.h +++ b/cache.h @@ -571,6 +571,7 @@ extern void write_watchman_ext(struct strbuf *sb, struct index_state* istate); #define CLOSE_LOCK (1 << 1) extern int write_locked_index(struct index_state *, struct lock_file *lock, unsigned flags); extern int discard_index(struct index_state *); +extern void move_index_extensions(struct index_state *dst, struct index_state *src); extern int unmerged_index(const struct index_state *); extern int verify_path(const char *path); extern int index_dir_exists(struct index_state *istate, const char *name, int namelen); diff --git a/read-cache.c b/read-cache.c index b6e9244..470cd7b 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2725,3 +2725,11 @@ void stat_validity_update(struct stat_validity *sv, int fd) fill_stat_data(sv->sd, &st); } } + +void move_index_extensions(struct index_state *dst, struct index_state *src) +{ + dst->untracked = src->untracked; + src->untracked = NULL; + dst->last_update = src->last_update; + src->last_update = NULL; +} diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh index a971884..083516d 100755 --- a/t/t7063-status-untracked-cache.sh +++ b/t/t7063-status-untracked-cache.sh @@ -646,4 +646,26 @@ test_expect_success 'test ident field is working' ' test_cmp ../expect ../err ' +test_expect_success 'untracked cache survives a checkout' ' + git commit --allow-empty -m empty && + test-dump-untracked-cache >../before && + test_when_finished "git checkout master" && + git checkout -b other_branch && + test-dump-untracked-cache >../after && + test_cmp ../before ../after && + test_commit test && + test-dump-untracked-cache >../before && + git checkout master && + test-dump-untracked-cache >../after && + test_cmp ../before ../after +' + +test_expect_success 'untracked cache survives a commit' ' + test-dump-untracked-cache >../before && + git add done/two && + git commit -m commit && + test-dump-untracked-cache >../after && + test_cmp ../before ../after +' + test_done diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index 8d99eb3..e974b5b 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -186,6 +186,10 @@ test_commit () { test_tick fi && git commit $signoff -m "$1" && + if [ "$(git config core.bare)" = false ] + then + git update-index --force-untracked-cache + fi git tag "${4:-$1}" } diff --git a/unpack-trees.c b/unpack-trees.c index 9f55cc2..fc90eb3 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1215,6 +1215,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options WRITE_TREE_SILENT | WRITE_TREE_REPAIR); } + move_index_extensions(&o->result, o->dst_index); discard_index(o->dst_index); *o->dst_index = o->result; } else { -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 13/16] index-helper: don't run if already running
Signed-off-by: David Turner --- index-helper.c | 7 +++ t/t7900-index-helper.sh | 9 + 2 files changed, 16 insertions(+) diff --git a/index-helper.c b/index-helper.c index a3f6a74..b62d805 100644 --- a/index-helper.c +++ b/index-helper.c @@ -413,6 +413,13 @@ int main(int argc, char **argv) return 0; } + /* check that no other copy is running */ + fd = connect_to_index_helper(); + if (fd > 0) + die(_("Already running")); + if (errno != ECONNREFUSED && errno != ENOENT) + die_errno(_("Unexpected error checking socket")); + atexit(cleanup); sigchain_push_common(cleanup_on_signal); diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh index b1fa96e..3f94d8a 100755 --- a/t/t7900-index-helper.sh +++ b/t/t7900-index-helper.sh @@ -33,4 +33,13 @@ test_expect_success 'index-helper creates usable path file and can be killed' ' test_path_is_missing .git/index-helper.path ' +test_expect_success 'index-helper will not start if already running' ' + test_when_finished "git index-helper --kill" && + git index-helper --detach && + test -L .git/index-helper.path && + test_must_fail git index-helper 2>err && + test -L .git/index-helper.path && + grep "Already running" err +' + test_done -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 12/16] index-helper: kill mode
Add a new command (and command-line arg) to allow index-helpers to exit cleanly. This is mainly useful for tests. Signed-off-by: David Turner --- index-helper.c | 31 ++- t/t7900-index-helper.sh | 13 + 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/index-helper.c b/index-helper.c index f993ae6..a3f6a74 100644 --- a/index-helper.c +++ b/index-helper.c @@ -276,6 +276,8 @@ static void loop(int fd, int idle_in_seconds) * alive, nothing to do. */ } + } else if (!strcmp(command.buf, "die")) { + break; } else { warning("BUG: Bogus command %s", command.buf); } @@ -358,10 +360,29 @@ static void make_socket_path(struct strbuf *path) strbuf_addstr(path, "/s"); } +static void request_kill(void) +{ + int fd = connect_to_index_helper(); + + if (fd >= 0) { + write_in_full(fd, "die", 4); + close(fd); + } + + /* +* The child will try to do this anyway, but we want to be +* ready to launch a new daemon immediately after this command +* returns. +*/ + + unlink(git_path("index-helper.path")); + return; +} + int main(int argc, char **argv) { const char *prefix; - int idle_in_seconds = 600, detach = 0; + int idle_in_seconds = 600, detach = 0, kill = 0; int fd; struct strbuf socket_path = STRBUF_INIT; struct option options[] = { @@ -370,6 +391,7 @@ int main(int argc, char **argv) OPT_BOOL(0, "strict", &to_verify, "verify shared memory after creating"), OPT_BOOL(0, "detach", &detach, "detach the process"), + OPT_BOOL(0, "kill", &kill, "request that existing index helper processes exit"), OPT_END() }; @@ -384,6 +406,13 @@ int main(int argc, char **argv) options, usage_text, 0)) die(_("too many arguments")); + if (kill) { + if (detach) + die(_("--kill doesn't want any other options")); + request_kill(); + return 0; + } + atexit(cleanup); sigchain_push_common(cleanup_on_signal); diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh index 6a06094..b1fa96e 100755 --- a/t/t7900-index-helper.sh +++ b/t/t7900-index-helper.sh @@ -20,4 +20,17 @@ test_expect_success 'index-helper smoke test' ' test_path_is_missing .git/index-helper.path ' +test_expect_success 'index-helper creates usable path file and can be killed' ' + test_when_finished "git index-helper --kill" && + test_path_is_missing .git/index-helper.path && + git index-helper --detach && + test -L .git/index-helper.path && + sock="$(readlink .git/index-helper.path)" && + test -S "$sock" && + dir=$(dirname "$sock") && + ls -ld "$dir" | grep ^drwx...--- && + git index-helper --kill && + test_path_is_missing .git/index-helper.path +' + test_done -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 04/16] index-helper: add --strict
From: Nguyễn Thái Ngọc Duy There are "holes" in the index-helper approach because the shared memory is not verified again by git. If $USER is compromised, shared memory could be modified. But anyone who could do this could already modify $GIT_DIR/index. A more realistic risk is some bugs in index-helper that produce corrupt shared memory. --strict is added to avoid that. Strictly speaking there's still a very small gap where corrupt shared memory could still be read by git: after we write the trailing SHA-1 in the shared memory (thus signaling "this shm is ready") and before verify_shm() detects an error. Signed-off-by: Nguyễn Thái Ngọc Duy --- Documentation/git-index-helper.txt | 9 +++ cache.h| 1 + index-helper.c | 48 ++ read-cache.c | 9 --- 4 files changed, 64 insertions(+), 3 deletions(-) diff --git a/Documentation/git-index-helper.txt b/Documentation/git-index-helper.txt index 61605e9..b177fb8 100644 --- a/Documentation/git-index-helper.txt +++ b/Documentation/git-index-helper.txt @@ -22,6 +22,15 @@ OPTIONS Exit if the cached index is not accessed for `` seconds. Specify 0 to wait forever. Default is 600. +--strict:: +--no-strict:: + Strict mode makes index-helper verify the shared memory after + it's created. If the result does not match what's read from + $GIT_DIR/index, the shared memory is destroyed. This makes + index-helper take more than double the amount of time required + for reading an index, but because it will happen in the + background, it's not noticable. `--strict` is enabled by default. + NOTES - $GIT_DIR/index-helper.path is a symlink to a Unix domain socket in diff --git a/cache.h b/cache.h index 43fb314..4b678e9 100644 --- a/cache.h +++ b/cache.h @@ -336,6 +336,7 @@ struct index_state { keep_mmap : 1, from_shm : 1, to_shm : 1, +always_verify_trailing_sha1 : 1, initialized : 1; struct hashmap name_hash; struct hashmap dir_hash; diff --git a/index-helper.c b/index-helper.c index 263b066..8288e30 100644 --- a/index-helper.c +++ b/index-helper.c @@ -15,6 +15,7 @@ struct shm { static struct shm shm_index; static struct shm shm_base_index; +static int to_verify = 1; static void release_index_shm(struct shm *is) { @@ -76,11 +77,56 @@ static void share_index(struct index_state *istate, struct shm *is) hashcpy((unsigned char *)new_mmap + istate->mmap_size - 20, is->sha1); } +static int verify_shm(void) +{ + int i; + struct index_state istate; + memset(&istate, 0, sizeof(istate)); + istate.always_verify_trailing_sha1 = 1; + istate.to_shm = 1; + i = read_index(&istate); + if (i != the_index.cache_nr) + goto done; + for (; i < the_index.cache_nr; i++) { + struct cache_entry *base, *ce; + /* namelen is checked separately */ + const unsigned int ondisk_flags = + CE_STAGEMASK | CE_VALID | CE_EXTENDED_FLAGS; + unsigned int ce_flags, base_flags, ret; + base = the_index.cache[i]; + ce = istate.cache[i]; + if (ce->ce_namelen != base->ce_namelen || + strcmp(ce->name, base->name)) { + warning("mismatch at entry %d", i); + break; + } + ce_flags = ce->ce_flags; + base_flags = base->ce_flags; + /* only on-disk flags matter */ + ce->ce_flags &= ondisk_flags; + base->ce_flags &= ondisk_flags; + ret = memcmp(&ce->ce_stat_data, &base->ce_stat_data, +offsetof(struct cache_entry, name) - +offsetof(struct cache_entry, ce_stat_data)); + ce->ce_flags = ce_flags; + base->ce_flags = base_flags; + if (ret) { + warning("mismatch at entry %d", i); + break; + } + } +done: + discard_index(&istate); + return i == the_index.cache_nr; +} + static void share_the_index(void) { if (the_index.split_index && the_index.split_index->base) share_index(the_index.split_index->base, &shm_base_index); share_index(&the_index, &shm_index); + if (to_verify && !verify_shm()) + cleanup_shm(); discard_index(&the_index); } @@ -249,6 +295,8 @@ int main(int argc, char **argv) struct option options[] = { OPT_INTEGER(0, "exit-after", &idle_in_seconds, N_("exit if not used after some seconds")), + OPT_BOOL(0, "strict", &to_verify, +"verify shared memory after creating"),
[PATCH v3 08/16] Add watchman support to reduce index refresh cost
From: Nguyễn Thái Ngọc Duy The previous patch has the logic to clear bits in 'WAMA' bitmap. This patch has logic to set bits as told by watchman. The missing bit, _using_ these bits, are not here yet. A lot of this code is written by David Turner originally, mostly from [1]. I'm just copying and polishing it a bit. [1] http://article.gmane.org/gmane.comp.version-control.git/248006 Signed-off-by: David Turner Signed-off-by: Nguyễn Thái Ngọc Duy --- Makefile | 12 + cache.h| 1 + config.c | 5 ++ configure.ac | 8 environment.c | 3 ++ watchman-support.c | 135 + watchman-support.h | 7 +++ 7 files changed, 171 insertions(+) create mode 100644 watchman-support.c create mode 100644 watchman-support.h diff --git a/Makefile b/Makefile index 24d6c0e..8ad8e1f 100644 --- a/Makefile +++ b/Makefile @@ -453,6 +453,7 @@ MSGFMT = msgfmt CURL_CONFIG = curl-config PTHREAD_LIBS = -lpthread PTHREAD_CFLAGS = +WATCHMAN_LIBS = GCOV = gcov export TCL_PATH TCLTK_PATH @@ -1419,6 +1420,13 @@ else LIB_OBJS += thread-utils.o endif +ifdef USE_WATCHMAN + LIB_H += watchman-support.h + LIB_OBJS += watchman-support.o + WATCHMAN_LIBS = -lwatchman + BASIC_CFLAGS += -DUSE_WATCHMAN +endif + ifdef HAVE_PATHS_H BASIC_CFLAGS += -DHAVE_PATHS_H endif @@ -2030,6 +2038,9 @@ git-remote-testsvn$X: remote-testsvn.o GIT-LDFLAGS $(GITLIBS) $(VCSSVN_LIB) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) \ $(VCSSVN_LIB) +git-index-helper$X: index-helper.o GIT-LDFLAGS $(GITLIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) $(WATCHMAN_LIBS) + $(REMOTE_CURL_ALIASES): $(REMOTE_CURL_PRIMARY) $(QUIET_LNCP)$(RM) $@ && \ ln $< $@ 2>/dev/null || \ @@ -2169,6 +2180,7 @@ GIT-BUILD-OPTIONS: FORCE @echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@+ @echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@+ @echo HAVE_SHM=\''$(subst ','\'',$(subst ','\'',$(HAVE_SHM)))'\' >>$@+ + @echo USE_WATCHMAN=\''$(subst ','\'',$(subst ','\'',$(USE_WATCHMAN)))'\' >>$@+ ifdef TEST_OUTPUT_DIRECTORY @echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst ','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@+ endif diff --git a/cache.h b/cache.h index f4f7eef..37f211b 100644 --- a/cache.h +++ b/cache.h @@ -687,6 +687,7 @@ extern char *git_replace_ref_base; extern int fsync_object_files; extern int core_preload_index; +extern int core_watchman_sync_timeout; extern int core_apply_sparse_checkout; extern int precomposed_unicode; extern int protect_hfs; diff --git a/config.c b/config.c index 9ba40bc..e6dc141 100644 --- a/config.c +++ b/config.c @@ -882,6 +882,11 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.watchmansynctimeout")) { + core_watchman_sync_timeout = git_config_int(var, value); + return 0; + } + if (!strcmp(var, "core.createobject")) { if (!strcmp(value, "rename")) object_creation_mode = OBJECT_CREATION_USES_RENAMES; diff --git a/configure.ac b/configure.ac index 0cd9f46..334d63b 100644 --- a/configure.ac +++ b/configure.ac @@ -1099,6 +1099,14 @@ AC_COMPILE_IFELSE([BSD_SYSCTL_SRC], HAVE_BSD_SYSCTL=]) GIT_CONF_SUBST([HAVE_BSD_SYSCTL]) +# +# Check for watchman client library + +AC_CHECK_LIB([watchman], [watchman_connect], + [USE_WATCHMAN=YesPlease], + [USE_WATCHMAN=]) +GIT_CONF_SUBST([USE_WATCHMAN]) + ## Other checks. # Define USE_PIC if you need the main git objects to be built with -fPIC # in order to build and link perl/Git.so. x86-64 seems to need this. diff --git a/environment.c b/environment.c index 6dec9d0..35e03c7 100644 --- a/environment.c +++ b/environment.c @@ -94,6 +94,9 @@ int core_preload_index = 1; */ int ignore_untracked_cache_config; +int core_watchman_sync_timeout = 300; + + /* This is set by setup_git_dir_gently() and/or git_default_config() */ char *git_work_tree_cfg; static char *work_tree; diff --git a/watchman-support.c b/watchman-support.c new file mode 100644 index 000..b168e88 --- /dev/null +++ b/watchman-support.c @@ -0,0 +1,135 @@ +#include "cache.h" +#include "watchman-support.h" +#include "strbuf.h" +#include "dir.h" +#include + +static str
[PATCH v3 07/16] read-cache: add watchman 'WAMA' extension
From: Nguyễn Thái Ngọc Duy The extension contains a bitmap, one bit for each entry in the index. If the n-th bit is zero, the n-th entry is considered unchanged, we can ce_mark_uptodate() it without refreshing. If the bit is non-zero and we found out the corresponding file is clean after refresh, we can clear the bit. In addition, there's a list of directories in the untracked-cache to invalidate (because they have new or modified entries). The 'skipping refresh' bit is not in this patch yet as we would need watchman. More details in later patches. Signed-off-by: Nguyễn Thái Ngọc Duy --- cache.h | 4 ++ dir.h| 3 ++ read-cache.c | 117 ++- 3 files changed, 122 insertions(+), 2 deletions(-) diff --git a/cache.h b/cache.h index 0aeb994..f4f7eef 100644 --- a/cache.h +++ b/cache.h @@ -182,6 +182,8 @@ struct cache_entry { #define CE_VALID (0x8000) #define CE_STAGESHIFT 12 +#define CE_WATCHMAN_DIRTY (0x0001) + /* * Range 0x0FFF in ce_flags is divided into * two parts: in-memory flags and on-disk ones. @@ -320,6 +322,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define CACHE_TREE_CHANGED (1 << 5) #define SPLIT_INDEX_ORDERED(1 << 6) #define UNTRACKED_CHANGED (1 << 7) +#define WATCHMAN_CHANGED (1 << 8) struct split_index; struct untracked_cache; @@ -344,6 +347,7 @@ struct index_state { struct untracked_cache *untracked; void *mmap; size_t mmap_size; + char *last_update; }; extern struct index_state the_index; diff --git a/dir.h b/dir.h index 3ec3fb0..3d540de 100644 --- a/dir.h +++ b/dir.h @@ -142,6 +142,9 @@ struct untracked_cache { int gitignore_invalidated; int dir_invalidated; int dir_opened; + /* watchman invalidation data */ + unsigned int use_watchman : 1; + struct string_list invalid_untracked; }; struct dir_struct { diff --git a/read-cache.c b/read-cache.c index 7215a17..59d892e 100644 --- a/read-cache.c +++ b/read-cache.c @@ -19,6 +19,7 @@ #include "split-index.h" #include "utf8.h" #include "shm.h" +#include "ewah/ewok.h" static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, unsigned int options); @@ -41,11 +42,13 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */ #define CACHE_EXT_LINK 0x6c696e6b/* "link" */ #define CACHE_EXT_UNTRACKED 0x554E5452 /* "UNTR" */ +#define CACHE_EXT_WATCHMAN 0x57414D41/* "WAMA" */ /* changes that can be kept in $GIT_DIR/index (basically all extensions) */ #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \ CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \ -SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED) +SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED | \ +WATCHMAN_CHANGED) struct index_state the_index; static const char *alternate_index_output; @@ -1220,8 +1223,13 @@ int refresh_index(struct index_state *istate, unsigned int flags, continue; new = refresh_cache_ent(istate, ce, options, &cache_errno, &changed); - if (new == ce) + if (new == ce) { + if (ce->ce_flags & CE_WATCHMAN_DIRTY) { + ce->ce_flags &= ~CE_WATCHMAN_DIRTY; + istate->cache_changed |= WATCHMAN_CHANGED; + } continue; + } if (!new) { const char *fmt; @@ -1365,6 +1373,94 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size) return 0; } +static void mark_no_watchman(size_t pos, void *data) +{ + struct index_state *istate = data; + assert(pos < istate->cache_nr); + istate->cache[pos]->ce_flags |= CE_WATCHMAN_DIRTY; +} + +static int read_watchman_ext(struct index_state *istate, const void *data, +unsigned long sz) +{ + struct ewah_bitmap *bitmap; + int ret, len; + uint32_t bitmap_size; + uint32_t untracked_nr; + + if (memchr(data, 0, sz) == NULL) + return error("invalid extension"); + + len = strlen(data) + 1; + memcpy(&bitmap_size, (const char *)data + len, 4); + memcpy(&untracked_nr, (const char *)data + len + 4, 4); + untracked_nr = ntohl(untracked_nr); + bitmap_size = ntohl(bitmap_size); + + bitmap = ewah_new(); + ret = ewah_read_mmap(bitmap, (const char *)data + len + 8, bitmap_size); + if (ret != bitmap_size) { + ewah_free(bitmap); + return error("failed to parse ewah bitmap reading watchman index extension"); + } + istate->last_update = xstrdup(data); + ewah_each_bit(bitmap, mar
[PATCH v3 10/16] update-index: enable/disable watchman support
From: Nguyễn Thái Ngọc Duy Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/update-index.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/builtin/update-index.c b/builtin/update-index.c index 1c94ca5..7c08e1c 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -914,6 +914,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) { int newfd, entries, has_errors = 0, nul_term_line = 0; enum uc_mode untracked_cache = UC_UNSPECIFIED; + int use_watchman = -1; int read_from_stdin = 0; int prefix_length = prefix ? strlen(prefix) : 0; int preferred_index_format = 0; @@ -1012,6 +1013,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) N_("test if the filesystem supports untracked cache"), UC_TEST), OPT_SET_INT(0, "force-untracked-cache", &untracked_cache, N_("enable untracked cache without testing the filesystem"), UC_FORCE), + OPT_BOOL(0, "watchman", &use_watchman, + N_("use or not use watchman to reduce refresh cost")), OPT_END() }; @@ -1149,6 +1152,14 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) die("Bug: bad untracked_cache value: %d", untracked_cache); } + if (use_watchman > 0) { + the_index.last_update= xstrdup(""); + the_index.cache_changed |= WATCHMAN_CHANGED; + } else if (!use_watchman) { + the_index.last_update= NULL; + the_index.cache_changed |= WATCHMAN_CHANGED; + } + if (active_cache_changed) { if (newfd < 0) { if (refresh_args.flags & REFRESH_QUIET) -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 06/16] index-helper: add --detach
From: Nguyễn Thái Ngọc Duy We detach after creating and opening the socket, because otherwise we might return control to the shell before index-helper is ready to accept commands. This might lead to flaky tests. Signed-off-by: Nguyễn Thái Ngọc Duy --- Documentation/git-index-helper.txt | 3 +++ index-helper.c | 11 +-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/git-index-helper.txt b/Documentation/git-index-helper.txt index b177fb8..bb344cf 100644 --- a/Documentation/git-index-helper.txt +++ b/Documentation/git-index-helper.txt @@ -31,6 +31,9 @@ OPTIONS for reading an index, but because it will happen in the background, it's not noticable. `--strict` is enabled by default. +--detach:: + Detach from the shell. + NOTES - $GIT_DIR/index-helper.path is a symlink to a Unix domain socket in diff --git a/index-helper.c b/index-helper.c index 8288e30..10f29f5 100644 --- a/index-helper.c +++ b/index-helper.c @@ -15,7 +15,7 @@ struct shm { static struct shm shm_index; static struct shm shm_base_index; -static int to_verify = 1; +static int daemonized, to_verify = 1; static void release_index_shm(struct shm *is) { @@ -34,6 +34,8 @@ static void cleanup_shm(void) static void cleanup(void) { + if (daemonized) + return; unlink(git_path("index-helper.path")); cleanup_shm(); } @@ -289,7 +291,7 @@ static void make_socket_path(struct strbuf *path) int main(int argc, char **argv) { const char *prefix; - int idle_in_seconds = 600; + int idle_in_seconds = 600, detach = 0; int fd; struct strbuf socket_path = STRBUF_INIT; struct option options[] = { @@ -297,6 +299,7 @@ int main(int argc, char **argv) N_("exit if not used after some seconds")), OPT_BOOL(0, "strict", &to_verify, "verify shared memory after creating"), + OPT_BOOL(0, "detach", &detach, "detach the process"), OPT_END() }; @@ -326,6 +329,10 @@ int main(int argc, char **argv) die(_("failed to delete old index-helper.path")); if (symlink(socket_path.buf, git_path("index-helper.path"))) die(_("failed to symlink socket path into index-helper.path")); + + if (detach && daemonize(&daemonized)) + die_errno(_("unable to detach")); + loop(fd, idle_in_seconds); return 0; -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 00/16] index-helper, watchman
The major change here is to switch from named pipes to sockets. But somehow while doing this, I noticed a few other things (and others noticed some too): Fixed bugs in successful write testing Added a bit more doco Improved tests slightly Memory barriers .git vs .git/ from watchman one patch moved/squashed at Duy's suggestion (some of?) my spelling errors corrected David Turner (6): unpack-trees: preserve index extensions index-helper: kill mode index-helper: don't run if already running index-helper: autorun mode index-helper: optionally automatically run read-cache: config for waiting for index-helper Nguyễn Thái Ngọc Duy (10): read-cache.c: fix constness of verify_hdr() read-cache: allow to keep mmap'd memory after reading index-helper: new daemon for caching index and related stuff index-helper: add --strict daemonize(): set a flag before exiting the main process index-helper: add --detach read-cache: add watchman 'WAMA' extension Add watchman support to reduce index refresh cost index-helper: use watchman to avoid refreshing index with lstat() update-index: enable/disable watchman support .gitignore | 1 + Documentation/config.txt | 10 + Documentation/git-index-helper.txt | 64 + Makefile | 22 ++ builtin/gc.c | 2 +- builtin/update-index.c | 11 + cache.h| 23 +- config.c | 10 + config.mak.uname | 1 + configure.ac | 8 + daemon.c | 2 +- dir.c | 23 +- dir.h | 6 + environment.c | 8 + git-compat-util.h | 18 ++ git.c | 35 ++- index-helper.c | 462 + read-cache.c | 438 +-- setup.c| 4 +- shm.c | 67 ++ shm.h | 23 ++ t/t7063-status-untracked-cache.sh | 22 ++ t/t7900-index-helper.sh| 69 ++ t/test-lib-functions.sh| 4 + unpack-trees.c | 1 + watchman-support.c | 135 +++ watchman-support.h | 7 + 27 files changed, 1454 insertions(+), 22 deletions(-) create mode 100644 Documentation/git-index-helper.txt create mode 100644 index-helper.c create mode 100644 shm.c create mode 100644 shm.h create mode 100755 t/t7900-index-helper.sh create mode 100644 watchman-support.c create mode 100644 watchman-support.h -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 02/16] read-cache: allow to keep mmap'd memory after reading
From: Nguyễn Thái Ngọc Duy Later, we will introduce git index-helper to share this memory with other git processes. Since the memory will be shared, it will never be unmapped (although the kernel may of course choose to page it out). Signed-off-by: Nguyễn Thái Ngọc Duy --- cache.h | 3 +++ read-cache.c | 13 - 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cache.h b/cache.h index b829410..4180e2b 100644 --- a/cache.h +++ b/cache.h @@ -333,11 +333,14 @@ struct index_state { struct split_index *split_index; struct cache_time timestamp; unsigned name_hash_initialized : 1, +keep_mmap : 1, initialized : 1; struct hashmap name_hash; struct hashmap dir_hash; unsigned char sha1[20]; struct untracked_cache *untracked; + void *mmap; + size_t mmap_size; }; extern struct index_state the_index; diff --git a/read-cache.c b/read-cache.c index 16cc487..7e387e9 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1574,6 +1574,10 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) mmap = xmmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, 0); if (mmap == MAP_FAILED) die_errno("unable to map index file"); + if (istate->keep_mmap) { + istate->mmap = mmap; + istate->mmap_size = mmap_size; + } close(fd); hdr = mmap; @@ -1626,10 +1630,12 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) src_offset += 8; src_offset += extsize; } - munmap(mmap, mmap_size); + if (!istate->keep_mmap) + munmap(mmap, mmap_size); return istate->cache_nr; unmap: + istate->mmap = NULL; munmap(mmap, mmap_size); die("index file corrupt"); } @@ -1655,6 +1661,7 @@ int read_index_from(struct index_state *istate, const char *path) discard_index(split_index->base); else split_index->base = xcalloc(1, sizeof(*split_index->base)); + split_index->base->keep_mmap = istate->keep_mmap; ret = do_read_index(split_index->base, git_path("sharedindex.%s", sha1_to_hex(split_index->base_sha1)), 1); @@ -1698,6 +1705,10 @@ int discard_index(struct index_state *istate) free(istate->cache); istate->cache = NULL; istate->cache_alloc = 0; + if (istate->keep_mmap && istate->mmap) { + munmap(istate->mmap, istate->mmap_size); + istate->mmap = NULL; + } discard_split_index(istate); free_untracked_cache(istate->untracked); istate->untracked = NULL; -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 03/16] index-helper: new daemon for caching index and related stuff
From: Nguyễn Thái Ngọc Duy Instead of reading the index from disk and worrying about disk corruption, the index is cached in memory (memory bit-flips happen too, but hopefully less often). The result is faster read. Read time is reduced by 70%. The biggest gain is not having to verify the trailing SHA-1, which takes lots of time especially on large index files. But this also opens doors for further optimiztions: - we could create an in-memory format that's essentially the memory dump of the index to eliminate most of parsing/allocation overhead. The mmap'd memory can be used straight away. Experiment [1] shows we could reduce read time by 88%. - we could cache non-index info such as name hash The shared memory's name folows the template "git--" where is the trailing SHA-1 of the index file. is "index" for cached index files (and may be "name-hash" for name-hash cache). If such shared memory exists, it contains the same index content as on disk. The content is already validated by the daemon and git won't validate it again (except comparing the trailing SHA-1s). We keep this daemon's logic as thin as possible. The "brain" stays in git. So the daemon can read and validate stuff, but that's all it's allowed to do. It does not add/create new information. It doesn't even accept direct updates from git. Git can poke the daemon via unix domain sockets to tell it to refresh the index cache, or to keep it alive some more minutes. It can't give any real index data directly to the daemon. Real data goes to disk first, then the daemon reads and verifies it from there. Poking only happens for $GIT_DIR/index, not temporary index files. $GIT_DIR/index-helper.path is a symlink to the socket for the daemon process. The daemon reads from the socket and executes commands. Named pipes were considered for portability reasons, but then commands that need replies from the daemon would have open their own pipes, since a named pipe should only have one reader. Unix domain sockets don't have this problem. index-helper requires POSIX realtime extension. POSIX shm interface however is abstracted away so that Windows support can be added later. On webkit.git with index format v2, duplicating 8 times to 1.4m entries and 200MB in size: (vanilla) 0.986986364 s: read_index_from .git/index (index-helper) 0.267850279 s: read_index_from .git/index Interestingly with index v4, we get less out of index-helper. It makes sense as v4 requires more processing after loading the index: (vanilla) 0.72249 s: read_index_from .git/index (index-helper) 0.302741500 s: read_index_from .git/index [1] http://thread.gmane.org/gmane.comp.version-control.git/247268/focus=248771 Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: David Turner --- .gitignore | 1 + Documentation/git-index-helper.txt | 46 ++ Makefile | 10 ++ cache.h| 2 + config.mak.uname | 1 + git-compat-util.h | 18 +++ index-helper.c | 284 + read-cache.c | 116 +-- shm.c | 67 + shm.h | 23 +++ t/t7900-index-helper.sh| 23 +++ 11 files changed, 582 insertions(+), 9 deletions(-) create mode 100644 Documentation/git-index-helper.txt create mode 100644 index-helper.c create mode 100644 shm.c create mode 100644 shm.h create mode 100755 t/t7900-index-helper.sh diff --git a/.gitignore b/.gitignore index 5087ce1..b92f122 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,7 @@ /git-http-fetch /git-http-push /git-imap-send +/git-index-helper /git-index-pack /git-init /git-init-db diff --git a/Documentation/git-index-helper.txt b/Documentation/git-index-helper.txt new file mode 100644 index 000..61605e9 --- /dev/null +++ b/Documentation/git-index-helper.txt @@ -0,0 +1,46 @@ +git-index-helper(1) +=== + +NAME + +git-index-helper - A simple cache daemon for speeding up index file access + +SYNOPSIS + +[verse] +'git index-helper' [options] + +DESCRIPTION +--- +Keep the index file in memory for faster access. This daemon is per +repository. + +OPTIONS +--- + +--exit-after=:: + Exit if the cached index is not accessed for `` + seconds. Specify 0 to wait forever. Default is 600. + +NOTES +- +$GIT_DIR/index-helper.path is a symlink to a Unix domain socket in +$TMPDIR that the daemon reads commands from. At least on Linux, shared +memory objects are available via /dev/shm with the name pattern +"git--". Normally the daemon will clean up shared +memory objects when it exits. But if it crashes, some objects could +remain there and they can be safely deleted with "rm" command.
[PATCH v3 01/16] read-cache.c: fix constness of verify_hdr()
From: Nguyễn Thái Ngọc Duy Signed-off-by: Nguyễn Thái Ngọc Duy --- read-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/read-cache.c b/read-cache.c index d9fb78b..16cc487 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1345,7 +1345,7 @@ struct ondisk_cache_entry_extended { ondisk_cache_entry_extended_size(ce_namelen(ce)) : \ ondisk_cache_entry_size(ce_namelen(ce))) -static int verify_hdr(struct cache_header *hdr, unsigned long size) +static int verify_hdr(const struct cache_header *hdr, unsigned long size) { git_SHA_CTX c; unsigned char sha1[20]; -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 00/21] replacement for dt/refs-backend-lmdb v7 patch 04/33
On Thu, 2016-03-31 at 18:37 -0700, Stefan Beller wrote: > On Wed, Mar 30, 2016 at 1:05 PM, David Turner < > dtur...@twopensource.com> wrote: > > On Wed, 2016-03-30 at 08:37 +0200, Michael Haggerty wrote: > > > On 03/29/2016 10:12 PM, David Turner wrote: > > > > On Sun, 2016-03-27 at 07:22 +0200, Michael Haggerty wrote: > > > > > On 03/24/2016 07:47 AM, David Turner wrote: > > > > > > [...] > > > > > > I incorporated your changes into the lmdb backend. To make > > > > > > merging > > > > > > later more convenient, I rebased on top of pu -- I think > > > > > > this > > > > > > mainly > > > > > > depends on jk/check-repository-format, but I also included > > > > > > some > > > > > > fixes > > > > > > for a couple of tests that had been changed by other > > > > > > patches. > > > > > > > > > > I think rebasing changes on top of pu is counterproductive. I > > > > > believe > > > > > that Junio had extra work rebasing your earlier series onto a > > > > > merge > > > > > of > > > > > the minimum number of topics that it really depended on. > > > > > There is > > > > > no > > > > > way > > > > > that he could merge the branch in this form because it would > > > > > imply > > > > > merging all of pu. > > > > > > > > > > See the zeroth section of SubmittingPatches [1] for the > > > > > guidelines. > > > > > > > > I'm a bit confused because > > > > [PATCH 18/21] get_default_remote(): remove unneeded flag > > > > variable > > > > > > > > doesn't do anything on master -- it depends on some patch in > > > > pu. > > > > And > > > > we definitely want to pick up jk/check-repository-format (which > > > > doesn't > > > > include whatever 18/21 depends on). > > > > > > > > So what do you think our base should be? > > > > > > I think the preference is to base a patch series on the merge of > > > master > > > plus the minimum number of topics in pu (ideally, none) that are > > > "essential" prerequisites of the changes in the patch series. For > > > example, the version of this patch series that Junio has in his > > > tree > > > was > > > based on master + sb/submodule-parallel-update. > > > > > > Even if there are minor > > > conflicts with another in-flight topic, it is easier for Junio to > > > resolve the conflicts when merging the topics together than to > > > rebase > > > the patch series over and over as the other patch series evolves. > > > The > > > goal of this practice is of course to allow patch series to > > > evolve > > > independently of each other as much as possible. > > > > > > Of course if you have insights into nontrivial conflicts between > > > your > > > patch series and others, it would be helpful to discuss these in > > > your > > > cover letter. > > > > If I am reading this correctly, it looks like your series also has > > a > > few more sb submodule patches, e.g. sb/submodule-init, which is > > responsible for the code that 18/21 depends on. > > > > I think jk/check-repository-format is also good to get in first, > > because it changes the startup sequence a bit and it's a bit tricky > > to > > figure out what needs to change in dt/refs-backend-lmdb as a result > > of > > it. > > > > But I can't just merge jk/check-repository-format on top of > > 71defe0047 > > -- some function signatures have changed in the run-command stuff > > and > > it seems kind of annoying to fix up. > > > > So I propose instead that we just drop 18/21 for now, and use just > > jk/check-repository-format as the base. > > By 18/21 you mean > [PATCH 18/21] get_default_remote(): remove unneeded flag variable > in builtin/submodule--helper.c? > You could drop that and I'll pick it up in one of the submodule > series', > if that is more convenient for you. > Yes, thanks. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 00/21] replacement for dt/refs-backend-lmdb v7 patch 04/33
On Thu, 2016-03-31 at 18:14 +0200, Michael Haggerty wrote: > On 03/30/2016 10:05 PM, David Turner wrote: > > On Wed, 2016-03-30 at 08:37 +0200, Michael Haggerty wrote: > > > On 03/29/2016 10:12 PM, David Turner wrote: > > > > On Sun, 2016-03-27 at 07:22 +0200, Michael Haggerty wrote: > > > > > On 03/24/2016 07:47 AM, David Turner wrote: > > > > > > [...] > > > > > > I incorporated your changes into the lmdb backend. To make > > > > > > merging > > > > > > later more convenient, I rebased on top of pu -- I think > > > > > > this > > > > > > mainly > > > > > > depends on jk/check-repository-format, but I also included > > > > > > some > > > > > > fixes > > > > > > for a couple of tests that had been changed by other > > > > > > patches. > > > > > > > > > > I think rebasing changes on top of pu is counterproductive. I > > > > > believe > > > > > that Junio had extra work rebasing your earlier series onto a > > > > > merge > > > > > of > > > > > the minimum number of topics that it really depended on. > > > > > There is > > > > > no > > > > > way > > > > > that he could merge the branch in this form because it would > > > > > imply > > > > > merging all of pu. > > > > > > > > > > See the zeroth section of SubmittingPatches [1] for the > > > > > guidelines. > > > > > > > > I'm a bit confused because > > > > [PATCH 18/21] get_default_remote(): remove unneeded flag > > > > variable > > > > > > > > doesn't do anything on master -- it depends on some patch in > > > > pu. > > > > And > > > > we definitely want to pick up jk/check-repository-format (which > > > > doesn't > > > > include whatever 18/21 depends on). > > > > > > > > So what do you think our base should be? > > > > > > I think the preference is to base a patch series on the merge of > > > master > > > plus the minimum number of topics in pu (ideally, none) that are > > > "essential" prerequisites of the changes in the patch series. For > > > example, the version of this patch series that Junio has in his > > > tree > > > was > > > based on master + sb/submodule-parallel-update. > > > > > > Even if there are minor > > > conflicts with another in-flight topic, it is easier for Junio to > > > resolve the conflicts when merging the topics together than to > > > rebase > > > the patch series over and over as the other patch series evolves. > > > The > > > goal of this practice is of course to allow patch series to > > > evolve > > > independently of each other as much as possible. > > > > > > Of course if you have insights into nontrivial conflicts between > > > your > > > patch series and others, it would be helpful to discuss these in > > > your > > > cover letter. > > > > If I am reading this correctly, it looks like your series also has > > a > > few more sb submodule patches, e.g. sb/submodule-init, which is > > responsible for the code that 18/21 depends on. > > > > I think jk/check-repository-format is also good to get in first, > > because it changes the startup sequence a bit and it's a bit tricky > > to > > figure out what needs to change in dt/refs-backend-lmdb as a result > > of > > it. > > > > But I can't just merge jk/check-repository-format on top of > > 71defe0047 > > -- some function signatures have changed in the run-command stuff > > and > > it seems kind of annoying to fix up. > > > > So I propose instead that we just drop 18/21 for now, and use just > > jk/check-repository-format as the base. > > > > Does this seem reasonable to you? > > Yes, that's fine. Patch 18/21 is just a random cleanup that nothing > else > depends on. Will you do the rebasing? If so, please let me know where > I > can fetch the result from. Rebased: https://github.com/dturner-tw/git.git on branch dturner/pluggable-backends -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: RFC: New reference iteration paradigm
On Thu, 2016-03-31 at 18:13 +0200, Michael Haggerty wrote: > Currently the way to iterate over references is via a family of > for_each_ref()-style functions. You pass some arguments plus a > callback > function and cb_data to the function, and your callback is called for > each reference that is selected. > > This works, but it has two big disadvantages: > > 1. It is cumbersome for callers. The caller's logic has to be split >into two functions, the one that calls for_each_ref() and the >callback function. Any data that have to be passed between the >functions has to be stuck in a separate data structure. > > 2. This interface is not composable. For example, you can't write a >single function that iterates over references from two sources, >as is interesting for combining packed plus loose references, >shared plus worktree-specific references, symbolic plus normal >references, etc. The current code for combining packed and loose >references needs to walk the two reference trees in lockstep, >using intimate knowledge about how references are stored [1,2,3]. > > I'm currently working on a patch series to transition the reference > code > from using for_each_ref()-style iteration to using proper iterators. > > The main point of this change is to change the base iteration > paradigm > that has to be supported by reference backends. So instead of > > > int do_for_each_ref_fn(const char *submodule, const char *base, > >each_ref_fn fn, int trim, int flags, > >void *cb_data); > > the backend now has to implement > > > struct ref_iterator *ref_iterator_begin_fn(const char *submodule, > >const char *prefix, > >unsigned int flags); > > The ref_iterator itself has to implement two main methods: > > > int iterator_advance_fn(struct ref_iterator *ref_iterator); > > void iterator_free_fn(struct ref_iterator *ref_iterator); > > A loop over references now looks something like > > > struct ref_iterator *iter = each_ref_in_iterator("refs/tags/"); > > while (ref_iterator_advance(iter)) { > > /* code using iter->refname, iter->oid, iter->flags */ > > } > > I built quite a bit of ref_iterator infrastructure to make it easy to > plug things together quite flexibly. For example, there is an > overlay_ref_iterator which takes two other iterators (e.g., one for > packed and one for loose refs) and overlays them, presenting the > result > via the same iterator interface. But the same overlay_ref_iterator > can > be used to overlay any two other iterators on top of each other. I haven't looked at the code yet, but this makes sense to me. In general, the major reason to supply a callback style of API is when iteration is more complicated than whatever will be consuming the data (I can't remember where I heard this argument, but I found it pretty convincing). It seems like this is increasingly not the case, so we should move towards the iterator style. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 00/21] replacement for dt/refs-backend-lmdb v7 patch 04/33
On Wed, 2016-03-30 at 08:37 +0200, Michael Haggerty wrote: > On 03/29/2016 10:12 PM, David Turner wrote: > > On Sun, 2016-03-27 at 07:22 +0200, Michael Haggerty wrote: > > > On 03/24/2016 07:47 AM, David Turner wrote: > > > > [...] > > > > I incorporated your changes into the lmdb backend. To make > > > > merging > > > > later more convenient, I rebased on top of pu -- I think this > > > > mainly > > > > depends on jk/check-repository-format, but I also included some > > > > fixes > > > > for a couple of tests that had been changed by other patches. > > > > > > I think rebasing changes on top of pu is counterproductive. I > > > believe > > > that Junio had extra work rebasing your earlier series onto a > > > merge > > > of > > > the minimum number of topics that it really depended on. There is > > > no > > > way > > > that he could merge the branch in this form because it would > > > imply > > > merging all of pu. > > > > > > See the zeroth section of SubmittingPatches [1] for the > > > guidelines. > > > > I'm a bit confused because > > [PATCH 18/21] get_default_remote(): remove unneeded flag variable > > > > doesn't do anything on master -- it depends on some patch in pu. > > And > > we definitely want to pick up jk/check-repository-format (which > > doesn't > > include whatever 18/21 depends on). > > > > So what do you think our base should be? > > I think the preference is to base a patch series on the merge of > master > plus the minimum number of topics in pu (ideally, none) that are > "essential" prerequisites of the changes in the patch series. For > example, the version of this patch series that Junio has in his tree > was > based on master + sb/submodule-parallel-update. > > Even if there are minor > conflicts with another in-flight topic, it is easier for Junio to > resolve the conflicts when merging the topics together than to rebase > the patch series over and over as the other patch series evolves. The > goal of this practice is of course to allow patch series to evolve > independently of each other as much as possible. > > Of course if you have insights into nontrivial conflicts between your > patch series and others, it would be helpful to discuss these in your > cover letter. If I am reading this correctly, it looks like your series also has a few more sb submodule patches, e.g. sb/submodule-init, which is responsible for the code that 18/21 depends on. I think jk/check-repository-format is also good to get in first, because it changes the startup sequence a bit and it's a bit tricky to figure out what needs to change in dt/refs-backend-lmdb as a result of it. But I can't just merge jk/check-repository-format on top of 71defe0047 -- some function signatures have changed in the run-command stuff and it seems kind of annoying to fix up. So I propose instead that we just drop 18/21 for now, and use just jk/check-repository-format as the base. Does this seem reasonable to you? -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 03/17] index-helper: new daemon for caching index and related stuff
On Tue, 2016-03-29 at 09:31 +0700, Duy Nguyen wrote: > On Sat, Mar 19, 2016 at 8:04 AM, David Turner < > dtur...@twopensource.com> wrote: > > From: Nguyễn Thái Ngọc Duy > > > > Instead of reading the index from disk and worrying about disk > > corruption, the index is cached in memory (memory bit-flips happen > > too, but hopefully less often). The result is faster read. Read > > time > > is reduced by 70%. > > > > The biggest gain is not having to verify the trailing SHA-1, which > > takes lots of time especially on large index files. But this also > > opens doors for further optimiztions: > > > > - we could create an in-memory format that's essentially the > > memory > >dump of the index to eliminate most of parsing/allocation > >overhead. The mmap'd memory can be used straight away. > > Experiment > >[1] shows we could reduce read time by 88%. > > This reference [1] is missing (even in my old version). I believe > it's > http://thread.gmane.org/gmane.comp.version-control.git/247268/focus=2 > 48771, > comparing 256.442ms in that mail with v4 number, 2245.113ms in 0/8 > mail from the same thread. > > > Git can poke the daemon via named pipes to tell it to refresh the > > index cache, or to keep it alive some more minutes. It can't give > > any > > real index data directly to the daemon. Real data goes to disk > > first, > > then the daemon reads and verifies it from there. Poking only > > happens > > for $GIT_DIR/index, not temporary index files. > > I think we should go with unix socket on *nix platform instead of > named pipe. UNIX named pipe only allows one communication channel at > a > time. Windows named pipe is different and allows multiple clients, > which is the same as unix socket. > > > > $GIT_DIR/index-helper.pipe is the named pipe for daemon process. > > The > > daemon reads from the pipe and executes commands. Commands that > > need > > replies from the daemon will have to open their own pipe, since a > > named pipe should only have one reader. Unix domain sockets don't > > have this problem, but are less portable. > > Hm..NO_UNIX_SOCKETS is only set for Windows in config.mak.uname and > Windows will need to be specially tailored anyway, I think unix > socket > would be more elegant. One annoyance with unix sockets is that they must have short paths (UNIX_PATH_MAX -- about a hundred characters). This basically means they should be in $TMPDIR. I guess we could go back to pid files in $GIT_DIR, and then have a socket named after the pid. There's also some security issues, but it actually looks like there's a simple enough workaround for them. I'll change this, but it might take a bit as I'm busy with other things this week. > > +static void share_index(struct index_state *istate, struct shm > > *is) > > +{ > > + void *new_mmap; > > + if (istate->mmap_size <= 20 || > > + hashcmp(istate->sha1, > > + (unsigned char *)istate->mmap + istate > > ->mmap_size - 20) || > > + !hashcmp(istate->sha1, is->sha1) || > > + git_shm_map(O_CREAT | O_EXCL | O_RDWR, 0700, istate > > ->mmap_size, > > + &new_mmap, PROT_READ | PROT_WRITE, > > MAP_SHARED, > > + "git-index-%s", sha1_to_hex(istate->sha1)) > > < 0) > > + return; > > + > > + release_index_shm(is); > > + is->size = istate->mmap_size; > > + is->shm = new_mmap; > > + hashcpy(is->sha1, istate->sha1); > > + memcpy(new_mmap, istate->mmap, istate->mmap_size - 20); > > + > > + /* > > +* The trailing hash must be written last after everything > > is > > +* written. It's the indication that the shared memory is > > now > > +* ready. > > +*/ > > + hashcpy((unsigned char *)new_mmap + istate->mmap_size - 20, > > is->sha1); > > You commented here [1] a long time ago about memory barrier. I'm not > entirely sure if compilers dare to reorder function calls, but when > hashcpy is inlined and memcpy is builtin, I suppose that's > possible... > > [1] http://article.gmane.org/gmane.comp.version-control.git/280729 Oh, right. Will fix. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 00/19] index-helper, watchman
On Tue, 2016-03-29 at 19:09 +0200, Torsten Bögershausen wrote: > On 2016-03-09 19.36, David Turner wrote: > > This is a rebase and extension of Duy's work on git index-helper > > and > > watchman support. > > > Somewhere we need to tweak something: > t7900 do all fail under Mac OS, because the index-helper is not > build. > > The best would be to have a precondition when running the tests ? > > t7900-index-helper.sh not ok 1 - index-helper smoke test > t7900-index-helper.sh not ok 2 - index-helper creates usable pipe > file and can > be killed > t7900-index-helper.sh not ok 3 - index-helper will not start if > already running > t7900-index-helper.sh not ok 4 - index-helper is quiet with - > -autorun > t7900-index-helper.sh not ok 5 - index-helper autorun works > > > The other thing is to enable SHM on other platforms, but first things > first. Will fix, thanks. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 08/17] read-cache: invalidate untracked cache data when reading WAMA
On Tue, 2016-03-29 at 09:50 +0700, Duy Nguyen wrote: > On Sat, Mar 19, 2016 at 8:04 AM, David Turner < > dtur...@twopensource.com> wrote: > > @@ -1407,10 +1472,24 @@ static int read_watchman_ext(struct > > index_state *istate, const void *data, > > ewah_each_bit(bitmap, mark_no_watchman, istate); > > ewah_free(bitmap); > > > > - /* > > -* TODO: update the untracked cache from the untracked data > > in this > > -* extension. > > -*/ > > + if (istate->untracked && istate->untracked->root) { > > + int i; > > + const char *untracked; > > + > > + untracked = data + len + 8 + bitmap_size; > > + for (i = 0; i < untracked_nr; ++i) { > > + int len = strlen(untracked); > > + string_list_append(&istate->untracked > > ->invalid_untracked, > > + untracked); > > + untracked += len + 1; > > + } > > + > > + for_each_string_list(&istate->untracked > > ->invalid_untracked, > > +mark_untracked_invalid, istate > > ->untracked); > > I think it's a bit early to invalidate untracked cache here. We can > do > that in refresh_by_watchman() in 10/17, where ce_mark_uptodate() to > prevent lstat() is also done. Will move/squash -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 00/21] replacement for dt/refs-backend-lmdb v7 patch 04/33
On Sun, 2016-03-27 at 07:22 +0200, Michael Haggerty wrote: > On 03/24/2016 07:47 AM, David Turner wrote: > > [...] > > I incorporated your changes into the lmdb backend. To make merging > > later more convenient, I rebased on top of pu -- I think this > > mainly > > depends on jk/check-repository-format, but I also included some > > fixes > > for a couple of tests that had been changed by other patches. > > I think rebasing changes on top of pu is counterproductive. I believe > that Junio had extra work rebasing your earlier series onto a merge > of > the minimum number of topics that it really depended on. There is no > way > that he could merge the branch in this form because it would imply > merging all of pu. > > See the zeroth section of SubmittingPatches [1] for the guidelines. I'm a bit confused because [PATCH 18/21] get_default_remote(): remove unneeded flag variable doesn't do anything on master -- it depends on some patch in pu. And we definitely want to pick up jk/check-repository-format (which doesn't include whatever 18/21 depends on). So what do you think our base should be? -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 1/2] refs: add a new function set_worktree_head_symref
On Mon, 2016-03-28 at 10:48 -0700, Junio C Hamano wrote: > Kazuki Yamaguchi writes: > > > Add a new function set_worktree_head_symref, to update HEAD symref > > for > > the specified worktree. > > > > To update HEAD of a linked working tree, > > create_symref("worktrees/$work_tree/HEAD", "refs/heads/$branch", > > msg) > > could be used. However when it comes to updating HEAD of the main > > working tree, it is unusable because it uses $GIT_DIR for > > worktree-specific symrefs (HEAD). > > > > The new function takes git_dir (real directory) as an argument, and > > updates HEAD of the working tree. This function will be used when > > renaming a branch. > > > > Signed-off-by: Kazuki Yamaguchi > > --- > > I suspect that this helper function should be in the common layer > (refs.c) to be redirected to specific backend(s). > > David & Michael, what do you think? HEAD is a per-worktree ref, so it's always handled by the files backend. So this looks fine to me. > > +/* > > + * Update HEAD of the specified gitdir. > > + * Similar to create_symref("relative-git-dir/HEAD", target, > > NULL), but this is > > + * able to update the main working tree's HEAD wherever $GIT_DIR > > points to. nit: "able to update the main working tree's HEAD regardless of where GIT_DIR points to" would be clearer. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v7 09/33] refs: reduce the visibility of do_for_each_ref()
On Thu, 2016-03-24 at 08:07 +0100, Michael Haggerty wrote: > > +/* > > + * The common backend for the for_each_*ref* functions > > + */ > > +static int do_for_each_ref(const char *submodule, const char > > *base, > > + each_ref_fn fn, int trim, int flags, > > + void *cb_data) > > The two lines above are indented incorrectly. Fixed, thanks. > > - > > -int do_for_each_ref(const char *submodule, const char *base, > > - each_ref_fn fn, int trim, int flags, > > - void *cb_data) > > -{ > > - return the_refs_backend->do_for_each_ref(submodule, base, > > fn, trim, > > -flags, cb_data); > > -} > > Nit: in the previous patch, please put the function where you want it > so > that you don't have to move it in this patch. > > > [...] > > Michael Ok. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v7 14/33] refs: add methods to init refs db
On Thu, 2016-03-24 at 08:28 +0100, Michael Haggerty wrote: > > if (shared_repository) { > > adjust_shared_perm(get_git_dir()); > > - adjust_shared_perm(git_path_buf(&buf, "refs")); > > Given that this function is creating the "refs" directory, it seems > like > adjust_shared_perm() should be called for it here, too (rather than > in > the backend-specific code). Good point. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 09/17] Add watchman support to reduce index refresh cost
On Thu, 2016-03-24 at 09:47 -0400, Jeff Hostetler wrote: > I'm seeing wm->name have value ".git" rather than ".git/" on Linux. > > > On 03/18/2016 09:04 PM, David Turner wrote: > > + if (!strncmp(wm->name, ".git/", 5) || > > + strstr(wm->name, "/.git/")) > > + continue; > Thanks. I don't think I considered the case of .git itself being modified, although clearly files are occasionally created/deleted in there. I'm going to fix this by ignoring all directories, since we'll capture untracked-cache changes via files in those dirs. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 00/21] replacement for dt/refs-backend-lmdb v7 patch 04/33
On Wed, 2016-03-23 at 11:04 +0100, Michael Haggerty wrote: > Patch 04/33 in David Turner's refs-backend-lmdb series v7 [1] did way > too much in a single patch, and in fact got a few minor things wrong. > Instead of that patch, I suggest this patch series, which > ... LGTM. I think I would squash these patches: fsck_head_link(): remove unneeded flag variable cmd_merge(): remove unneeded flag variable get_default_remote(): remove unneeded flag variable checkout_paths(): remove unneeded flag variable But that's up to you. I incorporated your changes into the lmdb backend. To make merging later more convenient, I rebased on top of pu -- I think this mainly depends on jk/check-repository-format, but I also included some fixes for a couple of tests that had been changed by other patches. The current version can be found here: https://github.com/dturner-tw/git/tree/dturner/pluggable-backends I won't resend the full patchset to the list until I hear back on the rest of the review. It seems like maybe we should now split this into two patchsets: everything up to and including "refs: move resolve_ref_unsafe into common code" does not depend on the backend structure and could go in earlier. If you agree, we could send that first series and get it in, hopefully reducing later merge conflicts. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] read-cache: increase write buffer size
On Sat, 2016-03-19 at 18:18 +0700, Duy Nguyen wrote: > On Sat, Mar 19, 2016 at 8:19 AM, David Turner < > dtur...@twopensource.com> wrote: > > Each write() has syscall overhead, and writing a large index > > entails > > many such calls. A larger write buffer reduces the overhead, > > leading to increased performance. > > > > On my repo, which has an index size of 30m, this saves about 10ms > > of > > time writing the index. > > I wonder if split-index does not work as intended. Because if it > does, > you should rarely need to write that such big index files (or is this > 30mb the small part and the base index is even bigger?). > > But I agree with Torsten that we should make this configurable, > preferably via config file, if not we can still move this define back > in Makefile, overridable using config.mak Oh, right, split index. Yeah, I should just turn that on. Nevermind. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 18/19] index-helper: autorun
On Thu, 2016-03-17 at 15:43 +0100, Johannes Schindelin wrote: > Hi Duy, > > On Thu, 17 Mar 2016, Duy Nguyen wrote: > > > On Thu, Mar 17, 2016 at 1:27 AM, Johannes Schindelin > > wrote: > > > I am much more concerned about concurrent accesses and the > > > communication > > > between the Git processes and the index-helper. Writing to the > > > .pid file > > > sounds very fragile to me, in particular when multiple processes > > > can poke > > > the index-helper in succession and some readers are unaware that > > > the index > > > is being refreshed. > > > > It's not that bad. > > Well, the way I read the code it is possible that: > > 1. Git process 1 starts, reading the index > 2. Git process 2 starts, poking the index-helper > 3. The index-helper updates the .pid file (why not set a bit in the > shared >memory?) with a prefix "W" > 4. Git process 2 reads the .pid file and waits for the "W" to go away >(what if index-helper is not fast enough to write the "W"?) > 5. Git process 1 access the index, happily oblivious that it is being >updated and the data is in an inconsistent state That's not quite how I understand it. It's more like MVCC. Writes to the index go to a new index file. Index files are identified by their SHA. Reads from the index go into a new shm, identified by SHA. The "W" is set only once -- it just means "this index helper knows how to talk to watchman". It's a compile-time option. (I'm going to change this anyway when I switch to named pipes). The watchman data is shared independently; if it's not ready in time (whatever that means -- it's 1s in the current code), then read-cache should fall back to brute-force checking every file. > > We should have protection in place to deal with this and fall back > > to > > reading directly from file when things get suspicious. > > I really want to prevent that. I know of use cases where the index > weighs > 300MB, and falling back to reading it directly *really* hurts. > > But I agree that sending UNIX signals (or PostMessage) is not > > really > > good communication. > > Yeah, I really would like two-way communication instead. Named pipes? > They'd have the advantage that you could use the full path to the > index as > identifier. > > The way I read the current code, we would actually create a different > shared memory every time the index changes because its checksum is > part of > the shared memory's "path"... > > Ciao, > Dscho -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 19/19] hack: watchman/untracked cache mashup
On Thu, 2016-03-17 at 20:06 +0700, Duy Nguyen wrote: > On Thu, Mar 17, 2016 at 7:56 AM, David Turner < > dtur...@twopensource.com> wrote: > > > So if we detect an updated file that's not in the index, we are > > > prepared to invalidate that path, correct? We may invalidate more > > > than > > > necessary if that's true. Imagine a.o is already ignored. If it's > > > rebuilt, we should not need to update untracked cache. > > > > Yes, that's true. But it would be true with the mtime system too. > > This > > is no worse, even if it's no better. In-tree builds are a hard > > case to > > support, and I'm totally OK with a system that encourages out-of > > -tree > > builds. > > > > We could check if it's ignored, but then if someone changes their > > gitignore, we could be wrong. > > > > Or we could suggest that people make their watchmanignore match > > their > > gitignore. > > So your purpose is to reduce stat() on those "quiet" directories? Yes. Twitter's repo is perhaps somewhat unusual in that it has a very "bushy" directory structure -- tens of thousands of directories. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 18/19] index-helper: autorun
On Fri, 2016-03-18 at 14:44 +0700, Duy Nguyen wrote: > > So yeah, this is the challenge: to make Git work at real-world > > scale > > (didn't we hear a lot about this at the latest Git Merge?) > > I'm all for making Junio cry by using Git for what it is (or was) not > intended for, but this seems too much. A repo about 500k files or > less, I think I can deal with, not those in million range. Sad news: Facebook's repo was already getting towards that size three years ago: http://comments.gmane.org/gmane.comp.version-control.git/189776 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] read-cache: increase write buffer size
Each write() has syscall overhead, and writing a large index entails many such calls. A larger write buffer reduces the overhead, leading to increased performance. On my repo, which has an index size of 30m, this saves about 10ms of time writing the index. Signed-off-by: David Turner --- read-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/read-cache.c b/read-cache.c index d9fb78b..09ebe08 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1714,7 +1714,7 @@ int unmerged_index(const struct index_state *istate) return 0; } -#define WRITE_BUFFER_SIZE 8192 +#define WRITE_BUFFER_SIZE 131072 static unsigned char write_buffer[WRITE_BUFFER_SIZE]; static unsigned long write_buffer_len; -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 19/19] hack: watchman/untracked cache mashup
On Tue, 2016-03-15 at 19:31 +0700, Duy Nguyen wrote: > On Thu, Mar 10, 2016 at 1:36 AM, David Turner < > dtur...@twopensource.com> wrote: > > static struct watchman_query *make_query(const char *last_update) > > @@ -60,8 +61,24 @@ static void update_index(struct index_state > > *istate, > > continue; > > > > pos = index_name_pos(istate, wm->name, strlen(wm > > ->name)); > > - if (pos < 0) > > + if (pos < 0) { > > + if (istate->untracked) { > > + char *name = xstrdup(wm->name); > > + char *dname = dirname(name); > > + > > + /* > > +* dirname() returns '.' for the > > root, > > +* but we call it ''. > > +*/ > > + if (dname[0] == '.' && dname[1] == > > 0) > > + string_list_append(&istate > > ->untracked->invalid_untracked, ""); > > + else > > + string_list_append(&istate > > ->untracked->invalid_untracked, > > + dname); > > + free(name); > > + } > > continue; > > + } > > So if we detect an updated file that's not in the index, we are > prepared to invalidate that path, correct? We may invalidate more > than > necessary if that's true. Imagine a.o is already ignored. If it's > rebuilt, we should not need to update untracked cache. Yes, that's true. But it would be true with the mtime system too. This is no worse, even if it's no better. In-tree builds are a hard case to support, and I'm totally OK with a system that encourages out-of-tree builds. We could check if it's ignored, but then if someone changes their gitignore, we could be wrong. Or we could suggest that people make their watchmanignore match their gitignore. > What I had in mind (and argued with watchman devs a bit [1]) was > maintain the file list of each clock and compare the file list of > different clocks to figure out what files are added or deleted. Then > we can generate invalidation list more accurately. We need to keep at > least one file list corresponds to the clock saved in the index. > When > we get the refresh request, we get a new file list (with new clock), > do a diff then save the invalidation list as an extension. Once we > notice that new clock is written back in index, we can discard older > file lists. In theory we should not need to keep too many file lists, > so even if one list is big, it should not be a big problem. > > I have a note with me about race conditions with this approach, but I > haven't remembered exactly why or how yet.. My recent thoughts about > it though, are race conditions when you do "git status" is probably > tolerable. After all if you're doing some I/O when you do git-status, > you're guaranteed to miss some updates. > > [1] https://github.com/facebook/watchman/issues/65 I think it would be possible to just check the UNTR extension and only add a dir to it if that dir doesn't already contain (untracked) the file that's being modified. Or is that also racy? -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 18/19] index-helper: autorun
On Tue, 2016-03-15 at 15:26 +0100, Johannes Schindelin wrote: > Hi Duy, > > On Tue, 15 Mar 2016, Duy Nguyen wrote: > > > On Thu, Mar 10, 2016 at 1:36 AM, David Turner < > > dtur...@twopensource.com> wrote: > > > Introduce a new config option, indexhelper.autorun, to > > > automatically > > > run git index-helper before starting up a builtin git command. > > > This > > > enables users to keep index-helper running without manual > > > intervention. > > > > This could be a problem on Windows because "index-helper --detach" > > does not work there. I have no idea how "daemons" are managed on > > Windows and not sure if our design is still good when such a > > "daemon" > > is added on Windows. So I'm pulling Johannes in for his opinions. > > > > Background for Johannes. We're adding "git index-helper" daemon > > (one > > per repo) to cache the index in memory to speed up index load time > > (and in future probably name-hash too, I think it's also more often > > used on Windows because of case-insensitive fs). It also enables > > watchman (on Windows) for faster refresh. This patch allows to > > start > > the daemon automatically if it's not running. But I don't know it > > will > > work ok on Windows. > > > > Assuming that "index-helper" service has to be installed and > > started > > from system, there can only be one service running right? This > > clashes > > with the per-repo daemon design... I think it can stilf work, if > > the > > main service just spawns new process, one for each repo. But again > > I'm > > not sure. > > If we want to run the process as a Windows service, you are correct, > there > really can only be one. Worse: it runs with admin privileges. > > But why not just keep it running as a detached process? We can run > those > on Windows, and if we're opening a named pipe whose name reveals the > one-to-one mapping with the index in question, I think we are fine > (read: > we can detect whether the process is running already). > > We can even tell those processes to have a timeout, or to react to > other > system events. > > Please note that I am *very* interested in this feature (speeding up > index > operations). I don't understand what a "detached process" is on Windows (I have never done any real Windows programming). Does that mean "call daemonize() and it'll take care of it?" Or something else? Or should I just not worry about it and let you take care of it? Also, I'll figure out how to switch to named pipes. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 12/17] unpack-trees: preserve index extensions
Make git checkout (and other unpack_tree operations) preserve the untracked cache and watchman status. This is valuable for two reasons: 1. Often, an unpack_tree operation will not touch large parts of the working tree, and thus most of the untracked cache will continue to be valid. 2. Even if the untracked cache were entirely invalidated by such an operation, the user has signaled their intention to have such a cache, and we don't want to throw it away. The same logic applies to the watchman state. Signed-off-by: David Turner --- cache.h | 1 + read-cache.c | 8 t/t7063-status-untracked-cache.sh | 23 +++ t/test-lib-functions.sh | 4 unpack-trees.c| 1 + 5 files changed, 37 insertions(+) diff --git a/cache.h b/cache.h index 9fa339a..4ae7dd0 100644 --- a/cache.h +++ b/cache.h @@ -572,6 +572,7 @@ extern void write_watchman_ext(struct strbuf *sb, struct index_state* istate); #define REFRESH_DAEMON (1 << 2) extern int write_locked_index(struct index_state *, struct lock_file *lock, unsigned flags); extern int discard_index(struct index_state *); +extern void move_index_extensions(struct index_state *dst, struct index_state *src); extern int unmerged_index(const struct index_state *); extern int verify_path(const char *path); extern int index_dir_exists(struct index_state *istate, const char *name, int namelen); diff --git a/read-cache.c b/read-cache.c index 8e886d1..c141fec 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2742,3 +2742,11 @@ void stat_validity_update(struct stat_validity *sv, int fd) fill_stat_data(sv->sd, &st); } } + +void move_index_extensions(struct index_state *dst, struct index_state *src) +{ + dst->untracked = src->untracked; + src->untracked = NULL; + dst->last_update = src->last_update; + src->last_update = NULL; +} diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh index a971884..a2c8535 100755 --- a/t/t7063-status-untracked-cache.sh +++ b/t/t7063-status-untracked-cache.sh @@ -646,4 +646,27 @@ test_expect_success 'test ident field is working' ' test_cmp ../expect ../err ' +test_expect_success 'untracked cache survives a checkout' ' + git commit --allow-empty -m empty && + test-dump-untracked-cache >../before && + test_when_finished "git checkout master" && + git checkout -b other_branch && + test-dump-untracked-cache >../after && + test_cmp ../before ../after && + test_commit test && + test-dump-untracked-cache >../before && + git checkout master && + test-dump-untracked-cache >../after && + test_cmp ../before ../after +' + + +test_expect_success 'untracked cache survives a commit' ' + test-dump-untracked-cache >../before && + git add done/two && + git commit -m commit && + test-dump-untracked-cache >../after && + test_cmp ../before ../after +' + test_done diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index 8d99eb3..e974b5b 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -186,6 +186,10 @@ test_commit () { test_tick fi && git commit $signoff -m "$1" && + if [ "$(git config core.bare)" = false ] + then + git update-index --force-untracked-cache + fi git tag "${4:-$1}" } diff --git a/unpack-trees.c b/unpack-trees.c index 9f55cc2..fc90eb3 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1215,6 +1215,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options WRITE_TREE_SILENT | WRITE_TREE_REPAIR); } + move_index_extensions(&o->result, o->dst_index); discard_index(o->dst_index); *o->dst_index = o->result; } else { -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 15/17] index-helper: autorun mode
Soon, we'll want to automatically start index-helper, so we need a mode that silently exists if it can't start up (either because it's not in a git dir, or because another one is already running). Signed-off-by: David Turner --- index-helper.c | 29 +++-- t/t7900-index-helper.sh | 8 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/index-helper.c b/index-helper.c index 7362abb..ab96ded 100644 --- a/index-helper.c +++ b/index-helper.c @@ -368,8 +368,9 @@ done: int main(int argc, char **argv) { const char *prefix; - int idle_in_seconds = 600, detach = 0, kill = 0; + int idle_in_seconds = 600, detach = 0, kill = 0, autorun = 0; int fd; + int nongit; struct strbuf pipe_path = STRBUF_INIT; struct option options[] = { OPT_INTEGER(0, "exit-after", &idle_in_seconds, @@ -378,6 +379,7 @@ int main(int argc, char **argv) "verify shared memory after creating"), OPT_BOOL(0, "detach", &detach, "detach the process"), OPT_BOOL(0, "kill", &kill, "request that existing index helper processes exit"), + OPT_BOOL(0, "autorun", &autorun, "this is an automatic run of git index-helper, so certain errors can be solved by silently exiting"), OPT_END() }; @@ -387,7 +389,14 @@ int main(int argc, char **argv) if (argc == 2 && !strcmp(argv[1], "-h")) usage_with_options(usage_text, options); - prefix = setup_git_directory(); + prefix = setup_git_directory_gently(&nongit); + if (nongit) { + if (autorun) + exit(0); + else + die("Not a git repository"); + } + if (parse_options(argc, (const char **)argv, prefix, options, usage_text, 0)) die(_("too many arguments")); @@ -402,10 +411,18 @@ int main(int argc, char **argv) /* check that no other copy is running */ fd = open(pipe_path.buf, O_RDONLY | O_NONBLOCK); - if (fd > 0) - die(_("Already running")); - if (errno != ENXIO && errno != ENOENT) - die_errno(_("Unexpected error checking pipe")); + if (fd > 0) { + if (autorun) + return 0; + else + die(_("Already running")); + } + if (errno != ENXIO && errno != ENOENT) { + if (autorun) + return 0; + else + die_errno(_("Unexpected error checking pipe")); + } atexit(cleanup); sigchain_push_common(cleanup_on_signal); diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh index ce0cc27..6020fe4 100755 --- a/t/t7900-index-helper.sh +++ b/t/t7900-index-helper.sh @@ -33,4 +33,12 @@ test_expect_success 'index-helper will not start if already running' ' grep "Already running" err ' +test_expect_success 'index-helper is quiet with --autorun' ' + test_when_finished "git index-helper --kill" && + git index-helper --kill && + git index-helper --detach && + test -p .git/index-helper.pipe && + git index-helper --autorun +' + test_done -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 17/17] read-cache: config for waiting for index-helper
When we poke index-helper, and index-helper is using watchman, we want to wait for a response (showing that the watchman extension shm has been prepared). If it's not using watchman, we don't. So add a new config, core.waitforindexhelper, with sensible defaults, to configure this behavior. Signed-off-by: David Turner --- cache.h | 1 + config.c | 5 + environment.c | 5 + read-cache.c | 5 - 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cache.h b/cache.h index 4ae7dd0..f8b8dbf 100644 --- a/cache.h +++ b/cache.h @@ -692,6 +692,7 @@ extern char *git_replace_ref_base; extern int fsync_object_files; extern int core_preload_index; extern int core_watchman_sync_timeout; +extern int wait_for_index_helper; extern int core_apply_sparse_checkout; extern int precomposed_unicode; extern int protect_hfs; diff --git a/config.c b/config.c index e6dc141..5f1b8bd 100644 --- a/config.c +++ b/config.c @@ -887,6 +887,11 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.waitforindexhelper")) { + wait_for_index_helper = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.createobject")) { if (!strcmp(value, "rename")) object_creation_mode = OBJECT_CREATION_USES_RENAMES; diff --git a/environment.c b/environment.c index 35e03c7..c7fb0a9 100644 --- a/environment.c +++ b/environment.c @@ -95,6 +95,11 @@ int core_preload_index = 1; int ignore_untracked_cache_config; int core_watchman_sync_timeout = 300; +#ifdef USE_WATCHMAN +int wait_for_index_helper = 1; +#else +int wait_for_index_helper = 0; +#endif /* This is set by setup_git_dir_gently() and/or git_default_config() */ diff --git a/read-cache.c b/read-cache.c index c141fec..7fd9b2c 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1821,7 +1821,10 @@ static int poke_daemon(struct index_state *istate, if (refresh_cache) { ret = write_in_full(fd, "refresh", 8) == 8; } else { - ret = poke_and_wait_for_reply(fd); + if (wait_for_index_helper) + ret = poke_and_wait_for_reply(fd); + else + ret = write_in_full(fd, "poke", 5) == 5; } close(fd); -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 14/17] index-helper: don't run if already running
Signed-off-by: David Turner --- index-helper.c | 7 +++ t/t7900-index-helper.sh | 9 + 2 files changed, 16 insertions(+) diff --git a/index-helper.c b/index-helper.c index ffa3c2a..7362abb 100644 --- a/index-helper.c +++ b/index-helper.c @@ -400,6 +400,13 @@ int main(int argc, char **argv) return 0; } + /* check that no other copy is running */ + fd = open(pipe_path.buf, O_RDONLY | O_NONBLOCK); + if (fd > 0) + die(_("Already running")); + if (errno != ENXIO && errno != ENOENT) + die_errno(_("Unexpected error checking pipe")); + atexit(cleanup); sigchain_push_common(cleanup_on_signal); diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh index 14b5a6c..ce0cc27 100755 --- a/t/t7900-index-helper.sh +++ b/t/t7900-index-helper.sh @@ -24,4 +24,13 @@ test_expect_success 'index-helper creates usable pipe file and can be killed' ' test_path_is_missing .git/index-helper.pipe ' +test_expect_success 'index-helper will not start if already running' ' + test_when_finished "git index-helper --kill" && + git index-helper --detach && + test -p .git/index-helper.pipe && + test_must_fail git index-helper 2>err && + test -p .git/index-helper.pipe && + grep "Already running" err +' + test_done -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 06/17] index-helper: add --detach
From: Nguyễn Thái Ngọc Duy We detach after creating and opening the named pipe, because otherwise we might return control to the shell before index-helper is ready to accept commands. This might lead to flaky tests. Signed-off-by: Nguyễn Thái Ngọc Duy --- Documentation/git-index-helper.txt | 3 +++ index-helper.c | 11 +-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/git-index-helper.txt b/Documentation/git-index-helper.txt index 7afc5c9..b858a8d 100644 --- a/Documentation/git-index-helper.txt +++ b/Documentation/git-index-helper.txt @@ -31,6 +31,9 @@ OPTIONS for reading an index, but because it will happen in the background, it's not noticable. `--strict` is enabled by default. +--detach:: + Detach from the shell. + NOTES - $GIT_DIR/index-helper.pipe is a named pipe that the daemon reads diff --git a/index-helper.c b/index-helper.c index 8d221e0..a854ed8 100644 --- a/index-helper.c +++ b/index-helper.c @@ -15,7 +15,7 @@ struct shm { static struct shm shm_index; static struct shm shm_base_index; -static int to_verify = 1; +static int daemonized, to_verify = 1; static void release_index_shm(struct shm *is) { @@ -34,6 +34,8 @@ static void cleanup_shm(void) static void cleanup(void) { + if (daemonized) + return; unlink(git_path("index-helper.pipe")); cleanup_shm(); } @@ -229,7 +231,7 @@ static const char * const usage_text[] = { int main(int argc, char **argv) { const char *prefix; - int idle_in_seconds = 600; + int idle_in_seconds = 600, detach = 0; int fd; struct strbuf pipe_path = STRBUF_INIT; struct option options[] = { @@ -237,6 +239,7 @@ int main(int argc, char **argv) N_("exit if not used after some seconds")), OPT_BOOL(0, "strict", &to_verify, "verify shared memory after creating"), + OPT_BOOL(0, "detach", &detach, "detach the process"), OPT_END() }; @@ -258,6 +261,10 @@ int main(int argc, char **argv) fd = setup_pipe(pipe_path.buf); if (fd < 0) die_errno("Could not set up index-helper.pipe"); + + if (detach && daemonize(&daemonized)) + die_errno("unable to detach"); + loop(fd, idle_in_seconds); return 0; } -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 05/17] daemonize(): set a flag before exiting the main process
From: Nguyễn Thái Ngọc Duy This allows signal handlers and atexit functions to realize this situation and not clean up. Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/gc.c | 2 +- cache.h | 2 +- daemon.c | 2 +- setup.c | 4 +++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index c583aad..37180de 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -385,7 +385,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) * failure to daemonize is ok, we'll continue * in foreground */ - daemonized = !daemonize(); + daemonized = !daemonize(NULL); } } else add_repack_all_option(); diff --git a/cache.h b/cache.h index 5805962..d386722 100644 --- a/cache.h +++ b/cache.h @@ -530,7 +530,7 @@ extern int set_git_dir_init(const char *git_dir, const char *real_git_dir, int); extern int init_db(const char *template_dir, unsigned int flags); extern void sanitize_stdfds(void); -extern int daemonize(void); +extern int daemonize(int *); #define alloc_nr(x) (((x)+16)*3/2) diff --git a/daemon.c b/daemon.c index 8d45c33..a5cf954 100644 --- a/daemon.c +++ b/daemon.c @@ -1365,7 +1365,7 @@ int main(int argc, char **argv) return execute(); if (detach) { - if (daemonize()) + if (daemonize(NULL)) die("--detach not supported on this platform"); } else sanitize_stdfds(); diff --git a/setup.c b/setup.c index de1a2a7..9adf13f 100644 --- a/setup.c +++ b/setup.c @@ -1017,7 +1017,7 @@ void sanitize_stdfds(void) close(fd); } -int daemonize(void) +int daemonize(int *daemonized) { #ifdef NO_POSIX_GOODIES errno = ENOSYS; @@ -1029,6 +1029,8 @@ int daemonize(void) case -1: die_errno("fork failed"); default: + if (daemonized) + *daemonized = 1; exit(0); } if (setsid() == -1) -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 03/17] index-helper: new daemon for caching index and related stuff
From: Nguyễn Thái Ngọc Duy Instead of reading the index from disk and worrying about disk corruption, the index is cached in memory (memory bit-flips happen too, but hopefully less often). The result is faster read. Read time is reduced by 70%. The biggest gain is not having to verify the trailing SHA-1, which takes lots of time especially on large index files. But this also opens doors for further optimiztions: - we could create an in-memory format that's essentially the memory dump of the index to eliminate most of parsing/allocation overhead. The mmap'd memory can be used straight away. Experiment [1] shows we could reduce read time by 88%. - we could cache non-index info such as name hash The shared memory's name folows the template "git--" where is the trailing SHA-1 of the index file. is "index" for cached index files (and may be "name-hash" for name-hash cache). If such shared memory exists, it contains the same index content as on disk. The content is already validated by the daemon and git won't validate it again (except comparing the trailing SHA-1s). We keep this daemon's logic as thin as possible. The "brain" stays in git. So the daemon can read and validate stuff, but that's all it's allowed to do. It does not add/create new information. It doesn't even accept direct updates from git. Git can poke the daemon via named pipes to tell it to refresh the index cache, or to keep it alive some more minutes. It can't give any real index data directly to the daemon. Real data goes to disk first, then the daemon reads and verifies it from there. Poking only happens for $GIT_DIR/index, not temporary index files. $GIT_DIR/index-helper.pipe is the named pipe for daemon process. The daemon reads from the pipe and executes commands. Commands that need replies from the daemon will have to open their own pipe, since a named pipe should only have one reader. Unix domain sockets don't have this problem, but are less portable. index-helper requires POSIX realtime extension. POSIX shm interface however is abstracted away so that Windows support can be added later. On webkit.git with index format v2, duplicating 8 times to 1.4m entries and 200MB in size: (vanilla) 0.986986364 s: read_index_from .git/index (index-helper) 0.267850279 s: read_index_from .git/index Interestingly with index v4, we get less out of index-helper. It makes sense as v4 requires more processing after loading the index: (vanilla) 0.72249 s: read_index_from .git/index (index-helper) 0.302741500 s: read_index_from .git/index Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: David Turner --- .gitignore | 1 + Documentation/git-index-helper.txt | 46 Makefile | 9 ++ cache.h| 3 + config.mak.uname | 1 + git-compat-util.h | 1 + index-helper.c | 215 + read-cache.c | 99 +++-- shm.c | 67 shm.h | 23 t/t7900-index-helper.sh| 18 11 files changed, 474 insertions(+), 9 deletions(-) create mode 100644 Documentation/git-index-helper.txt create mode 100644 index-helper.c create mode 100644 shm.c create mode 100644 shm.h create mode 100755 t/t7900-index-helper.sh diff --git a/.gitignore b/.gitignore index 5087ce1..b92f122 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,7 @@ /git-http-fetch /git-http-push /git-imap-send +/git-index-helper /git-index-pack /git-init /git-init-db diff --git a/Documentation/git-index-helper.txt b/Documentation/git-index-helper.txt new file mode 100644 index 000..59a5abc --- /dev/null +++ b/Documentation/git-index-helper.txt @@ -0,0 +1,46 @@ +git-index-helper(1) +=== + +NAME + +git-index-helper - A simple cache daemon for speeding up index file access + +SYNOPSIS + +[verse] +'git index-helper' [options] + +DESCRIPTION +--- +Keep the index file in memory for faster access. This daemon is per +repository. + +OPTIONS +--- + +--exit-after=:: + Exit if the cached index is not accessed for `` + minutes. Specify 0 to wait forever. Default is 10. + +NOTES +- +$GIT_DIR/index-helper.pipe is a named pipe that the daemon reads +commands from. At least on Linux, shared memory objects are availble +via /dev/shm with the name pattern "git--". Normally +the daemon will clean up shared memory objects when it exits. But if +it crashes, some objects could remain there and they can be safely +deleted with "rm" command. The following commands are used to control +the daemon: + +"refresh":: + Reread the index. + +"poke": + Let the daemon know the index is t
[PATCH v2 00/15] index-helper, watchman
In this version: I removed the statistics since they're not really a core part of the series and we can always add them later. I added a little more testing. I merged the untracked cache/watchman mashup into the relevant patches. Hopefully I didn't screw it up -- I got into the rebase weeds here and took a while to get out. I wish I were better at rebasing. I moved the index-helper to a named-pipe based IPC mechanism instead of a signal-based one. I don't actualy know much about named pipes other than what I learned while writing this, so someone with clue should probably take a peek. The index-helper pid files was kind of a hassle -- there was a lot of mechanism involved in making sure that they were in fact pointing to a live index-helper process. With named pipes, we don't really need them: if a live index-helper is running, it can simply be instructed to die, and if it's not, then the pipe can be safely removed and a new index-helper started. Because there is no longer a pid file for index-helper, index-helper has no way to let git know that it should wait for a watchman update reply when poked. I worked around this by making the waiting configurable, and giving a sensible default. Note that in the no-watchman case, the wait will be short even if misconfigured, because the daemon can immediately send an "OK" message. I updated some commit messages, following Junio and Duy's suggestions. Johannes Schindelin said that he would work on the Windows side, so I've dropped the Windows bits (including one patch). Since I don't know anything about Windows, it's probably for the best that I'm not coding for it. I eliminated the ability to start up an index-helper when there was already one running because I don't see why you would want to do that, and because it would lead to multiple readers on the same named pipe, which has undefined behavior. Just kill the old one if you're done with it. David Turner (7): read-cache: invalidate untracked cache data when reading WAMA unpack-trees: preserve index extensions index-helper: kill mode index-helper: don't run if already running index-helper: autorun mode index-helper: optionally automatically run read-cache: config for waiting for index-helper Nguyễn Thái Ngọc Duy (10): read-cache.c: fix constness of verify_hdr() read-cache: allow to keep mmap'd memory after reading index-helper: new daemon for caching index and related stuff index-helper: add --strict daemonize(): set a flag before exiting the main process index-helper: add --detach read-cache: add watchman 'WAMA' extension Add watchman support to reduce index refresh cost index-helper: use watchman to avoid refreshing index with lstat() update-index: enable/disable watchman support .gitignore | 1 + Documentation/git-index-helper.txt | 64 ++ Makefile | 21 ++ builtin/gc.c | 2 +- builtin/update-index.c | 11 + cache.h| 18 +- config.c | 10 + config.mak.uname | 1 + configure.ac | 8 + daemon.c | 2 +- dir.c | 23 +- dir.h | 6 + environment.c | 8 + git-compat-util.h | 1 + git.c | 35 ++- index-helper.c | 439 +++ read-cache.c | 455 +++-- setup.c| 4 +- shm.c | 67 ++ shm.h | 23 ++ t/t7063-status-untracked-cache.sh | 23 ++ t/t7900-index-helper.sh| 60 + t/test-lib-functions.sh| 4 + unpack-trees.c | 1 + watchman-support.c | 134 +++ watchman-support.h | 7 + 26 files changed, 1406 insertions(+), 22 deletions(-) create mode 100644 Documentation/git-index-helper.txt create mode 100644 index-helper.c create mode 100644 shm.c create mode 100644 shm.h create mode 100755 t/t7900-index-helper.sh create mode 100644 watchman-support.c create mode 100644 watchman-support.h -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] merge: refuse to create too cool a merge by default
On Fri, 2016-03-18 at 13:21 -0700, Junio C Hamano wrote: > Many tests that are updated by this patch does the > pass-through manually by turning: Nit: Should be many tests ... do Also, I didn't notice a test that shows that "cool" merges without allow-unrelated-histories are forbidden. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 16/17] index-helper: optionally automatically run
Introduce a new config option, indexhelper.autorun, to automatically run git index-helper before starting up a builtin git command. This enables users to keep index-helper running without manual intervention. Signed-off-by: David Turner --- git.c | 35 ++- t/t7900-index-helper.sh | 16 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/git.c b/git.c index 6cc0c07..7d27782 100644 --- a/git.c +++ b/git.c @@ -521,6 +521,37 @@ static void strip_extension(const char **argv) #define strip_extension(cmd) #endif +static int want_auto_index_helper(void) +{ + int want = -1; + const char *value = NULL; + const char *key = "indexhelper.autorun"; + + if (git_config_key_is_valid(key) && + !git_config_get_value(key, &value)) { + int b = git_config_maybe_bool(key, value); + want = b >= 0 ? b : 0; + } + return want; +} + +static void maybe_run_index_helper(struct cmd_struct *cmd) +{ + const char *argv[] = {"git-index-helper", "--detach", "--autorun", NULL}; + + if (!(cmd->option & NEED_WORK_TREE)) + return; + + if (want_auto_index_helper() <= 0) + return; + + trace_argv_printf(argv, "trace: auto index-helper:"); + + if (run_command_v_opt(argv, + RUN_SILENT_EXEC_FAILURE | RUN_CLEAN_ON_EXIT)) + warning(_("You specified indexhelper.autorun, but running git-index-helper failed.")); +} + static void handle_builtin(int argc, const char **argv) { const char *cmd; @@ -536,8 +567,10 @@ static void handle_builtin(int argc, const char **argv) } builtin = get_builtin(cmd); - if (builtin) + if (builtin) { + maybe_run_index_helper(builtin); exit(run_builtin(builtin, argc, argv)); + } } static void execv_dashed_external(const char **argv) diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh index 6020fe4..7fe66b7 100755 --- a/t/t7900-index-helper.sh +++ b/t/t7900-index-helper.sh @@ -41,4 +41,20 @@ test_expect_success 'index-helper is quiet with --autorun' ' git index-helper --autorun ' +test_expect_success 'index-helper autorun works' ' + rm -f .git/index-helper.pipe && + git status && + test_path_is_missing .git/index-helper.pipe && + test_config indexhelper.autorun true && + git status && + test -p .git/index-helper.pipe && + git status 2>err && + test -p .git/index-helper.pipe && + ! grep -q . err && + git index-helper --kill && + test_config indexhelper.autorun false && + git status && + test_path_is_missing .git/index-helper.pipe +' + test_done -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 13/17] index-helper: kill mode
Add a new command (and command-line arg) to allow index-helpers to exit cleanly. This is mainly useful for tests. Signed-off-by: David Turner --- index-helper.c | 39 +-- t/t7900-index-helper.sh | 9 + 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/index-helper.c b/index-helper.c index 445a0ac..ffa3c2a 100644 --- a/index-helper.c +++ b/index-helper.c @@ -286,6 +286,8 @@ static void loop(int fd, int idle_in_seconds) * alive, nothing to do. */ ; + } else if (!strcmp(command.buf, "die")) { + break; } else { warning("BUG: Bogus command %s", command.buf); } @@ -338,10 +340,35 @@ static const char * const usage_text[] = { NULL }; +static void request_kill(const char *pipe_path) +{ + int fd; + + fd = open(pipe_path, O_WRONLY | O_NONBLOCK); + if (fd < 0) { + warning("No existing pipe; can't send kill message to old process"); + goto done; + } + + write_in_full(fd, "die", 4); + close(fd); + +done: + /* +* The child will try to do this anyway, but we want to be +* ready to launch a new daemon immediately after this command +* returns. +*/ + + unlink(pipe_path); + return; +} + + int main(int argc, char **argv) { const char *prefix; - int idle_in_seconds = 600, detach = 0; + int idle_in_seconds = 600, detach = 0, kill = 0; int fd; struct strbuf pipe_path = STRBUF_INIT; struct option options[] = { @@ -350,6 +377,7 @@ int main(int argc, char **argv) OPT_BOOL(0, "strict", &to_verify, "verify shared memory after creating"), OPT_BOOL(0, "detach", &detach, "detach the process"), + OPT_BOOL(0, "kill", &kill, "request that existing index helper processes exit"), OPT_END() }; @@ -364,10 +392,17 @@ int main(int argc, char **argv) options, usage_text, 0)) die(_("too many arguments")); + strbuf_git_path(&pipe_path, "index-helper.pipe"); + if (kill) { + if (detach) + die(_("--kill doesn't want any other options")); + request_kill(pipe_path.buf); + return 0; + } + atexit(cleanup); sigchain_push_common(cleanup_on_signal); - strbuf_git_path(&pipe_path, "index-helper.pipe"); fd = setup_pipe(pipe_path.buf); if (fd < 0) die_errno("Could not set up index-helper.pipe"); diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh index 7126037..14b5a6c 100755 --- a/t/t7900-index-helper.sh +++ b/t/t7900-index-helper.sh @@ -15,4 +15,13 @@ test_expect_success 'index-helper smoke test' ' test_path_is_missing .git/index-helper.pipe ' +test_expect_success 'index-helper creates usable pipe file and can be killed' ' + test_when_finished "git index-helper --kill" && + test_path_is_missing .git/index-helper.pipe && + git index-helper --detach && + test -p .git/index-helper.pipe && + git index-helper --kill && + test_path_is_missing .git/index-helper.pipe +' + test_done -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 10/17] index-helper: use watchman to avoid refreshing index with lstat()
From: Nguyễn Thái Ngọc Duy Watchman is hidden behind index-helper. Before git tries to read the index from shm, it notifies index-helper through the named pipe and waits for index-helper to prepare shm. index-helper then contacts watchman, updates 'WAMA' extension and put it in a separate shm and wakes git up with a write to git's named pipe. Git uses this extension to not lstat unchanged entries. Git only trusts the 'WAMA' extension when it's received from the separate shm, not from disk. Unmarked entries are "clean". Marked entries are dirty from watchman point of view. If it finds out some entries are 'watchman-dirty', but are really unchanged (e.g. the file was changed, then reverted back), then Git will clear the marking in 'WAMA' before writing it down. Hiding watchman behind index-helper means you need both daemons. You can't run watchman alone. Not so good. But on the other hand, 'git' binary is not linked to watchman/json libraries, which is good for packaging. Core git package will run fine without watchman-related packages. If they need watchman, they can install git-index-helper and dependencies. This also lets us trust anything in the untracked cache that we haven't marked invalid, saving those stat() calls. Another reason for tying watchman to index-helper is, when used with untracked cache, we need to keep track of $GIT_WORK_TREE file listing. That kind of list can be kept in index-helper. Signed-off-by: Nguyễn Thái Ngọc Duy gmail.com> Signed-off-by: David Turner --- Documentation/git-index-helper.txt | 6 ++ cache.h| 2 + dir.c | 12 index-helper.c | 128 ++--- read-cache.c | 141 +++-- 5 files changed, 275 insertions(+), 14 deletions(-) diff --git a/Documentation/git-index-helper.txt b/Documentation/git-index-helper.txt index b858a8d..7a8042e 100644 --- a/Documentation/git-index-helper.txt +++ b/Documentation/git-index-helper.txt @@ -51,6 +51,12 @@ the daemon: Let the daemon know the index is to be read. It keeps the daemon alive longer, unless `--exit-after=0` is used. +"poke ": + Like "poke", but replies with "OK" by opening the named pipe + at and writing the string. If watchman is configured, + index-helper queries watchman, then prepares a shared memory + object with the watchman index extension before replying. + All commands and replies are terminated by a 0 byte. GIT diff --git a/cache.h b/cache.h index 95715fd..9fa339a 100644 --- a/cache.h +++ b/cache.h @@ -558,6 +558,7 @@ extern int daemonize(int *); /* Initialize and use the cache information */ struct lock_file; +extern int verify_index(const struct index_state *); extern int read_index(struct index_state *); extern int read_index_preload(struct index_state *, const struct pathspec *pathspec); extern int do_read_index(struct index_state *istate, const char *path, @@ -565,6 +566,7 @@ extern int do_read_index(struct index_state *istate, const char *path, extern int read_index_from(struct index_state *, const char *path); extern int is_index_unborn(struct index_state *); extern int read_index_unmerged(struct index_state *); +extern void write_watchman_ext(struct strbuf *sb, struct index_state* istate); #define COMMIT_LOCK(1 << 0) #define CLOSE_LOCK (1 << 1) #define REFRESH_DAEMON (1 << 2) diff --git a/dir.c b/dir.c index 9b659e6..5058b29 100644 --- a/dir.c +++ b/dir.c @@ -1726,6 +1726,17 @@ static int valid_cached_dir(struct dir_struct *dir, if (!untracked) return 0; + if (dir->untracked->use_watchman) { + /* +* With watchman, we can trust the untracked cache's +* valid field. +*/ + if (untracked->valid) + goto skip_stat; + else + invalidate_directory(dir->untracked, untracked); + } + if (stat(path->len ? path->buf : ".", &st)) { invalidate_directory(dir->untracked, untracked); memset(&untracked->stat_data, 0, sizeof(untracked->stat_data)); @@ -1739,6 +1750,7 @@ static int valid_cached_dir(struct dir_struct *dir, return 0; } +skip_stat: if (untracked->check_only != !!check_only) { invalidate_directory(dir->untracked, untracked); return 0; diff --git a/index-helper.c b/index-helper.c index a854ed8..445a0ac 100644 --- a/index-helper.c +++ b/index-helper.c @@ -6,15 +6,18 @@ #include "split-index.h" #include "shm.h" #include "lockfile.h" +#include "watchman-support.h&q
[PATCH v2 11/17] update-index: enable/disable watchman support
From: Nguyễn Thái Ngọc Duy Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/update-index.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/builtin/update-index.c b/builtin/update-index.c index 1c94ca5..7c08e1c 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -914,6 +914,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) { int newfd, entries, has_errors = 0, nul_term_line = 0; enum uc_mode untracked_cache = UC_UNSPECIFIED; + int use_watchman = -1; int read_from_stdin = 0; int prefix_length = prefix ? strlen(prefix) : 0; int preferred_index_format = 0; @@ -1012,6 +1013,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) N_("test if the filesystem supports untracked cache"), UC_TEST), OPT_SET_INT(0, "force-untracked-cache", &untracked_cache, N_("enable untracked cache without testing the filesystem"), UC_FORCE), + OPT_BOOL(0, "watchman", &use_watchman, + N_("use or not use watchman to reduce refresh cost")), OPT_END() }; @@ -1149,6 +1152,14 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) die("Bug: bad untracked_cache value: %d", untracked_cache); } + if (use_watchman > 0) { + the_index.last_update= xstrdup(""); + the_index.cache_changed |= WATCHMAN_CHANGED; + } else if (!use_watchman) { + the_index.last_update= NULL; + the_index.cache_changed |= WATCHMAN_CHANGED; + } + if (active_cache_changed) { if (newfd < 0) { if (refresh_args.flags & REFRESH_QUIET) -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 07/17] read-cache: add watchman 'WAMA' extension
From: Nguyễn Thái Ngọc Duy The extension contains a bitmap, one bit for each entry in the index. If the n-th bit is zero, the n-th entry is considered unchanged, we can ce_mark_uptodate() it without refreshing. If the bit is non-zero and we found out the corresponding file is clean after refresh, we can clear the bit. In addition, there's a list of directories in the untracked-cache to invalidate (because they have new or modified entries). The 'skipping refresh' bit is not in this patch yet as we would need watchman. More details in later patches. Signed-off-by: Nguyễn Thái Ngọc Duy --- cache.h | 4 ++ dir.h| 3 ++ read-cache.c | 117 ++- 3 files changed, 122 insertions(+), 2 deletions(-) diff --git a/cache.h b/cache.h index d386722..5b10d52 100644 --- a/cache.h +++ b/cache.h @@ -182,6 +182,8 @@ struct cache_entry { #define CE_VALID (0x8000) #define CE_STAGESHIFT 12 +#define CE_WATCHMAN_DIRTY (0x0001) + /* * Range 0x0FFF in ce_flags is divided into * two parts: in-memory flags and on-disk ones. @@ -320,6 +322,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define CACHE_TREE_CHANGED (1 << 5) #define SPLIT_INDEX_ORDERED(1 << 6) #define UNTRACKED_CHANGED (1 << 7) +#define WATCHMAN_CHANGED (1 << 8) struct split_index; struct untracked_cache; @@ -344,6 +347,7 @@ struct index_state { struct untracked_cache *untracked; void *mmap; size_t mmap_size; + char *last_update; }; extern struct index_state the_index; diff --git a/dir.h b/dir.h index 3ec3fb0..3d540de 100644 --- a/dir.h +++ b/dir.h @@ -142,6 +142,9 @@ struct untracked_cache { int gitignore_invalidated; int dir_invalidated; int dir_opened; + /* watchman invalidation data */ + unsigned int use_watchman : 1; + struct string_list invalid_untracked; }; struct dir_struct { diff --git a/read-cache.c b/read-cache.c index fe73828..6d5e871 100644 --- a/read-cache.c +++ b/read-cache.c @@ -19,6 +19,7 @@ #include "split-index.h" #include "utf8.h" #include "shm.h" +#include "ewah/ewok.h" static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, unsigned int options); @@ -41,11 +42,13 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */ #define CACHE_EXT_LINK 0x6c696e6b/* "link" */ #define CACHE_EXT_UNTRACKED 0x554E5452 /* "UNTR" */ +#define CACHE_EXT_WATCHMAN 0x57414D41/* "WAMA" */ /* changes that can be kept in $GIT_DIR/index (basically all extensions) */ #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \ CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \ -SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED) +SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED | \ +WATCHMAN_CHANGED) struct index_state the_index; static const char *alternate_index_output; @@ -1220,8 +1223,13 @@ int refresh_index(struct index_state *istate, unsigned int flags, continue; new = refresh_cache_ent(istate, ce, options, &cache_errno, &changed); - if (new == ce) + if (new == ce) { + if (ce->ce_flags & CE_WATCHMAN_DIRTY) { + ce->ce_flags &= ~CE_WATCHMAN_DIRTY; + istate->cache_changed |= WATCHMAN_CHANGED; + } continue; + } if (!new) { const char *fmt; @@ -1365,6 +1373,94 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size) return 0; } +static void mark_no_watchman(size_t pos, void *data) +{ + struct index_state *istate = data; + assert(pos < istate->cache_nr); + istate->cache[pos]->ce_flags |= CE_WATCHMAN_DIRTY; +} + +static int read_watchman_ext(struct index_state *istate, const void *data, +unsigned long sz) +{ + struct ewah_bitmap *bitmap; + int ret, len; + uint32_t bitmap_size; + uint32_t untracked_nr; + + if (memchr(data, 0, sz) == NULL) + return error("invalid extension"); + + len = strlen(data) + 1; + memcpy(&bitmap_size, (const char *)data + len, 4); + memcpy(&untracked_nr, (const char *)data + len + 4, 4); + untracked_nr = ntohl(untracked_nr); + bitmap_size = ntohl(bitmap_size); + + bitmap = ewah_new(); + ret = ewah_read_mmap(bitmap, (const char *)data + len + 8, bitmap_size); + if (ret != bitmap_size) { + ewah_free(bitmap); + return error("failed to parse ewah bitmap reading watchman index extension"); + } + istate->last_update = xstrdup(data); + ewah_each_bit(bitmap, mar
[PATCH v2 02/17] read-cache: allow to keep mmap'd memory after reading
From: Nguyễn Thái Ngọc Duy Later, we will introduce git index-helper to share this memory with other git processes. Since the memory will be shared, it will never be unmapped (although the kernel may of course choose to page it out). Signed-off-by: Nguyễn Thái Ngọc Duy --- cache.h | 3 +++ read-cache.c | 13 - 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cache.h b/cache.h index b829410..4180e2b 100644 --- a/cache.h +++ b/cache.h @@ -333,11 +333,14 @@ struct index_state { struct split_index *split_index; struct cache_time timestamp; unsigned name_hash_initialized : 1, +keep_mmap : 1, initialized : 1; struct hashmap name_hash; struct hashmap dir_hash; unsigned char sha1[20]; struct untracked_cache *untracked; + void *mmap; + size_t mmap_size; }; extern struct index_state the_index; diff --git a/read-cache.c b/read-cache.c index 16cc487..7e387e9 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1574,6 +1574,10 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) mmap = xmmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, 0); if (mmap == MAP_FAILED) die_errno("unable to map index file"); + if (istate->keep_mmap) { + istate->mmap = mmap; + istate->mmap_size = mmap_size; + } close(fd); hdr = mmap; @@ -1626,10 +1630,12 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) src_offset += 8; src_offset += extsize; } - munmap(mmap, mmap_size); + if (!istate->keep_mmap) + munmap(mmap, mmap_size); return istate->cache_nr; unmap: + istate->mmap = NULL; munmap(mmap, mmap_size); die("index file corrupt"); } @@ -1655,6 +1661,7 @@ int read_index_from(struct index_state *istate, const char *path) discard_index(split_index->base); else split_index->base = xcalloc(1, sizeof(*split_index->base)); + split_index->base->keep_mmap = istate->keep_mmap; ret = do_read_index(split_index->base, git_path("sharedindex.%s", sha1_to_hex(split_index->base_sha1)), 1); @@ -1698,6 +1705,10 @@ int discard_index(struct index_state *istate) free(istate->cache); istate->cache = NULL; istate->cache_alloc = 0; + if (istate->keep_mmap && istate->mmap) { + munmap(istate->mmap, istate->mmap_size); + istate->mmap = NULL; + } discard_split_index(istate); free_untracked_cache(istate->untracked); istate->untracked = NULL; -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 04/17] index-helper: add --strict
From: Nguyễn Thái Ngọc Duy There are "holes" in the index-helper approach because the shared memory is not verified again by git. If $USER is compromised, shared memory could be modified. But anyone who could do this could already modify $GIT_DIR/index. A more realistic risk is some bugs in index-helper that produce corrupt shared memory. --strict is added to avoid that. Strictly speaking there's still a very small gap where corrupt shared memory could still be read by git: after we write the trailing SHA-1 in the shared memory (thus signaling "this shm is ready") and before verify_shm() detects an error. Signed-off-by: Nguyễn Thái Ngọc Duy --- Documentation/git-index-helper.txt | 9 +++ cache.h| 1 + index-helper.c | 48 ++ read-cache.c | 9 --- 4 files changed, 64 insertions(+), 3 deletions(-) diff --git a/Documentation/git-index-helper.txt b/Documentation/git-index-helper.txt index 59a5abc..7afc5c9 100644 --- a/Documentation/git-index-helper.txt +++ b/Documentation/git-index-helper.txt @@ -22,6 +22,15 @@ OPTIONS Exit if the cached index is not accessed for `` minutes. Specify 0 to wait forever. Default is 10. +--strict:: +--no-strict:: + Strict mode makes index-helper verify the shared memory after + it's created. If the result does not match what's read from + $GIT_DIR/index, the shared memory is destroyed. This makes + index-helper take more than double the amount of time required + for reading an index, but because it will happen in the + background, it's not noticable. `--strict` is enabled by default. + NOTES - $GIT_DIR/index-helper.pipe is a named pipe that the daemon reads diff --git a/cache.h b/cache.h index c3c6d85..5805962 100644 --- a/cache.h +++ b/cache.h @@ -336,6 +336,7 @@ struct index_state { keep_mmap : 1, from_shm : 1, to_shm : 1, +always_verify_trailing_sha1 : 1, initialized : 1; struct hashmap name_hash; struct hashmap dir_hash; diff --git a/index-helper.c b/index-helper.c index 962c973..8d221e0 100644 --- a/index-helper.c +++ b/index-helper.c @@ -15,6 +15,7 @@ struct shm { static struct shm shm_index; static struct shm shm_base_index; +static int to_verify = 1; static void release_index_shm(struct shm *is) { @@ -73,11 +74,56 @@ static void share_index(struct index_state *istate, struct shm *is) hashcpy((unsigned char *)new_mmap + istate->mmap_size - 20, is->sha1); } +static int verify_shm(void) +{ + int i; + struct index_state istate; + memset(&istate, 0, sizeof(istate)); + istate.always_verify_trailing_sha1 = 1; + istate.to_shm = 1; + i = read_index(&istate); + if (i != the_index.cache_nr) + goto done; + for (; i < the_index.cache_nr; i++) { + struct cache_entry *base, *ce; + /* namelen is checked separately */ + const unsigned int ondisk_flags = + CE_STAGEMASK | CE_VALID | CE_EXTENDED_FLAGS; + unsigned int ce_flags, base_flags, ret; + base = the_index.cache[i]; + ce = istate.cache[i]; + if (ce->ce_namelen != base->ce_namelen || + strcmp(ce->name, base->name)) { + warning("mismatch at entry %d", i); + break; + } + ce_flags = ce->ce_flags; + base_flags = base->ce_flags; + /* only on-disk flags matter */ + ce->ce_flags &= ondisk_flags; + base->ce_flags &= ondisk_flags; + ret = memcmp(&ce->ce_stat_data, &base->ce_stat_data, +offsetof(struct cache_entry, name) - +offsetof(struct cache_entry, ce_stat_data)); + ce->ce_flags = ce_flags; + base->ce_flags = base_flags; + if (ret) { + warning("mismatch at entry %d", i); + break; + } + } +done: + discard_index(&istate); + return i == the_index.cache_nr; +} + static void share_the_index(void) { if (the_index.split_index && the_index.split_index->base) share_index(the_index.split_index->base, &shm_base_index); share_index(&the_index, &shm_index); + if (to_verify && !verify_shm()) + cleanup_shm(); discard_index(&the_index); } @@ -189,6 +235,8 @@ int main(int argc, char **argv) struct option options[] = { OPT_INTEGER(0, "exit-after", &idle_in_seconds, N_("exit if not used after some seconds")), + OPT_BOOL(0, "strict", &to_verify, +"verify shared memory after creating"), O
[PATCH v2 08/17] read-cache: invalidate untracked cache data when reading WAMA
When reading the watchman extension, invalidate the listed untracked-cache entries. We don't clear these entries yet; we can only do that once we know that they came from a live watchman (rather than from disk). Signed-off-by: David Turner --- dir.c| 11 +--- dir.h| 3 ++ read-cache.c | 89 3 files changed, 94 insertions(+), 9 deletions(-) diff --git a/dir.c b/dir.c index 69e0be6..9b659e6 100644 --- a/dir.c +++ b/dir.c @@ -597,9 +597,9 @@ static void trim_trailing_spaces(char *buf) * * If "name" has the trailing slash, it'll be excluded in the search. */ -static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc, - struct untracked_cache_dir *dir, - const char *name, int len) +struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc, +struct untracked_cache_dir *dir, +const char *name, int len) { int first, last; struct untracked_cache_dir *d; @@ -2625,8 +2625,10 @@ static void free_untracked(struct untracked_cache_dir *ucd) void free_untracked_cache(struct untracked_cache *uc) { - if (uc) + if (uc) { free_untracked(uc->root); + string_list_clear(&uc->invalid_untracked, 0); + } free(uc); } @@ -2775,6 +2777,7 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long return NULL; uc = xcalloc(1, sizeof(*uc)); + string_list_init(&uc->invalid_untracked, 1); strbuf_init(&uc->ident, ident_len); strbuf_add(&uc->ident, ident, ident_len); load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat, diff --git a/dir.h b/dir.h index 3d540de..8fd3f9e 100644 --- a/dir.h +++ b/dir.h @@ -315,4 +315,7 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked); void add_untracked_cache(struct index_state *istate); void remove_untracked_cache(struct index_state *istate); +struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc, +struct untracked_cache_dir *dir, +const char *name, int len); #endif diff --git a/read-cache.c b/read-cache.c index 6d5e871..b4bd15c 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1373,11 +1373,76 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size) return 0; } +static struct untracked_cache_dir *find_untracked_cache_dir( + struct untracked_cache *uc, struct untracked_cache_dir *ucd, + const char *name) +{ + int component_len; + const char *end; + struct untracked_cache_dir *dir; + + if (!*name) + return ucd; + + end = strchr(name, '/'); + if (end) + component_len = end - name; + else + component_len = strlen(name); + + dir = lookup_untracked(uc, ucd, name, component_len); + if (dir) + return find_untracked_cache_dir(uc, dir, name + component_len + 1); + + return NULL; +} + static void mark_no_watchman(size_t pos, void *data) { struct index_state *istate = data; + struct cache_entry *ce = istate->cache[pos]; + struct strbuf sb = STRBUF_INIT; + char *c; + struct untracked_cache_dir *dir; + assert(pos < istate->cache_nr); - istate->cache[pos]->ce_flags |= CE_WATCHMAN_DIRTY; + ce->ce_flags |= CE_WATCHMAN_DIRTY; + + if (!istate->untracked || !istate->untracked->root) + return; + + strbuf_add(&sb, ce->name, ce_namelen(ce)); + + for (c = sb.buf + sb.len - 1; c > sb.buf; c--) { + if (*c == '/') { + strbuf_setlen(&sb, c - sb.buf); + break; + } + } + + if (c == sb.buf) { + strbuf_setlen(&sb, 0); + } + + dir = find_untracked_cache_dir(istate->untracked, + istate->untracked->root, sb.buf); + if (dir) + dir->valid = 0; + + strbuf_release(&sb); +} + +static int mark_untracked_invalid(struct string_list_item *item, void *uc) +{ + struct untracked_cache *untracked = uc; + struct untracked_cache_dir *dir; + + dir = find_untracked_cache_dir(untracked, untracked->root, + item->string); + if (dir) + dir->valid = 0; + + return 0; } static int read_watchman_ext(struct index
[PATCH v2 09/17] Add watchman support to reduce index refresh cost
From: Nguyễn Thái Ngọc Duy The previous patch has the logic to clear bits in 'WAMA' bitmap. This patch has logic to set bits as told by watchman. The missing bit, _using_ these bits, are not here yet. A lot of this code is written by David Turner originally, mostly from [1]. I'm just copying and polishing it a bit. [1] http://article.gmane.org/gmane.comp.version-control.git/248006 Signed-off-by: David Turner Signed-off-by: Nguyễn Thái Ngọc Duy --- Makefile | 12 + cache.h| 1 + config.c | 5 ++ configure.ac | 8 environment.c | 3 ++ watchman-support.c | 134 + watchman-support.h | 7 +++ 7 files changed, 170 insertions(+) create mode 100644 watchman-support.c create mode 100644 watchman-support.h diff --git a/Makefile b/Makefile index 2d72771..8bf705b 100644 --- a/Makefile +++ b/Makefile @@ -453,6 +453,7 @@ MSGFMT = msgfmt CURL_CONFIG = curl-config PTHREAD_LIBS = -lpthread PTHREAD_CFLAGS = +WATCHMAN_LIBS = GCOV = gcov export TCL_PATH TCLTK_PATH @@ -1419,6 +1420,13 @@ else LIB_OBJS += thread-utils.o endif +ifdef USE_WATCHMAN + LIB_H += watchman-support.h + LIB_OBJS += watchman-support.o + WATCHMAN_LIBS = -lwatchman + BASIC_CFLAGS += -DUSE_WATCHMAN +endif + ifdef HAVE_PATHS_H BASIC_CFLAGS += -DHAVE_PATHS_H endif @@ -2030,6 +2038,9 @@ git-remote-testsvn$X: remote-testsvn.o GIT-LDFLAGS $(GITLIBS) $(VCSSVN_LIB) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) \ $(VCSSVN_LIB) +git-index-helper$X: index-helper.o GIT-LDFLAGS $(GITLIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) $(WATCHMAN_LIBS) + $(REMOTE_CURL_ALIASES): $(REMOTE_CURL_PRIMARY) $(QUIET_LNCP)$(RM) $@ && \ ln $< $@ 2>/dev/null || \ @@ -2168,6 +2179,7 @@ GIT-BUILD-OPTIONS: FORCE @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+ @echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@+ @echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@+ + @echo USE_WATCHMAN=\''$(subst ','\'',$(subst ','\'',$(USE_WATCHMAN)))'\' >>$@+ ifdef TEST_OUTPUT_DIRECTORY @echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst ','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@+ endif diff --git a/cache.h b/cache.h index 5b10d52..95715fd 100644 --- a/cache.h +++ b/cache.h @@ -688,6 +688,7 @@ extern char *git_replace_ref_base; extern int fsync_object_files; extern int core_preload_index; +extern int core_watchman_sync_timeout; extern int core_apply_sparse_checkout; extern int precomposed_unicode; extern int protect_hfs; diff --git a/config.c b/config.c index 9ba40bc..e6dc141 100644 --- a/config.c +++ b/config.c @@ -882,6 +882,11 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.watchmansynctimeout")) { + core_watchman_sync_timeout = git_config_int(var, value); + return 0; + } + if (!strcmp(var, "core.createobject")) { if (!strcmp(value, "rename")) object_creation_mode = OBJECT_CREATION_USES_RENAMES; diff --git a/configure.ac b/configure.ac index 0cd9f46..334d63b 100644 --- a/configure.ac +++ b/configure.ac @@ -1099,6 +1099,14 @@ AC_COMPILE_IFELSE([BSD_SYSCTL_SRC], HAVE_BSD_SYSCTL=]) GIT_CONF_SUBST([HAVE_BSD_SYSCTL]) +# +# Check for watchman client library + +AC_CHECK_LIB([watchman], [watchman_connect], + [USE_WATCHMAN=YesPlease], + [USE_WATCHMAN=]) +GIT_CONF_SUBST([USE_WATCHMAN]) + ## Other checks. # Define USE_PIC if you need the main git objects to be built with -fPIC # in order to build and link perl/Git.so. x86-64 seems to need this. diff --git a/environment.c b/environment.c index 6dec9d0..35e03c7 100644 --- a/environment.c +++ b/environment.c @@ -94,6 +94,9 @@ int core_preload_index = 1; */ int ignore_untracked_cache_config; +int core_watchman_sync_timeout = 300; + + /* This is set by setup_git_dir_gently() and/or git_default_config() */ char *git_work_tree_cfg; static char *work_tree; diff --git a/watchman-support.c b/watchman-support.c new file mode 100644 index 000..b7302b9 --- /dev/null +++ b/watchman-support.c @@ -0,0 +1,134 @@ +#include "cache.h" +#include "watchman-support.h" +#include "strbuf.h" +#include "dir.h" +#include + +static str
[PATCH v2 01/17] read-cache.c: fix constness of verify_hdr()
From: Nguyễn Thái Ngọc Duy Signed-off-by: Nguyễn Thái Ngọc Duy --- read-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/read-cache.c b/read-cache.c index d9fb78b..16cc487 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1345,7 +1345,7 @@ struct ondisk_cache_entry_extended { ondisk_cache_entry_extended_size(ce_namelen(ce)) : \ ondisk_cache_entry_size(ce_namelen(ce))) -static int verify_hdr(struct cache_header *hdr, unsigned long size) +static int verify_hdr(const struct cache_header *hdr, unsigned long size) { git_SHA_CTX c; unsigned char sha1[20]; -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 04/19] index-helper: new daemon for caching index and related stuff
On Thu, 2016-03-10 at 18:17 +0700, Duy Nguyen wrote: > On Thu, Mar 10, 2016 at 6:21 AM, Junio C Hamano > wrote: > > Junio C Hamano writes: > > > > > David Turner writes: > > > > > > > From: Nguyễn Thái Ngọc Duy > > > > > > > > Instead of reading the index from disk and worrying about disk > > > > corruption, the index is cached in memory (memory bit-flips > > > > happen > > > > too, but hopefully less often). The result is faster read. Read > > > > time > > > > is reduced by 70%. > > > > > > > > The biggest gain is not having to verify the trailing SHA-1, > > > > which > > > > takes lots of time especially on large index files. > > > > Come to think of it, wouldn't it be far less intrusive change to > > just put the index on a ramdisk and skip the trailing SHA-1 > > verification, to obtain a similar result with the same trade off > > (i.e. blindly trusting memory instead of being paranoid)? > > I'm straying off-topic again because this reminded me about lmdb :) > For the record when I looked at lmdb I thought of using it as an > index > format too. It has everything we wanted in index-v5. Good enough that > we would not need index-helper and split-index to work around current > index format. Though I finally decided it didn't fit well. > > On the good side, lmdb is b+ trees, we can update directly on disk > (without rewriting whole file), read directly from mmap'd file and > not > worry about corrupting it. There's no SHA-1 verification either (and > we can do crc32 per entry if we want too). It's good. > > But, it does not fit well the our locking model (but we can change > this). And we sometimes want to create temporary throw-away indexes > (e.g. partial commits) which I don't think is easy to do. And the > reading directly from mmap does not give us much because we have to > do > byte endian conversion anyway. > > Hmm.. on second thought, even though lmdb can't be the default format > (for a bunch of other limitations), it can still be an option for > super big worktrees, just like index-helper being an optional > optimization. Hm.. hm.. if we can support lmdb as index format in > addition to the current format, bringing back some work from Thomas.. > We may still need a daemon or something to deal with watchman, but > the > underlying mechanism is exactly the same... David, Junio, what do you > think? LMDB makes writes faster, since we only have to write the stuff that's changed. That's good. Reads (ignoring SHA verification) will be slightly slower (due to the btree overhead). If, in general, we only had to read part of the index, that would be faster. But a fair amount of git code is written to assume that it is reasonable to iterate over the whole index. So I don't see a win from just switching to LMDB without other changes. (I guess we could parallelize index deserialization more easily with lmdb, but I don't know The original intent of the index was to be a "staging area" to build up a tree before committing it. This implies an alternate view of the index as a diff against some (possibly-empty) tree. An index diff or status operation, then, just requires iterating over the changes. I haven't really dug into merges to understand whether it would be reasonable for them to use a representation like this, and what that would do to speed there. In general, git takes the commit side of the commit-diff duality. This is generally a good thing, because commits are simpler to reason about. But I wonder if we could do better for this specific case. But I don't think switching to LMDB would replace index-helper. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 04/19] index-helper: new daemon for caching index and related stuff
On Wed, 2016-03-09 at 15:09 -0800, Junio C Hamano wrote: > David Turner writes: > > > From: Nguyễn Thái Ngọc Duy > > > > Instead of reading the index from disk and worrying about disk > > corruption, the index is cached in memory (memory bit-flips happen > > too, but hopefully less often). The result is faster read. Read > > time > > is reduced by 70%. > > > > The biggest gain is not having to verify the trailing SHA-1, which > > takes lots of time especially on large index files. But this also > > opens doors for further optimiztions: > > > > - we could create an in-memory format that's essentially the > > memory > >dump of the index to eliminate most of parsing/allocation > >overhead. The mmap'd memory can be used straight away. > > Experiment > >[1] shows we could reduce read time by 88%. > > > > - we could cache non-index info such as name hash > > > > The shared memory's name folows the template "git--" > > where is the trailing SHA-1 of the index file. is > > "index" for cached index files (and may be "name-hash" for name > > -hash > > cache). If such shared memory exists, it contains the same index > > content as on disk. The content is already validated by the daemon > > and > > git won't validate it again (except comparing the trailing SHA-1s). > > This indeed is an interesting approach; what is not explained but > must be is when the on-disk index is updated to reflect the reality > (if I am reading the explanation and the code right, while the > daemon is running, its in-core cache becomes the source of truth by > forcing everybody's read-index-from() to go to the daemon). The > explanation could be "this is only for read side, and updating the > index happens via the traditional 'write a new file and rename it to > the final place' codepath, at which time the daemon must be told to > re-read it." This seems like the explanation (from the current commit message): "Git can poke the daemon to tell it to refresh the index cache, or to keep it alive some more minutes via UNIX signals. It can't give any real index data directly to the daemon. Real data goes to disk first, then the daemon reads and verifies it from there. Poking only happens for $GIT_DIR/index, not temporary index files." I guess this could be rewritten as: "Index validity is ensured by the following method: When a read is requested from the index-helper, it checks the SHA1 of its cached index copy against the on-disk version. If they differ, index-helper rereads the index. In addition, any git process may explicitly suggest a reread via a UNIX signal, but this is only an optimization and it is not necessary for correctness. In addition, Git can signal the daemon with a heartbeat signal, to keep the daemon alive longer." How does that sound? -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 03/19] read-cache: allow to keep mmap'd memory after reading
On Wed, 2016-03-09 at 15:02 -0800, Junio C Hamano wrote: > David Turner writes: > > > From: Nguyễn Thái Ngọc Duy > > > > Signed-off-by: Nguyễn Thái Ngọc Duy > > --- > > As usual in Duy's patches, this one seriously lacks "why". And also > makes the reader wonder if the memory region is ever unmapped() and > if so under what condition. How about this: Later, we will introduce git index-helper to share this memory with other git processes. Since the memory will be shared, it will never be unmapped (although the kernel may of course choose to page it out). -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 01/19] trace.c: add GIT_TRACE_PACK_STATS for pack usage statistics
On Wed, 2016-03-09 at 14:58 -0800, Junio C Hamano wrote: > David Turner writes: ... > > trace_stats() is intended for GIT_TRACE_*_STATS variable group and > > GIT_TRACE_PACK_STATS is more like an example how new vars can be > > added. ... > > + pack_access_nr++; > > } > > This looks rather half-hearted, in that those who are interested in > this new number can run "wc -l" on the pack-access trace log without > adding a new "stats" anyway. It may make the "stats" far more > useful to keep track of the summary information of what would be > written to the pack access log and add to the report_pack_stats() > output things like the average and median distance of seeks > (i.e. differences in the "obj_offset" into the same pack by two > adjacent pack accesse) and the variance, for example? On reflection, I do not think I need this patch; it was in Duy's series and the index stats patch would need to be rewritten slightly to get rid of it, but I'm OK with that. I wanted to make minimal changes to Duy's patches, but I'd rather make larger changes than do work on patches I don't need. So unless Duy objects, I'm going to drop this from the series. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 04/19] index-helper: new daemon for caching index and related stuff
On Wed, 2016-03-09 at 15:21 -0800, Junio C Hamano wrote: > Junio C Hamano writes: > > > David Turner writes: > > > > > From: Nguyễn Thái Ngọc Duy > > > > > > Instead of reading the index from disk and worrying about disk > > > corruption, the index is cached in memory (memory bit-flips > > > happen > > > too, but hopefully less often). The result is faster read. Read > > > time > > > is reduced by 70%. > > > > > > The biggest gain is not having to verify the trailing SHA-1, > > > which > > > takes lots of time especially on large index files. > > Come to think of it, wouldn't it be far less intrusive change to > just put the index on a ramdisk and skip the trailing SHA-1 > verification, to obtain a similar result with the same trade off > (i.e. blindly trusting memory instead of being paranoid)? > 1. If the index were stored on a ramdisk, we would *also* have to write it to durable storage to avoid losing the user's work when they power off. That's more complicated. 2. Duy notes that it is easier to add further optimizations to this -- for instance, we could share a pre-parsed version of the index. It would be harder to add these optimizations to the disk format, because (a) they would take up more space, and (b) they would probably be harder to write in a single pass, which is presently how index writing works. 3. If we wanted to just skip SHA-1 verification, we would not need a ramdisk; it's almost certain that the index would be in the disk cache most of the time, so regular storage should be very nearly as fast as a ramdisk. I think mlock might help ensure that the data remains in the cache, although I'm not sure what the permissions story is on that. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 00/19] index-helper, watchman
This is a rebase and extension of Duy's work on git index-helper and watchman support. Patches 1-14 and are more-or-less just Duy's patches, rebased. I fixed up a race condition with signal handling, but otherwise didn't change much. Patch 15 preserves the untracked cache and watchman extensions across checkouts. Patches 16-18 are new index-helper patches that I wrote to improve the general usability of index-helper. And patch 19 is for discussion only -- if the overall concept is correct, I'll rewrite it into the places where it should go. I didn't do it yet in part because I am not sure folks will agree with it and in part because I didn't want to mess too much with Duy's code before re-sending it. I haven't used this code much. I just wrote patches 16-18 today, in fact. It's got fewer tests than I would like, in part because it is somewhat difficult to test since it involves two separate persistent daemons (index-helper, watchman), and the interaction between them. At Twitter, we're still using something based on my earlier watchman patches[1]. We would like to switch to this (especially if this is the version that mainstream git is going towards). But we have other local patches[2], and I haven't fully finished migrating them to 2.8. My informal testing shows that with the untracked cache and watchman index extensions, and git index-helper[3], performance is slightly better than my earlier code. That's not a surprise, since the index-helper eliminates index verification time. [1] https://github.com/dturner-tw/git/tree/dturner/watchman [2] e.g. something like this: https://github.com/dturner-tw/git/tree/dturner/journal [3] In both cases, I'm using a version which has the SSE ref-parsing and hashmap code; these were rejected due to complexity but they provide a benefit for us, so we still use them. David Turner (5): unpack-trees: preserve index extensions index-helper: rewrite pidfile after daemonizing index-helper: process management index-helper: autorun hack: watchman/untracked cache mashup Nguyễn Thái Ngọc Duy (14): trace.c: add GIT_TRACE_PACK_STATS for pack usage statistics read-cache.c: fix constness of verify_hdr() read-cache: allow to keep mmap'd memory after reading index-helper: new daemon for caching index and related stuff trace.c: add GIT_TRACE_INDEX_STATS for index statistics index-helper: add --strict daemonize(): set a flag before exiting the main process index-helper: add --detach index-helper: add Windows support read-cache: add watchman 'WAMA' extension Add watchman support to reduce index refresh cost read-cache: allow index-helper to prepare shm before git reads it index-helper: use watchman to avoid refreshing index with lstat() update-index: enable/disable watchman support .gitignore | 1 + Documentation/git-index-helper.txt | 56 + Documentation/git.txt | 4 + Makefile | 21 ++ builtin/gc.c | 2 +- builtin/update-index.c | 11 + cache.h| 20 +- config.c | 5 + config.mak.uname | 3 + configure.ac | 8 + daemon.c | 2 +- dir.c | 23 +- dir.h | 6 + environment.c | 3 + git-compat-util.h | 1 + git.c | 37 +++- index-helper.c | 437 read-cache.c | 441 +++-- setup.c| 4 +- sha1_file.c| 24 ++ shm.c | 163 ++ shm.h | 23 ++ t/t7063-status-untracked-cache.sh | 23 ++ t/t7900-index-helper.sh| 23 ++ t/test-lib-functions.sh| 4 + trace.c| 16 ++ trace.h| 1 + unpack-trees.c | 1 + watchman-support.c | 134 +++ watchman-support.h | 7 + 30 files changed, 1481 insertions(+), 23 deletions(-) create mode 100644 Documentation/git-index-helper.txt create mode 100644 index-helper.c create mode 100644 shm.c create mode 100644 shm.h create mode 100755 t/t7900-index-helper.sh create mode 100644 watchman-support.c create mode 100644 watchman-support.h -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 01/19] trace.c: add GIT_TRACE_PACK_STATS for pack usage statistics
From: Nguyễn Thái Ngọc Duy trace_stats() is intended for GIT_TRACE_*_STATS variable group and GIT_TRACE_PACK_STATS is more like an example how new vars can be added. Signed-off-by: Nguyễn Thái Ngọc Duy --- Documentation/git.txt | 3 +++ cache.h | 2 ++ git.c | 1 + sha1_file.c | 24 trace.c | 13 + trace.h | 1 + 6 files changed, 44 insertions(+) diff --git a/Documentation/git.txt b/Documentation/git.txt index 2754af8..794271e 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -1055,6 +1055,9 @@ of clones and fetches. time of each Git command. See 'GIT_TRACE' for available trace output options. +'GIT_TRACE_PACK_STATS':: + Print various statistics. + 'GIT_TRACE_SETUP':: Enables trace messages printing the .git, working tree and current working directory after Git has completed its setup phase. diff --git a/cache.h b/cache.h index b829410..7e01403 100644 --- a/cache.h +++ b/cache.h @@ -1828,4 +1828,6 @@ void sleep_millisec(int millisec); */ void safe_create_dir(const char *dir, int share); +void report_pack_stats(struct trace_key *key); + #endif /* CACHE_H */ diff --git a/git.c b/git.c index 6cc0c07..a4f6f71 100644 --- a/git.c +++ b/git.c @@ -655,6 +655,7 @@ int main(int argc, char **av) git_setup_gettext(); trace_command_performance(argv); + trace_stats(); /* * "git-" is the same as "git ", but we obviously: diff --git a/sha1_file.c b/sha1_file.c index d0f2aa0..14cebdf 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -518,6 +518,7 @@ static unsigned int peak_pack_open_windows; static unsigned int pack_open_windows; static unsigned int pack_open_fds; static unsigned int pack_max_fds; +static unsigned int pack_access_nr; static size_t peak_pack_mapped; static size_t pack_mapped; struct packed_git *packed_git; @@ -543,6 +544,28 @@ void pack_report(void) sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped)); } +void report_pack_stats(struct trace_key *key) +{ + trace_printf_key(key, "\n" +"pack_report: getpagesize()= %10" SZ_FMT "\n" +"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n" +"pack_report: core.packedGitLimit = %10" SZ_FMT "\n" +"pack_report: pack_used_ctr= %10u\n" +"pack_report: pack_mmap_calls = %10u\n" +"pack_report: pack_open_windows= %10u / %10u\n" +"pack_report: pack_mapped = " +"%10" SZ_FMT " / %10" SZ_FMT "\n" +"pack_report: pack accesss = %10u\n", +sz_fmt(getpagesize()), +sz_fmt(packed_git_window_size), +sz_fmt(packed_git_limit), +pack_used_ctr, +pack_mmap_calls, +pack_open_windows, peak_pack_open_windows, +sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped), +pack_access_nr); +} + /* * Open and mmap the index file at path, perform a couple of * consistency checks, then record its information to p. Return 0 on @@ -2238,6 +2261,7 @@ static void write_pack_access_log(struct packed_git *p, off_t obj_offset) static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS); trace_printf_key(&pack_access, "%s %"PRIuMAX"\n", p->pack_name, (uintmax_t)obj_offset); + pack_access_nr++; } int do_check_packed_object_crc; diff --git a/trace.c b/trace.c index 4aeea60..b1d0885 100644 --- a/trace.c +++ b/trace.c @@ -432,3 +432,16 @@ void trace_command_performance(const char **argv) sq_quote_argv(&command_line, argv, 0); command_start_time = getnanotime(); } + +static struct trace_key trace_pack_stats = TRACE_KEY_INIT(PACK_STATS); + +static void print_stats_atexit(void) +{ + report_pack_stats(&trace_pack_stats); +} + +void trace_stats(void) +{ + if (trace_want(&trace_pack_stats)) + atexit(print_stats_atexit); +} diff --git a/trace.h b/trace.h index 179b249..52bda4e 100644 --- a/trace.h +++ b/trace.h @@ -19,6 +19,7 @@ extern void trace_disable(struct trace_key *key); extern uint64_t getnanotime(void); extern void trace_command_performance(const char **argv); extern void trace_verbatim(struct trace_key *key, const void *buf, unsigned len); +extern void trace_stats(void); #ifndef HAVE_VARIADIC_MACROS -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 02/19] read-cache.c: fix constness of verify_hdr()
From: Nguyễn Thái Ngọc Duy Signed-off-by: Nguyễn Thái Ngọc Duy --- read-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/read-cache.c b/read-cache.c index d9fb78b..16cc487 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1345,7 +1345,7 @@ struct ondisk_cache_entry_extended { ondisk_cache_entry_extended_size(ce_namelen(ce)) : \ ondisk_cache_entry_size(ce_namelen(ce))) -static int verify_hdr(struct cache_header *hdr, unsigned long size) +static int verify_hdr(const struct cache_header *hdr, unsigned long size) { git_SHA_CTX c; unsigned char sha1[20]; -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 03/19] read-cache: allow to keep mmap'd memory after reading
From: Nguyễn Thái Ngọc Duy Signed-off-by: Nguyễn Thái Ngọc Duy --- cache.h | 3 +++ read-cache.c | 13 - 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cache.h b/cache.h index 7e01403..c43ef3d 100644 --- a/cache.h +++ b/cache.h @@ -333,11 +333,14 @@ struct index_state { struct split_index *split_index; struct cache_time timestamp; unsigned name_hash_initialized : 1, +keep_mmap : 1, initialized : 1; struct hashmap name_hash; struct hashmap dir_hash; unsigned char sha1[20]; struct untracked_cache *untracked; + void *mmap; + size_t mmap_size; }; extern struct index_state the_index; diff --git a/read-cache.c b/read-cache.c index 16cc487..7e387e9 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1574,6 +1574,10 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) mmap = xmmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, 0); if (mmap == MAP_FAILED) die_errno("unable to map index file"); + if (istate->keep_mmap) { + istate->mmap = mmap; + istate->mmap_size = mmap_size; + } close(fd); hdr = mmap; @@ -1626,10 +1630,12 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) src_offset += 8; src_offset += extsize; } - munmap(mmap, mmap_size); + if (!istate->keep_mmap) + munmap(mmap, mmap_size); return istate->cache_nr; unmap: + istate->mmap = NULL; munmap(mmap, mmap_size); die("index file corrupt"); } @@ -1655,6 +1661,7 @@ int read_index_from(struct index_state *istate, const char *path) discard_index(split_index->base); else split_index->base = xcalloc(1, sizeof(*split_index->base)); + split_index->base->keep_mmap = istate->keep_mmap; ret = do_read_index(split_index->base, git_path("sharedindex.%s", sha1_to_hex(split_index->base_sha1)), 1); @@ -1698,6 +1705,10 @@ int discard_index(struct index_state *istate) free(istate->cache); istate->cache = NULL; istate->cache_alloc = 0; + if (istate->keep_mmap && istate->mmap) { + munmap(istate->mmap, istate->mmap_size); + istate->mmap = NULL; + } discard_split_index(istate); free_untracked_cache(istate->untracked); istate->untracked = NULL; -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 19/19] hack: watchman/untracked cache mashup
DO NOT APPLY THIS This is a hack to add untracked cache info to the watchman WAMA extension. The idea is that we don't have to stat directories if watchman hasn't told us of any changes in those dirs. If this is the right idea, I can go back and add it to the relevant patch(es). But I want to see if other folks think it's the right idea first. Signed-off-by: David Turner --- dir.c | 23 +++-- dir.h | 6 +++ read-cache.c | 142 +++-- watchman-support.c | 21 +++- 4 files changed, 182 insertions(+), 10 deletions(-) diff --git a/dir.c b/dir.c index 69e0be6..5058b29 100644 --- a/dir.c +++ b/dir.c @@ -597,9 +597,9 @@ static void trim_trailing_spaces(char *buf) * * If "name" has the trailing slash, it'll be excluded in the search. */ -static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc, - struct untracked_cache_dir *dir, - const char *name, int len) +struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc, +struct untracked_cache_dir *dir, +const char *name, int len) { int first, last; struct untracked_cache_dir *d; @@ -1726,6 +1726,17 @@ static int valid_cached_dir(struct dir_struct *dir, if (!untracked) return 0; + if (dir->untracked->use_watchman) { + /* +* With watchman, we can trust the untracked cache's +* valid field. +*/ + if (untracked->valid) + goto skip_stat; + else + invalidate_directory(dir->untracked, untracked); + } + if (stat(path->len ? path->buf : ".", &st)) { invalidate_directory(dir->untracked, untracked); memset(&untracked->stat_data, 0, sizeof(untracked->stat_data)); @@ -1739,6 +1750,7 @@ static int valid_cached_dir(struct dir_struct *dir, return 0; } +skip_stat: if (untracked->check_only != !!check_only) { invalidate_directory(dir->untracked, untracked); return 0; @@ -2625,8 +2637,10 @@ static void free_untracked(struct untracked_cache_dir *ucd) void free_untracked_cache(struct untracked_cache *uc) { - if (uc) + if (uc) { free_untracked(uc->root); + string_list_clear(&uc->invalid_untracked, 0); + } free(uc); } @@ -2775,6 +2789,7 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long return NULL; uc = xcalloc(1, sizeof(*uc)); + string_list_init(&uc->invalid_untracked, 1); strbuf_init(&uc->ident, ident_len); strbuf_add(&uc->ident, ident, ident_len); load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat, diff --git a/dir.h b/dir.h index 3ec3fb0..8fd3f9e 100644 --- a/dir.h +++ b/dir.h @@ -142,6 +142,9 @@ struct untracked_cache { int gitignore_invalidated; int dir_invalidated; int dir_opened; + /* watchman invalidation data */ + unsigned int use_watchman : 1; + struct string_list invalid_untracked; }; struct dir_struct { @@ -312,4 +315,7 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked); void add_untracked_cache(struct index_state *istate); void remove_untracked_cache(struct index_state *istate); +struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc, +struct untracked_cache_dir *dir, +const char *name, int len); #endif diff --git a/read-cache.c b/read-cache.c index 40d789a..66438d8 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1237,7 +1237,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, if (!new) { const char *fmt; - if (really && cache_errno == EINVAL) { + if (really || cache_errno == EINVAL) { /* If we are doing --really-refresh that * means the index is not valid anymore. */ @@ -1377,11 +1377,82 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size) return 0; } +static struct untracked_cache_dir *find_untracked_cache_dir( + struct untracked_cache *uc, struct untracked_cache_dir *ucd, + const char *name) +{ + int component_len; +
[PATCH 15/19] unpack-trees: preserve index extensions
Make git checkout (and other unpack_tree operations) preserve the untracked cache and watchman status. This is valuable for two reasons: 1. Often, an unpack_tree operation will not touch large parts of the working tree, and thus most of the untracked cache will continue to be valid. 2. Even if the untracked cache were entirely invalidated by such an operation, the user has signaled their intention to have such a cache, and we don't want to throw it away. The same logic applies to the watchman state. Signed-off-by: David Turner --- cache.h | 1 + read-cache.c | 8 t/t7063-status-untracked-cache.sh | 23 +++ t/test-lib-functions.sh | 4 unpack-trees.c| 1 + 5 files changed, 37 insertions(+) diff --git a/cache.h b/cache.h index 272c928..03c34f1 100644 --- a/cache.h +++ b/cache.h @@ -572,6 +572,7 @@ extern void write_watchman_ext(struct strbuf *sb, struct index_state* istate); #define REFRESH_DAEMON (1 << 2) extern int write_locked_index(struct index_state *, struct lock_file *lock, unsigned flags); extern int discard_index(struct index_state *); +extern void move_index_extensions(struct index_state *dst, struct index_state *src); extern int unmerged_index(const struct index_state *); extern int verify_path(const char *path); extern int index_dir_exists(struct index_state *istate, const char *name, int namelen); diff --git a/read-cache.c b/read-cache.c index 78f5f0e..40d789a 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2591,3 +2591,11 @@ void report_index_stats(struct trace_key *key) "index stats: file writes = %10u\n", nr_read_index, nr_read_shm_index, nr_write_index); } + +void move_index_extensions(struct index_state *dst, struct index_state *src) +{ + dst->untracked = src->untracked; + src->untracked = NULL; + dst->last_update = src->last_update; + src->last_update = NULL; +} diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh index a971884..a2c8535 100755 --- a/t/t7063-status-untracked-cache.sh +++ b/t/t7063-status-untracked-cache.sh @@ -646,4 +646,27 @@ test_expect_success 'test ident field is working' ' test_cmp ../expect ../err ' +test_expect_success 'untracked cache survives a checkout' ' + git commit --allow-empty -m empty && + test-dump-untracked-cache >../before && + test_when_finished "git checkout master" && + git checkout -b other_branch && + test-dump-untracked-cache >../after && + test_cmp ../before ../after && + test_commit test && + test-dump-untracked-cache >../before && + git checkout master && + test-dump-untracked-cache >../after && + test_cmp ../before ../after +' + + +test_expect_success 'untracked cache survives a commit' ' + test-dump-untracked-cache >../before && + git add done/two && + git commit -m commit && + test-dump-untracked-cache >../after && + test_cmp ../before ../after +' + test_done diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index 8d99eb3..e974b5b 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -186,6 +186,10 @@ test_commit () { test_tick fi && git commit $signoff -m "$1" && + if [ "$(git config core.bare)" = false ] + then + git update-index --force-untracked-cache + fi git tag "${4:-$1}" } diff --git a/unpack-trees.c b/unpack-trees.c index 9f55cc2..fc90eb3 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1215,6 +1215,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options WRITE_TREE_SILENT | WRITE_TREE_REPAIR); } + move_index_extensions(&o->result, o->dst_index); discard_index(o->dst_index); *o->dst_index = o->result; } else { -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 12/19] read-cache: allow index-helper to prepare shm before git reads it
From: Nguyễn Thái Ngọc Duy If index-helper puts 'W' before pid in $GIT_DIR/index-helper.pid, then git will sleep for a while, expecting to be waken up by SIGUSR1 when index-helper has done shm preparation, or after the timeout. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: David Turner --- read-cache.c | 36 ++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/read-cache.c b/read-cache.c index 85ef15b..57c5df9 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1613,14 +1613,46 @@ static void do_poke(struct strbuf *sb, int refresh_cache) PostMessage(hwnd, refresh_cache ? WM_USER : WM_USER + 1, 0, 0); } #else + +static volatile int done_sleeping; + +static void mark_done_sleeping(int sig) +{ + done_sleeping = 1; +} + +/* + * Send a message to the index-helper to let it know that we're going + * to read the index. If refresh_cache is true, then the index-helper + * should re-read the index; otherwise, it should just stay alive. + * + * If the index-helper supports watchman, it will refresh the index + * before it hands it over. Wait up to one second for a response + * indicating that the index has been successfully refreshed. + * + */ static void do_poke(struct strbuf *sb, int refresh_cache) { - char*start = sb->buf; + int wait = sb->buf[0] == 'W'; + char*start = wait ? sb->buf + 1 : sb->buf; char*end = NULL; pid_tpid = strtoul(start, &end, 10); + int ret; + int count = 0; + + done_sleeping = 0; if (!end || end != sb->buf + sb->len) return; - kill(pid, refresh_cache ? SIGHUP : SIGUSR1); + if (!refresh_cache && wait) + signal(SIGHUP, mark_done_sleeping); + ret = kill(pid, refresh_cache ? SIGHUP : SIGUSR1); + if (!refresh_cache && wait) { + if (!ret) + while (!done_sleeping && count++ < 1000) + sleep_millisec(1); + + sigaction(SIGHUP, NULL, NULL); + } } #endif -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 17/19] index-helper: process management
Don't start index-helper when it is already running for a repository. Keep the index-helper pid file's mtime up-to-date. Before starting index-helper, check that (a) the pid file's mtime is recent and (b) there is some process with that pid. Of course, it's possible that the mtime is recent but the index-helper has died anyway (and some other process has taken its pid), but that's pretty unlikely. Provide index-helper --kill (to kill the running index-helper) and index-helper --ignore-existing (to start up a new index-helper even if we believe that there's one already running). Add a test for index-helper's pid-file writing and for --kill. Signed-off-by: David Turner --- index-helper.c | 95 + t/t7900-index-helper.sh | 23 2 files changed, 111 insertions(+), 7 deletions(-) create mode 100755 t/t7900-index-helper.sh diff --git a/index-helper.c b/index-helper.c index dc03e1e..a75da60 100644 --- a/index-helper.c +++ b/index-helper.c @@ -152,6 +152,17 @@ static void refresh(int sig) #ifdef HAVE_SHM +static void touch_pid_file(void) +{ + /* +* If we fail to update the times on index-helper.pid, we've +* probably had the repo deleted out from under us or otherwise +* lost access; might as well give up. +*/ + if (utimes(git_path("index-helper.pid"), NULL)) + exit(0); +} + #ifdef USE_WATCHMAN static void share_watchman(struct index_state *istate, struct shm *is, pid_t pid) @@ -211,9 +222,10 @@ static void prepare_index(int sig, siginfo_t *si, void *context) #endif -static void loop(const char *pid_file, int idle_in_seconds) +static void loop(const char *pid_file, int idle_in_minutes) { struct sigaction sa; + int timeout_count = 0; sigchain_pop(SIGHUP); /* pushed by sigchain_push_common */ sigchain_push(SIGHUP, refresh); @@ -225,19 +237,25 @@ static void loop(const char *pid_file, int idle_in_seconds) sigaction(SIGUSR1, &sa, NULL); refresh(0); - while (sleep(idle_in_seconds)) - ; /* do nothing, all is handled by signal handlers already */ + while (timeout_count < idle_in_minutes) { + if (sleep(60) == EINTR) + timeout_count = 0; + else + timeout_count ++; + touch_pid_file(); + } } #elif defined(GIT_WINDOWS_NATIVE) -static void loop(const char *pid_file, int idle_in_seconds) +static void loop(const char *pid_file, int idle_in_minutes) { HWND hwnd; UINT_PTR timer = 0; MSG msg; HINSTANCE hinst = GetModuleHandle(NULL); WNDCLASS wc; + int timeout_count = 0; /* * Emulate UNIX signals by sending WM_USER+x to a @@ -258,11 +276,18 @@ static void loop(const char *pid_file, int idle_in_seconds) refresh(0); while (1) { - timer = SetTimer(hwnd, timer, idle_in_seconds * 1000, NULL); + timer = SetTimer(hwnd, timer, 60 * 1000, NULL); if (!timer) die(_("no timer!")); - if (!GetMessage(&msg, hwnd, 0, 0) || msg.message == WM_TIMER) + if (!GetMessage(&msg, hwnd, 0, 0)) break; + if (msg.message == WM_TIMER) { + timeout_count ++; + if (timeout_count == idle_in_minutes) + break; + } + timeout_count = 0; + touch_pid_file(); switch (msg.message) { case WM_USER: refresh(0); @@ -288,6 +313,44 @@ static const char * const usage_text[] = { NULL }; +static int already_running(void) +{ + char contents[20] = {0}; + char *end; + int fd, dead, i = 0; + time_t now; + pid_t pid; + struct stat st; + + fd = open(git_path("index-helper.pid"), O_RDONLY); + if (fd < 0) + return 0; + + if (fstat(fd, &st) < 0) + return 0; + + now = time(&now); + + /* +* We touch the pid file every minute, so if a pid file hasn't +* been updated in two minutes, it's out-of-date. +*/ + if (st.st_mtime + 120 < now) + return 0; + + read_in_full(fd, &contents, sizeof(contents)); + + while (!isdigit(contents[i]) && contents[i]) + i++; + pid = strtoll(contents + i, &end, 10); + if (contents == end) + return 0; + dead = kill(pid, 0); + if (!dead) + return pid; + return 0; +} + static const char *write_pid(void) { static struct strbuf sb = STRBUF_INIT; @@ -314,6 +377,8 @@ int main(int
[PATCH 14/19] update-index: enable/disable watchman support
From: Nguyễn Thái Ngọc Duy Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/update-index.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/builtin/update-index.c b/builtin/update-index.c index 1c94ca5..7c08e1c 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -914,6 +914,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) { int newfd, entries, has_errors = 0, nul_term_line = 0; enum uc_mode untracked_cache = UC_UNSPECIFIED; + int use_watchman = -1; int read_from_stdin = 0; int prefix_length = prefix ? strlen(prefix) : 0; int preferred_index_format = 0; @@ -1012,6 +1013,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) N_("test if the filesystem supports untracked cache"), UC_TEST), OPT_SET_INT(0, "force-untracked-cache", &untracked_cache, N_("enable untracked cache without testing the filesystem"), UC_FORCE), + OPT_BOOL(0, "watchman", &use_watchman, + N_("use or not use watchman to reduce refresh cost")), OPT_END() }; @@ -1149,6 +1152,14 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) die("Bug: bad untracked_cache value: %d", untracked_cache); } + if (use_watchman > 0) { + the_index.last_update= xstrdup(""); + the_index.cache_changed |= WATCHMAN_CHANGED; + } else if (!use_watchman) { + the_index.last_update= NULL; + the_index.cache_changed |= WATCHMAN_CHANGED; + } + if (active_cache_changed) { if (newfd < 0) { if (refresh_args.flags & REFRESH_QUIET) -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 18/19] index-helper: autorun
Introduce a new config option, indexhelper.autorun, to automatically run git index-helper before starting up a builtin git command. This enables users to keep index-helper running without manual intervention. Signed-off-by: David Turner --- git.c | 38 +- index-helper.c | 11 ++- t/t7900-index-helper.sh | 10 ++ 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/git.c b/git.c index a4f6f71..ccf04ab 100644 --- a/git.c +++ b/git.c @@ -521,6 +521,40 @@ static void strip_extension(const char **argv) #define strip_extension(cmd) #endif +static int want_auto_index_helper(void) +{ + int want = -1; + const char *value = NULL; + const char *key = "indexhelper.autorun"; + + if (git_config_key_is_valid(key) && + !git_config_get_value(key, &value)) { + int b = git_config_maybe_bool(key, value); + want = b >= 0 ? b : 0; + } + return want; +} + +static void maybe_run_index_helper(struct cmd_struct *cmd) +{ + const char *argv[] = {"git-index-helper", "--detach", "--auto", NULL}; + int status; + + if (!(cmd->option & NEED_WORK_TREE)) + return; + + if (want_auto_index_helper() <= 0) + return; + + trace_argv_printf(argv, "trace: auto index-helper:"); + + status = run_command_v_opt(argv, RUN_SILENT_EXEC_FAILURE | RUN_CLEAN_ON_EXIT); + + if (status) { + warning("You specified indexhelper.autorun, but running git-index-helper failed"); + } +} + static void handle_builtin(int argc, const char **argv) { const char *cmd; @@ -536,8 +570,10 @@ static void handle_builtin(int argc, const char **argv) } builtin = get_builtin(cmd); - if (builtin) + if (builtin) { + maybe_run_index_helper(builtin); exit(run_builtin(builtin, argc, argv)); + } } static void execv_dashed_external(const char **argv) diff --git a/index-helper.c b/index-helper.c index a75da60..bc5c328 100644 --- a/index-helper.c +++ b/index-helper.c @@ -379,6 +379,7 @@ int main(int argc, char **argv) int idle_in_minutes = 10, detach = 0; int ignore_existing = 0; int kill_existing = 0; + int nongit = 0, autorun = 0; const char *pid_file; struct option options[] = { OPT_INTEGER(0, "exit-after", &idle_in_minutes, @@ -388,6 +389,7 @@ int main(int argc, char **argv) OPT_BOOL(0, "detach", &detach, "detach the process"), OPT_BOOL(0, "ignore-existing", &ignore_existing, "run even if another index-helper seems to be running for this repo"), OPT_BOOL(0, "kill", &kill_existing, "kill any running index-helper for this repo"), + OPT_BOOL(0, "auto", &autorun, "this is an automatic run of git index-helper, so certain errors can be solved by silently exiting"), OPT_END() }; @@ -397,11 +399,18 @@ int main(int argc, char **argv) if (argc == 2 && !strcmp(argv[1], "-h")) usage_with_options(usage_text, options); - prefix = setup_git_directory(); + prefix = setup_git_directory_gently(&nongit); if (parse_options(argc, (const char **)argv, prefix, options, usage_text, 0)) die(_("too many arguments")); + if (nongit) { + if (autorun) + exit(0); + else + die("Not a git repository"); + } + if (ignore_existing && kill_existing) die(_("--ignore-existing and --kill don't make sense together")); diff --git a/t/t7900-index-helper.sh b/t/t7900-index-helper.sh index 6708180..e4f9564 100755 --- a/t/t7900-index-helper.sh +++ b/t/t7900-index-helper.sh @@ -20,4 +20,14 @@ test_expect_success 'index-helper creates usable pid file and can be killed' ' ! kill -0 $pid ' +test_expect_success 'index-helper autorun works' ' + rm -f .git/index-helper.pid && + git status && + test_path_is_missing .git/index-helper.pid && + test_config indexhelper.autorun true && + git status && + test_path_is_file .git/index-helper.pid && + git index-helper --kill +' + test_done -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 05/19] trace.c: add GIT_TRACE_INDEX_STATS for index statistics
From: Nguyễn Thái Ngọc Duy Signed-off-by: Nguyễn Thái Ngọc Duy --- Documentation/git.txt | 1 + cache.h | 1 + read-cache.c | 16 trace.c | 5 - 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/Documentation/git.txt b/Documentation/git.txt index 794271e..71a88a8 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -1056,6 +1056,7 @@ of clones and fetches. See 'GIT_TRACE' for available trace output options. 'GIT_TRACE_PACK_STATS':: +'GIT_TRACE_INDEX_STATS':: Print various statistics. 'GIT_TRACE_SETUP':: diff --git a/cache.h b/cache.h index bc2f529..e22296c 100644 --- a/cache.h +++ b/cache.h @@ -1835,5 +1835,6 @@ void sleep_millisec(int millisec); void safe_create_dir(const char *dir, int share); void report_pack_stats(struct trace_key *key); +void report_index_stats(struct trace_key *key); #endif /* CACHE_H */ diff --git a/read-cache.c b/read-cache.c index eb4b9b4..7bd3ce4 100644 --- a/read-cache.c +++ b/read-cache.c @@ -50,6 +50,10 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, struct index_state the_index; static const char *alternate_index_output; +static unsigned int nr_read_index; +static unsigned int nr_read_shm_index; +static unsigned int nr_write_index; + static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) { istate->cache[nr] = ce; @@ -1614,6 +1618,7 @@ static int try_shm(struct index_state *istate) istate->mmap = new_mmap; istate->mmap_size = new_size; istate->from_shm = 1; + nr_read_shm_index++; return 0; } @@ -1711,6 +1716,7 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) } if (!istate->keep_mmap) munmap(mmap, mmap_size); + nr_read_index++; return istate->cache_nr; unmap: @@ -2197,6 +2203,7 @@ static int do_write_index(struct index_state *istate, int newfd, return -1; istate->timestamp.sec = (unsigned int)st.st_mtime; istate->timestamp.nsec = ST_MTIME_NSEC(st); + nr_write_index++; return 0; } @@ -2423,3 +2430,12 @@ void stat_validity_update(struct stat_validity *sv, int fd) fill_stat_data(sv->sd, &st); } } + +void report_index_stats(struct trace_key *key) +{ + trace_printf_key(key, "\n" +"index stats: file reads= %10u\n" +"index stats: cache reads = %10u\n" +"index stats: file writes = %10u\n", +nr_read_index, nr_read_shm_index, nr_write_index); +} diff --git a/trace.c b/trace.c index b1d0885..eea1fa8 100644 --- a/trace.c +++ b/trace.c @@ -434,14 +434,17 @@ void trace_command_performance(const char **argv) } static struct trace_key trace_pack_stats = TRACE_KEY_INIT(PACK_STATS); +static struct trace_key trace_index_stats = TRACE_KEY_INIT(INDEX_STATS); static void print_stats_atexit(void) { report_pack_stats(&trace_pack_stats); + report_index_stats(&trace_index_stats); } void trace_stats(void) { - if (trace_want(&trace_pack_stats)) + if (trace_want(&trace_pack_stats) || + trace_want(&trace_index_stats)) atexit(print_stats_atexit); } -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 11/19] Add watchman support to reduce index refresh cost
From: Nguyễn Thái Ngọc Duy The previous patch has the logic to clear bits in 'WAMA' bitmap. This patch has logic to set bits as told by watchman. The missing bit, _using_ these bits, are not here yet. A lot of this code is written by David Turner originally, mostly from [1]. I'm just copying and polishing it a bit. [1] http://article.gmane.org/gmane.comp.version-control.git/248006 Signed-off-by: David Turner Signed-off-by: Nguyễn Thái Ngọc Duy --- Makefile | 7 cache.h| 1 + config.c | 5 +++ configure.ac | 8 environment.c | 3 ++ watchman-support.c | 115 + watchman-support.h | 7 7 files changed, 146 insertions(+) create mode 100644 watchman-support.c create mode 100644 watchman-support.h diff --git a/Makefile b/Makefile index a6c668b..e51331c 100644 --- a/Makefile +++ b/Makefile @@ -1416,6 +1416,12 @@ else LIB_OBJS += thread-utils.o endif +ifdef USE_WATCHMAN + LIB_H += watchman-support.h + LIB_OBJS += watchman-support.o + BASIC_CFLAGS += -DUSE_WATCHMAN +endif + ifdef HAVE_PATHS_H BASIC_CFLAGS += -DHAVE_PATHS_H endif @@ -2164,6 +2170,7 @@ GIT-BUILD-OPTIONS: FORCE @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+ @echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@+ @echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@+ + @echo USE_WATCHMAN=\''$(subst ','\'',$(subst ','\'',$(USE_WATCHMAN)))'\' >>$@+ ifdef TEST_OUTPUT_DIRECTORY @echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst ','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@+ endif diff --git a/cache.h b/cache.h index 8f7b4b1..bf20652 100644 --- a/cache.h +++ b/cache.h @@ -688,6 +688,7 @@ extern char *git_replace_ref_base; extern int fsync_object_files; extern int core_preload_index; +extern int core_watchman_sync_timeout; extern int core_apply_sparse_checkout; extern int precomposed_unicode; extern int protect_hfs; diff --git a/config.c b/config.c index 9ba40bc..e6dc141 100644 --- a/config.c +++ b/config.c @@ -882,6 +882,11 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.watchmansynctimeout")) { + core_watchman_sync_timeout = git_config_int(var, value); + return 0; + } + if (!strcmp(var, "core.createobject")) { if (!strcmp(value, "rename")) object_creation_mode = OBJECT_CREATION_USES_RENAMES; diff --git a/configure.ac b/configure.ac index 89e2590..6f10a15 100644 --- a/configure.ac +++ b/configure.ac @@ -1092,6 +1092,14 @@ AC_COMPILE_IFELSE([BSD_SYSCTL_SRC], HAVE_BSD_SYSCTL=]) GIT_CONF_SUBST([HAVE_BSD_SYSCTL]) +# +# Check for watchman client library + +AC_CHECK_LIB([watchman], [watchman_connect], + [USE_WATCHMAN=YesPlease], + [USE_WATCHMAN=]) +GIT_CONF_SUBST([USE_WATCHMAN]) + ## Other checks. # Define USE_PIC if you need the main git objects to be built with -fPIC # in order to build and link perl/Git.so. x86-64 seems to need this. diff --git a/environment.c b/environment.c index 6dec9d0..35e03c7 100644 --- a/environment.c +++ b/environment.c @@ -94,6 +94,9 @@ int core_preload_index = 1; */ int ignore_untracked_cache_config; +int core_watchman_sync_timeout = 300; + + /* This is set by setup_git_dir_gently() and/or git_default_config() */ char *git_work_tree_cfg; static char *work_tree; diff --git a/watchman-support.c b/watchman-support.c new file mode 100644 index 000..08e37ae --- /dev/null +++ b/watchman-support.c @@ -0,0 +1,115 @@ +#include "cache.h" +#include "watchman-support.h" +#include "strbuf.h" +#include + +static struct watchman_query *make_query(const char *last_update) +{ + struct watchman_query *query = watchman_query(); + watchman_query_set_fields(query, WATCHMAN_FIELD_NAME | +WATCHMAN_FIELD_EXISTS | +WATCHMAN_FIELD_NEWER); + watchman_query_set_empty_on_fresh(query, 1); + query->sync_timeout = core_watchman_sync_timeout; + if (*last_update) + watchman_query_set_since_oclock(query, last_update); + return query; +} + +static struct watchman_query_result* query_watchman( + struct index_state *istate, struct watchman_connection *connection, + const char *fs_path, const char *last_update) +{ + struc
[PATCH 07/19] daemonize(): set a flag before exiting the main process
From: Nguyễn Thái Ngọc Duy This allows signal handlers and atexit functions to realize this situation and not clean up. Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/gc.c | 2 +- cache.h | 2 +- daemon.c | 2 +- setup.c | 4 +++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index c583aad..37180de 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -385,7 +385,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) * failure to daemonize is ok, we'll continue * in foreground */ - daemonized = !daemonize(); + daemonized = !daemonize(NULL); } } else add_repack_all_option(); diff --git a/cache.h b/cache.h index 1d7e561..cc1f6f5 100644 --- a/cache.h +++ b/cache.h @@ -530,7 +530,7 @@ extern int set_git_dir_init(const char *git_dir, const char *real_git_dir, int); extern int init_db(const char *template_dir, unsigned int flags); extern void sanitize_stdfds(void); -extern int daemonize(void); +extern int daemonize(int *); #define alloc_nr(x) (((x)+16)*3/2) diff --git a/daemon.c b/daemon.c index 8d45c33..a5cf954 100644 --- a/daemon.c +++ b/daemon.c @@ -1365,7 +1365,7 @@ int main(int argc, char **argv) return execute(); if (detach) { - if (daemonize()) + if (daemonize(NULL)) die("--detach not supported on this platform"); } else sanitize_stdfds(); diff --git a/setup.c b/setup.c index de1a2a7..9adf13f 100644 --- a/setup.c +++ b/setup.c @@ -1017,7 +1017,7 @@ void sanitize_stdfds(void) close(fd); } -int daemonize(void) +int daemonize(int *daemonized) { #ifdef NO_POSIX_GOODIES errno = ENOSYS; @@ -1029,6 +1029,8 @@ int daemonize(void) case -1: die_errno("fork failed"); default: + if (daemonized) + *daemonized = 1; exit(0); } if (setsid() == -1) -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 13/19] index-helper: use watchman to avoid refreshing index with lstat()
From: Nguyễn Thái Ngọc Duy Watchman is hidden behind index-helper. Before git tries to read the index from shm, it notifies index-helper with SIGHUP and sleep, waiting for index-helper to prepare shm. index-helper then contacts watchman, updates 'WAMA' extension and put it in a separate shm and wakes git up with SIGHUP. Git uses this extension to not lstat unchanged entries. Git only trust 'WAMA' extension when it's received from the separate shm, not from disk. Unmarked entries are "clean". Marked entries are dirty from watchman point of view. If it finds out some entries are 'watchman-dirty', but are really unchanged (e.g. the file was changed, then reverted back), then Git will clear the marking in 'WAMA' before writing it down. Hiding watchman behind index-helper means you need both daemons. You can't run watchman alone. Not so good. But on the other hand, 'git' binary is not linked to watchman/json libraries, which is good for packaging. Core git package will run fine without watchman-related packages. If they need watchman, they can install git-index-helper and dependencies. Another reason for tying watchman to index-helper is, when used with untracked cache, we need to keep track of $GIT_WORK_TREE file listing. That kind of list can be kept in index-helper. Signed-off-by: Nguyễn Thái Ngọc Duy --- Makefile | 5 cache.h| 2 ++ index-helper.c | 84 +++--- read-cache.c | 43 +++--- 4 files changed, 127 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index e51331c..d79fc0c 100644 --- a/Makefile +++ b/Makefile @@ -450,6 +450,7 @@ MSGFMT = msgfmt CURL_CONFIG = curl-config PTHREAD_LIBS = -lpthread PTHREAD_CFLAGS = +WATCHMAN_LIBS = GCOV = gcov export TCL_PATH TCLTK_PATH @@ -1419,6 +1420,7 @@ endif ifdef USE_WATCHMAN LIB_H += watchman-support.h LIB_OBJS += watchman-support.o + WATCHMAN_LIBS = -lwatchman BASIC_CFLAGS += -DUSE_WATCHMAN endif @@ -2032,6 +2034,9 @@ git-remote-testsvn$X: remote-testsvn.o GIT-LDFLAGS $(GITLIBS) $(VCSSVN_LIB) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) \ $(VCSSVN_LIB) +git-index-helper$X: index-helper.o GIT-LDFLAGS $(GITLIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) $(WATCHMAN_LIBS) + $(REMOTE_CURL_ALIASES): $(REMOTE_CURL_PRIMARY) $(QUIET_LNCP)$(RM) $@ && \ ln $< $@ 2>/dev/null || \ diff --git a/cache.h b/cache.h index bf20652..272c928 100644 --- a/cache.h +++ b/cache.h @@ -558,6 +558,7 @@ extern int daemonize(int *); /* Initialize and use the cache information */ struct lock_file; +extern int verify_index(const struct index_state *); extern int read_index(struct index_state *); extern int read_index_preload(struct index_state *, const struct pathspec *pathspec); extern int do_read_index(struct index_state *istate, const char *path, @@ -565,6 +566,7 @@ extern int do_read_index(struct index_state *istate, const char *path, extern int read_index_from(struct index_state *, const char *path); extern int is_index_unborn(struct index_state *); extern int read_index_unmerged(struct index_state *); +extern void write_watchman_ext(struct strbuf *sb, struct index_state* istate); #define COMMIT_LOCK(1 << 0) #define CLOSE_LOCK (1 << 1) #define REFRESH_DAEMON (1 << 2) diff --git a/index-helper.c b/index-helper.c index cf26da7..7e7ce9b 100644 --- a/index-helper.c +++ b/index-helper.c @@ -5,15 +5,18 @@ #include "split-index.h" #include "shm.h" #include "lockfile.h" +#include "watchman-support.h" struct shm { unsigned char sha1[20]; void *shm; size_t size; + pid_t pid; }; static struct shm shm_index; static struct shm shm_base_index; +static struct shm shm_watchman; static int daemonized, to_verify = 1; static void release_index_shm(struct shm *is) @@ -25,10 +28,21 @@ static void release_index_shm(struct shm *is) is->shm = NULL; } +static void release_watchman_shm(struct shm *is) +{ + if (!is->shm) + return; + munmap(is->shm, is->size); + git_shm_unlink("git-watchman-%s-%" PRIuMAX, + sha1_to_hex(is->sha1), (uintmax_t)is->pid); + is->shm = NULL; +} + static void cleanup_shm(void) { release_index_shm(&shm_index); release_index_shm(&shm_base_index); + release_watchman_shm(&shm_watchman); } static void cleanup(void) @@ -120,13 +134,15 @@ static void share_the_index(void) if (the_index.split_index && the_index.split_index->base) share_index(the_index.split_index->base, &shm_base_index); share_index(&the_index, &shm_index); - if (to_verify && !verify_shm()) + if (to_verify && !verify_shm()) { cleanup_shm(); - discard_index(&the_index); + discard_index(&the_index
[PATCH 10/19] read-cache: add watchman 'WAMA' extension
From: Nguyễn Thái Ngọc Duy The extension contains a bitmap, one bit for each entry in the index. If the n-th bit is zero, the n-th entry is considered unchanged, we can ce_mark_uptodate() it without refreshing. If the bit is non-zero and we found out the corresponding file is clean after refresh, we can clear the bit. The 'skipping refresh' bit is not in this patch yet as we would need watchman. More details in later patches. Signed-off-by: Nguyễn Thái Ngọc Duy --- cache.h | 4 read-cache.c | 71 ++-- 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/cache.h b/cache.h index cc1f6f5..8f7b4b1 100644 --- a/cache.h +++ b/cache.h @@ -182,6 +182,8 @@ struct cache_entry { #define CE_VALID (0x8000) #define CE_STAGESHIFT 12 +#define CE_WATCHMAN_DIRTY (0x0001) + /* * Range 0x0FFF in ce_flags is divided into * two parts: in-memory flags and on-disk ones. @@ -320,6 +322,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define CACHE_TREE_CHANGED (1 << 5) #define SPLIT_INDEX_ORDERED(1 << 6) #define UNTRACKED_CHANGED (1 << 7) +#define WATCHMAN_CHANGED (1 << 8) struct split_index; struct untracked_cache; @@ -344,6 +347,7 @@ struct index_state { struct untracked_cache *untracked; void *mmap; size_t mmap_size; + char *last_update; }; extern struct index_state the_index; diff --git a/read-cache.c b/read-cache.c index 16fbdf6..85ef15b 100644 --- a/read-cache.c +++ b/read-cache.c @@ -19,6 +19,7 @@ #include "split-index.h" #include "utf8.h" #include "shm.h" +#include "ewah/ewok.h" static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, unsigned int options); @@ -41,11 +42,13 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */ #define CACHE_EXT_LINK 0x6c696e6b/* "link" */ #define CACHE_EXT_UNTRACKED 0x554E5452 /* "UNTR" */ +#define CACHE_EXT_WATCHMAN 0x57414D41/* "WAMA" */ /* changes that can be kept in $GIT_DIR/index (basically all extensions) */ #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \ CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \ -SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED) +SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED | \ +WATCHMAN_CHANGED) struct index_state the_index; static const char *alternate_index_output; @@ -1224,8 +1227,13 @@ int refresh_index(struct index_state *istate, unsigned int flags, continue; new = refresh_cache_ent(istate, ce, options, &cache_errno, &changed); - if (new == ce) + if (new == ce) { + if (ce->ce_flags & CE_WATCHMAN_DIRTY) { + ce->ce_flags &= ~CE_WATCHMAN_DIRTY; + istate->cache_changed |= WATCHMAN_CHANGED; + } continue; + } if (!new) { const char *fmt; @@ -1369,6 +1377,48 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size) return 0; } +static void mark_no_watchman(size_t pos, void *data) +{ + struct index_state *istate = data; + assert(pos < istate->cache_nr); + istate->cache[pos]->ce_flags |= CE_WATCHMAN_DIRTY; +} + +static int read_watchman_ext(struct index_state *istate, const void *data, +unsigned long sz) +{ + struct ewah_bitmap *bitmap; + int ret, len; + + if (memchr(data, 0, sz) == NULL) + return error("invalid extension"); + len = strlen(data) + 1; + bitmap = ewah_new(); + ret = ewah_read_mmap(bitmap, (const char *)data + len, sz - len); + if (ret != sz - len) { + ewah_free(bitmap); + return error("failed to parse ewah bitmap reading watchman index extension"); + } + istate->last_update = xstrdup(data); + ewah_each_bit(bitmap, mark_no_watchman, istate); + ewah_free(bitmap); + return 0; +} + +static void write_watchman_ext(struct strbuf *sb, struct index_state* istate) +{ + struct ewah_bitmap *bitmap; + int i; + + strbuf_add(sb, istate->last_update, strlen(istate->last_update) + 1); + bitmap = ewah_new(); + for (i = 0; i < istate->cache_nr; i++) + if (istate->cache[i]->ce_flags & CE_WATCHMAN_DIRTY) + ewah_set(bitmap, i); + ewah_serialize_strbuf(bitmap, sb); + ewah_free(bitmap); +} + static int read_index_extension(struct index_state *istate, const char *ext, void *data, unsigned long sz) { @@ -1386,6 +1436,11 @@ static int read_index_extension(struct index_state *istate, case CACHE_EXT
[PATCH 16/19] index-helper: rewrite pidfile after daemonizing
When we daemonize, our pid changes, so we need to rewrite the pid file. Signed-off-by: David Turner --- index-helper.c | 48 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/index-helper.c b/index-helper.c index 7e7ce9b..dc03e1e 100644 --- a/index-helper.c +++ b/index-helper.c @@ -288,12 +288,33 @@ static const char * const usage_text[] = { NULL }; -int main(int argc, char **argv) +static const char *write_pid(void) { + static struct strbuf sb = STRBUF_INIT; static struct lock_file lock; - struct strbuf sb = STRBUF_INIT; + int fd; + + strbuf_reset(&sb); + fd = hold_lock_file_for_update(&lock, + git_path("index-helper.pid"), + LOCK_DIE_ON_ERROR); +#ifdef GIT_WINDOWS_NATIVE + strbuf_addstr(&sb, "HWND"); +#elif defined(USE_WATCHMAN) + strbuf_addch(&sb, 'W'); /* see poke_daemon() */ +#endif + strbuf_addf(&sb, "%" PRIuMAX, (uintmax_t) getpid()); + write_in_full(fd, sb.buf, sb.len); + commit_lock_file(&lock); + + return sb.buf; +} + +int main(int argc, char **argv) +{ const char *prefix; - int fd, idle_in_minutes = 10, detach = 0; + int idle_in_minutes = 10, detach = 0; + const char *pid_file; struct option options[] = { OPT_INTEGER(0, "exit-after", &idle_in_minutes, N_("exit if not used after some minutes")), @@ -314,17 +335,7 @@ int main(int argc, char **argv) options, usage_text, 0)) die(_("too many arguments")); - fd = hold_lock_file_for_update(&lock, - git_path("index-helper.pid"), - LOCK_DIE_ON_ERROR); -#ifdef GIT_WINDOWS_NATIVE - strbuf_addstr(&sb, "HWND"); -#elif defined(USE_WATCHMAN) - strbuf_addch(&sb, 'W'); /* see poke_daemon() */ -#endif - strbuf_addf(&sb, "%" PRIuMAX, (uintmax_t) getpid()); - write_in_full(fd, sb.buf, sb.len); - commit_lock_file(&lock); + write_pid(); atexit(cleanup); sigchain_push_common(cleanup_on_signal); @@ -332,9 +343,14 @@ int main(int argc, char **argv) if (detach && daemonize(&daemonized)) die_errno("unable to detach"); + /* +* Now that we're daemonized, we need to rewrite the PID file +* because we have a new PID. +*/ + pid_file = write_pid(); + if (!idle_in_minutes) idle_in_minutes = 0x / 60; - loop(sb.buf, idle_in_minutes * 60); - strbuf_release(&sb); + loop(pid_file, idle_in_minutes * 60); return 0; } -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 09/19] index-helper: add Windows support
From: Nguyễn Thái Ngọc Duy Windows supports shared memory, but the semantics is a bit different than POSIX shm. The most noticeable thing is there's no way to get the shared memory's size by the reader, and wrapping fstat to do that would be hell. So the shm size is added near the end, hidden away from shm users (storing it in headers would cause more problems with munmap, storing it as a separate shm is even worse). PostMessage is used instead of UNIX signals for notification. Lightweight (at least code-wise) on the client side. Signed-off-by: Nguyễn Thái Ngọc Duy --- config.mak.uname | 2 ++ index-helper.c | 48 read-cache.c | 13 shm.c| 96 4 files changed, 159 insertions(+) diff --git a/config.mak.uname b/config.mak.uname index b5108e1..49320c7 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -394,6 +394,7 @@ ifndef DEBUG else BASIC_CFLAGS += -Zi -MDd endif + PROGRAM_OBJS += index-helper.o X = .exe endif ifeq ($(uname_S),Interix) @@ -574,6 +575,7 @@ else NO_CURL = YesPlease endif endif + PROGRAM_OBJS += index-helper.o endif ifeq ($(uname_S),QNX) COMPAT_CFLAGS += -DSA_RESTART=0 diff --git a/index-helper.c b/index-helper.c index 4dd9656..cf26da7 100644 --- a/index-helper.c +++ b/index-helper.c @@ -155,6 +155,51 @@ static void loop(const char *pid_file, int idle_in_seconds) ; /* do nothing, all is handled by signal handlers already */ } +#elif defined(GIT_WINDOWS_NATIVE) + +static void loop(const char *pid_file, int idle_in_seconds) +{ + HWND hwnd; + UINT_PTR timer = 0; + MSG msg; + HINSTANCE hinst = GetModuleHandle(NULL); + WNDCLASS wc; + + /* +* Emulate UNIX signals by sending WM_USER+x to a +* window. Register window class and create a new window to +* catch these messages. +*/ + memset(&wc, 0, sizeof(wc)); + wc.lpfnWndProc = DefWindowProc; + wc.hInstance = hinst; + wc.lpszClassName = "git-index-helper"; + if (!RegisterClass(&wc)) + die_errno(_("could not register new window class")); + + hwnd = CreateWindow("git-index-helper", pid_file, + 0, 0, 0, 1, 1, NULL, NULL, hinst, NULL); + if (!hwnd) + die_errno(_("could not register new window")); + + refresh(0); + while (1) { + timer = SetTimer(hwnd, timer, idle_in_seconds * 1000, NULL); + if (!timer) + die(_("no timer!")); + if (!GetMessage(&msg, hwnd, 0, 0) || msg.message == WM_TIMER) + break; + switch (msg.message) { + case WM_USER: + refresh(0); + break; + default: + /* just reset the timer */ + break; + } + } +} + #else static void loop(const char *pid_file, int idle_in_seconds) @@ -198,6 +243,9 @@ int main(int argc, char **argv) fd = hold_lock_file_for_update(&lock, git_path("index-helper.pid"), LOCK_DIE_ON_ERROR); +#ifdef GIT_WINDOWS_NATIVE + strbuf_addstr(&sb, "HWND"); +#endif strbuf_addf(&sb, "%" PRIuMAX, (uintmax_t) getpid()); write_in_full(fd, sb.buf, sb.len); commit_lock_file(&lock); diff --git a/read-cache.c b/read-cache.c index 1a0ab0c..16fbdf6 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1546,6 +1546,18 @@ static void post_read_index_from(struct index_state *istate) tweak_untracked_cache(istate); } +#if defined(GIT_WINDOWS_NATIVE) +static void do_poke(struct strbuf *sb, int refresh_cache) +{ + HWND hwnd; + if (!starts_with(sb->buf, "HWND")) + return; + hwnd = FindWindow("git-index-helper", sb->buf); + if (!hwnd) + return; + PostMessage(hwnd, refresh_cache ? WM_USER : WM_USER + 1, 0, 0); +} +#else static void do_poke(struct strbuf *sb, int refresh_cache) { char*start = sb->buf; @@ -1555,6 +1567,7 @@ static void do_poke(struct strbuf *sb, int refresh_cache) return; kill(pid, refresh_cache ? SIGHUP : SIGUSR1); } +#endif static void poke_daemon(struct index_state *istate, const struct stat *st, int refresh_cache) diff --git a/shm.c b/shm.c index 4ec1a00..04d8a35 100644 --- a/shm.c +++ b/shm.c @@ -52,6 +52,102 @@ void git_shm_unlink(const char *fmt, ...) shm_unlink(path); } +#elif defined(GIT_WINDOWS_NATIVE) + +#define SHM_PATH_LEN 82/* a little bit longer than POSIX because of "Local\\" */ + +static ssize_t create_shm_map(int oflag, int perm, ssize_t length, + void **mmap, int prot, int flags, + co
[PATCH 08/19] index-helper: add --detach
From: Nguyễn Thái Ngọc Duy Signed-off-by: Nguyễn Thái Ngọc Duy --- Documentation/git-index-helper.txt | 3 +++ index-helper.c | 10 -- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/git-index-helper.txt b/Documentation/git-index-helper.txt index ad40366..9ced091 100644 --- a/Documentation/git-index-helper.txt +++ b/Documentation/git-index-helper.txt @@ -31,6 +31,9 @@ OPTIONS for reading an index, but because it will happen in the background, it's not noticable. `--strict` is enabled by default. +--detach:: + Detach from the shell. + NOTES - On UNIX-like systems, $GIT_DIR/index-helper.pid contains the process diff --git a/index-helper.c b/index-helper.c index 1140bc0..4dd9656 100644 --- a/index-helper.c +++ b/index-helper.c @@ -14,7 +14,7 @@ struct shm { static struct shm shm_index; static struct shm shm_base_index; -static int to_verify = 1; +static int daemonized, to_verify = 1; static void release_index_shm(struct shm *is) { @@ -33,6 +33,8 @@ static void cleanup_shm(void) static void cleanup(void) { + if (daemonized) + return; unlink(git_path("index-helper.pid")); cleanup_shm(); } @@ -172,12 +174,13 @@ int main(int argc, char **argv) static struct lock_file lock; struct strbuf sb = STRBUF_INIT; const char *prefix; - int fd, idle_in_minutes = 10; + int fd, idle_in_minutes = 10, detach = 0; struct option options[] = { OPT_INTEGER(0, "exit-after", &idle_in_minutes, N_("exit if not used after some minutes")), OPT_BOOL(0, "strict", &to_verify, "verify shared memory after creating"), + OPT_BOOL(0, "detach", &detach, "detach the process"), OPT_END() }; @@ -202,6 +205,9 @@ int main(int argc, char **argv) atexit(cleanup); sigchain_push_common(cleanup_on_signal); + if (detach && daemonize(&daemonized)) + die_errno("unable to detach"); + if (!idle_in_minutes) idle_in_minutes = 0x / 60; loop(sb.buf, idle_in_minutes * 60); -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 04/19] index-helper: new daemon for caching index and related stuff
From: Nguyễn Thái Ngọc Duy Instead of reading the index from disk and worrying about disk corruption, the index is cached in memory (memory bit-flips happen too, but hopefully less often). The result is faster read. Read time is reduced by 70%. The biggest gain is not having to verify the trailing SHA-1, which takes lots of time especially on large index files. But this also opens doors for further optimiztions: - we could create an in-memory format that's essentially the memory dump of the index to eliminate most of parsing/allocation overhead. The mmap'd memory can be used straight away. Experiment [1] shows we could reduce read time by 88%. - we could cache non-index info such as name hash The shared memory's name folows the template "git--" where is the trailing SHA-1 of the index file. is "index" for cached index files (and may be "name-hash" for name-hash cache). If such shared memory exists, it contains the same index content as on disk. The content is already validated by the daemon and git won't validate it again (except comparing the trailing SHA-1s). Git can poke the daemon to tell it to refresh the index cache, or to keep it alive some more minutes via UNIX signals. It can't give any real index data directly to the daemon. Real data goes to disk first, then the daemon reads and verifies it from there. Poking only happens for $GIT_DIR/index, not temporary index files. $GIT_DIR/index-helper.pid contains a reference to daemon process (and it's pid on *nix). The file's mtime is updated every time it's accessed (or should be updated often enough). Old index-helper.pid is considered stale and ignored. index-helper requires POSIX realtime extension. POSIX shm interface however is abstracted away so that Windows support can be added later. On webkit.git with index format v2, duplicating 8 times to 1.4m entries and 200MB in size: (vanilla) 0.986986364 s: read_index_from .git/index (index-helper) 0.267850279 s: read_index_from .git/index Interestingly with index v4, we get less out of index-helper. It makes sense as v4 requires more processing after loading the index: (vanilla) 0.72249 s: read_index_from .git/index (index-helper) 0.302741500 s: read_index_from .git/index Signed-off-by: Nguyễn Thái Ngọc Duy --- .gitignore | 1 + Documentation/git-index-helper.txt | 44 ++ Makefile | 9 +++ cache.h| 3 + config.mak.uname | 1 + git-compat-util.h | 1 + index-helper.c | 162 + read-cache.c | 106 +--- shm.c | 67 +++ shm.h | 23 ++ 10 files changed, 408 insertions(+), 9 deletions(-) create mode 100644 Documentation/git-index-helper.txt create mode 100644 index-helper.c create mode 100644 shm.c create mode 100644 shm.h diff --git a/.gitignore b/.gitignore index 5087ce1..b92f122 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,7 @@ /git-http-fetch /git-http-push /git-imap-send +/git-index-helper /git-index-pack /git-init /git-init-db diff --git a/Documentation/git-index-helper.txt b/Documentation/git-index-helper.txt new file mode 100644 index 000..9db28cf --- /dev/null +++ b/Documentation/git-index-helper.txt @@ -0,0 +1,44 @@ +git-index-helper(1) +=== + +NAME + +git-index-helper - A simple cache daemon for speeding up index file access + +SYNOPSIS + +[verse] +'git index-helper' [options] + +DESCRIPTION +--- +Keep the index file in memory for faster access. This daemon is per +repository. + +OPTIONS +--- + +--exit-after=:: + Exit if the cached index is not accessed for `` + minutes. Specify 0 to wait forever. Default is 10. + +NOTES +- +On UNIX-like systems, $GIT_DIR/index-helper.pid contains the process +id of the daemon. At least on Linux, shared memory objects are +availble via /dev/shm with the name pattern "git--". +Normally the daemon will clean up shared memory objects when it exits. +But if it crashes, some objects could remain there and they can be +safely deleted with "rm" command. The following signals are used to +control the daemon: + +SIGHUP:: + Reread the index. + +SIGUSR1:: + Let the daemon know the index is to be read. It keeps the + daemon alive longer, unless `--exit-after=0` is used. + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Makefile b/Makefile index 24bef8d..a6c668b 100644 --- a/Makefile +++ b/Makefile @@ -367,6 +367,8 @@ all:: # Define HAVE_BSD_SYSCTL if your platform has a BSD-compatible sysctl function. # # Define HAVE_GETDELIM if your system has the getdelim() function. +# +# Define HAVE_SHM if you platform support shm_* functions in librt. GIT-VERSION-FILE: FORCE @$(SHELL_PATH) ./GIT-VERSION-GEN @@ -802,6 +804,7 @@ LI
[PATCH 06/19] index-helper: add --strict
From: Nguyễn Thái Ngọc Duy There's are "holes" in the index-helper approach because the shared memory is not verified again by git. If $USER is compromised, shared memory could be modified. But then they can already modify $GIT_DIR/index. A more realistic risk is some bugs in index-helper produce corrupt shared memory. --strict is added to avoid that Strictly speaking there's still a very small gap where corrupt shared memory could still be read by git: after we write the trailing SHA-1 in the shared memory (thus signaling "this shm is ready") and before verify_shm() detects an error. Signed-off-by: Nguyễn Thái Ngọc Duy --- Documentation/git-index-helper.txt | 9 +++ cache.h| 1 + index-helper.c | 48 ++ read-cache.c | 9 --- 4 files changed, 64 insertions(+), 3 deletions(-) diff --git a/Documentation/git-index-helper.txt b/Documentation/git-index-helper.txt index 9db28cf..ad40366 100644 --- a/Documentation/git-index-helper.txt +++ b/Documentation/git-index-helper.txt @@ -22,6 +22,15 @@ OPTIONS Exit if the cached index is not accessed for `` minutes. Specify 0 to wait forever. Default is 10. +--strict:: +--no-strict:: + Strict mode makes index-helper verify the shared memory after + it's created. If the result does not match what's read from + $GIT_DIR/index, the shared memory is destroyed. This makes + index-helper take more than double the amount of time required + for reading an index, but because it will happen in the + background, it's not noticable. `--strict` is enabled by default. + NOTES - On UNIX-like systems, $GIT_DIR/index-helper.pid contains the process diff --git a/cache.h b/cache.h index e22296c..1d7e561 100644 --- a/cache.h +++ b/cache.h @@ -336,6 +336,7 @@ struct index_state { keep_mmap : 1, from_shm : 1, to_shm : 1, +always_verify_trailing_sha1 : 1, initialized : 1; struct hashmap name_hash; struct hashmap dir_hash; diff --git a/index-helper.c b/index-helper.c index cf2971d..1140bc0 100644 --- a/index-helper.c +++ b/index-helper.c @@ -14,6 +14,7 @@ struct shm { static struct shm shm_index; static struct shm shm_base_index; +static int to_verify = 1; static void release_index_shm(struct shm *is) { @@ -69,11 +70,56 @@ static void share_index(struct index_state *istate, struct shm *is) hashcpy((unsigned char *)new_mmap + istate->mmap_size - 20, is->sha1); } +static int verify_shm(void) +{ + int i; + struct index_state istate; + memset(&istate, 0, sizeof(istate)); + istate.always_verify_trailing_sha1 = 1; + istate.to_shm = 1; + i = read_index(&istate); + if (i != the_index.cache_nr) + goto done; + for (; i < the_index.cache_nr; i++) { + struct cache_entry *base, *ce; + /* namelen is checked separately */ + const unsigned int ondisk_flags = + CE_STAGEMASK | CE_VALID | CE_EXTENDED_FLAGS; + unsigned int ce_flags, base_flags, ret; + base = the_index.cache[i]; + ce = istate.cache[i]; + if (ce->ce_namelen != base->ce_namelen || + strcmp(ce->name, base->name)) { + warning("mismatch at entry %d", i); + break; + } + ce_flags = ce->ce_flags; + base_flags = base->ce_flags; + /* only on-disk flags matter */ + ce->ce_flags &= ondisk_flags; + base->ce_flags &= ondisk_flags; + ret = memcmp(&ce->ce_stat_data, &base->ce_stat_data, +offsetof(struct cache_entry, name) - +offsetof(struct cache_entry, ce_stat_data)); + ce->ce_flags = ce_flags; + base->ce_flags = base_flags; + if (ret) { + warning("mismatch at entry %d", i); + break; + } + } +done: + discard_index(&istate); + return i == the_index.cache_nr; +} + static void share_the_index(void) { if (the_index.split_index && the_index.split_index->base) share_index(the_index.split_index->base, &shm_base_index); share_index(&the_index, &shm_index); + if (to_verify && !verify_shm()) + cleanup_shm(); discard_index(&the_index); } @@ -130,6 +176,8 @@ int main(int argc, char **argv) struct option options[] = { OPT_INTEGER(0, "exit-after", &idle_in_minutes, N_("exit if not used after some minutes")), + OPT_BOOL(0, "strict", &to_verify, +"verify shared memory after creating"), OPT_END()
Re: [PATCH 0/10] cleaning up check_repository_format_gently
On Tue, 2016-03-01 at 09:35 -0500, Jeff King wrote: > After the discussion in: > > http://thread.gmane.org/gmane.comp.version-control.git/287949/focus > =288002 > > I came up with this series to try to address some of the warts in > check_repository_format_gently(). > > I had hoped to come up with something very small that could go at the > front of the refs-backend series, but as with any time I look at the > setup code, it managed to snowball into a potpourri of hidden > assumptions. > > So I hope David isn't _too_ irritated to see this in his inbox. I commented on two of the patches; the rest look good to me. I rebased on these (minus the repo_version_string change from 10/10). Wasn't too bad. And it did clean up the submodule check. I will wait to resend my series until I hear back about your shortlog/mailmap fix and your suggestion on a better archive --remote fix. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v7 01/33] setup: call setup_git_directory_gently before accessing refs
On Tue, 2016-03-01 at 18:47 -0500, David Turner wrote: > On Tue, 2016-03-01 at 03:35 -0500, Jeff King wrote: > > On Mon, Feb 29, 2016 at 07:52:34PM -0500, David Turner wrote: > > > > > Usually, git calls some form of setup_git_directory at startup. > > > But > > > sometimes, it doesn't. Usually, that's OK because it's not > > > really > > > using the repository. But in some cases, it is using the repo. > > > In > > > those cases, either setup_git_directory_gently must be called, or > > > the > > > repository (e.g. the refs) must not be accessed. > > > > It's actually not just setup_git_directory(). We can also use > > check_repository_format(), which is used by enter_repo() (and hence > > by > > things like upload-pack). I think the rule really ought to be: if > > we > > didn't have check_repository_format_gently() tell us we have a > > valid > > repo, we should not access any repo elements (refs, objects, etc). > > I'll change that commit message to say > "check_repository_format_gently". > > > > diff --git a/builtin/grep.c b/builtin/grep. > > [snip: this is a probably-good behavior change] > > Agreed. > > > My fix for this was to teach read_mailmap to avoid looking for > > HEAD:.mailmap if we are not in a repository, but to continue with > > the > > others (.mailmap in the cwd, and the mailmap.file config variable). > > ... > > But I do think your patch is a potential regression there, if > > anybody > > does do that. > > Your version sounds better. But I don't see it in the patch set you > sent earlier? > > > > diff --git a/git.c b/git.c > > > index 6cc0c07..51e0508 100644 > > > --- a/git.c > > > +++ b/git.c > > > @@ -376,7 +376,7 @@ static struct cmd_struct commands[] = { > > > { "am", cmd_am, RUN_SETUP | NEED_WORK_TREE }, > > > { "annotate", cmd_annotate, RUN_SETUP }, > > > { "apply", cmd_apply, RUN_SETUP_GENTLY }, > > > - { "archive", cmd_archive }, > > > + { "archive", cmd_archive, RUN_SETUP_GENTLY }, > > > { "bisect--helper", cmd_bisect__helper, RUN_SETUP }, > > > { "blame", cmd_blame, RUN_SETUP }, > > > { "branch", cmd_branch, RUN_SETUP }, > > > > I didn't have to touch this case in my experimenting. I wonder if > > it's > > because I resolved the "grep" case a little differently. > > > > I taught get_ref_cache() to only assert() that we have a repository > > when > > we are looking at the main ref-cache, not a submodule. In theory, > > we > > can > > look at a submodule from inside an outer non-repo (it's not really > > a > > submodule then, but just a plain git dir). I don't think there's > > anything in git right now that says you can't do so, though I think > > your > > refs-backend work does introduce that restriction (because it > > actually > > requires the submodules to use the same backend). > > > > So with that requirement, I think we do need to require a repo even > > to > > access submodule refs. Is that what triggered this change? > > No. What triggered this change was a test failure with your earlier > patch on master -- none of my stuff at all. The failing command was: > > git archive --remote=. HEAD > > When writing my patch, I had assumed that the issue was the > resolve_ref > on the HEAD that's an argument -- but it's not. The actual traceback > is: > > #0 die ( > err=err@entry=0x57ddb0 "BUG: resolve_ref called without > initializing repo") at usage.c:99 > #1 0x004f7ed9 in resolve_ref_1 (sb_refname=0x7c4a50 > , > sb_contents=0x7fffcfc0, sb_path=0x7fffcfe0, > flags=0x7fffdaaa, > sha1=0x7fffd100 "\b\326\377\377\377\177", > resolve_flags=5572384, > refname=0x2 ) > at refs/files-backend.c:1429 > #2 resolve_ref_unsafe (refname=refname@entry=0x550b3b "HEAD", > resolve_flags=resolve_flags@entry=0, > sha1=sha1@entry=0x7fffd100 "\b\326\377\377\377\177", > flags=flags@entry=0x7fffd0fc) at refs/files-backend.c:1600 > #3 0x004ffe69 in read_config () at remote.c:471 > #4 0x00500235 in read_config () at remote.c:705 > #5 remote_get_1 (name=0x7fffdaaa ".", > get_default=get_default@entry=0x4fe230 ) > at remote.c:688 > #6 0x005004ca in remote
Re: [PATCH 10/10] setup: drop GIT_REPO_VERSION constants
On Tue, 2016-03-01 at 09:45 -0500, Jeff King wrote: > - char repo_version_string[10]; > > /* This forces creation of new config file */ > - xsnprintf(repo_version_string, sizeof(repo_version_string), > - "%d", GIT_REPO_VERSION); > - git_config_set("core.repositoryformatversion", > repo_version_string); > + git_config_set("core.repositoryformatversion", "0"); I'm about to use that in my series, so let's not get rid of it here. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v7 01/33] setup: call setup_git_directory_gently before accessing refs
On Tue, 2016-03-01 at 03:35 -0500, Jeff King wrote: > On Mon, Feb 29, 2016 at 07:52:34PM -0500, David Turner wrote: > > > Usually, git calls some form of setup_git_directory at startup. > > But > > sometimes, it doesn't. Usually, that's OK because it's not really > > using the repository. But in some cases, it is using the repo. In > > those cases, either setup_git_directory_gently must be called, or > > the > > repository (e.g. the refs) must not be accessed. > > It's actually not just setup_git_directory(). We can also use > check_repository_format(), which is used by enter_repo() (and hence > by > things like upload-pack). I think the rule really ought to be: if we > didn't have check_repository_format_gently() tell us we have a valid > repo, we should not access any repo elements (refs, objects, etc). I'll change that commit message to say "check_repository_format_gently". > > diff --git a/builtin/grep.c b/builtin/grep. > [snip: this is a probably-good behavior change] Agreed. > My fix for this was to teach read_mailmap to avoid looking for > HEAD:.mailmap if we are not in a repository, but to continue with the > others (.mailmap in the cwd, and the mailmap.file config variable). > ... > But I do think your patch is a potential regression there, if anybody > does do that. Your version sounds better. But I don't see it in the patch set you sent earlier? > > diff --git a/git.c b/git.c > > index 6cc0c07..51e0508 100644 > > --- a/git.c > > +++ b/git.c > > @@ -376,7 +376,7 @@ static struct cmd_struct commands[] = { > > { "am", cmd_am, RUN_SETUP | NEED_WORK_TREE }, > > { "annotate", cmd_annotate, RUN_SETUP }, > > { "apply", cmd_apply, RUN_SETUP_GENTLY }, > > - { "archive", cmd_archive }, > > + { "archive", cmd_archive, RUN_SETUP_GENTLY }, > > { "bisect--helper", cmd_bisect__helper, RUN_SETUP }, > > { "blame", cmd_blame, RUN_SETUP }, > > { "branch", cmd_branch, RUN_SETUP }, > > I didn't have to touch this case in my experimenting. I wonder if > it's > because I resolved the "grep" case a little differently. > > I taught get_ref_cache() to only assert() that we have a repository > when > we are looking at the main ref-cache, not a submodule. In theory, we > can > look at a submodule from inside an outer non-repo (it's not really a > submodule then, but just a plain git dir). I don't think there's > anything in git right now that says you can't do so, though I think > your > refs-backend work does introduce that restriction (because it > actually > requires the submodules to use the same backend). > > So with that requirement, I think we do need to require a repo even > to > access submodule refs. Is that what triggered this change? No. What triggered this change was a test failure with your earlier patch on master -- none of my stuff at all. The failing command was: git archive --remote=. HEAD When writing my patch, I had assumed that the issue was the resolve_ref on the HEAD that's an argument -- but it's not. The actual traceback is: #0 die ( err=err@entry=0x57ddb0 "BUG: resolve_ref called without initializing repo") at usage.c:99 #1 0x004f7ed9 in resolve_ref_1 (sb_refname=0x7c4a50 , sb_contents=0x7fffcfc0, sb_path=0x7fffcfe0, flags=0x7fffdaaa, sha1=0x7fffd100 "\b\326\377\377\377\177", resolve_flags=5572384, refname=0x2 ) at refs/files-backend.c:1429 #2 resolve_ref_unsafe (refname=refname@entry=0x550b3b "HEAD", resolve_flags=resolve_flags@entry=0, sha1=sha1@entry=0x7fffd100 "\b\326\377\377\377\177", flags=flags@entry=0x7fffd0fc) at refs/files-backend.c:1600 #3 0x004ffe69 in read_config () at remote.c:471 #4 0x00500235 in read_config () at remote.c:705 #5 remote_get_1 (name=0x7fffdaaa ".", get_default=get_default@entry=0x4fe230 ) at remote.c:688 #6 0x005004ca in remote_get (name=) at remote.c:713 #7 0x004159d8 in run_remote_archiver (name_hint=0x0, exec=0x550720 "git-upload-archive", remote=, argv=0x7fffd608, argc=2) at builtin/archive.c:35 #8 cmd_archive (argc=2, argv=0x7fffd608, prefix=0x0) at builtin/archive.c:104 #9 0x00406051 in run_builtin (argv=0x7fffd608, argc=3, p=0x7bd7a0 ) at git.c:357 #10 handle_builtin (argc=3, argv=0x7fffd608) at git.c:540 #11 0x0040519a in main (argc=3, av=) at git.c:671 > I'd think you would need a matching line inside cmd_archive, too. It > should allow "--remote" wit
Re: [PATCH 06/10] setup: refactor repo format reading and verification
On Tue, 2016-03-01 at 09:42 -0500, Jeff King wrote: > +/* > + * Read the repository format characteristics from the config file > "path". If > + * the version cannot be extracted from the file for any reason, the > version > + * field will be set to -1, and all other fields are undefined. > + */ > +void read_repository_format(struct repository_format *format, const > char *path); Generally speaking, I don't care for this interface; I would prefer to return -1 and not change the struct. But I see why it's maybe simpler in this one case. +/* > + * Internally, we need to swap out "fn" here, but we don't want to > expose > + * that to the world. Hence a wrapper around this internal function. > + */ I don't understand this comment. fn is not being swapped out -- it's being passed on directly. > +static void read_repository_format_1(struct repository_format > *format, > + config_fn_t fn, const char > *path) The argument order here is different from git_config_from_file -- is that for a reason? > + if (format->version >= 1 && format->unknown_extensions.nr) { > + int i; > + > + for (i = 0; i < format->unknown_extensions.nr; i++) > + strbuf_addf(err, "unknown repository > extension: %s", > + format > ->unknown_extensions.items[i].string); newline here or something? Otherwise multiple unknown extensions will get jammed together. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v7 29/33] setup: configure ref storage on setup
On Tue, 2016-03-01 at 17:18 +, Ramsay Jones wrote: > > On 01/03/16 00:53, David Turner wrote: > > This sets up the existing backend early, so that other code which > > reads refs is reading from the right place. > > > > Signed-off-by: David Turner > > Signed-off-by: Junio C Hamano > > --- > > config.c | 1 + > > setup.c | 4 > > 2 files changed, 5 insertions(+) > > > > diff --git a/config.c b/config.c > > index 9ba40bc..cca7e28 100644 > > --- a/config.c > > +++ b/config.c > > @@ -11,6 +11,7 @@ > > #include "strbuf.h" > > #include "quote.h" > > #include "hashmap.h" > > +#include "refs.h" > > #include "string-list.h" > > #include "utf8.h" > > > > I was just skimming these patches as they passed by, and this > caught my eye. If this include is required (eg. to fix a compiler > warning), then it should probably be in an earlier patch. > Otherwise, it should be in a later patch, no? Actually, it's cruft from the previous version of this series :(. I looked at the patch and didn't notice that it was in config.c instead of setup.c. Oops. Will remove. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v7 30/33] refs: break out resolve_ref_unsafe_submodule
On Tue, 2016-03-01 at 17:21 +, Ramsay Jones wrote: > > On 01/03/16 00:53, David Turner wrote: > > It will soon be useful for resolve_ref_unsafe to support > > submodules. > > But since it is called from so many places, changing it would have > > been painful. Fortunately, it's just a thin wrapper around (the > > former) resolve_ref_1. So now resolve_ref_1 becomes > > resolve_ref_unsafe_submodule, and it passes its submodule argument > > through to read_raw_ref. > > > > The files backend doesn't need this functionality, but it doesn't > > hurt. > > > > Signed-off-by: David Turner > > Signed-off-by: Junio C Hamano > > --- > > refs.c | 41 +--- > > - > > refs/files-backend.c | 8 ++-- > > refs/refs-internal.h | 19 --- > > 3 files changed, 47 insertions(+), 21 deletions(-) > > > > diff --git a/refs.c b/refs.c > > index 5fe0bac..d1cf707 100644 > > --- a/refs.c > > +++ b/refs.c > > @@ -60,6 +60,9 @@ void register_ref_storage_backends(void) > > * entries below when you add a new backend. > > */ > > register_ref_storage_backend(&refs_be_files); > > +#ifdef USE_LIBLMDB > > + register_ref_storage_backend(&refs_be_lmdb); > > +#endif > > Again, just skimming patches, ... > > The lmdb refs backend (hence refs_be_lmdb) is not introduced until > the next patch [31/33], right? Yep. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v7 31/33] refs: add LMDB refs storage backend
On Tue, 2016-03-01 at 08:31 +0700, Duy Nguyen wrote: > On Tue, Mar 1, 2016 at 7:53 AM, David Turner < > dtur...@twopensource.com> wrote: > > +Weaknesses: > > +--- > > + > > +The reflog format is somewhat inefficient: a binary format could > > store > > +reflog date/time information in somewhat less space. > > This raises an interesting question. What if we want to change lmdb > format in future (e.g. to address this weakness)? Do we need some > sort > of versioning in lmdb database? I suppose you can always add "lmdb2" > backend that shares most of the code with current lmdb backend. Not > sure if that's what you had in mind though. The weakness of versioning inside the LMDB database is that in order to check the version, you need to already have LMDB. That's the argument for "lmdb2". I had sort of hoped that this version would be "good enough" that we could avoid modifying it. So maybe that means we ought to address the reflog format. But I'm not sure that it's that big a deal. What do you think? -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v7 09/33] refs: reduce the visibility of do_for_each_ref()
From: Ramsay Jones Now that we have moved do_for_each_ref into refs.c, it no longer needs to be public. Signed-off-by: Ramsay Jones Signed-off-by: Junio C Hamano --- refs.c | 19 +++ refs/refs-internal.h | 6 -- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/refs.c b/refs.c index dc5682a..cea5997 100644 --- a/refs.c +++ b/refs.c @@ -1142,6 +1142,17 @@ int head_ref(each_ref_fn fn, void *cb_data) return head_ref_submodule(NULL, fn, cb_data); } +/* + * The common backend for the for_each_*ref* functions + */ +static int do_for_each_ref(const char *submodule, const char *base, + each_ref_fn fn, int trim, int flags, + void *cb_data) +{ + return the_refs_backend->do_for_each_ref(submodule, base, fn, trim, +flags, cb_data); +} + int for_each_ref(each_ref_fn fn, void *cb_data) { return do_for_each_ref(NULL, "", fn, 0, 0, cb_data); @@ -1342,11 +1353,3 @@ int resolve_gitlink_ref(const char *path, const char *refname, { return the_refs_backend->resolve_gitlink_ref(path, refname, sha1); } - -int do_for_each_ref(const char *submodule, const char *base, - each_ref_fn fn, int trim, int flags, - void *cb_data) -{ - return the_refs_backend->do_for_each_ref(submodule, base, fn, trim, -flags, cb_data); -} diff --git a/refs/refs-internal.h b/refs/refs-internal.h index c9b6745..3702737 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -203,12 +203,6 @@ int rename_ref_available(const char *oldname, const char *newname); /* Include broken references in a do_for_each_ref*() iteration: */ #define DO_FOR_EACH_INCLUDE_BROKEN 0x01 -/* - * The common backend for the for_each_*ref* functions - */ -int do_for_each_ref(const char *submodule, const char *base, - each_ref_fn fn, int trim, int flags, void *cb_data); - /* refs backends */ typedef int ref_transaction_commit_fn(struct ref_transaction *transaction, struct strbuf *err); -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v7 28/33] refs: register ref storage backends
Add new function register_ref_storage_backends(). This new function registers all known ref storage backends... once there are any other than the default. For now, it just registers the files backend. Call the function before calling set_ref_storage_backend. Signed-off-by: David Turner Signed-off-by: Junio C Hamano --- builtin/init-db.c | 3 +++ refs.c| 22 ++ refs.h| 2 ++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/builtin/init-db.c b/builtin/init-db.c index af7fe04..04cc522 100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -220,6 +220,7 @@ static int create_default_files(const char *template_path) requested_ref_storage_backend); } + register_ref_storage_backends(); if (requested_ref_storage_backend) ref_storage_backend = requested_ref_storage_backend; if (strcmp(ref_storage_backend, "files")) { @@ -502,6 +503,8 @@ int cmd_init_db(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, prefix, init_db_options, init_db_usage, 0); + register_ref_storage_backends(); + if (requested_ref_storage_backend && !ref_storage_backend_exists(requested_ref_storage_backend)) die(_("Unknown ref storage backend %s"), diff --git a/refs.c b/refs.c index 5d69179..5fe0bac 100644 --- a/refs.c +++ b/refs.c @@ -14,14 +14,11 @@ static const char split_transaction_fail_warning[] = N_( "transaction succeeded, but then the update to the per-worktree refs " "failed. Your repository may be in an inconsistent state."); -/* - * We always have a files backend and it is the default. - */ static struct ref_storage_be *the_refs_backend = &refs_be_files; /* * List of all available backends */ -static struct ref_storage_be *refs_backends = &refs_be_files; +static struct ref_storage_be *refs_backends; const char *ref_storage_backend = "files"; @@ -48,6 +45,23 @@ int ref_storage_backend_exists(const char *name) return find_ref_storage_backend(name) != NULL; } +static void register_ref_storage_backend(struct ref_storage_be *be) +{ + be->next = refs_backends; + refs_backends = be; +} + +void register_ref_storage_backends(void) +{ + if (refs_backends) + return; + /* +* Add register_ref_storage_backend(ptr-to-backend) +* entries below when you add a new backend. +*/ + register_ref_storage_backend(&refs_be_files); +} + /* * How to handle various characters in refnames: * 0: An acceptable character for refs diff --git a/refs.h b/refs.h index 1888ad8..c0964f5 100644 --- a/refs.h +++ b/refs.h @@ -523,4 +523,6 @@ int set_ref_storage_backend(const char *name); int ref_storage_backend_exists(const char *name); +void register_ref_storage_backends(void); + #endif /* REFS_H */ -- 2.4.2.767.g62658d5-twtrsrc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v7 33/33] tests: add ref-storage argument
Add a --ref-storage argument to make it easy to test alternate ref storage backends. This means that every test that calls git init or git clone must use the new ref storage argument. Modify many tests to work under alternate ref storage backends. Introduce abstractions for raw ref/reflog reading/writing in tests instead of directly frobbing the filesystem. Conditionally skip tests that are not expected to succeed under this condition. Most of this is straightforward. Of particular note are the following test changes: * The rearrangement of commands in t1401 is because without HEAD in the right place, git doesn't recognized the trash dir as a git repo, so no git commands work. * t1430-bad-ref-name specifically blocks lmdb because other alternate backends might want to keep this test. Signed-off-by: David Turner Signed-off-by: Junio C Hamano --- t/README | 6 +++ t/lib-submodule-update.sh| 15 +-- t/lib-t6000.sh | 7 ++- t/t0008-ignores.sh | 2 +- t/t0062-revision-walking.sh | 6 +++ t/t1021-rerere-in-workdir.sh | 6 +++ t/t1200-tutorial.sh | 8 +++- t/t1302-repo-version.sh | 6 +++ t/t1305-config-include.sh| 17 ++-- t/t1400-update-ref.sh| 6 +++ t/t1401-symbolic-ref.sh | 17 +--- t/t1404-update-ref-df-conflicts.sh | 8 +++- t/t1410-reflog.sh| 6 +++ t/t1430-bad-ref-name.sh | 6 +++ t/t1450-fsck.sh | 12 ++--- t/t1506-rev-parse-diagnosis.sh | 4 +- t/t2013-checkout-submodule.sh| 2 +- t/t2105-update-index-gitfile.sh | 4 +- t/t2107-update-index-basic.sh| 6 +-- t/t2201-add-update-typechange.sh | 4 +- t/t3001-ls-files-others-exclude.sh | 2 +- t/t3010-ls-files-killed-modified.sh | 4 +- t/t3040-subprojects-basic.sh | 4 +- t/t3050-subprojects-fetch.sh | 2 +- t/t3200-branch.sh| 75 ++-- t/t3210-pack-refs.sh | 7 +++ t/t3211-peel-ref.sh | 6 +++ t/t3308-notes-merge.sh | 2 +- t/t3404-rebase-interactive.sh| 2 +- t/t3600-rm.sh| 2 +- t/t3800-mktag.sh | 4 +- t/t3903-stash.sh | 2 +- t/t4010-diff-pathspec.sh | 2 +- t/t4020-diff-external.sh | 2 +- t/t4027-diff-submodule.sh| 2 +- t/t4035-diff-quiet.sh| 2 +- t/t4255-am-submodule.sh | 2 +- t/t5000-tar-tree.sh | 3 +- t/t5304-prune.sh | 2 +- t/t5312-prune-corruption.sh | 11 - t/t5500-fetch-pack.sh| 10 ++--- t/t5510-fetch.sh | 30 ++--- t/t5526-fetch-submodules.sh | 4 +- t/t5527-fetch-odd-refs.sh| 7 +++ t/t5537-fetch-shallow.sh | 7 +++ t/t5700-clone-reference.sh | 42 +- t/t6001-rev-list-graft.sh| 3 +- t/t6010-merge-base.sh| 2 +- t/t6050-replace.sh | 4 +- t/t6120-describe.sh | 6 ++- t/t6301-for-each-ref-errors.sh | 12 ++--- t/t7201-co.sh| 2 +- t/t7300-clean.sh | 25 ++- t/t7400-submodule-basic.sh | 22 +- t/t7402-submodule-rebase.sh | 2 +- t/t7405-submodule-merge.sh | 10 ++--- t/t9104-git-svn-follow-parent.sh | 3 +- t/t9115-git-svn-dcommit-funky-renames.sh | 2 +- t/t9350-fast-export.sh | 6 +-- t/t9902-completion.sh| 4 +- t/t9903-bash-prompt.sh | 2 +- t/test-lib-functions.sh | 53 +- t/test-lib.sh| 11 + 63 files changed, 370 insertions(+), 185 deletions(-) diff --git a/t/README b/t/README index 1dc908e..8e047cb 100644 --- a/t/README +++ b/t/README @@ -178,6 +178,12 @@ appropriately before running "make". this feature by setting the GIT_TEST_CHAIN_LINT environment variable to "1" or "0", respectively. +--ref-storage=:: + If --ref-storage is set, run tests under an alternate ref + backend. Tests that rely on the original files backend will + be skipped. You may also enable or disable this feature by + setting the GIT_TEST_REF_STORAGE environment variable + You can also set the GIT_TEST_INSTALLED environment variable to the bindir of an existing git installation to test that installation. You still need to have built this git sandbox, fr