Re: [PATCH v4 10/17] trailer: if no input file is passed, read from stdin
On Thu, Jan 30, 2014 at 1:49 AM, Christian Couder chrisc...@tuxfamily.org wrote: It is simpler and more natural if the git interpret-trailers is made a filter as its output already goes to sdtout. Signed-off-by: Christian Couder chrisc...@tuxfamily.org --- diff --git a/trailer.c b/trailer.c index 8681aed..73a65e0 100644 --- a/trailer.c +++ b/trailer.c @@ -464,8 +464,13 @@ static struct strbuf **read_input_file(const char *infile) { struct strbuf sb = STRBUF_INIT; - if (strbuf_read_file(sb, infile, 0) 0) - die_errno(_(could not read input file '%s'), infile); + if (infile) { + if (strbuf_read_file(sb, infile, 0) 0) + die_errno(_(could not read input file '%s'), infile); + } else { + if (strbuf_read(sb, fileno(stdin), 0) 0) strbuf_fread(), perhaps? + die_errno(_(could not read from stdin)); + } return strbuf_split(sb, '\n'); } @@ -530,10 +535,8 @@ void process_trailers(const char *infile, int trim_empty, int argc, const char * git_config(git_trailer_config, NULL); - /* Print the non trailer part of infile */ - if (infile) { - process_input_file(infile, infile_tok_first, infile_tok_last); - } + /* Print the non trailer part of infile (or stdin if infile is NULL) */ + process_input_file(infile, infile_tok_first, infile_tok_last); arg_tok_first = process_command_line_args(argc, argv); -- 1.8.5.2.201.gacc5987 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 11/17] trailer: add new_trailer_item() function
On Thu, Jan 30, 2014 at 1:49 AM, Christian Couder chrisc...@tuxfamily.org wrote: This is a small refactoring to prepare for the next steps. Since this is all brand new code, wouldn't it make more sense to structure it in this fashion in the first place when introduced in patch 4/17? It's not clear why it should be introduced with poorer structure and then later cleaned up. Signed-off-by: Christian Couder chrisc...@tuxfamily.org --- trailer.c | 31 +++ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/trailer.c b/trailer.c index 73a65e0..430ff39 100644 --- a/trailer.c +++ b/trailer.c @@ -399,11 +399,27 @@ static void parse_trailer(struct strbuf *tok, struct strbuf *val, const char *tr } } +static struct trailer_item *new_trailer_item(struct trailer_item *conf_item, +const char* tok, const char* val) +{ + struct trailer_item *new = xcalloc(sizeof(struct trailer_item), 1); + new-value = val; + + if (conf_item) { + new-conf = conf_item-conf; + new-token = xstrdup(conf_item-conf-key); + } else { + new-conf = xcalloc(sizeof(struct conf_info), 1); + new-token = tok; + } + + return new; +} + static struct trailer_item *create_trailer_item(const char *string) { struct strbuf tok = STRBUF_INIT; struct strbuf val = STRBUF_INIT; - struct trailer_item *new; struct trailer_item *item; int tok_alnum_len; @@ -415,21 +431,12 @@ static struct trailer_item *create_trailer_item(const char *string) for (item = first_conf_item; item; item = item-next) { if (!strncasecmp(tok.buf, item-conf-key, tok_alnum_len) || !strncasecmp(tok.buf, item-conf-name, tok_alnum_len)) { - new = xcalloc(sizeof(struct trailer_item), 1); - new-conf = item-conf; - new-token = xstrdup(item-conf-key); - new-value = strbuf_detach(val, NULL); strbuf_release(tok); - return new; + return new_trailer_item(item, NULL, strbuf_detach(val, NULL)); } } - new = xcalloc(sizeof(struct trailer_item), 1); - new-conf = xcalloc(sizeof(struct conf_info), 1); - new-token = strbuf_detach(tok, NULL); - new-value = strbuf_detach(val, NULL); - - return new; + return new_trailer_item(NULL, strbuf_detach(tok, NULL), strbuf_detach(val, NULL));; } static void add_trailer_item(struct trailer_item **first, -- 1.8.5.2.201.gacc5987 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Creating own hierarchies under $GITDIR/refs ?
Hi, in the context of an ongoing discussion on the Emacs developer list of converting the Bzr repository of Emacs, one question (with different approaches) is where to put the information regarding preexisting Bazaar revision numbers and bug tracker ids: those are not present in the current Git mirror. Putting them in the commit messages would require a full history rewrite, and if some are missed in the process, this cannot be fixed afterwards. So I mused: refs/heads contains branches, refs/tags contains tags. The respective information would likely easily enough be stored in refs/bzr and refs/bugs and in that manner would not pollute the ordinary tag and branch spaces, rendering git tag and/or git branch output mostly unusable. I tested creating such a directory and entries and indeed references like bzr/39005 then worked. However, cloning from the repository did not copy those directories and references, so without modification, this scheme would not work for cloned repositories. Are there some measures one can take/configure in the parent repository such that (named or all) additional directories inside of $GITDIR/refs would get cloned along with the rest? It would definitely open viable options for dealing with mirrors and/or repository migrations in general. -- David Kastrup -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] userdiff: update Ada patterns
- Allow extra space in is new and is separate - Fix bug in word regex for numbers Signed-off-by: Adrian Johnson ajohn...@redneon.com --- t/t4034/ada/expect | 2 +- userdiff.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/t/t4034/ada/expect b/t/t4034/ada/expect index be2376e..a682d28 100644 --- a/t/t4034/ada/expect +++ b/t/t4034/ada/expect @@ -4,7 +4,7 @@ BOLD+++ b/postRESET CYAN@@ -1,13 +1,13 @@RESET Ada.Text_IO.Put_Line(Hello WorldRED!RESETGREEN?RESET); -1 1eRED-RESET10 16#FE12#E2 3.141_592 'REDxRESETGREENyRESET' +1 RED1e-10RESETGREEN1e10RESET 16#FE12#E2 3.141_592 'REDxRESETGREENyRESET' REDaRESETGREENxRESET+REDb aRESETGREENy xRESET-REDbRESET REDaRESETGREENyRESET GREENxRESET*REDb aRESETGREENy xRESET/REDbRESET diff --git a/userdiff.c b/userdiff.c index ea43a03..e8915bf 100644 --- a/userdiff.c +++ b/userdiff.c @@ -15,13 +15,13 @@ static int drivers_alloc; word_regex |[^[:space:]]|[\xc0-\xff][\x80-\xbf]+ } static struct userdiff_driver builtin_drivers[] = { IPATTERN(ada, -!^(.*[ \t])?(is new|renames|is separate)([ \t].*)?$\n +!^(.*[ \t])?(is[ \t]+new|renames|is[ \t]+separate)([ \t].*)?$\n !^[ \t]*with[ \t].*$\n ^[ \t]*((procedure|function)[ \t]+.*)$\n ^[ \t]*((package|protected|task)[ \t]+.*)$, /* -- */ [a-zA-Z][a-zA-Z0-9_]* -|[0-9][-+0-9#_.eE] +|[-+0-9#_.eE]+ |=|\\.\\.|\\*\\*|:=|/=|=|=|||), IPATTERN(fortran, !^([C*]|[ \t]*!)\n -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Creating own hierarchies under $GITDIR/refs ?
On Sun, Feb 2, 2014 at 5:37 PM, David Kastrup d...@gnu.org wrote: in the context of an ongoing discussion on the Emacs developer list of converting the Bzr repository of Emacs, one question (with different approaches) is where to put the information regarding preexisting Bazaar revision numbers and bug tracker ids: those are not present in the current Git mirror. Putting them in the commit messages would require a full history rewrite, and if some are missed in the process, this cannot be fixed afterwards. What do you need them for? Perhaps putting everything in a file, maybe sorted by SHA-1, would suffice? It should not be too hard to write a script to map bug tracker id to a commit id. The file is for past commits only. New commits can contain these info in their messages. -- Duy -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 3/4] setup: Add 'abspath_part_inside_repo' function
On Sun, Feb 02, 2014 at 09:19:04AM +0700, Duy Nguyen wrote: On Sun, Feb 2, 2014 at 8:59 AM, Martin Erik Werner martinerikwer...@gmail.com wrote: + /* check if work tree is already the prefix */ + if (strncmp(path, work_tree, wtlen) == 0) { + if (path[wtlen] == '/') + memmove(path, path + wtlen + 1, len - wtlen); + else + /* work tree is the root, or the whole path */ + memmove(path, path + wtlen, len - wtlen + 1); + return 0; + } No the 4th time is not the charm yet :) if path is /abc/defghi and work_tree is /abc/def you don't want to return ghi as the prefix here. Ah indeed, this should catch that: diff --git a/setup.c b/setup.c index 2270bd4..5817875 100644 --- a/setup.c +++ b/setup.c @@ -32,9 +32,11 @@ static inline int abspath_part_inside_repo(char *path) if (strncmp(path, work_tree, wtlen) == 0) { if (path[wtlen] == '/') memmove(path, path + wtlen + 1, len - wtlen); - else + else if (path[wtlen - 1] == '/' || path[wtlen] == '\0') /* work tree is the root, or the whole path */ memmove(path, path + wtlen, len - wtlen + 1); + else + return -1; return 0; } path0 = path; Is it worth adding a test for this as well?: diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index f6f378b..05d3366 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -201,6 +201,10 @@ test_expect_success 'prefix_path works with only absolute path to work tree' ' test_cmp expected actual ' +test_expect_success 'prefix_path rejects absolute path to dir with same beginning as work tree' ' + test_must_fail test-path-utils prefix_path prefix $(pwd)a +' + relative_path /foo/a/b/c/ /foo/a/b/ c/ relative_path /foo/a/b/c/ /foo/a/bc/ relative_path /foo/a//b//c////foo/a/b//c/ POSIX + path0 = path; + path += offset_1st_component(path); + + /* check each level */ + while (*path != '\0') { + path++; To me it looks like we could write for (; *path; path++) { or even for (path += offset_1st_component(path); *path; path++) { but it's personal taste.. Yeah, I think aesthetically I don't like cramming too much into the for loop: for (path += offset_1st_component(path0) + 1; *path; path++) { neither leaving the init expression unused. So as long as it's just personal taste I think I'll stick with the current while loop format. But I'll exchange (*path == '\0') for (*path) though. -- Martin Erik Werner martinerikwer...@gmail.com -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 17/17] Documentation: add documentation for 'git interpret-trailers'
Patch 17/17 of v4 failed to arrive in my inbox for some reason, so I'll just reply to v3 since there's another error I noticed which is still present in v4, plus a comment specific to v4 (see below). On Mon, Jan 27, 2014 at 3:33 PM, Christian Couder chrisc...@tuxfamily.org wrote: From: Eric Sunshine sunsh...@sunshineco.com On Sun, Jan 26, 2014 at 12:00 PM, Christian Couder chrisc...@tuxfamily.org wrote: +By default, a 'token=value' or 'token:value' argument will be added +only if no trailer with the same (token, value) pair is already in the +message. The 'token' and 'value' parts will be trimmed to remove +starting and trailing white spaces, and the resulting trimmed 'token' Other git documentation uniformly spells it as whitespace rather than white spaces. You are right I will change that. The rest of git documentation consistently spells it whitespace, but v4 uses whitespaces. +infile=file:: + Read the commit message from `file` instead of the standard + input. I didn't notice this when reviewing v3, and the same text appears in v4. There are a couple extra hyphens before 'infile', and you want and around file, so: s/infile=file/--infile=file/ -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 3/4] setup: Add 'abspath_part_inside_repo' function
Martin Erik Werner martinerikwer...@gmail.com writes: On Sun, Feb 02, 2014 at 09:19:04AM +0700, Duy Nguyen wrote: On Sun, Feb 2, 2014 at 8:59 AM, Martin Erik Werner martinerikwer...@gmail.com wrote: + /* check if work tree is already the prefix */ + if (strncmp(path, work_tree, wtlen) == 0) { + if (path[wtlen] == '/') + memmove(path, path + wtlen + 1, len - wtlen); + else + /* work tree is the root, or the whole path */ + memmove(path, path + wtlen, len - wtlen + 1); + return 0; + } No the 4th time is not the charm yet :) if path is /abc/defghi and work_tree is /abc/def you don't want to return ghi as the prefix here. Ah indeed, this should catch that: diff --git a/setup.c b/setup.c index 2270bd4..5817875 100644 --- a/setup.c +++ b/setup.c @@ -32,9 +32,11 @@ static inline int abspath_part_inside_repo(char *path) if (strncmp(path, work_tree, wtlen) == 0) { if (path[wtlen] == '/') memmove(path, path + wtlen + 1, len - wtlen); - else + else if (path[wtlen - 1] == '/' || path[wtlen] == '\0') Is wtlen guaranteed to be nonzero? -- David Kastrup -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Creating own hierarchies under $GITDIR/refs ?
On Sun, Feb 02, 2014 at 12:19:43PM +0100, David Kastrup wrote: Duy Nguyen pclo...@gmail.com writes: The file is for past commits only. New commits can contain these info in their messages. If it's not forgotten. Experience shows that things like issue numbers have a tendency to be omitted, and then they stay missing. At any rate, this is exactly the kind of stuff that tags are useful for, except that using them for all that would render the tag space overcrowded. Actually, I would say this is exactly the sort of thing notes are for. git.git uses them to map commits back to mailing list discussions: git fetch git://github.com/gitster/git +refs/notes/amlog:refs/notes/amlog git log --notes=amlog See also notes.displayRef in git-config(1). Notes aren't fetch by default, but it's not hard for those interested to add a remote.*.fetch line to their config. -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 3/4] setup: Add 'abspath_part_inside_repo' function
On 2014-02-02 12.21, David Kastrup wrote: Martin Erik Werner martinerikwer...@gmail.com writes: On Sun, Feb 02, 2014 at 09:19:04AM +0700, Duy Nguyen wrote: On Sun, Feb 2, 2014 at 8:59 AM, Martin Erik Werner martinerikwer...@gmail.com wrote: + /* check if work tree is already the prefix */ + if (strncmp(path, work_tree, wtlen) == 0) { + if (path[wtlen] == '/') + memmove(path, path + wtlen + 1, len - wtlen); + else + /* work tree is the root, or the whole path */ + memmove(path, path + wtlen, len - wtlen + 1); + return 0; + } No the 4th time is not the charm yet :) if path is /abc/defghi and work_tree is /abc/def you don't want to return ghi as the prefix here. Ah indeed, this should catch that: diff --git a/setup.c b/setup.c index 2270bd4..5817875 100644 --- a/setup.c +++ b/setup.c @@ -32,9 +32,11 @@ static inline int abspath_part_inside_repo(char *path) if (strncmp(path, work_tree, wtlen) == 0) { if (path[wtlen] == '/') memmove(path, path + wtlen + 1, len - wtlen); -else +else if (path[wtlen - 1] == '/' || path[wtlen] == '\0') Is wtlen guaranteed to be nonzero? Another comment: The raw comparison with '/' is probably working well on all POSIX/Linux/Unix systems. To be more portable, the macro is_dir_sep() can be used: if (is_dir_sep(path[wtlen])) -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Creating own hierarchies under $GITDIR/refs ?
John Keeping j...@keeping.me.uk writes: On Sun, Feb 02, 2014 at 12:19:43PM +0100, David Kastrup wrote: Duy Nguyen pclo...@gmail.com writes: The file is for past commits only. New commits can contain these info in their messages. If it's not forgotten. Experience shows that things like issue numbers have a tendency to be omitted, and then they stay missing. At any rate, this is exactly the kind of stuff that tags are useful for, except that using them for all that would render the tag space overcrowded. Actually, I would say this is exactly the sort of thing notes are for. git.git uses them to map commits back to mailing list discussions: But that's the wrong direction. What is needed in the Emacs case is mapping the Bazaar reference numbers (and bug numbers) to commits. While it is true that the history rewriting approach would not deliver this either (short of git log --grep with suitable patterns), I was looking for something less of a crutch here. Notes aren't fetch by default, but it's not hard for those interested to add a remote.*.fetch line to their config. If we are talking about measures everybody has to actively take before getting access to functionality, this does not cross the convenience threshold making it a solution preferred over others. But it's probably feasible to configure a fetch line doing this that will get cloned when first cloning a repository. That's not too hot for people with existing repositories, but since we are talking about a migration from Bazaar anyway, Git users currently are so by choice and so might be more willing to update their configuration if it helps with avoiding a fully new clone. -- David Kastrup -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Creating own hierarchies under $GITDIR/refs ?
On Sun, Feb 2, 2014 at 6:19 PM, David Kastrup d...@gnu.org wrote: Since Git has a working facility for references that is catered to do exactly this kind of mapping and already _does_, it seems like a convenient path to explore. It will not scale. If you make those refs available for cloning/fetching, all of them will be advertised first thing when git starts negotiate. Imagine thousands of refs (and keep increasing) sent to the receiver at the beginning of every connection. Something like reverse git-notes may transfer more efficiently. Or we need to improve git protocol to handle massive refs better, something that's been discussed for a while without any outcome. -- Duy -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Creating own hierarchies under $GITDIR/refs ?
Duy Nguyen pclo...@gmail.com writes: On Sun, Feb 2, 2014 at 6:19 PM, David Kastrup d...@gnu.org wrote: Since Git has a working facility for references that is catered to do exactly this kind of mapping and already _does_, it seems like a convenient path to explore. It will not scale. If you make those refs available for cloning/fetching, all of them will be advertised first thing when git starts negotiate. Imagine thousands of refs (and keep increasing) sent to the receiver at the beginning of every connection. In current LilyPond repository: git tag|wc 969 969 15161 In current Emacs mirror: git tag|wc 12021202 15729 In current Git repository: git tag|wc 498 4984820 Something like reverse git-notes may transfer more efficiently. Or we need to improve git protocol to handle massive refs better, something that's been discussed for a while without any outcome. I think that even disregarding special use of references, _existing_ practice would already appear to warrant being able to deal with thousands of refs in a reasonable manner. It's a reasonable expectation to have a tag per (potentially intermediate) release or release candidate. For any project publishing reproducible daily snapshots, the threshold of 1000 will get reached within few years. Of course, it is relevant information to know that right _now_ references will not scale. But that does not seem like a defensible long-term perspective. -- David Kastrup -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 3/4] setup: Add 'abspath_part_inside_repo' function
On Sun, Feb 02, 2014 at 12:37:16PM +0100, Torsten Bögershausen wrote: On 2014-02-02 12.21, David Kastrup wrote: Martin Erik Werner martinerikwer...@gmail.com writes: On Sun, Feb 02, 2014 at 09:19:04AM +0700, Duy Nguyen wrote: On Sun, Feb 2, 2014 at 8:59 AM, Martin Erik Werner martinerikwer...@gmail.com wrote: + /* check if work tree is already the prefix */ + if (strncmp(path, work_tree, wtlen) == 0) { + if (path[wtlen] == '/') + memmove(path, path + wtlen + 1, len - wtlen); + else + /* work tree is the root, or the whole path */ + memmove(path, path + wtlen, len - wtlen + 1); + return 0; + } No the 4th time is not the charm yet :) if path is /abc/defghi and work_tree is /abc/def you don't want to return ghi as the prefix here. Ah indeed, this should catch that: diff --git a/setup.c b/setup.c index 2270bd4..5817875 100644 --- a/setup.c +++ b/setup.c @@ -32,9 +32,11 @@ static inline int abspath_part_inside_repo(char *path) if (strncmp(path, work_tree, wtlen) == 0) { if (path[wtlen] == '/') memmove(path, path + wtlen + 1, len - wtlen); - else + else if (path[wtlen - 1] == '/' || path[wtlen] == '\0') Is wtlen guaranteed to be nonzero? Hmm, am I incorrect in thinking if (!work_tree) takes care of that? Another comment: The raw comparison with '/' is probably working well on all POSIX/Linux/Unix systems. To be more portable, the macro is_dir_sep() can be used: if (is_dir_sep(path[wtlen])) Since the path is already normalized by 'normalize_path_copy_len' which seems to guarantee '/'-separation, I have assumed that this was unnecessary? -- Martin Erik Werner martinerikwer...@gmail.com -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 3/4] setup: Add 'abspath_part_inside_repo' function
On Sun, Feb 2, 2014 at 6:13 PM, Martin Erik Werner martinerikwer...@gmail.com wrote: diff --git a/setup.c b/setup.c index 2270bd4..5817875 100644 --- a/setup.c +++ b/setup.c @@ -32,9 +32,11 @@ static inline int abspath_part_inside_repo(char *path) if (strncmp(path, work_tree, wtlen) == 0) { if (path[wtlen] == '/') memmove(path, path + wtlen + 1, len - wtlen); - else + else if (path[wtlen - 1] == '/' || path[wtlen] == '\0') /* work tree is the root, or the whole path */ memmove(path, path + wtlen, len - wtlen + 1); + else + return -1; No, you can't return here. /abc/defghi might actually be a symlink to /abc/def. If it does not match, let the following loop handle it. -- Duy -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Creating own hierarchies under $GITDIR/refs ?
On Sun, Feb 02, 2014 at 12:42:52PM +0100, David Kastrup wrote: John Keeping j...@keeping.me.uk writes: On Sun, Feb 02, 2014 at 12:19:43PM +0100, David Kastrup wrote: Duy Nguyen pclo...@gmail.com writes: The file is for past commits only. New commits can contain these info in their messages. If it's not forgotten. Experience shows that things like issue numbers have a tendency to be omitted, and then they stay missing. At any rate, this is exactly the kind of stuff that tags are useful for, except that using them for all that would render the tag space overcrowded. Actually, I would say this is exactly the sort of thing notes are for. git.git uses them to map commits back to mailing list discussions: But that's the wrong direction. What is needed in the Emacs case is mapping the Bazaar reference numbers (and bug numbers) to commits. Ah, OK. I hadn't quite read carefully enough. I actually wonder if you could do this with notes and git-grep; for example: git grep -l keeping.me.uk refs/notes/amlog | sed -e 's/.*://' -e 's!/!!g' That should be relatively efficient since you're only looking at the current notes tree. While it is true that the history rewriting approach would not deliver this either (short of git log --grep with suitable patterns), I was looking for something less of a crutch here. Notes aren't fetch by default, but it's not hard for those interested to add a remote.*.fetch line to their config. If we are talking about measures everybody has to actively take before getting access to functionality, this does not cross the convenience threshold making it a solution preferred over others. But it's probably feasible to configure a fetch line doing this that will get cloned when first cloning a repository. I'm assuming you'll need some form of tool (at least a script) to manipulate this feature; it wouldn't be too hard for that to set this up the first time it's run. -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 3/4] setup: Add 'abspath_part_inside_repo' function
Another comment: The raw comparison with '/' is probably working well on all POSIX/Linux/Unix systems. To be more portable, the macro is_dir_sep() can be used: if (is_dir_sep(path[wtlen])) Since the path is already normalized by 'normalize_path_copy_len' which seems to guarantee '/'-separation, I have assumed that this was unnecessary? So true, sorry for the noise. -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] fast-import.c: always honor the filename case
fast-import should not use strncmp_icase. When it does, files with similar names, but different case can be lost in the import. For example... M 100644 :1 FileName.txt D Filename.txt ...would end up deleting FileName from the index during the fast- import when strncmp_icase is used and core.ignorecase=true. The intent in the above snippet is to rename the file, not delete it. Replacing strncmp_icase with strncmp in fast-import.c fixes the issue. alternatives: * check if the filesystem is case-preserving. If it is, don't set core.ignorecase=true. This, however, exposes another issue where git is tricked by stat into thinking that tracked files are untracked on case-preserving and case-insensitive filesystems. --- fast-import.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fast-import.c b/fast-import.c index f4d9969..62e28c0 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1500,7 +1500,7 @@ static int tree_content_set( t = root-tree; for (i = 0; i t-entry_count; i++) { e = t-entries[i]; - if (e-name-str_len == n !strncmp_icase(p, e-name-str_dat, n)) { + if (e-name-str_len == n !strncmp(p, e-name-str_dat, n)) { if (!slash1) { if (!S_ISDIR(mode) e-versions[1].mode == mode @@ -1593,7 +1593,7 @@ static int tree_content_remove( t = root-tree; for (i = 0; i t-entry_count; i++) { e = t-entries[i]; - if (e-name-str_len == n !strncmp_icase(p, e-name-str_dat, n)) { + if (e-name-str_len == n !strncmp(p, e-name-str_dat, n)) { if (slash1 !S_ISDIR(e-versions[1].mode)) /* * If p names a file in some subdirectory, and a @@ -1663,7 +1663,7 @@ static int tree_content_get( t = root-tree; for (i = 0; i t-entry_count; i++) { e = t-entries[i]; - if (e-name-str_len == n !strncmp_icase(p, e-name-str_dat, n)) { + if (e-name-str_len == n !strncmp(p, e-name-str_dat, n)) { if (!slash1) goto found_entry; if (!S_ISDIR(e-versions[1].mode)) -- 1.8.5.3.1.gac93028.dirty -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [msysGit] Re: [PATCH v2] repack.c: Use move_temp_to_file() instead of rename()
(It seems as if the mail went only to Junio, sorry) On 2014-02-02 16.09, Torsten Bögershausen wrote: On 2014-01-29 19.17, Junio C Hamano wrote: But after a closer inspection, I no longer think that hunk is an improvement. These new packfiles were created by pack-objects, which finishes each packfile it produces by calling finish_tmp_packfile(), and that is where adjust_shared_perm() happens already. As far as pack-objects that was called from repack is concerned, these new packfiles are not temporary; they are finished product. It may be OK to remove them as part of rewind back to the original state, as a later phase of repack failed if we saw a failure (but note that the original git-repack.sh didn't), but a plain vanilla rename(2) without any frills is what we want to happen to them. Thanks for deeper inspection, I now suspect the root cause to be here: -- 8 -- Subject: [PATCH v3] repack.c: Rename and unlink pack file if it exists This comment in builtin/repack.c: * First see if there are packs of the same name and if so * if we can move them out of the way (this can happen if we * repacked immediately after packing fully. indicates that when a repo was fully repacked, and is repacked again, we may run into the situation that new packfiles have the name (and content) as already existing ones. The logic is to rename the existing ones into filename like old-XXX, create the new ones and remove the old- ones. When something went wrong, a manual roll-back could be done be renaming the old- files. The renaming into old- did not work as designed, because file_exists() was done on the wrong file name. This could give problems for a repo on a network share under Windows, as reported by Jochen Haag zwanzi...@googlemail.com Solution: Create the right file name, like this: fname = mkpathdup(%s/pack-%s%s, packdir, and when removing the old-files: fname_old = mkpath(%s/old-%s%s, Rename the variables to match what they are used for: fname, fname_old and fname_tmp Signed-off-by: Torsten Bögershausen tbo...@web.de --- builtin/repack.c | 24 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 6284846..de69401 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -258,7 +258,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) for_each_string_list_item(item, names) { for (ext = 0; ext 2; ext++) { char *fname, *fname_old; - fname = mkpathdup(%s/%s%s, packdir, + fname = mkpathdup(%s/pack-%s%s, packdir, item-string, exts[ext]); if (!file_exists(fname)) { free(fname); @@ -314,33 +314,33 @@ int cmd_repack(int argc, const char **argv, const char *prefix) /* Now the ones with the same name are out of the way... */ for_each_string_list_item(item, names) { for (ext = 0; ext 2; ext++) { - char *fname, *fname_old; + char *fname, *fname_tmp; struct stat statbuffer; fname = mkpathdup(%s/pack-%s%s, packdir, item-string, exts[ext]); - fname_old = mkpathdup(%s-%s%s, + fname_tmp = mkpathdup(%s-%s%s, packtmp, item-string, exts[ext]); - if (!stat(fname_old, statbuffer)) { + if (!stat(fname_tmp, statbuffer)) { statbuffer.st_mode = ~(S_IWUSR | S_IWGRP | S_IWOTH); - chmod(fname_old, statbuffer.st_mode); + chmod(fname_tmp, statbuffer.st_mode); } - if (rename(fname_old, fname)) - die_errno(_(renaming '%s' failed), fname_old); + if (rename(fname_tmp, fname)) + die_errno(_(renaming '%s' into '%s' failed), fname_tmp, fname); free(fname); - free(fname_old); + free(fname_tmp); } } /* Remove the old- files */ for_each_string_list_item(item, names) { for (ext = 0; ext 2; ext++) { - char *fname; - fname = mkpath(%s/old-pack-%s%s, + char *fname_old; + fname_old = mkpath(%s/old-%s%s, packdir, item-string, exts[ext]); - if (remove_path(fname)) - warning(_(removing '%s' failed), fname); + if (remove_path(fname_old)) +
[PATCH v5 0/5] Handling of in-tree symlinks for absolute paths
Again! (It seems you missed to CC me in your first reply David, please do :) New reroll, fixing the /dir/repoa and /dir/repolink - /dir/repo issues noted by Duy, and adding corresponding tests. If work tree matches beginning of path but needs further checking, it starts from the end of the work tree length, so as minimize the 'real_path' calls. Martin Erik Werner (5): t0060: Add test for manipulating symlinks via absolute paths t0060: Add test for prefix_path when path == work tree t0060: Add tests for prefix_path when path begins with work tree setup: Add 'abspath_part_inside_repo' function setup: Don't dereference in-tree symlinks for absolute paths setup.c | 100 -- t/t0060-path-utils.sh | 21 +++ 2 files changed, 101 insertions(+), 20 deletions(-) -- 1.8.5.2 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 4/5] setup: Add 'abspath_part_inside_repo' function
In order to extract the part of an absolute path which lies inside the repo, it is not possible to directly use real_path, since that would dereference symlinks both outside and inside the work tree. Add an 'abspath_part_inside_repo' function which first checks if the work tree is already the prefix, then incrementally checks each path level by temporarily NUL-terminating at each '/' and comparing against the work tree path. If a match is found, it overwrites the input path with the remainder past the work tree (which will be the in-repo part). The path being exactly equal to the work tree is handled separately, since then there is no directory separator between the work tree and in-repo part. This function is currently only intended for use in 'prefix_path_gently'. Signed-off-by: Martin Erik Werner martinerikwer...@gmail.com --- setup.c | 64 1 file changed, 64 insertions(+) diff --git a/setup.c b/setup.c index 5432a31..a2e60ab 100644 --- a/setup.c +++ b/setup.c @@ -6,6 +6,70 @@ static int inside_git_dir = -1; static int inside_work_tree = -1; /* + * No checking if the path is in fact an absolute path is done, and it must + * already be normalized. + * + * Find the part of an absolute path that lies inside the work tree by + * dereferencing symlinks outside the work tree, for example: + * /dir1/repo/dir2/file (work tree is /dir1/repo) - dir2/file + * /dir/file (work tree is /) - dir/file + * /dir/symlink1/symlink2 (symlink1 points to work tree) - symlink2 + * /dir/repolink/file (repolink points to /dir/repo) - file + * /dir/repo (exactly equal to work tree) - (empty string) + */ +static inline int abspath_part_inside_repo(char *path) +{ + size_t len; + size_t wtlen; + char *path0; + int off; + + const char* work_tree = get_git_work_tree(); + if (!work_tree) + return -1; + wtlen = strlen(work_tree); + len = strlen(path); + off = 0; + + /* check if work tree is already the prefix */ + if (strncmp(path, work_tree, wtlen) == 0) { + if (path[wtlen] == '/') { + memmove(path, path + wtlen + 1, len - wtlen); + return 0; + } else if (path[wtlen - 1] == '/' || path[wtlen] == '\0') { + /* work tree is the root, or the whole path */ + memmove(path, path + wtlen, len - wtlen + 1); + return 0; + } + /* work tree might match beginning of a symlink to work tree */ + off = wtlen; + } + path0 = path; + path += offset_1st_component(path) + off; + + /* check each level */ + while (*path) { + path++; + if (*path == '/') { + *path = '\0'; + if (strcmp(real_path(path0), work_tree) == 0) { + memmove(path0, path + 1, len - (path - path0)); + return 0; + } + *path = '/'; + } + } + + /* check whole path */ + if (strcmp(real_path(path0), work_tree) == 0) { + *path0 = '\0'; + return 0; + } + + return -1; +} + +/* * Normalize path, prepending the prefix for relative paths. If * remaining_prefix is not NULL, return the actual prefix still * remains in the path. For example, prefix = sub1/sub2/ and path is -- 1.8.5.2 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 2/5] t0060: Add test for prefix_path when path == work tree
The current behaviour of prefix_path is to return an empty string if prefixing and absolute path that only contains exactly the work tree. This behaviour is a potential regression point. Signed-off-by: Martin Erik Werner martinerikwer...@gmail.com --- t/t0060-path-utils.sh | 6 ++ 1 file changed, 6 insertions(+) diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index 0bba988..b8e92e1 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -195,6 +195,12 @@ test_expect_failure SYMLINKS 'prefix_path works with absolute paths to work tree test $(test-path-utils prefix_path prefix $(pwd)/symlink) = symlink ' +test_expect_success 'prefix_path works with only absolute path to work tree' ' + echo expected + test-path-utils prefix_path prefix $(pwd) actual + test_cmp expected actual +' + relative_path /foo/a/b/c/ /foo/a/b/ c/ relative_path /foo/a/b/c/ /foo/a/bc/ relative_path /foo/a//b//c////foo/a/b//c/ POSIX -- 1.8.5.2 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 1/5] t0060: Add test for manipulating symlinks via absolute paths
When symlinks in the working tree are manipulated using the absolute path, git dereferences them, and tries to manipulate the link target instead. This is a regression introduced by 18e051a: setup: translate symlinks in filename when using absolute paths (which did not take symlinks in the work tree into consideration). Add a known-breakage tests using the prefix_path function, which currently uses real_path, causing the dereference. Signed-off-by: Martin Erik Werner martinerikwer...@gmail.com --- t/t0060-path-utils.sh | 5 + 1 file changed, 5 insertions(+) diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index 07c10c8..0bba988 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -190,6 +190,11 @@ test_expect_success SYMLINKS 'real path works on symlinks' ' test $sym = $(test-path-utils real_path $dir2/syml) ' +test_expect_failure SYMLINKS 'prefix_path works with absolute paths to work tree symlinks' ' + ln -s target symlink + test $(test-path-utils prefix_path prefix $(pwd)/symlink) = symlink +' + relative_path /foo/a/b/c/ /foo/a/b/ c/ relative_path /foo/a/b/c/ /foo/a/bc/ relative_path /foo/a//b//c////foo/a/b//c/ POSIX -- 1.8.5.2 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 5/5] setup: Don't dereference in-tree symlinks for absolute paths
The 'prefix_path_gently' function currently applies real_path to everything if given an absolute path, dereferencing symlinks both outside and inside the work tree. In order to manipulate symlinks in the work tree using absolute paths, symlinks should only be dereferenced outside the work tree. Modify the 'prefix_path_gently' to first normalize the path in order to make sure path levels are separated by '/', then pass the result to 'abspath_part_inside_repo' to find the in-repo part (without dereferencing any symlinks inside the work tree). For absolute paths, 'prefix_path_gently' did not, nor does now do, any actual prefixing, hence the result from 'abspath_part_in_repo' is returned as-is. Fixes t0060-82. Signed-off-by: Martin Erik Werner martinerikwer...@gmail.com --- setup.c | 36 t/t0060-path-utils.sh | 2 +- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/setup.c b/setup.c index a2e60ab..230505c 100644 --- a/setup.c +++ b/setup.c @@ -86,11 +86,23 @@ char *prefix_path_gently(const char *prefix, int len, const char *orig = path; char *sanitized; if (is_absolute_path(orig)) { - const char *temp = real_path(path); - sanitized = xmalloc(len + strlen(temp) + 1); - strcpy(sanitized, temp); + char *npath; + + npath = xmalloc(strlen(path) + 1); if (remaining_prefix) *remaining_prefix = 0; + if (normalize_path_copy_len(npath, path, remaining_prefix)) { + free(npath); + return NULL; + } + if (abspath_part_inside_repo(npath)) { + free(npath); + return NULL; + } + + sanitized = xmalloc(strlen(npath) + 1); + strcpy(sanitized, npath); + free(npath); } else { sanitized = xmalloc(len + strlen(path) + 1); if (len) @@ -98,26 +110,10 @@ char *prefix_path_gently(const char *prefix, int len, strcpy(sanitized + len, path); if (remaining_prefix) *remaining_prefix = len; - } - if (normalize_path_copy_len(sanitized, sanitized, remaining_prefix)) - goto error_out; - if (is_absolute_path(orig)) { - size_t root_len, len, total; - const char *work_tree = get_git_work_tree(); - if (!work_tree) - goto error_out; - len = strlen(work_tree); - root_len = offset_1st_component(work_tree); - total = strlen(sanitized) + 1; - if (strncmp(sanitized, work_tree, len) || - (len root_len sanitized[len] != '\0' sanitized[len] != '/')) { - error_out: + if (normalize_path_copy_len(sanitized, sanitized, remaining_prefix)) { free(sanitized); return NULL; } - if (sanitized[len] == '/') - len++; - memmove(sanitized, sanitized + len, total - len); } return sanitized; } diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index c0a14f6..f04b82d 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -190,7 +190,7 @@ test_expect_success SYMLINKS 'real path works on symlinks' ' test $sym = $(test-path-utils real_path $dir2/syml) ' -test_expect_failure SYMLINKS 'prefix_path works with absolute paths to work tree symlinks' ' +test_expect_success SYMLINKS 'prefix_path works with absolute paths to work tree symlinks' ' ln -s target symlink test $(test-path-utils prefix_path prefix $(pwd)/symlink) = symlink ' -- 1.8.5.2 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 3/5] t0060: Add tests for prefix_path when path begins with work tree
One edge-case that isn't currently checked in the tests is the beginning of the path matching the work tree, despite the target not actually being the work tree, for example: path = /dir/repoa work_tree = /dir/repo should fail since the path is outside the repo. However, if /dir/repoa is in fact a symlink that points to /dir/repo, it should instead succeed. Add two tests covering these cases, since they might be potential regression points. --- t/t0060-path-utils.sh | 10 ++ 1 file changed, 10 insertions(+) diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index b8e92e1..c0a14f6 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -201,6 +201,16 @@ test_expect_success 'prefix_path works with only absolute path to work tree' ' test_cmp expected actual ' +test_expect_success 'prefix_path rejects absolute path to dir with same beginning as work tree' ' + test_must_fail test-path-utils prefix_path prefix $(pwd)a +' + +test_expect_success 'prefix_path works with absolute path to a symlink to work tree having same beginning as work tree' ' + git init repo + ln -s repo repolink + test a = $(cd repo test-path-utils prefix_path prefix $(pwd)/../repolink/a) +' + relative_path /foo/a/b/c/ /foo/a/b/ c/ relative_path /foo/a/b/c/ /foo/a/bc/ relative_path /foo/a//b//c////foo/a/b//c/ POSIX -- 1.8.5.2 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] init-db.c: honor case on case preserving fs
Torsten Bögershausen tbo...@web.de writes: On 2014-02-01 10.14, Reuben Hawkins wrote: Most case-insensitive filesystems are case-preserving. In these filesystems (such as HFS+ on OS X) you can name a file Filename.txt, then rename the file to FileName.txt. That file will be accessible by both filenames, but the case is otherwise honored. We don't want to have git ignore case on these case-preserving filesystem implementations. Yes, we want. Because the file system will treat Filename.txt and FileName.txt the same. Another important thing to remember is that we cannot have these two files at the same time on such a filesystem. Somebody may have Filename.txt in the commit at the tip of the history, you clone/fetch and check it out, and you will have Filename.txt with the original contents. We do not try to corrupt the filename on core.ignorecase filesystem by any canonicalization. But then you may edit that file, and you either deliberately or without knowing (because some of your tools do this behind your back) may end up saving the result as FileName.txt. What happens? When we ask what is the contents of Filename.txt now? (using the original name still in the index) to the underlying system, we will be given what you placed in FileName.txt. We won't see You do not have Filename.txt, but you now have FileName.txt. And that is the behaviour the end users (of not Git, but of a platform with such a filesystem) do expect from their tools. They do not want to see You no longer have Filename.txt, and you have a new file FileName.txt. Now think what git add Filename.txt should do at that point? It should not say I was told to add Filename.txt, but there is no such file, so I'll add nothing. If you run git add -u Filename.txt, it should not say I was told to add Filename.txt, but there is no such file, so I'll remove existing Filename.txt from the index. It must pick up the updated contents from your new FileName.txt, update the index entry Filename.txt, and the next git commit must record it as an update to the same file. If you are on the other hand trying to correct an earlier mistake of having named the file Filename.txt but you now want to rename it FileName.txt, the above behaviour by core.ignorecase may make it a bit cumbersome to do. You can first remove it from the index and then re-add it, I would think, as a workaround. Having to do a workaround is unfortunate but it is an unavoidable consequence of having to choose between the two and having to pick one. Most of the time you do not want such a rename (or rather, the loss of the file Filename.txt and the creation of the unrelated FileName.txt) and a change from Filename.txt to FileName.txt is most likely to be a mistake in the platform tool that mucked with the files on your filesystem, so we choose to make it easy for the user not to be disturbed by such a change. -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: splitting a commit that adds new files
Duy Nguyen pclo...@gmail.com writes: I usually start splitting a commit with reset @^ then add -p back. The problem is reset @^ does not keep track of new files added in HEAD, so I often end up forgetting to add new files back (with add -p). I'm thinking of making reset to do add -N automatically for me so I won't miss changes in add -p. But maybe people already know how to deal with this case without adding more code? Is reset -p what you are looking for? I do not use that myself, though. -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] fast-import.c: always honor the filename case
On 2014-02-02 14.13, Reuben Hawkins wrote: fast-import should not use strncmp_icase. When it does, files with similar names, but different case can be lost in the import. For example... M 100644 :1 FileName.txt D Filename.txt That seems to be wrong, shouldn't it be D Filename.txt M 100644 :1 FileName.txt How do you generate the export/import stream ? Please see here: https://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html Handling Renames When importing a renamed file or directory, simply delete the old name(s) and modify the new name(s) during the corresponding commit. Git performs rename detection after-the-fact, rather than explicitly during a commit. -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Running make rpm fails on a CentOS 6.3 machine
Thanks. I will try to use the rpm from Todd's build. BTW - if I want to create such a build on Fedora that will create el6 packages (e.g. git-1.8.5.3-2.el6.x86_64.rpm), what's the procedure? Thanks, Erez On Thu, Jan 30, 2014 at 8:51 PM, Todd Zullinger t...@pobox.com wrote: Hello, Jonathan Nieder wrote: Erez Zilber wrote: Writing perl.mak for Git Writing perl.mak for Git rename MakeMaker.tmp = perl.mak: No such file or directory at /usr/share/perl5/ExtUtils/MakeMaker.pm line 1024. make[3]: perl.mak: No such file or directory make[3]: perl.mak: No such file or directory make[3]: *** No rule to make target `perl.mak'. Stop. Looks like MakeMaker is racing against itself. Alas, (on a fairly current Debian system, with perl 5.14.2) I'm not able to reproduce that. Instead, I get this: | $ make -j8 rpm [...] | make[2]: Leaving directory `$HOME/rpmbuild/BUILD/git-1.8.5.3/Documentation' | make[1]: Leaving directory `$HOME/rpmbuild/BUILD/git-1.8.5.3' | + exit 0 | Executing(%install): /bin/sh -e /var/tmp/rpm-tmp.WqNYnx | + umask 022 | + cd $HOME/rpmbuild/BUILD | + cd git-1.8.5.3 | + rm -rf $HOME/rpmbuild/BUILDROOT/git-1.8.5.3-1.x86_64 | + make -j12 'CFLAGS=-O2 -g' \DESTDIR=$HOME/rpmbuild/BUILDROOT/git-1.8.5.3-1.x86_64 \ ETC_GITCONFIG=/etc/gitconfig prefix=/usr \ mandir=/usr/share/man htmldir=/usr/share/doc/git-1.8.5.3 \ INSTALLDIRS=vendor install install-doc | make[1]: Entering directory `$HOME/rpmbuild/BUILD/git-1.8.5.3' | make[1]: warning: -jN forced in submake: disabling jobserver mode. | make[1]: *** write jobserver: Bad file descriptor. Stop. | make[1]: *** Waiting for unfinished jobs | make[1]: *** write jobserver: Bad file descriptor. Stop. | error: Bad exit status from /var/tmp/rpm-tmp.WqNYnx (%install) | | | RPM build errors: | Bad exit status from /var/tmp/rpm-tmp.WqNYnx (%install) | make: *** [rpm] Error 1 Known problem? A build without -j8 gets further. It seems like it's not a problem with el6 or git's Makefiles themselves. I haven't used the spec file from git.git in ages, but I have tried to ensure that the one we use in Fedora builds cleanly on el{5,6}. I use this myself to keep an update git on el6, where Red Hat has left git at 1.7.1 since the release of el6 however many years ago. :( Here's a build I ran just now using the latest Fedora SRPM, showing it succeeds with make -j5: http://kojipkgs.fedoraproject.org//work/tasks/3049/6473049/build.log The build task, with all of the resulting rpms and logs is here: http://koji.fedoraproject.org/koji/taskinfo?taskID=6473049 (That will remain for a few days or so, at least. Scratch builds like this aren't kept indefinitely.) This makes me think that there's something in the git.spec in git.git that differs from what we use in Fedora/EPEL. I don't have time to dig into that now, but perhaps someone with time and inclination can diff the spec files and find the cause. I know the Fedora/EPEL spec file and what's in git.git have grown apart a good bit, unfortunately. That's the cost of having a spec file that is meant to work across a very wide array of RPM-based systems, I guess. The Fedora/EPEL spec file is fairly specific to the Fedora/EPEL build tools (mock is the primary build tool). Hope this helps a little in narrowing down the issue. I'm sorry I can't be of more assistance at the moment. -- ToddOpenPGP - KeyID: 0xBEAF0CE3 | URL: www.pobox.com/~tmz/pgp ~~ How am I supposed to hallucinate with all these swirling colors distracting me? -- Lisa Simpson -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Running make rpm fails on a CentOS 6.3 machine
Hi Erez, Erez Zilber wrote: Thanks. I will try to use the rpm from Todd's build. BTW - if I want to create such a build on Fedora that will create el6 packages (e.g. git-1.8.5.3-2.el6.x86_64.rpm), what's the procedure? Something like this (this is from memory): # Install fedpkg $ yum install fedpkg # Checkout the fedora git package (anonymously) $ fedpkg clone -a git cd git Verify the git tarball. This is optional, but I would be remiss to suggest you skip it. We used to include the tarball .asc file in the fedora git repo, but that is no longer available for download. Instead, it must be copied and pasted from Google Code (I'd love to see the .asc files return, FWIW). # To verify the tarball, have fedpkg download the sources first -- if # you don't want to verify the tarball, you can skip this command and # the gpg/sha1sum commands that follow $ fedpkg sources # Copy and paste the gpg signature from # http://code.google.com/p/git-core/downloads/detail?name=git-1.8.5.3.tar.gzcan=2q= # to a local file and use gpg to verify it, then sha1sum to check the # tarballs $ gpg --verify sha1.asc ... $ sha1sum -c sha1.asc # Create an el6 srpm $ fedpkg --dist el6 srpm # Build that package in mock, adding yourself or your build user to # the mock group first $ sudo useradd -a -G mock $USER $ mock -r epel-6-x86_64 git-1.8.5.3-2.el6.src.rpm The first time you build for a given release in mock will take longer than subsequent builds, because mock needs to download a lot of packages for the build root. These packages will be cached for future runs builds. In the end, you should end up with a set of packages and build logs under /var/lib/mock/epel-6-x86_64/result/. Hope this helps, -- ToddOpenPGP - KeyID: 0xBEAF0CE3 | URL: www.pobox.com/~tmz/pgp ~~ Anyone who is capable of getting themselves made President should on no account be allowed to do the job. -- Douglas Adams, The Hitchhiker's Guide to the Galaxy pgpjSoBWcZtsN.pgp Description: PGP signature
Determining update/merge/current state of a workspace
I'm working on the DVC Emacs front-end for git (http://www.emacswiki.org/emacs/DistributedVersionControl), adding features similar to the ones I added for monotone (http://www.monotone.ca). I'm used to monotone and new to git, so this may seem like an odd workflow. I always do 'fetch' and 'merge' separately, never 'pull'. So after a 'fetch', the DVC Emacs front end must determine what needs to happen next. I think there are three cases: 1) 'fetch' did not retrieve any revisions from remote; the last local commit is the head of the branch. The workspace is up to date (it may need to be comitted). 2) 'fetch' retrieved revisions, and there were no local commits since the previous fetch. The last fetch is the head of the branch; if not equal to HEAD, the workspace needs to be updated (via 'merge'). 3) fetch retrieved revisions, and there were local commits since the previous fetch. There are two heads for the branch (the two described above), they need to be merged, then the workspace updated. I'm not sure how 'git fetch' handles case 3); I have not tested that case yet. The question I have is: What git queries can I run to determine which of the three states the current workspace is in? 'rev-parse HEAD' gives the last workspace commit. 'rev-parse refs/remotes/remote/branch' gives the head of the branch in the remote repository as of the most recent fetch. But to distinguish among the cases, I need to determine if one of these two revs is a child of the other or not. I don't see a git query to determine that directly. I could try parsing a 'log' output; I have not investigated that. This is easy in monotone; there is a command 'mtn heads' that gives this result directly (it returns either one or two revs), and another command 'mtn automate toposort' that orders revs topologically (by parent/child relationships). -- -- Stephe -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Determining update/merge/current state of a workspace
On Sun, Feb 02, 2014 at 04:15:09PM -0600, Stephen Leake wrote: I always do 'fetch' and 'merge' separately, never 'pull'. So after a 'fetch', the DVC Emacs front end must determine what needs to happen next. I think there are three cases: Doing the two steps separately is common in git, too. The cases you mention are also something people commonly care about. Both git checkout and git status will print out the relationship between the current branch and its upstream. These are sometimes referred to as ahead/behind messages in the manual (because they are of the form You are N commits ahead of origin/master, etc). 3) fetch retrieved revisions, and there were local commits since the previous fetch. There are two heads for the branch (the two described above), they need to be merged, then the workspace updated. I'm not sure how 'git fetch' handles case 3); I have not tested that case yet. Git's fetch does not have to care about this case. It is responsible only for updating the ref that keeps track of the remote side (e.g., refs/remotes/origin/master). Unlike in some other DVCSs, there is no global concept of a branch in git. The ref refs/heads/master refers to your local branch named master, and the ref refs/remotes/origin/master refers to some remote's branch with the same name. You can reconcile them whenever and however you like, and do not have to do so immediately (or at all). The question I have is: What git queries can I run to determine which of the three states the current workspace is in? If you want to know the relationship between two (or more) commits, you can use `git rev-list` to enumerate them. You can use the symmetric difference operator (...) to walk both sides down to their merge-base. The `--left-right` option will label them according to which side each commit comes from. So try: git rev-list --left-right @{upstream}...HEAD to see the commits, or just: git rev-list --left-right --count @{upstream}...HEAD to just get the counts on each side. Note that I used @{upstream} there instead of naming the branch specifically. The default remote branch with which a local branch will merge can be configured, and does not have to have the same name (or even be a remote branch). But to distinguish among the cases, I need to determine if one of these two revs is a child of the other or not. I don't see a git query to determine that directly. I think what I gave above matches what you are looking for most directly. But as you may have already guessed, you can also use rev-list to find whether one rev is a child of the other (e.g., git rev-list --count a..b != 0). I could try parsing a 'log' output; I have not investigated that. Don't do that. As you might expect, `git log` is built on top of the traversals done by `rev-list`. The latter is preferred as a building block, because it is plumbing whose output is guaranteed not to change in later git versions. I hope that helps, -Peff -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] fast-import.c: always honor the filename case
[+cc Joshua Jensen, who wrote 50906e0] On Sun, Feb 02, 2014 at 07:13:04AM -0600, Reuben Hawkins wrote: fast-import should not use strncmp_icase. I am not sure of that. My gut feeling is that core.ignorecase is completely about the _filesystem_, and that git should generally be case-sensitive internally. But I do not know that everyone agrees. Your commit is basically a revert of 50906e0 (Support case folding in git fast-import when core.ignorecase=true, 2010-10-03). And here's some additional discussion specifically regarding fast-import: http://thread.gmane.org/gmane.comp.version-control.git/200597 So I think there is a discussion to be had[1]. When it does, files with similar names, but different case can be lost in the import. For example... M 100644 :1 FileName.txt D Filename.txt ...would end up deleting FileName from the index during the fast- import when strncmp_icase is used and core.ignorecase=true. The intent in the above snippet is to rename the file, not delete it. There may be a separate bug where fast-import needs to be smarter about ordering operations in such a case (or perhaps fast-export generators need to be more careful about the order of their output). But it might be fixable without disabling case-insensitivity entirely. -Peff [1] I am mostly trying to connect people on various sides of the discussion here. So take my gut feeling above with a grain of salt, as it does not come from experience nor thinking too hard about the issue. -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Determining update/merge/current state of a workspace
On Sun, Feb 02, 2014 at 04:15:09PM -0600, Stephen Leake wrote: I'm working on the DVC Emacs front-end for git (http://www.emacswiki.org/emacs/DistributedVersionControl), adding features similar to the ones I added for monotone (http://www.monotone.ca). I'm used to monotone and new to git, so this may seem like an odd workflow. I always do 'fetch' and 'merge' separately, never 'pull'. So after a 'fetch', the DVC Emacs front end must determine what needs to happen next. I think there are three cases: 1) 'fetch' did not retrieve any revisions from remote; the last local commit is the head of the branch. The workspace is up to date (it may need to be comitted). 2) 'fetch' retrieved revisions, and there were no local commits since the previous fetch. The last fetch is the head of the branch; if not equal to HEAD, the workspace needs to be updated (via 'merge'). 3) fetch retrieved revisions, and there were local commits since the previous fetch. There are two heads for the branch (the two described above), they need to be merged, then the workspace updated. I'm not sure how 'git fetch' handles case 3); I have not tested that case yet. Fetch updates your cached origin/master ref to match the remote. Your local master branch and worktree are left as-is. The question I have is: What git queries can I run to determine which of the three states the current workspace is in? 'rev-parse HEAD' gives the last workspace commit. 'rev-parse refs/remotes/remote/branch' gives the head of the branch in the remote repository as of the most recent fetch. But to distinguish among the cases, I need to determine if one of these two revs is a child of the other or not. I don't see a git query to determine that directly. I think you're looking for git merge-base. If you do `git merge-base HEAD origin/master` and its result is equal to `git rev-parse HEAD` then you know that master is an ancestor of origin/master and can be trivially fast-forwarded to origin/master. If you get a SHA-1 that is not equal then there are probably[*] local commits that have happened on master and it should probably be rebased (or merged). [*] other possibilities: someone rebased your upstream, etc. People have differing opinions on how to resolve the diverging history. Topic branches are the gitty approach. Another popular approach to resolving the divergence is what git pull --rebase would have done. (No one really likes what git pull would have done by default when there are local commits) To implement the rebase workflow, do git rebase --autostash origin/master. after fetching. That workflow is probably the simplest for folks who eschew branching or are expats from other vcs. If you're writing a tool you might want to check whether the branch has an upstream configured via `git config branch.$name.remote` and `git config branch.$name.merge` as well. I could try parsing a 'log' output; I have not investigated that. This is easy in monotone; there is a command 'mtn heads' that gives this result directly (it returns either one or two revs), and another command 'mtn automate toposort' that orders revs topologically (by parent/child relationships). `git log` can also tell you whether you have commits that they don't.. What does origin/master have that I don't? git log HEAD..origin/master What do I have that origin/master does not? git log origin/master..HEAD The git log output is easily controlled, though for these questions the mere presence/absense of output tells you what you need to know. cheers, -- David -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: splitting a commit that adds new files
On Sun, Feb 02, 2014 at 10:15:07AM -0800, Junio C Hamano wrote: Duy Nguyen pclo...@gmail.com writes: I usually start splitting a commit with reset @^ then add -p back. The problem is reset @^ does not keep track of new files added in HEAD, so I often end up forgetting to add new files back (with add -p). I'm thinking of making reset to do add -N automatically for me so I won't miss changes in add -p. But maybe people already know how to deal with this case without adding more code? Is reset -p what you are looking for? I do not use that myself, though. I don't think so. He is at a commit that needs split, so the first thing to do is to shift the HEAD back. reset -p is only about changing the index[1]. I suppose you could start with a soft reset, and then reset -p away the changes. But then you are going backwards (this does not belong in the commit, remove it rather than this does belong in the commit, add it). I don't typically have a problem with this because I keep my directory free of untracked files (they are either marked by .gitignore, or something that I am planning to commit). So just running git status gives me a neat list of what needs to be added (and typically git add -N . is a good starting point upon which to build a git add -p session). -Peff [1] I _do_ use reset -p when splitting commits, but I do not think it is useful here. I use it for oops, I staged this change, but it actually belongs in the next commit. Undo my staging, but leave the changes in the working tree for the next one. -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Creating own hierarchies under $GITDIR/refs ?
On Sun, Feb 02, 2014 at 11:37:39AM +0100, David Kastrup wrote: So I mused: refs/heads contains branches, refs/tags contains tags. The respective information would likely easily enough be stored in refs/bzr and refs/bugs and in that manner would not pollute the ordinary tag and branch spaces, rendering git tag and/or git branch output mostly unusable. I tested creating such a directory and entries and indeed references like bzr/39005 then worked. Yes. The names refs/tags and refs/heads are special by convention, and there is no reason you cannot have other hierarchies (and indeed, we already have refs/notes and refs/remotes as common hierarchies). However, cloning from the repository did not copy those directories and references, so without modification, this scheme would not work for cloned repositories. Correct. Anyone who wants them will have to ask for them manually, like: git config --add remote.origin.fetch '+refs/bzr/*:refs/bzr/*' after which any git fetch will retrieve them. Are there some measures one can take/configure in the parent repository such that (named or all) additional directories inside of $GITDIR/refs would get cloned along with the rest? No. It is up to the client to decide which parts of the ref namespace they want to fetch. The server only advertises what it has, and the client selects from that. Others mentioned that refs were never really intended to scale to one-per-commit. We serve some repositories with tens of thousands of refs from GitHub, and it does work. On the backend, we even have some repos in the hundreds of thousands (but these are not client facing). Most of the pain points (like O(n^2) loops) have been ironed out, but the two big ones are still: - server ref advertisement lists _all_ refs at the start of the conversation. So, e.g., git fetch git://github.com/Homebrew/homebrew.git sends 2MB of advertisement just so a client can find out nope, nothing to fetch. - the packed-refs storage is rather monolithic. Reading a value from it currently requires parsing the whole file. Likewise, deleting a ref requires rewriting the whole file. So what you are proposing will work, but do note that there is a cost. -Peff -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] userdiff: update Ada patterns
On Sun, Feb 02, 2014 at 09:21:56PM +1030, Adrian Johnson wrote: - Allow extra space in is new and is separate [...] - !^(.*[ \t])?(is new|renames|is separate)([ \t].*)?$\n + !^(.*[ \t])?(is[ \t]+new|renames|is[ \t]+separate)([ \t].*)?$\n I do not know anything about Ada syntax, but this change looks obviously sensible. - Fix bug in word regex for numbers - |[0-9][-+0-9#_.eE] + |[-+0-9#_.eE]+ This makes E or _ a number. Is that right? I think the intent of the original was starts with a digit, and then has one or more of these other things after it. You do not describe the bug, but I guess it would be that it does not match obvious things like 5. Should it be zero or more instead, like: [0-9][-+0-9#_.eE]* ? Also, should -/+ be hoisted to the front? [-+]?[0-9][0-9#_.eE]* Again, I am just guessing, as I am not familiar enough with Ada. -Peff -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Creating own hierarchies under $GITDIR/refs ?
John Keeping j...@keeping.me.uk writes: I actually wonder if you could do this with notes and git-grep; for example: git grep -l keeping.me.uk refs/notes/amlog | sed -e 's/.*://' -e 's!/!!g' That should be relatively efficient since you're only looking at the current notes tree. I added notes handling to gitifyhg and would search it similar to this. Since gitifyhg is two-way, I could not modify the commits. Later, when we converted several repositories (up to 50k commits/80 MB), I appended Hg-commit: $Hg_commit_hash to all the commit messages. This way it shows up on the web interface, users don't have to obtain the notes specially, and git log --grep works naturally. I think it's worth considering this simple solution; existing Git users won't mind recloning once. pgp3uBjty3sk1.pgp Description: PGP signature
Re: Determining update/merge/current state of a workspace
On Sun, Feb 02, 2014 at 03:04:59PM -0800, David Aguilar wrote: I think you're looking for git merge-base. If you do `git merge-base HEAD origin/master` and its result is equal to `git rev-parse HEAD` then you know that master is an ancestor of origin/master and can be trivially fast-forwarded to origin/master. In newer versions of git (1.8.0+), you can use git merge-base --is-ancestor for this instead. The commit message for 5907cda implies that it is more efficient than the old way. -- brian m. carlson / brian with sandals: Houston, Texas, US +1 832 623 2791 | http://www.crustytoothpaste.net/~bmc | My opinion only OpenPGP: RSA v4 4096b: 88AC E9B2 9196 305B A994 7552 F1BA 225C 0223 B187 signature.asc Description: Digital signature
Re: [PATCH v5 5/5] setup: Don't dereference in-tree symlinks for absolute paths
On Sun, Feb 2, 2014 at 11:35 PM, Martin Erik Werner martinerikwer...@gmail.com wrote: diff --git a/setup.c b/setup.c index a2e60ab..230505c 100644 --- a/setup.c +++ b/setup.c @@ -86,11 +86,23 @@ char *prefix_path_gently(const char *prefix, int len, const char *orig = path; char *sanitized; if (is_absolute_path(orig)) { - const char *temp = real_path(path); - sanitized = xmalloc(len + strlen(temp) + 1); - strcpy(sanitized, temp); + char *npath; + + npath = xmalloc(strlen(path) + 1); if (remaining_prefix) *remaining_prefix = 0; + if (normalize_path_copy_len(npath, path, remaining_prefix)) { + free(npath); + return NULL; + } + if (abspath_part_inside_repo(npath)) { + free(npath); + return NULL; + } + + sanitized = xmalloc(strlen(npath) + 1); + strcpy(sanitized, npath); + free(npath); We could replace these three lines with sanitized = npath;. But it's not a big deal imo. The rest of the series looks good. Reviewed-by: Duy Nguyen pclo...@gmail.com } else { sanitized = xmalloc(len + strlen(path) + 1); if (len) -- Duy -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 02/26] pkt-line.c: add packet_write_timeout()
Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- cache.h| 1 + pkt-line.c | 15 +++ pkt-line.h | 1 + wrapper.c | 26 ++ 4 files changed, 43 insertions(+) diff --git a/cache.h b/cache.h index dc040fb..718e32b 100644 --- a/cache.h +++ b/cache.h @@ -1231,6 +1231,7 @@ extern void fsync_or_die(int fd, const char *); extern ssize_t read_in_full(int fd, void *buf, size_t count); extern ssize_t write_in_full(int fd, const void *buf, size_t count); +extern ssize_t write_in_full_timeout(int fd, const void *buf, size_t count, int timeout); static inline ssize_t write_str_in_full(int fd, const char *str) { return write_in_full(fd, str, strlen(str)); diff --git a/pkt-line.c b/pkt-line.c index eac45ad..cf681e9 100644 --- a/pkt-line.c +++ b/pkt-line.c @@ -94,6 +94,21 @@ void packet_write(int fd, const char *fmt, ...) write_or_die(fd, write_buffer, n); } +int packet_write_timeout(int fd, int timeout, const char *fmt, ...) +{ + static struct strbuf sb = STRBUF_INIT; + va_list args; + unsigned n; + + if (fd == -1) + return -1; + va_start(args, fmt); + strbuf_reset(sb); + n = format_packet(fmt, args); + va_end(args); + return write_in_full_timeout(fd, write_buffer, n, timeout); +} + void packet_buf_write(struct strbuf *buf, const char *fmt, ...) { va_list args; diff --git a/pkt-line.h b/pkt-line.h index 0a838d1..4b93a0c 100644 --- a/pkt-line.h +++ b/pkt-line.h @@ -21,6 +21,7 @@ */ void packet_flush(int fd); void packet_write(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3))); +int packet_write_timeout(int fd, int timeout, const char *fmt, ...) __attribute__((format (printf, 3, 4))); void packet_buf_flush(struct strbuf *buf); void packet_buf_write(struct strbuf *buf, const char *fmt, ...) __attribute__((format (printf, 2, 3))); diff --git a/wrapper.c b/wrapper.c index 0cc5636..9a0e289 100644 --- a/wrapper.c +++ b/wrapper.c @@ -214,6 +214,32 @@ ssize_t write_in_full(int fd, const void *buf, size_t count) return total; } +ssize_t write_in_full_timeout(int fd, const void *buf, + size_t count, int timeout) +{ + struct pollfd pfd; + const char *p = buf; + ssize_t total = 0; + + pfd.fd = fd; + pfd.events = POLLOUT; + while (count 0 poll(pfd, 1, timeout) 0 + (pfd.revents POLLOUT)) { + ssize_t written = xwrite(fd, p, count); + if (written 0) + return -1; + if (!written) { + errno = ENOSPC; + return -1; + } + count -= written; + p += written; + total += written; + } + + return count ? -1 : total; +} + int xdup(int fd) { int ret = dup(fd); -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 03/26] pkt-line.c: add packet_read_line_timeout()
This version is also gentler than its friend packet_read_line() because it's designed for side channel I/O that should not abort the program even if the channel is broken. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- cache.h| 1 + pkt-line.c | 35 +++ pkt-line.h | 1 + wrapper.c | 21 + 4 files changed, 58 insertions(+) diff --git a/cache.h b/cache.h index 718e32b..939db46 100644 --- a/cache.h +++ b/cache.h @@ -1230,6 +1230,7 @@ extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char extern void fsync_or_die(int fd, const char *); extern ssize_t read_in_full(int fd, void *buf, size_t count); +extern ssize_t read_in_full_timeout(int fd, void *buf, size_t count, int timeout); extern ssize_t write_in_full(int fd, const void *buf, size_t count); extern ssize_t write_in_full_timeout(int fd, const void *buf, size_t count, int timeout); static inline ssize_t write_str_in_full(int fd, const char *str) diff --git a/pkt-line.c b/pkt-line.c index cf681e9..5a07e97 100644 --- a/pkt-line.c +++ b/pkt-line.c @@ -229,3 +229,38 @@ char *packet_read_line_buf(char **src, size_t *src_len, int *dst_len) { return packet_read_line_generic(-1, src, src_len, dst_len); } + +char *packet_read_line_timeout(int fd, int timeout, int *len_p) +{ + char *buf = packet_buffer; + int ret, len, buf_len = sizeof(packet_buffer); + char linelen[4]; + + if (fd == -1) + return NULL; + if ((ret = read_in_full_timeout(fd, linelen, 4, timeout)) 0) + return NULL; + len = packet_length(linelen); + if (len 0) { + error(protocol error: bad line length character: %.4s, linelen); + return NULL; + } + if (!len) { + packet_trace(, 4, 0); + if (len_p) + *len_p = 0; + return ; + } + len -= 4; + if (len = buf_len) { + error(protocol error: bad line length %d, len); + return NULL; + } + if ((ret = read_in_full_timeout(fd, buf, len, timeout)) 0) + return NULL; + buf[len] = '\0'; + if (len_p) + *len_p = len; + packet_trace(buf, len, 0); + return buf; +} diff --git a/pkt-line.h b/pkt-line.h index 4b93a0c..d47dca5 100644 --- a/pkt-line.h +++ b/pkt-line.h @@ -69,6 +69,7 @@ int packet_read(int fd, char **src_buffer, size_t *src_len, char * packet is written to it. */ char *packet_read_line(int fd, int *size); +char *packet_read_line_timeout(int fd, int timeout, int *size); /* * Same as packet_read_line, but read from a buf rather than a descriptor; diff --git a/wrapper.c b/wrapper.c index 9a0e289..9cf10b2 100644 --- a/wrapper.c +++ b/wrapper.c @@ -193,6 +193,27 @@ ssize_t read_in_full(int fd, void *buf, size_t count) return total; } +ssize_t read_in_full_timeout(int fd, void *buf, size_t count, int timeout) +{ + char *p = buf; + ssize_t total = 0; + struct pollfd pfd; + + pfd.fd = fd; + pfd.events = POLLIN; + while (count 0 poll(pfd, 1, timeout) 0 + (pfd.revents POLLIN)) { + ssize_t loaded = xread(fd, p, count); + if (loaded = 0) + return -1; + count -= loaded; + p += loaded; + total += loaded; + } + + return count ? -1 : total; +} + ssize_t write_in_full(int fd, const void *buf, size_t count) { const char *p = buf; -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 01/26] pkt-line.c: rename global variable buffer[] to something less generic
buffer is a local variable in some other functions. Rename the global one to make it less confusing. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- pkt-line.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pkt-line.c b/pkt-line.c index bc63b3b..eac45ad 100644 --- a/pkt-line.c +++ b/pkt-line.c @@ -64,14 +64,15 @@ void packet_buf_flush(struct strbuf *buf) } #define hex(a) (hexchar[(a) 15]) -static char buffer[1000]; +static char write_buffer[1000]; static unsigned format_packet(const char *fmt, va_list args) { static char hexchar[] = 0123456789abcdef; + char *buffer = write_buffer; unsigned n; - n = vsnprintf(buffer + 4, sizeof(buffer) - 4, fmt, args); - if (n = sizeof(buffer)-4) + n = vsnprintf(buffer + 4, sizeof(write_buffer) - 4, fmt, args); + if (n = sizeof(write_buffer)-4) die(protocol error: impossibly long line); n += 4; buffer[0] = hex(n 12); @@ -90,7 +91,7 @@ void packet_write(int fd, const char *fmt, ...) va_start(args, fmt); n = format_packet(fmt, args); va_end(args); - write_or_die(fd, buffer, n); + write_or_die(fd, write_buffer, n); } void packet_buf_write(struct strbuf *buf, const char *fmt, ...) @@ -101,7 +102,7 @@ void packet_buf_write(struct strbuf *buf, const char *fmt, ...) va_start(args, fmt); n = format_packet(fmt, args); va_end(args); - strbuf_add(buf, buffer, n); + strbuf_add(buf, write_buffer, n); } static int get_packet_data(int fd, char **src_buf, size_t *src_size, -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 00/26] inotify support
I'm happy with this now. The only things left are applying ewah on the watch index extension and maybe improve lookup performance a bit. The former needs jk/pack-bitmap graduated. The latter is not urgent. Oh and maybe address BUGS section (more like known limitations) in git-file-watcher.txt. For early adopters that fear a buggy file-watcher may cause update loss, set GIT_TEST_WATCHED=1 (or 2). It'll do lstat() to verify file-watcher results (so no perf. gain). Beware of race condition that may lead to false positives, mentioned in 20/26 (maybe I should do something about it). The series can also be fetched from https://github.com/pclouds/git.git file-watcher Nguyễn Thái Ngọc Duy (26): pkt-line.c: rename global variable buffer[] to something less generic pkt-line.c: add packet_write_timeout() pkt-line.c: add packet_read_line_timeout() unix-socket: make unlink() optional in unix_stream_listen() Add git-file-watcher and basic connection handling logic file-watcher: check socket directory permission file-watcher: remove socket on exit file-watcher: add --detach read-cache: save trailing sha-1 read-cache: new flag CE_WATCHED to mark what file is watched Clear CE_WATCHED when set CE_VALID alone read-cache: basic hand shaking to the file watcher read-cache: ask file watcher to watch files read-cache: put some limits on file watching read-cache: get changed file list from file watcher git-compat-util.h: add inotify stubs on non-Linux platforms file-watcher: inotify support, watching part file-watcher: inotify support, notification part Wrap CE_VALID test with ce_valid() read-cache: new variable to verify file-watcher results Support running file watcher with the test suite file-watcher: quit if $WATCHER/socket is gone file-watcher: tests for the daemon ls-files: print CE_WATCHED as W (or w with CE_VALID) file-watcher: tests for the client side Disable file-watcher with system inotify on some tests .gitignore |2 + Documentation/config.txt | 19 + Documentation/git-file-watcher.txt (new) | 54 ++ Documentation/git-ls-files.txt |1 + Documentation/technical/index-format.txt |9 + Makefile |3 + builtin/grep.c |2 +- builtin/ls-files.c | 14 +- builtin/update-index.c | 12 +- cache.h | 17 + config.mak.uname |1 + credential-cache--daemon.c |2 +- daemon.c | 30 +- diff-lib.c |4 +- diff.c |2 +- file-watcher-lib.c (new) | 321 + file-watcher-lib.h (new) |8 + file-watcher.c (new) | 1149 ++ git-compat-util.h| 43 ++ pkt-line.c | 61 +- pkt-line.h |2 + read-cache.c | 119 +++- setup.c | 25 + t/t1011-read-tree-sparse-checkout.sh |2 + t/t2104-update-index-skip-worktree.sh|2 + t/t7011-skip-worktree-reading.sh |2 + t/t7012-skip-worktree-writing.sh |2 + t/t7513-file-watcher.sh (new +x) | 382 ++ t/t7514-file-watcher-lib.sh (new +x) | 190 + test-file-watcher.c (new)| 111 +++ unix-socket.c|5 +- unix-socket.h|2 +- unpack-trees.c |2 +- wrapper.c| 47 ++ 34 files changed, 2591 insertions(+), 56 deletions(-) create mode 100644 Documentation/git-file-watcher.txt create mode 100644 file-watcher-lib.c create mode 100644 file-watcher-lib.h create mode 100644 file-watcher.c create mode 100755 t/t7513-file-watcher.sh create mode 100755 t/t7514-file-watcher-lib.sh create mode 100644 test-file-watcher.c -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 07/26] file-watcher: remove socket on exit
Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- file-watcher.c | 24 +++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/file-watcher.c b/file-watcher.c index 91f4cfe..9c639ef 100644 --- a/file-watcher.c +++ b/file-watcher.c @@ -56,6 +56,26 @@ static void accept_connection(int fd) pfd_nr++; } +static const char *socket_path; +static int do_not_clean_up; + +static void cleanup(void) +{ + struct strbuf sb = STRBUF_INIT; + if (do_not_clean_up) + return; + strbuf_addf(sb, %s/socket, socket_path); + unlink(sb.buf); + strbuf_release(sb); +} + +static void cleanup_on_signal(int signo) +{ + cleanup(); + sigchain_pop(signo); + raise(signo); +} + static const char permissions_advice[] = N_(The permissions on your socket directory are too loose; other\n processes may be able to read your file listing. Consider running:\n @@ -91,7 +111,6 @@ int main(int argc, const char **argv) { struct strbuf sb = STRBUF_INIT; int i, new_nr, fd, quit = 0, nr_common; - const char *socket_path = NULL; struct option options[] = { OPT_END() }; @@ -113,6 +132,9 @@ int main(int argc, const char **argv) die_errno(_(unable to listen at %s), sb.buf); strbuf_reset(sb); + atexit(cleanup); + sigchain_push_common(cleanup_on_signal); + nr_common = 1; pfd_alloc = pfd_nr = nr_common; pfd = xmalloc(sizeof(*pfd) * pfd_alloc); -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 05/26] Add git-file-watcher and basic connection handling logic
git-file-watcher is a daemon (*) that watches file changes and tells git about them. The intent is to avoid lstat() calls at index refresh time, which could be a lot on big working directory. The actual monitoring needs support from OS (inotify, FSEvents, FindFirstChangeNotification or kqueue) and is not part of this patch or the next few yet. This patch only provides a UNIX socket server. (*) it will be a a daemon in this end, but in this patch it runs in the foreground. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- .gitignore | 1 + Documentation/git-file-watcher.txt (new) | 29 +++ Makefile | 1 + file-watcher.c (new) | 140 +++ 4 files changed, 171 insertions(+) create mode 100644 Documentation/git-file-watcher.txt create mode 100644 file-watcher.c diff --git a/.gitignore b/.gitignore index b5f9def..12c78f0 100644 --- a/.gitignore +++ b/.gitignore @@ -56,6 +56,7 @@ /git-fast-import /git-fetch /git-fetch-pack +/git-file-watcher /git-filter-branch /git-fmt-merge-msg /git-for-each-ref diff --git a/Documentation/git-file-watcher.txt b/Documentation/git-file-watcher.txt new file mode 100644 index 000..625a389 --- /dev/null +++ b/Documentation/git-file-watcher.txt @@ -0,0 +1,29 @@ +git-file-watcher(1) +=== + +NAME + +git-file-watcher - File update notification daemon + +SYNOPSIS + +[verse] +'git file-watcher' [options] socket directory + +DESCRIPTION +--- +This program watches file changes in a git working directory and let +Git now what files have been changed so that Git does not have to call +lstat(2) to detect that itself. + +OPTIONS +--- + +SEE ALSO + +linkgit:git-update-index[1], +linkgit:git-config[1] + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Makefile b/Makefile index dddaf4f..8eef0d6 100644 --- a/Makefile +++ b/Makefile @@ -536,6 +536,7 @@ PROGRAMS += $(EXTRA_PROGRAMS) PROGRAM_OBJS += credential-store.o PROGRAM_OBJS += daemon.o PROGRAM_OBJS += fast-import.o +PROGRAM_OBJS += file-watcher.o PROGRAM_OBJS += http-backend.o PROGRAM_OBJS += imap-send.o PROGRAM_OBJS += sh-i18n--envsubst.o diff --git a/file-watcher.c b/file-watcher.c new file mode 100644 index 000..a6d7f17 --- /dev/null +++ b/file-watcher.c @@ -0,0 +1,140 @@ +#include cache.h +#include sigchain.h +#include parse-options.h +#include exec_cmd.h +#include unix-socket.h + +static const char *const file_watcher_usage[] = { + N_(git file-watcher [options] socket directory), + NULL +}; + +struct connection { + int sock; +}; + +static struct connection **conns; +static struct pollfd *pfd; +static int conns_alloc, pfd_nr, pfd_alloc; + +static int shutdown_connection(int id) +{ + struct connection *conn = conns[id]; + conns[id] = NULL; + pfd[id].fd = -1; + if (!conn) + return 0; + close(conn-sock); + free(conn); + return 0; +} + +static int handle_command(int conn_id) +{ + return 0; +} + +static void accept_connection(int fd) +{ + struct connection *conn; + int client = accept(fd, NULL, NULL); + if (client 0) { + warning(_(accept failed: %s), strerror(errno)); + return; + } + + ALLOC_GROW(pfd, pfd_nr + 1, pfd_alloc); + pfd[pfd_nr].fd = client; + pfd[pfd_nr].events = POLLIN; + pfd[pfd_nr].revents = 0; + + ALLOC_GROW(conns, pfd_nr + 1, conns_alloc); + conn = xmalloc(sizeof(*conn)); + memset(conn, 0, sizeof(*conn)); + conn-sock = client; + conns[pfd_nr] = conn; + pfd_nr++; +} + +int main(int argc, const char **argv) +{ + struct strbuf sb = STRBUF_INIT; + int i, new_nr, fd, quit = 0, nr_common; + const char *socket_path = NULL; + struct option options[] = { + OPT_END() + }; + + git_extract_argv0_path(argv[0]); + git_setup_gettext(); + argc = parse_options(argc, argv, NULL, options, +file_watcher_usage, 0); + if (argc 1) + die(_(socket path missing)); + else if (argc 1) + die(_(too many arguments)); + + socket_path = argv[0]; + strbuf_addf(sb, %s/socket, socket_path); + fd = unix_stream_listen(sb.buf, 0); + if (fd == -1) + die_errno(_(unable to listen at %s), sb.buf); + strbuf_reset(sb); + + nr_common = 1; + pfd_alloc = pfd_nr = nr_common; + pfd = xmalloc(sizeof(*pfd) * pfd_alloc); + pfd[0].fd = fd; + pfd[0].events = POLLIN; + + while (!quit) { + if (poll(pfd, pfd_nr, -1) 0) { + if (errno != EINTR) { + error(Poll failed, resuming: %s, + strerror(errno)); + sleep(1); + } +
[PATCH v3 04/26] unix-socket: make unlink() optional in unix_stream_listen()
Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- credential-cache--daemon.c | 2 +- unix-socket.c | 5 +++-- unix-socket.h | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/credential-cache--daemon.c b/credential-cache--daemon.c index 390f194..1b995a9 100644 --- a/credential-cache--daemon.c +++ b/credential-cache--daemon.c @@ -207,7 +207,7 @@ static void serve_cache(const char *socket_path) { int fd; - fd = unix_stream_listen(socket_path); + fd = unix_stream_listen(socket_path, 1); if (fd 0) die_errno(unable to bind to '%s', socket_path); diff --git a/unix-socket.c b/unix-socket.c index 01f119f..2be1af6 100644 --- a/unix-socket.c +++ b/unix-socket.c @@ -93,7 +93,7 @@ fail: return -1; } -int unix_stream_listen(const char *path) +int unix_stream_listen(const char *path, int replace) { int fd, saved_errno; struct sockaddr_un sa; @@ -103,7 +103,8 @@ int unix_stream_listen(const char *path) return -1; fd = unix_stream_socket(); - unlink(path); + if (replace) + unlink(path); if (bind(fd, (struct sockaddr *)sa, sizeof(sa)) 0) goto fail; diff --git a/unix-socket.h b/unix-socket.h index e271aee..18ee290 100644 --- a/unix-socket.h +++ b/unix-socket.h @@ -2,6 +2,6 @@ #define UNIX_SOCKET_H int unix_stream_connect(const char *path); -int unix_stream_listen(const char *path); +int unix_stream_listen(const char *path, int replace); #endif /* UNIX_SOCKET_H */ -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 06/26] file-watcher: check socket directory permission
Access to the unix socket $WATCHER/socket is covered by $WATCHER's permission. While the file watcher does not carry much information, repo file listing is sometimes not something you want other users to see. Make sure $WATCHER has 0700 permission to stop unwanted access. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- file-watcher.c | 32 1 file changed, 32 insertions(+) diff --git a/file-watcher.c b/file-watcher.c index a6d7f17..91f4cfe 100644 --- a/file-watcher.c +++ b/file-watcher.c @@ -56,6 +56,37 @@ static void accept_connection(int fd) pfd_nr++; } +static const char permissions_advice[] = +N_(The permissions on your socket directory are too loose; other\n + processes may be able to read your file listing. Consider running:\n + \n + chmod 0700 %s); +static void check_socket_directory(const char *path) +{ + struct stat st; + char *path_copy = xstrdup(path); + char *dir = dirname(path_copy); + + if (!stat(dir, st)) { + if (st.st_mode 077) + die(_(permissions_advice), dir); + free(path_copy); + return; + } + + /* +* We must be sure to create the directory with the correct mode, +* not just chmod it after the fact; otherwise, there is a race +* condition in which somebody can chdir to it, sleep, then try to open +* our protected socket. +*/ + if (safe_create_leading_directories_const(dir) 0) + die_errno(_(unable to create directories for '%s'), dir); + if (mkdir(dir, 0700) 0) + die_errno(_(unable to mkdir '%s'), dir); + free(path_copy); +} + int main(int argc, const char **argv) { struct strbuf sb = STRBUF_INIT; @@ -76,6 +107,7 @@ int main(int argc, const char **argv) socket_path = argv[0]; strbuf_addf(sb, %s/socket, socket_path); + check_socket_directory(sb.buf); fd = unix_stream_listen(sb.buf, 0); if (fd == -1) die_errno(_(unable to listen at %s), sb.buf); -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 08/26] file-watcher: add --detach
In daemon mode, stdout and stderr are saved in $WATCHER/log. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- Documentation/git-file-watcher.txt | 2 ++ cache.h| 1 + daemon.c | 30 -- file-watcher.c | 17 + setup.c| 25 + 5 files changed, 49 insertions(+), 26 deletions(-) diff --git a/Documentation/git-file-watcher.txt b/Documentation/git-file-watcher.txt index 625a389..ec81f18 100644 --- a/Documentation/git-file-watcher.txt +++ b/Documentation/git-file-watcher.txt @@ -18,6 +18,8 @@ lstat(2) to detect that itself. OPTIONS --- +--detach:: + Run in background. SEE ALSO diff --git a/cache.h b/cache.h index 939db46..7a836b1 100644 --- a/cache.h +++ b/cache.h @@ -434,6 +434,7 @@ extern int set_git_dir_init(const char *git_dir, const char *real_git_dir, int); extern int init_db(const char *template_dir, unsigned int flags); extern void sanitize_stdfds(void); +extern int daemonize(int *); #define alloc_nr(x) (((x)+16)*3/2) diff --git a/daemon.c b/daemon.c index 503e039..2650504 100644 --- a/daemon.c +++ b/daemon.c @@ -1056,11 +1056,6 @@ static void drop_privileges(struct credentials *cred) /* nothing */ } -static void daemonize(void) -{ - die(--detach not supported on this platform); -} - static struct credentials *prepare_credentials(const char *user_name, const char *group_name) { @@ -1102,24 +1097,6 @@ static struct credentials *prepare_credentials(const char *user_name, return c; } - -static void daemonize(void) -{ - switch (fork()) { - case 0: - break; - case -1: - die_errno(fork failed); - default: - exit(0); - } - if (setsid() == -1) - die_errno(setsid failed); - close(0); - close(1); - close(2); - sanitize_stdfds(); -} #endif static void store_pid(const char *path) @@ -1333,9 +1310,10 @@ int main(int argc, char **argv) if (inetd_mode || serve_mode) return execute(); - if (detach) - daemonize(); - else + if (detach) { + if (daemonize(NULL)) + die(--detach not supported on this platform); + } else sanitize_stdfds(); if (pid_file) diff --git a/file-watcher.c b/file-watcher.c index 9c639ef..1e1ccad 100644 --- a/file-watcher.c +++ b/file-watcher.c @@ -111,7 +111,10 @@ int main(int argc, const char **argv) { struct strbuf sb = STRBUF_INIT; int i, new_nr, fd, quit = 0, nr_common; + int daemon = 0; struct option options[] = { + OPT_BOOL(0, detach, daemon, +N_(run in background)), OPT_END() }; @@ -135,6 +138,20 @@ int main(int argc, const char **argv) atexit(cleanup); sigchain_push_common(cleanup_on_signal); + if (daemon) { + int err; + strbuf_addf(sb, %s/log, socket_path); + err = open(sb.buf, O_CREAT | O_TRUNC | O_WRONLY, 0600); + adjust_shared_perm(sb.buf); + if (err == -1) + die_errno(_(unable to create %s), sb.buf); + if (daemonize(do_not_clean_up)) + die(_(--detach not supported on this platform)); + dup2(err, 1); + dup2(err, 2); + close(err); + } + nr_common = 1; pfd_alloc = pfd_nr = nr_common; pfd = xmalloc(sizeof(*pfd) * pfd_alloc); diff --git a/setup.c b/setup.c index 6c3f85f..757c45f 100644 --- a/setup.c +++ b/setup.c @@ -787,3 +787,28 @@ void sanitize_stdfds(void) if (fd 2) close(fd); } + +int daemonize(int *flag) +{ +#ifndef NO_POSIX_GOODIES + switch (fork()) { + case 0: + break; + case -1: + die_errno(fork failed); + default: + if (flag) + *flag = 1; + exit(0); + } + if (setsid() == -1) + die_errno(setsid failed); + close(0); + close(1); + close(2); + sanitize_stdfds(); + return 0; +#else + return -1; +#endif +} -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 10/26] read-cache: new flag CE_WATCHED to mark what file is watched
This bit is basically dynamic CE_VALID. It marks entries that are being watched by the incoming file-watcher. When an index is loaded, file watcher is contacted and the list of updated paths is retrieved. These paths will have CE_WATCHED cleared and lstat() will be called on them. Those that have CE_WATCHED and not in the list will have CE_VALID turn on to skip lstat(). The setting is temporarily, CE_VALID is not saved to disk if CE_WATCHED is also set. We keep the CE_WATCHED in a new extension, separated from the entries to save some space because extended ce_flags adds 2 bytes per entry and this flag would be present in the majority of entries. When stored as bitmap, this extension could compress very well with ewah algorithm. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- Documentation/technical/index-format.txt | 6 + cache.h | 3 +++ read-cache.c | 41 +++- 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt index f352a9b..24fd0ae 100644 --- a/Documentation/technical/index-format.txt +++ b/Documentation/technical/index-format.txt @@ -198,3 +198,9 @@ Git index format - At most three 160-bit object names of the entry in stages from 1 to 3 (nothing is written for a missing stage). +=== File watcher + + The signature of this extension is { 'W', 'A', 'T', 'C' }. + + - A bit map of all entries in the index, n-th bit of m-th byte +corresponds to CE_WATCHED of the m * 8+ n-th index entry. diff --git a/cache.h b/cache.h index f14d535..a0af2a5 100644 --- a/cache.h +++ b/cache.h @@ -169,6 +169,9 @@ struct cache_entry { /* used to temporarily mark paths matched by pathspecs */ #define CE_MATCHED (1 26) +/* set CE_VALID at runtime if the entry is guaranteed not updated */ +#define CE_WATCHED (1 27) + /* * Extended on-disk flags */ diff --git a/read-cache.c b/read-cache.c index 3b6daf1..098d3b6 100644 --- a/read-cache.c +++ b/read-cache.c @@ -33,6 +33,7 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int reall #define CACHE_EXT(s) ( (s[0]24)|(s[1]16)|(s[2]8)|(s[3]) ) #define CACHE_EXT_TREE 0x54524545 /* TREE */ #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* REUC */ +#define CACHE_EXT_WATCH 0x57415443 /* WATC */ struct index_state the_index; @@ -1289,6 +1290,19 @@ static int verify_hdr(struct cache_header *hdr, return 0; } +static void read_watch_extension(struct index_state *istate, uint8_t *data, +unsigned long sz) +{ + int i; + if ((istate-cache_nr + 7) / 8 != sz) { + error(invalid 'WATC' extension); + return; + } + for (i = 0; i istate-cache_nr; i++) + if (data[i / 8] (1 (i % 8))) + istate-cache[i]-ce_flags |= CE_WATCHED; +} + static int read_index_extension(struct index_state *istate, const char *ext, void *data, unsigned long sz) { @@ -1299,6 +1313,9 @@ static int read_index_extension(struct index_state *istate, case CACHE_EXT_RESOLVE_UNDO: istate-resolve_undo = resolve_undo_read(data, sz); break; + case CACHE_EXT_WATCH: + read_watch_extension(istate, data, sz); + break; default: if (*ext 'A' || 'Z' *ext) return error(index uses %.4s extension, which we do not understand, @@ -1777,7 +1794,7 @@ int write_index(struct index_state *istate, int newfd) { git_SHA_CTX c; struct cache_header hdr; - int i, err, removed, extended, hdr_version; + int i, err, removed, extended, hdr_version, has_watches = 0; struct cache_entry **cache = istate-cache; int entries = istate-cache_nr; struct stat st; @@ -1786,6 +1803,8 @@ int write_index(struct index_state *istate, int newfd) for (i = removed = extended = 0; i entries; i++) { if (cache[i]-ce_flags CE_REMOVE) removed++; + else if (cache[i]-ce_flags CE_WATCHED) + has_watches++; /* reduce extended entries if possible */ cache[i]-ce_flags = ~CE_EXTENDED; @@ -1857,6 +1876,26 @@ int write_index(struct index_state *istate, int newfd) if (err) return -1; } + if (has_watches) { + int id, sz = (entries - removed + 7) / 8; + uint8_t *data = xmalloc(sz); + memset(data, 0, sz); + for (i = 0, id = 0; i entries has_watches; i++) { + struct cache_entry *ce = cache[i]; + if (ce-ce_flags CE_REMOVE) + continue; + if (ce-ce_flags
[PATCH v3 11/26] Clear CE_WATCHED when set CE_VALID alone
If CE_WATCHED is set, CE_VALID is controlled by CE_WATCHED and will be cleared bfore writing to disk. Users of --assume-unchanged therefore need to clear CE_WATCHED to avoid this side effect. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- builtin/update-index.c | 12 read-cache.c | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/builtin/update-index.c b/builtin/update-index.c index e3a10d7..9283fd6 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -50,9 +50,13 @@ static int mark_ce_flags(const char *path, int flag, int mark) int namelen = strlen(path); int pos = cache_name_pos(path, namelen); if (0 = pos) { - if (mark) - active_cache[pos]-ce_flags |= flag; - else + if (mark) { + struct cache_entry *ce = active_cache[pos]; + if (flag == CE_VALID) + ce-ce_flags = (ce-ce_flags ~CE_WATCHED) | CE_VALID; + else + ce-ce_flags |= flag; + } else active_cache[pos]-ce_flags = ~flag; cache_tree_invalidate_path(active_cache_tree, path); active_cache_changed = 1; @@ -235,7 +239,7 @@ static int add_cacheinfo(unsigned int mode, const unsigned char *sha1, ce-ce_namelen = len; ce-ce_mode = create_ce_mode(mode); if (assume_unchanged) - ce-ce_flags |= CE_VALID; + ce-ce_flags = (ce-ce_flags ~CE_WATCHED) | CE_VALID; option = allow_add ? ADD_CACHE_OK_TO_ADD : 0; option |= allow_replace ? ADD_CACHE_OK_TO_REPLACE : 0; if (add_cache_entry(ce, option)) diff --git a/read-cache.c b/read-cache.c index 098d3b6..8961864 100644 --- a/read-cache.c +++ b/read-cache.c @@ -133,7 +133,7 @@ void fill_stat_cache_info(struct cache_entry *ce, struct stat *st) fill_stat_data(ce-ce_stat_data, st); if (assume_unchanged) - ce-ce_flags |= CE_VALID; + ce-ce_flags = (ce-ce_flags ~CE_WATCHED) | CE_VALID; if (S_ISREG(st-st_mode)) ce_mark_uptodate(ce); -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 09/26] read-cache: save trailing sha-1
This will be used as signature to know if the index has changed. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- cache.h | 1 + read-cache.c | 7 --- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cache.h b/cache.h index 7a836b1..f14d535 100644 --- a/cache.h +++ b/cache.h @@ -279,6 +279,7 @@ struct index_state { initialized : 1; struct hash_table name_hash; struct hash_table dir_hash; + unsigned char sha1[20]; }; extern struct index_state the_index; diff --git a/read-cache.c b/read-cache.c index 33dd676..3b6daf1 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1269,10 +1269,11 @@ struct ondisk_cache_entry_extended { ondisk_cache_entry_extended_size(ce_namelen(ce)) : \ ondisk_cache_entry_size(ce_namelen(ce))) -static int verify_hdr(struct cache_header *hdr, unsigned long size) +static int verify_hdr(struct cache_header *hdr, + unsigned long size, + unsigned char *sha1) { git_SHA_CTX c; - unsigned char sha1[20]; int hdr_version; if (hdr-hdr_signature != htonl(CACHE_SIGNATURE)) @@ -1461,7 +1462,7 @@ int read_index_from(struct index_state *istate, const char *path) close(fd); hdr = mmap; - if (verify_hdr(hdr, mmap_size) 0) + if (verify_hdr(hdr, mmap_size, istate-sha1) 0) goto unmap; istate-version = ntohl(hdr-hdr_version); -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 12/26] read-cache: basic hand shaking to the file watcher
read_cache() connects to the file watcher, specified by filewatcher.path config, and performs basic hand shaking. CE_WATCHED is cleared if git and file watcher have different views on the index state. All send/receive calls must be complete within a limited time to avoid a buggy file-watcher hang git status forever. And the whole point of doing this is speed. If file watcher can't respond fast enough, for whatever reason, then it should not be used. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- Documentation/config.txt | 10 +++ Documentation/git-file-watcher.txt | 4 +- Makefile | 1 + cache.h| 1 + file-watcher-lib.c (new) | 91 ++ file-watcher-lib.h (new) | 6 ++ file-watcher.c | 152 - read-cache.c | 6 ++ 8 files changed, 269 insertions(+), 2 deletions(-) create mode 100644 file-watcher-lib.c create mode 100644 file-watcher-lib.h diff --git a/Documentation/config.txt b/Documentation/config.txt index 5f4d793..6ad653a 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1042,6 +1042,16 @@ difftool.tool.cmd:: difftool.prompt:: Prompt before each invocation of the diff tool. +filewatcher.path:: + The directory that contains the socket of `git file-watcher`. + If it's not an absolute path, it's relative to $GIT_DIR. An + empty path means no connection to file watcher. + +filewatcher.timeout:: + This is the maximum time in milliseconds that Git waits for + the file watcher to respond before giving up. Default value is + 50. Setting to -1 makes Git wait forever. + fetch.recurseSubmodules:: This option can be either set to a boolean value or to 'on-demand'. Setting it to a boolean changes the behavior of fetch and pull to diff --git a/Documentation/git-file-watcher.txt b/Documentation/git-file-watcher.txt index ec81f18..d91caf3 100644 --- a/Documentation/git-file-watcher.txt +++ b/Documentation/git-file-watcher.txt @@ -14,7 +14,9 @@ DESCRIPTION --- This program watches file changes in a git working directory and let Git now what files have been changed so that Git does not have to call -lstat(2) to detect that itself. +lstat(2) to detect that itself. Config key filewatcher.path needs to +be set to `socket directory` so Git knows how to contact to the file +watcher. OPTIONS --- diff --git a/Makefile b/Makefile index 8eef0d6..1c4d659 100644 --- a/Makefile +++ b/Makefile @@ -798,6 +798,7 @@ LIB_OBJS += entry.o LIB_OBJS += environment.o LIB_OBJS += exec_cmd.o LIB_OBJS += fetch-pack.o +LIB_OBJS += file-watcher-lib.o LIB_OBJS += fsck.o LIB_OBJS += gettext.o LIB_OBJS += gpg-interface.o diff --git a/cache.h b/cache.h index a0af2a5..b3ea574 100644 --- a/cache.h +++ b/cache.h @@ -283,6 +283,7 @@ struct index_state { struct hash_table name_hash; struct hash_table dir_hash; unsigned char sha1[20]; + int watcher; }; extern struct index_state the_index; diff --git a/file-watcher-lib.c b/file-watcher-lib.c new file mode 100644 index 000..d0636cc --- /dev/null +++ b/file-watcher-lib.c @@ -0,0 +1,91 @@ +#include cache.h +#include file-watcher-lib.h +#include pkt-line.h +#include unix-socket.h + +static char *watcher_path; +static int WAIT_TIME = 50; /* in ms */ + +static int connect_watcher(const char *path) +{ + struct strbuf sb = STRBUF_INIT; + int fd; + + if (!path || !*path) + return -1; + + strbuf_addf(sb, %s/socket, path); + fd = unix_stream_connect(sb.buf); + strbuf_release(sb); + return fd; +} + +static void reset_watches(struct index_state *istate, int disconnect) +{ + int i; + for (i = 0; i istate-cache_nr; i++) + if (istate-cache[i]-ce_flags CE_WATCHED) { + istate-cache[i]-ce_flags = ~(CE_WATCHED | CE_VALID); + istate-cache_changed = 1; + } + if (disconnect istate-watcher 0) { + close(istate-watcher); + istate-watcher = -1; + } +} + +static int watcher_config(const char *var, const char *value, void *data) +{ + if (!strcmp(var, filewatcher.path)) { + if (is_absolute_path(value)) + watcher_path = xstrdup(value); + else if (*value == '~') + watcher_path = expand_user_path(value); + else + watcher_path = git_pathdup(%s, value); + return 0; + } + if (!strcmp(var, filewatcher.timeout)) { + WAIT_TIME = git_config_int(var, value); + return 0; + } + return 0; +} + +void open_watcher(struct index_state *istate) +{ + static int read_config = 0; + char *msg; + + if (!get_git_work_tree()) { +
[PATCH v3 14/26] read-cache: put some limits on file watching
watch_entries() is a lot of computation and could trigger a lot more lookups in file-watcher. Normally after the first set of watches are in place, we do not need to update often. Moreover if the number of entries is small, the overhead of file watcher may actually slow git down. This patch only allows to update watches if the number of watchable files is over a limit (and there are new files added if this is not the first time). Measurements on Core i5-2520M and Linux 3.7.6, about 920 lstat() take 1ms. Somewhere between 2^16 and 2^17 lstat calls that it starts to take longer than 100ms. 2^16 is chosen at the minimum limit to start using file watcher. Of course this is only sensible default for single-repo use case. Lower it when you need to work with many small repos. Recently updated files are not considered watchable because they are likely to be updated again soon, not worth the ping-pong game with file watcher. The default limit 10min is just a random value. Recent limit is ignored if there are no watched files (e.g. a fresh clone, or after a bad hand shake with file watcher). Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- Documentation/config.txt | 9 +++ Documentation/technical/index-format.txt | 3 +++ cache.h | 1 + file-watcher-lib.c | 42 ++-- read-cache.c | 11 ++--- 5 files changed, 56 insertions(+), 10 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index 6ad653a..451c100 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1052,6 +1052,15 @@ filewatcher.timeout:: the file watcher to respond before giving up. Default value is 50. Setting to -1 makes Git wait forever. +filewatcher.minfiles:: + Start watching files if the number of watchable files are + above this limit. Default value is 65536. + +filewatcher.recentlimit:: + Files that are last updated within filewatcher.recentlimit + seconds from now are not considered watchable. Default value + is 600 (5 minutes). + fetch.recurseSubmodules:: This option can be either set to a boolean value or to 'on-demand'. Setting it to a boolean changes the behavior of fetch and pull to diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt index 24fd0ae..7081e55 100644 --- a/Documentation/technical/index-format.txt +++ b/Documentation/technical/index-format.txt @@ -204,3 +204,6 @@ Git index format - A bit map of all entries in the index, n-th bit of m-th byte corresponds to CE_WATCHED of the m * 8+ n-th index entry. + + - 1-byte, non-zero indicates the index should be scanned for new +watched entries. diff --git a/cache.h b/cache.h index b3ea574..10ff33e 100644 --- a/cache.h +++ b/cache.h @@ -279,6 +279,7 @@ struct index_state { struct cache_tree *cache_tree; struct cache_time timestamp; unsigned name_hash_initialized : 1, +update_watches : 1, initialized : 1; struct hash_table name_hash; struct hash_table dir_hash; diff --git a/file-watcher-lib.c b/file-watcher-lib.c index 791faae..d4949a5 100644 --- a/file-watcher-lib.c +++ b/file-watcher-lib.c @@ -5,6 +5,8 @@ static char *watcher_path; static int WAIT_TIME = 50; /* in ms */ +static int watch_lowerlimit = 65536; +static int recent_limit = 600; static int connect_watcher(const char *path) { @@ -22,12 +24,17 @@ static int connect_watcher(const char *path) static void reset_watches(struct index_state *istate, int disconnect) { - int i; + int i, changed = 0; for (i = 0; i istate-cache_nr; i++) if (istate-cache[i]-ce_flags CE_WATCHED) { istate-cache[i]-ce_flags = ~(CE_WATCHED | CE_VALID); - istate-cache_changed = 1; + changed = 1; } + recent_limit = 0; + if (changed) { + istate-update_watches = 1; + istate-cache_changed = 1; + } if (disconnect istate-watcher 0) { close(istate-watcher); istate-watcher = -1; @@ -49,6 +56,14 @@ static int watcher_config(const char *var, const char *value, void *data) WAIT_TIME = git_config_int(var, value); return 0; } + if (!strcmp(var, filewatcher.minfiles)) { + watch_lowerlimit = git_config_int(var, value); + return 0; + } + if (!strcmp(var, filewatcher.recentlimit)) { + recent_limit = git_config_int(var, value); + return 0; + } return 0; } @@ -63,12 +78,18 @@ void open_watcher(struct index_state *istate) } if (!read_config) { + int i; /* * can't hook into
[PATCH v3 15/26] read-cache: get changed file list from file watcher
When some paths are watched, they are added to the watched list in file watcher. When a path in this list is updated, the path is moved to changed list and no longer watched. With this patch we have a complete path exchanging picture between git and file-watcher: 1) Hand shake 2) Get the list of changed paths, clear CE_WATCHED on these paths. Set CE_VALID on the remaining CE_WATCHED paths 3) (Optionally) Ask to watch more paths. Set CE_WATCHED on them. CE_VALID is not set so these are still lstat'd 4) Refresh as usual. lstat is skipped on CE_VALID paths. If one of those paths at step 3 are found modified, CE_WATCHED is removed. 5) Write index to disk. Notify file-watcher about new index signature. Ask file watcher to remove the changed paths. A few points: - Changed list remains until step 5. If git crashes or do not write index down, next time it starts, it'll fed the same changed list. - If git writes index down without telling file-watcher about it, next time it starts, hand shake should fail and git should clear all CE_WATCHED. - There's a buffer between starting watch at #3 and saving watch at #5. We do verify paths are clean at #4. But that time all watches should have been active for a while. No chance for race conditions. - #5 is sort of atomic. If git crashes half way through step 5, file watcher should not update its index signature. Which means next time git starts, hand shake fails (because new index's written) so we'll start over. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- cache.h| 1 + file-watcher-lib.c | 99 ++ file-watcher-lib.h | 1 + file-watcher.c | 138 + read-cache.c | 21 +++- 5 files changed, 258 insertions(+), 2 deletions(-) diff --git a/cache.h b/cache.h index 10ff33e..9f7d952 100644 --- a/cache.h +++ b/cache.h @@ -285,6 +285,7 @@ struct index_state { struct hash_table dir_hash; unsigned char sha1[20]; int watcher; + struct string_list *updated_entries; }; extern struct index_state the_index; diff --git a/file-watcher-lib.c b/file-watcher-lib.c index d4949a5..b6b0848 100644 --- a/file-watcher-lib.c +++ b/file-watcher-lib.c @@ -2,6 +2,7 @@ #include file-watcher-lib.h #include pkt-line.h #include unix-socket.h +#include string-list.h static char *watcher_path; static int WAIT_TIME = 50; /* in ms */ @@ -25,6 +26,11 @@ static int connect_watcher(const char *path) static void reset_watches(struct index_state *istate, int disconnect) { int i, changed = 0; + if (istate-updated_entries) { + string_list_clear(istate-updated_entries, 0); + free(istate-updated_entries); + istate-updated_entries = NULL; + } for (i = 0; i istate-cache_nr; i++) if (istate-cache[i]-ce_flags CE_WATCHED) { istate-cache[i]-ce_flags = ~(CE_WATCHED | CE_VALID); @@ -41,6 +47,58 @@ static void reset_watches(struct index_state *istate, int disconnect) } } +static void mark_ce_valid(struct index_state *istate) +{ + struct strbuf sb = STRBUF_INIT; + char *line, *end; + int i, len; + unsigned long n; + if (packet_write_timeout(istate-watcher, WAIT_TIME, get-changed) = 0 || + !(line = packet_read_line_timeout(istate-watcher, WAIT_TIME, len)) || + !starts_with(line, changed )) { + reset_watches(istate, 1); + return; + } + n = strtoul(line + 8, end, 10); + if (end != line + len) { + reset_watches(istate, 1); + return; + } + if (!n) + goto done; + strbuf_grow(sb, n); + if (read_in_full_timeout(istate-watcher, sb.buf, n, WAIT_TIME) != n) { + strbuf_release(sb); + reset_watches(istate, 1); + return; + } + line = sb.buf; + end = line + n; + for (; line end; line += len + 1) { + len = strlen(line); + i = index_name_pos(istate, line, len); + if (i 0) + continue; + if (istate-cache[i]-ce_flags CE_WATCHED) { + istate-cache[i]-ce_flags = ~CE_WATCHED; + istate-cache_changed = 1; + } + if (!istate-updated_entries) { + struct string_list *sl; + sl = xmalloc(sizeof(*sl)); + memset(sl, 0, sizeof(*sl)); + sl-strdup_strings = 1; + istate-updated_entries = sl; + } + string_list_append(istate-updated_entries, line); + } + strbuf_release(sb); +done: + for (i = 0; i istate-cache_nr; i++) + if (istate-cache[i]-ce_flags CE_WATCHED) +
[PATCH v3 16/26] git-compat-util.h: add inotify stubs on non-Linux platforms
This is to avoid spreading #ifdef HAVE_INOTIFY around and keep most code compiled even if it's never active. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- config.mak.uname | 1 + git-compat-util.h | 43 +++ 2 files changed, 44 insertions(+) diff --git a/config.mak.uname b/config.mak.uname index 7d31fad..ee548f5 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -33,6 +33,7 @@ ifeq ($(uname_S),Linux) HAVE_PATHS_H = YesPlease LIBC_CONTAINS_LIBINTL = YesPlease HAVE_DEV_TTY = YesPlease + BASIC_CFLAGS += -DHAVE_INOTIFY endif ifeq ($(uname_S),GNU/kFreeBSD) NO_STRLCPY = YesPlease diff --git a/git-compat-util.h b/git-compat-util.h index cbd86c3..8b55dd0 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -128,6 +128,9 @@ #else #include poll.h #endif +#ifdef HAVE_INOTIFY +#include sys/inotify.h +#endif #if defined(__MINGW32__) /* pull in Windows compatibility stuff */ @@ -721,4 +724,44 @@ void warn_on_inaccessible(const char *path); /* Get the passwd entry for the UID of the current process. */ struct passwd *xgetpwuid_self(void); +#ifndef HAVE_INOTIFY +/* Keep inotify-specific code build, even if it's not used */ + +#define IN_DELETE_SELF 1 +#define IN_MOVE_SELF 2 +#define IN_ATTRIB 4 +#define IN_DELETE 8 +#define IN_MODIFY 16 +#define IN_MOVED_FROM 32 +#define IN_MOVED_TO64 +#define IN_Q_OVERFLOW 128 +#define IN_UNMOUNT 256 +#define IN_CREATE 512 +#define IN_ISDIR 1024 +#define IN_IGNORED 2048 + +struct inotify_event { + int event, mask, wd, len; + char name[FLEX_ARRAY]; +}; + +static inline int inotify_init() +{ + errno = ENOSYS; + return -1; +} + +static inline int inotify_add_watch(int fd, const char *path, int options) +{ + errno = ENOSYS; + return -1; +} + +static inline int inotify_rm_watch(int fd, int wd) +{ + errno = ENOSYS; + return -1; +} +#endif + #endif -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 13/26] read-cache: ask file watcher to watch files
We want to watch files that are never changed because lstat() on those files is a wasted effort. So we sort unwatched files by date and start adding them to the file watcher until it barfs (e.g. hits inotify limit). Note that we still do lstat() on these new watched files because they could have changed before the file watcher could watch them. Watched files may only skip lstat() at the next run. Also, at this early in the index loading process, we don't know what files are dirty and thus can skip watching (we do clear CE_WATCHED on entries that are not verified clean before writing index). So the watches are set, but git ignores its results. Maybe in future we could store the list of dirty files in WATC extension and use it as a hint to skip watching. In the future, file watcher should figure out what paths are watchable, what not (e.g. network filesystems) and reject them. For now it's the user resposibility to set (or unset) filewatcher.path properly. The previous attempt sends paths in batch, 64k per pkt-line, then wait for response. It's designed to stop short in case file watcher is out of resources. But that's a rare case, and send/wait cycles increase latency. Instead we now send everything in one packet, and not in pkt-line to avoid the 64k limit. Then we wait for the response. On webkit.git, normal status -uno takes 0m1.138s. The sending 14M (of 182k paths) takes 52ms extra. Previous approach takes 213ms extra. Of course in the end, extra time is longer because file watcher is basically no-op so far. There is not much room for improvement. If we compress paths to reduce payload, zlib time costs about 300ms (so how small the end result is no longer matters). Even simple prefix compressing (index v4 style) would cost 76ms on processing time alone (reducing payload to 3M). Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- file-watcher-lib.c | 83 + file-watcher-lib.h | 1 + file-watcher.c | 91 ++ read-cache.c | 18 +-- 4 files changed, 191 insertions(+), 2 deletions(-) diff --git a/file-watcher-lib.c b/file-watcher-lib.c index d0636cc..791faae 100644 --- a/file-watcher-lib.c +++ b/file-watcher-lib.c @@ -89,3 +89,86 @@ void open_watcher(struct index_state *istate) return; } } + +static int sort_by_date(const void *a_, const void *b_) +{ + const struct cache_entry *a = *(const struct cache_entry **)a_; + const struct cache_entry *b = *(const struct cache_entry **)b_; + uint32_t seca = a-ce_stat_data.sd_mtime.sec; + uint32_t secb = b-ce_stat_data.sd_mtime.sec; + return seca - secb; +} + +static inline int ce_watchable(struct cache_entry *ce) +{ + return + !(ce-ce_flags CE_WATCHED) + !(ce-ce_flags CE_VALID) + /* +* S_IFGITLINK should not be watched +* obviously. S_IFLNK could be problematic because +* inotify may follow symlinks without IN_DONT_FOLLOW +*/ + S_ISREG(ce-ce_mode); +} + +static void send_watches(struct index_state *istate, +struct cache_entry **sorted, int nr) +{ + struct strbuf sb = STRBUF_INIT; + int i, len = 0; + + for (i = 0; i nr; i++) + len += ce_namelen(sorted[i]) + 1; + + if (packet_write_timeout(istate-watcher, WAIT_TIME, watch %d, len) = 0) + return; + + strbuf_grow(sb, len); + for (i = 0; i nr; i++) + strbuf_add(sb, sorted[i]-name, ce_namelen(sorted[i]) + 1); + + if (write_in_full_timeout(istate-watcher, sb.buf, + sb.len, WAIT_TIME) != sb.len) { + strbuf_release(sb); + return; + } + strbuf_release(sb); + + for (;;) { + char *line, *end; + unsigned long n; + + if (!(line = packet_read_line_timeout(istate-watcher, + WAIT_TIME, len))) + return; + if (starts_with(line, watching )) + continue; + if (!starts_with(line, watched )) + return; + n = strtoul(line + 8, end, 10); + for (i = 0; i n; i++) + sorted[i]-ce_flags |= CE_WATCHED; + istate-cache_changed = 1; + break; + } +} + +void watch_entries(struct index_state *istate) +{ + int i, nr; + struct cache_entry **sorted; + + if (istate-watcher = 0) + return; + for (i = nr = 0; i istate-cache_nr; i++) + if (ce_watchable(istate-cache[i])) + nr++; + sorted = xmalloc(sizeof(*sorted) * nr); + for (i = nr = 0; i istate-cache_nr; i++) + if (ce_watchable(istate-cache[i])) +
[PATCH v3 20/26] read-cache: new variable to verify file-watcher results
If GIT_TEST_WATCHED is set to a non-zero value, Git still uses file watcher if configured. But it does lstat() anyway and notifies the user if a file is changed but the file watcher said otherwise. Note that there is a race condition. Changed paths are retrieved at time X, then refresh and validation at time Y. Even if X and Y are very close, an update can happen between X and Y, causing a false report. If GIT_TEST_WATCHED is set greater than 1, git will abort instead of just warn and move on. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- Documentation/git-file-watcher.txt | 8 cache.h| 5 - read-cache.c | 17 + 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/Documentation/git-file-watcher.txt b/Documentation/git-file-watcher.txt index d694fea..dd09e30 100644 --- a/Documentation/git-file-watcher.txt +++ b/Documentation/git-file-watcher.txt @@ -25,6 +25,14 @@ OPTIONS --detach:: Run in background. +TROUBLESHOOTING +--- +Setting environment variable `GIT_TEST_WATCHED` to a non-zero number +makes Git communicate with the file watcher, but do lstat anyway to +verify that the file watcher results. Setting to 1 prints warning when +file watcher fails to monitor files correctly. Setting to 2 aborts Git +when it happens. + BUGS On Linux, file watcher may fail to detect changes if you move the work diff --git a/cache.h b/cache.h index c229bf9..806c886 100644 --- a/cache.h +++ b/cache.h @@ -224,7 +224,10 @@ static inline unsigned create_ce_flags(unsigned stage) #define ce_mark_uptodate(ce) ((ce)-ce_flags |= CE_UPTODATE) static inline int ce_valid(const struct cache_entry *ce) { - return ce-ce_flags CE_VALID; + extern int test_watched; + if (!test_watched) + return ce-ce_flags CE_VALID; + return (ce-ce_flags CE_VALID) !(ce-ce_flags CE_WATCHED); } #define ce_permissions(mode) (((mode) 0100) ? 0755 : 0644) diff --git a/read-cache.c b/read-cache.c index 95c9ccb..d5f084a 100644 --- a/read-cache.c +++ b/read-cache.c @@ -37,6 +37,7 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int reall #define CACHE_EXT_WATCH 0x57415443 /* WATC */ struct index_state the_index; +int test_watched; static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) { @@ -1117,6 +1118,16 @@ static void show_file(const char * fmt, const char * name, int in_porcelain, printf(fmt, name); } +static void report_bad_watcher(struct index_state *istate, + struct cache_entry *ce) +{ + if (test_watched 1) + die(%s is updated but file-watcher said no, + ce-name); + warning(%s is updated but file-watcher said no, + ce-name); +} + int refresh_index(struct index_state *istate, unsigned int flags, const struct pathspec *pathspec, char *seen, const char *header_msg) @@ -1188,6 +1199,9 @@ int refresh_index(struct index_state *istate, unsigned int flags, ce-ce_flags = ~CE_VALID; istate-cache_changed = 1; } + if (test_watched + (ce-ce_flags CE_WATCHED) (ce-ce_flags CE_VALID)) + report_bad_watcher(istate, ce); if (quiet) continue; @@ -1460,6 +1474,9 @@ int read_index_from(struct index_state *istate, const char *path) if (istate-initialized) return istate-cache_nr; + if (getenv(GIT_TEST_WATCHED)) + test_watched = atoi(getenv(GIT_TEST_WATCHED)); + istate-timestamp.sec = 0; istate-timestamp.nsec = 0; fd = open(path, O_RDONLY); -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 17/26] file-watcher: inotify support, watching part
struct dir manages inotify file descriptor and forms a tree. struct file manages a file. When a file is watched, all dirs up to the file is watched. Any changes on a directory impacts all subdirs and files. The way data structure is made might be inotify-specific. I haven't thought of how other file notification mechanisms may be implemented. So there may be some refactoring later when a new OS is supported. Room for improvement: consecutive watched paths likely share the same directory part (even though they are sorted by mtime, not name). Try remember the last dir sequence and reduce lookups. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- Documentation/git-file-watcher.txt | 13 +++ file-watcher.c | 226 - 2 files changed, 238 insertions(+), 1 deletion(-) diff --git a/Documentation/git-file-watcher.txt b/Documentation/git-file-watcher.txt index d91caf3..d694fea 100644 --- a/Documentation/git-file-watcher.txt +++ b/Documentation/git-file-watcher.txt @@ -18,11 +18,24 @@ lstat(2) to detect that itself. Config key filewatcher.path needs to be set to `socket directory` so Git knows how to contact to the file watcher. +This program is only supported under Linux with inotify(7) support. + OPTIONS --- --detach:: Run in background. +BUGS + +On Linux, file watcher may fail to detect changes if you move the work +tree from outside. For example if you have work tree at +`/tmp/foo/work`, you move `/tmp/foo` to `/tmp/bar`, make some changes +in `/tmp/bar/work` and move `/tmp/bar` back to `/tmp/foo`, changes +won't get noticed. Moving `/tmp/foo/work` to something else is fine. + +inotify may not work well with network filesystems and a few special +others. Check inotify documents. + SEE ALSO linkgit:git-update-index[1], diff --git a/file-watcher.c b/file-watcher.c index aa2daf6..d0762e6 100644 --- a/file-watcher.c +++ b/file-watcher.c @@ -11,6 +11,28 @@ static const char *const file_watcher_usage[] = { NULL }; +struct dir; +struct repository; + +struct file { + char *name; + struct dir *parent; + struct repository *repo; + struct file *next; +}; + +struct dir { + char *name; + struct dir *parent; + struct dir **subdirs; + struct file **files; + struct repository *repo; /* only for root note */ + int wd, nr_subdirs, nr_files; +}; + +static struct dir **wds; +static int wds_alloc; + struct repository { char *work_tree; char index_signature[41]; @@ -25,6 +47,7 @@ struct repository { struct string_list updated; int updated_sorted; int updating; + struct dir *root; }; const char *invalid_signature = ; @@ -42,10 +65,199 @@ struct connection { static struct connection **conns; static struct pollfd *pfd; static int conns_alloc, pfd_nr, pfd_alloc; +static int inotify_fd; + +/* + * IN_DONT_FOLLOW does not matter now as we do not monitor + * symlinks. See ce_watchable(). + */ +#define INOTIFY_MASKS (IN_DELETE_SELF | IN_MOVE_SELF | \ + IN_CREATE | IN_ATTRIB | IN_DELETE | IN_MODIFY | \ + IN_MOVED_FROM | IN_MOVED_TO) +static struct dir *create_dir(struct dir *parent, const char *path, + const char *basename) +{ + struct dir *d; + int wd = inotify_add_watch(inotify_fd, path, INOTIFY_MASKS); + if (wd 0) + return NULL; + + d = xmalloc(sizeof(*d)); + memset(d, 0, sizeof(*d)); + d-wd = wd; + d-parent = parent; + d-name = xstrdup(basename); + + ALLOC_GROW(wds, wd + 1, wds_alloc); + wds[wd] = d; + return d; +} + +static int get_dir_pos(struct dir *d, const char *name) +{ + int first, last; + + first = 0; + last = d-nr_subdirs; + while (last first) { + int next = (last + first) 1; + int cmp = strcmp(name, d-subdirs[next]-name); + if (!cmp) + return next; + if (cmp 0) { + last = next; + continue; + } + first = next+1; + } + + return -first-1; +} + +static void free_file(struct dir *d, int pos, int topdown); +static void free_dir(struct dir *d, int topdown) +{ + struct dir *p = d-parent; + int pos; + if (!topdown p (pos = get_dir_pos(p, d-name)) 0) + die(How come this directory is not registered in its parent?); + if (d-repo) + d-repo-root = NULL; + wds[d-wd] = NULL; + inotify_rm_watch(inotify_fd, d-wd); + if (topdown) { + int i; + for (i = 0; i d-nr_subdirs; i++) + free_dir(d-subdirs[i], topdown); + for (i = 0; i d-nr_files; i++) + free_file(d, i, topdown); + } +
[PATCH v3 18/26] file-watcher: inotify support, notification part
Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- file-watcher.c | 142 - 1 file changed, 141 insertions(+), 1 deletion(-) diff --git a/file-watcher.c b/file-watcher.c index d0762e6..5867942 100644 --- a/file-watcher.c +++ b/file-watcher.c @@ -260,6 +260,131 @@ static int watch_path(struct repository *repo, char *path) return 0; } +static inline void queue_file_changed(struct file *f, struct strbuf *sb) +{ + int len = sb-len; + strbuf_addf(sb, %s%s, f-parent-parent ? / : , f-name); + string_list_append(f-repo-updated, sb-buf); + f-repo-updated_sorted = 0; + strbuf_setlen(sb, len); +} + +static void construct_path(struct dir *d, struct strbuf *sb) +{ + if (!d-parent) + return; + if (!d-parent-parent) { + strbuf_addstr(sb, d-name); + return; + } + construct_path(d-parent, sb); + strbuf_addf(sb, /%s, d-name); +} + +static void file_changed(const struct inotify_event *event, +struct dir *d, int pos) +{ + struct strbuf sb = STRBUF_INIT; + construct_path(d, sb); + queue_file_changed(d-files[pos], sb); + strbuf_release(sb); + free_file(d, pos, 0); +} + +static void dir_changed(const struct inotify_event *event, struct dir *d, + const char *base) +{ + struct strbuf sb = STRBUF_INIT; + int i; + + if (!base) /* top call - base == NULL */ + construct_path(d, sb); + else { + strbuf_addstr(sb, base); + if (sb.len) + strbuf_addch(sb, '/'); + strbuf_addstr(sb, d-name); + } + + for (i = 0; i d-nr_files; i++) + queue_file_changed(d-files[i], sb); + for (i = 0; i d-nr_subdirs; i++) { + dir_changed(event, d-subdirs[i], sb.buf); + if (!base) + free_dir(d-subdirs[i], 1); + } + strbuf_release(sb); + if (!base) + free_dir(d, 0); +} + +static void reset_repo(struct repository *repo, ino_t inode); +static int do_handle_inotify(const struct inotify_event *event) +{ + struct dir *d; + int pos; + + if (event-mask (IN_Q_OVERFLOW | IN_UNMOUNT)) { + int i; + for (i = 0; i nr_repos; i++) + reset_repo(repos[i], 0); + return 0; + } + + if ((event-mask IN_IGNORED) || + /* +* Perhaps left over events that we have not consumed +* before the watch descriptor is removed. +*/ + event-wd = wds_alloc || wds[event-wd] == NULL) + return 0; + + d = wds[event-wd]; + + /* +* If something happened to the watched directory, consider +* everything inside modified +*/ + if (event-mask (IN_DELETE_SELF | IN_MOVE_SELF)) { + dir_changed(event, d, NULL); + return 0; + } + + if (!(event-mask IN_ISDIR)) { + pos = get_file_pos(d, event-name); + if (pos = 0) + file_changed(event, d, pos); + } + + return 0; +} + +static int handle_inotify(int fd) +{ + static char *buf; + static unsigned int buf_len = 0; + unsigned int avail, offset; + int ret, len; + + /* drain the event queue */ + if (ioctl(fd, FIONREAD, avail)) + die_errno(unable to FIONREAD inotify handle); + if (buf_len avail) { + buf = xrealloc(buf, avail); + buf_len = avail; + } + len = read(fd, buf, avail); + if (len = 0) + return -1; + ret = offset = 0; + while (offset len) { + struct inotify_event *event = (void *)(buf + offset); + ret += do_handle_inotify(event); + offset += sizeof(struct inotify_event) + event-len; + } + return ret; +} + static void get_changed_list(int conn_id) { struct strbuf sb = STRBUF_INIT; @@ -466,6 +591,12 @@ static int handle_command(int conn_id) * capabilities. Capabilities in uppercase MUST be * supported. If any side does not understand any of the * advertised uppercase capabilities, it must disconnect. +* +* The way the main event loop is structured, we should get at +* least one handle_inotify() before receiving the next +* command. And handle_inotify() should process all events by +* this point of time. This guarantees our reports won't miss +* anything by the time get-changed is called. */ if ((arg = skip_prefix(msg, hello))) { if (*arg) { /* no capabilities supported yet */ @@ -753,11 +884,15 @@ int main(int argc, const char **argv) close(err); } -
[PATCH v3 26/26] Disable file-watcher with system inotify on some tests
When file watcher is active via GIT_TEST_WATCHER_PATH, `ls-files -v` output may be different (e.g. H becomes W). Disable file watcher in those cases. We could make ls-files turn 'W' back to 'H' for these cases, but not sure it's worth the effort. The intention of running with GIT_TEST_WATCHER_PATH is to exercise file watcher in somewhat real scenarios and the remaining tests are more than enough for that. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- t/t1011-read-tree-sparse-checkout.sh | 2 ++ t/t2104-update-index-skip-worktree.sh | 2 ++ t/t7011-skip-worktree-reading.sh | 2 ++ t/t7012-skip-worktree-writing.sh | 2 ++ 4 files changed, 8 insertions(+) diff --git a/t/t1011-read-tree-sparse-checkout.sh b/t/t1011-read-tree-sparse-checkout.sh index 0c74bee..00e92bc 100755 --- a/t/t1011-read-tree-sparse-checkout.sh +++ b/t/t1011-read-tree-sparse-checkout.sh @@ -14,6 +14,8 @@ test_description='sparse checkout tests . ./test-lib.sh . $TEST_DIRECTORY/lib-read-tree.sh +unset GIT_TEST_WATCHER_PATH + test_expect_success 'setup' ' cat expected -\EOF 100644 77f0ba1734ed79d12881f81b36ee134de6a3327b 0 init.t diff --git a/t/t2104-update-index-skip-worktree.sh b/t/t2104-update-index-skip-worktree.sh index 1d0879b..2b93c58 100755 --- a/t/t2104-update-index-skip-worktree.sh +++ b/t/t2104-update-index-skip-worktree.sh @@ -7,6 +7,8 @@ test_description='skip-worktree bit test' . ./test-lib.sh +unset GIT_TEST_WATCHER_PATH + cat expect.full EOF H 1 H 2 diff --git a/t/t7011-skip-worktree-reading.sh b/t/t7011-skip-worktree-reading.sh index 88d60c1..2242718 100755 --- a/t/t7011-skip-worktree-reading.sh +++ b/t/t7011-skip-worktree-reading.sh @@ -7,6 +7,8 @@ test_description='skip-worktree bit test' . ./test-lib.sh +unset GIT_TEST_WATCHER_PATH + cat expect.full EOF H 1 H 2 diff --git a/t/t7012-skip-worktree-writing.sh b/t/t7012-skip-worktree-writing.sh index 9ceaa40..c8e0eb5 100755 --- a/t/t7012-skip-worktree-writing.sh +++ b/t/t7012-skip-worktree-writing.sh @@ -7,6 +7,8 @@ test_description='test worktree writing operations when skip-worktree is used' . ./test-lib.sh +unset GIT_TEST_WATCHER_PATH + test_expect_success 'setup' ' test_commit init echo modified init.t -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 19/26] Wrap CE_VALID test with ce_valid()
The next patch wants to ignore CE_VALID under some condition but not really clears it. Centralizing its access makes such a change easier. Not all ce_flags CE_VALID is converted though. The tests that really mean _bit_ CE_VALID remains so. The tests that mean ignore worktree are converted. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- builtin/grep.c | 2 +- cache.h| 4 diff-lib.c | 4 ++-- diff.c | 2 +- read-cache.c | 6 +++--- unpack-trees.c | 2 +- 6 files changed, 12 insertions(+), 8 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 63f8603..00526a1 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -386,7 +386,7 @@ static int grep_cache(struct grep_opt *opt, const struct pathspec *pathspec, int * are identical, even if worktree file has been modified, so use * cache version instead */ - if (cached || (ce-ce_flags CE_VALID) || ce_skip_worktree(ce)) { + if (cached || ce_valid(ce) || ce_skip_worktree(ce)) { if (ce_stage(ce)) continue; hit |= grep_sha1(opt, ce-sha1, ce-name, 0, ce-name); diff --git a/cache.h b/cache.h index 9f7d952..c229bf9 100644 --- a/cache.h +++ b/cache.h @@ -222,6 +222,10 @@ static inline unsigned create_ce_flags(unsigned stage) #define ce_uptodate(ce) ((ce)-ce_flags CE_UPTODATE) #define ce_skip_worktree(ce) ((ce)-ce_flags CE_SKIP_WORKTREE) #define ce_mark_uptodate(ce) ((ce)-ce_flags |= CE_UPTODATE) +static inline int ce_valid(const struct cache_entry *ce) +{ + return ce-ce_flags CE_VALID; +} #define ce_permissions(mode) (((mode) 0100) ? 0755 : 0644) static inline unsigned int create_ce_mode(unsigned int mode) diff --git a/diff-lib.c b/diff-lib.c index 346cac6..dcda7f3 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -198,7 +198,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) continue; /* If CE_VALID is set, don't look at workdir for file removal */ - changed = (ce-ce_flags CE_VALID) ? 0 : check_removed(ce, st); + changed = ce_valid(ce) ? 0 : check_removed(ce, st); if (changed) { if (changed 0) { perror(ce-name); @@ -369,7 +369,7 @@ static void do_oneway_diff(struct unpack_trees_options *o, /* if the entry is not checked out, don't examine work tree */ cached = o-index_only || - (idx ((idx-ce_flags CE_VALID) || ce_skip_worktree(idx))); + (idx (ce_valid(idx) || ce_skip_worktree(idx))); /* * Backward compatibility wart - diff-index -m does * not mean do not ignore merges, but match_missing. diff --git a/diff.c b/diff.c index 8e4a6a9..22c73fe 100644 --- a/diff.c +++ b/diff.c @@ -2636,7 +2636,7 @@ static int reuse_worktree_file(const char *name, const unsigned char *sha1, int * If ce is marked as assume unchanged, there is no * guarantee that work tree matches what we are looking for. */ - if ((ce-ce_flags CE_VALID) || ce_skip_worktree(ce)) + if (ce_valid(ce) || ce_skip_worktree(ce)) return 0; /* diff --git a/read-cache.c b/read-cache.c index 5540b06..95c9ccb 100644 --- a/read-cache.c +++ b/read-cache.c @@ -289,8 +289,8 @@ int ie_match_stat(const struct index_state *istate, */ if (!ignore_skip_worktree ce_skip_worktree(ce)) return 0; - if (!ignore_valid (ce-ce_flags CE_VALID)) - return 0; + if (!ignore_valid ce_valid(ce)) + return 0; /* * Intent-to-add entries have not been added, so the index entry @@ -1047,7 +1047,7 @@ static struct cache_entry *refresh_cache_ent(struct index_state *istate, ce_mark_uptodate(ce); return ce; } - if (!ignore_valid (ce-ce_flags CE_VALID)) { + if (!ignore_valid ce_valid(ce)) { ce_mark_uptodate(ce); return ce; } diff --git a/unpack-trees.c b/unpack-trees.c index 164354d..61c3f35 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1215,7 +1215,7 @@ static int verify_uptodate_1(const struct cache_entry *ce, * if this entry is truly up-to-date because this file may be * overwritten. */ - if ((ce-ce_flags CE_VALID) || ce_skip_worktree(ce)) + if (ce_valid(ce) || ce_skip_worktree(ce)) ; /* keep checking */ else if (o-reset || ce_uptodate(ce)) return 0; -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 25/26] file-watcher: tests for the client side
Similar to t7513, system inotify is taken out to give us better controlled environment. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- file-watcher-lib.c | 21 ++-- t/t7514-file-watcher-lib.sh (new +x) | 190 +++ test-file-watcher.c | 15 +++ 3 files changed, 219 insertions(+), 7 deletions(-) create mode 100755 t/t7514-file-watcher-lib.sh diff --git a/file-watcher-lib.c b/file-watcher-lib.c index 93afb52..8cc3b73 100644 --- a/file-watcher-lib.c +++ b/file-watcher-lib.c @@ -23,7 +23,8 @@ static int connect_watcher(const char *path) return fd; } -static void reset_watches(struct index_state *istate, int disconnect) +static void reset_watches(struct index_state *istate, int disconnect, + const char *reason) { int i, changed = 0; if (istate-updated_entries) { @@ -45,6 +46,8 @@ static void reset_watches(struct index_state *istate, int disconnect) close(istate-watcher); istate-watcher = -1; } + trace_printf_key(GIT_TRACE_WATCHER, reset%s: %s\n, +disconnect ? /disconnect : , reason); } static void mark_ce_valid(struct index_state *istate) @@ -53,15 +56,16 @@ static void mark_ce_valid(struct index_state *istate) char *line, *end; int i, len; unsigned long n; + trace_printf_key(GIT_TRACE_WATCHER, mark_ce_valid\n); if (packet_write_timeout(istate-watcher, WAIT_TIME, get-changed) = 0 || !(line = packet_read_line_timeout(istate-watcher, WAIT_TIME, len)) || !starts_with(line, changed )) { - reset_watches(istate, 1); + reset_watches(istate, 1, invalid get-changed response); return; } n = strtoul(line + 8, end, 10); if (end != line + len) { - reset_watches(istate, 1); + reset_watches(istate, 1, invalid get-changed response); return; } if (!n) @@ -69,7 +73,7 @@ static void mark_ce_valid(struct index_state *istate) strbuf_grow(sb, n); if (read_in_full_timeout(istate-watcher, sb.buf, n, WAIT_TIME) != n) { strbuf_release(sb); - reset_watches(istate, 1); + reset_watches(istate, 1, invalid get-changed payload); return; } line = sb.buf; @@ -131,7 +135,7 @@ void open_watcher(struct index_state *istate) char *msg; if (!get_git_work_tree()) { - reset_watches(istate, 1); + reset_watches(istate, 1, no worktree); return; } @@ -165,10 +169,11 @@ void open_watcher(struct index_state *istate) } istate-watcher = connect_watcher(watcher_path); + trace_printf_key(GIT_TRACE_WATCHER, open watcher %d\n, istate-watcher); if (packet_write_timeout(istate-watcher, WAIT_TIME, hello) = 0 || (msg = packet_read_line_timeout(istate-watcher, WAIT_TIME, NULL)) == NULL || strcmp(msg, hello)) { - reset_watches(istate, 1); + reset_watches(istate, 1, invalid hello response); return; } @@ -177,7 +182,7 @@ void open_watcher(struct index_state *istate) get_git_work_tree()) = 0 || (msg = packet_read_line_timeout(istate-watcher, WAIT_TIME, NULL)) == NULL || strcmp(msg, ok)) { - reset_watches(istate, 0); + reset_watches(istate, 0, inconsistent); istate-update_watches = 1; return; } @@ -265,6 +270,7 @@ void watch_entries(struct index_state *istate) nr++; if (nr watch_lowerlimit) return; + trace_printf_key(GIT_TRACE_WATCHER, watch %d\n, nr); sorted = xmalloc(sizeof(*sorted) * nr); for (i = nr = 0; i istate-cache_nr; i++) if (ce_watchable(istate-cache[i], now)) @@ -280,6 +286,7 @@ void close_watcher(struct index_state *istate, const unsigned char *sha1) int len, i, nr; if (istate-watcher = 0) return; + trace_printf_key(GIT_TRACE_WATCHER, close watcher\n); if (packet_write_timeout(istate-watcher, WAIT_TIME, new-index %s, sha1_to_hex(sha1)) = 0) goto done; diff --git a/t/t7514-file-watcher-lib.sh b/t/t7514-file-watcher-lib.sh new file mode 100755 index 000..8dabb13 --- /dev/null +++ b/t/t7514-file-watcher-lib.sh @@ -0,0 +1,190 @@ +#!/bin/sh + +test_description='File watcher daemon and client tests' + +. ./test-lib.sh + +if git file-watcher --check-support test_have_prereq POSIXPERM; then + : # good +else + skip_all=file-watcher not supported on this system + test_done +fi + +kill_it() { + test-file-watcher $1 EOF /dev/null +die +see you +EOF +} + +GIT_TEST_WATCHER=2
[PATCH v3 24/26] ls-files: print CE_WATCHED as W (or w with CE_VALID)
Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- Documentation/git-ls-files.txt | 1 + builtin/ls-files.c | 14 -- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt index c0856a6..bdb17a5 100644 --- a/Documentation/git-ls-files.txt +++ b/Documentation/git-ls-files.txt @@ -123,6 +123,7 @@ a space) at the start of each line: R:: removed/deleted C:: modified/changed K:: to be killed + W:: being watched by `git file-watcher` ?:: other -v:: diff --git a/builtin/ls-files.c b/builtin/ls-files.c index e1cf6d8..f1f7c07 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -46,6 +46,7 @@ static const char *tag_killed = ; static const char *tag_modified = ; static const char *tag_skip_worktree = ; static const char *tag_resolve_undo = ; +static const char *tag_watched = ; static void write_name(const char *name) { @@ -231,6 +232,7 @@ static void show_files(struct dir_struct *dir) if (show_cached || show_stage) { for (i = 0; i active_nr; i++) { const struct cache_entry *ce = active_cache[i]; + const char *tag; if ((dir-flags DIR_SHOW_IGNORED) !ce_excluded(dir, ce)) continue; @@ -238,8 +240,15 @@ static void show_files(struct dir_struct *dir) continue; if (ce-ce_flags CE_UPDATE) continue; - show_ce_entry(ce_stage(ce) ? tag_unmerged : - (ce_skip_worktree(ce) ? tag_skip_worktree : tag_cached), ce); + if (ce_stage(ce)) + tag = tag_unmerged; + else if (ce_skip_worktree(ce)) + tag = tag_skip_worktree; + else if (ce-ce_flags CE_WATCHED) + tag = tag_watched; + else + tag = tag_cached; + show_ce_entry(tag, ce); } } if (show_deleted || show_modified) { @@ -530,6 +539,7 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix) tag_killed = K ; tag_skip_worktree = S ; tag_resolve_undo = U ; + tag_watched = W ; } if (show_modified || show_others || show_deleted || (dir.flags DIR_SHOW_IGNORED) || show_killed) require_work_tree = 1; -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 22/26] file-watcher: quit if $WATCHER/socket is gone
This is more of an issue in development than in production. When a file-watcher related test fails, the daemon may be left hanging. When you rerun the same test, old $TRASH_DIRECTORY is wiped out and no one can communicate with the old daemon any more. Make the old daemon quit after 5 minutes in such cases. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- file-watcher.c | 26 -- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/file-watcher.c b/file-watcher.c index 5867942..1e45b25 100644 --- a/file-watcher.c +++ b/file-watcher.c @@ -834,11 +834,22 @@ static void check_socket_directory(const char *path) free(path_copy); } +static void run_housekeeping(void) +{ + struct stat st; + struct strbuf sb = STRBUF_INIT; + strbuf_addf(sb, %s/socket, socket_path); + if (stat(sb.buf, st) || !S_ISSOCK(st.st_mode)) + exit(0); + strbuf_release(sb); +} + int main(int argc, const char **argv) { struct strbuf sb = STRBUF_INIT; int i, new_nr, fd, quit = 0, nr_common; int daemon = 0; + time_t last_checked; struct option options[] = { OPT_BOOL(0, detach, daemon, N_(run in background)), @@ -894,19 +905,25 @@ int main(int argc, const char **argv) pfd[1].events = POLLIN; } + last_checked = time(NULL); while (!quit) { - if (poll(pfd, pfd_nr, -1) 0) { + int ret = poll(pfd, pfd_nr, 30); + int time_for_housekeeping = 0; + if (ret 0) { if (errno != EINTR) { error(Poll failed, resuming: %s, strerror(errno)); sleep(1); } continue; - } + } else if (ret == 0) + time_for_housekeeping = 1; if (inotify_fd (pfd[1].revents POLLIN)) { if (handle_inotify(inotify_fd)) break; + if (last_checked + 300 time(NULL)) + time_for_housekeeping = 1; } for (new_nr = i = nr_common; i pfd_nr; i++) { @@ -949,6 +966,11 @@ int main(int argc, const char **argv) accept_connection(pfd[0].fd); if (pfd[0].revents (POLLHUP | POLLERR | POLLNVAL)) die(_(error on listening socket)); + + if (time_for_housekeeping) { + run_housekeeping(); + last_checked = time(NULL); + } } return 0; } -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 21/26] Support running file watcher with the test suite
This is to force running the test suite with file-watcher with $ mkdir /tmp/watcher $ chmod 700 /tmp/watcher $ git-file-watcher /tmp/watcher/ then open another terminal and run $ export GIT_TEST_WATCHED=2 GIT_TEST_WATCHER=2 $ export GIT_TEST_WATCHER_PATH=/tmp/watcher $ make test TIME_WAIT set set to unlimited by GIT_TEST_WATCHER=2 so the test suite could hang up indefinitely due to a file-watcher bug. Luckily everything passes. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- file-watcher-lib.c | 13 + 1 file changed, 13 insertions(+) diff --git a/file-watcher-lib.c b/file-watcher-lib.c index b6b0848..93afb52 100644 --- a/file-watcher-lib.c +++ b/file-watcher-lib.c @@ -136,6 +136,7 @@ void open_watcher(struct index_state *istate) } if (!read_config) { + const char *s; int i; /* * can't hook into git_default_config because @@ -149,6 +150,18 @@ void open_watcher(struct index_state *istate) if (i == istate-cache_nr) recent_limit = 0; read_config = 1; + + s = getenv(GIT_TEST_WATCHER); + if (s) { + watch_lowerlimit = 1; + recent_limit = 0; + WAIT_TIME = -1; + if (atoi(s) 1) + istate-update_watches = 1; + s = getenv(GIT_TEST_WATCHER_PATH); + if (s) + watcher_path = xstrdup(s); + } } istate-watcher = connect_watcher(watcher_path); -- 1.8.5.2.240.g8478abd -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 23/26] file-watcher: tests for the daemon
test-file-watcher is a simple chat program to talk to file watcher. Typically you would write something like this cat expect EOF # send hello. Oh and this is a comment! hello # Wait for reply and print to stdout. # test-file-watcher does not care about anything after '' hello index foo bar ok EOF test-file-watcher . expect actual and test-file-watcher will execute the commands and get responses. If all go well, actual should be the same as expect. '' and '' denote send and receive packets respectively. '' and '' can be used to send and receive a list of NUL-terminated paths. $GIT_TEST_WATCHER enables a few more commands for testing purposes. The most important one is 'test-mode' where system inotify is taken out and inotify events could be injected via test-file-watcher. There are two debug commands in file-watcher that's not used by the test suite, but would help debugging: setenv and log. They can be used to turn on GIT_TRACE_PACKET then any log command will show, which functions as barrier between events file watcher. GIT_TRACE_WATCHER can also be enabled (dynamically or at startup) to track inotify events. Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com --- .gitignore | 1 + Makefile | 1 + file-watcher.c | 181 ++- t/t7513-file-watcher.sh (new +x) | 382 +++ test-file-watcher.c (new)| 96 ++ 5 files changed, 657 insertions(+), 4 deletions(-) create mode 100755 t/t7513-file-watcher.sh create mode 100644 test-file-watcher.c diff --git a/.gitignore b/.gitignore index 12c78f0..277f929 100644 --- a/.gitignore +++ b/.gitignore @@ -181,6 +181,7 @@ /test-date /test-delta /test-dump-cache-tree +/test-file-watcher /test-scrap-cache-tree /test-genrandom /test-index-version diff --git a/Makefile b/Makefile index 1c4d659..f0dc2cc 100644 --- a/Makefile +++ b/Makefile @@ -555,6 +555,7 @@ TEST_PROGRAMS_NEED_X += test-ctype TEST_PROGRAMS_NEED_X += test-date TEST_PROGRAMS_NEED_X += test-delta TEST_PROGRAMS_NEED_X += test-dump-cache-tree +TEST_PROGRAMS_NEED_X += test-file-watcher TEST_PROGRAMS_NEED_X += test-genrandom TEST_PROGRAMS_NEED_X += test-index-version TEST_PROGRAMS_NEED_X += test-line-buffer diff --git a/file-watcher.c b/file-watcher.c index 1e45b25..3ab0a11 100644 --- a/file-watcher.c +++ b/file-watcher.c @@ -65,7 +65,8 @@ struct connection { static struct connection **conns; static struct pollfd *pfd; static int conns_alloc, pfd_nr, pfd_alloc; -static int inotify_fd; +static int inotify_fd, test_mode; +static int wd_counter = 1; /* * IN_DONT_FOLLOW does not matter now as we do not monitor @@ -78,10 +79,19 @@ static struct dir *create_dir(struct dir *parent, const char *path, const char *basename) { struct dir *d; - int wd = inotify_add_watch(inotify_fd, path, INOTIFY_MASKS); + int wd; + if (!test_mode) + wd = inotify_add_watch(inotify_fd, path, INOTIFY_MASKS); + else { + wd = wd_counter++; + if (wd 8) + wd = -1; + } if (wd 0) return NULL; + trace_printf_key(GIT_TRACE_WATCHER, inotify: watch %d %s\n, +wd, path); d = xmalloc(sizeof(*d)); memset(d, 0, sizeof(*d)); d-wd = wd; @@ -124,7 +134,9 @@ static void free_dir(struct dir *d, int topdown) if (d-repo) d-repo-root = NULL; wds[d-wd] = NULL; - inotify_rm_watch(inotify_fd, d-wd); + if (!test_mode) + inotify_rm_watch(inotify_fd, d-wd); + trace_printf_key(GIT_TRACE_WATCHER, inotify: unwatch %d\n, d-wd); if (topdown) { int i; for (i = 0; i d-nr_subdirs; i++) @@ -265,6 +277,7 @@ static inline void queue_file_changed(struct file *f, struct strbuf *sb) int len = sb-len; strbuf_addf(sb, %s%s, f-parent-parent ? / : , f-name); string_list_append(f-repo-updated, sb-buf); + trace_printf_key(GIT_TRACE_WATCHER, watcher: changed %s\n, sb-buf); f-repo-updated_sorted = 0; strbuf_setlen(sb, len); } @@ -324,6 +337,10 @@ static int do_handle_inotify(const struct inotify_event *event) struct dir *d; int pos; + trace_printf_key(GIT_TRACE_WATCHER, inotify: event %08x wd %d %s\n, +event-mask, event-wd, +event-len ? event-name : N/A); + if (event-mask (IN_Q_OVERFLOW | IN_UNMOUNT)) { int i; for (i = 0; i nr_repos; i++) @@ -385,6 +402,81 @@ static int handle_inotify(int fd) return ret; } +struct constant { + const char *name; + int value; +}; + +#define CONSTANT(x) { #x, x } +static const struct constant inotify_masks[] = { + CONSTANT(IN_DELETE_SELF), + CONSTANT(IN_MOVE_SELF), +