Re: [PATCH v4 10/17] trailer: if no input file is passed, read from stdin

2014-02-02 Thread Eric Sunshine
On Thu, Jan 30, 2014 at 1:49 AM, Christian Couder
chrisc...@tuxfamily.org wrote:
 It is simpler and more natural if the git interpret-trailers
 is made a filter as its output already goes to sdtout.

 Signed-off-by: Christian Couder chrisc...@tuxfamily.org
 ---
 diff --git a/trailer.c b/trailer.c
 index 8681aed..73a65e0 100644
 --- a/trailer.c
 +++ b/trailer.c
 @@ -464,8 +464,13 @@ static struct strbuf **read_input_file(const char 
 *infile)
  {
 struct strbuf sb = STRBUF_INIT;

 -   if (strbuf_read_file(sb, infile, 0)  0)
 -   die_errno(_(could not read input file '%s'), infile);
 +   if (infile) {
 +   if (strbuf_read_file(sb, infile, 0)  0)
 +   die_errno(_(could not read input file '%s'), 
 infile);
 +   } else {
 +   if (strbuf_read(sb, fileno(stdin), 0)  0)

strbuf_fread(), perhaps?

 +   die_errno(_(could not read from stdin));
 +   }

 return strbuf_split(sb, '\n');
  }
 @@ -530,10 +535,8 @@ void process_trailers(const char *infile, int 
 trim_empty, int argc, const char *

 git_config(git_trailer_config, NULL);

 -   /* Print the non trailer part of infile */
 -   if (infile) {
 -   process_input_file(infile, infile_tok_first, 
 infile_tok_last);
 -   }
 +   /* Print the non trailer part of infile (or stdin if infile is NULL) 
 */
 +   process_input_file(infile, infile_tok_first, infile_tok_last);

 arg_tok_first = process_command_line_args(argc, argv);

 --
 1.8.5.2.201.gacc5987
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 11/17] trailer: add new_trailer_item() function

2014-02-02 Thread Eric Sunshine
On Thu, Jan 30, 2014 at 1:49 AM, Christian Couder
chrisc...@tuxfamily.org wrote:
 This is a small refactoring to prepare for the next steps.

Since this is all brand new code, wouldn't it make more sense to
structure it in this fashion in the first place when introduced in
patch 4/17? It's not clear why it should be introduced with poorer
structure and then later cleaned up.

 Signed-off-by: Christian Couder chrisc...@tuxfamily.org
 ---
  trailer.c | 31 +++
  1 file changed, 19 insertions(+), 12 deletions(-)

 diff --git a/trailer.c b/trailer.c
 index 73a65e0..430ff39 100644
 --- a/trailer.c
 +++ b/trailer.c
 @@ -399,11 +399,27 @@ static void parse_trailer(struct strbuf *tok, struct 
 strbuf *val, const char *tr
 }
  }

 +static struct trailer_item *new_trailer_item(struct trailer_item *conf_item,
 +const char* tok, const char* val)
 +{
 +   struct trailer_item *new = xcalloc(sizeof(struct trailer_item), 1);
 +   new-value = val;
 +
 +   if (conf_item) {
 +   new-conf = conf_item-conf;
 +   new-token = xstrdup(conf_item-conf-key);
 +   } else {
 +   new-conf = xcalloc(sizeof(struct conf_info), 1);
 +   new-token = tok;
 +   }
 +
 +   return new;
 +}
 +
  static struct trailer_item *create_trailer_item(const char *string)
  {
 struct strbuf tok = STRBUF_INIT;
 struct strbuf val = STRBUF_INIT;
 -   struct trailer_item *new;
 struct trailer_item *item;
 int tok_alnum_len;

 @@ -415,21 +431,12 @@ static struct trailer_item *create_trailer_item(const 
 char *string)
 for (item = first_conf_item; item; item = item-next) {
 if (!strncasecmp(tok.buf, item-conf-key, tok_alnum_len) ||
 !strncasecmp(tok.buf, item-conf-name, tok_alnum_len)) {
 -   new = xcalloc(sizeof(struct trailer_item), 1);
 -   new-conf = item-conf;
 -   new-token = xstrdup(item-conf-key);
 -   new-value = strbuf_detach(val, NULL);
 strbuf_release(tok);
 -   return new;
 +   return new_trailer_item(item, NULL, 
 strbuf_detach(val, NULL));
 }
 }

 -   new = xcalloc(sizeof(struct trailer_item), 1);
 -   new-conf = xcalloc(sizeof(struct conf_info), 1);
 -   new-token = strbuf_detach(tok, NULL);
 -   new-value = strbuf_detach(val, NULL);
 -
 -   return new;
 +   return new_trailer_item(NULL, strbuf_detach(tok, NULL), 
 strbuf_detach(val, NULL));;
  }

  static void add_trailer_item(struct trailer_item **first,
 --
 1.8.5.2.201.gacc5987


--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Creating own hierarchies under $GITDIR/refs ?

2014-02-02 Thread David Kastrup

Hi,

in the context of an ongoing discussion on the Emacs developer list of
converting the Bzr repository of Emacs, one question (with different
approaches) is where to put the information regarding preexisting Bazaar
revision numbers and bug tracker ids: those are not present in the
current Git mirror.

Putting them in the commit messages would require a full history
rewrite, and if some are missed in the process, this cannot be fixed
afterwards.

So I mused: refs/heads contains branches, refs/tags contains tags.  The
respective information would likely easily enough be stored in refs/bzr
and refs/bugs and in that manner would not pollute the ordinary tag
and branch spaces, rendering git tag and/or git branch output mostly
unusable.  I tested creating such a directory and entries and indeed
references like bzr/39005 then worked.

However, cloning from the repository did not copy those directories and
references, so without modification, this scheme would not work for
cloned repositories.

Are there some measures one can take/configure in the parent repository
such that (named or all) additional directories inside of $GITDIR/refs
would get cloned along with the rest?

It would definitely open viable options for dealing with mirrors and/or
repository migrations in general.

-- 
David Kastrup

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] userdiff: update Ada patterns

2014-02-02 Thread Adrian Johnson
- Allow extra space in is new and is separate
- Fix bug in word regex for numbers

Signed-off-by: Adrian Johnson ajohn...@redneon.com
---
 t/t4034/ada/expect | 2 +-
 userdiff.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/t/t4034/ada/expect b/t/t4034/ada/expect
index be2376e..a682d28 100644
--- a/t/t4034/ada/expect
+++ b/t/t4034/ada/expect
@@ -4,7 +4,7 @@
 BOLD+++ b/postRESET
 CYAN@@ -1,13 +1,13 @@RESET
 Ada.Text_IO.Put_Line(Hello WorldRED!RESETGREEN?RESET);
-1 1eRED-RESET10 16#FE12#E2 3.141_592 'REDxRESETGREENyRESET'
+1 RED1e-10RESETGREEN1e10RESET 16#FE12#E2 3.141_592 
'REDxRESETGREENyRESET'
 REDaRESETGREENxRESET+REDb aRESETGREENy xRESET-REDbRESET
 REDaRESETGREENyRESET
 GREENxRESET*REDb aRESETGREENy xRESET/REDbRESET
diff --git a/userdiff.c b/userdiff.c
index ea43a03..e8915bf 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -15,13 +15,13 @@ static int drivers_alloc;
  word_regex |[^[:space:]]|[\xc0-\xff][\x80-\xbf]+ }
 static struct userdiff_driver builtin_drivers[] = {
 IPATTERN(ada,
-!^(.*[ \t])?(is new|renames|is separate)([ \t].*)?$\n
+!^(.*[ \t])?(is[ \t]+new|renames|is[ \t]+separate)([ \t].*)?$\n
 !^[ \t]*with[ \t].*$\n
 ^[ \t]*((procedure|function)[ \t]+.*)$\n
 ^[ \t]*((package|protected|task)[ \t]+.*)$,
 /* -- */
 [a-zA-Z][a-zA-Z0-9_]*
-|[0-9][-+0-9#_.eE]
+|[-+0-9#_.eE]+
 |=|\\.\\.|\\*\\*|:=|/=|=|=|||),
 IPATTERN(fortran,
 !^([C*]|[ \t]*!)\n
-- 
1.8.3.2
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Creating own hierarchies under $GITDIR/refs ?

2014-02-02 Thread Duy Nguyen
On Sun, Feb 2, 2014 at 5:37 PM, David Kastrup d...@gnu.org wrote:
 in the context of an ongoing discussion on the Emacs developer list of
 converting the Bzr repository of Emacs, one question (with different
 approaches) is where to put the information regarding preexisting Bazaar
 revision numbers and bug tracker ids: those are not present in the
 current Git mirror.

 Putting them in the commit messages would require a full history
 rewrite, and if some are missed in the process, this cannot be fixed
 afterwards.

What do you need them for? Perhaps putting everything in a file, maybe
sorted by SHA-1, would suffice? It should not be too hard to write a
script to map bug tracker id to a commit id. The file is for past
commits only. New commits can contain these info in their messages.
-- 
Duy
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 3/4] setup: Add 'abspath_part_inside_repo' function

2014-02-02 Thread Martin Erik Werner
On Sun, Feb 02, 2014 at 09:19:04AM +0700, Duy Nguyen wrote:
 On Sun, Feb 2, 2014 at 8:59 AM, Martin Erik Werner
 martinerikwer...@gmail.com wrote:
  +   /* check if work tree is already the prefix */
  +   if (strncmp(path, work_tree, wtlen) == 0) {
  +   if (path[wtlen] == '/')
  +   memmove(path, path + wtlen + 1, len - wtlen);
  +   else
  +   /* work tree is the root, or the whole path */
  +   memmove(path, path + wtlen, len - wtlen + 1);
  +   return 0;
  +   }
 
 No the 4th time is not the charm yet :) if path is /abc/defghi and
 work_tree is /abc/def you don't want to return ghi as the prefix
 here.

Ah indeed, this should catch that:

diff --git a/setup.c b/setup.c
index 2270bd4..5817875 100644
--- a/setup.c
+++ b/setup.c
@@ -32,9 +32,11 @@ static inline int abspath_part_inside_repo(char *path)
if (strncmp(path, work_tree, wtlen) == 0) {
if (path[wtlen] == '/')
memmove(path, path + wtlen + 1, len - wtlen);
-   else
+   else if (path[wtlen - 1] == '/' || path[wtlen] == '\0')
/* work tree is the root, or the whole path */
memmove(path, path + wtlen, len - wtlen + 1);
+   else
+   return -1;
return 0;
}
path0 = path;

Is it worth adding a test for this as well?:

diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh
index f6f378b..05d3366 100755
--- a/t/t0060-path-utils.sh
+++ b/t/t0060-path-utils.sh
@@ -201,6 +201,10 @@ test_expect_success 'prefix_path works with only absolute 
path to work tree' '
test_cmp expected actual
 '
 
+test_expect_success 'prefix_path rejects absolute path to dir with same 
beginning as work tree' '
+   test_must_fail test-path-utils prefix_path prefix $(pwd)a
+'
+
 relative_path /foo/a/b/c/  /foo/a/b/   c/
 relative_path /foo/a/b/c/  /foo/a/bc/
 relative_path /foo/a//b//c////foo/a/b//c/  POSIX

  +   path0 = path;
  +   path += offset_1st_component(path);
  +
  +   /* check each level */
  +   while (*path != '\0') {
  +   path++;
 
 To me it looks like we could write
 
 for (; *path; path++) {
 
 or even
 
 for (path += offset_1st_component(path); *path; path++) {
 
 but it's personal taste..

Yeah, I think aesthetically I don't like cramming too much into the for loop:

for (path += offset_1st_component(path0) + 1; *path; path++) {

neither leaving the init expression unused. So as long as it's just personal
taste I think I'll stick with the current while loop format. But I'll exchange
(*path == '\0') for (*path) though.

--
Martin Erik Werner martinerikwer...@gmail.com
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 17/17] Documentation: add documentation for 'git interpret-trailers'

2014-02-02 Thread Eric Sunshine
Patch 17/17 of v4 failed to arrive in my inbox for some reason, so
I'll just reply to v3 since there's another error I noticed which is
still present in v4, plus a comment specific to v4 (see below).

On Mon, Jan 27, 2014 at 3:33 PM, Christian Couder
chrisc...@tuxfamily.org wrote:
 From: Eric Sunshine sunsh...@sunshineco.com
 On Sun, Jan 26, 2014 at 12:00 PM, Christian Couder
 chrisc...@tuxfamily.org wrote:
 +By default, a 'token=value' or 'token:value' argument will be added
 +only if no trailer with the same (token, value) pair is already in the
 +message. The 'token' and 'value' parts will be trimmed to remove
 +starting and trailing white spaces, and the resulting trimmed 'token'

 Other git documentation uniformly spells it as whitespace rather
 than white spaces.

 You are right I will change that.

The rest of git documentation consistently spells it whitespace, but
v4 uses whitespaces.

 +infile=file::
 +   Read the commit message from `file` instead of the standard
 +   input.

I didn't notice this when reviewing v3, and the same text appears in
v4. There are a couple extra hyphens before 'infile', and you want 
and  around file, so:

s/infile=file/--infile=file/
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 3/4] setup: Add 'abspath_part_inside_repo' function

2014-02-02 Thread David Kastrup
Martin Erik Werner martinerikwer...@gmail.com writes:

 On Sun, Feb 02, 2014 at 09:19:04AM +0700, Duy Nguyen wrote:
 On Sun, Feb 2, 2014 at 8:59 AM, Martin Erik Werner
 martinerikwer...@gmail.com wrote:
  +   /* check if work tree is already the prefix */
  +   if (strncmp(path, work_tree, wtlen) == 0) {
  +   if (path[wtlen] == '/')
  +   memmove(path, path + wtlen + 1, len - wtlen);
  +   else
  +   /* work tree is the root, or the whole path */
  +   memmove(path, path + wtlen, len - wtlen + 1);
  +   return 0;
  +   }
 
 No the 4th time is not the charm yet :) if path is /abc/defghi and
 work_tree is /abc/def you don't want to return ghi as the prefix
 here.

 Ah indeed, this should catch that:

 diff --git a/setup.c b/setup.c
 index 2270bd4..5817875 100644
 --- a/setup.c
 +++ b/setup.c
 @@ -32,9 +32,11 @@ static inline int abspath_part_inside_repo(char *path)
   if (strncmp(path, work_tree, wtlen) == 0) {
   if (path[wtlen] == '/')
   memmove(path, path + wtlen + 1, len - wtlen);
 - else
 + else if (path[wtlen - 1] == '/' || path[wtlen] == '\0')

Is wtlen guaranteed to be nonzero?

-- 
David Kastrup

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Creating own hierarchies under $GITDIR/refs ?

2014-02-02 Thread John Keeping
On Sun, Feb 02, 2014 at 12:19:43PM +0100, David Kastrup wrote:
 Duy Nguyen pclo...@gmail.com writes:
 
  The file is for past commits only.
 
  New commits can contain these info in their messages.
 
 If it's not forgotten.  Experience shows that things like issue numbers
 have a tendency to be omitted, and then they stay missing.
 
 At any rate, this is exactly the kind of stuff that tags are useful for,
 except that using them for all that would render the tag space
 overcrowded.

Actually, I would say this is exactly the sort of thing notes are for.

git.git uses them to map commits back to mailing list discussions:

git fetch git://github.com/gitster/git +refs/notes/amlog:refs/notes/amlog 
git log --notes=amlog

See also notes.displayRef in git-config(1).

Notes aren't fetch by default, but it's not hard for those interested to
add a remote.*.fetch line to their config.
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 3/4] setup: Add 'abspath_part_inside_repo' function

2014-02-02 Thread Torsten Bögershausen
On 2014-02-02 12.21, David Kastrup wrote:
 Martin Erik Werner martinerikwer...@gmail.com writes:
 
 On Sun, Feb 02, 2014 at 09:19:04AM +0700, Duy Nguyen wrote:
 On Sun, Feb 2, 2014 at 8:59 AM, Martin Erik Werner
 martinerikwer...@gmail.com wrote:
 +   /* check if work tree is already the prefix */
 +   if (strncmp(path, work_tree, wtlen) == 0) {
 +   if (path[wtlen] == '/')
 +   memmove(path, path + wtlen + 1, len - wtlen);
 +   else
 +   /* work tree is the root, or the whole path */
 +   memmove(path, path + wtlen, len - wtlen + 1);
 +   return 0;
 +   }

 No the 4th time is not the charm yet :) if path is /abc/defghi and
 work_tree is /abc/def you don't want to return ghi as the prefix
 here.

 Ah indeed, this should catch that:

 diff --git a/setup.c b/setup.c
 index 2270bd4..5817875 100644
 --- a/setup.c
 +++ b/setup.c
 @@ -32,9 +32,11 @@ static inline int abspath_part_inside_repo(char *path)
  if (strncmp(path, work_tree, wtlen) == 0) {
  if (path[wtlen] == '/')
  memmove(path, path + wtlen + 1, len - wtlen);
 -else
 +else if (path[wtlen - 1] == '/' || path[wtlen] == '\0')
 
 Is wtlen guaranteed to be nonzero?
 
Another comment:
The raw comparison with '/' is probably working well on all
POSIX/Linux/Unix systems.

To be more portable, the macro
is_dir_sep()
can be used:

if (is_dir_sep(path[wtlen]))

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Creating own hierarchies under $GITDIR/refs ?

2014-02-02 Thread David Kastrup
John Keeping j...@keeping.me.uk writes:

 On Sun, Feb 02, 2014 at 12:19:43PM +0100, David Kastrup wrote:
 Duy Nguyen pclo...@gmail.com writes:
 
  The file is for past commits only.
 
  New commits can contain these info in their messages.
 
 If it's not forgotten.  Experience shows that things like issue numbers
 have a tendency to be omitted, and then they stay missing.
 
 At any rate, this is exactly the kind of stuff that tags are useful for,
 except that using them for all that would render the tag space
 overcrowded.

 Actually, I would say this is exactly the sort of thing notes are for.

 git.git uses them to map commits back to mailing list discussions:

But that's the wrong direction.  What is needed in the Emacs case is
mapping the Bazaar reference numbers (and bug numbers) to commits.

While it is true that the history rewriting approach would not deliver
this either (short of git log --grep with suitable patterns), I was
looking for something less of a crutch here.

 Notes aren't fetch by default, but it's not hard for those interested
 to add a remote.*.fetch line to their config.

If we are talking about measures everybody has to actively take before
getting access to functionality, this does not cross the convenience
threshold making it a solution preferred over others.  But it's probably
feasible to configure a fetch line doing this that will get cloned when
first cloning a repository.  That's not too hot for people with existing
repositories, but since we are talking about a migration from Bazaar
anyway, Git users currently are so by choice and so might be more
willing to update their configuration if it helps with avoiding a fully
new clone.

-- 
David Kastrup
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Creating own hierarchies under $GITDIR/refs ?

2014-02-02 Thread Duy Nguyen
On Sun, Feb 2, 2014 at 6:19 PM, David Kastrup d...@gnu.org wrote:
 Since Git has a working facility for references that is catered to do
 exactly this kind of mapping and already _does_, it seems like a
 convenient path to explore.

It will not scale. If you make those refs available for
cloning/fetching, all of them will be advertised first thing when git
starts negotiate. Imagine thousands of refs (and keep increasing) sent
to the receiver at the beginning of every connection. Something like
reverse git-notes may transfer more efficiently. Or we need to
improve git protocol to handle massive refs better, something that's
been discussed for a while without any outcome.
-- 
Duy
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Creating own hierarchies under $GITDIR/refs ?

2014-02-02 Thread David Kastrup
Duy Nguyen pclo...@gmail.com writes:

 On Sun, Feb 2, 2014 at 6:19 PM, David Kastrup d...@gnu.org wrote:
 Since Git has a working facility for references that is catered to do
 exactly this kind of mapping and already _does_, it seems like a
 convenient path to explore.

 It will not scale. If you make those refs available for
 cloning/fetching, all of them will be advertised first thing when git
 starts negotiate. Imagine thousands of refs (and keep increasing) sent
 to the receiver at the beginning of every connection.

In current LilyPond repository:
git tag|wc
969 969   15161

In current Emacs mirror:
git tag|wc
   12021202   15729

In current Git repository:
git tag|wc
498 4984820

 Something like reverse git-notes may transfer more efficiently. Or
 we need to improve git protocol to handle massive refs better,
 something that's been discussed for a while without any outcome.

I think that even disregarding special use of references, _existing_
practice would already appear to warrant being able to deal with
thousands of refs in a reasonable manner.

It's a reasonable expectation to have a tag per (potentially
intermediate) release or release candidate.  For any project publishing
reproducible daily snapshots, the threshold of 1000 will get reached
within few years.

Of course, it is relevant information to know that right _now_
references will not scale.  But that does not seem like a defensible
long-term perspective.

-- 
David Kastrup
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 3/4] setup: Add 'abspath_part_inside_repo' function

2014-02-02 Thread Martin Erik Werner
On Sun, Feb 02, 2014 at 12:37:16PM +0100, Torsten Bögershausen wrote:
 On 2014-02-02 12.21, David Kastrup wrote:
  Martin Erik Werner martinerikwer...@gmail.com writes:
  
  On Sun, Feb 02, 2014 at 09:19:04AM +0700, Duy Nguyen wrote:
  On Sun, Feb 2, 2014 at 8:59 AM, Martin Erik Werner
  martinerikwer...@gmail.com wrote:
  +   /* check if work tree is already the prefix */
  +   if (strncmp(path, work_tree, wtlen) == 0) {
  +   if (path[wtlen] == '/')
  +   memmove(path, path + wtlen + 1, len - wtlen);
  +   else
  +   /* work tree is the root, or the whole path */
  +   memmove(path, path + wtlen, len - wtlen + 1);
  +   return 0;
  +   }
 
  No the 4th time is not the charm yet :) if path is /abc/defghi and
  work_tree is /abc/def you don't want to return ghi as the prefix
  here.
 
  Ah indeed, this should catch that:
 
  diff --git a/setup.c b/setup.c
  index 2270bd4..5817875 100644
  --- a/setup.c
  +++ b/setup.c
  @@ -32,9 +32,11 @@ static inline int abspath_part_inside_repo(char *path)
 if (strncmp(path, work_tree, wtlen) == 0) {
 if (path[wtlen] == '/')
 memmove(path, path + wtlen + 1, len - wtlen);
  -  else
  +  else if (path[wtlen - 1] == '/' || path[wtlen] == '\0')
  
  Is wtlen guaranteed to be nonzero?

Hmm, am I incorrect in thinking
if (!work_tree)
takes care of that?

 Another comment:
 The raw comparison with '/' is probably working well on all
 POSIX/Linux/Unix systems.
 
 To be more portable, the macro
 is_dir_sep()
 can be used:
 
 if (is_dir_sep(path[wtlen]))

Since the path is already normalized by 'normalize_path_copy_len' which
seems to guarantee '/'-separation, I have assumed that this was
unnecessary?

--
Martin Erik Werner martinerikwer...@gmail.com

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 3/4] setup: Add 'abspath_part_inside_repo' function

2014-02-02 Thread Duy Nguyen
On Sun, Feb 2, 2014 at 6:13 PM, Martin Erik Werner
martinerikwer...@gmail.com wrote:
 diff --git a/setup.c b/setup.c
 index 2270bd4..5817875 100644
 --- a/setup.c
 +++ b/setup.c
 @@ -32,9 +32,11 @@ static inline int abspath_part_inside_repo(char *path)
 if (strncmp(path, work_tree, wtlen) == 0) {
 if (path[wtlen] == '/')
 memmove(path, path + wtlen + 1, len - wtlen);
 -   else
 +   else if (path[wtlen - 1] == '/' || path[wtlen] == '\0')
 /* work tree is the root, or the whole path */
 memmove(path, path + wtlen, len - wtlen + 1);
 +   else
 +   return -1;

No, you can't return here. /abc/defghi might actually be a symlink
to /abc/def. If it does not match, let the following loop handle it.
-- 
Duy
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Creating own hierarchies under $GITDIR/refs ?

2014-02-02 Thread John Keeping
On Sun, Feb 02, 2014 at 12:42:52PM +0100, David Kastrup wrote:
 John Keeping j...@keeping.me.uk writes:
 
  On Sun, Feb 02, 2014 at 12:19:43PM +0100, David Kastrup wrote:
  Duy Nguyen pclo...@gmail.com writes:
  
   The file is for past commits only.
  
   New commits can contain these info in their messages.
  
  If it's not forgotten.  Experience shows that things like issue numbers
  have a tendency to be omitted, and then they stay missing.
  
  At any rate, this is exactly the kind of stuff that tags are useful for,
  except that using them for all that would render the tag space
  overcrowded.
 
  Actually, I would say this is exactly the sort of thing notes are for.
 
  git.git uses them to map commits back to mailing list discussions:
 
 But that's the wrong direction.  What is needed in the Emacs case is
 mapping the Bazaar reference numbers (and bug numbers) to commits.

Ah, OK.  I hadn't quite read carefully enough.

I actually wonder if you could do this with notes and git-grep; for
example:

git grep -l keeping.me.uk refs/notes/amlog |
sed -e 's/.*://' -e 's!/!!g'

That should be relatively efficient since you're only looking at the
current notes tree.

 While it is true that the history rewriting approach would not deliver
 this either (short of git log --grep with suitable patterns), I was
 looking for something less of a crutch here.
 
  Notes aren't fetch by default, but it's not hard for those interested
  to add a remote.*.fetch line to their config.
 
 If we are talking about measures everybody has to actively take before
 getting access to functionality, this does not cross the convenience
 threshold making it a solution preferred over others.  But it's probably
 feasible to configure a fetch line doing this that will get cloned when
 first cloning a repository.

I'm assuming you'll need some form of tool (at least a script) to
manipulate this feature; it wouldn't be too hard for that to set this up
the first time it's run.
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 3/4] setup: Add 'abspath_part_inside_repo' function

2014-02-02 Thread Torsten Bögershausen
 
 Another comment:
 The raw comparison with '/' is probably working well on all
 POSIX/Linux/Unix systems.

 To be more portable, the macro
 is_dir_sep()
 can be used:

 if (is_dir_sep(path[wtlen]))
 
 Since the path is already normalized by 'normalize_path_copy_len' which
 seems to guarantee '/'-separation, I have assumed that this was
 unnecessary?

So true, sorry for the noise.

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] fast-import.c: always honor the filename case

2014-02-02 Thread Reuben Hawkins
fast-import should not use strncmp_icase.  When it does, files with
similar names, but different case can be lost in the import.  For
example...

M 100644 :1 FileName.txt
D Filename.txt

...would end up deleting FileName from the index during the fast-
import when strncmp_icase is used and core.ignorecase=true.  The
intent in the above snippet is to rename the file, not delete it.

Replacing strncmp_icase with strncmp in fast-import.c fixes the
issue.

alternatives:
* check if the filesystem is case-preserving.  If it is, don't
  set core.ignorecase=true.  This, however, exposes another issue
  where git is tricked by stat into thinking that tracked files
  are untracked on case-preserving and case-insensitive filesystems.
---
 fast-import.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index f4d9969..62e28c0 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -1500,7 +1500,7 @@ static int tree_content_set(
t = root-tree;
for (i = 0; i  t-entry_count; i++) {
e = t-entries[i];
-   if (e-name-str_len == n  !strncmp_icase(p, 
e-name-str_dat, n)) {
+   if (e-name-str_len == n  !strncmp(p, e-name-str_dat, n)) {
if (!slash1) {
if (!S_ISDIR(mode)
 e-versions[1].mode == mode
@@ -1593,7 +1593,7 @@ static int tree_content_remove(
t = root-tree;
for (i = 0; i  t-entry_count; i++) {
e = t-entries[i];
-   if (e-name-str_len == n  !strncmp_icase(p, 
e-name-str_dat, n)) {
+   if (e-name-str_len == n  !strncmp(p, e-name-str_dat, n)) {
if (slash1  !S_ISDIR(e-versions[1].mode))
/*
 * If p names a file in some subdirectory, and a
@@ -1663,7 +1663,7 @@ static int tree_content_get(
t = root-tree;
for (i = 0; i  t-entry_count; i++) {
e = t-entries[i];
-   if (e-name-str_len == n  !strncmp_icase(p, 
e-name-str_dat, n)) {
+   if (e-name-str_len == n  !strncmp(p, e-name-str_dat, n)) {
if (!slash1)
goto found_entry;
if (!S_ISDIR(e-versions[1].mode))
-- 
1.8.5.3.1.gac93028.dirty

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [msysGit] Re: [PATCH v2] repack.c: Use move_temp_to_file() instead of rename()

2014-02-02 Thread Torsten Bögershausen
(It seems as if the mail went only to Junio, sorry)
On 2014-02-02 16.09, Torsten Bögershausen wrote:
 On 2014-01-29 19.17, Junio C Hamano wrote:
 But after a closer inspection, I no longer think that hunk is an
 improvement.  These new packfiles were created by pack-objects,
 which finishes each packfile it produces by calling
 finish_tmp_packfile(), and that is where adjust_shared_perm()
 happens already.  As far as pack-objects that was called from
 repack is concerned, these new packfiles are not temporary; they
 are finished product.  It may be OK to remove them as part of
 rewind back to the original state, as a later phase of repack
 failed if we saw a failure (but note that the original
 git-repack.sh didn't), but a plain vanilla rename(2) without any
 frills is what we want to happen to them.
 Thanks for deeper inspection, I now suspect the root cause to be here:

 -- 8 --
 Subject: [PATCH v3] repack.c: Rename and unlink pack file if it exists

 This comment in builtin/repack.c:
   * First see if there are packs of the same name and if so
   * if we can move them out of the way (this can happen if we
   * repacked immediately after packing fully.
 indicates that when a repo was fully repacked, and is repacked again,
 we may run into the situation that new packfiles have the name
 (and content) as already existing ones.

 The logic is to rename the existing ones into filename like old-XXX,
 create the new ones and remove the old- ones.
 When something went wrong, a manual roll-back could be done be renaming
 the old- files.

 The renaming into old- did not work as designed, because file_exists()
 was done on the wrong file name.
 This could give problems for a repo on a network share under Windows,
 as reported by Jochen Haag zwanzi...@googlemail.com

 Solution:
 Create the right file name, like this:
   fname = mkpathdup(%s/pack-%s%s, packdir,
 and when removing the old-files:
   fname_old = mkpath(%s/old-%s%s,

 Rename the variables to match what they are used for:
 fname, fname_old and fname_tmp

 Signed-off-by: Torsten Bögershausen tbo...@web.de
 ---
  builtin/repack.c | 24 
  1 file changed, 12 insertions(+), 12 deletions(-)

 diff --git a/builtin/repack.c b/builtin/repack.c
 index 6284846..de69401 100644
 --- a/builtin/repack.c
 +++ b/builtin/repack.c
 @@ -258,7 +258,7 @@ int cmd_repack(int argc, const char **argv, const char 
 *prefix)
   for_each_string_list_item(item, names) {
   for (ext = 0; ext  2; ext++) {
   char *fname, *fname_old;
 - fname = mkpathdup(%s/%s%s, packdir,
 + fname = mkpathdup(%s/pack-%s%s, packdir,
   item-string, exts[ext]);
   if (!file_exists(fname)) {
   free(fname);
 @@ -314,33 +314,33 @@ int cmd_repack(int argc, const char **argv, const char 
 *prefix)
   /* Now the ones with the same name are out of the way... */
   for_each_string_list_item(item, names) {
   for (ext = 0; ext  2; ext++) {
 - char *fname, *fname_old;
 + char *fname, *fname_tmp;
   struct stat statbuffer;
   fname = mkpathdup(%s/pack-%s%s,
   packdir, item-string, exts[ext]);
 - fname_old = mkpathdup(%s-%s%s,
 + fname_tmp = mkpathdup(%s-%s%s,
   packtmp, item-string, exts[ext]);
 - if (!stat(fname_old, statbuffer)) {
 + if (!stat(fname_tmp, statbuffer)) {
   statbuffer.st_mode = ~(S_IWUSR | S_IWGRP | 
 S_IWOTH);
 - chmod(fname_old, statbuffer.st_mode);
 + chmod(fname_tmp, statbuffer.st_mode);
   }
 - if (rename(fname_old, fname))
 - die_errno(_(renaming '%s' failed), fname_old);
 + if (rename(fname_tmp, fname))
 + die_errno(_(renaming '%s' into '%s' failed), 
 fname_tmp, fname);
   free(fname);
 - free(fname_old);
 + free(fname_tmp);
   }
   }
  
   /* Remove the old- files */
   for_each_string_list_item(item, names) {
   for (ext = 0; ext  2; ext++) {
 - char *fname;
 - fname = mkpath(%s/old-pack-%s%s,
 + char *fname_old;
 + fname_old = mkpath(%s/old-%s%s,
   packdir,
   item-string,
   exts[ext]);
 - if (remove_path(fname))
 - warning(_(removing '%s' failed), fname);
 + if (remove_path(fname_old))
 +  

[PATCH v5 0/5] Handling of in-tree symlinks for absolute paths

2014-02-02 Thread Martin Erik Werner
Again!

(It seems you missed to CC me in your first reply David, please do :)

New reroll, fixing the /dir/repoa and /dir/repolink - /dir/repo issues noted by
Duy, and adding corresponding tests. If work tree matches beginning of path but
needs further checking, it starts from the end of the work tree length, so as
minimize the 'real_path' calls.

Martin Erik Werner (5):
  t0060: Add test for manipulating symlinks via absolute paths
  t0060: Add test for prefix_path when path == work tree
  t0060: Add tests for prefix_path when path begins with work tree
  setup: Add 'abspath_part_inside_repo' function
  setup: Don't dereference in-tree symlinks for absolute paths

 setup.c   | 100 --
 t/t0060-path-utils.sh |  21 +++
 2 files changed, 101 insertions(+), 20 deletions(-)

-- 
1.8.5.2

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 4/5] setup: Add 'abspath_part_inside_repo' function

2014-02-02 Thread Martin Erik Werner
In order to extract the part of an absolute path which lies inside the
repo, it is not possible to directly use real_path, since that would
dereference symlinks both outside and inside the work tree.

Add an 'abspath_part_inside_repo' function which first checks if the
work tree is already the prefix, then incrementally checks each path
level by temporarily NUL-terminating at each '/' and comparing against
the work tree path. If a match is found, it overwrites the input path
with the remainder past the work tree (which will be the in-repo part).

The path being exactly equal to the work tree is handled separately,
since then there is no directory separator between the work tree and
in-repo part.

This function is currently only intended for use in
'prefix_path_gently'.

Signed-off-by: Martin Erik Werner martinerikwer...@gmail.com
---
 setup.c | 64 
 1 file changed, 64 insertions(+)

diff --git a/setup.c b/setup.c
index 5432a31..a2e60ab 100644
--- a/setup.c
+++ b/setup.c
@@ -6,6 +6,70 @@ static int inside_git_dir = -1;
 static int inside_work_tree = -1;
 
 /*
+ * No checking if the path is in fact an absolute path is done, and it must
+ * already be normalized.
+ *
+ * Find the part of an absolute path that lies inside the work tree by
+ * dereferencing symlinks outside the work tree, for example:
+ * /dir1/repo/dir2/file   (work tree is /dir1/repo)  - dir2/file
+ * /dir/file  (work tree is /)   - dir/file
+ * /dir/symlink1/symlink2 (symlink1 points to work tree) - symlink2
+ * /dir/repolink/file (repolink points to /dir/repo) - file
+ * /dir/repo  (exactly equal to work tree)   - (empty string)
+ */
+static inline int abspath_part_inside_repo(char *path)
+{
+   size_t len;
+   size_t wtlen;
+   char *path0;
+   int off;
+
+   const char* work_tree = get_git_work_tree();
+   if (!work_tree)
+   return -1;
+   wtlen = strlen(work_tree);
+   len = strlen(path);
+   off = 0;
+
+   /* check if work tree is already the prefix */
+   if (strncmp(path, work_tree, wtlen) == 0) {
+   if (path[wtlen] == '/') {
+   memmove(path, path + wtlen + 1, len - wtlen);
+   return 0;
+   } else if (path[wtlen - 1] == '/' || path[wtlen] == '\0') {
+   /* work tree is the root, or the whole path */
+   memmove(path, path + wtlen, len - wtlen + 1);
+   return 0;
+   }
+   /* work tree might match beginning of a symlink to work tree */
+   off = wtlen;
+   }
+   path0 = path;
+   path += offset_1st_component(path) + off;
+
+   /* check each level */
+   while (*path) {
+   path++;
+   if (*path == '/') {
+   *path = '\0';
+   if (strcmp(real_path(path0), work_tree) == 0) {
+   memmove(path0, path + 1, len - (path - path0));
+   return 0;
+   }
+   *path = '/';
+   }
+   }
+
+   /* check whole path */
+   if (strcmp(real_path(path0), work_tree) == 0) {
+   *path0 = '\0';
+   return 0;
+   }
+
+   return -1;
+}
+
+/*
  * Normalize path, prepending the prefix for relative paths. If
  * remaining_prefix is not NULL, return the actual prefix still
  * remains in the path. For example, prefix = sub1/sub2/ and path is
-- 
1.8.5.2

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 2/5] t0060: Add test for prefix_path when path == work tree

2014-02-02 Thread Martin Erik Werner
The current behaviour of prefix_path is to return an empty string if
prefixing and absolute path that only contains exactly the work tree.
This behaviour is a potential regression point.

Signed-off-by: Martin Erik Werner martinerikwer...@gmail.com
---
 t/t0060-path-utils.sh | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh
index 0bba988..b8e92e1 100755
--- a/t/t0060-path-utils.sh
+++ b/t/t0060-path-utils.sh
@@ -195,6 +195,12 @@ test_expect_failure SYMLINKS 'prefix_path works with 
absolute paths to work tree
test $(test-path-utils prefix_path prefix $(pwd)/symlink) = 
symlink
 '
 
+test_expect_success 'prefix_path works with only absolute path to work tree' '
+   echo  expected 
+   test-path-utils prefix_path prefix $(pwd) actual 
+   test_cmp expected actual
+'
+
 relative_path /foo/a/b/c/  /foo/a/b/   c/
 relative_path /foo/a/b/c/  /foo/a/bc/
 relative_path /foo/a//b//c////foo/a/b//c/  POSIX
-- 
1.8.5.2

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 1/5] t0060: Add test for manipulating symlinks via absolute paths

2014-02-02 Thread Martin Erik Werner
When symlinks in the working tree are manipulated using the absolute
path, git dereferences them, and tries to manipulate the link target
instead.

This is a regression introduced by 18e051a:
  setup: translate symlinks in filename when using absolute paths
(which did not take symlinks in the work tree into consideration).

Add a known-breakage tests using the prefix_path function, which
currently uses real_path, causing the dereference.

Signed-off-by: Martin Erik Werner martinerikwer...@gmail.com
---
 t/t0060-path-utils.sh | 5 +
 1 file changed, 5 insertions(+)

diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh
index 07c10c8..0bba988 100755
--- a/t/t0060-path-utils.sh
+++ b/t/t0060-path-utils.sh
@@ -190,6 +190,11 @@ test_expect_success SYMLINKS 'real path works on symlinks' 
'
test $sym = $(test-path-utils real_path $dir2/syml)
 '
 
+test_expect_failure SYMLINKS 'prefix_path works with absolute paths to work 
tree symlinks' '
+   ln -s target symlink 
+   test $(test-path-utils prefix_path prefix $(pwd)/symlink) = 
symlink
+'
+
 relative_path /foo/a/b/c/  /foo/a/b/   c/
 relative_path /foo/a/b/c/  /foo/a/bc/
 relative_path /foo/a//b//c////foo/a/b//c/  POSIX
-- 
1.8.5.2

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 5/5] setup: Don't dereference in-tree symlinks for absolute paths

2014-02-02 Thread Martin Erik Werner
The 'prefix_path_gently' function currently applies real_path to
everything if given an absolute path, dereferencing symlinks both
outside and inside the work tree. In order to manipulate symlinks in the
work tree using absolute paths, symlinks should only be dereferenced
outside the work tree.

Modify the 'prefix_path_gently' to first normalize the path in order to
make sure path levels are separated by '/', then pass the result to
'abspath_part_inside_repo' to find the in-repo part (without dereferencing
any symlinks inside the work tree).

For absolute paths, 'prefix_path_gently' did not, nor does now do, any
actual prefixing, hence the result from 'abspath_part_in_repo' is
returned as-is.

Fixes t0060-82.

Signed-off-by: Martin Erik Werner martinerikwer...@gmail.com
---
 setup.c   | 36 
 t/t0060-path-utils.sh |  2 +-
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/setup.c b/setup.c
index a2e60ab..230505c 100644
--- a/setup.c
+++ b/setup.c
@@ -86,11 +86,23 @@ char *prefix_path_gently(const char *prefix, int len,
const char *orig = path;
char *sanitized;
if (is_absolute_path(orig)) {
-   const char *temp = real_path(path);
-   sanitized = xmalloc(len + strlen(temp) + 1);
-   strcpy(sanitized, temp);
+   char *npath;
+
+   npath = xmalloc(strlen(path) + 1);
if (remaining_prefix)
*remaining_prefix = 0;
+   if (normalize_path_copy_len(npath, path, remaining_prefix)) {
+   free(npath);
+   return NULL;
+   }
+   if (abspath_part_inside_repo(npath)) {
+   free(npath);
+   return NULL;
+   }
+
+   sanitized = xmalloc(strlen(npath) + 1);
+   strcpy(sanitized, npath);
+   free(npath);
} else {
sanitized = xmalloc(len + strlen(path) + 1);
if (len)
@@ -98,26 +110,10 @@ char *prefix_path_gently(const char *prefix, int len,
strcpy(sanitized + len, path);
if (remaining_prefix)
*remaining_prefix = len;
-   }
-   if (normalize_path_copy_len(sanitized, sanitized, remaining_prefix))
-   goto error_out;
-   if (is_absolute_path(orig)) {
-   size_t root_len, len, total;
-   const char *work_tree = get_git_work_tree();
-   if (!work_tree)
-   goto error_out;
-   len = strlen(work_tree);
-   root_len = offset_1st_component(work_tree);
-   total = strlen(sanitized) + 1;
-   if (strncmp(sanitized, work_tree, len) ||
-   (len  root_len  sanitized[len] != '\0'  sanitized[len] 
!= '/')) {
-   error_out:
+   if (normalize_path_copy_len(sanitized, sanitized, 
remaining_prefix)) {
free(sanitized);
return NULL;
}
-   if (sanitized[len] == '/')
-   len++;
-   memmove(sanitized, sanitized + len, total - len);
}
return sanitized;
 }
diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh
index c0a14f6..f04b82d 100755
--- a/t/t0060-path-utils.sh
+++ b/t/t0060-path-utils.sh
@@ -190,7 +190,7 @@ test_expect_success SYMLINKS 'real path works on symlinks' '
test $sym = $(test-path-utils real_path $dir2/syml)
 '
 
-test_expect_failure SYMLINKS 'prefix_path works with absolute paths to work 
tree symlinks' '
+test_expect_success SYMLINKS 'prefix_path works with absolute paths to work 
tree symlinks' '
ln -s target symlink 
test $(test-path-utils prefix_path prefix $(pwd)/symlink) = 
symlink
 '
-- 
1.8.5.2

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 3/5] t0060: Add tests for prefix_path when path begins with work tree

2014-02-02 Thread Martin Erik Werner
One edge-case that isn't currently checked in the tests is the beginning
of the path matching the work tree, despite the target not actually
being the work tree, for example:

  path = /dir/repoa
  work_tree = /dir/repo

should fail since the path is outside the repo. However, if /dir/repoa
is in fact a symlink that points to /dir/repo, it should instead
succeed.

Add two tests covering these cases, since they might be potential
regression points.
---
 t/t0060-path-utils.sh | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh
index b8e92e1..c0a14f6 100755
--- a/t/t0060-path-utils.sh
+++ b/t/t0060-path-utils.sh
@@ -201,6 +201,16 @@ test_expect_success 'prefix_path works with only absolute 
path to work tree' '
test_cmp expected actual
 '
 
+test_expect_success 'prefix_path rejects absolute path to dir with same 
beginning as work tree' '
+   test_must_fail test-path-utils prefix_path prefix $(pwd)a
+'
+
+test_expect_success 'prefix_path works with absolute path to a symlink to work 
tree having  same beginning as work tree' '
+   git init repo 
+   ln -s repo repolink 
+   test a = $(cd repo  test-path-utils prefix_path prefix 
$(pwd)/../repolink/a)
+'
+
 relative_path /foo/a/b/c/  /foo/a/b/   c/
 relative_path /foo/a/b/c/  /foo/a/bc/
 relative_path /foo/a//b//c////foo/a/b//c/  POSIX
-- 
1.8.5.2

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] init-db.c: honor case on case preserving fs

2014-02-02 Thread Junio C Hamano
Torsten Bögershausen tbo...@web.de writes:

 On 2014-02-01 10.14, Reuben Hawkins wrote:
 Most case-insensitive filesystems are case-preserving. In these
 filesystems (such as HFS+ on OS X) you can name a file Filename.txt,
 then rename the file to FileName.txt.  That file will be accessible
 by both filenames, but the case is otherwise honored.  We don't want
 to have git ignore case on these case-preserving filesystem
 implementations.

 Yes, we want.
 Because the file system will treat Filename.txt and FileName.txt
 the same.

Another important thing to remember is that we cannot have these two
files at the same time on such a filesystem.

Somebody may have Filename.txt in the commit at the tip of the
history, you clone/fetch and check it out, and you will have
Filename.txt with the original contents.  We do not try to corrupt
the filename on core.ignorecase filesystem by any canonicalization.

But then you may edit that file, and you either deliberately or
without knowing (because some of your tools do this behind your
back) may end up saving the result as FileName.txt.  What happens?

When we ask what is the contents of Filename.txt now? (using the
original name still in the index) to the underlying system, we will
be given what you placed in FileName.txt.  We won't see You do not
have Filename.txt, but you now have FileName.txt.

And that is the behaviour the end users (of not Git, but of a
platform with such a filesystem) do expect from their tools.  They
do not want to see You no longer have Filename.txt, and you have a
new file FileName.txt.

Now think what git add Filename.txt should do at that point?  It
should not say I was told to add Filename.txt, but there is no such
file, so I'll add nothing.  If you run git add -u Filename.txt,
it should not say I was told to add Filename.txt, but there is no
such file, so I'll remove existing Filename.txt from the index.

It must pick up the updated contents from your new FileName.txt,
update the index entry Filename.txt, and the next git commit
must record it as an update to the same file.

If you are on the other hand trying to correct an earlier mistake of
having named the file Filename.txt but you now want to rename it
FileName.txt, the above behaviour by core.ignorecase may make it a
bit cumbersome to do.  You can first remove it from the index and
then re-add it, I would think, as a workaround.  Having to do a
workaround is unfortunate but it is an unavoidable consequence of
having to choose between the two and having to pick one.  Most of
the time you do not want such a rename (or rather, the loss of the
file Filename.txt and the creation of the unrelated FileName.txt)
and a change from Filename.txt to FileName.txt is most likely to be
a mistake in the platform tool that mucked with the files on your
filesystem, so we choose to make it easy for the user not to be
disturbed by such a change.
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: splitting a commit that adds new files

2014-02-02 Thread Junio C Hamano
Duy Nguyen pclo...@gmail.com writes:

 I usually start splitting a commit with reset @^ then add -p back.
 The problem is reset @^ does not keep track of new files added in
 HEAD, so I often end up forgetting to add new files back (with add
 -p). I'm thinking of making reset to do add -N automatically for
 me so I won't miss changes in add -p. But maybe people already know
 how to deal with this case without adding more code?

Is reset -p what you are looking for?  I do not use that myself,
though.
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] fast-import.c: always honor the filename case

2014-02-02 Thread Torsten Bögershausen
On 2014-02-02 14.13, Reuben Hawkins wrote:
 fast-import should not use strncmp_icase.  When it does, files with
 similar names, but different case can be lost in the import.  For
 example...
 
 M 100644 :1 FileName.txt
 D Filename.txt
That seems to be wrong, shouldn't it be
D Filename.txt
M 100644 :1 FileName.txt

How do you generate the export/import stream ?

Please see here:
https://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
Handling Renames

When importing a renamed file or directory, simply delete the old name(s) and 
modify the new name(s) during the corresponding commit. 
Git performs rename detection after-the-fact, rather than explicitly during a 
commit.





--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Running make rpm fails on a CentOS 6.3 machine

2014-02-02 Thread Erez Zilber
Thanks. I will try to use the rpm from Todd's build. BTW - if I want
to create such a build on Fedora that will create el6 packages (e.g.
git-1.8.5.3-2.el6.x86_64.rpm), what's the procedure?

Thanks,
Erez

On Thu, Jan 30, 2014 at 8:51 PM, Todd Zullinger t...@pobox.com wrote:
 Hello,


 Jonathan Nieder wrote:

 Erez Zilber wrote:

 Writing perl.mak for Git Writing perl.mak for Git rename MakeMaker.tmp =
 perl.mak: No such file or directory at
 /usr/share/perl5/ExtUtils/MakeMaker.pm line 1024. make[3]: perl.mak: No such
 file or directory make[3]: perl.mak: No such file or directory make[3]: ***
 No rule to make target `perl.mak'.  Stop.


 Looks like MakeMaker is racing against itself.  Alas, (on a fairly current
 Debian system, with perl 5.14.2) I'm not able to reproduce that.

 Instead, I get this:

 | $ make -j8 rpm [...]
 | make[2]: Leaving directory
 `$HOME/rpmbuild/BUILD/git-1.8.5.3/Documentation' | make[1]: Leaving
 directory `$HOME/rpmbuild/BUILD/git-1.8.5.3' | + exit 0 |
 Executing(%install): /bin/sh -e /var/tmp/rpm-tmp.WqNYnx | + umask 022 | + cd
 $HOME/rpmbuild/BUILD | + cd git-1.8.5.3 | + rm -rf
 $HOME/rpmbuild/BUILDROOT/git-1.8.5.3-1.x86_64 | + make -j12 'CFLAGS=-O2 -g'
 \DESTDIR=$HOME/rpmbuild/BUILDROOT/git-1.8.5.3-1.x86_64 \
 ETC_GITCONFIG=/etc/gitconfig prefix=/usr \  mandir=/usr/share/man
 htmldir=/usr/share/doc/git-1.8.5.3 \  INSTALLDIRS=vendor install
 install-doc | make[1]: Entering directory `$HOME/rpmbuild/BUILD/git-1.8.5.3'
 | make[1]: warning: -jN forced in submake: disabling jobserver mode. |
 make[1]: *** write jobserver: Bad file descriptor.  Stop. | make[1]: ***
 Waiting for unfinished jobs | make[1]: *** write jobserver: Bad file
 descriptor.  Stop. | error: Bad exit status from /var/tmp/rpm-tmp.WqNYnx
 (%install) | | | RPM build errors:
 | Bad exit status from /var/tmp/rpm-tmp.WqNYnx (%install) | make: ***
 [rpm] Error 1

 Known problem?  A build without -j8 gets further.


 It seems like it's not a problem with el6 or git's Makefiles themselves.  I
 haven't used the spec file from git.git in ages, but I have tried to ensure
 that the one we use in Fedora builds cleanly on el{5,6}.  I use this myself
 to keep an update git on el6, where Red Hat has left git at 1.7.1 since the
 release of el6 however many years ago. :(

 Here's a build I ran just now using the latest Fedora SRPM, showing it
 succeeds with make -j5:

 http://kojipkgs.fedoraproject.org//work/tasks/3049/6473049/build.log

 The build task, with all of the resulting rpms and logs is here:

 http://koji.fedoraproject.org/koji/taskinfo?taskID=6473049

 (That will remain for a few days or so, at least.  Scratch builds like this
 aren't kept indefinitely.)

 This makes me think that there's something in the git.spec in git.git that
 differs from what we use in Fedora/EPEL.  I don't have time to dig into that
 now, but perhaps someone with time and inclination can diff the spec files
 and find the cause.

 I know the Fedora/EPEL spec file and what's in git.git have grown apart a
 good bit, unfortunately.  That's the cost of having a spec file that is
 meant to work across a very wide array of RPM-based systems, I guess.  The
 Fedora/EPEL spec file is fairly specific to the Fedora/EPEL build tools
 (mock is the primary build tool).

 Hope this helps a little in narrowing down the issue.  I'm sorry I can't be
 of more assistance at the moment.

 --
 ToddOpenPGP - KeyID: 0xBEAF0CE3 | URL: www.pobox.com/~tmz/pgp
 ~~
 How am I supposed to hallucinate with all these swirling colors
 distracting me?
 -- Lisa Simpson

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Running make rpm fails on a CentOS 6.3 machine

2014-02-02 Thread Todd Zullinger

Hi Erez,

Erez Zilber wrote:
Thanks. I will try to use the rpm from Todd's build. BTW - if I want 
to create such a build on Fedora that will create el6 packages (e.g. 
git-1.8.5.3-2.el6.x86_64.rpm), what's the procedure?


Something like this (this is from memory):

# Install fedpkg
$ yum install fedpkg

# Checkout the fedora git package (anonymously)
$ fedpkg clone -a git  cd git

Verify the git tarball.  This is optional, but I would be remiss to 
suggest you skip it.  We used to include the tarball .asc file in the 
fedora git repo, but that is no longer available for download.  
Instead, it must be copied and pasted from Google Code (I'd love to 
see the .asc files return, FWIW).


# To verify the tarball, have fedpkg download the sources first -- if 
# you don't want to verify the tarball, you can skip this command and 
# the gpg/sha1sum commands that follow

$ fedpkg sources

# Copy and paste the gpg signature from 
# http://code.google.com/p/git-core/downloads/detail?name=git-1.8.5.3.tar.gzcan=2q= 
# to a local file and use gpg to verify it, then sha1sum to check the 
# tarballs

$ gpg --verify sha1.asc
...
$ sha1sum -c sha1.asc

# Create an el6 srpm
$ fedpkg --dist el6 srpm

# Build that package in mock, adding yourself or your build user to 
# the mock group first

$ sudo useradd -a -G mock $USER
$ mock -r epel-6-x86_64 git-1.8.5.3-2.el6.src.rpm

The first time you build for a given release in mock will take longer 
than subsequent builds, because mock needs to download a lot of 
packages for the build root.  These packages will be cached for future 
runs builds.


In the end, you should end up with a set of packages and build logs 
under /var/lib/mock/epel-6-x86_64/result/.


Hope this helps,

--
ToddOpenPGP - KeyID: 0xBEAF0CE3 | URL: www.pobox.com/~tmz/pgp
~~
Anyone who is capable of getting themselves made President should on
no account be allowed to do the job.
-- Douglas Adams, The Hitchhiker's Guide to the Galaxy



pgpjSoBWcZtsN.pgp
Description: PGP signature


Determining update/merge/current state of a workspace

2014-02-02 Thread Stephen Leake
I'm working on the DVC Emacs front-end for git
(http://www.emacswiki.org/emacs/DistributedVersionControl), adding
features similar to the ones I added for monotone
(http://www.monotone.ca). I'm used to monotone and new to git, so this
may seem like an odd workflow.

I always do 'fetch' and 'merge' separately, never 'pull'. So after a
'fetch', the DVC Emacs front end must determine what needs to happen
next. I think there are three cases:

1) 'fetch' did not retrieve any revisions from remote; the last local
   commit is the head of the branch.

The workspace is up to date (it may need to be comitted).

2) 'fetch' retrieved revisions, and there were no local commits since
   the previous fetch.

The last fetch is the head of the branch; if not equal to HEAD, the
workspace needs to be updated (via 'merge').

3) fetch retrieved revisions, and there were local commits since
   the previous fetch.

   There are two heads for the branch (the two described above), they
   need to be merged, then the workspace updated.

I'm not sure how 'git fetch' handles case 3); I have not tested that
case yet.

The question I have is:

What git queries can I run to determine which of the three states the
current workspace is in?

'rev-parse HEAD' gives the last workspace commit.

'rev-parse refs/remotes/remote/branch' gives the head of the branch
in the remote repository as of the most recent fetch.

But to distinguish among the cases, I need to determine if one of these
two revs is a child of the other or not. I don't see a git query to
determine that directly.

I could try parsing a 'log' output; I have not investigated that.

This is easy in monotone; there is a command 'mtn heads' that gives this
result directly (it returns either one or two revs), and another command
'mtn automate toposort' that orders revs topologically (by parent/child
relationships).

-- 
-- Stephe
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Determining update/merge/current state of a workspace

2014-02-02 Thread Jeff King
On Sun, Feb 02, 2014 at 04:15:09PM -0600, Stephen Leake wrote:

 I always do 'fetch' and 'merge' separately, never 'pull'. So after a
 'fetch', the DVC Emacs front end must determine what needs to happen
 next. I think there are three cases:

Doing the two steps separately is common in git, too. The cases you
mention are also something people commonly care about. Both git
checkout and git status will print out the relationship between the
current branch and its upstream. These are sometimes referred to as
ahead/behind messages in the manual (because they are of the form You
are N commits ahead of origin/master, etc).

 3) fetch retrieved revisions, and there were local commits since
the previous fetch.
 
There are two heads for the branch (the two described above), they
need to be merged, then the workspace updated.
 
 I'm not sure how 'git fetch' handles case 3); I have not tested that
 case yet.

Git's fetch does not have to care about this case. It is responsible
only for updating the ref that keeps track of the remote side (e.g.,
refs/remotes/origin/master). Unlike in some other DVCSs, there is no
global concept of a branch in git. The ref refs/heads/master refers
to your local branch named master, and the ref refs/remotes/origin/master
refers to some remote's branch with the same name. You can reconcile
them whenever and however you like, and do not have to do so immediately
(or at all).

 The question I have is:
 
 What git queries can I run to determine which of the three states the
 current workspace is in?

If you want to know the relationship between two (or more) commits, you
can use `git rev-list` to enumerate them. You can use the symmetric
difference operator (...) to walk both sides down to their merge-base.
The `--left-right` option will label them according to which side each
commit comes from. So try:

  git rev-list --left-right @{upstream}...HEAD

to see the commits, or just:

  git rev-list --left-right --count @{upstream}...HEAD

to just get the counts on each side. Note that I used @{upstream}
there instead of naming the branch specifically. The default remote
branch with which a local branch will merge can be configured, and does
not have to have the same name (or even be a remote branch).

 But to distinguish among the cases, I need to determine if one of these
 two revs is a child of the other or not. I don't see a git query to
 determine that directly.

I think what I gave above matches what you are looking for most
directly. But as you may have already guessed, you can also use rev-list
to find whether one rev is a child of the other (e.g., git rev-list
--count a..b != 0).

 I could try parsing a 'log' output; I have not investigated that.

Don't do that. As you might expect, `git log` is built on top of the
traversals done by `rev-list`. The latter is preferred as a building
block, because it is plumbing whose output is guaranteed not to change
in later git versions.

I hope that helps,

-Peff
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] fast-import.c: always honor the filename case

2014-02-02 Thread Jeff King
[+cc Joshua Jensen, who wrote 50906e0]

On Sun, Feb 02, 2014 at 07:13:04AM -0600, Reuben Hawkins wrote:

 fast-import should not use strncmp_icase.

I am not sure of that. My gut feeling is that core.ignorecase is
completely about the _filesystem_, and that git should generally be
case-sensitive internally. But I do not know that everyone agrees. Your
commit is basically a revert of 50906e0 (Support case folding in git
fast-import when core.ignorecase=true, 2010-10-03). And here's some
additional discussion specifically regarding fast-import:

  http://thread.gmane.org/gmane.comp.version-control.git/200597

So I think there is a discussion to be had[1].

 When it does, files with
 similar names, but different case can be lost in the import.  For
 example...
 
 M 100644 :1 FileName.txt
 D Filename.txt
 
 ...would end up deleting FileName from the index during the fast-
 import when strncmp_icase is used and core.ignorecase=true.  The
 intent in the above snippet is to rename the file, not delete it.

There may be a separate bug where fast-import needs to be smarter about
ordering operations in such a case (or perhaps fast-export generators
need to be more careful about the order of their output). But it might
be fixable without disabling case-insensitivity entirely.

-Peff

[1] I am mostly trying to connect people on various sides of the
discussion here. So take my gut feeling above with a grain of
salt, as it does not come from experience nor thinking too hard
about the issue.
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Determining update/merge/current state of a workspace

2014-02-02 Thread David Aguilar
On Sun, Feb 02, 2014 at 04:15:09PM -0600, Stephen Leake wrote:
 I'm working on the DVC Emacs front-end for git
 (http://www.emacswiki.org/emacs/DistributedVersionControl), adding
 features similar to the ones I added for monotone
 (http://www.monotone.ca). I'm used to monotone and new to git, so this
 may seem like an odd workflow.
 
 I always do 'fetch' and 'merge' separately, never 'pull'. So after a
 'fetch', the DVC Emacs front end must determine what needs to happen
 next. I think there are three cases:
 
 1) 'fetch' did not retrieve any revisions from remote; the last local
commit is the head of the branch.
 
 The workspace is up to date (it may need to be comitted).
 
 2) 'fetch' retrieved revisions, and there were no local commits since
the previous fetch.
 
 The last fetch is the head of the branch; if not equal to HEAD, the
 workspace needs to be updated (via 'merge').
 
 3) fetch retrieved revisions, and there were local commits since
the previous fetch.
 
There are two heads for the branch (the two described above), they
need to be merged, then the workspace updated.
 
 I'm not sure how 'git fetch' handles case 3); I have not tested that
 case yet.

Fetch updates your cached origin/master ref to match the remote.
Your local master branch and worktree are left as-is.

 The question I have is:
 
 What git queries can I run to determine which of the three states the
 current workspace is in?
 
 'rev-parse HEAD' gives the last workspace commit.
 
 'rev-parse refs/remotes/remote/branch' gives the head of the branch
 in the remote repository as of the most recent fetch.
 
 But to distinguish among the cases, I need to determine if one of these
 two revs is a child of the other or not. I don't see a git query to
 determine that directly.

I think you're looking for git merge-base.

If you do `git merge-base HEAD origin/master`
and its result is equal to `git rev-parse HEAD`
then you know that master is an ancestor of origin/master
and can be trivially fast-forwarded to origin/master.

If you get a SHA-1 that is not equal then there are probably[*]
local commits that have happened on master and it should probably
be rebased (or merged).

[*] other possibilities: someone rebased your upstream, etc.


People have differing opinions on how to resolve the diverging
history. Topic branches are the gitty approach.

Another popular approach to resolving the divergence is what
git pull --rebase would have done.

(No one really likes what git pull would have done by default
 when there are local commits)

To implement the rebase workflow, do git rebase --autostash origin/master.
after fetching.  That workflow is probably the simplest for
folks who eschew branching or are expats from other vcs.

If you're writing a tool you might want to check whether
the branch has an upstream configured via `git config branch.$name.remote`
and `git config branch.$name.merge` as well.


 I could try parsing a 'log' output; I have not investigated that.
 This is easy in monotone; there is a command 'mtn heads' that gives this
 result directly (it returns either one or two revs), and another command
 'mtn automate toposort' that orders revs topologically (by parent/child
 relationships).

`git log` can also tell you whether you have commits that they don't..

What does origin/master have that I don't?

git log HEAD..origin/master

What do I have that origin/master does not?

git log origin/master..HEAD

The git log output is easily controlled, though for these questions
the mere presence/absense of output tells you what you need to know.

cheers,
-- 
David
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: splitting a commit that adds new files

2014-02-02 Thread Jeff King
On Sun, Feb 02, 2014 at 10:15:07AM -0800, Junio C Hamano wrote:

 Duy Nguyen pclo...@gmail.com writes:
 
  I usually start splitting a commit with reset @^ then add -p back.
  The problem is reset @^ does not keep track of new files added in
  HEAD, so I often end up forgetting to add new files back (with add
  -p). I'm thinking of making reset to do add -N automatically for
  me so I won't miss changes in add -p. But maybe people already know
  how to deal with this case without adding more code?
 
 Is reset -p what you are looking for?  I do not use that myself,
 though.

I don't think so. He is at a commit that needs split, so the first thing
to do is to shift the HEAD back. reset -p is only about changing the
index[1]. I suppose you could start with a soft reset, and then reset
-p away the changes. But then you are going backwards (this does not
belong in the commit, remove it rather than this does belong in the
commit, add it).

I don't typically have a problem with this because I keep my directory
free of untracked files (they are either marked by .gitignore, or
something that I am planning to commit). So just running git status
gives me a neat list of what needs to be added (and typically git add
-N . is a good starting point upon which to build a git add -p
session).

-Peff

[1] I _do_ use reset -p when splitting commits, but I do not think it
is useful here. I use it for oops, I staged this change, but it
actually belongs in the next commit. Undo my staging, but leave the
changes in the working tree for the next one.
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Creating own hierarchies under $GITDIR/refs ?

2014-02-02 Thread Jeff King
On Sun, Feb 02, 2014 at 11:37:39AM +0100, David Kastrup wrote:

 So I mused: refs/heads contains branches, refs/tags contains tags.  The
 respective information would likely easily enough be stored in refs/bzr
 and refs/bugs and in that manner would not pollute the ordinary tag
 and branch spaces, rendering git tag and/or git branch output mostly
 unusable.  I tested creating such a directory and entries and indeed
 references like bzr/39005 then worked.

Yes. The names refs/tags and refs/heads are special by convention,
and there is no reason you cannot have other hierarchies (and indeed, we
already have refs/notes and refs/remotes as common hierarchies).

 However, cloning from the repository did not copy those directories and
 references, so without modification, this scheme would not work for
 cloned repositories.

Correct. Anyone who wants them will have to ask for them manually, like:

  git config --add remote.origin.fetch '+refs/bzr/*:refs/bzr/*'

after which any git fetch will retrieve them.

 Are there some measures one can take/configure in the parent repository
 such that (named or all) additional directories inside of $GITDIR/refs
 would get cloned along with the rest?

No. It is up to the client to decide which parts of the ref namespace
they want to fetch. The server only advertises what it has, and the
client selects from that.


Others mentioned that refs were never really intended to scale to
one-per-commit. We serve some repositories with tens of thousands of
refs from GitHub, and it does work. On the backend, we even have some
repos in the hundreds of thousands (but these are not client facing).
Most of the pain points (like O(n^2) loops) have been ironed out, but
the two big ones are still:

  - server ref advertisement lists _all_ refs at the start of the
conversation. So, e.g.,

git fetch git://github.com/Homebrew/homebrew.git

sends 2MB of advertisement just so a client can find out nope,
nothing to fetch.

  - the packed-refs storage is rather monolithic. Reading a value from
it currently requires parsing the whole file. Likewise, deleting a
ref requires rewriting the whole file.

So what you are proposing will work, but do note that there is a cost.

-Peff
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] userdiff: update Ada patterns

2014-02-02 Thread Jeff King
On Sun, Feb 02, 2014 at 09:21:56PM +1030, Adrian Johnson wrote:

 - Allow extra space in is new and is separate
 [...]
 -  !^(.*[ \t])?(is new|renames|is separate)([ \t].*)?$\n
 +  !^(.*[ \t])?(is[ \t]+new|renames|is[ \t]+separate)([ \t].*)?$\n

I do not know anything about Ada syntax, but this change looks obviously
sensible.

 - Fix bug in word regex for numbers
 -  |[0-9][-+0-9#_.eE]
 +  |[-+0-9#_.eE]+

This makes E or _ a number. Is that right?

I think the intent of the original was starts with a digit, and then
has one or more of these other things after it. You do not describe the
bug, but I guess it would be that it does not match obvious things like
5. Should it be zero or more instead, like:

  [0-9][-+0-9#_.eE]*

? Also, should -/+ be hoisted to the front?

  [-+]?[0-9][0-9#_.eE]*

Again, I am just guessing, as I am not familiar enough with Ada.

-Peff
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Creating own hierarchies under $GITDIR/refs ?

2014-02-02 Thread Jed Brown
John Keeping j...@keeping.me.uk writes:
 I actually wonder if you could do this with notes and git-grep; for
 example:

 git grep -l keeping.me.uk refs/notes/amlog |
 sed -e 's/.*://' -e 's!/!!g'

 That should be relatively efficient since you're only looking at the
 current notes tree.

I added notes handling to gitifyhg and would search it similar to this.
Since gitifyhg is two-way, I could not modify the commits.  Later, when
we converted several repositories (up to 50k commits/80 MB), I appended

  Hg-commit: $Hg_commit_hash

to all the commit messages.  This way it shows up on the web interface,
users don't have to obtain the notes specially, and git log --grep
works naturally.  I think it's worth considering this simple solution;
existing Git users won't mind recloning once.


pgp3uBjty3sk1.pgp
Description: PGP signature


Re: Determining update/merge/current state of a workspace

2014-02-02 Thread brian m. carlson
On Sun, Feb 02, 2014 at 03:04:59PM -0800, David Aguilar wrote:
 I think you're looking for git merge-base.
 
 If you do `git merge-base HEAD origin/master`
 and its result is equal to `git rev-parse HEAD`
 then you know that master is an ancestor of origin/master
 and can be trivially fast-forwarded to origin/master.

In newer versions of git (1.8.0+), you can use git merge-base
--is-ancestor for this instead.  The commit message for 5907cda implies
that it is more efficient than the old way.

-- 
brian m. carlson / brian with sandals: Houston, Texas, US
+1 832 623 2791 | http://www.crustytoothpaste.net/~bmc | My opinion only
OpenPGP: RSA v4 4096b: 88AC E9B2 9196 305B A994 7552 F1BA 225C 0223 B187


signature.asc
Description: Digital signature


Re: [PATCH v5 5/5] setup: Don't dereference in-tree symlinks for absolute paths

2014-02-02 Thread Duy Nguyen
On Sun, Feb 2, 2014 at 11:35 PM, Martin Erik Werner
martinerikwer...@gmail.com wrote:
 diff --git a/setup.c b/setup.c
 index a2e60ab..230505c 100644
 --- a/setup.c
 +++ b/setup.c
 @@ -86,11 +86,23 @@ char *prefix_path_gently(const char *prefix, int len,
 const char *orig = path;
 char *sanitized;
 if (is_absolute_path(orig)) {
 -   const char *temp = real_path(path);
 -   sanitized = xmalloc(len + strlen(temp) + 1);
 -   strcpy(sanitized, temp);
 +   char *npath;
 +
 +   npath = xmalloc(strlen(path) + 1);
 if (remaining_prefix)
 *remaining_prefix = 0;
 +   if (normalize_path_copy_len(npath, path, remaining_prefix)) {
 +   free(npath);
 +   return NULL;
 +   }
 +   if (abspath_part_inside_repo(npath)) {
 +   free(npath);
 +   return NULL;
 +   }
 +
 +   sanitized = xmalloc(strlen(npath) + 1);
 +   strcpy(sanitized, npath);
 +   free(npath);

We could replace these three lines with sanitized = npath;. But it's
not a big deal imo. The rest of the series looks good.

Reviewed-by: Duy Nguyen pclo...@gmail.com

 } else {
 sanitized = xmalloc(len + strlen(path) + 1);
 if (len)
-- 
Duy
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 02/26] pkt-line.c: add packet_write_timeout()

2014-02-02 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 cache.h|  1 +
 pkt-line.c | 15 +++
 pkt-line.h |  1 +
 wrapper.c  | 26 ++
 4 files changed, 43 insertions(+)

diff --git a/cache.h b/cache.h
index dc040fb..718e32b 100644
--- a/cache.h
+++ b/cache.h
@@ -1231,6 +1231,7 @@ extern void fsync_or_die(int fd, const char *);
 
 extern ssize_t read_in_full(int fd, void *buf, size_t count);
 extern ssize_t write_in_full(int fd, const void *buf, size_t count);
+extern ssize_t write_in_full_timeout(int fd, const void *buf, size_t count, 
int timeout);
 static inline ssize_t write_str_in_full(int fd, const char *str)
 {
return write_in_full(fd, str, strlen(str));
diff --git a/pkt-line.c b/pkt-line.c
index eac45ad..cf681e9 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -94,6 +94,21 @@ void packet_write(int fd, const char *fmt, ...)
write_or_die(fd, write_buffer, n);
 }
 
+int packet_write_timeout(int fd, int timeout, const char *fmt, ...)
+{
+   static struct strbuf sb = STRBUF_INIT;
+   va_list args;
+   unsigned n;
+
+   if (fd == -1)
+   return -1;
+   va_start(args, fmt);
+   strbuf_reset(sb);
+   n = format_packet(fmt, args);
+   va_end(args);
+   return write_in_full_timeout(fd, write_buffer, n, timeout);
+}
+
 void packet_buf_write(struct strbuf *buf, const char *fmt, ...)
 {
va_list args;
diff --git a/pkt-line.h b/pkt-line.h
index 0a838d1..4b93a0c 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -21,6 +21,7 @@
  */
 void packet_flush(int fd);
 void packet_write(int fd, const char *fmt, ...) __attribute__((format (printf, 
2, 3)));
+int packet_write_timeout(int fd, int timeout, const char *fmt, ...) 
__attribute__((format (printf, 3, 4)));
 void packet_buf_flush(struct strbuf *buf);
 void packet_buf_write(struct strbuf *buf, const char *fmt, ...) 
__attribute__((format (printf, 2, 3)));
 
diff --git a/wrapper.c b/wrapper.c
index 0cc5636..9a0e289 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -214,6 +214,32 @@ ssize_t write_in_full(int fd, const void *buf, size_t 
count)
return total;
 }
 
+ssize_t write_in_full_timeout(int fd, const void *buf,
+ size_t count, int timeout)
+{
+   struct pollfd pfd;
+   const char *p = buf;
+   ssize_t total = 0;
+
+   pfd.fd = fd;
+   pfd.events = POLLOUT;
+   while (count  0  poll(pfd, 1, timeout)  0 
+  (pfd.revents  POLLOUT)) {
+   ssize_t written = xwrite(fd, p, count);
+   if (written  0)
+   return -1;
+   if (!written) {
+   errno = ENOSPC;
+   return -1;
+   }
+   count -= written;
+   p += written;
+   total += written;
+   }
+
+   return count ? -1 : total;
+}
+
 int xdup(int fd)
 {
int ret = dup(fd);
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 03/26] pkt-line.c: add packet_read_line_timeout()

2014-02-02 Thread Nguyễn Thái Ngọc Duy
This version is also gentler than its friend packet_read_line()
because it's designed for side channel I/O that should not abort the
program even if the channel is broken.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 cache.h|  1 +
 pkt-line.c | 35 +++
 pkt-line.h |  1 +
 wrapper.c  | 21 +
 4 files changed, 58 insertions(+)

diff --git a/cache.h b/cache.h
index 718e32b..939db46 100644
--- a/cache.h
+++ b/cache.h
@@ -1230,6 +1230,7 @@ extern int write_or_whine_pipe(int fd, const void *buf, 
size_t count, const char
 extern void fsync_or_die(int fd, const char *);
 
 extern ssize_t read_in_full(int fd, void *buf, size_t count);
+extern ssize_t read_in_full_timeout(int fd, void *buf, size_t count, int 
timeout);
 extern ssize_t write_in_full(int fd, const void *buf, size_t count);
 extern ssize_t write_in_full_timeout(int fd, const void *buf, size_t count, 
int timeout);
 static inline ssize_t write_str_in_full(int fd, const char *str)
diff --git a/pkt-line.c b/pkt-line.c
index cf681e9..5a07e97 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -229,3 +229,38 @@ char *packet_read_line_buf(char **src, size_t *src_len, 
int *dst_len)
 {
return packet_read_line_generic(-1, src, src_len, dst_len);
 }
+
+char *packet_read_line_timeout(int fd, int timeout, int *len_p)
+{
+   char *buf = packet_buffer;
+   int ret, len, buf_len = sizeof(packet_buffer);
+   char linelen[4];
+
+   if (fd == -1)
+   return NULL;
+   if ((ret = read_in_full_timeout(fd, linelen, 4, timeout))  0)
+   return NULL;
+   len = packet_length(linelen);
+   if (len  0) {
+   error(protocol error: bad line length character: %.4s, 
linelen);
+   return NULL;
+   }
+   if (!len) {
+   packet_trace(, 4, 0);
+   if (len_p)
+   *len_p = 0;
+   return ;
+   }
+   len -= 4;
+   if (len = buf_len) {
+   error(protocol error: bad line length %d, len);
+   return NULL;
+   }
+   if ((ret = read_in_full_timeout(fd, buf, len, timeout))  0)
+   return NULL;
+   buf[len] = '\0';
+   if (len_p)
+   *len_p = len;
+   packet_trace(buf, len, 0);
+   return buf;
+}
diff --git a/pkt-line.h b/pkt-line.h
index 4b93a0c..d47dca5 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -69,6 +69,7 @@ int packet_read(int fd, char **src_buffer, size_t *src_len, 
char
  * packet is written to it.
  */
 char *packet_read_line(int fd, int *size);
+char *packet_read_line_timeout(int fd, int timeout, int *size);
 
 /*
  * Same as packet_read_line, but read from a buf rather than a descriptor;
diff --git a/wrapper.c b/wrapper.c
index 9a0e289..9cf10b2 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -193,6 +193,27 @@ ssize_t read_in_full(int fd, void *buf, size_t count)
return total;
 }
 
+ssize_t read_in_full_timeout(int fd, void *buf, size_t count, int timeout)
+{
+   char *p = buf;
+   ssize_t total = 0;
+   struct pollfd pfd;
+
+   pfd.fd = fd;
+   pfd.events = POLLIN;
+   while (count  0  poll(pfd, 1, timeout)  0 
+  (pfd.revents  POLLIN)) {
+   ssize_t loaded = xread(fd, p, count);
+   if (loaded = 0)
+   return -1;
+   count -= loaded;
+   p += loaded;
+   total += loaded;
+   }
+
+   return count ? -1 : total;
+}
+
 ssize_t write_in_full(int fd, const void *buf, size_t count)
 {
const char *p = buf;
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 01/26] pkt-line.c: rename global variable buffer[] to something less generic

2014-02-02 Thread Nguyễn Thái Ngọc Duy
buffer is a local variable in some other functions. Rename the
global one to make it less confusing.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 pkt-line.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/pkt-line.c b/pkt-line.c
index bc63b3b..eac45ad 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -64,14 +64,15 @@ void packet_buf_flush(struct strbuf *buf)
 }
 
 #define hex(a) (hexchar[(a)  15])
-static char buffer[1000];
+static char write_buffer[1000];
 static unsigned format_packet(const char *fmt, va_list args)
 {
static char hexchar[] = 0123456789abcdef;
+   char *buffer = write_buffer;
unsigned n;
 
-   n = vsnprintf(buffer + 4, sizeof(buffer) - 4, fmt, args);
-   if (n = sizeof(buffer)-4)
+   n = vsnprintf(buffer + 4, sizeof(write_buffer) - 4, fmt, args);
+   if (n = sizeof(write_buffer)-4)
die(protocol error: impossibly long line);
n += 4;
buffer[0] = hex(n  12);
@@ -90,7 +91,7 @@ void packet_write(int fd, const char *fmt, ...)
va_start(args, fmt);
n = format_packet(fmt, args);
va_end(args);
-   write_or_die(fd, buffer, n);
+   write_or_die(fd, write_buffer, n);
 }
 
 void packet_buf_write(struct strbuf *buf, const char *fmt, ...)
@@ -101,7 +102,7 @@ void packet_buf_write(struct strbuf *buf, const char *fmt, 
...)
va_start(args, fmt);
n = format_packet(fmt, args);
va_end(args);
-   strbuf_add(buf, buffer, n);
+   strbuf_add(buf, write_buffer, n);
 }
 
 static int get_packet_data(int fd, char **src_buf, size_t *src_size,
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 00/26] inotify support

2014-02-02 Thread Nguyễn Thái Ngọc Duy
I'm happy with this now. The only things left are applying ewah on the
watch index extension and maybe improve lookup performance a bit. The
former needs jk/pack-bitmap graduated. The latter is not urgent. Oh
and maybe address BUGS section (more like known limitations) in
git-file-watcher.txt.

For early adopters that fear a buggy file-watcher may cause update
loss, set GIT_TEST_WATCHED=1 (or 2). It'll do lstat() to verify
file-watcher results (so no perf. gain). Beware of race condition
that may lead to false positives, mentioned in 20/26 (maybe I should
do something about it).

The series can also be fetched from

https://github.com/pclouds/git.git file-watcher

Nguyễn Thái Ngọc Duy (26):
  pkt-line.c: rename global variable buffer[] to something less generic
  pkt-line.c: add packet_write_timeout()
  pkt-line.c: add packet_read_line_timeout()
  unix-socket: make unlink() optional in unix_stream_listen()
  Add git-file-watcher and basic connection handling logic
  file-watcher: check socket directory permission
  file-watcher: remove socket on exit
  file-watcher: add --detach
  read-cache: save trailing sha-1
  read-cache: new flag CE_WATCHED to mark what file is watched
  Clear CE_WATCHED when set CE_VALID alone
  read-cache: basic hand shaking to the file watcher
  read-cache: ask file watcher to watch files
  read-cache: put some limits on file watching
  read-cache: get changed file list from file watcher
  git-compat-util.h: add inotify stubs on non-Linux platforms
  file-watcher: inotify support, watching part
  file-watcher: inotify support, notification part
  Wrap CE_VALID test with ce_valid()
  read-cache: new variable to verify file-watcher results
  Support running file watcher with the test suite
  file-watcher: quit if $WATCHER/socket is gone
  file-watcher: tests for the daemon
  ls-files: print CE_WATCHED as W (or w with CE_VALID)
  file-watcher: tests for the client side
  Disable file-watcher with system inotify on some tests

 .gitignore   |2 +
 Documentation/config.txt |   19 +
 Documentation/git-file-watcher.txt (new) |   54 ++
 Documentation/git-ls-files.txt   |1 +
 Documentation/technical/index-format.txt |9 +
 Makefile |3 +
 builtin/grep.c   |2 +-
 builtin/ls-files.c   |   14 +-
 builtin/update-index.c   |   12 +-
 cache.h  |   17 +
 config.mak.uname |1 +
 credential-cache--daemon.c   |2 +-
 daemon.c |   30 +-
 diff-lib.c   |4 +-
 diff.c   |2 +-
 file-watcher-lib.c (new) |  321 +
 file-watcher-lib.h (new) |8 +
 file-watcher.c (new) | 1149 ++
 git-compat-util.h|   43 ++
 pkt-line.c   |   61 +-
 pkt-line.h   |2 +
 read-cache.c |  119 +++-
 setup.c  |   25 +
 t/t1011-read-tree-sparse-checkout.sh |2 +
 t/t2104-update-index-skip-worktree.sh|2 +
 t/t7011-skip-worktree-reading.sh |2 +
 t/t7012-skip-worktree-writing.sh |2 +
 t/t7513-file-watcher.sh (new +x) |  382 ++
 t/t7514-file-watcher-lib.sh (new +x) |  190 +
 test-file-watcher.c (new)|  111 +++
 unix-socket.c|5 +-
 unix-socket.h|2 +-
 unpack-trees.c   |2 +-
 wrapper.c|   47 ++
 34 files changed, 2591 insertions(+), 56 deletions(-)
 create mode 100644 Documentation/git-file-watcher.txt
 create mode 100644 file-watcher-lib.c
 create mode 100644 file-watcher-lib.h
 create mode 100644 file-watcher.c
 create mode 100755 t/t7513-file-watcher.sh
 create mode 100755 t/t7514-file-watcher-lib.sh
 create mode 100644 test-file-watcher.c

-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 07/26] file-watcher: remove socket on exit

2014-02-02 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 file-watcher.c | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/file-watcher.c b/file-watcher.c
index 91f4cfe..9c639ef 100644
--- a/file-watcher.c
+++ b/file-watcher.c
@@ -56,6 +56,26 @@ static void accept_connection(int fd)
pfd_nr++;
 }
 
+static const char *socket_path;
+static int do_not_clean_up;
+
+static void cleanup(void)
+{
+   struct strbuf sb = STRBUF_INIT;
+   if (do_not_clean_up)
+   return;
+   strbuf_addf(sb, %s/socket, socket_path);
+   unlink(sb.buf);
+   strbuf_release(sb);
+}
+
+static void cleanup_on_signal(int signo)
+{
+   cleanup();
+   sigchain_pop(signo);
+   raise(signo);
+}
+
 static const char permissions_advice[] =
 N_(The permissions on your socket directory are too loose; other\n
processes may be able to read your file listing. Consider running:\n
@@ -91,7 +111,6 @@ int main(int argc, const char **argv)
 {
struct strbuf sb = STRBUF_INIT;
int i, new_nr, fd, quit = 0, nr_common;
-   const char *socket_path = NULL;
struct option options[] = {
OPT_END()
};
@@ -113,6 +132,9 @@ int main(int argc, const char **argv)
die_errno(_(unable to listen at %s), sb.buf);
strbuf_reset(sb);
 
+   atexit(cleanup);
+   sigchain_push_common(cleanup_on_signal);
+
nr_common = 1;
pfd_alloc = pfd_nr = nr_common;
pfd = xmalloc(sizeof(*pfd) * pfd_alloc);
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 05/26] Add git-file-watcher and basic connection handling logic

2014-02-02 Thread Nguyễn Thái Ngọc Duy
git-file-watcher is a daemon (*) that watches file changes and tells
git about them. The intent is to avoid lstat() calls at index refresh
time, which could be a lot on big working directory.

The actual monitoring needs support from OS (inotify, FSEvents,
FindFirstChangeNotification or kqueue) and is not part of this patch
or the next few yet. This patch only provides a UNIX socket server.

(*) it will be a a daemon in this end, but in this patch it runs in
the foreground.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 .gitignore   |   1 +
 Documentation/git-file-watcher.txt (new) |  29 +++
 Makefile |   1 +
 file-watcher.c (new) | 140 +++
 4 files changed, 171 insertions(+)
 create mode 100644 Documentation/git-file-watcher.txt
 create mode 100644 file-watcher.c

diff --git a/.gitignore b/.gitignore
index b5f9def..12c78f0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -56,6 +56,7 @@
 /git-fast-import
 /git-fetch
 /git-fetch-pack
+/git-file-watcher
 /git-filter-branch
 /git-fmt-merge-msg
 /git-for-each-ref
diff --git a/Documentation/git-file-watcher.txt 
b/Documentation/git-file-watcher.txt
new file mode 100644
index 000..625a389
--- /dev/null
+++ b/Documentation/git-file-watcher.txt
@@ -0,0 +1,29 @@
+git-file-watcher(1)
+===
+
+NAME
+
+git-file-watcher - File update notification daemon
+
+SYNOPSIS
+
+[verse]
+'git file-watcher' [options] socket directory
+
+DESCRIPTION
+---
+This program watches file changes in a git working directory and let
+Git now what files have been changed so that Git does not have to call
+lstat(2) to detect that itself.
+
+OPTIONS
+---
+
+SEE ALSO
+
+linkgit:git-update-index[1],
+linkgit:git-config[1]
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Makefile b/Makefile
index dddaf4f..8eef0d6 100644
--- a/Makefile
+++ b/Makefile
@@ -536,6 +536,7 @@ PROGRAMS += $(EXTRA_PROGRAMS)
 PROGRAM_OBJS += credential-store.o
 PROGRAM_OBJS += daemon.o
 PROGRAM_OBJS += fast-import.o
+PROGRAM_OBJS += file-watcher.o
 PROGRAM_OBJS += http-backend.o
 PROGRAM_OBJS += imap-send.o
 PROGRAM_OBJS += sh-i18n--envsubst.o
diff --git a/file-watcher.c b/file-watcher.c
new file mode 100644
index 000..a6d7f17
--- /dev/null
+++ b/file-watcher.c
@@ -0,0 +1,140 @@
+#include cache.h
+#include sigchain.h
+#include parse-options.h
+#include exec_cmd.h
+#include unix-socket.h
+
+static const char *const file_watcher_usage[] = {
+   N_(git file-watcher [options] socket directory),
+   NULL
+};
+
+struct connection {
+   int sock;
+};
+
+static struct connection **conns;
+static struct pollfd *pfd;
+static int conns_alloc, pfd_nr, pfd_alloc;
+
+static int shutdown_connection(int id)
+{
+   struct connection *conn = conns[id];
+   conns[id] = NULL;
+   pfd[id].fd = -1;
+   if (!conn)
+   return 0;
+   close(conn-sock);
+   free(conn);
+   return 0;
+}
+
+static int handle_command(int conn_id)
+{
+   return 0;
+}
+
+static void accept_connection(int fd)
+{
+   struct connection *conn;
+   int client = accept(fd, NULL, NULL);
+   if (client  0) {
+   warning(_(accept failed: %s), strerror(errno));
+   return;
+   }
+
+   ALLOC_GROW(pfd, pfd_nr + 1, pfd_alloc);
+   pfd[pfd_nr].fd = client;
+   pfd[pfd_nr].events = POLLIN;
+   pfd[pfd_nr].revents = 0;
+
+   ALLOC_GROW(conns, pfd_nr + 1, conns_alloc);
+   conn = xmalloc(sizeof(*conn));
+   memset(conn, 0, sizeof(*conn));
+   conn-sock = client;
+   conns[pfd_nr] = conn;
+   pfd_nr++;
+}
+
+int main(int argc, const char **argv)
+{
+   struct strbuf sb = STRBUF_INIT;
+   int i, new_nr, fd, quit = 0, nr_common;
+   const char *socket_path = NULL;
+   struct option options[] = {
+   OPT_END()
+   };
+
+   git_extract_argv0_path(argv[0]);
+   git_setup_gettext();
+   argc = parse_options(argc, argv, NULL, options,
+file_watcher_usage, 0);
+   if (argc  1)
+   die(_(socket path missing));
+   else if (argc  1)
+   die(_(too many arguments));
+
+   socket_path = argv[0];
+   strbuf_addf(sb, %s/socket, socket_path);
+   fd = unix_stream_listen(sb.buf, 0);
+   if (fd == -1)
+   die_errno(_(unable to listen at %s), sb.buf);
+   strbuf_reset(sb);
+
+   nr_common = 1;
+   pfd_alloc = pfd_nr = nr_common;
+   pfd = xmalloc(sizeof(*pfd) * pfd_alloc);
+   pfd[0].fd = fd;
+   pfd[0].events = POLLIN;
+
+   while (!quit) {
+   if (poll(pfd, pfd_nr, -1)  0) {
+   if (errno != EINTR) {
+   error(Poll failed, resuming: %s,
+ strerror(errno));
+   sleep(1);
+   }
+

[PATCH v3 04/26] unix-socket: make unlink() optional in unix_stream_listen()

2014-02-02 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 credential-cache--daemon.c | 2 +-
 unix-socket.c  | 5 +++--
 unix-socket.h  | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/credential-cache--daemon.c b/credential-cache--daemon.c
index 390f194..1b995a9 100644
--- a/credential-cache--daemon.c
+++ b/credential-cache--daemon.c
@@ -207,7 +207,7 @@ static void serve_cache(const char *socket_path)
 {
int fd;
 
-   fd = unix_stream_listen(socket_path);
+   fd = unix_stream_listen(socket_path, 1);
if (fd  0)
die_errno(unable to bind to '%s', socket_path);
 
diff --git a/unix-socket.c b/unix-socket.c
index 01f119f..2be1af6 100644
--- a/unix-socket.c
+++ b/unix-socket.c
@@ -93,7 +93,7 @@ fail:
return -1;
 }
 
-int unix_stream_listen(const char *path)
+int unix_stream_listen(const char *path, int replace)
 {
int fd, saved_errno;
struct sockaddr_un sa;
@@ -103,7 +103,8 @@ int unix_stream_listen(const char *path)
return -1;
fd = unix_stream_socket();
 
-   unlink(path);
+   if (replace)
+   unlink(path);
if (bind(fd, (struct sockaddr *)sa, sizeof(sa))  0)
goto fail;
 
diff --git a/unix-socket.h b/unix-socket.h
index e271aee..18ee290 100644
--- a/unix-socket.h
+++ b/unix-socket.h
@@ -2,6 +2,6 @@
 #define UNIX_SOCKET_H
 
 int unix_stream_connect(const char *path);
-int unix_stream_listen(const char *path);
+int unix_stream_listen(const char *path, int replace);
 
 #endif /* UNIX_SOCKET_H */
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 06/26] file-watcher: check socket directory permission

2014-02-02 Thread Nguyễn Thái Ngọc Duy
Access to the unix socket $WATCHER/socket is covered by $WATCHER's
permission. While the file watcher does not carry much information,
repo file listing is sometimes not something you want other users to
see. Make sure $WATCHER has 0700 permission to stop unwanted access.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 file-watcher.c | 32 
 1 file changed, 32 insertions(+)

diff --git a/file-watcher.c b/file-watcher.c
index a6d7f17..91f4cfe 100644
--- a/file-watcher.c
+++ b/file-watcher.c
@@ -56,6 +56,37 @@ static void accept_connection(int fd)
pfd_nr++;
 }
 
+static const char permissions_advice[] =
+N_(The permissions on your socket directory are too loose; other\n
+   processes may be able to read your file listing. Consider running:\n
+   \n
+  chmod 0700 %s);
+static void check_socket_directory(const char *path)
+{
+   struct stat st;
+   char *path_copy = xstrdup(path);
+   char *dir = dirname(path_copy);
+
+   if (!stat(dir, st)) {
+   if (st.st_mode  077)
+   die(_(permissions_advice), dir);
+   free(path_copy);
+   return;
+   }
+
+   /*
+* We must be sure to create the directory with the correct mode,
+* not just chmod it after the fact; otherwise, there is a race
+* condition in which somebody can chdir to it, sleep, then try to open
+* our protected socket.
+*/
+   if (safe_create_leading_directories_const(dir)  0)
+   die_errno(_(unable to create directories for '%s'), dir);
+   if (mkdir(dir, 0700)  0)
+   die_errno(_(unable to mkdir '%s'), dir);
+   free(path_copy);
+}
+
 int main(int argc, const char **argv)
 {
struct strbuf sb = STRBUF_INIT;
@@ -76,6 +107,7 @@ int main(int argc, const char **argv)
 
socket_path = argv[0];
strbuf_addf(sb, %s/socket, socket_path);
+   check_socket_directory(sb.buf);
fd = unix_stream_listen(sb.buf, 0);
if (fd == -1)
die_errno(_(unable to listen at %s), sb.buf);
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 08/26] file-watcher: add --detach

2014-02-02 Thread Nguyễn Thái Ngọc Duy
In daemon mode, stdout and stderr are saved in $WATCHER/log.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 Documentation/git-file-watcher.txt |  2 ++
 cache.h|  1 +
 daemon.c   | 30 --
 file-watcher.c | 17 +
 setup.c| 25 +
 5 files changed, 49 insertions(+), 26 deletions(-)

diff --git a/Documentation/git-file-watcher.txt 
b/Documentation/git-file-watcher.txt
index 625a389..ec81f18 100644
--- a/Documentation/git-file-watcher.txt
+++ b/Documentation/git-file-watcher.txt
@@ -18,6 +18,8 @@ lstat(2) to detect that itself.
 
 OPTIONS
 ---
+--detach::
+   Run in background.
 
 SEE ALSO
 
diff --git a/cache.h b/cache.h
index 939db46..7a836b1 100644
--- a/cache.h
+++ b/cache.h
@@ -434,6 +434,7 @@ extern int set_git_dir_init(const char *git_dir, const char 
*real_git_dir, int);
 extern int init_db(const char *template_dir, unsigned int flags);
 
 extern void sanitize_stdfds(void);
+extern int daemonize(int *);
 
 #define alloc_nr(x) (((x)+16)*3/2)
 
diff --git a/daemon.c b/daemon.c
index 503e039..2650504 100644
--- a/daemon.c
+++ b/daemon.c
@@ -1056,11 +1056,6 @@ static void drop_privileges(struct credentials *cred)
/* nothing */
 }
 
-static void daemonize(void)
-{
-   die(--detach not supported on this platform);
-}
-
 static struct credentials *prepare_credentials(const char *user_name,
 const char *group_name)
 {
@@ -1102,24 +1097,6 @@ static struct credentials *prepare_credentials(const 
char *user_name,
 
return c;
 }
-
-static void daemonize(void)
-{
-   switch (fork()) {
-   case 0:
-   break;
-   case -1:
-   die_errno(fork failed);
-   default:
-   exit(0);
-   }
-   if (setsid() == -1)
-   die_errno(setsid failed);
-   close(0);
-   close(1);
-   close(2);
-   sanitize_stdfds();
-}
 #endif
 
 static void store_pid(const char *path)
@@ -1333,9 +1310,10 @@ int main(int argc, char **argv)
if (inetd_mode || serve_mode)
return execute();
 
-   if (detach)
-   daemonize();
-   else
+   if (detach) {
+   if (daemonize(NULL))
+   die(--detach not supported on this platform);
+   } else
sanitize_stdfds();
 
if (pid_file)
diff --git a/file-watcher.c b/file-watcher.c
index 9c639ef..1e1ccad 100644
--- a/file-watcher.c
+++ b/file-watcher.c
@@ -111,7 +111,10 @@ int main(int argc, const char **argv)
 {
struct strbuf sb = STRBUF_INIT;
int i, new_nr, fd, quit = 0, nr_common;
+   int daemon = 0;
struct option options[] = {
+   OPT_BOOL(0, detach, daemon,
+N_(run in background)),
OPT_END()
};
 
@@ -135,6 +138,20 @@ int main(int argc, const char **argv)
atexit(cleanup);
sigchain_push_common(cleanup_on_signal);
 
+   if (daemon) {
+   int err;
+   strbuf_addf(sb, %s/log, socket_path);
+   err = open(sb.buf, O_CREAT | O_TRUNC | O_WRONLY, 0600);
+   adjust_shared_perm(sb.buf);
+   if (err == -1)
+   die_errno(_(unable to create %s), sb.buf);
+   if (daemonize(do_not_clean_up))
+   die(_(--detach not supported on this platform));
+   dup2(err, 1);
+   dup2(err, 2);
+   close(err);
+   }
+
nr_common = 1;
pfd_alloc = pfd_nr = nr_common;
pfd = xmalloc(sizeof(*pfd) * pfd_alloc);
diff --git a/setup.c b/setup.c
index 6c3f85f..757c45f 100644
--- a/setup.c
+++ b/setup.c
@@ -787,3 +787,28 @@ void sanitize_stdfds(void)
if (fd  2)
close(fd);
 }
+
+int daemonize(int *flag)
+{
+#ifndef NO_POSIX_GOODIES
+   switch (fork()) {
+   case 0:
+   break;
+   case -1:
+   die_errno(fork failed);
+   default:
+   if (flag)
+   *flag = 1;
+   exit(0);
+   }
+   if (setsid() == -1)
+   die_errno(setsid failed);
+   close(0);
+   close(1);
+   close(2);
+   sanitize_stdfds();
+   return 0;
+#else
+   return -1;
+#endif
+}
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 10/26] read-cache: new flag CE_WATCHED to mark what file is watched

2014-02-02 Thread Nguyễn Thái Ngọc Duy
This bit is basically dynamic CE_VALID. It marks entries that are
being watched by the incoming file-watcher. When an index is loaded,
file watcher is contacted and the list of updated paths is retrieved.

These paths will have CE_WATCHED cleared and lstat() will be called on
them. Those that have CE_WATCHED and not in the list will have
CE_VALID turn on to skip lstat(). The setting is temporarily, CE_VALID
is not saved to disk if CE_WATCHED is also set.

We keep the CE_WATCHED in a new extension, separated from the entries
to save some space because extended ce_flags adds 2 bytes per entry
and this flag would be present in the majority of entries. When stored
as bitmap, this extension could compress very well with ewah algorithm.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 Documentation/technical/index-format.txt |  6 +
 cache.h  |  3 +++
 read-cache.c | 41 +++-
 3 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/Documentation/technical/index-format.txt 
b/Documentation/technical/index-format.txt
index f352a9b..24fd0ae 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -198,3 +198,9 @@ Git index format
   - At most three 160-bit object names of the entry in stages from 1 to 3
 (nothing is written for a missing stage).
 
+=== File watcher
+
+  The signature of this extension is { 'W', 'A', 'T', 'C' }.
+
+  - A bit map of all entries in the index, n-th bit of m-th byte
+corresponds to CE_WATCHED of the m * 8+ n-th index entry.
diff --git a/cache.h b/cache.h
index f14d535..a0af2a5 100644
--- a/cache.h
+++ b/cache.h
@@ -169,6 +169,9 @@ struct cache_entry {
 /* used to temporarily mark paths matched by pathspecs */
 #define CE_MATCHED   (1  26)
 
+/* set CE_VALID at runtime if the entry is guaranteed not updated */
+#define CE_WATCHED   (1  27)
+
 /*
  * Extended on-disk flags
  */
diff --git a/read-cache.c b/read-cache.c
index 3b6daf1..098d3b6 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -33,6 +33,7 @@ static struct cache_entry *refresh_cache_entry(struct 
cache_entry *ce, int reall
 #define CACHE_EXT(s) ( (s[0]24)|(s[1]16)|(s[2]8)|(s[3]) )
 #define CACHE_EXT_TREE 0x54524545  /* TREE */
 #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* REUC */
+#define CACHE_EXT_WATCH 0x57415443   /* WATC */
 
 struct index_state the_index;
 
@@ -1289,6 +1290,19 @@ static int verify_hdr(struct cache_header *hdr,
return 0;
 }
 
+static void read_watch_extension(struct index_state *istate, uint8_t *data,
+unsigned long sz)
+{
+   int i;
+   if ((istate-cache_nr + 7) / 8 != sz) {
+   error(invalid 'WATC' extension);
+   return;
+   }
+   for (i = 0; i  istate-cache_nr; i++)
+   if (data[i / 8]  (1  (i % 8)))
+   istate-cache[i]-ce_flags |= CE_WATCHED;
+}
+
 static int read_index_extension(struct index_state *istate,
const char *ext, void *data, unsigned long sz)
 {
@@ -1299,6 +1313,9 @@ static int read_index_extension(struct index_state 
*istate,
case CACHE_EXT_RESOLVE_UNDO:
istate-resolve_undo = resolve_undo_read(data, sz);
break;
+   case CACHE_EXT_WATCH:
+   read_watch_extension(istate, data, sz);
+   break;
default:
if (*ext  'A' || 'Z'  *ext)
return error(index uses %.4s extension, which we do 
not understand,
@@ -1777,7 +1794,7 @@ int write_index(struct index_state *istate, int newfd)
 {
git_SHA_CTX c;
struct cache_header hdr;
-   int i, err, removed, extended, hdr_version;
+   int i, err, removed, extended, hdr_version, has_watches = 0;
struct cache_entry **cache = istate-cache;
int entries = istate-cache_nr;
struct stat st;
@@ -1786,6 +1803,8 @@ int write_index(struct index_state *istate, int newfd)
for (i = removed = extended = 0; i  entries; i++) {
if (cache[i]-ce_flags  CE_REMOVE)
removed++;
+   else if (cache[i]-ce_flags  CE_WATCHED)
+   has_watches++;
 
/* reduce extended entries if possible */
cache[i]-ce_flags = ~CE_EXTENDED;
@@ -1857,6 +1876,26 @@ int write_index(struct index_state *istate, int newfd)
if (err)
return -1;
}
+   if (has_watches) {
+   int id, sz = (entries - removed + 7) / 8;
+   uint8_t *data = xmalloc(sz);
+   memset(data, 0, sz);
+   for (i = 0, id = 0; i  entries  has_watches; i++) {
+   struct cache_entry *ce = cache[i];
+   if (ce-ce_flags  CE_REMOVE)
+   continue;
+   if (ce-ce_flags  

[PATCH v3 11/26] Clear CE_WATCHED when set CE_VALID alone

2014-02-02 Thread Nguyễn Thái Ngọc Duy
If CE_WATCHED is set, CE_VALID is controlled by CE_WATCHED and will be
cleared bfore writing to disk. Users of --assume-unchanged therefore
need to clear CE_WATCHED to avoid this side effect.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 builtin/update-index.c | 12 
 read-cache.c   |  2 +-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index e3a10d7..9283fd6 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -50,9 +50,13 @@ static int mark_ce_flags(const char *path, int flag, int 
mark)
int namelen = strlen(path);
int pos = cache_name_pos(path, namelen);
if (0 = pos) {
-   if (mark)
-   active_cache[pos]-ce_flags |= flag;
-   else
+   if (mark) {
+   struct cache_entry *ce = active_cache[pos];
+   if (flag == CE_VALID)
+   ce-ce_flags = (ce-ce_flags  ~CE_WATCHED) | 
CE_VALID;
+   else
+   ce-ce_flags |= flag;
+   } else
active_cache[pos]-ce_flags = ~flag;
cache_tree_invalidate_path(active_cache_tree, path);
active_cache_changed = 1;
@@ -235,7 +239,7 @@ static int add_cacheinfo(unsigned int mode, const unsigned 
char *sha1,
ce-ce_namelen = len;
ce-ce_mode = create_ce_mode(mode);
if (assume_unchanged)
-   ce-ce_flags |= CE_VALID;
+   ce-ce_flags = (ce-ce_flags  ~CE_WATCHED) | CE_VALID;
option = allow_add ? ADD_CACHE_OK_TO_ADD : 0;
option |= allow_replace ? ADD_CACHE_OK_TO_REPLACE : 0;
if (add_cache_entry(ce, option))
diff --git a/read-cache.c b/read-cache.c
index 098d3b6..8961864 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -133,7 +133,7 @@ void fill_stat_cache_info(struct cache_entry *ce, struct 
stat *st)
fill_stat_data(ce-ce_stat_data, st);
 
if (assume_unchanged)
-   ce-ce_flags |= CE_VALID;
+   ce-ce_flags = (ce-ce_flags  ~CE_WATCHED) | CE_VALID;
 
if (S_ISREG(st-st_mode))
ce_mark_uptodate(ce);
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 09/26] read-cache: save trailing sha-1

2014-02-02 Thread Nguyễn Thái Ngọc Duy
This will be used as signature to know if the index has changed.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 cache.h  | 1 +
 read-cache.c | 7 ---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/cache.h b/cache.h
index 7a836b1..f14d535 100644
--- a/cache.h
+++ b/cache.h
@@ -279,6 +279,7 @@ struct index_state {
 initialized : 1;
struct hash_table name_hash;
struct hash_table dir_hash;
+   unsigned char sha1[20];
 };
 
 extern struct index_state the_index;
diff --git a/read-cache.c b/read-cache.c
index 33dd676..3b6daf1 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1269,10 +1269,11 @@ struct ondisk_cache_entry_extended {
ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
ondisk_cache_entry_size(ce_namelen(ce)))
 
-static int verify_hdr(struct cache_header *hdr, unsigned long size)
+static int verify_hdr(struct cache_header *hdr,
+ unsigned long size,
+ unsigned char *sha1)
 {
git_SHA_CTX c;
-   unsigned char sha1[20];
int hdr_version;
 
if (hdr-hdr_signature != htonl(CACHE_SIGNATURE))
@@ -1461,7 +1462,7 @@ int read_index_from(struct index_state *istate, const 
char *path)
close(fd);
 
hdr = mmap;
-   if (verify_hdr(hdr, mmap_size)  0)
+   if (verify_hdr(hdr, mmap_size, istate-sha1)  0)
goto unmap;
 
istate-version = ntohl(hdr-hdr_version);
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 12/26] read-cache: basic hand shaking to the file watcher

2014-02-02 Thread Nguyễn Thái Ngọc Duy
read_cache() connects to the file watcher, specified by
filewatcher.path config, and performs basic hand shaking. CE_WATCHED
is cleared if git and file watcher have different views on the index
state.

All send/receive calls must be complete within a limited time to avoid
a buggy file-watcher hang git status forever. And the whole point of
doing this is speed. If file watcher can't respond fast enough, for
whatever reason, then it should not be used.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 Documentation/config.txt   |  10 +++
 Documentation/git-file-watcher.txt |   4 +-
 Makefile   |   1 +
 cache.h|   1 +
 file-watcher-lib.c (new)   |  91 ++
 file-watcher-lib.h (new)   |   6 ++
 file-watcher.c | 152 -
 read-cache.c   |   6 ++
 8 files changed, 269 insertions(+), 2 deletions(-)
 create mode 100644 file-watcher-lib.c
 create mode 100644 file-watcher-lib.h

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 5f4d793..6ad653a 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1042,6 +1042,16 @@ difftool.tool.cmd::
 difftool.prompt::
Prompt before each invocation of the diff tool.
 
+filewatcher.path::
+   The directory that contains the socket of `git  file-watcher`.
+   If it's not an absolute path, it's relative to $GIT_DIR. An
+   empty path means no connection to file watcher.
+
+filewatcher.timeout::
+   This is the maximum time in milliseconds that Git waits for
+   the file watcher to respond before giving up. Default value is
+   50. Setting to -1 makes Git wait forever.
+
 fetch.recurseSubmodules::
This option can be either set to a boolean value or to 'on-demand'.
Setting it to a boolean changes the behavior of fetch and pull to
diff --git a/Documentation/git-file-watcher.txt 
b/Documentation/git-file-watcher.txt
index ec81f18..d91caf3 100644
--- a/Documentation/git-file-watcher.txt
+++ b/Documentation/git-file-watcher.txt
@@ -14,7 +14,9 @@ DESCRIPTION
 ---
 This program watches file changes in a git working directory and let
 Git now what files have been changed so that Git does not have to call
-lstat(2) to detect that itself.
+lstat(2) to detect that itself. Config key filewatcher.path needs to
+be set to `socket directory` so Git knows how to contact to the file
+watcher.
 
 OPTIONS
 ---
diff --git a/Makefile b/Makefile
index 8eef0d6..1c4d659 100644
--- a/Makefile
+++ b/Makefile
@@ -798,6 +798,7 @@ LIB_OBJS += entry.o
 LIB_OBJS += environment.o
 LIB_OBJS += exec_cmd.o
 LIB_OBJS += fetch-pack.o
+LIB_OBJS += file-watcher-lib.o
 LIB_OBJS += fsck.o
 LIB_OBJS += gettext.o
 LIB_OBJS += gpg-interface.o
diff --git a/cache.h b/cache.h
index a0af2a5..b3ea574 100644
--- a/cache.h
+++ b/cache.h
@@ -283,6 +283,7 @@ struct index_state {
struct hash_table name_hash;
struct hash_table dir_hash;
unsigned char sha1[20];
+   int watcher;
 };
 
 extern struct index_state the_index;
diff --git a/file-watcher-lib.c b/file-watcher-lib.c
new file mode 100644
index 000..d0636cc
--- /dev/null
+++ b/file-watcher-lib.c
@@ -0,0 +1,91 @@
+#include cache.h
+#include file-watcher-lib.h
+#include pkt-line.h
+#include unix-socket.h
+
+static char *watcher_path;
+static int WAIT_TIME = 50; /* in ms */
+
+static int connect_watcher(const char *path)
+{
+   struct strbuf sb = STRBUF_INIT;
+   int fd;
+
+   if (!path || !*path)
+   return -1;
+
+   strbuf_addf(sb, %s/socket, path);
+   fd = unix_stream_connect(sb.buf);
+   strbuf_release(sb);
+   return fd;
+}
+
+static void reset_watches(struct index_state *istate, int disconnect)
+{
+   int i;
+   for (i = 0; i  istate-cache_nr; i++)
+   if (istate-cache[i]-ce_flags  CE_WATCHED) {
+   istate-cache[i]-ce_flags = ~(CE_WATCHED | CE_VALID);
+   istate-cache_changed = 1;
+   }
+   if (disconnect  istate-watcher  0) {
+   close(istate-watcher);
+   istate-watcher = -1;
+   }
+}
+
+static int watcher_config(const char *var, const char *value, void *data)
+{
+   if (!strcmp(var, filewatcher.path)) {
+   if (is_absolute_path(value))
+   watcher_path = xstrdup(value);
+   else if (*value == '~')
+   watcher_path = expand_user_path(value);
+   else
+   watcher_path = git_pathdup(%s, value);
+   return 0;
+   }
+   if (!strcmp(var, filewatcher.timeout)) {
+   WAIT_TIME = git_config_int(var, value);
+   return 0;
+   }
+   return 0;
+}
+
+void open_watcher(struct index_state *istate)
+{
+   static int read_config = 0;
+   char *msg;
+
+   if (!get_git_work_tree()) {
+  

[PATCH v3 14/26] read-cache: put some limits on file watching

2014-02-02 Thread Nguyễn Thái Ngọc Duy
watch_entries() is a lot of computation and could trigger a lot more
lookups in file-watcher. Normally after the first set of watches are
in place, we do not need to update often. Moreover if the number of
entries is small, the overhead of file watcher may actually slow git
down.

This patch only allows to update watches if the number of watchable
files is over a limit (and there are new files added if this is not
the first time). Measurements on Core i5-2520M and Linux 3.7.6, about
920 lstat() take 1ms. Somewhere between 2^16 and 2^17 lstat calls that
it starts to take longer than 100ms. 2^16 is chosen at the minimum
limit to start using file watcher.

Of course this is only sensible default for single-repo use
case. Lower it when you need to work with many small repos.

Recently updated files are not considered watchable because they are
likely to be updated again soon, not worth the ping-pong game with
file watcher. The default limit 10min is just a random value. Recent
limit is ignored if there are no watched files (e.g. a fresh clone, or
after a bad hand shake with file watcher).

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 Documentation/config.txt |  9 +++
 Documentation/technical/index-format.txt |  3 +++
 cache.h  |  1 +
 file-watcher-lib.c   | 42 ++--
 read-cache.c | 11 ++---
 5 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 6ad653a..451c100 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1052,6 +1052,15 @@ filewatcher.timeout::
the file watcher to respond before giving up. Default value is
50. Setting to -1 makes Git wait forever.
 
+filewatcher.minfiles::
+   Start watching files if the number of watchable files are
+   above this limit. Default value is 65536.
+
+filewatcher.recentlimit::
+   Files that are last updated within filewatcher.recentlimit
+   seconds from now are not considered watchable. Default value
+   is 600 (5 minutes).
+
 fetch.recurseSubmodules::
This option can be either set to a boolean value or to 'on-demand'.
Setting it to a boolean changes the behavior of fetch and pull to
diff --git a/Documentation/technical/index-format.txt 
b/Documentation/technical/index-format.txt
index 24fd0ae..7081e55 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -204,3 +204,6 @@ Git index format
 
   - A bit map of all entries in the index, n-th bit of m-th byte
 corresponds to CE_WATCHED of the m * 8+ n-th index entry.
+
+  - 1-byte, non-zero indicates the index should be scanned for new
+watched entries.
diff --git a/cache.h b/cache.h
index b3ea574..10ff33e 100644
--- a/cache.h
+++ b/cache.h
@@ -279,6 +279,7 @@ struct index_state {
struct cache_tree *cache_tree;
struct cache_time timestamp;
unsigned name_hash_initialized : 1,
+update_watches : 1,
 initialized : 1;
struct hash_table name_hash;
struct hash_table dir_hash;
diff --git a/file-watcher-lib.c b/file-watcher-lib.c
index 791faae..d4949a5 100644
--- a/file-watcher-lib.c
+++ b/file-watcher-lib.c
@@ -5,6 +5,8 @@
 
 static char *watcher_path;
 static int WAIT_TIME = 50; /* in ms */
+static int watch_lowerlimit = 65536;
+static int recent_limit = 600;
 
 static int connect_watcher(const char *path)
 {
@@ -22,12 +24,17 @@ static int connect_watcher(const char *path)
 
 static void reset_watches(struct index_state *istate, int disconnect)
 {
-   int i;
+   int i, changed = 0;
for (i = 0; i  istate-cache_nr; i++)
if (istate-cache[i]-ce_flags  CE_WATCHED) {
istate-cache[i]-ce_flags = ~(CE_WATCHED | CE_VALID);
-   istate-cache_changed = 1;
+   changed = 1;
}
+   recent_limit = 0;
+   if (changed) {
+   istate-update_watches = 1;
+   istate-cache_changed = 1;
+   }
if (disconnect  istate-watcher  0) {
close(istate-watcher);
istate-watcher = -1;
@@ -49,6 +56,14 @@ static int watcher_config(const char *var, const char 
*value, void *data)
WAIT_TIME = git_config_int(var, value);
return 0;
}
+   if (!strcmp(var, filewatcher.minfiles)) {
+   watch_lowerlimit = git_config_int(var, value);
+   return 0;
+   }
+   if (!strcmp(var, filewatcher.recentlimit)) {
+   recent_limit = git_config_int(var, value);
+   return 0;
+   }
return 0;
 }
 
@@ -63,12 +78,18 @@ void open_watcher(struct index_state *istate)
}
 
if (!read_config) {
+   int i;
/*
 * can't hook into 

[PATCH v3 15/26] read-cache: get changed file list from file watcher

2014-02-02 Thread Nguyễn Thái Ngọc Duy
When some paths are watched, they are added to the watched list in
file watcher. When a path in this list is updated, the path is moved
to changed list and no longer watched.

With this patch we have a complete path exchanging picture between git
and file-watcher:

1) Hand shake

2) Get the list of changed paths, clear CE_WATCHED on these paths. Set
   CE_VALID on the remaining CE_WATCHED paths

3) (Optionally) Ask to watch more paths. Set CE_WATCHED on
   them. CE_VALID is not set so these are still lstat'd

4) Refresh as usual. lstat is skipped on CE_VALID paths. If one of
   those paths at step 3 are found modified, CE_WATCHED is removed.

5) Write index to disk. Notify file-watcher about new index
   signature. Ask file watcher to remove the changed paths.

A few points:

 - Changed list remains until step 5. If git crashes or do not write
   index down, next time it starts, it'll fed the same changed list.

 - If git writes index down without telling file-watcher about it,
   next time it starts, hand shake should fail and git should clear
   all CE_WATCHED.

 - There's a buffer between starting watch at #3 and saving watch at
   #5. We do verify paths are clean at #4. But that time all watches
   should have been active for a while. No chance for race conditions.

 - #5 is sort of atomic. If git crashes half way through step 5, file
   watcher should not update its index signature. Which means next
   time git starts, hand shake fails (because new index's written) so
   we'll start over.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 cache.h|   1 +
 file-watcher-lib.c |  99 ++
 file-watcher-lib.h |   1 +
 file-watcher.c | 138 +
 read-cache.c   |  21 +++-
 5 files changed, 258 insertions(+), 2 deletions(-)

diff --git a/cache.h b/cache.h
index 10ff33e..9f7d952 100644
--- a/cache.h
+++ b/cache.h
@@ -285,6 +285,7 @@ struct index_state {
struct hash_table dir_hash;
unsigned char sha1[20];
int watcher;
+   struct string_list *updated_entries;
 };
 
 extern struct index_state the_index;
diff --git a/file-watcher-lib.c b/file-watcher-lib.c
index d4949a5..b6b0848 100644
--- a/file-watcher-lib.c
+++ b/file-watcher-lib.c
@@ -2,6 +2,7 @@
 #include file-watcher-lib.h
 #include pkt-line.h
 #include unix-socket.h
+#include string-list.h
 
 static char *watcher_path;
 static int WAIT_TIME = 50; /* in ms */
@@ -25,6 +26,11 @@ static int connect_watcher(const char *path)
 static void reset_watches(struct index_state *istate, int disconnect)
 {
int i, changed = 0;
+   if (istate-updated_entries) {
+   string_list_clear(istate-updated_entries, 0);
+   free(istate-updated_entries);
+   istate-updated_entries = NULL;
+   }
for (i = 0; i  istate-cache_nr; i++)
if (istate-cache[i]-ce_flags  CE_WATCHED) {
istate-cache[i]-ce_flags = ~(CE_WATCHED | CE_VALID);
@@ -41,6 +47,58 @@ static void reset_watches(struct index_state *istate, int 
disconnect)
}
 }
 
+static void mark_ce_valid(struct index_state *istate)
+{
+   struct strbuf sb = STRBUF_INIT;
+   char *line, *end;
+   int i, len;
+   unsigned long n;
+   if (packet_write_timeout(istate-watcher, WAIT_TIME, get-changed) = 
0 ||
+   !(line = packet_read_line_timeout(istate-watcher, WAIT_TIME, 
len)) ||
+   !starts_with(line, changed )) {
+   reset_watches(istate, 1);
+   return;
+   }
+   n = strtoul(line + 8, end, 10);
+   if (end != line + len) {
+   reset_watches(istate, 1);
+   return;
+   }
+   if (!n)
+   goto done;
+   strbuf_grow(sb, n);
+   if (read_in_full_timeout(istate-watcher, sb.buf, n, WAIT_TIME) != n) {
+   strbuf_release(sb);
+   reset_watches(istate, 1);
+   return;
+   }
+   line = sb.buf;
+   end = line + n;
+   for (; line  end; line += len + 1) {
+   len = strlen(line);
+   i = index_name_pos(istate, line, len);
+   if (i  0)
+   continue;
+   if (istate-cache[i]-ce_flags  CE_WATCHED) {
+   istate-cache[i]-ce_flags = ~CE_WATCHED;
+   istate-cache_changed = 1;
+   }
+   if (!istate-updated_entries) {
+   struct string_list *sl;
+   sl = xmalloc(sizeof(*sl));
+   memset(sl, 0, sizeof(*sl));
+   sl-strdup_strings = 1;
+   istate-updated_entries = sl;
+   }
+   string_list_append(istate-updated_entries, line);
+   }
+   strbuf_release(sb);
+done:
+   for (i = 0; i  istate-cache_nr; i++)
+   if (istate-cache[i]-ce_flags  CE_WATCHED)
+ 

[PATCH v3 16/26] git-compat-util.h: add inotify stubs on non-Linux platforms

2014-02-02 Thread Nguyễn Thái Ngọc Duy
This is to avoid spreading #ifdef HAVE_INOTIFY around and keep most
code compiled even if it's never active.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 config.mak.uname  |  1 +
 git-compat-util.h | 43 +++
 2 files changed, 44 insertions(+)

diff --git a/config.mak.uname b/config.mak.uname
index 7d31fad..ee548f5 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -33,6 +33,7 @@ ifeq ($(uname_S),Linux)
HAVE_PATHS_H = YesPlease
LIBC_CONTAINS_LIBINTL = YesPlease
HAVE_DEV_TTY = YesPlease
+   BASIC_CFLAGS += -DHAVE_INOTIFY
 endif
 ifeq ($(uname_S),GNU/kFreeBSD)
NO_STRLCPY = YesPlease
diff --git a/git-compat-util.h b/git-compat-util.h
index cbd86c3..8b55dd0 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -128,6 +128,9 @@
 #else
 #include poll.h
 #endif
+#ifdef HAVE_INOTIFY
+#include sys/inotify.h
+#endif
 
 #if defined(__MINGW32__)
 /* pull in Windows compatibility stuff */
@@ -721,4 +724,44 @@ void warn_on_inaccessible(const char *path);
 /* Get the passwd entry for the UID of the current process. */
 struct passwd *xgetpwuid_self(void);
 
+#ifndef HAVE_INOTIFY
+/* Keep inotify-specific code build, even if it's not used */
+
+#define IN_DELETE_SELF 1
+#define IN_MOVE_SELF   2
+#define IN_ATTRIB  4
+#define IN_DELETE  8
+#define IN_MODIFY  16
+#define IN_MOVED_FROM  32
+#define IN_MOVED_TO64
+#define IN_Q_OVERFLOW  128
+#define IN_UNMOUNT 256
+#define IN_CREATE  512
+#define IN_ISDIR   1024
+#define IN_IGNORED 2048
+
+struct inotify_event {
+   int event, mask, wd, len;
+   char name[FLEX_ARRAY];
+};
+
+static inline int inotify_init()
+{
+   errno = ENOSYS;
+   return -1;
+}
+
+static inline int inotify_add_watch(int fd, const char *path, int options)
+{
+   errno = ENOSYS;
+   return -1;
+}
+
+static inline int inotify_rm_watch(int fd, int wd)
+{
+   errno = ENOSYS;
+   return -1;
+}
+#endif
+
 #endif
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 13/26] read-cache: ask file watcher to watch files

2014-02-02 Thread Nguyễn Thái Ngọc Duy
We want to watch files that are never changed because lstat() on those
files is a wasted effort. So we sort unwatched files by date and start
adding them to the file watcher until it barfs (e.g. hits inotify
limit).

Note that we still do lstat() on these new watched files because they
could have changed before the file watcher could watch them. Watched
files may only skip lstat() at the next run.

Also, at this early in the index loading process, we don't know what
files are dirty and thus can skip watching (we do clear CE_WATCHED on
entries that are not verified clean before writing index). So the
watches are set, but git ignores its results. Maybe in future we could
store the list of dirty files in WATC extension and use it as a hint
to skip watching.

In the future, file watcher should figure out what paths are
watchable, what not (e.g. network filesystems) and reject them. For
now it's the user resposibility to set (or unset) filewatcher.path
properly.

The previous attempt sends paths in batch, 64k per pkt-line, then wait
for response. It's designed to stop short in case file watcher is out
of resources. But that's a rare case, and send/wait cycles increase
latency.

Instead we now send everything in one packet, and not in pkt-line to
avoid the 64k limit. Then we wait for the response. On webkit.git,
normal status -uno takes 0m1.138s. The sending 14M (of 182k paths)
takes 52ms extra. Previous approach takes 213ms extra. Of course in
the end, extra time is longer because file watcher is basically no-op
so far.

There is not much room for improvement. If we compress paths to reduce
payload, zlib time costs about 300ms (so how small the end result is
no longer matters). Even simple prefix compressing (index v4 style)
would cost 76ms on processing time alone (reducing payload to 3M).

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 file-watcher-lib.c | 83 +
 file-watcher-lib.h |  1 +
 file-watcher.c | 91 ++
 read-cache.c   | 18 +--
 4 files changed, 191 insertions(+), 2 deletions(-)

diff --git a/file-watcher-lib.c b/file-watcher-lib.c
index d0636cc..791faae 100644
--- a/file-watcher-lib.c
+++ b/file-watcher-lib.c
@@ -89,3 +89,86 @@ void open_watcher(struct index_state *istate)
return;
}
 }
+
+static int sort_by_date(const void *a_, const void *b_)
+{
+   const struct cache_entry *a = *(const struct cache_entry **)a_;
+   const struct cache_entry *b = *(const struct cache_entry **)b_;
+   uint32_t seca = a-ce_stat_data.sd_mtime.sec;
+   uint32_t secb = b-ce_stat_data.sd_mtime.sec;
+   return seca - secb;
+}
+
+static inline int ce_watchable(struct cache_entry *ce)
+{
+   return
+   !(ce-ce_flags  CE_WATCHED) 
+   !(ce-ce_flags  CE_VALID) 
+   /*
+* S_IFGITLINK should not be watched
+* obviously. S_IFLNK could be problematic because
+* inotify may follow symlinks without IN_DONT_FOLLOW
+*/
+   S_ISREG(ce-ce_mode);
+}
+
+static void send_watches(struct index_state *istate,
+struct cache_entry **sorted, int nr)
+{
+   struct strbuf sb = STRBUF_INIT;
+   int i, len = 0;
+
+   for (i = 0; i  nr; i++)
+   len += ce_namelen(sorted[i]) + 1;
+
+   if (packet_write_timeout(istate-watcher, WAIT_TIME, watch %d, len) 
= 0)
+   return;
+
+   strbuf_grow(sb, len);
+   for (i = 0; i  nr; i++)
+   strbuf_add(sb, sorted[i]-name, ce_namelen(sorted[i]) + 1);
+
+   if (write_in_full_timeout(istate-watcher, sb.buf,
+ sb.len, WAIT_TIME) != sb.len) {
+   strbuf_release(sb);
+   return;
+   }
+   strbuf_release(sb);
+
+   for (;;) {
+   char *line, *end;
+   unsigned long n;
+
+   if (!(line = packet_read_line_timeout(istate-watcher,
+ WAIT_TIME, len)))
+   return;
+   if (starts_with(line, watching ))
+   continue;
+   if (!starts_with(line, watched ))
+   return;
+   n = strtoul(line + 8, end, 10);
+   for (i = 0; i  n; i++)
+   sorted[i]-ce_flags |= CE_WATCHED;
+   istate-cache_changed = 1;
+   break;
+   }
+}
+
+void watch_entries(struct index_state *istate)
+{
+   int i, nr;
+   struct cache_entry **sorted;
+
+   if (istate-watcher = 0)
+   return;
+   for (i = nr = 0; i  istate-cache_nr; i++)
+   if (ce_watchable(istate-cache[i]))
+   nr++;
+   sorted = xmalloc(sizeof(*sorted) * nr);
+   for (i = nr = 0; i  istate-cache_nr; i++)
+   if (ce_watchable(istate-cache[i]))
+  

[PATCH v3 20/26] read-cache: new variable to verify file-watcher results

2014-02-02 Thread Nguyễn Thái Ngọc Duy
If GIT_TEST_WATCHED is set to a non-zero value, Git still uses file
watcher if configured. But it does lstat() anyway and notifies the
user if a file is changed but the file watcher said otherwise.

Note that there is a race condition. Changed paths are retrieved at
time X, then refresh and validation at time Y. Even if X and Y are
very close, an update can happen between X and Y, causing a false
report.

If GIT_TEST_WATCHED is set greater than 1, git will abort instead of
just warn and move on.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 Documentation/git-file-watcher.txt |  8 
 cache.h|  5 -
 read-cache.c   | 17 +
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/Documentation/git-file-watcher.txt 
b/Documentation/git-file-watcher.txt
index d694fea..dd09e30 100644
--- a/Documentation/git-file-watcher.txt
+++ b/Documentation/git-file-watcher.txt
@@ -25,6 +25,14 @@ OPTIONS
 --detach::
Run in background.
 
+TROUBLESHOOTING
+---
+Setting environment variable `GIT_TEST_WATCHED` to a non-zero number
+makes Git communicate with the file watcher, but do lstat anyway to
+verify that the file watcher results. Setting to 1 prints warning when
+file watcher fails to monitor files correctly. Setting to 2 aborts Git
+when it happens.
+
 BUGS
 
 On Linux, file watcher may fail to detect changes if you move the work
diff --git a/cache.h b/cache.h
index c229bf9..806c886 100644
--- a/cache.h
+++ b/cache.h
@@ -224,7 +224,10 @@ static inline unsigned create_ce_flags(unsigned stage)
 #define ce_mark_uptodate(ce) ((ce)-ce_flags |= CE_UPTODATE)
 static inline int ce_valid(const struct cache_entry *ce)
 {
-   return ce-ce_flags  CE_VALID;
+   extern int test_watched;
+   if (!test_watched)
+   return ce-ce_flags  CE_VALID;
+   return (ce-ce_flags  CE_VALID)  !(ce-ce_flags  CE_WATCHED);
 }
 
 #define ce_permissions(mode) (((mode)  0100) ? 0755 : 0644)
diff --git a/read-cache.c b/read-cache.c
index 95c9ccb..d5f084a 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -37,6 +37,7 @@ static struct cache_entry *refresh_cache_entry(struct 
cache_entry *ce, int reall
 #define CACHE_EXT_WATCH 0x57415443   /* WATC */
 
 struct index_state the_index;
+int test_watched;
 
 static void set_index_entry(struct index_state *istate, int nr, struct 
cache_entry *ce)
 {
@@ -1117,6 +1118,16 @@ static void show_file(const char * fmt, const char * 
name, int in_porcelain,
printf(fmt, name);
 }
 
+static void report_bad_watcher(struct index_state *istate,
+  struct cache_entry *ce)
+{
+   if (test_watched  1)
+   die(%s is updated but file-watcher said no,
+   ce-name);
+   warning(%s is updated but file-watcher said no,
+   ce-name);
+}
+
 int refresh_index(struct index_state *istate, unsigned int flags,
  const struct pathspec *pathspec,
  char *seen, const char *header_msg)
@@ -1188,6 +1199,9 @@ int refresh_index(struct index_state *istate, unsigned 
int flags,
ce-ce_flags = ~CE_VALID;
istate-cache_changed = 1;
}
+   if (test_watched 
+   (ce-ce_flags  CE_WATCHED)  (ce-ce_flags  
CE_VALID))
+   report_bad_watcher(istate, ce);
if (quiet)
continue;
 
@@ -1460,6 +1474,9 @@ int read_index_from(struct index_state *istate, const 
char *path)
if (istate-initialized)
return istate-cache_nr;
 
+   if (getenv(GIT_TEST_WATCHED))
+   test_watched = atoi(getenv(GIT_TEST_WATCHED));
+
istate-timestamp.sec = 0;
istate-timestamp.nsec = 0;
fd = open(path, O_RDONLY);
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 17/26] file-watcher: inotify support, watching part

2014-02-02 Thread Nguyễn Thái Ngọc Duy
struct dir manages inotify file descriptor and forms a tree. struct
file manages a file. When a file is watched, all dirs up to the file
is watched. Any changes on a directory impacts all subdirs and files.

The way data structure is made might be inotify-specific. I haven't
thought of how other file notification mechanisms may be
implemented. So there may be some refactoring later when a new OS is
supported.

Room for improvement: consecutive watched paths likely share the same
directory part (even though they are sorted by mtime, not name). Try
remember the last dir sequence and reduce lookups.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 Documentation/git-file-watcher.txt |  13 +++
 file-watcher.c | 226 -
 2 files changed, 238 insertions(+), 1 deletion(-)

diff --git a/Documentation/git-file-watcher.txt 
b/Documentation/git-file-watcher.txt
index d91caf3..d694fea 100644
--- a/Documentation/git-file-watcher.txt
+++ b/Documentation/git-file-watcher.txt
@@ -18,11 +18,24 @@ lstat(2) to detect that itself. Config key filewatcher.path 
needs to
 be set to `socket directory` so Git knows how to contact to the file
 watcher.
 
+This program is only supported under Linux with inotify(7) support.
+
 OPTIONS
 ---
 --detach::
Run in background.
 
+BUGS
+
+On Linux, file watcher may fail to detect changes if you move the work
+tree from outside. For example if you have work tree at
+`/tmp/foo/work`, you move `/tmp/foo` to `/tmp/bar`, make some changes
+in `/tmp/bar/work` and move `/tmp/bar` back to `/tmp/foo`, changes
+won't get noticed. Moving `/tmp/foo/work` to something else is fine.
+
+inotify may not work well with network filesystems and a few special
+others. Check inotify documents.
+
 SEE ALSO
 
 linkgit:git-update-index[1],
diff --git a/file-watcher.c b/file-watcher.c
index aa2daf6..d0762e6 100644
--- a/file-watcher.c
+++ b/file-watcher.c
@@ -11,6 +11,28 @@ static const char *const file_watcher_usage[] = {
NULL
 };
 
+struct dir;
+struct repository;
+
+struct file {
+   char *name;
+   struct dir *parent;
+   struct repository *repo;
+   struct file *next;
+};
+
+struct dir {
+   char *name;
+   struct dir *parent;
+   struct dir **subdirs;
+   struct file **files;
+   struct repository *repo; /* only for root note */
+   int wd, nr_subdirs, nr_files;
+};
+
+static struct dir **wds;
+static int wds_alloc;
+
 struct repository {
char *work_tree;
char index_signature[41];
@@ -25,6 +47,7 @@ struct repository {
struct string_list updated;
int updated_sorted;
int updating;
+   struct dir *root;
 };
 
 const char *invalid_signature = ;
@@ -42,10 +65,199 @@ struct connection {
 static struct connection **conns;
 static struct pollfd *pfd;
 static int conns_alloc, pfd_nr, pfd_alloc;
+static int inotify_fd;
+
+/*
+ * IN_DONT_FOLLOW does not matter now as we do not monitor
+ * symlinks. See ce_watchable().
+ */
+#define INOTIFY_MASKS (IN_DELETE_SELF | IN_MOVE_SELF | \
+  IN_CREATE | IN_ATTRIB | IN_DELETE | IN_MODIFY |  \
+  IN_MOVED_FROM | IN_MOVED_TO)
+static struct dir *create_dir(struct dir *parent, const char *path,
+ const char *basename)
+{
+   struct dir *d;
+   int wd = inotify_add_watch(inotify_fd, path, INOTIFY_MASKS);
+   if (wd  0)
+   return NULL;
+
+   d = xmalloc(sizeof(*d));
+   memset(d, 0, sizeof(*d));
+   d-wd = wd;
+   d-parent = parent;
+   d-name = xstrdup(basename);
+
+   ALLOC_GROW(wds, wd + 1, wds_alloc);
+   wds[wd] = d;
+   return d;
+}
+
+static int get_dir_pos(struct dir *d, const char *name)
+{
+   int first, last;
+
+   first = 0;
+   last = d-nr_subdirs;
+   while (last  first) {
+   int next = (last + first)  1;
+   int cmp = strcmp(name, d-subdirs[next]-name);
+   if (!cmp)
+   return next;
+   if (cmp  0) {
+   last = next;
+   continue;
+   }
+   first = next+1;
+   }
+
+   return -first-1;
+}
+
+static void free_file(struct dir *d, int pos, int topdown);
+static void free_dir(struct dir *d, int topdown)
+{
+   struct dir *p = d-parent;
+   int pos;
+   if (!topdown  p  (pos = get_dir_pos(p, d-name))  0)
+   die(How come this directory is not registered in its parent?);
+   if (d-repo)
+   d-repo-root = NULL;
+   wds[d-wd] = NULL;
+   inotify_rm_watch(inotify_fd, d-wd);
+   if (topdown) {
+   int i;
+   for (i = 0; i  d-nr_subdirs; i++)
+   free_dir(d-subdirs[i], topdown);
+   for (i = 0; i  d-nr_files; i++)
+   free_file(d, i, topdown);
+   }
+   

[PATCH v3 18/26] file-watcher: inotify support, notification part

2014-02-02 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 file-watcher.c | 142 -
 1 file changed, 141 insertions(+), 1 deletion(-)

diff --git a/file-watcher.c b/file-watcher.c
index d0762e6..5867942 100644
--- a/file-watcher.c
+++ b/file-watcher.c
@@ -260,6 +260,131 @@ static int watch_path(struct repository *repo, char *path)
return 0;
 }
 
+static inline void queue_file_changed(struct file *f, struct strbuf *sb)
+{
+   int len = sb-len;
+   strbuf_addf(sb, %s%s, f-parent-parent ? / : , f-name);
+   string_list_append(f-repo-updated, sb-buf);
+   f-repo-updated_sorted = 0;
+   strbuf_setlen(sb, len);
+}
+
+static void construct_path(struct dir *d, struct strbuf *sb)
+{
+   if (!d-parent)
+   return;
+   if (!d-parent-parent) {
+   strbuf_addstr(sb, d-name);
+   return;
+   }
+   construct_path(d-parent, sb);
+   strbuf_addf(sb, /%s, d-name);
+}
+
+static void file_changed(const struct inotify_event *event,
+struct dir *d, int pos)
+{
+   struct strbuf sb = STRBUF_INIT;
+   construct_path(d, sb);
+   queue_file_changed(d-files[pos], sb);
+   strbuf_release(sb);
+   free_file(d, pos, 0);
+}
+
+static void dir_changed(const struct inotify_event *event, struct dir *d,
+   const char *base)
+{
+   struct strbuf sb = STRBUF_INIT;
+   int i;
+
+   if (!base)  /* top call - base == NULL */
+   construct_path(d, sb);
+   else {
+   strbuf_addstr(sb, base);
+   if (sb.len)
+   strbuf_addch(sb, '/');
+   strbuf_addstr(sb, d-name);
+   }
+
+   for (i = 0; i  d-nr_files; i++)
+   queue_file_changed(d-files[i], sb);
+   for (i = 0; i  d-nr_subdirs; i++) {
+   dir_changed(event, d-subdirs[i], sb.buf);
+   if (!base)
+   free_dir(d-subdirs[i], 1);
+   }
+   strbuf_release(sb);
+   if (!base)
+   free_dir(d, 0);
+}
+
+static void reset_repo(struct repository *repo, ino_t inode);
+static int do_handle_inotify(const struct inotify_event *event)
+{
+   struct dir *d;
+   int pos;
+
+   if (event-mask  (IN_Q_OVERFLOW | IN_UNMOUNT)) {
+   int i;
+   for (i = 0; i  nr_repos; i++)
+   reset_repo(repos[i], 0);
+   return 0;
+   }
+
+   if ((event-mask  IN_IGNORED) ||
+   /*
+* Perhaps left over events that we have not consumed
+* before the watch descriptor is removed.
+*/
+   event-wd = wds_alloc || wds[event-wd] == NULL)
+   return 0;
+
+   d = wds[event-wd];
+
+   /*
+* If something happened to the watched directory, consider
+* everything inside modified
+*/
+   if (event-mask  (IN_DELETE_SELF | IN_MOVE_SELF)) {
+   dir_changed(event, d, NULL);
+   return 0;
+   }
+
+   if (!(event-mask  IN_ISDIR)) {
+   pos = get_file_pos(d, event-name);
+   if (pos = 0)
+   file_changed(event, d, pos);
+   }
+
+   return 0;
+}
+
+static int handle_inotify(int fd)
+{
+   static char *buf;
+   static unsigned int buf_len = 0;
+   unsigned int avail, offset;
+   int ret, len;
+
+   /* drain the event queue */
+   if (ioctl(fd, FIONREAD, avail))
+   die_errno(unable to FIONREAD inotify handle);
+   if (buf_len  avail) {
+   buf = xrealloc(buf, avail);
+   buf_len = avail;
+   }
+   len = read(fd, buf, avail);
+   if (len = 0)
+   return -1;
+   ret = offset = 0;
+   while (offset  len) {
+   struct inotify_event *event = (void *)(buf + offset);
+   ret += do_handle_inotify(event);
+   offset += sizeof(struct inotify_event) + event-len;
+   }
+   return ret;
+}
+
 static void get_changed_list(int conn_id)
 {
struct strbuf sb = STRBUF_INIT;
@@ -466,6 +591,12 @@ static int handle_command(int conn_id)
 * capabilities. Capabilities in uppercase MUST be
 * supported. If any side does not understand any of the
 * advertised uppercase capabilities, it must disconnect.
+*
+* The way the main event loop is structured, we should get at
+* least one handle_inotify() before receiving the next
+* command. And handle_inotify() should process all events by
+* this point of time. This guarantees our reports won't miss
+* anything by the time get-changed is called.
 */
if ((arg = skip_prefix(msg, hello))) {
if (*arg) { /* no capabilities supported yet */
@@ -753,11 +884,15 @@ int main(int argc, const char **argv)
close(err);
}
 
-   

[PATCH v3 26/26] Disable file-watcher with system inotify on some tests

2014-02-02 Thread Nguyễn Thái Ngọc Duy
When file watcher is active via GIT_TEST_WATCHER_PATH, `ls-files -v`
output may be different (e.g. H becomes W). Disable file watcher
in those cases.

We could make ls-files turn 'W' back to 'H' for these cases, but not
sure it's worth the effort. The intention of running with
GIT_TEST_WATCHER_PATH is to exercise file watcher in somewhat real
scenarios and the remaining tests are more than enough for that.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 t/t1011-read-tree-sparse-checkout.sh  | 2 ++
 t/t2104-update-index-skip-worktree.sh | 2 ++
 t/t7011-skip-worktree-reading.sh  | 2 ++
 t/t7012-skip-worktree-writing.sh  | 2 ++
 4 files changed, 8 insertions(+)

diff --git a/t/t1011-read-tree-sparse-checkout.sh 
b/t/t1011-read-tree-sparse-checkout.sh
index 0c74bee..00e92bc 100755
--- a/t/t1011-read-tree-sparse-checkout.sh
+++ b/t/t1011-read-tree-sparse-checkout.sh
@@ -14,6 +14,8 @@ test_description='sparse checkout tests
 . ./test-lib.sh
 . $TEST_DIRECTORY/lib-read-tree.sh
 
+unset GIT_TEST_WATCHER_PATH
+
 test_expect_success 'setup' '
cat expected -\EOF 
100644 77f0ba1734ed79d12881f81b36ee134de6a3327b 0   init.t
diff --git a/t/t2104-update-index-skip-worktree.sh 
b/t/t2104-update-index-skip-worktree.sh
index 1d0879b..2b93c58 100755
--- a/t/t2104-update-index-skip-worktree.sh
+++ b/t/t2104-update-index-skip-worktree.sh
@@ -7,6 +7,8 @@ test_description='skip-worktree bit test'
 
 . ./test-lib.sh
 
+unset GIT_TEST_WATCHER_PATH
+
 cat expect.full EOF
 H 1
 H 2
diff --git a/t/t7011-skip-worktree-reading.sh b/t/t7011-skip-worktree-reading.sh
index 88d60c1..2242718 100755
--- a/t/t7011-skip-worktree-reading.sh
+++ b/t/t7011-skip-worktree-reading.sh
@@ -7,6 +7,8 @@ test_description='skip-worktree bit test'
 
 . ./test-lib.sh
 
+unset GIT_TEST_WATCHER_PATH
+
 cat expect.full EOF
 H 1
 H 2
diff --git a/t/t7012-skip-worktree-writing.sh b/t/t7012-skip-worktree-writing.sh
index 9ceaa40..c8e0eb5 100755
--- a/t/t7012-skip-worktree-writing.sh
+++ b/t/t7012-skip-worktree-writing.sh
@@ -7,6 +7,8 @@ test_description='test worktree writing operations when 
skip-worktree is used'
 
 . ./test-lib.sh
 
+unset GIT_TEST_WATCHER_PATH
+
 test_expect_success 'setup' '
test_commit init 
echo modified  init.t 
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 19/26] Wrap CE_VALID test with ce_valid()

2014-02-02 Thread Nguyễn Thái Ngọc Duy
The next patch wants to ignore CE_VALID under some condition but not
really clears it. Centralizing its access makes such a change easier.

Not all ce_flags  CE_VALID is converted though. The tests that
really mean _bit_ CE_VALID remains so. The tests that mean ignore
worktree are converted.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 builtin/grep.c | 2 +-
 cache.h| 4 
 diff-lib.c | 4 ++--
 diff.c | 2 +-
 read-cache.c   | 6 +++---
 unpack-trees.c | 2 +-
 6 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 63f8603..00526a1 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -386,7 +386,7 @@ static int grep_cache(struct grep_opt *opt, const struct 
pathspec *pathspec, int
 * are identical, even if worktree file has been modified, so 
use
 * cache version instead
 */
-   if (cached || (ce-ce_flags  CE_VALID) || 
ce_skip_worktree(ce)) {
+   if (cached || ce_valid(ce) || ce_skip_worktree(ce)) {
if (ce_stage(ce))
continue;
hit |= grep_sha1(opt, ce-sha1, ce-name, 0, ce-name);
diff --git a/cache.h b/cache.h
index 9f7d952..c229bf9 100644
--- a/cache.h
+++ b/cache.h
@@ -222,6 +222,10 @@ static inline unsigned create_ce_flags(unsigned stage)
 #define ce_uptodate(ce) ((ce)-ce_flags  CE_UPTODATE)
 #define ce_skip_worktree(ce) ((ce)-ce_flags  CE_SKIP_WORKTREE)
 #define ce_mark_uptodate(ce) ((ce)-ce_flags |= CE_UPTODATE)
+static inline int ce_valid(const struct cache_entry *ce)
+{
+   return ce-ce_flags  CE_VALID;
+}
 
 #define ce_permissions(mode) (((mode)  0100) ? 0755 : 0644)
 static inline unsigned int create_ce_mode(unsigned int mode)
diff --git a/diff-lib.c b/diff-lib.c
index 346cac6..dcda7f3 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -198,7 +198,7 @@ int run_diff_files(struct rev_info *revs, unsigned int 
option)
continue;
 
/* If CE_VALID is set, don't look at workdir for file removal */
-   changed = (ce-ce_flags  CE_VALID) ? 0 : check_removed(ce, 
st);
+   changed = ce_valid(ce) ? 0 : check_removed(ce, st);
if (changed) {
if (changed  0) {
perror(ce-name);
@@ -369,7 +369,7 @@ static void do_oneway_diff(struct unpack_trees_options *o,
 
/* if the entry is not checked out, don't examine work tree */
cached = o-index_only ||
-   (idx  ((idx-ce_flags  CE_VALID) || ce_skip_worktree(idx)));
+   (idx  (ce_valid(idx) || ce_skip_worktree(idx)));
/*
 * Backward compatibility wart - diff-index -m does
 * not mean do not ignore merges, but match_missing.
diff --git a/diff.c b/diff.c
index 8e4a6a9..22c73fe 100644
--- a/diff.c
+++ b/diff.c
@@ -2636,7 +2636,7 @@ static int reuse_worktree_file(const char *name, const 
unsigned char *sha1, int
 * If ce is marked as assume unchanged, there is no
 * guarantee that work tree matches what we are looking for.
 */
-   if ((ce-ce_flags  CE_VALID) || ce_skip_worktree(ce))
+   if (ce_valid(ce) || ce_skip_worktree(ce))
return 0;
 
/*
diff --git a/read-cache.c b/read-cache.c
index 5540b06..95c9ccb 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -289,8 +289,8 @@ int ie_match_stat(const struct index_state *istate,
 */
if (!ignore_skip_worktree  ce_skip_worktree(ce))
return 0;
-   if (!ignore_valid  (ce-ce_flags  CE_VALID))
-   return 0;
+   if (!ignore_valid  ce_valid(ce))
+   return 0;
 
/*
 * Intent-to-add entries have not been added, so the index entry
@@ -1047,7 +1047,7 @@ static struct cache_entry *refresh_cache_ent(struct 
index_state *istate,
ce_mark_uptodate(ce);
return ce;
}
-   if (!ignore_valid  (ce-ce_flags  CE_VALID)) {
+   if (!ignore_valid  ce_valid(ce)) {
ce_mark_uptodate(ce);
return ce;
}
diff --git a/unpack-trees.c b/unpack-trees.c
index 164354d..61c3f35 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1215,7 +1215,7 @@ static int verify_uptodate_1(const struct cache_entry *ce,
 * if this entry is truly up-to-date because this file may be
 * overwritten.
 */
-   if ((ce-ce_flags  CE_VALID) || ce_skip_worktree(ce))
+   if (ce_valid(ce) || ce_skip_worktree(ce))
; /* keep checking */
else if (o-reset || ce_uptodate(ce))
return 0;
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 25/26] file-watcher: tests for the client side

2014-02-02 Thread Nguyễn Thái Ngọc Duy
Similar to t7513, system inotify is taken out to give us better
controlled environment.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 file-watcher-lib.c   |  21 ++--
 t/t7514-file-watcher-lib.sh (new +x) | 190 +++
 test-file-watcher.c  |  15 +++
 3 files changed, 219 insertions(+), 7 deletions(-)
 create mode 100755 t/t7514-file-watcher-lib.sh

diff --git a/file-watcher-lib.c b/file-watcher-lib.c
index 93afb52..8cc3b73 100644
--- a/file-watcher-lib.c
+++ b/file-watcher-lib.c
@@ -23,7 +23,8 @@ static int connect_watcher(const char *path)
return fd;
 }
 
-static void reset_watches(struct index_state *istate, int disconnect)
+static void reset_watches(struct index_state *istate, int disconnect,
+ const char *reason)
 {
int i, changed = 0;
if (istate-updated_entries) {
@@ -45,6 +46,8 @@ static void reset_watches(struct index_state *istate, int 
disconnect)
close(istate-watcher);
istate-watcher = -1;
}
+   trace_printf_key(GIT_TRACE_WATCHER, reset%s: %s\n,
+disconnect ? /disconnect : , reason);
 }
 
 static void mark_ce_valid(struct index_state *istate)
@@ -53,15 +56,16 @@ static void mark_ce_valid(struct index_state *istate)
char *line, *end;
int i, len;
unsigned long n;
+   trace_printf_key(GIT_TRACE_WATCHER, mark_ce_valid\n);
if (packet_write_timeout(istate-watcher, WAIT_TIME, get-changed) = 
0 ||
!(line = packet_read_line_timeout(istate-watcher, WAIT_TIME, 
len)) ||
!starts_with(line, changed )) {
-   reset_watches(istate, 1);
+   reset_watches(istate, 1, invalid get-changed response);
return;
}
n = strtoul(line + 8, end, 10);
if (end != line + len) {
-   reset_watches(istate, 1);
+   reset_watches(istate, 1, invalid get-changed response);
return;
}
if (!n)
@@ -69,7 +73,7 @@ static void mark_ce_valid(struct index_state *istate)
strbuf_grow(sb, n);
if (read_in_full_timeout(istate-watcher, sb.buf, n, WAIT_TIME) != n) {
strbuf_release(sb);
-   reset_watches(istate, 1);
+   reset_watches(istate, 1, invalid get-changed payload);
return;
}
line = sb.buf;
@@ -131,7 +135,7 @@ void open_watcher(struct index_state *istate)
char *msg;
 
if (!get_git_work_tree()) {
-   reset_watches(istate, 1);
+   reset_watches(istate, 1, no worktree);
return;
}
 
@@ -165,10 +169,11 @@ void open_watcher(struct index_state *istate)
}
 
istate-watcher = connect_watcher(watcher_path);
+   trace_printf_key(GIT_TRACE_WATCHER, open watcher %d\n, 
istate-watcher);
if (packet_write_timeout(istate-watcher, WAIT_TIME, hello) = 0 ||
(msg = packet_read_line_timeout(istate-watcher, WAIT_TIME, NULL)) 
== NULL ||
strcmp(msg, hello)) {
-   reset_watches(istate, 1);
+   reset_watches(istate, 1, invalid hello response);
return;
}
 
@@ -177,7 +182,7 @@ void open_watcher(struct index_state *istate)
 get_git_work_tree()) = 0 ||
(msg = packet_read_line_timeout(istate-watcher, WAIT_TIME, NULL)) 
== NULL ||
strcmp(msg, ok)) {
-   reset_watches(istate, 0);
+   reset_watches(istate, 0, inconsistent);
istate-update_watches = 1;
return;
}
@@ -265,6 +270,7 @@ void watch_entries(struct index_state *istate)
nr++;
if (nr  watch_lowerlimit)
return;
+   trace_printf_key(GIT_TRACE_WATCHER, watch %d\n, nr);
sorted = xmalloc(sizeof(*sorted) * nr);
for (i = nr = 0; i  istate-cache_nr; i++)
if (ce_watchable(istate-cache[i], now))
@@ -280,6 +286,7 @@ void close_watcher(struct index_state *istate, const 
unsigned char *sha1)
int len, i, nr;
if (istate-watcher = 0)
return;
+   trace_printf_key(GIT_TRACE_WATCHER, close watcher\n);
if (packet_write_timeout(istate-watcher, WAIT_TIME,
 new-index %s, sha1_to_hex(sha1)) = 0)
goto done;
diff --git a/t/t7514-file-watcher-lib.sh b/t/t7514-file-watcher-lib.sh
new file mode 100755
index 000..8dabb13
--- /dev/null
+++ b/t/t7514-file-watcher-lib.sh
@@ -0,0 +1,190 @@
+#!/bin/sh
+
+test_description='File watcher daemon and client tests'
+
+. ./test-lib.sh
+
+if git file-watcher --check-support  test_have_prereq POSIXPERM; then
+   : # good
+else
+   skip_all=file-watcher not supported on this system
+   test_done
+fi
+
+kill_it() {
+   test-file-watcher $1 EOF /dev/null
+die
+see you
+EOF
+}
+
+GIT_TEST_WATCHER=2

[PATCH v3 24/26] ls-files: print CE_WATCHED as W (or w with CE_VALID)

2014-02-02 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 Documentation/git-ls-files.txt |  1 +
 builtin/ls-files.c | 14 --
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
index c0856a6..bdb17a5 100644
--- a/Documentation/git-ls-files.txt
+++ b/Documentation/git-ls-files.txt
@@ -123,6 +123,7 @@ a space) at the start of each line:
R:: removed/deleted
C:: modified/changed
K:: to be killed
+   W:: being watched by `git file-watcher`
?:: other
 
 -v::
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index e1cf6d8..f1f7c07 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -46,6 +46,7 @@ static const char *tag_killed = ;
 static const char *tag_modified = ;
 static const char *tag_skip_worktree = ;
 static const char *tag_resolve_undo = ;
+static const char *tag_watched = ;
 
 static void write_name(const char *name)
 {
@@ -231,6 +232,7 @@ static void show_files(struct dir_struct *dir)
if (show_cached || show_stage) {
for (i = 0; i  active_nr; i++) {
const struct cache_entry *ce = active_cache[i];
+   const char *tag;
if ((dir-flags  DIR_SHOW_IGNORED) 
!ce_excluded(dir, ce))
continue;
@@ -238,8 +240,15 @@ static void show_files(struct dir_struct *dir)
continue;
if (ce-ce_flags  CE_UPDATE)
continue;
-   show_ce_entry(ce_stage(ce) ? tag_unmerged :
-   (ce_skip_worktree(ce) ? tag_skip_worktree : 
tag_cached), ce);
+   if (ce_stage(ce))
+   tag = tag_unmerged;
+   else if (ce_skip_worktree(ce))
+   tag = tag_skip_worktree;
+   else if (ce-ce_flags  CE_WATCHED)
+   tag = tag_watched;
+   else
+   tag = tag_cached;
+   show_ce_entry(tag, ce);
}
}
if (show_deleted || show_modified) {
@@ -530,6 +539,7 @@ int cmd_ls_files(int argc, const char **argv, const char 
*cmd_prefix)
tag_killed = K ;
tag_skip_worktree = S ;
tag_resolve_undo = U ;
+   tag_watched = W ;
}
if (show_modified || show_others || show_deleted || (dir.flags  
DIR_SHOW_IGNORED) || show_killed)
require_work_tree = 1;
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 22/26] file-watcher: quit if $WATCHER/socket is gone

2014-02-02 Thread Nguyễn Thái Ngọc Duy
This is more of an issue in development than in production. When a
file-watcher related test fails, the daemon may be left hanging. When
you rerun the same test, old $TRASH_DIRECTORY is wiped out and no one
can communicate with the old daemon any more. Make the old daemon quit
after 5 minutes in such cases.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 file-watcher.c | 26 --
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/file-watcher.c b/file-watcher.c
index 5867942..1e45b25 100644
--- a/file-watcher.c
+++ b/file-watcher.c
@@ -834,11 +834,22 @@ static void check_socket_directory(const char *path)
free(path_copy);
 }
 
+static void run_housekeeping(void)
+{
+   struct stat st;
+   struct strbuf sb = STRBUF_INIT;
+   strbuf_addf(sb, %s/socket, socket_path);
+   if (stat(sb.buf, st) || !S_ISSOCK(st.st_mode))
+   exit(0);
+   strbuf_release(sb);
+}
+
 int main(int argc, const char **argv)
 {
struct strbuf sb = STRBUF_INIT;
int i, new_nr, fd, quit = 0, nr_common;
int daemon = 0;
+   time_t last_checked;
struct option options[] = {
OPT_BOOL(0, detach, daemon,
 N_(run in background)),
@@ -894,19 +905,25 @@ int main(int argc, const char **argv)
pfd[1].events = POLLIN;
}
 
+   last_checked = time(NULL);
while (!quit) {
-   if (poll(pfd, pfd_nr, -1)  0) {
+   int ret = poll(pfd, pfd_nr, 30);
+   int time_for_housekeeping = 0;
+   if (ret  0) {
if (errno != EINTR) {
error(Poll failed, resuming: %s,
  strerror(errno));
sleep(1);
}
continue;
-   }
+   } else if (ret == 0)
+   time_for_housekeeping = 1;
 
if (inotify_fd  (pfd[1].revents  POLLIN)) {
if (handle_inotify(inotify_fd))
break;
+   if (last_checked + 300  time(NULL))
+   time_for_housekeeping = 1;
}
 
for (new_nr = i = nr_common; i  pfd_nr; i++) {
@@ -949,6 +966,11 @@ int main(int argc, const char **argv)
accept_connection(pfd[0].fd);
if (pfd[0].revents  (POLLHUP | POLLERR | POLLNVAL))
die(_(error on listening socket));
+
+   if (time_for_housekeeping) {
+   run_housekeeping();
+   last_checked = time(NULL);
+   }
}
return 0;
 }
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 21/26] Support running file watcher with the test suite

2014-02-02 Thread Nguyễn Thái Ngọc Duy
This is to force running the test suite with file-watcher with

  $ mkdir /tmp/watcher
  $ chmod 700 /tmp/watcher
  $ git-file-watcher /tmp/watcher/

then open another terminal and run

  $ export GIT_TEST_WATCHED=2 GIT_TEST_WATCHER=2
  $ export GIT_TEST_WATCHER_PATH=/tmp/watcher
  $ make test

TIME_WAIT set set to unlimited by GIT_TEST_WATCHER=2 so the test suite
could hang up indefinitely due to a file-watcher bug. Luckily
everything passes.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 file-watcher-lib.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/file-watcher-lib.c b/file-watcher-lib.c
index b6b0848..93afb52 100644
--- a/file-watcher-lib.c
+++ b/file-watcher-lib.c
@@ -136,6 +136,7 @@ void open_watcher(struct index_state *istate)
}
 
if (!read_config) {
+   const char *s;
int i;
/*
 * can't hook into git_default_config because
@@ -149,6 +150,18 @@ void open_watcher(struct index_state *istate)
if (i == istate-cache_nr)
recent_limit = 0;
read_config = 1;
+
+   s = getenv(GIT_TEST_WATCHER);
+   if (s) {
+   watch_lowerlimit = 1;
+   recent_limit = 0;
+   WAIT_TIME = -1;
+   if (atoi(s)  1)
+   istate-update_watches = 1;
+   s = getenv(GIT_TEST_WATCHER_PATH);
+   if (s)
+   watcher_path = xstrdup(s);
+   }
}
 
istate-watcher = connect_watcher(watcher_path);
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 23/26] file-watcher: tests for the daemon

2014-02-02 Thread Nguyễn Thái Ngọc Duy
test-file-watcher is a simple chat program to talk to file
watcher. Typically you would write something like this

  cat expect EOF
  # send hello. Oh and this is a comment!
  hello
  # Wait for reply and print to stdout.
  # test-file-watcher does not care about anything after ''
  hello
  index foo bar
  ok
  EOF
  test-file-watcher . expect actual

and test-file-watcher will execute the commands and get responses. If
all go well, actual should be the same as expect. '' and ''
denote send and receive packets respectively. '' and '' can be
used to send and receive a list of NUL-terminated paths.

$GIT_TEST_WATCHER enables a few more commands for testing purposes.
The most important one is 'test-mode' where system inotify is taken
out and inotify events could be injected via test-file-watcher.

There are two debug commands in file-watcher that's not used by the
test suite, but would help debugging: setenv and log. They can be used
to turn on GIT_TRACE_PACKET then any log command will show, which
functions as barrier between events file watcher.

GIT_TRACE_WATCHER can also be enabled (dynamically or at startup) to
track inotify events.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 .gitignore   |   1 +
 Makefile |   1 +
 file-watcher.c   | 181 ++-
 t/t7513-file-watcher.sh (new +x) | 382 +++
 test-file-watcher.c (new)|  96 ++
 5 files changed, 657 insertions(+), 4 deletions(-)
 create mode 100755 t/t7513-file-watcher.sh
 create mode 100644 test-file-watcher.c

diff --git a/.gitignore b/.gitignore
index 12c78f0..277f929 100644
--- a/.gitignore
+++ b/.gitignore
@@ -181,6 +181,7 @@
 /test-date
 /test-delta
 /test-dump-cache-tree
+/test-file-watcher
 /test-scrap-cache-tree
 /test-genrandom
 /test-index-version
diff --git a/Makefile b/Makefile
index 1c4d659..f0dc2cc 100644
--- a/Makefile
+++ b/Makefile
@@ -555,6 +555,7 @@ TEST_PROGRAMS_NEED_X += test-ctype
 TEST_PROGRAMS_NEED_X += test-date
 TEST_PROGRAMS_NEED_X += test-delta
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
+TEST_PROGRAMS_NEED_X += test-file-watcher
 TEST_PROGRAMS_NEED_X += test-genrandom
 TEST_PROGRAMS_NEED_X += test-index-version
 TEST_PROGRAMS_NEED_X += test-line-buffer
diff --git a/file-watcher.c b/file-watcher.c
index 1e45b25..3ab0a11 100644
--- a/file-watcher.c
+++ b/file-watcher.c
@@ -65,7 +65,8 @@ struct connection {
 static struct connection **conns;
 static struct pollfd *pfd;
 static int conns_alloc, pfd_nr, pfd_alloc;
-static int inotify_fd;
+static int inotify_fd, test_mode;
+static int wd_counter = 1;
 
 /*
  * IN_DONT_FOLLOW does not matter now as we do not monitor
@@ -78,10 +79,19 @@ static struct dir *create_dir(struct dir *parent, const 
char *path,
  const char *basename)
 {
struct dir *d;
-   int wd = inotify_add_watch(inotify_fd, path, INOTIFY_MASKS);
+   int wd;
+   if (!test_mode)
+   wd = inotify_add_watch(inotify_fd, path, INOTIFY_MASKS);
+   else {
+   wd = wd_counter++;
+   if (wd  8)
+   wd = -1;
+   }
if (wd  0)
return NULL;
 
+   trace_printf_key(GIT_TRACE_WATCHER, inotify: watch %d %s\n,
+wd, path);
d = xmalloc(sizeof(*d));
memset(d, 0, sizeof(*d));
d-wd = wd;
@@ -124,7 +134,9 @@ static void free_dir(struct dir *d, int topdown)
if (d-repo)
d-repo-root = NULL;
wds[d-wd] = NULL;
-   inotify_rm_watch(inotify_fd, d-wd);
+   if (!test_mode)
+   inotify_rm_watch(inotify_fd, d-wd);
+   trace_printf_key(GIT_TRACE_WATCHER, inotify: unwatch %d\n, d-wd);
if (topdown) {
int i;
for (i = 0; i  d-nr_subdirs; i++)
@@ -265,6 +277,7 @@ static inline void queue_file_changed(struct file *f, 
struct strbuf *sb)
int len = sb-len;
strbuf_addf(sb, %s%s, f-parent-parent ? / : , f-name);
string_list_append(f-repo-updated, sb-buf);
+   trace_printf_key(GIT_TRACE_WATCHER, watcher: changed %s\n, sb-buf);
f-repo-updated_sorted = 0;
strbuf_setlen(sb, len);
 }
@@ -324,6 +337,10 @@ static int do_handle_inotify(const struct inotify_event 
*event)
struct dir *d;
int pos;
 
+   trace_printf_key(GIT_TRACE_WATCHER, inotify: event %08x wd %d %s\n,
+event-mask, event-wd,
+event-len ? event-name : N/A);
+
if (event-mask  (IN_Q_OVERFLOW | IN_UNMOUNT)) {
int i;
for (i = 0; i  nr_repos; i++)
@@ -385,6 +402,81 @@ static int handle_inotify(int fd)
return ret;
 }
 
+struct constant {
+   const char *name;
+   int value;
+};
+
+#define CONSTANT(x) { #x, x }
+static const struct constant inotify_masks[] = {
+   CONSTANT(IN_DELETE_SELF),
+   CONSTANT(IN_MOVE_SELF),
+