Re: [PATCH v2] connect: in ref advertisement, shallows are last

2017-09-21 Thread Brandon Williams
ING_REF;
>  
>   *list = NULL;
> - for (saw_response = 0; ; saw_response = 1) {
> - struct ref *ref;
> - struct object_id old_oid;
> - char *name;
> - int len, name_len;
> - char *buffer = packet_buffer;
> - const char *arg;
> -
> - len = packet_read(in, _buf, _len,
> -   packet_buffer, sizeof(packet_buffer),
> -   PACKET_READ_GENTLE_ON_EOF |
> -   PACKET_READ_CHOMP_NEWLINE);
> - if (len < 0)
> - die_initial_contact(saw_response);
> -
> - if (!len)
> - break;
> -
> - if (len > 4 && skip_prefix(buffer, "ERR ", ))
> - die("remote error: %s", arg);
> -
> - if (len == GIT_SHA1_HEXSZ + strlen("shallow ") &&
> - skip_prefix(buffer, "shallow ", )) {
> - if (get_oid_hex(arg, _oid))
> - die("protocol error: expected shallow sha-1, 
> got '%s'", arg);
> - if (!shallow_points)
> - die("repository on the other end cannot be 
> shallow");
> - oid_array_append(shallow_points, _oid);
> - continue;
> - }
> -
> - if (len < GIT_SHA1_HEXSZ + 2 || get_oid_hex(buffer, _oid) ||
> - buffer[GIT_SHA1_HEXSZ] != ' ')
> - die("protocol error: expected sha/ref, got '%s'", 
> buffer);
> - name = buffer + GIT_SHA1_HEXSZ + 1;
> -
> - name_len = strlen(name);
> - if (len != name_len + GIT_SHA1_HEXSZ + 1) {
> - free(server_capabilities);
> - server_capabilities = xstrdup(name + name_len + 1);
> - }
>  
> - if (extra_have && !strcmp(name, ".have")) {
> - oid_array_append(extra_have, _oid);
> - continue;
> - }
> -
> - if (!strcmp(name, "capabilities^{}")) {
> - if (saw_response)
> - die("protocol error: unexpected 
> capabilities^{}");
> - if (got_dummy_ref_with_capabilities_declaration)
> - die("protocol error: multiple capabilities^{}");
> - got_dummy_ref_with_capabilities_declaration = 1;
> - continue;
> + while ((len = read_remote_ref(in, _buf, _len, ))) {
> + switch (state) {
> + case EXPECTING_REF:
> + if (process_ref(, len, , flags, extra_have))
> + break;
> + /* fallthrough */
> +     case EXPECTING_SHALLOW:
> + if (process_shallow(, shallow_points))
> + break;
> + die("protocol error: unexpected '%s'", packet_buffer);
> + default:
> + die("unexpected state %d", state);
>   }
> -
> - if (!check_ref(name, flags))
> - continue;
> -
> - if (got_dummy_ref_with_capabilities_declaration)
> - die("protocol error: unexpected ref after 
> capabilities^{}");
> -
> - ref = alloc_ref(buffer + GIT_SHA1_HEXSZ + 1);
> - oidcpy(>old_oid, _oid);
> - *list = ref;
> - list = >next;
>   }
>  
>   annotate_refs_with_symref_info(*orig_list);
> -- 
> 2.14.1.728.g20a5b67d5.dirty
> 

-- 
Brandon Williams


Re: [PATCH 3/8] daemon: recognize hidden request arguments

2017-09-21 Thread Brandon Williams
On 09/20, Jonathan Tan wrote:
> On Wed, 20 Sep 2017 17:24:43 -0700
> Jonathan Tan <jonathanta...@google.com> wrote:
> 
> > On Wed, 13 Sep 2017 14:54:43 -0700
> > Brandon Williams <bmw...@google.com> wrote:
> > 
> > > A normal request to git-daemon is structured as
> > > "command path/to/repo\0host=..\0" and due to a bug in an old version of
> > > git-daemon 73bb33a94 (daemon: Strictly parse the "extra arg" part of the
> > > command, 2009-06-04) we aren't able to place any extra args (separated
> > > by NULs) besides the host.
> > > 
> > > In order to get around this limitation teach git-daemon to recognize
> > > additional request arguments hidden behind a second NUL byte.  Requests
> > > can then be structured like:
> > > "command path/to/repo\0host=..\0\0version=1\0key=value\0".  git-daemon
> > > can then parse out the extra arguments and set 'GIT_PROTOCOL'
> > > accordingly.
> > 
> > A test in this patch (if possible) would be nice, but it is probably
> > clearer to test this when one of the commands (e.g. upload-pack) is
> > done. Could a test be added to the next patch to verify (using
> > GIT_TRACE_PACKET, maybe) that the expected strings are sent? Then
> > mention in this commit message that this will be tested in the next
> > patch too.
> 
> Ah, I see that it is tested in 6/8. You can ignore this comment.

Yeah I felt it would have been difficult to test any earlier without
both the client and server sides done.

-- 
Brandon Williams


Re: [PATCH 2/2] Document the string_list structure

2017-09-21 Thread Brandon Williams
On 09/21, Han-Wen Nienhuys wrote:
> Signed-off-by: Han-Wen Nienhuys <han...@google.com>
> ---
>  string-list.h | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/string-list.h b/string-list.h
> index 29bfb7ae4..08b534166 100644
> --- a/string-list.h
> +++ b/string-list.h
> @@ -8,6 +8,12 @@ struct string_list_item {
>  
>  typedef int (*compare_strings_fn)(const char *, const char *);
>  
> +/* A resizable array of strings. The strings are owned if

nit: the start of the comment block should be on its own line:

  /*
   * A resizable ...

> + * 'strdup_strings' is set. It can be used as a sorted array, and a
> + * custom comparison may be given in 'cmp'. The field 'items[i].util'
> + * may be used to implement an array of pairs. In that case, the
> + * caller is responsible for managing memory pointed to by 'util'.
> + */

The util field can be freed if 'free_util' is set when calling
string_list_clear() but yeah, in general the caller is managing that
memory especially if a complex free function is required (in the case of
allocating a struct with dynamically allocated fields to be stored in
the util feild).

Just pointing that out, and I'm happy with this and the previous patch.

>  struct string_list {
>   struct string_list_item *items;
>   unsigned int nr, alloc;
> -- 
> 2.14.1.821.g8fa685d3b7-goog
> 



-- 
Brandon Williams


Re: [PATCH 1.5/8] connect: die when a capability line comes after a ref

2017-09-20 Thread Brandon Williams
On 09/20, Jeff King wrote:
> On Wed, Sep 20, 2017 at 11:48:32AM -0700, Brandon Williams wrote:
> 
> > Commit eb398797c (connect: advertized capability is not a ref,
> > 2016-09-09) taught 'get_remote_heads()' to recognize that the
> > 'capabilities^{}' line isn't a ref but required that the
> > 'capabilities^{}' line came during the first response from the server.
> > A future patch will introduce a version string sent by the server during
> > its first response which can then cause a client to unnecessarily die if
> > a 'capabilities^{}' line sent as the first ref.
> > 
> > Teach 'get_remote_heads()' to instead die if a 'capabilities^{}' line is
> > sent after a ref.
> 
> Hmm. I think I understand why you'd want this loosening. But why are we
> sending a version line to a client that we don't know is speaking v2?
> IOW, shouldn't we be reporting the version to the client in the normal
> capabilities when we don't know for sure that they can handle the new
> field? Otherwise we're breaking existing clients.

The client requested the version, this is the servers response.  So
older clients shouldn't be broken because they wouldn't be requesting
the newer versions.

> 
> Or is this only for v2 clients, and we've changed the protocol but
> get_remote_heads() just needs to be updated, too?

A client which didn't request protocol v1 (I'm calling the current
protocol v0, and v1 is just v0 with the initial response from the server
containing a version string) should not receive a version string in the
initial response.  The problem is that when introducing the version
string to protocol version 1, I didn't want to have to do a huge
refactoring of ALL of the current transport code so I stuck the version
check in get_remote_heads() since v1 is exactly the same as v0, except
for the first line from the server.

When we introduce v2, I'm sure we'll have to do more refactoring to
separate out the logic for the different versions.
> 
> > diff --git a/connect.c b/connect.c
> > index df56c0cbf..af5096ec6 100644
> > --- a/connect.c
> > +++ b/connect.c
> > @@ -124,10 +124,11 @@ struct ref **get_remote_heads(int in, char *src_buf, 
> > size_t src_len,
> >  * response does not necessarily mean an ACL problem, though.
> >  */
> > int saw_response;
> > +   int seen_ref;
> > int got_dummy_ref_with_capabilities_declaration = 0;
> > 
> > *list = NULL;
> > -   for (saw_response = 0; ; saw_response = 1) {
> > +   for (saw_response = 0, seen_ref = 0; ; saw_response = 1) {
> 
> If we're not going to update it in the right-hand side of the for-loop,
> should we perhaps not be initializing it in the left-hand side? I.e.,
> can we just do:
> 
>   seen_ref = 0;
> 
> above the loop, like we initialize "list"?
> 
> (For that matter, could we just be checking whether *list is NULL?)

True, that would probably be the better way to do this.

> 
> > @@ -165,6 +166,8 @@ struct ref **get_remote_heads(int in, char *src_buf, 
> > size_t src_len,
> > 
> > name_len = strlen(name);
> > if (len != name_len + GIT_SHA1_HEXSZ + 1) {
> > +   if (seen_ref)
> > +   ; /* NEEDSWORK: Error out for multiple 
> > capabilities lines? */
> > free(server_capabilities);
> > server_capabilities = xstrdup(name + name_len + 1);
> > }
> 
> Interesting question. Probably it would be fine to. Coincidentally I ran
> across a similar case. It seems that upload-pack will read multiple
> capabilities lines back from the client. I.e., if it gets:
> 
>   want 1234abcd... foo
>   want 5678abcd... bar
> 
> then it will turn on both the "foo" and "bar" capabilities. I'm pretty
> sure this is unintended, and is somewhat counter to the way that clients
> handle multiple lines (which is to forget the old line and respect only
> the new one, as shown in the quoted hunk).
> 
> I wonder if we should be outlawing extra capabilities in both
> directions. I don't _think_ we've ever relied on that working, and I
> don't have much sympathy for any 3rd-party implementation that does
> (though I doubt that any exists).
> 
> That tangent aside, I do this hunk is kind of orthogonal to the point of
> your patch. We're talking about potential _tightening_ here, whereas the
> point of your patch is loosening. And it's not clear to me what we want
> to tighten:
> 
>   - should capabilities come as part of the first response, even if we
> have no refs? In which case we really want "if (saw_response)" here.
> 
>   - should they came as part of the first ref (

[PATCH 1.5/8] connect: die when a capability line comes after a ref

2017-09-20 Thread Brandon Williams
Commit eb398797c (connect: advertized capability is not a ref,
2016-09-09) taught 'get_remote_heads()' to recognize that the
'capabilities^{}' line isn't a ref but required that the
'capabilities^{}' line came during the first response from the server.
A future patch will introduce a version string sent by the server during
its first response which can then cause a client to unnecessarily die if
a 'capabilities^{}' line sent as the first ref.

Teach 'get_remote_heads()' to instead die if a 'capabilities^{}' line is
sent after a ref.

Reported-by: Miguel Alcon <mal...@google.com>
Signed-off-by: Brandon Williams <bmw...@google.com>
---
This is a fix to the bug we found when internally deploying this series.  It
just makes it so that a capability line wont cause a client to error out if its
not the first response, because it won't be the first response come protocol
v1.

 connect.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/connect.c b/connect.c
index df56c0cbf..af5096ec6 100644
--- a/connect.c
+++ b/connect.c
@@ -124,10 +124,11 @@ struct ref **get_remote_heads(int in, char *src_buf, 
size_t src_len,
 * response does not necessarily mean an ACL problem, though.
 */
int saw_response;
+   int seen_ref;
int got_dummy_ref_with_capabilities_declaration = 0;

*list = NULL;
-   for (saw_response = 0; ; saw_response = 1) {
+   for (saw_response = 0, seen_ref = 0; ; saw_response = 1) {
struct ref *ref;
struct object_id old_oid;
char *name;
@@ -165,6 +166,8 @@ struct ref **get_remote_heads(int in, char *src_buf, size_t 
src_len,

name_len = strlen(name);
if (len != name_len + GIT_SHA1_HEXSZ + 1) {
+   if (seen_ref)
+   ; /* NEEDSWORK: Error out for multiple 
capabilities lines? */
free(server_capabilities);
server_capabilities = xstrdup(name + name_len + 1);
}
@@ -175,7 +178,7 @@ struct ref **get_remote_heads(int in, char *src_buf, size_t 
src_len,
}

if (!strcmp(name, "capabilities^{}")) {
-   if (saw_response)
+   if (seen_ref)
die("protocol error: unexpected 
capabilities^{}");
if (got_dummy_ref_with_capabilities_declaration)
die("protocol error: multiple capabilities^{}");
@@ -193,6 +196,7 @@ struct ref **get_remote_heads(int in, char *src_buf, size_t 
src_len,
oidcpy(>old_oid, _oid);
*list = ref;
list = >next;
+   seen_ref = 1;
}

annotate_refs_with_symref_info(*orig_list);
--
2.14.1.821.g8fa685d3b7-goog



Re: [PATCH v2] Improve performance of git status --ignored

2017-09-19 Thread Brandon Williams
;   if (subdir_state > dir_state)
>   dir_state = subdir_state;
>   }
>  
>   if (check_only) {
> + if (stop_at_first_file) {
> + /*
> +  * If stopping at first file, then
> +  * signal that a file was found by
> +  * returning `path_excluded`. This is
> +  * to return a consistent value
> +  * regardless of whether an ignored or
> +  * excluded file happened to be
> +  * encountered 1st.
> +  *
> +  * In current usage, the
> +  * `stop_at_first_file` is passed when
> +  * an ancestor directory has matched
> +  * an exclude pattern, so any found
> +  * files will be excluded.
> +  */
> + if (dir_state >= path_excluded) {
> + dir_state = path_excluded;
> + break;
> + }
> + }
> +
>   /* abort early if maximum state has been reached */
>   if (dir_state == path_untracked) {
>   if (cdir.fdir)
> @@ -2108,7 +2143,7 @@ int read_directory(struct dir_struct *dir, struct 
> index_state *istate,
>*/
>   dir->untracked = NULL;
>   if (!len || treat_leading_path(dir, istate, path, len, pathspec))
> - read_directory_recursive(dir, istate, path, len, untracked, 0, 
> pathspec);
> + read_directory_recursive(dir, istate, path, len, untracked, 0, 
> 0, pathspec);
>   QSORT(dir->entries, dir->nr, cmp_dir_entry);
>   QSORT(dir->ignored, dir->ignored_nr, cmp_dir_entry);
>  
> -- 
> 2.7.4
> 

-- 
Brandon Williams


Re: [PATCH 2/8] protocol: introduce protocol extention mechanisms

2017-09-18 Thread Brandon Williams
On 09/18, Stefan Beller wrote:
> >> From a users POV this may be frustrating as I would imagine that
> >> people want to run
> >>
> >>   git config --global protocol.version 2
> >>
> >> to try it out and then realize that some of their hosts do not speak
> >> 2, so they have to actually configure it per repo/remote.
> >
> > The point would be to be able to set this globally, not per-repo.  Even
> > if a repo doesn't speak v2 then it should be able to gracefully degrade
> > to v1 without the user having to do anything.  The reason why there is
> > this escape hatch is if doing the protocol negotiation out of band
> > causing issues with communicating with a server that it can be shut off.
> 
> In the current situation it is easy to assume that if v1 (and not v0)
> is configured, that the users intent is "to try out v1 and fallback
> gracefully to v0".
> 
> But this will change over time in the future!
> 
> Eventually people will have the desire to say:
> "Use version N+1, but never version N", because N has
> performance or security issues; the user might not want
> to bother to try N or even actively want to be affirmed that
> Git will never use version N.
> 
> In this future we need a mechanism, that either contains a
> white list or black list of protocols. To keep it simple (I assume
> there won't be many protocol versions), a single white list will do.
> 
> However transitioning from the currently proposed "try the new
> configured thing and fallback to whatever" to "this is the exact list
> of options that Git will try for you" will be hard, as we may break people
> if we do not unconditionally fall back to v0.
> 
> That is why I propose to start with an explicit white list as then we
> do not have to have a transition plan or otherwise work around the
> issue. Also it doesn't hurt now to use
> 
> git config --global protocol.version v1,v0
> 
> instead compared to the proposed configuration above.
> (Even better yet, then people could play around with "v1 only"
> and see how it falls apart on old servers)

Except we can't start with an explicit whitelist because we must
fallback to v0 if v1 isn't supported otherwise we would break people.

That is unless we have the semantics of: If not configured v0 will be
used, otherwise only use the configured protocol versions.

-- 
Brandon Williams


Re: [PATCH 2/8] protocol: introduce protocol extention mechanisms

2017-09-18 Thread Brandon Williams
On 09/13, Stefan Beller wrote:
> On Wed, Sep 13, 2017 at 2:54 PM, Brandon Williams <bmw...@google.com> wrote:
> > Create protocol.{c,h} and provide functions which future servers and
> > clients can use to determine which protocol to use or is being used.
> >
> > Also introduce the 'GIT_PROTOCOL' environment variable which will be
> > used to communicate a colon separated list of keys with optional values
> > to a server.  Unknown keys and values must be tolerated.  This mechanism
> > is used to communicate which version of the wire protocol a client would
> > like to use with a server.
> >
> > Signed-off-by: Brandon Williams <bmw...@google.com>
> > ---
> >  Documentation/config.txt | 16 +++
> >  Documentation/git.txt|  5 
> >  Makefile |  1 +
> >  cache.h  |  7 +
> >  protocol.c   | 72 
> > 
> >  protocol.h   | 15 ++
> >  6 files changed, 116 insertions(+)
> >  create mode 100644 protocol.c
> >  create mode 100644 protocol.h
> >
> > diff --git a/Documentation/config.txt b/Documentation/config.txt
> > index dc4e3f58a..d5b28a32c 100644
> > --- a/Documentation/config.txt
> > +++ b/Documentation/config.txt
> > @@ -2517,6 +2517,22 @@ The protocol names currently used by git are:
> >  `hg` to allow the `git-remote-hg` helper)
> >  --
> >
> > +protocol.version::
> 
> It would be cool to set a set of versions that are good. (I am not sure
> if that can be deferred to a later patch.)
> 
>   Consider we'd have versions 0,1,2,3,4 in the future:
>   In an ideal world the client and server would talk using v4
>   as it is the most advanced protocol, right?
>   Maybe a security/performance issue is found on the server side
>   with say protocol v3. Still no big deal as we are speaking v4.
>   But then consider an issue is found on the client side with v4.
>   Then the client would happily talk 0..3 while the server would
>   love to talk using 0,1,2,4.
> 
> The way I think about protocol version negotiation is that
> both parties involved have a set of versions that they tolerate
> to talk (they might understand more than the tolerated set, but the
> user forbade some), and the goal of the negotiation is to find
> the highest version number that is part of both the server set
> and client set. So quite naturally with this line of thinking the
> configuration is to configure a set of versions, which is what
> I propose here. Maybe even in the wire format, separated
> with colons?

I'm sure it wouldn't take too much to change this to be a multi-valued
config.  Because after this series there is just v0 and v1 I didn't
think through this case too much.  If others agree then I can go ahead
and make it so in a reroll.

> 
> > +   If set, clients will attempt to communicate with a server using
> > +   the specified protocol version.  If unset, no attempt will be
> > +   made by the client to communicate using a particular protocol
> > +   version, this results in protocol version 0 being used.
> 
> This sounds as if we're going to be really shy at first and only
> users that care will try out new versions at their own risk.
> From a users POV this may be frustrating as I would imagine that
> people want to run
> 
>   git config --global protocol.version 2
> 
> to try it out and then realize that some of their hosts do not speak
> 2, so they have to actually configure it per repo/remote.

The point would be to be able to set this globally, not per-repo.  Even
if a repo doesn't speak v2 then it should be able to gracefully degrade
to v1 without the user having to do anything.  The reason why there is
this escape hatch is if doing the protocol negotiation out of band
causing issues with communicating with a server that it can be shut off.


> > +   Supported versions:
> 
> > +* `0` - the original wire protocol.
> 
> In the future this may be misleading as it doesn't specify the date of
> when it was original. e.g. are capabilities already supported in "original"?
> 
> Maybe phrase it as "wire protocol as of v2.14" ? (Though this sounds
> as if new capabilities added in the future are not allowed)

Yeah I can see how this could be misleading, though I'm not sure how
best to word it to avoid that.

> 
> 
> > +
> > +extern enum protocol_version parse_protocol_version(const char *value);
> > +extern enum protocol_version get_protocol_version_config(void);
> > +extern enum protocol_version determine_protocol_version_server(void);
> > +extern enum protocol_version determine_protocol_version_client(const char 
> > *server_response);
> 
> Here is a good place to have some comments.

-- 
Brandon Williams


Re: [PATCH 3/8] daemon: recognize hidden request arguments

2017-09-18 Thread Brandon Williams
On 09/13, Stefan Beller wrote:
> On Wed, Sep 13, 2017 at 2:54 PM, Brandon Williams <bmw...@google.com> wrote:
> > A normal request to git-daemon is structured as
> > "command path/to/repo\0host=..\0" and due to a bug in an old version of
> > git-daemon 73bb33a94 (daemon: Strictly parse the "extra arg" part of the
> > command, 2009-06-04) we aren't able to place any extra args (separated
> > by NULs) besides the host.
> >
> > In order to get around this limitation teach git-daemon to recognize
> > additional request arguments hidden behind a second NUL byte.  Requests
> > can then be structured like:
> > "command path/to/repo\0host=..\0\0version=1\0key=value\0".  git-daemon
> > can then parse out the extra arguments and set 'GIT_PROTOCOL'
> > accordingly.
> >
> > Signed-off-by: Brandon Williams <bmw...@google.com>
> > ---
> >  daemon.c | 71 
> > +++-
> >  1 file changed, 61 insertions(+), 10 deletions(-)
> >
> > diff --git a/daemon.c b/daemon.c
> > index 30747075f..250dbf82c 100644
> > --- a/daemon.c
> > +++ b/daemon.c
> > @@ -282,7 +282,7 @@ static const char *path_ok(const char *directory, 
> > struct hostinfo *hi)
> > return NULL;/* Fallthrough. Deny by default */
> >  }
> >
> > -typedef int (*daemon_service_fn)(void);
> > +typedef int (*daemon_service_fn)(const struct argv_array *env);
> >  struct daemon_service {
> > const char *name;
> > const char *config_name;
> > @@ -363,7 +363,7 @@ static int run_access_hook(struct daemon_service 
> > *service, const char *dir,
> >  }
> >
> >  static int run_service(const char *dir, struct daemon_service *service,
> > -  struct hostinfo *hi)
> > +  struct hostinfo *hi, const struct argv_array *env)
> >  {
> > const char *path;
> > int enabled = service->enabled;
> > @@ -422,7 +422,7 @@ static int run_service(const char *dir, struct 
> > daemon_service *service,
> >  */
> > signal(SIGTERM, SIG_IGN);
> >
> > -   return service->fn();
> > +   return service->fn(env);
> >  }
> >
> >  static void copy_to_log(int fd)
> > @@ -462,25 +462,34 @@ static int run_service_command(struct child_process 
> > *cld)
> > return finish_command(cld);
> >  }
> >
> > -static int upload_pack(void)
> > +static int upload_pack(const struct argv_array *env)
> >  {
> > struct child_process cld = CHILD_PROCESS_INIT;
> > argv_array_pushl(, "upload-pack", "--strict", NULL);
> > argv_array_pushf(, "--timeout=%u", timeout);
> > +
> > +   argv_array_pushv(_array, env->argv);
> > +
> > return run_service_command();
> >  }
> >
> > -static int upload_archive(void)
> > +static int upload_archive(const struct argv_array *env)
> >  {
> > struct child_process cld = CHILD_PROCESS_INIT;
> > argv_array_push(, "upload-archive");
> > +
> > +   argv_array_pushv(_array, env->argv);
> > +
> > return run_service_command();
> >  }
> >
> > -static int receive_pack(void)
> > +static int receive_pack(const struct argv_array *env)
> >  {
> > struct child_process cld = CHILD_PROCESS_INIT;
> > argv_array_push(, "receive-pack");
> > +
> > +   argv_array_pushv(_array, env->argv);
> > +
> > return run_service_command();
> >  }
> >
> > @@ -574,7 +583,7 @@ static void canonicalize_client(struct strbuf *out, 
> > const char *in)
> >  /*
> >   * Read the host as supplied by the client connection.
> >   */
> > -static void parse_host_arg(struct hostinfo *hi, char *extra_args, int 
> > buflen)
> > +static char *parse_host_arg(struct hostinfo *hi, char *extra_args, int 
> > buflen)
> >  {
> > char *val;
> > int vallen;
> > @@ -602,6 +611,39 @@ static void parse_host_arg(struct hostinfo *hi, char 
> > *extra_args, int buflen)
> > if (extra_args < end && *extra_args)
> > die("Invalid request");
> > }
> > +
> > +   return extra_args;
> > +}
> > +
> > +static void parse_extra_args(struct argv_array *env, const char 
> > *extra_args,
> > +int bufle

Re: [PATCH 2/4] push, fetch: error out for submodule entries not pointing to commits

2017-09-18 Thread Brandon Williams
On 09/12, Jonathan Nieder wrote:
> From: Stefan Beller <sbel...@google.com>
> 
> The check_has_commit helper uses resolves a submodule entry to a
> commit, when validating its existence. As a side effect this means
> tolerates a submodule entry pointing to a tag, which is not a valid
> submodule entry that git commands would know how to cope with.
> 
> Tighten the check to require an actual commit, not a tag pointing to a
> commit.
> 
> Also improve the error handling when a submodule entry points to
> non-commit (e.g., a blob) to error out instead of warning and
> pretending the pointed to object doesn't exist.
> 
> Signed-off-by: Stefan Beller <sbel...@google.com>
> Signed-off-by: Jonathan Nieder <jrnie...@gmail.com>

Looks good!

> ---
>  submodule.c| 33 +
>  t/t5531-deep-submodule-push.sh | 10 ++
>  2 files changed, 35 insertions(+), 8 deletions(-)
> 
> diff --git a/submodule.c b/submodule.c
> index 3cea8221e0..e0da55920d 100644
> --- a/submodule.c
> +++ b/submodule.c
> @@ -767,19 +767,36 @@ static int append_oid_to_argv(const struct object_id 
> *oid, void *data)
>   return 0;
>  }
>  
> +struct has_commit_data {
> + int result;
> + const char *path;
> +};
> +
>  static int check_has_commit(const struct object_id *oid, void *data)
>  {
> - int *has_commit = data;
> + struct has_commit_data *cb = data;
>  
> - if (!lookup_commit_reference(oid))
> - *has_commit = 0;
> + enum object_type type = sha1_object_info(oid->hash, NULL);
>  
> - return 0;
> + switch (type) {
> + case OBJ_COMMIT:
> + return 0;
> + case OBJ_BAD:
> + /*
> +  * Object is missing or invalid. If invalid, an error message
> +  * has already been printed.
> +  */
> + cb->result = 0;
> + return 0;
> + default:
> + die(_("submodule entry '%s' (%s) is a %s, not a commit"),
> + cb->path, oid_to_hex(oid), typename(type));
> + }
>  }
>  
>  static int submodule_has_commits(const char *path, struct oid_array *commits)
>  {
> - int has_commit = 1;
> + struct has_commit_data has_commit = { 1, path };
>  
>   /*
>* Perform a cheap, but incorrect check for the existence of 'commits'.
> @@ -795,7 +812,7 @@ static int submodule_has_commits(const char *path, struct 
> oid_array *commits)
>  
>   oid_array_for_each_unique(commits, check_has_commit, _commit);
>  
> - if (has_commit) {
> + if (has_commit.result) {
>   /*
>* Even if the submodule is checked out and the commit is
>* present, make sure it exists in the submodule's object store
> @@ -814,12 +831,12 @@ static int submodule_has_commits(const char *path, 
> struct oid_array *commits)
>   cp.dir = path;
>  
>   if (capture_command(, , GIT_MAX_HEXSZ + 1) || out.len)
> - has_commit = 0;
> + has_commit.result = 0;
>  
>   strbuf_release();
>   }
>  
> - return has_commit;
> + return has_commit.result;
>  }
>  
>  static int submodule_needs_pushing(const char *path, struct oid_array 
> *commits)
> diff --git a/t/t5531-deep-submodule-push.sh b/t/t5531-deep-submodule-push.sh
> index 0f84a53146..39cb2c1c34 100755
> --- a/t/t5531-deep-submodule-push.sh
> +++ b/t/t5531-deep-submodule-push.sh
> @@ -298,6 +298,16 @@ test_expect_success 'push succeeds if submodule commit 
> disabling recursion from
>   )
>  '
>  
> +test_expect_success 'submodule entry pointing at a tag is error' '
> + git -C work/gar/bage tag -a test1 -m "tag" &&
> + tag=$(git -C work/gar/bage rev-parse test1^{tag}) &&
> + git -C work update-index --cacheinfo 16 "$tag" gar/bage &&
> + git -C work commit -m "bad commit" &&
> + test_when_finished "git -C work reset --hard HEAD^" &&
> + test_must_fail git -C work push --recurse-submodules=on-demand 
> ../pub.git master 2>err &&
> + test_i18ngrep "is a tag, not a commit" err
> +'
> +
>  test_expect_success 'push fails if recurse submodules option passed as yes' '
>   (
>   cd work/gar/bage &&
> -- 
> 2.14.1.690.gbb1197296e
> 

-- 
Brandon Williams


Re: [RFC PATCH v2 1/2] implement fetching of moved submodules

2017-09-18 Thread Brandon Williams
lookup(_submodule_paths, ce->name))
> + if 
> (!unsorted_string_list_lookup(_submodule_names,
> +  
> submodule->name))
>   continue;
>   default_argv = "on-demand";
>   }
> @@ -1159,13 +1166,15 @@ static int get_next_submodule(struct child_process 
> *cp,
>   if (spf->default_option == 
> RECURSE_SUBMODULES_OFF)
>   continue;
>   if (spf->default_option == 
> RECURSE_SUBMODULES_ON_DEMAND) {
> - if 
> (!unsorted_string_list_lookup(_submodule_paths, ce->name))
> + if 
> (!unsorted_string_list_lookup(_submodule_names,
> +   
> submodule->name))
>   continue;
>   default_argv = "on-demand";
>   }
>   }
>   } else if (spf->command_line_option == 
> RECURSE_SUBMODULES_ON_DEMAND) {
> - if 
> (!unsorted_string_list_lookup(_submodule_paths, ce->name))
> + if 
> (!unsorted_string_list_lookup(_submodule_names,
> +  submodule->name))
>   continue;
>   default_argv = "on-demand";
>   }
> @@ -1258,7 +1267,7 @@ int fetch_populated_submodules(const struct argv_array 
> *options,
>  
>   argv_array_clear();
>  out:
> - string_list_clear(_submodule_paths, 1);
> + string_list_clear(_submodule_names, 1);
>   return spf.result;
>  }
>  
> diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
> index 42251f7f3a..22a7358b6f 100755
> --- a/t/t5526-fetch-submodules.sh
> +++ b/t/t5526-fetch-submodules.sh
> @@ -530,4 +530,39 @@ test_expect_success 'fetching submodule into a broken 
> repository' '
>   test_must_fail git -C dst fetch --recurse-submodules
>  '
>  
> +test_expect_success "fetch new commits when submodule got renamed" '
> + git clone . downstream_rename &&
> + (
> + cd downstream_rename &&
> + git submodule update --init &&
> +# NEEDSWORK: we omitted --recursive for the submodule update here since
> +# that does not work. See test 7001 for mv "moving nested submodules"
> +# for details. Once that is fixed we should add the --recursive option
> +# here.
> + git checkout -b rename &&
> + git mv submodule submodule_renamed &&
> + (
> + cd submodule_renamed &&
> + git checkout -b rename_sub &&
> + echo a >a &&
> + git add a &&
> + git commit -ma &&
> + git push origin rename_sub &&
> + git rev-parse HEAD >../../expect
> + ) &&
> + git add submodule_renamed &&
> + git commit -m "update renamed submodule" &&
> + git push origin rename
> + ) &&
> + (
> + cd downstream &&
> + git fetch --recurse-submodules=on-demand &&
> + (
> + cd submodule &&
> + git rev-parse origin/rename_sub >../../actual
> + )
> + ) &&
> + test_cmp expect actual
> +'
> +
>  test_done
> -- 
> 2.14.1.145.gb3622a4
> 

-- 
Brandon Williams


Re: [RFC PATCH v2 2/2] submodule: simplify decision tree whether to or not to fetch

2017-09-18 Thread Brandon Williams
On 09/15, Heiko Voigt wrote:
> To make extending this logic later easier.
> 
> Signed-off-by: Heiko Voigt <hvo...@hvoigt.net>

I like the result of this patch, its much cleaner.

> ---
>  submodule.c | 74 
> ++---
>  1 file changed, 37 insertions(+), 37 deletions(-)
> 
> diff --git a/submodule.c b/submodule.c
> index 38b9905e43..fa44fc59f2 100644
> --- a/submodule.c
> +++ b/submodule.c
> @@ -1118,6 +1118,31 @@ struct submodule_parallel_fetch {
>  };
>  #define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0}
>  
> +static int get_fetch_recurse_config(const struct submodule *submodule,
> + struct submodule_parallel_fetch *spf)
> +{
> + if (spf->command_line_option != RECURSE_SUBMODULES_DEFAULT)
> + return spf->command_line_option;
> +
> + if (submodule) {
> + char *key;
> + const char *value;
> +
> + int fetch_recurse = submodule->fetch_recurse;
> + key = xstrfmt("submodule.%s.fetchRecurseSubmodules", 
> submodule->name);
> + if (!repo_config_get_string_const(the_repository, key, )) 
> {
> + fetch_recurse = parse_fetch_recurse_submodules_arg(key, 
> value);
> + }
> + free(key);
> +
> + if (fetch_recurse != RECURSE_SUBMODULES_NONE)
> + /* local config overrules everything except commandline 
> */
> + return fetch_recurse;
> + }
> +
> + return spf->default_option;
> +}
> +
>  static int get_next_submodule(struct child_process *cp,
> struct strbuf *err, void *data, void **task_cb)
>  {
> @@ -1137,46 +1162,21 @@ static int get_next_submodule(struct child_process 
> *cp,
>  
>   submodule = submodule_from_path(_oid, ce->name);
>  
> - default_argv = "yes";
> - if (spf->command_line_option == RECURSE_SUBMODULES_DEFAULT) {
> - int fetch_recurse = RECURSE_SUBMODULES_NONE;
> -
> - if (submodule) {
> - char *key;
> - const char *value;
> -
> - fetch_recurse = submodule->fetch_recurse;
> - key = 
> xstrfmt("submodule.%s.fetchRecurseSubmodules", submodule->name);
> - if 
> (!repo_config_get_string_const(the_repository, key, )) {
> - fetch_recurse = 
> parse_fetch_recurse_submodules_arg(key, value);
> - }
> - free(key);
> - }
> -
> - if (fetch_recurse != RECURSE_SUBMODULES_NONE) {
> - if (fetch_recurse == RECURSE_SUBMODULES_OFF)
> - continue;
> - if (fetch_recurse == 
> RECURSE_SUBMODULES_ON_DEMAND) {
> - if 
> (!unsorted_string_list_lookup(_submodule_names,
> -  
> submodule->name))
> - continue;
> - default_argv = "on-demand";
> - }
> - } else {
> - if (spf->default_option == 
> RECURSE_SUBMODULES_OFF)
> - continue;
> - if (spf->default_option == 
> RECURSE_SUBMODULES_ON_DEMAND) {
> - if 
> (!unsorted_string_list_lookup(_submodule_names,
> -   
> submodule->name))
> - continue;
> - default_argv = "on-demand";
> - }
> - }
> - } else if (spf->command_line_option == 
> RECURSE_SUBMODULES_ON_DEMAND) {
> - if 
> (!unsorted_string_list_lookup(_submodule_names,
> + switch (get_fetch_recurse_config(submodule, spf))
> + {
> + default:
> + case RECURSE_SUBMODULES_DEFAULT:
> + case RECURSE_SUBMODULES_ON_DEMAND:
> + if (!submodule || 
> !unsorted_string_list_lookup(_submodule_names,
>submodule->name))
>       continue;
>   default_argv = "on-demand";
> + break;
> + case RECURSE_SUBMODULES_ON:
> + default_argv = "yes";
> + break;
> + case RECURSE_SUBMODULES_OFF:
> + continue;
>   }
>  
>   strbuf_addf(_path, "%s/%s", spf->work_tree, ce->name);
> -- 
> 2.14.1.145.gb3622a4
> 

-- 
Brandon Williams


Re: [PATCH] protocol: make parse_protocol_version() private

2017-09-18 Thread Brandon Williams
On 09/17, Ramsay Jones wrote:
> 
> Signed-off-by: Ramsay Jones <ram...@ramsayjones.plus.com>
> ---
> 
> Hi Brandon,
> 
> If you need to re-roll your 'bw/protocol-v1' branch, could you please
> squash this into the relevant patch (commit 45954f179e, "protocol:
> introduce protocol extention mechanisms", 13-09-2017).
> 
> This assumes you agree that this symbol does not need to be public; if
> not, then please just ignore! ;-)

Thanks!  I've updated my local version of the series to reflect this.

> 
> Thanks!
> 
> ATB,
> Ramsay Jones
> 
>  protocol.c | 2 +-
>  protocol.h | 1 -
>  2 files changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/protocol.c b/protocol.c
> index 1b16c7b9a..369503065 100644
> --- a/protocol.c
> +++ b/protocol.c
> @@ -2,7 +2,7 @@
>  #include "config.h"
>  #include "protocol.h"
>  
> -enum protocol_version parse_protocol_version(const char *value)
> +static enum protocol_version parse_protocol_version(const char *value)
>  {
>   if (!strcmp(value, "0"))
>   return protocol_v0;
> diff --git a/protocol.h b/protocol.h
> index 2fa6486d0..18f9a5235 100644
> --- a/protocol.h
> +++ b/protocol.h
> @@ -7,7 +7,6 @@ enum protocol_version {
>   protocol_v1 = 1,
>  };
>  
> -extern enum protocol_version parse_protocol_version(const char *value);
>  extern enum protocol_version get_protocol_version_config(void);
>  extern enum protocol_version determine_protocol_version_server(void);
>  extern enum protocol_version determine_protocol_version_client(const char 
> *server_response);
> -- 
> 2.14.0

-- 
Brandon Williams


Re: RFC v3: Another proposed hash function transition plan

2017-09-14 Thread Brandon Williams
On 09/14, Johannes Schindelin wrote:
> Hi Jonathan,
> 
> On Wed, 13 Sep 2017, Jonathan Nieder wrote:
> 
> > As a side note, I am probably misreading, but I found this set of
> > paragraphs a bit condescending.  It sounds to me like you are saying
> > "You are making the wrong choice of hash function and everything else
> > you are describing is irrelevant when compared to that monumental
> > mistake.  Please stop working on things I don't consider important".
> > With that reading it is quite demotivating to read.
> 
> I am sorry you read it that way. I did not feel condescending when I wrote
> that mail, I felt annoyed by the side track, and anxious. In my mind, the
> transition is too important for side tracking, and I worry that we are not
> fast enough (imagine what would happen if a better attack was discovered
> that is not as easily detected as the one we know about?).
> 
> > An alternative reading is that you are saying that the transition plan
> > described in this thread is not ironed out.  Can you spell that out
> > more?  What particular aspect of the transition plan (which is of
> > course orthogonal to the choice of hash function) are you discontent
> > with?
> 
> My impression from reading Junio's mail was that he does not consider the
> transition plan ironed out yet, and that he wants to spend time on
> discussing generation numbers right now.
> 
> I was in particularly frightened by the suggestion to "reboot" [*1*].
> Hopefully I misunderstand and he meant "finishing touches" instead.
> 
> As to *my* opinion: after reading https://goo.gl/gh2Mzc (is it really
> correct that its last update has been on March 6th?), my only concern is
> really that it still talks about SHA3-256 when I think that the
> performance benefits of SHA-256 (think: "Git at scale", and also hardware
> support) really make the latter a better choice.
> 
> In order to be "ironed out", I think we need to talk about the
> implementation detail "Translation table". This is important. It needs to
> be *fast*.

Agreed, when that document was written it was hand waved as an
implementation detail but once we should probably stare ironing out
those details soon so that we have a concrete plan in place.

> 
> Speaking of *fast*, I could imagine that it would make sense to store the
> SHA-1 objects on disk, still, instead of converting them on the fly. I am
> not sure whether this is something we need to define in the document,
> though, as it may very well be premature optimization; Maybe mention that
> we could do this if necessary?
> 
> Apart from that, I would *love* to see this document as The Official Plan
> that I can Show To The Manager so that I can ask to Allocate Time.

Speaking of having a concrete plan, we discussed in office the other day
about finally converting the doc into a Documentation patch.  That was
always are intention but after writing up the doc we got busy working on
other projects.  Getting it in as a patch (with a more concrete road map)
is probably the next step we'd need to take.

I do want to echo what jonathan has said in other parts of this thread,
that the transition plan itself doesn't depend on which hash function we
end up going with in the end.  I fully expect that for the transition
plan to succeed that we'll have infrastructure for dropping in different
hash functions so that we can do some sort of benchmarking before
selecting one to use.  This would also give us the ability to more
easily transition to another hash function when the time comes.

-- 
Brandon Williams


[PATCH 8/8] i5700: add interop test for protocol transition

2017-09-13 Thread Brandon Williams
Signed-off-by: Brandon Williams <bmw...@google.com>
---
 t/interop/i5700-protocol-transition.sh | 68 ++
 1 file changed, 68 insertions(+)
 create mode 100755 t/interop/i5700-protocol-transition.sh

diff --git a/t/interop/i5700-protocol-transition.sh 
b/t/interop/i5700-protocol-transition.sh
new file mode 100755
index 0..9e83428a8
--- /dev/null
+++ b/t/interop/i5700-protocol-transition.sh
@@ -0,0 +1,68 @@
+#!/bin/sh
+
+VERSION_A=.
+VERSION_B=v2.0.0
+
+: ${LIB_GIT_DAEMON_PORT:=5600}
+LIB_GIT_DAEMON_COMMAND='git.b daemon'
+
+test_description='clone and fetch by client who is trying to use a new 
protocol'
+. ./interop-lib.sh
+. "$TEST_DIRECTORY"/lib-git-daemon.sh
+
+start_git_daemon --export-all
+
+repo=$GIT_DAEMON_DOCUMENT_ROOT_PATH/repo
+
+test_expect_success "create repo served by $VERSION_B" '
+   git.b init "$repo" &&
+   git.b -C "$repo" commit --allow-empty -m one
+'
+
+test_expect_success "git:// clone with $VERSION_A and protocol v1" '
+   GIT_TRACE_PACKET=1 git.a -c protocol.version=1 clone 
"$GIT_DAEMON_URL/repo" child 2>log &&
+   git.a -C child log -1 --format=%s >actual &&
+   git.b -C "$repo" log -1 --format=%s >expect &&
+   test_cmp expect actual &&
+   grep "version=1" log
+'
+
+test_expect_success "git:// fetch with $VERSION_A and protocol v1" '
+   git.b -C "$repo" commit --allow-empty -m two &&
+   git.b -C "$repo" log -1 --format=%s >expect &&
+
+   GIT_TRACE_PACKET=1 git.a -C child -c protocol.version=1 fetch 2>log &&
+   git.a -C child log -1 --format=%s FETCH_HEAD >actual &&
+
+   test_cmp expect actual &&
+   grep "version=1" log &&
+   ! grep "version 1" log
+'
+
+stop_git_daemon
+
+test_expect_success "create repo served by $VERSION_B" '
+   git.b init parent &&
+   git.b -C parent commit --allow-empty -m one
+'
+
+test_expect_success "file:// clone with $VERSION_A and protocol v1" '
+   GIT_TRACE_PACKET=1 git.a -c protocol.version=1 clone 
--upload-pack="git.b upload-pack" parent child2 2>log &&
+   git.a -C child2 log -1 --format=%s >actual &&
+   git.b -C parent log -1 --format=%s >expect &&
+   test_cmp expect actual &&
+   ! grep "version 1" log
+'
+
+test_expect_success "file:// fetch with $VERSION_A and protocol v1" '
+   git.b -C parent commit --allow-empty -m two &&
+   git.b -C parent log -1 --format=%s >expect &&
+
+   GIT_TRACE_PACKET=1 git.a -C child2 -c protocol.version=1 fetch 
--upload-pack="git.b upload-pack" 2>log &&
+   git.a -C child2 log -1 --format=%s FETCH_HEAD >actual &&
+
+   test_cmp expect actual &&
+   ! grep "version 1" log
+'
+
+test_done
-- 
2.14.1.690.gbb1197296e-goog



[PATCH 7/8] http: tell server that the client understands v1

2017-09-13 Thread Brandon Williams
Tell a server that protocol v1 can be used by sending the http header
'Git-Protocol' indicating this.

Also teach the apache http server to pass through the 'Git-Protocol'
header as an environment variable 'GIT_PROTOCOL'.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 cache.h |  2 ++
 http.c  | 18 +
 t/lib-httpd/apache.conf |  7 +
 t/t5700-protocol-v1.sh  | 69 +
 4 files changed, 96 insertions(+)

diff --git a/cache.h b/cache.h
index 8839b1ed4..82a643968 100644
--- a/cache.h
+++ b/cache.h
@@ -450,6 +450,8 @@ static inline enum object_type object_type(unsigned int 
mode)
  * 'key[=value]'.  Presence of unknown keys must be tolerated.
  */
 #define GIT_PROTOCOL_ENVIRONMENT "GIT_PROTOCOL"
+/* HTTP header used to handshake the wire protocol */
+#define GIT_PROTOCOL_HEADER "Git-Protocol"
 
 /*
  * This environment variable is expected to contain a boolean indicating
diff --git a/http.c b/http.c
index 9e40a465f..ffb719216 100644
--- a/http.c
+++ b/http.c
@@ -12,6 +12,7 @@
 #include "gettext.h"
 #include "transport.h"
 #include "packfile.h"
+#include "protocol.h"
 
 static struct trace_key trace_curl = TRACE_KEY_INIT(CURL);
 #if LIBCURL_VERSION_NUM >= 0x070a08
@@ -897,6 +898,21 @@ static void set_from_env(const char **var, const char 
*envname)
*var = val;
 }
 
+static void protocol_http_header(void)
+{
+   if (get_protocol_version_config() > 0) {
+   struct strbuf protocol_header = STRBUF_INIT;
+
+   strbuf_addf(_header, GIT_PROTOCOL_HEADER ": 
version=%d",
+   get_protocol_version_config());
+
+
+   extra_http_headers = curl_slist_append(extra_http_headers,
+  protocol_header.buf);
+   strbuf_release(_header);
+   }
+}
+
 void http_init(struct remote *remote, const char *url, int proactive_auth)
 {
char *low_speed_limit;
@@ -927,6 +943,8 @@ void http_init(struct remote *remote, const char *url, int 
proactive_auth)
if (remote)
var_override(_proxy_authmethod, 
remote->http_proxy_authmethod);
 
+   protocol_http_header();
+
pragma_header = curl_slist_append(http_copy_default_headers(),
"Pragma: no-cache");
no_pragma_header = curl_slist_append(http_copy_default_headers(),
diff --git a/t/lib-httpd/apache.conf b/t/lib-httpd/apache.conf
index 0642ae7e6..df1943631 100644
--- a/t/lib-httpd/apache.conf
+++ b/t/lib-httpd/apache.conf
@@ -67,6 +67,9 @@ LockFile accept.lock
 
LoadModule unixd_module modules/mod_unixd.so
 
+
+   LoadModule setenvif_module modules/mod_setenvif.so
+
 
 
 PassEnv GIT_VALGRIND
@@ -76,6 +79,10 @@ PassEnv ASAN_OPTIONS
 PassEnv GIT_TRACE
 PassEnv GIT_CONFIG_NOSYSTEM
 
+= 2.4>
+   SetEnvIf Git-Protocol ".*" GIT_PROTOCOL=$0
+
+
 Alias /dumb/ www/
 Alias /auth/dumb/ www/auth/dumb/
 
diff --git a/t/t5700-protocol-v1.sh b/t/t5700-protocol-v1.sh
index 1988bbce6..65127 100755
--- a/t/t5700-protocol-v1.sh
+++ b/t/t5700-protocol-v1.sh
@@ -220,4 +220,73 @@ test_expect_success 'push with ssh:// using protocol v1' '
grep "push< version 1" log
 '
 
+# Test protocol v1 with 'http://' transport
+#
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success 'create repo to be served by http:// transport' '
+   git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+   git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack 
true &&
+   test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" one
+'
+
+test_expect_success 'clone with http:// using protocol v1' '
+   GIT_TRACE_PACKET=1 GIT_TRACE_CURL=1 git -c protocol.version=1 \
+   clone "$HTTPD_URL/smart/http_parent" http_child 2>log &&
+
+   git -C http_child log -1 --format=%s >actual &&
+   git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" log -1 --format=%s 
>expect &&
+   test_cmp expect actual &&
+
+   # Client requested to use protocol v1
+   grep "Git-Protocol: version=1" log &&
+   # Server responded using protocol v1
+   grep "git< version 1" log
+'
+
+test_expect_success 'fetch with http:// using protocol v1' '
+   test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" two &&
+
+   GIT_TRACE_PACKET=1 git -C http_child -c protocol.version=1 \
+   fetch 2>log &&
+
+   git -C http_child log -1 --format=%s FETCH_HEAD >actual &&
+   git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" log -1 --format=%s 
>expect &&
+   test_cmp expect actual &&
+
+   # Server responded using prot

[PATCH 6/8] connect: tell server that the client understands v1

2017-09-13 Thread Brandon Williams
Teach the connection logic to tell a serve that it understands protocol
v1.  This is done in 2 different ways for the built in protocols.

1. git://
   A normal request is structured as "command path/to/repo\0host=..\0"
   and due to a bug in an old version of git-daemon 73bb33a94 (daemon:
   Strictly parse the "extra arg" part of the command, 2009-06-04) we
   aren't able to place any extra args (separated by NULs) besides the
   host.  In order to get around this limitation put protocol version
   information after a second NUL byte so the request is structured
   like: "command path/to/repo\0host=..\0\0version=1\0".  git-daemon can
   then parse out the version number and set GIT_PROTOCOL.

2. ssh://, file://
   Set GIT_PROTOCOL envvar with the desired protocol version.  The
   envvar can be sent across ssh by using '-o SendEnv=GIT_PROTOCOL' and
   having the server whitelist this envvar.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 connect.c  |  37 ++--
 t/t5700-protocol-v1.sh | 223 +
 2 files changed, 255 insertions(+), 5 deletions(-)
 create mode 100755 t/t5700-protocol-v1.sh

diff --git a/connect.c b/connect.c
index 2702e1f2e..40b388acd 100644
--- a/connect.c
+++ b/connect.c
@@ -815,6 +815,7 @@ struct child_process *git_connect(int fd[2], const char 
*url,
printf("Diag: path=%s\n", path ? path : "NULL");
conn = NULL;
} else if (protocol == PROTO_GIT) {
+   struct strbuf request = STRBUF_INIT;
/*
 * Set up virtual host information based on where we will
 * connect, unless the user has overridden us in
@@ -842,12 +843,24 @@ struct child_process *git_connect(int fd[2], const char 
*url,
 * Note: Do not add any other headers here!  Doing so
 * will cause older git-daemon servers to crash.
 */
-   packet_write_fmt(fd[1],
-"%s %s%chost=%s%c",
-prog, path, 0,
-target_host, 0);
+   strbuf_addf(,
+   "%s %s%chost=%s%c",
+   prog, path, 0,
+   target_host, 0);
+
+   /* If using a new version put that stuff here after a second 
null byte */
+   if (get_protocol_version_config() > 0) {
+   strbuf_addch(, '\0');
+   strbuf_addf(, "version=%d%c",
+   get_protocol_version_config(), '\0');
+   }
+
+   packet_write(fd[1], request.buf, request.len);
+
free(target_host);
+   strbuf_release();
} else {
+   const char *const *var;
conn = xmalloc(sizeof(*conn));
child_process_init(conn);
 
@@ -859,7 +872,9 @@ struct child_process *git_connect(int fd[2], const char 
*url,
sq_quote_buf(, path);
 
/* remove repo-local variables from the environment */
-   conn->env = local_repo_env;
+   for (var = local_repo_env; *var; var++)
+   argv_array_push(>env_array, *var);
+
conn->use_shell = 1;
conn->in = conn->out = -1;
if (protocol == PROTO_SSH) {
@@ -912,6 +927,14 @@ struct child_process *git_connect(int fd[2], const char 
*url,
}
 
argv_array_push(>args, ssh);
+
+   if (get_protocol_version_config() > 0) {
+   argv_array_push(>args, "-o");
+   argv_array_push(>args, "SendEnv=" 
GIT_PROTOCOL_ENVIRONMENT);
+   argv_array_pushf(>env_array, 
GIT_PROTOCOL_ENVIRONMENT "=version=%d",
+get_protocol_version_config());
+   }
+
if (flags & CONNECT_IPV4)
argv_array_push(>args, "-4");
else if (flags & CONNECT_IPV6)
@@ -926,6 +949,10 @@ struct child_process *git_connect(int fd[2], const char 
*url,
argv_array_push(>args, ssh_host);
} else {
transport_check_allowed("file");
+   if (get_protocol_version_config() > 0) {
+   argv_array_pushf(>env_array, 
GIT_PROTOCOL_ENVIRONMENT "=version=%d",
+get_protocol_version_config());
+   }
}
argv_array_push(>args, cmd.buf);
 
diff --git a/t/t5700-protocol-v1.sh b/t/t5700-protocol-v1.s

[PATCH 1/8] pkt-line: add packet_write function

2017-09-13 Thread Brandon Williams
Add a function which can be used to write the contents of an arbitrary
buffer.  This makes it easy to build up data in a buffer before writing
the packet instead of formatting the entire contents of the packet using
'packet_write_fmt()'.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 pkt-line.c | 6 ++
 pkt-line.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/pkt-line.c b/pkt-line.c
index 7db911957..cf98f371b 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -188,6 +188,12 @@ static int packet_write_gently(const int fd_out, const 
char *buf, size_t size)
return error("packet write failed");
 }
 
+void packet_write(const int fd_out, const char *buf, size_t size)
+{
+   if (packet_write_gently(fd_out, buf, size))
+   die_errno("packet write failed");
+}
+
 void packet_buf_write(struct strbuf *buf, const char *fmt, ...)
 {
va_list args;
diff --git a/pkt-line.h b/pkt-line.h
index 66ef610fc..d9e9783b1 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -22,6 +22,7 @@
 void packet_flush(int fd);
 void packet_write_fmt(int fd, const char *fmt, ...) __attribute__((format 
(printf, 2, 3)));
 void packet_buf_flush(struct strbuf *buf);
+void packet_write(const int fd_out, const char *buf, size_t size);
 void packet_buf_write(struct strbuf *buf, const char *fmt, ...) 
__attribute__((format (printf, 2, 3)));
 int packet_flush_gently(int fd);
 int packet_write_fmt_gently(int fd, const char *fmt, ...) 
__attribute__((format (printf, 2, 3)));
-- 
2.14.1.690.gbb1197296e-goog



[PATCH 5/8] connect: teach client to recognize v1 server response

2017-09-13 Thread Brandon Williams
Teach a client to recognize that a server understands protocol v1 by
looking at the first pkt-line the server sends in response.  This is
done by looking for the response "version 1" send by upload-pack or
receive-pack.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 connect.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/connect.c b/connect.c
index 49b28b83b..2702e1f2e 100644
--- a/connect.c
+++ b/connect.c
@@ -11,6 +11,7 @@
 #include "string-list.h"
 #include "sha1-array.h"
 #include "transport.h"
+#include "protocol.h"
 
 static char *server_capabilities;
 static const char *parse_feature_value(const char *, const char *, int *);
@@ -142,6 +143,27 @@ struct ref **get_remote_heads(int in, char *src_buf, 
size_t src_len,
if (len < 0)
die_initial_contact(saw_response);
 
+   /* Only check for version information on first response */
+   if (!saw_response) {
+   switch (determine_protocol_version_client(buffer)) {
+   case protocol_v1:
+   /*
+* First pkt-line contained the version string.
+* Continue on to process the ref advertisement.
+*/
+   continue;
+   case protocol_v0:
+   /*
+* Server is speaking protocol v0 and sent a
+* ref so we need to process it.
+*/
+   break;
+   default:
+   die("server is speaking an unknown protocol");
+   break;
+   }
+   }
+
if (!len)
break;
 
-- 
2.14.1.690.gbb1197296e-goog



[PATCH 4/8] upload-pack, receive-pack: introduce protocol version 1

2017-09-13 Thread Brandon Williams
Teach upload-pack and receive-pack to understand and respond using
protocol version 1, if requested.

Protocol version 1 is simply the original and current protocol (what I'm
calling version 0) with the addition of a single packet line, which
precedes the ref advertisement, indicating the protocol version being
spoken.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 builtin/receive-pack.c | 14 ++
 upload-pack.c  | 17 -
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 52c63ebfd..aebe77cc3 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -24,6 +24,7 @@
 #include "tmp-objdir.h"
 #include "oidset.h"
 #include "packfile.h"
+#include "protocol.h"
 
 static const char * const receive_pack_usage[] = {
N_("git receive-pack "),
@@ -1963,6 +1964,19 @@ int cmd_receive_pack(int argc, const char **argv, const 
char *prefix)
else if (0 <= receive_unpack_limit)
unpack_limit = receive_unpack_limit;
 
+   switch (determine_protocol_version_server()) {
+   case protocol_v1:
+   if (advertise_refs || !stateless_rpc)
+   packet_write_fmt(1, "version 1\n");
+   /*
+* v1 is just the original protocol with a version string,
+* so just fall through after writing the version string.
+*/
+   case protocol_v0:
+   default:
+   break;
+   }
+
if (advertise_refs || !stateless_rpc) {
write_head_info();
}
diff --git a/upload-pack.c b/upload-pack.c
index 7efff2fbf..5cab39819 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -18,6 +18,7 @@
 #include "parse-options.h"
 #include "argv-array.h"
 #include "prio-queue.h"
+#include "protocol.h"
 
 static const char * const upload_pack_usage[] = {
N_("git upload-pack [] "),
@@ -1067,6 +1068,20 @@ int cmd_main(int argc, const char **argv)
die("'%s' does not appear to be a git repository", dir);
 
git_config(upload_pack_config, NULL);
-   upload_pack();
+
+   switch (determine_protocol_version_server()) {
+   case protocol_v1:
+   if (advertise_refs || !stateless_rpc)
+   packet_write_fmt(1, "version 1\n");
+   /*
+* v1 is just the original protocol with a version string,
+* so just fall through after writing the version string.
+*/
+   case protocol_v0:
+   default:
+   upload_pack();
+   break;
+   }
+
return 0;
 }
-- 
2.14.1.690.gbb1197296e-goog



[PATCH 0/8] protocol transition

2017-09-13 Thread Brandon Williams
Here is the non-RFC version of of my proposed protocol transition plan which
can be found here:
https://public-inbox.org/git/20170824225328.8174-1-bmw...@google.com/

The main take away from the comments on the RFC were that the first transition
shouldn't be drastic, so this patch set introduces protocol v1 which is simply
protocol v0 (which is what I'm calling the current git wire protocol) with a
single pkt-line containing a version string before the ref advertisement.

I have included tests for protocol version 1 to verify that it works with the
following transports: git://, file://, ssh://, and http://.  I have also
included an interop test to ensure that sending the version request out of band
doesn't cause issues with older servers.

Any and all comments and feedback are welcome, thanks!

Brandon Williams (8):
  pkt-line: add packet_write function
  protocol: introduce protocol extention mechanisms
  daemon: recognize hidden request arguments
  upload-pack, receive-pack: introduce protocol version 1
  connect: teach client to recognize v1 server response
  connect: tell server that the client understands v1
  http: tell server that the client understands v1
  i5700: add interop test for protocol transition

 Documentation/config.txt   |  16 ++
 Documentation/git.txt  |   5 +
 Makefile   |   1 +
 builtin/receive-pack.c |  14 ++
 cache.h|   9 +
 connect.c  |  59 ++-
 daemon.c   |  71 ++--
 http.c |  18 ++
 pkt-line.c |   6 +
 pkt-line.h |   1 +
 protocol.c |  72 
 protocol.h |  15 ++
 t/interop/i5700-protocol-transition.sh |  68 
 t/lib-httpd/apache.conf|   7 +
 t/t5700-protocol-v1.sh | 292 +
 upload-pack.c  |  17 +-
 16 files changed, 655 insertions(+), 16 deletions(-)
 create mode 100644 protocol.c
 create mode 100644 protocol.h
 create mode 100755 t/interop/i5700-protocol-transition.sh
 create mode 100755 t/t5700-protocol-v1.sh

-- 
2.14.1.690.gbb1197296e-goog



[PATCH 3/8] daemon: recognize hidden request arguments

2017-09-13 Thread Brandon Williams
A normal request to git-daemon is structured as
"command path/to/repo\0host=..\0" and due to a bug in an old version of
git-daemon 73bb33a94 (daemon: Strictly parse the "extra arg" part of the
command, 2009-06-04) we aren't able to place any extra args (separated
by NULs) besides the host.

In order to get around this limitation teach git-daemon to recognize
additional request arguments hidden behind a second NUL byte.  Requests
can then be structured like:
"command path/to/repo\0host=..\0\0version=1\0key=value\0".  git-daemon
can then parse out the extra arguments and set 'GIT_PROTOCOL'
accordingly.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 daemon.c | 71 +++-
 1 file changed, 61 insertions(+), 10 deletions(-)

diff --git a/daemon.c b/daemon.c
index 30747075f..250dbf82c 100644
--- a/daemon.c
+++ b/daemon.c
@@ -282,7 +282,7 @@ static const char *path_ok(const char *directory, struct 
hostinfo *hi)
return NULL;/* Fallthrough. Deny by default */
 }
 
-typedef int (*daemon_service_fn)(void);
+typedef int (*daemon_service_fn)(const struct argv_array *env);
 struct daemon_service {
const char *name;
const char *config_name;
@@ -363,7 +363,7 @@ static int run_access_hook(struct daemon_service *service, 
const char *dir,
 }
 
 static int run_service(const char *dir, struct daemon_service *service,
-  struct hostinfo *hi)
+  struct hostinfo *hi, const struct argv_array *env)
 {
const char *path;
int enabled = service->enabled;
@@ -422,7 +422,7 @@ static int run_service(const char *dir, struct 
daemon_service *service,
 */
signal(SIGTERM, SIG_IGN);
 
-   return service->fn();
+   return service->fn(env);
 }
 
 static void copy_to_log(int fd)
@@ -462,25 +462,34 @@ static int run_service_command(struct child_process *cld)
return finish_command(cld);
 }
 
-static int upload_pack(void)
+static int upload_pack(const struct argv_array *env)
 {
struct child_process cld = CHILD_PROCESS_INIT;
argv_array_pushl(, "upload-pack", "--strict", NULL);
argv_array_pushf(, "--timeout=%u", timeout);
+
+   argv_array_pushv(_array, env->argv);
+
return run_service_command();
 }
 
-static int upload_archive(void)
+static int upload_archive(const struct argv_array *env)
 {
struct child_process cld = CHILD_PROCESS_INIT;
argv_array_push(, "upload-archive");
+
+   argv_array_pushv(_array, env->argv);
+
return run_service_command();
 }
 
-static int receive_pack(void)
+static int receive_pack(const struct argv_array *env)
 {
struct child_process cld = CHILD_PROCESS_INIT;
argv_array_push(, "receive-pack");
+
+   argv_array_pushv(_array, env->argv);
+
return run_service_command();
 }
 
@@ -574,7 +583,7 @@ static void canonicalize_client(struct strbuf *out, const 
char *in)
 /*
  * Read the host as supplied by the client connection.
  */
-static void parse_host_arg(struct hostinfo *hi, char *extra_args, int buflen)
+static char *parse_host_arg(struct hostinfo *hi, char *extra_args, int buflen)
 {
char *val;
int vallen;
@@ -602,6 +611,39 @@ static void parse_host_arg(struct hostinfo *hi, char 
*extra_args, int buflen)
if (extra_args < end && *extra_args)
die("Invalid request");
}
+
+   return extra_args;
+}
+
+static void parse_extra_args(struct argv_array *env, const char *extra_args,
+int buflen)
+{
+   const char *end = extra_args + buflen;
+   struct strbuf git_protocol = STRBUF_INIT;
+
+   for (; extra_args < end; extra_args += strlen(extra_args) + 1) {
+   const char *arg = extra_args;
+
+   /*
+* Parse the extra arguments, adding most to 'git_protocol'
+* which will be used to set the 'GIT_PROTOCOL' envvar in the
+* service that will be run.
+*
+* If there ends up being a particular arg in the future that
+* git-daemon needs to parse specificly (like the 'host' arg)
+* then it can be parsed here and not added to 'git_protocol'.
+*/
+   if (*arg) {
+   if (git_protocol.len > 0)
+   strbuf_addch(_protocol, ':');
+   strbuf_addstr(_protocol, arg);
+   }
+   }
+
+   if (git_protocol.len > 0)
+   argv_array_pushf(env, GIT_PROTOCOL_ENVIRONMENT "=%s",
+git_protocol.buf);
+   strbuf_release(_protocol);
 }
 
 /*
@@ -695,6 +737,7 @@ static int execute(void)
int pktlen, len, i;
char *addr = getenv(&qu

[PATCH 2/8] protocol: introduce protocol extention mechanisms

2017-09-13 Thread Brandon Williams
Create protocol.{c,h} and provide functions which future servers and
clients can use to determine which protocol to use or is being used.

Also introduce the 'GIT_PROTOCOL' environment variable which will be
used to communicate a colon separated list of keys with optional values
to a server.  Unknown keys and values must be tolerated.  This mechanism
is used to communicate which version of the wire protocol a client would
like to use with a server.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 Documentation/config.txt | 16 +++
 Documentation/git.txt|  5 
 Makefile |  1 +
 cache.h  |  7 +
 protocol.c   | 72 
 protocol.h   | 15 ++
 6 files changed, 116 insertions(+)
 create mode 100644 protocol.c
 create mode 100644 protocol.h

diff --git a/Documentation/config.txt b/Documentation/config.txt
index dc4e3f58a..d5b28a32c 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2517,6 +2517,22 @@ The protocol names currently used by git are:
 `hg` to allow the `git-remote-hg` helper)
 --
 
+protocol.version::
+   If set, clients will attempt to communicate with a server using
+   the specified protocol version.  If unset, no attempt will be
+   made by the client to communicate using a particular protocol
+   version, this results in protocol version 0 being used.
+   Supported versions:
++
+--
+
+* `0` - the original wire protocol.
+
+* `1` - the original wire protocol with the addition of a version string
+  in the initial respose from the server.
+
+--
+
 pull.ff::
By default, Git does not create an extra merge commit when merging
a commit that is a descendant of the current commit. Instead, the
diff --git a/Documentation/git.txt b/Documentation/git.txt
index 6e3a6767e..299f75c7b 100644
--- a/Documentation/git.txt
+++ b/Documentation/git.txt
@@ -697,6 +697,11 @@ of clones and fetches.
which feed potentially-untrusted URLS to git commands.  See
linkgit:git-config[1] for more details.
 
+`GIT_PROTOCOL`::
+   For internal use only.  Used in handshaking the wire protocol.
+   Contains a colon ':' separated list of keys with optional values
+   'key[=value]'.  Presence of unknown keys must be tolerated.
+
 Discussion[[Discussion]]
 
 
diff --git a/Makefile b/Makefile
index f2bb7f2f6..1f300bd6b 100644
--- a/Makefile
+++ b/Makefile
@@ -837,6 +837,7 @@ LIB_OBJS += pretty.o
 LIB_OBJS += prio-queue.o
 LIB_OBJS += progress.o
 LIB_OBJS += prompt.o
+LIB_OBJS += protocol.o
 LIB_OBJS += quote.o
 LIB_OBJS += reachable.o
 LIB_OBJS += read-cache.o
diff --git a/cache.h b/cache.h
index a916bc79e..8839b1ed4 100644
--- a/cache.h
+++ b/cache.h
@@ -444,6 +444,13 @@ static inline enum object_type object_type(unsigned int 
mode)
 #define GIT_ICASE_PATHSPECS_ENVIRONMENT "GIT_ICASE_PATHSPECS"
 #define GIT_QUARANTINE_ENVIRONMENT "GIT_QUARANTINE_PATH"
 
+/*
+ * Environment variable used in handshaking the wire protocol.
+ * Contains a colon ':' separated list of keys with optional values
+ * 'key[=value]'.  Presence of unknown keys must be tolerated.
+ */
+#define GIT_PROTOCOL_ENVIRONMENT "GIT_PROTOCOL"
+
 /*
  * This environment variable is expected to contain a boolean indicating
  * whether we should or should not treat:
diff --git a/protocol.c b/protocol.c
new file mode 100644
index 0..1b16c7b9a
--- /dev/null
+++ b/protocol.c
@@ -0,0 +1,72 @@
+#include "cache.h"
+#include "config.h"
+#include "protocol.h"
+
+enum protocol_version parse_protocol_version(const char *value)
+{
+   if (!strcmp(value, "0"))
+   return protocol_v0;
+   else if (!strcmp(value, "1"))
+   return protocol_v1;
+   else
+   return protocol_unknown_version;
+}
+
+enum protocol_version get_protocol_version_config(void)
+{
+   const char *value;
+   if (!git_config_get_string_const("protocol.version", )) {
+   enum protocol_version version = parse_protocol_version(value);
+
+   if (version == protocol_unknown_version)
+   die("unknown value for config 'protocol.version': %s",
+   value);
+
+   return version;
+   }
+
+   return protocol_v0;
+}
+
+enum protocol_version determine_protocol_version_server(void)
+{
+   const char *git_protocol = getenv(GIT_PROTOCOL_ENVIRONMENT);
+   enum protocol_version version = protocol_v0;
+
+   if (git_protocol) {
+   struct string_list list = STRING_LIST_INIT_DUP;
+   const struct string_list_item *item;
+   string_list_split(, git_protocol, ':', -1);
+
+   for_each_string_list_item(item, ) {
+   const char *value;
+  

Re: RFC v3: Another proposed hash function transition plan

2017-09-11 Thread Brandon Williams
On 09/08, Junio C Hamano wrote:
> Junio C Hamano <gits...@pobox.com> writes:
> 
> > One thing I still do not know how I feel about after re-reading the
> > thread, and I didn't find the above doc, is Linus's suggestion to
> > use the objects themselves as NewHash-to-SHA-1 mapper [*1*].  
> > ...
> > [Reference]
> >
> > *1* <CA+55aFxj7Vtwac64RfAz_u=u4tob4xg+2pdbdfnpjdmgatc...@mail.gmail.com>
> 
> I think this falls into the same category as the often-talked-about
> addition of the "generation number" field.  It is very tempting to
> add these "mechanically derivable but expensive to compute" pieces
> of information to the sha3-content while converting from
> sha1-content and creating anew.  

We didn't discuss that in the doc since this particular transition plan
we made uses an external NewHash-to-SHA1 map instead of an internal one
because we believe that at some point we would be able to drop
compatibility with SHA1.  Now I suspect that wont happen for a long time
but I think it would be preferable over carrying the SHA1 luggage
indefinitely.  At some point, then, we would be able to stop hashing
objects twice (once with SHA1 and once with NewHash) instead of always
requiring that we hash them with each hash function which was used
historically.

> 
> Because the "sha1-name" or the "generation number" can mechanically
> be computed, as long as everybody agrees to _always_ place them in
> the sha3-content, the same sha1-content will be converted into
> exactly the same sha3-content without ambiguity, and converting them
> back to sha1-content while pushing to an older repository will
> correctly produce the original sha1-content, as it would just be the
> matter of simply stripping these extra pieces of information.
> 
> The reason why I still feel a bit uneasy about adding these things
> (aside from the fact that sha1-name thing will be a baggage we would
> need to carry forever even after we completely wean ourselves off of
> the old hash) is because I am not sure what we should do when we
> encounter sha3-content in the wild that has these things _wrong_.
> An object that exists today in the SHA-1 world is fetched into the
> new repository and converted to SHA-3 contents, and Linus's extra
> "original SHA-1 name" field is added to the object's header while
> recording the SHA-3 content.  But for whatever reason, the original
> SHA-1 name is recorded incorrectly in the resulting SHA-3 object.

This wasn't one of the issues that I thought of but it just makes the
argument against adding sha1's to the sha3 content stronger.

> 
> The same thing could happen if we decide to bake "generation number"
> in the SHA-3 commit objects.  One possible definition would be that
> a root commit will have gen #0; a commit with 1 or more parents will
> get max(parents' gen numbers) + 1 as its gen number.  But somebody
> may botch the counting and records sum(parents' gen numbers) as its
> gen number.
> 
> In these cases, not just the SHA3-content but also the resulting
> SHA-3 object name would be different from the name of the object
> that would have recorded the same contents correctly.  So converting
> back to SHA-1 world from these botched SHA-3 contents may produce
> the original contents, but we may end up with multiple "plausibly
> looking" set of SHA-3 objects that (clain to) correspond to a single
> SHA-1 object, only one of which is a valid one.
> 
> Our "git fsck" already treats certain brokenness (like a tree whose
> entry has mode that is 0-padded to the left) as broken but still
> tolerate them.  I am not sure if it is sufficient to diagnose and
> declare broken and invalid when we see sha3-content that records
> these "mechanically derivable but expensive to compute" pieces of
> information incorrectly.
> 
> I am leaning towards saying "yes, catching in fsck is enough" and
> suggesting to add generation number to sha3-content of the commit
> objects, and to add even the "original sha1 name" thing if we find
> good use of it.  But I cannot shake this nagging feeling off that I
> am missing some huge problems that adding these fields and opening
> ourselves to more classes of broken objects.
> 
> Thoughts?
> 
> 

-- 
Brandon Williams


Re: [PATCH 09/10] set_git_dir: handle feeding gitdir to itself

2017-09-07 Thread Brandon Williams
On 09/05, Jeff King wrote:
> Ideally we'd free the existing gitdir field before assigning
> the new one, to avoid a memory leak. But we can't do so
> safely because some callers do the equivalent of:
> 
>   set_git_dir(get_git_dir());
> 
> We can detect that case as a noop, but there are even more
> complicated cases like:
> 
>   set_git_dir(remove_leading_path(worktree, get_git_dir());
> 
> where we really do need to do some work, but the original
> string must remain valid.
> 
> Rather than put the burden on callers to make a copy of the
> string (only to free it later, since we'll make a copy of it
> ourselves), let's solve the problem inside set_git_dir(). We
> can make a copy of the pointer for the old gitdir, and then
> avoid freeing it until after we've made our new copy.
> 

This patch and the one before it look good to me.  Thanks for fixing
this issue!

> Signed-off-by: Jeff King <p...@peff.net>
> ---
>  repository.c | 10 +++---
>  setup.c  |  5 -
>  2 files changed, 3 insertions(+), 12 deletions(-)
> 
> diff --git a/repository.c b/repository.c
> index 52f1821c6b..97c732bd48 100644
> --- a/repository.c
> +++ b/repository.c
> @@ -56,16 +56,12 @@ static void repo_setup_env(struct repository *repo)
>  void repo_set_gitdir(struct repository *repo, const char *path)
>  {
>   const char *gitfile = read_gitfile(path);
> + char *old_gitdir = repo->gitdir;
>  
> - /*
> -  * NEEDSWORK: Eventually we want to be able to free gitdir and the rest
> -  * of the environment before reinitializing it again, but we have some
> -  * crazy code paths where we try to set gitdir with the current gitdir
> -  * and we don't want to free gitdir before copying the passed in value.
> -  */
>   repo->gitdir = xstrdup(gitfile ? gitfile : path);
> -
>   repo_setup_env(repo);
> +
> + free(old_gitdir);
>  }
>  
>  /*
> diff --git a/setup.c b/setup.c
> index 23950173fc..6d8380acd2 100644
> --- a/setup.c
> +++ b/setup.c
> @@ -399,11 +399,6 @@ void setup_work_tree(void)
>   if (getenv(GIT_WORK_TREE_ENVIRONMENT))
>   setenv(GIT_WORK_TREE_ENVIRONMENT, ".", 1);
>  
> - /*
> -  * NEEDSWORK: this call can essentially be set_git_dir(get_git_dir())
> -  * which can cause some problems when trying to free the old value of
> -  * gitdir.
> -  */
>   set_git_dir(remove_leading_path(git_dir, work_tree));
>   initialized = 1;
>  }
> -- 
> 2.14.1.721.gc5bc1565f1
> 

-- 
Brandon Williams


Re: [RFC 7/7] upload-pack: ack version 2

2017-09-01 Thread Brandon Williams
On 09/01, Bryan Turner wrote:
> On Thu, Aug 24, 2017 at 3:53 PM, Brandon Williams <bmw...@google.com> wrote:
> > +
> > +   version = getenv("GIT_PROTOCOL");
> > +   if (!strcmp(version, "2"))
> > +   upload_pack_v2();
> > +
> 
> I think the "if" check here needs some type of NULL check for
> "version" before calling "strcmp". Without that, if the "GIT_PROTOCOL"
> environment variable isn't set, git-upload-pack SEGVs.
> 
> This came up when I was testing the "protocol-v2" branch with
> Bitbucket Server. For performance reasons, we sometimes run ref
> advertisements as a separate process, when serving fetches, to allow
> throttling the ref advertisement separately from actually generating
> and sending a packfile. Since CI servers continuously poll for
> changes, but usually don't find any, we want to be able to serve ref
> advertisements, which have minimal overhead, with much higher
> parallelism than serving packs. To do that, we run "git-upload-pack
> --advertize-refs" directly, and that command has never *required*
> "GIT_PROTOCOL" before this change.

Thanks for pointing this out.  Since this was an RFC I wasn't being
careful with doing these sorts of checks :).  I'm currently working on
the non-RFC version of this series and it is getting close to being in a
state where I can send it out for more careful review.

-- 
Brandon Williams


Re: [PATCH 00/39] per-repository object store, part 1

2017-08-30 Thread Brandon Williams
ent to reprepare_packed_git
>   pack: add repository argument to sha1_file_name
>   pack: add repository argument to map_sha1_file
>   pack: allow install_packed_git to handle arbitrary repositories
>   pack: allow rearrange_packed_git to handle arbitrary repositories
>   pack: allow prepare_packed_git_mru to handle arbitrary repositories
>   pack: allow prepare_packed_git_one to handle arbitrary repositories
>   pack: allow prepare_packed_git to handle arbitrary repositories
>   pack: allow reprepare_packed_git to handle arbitrary repositories
>   pack: allow sha1_file_name to handle arbitrary repositories
>   pack: allow stat_sha1_file to handle arbitrary repositories
>   pack: allow open_sha1_file to handle arbitrary repositories
>   pack: allow map_sha1_file_1 to handle arbitrary repositories
>   pack: allow map_sha1_file to handle arbitrary repositories
>   pack: allow sha1_loose_object_info to handle arbitrary repositories
> 
> Stefan Beller (15):
>   repository: introduce object store field
>   object-store: move alt_odb_list and alt_odb_tail to object store
> struct
>   sha1_file: add repository argument to alt_odb_usable
>   sha1_file: add repository argument to link_alt_odb_entry
>   sha1_file: add repository argument to read_info_alternates
>   sha1_file: add repository argument to link_alt_odb_entries
>   sha1_file: add repository argument to stat_sha1_file
>   sha1_file: add repository argument to open_sha1_file
>   sha1_file: add repository argument to map_sha1_file_1
>   sha1_file: add repository argument to sha1_loose_object_info
>   object-store: add repository argument to prepare_alt_odb
>   object-store: add repository argument to foreach_alt_odb
>   sha1_file: allow alt_odb_usable to handle arbitrary repositories
>   object-store: allow prepare_alt_odb to handle arbitrary repositories
>   object-store: allow foreach_alt_odb to handle arbitrary repositories
> 
>  builtin/count-objects.c |  10 ++-
>  builtin/fsck.c  |  15 ++--
>  builtin/gc.c|   8 +-
>  builtin/index-pack.c|   1 +
>  builtin/pack-objects.c  |  23 +++--
>  builtin/pack-redundant.c|   8 +-
>  builtin/receive-pack.c  |   4 +-
>  builtin/submodule--helper.c |   4 +-
>  bulk-checkin.c  |   3 +-
>  cache.h |  50 ++-
>  contrib/coccinelle/packed_git.cocci |  15 
>  fast-import.c   |  10 ++-
>  fetch-pack.c|   3 +-
>  http-backend.c  |   8 +-
>  http-push.c |   1 +
>  http-walker.c   |   4 +-
>  http.c  |   9 +-
>  mru.h   |   1 +
>  object-store.h  |  71 
>  pack-bitmap.c   |   6 +-
>  pack-check.c|   1 +
>  pack-revindex.c |   1 +
>  packfile.c  |  94 ++--
>  packfile.h  |   6 +-
>  reachable.c |   1 +
>  repository.c|   4 +-
>  repository.h|   7 ++
>  server-info.c       |   8 +-
>  sha1_file.c | 165 
> 
>  sha1_name.c |  11 ++-
>  streaming.c |   5 +-
>  transport.c |   4 +-
>  32 files changed, 344 insertions(+), 217 deletions(-)
>  create mode 100644 contrib/coccinelle/packed_git.cocci
>  create mode 100644 object-store.h
> 
> -- 
> 2.14.1.581.gf28d330327
> 

-- 
Brandon Williams


Re: [PATCH 07/39] sha1_file: add repository argument to alt_odb_usable

2017-08-30 Thread Brandon Williams
On 08/29, Jonathan Nieder wrote:
> From: Stefan Beller <sbel...@google.com>
> 
> Add a repository argument to allow the alt_odb_usable caller to be
> more specific about which repository to act on. This is a small
> mechanical change; it doesn't change the implementation to handle
> repositories other than the_repository yet.
> 
> Since the implementation does not yet work with other repositories,
> use a wrapper macro to enforce that the caller passes in
> the_repository as the first argument. It would be more appealing to
> use BUILD_ASSERT_OR_ZERO to enforce this, but that doesn't work
> because it requires a compile-time constant and common compilers like
> gcc 4.8.4 do not consider "r == the_repository" a compile-time
> constant.

Very clever trick :)

> 
> Signed-off-by: Stefan Beller <sbel...@google.com>
> Signed-off-by: Jonathan Nieder <jrnie...@gmail.com>
> ---
>  sha1_file.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/sha1_file.c b/sha1_file.c
> index 7c6ffd205a..1c757b44a3 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -280,7 +280,9 @@ static const char *alt_sha1_path(struct 
> alternate_object_database *alt,
>  /*
>   * Return non-zero iff the path is usable as an alternate object database.
>   */
> -static int alt_odb_usable(struct strbuf *path, const char *normalized_objdir)
> +#define alt_odb_usable(r, p, n) alt_odb_usable_##r(p, n)
> +static int alt_odb_usable_the_repository(struct strbuf *path,
> +  const char *normalized_objdir)
>  {
>   struct alternate_object_database *alt;
>  
> @@ -348,7 +350,7 @@ static int link_alt_odb_entry(const char *entry, const 
> char *relative_base,
>   while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/')
>   strbuf_setlen(, pathbuf.len - 1);
>  
> - if (!alt_odb_usable(, normalized_objdir)) {
> + if (!alt_odb_usable(the_repository, , normalized_objdir)) {
>   strbuf_release();
>   return -1;
>   }
> -- 
> 2.14.1.581.gf28d330327
> 

-- 
Brandon Williams


Re: [RFC 0/7] transitioning to protocol v2

2017-08-30 Thread Brandon Williams
On 08/30, Bryan Turner wrote:
> On Fri, Aug 25, 2017 at 10:29 AM, Jeff King <p...@peff.net> wrote:
> > On Thu, Aug 24, 2017 at 03:53:21PM -0700, Brandon Williams wrote:
> >
> >> The biggest question I'm trying to answer is if these are reasonable ways 
> >> with
> >> which to communicate a request to a server to use a newer protocol, without
> >> breaking current servers/clients.  As far as I've tested, with patches 1-5
> >> applied I can still communicate with current servers without causing any
> >> problems.
> >
> > Current git.git servers, I assume?. How much do we want to care about
> > alternate implementations? I would not be surprised if other git://
> > implementations are more picky about cruft after the virtual-host field
> > (though I double-checked GitHub's implementation at least, and it is
> > fine).
> >
> > I don't think libgit2 implements the server side. That leaves probably
> > JGit, Microsoft's VSTS (which I think is custom), and whatever Atlassian
> > and GitLab use.
> 
> Before I manually apply the patches to test how they work with
> Bitbucket Server, are they applied on a branch somewhere where I can
> just fetch them? If not, I'll apply them manually and verify.

I just pushed this set of patches up to: 
https://github.com/bmwill/git/tree/protocol-v2
so you should be able to fetch them from there (saves you from having to
manually applying the patches).

> Just based on the description, though, I expect no issues. We don't
> currently support the git:// protocol. Our HTTP handling passes
> headers through to the receive-pack and upload-pack processes as
> environment variables (with a little massaging), but doesn't consider
> them itself; it only considers the URL and "service" query parameter
> to decide what command to run and to detect "dumb" requests. Our SSH
> handling ignores any environment variables provided and does not
> forward them to the git process, similar to VSTS.
> 
> I'll confirm explicitly, to be certain, but just based on reading the
> overview and knowing our code I think the described approaches should
> work fine.

Perfect!  Thanks for taking the time to verify that this will work.

-- 
Brandon Williams


Re: [PATCH] config: use a static lock_file struct

2017-08-30 Thread Brandon Williams
On 08/30, Jeff King wrote:
> On Wed, Aug 30, 2017 at 12:57:31PM -0700, Brandon Williams wrote:
> 
> > > And I think we're fine there even with a doubly-linked list. It's still
> > > the single update of the "next" pointer that controls that second
> > > traversal.
> > 
> > I know it was mentioned earlier but if this is a critical section, and
> > it would be bad if it was interrupted, then couldn't we turn off
> > interrupts before attempting to remove an item from the list?
> 
> If we have to, that's an option. I mostly didn't want to get into the
> portability headaches of signal-handling if we can avoid it.
> 
> Right now sigchain uses plain old signal(). We do use sigaction() in a
> few places, but in a vanilla way. So the portability questions are:
> 
>   - can we rely on using more interesting bits of sigaction like
> sa_mask?  Probably.
> 
>   - which flags should we be setting in sa_flags to get the same
> behavior we've been getting with signal()? Or alternatively, which
> behavior do we want?
> 
> On Linux and BSD systems, at least, signal() defers repeat instances of
> the handled signal. So getting two quick SIGTERMs will not interrupt the
> handler to deliver the second. But getting SIGTERM followed by SIGINT would.
> 
> We could extend that protection by having sigchain_push_common() set
> sa_mask to cover all of the related signals. On Linux and BSD the
> current code using signal() also implies SA_RESTART. We could add that
> to our flags, though I suspect in practice it doesn't matter. Whenever
> we establish a handler like this our intent is to never return from it.
> 
> That just protects us from calling the _same_ handler from itself. But
> that's probably enough in practice, and means handlers don't have to
> worry about "critical sections". The other alternative is sigprocmask()
> to block signals entirely during a section. I'm not sure if there are
> portability questions there (it looks like we have a mingw wrapper
> there, but it's a complete noop).

Yeah there's a lot about signals that I'm not very clear on.  I do know
that Eric helped me out on the fork-exec series I worked on earlier in
the year and I believe it was to turn on/off signals during process
launches in 45afb1ca9 (run-command: block signals between fork and
execve, 2017-04-19).  Though that bit of code is strictly for unix so I
wouldn't know how that would work on windows machines.  Portability does
seem to always be a challenging problem.

-- 
Brandon Williams


Re: [PATCH] config: use a static lock_file struct

2017-08-30 Thread Brandon Williams
On 08/30, Jeff King wrote:
> On Wed, Aug 30, 2017 at 08:55:01AM +0200, Michael Haggerty wrote:
> 
> > > + tempfile->active = 0;
> > > + for (p = _list; *p; p = &(*p)->next) {
> > > + if (*p == tempfile) {
> > > + *p = tempfile->next;
> > > + break;
> > > + }
> > >   }
> > >  }
> > 
> > `deactivate_tempfile()` is O(N) in the number of active tempfiles. This
> > could get noticeable for, say, updating 30k references, which involves
> > obtaining 30k reference locks. I think that code adds the locks in
> > alphabetical order and also removes them in alphabetical order, so the
> > overall effort would go like O(N²). I'm guessing that this would be
> > measurable but not fatal for realistic numbers of references, but it
> > should at least be benchmarked.
> > 
> > There are three obvious ways to make this O(1) again:
> > 
> > * Make the list doubly-linked.
> 
> Yeah, I noticed this when writing it, and the doubly-linked list was my
> first thought. It's much more straight-forward than making guesses about
> traversal order, and we already have a nice implementation in list.h.

I agree that a doubly-linked list is probably the best way to go in
order to solve the performance issue.

> 
> What made me hesitate for this demonstration was that it turns the
> removal into _two_ pointer updates. That made me more nervous about the
> racy case where we get a single handler while already deleting
> something. Specifically when we have "a <-> b <-> c" and we've updated
> "a->next" to point to "c" but "c->prev" still points to "b".
> 
> But even with the singly-linked list we're not fully race-proof
> (somebody could set *p to NULL between the time we look at it and
> dereference it). What we really care about is not two versions of the
> function running simultaneously, but one version getting interrupted by
> another and having the second one see a sane state (because we'll never
> return to the first signal handler; the second one will raise() and
> die).
> 
> And I think we're fine there even with a doubly-linked list. It's still
> the single update of the "next" pointer that controls that second
> traversal.
> 
> -Peff

I know it was mentioned earlier but if this is a critical section, and
it would be bad if it was interrupted, then couldn't we turn off
interrupts before attempting to remove an item from the list?

-- 
Brandon Williams


Re: [RFC 0/7] transitioning to protocol v2

2017-08-30 Thread Brandon Williams
On 08/30, Jeff Hostetler wrote:
> 
> 
> On 8/29/2017 11:06 PM, Jeff King wrote:
> >On Tue, Aug 29, 2017 at 04:08:25PM -0400, Jeff Hostetler wrote:
> >
> >>I just wanted to jump in here and say I've done some initial
> >>testing of this against VSTS and so far it seems fine.  And yes,
> >>we have a custom git server.
> >
> >Great, thank you for checking.
> >
> >>VSTS doesn't support the "git://" protocol, so the double-null trick
> >>isn't an issue for us.  But "https://; worked just fine.  I'm still
> >>asking around internally whether we support passing SSH environment
> >>variables.
> >
> >The key thing for ssh is not whether you support passing environment
> >variables. It's whether you quietly ignore unknown variables rather than
> >cutting off the connection.
> >
> >To support the v2 protocol you'd need to pass the new variables, but
> >you'd also need to modify your server to actually do something useful
> >with them anyway. At this point we're mostly concerned with whether we
> >can safely pass the variables to current implementations unconditionally
> >and get a reasonable outcome.
> 
> Right.  I just spoke with our server folks and, currently, our SSH
> support quietly eats ALL variables.   So we're safe :-)
> 
> I'm starting a conversation with them to pass them thru so we can
> be ready for this.  (Assuming we choose to go this way.)

Perfect! Thanks again.

-- 
Brandon Williams


Re: [RFC 0/7] transitioning to protocol v2

2017-08-29 Thread Brandon Williams
On 08/29, Jeff Hostetler wrote:
> 
> 
> On 8/25/2017 1:35 PM, Jonathan Nieder wrote:
> >Hi,
> >
> >Jeff King wrote:
> >>On Thu, Aug 24, 2017 at 03:53:21PM -0700, Brandon Williams wrote:
> >
> >>>Another version of Git's wire protocol is a topic that has been discussed 
> >>>and
> >>>attempted by many in the community over the years.  The biggest challenge, 
> >>>as
> >>>far as I understand, has been coming up with a transition plan to using 
> >>>the new
> >>>server without breaking existing clients and servers.  As such this RFC is
> >>>really only concerned with solidifying a transition plan.  Once it has been
> >>>decided how we can transition to a new protocol we can get into decided 
> >>>what
> >>>this new protocol would look like (though it would obviously eliminate the 
> >>>ref
> >>>advertisement ;).
> >>
> >
> >>I don't think libgit2 implements the server side. That leaves probably
> >>JGit, Microsoft's VSTS (which I think is custom), and whatever Atlassian
> >>and GitLab use.
> >
> >I'd be happy if someone tests the patches against those. :)
> 
> I just wanted to jump in here and say I've done some initial
> testing of this against VSTS and so far it seems fine.  And yes,
> we have a custom git server.
> 
> VSTS doesn't support the "git://" protocol, so the double-null trick
> isn't an issue for us.  But "https://; worked just fine.  I'm still
> asking around internally whether we support passing SSH environment
> variables.
> 
> Jeff
> 

Thanks for checking on this, I really appreciate it.  Please let me know
if anything I haven't thought of becomes an issue.

I'm currently working on getting these patches into a more polished
state to be used (as discussed elsewhere on this thread) as a precursor
to an actual v2.

-- 
Brandon Williams


Re: [PATCH] config: use a static lock_file struct

2017-08-29 Thread Brandon Williams
On 08/29, Brandon Williams wrote:
> On 08/29, Jeff King wrote:
> > On Tue, Aug 29, 2017 at 02:53:41PM -0400, Jeff King wrote:
> > 
> > > It looks like the config code has a minor-ish leak. Patch to follow.
> > 
> > Here it is.
> > 
> > -- >8 --
> > Subject: [PATCH] config: use a static lock_file struct
> > 
> > When modifying git config, we xcalloc() a struct lock_file
> > but never free it. This is necessary because the tempfile
> > code (upon which the locking code is built) requires that
> > the resulting struct remain valid through the life of the
> > program. However, it also confuses leak-checkers like
> > valgrind because only the inner "struct tempfile" is still
> > reachable; no pointer to the outer lock_file is kept.
> 
> Is this just due to a limitation in the tempfile code?  Would it be
> possible to improve the tempfile code such that we don't need to require
> that a tempfile, once created, is required to exist for the remaining
> life of the program?
> 
> > 
> > Other code paths solve this by using a single static lock
> > struct. We can do the same here, because we know that we'll
> > only lock and modify one config file at a time (and
> > assertions within the lockfile code will ensure that this
> > remains the case).
> > 
> > That removes a real leak (when we fail to free the struct
> > after locking fails) as well as removes the valgrind false
> > positive. It also means that doing N sequential
> > config-writes will use a constant amount of memory, rather
> > than leaving stale lock_files for each.
> > 
> > Note that since "lock" is no longer a pointer, it can't be
> > NULL anymore. But that's OK. We used that feature only to
> > avoid calling rollback_lock_file() on an already-committed
> > lock. Since the lockfile code keeps its own "active" flag,
> > it's a noop to rollback an inactive lock, and we don't have
> > to worry about this ourselves.
> > 
> > Signed-off-by: Jeff King <p...@peff.net>
> > ---
> > In the long run we may want to drop the "tempfiles must remain forever"
> > rule. This is certainly not the first time it has caused confusion or
> > leaks. And I don't think it's a fundamental issue, just the way the code
> > is written. But in the interim, this fix is probably worth doing.

I didn't read far enough apparently :)  I took a look at this once and
found that the in order to do this we would just need to be careful in
modifying a list of tempfiles.

> > 
> >  config.c | 24 +++-
> >  1 file changed, 7 insertions(+), 17 deletions(-)
> > 
> > diff --git a/config.c b/config.c
> > index d0d8ce823a..1603f96e40 100644
> > --- a/config.c
> > +++ b/config.c
> > @@ -2450,7 +2450,7 @@ int git_config_set_multivar_in_file_gently(const char 
> > *config_filename,
> >  {
> > int fd = -1, in_fd = -1;
> > int ret;
> > -   struct lock_file *lock = NULL;
> > +   static struct lock_file lock;
> > char *filename_buf = NULL;
> > char *contents = NULL;
> > size_t contents_sz;
> > @@ -2469,8 +2469,7 @@ int git_config_set_multivar_in_file_gently(const char 
> > *config_filename,
> >  * The lock serves a purpose in addition to locking: the new
> >  * contents of .git/config will be written into it.
> >  */
> > -   lock = xcalloc(1, sizeof(struct lock_file));
> > -   fd = hold_lock_file_for_update(lock, config_filename, 0);
> > +   fd = hold_lock_file_for_update(, config_filename, 0);
> > if (fd < 0) {
> > error_errno("could not lock config file %s", config_filename);
> > free(store.key);
> > @@ -2583,8 +2582,8 @@ int git_config_set_multivar_in_file_gently(const char 
> > *config_filename,
> > close(in_fd);
> > in_fd = -1;
> >  
> > -   if (chmod(get_lock_file_path(lock), st.st_mode & 0) < 0) {
> > -   error_errno("chmod on %s failed", 
> > get_lock_file_path(lock));
> > +   if (chmod(get_lock_file_path(), st.st_mode & 0) < 0) {
> > +   error_errno("chmod on %s failed", 
> > get_lock_file_path());
> > ret = CONFIG_NO_WRITE;
> > goto out_free;
> > }
> > @@ -2639,28 +2638,19 @@ int git_config_set_multivar_in_file_gently(const 
> > char *config_filename,
> > contents = NULL;
> > }
> >  
> > -   if (commit_

Re: [PATCH] config: use a static lock_file struct

2017-08-29 Thread Brandon Williams
   /* Invalidate the config cache */
>   git_config_clear();
>  
>  out_free:
> - if (lock)
> - rollback_lock_file(lock);
> + rollback_lock_file();
>   free(filename_buf);
>   if (contents)
>   munmap(contents, contents_sz);
> @@ -2669,7 +2659,7 @@ int git_config_set_multivar_in_file_gently(const char 
> *config_filename,
>   return ret;
>  
>  write_err_out:
> - ret = write_error(get_lock_file_path(lock));
> + ret = write_error(get_lock_file_path());
>   goto out_free;
>  
>  }
> -- 
> 2.14.1.721.gc5bc1565f1
> 

-- 
Brandon Williams


Re: [PATCH 00/10] Implement transactions for the packed ref store

2017-08-29 Thread Brandon Williams
On 08/29, Michael Haggerty wrote:
> This should be the second-to-last patch series in the quest for
> mmapped packed-refs.
> 
> Before this patch series, there is still more coupling than necessary
> between `files_ref_store` and `packed_ref_store`:
> 
> * `files_ref_store` adds packed references (e.g., during `git clone`
>   and `git pack-refs`) by inserting them into the `packed_ref_store`'s
>   internal `ref_cache`, then calling `commit_packed_refs()`. But
>   `files_ref_store` shouldn't even *know* that `packed_ref_store` uses
>   a `ref_cache`, let alone muck about in it.
> 
> * `files_ref_store` deletes packed references by calling
>   `repack_without_refs()`.
> 
> Instead, implement reference transactions and `delete_refs()` for
> `packed_ref_store`, and change `files_ref_store` to use these standard
> methods rather than internal `packed_ref_store` methods.
> 
> This patch series finishes up the encapsulation of `packed_ref_store`.
> At the end of the series, the outside world only interacts with it via
> the refs API plus a couple of locking-related functions. That will
> make it easy for the next patch series to change the internal workings
> of `packed_ref_store` without affecting `files_ref_store`.
> 
> Along the way, we fix some longstanding problems with reference
> updates. Quoting from patch [08/10]:
> 
> First, the old code didn't obtain the `packed-refs` lock until
> `files_transaction_finish()`. This means that a failure to acquire the
> `packed-refs` lock (e.g., due to contention with another process)
> wasn't detected until it was too late (problems like this are supposed
> to be detected in the "prepare" phase). The new code acquires the
> `packed-refs` lock in `files_transaction_prepare()`, the same stage of
> the processing when the loose reference locks are being acquired,
> removing another reason why the "prepare" phase might succeed and the
> "finish" phase might nevertheless fail.
> 
> Second, the old code deleted the loose version of a reference before
> deleting any packed version of the same reference. This left a moment
> when another process might think that the packed version of the
> reference is current, which is incorrect. (Even worse, the packed
> version of the reference can be arbitrarily old, and might even point
> at an object that has since been garbage-collected.)
> 
> Third, if a reference deletion fails to acquire the `packed-refs` lock
> altogether, then the old code might leave the repository in the
> incorrect state (possibly corrupt) described in the previous
> paragraph.
> 
> Now we activate the new "packed-refs" file (sans any references that
> are being deleted) *before* deleting the corresponding loose
> references. But we hold the "packed-refs" lock until after the loose
> references have been finalized, thus preventing a simultaneous
> "pack-refs" process from packing the loose version of the reference in
> the time gap, which would otherwise defeat our attempt to delete it.
> 
> This patch series is also available as branch
> `packed-ref-transactions` in my fork on GitHub [1]. A draft of the
> patch series to change `packed_ref_store` to use mmap and get rid of
> its `ref_cache` is available as branch `mmap-packed-refs` from the
> same source.

Overall the patches look sane to me, though I don't believe I'm
qualified in this area to give you a complete thumbs up since I don't
understand the refs code super well yet.  I do like reading patch from
you as you do a great job of laying out what you are doing in code,
comments and commit messages, something I'm trying to get better at :)

-- 
Brandon Williams


Re: [PATCH 09/10] packed-backend: rip out some now-unused code

2017-08-29 Thread Brandon Williams
ile, sb.buf) < 0) {
> - strbuf_addf(err, "unable to create file %s: %s",
> - sb.buf, strerror(errno));
> - strbuf_release();
> - goto out;
> - }
> - strbuf_release();
> -
> - out = fdopen_tempfile(>tempfile, "w");
> - if (!out) {
> - strbuf_addf(err, "unable to fdopen packed-refs tempfile: %s",
> - strerror(errno));
> - goto error;
> - }
> -
> - if (fprintf(out, "%s", PACKED_REFS_HEADER) < 0) {
> - strbuf_addf(err, "error writing to %s: %s",
> - get_tempfile_path(>tempfile), 
> strerror(errno));
> - goto error;
> - }
> -
> - iter = cache_ref_iterator_begin(packed_ref_cache->cache, NULL, 0);
> - while ((ok = ref_iterator_advance(iter)) == ITER_OK) {
> - struct object_id peeled;
> - int peel_error = ref_iterator_peel(iter, );
> -
> - if (write_packed_entry(out, iter->refname, iter->oid->hash,
> -peel_error ? NULL : peeled.hash)) {
> - strbuf_addf(err, "error writing to %s: %s",
> - get_tempfile_path(>tempfile),
> - strerror(errno));
> - ref_iterator_abort(iter);
> - goto error;
> - }
> - }
> -
> - if (ok != ITER_DONE) {
> - strbuf_addf(err, "unable to rewrite packed-refs file: "
> - "error iterating over old contents");
> - goto error;
> - }
> -
> - if (rename_tempfile(>tempfile, packed_refs_path)) {
> - strbuf_addf(err, "error replacing %s: %s",
> - refs->path, strerror(errno));
> - goto out;
> - }
> -
> - ret = 0;
> - goto out;
> -
> -error:
> - delete_tempfile(>tempfile);
> -
> -out:
> - free(packed_refs_path);
> - return ret;
> -}
> -
> -/*
> - * Rewrite the packed-refs file, omitting any refs listed in
> - * 'refnames'. On error, leave packed-refs unchanged, write an error
> - * message to 'err', and return a nonzero value. The packed refs lock
> - * must be held when calling this function; it will still be held when
> - * the function returns.
> - *
> - * The refs in 'refnames' needn't be sorted. `err` must not be NULL.
> - */
> -int repack_without_refs(struct ref_store *ref_store,
> - struct string_list *refnames, struct strbuf *err)
> -{
> - struct packed_ref_store *refs =
> - packed_downcast(ref_store, REF_STORE_WRITE | REF_STORE_MAIN,
> - "repack_without_refs");
> - struct ref_dir *packed;
> - struct string_list_item *refname;
> - int needs_repacking = 0, removed = 0;
> -
> - packed_assert_main_repository(refs, "repack_without_refs");
> - assert(err);
> -
> - if (!is_lock_file_locked(>lock))
> - die("BUG: repack_without_refs called without holding lock");
> -
> - /* Look for a packed ref */
> - for_each_string_list_item(refname, refnames) {
> - if (get_packed_ref(refs, refname->string)) {
> - needs_repacking = 1;
> - break;
> - }
> - }
> -
> - /* Avoid locking if we have nothing to do */
> - if (!needs_repacking)
> - return 0; /* no refname exists in packed refs */
> -
> - packed = get_packed_refs(refs);
> -
> - /* Remove refnames from the cache */
> - for_each_string_list_item(refname, refnames)
> - if (remove_entry_from_dir(packed, refname->string) != -1)
> - removed = 1;
> - if (!removed) {
> - /*
> -  * All packed entries disappeared while we were
> -  * acquiring the lock.
> -  */
> - clear_packed_ref_cache(refs);
> - return 0;
> - }
> -
> - /* Write what remains */
> - return commit_packed_refs(>base, err);
> -}
> -
>  static int packed_init_db(struct ref_store *ref_store, struct strbuf *err)
>  {
>   /* Nothing to do. */
> diff --git a/refs/packed-backend.h b/refs/packed-backend.h
> index 7af2897757..61687e408a 100644
> --- a/refs/packed-backend.h
> +++ b/refs/packed-backend.h
> @@ -23,12 +23,4 @@ int packed_refs_lock(struct ref_store *ref_store, int 
> flags, struct strbuf *err)
>  void packed_refs_unlock(struct ref_store *ref_store);
>  int packed_refs_is_locked(struct ref_store *ref_store);
>  
> -void add_packed_ref(struct ref_store *ref_store,
> - const char *refname, const struct object_id *oid);
> -
> -int commit_packed_refs(struct ref_store *ref_store, struct strbuf *err);
> -
> -int repack_without_refs(struct ref_store *ref_store,
> - struct string_list *refnames, struct strbuf *err);
> -
>  #endif /* REFS_PACKED_BACKEND_H */
> -- 
> 2.14.1
> 

-- 
Brandon Williams


Re: [PATCH 04/10] packed_delete_refs(): implement method

2017-08-29 Thread Brandon Williams
On 08/29, Michael Haggerty wrote:
> Implement `packed_delete_refs()` using a reference transaction. This
> means that `files_delete_refs()` can use `refs_delete_refs()` instead
> of `repack_without_refs()` to delete any packed references, decreasing
> the coupling between the classes.
> 
> Signed-off-by: Michael Haggerty <mhag...@alum.mit.edu>
> ---
>  refs/files-backend.c  |  2 +-
>  refs/packed-backend.c | 45 -
>  2 files changed, 45 insertions(+), 2 deletions(-)
> 
> diff --git a/refs/files-backend.c b/refs/files-backend.c
> index fccbc24ac4..2c78f63494 100644
> --- a/refs/files-backend.c
> +++ b/refs/files-backend.c
> @@ -1157,7 +1157,7 @@ static int files_delete_refs(struct ref_store 
> *ref_store, const char *msg,
>   if (packed_refs_lock(refs->packed_ref_store, 0, ))
>   goto error;
>  
> - if (repack_without_refs(refs->packed_ref_store, refnames, )) {
> + if (refs_delete_refs(refs->packed_ref_store, msg, refnames, flags)) {
>   packed_refs_unlock(refs->packed_ref_store);
>   goto error;
>   }
> diff --git a/refs/packed-backend.c b/refs/packed-backend.c
> index d19b3bfba5..83a088118f 100644
> --- a/refs/packed-backend.c
> +++ b/refs/packed-backend.c
> @@ -1082,7 +1082,50 @@ static int packed_initial_transaction_commit(struct 
> ref_store *ref_store,
>  static int packed_delete_refs(struct ref_store *ref_store, const char *msg,
>struct string_list *refnames, unsigned int flags)
>  {
> - die("BUG: not implemented yet");
> + struct packed_ref_store *refs =
> + packed_downcast(ref_store, REF_STORE_WRITE, "delete_refs");
> + struct strbuf err = STRBUF_INIT;
> + struct ref_transaction *transaction;
> + struct string_list_item *item;
> + int ret;
> +
> + (void)refs; /* We need the check above, but don't use the variable */

Can't say I've seen a line like this before, Is the intent to just mark
that this variable won't be used like you mention in the comment?

> +
> + if (!refnames->nr)
> + return 0;
> +
> + /*
> +  * Since we don't check the references' old_oids, the
> +  * individual updates can't fail, so we can pack all of the
> +  * updates into a single transaction.
> +  */
> +
> + transaction = ref_store_transaction_begin(ref_store, );
> + if (!transaction)
> + return -1;
> +
> + for_each_string_list_item(item, refnames) {
> + if (ref_transaction_delete(transaction, item->string, NULL,
> +flags, msg, )) {
> + warning(_("could not delete reference %s: %s"),
> + item->string, err.buf);
> + strbuf_reset();
> + }
> + }
> +
> + ret = ref_transaction_commit(transaction, );
> +
> + if (ret) {
> + if (refnames->nr == 1)
> + error(_("could not delete reference %s: %s"),
> +   refnames->items[0].string, err.buf);
> + else
> +     error(_("could not delete references: %s"), err.buf);
> + }
> +
> + ref_transaction_free(transaction);
> + strbuf_release();
> + return ret;
>  }
>  
>  static int packed_pack_refs(struct ref_store *ref_store, unsigned int flags)
> -- 
> 2.14.1
> 

-- 
Brandon Williams


Re: [PATCH] Add t/helper/test-write-cache to .gitignore

2017-08-28 Thread Brandon Williams
On 08/28, Jonathan Tan wrote:
> This new binary was introduced in commit 3921a0b ("perf: add test for
> writing the index", 2017-08-21), but a .gitignore entry was not added
> for it. Add that entry.
> 
> Signed-off-by: Jonathan Tan <jonathanta...@google.com>

Looks good to me

> ---
>  t/helper/.gitignore | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/t/helper/.gitignore b/t/helper/.gitignore
> index 721650256..7c9d28a83 100644
> --- a/t/helper/.gitignore
> +++ b/t/helper/.gitignore
> @@ -35,3 +35,4 @@
>  /test-svn-fe
>  /test-urlmatch-normalization
>  /test-wildmatch
> +/test-write-cache
> -- 
> 2.14.1.581.gf28d330327-goog
> 

-- 
Brandon Williams


Re: [PATCH/RFC] push: anonymize URL in error output

2017-08-25 Thread Brandon Williams
ttp-push-smart.sh
> >> @@ -377,5 +377,23 @@ test_expect_success 'push status output scrubs 
> >> password' '
> >>grep "^To $HTTPD_URL/smart/test_repo.git" status
> >>  '
> >>  
> >> +cat >"$HTTPD_DOCUMENT_ROOT_PATH/test_repo.git/hooks/update" < >> +#!/bin/sh
> >> +exit 1
> >> +EOF
> >> +chmod a+x "$HTTPD_DOCUMENT_ROOT_PATH/test_repo.git/hooks/update"
> >> +
> >> +cat >exp < >> +error: failed to push some refs to '$HTTPD_URL/smart/test_repo.git'
> >> +EOF
> > 
> > I know the t5541 script, which is old and messy, led you into these bad
> > constructs. But usually in modern tests we:
> > 
> >  1. Try to keep all commands inside test_expect blocks to catch
> > unexpected failures or unwanted output.
> > 
> >  2. Use write_script for writing scripts, like:
> > 
> >   write_script "$HTTPD_DOCUMENT_ROOT_PATH/test_repo.git/hooks/update" 
> > <<-\EOF
> >   exit 1
> >   EOF
> > 
> >  3. Backslash our here-doc delimiter to suppress interpolation.
> > 
> >> +test_expect_success 'failed push status output scrubs password' '
> >> +  cd "$ROOT_PATH"/test_repo_clone &&
> >> +  test_must_fail git push "$HTTPD_URL_USER_PASS/smart/test_repo.git" 
> >> +HEAD:scrub_err 2>stderr &&
> >> +  grep "^error: failed to push some refs" stderr >act &&
> >> +  test_i18ncmp exp act
> >> +'
> >> +rm -f "$HTTPD_DOCUMENT_ROOT_PATH/test_repo.git/hooks/update"
> > 
> > Similarly, this "rm" should probably be a test_when_finished in the
> > block with the write_script (unless you really need to carry it over
> > several test_expect blocks, in which case there should be an explicit
> > test_expect cleaning it up).
> Thanks! You're right. I just followed examples in the file.
> Updated [1], will send with the next patch version.
> 
> > 
> > Instead of grepping for the exact error, should we instead grep for the
> > password to make sure it is not present on _any_ line?
> > 
> > -Peff
> > 
> One possible issue I see is that this will make it overlap with the
> 'push status output scrubs password' case above. But if it's not a
> problem, I can replace last two lines with just a 'test_i18ngrep !'
> 
> [1]:
> https://github.com/sainaen/git/blob/af17713/t/t5541-http-push-smart.sh#L380-L392

-- 
Brandon Williams


Re: [RFC 3/7] protocol: tell server that the client understands v2

2017-08-25 Thread Brandon Williams
On 08/25, Brandon Williams wrote:
> On 08/25, Junio C Hamano wrote:
> > Brandon Williams <bmw...@google.com> writes:
> > 
> > > + /* If using a new version put that stuff here after a second 
> > > null byte */
> > > + strbuf_addch(, '\0');
> > > + strbuf_addf(, "version=%d%c", 2, '\0');
> > > + /* subsequent supported versions can also be added */
> > > + strbuf_addf(, "version=%d%c", 3, '\0');
> > 
> > Isn't this last one meant only as a comment?
> 
> Sorry since this was structured as an RFC I didn't go back through the
> code with a fine tooth comb to ensure I removed or commented out any
> debugging statements.  Stefan also pointed out to me that I left in an
> if (0) statement somewhere haha.

Oh one more thing about this line.  I added it to show (and check) that
git-daemon would be able to cope with more than one field being added as
I wanted to avoid the issue we had when adding the 'host' field where
additional fields aren't allowed as they aren't gracefully ignored by
the server.

-- 
Brandon Williams


Re: [RFC 3/7] protocol: tell server that the client understands v2

2017-08-25 Thread Brandon Williams
On 08/25, Junio C Hamano wrote:
> Brandon Williams <bmw...@google.com> writes:
> 
> > +   /* If using a new version put that stuff here after a second 
> > null byte */
> > +   strbuf_addch(, '\0');
> > +   strbuf_addf(, "version=%d%c", 2, '\0');
> > +   /* subsequent supported versions can also be added */
> > +   strbuf_addf(, "version=%d%c", 3, '\0');
> 
> Isn't this last one meant only as a comment?

Sorry since this was structured as an RFC I didn't go back through the
code with a fine tooth comb to ensure I removed or commented out any
debugging statements.  Stefan also pointed out to me that I left in an
if (0) statement somewhere haha.

-- 
Brandon Williams


Re: [RFC 0/7] transitioning to protocol v2

2017-08-25 Thread Brandon Williams
On 08/25, Jeff King wrote:
> On Fri, Aug 25, 2017 at 10:35:50AM -0700, Jonathan Nieder wrote:
> 
> > > Sadly, while splitting these things apart makes the protocol
> > > conceptually cleaner, I'm not sure if we can consider them separately
> > > and avoid adding an extra round-trip to the protocol.
> > 
> > How about the idea of using this mechanism to implement a protocol
> > "v1"?
> > 
> > The reply would be the same as today, except that it has a "protocol
> > v1" pkt-line at the beginning.  So this doesn't change the number of
> > round-trips --- it just validates the protocol migration approach.
> 
> I'm not that worried about validating the ideas here to shoe-horn a
> version field. I'm worried about what "step 2" is going to look like,
> and whether "we shoe-horned a version field" can be extended to "we
> shoe-horned arbitrary data".

I know that this initial RFC didn't mention adding arbitrary data in the
initial request but I fully intend that the final version will allow
such a thing.  One such idea (via the envvar, though the same can be
done with git-daemon) would be to have GIT_PROTOCOL hold colon
delimited values, versions could be indicated by "v1", "v2", etc while
arbitrary key/values as "key=value" such that the envvar would look
like: 'v1:v2:key1=value1:key2=value2'.  This would mean that the client
understands protocol version 1 and 2 (so either are acceptable for the
server to select since there is a change the server doesn't understand
v2 or some other higher version number) and that if supported it wants
key1 to have value 'value1' and key2 to have value 'value2'.

As you said the initial request to git-daemon is limited in length (I
think envvars have a length limit too?) so we would still be limited in
how much data we can send in that initial request and so we should
design a new protocol in such a way that doesn't hinge on adding extra
data in that first request (aside from a version number) but that can
use it to speed things up if at all possible.

> > Can we do that by making it a patch / letting it cook for a while in
> > 'next'? :)
> 
> If people actually ran 'next', that would help. I was hoping that we
> could get it out to the masses behind a feature flag, and dangle it in
> front of them with "this will improve fetch performance if you turn it
> on". But that carrot implies going all the way through the follow-on
> steps of designing the performance-improving v2 extensions and getting
> them implemented on the server side.

We run 'next' here so we will be able to get at least a little bit of
feedback from a small subset of users.

-- 
Brandon Williams


[RFC 4/7] t: fix ssh tests to cope with using '-o SendEnv=GIT_PROTOCOL'

2017-08-24 Thread Brandon Williams
Update some of our tests to cope with ssh being launched with the option
to send the protocol version.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 t/lib-proto-disable.sh   |  1 +
 t/t5601-clone.sh | 10 +-
 t/t5602-clone-remote-exec.sh |  4 ++--
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/t/lib-proto-disable.sh b/t/lib-proto-disable.sh
index 83babe57d..d19c88f96 100644
--- a/t/lib-proto-disable.sh
+++ b/t/lib-proto-disable.sh
@@ -194,6 +194,7 @@ setup_ssh_wrapper () {
test_expect_success 'setup ssh wrapper' '
write_script ssh-wrapper <<-\EOF &&
echo >&2 "ssh: $*"
+   shift; shift
host=$1; shift
cd "$TRASH_DIRECTORY/$host" &&
eval "$*"
diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh
index 9c56f771b..7e65013c5 100755
--- a/t/t5601-clone.sh
+++ b/t/t5601-clone.sh
@@ -332,13 +332,13 @@ expect_ssh () {
1)
;;
2)
-   echo "ssh: $1 git-upload-pack '$2'"
+   echo "ssh: -o SendEnv=GIT_PROTOCOL $1 git-upload-pack 
'$2'"
;;
3)
-   echo "ssh: $1 $2 git-upload-pack '$3'"
+   echo "ssh: -o SendEnv=GIT_PROTOCOL $1 $2 
git-upload-pack '$3'"
;;
*)
-   echo "ssh: $1 $2 git-upload-pack '$3' $4"
+   echo "ssh: $1 -o SendEnv=GIT_PROTOCOL $2 $3 
git-upload-pack '$4'"
esac
} >"$TRASH_DIRECTORY/ssh-expect" &&
(cd "$TRASH_DIRECTORY" && test_cmp ssh-expect ssh-output)
@@ -390,7 +390,7 @@ test_expect_success 'double quoted plink.exe in 
GIT_SSH_COMMAND' '
copy_ssh_wrapper_as "$TRASH_DIRECTORY/plink.exe" &&
GIT_SSH_COMMAND="\"$TRASH_DIRECTORY/plink.exe\" -v" \
git clone "[myhost:123]:src" ssh-bracket-clone-plink-3 &&
-   expect_ssh "-v -P 123" myhost src
+   expect_ssh "-v" "-P 123" myhost src
 '
 
 SQ="'"
@@ -398,7 +398,7 @@ test_expect_success 'single quoted plink.exe in 
GIT_SSH_COMMAND' '
copy_ssh_wrapper_as "$TRASH_DIRECTORY/plink.exe" &&
GIT_SSH_COMMAND="$SQ$TRASH_DIRECTORY/plink.exe$SQ -v" \
git clone "[myhost:123]:src" ssh-bracket-clone-plink-4 &&
-   expect_ssh "-v -P 123" myhost src
+   expect_ssh "-v" "-P 123" myhost src
 '
 
 test_expect_success 'GIT_SSH_VARIANT overrides plink detection' '
diff --git a/t/t5602-clone-remote-exec.sh b/t/t5602-clone-remote-exec.sh
index cbcceab9d..b0d80cadd 100755
--- a/t/t5602-clone-remote-exec.sh
+++ b/t/t5602-clone-remote-exec.sh
@@ -13,14 +13,14 @@ test_expect_success setup '
 
 test_expect_success 'clone calls git upload-pack unqualified with no -u 
option' '
test_must_fail env GIT_SSH=./not_ssh git clone localhost:/path/to/repo 
junk &&
-   echo "localhost git-upload-pack '\''/path/to/repo'\''" >expected &&
+   echo "-o SendEnv=GIT_PROTOCOL localhost git-upload-pack 
'\''/path/to/repo'\''" >expected &&
test_cmp expected not_ssh_output
 '
 
 test_expect_success 'clone calls specified git upload-pack with -u option' '
test_must_fail env GIT_SSH=./not_ssh \
git clone -u ./something/bin/git-upload-pack 
localhost:/path/to/repo junk &&
-   echo "localhost ./something/bin/git-upload-pack '\''/path/to/repo'\''" 
>expected &&
+   echo "-o SendEnv=GIT_PROTOCOL localhost ./something/bin/git-upload-pack 
'\''/path/to/repo'\''" >expected &&
test_cmp expected not_ssh_output
 '
 
-- 
2.14.1.342.g6490525c54-goog



[RFC 5/7] http: send Git-Protocol-Version header

2017-08-24 Thread Brandon Williams
Tell a serve that protocol v2 can be used by sending an http header
indicating this.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 http.c  | 7 +++
 t/t5551-http-fetch-smart.sh | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/http.c b/http.c
index fa8666a21..504a14a5a 100644
--- a/http.c
+++ b/http.c
@@ -896,6 +896,11 @@ static void set_from_env(const char **var, const char 
*envname)
*var = val;
 }
 
+static const char *get_version(void)
+{
+   return "Git-Protocol-Version: 2";
+}
+
 void http_init(struct remote *remote, const char *url, int proactive_auth)
 {
char *low_speed_limit;
@@ -926,6 +931,8 @@ void http_init(struct remote *remote, const char *url, int 
proactive_auth)
if (remote)
var_override(_proxy_authmethod, 
remote->http_proxy_authmethod);
 
+   extra_http_headers = curl_slist_append(extra_http_headers, 
get_version());
+
pragma_header = curl_slist_append(http_copy_default_headers(),
"Pragma: no-cache");
no_pragma_header = curl_slist_append(http_copy_default_headers(),
diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
index a51b7e20d..ce13f2425 100755
--- a/t/t5551-http-fetch-smart.sh
+++ b/t/t5551-http-fetch-smart.sh
@@ -27,6 +27,7 @@ cat >exp < GET /smart/repo.git/info/refs?service=git-upload-pack HTTP/1.1
 > Accept: */*
 > Accept-Encoding: gzip
+> Git-Protocol-Version: 2
 > Pragma: no-cache
 < HTTP/1.1 200 OK
 < Pragma: no-cache
@@ -34,6 +35,7 @@ cat >exp < POST /smart/repo.git/git-upload-pack HTTP/1.1
 > Accept-Encoding: gzip
+> Git-Protocol-Version: 2
 > Content-Type: application/x-git-upload-pack-request
 > Accept: application/x-git-upload-pack-result
 > Content-Length: xxx
-- 
2.14.1.342.g6490525c54-goog



[RFC 2/7] pkt-line: add strbuf_packet_read

2017-08-24 Thread Brandon Williams
Add function which can be used to read the contents of a single pkt-line
into a strbuf.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 pkt-line.c | 21 +
 pkt-line.h |  1 +
 2 files changed, 22 insertions(+)

diff --git a/pkt-line.c b/pkt-line.c
index cf98f371b..875524ab8 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -312,6 +312,27 @@ int packet_read(int fd, char **src_buf, size_t *src_len,
return len;
 }
 
+int strbuf_packet_read(int fd_in, struct strbuf *sb_out, int options)
+{
+   int packet_len;
+   strbuf_grow(sb_out, LARGE_PACKET_DATA_MAX);
+   packet_len = packet_read(fd_in, NULL, NULL,
+/*
+ * strbuf_grow() above always allocates one 
extra byte to
+ * store a '\0' at the end of the string. 
packet_read()
+ * writes a '\0' extra byte at the end, too. 
Let it know
+ * that there is already room for the extra 
byte.
+ */
+sb_out->buf, LARGE_PACKET_DATA_MAX+1,
+options);
+   if (packet_len < 0)
+   return packet_len;
+
+   sb_out->len = packet_len;
+
+   return sb_out->len;
+}
+
 static char *packet_read_line_generic(int fd,
  char **src, size_t *src_len,
  int *dst_len)
diff --git a/pkt-line.h b/pkt-line.h
index d9e9783b1..c24c4f290 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -65,6 +65,7 @@ int write_packetized_from_buf(const char *src_in, size_t len, 
int fd_out);
 int packet_read(int fd, char **src_buffer, size_t *src_len, char
*buffer, unsigned size, int options);
 
+int strbuf_packet_read(int fd_in, struct strbuf *sb_out, int options);
 /*
  * Convenience wrapper for packet_read that is not gentle, and sets the
  * CHOMP_NEWLINE option. The return value is NULL for a flush packet,
-- 
2.14.1.342.g6490525c54-goog



[RFC 6/7] transport: teach client to recognize v2 server response

2017-08-24 Thread Brandon Williams
Teach a client to recognize that a server understand protocol v2 by
looking at the first pkt-line the server sends in response.  This is
done by looking for the response "version 2" sent by upload-pack.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 builtin/fetch-pack.c |   4 +-
 builtin/send-pack.c  |   5 +-
 connect.c| 165 ++-
 remote-curl.c|   7 ++-
 remote.h |  22 ++-
 transport.c  |  60 ---
 6 files changed, 178 insertions(+), 85 deletions(-)

diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c
index 366b9d13f..a2a5e1c73 100644
--- a/builtin/fetch-pack.c
+++ b/builtin/fetch-pack.c
@@ -52,6 +52,7 @@ int cmd_fetch_pack(int argc, const char **argv, const char 
*prefix)
struct fetch_pack_args args;
struct oid_array shallow = OID_ARRAY_INIT;
struct string_list deepen_not = STRING_LIST_INIT_DUP;
+   struct remote_refs_scanner scanner;
 
packet_trace_identity("fetch-pack");
 
@@ -193,7 +194,8 @@ int cmd_fetch_pack(int argc, const char **argv, const char 
*prefix)
if (!conn)
return args.diag_url ? 0 : 1;
}
-   get_remote_heads(fd[0], NULL, 0, , 0, NULL, );
+   remote_refs_scanner_init(, , 0, NULL, );
+   get_remote_heads(fd[0], NULL, 0, );
 
ref = fetch_pack(, fd, conn, ref, dest, sought, nr_sought,
 , pack_lockfile_ptr);
diff --git a/builtin/send-pack.c b/builtin/send-pack.c
index fc4f0bb5f..92ec1f871 100644
--- a/builtin/send-pack.c
+++ b/builtin/send-pack.c
@@ -154,6 +154,7 @@ int cmd_send_pack(int argc, const char **argv, const char 
*prefix)
int progress = -1;
int from_stdin = 0;
struct push_cas_option cas = {0};
+   struct remote_refs_scanner scanner;
 
struct option options[] = {
OPT__VERBOSITY(),
@@ -256,8 +257,8 @@ int cmd_send_pack(int argc, const char **argv, const char 
*prefix)
args.verbose ? CONNECT_VERBOSE : 0);
}
 
-   get_remote_heads(fd[0], NULL, 0, _refs, REF_NORMAL,
-_have, );
+   remote_refs_scanner_init(, _refs, REF_NORMAL, 
_have, );
+   get_remote_heads(fd[0], NULL, 0, );
 
transport_verify_remote_names(nr_refspecs, refspecs);
 
diff --git a/connect.c b/connect.c
index d609267be..732b651d9 100644
--- a/connect.c
+++ b/connect.c
@@ -107,97 +107,124 @@ static void annotate_refs_with_symref_info(struct ref 
*ref)
string_list_clear(, 0);
 }
 
-/*
- * Read all the refs from the other end
- */
-struct ref **get_remote_heads(int in, char *src_buf, size_t src_len,
+void remote_refs_scanner_init(struct remote_refs_scanner *scanner,
  struct ref **list, unsigned int flags,
  struct oid_array *extra_have,
  struct oid_array *shallow_points)
 {
-   struct ref **orig_list = list;
+   memset(scanner, 0, sizeof(*scanner));
+
+   scanner->orig_list = list;
+   *list = NULL;
+   scanner->list = list;
+   scanner->flags = flags;
+   scanner->extra_have = extra_have;
+   scanner->shallow_points = shallow_points;
+}
+
+int process_ref(struct remote_refs_scanner *scanner,
+   const char *buffer, int len)
+{
+   struct ref *ref;
+   struct object_id old_oid;
+   const char *name;
+   int name_len;
+   const char *arg;
+   int ret = 1;
+
+   if (len < 0)
+   die_initial_contact(scanner->seen_response);
+
+   if (!len) {
+   ret = 0;
+   goto out;
+   }
+
+   if (len > 4 && skip_prefix(buffer, "ERR ", ))
+   die("remote error: %s", arg);
+
+   if (len == GIT_SHA1_HEXSZ + strlen("shallow ") &&
+   skip_prefix(buffer, "shallow ", )) {
+   if (get_oid_hex(arg, _oid))
+   die("protocol error: expected shallow sha-1, got '%s'", 
arg);
+   if (!scanner->shallow_points)
+   die("repository on the other end cannot be shallow");
+   oid_array_append(scanner->shallow_points, _oid);
+   goto out;
+   }
+
+   if (len < GIT_SHA1_HEXSZ + 2 || get_oid_hex(buffer, _oid) ||
+   buffer[GIT_SHA1_HEXSZ] != ' ')
+   die("protocol error: expected sha/ref, got '%s'", buffer);
+   name = buffer + GIT_SHA1_HEXSZ + 1;
+
+   name_len = strlen(name);
+   if (len != name_len + GIT_SHA1_HEXSZ + 1) {
+   free(server_capabilities);
+   server_capabilities = xstrdup(name + name_len + 1);
+   }
+
+   if (scanner->extra_have && !strcmp(name, ".have")) {
+   oid_array_append(scanner->extra_have, _oid)

[RFC 1/7] pkt-line: add packet_write function

2017-08-24 Thread Brandon Williams
Add a function which can be used to write the contents of an arbitrary
buffer.  This makes it easy to build up data in a strbuf before writing
the packet.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 pkt-line.c | 6 ++
 pkt-line.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/pkt-line.c b/pkt-line.c
index 7db911957..cf98f371b 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -188,6 +188,12 @@ static int packet_write_gently(const int fd_out, const 
char *buf, size_t size)
return error("packet write failed");
 }
 
+void packet_write(const int fd_out, const char *buf, size_t size)
+{
+   if (packet_write_gently(fd_out, buf, size))
+   die_errno("packet write failed");
+}
+
 void packet_buf_write(struct strbuf *buf, const char *fmt, ...)
 {
va_list args;
diff --git a/pkt-line.h b/pkt-line.h
index 66ef610fc..d9e9783b1 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -22,6 +22,7 @@
 void packet_flush(int fd);
 void packet_write_fmt(int fd, const char *fmt, ...) __attribute__((format 
(printf, 2, 3)));
 void packet_buf_flush(struct strbuf *buf);
+void packet_write(const int fd_out, const char *buf, size_t size);
 void packet_buf_write(struct strbuf *buf, const char *fmt, ...) 
__attribute__((format (printf, 2, 3)));
 int packet_flush_gently(int fd);
 int packet_write_fmt_gently(int fd, const char *fmt, ...) 
__attribute__((format (printf, 2, 3)));
-- 
2.14.1.342.g6490525c54-goog



[RFC 7/7] upload-pack: ack version 2

2017-08-24 Thread Brandon Williams
Signed-off-by: Brandon Williams <bmw...@google.com>
---
 upload-pack.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/upload-pack.c b/upload-pack.c
index 7efff2fbf..0f853152f 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1032,9 +1032,15 @@ static int upload_pack_config(const char *var, const 
char *value, void *unused)
return parse_hide_refs_config(var, value, "uploadpack");
 }
 
+void upload_pack_v2(void)
+{
+   packet_write_fmt(1, "%s\n", "version 2");
+}
+
 int cmd_main(int argc, const char **argv)
 {
const char *dir;
+   const char *version;
int strict = 0;
struct option options[] = {
OPT_BOOL(0, "stateless-rpc", _rpc,
@@ -1067,6 +1073,11 @@ int cmd_main(int argc, const char **argv)
die("'%s' does not appear to be a git repository", dir);
 
git_config(upload_pack_config, NULL);
+
+   version = getenv("GIT_PROTOCOL");
+   if (!strcmp(version, "2"))
+   upload_pack_v2();
+
upload_pack();
return 0;
 }
-- 
2.14.1.342.g6490525c54-goog



[RFC 3/7] protocol: tell server that the client understands v2

2017-08-24 Thread Brandon Williams
Teach the connection logic to tell a serve that it understands protocol
v2.  This is done in 2 different ways for the built in protocols.

1. git://
   A normal request is structured as "command path/to/repo\0host=..\0"
   and due to a bug in an old version of git-daemon 73bb33a94 (daemon:
   Strictly parse the "extra arg" part of the command, 2009-06-04) we
   aren't able to place any extra args (separated by NULs) besides the
   host.  In order to get around this limitation put protocol version
   information after a second NUL byte so the request is structured
   like: "command path/to/repo\0host=..\0\0version=2\0".  git-daemon can
   then parse out the version number and set GIT_PROTOCOL.

2. ssh://, file://
   Set GIT_PROTOCOL envvar with the desired protocol version.  The
   envvar can be sent across ssh by using '-o SendEnv=GIT_PROTOCOL' and
   having the server whitelist this envvar.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 connect.c | 31 ++-
 daemon.c  | 28 +---
 2 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/connect.c b/connect.c
index 49b28b83b..d609267be 100644
--- a/connect.c
+++ b/connect.c
@@ -793,6 +793,7 @@ struct child_process *git_connect(int fd[2], const char 
*url,
printf("Diag: path=%s\n", path ? path : "NULL");
conn = NULL;
} else if (protocol == PROTO_GIT) {
+   struct strbuf request = STRBUF_INIT;
/*
 * Set up virtual host information based on where we will
 * connect, unless the user has overridden us in
@@ -820,12 +821,23 @@ struct child_process *git_connect(int fd[2], const char 
*url,
 * Note: Do not add any other headers here!  Doing so
 * will cause older git-daemon servers to crash.
 */
-   packet_write_fmt(fd[1],
-"%s %s%chost=%s%c",
-prog, path, 0,
-target_host, 0);
+   strbuf_addf(,
+   "%s %s%chost=%s%c",
+   prog, path, 0,
+   target_host, 0);
+
+   /* If using a new version put that stuff here after a second 
null byte */
+   strbuf_addch(, '\0');
+   strbuf_addf(, "version=%d%c", 2, '\0');
+   /* subsequent supported versions can also be added */
+   strbuf_addf(, "version=%d%c", 3, '\0');
+
+   packet_write(fd[1], request.buf, request.len);
+
free(target_host);
+   strbuf_release();
} else {
+   const char *const *var;
conn = xmalloc(sizeof(*conn));
child_process_init(conn);
 
@@ -837,7 +849,9 @@ struct child_process *git_connect(int fd[2], const char 
*url,
sq_quote_buf(, path);
 
/* remove repo-local variables from the environment */
-   conn->env = local_repo_env;
+   for (var = local_repo_env; *var; var++)
+   argv_array_push(>env_array, *var);
+
conn->use_shell = 1;
conn->in = conn->out = -1;
if (protocol == PROTO_SSH) {
@@ -890,6 +904,12 @@ struct child_process *git_connect(int fd[2], const char 
*url,
}
 
argv_array_push(>args, ssh);
+
+   /* protocol v2! */
+   argv_array_push(>args, "-o");
+   argv_array_push(>args, "SendEnv=GIT_PROTOCOL");
+   argv_array_push(>env_array, "GIT_PROTOCOL=2");
+
if (flags & CONNECT_IPV4)
argv_array_push(>args, "-4");
else if (flags & CONNECT_IPV6)
@@ -904,6 +924,7 @@ struct child_process *git_connect(int fd[2], const char 
*url,
argv_array_push(>args, ssh_host);
} else {
transport_check_allowed("file");
+   argv_array_push(>env_array, "GIT_PROTOCOL=2");
}
argv_array_push(>args, cmd.buf);
 
diff --git a/daemon.c b/daemon.c
index 30747075f..76a7b2d64 100644
--- a/daemon.c
+++ b/daemon.c
@@ -574,7 +574,7 @@ static void canonicalize_client(struct strbuf *out, const 
char *in)
 /*
  * Read the host as supplied by the client connection.
  */
-static void parse_host_arg(struct hostinfo *hi, char *extra_args, int buflen)
+static char *parse_host_arg(struct hostinfo *hi, char *extra_args, int buflen)
 {
char *val;
int vallen;
@@ -602,6 +602,22 @@ static void parse_host_arg(struct hostinfo *hi, char 
*extra_args, int buflen)

[RFC 0/7] transitioning to protocol v2

2017-08-24 Thread Brandon Williams
Another version of Git's wire protocol is a topic that has been discussed and
attempted by many in the community over the years.  The biggest challenge, as
far as I understand, has been coming up with a transition plan to using the new
server without breaking existing clients and servers.  As such this RFC is
really only concerned with solidifying a transition plan.  Once it has been
decided how we can transition to a new protocol we can get into decided what
this new protocol would look like (though it would obviously eliminate the ref
advertisement ;).

The best way to preserve functionality with old servers and clients would be to
communicate using the same end point but have the client send a bit of extra
information with its initial request.  This extra information would need to be
sent in such a way that old servers ignore it and operate normally (using
protocol v1).  The client would then need to be able to look at a server's
response to determine whether the server understands and is speaking v2 or has
ignored the clients request to use a newer protocol and is speaking v1.

Patches 1-5 enable a client to unconditionally send this back-channel
information to a server.  This is done by sending a version number after a
second NUL byte in git://, in the envvar GIT_PROTOCOL in file:// and ssh://,
and in an http header in http://, https://.  Patches 6-7 teach a client and
upload-pack to send and recognize a request to use protocol v2.

The biggest question I'm trying to answer is if these are reasonable ways with
which to communicate a request to a server to use a newer protocol, without
breaking current servers/clients.  As far as I've tested, with patches 1-5
applied I can still communicate with current servers without causing any
problems.

Any comments/discussion is welcome!

Brandon Williams (7):
  pkt-line: add packet_write function
  pkt-line: add strbuf_packet_read
  protocol: tell server that the client understands v2
  t: fix ssh tests to cope with using '-o SendEnv=GIT_PROTOCOL'
  http: send Git-Protocol-Version header
  transport: teach client to recognize v2 server response
  upload-pack: ack version 2

 builtin/fetch-pack.c |   4 +-
 builtin/send-pack.c  |   5 +-
 connect.c| 196 +++
 daemon.c |  28 ++-
 http.c   |   7 ++
 pkt-line.c   |  27 ++
 pkt-line.h   |   2 +
 remote-curl.c|   7 +-
 remote.h |  22 -
 t/lib-proto-disable.sh   |   1 +
 t/t5551-http-fetch-smart.sh  |   2 +
 t/t5601-clone.sh |  10 +--
 t/t5602-clone-remote-exec.sh |   4 +-
 transport.c  |  60 +++--
 upload-pack.c|  11 +++
 15 files changed, 286 insertions(+), 100 deletions(-)

-- 
2.14.1.342.g6490525c54-goog



Re: What's cooking in git.git (Aug 2017, #05; Tue, 22)

2017-08-22 Thread Brandon Williams
On 08/22, Junio C Hamano wrote:
> 
> * bw/submodule-config-cleanup (2017-08-03) 17 commits
>  - submodule: remove gitmodules_config
>  - unpack-trees: improve loading of .gitmodules
>  - submodule-config: lazy-load a repository's .gitmodules file
>  - submodule-config: move submodule-config functions to submodule-config.c
>  - submodule-config: remove support for overlaying repository config
>  - diff: stop allowing diff to have submodules configured in .git/config
>  - submodule: remove submodule_config callback routine
>  - unpack-trees: don't respect submodule.update
>  - submodule: don't rely on overlayed config when setting diffopts
>  - fetch: don't overlay config with submodule-config
>  - submodule--helper: don't overlay config in update-clone
>  - submodule--helper: don't overlay config in remote_submodule_branch
>  - add, reset: ensure submodules can be added or reset
>  - submodule: don't use submodule_from_name
>  - t7411: check configuration parsing errors
>  - Merge branch 'bc/object-id' into bw/submodule-config-cleanup
>  - Merge branch 'bw/grep-recurse-submodules' into bw/submodule-config-cleanup
> 
>  Code clean-up to avoid mixing values read from the .gitmodules file
>  and values read from the .git/config file.
> 
>  So, after the recent discussion around submodule..update (and
>  its resolution "use submodule..active; "reset --hard" must
>  ignore the .update thing"), this is now good to go, I presume?
>  Please yell at me that I am clueless if that is not the case ;-)

Yep I came to the same conclusion that you did so this should be good to
go!

-- 
Brandon Williams


Re: [PATCH] pull: respect submodule update configuration

2017-08-22 Thread Brandon Williams
On 08/22, Stefan Beller wrote:
> On Tue, Aug 22, 2017 at 7:50 AM, Lars Schneider
> <larsxschnei...@gmail.com> wrote:
> >
> > OK. I change my scripts to use ".active" and it seems to work nicely.
> >
> > I noticed one oddity, though:
> >
> > If I clone a repo using `git clone --recursive ` then the local
> > Git config of the repo gets the following entry:
> >
> > [submodule]
> > active = .
> 
> bb62e0a99f (clone: teach --recurse-submodules to optionally take a
> pathspec, 2017-03-17) makes it clear that this is intentional for
> --recurse-submodules, but doesn't exactly state that --recurse will
> behave the same. The idea here is that at clone time you can already
> give
> 
> git clone --recurse=:(exclude)sub0  
> 
> and have your desired set of submodules there.
> Combined with the changes in the attr system, b0db704652
> (pathspec: allow querying for attributes, 2017-03-13)
> you could make up things like this:
> 
>   $ cat .gitattributes
>   /sub0 label0
>   /sub1
>   /sub2 label1 label2
>   /sub3 label1
>   /platform-specifc-subs/* label1 label2
> 
> and then get a clone via
> 
> git clone --recurse=:(attr:label2).  
> 
> for example. The labeling via the attributes allows for
> complex patterns, but a relatively easy command line, that you
> can share with coworkers.
> 
> > Is this intentional? Something in the git/git test harness seems to prevent
> > that. I was not able to write a test to replicate the issue.
> >
> > Any idea?
> 
> I do not seem to understand the perceived bug?
> The setting of submodule.active= seems intentional to me,
> but how would you not reproduce it? Maybe Brandon has an idea.
> 

When adding '.active' we wanted it to be as flexible as possible.  So
you can either use a pathspec with 'submodule.active' to catch multiple
submodules as being active or you can turn on/off individual submodules
with 'submodule..active' (this has precedent over the more general
'submodule.active' config).

The intent was if a user supplies --recurse-submodules (I believe i
removed the docs for --recursive in order to make the CLI more consistent
with other commands, so --recursive is just a synonym for
--recurse-submodules) then they clearly wanted all the submodules cloned
and checked out.  With the '.active' config the way to specify this
is to make 'submodule.active = .'.  In the old world every submodule
would need to have its URL copied into the config.  This way the config
is kept cleaner as it only has a single entry added.

As stefan mentioned you can specify a value for 'submodule.active' to
take as an arg to --recurse-submodules (the default being '.' or all
submodules) so you can do clever things like group submodules using
attributes, you can even repeat the flag to provided a more complex
pathspec.

Hopefully that answers your question :D

-- 
Brandon Williams


Re: [PATCH] pull: respect submodule update configuration

2017-08-21 Thread Brandon Williams
On 08/21, Stefan Beller wrote:
> On Mon, Aug 21, 2017 at 10:20 AM, Lars Schneider
> <larsxschnei...@gmail.com> wrote:
> >
> >> On 21 Aug 2017, at 18:55, Stefan Beller <sbel...@google.com> wrote:
> >>
> >> On Mon, Aug 21, 2017 at 9:20 AM, Heiko Voigt <hvo...@hvoigt.net> wrote:
> >>
> >>>> So I am a bit curious to learn which part of this change you dislike
> >>>> and why.
> >>>
> >>> I am also curious. Isn't this the same strategy we are using in other
> >>> places?
> >>>
> >>
> >> I dislike it because the UX feels crude.  When reading the documentation,
> >> it seems to me as if submodule. can be one of the following
> >>
> >>(none, checkout, rebase, merge, !)
> >>
> >> This is perfect for "submodule-update", whose primary goal is
> >> to update submodules *somehow*. However other commands
> >>
> >>git rebase --recurse
> >>git merge --recurse
> >>git checkout --recurse
> >>
> >> have a different primary mode of operation (note how their name
> >> is one of the modes from the set above), so it may get confusing
> >> for a user.
> >>
> >> 'none'  and '!' seem like they would be okay
> >> for any of the commands above but then:
> >>
> >>git config submodule..update "!..."
> >>git reset --hard --recurse
> >>git status
> >># submodule is reported, because "!..." did not 'reset'.
> >>
> >> Anyway. That dislike is just a minor gut feeling about the UX/UI
> >> being horrible. I wrote the patch to keep the conversation going,
> >> and if it fixes Lars problem, let's take it for now.
> >
> > Well, I need just a way to disable certain Submodules completely.
> > If you show me how "git config --local submodule.sub.active false"
> > works then I don't need this patch.

Yeah if you want to completely disable a submodule (as in not even check
it out) then setting .active to false would do that.  But as stefan
pointed out and IIRC 'submodule update --init' with no pathspec sets all
submodules to be active.  Perhaps it should only init submodules who
don't already have an explicit active flag set.

> >
> > I tried to make it work here:
> > https://public-inbox.org/git/89ab8aa3-8e19-46ba-b169-d1ea4cf4a...@gmail.com/
> 
> (A) you need to set expect there as well, to have sub{2,4,5} be expected
> there as well.
> 
> (B) That may hint at another (UX) bug.
> 
> The test case there uses "git submodule update --init".
> The init flag will set all submodules to active.
> 
> Maybe you want
> 
> git config submodule.active ":(exclude)sub0"
> git config --add submodule.active ":(exclude)sub2"
> git config --add submodule.active "."
> # Read: anything except sub0 and sub2 are interesting
> 
> git submodule update
> # no init flag, needed even for new submodules IIUC

-- 
Brandon Williams


Re: [PATCH] files-backend: cheapen refname_available check when locking refs

2017-08-17 Thread Brandon Williams
On 08/17, Jeff King wrote:
> On Thu, Aug 17, 2017 at 05:12:50PM +0200, Michael Haggerty wrote:
> 
> > I was testing this using the reporter's recipe (but fetching from a
> > local clone), and found the following surprising timing numbers:
> > 
> > b05855b5bc (before the slowdown): 22.7 s
> > 524a9fdb51 (immediately after the slowdown): 13 minutes
> > 4e81f1ecf1 (after this fix): 14.5 s
> > 
> > The fact that the fetch is now significantly *faster* than before the
> > slowdown seems not to have anything to do with the reference code.
> 
> I bisected this (with some hackery, since the commits in the middle all
> take 13 minutes to run). The other speedup is indeed unrelated, and is
> due to Brandon's aacc5c1a81 (submodule: refactor logic to determine
> changed submodules, 2017-05-01).
> 
> The commit message doesn't mention performance (it's mostly about code
> reduction). I think the speedup comes from using
> diff_tree_combined_merge() instead of manually diffing each commit
> against its parents. But I didn't do further timings to verify that (I'm
> reporting it here mostly as an interesting curiosity for submodule
> folks).

Haha always great to see an unintended improvement in performance!  Yeah
that commit was mostly about removing duplicate code but I'm glad that
it ended up being a benefit to perf too.

> 
> > diff --git a/refs/files-backend.c b/refs/files-backend.c
> > index e9b95592b6..f2a420c611 100644
> > --- a/refs/files-backend.c
> > +++ b/refs/files-backend.c
> > @@ -631,11 +631,11 @@ static int lock_raw_ref(struct files_ref_store *refs,
> >  
> > /*
> >  * If the ref did not exist and we are creating it,
> > -* make sure there is no existing ref that conflicts
> > -* with refname:
> > +* make sure there is no existing packed ref that
> > +* conflicts with refname:
> >  */
> > if (refs_verify_refname_available(
> > -   >base, refname,
> > +   refs->packed_ref_store, refname,
> > extras, skip, err))
> >     goto error_return;
> > }
> 
> This seems too easy to be true. :) But I think it matches what we were
> doing before 524a9fdb51 (so it's correct), and the performance numbers
> don't lie.
> 
> -Peff

-- 
Brandon Williams


Re: [RFC PATCH 2/2] submodule: simplify decision tree whether to or not to fetch

2017-08-17 Thread Brandon Williams
On 08/17, Stefan Beller wrote:
> On Thu, Aug 17, 2017 at 4:00 AM, Heiko Voigt <hvo...@hvoigt.net> wrote:
> > To make extending this logic later easier.
> >
> > Signed-off-by: Heiko Voigt <hvo...@hvoigt.net>
> > ---
> > I am quite sure I replicated the same logic but a few more eyes would be
> > appreciated.
> 
> A code cleanup is appreciated!
> 
> I thought Brandon had a series in flight doing a very similar cleanup here,
> but in master..pu there is nothing to be found.

Yeah there are 2 series in flight which will probably conflict here.
bw/grep-recurse-submodules and bw/submodule-config-cleanup

> 
> > Cheers Heiko
> 
> The code looks good to me.
> 
> Cheers!
> Stefan
> 
> >
> >  submodule.c | 55 +++
> >  1 file changed, 27 insertions(+), 28 deletions(-)
> >
> > diff --git a/submodule.c b/submodule.c
> > index 3ed78ac..a1011f4 100644
> > --- a/submodule.c
> > +++ b/submodule.c
> > @@ -1171,6 +1171,21 @@ int submodule_touches_in_range(struct object_id 
> > *excl_oid,
> > return ret;
> >  }
> >
> > +static int get_fetch_recurse_config(const struct submodule *submodule, int 
> > command_line_option)
> > +{
> > +   if (command_line_option != RECURSE_SUBMODULES_DEFAULT)
> > +   return command_line_option;
> > +
> > +   if (submodule && submodule->fetch_recurse != 
> > RECURSE_SUBMODULES_NONE)
> > +   /* local config overrules everything except commandline */
> > +   return submodule->fetch_recurse;
> > +
> > +   if (gitmodules_is_unmerged)
> > +   return RECURSE_SUBMODULES_OFF;
> > +
> > +   return config_fetch_recurse_submodules;
> > +}
> > +
> >  struct submodule_parallel_fetch {
> > int count;
> > struct argv_array args;
> > @@ -1203,37 +1218,21 @@ static int get_next_submodule(struct child_process 
> > *cp,
> > if (!submodule)
> > submodule = submodule_from_name(_oid, 
> > ce->name);
> >
> > -   default_argv = "yes";
> > -   if (spf->command_line_option == RECURSE_SUBMODULES_DEFAULT) 
> > {
> > -   if (submodule &&
> > -   submodule->fetch_recurse !=
> > -   RECURSE_SUBMODULES_NONE) {
> > -   if (submodule->fetch_recurse ==
> > -   RECURSE_SUBMODULES_OFF)
> > -   continue;
> > -   if (submodule->fetch_recurse ==
> > -   
> > RECURSE_SUBMODULES_ON_DEMAND) {
> > -   if 
> > (!unsorted_string_list_lookup(_submodule_names,
> > -
> > submodule->name))
> > -   continue;
> > -   default_argv = "on-demand";
> > -   }
> > -   } else {
> > -   if ((config_fetch_recurse_submodules == 
> > RECURSE_SUBMODULES_OFF) ||
> > -   gitmodules_is_unmerged)
> > -   continue;
> > -   if (config_fetch_recurse_submodules == 
> > RECURSE_SUBMODULES_ON_DEMAND) {
> > -   if 
> > (!unsorted_string_list_lookup(_submodule_names,
> > -
> > submodule->name))
> > -   continue;
> > -   default_argv = "on-demand";
> > -   }
> > -   }
> > -   } else if (spf->command_line_option == 
> > RECURSE_SUBMODULES_ON_DEMAND) {
> > -   if 
> > (!unsorted_string_list_lookup(_submodule_names,
> > +   switch (get_fetch_recurse_config(submodule, 
> > spf->command_line_option))
> > +   {
> > +   default:
> > +   case RECURSE_SUBMODULES_DEFAULT:
> > +   case RECURSE_SUBMODULES_ON_DEMAND:
> > +   if (!submodule || 
> > !unsorted_string_list_lookup(_submodule_names,
> >  submodule->name))
> > continue;
> > default_argv = "on-demand";
> > +   break;
> > +   case RECURSE_SUBMODULES_ON:
> > +   default_argv = "yes";
> > +   break;
> > +   case RECURSE_SUBMODULES_OFF:
> > +   continue;
> > }
> >
> > strbuf_addf(_path, "%s/%s", spf->work_tree, 
> > ce->name);
> > --
> > 2.0.0.274.g6b2cd91
> >

-- 
Brandon Williams


Re: [PATCH] convert add_to_alternates_file to use repository struct

2017-08-15 Thread Brandon Williams
nction.  Is this 
intentional?  

>  {
>   struct strbuf objdirbuf = STRBUF_INIT;
>   struct strbuf entry = STRBUF_INIT;
> @@ -470,7 +472,8 @@ static void read_info_alternates(const char * 
> relative_base, int depth)
>   map = xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd, 0);
>   close(fd);
>  
> - link_alt_odb_entries(map, mapsz, '\n', relative_base, depth);
> + link_alt_odb_entries(the_repository, map, mapsz, '\n',
> +  relative_base, depth);
>  
>   munmap(map, mapsz);
>  }
> @@ -487,10 +490,11 @@ struct alternate_object_database *alloc_alt_odb(const 
> char *dir)
>   return ent;
>  }
>  
> -void add_to_alternates_file(const char *reference)
> +void add_to_alternates_file(struct repository *r, const char *reference)
>  {
>   struct lock_file *lock = xcalloc(1, sizeof(struct lock_file));
> - char *alts = git_pathdup("objects/info/alternates");
> + char *alts = repo_git_path(r, "objects/info/alternates");
> +
>   FILE *in, *out;
>  
>   hold_lock_file_for_update(lock, alts, LOCK_DIE_ON_ERROR);
> @@ -527,7 +531,8 @@ void add_to_alternates_file(const char *reference)
>   if (commit_lock_file(lock))
>   die_errno("unable to move new alternates file into 
> place");
>   if (alt_odb_tail)
> - link_alt_odb_entries(reference, strlen(reference), 
> '\n', NULL, 0);
> + link_alt_odb_entries(r, reference, strlen(reference),
> +  '\n', NULL, 0);
>   }
>   free(alts);
>  }
> @@ -540,7 +545,8 @@ void add_to_alternates_memory(const char *reference)
>    */
>   prepare_alt_odb();
>  
> - link_alt_odb_entries(reference, strlen(reference), '\n', NULL, 0);
> + link_alt_odb_entries(the_repository, reference, strlen(reference),
> +  '\n', NULL, 0);
>  }
>  
>  /*
> @@ -643,7 +649,8 @@ void prepare_alt_odb(void)
>   if (!alt) alt = "";
>  
>   alt_odb_tail = _odb_list;
> - link_alt_odb_entries(alt, strlen(alt), PATH_SEP, NULL, 0);
> + link_alt_odb_entries(the_repository, alt, strlen(alt),
> +  PATH_SEP, NULL, 0);
>  
>   read_info_alternates(get_object_directory(), 0);
>  }
> -- 
> 2.14.0.rc0.3.g6c2e499285
> 

-- 
Brandon Williams


Re: [PATCH] sha1_file: make read_info_alternates static

2017-08-15 Thread Brandon Williams
On 08/15, Stefan Beller wrote:
> read_info_alternates is not used from outside, so let's make it static.
> 
> We have to declare the function before link_alt_odb_entry instead of
> moving the code around, link_alt_odb_entry calls read_info_alternates,
> which in turn calls link_alt_odb_entry.
> 
> Signed-off-by: Stefan Beller <sbel...@google.com>
> ---
> 
>   This helps in a later refactoring (moving the object
>   store to the_repository struct), too.
>   
>   Thanks,
>   Stefan
> 
>  cache.h | 1 -
>  sha1_file.c | 3 ++-
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/cache.h b/cache.h
> index 1c69d2a05a..4109efcf24 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -1551,7 +1551,6 @@ extern struct alternate_object_database {
>   char path[FLEX_ARRAY];
>  } *alt_odb_list;
>  extern void prepare_alt_odb(void);
> -extern void read_info_alternates(const char * relative_base, int depth);
>  extern char *compute_alternate_path(const char *path, struct strbuf *err);
>  typedef int alt_odb_fn(struct alternate_object_database *, void *);
>  extern int foreach_alt_odb(alt_odb_fn, void*);
> diff --git a/sha1_file.c b/sha1_file.c
> index b60ae15f70..9186e2c6c7 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -347,6 +347,7 @@ static int alt_odb_usable(struct strbuf *path, const char 
> *normalized_objdir)
>   * SHA1, an extra slash for the first level indirection, and the
>   * terminating NUL.
>   */
> +static void read_info_alternates(const char * relative_base, int depth);
>  static int link_alt_odb_entry(const char *entry, const char *relative_base,
>   int depth, const char *normalized_objdir)
>  {
> @@ -448,7 +449,7 @@ static void link_alt_odb_entries(const char *alt, int 
> len, int sep,
>   strbuf_release();
>  }
>  
> -void read_info_alternates(const char * relative_base, int depth)
> +static void read_info_alternates(const char * relative_base, int depth)
>  {
>   char *map;
>   size_t mapsz;
> -- 
> 2.14.0.rc0.3.g6c2e499285
> 

Looks good to me.  Only nit is I would fix the style to not have a space
after the '*' ;)

-- 
Brandon Williams


Re: [PATCH v2 1/2] clang-format: outline the git project's coding style

2017-08-15 Thread Brandon Williams
On 08/15, Ben Peart wrote:
> 
> 
> On 8/14/2017 6:02 PM, Stefan Beller wrote:
> >On Mon, Aug 14, 2017 at 2:30 PM, Brandon Williams <bmw...@google.com> wrote:
> >>Add a '.clang-format' file which outlines the git project's coding
> >>style.  This can be used with clang-format to auto-format .c and .h
> >>files to conform with git's style.
> >>
> >>Signed-off-by: Brandon Williams <bmw...@google.com>
> >
> >Applying this patch and running
> > clang-format -i -style file *.c *.h builtin/*.c
> >produces a diff, that I'd mostly agree with.
> >This style guide is close to our current style.
> >
> 
> I'm happy to see progress being made in helping reduce the time
> spent manually reviewing and fixing style formatting errors.  In an
> effort to help, I installed this in Windows and tried it as well.
> The tools all appear to be working fine and are supported on
> Windows.
> 
> For the most part, the formatting rules look pretty consistent with
> the existing style.  I ran the same test and looked at the diffs and
> saw a couple of things that looked odd. For example, how it wrapped
> the "static int" on the function header below was different.  Not
> sure why as it didn't wrap all the other function headers the same
> even later in the file it didn't do that with "static void
> mute_routine"
> 
> diff --git a/apply.c b/apply.c
> index f2d599141d..bb77242e3d 100644
> --- a/apply.c
> +++ b/apply.c
> @@ -58,12 +59,11 @@ static int parse_whitespace_option(struct
> apply_state *state, const char *option
> return error(_("unrecognized whitespace option '%s'"), option);
>  }
> 
> -static int parse_ignorewhitespace_option(struct apply_state *state,
> -const char *option)
> +static int
> +parse_ignorewhitespace_option(struct apply_state *state, const char
> *option)
>  {
> -   if (!option || !strcmp(option, "no") ||
> -   !strcmp(option, "false") || !strcmp(option, "never") ||
> -   !strcmp(option, "none")) {
> +   if (!option || !strcmp(option, "no") || !strcmp(option, "false") ||
> +   !strcmp(option, "never") || !strcmp(option, "none")) {
> state->ws_ignore_action = ignore_ws_none;
> return 0;
> }
> 
> 
> Later in the file it wraps some of them again: (add_line_info,
> prepare_image, find_name_common, etc).  Again, it appears to be
> inconsistent but there must be some rule that is causing this
> behavior.

These have to deal with setting the penalties.  When a line gets to be
too long the tool needs to find a place to break the line based on a
penalty system.  The current .clang-format file I sent out has values
for the penalties which would most likely need to be tweaked through
trial and error.

> 
> 
> 
> Here is an example of how it wrapped bit fields differently.  Again,
> it didn't seem to be consistent with itself as just below this, it
> left them on separate lines.
> 
> 
> @@ -182,8 +185,7 @@ struct fragment {
>  * but some codepaths store an allocated buffer.
>  */
> const char *patch;
> -   unsigned free_patch:1,
> -   rejected:1;
> +   unsigned free_patch : 1, rejected : 1;
> int size;
> int linenr;
> struct fragment *next;

If the return type was replicated then it would probably format the
different struct members on their own line.

> 
> 
> Big thanks to those working on this!
> 
> >As noted in patch 2/2 we'd now need an easy way to
> >expose this for use in various situations, such as
> >* contributor wanting to format their patch
> >* reformatting code for readability
> >
> >Thanks,
> >Stefan
> >

-- 
Brandon Williams


Re: [PATCH v2 1/2] clang-format: outline the git project's coding style

2017-08-15 Thread Brandon Williams
On 08/15, Ben Peart wrote:
> 
> 
> On 8/14/2017 5:30 PM, Brandon Williams wrote:
> >Add a '.clang-format' file which outlines the git project's coding
> >style.  This can be used with clang-format to auto-format .c and .h
> >files to conform with git's style.
> >
> >Signed-off-by: Brandon Williams <bmw...@google.com>
> >---
> >  .clang-format | 165 
> > ++
> >  1 file changed, 165 insertions(+)
> >  create mode 100644 .clang-format
> >
> 
> I applied this and then ran:
> clang-format -i -style file *.c *.h builtin/*.c
> 
> The modified files generate lots of line ending warnings.
> Apparently clang-format isn't respecting the git settings for line
> endings as it converted CRLF to LF in all the files it edited.
> 
> $ git add .
> warning: LF will be replaced by CRLF in alloc.c.
> The file will have its original line endings in your working directory.
> warning: LF will be replaced by CRLF in base85.c.
> The file will have its original line endings in your working directory.
> warning: LF will be replaced by CRLF in bisect.h.
> [...]
> 

I would think that there  must be some setting to permit CRLF on windows
machines.  As I work on a unix machine I don't see this sort of
behavior.

-- 
Brandon Williams


Re: [PATCH v2 2/2] Makefile: add style build rule

2017-08-14 Thread Brandon Williams
On 08/14, Junio C Hamano wrote:
> Junio C Hamano <gits...@pobox.com> writes:
> 
> > By the way, I do not know which vintage of /usr/bin/git-clang-format
> > I happen to have on my box, but I needed a crude workaround patch
> > (attached at the end) ...
> 
> I guess you hit the same thing while our messages crossing ;-)
> 
> 
> > As to what it does, the first example I tried may not have been a
> > great one.  I got this:
> >
> > git clang-format --style file --diff --extensions c,h
> > diff --git a/cache.h b/cache.h
> > index 73e0085186..6462fe25bc 100644
> > --- a/cache.h
> > +++ b/cache.h
> > @@ -1498,11 +1498,8 @@ struct checkout {
> > const char *base_dir;
> > int base_dir_len;
> > struct delayed_checkout *delayed_checkout;
> > -   unsigned force:1,
> > -quiet:1,
> > -not_new:1,
> > -   a_new_field:1,
> > -refresh_cache:1;
> > +   unsigned force : 1, quiet : 1, not_new : 1, a_new_field : 1,
> > +   refresh_cache : 1;
> >  };
> >  #define CHECKOUT_INIT { NULL, "" }
> >  
> > which is not wrong per-se, but I have a mixed feelings.  I do not
> > want it to complain if the original tried to fit many items on a
> > single line, but if the original wanted to have one item per line,
> > I'd rather see it kept as-is.
> 
> To clarify, the above is after I added a_new_field that is one-bit
> wide without doing anything else.  I do not mind the checker
> complaining the existing force, quiet, etc. whose widths are all
> spelled without SP around ':', because they appear near-by, as a
> collateral damage.  My only gripe is that the result got squished
> into a single line.

Yeah the result doesn't look too good and I'm not sure which option to
tweak for that.  I'm sure that the problem would fix itself if all the
bit fields where defined on their own lines:

  unsigned force : 1;
  unsigned not_new : 1; 
  ... etc ...

I'm sure there are a bunch of other things that we'd need to tweak
before this is ready to be used by all contributors.  Specifically the
penalties to help determine when to break a line.

> 
> > Anyway, we cannot have perfect checker from the day one, and
> > considering this is an initial attempt, I'd say it is a good start.

-- 
Brandon Williams


Re: [PATCH v2 1/2] clang-format: outline the git project's coding style

2017-08-14 Thread Brandon Williams
On 08/14, Jeff King wrote:
> On Mon, Aug 14, 2017 at 06:48:31PM -0400, Jeff King wrote:
> 
> > On Mon, Aug 14, 2017 at 02:30:45PM -0700, Brandon Williams wrote:
> > 
> > > +# Align escaped newlines as far left as possible
> > > +# #define A   \
> > > +#   int ; \
> > > +#   int b;\
> > > +#   int ;
> > > +AlignEscapedNewlines: Left
> > 
> > I get:
> > 
> >   $ git clang-format-5.0 --style file -p --extensions c,h
> >   YAML:34:23: error: unknown key 'AlignEscapedNewlines'
> >   AlignEscapedNewlines: Left
> > ^~~~
> >   Error reading /home/peff/compile/git/.clang-format: Invalid argument
> > 
> > Same with clang-format-3.8.
> 
> And if I remove that line, I get:
> 
>   YAML:155:25: error: unknown key 'PenaltyBreakAssignment'
>   PenaltyBreakAssignment: 100
> 
> Removing that gives:
> 
>   YAML:86:22: error: unknown key 'BreakStringLiterals'
>   BreakStringLiterals: false
> 
> And removing that gives me a clean output. I have no idea why my clang
> doesn't like these (but presumably yours does). It's clang-format-5.0 in
> Debian unstable (and clang-format-3.8, etc).
> 
> -Peff

Those must be features in version 6 (which is what I seem to have
installed on my machine).

-- 
Brandon Williams


Re: [PATCH v2 0/2] clang-format

2017-08-14 Thread Brandon Williams
On 08/14, Brandon Williams wrote:
> Changes in v2:
>  * Changed a couple rules to be more inline with our coding style.
>  * Added a Makefile build rule to run git-clang-format on the diff of the
>working tree to suggest style changes.
> 
> I found that the llvm project also has the git-clang-format tool which will
> allow for doing formating changes based on diffs so that the parts of the code
> you didn't touch won't be formated.  It also has a nice '-p' option to only
> apply hunks of suggested changes.  I also saw what Ramsay did and attempted to
> add a Makefile rule to run clang-format.  This bit may need more tweaking to
> get it right.

Forgot to include the [RFC] bit because from the discussion it seems
like we still have a lot to talk about before we decide that this is the
path we are taking.

> 
> Brandon Williams (2):
>   clang-format: outline the git project's coding style
>   Makefile: add style build rule
> 
>  .clang-format | 165 
> ++
>  Makefile  |   4 ++
>  2 files changed, 169 insertions(+)
>  create mode 100644 .clang-format
> 
> -- 
> 2.14.1.480.gb18f417b89-goog
> 

-- 
Brandon Williams


[PATCH v2 1/2] clang-format: outline the git project's coding style

2017-08-14 Thread Brandon Williams
Add a '.clang-format' file which outlines the git project's coding
style.  This can be used with clang-format to auto-format .c and .h
files to conform with git's style.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 .clang-format | 165 ++
 1 file changed, 165 insertions(+)
 create mode 100644 .clang-format

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0..3ede2628d
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,165 @@
+# Defaults
+
+# Use tabs whenever we need to fill whitespace that spans at least from one tab
+# stop to the next one.
+UseTab: Always
+TabWidth: 8
+IndentWidth: 8
+ContinuationIndentWidth: 8
+ColumnLimit: 80
+
+# C Language specifics
+Language: Cpp
+
+# Align parameters on the open bracket
+# someLongFunction(argument1,
+#  argument2);
+AlignAfterOpenBracket: Align
+
+# Don't align consecutive assignments
+# int  = 12;
+# int b = 14;
+AlignConsecutiveAssignments: false
+
+# Don't align consecutive declarations
+# int  = 12;
+# double b = 3.14;
+AlignConsecutiveDeclarations: false
+
+# Align escaped newlines as far left as possible
+# #define A   \
+#   int ; \
+#   int b;\
+#   int ;
+AlignEscapedNewlines: Left
+
+# Align operands of binary and ternary expressions
+# int aaa = bbb +
+#   cc;
+AlignOperands: true
+
+# Don't align trailing comments
+# int a; // Comment a
+# int b = 2; // Comment b
+AlignTrailingComments: false
+
+# By default don't allow putting parameters onto the next line
+# myFunction(foo, bar, baz);
+AllowAllParametersOfDeclarationOnNextLine: false
+
+# Don't allow short braced statements to be on a single line
+# if (a)   not   if (a) return;
+#   return;
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: false
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+
+# By default don't add a line break after the return type of top-level 
functions
+# int foo();
+AlwaysBreakAfterReturnType: None
+
+# Pack as many parameters or arguments onto the same line as possible
+# int myFunction(int , int ,
+#int );
+BinPackArguments: true
+BinPackParameters: true
+
+# Attach braces to surrounding context except break before braces on function
+# definitions.
+# void foo()
+# {
+#if (true) {
+#} else {
+#}
+# };
+BreakBeforeBraces: Linux
+
+# Break after operators
+# int valuve = a +
+#  bb -
+#  ccc;
+BreakBeforeBinaryOperators: None
+BreakBeforeTernaryOperators: false
+
+# Don't break string literals
+BreakStringLiterals: false
+
+# Use the same indentation level as for the switch statement.
+# Switch statement body is always indented one level more than case labels.
+IndentCaseLabels: false
+
+# Don't indent a function definition or declaration if it is wrapped after the
+# type
+IndentWrappedFunctionNames: false
+
+# Align pointer to the right
+# int *a;
+PointerAlignment: Right
+
+# Don't insert a space after a cast
+# x = (int32)y;notx = (int32) y;
+SpaceAfterCStyleCast: false
+
+# Insert spaces before and after assignment operators
+# int a = 5;notint a=5;
+# a += 42; a+=42;
+SpaceBeforeAssignmentOperators: true
+
+# Put a space before opening parentheses only after control statement keywords.
+# void f() {
+#   if (true) {
+# f();
+#   }
+# }
+SpaceBeforeParens: ControlStatements
+
+# Don't insert spaces inside empty '()'
+SpaceInEmptyParentheses: false
+
+# The number of spaces before trailing line comments (// - comments).
+# This does not affect trailing block comments (/* - comments).
+SpacesBeforeTrailingComments: 1
+
+# Don't insert spaces in casts
+# x = (int32) y;notx = ( int32 ) y;
+SpacesInCStyleCastParentheses: false
+
+# Don't insert spaces inside container literals
+# var arr = [1, 2, 3];notvar arr = [ 1, 2, 3 ];
+SpacesInContainerLiterals: false
+
+# Don't insert spaces after '(' or before ')'
+# f(arg);notf( arg );
+SpacesInParentheses: false
+
+# Don't insert spaces after '[' or before ']'
+# int a[5];notint a[ 5 ];
+SpacesInSquareBrackets: false
+
+# Insert a space after '{' and before '}' in struct initializers
+Cpp11BracedListStyle: false
+
+# A list of macros that should be interpreted as foreach loops instead of as
+# function calls.
+ForEachMacros: ['for_each_string_list_item']
+
+# The maximum number of consecutive empty lines to keep.
+MaxEmptyLinesToKeep: 1
+
+# No empty line at the start of a block.
+KeepEmptyLinesAtTheStartOfBlocks: false
+
+# Penalties
+# This decides what order things should be done if a line is too long
+PenaltyBreakAssignment: 100
+PenaltyBreakBeforeFirstCallParameter: 100
+PenaltyBreakComment: 100
+PenaltyBreakFirstLessLess: 0
+PenaltyBreakString: 100
+PenaltyExcessCharacter: 5
+PenaltyReturnTypeOnItsO

[PATCH v2 0/2] clang-format

2017-08-14 Thread Brandon Williams
Changes in v2:
 * Changed a couple rules to be more inline with our coding style.
 * Added a Makefile build rule to run git-clang-format on the diff of the
   working tree to suggest style changes.

I found that the llvm project also has the git-clang-format tool which will
allow for doing formating changes based on diffs so that the parts of the code
you didn't touch won't be formated.  It also has a nice '-p' option to only
apply hunks of suggested changes.  I also saw what Ramsay did and attempted to
add a Makefile rule to run clang-format.  This bit may need more tweaking to
get it right.

Brandon Williams (2):
  clang-format: outline the git project's coding style
  Makefile: add style build rule

 .clang-format | 165 ++
 Makefile  |   4 ++
 2 files changed, 169 insertions(+)
 create mode 100644 .clang-format

-- 
2.14.1.480.gb18f417b89-goog



[PATCH v2 2/2] Makefile: add style build rule

2017-08-14 Thread Brandon Williams
Add the 'style' build rule which will run git-clang-format on the diff
between HEAD and the current worktree.  The result is a diff of
suggested changes.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 Makefile | 4 
 1 file changed, 4 insertions(+)

diff --git a/Makefile b/Makefile
index ba4359ef8..acfd096b7 100644
--- a/Makefile
+++ b/Makefile
@@ -2414,6 +2414,10 @@ $(SP_OBJ): %.sp: %.c GIT-CFLAGS FORCE
 .PHONY: sparse $(SP_OBJ)
 sparse: $(SP_OBJ)
 
+.PHONY: style
+style:
+   git clang-format --style file --diff --extensions c,h
+
 check: common-cmds.h
@if sparse; \
then \
-- 
2.14.1.480.gb18f417b89-goog



Re: [RFC] clang-format: outline the git project's coding style

2017-08-11 Thread Brandon Williams
On 08/09, Stefan Beller wrote:
> On Wed, Aug 9, 2017 at 3:53 PM, Stefan Beller <sbel...@google.com> wrote:
> 
> > I would think based on these options, a pre commit hook can be
> > written that formats precisely the touched lines of code of each file.
> 
> I did not search enough, "clang-tidy-diff.py --fix" should be all that is 
> needed

I believe clang-tidy is different from clang-format am I mistaken?

-- 
Brandon Williams


Re: [RFC] clang-format: outline the git project's coding style

2017-08-11 Thread Brandon Williams
esh_cache:1;
> + unsigned force : 1,
> +  quiet : 1,
> +  not_new : 1,
> +  refresh_cache : 1;
>  };
>  #define CHECKOUT_INIT { NULL, "" }
>  
> @@ -1534,7 +1534,7 @@ extern struct alternate_object_database {
>   char path[FLEX_ARRAY];
>  } *alt_odb_list;
>  extern void prepare_alt_odb(void);
> -extern void read_info_alternates(const char * relative_base, int depth);
> +extern void read_info_alternates(const char *relative_base, int depth);
>  extern char *compute_alternate_path(const char *path, struct strbuf *err);
>  typedef int alt_odb_fn(struct alternate_object_database *, void *);
>  extern int foreach_alt_odb(alt_odb_fn, void*);
> @@ -1587,10 +1587,10 @@ extern struct packed_git {
>   int index_version;
>   time_t mtime;
>   int pack_fd;
> - unsigned pack_local:1,
> -  pack_keep:1,
> -  freshened:1,
> -  do_not_close:1;
> + unsigned pack_local : 1,
> +  pack_keep : 1,
> +  freshened : 1,
> +  do_not_close : 1;
>   unsigned char sha1[20];
>   struct revindex_entry *revindex;
>   /* something like ".git/objects/pack/x.pack" */
> @@ -1767,10 +1767,10 @@ struct object_info {
>   union {
>   /*
>* struct {
> -  *  ... Nothing to expose in this case
> +  *  ... Nothing to expose in this case
>* } cached;
>* struct {
> -  *  ... Nothing to expose in this case
> +  *  ... Nothing to expose in this case
>* } loose;
>*/
>   struct {

-- 
Brandon Williams


Re: [PATCH v1 1/1] dir: teach status to show ignored directories

2017-08-11 Thread Brandon Williams
On 08/10, Jameson Miller wrote:
> Teach Git to optionally show ignored directories when showing all
> untracked files. The git status command exposes the options to report
> ignored and/or untracked files. However, when reporting all untracked
> files (--untracked-files=all), all individual ignored files are reported
> as well. It is not currently possible to get the reporting behavior of
> the --ignored flag, while also reporting all untracked files. This
> change exposes a flag to report all untracked files while not showing
> individual files in ignored directories.
> 
> Motivation:
> Our application (Visual Studio) needs all untracked files listed
> individually, but does not need all ignored files listed individually.
> Reporting all ignored files can affect the time it takes for status
> to run. For a representative repository, here are some measurements
> showing a large perf improvement for this scenario:
> 
> | Command | Reported ignored entries | Time (s) |
> | --- |  |  |
> | 1   | 0| 1.3  |
> | 2   | 1024 | 4.2  |
> | 3   | 174904   | 7.5  |
> | 4   | 1046 | 1.6  |
> 
> Commands:
>  1) status
>  2) status --ignored
>  3) status --ignored --untracked-files=all
>  4) status --ignored --untracked-files=all --show-ignored-directory
> 
> This changes exposes a --show-ignored-directory flag to the git status
> command. This flag is utilized when running git status with the
> --ignored and --untracked-files options to not list ignored individual
> ignored files contained in directories that match an ignore pattern.

I can't help feeling that there is a better way express this with a
better UI.  I'm not saying this is wrong, I'm just not sure how
--show-ignored-directory would work when not paired with --ignored and
--untracked-files.  Does it require --ignored to also be given?

> 
> Part of the perf improvement comes from the tweak to
> read_directory_recursive to stop scanning the file system after it
> encounters the first file. When a directory is ignored, all it needs to
> determine is if the directory is empty or not. The logic currently keeps
> scanning the file system until it finds an untracked file. However, as
> the directory is ignored, all the contained contents are also marked
> excluded. For ignored directories that contain a large number of files,
> this can take some time.
> 
> Signed-off-by: Jameson Miller <jam...@microsoft.com>

-- 
Brandon Williams


Re: [RFC] clang-format: outline the git project's coding style

2017-08-10 Thread Brandon Williams
On 08/10, Junio C Hamano wrote:
> Brandon Williams <bmw...@google.com> writes:
> 
> > On 08/10, Junio C Hamano wrote:
> >
> >> I vaguely recall that there was a discussion to have SubmitGit wait
> >> for success from Travis CI; if that is already in place, then I can
> >> sort of see how it would help individual contributors to have the
> >> style checker in that pipeline as well.  
> >> 
> >> I have a mixed feelings about "fixing" styles automatically, though.
> >
> > I still think we are far away from a world where we can fix style
> > automatically.  If we do want to keep pursuing this there are a number
> > steps we'd want to take first.
> >
> > 1. Settle on a concrete style and document it using a formatter's rules
> >(in say a .clang-format file).  This style would most likely need to
> >be tuned a little bit, at least the 'Penalty' configuration would
> >need to be tuned which (as far as I understand it) is used to
> >determine which rule to break first to ensure a line isn't too long.
> 
> Yes.  I think this is what you started to get the ball rolling.
> Together with what checkpatch.pl already diagnoses, I think we can
> get a guideline that is more or less reasonable.
> 
> > 2. Start getting contributors to use the tool to format their patches.
> >This would include having some script or hook that a contributor
> >could run to only format the sections of code that they touched.
> 
> This, too.  Running checkpatch.pl (possibly combined with a bit of
> tweaking it to match our needs) already catches many of the issues,
> so a tool with a similar interface would be easy to use, I would
> imagine.
> 
> > 3. Slowly the code base would begin to have a uniform style.  At
> >some point we may want to then reformat the remaining sections of the
> >code base.  At this point we could have some automated bot that fixes
> >style.
> 
> I suspect I am discussing this based on a different assumption.
> 
> I think the primary goal of this effort is to make it easier to
> cleanse the new patches that appear on the list of trivial style
> issues, so that contributors and reviewers do not have to spend
> bandwidth and brain cycles during the review.  And I have been
> assuming that we can do so even without waiting for a "tree wide"
> code churn on existing code to complete.

Yes that's one of the steps I missed we can call it 2.5 ;)  (3) could be
a long term goal which is what I was trying to get at by saying:

> > 3. Slowly the code base would begin to have a uniform style.

-- 
Brandon Williams


Re: [RFC] clang-format: outline the git project's coding style

2017-08-10 Thread Brandon Williams
On 08/10, Junio C Hamano wrote:
> Johannes Schindelin <johannes.schinde...@gmx.de> writes:
> 
> > On Wed, 9 Aug 2017, Stefan Beller wrote:
> >
> >> > I am sure that something even better will be possible: a Continuous
> >> > "Integration" that fixes the coding style automatically by using
> >> > `filter-branch` (avoiding the merge conflicts that would arise if
> >> > `rebase -i` was used).
> >> 
> >> I do not quite follow. Is that to be used by Junio while integrating
> >> branches?
> >
> > I was more thinking about a bot on GitHub. "Code cleanup as a service".
> 
> I vaguely recall that there was a discussion to have SubmitGit wait
> for success from Travis CI; if that is already in place, then I can
> sort of see how it would help individual contributors to have the
> style checker in that pipeline as well.  
> 
> I have a mixed feelings about "fixing" styles automatically, though.
> 

I still think we are far away from a world where we can fix style
automatically.  If we do want to keep pursuing this there are a number
steps we'd want to take first.

1. Settle on a concrete style and document it using a formatter's rules
   (in say a .clang-format file).  This style would most likely need to
   be tuned a little bit, at least the 'Penalty' configuration would
   need to be tuned which (as far as I understand it) is used to
   determine which rule to break first to ensure a line isn't too long.

2. Start getting contributors to use the tool to format their patches.
   This would include having some script or hook that a contributor
   could run to only format the sections of code that they touched.

3. Slowly the code base would begin to have a uniform style.  At
   some point we may want to then reformat the remaining sections of the
   code base.  At this point we could have some automated bot that fixes
   style.

I'm sure I missed a step in there somewhere though.

-- 
Brandon Williams


Re: [RFC] clang-format: outline the git project's coding style

2017-08-08 Thread Brandon Williams
On 08/08, Stefan Beller wrote:
> On Tue, Aug 8, 2017 at 5:05 AM, Johannes Schindelin
> <johannes.schinde...@gmx.de> wrote:
> > Hi Brandon,
> >
> > On Mon, 7 Aug 2017, Brandon Williams wrote:
> >
> >> Add a '.clang-format' file which outlines the git project's coding
> >> style.  This can be used with clang-format to auto-format .c and .h
> >> files to conform with git's style.
> >>
> >> Signed-off-by: Brandon Williams <bmw...@google.com>
> >> ---
> >>
> >> I'm sure this sort of thing comes up every so often on the list but back at
> >> git-merge I mentioned how it would be nice to not have to worry about style
> >> when reviewing patches as that is something mechanical and best left to a
> >> machine (for the most part).
> >
> > Amen.
> >
> > If I never have to see a review mentioning an unwrapped line, I am quite
> > certain I will be quite content.
> >
> > Ciao,
> > Dscho
> 
> As a thought experiment I'd like to propose to take it one step further:
> 
>   If the code was formatted perfectly in one style such that a formatter for
>   this style would not produce changes when rerun again on the code, then
>   each individual could have a clean/smudge filter to work in their preferred
>   style, and only the exchange and storage of code is in a mutual agreed
>   style. If the mutually agreed style is close to what I prefer, I don't have 
> to
>   use clean/smudge filters.
> 
> Additionally to this patch, we'd want to either put a note into
> SubmittingPatches or Documentation/gitworkflows.txt to hint at
> how to use this formatting to just affect the patch that is currently
> worked on or rather a pre-commit hook?

I'm sure both of these things will probably happen if we decide to make
use of a code-formatter.  This RFC is really just trying to ask the
question: "Is this something we want to entertain doing?"
My response would be 'Yes' but there's many other opinions to consider
first :)

-- 
Brandon Williams


Re: [RFC] clang-format: outline the git project's coding style

2017-08-08 Thread Brandon Williams
On 08/08, Junio C Hamano wrote:
> Brandon Williams <bmw...@google.com> writes:
> 
> > Add a '.clang-format' file which outlines the git project's coding
> > style.  This can be used with clang-format to auto-format .c and .h
> > files to conform with git's style.
> >
> > Signed-off-by: Brandon Williams <bmw...@google.com>
> > ---
> >
> > I'm sure this sort of thing comes up every so often on the list but back at
> > git-merge I mentioned how it would be nice to not have to worry about style
> > when reviewing patches as that is something mechanical and best left to a
> > machine (for the most part).  I saw that 'clang-format' was brought up on 
> > the
> > list once before a couple years ago
> > (https://public-inbox.org/git/20150121220903.ga10...@peff.net/) but nothing
> > really came of it.  I spent a little bit of time combing through the various
> > options and came up with this config based on the general style of our code
> > base.  The big issue though is that our code base isn't consistent so try as
> > you might you wont be able to come up with a config which matches 
> > everything we
> > do (mostly due to the inconsistencies in our code base).
> >
> > Anyway, I thought I'd bring this topic back up and see how people feel 
> > about it.
> 
> I gave just one pass over all the rules you have here.  I didn't
> think too deeply about implications of some of them, but most of
> them looked sensible.  Thanks for compiling this set of rules.
> 
> Having said that, it is easier to refine individual rules you have
> below than to make sure that among the develoepers there is a shared
> notion of how these rules are to be used.  If we get that part wrong,
> we'd see unpleasant consequences, e.g.
> 
>  - We may see unwanted code churn on existing codebase, only for the
>sake of satisfying the formatting rules specified here.

This is an issue when you have an inconsistent code base such as ours as
the tool, even when used to format a diff, will format the surrounding
context.

> 
>  - We may see far more style-only critique to patches posted on the
>list simply because there are more readers than writers, and it
>is likely that running the tool to nitpick other people's patches
>is far easier than writing these patches in the first place (and
>forgetting to ask the formatter to nitpick before sending them
>out).

I would hope that use of such a tool would eventually completely
eliminate style-only critiques.  

> 
>  - Human aesthetics judgement often is necessary to overrule
>mechanical rules (e.g. A human may have two pairs of <ptr, len>
>parameters on separate lines in a function declaration;
>BinPackParameters may try to break after ptrA, lenA, ptrB before
>lenB in such a case).

I know that when you introduce a formatter there are bound to be some
issues where a human would choose one way over the other.  If we start
using a formatter I would expect some of the penalties would need to be
tweaked overtime before we rely too heavily on it.

> 
> We need to set our expectation and a guideline to apply these rules
> straight, before introducing something like this.

> 
> 
> >  .clang-format | 166 
> > ++
> >  1 file changed, 166 insertions(+)
> >  create mode 100644 .clang-format
> >
> > diff --git a/.clang-format b/.clang-format
> > new file mode 100644
> > index 0..7f28dc259
> > --- /dev/null
> > +++ b/.clang-format
> > @@ -0,0 +1,166 @@
> > +# Defaults
> > +
> > +# Use tabs whenever we need to fill whitespace that spans at least from 
> > one tab
> > +# stop to the next one.
> > +UseTab: Always
> > +TabWidth: 8
> > +IndentWidth: 8
> > +ContinuationIndentWidth: 8
> > +ColumnLimit: 80
> 
> I often deliberately chomp my lines much shorter than this limit,
> and also I deliberately write a line that is a tad longer than this
> limit some other times, if doing so makes the result easier to read.
> And I know other develoepers with good taste do the same.  It is
> pointless to have a discussion that begins with "who uses a physical
> terminal these days that can only show 80-columns?" to tweak this
> value, I think.  It is more important to give a guideline on what to
> do when lines in your code goes over this limit.
> 
> A mechanical "formatter" would just find an appropriate point in a
> line with least "penalty" and chomp it into two.  But an appropriate
> way to make such a code that is way too deeply indented readable may
> instead be judicious use o

[RFC] clang-format: outline the git project's coding style

2017-08-07 Thread Brandon Williams
Add a '.clang-format' file which outlines the git project's coding
style.  This can be used with clang-format to auto-format .c and .h
files to conform with git's style.

Signed-off-by: Brandon Williams <bmw...@google.com>
---

I'm sure this sort of thing comes up every so often on the list but back at
git-merge I mentioned how it would be nice to not have to worry about style
when reviewing patches as that is something mechanical and best left to a
machine (for the most part).  I saw that 'clang-format' was brought up on the
list once before a couple years ago
(https://public-inbox.org/git/20150121220903.ga10...@peff.net/) but nothing
really came of it.  I spent a little bit of time combing through the various
options and came up with this config based on the general style of our code
base.  The big issue though is that our code base isn't consistent so try as
you might you wont be able to come up with a config which matches everything we
do (mostly due to the inconsistencies in our code base).

Anyway, I thought I'd bring this topic back up and see how people feel about it.

 .clang-format | 166 ++
 1 file changed, 166 insertions(+)
 create mode 100644 .clang-format

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0..7f28dc259
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,166 @@
+# Defaults
+
+# Use tabs whenever we need to fill whitespace that spans at least from one tab
+# stop to the next one.
+UseTab: Always
+TabWidth: 8
+IndentWidth: 8
+ContinuationIndentWidth: 8
+ColumnLimit: 80
+
+# C Language specifics
+Language: Cpp
+
+# Align parameters on the open bracket
+# someLongFunction(argument1,
+#  argument2);
+AlignAfterOpenBracket: Align
+
+# Don't align consecutive assignments
+# int  = 12;
+# int b = 14;
+AlignConsecutiveAssignments: false
+
+# Don't align consecutive declarations
+# int  = 12;
+# double b = 3.14;
+AlignConsecutiveDeclarations: false
+
+# Align escaped newlines as far left as possible
+# #define A   \
+#   int ; \
+#   int b;\
+#   int ;
+AlignEscapedNewlines: Left
+
+# Align operands of binary and ternary expressions
+# int aaa = bbb +
+#   cc;
+AlignOperands: true
+
+# Don't align trailing comments
+# int a; // Comment a
+# int b = 2; // Comment b
+AlignTrailingComments: false
+
+# By default don't allow putting parameters onto the next line
+# myFunction(foo, bar, baz);
+AllowAllParametersOfDeclarationOnNextLine: false
+
+# Don't allow short braced statements to be on a single line
+# if (a)   not   if (a) return;
+#   return;
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: false
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+
+# Add a line break after the return type of top-level functions
+# int
+# foo();
+AlwaysBreakAfterReturnType: TopLevel
+
+# Pack as many parameters or arguments onto the same line as possible
+# int myFunction(int , int ,
+#int );
+BinPackArguments: true
+BinPackParameters: true
+
+# Attach braces to surrounding context except break before braces on function
+# definitions.
+# void foo()
+# {
+#if (true) {
+#} else {
+#}
+# };
+BreakBeforeBraces: Linux
+
+# Break after operators
+# int valuve = a +
+#  bb -
+#  ccc;
+BreakBeforeBinaryOperators: None
+BreakBeforeTernaryOperators: false
+
+# Don't break string literals
+BreakStringLiterals: false
+
+# Use the same indentation level as for the switch statement.
+# Switch statement body is always indented one level more than case labels.
+IndentCaseLabels: false
+
+# Don't indent a function definition or declaration if it is wrapped after the
+# type
+IndentWrappedFunctionNames: false
+
+# Align pointer to the right
+# int *a;
+PointerAlignment: Right
+
+# Insert a space after a cast
+# x = (int32) y;notx = (int32)y;
+SpaceAfterCStyleCast: true
+
+# Insert spaces before and after assignment operators
+# int a = 5;notint a=5;
+# a += 42; a+=42;
+SpaceBeforeAssignmentOperators: true
+
+# Put a space before opening parentheses only after control statement keywords.
+# void f() {
+#   if (true) {
+# f();
+#   }
+# }
+SpaceBeforeParens: ControlStatements
+
+# Don't insert spaces inside empty '()'
+SpaceInEmptyParentheses: false
+
+# The number of spaces before trailing line comments (// - comments).
+# This does not affect trailing block comments (/* - comments).
+SpacesBeforeTrailingComments: 1
+
+# Don't insert spaces in casts
+# x = (int32) y;notx = ( int32 ) y;
+SpacesInCStyleCastParentheses: false
+
+# Don't insert spaces inside container literals
+# var arr = [1, 2, 3];notvar arr = [ 1, 2, 3 ];
+SpacesInContainerLiterals: false
+
+# Don't insert spaces after '(' or before ')'
+# f(arg);not

Re: [PATCH v2 10/15] diff: stop allowing diff to have submodules configured in .git/config

2017-08-04 Thread Brandon Williams
On 08/03, Junio C Hamano wrote:
> Brandon Williams <bmw...@google.com> writes:
> 
> > Traditionally a submodule is comprised of a gitlink as well as a
> > corresponding entry in the .gitmodules file.  Diff doesn't follow this
> > paradigm as its config callback routine falls back to populating the
> > submodule-config if a config entry starts with 'submodule.'.
> >
> > Remove this behavior in order to be consistent with how the
> > submodule-config is populated, via calling 'gitmodules_config()' or
> > 'repo_read_gitmodules()'.
> 
> I am all for dropping special cases deep in the diff machinery, even
> though there may be submodule users who care about submodule.*.ignore
> 
> Does this change mean we can eventually get rid of the ugly
> DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG hack and also need for a patch
> like 03/15?

I think that this is a step toward getting rid of that.  We can either
do two things: 1) deprecate submodule.*.ignore and don't respect it
anymore or 2) flip the polarity of that flag so that by default we
don't respect the submodule.*.ignore config and instead callers must opt
in instead of the current opt out behavior.

> 
> >
> > Signed-off-by: Brandon Williams <bmw...@google.com>
> > ---
> >  diff.c|  3 ---
> >  t/t4027-diff-submodule.sh | 67 
> > ---
> >  2 files changed, 70 deletions(-)
> >
> > diff --git a/diff.c b/diff.c
> > index 85e714f6c..e43519b88 100644
> > --- a/diff.c
> > +++ b/diff.c
> > @@ -346,9 +346,6 @@ int git_diff_basic_config(const char *var, const char 
> > *value, void *cb)
> > return 0;
> > }
> >  
> > -   if (starts_with(var, "submodule."))
> > -   return parse_submodule_config_option(var, value);
> > -
> > if (git_diff_heuristic_config(var, value, cb) < 0)
> > return -1;
> >  
> > diff --git a/t/t4027-diff-submodule.sh b/t/t4027-diff-submodule.sh
> > index 518bf9524..2ffd11a14 100755
> > --- a/t/t4027-diff-submodule.sh
> > +++ b/t/t4027-diff-submodule.sh
> > @@ -113,35 +113,6 @@ test_expect_success 'git diff HEAD with dirty 
> > submodule (work tree, refs match)'
> > ! test -s actual4
> >  '
> >  
> > -test_expect_success 'git diff HEAD with dirty submodule (work tree, refs 
> > match) [.git/config]' '
> > -   git config diff.ignoreSubmodules all &&
> > -   git diff HEAD >actual &&
> > -   ! test -s actual &&
> > -   git config submodule.subname.ignore none &&
> > -   git config submodule.subname.path sub &&
> > -   git diff HEAD >actual &&
> > -   sed -e "1,/^@@/d" actual >actual.body &&
> > -   expect_from_to >expect.body $subprev $subprev-dirty &&
> > -   test_cmp expect.body actual.body &&
> > -   git config submodule.subname.ignore all &&
> > -   git diff HEAD >actual2 &&
> > -   ! test -s actual2 &&
> > -   git config submodule.subname.ignore untracked &&
> > -   git diff HEAD >actual3 &&
> > -   sed -e "1,/^@@/d" actual3 >actual3.body &&
> > -   expect_from_to >expect.body $subprev $subprev-dirty &&
> > -   test_cmp expect.body actual3.body &&
> > -   git config submodule.subname.ignore dirty &&
> > -   git diff HEAD >actual4 &&
> > -   ! test -s actual4 &&
> > -   git diff HEAD --ignore-submodules=none >actual &&
> > -   sed -e "1,/^@@/d" actual >actual.body &&
> > -   expect_from_to >expect.body $subprev $subprev-dirty &&
> > -   test_cmp expect.body actual.body &&
> > -   git config --remove-section submodule.subname &&
> > -   git config --unset diff.ignoreSubmodules
> > -'
> > -
> >  test_expect_success 'git diff HEAD with dirty submodule (work tree, refs 
> > match) [.gitmodules]' '
> > git config diff.ignoreSubmodules dirty &&
> > git diff HEAD >actual &&
> > @@ -208,24 +179,6 @@ test_expect_success 'git diff HEAD with dirty 
> > submodule (untracked, refs match)'
> > ! test -s actual4
> >  '
> >  
> > -test_expect_success 'git diff HEAD with dirty submodule (untracked, refs 
> > match) [.git/config]' '
> > -   git config submodule.subname.ignore all &&
> > -   git config submodule.subname.path sub &&
> > -   git diff HEAD >actual2 &&
> > -   ! test -s actual2 &

Re: [PATCH v2 02/15] submodule: don't use submodule_from_name

2017-08-04 Thread Brandon Williams
On 08/03, Stefan Beller wrote:
> On Thu, Aug 3, 2017 at 11:19 AM, Brandon Williams <bmw...@google.com> wrote:
> > The function 'submodule_from_name()' is being used incorrectly here as a
> > submodule path is being used instead of a submodule name.  Since the
> > correct function to use with a path to a submodule is already being used
> > ('submodule_from_path()') let's remove the call to
> > 'submodule_from_name()'.
> >
> > Signed-off-by: Brandon Williams <bmw...@google.com>
> 
> In case a reroll is needed, you could incorperate Jens feedback
> stating that 851e18c385 should have done it.

K I'll add that into the commit message.

> 
> > ---
> >  submodule.c | 2 --
> >  1 file changed, 2 deletions(-)
> >
> > diff --git a/submodule.c b/submodule.c
> > index 5139b9256..19bd13bb2 100644
> > --- a/submodule.c
> > +++ b/submodule.c
> > @@ -1177,8 +1177,6 @@ static int get_next_submodule(struct child_process 
> > *cp,
> > continue;
> >
> > submodule = submodule_from_path(_oid, ce->name);
> > -   if (!submodule)
> > -   submodule = submodule_from_name(_oid, 
> > ce->name);
> >
> >     default_argv = "yes";
> > if (spf->command_line_option == RECURSE_SUBMODULES_DEFAULT) 
> > {
> > --
> > 2.14.0.rc1.383.gd1ce394fe2-goog
> >

-- 
Brandon Williams


Re: [PATCH] clone: teach recursive clones to respect -q

2017-08-04 Thread Brandon Williams
On 08/04, Junio C Hamano wrote:
> Brandon Williams <bmw...@google.com> writes:
> 
> > Teach 'git clone --recurse-submodules' to respect the '-q' option by
> > passing down the quiet flag to the process which handles cloning of
> > submodules.
> >
> > Signed-off-by: Brandon Williams <bmw...@google.com>
> > ---
> >  builtin/clone.c| 3 +++
> >  t/t7400-submodule-basic.sh | 6 ++
> >  2 files changed, 9 insertions(+)
> >
> > diff --git a/builtin/clone.c b/builtin/clone.c
> > index 08b5cc433..f7e17d229 100644
> > --- a/builtin/clone.c
> > +++ b/builtin/clone.c
> > @@ -768,6 +768,9 @@ static int checkout(int submodule_progress)
> > if (submodule_progress)
> > argv_array_push(, "--progress");
> >  
> > +   if (option_verbosity < 0)
> > +   argv_array_push(, "--quiet");
> > +
> > err = run_command_v_opt(args.argv, RUN_GIT_CMD);
> > argv_array_clear();
> > }
> 
> Good spotting.  Makes me wonder if there still are other options we
> might have missed, but we can incrementally improve as bug report
> comes in ;-)

Yeah I've noticed that the number one difficulty with doing submodule
operations in another process is making sure that the correct options
are passed down.

> 
> Will queue.  Thanks.

Thanks!

> 
> > diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
> > index dcac364c5..e9c3335b7 100755
> > --- a/t/t7400-submodule-basic.sh
> > +++ b/t/t7400-submodule-basic.sh
> > @@ -1289,4 +1289,10 @@ test_expect_success 'init properly sets the config' '
> > test_must_fail git -C multisuper_clone config --get 
> > submodule.sub1.active
> >  '
> >  
> > +test_expect_success 'recursive clone respects -q' '
> > +   test_when_finished "rm -rf multisuper_clone" &&
> > +   git clone -q --recurse-submodules multisuper multisuper_clone >actual &&
> > +   test_must_be_empty actual
> > +'
> > +
> >  test_done

-- 
Brandon Williams


[PATCH] clone: teach recursive clones to respect -q

2017-08-03 Thread Brandon Williams
Teach 'git clone --recurse-submodules' to respect the '-q' option by
passing down the quiet flag to the process which handles cloning of
submodules.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 builtin/clone.c| 3 +++
 t/t7400-submodule-basic.sh | 6 ++
 2 files changed, 9 insertions(+)

diff --git a/builtin/clone.c b/builtin/clone.c
index 08b5cc433..f7e17d229 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -768,6 +768,9 @@ static int checkout(int submodule_progress)
if (submodule_progress)
argv_array_push(, "--progress");
 
+   if (option_verbosity < 0)
+   argv_array_push(, "--quiet");
+
err = run_command_v_opt(args.argv, RUN_GIT_CMD);
argv_array_clear();
}
diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
index dcac364c5..e9c3335b7 100755
--- a/t/t7400-submodule-basic.sh
+++ b/t/t7400-submodule-basic.sh
@@ -1289,4 +1289,10 @@ test_expect_success 'init properly sets the config' '
test_must_fail git -C multisuper_clone config --get 
submodule.sub1.active
 '
 
+test_expect_success 'recursive clone respects -q' '
+   test_when_finished "rm -rf multisuper_clone" &&
+   git clone -q --recurse-submodules multisuper multisuper_clone >actual &&
+   test_must_be_empty actual
+'
+
 test_done
-- 
2.14.0.rc1.383.gd1ce394fe2-goog



[PATCH v2 01/15] t7411: check configuration parsing errors

2017-08-03 Thread Brandon Williams
Check for configuration parsing errors in '.gitmodules' in t7411, which
is explicitly testing the submodule-config subsystem, instead of in
t7400.  Also explicitly use the test helper instead of relying on the
gitmodules file from being read in status.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 t/t7400-submodule-basic.sh  | 10 --
 t/t7411-submodule-config.sh | 15 +++
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
index dcac364c5..717447526 100755
--- a/t/t7400-submodule-basic.sh
+++ b/t/t7400-submodule-basic.sh
@@ -46,16 +46,6 @@ test_expect_success 'submodule update aborts on missing 
gitmodules url' '
test_must_fail git submodule init
 '
 
-test_expect_success 'configuration parsing' '
-   test_when_finished "rm -f .gitmodules" &&
-   cat >.gitmodules <<-\EOF &&
-   [submodule "s"]
-   path
-   ignore
-   EOF
-   test_must_fail git status
-'
-
 test_expect_success 'setup - repository in init subdirectory' '
mkdir init &&
(
diff --git a/t/t7411-submodule-config.sh b/t/t7411-submodule-config.sh
index eea36f1db..7d6b25ba2 100755
--- a/t/t7411-submodule-config.sh
+++ b/t/t7411-submodule-config.sh
@@ -31,6 +31,21 @@ test_expect_success 'submodule config cache setup' '
)
 '
 
+test_expect_success 'configuration parsing with error' '
+   test_when_finished "rm -rf repo" &&
+   test_create_repo repo &&
+   cat >repo/.gitmodules <<-\EOF &&
+   [submodule "s"]
+   path
+   ignore
+   EOF
+   (
+   cd repo &&
+   test_must_fail test-submodule-config "" s 2>actual &&
+   test_i18ngrep "bad config" actual
+   )
+'
+
 cat >super/expect <

[PATCH v2 02/15] submodule: don't use submodule_from_name

2017-08-03 Thread Brandon Williams
The function 'submodule_from_name()' is being used incorrectly here as a
submodule path is being used instead of a submodule name.  Since the
correct function to use with a path to a submodule is already being used
('submodule_from_path()') let's remove the call to
'submodule_from_name()'.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 submodule.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/submodule.c b/submodule.c
index 5139b9256..19bd13bb2 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1177,8 +1177,6 @@ static int get_next_submodule(struct child_process *cp,
continue;
 
submodule = submodule_from_path(_oid, ce->name);
-   if (!submodule)
-   submodule = submodule_from_name(_oid, ce->name);
 
default_argv = "yes";
if (spf->command_line_option == RECURSE_SUBMODULES_DEFAULT) {
-- 
2.14.0.rc1.383.gd1ce394fe2-goog



[PATCH v2 00/15] submodule-config cleanup

2017-08-03 Thread Brandon Williams
Changes in v2:
 * Rebased on latest 'bw/grep-recurse-submodules' branch (Still also requires
   the 'bc/object-id' series).
 * Changed unpack-trees.c (checkout command) so that it no longer respects the
   'submodule..update' config since it really didn't make much sense for
   it to respect it.
 * The above point also enabled me to fix some issues that coverity found with
   how I was overlaying the repo config with the submodule update strategy.
   Instead the update strategy parsing logic is separated into two functions so
   that just the enum can be determined from a string (which is all
   update-clone needed).

Brandon Williams (15):
  t7411: check configuration parsing errors
  submodule: don't use submodule_from_name
  add, reset: ensure submodules can be added or reset
  submodule--helper: don't overlay config in remote_submodule_branch
  submodule--helper: don't overlay config in update-clone
  fetch: don't overlay config with submodule-config
  submodule: don't rely on overlayed config when setting diffopts
  unpack-trees: don't respect submodule.update
  submodule: remove submodule_config callback routine
  diff: stop allowing diff to have submodules configured in .git/config
  submodule-config: remove support for overlaying repository config
  submodule-config: move submodule-config functions to
submodule-config.c
  submodule-config: lazy-load a repository's .gitmodules file
  unpack-trees: improve loading of .gitmodules
  submodule: remove gitmodules_config

 builtin/add.c|   1 +
 builtin/checkout.c   |   3 +-
 builtin/commit.c |   1 -
 builtin/diff-files.c |   1 -
 builtin/diff-index.c |   1 -
 builtin/diff-tree.c  |   1 -
 builtin/diff.c   |   2 -
 builtin/fetch.c  |   5 --
 builtin/grep.c   |   4 --
 builtin/ls-files.c   |   6 +-
 builtin/mv.c |   1 -
 builtin/read-tree.c  |   2 -
 builtin/reset.c  |   3 +-
 builtin/rm.c |   1 -
 builtin/submodule--helper.c  |  51 --
 diff.c   |   3 -
 submodule-config.c   |  65 +
 submodule-config.h   |   8 +--
 submodule.c  | 148 ++-
 submodule.h  |   6 +-
 t/helper/test-submodule-config.c |   7 --
 t/t4027-diff-submodule.sh|  67 --
 t/t7400-submodule-basic.sh   |  10 ---
 t/t7411-submodule-config.sh  |  87 ---
 unpack-trees.c   |  81 +
 25 files changed, 192 insertions(+), 373 deletions(-)

-- 
2.14.0.rc1.383.gd1ce394fe2-goog



[PATCH v2 11/15] submodule-config: remove support for overlaying repository config

2017-08-03 Thread Brandon Williams
All callers have been migrated to explicitly read any configuration they
need.  The support for handling it automatically in submodule-config is
no longer needed.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 submodule-config.h   |  1 -
 t/helper/test-submodule-config.c |  6 
 t/t7411-submodule-config.sh  | 72 
 3 files changed, 79 deletions(-)

diff --git a/submodule-config.h b/submodule-config.h
index cccd34b92..84c2cf515 100644
--- a/submodule-config.h
+++ b/submodule-config.h
@@ -34,7 +34,6 @@ extern int option_fetch_parse_recurse_submodules(const struct 
option *opt,
 const char *arg, int unset);
 extern int parse_update_recurse_submodules_arg(const char *opt, const char 
*arg);
 extern int parse_push_recurse_submodules_arg(const char *opt, const char *arg);
-extern int parse_submodule_config_option(const char *var, const char *value);
 extern int submodule_config_option(struct repository *repo,
   const char *var, const char *value);
 extern const struct submodule *submodule_from_name(
diff --git a/t/helper/test-submodule-config.c b/t/helper/test-submodule-config.c
index e13fbcc1b..f4a7c431c 100644
--- a/t/helper/test-submodule-config.c
+++ b/t/helper/test-submodule-config.c
@@ -10,11 +10,6 @@ static void die_usage(int argc, const char **argv, const 
char *msg)
exit(1);
 }
 
-static int git_test_config(const char *var, const char *value, void *cb)
-{
-   return parse_submodule_config_option(var, value);
-}
-
 int cmd_main(int argc, const char **argv)
 {
const char **arg = argv;
@@ -38,7 +33,6 @@ int cmd_main(int argc, const char **argv)
 
setup_git_directory();
gitmodules_config();
-   git_config(git_test_config, NULL);
 
while (*arg) {
struct object_id commit_oid;
diff --git a/t/t7411-submodule-config.sh b/t/t7411-submodule-config.sh
index 7d6b25ba2..46c09c776 100755
--- a/t/t7411-submodule-config.sh
+++ b/t/t7411-submodule-config.sh
@@ -122,78 +122,6 @@ test_expect_success 'using different treeishs works' '
)
 '
 
-cat >super/expect_url <super/expect_local_path <actual &&
-   test_cmp expect_url actual &&
-   git config submodule.a.path c &&
-   test-submodule-config \
-   "" c \
-   "" submodule \
-   >actual &&
-   test_cmp expect_local_path actual &&
-   git config submodule.a.url "$old_a" &&
-   git config submodule.submodule.url "$old_submodule" &&
-   git config --unset submodule.a.path c
-   )
-'
-
-cat >super/expect_url <actual &&
-   test_cmp expect_url actual &&
-   git config submodule.submodule.url "$old_submodule" &&
-   git submodule init b
-   )
-'
-
-cat >super/expect_fetchrecurse_die.err <actual.out 2>actual.err &&
-   touch expect_fetchrecurse_die.out &&
-   test_cmp expect_fetchrecurse_die.out actual.out  &&
-   test_cmp expect_fetchrecurse_die.err actual.err  &&
-   git config --unset submodule.submodule.fetchrecursesubmodules
-   )
-'
-
 test_expect_success 'error in history in fetchrecursesubmodule lets continue' '
(cd super &&
git config -f .gitmodules \
-- 
2.14.0.rc1.383.gd1ce394fe2-goog



[PATCH v2 10/15] diff: stop allowing diff to have submodules configured in .git/config

2017-08-03 Thread Brandon Williams
Traditionally a submodule is comprised of a gitlink as well as a
corresponding entry in the .gitmodules file.  Diff doesn't follow this
paradigm as its config callback routine falls back to populating the
submodule-config if a config entry starts with 'submodule.'.

Remove this behavior in order to be consistent with how the
submodule-config is populated, via calling 'gitmodules_config()' or
'repo_read_gitmodules()'.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 diff.c|  3 ---
 t/t4027-diff-submodule.sh | 67 ---
 2 files changed, 70 deletions(-)

diff --git a/diff.c b/diff.c
index 85e714f6c..e43519b88 100644
--- a/diff.c
+++ b/diff.c
@@ -346,9 +346,6 @@ int git_diff_basic_config(const char *var, const char 
*value, void *cb)
return 0;
}
 
-   if (starts_with(var, "submodule."))
-   return parse_submodule_config_option(var, value);
-
if (git_diff_heuristic_config(var, value, cb) < 0)
return -1;
 
diff --git a/t/t4027-diff-submodule.sh b/t/t4027-diff-submodule.sh
index 518bf9524..2ffd11a14 100755
--- a/t/t4027-diff-submodule.sh
+++ b/t/t4027-diff-submodule.sh
@@ -113,35 +113,6 @@ test_expect_success 'git diff HEAD with dirty submodule 
(work tree, refs match)'
! test -s actual4
 '
 
-test_expect_success 'git diff HEAD with dirty submodule (work tree, refs 
match) [.git/config]' '
-   git config diff.ignoreSubmodules all &&
-   git diff HEAD >actual &&
-   ! test -s actual &&
-   git config submodule.subname.ignore none &&
-   git config submodule.subname.path sub &&
-   git diff HEAD >actual &&
-   sed -e "1,/^@@/d" actual >actual.body &&
-   expect_from_to >expect.body $subprev $subprev-dirty &&
-   test_cmp expect.body actual.body &&
-   git config submodule.subname.ignore all &&
-   git diff HEAD >actual2 &&
-   ! test -s actual2 &&
-   git config submodule.subname.ignore untracked &&
-   git diff HEAD >actual3 &&
-   sed -e "1,/^@@/d" actual3 >actual3.body &&
-   expect_from_to >expect.body $subprev $subprev-dirty &&
-   test_cmp expect.body actual3.body &&
-   git config submodule.subname.ignore dirty &&
-   git diff HEAD >actual4 &&
-   ! test -s actual4 &&
-   git diff HEAD --ignore-submodules=none >actual &&
-   sed -e "1,/^@@/d" actual >actual.body &&
-   expect_from_to >expect.body $subprev $subprev-dirty &&
-   test_cmp expect.body actual.body &&
-   git config --remove-section submodule.subname &&
-   git config --unset diff.ignoreSubmodules
-'
-
 test_expect_success 'git diff HEAD with dirty submodule (work tree, refs 
match) [.gitmodules]' '
git config diff.ignoreSubmodules dirty &&
git diff HEAD >actual &&
@@ -208,24 +179,6 @@ test_expect_success 'git diff HEAD with dirty submodule 
(untracked, refs match)'
! test -s actual4
 '
 
-test_expect_success 'git diff HEAD with dirty submodule (untracked, refs 
match) [.git/config]' '
-   git config submodule.subname.ignore all &&
-   git config submodule.subname.path sub &&
-   git diff HEAD >actual2 &&
-   ! test -s actual2 &&
-   git config submodule.subname.ignore untracked &&
-   git diff HEAD >actual3 &&
-   ! test -s actual3 &&
-   git config submodule.subname.ignore dirty &&
-   git diff HEAD >actual4 &&
-   ! test -s actual4 &&
-   git diff --ignore-submodules=none HEAD >actual &&
-   sed -e "1,/^@@/d" actual >actual.body &&
-   expect_from_to >expect.body $subprev $subprev-dirty &&
-   test_cmp expect.body actual.body &&
-   git config --remove-section submodule.subname
-'
-
 test_expect_success 'git diff HEAD with dirty submodule (untracked, refs 
match) [.gitmodules]' '
git config --add -f .gitmodules submodule.subname.ignore all &&
git config --add -f .gitmodules submodule.subname.path sub &&
@@ -261,26 +214,6 @@ test_expect_success 'git diff between submodule commits' '
! test -s actual
 '
 
-test_expect_success 'git diff between submodule commits [.git/config]' '
-   git diff HEAD^..HEAD >actual &&
-   sed -e "1,/^@@/d" actual >actual.body &&
-   expect_from_to >expect.body $subtip $subprev &&
-   test_cmp expect.body actual.body &&
-   git config submodule.subname.ignore dirty &&
-   git config submodule.subname.path sub &&
-

[PATCH v2 04/15] submodule--helper: don't overlay config in remote_submodule_branch

2017-08-03 Thread Brandon Williams
Don't rely on overlaying the repository's config on top of the
submodule-config, instead query the repository's config directly for the
branch field.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 builtin/submodule--helper.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 1e49ce580..f71f4270d 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -1066,17 +1066,24 @@ static int resolve_relative_path(int argc, const char 
**argv, const char *prefix
 static const char *remote_submodule_branch(const char *path)
 {
const struct submodule *sub;
+   const char *branch = NULL;
+   char *key;
+
gitmodules_config();
-   git_config(submodule_config, NULL);
 
sub = submodule_from_path(_oid, path);
if (!sub)
return NULL;
 
-   if (!sub->branch)
+   key = xstrfmt("submodule.%s.branch", sub->name);
+   if (repo_config_get_string_const(the_repository, key, ))
+   branch = sub->branch;
+   free(key);
+
+   if (!branch)
return "master";
 
-   if (!strcmp(sub->branch, ".")) {
+   if (!strcmp(branch, ".")) {
unsigned char sha1[20];
const char *refname = resolve_ref_unsafe("HEAD", 0, sha1, NULL);
 
@@ -1094,7 +1101,7 @@ static const char *remote_submodule_branch(const char 
*path)
return refname;
}
 
-   return sub->branch;
+   return branch;
 }
 
 static int resolve_remote_submodule_branch(int argc, const char **argv,
-- 
2.14.0.rc1.383.gd1ce394fe2-goog



[PATCH v2 13/15] submodule-config: lazy-load a repository's .gitmodules file

2017-08-03 Thread Brandon Williams
In order to use the submodule-config subsystem, callers first need to
initialize it by calling 'repo_read_gitmodules()' or
'gitmodules_config()' (which just redirects to
'repo_read_gitmodules()').  There are a couple of callers who need to
load an explicit revision of the repository's .gitmodules file (grep) or
need to modify the .gitmodules file so they would need to load it before
modify the file (checkout), but the majority of callers are simply
reading the .gitmodules file present in the working tree.  For the
common case it would be nice to avoid the boilerplate of initializing
the submodule-config system before using it, so instead let's perform
lazy-loading of the submodule-config system.

Remove the calls to reading the gitmodules file from ls-files to show
that lazy-loading the .gitmodules file works.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 builtin/ls-files.c |  5 -
 submodule-config.c | 27 ++-
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index d14612057..bd74ee07d 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -211,8 +211,6 @@ static void show_submodule(struct repository *superproject,
if (repo_read_index() < 0)
die("index file corrupt");
 
-   repo_read_gitmodules();
-
show_files(, dir);
 
repo_clear();
@@ -611,9 +609,6 @@ int cmd_ls_files(int argc, const char **argv, const char 
*cmd_prefix)
if (require_work_tree && !is_inside_work_tree())
setup_work_tree();
 
-   if (recurse_submodules)
-   repo_read_gitmodules(the_repository);
-
if (recurse_submodules &&
(show_stage || show_deleted || show_others || show_unmerged ||
 show_killed || show_modified || show_resolve_undo || with_tree))
diff --git a/submodule-config.c b/submodule-config.c
index 86636654b..56d9d76d4 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -18,6 +18,7 @@ struct submodule_cache {
struct hashmap for_path;
struct hashmap for_name;
unsigned initialized:1;
+   unsigned gitmodules_read:1;
 };
 
 /*
@@ -93,6 +94,7 @@ static void submodule_cache_clear(struct submodule_cache 
*cache)
hashmap_free(>for_path, 1);
hashmap_free(>for_name, 1);
cache->initialized = 0;
+   cache->gitmodules_read = 0;
 }
 
 void submodule_cache_free(struct submodule_cache *cache)
@@ -557,8 +559,6 @@ static int gitmodules_cb(const char *var, const char 
*value, void *data)
struct repository *repo = data;
struct parse_config_parameter parameter;
 
-   submodule_cache_check_init(repo);
-
parameter.cache = repo->submodule_cache;
parameter.treeish_name = NULL;
parameter.gitmodules_sha1 = null_sha1;
@@ -569,6 +569,8 @@ static int gitmodules_cb(const char *var, const char 
*value, void *data)
 
 void repo_read_gitmodules(struct repository *repo)
 {
+   submodule_cache_check_init(repo);
+
if (repo->worktree) {
char *gitmodules;
 
@@ -582,6 +584,8 @@ void repo_read_gitmodules(struct repository *repo)
 
free(gitmodules);
}
+
+   repo->submodule_cache->gitmodules_read = 1;
 }
 
 void gitmodules_config_oid(const struct object_id *commit_oid)
@@ -589,24 +593,37 @@ void gitmodules_config_oid(const struct object_id 
*commit_oid)
struct strbuf rev = STRBUF_INIT;
struct object_id oid;
 
+   submodule_cache_check_init(the_repository);
+
if (gitmodule_oid_from_commit(commit_oid, , )) {
git_config_from_blob_oid(gitmodules_cb, rev.buf,
 , the_repository);
}
strbuf_release();
+
+   the_repository->submodule_cache->gitmodules_read = 1;
+}
+
+static void gitmodules_read_check(struct repository *repo)
+{
+   submodule_cache_check_init(repo);
+
+   /* read the repo's .gitmodules file if it hasn't been already */
+   if (!repo->submodule_cache->gitmodules_read)
+   repo_read_gitmodules(repo);
 }
 
 const struct submodule *submodule_from_name(const struct object_id 
*treeish_name,
const char *name)
 {
-   submodule_cache_check_init(the_repository);
+   gitmodules_read_check(the_repository);
return config_from(the_repository->submodule_cache, treeish_name, name, 
lookup_name);
 }
 
 const struct submodule *submodule_from_path(const struct object_id 
*treeish_name,
const char *path)
 {
-   submodule_cache_check_init(the_repository);
+   gitmodules_read_check(the_repository);
return config_from(the_repository->submodule_cache, treeish_name, path, 
lookup_path);
 }
 
@@ -614,7 +631,7 @@ const struct submodule *submodule_from_cache(struct 
repository *repo,
 const struct object_id

[PATCH v2 06/15] fetch: don't overlay config with submodule-config

2017-08-03 Thread Brandon Williams
Don't rely on overlaying the repository's config on top of the
submodule-config, instead query the repository's config directly for the
fetch_recurse field.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 builtin/fetch.c |  1 -
 submodule.c | 24 +---
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index d84c26391..3fe99073d 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1362,7 +1362,6 @@ int cmd_fetch(int argc, const char **argv, const char 
*prefix)
 
if (recurse_submodules != RECURSE_SUBMODULES_OFF) {
gitmodules_config();
-   git_config(submodule_config, NULL);
}
 
if (all) {
diff --git a/submodule.c b/submodule.c
index 8a9b964ce..59e3d0828 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1194,14 +1194,24 @@ static int get_next_submodule(struct child_process *cp,
 
default_argv = "yes";
if (spf->command_line_option == RECURSE_SUBMODULES_DEFAULT) {
-   if (submodule &&
-   submodule->fetch_recurse !=
-   RECURSE_SUBMODULES_NONE) {
-   if (submodule->fetch_recurse ==
-   RECURSE_SUBMODULES_OFF)
+   int fetch_recurse = RECURSE_SUBMODULES_NONE;
+
+   if (submodule) {
+   char *key;
+   const char *value;
+
+   fetch_recurse = submodule->fetch_recurse;
+   key = 
xstrfmt("submodule.%s.fetchRecurseSubmodules", submodule->name);
+   if 
(!repo_config_get_string_const(the_repository, key, )) {
+   fetch_recurse = 
parse_fetch_recurse_submodules_arg(key, value);
+   }
+   free(key);
+   }
+
+   if (fetch_recurse != RECURSE_SUBMODULES_NONE) {
+   if (fetch_recurse == RECURSE_SUBMODULES_OFF)
continue;
-   if (submodule->fetch_recurse ==
-   RECURSE_SUBMODULES_ON_DEMAND) {
+   if (fetch_recurse == 
RECURSE_SUBMODULES_ON_DEMAND) {
if 
(!unsorted_string_list_lookup(_submodule_paths, ce->name))
continue;
default_argv = "on-demand";
-- 
2.14.0.rc1.383.gd1ce394fe2-goog



[PATCH v2 15/15] submodule: remove gitmodules_config

2017-08-03 Thread Brandon Williams
Now that the submodule-config subsystem can lazily read the gitmodules
file we no longer need to explicitly pre-read the gitmodules by calling
'gitmodules_config()' so let's remove it.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 builtin/checkout.c   |  1 -
 builtin/commit.c |  1 -
 builtin/diff-files.c |  1 -
 builtin/diff-index.c |  1 -
 builtin/diff-tree.c  |  1 -
 builtin/diff.c   |  2 --
 builtin/fetch.c  |  4 
 builtin/grep.c   |  4 
 builtin/mv.c |  1 -
 builtin/read-tree.c  |  2 --
 builtin/reset.c  |  2 --
 builtin/rm.c |  1 -
 builtin/submodule--helper.c  | 14 --
 submodule.c  | 15 ---
 submodule.h  |  2 --
 t/helper/test-submodule-config.c |  1 -
 16 files changed, 53 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index 246e0cd16..63ae16afc 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -1179,7 +1179,6 @@ int cmd_checkout(int argc, const char **argv, const char 
*prefix)
opts.prefix = prefix;
opts.show_progress = -1;
 
-   gitmodules_config();
git_config(git_checkout_config, );
 
opts.track = BRANCH_TRACK_UNSPECIFIED;
diff --git a/builtin/commit.c b/builtin/commit.c
index 4bbac014a..18ad714d9 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -195,7 +195,6 @@ static void determine_whence(struct wt_status *s)
 static void status_init_config(struct wt_status *s, config_fn_t fn)
 {
wt_status_prepare(s);
-   gitmodules_config();
git_config(fn, s);
determine_whence(s);
init_diff_ui_defaults();
diff --git a/builtin/diff-files.c b/builtin/diff-files.c
index 17bf84d18..e88493ffe 100644
--- a/builtin/diff-files.c
+++ b/builtin/diff-files.c
@@ -26,7 +26,6 @@ int cmd_diff_files(int argc, const char **argv, const char 
*prefix)
 
git_config(git_diff_basic_config, NULL); /* no "diff" UI options */
init_revisions(, prefix);
-   gitmodules_config();
rev.abbrev = 0;
precompose_argv(argc, argv);
 
diff --git a/builtin/diff-index.c b/builtin/diff-index.c
index 185e6f9b5..9d772f8f2 100644
--- a/builtin/diff-index.c
+++ b/builtin/diff-index.c
@@ -23,7 +23,6 @@ int cmd_diff_index(int argc, const char **argv, const char 
*prefix)
 
git_config(git_diff_basic_config, NULL); /* no "diff" UI options */
init_revisions(, prefix);
-   gitmodules_config();
rev.abbrev = 0;
precompose_argv(argc, argv);
 
diff --git a/builtin/diff-tree.c b/builtin/diff-tree.c
index 31d2cb410..d66499909 100644
--- a/builtin/diff-tree.c
+++ b/builtin/diff-tree.c
@@ -110,7 +110,6 @@ int cmd_diff_tree(int argc, const char **argv, const char 
*prefix)
 
git_config(git_diff_basic_config, NULL); /* no "diff" UI options */
init_revisions(opt, prefix);
-   gitmodules_config();
opt->abbrev = 0;
opt->diff = 1;
opt->disable_stdin = 1;
diff --git a/builtin/diff.c b/builtin/diff.c
index 7cde6abbc..7e3ebcea3 100644
--- a/builtin/diff.c
+++ b/builtin/diff.c
@@ -315,8 +315,6 @@ int cmd_diff(int argc, const char **argv, const char 
*prefix)
no_index = DIFF_NO_INDEX_IMPLICIT;
}
 
-   if (!no_index)
-   gitmodules_config();
init_diff_ui_defaults();
git_config(git_diff_ui_config, NULL);
precompose_argv(argc, argv);
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 3fe99073d..132e3224e 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1360,10 +1360,6 @@ int cmd_fetch(int argc, const char **argv, const char 
*prefix)
if (depth || deepen_since || deepen_not.nr)
deepen = 1;
 
-   if (recurse_submodules != RECURSE_SUBMODULES_OFF) {
-   gitmodules_config();
-   }
-
if (all) {
if (argc == 1)
die(_("fetch --all does not take a repository 
argument"));
diff --git a/builtin/grep.c b/builtin/grep.c
index ac06d2d33..2d65f27d0 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -1048,10 +1048,6 @@ int cmd_grep(int argc, const char **argv, const char 
*prefix)
}
 #endif
 
-   if (recurse_submodules) {
-   gitmodules_config();
-   }
-
if (show_in_pager && (cached || list.nr))
die(_("--open-files-in-pager only works on the worktree"));
 
diff --git a/builtin/mv.c b/builtin/mv.c
index 94fbaaa5d..ffdd5f01a 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -131,7 +131,6 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
struct stat st;
struct string_list src_for_dst = STRING_LIST_INIT_NODUP;
 
-   gitmodules_config();
git_config(git_default_config, NULL);
 
  

[PATCH v2 07/15] submodule: don't rely on overlayed config when setting diffopts

2017-08-03 Thread Brandon Williams
Don't rely on overlaying the repository's config on top of the
submodule-config, instead query the repository's config directory for
the ignore field.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 submodule.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/submodule.c b/submodule.c
index 59e3d0828..a32043893 100644
--- a/submodule.c
+++ b/submodule.c
@@ -165,8 +165,16 @@ void set_diffopt_flags_from_submodule_config(struct 
diff_options *diffopt,
 {
const struct submodule *submodule = submodule_from_path(_oid, 
path);
if (submodule) {
-   if (submodule->ignore)
-   handle_ignore_submodules_arg(diffopt, 
submodule->ignore);
+   const char *ignore;
+   char *key;
+
+   key = xstrfmt("submodule.%s.ignore", submodule->name);
+   if (repo_config_get_string_const(the_repository, key, ))
+   ignore = submodule->ignore;
+   free(key);
+
+   if (ignore)
+   handle_ignore_submodules_arg(diffopt, ignore);
else if (is_gitmodules_unmerged(_index))
DIFF_OPT_SET(diffopt, IGNORE_SUBMODULES);
}
-- 
2.14.0.rc1.383.gd1ce394fe2-goog



[PATCH v2 09/15] submodule: remove submodule_config callback routine

2017-08-03 Thread Brandon Williams
Remove the last remaining caller of 'submodule_config()' as well as the
function itself.

With 'submodule_config()' being removed the submodule-config API can be
a little simpler as callers don't need to worry about whether or not
they need to overlay the repository's config on top of the
submodule-config.  This also makes it more difficult to accidentally
add non-submodule specific configuration to the .gitmodules file.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 builtin/submodule--helper.c |  1 -
 submodule.c | 25 ++---
 submodule.h |  1 -
 3 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 36df7ab78..ba767c704 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -1205,7 +1205,6 @@ static int absorb_git_dirs(int argc, const char **argv, 
const char *prefix)
 git_submodule_helper_usage, 0);
 
gitmodules_config();
-   git_config(submodule_config, NULL);
 
if (module_list_compute(argc, argv, prefix, , ) < 0)
return 1;
diff --git a/submodule.c b/submodule.c
index f913c2341..3b383d8c4 100644
--- a/submodule.c
+++ b/submodule.c
@@ -180,27 +180,6 @@ void set_diffopt_flags_from_submodule_config(struct 
diff_options *diffopt,
}
 }
 
-/* For loading from the .gitmodules file. */
-static int git_modules_config(const char *var, const char *value, void *cb)
-{
-   if (starts_with(var, "submodule."))
-   return parse_submodule_config_option(var, value);
-   return 0;
-}
-
-/* Loads all submodule settings from the config. */
-int submodule_config(const char *var, const char *value, void *cb)
-{
-   if (!strcmp(var, "submodule.recurse")) {
-   int v = git_config_bool(var, value) ?
-   RECURSE_SUBMODULES_ON : RECURSE_SUBMODULES_OFF;
-   config_update_recurse_submodules = v;
-   return 0;
-   } else {
-   return git_modules_config(var, value, cb);
-   }
-}
-
 /* Cheap function that only determines if we're interested in submodules at 
all */
 int git_default_submodule_config(const char *var, const char *value, void *cb)
 {
@@ -271,8 +250,8 @@ void gitmodules_config_oid(const struct object_id 
*commit_oid)
struct object_id oid;
 
if (gitmodule_oid_from_commit(commit_oid, , )) {
-   git_config_from_blob_oid(submodule_config, rev.buf,
-, NULL);
+   git_config_from_blob_oid(gitmodules_cb, rev.buf,
+, the_repository);
}
strbuf_release();
 }
diff --git a/submodule.h b/submodule.h
index 48586efe7..4f70c4944 100644
--- a/submodule.h
+++ b/submodule.h
@@ -40,7 +40,6 @@ extern int remove_path_from_gitmodules(const char *path);
 extern void stage_updated_gitmodules(void);
 extern void set_diffopt_flags_from_submodule_config(struct diff_options *,
const char *path);
-extern int submodule_config(const char *var, const char *value, void *cb);
 extern int git_default_submodule_config(const char *var, const char *value, 
void *cb);
 
 struct option;
-- 
2.14.0.rc1.383.gd1ce394fe2-goog



[PATCH v2 12/15] submodule-config: move submodule-config functions to submodule-config.c

2017-08-03 Thread Brandon Williams
Migrate the functions used to initialize the submodule-config to
submodule-config.c so that the callback routine used in the
initialization process can be static and prevent it from being used
outside of initializing the submodule-config through the main API.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 builtin/ls-files.c |  1 +
 submodule-config.c | 38 +++---
 submodule-config.h |  7 ++-
 submodule.c| 35 ---
 submodule.h|  2 --
 5 files changed, 34 insertions(+), 49 deletions(-)

diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index b8514a002..d14612057 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -19,6 +19,7 @@
 #include "pathspec.h"
 #include "run-command.h"
 #include "submodule.h"
+#include "submodule-config.h"
 
 static int abbrev;
 static int show_deleted;
diff --git a/submodule-config.c b/submodule-config.c
index 0b429e942..86636654b 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -449,9 +449,9 @@ static int parse_config(const char *var, const char *value, 
void *data)
return ret;
 }
 
-int gitmodule_oid_from_commit(const struct object_id *treeish_name,
- struct object_id *gitmodules_oid,
- struct strbuf *rev)
+static int gitmodule_oid_from_commit(const struct object_id *treeish_name,
+struct object_id *gitmodules_oid,
+struct strbuf *rev)
 {
int ret = 0;
 
@@ -552,9 +552,9 @@ static void submodule_cache_check_init(struct repository 
*repo)
submodule_cache_init(repo->submodule_cache);
 }
 
-int submodule_config_option(struct repository *repo,
-   const char *var, const char *value)
+static int gitmodules_cb(const char *var, const char *value, void *data)
 {
+   struct repository *repo = data;
struct parse_config_parameter parameter;
 
submodule_cache_check_init(repo);
@@ -567,9 +567,33 @@ int submodule_config_option(struct repository *repo,
return parse_config(var, value, );
 }
 
-int parse_submodule_config_option(const char *var, const char *value)
+void repo_read_gitmodules(struct repository *repo)
 {
-   return submodule_config_option(the_repository, var, value);
+   if (repo->worktree) {
+   char *gitmodules;
+
+   if (repo_read_index(repo) < 0)
+   return;
+
+   gitmodules = repo_worktree_path(repo, GITMODULES_FILE);
+
+   if (!is_gitmodules_unmerged(repo->index))
+   git_config_from_file(gitmodules_cb, gitmodules, repo);
+
+   free(gitmodules);
+   }
+}
+
+void gitmodules_config_oid(const struct object_id *commit_oid)
+{
+   struct strbuf rev = STRBUF_INIT;
+   struct object_id oid;
+
+   if (gitmodule_oid_from_commit(commit_oid, , )) {
+   git_config_from_blob_oid(gitmodules_cb, rev.buf,
+, the_repository);
+   }
+   strbuf_release();
 }
 
 const struct submodule *submodule_from_name(const struct object_id 
*treeish_name,
diff --git a/submodule-config.h b/submodule-config.h
index 84c2cf515..e3845831f 100644
--- a/submodule-config.h
+++ b/submodule-config.h
@@ -34,8 +34,8 @@ extern int option_fetch_parse_recurse_submodules(const struct 
option *opt,
 const char *arg, int unset);
 extern int parse_update_recurse_submodules_arg(const char *opt, const char 
*arg);
 extern int parse_push_recurse_submodules_arg(const char *opt, const char *arg);
-extern int submodule_config_option(struct repository *repo,
-  const char *var, const char *value);
+extern void repo_read_gitmodules(struct repository *repo);
+extern void gitmodules_config_oid(const struct object_id *commit_oid);
 extern const struct submodule *submodule_from_name(
const struct object_id *commit_or_tree, const char *name);
 extern const struct submodule *submodule_from_path(
@@ -43,9 +43,6 @@ extern const struct submodule *submodule_from_path(
 extern const struct submodule *submodule_from_cache(struct repository *repo,
const struct object_id 
*treeish_name,
const char *key);
-extern int gitmodule_oid_from_commit(const struct object_id *commit_oid,
-struct object_id *gitmodules_oid,
-struct strbuf *rev);
 extern void submodule_free(void);
 
 #endif /* SUBMODULE_CONFIG_H */
diff --git a/submodule.c b/submodule.c
index 3b383d8c4..c1cef1c37 100644
--- a/submodule.c
+++ b/submodule.c
@@ -216,46 +216,11 @@ void load_submodule_cache(void)
gitmodules_config();
 }
 
-static int gi

[PATCH v2 14/15] unpack-trees: improve loading of .gitmodules

2017-08-03 Thread Brandon Williams
When recursing submodules 'check_updates()' needs to have strict control
over the submodule-config subsystem to ensure that the gitmodules file
has been read before checking cache entries which are marked for
removal as well ensuring the proper gitmodules file is read before
updating cache entries.

Because of this let's not rely on callers of 'check_updates()' to read
the gitmodules file before calling 'check_updates()' and handle the
reading explicitly.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 unpack-trees.c | 43 +++
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index 5dce7ff7d..3c7f464fa 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1,5 +1,6 @@
 #define NO_THE_INDEX_COMPATIBILITY_MACROS
 #include "cache.h"
+#include "repository.h"
 #include "config.h"
 #include "dir.h"
 #include "tree.h"
@@ -268,22 +269,28 @@ static int check_submodule_move_head(const struct 
cache_entry *ce,
return 0;
 }
 
-static void reload_gitmodules_file(struct index_state *index,
-  struct checkout *state)
+/*
+ * Preform the loading of the repository's gitmodules file.  This function is
+ * used by 'check_update()' to perform loading of the gitmodules file in two
+ * differnt situations:
+ * (1) before removing entries from the working tree if the gitmodules file has
+ * been marked for removal.  This situation is specified by 'state' == 
NULL.
+ * (2) before checking out entries to the working tree if the gitmodules file
+ * has been marked for update.  This situation is specified by 'state' != 
NULL.
+ */
+static void load_gitmodules_file(struct index_state *index,
+struct checkout *state)
 {
-   int i;
-   for (i = 0; i < index->cache_nr; i++) {
-   struct cache_entry *ce = index->cache[i];
-   if (ce->ce_flags & CE_UPDATE) {
-   int r = strcmp(ce->name, GITMODULES_FILE);
-   if (r < 0)
-   continue;
-   else if (r == 0) {
-   submodule_free();
-   checkout_entry(ce, state, NULL);
-   gitmodules_config();
-   } else
-   break;
+   int pos = index_name_pos(index, GITMODULES_FILE, 
strlen(GITMODULES_FILE));
+
+   if (pos >= 0) {
+   struct cache_entry *ce = index->cache[pos];
+   if (!state && ce->ce_flags & CE_WT_REMOVE) {
+   repo_read_gitmodules(the_repository);
+   } else if (state && (ce->ce_flags & CE_UPDATE)) {
+   submodule_free();
+   checkout_entry(ce, state, NULL);
+   repo_read_gitmodules(the_repository);
}
}
 }
@@ -343,6 +350,10 @@ static int check_updates(struct unpack_trees_options *o)
 
if (o->update)
git_attr_set_direction(GIT_ATTR_CHECKOUT, index);
+
+   if (should_update_submodules() && o->update && !o->dry_run)
+   load_gitmodules_file(index, NULL);
+
for (i = 0; i < index->cache_nr; i++) {
const struct cache_entry *ce = index->cache[i];
 
@@ -356,7 +367,7 @@ static int check_updates(struct unpack_trees_options *o)
remove_scheduled_dirs();
 
if (should_update_submodules() && o->update && !o->dry_run)
-   reload_gitmodules_file(index, );
+   load_gitmodules_file(index, );
 
for (i = 0; i < index->cache_nr; i++) {
struct cache_entry *ce = index->cache[i];
-- 
2.14.0.rc1.383.gd1ce394fe2-goog



[PATCH v2 08/15] unpack-trees: don't respect submodule.update

2017-08-03 Thread Brandon Williams
The 'submodule.update' config was historically used and respected by the
'submodule update' command because update handled a variety of different
ways it updated a submodule.  As we begin teaching other commands about
submodules it makes more sense for the different settings of
'submodule.update' to be handled by the individual commands themselves
(checkout, rebase, merge, etc) so it shouldn't be respected by the
native checkout command.

Also remove the overlaying of the repository's config (via using
'submodule_config()') from the commands which use the unpack-trees
logic (checkout, read-tree, reset).

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 builtin/checkout.c |  2 +-
 submodule.c|  1 -
 unpack-trees.c | 38 --
 3 files changed, 9 insertions(+), 32 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index 9661e1bcb..246e0cd16 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -858,7 +858,7 @@ static int git_checkout_config(const char *var, const char 
*value, void *cb)
}
 
if (starts_with(var, "submodule."))
-   return submodule_config(var, value, NULL);
+   return git_default_submodule_config(var, value, NULL);
 
return git_xmerge_config(var, value, NULL);
 }
diff --git a/submodule.c b/submodule.c
index a32043893..f913c2341 100644
--- a/submodule.c
+++ b/submodule.c
@@ -235,7 +235,6 @@ void load_submodule_cache(void)
return;
 
gitmodules_config();
-   git_config(submodule_config, NULL);
 }
 
 static int gitmodules_cb(const char *var, const char *value, void *data)
diff --git a/unpack-trees.c b/unpack-trees.c
index 05335fe5b..5dce7ff7d 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -255,28 +255,17 @@ static int check_submodule_move_head(const struct 
cache_entry *ce,
 {
unsigned flags = SUBMODULE_MOVE_HEAD_DRY_RUN;
const struct submodule *sub = submodule_from_ce(ce);
+
if (!sub)
return 0;
 
if (o->reset)
flags |= SUBMODULE_MOVE_HEAD_FORCE;
 
-   switch (sub->update_strategy.type) {
-   case SM_UPDATE_UNSPECIFIED:
-   case SM_UPDATE_CHECKOUT:
-   if (submodule_move_head(ce->name, old_id, new_id, flags))
-   return o->gently ? -1 :
-   add_rejected_path(o, 
ERROR_WOULD_LOSE_SUBMODULE, ce->name);
-   return 0;
-   case SM_UPDATE_NONE:
-   return 0;
-   case SM_UPDATE_REBASE:
-   case SM_UPDATE_MERGE:
-   case SM_UPDATE_COMMAND:
-   default:
-   warning(_("submodule update strategy not supported for 
submodule '%s'"), ce->name);
-   return -1;
-   }
+   if (submodule_move_head(ce->name, old_id, new_id, flags))
+   return o->gently ? -1 :
+  add_rejected_path(o, 
ERROR_WOULD_LOSE_SUBMODULE, ce->name);
+   return 0;
 }
 
 static void reload_gitmodules_file(struct index_state *index,
@@ -293,7 +282,6 @@ static void reload_gitmodules_file(struct index_state 
*index,
submodule_free();
checkout_entry(ce, state, NULL);
gitmodules_config();
-   git_config(submodule_config, NULL);
} else
break;
}
@@ -308,19 +296,9 @@ static void unlink_entry(const struct cache_entry *ce)
 {
const struct submodule *sub = submodule_from_ce(ce);
if (sub) {
-   switch (sub->update_strategy.type) {
-   case SM_UPDATE_UNSPECIFIED:
-   case SM_UPDATE_CHECKOUT:
-   case SM_UPDATE_REBASE:
-   case SM_UPDATE_MERGE:
-   /* state.force is set at the caller. */
-   submodule_move_head(ce->name, "HEAD", NULL,
-   SUBMODULE_MOVE_HEAD_FORCE);
-   break;
-   case SM_UPDATE_NONE:
-   case SM_UPDATE_COMMAND:
-   return; /* Do not touch the submodule. */
-   }
+   /* state.force is set at the caller. */
+   submodule_move_head(ce->name, "HEAD", NULL,
+   SUBMODULE_MOVE_HEAD_FORCE);
}
if (!check_leading_path(ce->name, ce_namelen(ce)))
return;
-- 
2.14.0.rc1.383.gd1ce394fe2-goog



[PATCH v2 03/15] add, reset: ensure submodules can be added or reset

2017-08-03 Thread Brandon Williams
Commit aee9c7d65 (Submodules: Add the new "ignore" config option for
diff and status) introduced the ignore configuration option for
submodules so that configured submodules could be omitted from the
status and diff commands.  Because this flag is respected in the diff
machinery it has the unintended consequence of potentially prohibiting
users from adding or resetting a submodule, even when a path to the
submodule is explicitly given.

Ensure that submodules can be added or set, even if they are configured
to be ignored, by setting the `DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG` diff
flag.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 builtin/add.c   | 1 +
 builtin/reset.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/builtin/add.c b/builtin/add.c
index e888fb8c5..6f271512f 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -116,6 +116,7 @@ int add_files_to_cache(const char *prefix,
rev.diffopt.output_format = DIFF_FORMAT_CALLBACK;
rev.diffopt.format_callback = update_callback;
rev.diffopt.format_callback_data = 
+   rev.diffopt.flags |= DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG;
rev.max_count = 0; /* do not compare unmerged paths with stage #2 */
run_diff_files(, DIFF_RACY_IS_MODIFIED);
return !!data.add_errors;
diff --git a/builtin/reset.c b/builtin/reset.c
index 046403ed6..772d078b8 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -156,6 +156,7 @@ static int read_from_tree(const struct pathspec *pathspec,
opt.output_format = DIFF_FORMAT_CALLBACK;
opt.format_callback = update_index_from_diff;
opt.format_callback_data = _to_add;
+   opt.flags |= DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG;
 
if (do_diff_cache(tree_oid, ))
return 1;
-- 
2.14.0.rc1.383.gd1ce394fe2-goog



[PATCH v2 05/15] submodule--helper: don't overlay config in update-clone

2017-08-03 Thread Brandon Williams
Don't rely on overlaying the repository's config on top of the
submodule-config, instead query the repository's config directly for the
url and the update strategy configuration.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 builtin/submodule--helper.c | 23 +++
 submodule.c | 38 ++
 submodule.h |  1 +
 3 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index f71f4270d..36df7ab78 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -780,6 +780,10 @@ static int prepare_to_clone_next_submodule(const struct 
cache_entry *ce,
   struct strbuf *out)
 {
const struct submodule *sub = NULL;
+   const char *url = NULL;
+   const char *update_string;
+   enum submodule_update_type update_type;
+   char *key;
struct strbuf displaypath_sb = STRBUF_INIT;
struct strbuf sb = STRBUF_INIT;
const char *displaypath = NULL;
@@ -808,9 +812,17 @@ static int prepare_to_clone_next_submodule(const struct 
cache_entry *ce,
goto cleanup;
}
 
+   key = xstrfmt("submodule.%s.update", sub->name);
+   if (!repo_config_get_string_const(the_repository, key, _string)) 
{
+   update_type = parse_submodule_update_type(update_string);
+   } else {
+   update_type = sub->update_strategy.type;
+   }
+   free(key);
+
if (suc->update.type == SM_UPDATE_NONE
|| (suc->update.type == SM_UPDATE_UNSPECIFIED
-   && sub->update_strategy.type == SM_UPDATE_NONE)) {
+   && update_type == SM_UPDATE_NONE)) {
strbuf_addf(out, _("Skipping submodule '%s'"), displaypath);
strbuf_addch(out, '\n');
goto cleanup;
@@ -822,6 +834,11 @@ static int prepare_to_clone_next_submodule(const struct 
cache_entry *ce,
goto cleanup;
}
 
+   strbuf_reset();
+   strbuf_addf(, "submodule.%s.url", sub->name);
+   if (repo_config_get_string_const(the_repository, sb.buf, ))
+   url = sub->url;
+
strbuf_reset();
strbuf_addf(, "%s/.git", ce->name);
needs_cloning = !file_exists(sb.buf);
@@ -851,7 +868,7 @@ static int prepare_to_clone_next_submodule(const struct 
cache_entry *ce,
argv_array_push(>args, "--depth=1");
argv_array_pushl(>args, "--path", sub->path, NULL);
argv_array_pushl(>args, "--name", sub->name, NULL);
-   argv_array_pushl(>args, "--url", sub->url, NULL);
+   argv_array_pushl(>args, "--url", url, NULL);
if (suc->references.nr) {
struct string_list_item *item;
for_each_string_list_item(item, >references)
@@ -1025,9 +1042,7 @@ static int update_clone(int argc, const char **argv, 
const char *prefix)
if (pathspec.nr)
suc.warn_if_uninitialized = 1;
 
-   /* Overlay the parsed .gitmodules file with .git/config */
gitmodules_config();
-   git_config(submodule_config, NULL);
 
run_processes_parallel(max_jobs,
   update_clone_get_next_task,
diff --git a/submodule.c b/submodule.c
index 19bd13bb2..8a9b964ce 100644
--- a/submodule.c
+++ b/submodule.c
@@ -398,24 +398,38 @@ void die_path_inside_submodule(const struct index_state 
*istate,
}
 }
 
-int parse_submodule_update_strategy(const char *value,
-   struct submodule_update_strategy *dst)
+enum submodule_update_type parse_submodule_update_type(const char *value)
 {
-   free((void*)dst->command);
-   dst->command = NULL;
if (!strcmp(value, "none"))
-   dst->type = SM_UPDATE_NONE;
+   return SM_UPDATE_NONE;
else if (!strcmp(value, "checkout"))
-   dst->type = SM_UPDATE_CHECKOUT;
+   return SM_UPDATE_CHECKOUT;
else if (!strcmp(value, "rebase"))
-   dst->type = SM_UPDATE_REBASE;
+   return SM_UPDATE_REBASE;
else if (!strcmp(value, "merge"))
-   dst->type = SM_UPDATE_MERGE;
-   else if (skip_prefix(value, "!", )) {
-   dst->type = SM_UPDATE_COMMAND;
-   dst->command = xstrdup(value);
-   } else
+   return SM_UPDATE_MERGE;
+   else if (*value == '!')
+   return SM_UPDATE_COMMAND;
+   else
+   return SM_UPDATE_UNSPECIFIED;
+}
+
+int parse_submodule_update_strategy(const char *value,
+   struct submodule_update_strategy *dst)
+{
+   enum submodule_update_type type;
+
+   free((void*)dst->command);
+   

[PATCH v4 10/10] grep: recurse in-process using 'struct repository'

2017-08-02 Thread Brandon Williams
Convert grep to use 'struct repository' which enables recursing into
submodules to be handled in-process.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 Documentation/git-grep.txt |   7 -
 builtin/grep.c | 396 ++---
 cache.h|   1 -
 git.c  |   2 +-
 grep.c |  13 --
 grep.h |   1 -
 setup.c|  12 +-
 7 files changed, 88 insertions(+), 344 deletions(-)

diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index 5033483db..720c7850e 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -95,13 +95,6 @@ OPTIONS
 option the prefix of all submodule output will be the name of
the parent project's  object.
 
---parent-basename ::
-   For internal use only.  In order to produce uniform output with the
-   --recurse-submodules option, this option can be used to provide the
-   basename of a parent's  object to a submodule so the submodule
-   can prefix its output with the parent's name rather than the SHA1 of
-   the submodule.
-
 -a::
 --text::
Process binary files as if they were text.
diff --git a/builtin/grep.c b/builtin/grep.c
index 7e79eb1a7..cd0e51f3c 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -28,13 +28,7 @@ static char const * const grep_usage[] = {
NULL
 };
 
-static const char *super_prefix;
 static int recurse_submodules;
-static struct argv_array submodule_options = ARGV_ARRAY_INIT;
-static const char *parent_basename;
-
-static int grep_submodule_launch(struct grep_opt *opt,
-const struct grep_source *gs);
 
 #define GREP_NUM_THREADS_DEFAULT 8
 static int num_threads;
@@ -186,10 +180,7 @@ static void *run(void *arg)
break;
 
opt->output_priv = w;
-   if (w->source.type == GREP_SOURCE_SUBMODULE)
-   hit |= grep_submodule_launch(opt, >source);
-   else
-   hit |= grep_source(opt, >source);
+   hit |= grep_source(opt, >source);
grep_source_clear_data(>source);
work_done(w);
}
@@ -327,21 +318,13 @@ static int grep_oid(struct grep_opt *opt, const struct 
object_id *oid,
 {
struct strbuf pathbuf = STRBUF_INIT;
 
-   if (super_prefix) {
-   strbuf_add(, filename, tree_name_len);
-   strbuf_addstr(, super_prefix);
-   strbuf_addstr(, filename + tree_name_len);
+   if (opt->relative && opt->prefix_length) {
+   quote_path_relative(filename + tree_name_len, opt->prefix, 
);
+   strbuf_insert(, 0, filename, tree_name_len);
} else {
strbuf_addstr(, filename);
}
 
-   if (opt->relative && opt->prefix_length) {
-   char *name = strbuf_detach(, NULL);
-   quote_path_relative(name + tree_name_len, opt->prefix, 
);
-   strbuf_insert(, 0, name, tree_name_len);
-   free(name);
-   }
-
 #ifndef NO_PTHREADS
if (num_threads) {
add_work(opt, GREP_SOURCE_OID, pathbuf.buf, path, oid);
@@ -366,15 +349,10 @@ static int grep_file(struct grep_opt *opt, const char 
*filename)
 {
struct strbuf buf = STRBUF_INIT;
 
-   if (super_prefix)
-   strbuf_addstr(, super_prefix);
-   strbuf_addstr(, filename);
-
-   if (opt->relative && opt->prefix_length) {
-   char *name = strbuf_detach(, NULL);
-   quote_path_relative(name, opt->prefix, );
-   free(name);
-   }
+   if (opt->relative && opt->prefix_length)
+   quote_path_relative(filename, opt->prefix, );
+   else
+   strbuf_addstr(, filename);
 
 #ifndef NO_PTHREADS
if (num_threads) {
@@ -421,284 +399,89 @@ static void run_pager(struct grep_opt *opt, const char 
*prefix)
exit(status);
 }
 
-static void compile_submodule_options(const struct grep_opt *opt,
- const char **argv,
- int cached, int untracked,
- int opt_exclude, int use_index,
- int pattern_type_arg)
-{
-   struct grep_pat *pattern;
-
-   if (recurse_submodules)
-   argv_array_push(_options, "--recurse-submodules");
-
-   if (cached)
-   argv_array_push(_options, "--cached");
-   if (!use_index)
-   argv_array_push(_options, "--no-index");
-   if (untracked)
-   argv_array_push(_options, "--untracked");
-   if (opt_exclude > 0)
-   argv_array_push(_options, "--exclude-standard");
-
-   if (opt->inv

[PATCH v4 05/10] submodule: remove submodule.fetchjobs from submodule-config parsing

2017-08-02 Thread Brandon Williams
The '.gitmodules' file should only contain information pertinent to
configuring individual submodules (name to path mapping, URL where to
obtain the submodule, etc.) while other configuration like the number of
jobs to use when fetching submodules should be a part of the
repository's config.

Remove the 'submodule.fetchjobs' configuration option from the general
submodule-config parsing and instead rely on using the
'config_from_gitmodules()' in order to maintain backwards compatibility
with this config being placed in the '.gitmodules' file.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 builtin/fetch.c | 18 +-
 builtin/submodule--helper.c | 17 +
 submodule-config.c  |  8 
 submodule-config.h  |  1 +
 submodule.c | 16 +---
 submodule.h |  1 -
 6 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index c87e59f3b..ade092bf8 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -39,7 +39,7 @@ static int prune = -1; /* unspecified */
 static int all, append, dry_run, force, keep, multiple, update_head_ok, 
verbosity, deepen_relative;
 static int progress = -1;
 static int tags = TAGS_DEFAULT, unshallow, update_shallow, deepen;
-static int max_children = -1;
+static int max_children = 1;
 static enum transport_family family;
 static const char *depth;
 static const char *deepen_since;
@@ -68,9 +68,24 @@ static int git_fetch_config(const char *k, const char *v, 
void *cb)
recurse_submodules = r;
}
 
+   if (!strcmp(k, "submodule.fetchjobs")) {
+   max_children = parse_submodule_fetchjobs(k, v);
+   return 0;
+   }
+
return git_default_config(k, v, cb);
 }
 
+static int gitmodules_fetch_config(const char *var, const char *value, void 
*cb)
+{
+   if (!strcmp(var, "submodule.fetchjobs")) {
+   max_children = parse_submodule_fetchjobs(var, value);
+   return 0;
+   }
+
+   return 0;
+}
+
 static int parse_refmap_arg(const struct option *opt, const char *arg, int 
unset)
 {
ALLOC_GROW(refmap_array, refmap_nr + 1, refmap_alloc);
@@ -1311,6 +1326,7 @@ int cmd_fetch(int argc, const char **argv, const char 
*prefix)
for (i = 1; i < argc; i++)
strbuf_addf(_rla, " %s", argv[i]);
 
+   config_from_gitmodules(gitmodules_fetch_config, NULL);
git_config(git_fetch_config, NULL);
 
argc = parse_options(argc, argv, prefix,
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 6abdad329..6d9600d4f 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -960,10 +960,19 @@ static int update_clone_task_finished(int result,
return 0;
 }
 
+static int gitmodules_update_clone_config(const char *var, const char *value,
+ void *cb)
+{
+   int *max_jobs = cb;
+   if (!strcmp(var, "submodule.fetchjobs"))
+   *max_jobs = parse_submodule_fetchjobs(var, value);
+   return 0;
+}
+
 static int update_clone(int argc, const char **argv, const char *prefix)
 {
const char *update = NULL;
-   int max_jobs = -1;
+   int max_jobs = 1;
struct string_list_item *item;
struct pathspec pathspec;
struct submodule_update_clone suc = SUBMODULE_UPDATE_CLONE_INIT;
@@ -1000,6 +1009,9 @@ static int update_clone(int argc, const char **argv, 
const char *prefix)
};
suc.prefix = prefix;
 
+   config_from_gitmodules(gitmodules_update_clone_config, _jobs);
+   git_config(gitmodules_update_clone_config, _jobs);
+
argc = parse_options(argc, argv, prefix, module_update_clone_options,
 git_submodule_helper_usage, 0);
 
@@ -1017,9 +1029,6 @@ static int update_clone(int argc, const char **argv, 
const char *prefix)
gitmodules_config();
git_config(submodule_config, NULL);
 
-   if (max_jobs < 0)
-   max_jobs = parallel_submodules();
-
run_processes_parallel(max_jobs,
   update_clone_get_next_task,
   update_clone_start_failure,
diff --git a/submodule-config.c b/submodule-config.c
index 5fe2d0787..70400f553 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -248,6 +248,14 @@ static int parse_fetch_recurse(const char *opt, const char 
*arg,
}
 }
 
+int parse_submodule_fetchjobs(const char *var, const char *value)
+{
+   int fetchjobs = git_config_int(var, value);
+   if (fetchjobs < 0)
+   die(_("negative values not allowed for submodule.fetchjobs"));
+   return fetchjobs;
+}
+
 int parse_fetch_recurse_submodules_arg(const char *opt, const char *arg)
 {
return parse_fetch_recurse(opt, arg, 1);
diff --git a/submodule-config.h b/submodule-co

[PATCH v4 09/10] submodule: merge repo_read_gitmodules and gitmodules_config

2017-08-02 Thread Brandon Williams
Since 69aba5329 (submodule: add repo_read_gitmodules) there have been
two ways to load a repository's .gitmodules file:
'repo_read_gitmodules()' is used if you have a repository object you are
working with or 'gitmodules_config()' if you are implicitly working with
'the_repository'.  Merge the logic of these two functions to remove
duplicate code.

In addition, 'repo_read_gitmodules()' can segfault by passing in a NULL
pointer to 'git_config_from_file()' if a repository doesn't have a
worktree.  Instead check for the existence of a worktree before
attempting to load the .gitmodules file.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 submodule.c | 37 +
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/submodule.c b/submodule.c
index 3b0e70c51..9d5eacaf9 100644
--- a/submodule.c
+++ b/submodule.c
@@ -230,23 +230,6 @@ void load_submodule_cache(void)
git_config(submodule_config, NULL);
 }
 
-void gitmodules_config(void)
-{
-   const char *work_tree = get_git_work_tree();
-   if (work_tree) {
-   struct strbuf gitmodules_path = STRBUF_INIT;
-   strbuf_addstr(_path, work_tree);
-   strbuf_addstr(_path, "/" GITMODULES_FILE);
-   if (read_cache() < 0)
-   die("index file corrupt");
-
-   if (!is_gitmodules_unmerged(_index))
-   git_config_from_file(git_modules_config,
-   gitmodules_path.buf, NULL);
-   strbuf_release(_path);
-   }
-}
-
 static int gitmodules_cb(const char *var, const char *value, void *data)
 {
struct repository *repo = data;
@@ -255,10 +238,24 @@ static int gitmodules_cb(const char *var, const char 
*value, void *data)
 
 void repo_read_gitmodules(struct repository *repo)
 {
-   char *gitmodules_path = repo_worktree_path(repo, GITMODULES_FILE);
+   if (repo->worktree) {
+   char *gitmodules;
+
+   if (repo_read_index(repo) < 0)
+   return;
 
-   git_config_from_file(gitmodules_cb, gitmodules_path, repo);
-   free(gitmodules_path);
+   gitmodules = repo_worktree_path(repo, GITMODULES_FILE);
+
+   if (!is_gitmodules_unmerged(repo->index))
+   git_config_from_file(gitmodules_cb, gitmodules, repo);
+
+   free(gitmodules);
+   }
+}
+
+void gitmodules_config(void)
+{
+   repo_read_gitmodules(the_repository);
 }
 
 void gitmodules_config_sha1(const unsigned char *commit_sha1)
-- 
2.14.0.rc1.383.gd1ce394fe2-goog



[PATCH v4 06/10] submodule: remove fetch.recursesubmodules from submodule-config parsing

2017-08-02 Thread Brandon Williams
Remove the 'fetch.recursesubmodules' configuration option from the
general submodule-config parsing and instead rely on using
'config_from_gitmodules()' in order to maintain backwards compatibility
with this config being placed in the '.gitmodules' file.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 builtin/fetch.c |  8 +++-
 submodule.c | 19 ++-
 submodule.h |  2 +-
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index ade092bf8..d84c26391 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -71,6 +71,9 @@ static int git_fetch_config(const char *k, const char *v, 
void *cb)
if (!strcmp(k, "submodule.fetchjobs")) {
max_children = parse_submodule_fetchjobs(k, v);
return 0;
+   } else if (!strcmp(k, "fetch.recursesubmodules")) {
+   recurse_submodules = parse_fetch_recurse_submodules_arg(k, v);
+   return 0;
}
 
return git_default_config(k, v, cb);
@@ -81,6 +84,9 @@ static int gitmodules_fetch_config(const char *var, const 
char *value, void *cb)
if (!strcmp(var, "submodule.fetchjobs")) {
max_children = parse_submodule_fetchjobs(var, value);
return 0;
+   } else if (!strcmp(var, "fetch.recursesubmodules")) {
+   recurse_submodules = parse_fetch_recurse_submodules_arg(var, 
value);
+   return 0;
}
 
return 0;
@@ -1355,7 +1361,6 @@ int cmd_fetch(int argc, const char **argv, const char 
*prefix)
deepen = 1;
 
if (recurse_submodules != RECURSE_SUBMODULES_OFF) {
-   set_config_fetch_recurse_submodules(recurse_submodules_default);
gitmodules_config();
git_config(submodule_config, NULL);
}
@@ -1399,6 +1404,7 @@ int cmd_fetch(int argc, const char **argv, const char 
*prefix)
result = fetch_populated_submodules(,
submodule_prefix,
recurse_submodules,
+   recurse_submodules_default,
verbosity < 0,
max_children);
argv_array_clear();
diff --git a/submodule.c b/submodule.c
index aa4fb1eaa..1d9d2ce09 100644
--- a/submodule.c
+++ b/submodule.c
@@ -20,7 +20,6 @@
 #include "worktree.h"
 #include "parse-options.h"
 
-static int config_fetch_recurse_submodules = RECURSE_SUBMODULES_ON_DEMAND;
 static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF;
 static struct string_list changed_submodule_paths = STRING_LIST_INIT_DUP;
 static int initialized_fetch_ref_tips;
@@ -160,10 +159,6 @@ static int git_modules_config(const char *var, const char 
*value, void *cb)
 {
if (starts_with(var, "submodule."))
return parse_submodule_config_option(var, value);
-   else if (!strcmp(var, "fetch.recursesubmodules")) {
-   config_fetch_recurse_submodules = 
parse_fetch_recurse_submodules_arg(var, value);
-   return 0;
-   }
return 0;
 }
 
@@ -714,11 +709,6 @@ void show_submodule_inline_diff(FILE *f, const char *path,
clear_commit_marks(right, ~0);
 }
 
-void set_config_fetch_recurse_submodules(int value)
-{
-   config_fetch_recurse_submodules = value;
-}
-
 int should_update_submodules(void)
 {
return config_update_recurse_submodules == RECURSE_SUBMODULES_ON;
@@ -1164,10 +1154,11 @@ struct submodule_parallel_fetch {
const char *work_tree;
const char *prefix;
int command_line_option;
+   int default_option;
int quiet;
int result;
 };
-#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0}
+#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0}
 
 static int get_next_submodule(struct child_process *cp,
  struct strbuf *err, void *data, void **task_cb)
@@ -1205,10 +1196,10 @@ static int get_next_submodule(struct child_process *cp,
default_argv = "on-demand";
}
} else {
-   if ((config_fetch_recurse_submodules == 
RECURSE_SUBMODULES_OFF) ||
+   if ((spf->default_option == 
RECURSE_SUBMODULES_OFF) ||
gitmodules_is_unmerged)
continue;
-   if (config_fetch_recurse_submodules == 
RECURSE_SUBMODULES_ON_DEMAND) {
+   if (spf->default_option == 
RECURSE_SUBMODULES_ON_DEMAND) {
if 
(!unsorted

[PATCH v4 03/10] cache.h: add GITMODULES_FILE macro

2017-08-02 Thread Brandon Williams
Add a macro to be used when specifying the '.gitmodules' file and
convert any existing hard coded '.gitmodules' file strings to use the
new macro.

Signed-off-by: Brandon Williams <bmw...@google.com>
Signed-off-by: Stefan Beller <sbel...@google.com>
---
 cache.h|  1 +
 submodule.c| 20 ++--
 unpack-trees.c |  2 +-
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/cache.h b/cache.h
index 71fe09264..d59f767e2 100644
--- a/cache.h
+++ b/cache.h
@@ -433,6 +433,7 @@ static inline enum object_type object_type(unsigned int 
mode)
 #define GITATTRIBUTES_FILE ".gitattributes"
 #define INFOATTRIBUTES_FILE "info/attributes"
 #define ATTRIBUTE_MACRO_PREFIX "[attr]"
+#define GITMODULES_FILE ".gitmodules"
 #define GIT_NOTES_REF_ENVIRONMENT "GIT_NOTES_REF"
 #define GIT_NOTES_DEFAULT_REF "refs/notes/commits"
 #define GIT_NOTES_DISPLAY_REF_ENVIRONMENT "GIT_NOTES_DISPLAY_REF"
diff --git a/submodule.c b/submodule.c
index 6531c5d60..64ad5c12d 100644
--- a/submodule.c
+++ b/submodule.c
@@ -63,7 +63,7 @@ int update_path_in_gitmodules(const char *oldpath, const char 
*newpath)
struct strbuf entry = STRBUF_INIT;
const struct submodule *submodule;
 
-   if (!file_exists(".gitmodules")) /* Do nothing without .gitmodules */
+   if (!file_exists(GITMODULES_FILE)) /* Do nothing without .gitmodules */
return -1;
 
if (gitmodules_is_unmerged)
@@ -77,7 +77,7 @@ int update_path_in_gitmodules(const char *oldpath, const char 
*newpath)
strbuf_addstr(, "submodule.");
strbuf_addstr(, submodule->name);
strbuf_addstr(, ".path");
-   if (git_config_set_in_file_gently(".gitmodules", entry.buf, newpath) < 
0) {
+   if (git_config_set_in_file_gently(GITMODULES_FILE, entry.buf, newpath) 
< 0) {
/* Maybe the user already did that, don't error out here */
warning(_("Could not update .gitmodules entry %s"), entry.buf);
strbuf_release();
@@ -97,7 +97,7 @@ int remove_path_from_gitmodules(const char *path)
struct strbuf sect = STRBUF_INIT;
const struct submodule *submodule;
 
-   if (!file_exists(".gitmodules")) /* Do nothing without .gitmodules */
+   if (!file_exists(GITMODULES_FILE)) /* Do nothing without .gitmodules */
return -1;
 
if (gitmodules_is_unmerged)
@@ -110,7 +110,7 @@ int remove_path_from_gitmodules(const char *path)
}
strbuf_addstr(, "submodule.");
strbuf_addstr(, submodule->name);
-   if (git_config_rename_section_in_file(".gitmodules", sect.buf, NULL) < 
0) {
+   if (git_config_rename_section_in_file(GITMODULES_FILE, sect.buf, NULL) 
< 0) {
/* Maybe the user already did that, don't error out here */
warning(_("Could not remove .gitmodules entry for %s"), path);
strbuf_release();
@@ -122,7 +122,7 @@ int remove_path_from_gitmodules(const char *path)
 
 void stage_updated_gitmodules(void)
 {
-   if (add_file_to_cache(".gitmodules", 0))
+   if (add_file_to_cache(GITMODULES_FILE, 0))
die(_("staging updated .gitmodules failed"));
 }
 
@@ -230,21 +230,21 @@ void gitmodules_config(void)
struct strbuf gitmodules_path = STRBUF_INIT;
int pos;
strbuf_addstr(_path, work_tree);
-   strbuf_addstr(_path, "/.gitmodules");
+   strbuf_addstr(_path, "/" GITMODULES_FILE);
if (read_cache() < 0)
die("index file corrupt");
-   pos = cache_name_pos(".gitmodules", 11);
+   pos = cache_name_pos(GITMODULES_FILE, 11);
if (pos < 0) { /* .gitmodules not found or isn't merged */
pos = -1 - pos;
if (active_nr > pos) {  /* there is a .gitmodules */
const struct cache_entry *ce = 
active_cache[pos];
if (ce_namelen(ce) == 11 &&
-   !memcmp(ce->name, ".gitmodules", 11))
+   !memcmp(ce->name, GITMODULES_FILE, 11))
gitmodules_is_unmerged = 1;
}
} else if (pos < active_nr) {
struct stat st;
-   if (lstat(".gitmodules", ) == 0 &&
+   if (lstat(GITMODULES_FILE, ) == 0 &&
ce_match_stat(active_cache[pos], , 0) & 
DATA_CHANGED)
gitmodules_is_modified = 1;
}
@@ -264,7 +264,7 @@ static int gitmodules_cb(const

[PATCH v4 08/10] submodule: check for unmerged .gitmodules outside of config parsing

2017-08-02 Thread Brandon Williams
Add 'is_gitmodules_unmerged()' function which can be used to determine
in the '.gitmodules' file is unmerged based on the passed in index
instead of relying on a global variable which is set during the
submodule-config parsing.

Signed-off-by: Brandon Williams <bmw...@google.com>
---
 submodule.c | 47 +++
 submodule.h |  1 +
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/submodule.c b/submodule.c
index 677b5c401..3b0e70c51 100644
--- a/submodule.c
+++ b/submodule.c
@@ -27,14 +27,25 @@ static struct oid_array ref_tips_before_fetch;
 static struct oid_array ref_tips_after_fetch;
 
 /*
- * The following flag is set if the .gitmodules file is unmerged. We then
- * disable recursion for all submodules where .git/config doesn't have a
- * matching config entry because we can't guess what might be configured in
- * .gitmodules unless the user resolves the conflict. When a command line
- * option is given (which always overrides configuration) this flag will be
- * ignored.
+ * Check if the .gitmodules file is unmerged. Parsing of the .gitmodules file
+ * will be disabled because we can't guess what might be configured in
+ * .gitmodules unless the user resolves the conflict.
  */
-static int gitmodules_is_unmerged;
+int is_gitmodules_unmerged(const struct index_state *istate)
+{
+   int pos = index_name_pos(istate, GITMODULES_FILE, 
strlen(GITMODULES_FILE));
+   if (pos < 0) { /* .gitmodules not found or isn't merged */
+   pos = -1 - pos;
+   if (istate->cache_nr > pos) {  /* there is a .gitmodules */
+   const struct cache_entry *ce = istate->cache[pos];
+   if (ce_namelen(ce) == strlen(GITMODULES_FILE) &&
+   !strcmp(ce->name, GITMODULES_FILE))
+   return 1;
+   }
+   }
+
+   return 0;
+}
 
 /*
  * Check if the .gitmodules file has unstaged modifications.  This must be
@@ -71,7 +82,7 @@ int update_path_in_gitmodules(const char *oldpath, const char 
*newpath)
if (!file_exists(GITMODULES_FILE)) /* Do nothing without .gitmodules */
return -1;
 
-   if (gitmodules_is_unmerged)
+   if (is_gitmodules_unmerged(_index))
die(_("Cannot change unmerged .gitmodules, resolve merge 
conflicts first"));
 
submodule = submodule_from_path(null_sha1, oldpath);
@@ -105,7 +116,7 @@ int remove_path_from_gitmodules(const char *path)
if (!file_exists(GITMODULES_FILE)) /* Do nothing without .gitmodules */
return -1;
 
-   if (gitmodules_is_unmerged)
+   if (is_gitmodules_unmerged(_index))
die(_("Cannot change unmerged .gitmodules, resolve merge 
conflicts first"));
 
submodule = submodule_from_path(null_sha1, path);
@@ -156,7 +167,7 @@ void set_diffopt_flags_from_submodule_config(struct 
diff_options *diffopt,
if (submodule) {
if (submodule->ignore)
handle_ignore_submodules_arg(diffopt, 
submodule->ignore);
-   else if (gitmodules_is_unmerged)
+   else if (is_gitmodules_unmerged(_index))
DIFF_OPT_SET(diffopt, IGNORE_SUBMODULES);
}
 }
@@ -224,23 +235,12 @@ void gitmodules_config(void)
const char *work_tree = get_git_work_tree();
if (work_tree) {
struct strbuf gitmodules_path = STRBUF_INIT;
-   int pos;
strbuf_addstr(_path, work_tree);
strbuf_addstr(_path, "/" GITMODULES_FILE);
if (read_cache() < 0)
die("index file corrupt");
-   pos = cache_name_pos(GITMODULES_FILE, 11);
-   if (pos < 0) { /* .gitmodules not found or isn't merged */
-   pos = -1 - pos;
-   if (active_nr > pos) {  /* there is a .gitmodules */
-   const struct cache_entry *ce = 
active_cache[pos];
-   if (ce_namelen(ce) == 11 &&
-   !memcmp(ce->name, GITMODULES_FILE, 11))
-   gitmodules_is_unmerged = 1;
-   }
-   }
 
-   if (!gitmodules_is_unmerged)
+   if (!is_gitmodules_unmerged(_index))
git_config_from_file(git_modules_config,
gitmodules_path.buf, NULL);
strbuf_release(_path);
@@ -1198,8 +1198,7 @@ static int get_next_submodule(struct child_process *cp,
default_argv = "on-demand";
}
} else {
-   if ((spf->default_option == 
RECURSE_SUBMODULES_OFF) ||
- 

<    5   6   7   8   9   10   11   12   13   14   >