[PATCH v2 2/5] unpack-trees: add a note about path invalidation

2018-06-05 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 unpack-trees.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/unpack-trees.c b/unpack-trees.c
index 3a85a02a77..5d06aa9c98 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1545,6 +1545,17 @@ static int verify_uptodate_sparse(const struct 
cache_entry *ce,
return verify_uptodate_1(ce, o, ERROR_SPARSE_NOT_UPTODATE_FILE);
 }
 
+/*
+ * TODO: We should actually invalidate o->result, not src_index [1].
+ * But since cache tree and untracked cache both are not copied to
+ * o->result until unpacking is complete, we invalidate them on
+ * src_index instead with the assumption that they will be copied to
+ * dst_index at the end.
+ *
+ * [1] src_index->cache_tree is also used in unpack_callback() so if
+ * we invalidate o->result, we need to update it to use
+ * o->result.cache_tree as well.
+ */
 static void invalidate_ce_path(const struct cache_entry *ce,
   struct unpack_trees_options *o)
 {
-- 
2.18.0.rc0.333.g22e6ee6cdf



[PATCH v2 5/5] unpack-trees: avoid the_index in verify_absent()

2018-06-05 Thread Nguyễn Thái Ngọc Duy
Both functions that are updated in this commit are called by
verify_absent(), which is part of the "unpack-trees" operation that is
supposed to work on any index file specified by the caller. Thanks to
Brandon [1] [2], an implicit dependency on the_index is exposed. This
commit fixes it.

In both functions, it makes sense to use src_index to check for
exclusion because it's almost unchanged and should give us the same
outcome as if running the exclude check before the unpack.

It's "almost unchanged" because we do invalidate cache-tree and
untracked cache in the source index. But this should not affect how
exclude machinery uses the index: to see if a file is tracked, and to
read a blob from the index instead of worktree if it's marked
skip-worktree (i.e. it's not available in worktree)

[1] a0bba65b10 (dir: convert is_excluded to take an index - 2017-05-05
[2] 2c1eb10454 (dir: convert read_directory to take an index - 2017-05-05)

Helped-by: Elijah Newren 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 unpack-trees.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index 5268de7af5..3ace82ca27 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1651,7 +1651,7 @@ static int verify_clean_subdirectory(const struct 
cache_entry *ce,
memset(, 0, sizeof(d));
if (o->dir)
d.exclude_per_dir = o->dir->exclude_per_dir;
-   i = read_directory(, _index, pathbuf, namelen+1, NULL);
+   i = read_directory(, o->src_index, pathbuf, namelen+1, NULL);
if (i)
return o->gently ? -1 :
add_rejected_path(o, ERROR_NOT_UPTODATE_DIR, ce->name);
@@ -1693,7 +1693,7 @@ static int check_ok_to_remove(const char *name, int len, 
int dtype,
return 0;
 
if (o->dir &&
-   is_excluded(o->dir, _index, name, ))
+   is_excluded(o->dir, o->src_index, name, ))
/*
 * ce->name is explicitly excluded, so it is Ok to
 * overwrite it.
-- 
2.18.0.rc0.333.g22e6ee6cdf



[PATCH v2 3/5] unpack-trees: don't shadow global var the_index

2018-06-05 Thread Nguyễn Thái Ngọc Duy
This function mark_new_skip_worktree() has an argument named the_index
which is also the name of a global variable. While they have different
types (the global the_index is not a pointer) mistakes can easily
happen and it's also confusing for readers. Rename the function
argument to something other than the_index.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 unpack-trees.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index 5d06aa9c98..45fcda3169 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1231,7 +1231,7 @@ static int clear_ce_flags(struct cache_entry **cache, int 
nr,
  * Set/Clear CE_NEW_SKIP_WORKTREE according to $GIT_DIR/info/sparse-checkout
  */
 static void mark_new_skip_worktree(struct exclude_list *el,
-  struct index_state *the_index,
+  struct index_state *istate,
   int select_flag, int skip_wt_flag)
 {
int i;
@@ -1240,8 +1240,8 @@ static void mark_new_skip_worktree(struct exclude_list 
*el,
 * 1. Pretend the narrowest worktree: only unmerged entries
 * are checked out
 */
-   for (i = 0; i < the_index->cache_nr; i++) {
-   struct cache_entry *ce = the_index->cache[i];
+   for (i = 0; i < istate->cache_nr; i++) {
+   struct cache_entry *ce = istate->cache[i];
 
if (select_flag && !(ce->ce_flags & select_flag))
continue;
@@ -1256,8 +1256,7 @@ static void mark_new_skip_worktree(struct exclude_list 
*el,
 * 2. Widen worktree according to sparse-checkout file.
 * Matched entries will have skip_wt_flag cleared (i.e. "in")
 */
-   clear_ce_flags(the_index->cache, the_index->cache_nr,
-  select_flag, skip_wt_flag, el);
+   clear_ce_flags(istate->cache, istate->cache_nr, select_flag, 
skip_wt_flag, el);
 }
 
 static int verify_absent(const struct cache_entry *,
-- 
2.18.0.rc0.333.g22e6ee6cdf



[PATCH v2 0/5] Fix "the_index" usage in unpack-trees.c

2018-06-05 Thread Nguyễn Thái Ngọc Duy
v2 fixes an incorrect patch splitting (I should have built one more
time :P) between 3/5 and 4/5. v1's 6/6 is dropped. Brandon suggested a
better way of doing it which may happen later.

Nguyễn Thái Ngọc Duy (5):
  unpack-trees: remove 'extern' on function declaration
  unpack-trees: add a note about path invalidation
  unpack-trees: don't shadow global var the_index
  unpack-tress: convert clear_ce_flags* to avoid the_index
  unpack-trees: avoid the_index in verify_absent()

 unpack-trees.c | 53 --
 unpack-trees.h |  4 ++--
 2 files changed, 36 insertions(+), 21 deletions(-)

-- 
2.18.0.rc0.333.g22e6ee6cdf



[PATCH v2 1/5] unpack-trees: remove 'extern' on function declaration

2018-06-05 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 unpack-trees.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unpack-trees.h b/unpack-trees.h
index c2b434c606..534358fcc5 100644
--- a/unpack-trees.h
+++ b/unpack-trees.h
@@ -82,8 +82,8 @@ struct unpack_trees_options {
struct exclude_list *el; /* for internal use */
 };
 
-extern int unpack_trees(unsigned n, struct tree_desc *t,
-   struct unpack_trees_options *options);
+int unpack_trees(unsigned n, struct tree_desc *t,
+struct unpack_trees_options *options);
 
 int verify_uptodate(const struct cache_entry *ce,
struct unpack_trees_options *o);
-- 
2.18.0.rc0.333.g22e6ee6cdf



[PATCH v2 4/5] unpack-tress: convert clear_ce_flags* to avoid the_index

2018-06-05 Thread Nguyễn Thái Ngọc Duy
Prior to fba92be8f7, this code implicitly (and incorrectly) assumes
the_index when running the exclude machinery. fba92be8f7 helps show
this problem clearer because unpack-trees operation is supposed to
work on whatever index the caller specifies... not specifically
the_index.

Update the code to use "istate" argument that's originally from
mark_new_skip_worktree(). From the call sites, both in unpack_trees(),
you can see that this function works on two separate indexes:
o->src_index and o->result. The second mark_new_skip_worktree() so far
has incorecctly applied exclude rules on o->src_index instead of
o->result. It's unclear what is the consequences of this, but it's
definitely wrong.

[1] fba92be8f7 (dir: convert is_excluded_from_list to take an index -
2017-05-05)

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 unpack-trees.c | 31 ++-
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index 45fcda3169..5268de7af5 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1085,13 +1085,15 @@ static int unpack_callback(int n, unsigned long mask, 
unsigned long dirmask, str
return mask;
 }
 
-static int clear_ce_flags_1(struct cache_entry **cache, int nr,
+static int clear_ce_flags_1(struct index_state *istate,
+   struct cache_entry **cache, int nr,
struct strbuf *prefix,
int select_mask, int clear_mask,
struct exclude_list *el, int defval);
 
 /* Whole directory matching */
-static int clear_ce_flags_dir(struct cache_entry **cache, int nr,
+static int clear_ce_flags_dir(struct index_state *istate,
+ struct cache_entry **cache, int nr,
  struct strbuf *prefix,
  char *basename,
  int select_mask, int clear_mask,
@@ -1100,7 +1102,7 @@ static int clear_ce_flags_dir(struct cache_entry **cache, 
int nr,
struct cache_entry **cache_end;
int dtype = DT_DIR;
int ret = is_excluded_from_list(prefix->buf, prefix->len,
-   basename, , el, _index);
+   basename, , el, istate);
int rc;
 
strbuf_addch(prefix, '/');
@@ -1122,7 +1124,7 @@ static int clear_ce_flags_dir(struct cache_entry **cache, 
int nr,
 * calling clear_ce_flags_1(). That function will call
 * the expensive is_excluded_from_list() on every entry.
 */
-   rc = clear_ce_flags_1(cache, cache_end - cache,
+   rc = clear_ce_flags_1(istate, cache, cache_end - cache,
  prefix,
  select_mask, clear_mask,
  el, ret);
@@ -1145,7 +1147,8 @@ static int clear_ce_flags_dir(struct cache_entry **cache, 
int nr,
  *   cache[0]->name[0..(prefix_len-1)]
  * Top level path has prefix_len zero.
  */
-static int clear_ce_flags_1(struct cache_entry **cache, int nr,
+static int clear_ce_flags_1(struct index_state *istate,
+   struct cache_entry **cache, int nr,
struct strbuf *prefix,
int select_mask, int clear_mask,
struct exclude_list *el, int defval)
@@ -1179,7 +1182,7 @@ static int clear_ce_flags_1(struct cache_entry **cache, 
int nr,
len = slash - name;
strbuf_add(prefix, name, len);
 
-   processed = clear_ce_flags_dir(cache, cache_end - cache,
+   processed = clear_ce_flags_dir(istate, cache, cache_end 
- cache,
   prefix,
   prefix->buf + 
prefix->len - len,
   select_mask, clear_mask,
@@ -1193,7 +1196,7 @@ static int clear_ce_flags_1(struct cache_entry **cache, 
int nr,
}
 
strbuf_addch(prefix, '/');
-   cache += clear_ce_flags_1(cache, cache_end - cache,
+   cache += clear_ce_flags_1(istate, cache, cache_end - 
cache,
  prefix,
  select_mask, clear_mask, el, 
defval);
strbuf_setlen(prefix, prefix->len - len - 1);
@@ -1203,7 +1206,7 @@ static int clear_ce_flags_1(struct cache_entry **cache, 
int nr,
/* Non-directory */
dtype = ce_to_dtype(ce);
ret = is_excluded_from_list(ce->name, ce_namelen(ce),
-   name, , el, _index);
+   name, , el, istate);
if (ret < 0)
ret = defval;
if (ret > 0)
@@ -1213,15 +1216,17 @@ static int 

Re: [PATCH 6/6] Forbid "the_index" in dir.c and unpack-trees.c

2018-06-05 Thread Duy Nguyen
On Tue, Jun 5, 2018 at 6:58 PM, Brandon Williams  wrote:
> On 06/05, Nguyễn Thái Ngọc Duy wrote:
>> Use of global variables like the_index makes it very hard to follow
>> the code flow, especially when it's buried deep in some minor helper
>> function.
>>
>> We are gradually avoiding global variables, this hopefully will make
>> it one step closer. The end game is, the set of "library" source files
>> will have just one macro set: "LIB_CODE" (or something). This macro
>> will prevent access to most (if not all) global variables in those
>> files. We can't do that now, so we just prevent one thing at a time.
>>
>> Signed-off-by: Nguyễn Thái Ngọc Duy 
>> ---
>>  Should I keep my trick of defining the_index to
>>  the_index_should_not_be_used_here? It may help somewhat when people
>>  accidentally use the_index.
>
> We already have a set of index compatibility macros which this could
> maybe be a part of.

I like it. Only attr.c and submodule.c fail to build if I make
the_index declaration part of NO_THE_INDEX_COMPATIBILITY_MACROS. So
I'm going to drop this patch for now, work on kicking the_index out of
submodule.c and attr.c then move the_index decl there in a separate
series.

> Though if we want to go this way I think we should
> do the reverse of this and instead have GLOBAL_INDEX which must be
> defined in order to have access to the global.  That way new files are
> already protected by this and it may be easier to reduce the number of
> these defines through our codebase than to add them to every single
> file.
>
>>
>>  cache.h| 2 ++
>>  dir.c  | 2 ++
>>  unpack-trees.c | 2 ++
>>  3 files changed, 6 insertions(+)
>>
>> diff --git a/cache.h b/cache.h
>> index 89a107a7f7..ecc96ccb0e 100644
>> --- a/cache.h
>> +++ b/cache.h
>> @@ -330,7 +330,9 @@ struct index_state {
>>   struct ewah_bitmap *fsmonitor_dirty;
>>  };
>>
>> +#ifndef NO_GLOBAL_INDEX
>>  extern struct index_state the_index;
>> +#endif
>>
>>  /* Name hashing */
>>  extern int test_lazy_init_name_hash(struct index_state *istate, int 
>> try_threaded);
>> diff --git a/dir.c b/dir.c
>> index ccf8b4975e..74d848db5a 100644
>> --- a/dir.c
>> +++ b/dir.c
>> @@ -8,6 +8,8 @@
>>   *Junio Hamano, 2005-2006
>>   */
>>  #define NO_THE_INDEX_COMPATIBILITY_MACROS
>> +/* Do not use the_index. You should have access to it via function arg */
>> +#define NO_GLOBAL_INDEX
>>  #include "cache.h"
>>  #include "config.h"
>>  #include "dir.h"
>> diff --git a/unpack-trees.c b/unpack-trees.c
>> index 3ace82ca27..9aebe9762b 100644
>> --- a/unpack-trees.c
>> +++ b/unpack-trees.c
>> @@ -1,4 +1,6 @@
>>  #define NO_THE_INDEX_COMPATIBILITY_MACROS
>> +/* Do not use the_index here, you probably want o->src_index */
>> +#define NO_GLOBAL_INDEX
>>  #include "cache.h"
>>  #include "argv-array.h"
>>  #include "repository.h"
>> --
>> 2.18.0.rc0.333.g22e6ee6cdf
>>
>
> --
> Brandon Williams



-- 
Duy


Re: [PATCH 6/6] fetch-pack: introduce negotiator API

2018-06-05 Thread Jonathan Tan
On Tue, Jun 5, 2018 at 5:37 PM, Jonathan Nieder  wrote:
>> This patch is written to be more easily reviewed: static functions are
>> moved verbatim from fetch-pack.c to negotiator/default.c, and it can be
>> seen that the lines replaced by negotiator->X() calls are present in the
>> X() functions respectively.
>
> nit: s/more//

OK.

>> +#include "fetch-negotiator.h"
>
> To avoid various standards weirdness, the first #include in all files
> should be git-compat-util.h, cache.h, or builtin.h.  I tend to just
> use git-compat-util.h.

OK.

>> +#include "cache.h"
>
> What does this need from cache.h?

If I remember correctly, I needed something, but I might not. I'll
remove it if so.

>> +#include "commit.h"
>
> optional: could use a forward-declaration "struct commit;" since we
> only use pointers instead of the complete type.  Do we document when
> to prefer forward-declaration and when to #include complete type
> definitions somewhere?

I'll use the forward declaration then.

> [...]
>> +struct fetch_negotiator {
>
> Neat.  Can this file include an overview of the calling sequence / how
> I use this API? E.g.
>
> /*
>  * Standard calling sequence:
>  * 1. Obtain a struct fetch_negotiator * using [...]
>  * 2. For each [etc]
>  * 3. Free resources associated with the negotiator by calling
>  *release(this).  This frees the pointer; it cannot be
>  *reused.
>  */
>
> That would be a good complement to the reference documentation in the
> struct definition.

OK.

>> @@ -1061,19 +944,17 @@ static struct ref *do_fetch_pack(struct 
>> fetch_pack_args *args,
>>   if (!server_supports("deepen-relative") && args->deepen_relative)
>>   die(_("Server does not support --deepen"));
>>
>> - if (marked)
>> - for_each_ref(clear_marks, NULL);
>> - marked = 1;
>> - if (everything_local(, args, , sought, nr_sought)) {
>> + if (everything_local(, args, , sought, nr_sought)) {
>>   packet_flush(fd[1]);
>>   goto all_done;
>>   }
>> - if (find_common(, args, fd, , ref) < 0)
>> + if (find_common(, args, fd, , ref) < 0)
>>   if (!args->keep_pack)
>>   /* When cloning, it is not unusual to have
>>* no common commit.
>>*/
>>   warning(_("no common commits"));
>> + negotiator.release();
>
> Should this go after the all_done so that it doesn't get bypassed in
> the everything_local case?

Good point - will do.

>> @@ -1434,6 +1316,7 @@ static struct ref *do_fetch_pack_v2(struct 
>> fetch_pack_args *args,
>>   continue;
>>   }
>>   }
>> + negotiator.release();
>>
>>   oidset_clear();
>
> nit: could put the negotiator.release() after the blank
> line, in the same paragraph as the other free calls like
> oidset_clear().

OK.

>> +++ b/negotiator/default.c
>> @@ -0,0 +1,173 @@
>> +#include "default.h"
>
> First #include should be "git-compat-util.h".

OK.

> [...]
>> +/*
>> +   This function marks a rev and its ancestors as common.
>> +   In some cases, it is desirable to mark only the ancestors (for example
>> +   when only the server does not yet know that they are common).
>> +*/
>
> Not about this change: comments should have ' *' at the start of each
> line (could do in a preparatory patch or a followup).

I'll add a followup.

>> --- /dev/null
>> +++ b/negotiator/default.h
>> @@ -0,0 +1,8 @@
>> +#ifndef NEGOTIATOR_DEFAULT_H
>> +#define NEGOTIATOR_DEFAULT_H
>> +
>> +#include "fetch-negotiator.h"
>> +
>> +void default_negotiator_init(struct fetch_negotiator *negotiator);
>
> optional: same question about whether to use a forward declaration as in
> fetch-negotiator.h.

I'll use a forward declaration.


Re: [PATCH 5/6] fetch-pack: move common check and marking together

2018-06-05 Thread Jonathan Tan
On Tue, Jun 5, 2018 at 5:01 PM, Jonathan Nieder  wrote:
> I like it.  I think it should be possible to describe the benefit of
> this patch without reference to the specifics of the subsequent one.
> Maybe something like:
>
> When receiving 'ACK  continue' for a common commit,
> check if the commit was already known to be common and mark it
> as such if not up front.  This should make future refactoring
> of how the information about common commits is stored more
> straightforward.
>
> No visible change intended.

Thanks, I'll use your suggestion.


Re: [PATCH 4/6] fetch-pack: make negotiation-related vars local

2018-06-05 Thread Jonathan Tan
On Tue, Jun 5, 2018 at 4:35 PM, Jonathan Nieder  wrote:
> Jonathan Tan wrote:
>
>> Reduce the number of global variables by making the priority queue and
>> the count of non-common commits in it local, passing them as a struct to
>> various functions where necessary.
>
> \o/
>
>> This also helps in the case that fetch_pack() is invoked twice in the
>> same process (when tag following is required when using a transport that
>> does not support tag following), in that different priority queues will
>> now be used in each invocation, instead of reusing the possibly
>> non-empty one.
>>
>> The struct containing these variables is named "data" to ease review of
>> a subsequent patch in this series - in that patch, this struct
>> definition and several functions will be moved to a negotiation-specific
>> file, and this allows the move to be verbatim.
>
> Hm.  Is the idea that 'struct data' gets stored in the opaque 'data'
> member of the fetch_negotiator?

Yes.

> 'struct data' is a quite vague type name --- it's almost equivalent to
> 'void' (which I suppose is the idea).  How about something like
> 'struct negotiation_data' or 'fetch_negotiator_data' in this patch?
> That way this last paragraph of the commit message wouldn't be needed.

I wanted to make it easier to review the subsequent patch, in that
entire functions, including the signature, can be moved verbatim. If
the project prefers that I don't do that, let me know.

> [...]
>> --- a/fetch-pack.c
>> +++ b/fetch-pack.c
>> @@ -50,8 +50,12 @@ static int marked;
>>   */
>>  #define MAX_IN_VAIN 256
>>
>> -static struct prio_queue rev_list = { compare_commits_by_commit_date };
>> -static int non_common_revs, multi_ack, use_sideband;
>> +struct data {
>> + struct prio_queue rev_list;
>> + int non_common_revs;
>> +};
>
> How does this struct get used?  What does it represent?  A comment
> might help.

I'll add a comment.


Re: [PATCH 3/6] fetch-pack: in protocol v2, enqueue commons first

2018-06-05 Thread Jonathan Tan
On Tue, Jun 5, 2018 at 4:30 PM, Jonathan Nieder  wrote:
> I get lost in the above description.  I suspect it's doing a good job
> of describing the code, instead of answering the question I really
> have about what is broken and what behavior we want instead.
>
> E.g. are there some commands that I can run to trigger the unnecessary
> "have" lines?  That would make it easier for me to understand the rest
> and whether this is a good approach for suppressing them.
>
> It's possible I should be skipping to the test, but a summary in the
> commit message would make life easier for lazy people like me. :)

OK, I'll start the commit message with explaining a situation in which
these redundant "have" lines will appear instead. (The situation will
be the same as the one in the test.)

> This is subtle.  My instinct would be to assume that the purpose of
> everything_local is just to determine whether we need to send a fetch
> request, but it appears we also want to rely on a side effect from it.
>
> Could everything_local get a function comment to describe what side
> effects we will be counting on from it?

You're right that there's a side effect in everything_local. In v2,
I'll have a preparatory patch to separate it into a few functions so
that we can see what happens more clearly.

> nit: this adds the new test as last in the script.  Is there some
> logical earlier place in the file it can go instead?  That way, the
> file stays organized and concurrent patches that modify the same test
> script are less likely to conflict.

Good point. I'll find a place.

>> + rm -rf server client &&
>> + git init server &&
>> + test_commit -C server aref_both_1 &&
>> + git -C server tag -d aref_both_1 &&
>> + test_commit -C server aref_both_2 &&
>
> What does aref stand for?

"A ref", "a" as in "one". I'll find a better name (probably just
"both_1" and "both_2").

>> +
>> + # The ref name that only the server has must be a prefix of all the
>> + # others, to ensure that the client has the same information regardless
>> + # of whether protocol v0 (which does not have ref prefix filtering) or
>> + # protocol v2 (which does) is used.
>
> must or else what?  Maybe:
>
> # In this test, the ref name that only the server has is a prefix of
> # all other refs. This ensures that the client has the same 
> information
> # regardless of [etc]

Thanks - I'll use your suggestion.

>> + git clone server client &&
>> + test_commit -C server aref &&
>> + test_commit -C client aref_client &&
>> +
>> + # In both protocol v0 and v2, ensure that the parent of aref_both_2 is
>> + # not sent as a "have" line.
>
> Why shouldn't it be sent as a "have" line?  E.g. does another "have"
> line make it redundant?

The server's ref advertisement makes it redundant. I'll explain this
more clearly in v2.

>> +
>> + rm -f trace &&
>> + cp -r client clientv0 &&
>> + GIT_TRACE_PACKET="$(pwd)/trace" git -C clientv0 \
>> + fetch origin aref &&
>> + grep "have $(git -C client rev-parse aref_client)" trace &&
>> + grep "have $(git -C client rev-parse aref_both_2)" trace &&
>
> nit: can make this more robust by doing
>
> aref_client=$(git -C client rev-parse aref_client) &&
> aref_both_2=$(git -C client rev-parse aref_both_2) &&
>
> since this way if the git command fails, the test fails.

Will do. Thanks for your comments.


Re: [PATCH 6/6] fetch-pack: introduce negotiator API

2018-06-05 Thread Jonathan Nieder
Jonathan Tan wrote:

> Introduce the new files fetch-negotiator.{h,c}, which contains an API
> behind which the details of negotiation are abstracted. Currently, only
> one algorithm is available: the existing one.
>
> This patch is written to be more easily reviewed: static functions are
> moved verbatim from fetch-pack.c to negotiator/default.c, and it can be
> seen that the lines replaced by negotiator->X() calls are present in the
> X() functions respectively.

nit: s/more//

> Signed-off-by: Jonathan Tan 
> ---
>  Makefile |   2 +
>  fetch-negotiator.c   |   7 ++
>  fetch-negotiator.h   |  45 ++
>  fetch-pack.c | 207 ++-
>  negotiator/default.c | 173 
>  negotiator/default.h |   8 ++
>  object.h |   3 +-
>  7 files changed, 282 insertions(+), 163 deletions(-)
>  create mode 100644 fetch-negotiator.c
>  create mode 100644 fetch-negotiator.h
>  create mode 100644 negotiator/default.c
>  create mode 100644 negotiator/default.h

Looks like a job for --color-moved. :)

[...]
> --- /dev/null
> +++ b/fetch-negotiator.c
> @@ -0,0 +1,7 @@
> +#include "fetch-negotiator.h"

To avoid various standards weirdness, the first #include in all files
should be git-compat-util.h, cache.h, or builtin.h.  I tend to just
use git-compat-util.h.

[...]
> +++ b/fetch-negotiator.h
> @@ -0,0 +1,45 @@
> +#ifndef FETCH_NEGOTIATOR
> +#define FETCH_NEGOTIATOR
> +
> +#include "cache.h"

What does this need from cache.h?

> +#include "commit.h"

optional: could use a forward-declaration "struct commit;" since we
only use pointers instead of the complete type.  Do we document when
to prefer forward-declaration and when to #include complete type
definitions somewhere?

[...]
> +struct fetch_negotiator {

Neat.  Can this file include an overview of the calling sequence / how
I use this API? E.g.

/*
 * Standard calling sequence:
 * 1. Obtain a struct fetch_negotiator * using [...]
 * 2. For each [etc]
 * 3. Free resources associated with the negotiator by calling
 *release(this).  This frees the pointer; it cannot be
 *reused.
 */

That would be a good complement to the reference documentation in the
struct definition.

[...]
> +++ b/object.h
> @@ -28,7 +28,8 @@ struct object_array {
>  /*
>   * object flag allocation:
>   * revision.h:   0-1026
> - * fetch-pack.c: 05
> + * fetch-pack.c: 01
> + * negotiator/default.c:   2--5
>   * walker.c: 0-2

Nice!

[...]
> +++ b/fetch-pack.c
[...]
> @@ -462,7 +348,7 @@ static int find_common(struct data *data, struct 
> fetch_pack_args *args,
>   retval = -1;
>   if (args->no_dependents)
>   goto done;
> - while ((oid = get_rev(data))) {
> + while ((oid = negotiator->next(negotiator))) {
[etc]

I like it. :)

[...]
> @@ -988,7 +870,8 @@ static struct ref *do_fetch_pack(struct fetch_pack_args 
> *args,
>   struct object_id oid;
>   const char *agent_feature;
>   int agent_len;
> - struct data data = { { compare_commits_by_commit_date } };
> + struct fetch_negotiator negotiator;
> + fetch_negotiator_init();

Sane.

[...]
> @@ -1061,19 +944,17 @@ static struct ref *do_fetch_pack(struct 
> fetch_pack_args *args,
>   if (!server_supports("deepen-relative") && args->deepen_relative)
>   die(_("Server does not support --deepen"));
>  
> - if (marked)
> - for_each_ref(clear_marks, NULL);
> - marked = 1;
> - if (everything_local(, args, , sought, nr_sought)) {
> + if (everything_local(, args, , sought, nr_sought)) {
>   packet_flush(fd[1]);
>   goto all_done;
>   }
> - if (find_common(, args, fd, , ref) < 0)
> + if (find_common(, args, fd, , ref) < 0)
>   if (!args->keep_pack)
>   /* When cloning, it is not unusual to have
>* no common commit.
>*/
>   warning(_("no common commits"));
> + negotiator.release();

Should this go after the all_done so that it doesn't get bypassed in
the everything_local case?

[...]
> @@ -1434,6 +1316,7 @@ static struct ref *do_fetch_pack_v2(struct 
> fetch_pack_args *args,
>   continue;
>   }
>   }
> + negotiator.release();
>  
>   oidset_clear();

nit: could put the negotiator.release() after the blank
line, in the same paragraph as the other free calls like
oidset_clear().

[...]
> +++ b/negotiator/default.c
> @@ -0,0 +1,173 @@
> +#include "default.h"

First #include should be "git-compat-util.h".

[...]
> +/*
> +   This function marks a rev and its ancestors as common.
> +   In some cases, it is desirable to mark only the ancestors (for example
> +   when only the server does not yet know that they are common).
> +*/


Re: [PATCH 2/6] fetch-pack: truly stop negotiation upon ACK ready

2018-06-05 Thread Jonathan Tan
On Tue, 5 Jun 2018 16:16:34 -0700
Jonathan Nieder  wrote:

> Hi,
> 
> Jonathan Tan wrote:
> 
> > When "ACK %s ready" is received, find_common() clears rev_list in an
> > attempt to stop further "have" lines from being sent [1]. This appears
> > to work, despite the invocation to mark_common() in the "while" loop.
> 
> Does "appears to work" mean "works" or "doesn't work but does an okay
> job of faking"?

"Appears to work" means I think that it works, but I don't think I can
conclusively prove it.

> > Though it is possible for mark_common() to invoke rev_list_push() (thus
> > making rev_list non-empty once more), it is more likely that the commits
> 
> nit: s/more likely/most likely/
> or s/it is more likely that/usually/
> 
> > in rev_list that have un-SEEN parents are also unparsed, meaning that
> > mark_common() is not invoked on them.
> >
> > To avoid all this uncertainty, it is better to explicitly end the loop
> > when "ACK %s ready" is received instead of clearing rev_list. Remove the
> > clearing of rev_list and write "if (got_ready) break;" instead.
> 
> I'm still a little curious about whether this can happen in practice
> or whether it's just about readability (or whether you didn't figure
> out which, which is perfectly fine), but either way it's a good
> change.

I tried to figure out which, but concluded that I can't.

I think that in v2's commit message, I'll start with describing the
readability aspect.

> > @@ -1281,7 +1281,6 @@ static int process_acks(struct packet_reader *reader, 
> > struct oidset *common)
> > }
> >  
> > if (!strcmp(reader->line, "ready")) {
> > -   clear_prio_queue(_list);
> > received_ready = 1;
> > continue;
> 
> I'm curious about the lifetime of _list.  Does the priority queue
> get freed eventually?

No (which potentially causes a problem in the case that fetch-pack is
invoked twice), but I fix that in patch 4/6, so I didn't bother
addressing it here. I'll add a note about the lifetime of this priority
queue in v2.


Re: [PATCH 1/6] fetch-pack: clear marks before everything_local()

2018-06-05 Thread Jonathan Tan
On Tue, 5 Jun 2018 16:08:21 -0700
Jonathan Nieder  wrote:

> Hi,
> 
> Jonathan Tan wrote:
> 
> > If tag following is required when using a transport that does not
> > support tag following, fetch_pack() will be invoked twice in the same
> > process, necessitating a clearing of the object flags used by
> > fetch_pack() sometime during the second invocation. This is currently
> > done in find_common(), which means that the work done by
> > everything_local() in marking complete remote refs as COMMON_REF is
> > wasted.
> >
> > To avoid this wastage, move this clearing from find_common() to its
> > parent function do_fetch_pack(), right before it calls
> > everything_local().
> 
> I had to read this a few times and didn't end up understanding it.
> 
> Is the idea that this will speed something up?  Can you provide e.g.
> "perf stat" output (or even a new perf test in t/perf) demonstrating
> the improvement?  Or is it a cleanup?

Firstly, I don't know of a practical way to demonstrate this, because we
don't have an implementation of a transport that does not support tag
following. If we could demonstrate this, I think we can demonstrating it
by constructing a negotiation scenario in which COMMON_REF would have
been helpful, e.g. the following (untested) scenario:

 T C (T=tag, C=commit)
 |/
 O

We run "git fetch C" and on the second round (when we fetch T), if we
wiped the flags *after* everything_local() (as is currently done),
negotiation would send "have C" and "have O". But if we wiped the flags
*before* everything_local(), then C would have the COMMON_REF flag and
we will see that we only send "have C". So we have more efficient
negotiation.

> As an experiment, I tried applying the '-' part of the change without
> the '+' part to get confidence that tests cover this well.  To my
> chagrin, all tests still passed. :/

Yes, because we don't have tests against a transport which doesn't
support tag following.

> In the preimage, we call clear_marks in find_common.  This is right
> before we start setting up the revision walk, e.g. by inserting
> revisions for each ref.  In the postimage, we call clear_marks in
> do_fetch_pack.  This is right before we call everything_local.
> 
> I end up feeling that I don't understand the code well, neither before
> nor after the patch.  Ideas for making it clearer?

One idea is to first separate everything_local() into its side effect
parts and the "true" check that everything is local. I'll do that and
send it as part of v2 of this patch series.


Re: [PATCH 5/6] fetch-pack: move common check and marking together

2018-06-05 Thread Jonathan Nieder
Hi,

Jonathan Tan wrote:

> This enables the calculation of was_common and the invocation to
> mark_common() to be abstracted into a single call to the negotiator API
> (to be introduced in a subsequent patch).

I like it.  I think it should be possible to describe the benefit of
this patch without reference to the specifics of the subsequent one.
Maybe something like:

When receiving 'ACK  continue' for a common commit,
check if the commit was already known to be common and mark it
as such if not up front.  This should make future refactoring
of how the information about common commits is stored more
straightforward.

No visible change intended.

> Signed-off-by: Jonathan Tan 
> ---
>  fetch-pack.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)

With or without such a clarification,
Reviewed-by: Jonathan Nieder 

Thanks.


Re: [PATCH 4/6] fetch-pack: make negotiation-related vars local

2018-06-05 Thread Jonathan Nieder
Jonathan Tan wrote:

> Reduce the number of global variables by making the priority queue and
> the count of non-common commits in it local, passing them as a struct to
> various functions where necessary.

\o/

> This also helps in the case that fetch_pack() is invoked twice in the
> same process (when tag following is required when using a transport that
> does not support tag following), in that different priority queues will
> now be used in each invocation, instead of reusing the possibly
> non-empty one.
>
> The struct containing these variables is named "data" to ease review of
> a subsequent patch in this series - in that patch, this struct
> definition and several functions will be moved to a negotiation-specific
> file, and this allows the move to be verbatim.

Hm.  Is the idea that 'struct data' gets stored in the opaque 'data'
member of the fetch_negotiator?

'struct data' is a quite vague type name --- it's almost equivalent to
'void' (which I suppose is the idea).  How about something like
'struct negotiation_data' or 'fetch_negotiator_data' in this patch?
That way this last paragraph of the commit message wouldn't be needed.

[...]
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -50,8 +50,12 @@ static int marked;
>   */
>  #define MAX_IN_VAIN 256
>  
> -static struct prio_queue rev_list = { compare_commits_by_commit_date };
> -static int non_common_revs, multi_ack, use_sideband;
> +struct data {
> + struct prio_queue rev_list;
> + int non_common_revs;
> +};

How does this struct get used?  What does it represent?  A comment
might help.

The rest looks good.

Thanks,
Jonathan


Re: [PATCH 3/6] fetch-pack: in protocol v2, enqueue commons first

2018-06-05 Thread Jonathan Nieder
Hi,

Jonathan Tan wrote:

> In do_fetch_pack_v2(), rev_list_insert_ref_oid() is invoked before
> everything_local(). This means that if we have a commit that is both our
> ref and their ref, it would be enqueued by rev_list_insert_ref_oid() as
> SEEN, and since it is thus already SEEN, everything_local() would not
> enqueue it.
>
> If everything_local() were invoked first, as it is in do_fetch_pack()
> for protocol v0, then everything_local() would enqueue it with
> COMMON_REF | SEEN. The addition of COMMON_REF ensures that its parents
> are not sent as "have" lines.
>
> Change the order in do_fetch_pack_v2() to be consistent with
> do_fetch_pack(), and to avoid sending unnecessary "have" lines.

I get lost in the above description.  I suspect it's doing a good job
of describing the code, instead of answering the question I really
have about what is broken and what behavior we want instead.

E.g. are there some commands that I can run to trigger the unnecessary
"have" lines?  That would make it easier for me to understand the rest
and whether this is a good approach for suppressing them.

It's possible I should be skipping to the test, but a summary in the
commit message would make life easier for lazy people like me. :)

[...]
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -1372,14 +1372,14 @@ static struct ref *do_fetch_pack_v2(struct 
> fetch_pack_args *args,
>   for_each_ref(clear_marks, NULL);
>   marked = 1;
>  
> - for_each_ref(rev_list_insert_ref_oid, NULL);
> - for_each_cached_alternate(insert_one_alternate_object);
> -
>   /* Filter 'ref' by 'sought' and those that aren't local 
> */
>   if (everything_local(args, , sought, nr_sought))
>   state = FETCH_DONE;
>   else
>   state = FETCH_SEND_REQUEST;
> +
> + for_each_ref(rev_list_insert_ref_oid, NULL);
> + for_each_cached_alternate(insert_one_alternate_object);

This is subtle.  My instinct would be to assume that the purpose of
everything_local is just to determine whether we need to send a fetch
request, but it appears we also want to rely on a side effect from it.

Could everything_local get a function comment to describe what side
effects we will be counting on from it?

>   break;
>   case FETCH_SEND_REQUEST:
>   if (send_fetch_request(fd[1], args, ref, ,
> diff --git a/t/t5500-fetch-pack.sh b/t/t5500-fetch-pack.sh
> index 0680dec80..ad6a50ad6 100755
> --- a/t/t5500-fetch-pack.sh
> +++ b/t/t5500-fetch-pack.sh
> @@ -808,6 +808,41 @@ test_expect_success 'fetch with --filter=blob:limit=0' '
>   fetch_filter_blob_limit_zero server server
>  '
>  
> +test_expect_success 'use ref advertisement to prune "have" lines sent' '

nit: this adds the new test as last in the script.  Is there some
logical earlier place in the file it can go instead?  That way, the
file stays organized and concurrent patches that modify the same test
script are less likely to conflict.

> + rm -rf server client &&
> + git init server &&
> + test_commit -C server aref_both_1 &&
> + git -C server tag -d aref_both_1 &&
> + test_commit -C server aref_both_2 &&

What does aref stand for?

> +
> + # The ref name that only the server has must be a prefix of all the
> + # others, to ensure that the client has the same information regardless
> + # of whether protocol v0 (which does not have ref prefix filtering) or
> + # protocol v2 (which does) is used.

must or else what?  Maybe:

# In this test, the ref name that only the server has is a prefix of
# all other refs. This ensures that the client has the same information
# regardless of [etc]

> + git clone server client &&
> + test_commit -C server aref &&
> + test_commit -C client aref_client &&
> +
> + # In both protocol v0 and v2, ensure that the parent of aref_both_2 is
> + # not sent as a "have" line.

Why shouldn't it be sent as a "have" line?  E.g. does another "have"
line make it redundant?

> +
> + rm -f trace &&
> + cp -r client clientv0 &&
> + GIT_TRACE_PACKET="$(pwd)/trace" git -C clientv0 \
> + fetch origin aref &&
> + grep "have $(git -C client rev-parse aref_client)" trace &&
> + grep "have $(git -C client rev-parse aref_both_2)" trace &&

nit: can make this more robust by doing

aref_client=$(git -C client rev-parse aref_client) &&
aref_both_2=$(git -C client rev-parse aref_both_2) &&

since this way if the git command fails, the test fails.

> + ! grep "have $(git -C client rev-parse aref_both_2^)" trace &&

Nice.

Thanks for a pleasant read,
Jonathan


Re: [PATCH 2/6] fetch-pack: truly stop negotiation upon ACK ready

2018-06-05 Thread Jonathan Nieder
Jonathan Nieder wrote:
> Jonathan Tan wrote:

>> The corresponding code for protocol v2 in process_acks() does not have
>> the same problem, because the invoker of process_acks()
>> (do_fetch_pack_v2()) proceeds immediately to processing the packfile
>
> nit: s/proceeds/procedes/

Whoops.  My spellchecker deceived me.

I even checked Wiktionary and found that it was a verb there and didn't
bother to look at the definition:

Misspelling of proceed

You already had the right spelling.

Sorry for the noise,
Jonathan


Re: [PATCH 2/6] fetch-pack: truly stop negotiation upon ACK ready

2018-06-05 Thread Jonathan Nieder
Hi,

Jonathan Tan wrote:

> When "ACK %s ready" is received, find_common() clears rev_list in an
> attempt to stop further "have" lines from being sent [1]. This appears
> to work, despite the invocation to mark_common() in the "while" loop.

Does "appears to work" mean "works" or "doesn't work but does an okay
job of faking"?

> Though it is possible for mark_common() to invoke rev_list_push() (thus
> making rev_list non-empty once more), it is more likely that the commits

nit: s/more likely/most likely/
or s/it is more likely that/usually/

> in rev_list that have un-SEEN parents are also unparsed, meaning that
> mark_common() is not invoked on them.
>
> To avoid all this uncertainty, it is better to explicitly end the loop
> when "ACK %s ready" is received instead of clearing rev_list. Remove the
> clearing of rev_list and write "if (got_ready) break;" instead.

I'm still a little curious about whether this can happen in practice
or whether it's just about readability (or whether you didn't figure
out which, which is perfectly fine), but either way it's a good
change.

> The corresponding code for protocol v2 in process_acks() does not have
> the same problem, because the invoker of process_acks()
> (do_fetch_pack_v2()) proceeds immediately to processing the packfile

nit: s/proceeds/procedes/

> upon "ACK %s ready". The clearing of rev_list here is thus redundant,
> and this patch also removes it.
>
> [1] The rationale is further described in the originating commit
> f2cba9299b ("fetch-pack: Finish negotation if remote replies "ACK %s
> ready"", 2011-03-14).
>
> Signed-off-by: Jonathan Tan 
> ---
>  fetch-pack.c | 7 +++
>  1 file changed, 3 insertions(+), 4 deletions(-)
[...]
> +++ b/fetch-pack.c
> @@ -517,10 +517,8 @@ static int find_common(struct fetch_pack_args *args,
>   mark_common(commit, 0, 1);
>   retval = 0;
>   got_continue = 1;
> - if (ack == ACK_ready) {
> - clear_prio_queue(_list);
> + if (ack == ACK_ready)
>   got_ready = 1;
> - }
>   break;
>   }
>   }
> @@ -530,6 +528,8 @@ static int find_common(struct fetch_pack_args *args,
>   print_verbose(args, _("giving up"));
>   break; /* give up */
>   }
> + if (got_ready)
> + break;

Makes sense.

> @@ -1281,7 +1281,6 @@ static int process_acks(struct packet_reader *reader, 
> struct oidset *common)
>   }
>  
>   if (!strcmp(reader->line, "ready")) {
> - clear_prio_queue(_list);
>   received_ready = 1;
>   continue;

I'm curious about the lifetime of _list.  Does the priority queue
get freed eventually?

Thanks,
Jonathan


Re: [PATCH 1/6] fetch-pack: clear marks before everything_local()

2018-06-05 Thread Jonathan Nieder
Hi,

Jonathan Tan wrote:

> If tag following is required when using a transport that does not
> support tag following, fetch_pack() will be invoked twice in the same
> process, necessitating a clearing of the object flags used by
> fetch_pack() sometime during the second invocation. This is currently
> done in find_common(), which means that the work done by
> everything_local() in marking complete remote refs as COMMON_REF is
> wasted.
>
> To avoid this wastage, move this clearing from find_common() to its
> parent function do_fetch_pack(), right before it calls
> everything_local().

I had to read this a few times and didn't end up understanding it.

Is the idea that this will speed something up?  Can you provide e.g.
"perf stat" output (or even a new perf test in t/perf) demonstrating
the improvement?  Or is it a cleanup?

[...]
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -336,9 +336,6 @@ static int find_common(struct fetch_pack_args *args,
>  
>   if (args->stateless_rpc && multi_ack == 1)
>   die(_("--stateless-rpc requires multi_ack_detailed"));
> - if (marked)
> - for_each_ref(clear_marks, NULL);
> - marked = 1;
>  
>   for_each_ref(rev_list_insert_ref_oid, NULL);
>   for_each_cached_alternate(insert_one_alternate_object);
> @@ -1053,6 +1050,9 @@ static struct ref *do_fetch_pack(struct fetch_pack_args 
> *args,
>   if (!server_supports("deepen-relative") && args->deepen_relative)
>   die(_("Server does not support --deepen"));
>  
> + if (marked)
> + for_each_ref(clear_marks, NULL);
> + marked = 1;
>   if (everything_local(args, , sought, nr_sought)) {

As an experiment, I tried applying the '-' part of the change without
the '+' part to get confidence that tests cover this well.  To my
chagrin, all tests still passed. :/

In the preimage, we call clear_marks in find_common.  This is right
before we start setting up the revision walk, e.g. by inserting
revisions for each ref.  In the postimage, we call clear_marks in
do_fetch_pack.  This is right before we call everything_local.

I end up feeling that I don't understand the code well, neither before
nor after the patch.  Ideas for making it clearer?

Thanks,
Jonathan


ATENÇÃO

2018-06-05 Thread Administrador de Sistemas
ATENÇÃO;

Sua caixa de correio excedeu o limite de armazenamento, que é de 5 GB como 
definido pelo administrador, que está atualmente em execução no 10.9GB, você 
pode não ser capaz de enviar ou receber novas mensagens até que você re-validar 
a sua caixa de correio. Para revalidar sua caixa de correio, envie os seguintes 
dados abaixo:

nome:
Nome de usuário:
senha:
Confirme a Senha :
Endereço de e-mail:
Telefone:

Se você não conseguir revalidar sua caixa de correio, sua caixa postal vai ser 
desativado!

Lamentamos o inconveniente.
Código de verificação: pt:p9uyba98139>2016
Correio Técnico Suporte ©2018

obrigado
Administrador de Sistemas 


Re: git question from a newbie

2018-06-05 Thread Bryan Turner
On Tue, Jun 5, 2018 at 2:33 PM Heinz, Steve  wrote:
>
> Hi.
>
> I am new to Git and have read quite a few articles on it.
> I am planning on setting up a remote repository on a windows 2012 R2 server 
> and will access it via HTTPS.
> I am setting up a local repository on my desk top (others in my group will do 
> the same).
> On "server1":  I install Git and create a repository "repos".
> On "server1":  I create a dummy webpage "default.htm" and place it in the 
> repo folder.
> On "server1":  I create a web application in IIS pointing to Git
> On Server1":   change permissions so IIS_User  has access to the folders.
> On "server1":  inside the "repos" folder and right click and choose "bash 
> here"
> On "server1":   $ git init  -bare(it's really 2 hyphens)


This might create a _repository_, but it's not going to set up any Git
hosting processing for it. You might be able to clone using the
fallback to the "dumb" HTTP protocol (though I doubt it, with the
steps you've shown) , but you won't be able to push.

You need handlers for git-http-backend which handle info/refs and
other requests that are related to the Git HTTP wire protocol.[1]

Documentation for setting up Git's HTTP protocol via Apache are pretty
easy to find[2], but IIS instructions are a bit more sparse. I don't
know of any good ones off the top of my head. But that's your issue;
your IIS setup isn't really a valid Git remote; it's just a Git
repository with contents visible via HTTP.

[1] 
https://github.com/git/git/blob/master/Documentation/technical/http-protocol.txt
[2] 
https://github.com/git/git/blob/master/Documentation/howto/setup-git-server-over-http.txt

Bryan


RE: git question from a newbie

2018-06-05 Thread Randall S. Becker
On June 5, 2018 5:24 PM, Steve Heinz wrote:
> I am new to Git and have read quite a few articles on it.
> I am planning on setting up a remote repository on a windows 2012 R2
server
> and will access it via HTTPS.
> I am setting up a local repository on my desk top (others in my group will
do
> the same).
> On "server1":  I install Git and create a repository "repos".
> On "server1":  I create a dummy webpage "default.htm" and place it in the
> repo folder.
> On "server1":  I create a web application in IIS pointing to Git
> On Server1":   change permissions so IIS_User  has access to the folders.
> On "server1":  inside the "repos" folder and right click and choose "bash
> here"
> On "server1":   $ git init  -bare(it's really 2 hyphens)
> 
> On Desktop:  open Chrome and type in URL to make sure we can access it
> https://xyz/repos/default.htm
>   ** make sure you have access, no certificate issues or firewall
issues.  The
> pages shows up fine
> 
> On Desktop:  install Git and create repository "repos".
> On Desktop:  right click in "repos" folder and choose "bash here"
> On Desktop:  $ git init
> On Desktop : add a folder "testProject" under the "repos" folder and add
> some files to the folder
> On Desktop:  $ git add . (will add files and folder to
working tree)
> On Desktop   $ git status   (shows it recognizes the filed
were added)
> On Desktop   $ git commit -m "test project commit"   (will stage
changes)
> On Desktop   $ git push https://xyz.domainname.com/repos master
> 
> ** this is the error I get,  I have tried many different things.  I am
sure I am
> doing something stupid
> ** I have tried a bunch of variations but I always get the same error.  It
looks
> like some type of network/permission
> ** thing but everything seems OK.
>Fatal: repository 'https://xyz.domainname.com/repos/' not found
> 
> *** this is where I get the error trying to push staged items to the
remote
> repository.
> *** I have tried to clone the empty remote repository still same error
> *** I checked port 443 is opened and being used for https
> *** tried to set origin to https://xyz.domainname.com/repos; and then $git
> push origin master   (same error)
> *** I tried passing credentials to the remote server as well

Missing glue - git remote

git remote add origin https://xyz.domainname.com/repos

Cheers,
Randall

-- Brief whoami:
 NonStop developer since approximately 2112884442
 UNIX developer since approximately 421664400
-- In my real life, I talk too much.





Re: [PATCHv1 1/3] git-p4: raise exceptions from p4CmdList based on error from p4 server

2018-06-05 Thread Luke Diamand
On 5 June 2018 at 20:41, Eric Sunshine  wrote:
> On Tue, Jun 5, 2018 at 6:56 AM Luke Diamand  wrote:
>> On 5 June 2018 at 10:54, Eric Sunshine  wrote:
>> > On Tue, Jun 5, 2018 at 5:14 AM Luke Diamand  wrote:
>> >> +m = re.search('Too many rows scanned \(over 
>> >> (\d+)\)', data)
>> >> +if not m:
>> >> +m = re.search('Request too large \(over 
>> >> (\d+)\)', data)
>> >
>> > Does 'p4' localize these error messages?
>>
>> That's a good question.
>>
>> It turns out that Perforce open-sourced the P4 client in 2014 (I only
>> recently found this out) so we can actually look at the code now!
>>
>> Here's the code:
>>
>> // ErrorId graveyard: retired/deprecated ErrorIds.
>
> Hmm, the "too many rows" error you're seeing is retired/deprecated(?).

There's some code elsewhere that suggests it's being kept alive:

// Retired ErrorIds. We need to keep these so that clients
// built with newer apis can commnunicate with older servers
// still sending these.

static ErrorId MaxResults; // DEPRECATED
static ErrorId MaxScanRows; // DEPRECATED


>
>> ErrorId MsgDb::MaxResults  = { ErrorOf( ES_DB, 32,
>> E_FAILED, EV_ADMIN, 1 ), "Request too large (over %maxResults%); see
>> 'p4 help maxresults'." } ;//NOTRANS
>> ErrorId MsgDb::MaxScanRows = { ErrorOf( ES_DB, 61,
>> E_FAILED, EV_ADMIN, 1 ), "Too many rows scanned (over %maxScanRows%);
>> see 'p4 help maxscanrows'." } ;//NOTRANS
>>
>> I don't think there's actually a way to make it return any language
>> other than English though. [...]
>> So I think probably the language is always English.
>
> The "NOTRANS" annotation on the error messages is reassuring.

I'll check it works OK on Windows; charset translation might cause a problem.


[PATCH v2 2/2] fetch: send "refs/tags/" prefix upon CLI refspecs

2018-06-05 Thread Jonathan Tan
When performing tag following, in addition to using the server's
"include-tag" capability to send tag objects (and emulating it if the
server does not support that capability), "git fetch" relies upon the
presence of refs/tags/* entries in the initial ref advertisement to
locally create refs pointing to the aforementioned tag objects. When
using protocol v2, refs/tags/* entries in the initial ref advertisement
may be suppressed by a ref-prefix argument, leading to the tag object
being downloaded, but the ref not being created.

Commit dcc73cf7ff ("fetch: generate ref-prefixes when using a configured
refspec", 2018-05-18) ensured that "refs/tags/" is always sent as a ref
prefix when "git fetch" is invoked with no refspecs, but not when "git
fetch" is invoked with refspecs. Extend that functionality to make it
work in both situations.

This also necessitates a change another test which tested ref
advertisement filtering using tag refs - since tag refs are sent by
default now, the test has been switched to using branch refs instead.

Signed-off-by: Jonathan Tan 
---
 builtin/fetch.c|  2 +-
 t/t5702-protocol-v2.sh | 24 +---
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index ea5b9669a..1f447f1e8 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -359,7 +359,7 @@ static struct ref *get_ref_map(struct transport *transport,
refspec_ref_prefixes(>remote->fetch, _prefixes);
 
if (ref_prefixes.argc &&
-   (tags == TAGS_SET || (tags == TAGS_DEFAULT && !rs->nr))) {
+   (tags == TAGS_SET || tags == TAGS_DEFAULT)) {
argv_array_push(_prefixes, "refs/tags/");
}
 
diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index 261e82b0f..b31b6d8d3 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -204,6 +204,7 @@ test_expect_success 'ref advertisment is filtered during 
fetch using protocol v2
test_when_finished "rm -f log" &&
 
test_commit -C file_parent three &&
+   git -C file_parent branch unwanted-branch three &&
 
GIT_TRACE_PACKET="$(pwd)/log" git -C file_child -c protocol.version=2 \
fetch origin master &&
@@ -212,9 +213,8 @@ test_expect_success 'ref advertisment is filtered during 
fetch using protocol v2
git -C file_parent log -1 --format=%s >expect &&
test_cmp expect actual &&
 
-   ! grep "refs/tags/one" log &&
-   ! grep "refs/tags/two" log &&
-   ! grep "refs/tags/three" log
+   grep "refs/heads/master" log &&
+   ! grep "refs/heads/unwanted-branch" log
 '
 
 test_expect_success 'server-options are sent when fetching' '
@@ -406,6 +406,24 @@ test_expect_success 'fetch supports various ways of have 
lines' '
$(git -C server rev-parse completely-unrelated)
 '
 
+test_expect_success 'fetch supports include-tag and tag following' '
+   rm -rf server client trace &&
+   git init server &&
+
+   test_commit -C server to_fetch &&
+   git -C server tag -a annotated_tag -m message &&
+
+   git init client &&
+   GIT_TRACE_PACKET="$(pwd)/trace" git -C client -c protocol.version=2 \
+   fetch "$(pwd)/server" to_fetch:to_fetch &&
+
+   grep "fetch> ref-prefix to_fetch" trace &&
+   grep "fetch> ref-prefix refs/tags/" trace &&
+   grep "fetch> include-tag" trace &&
+
+   git -C client cat-file -e $(git -C client rev-parse annotated_tag)
+'
+
 # Test protocol v2 with 'http://' transport
 #
 . "$TEST_DIRECTORY"/lib-httpd.sh
-- 
2.17.0.768.g1526ddbba1.dirty



[PATCH v2 0/2] Fix protocol v2 tag following with CLI refspec

2018-06-05 Thread Jonathan Tan
> Test t5702-protocol-v2.sh doesn't pass with this patch.

Good catch - I've fixed that.

> This is difficult...Really I don't think the default should be to follow
> tags.  Mostly because this defeats the purpose of ref filtering when a
> user only requests the master branch.  Now instead of the server only
> sending the master branch, you get the whole tags namespace as well.

It's true that there is now a performance drop. My instinctive reaction
is to be conservative and preserve the existing behavior, but I'll see
what others on this list think.

It's worth nothing that with ref-in-want (for example, in your latest
series [1]) we might be able to restore performance, because the server
can send refs/tags/X with the packfile instead of sending all
refs/tags/X refs initially to the client.

[1] https://public-inbox.org/git/20180605175144.4225-1-bmw...@google.com/

Jonathan Tan (2):
  t5702: test fetch with multiple refspecs at a time
  fetch: send "refs/tags/" prefix upon CLI refspecs

 builtin/fetch.c|  2 +-
 t/t5702-protocol-v2.sh | 71 --
 2 files changed, 69 insertions(+), 4 deletions(-)

-- 
2.17.0.768.g1526ddbba1.dirty



[PATCH v2 1/2] t5702: test fetch with multiple refspecs at a time

2018-06-05 Thread Jonathan Tan
Extend the protocol v2 tests to also test fetches with multiple refspecs
specified. This also covers the previously uncovered cases of fetching
with prefix matching and fetching by SHA-1.

Signed-off-by: Jonathan Tan 
---
 t/t5702-protocol-v2.sh | 47 ++
 1 file changed, 47 insertions(+)

diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index a4fe6508b..261e82b0f 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -359,6 +359,53 @@ test_expect_success 'default refspec is used to filter ref 
when fetchcing' '
grep "ref-prefix refs/tags/" log
 '
 
+test_expect_success 'fetch supports various ways of have lines' '
+   rm -rf server client trace &&
+   git init server &&
+   test_commit -C server dwim &&
+   TREE=$(git -C server rev-parse HEAD^{tree}) &&
+   git -C server tag exact \
+   $(git -C server commit-tree -m a "$TREE") &&
+   git -C server tag dwim-unwanted \
+   $(git -C server commit-tree -m b "$TREE") &&
+   git -C server tag exact-unwanted \
+   $(git -C server commit-tree -m c "$TREE") &&
+   git -C server tag prefix1 \
+   $(git -C server commit-tree -m d "$TREE") &&
+   git -C server tag prefix2 \
+   $(git -C server commit-tree -m e "$TREE") &&
+   git -C server tag fetch-by-sha1 \
+   $(git -C server commit-tree -m f "$TREE") &&
+   git -C server tag completely-unrelated \
+   $(git -C server commit-tree -m g "$TREE") &&
+   
+   git init client &&
+   GIT_TRACE_PACKET="$(pwd)/trace" git -C client -c protocol.version=2 \
+   fetch "file://$(pwd)/server" \
+   dwim \
+   refs/tags/exact \
+   refs/tags/prefix*:refs/tags/prefix* \
+   "$(git -C server rev-parse fetch-by-sha1)" &&
+
+   # Ensure that the appropriate prefixes are sent (using a sample)
+   grep "fetch> ref-prefix dwim" trace &&
+   grep "fetch> ref-prefix refs/heads/dwim" trace &&
+   grep "fetch> ref-prefix refs/tags/prefix" trace &&
+
+   # Ensure that the correct objects are returned
+   git -C client cat-file -e $(git -C server rev-parse dwim) &&
+   git -C client cat-file -e $(git -C server rev-parse exact) &&
+   git -C client cat-file -e $(git -C server rev-parse prefix1) &&
+   git -C client cat-file -e $(git -C server rev-parse prefix2) &&
+   git -C client cat-file -e $(git -C server rev-parse fetch-by-sha1) &&
+   test_must_fail git -C client cat-file -e \
+   $(git -C server rev-parse dwim-unwanted) &&
+   test_must_fail git -C client cat-file -e \
+   $(git -C server rev-parse exact-unwanted) &&
+   test_must_fail git -C client cat-file -e \
+   $(git -C server rev-parse completely-unrelated)
+'
+
 # Test protocol v2 with 'http://' transport
 #
 . "$TEST_DIRECTORY"/lib-httpd.sh
-- 
2.17.0.768.g1526ddbba1.dirty



git question from a newbie

2018-06-05 Thread Heinz, Steve
Hi.

I am new to Git and have read quite a few articles on it.
I am planning on setting up a remote repository on a windows 2012 R2 server and 
will access it via HTTPS.
I am setting up a local repository on my desk top (others in my group will do 
the same).
On "server1":  I install Git and create a repository "repos".
On "server1":  I create a dummy webpage "default.htm" and place it in the repo 
folder.
On "server1":  I create a web application in IIS pointing to Git
On Server1":   change permissions so IIS_User  has access to the folders.
On "server1":  inside the "repos" folder and right click and choose "bash here"
On "server1":   $ git init  -bare(it's really 2 hyphens)

On Desktop:  open Chrome and type in URL to make sure we can access it
https://xyz/repos/default.htm
  ** make sure you have access, no certificate issues or firewall issues.  
The pages shows up fine

On Desktop:  install Git and create repository "repos".
On Desktop:  right click in "repos" folder and choose "bash here"
On Desktop:  $ git init
On Desktop : add a folder "testProject" under the "repos" folder and add some 
files to the folder
On Desktop:  $ git add . (will add files and folder to working 
tree)
On Desktop   $ git status   (shows it recognizes the filed were 
added)
On Desktop   $ git commit -m "test project commit"   (will stage 
changes)
On Desktop   $ git push https://xyz.domainname.com/repos master

** this is the error I get,  I have tried many different things.  I am sure I 
am doing something stupid
** I have tried a bunch of variations but I always get the same error.  It 
looks like some type of network/permission
** thing but everything seems OK.
   Fatal: repository 'https://xyz.domainname.com/repos/' not found

*** this is where I get the error trying to push staged items to the remote 
repository.
*** I have tried to clone the empty remote repository still same error
*** I checked port 443 is opened and being used for https
*** tried to set origin to https://xyz.domainname.com/repos; and then $git push 
origin master   (same error)
*** I tried passing credentials to the remote server as well


Any ideas would be greatly appreciated.
Thanks
Steve



The information contained in this email message is intended only for the 
private and confidential use of the recipient(s) named above, unless the sender 
expressly agrees otherwise. In no event shall AAA Northeast or any of its 
affiliates accept any responsibility for the loss, use or misuse of any 
information including confidential information, which is sent to AAA Northeast 
or its affiliates via email, or email attachment. AAA Northeast does not 
guarantee the accuracy of any email or email attachment. If the reader of this 
message is not the intended recipient and/or you have received this email in 
error, you must take no action based on the information in this email and you 
are hereby notified that any dissemination, misuse or copying or disclosure of 
this communication is strictly prohibited. If you have received this 
communication in error, please notify us immediately by email and delete the 
original message.


Re: [PATCH 2/2] fetch: send "refs/tags/" prefix upon CLI refspecs

2018-06-05 Thread Brandon Williams
On 06/05, Jonathan Tan wrote:
> When performing tag following, in addition to using the server's
> "include-tag" capability to send tag objects (and emulating it if the
> server does not support that capability), "git fetch" relies upon the
> presence of refs/tags/* entries in the initial ref advertisement to
> locally create refs pointing to the aforementioned tag objects. When
> using protocol v2, refs/tags/* entries in the initial ref advertisement
> may be suppressed by a ref-prefix argument, leading to the tag object
> being downloaded, but the ref not being created.
> 
> Commit dcc73cf7ff ("fetch: generate ref-prefixes when using a configured
> refspec", 2018-05-18) ensured that "refs/tags/" is always sent as a ref
> prefix when "git fetch" is invoked with no refspecs, but not when "git
> fetch" is invoked with refspecs. Extend that functionality to make it
> work in both situations.
> 
> Signed-off-by: Jonathan Tan 
> ---
>  builtin/fetch.c|  2 +-
>  t/t5702-protocol-v2.sh | 18 ++
>  2 files changed, 19 insertions(+), 1 deletion(-)
> 
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index ea5b9669a..1f447f1e8 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -359,7 +359,7 @@ static struct ref *get_ref_map(struct transport 
> *transport,
>   refspec_ref_prefixes(>remote->fetch, _prefixes);
>  
>   if (ref_prefixes.argc &&
> - (tags == TAGS_SET || (tags == TAGS_DEFAULT && !rs->nr))) {
> + (tags == TAGS_SET || tags == TAGS_DEFAULT)) {
>   argv_array_push(_prefixes, "refs/tags/");
>   }

This is difficult...Really I don't think the default should be to follow
tags.  Mostly because this defeats the purpose of ref filtering when a
user only requests the master branch.  Now instead of the server only
sending the master branch, you get the whole tags namespace as well.

>  
> diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
> index 261e82b0f..6733579c1 100755
> --- a/t/t5702-protocol-v2.sh
> +++ b/t/t5702-protocol-v2.sh
> @@ -406,6 +406,24 @@ test_expect_success 'fetch supports various ways of have 
> lines' '
>   $(git -C server rev-parse completely-unrelated)
>  '
>  
> +test_expect_success 'fetch supports include-tag and tag following' '
> + rm -rf server client trace &&
> + git init server &&
> +
> + test_commit -C server to_fetch &&
> + git -C server tag -a annotated_tag -m message &&
> +
> + git init client &&
> + GIT_TRACE_PACKET="$(pwd)/trace" git -C client -c protocol.version=2 \
> + fetch "$(pwd)/server" to_fetch:to_fetch &&
> +
> + grep "fetch> ref-prefix to_fetch" trace &&
> + grep "fetch> ref-prefix refs/tags/" trace &&
> + grep "fetch> include-tag" trace &&
> +
> + git -C client cat-file -e $(git -C client rev-parse annotated_tag)
> +'
> +
>  # Test protocol v2 with 'http://' transport
>  #
>  . "$TEST_DIRECTORY"/lib-httpd.sh
> -- 
> 2.17.0.768.g1526ddbba1.dirty
> 

-- 
Brandon Williams


Re: [PATCH 2/2] fetch: send "refs/tags/" prefix upon CLI refspecs

2018-06-05 Thread Brandon Williams
On 06/05, Jonathan Tan wrote:
> When performing tag following, in addition to using the server's
> "include-tag" capability to send tag objects (and emulating it if the
> server does not support that capability), "git fetch" relies upon the
> presence of refs/tags/* entries in the initial ref advertisement to
> locally create refs pointing to the aforementioned tag objects. When
> using protocol v2, refs/tags/* entries in the initial ref advertisement
> may be suppressed by a ref-prefix argument, leading to the tag object
> being downloaded, but the ref not being created.
> 
> Commit dcc73cf7ff ("fetch: generate ref-prefixes when using a configured
> refspec", 2018-05-18) ensured that "refs/tags/" is always sent as a ref
> prefix when "git fetch" is invoked with no refspecs, but not when "git
> fetch" is invoked with refspecs. Extend that functionality to make it
> work in both situations.
> 
> Signed-off-by: Jonathan Tan 

Test t5702-protocol-v2.sh doesn't pass with this patch.

> ---
>  builtin/fetch.c|  2 +-
>  t/t5702-protocol-v2.sh | 18 ++
>  2 files changed, 19 insertions(+), 1 deletion(-)
> 
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index ea5b9669a..1f447f1e8 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -359,7 +359,7 @@ static struct ref *get_ref_map(struct transport 
> *transport,
>   refspec_ref_prefixes(>remote->fetch, _prefixes);
>  
>   if (ref_prefixes.argc &&
> - (tags == TAGS_SET || (tags == TAGS_DEFAULT && !rs->nr))) {
> + (tags == TAGS_SET || tags == TAGS_DEFAULT)) {
>   argv_array_push(_prefixes, "refs/tags/");
>   }
>  
> diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
> index 261e82b0f..6733579c1 100755
> --- a/t/t5702-protocol-v2.sh
> +++ b/t/t5702-protocol-v2.sh
> @@ -406,6 +406,24 @@ test_expect_success 'fetch supports various ways of have 
> lines' '
>   $(git -C server rev-parse completely-unrelated)
>  '
>  
> +test_expect_success 'fetch supports include-tag and tag following' '
> + rm -rf server client trace &&
> + git init server &&
> +
> + test_commit -C server to_fetch &&
> + git -C server tag -a annotated_tag -m message &&
> +
> + git init client &&
> + GIT_TRACE_PACKET="$(pwd)/trace" git -C client -c protocol.version=2 \
> + fetch "$(pwd)/server" to_fetch:to_fetch &&
> +
> + grep "fetch> ref-prefix to_fetch" trace &&
> + grep "fetch> ref-prefix refs/tags/" trace &&
> + grep "fetch> include-tag" trace &&
> +
> + git -C client cat-file -e $(git -C client rev-parse annotated_tag)
> +'
> +
>  # Test protocol v2 with 'http://' transport
>  #
>  . "$TEST_DIRECTORY"/lib-httpd.sh
> -- 
> 2.17.0.768.g1526ddbba1.dirty
> 

-- 
Brandon Williams


[PATCH 0/2] Fix protocol v2 tag following with CLI refspec

2018-06-05 Thread Jonathan Tan
After the events that led up to Jonathan Nieder's patch fixing fetch by
SHA-1 in protocol v2 [1], while expanding protocol v2's test coverage, I
found a bug with tag following. Patch 2 is a patch that fixes that bug
(and patch 1 is a related but independent test that I had written
beforehand).

[1] 
https://public-inbox.org/git/20180531072339.ga43...@aiede.svl.corp.google.com/

Jonathan Tan (2):
  t5702: test fetch with multiple refspecs at a time
  fetch: send "refs/tags/" prefix upon CLI refspecs

 builtin/fetch.c|  2 +-
 t/t5702-protocol-v2.sh | 65 ++
 2 files changed, 66 insertions(+), 1 deletion(-)

-- 
2.17.0.768.g1526ddbba1.dirty



[PATCH 1/2] t5702: test fetch with multiple refspecs at a time

2018-06-05 Thread Jonathan Tan
Extend the protocol v2 tests to also test fetches with multiple refspecs
specified. This also covers the previously uncovered cases of fetching
with prefix matching and fetching by SHA-1.

Signed-off-by: Jonathan Tan 
---
 t/t5702-protocol-v2.sh | 47 ++
 1 file changed, 47 insertions(+)

diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index a4fe6508b..261e82b0f 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -359,6 +359,53 @@ test_expect_success 'default refspec is used to filter ref 
when fetchcing' '
grep "ref-prefix refs/tags/" log
 '
 
+test_expect_success 'fetch supports various ways of have lines' '
+   rm -rf server client trace &&
+   git init server &&
+   test_commit -C server dwim &&
+   TREE=$(git -C server rev-parse HEAD^{tree}) &&
+   git -C server tag exact \
+   $(git -C server commit-tree -m a "$TREE") &&
+   git -C server tag dwim-unwanted \
+   $(git -C server commit-tree -m b "$TREE") &&
+   git -C server tag exact-unwanted \
+   $(git -C server commit-tree -m c "$TREE") &&
+   git -C server tag prefix1 \
+   $(git -C server commit-tree -m d "$TREE") &&
+   git -C server tag prefix2 \
+   $(git -C server commit-tree -m e "$TREE") &&
+   git -C server tag fetch-by-sha1 \
+   $(git -C server commit-tree -m f "$TREE") &&
+   git -C server tag completely-unrelated \
+   $(git -C server commit-tree -m g "$TREE") &&
+   
+   git init client &&
+   GIT_TRACE_PACKET="$(pwd)/trace" git -C client -c protocol.version=2 \
+   fetch "file://$(pwd)/server" \
+   dwim \
+   refs/tags/exact \
+   refs/tags/prefix*:refs/tags/prefix* \
+   "$(git -C server rev-parse fetch-by-sha1)" &&
+
+   # Ensure that the appropriate prefixes are sent (using a sample)
+   grep "fetch> ref-prefix dwim" trace &&
+   grep "fetch> ref-prefix refs/heads/dwim" trace &&
+   grep "fetch> ref-prefix refs/tags/prefix" trace &&
+
+   # Ensure that the correct objects are returned
+   git -C client cat-file -e $(git -C server rev-parse dwim) &&
+   git -C client cat-file -e $(git -C server rev-parse exact) &&
+   git -C client cat-file -e $(git -C server rev-parse prefix1) &&
+   git -C client cat-file -e $(git -C server rev-parse prefix2) &&
+   git -C client cat-file -e $(git -C server rev-parse fetch-by-sha1) &&
+   test_must_fail git -C client cat-file -e \
+   $(git -C server rev-parse dwim-unwanted) &&
+   test_must_fail git -C client cat-file -e \
+   $(git -C server rev-parse exact-unwanted) &&
+   test_must_fail git -C client cat-file -e \
+   $(git -C server rev-parse completely-unrelated)
+'
+
 # Test protocol v2 with 'http://' transport
 #
 . "$TEST_DIRECTORY"/lib-httpd.sh
-- 
2.17.0.768.g1526ddbba1.dirty



[PATCH 2/2] fetch: send "refs/tags/" prefix upon CLI refspecs

2018-06-05 Thread Jonathan Tan
When performing tag following, in addition to using the server's
"include-tag" capability to send tag objects (and emulating it if the
server does not support that capability), "git fetch" relies upon the
presence of refs/tags/* entries in the initial ref advertisement to
locally create refs pointing to the aforementioned tag objects. When
using protocol v2, refs/tags/* entries in the initial ref advertisement
may be suppressed by a ref-prefix argument, leading to the tag object
being downloaded, but the ref not being created.

Commit dcc73cf7ff ("fetch: generate ref-prefixes when using a configured
refspec", 2018-05-18) ensured that "refs/tags/" is always sent as a ref
prefix when "git fetch" is invoked with no refspecs, but not when "git
fetch" is invoked with refspecs. Extend that functionality to make it
work in both situations.

Signed-off-by: Jonathan Tan 
---
 builtin/fetch.c|  2 +-
 t/t5702-protocol-v2.sh | 18 ++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index ea5b9669a..1f447f1e8 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -359,7 +359,7 @@ static struct ref *get_ref_map(struct transport *transport,
refspec_ref_prefixes(>remote->fetch, _prefixes);
 
if (ref_prefixes.argc &&
-   (tags == TAGS_SET || (tags == TAGS_DEFAULT && !rs->nr))) {
+   (tags == TAGS_SET || tags == TAGS_DEFAULT)) {
argv_array_push(_prefixes, "refs/tags/");
}
 
diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index 261e82b0f..6733579c1 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -406,6 +406,24 @@ test_expect_success 'fetch supports various ways of have 
lines' '
$(git -C server rev-parse completely-unrelated)
 '
 
+test_expect_success 'fetch supports include-tag and tag following' '
+   rm -rf server client trace &&
+   git init server &&
+
+   test_commit -C server to_fetch &&
+   git -C server tag -a annotated_tag -m message &&
+
+   git init client &&
+   GIT_TRACE_PACKET="$(pwd)/trace" git -C client -c protocol.version=2 \
+   fetch "$(pwd)/server" to_fetch:to_fetch &&
+
+   grep "fetch> ref-prefix to_fetch" trace &&
+   grep "fetch> ref-prefix refs/tags/" trace &&
+   grep "fetch> include-tag" trace &&
+
+   git -C client cat-file -e $(git -C client rev-parse annotated_tag)
+'
+
 # Test protocol v2 with 'http://' transport
 #
 . "$TEST_DIRECTORY"/lib-httpd.sh
-- 
2.17.0.768.g1526ddbba1.dirty



[PATCH v2 08/10] rerere: factor out handle_conflict function

2018-06-05 Thread Thomas Gummerer
Factor out the handle_conflict function, which handles a single
conflict in a path.  This is a preparation for the next step, where
this function will be re-used.  No functional changes intended.

Signed-off-by: Thomas Gummerer 
---
 rerere.c | 143 +--
 1 file changed, 65 insertions(+), 78 deletions(-)

diff --git a/rerere.c b/rerere.c
index da3744b86b..fac90663b0 100644
--- a/rerere.c
+++ b/rerere.c
@@ -302,38 +302,6 @@ static void rerere_io_putstr(const char *str, struct 
rerere_io *io)
ferr_puts(str, io->output, >wrerror);
 }
 
-/*
- * Write a conflict marker to io->output (if defined).
- */
-static void rerere_io_putconflict(int ch, int size, struct rerere_io *io)
-{
-   char buf[64];
-
-   while (size) {
-   if (size <= sizeof(buf) - 2) {
-   memset(buf, ch, size);
-   buf[size] = '\n';
-   buf[size + 1] = '\0';
-   size = 0;
-   } else {
-   int sz = sizeof(buf) - 1;
-
-   /*
-* Make sure we will not write everything out
-* in this round by leaving at least 1 byte
-* for the next round, giving the next round
-* a chance to add the terminating LF.  Yuck.
-*/
-   if (size <= sz)
-   sz -= (sz - size) + 1;
-   memset(buf, ch, sz);
-   buf[sz] = '\0';
-   size -= sz;
-   }
-   rerere_io_putstr(buf, io);
-   }
-}
-
 static void rerere_io_putmem(const char *mem, size_t sz, struct rerere_io *io)
 {
if (io->output)
@@ -384,37 +352,25 @@ static int is_cmarker(char *buf, int marker_char, int 
marker_size)
return isspace(*buf);
 }
 
-/*
- * Read contents a file with conflicts, normalize the conflicts
- * by (1) discarding the common ancestor version in diff3-style,
- * (2) reordering our side and their side so that whichever sorts
- * alphabetically earlier comes before the other one, while
- * computing the "conflict ID", which is just an SHA-1 hash of
- * one side of the conflict, NUL, the other side of the conflict,
- * and NUL concatenated together.
- *
- * Return 1 if conflict hunks are found, 0 if there are no conflict
- * hunks and -1 if an error occured.
- */
-static int handle_path(unsigned char *sha1, struct rerere_io *io, int 
marker_size)
+static void rerere_strbuf_putconflict(struct strbuf *buf, int ch, size_t size)
+{
+   strbuf_addchars(buf, ch, size);
+   strbuf_addch(buf, '\n');
+}
+
+static int handle_conflict(struct strbuf *out, struct rerere_io *io,
+  int marker_size, git_SHA_CTX *ctx)
 {
-   git_SHA_CTX ctx;
-   int has_conflicts = 0;
enum {
-   RR_CONTEXT = 0, RR_SIDE_1, RR_SIDE_2, RR_ORIGINAL
-   } hunk = RR_CONTEXT;
+   RR_SIDE_1 = 0, RR_SIDE_2, RR_ORIGINAL
+   } hunk = RR_SIDE_1;
struct strbuf one = STRBUF_INIT, two = STRBUF_INIT;
struct strbuf buf = STRBUF_INIT;
-
-   if (sha1)
-   git_SHA1_Init();
-
+   int has_conflicts = 1;
while (!io->getline(, io)) {
-   if (is_cmarker(buf.buf, '<', marker_size)) {
-   if (hunk != RR_CONTEXT)
-   goto bad;
-   hunk = RR_SIDE_1;
-   } else if (is_cmarker(buf.buf, '|', marker_size)) {
+   if (is_cmarker(buf.buf, '<', marker_size))
+   goto bad;
+   else if (is_cmarker(buf.buf, '|', marker_size)) {
if (hunk != RR_SIDE_1)
goto bad;
hunk = RR_ORIGINAL;
@@ -427,42 +383,73 @@ static int handle_path(unsigned char *sha1, struct 
rerere_io *io, int marker_siz
goto bad;
if (strbuf_cmp(, ) > 0)
strbuf_swap(, );
-   has_conflicts = 1;
-   hunk = RR_CONTEXT;
-   rerere_io_putconflict('<', marker_size, io);
-   rerere_io_putmem(one.buf, one.len, io);
-   rerere_io_putconflict('=', marker_size, io);
-   rerere_io_putmem(two.buf, two.len, io);
-   rerere_io_putconflict('>', marker_size, io);
-   if (sha1) {
-   git_SHA1_Update(, one.buf ? one.buf : "",
+   rerere_strbuf_putconflict(out, '<', marker_size);
+   strbuf_addbuf(out, );
+   rerere_strbuf_putconflict(out, '=', marker_size);
+   strbuf_addbuf(out, );
+   rerere_strbuf_putconflict(out, '>', marker_size);
+   

[PATCH v2 02/10] rerere: lowercase error messages

2018-06-05 Thread Thomas Gummerer
Documentation/CodingGuidelines mentions that error messages should be
lowercase.  Prior to marking them for translation follow that pattern
in rerere as well.

Signed-off-by: Thomas Gummerer 
---
 rerere.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/rerere.c b/rerere.c
index 4b4869662d..eca182023f 100644
--- a/rerere.c
+++ b/rerere.c
@@ -484,12 +484,12 @@ static int handle_file(const char *path, unsigned char 
*sha1, const char *output
io.input = fopen(path, "r");
io.io.wrerror = 0;
if (!io.input)
-   return error_errno("Could not open %s", path);
+   return error_errno("could not open %s", path);
 
if (output) {
io.io.output = fopen(output, "w");
if (!io.io.output) {
-   error_errno("Could not write %s", output);
+   error_errno("could not write %s", output);
fclose(io.input);
return -1;
}
@@ -499,15 +499,15 @@ static int handle_file(const char *path, unsigned char 
*sha1, const char *output
 
fclose(io.input);
if (io.io.wrerror)
-   error("There were errors while writing %s (%s)",
+   error("there were errors while writing %s (%s)",
  path, strerror(io.io.wrerror));
if (io.io.output && fclose(io.io.output))
-   io.io.wrerror = error_errno("Failed to flush %s", path);
+   io.io.wrerror = error_errno("failed to flush %s", path);
 
if (hunk_no < 0) {
if (output)
unlink_or_warn(output);
-   return error("Could not parse conflict hunks in %s", path);
+   return error("could not parse conflict hunks in %s", path);
}
if (io.io.wrerror)
return -1;
@@ -690,11 +690,11 @@ static int merge(const struct rerere_id *id, const char 
*path)
/* Update "path" with the resolution */
f = fopen(path, "w");
if (!f)
-   return error_errno("Could not open %s", path);
+   return error_errno("could not open %s", path);
if (fwrite(result.ptr, result.size, 1, f) != 1)
-   error_errno("Could not write %s", path);
+   error_errno("could not write %s", path);
if (fclose(f))
-   return error_errno("Writing %s failed", path);
+   return error_errno("writing %s failed", path);
 
 out:
free(cur.ptr);
@@ -721,7 +721,7 @@ static void update_paths(struct string_list *update)
 
if (write_locked_index(_index, _lock,
   COMMIT_LOCK | SKIP_IF_UNCHANGED))
-   die("Unable to write new index file");
+   die("unable to write new index file");
 }
 
 static void remove_variant(struct rerere_id *id)
@@ -879,7 +879,7 @@ static int is_rerere_enabled(void)
return rr_cache_exists;
 
if (!rr_cache_exists && mkdir_in_gitdir(git_path_rr_cache()))
-   die("Could not create directory %s", git_path_rr_cache());
+   die("could not create directory %s", git_path_rr_cache());
return 1;
 }
 
@@ -1032,7 +1032,7 @@ static int rerere_forget_one_path(const char *path, 
struct string_list *rr)
 */
ret = handle_cache(path, sha1, NULL);
if (ret < 1)
-   return error("Could not parse conflict hunks in '%s'", path);
+   return error("could not parse conflict hunks in '%s'", path);
 
/* Nuke the recorded resolution for the conflict */
id = new_rerere_id(sha1);
-- 
2.17.0.410.g65aef3a6c4



[PATCH v2 06/10] rerere: fix crash when conflict goes unresolved

2018-06-05 Thread Thomas Gummerer
Currently when a user doesn't resolve a conflict in a file, but
commits the file with the conflict markers, and later the file ends up
in a state in which rerere can't handle it, subsequent rerere
operations that are interested in that path, such as 'rerere clear' or
'rerere forget ' will fail, or even worse in the case of 'rerere
clear' segfault.

Such states include nested conflicts, or an extra conflict marker that
doesn't have any match.

This is because the first 'git rerere' when there was only one
conflict in the file leaves an entry in the MERGE_RR file behind.  The
next 'git rerere' will then pick the rerere ID for that file up, and
not assign a new ID as it can't successfully calculate one.  It will
however still try to do the rerere operation, because of the existing
ID.  As the handle_file function fails, it will remove the 'preimage'
for the ID in the process, while leaving the ID in the MERGE_RR file.

Now when 'rerere clear' for example is run, it will segfault in
'has_rerere_resolution', because status is NULL.

To fix this, remove the rerere ID from the MERGE_RR file in the case
when we can't handle it, and remove the corresponding variant from
.git/rr-cache/.  Removing it unconditionally is fine here, because if
the user would have resolved the conflict and ran rerere, the entry
would no longer be in the MERGE_RR file, so we wouldn't have this
problem in the first place, while if the conflict was not resolved,
the only thing that's left in the folder is the 'preimage', which by
itself will be regenerated by git if necessary, so the user won't
loose any work.

Note that other variants that have the same conflict ID will not be
touched.

Signed-off-by: Thomas Gummerer 
---
 rerere.c  | 12 +++-
 t/t4200-rerere.sh | 22 ++
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/rerere.c b/rerere.c
index ef23abe4dd..220020187b 100644
--- a/rerere.c
+++ b/rerere.c
@@ -824,10 +824,7 @@ static int do_plain_rerere(struct string_list *rr, int fd)
struct rerere_id *id;
unsigned char sha1[20];
const char *path = conflict.items[i].string;
-   int ret;
-
-   if (string_list_has_string(rr, path))
-   continue;
+   int ret, has_string;
 
/*
 * Ask handle_file() to scan and assign a
@@ -835,7 +832,12 @@ static int do_plain_rerere(struct string_list *rr, int fd)
 * yet.
 */
ret = handle_file(path, sha1, NULL);
-   if (ret < 1)
+   has_string = string_list_has_string(rr, path);
+   if (ret < 0 && has_string) {
+   remove_variant(string_list_lookup(rr, path)->util);
+   string_list_remove(rr, path, 1);
+   }
+   if (ret < 1 || has_string)
continue;
 
id = new_rerere_id(sha1);
diff --git a/t/t4200-rerere.sh b/t/t4200-rerere.sh
index eaf18c81cb..5ce411b70d 100755
--- a/t/t4200-rerere.sh
+++ b/t/t4200-rerere.sh
@@ -580,4 +580,26 @@ test_expect_success 'multiple identical conflicts' '
count_pre_post 0 0
 '
 
+test_expect_success 'rerere with unexpected conflict markers does not crash' '
+   git reset --hard &&
+
+   git checkout -b branch-1 master &&
+   echo "bar" >test &&
+   git add test &&
+   git commit -q -m two &&
+
+   git reset --hard &&
+   git checkout -b branch-2 master &&
+   echo "foo" >test &&
+   git add test &&
+   git commit -q -a -m one &&
+
+   test_must_fail git merge branch-1 &&
+   sed "s/bar/>>> a/" >test.tmp 

[PATCH v2 04/10] rerere: mark strings for translation

2018-06-05 Thread Thomas Gummerer
'git rerere' is considered a plumbing command and as such its output
should be translated.  Its functionality is also only enabled through
a config setting, so scripts really shouldn't rely on its output
either way.

Signed-off-by: Thomas Gummerer 
---
 builtin/rerere.c |  4 +--
 rerere.c | 68 
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/builtin/rerere.c b/builtin/rerere.c
index e0c67c98e9..5ed941b91f 100644
--- a/builtin/rerere.c
+++ b/builtin/rerere.c
@@ -75,7 +75,7 @@ int cmd_rerere(int argc, const char **argv, const char 
*prefix)
if (!strcmp(argv[0], "forget")) {
struct pathspec pathspec;
if (argc < 2)
-   warning("'git rerere forget' without paths is 
deprecated");
+   warning(_("'git rerere forget' without paths is 
deprecated"));
parse_pathspec(, 0, PATHSPEC_PREFER_CWD,
   prefix, argv + 1);
return rerere_forget();
@@ -107,7 +107,7 @@ int cmd_rerere(int argc, const char **argv, const char 
*prefix)
const char *path = merge_rr.items[i].string;
const struct rerere_id *id = merge_rr.items[i].util;
if (diff_two(rerere_path(id, "preimage"), path, path, 
path))
-   die("unable to generate diff for '%s'", 
rerere_path(id, NULL));
+   die(_("unable to generate diff for '%s'"), 
rerere_path(id, NULL));
}
} else
usage_with_options(rerere_usage, options);
diff --git a/rerere.c b/rerere.c
index 0e5956a51c..74ce422634 100644
--- a/rerere.c
+++ b/rerere.c
@@ -212,7 +212,7 @@ static void read_rr(struct string_list *rr)
 
/* There has to be the hash, tab, path and then NUL */
if (buf.len < 42 || get_sha1_hex(buf.buf, sha1))
-   die("corrupt MERGE_RR");
+   die(_("corrupt MERGE_RR"));
 
if (buf.buf[40] != '.') {
variant = 0;
@@ -221,10 +221,10 @@ static void read_rr(struct string_list *rr)
errno = 0;
variant = strtol(buf.buf + 41, , 10);
if (errno)
-   die("corrupt MERGE_RR");
+   die(_("corrupt MERGE_RR"));
}
if (*(path++) != '\t')
-   die("corrupt MERGE_RR");
+   die(_("corrupt MERGE_RR"));
buf.buf[40] = '\0';
id = new_rerere_id_hex(buf.buf);
id->variant = variant;
@@ -259,12 +259,12 @@ static int write_rr(struct string_list *rr, int out_fd)
rr->items[i].string, 0);
 
if (write_in_full(out_fd, buf.buf, buf.len) < 0)
-   die("unable to write rerere record");
+   die(_("unable to write rerere record"));
 
strbuf_release();
}
if (commit_lock_file(_lock) != 0)
-   die("unable to write rerere record");
+   die(_("unable to write rerere record"));
return 0;
 }
 
@@ -484,12 +484,12 @@ static int handle_file(const char *path, unsigned char 
*sha1, const char *output
io.input = fopen(path, "r");
io.io.wrerror = 0;
if (!io.input)
-   return error_errno("could not open '%s'", path);
+   return error_errno(_("could not open '%s'"), path);
 
if (output) {
io.io.output = fopen(output, "w");
if (!io.io.output) {
-   error_errno("could not write '%s'", output);
+   error_errno(_("could not write '%s'"), output);
fclose(io.input);
return -1;
}
@@ -499,15 +499,15 @@ static int handle_file(const char *path, unsigned char 
*sha1, const char *output
 
fclose(io.input);
if (io.io.wrerror)
-   error("there were errors while writing '%s' (%s)",
+   error(_("there were errors while writing '%s' (%s)"),
  path, strerror(io.io.wrerror));
if (io.io.output && fclose(io.io.output))
-   io.io.wrerror = error_errno("failed to flush '%s'", path);
+   io.io.wrerror = error_errno(_("failed to flush '%s'"), path);
 
if (hunk_no < 0) {
if (output)
unlink_or_warn(output);
-   return error("could not parse conflict hunks in '%s'", path);
+   return error(_("could not parse conflict hunks in '%s'"), path);
}
if (io.io.wrerror)
return -1;
@@ -568,7 +568,7 @@ static int find_conflict(struct string_list *conflict)
 {
int i;
if (read_cache() < 0)
-   return 

[PATCH v2 10/10] rerere: recalculate conflict ID when unresolved conflict is committed

2018-06-05 Thread Thomas Gummerer
Currently when a user doesn't resolve a conflict, commits the results,
and does an operation which creates another conflict, rerere will use
the ID of the previously unresolved conflict for the new conflict.
This is because the conflict is kept in the MERGE_RR file, which
'rerere' reads every time it is invoked.

After the new conflict is solved, rerere will record the resolution
with the ID of the old conflict.  So in order to replay the conflict,
both merges would have to be re-done, instead of just the last one, in
order for rerere to be able to automatically resolve the conflict.

Instead of that, assign a new conflict ID if there are still conflicts
in a file and the file had conflicts at a previous step.  This ID
matches the conflict we actually resolved at the corresponding step.

Note that there are no backwards compatibility worries here, as rerere
would have failed to even normalize the conflict before this patch
series.

Signed-off-by: Thomas Gummerer 
---
 rerere.c  | 7 +++
 t/t4200-rerere.sh | 7 +++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/rerere.c b/rerere.c
index f611db7873..644f185180 100644
--- a/rerere.c
+++ b/rerere.c
@@ -818,7 +818,7 @@ static int do_plain_rerere(struct string_list *rr, int fd)
struct rerere_id *id;
unsigned char sha1[20];
const char *path = conflict.items[i].string;
-   int ret, has_string;
+   int ret;
 
/*
 * Ask handle_file() to scan and assign a
@@ -826,12 +826,11 @@ static int do_plain_rerere(struct string_list *rr, int fd)
 * yet.
 */
ret = handle_file(path, sha1, NULL);
-   has_string = string_list_has_string(rr, path);
-   if (ret < 0 && has_string) {
+   if (ret != 0 && string_list_has_string(rr, path)) {
remove_variant(string_list_lookup(rr, path)->util);
string_list_remove(rr, path, 1);
}
-   if (ret < 1 || has_string)
+   if (ret < 1)
continue;
 
id = new_rerere_id(sha1);
diff --git a/t/t4200-rerere.sh b/t/t4200-rerere.sh
index f433848ccb..9578215ff2 100755
--- a/t/t4200-rerere.sh
+++ b/t/t4200-rerere.sh
@@ -636,6 +636,13 @@ test_expect_success 'rerere with inner conflict markers' '
git commit -q -m "will solve conflicts later" &&
test_must_fail git merge A &&
cat test >actual &&
+   test_cmp expect actual &&
+
+   git add test &&
+   git commit -m "rerere solved conflict" &&
+   git reset --hard HEAD~ &&
+   test_must_fail git merge A &&
+   cat test >actual &&
test_cmp expect actual
 '
 
-- 
2.17.0.410.g65aef3a6c4



[PATCH v2 07/10] rerere: only return whether a path has conflicts or not

2018-06-05 Thread Thomas Gummerer
We currently return the exact number of conflict hunks a certain path
has from the 'handle_paths' function.  However all of its callers only
care whether there are conflicts or not or if there is an error.
Return only that information, and document that only that information
is returned.  This will simplify the code in the subsequent steps.

Signed-off-by: Thomas Gummerer 
---
 rerere.c | 23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/rerere.c b/rerere.c
index 220020187b..da3744b86b 100644
--- a/rerere.c
+++ b/rerere.c
@@ -393,12 +393,13 @@ static int is_cmarker(char *buf, int marker_char, int 
marker_size)
  * one side of the conflict, NUL, the other side of the conflict,
  * and NUL concatenated together.
  *
- * Return the number of conflict hunks found.
+ * Return 1 if conflict hunks are found, 0 if there are no conflict
+ * hunks and -1 if an error occured.
  */
 static int handle_path(unsigned char *sha1, struct rerere_io *io, int 
marker_size)
 {
git_SHA_CTX ctx;
-   int hunk_no = 0;
+   int has_conflicts = 0;
enum {
RR_CONTEXT = 0, RR_SIDE_1, RR_SIDE_2, RR_ORIGINAL
} hunk = RR_CONTEXT;
@@ -426,7 +427,7 @@ static int handle_path(unsigned char *sha1, struct 
rerere_io *io, int marker_siz
goto bad;
if (strbuf_cmp(, ) > 0)
strbuf_swap(, );
-   hunk_no++;
+   has_conflicts = 1;
hunk = RR_CONTEXT;
rerere_io_putconflict('<', marker_size, io);
rerere_io_putmem(one.buf, one.len, io);
@@ -462,7 +463,7 @@ static int handle_path(unsigned char *sha1, struct 
rerere_io *io, int marker_siz
git_SHA1_Final(sha1, );
if (hunk != RR_CONTEXT)
return -1;
-   return hunk_no;
+   return has_conflicts;
 }
 
 /*
@@ -471,7 +472,7 @@ static int handle_path(unsigned char *sha1, struct 
rerere_io *io, int marker_siz
  */
 static int handle_file(const char *path, unsigned char *sha1, const char 
*output)
 {
-   int hunk_no = 0;
+   int has_conflicts = 0;
struct rerere_io_file io;
int marker_size = ll_merge_marker_size(path);
 
@@ -491,7 +492,7 @@ static int handle_file(const char *path, unsigned char 
*sha1, const char *output
}
}
 
-   hunk_no = handle_path(sha1, (struct rerere_io *), marker_size);
+   has_conflicts = handle_path(sha1, (struct rerere_io *), marker_size);
 
fclose(io.input);
if (io.io.wrerror)
@@ -500,14 +501,14 @@ static int handle_file(const char *path, unsigned char 
*sha1, const char *output
if (io.io.output && fclose(io.io.output))
io.io.wrerror = error_errno(_("failed to flush '%s'"), path);
 
-   if (hunk_no < 0) {
+   if (has_conflicts < 0) {
if (output)
unlink_or_warn(output);
return error(_("could not parse conflict hunks in '%s'"), path);
}
if (io.io.wrerror)
return -1;
-   return hunk_no;
+   return has_conflicts;
 }
 
 /*
@@ -955,7 +956,7 @@ static int handle_cache(const char *path, unsigned char 
*sha1, const char *outpu
mmfile_t mmfile[3] = {{NULL}};
mmbuffer_t result = {NULL, 0};
const struct cache_entry *ce;
-   int pos, len, i, hunk_no;
+   int pos, len, i, has_conflicts;
struct rerere_io_mem io;
int marker_size = ll_merge_marker_size(path);
 
@@ -1009,11 +1010,11 @@ static int handle_cache(const char *path, unsigned char 
*sha1, const char *outpu
 * Grab the conflict ID and optionally write the original
 * contents with conflict markers out.
 */
-   hunk_no = handle_path(sha1, (struct rerere_io *), marker_size);
+   has_conflicts = handle_path(sha1, (struct rerere_io *), marker_size);
strbuf_release();
if (io.io.output)
fclose(io.io.output);
-   return hunk_no;
+   return has_conflicts;
 }
 
 static int rerere_forget_one_path(const char *path, struct string_list *rr)
-- 
2.17.0.410.g65aef3a6c4



[PATCH v2 00/10] rerere: handle nested conflicts

2018-06-05 Thread Thomas Gummerer
The previous round was at
<20180520211210.1248-1-t.gumme...@gmail.com>.

Thanks Junio for the comments on the previous round.

Changes since v2:
 - lowercase the first letter in some error/warning messages before
   marking them for translation
 - wrap paths in output in single quotes, for consistency, and to make
   some of the messages the same as ones that are already translated
 - mark messages in builtin/rerere.c for translation as well, which I
   had previously forgotten.
 - expanded the technical documentation on rerere.  The entire
   document is basically rewritten.
 - changed the test in 6/10 to just fake a conflict marker inside of
   one of the hunks instead of using an inner conflict created by a
   merge.  This is to make sure the codepath is still hit after we
   handle inner conflicts properly.
 - added tests for handling inner conflict markers
 - added one commit to recalculate the conflict ID when an unresolved
   conflict is committed, and the subsequent operation conflicts again
   in the same file.  More explanation in the commit message of that
   commit.

range-diff below.  A few commits changed enough for range-diff
to give up showing the differences in those, they are probably best
reviewed as the whole patch anyway:

1:  901b638400 ! 1:  2825342cc2 rerere: unify error message when read_cache 
fails
@@ -1,6 +1,6 @@
 Author: Thomas Gummerer 
 
-rerere: unify error message when read_cache fails
+rerere: unify error messages when read_cache fails
 
 We have multiple different variants of the error message we show to
 the user if 'read_cache' fails.  The "Could not read index" variant we
-:  -- > 2:  d1500028aa rerere: lowercase error messages
-:  -- > 3:  ed3601ee71 rerere: wrap paths in output in sq
2:  c48ffededd ! 4:  6ead84a199 rerere: mark strings for translation
@@ -9,6 +9,28 @@
 
 Signed-off-by: Thomas Gummerer 
 
+diff --git a/builtin/rerere.c b/builtin/rerere.c
+--- a/builtin/rerere.c
 b/builtin/rerere.c
+@@
+   if (!strcmp(argv[0], "forget")) {
+   struct pathspec pathspec;
+   if (argc < 2)
+-  warning("'git rerere forget' without paths is 
deprecated");
++  warning(_("'git rerere forget' without paths is 
deprecated"));
+   parse_pathspec(, 0, PATHSPEC_PREFER_CWD,
+  prefix, argv + 1);
+   return rerere_forget();
+@@
+   const char *path = merge_rr.items[i].string;
+   const struct rerere_id *id = merge_rr.items[i].util;
+   if (diff_two(rerere_path(id, "preimage"), path, path, 
path))
+-  die("unable to generate diff for '%s'", 
rerere_path(id, NULL));
++  die(_("unable to generate diff for '%s'"), 
rerere_path(id, NULL));
+   }
+   } else
+   usage_with_options(rerere_usage, options);
+
 diff --git a/rerere.c b/rerere.c
 --- a/rerere.c
 +++ b/rerere.c
@@ -53,14 +75,14 @@
io.input = fopen(path, "r");
io.io.wrerror = 0;
if (!io.input)
--  return error_errno("Could not open %s", path);
-+  return error_errno(_("Could not open %s"), path);
+-  return error_errno("could not open '%s'", path);
++  return error_errno(_("could not open '%s'"), path);
  
if (output) {
io.io.output = fopen(output, "w");
if (!io.io.output) {
--  error_errno("Could not write %s", output);
-+  error_errno(_("Could not write %s"), output);
+-  error_errno("could not write '%s'", output);
++  error_errno(_("could not write '%s'"), output);
fclose(io.input);
return -1;
}
@@ -68,18 +90,18 @@
  
fclose(io.input);
if (io.io.wrerror)
--  error("There were errors while writing %s (%s)",
-+  error(_("There were errors while writing %s (%s)"),
+-  error("there were errors while writing '%s' (%s)",
++  error(_("there were errors while writing '%s' (%s)"),
  path, strerror(io.io.wrerror));
if (io.io.output && fclose(io.io.output))
--  io.io.wrerror = error_errno("Failed to flush %s", path);
-+  io.io.wrerror = error_errno(_("Failed to flush %s"), path);
+-  io.io.wrerror = error_errno("failed to flush '%s'", path);
++  io.io.wrerror = error_errno(_("failed to flush '%s'"), path);
  
if (hunk_no < 0) {
if (output)
unlink_or_warn(output);
--  return error("Could not parse conflict hunks in %s", path);
-+  return error(_("Could not parse conflict hunks 

[PATCH v2 09/10] rerere: teach rerere to handle nested conflicts

2018-06-05 Thread Thomas Gummerer
Currently rerere can't handle nested conflicts and will error out when
it encounters such conflicts.  Do that by recursively calling the
'handle_conflict' function to normalize the conflict.

The conflict ID calculation here deserves some explanation:

As we are using the same handle_conflict function, the nested conflict
is normalized the same way as for non-nested conflicts, which means
the ancestor in the diff3 case is stripped out, and the parts of the
conflict are ordered alphabetically.

The conflict ID is however is only calculated in the top level
handle_conflict call, so it will include the markers that 'rerere'
adds to the output.  e.g. say there's the following conflict:

<<< HEAD
1
===
<<< HEAD
3
===
2
>>> branch-2
>>> branch-3~

it would be reordered as follows in the preimage:

<<<
1
===
<<<
2
===
3
>>>
>>>

and the conflict ID would be calculated as
sha1(1<<<
2
===
3
>>>)

Stripping out vs. leaving the conflict markers in place should have no
practical impact, but it simplifies the implementation.

Signed-off-by: Thomas Gummerer 
---

I couldn't actually get the conflict markers the right way just using
merge-recursive.  But I think that would be fixed either way by
d694a17986 ("ll-merge: use a longer conflict marker for internal
merge", 2016-04-14), if I read that correctly.

Either way I still think this can be an improvement for when the user
commits merge conflicts (even though they shouldn't do that in the
first place), and for possible other edge cases that I'm not able to
produce right now, but I may just not be creative enough for those.

 Documentation/technical/rerere.txt | 40 ++
 rerere.c   | 14 ---
 t/t4200-rerere.sh  | 38 
 3 files changed, 88 insertions(+), 4 deletions(-)

diff --git a/Documentation/technical/rerere.txt 
b/Documentation/technical/rerere.txt
index 2c517fe0fc..7077ab4a08 100644
--- a/Documentation/technical/rerere.txt
+++ b/Documentation/technical/rerere.txt
@@ -140,3 +140,43 @@ SHA1('BC').
 If there are multiple conflicts in one file, the sha1 is calculated
 the same way with all hunks appended to each other, in the order in
 which they appear in the file, separated by a  character.
+
+Nested conflicts
+
+
+Nested conflicts are handled very similarly to "simple" conflicts.
+Same as before, labels on conflict markers and diff3 output is
+stripped, and the conflict hunks are sorted, for both the outer and
+the inner conflict.
+
+The only difference is in how the conflict ID is calculated.  For the
+inner conflict, the conflict markers themselves are not stripped out
+before calculating the sha1.
+
+Say we have the following conflict for example:
+
+<<< HEAD
+1
+===
+<<< HEAD
+3
+===
+2
+>>> branch-2
+>>> branch-3~
+
+After stripping out the labels of the conflict markers, the conflict
+would look as follows:
+
+<<<
+1
+===
+<<<
+3
+===
+2
+>>>
+>>>
+
+and finally the conflict ID would be calculated as:
+`sha1('1<<<\n3\n===\n2\n>>>')`
diff --git a/rerere.c b/rerere.c
index fac90663b0..f611db7873 100644
--- a/rerere.c
+++ b/rerere.c
@@ -365,12 +365,18 @@ static int handle_conflict(struct strbuf *out, struct 
rerere_io *io,
RR_SIDE_1 = 0, RR_SIDE_2, RR_ORIGINAL
} hunk = RR_SIDE_1;
struct strbuf one = STRBUF_INIT, two = STRBUF_INIT;
-   struct strbuf buf = STRBUF_INIT;
+   struct strbuf buf = STRBUF_INIT, conflict = STRBUF_INIT;
int has_conflicts = 1;
while (!io->getline(, io)) {
-   if (is_cmarker(buf.buf, '<', marker_size))
-   goto bad;
-   else if (is_cmarker(buf.buf, '|', marker_size)) {
+   if (is_cmarker(buf.buf, '<', marker_size)) {
+   if (handle_conflict(, io, marker_size, NULL) < 
0)
+   goto bad;
+   if (hunk == RR_SIDE_1)
+   strbuf_addbuf(, );
+   else
+   strbuf_addbuf(, );
+   strbuf_release();
+   } else if (is_cmarker(buf.buf, '|', marker_size)) {
if (hunk != RR_SIDE_1)
goto bad;
hunk = RR_ORIGINAL;
diff --git a/t/t4200-rerere.sh b/t/t4200-rerere.sh
index 5ce411b70d..f433848ccb 100755
--- a/t/t4200-rerere.sh
+++ b/t/t4200-rerere.sh
@@ -602,4 +602,42 @@ test_expect_success 'rerere with unexpected conflict 
markers does not crash' '
git rerere clear
 '
 
+test_expect_success 'rerere with inner conflict markers' '
+   git reset --hard &&
+
+   git checkout -b A master &&
+   

[PATCH v2 03/10] rerere: wrap paths in output in sq

2018-06-05 Thread Thomas Gummerer
It looks like most paths in the output in the git codebase are wrapped
in single quotes.  Standardize on that in rerere as well.

Apart from being more consistent, this also makes some of the strings
match strings that are already translated in other parts of the
codebase, thus reducing the work for translators, when the strings are
marked for translation in a subsequent commit.

Signed-off-by: Thomas Gummerer 
---
 builtin/rerere.c |  2 +-
 rerere.c | 26 +-
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/builtin/rerere.c b/builtin/rerere.c
index 0bc40298c2..e0c67c98e9 100644
--- a/builtin/rerere.c
+++ b/builtin/rerere.c
@@ -107,7 +107,7 @@ int cmd_rerere(int argc, const char **argv, const char 
*prefix)
const char *path = merge_rr.items[i].string;
const struct rerere_id *id = merge_rr.items[i].util;
if (diff_two(rerere_path(id, "preimage"), path, path, 
path))
-   die("unable to generate diff for %s", 
rerere_path(id, NULL));
+   die("unable to generate diff for '%s'", 
rerere_path(id, NULL));
}
} else
usage_with_options(rerere_usage, options);
diff --git a/rerere.c b/rerere.c
index eca182023f..0e5956a51c 100644
--- a/rerere.c
+++ b/rerere.c
@@ -484,12 +484,12 @@ static int handle_file(const char *path, unsigned char 
*sha1, const char *output
io.input = fopen(path, "r");
io.io.wrerror = 0;
if (!io.input)
-   return error_errno("could not open %s", path);
+   return error_errno("could not open '%s'", path);
 
if (output) {
io.io.output = fopen(output, "w");
if (!io.io.output) {
-   error_errno("could not write %s", output);
+   error_errno("could not write '%s'", output);
fclose(io.input);
return -1;
}
@@ -499,15 +499,15 @@ static int handle_file(const char *path, unsigned char 
*sha1, const char *output
 
fclose(io.input);
if (io.io.wrerror)
-   error("there were errors while writing %s (%s)",
+   error("there were errors while writing '%s' (%s)",
  path, strerror(io.io.wrerror));
if (io.io.output && fclose(io.io.output))
-   io.io.wrerror = error_errno("failed to flush %s", path);
+   io.io.wrerror = error_errno("failed to flush '%s'", path);
 
if (hunk_no < 0) {
if (output)
unlink_or_warn(output);
-   return error("could not parse conflict hunks in %s", path);
+   return error("could not parse conflict hunks in '%s'", path);
}
if (io.io.wrerror)
return -1;
@@ -684,17 +684,17 @@ static int merge(const struct rerere_id *id, const char 
*path)
 * Mark that "postimage" was used to help gc.
 */
if (utime(rerere_path(id, "postimage"), NULL) < 0)
-   warning_errno("failed utime() on %s",
+   warning_errno("failed utime() on '%s'",
  rerere_path(id, "postimage"));
 
/* Update "path" with the resolution */
f = fopen(path, "w");
if (!f)
-   return error_errno("could not open %s", path);
+   return error_errno("could not open '%s'", path);
if (fwrite(result.ptr, result.size, 1, f) != 1)
-   error_errno("could not write %s", path);
+   error_errno("could not write '%s'", path);
if (fclose(f))
-   return error_errno("writing %s failed", path);
+   return error_errno("writing '%s' failed", path);
 
 out:
free(cur.ptr);
@@ -879,7 +879,7 @@ static int is_rerere_enabled(void)
return rr_cache_exists;
 
if (!rr_cache_exists && mkdir_in_gitdir(git_path_rr_cache()))
-   die("could not create directory %s", git_path_rr_cache());
+   die("could not create directory '%s'", git_path_rr_cache());
return 1;
 }
 
@@ -1068,9 +1068,9 @@ static int rerere_forget_one_path(const char *path, 
struct string_list *rr)
filename = rerere_path(id, "postimage");
if (unlink(filename)) {
if (errno == ENOENT)
-   error("no remembered resolution for %s", path);
+   error("no remembered resolution for '%s'", path);
else
-   error_errno("cannot unlink %s", filename);
+   error_errno("cannot unlink '%s'", filename);
goto fail_exit;
}
 
@@ -1089,7 +1089,7 @@ static int rerere_forget_one_path(const char *path, 
struct string_list *rr)
item = string_list_insert(rr, path);
free_rerere_id(item);
item->util = id;
-   fprintf(stderr, 

[PATCH v2 05/10] rerere: add some documentation

2018-06-05 Thread Thomas Gummerer
Add some documentation for the logic behind the conflict normalization
in rerere.

Helped-by: Junio C Hamano 
Signed-off-by: Thomas Gummerer 
---
 Documentation/technical/rerere.txt | 142 +
 rerere.c   |   4 -
 2 files changed, 142 insertions(+), 4 deletions(-)
 create mode 100644 Documentation/technical/rerere.txt

diff --git a/Documentation/technical/rerere.txt 
b/Documentation/technical/rerere.txt
new file mode 100644
index 00..2c517fe0fc
--- /dev/null
+++ b/Documentation/technical/rerere.txt
@@ -0,0 +1,142 @@
+Rerere
+==
+
+This document describes the rerere logic.
+
+Conflict normalization
+--
+
+To ensure recorded conflict resolutions can be looked up in the rerere
+database, even when branches are merged in a different order,
+different branches are merged that result in the same conflict, or
+when different conflict style settings are used, rerere normalizes the
+conflicts before writing them to the rerere database.
+
+Differnt conflict styles and branch names are dealt with by stripping
+that information from the conflict markers, and removing extraneous
+information from the `diff3` conflict style.
+
+Branches being merged in different order are dealt with by sorting the
+conflict hunks.  More on each of those parts in the following
+sections.
+
+Once these two normalization operations are applied, a conflict ID is
+created based on the normalized conflict, which is later used by
+rerere to look up the conflict in the rerere database.
+
+Stripping extraneous information
+
+
+Say we have three branches AB, AC and AC2.  The common ancestor of
+these branches has a file with with a line with the string "A" (for
+brevity this line is called "line A" for brevity in the following) in
+it.  In branch AB this line is changed to "B", in AC, this line is
+changed to C, and branch AC2 is forked off of AC, after the line was
+changed to C.
+
+Now forking a branch ABAC off of branch AB and then merging AC into it,
+we'd get a conflict like the following:
+
+<<< HEAD
+B
+===
+C
+>>> AC
+
+Now doing the analogous with AC2 (forking a branch ABAC2 off of branch
+AB and then merging branch AC2 into it), maybe using the diff3
+conflict style, we'd get a conflict like the following:
+
+<<< HEAD
+B
+||| merged common ancestors
+A
+===
+C
+>>> AC2
+
+By resolving this conflict, to leave line D, the user declares:
+
+After examining what branches AB and AC did, I believe that making
+line A into line D is the best thing to do that is compatible with
+what AB and AC wanted to do.
+
+As branch AC2 refers to the same commit as AC, the above implies that
+this is also compatible what AB and AC2 wanted to do.
+
+By extension, this means that rerere should recognize that the above
+conflicts are the same.  To do this, the labels on the conflict
+markers are stripped, and the diff3 output is removed.  The above
+examples would both result in the following normalized conflict:
+
+<<<
+B
+===
+C
+>>>
+
+Sorting hunks
+~
+
+As before, lets imagine that a common ancestor had a file with line A
+its early part, and line X in its late part.  And then four branches
+are forked that do these things:
+
+- AB: changes A to B
+- AC: changes A to C
+- XY: changes X to Y
+- XZ: changes X to Z
+
+Now, forking a branch ABAC off of branch AB and then merging AC into
+it, and forking a branch ACAB off of branch AC and then merging AB
+into it, would yield the conflict in a different order.  The former
+would say "A became B or C, what now?" while the latter would say "A
+became C or B, what now?"
+
+As a reminder, the act of merging AC into ABAC and resolving the
+conflict to leave line D means that the user declares:
+
+After examining what branches AB and AC did, I believe that
+making line A into line D is the best thing to do that is
+compatible with what AB and AC wanted to do.
+
+So the conflict we would see when merging AB into ACAB should be
+resolved the same way---it is the resolution that is in line with that
+declaration.
+
+Imagine that similarly previously a branch XYXZ was forked from XY,
+and XZ was merged into it, and resolved "X became Y or Z" into "X
+became W".
+
+Now, if a branch ABXY was forked from AB and then merged XY, then ABXY
+would have line B in its early part and line Y in its later part.
+Such a merge would be quite clean.  We can construct 4 combinations
+using these four branches ((AB, AC) x (XY, XZ)).
+
+Merging ABXY and ACXZ would make "an early A became B or C, a late X
+became Y or Z" conflict, while merging ACXY and ABXZ would make "an
+early A became C or B, a late X became Y or Z".  We can see there are
+4 combinations of ("B or C", "C or B") x ("X or Y", "Y or X").
+
+By sorting, the conflict is given its canonical name, namely, 

[PATCH v2 01/10] rerere: unify error messages when read_cache fails

2018-06-05 Thread Thomas Gummerer
We have multiple different variants of the error message we show to
the user if 'read_cache' fails.  The "Could not read index" variant we
are using in 'rerere.c' is currently not used anywhere in translated
form.

As a subsequent commit will mark all output that comes from 'rerere.c'
for translation, make the life of the translators a little bit easier
by using a string that is used elsewhere, and marked for translation
there, and thus most likely already translated.

"index file corrupt" seems to be the most common error message we show
when 'read_cache' fails, so use that here as well.

Signed-off-by: Thomas Gummerer 
---
 rerere.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rerere.c b/rerere.c
index 18cae2d11c..4b4869662d 100644
--- a/rerere.c
+++ b/rerere.c
@@ -568,7 +568,7 @@ static int find_conflict(struct string_list *conflict)
 {
int i;
if (read_cache() < 0)
-   return error("Could not read index");
+   return error("index file corrupt");
 
for (i = 0; i < active_nr;) {
int conflict_type;
@@ -601,7 +601,7 @@ int rerere_remaining(struct string_list *merge_rr)
if (setup_rerere(merge_rr, RERERE_READONLY))
return 0;
if (read_cache() < 0)
-   return error("Could not read index");
+   return error("index file corrupt");
 
for (i = 0; i < active_nr;) {
int conflict_type;
@@ -1104,7 +1104,7 @@ int rerere_forget(struct pathspec *pathspec)
struct string_list merge_rr = STRING_LIST_INIT_DUP;
 
if (read_cache() < 0)
-   return error("Could not read index");
+   return error("index file corrupt");
 
fd = setup_rerere(_rr, RERERE_NOAUTOUPDATE);
if (fd < 0)
-- 
2.17.0.410.g65aef3a6c4



Re: [RFC PATCH 1/2] docs: reflect supported fetch options of git pull

2018-06-05 Thread Rafael Ascensão
On Tue, Jun 05, 2018 at 06:05:56PM +0200, Duy Nguyen wrote:
> A better option may be making git-pull accept those options as well. I
> see no reason git-pull should support options that git-fetch does (at
> least most of them).

I sent this as a RFC, mostly to discuss what is the correct path to
follow. Updating the documentation was trivial and would still be useful
if nothing came out from this.

> It's basically a couple of OPT_PASSTRHU though so not very hard to do.

My impression was that in the past git was very permissive on adding new
options but nowadays it tries exercise more restraint. But not sure how
relevant this is anyways, as pull already supports the majority of the
options from both `fetch` and `merge`.

> PS. Anybody up to making parse-options accept multiple struct option
> arrays? This way we can have much better option passthru without
> specifying them again and again.

If the path is adding just couple of OPT_PASSTRHU, I could do it. But
I'll wait and see if someone picks your parse-options suggestion.

Thanks for the review.

--
Rafael Ascensão


Re: [PATCH] docs: link to gitsubmodules

2018-06-05 Thread Brandon Williams
On 06/05, Jonathan Nieder wrote:
> Jonathan Nieder wrote:
> 
> > --- i/Documentation/config.txt
> > +++ w/Documentation/config.txt
> > @@ -3327,13 +3327,13 @@ submodule..ignore::
> >  submodule..active::
> > Boolean value indicating if the submodule is of interest to git
> > commands.  This config option takes precedence over the
> > -   submodule.active config option. See linkgit:git-submodule[1] for
> > +   submodule.active config option. See linkgit:gitsubmodules[7] for
> > details.
> >  
> >  submodule.active::
> > A repeated field which contains a pathspec used to match against a
> > submodule's path to determine if the submodule is of interest to git
> > -   commands. See linkgit:git-submodule[1] for details.
> > +   commands. See linkgit:gitsubmodule[7] for details.
> 
> Gah, and I can't spell.  This one should have been
> linkgit:gitsubmodules[7].  Updated diff below.  Tested using
> 
>   make -C Documentation/ git-config.html gitsubmodules.html
>   w3m Documentation/git-config.html
> 
> Thanks and sorry for the noise,
> Jonathan
> 
> diff --git i/Documentation/config.txt w/Documentation/config.txt
> index 1277731aa4..340eb1f3c4 100644
> --- i/Documentation/config.txt
> +++ w/Documentation/config.txt
> @@ -3327,13 +3327,13 @@ submodule..ignore::
>  submodule..active::
>   Boolean value indicating if the submodule is of interest to git
>   commands.  This config option takes precedence over the
> - submodule.active config option. See linkgit:git-submodule[1] for
> + submodule.active config option. See linkgit:gitsubmodules[7] for
>   details.
>  
>  submodule.active::
>   A repeated field which contains a pathspec used to match against a
>   submodule's path to determine if the submodule is of interest to git
> - commands. See linkgit:git-submodule[1] for details.
> + commands. See linkgit:gitsubmodules[7] for details.
>  
>  submodule.recurse::
>   Specifies if commands recurse into submodules by default. This

Yep this is what I meant.

-- 
Brandon Williams


Re: [PATCH] docs: link to gitsubmodules

2018-06-05 Thread Jonathan Nieder
Jonathan Nieder wrote:

> --- i/Documentation/config.txt
> +++ w/Documentation/config.txt
> @@ -3327,13 +3327,13 @@ submodule..ignore::
>  submodule..active::
>   Boolean value indicating if the submodule is of interest to git
>   commands.  This config option takes precedence over the
> - submodule.active config option. See linkgit:git-submodule[1] for
> + submodule.active config option. See linkgit:gitsubmodules[7] for
>   details.
>  
>  submodule.active::
>   A repeated field which contains a pathspec used to match against a
>   submodule's path to determine if the submodule is of interest to git
> - commands. See linkgit:git-submodule[1] for details.
> + commands. See linkgit:gitsubmodule[7] for details.

Gah, and I can't spell.  This one should have been
linkgit:gitsubmodules[7].  Updated diff below.  Tested using

make -C Documentation/ git-config.html gitsubmodules.html
w3m Documentation/git-config.html

Thanks and sorry for the noise,
Jonathan

diff --git i/Documentation/config.txt w/Documentation/config.txt
index 1277731aa4..340eb1f3c4 100644
--- i/Documentation/config.txt
+++ w/Documentation/config.txt
@@ -3327,13 +3327,13 @@ submodule..ignore::
 submodule..active::
Boolean value indicating if the submodule is of interest to git
commands.  This config option takes precedence over the
-   submodule.active config option. See linkgit:git-submodule[1] for
+   submodule.active config option. See linkgit:gitsubmodules[7] for
details.
 
 submodule.active::
A repeated field which contains a pathspec used to match against a
submodule's path to determine if the submodule is of interest to git
-   commands. See linkgit:git-submodule[1] for details.
+   commands. See linkgit:gitsubmodules[7] for details.
 
 submodule.recurse::
Specifies if commands recurse into submodules by default. This


Re: [PATCH] docs: link to gitsubmodules

2018-06-05 Thread Brandon Williams
On 06/05, Ævar Arnfjörð Bjarmason wrote:
> 
> On Tue, Jun 05 2018, Brandon Williams wrote:
> 
> > Add a link to gitsubmodules(7) under the `submodule.active` entry in
> > git-config(1).
> 
> Did you mean to change either the subject or content of this patch? Your
> subject says gitsubmodules(7), but you link to git-submodule(1).

Yep I meant for it to be to gitsubmodules(7), turns out I don't know how
our documentation is built :)

-- 
Brandon Williams


Re: [PATCH] docs: link to gitsubmodules

2018-06-05 Thread Jonathan Nieder
Hi,

Brandon Williams wrote:

> Add a link to gitsubmodules(7) under the `submodule.active` entry in
> git-config(1).
>
> Signed-off-by: Brandon Williams 
> ---
>  Documentation/config.txt | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/config.txt b/Documentation/config.txt
> index ab641bf5a..1277731aa 100644
> --- a/Documentation/config.txt
> +++ b/Documentation/config.txt
> @@ -3327,12 +3327,13 @@ submodule..ignore::
>  submodule..active::
>   Boolean value indicating if the submodule is of interest to git
>   commands.  This config option takes precedence over the
> - submodule.active config option.
> + submodule.active config option. See linkgit:git-submodule[1] for
> + details.

This takes the user to gitsubmodules(7), but with a hop to
git-submodule(1) along the way there:

DESCRIPTION
   Inspects, updates and manages submodules.

   For more information about submodules, see gitsubmodules(7).

I suppose I'd prefer that it links directly to
linkgit:gitsubmodules[7] just because that would steer people toward
commands like "git checkout --recurse-submodules" instead of "git
submodule init".

With or without that tweak,
Reviewed-by: Jonathan Nieder 

Tested using

make -C Documentation/ git-config.1
man Documentation/git-config.1

Thanks,
Jonathan

diff --git i/Documentation/config.txt w/Documentation/config.txt
index 1277731aa4..efbd7e5652 100644
--- i/Documentation/config.txt
+++ w/Documentation/config.txt
@@ -3327,13 +3327,13 @@ submodule..ignore::
 submodule..active::
Boolean value indicating if the submodule is of interest to git
commands.  This config option takes precedence over the
-   submodule.active config option. See linkgit:git-submodule[1] for
+   submodule.active config option. See linkgit:gitsubmodules[7] for
details.
 
 submodule.active::
A repeated field which contains a pathspec used to match against a
submodule's path to determine if the submodule is of interest to git
-   commands. See linkgit:git-submodule[1] for details.
+   commands. See linkgit:gitsubmodule[7] for details.
 
 submodule.recurse::
Specifies if commands recurse into submodules by default. This


Re: [PATCH] docs: link to gitsubmodules

2018-06-05 Thread Ævar Arnfjörð Bjarmason


On Tue, Jun 05 2018, Brandon Williams wrote:

> Add a link to gitsubmodules(7) under the `submodule.active` entry in
> git-config(1).

Did you mean to change either the subject or content of this patch? Your
subject says gitsubmodules(7), but you link to git-submodule(1).


Re: [PATCH 2/8] upload-pack: implement ref-in-want

2018-06-05 Thread Ævar Arnfjörð Bjarmason


On Tue, Jun 05 2018, Brandon Williams wrote:

> +uploadpack.allowRefInWant::
> + If this option is set, `upload-pack` will support the `ref-in-want`
> + feature of the protocol version 2 `fetch` command.
> +

I think it makes sense to elaborate a bit on what this is for. Having
read this series through, and to make sure I understood this, maybe
something like this:

   This feature is intended for the benefit of load-balanced servers
   which may not have the same view of what SHA-1s their refs point to,
   but are guaranteed to never advertise a reference that another server
   serving the request doesn't know about.

I.e. from what I can tell this gives no benefits for someone using a
monolithic git server, except insofar as there would be a slight
decrease in network traffic if the average length of refs is less than
the length of a SHA-1.

That's fair enough, just something we should prominently say.

It does have the "disadvantage", if you can call it that, that it's
introducing a race condition between when we read the ref advertisement
and are promised XYZ refs, but may actually get ABC, but I can't think
of a reason anyone would care about this in practice.

The reason I'm saying "another server [...] doesn't know about" above is
that 2/8 has this:

if (read_ref(arg, ))
die("unknown ref %s", arg);

Doesn't that mean that if server A in your pool advertises master, next
& pu, and you then go and fetch from server B advertising master & next,
but not "pu" that the clone will die?

Presumably at Google you either have something to ensure a consistent
view, e.g. only advertise refs by name older than N seconds, or globally
update ref name but not their contents, and don't allow deleting refs
(or give them the same treatment).

But that, and again, I may have misunderstood this whole thing,
significantly reduces the utility of this feature for anyone "in the
wild" since nothing shipped with "git" gives you that feature.

The naïve way to do slave mirroring with stock git is to have a
post-receive hook that pushes to your mirrors in a for-loop, or has them
fetch from the master in a loop, and then round-robin LB those
servers. Due to the "die on nonexisting" semantics in this extension
that'll result in failed clones.

So I think we should either be really vocal about that caveat, or
perhaps think of how we could make that configurable, e.g. what happens
if the server says "sorry, don't know about that one", and carries on
with the rest it does know about?

Is there a way for client & server to gracefully recover from that?
E.g. send "master" & "next" now, and when I pull again in a few seconds
I get the new "pu"?

Also, as a digression isn't that a problem shared with protocol v2 in
general? I.e. without this extension isn't it going to make another
connection to the naïve LB'd mirroring setup described above and find
that SHA-1s as well as refs don't match?

BREAK.

Also is if this E-Mail wasn't long enough, on a completely different
topic, in an earlier discussion in
https://public-inbox.org/git/87inaje1uv@evledraar.gmail.com/ I noted
that it would be neat-o to have optional wildmatch/pcre etc. matching
for the use case you're not caring about here (and I don't expect you
to, you're solving a different problem).

But let's say I want to add that after this, and being unfamiliar with
the protocol v2 conventions. Would that be a whole new
ref-in-want-wildmatch-prefix capability with a new
want-ref-wildmatch-prefix verb, or is there some less verbose way we can
anticipate that use-case and internally version / advertise
sub-capabilities?

I don't know if that makes any sense, and would be fine with just a
ref-in-want-wildmatch-prefix if that's the way to do it. I just think
it's inevitable that we'll have such a thing eventually, so it's worth
thinking about how such a future extension fits in.


[PATCH] docs: link to gitsubmodules

2018-06-05 Thread Brandon Williams
Add a link to gitsubmodules(7) under the `submodule.active` entry in
git-config(1).

Signed-off-by: Brandon Williams 
---
 Documentation/config.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index ab641bf5a..1277731aa 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -3327,12 +3327,13 @@ submodule..ignore::
 submodule..active::
Boolean value indicating if the submodule is of interest to git
commands.  This config option takes precedence over the
-   submodule.active config option.
+   submodule.active config option. See linkgit:git-submodule[1] for
+   details.
 
 submodule.active::
A repeated field which contains a pathspec used to match against a
submodule's path to determine if the submodule is of interest to git
-   commands.
+   commands. See linkgit:git-submodule[1] for details.
 
 submodule.recurse::
Specifies if commands recurse into submodules by default. This
-- 
2.17.1.1185.g55be947832-goog



Re: [PATCH 0/3] refspec: refactor & fix free() behavior

2018-06-05 Thread Martin Ågren
On 5 June 2018 at 21:58, Brandon Williams  wrote:
> On 06/05, Ævar Arnfjörð Bjarmason wrote:
>> Since Martin & Brandon both liked this direction I've fixed it
>> up.
>>
>> Martin: I didn't want to be the author of the actual fix for the bug
>> you found, so I rewrote your commit in 3/3. The diff is different, and
>> I slightly modified the 3rd paragraph of the commit message & added my
>> sign-off, but otherwise it's the same.
>
> Thanks for writing up a proper patch series for this fix.  I liked
> breaking up your diff into two different patches to make it clear that
> all callers of refpsec_item_init relying on dieing.

Me too.

>> Martin Ågren (1):
>>   refspec: initalize `refspec_item` in `valid_fetch_refspec()`

I was a bit surprised at first that this wasn't a "while at it" in the
second patch, but on second thought, it does make sense to keep this
separate. Thanks for picking this up and polishing it.

Just noticed: s/initalize/initialize/. That would be my fault...

Martin


[PATCH 3/3] refspec: initalize `refspec_item` in `valid_fetch_refspec()`

2018-06-05 Thread Ævar Arnfjörð Bjarmason
From: Martin Ågren 

We allocate a `struct refspec_item` on the stack without initializing
it. In particular, its `dst` and `src` members will contain some random
data from the stack. When we later call `refspec_item_clear()`, it will
call `free()` on those pointers. So if the call to `parse_refspec()` did
not assign to them, we will be freeing some random "pointers". This is
undefined behavior.

To the best of my understanding, this cannot currently be triggered by
user-provided data. And for what it's worth, the test-suite does not
trigger this with SANITIZE=address. It can be provoked by calling
`valid_fetch_refspec(":*")`.

Zero the struct, as is done in other users of `struct refspec_item` by
using the refspec_item_init() initialization function.

Signed-off-by: Martin Ågren 
Signed-off-by: Ævar Arnfjörð Bjarmason 
---
 refspec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/refspec.c b/refspec.c
index a35493e35e..e8010dce0c 100644
--- a/refspec.c
+++ b/refspec.c
@@ -196,7 +196,7 @@ void refspec_clear(struct refspec *rs)
 int valid_fetch_refspec(const char *fetch_refspec_str)
 {
struct refspec_item refspec;
-   int ret = parse_refspec(, fetch_refspec_str, REFSPEC_FETCH);
+   int ret = refspec_item_init(, fetch_refspec_str, REFSPEC_FETCH);
refspec_item_clear();
return ret;
 }
-- 
2.17.0.290.gded63e768a



Re: [PATCH 0/3] refspec: refactor & fix free() behavior

2018-06-05 Thread Brandon Williams
On 06/05, Ævar Arnfjörð Bjarmason wrote:
> Since Martin & Brandon both liked this direction I've fixed it
> up.
> 
> Martin: I didn't want to be the author of the actual fix for the bug
> you found, so I rewrote your commit in 3/3. The diff is different, and
> I slightly modified the 3rd paragraph of the commit message & added my
> sign-off, but otherwise it's the same.

Thanks for writing up a proper patch series for this fix.  I liked
breaking up your diff into two different patches to make it clear that
all callers of refpsec_item_init relying on dieing.

> 
> Martin Ågren (1):
>   refspec: initalize `refspec_item` in `valid_fetch_refspec()`
> 
> Ævar Arnfjörð Bjarmason (2):
>   refspec: s/refspec_item_init/&_or_die/g
>   refspec: add back a refspec_item_init() function
> 
>  builtin/clone.c |  2 +-
>  builtin/pull.c  |  2 +-
>  refspec.c   | 13 +
>  refspec.h   |  5 -
>  4 files changed, 15 insertions(+), 7 deletions(-)
> 
> -- 
> 2.17.0.290.gded63e768a
> 

-- 
Brandon Williams


[PATCH 0/3] refspec: refactor & fix free() behavior

2018-06-05 Thread Ævar Arnfjörð Bjarmason
Since Martin & Brandon both liked this direction I've fixed it
up.

Martin: I didn't want to be the author of the actual fix for the bug
you found, so I rewrote your commit in 3/3. The diff is different, and
I slightly modified the 3rd paragraph of the commit message & added my
sign-off, but otherwise it's the same.

Martin Ågren (1):
  refspec: initalize `refspec_item` in `valid_fetch_refspec()`

Ævar Arnfjörð Bjarmason (2):
  refspec: s/refspec_item_init/&_or_die/g
  refspec: add back a refspec_item_init() function

 builtin/clone.c |  2 +-
 builtin/pull.c  |  2 +-
 refspec.c   | 13 +
 refspec.h   |  5 -
 4 files changed, 15 insertions(+), 7 deletions(-)

-- 
2.17.0.290.gded63e768a



[PATCH 2/3] refspec: add back a refspec_item_init() function

2018-06-05 Thread Ævar Arnfjörð Bjarmason
Re-add the non-fatal version of refspec_item_init_or_die() renamed
away in an earlier change to get a more minimal diff. This should be
used by callers that have their own error handling.

This new function could be marked "static" since nothing outside of
refspec.c uses it, but expecting future use of it, let's make it
available to other users.

Signed-off-by: Ævar Arnfjörð Bjarmason 
---
 refspec.c | 10 +++---
 refspec.h |  2 ++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/refspec.c b/refspec.c
index 0fd392e96b..a35493e35e 100644
--- a/refspec.c
+++ b/refspec.c
@@ -124,12 +124,16 @@ static int parse_refspec(struct refspec_item *item, const 
char *refspec, int fet
return 1;
 }
 
-void refspec_item_init_or_die(struct refspec_item *item, const char *refspec,
- int fetch)
+int refspec_item_init(struct refspec_item *item, const char *refspec, int 
fetch)
 {
memset(item, 0, sizeof(*item));
+   return parse_refspec(item, refspec, fetch);
+}
 
-   if (!parse_refspec(item, refspec, fetch))
+void refspec_item_init_or_die(struct refspec_item *item, const char *refspec,
+ int fetch)
+{
+   if (!refspec_item_init(item, refspec, fetch))
die("Invalid refspec '%s'", refspec);
 }
 
diff --git a/refspec.h b/refspec.h
index 4caaf1f8e3..9b6e64a824 100644
--- a/refspec.h
+++ b/refspec.h
@@ -32,6 +32,8 @@ struct refspec {
int fetch;
 };
 
+int refspec_item_init(struct refspec_item *item, const char *refspec,
+ int fetch);
 void refspec_item_init_or_die(struct refspec_item *item, const char *refspec,
  int fetch);
 void refspec_item_clear(struct refspec_item *item);
-- 
2.17.0.290.gded63e768a



[PATCH 1/3] refspec: s/refspec_item_init/&_or_die/g

2018-06-05 Thread Ævar Arnfjörð Bjarmason
Rename the refspec_item_init() function introduced in
6d4c057859 ("refspec: introduce struct refspec", 2018-05-16) to
refspec_item_init_or_die().

This follows the convention of other *_or_die() functions, and is done
in preparation for making it a wrapper for a non-fatal variant.

Signed-off-by: Ævar Arnfjörð Bjarmason 
---
 builtin/clone.c | 2 +-
 builtin/pull.c  | 2 +-
 refspec.c   | 5 +++--
 refspec.h   | 3 ++-
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 99e73dae85..74a804f2e8 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -1077,7 +1077,7 @@ int cmd_clone(int argc, const char **argv, const char 
*prefix)
if (option_required_reference.nr || option_optional_reference.nr)
setup_reference();
 
-   refspec_item_init(, value.buf, REFSPEC_FETCH);
+   refspec_item_init_or_die(, value.buf, REFSPEC_FETCH);
 
strbuf_reset();
 
diff --git a/builtin/pull.c b/builtin/pull.c
index 1f2ecf3a88..bb64631d98 100644
--- a/builtin/pull.c
+++ b/builtin/pull.c
@@ -684,7 +684,7 @@ static const char *get_tracking_branch(const char *remote, 
const char *refspec)
const char *spec_src;
const char *merge_branch;
 
-   refspec_item_init(, refspec, REFSPEC_FETCH);
+   refspec_item_init_or_die(, refspec, REFSPEC_FETCH);
spec_src = spec.src;
if (!*spec_src || !strcmp(spec_src, "HEAD"))
spec_src = "HEAD";
diff --git a/refspec.c b/refspec.c
index 78edc48ae8..0fd392e96b 100644
--- a/refspec.c
+++ b/refspec.c
@@ -124,7 +124,8 @@ static int parse_refspec(struct refspec_item *item, const 
char *refspec, int fet
return 1;
 }
 
-void refspec_item_init(struct refspec_item *item, const char *refspec, int 
fetch)
+void refspec_item_init_or_die(struct refspec_item *item, const char *refspec,
+ int fetch)
 {
memset(item, 0, sizeof(*item));
 
@@ -152,7 +153,7 @@ void refspec_append(struct refspec *rs, const char *refspec)
 {
struct refspec_item item;
 
-   refspec_item_init(, refspec, rs->fetch);
+   refspec_item_init_or_die(, refspec, rs->fetch);
 
ALLOC_GROW(rs->items, rs->nr + 1, rs->alloc);
rs->items[rs->nr++] = item;
diff --git a/refspec.h b/refspec.h
index 3a9363887c..4caaf1f8e3 100644
--- a/refspec.h
+++ b/refspec.h
@@ -32,7 +32,8 @@ struct refspec {
int fetch;
 };
 
-void refspec_item_init(struct refspec_item *item, const char *refspec, int 
fetch);
+void refspec_item_init_or_die(struct refspec_item *item, const char *refspec,
+ int fetch);
 void refspec_item_clear(struct refspec_item *item);
 void refspec_init(struct refspec *rs, int fetch);
 void refspec_append(struct refspec *rs, const char *refspec);
-- 
2.17.0.290.gded63e768a



Re: [PATCHv1 1/1] git-p4: better error reporting when p4 fails

2018-06-05 Thread Eric Sunshine
On Tue, Jun 5, 2018 at 6:59 AM Luke Diamand  wrote:
> On 5 June 2018 at 11:49, Merland Romain  wrote:
> >> +# now check that we can actually talk to the server
> >> +global p4_access_checked
> >> +if not p4_access_checked:
> >> +p4_access_checked = True
> >> +p4_check_access()
> >> +
> >
> > Just switch the 2 lines 'p4_access_checked = True' and 'p4_check_access()'
> > It seems to me more logical
>
> Like this:
>
> +p4_check_access()
> +p4_access_checked = True
>
> You need to set p4_access_checked first so that it doesn't go and try
> to check the p4 access before running "p4 login -s", which would then
> get stuck forever.

Such subtlety may deserve an in-code comment.


Re: [PATCHv1 1/3] git-p4: raise exceptions from p4CmdList based on error from p4 server

2018-06-05 Thread Eric Sunshine
On Tue, Jun 5, 2018 at 6:56 AM Luke Diamand  wrote:
> On 5 June 2018 at 10:54, Eric Sunshine  wrote:
> > On Tue, Jun 5, 2018 at 5:14 AM Luke Diamand  wrote:
> >> +m = re.search('Too many rows scanned \(over (\d+)\)', 
> >> data)
> >> +if not m:
> >> +m = re.search('Request too large \(over (\d+)\)', 
> >> data)
> >
> > Does 'p4' localize these error messages?
>
> That's a good question.
>
> It turns out that Perforce open-sourced the P4 client in 2014 (I only
> recently found this out) so we can actually look at the code now!
>
> Here's the code:
>
> // ErrorId graveyard: retired/deprecated ErrorIds.

Hmm, the "too many rows" error you're seeing is retired/deprecated(?).

> ErrorId MsgDb::MaxResults  = { ErrorOf( ES_DB, 32,
> E_FAILED, EV_ADMIN, 1 ), "Request too large (over %maxResults%); see
> 'p4 help maxresults'." } ;//NOTRANS
> ErrorId MsgDb::MaxScanRows = { ErrorOf( ES_DB, 61,
> E_FAILED, EV_ADMIN, 1 ), "Too many rows scanned (over %maxScanRows%);
> see 'p4 help maxscanrows'." } ;//NOTRANS
>
> I don't think there's actually a way to make it return any language
> other than English though. [...]
> So I think probably the language is always English.

The "NOTRANS" annotation on the error messages is reassuring.


Re: [PATCH 2/8] upload-pack: implement ref-in-want

2018-06-05 Thread Ramsay Jones



On 05/06/18 18:51, Brandon Williams wrote:
> Currently, while performing packfile negotiation, clients are only
> allowed to specify their desired objects using object ids.  This causes
> a vulnerability to failure when an object turns non-existent during
> negotiation, which may happen if, for example, the desired repository is
> provided by multiple Git servers in a load-balancing arrangement.
> 
> In order to eliminate this vulnerability, implement the ref-in-want
> feature for the 'fetch' command in protocol version 2.  This feature
> enables the 'fetch' command to support requests in the form of ref names
> through a new "want-ref " parameter.  At the conclusion of
> negotiation, the server will send a list of all of the wanted references
> (as provided by "want-ref" lines) in addition to the generated packfile.
> 
> Signed-off-by: Brandon Williams 
> ---
>  Documentation/config.txt|   4 +
>  Documentation/technical/protocol-v2.txt |  28 -
>  t/t5703-upload-pack-ref-in-want.sh  | 153 
>  upload-pack.c   |  64 ++
>  4 files changed, 248 insertions(+), 1 deletion(-)
>  create mode 100755 t/t5703-upload-pack-ref-in-want.sh
> 
> diff --git a/Documentation/config.txt b/Documentation/config.txt
> index ab641bf5a..acafe6c8d 100644
> --- a/Documentation/config.txt
> +++ b/Documentation/config.txt
> @@ -3479,6 +3479,10 @@ Note that this configuration variable is ignored if it 
> is seen in the
>  repository-level config (this is a safety measure against fetching from
>  untrusted repositories).
>  
> +uploadpack.allowRefInWant::
> + If this option is set, `upload-pack` will support the `ref-in-want`
> + feature of the protocol version 2 `fetch` command.
> +
>  url..insteadOf::
>   Any URL that starts with this value will be rewritten to
>   start, instead, with . In cases where some site serves a
> diff --git a/Documentation/technical/protocol-v2.txt 
> b/Documentation/technical/protocol-v2.txt
> index 49bda76d2..8367e09b8 100644
> --- a/Documentation/technical/protocol-v2.txt
> +++ b/Documentation/technical/protocol-v2.txt
> @@ -299,12 +299,21 @@ included in the client's request:
>   for use with partial clone and partial fetch operations. See
>   `rev-list` for possible "filter-spec" values.
>  
> +If the 'ref-in-want' feature is advertised, the following argument can
> +be included in the client's request as well as the potential addition of
> +the 'wanted-refs' section in the server's response as explained below.
> +
> +want-ref 
> + Indicates to the server than the client wants to retrieve a
> + particular ref, where  is the full name of a ref on the
> + server.
> +
>  The response of `fetch` is broken into a number of sections separated by
>  delimiter packets (0001), with each section beginning with its section
>  header.
>  
>  output = *section
> -section = (acknowledgments | shallow-info | packfile)
> +section = (acknowledgments | shallow-info | wanted-refs | packfile)
> (flush-pkt | delim-pkt)
>  
>  acknowledgments = PKT-LINE("acknowledgments" LF)
> @@ -319,6 +328,10 @@ header.
>  shallow = "shallow" SP obj-id
>  unshallow = "unshallow" SP obj-id
>  
> +wanted-refs = PKT-LINE("wanted-refs" LF)
> +   *PKT-Line(wanted-ref LF)

s/PKT-Line/PKT-LINE/

ATB,
Ramsay Jones



[PATCH 2/8] upload-pack: implement ref-in-want

2018-06-05 Thread Brandon Williams
Currently, while performing packfile negotiation, clients are only
allowed to specify their desired objects using object ids.  This causes
a vulnerability to failure when an object turns non-existent during
negotiation, which may happen if, for example, the desired repository is
provided by multiple Git servers in a load-balancing arrangement.

In order to eliminate this vulnerability, implement the ref-in-want
feature for the 'fetch' command in protocol version 2.  This feature
enables the 'fetch' command to support requests in the form of ref names
through a new "want-ref " parameter.  At the conclusion of
negotiation, the server will send a list of all of the wanted references
(as provided by "want-ref" lines) in addition to the generated packfile.

Signed-off-by: Brandon Williams 
---
 Documentation/config.txt|   4 +
 Documentation/technical/protocol-v2.txt |  28 -
 t/t5703-upload-pack-ref-in-want.sh  | 153 
 upload-pack.c   |  64 ++
 4 files changed, 248 insertions(+), 1 deletion(-)
 create mode 100755 t/t5703-upload-pack-ref-in-want.sh

diff --git a/Documentation/config.txt b/Documentation/config.txt
index ab641bf5a..acafe6c8d 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -3479,6 +3479,10 @@ Note that this configuration variable is ignored if it 
is seen in the
 repository-level config (this is a safety measure against fetching from
 untrusted repositories).
 
+uploadpack.allowRefInWant::
+   If this option is set, `upload-pack` will support the `ref-in-want`
+   feature of the protocol version 2 `fetch` command.
+
 url..insteadOf::
Any URL that starts with this value will be rewritten to
start, instead, with . In cases where some site serves a
diff --git a/Documentation/technical/protocol-v2.txt 
b/Documentation/technical/protocol-v2.txt
index 49bda76d2..8367e09b8 100644
--- a/Documentation/technical/protocol-v2.txt
+++ b/Documentation/technical/protocol-v2.txt
@@ -299,12 +299,21 @@ included in the client's request:
for use with partial clone and partial fetch operations. See
`rev-list` for possible "filter-spec" values.
 
+If the 'ref-in-want' feature is advertised, the following argument can
+be included in the client's request as well as the potential addition of
+the 'wanted-refs' section in the server's response as explained below.
+
+want-ref 
+   Indicates to the server than the client wants to retrieve a
+   particular ref, where  is the full name of a ref on the
+   server.
+
 The response of `fetch` is broken into a number of sections separated by
 delimiter packets (0001), with each section beginning with its section
 header.
 
 output = *section
-section = (acknowledgments | shallow-info | packfile)
+section = (acknowledgments | shallow-info | wanted-refs | packfile)
  (flush-pkt | delim-pkt)
 
 acknowledgments = PKT-LINE("acknowledgments" LF)
@@ -319,6 +328,10 @@ header.
 shallow = "shallow" SP obj-id
 unshallow = "unshallow" SP obj-id
 
+wanted-refs = PKT-LINE("wanted-refs" LF)
+ *PKT-Line(wanted-ref LF)
+wanted-ref = obj-id SP refname
+
 packfile = PKT-LINE("packfile" LF)
   *PKT-LINE(%x01-03 *%x00-ff)
 
@@ -379,6 +392,19 @@ header.
* This section is only included if a packfile section is also
  included in the response.
 
+wanted-refs section
+   * This section is only included if the client has requested a
+ ref using a 'want-ref' line and if a packfile section is also
+ included in the response.
+
+   * Always begins with the section header "wanted-refs"
+
+   * The server will send a ref listing (" ") for
+ each reference requested using 'want-ref' lines.
+
+   * Ther server MUST NOT send any refs which were not requested
+ using 'want-ref' lines.
+
 packfile section
* This section is only included if the client has sent 'want'
  lines in its request and either requested that no more
diff --git a/t/t5703-upload-pack-ref-in-want.sh 
b/t/t5703-upload-pack-ref-in-want.sh
new file mode 100755
index 0..0ef182970
--- /dev/null
+++ b/t/t5703-upload-pack-ref-in-want.sh
@@ -0,0 +1,153 @@
+#!/bin/sh
+
+test_description='upload-pack ref-in-want'
+
+. ./test-lib.sh
+
+get_actual_refs() {
+   sed -n '/wanted-refs/,/0001/p' actual_refs
+}
+
+get_actual_commits() {
+   sed -n '/packfile/,//p' o.pack &&
+   git index-pack o.pack &&
+   git verify-pack -v o.idx | grep commit | cut -c-40 | sort 
>actual_commits
+}
+
+check_output() {
+   get_actual_refs &&
+   test_cmp expected_refs actual_refs &&
+   get_actual_commits &&
+   test_cmp expected_commits actual_commits
+}
+
+# c(o/foo) d(o/bar)
+#\ /
+# b   e(baz)  f(master)
+#  \__  |  __/
+# \ | /
+#   a
+test_expect_success 'setup 

[PATCH 0/8] ref-in-want

2018-06-05 Thread Brandon Williams
This series adds the ref-in-want feature which was originally proposed
by Jonathan Tan
(https://public-inbox.org/git/cover.1485381677.git.jonathanta...@google.com/).

Back when ref-in-want was first discussed it was decided that we should
first solve the issue of moving to a new wire format and find a way to
limit the ref-advertisement before moving forward with ref-in-want.  Now
that protocol version 2 is a reality, and that refs can be filtered on
the server side, we can revisit ref-in-want.

This version of ref-in-want is a bit more restrictive than what Jonathan
originally proposed (only full ref names are allowed instead of globs
and OIDs), but it is meant to accomplish the same goal (solve the issues
of refs changing during negotiation).

Brandon Williams (8):
  test-pkt-line: add unpack-sideband subcommand
  upload-pack: implement ref-in-want
  upload-pack: test negotiation with changing repository
  fetch: refactor the population of peer ref OIDs
  fetch: refactor fetch_refs into two functions
  fetch: refactor to make function args narrower
  fetch-pack: put shallow info in output parameter
  fetch-pack: implement ref-in-want

 Documentation/config.txt|   4 +
 Documentation/technical/protocol-v2.txt |  28 ++-
 builtin/clone.c |   4 +-
 builtin/fetch.c | 126 +++-
 fetch-object.c  |   2 +-
 fetch-pack.c|  52 +++--
 remote.c|   1 +
 remote.h|   1 +
 t/helper/test-pkt-line.c|  37 
 t/lib-httpd.sh  |   1 +
 t/lib-httpd/apache.conf |   8 +
 t/lib-httpd/one-time-sed.sh |  16 ++
 t/t5703-upload-pack-ref-in-want.sh  | 245 
 transport-helper.c  |   6 +-
 transport-internal.h|   9 +-
 transport.c |  34 +++-
 transport.h |   3 +-
 upload-pack.c   |  64 +++
 18 files changed, 564 insertions(+), 77 deletions(-)
 create mode 100644 t/lib-httpd/one-time-sed.sh
 create mode 100755 t/t5703-upload-pack-ref-in-want.sh

-- 
2.17.1.1185.g55be947832-goog



[PATCH 1/8] test-pkt-line: add unpack-sideband subcommand

2018-06-05 Thread Brandon Williams
Add an 'unpack-sideband' subcommand to the test-pkt-line helper to
enable unpacking packet line data sent multiplexed using a sideband.

Signed-off-by: Brandon Williams 
---
 t/helper/test-pkt-line.c | 37 +
 1 file changed, 37 insertions(+)

diff --git a/t/helper/test-pkt-line.c b/t/helper/test-pkt-line.c
index 0f19e53c7..2a551 100644
--- a/t/helper/test-pkt-line.c
+++ b/t/helper/test-pkt-line.c
@@ -1,3 +1,4 @@
+#include "cache.h"
 #include "pkt-line.h"
 
 static void pack_line(const char *line)
@@ -48,6 +49,40 @@ static void unpack(void)
}
 }
 
+static void unpack_sideband(void)
+{
+   struct packet_reader reader;
+   packet_reader_init(, 0, NULL, 0,
+  PACKET_READ_GENTLE_ON_EOF |
+  PACKET_READ_CHOMP_NEWLINE);
+
+   while (packet_reader_read() != PACKET_READ_EOF) {
+   int band;
+   int fd;
+
+   switch (reader.status) {
+   case PACKET_READ_EOF:
+   break;
+   case PACKET_READ_NORMAL:
+   band = reader.line[0] & 0xff;
+   if (band == 1)
+   fd = 1;
+   else
+   fd = 2;
+
+   write_or_die(fd, reader.line+1, reader.pktlen-1);
+
+   if (band == 3)
+   die("sind-band error");
+   break;
+   case PACKET_READ_FLUSH:
+   return;
+   case PACKET_READ_DELIM:
+   break;
+   }
+   }
+}
+
 int cmd_main(int argc, const char **argv)
 {
if (argc < 2)
@@ -57,6 +92,8 @@ int cmd_main(int argc, const char **argv)
pack(argc - 2, argv + 2);
else if (!strcmp(argv[1], "unpack"))
unpack();
+   else if (!strcmp(argv[1], "unpack-sideband"))
+   unpack_sideband();
else
die("invalid argument '%s'", argv[1]);
 
-- 
2.17.1.1185.g55be947832-goog



[PATCH 5/8] fetch: refactor fetch_refs into two functions

2018-06-05 Thread Brandon Williams
Refactor the fetch_refs function into a function that does the fetching
of refs and another function that stores them.

Signed-off-by: Brandon Williams 
---
 builtin/fetch.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 545635448..ee8b87c78 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -967,10 +967,16 @@ static int fetch_refs(struct transport *transport, struct 
ref *ref_map)
int ret = quickfetch(ref_map);
if (ret)
ret = transport_fetch_refs(transport, ref_map);
-   if (!ret)
-   ret |= store_updated_refs(transport->url,
-   transport->remote->name,
-   ref_map);
+   if (ret)
+   transport_unlock_pack(transport);
+   return ret;
+}
+
+static int consume_refs(struct transport *transport, struct ref *ref_map)
+{
+   int ret = store_updated_refs(transport->url,
+transport->remote->name,
+ref_map);
transport_unlock_pack(transport);
return ret;
 }
@@ -1116,7 +1122,8 @@ static void backfill_tags(struct transport *transport, 
struct ref *ref_map)
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, NULL);
transport_set_option(transport, TRANS_OPT_DEPTH, "0");
transport_set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, NULL);
-   fetch_refs(transport, ref_map);
+   if (!fetch_refs(transport, ref_map))
+   consume_refs(transport, ref_map);
 
if (gsecondary) {
transport_disconnect(gsecondary);
@@ -1165,7 +1172,7 @@ static int do_fetch(struct transport *transport,
   transport->url);
}
}
-   if (fetch_refs(transport, ref_map)) {
+   if (fetch_refs(transport, ref_map) || consume_refs(transport, ref_map)) 
{
free_refs(ref_map);
retcode = 1;
goto cleanup;
-- 
2.17.1.1185.g55be947832-goog



[PATCH 7/8] fetch-pack: put shallow info in output parameter

2018-06-05 Thread Brandon Williams
Expand the transport fetch method signature, by adding an output
parameter, to allow transports to return information about the refs they
have fetched.  Then communicate shallow status information through this
mechanism instead of by modifying the input list of refs.

This does require clients to sometimes generate the ref map twice: once
from the list of refs provided by the remote (as is currently done) and
potentially once from the new list of refs that the fetch mechanism
provides.

Signed-off-by: Brandon Williams 
---
 builtin/clone.c  |  4 ++--
 builtin/fetch.c  | 23 +++
 fetch-object.c   |  2 +-
 fetch-pack.c | 17 +
 transport-helper.c   |  6 --
 transport-internal.h |  9 -
 transport.c  | 34 --
 transport.h  |  3 ++-
 8 files changed, 73 insertions(+), 25 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 99e73dae8..8f86d99c5 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -1155,7 +1155,7 @@ int cmd_clone(int argc, const char **argv, const char 
*prefix)
}
 
if (!is_local && !complete_refs_before_fetch)
-   transport_fetch_refs(transport, mapped_refs);
+   transport_fetch_refs(transport, mapped_refs, NULL);
 
remote_head = find_ref_by_name(refs, "HEAD");
remote_head_points_at =
@@ -1197,7 +1197,7 @@ int cmd_clone(int argc, const char **argv, const char 
*prefix)
if (is_local)
clone_local(path, git_dir);
else if (refs && complete_refs_before_fetch)
-   transport_fetch_refs(transport, mapped_refs);
+   transport_fetch_refs(transport, mapped_refs, NULL);
 
update_remote_refs(refs, mapped_refs, remote_head_points_at,
   branch_top.buf, reflog_msg.buf, transport,
diff --git a/builtin/fetch.c b/builtin/fetch.c
index b600e1f10..ddf44ba1a 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -946,11 +946,13 @@ static int quickfetch(struct ref *ref_map)
return check_connected(iterate_ref_map, , );
 }
 
-static int fetch_refs(struct transport *transport, struct ref *ref_map)
+static int fetch_refs(struct transport *transport, struct ref *ref_map,
+ struct ref **updated_remote_refs)
 {
int ret = quickfetch(ref_map);
if (ret)
-   ret = transport_fetch_refs(transport, ref_map);
+   ret = transport_fetch_refs(transport, ref_map,
+  updated_remote_refs);
if (ret)
transport_unlock_pack(transport);
return ret;
@@ -1106,7 +1108,7 @@ static void backfill_tags(struct transport *transport, 
struct ref *ref_map)
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, NULL);
transport_set_option(transport, TRANS_OPT_DEPTH, "0");
transport_set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, NULL);
-   if (!fetch_refs(transport, ref_map))
+   if (!fetch_refs(transport, ref_map, NULL))
consume_refs(transport, ref_map);
 
if (gsecondary) {
@@ -1122,6 +1124,7 @@ static int do_fetch(struct transport *transport,
int autotags = (transport->remote->fetch_tags == 1);
int retcode = 0;
const struct ref *remote_refs;
+   struct ref *new_remote_refs = NULL;
struct argv_array ref_prefixes = ARGV_ARRAY_INIT;
 
if (tags == TAGS_DEFAULT) {
@@ -1172,7 +1175,19 @@ static int do_fetch(struct transport *transport,
   transport->url);
}
}
-   if (fetch_refs(transport, ref_map) || consume_refs(transport, ref_map)) 
{
+
+   if (fetch_refs(transport, ref_map, _remote_refs)) {
+   free_refs(ref_map);
+   retcode = 1;
+   goto cleanup;
+   }
+   if (new_remote_refs) {
+   free_refs(ref_map);
+   ref_map = get_ref_map(transport->remote, new_remote_refs, rs,
+ tags, );
+   free_refs(new_remote_refs);
+   }
+   if (consume_refs(transport, ref_map)) {
free_refs(ref_map);
retcode = 1;
goto cleanup;
diff --git a/fetch-object.c b/fetch-object.c
index 853624f81..48fe63dd6 100644
--- a/fetch-object.c
+++ b/fetch-object.c
@@ -19,7 +19,7 @@ static void fetch_refs(const char *remote_name, struct ref 
*ref)
 
transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1");
-   transport_fetch_refs(transport, ref);
+   transport_fetch_refs(transport, ref, NULL);
fetch_if_missing = original_fetch_if_missing;
 }
 
diff --git a/fetch-pack.c b/fetch-pack.c
index a320ce987..7799ee2cd 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1470,12 +1470,13 @@ static int 

[PATCH 8/8] fetch-pack: implement ref-in-want

2018-06-05 Thread Brandon Williams
Implement ref-in-want on the client side so that when a server supports
the "ref-in-want" feature, a client will send "want-ref" lines for each
reference the client wants to fetch.

Signed-off-by: Brandon Williams 
---
 fetch-pack.c   | 35 +++---
 remote.c   |  1 +
 remote.h   |  1 +
 t/t5703-upload-pack-ref-in-want.sh |  4 ++--
 4 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/fetch-pack.c b/fetch-pack.c
index 7799ee2cd..51e8356ba 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1102,9 +1102,10 @@ static void add_shallow_requests(struct strbuf *req_buf,
 
 static void add_wants(const struct ref *wants, struct strbuf *req_buf)
 {
+   int use_ref_in_want = server_supports_feature("fetch", "ref-in-want", 
0);
+
for ( ; wants ; wants = wants->next) {
const struct object_id *remote = >old_oid;
-   const char *remote_hex;
struct object *o;
 
/*
@@ -1122,8 +1123,10 @@ static void add_wants(const struct ref *wants, struct 
strbuf *req_buf)
continue;
}
 
-   remote_hex = oid_to_hex(remote);
-   packet_buf_write(req_buf, "want %s\n", remote_hex);
+   if (!use_ref_in_want || wants->exact_sha1)
+   packet_buf_write(req_buf, "want %s\n", 
oid_to_hex(remote));
+   else
+   packet_buf_write(req_buf, "want-ref %s\n", wants->name);
}
 }
 
@@ -1334,6 +1337,29 @@ static void receive_shallow_info(struct fetch_pack_args 
*args,
args->deepen = 1;
 }
 
+static void receive_wanted_refs(struct packet_reader *reader, struct ref *refs)
+{
+   process_section_header(reader, "wanted-refs", 0);
+   while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
+   struct object_id oid;
+   const char *end;
+   struct ref *r = NULL;
+
+   if (parse_oid_hex(reader->line, , ) || *end++ != ' ')
+   die("expected wanted-ref, got '%s'", reader->line);
+
+   for (r = refs; r; r = r->next) {
+   if (!strcmp(end, r->name)) {
+   oidcpy(>old_oid, );
+   break;
+   }
+   }
+   }
+
+   if (reader->status != PACKET_READ_DELIM)
+   die("error processing wanted refs: %d", reader->status);
+}
+
 enum fetch_state {
FETCH_CHECK_LOCAL = 0,
FETCH_SEND_REQUEST,
@@ -1408,6 +1434,9 @@ static struct ref *do_fetch_pack_v2(struct 
fetch_pack_args *args,
if (process_section_header(, "shallow-info", 1))
receive_shallow_info(args, );
 
+   if (process_section_header(, "wanted-refs", 1))
+   receive_wanted_refs(, ref);
+
/* get the pack */
process_section_header(, "packfile", 0);
if (get_pack(args, fd, pack_lockfile))
diff --git a/remote.c b/remote.c
index abe80c139..c9d452ac0 100644
--- a/remote.c
+++ b/remote.c
@@ -1735,6 +1735,7 @@ int get_fetch_map(const struct ref *remote_refs,
if (refspec->exact_sha1) {
ref_map = alloc_ref(name);
get_oid_hex(name, _map->old_oid);
+   ref_map->exact_sha1 = 1;
} else {
ref_map = get_remote_ref(remote_refs, name);
}
diff --git a/remote.h b/remote.h
index 45ecc6cef..e5338e368 100644
--- a/remote.h
+++ b/remote.h
@@ -73,6 +73,7 @@ struct ref {
force:1,
forced_update:1,
expect_old_sha1:1,
+   exact_sha1:1,
deletion:1;
 
enum {
diff --git a/t/t5703-upload-pack-ref-in-want.sh 
b/t/t5703-upload-pack-ref-in-want.sh
index 979ab6d03..b94a51380 100755
--- a/t/t5703-upload-pack-ref-in-want.sh
+++ b/t/t5703-upload-pack-ref-in-want.sh
@@ -204,7 +204,7 @@ test_expect_success 'server is initially ahead - no ref in 
want' '
grep "ERR upload-pack: not our ref" err
 '
 
-test_expect_failure 'server is initially ahead - ref in want' '
+test_expect_success 'server is initially ahead - ref in want' '
git -C "$REPO" config uploadpack.allowRefInWant true &&
rm -rf local &&
cp -r "$LOCAL_PRISTINE" local &&
@@ -228,7 +228,7 @@ test_expect_success 'server is initially behind - no ref in 
want' '
test_cmp expected actual
 '
 
-test_expect_failure 'server is initially behind - ref in want' '
+test_expect_success 'server is initially behind - ref in want' '
git -C "$REPO" config uploadpack.allowRefInWant true &&
rm -rf local &&
cp -r "$LOCAL_PRISTINE" local &&
-- 
2.17.1.1185.g55be947832-goog



Re: Where is git checkout --orphan implemented at.

2018-06-05 Thread Jeff King
On Tue, Jun 05, 2018 at 12:02:12PM -0400, Sean Hunt wrote:

> I would like to see the source code to git checkout --orphan so I can
> learn how it works and so I can manually do what it does by hand on
> making a new branch with no history in the refs folder. I can only do
> it on my iPhone as my laptop has no internet or way to do it there,
> and the program on my iPhone does not have the method implemented yet
> visually to do it, forcing manual creation of the orphan branch by
> hand in the end. If the public github had all the codes to all
> commands and subcommands like the one above it would be nice or well
> at least a file that explains the source file each command and
> subcommands are from so that way a person like me can use as a
> reference to make our own git gui that has 100% of command line git
> features.

The code for "checkout --orphan" is in builtin/checkout.c[1].

But if you want to do roughly the same thing with other tools, you can
do:

 git symbolic-ref HEAD refs/heads/new-branch

If you don't even have symbolic-ref handy, you can do:

  echo "ref: refs/heads/new-branch" >.git/HEAD

That's not generally recommended, since future versions of Git may
change the ref storage format, but it would work with any current
version of Git.

-Peff

[1] Try update_refs_for_switch(), especially:


https://github.com/git/git/blob/61856ae69a2ceb241a90e47953e18f218e4d5f2f/builtin/checkout.c#L635

and


https://github.com/git/git/blob/61856ae69a2ceb241a90e47953e18f218e4d5f2f/builtin/checkout.c#L695


[PATCH 6/8] fetch: refactor to make function args narrower

2018-06-05 Thread Brandon Williams
Refactor find_non_local_tags and get_ref_map to only take the
information they need instead of the entire transport struct. Besides
improving code clarity, this also improves their flexibility, allowing
for a different set of refs to be used instead of relying on the ones
stored in the transport struct.

Signed-off-by: Brandon Williams 
---
 builtin/fetch.c | 52 -
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index ee8b87c78..b600e1f10 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -254,9 +254,9 @@ static int will_fetch(struct ref **head, const unsigned 
char *sha1)
return 0;
 }
 
-static void find_non_local_tags(struct transport *transport,
-   struct ref **head,
-   struct ref ***tail)
+static void find_non_local_tags(const struct ref *refs,
+   struct ref **head,
+   struct ref ***tail)
 {
struct string_list existing_refs = STRING_LIST_INIT_DUP;
struct string_list remote_refs = STRING_LIST_INIT_NODUP;
@@ -264,7 +264,7 @@ static void find_non_local_tags(struct transport *transport,
struct string_list_item *item = NULL;
 
for_each_ref(add_existing, _refs);
-   for (ref = transport_get_remote_refs(transport, NULL); ref; ref = 
ref->next) {
+   for (ref = refs; ref; ref = ref->next) {
if (!starts_with(ref->name, "refs/tags/"))
continue;
 
@@ -338,7 +338,8 @@ static void find_non_local_tags(struct transport *transport,
string_list_clear(_refs, 0);
 }
 
-static struct ref *get_ref_map(struct transport *transport,
+static struct ref *get_ref_map(struct remote *remote,
+  const struct ref *remote_refs,
   struct refspec *rs,
   int tags, int *autotags)
 {
@@ -346,27 +347,11 @@ static struct ref *get_ref_map(struct transport 
*transport,
struct ref *rm;
struct ref *ref_map = NULL;
struct ref **tail = _map;
-   struct argv_array ref_prefixes = ARGV_ARRAY_INIT;
 
/* opportunistically-updated references: */
struct ref *orefs = NULL, **oref_tail = 
 
struct string_list existing_refs = STRING_LIST_INIT_DUP;
-   const struct ref *remote_refs;
-
-   if (rs->nr)
-   refspec_ref_prefixes(rs, _prefixes);
-   else if (transport->remote && transport->remote->fetch.nr)
-   refspec_ref_prefixes(>remote->fetch, _prefixes);
-
-   if (ref_prefixes.argc &&
-   (tags == TAGS_SET || (tags == TAGS_DEFAULT && !rs->nr))) {
-   argv_array_push(_prefixes, "refs/tags/");
-   }
-
-   remote_refs = transport_get_remote_refs(transport, _prefixes);
-
-   argv_array_clear(_prefixes);
 
if (rs->nr) {
struct refspec *fetch_refspec;
@@ -403,7 +388,7 @@ static struct ref *get_ref_map(struct transport *transport,
if (refmap.nr)
fetch_refspec = 
else
-   fetch_refspec = >remote->fetch;
+   fetch_refspec = >fetch;
 
for (i = 0; i < fetch_refspec->nr; i++)
get_fetch_map(ref_map, _refspec->items[i], 
_tail, 1);
@@ -411,7 +396,6 @@ static struct ref *get_ref_map(struct transport *transport,
die("--refmap option is only meaningful with command-line 
refspec(s).");
} else {
/* Use the defaults */
-   struct remote *remote = transport->remote;
struct branch *branch = branch_get(NULL);
int has_merge = branch_has_merge_config(branch);
if (remote &&
@@ -450,7 +434,7 @@ static struct ref *get_ref_map(struct transport *transport,
/* also fetch all tags */
get_fetch_map(remote_refs, tag_refspec, , 0);
else if (tags == TAGS_DEFAULT && *autotags)
-   find_non_local_tags(transport, _map, );
+   find_non_local_tags(remote_refs, _map, );
 
/* Now append any refs to be updated opportunistically: */
*tail = orefs;
@@ -1137,6 +1121,8 @@ static int do_fetch(struct transport *transport,
struct ref *ref_map;
int autotags = (transport->remote->fetch_tags == 1);
int retcode = 0;
+   const struct ref *remote_refs;
+   struct argv_array ref_prefixes = ARGV_ARRAY_INIT;
 
if (tags == TAGS_DEFAULT) {
if (transport->remote->fetch_tags == 2)
@@ -1152,7 +1138,21 @@ static int do_fetch(struct transport *transport,
goto cleanup;
}
 
-   ref_map = get_ref_map(transport, rs, tags, );
+   if (rs->nr)
+   refspec_ref_prefixes(rs, _prefixes);
+   else if (transport->remote && transport->remote->fetch.nr)
+   

[PATCH 3/8] upload-pack: test negotiation with changing repository

2018-06-05 Thread Brandon Williams
Add tests to check the behavior of fetching from a repository which
changes between rounds of negotiation (for example, when different
servers in a load-balancing agreement participate in the same stateless
RPC negotiation). This forms a baseline of comparison to the ref-in-want
functionality (which will be introduced to the client in subsequent
commits), and ensures that subsequent commits do not change existing
behavior.

As part of this effort, a mechanism to substitute strings in a single
HTTP response is added.

Signed-off-by: Brandon Williams 
---
 t/lib-httpd.sh |  1 +
 t/lib-httpd/apache.conf|  8 +++
 t/lib-httpd/one-time-sed.sh| 16 ++
 t/t5703-upload-pack-ref-in-want.sh | 92 ++
 4 files changed, 117 insertions(+)
 create mode 100644 t/lib-httpd/one-time-sed.sh

diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh
index 435a37465..84f8efdd4 100644
--- a/t/lib-httpd.sh
+++ b/t/lib-httpd.sh
@@ -132,6 +132,7 @@ prepare_httpd() {
cp "$TEST_PATH"/passwd "$HTTPD_ROOT_PATH"
install_script broken-smart-http.sh
install_script error.sh
+   install_script one-time-sed.sh
 
ln -s "$LIB_HTTPD_MODULE_PATH" "$HTTPD_ROOT_PATH/modules"
 
diff --git a/t/lib-httpd/apache.conf b/t/lib-httpd/apache.conf
index 724d9ae46..fe68d37bb 100644
--- a/t/lib-httpd/apache.conf
+++ b/t/lib-httpd/apache.conf
@@ -111,9 +111,14 @@ Alias /auth/dumb/ www/auth/dumb/
SetEnv GIT_EXEC_PATH ${GIT_EXEC_PATH}
SetEnv GIT_HTTP_EXPORT_ALL
 
+
+   SetEnv GIT_EXEC_PATH ${GIT_EXEC_PATH}
+   SetEnv GIT_HTTP_EXPORT_ALL
+
 ScriptAliasMatch /smart_*[^/]*/(.*) ${GIT_EXEC_PATH}/git-http-backend/$1
 ScriptAlias /broken_smart/ broken-smart-http.sh/
 ScriptAlias /error/ error.sh/
+ScriptAliasMatch /one_time_sed/(.*) one-time-sed.sh/$1
 
Options FollowSymlinks
 
@@ -123,6 +128,9 @@ ScriptAlias /error/ error.sh/
 
   Options ExecCGI
 
+
+   Options ExecCGI
+
 
Options ExecCGI
 
diff --git a/t/lib-httpd/one-time-sed.sh b/t/lib-httpd/one-time-sed.sh
new file mode 100644
index 0..a9c4aa5f4
--- /dev/null
+++ b/t/lib-httpd/one-time-sed.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+if [ -e one-time-sed ]; then
+   "$GIT_EXEC_PATH/git-http-backend" >out
+
+   sed "$(cat one-time-sed)" out_modified
+
+   if diff out out_modified >/dev/null; then
+   cat out
+   else
+   cat out_modified
+   rm one-time-sed
+   fi
+else
+   "$GIT_EXEC_PATH/git-http-backend"
+fi
diff --git a/t/t5703-upload-pack-ref-in-want.sh 
b/t/t5703-upload-pack-ref-in-want.sh
index 0ef182970..979ab6d03 100755
--- a/t/t5703-upload-pack-ref-in-want.sh
+++ b/t/t5703-upload-pack-ref-in-want.sh
@@ -150,4 +150,96 @@ test_expect_success 'want-ref with ref we already have 
commit for' '
check_output
 '
 
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+REPO="$HTTPD_DOCUMENT_ROOT_PATH/repo"
+LOCAL_PRISTINE="$(pwd)/local_pristine"
+
+test_expect_success 'setup repos for change-while-negotiating test' '
+   (
+   git init "$REPO" &&
+   cd "$REPO" &&
+   >.git/git-daemon-export-ok &&
+   test_commit m1 &&
+   git tag -d m1 &&
+
+   # Local repo with many commits (so that negotiation will take
+   # more than 1 request/response pair)
+   git clone "http://127.0.0.1:$LIB_HTTPD_PORT/smart/repo; 
"$LOCAL_PRISTINE" &&
+   cd "$LOCAL_PRISTINE" &&
+   git checkout -b side &&
+   for i in $(seq 1 33); do test_commit s$i; done &&
+
+   # Add novel commits to upstream
+   git checkout master &&
+   cd "$REPO" &&
+   test_commit m2 &&
+   test_commit m3 &&
+   git tag -d m2 m3
+   ) &&
+   git -C "$LOCAL_PRISTINE" remote set-url origin 
"http://127.0.0.1:$LIB_HTTPD_PORT/one_time_sed/repo; &&
+   git -C "$LOCAL_PRISTINE" config protocol.version 2
+'
+
+inconsistency() {
+   # Simulate that the server initially reports $2 as the ref
+   # corresponding to $1, and after that, $1 as the ref corresponding to
+   # $1. This corresponds to the real-life situation where the server's
+   # repository appears to change during negotiation, for example, when
+   # different servers in a load-balancing arrangement serve (stateless)
+   # RPCs during a single negotiation.
+   printf "s/%s/%s/" \
+  $(git -C "$REPO" rev-parse $1 | tr -d "\n") \
+  $(git -C "$REPO" rev-parse $2 | tr -d "\n") \
+  >"$HTTPD_ROOT_PATH/one-time-sed"
+}
+
+test_expect_success 'server is initially ahead - no ref in want' '
+   git -C "$REPO" config uploadpack.allowRefInWant false &&
+   rm -rf local &&
+   cp -r "$LOCAL_PRISTINE" local &&
+   inconsistency master 1234567890123456789012345678901234567890 &&
+   test_must_fail git -C local fetch 2>err &&
+ 

[PATCH 4/8] fetch: refactor the population of peer ref OIDs

2018-06-05 Thread Brandon Williams
Populate peer ref OIDs in get_ref_map instead of do_fetch. Besides
tightening scopes of variables in the code, this also prepares for
get_ref_map being able to be called multiple times within do_fetch.

Signed-off-by: Brandon Williams 
---
 builtin/fetch.c | 36 ++--
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index ea5b9669a..545635448 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -351,6 +351,7 @@ static struct ref *get_ref_map(struct transport *transport,
/* opportunistically-updated references: */
struct ref *orefs = NULL, **oref_tail = 
 
+   struct string_list existing_refs = STRING_LIST_INIT_DUP;
const struct ref *remote_refs;
 
if (rs->nr)
@@ -458,7 +459,23 @@ static struct ref *get_ref_map(struct transport *transport,
tail = >next;
}
 
-   return ref_remove_duplicates(ref_map);
+   ref_map = ref_remove_duplicates(ref_map);
+
+   for_each_ref(add_existing, _refs);
+   for (rm = ref_map; rm; rm = rm->next) {
+   if (rm->peer_ref) {
+   struct string_list_item *peer_item =
+   string_list_lookup(_refs,
+  rm->peer_ref->name);
+   if (peer_item) {
+   struct object_id *old_oid = peer_item->util;
+   oidcpy(>peer_ref->old_oid, old_oid);
+   }
+   }
+   }
+   string_list_clear(_refs, 1);
+
+   return ref_map;
 }
 
 #define STORE_REF_ERROR_OTHER 1
@@ -1110,14 +1127,10 @@ static void backfill_tags(struct transport *transport, 
struct ref *ref_map)
 static int do_fetch(struct transport *transport,
struct refspec *rs)
 {
-   struct string_list existing_refs = STRING_LIST_INIT_DUP;
struct ref *ref_map;
-   struct ref *rm;
int autotags = (transport->remote->fetch_tags == 1);
int retcode = 0;
 
-   for_each_ref(add_existing, _refs);
-
if (tags == TAGS_DEFAULT) {
if (transport->remote->fetch_tags == 2)
tags = TAGS_SET;
@@ -1136,18 +1149,6 @@ static int do_fetch(struct transport *transport,
if (!update_head_ok)
check_not_current_branch(ref_map);
 
-   for (rm = ref_map; rm; rm = rm->next) {
-   if (rm->peer_ref) {
-   struct string_list_item *peer_item =
-   string_list_lookup(_refs,
-  rm->peer_ref->name);
-   if (peer_item) {
-   struct object_id *old_oid = peer_item->util;
-   oidcpy(>peer_ref->old_oid, old_oid);
-   }
-   }
-   }
-
if (tags == TAGS_DEFAULT && autotags)
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, "1");
if (prune) {
@@ -1183,7 +1184,6 @@ static int do_fetch(struct transport *transport,
}
 
  cleanup:
-   string_list_clear(_refs, 1);
return retcode;
 }
 
-- 
2.17.1.1185.g55be947832-goog



Re: [PATCH 4/6] unpack-tress: convert clear_ce_flags* to avoid the_index

2018-06-05 Thread Ramsay Jones



On 05/06/18 16:43, Nguyễn Thái Ngọc Duy wrote:
> Prior to fba92be8f7, this code implicitly (and incorrectly) assumes
> the_index when running the exclude machinery. fba92be8f7 helps show
> this problem clearer because unpack-trees operation is supposed to
> work on whatever index the caller specifies... not specifically
> the_index.
> 
> Update the code to use "istate" argument that's originally from
> mark_new_skip_worktree(). From the call sites, both in unpack_trees(),
> you can see that this function works on two separate indexes:
> o->src_index and o->result. The second mark_new_skip_worktree() so far
> has incorecctly applied exclude rules on o->src_index instead of
> o->result. It's unclear what is the consequences of this, but it's
> definitely wrong.
> 
> [1] fba92be8f7 (dir: convert is_excluded_from_list to take an index -
> 2017-05-05)
> 
> Signed-off-by: Nguyễn Thái Ngọc Duy 
> ---
>  unpack-trees.c | 29 +
>  1 file changed, 17 insertions(+), 12 deletions(-)
> 
> diff --git a/unpack-trees.c b/unpack-trees.c
> index 60d1138e08..5268de7af5 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -1085,13 +1085,15 @@ static int unpack_callback(int n, unsigned long mask, 
> unsigned long dirmask, str
>   return mask;
>  }
>  
> -static int clear_ce_flags_1(struct cache_entry **cache, int nr,
> +static int clear_ce_flags_1(struct index_state *istate,
> + struct cache_entry **cache, int nr,
>   struct strbuf *prefix,
>   int select_mask, int clear_mask,
>   struct exclude_list *el, int defval);
>  
>  /* Whole directory matching */
> -static int clear_ce_flags_dir(struct cache_entry **cache, int nr,
> +static int clear_ce_flags_dir(struct index_state *istate,
> +   struct cache_entry **cache, int nr,
> struct strbuf *prefix,
> char *basename,
> int select_mask, int clear_mask,
> @@ -1100,7 +1102,7 @@ static int clear_ce_flags_dir(struct cache_entry 
> **cache, int nr,
>   struct cache_entry **cache_end;
>   int dtype = DT_DIR;
>   int ret = is_excluded_from_list(prefix->buf, prefix->len,
> - basename, , el, _index);
> + basename, , el, istate);
>   int rc;
>  
>   strbuf_addch(prefix, '/');
> @@ -1122,7 +1124,7 @@ static int clear_ce_flags_dir(struct cache_entry 
> **cache, int nr,
>* calling clear_ce_flags_1(). That function will call
>* the expensive is_excluded_from_list() on every entry.
>*/
> - rc = clear_ce_flags_1(cache, cache_end - cache,
> + rc = clear_ce_flags_1(istate, cache, cache_end - cache,
> prefix,
> select_mask, clear_mask,
> el, ret);
> @@ -1145,7 +1147,8 @@ static int clear_ce_flags_dir(struct cache_entry 
> **cache, int nr,
>   *   cache[0]->name[0..(prefix_len-1)]
>   * Top level path has prefix_len zero.
>   */
> -static int clear_ce_flags_1(struct cache_entry **cache, int nr,
> +static int clear_ce_flags_1(struct index_state *istate,
> + struct cache_entry **cache, int nr,
>   struct strbuf *prefix,
>   int select_mask, int clear_mask,
>   struct exclude_list *el, int defval)
> @@ -1179,7 +1182,7 @@ static int clear_ce_flags_1(struct cache_entry **cache, 
> int nr,
>   len = slash - name;
>   strbuf_add(prefix, name, len);
>  
> - processed = clear_ce_flags_dir(cache, cache_end - cache,
> + processed = clear_ce_flags_dir(istate, cache, cache_end 
> - cache,
>  prefix,
>  prefix->buf + 
> prefix->len - len,
>  select_mask, clear_mask,
> @@ -1193,7 +1196,7 @@ static int clear_ce_flags_1(struct cache_entry **cache, 
> int nr,
>   }
>  
>   strbuf_addch(prefix, '/');
> - cache += clear_ce_flags_1(cache, cache_end - cache,
> + cache += clear_ce_flags_1(istate, cache, cache_end - 
> cache,
> prefix,
> select_mask, clear_mask, el, 
> defval);
>   strbuf_setlen(prefix, prefix->len - len - 1);
> @@ -1203,7 +1206,7 @@ static int clear_ce_flags_1(struct cache_entry **cache, 
> int nr,
>   /* Non-directory */
>   dtype = ce_to_dtype(ce);
>   ret = is_excluded_from_list(ce->name, ce_namelen(ce),
> - name, , el, _index);
> +   

Re: [PATCH 3/6] unpack-trees: don't shadow global var the_index

2018-06-05 Thread Ramsay Jones



On 05/06/18 16:43, Nguyễn Thái Ngọc Duy wrote:
> This function mark_new_skip_worktree() has an argument named the_index
> which is also the name of a global variable. While they have different
> types (the global the_index is not a pointer) mistakes can easily
> happen and it's also confusing for readers. Rename the function
> argument to something other than the_index.
> 
> Signed-off-by: Nguyễn Thái Ngọc Duy 
> ---
>  unpack-trees.c | 9 -
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/unpack-trees.c b/unpack-trees.c
> index 5d06aa9c98..60d1138e08 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -1231,7 +1231,7 @@ static int clear_ce_flags(struct cache_entry **cache, 
> int nr,
>   * Set/Clear CE_NEW_SKIP_WORKTREE according to $GIT_DIR/info/sparse-checkout
>   */
>  static void mark_new_skip_worktree(struct exclude_list *el,
> -struct index_state *the_index,
> +struct index_state *istate,
>  int select_flag, int skip_wt_flag)
>  {
>   int i;
> @@ -1240,8 +1240,8 @@ static void mark_new_skip_worktree(struct exclude_list 
> *el,
>* 1. Pretend the narrowest worktree: only unmerged entries
>* are checked out
>*/
> - for (i = 0; i < the_index->cache_nr; i++) {
> - struct cache_entry *ce = the_index->cache[i];
> + for (i = 0; i < istate->cache_nr; i++) {
> + struct cache_entry *ce = istate->cache[i];
>  
>   if (select_flag && !(ce->ce_flags & select_flag))
>   continue;
> @@ -1256,8 +1256,7 @@ static void mark_new_skip_worktree(struct exclude_list 
> *el,
>* 2. Widen worktree according to sparse-checkout file.
>* Matched entries will have skip_wt_flag cleared (i.e. "in")
>*/
> - clear_ce_flags(the_index->cache, the_index->cache_nr,
> -select_flag, skip_wt_flag, el);
> + clear_ce_flags(istate, select_flag, skip_wt_flag, el);

This looks a bit suspect. The clear_ce_flags() function has
not been modified to take a 'struct index_state *' as its first
parameter, right? (If you look back at the first hunk header, you
will see that it still takes 'struct cache_entry **, int, ...') ;-)

ATB,
Ramsay Jones



Where is git checkout --orphan implemented at.

2018-06-05 Thread Sean Hunt
I would like to see the source code to git checkout --orphan so I can learn how 
it works and so I can manually do what it does by hand on making a new branch 
with no history in the refs folder. I can only do it on my iPhone as my laptop 
has no internet or way to do it there, and the program on my iPhone does not 
have the method implemented yet visually to do it, forcing manual creation of 
the orphan branch by hand in the end. If the public github had all the codes to 
all commands and subcommands like the one above it would be nice or well at 
least a file that explains the source file each command and subcommands are 
from so that way a person like me can use as a reference to make our own git 
gui that has 100% of command line git features.

Sent from my iPhone

Re: [PATCH 6/6] Forbid "the_index" in dir.c and unpack-trees.c

2018-06-05 Thread Brandon Williams
On 06/05, Nguyễn Thái Ngọc Duy wrote:
> Use of global variables like the_index makes it very hard to follow
> the code flow, especially when it's buried deep in some minor helper
> function.
> 
> We are gradually avoiding global variables, this hopefully will make
> it one step closer. The end game is, the set of "library" source files
> will have just one macro set: "LIB_CODE" (or something). This macro
> will prevent access to most (if not all) global variables in those
> files. We can't do that now, so we just prevent one thing at a time.
> 
> Signed-off-by: Nguyễn Thái Ngọc Duy 
> ---
>  Should I keep my trick of defining the_index to
>  the_index_should_not_be_used_here? It may help somewhat when people
>  accidentally use the_index.

We already have a set of index compatibility macros which this could
maybe be a part of.  Though if we want to go this way I think we should
do the reverse of this and instead have GLOBAL_INDEX which must be
defined in order to have access to the global.  That way new files are
already protected by this and it may be easier to reduce the number of
these defines through our codebase than to add them to every single
file.

> 
>  cache.h| 2 ++
>  dir.c  | 2 ++
>  unpack-trees.c | 2 ++
>  3 files changed, 6 insertions(+)
> 
> diff --git a/cache.h b/cache.h
> index 89a107a7f7..ecc96ccb0e 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -330,7 +330,9 @@ struct index_state {
>   struct ewah_bitmap *fsmonitor_dirty;
>  };
>  
> +#ifndef NO_GLOBAL_INDEX
>  extern struct index_state the_index;
> +#endif
>  
>  /* Name hashing */
>  extern int test_lazy_init_name_hash(struct index_state *istate, int 
> try_threaded);
> diff --git a/dir.c b/dir.c
> index ccf8b4975e..74d848db5a 100644
> --- a/dir.c
> +++ b/dir.c
> @@ -8,6 +8,8 @@
>   *Junio Hamano, 2005-2006
>   */
>  #define NO_THE_INDEX_COMPATIBILITY_MACROS
> +/* Do not use the_index. You should have access to it via function arg */
> +#define NO_GLOBAL_INDEX
>  #include "cache.h"
>  #include "config.h"
>  #include "dir.h"
> diff --git a/unpack-trees.c b/unpack-trees.c
> index 3ace82ca27..9aebe9762b 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -1,4 +1,6 @@
>  #define NO_THE_INDEX_COMPATIBILITY_MACROS
> +/* Do not use the_index here, you probably want o->src_index */
> +#define NO_GLOBAL_INDEX
>  #include "cache.h"
>  #include "argv-array.h"
>  #include "repository.h"
> -- 
> 2.18.0.rc0.333.g22e6ee6cdf
> 

-- 
Brandon Williams


Re: [PATCH v4 00/21] Integrate commit-graph into 'fsck' and 'gc'

2018-06-05 Thread Derrick Stolee

On 6/5/2018 10:51 AM, Ævar Arnfjörð Bjarmason wrote:

On Mon, Jun 4, 2018 at 6:52 PM, Derrick Stolee  wrote:

Thanks for the feedback on v3. There were several small cleanups, but
perhaps the biggest change is the addition of "commit-graph: use
string-list API for input" which makes "commit-graph: add '--reachable'
option" much simpler.

Do you have a version of this pushed somewhere? Your fsck/upstream
branch conflicts with e.g. long-since-merged nd/repack-keep-pack.


Sorry I forgot to push. It is now available on GitHub [1]. I based my 
commits on 'next' including the core.commitGraph fix for 
t5318-commit-graph.sh I already sent [2].


[1] https://github.com/derrickstolee/git/tree/fsck/upstream
    fsck/upstream branch matches this patch series

[2] 
https://public-inbox.org/git/20180604123906.136417-1-dsto...@microsoft.com/

    [PATCH] t5318-commit-graph.sh: use core.commitGraph


[PATCH v7 1/2] json_writer: new routines to create data in JSON format

2018-06-05 Thread git
From: Jeff Hostetler 

Add a series of jw_ routines and "struct json_writer" structure to compose
JSON data.  The resulting string data can then be output by commands wanting
to support a JSON output format.

The json-writer routines can be used to generate structured data in a
JSON-like format.  We say "JSON-like" because we do not enforce the Unicode
(usually UTF-8) requirement on string fields.  Internally, Git does not
necessarily have Unicode/UTF-8 data for most fields, so it is currently
unclear the best way to enforce that requirement.  For example, on Linx
pathnames can contain arbitrary 8-bit character data, so a command like
"status" would not know how to encode the reported pathnames.  We may want
to revisit this (or double encode such strings) in the future.

The initial use for the json-writer routines is for generating telemetry
data for executed Git commands.  Later, we may want to use them in other
commands, such as status.

Helped-by: René Scharfe 
Helped-by: Wink Saville 
Helped-by: Ramsay Jones 
Signed-off-by: Jeff Hostetler 
---
 Makefile|   2 +
 json-writer.c   | 419 
 json-writer.h   | 113 +
 t/helper/test-json-writer.c | 572 
 t/t0019-json-writer.sh  | 236 ++
 5 files changed, 1342 insertions(+)
 create mode 100644 json-writer.c
 create mode 100644 json-writer.h
 create mode 100644 t/helper/test-json-writer.c
 create mode 100755 t/t0019-json-writer.sh

diff --git a/Makefile b/Makefile
index a1d8775..4ae6946 100644
--- a/Makefile
+++ b/Makefile
@@ -666,6 +666,7 @@ TEST_PROGRAMS_NEED_X += test-fake-ssh
 TEST_PROGRAMS_NEED_X += test-genrandom
 TEST_PROGRAMS_NEED_X += test-hashmap
 TEST_PROGRAMS_NEED_X += test-index-version
+TEST_PROGRAMS_NEED_X += test-json-writer
 TEST_PROGRAMS_NEED_X += test-lazy-init-name-hash
 TEST_PROGRAMS_NEED_X += test-line-buffer
 TEST_PROGRAMS_NEED_X += test-match-trees
@@ -820,6 +821,7 @@ LIB_OBJS += hashmap.o
 LIB_OBJS += help.o
 LIB_OBJS += hex.o
 LIB_OBJS += ident.o
+LIB_OBJS += json-writer.o
 LIB_OBJS += kwset.o
 LIB_OBJS += levenshtein.o
 LIB_OBJS += line-log.o
diff --git a/json-writer.c b/json-writer.c
new file mode 100644
index 000..f35ce19
--- /dev/null
+++ b/json-writer.c
@@ -0,0 +1,419 @@
+#include "cache.h"
+#include "json-writer.h"
+
+void jw_init(struct json_writer *jw)
+{
+   strbuf_reset(>json);
+   strbuf_reset(>open_stack);
+   strbuf_reset(>first_stack);
+   jw->pretty = 0;
+}
+
+void jw_release(struct json_writer *jw)
+{
+   strbuf_release(>json);
+   strbuf_release(>open_stack);
+   strbuf_release(>first_stack);
+}
+
+/*
+ * Append JSON-quoted version of the given string to 'out'.
+ */
+static void append_quoted_string(struct strbuf *out, const char *in)
+{
+   unsigned char c;
+
+   strbuf_addch(out, '"');
+   while ((c = *in++) != '\0') {
+   if (c == '"')
+   strbuf_addstr(out, "\\\"");
+   else if (c == '\\')
+   strbuf_addstr(out, "");
+   else if (c == '\n')
+   strbuf_addstr(out, "\\n");
+   else if (c == '\r')
+   strbuf_addstr(out, "\\r");
+   else if (c == '\t')
+   strbuf_addstr(out, "\\t");
+   else if (c == '\f')
+   strbuf_addstr(out, "\\f");
+   else if (c == '\b')
+   strbuf_addstr(out, "\\b");
+   else if (c < 0x20)
+   strbuf_addf(out, "\\u%04x", c);
+   else
+   strbuf_addch(out, c);
+   }
+   strbuf_addch(out, '"');
+}
+
+static inline void indent_pretty(struct json_writer *jw)
+{
+   int k;
+
+   if (!jw->pretty)
+   return;
+
+   for (k = 0; k < jw->open_stack.len; k++)
+   strbuf_addstr(>json, "  ");
+}
+
+/*
+ * Begin an object or array (either top-level or nested within the currently
+ * open object or array).
+ */
+static inline void begin(struct json_writer *jw, char ch_open, int pretty)
+{
+   jw->pretty = pretty;
+
+   strbuf_addch(>json, ch_open);
+
+   strbuf_addch(>open_stack, ch_open);
+   strbuf_addch(>first_stack, '1');
+}
+
+/*
+ * Assert that the top of the open-stack is an object.
+ */
+static inline void assert_in_object(const struct json_writer *jw, const char 
*key)
+{
+   if (!jw->open_stack.len)
+   BUG("json-writer: object: missing jw_object_begin(): '%s'", 
key);
+   if (jw->open_stack.buf[jw->open_stack.len - 1] != '{')
+   BUG("json-writer: object: not in object: '%s'", key);
+}
+
+/*
+ * Assert that the top of the open-stack is an array.
+ */
+static inline void assert_in_array(const struct json_writer *jw)
+{
+   if (!jw->open_stack.len)
+   BUG("json-writer: array: missing jw_array_begin()");
+   if 

[PATCH v7 2/2] json-writer: t0019: add Python unit test

2018-06-05 Thread git
From: Jeff Hostetler 

Test json-writer output using Python.

Signed-off-by: Jeff Hostetler 
---
 t/t0019-json-writer.sh  | 38 ++
 t/t0019/parse_json_1.py | 35 +++
 2 files changed, 73 insertions(+)
 create mode 100644 t/t0019/parse_json_1.py

diff --git a/t/t0019-json-writer.sh b/t/t0019-json-writer.sh
index c9c2e23..951cd89 100755
--- a/t/t0019-json-writer.sh
+++ b/t/t0019-json-writer.sh
@@ -233,4 +233,42 @@ test_expect_success 'inline array with no members' '
test_cmp expect actual
 '
 
+# As a sanity check, ask Python to parse our generated JSON.  Let Python
+# recursively dump the resulting dictionary in sorted order.  Confirm that
+# that matches our expectations.
+test_expect_success PYTHON 'parse JSON using Python' '
+   cat >expect <<-\EOF &&
+   a abc
+   b 42
+   sub1 dict
+   sub1.c 3.14
+   sub1.d True
+   sub1.sub2 list
+   sub1.sub2[0] False
+   sub1.sub2[1] dict
+   sub1.sub2[1].g 0
+   sub1.sub2[1].h 1
+   sub1.sub2[2] None
+   EOF
+   test-json-writer >output.json \
+   @object \
+   @object-string a abc \
+   @object-int b 42 \
+   @object-object "sub1" \
+   @object-double c 2 3.140 \
+   @object-true d \
+   @object-array "sub2" \
+   @array-false \
+   @array-object \
+   @object-int g 0 \
+   @object-int h 1 \
+   @end \
+   @array-null \
+   @end \
+   @end \
+   @end &&
+   python "$TEST_DIRECTORY"/t0019/parse_json_1.py actual &&
+   test_cmp expect actual
+'
+
 test_done
diff --git a/t/t0019/parse_json_1.py b/t/t0019/parse_json_1.py
new file mode 100644
index 000..9d928a3
--- /dev/null
+++ b/t/t0019/parse_json_1.py
@@ -0,0 +1,35 @@
+import os
+import sys
+import json
+
+def dump_item(label_input, v):
+if type(v) is dict:
+print("%s dict" % (label_input))
+dump_dict(label_input, v)
+elif type(v) is list:
+print("%s list" % (label_input))
+dump_list(label_input, v)
+else:
+print("%s %s" % (label_input, v))
+
+def dump_list(label_input, list_input):
+ix = 0
+for v in list_input:
+label = ("%s[%d]" % (label_input, ix))
+dump_item(label, v)
+ix += 1
+return
+  
+def dump_dict(label_input, dict_input):
+for k in sorted(dict_input.iterkeys()):
+v = dict_input[k]
+if (len(label_input) > 0):
+label = ("%s.%s" % (label_input, k))
+else:
+label = k
+dump_item(label, v)
+return
+
+for line in sys.stdin:
+data = json.loads(line)
+dump_dict("", data)
-- 
2.9.3



[PATCH v7 0/2] json-writer V7

2018-06-05 Thread git
From: Jeff Hostetler 

Here is V7 of my json-writer patches.  Please replace the existing V5/V6
version of jh/json-writer branch with this one.

This version cleans up the die()-vs-BUG() issue that Duy mentioned recently.
It also fixes a formatting bug when composing empty sub-objects/-arrays.

It also includes a new Python-based test to consume the generated JSON.

I plan to use the json-writer routines in a followup telemetry patch series.

Jeff Hostetler (2):
  json_writer: new routines to create data in JSON format
  json-writer: t0019: add Python unit test

 Makefile|   2 +
 json-writer.c   | 419 
 json-writer.h   | 113 +
 t/helper/test-json-writer.c | 572 
 t/t0019-json-writer.sh  | 274 +
 t/t0019/parse_json_1.py |  35 +++
 6 files changed, 1415 insertions(+)
 create mode 100644 json-writer.c
 create mode 100644 json-writer.h
 create mode 100644 t/helper/test-json-writer.c
 create mode 100755 t/t0019-json-writer.sh
 create mode 100644 t/t0019/parse_json_1.py

-- 
2.9.3



Re: [PATCH] refspec: initalize `refspec_item` in `valid_fetch_refspec()`

2018-06-05 Thread Brandon Williams
On 06/04, Ævar Arnfjörð Bjarmason wrote:
> 
> On Mon, Jun 04 2018, Martin Ågren wrote:
> 
> > We allocate a `struct refspec_item` on the stack without initializing
> > it. In particular, its `dst` and `src` members will contain some random
> > data from the stack. When we later call `refspec_item_clear()`, it will
> > call `free()` on those pointers. So if the call to `parse_refspec()` did
> > not assign to them, we will be freeing some random "pointers". This is
> > undefined behavior.
> >
> > To the best of my understanding, this cannot currently be triggered by
> > user-provided data. And for what it's worth, the test-suite does not
> > trigger this with SANITIZE=address. It can be provoked by calling
> > `valid_fetch_refspec(":*")`.
> >
> > Zero the struct, as is done in other users of `struct refspec_item`.
> >
> > Signed-off-by: Martin Ågren 
> > ---
> > I found some time to look into this. It does not seem to be a
> > user-visible bug, so not particularly critical.
> >
> >  refspec.c | 5 -
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> >
> > diff --git a/refspec.c b/refspec.c
> > index ada7854f7a..7dd7e361e5 100644
> > --- a/refspec.c
> > +++ b/refspec.c
> > @@ -189,7 +189,10 @@ void refspec_clear(struct refspec *rs)
> >  int valid_fetch_refspec(const char *fetch_refspec_str)
> >  {
> > struct refspec_item refspec;
> > -   int ret = parse_refspec(, fetch_refspec_str, REFSPEC_FETCH);
> > +   int ret;
> > +
> > +   memset(, 0, sizeof(refspec));
> > +   ret = parse_refspec(, fetch_refspec_str, REFSPEC_FETCH);
> > refspec_item_clear();
> > return ret;
> >  }
> 
> I think this makes more sense instead of this fix:

I like this diff.  The only nit I have is the same as what Martin
pointed out.  At least this way all memory will be initialized by the
time a call to parse_refspec is made.

> 
> diff --git a/builtin/clone.c b/builtin/clone.c
> index 99e73dae85..74a804f2e8 100644
> --- a/builtin/clone.c
> +++ b/builtin/clone.c
> @@ -1077,7 +1077,7 @@ int cmd_clone(int argc, const char **argv, const char 
> *prefix)
>   if (option_required_reference.nr || option_optional_reference.nr)
>   setup_reference();
> 
> - refspec_item_init(, value.buf, REFSPEC_FETCH);
> + refspec_item_init_or_die(, value.buf, REFSPEC_FETCH);
> 
>   strbuf_reset();
> 
> diff --git a/builtin/pull.c b/builtin/pull.c
> index 1f2ecf3a88..bb64631d98 100644
> --- a/builtin/pull.c
> +++ b/builtin/pull.c
> @@ -684,7 +684,7 @@ static const char *get_tracking_branch(const char 
> *remote, const char *refspec)
>   const char *spec_src;
>   const char *merge_branch;
> 
> - refspec_item_init(, refspec, REFSPEC_FETCH);
> + refspec_item_init_or_die(, refspec, REFSPEC_FETCH);
>   spec_src = spec.src;
>   if (!*spec_src || !strcmp(spec_src, "HEAD"))
>   spec_src = "HEAD";
> diff --git a/refspec.c b/refspec.c
> index 78edc48ae8..8806df0fd2 100644
> --- a/refspec.c
> +++ b/refspec.c
> @@ -124,11 +124,16 @@ static int parse_refspec(struct refspec_item *item, 
> const char *refspec, int fet
>   return 1;
>  }
> 
> -void refspec_item_init(struct refspec_item *item, const char *refspec, int 
> fetch)
> +int refspec_item_init(struct refspec_item *item, const char *refspec, int 
> fetch)
>  {
>   memset(item, 0, sizeof(*item));
> + int ret = parse_refspec(item, refspec, fetch);
> + return ret;
> +}
> 
> - if (!parse_refspec(item, refspec, fetch))
> +void refspec_item_init_or_die(struct refspec_item *item, const char 
> *refspec, int fetch)
> +{
> + if (!refspec_item_init(item, refspec, fetch))
>   die("Invalid refspec '%s'", refspec);
>  }
> 
> @@ -152,7 +157,7 @@ void refspec_append(struct refspec *rs, const char 
> *refspec)
>  {
>   struct refspec_item item;
> 
> - refspec_item_init(, refspec, rs->fetch);
> + refspec_item_init_or_die(, refspec, rs->fetch);
> 
>   ALLOC_GROW(rs->items, rs->nr + 1, rs->alloc);
>   rs->items[rs->nr++] = item;
> @@ -191,7 +196,7 @@ void refspec_clear(struct refspec *rs)
>  int valid_fetch_refspec(const char *fetch_refspec_str)
>  {
>   struct refspec_item refspec;
> - int ret = parse_refspec(, fetch_refspec_str, REFSPEC_FETCH);
> + int ret = refspec_item_init(, fetch_refspec_str, REFSPEC_FETCH);
>   refspec_item_clear();
>   return ret;
>  }
> diff --git a/refspec.h b/refspec.h
> index 3a9363887c..ed5d997f7f 100644
> --- a/refspec.h
> +++ b/refspec.h
> @@ -32,7 +32,8 @@ struct refspec {
>   int fetch;
>  };
> 
> -void refspec_item_init(struct refspec_item *item, const char *refspec, int 
> fetch);
> +int refspec_item_init(struct refspec_item *item, const char *refspec, int 
> fetch);
> +void refspec_item_init_or_die(struct refspec_item *item, const char 
> *refspec, int fetch);
>  void refspec_item_clear(struct refspec_item *item);
>  void refspec_init(struct refspec *rs, int fetch);
>  void refspec_append(struct refspec *rs, const char *refspec);
> 
> I.e. let's 

Re: [RFC PATCH 1/2] docs: reflect supported fetch options of git pull

2018-06-05 Thread Duy Nguyen
On Mon, Jun 4, 2018 at 11:50 PM, Rafael Ascensão  wrote:
> `git pull` understands some options of `git fetch` which then uses in
> its operation. The documentation of `git pull` doesn't reflect this
> clearly, showing options that are not yet supported (e.g. `--deepen`)
> and omitting options that are supported (e.g. `--prune`).
>
> Make the documentation consistent with present behaviour by hiding
> unavailable options only.

A better option may be making git-pull accept those options as well. I
see no reason git-pull should support options that git-fetch does (at
least most of them). But I would understand if you would not want to
go touch the code. It's basically a couple of OPT_PASSTRHU though so
not very hard to do.

PS. Anybody up to making parse-options accept multiple struct option
arrays? This way we can have much better option passthru without
specifying them again and again.

>
> Reported-by: Marius Giurgi 
> Signed-off-by: Rafael Ascensão 
> ---
>
> Marius asked on freenode.#git if pull supported `--prune`, upon
> inspection seems like the man page was missing some of the supported
> options and listing others that are not supported via pull.
>
> Here's a quick summary of the changes to pull's documentation:
>
> add:  remove:
>   --dry-run --deepen=
>   -p, --prune   --shallow-since=
>   --refmap=--shallow-exclude=
>   -t, --tags-u, --update-head-ok
>   -j, --jobs=
>
>  Documentation/fetch-options.txt | 20 +---
>  1 file changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
> index 8631e365f..da17d27c1 100644
> --- a/Documentation/fetch-options.txt
> +++ b/Documentation/fetch-options.txt
> @@ -14,6 +14,7 @@
> linkgit:git-clone[1]), deepen or shorten the history to the specified
> number of commits. Tags for the deepened commits are not fetched.
>
> +ifndef::git-pull[]
>  --deepen=::
> Similar to --depth, except it specifies the number of commits
> from the current shallow boundary instead of from the tip of
> @@ -27,6 +28,7 @@
> Deepen or shorten the history of a shallow repository to
> exclude commits reachable from a specified remote branch or tag.
> This option can be specified multiple times.
> +endif::git-pull[]
>
>  --unshallow::
> If the source repository is complete, convert a shallow
> @@ -42,10 +44,8 @@ the current repository has the same history as the source 
> repository.
> .git/shallow. This option updates .git/shallow and accept such
> refs.
>
> -ifndef::git-pull[]
>  --dry-run::
> Show what would be done, without making any changes.
> -endif::git-pull[]
>
>  -f::
>  --force::
> @@ -63,6 +63,7 @@ ifndef::git-pull[]
>  --multiple::
> Allow several  and  arguments to be
> specified. No s may be specified.
> +endif::git-pull[]
>
>  -p::
>  --prune::
> @@ -76,8 +77,14 @@ ifndef::git-pull[]
> subject to pruning. Supplying `--prune-tags` is a shorthand for
> providing the tag refspec.
>  +
> +ifdef::git-pull[]
> +See the PRUNING section on linkgit:git-fetch[1] for more details.
> +endif::git-pull[]
> +ifndef::git-pull[]
>  See the PRUNING section below for more details.
> +endif::git-pull[]
>
> +ifndef::git-pull[]
>  -P::
>  --prune-tags::
> Before fetching, remove any local tags that no longer exist on
> @@ -89,9 +96,6 @@ See the PRUNING section below for more details.
>  +
>  See the PRUNING section below for more details.
>
> -endif::git-pull[]
> -
> -ifndef::git-pull[]
>  -n::
>  endif::git-pull[]
>  --no-tags::
> @@ -101,7 +105,6 @@ endif::git-pull[]
> behavior for a remote may be specified with the remote..tagOpt
> setting. See linkgit:git-config[1].
>
> -ifndef::git-pull[]
>  --refmap=::
> When fetching refs listed on the command line, use the
> specified refspec (can be given more than once) to map the
> @@ -119,6 +122,7 @@ ifndef::git-pull[]
> is used (though tags may be pruned anyway if they are also the
> destination of an explicit refspec; see `--prune`).
>
> +ifndef::git-pull[]
>  --recurse-submodules[=yes|on-demand|no]::
> This option controls if and under what conditions new commits of
> populated submodules should be fetched too. It can be used as a
> @@ -129,6 +133,7 @@ ifndef::git-pull[]
> when the superproject retrieves a commit that updates the submodule's
> reference to a commit that isn't already in the local submodule
> clone.
> +endif::git-pull[]
>
>  -j::
>  --jobs=::
> @@ -137,6 +142,7 @@ ifndef::git-pull[]
> submodules will be faster. By default submodules will be fetched
> one at a time.
>
> +ifndef::git-pull[]
>  --no-recurse-submodules::
> Disable recursive fetching of submodules (this has the same effect as
> using the `--recurse-submodules=no` option).
> 

Re: [PATCH v7 1/8] checkout tests: index should be clean after dwim checkout

2018-06-05 Thread SZEDER Gábor
> diff --git a/t/t2024-checkout-dwim.sh b/t/t2024-checkout-dwim.sh
> index 3e5ac81bd2..ed32828105 100755
> --- a/t/t2024-checkout-dwim.sh
> +++ b/t/t2024-checkout-dwim.sh
> @@ -23,6 +23,12 @@ test_branch_upstream () {
>   test_cmp expect.upstream actual.upstream
>  }
>  
> +status_uno_is_clean() {
> + >status.expect &&
> + git status -uno --porcelain >status.actual &&
> + test_cmp status.expect status.actual

This function could be written a tad simpler as:

  git status -uno --porcelain >status.actual &&
  test_must_be_empty status.actual



[PATCH 3/6] unpack-trees: don't shadow global var the_index

2018-06-05 Thread Nguyễn Thái Ngọc Duy
This function mark_new_skip_worktree() has an argument named the_index
which is also the name of a global variable. While they have different
types (the global the_index is not a pointer) mistakes can easily
happen and it's also confusing for readers. Rename the function
argument to something other than the_index.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 unpack-trees.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index 5d06aa9c98..60d1138e08 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1231,7 +1231,7 @@ static int clear_ce_flags(struct cache_entry **cache, int 
nr,
  * Set/Clear CE_NEW_SKIP_WORKTREE according to $GIT_DIR/info/sparse-checkout
  */
 static void mark_new_skip_worktree(struct exclude_list *el,
-  struct index_state *the_index,
+  struct index_state *istate,
   int select_flag, int skip_wt_flag)
 {
int i;
@@ -1240,8 +1240,8 @@ static void mark_new_skip_worktree(struct exclude_list 
*el,
 * 1. Pretend the narrowest worktree: only unmerged entries
 * are checked out
 */
-   for (i = 0; i < the_index->cache_nr; i++) {
-   struct cache_entry *ce = the_index->cache[i];
+   for (i = 0; i < istate->cache_nr; i++) {
+   struct cache_entry *ce = istate->cache[i];
 
if (select_flag && !(ce->ce_flags & select_flag))
continue;
@@ -1256,8 +1256,7 @@ static void mark_new_skip_worktree(struct exclude_list 
*el,
 * 2. Widen worktree according to sparse-checkout file.
 * Matched entries will have skip_wt_flag cleared (i.e. "in")
 */
-   clear_ce_flags(the_index->cache, the_index->cache_nr,
-  select_flag, skip_wt_flag, el);
+   clear_ce_flags(istate, select_flag, skip_wt_flag, el);
 }
 
 static int verify_absent(const struct cache_entry *,
-- 
2.18.0.rc0.333.g22e6ee6cdf



[PATCH 0/6] Fix "the_index" usage in unpack-trees.c

2018-06-05 Thread Nguyễn Thái Ngọc Duy
This is a potential problem that is exposed after Brandon kicked
the_index out of dir.c (big thanks!) and could be seen as a
continuation of bw/dir-c-stops-relying-on-the-index. Also thanks to
Elijah for helping analyze this code.

The last patch may be debatable. If we go this route, we may end up
with plenty of "#define NO_THIS" and "#define NO_THAT" until we
finally achieve "#define THIS_IS_TRUE_LIB_CODE" many years later.

Nguyễn Thái Ngọc Duy (6):
  unpack-trees: remove 'extern' on function declaration
  unpack-trees: add a note about path invalidation
  unpack-trees: don't shadow global var the_index
  unpack-tress: convert clear_ce_flags* to avoid the_index
  unpack-trees: avoid the_index in verify_absent()
  Forbid "the_index" in dir.c and unpack-trees.c

 cache.h|  2 ++
 dir.c  |  2 ++
 unpack-trees.c | 55 +-
 unpack-trees.h |  4 ++--
 4 files changed, 42 insertions(+), 21 deletions(-)

-- 
2.18.0.rc0.333.g22e6ee6cdf



[PATCH 1/6] unpack-trees: remove 'extern' on function declaration

2018-06-05 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 unpack-trees.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unpack-trees.h b/unpack-trees.h
index c2b434c606..534358fcc5 100644
--- a/unpack-trees.h
+++ b/unpack-trees.h
@@ -82,8 +82,8 @@ struct unpack_trees_options {
struct exclude_list *el; /* for internal use */
 };
 
-extern int unpack_trees(unsigned n, struct tree_desc *t,
-   struct unpack_trees_options *options);
+int unpack_trees(unsigned n, struct tree_desc *t,
+struct unpack_trees_options *options);
 
 int verify_uptodate(const struct cache_entry *ce,
struct unpack_trees_options *o);
-- 
2.18.0.rc0.333.g22e6ee6cdf



[PATCH 4/6] unpack-tress: convert clear_ce_flags* to avoid the_index

2018-06-05 Thread Nguyễn Thái Ngọc Duy
Prior to fba92be8f7, this code implicitly (and incorrectly) assumes
the_index when running the exclude machinery. fba92be8f7 helps show
this problem clearer because unpack-trees operation is supposed to
work on whatever index the caller specifies... not specifically
the_index.

Update the code to use "istate" argument that's originally from
mark_new_skip_worktree(). From the call sites, both in unpack_trees(),
you can see that this function works on two separate indexes:
o->src_index and o->result. The second mark_new_skip_worktree() so far
has incorecctly applied exclude rules on o->src_index instead of
o->result. It's unclear what is the consequences of this, but it's
definitely wrong.

[1] fba92be8f7 (dir: convert is_excluded_from_list to take an index -
2017-05-05)

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 unpack-trees.c | 29 +
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index 60d1138e08..5268de7af5 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1085,13 +1085,15 @@ static int unpack_callback(int n, unsigned long mask, 
unsigned long dirmask, str
return mask;
 }
 
-static int clear_ce_flags_1(struct cache_entry **cache, int nr,
+static int clear_ce_flags_1(struct index_state *istate,
+   struct cache_entry **cache, int nr,
struct strbuf *prefix,
int select_mask, int clear_mask,
struct exclude_list *el, int defval);
 
 /* Whole directory matching */
-static int clear_ce_flags_dir(struct cache_entry **cache, int nr,
+static int clear_ce_flags_dir(struct index_state *istate,
+ struct cache_entry **cache, int nr,
  struct strbuf *prefix,
  char *basename,
  int select_mask, int clear_mask,
@@ -1100,7 +1102,7 @@ static int clear_ce_flags_dir(struct cache_entry **cache, 
int nr,
struct cache_entry **cache_end;
int dtype = DT_DIR;
int ret = is_excluded_from_list(prefix->buf, prefix->len,
-   basename, , el, _index);
+   basename, , el, istate);
int rc;
 
strbuf_addch(prefix, '/');
@@ -1122,7 +1124,7 @@ static int clear_ce_flags_dir(struct cache_entry **cache, 
int nr,
 * calling clear_ce_flags_1(). That function will call
 * the expensive is_excluded_from_list() on every entry.
 */
-   rc = clear_ce_flags_1(cache, cache_end - cache,
+   rc = clear_ce_flags_1(istate, cache, cache_end - cache,
  prefix,
  select_mask, clear_mask,
  el, ret);
@@ -1145,7 +1147,8 @@ static int clear_ce_flags_dir(struct cache_entry **cache, 
int nr,
  *   cache[0]->name[0..(prefix_len-1)]
  * Top level path has prefix_len zero.
  */
-static int clear_ce_flags_1(struct cache_entry **cache, int nr,
+static int clear_ce_flags_1(struct index_state *istate,
+   struct cache_entry **cache, int nr,
struct strbuf *prefix,
int select_mask, int clear_mask,
struct exclude_list *el, int defval)
@@ -1179,7 +1182,7 @@ static int clear_ce_flags_1(struct cache_entry **cache, 
int nr,
len = slash - name;
strbuf_add(prefix, name, len);
 
-   processed = clear_ce_flags_dir(cache, cache_end - cache,
+   processed = clear_ce_flags_dir(istate, cache, cache_end 
- cache,
   prefix,
   prefix->buf + 
prefix->len - len,
   select_mask, clear_mask,
@@ -1193,7 +1196,7 @@ static int clear_ce_flags_1(struct cache_entry **cache, 
int nr,
}
 
strbuf_addch(prefix, '/');
-   cache += clear_ce_flags_1(cache, cache_end - cache,
+   cache += clear_ce_flags_1(istate, cache, cache_end - 
cache,
  prefix,
  select_mask, clear_mask, el, 
defval);
strbuf_setlen(prefix, prefix->len - len - 1);
@@ -1203,7 +1206,7 @@ static int clear_ce_flags_1(struct cache_entry **cache, 
int nr,
/* Non-directory */
dtype = ce_to_dtype(ce);
ret = is_excluded_from_list(ce->name, ce_namelen(ce),
-   name, , el, _index);
+   name, , el, istate);
if (ret < 0)
ret = defval;
if (ret > 0)
@@ -1213,15 +1216,17 @@ static int 

[PATCH 6/6] Forbid "the_index" in dir.c and unpack-trees.c

2018-06-05 Thread Nguyễn Thái Ngọc Duy
Use of global variables like the_index makes it very hard to follow
the code flow, especially when it's buried deep in some minor helper
function.

We are gradually avoiding global variables, this hopefully will make
it one step closer. The end game is, the set of "library" source files
will have just one macro set: "LIB_CODE" (or something). This macro
will prevent access to most (if not all) global variables in those
files. We can't do that now, so we just prevent one thing at a time.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Should I keep my trick of defining the_index to
 the_index_should_not_be_used_here? It may help somewhat when people
 accidentally use the_index.

 cache.h| 2 ++
 dir.c  | 2 ++
 unpack-trees.c | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/cache.h b/cache.h
index 89a107a7f7..ecc96ccb0e 100644
--- a/cache.h
+++ b/cache.h
@@ -330,7 +330,9 @@ struct index_state {
struct ewah_bitmap *fsmonitor_dirty;
 };
 
+#ifndef NO_GLOBAL_INDEX
 extern struct index_state the_index;
+#endif
 
 /* Name hashing */
 extern int test_lazy_init_name_hash(struct index_state *istate, int 
try_threaded);
diff --git a/dir.c b/dir.c
index ccf8b4975e..74d848db5a 100644
--- a/dir.c
+++ b/dir.c
@@ -8,6 +8,8 @@
  *  Junio Hamano, 2005-2006
  */
 #define NO_THE_INDEX_COMPATIBILITY_MACROS
+/* Do not use the_index. You should have access to it via function arg */
+#define NO_GLOBAL_INDEX
 #include "cache.h"
 #include "config.h"
 #include "dir.h"
diff --git a/unpack-trees.c b/unpack-trees.c
index 3ace82ca27..9aebe9762b 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1,4 +1,6 @@
 #define NO_THE_INDEX_COMPATIBILITY_MACROS
+/* Do not use the_index here, you probably want o->src_index */
+#define NO_GLOBAL_INDEX
 #include "cache.h"
 #include "argv-array.h"
 #include "repository.h"
-- 
2.18.0.rc0.333.g22e6ee6cdf



[PATCH 2/6] unpack-trees: add a note about path invalidation

2018-06-05 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 unpack-trees.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/unpack-trees.c b/unpack-trees.c
index 3a85a02a77..5d06aa9c98 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1545,6 +1545,17 @@ static int verify_uptodate_sparse(const struct 
cache_entry *ce,
return verify_uptodate_1(ce, o, ERROR_SPARSE_NOT_UPTODATE_FILE);
 }
 
+/*
+ * TODO: We should actually invalidate o->result, not src_index [1].
+ * But since cache tree and untracked cache both are not copied to
+ * o->result until unpacking is complete, we invalidate them on
+ * src_index instead with the assumption that they will be copied to
+ * dst_index at the end.
+ *
+ * [1] src_index->cache_tree is also used in unpack_callback() so if
+ * we invalidate o->result, we need to update it to use
+ * o->result.cache_tree as well.
+ */
 static void invalidate_ce_path(const struct cache_entry *ce,
   struct unpack_trees_options *o)
 {
-- 
2.18.0.rc0.333.g22e6ee6cdf



[PATCH 5/6] unpack-trees: avoid the_index in verify_absent()

2018-06-05 Thread Nguyễn Thái Ngọc Duy
Both functions that are updated in this commit are called by
verify_absent(), which is part of the "unpack-trees" operation that is
supposed to work on any index file specified by the caller. Thanks to
Brandon [1] [2], an implicit dependency on the_index is exposed. This
commit fixes it.

In both functions, it makes sense to use src_index to check for
exclusion because it's almost unchanged and should give us the same
outcome as if running the exclude check before the unpack.

It's "almost unchanged" because we do invalidate cache-tree and
untracked cache in the source index. But this should not affect how
exclude machinery uses the index: to see if a file is tracked, and to
read a blob from the index instead of worktree if it's marked
skip-worktree (i.e. it's not available in worktree)

[1] a0bba65b10 (dir: convert is_excluded to take an index - 2017-05-05
[2] 2c1eb10454 (dir: convert read_directory to take an index - 2017-05-05)

Helped-by: Elijah Newren 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 unpack-trees.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index 5268de7af5..3ace82ca27 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1651,7 +1651,7 @@ static int verify_clean_subdirectory(const struct 
cache_entry *ce,
memset(, 0, sizeof(d));
if (o->dir)
d.exclude_per_dir = o->dir->exclude_per_dir;
-   i = read_directory(, _index, pathbuf, namelen+1, NULL);
+   i = read_directory(, o->src_index, pathbuf, namelen+1, NULL);
if (i)
return o->gently ? -1 :
add_rejected_path(o, ERROR_NOT_UPTODATE_DIR, ce->name);
@@ -1693,7 +1693,7 @@ static int check_ok_to_remove(const char *name, int len, 
int dtype,
return 0;
 
if (o->dir &&
-   is_excluded(o->dir, _index, name, ))
+   is_excluded(o->dir, o->src_index, name, ))
/*
 * ce->name is explicitly excluded, so it is Ok to
 * overwrite it.
-- 
2.18.0.rc0.333.g22e6ee6cdf



BUG: submodule code prints '(null)'

2018-06-05 Thread Duy Nguyen
I do not know how to reproduce this (and didn't bother to look deeply
into it after I found it was not a trivial fix) but one of my "git
fetch" showed

warning: Submodule in commit be2db96a6c506464525f588da59cade0cedddb5e
at path: '(null)' collides with a submodule named the same. Skipping
it.

I think it's reported that some libc implementation will not be able
to gracefully handle NULL strings like glibc and may crash instead of
printing '(null)' here. I'll leave it to submodule people to fix this
:)
-- 
Duy


Re: [PATCH v4 00/21] Integrate commit-graph into 'fsck' and 'gc'

2018-06-05 Thread Ævar Arnfjörð Bjarmason
On Mon, Jun 4, 2018 at 6:52 PM, Derrick Stolee  wrote:
> Thanks for the feedback on v3. There were several small cleanups, but
> perhaps the biggest change is the addition of "commit-graph: use
> string-list API for input" which makes "commit-graph: add '--reachable'
> option" much simpler.

Do you have a version of this pushed somewhere? Your fsck/upstream
branch conflicts with e.g. long-since-merged nd/repack-keep-pack.


[PATCH v7 1/8] checkout tests: index should be clean after dwim checkout

2018-06-05 Thread Ævar Arnfjörð Bjarmason
Assert that whenever there's a DWIM checkout that the index should be
clean afterwards, in addition to the correct branch being checked-out.

The way the DWIM checkout code in checkout.[ch] works is by looping
over all remotes, and for each remote trying to find if a given
reference name only exists on that remote, or if it exists anywhere
else.

This is done by starting out with a `unique = 1` tracking variable in
a struct shared by the entire loop, which will get set to `0` if the
data reference is not unique.

Thus if we find a match we know the dst_oid member of
tracking_name_data must be correct, since it's associated with the
only reference on the only remote that could have matched our query.

But if there was ever a mismatch there for some reason we might end up
with the correct branch checked out, but at the wrong oid, which would
show whatever the difference between the two staged in the
index (checkout branch A, stage changes from the state of branch B).

So let's amend the tests (mostly added in) 399e4a1c56 ("t2024: Add
tests verifying current DWIM behavior of 'git checkout '",
2013-04-21) to always assert that "status" is clean after we run
"checkout", that's being done with "-uno" because there's going to be
some untracked files related to the test itself which we don't care
about.

In all these tests (DWIM or otherwise) we start with a clean index, so
these tests are asserting that that's still the case after the
"checkout", failed or otherwise.

Then if we ever run into this sort of regression, either in the
existing code or with a new feature, we'll know.

Signed-off-by: Ævar Arnfjörð Bjarmason 
---
 t/t2024-checkout-dwim.sh | 29 +
 1 file changed, 29 insertions(+)

diff --git a/t/t2024-checkout-dwim.sh b/t/t2024-checkout-dwim.sh
index 3e5ac81bd2..ed32828105 100755
--- a/t/t2024-checkout-dwim.sh
+++ b/t/t2024-checkout-dwim.sh
@@ -23,6 +23,12 @@ test_branch_upstream () {
test_cmp expect.upstream actual.upstream
 }
 
+status_uno_is_clean() {
+   >status.expect &&
+   git status -uno --porcelain >status.actual &&
+   test_cmp status.expect status.actual
+}
+
 test_expect_success 'setup' '
test_commit my_master &&
git init repo_a &&
@@ -55,6 +61,7 @@ test_expect_success 'checkout of non-existing branch fails' '
test_might_fail git branch -D xyzzy &&
 
test_must_fail git checkout xyzzy &&
+   status_uno_is_clean &&
test_must_fail git rev-parse --verify refs/heads/xyzzy &&
test_branch master
 '
@@ -64,6 +71,7 @@ test_expect_success 'checkout of branch from multiple remotes 
fails #1' '
test_might_fail git branch -D foo &&
 
test_must_fail git checkout foo &&
+   status_uno_is_clean &&
test_must_fail git rev-parse --verify refs/heads/foo &&
test_branch master
 '
@@ -73,6 +81,7 @@ test_expect_success 'checkout of branch from a single remote 
succeeds #1' '
test_might_fail git branch -D bar &&
 
git checkout bar &&
+   status_uno_is_clean &&
test_branch bar &&
test_cmp_rev remotes/repo_a/bar HEAD &&
test_branch_upstream bar repo_a bar
@@ -83,6 +92,7 @@ test_expect_success 'checkout of branch from a single remote 
succeeds #2' '
test_might_fail git branch -D baz &&
 
git checkout baz &&
+   status_uno_is_clean &&
test_branch baz &&
test_cmp_rev remotes/other_b/baz HEAD &&
test_branch_upstream baz repo_b baz
@@ -90,6 +100,7 @@ test_expect_success 'checkout of branch from a single remote 
succeeds #2' '
 
 test_expect_success '--no-guess suppresses branch auto-vivification' '
git checkout -B master &&
+   status_uno_is_clean &&
test_might_fail git branch -D bar &&
 
test_must_fail git checkout --no-guess bar &&
@@ -99,6 +110,7 @@ test_expect_success '--no-guess suppresses branch 
auto-vivification' '
 
 test_expect_success 'setup more remotes with unconventional refspecs' '
git checkout -B master &&
+   status_uno_is_clean &&
git init repo_c &&
(
cd repo_c &&
@@ -128,27 +140,33 @@ test_expect_success 'setup more remotes with 
unconventional refspecs' '
 
 test_expect_success 'checkout of branch from multiple remotes fails #2' '
git checkout -B master &&
+   status_uno_is_clean &&
test_might_fail git branch -D bar &&
 
test_must_fail git checkout bar &&
+   status_uno_is_clean &&
test_must_fail git rev-parse --verify refs/heads/bar &&
test_branch master
 '
 
 test_expect_success 'checkout of branch from multiple remotes fails #3' '
git checkout -B master &&
+   status_uno_is_clean &&
test_might_fail git branch -D baz &&
 
test_must_fail git checkout baz &&
+   status_uno_is_clean &&
test_must_fail git rev-parse --verify refs/heads/baz &&
test_branch master
 '
 
 test_expect_success 'checkout of branch from a single 

[PATCH v7 3/8] checkout.c: introduce an *_INIT macro

2018-06-05 Thread Ævar Arnfjörð Bjarmason
Add an *_INIT macro for the tracking_name_data similar to what exists
elsewhere in the codebase, e.g. OID_ARRAY_INIT in sha1-array.h. This
will make it more idiomatic in later changes to add more fields to the
struct & its initialization macro.

Signed-off-by: Ævar Arnfjörð Bjarmason 
---
 checkout.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/checkout.c b/checkout.c
index bdefc888ba..80e430cda8 100644
--- a/checkout.c
+++ b/checkout.c
@@ -10,6 +10,8 @@ struct tracking_name_data {
int unique;
 };
 
+#define TRACKING_NAME_DATA_INIT { NULL, NULL, NULL, 1 }
+
 static int check_tracking_name(struct remote *remote, void *cb_data)
 {
struct tracking_name_data *cb = cb_data;
@@ -32,7 +34,7 @@ static int check_tracking_name(struct remote *remote, void 
*cb_data)
 
 const char *unique_tracking_name(const char *name, struct object_id *oid)
 {
-   struct tracking_name_data cb_data = { NULL, NULL, NULL, 1 };
+   struct tracking_name_data cb_data = TRACKING_NAME_DATA_INIT;
cb_data.src_ref = xstrfmt("refs/heads/%s", name);
cb_data.dst_oid = oid;
for_each_remote(check_tracking_name, _data);
-- 
2.17.0.290.gded63e768a



[PATCH v7 5/8] checkout: pass the "num_matches" up to callers

2018-06-05 Thread Ævar Arnfjörð Bjarmason
Pass the previously added "num_matches" struct value up to the callers
of unique_tracking_name(). This will allow callers to optionally print
better error messages in a later change.

Signed-off-by: Ævar Arnfjörð Bjarmason 
---
 builtin/checkout.c | 10 +++---
 builtin/worktree.c |  4 ++--
 checkout.c |  5 -
 checkout.h |  3 ++-
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index 2e1d2376d2..72457fb7d5 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -878,7 +878,8 @@ static int parse_branchname_arg(int argc, const char **argv,
int dwim_new_local_branch_ok,
struct branch_info *new_branch_info,
struct checkout_opts *opts,
-   struct object_id *rev)
+   struct object_id *rev,
+   int *dwim_remotes_matched)
 {
struct tree **source_tree = >source_tree;
const char **new_branch = >new_branch;
@@ -972,7 +973,8 @@ static int parse_branchname_arg(int argc, const char **argv,
recover_with_dwim = 0;
 
if (recover_with_dwim) {
-   const char *remote = unique_tracking_name(arg, rev);
+   const char *remote = unique_tracking_name(arg, rev,
+ 
dwim_remotes_matched);
if (remote) {
*new_branch = arg;
arg = remote;
@@ -1109,6 +,7 @@ int cmd_checkout(int argc, const char **argv, const char 
*prefix)
struct branch_info new_branch_info;
char *conflict_style = NULL;
int dwim_new_local_branch = 1;
+   int dwim_remotes_matched = 0;
struct option options[] = {
OPT__QUIET(, N_("suppress progress reporting")),
OPT_STRING('b', NULL, _branch, N_("branch"),
@@ -1219,7 +1222,8 @@ int cmd_checkout(int argc, const char **argv, const char 
*prefix)
opts.track == BRANCH_TRACK_UNSPECIFIED &&
!opts.new_branch;
int n = parse_branchname_arg(argc, argv, dwim_ok,
-_branch_info, , );
+_branch_info, , ,
+_remotes_matched);
argv += n;
argc -= n;
}
diff --git a/builtin/worktree.c b/builtin/worktree.c
index 5c7d2bb180..a763dbdccb 100644
--- a/builtin/worktree.c
+++ b/builtin/worktree.c
@@ -412,7 +412,7 @@ static const char *dwim_branch(const char *path, const char 
**new_branch)
if (guess_remote) {
struct object_id oid;
const char *remote =
-   unique_tracking_name(*new_branch, );
+   unique_tracking_name(*new_branch, , NULL);
return remote;
}
return NULL;
@@ -484,7 +484,7 @@ static int add(int ac, const char **av, const char *prefix)
 
commit = lookup_commit_reference_by_name(branch);
if (!commit) {
-   remote = unique_tracking_name(branch, );
+   remote = unique_tracking_name(branch, , NULL);
if (remote) {
new_branch = branch;
branch = remote;
diff --git a/checkout.c b/checkout.c
index 7662a39a62..ee3a7e9c05 100644
--- a/checkout.c
+++ b/checkout.c
@@ -32,12 +32,15 @@ static int check_tracking_name(struct remote *remote, void 
*cb_data)
return 0;
 }
 
-const char *unique_tracking_name(const char *name, struct object_id *oid)
+const char *unique_tracking_name(const char *name, struct object_id *oid,
+int *dwim_remotes_matched)
 {
struct tracking_name_data cb_data = TRACKING_NAME_DATA_INIT;
cb_data.src_ref = xstrfmt("refs/heads/%s", name);
cb_data.dst_oid = oid;
for_each_remote(check_tracking_name, _data);
+   if (dwim_remotes_matched)
+   *dwim_remotes_matched = cb_data.num_matches;
free(cb_data.src_ref);
if (cb_data.num_matches == 1)
return cb_data.dst_ref;
diff --git a/checkout.h b/checkout.h
index 4cd4cd1c23..6b2073310c 100644
--- a/checkout.h
+++ b/checkout.h
@@ -9,6 +9,7 @@
  * exists, NULL otherwise.
  */
 extern const char *unique_tracking_name(const char *name,
-   struct object_id *oid);
+   struct object_id *oid,
+   int *dwim_remotes_matched);
 
 #endif /* CHECKOUT_H */
-- 
2.17.0.290.gded63e768a



[PATCH v7 8/8] checkout & worktree: introduce checkout.defaultRemote

2018-06-05 Thread Ævar Arnfjörð Bjarmason
Introduce a checkout.defaultRemote setting which can be used to
designate a remote to prefer (via checkout.defaultRemote=origin) when
running e.g. "git checkout master" to mean origin/master, even though
there's other remotes that have the "master" branch.

I want this because it's very handy to use this workflow to checkout a
repository and create a topic branch, then get back to a "master" as
retrieved from upstream:

(
cd /tmp &&
rm -rf tbdiff &&
git clone g...@github.com:trast/tbdiff.git &&
cd tbdiff &&
git branch -m topic &&
git checkout master
)

That will output:

Branch 'master' set up to track remote branch 'master' from 'origin'.
Switched to a new branch 'master'

But as soon as a new remote is added (e.g. just to inspect something
from someone else) the DWIMery goes away:

(
cd /tmp &&
rm -rf tbdiff &&
git clone g...@github.com:trast/tbdiff.git &&
cd tbdiff &&
git branch -m topic &&
git remote add avar g...@github.com:avar/tbdiff.git &&
git fetch avar &&
git checkout master
)

Will output (without the advice output added earlier in this series):

error: pathspec 'master' did not match any file(s) known to git.

The new checkout.defaultRemote config allows me to say that whenever
that ambiguity comes up I'd like to prefer "origin", and it'll still
work as though the only remote I had was "origin".

Also adjust the advice.checkoutAmbiguousRemoteBranchName message to
mention this new config setting to the user, the full output on my
git.git is now (the last paragraph is new):

$ ./git --exec-path=$PWD checkout master
error: pathspec 'master' did not match any file(s) known to git.
hint: 'master' matched more than one remote tracking branch.
hint: We found 26 remotes with a reference that matched. So we fell back
hint: on trying to resolve the argument as a path, but failed there too!
hint:
hint: If you meant to check out a remote tracking branch on, e.g. 'origin',
hint: you can do so by fully qualifying the name with the --track option:
hint:
hint: git checkout --track origin/
hint:
hint: If you'd like to always have checkouts of an ambiguous  prefer
hint: one remote, e.g. the 'origin' remote, consider setting
hint: checkout.defaultRemote=origin in your config.

I considered splitting this into checkout.defaultRemote and
worktree.defaultRemote, but it's probably less confusing to break our
own rules that anything shared between config should live in core.*
than have two config settings, and I couldn't come up with a short
name under core.* that made sense (core.defaultRemoteForCheckout?).

See also 70c9ac2f19 ("DWIM "git checkout frotz" to "git checkout -b
frotz origin/frotz"", 2009-10-18) which introduced this DWIM feature
to begin with, and 4e85333197 ("worktree: make add  
dwim", 2017-11-26) which added it to git-worktree.

Signed-off-by: Ævar Arnfjörð Bjarmason 
---
 Documentation/config.txt   | 21 -
 Documentation/git-checkout.txt |  9 +
 Documentation/git-worktree.txt |  9 +
 builtin/checkout.c | 12 +---
 checkout.c | 26 --
 t/t2024-checkout-dwim.sh   | 18 +-
 t/t2025-worktree-add.sh| 21 +
 7 files changed, 109 insertions(+), 7 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index dfc0413a84..aef2769211 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -350,7 +350,10 @@ advice.*::
remote tracking branch on more than one remote in
situations where an unambiguous argument would have
otherwise caused a remote-tracking branch to be
-   checked out.
+   checked out. See the `checkout.defaultRemote`
+   configuration variable for how to set a given remote
+   to used by default in some situations where this
+   advice would be printed.
amWorkDir::
Advice that shows the location of the patch file when
linkgit:git-am[1] fails to apply it.
@@ -1105,6 +1108,22 @@ browser..path::
browse HTML help (see `-w` option in linkgit:git-help[1]) or a
working repository in gitweb (see linkgit:git-instaweb[1]).
 
+checkout.defaultRemote::
+   When you run 'git checkout ' and only have one
+   remote, it may implicitly fall back on checking out and
+   tracking e.g. 'origin/'. This stops working as soon
+   as you have more than one remote with a ''
+   reference. This setting allows for setting the name of a
+   preferred remote that should always win when it comes to
+   disambiguation. The typical use-case is to set this to
+   `origin`.
++
+Currently this is used by linkgit:git-checkout[1] when 'git checkout
+' will 

[PATCH v7 6/8] builtin/checkout.c: use "ret" variable for return

2018-06-05 Thread Ævar Arnfjörð Bjarmason
There is no point in doing this right now, but in later change the
"ret" variable will be inspected. This change makes that meaningful
change smaller.

Signed-off-by: Ævar Arnfjörð Bjarmason 
---
 builtin/checkout.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index 72457fb7d5..8c93c55cbc 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -1265,8 +1265,10 @@ int cmd_checkout(int argc, const char **argv, const char 
*prefix)
}
 
UNLEAK(opts);
-   if (opts.patch_mode || opts.pathspec.nr)
-   return checkout_paths(, new_branch_info.name);
-   else
+   if (opts.patch_mode || opts.pathspec.nr) {
+   int ret = checkout_paths(, new_branch_info.name);
+   return ret;
+   } else {
return checkout_branch(, _branch_info);
+   }
 }
-- 
2.17.0.290.gded63e768a



[PATCH v7 2/8] checkout.h: wrap the arguments to unique_tracking_name()

2018-06-05 Thread Ævar Arnfjörð Bjarmason
The line was too long already, and will be longer still when a later
change adds another argument.

Signed-off-by: Ævar Arnfjörð Bjarmason 
---
 checkout.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/checkout.h b/checkout.h
index 9980711179..4cd4cd1c23 100644
--- a/checkout.h
+++ b/checkout.h
@@ -8,6 +8,7 @@
  * tracking branch.  Return the name of the remote if such a branch
  * exists, NULL otherwise.
  */
-extern const char *unique_tracking_name(const char *name, struct object_id 
*oid);
+extern const char *unique_tracking_name(const char *name,
+   struct object_id *oid);
 
 #endif /* CHECKOUT_H */
-- 
2.17.0.290.gded63e768a



[PATCH v7 7/8] checkout: add advice for ambiguous "checkout "

2018-06-05 Thread Ævar Arnfjörð Bjarmason
As the "checkout" documentation describes:

If  is not found but there does exist a tracking branch in
exactly one remote (call it ) with a matching name, treat
as equivalent to [...] /

Note that the "error: pathspec[...]" message is still printed. This is
because whatever else checkout may have tried earlier, its final
fallback is to try to resolve the argument as a path. E.g. in this
case:

$ ./git --exec-path=$PWD checkout master pu
error: pathspec 'master' did not match any file(s) known to git.
error: pathspec 'pu' did not match any file(s) known to git.

There we don't print the "hint:" implicitly due to earlier logic
around the DWIM fallback. That fallback is only used if it looks like
we have one argument that might be a branch.

I can't think of an intrinsic reason for why we couldn't in some
future change skip printing the "error: pathspec[...]" error. However,
to do so we'd need to pass something down to checkout_paths() to make
it suppress printing an error on its own, and for us to be confident
that we're not silencing cases where those errors are meaningful.

I don't think that's worth it since determining whether that's the
case could easily change due to future changes in the checkout logic.

Signed-off-by: Ævar Arnfjörð Bjarmason 
---
 Documentation/config.txt |  7 +++
 advice.c |  2 ++
 advice.h |  1 +
 builtin/checkout.c   | 13 +
 t/t2024-checkout-dwim.sh | 14 ++
 5 files changed, 37 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index ab641bf5a9..dfc0413a84 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -344,6 +344,13 @@ advice.*::
Advice shown when you used linkgit:git-checkout[1] to
move to the detach HEAD state, to instruct how to create
a local branch after the fact.
+   checkoutAmbiguousRemoteBranchName::
+   Advice shown when the argument to
+   linkgit:git-checkout[1] ambiguously resolves to a
+   remote tracking branch on more than one remote in
+   situations where an unambiguous argument would have
+   otherwise caused a remote-tracking branch to be
+   checked out.
amWorkDir::
Advice that shows the location of the patch file when
linkgit:git-am[1] fails to apply it.
diff --git a/advice.c b/advice.c
index 370a56d054..75e7dede90 100644
--- a/advice.c
+++ b/advice.c
@@ -21,6 +21,7 @@ int advice_add_embedded_repo = 1;
 int advice_ignored_hook = 1;
 int advice_waiting_for_editor = 1;
 int advice_graft_file_deprecated = 1;
+int advice_checkout_ambiguous_remote_branch_name = 1;
 
 static int advice_use_color = -1;
 static char advice_colors[][COLOR_MAXLEN] = {
@@ -72,6 +73,7 @@ static struct {
{ "ignoredhook", _ignored_hook },
{ "waitingforeditor", _waiting_for_editor },
{ "graftfiledeprecated", _graft_file_deprecated },
+   { "checkoutambiguousremotebranchname", 
_checkout_ambiguous_remote_branch_name },
 
/* make this an alias for backward compatibility */
{ "pushnonfastforward", _push_update_rejected }
diff --git a/advice.h b/advice.h
index 9f5064e82a..4d11d51d43 100644
--- a/advice.h
+++ b/advice.h
@@ -22,6 +22,7 @@ extern int advice_add_embedded_repo;
 extern int advice_ignored_hook;
 extern int advice_waiting_for_editor;
 extern int advice_graft_file_deprecated;
+extern int advice_checkout_ambiguous_remote_branch_name;
 
 int git_default_advice_config(const char *var, const char *value);
 __attribute__((format (printf, 1, 2)))
diff --git a/builtin/checkout.c b/builtin/checkout.c
index 8c93c55cbc..baa027455a 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -22,6 +22,7 @@
 #include "resolve-undo.h"
 #include "submodule-config.h"
 #include "submodule.h"
+#include "advice.h"
 
 static const char * const checkout_usage[] = {
N_("git checkout [] "),
@@ -1267,6 +1268,18 @@ int cmd_checkout(int argc, const char **argv, const char 
*prefix)
UNLEAK(opts);
if (opts.patch_mode || opts.pathspec.nr) {
int ret = checkout_paths(, new_branch_info.name);
+   if (ret && dwim_remotes_matched > 1 &&
+   advice_checkout_ambiguous_remote_branch_name)
+   advise(_("'%s' matched more than one remote tracking 
branch.\n"
+"We found %d remotes with a reference that 
matched. So we fell back\n"
+"on trying to resolve the argument as a path, 
but failed there too!\n"
+"\n"
+"If you meant to check out a remote tracking 
branch on, e.g. 'origin',\n"
+"you can do so by fully qualifying the name 
with the --track option:\n"
+"\n"
+"git 

[PATCH v7 4/8] checkout.c: change "unique" member to "num_matches"

2018-06-05 Thread Ævar Arnfjörð Bjarmason
Internally track how many matches we find in the check_tracking_name()
callback. Nothing uses this now, but it will be made use of in a later
change.

Signed-off-by: Ævar Arnfjörð Bjarmason 
---
 checkout.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/checkout.c b/checkout.c
index 80e430cda8..7662a39a62 100644
--- a/checkout.c
+++ b/checkout.c
@@ -7,10 +7,10 @@ struct tracking_name_data {
/* const */ char *src_ref;
char *dst_ref;
struct object_id *dst_oid;
-   int unique;
+   int num_matches;
 };
 
-#define TRACKING_NAME_DATA_INIT { NULL, NULL, NULL, 1 }
+#define TRACKING_NAME_DATA_INIT { NULL, NULL, NULL, 0 }
 
 static int check_tracking_name(struct remote *remote, void *cb_data)
 {
@@ -23,9 +23,9 @@ static int check_tracking_name(struct remote *remote, void 
*cb_data)
free(query.dst);
return 0;
}
+   cb->num_matches++;
if (cb->dst_ref) {
free(query.dst);
-   cb->unique = 0;
return 0;
}
cb->dst_ref = query.dst;
@@ -39,7 +39,7 @@ const char *unique_tracking_name(const char *name, struct 
object_id *oid)
cb_data.dst_oid = oid;
for_each_remote(check_tracking_name, _data);
free(cb_data.src_ref);
-   if (cb_data.unique)
+   if (cb_data.num_matches == 1)
return cb_data.dst_ref;
free(cb_data.dst_ref);
return NULL;
-- 
2.17.0.290.gded63e768a



[PATCH v7 0/8] ambiguous checkout UI & checkout.defaultRemote

2018-06-05 Thread Ævar Arnfjörð Bjarmason
Fixes issues noted with v6, hopefully ready for queuing. A tbdiff with
v6:

1: ab4529d9f5 = 1: 2ca81c76fc checkout tests: index should be clean after dwim 
checkout
2: c8bbece403 = 2: 19b14a1c75 checkout.h: wrap the arguments to 
unique_tracking_name()
3: 881fe63f4f = 3: 8bc6a9c052 checkout.c: introduce an *_INIT macro
4: 72ddaeddd3 ! 4: 34f3b67f9b checkout.c: change "unique" member to 
"num_matches"
@@ -1,6 +1,6 @@
 Author: Ævar Arnfjörð Bjarmason 
 
-checkout.c]: change "unique" member to "num_matches"
+checkout.c: change "unique" member to "num_matches"
 
 Internally track how many matches we find in the check_tracking_name()
 callback. Nothing uses this now, but it will be made use of in a later
5: 5e8c82680b = 5: 7d81c06a23 checkout: pass the "num_matches" up to callers
6: 07e667f80a = 6: e86636ad2c builtin/checkout.c: use "ret" variable for return
7: 0a148182e6 ! 7: c2130b347c checkout: add advice for ambiguous "checkout 
"
@@ -27,6 +27,28 @@
 hint: you can do so by fully qualifying the name with the --track 
option:
 hint:
 hint: git checkout --track origin/
+
+Note that the "error: pathspec[...]" message is still printed. This is
+because whatever else checkout may have tried earlier, its final
+fallback is to try to resolve the argument as a path. E.g. in this
+case:
+
+$ ./git --exec-path=$PWD checkout master pu
+error: pathspec 'master' did not match any file(s) known to git.
+error: pathspec 'pu' did not match any file(s) known to git.
+
+There we don't print the "hint:" implicitly due to earlier logic
+around the DWIM fallback. That fallback is only used if it looks like
+we have one argument that might be a branch.
+
+I can't think of an intrinsic reason for why we couldn't in some
+future change skip printing the "error: pathspec[...]" error. However,
+to do so we'd need to pass something down to checkout_paths() to make
+it suppress printing an error on its own, and for us to be confident
+that we're not silencing cases where those errors are meaningful.
+
+I don't think that's worth it since determining whether that's the
+case could easily change due to future changes in the checkout logic.
 
 Signed-off-by: Ævar Arnfjörð Bjarmason 
 
8: f3a52a26a2 ! 8: f1ac0f7351 checkout & worktree: introduce 
checkout.defaultRemote
@@ -53,12 +53,12 @@
 
 $ ./git --exec-path=$PWD checkout master
 error: pathspec 'master' did not match any file(s) known to git.
-hint: The argument 'master' matched more than one remote tracking 
branch.
+hint: 'master' matched more than one remote tracking branch.
 hint: We found 26 remotes with a reference that matched. So we 
fell back
 hint: on trying to resolve the argument as a path, but failed 
there too!
 hint:
-hint: If you meant to check out a remote tracking branch on e.g. 
'origin'
-hint: you can do so by fully-qualifying the name with the --track 
option:
+hint: If you meant to check out a remote tracking branch on, e.g. 
'origin',
+hint: you can do so by fully qualifying the name with the --track 
option:
 hint:
 hint: git checkout --track origin/
 hint:
@@ -263,7 +263,7 @@
status_uno_is_clean &&
 -  test_i18ngrep ! "^hint: " stderr
 +  test_i18ngrep ! "^hint: " stderr &&
-+  # Make sure the likes of checkout -p don not print this hint
++  # Make sure the likes of checkout -p do not print this hint
 +  git checkout -p foo 2>stderr &&
 +  test_i18ngrep ! "^hint: " stderr &&
 +  status_uno_is_clean

Ævar Arnfjörð Bjarmason (8):
  checkout tests: index should be clean after dwim checkout
  checkout.h: wrap the arguments to unique_tracking_name()
  checkout.c: introduce an *_INIT macro
  checkout.c: change "unique" member to "num_matches"
  checkout: pass the "num_matches" up to callers
  builtin/checkout.c: use "ret" variable for return
  checkout: add advice for ambiguous "checkout "
  checkout & worktree: introduce checkout.defaultRemote

 Documentation/config.txt   | 26 +++
 Documentation/git-checkout.txt |  9 ++
 Documentation/git-worktree.txt |  9 ++
 advice.c   |  2 ++
 advice.h   |  1 +
 builtin/checkout.c | 41 ++-
 builtin/worktree.c |  4 +--
 checkout.c | 37 ++---
 checkout.h |  4 ++-
 t/t2024-checkout-dwim.sh   | 59 ++
 t/t2025-worktree-add.sh| 21 
 11 files changed, 197 insertions(+), 16 deletions(-)

-- 

  1   2   >