Re: [PATCH v3 21/35] fetch-pack: perform a fetch using v2

2018-02-27 Thread Brandon Williams
On 02/27, Stefan Beller wrote:
> > +
> > +   /* Add initial haves */
> > +   ret = add_haves(_buf, in_vain);
> 
> I like the shortness and conciseness of this send_fetch_request
> function as it makes clear what is happening over the wire, however
> I wonder if we can improve on that, still.

I'm sure there is more that can be micro optimized but I don't really
want to get distracted by redesigning this logic another time right now.

> 
> The functions 'add_common' and 'add_haves' seem like they do the
> same (sending haves), except for different sets of oids.
> 
> So I would imagine that a structure like
> 
>   {
> struct set haves = compute_haves_from(in_vain, common, ...);
> struct set wants = compute_wants);
> 
> request_capabilities(args)
> send_haves();
> send_wants();
> flush();
>   }
> 
> That way we would have an even more concise way of writing
> one request, and factoring out the business logic. (Coming up
> with the "right" haves is a heuristic that we plan on changing in
> the future, so we'd want to have that encapsulated into one function
> that computes all the haves?
> 
> > +
> > +/*
> > + * Processes a section header in a server's response and checks if it 
> > matches
> > + * `section`.  If the value of `peek` is 1, the header line will be peeked 
> > (and
> > + * not consumed); if 0, the line will be consumed and the function will 
> > die if
> > + * the section header doesn't match what was expected.
> > + */
> > +static int process_section_header(struct packet_reader *reader,
> > + const char *section, int peek)
> > +{
> > +   int ret;
> > +
> > +   if (packet_reader_peek(reader) != PACKET_READ_NORMAL)
> > +   die("error reading packet");
> > +
> > +   ret = !strcmp(reader->line, section);
> > +
> > +   if (!peek) {
> > +   if (!ret)
> > +   die("expected '%s', received '%s'",
> > +   section, reader->line);
> > +   packet_reader_read(reader);
> > +   }
> > +
> > +   return ret;
> > +}
> > +
> > +static int process_acks(struct packet_reader *reader, struct oidset 
> > *common)
> > +{
> > +   /* received */
> > +   int received_ready = 0;
> > +   int received_ack = 0;
> > +
> > +   process_section_header(reader, "acknowledgments", 0);
> > +   while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
> > +   const char *arg;
> > +
> > +   if (!strcmp(reader->line, "NAK"))
> > +   continue;
> > +
> > +   if (skip_prefix(reader->line, "ACK ", )) {
> > +   struct object_id oid;
> > +   if (!get_oid_hex(arg, )) {
> > +   struct commit *commit;
> > +   oidset_insert(common, );
> > +   commit = lookup_commit();
> > +   mark_common(commit, 0, 1);
> > +   }
> > +   continue;
> > +   }
> > +
> > +   if (!strcmp(reader->line, "ready")) {
> > +   clear_prio_queue(_list);
> > +   received_ready = 1;
> > +   continue;
> > +   }
> > +
> > +   die(_("git fetch-pack: expected ACK/NAK, got '%s'"), 
> > reader->line);
> 
> This is slightly misleading, it could also expect "ready" ?

I'll update this.

> 
> 
> > +   }
> > +
> > +   if (reader->status != PACKET_READ_FLUSH &&
> > +   reader->status != PACKET_READ_DELIM)
> > +   die("Error during processing acks: %d", reader->status);
> 
> Why is this not translated unlike the one 5 lines prior to this?
> Do we expect these conditions to come up due to different
> root causes?
> 
> > +static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
> > +   int fd[2],
> > +   const struct ref *orig_ref,
> > +   struct ref **sought, int nr_sought,
> > +   char **pack_lockfile)
> > +{
> > +   struct ref *ref = copy_ref_list(orig_ref);
> > +   enum fetch_state state = FETCH_CHECK_LOCAL;
> > +   struct oidset common = OIDSET_INIT;
> > +   struct packet_reader reader;
> > +   int in_vain = 0;
> > +   packet_reader_init(, fd[0], NULL, 0,
> > +  PACKET_READ_CHOMP_NEWLINE);
> > +
> > +   while (state != FETCH_DONE) {
> > +   switch (state) {
> > +   case FETCH_CHECK_LOCAL:
> > +   sort_ref_list(, ref_compare_name);
> > +   QSORT(sought, nr_sought, cmp_ref_by_name);
> > +
> > +   /* v2 supports these by default */
> 
> Is there a doc that says what is all on by default?

Yeah protocol-v2.txt should say all of that.

> 
> Thanks,
> Stefan

-- 

Re: [PATCH v3 21/35] fetch-pack: perform a fetch using v2

2018-02-27 Thread Stefan Beller
On Tue, Feb 6, 2018 at 5:12 PM, Brandon Williams  wrote:
> When communicating with a v2 server, perform a fetch by requesting the
> 'fetch' command.
>
> Signed-off-by: Brandon Williams 
> ---
>  builtin/fetch-pack.c   |   2 +-
>  fetch-pack.c   | 252 
> -
>  fetch-pack.h   |   4 +-
>  t/t5702-protocol-v2.sh |  84 +
>  transport.c|   7 +-
>  5 files changed, 342 insertions(+), 7 deletions(-)
>
> diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c
> index f492e8abd..867dd3cc7 100644
> --- a/builtin/fetch-pack.c
> +++ b/builtin/fetch-pack.c
> @@ -213,7 +213,7 @@ int cmd_fetch_pack(int argc, const char **argv, const 
> char *prefix)
> }
>
> ref = fetch_pack(, fd, conn, ref, dest, sought, nr_sought,
> -, pack_lockfile_ptr);
> +, pack_lockfile_ptr, protocol_v0);
> if (pack_lockfile) {
> printf("lock %s\n", pack_lockfile);
> fflush(stdout);
> diff --git a/fetch-pack.c b/fetch-pack.c
> index 9f6b07ad9..4fb5805dd 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -1008,6 +1008,247 @@ static struct ref *do_fetch_pack(struct 
> fetch_pack_args *args,
> return ref;
>  }
>
> +static void add_wants(const struct ref *wants, struct strbuf *req_buf)
> +{
> +   for ( ; wants ; wants = wants->next) {
> +   const struct object_id *remote = >old_oid;
> +   const char *remote_hex;
> +   struct object *o;
> +
> +   /*
> +* If that object is complete (i.e. it is an ancestor of a
> +* local ref), we tell them we have it but do not have to
> +* tell them about its ancestors, which they already know
> +* about.
> +*
> +* We use lookup_object here because we are only
> +* interested in the case we *know* the object is
> +* reachable and we have already scanned it.
> +*/
> +   if (((o = lookup_object(remote->hash)) != NULL) &&
> +   (o->flags & COMPLETE)) {
> +   continue;
> +   }
> +
> +   remote_hex = oid_to_hex(remote);
> +   packet_buf_write(req_buf, "want %s\n", remote_hex);
> +   }
> +}
> +
> +static void add_common(struct strbuf *req_buf, struct oidset *common)
> +{
> +   struct oidset_iter iter;
> +   const struct object_id *oid;
> +   oidset_iter_init(common, );
> +
> +   while ((oid = oidset_iter_next())) {
> +   packet_buf_write(req_buf, "have %s\n", oid_to_hex(oid));
> +   }
> +}
> +
> +static int add_haves(struct strbuf *req_buf, int *in_vain)
> +{
> +   int ret = 0;
> +   int haves_added = 0;
> +   const struct object_id *oid;
> +
> +   while ((oid = get_rev())) {
> +   packet_buf_write(req_buf, "have %s\n", oid_to_hex(oid));
> +   if (++haves_added >= INITIAL_FLUSH)
> +   break;
> +   };
> +
> +   *in_vain += haves_added;
> +   if (!haves_added || *in_vain >= MAX_IN_VAIN) {
> +   /* Send Done */
> +   packet_buf_write(req_buf, "done\n");
> +   ret = 1;
> +   }
> +
> +   return ret;
> +}
> +
> +static int send_fetch_request(int fd_out, const struct fetch_pack_args *args,
> + const struct ref *wants, struct oidset *common,
> + int *in_vain)
> +{
> +   int ret = 0;
> +   struct strbuf req_buf = STRBUF_INIT;
> +
> +   if (server_supports_v2("fetch", 1))
> +   packet_buf_write(_buf, "command=fetch");
> +   if (server_supports_v2("agent", 0))
> +   packet_buf_write(_buf, "agent=%s", 
> git_user_agent_sanitized());
> +
> +   packet_buf_delim(_buf);
> +   if (args->use_thin_pack)
> +   packet_buf_write(_buf, "thin-pack");
> +   if (args->no_progress)
> +   packet_buf_write(_buf, "no-progress");
> +   if (args->include_tag)
> +   packet_buf_write(_buf, "include-tag");
> +   if (prefer_ofs_delta)
> +   packet_buf_write(_buf, "ofs-delta");
> +
> +   /* add wants */
> +   add_wants(wants, _buf);

The comment might convey too much redundant information
instead of helping the reader gain more understanding. ;)

> +
> +   /* Add all of the common commits we've found in previous rounds */
> +   add_common(_buf, common);

nit:
Maybe s/add_common/add_common_haves/ or
add_previous_haves ?

> +
> +   /* Add initial haves */
> +   ret = add_haves(_buf, in_vain);

I like the shortness and conciseness of this send_fetch_request
function as it makes clear what is happening over the wire, however
I wonder if we can improve on that, still.

The functions 'add_common' and 'add_haves' seem 

Re: [PATCH v3 21/35] fetch-pack: perform a fetch using v2

2018-02-26 Thread Brandon Williams
On 02/23, Jonathan Tan wrote:
> On Tue,  6 Feb 2018 17:12:58 -0800
> Brandon Williams  wrote:
> 
> > +   while ((oid = get_rev())) {
> > +   packet_buf_write(req_buf, "have %s\n", oid_to_hex(oid));
> > +   if (++haves_added >= INITIAL_FLUSH)
> > +   break;
> > +   };
> 
> Unnecessary semicolon after closing brace.

Thanks, I'll remove that.

> 
> > +   /* Filter 'ref' by 'sought' and those that aren't local 
> > */
> > +   if (everything_local(args, , sought, nr_sought))
> > +   state = FETCH_DONE;
> > +   else
> > +   state = FETCH_SEND_REQUEST;
> > +   break;
> 
> I haven't looked at this patch in detail, but I found a bug that can be
> reproduced if you patch the following onto this patch:
> 
> --- a/t/t5702-protocol-v2.sh
> +++ b/t/t5702-protocol-v2.sh
> @@ -124,6 +124,7 @@ test_expect_success 'clone with file:// using 
> protocol v2' '
>  
>  test_expect_success 'fetch with file:// using protocol v2' '
> test_commit -C file_parent two &&
> +   git -C file_parent tag -d one &&
>  
> GIT_TRACE_PACKET=1 git -C file_child -c protocol.version=2 \
> fetch origin 2>log &&
> @@ -133,7 +134,8 @@ test_expect_success 'fetch with file:// using 
> protocol v2' '
> test_cmp expect actual &&
>  
> # Server responded using protocol v2
> -   grep "fetch< version 2" log
> +   grep "fetch< version 2" log &&
> +   grep "have " log
>  '
> 
> Merely including the second hunk (the one with 'grep "have "') does not
> make the test fail, but including both the first and second hunks does.
> That is, fetch v2 emits "have" only for remote refs that point to
> objects we already have, not for local refs.
> 
> Everything still appears to work, except that packfiles are usually much
> larger than they need to be.
> 
> I did some digging in the code and found out that the equivalent of
> find_common() (which calls `for_each_ref(rev_list_insert_ref_oid,
> NULL)`) was not called in v2. In v1, find_common() is called immediately
> after everything_local(), but there is no equivalent in v2. (I quoted
> the invocation of everything_local() in v2 above.)

I actually caught this Friday morning when I realized that fetching from
a referenced repository would replicated objects instead of using them
from the referenced repository.  Thanks for pointing this out :)

-- 
Brandon Williams


Re: [PATCH v3 21/35] fetch-pack: perform a fetch using v2

2018-02-23 Thread Jonathan Tan
On Tue,  6 Feb 2018 17:12:58 -0800
Brandon Williams  wrote:

> + while ((oid = get_rev())) {
> + packet_buf_write(req_buf, "have %s\n", oid_to_hex(oid));
> + if (++haves_added >= INITIAL_FLUSH)
> + break;
> + };

Unnecessary semicolon after closing brace.

> + /* Filter 'ref' by 'sought' and those that aren't local 
> */
> + if (everything_local(args, , sought, nr_sought))
> + state = FETCH_DONE;
> + else
> + state = FETCH_SEND_REQUEST;
> + break;

I haven't looked at this patch in detail, but I found a bug that can be
reproduced if you patch the following onto this patch:

--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -124,6 +124,7 @@ test_expect_success 'clone with file:// using protocol 
v2' '
 
 test_expect_success 'fetch with file:// using protocol v2' '
test_commit -C file_parent two &&
+   git -C file_parent tag -d one &&
 
GIT_TRACE_PACKET=1 git -C file_child -c protocol.version=2 \
fetch origin 2>log &&
@@ -133,7 +134,8 @@ test_expect_success 'fetch with file:// using protocol 
v2' '
test_cmp expect actual &&
 
# Server responded using protocol v2
-   grep "fetch< version 2" log
+   grep "fetch< version 2" log &&
+   grep "have " log
 '

Merely including the second hunk (the one with 'grep "have "') does not
make the test fail, but including both the first and second hunks does.
That is, fetch v2 emits "have" only for remote refs that point to
objects we already have, not for local refs.

Everything still appears to work, except that packfiles are usually much
larger than they need to be.

I did some digging in the code and found out that the equivalent of
find_common() (which calls `for_each_ref(rev_list_insert_ref_oid,
NULL)`) was not called in v2. In v1, find_common() is called immediately
after everything_local(), but there is no equivalent in v2. (I quoted
the invocation of everything_local() in v2 above.)


[PATCH v3 21/35] fetch-pack: perform a fetch using v2

2018-02-06 Thread Brandon Williams
When communicating with a v2 server, perform a fetch by requesting the
'fetch' command.

Signed-off-by: Brandon Williams 
---
 builtin/fetch-pack.c   |   2 +-
 fetch-pack.c   | 252 -
 fetch-pack.h   |   4 +-
 t/t5702-protocol-v2.sh |  84 +
 transport.c|   7 +-
 5 files changed, 342 insertions(+), 7 deletions(-)

diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c
index f492e8abd..867dd3cc7 100644
--- a/builtin/fetch-pack.c
+++ b/builtin/fetch-pack.c
@@ -213,7 +213,7 @@ int cmd_fetch_pack(int argc, const char **argv, const char 
*prefix)
}
 
ref = fetch_pack(, fd, conn, ref, dest, sought, nr_sought,
-, pack_lockfile_ptr);
+, pack_lockfile_ptr, protocol_v0);
if (pack_lockfile) {
printf("lock %s\n", pack_lockfile);
fflush(stdout);
diff --git a/fetch-pack.c b/fetch-pack.c
index 9f6b07ad9..4fb5805dd 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1008,6 +1008,247 @@ static struct ref *do_fetch_pack(struct fetch_pack_args 
*args,
return ref;
 }
 
+static void add_wants(const struct ref *wants, struct strbuf *req_buf)
+{
+   for ( ; wants ; wants = wants->next) {
+   const struct object_id *remote = >old_oid;
+   const char *remote_hex;
+   struct object *o;
+
+   /*
+* If that object is complete (i.e. it is an ancestor of a
+* local ref), we tell them we have it but do not have to
+* tell them about its ancestors, which they already know
+* about.
+*
+* We use lookup_object here because we are only
+* interested in the case we *know* the object is
+* reachable and we have already scanned it.
+*/
+   if (((o = lookup_object(remote->hash)) != NULL) &&
+   (o->flags & COMPLETE)) {
+   continue;
+   }
+
+   remote_hex = oid_to_hex(remote);
+   packet_buf_write(req_buf, "want %s\n", remote_hex);
+   }
+}
+
+static void add_common(struct strbuf *req_buf, struct oidset *common)
+{
+   struct oidset_iter iter;
+   const struct object_id *oid;
+   oidset_iter_init(common, );
+
+   while ((oid = oidset_iter_next())) {
+   packet_buf_write(req_buf, "have %s\n", oid_to_hex(oid));
+   }
+}
+
+static int add_haves(struct strbuf *req_buf, int *in_vain)
+{
+   int ret = 0;
+   int haves_added = 0;
+   const struct object_id *oid;
+
+   while ((oid = get_rev())) {
+   packet_buf_write(req_buf, "have %s\n", oid_to_hex(oid));
+   if (++haves_added >= INITIAL_FLUSH)
+   break;
+   };
+
+   *in_vain += haves_added;
+   if (!haves_added || *in_vain >= MAX_IN_VAIN) {
+   /* Send Done */
+   packet_buf_write(req_buf, "done\n");
+   ret = 1;
+   }
+
+   return ret;
+}
+
+static int send_fetch_request(int fd_out, const struct fetch_pack_args *args,
+ const struct ref *wants, struct oidset *common,
+ int *in_vain)
+{
+   int ret = 0;
+   struct strbuf req_buf = STRBUF_INIT;
+
+   if (server_supports_v2("fetch", 1))
+   packet_buf_write(_buf, "command=fetch");
+   if (server_supports_v2("agent", 0))
+   packet_buf_write(_buf, "agent=%s", 
git_user_agent_sanitized());
+
+   packet_buf_delim(_buf);
+   if (args->use_thin_pack)
+   packet_buf_write(_buf, "thin-pack");
+   if (args->no_progress)
+   packet_buf_write(_buf, "no-progress");
+   if (args->include_tag)
+   packet_buf_write(_buf, "include-tag");
+   if (prefer_ofs_delta)
+   packet_buf_write(_buf, "ofs-delta");
+
+   /* add wants */
+   add_wants(wants, _buf);
+
+   /* Add all of the common commits we've found in previous rounds */
+   add_common(_buf, common);
+
+   /* Add initial haves */
+   ret = add_haves(_buf, in_vain);
+
+   /* Send request */
+   packet_buf_flush(_buf);
+   write_or_die(fd_out, req_buf.buf, req_buf.len);
+
+   strbuf_release(_buf);
+   return ret;
+}
+
+/*
+ * Processes a section header in a server's response and checks if it matches
+ * `section`.  If the value of `peek` is 1, the header line will be peeked (and
+ * not consumed); if 0, the line will be consumed and the function will die if
+ * the section header doesn't match what was expected.
+ */
+static int process_section_header(struct packet_reader *reader,
+ const char *section, int peek)
+{
+   int ret;
+
+   if (packet_reader_peek(reader) != PACKET_READ_NORMAL)
+   die("error reading packet");