Brandon Williams wrote:
> Teach the client to be able to request a remote's refs using protocol
> v2. This is done by having a client issue a 'ls-refs' request to a v2
> server.
Yay, ls-remote support!
[...]
> --- a/builtin/upload-pack.c
> +++ b/builtin/upload-pack.c
> @@ -5,6 +5,7 @@
> #include "parse-options.h"
> #include "protocol.h"
> #include "upload-pack.h"
> +#include "serve.h"
nit, no change needed in this patch: What is a good logical order for
the #includes here? Bonus points if there's a tool to make it happen
automatically.
Asking since adding #includes like this at the end tends to result in
a harder-to-read list of #includes, sometimes with duplicates, and
often producing conflicts when multiple patches in flight add a
#include to the same file.
[...]
> @@ -48,11 +50,9 @@ int cmd_upload_pack(int argc, const char **argv, const
> char *prefix)
>
> switch (determine_protocol_version_server()) {
> case protocol_v2:
> - /*
> - * fetch support for protocol v2 has not been implemented yet,
> - * so ignore the request to use v2 and fallback to using v0.
> - */
> - upload_pack(&opts);
> + serve_opts.advertise_capabilities = opts.advertise_refs;
> + serve_opts.stateless_rpc = opts.stateless_rpc;
> + serve(&serve_opts);
Interesting! daemon.c has its own (file-local) serve() function;
can one of the two be renamed?
I actually like both names: serve() is a traditional name for the
function a server calls when it's done setting up and is ready to
serve. But the clash is confusing.
[...]
> +++ b/connect.c
> @@ -12,9 +12,11 @@
> #include "sha1-array.h"
> #include "transport.h"
> #include "strbuf.h"
> +#include "version.h"
> #include "protocol.h"
>
> static char *server_capabilities;
> +static struct argv_array server_capabilities_v2 = ARGV_ARRAY_INIT;
Can a quick doc comment describe these and how they relate?
Is only one of them set, based on which protocol version is in use?
Should server_capabilities be renamed to server_capabilities_v1?
> static const char *parse_feature_value(const char *, const char *, int *);
>
> static int check_ref(const char *name, unsigned int flags)
> @@ -62,6 +64,33 @@ static void die_initial_contact(int unexpected)
> "and the repository exists."));
> }
>
> +/* Checks if the server supports the capability 'c' */
> +int server_supports_v2(const char *c, int die_on_error)
> +{
> + int i;
> +
> + for (i = 0; i < server_capabilities_v2.argc; i++) {
> + const char *out;
> + if (skip_prefix(server_capabilities_v2.argv[i], c, &out) &&
> + (!*out || *out == '='))
> + return 1;
> + }
> +
> + if (die_on_error)
> + die("server doesn't support '%s'", c);
> +
> + return 0;
> +}
Nice.
> +
> +static void process_capabilities_v2(struct packet_reader *reader)
> +{
> + while (packet_reader_read(reader) == PACKET_READ_NORMAL)
> + argv_array_push(&server_capabilities_v2, reader->line);
> +
> + if (reader->status != PACKET_READ_FLUSH)
> + die("protocol error");
Can this say more? E.g. "expected flush after capabilities, got <foo>"?
[...]
> @@ -85,7 +114,7 @@ enum protocol_version discover_version(struct
> packet_reader *reader)
> /* Maybe process capabilities here, at least for v2 */
Is this comment out of date now?
> switch (version) {
> case protocol_v2:
> - die("support for protocol v2 not implemented yet");
> + process_capabilities_v2(reader);
> break;
> case protocol_v1:
> /* Read the peeked version line */
> @@ -293,6 +322,98 @@ struct ref **get_remote_heads(struct packet_reader
> *reader,
> return list;
> }
>
> +static int process_ref_v2(const char *line, struct ref ***list)
What does the return value represent?
Could it return the more typical 0 on success, -1 on error?
> +{
> + int ret = 1;
> + int i = 0;
> + struct object_id old_oid;
> + struct ref *ref;
> + struct string_list line_sections = STRING_LIST_INIT_DUP;
> +
> + if (string_list_split(&line_sections, line, ' ', -1) < 2) {
Can there be a comment describing the expected format?
> + ret = 0;
> + goto out;
> + }
> +
> + if (get_oid_hex(line_sections.items[i++].string, &old_oid)) {
> + ret = 0;
> + goto out;
> + }
> +
> + ref = alloc_ref(line_sections.items[i++].string);
Ref names cannot contains a space, so this is safe. Good.
> +
> + oidcpy(&ref->old_oid, &old_oid);
> + **list = ref;
> + *list = &ref->next;
> +
> + for (; i < line_sections.nr; i++) {
> + const char *arg = line_sections.items[i].string;
> + if (skip_prefix(arg, "symref-target:", &arg))
> + ref->symref = xstrdup(arg);
Using space-delimited fields in a single pkt-line means that
- values cannot contains a space
- total length is limited by the size of a pkt-line
Given the context, I think that's fine. More generally it is tempting
to use a pkt-line per field to avoid the trouble v1 had with
capability lists crammed into a pkt-line, but I see why you used a
pkt-line per ref to avoid having to have sections-within-a-section.
My only potential worry is the length part: do we have an explicit
limit on the length of a ref name? git-check-ref-format(1) doesn't
mention one. A 32k ref name would be a bit ridiculous, though.
> +
> + if (skip_prefix(arg, "peeled:", &arg)) {
> + struct object_id peeled_oid;
> + char *peeled_name;
> + struct ref *peeled;
> + if (get_oid_hex(arg, &peeled_oid)) {
> + ret = 0;
> + goto out;
> + }
Can this also check that there's no trailing garbage after the oid?
[...]
> +
> + peeled_name = xstrfmt("%s^{}", ref->name);
optional: can reuse a buffer to avoid allocation churn:
struct strbuf peeled_name = STRBUF_INIT;
strbuf_reset(&peeled_name);
strbuf_addf(&peeled_name, "%s^{}", ref->name);
// or strbuf_addstr(ref->name); strbuf_addstr("^{}");
out:
strbuf_release(&peeled_name);
[...]
> +struct ref **get_remote_refs(int fd_out, struct packet_reader *reader,
> + struct ref **list, int for_push,
> + const struct argv_array *ref_patterns)
> +{
> + int i;
> + *list = NULL;
> +
> + /* Check that the server supports the ls-refs command */
> + /* Issue request for ls-refs */
> + if (server_supports_v2("ls-refs", 1))
> + packet_write_fmt(fd_out, "command=ls-refs\n");
Since the code is so clear, I don't think the above two comments are
helping.
> +
> + if (server_supports_v2("agent", 0))
> + packet_write_fmt(fd_out, "agent=%s", git_user_agent_sanitized());
whitespace nit: mixing tabs and spaces. Does "make style" catch this?
> +
> + packet_delim(fd_out);
> + /* When pushing we don't want to request the peeled tags */
Can you say more about this? In theory it would be nice to have the
peeled tags since they name commits whose history can be excluded from
the pack.
> + if (!for_push)
> + packet_write_fmt(fd_out, "peel\n");
> + packet_write_fmt(fd_out, "symrefs\n");
Are symrefs useful during push?
> + for (i = 0; ref_patterns && i < ref_patterns->argc; i++) {
> + packet_write_fmt(fd_out, "ref-pattern %s\n",
> + ref_patterns->argv[i]);
> + }
The exciting part.
Why do these pkts end with \n? I would have expected the pkt-line
framing to work to delimit them.
> + packet_flush(fd_out);
> +
> + /* Process response from server */
> + while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
> + if (!process_ref_v2(reader->line, &list))
> + die("invalid ls-refs response: %s", reader->line);
> + }
> +
> + if (reader->status != PACKET_READ_FLUSH)
> + die("protocol error");
Can this protocol error give more detail? When diagnosing an error in
servers, proxies, or lower-level networking issues, informative protocol
errors can be very helpful (similar to syntax errors from a compiler).
[...]
> --- a/connect.h
> +++ b/connect.h
> @@ -16,4 +16,6 @@ extern int url_is_local_not_ssh(const char *url);
> struct packet_reader;
> extern enum protocol_version discover_version(struct packet_reader *reader);
>
> +extern int server_supports_v2(const char *c, int die_on_error);
const char *cap, maybe?
[...]
> --- a/remote.h
> +++ b/remote.h
> @@ -151,10 +151,14 @@ void free_refs(struct ref *ref);
>
> struct oid_array;
> struct packet_reader;
> +struct argv_array;
> extern struct ref **get_remote_heads(struct packet_reader *reader,
> struct ref **list, unsigned int flags,
> struct oid_array *extra_have,
> struct oid_array *shallow_points);
> +extern struct ref **get_remote_refs(int fd_out, struct packet_reader *reader,
> + struct ref **list, int for_push,
> + const struct argv_array *ref_patterns);
What is the difference between get_remote_heads and get_remote_refs?
A comment might help. (BTW, thanks for making the new saner name to
replace get_remote_heads!)
[...]
> --- /dev/null
> +++ b/t/t5702-protocol-v2.sh
> @@ -0,0 +1,53 @@
> +#!/bin/sh
> +
> +test_description='test git wire-protocol version 2'
Woot!
[...]
> +test_expect_success 'list refs with git:// using protocol v2' '
> + GIT_TRACE_PACKET=1 git -c protocol.version=2 \
> + ls-remote --symref "$GIT_DAEMON_URL/parent" >actual 2>log &&
> +
> + # Client requested to use protocol v2
> + grep "git> .*\\\0\\\0version=2\\\0$" log &&
> + # Server responded using protocol v2
> + grep "git< version 2" log &&
optional: Could anchor these greps to make the test tighter (e.g. to
not match "version 20".
Thanks,
Jonathan