Re: [PATCH 1/5] connect: split git:// setup into a separate function

2017-11-16 Thread Junio C Hamano
Jonathan Nieder  writes:

>> Which means the defaulting of git_connect::conn to _fork is now
>> unneeded.  One of the things that made the original cascade a bit
>> harder to follow than necessary, aside from the physical length of
>> the PROTO_GIT part, was that the case where conn remains to point at
>> no_fork looked very special and it was buried in that long PROTO_GIT
>> part.
>
> Good idea.  Here's what I'll include in the reroll.

Sounds good.


Re: [PATCH 1/5] connect: split git:// setup into a separate function

2017-11-15 Thread Jonathan Nieder
Hi,

On Oct 24, 2017, Junio C Hamano wrote:
> Jonathan Nieder  writes:

>> +static struct child_process *git_connect_git(int fd[2], char *hostandport,
>> + const char *path, const char *prog,
>> + int flags)
>> +{
>> +struct child_process *conn = _fork;
>> +struct strbuf request = STRBUF_INIT;
>
> As this one decides what "conn" to return, including the fallback
> _fork instance,...
>
>> +...
>> +return conn;
>> +}
>> +
>>  /*
>>   * This returns a dummy child_process if the transport protocol does not
>>   * need fork(2), or a struct child_process object if it does.  Once done,
>> @@ -881,50 +939,7 @@ struct child_process *git_connect(int fd[2], const char 
>> *url,
>
> Each of the if/elseif/ cascade, one of which calls the new helper,
> now makes an explicit assignment to "conn" declared in
> git_connect().
>
> Which means the defaulting of git_connect::conn to _fork is now
> unneeded.  One of the things that made the original cascade a bit
> harder to follow than necessary, aside from the physical length of
> the PROTO_GIT part, was that the case where conn remains to point at
> no_fork looked very special and it was buried in that long PROTO_GIT
> part.

Good idea.  Here's what I'll include in the reroll.

-- >8 --
Subject: connect: move no_fork fallback to git_tcp_connect

git_connect has the structure

struct child_process *conn = _fork;

...
switch (protocol) {
case PROTO_GIT:
if (git_use_proxy(hostandport))
conn = git_proxy_connect(fd, hostandport);
else
git_tcp_connect(fd, hostandport, flags);
...
break;
case PROTO_SSH:
conn = xmalloc(sizeof(*conn));
child_process_init(conn);
argv_array_push(>args, ssh);
...
break;
...
return conn;

In all cases except the git_tcp_connect case, conn is explicitly
assigned a value. Make the code clearer by explicitly assigning
'conn = _fork' in the tcp case and eliminating the default so the
compiler can ensure conn is always correctly assigned.

Noticed-by: Junio C Hamano 
Signed-off-by: Jonathan Nieder 
---
 connect.c | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/connect.c b/connect.c
index 7fbd396b35..b6accf71cb 100644
--- a/connect.c
+++ b/connect.c
@@ -582,12 +582,21 @@ static int git_tcp_connect_sock(char *host, int flags)
 #endif /* NO_IPV6 */
 
 
-static void git_tcp_connect(int fd[2], char *host, int flags)
+static struct child_process no_fork = CHILD_PROCESS_INIT;
+
+int git_connection_is_socket(struct child_process *conn)
+{
+   return conn == _fork;
+}
+
+static struct child_process *git_tcp_connect(int fd[2], char *host, int flags)
 {
int sockfd = git_tcp_connect_sock(host, flags);
 
fd[0] = sockfd;
fd[1] = dup(sockfd);
+
+   return _fork;
 }
 
 
@@ -761,8 +770,6 @@ static enum protocol parse_connect_url(const char 
*url_orig, char **ret_host,
return protocol;
 }
 
-static struct child_process no_fork = CHILD_PROCESS_INIT;
-
 static const char *get_ssh_command(void)
 {
const char *ssh;
@@ -865,7 +872,7 @@ struct child_process *git_connect(int fd[2], const char 
*url,
  const char *prog, int flags)
 {
char *hostandport, *path;
-   struct child_process *conn = _fork;
+   struct child_process *conn;
enum protocol protocol;
 
/* Without this we cannot rely on waitpid() to tell
@@ -901,7 +908,7 @@ struct child_process *git_connect(int fd[2], const char 
*url,
if (git_use_proxy(hostandport))
conn = git_proxy_connect(fd, hostandport);
else
-   git_tcp_connect(fd, hostandport, flags);
+   conn = git_tcp_connect(fd, hostandport, flags);
/*
 * Separate original protocol components prog and path
 * from extended host header with a NUL byte.
@@ -1041,11 +1048,6 @@ struct child_process *git_connect(int fd[2], const char 
*url,
return conn;
 }
 
-int git_connection_is_socket(struct child_process *conn)
-{
-   return conn == _fork;
-}
-
 int finish_connect(struct child_process *conn)
 {
int code;
-- 
2.15.0.448.gf294e3d99a



Re: [PATCH 1/5] connect: split git:// setup into a separate function

2017-10-23 Thread Stefan Beller
On Mon, Oct 23, 2017 at 6:54 PM, Junio C Hamano  wrote:
> Stefan Beller  writes:
>
>> I think once this option is given, all we have to do is pay attention to
>> this option in diff.c#moved_entry_cmp/next_byte, which is best built
>> on top of Peffs recent fixes origin/jk/diff-color-moved-fix.
>> Would that be of interest for people?
>
> Two things and a half.
>
>  * I was hoping that the next_byte() and string_hash() thing, once
>they are cleaned up, will eventually be shared with the xdiff/
>code at the lower layer, which needs to do pretty much the same
>in order to implement various whitespace ignoring options.  I am
>not sure how well the approach taken by the WIP patch meshes with
>the needs of the lower layer.

Good point. I'll keep that in mind when redoing that patch.
(I might even try to clean up the xdiff stuff and reuse the code
here)

>  * I agree that -w that applies only one or the other and not both
>may sometimes produce a better/readable result, but the more
>important part is how the user can tell when to exercise the
>option.  Would it be realistic to expect them to try -w in
>different combinations and see which looks the best?  What if we
>have a patch that touch two files, one looks better with -w only
>for coloring moved and the other looks better with -w for both?
>
>  * As moved-lines display is mostly a presentation thing, I wonder
>if it makes sense to always match loosely wrt whitespace
>differences.  It is tempting because if it is true, we do not
>have to worry about the second issue above.

Well, sometimes the user wants to know if it is byte-for-byte identical
(unlikely to be code, but maybe column oriented data for input;
think of all our FORTRAN users. ;)
and at other times the user just wants to approximately know
if it is the same (i.e. code ignoring white space changes).
If Git was *really* smart w.r.t. languages it might even ignore
identifier renames in the programming language.

So I think an "unconditionally ignore all white space in
move detection" is not the best (it could be a good default though!)
but the user wants to have a possibility to byte-for-byte comparision
(maybe even including CRLFs/LFs)

Thanks,
Stefan

> Thanks.


Re: [PATCH 1/5] connect: split git:// setup into a separate function

2017-10-23 Thread Junio C Hamano
Stefan Beller  writes:

> I think once this option is given, all we have to do is pay attention to
> this option in diff.c#moved_entry_cmp/next_byte, which is best built
> on top of Peffs recent fixes origin/jk/diff-color-moved-fix.
> Would that be of interest for people?

Two things and a half.

 * I was hoping that the next_byte() and string_hash() thing, once
   they are cleaned up, will eventually be shared with the xdiff/
   code at the lower layer, which needs to do pretty much the same
   in order to implement various whitespace ignoring options.  I am
   not sure how well the approach taken by the WIP patch meshes with
   the needs of the lower layer.

 * I agree that -w that applies only one or the other and not both
   may sometimes produce a better/readable result, but the more
   important part is how the user can tell when to exercise the
   option.  Would it be realistic to expect them to try -w in
   different combinations and see which looks the best?  What if we
   have a patch that touch two files, one looks better with -w only
   for coloring moved and the other looks better with -w for both?

 * As moved-lines display is mostly a presentation thing, I wonder
   if it makes sense to always match loosely wrt whitespace
   differences.  It is tempting because if it is true, we do not
   have to worry about the second issue above.

Thanks.


Re: [PATCH 1/5] connect: split git:// setup into a separate function

2017-10-23 Thread Junio C Hamano
Jonathan Nieder  writes:

> The git_connect function is growing long.  Split the
> PROTO_GIT-specific portion to a separate function to make it easier to
> read.
>
> No functional change intended.
>
> Signed-off-by: Jonathan Nieder 
> Reviewed-by: Stefan Beller 
> ---
> As before, except with sbeller's Reviewed-by.

I found this quite nice, except for one thing.

> +/*
> + * Open a connection using Git's native protocol.
> + *
> + * The caller is responsible for freeing hostandport, but this function may
> + * modify it (for example, to truncate it to remove the port part).
> + */
> +static struct child_process *git_connect_git(int fd[2], char *hostandport,
> +  const char *path, const char *prog,
> +  int flags)
> +{
> + struct child_process *conn = _fork;
> + struct strbuf request = STRBUF_INIT;

As this one decides what "conn" to return, including the fallback
_fork instance,...

> + ...
> + return conn;
> +}
> +
>  /*
>   * This returns a dummy child_process if the transport protocol does not
>   * need fork(2), or a struct child_process object if it does.  Once done,
> @@ -881,50 +939,7 @@ struct child_process *git_connect(int fd[2], const char 
> *url,

Each of the if/elseif/ cascade, one of which calls the new helper,
now makes an explicit assignment to "conn" declared in
git_connect().

Which means the defaulting of git_connect::conn to _fork is now
unneeded.  One of the things that made the original cascade a bit
harder to follow than necessary, aside from the physical length of
the PROTO_GIT part, was that the case where conn remains to point at
no_fork looked very special and it was buried in that long PROTO_GIT
part.  Now the main source of that issue is fixed, it would make it
clear to leave conn uninitialized (or initialize to NULL---but leaving
it uninitialized would make the intention of the code more clear, I
would think, that each of the if/elseif/ cascade must assign to it).

>   printf("Diag: path=%s\n", path ? path : "NULL");
>   conn = NULL;
>   } else if (protocol == PROTO_GIT) {
> - struct strbuf request = STRBUF_INIT;
> -...
> + conn = git_connect_git(fd, hostandport, path, prog, flags);
>   } else {
>   struct strbuf cmd = STRBUF_INIT;
>   const char *const *var;


[PATCH 1/5] connect: split git:// setup into a separate function

2017-10-23 Thread Jonathan Nieder
The git_connect function is growing long.  Split the
PROTO_GIT-specific portion to a separate function to make it easier to
read.

No functional change intended.

Signed-off-by: Jonathan Nieder 
Reviewed-by: Stefan Beller 
---
As before, except with sbeller's Reviewed-by.

 connect.c | 103 +++---
 1 file changed, 59 insertions(+), 44 deletions(-)

diff --git a/connect.c b/connect.c
index 7fbd396b35..068e70caad 100644
--- a/connect.c
+++ b/connect.c
@@ -850,6 +850,64 @@ static enum ssh_variant determine_ssh_variant(const char 
*ssh_command,
return ssh_variant;
 }
 
+/*
+ * Open a connection using Git's native protocol.
+ *
+ * The caller is responsible for freeing hostandport, but this function may
+ * modify it (for example, to truncate it to remove the port part).
+ */
+static struct child_process *git_connect_git(int fd[2], char *hostandport,
+const char *path, const char *prog,
+int flags)
+{
+   struct child_process *conn = _fork;
+   struct strbuf request = STRBUF_INIT;
+   /*
+* Set up virtual host information based on where we will
+* connect, unless the user has overridden us in
+* the environment.
+*/
+   char *target_host = getenv("GIT_OVERRIDE_VIRTUAL_HOST");
+   if (target_host)
+   target_host = xstrdup(target_host);
+   else
+   target_host = xstrdup(hostandport);
+
+   transport_check_allowed("git");
+
+   /* These underlying connection commands die() if they
+* cannot connect.
+*/
+   if (git_use_proxy(hostandport))
+   conn = git_proxy_connect(fd, hostandport);
+   else
+   git_tcp_connect(fd, hostandport, flags);
+   /*
+* Separate original protocol components prog and path
+* from extended host header with a NUL byte.
+*
+* Note: Do not add any other headers here!  Doing so
+* will cause older git-daemon servers to crash.
+*/
+   strbuf_addf(,
+   "%s %s%chost=%s%c",
+   prog, path, 0,
+   target_host, 0);
+
+   /* If using a new version put that stuff here after a second null byte 
*/
+   if (get_protocol_version_config() > 0) {
+   strbuf_addch(, '\0');
+   strbuf_addf(, "version=%d%c",
+   get_protocol_version_config(), '\0');
+   }
+
+   packet_write(fd[1], request.buf, request.len);
+
+   free(target_host);
+   strbuf_release();
+   return conn;
+}
+
 /*
  * This returns a dummy child_process if the transport protocol does not
  * need fork(2), or a struct child_process object if it does.  Once done,
@@ -881,50 +939,7 @@ struct child_process *git_connect(int fd[2], const char 
*url,
printf("Diag: path=%s\n", path ? path : "NULL");
conn = NULL;
} else if (protocol == PROTO_GIT) {
-   struct strbuf request = STRBUF_INIT;
-   /*
-* Set up virtual host information based on where we will
-* connect, unless the user has overridden us in
-* the environment.
-*/
-   char *target_host = getenv("GIT_OVERRIDE_VIRTUAL_HOST");
-   if (target_host)
-   target_host = xstrdup(target_host);
-   else
-   target_host = xstrdup(hostandport);
-
-   transport_check_allowed("git");
-
-   /* These underlying connection commands die() if they
-* cannot connect.
-*/
-   if (git_use_proxy(hostandport))
-   conn = git_proxy_connect(fd, hostandport);
-   else
-   git_tcp_connect(fd, hostandport, flags);
-   /*
-* Separate original protocol components prog and path
-* from extended host header with a NUL byte.
-*
-* Note: Do not add any other headers here!  Doing so
-* will cause older git-daemon servers to crash.
-*/
-   strbuf_addf(,
-   "%s %s%chost=%s%c",
-   prog, path, 0,
-   target_host, 0);
-
-   /* If using a new version put that stuff here after a second 
null byte */
-   if (get_protocol_version_config() > 0) {
-   strbuf_addch(, '\0');
-   strbuf_addf(, "version=%d%c",
-   get_protocol_version_config(), '\0');
-   }
-
-   packet_write(fd[1], request.buf, request.len);
-
-   free(target_host);
-   strbuf_release();
+   conn = 

Re: [PATCH 1/5] connect: split git:// setup into a separate function

2017-10-23 Thread Stefan Beller
On Mon, Oct 23, 2017 at 2:29 PM, Jonathan Nieder  wrote:
> The git_connect function is growing long.  Split the
> PROTO_GIT-specific portion to a separate function to make it easier to
> read.
>
> No functional change intended.
>
> Signed-off-by: Jonathan Nieder 

This also looks good to me.

unrelated:
Patch 2 was very easy to review using "log -p -w --color-moved",
this one however was not. This is because -w caused the diff machinery
to generate a completely different diff. (Not showing the new function
completely but some weird function header trickery. The white space
mangled output is below; most of it was colored "moved")

I had to have -w as otherwise --color-moved would not work,
so maybe we want to have an option to ignore white space for the
sake of move detection only, not affecting the diff in general;
maybe '--ignore-white-space-in-move-detection'?

I think once this option is given, all we have to do is pay attention to
this option in diff.c#moved_entry_cmp/next_byte, which is best built
on top of Peffs recent fixes origin/jk/diff-color-moved-fix.
Would that be of interest for people?

Thanks,
Stefan

diff --git a/connect.c b/connect.c
index 7fbd396b35..068e70caad 100644
--- a/connect.c
+++ b/connect.c
@@ -851,36 +851,16 @@ static enum ssh_variant
determine_ssh_variant(const char *ssh_command,
 }

 /*
- * This returns a dummy child_process if the transport protocol does not
- * need fork(2), or a struct child_process object if it does.  Once done,
- * finish the connection with finish_connect() with the value returned from
- * this function (it is safe to call finish_connect() with NULL to support
- * the former case).
+ * Open a connection using Git's native protocol.
  *
- * If it returns, the connect is successful; it just dies on errors (this
- * will hopefully be changed in a libification effort, to return NULL when
- * the connection failed).
+ * The caller is responsible for freeing hostandport, but this function may
+ * modify it (for example, to truncate it to remove the port part).
  */
-struct child_process *git_connect(int fd[2], const char *url,
-  const char *prog, int flags)
+static struct child_process *git_connect_git(int fd[2], char *hostandport,
+ const char *path, const
char *prog,
+ int flags)
 {
-char *hostandport, *path;
 struct child_process *conn = _fork;
-enum protocol protocol;
-
-/* Without this we cannot rely on waitpid() to tell
- * what happened to our children.
- */
-signal(SIGCHLD, SIG_DFL);
-
-protocol = parse_connect_url(url, , );
-if ((flags & CONNECT_DIAG_URL) && (protocol != PROTO_SSH)) {
-printf("Diag: url=%s\n", url ? url : "NULL");
-printf("Diag: protocol=%s\n", prot_name(protocol));
-printf("Diag: hostandport=%s\n", hostandport ?
hostandport : "NULL");
-printf("Diag: path=%s\n", path ? path : "NULL");
-conn = NULL;
-} else if (protocol == PROTO_GIT) {
 struct strbuf request = STRBUF_INIT;
 /*
  * Set up virtual host information based on where we will
@@ -925,6 +905,41 @@ struct child_process *git_connect(int fd[2],
const char *url,

 free(target_host);
 strbuf_release();
+return conn;
+}
+
+/*
+ * This returns a dummy child_process if the transport protocol does not
+ * need fork(2), or a struct child_process object if it does.  Once done,
+ * finish the connection with finish_connect() with the value returned from
+ * this function (it is safe to call finish_connect() with NULL to support
+ * the former case).
+ *
+ * If it returns, the connect is successful; it just dies on errors (this
+ * will hopefully be changed in a libification effort, to return NULL when
+ * the connection failed).
+ */
+struct child_process *git_connect(int fd[2], const char *url,
+  const char *prog, int flags)
+{
+char *hostandport, *path;
+struct child_process *conn = _fork;
+enum protocol protocol;
+
+/* Without this we cannot rely on waitpid() to tell
+ * what happened to our children.
+ */
+signal(SIGCHLD, SIG_DFL);
+
+protocol = parse_connect_url(url, , );
+if ((flags & CONNECT_DIAG_URL) && (protocol != PROTO_SSH)) {
+printf("Diag: url=%s\n", url ? url : "NULL");
+printf("Diag: protocol=%s\n", prot_name(protocol));
+printf("Diag: hostandport=%s\n", hostandport ?
hostandport : "NULL");
+printf("Diag: path=%s\n", path ? path : "NULL");
+conn = NULL;
+} else if (protocol == PROTO_GIT) {
+conn = git_connect_git(fd, hostandport, path, prog, flags);
 } else {
 struct strbuf cmd = 

[PATCH 1/5] connect: split git:// setup into a separate function

2017-10-23 Thread Jonathan Nieder
The git_connect function is growing long.  Split the
PROTO_GIT-specific portion to a separate function to make it easier to
read.

No functional change intended.

Signed-off-by: Jonathan Nieder 
---
 connect.c | 103 +++---
 1 file changed, 59 insertions(+), 44 deletions(-)

diff --git a/connect.c b/connect.c
index 7fbd396b35..068e70caad 100644
--- a/connect.c
+++ b/connect.c
@@ -850,6 +850,64 @@ static enum ssh_variant determine_ssh_variant(const char 
*ssh_command,
return ssh_variant;
 }
 
+/*
+ * Open a connection using Git's native protocol.
+ *
+ * The caller is responsible for freeing hostandport, but this function may
+ * modify it (for example, to truncate it to remove the port part).
+ */
+static struct child_process *git_connect_git(int fd[2], char *hostandport,
+const char *path, const char *prog,
+int flags)
+{
+   struct child_process *conn = _fork;
+   struct strbuf request = STRBUF_INIT;
+   /*
+* Set up virtual host information based on where we will
+* connect, unless the user has overridden us in
+* the environment.
+*/
+   char *target_host = getenv("GIT_OVERRIDE_VIRTUAL_HOST");
+   if (target_host)
+   target_host = xstrdup(target_host);
+   else
+   target_host = xstrdup(hostandport);
+
+   transport_check_allowed("git");
+
+   /* These underlying connection commands die() if they
+* cannot connect.
+*/
+   if (git_use_proxy(hostandport))
+   conn = git_proxy_connect(fd, hostandport);
+   else
+   git_tcp_connect(fd, hostandport, flags);
+   /*
+* Separate original protocol components prog and path
+* from extended host header with a NUL byte.
+*
+* Note: Do not add any other headers here!  Doing so
+* will cause older git-daemon servers to crash.
+*/
+   strbuf_addf(,
+   "%s %s%chost=%s%c",
+   prog, path, 0,
+   target_host, 0);
+
+   /* If using a new version put that stuff here after a second null byte 
*/
+   if (get_protocol_version_config() > 0) {
+   strbuf_addch(, '\0');
+   strbuf_addf(, "version=%d%c",
+   get_protocol_version_config(), '\0');
+   }
+
+   packet_write(fd[1], request.buf, request.len);
+
+   free(target_host);
+   strbuf_release();
+   return conn;
+}
+
 /*
  * This returns a dummy child_process if the transport protocol does not
  * need fork(2), or a struct child_process object if it does.  Once done,
@@ -881,50 +939,7 @@ struct child_process *git_connect(int fd[2], const char 
*url,
printf("Diag: path=%s\n", path ? path : "NULL");
conn = NULL;
} else if (protocol == PROTO_GIT) {
-   struct strbuf request = STRBUF_INIT;
-   /*
-* Set up virtual host information based on where we will
-* connect, unless the user has overridden us in
-* the environment.
-*/
-   char *target_host = getenv("GIT_OVERRIDE_VIRTUAL_HOST");
-   if (target_host)
-   target_host = xstrdup(target_host);
-   else
-   target_host = xstrdup(hostandport);
-
-   transport_check_allowed("git");
-
-   /* These underlying connection commands die() if they
-* cannot connect.
-*/
-   if (git_use_proxy(hostandport))
-   conn = git_proxy_connect(fd, hostandport);
-   else
-   git_tcp_connect(fd, hostandport, flags);
-   /*
-* Separate original protocol components prog and path
-* from extended host header with a NUL byte.
-*
-* Note: Do not add any other headers here!  Doing so
-* will cause older git-daemon servers to crash.
-*/
-   strbuf_addf(,
-   "%s %s%chost=%s%c",
-   prog, path, 0,
-   target_host, 0);
-
-   /* If using a new version put that stuff here after a second 
null byte */
-   if (get_protocol_version_config() > 0) {
-   strbuf_addch(, '\0');
-   strbuf_addf(, "version=%d%c",
-   get_protocol_version_config(), '\0');
-   }
-
-   packet_write(fd[1], request.buf, request.len);
-
-   free(target_host);
-   strbuf_release();
+   conn = git_connect_git(fd, hostandport, path, prog, flags);
} else {
struct strbuf cmd =