Re: [PATCHv2] submodule: Port resolve_relative_url from shell to C
A rather superficial review... On Wed, Dec 16, 2015 at 7:26 PM, Stefan Bellerwrote: > This reimplements the helper function `resolve_relative_url` in shell s/This reimplements/Reimplement/ > in C. This functionality is needed in C for introducing the groups > feature later on. When using groups, the user should not need to run > `git submodule init`, but it should be implicit at all appropriate places, > which are all in C code. As the we would not just call out to `git I guess you mean "As then we..." or something? > submodule init`, but do a more fine grained structure there, we actually > need all the init functionality in C before attempting the groups > feature. To get the init functionality in C, rewriting the > resolve_relative_url subfunction is a major step. > > Signed-off-by: Stefan Beller > --- > diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c > @@ -9,6 +9,156 @@ > +static const char *get_default_remote(void) > +{ > + char *dest = NULL; > + unsigned char sha1[20]; > + int flag; > + struct strbuf sb = STRBUF_INIT; > + const char *refname = resolve_ref_unsafe("HEAD", 0, sha1, ); > + > + if (!refname) > + die("No such ref: HEAD"); > + > + refname = shorten_unambiguous_ref(refname, 0); > + strbuf_addf(, "branch.%s.remote", refname); > + if (git_config_get_string(sb.buf, )) > + return "origin"; > + else > + return xstrdup(dest); Leaking the strbuf at both returns. And, leaking the strdup'd dest (in the caller), but I suppose that's intentional? > +} > + > +static int has_same_dir_prefix(const char *str, const char **out) > +{ > +#ifdef GIT_WINDOWS_NATIVE > + return skip_prefix(str, "./", out) > + || skip_prefix(str, ".\\", out); > +#else > + return skip_prefix(str, "./", out); > +#endif > +} > + > +static int has_upper_dir_prefix(const char *str, const char **out) > +{ > +#ifdef GIT_WINDOWS_NATIVE > + return skip_prefix(str, "../", out) > + || skip_prefix(str, "..\\", out); > +#else > + return skip_prefix(str, "../", out); > +#endif > +} > + > +static char *last_dir_separator(const char *str) > +{ > +#ifdef GIT_WINDOWS_NATIVE > + return strrchr(str, "/") > + || strrchr(str, "\\"); > +#else > + return strrchr(str, '/'); > +#endif > +} Cleaner would be to move the #if's outside the functions: #ifdef GIT_WINDOWS_NATIVE /* Windows implementations */ static int has_same_dir_prefix(...) {...} static int has_upper_dir_prefix(...) {...} static char *last_dir_separator(...) {...} #else /* POSIX implementations */ static int has_same_dir_prefix(...) {...} static int has_upper_dir_prefix(...) {...} static char *last_dir_separator(...) {...} #endif > +static const char *relative_url(const char *url, const char *up_path) > +{ > + int is_relative = 0; > + size_t len; > + char *remoteurl = NULL; > + char *sep = "/"; 'sep' only ever holds a single character, so why not declare it 'char' rather than 'char *'? (And, adjust the format string of strbuf_addf(), of course.) > + const char *out; > + [...] > + while (url) { > + if (has_upper_dir_prefix(url, )) { > + [...] > + if (rfind) > + *rfind = '\0'; > + else { > + rfind = strrchr(remoteurl, ':'); > + if (rfind) { > + *rfind = '\0'; > + sep = ":"; > + } else { > + [...] > + } > + } > + } else if (has_same_dir_prefix(url, )) > + url = out; > + else > + break; > + } > + strbuf_reset(); > + strbuf_addf(, "%s%s%s", remoteurl, sep, url); > + > + if (!has_same_dir_prefix(sb.buf, )) > + out = sb.buf; > + out = xstrdup(out); > + > + strbuf_reset(); > + strbuf_addf(, "%s%s", is_relative && up_path ? up_path : "", out); > + > + free((char*)out); Why declare 'out' const if you know you will be freeing it? > + return strbuf_detach(, NULL); > +} > + > +static int resolve_relative_url(int argc, const char **argv, const char > *prefix) > +{ > + if (argc == 2) > + printf("%s\n", relative_url(argv[1], NULL)); > + else if (argc == 3) > + printf("%s\n", relative_url(argv[1], argv[2])); > + else > + die("BUG: resolve_relative_url only accepts one or two > arguments"); > + return 0; > +} > > struct module_list { > const struct cache_entry **entries; > @@ -264,6 +414,7 @@
Re: [PATCHv2] submodule: Port resolve_relative_url from shell to C
Am 17.12.2015 um 01:26 schrieb Stefan Beller: > This reimplements the helper function `resolve_relative_url` in shell > in C. This functionality is needed in C for introducing the groups > feature later on. When using groups, the user should not need to run > `git submodule init`, but it should be implicit at all appropriate places, > which are all in C code. As the we would not just call out to `git > submodule init`, but do a more fine grained structure there, we actually > need all the init functionality in C before attempting the groups > feature. To get the init functionality in C, rewriting the > resolve_relative_url subfunction is a major step. > > This also improves the performance: > (Best out of 3) time ./t7400-submodule-basic.sh > Before: > real 0m9.575s > user 0m2.683s > sys 0m6.773s > After: > real 0m9.293s > user 0m2.691s > sys 0m6.549s > > That's about 3%. I appreciate this effort as it should help us on Windows. Although the numbers (and my own timings) suggest that this is only a small step forward. That's not surprising as the patch removes only two forks. As to the implementation, find a patch below that removes the ifdefs and a few other suggestions. It is a mechanical conversion without understanding what relative_url() does. I have the gut feeling that the two strbuf_addf towards the end of the function can be contracted and the temporarily allocate copy in 'out' can be removed. If there were a few examples in the comment above the function, it would be much simpler to understand. diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index b925bed..8ec0975 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -11,6 +11,7 @@ #include "run-command.h" #include "remote.h" #include "refs.h" +#include "connect.h" static const char *get_default_remote(void) { @@ -31,34 +32,23 @@ static const char *get_default_remote(void) return xstrdup(dest); } -static int has_same_dir_prefix(const char *str, const char **out) +static int starts_with_dot_slash(const char *str) { -#ifdef GIT_WINDOWS_NATIVE - return skip_prefix(str, "./", out) - || skip_prefix(str, ".\\", out); -#else - return skip_prefix(str, "./", out); -#endif + return str[0] == '.' && is_dir_sep(str[1]); } -static int has_upper_dir_prefix(const char *str, const char **out) +static int starts_with_dot_dot_slash(const char *str) { -#ifdef GIT_WINDOWS_NATIVE - return skip_prefix(str, "../", out) - || skip_prefix(str, "..\\", out); -#else - return skip_prefix(str, "../", out); -#endif + return str[0] == '.' && str[1] == '.' && is_dir_sep(str[2]); } -static char *last_dir_separator(const char *str) +static char *last_dir_separator(char *str) { -#ifdef GIT_WINDOWS_NATIVE - return strrchr(str, "/") - || strrchr(str, "\\"); -#else - return strrchr(str, '/'); -#endif + char* p = str + strlen(str); + while (p-- != str) + if (is_dir_sep(*p)) + return p; + return NULL; } /* @@ -85,9 +75,10 @@ static const char *relative_url(const char *url, const char *up_path) size_t len; char *remoteurl = NULL; char *sep = "/"; - const char *out; + char *out; struct strbuf sb = STRBUF_INIT; const char *remote = get_default_remote(); + strbuf_addf(, "remote.%s.url", remote); if (git_config_get_string(sb.buf, )) @@ -98,22 +89,23 @@ static const char *relative_url(const char *url, const char *up_path) if (is_dir_sep(remoteurl[len])) remoteurl[len] = '\0'; - if (strchr(remoteurl, ':') || is_dir_sep(*remoteurl)) + if (!url_is_local_not_ssh(remoteurl) || is_absolute_path(remoteurl)) is_relative = 0; - else if (has_same_dir_prefix(remoteurl, ) || - has_upper_dir_prefix(remoteurl, )) + else if (starts_with_dot_slash(remoteurl) || + starts_with_dot_dot_slash(remoteurl)) is_relative = 1; else { is_relative = 1; strbuf_reset(); strbuf_addf(, "./%s", remoteurl); + free(remoteurl); remoteurl = strbuf_detach(, NULL); } while (url) { - if (has_upper_dir_prefix(url, )) { + if (starts_with_dot_dot_slash(url)) { char *rfind; - url = out; + url += 3; rfind = last_dir_separator(remoteurl); if (rfind) @@ -130,22 +122,23 @@ static const char *relative_url(const char *url, const char *up_path) remoteurl = "."; } } - } else if (has_same_dir_prefix(url, )) - url = out; -
Re: [PATCHv2] submodule: Port resolve_relative_url from shell to C
Am 17.12.2015 um 19:55 schrieb Johannes Sixt: As to the implementation, find a patch below that removes the ifdefs and a few other suggestions. The word "offers" is missing in this last half-sentence ;) -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv2] submodule: Port resolve_relative_url from shell to C
On Thu, Dec 17, 2015 at 07:55:43PM +0100, Johannes Sixt wrote: > -static int has_same_dir_prefix(const char *str, const char **out) > +static int starts_with_dot_slash(const char *str) > { > -#ifdef GIT_WINDOWS_NATIVE > - return skip_prefix(str, "./", out) > - || skip_prefix(str, ".\\", out); > -#else > - return skip_prefix(str, "./", out); > -#endif > + return str[0] == '.' && is_dir_sep(str[1]); > } > > -static int has_upper_dir_prefix(const char *str, const char **out) > +static int starts_with_dot_dot_slash(const char *str) > { > -#ifdef GIT_WINDOWS_NATIVE > - return skip_prefix(str, "../", out) > - || skip_prefix(str, "..\\", out); > -#else > - return skip_prefix(str, "../", out); > -#endif > + return str[0] == '.' && str[1] == '.' && is_dir_sep(str[2]); > } As the set of prefixes you are looking is probably bounded, it may not be worth generalizing this. But I wondered if something like: /* * Like skip_prefix, but consider any "/" in the prefix as a * directory separator for the platform. */ int skip_prefix_fs(const char *str, const char *prefix, const char **out) { while (1) { if (!*prefix) { *out = str; return 1; } else if (*prefix == '/') { if (!is_dir_sep(*str)) return 0; } else { if (*str != *prefix) return 0; } str++; prefix++; } } ... /* works on all platforms! */ if (skip_prefix_fs(foo, "./", )) ... would be helpful. I don't know if there are other opportunities in the code base that could make use of this. If it's just these two sites, it's probably not worth it. -Peff -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2] submodule: Port resolve_relative_url from shell to C
This reimplements the helper function `resolve_relative_url` in shell in C. This functionality is needed in C for introducing the groups feature later on. When using groups, the user should not need to run `git submodule init`, but it should be implicit at all appropriate places, which are all in C code. As the we would not just call out to `git submodule init`, but do a more fine grained structure there, we actually need all the init functionality in C before attempting the groups feature. To get the init functionality in C, rewriting the resolve_relative_url subfunction is a major step. This also improves the performance: (Best out of 3) time ./t7400-submodule-basic.sh Before: real0m9.575s user0m2.683s sys 0m6.773s After: real0m9.293s user0m2.691s sys 0m6.549s That's about 3%. Signed-off-by: Stefan Beller--- builtin/submodule--helper.c | 151 git-submodule.sh| 81 ++-- 2 files changed, 155 insertions(+), 77 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index f4c3eff..b925bed 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -9,6 +9,156 @@ #include "submodule-config.h" #include "string-list.h" #include "run-command.h" +#include "remote.h" +#include "refs.h" + +static const char *get_default_remote(void) +{ + char *dest = NULL; + unsigned char sha1[20]; + int flag; + struct strbuf sb = STRBUF_INIT; + const char *refname = resolve_ref_unsafe("HEAD", 0, sha1, ); + + if (!refname) + die("No such ref: HEAD"); + + refname = shorten_unambiguous_ref(refname, 0); + strbuf_addf(, "branch.%s.remote", refname); + if (git_config_get_string(sb.buf, )) + return "origin"; + else + return xstrdup(dest); +} + +static int has_same_dir_prefix(const char *str, const char **out) +{ +#ifdef GIT_WINDOWS_NATIVE + return skip_prefix(str, "./", out) + || skip_prefix(str, ".\\", out); +#else + return skip_prefix(str, "./", out); +#endif +} + +static int has_upper_dir_prefix(const char *str, const char **out) +{ +#ifdef GIT_WINDOWS_NATIVE + return skip_prefix(str, "../", out) + || skip_prefix(str, "..\\", out); +#else + return skip_prefix(str, "../", out); +#endif +} + +static char *last_dir_separator(const char *str) +{ +#ifdef GIT_WINDOWS_NATIVE + return strrchr(str, "/") + || strrchr(str, "\\"); +#else + return strrchr(str, '/'); +#endif +} + +/* + * The function takes at most 2 arguments. The first argument is the + * URL that navigates to the submodule origin repo. When relative, this URL + * is relative to the superproject origin URL repo. The second up_path + * argument, if specified, is the relative path that navigates + * from the submodule working tree to the superproject working tree. + * + * The output of the function is the origin URL of the submodule. + * + * The output will either be an absolute URL or filesystem path (if the + * superproject origin URL is an absolute URL or filesystem path, + * respectively) or a relative file system path (if the superproject + * origin URL is a relative file system path). + * + * When the output is a relative file system path, the path is either + * relative to the submodule working tree, if up_path is specified, or to + * the superproject working tree otherwise. + */ +static const char *relative_url(const char *url, const char *up_path) +{ + int is_relative = 0; + size_t len; + char *remoteurl = NULL; + char *sep = "/"; + const char *out; + struct strbuf sb = STRBUF_INIT; + const char *remote = get_default_remote(); + strbuf_addf(, "remote.%s.url", remote); + + if (git_config_get_string(sb.buf, )) + /* the repository is its own authoritative upstream */ + remoteurl = xgetcwd(); + + len = strlen(remoteurl); + if (is_dir_sep(remoteurl[len])) + remoteurl[len] = '\0'; + + if (strchr(remoteurl, ':') || is_dir_sep(*remoteurl)) + is_relative = 0; + else if (has_same_dir_prefix(remoteurl, ) || + has_upper_dir_prefix(remoteurl, )) + is_relative = 1; + else { + is_relative = 1; + strbuf_reset(); + strbuf_addf(, "./%s", remoteurl); + remoteurl = strbuf_detach(, NULL); + } + + while (url) { + if (has_upper_dir_prefix(url, )) { + char *rfind; + url = out; + + rfind = last_dir_separator(remoteurl); + if (rfind) + *rfind = '\0'; + else { + rfind = strrchr(remoteurl, ':'); + if