Re: [PATCH] builtins: search builtin commands via binary search.
Stefan Beller stefanbel...@googlemail.com writes: However I could not find a speedup. So if the patch is accepted, it would only be for readability. This adds a maintenance burden. It is very easy for somebody to mistakenly run sort on the region in her editor under non C locale. Perhaps with a change like the attached, if we were to do so. Then have a test that runs git --check-builtins-sorted will ensure we won't ever screw it up. FOR ILLUSTRATION PURPOSES ONLY: diff --git a/git.c b/git.c index 2025f77..3155273 100644 --- a/git.c +++ b/git.c @@ -34,6 +34,150 @@ static void commit_pager_choice(void) { } } +struct cmd_struct { + const char *cmd; + int (*fn)(int, const char **, const char *); + int option; +}; + +#define RUN_SETUP (10) +#define RUN_SETUP_GENTLY (11) +#define USE_PAGER (12) +/* + * require working tree to be present -- anything uses this needs + * RUN_SETUP for reading from the configuration file. + */ +#define NEED_WORK_TREE (13) + +static struct cmd_struct builtin_commands[] = { + { add, cmd_add, RUN_SETUP | NEED_WORK_TREE }, + { annotate, cmd_annotate, RUN_SETUP }, + ... + { whatchanged, cmd_whatchanged, RUN_SETUP }, + { write-tree, cmd_write_tree, RUN_SETUP }, +}; + +static void make_sure_the_builtin_array_is_sorted(void) +{ + int i; + for (i = 0; i ARRAY_SIZE(builtin_commands) - 1; i++) { + struct cmd_struct *it = builtin_commands[i]; + if (strcmp(it[0].cmd, it[1].cmd) = 0) + die(BUG: builtin command list not sorted %s %s, + it[0].cmd, it[1].cmd); + } +} + static int handle_options(const char ***argv, int *argc, int *envchanged) { const char **orig_argv = *argv; @@ -62,6 +206,9 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) puts(git_exec_path()); exit(0); } + } else if (!strcmp(cmd, --check-builtins-sorted)) { + make_sure_the_builtin_array_is_sorted(); + exit(0); } else if (!strcmp(cmd, --html-path)) { puts(system_path(GIT_HTML_PATH)); exit(0); @@ -241,21 +388,6 @@ static int handle_alias(int *argcp, const char ***argv) return ret; } -#define RUN_SETUP (10) -#define RUN_SETUP_GENTLY (11) -#define USE_PAGER (12) -/* - * require working tree to be present -- anything uses this needs - * RUN_SETUP for reading from the configuration file. - */ -#define NEED_WORK_TREE (13) - -struct cmd_struct { - const char *cmd; - int (*fn)(int, const char **, const char *); - int option; -}; - static int run_builtin(struct cmd_struct *p, int argc, const char **argv) { int status, help; @@ -312,123 +444,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; - static struct cmd_struct commands[] = { - { add, cmd_add, RUN_SETUP | NEED_WORK_TREE }, - { annotate, cmd_annotate, RUN_SETUP }, - ... - { whatchanged, cmd_whatchanged, RUN_SETUP }, - { write-tree, cmd_write_tree, RUN_SETUP }, - }; int i; static const char ext[] = STRIP_EXTENSION; @@ -447,8 +462,8 @@ static void handle_internal_command(int argc, const char **argv) argv[0] = cmd = help; } - for (i = 0; i ARRAY_SIZE(commands); i++) { - struct cmd_struct *p = commands+i; + for (i = 0; i ARRAY_SIZE(builtin_commands); i++) { + struct cmd_struct *p = builtin_commands+i; if (strcmp(p-cmd, cmd)) continue; exit(run_builtin(p, argc, argv)); -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] builtins: search builtin commands via binary search.
On 07/26/2013 10:57 PM, Jonathan Nieder wrote: Hi, Stefan Beller wrote: --- a/git.c +++ b/git.c @@ -309,9 +309,18 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) return 0; } +static int compare_internal_command(const void *a, const void *b) { +/* The first parameter is of type char* describing the name, + the second is a struct cmd_struct */ Style: /* * Multi-line comments in git look like this, with an initial * /* line, a leading * on each line with text, and a line * with '*' '/' at the end. */ [...] Thanks for noting, however as Eric points out, that comment was not enlightening, so I removed it. @@ -447,12 +456,12 @@ static void handle_internal_command(int argc, const char **argv) argv[0] = cmd = help; } -for (i = 0; i ARRAY_SIZE(commands); i++) { -struct cmd_struct *p = commands+i; -if (strcmp(p-cmd, cmd)) -continue; +struct cmd_struct *p = (struct cmd_struct *)bsearch(cmd, commands, +ARRAY_SIZE(commands), sizeof(struct cmd_struct), +compare_internal_command); No need to cast --- this is C. Also removed. Fun. Does this result in a measurable speedup, or is it just for more pleasant reading? Thanks and hope that helps, Jonathan premature optimization is the root of all evil I tried hard to come up with a benchmark, but this is lost in the noise. I could not figure out a way to reproducably make sure this patch is really faster. So I tried to `time git add COPYING` to show it's not getting slower for the first entries in the list as well as `git fast-external-command` whereas the fast-external-command is just an int main() {return 0; } to check if the external commands, which are executed after searching through all the internals come up faster. However I could not find a speedup. So if the patch is accepted, it would only be for readability. I was fiddling around with make now to include the suggestion of Eric to check the arguments for being sorted in make. However I do not seem to fully understand the syntax yet. My approach would have been: sorted_internal_cmds: git.c { awk '/cmd_struct commands/,/};/ { if (match($2,//)) print $2 }' git.c builtin.actual \ sort builtin.actual builtin.expect \ cmp -s builtin.expect builtin.actual \ rm builtin.expect builtin.actual \ } all:: sorted_internal_cmds But then there is $ make ... } /bin/sh: 5: Syntax error: end of file unexpected (expecting }) So I suspect the { within the shell code inside the awk parameter is messing up? Thanks, Stefan signature.asc Description: OpenPGP digital signature
[PATCH] builtins: search builtin commands via binary search.
There are currently 115 commands built into the git executable. Before this commit, it was iterated over these commands in a linear order, i.e. each command was checked. As it turns out the commands are already sorted alphabetically, it is easy to perform a binary search instead of linear searching. This results in 7 lookups in the worst case. Signed-off-by: Stefan Beller stefanbel...@googlemail.com --- git.c | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/git.c b/git.c index 2025f77..6d4de2b 100644 --- a/git.c +++ b/git.c @@ -309,9 +309,14 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) return 0; } +static int compare_internal_command(const void *name, const void *cmd) { + return strcmp((const char*)name, ((const struct cmd_struct*)cmd)-cmd); +} + static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; + /* commands must be sorted alphabetically for binary search */ static struct cmd_struct commands[] = { { add, cmd_add, RUN_SETUP | NEED_WORK_TREE }, { annotate, cmd_annotate, RUN_SETUP }, @@ -447,12 +452,12 @@ static void handle_internal_command(int argc, const char **argv) argv[0] = cmd = help; } - for (i = 0; i ARRAY_SIZE(commands); i++) { - struct cmd_struct *p = commands+i; - if (strcmp(p-cmd, cmd)) - continue; + struct cmd_struct *p = bsearch(cmd, commands, + ARRAY_SIZE(commands), sizeof(struct cmd_struct), + compare_internal_command); + + if (p) exit(run_builtin(p, argc, argv)); - } } static void execv_dashed_external(const char **argv) -- 1.8.4.rc0.1.g8f6a3e5 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] builtins: search builtin commands via binary search.
Stefan Beller stefanbel...@googlemail.com writes: My approach would have been: sorted_internal_cmds: git.c { awk '/cmd_struct commands/,/};/ { if (match($2,//)) print $2 }' git.c builtin.actual \ sort builtin.actual builtin.expect \ cmp -s builtin.expect builtin.actual \ rm builtin.expect builtin.actual \ } You need a semicolon before the closing }. Andreas. -- Andreas Schwab, sch...@linux-m68k.org GPG Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5 And now for something completely different. -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] builtins: search builtin commands via binary search.
There are currently 115 commands built into the git executable. Before this commit, it was iterated over these commands in a linear order, i.e. each command was checked. As it turns out the commands are already sorted alphabetically, it is easy to perform a binary search instead of linear searching. This results in 7 lookups in the worst case. Signed-off-by: Stefan Beller stefanbel...@googlemail.com --- git.c | 19 ++- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/git.c b/git.c index 2025f77..0d7a9b5 100644 --- a/git.c +++ b/git.c @@ -309,9 +309,18 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) return 0; } +static int compare_internal_command(const void *a, const void *b) { + /* The first parameter is of type char* describing the name, + the second is a struct cmd_struct */ + const char *name = (const char*)a; + const struct cmd_struct *cmd_struct = (struct cmd_struct*)b; + return strcmp(name, cmd_struct-cmd); +} + static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; + /* commands must be sorted alphabetically */ static struct cmd_struct commands[] = { { add, cmd_add, RUN_SETUP | NEED_WORK_TREE }, { annotate, cmd_annotate, RUN_SETUP }, @@ -447,12 +456,12 @@ static void handle_internal_command(int argc, const char **argv) argv[0] = cmd = help; } - for (i = 0; i ARRAY_SIZE(commands); i++) { - struct cmd_struct *p = commands+i; - if (strcmp(p-cmd, cmd)) - continue; + struct cmd_struct *p = (struct cmd_struct *)bsearch(cmd, commands, + ARRAY_SIZE(commands), sizeof(struct cmd_struct), + compare_internal_command); + + if (p) exit(run_builtin(p, argc, argv)); - } } static void execv_dashed_external(const char **argv) -- 1.8.4.rc0.1.g8f6a3e5 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] builtins: search builtin commands via binary search.
Hi, Stefan Beller wrote: --- a/git.c +++ b/git.c @@ -309,9 +309,18 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) return 0; } +static int compare_internal_command(const void *a, const void *b) { + /* The first parameter is of type char* describing the name, +the second is a struct cmd_struct */ Style: /* * Multi-line comments in git look like this, with an initial * /* line, a leading * on each line with text, and a line * with '*' '/' at the end. */ [...] @@ -447,12 +456,12 @@ static void handle_internal_command(int argc, const char **argv) argv[0] = cmd = help; } - for (i = 0; i ARRAY_SIZE(commands); i++) { - struct cmd_struct *p = commands+i; - if (strcmp(p-cmd, cmd)) - continue; + struct cmd_struct *p = (struct cmd_struct *)bsearch(cmd, commands, + ARRAY_SIZE(commands), sizeof(struct cmd_struct), + compare_internal_command); No need to cast --- this is C. Fun. Does this result in a measurable speedup, or is it just for more pleasant reading? Thanks and hope that helps, Jonathan -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] builtins: search builtin commands via binary search.
On Fri, Jul 26, 2013 at 4:50 PM, Stefan Beller stefanbel...@googlemail.com wrote: There are currently 115 commands built into the git executable. Before this commit, it was iterated over these commands in a linear order, i.e. each command was checked. As it turns out the commands are already sorted alphabetically, it is easy to perform a binary search instead of linear searching. This results in 7 lookups in the worst case. Signed-off-by: Stefan Beller stefanbel...@googlemail.com --- git.c | 19 ++- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/git.c b/git.c index 2025f77..0d7a9b5 100644 --- a/git.c +++ b/git.c @@ -309,9 +309,18 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) return 0; } +static int compare_internal_command(const void *a, const void *b) { + /* The first parameter is of type char* describing the name, + the second is a struct cmd_struct */ + const char *name = (const char*)a; + const struct cmd_struct *cmd_struct = (struct cmd_struct*)b; Comments typically exist to elucidate something non-obvious in the code, however, in this case the code and comment say the same thing, making the comment redundant. Such redundancy can make code harder to read since the reader has to take extra time to figure out if the comment is really explaining something not obvious in the code. Thus, this comment can be removed without loss of clarity. + return strcmp(name, cmd_struct-cmd); +} + static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; + /* commands must be sorted alphabetically */ static struct cmd_struct commands[] = { This new comment, on the other hand does explain something not obvious at this point in the code. { add, cmd_add, RUN_SETUP | NEED_WORK_TREE }, { annotate, cmd_annotate, RUN_SETUP }, @@ -447,12 +456,12 @@ static void handle_internal_command(int argc, const char **argv) argv[0] = cmd = help; } - for (i = 0; i ARRAY_SIZE(commands); i++) { - struct cmd_struct *p = commands+i; - if (strcmp(p-cmd, cmd)) - continue; + struct cmd_struct *p = (struct cmd_struct *)bsearch(cmd, commands, + ARRAY_SIZE(commands), sizeof(struct cmd_struct), + compare_internal_command); Since this will break down if the commands[] array becomes unsorted, it would make sense to protect against such a failure. For instance, you could add a check in Makefile which triggers when git.c is edited. It might do something like this: awk '/cmd_struct commands/,/};/ { if (match($2,//)) print $2 }' git.c builtin.actual sort builtin.actual builtin.expect cmp -s builtin.expect builtin.actual rm builtin.expect builtin.actual + + if (p) exit(run_builtin(p, argc, argv)); - } } static void execv_dashed_external(const char **argv) -- -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html