Re: [PATCH] builtins: search builtin commands via binary search.

2013-07-29 Thread Junio C Hamano
Stefan Beller stefanbel...@googlemail.com writes:

 However I could not find a speedup.
 So if the patch is accepted, it would only be for readability.

This adds a maintenance burden.  It is very easy for somebody to
mistakenly run sort on the region in her editor under non C
locale.

Perhaps with a change like the attached, if we were to do so.  Then
have a test that runs git --check-builtins-sorted will ensure we
won't ever screw it up.

FOR ILLUSTRATION PURPOSES ONLY: diff --git a/git.c b/git.c
index 2025f77..3155273 100644
--- a/git.c
+++ b/git.c
@@ -34,6 +34,150 @@ static void commit_pager_choice(void) {
}
 }
 
+struct cmd_struct {
+   const char *cmd;
+   int (*fn)(int, const char **, const char *);
+   int option;
+};
+
+#define RUN_SETUP  (10)
+#define RUN_SETUP_GENTLY   (11)
+#define USE_PAGER  (12)
+/*
+ * require working tree to be present -- anything uses this needs
+ * RUN_SETUP for reading from the configuration file.
+ */
+#define NEED_WORK_TREE (13)
+
+static struct cmd_struct builtin_commands[] = {
+   { add, cmd_add, RUN_SETUP | NEED_WORK_TREE },
+   { annotate, cmd_annotate, RUN_SETUP },
+   ...
+   { whatchanged, cmd_whatchanged, RUN_SETUP },
+   { write-tree, cmd_write_tree, RUN_SETUP },
+};
+
+static void make_sure_the_builtin_array_is_sorted(void)
+{
+   int i;
+   for (i = 0; i  ARRAY_SIZE(builtin_commands) - 1; i++) {
+   struct cmd_struct *it = builtin_commands[i];
+   if (strcmp(it[0].cmd, it[1].cmd) = 0)
+   die(BUG: builtin command list not sorted %s  %s,
+   it[0].cmd, it[1].cmd);
+   }
+}
+
 static int handle_options(const char ***argv, int *argc, int *envchanged)
 {
const char **orig_argv = *argv;
@@ -62,6 +206,9 @@ static int handle_options(const char ***argv, int *argc, int 
*envchanged)
puts(git_exec_path());
exit(0);
}
+   } else if (!strcmp(cmd, --check-builtins-sorted)) {
+   make_sure_the_builtin_array_is_sorted();
+   exit(0);
} else if (!strcmp(cmd, --html-path)) {
puts(system_path(GIT_HTML_PATH));
exit(0);
@@ -241,21 +388,6 @@ static int handle_alias(int *argcp, const char ***argv)
return ret;
 }
 
-#define RUN_SETUP  (10)
-#define RUN_SETUP_GENTLY   (11)
-#define USE_PAGER  (12)
-/*
- * require working tree to be present -- anything uses this needs
- * RUN_SETUP for reading from the configuration file.
- */
-#define NEED_WORK_TREE (13)
-
-struct cmd_struct {
-   const char *cmd;
-   int (*fn)(int, const char **, const char *);
-   int option;
-};
-
 static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 {
int status, help;
@@ -312,123 +444,6 @@ static int run_builtin(struct cmd_struct *p, int argc, 
const char **argv)
 static void handle_internal_command(int argc, const char **argv)
 {
const char *cmd = argv[0];
-   static struct cmd_struct commands[] = {
-   { add, cmd_add, RUN_SETUP | NEED_WORK_TREE },
-   { annotate, cmd_annotate, RUN_SETUP },
-   ...
-   { whatchanged, cmd_whatchanged, RUN_SETUP },
-   { write-tree, cmd_write_tree, RUN_SETUP },
-   };
int i;
static const char ext[] = STRIP_EXTENSION;
 
@@ -447,8 +462,8 @@ static void handle_internal_command(int argc, const char 
**argv)
argv[0] = cmd = help;
}
 
-   for (i = 0; i  ARRAY_SIZE(commands); i++) {
-   struct cmd_struct *p = commands+i;
+   for (i = 0; i  ARRAY_SIZE(builtin_commands); i++) {
+   struct cmd_struct *p = builtin_commands+i;
if (strcmp(p-cmd, cmd))
continue;
exit(run_builtin(p, argc, argv));
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] builtins: search builtin commands via binary search.

2013-07-27 Thread Stefan Beller
On 07/26/2013 10:57 PM, Jonathan Nieder wrote:
 Hi,
 
 Stefan Beller wrote:
 
 --- a/git.c
 +++ b/git.c
 @@ -309,9 +309,18 @@ static int run_builtin(struct cmd_struct *p, int argc, 
 const char **argv)
  return 0;
  }
  
 +static int compare_internal_command(const void *a, const void *b) {
 +/* The first parameter is of type char* describing the name,
 +   the second is a struct cmd_struct */
 
 Style:
 
   /*
* Multi-line comments in git look like this, with an initial
* /* line, a leading * on each line with text, and a line
* with '*' '/' at the end.
*/
 
 [...]

Thanks for noting, however as Eric points out, that comment was not 
enlightening, so I removed it.


 @@ -447,12 +456,12 @@ static void handle_internal_command(int argc, const 
 char **argv)
  argv[0] = cmd = help;
  }
  
 -for (i = 0; i  ARRAY_SIZE(commands); i++) {
 -struct cmd_struct *p = commands+i;
 -if (strcmp(p-cmd, cmd))
 -continue;
 +struct cmd_struct *p = (struct cmd_struct *)bsearch(cmd, commands,
 +ARRAY_SIZE(commands), sizeof(struct cmd_struct),
 +compare_internal_command);
 
 No need to cast --- this is C.

Also removed.

 
 Fun.  Does this result in a measurable speedup, or is it just for more
 pleasant reading?
 
 Thanks and hope that helps,
 Jonathan
 

premature optimization is the root of all evil
I tried hard to come up with a benchmark, but this is lost in the
noise. I could not figure out a way to reproducably make sure this
patch is really faster.
So I tried to `time git add COPYING` to show it's not getting slower
for the first entries in the list as well as `git fast-external-command`
whereas the fast-external-command is just an int main() {return 0; }
to check if the external commands, which are executed after searching 
through all the internals come up faster.

However I could not find a speedup.
So if the patch is accepted, it would only be for readability.

I was fiddling around with make now to include the suggestion of Eric to
check the arguments for being sorted in make. However I do not 
seem to fully understand the syntax yet.
My approach would have been:

sorted_internal_cmds: git.c
{ awk '/cmd_struct commands/,/};/ { if (match($2,//)) print $2 }' 
git.c builtin.actual  \
sort builtin.actual builtin.expect  \
cmp -s builtin.expect builtin.actual  \
rm builtin.expect builtin.actual \
}

all:: sorted_internal_cmds

But then there is 
$ make 
...
}
/bin/sh: 5: Syntax error: end of file unexpected (expecting })

So I suspect the { within the shell code inside the awk parameter is messing up?

Thanks,
Stefan




signature.asc
Description: OpenPGP digital signature


[PATCH] builtins: search builtin commands via binary search.

2013-07-27 Thread Stefan Beller
There are currently 115 commands built into the git executable.
Before this commit, it was iterated over these commands in a linear
order, i.e. each command was checked.

As it turns out the commands are already sorted alphabetically, it is easy
to perform a binary search instead of linear searching.
This results in 7 lookups in the worst case.

Signed-off-by: Stefan Beller stefanbel...@googlemail.com
---
 git.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/git.c b/git.c
index 2025f77..6d4de2b 100644
--- a/git.c
+++ b/git.c
@@ -309,9 +309,14 @@ static int run_builtin(struct cmd_struct *p, int argc, 
const char **argv)
return 0;
 }
 
+static int compare_internal_command(const void *name, const void *cmd) {
+   return strcmp((const char*)name, ((const struct cmd_struct*)cmd)-cmd);
+}
+
 static void handle_internal_command(int argc, const char **argv)
 {
const char *cmd = argv[0];
+   /* commands must be sorted alphabetically for binary search */
static struct cmd_struct commands[] = {
{ add, cmd_add, RUN_SETUP | NEED_WORK_TREE },
{ annotate, cmd_annotate, RUN_SETUP },
@@ -447,12 +452,12 @@ static void handle_internal_command(int argc, const char 
**argv)
argv[0] = cmd = help;
}
 
-   for (i = 0; i  ARRAY_SIZE(commands); i++) {
-   struct cmd_struct *p = commands+i;
-   if (strcmp(p-cmd, cmd))
-   continue;
+   struct cmd_struct *p = bsearch(cmd, commands,
+   ARRAY_SIZE(commands), sizeof(struct cmd_struct),
+   compare_internal_command);
+
+   if (p)
exit(run_builtin(p, argc, argv));
-   }
 }
 
 static void execv_dashed_external(const char **argv)
-- 
1.8.4.rc0.1.g8f6a3e5

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] builtins: search builtin commands via binary search.

2013-07-27 Thread Andreas Schwab
Stefan Beller stefanbel...@googlemail.com writes:

 My approach would have been:

 sorted_internal_cmds: git.c
   { awk '/cmd_struct commands/,/};/ { if (match($2,//)) print $2 }' 
 git.c builtin.actual  \
   sort builtin.actual builtin.expect  \
   cmp -s builtin.expect builtin.actual  \
   rm builtin.expect builtin.actual \
   }

You need a semicolon before the closing }.

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
And now for something completely different.
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] builtins: search builtin commands via binary search.

2013-07-26 Thread Stefan Beller
There are currently 115 commands built into the git executable.
Before this commit, it was iterated over these commands in a linear
order, i.e. each command was checked.

As it turns out the commands are already sorted alphabetically, it is easy
to perform a binary search instead of linear searching.
This results in 7 lookups in the worst case.

Signed-off-by: Stefan Beller stefanbel...@googlemail.com
---
 git.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/git.c b/git.c
index 2025f77..0d7a9b5 100644
--- a/git.c
+++ b/git.c
@@ -309,9 +309,18 @@ static int run_builtin(struct cmd_struct *p, int argc, 
const char **argv)
return 0;
 }
 
+static int compare_internal_command(const void *a, const void *b) {
+   /* The first parameter is of type char* describing the name,
+  the second is a struct cmd_struct */
+   const char *name = (const char*)a;
+   const struct cmd_struct *cmd_struct = (struct cmd_struct*)b;
+   return strcmp(name, cmd_struct-cmd);
+}
+
 static void handle_internal_command(int argc, const char **argv)
 {
const char *cmd = argv[0];
+   /* commands must be sorted alphabetically */
static struct cmd_struct commands[] = {
{ add, cmd_add, RUN_SETUP | NEED_WORK_TREE },
{ annotate, cmd_annotate, RUN_SETUP },
@@ -447,12 +456,12 @@ static void handle_internal_command(int argc, const char 
**argv)
argv[0] = cmd = help;
}
 
-   for (i = 0; i  ARRAY_SIZE(commands); i++) {
-   struct cmd_struct *p = commands+i;
-   if (strcmp(p-cmd, cmd))
-   continue;
+   struct cmd_struct *p = (struct cmd_struct *)bsearch(cmd, commands,
+   ARRAY_SIZE(commands), sizeof(struct cmd_struct),
+   compare_internal_command);
+
+   if (p)
exit(run_builtin(p, argc, argv));
-   }
 }
 
 static void execv_dashed_external(const char **argv)
-- 
1.8.4.rc0.1.g8f6a3e5

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] builtins: search builtin commands via binary search.

2013-07-26 Thread Jonathan Nieder
Hi,

Stefan Beller wrote:

 --- a/git.c
 +++ b/git.c
 @@ -309,9 +309,18 @@ static int run_builtin(struct cmd_struct *p, int argc, 
 const char **argv)
   return 0;
  }
  
 +static int compare_internal_command(const void *a, const void *b) {
 + /* The first parameter is of type char* describing the name,
 +the second is a struct cmd_struct */

Style:

/*
 * Multi-line comments in git look like this, with an initial
 * /* line, a leading * on each line with text, and a line
 * with '*' '/' at the end.
 */

[...]
 @@ -447,12 +456,12 @@ static void handle_internal_command(int argc, const 
 char **argv)
   argv[0] = cmd = help;
   }
  
 - for (i = 0; i  ARRAY_SIZE(commands); i++) {
 - struct cmd_struct *p = commands+i;
 - if (strcmp(p-cmd, cmd))
 - continue;
 + struct cmd_struct *p = (struct cmd_struct *)bsearch(cmd, commands,
 + ARRAY_SIZE(commands), sizeof(struct cmd_struct),
 + compare_internal_command);

No need to cast --- this is C.

Fun.  Does this result in a measurable speedup, or is it just for more
pleasant reading?

Thanks and hope that helps,
Jonathan
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] builtins: search builtin commands via binary search.

2013-07-26 Thread Eric Sunshine
On Fri, Jul 26, 2013 at 4:50 PM, Stefan Beller
stefanbel...@googlemail.com wrote:
 There are currently 115 commands built into the git executable.
 Before this commit, it was iterated over these commands in a linear
 order, i.e. each command was checked.

 As it turns out the commands are already sorted alphabetically, it is easy
 to perform a binary search instead of linear searching.
 This results in 7 lookups in the worst case.

 Signed-off-by: Stefan Beller stefanbel...@googlemail.com
 ---
  git.c | 19 ++-
  1 file changed, 14 insertions(+), 5 deletions(-)

 diff --git a/git.c b/git.c
 index 2025f77..0d7a9b5 100644
 --- a/git.c
 +++ b/git.c
 @@ -309,9 +309,18 @@ static int run_builtin(struct cmd_struct *p, int argc, 
 const char **argv)
 return 0;
  }

 +static int compare_internal_command(const void *a, const void *b) {
 +   /* The first parameter is of type char* describing the name,
 +  the second is a struct cmd_struct */
 +   const char *name = (const char*)a;
 +   const struct cmd_struct *cmd_struct = (struct cmd_struct*)b;

Comments typically exist to elucidate something non-obvious in the
code, however, in this case the code and comment say the same thing,
making the comment redundant. Such redundancy can make code harder to
read since the reader has to take extra time to figure out if the
comment is really explaining something not obvious in the code. Thus,
this comment can be removed without loss of clarity.

 +   return strcmp(name, cmd_struct-cmd);
 +}
 +
  static void handle_internal_command(int argc, const char **argv)
  {
 const char *cmd = argv[0];
 +   /* commands must be sorted alphabetically */
 static struct cmd_struct commands[] = {

This new comment, on the other hand does explain something not obvious
at this point in the code.

 { add, cmd_add, RUN_SETUP | NEED_WORK_TREE },
 { annotate, cmd_annotate, RUN_SETUP },
 @@ -447,12 +456,12 @@ static void handle_internal_command(int argc, const 
 char **argv)
 argv[0] = cmd = help;
 }

 -   for (i = 0; i  ARRAY_SIZE(commands); i++) {
 -   struct cmd_struct *p = commands+i;
 -   if (strcmp(p-cmd, cmd))
 -   continue;
 +   struct cmd_struct *p = (struct cmd_struct *)bsearch(cmd, commands,
 +   ARRAY_SIZE(commands), sizeof(struct 
 cmd_struct),
 +   compare_internal_command);

Since this will break down if the commands[] array becomes unsorted,
it would make sense to protect against such a failure. For instance,
you could add a check in Makefile which triggers when git.c is edited.
It might do something like this:

  awk '/cmd_struct commands/,/};/ { if (match($2,//)) print $2 }'
git.c builtin.actual 
  sort builtin.actual builtin.expect 
  cmp -s builtin.expect builtin.actual 
  rm builtin.expect builtin.actual

 +
 +   if (p)
 exit(run_builtin(p, argc, argv));
 -   }
  }

  static void execv_dashed_external(const char **argv)
 --
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html