Re: [GSoC][PATCH v7 24/26] stash: optimize `get_untracked_files()` and `check_changes()`

2018-08-18 Thread Thomas Gummerer
On 08/08, Paul-Sebastian Ungureanu wrote:
> This commits introduces a optimization by avoiding calling the
> same functions again. For example, `git stash push -u`
> would call at some points the following functions:
> 
>  * `check_changes()`
>  * `do_create_stash()`, which calls: `check_changes()` and
> `get_untracked_files()`
> 
> Note that `check_changes()` also calls `get_untracked_files()`.
> So, `check_changes()` is called 2 times and `get_untracked_files()`
> 3 times. By checking at the beginning of the function if we already
> performed a check, we can avoid making useless calls.

While I can see that this may give us some performance gains, what's
being described above sounds like we should look into why we are
making these duplicate calls in the first place, rather than trying to
return early from them.  I feel like the duplicate calls are mostly a
remnant from the way the shell script was written, but not inherent to
the design of 'git stash'. 

For example 'check_changes' could be called from 'create_stash'
directly, so we don't have to call it in 'do_create_stash', in the
process removing the duplicate call from the 'git stash push'
codepath.  That would provide the same improvements, and keep the code
cleaner, rather than introducing more special cases for these
functions.

I haven't looked into the 'get_untracked_files()' call chain yet, but
I imagine we can do something similar for that.


[GSoC][PATCH v7 24/26] stash: optimize `get_untracked_files()` and `check_changes()`

2018-08-08 Thread Paul-Sebastian Ungureanu
This commits introduces a optimization by avoiding calling the
same functions again. For example, `git stash push -u`
would call at some points the following functions:

 * `check_changes()`
 * `do_create_stash()`, which calls: `check_changes()` and
`get_untracked_files()`

Note that `check_changes()` also calls `get_untracked_files()`.
So, `check_changes()` is called 2 times and `get_untracked_files()`
3 times. By checking at the beginning of the function if we already
performed a check, we can avoid making useless calls.
---
 builtin/stash.c | 50 +++--
 1 file changed, 36 insertions(+), 14 deletions(-)

diff --git a/builtin/stash.c b/builtin/stash.c
index 0ef88408a..4d5c0d16e 100644
--- a/builtin/stash.c
+++ b/builtin/stash.c
@@ -819,13 +819,23 @@ static int store_stash(int argc, const char **argv, const 
char *prefix)
 }
 
 /*
- * `out` will be filled with the names of untracked files. The return value is:
+ * `has_untracked_files` is:
+ * -2 if `get_untracked_files()` hasn't been called
+ * -1 if there were errors
+ *  0 if there are no untracked files
+ *  1 if there are untracked files
+ *
+ * `untracked_files` will be filled with the names of untracked files.
+ * The return value is:
  *
  * = 0 if there are not any untracked files
  * > 0 if there are untracked files
  */
+static struct strbuf untracked_files = STRBUF_INIT;
+static int has_untracked_files = -2;
+
 static int get_untracked_files(const char **argv, const char *prefix,
-  int include_untracked, struct strbuf *out)
+  int include_untracked)
 {
int max_len;
int i;
@@ -833,6 +843,9 @@ static int get_untracked_files(const char **argv, const 
char *prefix,
struct dir_struct dir;
struct pathspec pathspec;
 
+   if (has_untracked_files != -2)
+   return has_untracked_files;
+
memset(&dir, 0, sizeof(dir));
if (include_untracked != 2)
setup_standard_excludes(&dir);
@@ -849,7 +862,7 @@ static int get_untracked_files(const char **argv, const 
char *prefix,
free(ent);
continue;
}
-   strbuf_addf(out, "%s\n", ent->name);
+   strbuf_addf(&untracked_files, "%s\n", ent->name);
free(ent);
}
 
@@ -857,16 +870,25 @@ static int get_untracked_files(const char **argv, const 
char *prefix,
free(dir.ignored);
clear_directory(&dir);
free(seen);
-   return out->len;
+   has_untracked_files = untracked_files.len;
+   return untracked_files.len;
 }
 
 /*
+ * `changes` is:
+ * -2 if `check_changes()` hasn't been called
+ * -1 if there were any errors
+ *  0 if there are no changes
+ *  1 if there are changes
+ *
  * The return value of `check_changes()` can be:
  *
  * < 0 if there was an error
  * = 0 if there are no changes.
  * > 0 if there are changes.
  */
+static int changes = -2;
+
 static int check_changes(const char **argv, int include_untracked,
 const char *prefix)
 {
@@ -874,9 +896,11 @@ static int check_changes(const char **argv, int 
include_untracked,
int ret = 0;
struct rev_info rev;
struct object_id dummy;
-   struct strbuf out = STRBUF_INIT;
struct argv_array args = ARGV_ARRAY_INIT;
 
+   if (changes != -2)
+   return changes;
+
init_revisions(&rev, prefix);
parse_pathspec(&rev.prune_data, 0, PATHSPEC_PREFER_FULL,
   prefix, argv);
@@ -912,17 +936,16 @@ static int check_changes(const char **argv, int 
include_untracked,
}
 
if (include_untracked && get_untracked_files(argv, prefix,
-include_untracked, &out))
+include_untracked))
ret = 1;
 
 done:
-   strbuf_release(&out);
+   changes = ret;
argv_array_clear(&args);
return ret;
 }
 
-static int save_untracked_files(struct stash_info *info, struct strbuf *msg,
-   struct strbuf *out)
+static int save_untracked_files(struct stash_info *info, struct strbuf *msg)
 {
int ret = 0;
struct strbuf untracked_msg = STRBUF_INIT;
@@ -937,7 +960,8 @@ static int save_untracked_files(struct stash_info *info, 
struct strbuf *msg,
 stash_index_path.buf);
 
strbuf_addf(&untracked_msg, "untracked files on %s\n", msg->buf);
-   if (pipe_command(&cp, out->buf, out->len, NULL, 0, NULL, 0)) {
+   if (pipe_command(&cp, untracked_files.buf, untracked_files.len,
+NULL, 0, NULL, 0)) {
ret = -1;
goto done;
}
@@ -1116,7 +1140,6 @@ static int do_create_stash(int argc, const char **argv, 
const char *prefix,
struct commit_list *parents = NULL;
struct strbuf msg = STRBUF_INIT;