Nguyễn Thái Ngọc Duy <pclo...@gmail.com> writes:

> watch_entries() is a lot of computation and could trigger a lot more
> lookups in file-watcher. Normally after the first set of watches are
> in place, we do not need to update often. Moreover if the number of
> entries is small, the overhead of file watcher may actually slow git
> down.
>
> This patch only allows to update watches if the number of watchable
> files is over a limit (and there are new files added if this is not
> the first time). Measurements on Core i5-2520M and Linux 3.7.6, about
> 920 lstat() take 1ms. Somewhere between 2^16 and 2^17 lstat calls that
> it starts to take longer than 100ms. 2^16 is chosen at the minimum
> limit to start using file watcher.
>
> Recently updated files are not considered watchable because they are
> likely to be updated again soon, not worth the ping-pong game with
> file watcher. The default limit 30min is just a random value.

But then a fresh clone of a big repository would not get any benefit
from the watcher?

Not yet sure how to best handle this.

> Signed-off-by: Nguyễn Thái Ngọc Duy <pclo...@gmail.com>
> ---
>  Documentation/config.txt |  9 +++++++++
>  cache.h                  |  1 +
>  read-cache.c             | 44 ++++++++++++++++++++++++++++++++++++--------
>  3 files changed, 46 insertions(+), 8 deletions(-)
>
> diff --git a/Documentation/config.txt b/Documentation/config.txt
> index a405806..e394399 100644
> --- a/Documentation/config.txt
> +++ b/Documentation/config.txt
> @@ -1038,6 +1038,15 @@ difftool.<tool>.cmd::
>  difftool.prompt::
>       Prompt before each invocation of the diff tool.
>  
> +filewatcher.minfiles::
> +     Start watching files if the number of watchable files are
> +     above this limit. Default value is 65536.
> +
> +filewatcher.recentlimit::
> +     Files that are last updated within filewatcher.recentlimit
> +     seconds from now are not considered watchable. Default value
> +     is 1800 (30 minutes).
> +
>  fetch.recurseSubmodules::
>       This option can be either set to a boolean value or to 'on-demand'.
>       Setting it to a boolean changes the behavior of fetch and pull to
> diff --git a/cache.h b/cache.h
> index 0d55551..bcec29b 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -278,6 +278,7 @@ struct index_state {
>       struct cache_tree *cache_tree;
>       struct cache_time timestamp;
>       unsigned name_hash_initialized : 1,
> +              update_watches : 1,
>                initialized : 1;
>       struct hashmap name_hash;
>       struct hashmap dir_hash;
> diff --git a/read-cache.c b/read-cache.c
> index 21c3207..406834a 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -38,6 +38,8 @@ static struct cache_entry *refresh_cache_entry(struct 
> cache_entry *ce, int reall
>  #define CACHE_EXT_WATCH 0x57415443     /* "WATC" */
>  
>  struct index_state the_index;
> +static int watch_lowerlimit = 65536;
> +static int recent_limit = 1800;
>  
>  static void set_index_entry(struct index_state *istate, int nr, struct 
> cache_entry *ce)
>  {
> @@ -1014,6 +1016,7 @@ int add_index_entry(struct index_state *istate, struct 
> cache_entry *ce, int opti
>                       (istate->cache_nr - pos - 1) * sizeof(ce));
>       set_index_entry(istate, pos, ce);
>       istate->cache_changed = 1;
> +     istate->update_watches = 1;
>       return 0;
>  }
>  
> @@ -1300,13 +1303,14 @@ static void read_watch_extension(struct index_state 
> *istate, uint8_t *data,
>                                unsigned long sz)
>  {
>       int i;
> -     if ((istate->cache_nr + 7) / 8 != sz) {
> +     if ((istate->cache_nr + 7) / 8 + 1 != sz) {
>               error("invalid 'WATC' extension");
>               return;
>       }
>       for (i = 0; i < istate->cache_nr; i++)
>               if (data[i / 8] & (1 << (i % 8)))
>                       istate->cache[i]->ce_flags |= CE_WATCHED;
> +     istate->update_watches = data[sz - 1];
>  }
>  
>  static int read_index_extension(struct index_state *istate,
> @@ -1449,6 +1453,19 @@ static struct cache_entry *create_from_disk(struct 
> ondisk_cache_entry *ondisk,
>       return ce;
>  }
>  
> +static int watcher_config(const char *var, const char *value, void *data)
> +{
> +     if (!strcmp(var, "filewatcher.minfiles")) {
> +             watch_lowerlimit = git_config_int(var, value);
> +             return 0;
> +     }
> +     if (!strcmp(var, "filewatcher.recentlimit")) {
> +             recent_limit = git_config_int(var, value);
> +             return 0;
> +     }
> +     return 0;
> +}
> +
>  static void validate_watcher(struct index_state *istate, const char *path)
>  {
>       int i;
> @@ -1458,6 +1475,7 @@ static void validate_watcher(struct index_state 
> *istate, const char *path)
>               return;
>       }
>  
> +     git_config(watcher_config, NULL);
>       istate->watcher = connect_watcher(path);
>       if (istate->watcher != -1) {
>               struct strbuf sb = STRBUF_INIT;
> @@ -1478,6 +1496,7 @@ static void validate_watcher(struct index_state 
> *istate, const char *path)
>                       istate->cache[i]->ce_flags &= ~CE_WATCHED;
>                       istate->cache_changed = 1;
>               }
> +     istate->update_watches = 1;
>  }
>  
>  static int sort_by_date(const void *a_, const void *b_)
> @@ -1524,7 +1543,7 @@ static int do_watch_entries(struct index_state *istate,
>       return 0;
>  }
>  
> -static inline int ce_watchable(struct cache_entry *ce)
> +static inline int ce_watchable(struct cache_entry *ce, time_t now)
>  {
>       return ce_uptodate(ce) && /* write_index will catch late ce_uptodate 
> bits */
>               !(ce->ce_flags & CE_WATCHED) &&
> @@ -1534,7 +1553,8 @@ static inline int ce_watchable(struct cache_entry *ce)
>                * obviously. S_IFLNK could be problematic because
>                * inotify may follow symlinks without IN_DONT_FOLLOW
>                */
> -             S_ISREG(ce->ce_mode);
> +             S_ISREG(ce->ce_mode) &&
> +             (ce->ce_stat_data.sd_mtime.sec + recent_limit < now);
>  }
>  
>  static void watch_entries(struct index_state *istate)
> @@ -1544,15 +1564,20 @@ static void watch_entries(struct index_state *istate)
>       struct strbuf sb = STRBUF_INIT;
>       int val;
>       socklen_t vallen = sizeof(val);
> +     time_t now = time(NULL);
>  
> -     if (istate->watcher <= 0)
> +     if (istate->watcher <= 0 || !istate->update_watches)
>               return;
> +     istate->update_watches = 0;
> +     istate->cache_changed = 1;
>       for (i = nr = 0; i < istate->cache_nr; i++)
> -             if (ce_watchable(istate->cache[i]))
> +             if (ce_watchable(istate->cache[i], now))
>                       nr++;
> +     if (nr < watch_lowerlimit)
> +             return;
>       sorted = xmalloc(sizeof(*sorted) * nr);
>       for (i = nr = 0; i < istate->cache_nr; i++)
> -             if (ce_watchable(istate->cache[i]))
> +             if (ce_watchable(istate->cache[i], now))
>                       sorted[nr++] = istate->cache[i];
>  
>       getsockopt(istate->watcher, SOL_SOCKET, SO_SNDBUF, &val, &vallen);
> @@ -1616,6 +1641,7 @@ int read_index_from(struct index_state *istate, const 
> char *path)
>       istate->cache_alloc = alloc_nr(istate->cache_nr);
>       istate->cache = xcalloc(istate->cache_alloc, sizeof(*istate->cache));
>       istate->initialized = 1;
> +     istate->update_watches = 1;
>  
>       if (istate->version == 4)
>               previous_name = &previous_name_buf;
> @@ -2024,8 +2050,9 @@ int write_index(struct index_state *istate, int newfd)
>               if (err)
>                       return -1;
>       }
> -     if (has_watches) {
> -             int id, sz = (entries - removed + 7) / 8;
> +     if (has_watches ||
> +         (istate->watcher != -1 && !istate->update_watches)) {
> +             int id, sz = (entries - removed + 7) / 8 + 1;
>               uint8_t *data = xmalloc(sz);
>               memset(data, 0, sz);
>               for (i = 0, id = 0; i < entries && has_watches; i++) {
> @@ -2038,6 +2065,7 @@ int write_index(struct index_state *istate, int newfd)
>                       }
>                       id++;
>               }
> +             data[sz - 1] = istate->update_watches;
>               err = write_index_ext_header(&c, newfd, CACHE_EXT_WATCH, sz) < 0
>                       || ce_write(&c, newfd, data, sz) < 0;
>               free(data);

-- 
Thomas Rast
t...@thomasrast.ch
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to