Re: [PATCH 2/2] commit-template: distinguish status information unconditionally

2017-06-30 Thread Kaartic Sivaraam
On Fri, 2017-06-30 at 07:52 -0700, Junio C Hamano wrote:
> Thanks, both looks good.  Will queue.
You're welcome :)


[PATCH 05/10] convert/sub-process: drop cast to hashmap_cmp_fn

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---
 convert.c | 3 +--
 sub-process.c | 7 +--
 sub-process.h | 4 ++--
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/convert.c b/convert.c
index deaf0ba7b3..04966c723c 100644
--- a/convert.c
+++ b/convert.c
@@ -583,8 +583,7 @@ static int apply_multi_file_filter(const char *path, const 
char *src, size_t len
 
if (!subprocess_map_initialized) {
subprocess_map_initialized = 1;
-   hashmap_init(_map, (hashmap_cmp_fn) cmd2process_cmp,
-NULL, 0);
+   hashmap_init(_map, cmd2process_cmp, NULL, 0);
entry = NULL;
} else {
entry = (struct cmd2process 
*)subprocess_find_entry(_map, cmd);
diff --git a/sub-process.c b/sub-process.c
index a3cfab1a9d..6cbffa4406 100644
--- a/sub-process.c
+++ b/sub-process.c
@@ -6,10 +6,13 @@
 #include "pkt-line.h"
 
 int cmd2process_cmp(const void *unused_cmp_data,
-   const struct subprocess_entry *e1,
-   const struct subprocess_entry *e2,
+   const void *entry,
+   const void *entry_or_key,
const void *unused_keydata)
 {
+   const struct subprocess_entry *e1 = entry;
+   const struct subprocess_entry *e2 = entry_or_key;
+
return strcmp(e1->cmd, e2->cmd);
 }
 
diff --git a/sub-process.h b/sub-process.h
index 96a2cca360..8cd07a59ab 100644
--- a/sub-process.h
+++ b/sub-process.h
@@ -21,8 +21,8 @@ struct subprocess_entry {
 /* subprocess functions */
 
 extern int cmd2process_cmp(const void *unused_cmp_data,
-  const struct subprocess_entry *e1,
-  const struct subprocess_entry *e2,
+  const void *e1,
+  const void *e2,
   const void *unused_keydata);
 
 typedef int(*subprocess_start_fn)(struct subprocess_entry *entry);
-- 
2.13.0.31.g9b732c453e



[PATCH 07/10] remote.c: drop hashmap_cmp_fn cast

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---
 remote.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/remote.c b/remote.c
index 60d0043921..3efa358558 100644
--- a/remote.c
+++ b/remote.c
@@ -134,10 +134,14 @@ struct remotes_hash_key {
 };
 
 static int remotes_hash_cmp(const void *unused_cmp_data,
-   const struct remote *a,
-   const struct remote *b,
-   const struct remotes_hash_key *key)
+   const void *entry,
+   const void *entry_or_key,
+   const void *keydata)
 {
+   const struct remote *a = entry;
+   const struct remote *b = entry_or_key;
+   const struct remotes_hash_key *key = keydata;
+
if (key)
return strncmp(a->name, key->str, key->len) || 
a->name[key->len];
else
@@ -147,7 +151,7 @@ static int remotes_hash_cmp(const void *unused_cmp_data,
 static inline void init_remotes_hash(void)
 {
if (!remotes_hash.cmpfn)
-   hashmap_init(_hash, (hashmap_cmp_fn)remotes_hash_cmp, 
NULL, 0);
+   hashmap_init(_hash, remotes_hash_cmp, NULL, 0);
 }
 
 static struct remote *make_remote(const char *name, int len)
-- 
2.13.0.31.g9b732c453e



[PATCH 06/10] patch-ids.c: drop hashmap_cmp_fn cast

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---
 patch-ids.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/patch-ids.c b/patch-ids.c
index b4166b0f38..9ea523984b 100644
--- a/patch-ids.c
+++ b/patch-ids.c
@@ -35,11 +35,16 @@ int commit_patch_id(struct commit *commit, struct 
diff_options *options,
  * the side of safety.  The actual value being negative does not have
  * any significance; only that it is non-zero matters.
  */
-static int patch_id_cmp(struct diff_options *opt,
-   struct patch_id *a,
-   struct patch_id *b,
+static int patch_id_cmp(const void *cmpfn_data,
+   const void *entry,
+   const void *entry_or_key,
const void *unused_keydata)
 {
+   /* NEEDSWORK: const correctness? */
+   struct diff_options *opt = (void*)cmpfn_data;
+   struct patch_id *a = (void*)entry;
+   struct patch_id *b = (void*)entry_or_key;
+
if (is_null_oid(>patch_id) &&
commit_patch_id(a->commit, opt, >patch_id, 0))
return error("Could not get patch ID for %s",
@@ -58,8 +63,7 @@ int init_patch_ids(struct patch_ids *ids)
ids->diffopts.detect_rename = 0;
DIFF_OPT_SET(>diffopts, RECURSIVE);
diff_setup_done(>diffopts);
-   hashmap_init(>patches, (hashmap_cmp_fn)patch_id_cmp,
->diffopts, 256);
+   hashmap_init(>patches, patch_id_cmp, >diffopts, 256);
return 0;
 }
 
-- 
2.13.0.31.g9b732c453e



[PATCH 10/10] t/helper/test-hashmap: use custom data instead of duplicate cmp functions

2017-06-30 Thread Stefan Beller
With the new field that is passed to the compare function, we can pass
through flags there instead of having multiple compare functions.
Also drop the cast to hashmap_cmp_fn.

Signed-off-by: Stefan Beller 
---
 t/helper/test-hashmap.c | 34 --
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/t/helper/test-hashmap.c b/t/helper/test-hashmap.c
index 095d7395f3..93ccfbb75f 100644
--- a/t/helper/test-hashmap.c
+++ b/t/helper/test-hashmap.c
@@ -13,20 +13,20 @@ static const char *get_value(const struct test_entry *e)
return e->key + strlen(e->key) + 1;
 }
 
-static int test_entry_cmp(const void *unused_cmp_data,
- const struct test_entry *e1,
- const struct test_entry *e2,
- const char* key)
+static int test_entry_cmp(const void *cmp_data,
+ const void *entry,
+ const void *entry_or_key,
+ const void* keydata)
 {
-   return strcmp(e1->key, key ? key : e2->key);
-}
-
-static int test_entry_cmp_icase(const void *unused_cmp_data,
-   const struct test_entry *e1,
-   const struct test_entry *e2,
-   const char* key)
-{
-   return strcasecmp(e1->key, key ? key : e2->key);
+   const int ignore_case = cmp_data ? *((int*)cmp_data) : 0;
+   const struct test_entry *e1 = entry;
+   const struct test_entry *e2 = entry_or_key;
+   const char* key = keydata;
+
+   if (ignore_case)
+   return strcasecmp(e1->key, key ? key : e2->key);
+   else
+   return strcmp(e1->key, key ? key : e2->key);
 }
 
 static struct test_entry *alloc_test_entry(int hash, char *key, int klen,
@@ -96,8 +96,7 @@ static void perf_hashmap(unsigned int method, unsigned int 
rounds)
if (method & TEST_ADD) {
/* test adding to the map */
for (j = 0; j < rounds; j++) {
-   hashmap_init(, (hashmap_cmp_fn) test_entry_cmp,
-NULL, 0);
+   hashmap_init(, test_entry_cmp, NULL, 0);
 
/* add entries */
for (i = 0; i < TEST_SIZE; i++) {
@@ -109,7 +108,7 @@ static void perf_hashmap(unsigned int method, unsigned int 
rounds)
}
} else {
/* test map lookups */
-   hashmap_init(, (hashmap_cmp_fn) test_entry_cmp, NULL, 0);
+   hashmap_init(, test_entry_cmp, NULL, 0);
 
/* fill the map (sparsely if specified) */
j = (method & TEST_SPARSE) ? TEST_SIZE / 10 : TEST_SIZE;
@@ -151,8 +150,7 @@ int cmd_main(int argc, const char **argv)
 
/* init hash map */
icase = argc > 1 && !strcmp("ignorecase", argv[1]);
-   hashmap_init(, (hashmap_cmp_fn) (icase ? test_entry_cmp_icase
-   : test_entry_cmp), NULL, 0);
+   hashmap_init(, test_entry_cmp, , 0);
 
/* process commands from stdin */
while (fgets(line, sizeof(line), stdin)) {
-- 
2.13.0.31.g9b732c453e



[PATCH 08/10] submodule-config.c: drop hashmap_cmp_fn cast

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---
 submodule-config.c | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/submodule-config.c b/submodule-config.c
index 0e1126183d..edc8dd04b6 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -35,27 +35,33 @@ static struct submodule_cache the_submodule_cache;
 static int is_cache_init;
 
 static int config_path_cmp(const void *unused_cmp_data,
-  const struct submodule_entry *a,
-  const struct submodule_entry *b,
+  const void *entry,
+  const void *entry_or_key,
   const void *unused_keydata)
 {
+   const struct submodule_entry *a = entry;
+   const struct submodule_entry *b = entry_or_key;
+
return strcmp(a->config->path, b->config->path) ||
   hashcmp(a->config->gitmodules_sha1, b->config->gitmodules_sha1);
 }
 
 static int config_name_cmp(const void *unused_cmp_data,
-  const struct submodule_entry *a,
-  const struct submodule_entry *b,
+  const void *entry,
+  const void *entry_or_key,
   const void *unused_keydata)
 {
+   const struct submodule_entry *a = entry;
+   const struct submodule_entry *b = entry_or_key;
+
return strcmp(a->config->name, b->config->name) ||
   hashcmp(a->config->gitmodules_sha1, b->config->gitmodules_sha1);
 }
 
 static void cache_init(struct submodule_cache *cache)
 {
-   hashmap_init(>for_path, (hashmap_cmp_fn) config_path_cmp, NULL, 
0);
-   hashmap_init(>for_name, (hashmap_cmp_fn) config_name_cmp, NULL, 
0);
+   hashmap_init(>for_path, config_path_cmp, NULL, 0);
+   hashmap_init(>for_name, config_name_cmp, NULL, 0);
 }
 
 static void free_one_config(struct submodule_entry *entry)
-- 
2.13.0.31.g9b732c453e



[PATCH 03/10] builtin/describe: drop hashmap_cmp_fn cast

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---
 builtin/describe.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 8868f00ed0..f2f2edf935 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -55,10 +55,13 @@ static const char *prio_names[] = {
 };
 
 static int commit_name_cmp(const void *unused_cmp_data,
-  const struct commit_name *cn1,
-  const struct commit_name *cn2,
+  const void *entry,
+  const void *entry_or_key,
   const void *peeled)
 {
+   const struct commit_name *cn1 = entry;
+   const struct commit_name *cn2 = entry_or_key;
+
return oidcmp(>peeled, peeled ? peeled : >peeled);
 }
 
@@ -503,7 +506,7 @@ int cmd_describe(int argc, const char **argv, const char 
*prefix)
return cmd_name_rev(args.argc, args.argv, prefix);
}
 
-   hashmap_init(, (hashmap_cmp_fn) commit_name_cmp, NULL, 0);
+   hashmap_init(, commit_name_cmp, NULL, 0);
for_each_rawref(get_name, NULL);
if (!names.size && !always)
die(_("No names found, cannot describe anything."));
-- 
2.13.0.31.g9b732c453e



[PATCH 09/10] name-hash.c: drop hashmap_cmp_fn cast

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---
 name-hash.c | 22 +-
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/name-hash.c b/name-hash.c
index 0e10f3eab8..bd8dc7a6a7 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -17,10 +17,14 @@ struct dir_entry {
 };
 
 static int dir_entry_cmp(const void *unused_cmp_data,
-const struct dir_entry *e1,
-const struct dir_entry *e2,
-const char *name)
+const void *entry,
+const void *entry_or_key,
+const void *keydata)
 {
+   const struct dir_entry *e1 = entry;
+   const struct dir_entry *e2 = entry_or_key;
+   const char *name = keydata;
+
return e1->namelen != e2->namelen || strncasecmp(e1->name,
name ? name : e2->name, e1->namelen);
 }
@@ -110,10 +114,12 @@ static void hash_index_entry(struct index_state *istate, 
struct cache_entry *ce)
 }
 
 static int cache_entry_cmp(const void *unused_cmp_data,
-  const struct cache_entry *ce1,
-  const struct cache_entry *ce2,
+  const void *entry,
+  const void *entry_or_key,
   const void *remove)
 {
+   const struct cache_entry *ce1 = entry;
+   const struct cache_entry *ce2 = entry_or_key;
/*
 * For remove_name_hash, find the exact entry (pointer equality); for
 * index_file_exists, find all entries with matching hash code and
@@ -574,10 +580,8 @@ static void lazy_init_name_hash(struct index_state *istate)
 {
if (istate->name_hash_initialized)
return;
-   hashmap_init(>name_hash, (hashmap_cmp_fn) cache_entry_cmp,
-   NULL, istate->cache_nr);
-   hashmap_init(>dir_hash, (hashmap_cmp_fn) dir_entry_cmp,
-   NULL, istate->cache_nr);
+   hashmap_init(>name_hash, cache_entry_cmp, NULL, 
istate->cache_nr);
+   hashmap_init(>dir_hash, dir_entry_cmp, NULL, istate->cache_nr);
 
if (lookup_lazy_params(istate)) {
hashmap_disallow_rehash(>dir_hash, 1);
-- 
2.13.0.31.g9b732c453e



[PATCH 00/10] Friday night special: hash map cleanup

2017-06-30 Thread Stefan Beller
This goes on top of sb/hashmap-customize-comparison.
No functional impact but a pure cleanup series:
No casts to hashmap_cmp_fn in the whole code base any more.

This revealed another interesting thing, which is not a bug per se:
The const correctness of hashmap_cmp_fn as it requires all its void
pointers to be const!

We violate this in patch-ids.c as we modify the `fndata` at some
uncritical part (a part that doesn't change the equal-functionality
AFAICT).

Thanks,
Stefan

Stefan Beller (10):
  attr.c: drop hashmap_cmp_fn cast
  builtin/difftool.c: drop hashmap_cmp_fn cast
  builtin/describe: drop hashmap_cmp_fn cast
  config.c: drop hashmap_cmp_fn cast
  convert/sub-process: drop cast to hashmap_cmp_fn
  patch-ids.c: drop hashmap_cmp_fn cast
  remote.c: drop hashmap_cmp_fn cast
  submodule-config.c: drop hashmap_cmp_fn cast
  name-hash.c: drop hashmap_cmp_fn cast
  t/helper/test-hashmap: use custom data instead of duplicate cmp
functions

 attr.c  | 12 +++-
 builtin/describe.c  |  9 ++---
 builtin/difftool.c  | 37 ++---
 config.c| 10 ++
 convert.c   |  3 +--
 name-hash.c | 22 +-
 patch-ids.c | 14 +-
 remote.c| 12 
 sub-process.c   |  7 +--
 sub-process.h   |  4 ++--
 submodule-config.c  | 18 --
 t/helper/test-hashmap.c | 34 --
 12 files changed, 107 insertions(+), 75 deletions(-)

-- 
2.13.0.31.g9b732c453e



[PATCH 01/10] attr.c: drop hashmap_cmp_fn cast

2017-06-30 Thread Stefan Beller
MAke the code more readable and less error prone by avoiding the cast
of the compare function pointer in hashmap_init, but instead have the
correctly named void pointers to casted to the specific data structure.

Signed-off-by: Stefan Beller 
---
 attr.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/attr.c b/attr.c
index 56961f0236..2f49151736 100644
--- a/attr.c
+++ b/attr.c
@@ -76,18 +76,20 @@ struct attr_hash_entry {
 };
 
 /* attr_hashmap comparison function */
-static int attr_hash_entry_cmp(void *unused_cmp_data,
-  const struct attr_hash_entry *a,
-  const struct attr_hash_entry *b,
-  void *unused_keydata)
+static int attr_hash_entry_cmp(const void *unused_cmp_data,
+  const void *entry,
+  const void *entry_or_key,
+  const void *unused_keydata)
 {
+   const struct attr_hash_entry *a = entry;
+   const struct attr_hash_entry *b = entry_or_key;
return (a->keylen != b->keylen) || strncmp(a->key, b->key, a->keylen);
 }
 
 /* Initialize an 'attr_hashmap' object */
 static void attr_hashmap_init(struct attr_hashmap *map)
 {
-   hashmap_init(>map, (hashmap_cmp_fn) attr_hash_entry_cmp, NULL, 0);
+   hashmap_init(>map, attr_hash_entry_cmp, NULL, 0);
 }
 
 /*
-- 
2.13.0.31.g9b732c453e



[PATCH 04/10] config.c: drop hashmap_cmp_fn cast

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---
 config.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/config.c b/config.c
index 4a31e31ac3..30ff700629 100644
--- a/config.c
+++ b/config.c
@@ -1754,17 +1754,19 @@ static int configset_add_value(struct config_set *cs, 
const char *key, const cha
 }
 
 static int config_set_element_cmp(const void *unused_cmp_data,
- const struct config_set_element *e1,
- const struct config_set_element *e2,
+ const void *entry,
+ const void *entry_or_key,
  const void *unused_keydata)
 {
+   const struct config_set_element *e1 = entry;
+   const struct config_set_element *e2 = entry_or_key;
+
return strcmp(e1->key, e2->key);
 }
 
 void git_configset_init(struct config_set *cs)
 {
-   hashmap_init(>config_hash, (hashmap_cmp_fn)config_set_element_cmp,
-NULL, 0);
+   hashmap_init(>config_hash, config_set_element_cmp, NULL, 0);
cs->hash_initialized = 1;
cs->list.nr = 0;
cs->list.alloc = 0;
-- 
2.13.0.31.g9b732c453e



[PATCH 02/10] builtin/difftool.c: drop hashmap_cmp_fn cast

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---
 builtin/difftool.c | 37 ++---
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/builtin/difftool.c b/builtin/difftool.c
index a1a26ba891..8864d846f8 100644
--- a/builtin/difftool.c
+++ b/builtin/difftool.c
@@ -131,10 +131,12 @@ struct working_tree_entry {
 };
 
 static int working_tree_entry_cmp(const void *unused_cmp_data,
- struct working_tree_entry *a,
- struct working_tree_entry *b,
- void *unused_keydata)
+ const void *entry,
+ const void *entry_or_key,
+ const void *unused_keydata)
 {
+   const struct working_tree_entry *a = entry;
+   const struct working_tree_entry *b = entry_or_key;
return strcmp(a->path, b->path);
 }
 
@@ -149,9 +151,13 @@ struct pair_entry {
 };
 
 static int pair_cmp(const void *unused_cmp_data,
-   struct pair_entry *a, struct pair_entry *b,
-   void *unused_keydata)
+   const void *entry,
+   const void *entry_or_key,
+   const void *unused_keydata)
 {
+   const struct pair_entry *a = entry;
+   const struct pair_entry *b = entry_or_key;
+
return strcmp(a->path, b->path);
 }
 
@@ -179,9 +185,13 @@ struct path_entry {
 };
 
 static int path_entry_cmp(const void *unused_cmp_data,
- struct path_entry *a, struct path_entry *b,
- void *key)
+ const void *entry,
+ const void *entry_or_key,
+ const void *key)
 {
+   const struct path_entry *a = entry;
+   const struct path_entry *b = entry_or_key;
+
return strcmp(a->path, key ? key : b->path);
 }
 
@@ -372,10 +382,9 @@ static int run_dir_diff(const char *extcmd, int symlinks, 
const char *prefix,
rdir_len = rdir.len;
wtdir_len = wtdir.len;
 
-   hashmap_init(_tree_dups,
-(hashmap_cmp_fn)working_tree_entry_cmp, NULL, 0);
-   hashmap_init(, (hashmap_cmp_fn)pair_cmp, NULL, 0);
-   hashmap_init(, (hashmap_cmp_fn)pair_cmp, NULL, 0);
+   hashmap_init(_tree_dups, working_tree_entry_cmp, NULL, 0);
+   hashmap_init(, pair_cmp, NULL, 0);
+   hashmap_init(, pair_cmp, NULL, 0);
 
child.no_stdin = 1;
child.git_cmd = 1;
@@ -585,10 +594,8 @@ static int run_dir_diff(const char *extcmd, int symlinks, 
const char *prefix,
 * in the common case of --symlinks and the difftool updating
 * files through the symlink.
 */
-   hashmap_init(_modified, (hashmap_cmp_fn)path_entry_cmp,
-NULL, wtindex.cache_nr);
-   hashmap_init(_modified, (hashmap_cmp_fn)path_entry_cmp,
-NULL, wtindex.cache_nr);
+   hashmap_init(_modified, path_entry_cmp, NULL, wtindex.cache_nr);
+   hashmap_init(_modified, path_entry_cmp, NULL, wtindex.cache_nr);
 
for (i = 0; i < wtindex.cache_nr; i++) {
struct hashmap_entry dummy;
-- 
2.13.0.31.g9b732c453e



Re: [GSoC][PATCH 5/5 v3] submodule: port submodule subcommand 'sync' from shell to C

2017-06-30 Thread Junio C Hamano
Prathamesh Chavan  writes:

> +static void sync_submodule(const struct cache_entry *list_item, void 
> *cb_data)
> +{
> + struct sync_cb *info = cb_data;
> + const struct submodule *sub;
> + char *sub_key, *remote_key;
> + char *sub_origin_url, *super_config_url, *displaypath;
> + struct strbuf sb = STRBUF_INIT;
> + struct child_process cp = CHILD_PROCESS_INIT;
> +
> + if (!is_submodule_initialized(list_item->name))
> + return;
> +
> + sub = submodule_from_path(null_sha1, list_item->name);
> +
> + if (!sub || !sub->url)
> + die(_("no url found for submodule path '%s' in .gitmodules"),
> +   list_item->name);
> +
> + if (starts_with_dot_dot_slash(sub->url) || 
> starts_with_dot_slash(sub->url)) {
> + char *remote_url, *up_path;
> + char *remote = get_default_remote();
> + char *remote_key = xstrfmt("remote.%s.url", remote);
> + free(remote);
> +

Have a blank line between the decls and the first statement
(i.e. before "free(remote)").  The blank line after "free(remote)"
may or may not be there.

> + if (git_config_get_string(remote_key, _url))
> + remote_url = xgetcwd();
> +...
> +
> + prepare_submodule_repo_env(_array);
> + cp.git_cmd = 1;
> + cp.dir = list_item->name;
> + argv_array_pushl(, "submodule--helper",
> +  "print-default-remote", NULL);
> + if (capture_command(, , 0))
> + die(_("failed to get the default remote for submodule '%s'"),
> +   list_item->name);

OK.  Perhaps in a distant future when the repository object
abstraction is ready, we can do this within the main process, but
for now, due to the presence of cp.dir, this is the best we can do.

> + strbuf_strip_suffix(, "\n");
> + remote_key = xstrfmt("remote.%s.url", sb.buf);
> + strbuf_release();
> +
> + child_process_init();
> + prepare_submodule_repo_env(_array);
> + cp.git_cmd = 1;
> + cp.dir = list_item->name;
> + argv_array_pushl(, "config", remote_key, sub_origin_url, NULL);
> + if (run_command())
> + die(_("failed to update remote for submodule '%s'"),
> +   list_item->name);

Likewise.

Looks like a fairly faithful conversion to me.

Thanks.


Re: [GSoC][PATCH 4/5 v3] submodule: port submodule subcommand 'status' from shell to C

2017-06-30 Thread Junio C Hamano
Prathamesh Chavan  writes:

> + argv_array_pushl(_files_args, "diff-files",
> +  "--ignore-submodules=dirty", "--quiet", "--",
> +  list_item->name, NULL);
> +
> + if (!cmd_diff_files(diff_files_args.argc, diff_files_args.argv,
> + info->prefix)) {

Essentially we'd only want to run ce_match_stat() on list_item and
the current on-filesystem submodule.  We not just know which path we
are interested in, but we already have the cache entry for it, so it
feels quite wasteful to go over the_index.cache[] once again to find
the cache entry that matches list_item->name.  Yes, that is how the
scripted Porcelain did it, but it feels way overkill now we have
ready access to the lower level machinery.

Having said all that, I think it is a good idea to stick to a
faithful conversion in this series.  I just think a NEEDSWORK
comment may be a good idea to indicate future optimization
opportunities.

A similar optimization is already happening in this patch, actually.
Instead of doing a stat of "$sm_path/.git" and checking test -d/-f,
the code just calls is_submodule_initialized().  Which is good ;-)

> + } else {
> + if (!info->cached) {
> + struct child_process cp = CHILD_PROCESS_INIT;
> + struct strbuf sb = STRBUF_INIT;
> +
> + prepare_submodule_repo_env(_array);
> + cp.git_cmd = 1;
> + cp.dir = list_item->name;
> +
> + argv_array_pushl(, "rev-parse",
> +  "--verify", "HEAD", NULL);
> +
> + if (capture_command(, , 0))
> + die(_("could not run 'git rev-parse --verify"
> +   "HEAD' in submodule %s"),
> +   list_item->name);
> +
> + strbuf_strip_suffix(, "\n");

Likewise.  This is merely resolving a ref inside a submodule;
calling something like head_ref_submodule() may be sufficient
as an optimization and at least deserves to be mentioned in a
NEEDSWORK comment.

> + print_status(info, '+', list_item->name, sb.buf,
> +  displaypath);
> + strbuf_release();
> + } else {
> + print_status(info, '+', list_item->name, sub_sha1,
> +  displaypath);
> + }
> + }


Re: [GSoC][PATCH 3/5 v3] submodule: port set_name_rev() from shell to C

2017-06-30 Thread Junio C Hamano
Prathamesh Chavan  writes:

> Function set_name_rev() is ported from git-submodule to the
> submodule--helper builtin. The function get_name_rev() generates the
> value of the revision name as required, and the function
> print_name_rev() handles the formating and printing of the obtained
> revision name.
>
> Mentored-by: Christian Couder 
> Mentored-by: Stefan Beller 
> Signed-off-by: Prathamesh Chavan 
> ---
>  builtin/submodule--helper.c | 69 
> +
>  git-submodule.sh| 16 ++-
>  2 files changed, 71 insertions(+), 14 deletions(-)
>
> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> index c4286aac5..4103e40e4 100644
> --- a/builtin/submodule--helper.c
> +++ b/builtin/submodule--helper.c
> @@ -244,6 +244,74 @@ static char *get_submodule_displaypath(const char *path, 
> const char *prefix)
>   }
>  }
>  
> +enum describe_step {
> + step_bare,
> + step_tags,
> + step_contains,
> + step_all_always,
> + step_end
> +};

Hmph.

The only difference between the steps being what subcommand is run,
a better implementation might be to do

static const char *describe_bare[] = {
NULL
};

...

static const char **describe_argv[] = {
describe_bare, describe_tags, describe_contains, 
describe_all_always, NULL
};

const char ***d;

for (d = describe_argv; *d; d++) {
argv_array_pushl(, "describe");
argv_array_pushv(, *d);
... do the thing ...
}

but unfortunately C is a bit klunky to do so; we cannot easily make
the contents of describe_argv[] as anonymous arrays.  An otherwise
useless enum stil bothers me, but I do not think of anything better
offhand.

> +
> +static char *get_name_rev(const char *sub_path, const char* object_id)
> +{
> + struct strbuf sb = STRBUF_INIT;
> + enum describe_step cur_step;
> +
> + for (cur_step = step_bare; cur_step < step_end; cur_step++) {
> + struct child_process cp = CHILD_PROCESS_INIT;
> + prepare_submodule_repo_env(_array);
> + cp.dir = sub_path;
> + cp.git_cmd = 1;
> + cp.no_stderr = 1;
> +
> + switch (cur_step) {
> + case step_bare:
> + argv_array_pushl(, "describe",
> +  object_id, NULL);
> + break;
> + case step_tags: 
> + argv_array_pushl(, "describe",
> +  "--tags", object_id, NULL);
> + break;
> + case step_contains:
> + argv_array_pushl(, "describe",
> +  "--contains", object_id,
> +  NULL);
> + break;
> + case step_all_always:
> + argv_array_pushl(, "describe",
> +  "--all", "--always",
> +  object_id, NULL);
> + break;
> + default:
> + BUG("unknown describe step '%d'", cur_step);
> + }

Dedent the body of switch() by one level, i.e.

switch (var) {
case val1:
do_this();
break;
case val2:
do_that();
...
}

Otherwise looking good.

> @@ -1042,14 +1030,14 @@ cmd_status()
>   fi
>   if git diff-files --ignore-submodules=dirty --quiet -- 
> "$sm_path"
>   then
> - set_name_rev "$sm_path" "$sha1"
> + revname=$(git submodule--helper print-name-rev 
> "$sm_path" "$sha1")
>   say " $sha1 $displaypath$revname"
>   else
>   if test -z "$cached"
>   then
>   sha1=$(sanitize_submodule_env; cd "$sm_path" && 
> git rev-parse --verify HEAD)
>   fi
> - set_name_rev "$sm_path" "$sha1"
> + revname=$(git submodule--helper print-name-rev 
> "$sm_path" "$sha1")
>   say "+$sha1 $displaypath$revname"
>   fi


Re: [RFC 0/4] Implementation of fetch-blobs and fetch-refs

2017-06-30 Thread Stefan Beller
Sorry for not paying attention to this patch set earlier. I missed
(and might still
miss) the big picture(tm) here. This is because
http://public-inbox.org/git/ffd92ad9-39fe-c76b-178d-6e3d6a425...@google.com/
seemed to be specific for big binary files, but ...

On Mon, Apr 10, 2017 at 1:46 PM, Jonathan Tan  wrote:

> In particular, patch 1 demonstrates that a new server endpoint that
> serves refs without the initial ref advertisement can be done in 228
> lines of code (according to "git diff --stat") while accounting for the
> various special cases that upload-pack imposes (stateless RPC, inclusion
> of an additional response when depth is requested, handling of "done" in
> request, sending a packfile directly after a response containing "ACK
> ready" without waiting for "done", and so on).

... from looking at the patches, this actually implements the idea
that was thrown around on the mailing list as "client speaks first",
as this essentially omits the refs advertisement, and then continues the
conversation by running upload-pack as normal.

That seems very exciting to me!

> To that end, I'm sending these patches in the hope of showing that these
> features are useful (omitting ref advertisements help greatly when
> serving large repos, as described in the commit message of patch 1, and
> serving blobs is useful for any fetch-blob-on-demand repo scheme) and
> that my proposed way of implementing them can be done in a relatively
> uncomplicated manner (as seen in these patches).

Yes, if we encapsulate v1 protocol on (both?) sides, we seem to need
very little additional code, but get the benefit of using lots of well
tested code.

Thanks,
Stefan


Re: [GSoC][PATCH 1/5 v3] submodule--helper: introduce get_submodule_displaypath()

2017-06-30 Thread Junio C Hamano
Prathamesh Chavan  writes:

> Introduce function get_submodule_displaypath() to replace the code
> occurring in submodule_init() for generating displaypath of the
> submodule with a call to it.
>
> This new function will also be used in other parts of the system
> in later patches.
>
> Mentored-by: Christian Couder 
> Mentored-by: Stefan Beller 
> Signed-off-by: Prathamesh Chavan 
> ---
> The patch series is updated, and is based on 'master' branch.
>
> This patch series contains updates patches about
> Introduction of the function: get_submodule_displaypath()
> (This patch wasn't posted in the last update by mistake)

I was wondering what that thing was while reading the previous
round.  Good that it now appears in this round of the series ;-)

Thanks.




Re: speeding up git pull from a busy gerrit instance over a slow link?

2017-06-30 Thread Stefan Beller
On Fri, Jun 30, 2017 at 5:28 AM, Noel Grandin  wrote:
> Hi
>
> I'm running git version 2.13.1 on Ubuntu 16.04 (x64)
>
> I'm connecting over a very slow (international link) to a very busy gerrit
> server (gerrit.libreoffice.org) using ssh.
> Ping types are on the order of 200ms.
>
> Using GIT_TRACE_PACKET=true, what I am seeing is that the bulk of the time
> is spent retrieving packets having to do with things which I have no
> interest in, i.e. the refs/changes/* stuff (see below).
>
> Is there any way to deliberately exclude these from the pull process?
>
> My git config looks like:
>remote.origin.url=ssh://logerrit/core
>remote.origin.fetch=+refs/heads/*:refs/remotes/origin/*
>branch.master.remote=origin
>branch.master.merge=refs/heads/master
>branch.sdrshapeid.remote=origin
>branch.sdrshapeid.merge=refs/heads/master
>
> Thanks, Noel Grandin

>From here I figured, you're talking about gerrit.libreoffice.org/core
My initial clone is taking rather long, it was stuck in the
"Counting objects" phase for quite some time. Maybe ask the
Gerrit administrator if they have pack.useBitmaps enabled?

(This is a tangent, and most likely related to the initial clone only,
you are asking for everyday fetches.)


Re: speeding up git pull from a busy gerrit instance over a slow link?

2017-06-30 Thread Jonathan Tan
On Fri, 30 Jun 2017 14:28:15 +0200
Noel Grandin  wrote:

> -
> snippet of packet trace
> ---
> 
> 14:20:45.705091 pkt-line.c:80   packet:fetch< 
> c5b026801c729ab37e2af6a610f31ca2e28b51fe 
> refs/changes/99/29099/2
> 14:20:45.705093 pkt-line.c:80   packet:fetch< 
> 931e2c40aeb4cf4591ae9fcfea1b352b966f0a32 
> refs/changes/99/29199/1

Out of curiosity, what is the timestamp difference between the first and
last GIT_TRACE_PACKET log message containing "refs/changes"?

This is an issue for the Android repository too (which also uses
Gerrit). I have some work in progress to avoid the extra refs from being
sent [1], but haven't been working on it recently.

[1] https://public-inbox.org/git/cover.1491851452.git.jonathanta...@google.com/


What's cooking in git.git (Jun 2017, #09; Fri, 30)

2017-06-30 Thread Junio C Hamano
Here are the topics that have been cooking.  Commits prefixed with
'-' are only in 'pu' (proposed updates) while commits prefixed with
'+' are in 'next'.  The ones marked with '.' do not appear in any of
the integration branches, but I am still holding onto them.

You can find the changes described here in the integration branches
of the repositories listed at

http://git-blame.blogspot.com/p/git-public-repositories.html

--
[Graduated to "master"]

* ab/die-errors-in-threaded (2017-06-21) 1 commit
  (merged to 'next' on 2017-06-24 at 135fc4b963)
 + die(): stop hiding errors due to overzealous recursion guard

 Traditionally, the default die() routine had a code to prevent it
 from getting called multiple times, which interacted badly when a
 threaded program used it (one downside is that the real error may
 be hidden and instead the only error message given to the user may
 end up being "die recursion detected", which is not very useful).


* ah/doc-pretty-color-auto-prefix (2017-06-24) 1 commit
  (merged to 'next' on 2017-06-26 at d7489fc831)
 + doc: clarify syntax for %C(auto,...) in pretty formats

 Doc update.


* jc/pack-bitmap-unaligned (2017-06-26) 1 commit
  (merged to 'next' on 2017-06-28 at ad026b12a3)
 + pack-bitmap: don't perform unaligned memory access

 An unaligned 32-bit access in pack-bitmap code ahs been corrected.


* ks/status-initial-commit (2017-06-21) 1 commit
  (merged to 'next' on 2017-06-24 at 940ffd5816)
 + status: contextually notify user about an initial commit

 "git status" has long shown essentially the same message as "git
 commit"; the message it gives while preparing for the root commit,
 i.e. "Initial commit", was hard to understand for some new users.
 Now it says "No commits yet" to stress more on the current status
 (rather than the commit the user is preparing for, which is more in
 line with the focus of "git commit").


* ks/submodule-add-doc (2017-06-22) 1 commit
  (merged to 'next' on 2017-06-24 at 26309b38f2)
 + Documentation/git-submodule: cleanup "add" section

 Doc update.


* pw/rebase-i-regression-fix-tests (2017-06-23) 5 commits
  (merged to 'next' on 2017-06-23 at 835ae762f5)
 + t3420: fix under GETTEXT_POISON build
  (merged to 'next' on 2017-06-22 at d1dde1672a)
 + rebase: add more regression tests for console output
 + rebase: add regression tests for console output
 + rebase -i: add test for reflog message
 + sequencer: print autostash messages to stderr

 Fix a recent regression to "git rebase -i" and add tests that would
 have caught it and others.


* rs/apply-validate-input (2017-06-27) 3 commits
  (merged to 'next' on 2017-06-28 at 81e006b50e)
 + apply: check git diffs for mutually exclusive header lines
 + apply: check git diffs for invalid file modes
 + apply: check git diffs for missing old filenames

 Tighten error checks for invalid "git apply" input.


* vs/typofixes (2017-06-27) 1 commit
  (merged to 'next' on 2017-06-28 at 3d11e0b3fa)
 + Spelling fixes

 Many typofixes.

--
[New Topics]

* ab/grep-lose-opt-regflags (2017-06-30) 6 commits
 - grep: remove redundant REG_NEWLINE when compiling fixed regex
 - grep: remove regflags from the public grep_opt API
 - grep: remove redundant and verbose re-assignments to 0
 - grep: remove redundant "fixed" field re-assignment to 0
 - grep: adjust a redundant grep pattern type assignment
 - grep: remove redundant double assignment to 0

 Code cleanup.

 Will merge to 'next'.


* ks/commit-assuming-only-warning-removal (2017-06-30) 2 commits
 - commit-template: distinguish status information unconditionally
 - commit-template: remove outdated notice about explicit paths

 An old message shown in the commit log template was removed, as it
 has outlived its usefulness.

 Will merge to 'next'.


* sb/hashmap-customize-comparison (2017-06-30) 3 commits
 - hashmap: migrate documentation from Documentation/technical into header
 - patch-ids.c: use hashmap correctly
 - hashmap.h: compare function has access to a data field
 (this branch is used by sb/diff-color-move.)

 Update the hashmap API so that data to customize the behaviour of
 the comparison function can be specified at the time a hashmap is
 initialized.  This fixes a bug in patch-ids that may have caused
 segfault.


* sb/merge-recursive-code-cleanup (2017-06-30) 1 commit
 - merge-recursive: use DIFF_XDL_SET macro

 Code clean-up.

 Will merge to 'next'.

--
[Stalled]

* mg/status-in-progress-info (2017-05-10) 2 commits
 - status --short --inprogress: spell it as --in-progress
 - status: show in-progress info for short status

 "git status" learns an option to report various operations
 (e.g. "merging") that the user is in the middle of.

 cf. 


* nd/worktree-move (2017-04-20) 6 commits
 - worktree remove: new command
 - worktree move: refuse to move 

Re: [PATCHv2 00/25] Reroll of sb/diff-color-moved.

2017-06-30 Thread Junio C Hamano
This (obviously) passes the failing tests.

I should have spotted why the rebase of the previous one did not
work myself before bugging you X-<.

Thanks.  Queued.


Re: [PATCHv2 22/25] diff.c: color moved lines differently

2017-06-30 Thread Junio C Hamano
Stefan Beller  writes:

> On Fri, Jun 30, 2017 at 2:11 PM, Junio C Hamano  wrote:
>>> + return (int)' ';
>>
>> Do we need a cast here?
>
> No, I figured it is good to have it here explicitly, though.
> We can drop that if you have strong preferences one way or another.

I was merely wondering if you were protecting against a funny
platform where ' ' is a negative number ;-)

>>> +static unsigned get_string_hash(struct emitted_diff_symbol *es, struct 
>>> diff_options *o)
>>> +{
>>> + if (o->xdl_opts & XDF_WHITESPACE_FLAGS) {
>>> + static struct strbuf sb = STRBUF_INIT;
>>> + const char *ap = es->line, *ae = es->line + es->len;
>>> + int c;
>>> +
>>> + strbuf_reset();
>>> + while (ae > ap && isspace(*ae))
>>> + ae--;
>>
>> Not testing for the AT_EOL option here?
>
> No, because the other options are stronger than the AT_EOL,
> such that as you note it is still correct.
>
> If in the future, we'd have another new option e.g.
> IGNORE_TAB_BLANK_CONVERSION_BUT_WARN_ON_LENGTH_DIFF
> (useful for python programmers ;)
> this would break.

That remark makes it sound as if this is a timebomb ticking, but I
do not think that is the case.  This is merely stripping whitespaces
at the end; mixing tabs and spaces without changing the indentation
level matters only when you have something that is !isspace() to
indent to begin with ;-)

So, the effect of not checking is only a hashmap that is bit less
efficient than necessary, but is there an undue cost of actually
checking and doing this skipping only when we are ignoring the
whitespaces at the end of lines?

>> By the way, this is an unrelated tangent because I think you
> ...
> I agree.  I can make a cleanup throughout the whole code base,
> but I would prefer if that is done in a separate series, as this
> is already slightly lengthy.

Oh, I 100% agree that it is an unrelated tangent.  


Re: [PATCH v9 0/7] convert: add "status=delayed" to filter process protocol

2017-06-30 Thread Junio C Hamano
Will queue.  I personally feel that this is polished enough for 'next'.

Thanks.


Re: [PATCHv2 22/25] diff.c: color moved lines differently

2017-06-30 Thread Stefan Beller
On Fri, Jun 30, 2017 at 2:11 PM, Junio C Hamano  wrote:
>> + return (int)' ';
>
> Do we need a cast here?

No, I figured it is good to have it here explicitly, though.
We can drop that if you have strong preferences one way or another.

>
>> +static unsigned get_string_hash(struct emitted_diff_symbol *es, struct 
>> diff_options *o)
>> +{
>> + if (o->xdl_opts & XDF_WHITESPACE_FLAGS) {
>> + static struct strbuf sb = STRBUF_INIT;
>> + const char *ap = es->line, *ae = es->line + es->len;
>> + int c;
>> +
>> + strbuf_reset();
>> + while (ae > ap && isspace(*ae))
>> + ae--;
>
> Not testing for the AT_EOL option here?

No, because the other options are stronger than the AT_EOL,
such that as you note it is still correct.

If in the future, we'd have another new option e.g.
IGNORE_TAB_BLANK_CONVERSION_BUT_WARN_ON_LENGTH_DIFF
(useful for python programmers ;)
this would break.

>
> By the way, this is an unrelated tangent because I think you
> inherited this pattern by copying and pasting from elsewhere, but I
> think it would be better if we avoid casting the function pointer
> type like this:
>
>> + if (o->color_moved) {
>> + struct hashmap add_lines, del_lines;
>> +
>> + hashmap_init(_lines,
>> +  (hashmap_cmp_fn)moved_entry_cmp, o, 0);
>> + hashmap_init(_lines,
>> +  (hashmap_cmp_fn)moved_entry_cmp, o, 0);
>
> When hashmap_cmp_fn's definition changes, these two calling sites
> won't be caught as passing a incorrectly typed callback function by
> the compiler.
>
> Instead, we can match the actual implementation of the callback
> function, e.g.
>
>> +static int moved_entry_cmp(const struct diff_options *diffopt,
>> +const struct moved_entry *a,
>> +const struct moved_entry *b,
>> +const void *keydata)
>> +{
>
> to the expected function type, i.e.
>
> static int moved_entry_cmp(const void *fndata,
>const void *entry, const void *entry_or_key,
>const void *keydata)
> {
> const struct diff_options *diffopt = fndata;
> const struct moved_entry *a = entry;
> const struct moved_entry *b = entry_or_key;
> ...
>
> by casting the parameters.

I agree.  I can make a cleanup throughout the whole code base,
but I would prefer if that is done in a separate series, as this
is already slightly lengthy.

Thanks,
Stefan


Re: [PATCHv2 22/25] diff.c: color moved lines differently

2017-06-30 Thread Junio C Hamano
Stefan Beller  writes:

> +static int next_byte(const char **cp, const char **endp,
> +  const struct diff_options *diffopt)
> +{
> + int retval;
> +
> + if (*cp > *endp)
> + return -1;
> +
> + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_CHANGE)) {
> + while (*cp < *endp && isspace(**cp))
> + (*cp)++;
> + /*
> +  * After skipping a couple of whitespaces, we still have to
> +  * account for one space.
> +  */
> + return (int)' ';

Do we need a cast here?

> +static unsigned get_string_hash(struct emitted_diff_symbol *es, struct 
> diff_options *o)
> +{
> + if (o->xdl_opts & XDF_WHITESPACE_FLAGS) {
> + static struct strbuf sb = STRBUF_INIT;
> + const char *ap = es->line, *ae = es->line + es->len;
> + int c;
> +
> + strbuf_reset();
> + while (ae > ap && isspace(*ae))
> + ae--;

Not testing for the AT_EOL option here?  

It does not make a difference in correctness; two lines that differ
only by their trailing whitespaces will be hashed into the same bin
even when we are not using ignore-whitespace-at-eol, making the 
hashmap a bit less efficient than necessary.

By the way, this is an unrelated tangent because I think you
inherited this pattern by copying and pasting from elsewhere, but I
think it would be better if we avoid casting the function pointer
type like this:

> + if (o->color_moved) {
> + struct hashmap add_lines, del_lines;
> +
> + hashmap_init(_lines,
> +  (hashmap_cmp_fn)moved_entry_cmp, o, 0);
> + hashmap_init(_lines,
> +  (hashmap_cmp_fn)moved_entry_cmp, o, 0);

When hashmap_cmp_fn's definition changes, these two calling sites
won't be caught as passing a incorrectly typed callback function by
the compiler.

Instead, we can match the actual implementation of the callback
function, e.g.

> +static int moved_entry_cmp(const struct diff_options *diffopt,
> +const struct moved_entry *a,
> +const struct moved_entry *b,
> +const void *keydata)
> +{

to the expected function type, i.e.

static int moved_entry_cmp(const void *fndata,
   const void *entry, const void *entry_or_key,
   const void *keydata)
{
const struct diff_options *diffopt = fndata;
const struct moved_entry *a = entry;
const struct moved_entry *b = entry_or_key;
...

by casting the parameters.


[PATCHv2 04/25] diff.c: introduce emit_diff_symbol

2017-06-30 Thread Stefan Beller
In a later patch we want to buffer all output before emitting it as a
new feature ("markup moved lines") conceptually cannot be implemented
in a single pass over the output.

There are different approaches to buffer all output such as:
* Buffering on the char level, i.e. we'd have a char[] which would
  grow at approximately 80 characters a line. This would keep the
  output completely unstructured, but might be very easy to implement,
  such as redirecting all output to a temporary file and working off
  that. The later passes over the buffer are quite complicated though,
  because we have to parse back any output and then decide if it should
  be modified.

* Buffer on a line level. As the output is mostly line oriented already,
  this would make sense, but it still is a bit awkward as we'd have to
  make sense of it again by looking at the first characters of a line
  to decide what part of a diff a line is.

* Buffer semantically. Imagine there is a formal grammar for the diff
  output and we'd keep the symbols of this grammar around. This keeps
  the highest level of structure in the buffered data, such that the
  actual memory requirements are less than say the first option. Instead
  of buffering the characters of the line, we'll buffer what we intend
  to do plus additional information for the specifics. An output of

diff --git a/new.txt b/new.txt
index fa69b07..412428c 100644
Binary files a/new.txt and b/new.txt differ

  could be buffered as
 DIFF_SYMBOL_DIFF_START + new.txt
 DIFF_SYMBOL_INDEX_MODE + fa69b07 412428c "non-executable" flag
 DIFF_SYMBOL_BINARY_FILES + new.txt

This and the following patches introduce the third option of buffering
by first moving any output to emit_diff_symbol, and then introducing the
buffering in this function.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 22 +++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/diff.c b/diff.c
index 77ef56a6e4..4637368d59 100644
--- a/diff.c
+++ b/diff.c
@@ -560,6 +560,24 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
emit_line_0(o, set, reset, line[0], line+1, len-1);
 }
 
+enum diff_symbol {
+   DIFF_SYMBOL_SEPARATOR
+};
+
+static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
+const char *line, int len)
+{
+   switch (s) {
+   case DIFF_SYMBOL_SEPARATOR:
+   fprintf(o->file, "%s%c",
+   diff_line_prefix(o),
+   o->line_termination);
+   break;
+   default:
+   die("BUG: unknown diff symbol");
+   }
+}
+
 static int new_blank_line_at_eof(struct emit_callback *ecbdata, const char 
*line, int len)
 {
if (!((ecbdata->ws_rule & WS_BLANK_AT_EOF) &&
@@ -4828,9 +4846,7 @@ void diff_flush(struct diff_options *options)
 
if (output_format & DIFF_FORMAT_PATCH) {
if (separator) {
-   fprintf(options->file, "%s%c",
-   diff_line_prefix(options),
-   options->line_termination);
+   emit_diff_symbol(options, DIFF_SYMBOL_SEPARATOR, NULL, 
0);
if (options->stat_sep) {
/* attach patch instead of inline */
fputs(options->stat_sep, options->file);
-- 
2.13.0.31.g9b732c453e



[PATCHv2 08/25] diff.c: migrate emit_line_checked to use emit_diff_symbol

2017-06-30 Thread Stefan Beller
Add a new flags field to emit_diff_symbol, that will be used by
context lines for:
* white space rules that are applicable (The first 12 bits)
  Take a note in cahe.c as well, when this ws rules are extended we have
  to fix the bits in the flags field.
* how the rules are evaluated (actually this double encodes the sign
  of the line, but the code is easier to keep this way, bits 13,14,15)
* if the line a blank line at EOF (bit 16)

The check if new lines need to be marked up as extra lines at the end of
file, is now done unconditionally. That should be ok, as
'new_blank_line_at_eof' has a quick early return.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 cache.h |   2 ++
 diff.c  | 116 +---
 diff.h  |   6 ++--
 3 files changed, 80 insertions(+), 44 deletions(-)

diff --git a/cache.h b/cache.h
index 96055c2229..18aabacba5 100644
--- a/cache.h
+++ b/cache.h
@@ -1996,6 +1996,8 @@ void shift_tree_by(const struct object_id *, const struct 
object_id *, struct ob
 #define WS_TRAILING_SPACE  (WS_BLANK_AT_EOL|WS_BLANK_AT_EOF)
 #define WS_DEFAULT_RULE (WS_TRAILING_SPACE|WS_SPACE_BEFORE_TAB|8)
 #define WS_TAB_WIDTH_MASK077
+/* All WS_* -- when extended, adapt diff.c emit_symbol */
+#define WS_RULE_MASK   0
 extern unsigned whitespace_rule_cfg;
 extern unsigned whitespace_rule(const char *);
 extern unsigned parse_whitespace_rule(const char *);
diff --git a/diff.c b/diff.c
index 488096b757..e5430d56da 100644
--- a/diff.c
+++ b/diff.c
@@ -561,17 +561,54 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
 }
 
 enum diff_symbol {
+   DIFF_SYMBOL_CONTEXT,
+   DIFF_SYMBOL_PLUS,
+   DIFF_SYMBOL_MINUS,
DIFF_SYMBOL_NO_LF_EOF,
DIFF_SYMBOL_CONTEXT_FRAGINFO,
DIFF_SYMBOL_CONTEXT_MARKER,
DIFF_SYMBOL_SEPARATOR
 };
+/*
+ * Flags for content lines:
+ * 0..12 are whitespace rules
+ * 13-15 are WSEH_NEW | WSEH_OLD | WSEH_CONTEXT
+ * 16 is marking if the line is blank at EOF
+ */
+#define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<16)
+#define DIFF_SYMBOL_CONTENT_WS_MASK (WSEH_NEW | WSEH_OLD | WSEH_CONTEXT | 
WS_RULE_MASK)
+
+static void emit_line_ws_markup(struct diff_options *o,
+   const char *set, const char *reset,
+   const char *line, int len, char sign,
+   unsigned ws_rule, int blank_at_eof)
+{
+   const char *ws = NULL;
+
+   if (o->ws_error_highlight & ws_rule) {
+   ws = diff_get_color_opt(o, DIFF_WHITESPACE);
+   if (!*ws)
+   ws = NULL;
+   }
+
+   if (!ws)
+   emit_line_0(o, set, reset, sign, line, len);
+   else if (blank_at_eof)
+   /* Blank line at EOF - paint '+' as well */
+   emit_line_0(o, ws, reset, sign, line, len);
+   else {
+   /* Emit just the prefix, then the rest. */
+   emit_line_0(o, set, reset, sign, "", 0);
+   ws_check_emit(line, len, ws_rule,
+ o->file, set, reset, ws);
+   }
+}
 
 static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
-const char *line, int len)
+const char *line, int len, unsigned flags)
 {
static const char *nneof = " No newline at end of file\n";
-   const char *context, *reset;
+   const char *context, *reset, *set;
switch (s) {
case DIFF_SYMBOL_NO_LF_EOF:
context = diff_get_color_opt(o, DIFF_CONTEXT);
@@ -593,6 +630,25 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
diff_line_prefix(o),
o->line_termination);
break;
+   case DIFF_SYMBOL_CONTEXT:
+   set = diff_get_color_opt(o, DIFF_CONTEXT);
+   reset = diff_get_color_opt(o, DIFF_RESET);
+   emit_line_ws_markup(o, set, reset, line, len, ' ',
+   flags & (DIFF_SYMBOL_CONTENT_WS_MASK), 0);
+   break;
+   case DIFF_SYMBOL_PLUS:
+   set = diff_get_color_opt(o, DIFF_FILE_NEW);
+   reset = diff_get_color_opt(o, DIFF_RESET);
+   emit_line_ws_markup(o, set, reset, line, len, '+',
+   flags & DIFF_SYMBOL_CONTENT_WS_MASK,
+   flags & DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF);
+   break;
+   case DIFF_SYMBOL_MINUS:
+   set = diff_get_color_opt(o, DIFF_FILE_OLD);
+   reset = diff_get_color_opt(o, DIFF_RESET);
+   emit_line_ws_markup(o, set, reset, line, len, '-',
+   flags & DIFF_SYMBOL_CONTENT_WS_MASK, 0);
+   break;
default:
die("BUG: unknown diff symbol");

[PATCHv2 16/25] diff.c: convert emit_binary_diff_body to use emit_diff_symbol

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 63 ++-
 1 file changed, 46 insertions(+), 17 deletions(-)

diff --git a/diff.c b/diff.c
index 48f719fb07..f5a14359ae 100644
--- a/diff.c
+++ b/diff.c
@@ -561,6 +561,11 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
 }
 
 enum diff_symbol {
+   DIFF_SYMBOL_BINARY_DIFF_HEADER,
+   DIFF_SYMBOL_BINARY_DIFF_HEADER_DELTA,
+   DIFF_SYMBOL_BINARY_DIFF_HEADER_LITERAL,
+   DIFF_SYMBOL_BINARY_DIFF_BODY,
+   DIFF_SYMBOL_BINARY_DIFF_FOOTER,
DIFF_SYMBOL_SUBMODULE_ADD,
DIFF_SYMBOL_SUBMODULE_DEL,
DIFF_SYMBOL_SUBMODULE_UNTRACKED,
@@ -635,6 +640,7 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
case DIFF_SYMBOL_SUBMODULE_HEADER:
case DIFF_SYMBOL_SUBMODULE_ERROR:
case DIFF_SYMBOL_SUBMODULE_PIPETHROUGH:
+   case DIFF_SYMBOL_BINARY_DIFF_BODY:
case DIFF_SYMBOL_CONTEXT_FRAGINFO:
emit_line(o, "", "", line, len);
break;
@@ -706,6 +712,19 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
case DIFF_SYMBOL_HEADER:
fprintf(o->file, "%s", line);
break;
+   case DIFF_SYMBOL_BINARY_DIFF_HEADER:
+   fprintf(o->file, "%sGIT binary patch\n", diff_line_prefix(o));
+   break;
+   case DIFF_SYMBOL_BINARY_DIFF_HEADER_DELTA:
+   fprintf(o->file, "%sdelta %s\n", diff_line_prefix(o), line);
+   break;
+   case DIFF_SYMBOL_BINARY_DIFF_HEADER_LITERAL:
+   fprintf(o->file, "%sliteral %s\n", diff_line_prefix(o), line);
+   break;
+   case DIFF_SYMBOL_BINARY_DIFF_FOOTER:
+   fputs(diff_line_prefix(o), o->file);
+   fputc('\n', o->file);
+   break;
case DIFF_SYMBOL_REWRITE_DIFF:
fraginfo = diff_get_color(o->use_color, DIFF_FRAGINFO);
reset = diff_get_color_opt(o, DIFF_RESET);
@@ -2390,8 +2409,8 @@ static unsigned char *deflate_it(char *data,
return deflated;
 }
 
-static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two,
- const char *prefix)
+static void emit_binary_diff_body(struct diff_options *o,
+ mmfile_t *one, mmfile_t *two)
 {
void *cp;
void *delta;
@@ -2420,13 +2439,18 @@ static void emit_binary_diff_body(FILE *file, mmfile_t 
*one, mmfile_t *two,
}
 
if (delta && delta_size < deflate_size) {
-   fprintf(file, "%sdelta %lu\n", prefix, orig_size);
+   char *s = xstrfmt("%lu", orig_size);
+   emit_diff_symbol(o, DIFF_SYMBOL_BINARY_DIFF_HEADER_DELTA,
+s, strlen(s), 0);
+   free(s);
free(deflated);
data = delta;
data_size = delta_size;
-   }
-   else {
-   fprintf(file, "%sliteral %lu\n", prefix, two->size);
+   } else {
+   char *s = xstrfmt("%lu", two->size);
+   emit_diff_symbol(o, DIFF_SYMBOL_BINARY_DIFF_HEADER_LITERAL,
+s, strlen(s), 0);
+   free(s);
free(delta);
data = deflated;
data_size = deflate_size;
@@ -2435,8 +2459,9 @@ static void emit_binary_diff_body(FILE *file, mmfile_t 
*one, mmfile_t *two,
/* emit data encoded in base85 */
cp = data;
while (data_size) {
+   int len;
int bytes = (52 < data_size) ? 52 : data_size;
-   char line[70];
+   char line[71];
data_size -= bytes;
if (bytes <= 26)
line[0] = bytes + 'A' - 1;
@@ -2444,20 +2469,24 @@ static void emit_binary_diff_body(FILE *file, mmfile_t 
*one, mmfile_t *two,
line[0] = bytes - 26 + 'a' - 1;
encode_85(line + 1, cp, bytes);
cp = (char *) cp + bytes;
-   fprintf(file, "%s", prefix);
-   fputs(line, file);
-   fputc('\n', file);
+
+   len = strlen(line);
+   line[len++] = '\n';
+   line[len] = '\0';
+
+   emit_diff_symbol(o, DIFF_SYMBOL_BINARY_DIFF_BODY,
+line, len, 0);
}
-   fprintf(file, "%s\n", prefix);
+   emit_diff_symbol(o, DIFF_SYMBOL_BINARY_DIFF_FOOTER, NULL, 0, 0);
free(data);
 }
 
-static void emit_binary_diff(FILE *file, mmfile_t *one, mmfile_t *two,
-const char *prefix)
+static void emit_binary_diff(struct diff_options *o,
+mmfile_t *one, mmfile_t *two)
 {
-   fprintf(file, "%sGIT binary patch\n", prefix);
-   

[PATCHv2 22/25] diff.c: color moved lines differently

2017-06-30 Thread Stefan Beller
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):

[OM]  -void sensitive_stuff(void)
[OM]  -{
[OM]  -if (!is_authorized_user())
[OM]  -die("unauthorized");
[OM]  -sensitive_stuff(spanning,
[OM]  -multiple,
[OM]  -lines);
[OM]  -}

   void another_function()
   {
[del] -printf("foo");
[add] +printf("bar");
   }

[NM]  +void sensitive_stuff(void)
[NM]  +{
[NM]  +if (!is_authorized_user())
[NM]  +die("unauthorized");
[NM]  +sensitive_stuff(spanning,
[NM]  +multiple,
[NM]  +lines);
[NM]  +}

However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:

[OM]  -void sensitive_stuff(void)
[OM]  -{
[OMA] -if (!is_authorized_user())
[OMA] -die("unauthorized");
[OM]  -sensitive_stuff(spanning,
[OM]  -multiple,
[OM]  -lines);
[OMA] -}

   void another_function()
   {
[del] -printf("foo");
[add] +printf("bar");
   }

[NM]  +void sensitive_stuff(void)
[NM]  +{
[NMA] +sensitive_stuff(spanning,
[NMA] +multiple,
[NMA] +lines);
[NM]  +if (!is_authorized_user())
[NM]  +die("unauthorized");
[NMA] +}

If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.

This patch implements the first mode:
* basic alternating 'Zebra' mode
  This conveys all information needed to the user.  Defer customization to
  later patches.

First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').

Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.

A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.

It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.

While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.

For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.

Helped-by: Jonathan Tan 
Signed-off-by: Stefan Beller 
---
 diff.c | 344 +++--
 diff.h |  12 +-
 t/t4015-diff-whitespace.sh | 261 ++
 3 files changed, 602 insertions(+), 15 deletions(-)

diff --git a/diff.c b/diff.c
index 31720abf8f..084ecc5678 100644
--- a/diff.c
+++ b/diff.c
@@ -16,6 +16,7 @@
 #include "userdiff.h"
 #include "submodule-config.h"
 #include "submodule.h"
+#include "hashmap.h"
 #include "ll-merge.h"
 #include "string-list.h"
 #include "argv-array.h"
@@ -32,6 +33,7 @@ static int diff_indent_heuristic = 1;
 static int diff_rename_limit_default = 400;
 static int diff_suppress_blank_empty;
 static int diff_use_color_default = -1;
+static int 

[PATCHv2 23/25] diff.c: color moved lines differently, plain mode

2017-06-30 Thread Stefan Beller
Add the 'plain' mode for move detection of code. This omits the checking
for adjacent blocks, so it is not as useful. If you have a lot of the
same blocks moved in the same patch, the 'Zebra' would end up slow as it
is O(n^2) (n is number of same blocks). So this may be useful there and
is generally easy to add. Instead be very literal at the move detection,
do not skip over short blocks here.

Signed-off-by: Stefan Beller 
---
 diff.c | 10 +++--
 diff.h |  1 +
 t/t4015-diff-whitespace.sh | 51 +-
 3 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/diff.c b/diff.c
index 084ecc5678..79ea592388 100644
--- a/diff.c
+++ b/diff.c
@@ -256,12 +256,14 @@ static int parse_color_moved(const char *arg)
 
if (!strcmp(arg, "no"))
return COLOR_MOVED_NO;
+   else if (!strcmp(arg, "plain"))
+   return COLOR_MOVED_PLAIN;
else if (!strcmp(arg, "zebra"))
return COLOR_MOVED_ZEBRA;
else if (!strcmp(arg, "default"))
return COLOR_MOVED_DEFAULT;
else
-   return error(_("color moved setting must be one of 'no', 
'default', 'zebra'"));
+   return error(_("color moved setting must be one of 'no', 
'default', 'zebra', 'plain'"));
 }
 
 int git_diff_ui_config(const char *var, const char *value, void *cb)
@@ -879,7 +881,8 @@ static void mark_color_as_moved(struct diff_options *o,
}
 
if (!match) {
-   if (block_length < COLOR_MOVED_MIN_BLOCK_LENGTH) {
+   if (block_length < COLOR_MOVED_MIN_BLOCK_LENGTH &&
+   o->color_moved != COLOR_MOVED_PLAIN) {
for (i = 0; i < block_length + 1; i++) {
l = >emitted_symbols->buf[n - i];
l->flags &= ~DIFF_SYMBOL_MOVED_LINE;
@@ -893,6 +896,9 @@ static void mark_color_as_moved(struct diff_options *o,
l->flags |= DIFF_SYMBOL_MOVED_LINE;
block_length++;
 
+   if (o->color_moved == COLOR_MOVED_PLAIN)
+   continue;
+
/* Check any potential block runs, advance each or nullify */
for (i = 0; i < pmb_nr; i++) {
struct moved_entry *p = pmb[i];
diff --git a/diff.h b/diff.h
index 3196802673..4cfd609c54 100644
--- a/diff.h
+++ b/diff.h
@@ -190,6 +190,7 @@ struct diff_options {
struct emitted_diff_symbols *emitted_symbols;
enum {
COLOR_MOVED_NO = 0,
+   COLOR_MOVED_PLAIN = 1,
COLOR_MOVED_ZEBRA = 2,
} color_moved;
#define COLOR_MOVED_DEFAULT COLOR_MOVED_ZEBRA
diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh
index 29704ae14e..d1d7b0 100755
--- a/t/t4015-diff-whitespace.sh
+++ b/t/t4015-diff-whitespace.sh
@@ -986,7 +986,7 @@ test_expect_success 'detect moved code, complete file' '
git mv test.c main.c &&
test_config color.diff.oldMoved "normal red" &&
test_config color.diff.newMoved "normal green" &&
-   git diff HEAD --color-moved --no-renames | test_decode_color >actual &&
+   git diff HEAD --color-moved=zebra --no-renames | test_decode_color 
>actual &&
cat >expected <<-\EOF &&
diff --git a/main.c b/main.c
new file mode 100644
@@ -1130,6 +1130,55 @@ test_expect_success 'detect malicious moved code, inside 
file' '
test_cmp expected actual
 '
 
+test_expect_success 'plain moved code, inside file' '
+   test_config color.diff.oldMoved "normal red" &&
+   test_config color.diff.newMoved "normal green" &&
+   test_config color.diff.oldMovedAlternative "blue" &&
+   test_config color.diff.newMovedAlternative "yellow" &&
+   # needs previous test as setup
+   git diff HEAD --no-renames --color-moved=plain| test_decode_color 
>actual &&
+   cat <<-\EOF >expected &&
+   diff --git a/main.c b/main.c
+   index 27a619c..7cf9336 100644
+   --- a/main.c
+   +++ b/main.c
+   @@ -5,13 +5,6 @@ printf("Hello ");
+printf("World\n");
+}
+
+   -int secure_foo(struct user *u)
+   -{
+   -if (!u->is_allowed_foo)
+   -return;
+   -foo(u);
+   -}
+   -
+int main()
+{
+foo();
+   diff --git a/test.c b/test.c
+   index 1dc1d85..2bedec9 100644
+   --- a/test.c
+   +++ b/test.c
+   @@ -4,6 +4,13 @@ int bar()
+printf("Hello World, but different\n");
+}
+
+   +int secure_foo(struct user *u)
+   +{
+   +foo(u);
+   +if (!u->is_allowed_foo)
+   +return;
+   +}
+   +
+int another_function()
+{
+bar();
+   EOF
+
+   test_cmp expected actual
+'
+
 test_expect_success 'no effect from --color-moved with 

[PATCHv2 11/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_FILEPAIR_{PLUS, MINUS}

2017-06-30 Thread Stefan Beller
We have to use fprintf instead of emit_line, because we want to emit the
tab after the color. This is important for ancient versions of gnu patch
AFAICT, although we probably do not want to feed colored output to the
patch utility, such that it would not matter if the trailing tab is
colored. Keep the corner case as-is though.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 51 ++-
 1 file changed, 30 insertions(+), 21 deletions(-)

diff --git a/diff.c b/diff.c
index b2b2a19fcf..49b45fef29 100644
--- a/diff.c
+++ b/diff.c
@@ -561,6 +561,8 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
 }
 
 enum diff_symbol {
+   DIFF_SYMBOL_FILEPAIR_PLUS,
+   DIFF_SYMBOL_FILEPAIR_MINUS,
DIFF_SYMBOL_WORDS_PORCELAIN,
DIFF_SYMBOL_WORDS,
DIFF_SYMBOL_CONTEXT,
@@ -611,7 +613,7 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
 const char *line, int len, unsigned flags)
 {
static const char *nneof = " No newline at end of file\n";
-   const char *context, *reset, *set;
+   const char *context, *reset, *set, *meta;
switch (s) {
case DIFF_SYMBOL_NO_LF_EOF:
context = diff_get_color_opt(o, DIFF_CONTEXT);
@@ -673,6 +675,20 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
}
emit_line(o, context, reset, line, len);
break;
+   case DIFF_SYMBOL_FILEPAIR_PLUS:
+   meta = diff_get_color_opt(o, DIFF_METAINFO);
+   reset = diff_get_color_opt(o, DIFF_RESET);
+   fprintf(o->file, "%s%s+++ %s%s%s\n", diff_line_prefix(o), meta,
+   line, reset,
+   strchr(line, ' ') ? "\t" : "");
+   break;
+   case DIFF_SYMBOL_FILEPAIR_MINUS:
+   meta = diff_get_color_opt(o, DIFF_METAINFO);
+   reset = diff_get_color_opt(o, DIFF_RESET);
+   fprintf(o->file, "%s%s--- %s%s%s\n", diff_line_prefix(o), meta,
+   line, reset,
+   strchr(line, ' ') ? "\t" : "");
+   break;
default:
die("BUG: unknown diff symbol");
}
@@ -844,8 +860,6 @@ static void emit_rewrite_diff(const char *name_a,
  struct diff_options *o)
 {
int lc_a, lc_b;
-   const char *name_a_tab, *name_b_tab;
-   const char *metainfo = diff_get_color(o->use_color, DIFF_METAINFO);
const char *fraginfo = diff_get_color(o->use_color, DIFF_FRAGINFO);
const char *reset = diff_get_color(o->use_color, DIFF_RESET);
static struct strbuf a_name = STRBUF_INIT, b_name = STRBUF_INIT;
@@ -865,8 +879,6 @@ static void emit_rewrite_diff(const char *name_a,
 
name_a += (*name_a == '/');
name_b += (*name_b == '/');
-   name_a_tab = strchr(name_a, ' ') ? "\t" : "";
-   name_b_tab = strchr(name_b, ' ') ? "\t" : "";
 
strbuf_reset(_name);
strbuf_reset(_name);
@@ -893,11 +905,13 @@ static void emit_rewrite_diff(const char *name_a,
 
lc_a = count_lines(data_one, size_one);
lc_b = count_lines(data_two, size_two);
-   fprintf(o->file,
-   "%s%s--- %s%s%s\n%s%s+++ %s%s%s\n%s%s@@ -",
-   line_prefix, metainfo, a_name.buf, name_a_tab, reset,
-   line_prefix, metainfo, b_name.buf, name_b_tab, reset,
-   line_prefix, fraginfo);
+
+   emit_diff_symbol(o, DIFF_SYMBOL_FILEPAIR_MINUS,
+a_name.buf, a_name.len, 0);
+   emit_diff_symbol(o, DIFF_SYMBOL_FILEPAIR_PLUS,
+b_name.buf, b_name.len, 0);
+
+   fprintf(o->file, "%s%s@@ -", line_prefix, fraginfo);
if (!o->irreversible_delete)
print_line_count(o->file, lc_a);
else
@@ -1365,10 +1379,8 @@ static void find_lno(const char *line, struct 
emit_callback *ecbdata)
 static void fn_out_consume(void *priv, char *line, unsigned long len)
 {
struct emit_callback *ecbdata = priv;
-   const char *meta = diff_get_color(ecbdata->color_diff, DIFF_METAINFO);
const char *reset = diff_get_color(ecbdata->color_diff, DIFF_RESET);
struct diff_options *o = ecbdata->opt;
-   const char *line_prefix = diff_line_prefix(o);
 
o->found_changes = 1;
 
@@ -1379,15 +1391,12 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
}
 
if (ecbdata->label_path[0]) {
-   const char *name_a_tab, *name_b_tab;
-
-   name_a_tab = strchr(ecbdata->label_path[0], ' ') ? "\t" : "";
-   name_b_tab = strchr(ecbdata->label_path[1], ' ') ? "\t" : "";
-
-   fprintf(o->file, "%s%s--- %s%s%s\n",
-   line_prefix, meta, ecbdata->label_path[0], reset, 
name_a_tab);
-  

[PATCHv2 10/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_CONTEXT_INCOMPLETE

2017-06-30 Thread Stefan Beller
The context marker use the exact same output pattern, so reuse it.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 85bfd9310d..b2b2a19fcf 100644
--- a/diff.c
+++ b/diff.c
@@ -564,6 +564,7 @@ enum diff_symbol {
DIFF_SYMBOL_WORDS_PORCELAIN,
DIFF_SYMBOL_WORDS,
DIFF_SYMBOL_CONTEXT,
+   DIFF_SYMBOL_CONTEXT_INCOMPLETE,
DIFF_SYMBOL_PLUS,
DIFF_SYMBOL_MINUS,
DIFF_SYMBOL_NO_LF_EOF,
@@ -622,6 +623,7 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
case DIFF_SYMBOL_CONTEXT_FRAGINFO:
emit_line(o, "", "", line, len);
break;
+   case DIFF_SYMBOL_CONTEXT_INCOMPLETE:
case DIFF_SYMBOL_CONTEXT_MARKER:
context = diff_get_color_opt(o, DIFF_CONTEXT);
reset = diff_get_color_opt(o, DIFF_RESET);
@@ -1448,8 +1450,8 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
default:
/* incomplete line at the end */
ecbdata->lno_in_preimage++;
-   emit_line(o, diff_get_color(ecbdata->color_diff, DIFF_CONTEXT),
- reset, line, len);
+   emit_diff_symbol(o, DIFF_SYMBOL_CONTEXT_INCOMPLETE,
+line, len, 0);
break;
}
 }
-- 
2.13.0.31.g9b732c453e



[PATCHv2 03/25] diff.c: factor out diff_flush_patch_all_file_pairs

2017-06-30 Thread Stefan Beller
In a later patch we want to do more things before and after all filepairs
are flushed. So factor flushing out all file pairs into its own function
that the new code can be plugged in easily.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/diff.c b/diff.c
index 94fdb57927..77ef56a6e4 100644
--- a/diff.c
+++ b/diff.c
@@ -4734,6 +4734,17 @@ void diff_warn_rename_limit(const char *varname, int 
needed, int degraded_cc)
warning(_(rename_limit_advice), varname, needed);
 }
 
+static void diff_flush_patch_all_file_pairs(struct diff_options *o)
+{
+   int i;
+   struct diff_queue_struct *q = _queued_diff;
+   for (i = 0; i < q->nr; i++) {
+   struct diff_filepair *p = q->queue[i];
+   if (check_pair_status(p))
+   diff_flush_patch(p, o);
+   }
+}
+
 void diff_flush(struct diff_options *options)
 {
struct diff_queue_struct *q = _queued_diff;
@@ -4826,11 +4837,7 @@ void diff_flush(struct diff_options *options)
}
}
 
-   for (i = 0; i < q->nr; i++) {
-   struct diff_filepair *p = q->queue[i];
-   if (check_pair_status(p))
-   diff_flush_patch(p, options);
-   }
+   diff_flush_patch_all_file_pairs(options);
}
 
if (output_format & DIFF_FORMAT_CALLBACK)
-- 
2.13.0.31.g9b732c453e



[PATCHv2 25/25] diff: document the new --color-moved setting

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
---
 Documentation/config.txt   | 15 +--
 Documentation/diff-options.txt | 36 
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 06898a7498..74382e5ff5 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1077,14 +1077,25 @@ This does not affect linkgit:git-format-patch[1] or the
 'git-diff-{asterisk}' plumbing commands.  Can be overridden on the
 command line with the `--color[=]` option.
 
+diff.colorMoved::
+   If set to either a valid `` or a true value, moved lines
+   in a diff are colored differently, for details of valid modes
+   see '--color-moved' in linkgit:git-diff[1]. If simply set to
+   true the default color mode will be used. When set to false,
+   moved lines are not colored.
+
 color.diff.::
Use customized color for diff colorization.  `` specifies
which part of the patch to use the specified color, and is one
of `context` (context text - `plain` is a historical synonym),
`meta` (metainformation), `frag`
(hunk header), 'func' (function in hunk header), `old` (removed lines),
-   `new` (added lines), `commit` (commit headers), or `whitespace`
-   (highlighting whitespace errors).
+   `new` (added lines), `commit` (commit headers), `whitespace`
+   (highlighting whitespace errors), `oldMoved` (deleted lines),
+   `newMoved` (added lines), `oldMovedDimmed`, `oldMovedAlternative`,
+   `oldMovedAlternativeDimmed`, `newMovedDimmed`, `newMovedAlternative`
+   and `newMovedAlternativeDimmed` (See the ''
+   setting of '--color-moved' in linkgit:git-diff[1] for details).
 
 color.decorate.::
Use customized color for 'git log --decorate' output.  `` is one
diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 89cc0f48de..bc52bd0b99 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -231,6 +231,42 @@ ifdef::git-diff[]
 endif::git-diff[]
It is the same as `--color=never`.
 
+--color-moved[=]::
+   Moved lines of code are colored differently.
+ifdef::git-diff[]
+   It can be changed by the `diff.colorMoved` configuration setting.
+endif::git-diff[]
+   The  defaults to 'no' if the option is not given
+   and to 'zebra' if the option with no mode is given.
+   The mode must be one of:
++
+--
+no::
+   Moved lines are not highlighted.
+default::
+   Is a synonym for `zebra`. This may change to a more sensible mode
+   in the future.
+plain::
+   Any line that is added in one location and was removed
+   in another location will be colored with 'color.diff.newMoved'.
+   Similarly 'color.diff.oldMoved' will be used for removed lines
+   that are added somewhere else in the diff. This mode picks up any
+   moved line, but it is not very useful in a review to determine
+   if a block of code was moved without permutation.
+zebra::
+   Blocks of moved code are detected greedily. The detected blocks are
+   painted using either the 'color.diff.{old,new}Moved' color or
+   'color.diff.{old,new}MovedAlternative'. The change between
+   the two colors indicates that a new block was detected. If there
+   are fewer than 3 adjacent moved lines, they are not marked up
+   as moved, but the regular colors 'color.diff.{old,new}' will be
+   used.
+dimmed_zebra::
+   Similar to 'zebra', but additional dimming of uninteresting parts
+   of moved code is performed. The bordering lines of two adjacent
+   blocks are considered interesting, the rest is uninteresting.
+--
+
 --word-diff[=]::
Show a word diff, using the  to delimit changed words.
By default, words are delimited by whitespace; see
-- 
2.13.0.31.g9b732c453e



[PATCHv2 21/25] diff.c: buffer all output if asked to

2017-06-30 Thread Stefan Beller
Introduce a new option 'emitted_symbols' in the struct diff_options which
controls whether all output is buffered up until all output is available.
It is set internally in diff.c when necessary.

We'll have a new struct 'emitted_string' in diff.c which will be used to
buffer each line.  The emitted_string will duplicate the memory of the
line to buffer as that is easiest to reason about for now. In a future
patch we may want to decrease the memory usage by not duplicating all
output for buffering but rather we may want to store offsets into the
file or in case of hunk descriptions such as the similarity score, we
could just store the relevant number and reproduce the text later on.

This approach was chosen as a first step because it is quite simple
compared to the alternative with less memory footprint.

emit_diff_symbol factors out the emission part and depending on the
diff_options->emitted_symbols the emission will be performed directly
when calling emit_diff_symbol or after the whole process is done, i.e.
by buffering we have add the possibility for a second pass over the
whole output before doing the actual output.

In 6440d34 (2012-03-14, diff: tweak a _copy_ of diff_options with
word-diff) we introduced a duplicate diff options struct for word
emissions as we may have different regex settings in there.
When buffering the output, we need to operate on just one buffer,
so we have to copy back the emissions of the word buffer into the
main buffer.

Unconditionally enable output via buffer in this patch as it yields
a great opportunity for testing, i.e. all the diff tests from the
test suite pass without having reordering issues (i.e. only parts
of the output got buffered, and we forgot to buffer other parts).
The test suite passes, which gives confidence that we converted all
functions to use emit_string for output.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 109 +++--
 diff.h |   2 ++
 2 files changed, 109 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 2db0d7c0f5..31720abf8f 100644
--- a/diff.c
+++ b/diff.c
@@ -605,6 +605,47 @@ enum diff_symbol {
 #define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<16)
 #define DIFF_SYMBOL_CONTENT_WS_MASK (WSEH_NEW | WSEH_OLD | WSEH_CONTEXT | 
WS_RULE_MASK)
 
+/*
+ * This struct is used when we need to buffer the output of the diff output.
+ *
+ * NEEDSWORK: Instead of storing a copy of the line, add an offset pointer
+ * into the pre/post image file. This pointer could be a union with the
+ * line pointer. By storing an offset into the file instead of the literal 
line,
+ * we can decrease the memory footprint for the buffered output. At first we
+ * may want to only have indirection for the content lines, but we could also
+ * enhance the state for emitting prefabricated lines, e.g. the similarity
+ * score line or hunk/file headers would only need to store a number or path
+ * and then the output can be constructed later on depending on state.
+ */
+struct emitted_diff_symbol {
+   const char *line;
+   int len;
+   int flags;
+   enum diff_symbol s;
+};
+#define EMITTED_DIFF_SYMBOL_INIT {NULL}
+
+struct emitted_diff_symbols {
+   struct emitted_diff_symbol *buf;
+   int nr, alloc;
+};
+#define EMITTED_DIFF_SYMBOLS_INIT {NULL, 0, 0}
+
+static void append_emitted_diff_symbol(struct diff_options *o,
+  struct emitted_diff_symbol *e)
+{
+   struct emitted_diff_symbol *f;
+
+   ALLOC_GROW(o->emitted_symbols->buf,
+  o->emitted_symbols->nr + 1,
+  o->emitted_symbols->alloc);
+   f = >emitted_symbols->buf[o->emitted_symbols->nr++];
+
+   memcpy(f, e, sizeof(struct emitted_diff_symbol));
+   f->line = e->line ? xmemdupz(e->line, e->len) : NULL;
+}
+
+
 static void emit_line_ws_markup(struct diff_options *o,
const char *set, const char *reset,
const char *line, int len, char sign,
@@ -631,12 +672,18 @@ static void emit_line_ws_markup(struct diff_options *o,
}
 }
 
-static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
-const char *line, int len, unsigned flags)
+static void emit_diff_symbol_from_struct(struct diff_options *o,
+struct emitted_diff_symbol *eds)
 {
static const char *nneof = " No newline at end of file\n";
const char *context, *reset, *set, *meta, *fraginfo;
struct strbuf sb = STRBUF_INIT;
+
+   enum diff_symbol s = eds->s;
+   const char *line = eds->line;
+   int len = eds->len;
+   unsigned flags = eds->flags;
+
switch (s) {
case DIFF_SYMBOL_NO_LF_EOF:
context = diff_get_color_opt(o, DIFF_CONTEXT);
@@ -778,6 +825,17 @@ static void emit_diff_symbol(struct diff_options *o, enum 

[PATCHv2 19/25] diff.c: emit_diff_symbol learns about DIFF_SYMBOL_STAT_SEP

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/diff.c b/diff.c
index e0d39d04da..5a9c55736d 100644
--- a/diff.c
+++ b/diff.c
@@ -571,6 +571,7 @@ enum diff_symbol {
DIFF_SYMBOL_STATS_SUMMARY_INSERTS_DELETES,
DIFF_SYMBOL_STATS_LINE,
DIFF_SYMBOL_WORD_DIFF,
+   DIFF_SYMBOL_STAT_SEP,
DIFF_SYMBOL_SUBMODULE_ADD,
DIFF_SYMBOL_SUBMODULE_DEL,
DIFF_SYMBOL_SUBMODULE_UNTRACKED,
@@ -766,6 +767,9 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
case DIFF_SYMBOL_WORD_DIFF:
fprintf(o->file, "%.*s", len, line);
break;
+   case DIFF_SYMBOL_STAT_SEP:
+   fputs(o->stat_sep, o->file);
+   break;
default:
die("BUG: unknown diff symbol");
}
@@ -5077,10 +5081,10 @@ void diff_flush(struct diff_options *options)
if (output_format & DIFF_FORMAT_PATCH) {
if (separator) {
emit_diff_symbol(options, DIFF_SYMBOL_SEPARATOR, NULL, 
0, 0);
-   if (options->stat_sep) {
+   if (options->stat_sep)
/* attach patch instead of inline */
-   fputs(options->stat_sep, options->file);
-   }
+   emit_diff_symbol(options, DIFF_SYMBOL_STAT_SEP,
+NULL, 0, 0);
}
 
diff_flush_patch_all_file_pairs(options);
-- 
2.13.0.31.g9b732c453e



[PATCHv2 24/25] diff.c: add dimming to moved line detection

2017-06-30 Thread Stefan Beller
Any lines inside a moved block of code are not interesting. Boundaries
of blocks are only interesting if they are next to another block of moved
code.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 color.h|   2 +
 diff.c | 132 +
 diff.h |   9 +++-
 t/t4015-diff-whitespace.sh | 124 ++
 4 files changed, 254 insertions(+), 13 deletions(-)

diff --git a/color.h b/color.h
index 90627650fc..fd2b688dfb 100644
--- a/color.h
+++ b/color.h
@@ -42,6 +42,8 @@ struct strbuf;
 #define GIT_COLOR_BG_BLUE  "\033[44m"
 #define GIT_COLOR_BG_MAGENTA   "\033[45m"
 #define GIT_COLOR_BG_CYAN  "\033[46m"
+#define GIT_COLOR_FAINT"\033[2m"
+#define GIT_COLOR_FAINT_ITALIC "\033[2;3m"
 
 /* A special value meaning "no color selected" */
 #define GIT_COLOR_NIL "NIL"
diff --git a/diff.c b/diff.c
index 79ea592388..4af73a7e0c 100644
--- a/diff.c
+++ b/diff.c
@@ -58,10 +58,14 @@ static char diff_colors[][COLOR_MAXLEN] = {
GIT_COLOR_YELLOW,   /* COMMIT */
GIT_COLOR_BG_RED,   /* WHITESPACE */
GIT_COLOR_NORMAL,   /* FUNCINFO */
-   GIT_COLOR_MAGENTA,  /* OLD_MOVED */
-   GIT_COLOR_BLUE, /* OLD_MOVED ALTERNATIVE */
-   GIT_COLOR_CYAN, /* NEW_MOVED */
-   GIT_COLOR_YELLOW,   /* NEW_MOVED ALTERNATIVE */
+   GIT_COLOR_BOLD_MAGENTA, /* OLD_MOVED */
+   GIT_COLOR_BOLD_BLUE,/* OLD_MOVED ALTERNATIVE */
+   GIT_COLOR_FAINT,/* OLD_MOVED_DIM */
+   GIT_COLOR_FAINT_ITALIC, /* OLD_MOVED_ALTERNATIVE_DIM */
+   GIT_COLOR_BOLD_CYAN,/* NEW_MOVED */
+   GIT_COLOR_BOLD_YELLOW,  /* NEW_MOVED ALTERNATIVE */
+   GIT_COLOR_FAINT,/* NEW_MOVED_DIM */
+   GIT_COLOR_FAINT_ITALIC, /* NEW_MOVED_ALTERNATIVE_DIM */
 };
 
 static NORETURN void die_want_option(const char *option_name)
@@ -91,10 +95,18 @@ static int parse_diff_color_slot(const char *var)
return DIFF_FILE_OLD_MOVED;
if (!strcasecmp(var, "oldmovedalternative"))
return DIFF_FILE_OLD_MOVED_ALT;
+   if (!strcasecmp(var, "oldmoveddimmed"))
+   return DIFF_FILE_OLD_MOVED_DIM;
+   if (!strcasecmp(var, "oldmovedalternativedimmed"))
+   return DIFF_FILE_OLD_MOVED_ALT_DIM;
if (!strcasecmp(var, "newmoved"))
return DIFF_FILE_NEW_MOVED;
if (!strcasecmp(var, "newmovedalternative"))
return DIFF_FILE_NEW_MOVED_ALT;
+   if (!strcasecmp(var, "newmoveddimmed"))
+   return DIFF_FILE_NEW_MOVED_DIM;
+   if (!strcasecmp(var, "newmovedalternativedimmed"))
+   return DIFF_FILE_NEW_MOVED_ALT_DIM;
return -1;
 }
 
@@ -262,8 +274,10 @@ static int parse_color_moved(const char *arg)
return COLOR_MOVED_ZEBRA;
else if (!strcmp(arg, "default"))
return COLOR_MOVED_DEFAULT;
+   else if (!strcmp(arg, "dimmed_zebra"))
+   return COLOR_MOVED_ZEBRA_DIM;
else
-   return error(_("color moved setting must be one of 'no', 
'default', 'zebra', 'plain'"));
+   return error(_("color moved setting must be one of 'no', 
'default', 'zebra', 'dimmed_zebra', 'plain'"));
 }
 
 int git_diff_ui_config(const char *var, const char *value, void *cb)
@@ -649,6 +663,7 @@ enum diff_symbol {
 #define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<16)
 #define DIFF_SYMBOL_MOVED_LINE (1<<17)
 #define DIFF_SYMBOL_MOVED_LINE_ALT (1<<18)
+#define DIFF_SYMBOL_MOVED_LINE_UNINTERESTING   (1<<19)
 #define DIFF_SYMBOL_CONTENT_WS_MASK (WSEH_NEW | WSEH_OLD | WSEH_CONTEXT | 
WS_RULE_MASK)
 
 /*
@@ -933,6 +948,67 @@ static void mark_color_as_moved(struct diff_options *o,
free(pmb);
 }
 
+#define DIFF_SYMBOL_MOVED_LINE_ZEBRA_MASK \
+  (DIFF_SYMBOL_MOVED_LINE | DIFF_SYMBOL_MOVED_LINE_ALT)
+static void dim_moved_lines(struct diff_options *o)
+{
+   int n;
+   for (n = 0; n < o->emitted_symbols->nr; n++) {
+   struct emitted_diff_symbol *prev = (n != 0) ?
+   >emitted_symbols->buf[n - 1] : NULL;
+   struct emitted_diff_symbol *l = >emitted_symbols->buf[n];
+   struct emitted_diff_symbol *next =
+   (n < o->emitted_symbols->nr - 1) ?
+   >emitted_symbols->buf[n + 1] : NULL;
+
+   /* Not a plus or minus line? */
+   if (l->s != DIFF_SYMBOL_PLUS && l->s != DIFF_SYMBOL_MINUS)
+   continue;
+
+   /* Not a moved line? */
+   if (!(l->flags & DIFF_SYMBOL_MOVED_LINE))
+   continue;
+
+   /*
+* If prev or next are not a plus or minus line,
+* pretend they don't exist
+*/
+   if (prev && 

[PATCHv2 14/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_REWRITE_DIFF

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 35 +--
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/diff.c b/diff.c
index 1d0f8b509f..5c428e02b6 100644
--- a/diff.c
+++ b/diff.c
@@ -561,6 +561,7 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
 }
 
 enum diff_symbol {
+   DIFF_SYMBOL_REWRITE_DIFF,
DIFF_SYMBOL_BINARY_FILES,
DIFF_SYMBOL_HEADER,
DIFF_SYMBOL_FILEPAIR_PLUS,
@@ -615,7 +616,7 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
 const char *line, int len, unsigned flags)
 {
static const char *nneof = " No newline at end of file\n";
-   const char *context, *reset, *set, *meta;
+   const char *context, *reset, *set, *meta, *fraginfo;
switch (s) {
case DIFF_SYMBOL_NO_LF_EOF:
context = diff_get_color_opt(o, DIFF_CONTEXT);
@@ -695,6 +696,11 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
case DIFF_SYMBOL_HEADER:
fprintf(o->file, "%s", line);
break;
+   case DIFF_SYMBOL_REWRITE_DIFF:
+   fraginfo = diff_get_color(o->use_color, DIFF_FRAGINFO);
+   reset = diff_get_color_opt(o, DIFF_RESET);
+   emit_line(o, fraginfo, reset, line, len);
+   break;
default:
die("BUG: unknown diff symbol");
}
@@ -817,17 +823,17 @@ static void remove_tempfile(void)
}
 }
 
-static void print_line_count(FILE *file, int count)
+static void add_line_count(struct strbuf *out, int count)
 {
switch (count) {
case 0:
-   fprintf(file, "0,0");
+   strbuf_addstr(out, "0,0");
break;
case 1:
-   fprintf(file, "1");
+   strbuf_addstr(out, "1");
break;
default:
-   fprintf(file, "1,%d", count);
+   strbuf_addf(out, "1,%d", count);
break;
}
 }
@@ -866,14 +872,12 @@ static void emit_rewrite_diff(const char *name_a,
  struct diff_options *o)
 {
int lc_a, lc_b;
-   const char *fraginfo = diff_get_color(o->use_color, DIFF_FRAGINFO);
-   const char *reset = diff_get_color(o->use_color, DIFF_RESET);
static struct strbuf a_name = STRBUF_INIT, b_name = STRBUF_INIT;
const char *a_prefix, *b_prefix;
char *data_one, *data_two;
size_t size_one, size_two;
struct emit_callback ecbdata;
-   const char *line_prefix = diff_line_prefix(o);
+   struct strbuf out = STRBUF_INIT;
 
if (diff_mnemonic_prefix && DIFF_OPT_TST(o, REVERSE_DIFF)) {
a_prefix = o->b_prefix;
@@ -917,14 +921,17 @@ static void emit_rewrite_diff(const char *name_a,
emit_diff_symbol(o, DIFF_SYMBOL_FILEPAIR_PLUS,
 b_name.buf, b_name.len, 0);
 
-   fprintf(o->file, "%s%s@@ -", line_prefix, fraginfo);
+   strbuf_addstr(, "@@ -");
if (!o->irreversible_delete)
-   print_line_count(o->file, lc_a);
+   add_line_count(, lc_a);
else
-   fprintf(o->file, "?,?");
-   fprintf(o->file, " +");
-   print_line_count(o->file, lc_b);
-   fprintf(o->file, " @@%s\n", reset);
+   strbuf_addstr(, "?,?");
+   strbuf_addstr(, " +");
+   add_line_count(, lc_b);
+   strbuf_addstr(, " @@\n");
+   emit_diff_symbol(o, DIFF_SYMBOL_REWRITE_DIFF, out.buf, out.len, 0);
+   strbuf_release();
+
if (lc_a && !o->irreversible_delete)
emit_rewrite_lines(, '-', data_one, size_one);
if (lc_b)
-- 
2.13.0.31.g9b732c453e



[PATCHv2 12/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_HEADER

2017-06-30 Thread Stefan Beller
The header is constructed lazily including line breaks, so just emit
the raw string as is.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 28 
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/diff.c b/diff.c
index 49b45fef29..78f7c6f82f 100644
--- a/diff.c
+++ b/diff.c
@@ -561,6 +561,7 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
 }
 
 enum diff_symbol {
+   DIFF_SYMBOL_HEADER,
DIFF_SYMBOL_FILEPAIR_PLUS,
DIFF_SYMBOL_FILEPAIR_MINUS,
DIFF_SYMBOL_WORDS_PORCELAIN,
@@ -689,6 +690,9 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
line, reset,
strchr(line, ' ') ? "\t" : "");
break;
+   case DIFF_SYMBOL_HEADER:
+   fprintf(o->file, "%s", line);
+   break;
default:
die("BUG: unknown diff symbol");
}
@@ -1385,7 +1389,8 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
o->found_changes = 1;
 
if (ecbdata->header) {
-   fprintf(o->file, "%s", ecbdata->header->buf);
+   emit_diff_symbol(o, DIFF_SYMBOL_HEADER,
+ecbdata->header->buf, ecbdata->header->len, 0);
strbuf_reset(ecbdata->header);
ecbdata->header = NULL;
}
@@ -2519,7 +2524,8 @@ static void builtin_diff(const char *name_a,
if (complete_rewrite &&
(textconv_one || !diff_filespec_is_binary(one)) &&
(textconv_two || !diff_filespec_is_binary(two))) {
-   fprintf(o->file, "%s", header.buf);
+   emit_diff_symbol(o, DIFF_SYMBOL_HEADER,
+header.buf, header.len, 0);
strbuf_reset();
emit_rewrite_diff(name_a, name_b, one, two,
textconv_one, textconv_two, o);
@@ -2529,7 +2535,8 @@ static void builtin_diff(const char *name_a,
}
 
if (o->irreversible_delete && lbl[1][0] == '/') {
-   fprintf(o->file, "%s", header.buf);
+   emit_diff_symbol(o, DIFF_SYMBOL_HEADER, header.buf,
+header.len, 0);
strbuf_reset();
goto free_ab_and_return;
} else if (!DIFF_OPT_TST(o, TEXT) &&
@@ -2540,10 +2547,13 @@ static void builtin_diff(const char *name_a,
!DIFF_OPT_TST(o, BINARY)) {
if (!oidcmp(>oid, >oid)) {
if (must_show_header)
-   fprintf(o->file, "%s", header.buf);
+   emit_diff_symbol(o, DIFF_SYMBOL_HEADER,
+header.buf, header.len,
+0);
goto free_ab_and_return;
}
-   fprintf(o->file, "%s", header.buf);
+   emit_diff_symbol(o, DIFF_SYMBOL_HEADER,
+header.buf, header.len, 0);
fprintf(o->file, "%sBinary files %s and %s differ\n",
line_prefix, lbl[0], lbl[1]);
goto free_ab_and_return;
@@ -2554,10 +2564,11 @@ static void builtin_diff(const char *name_a,
if (mf1.size == mf2.size &&
!memcmp(mf1.ptr, mf2.ptr, mf1.size)) {
if (must_show_header)
-   fprintf(o->file, "%s", header.buf);
+   emit_diff_symbol(o, DIFF_SYMBOL_HEADER,
+header.buf, header.len, 0);
goto free_ab_and_return;
}
-   fprintf(o->file, "%s", header.buf);
+   emit_diff_symbol(o, DIFF_SYMBOL_HEADER, header.buf, header.len, 
0);
strbuf_reset();
if (DIFF_OPT_TST(o, BINARY))
emit_binary_diff(o->file, , , line_prefix);
@@ -2575,7 +2586,8 @@ static void builtin_diff(const char *name_a,
const struct userdiff_funcname *pe;
 
if (must_show_header) {
-   fprintf(o->file, "%s", header.buf);
+   emit_diff_symbol(o, DIFF_SYMBOL_HEADER,
+header.buf, header.len, 0);
strbuf_reset();
}
 
-- 
2.13.0.31.g9b732c453e



[PATCHv2 09/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_WORDS[_PORCELAIN]

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 42 ++
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/diff.c b/diff.c
index e5430d56da..85bfd9310d 100644
--- a/diff.c
+++ b/diff.c
@@ -561,6 +561,8 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
 }
 
 enum diff_symbol {
+   DIFF_SYMBOL_WORDS_PORCELAIN,
+   DIFF_SYMBOL_WORDS,
DIFF_SYMBOL_CONTEXT,
DIFF_SYMBOL_PLUS,
DIFF_SYMBOL_MINUS,
@@ -649,6 +651,26 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
emit_line_ws_markup(o, set, reset, line, len, '-',
flags & DIFF_SYMBOL_CONTENT_WS_MASK, 0);
break;
+   case DIFF_SYMBOL_WORDS_PORCELAIN:
+   context = diff_get_color_opt(o, DIFF_CONTEXT);
+   reset = diff_get_color_opt(o, DIFF_RESET);
+   emit_line(o, context, reset, line, len);
+   fputs("~\n", o->file);
+   break;
+   case DIFF_SYMBOL_WORDS:
+   context = diff_get_color_opt(o, DIFF_CONTEXT);
+   reset = diff_get_color_opt(o, DIFF_RESET);
+   /*
+* Skip the prefix character, if any.  With
+* diff_suppress_blank_empty, there may be
+* none.
+*/
+   if (line[0] != '\n') {
+   line++;
+   len--;
+   }
+   emit_line(o, context, reset, line, len);
+   break;
default:
die("BUG: unknown diff symbol");
}
@@ -1342,7 +1364,6 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
 {
struct emit_callback *ecbdata = priv;
const char *meta = diff_get_color(ecbdata->color_diff, DIFF_METAINFO);
-   const char *context = diff_get_color(ecbdata->color_diff, DIFF_CONTEXT);
const char *reset = diff_get_color(ecbdata->color_diff, DIFF_RESET);
struct diff_options *o = ecbdata->opt;
const char *line_prefix = diff_line_prefix(o);
@@ -1384,6 +1405,9 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
}
 
if (ecbdata->diff_words) {
+   enum diff_symbol s =
+   ecbdata->diff_words->type == DIFF_WORDS_PORCELAIN ?
+   DIFF_SYMBOL_WORDS_PORCELAIN : DIFF_SYMBOL_WORDS;
if (line[0] == '-') {
diff_words_append(line, len,
  >diff_words->minus);
@@ -1403,21 +1427,7 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
return;
}
diff_words_flush(ecbdata);
-   if (ecbdata->diff_words->type == DIFF_WORDS_PORCELAIN) {
-   emit_line(o, context, reset, line, len);
-   fputs("~\n", o->file);
-   } else {
-   /*
-* Skip the prefix character, if any.  With
-* diff_suppress_blank_empty, there may be
-* none.
-*/
-   if (line[0] != '\n') {
- line++;
- len--;
-   }
-   emit_line(o, context, reset, line, len);
-   }
+   emit_diff_symbol(o, s, line, len, 0);
return;
}
 
-- 
2.13.0.31.g9b732c453e



[PATCHv2 17/25] diff.c: convert show_stats to use emit_diff_symbol

2017-06-30 Thread Stefan Beller
We call print_stat_summary from builtin/apply, so we still
need the version with a file pointer, so introduce
print_stat_summary_0 that uses emit_string machinery and
keep print_stat_summary with the same arguments around.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 114 +
 diff.h |   4 +--
 2 files changed, 74 insertions(+), 44 deletions(-)

diff --git a/diff.c b/diff.c
index f5a14359ae..76d4b8ebf9 100644
--- a/diff.c
+++ b/diff.c
@@ -566,6 +566,10 @@ enum diff_symbol {
DIFF_SYMBOL_BINARY_DIFF_HEADER_LITERAL,
DIFF_SYMBOL_BINARY_DIFF_BODY,
DIFF_SYMBOL_BINARY_DIFF_FOOTER,
+   DIFF_SYMBOL_STATS_SUMMARY_NO_FILES,
+   DIFF_SYMBOL_STATS_SUMMARY_ABBREV,
+   DIFF_SYMBOL_STATS_SUMMARY_INSERTS_DELETES,
+   DIFF_SYMBOL_STATS_LINE,
DIFF_SYMBOL_SUBMODULE_ADD,
DIFF_SYMBOL_SUBMODULE_DEL,
DIFF_SYMBOL_SUBMODULE_UNTRACKED,
@@ -629,6 +633,7 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
 {
static const char *nneof = " No newline at end of file\n";
const char *context, *reset, *set, *meta, *fraginfo;
+   struct strbuf sb = STRBUF_INIT;
switch (s) {
case DIFF_SYMBOL_NO_LF_EOF:
context = diff_get_color_opt(o, DIFF_CONTEXT);
@@ -640,6 +645,8 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
case DIFF_SYMBOL_SUBMODULE_HEADER:
case DIFF_SYMBOL_SUBMODULE_ERROR:
case DIFF_SYMBOL_SUBMODULE_PIPETHROUGH:
+   case DIFF_SYMBOL_STATS_SUMMARY_INSERTS_DELETES:
+   case DIFF_SYMBOL_STATS_LINE:
case DIFF_SYMBOL_BINARY_DIFF_BODY:
case DIFF_SYMBOL_CONTEXT_FRAGINFO:
emit_line(o, "", "", line, len);
@@ -748,9 +755,17 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
fprintf(o->file, "%sSubmodule %s contains modified content\n",
diff_line_prefix(o), line);
break;
+   case DIFF_SYMBOL_STATS_SUMMARY_NO_FILES:
+   emit_line(o, "", "", " 0 files changed\n",
+ strlen(" 0 files changed\n"));
+   break;
+   case DIFF_SYMBOL_STATS_SUMMARY_ABBREV:
+   emit_line(o, "", "", " ...\n", strlen(" ...\n"));
+   break;
default:
die("BUG: unknown diff symbol");
}
+   strbuf_release();
 }
 
 void diff_emit_submodule_del(struct diff_options *o, const char *line)
@@ -1705,20 +1720,14 @@ static int scale_linear(int it, int width, int 
max_change)
return 1 + (it * (width - 1) / max_change);
 }
 
-static void show_name(FILE *file,
- const char *prefix, const char *name, int len)
-{
-   fprintf(file, " %s%-*s |", prefix, len, name);
-}
-
-static void show_graph(FILE *file, char ch, int cnt, const char *set, const 
char *reset)
+static void show_graph(struct strbuf *out, char ch, int cnt,
+  const char *set, const char *reset)
 {
if (cnt <= 0)
return;
-   fprintf(file, "%s", set);
-   while (cnt--)
-   putc(ch, file);
-   fprintf(file, "%s", reset);
+   strbuf_addstr(out, set);
+   strbuf_addchars(out, ch, cnt);
+   strbuf_addstr(out, reset);
 }
 
 static void fill_print_name(struct diffstat_file *file)
@@ -1742,14 +1751,16 @@ static void fill_print_name(struct diffstat_file *file)
file->print_name = pname;
 }
 
-int print_stat_summary(FILE *fp, int files, int insertions, int deletions)
+static void print_stat_summary_inserts_deletes(struct diff_options *options,
+   int files, int insertions, int deletions)
 {
struct strbuf sb = STRBUF_INIT;
-   int ret;
 
if (!files) {
assert(insertions == 0 && deletions == 0);
-   return fprintf(fp, "%s\n", " 0 files changed");
+   emit_diff_symbol(options, DIFF_SYMBOL_STATS_SUMMARY_NO_FILES,
+NULL, 0, 0);
+   return;
}
 
strbuf_addf(,
@@ -1776,9 +1787,19 @@ int print_stat_summary(FILE *fp, int files, int 
insertions, int deletions)
deletions);
}
strbuf_addch(, '\n');
-   ret = fputs(sb.buf, fp);
+   emit_diff_symbol(options, DIFF_SYMBOL_STATS_SUMMARY_INSERTS_DELETES,
+sb.buf, sb.len, 0);
strbuf_release();
-   return ret;
+}
+
+void print_stat_summary(FILE *fp, int files,
+   int insertions, int deletions)
+{
+   struct diff_options o;
+   memset(, 0, sizeof(o));
+   o.file = fp;
+
+   print_stat_summary_inserts_deletes(, files, insertions, deletions);
 }
 
 static void show_stats(struct diffstat_t *data, struct diff_options *options)
@@ -1788,13 +1809,13 @@ static void show_stats(struct diffstat_t *data, 

[PATCHv2 18/25] diff.c: convert word diffing to use emit_diff_symbol

2017-06-30 Thread Stefan Beller
The word diffing is not line oriented and would need some serious
effort to be transformed into a line oriented approach, so
just go with a symbol DIFF_SYMBOL_WORD_DIFF that is a partial line.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 79 ++
 1 file changed, 46 insertions(+), 33 deletions(-)

diff --git a/diff.c b/diff.c
index 76d4b8ebf9..e0d39d04da 100644
--- a/diff.c
+++ b/diff.c
@@ -570,6 +570,7 @@ enum diff_symbol {
DIFF_SYMBOL_STATS_SUMMARY_ABBREV,
DIFF_SYMBOL_STATS_SUMMARY_INSERTS_DELETES,
DIFF_SYMBOL_STATS_LINE,
+   DIFF_SYMBOL_WORD_DIFF,
DIFF_SYMBOL_SUBMODULE_ADD,
DIFF_SYMBOL_SUBMODULE_DEL,
DIFF_SYMBOL_SUBMODULE_UNTRACKED,
@@ -762,6 +763,9 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
case DIFF_SYMBOL_STATS_SUMMARY_ABBREV:
emit_line(o, "", "", " ...\n", strlen(" ...\n"));
break;
+   case DIFF_SYMBOL_WORD_DIFF:
+   fprintf(o->file, "%.*s", len, line);
+   break;
default:
die("BUG: unknown diff symbol");
}
@@ -1092,37 +1096,49 @@ struct diff_words_data {
struct diff_words_style *style;
 };
 
-static int fn_out_diff_words_write_helper(FILE *fp,
+static int fn_out_diff_words_write_helper(struct diff_options *o,
  struct diff_words_style_elem *st_el,
  const char *newline,
- size_t count, const char *buf,
- const char *line_prefix)
+ size_t count, const char *buf)
 {
int print = 0;
+   struct strbuf sb = STRBUF_INIT;
 
while (count) {
char *p = memchr(buf, '\n', count);
if (print)
-   fputs(line_prefix, fp);
+   strbuf_addstr(, diff_line_prefix(o));
+
if (p != buf) {
-   if (st_el->color && fputs(st_el->color, fp) < 0)
-   return -1;
-   if (fputs(st_el->prefix, fp) < 0 ||
-   fwrite(buf, p ? p - buf : count, 1, fp) != 1 ||
-   fputs(st_el->suffix, fp) < 0)
-   return -1;
-   if (st_el->color && *st_el->color
-   && fputs(GIT_COLOR_RESET, fp) < 0)
-   return -1;
+   const char *reset = st_el->color && *st_el->color ?
+   GIT_COLOR_RESET : NULL;
+   if (st_el->color && *st_el->color)
+   strbuf_addstr(, st_el->color);
+   strbuf_addstr(, st_el->prefix);
+   strbuf_add(, buf, p ? p - buf : count);
+   strbuf_addstr(, st_el->suffix);
+   if (reset)
+   strbuf_addstr(, reset);
}
if (!p)
-   return 0;
-   if (fputs(newline, fp) < 0)
-   return -1;
+   goto out;
+
+   strbuf_addstr(, newline);
count -= p + 1 - buf;
buf = p + 1;
print = 1;
+   if (count) {
+   emit_diff_symbol(o, DIFF_SYMBOL_WORD_DIFF,
+sb.buf, sb.len, 0);
+   strbuf_reset();
+   }
}
+
+out:
+   if (sb.len)
+   emit_diff_symbol(o, DIFF_SYMBOL_WORD_DIFF,
+sb.buf, sb.len, 0);
+   strbuf_release();
return 0;
 }
 
@@ -1204,24 +1220,20 @@ static void fn_out_diff_words_aux(void *priv, char 
*line, unsigned long len)
fputs(line_prefix, diff_words->opt->file);
}
if (diff_words->current_plus != plus_begin) {
-   fn_out_diff_words_write_helper(diff_words->opt->file,
+   fn_out_diff_words_write_helper(diff_words->opt,
>ctx, style->newline,
plus_begin - diff_words->current_plus,
-   diff_words->current_plus, line_prefix);
-   if (*(plus_begin - 1) == '\n')
-   fputs(line_prefix, diff_words->opt->file);
+   diff_words->current_plus);
}
if (minus_begin != minus_end) {
-   fn_out_diff_words_write_helper(diff_words->opt->file,
+   fn_out_diff_words_write_helper(diff_words->opt,
>old, style->newline,
-   minus_end - minus_begin, minus_begin,
-   

[PATCHv2 13/25] diff.c: emit_diff_symbol learns about DIFF_SYMBOL_BINARY_FILES

2017-06-30 Thread Stefan Beller
we could save a little bit of memory when buffering in a later mode
by just passing the inner part ("%s and %s", file1, file 2), but
those a just a few bytes, so instead let's reuse the implementation from
DIFF_SYMBOL_HEADER and keep the whole line around.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/diff.c b/diff.c
index 78f7c6f82f..1d0f8b509f 100644
--- a/diff.c
+++ b/diff.c
@@ -561,6 +561,7 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
 }
 
 enum diff_symbol {
+   DIFF_SYMBOL_BINARY_FILES,
DIFF_SYMBOL_HEADER,
DIFF_SYMBOL_FILEPAIR_PLUS,
DIFF_SYMBOL_FILEPAIR_MINUS,
@@ -690,6 +691,7 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
line, reset,
strchr(line, ' ') ? "\t" : "");
break;
+   case DIFF_SYMBOL_BINARY_FILES:
case DIFF_SYMBOL_HEADER:
fprintf(o->file, "%s", line);
break;
@@ -2542,6 +2544,7 @@ static void builtin_diff(const char *name_a,
} else if (!DIFF_OPT_TST(o, TEXT) &&
( (!textconv_one && diff_filespec_is_binary(one)) ||
  (!textconv_two && diff_filespec_is_binary(two)) )) {
+   struct strbuf sb = STRBUF_INIT;
if (!one->data && !two->data &&
S_ISREG(one->mode) && S_ISREG(two->mode) &&
!DIFF_OPT_TST(o, BINARY)) {
@@ -2554,8 +2557,11 @@ static void builtin_diff(const char *name_a,
}
emit_diff_symbol(o, DIFF_SYMBOL_HEADER,
 header.buf, header.len, 0);
-   fprintf(o->file, "%sBinary files %s and %s differ\n",
-   line_prefix, lbl[0], lbl[1]);
+   strbuf_addf(, "%sBinary files %s and %s differ\n",
+   diff_line_prefix(o), lbl[0], lbl[1]);
+   emit_diff_symbol(o, DIFF_SYMBOL_BINARY_FILES,
+sb.buf, sb.len, 0);
+   strbuf_release();
goto free_ab_and_return;
}
if (fill_mmfile(, one) < 0 || fill_mmfile(, two) < 0)
@@ -2572,9 +2578,13 @@ static void builtin_diff(const char *name_a,
strbuf_reset();
if (DIFF_OPT_TST(o, BINARY))
emit_binary_diff(o->file, , , line_prefix);
-   else
-   fprintf(o->file, "%sBinary files %s and %s differ\n",
-   line_prefix, lbl[0], lbl[1]);
+   else {
+   strbuf_addf(, "%sBinary files %s and %s differ\n",
+   diff_line_prefix(o), lbl[0], lbl[1]);
+   emit_diff_symbol(o, DIFF_SYMBOL_BINARY_FILES,
+sb.buf, sb.len, 0);
+   strbuf_release();
+   }
o->found_changes = 1;
} else {
/* Crazy xdl interfaces.. */
-- 
2.13.0.31.g9b732c453e



[PATCHv2 15/25] submodule.c: migrate diff output to use emit_diff_symbol

2017-06-30 Thread Stefan Beller
As the submodule process is no longer attached to the same file pointer
'o->file' as the superprojects process, there is a different result in
color.c::check_auto_color. That is why we need to pass coloring explicitly,
such that the submodule coloring decision will be made by the child process
processing the submodule. Only DIFF_SYMBOL_SUBMODULE_PIPETHROUGH contains
color, the other symbols are for embedding the submodule output into the
superprojects output.

Remove the colors from the function signatures, as all the coloring
decisions will be made either inside the child process or the final
emit_diff_symbol, but not in the functions driving the submodule diff.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c  | 83 +++-
 diff.h  |  9 +++
 submodule.c | 84 +++--
 submodule.h | 13 +++---
 4 files changed, 121 insertions(+), 68 deletions(-)

diff --git a/diff.c b/diff.c
index 5c428e02b6..48f719fb07 100644
--- a/diff.c
+++ b/diff.c
@@ -561,6 +561,13 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
 }
 
 enum diff_symbol {
+   DIFF_SYMBOL_SUBMODULE_ADD,
+   DIFF_SYMBOL_SUBMODULE_DEL,
+   DIFF_SYMBOL_SUBMODULE_UNTRACKED,
+   DIFF_SYMBOL_SUBMODULE_MODIFIED,
+   DIFF_SYMBOL_SUBMODULE_HEADER,
+   DIFF_SYMBOL_SUBMODULE_ERROR,
+   DIFF_SYMBOL_SUBMODULE_PIPETHROUGH,
DIFF_SYMBOL_REWRITE_DIFF,
DIFF_SYMBOL_BINARY_FILES,
DIFF_SYMBOL_HEADER,
@@ -625,6 +632,9 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
emit_line_0(o, context, reset, '\\',
nneof, strlen(nneof));
break;
+   case DIFF_SYMBOL_SUBMODULE_HEADER:
+   case DIFF_SYMBOL_SUBMODULE_ERROR:
+   case DIFF_SYMBOL_SUBMODULE_PIPETHROUGH:
case DIFF_SYMBOL_CONTEXT_FRAGINFO:
emit_line(o, "", "", line, len);
break;
@@ -701,11 +711,68 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
reset = diff_get_color_opt(o, DIFF_RESET);
emit_line(o, fraginfo, reset, line, len);
break;
+   case DIFF_SYMBOL_SUBMODULE_ADD:
+   set = diff_get_color_opt(o, DIFF_FILE_NEW);
+   reset = diff_get_color_opt(o, DIFF_RESET);
+   emit_line(o, set, reset, line, len);
+   break;
+   case DIFF_SYMBOL_SUBMODULE_DEL:
+   set = diff_get_color_opt(o, DIFF_FILE_OLD);
+   reset = diff_get_color_opt(o, DIFF_RESET);
+   emit_line(o, set, reset, line, len);
+   break;
+   case DIFF_SYMBOL_SUBMODULE_UNTRACKED:
+   fprintf(o->file, "%sSubmodule %s contains untracked content\n",
+   diff_line_prefix(o), line);
+   break;
+   case DIFF_SYMBOL_SUBMODULE_MODIFIED:
+   fprintf(o->file, "%sSubmodule %s contains modified content\n",
+   diff_line_prefix(o), line);
+   break;
default:
die("BUG: unknown diff symbol");
}
 }
 
+void diff_emit_submodule_del(struct diff_options *o, const char *line)
+{
+   emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_DEL, line, strlen(line), 0);
+}
+
+void diff_emit_submodule_add(struct diff_options *o, const char *line)
+{
+   emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_ADD, line, strlen(line), 0);
+}
+
+void diff_emit_submodule_untracked(struct diff_options *o, const char *path)
+{
+   emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_UNTRACKED,
+path, strlen(path), 0);
+}
+
+void diff_emit_submodule_modified(struct diff_options *o, const char *path)
+{
+   emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_MODIFIED,
+path, strlen(path), 0);
+}
+
+void diff_emit_submodule_header(struct diff_options *o, const char *header)
+{
+   emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_HEADER,
+header, strlen(header), 0);
+}
+
+void diff_emit_submodule_error(struct diff_options *o, const char *err)
+{
+   emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_ERROR, err, strlen(err), 0);
+}
+
+void diff_emit_submodule_pipethrough(struct diff_options *o,
+const char *line, int len)
+{
+   emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_PIPETHROUGH, line, len, 0);
+}
+
 static int new_blank_line_at_eof(struct emit_callback *ecbdata, const char 
*line, int len)
 {
if (!((ecbdata->ws_rule & WS_BLANK_AT_EOF) &&
@@ -2467,24 +2534,16 @@ static void builtin_diff(const char *name_a,
if (o->submodule_format == DIFF_SUBMODULE_LOG &&
(!one->mode || S_ISGITLINK(one->mode)) &&
(!two->mode || S_ISGITLINK(two->mode))) {
-   const char *del = 

[PATCHv2 20/25] diff.c: emit_diff_symbol learns about DIFF_SYMBOL_SUMMARY

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 71 ++
 1 file changed, 41 insertions(+), 30 deletions(-)

diff --git a/diff.c b/diff.c
index 5a9c55736d..2db0d7c0f5 100644
--- a/diff.c
+++ b/diff.c
@@ -572,6 +572,7 @@ enum diff_symbol {
DIFF_SYMBOL_STATS_LINE,
DIFF_SYMBOL_WORD_DIFF,
DIFF_SYMBOL_STAT_SEP,
+   DIFF_SYMBOL_SUMMARY,
DIFF_SYMBOL_SUBMODULE_ADD,
DIFF_SYMBOL_SUBMODULE_DEL,
DIFF_SYMBOL_SUBMODULE_UNTRACKED,
@@ -648,6 +649,7 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
case DIFF_SYMBOL_SUBMODULE_ERROR:
case DIFF_SYMBOL_SUBMODULE_PIPETHROUGH:
case DIFF_SYMBOL_STATS_SUMMARY_INSERTS_DELETES:
+   case DIFF_SYMBOL_SUMMARY:
case DIFF_SYMBOL_STATS_LINE:
case DIFF_SYMBOL_BINARY_DIFF_BODY:
case DIFF_SYMBOL_CONTEXT_FRAGINFO:
@@ -4717,67 +4719,76 @@ static void flush_one_pair(struct diff_filepair *p, 
struct diff_options *opt)
}
 }
 
-static void show_file_mode_name(FILE *file, const char *newdelete, struct 
diff_filespec *fs)
+static void show_file_mode_name(struct diff_options *opt, const char 
*newdelete, struct diff_filespec *fs)
 {
+   struct strbuf sb = STRBUF_INIT;
if (fs->mode)
-   fprintf(file, " %s mode %06o ", newdelete, fs->mode);
+   strbuf_addf(, " %s mode %06o ", newdelete, fs->mode);
else
-   fprintf(file, " %s ", newdelete);
-   write_name_quoted(fs->path, file, '\n');
-}
+   strbuf_addf(, " %s ", newdelete);
 
+   quote_c_style(fs->path, , NULL, 0);
+   strbuf_addch(, '\n');
+   emit_diff_symbol(opt, DIFF_SYMBOL_SUMMARY,
+sb.buf, sb.len, 0);
+   strbuf_release();
+}
 
-static void show_mode_change(FILE *file, struct diff_filepair *p, int 
show_name,
-   const char *line_prefix)
+static void show_mode_change(struct diff_options *opt, struct diff_filepair *p,
+   int show_name)
 {
if (p->one->mode && p->two->mode && p->one->mode != p->two->mode) {
-   fprintf(file, "%s mode change %06o => %06o%c", line_prefix, 
p->one->mode,
-   p->two->mode, show_name ? ' ' : '\n');
+   struct strbuf sb = STRBUF_INIT;
+   strbuf_addf(, " mode change %06o => %06o",
+   p->one->mode, p->two->mode);
if (show_name) {
-   write_name_quoted(p->two->path, file, '\n');
+   strbuf_addch(, ' ');
+   quote_c_style(p->two->path, , NULL, 0);
}
+   emit_diff_symbol(opt, DIFF_SYMBOL_SUMMARY,
+sb.buf, sb.len, 0);
+   strbuf_release();
}
 }
 
-static void show_rename_copy(FILE *file, const char *renamecopy, struct 
diff_filepair *p,
-   const char *line_prefix)
+static void show_rename_copy(struct diff_options *opt, const char *renamecopy,
+   struct diff_filepair *p)
 {
+   struct strbuf sb = STRBUF_INIT;
char *names = pprint_rename(p->one->path, p->two->path);
-
-   fprintf(file, " %s %s (%d%%)\n", renamecopy, names, 
similarity_index(p));
+   strbuf_addf(, " %s %s (%d%%)\n",
+   renamecopy, names, similarity_index(p));
free(names);
-   show_mode_change(file, p, 0, line_prefix);
+   emit_diff_symbol(opt, DIFF_SYMBOL_SUMMARY,
+sb.buf, sb.len, 0);
+   show_mode_change(opt, p, 0);
 }
 
 static void diff_summary(struct diff_options *opt, struct diff_filepair *p)
 {
-   FILE *file = opt->file;
-   const char *line_prefix = diff_line_prefix(opt);
-
switch(p->status) {
case DIFF_STATUS_DELETED:
-   fputs(line_prefix, file);
-   show_file_mode_name(file, "delete", p->one);
+   show_file_mode_name(opt, "delete", p->one);
break;
case DIFF_STATUS_ADDED:
-   fputs(line_prefix, file);
-   show_file_mode_name(file, "create", p->two);
+   show_file_mode_name(opt, "create", p->two);
break;
case DIFF_STATUS_COPIED:
-   fputs(line_prefix, file);
-   show_rename_copy(file, "copy", p, line_prefix);
+   show_rename_copy(opt, "copy", p);
break;
case DIFF_STATUS_RENAMED:
-   fputs(line_prefix, file);
-   show_rename_copy(file, "rename", p, line_prefix);
+   show_rename_copy(opt, "rename", p);
break;
default:
if (p->score) {
-   fprintf(file, "%s rewrite ", line_prefix);
-   write_name_quoted(p->two->path, file, ' ');
-   fprintf(file, "(%d%%)\n", 

[PATCHv2 02/25] diff.c: move line ending check into emit_hunk_header

2017-06-30 Thread Stefan Beller
The emit_hunk_header() function is responsible for assembling a
hunk header and calling emit_line() to send the hunk header
to the output file.  Its only caller fn_out_consume() needs
to prepare for a case where the function emits an incomplete
line and add the terminating LF.

Instead make sure emit_hunk_header() to always send a
completed line to emit_line().

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 2874dfc6fc..94fdb57927 100644
--- a/diff.c
+++ b/diff.c
@@ -678,6 +678,8 @@ static void emit_hunk_header(struct emit_callback *ecbdata,
}
 
strbuf_add(, line + len, org_len - len);
+   strbuf_complete_line();
+
emit_line(ecbdata->opt, "", "", msgbuf.buf, msgbuf.len);
strbuf_release();
 }
@@ -1315,8 +1317,6 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
len = sane_truncate_line(ecbdata, line, len);
find_lno(line, ecbdata);
emit_hunk_header(ecbdata, line, len);
-   if (line[len-1] != '\n')
-   putc('\n', o->file);
return;
}
 
-- 
2.13.0.31.g9b732c453e



[PATCHv2 07/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_NO_LF_EOF

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/diff.c b/diff.c
index 6e48a129ed..488096b757 100644
--- a/diff.c
+++ b/diff.c
@@ -561,6 +561,7 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
 }
 
 enum diff_symbol {
+   DIFF_SYMBOL_NO_LF_EOF,
DIFF_SYMBOL_CONTEXT_FRAGINFO,
DIFF_SYMBOL_CONTEXT_MARKER,
DIFF_SYMBOL_SEPARATOR
@@ -569,8 +570,16 @@ enum diff_symbol {
 static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
 const char *line, int len)
 {
+   static const char *nneof = " No newline at end of file\n";
const char *context, *reset;
switch (s) {
+   case DIFF_SYMBOL_NO_LF_EOF:
+   context = diff_get_color_opt(o, DIFF_CONTEXT);
+   reset = diff_get_color_opt(o, DIFF_RESET);
+   putc('\n', o->file);
+   emit_line_0(o, context, reset, '\\',
+   nneof, strlen(nneof));
+   break;
case DIFF_SYMBOL_CONTEXT_FRAGINFO:
emit_line(o, "", "", line, len);
break;
@@ -751,7 +760,6 @@ static void emit_rewrite_lines(struct emit_callback *ecb,
   int prefix, const char *data, int size)
 {
const char *endp = NULL;
-   static const char *nneof = " No newline at end of file\n";
const char *reset = diff_get_color(ecb->color_diff, DIFF_RESET);
 
while (0 < size) {
@@ -769,13 +777,8 @@ static void emit_rewrite_lines(struct emit_callback *ecb,
size -= len;
data += len;
}
-   if (!endp) {
-   const char *context = diff_get_color(ecb->color_diff,
-DIFF_CONTEXT);
-   putc('\n', ecb->opt->file);
-   emit_line_0(ecb->opt, context, reset, '\\',
-   nneof, strlen(nneof));
-   }
+   if (!endp)
+   emit_diff_symbol(ecb->opt, DIFF_SYMBOL_NO_LF_EOF, NULL, 0);
 }
 
 static void emit_rewrite_diff(const char *name_a,
-- 
2.13.0.31.g9b732c453e



[PATCHv2 01/25] diff.c: readability fix

2017-06-30 Thread Stefan Beller
We already have dereferenced 'p->two' into a local variable 'two'.
Use that.

Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 00b4c86698..2874dfc6fc 100644
--- a/diff.c
+++ b/diff.c
@@ -3282,8 +3282,8 @@ static void run_diff(struct diff_filepair *p, struct 
diff_options *o)
const char *other;
const char *attr_path;
 
-   name  = p->one->path;
-   other = (strcmp(name, p->two->path) ? p->two->path : NULL);
+   name  = one->path;
+   other = (strcmp(name, two->path) ? two->path : NULL);
attr_path = name;
if (o->prefix_length)
strip_prefix(o->prefix_length, , );
-- 
2.13.0.31.g9b732c453e



[PATCHv2 05/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_CONTEXT_MARKER

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/diff.c b/diff.c
index 4637368d59..75b996c4cf 100644
--- a/diff.c
+++ b/diff.c
@@ -561,13 +561,20 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
 }
 
 enum diff_symbol {
+   DIFF_SYMBOL_CONTEXT_MARKER,
DIFF_SYMBOL_SEPARATOR
 };
 
 static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
 const char *line, int len)
 {
+   const char *context, *reset;
switch (s) {
+   case DIFF_SYMBOL_CONTEXT_MARKER:
+   context = diff_get_color_opt(o, DIFF_CONTEXT);
+   reset = diff_get_color_opt(o, DIFF_RESET);
+   emit_line(o, context, reset, line, len);
+   break;
case DIFF_SYMBOL_SEPARATOR:
fprintf(o->file, "%s%c",
diff_line_prefix(o),
@@ -662,7 +669,8 @@ static void emit_hunk_header(struct emit_callback *ecbdata,
if (len < 10 ||
memcmp(line, atat, 2) ||
!(ep = memmem(line + 2, len - 2, atat, 2))) {
-   emit_line(ecbdata->opt, context, reset, line, len);
+   emit_diff_symbol(ecbdata->opt,
+DIFF_SYMBOL_CONTEXT_MARKER, line, len);
return;
}
ep += 2; /* skip over @@ */
-- 
2.13.0.31.g9b732c453e



[PATCHv2 06/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_CONTEXT_FRAGINFO

2017-06-30 Thread Stefan Beller
Signed-off-by: Stefan Beller 
Signed-off-by: Junio C Hamano 
---
 diff.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 75b996c4cf..6e48a129ed 100644
--- a/diff.c
+++ b/diff.c
@@ -561,6 +561,7 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
 }
 
 enum diff_symbol {
+   DIFF_SYMBOL_CONTEXT_FRAGINFO,
DIFF_SYMBOL_CONTEXT_MARKER,
DIFF_SYMBOL_SEPARATOR
 };
@@ -570,6 +571,9 @@ static void emit_diff_symbol(struct diff_options *o, enum 
diff_symbol s,
 {
const char *context, *reset;
switch (s) {
+   case DIFF_SYMBOL_CONTEXT_FRAGINFO:
+   emit_line(o, "", "", line, len);
+   break;
case DIFF_SYMBOL_CONTEXT_MARKER:
context = diff_get_color_opt(o, DIFF_CONTEXT);
reset = diff_get_color_opt(o, DIFF_RESET);
@@ -705,8 +709,8 @@ static void emit_hunk_header(struct emit_callback *ecbdata,
 
strbuf_add(, line + len, org_len - len);
strbuf_complete_line();
-
-   emit_line(ecbdata->opt, "", "", msgbuf.buf, msgbuf.len);
+   emit_diff_symbol(ecbdata->opt,
+DIFF_SYMBOL_CONTEXT_FRAGINFO, msgbuf.buf, msgbuf.len);
strbuf_release();
 }
 
-- 
2.13.0.31.g9b732c453e



[PATCHv2 00/25] Reroll of sb/diff-color-moved.

2017-06-30 Thread Stefan Beller
v2:

* on top of the new hashmap patches[3]. It compiles when put on older versions
  of the hashmap series, as the hashmap compare function just swapped void 
pointers
  in the definition.
  
* better documentation, Thanks Simon!
  ... If there
  are fewer than 3 adjacent moved lines, they are not marked up
  as moved, but the regular colors 'color.diff.{old,new}' will be
  used. ...
  
* clarified the compare function (casting the return to unsigned char, such
  that 0-255 is from the string, -1 indicated EOF. Adapted the checks for that.
  
Thanks,
Stefan

[3] https://public-inbox.org/git/20170630191407.5381-1-sbel...@google.com/

v1:

Reroll of sb/diff-color-moved, applies on top of the hashmap patches[1].

* Supports different whitespace modes correctly now (and has a test for trivial
  white space changes)
* The fixups[2] have been squashed.

Thanks,
Stefan

[1] https://public-inbox.org/git/20170629235336.28460-1-sbel...@google.com/
[2] https://public-inbox.org/git/20170628005651.8110-1-sbel...@google.com/

Stefan Beller (25):
  diff.c: readability fix
  diff.c: move line ending check into emit_hunk_header
  diff.c: factor out diff_flush_patch_all_file_pairs
  diff.c: introduce emit_diff_symbol
  diff.c: emit_diff_symbol learns DIFF_SYMBOL_CONTEXT_MARKER
  diff.c: emit_diff_symbol learns DIFF_SYMBOL_CONTEXT_FRAGINFO
  diff.c: emit_diff_symbol learns DIFF_SYMBOL_NO_LF_EOF
  diff.c: migrate emit_line_checked to use emit_diff_symbol
  diff.c: emit_diff_symbol learns DIFF_SYMBOL_WORDS[_PORCELAIN]
  diff.c: emit_diff_symbol learns DIFF_SYMBOL_CONTEXT_INCOMPLETE
  diff.c: emit_diff_symbol learns DIFF_SYMBOL_FILEPAIR_{PLUS, MINUS}
  diff.c: emit_diff_symbol learns DIFF_SYMBOL_HEADER
  diff.c: emit_diff_symbol learns about DIFF_SYMBOL_BINARY_FILES
  diff.c: emit_diff_symbol learns DIFF_SYMBOL_REWRITE_DIFF
  submodule.c: migrate diff output to use emit_diff_symbol
  diff.c: convert emit_binary_diff_body to use emit_diff_symbol
  diff.c: convert show_stats to use emit_diff_symbol
  diff.c: convert word diffing to use emit_diff_symbol
  diff.c: emit_diff_symbol learns about DIFF_SYMBOL_STAT_SEP
  diff.c: emit_diff_symbol learns about DIFF_SYMBOL_SUMMARY
  diff.c: buffer all output if asked to
  diff.c: color moved lines differently
  diff.c: color moved lines differently, plain mode
  diff.c: add dimming to moved line detection
  diff: document the new --color-moved setting

 Documentation/config.txt   |   15 +-
 Documentation/diff-options.txt |   36 ++
 cache.h|2 +
 color.h|2 +
 diff.c | 1309 
 diff.h |   39 +-
 submodule.c|   84 ++-
 submodule.h|   13 +-
 t/t4015-diff-whitespace.sh |  434 +
 9 files changed, 1618 insertions(+), 316 deletions(-)

-- 
2.13.0.31.g9b732c453e



Re: [PATCH v8 6/6] convert: add "status=delayed" to filter process protocol

2017-06-30 Thread Lars Schneider

> On 30 Jun 2017, at 20:19, Junio C Hamano  wrote:
> 
> Lars Schneider  writes:
> 
>>> On 29 Jun 2017, at 20:34, Junio C Hamano  wrote:
>>> 
>>> You seem to have squashed an unrelated "table-driven" clean-up into
>>> this step.  While I think the end result looks good, I would have
>>> liked to see it as a separate step, either as a preparatory "now we
>>> are going to add the third one, let's make it table-driven before
>>> doing so" step, or a follow-up "now we have done everything we
>>> wanted to do, let's make this clean-up at the end" step.
>> 
>> I am sorry. That was a misunderstanding - I thought you want to have 
>> this change squashed. 
>> 
>> The "preparatory" patch sounds good! 
>> Would you be OK if I send a v9 with this change?
> 
> That would be more than OK.  I think we are pretty much ready to hit
> 'next'.

Done: 
http://public-inbox.org/git/20170630204128.48708-1-larsxschnei...@gmail.com/

My brain is fried already today ... I hope the commit message is OK :)

Thanks,
Lars

[PATCH v9 2/7] t0021: make debug log file name configurable

2017-06-30 Thread Lars Schneider
The "rot13-filter.pl" helper wrote its debug logs always to "rot13-filter.log".
Make this configurable by defining the log file as first parameter of
"rot13-filter.pl".

This is useful if "rot13-filter.pl" is configured multiple times similar to the
subsequent patch 'convert: add "status=delayed" to filter process protocol'.

Signed-off-by: Lars Schneider 
---
 t/t0021-conversion.sh   | 44 ++--
 t/t0021/rot13-filter.pl |  8 +---
 2 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index ff2424225b..0139b460e7 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -28,7 +28,7 @@ file_size () {
 }
 
 filter_git () {
-   rm -f rot13-filter.log &&
+   rm -f *.log &&
git "$@"
 }
 
@@ -342,7 +342,7 @@ test_expect_success 'diff does not reuse worktree files 
that need cleaning' '
 '
 
 test_expect_success PERL 'required process filter should filter data' '
-   test_config_global filter.protocol.process "rot13-filter.pl clean 
smudge" &&
+   test_config_global filter.protocol.process "rot13-filter.pl debug.log 
clean smudge" &&
test_config_global filter.protocol.required true &&
rm -rf repo &&
mkdir repo &&
@@ -375,7 +375,7 @@ test_expect_success PERL 'required process filter should 
filter data' '
IN: clean testsubdir/test3 '\''sq'\'',\$x=.r $S3 [OK] 
-- OUT: $S3 . [OK]
STOP
EOF
-   test_cmp_count expected.log rot13-filter.log &&
+   test_cmp_count expected.log debug.log &&
 
git commit -m "test commit 2" &&
rm -f test2.r "testsubdir/test3 '\''sq'\'',\$x=.r" &&
@@ -388,7 +388,7 @@ test_expect_success PERL 'required process filter should 
filter data' '
IN: smudge testsubdir/test3 '\''sq'\'',\$x=.r $S3 [OK] 
-- OUT: $S3 . [OK]
STOP
EOF
-   test_cmp_exclude_clean expected.log rot13-filter.log &&
+   test_cmp_exclude_clean expected.log debug.log &&
 
filter_git checkout --quiet --no-progress empty-branch &&
cat >expected.log <<-EOF &&
@@ -397,7 +397,7 @@ test_expect_success PERL 'required process filter should 
filter data' '
IN: clean test.r $S [OK] -- OUT: $S . [OK]
STOP
EOF
-   test_cmp_exclude_clean expected.log rot13-filter.log &&
+   test_cmp_exclude_clean expected.log debug.log &&
 
filter_git checkout --quiet --no-progress master &&
cat >expected.log <<-EOF &&
@@ -409,7 +409,7 @@ test_expect_success PERL 'required process filter should 
filter data' '
IN: smudge testsubdir/test3 '\''sq'\'',\$x=.r $S3 [OK] 
-- OUT: $S3 . [OK]
STOP
EOF
-   test_cmp_exclude_clean expected.log rot13-filter.log &&
+   test_cmp_exclude_clean expected.log debug.log &&
 
test_cmp_committed_rot13 "$TEST_ROOT/test.o" test.r &&
test_cmp_committed_rot13 "$TEST_ROOT/test2.o" test2.r &&
@@ -419,7 +419,7 @@ test_expect_success PERL 'required process filter should 
filter data' '
 
 test_expect_success PERL 'required process filter takes precedence' '
test_config_global filter.protocol.clean false &&
-   test_config_global filter.protocol.process "rot13-filter.pl clean" &&
+   test_config_global filter.protocol.process "rot13-filter.pl debug.log 
clean" &&
test_config_global filter.protocol.required true &&
rm -rf repo &&
mkdir repo &&
@@ -439,12 +439,12 @@ test_expect_success PERL 'required process filter takes 
precedence' '
IN: clean test.r $S [OK] -- OUT: $S . [OK]
STOP
EOF
-   test_cmp_count expected.log rot13-filter.log
+   test_cmp_count expected.log debug.log
)
 '
 
 test_expect_success PERL 'required process filter should be used only for 
"clean" operation only' '
-   test_config_global filter.protocol.process "rot13-filter.pl clean" &&
+   test_config_global filter.protocol.process "rot13-filter.pl debug.log 
clean" &&
rm -rf repo &&
mkdir repo &&
(
@@ -462,7 +462,7 @@ test_expect_success PERL 'required process filter should be 
used only for "clean
IN: clean test.r $S [OK] -- OUT: $S . [OK]
STOP
EOF
-   test_cmp_count expected.log rot13-filter.log &&
+   test_cmp_count expected.log debug.log &&
 
rm test.r &&
 
@@ -474,12 +474,12 @@ test_expect_success PERL 'required process filter should 
be used only for "clean
init handshake complete
STOP

[PATCH v9 6/7] convert: refactor capabilities negotiation

2017-06-30 Thread Lars Schneider
The code to negotiate long running filter capabilities was very
repetitive for new capabilities. Replace the repetitive conditional
statements with a table-driven approach. This is useful for the
subsequent patch 'convert: add "status=delayed" to filter process
protocol'.

Suggested-by: Junio C Hamano 
---
 convert.c | 39 +++
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/convert.c b/convert.c
index e55c034d86..d13e505dfb 100644
--- a/convert.c
+++ b/convert.c
@@ -507,7 +507,7 @@ static struct hashmap subprocess_map;
 
 static int start_multi_file_filter_fn(struct subprocess_entry *subprocess)
 {
-   int err;
+   int err, i;
struct cmd2process *entry = (struct cmd2process *)subprocess;
struct string_list cap_list = STRING_LIST_INIT_NODUP;
char *cap_buf;
@@ -515,6 +515,14 @@ static int start_multi_file_filter_fn(struct 
subprocess_entry *subprocess)
struct child_process *process = >process;
const char *cmd = subprocess->cmd;
 
+   static const struct {
+   const char *name;
+   unsigned int cap;
+   } known_caps[] = {
+   { "clean",  CAP_CLEAN  },
+   { "smudge", CAP_SMUDGE },
+   };
+
sigchain_push(SIGPIPE, SIG_IGN);
 
err = packet_writel(process->in, "git-filter-client", "version=2", 
NULL);
@@ -533,7 +541,15 @@ static int start_multi_file_filter_fn(struct 
subprocess_entry *subprocess)
if (err)
goto done;
 
-   err = packet_writel(process->in, "capability=clean", 
"capability=smudge", NULL);
+   for (i = 0; i < ARRAY_SIZE(known_caps); ++i) {
+   err = packet_write_fmt_gently(
+   process->in, "capability=%s\n", known_caps[i].name);
+   if (err)
+   goto done;
+   }
+   err = packet_flush_gently(process->in);
+   if (err)
+   goto done;
 
for (;;) {
cap_buf = packet_read_line(process->out, NULL);
@@ -545,16 +561,15 @@ static int start_multi_file_filter_fn(struct 
subprocess_entry *subprocess)
continue;
 
cap_name = cap_list.items[1].string;
-   if (!strcmp(cap_name, "clean")) {
-   entry->supported_capabilities |= CAP_CLEAN;
-   } else if (!strcmp(cap_name, "smudge")) {
-   entry->supported_capabilities |= CAP_SMUDGE;
-   } else {
-   warning(
-   "external filter '%s' requested unsupported 
filter capability '%s'",
-   cmd, cap_name
-   );
-   }
+   i = ARRAY_SIZE(known_caps) - 1;
+   while (i >= 0 && strcmp(cap_name, known_caps[i].name))
+   i--;
+
+   if (i >= 0)
+   entry->supported_capabilities |= known_caps[i].cap;
+   else
+   warning("external filter '%s' requested unsupported 
filter capability '%s'",
+   cmd, cap_name);
 
string_list_clear(_list, 0);
}
-- 
2.13.2



[PATCH v9 7/7] convert: add "status=delayed" to filter process protocol

2017-06-30 Thread Lars Schneider
Some `clean` / `smudge` filters may require a significant amount of
time to process a single blob (e.g. the Git LFS smudge filter might
perform network requests). During this process the Git checkout
operation is blocked and Git needs to wait until the filter is done to
continue with the checkout.

Teach the filter process protocol, introduced in edcc8581 ("convert: add
filter..process option", 2016-10-16), to accept the status
"delayed" as response to a filter request. Upon this response Git
continues with the checkout operation. After the checkout operation Git
calls "finish_delayed_checkout" which queries the filter for remaining
blobs. If the filter is still working on the completion, then the filter
is expected to block. If the filter has completed all remaining blobs
then an empty response is expected.

Git has a multiple code paths that checkout a blob. Support delayed
checkouts only in `clone` (in unpack-trees.c) and `checkout` operations
for now. The optimization is most effective in these code paths as all
files of the tree are processed.

Signed-off-by: Lars Schneider 
---
 Documentation/gitattributes.txt |  69 +-
 builtin/checkout.c  |   3 +
 cache.h |   3 +
 convert.c   | 110 ++
 convert.h   |  26 +
 entry.c | 132 +-
 t/t0021-conversion.sh   | 116 +++
 t/t0021/rot13-filter.pl | 204 +++-
 unpack-trees.c  |   2 +
 9 files changed, 575 insertions(+), 90 deletions(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 4736483865..4049a0b9a8 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -425,8 +425,8 @@ packet:  git< capability=clean
 packet:  git< capability=smudge
 packet:  git< 
 
-Supported filter capabilities in version 2 are "clean" and
-"smudge".
+Supported filter capabilities in version 2 are "clean", "smudge",
+and "delay".
 
 Afterwards Git sends a list of "key=value" pairs terminated with
 a flush packet. The list will contain at least the filter command
@@ -512,12 +512,73 @@ the protocol then Git will stop the filter process and 
restart it
 with the next file that needs to be processed. Depending on the
 `filter..required` flag Git will interpret that as error.
 
-After the filter has processed a blob it is expected to wait for
-the next "key=value" list containing a command. Git will close
+After the filter has processed a command it is expected to wait for
+a "key=value" list containing the next command. Git will close
 the command pipe on exit. The filter is expected to detect EOF
 and exit gracefully on its own. Git will wait until the filter
 process has stopped.
 
+Delay
+^
+
+If the filter supports the "delay" capability, then Git can send the
+flag "can-delay" after the filter command and pathname. This flag
+denotes that the filter can delay filtering the current blob (e.g. to
+compensate network latencies) by responding with no content but with
+the status "delayed" and a flush packet.
+
+packet:  git> command=smudge
+packet:  git> pathname=path/testfile.dat
+packet:  git> can-delay=1
+packet:  git> 
+packet:  git> CONTENT
+packet:  git> 
+packet:  git< status=delayed
+packet:  git< 
+
+
+If the filter supports the "delay" capability then it must support the
+"list_available_blobs" command. If Git sends this command, then the
+filter is expected to return a list of pathnames representing blobs
+that have been delayed earlier and are now available.
+The list must be terminated with a flush packet followed
+by a "success" status that is also terminated with a flush packet. If
+no blobs for the delayed paths are available, yet, then the filter is
+expected to block the response until at least one blob becomes
+available. The filter can tell Git that it has no more delayed blobs
+by sending an empty list. As soon as the filter responds with an empty
+list, Git stops asking. All blobs that Git has not received at this
+point are considered missing and will result in an error.
+
+
+packet:  git> command=list_available_blobs
+packet:  git> 
+packet:  git< pathname=path/testfile.dat
+packet:  git< pathname=path/otherfile.dat
+packet:  git< 
+packet:  git< status=success
+packet:  git< 
+
+
+After Git received the pathnames, it will request the corresponding
+blobs again. These requests contain a pathname and an empty content
+section. The filter is expected to respond with the smudged content
+in the usual way as explained above.

[PATCH v9 5/7] convert: move multiple file filter error handling to separate function

2017-06-30 Thread Lars Schneider
Refactoring the filter error handling is useful for the subsequent patch
'convert: add "status=delayed" to filter process protocol'.

In addition, replace the parentheses around the empty "if" block with a
single semicolon to adhere to the Git style guide.

Signed-off-by: Lars Schneider 
---
 convert.c | 47 ++-
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/convert.c b/convert.c
index 9907e3b9ba..e55c034d86 100644
--- a/convert.c
+++ b/convert.c
@@ -565,6 +565,29 @@ static int start_multi_file_filter_fn(struct 
subprocess_entry *subprocess)
return err;
 }
 
+static void handle_filter_error(const struct strbuf *filter_status,
+   struct cmd2process *entry,
+   const unsigned int wanted_capability) {
+   if (!strcmp(filter_status->buf, "error"))
+   ; /* The filter signaled a problem with the file. */
+   else if (!strcmp(filter_status->buf, "abort") && wanted_capability) {
+   /*
+* The filter signaled a permanent problem. Don't try to filter
+* files with the same command for the lifetime of the current
+* Git process.
+*/
+entry->supported_capabilities &= ~wanted_capability;
+   } else {
+   /*
+* Something went wrong with the protocol filter.
+* Force shutdown and restart if another blob requires 
filtering.
+*/
+   error("external filter '%s' failed", entry->subprocess.cmd);
+   subprocess_stop(_map, >subprocess);
+   free(entry);
+   }
+}
+
 static int apply_multi_file_filter(const char *path, const char *src, size_t 
len,
   int fd, struct strbuf *dst, const char *cmd,
   const unsigned int wanted_capability)
@@ -656,28 +679,10 @@ static int apply_multi_file_filter(const char *path, 
const char *src, size_t len
 done:
sigchain_pop(SIGPIPE);
 
-   if (err) {
-   if (!strcmp(filter_status.buf, "error")) {
-   /* The filter signaled a problem with the file. */
-   } else if (!strcmp(filter_status.buf, "abort")) {
-   /*
-* The filter signaled a permanent problem. Don't try 
to filter
-* files with the same command for the lifetime of the 
current
-* Git process.
-*/
-entry->supported_capabilities &= ~wanted_capability;
-   } else {
-   /*
-* Something went wrong with the protocol filter.
-* Force shutdown and restart if another blob requires 
filtering.
-*/
-   error("external filter '%s' failed", cmd);
-   subprocess_stop(_map, >subprocess);
-   free(entry);
-   }
-   } else {
+   if (err)
+   handle_filter_error(_status, entry, wanted_capability);
+   else
strbuf_swap(dst, );
-   }
strbuf_release();
return !err;
 }
-- 
2.13.2



[PATCH v9 4/7] convert: put the flags field before the flag itself for consistent style

2017-06-30 Thread Lars Schneider
Suggested-by: Jeff King 
Signed-off-by: Lars Schneider 
---
 convert.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/convert.c b/convert.c
index f1e168bc30..9907e3b9ba 100644
--- a/convert.c
+++ b/convert.c
@@ -597,12 +597,12 @@ static int apply_multi_file_filter(const char *path, 
const char *src, size_t len
}
process = >subprocess.process;
 
-   if (!(wanted_capability & entry->supported_capabilities))
+   if (!(entry->supported_capabilities & wanted_capability))
return 0;
 
-   if (CAP_CLEAN & wanted_capability)
+   if (wanted_capability & CAP_CLEAN)
filter_type = "clean";
-   else if (CAP_SMUDGE & wanted_capability)
+   else if (wanted_capability & CAP_SMUDGE)
filter_type = "smudge";
else
die("unexpected filter type");
@@ -703,9 +703,9 @@ static int apply_filter(const char *path, const char *src, 
size_t len,
if (!dst)
return 1;
 
-   if ((CAP_CLEAN & wanted_capability) && !drv->process && drv->clean)
+   if ((wanted_capability & CAP_CLEAN) && !drv->process && drv->clean)
cmd = drv->clean;
-   else if ((CAP_SMUDGE & wanted_capability) && !drv->process && 
drv->smudge)
+   else if ((wanted_capability & CAP_SMUDGE) && !drv->process && 
drv->smudge)
cmd = drv->smudge;
 
if (cmd && *cmd)
-- 
2.13.2



[PATCH v9 3/7] t0021: write "OUT " only on success

2017-06-30 Thread Lars Schneider
"rot13-filter.pl" always writes "OUT " to the debug log at the end
of a response.

This works perfectly for the existing responses "abort", "error", and
"success". A new response "delayed", that will be introduced in a
subsequent patch, accepts the input without giving the filtered result
right away. At this point we cannot know the size of the response.
Therefore, we do not write "OUT " for "delayed" responses.

To simplify the code we do not write "OUT " for "abort" and
"error" responses either as their size is always zero.

Signed-off-by: Lars Schneider 
---
 t/t0021-conversion.sh   | 6 +++---
 t/t0021/rot13-filter.pl | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index 0139b460e7..0c04d346a1 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -588,7 +588,7 @@ test_expect_success PERL 'process filter should restart 
after unexpected write f
cat >expected.log <<-EOF &&
START
init handshake complete
-   IN: smudge smudge-write-fail.r $SF [OK] -- OUT: $SF 
[WRITE FAIL]
+   IN: smudge smudge-write-fail.r $SF [OK] -- [WRITE FAIL]
START
init handshake complete
IN: smudge test.r $S [OK] -- OUT: $S . [OK]
@@ -634,7 +634,7 @@ test_expect_success PERL 'process filter should not be 
restarted if it signals a
cat >expected.log <<-EOF &&
START
init handshake complete
-   IN: smudge error.r $SE [OK] -- OUT: 0 [ERROR]
+   IN: smudge error.r $SE [OK] -- [ERROR]
IN: smudge test.r $S [OK] -- OUT: $S . [OK]
IN: smudge test2.r $S2 [OK] -- OUT: $S2 . [OK]
STOP
@@ -673,7 +673,7 @@ test_expect_success PERL 'process filter abort stops 
processing of all further f
cat >expected.log <<-EOF &&
START
init handshake complete
-   IN: smudge abort.r $SA [OK] -- OUT: 0 [ABORT]
+   IN: smudge abort.r $SA [OK] -- [ABORT]
STOP
EOF
test_cmp_exclude_clean expected.log debug.log &&
diff --git a/t/t0021/rot13-filter.pl b/t/t0021/rot13-filter.pl
index 0b943bb377..5e43faeec1 100644
--- a/t/t0021/rot13-filter.pl
+++ b/t/t0021/rot13-filter.pl
@@ -153,9 +153,6 @@ while (1) {
die "bad command '$command'";
}
 
-   print $debug "OUT: " . length($output) . " ";
-   $debug->flush();
-
if ( $pathname eq "error.r" ) {
print $debug "[ERROR]\n";
$debug->flush();
@@ -178,6 +175,9 @@ while (1) {
die "${command} write error";
}
 
+   print $debug "OUT: " . length($output) . " ";
+   $debug->flush();
+
while ( length($output) > 0 ) {
my $packet = substr( $output, 0, 
$MAX_PACKET_CONTENT_SIZE );
packet_bin_write($packet);
-- 
2.13.2



[PATCH v9 1/7] t0021: keep filter log files on comparison

2017-06-30 Thread Lars Schneider
The filter log files are modified on comparison. That might be
unexpected by the caller. It would be even undesirable if the caller
wants to reuse the original log files.

Address these issues by using temp files for modifications. This is
useful for the subsequent patch 'convert: add "status=delayed" to
filter process protocol'.

Signed-off-by: Lars Schneider 
---
 t/t0021-conversion.sh | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index 161f560446..ff2424225b 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -42,10 +42,10 @@ test_cmp_count () {
for FILE in "$expect" "$actual"
do
sort "$FILE" | uniq -c |
-   sed -e "s/^ *[0-9][0-9]*[   ]*IN: /x IN: /" >"$FILE.tmp" &&
-   mv "$FILE.tmp" "$FILE" || return
+   sed -e "s/^ *[0-9][0-9]*[   ]*IN: /x IN: /" >"$FILE.tmp"
done &&
-   test_cmp "$expect" "$actual"
+   test_cmp "$expect.tmp" "$actual.tmp" &&
+   rm "$expect.tmp" "$actual.tmp"
 }
 
 # Compare two files but exclude all `clean` invocations because Git can
@@ -56,10 +56,10 @@ test_cmp_exclude_clean () {
actual=$2
for FILE in "$expect" "$actual"
do
-   grep -v "IN: clean" "$FILE" >"$FILE.tmp" &&
-   mv "$FILE.tmp" "$FILE"
+   grep -v "IN: clean" "$FILE" >"$FILE.tmp"
done &&
-   test_cmp "$expect" "$actual"
+   test_cmp "$expect.tmp" "$actual.tmp" &&
+   rm "$expect.tmp" "$actual.tmp"
 }
 
 # Check that the contents of two files are equal and that their rot13 version
-- 
2.13.2



[PATCH v9 0/7] convert: add "status=delayed" to filter process protocol

2017-06-30 Thread Lars Schneider
Hi,

here is the 9th iteration of my "status delayed" topic. I think that
might be the final one :-)

Patch 1 to 3 are minor t0021 test adjustments. Patch 4 to 6 are convert
refactorings to prepare the new feature. Patch 7 is the new feature.

### Changes since v7:
* extracted capabilities negotiation refactoring into dedicated patch 6
  --> no code changes and therefore no inter-diff

Thanks,
Lars


RFC: http://public-inbox.org/git/d10f7c47-14e8-465b-8b7a-a09a1b28a...@gmail.com/
v1: http://public-inbox.org/git/20170108191736.47359-1-larsxschnei...@gmail.com/
v2: http://public-inbox.org/git/20170226184816.30010-1-larsxschnei...@gmail.com/
v3: http://public-inbox.org/git/20170409191107.20547-1-larsxschnei...@gmail.com/
v4: http://public-inbox.org/git/20170522135001.54506-1-larsxschnei...@gmail.com/
v5: http://public-inbox.org/git/20170601082203.50397-1-larsxschnei...@gmail.com/
v6: http://public-inbox.org/git/20170625182125.6741-1-larsxschnei...@gmail.com/
v7: http://public-inbox.org/git/20170627121027.99209-1-larsxschnei...@gmail.com/
v8: http://public-inbox.org/git/20170628212952.60781-1-larsxschnei...@gmail.com/

Base Ref: master
Web-Diff: https://github.com/larsxschneider/git/commit/c391b48aa2
Checkout: git fetch https://github.com/larsxschneider/git 
filter-process/delay-v9 && git checkout c391b48aa2


### Patches

Lars Schneider (7):
  t0021: keep filter log files on comparison
  t0021: make debug log file name configurable
  t0021: write "OUT " only on success
  convert: put the flags field before the flag itself for consistent
style
  convert: move multiple file filter error handling to separate function
  convert: refactor capabilities negotiation
  convert: add "status=delayed" to filter process protocol

 Documentation/gitattributes.txt |  69 -
 builtin/checkout.c  |   3 +
 cache.h |   3 +
 convert.c   | 202 +++--
 convert.h   |  26 +
 entry.c | 132 -
 t/t0021-conversion.sh   | 178 +++--
 t/t0021/rot13-filter.pl | 214 +++-
 unpack-trees.c  |   2 +
 9 files changed, 668 insertions(+), 161 deletions(-)


base-commit: 0339965c70d68fd3831c9a5306443c869de3f6a8
--
2.13.2



Re: [PATCH 25/25] diff: document the new --color-moved setting

2017-06-30 Thread Stefan Beller
On Fri, Jun 30, 2017 at 1:31 PM, Simon Ruderich  wrote:
> On Fri, Jun 30, 2017 at 09:04:50AM -0700, Stefan Beller wrote:
>> [snip]
>>
>> However
>>
>> context
>>   + moved line, block A or B
>>   + moved line, block A or B
>> context
>>
>> is omitted, because the number of lines
>> here is fewer than 3 ignoring the block
>> type.
>>
>> Maybe
>>
>>   If there are fewer than 3 adjacent lines of
>>   moved code, they are skipped.
>
> My issue with "skipped" is that it's not clear what exactly is
> skipped. Move detection? Inclusion in the diff? Something else?
> That's why I tried to add the "excluded from move detection" to
> make it clear that the change is still shown to the user, just
> not handled by move detection and using the usual diff colors.
>
> In your example above, what exactly is "omitted"? The complete
> hunk from the output or the special coloring? That's what
> confuses me and what might confuse a future reader of the
> documentation.

Oh. I see. That was not the point of confusion that I thought.

skipped/omitted both refer to the move detection and the lines
will be included in the diff, but just as regular added/removed lines.

Will fix.

Thanks for bearing with me,
Stefan


Re: [PATCH 25/25] diff: document the new --color-moved setting

2017-06-30 Thread Simon Ruderich
On Fri, Jun 30, 2017 at 09:04:50AM -0700, Stefan Beller wrote:
> [snip]
>
> However
>
> context
>   + moved line, block A or B
>   + moved line, block A or B
> context
>
> is omitted, because the number of lines
> here is fewer than 3 ignoring the block
> type.
>
> Maybe
>
>   If there are fewer than 3 adjacent lines of
>   moved code, they are skipped.

My issue with "skipped" is that it's not clear what exactly is
skipped. Move detection? Inclusion in the diff? Something else?
That's why I tried to add the "excluded from move detection" to
make it clear that the change is still shown to the user, just
not handled by move detection and using the usual diff colors.

In your example above, what exactly is "omitted"? The complete
hunk from the output or the special coloring? That's what
confuses me and what might confuse a future reader of the
documentation.

Regards
Simon
-- 
+ privacy is necessary
+ using gnupg http://gnupg.org
+ public key id: 0x92FEFDB7E44C32F9


Re: [GSoC][PATCH 5/5 v3] submodule: port submodule subcommand 'sync' from shell to C

2017-06-30 Thread Stefan Beller
On Fri, Jun 30, 2017 at 12:47 PM, Prathamesh Chavan  wrote:
> Port the submodule subcommand 'sync' from shell to C using the same
> mechanism as that used for porting submodule subcommand 'status'.
> Hence, here the function cmd_sync() is ported from shell to C.
> This is done by introducing three functions: module_sync(),
> sync_submodule() and print_default_remote().
>
> The function print_default_remote() is introduced for getting
> the default remote as stdout.
>
> Mentored-by: Christian Couder 
> Mentored-by: Stefan Beller 
> Signed-off-by: Prathamesh Chavan 
> ---

All these patches are also
Reviewed-by: Stefan Beller 

Thanks for upstreaming the early parts.

Thanks,
Stefan


[GSoC][PATCH 1/5 v3] submodule--helper: introduce get_submodule_displaypath()

2017-06-30 Thread Prathamesh Chavan
Introduce function get_submodule_displaypath() to replace the code
occurring in submodule_init() for generating displaypath of the
submodule with a call to it.

This new function will also be used in other parts of the system
in later patches.

Mentored-by: Christian Couder 
Mentored-by: Stefan Beller 
Signed-off-by: Prathamesh Chavan 
---
The patch series is updated, and is based on 'master' branch.

This patch series contains updates patches about
Introduction of the function: get_submodule_displaypath()
(This patch wasn't posted in the last update by mistake)
Introduction of the function: for_each_submodule()
Port shell function set_name_rev() to C
Port submodule subcommand 'status' from shell to C
Port submodule subcommand 'sync' from shell to C

Complete build report of this patch-series is available at:
https://travis-ci.org/pratham-pc/git/builds
Branch: patch-series-1
Build #114

Also, the above series was also build by basing it on the 'next'
branch for the purpose of testing the code.
Since the function is_submodule_initialized changed to
is_submodule_active, this change was required to be added in the
above patches while basing it on next.
After doing the required changes,
Complete build report of the above is available at:
https://travis-ci.org/pratham-pc/git/builds
Branch: patch-series-1-next
Build #116

I have held back the following patch since some work is still
required to be done:
Port submodule subcommand 'foreach' from shell to C
Port submodule subcommand 'deinit' from shell to C
Port submodule subcommand 'summary' from shell to C
I hope to complete this and post these patches later with the
weekly updates.

 builtin/submodule--helper.c | 33 ++---
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 8517032b3..1bfc91bca 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -220,6 +220,27 @@ static int resolve_relative_url_test(int argc, const char 
**argv, const char *pr
return 0;
 }
 
+static char *get_submodule_displaypath(const char *path, const char *prefix)
+{
+   const char *super_prefix = get_super_prefix();
+
+   if (prefix && super_prefix) {
+   BUG("cannot have prefix '%s' and superprefix '%s'",
+   prefix, super_prefix);
+   } else if (prefix) {
+   struct strbuf sb = STRBUF_INIT;
+   char *displaypath = xstrdup(relative_path(path, prefix, ));
+   strbuf_release();
+   return displaypath;
+   } else if (super_prefix) {
+   int len = strlen(super_prefix);
+   const char *format = is_dir_sep(super_prefix[len - 1]) ? "%s%s" 
: "%s/%s";
+   return xstrfmt(format, super_prefix, path);
+   } else {
+   return xstrdup(path);
+   }
+}
+
 struct module_list {
const struct cache_entry **entries;
int alloc, nr;
@@ -339,16 +360,7 @@ static void init_submodule(const char *path, const char 
*prefix, int quiet)
 
/* Only loads from .gitmodules, no overlay with .git/config */
gitmodules_config();
-
-   if (prefix && get_super_prefix())
-   die("BUG: cannot have prefix and superprefix");
-   else if (prefix)
-   displaypath = xstrdup(relative_path(path, prefix, ));
-   else if (get_super_prefix()) {
-   strbuf_addf(, "%s%s", get_super_prefix(), path);
-   displaypath = strbuf_detach(, NULL);
-   } else
-   displaypath = xstrdup(path);
+   displaypath = get_submodule_displaypath(path, prefix);
 
sub = submodule_from_path(null_sha1, path);
 
@@ -363,7 +375,6 @@ static void init_submodule(const char *path, const char 
*prefix, int quiet)
 * Set active flag for the submodule being initialized
 */
if (!is_submodule_initialized(path)) {
-   strbuf_reset();
strbuf_addf(, "submodule.%s.active", sub->name);
git_config_set_gently(sb.buf, "true");
}
-- 
2.13.0



Re: [PATCHv3 2/3] patch-ids.c: use hashmap correctly

2017-06-30 Thread Junio C Hamano
Stefan Beller  writes:

> As eluded to in the previous patch, the code in patch-ids.c is using
> the hashmaps API wrong.
>
> Luckily we do not have a bug, as all hashmap functionality that we use
> here (hashmap_get) passes through the keydata.  If hashmap_get_next were
> to be used, a bug would occur as that passes NULL for the key_data.
>
> So instead use the hashmap API correctly and provide the caller required
> data in the compare function via the first argument that always gets
> passed and was setup via the hashmap_init function.
>
> Signed-off-by: Stefan Beller 
> ---
>  patch-ids.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)

Nicely explained.  Thanks for polishing the series.

>
> diff --git a/patch-ids.c b/patch-ids.c
> index 815c115811..b4166b0f38 100644
> --- a/patch-ids.c
> +++ b/patch-ids.c
> @@ -35,10 +35,10 @@ int commit_patch_id(struct commit *commit, struct 
> diff_options *options,
>   * the side of safety.  The actual value being negative does not have
>   * any significance; only that it is non-zero matters.
>   */
> -static int patch_id_cmp(const void *unused_cmp_data,
> +static int patch_id_cmp(struct diff_options *opt,
>   struct patch_id *a,
>   struct patch_id *b,
> - struct diff_options *opt)
> + const void *unused_keydata)
>  {
>   if (is_null_oid(>patch_id) &&
>   commit_patch_id(a->commit, opt, >patch_id, 0))
> @@ -59,7 +59,7 @@ int init_patch_ids(struct patch_ids *ids)
>   DIFF_OPT_SET(>diffopts, RECURSIVE);
>   diff_setup_done(>diffopts);
>   hashmap_init(>patches, (hashmap_cmp_fn)patch_id_cmp,
> -  NULL, 256);
> +  >diffopts, 256);
>   return 0;
>  }
>  
> @@ -95,7 +95,7 @@ struct patch_id *has_commit_patch_id(struct commit *commit,
>   if (init_patch_id_entry(, commit, ids))
>   return NULL;
>  
> - return hashmap_get(>patches, , >diffopts);
> + return hashmap_get(>patches, , NULL);
>  }
>  
>  struct patch_id *add_commit_patch_id(struct commit *commit,


[GSoC][PATCH 3/5 v3] submodule: port set_name_rev() from shell to C

2017-06-30 Thread Prathamesh Chavan
Function set_name_rev() is ported from git-submodule to the
submodule--helper builtin. The function get_name_rev() generates the
value of the revision name as required, and the function
print_name_rev() handles the formating and printing of the obtained
revision name.

Mentored-by: Christian Couder 
Mentored-by: Stefan Beller 
Signed-off-by: Prathamesh Chavan 
---
 builtin/submodule--helper.c | 69 +
 git-submodule.sh| 16 ++-
 2 files changed, 71 insertions(+), 14 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index c4286aac5..4103e40e4 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -244,6 +244,74 @@ static char *get_submodule_displaypath(const char *path, 
const char *prefix)
}
 }
 
+enum describe_step {
+   step_bare,
+   step_tags,
+   step_contains,
+   step_all_always,
+   step_end
+};
+
+static char *get_name_rev(const char *sub_path, const char* object_id)
+{
+   struct strbuf sb = STRBUF_INIT;
+   enum describe_step cur_step;
+
+   for (cur_step = step_bare; cur_step < step_end; cur_step++) {
+   struct child_process cp = CHILD_PROCESS_INIT;
+   prepare_submodule_repo_env(_array);
+   cp.dir = sub_path;
+   cp.git_cmd = 1;
+   cp.no_stderr = 1;
+
+   switch (cur_step) {
+   case step_bare:
+   argv_array_pushl(, "describe",
+object_id, NULL);
+   break;
+   case step_tags: 
+   argv_array_pushl(, "describe",
+"--tags", object_id, NULL);
+   break;
+   case step_contains:
+   argv_array_pushl(, "describe",
+"--contains", object_id,
+NULL);
+   break;
+   case step_all_always:
+   argv_array_pushl(, "describe",
+"--all", "--always",
+object_id, NULL);
+   break;
+   default:
+   BUG("unknown describe step '%d'", cur_step);
+   }
+
+   if (!capture_command(, , 0) && sb.len) {
+   strbuf_strip_suffix(, "\n");
+   return strbuf_detach(, NULL);
+   }
+
+   }
+
+   strbuf_release();
+   return NULL;
+}
+
+static int print_name_rev(int argc, const char **argv, const char *prefix)
+{
+   char *namerev;
+   if (argc != 3)
+   die("print-name-rev only accepts two arguments:  ");
+
+   namerev = get_name_rev(argv[1], argv[2]);
+   if (namerev && namerev[0])
+   printf(" (%s)", namerev);
+   printf("\n");
+
+   return 0;
+}
+
 struct module_list {
const struct cache_entry **entries;
int alloc, nr;
@@ -1242,6 +1310,7 @@ static struct cmd_struct commands[] = {
{"relative-path", resolve_relative_path, 0},
{"resolve-relative-url", resolve_relative_url, 0},
{"resolve-relative-url-test", resolve_relative_url_test, 0},
+   {"print-name-rev", print_name_rev, 0},
{"init", module_init, SUPPORT_SUPER_PREFIX},
{"remote-branch", resolve_remote_submodule_branch, 0},
{"push-check", push_check, 0},
diff --git a/git-submodule.sh b/git-submodule.sh
index e131760ee..e988167e0 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -759,18 +759,6 @@ cmd_update()
}
 }
 
-set_name_rev () {
-   revname=$( (
-   sanitize_submodule_env
-   cd "$1" && {
-   git describe "$2" 2>/dev/null ||
-   git describe --tags "$2" 2>/dev/null ||
-   git describe --contains "$2" 2>/dev/null ||
-   git describe --all --always "$2"
-   }
-   ) )
-   test -z "$revname" || revname=" ($revname)"
-}
 #
 # Show commit summary for submodules in index or working tree
 #
@@ -1042,14 +1030,14 @@ cmd_status()
fi
if git diff-files --ignore-submodules=dirty --quiet -- 
"$sm_path"
then
-   set_name_rev "$sm_path" "$sha1"
+   revname=$(git submodule--helper print-name-rev 
"$sm_path" "$sha1")
say " $sha1 $displaypath$revname"
else
if test -z "$cached"
then
sha1=$(sanitize_submodule_env; cd 

[GSoC][PATCH 5/5 v3] submodule: port submodule subcommand 'sync' from shell to C

2017-06-30 Thread Prathamesh Chavan
Port the submodule subcommand 'sync' from shell to C using the same
mechanism as that used for porting submodule subcommand 'status'.
Hence, here the function cmd_sync() is ported from shell to C.
This is done by introducing three functions: module_sync(),
sync_submodule() and print_default_remote().

The function print_default_remote() is introduced for getting
the default remote as stdout.

Mentored-by: Christian Couder 
Mentored-by: Stefan Beller 
Signed-off-by: Prathamesh Chavan 
---
 builtin/submodule--helper.c | 177 
 git-submodule.sh|  56 +-
 2 files changed, 178 insertions(+), 55 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 575ae0218..d933daadd 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -44,6 +44,20 @@ static char *get_default_remote(void)
return ret;
 }
 
+static int print_default_remote(int argc, const char **argv, const char 
*prefix)
+{
+   const char *remote;
+
+   if (argc != 1)
+   die(_("submodule--helper print-default-remote takes no 
arguments"));
+
+   remote = get_default_remote();
+   if (remote)
+   puts(remote);
+
+   return 0;
+}
+
 static int starts_with_dot_slash(const char *str)
 {
return str[0] == '.' && is_dir_sep(str[1]);
@@ -385,6 +399,25 @@ static void module_list_active(struct module_list *list)
*list = active_modules;
 }
 
+static char *get_up_path(const char *path)
+{
+   int i;
+   struct strbuf sb = STRBUF_INIT;
+
+   for (i = count_slashes(path); i; i--)
+   strbuf_addstr(, "../");
+
+   /*
+* Check if 'path' ends with slash or not
+* for having the same output for dir/sub_dir
+* and dir/sub_dir/
+*/
+   if (!is_dir_sep(path[strlen(path) - 1]))
+   strbuf_addstr(, "../");
+
+   return strbuf_detach(, NULL);
+}
+
 static int module_list(int argc, const char **argv, const char *prefix)
 {
int i;
@@ -736,6 +769,148 @@ static int module_name(int argc, const char **argv, const 
char *prefix)
return 0;
 }
 
+struct sync_cb {
+   const char *prefix;
+   unsigned int quiet: 1;
+   unsigned int recursive: 1;
+};
+#define SYNC_CB_INIT { NULL, 0, 0 }
+
+static void sync_submodule(const struct cache_entry *list_item, void *cb_data)
+{
+   struct sync_cb *info = cb_data;
+   const struct submodule *sub;
+   char *sub_key, *remote_key;
+   char *sub_origin_url, *super_config_url, *displaypath;
+   struct strbuf sb = STRBUF_INIT;
+   struct child_process cp = CHILD_PROCESS_INIT;
+
+   if (!is_submodule_initialized(list_item->name))
+   return;
+
+   sub = submodule_from_path(null_sha1, list_item->name);
+
+   if (!sub || !sub->url)
+   die(_("no url found for submodule path '%s' in .gitmodules"),
+ list_item->name);
+
+   if (starts_with_dot_dot_slash(sub->url) || 
starts_with_dot_slash(sub->url)) {
+   char *remote_url, *up_path;
+   char *remote = get_default_remote();
+   char *remote_key = xstrfmt("remote.%s.url", remote);
+   free(remote);
+
+   if (git_config_get_string(remote_key, _url))
+   remote_url = xgetcwd();
+   up_path = get_up_path(list_item->name);
+   sub_origin_url = relative_url(remote_url, sub->url, up_path);
+   super_config_url = relative_url(remote_url, sub->url, NULL);
+   free(remote_key);
+   free(up_path);
+   free(remote_url);
+   } else {
+   sub_origin_url = xstrdup(sub->url);
+   super_config_url = xstrdup(sub->url);
+   }
+
+   displaypath = get_submodule_displaypath(list_item->name, info->prefix);
+
+   if (!info->quiet)
+   printf(_("Synchronizing submodule url for '%s'\n"),
+displaypath);
+
+   sub_key = xstrfmt("submodule.%s.url", sub->name);
+   if (git_config_set_gently(sub_key, super_config_url))
+   die(_("failed to register url for submodule path '%s'"),
+ displaypath);
+
+   if (!is_submodule_populated_gently(list_item->name, NULL))
+   goto cleanup;
+
+   prepare_submodule_repo_env(_array);
+   cp.git_cmd = 1;
+   cp.dir = list_item->name;
+   argv_array_pushl(, "submodule--helper",
+"print-default-remote", NULL);
+   if (capture_command(, , 0))
+   die(_("failed to get the default remote for submodule '%s'"),
+ list_item->name);
+
+   strbuf_strip_suffix(, "\n");
+   remote_key = xstrfmt("remote.%s.url", sb.buf);
+   strbuf_release();
+
+   child_process_init();
+   

[GSoC][PATCH 2/5 v3] submodule--helper: introduce for_each_submodule_list()

2017-06-30 Thread Prathamesh Chavan
Introduce function for_each_submodule_list() and
replace a loop in module_init() with a call to it.

The new function will also be used in other parts of the
system in later patches.

Mentored-by: Christian Couder 
Mentored-by: Stefan Beller 
Signed-off-by: Prathamesh Chavan 
---
 builtin/submodule--helper.c | 39 +--
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 1bfc91bca..c4286aac5 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -14,6 +14,9 @@
 #include "refs.h"
 #include "connect.h"
 
+typedef void (*submodule_list_func_t)(const struct cache_entry *list_item,
+ void *cb_data);
+
 static char *get_default_remote(void)
 {
char *dest = NULL, *ret;
@@ -352,17 +355,30 @@ static int module_list(int argc, const char **argv, const 
char *prefix)
return 0;
 }
 
-static void init_submodule(const char *path, const char *prefix, int quiet)
+static void for_each_submodule_list(const struct module_list list,
+   submodule_list_func_t fn, void *cb_data)
 {
+   int i;
+   for (i = 0; i < list.nr; i++)
+   fn(list.entries[i], cb_data);
+}
+
+struct init_cb {
+   const char *prefix;
+   unsigned int quiet: 1;
+};
+#define INIT_CB_INIT { NULL, 0 }
+
+static void init_submodule(const struct cache_entry *list_item, void *cb_data)
+{
+   struct init_cb *info = cb_data;
const struct submodule *sub;
struct strbuf sb = STRBUF_INIT;
char *upd = NULL, *url = NULL, *displaypath;
 
-   /* Only loads from .gitmodules, no overlay with .git/config */
-   gitmodules_config();
-   displaypath = get_submodule_displaypath(path, prefix);
+   displaypath = get_submodule_displaypath(list_item->name, info->prefix);
 
-   sub = submodule_from_path(null_sha1, path);
+   sub = submodule_from_path(null_sha1, list_item->name);
 
if (!sub)
die(_("No url found for submodule path '%s' in .gitmodules"),
@@ -374,7 +390,7 @@ static void init_submodule(const char *path, const char 
*prefix, int quiet)
 *
 * Set active flag for the submodule being initialized
 */
-   if (!is_submodule_initialized(path)) {
+   if (!is_submodule_initialized(list_item->name)) {
strbuf_addf(, "submodule.%s.active", sub->name);
git_config_set_gently(sb.buf, "true");
}
@@ -416,7 +432,7 @@ static void init_submodule(const char *path, const char 
*prefix, int quiet)
if (git_config_set_gently(sb.buf, url))
die(_("Failed to register url for submodule path '%s'"),
displaypath);
-   if (!quiet)
+   if (!info->quiet)
fprintf(stderr,
_("Submodule '%s' (%s) registered for path 
'%s'\n"),
sub->name, url, displaypath);
@@ -445,10 +461,10 @@ static void init_submodule(const char *path, const char 
*prefix, int quiet)
 
 static int module_init(int argc, const char **argv, const char *prefix)
 {
+   struct init_cb info = INIT_CB_INIT;
struct pathspec pathspec;
struct module_list list = MODULE_LIST_INIT;
int quiet = 0;
-   int i;
 
struct option module_init_options[] = {
OPT__QUIET(, N_("Suppress output for initializing a 
submodule")),
@@ -473,8 +489,11 @@ static int module_init(int argc, const char **argv, const 
char *prefix)
if (!argc && git_config_get_value_multi("submodule.active"))
module_list_active();
 
-   for (i = 0; i < list.nr; i++)
-   init_submodule(list.entries[i]->name, prefix, quiet);
+   info.prefix = prefix;
+   info.quiet = !!quiet;
+
+   gitmodules_config();
+   for_each_submodule_list(list, init_submodule, );
 
return 0;
 }
-- 
2.13.0



[GSoC][PATCH 4/5 v3] submodule: port submodule subcommand 'status' from shell to C

2017-06-30 Thread Prathamesh Chavan
This aims to make git-submodule 'status' a built-in. Hence, the function
cmd_status() is ported from shell to C. This is done by introducing
three functions: module_status(), submodule_status() and print_status().

The function module_status() acts as the front-end of the subcommand.
It parses subcommand's options and then calls the function
module_list_compute() for computing the list of submodules. Then
this functions calls for_each_submodule_list() looping through the
list obtained.

Then for_each_submodule_list() calls submodule_status() for each of the
submodule in its list. The function submodule_status() is responsible
for generating the status each submodule it is called for, and
then calls print_status().

Finally, the function print_status() handles the printing of submodule's
status.

Mentored-by: Christian Couder 
Mentored-by: Stefan Beller 
Signed-off-by: Prathamesh Chavan 
---
 builtin/submodule--helper.c | 152 
 git-submodule.sh|  49 +-
 2 files changed, 153 insertions(+), 48 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 4103e40e4..575ae0218 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -566,6 +566,157 @@ static int module_init(int argc, const char **argv, const 
char *prefix)
return 0;
 }
 
+struct status_cb {
+   const char *prefix;
+   unsigned int quiet: 1;
+   unsigned int recursive: 1;
+   unsigned int cached: 1;
+};
+#define STATUS_CB_INIT { NULL, 0, 0, 0 }
+
+static void print_status(struct status_cb *info, char state, const char *path,
+char *sub_sha1, char *displaypath)
+{
+   if (info->quiet)
+   return;
+
+   printf("%c%s %s", state, sub_sha1, displaypath);
+
+   if (state == ' ' || state == '+') {
+   struct argv_array name_rev_args = ARGV_ARRAY_INIT;
+
+   argv_array_pushl(_rev_args, "print-name-rev",
+path, sub_sha1, NULL);
+   print_name_rev(name_rev_args.argc, name_rev_args.argv,
+  info->prefix);
+   } else {
+   printf("\n");
+   }
+}
+
+static void status_submodule(const struct cache_entry *list_item, void 
*cb_data)
+{
+   struct status_cb *info = cb_data;
+   char *sub_sha1 = xstrdup(oid_to_hex(_item->oid));
+   char *displaypath;
+   struct argv_array diff_files_args = ARGV_ARRAY_INIT;
+
+   if (!submodule_from_path(null_sha1, list_item->name))
+   die(_("no submodule mapping found in .gitmodules for path 
'%s'"),
+ list_item->name);
+
+   displaypath = get_submodule_displaypath(list_item->name, info->prefix);
+
+   if (list_item->ce_flags) {
+   print_status(info, 'U', list_item->name,
+sha1_to_hex(null_sha1), displaypath);
+   goto cleanup;
+   }
+
+   if (!is_submodule_initialized(list_item->name)) {
+   print_status(info, '-', list_item->name, sub_sha1, displaypath);
+   goto cleanup;
+   }
+
+   argv_array_pushl(_files_args, "diff-files",
+"--ignore-submodules=dirty", "--quiet", "--",
+list_item->name, NULL);
+
+   if (!cmd_diff_files(diff_files_args.argc, diff_files_args.argv,
+   info->prefix)) {
+   print_status(info, ' ', list_item->name, sub_sha1, displaypath);
+   } else {
+   if (!info->cached) {
+   struct child_process cp = CHILD_PROCESS_INIT;
+   struct strbuf sb = STRBUF_INIT;
+
+   prepare_submodule_repo_env(_array);
+   cp.git_cmd = 1;
+   cp.dir = list_item->name;
+
+   argv_array_pushl(, "rev-parse",
+"--verify", "HEAD", NULL);
+
+   if (capture_command(, , 0))
+   die(_("could not run 'git rev-parse --verify"
+ "HEAD' in submodule %s"),
+ list_item->name);
+
+   strbuf_strip_suffix(, "\n");
+   print_status(info, '+', list_item->name, sb.buf,
+displaypath);
+   strbuf_release();
+   } else {
+   print_status(info, '+', list_item->name, sub_sha1,
+displaypath);
+   }
+   }
+
+   if (info->recursive) {
+   struct child_process cpr = CHILD_PROCESS_INIT;
+
+   cpr.git_cmd = 1;
+   cpr.dir = list_item->name;
+   prepare_submodule_repo_env(_array);
+
+   argv_array_pushl(, "--super-prefix", 

Re: Warning suggestion for git stash drop

2017-06-30 Thread Junio C Hamano
Laurent Humblet  writes:

> Thank you for your feedback.
>
> I suppose that turning a hypothetical confirmation option 'on' would
> impact a stash pop for instance as it automatically drops the stash if
> it was applied without conflicts.
>
> What about a --confirm flag?  You could then simply alias 'git stash
> drop --confirm' locally and it wouldn't impact anything else?

I think that is probably trivial to add, but how would you make sure
you give it?  One way may be to train your fingers to type "git sd"
with something like this in your ~/.gitconfig:

[alias] sd = "stash drop --confirm"

but at that point, you could instead have something like the
following in you ~/bin/git-sd and get the same effect:

#!/bin/sh
if tty -s
then
echo >&2 "are you sure you want to drop all stash entries?"
case "$(read)" in
[Yy]*) ;;
*) echo >&2 "ok, let's not drop 'em"; exit 0 ;;
esac
fi
exec git stash drop

without adding the "--confirm" option at all.

So I am not sure that would get us closer to a satisfactory solution
to your original problem.  

Retroactively adding an end-user safety is hard.

> Have a great week-end!

You too.


[PATCHv3 3/3] hashmap: migrate documentation from Documentation/technical into header

2017-06-30 Thread Stefan Beller
While at it, clarify the use of `key`, `keydata`, `entry_or_key` as well
as documenting the new data pointer for the compare function.

Rework the example.

Signed-off-by: Stefan Beller 
---
 Documentation/technical/api-hashmap.txt | 309 
 hashmap.h   | 348 +---
 2 files changed, 316 insertions(+), 341 deletions(-)
 delete mode 100644 Documentation/technical/api-hashmap.txt

diff --git a/Documentation/technical/api-hashmap.txt 
b/Documentation/technical/api-hashmap.txt
deleted file mode 100644
index ccc634bbd7..00
--- a/Documentation/technical/api-hashmap.txt
+++ /dev/null
@@ -1,309 +0,0 @@
-hashmap API
-===
-
-The hashmap API is a generic implementation of hash-based key-value mappings.
-
-Data Structures

-
-`struct hashmap`::
-
-   The hash table structure. Members can be used as follows, but should
-   not be modified directly:
-+
-The `size` member keeps track of the total number of entries (0 means the
-hashmap is empty).
-+
-`tablesize` is the allocated size of the hash table. A non-0 value indicates
-that the hashmap is initialized. It may also be useful for statistical purposes
-(i.e. `size / tablesize` is the current load factor).
-+
-`cmpfn` stores the comparison function specified in `hashmap_init()`. In
-advanced scenarios, it may be useful to change this, e.g. to switch between
-case-sensitive and case-insensitive lookup.
-+
-When `disallow_rehash` is set, automatic rehashes are prevented during inserts
-and deletes.
-
-`struct hashmap_entry`::
-
-   An opaque structure representing an entry in the hash table, which must
-   be used as first member of user data structures. Ideally it should be
-   followed by an int-sized member to prevent unused memory on 64-bit
-   systems due to alignment.
-+
-The `hash` member is the entry's hash code and the `next` member points to the
-next entry in case of collisions (i.e. if multiple entries map to the same
-bucket).
-
-`struct hashmap_iter`::
-
-   An iterator structure, to be used with hashmap_iter_* functions.
-
-Types
--
-
-`int (*hashmap_cmp_fn)(const void *entry, const void *entry_or_key, const void 
*keydata)`::
-
-   User-supplied function to test two hashmap entries for equality. Shall
-   return 0 if the entries are equal.
-+
-This function is always called with non-NULL `entry` / `entry_or_key`
-parameters that have the same hash code. When looking up an entry, the `key`
-and `keydata` parameters to hashmap_get and hashmap_remove are always passed
-as second and third argument, respectively. Otherwise, `keydata` is NULL.
-
-Functions
--
-
-`unsigned int strhash(const char *buf)`::
-`unsigned int strihash(const char *buf)`::
-`unsigned int memhash(const void *buf, size_t len)`::
-`unsigned int memihash(const void *buf, size_t len)`::
-`unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t 
len)`::
-
-   Ready-to-use hash functions for strings, using the FNV-1 algorithm (see
-   http://www.isthe.com/chongo/tech/comp/fnv).
-+
-`strhash` and `strihash` take 0-terminated strings, while `memhash` and
-`memihash` operate on arbitrary-length memory.
-+
-`strihash` and `memihash` are case insensitive versions.
-+
-`memihash_cont` is a variant of `memihash` that allows a computation to be
-continued with another chunk of data.
-
-`unsigned int sha1hash(const unsigned char *sha1)`::
-
-   Converts a cryptographic hash (e.g. SHA-1) into an int-sized hash code
-   for use in hash tables. Cryptographic hashes are supposed to have
-   uniform distribution, so in contrast to `memhash()`, this just copies
-   the first `sizeof(int)` bytes without shuffling any bits. Note that
-   the results will be different on big-endian and little-endian
-   platforms, so they should not be stored or transferred over the net.
-
-`void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, size_t 
initial_size)`::
-
-   Initializes a hashmap structure.
-+
-`map` is the hashmap to initialize.
-+
-The `equals_function` can be specified to compare two entries for equality.
-If NULL, entries are considered equal if their hash codes are equal.
-+
-If the total number of entries is known in advance, the `initial_size`
-parameter may be used to preallocate a sufficiently large table and thus
-prevent expensive resizing. If 0, the table is dynamically resized.
-
-`void hashmap_free(struct hashmap *map, int free_entries)`::
-
-   Frees a hashmap structure and allocated memory.
-+
-`map` is the hashmap to free.
-+
-If `free_entries` is true, each hashmap_entry in the map is freed as well
-(using stdlib's free()).
-
-`void hashmap_entry_init(void *entry, unsigned int hash)`::
-
-   Initializes a hashmap_entry structure.
-+
-`entry` points to the entry to initialize.
-+
-`hash` is the hash code of the entry.
-+
-The 

[PATCHv3 2/3] patch-ids.c: use hashmap correctly

2017-06-30 Thread Stefan Beller
As eluded to in the previous patch, the code in patch-ids.c is using
the hashmaps API wrong.

Luckily we do not have a bug, as all hashmap functionality that we use
here (hashmap_get) passes through the keydata.  If hashmap_get_next were
to be used, a bug would occur as that passes NULL for the key_data.

So instead use the hashmap API correctly and provide the caller required
data in the compare function via the first argument that always gets
passed and was setup via the hashmap_init function.

Signed-off-by: Stefan Beller 
---
 patch-ids.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/patch-ids.c b/patch-ids.c
index 815c115811..b4166b0f38 100644
--- a/patch-ids.c
+++ b/patch-ids.c
@@ -35,10 +35,10 @@ int commit_patch_id(struct commit *commit, struct 
diff_options *options,
  * the side of safety.  The actual value being negative does not have
  * any significance; only that it is non-zero matters.
  */
-static int patch_id_cmp(const void *unused_cmp_data,
+static int patch_id_cmp(struct diff_options *opt,
struct patch_id *a,
struct patch_id *b,
-   struct diff_options *opt)
+   const void *unused_keydata)
 {
if (is_null_oid(>patch_id) &&
commit_patch_id(a->commit, opt, >patch_id, 0))
@@ -59,7 +59,7 @@ int init_patch_ids(struct patch_ids *ids)
DIFF_OPT_SET(>diffopts, RECURSIVE);
diff_setup_done(>diffopts);
hashmap_init(>patches, (hashmap_cmp_fn)patch_id_cmp,
-NULL, 256);
+>diffopts, 256);
return 0;
 }
 
@@ -95,7 +95,7 @@ struct patch_id *has_commit_patch_id(struct commit *commit,
if (init_patch_id_entry(, commit, ids))
return NULL;
 
-   return hashmap_get(>patches, , >diffopts);
+   return hashmap_get(>patches, , NULL);
 }
 
 struct patch_id *add_commit_patch_id(struct commit *commit,
-- 
2.13.0.31.g9b732c453e



[PATCHv3 1/3] hashmap.h: compare function has access to a data field

2017-06-30 Thread Stefan Beller
When using the hashmap a common need is to have access to caller provided
data in the compare function. A couple of times we abuse the keydata field
to pass in the data needed. This happens for example in patch-ids.c.

This patch changes the function signature of the compare function
to have one more void pointer available. The pointer given for each
invocation of the compare function must be defined in the init function
of the hashmap and is just passed through.

Documentation of this new feature is deferred to a later patch.
This is a rather mechanical conversion, just adding the new pass-through
parameter.  However while at it improve the naming of the fields of all
compare functions used by hashmaps by ensuring unused parameters are
prefixed with 'unused_' and naming the parameters what they are (instead
of 'unused' make it 'unused_keydata').

Signed-off-by: Stefan Beller 
---
 attr.c  |  7 ---
 builtin/describe.c  |  8 +---
 builtin/difftool.c  | 24 +++-
 builtin/fast-export.c   |  7 ---
 config.c|  9 ++---
 convert.c   |  3 ++-
 diffcore-rename.c   |  2 +-
 hashmap.c   | 17 -
 hashmap.h   | 12 
 name-hash.c | 16 ++--
 oidset.c|  5 +++--
 patch-ids.c |  6 --
 refs.c  |  5 +++--
 remote.c|  7 +--
 sha1_file.c |  5 +++--
 sub-process.c   |  7 ---
 sub-process.h   |  6 --
 submodule-config.c  | 14 --
 t/helper/test-hashmap.c | 19 ---
 19 files changed, 113 insertions(+), 66 deletions(-)

diff --git a/attr.c b/attr.c
index 37454999d2..56961f0236 100644
--- a/attr.c
+++ b/attr.c
@@ -76,9 +76,10 @@ struct attr_hash_entry {
 };
 
 /* attr_hashmap comparison function */
-static int attr_hash_entry_cmp(const struct attr_hash_entry *a,
+static int attr_hash_entry_cmp(void *unused_cmp_data,
+  const struct attr_hash_entry *a,
   const struct attr_hash_entry *b,
-  void *unused)
+  void *unused_keydata)
 {
return (a->keylen != b->keylen) || strncmp(a->key, b->key, a->keylen);
 }
@@ -86,7 +87,7 @@ static int attr_hash_entry_cmp(const struct attr_hash_entry 
*a,
 /* Initialize an 'attr_hashmap' object */
 static void attr_hashmap_init(struct attr_hashmap *map)
 {
-   hashmap_init(>map, (hashmap_cmp_fn) attr_hash_entry_cmp, 0);
+   hashmap_init(>map, (hashmap_cmp_fn) attr_hash_entry_cmp, NULL, 0);
 }
 
 /*
diff --git a/builtin/describe.c b/builtin/describe.c
index 70eb144608..8868f00ed0 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -54,8 +54,10 @@ static const char *prio_names[] = {
N_("head"), N_("lightweight"), N_("annotated"),
 };
 
-static int commit_name_cmp(const struct commit_name *cn1,
-   const struct commit_name *cn2, const void *peeled)
+static int commit_name_cmp(const void *unused_cmp_data,
+  const struct commit_name *cn1,
+  const struct commit_name *cn2,
+  const void *peeled)
 {
return oidcmp(>peeled, peeled ? peeled : >peeled);
 }
@@ -501,7 +503,7 @@ int cmd_describe(int argc, const char **argv, const char 
*prefix)
return cmd_name_rev(args.argc, args.argv, prefix);
}
 
-   hashmap_init(, (hashmap_cmp_fn) commit_name_cmp, 0);
+   hashmap_init(, (hashmap_cmp_fn) commit_name_cmp, NULL, 0);
for_each_rawref(get_name, NULL);
if (!names.size && !always)
die(_("No names found, cannot describe anything."));
diff --git a/builtin/difftool.c b/builtin/difftool.c
index 9199227f6e..a1a26ba891 100644
--- a/builtin/difftool.c
+++ b/builtin/difftool.c
@@ -130,8 +130,10 @@ struct working_tree_entry {
char path[FLEX_ARRAY];
 };
 
-static int working_tree_entry_cmp(struct working_tree_entry *a,
- struct working_tree_entry *b, void *keydata)
+static int working_tree_entry_cmp(const void *unused_cmp_data,
+ struct working_tree_entry *a,
+ struct working_tree_entry *b,
+ void *unused_keydata)
 {
return strcmp(a->path, b->path);
 }
@@ -146,7 +148,9 @@ struct pair_entry {
const char path[FLEX_ARRAY];
 };
 
-static int pair_cmp(struct pair_entry *a, struct pair_entry *b, void *keydata)
+static int pair_cmp(const void *unused_cmp_data,
+   struct pair_entry *a, struct pair_entry *b,
+   void *unused_keydata)
 {
return strcmp(a->path, b->path);
 }
@@ -174,7 +178,9 @@ struct path_entry {
char path[FLEX_ARRAY];
 };
 
-static int path_entry_cmp(struct path_entry *a, struct path_entry *b, void 
*key)
+static int 

[PATCHv3 0/3] Introduce data field in hashmap and migrate docs to header

2017-06-30 Thread Stefan Beller
v3:

* fixed first patch to call the data 'caller provided' instead of arbitrary.
* moved the position of the caller provided data to first position
* split up the rather mechanical change of function signature with
  fixing the API usage of patch-ids.c

v2:

addressed all but the last point of Jonathan Nieder.

Thanks,
Stefan

v1:

https://public-inbox.org/git/xmqqpodnvmmw@gitster.mtv.corp.google.com/
for context why we need a new data field.  Implement that.

Once upon a time we had a long discussion where to put documentation best.
The answer was header files as there documentation has less chance
to become stale and be out of date.  Improve the docs by
* migrating them to the header
* clarifying how the compare function is to be used
* how the arguments to hashmap_get/remove should be used.

Thanks,
Stefan

Stefan Beller (3):
  hashmap.h: compare function has access to a data field
  patch-ids.c: use hashmap correctly
  hashmap: migrate documentation from Documentation/technical into
header

 Documentation/technical/api-hashmap.txt | 309 ---
 attr.c  |   7 +-
 builtin/describe.c  |   8 +-
 builtin/difftool.c  |  24 ++-
 builtin/fast-export.c   |   7 +-
 config.c|   9 +-
 convert.c   |   3 +-
 diffcore-rename.c   |   2 +-
 hashmap.c   |  17 +-
 hashmap.h   | 360 
 name-hash.c |  16 +-
 oidset.c|   5 +-
 patch-ids.c |  10 +-
 refs.c  |   5 +-
 remote.c|   7 +-
 sha1_file.c |   5 +-
 sub-process.c   |   7 +-
 sub-process.h   |   6 +-
 submodule-config.c  |  14 +-
 t/helper/test-hashmap.c |  19 +-
 20 files changed, 431 insertions(+), 409 deletions(-)
 delete mode 100644 Documentation/technical/api-hashmap.txt

-- 
2.13.0.31.g9b732c453e



Re: [PATCHv2 1/2] hashmap.h: compare function has access to a data field

2017-06-30 Thread Stefan Beller
On Fri, Jun 30, 2017 at 11:47 AM, Junio C Hamano  wrote:
> Stefan Beller  writes:
>
>> Ok, let me redo the patch to have fndata at the front.
>>
>> Looking at other places (that have a similar mechanism mechanically,
>> but are semantically different), such as the callback functions for
>> the diff machinery, we have the user provided pointer at the end
>> of the list. But that is because the diff_options are the objects that
>> should be in front row (they are bound to the callback more than
>> some caller needed data).
>
> Quite honestly, I do not care too deeply about an API specific to a
> particular area like "diff" that passes its "configuration" data
> that everybody in the API knows, i.e. diff_options.  If the
> convention for ordinary functions in the API is to pass that in a
> particular location in the parameter list, I would think it is good
> for a application-supplied callback function to follow that pattern.
> After all, it is to configure the behaviour of the "diff" and the
> caller-supplied callback could have been part of a (hypothetically
> richer) API implementation.
>
> But I view a comparison function that is given to hashmap that is
> supplied by the caller a bit differently.  It is not about
> "hashing", so the reason to have the data close to function pointer
> is stronger there.

Yes I agree with that, I forgot to say so.

I just cited the example to see if we have a preference in the project
already and that's about it, but as it is different, we can put the fndata
first.

Thanks,
Stefan


Re: [PATCHv2 1/2] hashmap.h: compare function has access to a data field

2017-06-30 Thread Junio C Hamano
Stefan Beller  writes:

> Ok, let me redo the patch to have fndata at the front.
>
> Looking at other places (that have a similar mechanism mechanically,
> but are semantically different), such as the callback functions for
> the diff machinery, we have the user provided pointer at the end
> of the list. But that is because the diff_options are the objects that
> should be in front row (they are bound to the callback more than
> some caller needed data).

Quite honestly, I do not care too deeply about an API specific to a
particular area like "diff" that passes its "configuration" data
that everybody in the API knows, i.e. diff_options.  If the
convention for ordinary functions in the API is to pass that in a
particular location in the parameter list, I would think it is good
for a application-supplied callback function to follow that pattern.
After all, it is to configure the behaviour of the "diff" and the
caller-supplied callback could have been part of a (hypothetically
richer) API implementation.

But I view a comparison function that is given to hashmap that is
supplied by the caller a bit differently.  It is not about
"hashing", so the reason to have the data close to function pointer
is stronger there.


Re: [PATCH 22/25] diff.c: color moved lines differently

2017-06-30 Thread Junio C Hamano
Stefan Beller  writes:

> On Fri, Jun 30, 2017 at 10:54 AM, Junio C Hamano  wrote:
>> Stefan Beller  writes:
>>
>>> +static int next_byte(const char **cp, const char **endp,
>>> +  const struct diff_options *diffopt)
>>> +{
>>> + int retval;
>>> +
>>> + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_AT_EOL)) {
>>> + while (*endp > *cp && isspace(**endp))
>>> + (*endp)--;
>>> + }
>>
>> This should be done by the callers (both moved_entry_cmp() and
>> get_string_hash()) before starting to iterate over the bytes from
>> the beginning, no?
>
> Good point.
>
>>> +
>>> + retval = **cp;
>>
>> The char could be signed, and byte 0xff may become indistinguishable
>> from the EOF (i.e. -1) you returned earlier.
>
> Ah, I messed up there. I think EOF is wrong, too.
> So maybe we'll just return 256 to indicate the end of memory chunk
> to not have to deal with signedness

I would just say that next_byte() returns -1 (at end of string) or
0-255 if it is returning a byte that matters.


Re: [PATCHv2 1/2] hashmap.h: compare function has access to a data field

2017-06-30 Thread Junio C Hamano
Stefan Beller  writes:

> I am at a loss here after re-reading your answer over and over,
> but I think you are asking if patch_id_cmp can break, as
> we have a callchain like
>
>   patch_id_cmp
> commit_patch_id
>   (diff_root_tree_oid)
> diff_tree_oid
>   ll_diff_tree_oid
>
> passing diff_options down there, and patch_id_cmp may have
> gotten NULL.
>
> The answer is no, it was safe. (by accident?)
>
> That is because we never use hashmap_get_next
> on the hashmap that uses patch_id_cmp as the compare
> function.
>
> hashmap_get_next is the only function that does not pass
> on a keydata, any other has valid caller provided keydata.

Ah, thanks for clarifying.  I think I misread the earlier
discussion.  So unless somebody goes in to patch-ids.c and adds a
call to do get_next, we won't see a breakage, and it is not worth to
do a test-patch-ids.c that peeks into the hashmap patch-ids.c uses
and does get_next() only to demonstrate a potential future breakage.

OK.


MISE À NIVEAU EMAIL

2017-06-30 Thread helpd...@grenoble-inp.fr



--
Cher: Grenoble Institute of Technology  Webmail,

Nous vous annonons que votre compte de messagerie a dépassé son stockage
limite. Vous ne pourrez pas envoyer et recevoir des mails et votre 
compte de messagerie
Seront supprimés de notre serveur. Pour éviter ce problème, il est 
conseillé de

Cliquez sur le lien ci-dessous et mettez à jour votre compte

https://formcrafts.com/a/29400?preview=true

Je vous remercie

L'équipe Grenoble Institute of Technology


Re: Warning suggestion for git stash drop

2017-06-30 Thread Laurent Humblet
Thank you for your feedback.

I suppose that turning a hypothetical confirmation option 'on' would
impact a stash pop for instance as it automatically drops the stash if
it was applied without conflicts.

What about a --confirm flag?  You could then simply alias 'git stash
drop --confirm' locally and it wouldn't impact anything else?

Have a great week-end!

On 29 June 2017 at 20:56, Stefan Beller  wrote:
> On Thu, Jun 29, 2017 at 11:44 AM, Junio C Hamano  wrote:
>> Laurent Humblet  writes:
>>
>>> Would it be possible to add an optional Yes/No when doing a 'git stash
>>> drop'?  Something similar as what happens when pushing on a remotely
>>> checked out branch (with a config setting to turn the warning on/off).
>>>
>>> I know that you can always get your dropped stash back using git
>>> reflog but a small warning might be a useful feature to avoid unwanted
>>> stash drops on a regular basis.
>>
>> I sympathize with this, but the same principle also would apply many
>> destructive commands like "git reset --hard", "git rm ", etc.
>> and also "/bin/rm -f" ;-)
>>
>> I however do not think a good general way to do this without
>> breaking people's scripts.  When they do 'stash drop' in their
>> scripts, they know they want to get rid of the dropped stash
>> entries, and they expect that the user may not necessarily be
>> sitting in front of the shell to give the command a Yes (they
>> probably wouldn't even give the user a message "the next step
>> may ask you to say Yes; please do so").
>>
>> On the other hand, just like "git reset --hard" and "git clean -f"
>> does not have such safety (i.e. the user is aware that the command
>> is destructive by giving "--hard" and "-f"), "drop" may be a sign
>> that the user knowingly doing something destructive.
>>
>> So I dunno.
>>
>
> I was about to propose to have an easy undo operation, such as the
> reflog. But then I checked the man page for git-stash and it already says:
>
>older stashes are found in the reflog of this reference and can be 
> named
>using the usual reflog syntax (e.g. stash@{0} is the most recently
>created stash, stash@{1} is the one before it, stash@{2.hours.ago} is
>also possible). Stashes may also be referenced by specifying
> just the stash
>index (e.g. the integer n is equivalent to stash@{n})
>
> Maybe that needs to be polished a bit more?
>
> I myself used the stash back then (I don't use it any more), and I assumed
> the stash was a stack of changes, but the data structure seems to imply it
> is only one thing that can be stashed and the reflog enables the stacking
> part, such that a dropped stash is gone and is not recorded in the reflog.
> Dropping a stash seems to just erase the topmost line from the stash reflog,
> so it really is as destructive an "/bin/rm -f"
>
> So getting back a dropped stash via the reflog is not via asking for a stash
> reflog but rather for HEADs or a branchs reflog, which may be complicated.


Re: RFC: Missing blob hook might be invoked infinitely recursively

2017-06-30 Thread Jeff Hostetler



On 6/29/2017 2:48 PM, Jonathan Tan wrote:

As some of you may know, I'm currently working on support for partial
clones/fetches in Git (where blobs above a user-specified size threshold
are not downloaded - only their names and sizes are downloaded). To do
this, the client repository needs to be able to download blobs at will
whenever it needs a missing one (for example, upon checkout).

So I have done this by adding support for a hook in Git [1], and
updating the object-reading code in Git to, by default, automatically
invoke this hook whenever necessary. (This means that existing
subsystems will all work by default, in theory at least.) My current
design is for the hook to have maximum flexibility - when invoked with a
list of SHA-1s, it must merely ensure that those objects are in the
local repo, whether packed or loose.

I am also working on a command (fetch-blob) to be bundled with Git to be
used as a default hook, and here is where the problem lies.

Suppose you have missing blob AB12 and CD34 that you now need, so
fetch-blob is invoked. It sends the literals AB12 and CD34 to a new
server endpoint and obtains a packfile, which it then pipes through "git
index-pack". The issue is that "git index-pack" wants to try to access
AB12 and CD34 in the local repo in order to do a SHA-1 collision check,
and therefore fetch-blob is invoked once again, creating infinite
recursion.

This is straightforwardly fixed by making "git index-pack" understand
about missing blobs, but this might be a symptom of this approach being
error-prone (custom hooks that invoke any Git command must be extra
careful).


I'm not sure if this what you meant, but could you pass to index-pack
the set of missing blobs that you passed to fetch-blob?  That is, let
index-pack do it's normal lookups, but just not on the list passed in.



So I have thought of a few solutions, each with its pros and cons:

1. Require the hook to instead output a packfile to stdout. This means
that that hook no longer needs to access the local repo, and thus has
less dependence on Git commands. But this reduces the flexibility in
that its output must be packed, not loose. (This is fine for the use
cases I'm thinking of, but probably not so for others.)

2. Add support for an environment variable to Git that suppresses access
to the missing blob manifest, in effect, suppressing invocation of the
hook. This allows anyone (the person configuring Git or the hook writer)
to suppress this access, although they might need in-depth knowledge to
know whether the hook is meant to be run with such access suppressed or
required.

3. Like the above, except for a command-line argument to Git.

What do you think? Any solutions that I am missing?

[1] Work in progress, but you can see an earlier version here: 
https://public-inbox.org/git/b917a463f0ad4ce0ab115203b3f24894961a2e75.1497558851.git.jonathanta...@google.com/



Re: [PATCH v8 6/6] convert: add "status=delayed" to filter process protocol

2017-06-30 Thread Junio C Hamano
Lars Schneider  writes:

>> On 29 Jun 2017, at 20:34, Junio C Hamano  wrote:
>> 
>> You seem to have squashed an unrelated "table-driven" clean-up into
>> this step.  While I think the end result looks good, I would have
>> liked to see it as a separate step, either as a preparatory "now we
>> are going to add the third one, let's make it table-driven before
>> doing so" step, or a follow-up "now we have done everything we
>> wanted to do, let's make this clean-up at the end" step.
>
> I am sorry. That was a misunderstanding - I thought you want to have 
> this change squashed. 
>
> The "preparatory" patch sounds good! 
> Would you be OK if I send a v9 with this change?

That would be more than OK.  I think we are pretty much ready to hit
'next'.

Thanks.


Re: [PATCH 22/25] diff.c: color moved lines differently

2017-06-30 Thread Stefan Beller
On Fri, Jun 30, 2017 at 10:54 AM, Junio C Hamano  wrote:
> Stefan Beller  writes:
>
>> +static int next_byte(const char **cp, const char **endp,
>> +  const struct diff_options *diffopt)
>> +{
>> + int retval;
>> +
>> + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_AT_EOL)) {
>> + while (*endp > *cp && isspace(**endp))
>> + (*endp)--;
>> + }
>
> This should be done by the callers (both moved_entry_cmp() and
> get_string_hash()) before starting to iterate over the bytes from
> the beginning, no?

Good point.

>> +
>> + retval = **cp;
>
> The char could be signed, and byte 0xff may become indistinguishable
> from the EOF (i.e. -1) you returned earlier.

Ah, I messed up there. I think EOF is wrong, too.
So maybe we'll just return 256 to indicate the end of memory chunk
to not have to deal with signedness

>> + if (ca != cb)
>> + return 1; /* differs */
>> + if (!ca)
>
> Shouldn't this check for "ca == -1", as we are not dealing with NUL
> terminated string but a  thing?

Yes, we'd check for the ending symbol instead of 0.


Re: [PATCHv2 1/2] hashmap.h: compare function has access to a data field

2017-06-30 Thread Stefan Beller
On Fri, Jun 30, 2017 at 10:39 AM, Junio C Hamano  wrote:
> Stefan Beller  writes:
>
>> diff --git a/patch-ids.c b/patch-ids.c
>> index 9c0ab9e67a..b9b2ebbad0 100644
>> --- a/patch-ids.c
>> +++ b/patch-ids.c
>> @@ -37,6 +37,7 @@ int commit_patch_id(struct commit *commit, struct 
>> diff_options *options,
>>   */
>>  static int patch_id_cmp(struct patch_id *a,
>>   struct patch_id *b,
>> + const void *unused_keydata,
>>   struct diff_options *opt)
>>  {
>>   if (is_null_oid(>patch_id) &&
>> @@ -57,7 +58,8 @@ int init_patch_ids(struct patch_ids *ids)
>>   ids->diffopts.detect_rename = 0;
>>   DIFF_OPT_SET(>diffopts, RECURSIVE);
>>   diff_setup_done(>diffopts);
>> - hashmap_init(>patches, (hashmap_cmp_fn)patch_id_cmp, 256);
>> + hashmap_init(>patches, (hashmap_cmp_fn)patch_id_cmp,
>> +  >diffopts, 256);
>>   return 0;
>>  }
>>
>> @@ -93,7 +95,7 @@ struct patch_id *has_commit_patch_id(struct commit *commit,
>>   if (init_patch_id_entry(, commit, ids))
>>   return NULL;
>>
>> - return hashmap_get(>patches, , >diffopts);
>> + return hashmap_get(>patches, , NULL);
>>  }

+cc Peff

>
> This actually makes me wonder if we can demonstrate an existing
> breakage in tests.  The old code used to pass NULL to the diffopts,
> causing it to be passed down through commit_patch_id() function down
> to diff_tree_oid() or diff_root_tree_oid().  When the codepath
> triggers the issue that Peff warned about in his old article (was it
> about rehashing or something?)  that makes two entries compared
> (i.e. not using keydata, because we are not comparing an existing
> entry with a key and data only to see if that already exists in the
> hashmap), wouldn't that cause ll_diff_tree_oid() that is called from
> diff_tree_oid() to dereference NULL?
>
> Thanks.

I am at a loss here after re-reading your answer over and over,
but I think you are asking if patch_id_cmp can break, as
we have a callchain like

  patch_id_cmp
commit_patch_id
  (diff_root_tree_oid)
diff_tree_oid
  ll_diff_tree_oid

passing diff_options down there, and patch_id_cmp may have
gotten NULL.

The answer is no, it was safe. (by accident?)

That is because we never use hashmap_get_next
on the hashmap that uses patch_id_cmp as the compare
function.

hashmap_get_next is the only function that does not pass
on a keydata, any other has valid caller provided keydata.

Thanks,
Stefan


Re: [PATCH] merge-recursive: use DIFF_XDL_SET macro

2017-06-30 Thread Junio C Hamano
Stefan Beller  writes:

> Instead of implementing this on our own, just use a convenience macro.
>
> Signed-off-by: Stefan Beller 
> ---

Good eyes.  Thanks.

>  merge-recursive.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/merge-recursive.c b/merge-recursive.c
> index 59e5ee41a8..1494ffdb82 100644
> --- a/merge-recursive.c
> +++ b/merge-recursive.c
> @@ -2210,11 +2210,11 @@ int parse_merge_opt(struct merge_options *o, const 
> char *s)
>   o->xdl_opts |= value;
>   }
>   else if (!strcmp(s, "ignore-space-change"))
> - o->xdl_opts |= XDF_IGNORE_WHITESPACE_CHANGE;
> + DIFF_XDL_SET(o, IGNORE_WHITESPACE_CHANGE);
>   else if (!strcmp(s, "ignore-all-space"))
> - o->xdl_opts |= XDF_IGNORE_WHITESPACE;
> + DIFF_XDL_SET(o, IGNORE_WHITESPACE);
>   else if (!strcmp(s, "ignore-space-at-eol"))
> - o->xdl_opts |= XDF_IGNORE_WHITESPACE_AT_EOL;
> + DIFF_XDL_SET(o, IGNORE_WHITESPACE_AT_EOL);
>   else if (!strcmp(s, "renormalize"))
>   o->renormalize = 1;
>   else if (!strcmp(s, "no-renormalize"))


Re: [PATCH 22/25] diff.c: color moved lines differently

2017-06-30 Thread Junio C Hamano
Stefan Beller  writes:

> +static int next_byte(const char **cp, const char **endp,
> +  const struct diff_options *diffopt)
> +{
> + int retval;
> +
> + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_AT_EOL)) {
> + while (*endp > *cp && isspace(**endp))
> + (*endp)--;
> + }

This should be done by the callers (both moved_entry_cmp() and
get_string_hash()) before starting to iterate over the bytes from
the beginning, no?

> + if (*cp > *endp)
> + return -1;
> +
> + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_CHANGE)) {
> + while (*cp < *endp && isspace(**cp))
> + (*cp)++;
> + /*
> +  * After skipping a couple of whitespaces, we still have to
> +  * account for one space.
> +  */
> + return (int)' ';
> + }
> +
> + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE)) {
> + while (*cp < *endp && isspace(**cp))
> + (*cp)++;
> + /* return the first non-ws character via the usual below */
> + }
> +
> + retval = **cp;

The char could be signed, and byte 0xff may become indistinguishable
from the EOF (i.e. -1) you returned earlier.

> + (*cp)++; /* advance */
> + return retval;
> +}
> +
> +static int moved_entry_cmp(const struct moved_entry *a,
> +const struct moved_entry *b,
> +const void *keydata,
> +const void *data)
> +{
> + const struct diff_options *diffopt = data;
> + const char *ap = a->es->line, *ae = a->es->line + a->es->len;
> + const char *bp = b->es->line, *be = b->es->line + b->es->len;
> +
> + if (!(diffopt->xdl_opts & XDF_WHITESPACE_FLAGS))
> + return a->es->len != b->es->len  || memcmp(ap, bp, a->es->len);
> +
> + while (1) {
> + int ca, cb;
> + ca = next_byte(, , diffopt);
> + cb = next_byte(, , diffopt);
> + if (ca != cb)
> + return 1; /* differs */
> + if (!ca)

Shouldn't this check for "ca == -1", as we are not dealing with NUL
terminated string but a  thing?

> + return 0;
> + };
> +}
> +
> +static unsigned get_string_hash(struct emitted_diff_symbol *es, struct 
> diff_options *o)
> +{
> + if (o->xdl_opts & XDF_WHITESPACE_FLAGS) {
> + static struct strbuf sb = STRBUF_INIT;
> + const char *ap = es->line, *ae = es->line + es->len;
> + int c;
> +
> + strbuf_reset();
> + while ((c = next_byte(, , o)) > 0)
> + strbuf_addch(, c);
> +
> + return memhash(sb.buf, sb.len);
> + } else {
> + return memhash(es->line, es->len);
> + }
> +}


Re: [PATCHv2 1/2] hashmap.h: compare function has access to a data field

2017-06-30 Thread Stefan Beller
On Fri, Jun 30, 2017 at 10:34 AM, Junio C Hamano  wrote:
> Stefan Beller  writes:
>
>> When using the hashmap a common need is to have access to arbitrary data
>> in the compare function. A couple of times we abuse the keydata field
>> to pass in the data needed. This happens for example in patch-ids.c.
>
> It is not "arbitrary data"; it is very important to streess that we
> are not just passing random crud, but adding a mechanism to
> tailor/curry the function in a way that is fixed throughout the
> lifetime of a hashmap.

Ah yes, I forgot to fix patch1, when spending all time on the docs in p2.

>
>> diff --git a/hashmap.h b/hashmap.h
>> index de6022a3a9..1c26bbad5b 100644
>> --- a/hashmap.h
>> +++ b/hashmap.h
>> @@ -33,11 +33,12 @@ struct hashmap_entry {
>>  };
>>
>>  typedef int (*hashmap_cmp_fn)(const void *entry, const void *entry_or_key,
>> - const void *keydata);
>> + const void *keydata, const void *cbdata);
>
> As I view the new "data" thing the C's (read: poor-man's) way to
> cutomize the function, I would have tweaked the function signature
> by giving the customization data at the front, i.e.
>
> fn(fndata, entry, entry_or_key, keydata);
>
> That would hopefully make it more obvious that the new thing is
> pairs with fn, not with other arguments (and entry-or-key and
> keydata pairs, instead of three old arguments standing as equals).

Ok, let me redo the patch to have fndata at the front.

Looking at other places (that have a similar mechanism mechanically,
but are semantically different), such as the callback functions for
the diff machinery, we have the user provided pointer at the end
of the list. But that is because the diff_options are the objects that
should be in front row (they are bound to the callback more than
some caller needed data).

typedef void (*diff_format_fn_t)(struct diff_queue_struct *q,
  struct diff_options *options, void *data);

typedef struct strbuf *(*diff_prefix_fn_t)(struct diff_options
*opt, void *data);

Thanks!

> As I think the way we wish to be able to express it in a better
> language would have been something like:
>
> (partial_apply(fn, fndata))(entry, entry_or_key, keydata)
>
> that order would match what is going on better.


Re: [PATCHv2 1/2] hashmap.h: compare function has access to a data field

2017-06-30 Thread Junio C Hamano
Stefan Beller  writes:

> diff --git a/patch-ids.c b/patch-ids.c
> index 9c0ab9e67a..b9b2ebbad0 100644
> --- a/patch-ids.c
> +++ b/patch-ids.c
> @@ -37,6 +37,7 @@ int commit_patch_id(struct commit *commit, struct 
> diff_options *options,
>   */
>  static int patch_id_cmp(struct patch_id *a,
>   struct patch_id *b,
> + const void *unused_keydata,
>   struct diff_options *opt)
>  {
>   if (is_null_oid(>patch_id) &&
> @@ -57,7 +58,8 @@ int init_patch_ids(struct patch_ids *ids)
>   ids->diffopts.detect_rename = 0;
>   DIFF_OPT_SET(>diffopts, RECURSIVE);
>   diff_setup_done(>diffopts);
> - hashmap_init(>patches, (hashmap_cmp_fn)patch_id_cmp, 256);
> + hashmap_init(>patches, (hashmap_cmp_fn)patch_id_cmp,
> +  >diffopts, 256);
>   return 0;
>  }
>  
> @@ -93,7 +95,7 @@ struct patch_id *has_commit_patch_id(struct commit *commit,
>   if (init_patch_id_entry(, commit, ids))
>   return NULL;
>  
> - return hashmap_get(>patches, , >diffopts);
> + return hashmap_get(>patches, , NULL);
>  }

This actually makes me wonder if we can demonstrate an existing
breakage in tests.  The old code used to pass NULL to the diffopts,
causing it to be passed down through commit_patch_id() function down
to diff_tree_oid() or diff_root_tree_oid().  When the codepath
triggers the issue that Peff warned about in his old article (was it
about rehashing or something?) that makes two entries compared
(i.e. not using keydata, because we are not comparing an existing
entry with a key and data only to see if that already exists in the
hashmap), wouldn't that cause ll_diff_tree_oid() that is called from
diff_tree_oid() to dereference NULL?

Thanks.


Re: [PATCHv2 1/2] hashmap.h: compare function has access to a data field

2017-06-30 Thread Junio C Hamano
Stefan Beller  writes:

> When using the hashmap a common need is to have access to arbitrary data
> in the compare function. A couple of times we abuse the keydata field
> to pass in the data needed. This happens for example in patch-ids.c.

It is not "arbitrary data"; it is very important to streess that we
are not just passing random crud, but adding a mechanism to
tailor/curry the function in a way that is fixed throughout the
lifetime of a hashmap.

> diff --git a/hashmap.h b/hashmap.h
> index de6022a3a9..1c26bbad5b 100644
> --- a/hashmap.h
> +++ b/hashmap.h
> @@ -33,11 +33,12 @@ struct hashmap_entry {
>  };
>  
>  typedef int (*hashmap_cmp_fn)(const void *entry, const void *entry_or_key,
> - const void *keydata);
> + const void *keydata, const void *cbdata);

As I view the new "data" thing the C's (read: poor-man's) way to
cutomize the function, I would have tweaked the function signature
by giving the customization data at the front, i.e.

fn(fndata, entry, entry_or_key, keydata);

That would hopefully make it more obvious that the new thing is
pairs with fn, not with other arguments (and entry-or-key and
keydata pairs, instead of three old arguments standing as equals).

As I think the way we wish to be able to express it in a better
language would have been something like:

(partial_apply(fn, fndata))(entry, entry_or_key, keydata)

that order would match what is going on better.


Re: git log use of date format differs between Command Line and script usage.

2017-06-30 Thread Shaun Uldrikis
Yes. That is the case. Just confirmed it. I'll remove the old version.
Sorry to have bothered the mailing list.

Thank you.

-Shaun

On Fri, Jun 30, 2017 at 12:43 PM, Stefan Beller  wrote:
> On Fri, Jun 30, 2017 at 9:06 AM, Shaun Uldrikis  wrote:
>> If you supply a non-standard format to the date configuration for git
>> log, something like:
>> [log]
>> date = format:%Y-%m-%d %H:%M
>
> So I ran
>
> $ git config log.date  "format:%Y-%m-%d %H:%M"
> $ git config --list |grep log.date
> log.date=format:%Y-%m-%d %H:%M
>
> Then I have a script as
>   $ cat script.sh
>   #!/bin/sh
>
>   git log >out
>
> after executing I get:
>
>   $ head out
>   commit 7930db48ca31b41ac335ae8cd25cb29094d1de5e
>   Author: Stefan Beller 
>   Date:   2017-06-30 09:26
>
> Also gitk seems to work here.
>
> Rene's answer sounds reasonable,
> check the version(s) of Git on your system?


Re: [PATCH 1/2] hashmap.h: compare function has access to a data field

2017-06-30 Thread Junio C Hamano
Stefan Beller  writes:

> Yes it was a last minute squash before sending it out, as the fix was only
> two lines whereas the conversion is a lot. If it were separated I could have
> claimed the introduction to be a rather mechanical patch, but I did not
> make use of coccinelle or such, so the likelihood for errors is just as high.
>
> So I decided to squash them.

I somehow think that logic leads to a suboptimal workflow.  If they
were separated, somebody else could have done an independent
mechanical conversion to verify the result matches yours, which
would give us more confidence.  When such an independent mechanical
conversion does not match, we need one round-trip to ask you if it
was a misconversion or a manual tweak.

In any case, I think I've looked at it long enough to be reasonably
OK with the conversion result myself, so let's move it forward.  Of
course I welcome independent eyeballing by others.

Thanks.


Re: [PATCH v2 5/6] grep: remove regflags from the public grep_opt API

2017-06-30 Thread Junio C Hamano
Ævar Arnfjörð Bjarmason   writes:

> @@ -169,6 +167,24 @@ void grep_init(struct grep_opt *opt, const char *prefix)
>  
>  static void grep_set_pattern_type_option(enum grep_pattern_type 
> pattern_type, struct grep_opt *opt)
>  {
> + /*
> +  * When committing to the pattern type by setting the relevant
> +  * fields in grep_opt it's generally not necessary to zero out
> +  * the fields we're not choosing, since they won't have been
> +  * set by anything. The extended_regexp_option field is the
> +  * only exception to this.
> +  *
> +  * This is because in the process of parsing grep.patternType
> +  * & grep.extendedRegexp we set opt->pattern_type_option and
> +  * opt->extended_regexp_option, respectively. We then
> +  * internally use opt->extended_regexp_option to see if we're
> +  * compiling an ERE. It must be unset if that's not actually
> +  * the case.
> +  */
> + if (pattern_type != GREP_PATTERN_TYPE_ERE &&
> + opt->extended_regexp_option)
> + opt->extended_regexp_option = 0;

Good to have the reasoning in an in-code comment like the above.
But after reading these two paragraphs and then before reading the
three line code, a more natural embodiment in the code of the
commentary that came to my mind was

if (pattern_type != GREP_PATTERN_TYPE_ERE)
opt->extended_regexp_option = 0;

The end-result is the same as yours, of course, but I somehow found
it match the reasoning better.

Now, I wonder if this can further be tweaked to

opt->extended_regexp_option = (pattern_type == GREP_PATTERN_TYPE_ERE);

which might lead us in a direction to really unify the two related
fields extended_regexp_option and pattern_type_option.

Even if that were a good longer term direction to go in, it is
outside the scope of this step, of course.  I am merely bringing it
up as an conversation item ;-).



Re: [PATCH] hooks: add signature to the top of the commit message

2017-06-30 Thread Junio C Hamano
Kaartic Sivaraam  writes:

> The sample hook to prepare the commit message before
> a commit allows users to opt-in to add the signature
> to the commit message. The signature is added at a place
> that isn't consistent with the "-s" option of "git commit".
> Further, it could go out of view in certain cases.
>
> Add the signature to the top of the commit message as it's
> more appropriate and consistent with the "-s" option of git
> commit.
>
> Signed-off-by: Kaartic Sivaraam 
> ---
>  The change might seem to be bit of an hack, but it seems
>  worth it (at least to me). I hope I haven't used any
>  commands that aren't portable.

It does look like a hack.  I was wondering if "interpret-trailers"
is mature enough and can be used for this by now.  Also the big
comment before these examples say that this one you are updating is
"rarely a good idea", though.

By the way, the one that is still actually enabled is no longer
needed.  The commit template generated internally was corrected some
time ago not to add the "Conflicts:" section without commenting it
out.

Have you tried "merge", "cherry-pick" and "commit --amend" with this
patch on (meaning, with the "add sob at the top" logic in your actual
hook that is enabled in your repository)?

>  templates/hooks--prepare-commit-msg.sample | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/templates/hooks--prepare-commit-msg.sample 
> b/templates/hooks--prepare-commit-msg.sample
> index 86b8f227e..3c8f5a53d 100755
> --- a/templates/hooks--prepare-commit-msg.sample
> +++ b/templates/hooks--prepare-commit-msg.sample
> @@ -33,4 +33,7 @@ case "$2,$3" in
>  esac
>  
>  # SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: 
> \1/p')
> -# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1"
> +# SOB_TO_ADD='
> +#
> +# '$SOB
> +# grep -qs "^$SOB" "$1" || (echo "$SOB_TO_ADD" | cat - "$1" >temp-template 
> && mv temp-template "$1")


Re: git log use of date format differs between Command Line and script usage.

2017-06-30 Thread Stefan Beller
On Fri, Jun 30, 2017 at 9:06 AM, Shaun Uldrikis  wrote:
> If you supply a non-standard format to the date configuration for git
> log, something like:
> [log]
> date = format:%Y-%m-%d %H:%M

So I ran

$ git config log.date  "format:%Y-%m-%d %H:%M"
$ git config --list |grep log.date
log.date=format:%Y-%m-%d %H:%M

Then I have a script as
  $ cat script.sh
  #!/bin/sh

  git log >out

after executing I get:

  $ head out
  commit 7930db48ca31b41ac335ae8cd25cb29094d1de5e
  Author: Stefan Beller 
  Date:   2017-06-30 09:26

Also gitk seems to work here.

Rene's answer sounds reasonable,
check the version(s) of Git on your system?


Re: git log use of date format differs between Command Line and script usage.

2017-06-30 Thread René Scharfe

Am 30.06.2017 um 18:06 schrieb Shaun Uldrikis:

If you supply a non-standard format to the date configuration for git
log, something like:
[log]
 date = format:%Y-%m-%d %H:%M

then, when you run 'git log' inside a script, or when using gitk
(anywhere), it fails on decoding the format.

fatal: unknown date format format: %Y-%m-%d %H:%M


However, that format works correctly on the command line.
I do not have a patch to address this issue.


I guess you have two versions of git on your system, and the one used in
scripts is older than 2.6.0, which introduced this feature.

René


[PATCH] status: suppress additional warning output in plumbing modes

2017-06-30 Thread Stefan Beller
When status is called with '--porcelain' (as implied by '-z'), we promise
to output only messages as described in the man page.

Suppress CRLF warnings.

Signed-off-by: Stefan Beller 
---

Maybe something like this?

 builtin/commit.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/builtin/commit.c b/builtin/commit.c
index 00a01f07c3..3705d5ec6f 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1126,6 +1126,11 @@ static void finalize_deferred_config(struct wt_status *s)
die(_("--long and -z are incompatible"));
}
 
+   /* suppress all additional output in porcelain mode */
+   if (status_format == STATUS_FORMAT_PORCELAIN ||
+   status_format == STATUS_FORMAT_PORCELAIN_V2)
+   safe_crlf = SAFE_CRLF_FALSE;
+
if (use_deferred_config && status_format == STATUS_FORMAT_UNSPECIFIED)
status_format = status_deferred_config.status_format;
if (status_format == STATUS_FORMAT_UNSPECIFIED)
-- 
2.13.0.31.g9b732c453e



git log use of date format differs between Command Line and script usage.

2017-06-30 Thread Shaun Uldrikis
If you supply a non-standard format to the date configuration for git
log, something like:
[log]
date = format:%Y-%m-%d %H:%M

then, when you run 'git log' inside a script, or when using gitk
(anywhere), it fails on decoding the format.

fatal: unknown date format format: %Y-%m-%d %H:%M


However, that format works correctly on the command line.
I do not have a patch to address this issue.

-Shaun


Re: [PATCH 25/25] diff: document the new --color-moved setting

2017-06-30 Thread Stefan Beller
On Fri, Jun 30, 2017 at 12:26 AM, Simon Ruderich  wrote:
> On Thu, Jun 29, 2017 at 05:07:10PM -0700, Stefan Beller wrote:
>> + Small blocks of 3 moved lines or fewer are skipped.
>
> If I read the commit messages correctly, this "skipping" process
> applies to the move detection in general for those smaller blocks
> and therefore doesn't mean a malicious move can hide smaller
> changes, correct? If so, I find this sentence misleading. Maybe
> something like:
>
> Small blocks of 3 moved lines or fewer are excluded from move
> detection and colored as regular diff.

Well, this reads as if "blocks of 3 lines" are excluded, but what I
mean is "if all adjacent blocks combined are 3 lines or fewer"

Example of how I understand the code:

context
  + moved line, block A
  + moved line, block A
  + moved line, block B
  + moved line, block A
  + moved line, block A
context

These five lines are colored, because 5>3, but each
individual block is smaller than 3 lines. However we
already want to tell the reviewer that the middle line is not part of
a contiguous 5 line block, so we have to use alternative color
in the middle.

However

context
  + moved line, block A or B
  + moved line, block A or B
context

is omitted, because the number of lines
here is fewer than 3 ignoring the block
type.

Maybe

  If there are fewer than 3 adjacent lines of
  moved code, they are skipped.

Thanks,
Stefan

>
> Regards
> Simon
> --
> + privacy is necessary
> + using gnupg http://gnupg.org
> + public key id: 0x92FEFDB7E44C32F9


Re: [PATCH] git-p4: parse marshal output "p4 -G" in p4 changes

2017-06-30 Thread Miguel Torroja
On Fri, Jun 30, 2017 at 12:13 PM, Lars Schneider
 wrote:
>
>> On 30 Jun 2017, at 11:41, Miguel Torroja  wrote:
>>
>> On Fri, Jun 30, 2017 at 10:26 AM, Lars Schneider
>>  wrote:
>>>
 On 30 Jun 2017, at 00:46, miguel torroja  wrote:

 The option -G of p4 (python marshal output) gives more context about the
 data being output. That's useful when using the command "change -o" as
 we can distinguish between warning/error line and real change description.

 Some p4 triggers in the server side generate some warnings when
 executed. Unfortunately those messages are mixed with the output of
 "p4 change -o". Those extra warning lines are reported as {'code':'info'}
 in python marshal output (-G). The real change output is reported as
 {'code':'stat'}

 A new test has been created to t9807-git-p4-submit.sh adding a p4 trigger
 that outputs extra lines with "p4 change -o" and "p4 changes"

 Signed-off-by: Miguel Torroja 
 Signed-off-by: Junio C Hamano 
 ---
 ...
>>>
>>> I have never worked with p4 triggers and that might be
>>> the reason why I don't understand your test case.
>>> Maybe you can help me?
>>>
 +test_expect_success 'description with extra lines from verbose p4 
 trigger' '
 + test_when_finished cleanup_git &&
 + git p4 clone --dest="$git" //depot &&
 + (
 + p4 triggers -i <<-EOF
 + Triggers: p4triggertest-command command pre-user-change 
 "echo verbose trigger"
 + EOF
 + ) &&
>>>
>>> You clone the test repo and install a trigger.
>>>
 + (
 + cd "$git" &&
 + git config git-p4.skipSubmitEdit true &&
 + echo file20 >file20 &&
 + git add file20 &&
 + git commit -m file20 &&
 + git p4 submit
 + ) &&
>>>
>>> You make a new commit. This should run the "echo verbose trigger", right?
>>
>> Yes, that's correct. In this case the trigger is run with p4 change
>> and p4 changes
>>
>>>
 + (
 + p4 triggers -i <<-EOF
 + Triggers:
 + EOF
 + ) &&
>>>
>>> You delete the trigger.
>>>
 + (
 + cd "$cli" &&
 + test_path_is_file file20
 + )
>>>
>>> You check that the file20 is available in P4.
>>>
>>>
>>> What would happen if I run this test case without your patch?
>>> Wouldn't it pass just fine?
>>
>> If you run it without the patch for git-p4.py, the test doesn't pass
>
> You are right. I did not run "make" properly before running the test :)
>
>
>>> Wouldn't we need to check that no warning/error is in the
>>> real change description?
>>>
>>
>> that can also be added, something like this: 'p4 change -o | grep
>> "verbose trigger"' after setting the trigger?
>
> Yeah, maybe. I hope this is no stupid question, but: If you clone the
> repo with git-p4 *again* ... would you see the "verbose trigger" output
> in the Git commit message?
>

The commands that are affected are the ones that don't use the -G
option, as everything is sent to the standard output without being
able to filter out what is the real contents or just info messages.
That's not the case with the python output (-G). Having said that... I
tried what you just said (just to be sure) and the function
p4_last_change fails... as it expects the first dictionary returned by
p4CmdList is the one that contains the change:
"int(results[0]['change'])" and that's not the case as it's an info
entry (no 'change' key, that's in the next entry...)  I'll update with
new patches

I didn't notice that before because the P4 server we have in our
office only outputs extra info messages with the command "p4 change".


> - Lars


[PATCH] hooks: add signature to the top of the commit message

2017-06-30 Thread Kaartic Sivaraam
The sample hook to prepare the commit message before
a commit allows users to opt-in to add the signature
to the commit message. The signature is added at a place
that isn't consistent with the "-s" option of "git commit".
Further, it could go out of view in certain cases.

Add the signature to the top of the commit message as it's
more appropriate and consistent with the "-s" option of git
commit.

Signed-off-by: Kaartic Sivaraam 
---
 The change might seem to be bit of an hack, but it seems
 worth it (at least to me). I hope I haven't used any
 commands that aren't portable.

 templates/hooks--prepare-commit-msg.sample | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/templates/hooks--prepare-commit-msg.sample 
b/templates/hooks--prepare-commit-msg.sample
index 86b8f227e..3c8f5a53d 100755
--- a/templates/hooks--prepare-commit-msg.sample
+++ b/templates/hooks--prepare-commit-msg.sample
@@ -33,4 +33,7 @@ case "$2,$3" in
 esac
 
 # SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p')
-# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1"
+# SOB_TO_ADD='
+#
+# '$SOB
+# grep -qs "^$SOB" "$1" || (echo "$SOB_TO_ADD" | cat - "$1" >temp-template && 
mv temp-template "$1")
-- 
2.11.0



  1   2   >