[PATCH v2 09/14] bisect.c: use commit-slab for commit weight instead of commit->util

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 bisect.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/bisect.c b/bisect.c
index a579b50884..6de1abd407 100644
--- a/bisect.c
+++ b/bisect.c
@@ -12,6 +12,7 @@
 #include "bisect.h"
 #include "sha1-array.h"
 #include "argv-array.h"
+#include "commit-slab.h"
 
 static struct oid_array good_revs;
 static struct oid_array skipped_revs;
@@ -70,16 +71,19 @@ static void clear_distance(struct commit_list *list)
}
 }
 
+define_commit_slab(commit_weight, int *);
+static struct commit_weight commit_weight;
+
 #define DEBUG_BISECT 0
 
 static inline int weight(struct commit_list *elem)
 {
-   return *((int*)(elem->item->util));
+   return **commit_weight_at(_weight, elem->item);
 }
 
 static inline void weight_set(struct commit_list *elem, int weight)
 {
-   *((int*)(elem->item->util)) = weight;
+   **commit_weight_at(_weight, elem->item) = weight;
 }
 
 static int count_interesting_parents(struct commit *commit)
@@ -265,7 +269,7 @@ static struct commit_list *do_find_bisection(struct 
commit_list *list,
struct commit *commit = p->item;
unsigned flags = commit->object.flags;
 
-   p->item->util = [n++];
+   *commit_weight_at(_weight, p->item) = [n++];
switch (count_interesting_parents(commit)) {
case 0:
if (!(flags & TREESAME)) {
@@ -372,6 +376,7 @@ void find_bisection(struct commit_list **commit_list, int 
*reaches,
int *weights;
 
show_list("bisection 2 entry", 0, 0, *commit_list);
+   init_commit_weight(_weight);
 
/*
 * Count the number of total and tree-changing items on the
@@ -412,6 +417,7 @@ void find_bisection(struct commit_list **commit_list, int 
*reaches,
}
free(weights);
*commit_list = best;
+   clear_commit_weight(_weight);
 }
 
 static int register_ref(const char *refname, const struct object_id *oid,
-- 
2.17.0.705.g3525833791



[PATCH v2 04/14] describe: use commit-slab for commit names instead of commit->util

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/describe.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index b5afc45846..1b6ca42553 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -15,9 +15,12 @@
 #include "run-command.h"
 #include "revision.h"
 #include "list-objects.h"
+#include "commit-slab.h"
 
 #define MAX_TAGS   (FLAG_BITS - 1)
 
+define_commit_slab(commit_names, struct commit_name *);
+
 static const char * const describe_usage[] = {
N_("git describe [] [...]"),
N_("git describe [] --dirty"),
@@ -37,6 +40,7 @@ static struct string_list patterns = STRING_LIST_INIT_NODUP;
 static struct string_list exclude_patterns = STRING_LIST_INIT_NODUP;
 static int always;
 static const char *suffix, *dirty, *broken;
+static struct commit_names commit_names;
 
 /* diff-index command arguments to check if working tree is dirty. */
 static const char *diff_index_args[] = {
@@ -321,11 +325,14 @@ static void describe_commit(struct object_id *oid, struct 
strbuf *dst)
if (!have_util) {
struct hashmap_iter iter;
struct commit *c;
-   struct commit_name *n = hashmap_iter_first(, );
+   struct commit_name *n;
+
+   init_commit_names(_names);
+   n = hashmap_iter_first(, );
for (; n; n = hashmap_iter_next()) {
c = lookup_commit_reference_gently(>peeled, 1);
if (c)
-   c->util = n;
+   *commit_names_at(_names, c) = n;
}
have_util = 1;
}
@@ -336,8 +343,11 @@ static void describe_commit(struct object_id *oid, struct 
strbuf *dst)
while (list) {
struct commit *c = pop_commit();
struct commit_list *parents = c->parents;
+   struct commit_name **slot;
+
seen_commits++;
-   n = c->util;
+   slot = commit_names_peek(_names, c);
+   n = slot ? *slot : NULL;
if (n) {
if (!tags && !all && n->prio < 2) {
unannotated_cnt++;
-- 
2.17.0.705.g3525833791



[PATCH v2 06/14] sequencer.c: use commit-slab to mark seen commits

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 sequencer.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/sequencer.c b/sequencer.c
index 4ce5120e77..6b785c6c5f 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -23,6 +23,7 @@
 #include "hashmap.h"
 #include "notes-utils.h"
 #include "sigchain.h"
+#include "commit-slab.h"
 
 #define GIT_REFLOG_ACTION "GIT_REFLOG_ACTION"
 
@@ -3160,6 +3161,7 @@ static enum check_level 
get_missing_commit_check_level(void)
return CHECK_IGNORE;
 }
 
+define_commit_slab(commit_seen, uint8_t);
 /*
  * Check if the user dropped some commits by mistake
  * Behaviour determined by rebase.missingCommitsCheck.
@@ -3173,6 +3175,9 @@ int check_todo_list(void)
struct todo_list todo_list = TODO_LIST_INIT;
struct strbuf missing = STRBUF_INIT;
int advise_to_edit_todo = 0, res = 0, i;
+   struct commit_seen commit_seen;
+
+   init_commit_seen(_seen);
 
strbuf_addstr(_file, rebase_path_todo());
if (strbuf_read_file_or_whine(_list.buf, todo_file.buf) < 0) {
@@ -3189,7 +3194,7 @@ int check_todo_list(void)
for (i = 0; i < todo_list.nr; i++) {
struct commit *commit = todo_list.items[i].commit;
if (commit)
-   commit->util = (void *)1;
+   *commit_seen_at(_seen, commit) = 1;
}
 
todo_list_release(_list);
@@ -3205,11 +3210,11 @@ int check_todo_list(void)
for (i = todo_list.nr - 1; i >= 0; i--) {
struct todo_item *item = todo_list.items + i;
struct commit *commit = item->commit;
-   if (commit && !commit->util) {
+   if (commit && !*commit_seen_at(_seen, commit)) {
strbuf_addf(, " - %s %.*s\n",
short_commit_name(commit),
item->arg_len, item->arg);
-   commit->util = (void *)1;
+   *commit_seen_at(_seen, commit) = 1;
}
}
 
@@ -3235,6 +3240,7 @@ int check_todo_list(void)
"The possible behaviours are: ignore, warn, error.\n\n"));
 
 leave_check:
+   clear_commit_seen(_seen);
strbuf_release(_file);
todo_list_release(_list);
 
-- 
2.17.0.705.g3525833791



[PATCH v2 05/14] shallow.c: use commit-slab for commit depth instead of commit->util

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

While at there, plug a leak for keeping track of depth in this code.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 shallow.c | 41 +
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/shallow.c b/shallow.c
index df4d44ea7a..daf60a9391 100644
--- a/shallow.c
+++ b/shallow.c
@@ -12,6 +12,7 @@
 #include "commit-slab.h"
 #include "revision.h"
 #include "list-objects.h"
+#include "commit-slab.h"
 
 static int is_shallow = -1;
 static struct stat_validity shallow_stat;
@@ -74,6 +75,11 @@ int is_repository_shallow(void)
return is_shallow;
 }
 
+/*
+ * TODO: use "int" elemtype instead of "int *" when/if commit-slab
+ * supports a "valid" flag.
+ */
+define_commit_slab(commit_depth, int *);
 struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
int shallow_flag, int not_shallow_flag)
 {
@@ -82,25 +88,29 @@ struct commit_list *get_shallow_commits(struct object_array 
*heads, int depth,
struct object_array stack = OBJECT_ARRAY_INIT;
struct commit *commit = NULL;
struct commit_graft *graft;
+   struct commit_depth depths;
 
+   init_commit_depth();
while (commit || i < heads->nr || stack.nr) {
struct commit_list *p;
if (!commit) {
if (i < heads->nr) {
+   int **depth_slot;
commit = (struct commit *)
deref_tag(heads->objects[i++].item, 
NULL, 0);
if (!commit || commit->object.type != 
OBJ_COMMIT) {
commit = NULL;
continue;
}
-   if (!commit->util)
-   commit->util = xmalloc(sizeof(int));
-   *(int *)commit->util = 0;
+   depth_slot = commit_depth_at(, commit);
+   if (!*depth_slot)
+   *depth_slot = xmalloc(sizeof(int));
+   **depth_slot = 0;
cur_depth = 0;
} else {
commit = (struct commit *)
object_array_pop();
-   cur_depth = *(int *)commit->util;
+   cur_depth = **commit_depth_peek(, 
commit);
}
}
parse_commit_or_die(commit);
@@ -116,25 +126,32 @@ struct commit_list *get_shallow_commits(struct 
object_array *heads, int depth,
}
commit->object.flags |= not_shallow_flag;
for (p = commit->parents, commit = NULL; p; p = p->next) {
-   if (!p->item->util) {
-   int *pointer = xmalloc(sizeof(int));
-   p->item->util = pointer;
-   *pointer =  cur_depth;
+   int **depth_slot = commit_depth_at(, p->item);
+   if (!*depth_slot) {
+   *depth_slot = xmalloc(sizeof(int));
+   **depth_slot = cur_depth;
} else {
-   int *pointer = p->item->util;
-   if (cur_depth >= *pointer)
+   if (cur_depth >= **depth_slot)
continue;
-   *pointer = cur_depth;
+   **depth_slot = cur_depth;
}
if (p->next)
add_object_array(>item->object,
NULL, );
else {
commit = p->item;
-   cur_depth = *(int *)commit->util;
+   depth_slot = commit_depth_peek(, commit);
+   cur_depth = **depth_slot;
}
}
}
+   for (i = 0; i < depths.slab_count; i++) {
+   int j;
+
+   for (j = 0; j < depths.slab_size; j++)
+   free(depths.slab[i][j]);
+   }
+   clear_commit_depth();
 
return result;
 }
-- 
2.17.0.705.g3525833791



[PATCH v2 12/14] log: use commit-slab in prepare_bases() instead of commit->util

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/log.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/builtin/log.c b/builtin/log.c
index d017e57475..967fbc5caa 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -28,6 +28,7 @@
 #include "mailmap.h"
 #include "gpg-interface.h"
 #include "progress.h"
+#include "commit-slab.h"
 
 #define MAIL_DEFAULT_WRAP 72
 
@@ -1340,6 +1341,8 @@ static struct commit *get_base_commit(const char 
*base_commit,
return base;
 }
 
+define_commit_slab(commit_base, int);
+
 static void prepare_bases(struct base_tree_info *bases,
  struct commit *base,
  struct commit **list,
@@ -1348,11 +1351,13 @@ static void prepare_bases(struct base_tree_info *bases,
struct commit *commit;
struct rev_info revs;
struct diff_options diffopt;
+   struct commit_base commit_base;
int i;
 
if (!base)
return;
 
+   init_commit_base(_base);
diff_setup();
diffopt.flags.recursive = 1;
diff_setup_done();
@@ -1365,7 +1370,7 @@ static void prepare_bases(struct base_tree_info *bases,
for (i = 0; i < total; i++) {
list[i]->object.flags &= ~UNINTERESTING;
add_pending_object(, [i]->object, "rev_list");
-   list[i]->util = (void *)1;
+   *commit_base_at(_base, list[i]) = 1;
}
base->object.flags |= UNINTERESTING;
add_pending_object(, >object, "base");
@@ -1379,7 +1384,7 @@ static void prepare_bases(struct base_tree_info *bases,
while ((commit = get_revision()) != NULL) {
struct object_id oid;
struct object_id *patch_id;
-   if (commit->util)
+   if (*commit_base_at(_base, commit))
continue;
if (commit_patch_id(commit, , , 0))
die(_("cannot get patch id"));
@@ -1388,6 +1393,7 @@ static void prepare_bases(struct base_tree_info *bases,
oidcpy(patch_id, );
bases->nr_patch_id++;
}
+   clear_commit_base(_base);
 }
 
 static void print_bases(struct base_tree_info *bases, FILE *file)
-- 
2.17.0.705.g3525833791



[PATCH v2 10/14] name-rev: use commit-slab for rev-name instead of commit->util

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/name-rev.c | 23 ---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index 387ddf85d2..0eb440359d 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -6,6 +6,7 @@
 #include "refs.h"
 #include "parse-options.h"
 #include "sha1-lookup.h"
+#include "commit-slab.h"
 
 #define CUTOFF_DATE_SLOP 86400 /* one day */
 
@@ -17,11 +18,26 @@ typedef struct rev_name {
int from_tag;
 } rev_name;
 
+define_commit_slab(commit_rev_name, struct rev_name *);
+
 static timestamp_t cutoff = TIME_MAX;
+static struct commit_rev_name rev_names;
 
 /* How many generations are maximally preferred over _one_ merge traversal? */
 #define MERGE_TRAVERSAL_WEIGHT 65535
 
+static struct rev_name *get_commit_rev_name(struct commit *commit)
+{
+   struct rev_name **slot = commit_rev_name_peek(_names, commit);
+
+   return slot ? *slot : NULL;
+}
+
+static void set_commit_rev_name(struct commit *commit, struct rev_name *name)
+{
+   *commit_rev_name_at(_names, commit) = name;
+}
+
 static int is_better_name(struct rev_name *name,
  const char *tip_name,
  timestamp_t taggerdate,
@@ -65,7 +81,7 @@ static void name_rev(struct commit *commit,
int generation, int distance, int from_tag,
int deref)
 {
-   struct rev_name *name = (struct rev_name *)commit->util;
+   struct rev_name *name = get_commit_rev_name(commit);
struct commit_list *parents;
int parent_number = 1;
char *to_free = NULL;
@@ -84,7 +100,7 @@ static void name_rev(struct commit *commit,
 
if (name == NULL) {
name = xmalloc(sizeof(rev_name));
-   commit->util = name;
+   set_commit_rev_name(commit, name);
goto copy_data;
} else if (is_better_name(name, tip_name, taggerdate,
  generation, distance, from_tag)) {
@@ -296,7 +312,7 @@ static const char *get_rev_name(const struct object *o, 
struct strbuf *buf)
if (o->type != OBJ_COMMIT)
return get_exact_ref_match(o);
c = (struct commit *) o;
-   n = c->util;
+   n = get_commit_rev_name(c);
if (!n)
return NULL;
 
@@ -413,6 +429,7 @@ int cmd_name_rev(int argc, const char **argv, const char 
*prefix)
OPT_END(),
};
 
+   init_commit_rev_name(_names);
git_config(git_default_config, NULL);
argc = parse_options(argc, argv, prefix, opts, name_rev_usage, 0);
if (all + transform_stdin + !!argc > 1) {
-- 
2.17.0.705.g3525833791



[PATCH v2 01/14] commit-slab.h: code split

2018-05-12 Thread Nguyễn Thái Ngọc Duy
The struct declaration and implementation macros are moved to
commit-slab-hdr.h and commit-slab-impl.h respectively. This right now
is not needed for current users but if we share a commit-slab for
multiple files, we need something better than the current structure.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 commit-slab-hdr.h  |  30 
 commit-slab-impl.h |  91 +++
 commit-slab.h  | 115 +++--
 3 files changed, 127 insertions(+), 109 deletions(-)
 create mode 100644 commit-slab-hdr.h
 create mode 100644 commit-slab-impl.h

diff --git a/commit-slab-hdr.h b/commit-slab-hdr.h
new file mode 100644
index 00..fb5220fb7d
--- /dev/null
+++ b/commit-slab-hdr.h
@@ -0,0 +1,30 @@
+#ifndef COMMIT_SLAB_HDR_H
+#define COMMIT_SLAB_HDR_H
+
+/* allocate ~512kB at once, allowing for malloc overhead */
+#ifndef COMMIT_SLAB_SIZE
+#define COMMIT_SLAB_SIZE (512*1024-32)
+#endif
+
+#define declare_commit_slab(slabname, elemtype)\
+   \
+struct slabname {  \
+   unsigned slab_size; \
+   unsigned stride;\
+   unsigned slab_count;\
+   elemtype **slab;\
+}
+
+/*
+ * Statically initialize a commit slab named "var". Note that this
+ * evaluates "stride" multiple times! Example:
+ *
+ *   struct indegree indegrees = COMMIT_SLAB_INIT(1, indegrees);
+ *
+ */
+#define COMMIT_SLAB_INIT(stride, var) { \
+   COMMIT_SLAB_SIZE / sizeof(**((var).slab)) / (stride), \
+   (stride), 0, NULL \
+}
+
+#endif /* COMMIT_SLAB_HDR_H */
diff --git a/commit-slab-impl.h b/commit-slab-impl.h
new file mode 100644
index 00..234d9ee5f0
--- /dev/null
+++ b/commit-slab-impl.h
@@ -0,0 +1,91 @@
+#ifndef COMMIT_SLAB_IMPL_H
+#define COMMIT_SLAB_IMPL_H
+
+#define MAYBE_UNUSED __attribute__((__unused__))
+
+#define implement_commit_slab(slabname, elemtype)  \
+   \
+static int stat_ ##slabname## realloc; \
+   \
+static MAYBE_UNUSED void init_ ##slabname## _with_stride(struct slabname *s, \
+  unsigned stride) \
+{  \
+   unsigned int elem_size; \
+   if (!stride)\
+   stride = 1; \
+   s->stride = stride; \
+   elem_size = sizeof(elemtype) * stride;  \
+   s->slab_size = COMMIT_SLAB_SIZE / elem_size;\
+   s->slab_count = 0;  \
+   s->slab = NULL; \
+}  \
+   \
+static MAYBE_UNUSED void init_ ##slabname(struct slabname *s)  \
+{  \
+   init_ ##slabname## _with_stride(s, 1);  \
+}  \
+   \
+static MAYBE_UNUSED void clear_ ##slabname(struct slabname *s) \
+{  \
+   unsigned int i; \
+   for (i = 0; i < s->slab_count; i++) \
+   free(s->slab[i]);   \
+   s->slab_count = 0;  \
+   FREE_AND_NULL(s->slab); \
+}  \
+   \
+static MAYBE_UNUSED elemtype *slabname## _at_peek(struct slabname *s,  \
+ const struct commit *c, \
+ int add_if_missing)   \
+{  \
+   unsigned int nth_slab, nth_slot;\
+   \
+   nth_slab = c->index / s->slab_size; \
+   nth_slot = c->index % 

[PATCH v2 00/14] Die commit->util, die!

2018-05-12 Thread Nguyễn Thái Ngọc Duy
v2 is mostly refinements with a big change: commit-slab.h is
restructured to allow sharing commit slabs. Other changes are

- rename struct source_slab to revision_sources
- keep revision_sources_* functinons (and one static variable) to
  revision.c instead of duplicating them whenver revision.h is
  included.
- reduce elemtype in commit_seen in sequencer.c from 4 bytes to 1.
- avoid dereferencing _peek in sequencer.c

Interdiff

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 092e29583e..b08e5ea0e3 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -39,7 +39,7 @@ static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
 static struct refspec *refspecs;
 static int refspecs_nr;
 static int anonymize;
-static struct source_slab source_slab;
+static struct revision_sources revision_sources;
 
 static int parse_opt_signed_tag_mode(const struct option *opt,
 const char *arg, int unset)
@@ -592,7 +592,7 @@ static void handle_commit(struct commit *commit, struct 
rev_info *rev,
if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
export_blob(_queued_diff.queue[i]->two->oid);
 
-   refname = *source_slab_peek(_slab, commit);
+   refname = *revision_sources_peek(_sources, commit);
if (anonymize) {
refname = anonymize_refname(refname);
anonymize_ident_line(, _end);
@@ -864,11 +864,11 @@ static void get_tags_and_duplicates(struct 
rev_cmdline_info *info)
 * This ref will not be updated through a commit, lets make
 * sure it gets properly updated eventually.
 */
-   if (*source_slab_at(_slab, commit) ||
+   if (*revision_sources_at(_sources, commit) ||
commit->object.flags & SHOWN)
string_list_append(_refs, full_name)->util = 
commit;
-   if (!*source_slab_at(_slab, commit))
-   *source_slab_at(_slab, commit) = full_name;
+   if (!*revision_sources_at(_sources, commit))
+   *revision_sources_at(_sources, commit) = 
full_name;
}
 }
 
@@ -1032,9 +1032,9 @@ int cmd_fast_export(int argc, const char **argv, const 
char *prefix)
git_config(git_default_config, NULL);
 
init_revisions(, prefix);
-   init_source_slab(_slab);
+   init_revision_sources(_sources);
revs.topo_order = 1;
-   revs.source_slab = _slab;
+   revs.sources = _sources;
revs.rewrite_parents = 1;
argc = parse_options(argc, argv, prefix, options, fast_export_usage,
PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN);
diff --git a/builtin/log.c b/builtin/log.c
index b771d27164..967fbc5caa 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -149,7 +149,7 @@ static void cmd_log_init_finish(int argc, const char 
**argv, const char *prefix,
static struct string_list decorate_refs_include = 
STRING_LIST_INIT_NODUP;
struct decoration_filter decoration_filter = {_refs_include,
  _refs_exclude};
-   static struct source_slab source_slab;
+   static struct revision_sources revision_sources;
 
const struct option builtin_log_options[] = {
OPT__QUIET(, N_("suppress diff output")),
@@ -197,8 +197,8 @@ static void cmd_log_init_finish(int argc, const char 
**argv, const char *prefix,
rev->always_show_header = 0;
 
if (source) {
-   init_source_slab(_slab);
-   rev->source_slab = _slab;
+   init_revision_sources(_sources);
+   rev->sources = _sources;
}
 
if (mailmap) {
diff --git a/commit-slab-hdr.h b/commit-slab-hdr.h
new file mode 100644
index 00..adc7b46c83
--- /dev/null
+++ b/commit-slab-hdr.h
@@ -0,0 +1,43 @@
+#ifndef COMMIT_SLAB_HDR_H
+#define COMMIT_SLAB_HDR_H
+
+/* allocate ~512kB at once, allowing for malloc overhead */
+#ifndef COMMIT_SLAB_SIZE
+#define COMMIT_SLAB_SIZE (512*1024-32)
+#endif
+
+#define declare_commit_slab(slabname, elemtype)\
+   \
+struct slabname {  \
+   unsigned slab_size; \
+   unsigned stride;\
+   unsigned slab_count;\
+   elemtype **slab;\
+}
+
+/*
+ * Statically initialize a commit slab named "var". Note that this
+ * evaluates "stride" multiple times! Example:
+ *
+ *   struct indegree indegrees = COMMIT_SLAB_INIT(1, indegrees);
+ *
+ */
+#define COMMIT_SLAB_INIT(stride, var) { \
+   COMMIT_SLAB_SIZE / sizeof(**((var).slab)) / (stride), \
+   (stride), 0, NULL \
+}
+
+#define 

[PATCH v2 11/14] show-branch: use commit-slab for commit-name instead of commit->util

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/show-branch.c | 39 +++
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/builtin/show-branch.c b/builtin/show-branch.c
index 6c2148b71d..29d15d16d2 100644
--- a/builtin/show-branch.c
+++ b/builtin/show-branch.c
@@ -7,6 +7,7 @@
 #include "argv-array.h"
 #include "parse-options.h"
 #include "dir.h"
+#include "commit-slab.h"
 
 static const char* show_branch_usage[] = {
 N_("git show-branch [-a | --all] [-r | --remotes] [--topo-order | 
--date-order]\n"
@@ -59,15 +60,27 @@ struct commit_name {
int generation; /* how many parents away from head_name */
 };
 
+define_commit_slab(commit_name_slab, struct commit_name *);
+static struct commit_name_slab name_slab;
+
+static struct commit_name *commit_to_name(struct commit *commit)
+{
+   return *commit_name_slab_at(_slab, commit);
+}
+
+
 /* Name the commit as nth generation ancestor of head_name;
  * we count only the first-parent relationship for naming purposes.
  */
 static void name_commit(struct commit *commit, const char *head_name, int nth)
 {
struct commit_name *name;
-   if (!commit->util)
-   commit->util = xmalloc(sizeof(struct commit_name));
-   name = commit->util;
+
+   name = *commit_name_slab_at(_slab, commit);
+   if (!name) {
+   name = xmalloc(sizeof(*name));
+   *commit_name_slab_at(_slab, commit) = name;
+   }
name->head_name = head_name;
name->generation = nth;
 }
@@ -79,8 +92,8 @@ static void name_commit(struct commit *commit, const char 
*head_name, int nth)
  */
 static void name_parent(struct commit *commit, struct commit *parent)
 {
-   struct commit_name *commit_name = commit->util;
-   struct commit_name *parent_name = parent->util;
+   struct commit_name *commit_name = commit_to_name(commit);
+   struct commit_name *parent_name = commit_to_name(parent);
if (!commit_name)
return;
if (!parent_name ||
@@ -94,12 +107,12 @@ static int name_first_parent_chain(struct commit *c)
int i = 0;
while (c) {
struct commit *p;
-   if (!c->util)
+   if (!commit_to_name(c))
break;
if (!c->parents)
break;
p = c->parents->item;
-   if (!p->util) {
+   if (!commit_to_name(p)) {
name_parent(c, p);
i++;
}
@@ -122,7 +135,7 @@ static void name_commits(struct commit_list *list,
/* First give names to the given heads */
for (cl = list; cl; cl = cl->next) {
c = cl->item;
-   if (c->util)
+   if (commit_to_name(c))
continue;
for (i = 0; i < num_rev; i++) {
if (rev[i] == c) {
@@ -148,9 +161,9 @@ static void name_commits(struct commit_list *list,
struct commit_name *n;
int nth;
c = cl->item;
-   if (!c->util)
+   if (!commit_to_name(c))
continue;
-   n = c->util;
+   n = commit_to_name(c);
parents = c->parents;
nth = 0;
while (parents) {
@@ -158,7 +171,7 @@ static void name_commits(struct commit_list *list,
struct strbuf newname = STRBUF_INIT;
parents = parents->next;
nth++;
-   if (p->util)
+   if (commit_to_name(p))
continue;
switch (n->generation) {
case 0:
@@ -271,7 +284,7 @@ static void show_one_commit(struct commit *commit, int 
no_name)
 {
struct strbuf pretty = STRBUF_INIT;
const char *pretty_str = "(unavailable)";
-   struct commit_name *name = commit->util;
+   struct commit_name *name = commit_to_name(commit);
 
if (commit->object.parsed) {
pp_commit_easy(CMIT_FMT_ONELINE, commit, );
@@ -660,6 +673,8 @@ int cmd_show_branch(int ac, const char **av, const char 
*prefix)
OPT_END()
};
 
+   init_commit_name_slab(_slab);
+
git_config(git_show_branch_config, NULL);
 
/* If nothing is specified, try the default first */
-- 
2.17.0.705.g3525833791



[PATCH v2 03/14] blame: use commit-slab for blame suspects instead of commit->util

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 blame.c | 42 +++---
 blame.h |  2 ++
 builtin/blame.c |  2 +-
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/blame.c b/blame.c
index 78c9808bd1..18e8bd996a 100644
--- a/blame.c
+++ b/blame.c
@@ -6,6 +6,24 @@
 #include "diffcore.h"
 #include "tag.h"
 #include "blame.h"
+#include "commit-slab.h"
+
+define_commit_slab(blame_suspects, struct blame_origin *);
+static struct blame_suspects blame_suspects;
+
+struct blame_origin *get_blame_suspects(struct commit *commit)
+{
+   struct blame_origin **result;
+
+   result = blame_suspects_peek(_suspects, commit);
+
+   return result ? *result : NULL;
+}
+
+static void set_blame_suspects(struct commit *commit, struct blame_origin 
*origin)
+{
+   *blame_suspects_at(_suspects, commit) = origin;
+}
 
 void blame_origin_decref(struct blame_origin *o)
 {
@@ -15,12 +33,12 @@ void blame_origin_decref(struct blame_origin *o)
blame_origin_decref(o->previous);
free(o->file.ptr);
/* Should be present exactly once in commit chain */
-   for (p = o->commit->util; p; l = p, p = p->next) {
+   for (p = get_blame_suspects(o->commit); p; l = p, p = p->next) {
if (p == o) {
if (l)
l->next = p->next;
else
-   o->commit->util = p->next;
+   set_blame_suspects(o->commit, p->next);
free(o);
return;
}
@@ -41,8 +59,8 @@ static struct blame_origin *make_origin(struct commit 
*commit, const char *path)
FLEX_ALLOC_STR(o, path, path);
o->commit = commit;
o->refcnt = 1;
-   o->next = commit->util;
-   commit->util = o;
+   o->next = get_blame_suspects(commit);
+   set_blame_suspects(commit, o);
return o;
 }
 
@@ -54,13 +72,13 @@ static struct blame_origin *get_origin(struct commit 
*commit, const char *path)
 {
struct blame_origin *o, *l;
 
-   for (o = commit->util, l = NULL; o; l = o, o = o->next) {
+   for (o = get_blame_suspects(commit), l = NULL; o; l = o, o = o->next) {
if (!strcmp(o->path, path)) {
/* bump to front */
if (l) {
l->next = o->next;
-   o->next = commit->util;
-   commit->util = o;
+   o->next = get_blame_suspects(commit);
+   set_blame_suspects(commit, o);
}
return blame_origin_incref(o);
}
@@ -478,7 +496,7 @@ static void queue_blames(struct blame_scoreboard *sb, 
struct blame_origin *porig
porigin->suspects = blame_merge(porigin->suspects, sorted);
else {
struct blame_origin *o;
-   for (o = porigin->commit->util; o; o = o->next) {
+   for (o = get_blame_suspects(porigin->commit); o; o = o->next) {
if (o->suspects) {
porigin->suspects = sorted;
return;
@@ -525,7 +543,7 @@ static struct blame_origin *find_origin(struct commit 
*parent,
const char *paths[2];
 
/* First check any existing origins */
-   for (porigin = parent->util; porigin; porigin = porigin->next)
+   for (porigin = get_blame_suspects(parent); porigin; porigin = 
porigin->next)
if (!strcmp(porigin->path, origin->path)) {
/*
 * The same path between origin and its parent
@@ -1550,7 +1568,7 @@ void assign_blame(struct blame_scoreboard *sb, int opt)
 
while (commit) {
struct blame_entry *ent;
-   struct blame_origin *suspect = commit->util;
+   struct blame_origin *suspect = get_blame_suspects(commit);
 
/* find one suspect to break down */
while (suspect && !suspect->suspects)
@@ -1752,6 +1770,8 @@ void setup_scoreboard(struct blame_scoreboard *sb, const 
char *path, struct blam
struct commit *final_commit = NULL;
enum object_type type;
 
+   init_blame_suspects(_suspects);
+
if (sb->reverse && sb->contents_from)
die(_("--contents and --reverse do not blend well."));
 
@@ -1815,7 +1835,7 @@ void setup_scoreboard(struct blame_scoreboard *sb, const 
char *path, struct blam
}
 
if (is_null_oid(>final->object.oid)) {
-   o = sb->final->util;
+   o = 

[PATCH v2 07/14] sequencer.c: use commit-slab to associate todo items to commits

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 sequencer.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/sequencer.c b/sequencer.c
index 6b785c6c5f..dd4993fd99 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -3362,6 +3362,8 @@ static int subject2item_cmp(const void *fndata,
return key ? strcmp(a->subject, key) : strcmp(a->subject, b->subject);
 }
 
+define_commit_slab(commit_todo_item, struct todo_item *);
+
 /*
  * Rearrange the todo list that has both "pick commit-id msg" and "pick
  * commit-id fixup!/squash! msg" in it so that the latter is put immediately
@@ -3378,6 +3380,7 @@ int rearrange_squash(void)
struct hashmap subject2item;
int res = 0, rearranged = 0, *next, *tail, i;
char **subjects;
+   struct commit_todo_item commit_todo;
 
if (strbuf_read_file_or_whine(_list.buf, todo_file) < 0)
return -1;
@@ -3386,6 +3389,7 @@ int rearrange_squash(void)
return -1;
}
 
+   init_commit_todo_item(_todo);
/*
 * The hashmap maps onelines to the respective todo list index.
 *
@@ -3416,10 +3420,11 @@ int rearrange_squash(void)
 
if (is_fixup(item->command)) {
todo_list_release(_list);
+   clear_commit_todo_item(_todo);
return error(_("the script was already rearranged."));
}
 
-   item->commit->util = item;
+   *commit_todo_item_at(_todo, item->commit) = item;
 
parse_commit(item->commit);
commit_buffer = get_commit_buffer(item->commit, NULL);
@@ -3446,9 +3451,9 @@ int rearrange_squash(void)
else if (!strchr(p, ' ') &&
 (commit2 =
  lookup_commit_reference_by_name(p)) &&
-commit2->util)
+*commit_todo_item_at(_todo, commit2))
/* found by commit name */
-   i2 = (struct todo_item *)commit2->util
+   i2 = *commit_todo_item_at(_todo, commit2)
- todo_list.items;
else {
/* copy can be a prefix of the commit subject */
@@ -3527,5 +3532,6 @@ int rearrange_squash(void)
hashmap_free(, 1);
todo_list_release(_list);
 
+   clear_commit_todo_item(_todo);
return res;
 }
-- 
2.17.0.705.g3525833791



[PATCH v2 02/14] commit-slab: support shared commit-slab

2018-05-12 Thread Nguyễn Thái Ngọc Duy
define_shared_commit_slab() could be used in a header file to define a
commit-slab. One of these C files must include commit-slab-impl.h and
"call" implement_shared_commit_slab().

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 commit-slab-hdr.h  | 13 +
 commit-slab-impl.h | 20 +---
 commit-slab.h  |  2 +-
 3 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/commit-slab-hdr.h b/commit-slab-hdr.h
index fb5220fb7d..adc7b46c83 100644
--- a/commit-slab-hdr.h
+++ b/commit-slab-hdr.h
@@ -27,4 +27,17 @@ struct slabname {
\
(stride), 0, NULL \
 }
 
+#define declare_commit_slab_prototypes(slabname, elemtype) \
+   \
+void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \
+void init_ ##slabname(struct slabname *s); \
+void clear_ ##slabname(struct slabname *s);\
+elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int 
add_if_missing); \
+elemtype *slabname## _at(struct slabname *s, const struct commit *c);  \
+elemtype *slabname## _peek(struct slabname *s, const struct commit *c)
+
+#define define_shared_commit_slab(slabname, elemtype) \
+   declare_commit_slab(slabname, elemtype); \
+   declare_commit_slab_prototypes(slabname, elemtype)
+
 #endif /* COMMIT_SLAB_HDR_H */
diff --git a/commit-slab-impl.h b/commit-slab-impl.h
index 234d9ee5f0..19a88d7d8f 100644
--- a/commit-slab-impl.h
+++ b/commit-slab-impl.h
@@ -3,11 +3,17 @@
 
 #define MAYBE_UNUSED __attribute__((__unused__))
 
-#define implement_commit_slab(slabname, elemtype)  \
+#define implement_static_commit_slab(slabname, elemtype) \
+   implement_commit_slab(slabname, elemtype, static MAYBE_UNUSED)
+
+#define implement_shared_commit_slab(slabname, elemtype) \
+   implement_commit_slab(slabname, elemtype, )
+
+#define implement_commit_slab(slabname, elemtype, scope)   \
\
 static int stat_ ##slabname## realloc; \
\
-static MAYBE_UNUSED void init_ ##slabname## _with_stride(struct slabname *s, \
+scope void init_ ##slabname## _with_stride(struct slabname *s, \
   unsigned stride) \
 {  \
unsigned int elem_size; \
@@ -20,12 +26,12 @@ static MAYBE_UNUSED void init_ ##slabname## 
_with_stride(struct slabname *s, \
s->slab = NULL; \
 }  \
\
-static MAYBE_UNUSED void init_ ##slabname(struct slabname *s)  \
+scope void init_ ##slabname(struct slabname *s)
\
 {  \
init_ ##slabname## _with_stride(s, 1);  \
 }  \
\
-static MAYBE_UNUSED void clear_ ##slabname(struct slabname *s) \
+scope void clear_ ##slabname(struct slabname *s)   \
 {  \
unsigned int i; \
for (i = 0; i < s->slab_count; i++) \
@@ -34,7 +40,7 @@ static MAYBE_UNUSED void clear_ ##slabname(struct slabname 
*s)\
FREE_AND_NULL(s->slab); \
 }  \
\
-static MAYBE_UNUSED elemtype *slabname## _at_peek(struct slabname *s,  \
+scope elemtype *slabname## _at_peek(struct slabname *s,
\
  const struct commit *c, \
  int add_if_missing)   \
 {  \
@@ -62,13 +68,13 @@ static MAYBE_UNUSED elemtype *slabname## _at_peek(struct 
slabname *s,   \
return >slab[nth_slab][nth_slot * s->stride];\
 }  \
\
-static MAYBE_UNUSED elemtype *slabname## _at(struct slabname *s,   \
+scope elemtype *slabname## _at(struct slabname *s,  

[PATCH v2 08/14] revision.c: use commit-slab for show_source

2018-05-12 Thread Nguyễn Thái Ngọc Duy
Instead of relying on commit->util to store the source string, let the
user provide a commit-slab to store the source strings in.

It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/fast-export.c | 14 +-
 builtin/log.c |  7 +--
 log-tree.c|  8 ++--
 revision.c| 19 +++
 revision.h|  5 -
 5 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 530df12f05..b08e5ea0e3 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -21,6 +21,7 @@
 #include "quote.h"
 #include "remote.h"
 #include "blob.h"
+#include "commit-slab.h"
 
 static const char *fast_export_usage[] = {
N_("git fast-export [rev-list-opts]"),
@@ -38,6 +39,7 @@ static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
 static struct refspec *refspecs;
 static int refspecs_nr;
 static int anonymize;
+static struct revision_sources revision_sources;
 
 static int parse_opt_signed_tag_mode(const struct option *opt,
 const char *arg, int unset)
@@ -590,7 +592,7 @@ static void handle_commit(struct commit *commit, struct 
rev_info *rev,
if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
export_blob(_queued_diff.queue[i]->two->oid);
 
-   refname = commit->util;
+   refname = *revision_sources_peek(_sources, commit);
if (anonymize) {
refname = anonymize_refname(refname);
anonymize_ident_line(, _end);
@@ -862,10 +864,11 @@ static void get_tags_and_duplicates(struct 
rev_cmdline_info *info)
 * This ref will not be updated through a commit, lets make
 * sure it gets properly updated eventually.
 */
-   if (commit->util || commit->object.flags & SHOWN)
+   if (*revision_sources_at(_sources, commit) ||
+   commit->object.flags & SHOWN)
string_list_append(_refs, full_name)->util = 
commit;
-   if (!commit->util)
-   commit->util = full_name;
+   if (!*revision_sources_at(_sources, commit))
+   *revision_sources_at(_sources, commit) = 
full_name;
}
 }
 
@@ -1029,8 +1032,9 @@ int cmd_fast_export(int argc, const char **argv, const 
char *prefix)
git_config(git_default_config, NULL);
 
init_revisions(, prefix);
+   init_revision_sources(_sources);
revs.topo_order = 1;
-   revs.show_source = 1;
+   revs.sources = _sources;
revs.rewrite_parents = 1;
argc = parse_options(argc, argv, prefix, options, fast_export_usage,
PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN);
diff --git a/builtin/log.c b/builtin/log.c
index 71f68a3e4f..d017e57475 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -148,6 +148,7 @@ static void cmd_log_init_finish(int argc, const char 
**argv, const char *prefix,
static struct string_list decorate_refs_include = 
STRING_LIST_INIT_NODUP;
struct decoration_filter decoration_filter = {_refs_include,
  _refs_exclude};
+   static struct revision_sources revision_sources;
 
const struct option builtin_log_options[] = {
OPT__QUIET(, N_("suppress diff output")),
@@ -194,8 +195,10 @@ static void cmd_log_init_finish(int argc, const char 
**argv, const char *prefix,
rev->diffopt.filter || rev->diffopt.flags.follow_renames)
rev->always_show_header = 0;
 
-   if (source)
-   rev->show_source = 1;
+   if (source) {
+   init_revision_sources(_sources);
+   rev->sources = _sources;
+   }
 
if (mailmap) {
rev->mailmap = xcalloc(1, sizeof(struct string_list));
diff --git a/log-tree.c b/log-tree.c
index d1c0bedf24..0b41ee3235 100644
--- a/log-tree.c
+++ b/log-tree.c
@@ -295,8 +295,12 @@ void show_decorations(struct rev_info *opt, struct commit 
*commit)
 {
struct strbuf sb = STRBUF_INIT;
 
-   if (opt->show_source && commit->util)
-   fprintf(opt->diffopt.file, "\t%s", (char *) commit->util);
+   if (opt->sources) {
+   char **slot = revision_sources_peek(opt->sources, commit);
+
+   if (slot && *slot)
+   fprintf(opt->diffopt.file, "\t%s", *slot);
+   }
if (!opt->show_decorations)
return;
format_decorations(, commit, opt->diffopt.use_color);
diff --git a/revision.c b/revision.c
index 1cff11833e..be8fe7d67b 100644
--- a/revision.c
+++ b/revision.c
@@ -29,6 +29,8 @@ volatile show_early_output_fn_t show_early_output;
 static const char *term_bad;
 static const char *term_good;
 

[PATCH v2 14/14] commit.h: delete 'util' field in struct commit

2018-05-12 Thread Nguyễn Thái Ngọc Duy
If you have come this far, you probably have seen that this 'util'
pointer is used for many different purposes. Some are not even
contained in a command code, but buried deep in common code with no
clue who will use it and how.

The move to using commit-slab gives us a much better picture of how
some piece of data is associated with a commit and what for. Since
nobody uses 'util' pointer anymore, we can retire so that nobody will
abuse it again. commit-slab will be the way forward for associating
data to a commit.

As a side benefit, this shrinks struct commit by 8 bytes (on 64-bit
architecture) which should help reduce memory usage for reachability
test a bit. This is also what commit-slab is invented for [1].

[1] 96c4f4a370 (commit: allow associating auxiliary info on-demand -
2013-04-09)

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 commit.h | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/commit.h b/commit.h
index 838f6a6b26..70371e111e 100644
--- a/commit.h
+++ b/commit.h
@@ -18,12 +18,16 @@ struct commit_list {
 
 struct commit {
struct object object;
-   void *util;
unsigned int index;
timestamp_t date;
struct commit_list *parents;
struct tree *tree;
uint32_t graph_pos;
+   /*
+* Do not add more fields here unless it's _very_ often
+* used. Use commit-slab to associate more data with a commit
+* instead.
+*/
 };
 
 extern int save_commit_buffer;
-- 
2.17.0.705.g3525833791



[PATCH v2 13/14] merge: use commit-slab in merge remote desc instead of commit->util

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/merge.c   | 25 +
 commit.c  | 12 ++--
 commit.h  |  2 +-
 merge-recursive.c |  8 +---
 4 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index 9db5a2cf16..fc55bc264b 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -443,6 +443,7 @@ static void merge_name(const char *remote, struct strbuf 
*msg)
struct object_id branch_head;
struct strbuf buf = STRBUF_INIT;
struct strbuf bname = STRBUF_INIT;
+   struct merge_remote_desc *desc;
const char *ptr;
char *found_ref;
int len, early;
@@ -515,16 +516,13 @@ static void merge_name(const char *remote, struct strbuf 
*msg)
strbuf_release();
}
 
-   if (remote_head->util) {
-   struct merge_remote_desc *desc;
-   desc = merge_remote_util(remote_head);
-   if (desc && desc->obj && desc->obj->type == OBJ_TAG) {
-   strbuf_addf(msg, "%s\t\t%s '%s'\n",
-   oid_to_hex(>obj->oid),
-   type_name(desc->obj->type),
-   remote);
-   goto cleanup;
-   }
+   desc = merge_remote_util(remote_head);
+   if (desc && desc->obj && desc->obj->type == OBJ_TAG) {
+   strbuf_addf(msg, "%s\t\t%s '%s'\n",
+   oid_to_hex(>obj->oid),
+   type_name(desc->obj->type),
+   remote);
+   goto cleanup;
}
 
strbuf_addf(msg, "%s\t\tcommit '%s'\n",
@@ -932,8 +930,11 @@ static void write_merge_heads(struct commit_list 
*remoteheads)
for (j = remoteheads; j; j = j->next) {
struct object_id *oid;
struct commit *c = j->item;
-   if (c->util && merge_remote_util(c)->obj) {
-   oid = _remote_util(c)->obj->oid;
+   struct merge_remote_desc *desc;
+
+   desc = merge_remote_util(c);
+   if (desc && desc->obj) {
+   oid = >obj->oid;
} else {
oid = >object.oid;
}
diff --git a/commit.c b/commit.c
index 57049118a5..8202067cd5 100644
--- a/commit.c
+++ b/commit.c
@@ -1574,13 +1574,21 @@ int commit_tree_extended(const char *msg, size_t 
msg_len,
return result;
 }
 
+define_commit_slab(merge_desc_slab, struct merge_remote_desc *);
+struct merge_desc_slab merge_desc_slab = COMMIT_SLAB_INIT(1, merge_desc_slab);
+
+struct merge_remote_desc *merge_remote_util(struct commit *commit)
+{
+   return *merge_desc_slab_at(_desc_slab, commit);
+}
+
 void set_merge_remote_desc(struct commit *commit,
   const char *name, struct object *obj)
 {
struct merge_remote_desc *desc;
FLEX_ALLOC_STR(desc, name, name);
desc->obj = obj;
-   commit->util = desc;
+   *merge_desc_slab_at(_desc_slab, commit) = desc;
 }
 
 struct commit *get_merge_parent(const char *name)
@@ -1592,7 +1600,7 @@ struct commit *get_merge_parent(const char *name)
return NULL;
obj = parse_object();
commit = (struct commit *)peel_to_type(name, 0, obj, OBJ_COMMIT);
-   if (commit && !commit->util)
+   if (commit && !merge_remote_util(commit))
set_merge_remote_desc(commit, name, obj);
return commit;
 }
diff --git a/commit.h b/commit.h
index e57ae4b583..838f6a6b26 100644
--- a/commit.h
+++ b/commit.h
@@ -303,7 +303,7 @@ struct merge_remote_desc {
struct object *obj; /* the named object, could be a tag */
char name[FLEX_ARRAY];
 };
-#define merge_remote_util(commit) ((struct merge_remote_desc 
*)((commit)->util))
+extern struct merge_remote_desc *merge_remote_util(struct commit *);
 extern void set_merge_remote_desc(struct commit *commit,
  const char *name, struct object *obj);
 
diff --git a/merge-recursive.c b/merge-recursive.c
index 0c0d48624d..5537f01f8e 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -223,10 +223,12 @@ static void output(struct merge_options *o, int v, const 
char *fmt, ...)
 
 static void output_commit_title(struct merge_options *o, struct commit *commit)
 {
+   struct merge_remote_desc *desc;
+
strbuf_addchars(>obuf, ' ', o->call_depth * 2);
-   if (commit->util)
-   strbuf_addf(>obuf, "virtual %s\n",
-   merge_remote_util(commit)->name);
+   desc = merge_remote_util(commit);
+   if (desc)
+   strbuf_addf(>obuf, "virtual %s\n", desc->name);
else {
strbuf_add_unique_abbrev(>obuf, >object.oid,
 

Re: [PATCH 00/12] Die commit->util, die!

2018-05-12 Thread Duy Nguyen
On Sat, May 12, 2018 at 8:50 PM, Jakub Narebski  wrote:
> I just wonder if most of those transformation could not be done with
> Coccinelle, instead of doing it by hand.

I doubt coccinelle is smart enough to figure out the convoluted use of
'util' pointer (but then I'm not a heavy coccinelle user). I actually
got it wrong a couple times and had to rely on the test suite to guide
me.
-- 
Duy


[PATCH v2 16/28] t4008: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test so that it computes variables for blobs instead of using
hard-coded hashes.

Signed-off-by: brian m. carlson 
---
 t/t4008-diff-break-rewrite.sh | 59 +++
 1 file changed, 32 insertions(+), 27 deletions(-)

diff --git a/t/t4008-diff-break-rewrite.sh b/t/t4008-diff-break-rewrite.sh
index 9dd1bc5e16..b1ccd4102e 100755
--- a/t/t4008-diff-break-rewrite.sh
+++ b/t/t4008-diff-break-rewrite.sh
@@ -27,29 +27,32 @@ Further, with -B and -M together, these should turn into 
two renames.
 test_expect_success setup '
cat "$TEST_DIRECTORY"/diff-lib/README >file0 &&
cat "$TEST_DIRECTORY"/diff-lib/COPYING >file1 &&
+   blob0_id=$(git hash-object file0) &&
+   blob1_id=$(git hash-object file1) &&
git update-index --add file0 file1 &&
git tag reference $(git write-tree)
 '
 
 test_expect_success 'change file1 with copy-edit of file0 and remove file0' '
sed -e "s/git/GIT/" file0 >file1 &&
+   blob2_id=$(git hash-object file1) &&
rm -f file0 &&
git update-index --remove file0 file1
 '
 
 test_expect_success 'run diff with -B (#1)' '
git diff-index -B --cached reference >current &&
-   cat >expect <<-\EOF &&
-   :100644 00 548142c327a6790ff8821d67c2ee1eff7a656b52 
 D  file0
-   :100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 
2fbedd0b5d4b8126e4750c3bee305e8ff79f80ec M100   file1
+   cat >expect <<-EOF &&
+   :100644 00 $blob0_id $ZERO_OID Dfile0
+   :100644 100644 $blob1_id $blob2_id M100 file1
EOF
compare_diff_raw expect current
 '
 
 test_expect_success 'run diff with -B and -M (#2)' '
git diff-index -B -M reference >current &&
-   cat >expect <<-\EOF &&
-   :100644 100644 548142c327a6790ff8821d67c2ee1eff7a656b52 
2fbedd0b5d4b8126e4750c3bee305e8ff79f80ec R100   file0   file1
+   cat >expect <<-EOF &&
+   :100644 100644 $blob0_id $blob2_id R100 file0   file1
EOF
compare_diff_raw expect current
 '
@@ -66,18 +69,18 @@ test_expect_success 'swap file0 and file1' '
 
 test_expect_success 'run diff with -B (#3)' '
git diff-index -B reference >current &&
-   cat >expect <<-\EOF &&
-   :100644 100644 548142c327a6790ff8821d67c2ee1eff7a656b52 
6ff87c4664981e4397625791c8ea3bbb5f2279a3 M100   file0
-   :100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 
548142c327a6790ff8821d67c2ee1eff7a656b52 M100   file1
+   cat >expect <<-EOF &&
+   :100644 100644 $blob0_id $blob1_id M100 file0
+   :100644 100644 $blob1_id $blob0_id M100 file1
EOF
compare_diff_raw expect current
 '
 
 test_expect_success 'run diff with -B and -M (#4)' '
git diff-index -B -M reference >current &&
-   cat >expect <<-\EOF &&
-   :100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 
6ff87c4664981e4397625791c8ea3bbb5f2279a3 R100   file1   file0
-   :100644 100644 548142c327a6790ff8821d67c2ee1eff7a656b52 
548142c327a6790ff8821d67c2ee1eff7a656b52 R100   file0   file1
+   cat >expect <<-EOF &&
+   :100644 100644 $blob1_id $blob1_id R100 file1   file0
+   :100644 100644 $blob0_id $blob0_id R100 file0   file1
EOF
compare_diff_raw expect current
 '
@@ -85,14 +88,15 @@ test_expect_success 'run diff with -B and -M (#4)' '
 test_expect_success 'make file0 into something completely different' '
rm -f file0 &&
test_ln_s_add frotz file0 &&
+   slink_id=$(printf frotz | git hash-object --stdin) &&
git update-index file1
 '
 
 test_expect_success 'run diff with -B (#5)' '
git diff-index -B reference >current &&
-   cat >expect <<-\EOF &&
-   :100644 12 548142c327a6790ff8821d67c2ee1eff7a656b52 
67be421f88824578857624f7b3dc75e99a8a1481 T  file0
-   :100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 
548142c327a6790ff8821d67c2ee1eff7a656b52 M100   file1
+   cat >expect <<-EOF &&
+   :100644 12 $blob0_id $slink_id Tfile0
+   :100644 100644 $blob1_id $blob0_id M100 file1
EOF
compare_diff_raw expect current
 '
@@ -103,9 +107,9 @@ test_expect_success 'run diff with -B -M (#6)' '
# file0 changed from regular to symlink.  file1 is the same as the 
preimage
# of file0.  Because the change does not make file0 disappear, file1 is
# denoted as a copy of file0
-   cat >expect <<-\EOF &&
-   :100644 12 548142c327a6790ff8821d67c2ee1eff7a656b52 
67be421f88824578857624f7b3dc75e99a8a1481 T  file0
-   :100644 100644 548142c327a6790ff8821d67c2ee1eff7a656b52 
548142c327a6790ff8821d67c2ee1eff7a656b52 C  file0   file1
+   cat >expect <<-EOF &&
+   :100644 12 $blob0_id $slink_id Tfile0
+   :100644 100644 $blob0_id $blob0_id Cfile0   file1
EOF
compare_diff_raw expect current
 '
@@ -115,9 +119,9 @@ test_expect_success 'run diff with -M 

[PATCH v2 14/28] t3905: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test so that it computes variables for blobs instead of using
hard-coded hashes.

Signed-off-by: brian m. carlson 
---
 t/t3905-stash-include-untracked.sh | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/t/t3905-stash-include-untracked.sh 
b/t/t3905-stash-include-untracked.sh
index 3ea5b9bb3f..597b0637d1 100755
--- a/t/t3905-stash-include-untracked.sh
+++ b/t/t3905-stash-include-untracked.sh
@@ -35,24 +35,26 @@ test_expect_success 'stash save --include-untracked cleaned 
the untracked files'
test_cmp expect actual
 '
 
+tracked=$(git rev-parse --short $(echo 1 | git hash-object --stdin))
+untracked=$(git rev-parse --short $(echo untracked | git hash-object --stdin))
 cat > expect.diff < expect <

[PATCH v2 25/28] t4042: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test so that it computes variables for blobs instead of using
hard-coded hashes.

Signed-off-by: brian m. carlson 
---
 t/t4042-diff-textconv-caching.sh | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/t/t4042-diff-textconv-caching.sh b/t/t4042-diff-textconv-caching.sh
index 04a44d5c61..bf33aedf4b 100755
--- a/t/t4042-diff-textconv-caching.sh
+++ b/t/t4042-diff-textconv-caching.sh
@@ -15,9 +15,13 @@ test_expect_success 'setup' '
echo bar content 1 >bar.bin &&
git add . &&
git commit -m one &&
+   foo1=$(git rev-parse --short HEAD:foo.bin) &&
+   bar1=$(git rev-parse --short HEAD:bar.bin) &&
echo foo content 2 >foo.bin &&
echo bar content 2 >bar.bin &&
git commit -a -m two &&
+   foo2=$(git rev-parse --short HEAD:foo.bin) &&
+   bar2=$(git rev-parse --short HEAD:bar.bin) &&
echo "*.bin diff=magic" >.gitattributes &&
git config diff.magic.textconv ./helper &&
git config diff.magic.cachetextconv true
@@ -25,14 +29,14 @@ test_expect_success 'setup' '
 
 cat >expect 

[PATCH v2 24/28] t4205: sort log output in a hash-independent way

2018-05-12 Thread brian m. carlson
This test enumerates log entries and then sorts them.  For SHA-1, this
produces results that happen to sort in the order specified in the test,
but for other hash algorithms they sort differently.  Ensure we sort the
log entries in a hash-independent way by sorting on the ref name instead
of the object ID.

Signed-off-by: brian m. carlson 
---
 t/t4205-log-pretty-formats.sh | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh
index 591f35daaf..2052cadb11 100755
--- a/t/t4205-log-pretty-formats.sh
+++ b/t/t4205-log-pretty-formats.sh
@@ -516,22 +516,22 @@ test_expect_success 'log decoration properly follows tag 
chain' '
git commit --amend -m shorter &&
git log --no-walk --tags --pretty="%H %d" --decorate=full >actual &&
cat <<-EOF >expected &&
-   $head1  (tag: refs/tags/tag2)
$head2  (tag: refs/tags/message-one)
$old_head1  (tag: refs/tags/message-two)
+   $head1  (tag: refs/tags/tag2)
EOF
-   sort actual >actual1 &&
+   sort -k3 actual >actual1 &&
test_cmp expected actual1
 '
 
 test_expect_success 'clean log decoration' '
git log --no-walk --tags --pretty="%H %D" --decorate=full >actual &&
cat >expected <<-EOF &&
-   $head1 tag: refs/tags/tag2
$head2 tag: refs/tags/message-one
$old_head1 tag: refs/tags/message-two
+   $head1 tag: refs/tags/tag2
EOF
-   sort actual >actual1 &&
+   sort -k3 actual >actual1 &&
test_cmp expected actual1
 '
 


[PATCH v2 23/28] t/lib-diff-alternative: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test code so that it computes variables for blobs instead of
using hard-coded hashes.  This makes t4033 and t4050 (the patience and
histogram tests) pass.

Signed-off-by: brian m. carlson 
---
 t/lib-diff-alternative.sh | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/t/lib-diff-alternative.sh b/t/lib-diff-alternative.sh
index 8b4dbf22d2..8d1e408bb5 100644
--- a/t/lib-diff-alternative.sh
+++ b/t/lib-diff-alternative.sh
@@ -59,9 +59,11 @@ int main(int argc, char **argv)
 }
 EOF
 
-   cat >expect <<\EOF
+   file1=$(git rev-parse --short $(git hash-object file1))
+   file2=$(git rev-parse --short $(git hash-object file2))
+   cat >expect expect <

[PATCH v2 17/28] t4014: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test so that it computes values for blobs instead of using
hard-coded hashes.

Signed-off-by: brian m. carlson 
---
 t/t4014-format-patch.sh | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/t/t4014-format-patch.sh b/t/t4014-format-patch.sh
index dac3f349a3..42b3e11207 100755
--- a/t/t4014-format-patch.sh
+++ b/t/t4014-format-patch.sh
@@ -578,7 +578,9 @@ test_expect_success 'excessive subject' '
 
rm -rf patches/ &&
git checkout side &&
+   before=$(git rev-parse --short $(git hash-object file)) &&
for i in 5 6 1 2 3 A 4 B C 7 8 9 10 D E F; do echo "$i"; done >>file &&
+   after=$(git rev-parse --short $(git hash-object file)) &&
git update-index file &&
git commit -m "This is an excessively long subject line for a message 
due to the habit some projects have of not having a short, one-line subject at 
the start of the commit message, but rather sticking a whole paragraph right at 
the start as the only thing in the commit message. It had better not become the 
filename for the patch." &&
git format-patch -o patches/ master..side &&
@@ -586,7 +588,6 @@ test_expect_success 'excessive subject' '
 '
 
 test_expect_success 'cover-letter inherits diff options' '
-
git mv file foo &&
git commit -m foo &&
git format-patch --no-renames --cover-letter -1 &&
@@ -616,7 +617,7 @@ test_expect_success 'shortlog of cover-letter wraps 
overly-long onelines' '
 '
 
 cat > expect << EOF
-index 40f36c6..2dc5c23 100644
+index $before..$after 100644
 --- a/file
 +++ b/file
 @@ -13,4 +13,20 @@ C
@@ -640,7 +641,7 @@ test_expect_success 'format-patch respects -U' '
 cat > expect << EOF
 
 diff --git a/file b/file
-index 40f36c6..2dc5c23 100644
+index $before..$after 100644
 --- a/file
 +++ b/file
 @@ -14,3 +14,19 @@ C


[PATCH v2 13/28] t3702: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Strip out the index lines in the diff before comparing them, as these
will differ between hash algorithms.  This leads to a smaller, simpler
change than editing the index line.

Signed-off-by: brian m. carlson 
---
 t/t3702-add-edit.sh | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/t/t3702-add-edit.sh b/t/t3702-add-edit.sh
index 3cb74ca296..1be5cd5756 100755
--- a/t/t3702-add-edit.sh
+++ b/t/t3702-add-edit.sh
@@ -40,7 +40,6 @@ test_expect_success 'setup' '
 
 cat > expected-patch << EOF
 diff --git a/file b/file
-index b9834b5..9020acb 100644
 --- a/file
 +++ b/file
 @@ -1,11 +1,6 @@
@@ -80,7 +79,6 @@ EOF
 
 cat > expected << EOF
 diff --git a/file b/file
-index b9834b5..ef6e94c 100644
 --- a/file
 +++ b/file
 @@ -1,10 +1,12 @@
@@ -100,7 +98,7 @@ EOF
 
 echo "#!$SHELL_PATH" >fake-editor.sh
 cat >> fake-editor.sh <<\EOF
-mv -f "$1" orig-patch &&
+egrep -v '^index' "$1" >orig-patch &&
 mv -f patch "$1"
 EOF
 
@@ -113,7 +111,8 @@ test_expect_success 'add -e' '
git add -e &&
test_cmp second-part file &&
test_cmp orig-patch expected-patch &&
-   git diff --cached > out &&
+   git diff --cached >actual &&
+   egrep -v "^index " actual >out &&
test_cmp out expected
 
 '


[PATCH v2 27/28] t4208: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test so that it computes variables for object IDs instead of
using hard-coded hashes.

Signed-off-by: brian m. carlson 
---
 t/t4208-log-magic-pathspec.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/t/t4208-log-magic-pathspec.sh b/t/t4208-log-magic-pathspec.sh
index a1705f70cf..62f335b2d9 100755
--- a/t/t4208-log-magic-pathspec.sh
+++ b/t/t4208-log-magic-pathspec.sh
@@ -45,8 +45,9 @@ test_expect_success 'git log -- :' '
 '
 
 test_expect_success 'git log HEAD -- :/' '
+   initial=$(git rev-parse --short HEAD^) &&
cat >expected <<-EOF &&
-   24b24cf initial
+   $initial initial
EOF
(cd sub && git log --oneline HEAD -- :/ >../actual) &&
test_cmp expected actual


[PATCH v2 18/28] t4020: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test so that it computes variables for blobs instead of using
hard-coded hashes.

Signed-off-by: brian m. carlson 
---
 t/t4020-diff-external.sh | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/t/t4020-diff-external.sh b/t/t4020-diff-external.sh
index 49d3f54b29..fd2140700e 100755
--- a/t/t4020-diff-external.sh
+++ b/t/t4020-diff-external.sh
@@ -13,6 +13,7 @@ test_expect_success setup '
 
test_tick &&
echo second >file &&
+   before=$(git rev-parse --short $(git hash-object file)) &&
git add file &&
git commit -m second &&
 
@@ -180,9 +181,12 @@ test_expect_success 'no diff with -diff' '
 echo NULZbetweenZwords | perl -pe 'y/Z/\000/' > file
 
 test_expect_success 'force diff with "diff"' '
+   after=$(git rev-parse --short $(git hash-object file)) &&
echo >.gitattributes "file diff" &&
git diff >actual &&
-   test_cmp "$TEST_DIRECTORY"/t4020/diff.NUL actual
+   sed -e "s/^index .*/index $before..$after 100644/" \
+   "$TEST_DIRECTORY"/t4020/diff.NUL >expected-diff &&
+   test_cmp expected-diff actual
 '
 
 test_expect_success 'GIT_EXTERNAL_DIFF with more than one changed files' '
@@ -237,7 +241,7 @@ test_expect_success 'diff --cached' '
git update-index --assume-unchanged file &&
echo second >file &&
git diff --cached >actual &&
-   test_cmp "$TEST_DIRECTORY"/t4020/diff.NUL actual
+   test_cmp expected-diff actual
 '
 
 test_expect_success 'clean up crlf leftovers' '


[PATCH v2 20/28] t4029: fix test indentation

2018-05-12 Thread brian m. carlson
We typically indent our tests with a single tab, partially so that we
can take advantage of indented heredocs.  Make this change and move the
quote marks to be in the typical position for our tests.

Signed-off-by: brian m. carlson 
---
 t/t4029-diff-trailing-space.sh | 33 -
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/t/t4029-diff-trailing-space.sh b/t/t4029-diff-trailing-space.sh
index 3ccc237a8d..f4e18cb8d3 100755
--- a/t/t4029-diff-trailing-space.sh
+++ b/t/t4029-diff-trailing-space.sh
@@ -18,22 +18,21 @@ index 5f6a263..8cb8bae 100644
 EOF
 exit 1
 
-test_expect_success \
-"$test_description" \
-'printf "\nx\n" > f &&
- git add f &&
- git commit -q -m. f &&
- printf "\ny\n" > f &&
- git config --bool diff.suppressBlankEmpty true &&
- git diff f > actual &&
- test_cmp exp actual &&
- perl -i.bak -p -e "s/^\$/ /" exp &&
- git config --bool diff.suppressBlankEmpty false &&
- git diff f > actual &&
- test_cmp exp actual &&
- git config --bool --unset diff.suppressBlankEmpty &&
- git diff f > actual &&
- test_cmp exp actual
- '
+test_expect_success "$test_description" '
+   printf "\nx\n" > f &&
+   git add f &&
+   git commit -q -m. f &&
+   printf "\ny\n" > f &&
+   git config --bool diff.suppressBlankEmpty true &&
+   git diff f > actual &&
+   test_cmp exp actual &&
+   perl -i.bak -p -e "s/^\$/ /" exp &&
+   git config --bool diff.suppressBlankEmpty false &&
+   git diff f > actual &&
+   test_cmp exp actual &&
+   git config --bool --unset diff.suppressBlankEmpty &&
+   git diff f > actual &&
+   test_cmp exp actual
+'
 
 test_done


[PATCH v2 21/28] t4029: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test so that it computes variables for blobs instead of using
hard-coded hashes.

Signed-off-by: brian m. carlson 
---
 t/t4029-diff-trailing-space.sh | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/t/t4029-diff-trailing-space.sh b/t/t4029-diff-trailing-space.sh
index f4e18cb8d3..eaa56521e8 100755
--- a/t/t4029-diff-trailing-space.sh
+++ b/t/t4029-diff-trailing-space.sh
@@ -6,7 +6,7 @@ test_description='diff honors config option, 
diff.suppressBlankEmpty'
 
 . ./test-lib.sh
 
-cat <<\EOF > exp ||
+cat <<\EOF >expected ||
 diff --git a/f b/f
 index 5f6a263..8cb8bae 100644
 --- a/f
@@ -20,9 +20,12 @@ exit 1
 
 test_expect_success "$test_description" '
printf "\nx\n" > f &&
+   before=$(git rev-parse --short $(git hash-object f)) &&
git add f &&
git commit -q -m. f &&
printf "\ny\n" > f &&
+   after=$(git rev-parse --short $(git hash-object f)) &&
+   sed -e "s/^index .*/index $before..$after 100644/" expected >exp &&
git config --bool diff.suppressBlankEmpty true &&
git diff f > actual &&
test_cmp exp actual &&


[PATCH v2 26/28] t4045: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test so that it computes variables for blobs instead of using
hard-coded hashes.

Signed-off-by: brian m. carlson 
---
 t/t4045-diff-relative.sh | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/t/t4045-diff-relative.sh b/t/t4045-diff-relative.sh
index 6471a68701..36f8ed8a81 100755
--- a/t/t4045-diff-relative.sh
+++ b/t/t4045-diff-relative.sh
@@ -8,6 +8,7 @@ test_expect_success 'setup' '
echo content >file1 &&
mkdir subdir &&
echo other content >subdir/file2 &&
+   blob=$(git hash-object subdir/file2) &&
git add . &&
git commit -m one
 '
@@ -17,10 +18,11 @@ check_diff () {
shift
expect=$1
shift
+   short_blob=$(git rev-parse --short $blob)
cat >expected <<-EOF
diff --git a/$expect b/$expect
new file mode 100644
-   index 000..25c05ef
+   index 000..$short_blob
--- /dev/null
+++ b/$expect
@@ -0,0 +1 @@
@@ -68,7 +70,7 @@ check_raw () {
expect=$1
shift
cat >expected <<-EOF
-   :00 100644  
25c05ef3639d2d270e7fe765a67668f098092bc5 A  $expect
+   :00 100644  $blob A $expect
EOF
test_expect_success "--raw $*" "
git -C '$dir' diff --no-abbrev --raw $* HEAD^ >actual &&


[PATCH v2 11/28] t2203: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test so that it computes variables for blobs instead of using
hard-coded hashes.

Signed-off-by: brian m. carlson 
---
 t/t2203-add-intent.sh | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/t/t2203-add-intent.sh b/t/t2203-add-intent.sh
index 1797f946b9..04d840a544 100755
--- a/t/t2203-add-intent.sh
+++ b/t/t2203-add-intent.sh
@@ -27,8 +27,8 @@ test_expect_success 'git status' '
 
 test_expect_success 'git status with porcelain v2' '
git status --porcelain=v2 | grep -v "^?" >actual &&
-   nam1=d00491fd7e5bb6fa28c517a0bb32b8b506539d4d &&
-   nam2=ce013625030ba8dba906f756967f9e9ca394464a &&
+   nam1=$(echo 1 | git hash-object --stdin) &&
+   nam2=$(git hash-object elif) &&
cat >expect <<-EOF &&
1 DA N... 100644 00 100644 $nam1 $ZERO_OID 1.t
1 A. N... 00 100644 100644 $ZERO_OID $nam2 elif
@@ -181,7 +181,7 @@ test_expect_success 'rename detection finds the right 
names' '
EOF
test_cmp expected.2 actual.2 &&
 
-   hash=12f00e90b6ef79117ce6e650416b8cf517099b78 &&
+   hash=$(git hash-object third) &&
git status --porcelain=v2 | grep -v "^?" >actual.3 &&
cat >expected.3 <<-EOF &&
2 .R N... 100644 100644 100644 $hash $hash R100 third   first
@@ -212,7 +212,7 @@ test_expect_success 'double rename detection in status' '
EOF
test_cmp expected.2 actual.2 &&
 
-   hash=12f00e90b6ef79117ce6e650416b8cf517099b78 &&
+   hash=$(git hash-object third) &&
git status --porcelain=v2 | grep -v "^?" >actual.3 &&
cat >expected.3 <<-EOF &&
2 R. N... 100644 100644 100644 $hash $hash R100 second  first


[PATCH v2 08/28] t1512: skip test if not using SHA-1

2018-05-12 Thread brian m. carlson
This test relies on objects with colliding short names which are
necessarily dependent on the hash used.  Skip the test if we're not
using SHA-1.

Signed-off-by: brian m. carlson 
---
 t/t1512-rev-parse-disambiguation.sh | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/t/t1512-rev-parse-disambiguation.sh 
b/t/t1512-rev-parse-disambiguation.sh
index 711704ba5a..6537f30c9e 100755
--- a/t/t1512-rev-parse-disambiguation.sh
+++ b/t/t1512-rev-parse-disambiguation.sh
@@ -22,6 +22,12 @@ one tagged as v1.0.0.  They all have one regular file each.
 
 . ./test-lib.sh
 
+if ! test_have_prereq SHA1
+then
+   skip_all='not using SHA-1 for objects'
+   test_done
+fi
+
 test_expect_success 'blob and tree' '
test_tick &&
(


[PATCH v2 19/28] t4022: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test so that it computes variables for blobs instead of using
hard-coded hashes.

Signed-off-by: brian m. carlson 
---
 t/t4022-diff-rewrite.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/t/t4022-diff-rewrite.sh b/t/t4022-diff-rewrite.sh
index cb51d9f9d4..0f1287a8ce 100755
--- a/t/t4022-diff-rewrite.sh
+++ b/t/t4022-diff-rewrite.sh
@@ -13,6 +13,7 @@ test_expect_success setup '
  "nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM" \
  <"$TEST_DIRECTORY"/../COPYING >test &&
echo "to be deleted" >test2 &&
+   blob=$(git rev-parse --short $(git hash-object test2)) &&
git add test2
 
 '
@@ -27,7 +28,7 @@ test_expect_success 'detect rewrite' '
 cat >expect 

[PATCH v2 10/28] t: skip pack tests if not using SHA-1

2018-05-12 Thread brian m. carlson
These tests rely on creating packs with specially named objects which
are necessarily dependent on the hash used.  Skip these tests if we're
not using SHA-1.

Signed-off-by: brian m. carlson 
---
 t/t5308-pack-detect-duplicates.sh | 6 ++
 t/t5309-pack-delta-cycles.sh  | 6 ++
 2 files changed, 12 insertions(+)

diff --git a/t/t5308-pack-detect-duplicates.sh 
b/t/t5308-pack-detect-duplicates.sh
index 156ae9e9d3..6845c1f3c3 100755
--- a/t/t5308-pack-detect-duplicates.sh
+++ b/t/t5308-pack-detect-duplicates.sh
@@ -4,6 +4,12 @@ test_description='handling of duplicate objects in incoming 
packfiles'
 . ./test-lib.sh
 . "$TEST_DIRECTORY"/lib-pack.sh
 
+if ! test_have_prereq SHA1
+then
+   skip_all='not using SHA-1 for objects'
+   test_done
+fi
+
 # The sha1s we have in our pack. It's important that these have the same
 # starting byte, so that they end up in the same fanout section of the index.
 # That lets us make sure we are exercising the binary search with both sets.
diff --git a/t/t5309-pack-delta-cycles.sh b/t/t5309-pack-delta-cycles.sh
index 3e7861b075..491556dad9 100755
--- a/t/t5309-pack-delta-cycles.sh
+++ b/t/t5309-pack-delta-cycles.sh
@@ -4,6 +4,12 @@ test_description='test index-pack handling of delta cycles in 
packfiles'
 . ./test-lib.sh
 . "$TEST_DIRECTORY"/lib-pack.sh
 
+if ! test_have_prereq SHA1
+then
+   skip_all='not using SHA-1 for objects'
+   test_done
+fi
+
 # Two similar-ish objects that we have computed deltas between.
 A=01d7713666f4de822776c7622c10f1b07de280dc
 B=e68fe8129b546b101aee9510c5328e7f21ca1d18


[PATCH v2 06/28] t0000: annotate with SHA1 prerequisite

2018-05-12 Thread brian m. carlson
Since this is a core test that tests basic functionality, annotate the
assertions that have dependencies on SHA-1 with the appropriate
prerequisite.

Signed-off-by: brian m. carlson 
---
 t/t-basic.sh | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/t/t-basic.sh b/t/t-basic.sh
index 7fd87dd544..af61d083b4 100755
--- a/t/t-basic.sh
+++ b/t/t-basic.sh
@@ -839,7 +839,7 @@ test_expect_success 'writing tree out with git write-tree' '
 '
 
 # we know the shape and contents of the tree and know the object ID for it.
-test_expect_success 'validate object ID of a known tree' '
+test_expect_success SHA1 'validate object ID of a known tree' '
test "$tree" = 7bb943559a305bdd6bdee2cef6e5df2413c3d30a
 '
 
@@ -882,7 +882,7 @@ test_expect_success 'showing stage with git ls-files 
--stage' '
git ls-files --stage >current
 '
 
-test_expect_success 'validate git ls-files output for a known tree' '
+test_expect_success SHA1 'validate git ls-files output for a known tree' '
cat >expected <<-\EOF &&
100644 f87290f8eb2cbbea7857214459a0739927eab154 0   path0
12 15a98433ae33114b085f3eb3bb03b832b3180a01 0   path0sym
@@ -900,7 +900,7 @@ test_expect_success 'writing tree out with git write-tree' '
tree=$(git write-tree)
 '
 
-test_expect_success 'validate object ID for a known tree' '
+test_expect_success SHA1 'validate object ID for a known tree' '
test "$tree" = 087704a96baf1c2d1c869a8b084481e121c88b5b
 '
 
@@ -908,7 +908,7 @@ test_expect_success 'showing tree with git ls-tree' '
 git ls-tree $tree >current
 '
 
-test_expect_success 'git ls-tree output for a known tree' '
+test_expect_success SHA1 'git ls-tree output for a known tree' '
cat >expected <<-\EOF &&
100644 blob f87290f8eb2cbbea7857214459a0739927eab154path0
12 blob 15a98433ae33114b085f3eb3bb03b832b3180a01path0sym
@@ -924,7 +924,7 @@ test_expect_success 'showing tree with git ls-tree -r' '
git ls-tree -r $tree >current
 '
 
-test_expect_success 'git ls-tree -r output for a known tree' '
+test_expect_success SHA1 'git ls-tree -r output for a known tree' '
cat >expected <<-\EOF &&
100644 blob f87290f8eb2cbbea7857214459a0739927eab154path0
12 blob 15a98433ae33114b085f3eb3bb03b832b3180a01path0sym
@@ -943,7 +943,7 @@ test_expect_success 'showing tree with git ls-tree -r -t' '
git ls-tree -r -t $tree >current
 '
 
-test_expect_success 'git ls-tree -r output for a known tree' '
+test_expect_success SHA1 'git ls-tree -r output for a known tree' '
cat >expected <<-\EOF &&
100644 blob f87290f8eb2cbbea7857214459a0739927eab154path0
12 blob 15a98433ae33114b085f3eb3bb03b832b3180a01path0sym
@@ -964,7 +964,7 @@ test_expect_success 'writing partial tree out with git 
write-tree --prefix' '
ptree=$(git write-tree --prefix=path3)
 '
 
-test_expect_success 'validate object ID for a known tree' '
+test_expect_success SHA1 'validate object ID for a known tree' '
test "$ptree" = 21ae8269cacbe57ae09138dcc3a2887f904d02b3
 '
 
@@ -972,7 +972,7 @@ test_expect_success 'writing partial tree out with git 
write-tree --prefix' '
ptree=$(git write-tree --prefix=path3/subp3)
 '
 
-test_expect_success 'validate object ID for a known tree' '
+test_expect_success SHA1 'validate object ID for a known tree' '
test "$ptree" = 3c5e5399f3a333eddecce7a9b9465b63f65f51e2
 '
 
@@ -1006,7 +1006,7 @@ test_expect_success 'git read-tree followed by write-tree 
should be idempotent'
test "$newtree" = "$tree"
 '
 
-test_expect_success 'validate git diff-files output for a know cache/work tree 
state' '
+test_expect_success SHA1 'validate git diff-files output for a know cache/work 
tree state' '
cat >expected <<\EOF &&
 :100644 100644 f87290f8eb2cbbea7857214459a0739927eab154 
 M path0
 :12 12 15a98433ae33114b085f3eb3bb03b832b3180a01 
 M path0sym
@@ -1033,21 +1033,21 @@ test_expect_success 'no diff after checkout and git 
update-index --refresh' '
 
 P=087704a96baf1c2d1c869a8b084481e121c88b5b
 
-test_expect_success 'git commit-tree records the correct tree in a commit' '
+test_expect_success SHA1 'git commit-tree records the correct tree in a 
commit' '
commit0=$(echo NO | git commit-tree $P) &&
tree=$(git show --pretty=raw $commit0 |
 sed -n -e "s/^tree //p" -e "/^author /q") &&
test "z$tree" = "z$P"
 '
 
-test_expect_success 'git commit-tree records the correct parent in a commit' '
+test_expect_success SHA1 'git commit-tree records the correct parent in a 
commit' '
commit1=$(echo NO | git commit-tree $P -p $commit0) &&
parent=$(git show --pretty=raw $commit1 |
  

[PATCH v2 09/28] t4044: skip test if not using SHA-1

2018-05-12 Thread brian m. carlson
This test relies on objects with colliding short names which are
necessarily dependent on the hash used.  Skip the test if we're not
using SHA-1.

Signed-off-by: brian m. carlson 
---
 t/t4044-diff-index-unique-abbrev.sh | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/t/t4044-diff-index-unique-abbrev.sh 
b/t/t4044-diff-index-unique-abbrev.sh
index d5ce72be63..647905e01f 100755
--- a/t/t4044-diff-index-unique-abbrev.sh
+++ b/t/t4044-diff-index-unique-abbrev.sh
@@ -3,6 +3,12 @@
 test_description='test unique sha1 abbreviation on "index from..to" line'
 . ./test-lib.sh
 
+if ! test_have_prereq SHA1
+then
+   skip_all='not using SHA-1 for objects'
+   test_done
+fi
+
 cat >expect_initial <

[PATCH v2 07/28] t1007: annotate with SHA1 prerequisite

2018-05-12 Thread brian m. carlson
Since this is a core test that tests basic functionality, annotate the
assertions that have dependencies on SHA-1 with the appropriate
prerequisite.

Signed-off-by: brian m. carlson 
---
 t/t1007-hash-object.sh | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh
index 532682f51c..a37753047e 100755
--- a/t/t1007-hash-object.sh
+++ b/t/t1007-hash-object.sh
@@ -9,13 +9,13 @@ echo_without_newline() {
 }
 
 test_blob_does_not_exist() {
-   test_expect_success 'blob does not exist in database' "
+   test_expect_success SHA1 'blob does not exist in database' "
test_must_fail git cat-file blob $1
"
 }
 
 test_blob_exists() {
-   test_expect_success 'blob exists in database' "
+   test_expect_success SHA1 'blob exists in database' "
git cat-file blob $1
"
 }
@@ -73,19 +73,19 @@ test_expect_success "Can't use --path with --no-filters" '
 
 push_repo
 
-test_expect_success 'hash a file' '
+test_expect_success SHA1 'hash a file' '
test $hello_sha1 = $(git hash-object hello)
 '
 
 test_blob_does_not_exist $hello_sha1
 
-test_expect_success 'hash from stdin' '
+test_expect_success SHA1 'hash from stdin' '
test $example_sha1 = $(git hash-object --stdin < example)
 '
 
 test_blob_does_not_exist $example_sha1
 
-test_expect_success 'hash a file and write to database' '
+test_expect_success SHA1 'hash a file and write to database' '
test $hello_sha1 = $(git hash-object -w hello)
 '
 
@@ -161,7 +161,7 @@ pop_repo
 for args in "-w --stdin" "--stdin -w"; do
push_repo
 
-   test_expect_success "hash from stdin and write to database ($args)" '
+   test_expect_success SHA1 "hash from stdin and write to database 
($args)" '
test $example_sha1 = $(git hash-object $args < example)
'
 
@@ -176,14 +176,14 @@ example"
 sha1s="$hello_sha1
 $example_sha1"
 
-test_expect_success "hash two files with names on stdin" '
+test_expect_success SHA1 "hash two files with names on stdin" '
test "$sha1s" = "$(echo_without_newline "$filenames" | git hash-object 
--stdin-paths)"
 '
 
 for args in "-w --stdin-paths" "--stdin-paths -w"; do
push_repo
 
-   test_expect_success "hash two files with names on stdin and write to 
database ($args)" '
+   test_expect_success SHA1 "hash two files with names on stdin and write 
to database ($args)" '
test "$sha1s" = "$(echo_without_newline "$filenames" | git 
hash-object $args)"
'
 


[PATCH v2 22/28] t4030: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test so that it computes variables for blobs instead of using
hard-coded hashes.

Signed-off-by: brian m. carlson 
---
 t/t4030-diff-textconv.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
index aad6c7f78d..4cb9f0e523 100755
--- a/t/t4030-diff-textconv.sh
+++ b/t/t4030-diff-textconv.sh
@@ -148,7 +148,8 @@ test_expect_success 'diffstat does not run textconv' '
 # restore working setup
 echo file diff=foo >.gitattributes
 
-cat >expect.typechange <<'EOF'
+symlink=$(git rev-parse --short $(printf frotz | git hash-object --stdin))
+cat >expect.typechange 

[PATCH v2 12/28] t3103: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test so that it uses variables and command substitution for
trees instead of hard-coded hashes.  This also has the benefit of making
it more obvious how the test works.

Signed-off-by: brian m. carlson 
---
 t/t3103-ls-tree-misc.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/t/t3103-ls-tree-misc.sh b/t/t3103-ls-tree-misc.sh
index 09dcf043fd..14520913af 100755
--- a/t/t3103-ls-tree-misc.sh
+++ b/t/t3103-ls-tree-misc.sh
@@ -17,7 +17,8 @@ test_expect_success 'setup' '
 '
 
 test_expect_success 'ls-tree fails with non-zero exit code on broken tree' '
-   rm -f .git/objects/5f/cffbd6e4c5c5b8d81f5e9314b20e338e35 &&
+   tree=$(git rev-parse HEAD:a) &&
+   rm -f .git/objects/$(echo $tree | sed -e "s,^\(..\),\1/,") &&
test_must_fail git ls-tree -r HEAD
 '
 


[PATCH v2 28/28] t5300: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test so that it computes variables for object IDs instead of
using hard-coded hashes.

Signed-off-by: brian m. carlson 
---
 t/t5300-pack-object.sh | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh
index 65ff60f2ee..9e66637a19 100755
--- a/t/t5300-pack-object.sh
+++ b/t/t5300-pack-object.sh
@@ -466,9 +466,11 @@ test_expect_success !PTHREADS,C_LOCALE_OUTPUT 
'pack-objects --threads=N or pack.
 
 test_expect_success \
 'fake a SHA1 hash collision' \
-'test -f   .git/objects/c8/2de19312b6c3695c0c18f70709a6c535682a67 &&
- cp -f .git/objects/9d/235ed07cd19811a6ceb342de82f190e49c9f68 \
-   .git/objects/c8/2de19312b6c3695c0c18f70709a6c535682a67'
+'long_a=$(git hash-object a | sed -e "s!^..!&/!") &&
+ long_b=$(git hash-object b | sed -e "s!^..!&/!") &&
+ test -f   .git/objects/$long_b &&
+ cp -f .git/objects/$long_a \
+   .git/objects/$long_b'
 
 test_expect_success \
 'make sure index-pack detects the SHA1 collision' \


[PATCH v2 15/28] t4007: abstract away SHA-1-specific constants

2018-05-12 Thread brian m. carlson
Adjust the test so that it computes variables for blobs and uses the
ZERO_OID variable instead of using hard-coded hashes.

Signed-off-by: brian m. carlson 
---
 t/t4007-rename-3.sh | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/t/t4007-rename-3.sh b/t/t4007-rename-3.sh
index dae327fabb..b187b7f6c6 100755
--- a/t/t4007-rename-3.sh
+++ b/t/t4007-rename-3.sh
@@ -17,6 +17,7 @@ test_expect_success 'prepare reference tree' '
echo $tree
 '
 
+blob=$(git hash-object "$TEST_DIRECTORY/diff-lib/COPYING")
 test_expect_success 'prepare work tree' '
cp path0/COPYING path1/COPYING &&
git update-index --add --remove path0/COPYING path1/COPYING
@@ -26,8 +27,8 @@ test_expect_success 'prepare work tree' '
 # path1 both have COPYING and the latter is a copy of path0/COPYING.
 # Comparing the full tree with cache should tell us so.
 
-cat >expected <<\EOF
-:100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 
6ff87c4664981e4397625791c8ea3bbb5f2279a3 C100  path0/COPYING   path1/COPYING
+cat >expected expected expected expected <

[PATCH v2 04/28] t/test-lib: introduce OID_REGEX

2018-05-12 Thread brian m. carlson
Currently we have a variable, $_x40, which contains a regex that matches
a full 40-character hex constant.  However, with NewHash, we'll have
object IDs that are longer than 40 characters.  In such a case, $_x40
will be a confusing name.  Create a $OID_REGEX variable which will
always reflect a regex matching the appropriate object ID, regardless of
the length of the current hash.

Signed-off-by: brian m. carlson 
---
 t/test-lib.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/t/test-lib.sh b/t/test-lib.sh
index 58c2ea52c6..fed21c3dfc 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -184,6 +184,7 @@ _x40="$_x35$_x05"
 # Zero SHA-1
 _z40=
 
+OID_REGEX="$_x40"
 ZERO_OID=$_z40
 EMPTY_TREE=4b825dc642cb6eb9a060e54bf8d69288fbee4904
 EMPTY_BLOB=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
@@ -196,7 +197,7 @@ LF='
 # when case-folding filenames
 u200c=$(printf '\342\200\214')
 
-export _x05 _x35 _x40 _z40 LF u200c EMPTY_TREE EMPTY_BLOB ZERO_OID
+export _x05 _x35 _x40 _z40 LF u200c EMPTY_TREE EMPTY_BLOB ZERO_OID OID_REGEX
 
 # Each test should start with something like this, after copyright notices:
 #


[PATCH v2 03/28] t: switch $_z40 to $ZERO_OID

2018-05-12 Thread brian m. carlson
Switch all uses of $_z40 to $ZERO_OID so that they work correctly with
larger hashes.  This commit was created by using the following sed
command to modify all files in the t directory except t/test-lib.sh:

  sed -i 's/\$_z40/$ZERO_OID/g'

Signed-off-by: brian m. carlson 
---
 t/t1006-cat-file.sh|  8 ++---
 t/t1400-update-ref.sh  |  2 +-
 t/t1407-worktree-ref-store.sh  |  8 ++---
 t/t1450-fsck.sh|  4 +--
 t/t1501-work-tree.sh   |  6 ++--
 t/t1601-index-bogus.sh |  2 +-
 t/t1700-split-index.sh |  2 +-
 t/t2011-checkout-invalid-head.sh   |  2 +-
 t/t2025-worktree-add.sh|  8 ++---
 t/t2027-worktree-list.sh   |  2 +-
 t/t2107-update-index-basic.sh  |  4 +--
 t/t2201-add-update-typechange.sh   | 16 -
 t/t2203-add-intent.sh  |  6 ++--
 t/t3200-branch.sh  |  4 +--
 t/t4002-diff-basic.sh  |  2 +-
 t/t4014-format-patch.sh|  2 +-
 t/t4020-diff-external.sh   | 10 +++---
 t/t4027-diff-submodule.sh  |  6 ++--
 t/t4046-diff-unmerged.sh   | 14 
 t/t4054-diff-bogus-tree.sh | 12 +++
 t/t4058-diff-duplicates.sh | 12 +++
 t/t4150-am.sh  |  4 +--
 t/t4200-rerere.sh  |  2 +-
 t/t5516-fetch-push.sh  | 22 ++--
 t/t5527-fetch-odd-refs.sh  |  2 +-
 t/t5571-pre-push-hook.sh   |  8 ++---
 t/t6120-describe.sh|  2 +-
 t/t6300-for-each-ref.sh|  2 +-
 t/t6301-for-each-ref-errors.sh |  2 +-
 t/t7009-filter-branch-null-sha1.sh |  2 +-
 t/t7011-skip-worktree-reading.sh   |  2 +-
 t/t7064-wtstatus-pv2.sh| 58 +++---
 32 files changed, 119 insertions(+), 119 deletions(-)

diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 2ac3b940c6..13dd510b2e 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -236,8 +236,8 @@ test_expect_success "--batch-check for an empty line" '
 '
 
 test_expect_success 'empty --batch-check notices missing object' '
-   echo "$_z40 missing" >expect &&
-   echo "$_z40" | git cat-file --batch-check="" >actual &&
+   echo "$ZERO_OID missing" >expect &&
+   echo "$ZERO_OID" | git cat-file --batch-check="" >actual &&
test_cmp expect actual
 '
 
@@ -294,8 +294,8 @@ test_expect_success 'setup blobs which are likely to delta' 
'
 
 test_expect_success 'confirm that neither loose blob is a delta' '
cat >expect <<-EOF &&
-   $_z40
-   $_z40
+   $ZERO_OID
+   $ZERO_OID
EOF
git cat-file --batch-check="%(deltabase)" actual &&
test_cmp expect actual
diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh
index 664a3a4e4e..f6dc05654e 100755
--- a/t/t1400-update-ref.sh
+++ b/t/t1400-update-ref.sh
@@ -6,7 +6,7 @@
 test_description='Test git update-ref and basic ref logging'
 . ./test-lib.sh
 
-Z=$_z40
+Z=$ZERO_OID
 
 m=refs/heads/master
 n_dir=refs/heads/gu
diff --git a/t/t1407-worktree-ref-store.sh b/t/t1407-worktree-ref-store.sh
index 2211f9831f..4623ae15c4 100755
--- a/t/t1407-worktree-ref-store.sh
+++ b/t/t1407-worktree-ref-store.sh
@@ -50,13 +50,13 @@ test_expect_success 'create_symref(FOO, refs/heads/master)' 
'
 '
 
 test_expect_success 'for_each_reflog()' '
-   echo $_z40 > .git/logs/PSEUDO-MAIN &&
+   echo $ZERO_OID > .git/logs/PSEUDO-MAIN &&
mkdir -p .git/logs/refs/bisect &&
-   echo $_z40 > .git/logs/refs/bisect/random &&
+   echo $ZERO_OID > .git/logs/refs/bisect/random &&
 
-   echo $_z40 > .git/worktrees/wt/logs/PSEUDO-WT &&
+   echo $ZERO_OID > .git/worktrees/wt/logs/PSEUDO-WT &&
mkdir -p .git/worktrees/wt/logs/refs/bisect &&
-   echo $_z40 > .git/worktrees/wt/logs/refs/bisect/wt-random &&
+   echo $ZERO_OID > .git/worktrees/wt/logs/refs/bisect/wt-random &&
 
$RWT for-each-reflog | cut -c 42- | sort >actual &&
cat >expected <<-\EOF &&
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index cb4b66e29d..91fd71444d 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -713,7 +713,7 @@ test_expect_success 'fsck notices dangling objects' '
 
 test_expect_success 'fsck $name notices bogus $name' '
test_must_fail git fsck bogus &&
-   test_must_fail git fsck $_z40
+   test_must_fail git fsck $ZERO_OID
 '
 
 test_expect_success 'bogus head does not fallback to all heads' '
@@ -723,7 +723,7 @@ test_expect_success 'bogus head does not fallback to all 
heads' '
blob=$(git rev-parse :foo) &&
test_when_finished "git rm --cached foo" &&
remove_object $blob &&
-   test_must_fail git fsck $_z40 >out 2>&1 &&
+   test_must_fail git fsck $ZERO_OID >out 2>&1 &&
! grep $blob out
 '
 
diff --git a/t/t1501-work-tree.sh b/t/t1501-work-tree.sh
index 9c0bc65250..afcdfafe45 100755
--- a/t/t1501-work-tree.sh
+++ b/t/t1501-work-tree.sh
@@ 

[PATCH v2 05/28] t: switch $_x40 to $OID_REGEX

2018-05-12 Thread brian m. carlson
Switch all uses of $_x40 to $OID_REGEX so that they work correctly with
larger hashes.  This commit was created by using the following sed
command to modify all files in the t directory except t/test-lib.sh:

  sed -i 's/\$_x40/$OID_REGEX/g'

Signed-off-by: brian m. carlson 
---
 t/diff-lib.sh   |  4 ++--
 t/t0090-cache-tree.sh   |  2 +-
 t/t1000-read-tree-m-3way.sh |  2 +-
 t/t1001-read-tree-m-2way.sh |  2 +-
 t/t1002-read-tree-m-u-2way.sh   |  2 +-
 t/t1012-read-tree-df.sh |  2 +-
 t/t3100-ls-tree-restrict.sh |  2 +-
 t/t3101-ls-tree-dirname.sh  |  2 +-
 t/t3510-cherry-pick-sequence.sh |  8 
 t/t4002-diff-basic.sh   |  2 +-
 t/t4006-diff-mode.sh|  2 +-
 t/t4014-format-patch.sh |  2 +-
 t/t4201-shortlog.sh |  2 +-
 t/t5150-request-pull.sh |  2 +-
 t/t6006-rev-list-format.sh  |  4 ++--
 t/t6012-rev-list-simplify.sh|  2 +-
 t/t6111-rev-list-treesame.sh|  2 +-
 t/t7506-status-submodule.sh |  2 +-
 t/t9010-svn-fe.sh   | 14 +++---
 t/t9300-fast-import.sh  |  6 +++---
 20 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/t/diff-lib.sh b/t/diff-lib.sh
index c211dc40ee..2de880f7a5 100644
--- a/t/diff-lib.sh
+++ b/t/diff-lib.sh
@@ -1,6 +1,6 @@
 :
 
-sanitize_diff_raw='/^:/s/ '"\($_x40\)"' '"\($_x40\)"' \([A-Z]\)[0-9]*  / \1 \2 
\3# /'
+sanitize_diff_raw='/^:/s/ '"\($OID_REGEX\)"' '"\($OID_REGEX\)"' 
\([A-Z]\)[0-9]*/ \1 \2 \3# /'
 compare_diff_raw () {
 # When heuristics are improved, the score numbers would change.
 # Ignore them while comparing.
@@ -12,7 +12,7 @@ compare_diff_raw () {
 test_cmp .tmp-1 .tmp-2 && rm -f .tmp-1 .tmp-2
 }
 
-sanitize_diff_raw_z='/^:/s/ '"$_x40"' '"$_x40"' \([A-Z]\)[0-9]*$/ X X \1#/'
+sanitize_diff_raw_z='/^:/s/ '"$OID_REGEX"' '"$OID_REGEX"' \([A-Z]\)[0-9]*$/ X 
X \1#/'
 compare_diff_raw_z () {
 # When heuristics are improved, the score numbers would change.
 # Ignore them while comparing.
diff --git a/t/t0090-cache-tree.sh b/t/t0090-cache-tree.sh
index 4ae0995cd9..0c61268fd2 100755
--- a/t/t0090-cache-tree.sh
+++ b/t/t0090-cache-tree.sh
@@ -9,7 +9,7 @@ cache-tree extension.
 
 cmp_cache_tree () {
test-tool dump-cache-tree | sed -e '/#(ref)/d' >actual &&
-   sed "s/$_x40/SHA/" filtered &&
+   sed "s/$OID_REGEX/SHA/" filtered &&
test_cmp "$1" filtered
 }
 
diff --git a/t/t1000-read-tree-m-3way.sh b/t/t1000-read-tree-m-3way.sh
index 3c4d2d6045..013c5a7bc3 100755
--- a/t/t1000-read-tree-m-3way.sh
+++ b/t/t1000-read-tree-m-3way.sh
@@ -128,7 +128,7 @@ cat >expected <<\EOF
 EOF
 
 check_result () {
-   git ls-files --stage | sed -e 's/ '"$_x40"' / X /' >current &&
+   git ls-files --stage | sed -e 's/ '"$OID_REGEX"' / X /' >current &&
test_cmp expected current
 }
 
diff --git a/t/t1001-read-tree-m-2way.sh b/t/t1001-read-tree-m-2way.sh
index 5ededd8e40..1057a96b24 100755
--- a/t/t1001-read-tree-m-2way.sh
+++ b/t/t1001-read-tree-m-2way.sh
@@ -30,7 +30,7 @@ read_tree_twoway () {
 compare_change () {
sed -n >current \
-e '/^--- /d; /^+++ /d; /^@@ /d;' \
-   -e 's/^\([-+][0-7][0-7][0-7][0-7][0-7][0-7]\) '"$_x40"' /\1 X /p' \
+   -e 's/^\([-+][0-7][0-7][0-7][0-7][0-7][0-7]\) '"$OID_REGEX"' /\1 X 
/p' \
"$1"
test_cmp expected current
 }
diff --git a/t/t1002-read-tree-m-u-2way.sh b/t/t1002-read-tree-m-u-2way.sh
index 7ca2e65d10..9c05f5e1f5 100755
--- a/t/t1002-read-tree-m-u-2way.sh
+++ b/t/t1002-read-tree-m-u-2way.sh
@@ -16,7 +16,7 @@ compare_change () {
-e '1{/^diff --git /d;}' \
-e '2{/^index /d;}' \
-e '/^--- /d; /^+++ /d; /^@@ /d;' \
-   -e 's/^\(.[0-7][0-7][0-7][0-7][0-7][0-7]\) '"$_x40"' /\1 X /' "$1"
+   -e 's/^\(.[0-7][0-7][0-7][0-7][0-7][0-7]\) '"$OID_REGEX"' /\1 X /' 
"$1"
test_cmp expected current
 }
 
diff --git a/t/t1012-read-tree-df.sh b/t/t1012-read-tree-df.sh
index a6a04b6b90..57f0770df1 100755
--- a/t/t1012-read-tree-df.sh
+++ b/t/t1012-read-tree-df.sh
@@ -32,7 +32,7 @@ settree () {
 
 checkindex () {
git ls-files -s |
-   sed "s|^[0-7][0-7]* $_x40 \([0-3]\) |\1 |" >current &&
+   sed "s|^[0-7][0-7]* $OID_REGEX \([0-3]\)|\1 |" >current &&
cat >expect &&
test_cmp expect current
 }
diff --git a/t/t3100-ls-tree-restrict.sh b/t/t3100-ls-tree-restrict.sh
index 325114f8fe..18baf49a49 100755
--- a/t/t3100-ls-tree-restrict.sh
+++ b/t/t3100-ls-tree-restrict.sh
@@ -32,7 +32,7 @@ test_expect_success \
  echo $tree'
 
 test_output () {
-sed -e "s/ $_x40   / X /" check
+sed -e "s/ $OID_REGEX  / X /" check
 test_cmp expected check
 }
 
diff --git a/t/t3101-ls-tree-dirname.sh b/t/t3101-ls-tree-dirname.sh
index 327ded4000..12bf31022a 100755
--- a/t/t3101-ls-tree-dirname.sh
+++ b/t/t3101-ls-tree-dirname.sh
@@ -40,7 +40,7 @@ test_expect_success 'setup' 

[PATCH v2 02/28] t/test-lib: introduce ZERO_OID

2018-05-12 Thread brian m. carlson
Currently we have a variable, $_z40, which contains the all-zero object
ID.  However, with NewHash, we'll have an all-zero object ID which is
longer than 40 hex characters.  In such a case, $_z40 will be a
confusing name.  Create a $ZERO_OID variable which will always reflect
the all-zeros object ID, regardless of the length of the current hash.

Signed-off-by: brian m. carlson 
---
 t/test-lib.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/t/test-lib.sh b/t/test-lib.sh
index fce728d2ea..58c2ea52c6 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -184,6 +184,7 @@ _x40="$_x35$_x05"
 # Zero SHA-1
 _z40=
 
+ZERO_OID=$_z40
 EMPTY_TREE=4b825dc642cb6eb9a060e54bf8d69288fbee4904
 EMPTY_BLOB=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
 
@@ -195,7 +196,7 @@ LF='
 # when case-folding filenames
 u200c=$(printf '\342\200\214')
 
-export _x05 _x35 _x40 _z40 LF u200c EMPTY_TREE EMPTY_BLOB
+export _x05 _x35 _x40 _z40 LF u200c EMPTY_TREE EMPTY_BLOB ZERO_OID
 
 # Each test should start with something like this, after copyright notices:
 #


[PATCH v2 00/28] Hash-independent tests (part 2)

2018-05-12 Thread brian m. carlson
This is part 2 in the series to make tests hash independent.

This series introduces an SHA1 prerequisite which checks if the hash in
use is SHA-1, and can be used to skip the test if it is not.
Additionally, because NewHash will be 256-bit, I introduced aliases for
the test constants $_x40 and $_z40 which will be less confusing when the
hash isn't 40 hex characters long.  I opted to leave the old names in
place for the moment to prevent any potential conflicts with other
series and will clean up any stragglers later.

To address any concerns that might be present, I do plan to retrofit
tests marked with the SHA1 prerequisite and ensure that they are passing
or are not applicable (e.g. for pack formats supporting only SHA-1).  I
agree with concerns that shipping a NewHash-capable Git without a
complete, functional testsuite would be a bad idea.

Changes from v1:
* Amend commit message to indicate that we *will* be updating tests
  annotated with the SHA1 prerequisite to work with NewHash.
* Rename FULL_HEX to OID_REGEX.
* Regenerate patch for OID_REGEX.
* Update variable name from "link_oid" to "slink_id" for consistency
  while still preserving alignment.
* Restore blank line between tests.

tbdiff output below.

brian m. carlson (28):
  t/test-lib: add an SHA1 prerequisite
  t/test-lib: introduce ZERO_OID
  t: switch $_z40 to $ZERO_OID
  t/test-lib: introduce OID_REGEX
  t: switch $_x40 to $OID_REGEX
  t: annotate with SHA1 prerequisite
  t1007: annotate with SHA1 prerequisite
  t1512: skip test if not using SHA-1
  t4044: skip test if not using SHA-1
  t: skip pack tests if not using SHA-1
  t2203: abstract away SHA-1-specific constants
  t3103: abstract away SHA-1-specific constants
  t3702: abstract away SHA-1-specific constants
  t3905: abstract away SHA-1-specific constants
  t4007: abstract away SHA-1-specific constants
  t4008: abstract away SHA-1-specific constants
  t4014: abstract away SHA-1-specific constants
  t4020: abstract away SHA-1-specific constants
  t4022: abstract away SHA-1-specific constants
  t4029: fix test indentation
  t4029: abstract away SHA-1-specific constants
  t4030: abstract away SHA-1-specific constants
  t/lib-diff-alternative: abstract away SHA-1-specific constants
  t4205: sort log output in a hash-independent way
  t4042: abstract away SHA-1-specific constants
  t4045: abstract away SHA-1-specific constants
  t4208: abstract away SHA-1-specific constants
  t5300: abstract away SHA-1-specific constants

 t/diff-lib.sh   |  4 +-
 t/lib-diff-alternative.sh   | 12 --
 t/t-basic.sh| 24 ++--
 t/t0090-cache-tree.sh   |  2 +-
 t/t1000-read-tree-m-3way.sh |  2 +-
 t/t1001-read-tree-m-2way.sh |  2 +-
 t/t1002-read-tree-m-u-2way.sh   |  2 +-
 t/t1006-cat-file.sh |  8 ++--
 t/t1007-hash-object.sh  | 16 
 t/t1012-read-tree-df.sh |  2 +-
 t/t1400-update-ref.sh   |  2 +-
 t/t1407-worktree-ref-store.sh   |  8 ++--
 t/t1450-fsck.sh |  4 +-
 t/t1501-work-tree.sh|  6 +--
 t/t1512-rev-parse-disambiguation.sh |  6 +++
 t/t1601-index-bogus.sh  |  2 +-
 t/t1700-split-index.sh  |  2 +-
 t/t2011-checkout-invalid-head.sh|  2 +-
 t/t2025-worktree-add.sh |  8 ++--
 t/t2027-worktree-list.sh|  2 +-
 t/t2107-update-index-basic.sh   |  4 +-
 t/t2201-add-update-typechange.sh| 16 
 t/t2203-add-intent.sh   | 14 +++
 t/t3100-ls-tree-restrict.sh |  2 +-
 t/t3101-ls-tree-dirname.sh  |  2 +-
 t/t3103-ls-tree-misc.sh |  3 +-
 t/t3200-branch.sh   |  4 +-
 t/t3510-cherry-pick-sequence.sh |  8 ++--
 t/t3702-add-edit.sh |  7 ++--
 t/t3905-stash-include-untracked.sh  | 11 --
 t/t4002-diff-basic.sh   |  2 +-
 t/t4006-diff-mode.sh|  2 +-
 t/t4007-rename-3.sh | 17 +
 t/t4008-diff-break-rewrite.sh   | 59 -
 t/t4014-format-patch.sh | 11 +++---
 t/t4020-diff-external.sh| 18 +
 t/t4022-diff-rewrite.sh |  5 ++-
 t/t4027-diff-submodule.sh   |  6 +--
 t/t4029-diff-trailing-space.sh  | 38 ++-
 t/t4030-diff-textconv.sh|  5 ++-
 t/t4042-diff-textconv-caching.sh| 16 +---
 t/t4044-diff-index-unique-abbrev.sh |  6 +++
 t/t4045-diff-relative.sh|  6 ++-
 t/t4046-diff-unmerged.sh| 14 +++
 t/t4054-diff-bogus-tree.sh  | 12 +++---
 t/t4058-diff-duplicates.sh  | 12 +++---
 t/t4150-am.sh   |  4 +-
 t/t4200-rerere.sh   |  2 +-
 t/t4201-shortlog.sh |  2 +-
 t/t4205-log-pretty-formats.sh   |  8 ++--
 t/t4208-log-magic-pathspec.sh   |  3 +-
 t/t5150-request-pull.sh |  2 +-
 t/t5300-pack-object.sh  

[PATCH v2 01/28] t/test-lib: add an SHA1 prerequisite

2018-05-12 Thread brian m. carlson
There are some basic tests in our codebase that test that we get fixed
SHA-1 values.  These are valuable because they make sure that our SHA-1
implementation is free of bugs, but obviously these tests will fail with
a different hash.

There are also tests which intentionally produce objects that have
collisions when truncated to a certain length to test our handling of
these cases.  These tests, too, will fail with a different hash.

Add an SHA1 prerequisite to annotate both of these types of tests and
disable them when we're using a different hash.  In the future, we will
create versions of these tests which handle both SHA-1 and NewHash.

Signed-off-by: brian m. carlson 
---
 t/test-lib.sh | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/t/test-lib.sh b/t/test-lib.sh
index ea2bbaaa7a..fce728d2ea 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1212,3 +1212,10 @@ test_lazy_prereq TIME_T_IS_64BIT 'test-tool date 
time_t-is64bit'
 test_lazy_prereq CURL '
curl --version
 '
+
+# SHA1 is a test if the hash algorithm in use is SHA-1.  This is both for tests
+# which will not work with other hash algorithms and tests that work but don't
+# test anything meaningful (e.g. special values which cause short collisions).
+test_lazy_prereq SHA1 '
+   test $(git hash-object /dev/null) = 
e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+'


[PATCH 3/3] doc: update doc for strict usage of -- checkout

2018-05-12 Thread Dannier Castro L
The flag '--' must always be before any file path when 
command is used.

The main documentation about the usage of  command is
updated to include the strict usage of the flag '--' so that the
user can specify file names over branch names.

Signed-off-by: Dannier Castro L :
---
 Documentation/git-checkout.txt | 20 ++--
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/Documentation/git-checkout.txt b/Documentation/git-checkout.txt
index ca5fc9c..8360a98 100644
--- a/Documentation/git-checkout.txt
+++ b/Documentation/git-checkout.txt
@@ -12,9 +12,9 @@ SYNOPSIS
 'git checkout' [-q] [-f] [-m] --detach []
 'git checkout' [-q] [-f] [-m] [--detach] 
 'git checkout' [-q] [-f] [-m] [[-b|-B|--orphan] ] []
-'git checkout' 

[PATCH 1/3] checkout.c: add strict usage of -- before file_path

2018-05-12 Thread Dannier Castro L
Currently,  is a complex command able to handle both
branches and files without any distintion other than their names,
taking into account that depending on the type (branch or file),
the functionality is completely different, the easier example:

$ git checkout   # Switch from current branch to .
$ git checkout # Restore  from HEAD, discarding
 # changes if it's necessary.
$ git checkout --  # The same as the last one, only with an
 # useless '--'.

For GIT new users, this complicated versatility of  could
be very confused, also considering that actually the flag '--' is
completely useless (added or not, there is not any difference for
this command), when the same program messages promote the use of
this flag.

Regarding the 's power to overwrite any file, discarding
changes if they exist without some way of recovering them, the
solution propuses that the usage of '--' is strict before to
specify the file(s) path(s) for any  command (including
all types of flags), as a "defense barrier" to make sure about
user's knowledge and intension running .

The solution consists in detect '--' into command args, allowing
the discard of changes and considering the following names as
file paths, otherwise, they are branch names.

Signed-off-by: Dannier Castro L 
---
 builtin/checkout.c | 30 +-
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index d76e13c..ec577b3 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -40,6 +40,7 @@ struct checkout_opts {
int ignore_skipworktree;
int ignore_other_worktrees;
int show_progress;
+   int discard_changes;
 
const char *new_branch;
const char *new_branch_force;
@@ -885,8 +886,8 @@ static int parse_branchname_arg(int argc, const char **argv,
/*
 * case 1: git checkout  -- []
 *
-*must be a valid tree, everything after the '--' must be
-*   a path.
+*must be a valid tree. '--' must always be before any path,
+*   it means, everything after the '--' must be a path.
 *
 * case 2: git checkout -- []
 *
@@ -900,26 +901,28 @@ static int parse_branchname_arg(int argc, const char 
**argv,
 *   omit at most one side), and if there is a unique merge base
 *   between A and B, A...B names that merge base.
 *
-*   (b) If  is _not_ a commit, either "--" is present
-*   or  is not a path, no -t or -b was given, and
-*   and there is a tracking branch whose name is 
-*   in one and only one remote, then this is a short-hand to
-*   fork local  from that remote-tracking branch.
+*   (b) If  is _not_ a commit, either "--" is present,
+*   no -t or -b was given, and there is a tracking branch
+*   whose name is  in one and only one remote,
+*   then this is a short-hand to fork local  from
+*   that remote-tracking branch.
 *
 *   (c) Otherwise, if "--" is present, treat it like case (1).
 *
 *   (d) Otherwise :
 *   - if it's a reference, treat it like case (1)
-*   - else if it's a path, treat it like case (2)
 *   - else: fail.
 *
-* case 4: git checkout  
+*can never be a path (at least without '--' before).
+*
+* case 4: git checkout  -- 
 *
 *   The first argument must not be ambiguous.
 *   - If it's *only* a reference, treat it like case (1).
-*   - If it's only a path, treat it like case (2).
 *   - else: fail.
 *
+*can never be a path (at least without '--' before).
+*
 */
if (!argc)
return 0;
@@ -928,6 +931,7 @@ static int parse_branchname_arg(int argc, const char **argv,
dash_dash_pos = -1;
for (i = 0; i < argc; i++) {
if (!strcmp(argv[i], "--")) {
+   opts->discard_changes = 1;
dash_dash_pos = i;
break;
}
@@ -957,8 +961,8 @@ static int parse_branchname_arg(int argc, const char **argv,
(check_filename(opts->prefix, arg) || !no_wildcard(arg)))
recover_with_dwim = 0;
/*
-* Accept "git checkout foo" and "git checkout foo --"
-* as candidates for dwim.
+* Accept "git checkout foo_branch" and
+* "git checkout foo_branch --" as candidates for dwim.
 */
if (!(argc == 1 && !has_dash_dash) &&
!(argc == 2 && has_dash_dash))
@@ -1254,7 +1258,7 @@ int cmd_checkout(int argc, const char **argv, const char 
*prefix)
}
 
UNLEAK(opts);
-   if 

[PATCH 2/3] test: update tests for strict usage of -- checkout

2018-05-12 Thread Dannier Castro L
The flag '--' must always be before any file path when 
command is used.

The list of 34 test files updated is shown:

  t0021-conversion.sh
  t0027-auto-crlf.sh
  t1011-read-tree-sparse-checkout.sh
  t1050-large.sh
  t1051-large-conversion.sh
  t2009-checkout-statinfo.sh
  t2010-checkout-ambiguous.sh
  t2013-checkout-submodule.sh
  t2016-checkout-patch.sh
  t2022-checkout-paths.sh
  t2028-worktree-move.sh
  t2030-unresolve-info.sh
  t3001-ls-files-others-exclude.sh
  t3509-cherry-pick-merge-df.sh
  t3510-cherry-pick-sequence.sh
  t4015-diff-whitespace.sh
  t4124-apply-ws-rule.sh
  t5403-post-checkout-hook.sh
  t6007-rev-list-cherry-pick-file.sh
  t6026-merge-attr.sh
  t6030-bisect-porcelain.sh
  t6111-rev-list-treesame.sh
  t7001-mv.sh
  t7008-grep-binary.sh
  t7201-co.sh
  t7300-clean.sh
  t7501-commit.sh
  t7502-commit.sh
  t7607-merge-overwrite.sh
  t7810-grep.sh
  t7811-grep-open.sh
  t8006-blame-textconv.sh
  t9010-svn-fe.sh
  t8003-blame-corner-cases.sh

Signed-off-by: Dannier Castro L 
---
 t/t0021-conversion.sh| 12 ++--
 t/t0027-auto-crlf.sh |  4 ++--
 t/t1011-read-tree-sparse-checkout.sh |  4 ++--
 t/t1050-large.sh |  2 +-
 t/t1051-large-conversion.sh  |  4 ++--
 t/t2009-checkout-statinfo.sh |  6 +++---
 t/t2010-checkout-ambiguous.sh|  4 ++--
 t/t2013-checkout-submodule.sh|  4 ++--
 t/t2016-checkout-patch.sh|  9 +
 t/t2022-checkout-paths.sh|  4 ++--
 t/t2028-worktree-move.sh |  2 +-
 t/t2030-unresolve-info.sh|  6 +++---
 t/t3001-ls-files-others-exclude.sh   |  2 +-
 t/t3420-rebase-autostash.sh  |  2 +-
 t/t3510-cherry-pick-sequence.sh  |  4 ++--
 t/t3910-mac-os-precompose.sh |  4 ++--
 t/t4015-diff-whitespace.sh   |  4 ++--
 t/t4117-apply-reject.sh  |  2 +-
 t/t4124-apply-ws-rule.sh | 16 
 t/t5403-post-checkout-hook.sh|  2 +-
 t/t6007-rev-list-cherry-pick-file.sh |  2 +-
 t/t6026-merge-attr.sh|  2 +-
 t/t6030-bisect-porcelain.sh  |  2 +-
 t/t6038-merge-text-auto.sh   |  2 +-
 t/t6050-replace.sh   |  2 +-
 t/t6111-rev-list-treesame.sh |  2 +-
 t/t7001-mv.sh|  2 +-
 t/t7008-grep-binary.sh   |  2 +-
 t/t7201-co.sh|  8 
 t/t7300-clean.sh |  2 +-
 t/t7501-commit.sh| 10 +-
 t/t7502-commit.sh|  2 +-
 t/t7607-merge-overwrite.sh   |  4 ++--
 t/t7810-grep.sh  |  2 +-
 t/t7811-grep-open.sh |  2 +-
 t/t8003-blame-corner-cases.sh|  2 +-
 t/t8006-blame-textconv.sh|  2 +-
 t/t9010-svn-fe.sh|  2 +-
 38 files changed, 71 insertions(+), 78 deletions(-)

diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index 46f8e58..9f4955a 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -380,7 +380,7 @@ test_expect_success PERL 'required process filter should 
filter data' '
git commit -m "test commit 2" &&
rm -f test2.r "testsubdir/test3 '\''sq'\'',\$x=.r" &&
 
-   filter_git checkout --quiet --no-progress . &&
+   filter_git checkout --quiet --no-progress -- . &&
cat >expected.log <<-EOF &&
START
init handshake complete
@@ -466,7 +466,7 @@ test_expect_success PERL 'required process filter should be 
used only for "clean
 
rm test.r &&
 
-   filter_git checkout --quiet --no-progress . &&
+   filter_git checkout --quiet --no-progress -- . &&
# If the filter would be used for "smudge", too, we would see
# "IN: smudge test.r 57 [OK] -- OUT: 57 . [OK]" here
cat >expected.log <<-EOF &&
@@ -580,7 +580,7 @@ test_expect_success PERL 'process filter should restart 
after unexpected write f
rm -f *.r &&
 
rm -f debug.log &&
-   git checkout --quiet --no-progress . 2>git-stderr.log &&
+   git checkout --quiet --no-progress -- . 2>git-stderr.log &&
 
grep "smudge write error at" git-stderr.log &&
grep "error: external filter" git-stderr.log &&
@@ -630,7 +630,7 @@ test_expect_success PERL 'process filter should not be 
restarted if it signals a
git add . &&
rm -f *.r &&
 
-   filter_git checkout --quiet --no-progress . &&
+   filter_git checkout --quiet --no-progress -- . &&
cat >expected.log <<-EOF &&
START
init handshake complete
@@ -669,7 +669,7 @@ test_expect_success PERL 'process filter abort stops 
processing of all further f
 
# Note: This test assumes that Git filters 

Re: [PATCH 2/4] mark_parents_uninteresting(): drop missing object check

2018-05-12 Thread Junio C Hamano
Jeff King  writes:

>   2. It "lies" about the commit by setting the parsed flag,
>  even though we didn't load any useful data into the
>  struct. This shouldn't matter for the UNINTERESTING
>  case, but we may later clear our flags and do another
>  traversal in the same process. While pretty unlikely,
>  it's possible that we could then look at the same
>  commit without the UNINTERESTING flag,...

Yeah, if two ranges given to tbdiff to be compared are computed
in-core, uninteresting boundary of one range is likely to be
interesting on the other range.



Re: [PATCH] commit.h: rearrange 'index' to shrink struct commit

2018-05-12 Thread Junio C Hamano
Nguyễn Thái Ngọc Duy   writes:

> On linux 64-bit architecture, pahole finds that there's a 4 bytes
> padding after 'index'. Moving it to the end reduces this struct's size
> from 72 to 64 bytes (because of another 4 bytes padding after
> graph_pos). On linux 32-bit, the struct size remains 52 bytes like
> before.
>
> Signed-off-by: Nguyễn Thái Ngọc Duy 
> ---
>  commit.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/commit.h b/commit.h
> index e57ae4b583..fd1cbe7263 100644
> --- a/commit.h
> +++ b/commit.h
> @@ -19,11 +19,11 @@ struct commit_list {
>  struct commit {
>   struct object object;
>   void *util;
> - unsigned int index;
>   timestamp_t date;
>   struct commit_list *parents;
>   struct tree *tree;
>   uint32_t graph_pos;
> + unsigned int index;
>  };
>  
>  extern int save_commit_buffer;

Quite nice.


[PATCHv2 0/1] add git-p4 unshelve command

2018-05-12 Thread Luke Diamand
This is another attempt to make a "git p4 unshelve" command.

Unshelving in p4 is a bit like a cross between cherry-pick
and "am", and is very commonly used for review.

This command helps git users who want to try out a shelved
p4 change from some other repo:

e.g.

   $ git p4 unshelve 12345
   unshelved CL12345 into refs/remotes/p4/unshelved/12345
   $ git show refs/remotes/p4/unshelved/12345

I abandoned an earlier attempt because it seemed like there
is no way to get around a rather nasty problem: git-p4
just constructs the commit and passes the file contents to
git-fastimport. But there's no easy way to construct the
*prior* commit, because Perforce doesn't record this
information, and so you can end up with other changes
mixed into the unshelved commit - these are the differences
between your tree and the other tree, for each file that
has been modified.

However, I think the command is sufficiently useful that
it's worth supporting anyway, even with that caveat.

I also tried to use "p4 describe" to get the deltas, but
that's very unsatisfactory: I found myself writing a
second-rate version of git's diff tool to try to make
up for the deficiencies in Perforce's diff tool.

It might be possible to reconstruct the missing base
commit information, but that's a reasonably tricky task.

I have incorporated some of the comments from the earlier
review rounds, in particular:

- no longer adds the [git-p4...] annotation in unshelve
- try to use .format() in place of %
- rename the target branch if it already exists

Luke Diamand (1):
  git-p4: add unshelve command

 Documentation/git-p4.txt |  26 ++
 git-p4.py| 171 ++-
 t/t9832-unshelve.sh  |  99 +++
 3 files changed, 260 insertions(+), 36 deletions(-)
 create mode 100755 t/t9832-unshelve.sh

-- 
2.17.0.392.gdeb1a6e9b7



[PATCH 1/1] git-p4: add unshelve command

2018-05-12 Thread Luke Diamand
This can be used to "unshelve" a shelved P4 commit into
a git commit.

For example:

  $ git p4 unshelve 12345

The resulting commit ends up in the branch:
   refs/remotes/p4/unshelved/12345

If that branch already exists, it is renamed - for example
the above branch would be saved as p4/unshelved/12345.1.

Caveat:

The unshelving is done against the current "p4/master" branch;
git-p4 uses "p4 print" to get the file contents at the requested
revision, and then fast-import creates a commit relative to p4/master.

Ideally what you would want is for fast-import to create the
commit based on the Perforce "revision" prior to the shelved commit,
but Perforce doesn't have such a concept - to do this, git-p4
would need to figure out the revisions of the individual files
before the shelved changelist, and then construct a temporary
git branch which matched this.

It's possible to do this, but doing so makes this change a lot more
complicated.

This limitation means that if you unshelve a change where some
of the changed files were not based on p4/master, you will get
an amalgam of the change you wanted, and these other changes.

The reference branch can be changed manually with the "--origin"
option.

The change adds a new Unshelve command class. This just runs the
existing P4Sync code tweaked to handle a shelved changelist.

Signed-off-by: Luke Diamand 
---
 Documentation/git-p4.txt |  26 ++
 git-p4.py| 171 ++-
 t/t9832-unshelve.sh  |  99 +++
 3 files changed, 260 insertions(+), 36 deletions(-)
 create mode 100755 t/t9832-unshelve.sh

diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt
index d8c8f11c9f..2d768eec10 100644
--- a/Documentation/git-p4.txt
+++ b/Documentation/git-p4.txt
@@ -164,6 +164,25 @@ $ git p4 submit --shelve
 $ git p4 submit --update-shelve 1234 --update-shelve 2345
 
 
+
+Unshelve
+
+Unshelving will take a shelved P4 changelist, and produce the equivalent git 
commit
+in the branch refs/remotes/p4/unshelved/.
+
+The git commit is created relative to the current p4/master, so if this
+is behind Perforce itself, it may include more changes than you expected. You 
can
+change the reference branch with the "--origin" option.
+
+If the target branch in refs/remotes/p4/unshelved already exists, the old one 
will
+be renamed.
+
+
+$ git p4 sync
+$ git p4 unshelve 12345
+$ git show refs/remotes/p4/unshelved/12345
+
+
 OPTIONS
 ---
 
@@ -337,6 +356,13 @@ These options can be used to modify 'git p4 rebase' 
behavior.
 --import-labels::
Import p4 labels.
 
+Unshelve options
+
+
+--origin::
+Sets the git refspec against which the shelved P4 changelist is compared.
+Defaults to p4/master.
+
 DEPOT PATH SYNTAX
 -
 The p4 depot path argument to 'git p4 sync' and 'git p4 clone' can
diff --git a/git-p4.py b/git-p4.py
index 7bb9cadc69..dcf6dc9f4f 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -316,12 +316,17 @@ def p4_last_change():
 results = p4CmdList(["changes", "-m", "1"], skip_info=True)
 return int(results[0]['change'])
 
-def p4_describe(change):
+def p4_describe(change, shelved=False):
 """Make sure it returns a valid result by checking for
the presence of field "time".  Return a dict of the
results."""
 
-ds = p4CmdList(["describe", "-s", str(change)], skip_info=True)
+cmd = ["describe", "-s"]
+if shelved:
+cmd += ["-S"]
+cmd += [str(change)]
+
+ds = p4CmdList(cmd, skip_info=True)
 if len(ds) != 1:
 die("p4 describe -s %d did not return 1 result: %s" % (change, 
str(ds)))
 
@@ -662,6 +667,12 @@ def gitBranchExists(branch):
 stderr=subprocess.PIPE, stdout=subprocess.PIPE);
 return proc.wait() == 0;
 
+def gitUpdateRef(ref, newvalue):
+subprocess.check_call(["git", "update-ref", ref, newvalue])
+
+def gitDeleteRef(ref):
+subprocess.check_call(["git", "update-ref", "-d", ref])
+
 _gitConfig = {}
 
 def gitConfig(key, typeSpecifier=None):
@@ -2411,6 +2422,7 @@ class P4Sync(Command, P4UserMap):
 self.tempBranches = []
 self.tempBranchLocation = "refs/git-p4-tmp"
 self.largeFileSystem = None
+self.suppress_meta_comment = False
 
 if gitConfig('git-p4.largeFileSystem'):
 largeFileSystemConstructor = 
globals()[gitConfig('git-p4.largeFileSystem')]
@@ -2421,6 +2433,18 @@ class P4Sync(Command, P4UserMap):
 if gitConfig("git-p4.syncFromOrigin") == "false":
 self.syncWithOrigin = False
 
+self.depotPaths = []
+self.changeRange = ""
+self.previousDepotPaths = []
+self.hasOrigin = False
+
+# map from branch depot path to parent branch
+self.knownBranches = {}
+self.initialParents = {}
+
+self.tz = "%+03d%02d" % (- time.timezone / 3600, ((- time.timezone % 
3600) / 60))
+self.labels = {}
+
 

Re: [PATCH v2 08/12] commit-graph: verify commit contents against odb

2018-05-12 Thread Martin Ågren
On 11 May 2018 at 23:15, Derrick Stolee  wrote:
> When running 'git commit-graph verify', compare the contents of the
> commits that are loaded from the commit-graph file with commits that are
> loaded directly from the object database. This includes checking the
> root tree object ID, commit date, and parents.
>
> Parse the commit from the graph during the initial loop through the
> object IDs to guarantee we parse from the commit-graph file.
>
> In addition, verify the generation number calculation is correct for all
> commits in the commit-graph file.
>
> While testing, we discovered that mutating the integer value for a
> parent to be outside the accepted range causes a segmentation fault. Add
> a new check in insert_parent_or_die() that prevents this fault. Check
> for that error during the test, both in the typical parents and in the
> list of parents for octopus merges.

This paragraph and the corresponding fix and test feel like a separate
patch to me. (The commit message of it could be "To test the next patch,
we threw invalid data at `git commit-graph verify, and it crashed in
pre-existing code, so let's fix that first" -- there is definitely a
connection.) Is this important enough to fast-track to master in time
for 2.18? My guess would be no.

> +
> +   if (pos >= g->num_commits)
> +   die("invalide parent position %"PRIu64, pos);

s/invalide/invalid/

> @@ -875,6 +879,8 @@ int verify_commit_graph(struct commit_graph *g)
> return 1;
>
> for (i = 0; i < g->num_commits; i++) {
> +   struct commit *graph_commit;
> +
> hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
>
> if (i && oidcmp(_oid, _oid) >= 0)
> @@ -892,6 +898,10 @@ int verify_commit_graph(struct commit_graph *g)
>
> cur_fanout_pos++;
> }
> +
> +   graph_commit = lookup_commit(_oid);
> +   if (!parse_commit_in_graph_one(g, graph_commit))
> +   graph_report("failed to parse %s from commit-graph", 
> oid_to_hex(_oid));
> }

Could this end up giving ridiculous amounts of output? It would depend
on the input, I guess.

> while (cur_fanout_pos < 256) {
> @@ -904,5 +914,95 @@ int verify_commit_graph(struct commit_graph *g)
> cur_fanout_pos++;
> }
>
> +   if (verify_commit_graph_error)
> +   return 1;

Well, here we give up before running into *too* much problem.

> +   for (i = 0; i < g->num_commits; i++) {
> +   struct commit *graph_commit, *odb_commit;
> +   struct commit_list *graph_parents, *odb_parents;
> +   int num_parents = 0;
> +
> +   hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
> +
> +   graph_commit = lookup_commit(_oid);
> +   odb_commit = (struct commit *)create_object(cur_oid.hash, 
> alloc_commit_node());
> +   if (parse_commit_internal(odb_commit, 0, 0)) {
> +   graph_report("failed to parse %s from object 
> database", oid_to_hex(_oid));
> +   continue;
> +   }
> +
> +   if (oidcmp(_commit_tree_in_graph_one(g, 
> graph_commit)->object.oid,
> +  get_commit_tree_oid(odb_commit)))
> +   graph_report("root tree object ID for commit %s in 
> commit-graph is %s != %s",
> +oid_to_hex(_oid),
> +
> oid_to_hex(get_commit_tree_oid(graph_commit)),
> +
> oid_to_hex(get_commit_tree_oid(odb_commit)));
> +
> +   if (graph_commit->date != odb_commit->date)
> +   graph_report("commit date for commit %s in 
> commit-graph is %"PRItime" != %"PRItime"",
> +oid_to_hex(_oid),
> +graph_commit->date,
> +odb_commit->date);
> +
> +
> +   graph_parents = graph_commit->parents;
> +   odb_parents = odb_commit->parents;
> +
> +   while (graph_parents) {
> +   num_parents++;
> +
> +   if (odb_parents == NULL)
> +   graph_report("commit-graph parent list for 
> commit %s is too long (%d)",
> +oid_to_hex(_oid),
> +num_parents);
> +
> +   if (oidcmp(_parents->item->object.oid, 
> _parents->item->object.oid))
> +   graph_report("commit-graph parent for %s is 
> %s != %s",
> +oid_to_hex(_oid),
> +
> oid_to_hex(_parents->item->object.oid),
> +
> oid_to_hex(_parents->item->object.oid));
> +

Re: [BUG] git send-email: incorrectly parses email address with comma

2018-05-12 Thread Heinrich Schuchardt
On 05/12/2018 11:48 AM, Jeff King wrote:
> On Sat, May 12, 2018 at 10:21:46AM +0200, Heinrich Schuchardt wrote:
> 
>> Git send-email allows to combine multiple email addresses in one
>> parameter, e.g.
>>
>> --to="a...@example.com, b...@example.com"
>>
>> But email addresses may contain commas themselves:
>>
>> --to="LASTNAME, firstname "
>>
>> This may lead to an error:
> 
> If the name contains syntactically relevant metacharacters, it can be
> quoted. So as a workaround, you can do:
> 
>   --to='"LASTNAME, firstname" '
> 
> I think rfc822 actually requires even names with just spaces in them to
> be quoted, but git-send-email and most other mail programs are pretty
> lax about allowing just about anything outside of the <>, so people tend
> not to bother.
> 
>> If the string preceding a comma is not a valid email address do not
>> split it off.
> 
> That might work as a heuristic, though "is a valid email address" is a
> notoriously hard thing to check. Possibly looking for an "@" would catch
> most common cases, though.

A more elaborate test would be:
A string matching [\S\s]*<\S+@\S+.\S+>\s* is an email address.
A string matching \s*\S+@\S+.\S+\s* is an email address.
Both may need trimming of whitespace.
Any other string is not an email address.

Regards

Heinrich

> 
> -Peff
> 



Re: [PATCH v2 07/12] commit-graph: load a root tree from specific graph

2018-05-12 Thread Martin Ågren
On 11 May 2018 at 23:15, Derrick Stolee  wrote:

> -struct tree *get_commit_tree_in_graph(const struct commit *c)
> +static struct tree *get_commit_tree_in_graph_one(struct commit_graph *g,
> +const struct commit *c)
>  {
> if (c->maybe_tree)
> return c->maybe_tree;
> if (c->graph_pos == COMMIT_NOT_FROM_GRAPH)
> BUG("get_commit_tree_in_graph called from non-commit-graph 
> commit");

Update the function name in the BUG? Not that it will ever matter. ;-)

(This one is now static, ok.)


Re: [PATCH v2 06/12] commit: force commit to parse from object database

2018-05-12 Thread Martin Ågren
On 11 May 2018 at 23:15, Derrick Stolee  wrote:

> -int parse_commit_gently(struct commit *item, int quiet_on_missing)
> +int parse_commit_internal(struct commit *item, int quiet_on_missing, int 
> use_commit_graph)
>  {
> enum object_type type;
> void *buffer;
> @@ -403,17 +403,17 @@ int parse_commit_gently(struct commit *item, int 
> quiet_on_missing)
> return -1;
> if (item->object.parsed)
> return 0;
> -   if (parse_commit_in_graph(item))
> +   if (use_commit_graph && parse_commit_in_graph(item))
> return 0;

Right, this is where we check the graph. It's the only place we need to
consider the new flag.

> buffer = read_sha1_file(item->object.oid.hash, , );
> if (!buffer)
> return quiet_on_missing ? -1 :
> error("Could not read %s",
> -oid_to_hex(>object.oid));
> +   oid_to_hex(>object.oid));
> if (type != OBJ_COMMIT) {
> free(buffer);
> return error("Object %s not a commit",
> -oid_to_hex(>object.oid));
> +   oid_to_hex(>object.oid));

Some spurious indentation reshuffling going on in two lines here.

> --- a/commit.h
> +++ b/commit.h
> @@ -73,6 +73,7 @@ struct commit *lookup_commit_reference_by_name(const char 
> *name);
>  struct commit *lookup_commit_or_die(const struct object_id *oid, const char 
> *ref_name);
>
>  int parse_commit_buffer(struct commit *item, const void *buffer, unsigned 
> long size, int check_graph);
> +int parse_commit_internal(struct commit *item, int quiet_on_missing, int 
> use_commit_graph);

Unlike my comment on a previous patch, this one is meant for external
use. That's why it's not marked as static above. Ok.

Martin


Re: [PATCH v2 04/12] commit-graph: parse commit from chosen graph

2018-05-12 Thread Martin Ågren
> -int parse_commit_in_graph(struct commit *item)
> +int parse_commit_in_graph_one(struct commit_graph *g, struct commit *item)

I think this function should be static.


RE: [Best Practices Request] clean/smudge configuration - Avoiding the chicken and egg

2018-05-12 Thread Randall S. Becker
On May 11, 2018 3:26 PM, I wrote:
> On May 10, 2018 10:27 PM, Junio C Hamano wrote:
> > "Randall S. Becker"  writes:
> >
> > > What if we create a ../.gitconfig like ../.gitattributes, that is
> > > loaded before .git/config?
> >
> > You should not forget one of the two reasons why clean/smudge/diff etc.
> > drivers must be given with a step of redirection, split between
> > attributes and config.  "This path holds text from ancient macs" at
> > the logical level may be expressed in .gitattributes, and then "when
> > checking out text from ancient macs, process the blob data with this
> > program to turn it into a form suitable for working tree" is given as
> > a smudge filter program, that is (1) suitable for the platform _you_
> > as the writer of the config file is using *AND* (2) something you are
> confortable running on behalf of you.
> >
> > We *deliberately* lack a mechanism to pay attention to in-tree config
> > that gets propagated to those who get them via "git clone", exactly
> > because otherwise your upstream may not just specify a "smudge"
> > program that your platform would be unable to run, but can specify a
> > "smudge" program that pretends to be a smudger, but is actually a
> > program that installs a keylogger and posts your login password and
> > bank details to this mailing list ;-)
> >
> > So, no, I do not think in-tree configuration will fly.
> 
> I agree, which is why I asked the original question instead of posting it as a
> formal patch. We would probably get a brand new CVE implementing the
> proposed proto-changes even if the original intent was internal trusted
> repositories only. That's why I asked this as a "Best Practices" type 
> question,
> which I think I have a better idea on now. The actual situation is rather cool
> from a DevOps point of view, and whatever the ultimate solution is, might
> make for a nice presentation at some future conference .

Here's where I ended up, from a solution standpoint:

0. Make sure the git scripts you use are always trusted using your favourite 
technique
1. Wrap the clone in a such a script to do the next two steps to avoid the 
usual problems of forgetting things
2. The clone script should use "git -c name=value clone repo" for all 
clean/smudge values needed that would otherwise be in .git/config if we had one 
which we don't
3. Have the script create/update .git/config using "git config --local name 
value" with all of the same clean/smudge values for subsequent operations.

>From there, it seems that the contents of the smudged files are always 
>correct, assuming the filter works of course. It was the use of -c that makes 
>this work.

Sound about right?

Cheers,
Randall



Re: [PATCH 03/12] shallow.c: use commit-slab for commit depth instead of commit->util

2018-05-12 Thread Jeff King
On Sat, May 12, 2018 at 02:09:18PM +0200, Duy Nguyen wrote:

> > That wastes an extra 4 bytes per slot over storing an int directly, but
> > it's the same as storing an 8-byte pointer, plus you avoid the storage
> > and runtime overhead of malloc.
> 
> Or we could have a way to let the user decide initial values. If the
> initial value here is -1 (which can't possibly be used in the current
> code), it could be the sentinel value.

That's actually a little tricky. Right now the sentinels are assigned by
xcalloc(), so they're all-bits zero. We _could_ walk any newly allocated
slab and assign a value to each element, but I'd worry that is wasteful
for cases where might only use a small part of the slab (and my
assumption is that the OS can give us zero'd pages pretty cheaply, and
maybe even avoid allocating a page until we touch them, but I don't know
how true that is).

> Did you notice the for loop in the end to free "int *"? I don't like
> peeking inside a slab that way and would prefer passing a "free"
> function pointer to clear_commit_depth(), or just assign a "free"
> function to some new field in struct commit_depth and
> clear_commit_depth() will call it. If we have a new field for "free"
> callback in the struct, it makes sense to have an "init" callback to
> do extra initialization on top of xcalloc.

You might find that a free() is tricky with the type system. This is all
implemented with macros, so you'd end up calling free() on an int (even
if it's a function that nobody ever calls). I suppose the value could be
cast to void to shut up the compiler, but then that function is like a
ticking time bomb. ;)

So I guess you'd need a variant of define_commit_slab() that defines the
clear function and one that doesn't.

> > I actually wonder if we could wrap commit_slab with a variant that
> > stores the sentinel data itself, to make this pattern easier. I.e.,
> > something like:
> >
> >   #define define_commit_slab_sentinel(name, type) \
> > struct name##_value { \
> > unsigned valid:1; \
> > type value; \
> > }; \
> > define_commit_slab(name, struct name##_value)
> >
> > and some matching "peek" and "at" functions to manipulate value
> > directly.
> 
> If you keep this valid bit in a separate slab, you can pack these bits
> very tight and not worry about wasting memory. Lookup in commit-slab
> is cheap enough that doing double lookups (one for valid field, one
> for value) is not a problem, I think.

True, though your cache behavior gets worse if your have two separate
arrays. I'm not sure pinching bytes matters all that much. linux.git has
~600k commits, so even there a few bytes each is not so bad (although
again, we get into cache issues).

> Yeah I think I will stick with a faithful conversion for now. The
> conversion shows room for improvement which could be the next
> microproject (I thought of adding this removing 'util' task as a 2019
> microproject but it was too tricky for newcomers to do).

Makes sense to me.

-Peff


Re: [PATCH 06/12] revision.c: use commit-slab for show_source

2018-05-12 Thread Jeff King
On Sat, May 12, 2018 at 04:13:59PM +0200, Duy Nguyen wrote:

> > Should this one be global in the first place?  Can we tie it to say
> > "struct rev_info" or something?  I'd somehow anticipate a far future
> > where object flag bits used for traversal book-keeping would be moved
> > out of the objects themselves and multiple simultanous traversal
> > becomes reality.
> 
> Yeah I thought about those 27 bits but discarded it because it was not
> that much. But now that you brought up the maintainability aspect of
> it, it might make sense to move those bits out (if we manage to make
> commit-slab (or a specialized version of it) manage bitmaps instead of
> primitive types, which is not impossible).
> 
> I don't understand the tying to struct rev_info though. This is a
> struct definition and cannot really be tied to another struct. The
> pointer to struct source_slab _is_ tied to struct rev_info.

Right. We have a global type name and global functions, because this is
C. But the actual variable itself is inside struct rev_info (you used a
pointer to a caller-allocated struct, but I think that's fine, too).

> > Not limited to this particular one, but we'd need to think how the
> > commit_slab mechanism should interact with the_repository idea
> > Stefan has been having fun with.  If the object pool is maintained
> > per in-core repository object, presumably we'd have more than one
> > in-core instances of the same commit object if we have more than one
> > repository objects, and decorating one with a slab data may not want
> > to decorate the other one at the same time.
> 
> It should be ok. The slab is indexed by the "commit index" which is
> already per parsed_object_pool. Commit-slab user has to be careful to
> use the right slab with the right repo and free it at the right time,
> but that I think is outside with struct repository.

In theory somebody could misuse a "struct commit" from the wrong
repository and get nonsense results. I'd like to think that would be
pretty hard, but I guess it could be possible to make a mistake like
that at the interface where we call into a submodule-related function.

You could get around it by making the commit_index global across all
pools, but I don't think that's a good idea. Since it's an array index,
we want it to be compact and low.

-Peff


Re: [PATCH 00/12] Die commit->util, die!

2018-05-12 Thread Jakub Narebski
Nguyễn Thái Ngọc Duy  writes:

> There's not much to write here. It's basically a copy from 12/12:
>
> This 'util' pointer can be used for many different purposes,
> controlled in different ways. Some are not even contained in a command
> code, but buried deep in common code with no clue who will use it and
> how. For example, if revs.show_source is set, then it's used for
> storing path name, but if you happen to call get_merge_parent() then
> some 'util' may end up storing another thing.
>
> The move to using commit-slab gives us a much better picture of how
> some piece of data is associated with a commit and what for. Since
> nobody uses 'util' pointer anymore, we can retire it so that nobody will
> abuse it again. commit-slab will be the way forward for associating
> data to a commit.
>
> As a side benefit, this shrinks struct commit by 8 bytes (on 64-bit
> architecture) which should help reduce memory usage for reachability
> test a bit. This is also what commit-slab is invented for [1].
>
> [1] 96c4f4a370 (commit: allow associating auxiliary info on-demand -
> 2013-04-09)

Good work.  This would reduce the amount of technical debt in Git.

I just wonder if most of those transformation could not be done with
Coccinelle, instead of doing it by hand.

> Nguyễn Thái Ngọc Duy (12):
>   blame: use commit-slab for blame suspects instead of commit->util
>   describe: use commit-slab for commit names instead of commit->util
>   shallow.c: use commit-slab for commit depth instead of commit->util
>   sequencer.c: use commit-slab to mark seen commits
>   sequencer.c: use commit-slab to associate todo items to commits
>   revision.c: use commit-slab for show_source
>   bisect.c: use commit-slab for commit weight instead of commit->util
>   name-rev: use commit-slab for rev-name instead of commit->util
>   show-branch: use commit-slab for commit-name instead of commit->util
>   log: use commit-slab in prepare_bases() instead of commit->util
>   merge: use commit-slab in merge remote desc instead of commit->util
>   commit.h: delete 'util' field in struct commit
>
>  bisect.c  | 12 +---
>  blame.c   | 42 +++---
>  blame.h   |  2 ++
>  builtin/blame.c   |  2 +-
>  builtin/describe.c| 16 +---
>  builtin/fast-export.c | 14 +-
>  builtin/log.c | 17 +
>  builtin/merge.c   | 25 +
>  builtin/name-rev.c| 23 ---
>  builtin/show-branch.c | 39 +++
>  commit.c  | 12 ++--
>  commit.h  |  8 ++--
>  log-tree.c|  8 ++--
>  merge-recursive.c |  8 +---
>  revision.c| 17 +
>  revision.h|  5 -
>  sequencer.c   | 24 ++--
>  shallow.c | 37 +
>  18 files changed, 225 insertions(+), 86 deletions(-)


[GSoC] GSoC with git, week 2

2018-05-12 Thread Alban Gruin
Hi,

I published a new blog post about this week. You can read it here:

https://blog.pa1ch.fr/posts/2018/05/12/en/gsoc2018-week-2.html

Please tell me what you think about it :)

Cheers,
Alban



Re: [PATCH v6 11/13] command-list.txt: documentation and guide line

2018-05-12 Thread Philip Oakley

Hi Duy,

From: "Nguyễn Thái Ngọc Duy"  : Monday, May 07, 2018

This is intended to help anybody who needs to update command-list.txt.
It gives a brief introduction of all attributes a command can take.
---
command-list.txt | 44 
1 file changed, 44 insertions(+)

diff --git a/command-list.txt b/command-list.txt
index 99ddc231c1..9c70c69193 100644
--- a/command-list.txt
+++ b/command-list.txt
@@ -1,3 +1,47 @@
+# Command classification list
+# ---
+# All supported commands, builtin or external, must be described in
+# here. This info is used to list commands in various places. Each
+# command is on one line followed by one or more attributes.
+#
+# The first attribute group is mandatory and indicates the command
+# type. This group includes:
+#
+#   mainporcelain
+#   ancillarymanipulators
+#   ancillaryinterrogators
+#   foreignscminterface
+#   plumbingmanipulators
+#   plumbinginterrogators
+#   synchingrepositories
+#   synchelpers
+#   purehelpers
+#
+# The type names are self explanatory. But if you want to see what
+# command belongs to what group to get a better picture, have a look
+# at "git" man page, "GIT COMMANDS" section.
+#
+# Commands of type mainporcelain can also optionally have one of these
+# attributes:
+#
+#   init
+#   worktree
+#   info
+#   history
+#   remote
+#
+# These commands are considered "common" and will show up in "git
+# help" output in groups. Uncommon porcelain commands must not
+# specify any of these attributes.
+#
+# "complete" attribute is used to mark that the command should be
+# completable by git-completion.bash. Note that by default,
+# mainporcelain commands are completable so you don't need this
+# attribute.
+#
+# While not true commands, guides are also specified here, which can
+# only have "guide" attribute and nothing else.


While the file is called ~ "Command List", the list is here as a support to
the Help function, and ultimately to the user's reading of the man pages,
including the man(5/7) guides, so I'd view the man page guides as first
class citizens.

Perhaps:
# As part of the Git man page list, the man(5/7) guides are also specified
# here, which can only have "guide" attribute and nothing else.

--
Philip


+#
### command list (do not change this line, also do not change alignment)
# command name  category [category] [category]
git-add mainporcelain   worktree
--
2.17.0.705.g3525833791






Re: [PATCH 06/12] revision.c: use commit-slab for show_source

2018-05-12 Thread Duy Nguyen
On Sat, May 12, 2018 at 3:58 PM, Junio C Hamano  wrote:
> Jeff King  writes:
>
>> On Sat, May 12, 2018 at 10:00:22AM +0200, Nguyễn Thái Ngọc Duy wrote:
>>
>>> diff --git a/revision.h b/revision.h
>>> index b8c47b98e2..72404e2599 100644
>>> --- a/revision.h
>>> +++ b/revision.h
>>> @@ -6,6 +6,7 @@
>>>  #include "notes.h"
>>>  #include "pretty.h"
>>>  #include "diff.h"
>>> +#include "commit-slab.h"
>>>
>>>  /* Remember to update object flag allocation in object.h */
>>>  #define SEEN(1u<<0)
>>> @@ -29,6 +30,7 @@ struct rev_info;
>>>  struct log_info;
>>>  struct string_list;
>>>  struct saved_parents;
>>> +define_commit_slab(source_slab, char *);
>>
>> Since this one is a global, can we give it a name that ties it to the
>> revision machinery? Like "revision_source_slab" or something?
>
> Should this one be global in the first place?  Can we tie it to say
> "struct rev_info" or something?  I'd somehow anticipate a far future
> where object flag bits used for traversal book-keeping would be moved
> out of the objects themselves and multiple simultanous traversal
> becomes reality.

Yeah I thought about those 27 bits but discarded it because it was not
that much. But now that you brought up the maintainability aspect of
it, it might make sense to move those bits out (if we manage to make
commit-slab (or a specialized version of it) manage bitmaps instead of
primitive types, which is not impossible).

I don't understand the tying to struct rev_info though. This is a
struct definition and cannot really be tied to another struct. The
pointer to struct source_slab _is_ tied to struct rev_info.

> Not limited to this particular one, but we'd need to think how the
> commit_slab mechanism should interact with the_repository idea
> Stefan has been having fun with.  If the object pool is maintained
> per in-core repository object, presumably we'd have more than one
> in-core instances of the same commit object if we have more than one
> repository objects, and decorating one with a slab data may not want
> to decorate the other one at the same time.

It should be ok. The slab is indexed by the "commit index" which is
already per parsed_object_pool. Commit-slab user has to be careful to
use the right slab with the right repo and free it at the right time,
but that I think is outside with struct repository.
-- 
Duy


Re: [PATCH 04/12] sequencer.c: use commit-slab to mark seen commits

2018-05-12 Thread Duy Nguyen
On Sat, May 12, 2018 at 3:43 PM, Junio C Hamano  wrote:
> Jeff King  writes:
>
>> On Sat, May 12, 2018 at 10:00:20AM +0200, Nguyễn Thái Ngọc Duy wrote:
>>
>>> +define_commit_slab(commit_seen, int);
>>
>> Yay, this one is nice and simple. :) This is what I had hoped for all
>> along with the slab concept.
>>
>> -Peff
>
> Does it need to use a full int, not just a byte per commit, though?

True. Is it time to introduce 'bool' type? I did not want to put the
'char' type up there (but now that I think about it I could have put
uint8_t too).
-- 
Duy


Re: [RFC] Other chunks for commit-graph, part 1 - Bloom filters, topo order, etc.

2018-05-12 Thread Jakub Narebski
Derrick Stolee  writes:
> On 5/4/2018 3:40 PM, Jakub Narebski wrote:
>>
>> With early parts of commit-graph feature (ds/commit-graph and
>> ds/lazy-load-trees) close to being merged into "master", see
>> https://public-inbox.org/git/xmqq4ljtz87g@gitster-ct.c.googlers.com/
>> I think it would be good idea to think what other data could be added
>> there to make Git even faster.
>
> Before thinking about adding more data to the commit-graph, I think
> instead we need to finish taking advantage of the data that is already
> there. This means landing the generation number patch [1] (I think
> this is close, so I'll send a v6 this week if there is no new
> feedback.) and the auto-compute patch [2] (this could use more
> feedback, but I'll send a v1 based on the RFC feedback if no one
> chimes in).
>
> [1]
> https://public-inbox.org/git/20180501124652.155781-1-dsto...@microsoft.com/
>     [PATCH v5 00/11] Compute and consume generation numbers
>
> [2]
> https://public-inbox.org/git/20180417181028.198397-1-dsto...@microsoft.com/
>     [RFC PATCH 00/12] Integrate commit-graph into 'fsck' and 'gc'

Right, so the RFC might be a bit premature; I wanted the discussion to
be out there to think about when adding new uses of existing features.


DIGRESSION: it is commendable that you are sending patches in small,
easy digestible chunks / patch series.  It is much easier to review 10+
series than 80+ behemoth (though I understand it is not always possible
to subdivide patch series into smaller self-contained sub-series).

On the other hand, it would be nice to have some roadmap about series
and features to be sent in the future, if possible.  Something like what
was done when 'git rebase --interactive' was getting builtinified: moved
(in parts) to C.  It would be great to have such roadmap with milestones
achieved and milestones to be done in the cover letter for series.

> The big wins remaining from this data are `git tag --merged` and `git
> log --graph`. The `tag` scenario is probably easier: this can be done
> by replacing the revision-walk underlying the call to use
> paint_down_to_common() instead. Requires adding an external method to
> commit.c, but not too much code.

I wonder if there is some significant reason behind `git tag --merged`
using its own codepath, beside historical reasons.  Maybe performance is
better with current code?

Utilizing paint_down_to_common() there, beside reducing amount of code
you would have to modify, would also unify code (and possibly reduce
amount of code).  That's very nice.

>
> The tougher challenge is `git log --graph`. The revision walk
> machinery currently uses two precompute phases before iterating
> results to the pager: limit_list() and sort_in_topological_order();
> these correspond to two phases of Kahn's algorithm for topo-sort
> (compute in-degrees, then walk by peeling commits with in-degree
> zero). This requires O(N) time, where N is the number of reachable
> commits. Instead, we could make this be O(W) time to output one page
> of results, where W is (roughly) the number of reachable commits with
> generation number above the last reported result.

A reminder: Kahn's algorithm (described for example in [1] and [3])
looks like this:

  L ← Empty list that will contain the sorted elements
  S ← Collection of all nodes with no incoming edge
  while S is non-empty do
  remove a node 'n' from S
  add 'n' to tail of L
  for each parent 'm' of 'n' do
  decrease the in-degree of 'm'
  if 'm' has in-degree of 0 then
  insert 'm' into S

[1]: https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm
[2]: https://www.geeksforgeeks.org/topological-sorting-indegree-based-solution/

> In order to take advantage of this approach, the two phases of Kahn's
> algorithm need to be done in-line with reporting results to the
> pager. This means keeping two queues: one is a priority queue by
> generation number that computes in-degrees,

This simple solition of using priority queue by generation number won't
work, I think.  In-degree is computed from heads down, generation number
is computed from roots up.

For example in the graph below

   *<---*<---*<---*<---*<---*<---*<---*<---*<---A
 \
  \--*<---B

both A and B have in-degree of 0, but gen(B) << gen(A).

But I might be wrong.

>the other is a priority
> queue (by commit-date or a visit-order value to do the --topo-order
> priority) that peels the in-degree-zero commits (and decrements the
> in-degree of their parents). I have not begun this refactoring effort
> because appears complicated to me, and it will be hard to tease out
> the logic without affecting other consumers of the revision-walk
> machinery.
>
> I would love it if someone picked up the `git log --graph` task, since
> it will be a few weeks before I have the time to focus on it.

Let's assume that we have extra data (indexes such as 

Re: [PATCH 07/12] bisect.c: use commit-slab for commit weight instead of commit->util

2018-05-12 Thread Junio C Hamano
Nguyễn Thái Ngọc Duy   writes:

> It's done so that commit->util can be removed. See more explanation in
> the commit that removes commit->util.
>
> Signed-off-by: Nguyễn Thái Ngọc Duy 
> ---

Yup, this one is a no-brainer.

>  bisect.c | 12 +---
>  1 file changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/bisect.c b/bisect.c
> index a579b50884..6de1abd407 100644
> --- a/bisect.c
> +++ b/bisect.c
> @@ -12,6 +12,7 @@
>  #include "bisect.h"
>  #include "sha1-array.h"
>  #include "argv-array.h"
> +#include "commit-slab.h"
>  
>  static struct oid_array good_revs;
>  static struct oid_array skipped_revs;
> @@ -70,16 +71,19 @@ static void clear_distance(struct commit_list *list)
>   }
>  }
>  
> +define_commit_slab(commit_weight, int *);
> +static struct commit_weight commit_weight;
> +
>  #define DEBUG_BISECT 0
>  
>  static inline int weight(struct commit_list *elem)
>  {
> - return *((int*)(elem->item->util));
> + return **commit_weight_at(_weight, elem->item);
>  }
>  
>  static inline void weight_set(struct commit_list *elem, int weight)
>  {
> - *((int*)(elem->item->util)) = weight;
> + **commit_weight_at(_weight, elem->item) = weight;
>  }
>  
>  static int count_interesting_parents(struct commit *commit)
> @@ -265,7 +269,7 @@ static struct commit_list *do_find_bisection(struct 
> commit_list *list,
>   struct commit *commit = p->item;
>   unsigned flags = commit->object.flags;
>  
> - p->item->util = [n++];
> + *commit_weight_at(_weight, p->item) = [n++];
>   switch (count_interesting_parents(commit)) {
>   case 0:
>   if (!(flags & TREESAME)) {
> @@ -372,6 +376,7 @@ void find_bisection(struct commit_list **commit_list, int 
> *reaches,
>   int *weights;
>  
>   show_list("bisection 2 entry", 0, 0, *commit_list);
> + init_commit_weight(_weight);
>  
>   /*
>* Count the number of total and tree-changing items on the
> @@ -412,6 +417,7 @@ void find_bisection(struct commit_list **commit_list, int 
> *reaches,
>   }
>   free(weights);
>   *commit_list = best;
> + clear_commit_weight(_weight);
>  }
>  
>  static int register_ref(const char *refname, const struct object_id *oid,


Re: [PATCH 06/12] revision.c: use commit-slab for show_source

2018-05-12 Thread Junio C Hamano
Jeff King  writes:

> On Sat, May 12, 2018 at 10:00:22AM +0200, Nguyễn Thái Ngọc Duy wrote:
>
>> diff --git a/revision.h b/revision.h
>> index b8c47b98e2..72404e2599 100644
>> --- a/revision.h
>> +++ b/revision.h
>> @@ -6,6 +6,7 @@
>>  #include "notes.h"
>>  #include "pretty.h"
>>  #include "diff.h"
>> +#include "commit-slab.h"
>>  
>>  /* Remember to update object flag allocation in object.h */
>>  #define SEEN(1u<<0)
>> @@ -29,6 +30,7 @@ struct rev_info;
>>  struct log_info;
>>  struct string_list;
>>  struct saved_parents;
>> +define_commit_slab(source_slab, char *);
>
> Since this one is a global, can we give it a name that ties it to the
> revision machinery? Like "revision_source_slab" or something?

Should this one be global in the first place?  Can we tie it to say
"struct rev_info" or something?  I'd somehow anticipate a far future
where object flag bits used for traversal book-keeping would be moved
out of the objects themselves and multiple simultanous traversal
becomes reality.

Not limited to this particular one, but we'd need to think how the
commit_slab mechanism should interact with the_repository idea
Stefan has been having fun with.  If the object pool is maintained
per in-core repository object, presumably we'd have more than one
in-core instances of the same commit object if we have more than one
repository objects, and decorating one with a slab data may not want
to decorate the other one at the same time.


Re: [PATCH v2 03/12] commit-graph: test that 'verify' finds corruption

2018-05-12 Thread Martin Ågren
On 11 May 2018 at 23:15, Derrick Stolee  wrote:

> +test_expect_success 'detect bad signature' '
> +   cd "$TRASH_DIRECTORY/full" &&

I was a bit surprised at the "cd outside subshell", but then realized
that this file already does that. It will only be a problem if later
tests think they're somewhere else. Let's read on.

> +   cp $objdir/info/commit-graph commit-graph-backup &&
> +   test_when_finished mv commit-graph-backup $objdir/info/commit-graph &&
> +   corrupt_data $objdir/info/commit-graph 0 "\0" &&
> +   test_must_fail git commit-graph verify 2>err &&
> +   grep -v "^\+" err > verify-errors &&
> +   test_line_count = 1 verify-errors &&
> +   grep "graph signature" verify-errors
> +'
> +
> +test_expect_success 'detect bad version number' '
> +   cd "$TRASH_DIRECTORY/full" &&
> +   cp $objdir/info/commit-graph commit-graph-backup &&
> +   test_when_finished mv commit-graph-backup $objdir/info/commit-graph &&
> +   corrupt_data $objdir/info/commit-graph 4 "\02" &&
> +   test_must_fail git commit-graph verify 2>err &&
> +   grep -v "^\+" err > verify-errors &&
> +   test_line_count = 1 verify-errors &&
> +   grep "graph version" verify-errors
> +'
> +
> +test_expect_success 'detect bad hash version' '
> +   cd "$TRASH_DIRECTORY/full" &&
> +   cp $objdir/info/commit-graph commit-graph-backup &&
> +   test_when_finished mv commit-graph-backup $objdir/info/commit-graph &&
> +   corrupt_data $objdir/info/commit-graph 5 "\02" &&
> +   test_must_fail git commit-graph verify 2>err &&
> +   grep -v "^\+" err > verify-errors &&
> +   test_line_count = 1 verify-errors &&
> +   grep "hash version" verify-errors
> +'

These look a bit boiler-platey. Maybe not too bad though.

> +test_expect_success 'detect too small chunk-count' '

s/too small/bad/?

To be honest, I wrote this title without thinking too hard about the
problem. In general, it would be quite hard for `git commit-graph
verify` to say "*this* is wrong in your file" ("the number of chunks is
too small") -- it should be much easier to say "*something* is wrong".

> +   cd "$TRASH_DIRECTORY/full" &&
> +   cp $objdir/info/commit-graph commit-graph-backup &&
> +   test_when_finished mv commit-graph-backup $objdir/info/commit-graph &&
> +   corrupt_data $objdir/info/commit-graph 6 "\01" &&
> +   test_must_fail git commit-graph verify 2>err &&
> +   grep -v "^\+" err > verify-errors &&
> +   test_line_count = 2 verify-errors &&
> +   grep "missing the OID Lookup chunk" verify-errors &&
> +   grep "missing the Commit Data chunk" verify-errors

Maybe these tests could go with the previous patch(es). IMVHO I would
prefer reading the test with the implementation. A separate commit for
the tests might make sense if they're really tricky and need some
explaining, but I don't think that's the case here.

All of these comments are just minor nits, or not even that. I will
continue with the other patches at another time.

Thank you, I'm really looking forward to Git with commit-graph magic.

Martin


Re: [PATCH 04/12] sequencer.c: use commit-slab to mark seen commits

2018-05-12 Thread Junio C Hamano
Jeff King  writes:

> On Sat, May 12, 2018 at 10:00:20AM +0200, Nguyễn Thái Ngọc Duy wrote:
>
>> +define_commit_slab(commit_seen, int);
>
> Yay, this one is nice and simple. :) This is what I had hoped for all
> along with the slab concept.
>
> -Peff

Does it need to use a full int, not just a byte per commit, though?


Re: [PATCH v2 02/12] commit-graph: verify file header information

2018-05-12 Thread Martin Ågren
On 11 May 2018 at 23:15, Derrick Stolee  wrote:
> During a run of 'git commit-graph verify', list the issues with the
> header information in the commit-graph file. Some of this information
> is inferred from the loaded 'struct commit_graph'. Some header
> information is checked as part of load_commit_graph_one().
>
> Signed-off-by: Derrick Stolee 
> ---
>  commit-graph.c | 32 +++-
>  1 file changed, 31 insertions(+), 1 deletion(-)
>
> diff --git a/commit-graph.c b/commit-graph.c
> index b25aaed128..d2db20e49a 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -818,7 +818,37 @@ void write_commit_graph(const char *obj_dir,
> oids.nr = 0;
>  }
>
> +static int verify_commit_graph_error;
> +
> +static void graph_report(const char *fmt, ...)
> +{
> +   va_list ap;
> +   struct strbuf sb = STRBUF_INIT;
> +   verify_commit_graph_error = 1;
> +
> +   va_start(ap, fmt);
> +   strbuf_vaddf(, fmt, ap);
> +
> +   fprintf(stderr, "%s\n", sb.buf);
> +   strbuf_release();
> +   va_end(ap);
> +}

Right, so this replaces the macro-trickery from v1, and we print a
newline after each error.

>  int verify_commit_graph(struct commit_graph *g)
>  {
> -   return !g;
> +   if (!g) {
> +   graph_report("no commit-graph file loaded");
> +   return 1;
> +   }

> +
> +   return verify_commit_graph_error;
>  }

Not sure it matters much: I suppose you could introduce the parts that I
have quoted here in the previous patch. Or maybe not.

Martin


Re: [PATCH v2 01/12] commit-graph: add 'verify' subcommand

2018-05-12 Thread Martin Ågren
On 11 May 2018 at 23:15, Derrick Stolee  wrote:

> graph_name = get_commit_graph_filename(opts.obj_dir);
> graph = load_commit_graph_one(graph_name);
> +   FREE_AND_NULL(graph_name);
>
> if (!graph)
> die("graph file %s does not exist", graph_name);
> -   FREE_AND_NULL(graph_name);

This is probably because of something I said, but this does not look
correct. The `die()` would typically print "(null)" or segfault. If the
`die()` means we don't free `graph_name`, that should be fine.

You're still leaking `graph` here (possibly nothing this patch should
worry about) and in `graph_verify()`. UNLEAK-ing it immediately before
calling `verify_commit_graph()` should be ok. I also think punting on
this UNLEAK-business entirely would be ok; I was just a bit surprised to
see one variable getting freed and the other one ignored.

Martin


refreshing the gitk colour scheme

2018-05-12 Thread Andrej Shadura
Hello everyone,

I’ve been using Gitk with the following colour settings for a year or
so, and I found it much more convenient to use that the current default,
so I’d like to propose to make them the defaults. These colours are
slightly less 80’s i.e. they’re using colours outside of the ‘classic’
16 colour palette for a more pleasing visual experience.

set headbgcolor #98ff5e
set remotebgcolor #bae2ff
set tagbgcolor #f3fb57
set mainheadcirclecolor #ffeb74
set indexcirclecolor green
set circlecolors {white #08b5ed gray blue blue}

I also found it may be a good idea to disable auto-selection of the
commit hash by default, since it overwrites whatever content was in the
selection clipboard, and also pollutes the clipboard history if the user
is using a clipboard manager:

set autoselect 0

Here’s one more change which I am not proposing for inclusion, but I
found it more æsthetically pleasing than the default:

set mainfont {FreeSans 11}

-- 
Cheers,
  Andrej


Re: [PATCH 01/12] blame: use commit-slab for blame suspects instead of commit->util

2018-05-12 Thread Duy Nguyen
On Sat, May 12, 2018 at 11:22 AM, Jeff King  wrote:
> On Sat, May 12, 2018 at 10:00:17AM +0200, Nguyễn Thái Ngọc Duy wrote:
>
>> +define_commit_slab(blame_suspects, struct blame_origin *);
>> +static struct blame_suspects blame_suspects;
>> +
>> +struct blame_origin *get_blame_suspects(struct commit *commit)
>> +{
>> + struct blame_origin **result;
>> +
>> + result = blame_suspects_peek(_suspects, commit);
>> +
>> + return result ? *result : NULL;
>> +}
>
> Hmm. You need this helper because you want to be able to peek and get a
> NULL. But that's already what _at() would return, with the only
> difference that we may extend the slab just to return NULL.
>
> I wonder how much it matters in practice. We'd generally be extending
> the slab to hit every commit anyway in this case, I would think.

I don't know much about blame so I stay very conservative ;-) If it's
safe to just do _at() here, I'll update this patch.

> I suppose it doesn't actually simplify the code that much to do it that
> way, though. We could get rid of this helper, but the caller would still
> look like:
>
>   for (p = *blame_suspects_at(o->commit); p; p = p->next)
>
> which is actually slightly uglier than get_blame_suspects(), because we
> have to do the pointer-dereference ourselves.

And the caller would need to include commit-slab.h too. I added
get_blame_suspects() because I wanted to avoid that.
-- 
Duy


Re: [PATCH 03/12] shallow.c: use commit-slab for commit depth instead of commit->util

2018-05-12 Thread Duy Nguyen
On Sat, May 12, 2018 at 11:18 AM, Jeff King  wrote:
> On Sat, May 12, 2018 at 05:07:48AM -0400, Jeff King wrote:
>
>> So no, it wouldn't work to directly store depths with the code as
>> written.  I'm not sure if the depth can ever be 0. If not, then it would
>> be a suitable sentinel as:
>>
>>   int *slot = commit_depth_at(, p->item);
>>   if (!*slot || cur_depth < *slot)
>>   *slot = cur_depth;
>>
>> But somebody would have to dig into the possible values of cur_depth
>> there (which would make sense to do as a separate patch anyway, since
>> the point of this is to be a direct conversion).
>
> By the way, one other approach if xcalloc() doesn't produce a good
> sentinel is to use a data type that does. ;) E.g., something like this
> should work:
>
>   struct depth {
> unsigned valid:1;
> int value;
>   };
>   define_commit_slab(commit_depth, struct depth);
>
>   ...
>
>   struct depth *slot = commit_depth_at(, p->item);
>   if (!slot->valid || cur_depth < slot->value) {
> slot->value = cur_depth;
> slot->valid = 1;
>   }
>
> That wastes an extra 4 bytes per slot over storing an int directly, but
> it's the same as storing an 8-byte pointer, plus you avoid the storage
> and runtime overhead of malloc.

Or we could have a way to let the user decide initial values. If the
initial value here is -1 (which can't possibly be used in the current
code), it could be the sentinel value.

Did you notice the for loop in the end to free "int *"? I don't like
peeking inside a slab that way and would prefer passing a "free"
function pointer to clear_commit_depth(), or just assign a "free"
function to some new field in struct commit_depth and
clear_commit_depth() will call it. If we have a new field for "free"
callback in the struct, it makes sense to have an "init" callback to
do extra initialization on top of xcalloc.

> I actually wonder if we could wrap commit_slab with a variant that
> stores the sentinel data itself, to make this pattern easier. I.e.,
> something like:
>
>   #define define_commit_slab_sentinel(name, type) \
> struct name##_value { \
> unsigned valid:1; \
> type value; \
> }; \
> define_commit_slab(name, struct name##_value)
>
> and some matching "peek" and "at" functions to manipulate value
> directly.

If you keep this valid bit in a separate slab, you can pack these bits
very tight and not worry about wasting memory. Lookup in commit-slab
is cheap enough that doing double lookups (one for valid field, one
for value) is not a problem, I think.

> I think the end result would be nicer, but it's turning into a little
> bit of a rabbit hole. So I don't mind going with your direct conversion
> here for now.

Yeah I think I will stick with a faithful conversion for now. The
conversion shows room for improvement which could be the next
microproject (I thought of adding this removing 'util' task as a 2019
microproject but it was too tricky for newcomers to do).
-- 
Duy


Re: [PATCH 03/12] shallow.c: use commit-slab for commit depth instead of commit->util

2018-05-12 Thread Duy Nguyen
On Sat, May 12, 2018 at 11:07 AM, Jeff King  wrote:
> On Sat, May 12, 2018 at 10:00:19AM +0200, Nguyễn Thái Ngọc Duy wrote:
>
>> @@ -82,25 +84,29 @@ struct commit_list *get_shallow_commits(struct 
>> object_array *heads, int depth,
>>   struct object_array stack = OBJECT_ARRAY_INIT;
>>   struct commit *commit = NULL;
>>   struct commit_graft *graft;
>> + struct commit_depth depths;
>>
>> + init_commit_depth();
>>   while (commit || i < heads->nr || stack.nr) {
>>   struct commit_list *p;
>>   if (!commit) {
>>   if (i < heads->nr) {
>> + int **depth_slot;
>>   commit = (struct commit *)
>>   deref_tag(heads->objects[i++].item, 
>> NULL, 0);
>>   if (!commit || commit->object.type != 
>> OBJ_COMMIT) {
>>   commit = NULL;
>>   continue;
>>   }
>> - if (!commit->util)
>> - commit->util = xmalloc(sizeof(int));
>> - *(int *)commit->util = 0;
>> + depth_slot = commit_depth_at(, commit);
>> + if (!*depth_slot)
>> + *depth_slot = xmalloc(sizeof(int));
>> + **depth_slot = 0;
>
> It looks like we could save ourselves an extra layer of indirection (and
> tiny heap allocations) by just storing an "int" directly in the slab.
> Do we ever use the NULL as a sentinel value?
>
> Here we just allocate it if not set. Let's see if we can find some
> others...
>
>> @@ -116,25 +122,32 @@ struct commit_list *get_shallow_commits(struct 
>> object_array *heads, int depth,
>>   }
>>   commit->object.flags |= not_shallow_flag;
>>   for (p = commit->parents, commit = NULL; p; p = p->next) {
>> - if (!p->item->util) {
>> - int *pointer = xmalloc(sizeof(int));
>> - p->item->util = pointer;
>> - *pointer =  cur_depth;
>> + int **depth_slot = commit_depth_at(, p->item);
>> + if (!*depth_slot) {
>> + *depth_slot = xmalloc(sizeof(int));
>> + **depth_slot = cur_depth;
>>   } else {
>> - int *pointer = p->item->util;
>> - if (cur_depth >= *pointer)
>> + if (cur_depth >= **depth_slot)
>>   continue;
>> - *pointer = cur_depth;
>> + **depth_slot = cur_depth;
>>   }
>
> Here we malloc again if it's not set. But we do behave slightly
> differently when we see NULL, in that we do not bother to even compare
> against cur_depth. So if we were to directly store ints, we'd see "0" as
> the sentinel depth here, which would not match our "cur_depth >=
> depth_slot" check.
>
> So no, it wouldn't work to directly store depths with the code as
> written.  I'm not sure if the depth can ever be 0. If not, then it would
> be a suitable sentinel as:
>
>   int *slot = commit_depth_at(, p->item);
>   if (!*slot || cur_depth < *slot)
> *slot = cur_depth;
>
> But somebody would have to dig into the possible values of cur_depth
> there (which would make sense to do as a separate patch anyway, since
> the point of this is to be a direct conversion).

I actually tried that first, going with storing int directly in the
slab instead of int*. And some shallow tests failed so I didn't bother
(the goal was to get rid of 'util' pointer, not to optimize more)
-- 
Duy


Re: [PATCH v14 4/4] ls-remote: create '--sort' option

2018-05-12 Thread Jeff King
On Sat, May 12, 2018 at 10:45:23AM +0200, René Scharfe wrote:

> Why is fetch called outside of the test?  Its output is shown among the
> test messages, where it doesn't belong:
> 
> ok 23 - overrides work between mixed transfer/upload-pack hideRefs
> From /home/lsr/src/git/t/trash directory.t5512-ls-remote/
>  * [new branch]  master -> origin/master
> ok 24 - ls-remote --symref

Heh, I just noticed this yesterday, too, but ran out of time before
writing up the patch.

> -- >8 --
> Subject: [PATCH] t5512: run git fetch inside test
> 
> Do the preparatory fetch inside the test of ls-remote --symref to avoid
> cluttering the test output and to be able to catch unexpected fetch
> failures.

Yep, this looks obviously correct.

-Peff


Re: [BUG] git send-email: incorrectly parses email address with comma

2018-05-12 Thread Jeff King
On Sat, May 12, 2018 at 10:21:46AM +0200, Heinrich Schuchardt wrote:

> Git send-email allows to combine multiple email addresses in one
> parameter, e.g.
> 
> --to="a...@example.com, b...@example.com"
> 
> But email addresses may contain commas themselves:
> 
> --to="LASTNAME, firstname "
> 
> This may lead to an error:

If the name contains syntactically relevant metacharacters, it can be
quoted. So as a workaround, you can do:

  --to='"LASTNAME, firstname" '

I think rfc822 actually requires even names with just spaces in them to
be quoted, but git-send-email and most other mail programs are pretty
lax about allowing just about anything outside of the <>, so people tend
not to bother.

> If the string preceding a comma is not a valid email address do not
> split it off.

That might work as a heuristic, though "is a valid email address" is a
notoriously hard thing to check. Possibly looking for an "@" would catch
most common cases, though.

-Peff


Re: [PATCH 00/12] Die commit->util, die!

2018-05-12 Thread Jeff King
On Sat, May 12, 2018 at 10:00:16AM +0200, Nguyễn Thái Ngọc Duy wrote:

> There's not much to write here. It's basically a copy from 12/12:
> 
> This 'util' pointer can be used for many different purposes,
> controlled in different ways. Some are not even contained in a command
> code, but buried deep in common code with no clue who will use it and
> how. For example, if revs.show_source is set, then it's used for
> storing path name, but if you happen to call get_merge_parent() then
> some 'util' may end up storing another thing.
> 
> The move to using commit-slab gives us a much better picture of how
> some piece of data is associated with a commit and what for. Since
> nobody uses 'util' pointer anymore, we can retire it so that nobody will
> abuse it again. commit-slab will be the way forward for associating
> data to a commit.
> 
> As a side benefit, this shrinks struct commit by 8 bytes (on 64-bit
> architecture) which should help reduce memory usage for reachability
> test a bit. This is also what commit-slab is invented for [1].

I left a few comments, but overall this looks pretty good. A few of the
conversions get tricky with the number of pointer dereferences, but most
of those were pretty tricky to begin with (that weight stuff in
bisect.c...yikes!).

I love the result. More maintainable code, less possibility of conflicts
in the util field, and a memory savings to boot.

-Peff


Re: [PATCH 06/12] revision.c: use commit-slab for show_source

2018-05-12 Thread Jeff King
On Sat, May 12, 2018 at 10:00:22AM +0200, Nguyễn Thái Ngọc Duy wrote:

> diff --git a/revision.h b/revision.h
> index b8c47b98e2..72404e2599 100644
> --- a/revision.h
> +++ b/revision.h
> @@ -6,6 +6,7 @@
>  #include "notes.h"
>  #include "pretty.h"
>  #include "diff.h"
> +#include "commit-slab.h"
>  
>  /* Remember to update object flag allocation in object.h */
>  #define SEEN (1u<<0)
> @@ -29,6 +30,7 @@ struct rev_info;
>  struct log_info;
>  struct string_list;
>  struct saved_parents;
> +define_commit_slab(source_slab, char *);

Since this one is a global, can we give it a name that ties it to the
revision machinery? Like "revision_source_slab" or something?

I wonder if we can even drop the "slab" to make the name shorter (e.g.,
"revision_sources" or something).

It's a little ugly that everybody who includes revision.h gets the full
set of static functions defined. I wonder if we should have separate
declare/define macros to let these exist as real functions. Maybe it
doesn't really matter, though.

-Peff


Re: [PATCH 05/12] sequencer.c: use commit-slab to associate todo items to commits

2018-05-12 Thread Jeff King
On Sat, May 12, 2018 at 10:00:21AM +0200, Nguyễn Thái Ngọc Duy wrote:

> @@ -3446,9 +3451,9 @@ int rearrange_squash(void)
>   else if (!strchr(p, ' ') &&
>(commit2 =
> lookup_commit_reference_by_name(p)) &&
> -  commit2->util)
> +  *commit_todo_item_at(_todo, commit2))
>   /* found by commit name */
> - i2 = (struct todo_item *)commit2->util
> + i2 = *commit_todo_item_peek(_todo, 
> commit2)
>   - todo_list.items;

Directly dereferencing _peek() is an anti-pattern, since it may return
NULL. We know it's OK here because the earlier call to _at() would have
extended the slab. But it probably makes sense to use _at() in both
places then (or even to use _peek() for the earlier one if you want to
avoid extending, but as I said in the other message, I kind of
suspect most of these cases end up allocating slab space for most of the
commits anyway).

-Peff


[PATCH v2 2/2] git-credential-netrc: accept gpg option

2018-05-12 Thread Luis Marsano
git-credential-netrc was hardcoded to decrypt with 'gpg' regardless of
the gpg.program option. This is a problem on distributions like Debian
that call modern GnuPG something else, like 'gpg2'.
Set the command according to these settings in descending precedence
1. the git-credential-netrc command -g|--gpg option
2. the git gpg.program configuration option
3. the default: 'gpg'

For conformance with Documentation/CodingGuidelines
- use Git.pm for repository and global option queries
- document -g|--gpg command option in command usage
- test repository & command options
- write documentation placeholders according to main standards

Signed-off-by: Luis Marsano 
Acked-by: Ted Zlatanov 
---
 contrib/credential/netrc/git-credential-netrc | 58 ---
 .../netrc/t-git-credential-netrc.sh   |  2 +-
 .../credential/netrc/test.command-option-gpg  |  2 +
 contrib/credential/netrc/test.git-config-gpg  |  2 +
 contrib/credential/netrc/test.netrc.gpg   |  0
 contrib/credential/netrc/test.pl  | 22 ++-
 6 files changed, 62 insertions(+), 24 deletions(-)
 create mode 100755 contrib/credential/netrc/test.command-option-gpg
 create mode 100755 contrib/credential/netrc/test.git-config-gpg
 create mode 100644 contrib/credential/netrc/test.netrc.gpg

diff --git a/contrib/credential/netrc/git-credential-netrc 
b/contrib/credential/netrc/git-credential-netrc
index 1571a7b26..0b9a94102 100755
--- a/contrib/credential/netrc/git-credential-netrc
+++ b/contrib/credential/netrc/git-credential-netrc
@@ -2,11 +2,13 @@
 
 use strict;
 use warnings;
+use autodie;
 
 use Getopt::Long;
 use File::Basename;
+use Git;
 
-my $VERSION = "0.1";
+my $VERSION = "0.2";
 
 my %options = (
   help => 0,
@@ -54,6 +56,7 @@ GetOptions(\%options,
"insecure|k",
"verbose|v",
"file|f=s@",
+   'gpg|g:s',
   );
 
 if ($options{help}) {
@@ -62,27 +65,31 @@ if ($options{help}) {
 
print <

[PATCH v2 1/2] git-credential-netrc: adapt to test framework for git

2018-05-12 Thread Luis Marsano
git-credential-netrc tests did not run in a test repository.
Reuse the main test framework to stage a temporary repository.
To imitate Perl tests under t/
- switch to Test::More module
- use File::Basename & File::Spec::Functions

Signed-off-by: Luis Marsano 
Acked-by: Ted Zlatanov 
---
 contrib/credential/netrc/Makefile |  4 +-
 .../netrc/t-git-credential-netrc.sh   | 31 
 contrib/credential/netrc/test.pl  | 72 +++
 3 files changed, 77 insertions(+), 30 deletions(-)
 create mode 100755 contrib/credential/netrc/t-git-credential-netrc.sh

diff --git a/contrib/credential/netrc/Makefile 
b/contrib/credential/netrc/Makefile
index 51b76138a..0ffa40719 100644
--- a/contrib/credential/netrc/Makefile
+++ b/contrib/credential/netrc/Makefile
@@ -1,5 +1,5 @@
 test:
-   ./test.pl
+   ./t-git-credential-netrc.sh
 
 testverbose:
-   ./test.pl -d -v
+   ./t-git-credential-netrc.sh -d -v
diff --git a/contrib/credential/netrc/t-git-credential-netrc.sh 
b/contrib/credential/netrc/t-git-credential-netrc.sh
new file mode 100755
index 0..efb81efed
--- /dev/null
+++ b/contrib/credential/netrc/t-git-credential-netrc.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+(
+   cd ../../../t
+   test_description='git-credential-netrc'
+   . ./test-lib.sh
+
+   if ! test_have_prereq PERL; then
+   skip_all='skipping perl interface tests, perl not available'
+   test_done
+   fi
+
+   perl -MTest::More -e 0 2>/dev/null || {
+   skip_all="Perl Test::More unavailable, skipping test"
+   test_done
+   }
+
+   # set up test repository
+
+   test_expect_success \
+'set up test repository' \
+:
+
+   # The external test will outputs its own plan
+   test_external_has_tap=1
+
+   test_external \
+'git-credential-netrc' \
+perl "$TEST_DIRECTORY"/../contrib/credential/netrc/test.pl
+
+   test_done
+)
diff --git a/contrib/credential/netrc/test.pl b/contrib/credential/netrc/test.pl
index 169b6463c..ea95a2c8a 100755
--- a/contrib/credential/netrc/test.pl
+++ b/contrib/credential/netrc/test.pl
@@ -1,83 +1,99 @@
 #!/usr/bin/perl
+use lib (split(/:/, $ENV{GITPERLLIB}));
 
 use warnings;
 use strict;
-use Test;
+use Test::More qw(no_plan);
+use File::Basename;
+use File::Spec::Functions qw(:DEFAULT rel2abs);
 use IPC::Open2;
 
-BEGIN { plan tests => 15 }
+BEGIN {
+   # t-git-credential-netrc.sh kicks off our testing, so we have to go 
from there.
+   Test::More->builder->current_test(1);
+   Test::More->builder->no_ending(1);
+}
 
 my @global_credential_args = @ARGV;
-my $netrc = './test.netrc';
-print "# Testing insecure file, nothing should be found\n";
+my $scriptDir = dirname rel2abs $0;
+my $netrc = catfile $scriptDir, 'test.netrc';
+my $gcNetrc = catfile $scriptDir, 'git-credential-netrc';
+local $ENV{PATH} = join ':'
+  , $scriptDir
+  , $ENV{PATH}
+  ? $ENV{PATH}
+  : ();
+
+diag "Testing insecure file, nothing should be found\n";
 chmod 0644, $netrc;
 my $cred = run_credential(['-f', $netrc, 'get'],
  { host => 'github.com' });
 
-ok(scalar keys %$cred, 0, "Got 0 keys from insecure file");
+ok(scalar keys %$cred == 0, "Got 0 keys from insecure file");
 
-print "# Testing missing file, nothing should be found\n";
+diag "Testing missing file, nothing should be found\n";
 chmod 0644, $netrc;
 $cred = run_credential(['-f', '///nosuchfile///', 'get'],
   { host => 'github.com' });
 
-ok(scalar keys %$cred, 0, "Got 0 keys from missing file");
+ok(scalar keys %$cred == 0, "Got 0 keys from missing file");
 
 chmod 0600, $netrc;
 
-print "# Testing with invalid data\n";
+diag "Testing with invalid data\n";
 $cred = run_credential(['-f', $netrc, 'get'],
   "bad data");
-ok(scalar keys %$cred, 4, "Got first found keys with bad data");
+ok(scalar keys %$cred == 4, "Got first found keys with bad data");
 
-print "# Testing netrc file for a missing corovamilkbar entry\n";
+diag "Testing netrc file for a missing corovamilkbar entry\n";
 $cred = run_credential(['-f', $netrc, 'get'],
   { host => 'corovamilkbar' });
 
-ok(scalar keys %$cred, 0, "Got no corovamilkbar keys");
+ok(scalar keys %$cred == 0, "Got no corovamilkbar keys");
 
-print "# Testing netrc file for a github.com entry\n";
+diag "Testing netrc file for a github.com entry\n";
 $cred = run_credential(['-f', $netrc, 'get'],
   { host => 'github.com' });
 
-ok(scalar keys %$cred, 2, "Got 2 Github keys");
+ok(scalar keys %$cred == 2, "Got 2 Github keys");
 
-ok($cred->{password}, 'carolknows', "Got correct Github password");
-ok($cred->{username}, 'carol', "Got correct Github username");
+is($cred->{password}, 'carolknows', "Got correct Github password");
+is($cred->{username}, 'carol', 

[PATCH v2 0/2] Configurable GnuPG command for git-credential-netrc

2018-05-12 Thread Luis Marsano
Updated per Junio's comments.
- clarify commit messages
- explain usage placeholder style as conformance with main coding style
- drop Unicode dependence

Unicode punctuation was initially included for semantics & accessibility, since 
assistive technology like screenreaders handle … better than ...
However, character conversion between encodings is typically handled through 
Native Language Support such as gettext, which recommends untranslated strings 
in ASCII.
Therefore, strings remain in ASCII to ease extensibility in that direction.

The following changes since commit ccdcbd54c4475c2238b310f7113ab3075b5abc9c:

  The fifth batch for 2.18 (2018-05-08 15:59:49 +0900)

are available in the Git repository at:

  https://github.com/lmmarsano/git.git 

for you to fetch changes up to 62a76a1eb7906199b731858ae2d193cfdd3176e0:

  git-credential-netrc: accept gpg option (2018-05-12 03:27:45 -0400)


Luis Marsano (2):
  git-credential-netrc: adapt to test framework for git
  git-credential-netrc: accept gpg option

 contrib/credential/netrc/Makefile |  4 +-
 contrib/credential/netrc/git-credential-netrc | 58 +++-
 .../netrc/t-git-credential-netrc.sh   | 31 +++
 .../credential/netrc/test.command-option-gpg  |  2 +
 contrib/credential/netrc/test.git-config-gpg  |  2 +
 contrib/credential/netrc/test.netrc.gpg   |  0
 contrib/credential/netrc/test.pl  | 88 +--
 7 files changed, 135 insertions(+), 50 deletions(-)
 create mode 100755 contrib/credential/netrc/t-git-credential-netrc.sh
 create mode 100755 contrib/credential/netrc/test.command-option-gpg
 create mode 100755 contrib/credential/netrc/test.git-config-gpg
 create mode 100644 contrib/credential/netrc/test.netrc.gpg

-- 
2.17.0



Re: [PATCH 04/12] sequencer.c: use commit-slab to mark seen commits

2018-05-12 Thread Jeff King
On Sat, May 12, 2018 at 10:00:20AM +0200, Nguyễn Thái Ngọc Duy wrote:

> +define_commit_slab(commit_seen, int);

Yay, this one is nice and simple. :) This is what I had hoped for all
along with the slab concept.

-Peff


Re: [PATCH 01/12] blame: use commit-slab for blame suspects instead of commit->util

2018-05-12 Thread Jeff King
On Sat, May 12, 2018 at 10:00:17AM +0200, Nguyễn Thái Ngọc Duy wrote:

> +define_commit_slab(blame_suspects, struct blame_origin *);
> +static struct blame_suspects blame_suspects;
> +
> +struct blame_origin *get_blame_suspects(struct commit *commit)
> +{
> + struct blame_origin **result;
> +
> + result = blame_suspects_peek(_suspects, commit);
> +
> + return result ? *result : NULL;
> +}

Hmm. You need this helper because you want to be able to peek and get a
NULL. But that's already what _at() would return, with the only
difference that we may extend the slab just to return NULL.

I wonder how much it matters in practice. We'd generally be extending
the slab to hit every commit anyway in this case, I would think.

I suppose it doesn't actually simplify the code that much to do it that
way, though. We could get rid of this helper, but the caller would still
look like:

  for (p = *blame_suspects_at(o->commit); p; p = p->next)

which is actually slightly uglier than get_blame_suspects(), because we
have to do the pointer-dereference ourselves.

-Peff


Re: [PATCH 03/12] shallow.c: use commit-slab for commit depth instead of commit->util

2018-05-12 Thread Jeff King
On Sat, May 12, 2018 at 05:07:48AM -0400, Jeff King wrote:

> So no, it wouldn't work to directly store depths with the code as
> written.  I'm not sure if the depth can ever be 0. If not, then it would
> be a suitable sentinel as:
> 
>   int *slot = commit_depth_at(, p->item);
>   if (!*slot || cur_depth < *slot)
>   *slot = cur_depth;
> 
> But somebody would have to dig into the possible values of cur_depth
> there (which would make sense to do as a separate patch anyway, since
> the point of this is to be a direct conversion).

By the way, one other approach if xcalloc() doesn't produce a good
sentinel is to use a data type that does. ;) E.g., something like this
should work:

  struct depth {
unsigned valid:1;
int value;
  };
  define_commit_slab(commit_depth, struct depth);

  ...

  struct depth *slot = commit_depth_at(, p->item);
  if (!slot->valid || cur_depth < slot->value) {
slot->value = cur_depth;
slot->valid = 1;
  }

That wastes an extra 4 bytes per slot over storing an int directly, but
it's the same as storing an 8-byte pointer, plus you avoid the storage
and runtime overhead of malloc.

I actually wonder if we could wrap commit_slab with a variant that
stores the sentinel data itself, to make this pattern easier. I.e.,
something like:

  #define define_commit_slab_sentinel(name, type) \
struct name##_value { \
unsigned valid:1; \
type value; \
}; \
define_commit_slab(name, struct name##_value)

and some matching "peek" and "at" functions to manipulate value
directly.

I think the end result would be nicer, but it's turning into a little
bit of a rabbit hole. So I don't mind going with your direct conversion
here for now.

-Peff


Re: [PATCH 03/12] shallow.c: use commit-slab for commit depth instead of commit->util

2018-05-12 Thread Jeff King
On Sat, May 12, 2018 at 10:00:19AM +0200, Nguyễn Thái Ngọc Duy wrote:

> @@ -82,25 +84,29 @@ struct commit_list *get_shallow_commits(struct 
> object_array *heads, int depth,
>   struct object_array stack = OBJECT_ARRAY_INIT;
>   struct commit *commit = NULL;
>   struct commit_graft *graft;
> + struct commit_depth depths;
>  
> + init_commit_depth();
>   while (commit || i < heads->nr || stack.nr) {
>   struct commit_list *p;
>   if (!commit) {
>   if (i < heads->nr) {
> + int **depth_slot;
>   commit = (struct commit *)
>   deref_tag(heads->objects[i++].item, 
> NULL, 0);
>   if (!commit || commit->object.type != 
> OBJ_COMMIT) {
>   commit = NULL;
>   continue;
>   }
> - if (!commit->util)
> - commit->util = xmalloc(sizeof(int));
> - *(int *)commit->util = 0;
> + depth_slot = commit_depth_at(, commit);
> + if (!*depth_slot)
> + *depth_slot = xmalloc(sizeof(int));
> + **depth_slot = 0;

It looks like we could save ourselves an extra layer of indirection (and
tiny heap allocations) by just storing an "int" directly in the slab.
Do we ever use the NULL as a sentinel value?

Here we just allocate it if not set. Let's see if we can find some
others...

> @@ -116,25 +122,32 @@ struct commit_list *get_shallow_commits(struct 
> object_array *heads, int depth,
>   }
>   commit->object.flags |= not_shallow_flag;
>   for (p = commit->parents, commit = NULL; p; p = p->next) {
> - if (!p->item->util) {
> - int *pointer = xmalloc(sizeof(int));
> - p->item->util = pointer;
> - *pointer =  cur_depth;
> + int **depth_slot = commit_depth_at(, p->item);
> + if (!*depth_slot) {
> + *depth_slot = xmalloc(sizeof(int));
> + **depth_slot = cur_depth;
>   } else {
> - int *pointer = p->item->util;
> - if (cur_depth >= *pointer)
> + if (cur_depth >= **depth_slot)
>   continue;
> - *pointer = cur_depth;
> + **depth_slot = cur_depth;
>   }

Here we malloc again if it's not set. But we do behave slightly
differently when we see NULL, in that we do not bother to even compare
against cur_depth. So if we were to directly store ints, we'd see "0" as
the sentinel depth here, which would not match our "cur_depth >=
depth_slot" check.

So no, it wouldn't work to directly store depths with the code as
written.  I'm not sure if the depth can ever be 0. If not, then it would
be a suitable sentinel as:

  int *slot = commit_depth_at(, p->item);
  if (!*slot || cur_depth < *slot)
*slot = cur_depth;

But somebody would have to dig into the possible values of cur_depth
there (which would make sense to do as a separate patch anyway, since
the point of this is to be a direct conversion).

-Peff


Re: [PATCH] fast-export: avoid NULL pointer arithmetic

2018-05-12 Thread René Scharfe
Am 12.05.2018 um 10:45 schrieb René Scharfe:
> Or we could roll our own custom hash map, as I mused in an earlier post.
> That would duplicate quite a bit of code; are there reusable pieces
> hidden within that could be extracted into common functions?

At least it would allow us to save four bytes of padding per object on
x64 by using a separate array for the hash map values; not sure how that
would impact performance, though.

---
 builtin/fast-export.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 627b0032f3..086fcaf9ea 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -72,13 +72,13 @@ static int parse_opt_tag_of_filtered_mode(const struct 
option *opt,
 
 struct object_mark_entry {
const struct object *base;
-   uint32_t mark;
 };
 
 struct object_marks {
unsigned int size;
unsigned int nr;
struct object_mark_entry *entries;
+   uint32_t *marks;
 };
 
 static struct object_marks idnums;
@@ -98,14 +98,14 @@ static void set_object_mark(struct object_marks *n, const 
struct object *base,
 
while (entries[j].base) {
if (entries[j].base == base) {
-   entries[j].mark = mark;
+   n->marks[j] = mark;
return;
}
if (++j >= size)
j = 0;
}
entries[j].base = base;
-   entries[j].mark = mark;
+   n->marks[j] = mark;
n->nr++;
 }
 
@@ -114,19 +114,22 @@ static void grow_object_marks(struct object_marks *n)
unsigned int i;
unsigned int old_size = n->size;
struct object_mark_entry *old_entries = n->entries;
+   uint32_t *old_marks = n->marks;
 
n->size = (old_size + 1000) * 3 / 2;
n->entries = xcalloc(n->size, sizeof(n->entries[0]));
+   n->marks = xcalloc(n->size, sizeof(n->marks[0]));
n->nr = 0;
 
for (i = 0; i < old_size; i++) {
const struct object *base = old_entries[i].base;
-   uint32_t mark = old_entries[i].mark;
+   uint32_t mark = old_marks[i];
 
if (mark)
set_object_mark(n, base, mark);
}
free(old_entries);
+   free(old_marks);
 }
 
 static int has_unshown_parent(struct commit *commit)
@@ -236,7 +239,7 @@ static int get_object_mark(struct object *object)
for (;;) {
struct object_mark_entry *ref = idnums.entries + j;
if (ref->base == object)
-   return ref->mark;
+   return idnums.marks[j];
if (!ref->base)
return 0;
if (++j == idnums.size)
@@ -966,7 +969,7 @@ static void export_marks(char *file)
 
for (i = 0; i < idnums.size; i++) {
if (entry->base && entry->base->type == 1) {
-   if (fprintf(f, ":%"PRIu32" %s\n", entry->mark,
+   if (fprintf(f, ":%"PRIu32" %s\n", idnums.marks[i],
oid_to_hex(>base->oid)) < 0) {
e = 1;
break;
-- 
2.17.0



Re: [PATCH v14 4/4] ls-remote: create '--sort' option

2018-05-12 Thread René Scharfe
Am 09.04.2018 um 03:42 schrieb Harald Nordgren:
> diff --git a/t/t5512-ls-remote.sh b/t/t5512-ls-remote.sh
> index 02106c922..83cd35c39 100755
> --- a/t/t5512-ls-remote.sh
> +++ b/t/t5512-ls-remote.sh

> @@ -170,14 +206,18 @@ test_expect_success 'overrides work between mixed 
> transfer/upload-pack hideRefs'
>   grep refs/tags/magic actual
>   '
>   
> +git fetch origin
>   test_expect_success 'ls-remote --symref' '
> - cat >expect <<-\EOF &&
> + cat >expect <<-EOF &&
>   ref: refs/heads/master  HEAD
> - 1bd44cb9d13204b0fe1958db0082f5028a16eb3aHEAD
> - 1bd44cb9d13204b0fe1958db0082f5028a16eb3arefs/heads/master
> - 1bd44cb9d13204b0fe1958db0082f5028a16eb3arefs/remotes/origin/HEAD
> - 1bd44cb9d13204b0fe1958db0082f5028a16eb3a
> refs/remotes/origin/master
> - 1bd44cb9d13204b0fe1958db0082f5028a16eb3arefs/tags/mark
> + $(git rev-parse HEAD)   HEAD
> + $(git rev-parse refs/heads/master)  refs/heads/master
> + $(git rev-parse HEAD)   refs/remotes/origin/HEAD
> + $(git rev-parse refs/remotes/origin/master) 
> refs/remotes/origin/master
> + $(git rev-parse refs/tags/mark) refs/tags/mark
> + $(git rev-parse refs/tags/mark1.1)  refs/tags/mark1.1
> + $(git rev-parse refs/tags/mark1.10) refs/tags/mark1.10
> + $(git rev-parse refs/tags/mark1.2)  refs/tags/mark1.2
>   EOF
>   git ls-remote --symref >actual &&
>   test_cmp expect actual

Why is fetch called outside of the test?  Its output is shown among the
test messages, where it doesn't belong:

ok 23 - overrides work between mixed transfer/upload-pack hideRefs
From /home/lsr/src/git/t/trash directory.t5512-ls-remote/
 * [new branch]  master -> origin/master
ok 24 - ls-remote --symref

-- >8 --
Subject: [PATCH] t5512: run git fetch inside test

Do the preparatory fetch inside the test of ls-remote --symref to avoid
cluttering the test output and to be able to catch unexpected fetch
failures.

Signed-off-by: Rene Scharfe 
---
 t/t5512-ls-remote.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/t5512-ls-remote.sh b/t/t5512-ls-remote.sh
index 83cd35c391..6a949484d0 100755
--- a/t/t5512-ls-remote.sh
+++ b/t/t5512-ls-remote.sh
@@ -206,8 +206,8 @@ test_expect_success 'overrides work between mixed 
transfer/upload-pack hideRefs'
grep refs/tags/magic actual
 '
 
-git fetch origin
 test_expect_success 'ls-remote --symref' '
+   git fetch origin &&
cat >expect <<-EOF &&
ref: refs/heads/master  HEAD
$(git rev-parse HEAD)   HEAD
-- 
2.17.0


Re: [PATCH] fast-export: avoid NULL pointer arithmetic

2018-05-12 Thread René Scharfe
Am 11.05.2018 um 19:42 schrieb Jeff King:
> On Fri, May 11, 2018 at 03:34:19PM +0200, Duy Nguyen wrote:
> 
>> On Fri, May 11, 2018 at 03:11:46PM +0200, Duy Nguyen wrote:
>>> Back to fast-export, can we just allocate a new int on heap and point
>>> it there? Allocating small pieces becomes quite cheap and fast with
>>> mem-pool.h and we can avoid this storing integer in pointer business.
>>
>> Something like this seems to work, but we use 4-ish more bytes per
>> object, or 100MB overhead on a repo with 25M objects. I think it's a
>> reasonable trade off.
> 
> I'm not sure I agree. 4 bytes per object certainly isn't the end of the
> world, but what was the problem we were solving in the first place? Just
> that we weren't comfortable with the round-trip from uintptr_t to void
> and back? Is this actually a problem on real platforms? If not, it seems
> silly to incur a run-time cost.

Storing integer values in pointers is a trick that seems to have worked
so far for fast-export.  A portable way to avoid that trick without
requiring more memory would be to use a union.

Or we could roll our own custom hash map, as I mused in an earlier post.
That would duplicate quite a bit of code; are there reusable pieces
hidden within that could be extracted into common functions?

---
 builtin/fast-export.c | 105 --
 1 file changed, 81 insertions(+), 24 deletions(-)

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 530df12f05..627b0032f3 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -14,7 +14,6 @@
 #include "diffcore.h"
 #include "log-tree.h"
 #include "revision.h"
-#include "decorate.h"
 #include "string-list.h"
 #include "utf8.h"
 #include "parse-options.h"
@@ -71,9 +70,65 @@ static int parse_opt_tag_of_filtered_mode(const struct 
option *opt,
return 0;
 }
 
-static struct decoration idnums;
+struct object_mark_entry {
+   const struct object *base;
+   uint32_t mark;
+};
+
+struct object_marks {
+   unsigned int size;
+   unsigned int nr;
+   struct object_mark_entry *entries;
+};
+
+static struct object_marks idnums;
 static uint32_t last_idnum;
 
+static unsigned int hash_obj(const struct object *obj, unsigned int n)
+{
+   return sha1hash(obj->oid.hash) % n;
+}
+
+static void set_object_mark(struct object_marks *n, const struct object *base,
+   uint32_t mark)
+{
+   unsigned int size = n->size;
+   struct object_mark_entry *entries = n->entries;
+   unsigned int j = hash_obj(base, size);
+
+   while (entries[j].base) {
+   if (entries[j].base == base) {
+   entries[j].mark = mark;
+   return;
+   }
+   if (++j >= size)
+   j = 0;
+   }
+   entries[j].base = base;
+   entries[j].mark = mark;
+   n->nr++;
+}
+
+static void grow_object_marks(struct object_marks *n)
+{
+   unsigned int i;
+   unsigned int old_size = n->size;
+   struct object_mark_entry *old_entries = n->entries;
+
+   n->size = (old_size + 1000) * 3 / 2;
+   n->entries = xcalloc(n->size, sizeof(n->entries[0]));
+   n->nr = 0;
+
+   for (i = 0; i < old_size; i++) {
+   const struct object *base = old_entries[i].base;
+   uint32_t mark = old_entries[i].mark;
+
+   if (mark)
+   set_object_mark(n, base, mark);
+   }
+   free(old_entries);
+}
+
 static int has_unshown_parent(struct commit *commit)
 {
struct commit_list *parent;
@@ -156,20 +211,13 @@ static void anonymize_path(struct strbuf *out, const char 
*path,
}
 }
 
-/* Since intptr_t is C99, we do not use it here */
-static inline uint32_t *mark_to_ptr(uint32_t mark)
-{
-   return ((uint32_t *)NULL) + mark;
-}
-
-static inline uint32_t ptr_to_mark(void * mark)
-{
-   return (uint32_t *)mark - (uint32_t *)NULL;
-}
-
 static inline void mark_object(struct object *object, uint32_t mark)
 {
-   add_decoration(, object, mark_to_ptr(mark));
+   unsigned int nr = idnums.nr + 1;
+
+   if (nr > idnums.size * 2 / 3)
+   grow_object_marks();
+   return set_object_mark(, object, mark);
 }
 
 static inline void mark_next_object(struct object *object)
@@ -179,10 +227,21 @@ static inline void mark_next_object(struct object *object)
 
 static int get_object_mark(struct object *object)
 {
-   void *decoration = lookup_decoration(, object);
-   if (!decoration)
+   unsigned int j;
+
+   /* nothing to lookup */
+   if (!idnums.size)
return 0;
-   return ptr_to_mark(decoration);
+   j = hash_obj(object, idnums.size);
+   for (;;) {
+   struct object_mark_entry *ref = idnums.entries + j;
+   if (ref->base == object)
+   return ref->mark;
+   if (!ref->base)
+   return 0;
+   if (++j == 

Re: Bug report for git push

2018-05-12 Thread Jeff King
On Fri, May 11, 2018 at 09:44:54PM -0400, Surenkumar Nihalani wrote:

> Push summary: [remote rejected] (cannot lock ref 'refs/heads/master': is at 
> cf2cc0c147d8215ec87d3ddaf32f0b2c58630423 but expected 
> fdda486ad43a6e6b5dc5f2795ce27124e0686752)

This generally indicates that somebody was pushing at the same time as
you, and you lost the race. The push conversation works basically like
this:

 1. server advertise master at some sha1 (like fdda486ad)

 2. client checks that moving from fdda486ad to whatever the new value is
(let's say 1234abcd) matches its criteria (e.g., not a fast-forward)
and prepares a pack with the appropriate objects

 3. client tells server "update master from fdda486ad to 1234abcd"

 4. server locks master and then under lock checks that it's still at
fdda486ad. In this case it's not, so it aborts the lock. This is
likely due to somebody else pushing in the interim.

If you push again at this point, the client will notice in step that
it's not a fast-forward and give you the "somebody else has pushed, you
need to pull first" error message.

The only difference here is that because of the timing (i.e., somebody
pushing at the exact same time as you), only the server's locking
operation noticed the conflict. But the fix is the same (pull their
work, then push).

-Peff


[BUG] git send-email: incorrectly parses email address with comma

2018-05-12 Thread Heinrich Schuchardt
Git send-email allows to combine multiple email addresses in one
parameter, e.g.

--to="a...@example.com, b...@example.com"

But email addresses may contain commas themselves:

--to="LASTNAME, firstname "

This may lead to an error:
$ git send-email --to="Schuchardt, Heinrich " \
*.patch-cover-letter.patch
(mbox) Adding cc: Heinrich Schuchardt  from
line 'From: Heinrich Schuchardt '

From: Heinrich Schuchardt 
To: Schuchardt,
Heinrich 
Subject: [PATCH 0/2] efi_loader: adjust definitions of variable services
Date: Sat, 12 May 2018 10:01:21 +0200
Message-Id: <20180512080121.26620-1-xypron.g...@example.com>
X-Mailer: git-send-email 2.17.0

Send this email? ([y]es|[n]o|[q]uit|[a]ll): a
Password for 'smtp://xypron.g...@example.com@mail.example.com:587':
Syntax error in parameters or arguments



Please, implement the following logic,

If the string preceding a comma is not a valid email address do not
split it off.

Best regards

Heinrich Schuchardt


Re: [PATCH v3] add status config and command line options for rename detection

2018-05-12 Thread Eckhard Maaß
On Fri, May 11, 2018 at 12:56:39PM +, Ben Peart wrote:
> After performing a merge that has conflicts git status will, by default,
> attempt to detect renames which causes many objects to be examined.  In a
> virtualized repo, those objects do not exist locally so the rename logic
> triggers them to be fetched from the server. This results in the status call
> taking hours to complete on very large repos vs seconds with this patch.

I see where your need comes from, but as you based this on my little
patch one can achieve this already with tweaking diff.renames itself. I
do wonder why there is a special need for the status command here. And
if there is, I personally would like it more in a style that you could
take all the options provided by diff.*-configuration and prefix that
with status, eg status.diff.renames = true. What do you think? If you
really only need this for merges, maybe a more specialised option is
called for that only kicks in when there is a merge going on?

I would like that status behaves as similar as possible to
diff/show/log. Special options will pull away from that again - passing
-m to show or log will lead to the same performance issues, correct?
Could it be feasible to impose an overall time limit on the detection?

And after writing this I wonder what were your experience with just
tweaking renameLimit - setting it very low should have helped the
fetching from server part already, shouldn't it?

> Add --no-renames command line option to status that enables overriding the
> config setting from the command line. Add --find-renames[=] command line
> option to status that enables detecting renames and optionally setting the
> similarity index.

Would it be reasonable to extend this so that we just use the same
machinery for parsing command line options for the diffcore options and
pass this along? It seems to me that git status wants the same init as
diff/show/log has anyway. But I like the direction towards passing more
command line options to the git status command. 

>  static void wt_longstatus_print_unmerged_header(struct wt_status *s)
> @@ -592,6 +595,9 @@ static void wt_status_collect_changes_worktree(struct 
> wt_status *s)
>   }
>   rev.diffopt.format_callback = wt_status_collect_changed_cb;
>   rev.diffopt.format_callback_data = s;
> + rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : 
> rev.diffopt.detect_rename;
> + rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : 
> rev.diffopt.rename_limit;
> + rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : 
> rev.diffopt.rename_score;
>   copy_pathspec(_data, >pathspec);
>   run_diff_files(, 0);
>  }
> @@ -625,6 +631,9 @@ static void wt_status_collect_changes_index(struct 
> wt_status *s)
>   rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK;
>   rev.diffopt.format_callback = wt_status_collect_updated_cb;
>   rev.diffopt.format_callback_data = s;
> + rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : 
> rev.diffopt.detect_rename;
> + rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : 
> rev.diffopt.rename_limit;
> + rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : 
> rev.diffopt.rename_score;
>   copy_pathspec(_data, >pathspec);
>   run_diff_index(, 1);
>  }
> @@ -982,6 +991,9 @@ static void wt_longstatus_print_verbose(struct wt_status 
> *s)
>   setup_revisions(0, NULL, , );
>  
>   rev.diffopt.output_format |= DIFF_FORMAT_PATCH;
> + rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : 
> rev.diffopt.detect_rename;
> + rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : 
> rev.diffopt.rename_limit;
> + rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : 
> rev.diffopt.rename_score;
>   rev.diffopt.file = s->fp;
>   rev.diffopt.close_file = 0;
>   /*

Somehow I am inclined that those should be factored out to a common
method if the rest of the patch stays as it is.

Greetings,
Eckhard


[PATCH 03/12] shallow.c: use commit-slab for commit depth instead of commit->util

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

While at there, plug a leak for keeping track of depth in this code.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 shallow.c | 37 +
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/shallow.c b/shallow.c
index df4d44ea7a..6ea411b0d2 100644
--- a/shallow.c
+++ b/shallow.c
@@ -12,6 +12,7 @@
 #include "commit-slab.h"
 #include "revision.h"
 #include "list-objects.h"
+#include "commit-slab.h"
 
 static int is_shallow = -1;
 static struct stat_validity shallow_stat;
@@ -74,6 +75,7 @@ int is_repository_shallow(void)
return is_shallow;
 }
 
+define_commit_slab(commit_depth, int *);
 struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
int shallow_flag, int not_shallow_flag)
 {
@@ -82,25 +84,29 @@ struct commit_list *get_shallow_commits(struct object_array 
*heads, int depth,
struct object_array stack = OBJECT_ARRAY_INIT;
struct commit *commit = NULL;
struct commit_graft *graft;
+   struct commit_depth depths;
 
+   init_commit_depth();
while (commit || i < heads->nr || stack.nr) {
struct commit_list *p;
if (!commit) {
if (i < heads->nr) {
+   int **depth_slot;
commit = (struct commit *)
deref_tag(heads->objects[i++].item, 
NULL, 0);
if (!commit || commit->object.type != 
OBJ_COMMIT) {
commit = NULL;
continue;
}
-   if (!commit->util)
-   commit->util = xmalloc(sizeof(int));
-   *(int *)commit->util = 0;
+   depth_slot = commit_depth_at(, commit);
+   if (!*depth_slot)
+   *depth_slot = xmalloc(sizeof(int));
+   **depth_slot = 0;
cur_depth = 0;
} else {
commit = (struct commit *)
object_array_pop();
-   cur_depth = *(int *)commit->util;
+   cur_depth = **commit_depth_peek(, 
commit);
}
}
parse_commit_or_die(commit);
@@ -116,25 +122,32 @@ struct commit_list *get_shallow_commits(struct 
object_array *heads, int depth,
}
commit->object.flags |= not_shallow_flag;
for (p = commit->parents, commit = NULL; p; p = p->next) {
-   if (!p->item->util) {
-   int *pointer = xmalloc(sizeof(int));
-   p->item->util = pointer;
-   *pointer =  cur_depth;
+   int **depth_slot = commit_depth_at(, p->item);
+   if (!*depth_slot) {
+   *depth_slot = xmalloc(sizeof(int));
+   **depth_slot = cur_depth;
} else {
-   int *pointer = p->item->util;
-   if (cur_depth >= *pointer)
+   if (cur_depth >= **depth_slot)
continue;
-   *pointer = cur_depth;
+   **depth_slot = cur_depth;
}
if (p->next)
add_object_array(>item->object,
NULL, );
else {
commit = p->item;
-   cur_depth = *(int *)commit->util;
+   depth_slot = commit_depth_peek(, commit);
+   cur_depth = **depth_slot;
}
}
}
+   for (i = 0; i < depths.slab_count; i++) {
+   int j;
+
+   for (j = 0; j < depths.slab_size; j++)
+   free(depths.slab[i][j]);
+   }
+   clear_commit_depth();
 
return result;
 }
-- 
2.17.0.705.g3525833791



[PATCH 00/12] Die commit->util, die!

2018-05-12 Thread Nguyễn Thái Ngọc Duy
There's not much to write here. It's basically a copy from 12/12:

This 'util' pointer can be used for many different purposes,
controlled in different ways. Some are not even contained in a command
code, but buried deep in common code with no clue who will use it and
how. For example, if revs.show_source is set, then it's used for
storing path name, but if you happen to call get_merge_parent() then
some 'util' may end up storing another thing.

The move to using commit-slab gives us a much better picture of how
some piece of data is associated with a commit and what for. Since
nobody uses 'util' pointer anymore, we can retire it so that nobody will
abuse it again. commit-slab will be the way forward for associating
data to a commit.

As a side benefit, this shrinks struct commit by 8 bytes (on 64-bit
architecture) which should help reduce memory usage for reachability
test a bit. This is also what commit-slab is invented for [1].

[1] 96c4f4a370 (commit: allow associating auxiliary info on-demand -
2013-04-09)

Nguyễn Thái Ngọc Duy (12):
  blame: use commit-slab for blame suspects instead of commit->util
  describe: use commit-slab for commit names instead of commit->util
  shallow.c: use commit-slab for commit depth instead of commit->util
  sequencer.c: use commit-slab to mark seen commits
  sequencer.c: use commit-slab to associate todo items to commits
  revision.c: use commit-slab for show_source
  bisect.c: use commit-slab for commit weight instead of commit->util
  name-rev: use commit-slab for rev-name instead of commit->util
  show-branch: use commit-slab for commit-name instead of commit->util
  log: use commit-slab in prepare_bases() instead of commit->util
  merge: use commit-slab in merge remote desc instead of commit->util
  commit.h: delete 'util' field in struct commit

 bisect.c  | 12 +---
 blame.c   | 42 +++---
 blame.h   |  2 ++
 builtin/blame.c   |  2 +-
 builtin/describe.c| 16 +---
 builtin/fast-export.c | 14 +-
 builtin/log.c | 17 +
 builtin/merge.c   | 25 +
 builtin/name-rev.c| 23 ---
 builtin/show-branch.c | 39 +++
 commit.c  | 12 ++--
 commit.h  |  8 ++--
 log-tree.c|  8 ++--
 merge-recursive.c |  8 +---
 revision.c| 17 +
 revision.h|  5 -
 sequencer.c   | 24 ++--
 shallow.c | 37 +
 18 files changed, 225 insertions(+), 86 deletions(-)

-- 
2.17.0.705.g3525833791



[PATCH 01/12] blame: use commit-slab for blame suspects instead of commit->util

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 blame.c | 42 +++---
 blame.h |  2 ++
 builtin/blame.c |  2 +-
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/blame.c b/blame.c
index 78c9808bd1..18e8bd996a 100644
--- a/blame.c
+++ b/blame.c
@@ -6,6 +6,24 @@
 #include "diffcore.h"
 #include "tag.h"
 #include "blame.h"
+#include "commit-slab.h"
+
+define_commit_slab(blame_suspects, struct blame_origin *);
+static struct blame_suspects blame_suspects;
+
+struct blame_origin *get_blame_suspects(struct commit *commit)
+{
+   struct blame_origin **result;
+
+   result = blame_suspects_peek(_suspects, commit);
+
+   return result ? *result : NULL;
+}
+
+static void set_blame_suspects(struct commit *commit, struct blame_origin 
*origin)
+{
+   *blame_suspects_at(_suspects, commit) = origin;
+}
 
 void blame_origin_decref(struct blame_origin *o)
 {
@@ -15,12 +33,12 @@ void blame_origin_decref(struct blame_origin *o)
blame_origin_decref(o->previous);
free(o->file.ptr);
/* Should be present exactly once in commit chain */
-   for (p = o->commit->util; p; l = p, p = p->next) {
+   for (p = get_blame_suspects(o->commit); p; l = p, p = p->next) {
if (p == o) {
if (l)
l->next = p->next;
else
-   o->commit->util = p->next;
+   set_blame_suspects(o->commit, p->next);
free(o);
return;
}
@@ -41,8 +59,8 @@ static struct blame_origin *make_origin(struct commit 
*commit, const char *path)
FLEX_ALLOC_STR(o, path, path);
o->commit = commit;
o->refcnt = 1;
-   o->next = commit->util;
-   commit->util = o;
+   o->next = get_blame_suspects(commit);
+   set_blame_suspects(commit, o);
return o;
 }
 
@@ -54,13 +72,13 @@ static struct blame_origin *get_origin(struct commit 
*commit, const char *path)
 {
struct blame_origin *o, *l;
 
-   for (o = commit->util, l = NULL; o; l = o, o = o->next) {
+   for (o = get_blame_suspects(commit), l = NULL; o; l = o, o = o->next) {
if (!strcmp(o->path, path)) {
/* bump to front */
if (l) {
l->next = o->next;
-   o->next = commit->util;
-   commit->util = o;
+   o->next = get_blame_suspects(commit);
+   set_blame_suspects(commit, o);
}
return blame_origin_incref(o);
}
@@ -478,7 +496,7 @@ static void queue_blames(struct blame_scoreboard *sb, 
struct blame_origin *porig
porigin->suspects = blame_merge(porigin->suspects, sorted);
else {
struct blame_origin *o;
-   for (o = porigin->commit->util; o; o = o->next) {
+   for (o = get_blame_suspects(porigin->commit); o; o = o->next) {
if (o->suspects) {
porigin->suspects = sorted;
return;
@@ -525,7 +543,7 @@ static struct blame_origin *find_origin(struct commit 
*parent,
const char *paths[2];
 
/* First check any existing origins */
-   for (porigin = parent->util; porigin; porigin = porigin->next)
+   for (porigin = get_blame_suspects(parent); porigin; porigin = 
porigin->next)
if (!strcmp(porigin->path, origin->path)) {
/*
 * The same path between origin and its parent
@@ -1550,7 +1568,7 @@ void assign_blame(struct blame_scoreboard *sb, int opt)
 
while (commit) {
struct blame_entry *ent;
-   struct blame_origin *suspect = commit->util;
+   struct blame_origin *suspect = get_blame_suspects(commit);
 
/* find one suspect to break down */
while (suspect && !suspect->suspects)
@@ -1752,6 +1770,8 @@ void setup_scoreboard(struct blame_scoreboard *sb, const 
char *path, struct blam
struct commit *final_commit = NULL;
enum object_type type;
 
+   init_blame_suspects(_suspects);
+
if (sb->reverse && sb->contents_from)
die(_("--contents and --reverse do not blend well."));
 
@@ -1815,7 +1835,7 @@ void setup_scoreboard(struct blame_scoreboard *sb, const 
char *path, struct blam
}
 
if (is_null_oid(>final->object.oid)) {
-   o = sb->final->util;
+   o = 

[PATCH 09/12] show-branch: use commit-slab for commit-name instead of commit->util

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/show-branch.c | 39 +++
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/builtin/show-branch.c b/builtin/show-branch.c
index 6c2148b71d..29d15d16d2 100644
--- a/builtin/show-branch.c
+++ b/builtin/show-branch.c
@@ -7,6 +7,7 @@
 #include "argv-array.h"
 #include "parse-options.h"
 #include "dir.h"
+#include "commit-slab.h"
 
 static const char* show_branch_usage[] = {
 N_("git show-branch [-a | --all] [-r | --remotes] [--topo-order | 
--date-order]\n"
@@ -59,15 +60,27 @@ struct commit_name {
int generation; /* how many parents away from head_name */
 };
 
+define_commit_slab(commit_name_slab, struct commit_name *);
+static struct commit_name_slab name_slab;
+
+static struct commit_name *commit_to_name(struct commit *commit)
+{
+   return *commit_name_slab_at(_slab, commit);
+}
+
+
 /* Name the commit as nth generation ancestor of head_name;
  * we count only the first-parent relationship for naming purposes.
  */
 static void name_commit(struct commit *commit, const char *head_name, int nth)
 {
struct commit_name *name;
-   if (!commit->util)
-   commit->util = xmalloc(sizeof(struct commit_name));
-   name = commit->util;
+
+   name = *commit_name_slab_at(_slab, commit);
+   if (!name) {
+   name = xmalloc(sizeof(*name));
+   *commit_name_slab_at(_slab, commit) = name;
+   }
name->head_name = head_name;
name->generation = nth;
 }
@@ -79,8 +92,8 @@ static void name_commit(struct commit *commit, const char 
*head_name, int nth)
  */
 static void name_parent(struct commit *commit, struct commit *parent)
 {
-   struct commit_name *commit_name = commit->util;
-   struct commit_name *parent_name = parent->util;
+   struct commit_name *commit_name = commit_to_name(commit);
+   struct commit_name *parent_name = commit_to_name(parent);
if (!commit_name)
return;
if (!parent_name ||
@@ -94,12 +107,12 @@ static int name_first_parent_chain(struct commit *c)
int i = 0;
while (c) {
struct commit *p;
-   if (!c->util)
+   if (!commit_to_name(c))
break;
if (!c->parents)
break;
p = c->parents->item;
-   if (!p->util) {
+   if (!commit_to_name(p)) {
name_parent(c, p);
i++;
}
@@ -122,7 +135,7 @@ static void name_commits(struct commit_list *list,
/* First give names to the given heads */
for (cl = list; cl; cl = cl->next) {
c = cl->item;
-   if (c->util)
+   if (commit_to_name(c))
continue;
for (i = 0; i < num_rev; i++) {
if (rev[i] == c) {
@@ -148,9 +161,9 @@ static void name_commits(struct commit_list *list,
struct commit_name *n;
int nth;
c = cl->item;
-   if (!c->util)
+   if (!commit_to_name(c))
continue;
-   n = c->util;
+   n = commit_to_name(c);
parents = c->parents;
nth = 0;
while (parents) {
@@ -158,7 +171,7 @@ static void name_commits(struct commit_list *list,
struct strbuf newname = STRBUF_INIT;
parents = parents->next;
nth++;
-   if (p->util)
+   if (commit_to_name(p))
continue;
switch (n->generation) {
case 0:
@@ -271,7 +284,7 @@ static void show_one_commit(struct commit *commit, int 
no_name)
 {
struct strbuf pretty = STRBUF_INIT;
const char *pretty_str = "(unavailable)";
-   struct commit_name *name = commit->util;
+   struct commit_name *name = commit_to_name(commit);
 
if (commit->object.parsed) {
pp_commit_easy(CMIT_FMT_ONELINE, commit, );
@@ -660,6 +673,8 @@ int cmd_show_branch(int ac, const char **av, const char 
*prefix)
OPT_END()
};
 
+   init_commit_name_slab(_slab);
+
git_config(git_show_branch_config, NULL);
 
/* If nothing is specified, try the default first */
-- 
2.17.0.705.g3525833791



[PATCH 08/12] name-rev: use commit-slab for rev-name instead of commit->util

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/name-rev.c | 23 ---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index 387ddf85d2..0eb440359d 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -6,6 +6,7 @@
 #include "refs.h"
 #include "parse-options.h"
 #include "sha1-lookup.h"
+#include "commit-slab.h"
 
 #define CUTOFF_DATE_SLOP 86400 /* one day */
 
@@ -17,11 +18,26 @@ typedef struct rev_name {
int from_tag;
 } rev_name;
 
+define_commit_slab(commit_rev_name, struct rev_name *);
+
 static timestamp_t cutoff = TIME_MAX;
+static struct commit_rev_name rev_names;
 
 /* How many generations are maximally preferred over _one_ merge traversal? */
 #define MERGE_TRAVERSAL_WEIGHT 65535
 
+static struct rev_name *get_commit_rev_name(struct commit *commit)
+{
+   struct rev_name **slot = commit_rev_name_peek(_names, commit);
+
+   return slot ? *slot : NULL;
+}
+
+static void set_commit_rev_name(struct commit *commit, struct rev_name *name)
+{
+   *commit_rev_name_at(_names, commit) = name;
+}
+
 static int is_better_name(struct rev_name *name,
  const char *tip_name,
  timestamp_t taggerdate,
@@ -65,7 +81,7 @@ static void name_rev(struct commit *commit,
int generation, int distance, int from_tag,
int deref)
 {
-   struct rev_name *name = (struct rev_name *)commit->util;
+   struct rev_name *name = get_commit_rev_name(commit);
struct commit_list *parents;
int parent_number = 1;
char *to_free = NULL;
@@ -84,7 +100,7 @@ static void name_rev(struct commit *commit,
 
if (name == NULL) {
name = xmalloc(sizeof(rev_name));
-   commit->util = name;
+   set_commit_rev_name(commit, name);
goto copy_data;
} else if (is_better_name(name, tip_name, taggerdate,
  generation, distance, from_tag)) {
@@ -296,7 +312,7 @@ static const char *get_rev_name(const struct object *o, 
struct strbuf *buf)
if (o->type != OBJ_COMMIT)
return get_exact_ref_match(o);
c = (struct commit *) o;
-   n = c->util;
+   n = get_commit_rev_name(c);
if (!n)
return NULL;
 
@@ -413,6 +429,7 @@ int cmd_name_rev(int argc, const char **argv, const char 
*prefix)
OPT_END(),
};
 
+   init_commit_rev_name(_names);
git_config(git_default_config, NULL);
argc = parse_options(argc, argv, prefix, opts, name_rev_usage, 0);
if (all + transform_stdin + !!argc > 1) {
-- 
2.17.0.705.g3525833791



[PATCH 07/12] bisect.c: use commit-slab for commit weight instead of commit->util

2018-05-12 Thread Nguyễn Thái Ngọc Duy
It's done so that commit->util can be removed. See more explanation in
the commit that removes commit->util.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 bisect.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/bisect.c b/bisect.c
index a579b50884..6de1abd407 100644
--- a/bisect.c
+++ b/bisect.c
@@ -12,6 +12,7 @@
 #include "bisect.h"
 #include "sha1-array.h"
 #include "argv-array.h"
+#include "commit-slab.h"
 
 static struct oid_array good_revs;
 static struct oid_array skipped_revs;
@@ -70,16 +71,19 @@ static void clear_distance(struct commit_list *list)
}
 }
 
+define_commit_slab(commit_weight, int *);
+static struct commit_weight commit_weight;
+
 #define DEBUG_BISECT 0
 
 static inline int weight(struct commit_list *elem)
 {
-   return *((int*)(elem->item->util));
+   return **commit_weight_at(_weight, elem->item);
 }
 
 static inline void weight_set(struct commit_list *elem, int weight)
 {
-   *((int*)(elem->item->util)) = weight;
+   **commit_weight_at(_weight, elem->item) = weight;
 }
 
 static int count_interesting_parents(struct commit *commit)
@@ -265,7 +269,7 @@ static struct commit_list *do_find_bisection(struct 
commit_list *list,
struct commit *commit = p->item;
unsigned flags = commit->object.flags;
 
-   p->item->util = [n++];
+   *commit_weight_at(_weight, p->item) = [n++];
switch (count_interesting_parents(commit)) {
case 0:
if (!(flags & TREESAME)) {
@@ -372,6 +376,7 @@ void find_bisection(struct commit_list **commit_list, int 
*reaches,
int *weights;
 
show_list("bisection 2 entry", 0, 0, *commit_list);
+   init_commit_weight(_weight);
 
/*
 * Count the number of total and tree-changing items on the
@@ -412,6 +417,7 @@ void find_bisection(struct commit_list **commit_list, int 
*reaches,
}
free(weights);
*commit_list = best;
+   clear_commit_weight(_weight);
 }
 
 static int register_ref(const char *refname, const struct object_id *oid,
-- 
2.17.0.705.g3525833791



  1   2   >