Git Test Coverage Report (July 21)

2019-07-21 Thread Derrick Stolee
dd -i: implement the `status` command
Denton Liu  f39a9c65 remote: add --save-to-push option to git remote set-url
Denton Liu  10572de1 rebase: fast-forward --onto in more cases
Denton Liu  526c03b5 rebase: refactor can_fast_forward into goto tower
Johannes Schindelin d31f31d1 built-in add -i: color the header in the 
`status` command
Johannes Schindelin db07a815 Start to implement a built-in version of `git 
add --interactive`
Johannes Schindelin f3665cfd built-in add -i: implement the `help` command
Johannes Schindelin 81e604c5 built-in add -i: implement the main loop
Johannes Schindelin ec4ddbf6 built-in add -i: support `?` (prompt help)
Johannes Schindelin 74265dd7 built-in add -i: refresh the index before 
running `status`
Jonathan Tana8d662e3 upload-pack: refactor reading of pack-objects out
Jonathan Tan820a5361 upload-pack: send part of packfile response as uri
Junio C Hamano  4706acc5 Merge branch 'jt/fetch-cdn-offload' into pu
Matthew DeVore  9430147c list-objects-filter: encapsulate filter components
Matthew DeVore  e987df5f list-objects-filter: implement composite filters
Matthew DeVore  f56f7642 list-objects-filter-options: move error check up
Matthew DeVore  cf9ceb5a list-objects-filter-options: make filter_spec a 
string_list
Slavica Djukic  928e9d0d built-in add -i: show unique prefixes of the commands
Slavica Djukic  253cdc20 built-in add -i: use color in the main loop
Slavica Djukic  1f3e3049 Add a function to determine unique prefixes for a list 
of strings


Uncovered code in 'jch' not in 'next'


builtin/ls-files.c
272b3f2a 672) die(_("--debug-json cannot be used with other file selection 
options"));
272b3f2a 674) die(_("--debug-json cannot be used with %s"), "--resolve-undo");
272b3f2a 676) die(_("--debug-json cannot be used with %s"), "--with-tree");
272b3f2a 678) die(_("--debug-json cannot be used with %s"), "--debug");
272b3f2a 701) die("index file corrupt");

cache-tree.c
fd335a21 605) ret = NULL; /* not the whole tree */

dir.c
3b2385cf 2840) static void jw_object_untracked_cache_dir(struct json_writer *jw,
3b2385cf 2845) jw_object_bool(jw, "valid", ucd->valid);
3b2385cf 2846) jw_object_bool(jw, "check-only", ucd->check_only);
3b2385cf 2847) jw_object_stat_data(jw, "stat", &ucd->stat_data);
3b2385cf 2848) jw_object_string(jw, "exclude-oid", 
oid_to_hex(&ucd->exclude_oid));
3b2385cf 2849) jw_object_inline_begin_array(jw, "untracked");
3b2385cf 2850) for (i = 0; i < ucd->untracked_nr; i++)
3b2385cf 2851) jw_array_string(jw, ucd->untracked[i]);
3b2385cf 2852) jw_end(jw);
3b2385cf 2854) jw_object_inline_begin_object(jw, "dirs");
3b2385cf 2855) for (i = 0; i < ucd->dirs_nr; i++) {
3b2385cf 2856) jw_object_inline_begin_object(jw, ucd->dirs[i]->name);
3b2385cf 2857) jw_object_untracked_cache_dir(jw, ucd->dirs[i]);
3b2385cf 2858) jw_end(jw);
3b2385cf 2860) jw_end(jw);
3b2385cf 2861) }
3b2385cf 2958) jw_object_inline_begin_object(jw, "root");
3b2385cf 2959) jw_object_untracked_cache_dir(jw, uc->root);
3b2385cf 2960) jw_end(jw);

read-cache.c
8eeabe15 1754) ret = error(_("index uses %.4s extension, which we do not 
understand"),
ee70c128 1756) if (advice_unknown_index_extension) {
ee70c128 1757) warning(_("ignoring optional %.4s index extension"), ext);
ee70c128 1758) advise(_("This is likely due to the file having been written by 
a newer\n"
272b3f2a 2028) jw_object_true(jw, "assume_unchanged");
272b3f2a 2032) jw_object_true(jw, "skip_worktree");
272b3f2a 2034) jw_object_intmax(jw, "stage", ce_stage(ce));
f0f544da 2311) ieot = read_ieot_extension(istate, mmap, mmap_size, 
extension_offset);
f0f544da 3653) static struct index_entry_offset_table *read_ieot_extension(
f0f544da 3675) return do_read_ieot_extension(istate, index, extsize);

repo-settings.c
bbd04cf0 20) UPDATE_DEFAULT(rs->pack_use_sparse, 1);

split-index.c
1f825794 29) goto done;

Commits introducting uncovered code:
Derrick Stolee  bbd04cf0 repo-settings: pack.useSparse=true
Jonathan Nieder ee70c128 index: offer advice for unknown index extensions
Nguyễn Thái Ngọc Duy272b3f2a ls-files: add --json to dump the index
Nguyễn Thái Ngọc Duyfd335a21 cache-tree.c: dump "TREE" extension as json
Nguyễn Thái Ngọc Duy3b2385cf dir.c: dump "UNTR" extension as json
Nguyễn Thái Ngọc Duy8eeabe15 read-cache.c: dump common extension info in 
json
Nguyễn Thái Ngọc Duyf0f544da read-cache.c: dump "IEOT" extension as json
Nguyễn Thái Ngọc Duy1f825794 split-index.c: dump "link" extension as json


Uncovered code in 'next' not in 'master'

Re: Git Test Coverage Report (Wed. July 10)

2019-07-10 Thread Derrick Stolee
maining_oids);

protocol.c

remote-curl.c

t/helper/test-dir-iterator.c
655af733 24) die("invalid option '%s'", *argv);
655af733 28) die("dir-iterator needs exactly one non-option argument");
9bd70db7 46) printf("[?] ");

upload-pack.c
a8d662e3 130) return readsz;
820a5361 149) BUG("packfile_uris requires sideband-all");
cf9ceb5a 221) sq_quote_buf(&buf, spec);
a8d662e3 354) send_client_data(1, output_state.buffer, output_state.used);
820a5361 1387) string_list_clear(&data->uri_protocols, 0);

wrapper.c

Commits introducting uncovered code:
Christian Couderb14ed5ad Use promisor_remote_get_direct() and 
has_promisor_remote()
Christian Couderfaf2abf4 promisor-remote: use 
repository_format_partial_clone
Christian Couder4ca9474e Move core_partial_clone_filter_default to 
promisor-remote.c
Christian Couderfa3d1b63 promisor-remote: parse 
remote.*.partialclonefilter
Christian Couder9e27beaa promisor-remote: implement 
promisor_remote_get_direct()
Christian Couderdb27dca5 Remove fetch-object.{c,h} in favor of 
promisor-remote.{c,h}
Christian Couder48de3158 Add initial support for many promisor remotes
Daniel Ferreira 9bd70db7 dir-iterator: add tests for dir-iterator API
Denton Liu  10572de1 rebase: fast-forward --onto in more cases
Denton Liu  526c03b5 rebase: refactor can_fast_forward into goto tower
Denton Liu  f39a9c65 remote: add --save-to-push option to git remote set-url
Jonathan Tana8d662e3 upload-pack: refactor reading of pack-objects out
Jonathan Tan820a5361 upload-pack: send part of packfile response as uri
Junio C Hamano  18615216 Merge branch 'jt/fetch-cdn-offload' into pu
Matheus Tavares c40f077a dir-iterator: use warning_errno when possible
Matheus Tavares 655af733 dir-iterator: add flags parameter to dir_iterator_begin
Matheus Tavares 50e85c40 dir-iterator: refactor state machine model
Matheus Tavares d95432d7 clone: use dir-iterator to avoid explicit dir traversal
Matheus Tavares fbb4a33c clone: extract function from copy_or_link_directory
Matthew DeVore  f56f7642 list-objects-filter-options: move error check up
Matthew DeVore  cf9ceb5a list-objects-filter-options: make filter_spec a 
string_list
Matthew DeVore  9430147c list-objects-filter: encapsulate filter components
Matthew DeVore  e987df5f list-objects-filter: implement composite filters


Uncovered code in 'jch' not in 'next'


apply.c
85c3713d 1187) cp = skip_tree_prefix(p_value, second, line + llen - second);

builtin/ls-files.c
272b3f2a 672) die(_("--debug-json cannot be used with other file selection 
options"));
272b3f2a 674) die(_("--debug-json cannot be used with %s"), "--resolve-undo");
272b3f2a 676) die(_("--debug-json cannot be used with %s"), "--with-tree");
272b3f2a 678) die(_("--debug-json cannot be used with %s"), "--debug");
272b3f2a 701) die("index file corrupt");

cache-tree.c
fd335a21 605) ret = NULL; /* not the whole tree */

dir.c
3b2385cf 2840) static void jw_object_untracked_cache_dir(struct json_writer *jw,
3b2385cf 2845) jw_object_bool(jw, "valid", ucd->valid);
3b2385cf 2846) jw_object_bool(jw, "check-only", ucd->check_only);
3b2385cf 2847) jw_object_stat_data(jw, "stat", &ucd->stat_data);
3b2385cf 2848) jw_object_string(jw, "exclude-oid", 
oid_to_hex(&ucd->exclude_oid));
3b2385cf 2849) jw_object_inline_begin_array(jw, "untracked");
3b2385cf 2850) for (i = 0; i < ucd->untracked_nr; i++)
3b2385cf 2851) jw_array_string(jw, ucd->untracked[i]);
3b2385cf 2852) jw_end(jw);
3b2385cf 2854) jw_object_inline_begin_object(jw, "dirs");
3b2385cf 2855) for (i = 0; i < ucd->dirs_nr; i++) {
3b2385cf 2856) jw_object_inline_begin_object(jw, ucd->dirs[i]->name);
3b2385cf 2857) jw_object_untracked_cache_dir(jw, ucd->dirs[i]);
3b2385cf 2858) jw_end(jw);
3b2385cf 2860) jw_end(jw);
3b2385cf 2861) }
3b2385cf 2958) jw_object_inline_begin_object(jw, "root");
3b2385cf 2959) jw_object_untracked_cache_dir(jw, uc->root);
3b2385cf 2960) jw_end(jw);

range-diff.c
ab5b0492 33) return size;
ab5b0492 95) strbuf_release(&contents);
04539fc6 117) die(_("could not parse git header '%.*s'"), (int)len, line);
04539fc6 135) patch.old_mode != patch.new_mode)
04539fc6 136) strbuf_addf(&buf, " (mode change %06o => %06o)",
ab5b0492 181) strbuf_addstr(&buf, line);

read-cache.c
8eeabe15 1754) ret = error(_("index uses %.4s extension, which we do not 
understand"),
ee70c128 1756) if (advice_unknown_index_extension) {
ee70c128 1757) warning(_("ignoring optional %.4s index extension"), ext);
ee70c128 1758) advise(_("This is likely due to the file having been written by 
a newer\n"
27

Git Test Coverage Report (Wed. July 10)

2019-07-10 Thread Derrick Stolee
nt heuristic to match 
ignored lines
Nickolai Belakovski 2582083f ref-filter: add worktreepath atom
Phillip Woodd559f502 rebase --abort/--quit: cleanup refs/rewritten
Phillip Wood37e9ee5c sequencer: return errors from sequencer_remove_state()


Uncovered code in 'jch' not in 'next'


builtin/branch.c
1fde99cf 833) die(_("The -a, and -r, options to 'git branch' do not take a 
branch name.\n"

builtin/checkout.c

builtin/commit.c

builtin/gc.c
efeb229e 691) return 1;

builtin/rebase.c
526c03b5 1262) goto done;
10572de1 1278) goto done;

commit-graph.c
d83160e8 906) error(_("error opening index for %s"), packname.buf);
d83160e8 907) return 1;
63a8be62 946) continue;
93ba1867 969) display_progress(ctx->progress, ctx->approx_nr_objects);
8520d7fc 1039) error(_("unable to create leading directories of %s"),
8520d7fc 1041) return errno;
efeb229e 1158) error(_("the commit graph format cannot write %d commits"), 
count_distinct);
efeb229e 1159) res = 1;
efeb229e 1160) goto cleanup;
efeb229e 1169) error(_("too many commits to write graph"));
efeb229e 1170) res = 1;
efeb229e 1171) goto cleanup;

list-objects-filter-options.c
5c03bc8b 94) strbuf_addf(errbuf, _("invalid filter-spec '%s'"), arg);

read-cache.c
ee70c128 1723) if (advice_unknown_index_extension) {
ee70c128 1724) warning(_("ignoring optional %.4s index extension"), ext);
ee70c128 1725) advise(_("This is likely due to the file having been written by 
a newer\n"

Commits introducting uncovered code:
Denton Liu  526c03b5 rebase: refactor can_fast_forward into goto tower
Denton Liu  10572de1 rebase: fast-forward --onto in more cases
Derrick Stolee  efeb229e commit-graph: return with errors during write
Derrick Stolee  d83160e8 commit-graph: extract fill_oids_from_packs()
Derrick Stolee  63a8be62 commit-graph: extract fill_oids_from_commit_hex()
Derrick Stolee  93ba1867 commit-graph: extract fill_oids_from_all_packs()
Derrick Stolee  8520d7fc commit-graph: extract write_commit_graph_file()
Jonathan Nieder ee70c128 index: offer advice for unknown index extensions
Matthew DeVore  5c03bc8b list-objects-filter-options: error is localizeable
Philip Oakley   1fde99cf doc branch: provide examples for listing remote 
tracking branches


Uncovered code in 'next' not in 'master'


builtin/am.c
97387c8b 1662) die("unable to read from stdin; aborting");
6e7baf24 2336) die(_("interactive mode requires patches on the command line"));

builtin/bisect--helper.c
7877ac3d 574) retval = error(_("invalid ref: '%s'"), start_head.buf);
7877ac3d 575) goto finish;

builtin/fast-export.c
e80001f8 81) static int parse_opt_reencode_mode(const struct option *opt,
e80001f8 84) if (unset) {
e80001f8 85) reencode_mode = REENCODE_ABORT;
e80001f8 86) return 0;
e80001f8 89) switch (git_parse_maybe_bool(arg)) {
e80001f8 91) reencode_mode = REENCODE_NO;
e80001f8 92) break;
e80001f8 94) reencode_mode = REENCODE_YES;
e80001f8 95) break;
e80001f8 97) if (!strcasecmp(arg, "abort"))
e80001f8 98) reencode_mode = REENCODE_ABORT;
e80001f8 100) return error("Unknown reencoding mode: %s", arg);
e80001f8 103) return 0;
e80001f8 665) switch(reencode_mode) {
e80001f8 667) reencoded = reencode_string(message, "UTF-8", encoding);
e80001f8 668) break;
e80001f8 670) break;
e80001f8 672) die("Encountered commit-specific encoding %s in commit "
e80001f8 674) encoding, oid_to_hex(&commit->object.oid));
ccbfc96d 686) printf("encoding %s\n", encoding);

builtin/index-pack.c
8a30a1ef 1365) continue;

builtin/log.c
13cdf780 873) return 0;

builtin/merge.c
f3f8311e 1290) usage_msg_opt(_("--quit expects no arguments"),

builtin/worktree.c
1de16aec 297) BUG("How come '%s' becomes empty after sanitization?", sb.buf);

builtin/write-tree.c
76a7bc09 53) die("%s: prefix %s not found", me, tree_prefix);

fast-import.c
3edfcc65 2612) read_next_command();
3edfcc65 2679) strbuf_addf(&new_data,

grep.c
de99eb0c 1784) BUG("grep call which could print a name requires "

read-cache.c
7bd9631b 2201) src_offset += load_cache_entries_threaded(istate, mmap, 
mmap_size, nr_threads, ieot);

refs.c
1de16aec 111) sanitized->buf[sanitized->len-1] = '-';
1de16aec 170) if (sanitized)
1de16aec 171) strbuf_addch(sanitized, '-');
1de16aec 173) return -1;
1de16aec 178) strbuf_complete(sanitized, '/');
1de16aec 215) BUG("sanitizing refname '%s' check returned error", refname);

server-info.c
f4f476b6 110) ret = -1;
f4f476b6 111) goto out;
f4f476b6 123) goto out;
f4f476b6 125) goto out;
f4f476b6 134) if (uic.cur_fp)
f4f476b6 135) fclose(uic.cur_fp);

Commits introducting uncov

Re: [PATCH v3 0/3] [RFC] Create 'core.featureAdoptionRate' setting to update config defaults

2019-07-09 Thread Derrick Stolee
On 7/9/2019 3:21 PM, Junio C Hamano wrote:
> Derrick Stolee  writes:
> 
>> The other category that has been discussed already is that of "experimental
>> features that we generally think are helpful but change behavior slightly in
>> some cases".
>>
>>  feature.experimental:
>>  pack.useSparse = true
>>  status.aheadBehind = false
>>  fetch.showForcedUpdates = false
>>  merge.directoryRenames = true
>>  protocol.version = 2
>>  fetch.negotiationAlgorithm = skipping
> 
> Other classes you listed I can easily support, but I have trouble
> deciding if this concept itself is bad, or merely that some/many of
> the sample knobs you listed above are not exactly appropriate.
> Either way, I have hard time swallowing this one as-is.  You may
> think aheadBehind==false is helpful, but I don't, for example, and
> there may be people for and against each of the experimental knobs.

Thanks for the specific note about aheadBehind. I'll drop that one
from consideration.

I suppose that fetch.showForcedUpdates is in the same category, and
it has a self-discovery mechanism (a warning message) for users who
feel the pain of checking for forced updates (i.e. it takes >10s).

> But there may be a clear set of "this is agreed to be the way to the
> future, but the implementation currently is too convoluted and
> suspected of bugs, so we'll let early adoptors opt into the feature,
> and when that happens, eventually this knob will go away (i.e. you
> won't be able to turn it off)" type of knobs.  Or it may change the
> behaviour drastically, but as long as it is agreed that the future
> lies in that direction, I think it is OK to throw such a knob into
> this class.  The key points are (1) we are committed that in the
> future everybody will be forced to have it and (2) it is not merely
> "we generally think", but "the decision about the future has been
> made---there won't be any other way".  The feature.experimental
> becomes merely a way to let early adoptors in.  If you limit the
> individual features governed by feature.experimental to that kind of
> knobs, I can be easily convinced that this class is a good idea.

>From this list, do you think any of these settings are likely to
become defaults? It seems that protocol.version = 2 may be a default
now that _most_ services have an implementation, and it always falls
back to protocol v1 without extra cost.

When pack.useSparse was first introduced, I considered making it true
by default after a while. But you protested, saying you want people
knocking at the door saying it is useful. What if it lived here?

fetch.negotiationAlgorithm and merge.directoryRenames seem like
valuable features and maybe just need more time out in the world
before they could be considered defaults.

I appreciate all of the feedback, and to drive the discussion forward
I'm trying to tease out very specific opinions.

Thanks,
-Stolee


Re: [PATCH v3 0/3] [RFC] Create 'core.featureAdoptionRate' setting to update config defaults

2019-07-08 Thread Derrick Stolee
On 7/1/2019 10:29 AM, Derrick Stolee via GitGitGadget wrote:
> Here is a second run at this RFC, which aims to create a "meta" config
> setting that automatically turns on other settings according to a user's
> willingness to trade new Git behavior or new feature risk for performance
> benefits. The new name for the setting is "core.featureAdoptionRate" and is
> an integer scale from 0 to 10. There will be multiple "categories" of
> settings, and the intention is to allow more granular levels as necessary.

(Adding people who contributed feedback to CC line.)

It seems that this "Feature Adoption Rate" idea was too simplistic, and
had several issues. Time to take a different stab at this direction, but
with these clear goals in mind:

 1. We want intermediate users to be able to take advantage of new config
options without watching every release for new config options.

 2. The config name should match the general effect of the implied
settings.

 3. There are orthogonal settings that may not apply beneficially to
all repos.

With this in mind, I propose instead a set of "feature.*" config settings
that form groups of "community recommended" settings (with some caveats).
In the space below, I'll list a set of possible feature names and the
implied config options.

First, the main two categories we've discussed so far: many commits and
many files. These two feature sets are for when your repo is large in
one of these dimensions. Perhaps there are other settings to include
in these?

feature.manyFiles:
index.version = 4
index.threads = true
core.untrackedCache = true

feature.manyCommits:
core.commitGraph = true
gc.writeCommitGraph = true
(future: fetch.writeSplitCommitGraph = true)

Note: the `fetch.writeSplitCommitGraph` does not exist yet, but could
be introduced in a later release to write a new commit-graph (with --split)
on fetch.

The other category that has been discussed already is that of "experimental
features that we generally think are helpful but change behavior slightly in
some cases".

feature.experimental:
pack.useSparse = true
status.aheadBehind = false
fetch.showForcedUpdates = false
merge.directoryRenames = true
protocol.version = 2
fetch.negotiationAlgorithm = skipping

We have not discussed anything like the next category, but Dscho thought
a set of configs to make pretty diffs could be a fun "meta-config" setting:

feature.prettyDiff:
diff.color = auto
ui.color = auto
diff.context = 5
diff.colorMoved = true
diff.colorMovedWs = allow-indentation-change
diff.algorithm = minimal

These are just a first round of suggestions. I'm sure we would enjoy a
debate around an optimal set of diff settings.

Finally, here is a kind of feature that I could imagine being helpful
in the future, but maybe is not a good idea to pursue right now. In
some cases users use "gc.auto = 0" to prevent all user-time blocking
maintenance. This can degrade performance over time as loose objects
and pack-files accumulate. The performance could mostly be recovered
by using a multi-pack-index, but there is not current way to automatically
write the file. This would not solve the space issues that happen here.

feature.noGC:
gc.auto = 0
core.multiPackIndex = true
(future: fetch.writeMultiPackIndex = true)

What do people think about this general idea? Are there any other
feature.* settings that could be useful? Any additional settings
to add to these groups?

Thanks,
-Stolee


Re: Virtual Git Contributor Summit

2019-07-03 Thread Derrick Stolee
On 7/3/2019 9:01 AM, Johannes Schindelin wrote:
> Team,
> 
> I kept talking about this idea of a purely online Git Contributor Summit,
> and it is finally time for action.

Thanks for organizing this!

> To alleviate both of those points, we might want to consider spreading it
> out over a couple of days? I already heard some fierce opposition against
> that idea, though.

I don't have fierce opposition, but I do recommend one 8-hour day with the
intention of some contributors only able to join in the AM and some only in
the PM hours. We should not require all participants to be present for the
entire summit. By taking copious notes, we should be able to include the
rest of the community in the discussion.

Thanks,
-Stolee


Re: [PATCH v2 1/3] repo-settings: create core.featureAdoptionRate setting

2019-07-02 Thread Derrick Stolee
On 7/2/2019 7:09 AM, Duy Nguyen wrote:
> On Tue, Jul 2, 2019 at 5:47 PM Ævar Arnfjörð Bjarmason  
> wrote:
>>
>>
>> On Wed, Jun 19 2019, Derrick Stolee via GitGitGadget wrote:
>>
>>>  core.commitGraph::
>>>   If true, then git will read the commit-graph file (if it exists)
>>> - to parse the graph structure of commits. Defaults to false. See
>>> + to parse the graph structure of commits. Defaults to false, unless
>>> + `core.featureAdoptionRate` is at least three. See
>>>   linkgit:git-commit-graph[1] for more information.
>>>
>>>  core.useReplaceRefs::
>>> @@ -601,3 +602,21 @@ core.abbrev::
>>>   in your repository, which hopefully is enough for
>>>   abbreviated object names to stay unique for some time.
>>>   The minimum length is 4.
>>> +
>>> +core.featureAdoptionRate::
>>> + Set an integer value on a scale from 0 to 10 describing your
>>> + desire to adopt new performance features. Defaults to 0. As
>>> + the value increases, features are enabled by changing the
>>> + default values of other config settings. If a config variable
>>> + is specified explicitly, the explicit value will override these
>>> + defaults:
>>> ++
>>> +If the value is at least 3, then the following defaults are modified.
>>> +These represent relatively new features that have existed for multiple
>>> +major releases, and present significant performance benefits. They do
>>> +not modify the user-facing output of porcelain commands.
>>> ++
>>> +* `core.commitGraph=true` enables reading commit-graph files.
>>> ++
>>> +* `gc.writeCommitGraph=true` eneables writing commit-graph files during
>>
>> I barked up a similar tree in
>> https://public-inbox.org/git/cacbzzx5sbyo5fvptk6lw1ff96nr5591rhhc-5wdjw-fmg1r...@mail.gmail.com/
>>
>> I wonder if you've seen that & what you think about that
>> approach. I.e. have a core.version=2.28 (or core.version=+6) or whatever
>> to opt-in to features we'd make default in 2.28. Would that be your
>> core.featureAdoptionRate=6 (28-28 = 6)?

I had not seen that message. Thanks for the link.

However, I don't think that your idea of "give me features that will be
default soon" is enough, as some of these features will _never_ be
turned on by default. It's more about how much a user is willing to have
some slight changes (i.e. extra files during maintenance [commit-graph],
different index format, changed ahead/behind messages in status) in
exchange for an overall "better" experience. Here "better" is decided by
the community members adjusting the values on this feature.
>> I admit that question is partly rhetorical, because I think it suggests
>> how hard it would be for users to reason about this.

The intention here is to make this as simple for users as possible. I want
to be able to say "I highly recommend you set core.featureAdoptionRate=5"
and for them to not need to know what is happening. Of course, they can
learn more about the details and opt-out as things change.

>> The "core.version" idea also sucks, but at least it's bound to our
>> advertised version number, so it's obvious if you set it to e.g. +2 what
>> feature track you're on, and furthermore when we'd commit to making that
>> the default for users who don't set core.version (although we could of
>> course always change our minds...). It's also something that mirrors how
>> e.g. Perl, C compilers (with --std=*) treat this sort of thing.
>>
>> So I'm all for a facility to have a setting to collectively opt-in to
>> new things early. But I think for such a thing we really should a) at
>> least in principle commit to making those things the default eventually
> 
> Some features may be best enabled for certain setups. This is why I
> set configuration variables repo size, worktree size.. instead of just
> one number.

You are right that some features are best for different scale factors:

 * commit-graph is best with a deep history.
 * index version 4 is best with a large working directory.

These things are usually correlated, and users don't always know what
exactly is "large" for any of these variables.

Further, these features actually don't have much downside even if
a user is legitimately struggling with one of these scale factors and
not another.

>> (if they don't suck) b) it needs to be obvious to the user how the
>> "rate" relates to git releases.
> 
> I see this more like gcc =O options. And for thos

Re: [RFC/PATCH 1/2] rebuash - squash/rebase in a single step

2019-07-02 Thread Derrick Stolee
On 7/1/2019 2:35 PM, Junio C Hamano wrote:
> Jeff King  writes:
> 
>>> First, we create a (temporary) merge commit of both branches (M3)
>>>
>>> 
>>> R1---R2---R3---R4---R5---R6---R7---M3
>>>  \ \  \   /
>>>   F1---F2---M1---F3---F4---M2---F5
>>> 
>>>
>>> At this point, all differences between M3 and R7 are the changes related to 
>>> the
>>> feature branch, so we can run git reset --soft from M3 to R7 to put all 
>>> those
>>> differeces in index, and then we create single revision that is both
>>> squashed/rebased for our feature branch.
>>
>> So if I understand correctly, our goal is:
>>
>>   R1--R2--...--R7--R8
>>
>> where R8 has the same tree as M3?
>>
>> Wouldn't doing "git merge --squash" do the same thing?
> 
> Yup, from Edmundo's description, I agree that they are equivalent,
> modulo the merge direction.

[snip]

> If M3 merge is always easier to manage than incremental stepwise
> rebase of the topic, then doing the "git merge --reverse-squash"
> would be a saner interface and also conceptually simpler.

I agree that this would be a better way to expose this behavior,
and likely the implementation could be very clean.

Thanks,
-Stolee
 



Re: [PATCH] t5319: don't trip over a user name with whitespace

2019-07-01 Thread Derrick Stolee
On 7/1/2019 2:22 PM, Johannes Sixt wrote:
> Am 01.07.19 um 14:30 schrieb Derrick Stolee:
>> On 7/1/2019 8:11 AM, Johannes Schindelin wrote:
>>> Or we stop introducing new Perl calls, and use the perfectly fine
>>> `test-tool path-utils file-size` command:
>>>
>>> https://github.com/git/git/blob/v2.22.0/t/helper/test-path-utils.c#L302-L312
>>>
>>> This solves not only portability problems but also avoids yet another
>>> obstacle into making a `NO_PERL` test suite run really work without Perl.
>> Thanks! This does seem like the best option. Thanks for bringing this to our
>> attention. Here is a diff, and I'll prepare a full patch:
> 
> Thanks. Please also explain why the first of the two tests does not fail
> with a large --batch-size (unless it is obvious for people who know a
> bit about multi-pack-index, of course).

Sorry about missing that. Here is an explanation:

The --batch-size=X option determines how much data we want to move in
a single operation. Based on this value, we select packs greedily as
follows:

  1. If a pack has size at most X, include it.
  2. If our total pack size is greater than X, stop.

At the end of this process, if we have zero or one packs we do nothing.
That is why a small size does nothing.

Also, if our total pack size is smaller than X, we do nothing. Our
batch is not "full" enough to merit the work. This is why a large size
does nothing.

Thanks,
-Stolee


[PATCH v3 0/3] [RFC] Create 'core.featureAdoptionRate' setting to update config defaults

2019-07-01 Thread Derrick Stolee via GitGitGadget
Here is a second run at this RFC, which aims to create a "meta" config
setting that automatically turns on other settings according to a user's
willingness to trade new Git behavior or new feature risk for performance
benefits. The new name for the setting is "core.featureAdoptionRate" and is
an integer scale from 0 to 10. There will be multiple "categories" of
settings, and the intention is to allow more granular levels as necessary.

The first category is "3 or higher" which means that the user is willing to
adopt features that have been tested in multiple major releases. The
settings to include here are core.commitGraph=true,
gc.writeCommitGraph=true, and index.version=4.

The second category is "5 or higher" which means the user is willing to
adopt features that have not been out for multiple major releases. The
setting included here is pack.useSparse=true.

In the future, I would add a "7 or higher" setting which means the user is
willing to have a change of behavior in exchange for performance benefits.
The two settings to place here are 'status.aheadBehind=false' and
'fetch.showForcedUpdates=false'. Instead of including these settings in the
current series, I've submitted them independently for full review [1, 2].

Hopefully this direction is amenable to allow "early adopters" gain access
to new performance features even if they are not necessary reading every
line of the release notes.

Thanks, -Stolee

[1] https://public-inbox.org/git/pull.272.git.gitgitgad...@gmail.com/

[2] https://public-inbox.org/git/pull.273.git.gitgitgad...@gmail.com/

Derrick Stolee (3):
  repo-settings: create core.featureAdoptionRate setting
  repo-settings: use index.version=4 by default
  repo-settings: pack.useSparse=true

 Documentation/config/core.txt  | 34 +++-
 Documentation/config/gc.txt|  4 +--
 Documentation/config/index.txt |  2 ++
 Documentation/config/pack.txt  |  3 +-
 Makefile   |  1 +
 builtin/gc.c   |  6 ++--
 builtin/pack-objects.c |  9 +++---
 commit-graph.c |  7 ++--
 read-cache.c   | 12 ---
 repo-settings.c| 58 ++
 repo-settings.h| 15 +
 repository.h   |  3 ++
 t/t1600-index.sh   | 34 +---
 13 files changed, 164 insertions(+), 24 deletions(-)
 create mode 100644 repo-settings.c
 create mode 100644 repo-settings.h


base-commit: aa25c82427ae70aebf3b8f970f2afd54e9a2a8c6
Published-As: 
https://github.com/gitgitgadget/git/releases/tag/pr-254%2Fderrickstolee%2Fconfig-large%2Fupstream-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git 
pr-254/derrickstolee/config-large/upstream-v3
Pull-Request: https://github.com/gitgitgadget/git/pull/254

Range-diff vs v2:

 1:  bdaee3ea9d ! 1:  13b9e71b38 repo-settings: create core.featureAdoptionRate 
setting
 @@ -71,8 +71,9 @@
  ++
  +If the value is at least 3, then the following defaults are modified.
  +These represent relatively new features that have existed for multiple
 -+major releases, and present significant performance benefits. They do
 -+not modify the user-facing output of porcelain commands.
 ++major releases, and may present performance benefits. These benefits
 ++depend on the amount and kind of data in your repo and how you use it.
 ++The settings do not modify the user-facing output of porcelain commands.
  ++
  +* `core.commitGraph=true` enables reading commit-graph files.
  ++
 @@ -236,8 +237,8 @@
  +#define REPO_SETTINGS_H
  +
  +struct repo_settings {
 -+ char core_commit_graph;
 -+ char gc_write_commit_graph;
 ++ int core_commit_graph;
 ++ int gc_write_commit_graph;
  +};
  +
  +struct repository;
 2:  02c89415fe ! 2:  4fe896e423 repo-settings: use index.version=4 by default
 @@ -6,6 +6,12 @@
  This means the index could be compressed using version 4. Set this as
  a default when core.featureAdoptionRate is at least three.
  
 +Since the index version is written to a file, this is an excellent
 +opportunity to test that the config settings are working correctly
 +with the different precedence rules. Adapt a test from t1600-index.sh
 +to verify the version is set properly with different values of
 +index.version config, core.featureAdoptionRate, and GIT_INDEX_VERSION.
 +
  Signed-off-by: Derrick Stolee 
  
   diff --git a/Documentation/config/core.txt 
b/Documentation/config/core.txt
 @@ -108,9 +114,60 @@
   +++ b/repo-settings.h
  @@
   struct repo_settings {
 -  char core_commit_graph;
 -  char gc_write_commit_graph;
 +  int core_commit_graph;
 +  int gc_write_commit_graph;
  + int index_version;
  

[PATCH v3 2/3] repo-settings: use index.version=4 by default

2019-07-01 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

If a repo is large, it likely has many paths in its working directory.
This means the index could be compressed using version 4. Set this as
a default when core.featureAdoptionRate is at least three.

Since the index version is written to a file, this is an excellent
opportunity to test that the config settings are working correctly
with the different precedence rules. Adapt a test from t1600-index.sh
to verify the version is set properly with different values of
index.version config, core.featureAdoptionRate, and GIT_INDEX_VERSION.

Signed-off-by: Derrick Stolee 
---
 Documentation/config/core.txt  |  3 +++
 Documentation/config/index.txt |  2 ++
 read-cache.c   | 12 +++-
 repo-settings.c|  6 ++
 repo-settings.h|  1 +
 t/t1600-index.sh   | 34 +-
 6 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index bfe647c76f..865252aba9 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -621,3 +621,6 @@ The settings do not modify the user-facing output of 
porcelain commands.
 +
 * `gc.writeCommitGraph=true` eneables writing commit-graph files during
 `git gc`.
++
+* `index.version=4` uses prefix-compression to reduce the size of the
+.git/index file.
diff --git a/Documentation/config/index.txt b/Documentation/config/index.txt
index f181503041..98a88c30be 100644
--- a/Documentation/config/index.txt
+++ b/Documentation/config/index.txt
@@ -24,3 +24,5 @@ index.threads::
 index.version::
Specify the version with which new index files should be
initialized.  This does not affect existing repositories.
+   If `core.featureAdoptionRate` is at least three, then the
+   default value is 4.
diff --git a/read-cache.c b/read-cache.c
index 22e7b9944e..7fab8ff748 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -25,6 +25,7 @@
 #include "fsmonitor.h"
 #include "thread-utils.h"
 #include "progress.h"
+#include "repo-settings.h"
 
 /* Mask for the name length in ce_flags in the on-disk index */
 
@@ -1599,16 +1600,17 @@ struct cache_entry *refresh_cache_entry(struct 
index_state *istate,
 
 #define INDEX_FORMAT_DEFAULT 3
 
-static unsigned int get_index_format_default(void)
+static unsigned int get_index_format_default(struct repository *r)
 {
char *envversion = getenv("GIT_INDEX_VERSION");
char *endp;
-   int value;
unsigned int version = INDEX_FORMAT_DEFAULT;
 
if (!envversion) {
-   if (!git_config_get_int("index.version", &value))
-   version = value;
+   prepare_repo_settings(r);
+
+   if (r->settings->index_version >= 0)
+   version = r->settings->index_version;
if (version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < version) {
warning(_("index.version set, but the value is 
invalid.\n"
  "Using version %i"), INDEX_FORMAT_DEFAULT);
@@ -2765,7 +2767,7 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
}
 
if (!istate->version) {
-   istate->version = get_index_format_default();
+   istate->version = get_index_format_default(the_repository);
if (git_env_bool("GIT_TEST_SPLIT_INDEX", 0))
init_split_index(istate);
}
diff --git a/repo-settings.c b/repo-settings.c
index f7fc2a1959..5753153a84 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -14,6 +14,7 @@ static int git_repo_config(const char *key, const char 
*value, void *cb)
if (rate >= 3) {
UPDATE_DEFAULT(rs->core_commit_graph, 1);
UPDATE_DEFAULT(rs->gc_write_commit_graph, 1);
+   UPDATE_DEFAULT(rs->index_version, 4);
}
return 0;
}
@@ -25,6 +26,10 @@ static int git_repo_config(const char *key, const char 
*value, void *cb)
rs->gc_write_commit_graph = git_config_bool(key, value);
return 0;
}
+   if (!strcmp(key, "index.version")) {
+   rs->index_version = git_config_int(key, value);
+   return 0;
+   }
 
return 1;
 }
@@ -39,6 +44,7 @@ void prepare_repo_settings(struct repository *r)
/* Defaults */
r->settings->core_commit_graph = -1;
r->settings->gc_write_commit_graph = -1;
+   r->settings->index_version = -1;
 
repo_config(r, git_repo_config, r->settings);
 }
diff --git a/repo-settings.h b/repo-settings.h
index 7d44627bf0..b752dfe8b4 100644
--- a/repo-settings.h
+++ b/repo-settings.h
@@ -4,6 +4,7 @@
 struct repo_setti

[PATCH v3 1/3] repo-settings: create core.featureAdoptionRate setting

2019-07-01 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

Several advanced config settings are highly recommended for clients
using large repositories. Power users learn these one-by-one and
enable them as they see fit. This could be made simpler, to allow
more users to have access to these almost-always beneficial features
(and more beneficial in larger repos).

Create a 'core.featureAdoptionRate' config setting that allows integer
values. This is a rating from 0 to 10 for the user's willingness to
adopt new or experimental features that improve Git performance.
The default is 0, meaning "don't change anything!" A value of 10
would mean "I'm willing for some behavior to change to get the
best performance I can get, and can take experimental features
in their first release." As we integrate this with more config
settings, we will make this scale more clear.

This config setting only changes the default values of other config
settings. If the setting is given explicitly, then take the
explicit value.

This change adds these two defaults when core.featureAdoptionRate
is at least three:

 * core.commitGraph=true
 * gc.writeCommitGraph=true

The use of "three or higher" for these settings means that a value
of 3 means "I'm willing to add optional features that can augment
the data on disk in favor of improved performance, but those
features should be stable after being included in multiple major
releases."

To centralize these config options and properly set the defaults,
create a repo_settings that contains chars for each config variable.
Use -1 as "unset", with 0 for false and 1 for true.

The prepare_repo_settings() method ensures that this settings
struct has been initialized, and avoids double-scanning the config
settings.

Signed-off-by: Derrick Stolee 
---
 Documentation/config/core.txt | 22 +-
 Documentation/config/gc.txt   |  4 ++--
 Makefile  |  1 +
 builtin/gc.c  |  6 ++---
 commit-graph.c|  7 +++---
 repo-settings.c   | 44 +++
 repo-settings.h   | 13 +++
 repository.h  |  3 +++
 8 files changed, 91 insertions(+), 9 deletions(-)
 create mode 100644 repo-settings.c
 create mode 100644 repo-settings.h

diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index 75538d27e7..bfe647c76f 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -577,7 +577,8 @@ the `GIT_NOTES_REF` environment variable.  See 
linkgit:git-notes[1].
 
 core.commitGraph::
If true, then git will read the commit-graph file (if it exists)
-   to parse the graph structure of commits. Defaults to false. See
+   to parse the graph structure of commits. Defaults to false, unless
+   `core.featureAdoptionRate` is at least three. See
linkgit:git-commit-graph[1] for more information.
 
 core.useReplaceRefs::
@@ -601,3 +602,22 @@ core.abbrev::
in your repository, which hopefully is enough for
abbreviated object names to stay unique for some time.
The minimum length is 4.
+
+core.featureAdoptionRate::
+   Set an integer value on a scale from 0 to 10 describing your
+   desire to adopt new performance features. Defaults to 0. As
+   the value increases, features are enabled by changing the
+   default values of other config settings. If a config variable
+   is specified explicitly, the explicit value will override these
+   defaults:
++
+If the value is at least 3, then the following defaults are modified.
+These represent relatively new features that have existed for multiple
+major releases, and may present performance benefits. These benefits
+depend on the amount and kind of data in your repo and how you use it.
+The settings do not modify the user-facing output of porcelain commands.
++
+* `core.commitGraph=true` enables reading commit-graph files.
++
+* `gc.writeCommitGraph=true` eneables writing commit-graph files during
+`git gc`.
diff --git a/Documentation/config/gc.txt b/Documentation/config/gc.txt
index 02b92b18b5..898263209c 100644
--- a/Documentation/config/gc.txt
+++ b/Documentation/config/gc.txt
@@ -63,8 +63,8 @@ gc.writeCommitGraph::
If true, then gc will rewrite the commit-graph file when
linkgit:git-gc[1] is run. When using `git gc --auto`
the commit-graph will be updated if housekeeping is
-   required. Default is false. See linkgit:git-commit-graph[1]
-   for details.
+   required. Default is false, unless `core.featureAdoptionRage`
+   is at least three. See linkgit:git-commit-graph[1] for details.
 
 gc.logExpiry::
If the file gc.log exists, then `git gc --auto` will print
diff --git a/Makefile b/Makefile
index 8a7e235352..2d3499d7ac 100644
--- a/Makefile
+++ b/Makefile
@@ -967,6 +967,7 @@ LIB_OBJS += refspec.o
 LIB_OBJS += ref-filter.o
 LIB_OBJS 

[PATCH v3 3/3] repo-settings: pack.useSparse=true

2019-07-01 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

If a repo is large, then it probably has a very large working
directory. In this case, a typical developer's edits usually impact
many fewer paths than the full path set. The sparse treewalk
algorithm is optimized for this case, speeding up 'git push' calls.

Use pack.useSparse=true when core.featureAdoptionRate is at least
five. This is the first setting where the feature has only been
out for a single major version. This could be moved to the "at
least three" category after another major version.

Signed-off-by: Derrick Stolee 
---
 Documentation/config/core.txt | 9 +
 Documentation/config/pack.txt | 3 ++-
 builtin/pack-objects.c| 9 +
 repo-settings.c   | 8 
 repo-settings.h   | 1 +
 5 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index 865252aba9..60356102a8 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -624,3 +624,12 @@ The settings do not modify the user-facing output of 
porcelain commands.
 +
 * `index.version=4` uses prefix-compression to reduce the size of the
 .git/index file.
++
+If the value is at least 5, then all of the defaults above are included,
+plus the defaults below. These represent new features that present
+significant performance benefits, but may not have been released for
+multiple major versions.
++
+* `pack.useSparse=true` uses the sparse tree-walk algorithm, which is
+optimized for enumerating objects during linkgit:git-push[1] from a
+client machine.
diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt
index 9cdcfa7324..9c4f8ea9ff 100644
--- a/Documentation/config/pack.txt
+++ b/Documentation/config/pack.txt
@@ -112,7 +112,8 @@ pack.useSparse::
objects. This can have significant performance benefits when
computing a pack to send a small change. However, it is possible
that extra objects are added to the pack-file if the included
-   commits contain certain types of direct renames.
+   commits contain certain types of direct renames. Defaults to
+   false, unless `core.featureAdoptionRate` is at least five.
 
 pack.writeBitmaps (deprecated)::
This is a deprecated synonym for `repack.writeBitmaps`.
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 41d7fc5983..f26b3f2892 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -34,6 +34,7 @@
 #include "dir.h"
 #include "midx.h"
 #include "trace2.h"
+#include "repo-settings.h"
 
 #define IN_PACK(obj) oe_in_pack(&to_pack, obj)
 #define SIZE(obj) oe_size(&to_pack, obj)
@@ -2707,10 +2708,6 @@ static int git_pack_config(const char *k, const char *v, 
void *cb)
use_bitmap_index_default = git_config_bool(k, v);
return 0;
}
-   if (!strcmp(k, "pack.usesparse")) {
-   sparse = git_config_bool(k, v);
-   return 0;
-   }
if (!strcmp(k, "pack.threads")) {
delta_search_threads = git_config_int(k, v);
if (delta_search_threads < 0)
@@ -3330,6 +3327,10 @@ int cmd_pack_objects(int argc, const char **argv, const 
char *prefix)
read_replace_refs = 0;
 
sparse = git_env_bool("GIT_TEST_PACK_SPARSE", 0);
+   prepare_repo_settings(the_repository);
+   if (!sparse && the_repository->settings->pack_use_sparse != -1)
+   sparse = the_repository->settings->pack_use_sparse;
+
reset_pack_idx_option(&pack_idx_opts);
git_config(git_pack_config, NULL);
 
diff --git a/repo-settings.c b/repo-settings.c
index 5753153a84..c700edc286 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -16,6 +16,9 @@ static int git_repo_config(const char *key, const char 
*value, void *cb)
UPDATE_DEFAULT(rs->gc_write_commit_graph, 1);
UPDATE_DEFAULT(rs->index_version, 4);
}
+   if (rate >= 5) {
+   UPDATE_DEFAULT(rs->pack_use_sparse, 1);
+   }
return 0;
}
if (!strcmp(key, "core.commitgraph")) {
@@ -26,6 +29,10 @@ static int git_repo_config(const char *key, const char 
*value, void *cb)
rs->gc_write_commit_graph = git_config_bool(key, value);
return 0;
}
+   if (!strcmp(key, "pack.usesparse")) {
+   rs->pack_use_sparse = git_config_bool(key, value);
+   return 0;
+   }
if (!strcmp(key, "index.version")) {
rs->index_version = git_config_int(key, value);
return 0;
@@ -44,6 +51,7 @@ void prepare_repo_settings(struct repository *r)
/* Defaults */
r->settings->core_commit_graph = -1;
r->

Re: [PATCH 1/2] object-store.h: move for_each_alternate_ref() from transport.h

2019-07-01 Thread Derrick Stolee
On 7/1/2019 9:17 AM, Jeff King wrote:
> There's nothing inherently transport-related about enumerating the
> alternate ref tips. The code has lived in transport.[ch] because the
> only use so far had been advertising available tips during transport.
> But it could be used for more, and a future patch will teach rev-list to
> access these refs.
> 
> Let's move it alongside the other alt-odb code, declaring it in
> object-store.h with the implementation in sha1-file.c.

Thanks!

> This lets us drop the inclusion of transport.h from receive-pack, which
> perhaps shows how it was misplaced (though receive-pack is about
> transporting objects, transport.h is mostly about the client side).

That's an interesting de-coupling. Thanks for looking into reorganizing
the code. This series looks good to me.

-Stolee


[PATCH 0/1] Fix test bug with spaces in file owner

2019-07-01 Thread Derrick Stolee via GitGitGadget
Thanks to Johannes Sixt for reporting this bug and Dscho for presenting the
correct test-tool to use.

This applies on ds/midx-expire-repack. If we instead want to squash this
into that branch, the two hunks will need to be squashed into these commits:

THIRD_SMALLEST_SIZE applies to ce1e4a105b4 (midx: implement midx_repack(),
2019-06-10).

MINSIZE applies to 2af890bb280 (multi-pack-index: prepare 'repack'
subcommand, 2019-06-10).

Thanks, -Stolee

Derrick Stolee (1):
  t5319: use 'test-tool path-utils' instead of 'ls -l'

 t/t5319-multi-pack-index.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)


base-commit: b526d8cbbb8740faa10411caa757c6586395fcab
Published-As: 
https://github.com/gitgitgadget/git/releases/tag/pr-280%2Fderrickstolee%2Fmidx-test-fix-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git 
pr-280/derrickstolee/midx-test-fix-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/280
-- 
gitgitgadget


[PATCH 1/1] t5319: use 'test-tool path-utils' instead of 'ls -l'

2019-07-01 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

Using 'ls -l' and parsing the columns to find file sizes is
problematic when the platform could report the owner as a name
with spaces. Instead, use the 'test-tool path-utils file-size'
command to list only the sizes.

Reported-by: Johannes Sixt 
Helped-by: Johannes Schindelin 
Signed-off-by: Derrick Stolee 
---
 t/t5319-multi-pack-index.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh
index 79bfaeafa9..c72ca04399 100755
--- a/t/t5319-multi-pack-index.sh
+++ b/t/t5319-multi-pack-index.sh
@@ -443,7 +443,7 @@ test_expect_success 'repack with minimum size does not 
alter existing packs' '
touch -m -t 201901010002 .git/objects/pack/pack-B* &&
touch -m -t 201901010003 .git/objects/pack/pack-A* &&
ls .git/objects/pack >expect &&
-   MINSIZE=$(ls -l .git/objects/pack/*pack | awk "{print \$5;}" | 
sort -n | head -n 1) &&
+   MINSIZE=$(test-tool path-utils file-size 
.git/objects/pack/*pack | sort -n | head -n 1) &&
git multi-pack-index repack --batch-size=$MINSIZE &&
ls .git/objects/pack >actual &&
test_cmp expect actual
@@ -455,7 +455,7 @@ test_expect_success 'repack creates a new pack' '
cd dup &&
ls .git/objects/pack/*idx >idx-list &&
test_line_count = 5 idx-list &&
-   THIRD_SMALLEST_SIZE=$(ls -l .git/objects/pack/*pack | awk 
"{print \$5;}" | sort -n | head -n 3 | tail -n 1) &&
+   THIRD_SMALLEST_SIZE=$(test-tool path-utils file-size 
.git/objects/pack/*pack | sort -n | head -n 3 | tail -n 1) &&
BATCH_SIZE=$(($THIRD_SMALLEST_SIZE + 1)) &&
git multi-pack-index repack --batch-size=$BATCH_SIZE &&
ls .git/objects/pack/*idx >idx-list &&
-- 
gitgitgadget


Re: [PATCH v2 1/3] repo-settings: create core.featureAdoptionRate setting

2019-07-01 Thread Derrick Stolee
On 6/30/2019 2:35 PM, Carlo Arenas wrote:
> On Fri, Jun 28, 2019 at 6:44 PM Derrick Stolee  wrote:
>>
>> On 6/28/2019 5:42 PM, Junio C Hamano wrote:
>>> "Derrick Stolee via GitGitGadget"  writes:
>>>
>>> Use of "signed char" would be OK, but this is a singleton instance
>>> per repository, so I am not sure how much it matters to save a few
>>> words here by not using the most natural "int" type.
>>
>> I'll use 'int' in v2.
> 
> FWIW, this broke the build in (at least) Linux AArch64, unless the following
> is applied on top of ds/early-access

Thanks, Carlo, for reporting this. I'm glad we have someone running early
builds on platforms with these special cases!

I'm working on rerolling to use int.

-Stolee



Re: [PATCH] t5319: don't trip over a user name with whitespace

2019-07-01 Thread Derrick Stolee
On 7/1/2019 8:11 AM, Johannes Schindelin wrote:
> Hi Peff,
> 
> On Sun, 30 Jun 2019, Jeff King wrote:
> 
>> On Sun, Jun 30, 2019 at 10:59:34PM +0200, Johannes Sixt wrote:
>>
>>> Am 30.06.19 um 21:48 schrieb Eric Sunshine:
 On Sun, Jun 30, 2019 at 2:57 PM Johannes Sixt  wrote:
> diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh
> @@ -443,7 +443,7 @@ test_expect_success 'repack with minimum size does 
> not alter existing packs' '
> -   MINSIZE=$(ls -l .git/objects/pack/*pack | awk "{print 
> \$5;}" | sort -n | head -n 1) &&
> +   MINSIZE=$(stat -c %s .git/objects/pack/*pack | sort -n | 
> head -n 1) &&

 Unfortunately, this is not portable. While "stat -c %s" works on Linux
 and MSYS2, neither that option nor the format directive are recognized
 on BSD-like platforms (I tested Mac OS and FreeBSD), which instead
 need "stat -f %z".
>>>
>>> Ouch! I did notice that stat(1) is not in POSIX, but hoped that it was
>>> sufficiently portable. I need a new idea...
>>
>> If we are OK relying on rudimentary perl[1], then:
>>
>>   perl -le "print((stat)[7]) for @ARGV"
>>
>> works. If you want it more readable, then maybe:
>>
>>   perl -MFile::stat -le "print stat(\$_)->size for @ARGV"
> 
> Or we stop introducing new Perl calls, and use the perfectly fine
> `test-tool path-utils file-size` command:
> 
> https://github.com/git/git/blob/v2.22.0/t/helper/test-path-utils.c#L302-L312
> 
> This solves not only portability problems but also avoids yet another
> obstacle into making a `NO_PERL` test suite run really work without Perl.
Thanks! This does seem like the best option. Thanks for bringing this to our
attention. Here is a diff, and I'll prepare a full patch:

diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh
index dd6083e61a2..5379e59168a 100755
--- a/t/t5319-multi-pack-index.sh
+++ b/t/t5319-multi-pack-index.sh
@@ -447,7 +447,7 @@ test_expect_success 'repack with minimum size does not 
alter existing packs' '
touch -m -t 201901010002 .git/objects/pack/pack-B* &&
touch -m -t 201901010003 .git/objects/pack/pack-A* &&
ls .git/objects/pack >expect &&
-   MINSIZE=$(ls -l .git/objects/pack/*pack | awk "{print \$5;}" | 
sort -n | head -n 1) &&
+   MINSIZE=$(test-tool path-utils file-size 
.git/objects/pack/*pack | sort -n | head -n 1) &&
git multi-pack-index repack --batch-size=$MINSIZE &&
ls .git/objects/pack >actual &&
test_cmp expect actual
@@ -459,7 +459,7 @@ test_expect_success 'repack creates a new pack' '
cd dup &&
ls .git/objects/pack/*idx >idx-list &&
test_line_count = 5 idx-list &&
-   THIRD_SMALLEST_SIZE=$(ls -l .git/objects/pack/*pack | awk 
"{print \$5;}" | sort -n | head -n 3 | tail -n 1) &&
+   THIRD_SMALLEST_SIZE=$(test-tool path-utils file-size 
.git/objects/pack/*pack | sort -n | head -n 3 | tail -n 1) &&
BATCH_SIZE=$(($THIRD_SMALLEST_SIZE + 1)) &&
git multi-pack-index repack --batch-size=$BATCH_SIZE &&
ls .git/objects/pack/*idx >idx-list &&

Thanks,
-Stolee


Re: [PATCH] check_everything_connected: assume alternate ref tips are valid

2019-07-01 Thread Derrick Stolee
On 6/29/2019 3:55 AM, Jeff King wrote:
> On Fri, Jun 28, 2019 at 09:22:56AM -0700, Junio C Hamano wrote:
> 
>>> argv_array_push(&rev_list.args, "--quiet");
>>> +   argv_array_push(&rev_list.args, "--alternate-refs");
>>> if (opt->progress)
>>> argv_array_pushf(&rev_list.args, "--progress=%s",
>>>  _("Checking connectivity"));
>>
>> Quite honestly, I am very surprised that we did not do this.  The
>> idea of alternate object store, as well as reducing transfer cost by
>> advertising their tips as '.have' phony refs, is almost as old as
>> the pack protocol itself.
> 
> Yeah, as you note we are already telling the other side of the push
> "hey, we already have these objects". So we are almost always just
> walking over our own local objects in the connectivity check, which is
> silly.
> 
> I only did "clone --reference" in the perf test because it was the
> simplest, but a push to a server with alternates should be similarly
> improved. E.g., doing this in a clone of linux.git:
> 
>   git init --bare dst.git
>   echo '../../.git/objects' >dst.git/objects/info/alternates
>   time git push dst.git HEAD
> 
> goes from 40+ seconds to 100ms or so. Again, obviously that's the best
> case, but it should also improve the normal case of somebody pulling
> down "torvalds/linux.git" and pushing it back up to their own
> "peff/linux.git", too.
> 
> I don't have real-world numbers yet from GitHub, because we're not
> actually advertising .haves on push right now. All of the Git pieces are
> now in places to do so, but we still have to make some tweaks at our
> replication layer. But soon. :)

Exciting! Should improve the user's experience keeping their forks
updated!

-Stolee


Re: [PATCH] check_everything_connected: assume alternate ref tips are valid

2019-07-01 Thread Derrick Stolee
On 6/29/2019 3:43 AM, Jeff King wrote:
> On Fri, Jun 28, 2019 at 08:51:04AM -0400, Derrick Stolee wrote:
> 
>> On 6/28/2019 6:11 AM, Jeff King wrote:
>>> When we receive a remote ref update to sha1 "X", we want to check that
>>> we have all of the objects needed by "X". We can assume that our
>>> repository is not currently corrupted, and therefore if we have a ref
>>> pointing at "Y", we have all of its objects. So we can stop our
>>> traversal from "X" as soon as we hit "Y".
>>>
>>> If we make the same non-corruption assumption about any repositories we
>>> use to store alternates, then we can also use their ref tips to shorten
>>> the traversal.
>>
>> I was confused by this paragraph, because I didn't know about
>> for_each_alternate_ref() and how refs_From_alternate_cb() will
>> strip the "/objects" and append "/refs" to check refs if they
>> exist. All of that logic is in transport.c but used by
>> fetch-pack.c and builtin/receive-pack.c. But now we are adding
>> to revision.c, so the restriction to "this helps data transfer"
>> is getting murkier.
> 
> Using it for data transfer is still the main thing for our internal
> calls, but I think it's worth exposing it for general use via rev-list.
> I imagine it would mostly be for poking around and debugging, but it
> should allow things like:
> 
>   # what do we have that our alternate does not
>   git rev-list --all --not --alternate-refs

So this is an example where the alternate refs are being used without
any network activity.

>> Is this something that should be extracted to the object-store
>> layer? Or is it so tricky to use that we shouldn't make it too
>> easy to fall into a bad pattern?
> 
> I'm not sure what you have in mind, exactly. If you are asking whether
> there are more places that alternate refs could be used, I can't think
> of any. If you are asking whether this is in the wrong place, no, I
> think it's the right place. :)

Just double-checking that it is appropriate for revision.c to take
dependence on transport.h instead of moving the alternate ref stuff
into a different header file. I trust your opinion.

-Stolee


Re: [PATCH v5 02/11] commit-graph: return with errors during write

2019-07-01 Thread Derrick Stolee
On 6/29/2019 1:23 PM, SZEDER Gábor wrote:
> On Wed, Jun 12, 2019 at 06:29:37AM -0700, Derrick Stolee via GitGitGadget 
> wrote:
>> diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
>> index e80c1cac02..3b6fd0d728 100755
>> --- a/t/t5318-commit-graph.sh
>> +++ b/t/t5318-commit-graph.sh
>> @@ -23,6 +23,14 @@ test_expect_success 'write graph with no packs' '
>>  test_path_is_file info/commit-graph
>>  '
>>  
>> +test_expect_success 'close with correct error on bad input' '
>> +cd "$TRASH_DIRECTORY/full" &&
>> +echo doesnotexist >in &&
>> +{ git commit-graph write --stdin-packs stderr; ret=$?; } &&
>> +test "$ret" = 1 &&
> 
> This could be: 
> 
>   test_expect_code 1 git commit-graph write --stdin-packs stderr
> 
> 
>> +test_i18ngrep "error adding pack" stderr
>> +'

Thanks!, you are right! test_expect_code is what I should have used here
instead of finding the "ret=$?" trick in t0005-signals.sh, which needs to
do more interesting logic on the return code.

Here is your suggestion as a diff. Junio: could you squash this in, or
should I submit a full patch?

diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index 22cb9d66430..4391007f4c1 100755
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -26,8 +26,7 @@ test_expect_success 'write graph with no packs' '
 test_expect_success 'close with correct error on bad input' '
cd "$TRASH_DIRECTORY/full" &&
echo doesnotexist >in &&
-   { git commit-graph write --stdin-packs stderr; ret=$?; } &&
-   test "$ret" = 1 &&
+   test_expect_code 1 git commit-graph write --stdin-packs stderr &&
test_i18ngrep "error adding pack" stderr
 '

I took inventory of when we are using "=$?" in the test scripts and saw
this was the only one that could easily be removed. Every other place is
doing something that can't be replaced by test_expect_code.

Thanks,
-Stolee


Re: [PATCH] t5319: don't trip over a user name with whitespace

2019-07-01 Thread Derrick Stolee
Sorry I'm late to the thread. Thanks for reporting this issue, Johannes.

On 7/1/2019 7:33 AM, SZEDER Gábor wrote:
> On Mon, Jul 01, 2019 at 05:16:02AM -0400, Jeff King wrote:
>> I see Gábor suggested using "wc -c" elsewhere in the thread. That would
>> be fine with me, too, though I think the required sed there may be
>> getting pretty unreadable, too. :)
> 
> It could be done even without 'sed', though at the expense of running
> a coupe more 'wc -c's in a loop:>
> diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh
> index 79bfaeafa9..bacec5e2e4 100755
> --- a/t/t5319-multi-pack-index.sh
> +++ b/t/t5319-multi-pack-index.sh
> @@ -443,7 +443,12 @@ test_expect_success 'repack with minimum size does not 
> alter existing packs' '
>   touch -m -t 201901010002 .git/objects/pack/pack-B* &&
>   touch -m -t 201901010003 .git/objects/pack/pack-A* &&
>   ls .git/objects/pack >expect &&
> - MINSIZE=$(ls -l .git/objects/pack/*pack | awk "{print \$5;}" | 
> sort -n | head -n 1) &&
> + MINSIZE=$(
> + for pack in .git/objects/pack/*pack
> + do
> + wc -c <"$pack"
> + done | sort -n | head -n 1
> + ) &&
>   git multi-pack-index repack --batch-size=$MINSIZE &&
>   ls .git/objects/pack >actual &&
>   test_cmp expect actual
> @@ -455,7 +460,12 @@ test_expect_success 'repack creates a new pack' '
>   cd dup &&
>   ls .git/objects/pack/*idx >idx-list &&
>   test_line_count = 5 idx-list &&
> - THIRD_SMALLEST_SIZE=$(ls -l .git/objects/pack/*pack | awk 
> "{print \$5;}" | sort -n | head -n 3 | tail -n 1) &&
> + THIRD_SMALLEST_SIZE=$(
> + for pack in .git/objects/pack/*pack
> + do
> + wc -c <"$pack"
> + done | sort -n | head -n 3 | tail -n 1
> + ) &&
>   BATCH_SIZE=$(($THIRD_SMALLEST_SIZE + 1)) &&
>   git multi-pack-index repack --batch-size=$BATCH_SIZE &&
>   ls .git/objects/pack/*idx >idx-list &&
> 
> Is it really better?  Dunno, but at least there is no subtlety with
> the leading padding spaces.

Your change is certainly more straight-forward, and avoids all issues
around Perl compatibility.

It does have the issue of more 'wc' processes, which is a downside. The
count here is not too bad, but if we need to duplicate this pattern elsewhere
we may be better off creating a stat(1) replacement in test-lib.sh.

Thanks,
-Stolee


Re: [PATCH v2 1/3] repo-settings: create core.featureAdoptionRate setting

2019-06-28 Thread Derrick Stolee
On 6/28/2019 5:42 PM, Junio C Hamano wrote:
> "Derrick Stolee via GitGitGadget"  writes:
>
>> +struct repo_settings {
>> +char core_commit_graph;
>> +char gc_write_commit_graph;
>> +};
> 
> I do not see a particular reason to favor type "char" here. "char"
> is wider than e.g. "signed int :2", if you wanted to save space
> compared to a more obvious and naive type, e.g. "int".  More
> importantly, the language does not guarantee signedness of "char",
> so the sentinel value of "-1" is a bit tricky to use, as you have to
> be prepared to see your code used on an "unsigned char" platform.

I was unaware that platforms could change the signedness of "char".
Thanks for guarding against it.

You're right that it probably isn't worth saving space here, as these
values are replacing existing globals somewhere anyway. If we start
worrying about these being present for each of thousands of submodules
then we probably have bigger problems.

> Use of "signed char" would be OK, but this is a singleton instance
> per repository, so I am not sure how much it matters to save a few
> words here by not using the most natural "int" type.

I'll use 'int' in v2.

Thanks,
-Stolee


Re: Git Test Coverage Report (Thurs. June 27)

2019-06-28 Thread Derrick Stolee
On 6/28/2019 7:59 PM, Jeff King wrote:
> On Fri, Jun 28, 2019 at 08:23:49AM -0400, Derrick Stolee wrote:
> 
>> On 6/28/2019 2:45 AM, Jeff King wrote:
>>> On Thu, Jun 27, 2019 at 01:35:17PM -0400, Derrick Stolee wrote:
>>>
>>>>> t/helper/test-example-decorate.c
>>>>> 0ebbcf70 29) one = lookup_unknown_object(&one_oid);
>>>>> 0ebbcf70 30) two = lookup_unknown_object(&two_oid);
>>>>> 0ebbcf70 59) three = lookup_unknown_object(&three_oid);
>>>>
>>>> Peff: again interesting that these lines you refactored were not covered, 
>>>> especially
>>>> because they are part of a test helper. Perhaps the tests they were 
>>>> intended for are
>>>> now defunct?
>>>
>>> They should be run by t9004 (and if I replace them with a `die`, they
>>> clearly are). Are you sure your coverage script is not mistaken?
>>
>> It looks like I'm missing the 9000+ tests. The following line was in the 
>> script
>> I adapted from another CI job:
>>
>>  rm -f t/t9*.sh
>>
>> This was probably because the job I adapted from needed to run quickly, but 
>> for
>> this coverage report we should do the hard work of running whatever t9*.sh 
>> tests
>> we can.
> 
> I suspect most of those _are_ low-value. The git-p4 tests, for instance,
> are mostly exercising the p4 script and not our C code, and the same
> with git-svn. However I wouldn't be surprised if there are a few dusty
> corners they manage to hit that aren't covered elsewhere.
> 
> Still, if it's not too painful to add them in time-wise, it probably
> makes sense for the coverage tests to be as exhaustive as possible.

Unfortunately, even running the t9*.sh tests once (among the two runs:
first with default options and then with several GIT_TEST_* options)
causes the build to go beyond the three hour limit, and the builds time
out.

I'll just need to keep this in mind and do some more diligence myself
to check if things are covered in the 9000 tests before bugging people
about coverage.

Thanks,
-Stolee
 


Re: [PATCH v2 1/3] repo-settings: create core.featureAdoptionRate setting

2019-06-28 Thread Derrick Stolee
On 6/28/2019 4:50 PM, Junio C Hamano wrote:
> "Derrick Stolee via GitGitGadget"  writes:
> 
>> +core.featureAdoptionRate::
>> +Set an integer value on a scale from 0 to 10 describing your
>> +desire to adopt new performance features. Defaults to 0. As
>> +the value increases, features are enabled by changing the
>> +default values of other config settings. If a config variable
>> +is specified explicitly, the explicit value will override these
>> +defaults:
>> ++
>> +If the value is at least 3, then the following defaults are modified.
>> +These represent relatively new features that have existed for multiple
>> +major releases, and present significant performance benefits. They do
>> +not modify the user-facing output of porcelain commands.
>> ++
>> +* `core.commitGraph=true` enables reading commit-graph files.
>> ++
>> +* `gc.writeCommitGraph=true` eneables writing commit-graph files during
>> +`git gc`.
> 
> I was re-reading the whole series, and found that the phrase
> "present significant benefits" was somewhat overselling.  Wouldn't
> that claim largely depend on the end-user's workflow?  The same
> comment applies to the description of "at least 5" level, too.
> 
> I would not mind if we say "enabling this may present performance
> benefits", with or without "significant" before "performance
> benefits", and with or without ", depending how your repository is
> used" at the end.

Thanks for taking such a close look. Indeed, it is not appropriate
to over-sell here. I will take another stab at this documentation
next week.

-Stolee


Re: [PATCH] check_everything_connected: assume alternate ref tips are valid

2019-06-28 Thread Derrick Stolee
On 6/28/2019 6:11 AM, Jeff King wrote:
> When we receive a remote ref update to sha1 "X", we want to check that
> we have all of the objects needed by "X". We can assume that our
> repository is not currently corrupted, and therefore if we have a ref
> pointing at "Y", we have all of its objects. So we can stop our
> traversal from "X" as soon as we hit "Y".
> 
> If we make the same non-corruption assumption about any repositories we
> use to store alternates, then we can also use their ref tips to shorten
> the traversal.

I was confused by this paragraph, because I didn't know about
for_each_alternate_ref() and how refs_From_alternate_cb() will
strip the "/objects" and append "/refs" to check refs if they
exist. All of that logic is in transport.c but used by
fetch-pack.c and builtin/receive-pack.c. But now we are adding
to revision.c, so the restriction to "this helps data transfer"
is getting murkier.

Is this something that should be extracted to the object-store
layer? Or is it so tricky to use that we shouldn't make it too
easy to fall into a bad pattern?

> This is especially useful when cloning with "--reference", as we
> otherwise do not have any local refs to check against, and have to
> traverse the whole history, even though the other side may have sent us
> few or no objects. Here are results for the included perf test (which
> shows off more or less the maximal savings, getting one new commit and
> sharing the whole history):
> 
> TestHEAD^ HEAD
> 
> [on git.git]
> 5600.3: clone --reference   2.94(2.86+0.08)   0.09(0.08+0.01) -96.9%
> [on linux.git]
> 5600.3: clone --reference   45.74(45.34+0.41)   0.36(0.30+0.08) -99.2%

It's really hard to argue with numbers like these. Kudos!

> 
> Signed-off-by: Jeff King 
> ---
>  Documentation/rev-list-options.txt |  8 
>  connected.c|  1 +
>  revision.c | 30 +++
>  t/perf/p5600-clone-reference.sh| 27 ++
>  t/t5618-alternate-refs.sh  | 60 ++

Other than the high-level questions above, the code and tests look
good to me.

Thanks,
-Stolee 



Re: Git Test Coverage Report (Thurs. June 27)

2019-06-28 Thread Derrick Stolee
On 6/28/2019 5:47 AM, Duy Nguyen wrote:
> On Fri, Jun 28, 2019 at 12:35 AM Derrick Stolee  wrote:
>>> dir.c
>>> 3b2385cf 2840) static void jw_object_untracked_cache_dir(struct json_writer 
>>> *jw,
>>> 3b2385cf 2845) jw_object_bool(jw, "valid", ucd->valid);
>>> 3b2385cf 2846) jw_object_bool(jw, "check-only", ucd->check_only);
>>> 3b2385cf 2847) jw_object_stat_data(jw, "stat", &ucd->stat_data);
>>> 3b2385cf 2848) jw_object_string(jw, "exclude-oid", 
>>> oid_to_hex(&ucd->exclude_oid));
>>> 3b2385cf 2849) jw_object_inline_begin_array(jw, "untracked");
>>> 3b2385cf 2850) for (i = 0; i < ucd->untracked_nr; i++)
>>> 3b2385cf 2851) jw_array_string(jw, ucd->untracked[i]);
>>> 3b2385cf 2852) jw_end(jw);
>>> 3b2385cf 2854) jw_object_inline_begin_object(jw, "dirs");
>>> 3b2385cf 2855) for (i = 0; i < ucd->dirs_nr; i++) {
>>> 3b2385cf 2856) jw_object_inline_begin_object(jw, ucd->dirs[i]->name);
>>> 3b2385cf 2857) jw_object_untracked_cache_dir(jw, ucd->dirs[i]);
>>> 3b2385cf 2858) jw_end(jw);
>>> 3b2385cf 2860) jw_end(jw);
>>> 3b2385cf 2861) }
>>> 3b2385cf 2958) jw_object_inline_begin_object(jw, "root");
>>> 3b2385cf 2959) jw_object_untracked_cache_dir(jw, uc->root);
>>> 3b2385cf 2960) jw_end(jw);
>>
>> Duy: I know you were working on some tests for these options. This is 
>> specifically
>> in the "untracked cache" mode, so enabling the cache with at least one entry 
>> and
>> running --debug-json should be sufficient.
> 
> It's a bit more complicated than that, but I see your point.

It usually is. I don't mean to underestimate the effort here.

> I initially looked at the output and saw "something" and moved on. I
> should have examined the json output more carefully.

Thanks for taking a second look!

>>> read-cache.c
>>> 8eeabe15 1752) ret = error(_("index uses %.4s extension, which we do not 
>>> understand"),
>>> ee70c128 1754) if (advice_unknown_index_extension) {
>>> ee70c128 1755) warning(_("ignoring optional %.4s index extension"), ext);
>>> ee70c128 1756) advise(_("This is likely due to the file having been written 
>>> by a newer\n"
>>> 272b3f2a 2026) jw_object_true(jw, "assume_unchanged");
>>> 272b3f2a 2030) jw_object_true(jw, "skip_worktree");
>>> 272b3f2a 2032) jw_object_intmax(jw, "stage", ce_stage(ce));
>>> f0f544da 2309) ieot = read_ieot_extension(istate, mmap, mmap_size, 
>>> extension_offset);
>>> f0f544da 3651) static struct index_entry_offset_table *read_ieot_extension(
>>> f0f544da 3673) return do_read_ieot_extension(istate, index, extsize);
>>
>> Duy: more JSON output cases that could be interesting to cover.
> 
> This is because I changed the function signature, I think. Both IEOT
> and EOIE extensions, if I'm not mistaken, are never tested in the test
> suite. You need to set GIT_TEST_INDEX_THREADS, then the last three
> lines should be covered.

Thanks! Unfortunately, the threading is removed at compile-time in
order to prevent race conditions with the gcov output. This means
the report will never report the threading code as covered. :(

Does that same reasoning apply to the assume_unchanged, skip_worktree,
and "stage" lines?

-Stolee



Re: [PATCH 1/6] test-lib: introduce test_commit_bulk

2019-06-28 Thread Derrick Stolee
On 6/28/2019 5:39 AM, Jeff King wrote:
> Some tests need to create a string of commits. Doing this with
> test_commit is very heavy-weight, as it needs at least one process per
> commit (and in fact, uses several).
> 
> For bulk creation, we can do much better by using fast-import, but it's
> often a pain to generate the input. Let's provide a helper to do so.

What a quick turnaround! I'm happy to nerd-snipe you here, and am glad
the result was so positive.

> We'll use t5310 as a guinea pig, as it has three 10-commit loops. Here
> are hyperfine results before and after:
> 
>   [before]
>   Benchmark #1: ./t5310-pack-bitmaps.sh --root=/var/ram/git-tests
> Time (mean ± σ):  2.846 s ±  0.305 s[User: 3.042 s, System: 0.919 
> s]
> Range (min … max):2.250 s …  3.210 s10 runs
> 
>   [after]
>   Benchmark #1: ./t5310-pack-bitmaps.sh --root=/var/ram/git-tests
> Time (mean ± σ):  2.210 s ±  0.174 s[User: 2.570 s, System: 0.604 
> s]
> Range (min … max):1.999 s …  2.590 s10 runs

I ran these tests on my Windows machine, where the process startup time is
a higher cost. The improvement is noticeable from just watching the test lines
pause on the steps creating the commits.

 Before: 30.8-31.2s
  After: 23.5-23.8s

> So we're over 20% faster, while making the callers slightly shorter.

I see about the same relative measurement (~23%). The callers are a bit
cleaner, which is good. They are also slightly less clear of what's
happening, but that's the cost of abstraction. Definitely worth it in
this case!
 
> +# Similar to test_commit, but efficiently create  commits, each with a
> +# unique number $n (from 1 to  by default) in the commit message.
> +#
> +# Usage: test_commit_bulk [options] 
> +#   -C :
> +#Run all git commands in directory 
> +#   --ref=:
> +#ref on which to create commits (default: HEAD)
> +#   --start=:
> +#number commit messages from  (default: 1)
> +#   --message=:
> +#use  as the commit mesasge (default: "commit $n")
> +#   --filename=:
> +#modify  in each commit (default: $n.t)
> +#   --contents=:
> +#place  in each file (default: "content $n")
> +#   --id=:
> +#shorthand to use  and $n in message, filename, and contents
> +#
> +# The message, filename, and contents strings are evaluated by the shell 
> inside
> +# double-quotes, with $n set to the current commit number. So you can do:
> +#
> +#   test_commit_bulk --filename=file --contents='modification $n'
> +#
> +# to have every commit touch the same file, but with unique content. Spaces 
> are
> +# OK, but you must escape any metacharacters (like backslashes or
> +# double-quotes) you do not want expanded.
> +#

I appreciate all the documentation here!

> + while test "$total" -gt 0
> + do
> + echo "commit $ref" &&
> + printf 'author %s <%s> %s\n' \
> + "$GIT_AUTHOR_NAME" \
> + "$GIT_AUTHOR_EMAIL" \
> + "$cur_time -0700" &&
> + printf 'committer %s <%s> %s\n' \
> + "$GIT_COMMITTER_NAME" \
> + "$GIT_COMMITTER_EMAIL" \
> + "$cur_time -0700" &&
> + echo "data < + eval "echo \"$message\"" &&
> + echo "EOF" &&
> + eval "echo \"M 644 inline $filename\"" &&
> + echo "data < + eval "echo \"$contents\"" &&
> + echo "EOF" &&
> + echo &&
> + n=$((n + 1)) &&
> + cur_time=$((cur_time + 1)) &&
> + total=$((total - 1)) ||
> + echo "poison fast-import stream"
> + done

I am not very good at the nitty-gritty details of our scripts, but
looking at this I wonder if there is a cleaner and possibly faster
way to do this loop. The top thing on my mind are the 'eval "echo X"'
lines. If they start processes, then we can improve the performance.
If not, then it may not be worth it.

In wonder if instead we could create some format string outside the
loop and then pass the values that change between iterations into
that format string.

Thanks,
-Stolee



Re: Git Test Coverage Report (Thurs. June 27)

2019-06-28 Thread Derrick Stolee
On 6/28/2019 2:45 AM, Jeff King wrote:
> On Thu, Jun 27, 2019 at 01:35:17PM -0400, Derrick Stolee wrote:
> 
>>> t/helper/test-example-decorate.c
>>> 0ebbcf70 29) one = lookup_unknown_object(&one_oid);
>>> 0ebbcf70 30) two = lookup_unknown_object(&two_oid);
>>> 0ebbcf70 59) three = lookup_unknown_object(&three_oid);
>>
>> Peff: again interesting that these lines you refactored were not covered, 
>> especially
>> because they are part of a test helper. Perhaps the tests they were intended 
>> for are
>> now defunct?
> 
> They should be run by t9004 (and if I replace them with a `die`, they
> clearly are). Are you sure your coverage script is not mistaken?

It looks like I'm missing the 9000+ tests. The following line was in the script
I adapted from another CI job:

rm -f t/t9*.sh

This was probably because the job I adapted from needed to run quickly, but for
this coverage report we should do the hard work of running whatever t9*.sh tests
we can.

Sorry for the noise here, and thanks for checking!

-Stolee


Re: Git Test Coverage Report (Thurs. June 27)

2019-06-27 Thread Derrick Stolee
Here are some interesting sections I found when examining the test coverage
report. I am only highlighting these sections because they seem to include
non-trivial logic. In some cases, maybe the code isn't needed.

On 6/27/2019 1:05 PM, Derrick Stolee wrote:
> promisor-remote.c
> db27dca5 25) die(_("Remote with no URL"));
> 48de3158 61) warning(_("promisor remote name cannot begin with '/': %s"),
> 48de3158 63) return NULL;
> faf2abf4 93) previous->next = r->next;
> 4ca9474e 108) return git_config_string(&core_partial_clone_filter_default,
> fa3d1b63 139) return 0;
> 9e27beaa 202) static int remove_fetched_oids(struct repository *repo,
> 9e27beaa 206) int i, remaining_nr = 0;
> 9e27beaa 207) int *remaining = xcalloc(oid_nr, sizeof(*remaining));
> 9e27beaa 208) struct object_id *old_oids = *oids;
> 9e27beaa 211) for (i = 0; i < oid_nr; i++)
> 9e27beaa 212) if (oid_object_info_extended(repo, &old_oids[i], NULL,
> 9e27beaa 214) remaining[i] = 1;
> 9e27beaa 215) remaining_nr++;
> 9e27beaa 218) if (remaining_nr) {
> 9e27beaa 219) int j = 0;
> 9e27beaa 220) new_oids = xcalloc(remaining_nr, sizeof(*new_oids));
> 9e27beaa 221) for (i = 0; i < oid_nr; i++)
> 9e27beaa 222) if (remaining[i])
> 9e27beaa 223) oidcpy(&new_oids[j++], &old_oids[i]);
> 9e27beaa 224) *oids = new_oids;
> 9e27beaa 225) if (to_free)
> 9e27beaa 226) free(old_oids);
> 9e27beaa 229) free(remaining);
> 9e27beaa 231) return remaining_nr;
> 9e27beaa 248) if (remaining_nr == 1)
> 9e27beaa 249) continue;
> 9e27beaa 250) remaining_nr = remove_fetched_oids(repo, &remaining_oids,
> 9e27beaa 252) if (remaining_nr) {
> 9e27beaa 253) to_free = 1;
> 9e27beaa 254) continue;
> 9e27beaa 262) free(remaining_oids);

Christian: this section continues to be untested, but I think you were
working on creating tests for this.

> repo-settings.c
> 0a01e977 13) int rate = git_config_int(key, value);
> 0a01e977 14) if (rate >= 3) {
> 0a01e977 15) UPDATE_DEFAULT(rs->core_commit_graph, 1);
> 0a01e977 16) UPDATE_DEFAULT(rs->gc_write_commit_graph, 1);
> c5c84f32 17) UPDATE_DEFAULT(rs->index_version, 4);
> 3172404b 19) if (rate >= 5) {
> 3172404b 20) UPDATE_DEFAULT(rs->pack_use_sparse, 1);
> 0a01e977 22) return 0;

These are mine. Since no one has been complaining about the design
of core.featureAdoptionRate in ds/early-access [1], I'll move forward
to add some tests for this setting. It may come in the form of a
GIT_TEST_ADOPTION_RATE environment variable so it has wider coverage
across the test suite. I may even add explicit tests that demonstrate
the new defaults enabled by core.featureAdoptionRate are overridden by
explicit config settings. index.version is a good candidate.

[1] https://public-inbox.org/git/pull.254.v2.git.gitgitgad...@gmail.com/

> t/helper/test-match-trees.c
> 3fe87a7f 23) shift_tree(the_repository, &one->object.oid, &two->object.oid, 
> &shifted, -1);

Duy: here is another example of a conversion to "struct repository *" but
since it is using the_repository here it is definitely safe (no worse than 
before).

> builtin/fetch.c
> cdbd70c4 88) fetch_show_forced_updates = git_config_bool(k, v);
> cdbd70c4 89) return 0;

Mine again. I should explicitly test the fetch.showForcedUpdates config option,
especially against the '--no-show-forced-updates' argument (to show the argument
wins).

> builtin/pull.c
> 3883c551 556) argv_array_push(&args, "--show-forced-updates");
> 3883c551 558) argv_array_push(&args, "--no-show-forced-updates");

Not sure if this is super-important to test. It's a simple argument 
pass-through.
But also maybe I change my 'git fetch' tests to be 'git pull' tests and it 
covers
the 'git fetch' builtin at the same time.

> dir.c
> 3b2385cf 2840) static void jw_object_untracked_cache_dir(struct json_writer 
> *jw,
> 3b2385cf 2845) jw_object_bool(jw, "valid", ucd->valid);
> 3b2385cf 2846) jw_object_bool(jw, "check-only", ucd->check_only);
> 3b2385cf 2847) jw_object_stat_data(jw, "stat", &ucd->stat_data);
> 3b2385cf 2848) jw_object_string(jw, "exclude-oid", 
> oid_to_hex(&ucd->exclude_oid));
> 3b2385cf 2849) jw_object_inline_begin_array(jw, "untracked");
> 3b2385cf 2850) for (i = 0; i < ucd->untracked_nr; i++)
> 3b2385cf 2851) jw_array_string(jw, ucd->untracked[i]);
> 3b2385cf 2852) jw_end(jw);
> 3b2385cf 2854) jw_object_inline_begin_object(jw, "dirs");
> 3b2385cf 2855) for (i = 0; i < ucd->dirs_nr; i++) {
> 3b2385cf 2856) jw_object_inline_begin_object(jw, ucd->dirs[i]->name);
> 3b2385cf 2857) jw_object_untracked_cache_dir(jw, ucd->dirs[i]);
> 

Re: [PATCH 0/6] Kill the_repository in tree-walk.c

2019-06-27 Thread Derrick Stolee
On 6/27/2019 9:04 AM, Johannes Schindelin wrote:
> Given that this bug was only caught by a failing CI build, it does make me
> wonder what other bugs are hidden and would slip into our code base just
> because of gaps in the code coverage.

Here are the lines introduced by this series that are not covered by the
test suite. I'm not asking you to write tests to cover these lines, but
please re-examine the lines to be sure the correct conversion was made.

Thanks,
-Stolee

> Uncovered code in 'pu' not in 'jch'
> 
> 
> archive.c
> 47f956bd 421) err = get_tree_entry(ar_args->repo,
> 47f956bd 422)  &tree->object.oid,
> 
> fast-import.c
> 35d7cdbe 2565) char *buf = read_object_with_reference(the_repository,
> 35d7cdbe 2566)&n->oid,
> 
> match-trees.c
> 3fe87a7f 294) if (get_tree_entry(r, hash2, del_prefix, shifted, &mode))
> 
> t/helper/test-match-trees.c
> 3fe87a7f 23) shift_tree(the_repository, &one->object.oid, &two->object.oid, 
> &shifted, -1);
>
> Nguyễn Thái Ngọc Duy  35d7cdbe sha1-file.c: remove the_repo from 
> read_object_with_reference()
> Nguyễn Thái Ngọc Duy  3fe87a7f match-trees.c: remove the_repo from 
> shift_tree*()
> Nguyễn Thái Ngọc Duy  47f956bd tree-walk.c: remove the_repo from 
> get_tree_entry()



Git Test Coverage Report (Thurs. June 27)

2019-06-27 Thread Derrick Stolee
14) remaining[i] = 1;
9e27beaa 215) remaining_nr++;
9e27beaa 218) if (remaining_nr) {
9e27beaa 219) int j = 0;
9e27beaa 220) new_oids = xcalloc(remaining_nr, sizeof(*new_oids));
9e27beaa 221) for (i = 0; i < oid_nr; i++)
9e27beaa 222) if (remaining[i])
9e27beaa 223) oidcpy(&new_oids[j++], &old_oids[i]);
9e27beaa 224) *oids = new_oids;
9e27beaa 225) if (to_free)
9e27beaa 226) free(old_oids);
9e27beaa 229) free(remaining);
9e27beaa 231) return remaining_nr;
9e27beaa 248) if (remaining_nr == 1)
9e27beaa 249) continue;
9e27beaa 250) remaining_nr = remove_fetched_oids(repo, &remaining_oids,
9e27beaa 252) if (remaining_nr) {
9e27beaa 253) to_free = 1;
9e27beaa 254) continue;
9e27beaa 262) free(remaining_oids);

protocol.c

remote-curl.c

repo-settings.c
0a01e977 13) int rate = git_config_int(key, value);
0a01e977 14) if (rate >= 3) {
0a01e977 15) UPDATE_DEFAULT(rs->core_commit_graph, 1);
0a01e977 16) UPDATE_DEFAULT(rs->gc_write_commit_graph, 1);
c5c84f32 17) UPDATE_DEFAULT(rs->index_version, 4);
3172404b 19) if (rate >= 5) {
3172404b 20) UPDATE_DEFAULT(rs->pack_use_sparse, 1);
0a01e977 22) return 0;

strbuf.c
fb819691 818) strbuf_addf(buf, "%u.%2.2u ",
fb819691 822) strbuf_addstr(buf, _("GiB"));

t/helper/test-dir-iterator.c
655af733 24) die("invalid option '%s'", *argv);
655af733 28) die("dir-iterator needs exactly one non-option argument");
9bd70db7 46) printf("[?] ");

t/helper/test-match-trees.c
3fe87a7f 23) shift_tree(the_repository, &one->object.oid, &two->object.oid, 
&shifted, -1);

upload-pack.c
a8d662e3 130) return readsz;
820a5361 149) BUG("packfile_uris requires sideband-all");
9b93d269 221) sq_quote_buf(&buf, spec);
a8d662e3 354) send_client_data(1, output_state.buffer, output_state.used);
820a5361 1387) string_list_clear(&data->uri_protocols, 0);

wrapper.c

Commits introducting uncovered code:
Ævar Arnfjörð Bjarmason b4f207f3 env--helper: new undocumented builtin wrapping 
git_env_*()
Ævar Arnfjörð Bjarmason 2e43cd4c config.c: refactor die_bad_number() to not 
call gettext() early
Christian Couderfa3d1b63 promisor-remote: parse 
remote.*.partialclonefilter
Christian Couderfaf2abf4 promisor-remote: use 
repository_format_partial_clone
Christian Couder4ca9474e Move core_partial_clone_filter_default to 
promisor-remote.c
Christian Couder    db27dca5 Remove fetch-object.{c,h} in favor of 
promisor-remote.{c,h}
Christian Couder48de3158 Add initial support for many promisor remotes
Christian Couder9e27beaa promisor-remote: implement 
promisor_remote_get_direct()
Christian Couderb14ed5ad Use promisor_remote_get_direct() and 
has_promisor_remote()
Daniel Ferreira 9bd70db7 dir-iterator: add tests for dir-iterator API
Denton Liu  f39a9c65 remote: add --save-to-push option to git remote set-url
Derrick Stolee  0a01e977 repo-settings: create core.featureAdoptionRate setting
Derrick Stolee  c5c84f32 repo-settings: use index.version=4 by default
Derrick Stolee  3172404b repo-settings: pack.useSparse=true
Dimitriy Ryazantcev fb819691 l10n: localizable upload progress messages
Jonathan Tan820a5361 upload-pack: send part of packfile response as uri
Jonathan Tana8d662e3 upload-pack: refactor reading of pack-objects out
Junio C Hamano  3d908bb8 Merge branch 'jt/fetch-cdn-offload' into pu
Matheus Tavares 655af733 dir-iterator: add flags parameter to dir_iterator_begin
Matheus Tavares 50e85c40 dir-iterator: refactor state machine model
Matheus Tavares d95432d7 clone: use dir-iterator to avoid explicit dir traversal
Matheus Tavares fbb4a33c clone: extract function from copy_or_link_directory
Matheus Tavares c40f077a dir-iterator: use warning_errno when possible
Matthew DeVore  aa36553a list-objects-filter: make API easier to use
Matthew DeVore  1e43301f list-objects-filter: implement composite filters
Matthew DeVore  d3d10e56 list-objects-filter-options: move error check up
Matthew DeVore  9b93d269 list-objects-filter-options: make filter_spec a 
string_list
Nguyễn Thái Ngọc Duy35d7cdbe sha1-file.c: remove the_repo from 
read_object_with_reference()
Nguyễn Thái Ngọc Duy3fe87a7f match-trees.c: remove the_repo from 
shift_tree*()
Nguyễn Thái Ngọc Duy47f956bd tree-walk.c: remove the_repo from 
get_tree_entry()


Uncovered code in 'jch' not in 'next'


blame.c
1fc73384 990) return;
a07a9776 1599) continue;
ae3f36de 2417) continue;

builtin/blame.c

builtin/checkout.c
d16dc428 1345) warning(_("you are switching branch while bisecting"));
3ec37ad1 1370) die(_("'%s' cannot be used with '%s'"), "--discard-changes", 
"--merge");
c9c935f6 1508) BUG("make up your mind, you need to take _something_");
183fb44f 1540) opts->checkout_index = 0;
183fb44f 1550) BUG("these 

Re: [PATCH v2 05/10] split-index.c: dump "link" extension as json

2019-06-27 Thread Derrick Stolee
On 6/27/2019 9:24 AM, Jeff Hostetler wrote:
> On 6/27/2019 6:48 AM, Duy Nguyen wrote:
>> On Tue, Jun 25, 2019 at 7:40 PM Derrick Stolee  wrote:
>>>
>>> On 6/25/2019 6:29 AM, Duy Nguyen wrote:
>>>> On Tue, Jun 25, 2019 at 3:06 AM Jeff Hostetler  
>>>> wrote:
>>>>> I'm curious how big these EWAHs will be in practice and
>>>>> how useful an array of integers will be (especially as the
>>>>> pretty format will be one integer per line).  Perhaps it
>>>>> would helpful to have an extended example in one of the
>>>>> tests.
>>>>
>>>> It's one integer per updated entry. So if you have a giant index and
>>>> updated every single one of them, the EWAH bitmap contains that many
>>>> integers.
>>>>
>>>> If it was easy to just merge these bitmaps back to the entry (e.g. in
>>>> this example, add "replaced": true to entry zero) I would have done
>>>> it. But we dump as we stream and it's already too late to do it.
>>>>
>>>>> Would it be better to have the caller of ewah_each_bit()
>>>>> build a hex or bit string in a strbuf and then write it
>>>>> as a single string?
>>>>
>>>> I don't think the current EWAH representation is easy to read in the
>>>> first place. You'll probably have to run through some script to update
>>>> the main entries part and will have a much better view, but that's
>>>> pretty quick. If it's for scripts, then it's probably best to keep as
>>>> an array of integers, not a string. Less post processing.
>>>
>>> I don't think the intent is to dump the EWAH directly, but instead to
>>> dump a string of the uncompressed bitmap. Something like:
>>>
>>>  "delete_bitmap" : "01101101101"
>>>
>>> instead of
>>>
>>>  "delete_bitmap" : [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1 ]
>>
>> I get this part. But the numbers in the array were the position of the
>> set bits. It's not showing just the actual bit map.
>>
>> The same bitmap would be currently displayed as
>>
>>   "delete_bitmap": [ 1, 2, 4, 5, 7, 8, 9, 11 ]
>>
>> And that maps back to the entry[1], entry[2], entry[4]... in the index
>> being deleted from the base index. So displaying as a real bit map
>> actually adds more work for both the reader and the tool because you
>> have to calculate the position either way. And it gets harder if the
>> bit you're intereted in is on the far right.
> 
> 
> Thanks for the clarification.  That helps.

Same here! We expect these to be much smaller than the full set, correct?

Thanks,
-Stolee



Re: [PATCH 2/6] tree-walk.c: remove the_repo from fill_tree_descriptor()

2019-06-26 Thread Derrick Stolee
On 6/26/2019 12:27 PM, Junio C Hamano wrote:
> Derrick Stolee  writes:
> 
>>> diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c
>>> index 34ca0258b1..97b54caeb9 100644
>>> --- a/builtin/merge-tree.c
>>> +++ b/builtin/merge-tree.c
>>> @@ -205,6 +205,7 @@ static void resolve(const struct traverse_info *info, 
>>> struct name_entry *ours, s
>>>  static void unresolved_directory(const struct traverse_info *info,
>>>  struct name_entry n[3])
>>>  {
>>> +   struct repository *r = the_repository;
>>
>> I like this trick to make the change below minimal:
>>> +   buf0 = fill_tree_descriptor(r, t + 0, ENTRY_OID(n + 0));
>>> +   buf1 = fill_tree_descriptor(r, t + 1, ENTRY_OID(n + 1));
>>> +   buf2 = fill_tree_descriptor(r, t + 2, ENTRY_OID(n + 2));
>>
>> I wonder if _every_ conversion should include this trick,
>> so when we move to change that method we simply move the definition
>> from the method block to the prototype. (No need to adjust what you've
>> done already, just an idea for future conversions.)
> 
> Hmm, interesting.  So those callers in builtin/rebase.c::reset_head()
> and other places that adds the_repository as the new first parameter
> can take a local variable "r" (or perhaps a bit more descriptive,
> e.g. "repo") that is initialized to "the_repository" (and never
> reassigned at least at this step) in this same patch?

Yes, that is what I was thinking. It means that we can stop munging
the function calls for the method that is converted. When the caller
is updated, the call site already uses "r" but we change how "r" is
initialized (as a parameter instead of a local variable).

Thanks,
-Stolee


Re: [BUG] Bad coloring

2019-06-26 Thread Derrick Stolee
On 6/26/2019 4:32 AM, Eugen Konkov wrote:
> Hello,
> 
> For next diff the `}` line should not be detected as removed/added.
> 
> As  you can see there are differences only at new lines. (see attached
> pics)
> 
> git version 2.21.0
> 
> 
> git diff -b -w --ignore-blank-lines
> 
> 
> git config:
> 
> [color "diff"]
> old = red bold
> new = green bold
> 
> [diff]
> tool = sublimerge
> colorMoved = default
> colorMovedWS = ignore-all-space
> 

Below, I pasted your diff in the message body for
easier discussion on-list.

Your diff shows that the content was moved. However, it was
also changed during the move by adding that extra line of
whitespace. The "ignore-all-space" setting can only ignore
whitespace when matching lines. It cannot ignore entire
lines of whitespace.

For that reason, the last three lines are not matched as moved
because they are not part of the larger hunk that moved. You
are using the 'default' mode for diff.colorMoved which defaults
to the 'zebra' mode and that matches blocks of at least 20
characters. These lines at the end are only four characters
("}\n\n\n").

If we used a smaller block size, then we would have too many
false-positives when using the color-moved option.

Thanks,
-Stolee

---

--- a/Service.pm
+++ b/Service.pm
@@ -34,46 +34,4 @@ $X->set_primary_key("id");
 $X->has_many( service_types => $X, 'parent_id' );
 
 
-sub sqlt_deploy_hook {
-my( $self, $sqlt_table ) =  @_;
-
-my $sqlt =  $sqlt_table->schema;
-$sqlt->add_procedure(
-name =>  'service_level_tree',
-parameters =>  [
-{ argmode => 'in',  type => 'integer' },
-],
-extra =>  {
-returns =>  { type => 'table( id int, parent_id int, name text, 
display text,
-definitions =>  [
-{ language  =>  'sql' },
-{ attribute =>  'STABLE' },
-{ quote => "\$\$\n",  body => <<'   FUNC' =~ s!^\t!!grm  =~ 
s!;\n!;/**/\n
-WITH RECURSIVE service_level_tree (id, parent_id, name, display, depth 
) AS (
-SELECT
-  id,
-  parent_id,
-  name,
-  display,
-  1
-FROM service_level
-WHERE id = $1
-UNION
-SELECT
-  sl.id,
-  sl.parent_id,
-  sl.name,
-  sl.display,
-  depth +1
-FROM service_level_tree t
-INNER JOIN service_level sl ON sl.id = t.parent_id
-WHERE depth < 10   -- Prohibit deep hierarchy
-)
-SELECT * FROM service_level_tree;
-FUNC
-]});
-
-}
-
-
 1;
diff --git a/ServiceLevel.pm b/S
index 73fa9dea..f7da48c8 100644
--- a/ServiceLevel.pm
+++ b/ServiceLevel.pm
@@ -34,4 +34,46 @@ $X->add_unique_constraint([ 'parent_id', 'name' ]);
 
 $X->has_many( service_levels => $X, 'parent_id' );
 
+
+sub sqlt_deploy_hook {
+my( $self, $sqlt_table ) =  @_;
+
+my $sqlt =  $sqlt_table->schema;
+$sqlt->add_procedure(
+name =>  'service_level_tree',
+parameters =>  [
+{ argmode => 'in',  type => 'integer' },
+],
+extra =>  {
+returns =>  { type => 'table( id int, parent_id int, name text, 
display text,
+definitions =>  [
+{ language  =>  'sql' },
+{ attribute =>  'STABLE' },
+{ quote => "\$\$\n",  body => <<'   FUNC' =~ s!^\t!!grm  =~ 
s!;\n!;/**/\n
+WITH RECURSIVE service_level_tree (id, parent_id, name, display, depth 
) AS (
+SELECT
+  id,
+  parent_id,
+  name,
+  display,
+  1
+FROM service_level
+WHERE id = $1
+UNION
+SELECT
+  sl.id,
+  sl.parent_id,
+  sl.name,
+  sl.display,
+  depth +1
+FROM service_level_tree t
+INNER JOIN service_level sl ON sl.id = t.parent_id
+WHERE depth < 10   -- Prohibit deep hierarchy
+)
+SELECT * FROM service_level_tree;
+FUNC
+]});
+}
+
+
 1;



Re: [PATCH v2 05/10] split-index.c: dump "link" extension as json

2019-06-25 Thread Derrick Stolee
On 6/25/2019 6:29 AM, Duy Nguyen wrote:
> On Tue, Jun 25, 2019 at 3:06 AM Jeff Hostetler  wrote:
>> I'm curious how big these EWAHs will be in practice and
>> how useful an array of integers will be (especially as the
>> pretty format will be one integer per line).  Perhaps it
>> would helpful to have an extended example in one of the
>> tests.
> 
> It's one integer per updated entry. So if you have a giant index and
> updated every single one of them, the EWAH bitmap contains that many
> integers.
> 
> If it was easy to just merge these bitmaps back to the entry (e.g. in
> this example, add "replaced": true to entry zero) I would have done
> it. But we dump as we stream and it's already too late to do it.
> 
>> Would it be better to have the caller of ewah_each_bit()
>> build a hex or bit string in a strbuf and then write it
>> as a single string?
> 
> I don't think the current EWAH representation is easy to read in the
> first place. You'll probably have to run through some script to update
> the main entries part and will have a much better view, but that's
> pretty quick. If it's for scripts, then it's probably best to keep as
> an array of integers, not a string. Less post processing.

I don't think the intent is to dump the EWAH directly, but instead to
dump a string of the uncompressed bitmap. Something like:

"delete_bitmap" : "01101101101"

instead of

"delete_bitmap" : [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1 ]

> Another reason for not merging to one string (might not be a very good
> argument though) is to help diff between two indexes.
> One-number-per-line works well with "git diff --no-index" while one
> long string is a bit harder. I did this kind of comparison when I made
> changes in read-cache.c and wanted to check if the new index file is
> completely broken, or just slighly broken.

You're right that the diff of the json output is an interesting
use, and the "single string" output is not helpful. What about
batches of 64-bit strings? For example:

"delete_bitmap" : [

"0101010101010101010101010101010101010101010101010101010101010101",

"0101010101010101010101010101010101010101010101010101010101010101",

"0101010101010101010101010101010101010101010101010101010101010101",
"01010101010101"
]

This could be a happy medium between the two options, but does require
some extra work in the formatter.

Thanks,
-Stolee


Re: Revision walking, commit dates, slop

2019-06-25 Thread Derrick Stolee
On 6/25/2019 3:51 AM, Jakub Narebski wrote:
> Jakub Narebski  writes:
>> Derrick Stolee  writes:
>>> On 5/20/2019 7:02 AM, Jakub Narebski wrote:
>>>>
>>>> Are there any blockers that prevent the switch to this
>>>> "generation number v2"?
>>>>
>>>> - Is it a problem with insufficient data to choose the correct numbering
>>>>   as "generation number v2' (there can be only one)?
>>>> - Is it a problem with selected "generation number v2" being
>>>>   incompatibile with gen v2, and Git failing when new version of
>>>>   commit-graph is used instead of softly just not using commit-graph?
>>>> - Or is it something else?
> [...]
> 
>>>  Using the generation number column for the corrected
>>> commit-date offsets (assuming we also guarantee the offset is strictly
>>> increasing from parent to child), these new values will be backwards-
>>> compatible _except_ for 'git commit-graph verify'.
>>
>> O.K., so the "generation number v2 (legacy)" would be incremental and
>> backward-compatibile in use (though not in generation and validation).
>>
>> Do I understand it correctly how it is calculated:
>>
>>   corrected_date(C) = max(committer_date(C),
>>   max_{P ∈ parents(C)}(corrected_date(P)) + 1)
>>   offset(C) = corrected_date(C) - committer_date(C)
>>   gen_v2(C) = max(offset(C), max_{P ∈ parents(C)}(gen_v2(P)) + 1) 
> 
> Do you remember who first came up with this idea for backward
> compatibile corrected commit date offsets (monotonically offset
> corrected date)?

I remember saying that the "corrected commit date" that I had suggested
was weak because it was not backwards-compatible with generation numbers
if you are only looking at the offsets. I don't remember who suggested
simply increasing the offset so they do become backwards-compatible.
 
>> Do you have benchmark for this "monotonically offset corrected commit
>> date" generation number in 
>> https://github.com/derrickstolee/git/commits/reach-perf
>> and https://github.com/derrickstolee/gen-test ?
> 
> I guess this will have to wait...

I have not had time to revisit this topic and re-run performance
numbers, sorry.

-Stolee


Re: [PATCH 6/6] Use the right 'struct repository' instead of the_repository

2019-06-24 Thread Derrick Stolee
On 6/24/2019 5:55 AM, Nguyễn Thái Ngọc Duy wrote:
> There are a couple of places where 'struct repository' is already passed
> around, but the_repository is still used. Use the right repo.
> 
> Signed-off-by: Nguyễn Thái Ngọc Duy 

nit: the subject line doesn't use the standard "area: topic" format (including
the capitalization of the first word). Perhaps:

treewide: use the right 'struct repository' instead of the_repository

The changes here are straight-forward, but how do we check if we are done?

Thanks,
-Stolee


Re: [PATCH 3/6] tree-walk.c: remove the_repo from get_tree_entry()

2019-06-24 Thread Derrick Stolee
On 6/24/2019 5:55 AM, Nguyễn Thái Ngọc Duy wrote:
> Signed-off-by: Nguyễn Thái Ngọc Duy 
> ---
>  archive.c  |  4 +++-
>  blame.c|  4 ++--
>  builtin/rm.c   |  2 +-
>  builtin/update-index.c |  2 +-
>  line-log.c |  7 ---
>  match-trees.c  |  6 +++---
>  merge-recursive.c  |  8 +---
>  notes.c|  2 +-
>  sha1-name.c|  9 +
>  tree-walk.c| 18 --
>  tree-walk.h|  2 +-
>  11 files changed, 38 insertions(+), 26 deletions(-)
> 
> diff --git a/archive.c b/archive.c
> index 53141c1f0e..a8da0fcc4f 100644
> --- a/archive.c
> +++ b/archive.c
> @@ -418,7 +418,9 @@ static void parse_treeish_arg(const char **argv,
>   unsigned short mode;
>   int err;
>  
> - err = get_tree_entry(&tree->object.oid, prefix, &tree_oid,
> + err = get_tree_entry(ar_args->repo,

If I'm reading this correctly, this is a place where we previously converted
to using a custom repository pointer but this function boundary reverted us
to the_repository anyway. I know we have some tests around the commit-graph
that ensures it works with an arbitrary repository (and I frequently stumble
over them when I add new dependencies). How can we add more testing around
these new conversions?

The rest looks straight-forward.

Thanks,
-Stolee



Re: [PATCH 2/6] tree-walk.c: remove the_repo from fill_tree_descriptor()

2019-06-24 Thread Derrick Stolee
On 6/24/2019 5:55 AM, Nguyễn Thái Ngọc Duy wrote:
> While at there, clean up the_repo usage in builtin/merge-tree.c a tiny
> bit.
> 
> Signed-off-by: Nguyễn Thái Ngọc Duy 
> ---
>  builtin/merge-tree.c | 22 +-
>  builtin/rebase.c |  4 ++--
>  builtin/reset.c  |  4 ++--
>  notes.c  |  2 +-
>  sequencer.c  |  2 +-
>  tree-diff.c  |  4 ++--
>  tree-walk.c  |  6 --
>  tree-walk.h  |  4 +++-
>  unpack-trees.c   |  2 +-
>  9 files changed, 29 insertions(+), 21 deletions(-)
> 
> diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c
> index 34ca0258b1..97b54caeb9 100644
> --- a/builtin/merge-tree.c
> +++ b/builtin/merge-tree.c
> @@ -205,6 +205,7 @@ static void resolve(const struct traverse_info *info, 
> struct name_entry *ours, s
>  static void unresolved_directory(const struct traverse_info *info,
>struct name_entry n[3])
>  {
> + struct repository *r = the_repository;

I like this trick to make the change below minimal:
> + buf0 = fill_tree_descriptor(r, t + 0, ENTRY_OID(n + 0));
> + buf1 = fill_tree_descriptor(r, t + 1, ENTRY_OID(n + 1));
> + buf2 = fill_tree_descriptor(r, t + 2, ENTRY_OID(n + 2));

I wonder if _every_ conversion should include this trick,
so when we move to change that method we simply move the definition
from the method block to the prototype. (No need to adjust what you've
done already, just an idea for future conversions.)

Thanks,
-Stolee


Re: [PATCH v3 4/5] repack: optionally assume transitive kept packs

2019-06-24 Thread Derrick Stolee
On 6/24/2019 8:07 AM, Nathaniel Filardo wrote:
> If the user is careful to mark .pack files as kept only when they refer
> to (other) kept packs, then we can rely on this when walking the object
> graph in subsequent repack operations and reduce the time and memory
> spent building the object graph.
> 
> Towards that end, then, teach git repack to enumerate the COMMITs and
> TREEs in kept packs and mark them as UNINTERESTING for pack-object's
> operation.  Because this creates UNINTERESTING marks, we make repack's
> --assume-pack-keep-transitive imply --sparse for pack-object to avoid
> hauling the entire object graph into memory.
> 
> In testing with a 203GB repository with 80M objects, this amounts to
> several gigabytes of memory and over ten minutes saved.  This machine
> has 32G of RAM and the repository is on a fast SSD to speed testing.

Thanks for the performance details!

> All told, this test repository takes about 33 minutes elapsed (28
> minutes in user code) and 3 GB of RAM to repack 12M objects occupying
> 33GB scattered across 477 pack files not marked as kept (the remainder
> of the objects are spread across three kept pack files).  The time and
> RAM usage with --assume-pack-keep-transitive should be dominated by
> factors proportional to the size and number of un-kept objects.
> 
> Without these optimizations, it takes about 45 minutes (34 minutes in
> user code) and 7 GB of RAM to compute the same pack file.  The extra
> time and total memory use are expected to be proportional to the kept
> packs' size, not the working set of the repack.  The time discrepancy
> can be expected to be more dramatic when the repository is on spinning
> rust rather than SSD.
> 
> Signed-off-by: Nathaniel Filardo 
> ---
>  Documentation/git-repack.txt | 21 +
>  builtin/repack.c | 57 ++--
>  2 files changed, 76 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
> index 836d81457a..014812c4bb 100644
> --- a/Documentation/git-repack.txt
> +++ b/Documentation/git-repack.txt
> @@ -169,6 +169,27 @@ depth is 4095.
>   Pass the `--sparse` option to `git-pack-objects`; see
>   linkgit:git-pack-objects[1].
>  
> +--assume-pack-keep-transitive::
> + This flag accelerates the search for objects to pack by assuming
> + that commit objects found in kept packfiles make reference only
> + to that or other kept packfiles.  This is very useful if, for
> + example, this repository periodically repacks all reachable objects
> + and marks the resulting pack file as kept.
> +
> + Because this option may prevent git from descending into kept packs,
> + no objects inside kept packs will be available for delta processing.
> + One should therefore restrict the use of this option to situations
> + where delta processing is disabled or when relatively large amounts
> + of data are considered and the relative gain of a wider set of
> + possible delta base objects is reduced.
> +
> + The simplest way to ensure transitivity of kept packs is to run `git
> + repack` with `-a` (or `-A`) and `-d` and mark all resulting packs as
> + kept, never removing the kept marker.  In practice, one may wish to
> + wait to mark packs as kept until they are sufficiently large
> + (reducing the number of pack files necessary for a given set of
> + objects) but not so large as to be unwieldy.
> +
>  Configuration
>  -
>  
> diff --git a/builtin/repack.c b/builtin/repack.c
> index 4cfdd62bb8..a2cd479686 100644
> --- a/builtin/repack.c
> +++ b/builtin/repack.c
> @@ -11,6 +11,8 @@
>  #include "midx.h"
>  #include "packfile.h"
>  #include "object-store.h"
> +#include "revision.h"
> +#include "list-objects.h"
>  
>  static int delta_base_offset = 1;
>  static int pack_kept_objects = -1;
> @@ -256,6 +258,30 @@ static void repack_promisor_objects(const struct 
> pack_objects_args *args,
>   die(_("could not finish pack-objects to repack promisor 
> objects"));
>  }
>  
> +static void apkt_show_commit(struct commit *commit, void *data)
> +{
> + struct tree *tree;
> + struct pack_entry e;
> +
> + if (!find_pack_entry(the_repository, &commit->object.oid, &e))
> + return;
> +
> + if (!(e.p->pack_keep || e.p->pack_keep_in_core))
> + return;
> +
> + tree = get_commit_tree(commit);
> + if (tree)
> + tree->object.flags |= UNINTERESTING;
> +
> + write_oid(&commit->object.oid, e.p, 0, data);
> + write_oid(&tree->object.oid, NULL, 0, data);
> +}
> +
> +static void apkt_show_object(struct object *obj, const char *name, void 
> *data)
> +{
> + return;
> +}
> +
>  #define ALL_INTO_ONE 1
>  #define LOOSEN_UNREACHABLE 2
>  
> @@ -287,6 +313,7 @@ int cmd_repack(int argc, const char **argv, const char 
> *prefix)
>   struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
>   int no_update_s

Re: [PATCH v3 3/5] repack: add --sparse and pass to pack-objects

2019-06-24 Thread Derrick Stolee
On 6/24/2019 8:07 AM, Nathaniel Filardo wrote:
> The sparse connectivity algorithm saves a whole lot of time when there
> are UNINTERESTING trees around.

Neat! Any chance for some example performance stats?

> ---

Looks like you forgot your Signed-off-by here.

> +# repack --sparse invokes pack-objects --sparse
> +test_expect_success 'repack --sparse and fsck' '
> + git repack -a --sparse &&
> + git fsck
> +'

This test may not be enough to properly test the sparse
algorithm, as it is only really enabled when the --revs
argument is given to the pack-objects process AND there
is a "NOT" ref (i.e. "!{oid}" over stdin). Using "-a" here
will not have any "NOT" references. OR maybe this already
is enough when you have the .keep packs. Is there a way
you could set up the packs in this test to explicitly be
in the situation you describe where the sparse option
speeds things up?

It's hard to check that the '--sparse' option is doing
anything in a test, but it is important that we run the
logic. One way to see if this test is doing anything is
to insert a die() somewhere. For example, this die()
statement will check if we ever needed to mark things
uninteresting in a workdir path with both UNINTERESTING
and INTERESTING trees:

diff --git a/revision.c b/revision.c
index eb8e51bc63..8835f8e7b1 100644
--- a/revision.c
+++ b/revision.c
@@ -227,6 +227,7 @@ void mark_trees_uninteresting_sparse(struct repository *r,
if (!has_uninteresting || !has_interesting)
return;

+   die("we exercised the logic!");
paths_and_oids_init(&map);

oidset_iter_init(trees, &iter);

The implementation of the argument looks straight-forward.
It appears we are passing the argument to the sub-process,
so the bitflag isn't used yet.

-Stolee



Re: [PATCH v3 2/5] revision walk: optionally use sparse reachability

2019-06-24 Thread Derrick Stolee
On 6/24/2019 8:07 AM, Nathaniel Filardo wrote:
> Add another bit flag to the struct rev_info.
> 
> The only caller that uses this after this patch is builtin/pack-objects.
> Without this, sparsity seems to do little good therein, as
> prepare_revision_walk will densely propagate UNINTERESTING flags from
> trees to tree contents, before mark_edges_uninteresting has a chance to
> be faster by being sparse.
> 
> While here, drop the "sparse" parameter to mark_edges_uninteresting,
> introduced in 4f6d26b167 ("list-objects: consume sparse tree walk",
> 2019-01-16) which can now use the flag in struct rev_info.  No
> functional change intended.

Looks like a straight-forward refactor to me. I'll keep reading
for the use of this new bitflag.

-Stolee


Re: [PATCH v3 1/5] count-objects: report statistics about kept packs

2019-06-24 Thread Derrick Stolee
On 6/24/2019 8:07 AM, Nathaniel Filardo wrote:
> Signed-off-by: Nathaniel Filardo 
> 
> update count-objects
> ---

This post-signoff text looks like cruft from a rebase.

-Stolee


Git Test Coverage Report (Thurs, June 20)

2019-06-20 Thread Derrick Stolee
t;wt->head_ref);
28438e84 1500) strbuf_addstr(&desc, _("no branch"));

sequencer.c
37e9ee5c 293) ret = -1;
37e9ee5c 311) ret = error(_("could not remove '%s'"), buf.buf);
6bd767d1 2669) in_progress_error = _("revert is already in progress");
6bd767d1 2670) in_progress_advice =
6bd767d1 2672) break;
6bd767d1 2679) BUG("unexpected action in create_seq_dir");
f4b0f3c8 2779) return error(_("cannot resolve HEAD"));
f4b0f3c8 2862) if (!rollback_is_safe())
f4b0f3c8 2863) goto give_advice;
f4b0f3c8 2875) BUG("unexpected action in sequencer_skip");
f4b0f3c8 2879) return error(_("failed to skip the commit"));

sh-i18n--envsubst.c
568a05c5 252)   size_t j = j1 + ((j2 - j1) >> 1);

t/helper/test-oidmap.c
11510dec 52) if (get_oid(p1, &oid)) {
11510dec 53) printf("Unknown oid: %s\n", p1);
11510dec 54) continue;
11510dec 58) FLEX_ALLOC_STR(entry, name, p2);
11510dec 59) oidcpy(&entry->entry.oid, &oid);
11510dec 62) oidmap_put(&map, entry);
11510dec 97) if (get_oid(p1, &oid)) {
11510dec 98) printf("Unknown oid: %s\n", p1);
11510dec 99) continue;
11510dec 103) entry = oidmap_remove(&map, &oid);
11510dec 106) puts(entry ? entry->name : "NULL");
11510dec 107) free(entry);

Commits introducting uncovered code:
Christian Couder19cfa0e0 oidmap: use sha1hash() instead of static 
hash() function
Christian Couder    11510dec t/helper: add test-oidmap.c
Denton Liu  526c03b5 rebase: refactor can_fast_forward into goto tower
Denton Liu  10572de1 rebase: fast-forward --onto in more cases
Denton Liu  07b2c0ea config: learn the "onbranch:" includeIf condition
Derrick Stolee  19575c7c multi-pack-index: implement 'expire' subcommand
Derrick Stolee  d01bf2e6 midx: refactor permutation logic and pack sorting
Derrick Stolee  8434e85d repack: refactor pack deletion for future use
Derrick Stolee  1771be90 commit-graph: merge commit-graph chains
Derrick Stolee  c523035c commit-graph: allow cross-alternate chains
Derrick Stolee  135a7123 commit-graph: add --split option to builtin
Derrick Stolee  238def57 commit-graph: extract write_commit_graph_file()
Derrick Stolee  4c9efe85 commit-graph: extract fill_oids_from_commit_hex()
Derrick Stolee  ce1e4a10 midx: implement midx_repack()
Derrick Stolee  ef5b83f2 commit-graph: extract fill_oids_from_packs()
Derrick Stolee  6c622f9f commit-graph: write commit-graph chains
Derrick Stolee  d4f4d60f commit-graph: prepare for commit-graph chains
Derrick Stolee  5c84b339 commit-graph: load commit-graph chains
Derrick Stolee  118bd570 commit-graph: add base graphs chunk
Derrick Stolee  e103f727 commit-graph: return with errors during write
Derrick Stolee  c2bc6e6a commit-graph: create options for split files
Derrick Stolee  b2c83060 commit-graph: extract fill_oids_from_all_packs()
Johannes Schindelin 08e04506 kwset: allow building with GCC 8
Jonathan Nieder ee70c128 index: offer advice for unknown index extensions
Matthew DeVore  28438e84 ref-filter: sort detached HEAD lines firstly
Nickolai Belakovski 2582083f ref-filter: add worktreepath atom
Philip Oakley   1fde99cf doc branch: provide examples for listing remote 
tracking branches
Phillip Wood37e9ee5c sequencer: return errors from sequencer_remove_state()
Phillip Woodd559f502 rebase --abort/--quit: cleanup refs/rewritten
René Scharfe921d49be use COPY_ARRAY for copying arrays
René Scharfe568a05c5 cleanup: fix possible overflow errors in binary 
search, part 2
Rohit Ashiwal   6bd767d1 sequencer: add advice for revert
Rohit Ashiwal   f4b0f3c8 cherry-pick/revert: add --skip option
SZEDER Gábor70d07115 progress: use term_clear_line()
SZEDER Gábor0ec9e491 pager: add a helper function to clear the last line in 
the terminal


Uncovered code in 'next' not in 'master'


list-objects-filter-options.c
5c03bc8b 94) strbuf_addf(errbuf, _("invalid filter-spec '%s'"), arg);

Commits introducting uncovered code:
Matthew DeVore  5c03bc8b list-objects-filter-options: error is localizeable


Uncovered code in 'master' not in 'master@{1}'


builtin/am.c
97387c8b 1662) die("unable to read from stdin; aborting");
6e7baf24 2336) die(_("interactive mode requires patches on the command line"));

builtin/bisect--helper.c
7877ac3d 574) retval = error(_("invalid ref: '%s'"), start_head.buf);
7877ac3d 575) goto finish;

grep.c
de99eb0c 1784) BUG("grep call which could print a name requires "

Commits introducting uncovered code:
Emily Shaffer   de99eb0c grep: fail if call could output and name is null
Jeff King   97387c8b am: read interactive input from stdin
Jeff King   6e7baf24 am: drop tty requirement for --interactive
Johannes Schindelin 7877ac3d bisect--helper: verify HEAD could be parsed 
before continuing



Re: [PATCH 4/4] restore: add --intent-to-add (restoring worktree only)

2019-06-20 Thread Derrick Stolee
On 6/20/2019 5:55 AM, Nguyễn Thái Ngọc Duy wrote:
> "git restore --source" (without --staged) could create new files
> (i.e. not present in index) on worktree to match the given source. But
> the new files are not tracked, so both "git diff" and "git diff
> " ignore new files. "git commit -a" will not recreate a commit
> exactly as the given source either.
> 
> Add --intent-to-add to help track new files in this case, which is the
> default on the least surprise principle.

I was unfamiliar with this behavior, but did check the 'restore' command
myself and saw that it would register the file as untracked. I agree that
could be confusing for a user, so adding it to the staging environment
makes this more in-line with `git checkout  -- `.

> 
> Signed-off-by: Nguyễn Thái Ngọc Duy 
> ---
>  Documentation/git-restore.txt |  7 
>  builtin/checkout.c| 78 +++
>  t/t2070-restore.sh| 17 
>  3 files changed, 102 insertions(+)
> 
> diff --git a/Documentation/git-restore.txt b/Documentation/git-restore.txt
> index d90093f195..43a7f43b2b 100644
> --- a/Documentation/git-restore.txt
> +++ b/Documentation/git-restore.txt
> @@ -93,6 +93,13 @@ in linkgit:git-checkout[1] for details.
>   are "merge" (default) and "diff3" (in addition to what is
>   shown by "merge" style, shows the original contents).
>  
> +--intent-to-add::
> +--no-intent-to-add::
> + When restoring files only on working tree with `--source`,
> + new files are marked as "intent to add" (see
> + linkgit:git-add[1]). This is the default behavior. Use
> + `--no-intent-to-add` to disable it.
> +
>  --ignore-unmerged::
>   When restoring files on the working tree from the index, do
>   not abort the operation if there are unmerged entries and
> diff --git a/builtin/checkout.c b/builtin/checkout.c
> index f884d27f1f..c519067d3d 100644
> --- a/builtin/checkout.c
> +++ b/builtin/checkout.c
> @@ -70,6 +70,7 @@ struct checkout_opts {
>   int checkout_worktree;
>   const char *ignore_unmerged_opt;
>   int ignore_unmerged;
> + int intent_to_add;
>  
>   const char *new_branch;
>   const char *new_branch_force;
> @@ -392,6 +393,69 @@ static int checkout_worktree(const struct checkout_opts 
> *opts)
>   return errs;
>  }
>  
> +/*
> + * Input condition: r->index contains the file list matching worktree.
> + *
> + * r->index is reloaded with $GIT_DIR/index. Files that exist in the
> + * current worktree but not in $GIT_DIR/index are added back as
> + * intent-to-add.
> + */

Reading this code (and being unfamiliar with the cache array) I thought
it might accidentally add untracked files from the working directory into
the index. A local test verified that was not the case. Is that worth
adding to your test below?
  
> +test_expect_success 'restore worktree only adds new files back as 
> intent-to-add' '
> + git init ita &&
> + (
> + cd ita &&
> + test_commit one &&
> + test_commit two &&
> + git rm one.t &&
> + git commit -m one-is-gone &&
+   touch garbage &&
> + git restore --source one one.t &&
> + git diff --summary >actual &&
> + echo " create mode 100644 one.t" >expected &&
> + test_cmp expected actual &&
> + git diff --cached >empty &&
> + test_must_be_empty empty
> + )
> +'
> +
>  test_done

Perhaps the line I inserted above would suffice to add this extra check?

Outside of that extra test (which may not be necessary), this series makes
sense to me.

Thanks,
-Stolee




Re: [PATCH 2/4] switch: allow to switch in the middle of bisect

2019-06-20 Thread Derrick Stolee
On 6/20/2019 5:55 AM, Nguyễn Thái Ngọc Duy wrote:
> In c45f0f525d (switch: reject if some operation is in progress,
> 2019-03-29), a check is added to prevent switching when some operation
> is in progress. The reason is it's often not safe to do so.
> 
> This is true for merge, am, rebase, cherry-pick and revert, but not so
> much for bisect because bisecting is basically jumping/switching between
> a bunch of commits to pin point the first bad one. git-bisect suggests
> the next commit to test, but it's not wrong for the user to test a
> different commit because git-bisect cannot have the knowledge to know
> better.

When a user switches commits during a bisect, it can just create new
known-good or known-bad commits, right? It won't mess up the next
selection of a test commit? I'm imagining someone jumping to a commit
between two known-bad commits or something, when it is more likely that
they are jumping to a parallel history.

> For this reason, allow to switch when bisecting (*). I considered if we
> should still prevent switching by default and allow it with
> --ignore-in-progress. But I don't think the prevention really adds
> anything much.
>
> If the user switches away by mistake, since we print the previous HEAD
> value, even if they don't know about the "-" shortcut, switching back is
> still possible.

I tell everyone I know about the "-" shortcut, and I'm always surprised
they didn't already know about "cd -".

> The warning will be printed on every switch while bisect is still
> ongoing, not the first time you switch away from bisect's suggested
> commit, so it could become a bit annoying.

I think a one-line warning is fine, as a power user doing this a lot
will develop blinders that ignore the message as they switch during
a bisect.

-Stolee


Re: [PATCH] delta-islands: respect progress flag

2019-06-20 Thread Derrick Stolee
On 6/20/2019 4:58 AM, Jeff King wrote:
> The delta island code always prints "Marked %d islands", even if
> progress has been suppressed with --no-progress or by sending stderr to
> a non-tty.
> 
> Let's pass a progress boolean to load_delta_islands(). We already do
> the same thing for the progress meter in resolve_tree_islands().
> 
> Signed-off-by: Jeff King 
> ---
> Arguably this should be a real progress meter that counts up, but I'm
> not sure what it should be counting. Refs we analyzed? Islands found?
> Unless you have a ton of refs, it doesn't really matter, so I just
> punted on that part for now and only fixed the egregious bug. :)

I agree that the first goal should be to stop writing 'progress' output
to stderr when progress is disabled. Changing this to a full progress
indicator can be done on top of this patch later (without changing the
method prototypes again) if desired.

LGTM.
-Stolee



Re: [PATCH 2/2] sha1-file: use OBJECT_INFO_NO_FETCH_IF_MISSING

2019-06-20 Thread Derrick Stolee
On 6/20/2019 4:50 AM, Jeff King wrote:
> On Thu, Jun 20, 2019 at 10:30:26AM +0200, Christian Couder wrote:
> 
>> Currently the OBJECT_INFO_FOR_PREFETCH flag is used to check
>> if we should fetch objects from promisor remotes when we
>> haven't found them elsewhere.
>>
>> Now that OBJECT_INFO_NO_FETCH_IF_MISSING exists, let's use
>> it instead to be more correct in case this new flag is ever
>> used without OBJECT_INFO_QUICK.
> 
> I said earlier that this one would need to be tweaked for the new
> upstream name. But actually, I think it is not necessary after Stolee's
> patch.

Yes, I believe that 31f5256c82  does an equivalent thing to the
combination of these patches.

Thanks,
-Stolee



[PATCH v2 3/3] repo-settings: pack.useSparse=true

2019-06-19 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

If a repo is large, then it probably has a very large working
directory. In this case, a typical developer's edits usually impact
many fewer paths than the full path set. The sparse treewalk
algorithm is optimized for this case, speeding up 'git push' calls.

Use pack.useSparse=true when core.featureAdoptionRate is at least
five. This is the first setting where the feature has only been
out for a single major version. This could be moved to the "at
least three" category after another major version.

Signed-off-by: Derrick Stolee 
---
 Documentation/config/core.txt | 9 +
 Documentation/config/pack.txt | 3 ++-
 builtin/pack-objects.c| 9 +
 repo-settings.c   | 8 
 repo-settings.h   | 1 +
 5 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index d16503a9d7..77fdd02660 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -623,3 +623,12 @@ not modify the user-facing output of porcelain commands.
 +
 * `index.version=4` uses prefix-compression to reduce the size of the
 .git/index file.
++
+If the value is at least 5, then all of the defaults above are included,
+plus the defaults below. These represent new features that present
+significant performance benefits, but may not have been released for
+multiple major versions.
++
+* `pack.useSparse=true` uses the sparse tree-walk algorithm, which is
+optimized for enumerating objects during linkgit:git-push[1] from a
+client machine.
diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt
index 9cdcfa7324..9c4f8ea9ff 100644
--- a/Documentation/config/pack.txt
+++ b/Documentation/config/pack.txt
@@ -112,7 +112,8 @@ pack.useSparse::
objects. This can have significant performance benefits when
computing a pack to send a small change. However, it is possible
that extra objects are added to the pack-file if the included
-   commits contain certain types of direct renames.
+   commits contain certain types of direct renames. Defaults to
+   false, unless `core.featureAdoptionRate` is at least five.
 
 pack.writeBitmaps (deprecated)::
This is a deprecated synonym for `repack.writeBitmaps`.
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 41d7fc5983..f26b3f2892 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -34,6 +34,7 @@
 #include "dir.h"
 #include "midx.h"
 #include "trace2.h"
+#include "repo-settings.h"
 
 #define IN_PACK(obj) oe_in_pack(&to_pack, obj)
 #define SIZE(obj) oe_size(&to_pack, obj)
@@ -2707,10 +2708,6 @@ static int git_pack_config(const char *k, const char *v, 
void *cb)
use_bitmap_index_default = git_config_bool(k, v);
return 0;
}
-   if (!strcmp(k, "pack.usesparse")) {
-   sparse = git_config_bool(k, v);
-   return 0;
-   }
if (!strcmp(k, "pack.threads")) {
delta_search_threads = git_config_int(k, v);
if (delta_search_threads < 0)
@@ -3330,6 +3327,10 @@ int cmd_pack_objects(int argc, const char **argv, const 
char *prefix)
read_replace_refs = 0;
 
sparse = git_env_bool("GIT_TEST_PACK_SPARSE", 0);
+   prepare_repo_settings(the_repository);
+   if (!sparse && the_repository->settings->pack_use_sparse != -1)
+   sparse = the_repository->settings->pack_use_sparse;
+
reset_pack_idx_option(&pack_idx_opts);
git_config(git_pack_config, NULL);
 
diff --git a/repo-settings.c b/repo-settings.c
index 5753153a84..c700edc286 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -16,6 +16,9 @@ static int git_repo_config(const char *key, const char 
*value, void *cb)
UPDATE_DEFAULT(rs->gc_write_commit_graph, 1);
UPDATE_DEFAULT(rs->index_version, 4);
}
+   if (rate >= 5) {
+   UPDATE_DEFAULT(rs->pack_use_sparse, 1);
+   }
return 0;
}
if (!strcmp(key, "core.commitgraph")) {
@@ -26,6 +29,10 @@ static int git_repo_config(const char *key, const char 
*value, void *cb)
rs->gc_write_commit_graph = git_config_bool(key, value);
return 0;
}
+   if (!strcmp(key, "pack.usesparse")) {
+   rs->pack_use_sparse = git_config_bool(key, value);
+   return 0;
+   }
if (!strcmp(key, "index.version")) {
rs->index_version = git_config_int(key, value);
return 0;
@@ -44,6 +51,7 @@ void prepare_repo_settings(struct repository *r)
/* Defaults */
r->settings->core_commit_graph = -1;
r->settings->gc_

[PATCH v2 1/3] repo-settings: create core.featureAdoptionRate setting

2019-06-19 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

Several advanced config settings are highly recommended for clients
using large repositories. Power users learn these one-by-one and
enable them as they see fit. This could be made simpler, to allow
more users to have access to these almost-always beneficial features
(and more beneficial in larger repos).

Create a 'core.featureAdoptionRate' config setting that allows integer
values. This is a rating from 0 to 10 for the user's willingness to
adopt new or experimental features that improve Git performance.
The default is 0, meaning "don't change anything!" A value of 10
would mean "I'm willing for some behavior to change to get the
best performance I can get, and can take experimental features
in their first release." As we integrate this with more config
settings, we will make this scale more clear.

This config setting only changes the default values of other config
settings. If the setting is given explicitly, then take the
explicit value.

This change adds these two defaults when core.featureAdoptionRate
is at least three:

 * core.commitGraph=true
 * gc.writeCommitGraph=true

The use of "three or higher" for these settings means that a value
of 3 means "I'm willing to add optional features that can augment
the data on disk in favor of improved performance, but those
features should be stable after being included in multiple major
releases."

To centralize these config options and properly set the defaults,
create a repo_settings that contains chars for each config variable.
Use -1 as "unset", with 0 for false and 1 for true.

The prepare_repo_settings() method ensures that this settings
struct has been initialized, and avoids double-scanning the config
settings.

Signed-off-by: Derrick Stolee 
---
 Documentation/config/core.txt | 21 -
 Documentation/config/gc.txt   |  4 ++--
 Makefile  |  1 +
 builtin/gc.c  |  6 ++---
 commit-graph.c|  7 +++---
 repo-settings.c   | 44 +++
 repo-settings.h   | 13 +++
 repository.h  |  3 +++
 8 files changed, 90 insertions(+), 9 deletions(-)
 create mode 100644 repo-settings.c
 create mode 100644 repo-settings.h

diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index 75538d27e7..6a9f707815 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -577,7 +577,8 @@ the `GIT_NOTES_REF` environment variable.  See 
linkgit:git-notes[1].
 
 core.commitGraph::
If true, then git will read the commit-graph file (if it exists)
-   to parse the graph structure of commits. Defaults to false. See
+   to parse the graph structure of commits. Defaults to false, unless
+   `core.featureAdoptionRate` is at least three. See
linkgit:git-commit-graph[1] for more information.
 
 core.useReplaceRefs::
@@ -601,3 +602,21 @@ core.abbrev::
in your repository, which hopefully is enough for
abbreviated object names to stay unique for some time.
The minimum length is 4.
+
+core.featureAdoptionRate::
+   Set an integer value on a scale from 0 to 10 describing your
+   desire to adopt new performance features. Defaults to 0. As
+   the value increases, features are enabled by changing the
+   default values of other config settings. If a config variable
+   is specified explicitly, the explicit value will override these
+   defaults:
++
+If the value is at least 3, then the following defaults are modified.
+These represent relatively new features that have existed for multiple
+major releases, and present significant performance benefits. They do
+not modify the user-facing output of porcelain commands.
++
+* `core.commitGraph=true` enables reading commit-graph files.
++
+* `gc.writeCommitGraph=true` eneables writing commit-graph files during
+`git gc`.
diff --git a/Documentation/config/gc.txt b/Documentation/config/gc.txt
index 02b92b18b5..898263209c 100644
--- a/Documentation/config/gc.txt
+++ b/Documentation/config/gc.txt
@@ -63,8 +63,8 @@ gc.writeCommitGraph::
If true, then gc will rewrite the commit-graph file when
linkgit:git-gc[1] is run. When using `git gc --auto`
the commit-graph will be updated if housekeeping is
-   required. Default is false. See linkgit:git-commit-graph[1]
-   for details.
+   required. Default is false, unless `core.featureAdoptionRage`
+   is at least three. See linkgit:git-commit-graph[1] for details.
 
 gc.logExpiry::
If the file gc.log exists, then `git gc --auto` will print
diff --git a/Makefile b/Makefile
index 8a7e235352..2d3499d7ac 100644
--- a/Makefile
+++ b/Makefile
@@ -967,6 +967,7 @@ LIB_OBJS += refspec.o
 LIB_OBJS += ref-filter.o
 LIB_OBJS += remote.o
 LIB_OBJS += replace-object.o
+LIB_OBJS += repo-settings.o
 LIB_OBJS += repository

[PATCH v2 2/3] repo-settings: use index.version=4 by default

2019-06-19 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

If a repo is large, it likely has many paths in its working directory.
This means the index could be compressed using version 4. Set this as
a default when core.featureAdoptionRate is at least three.

Signed-off-by: Derrick Stolee 
---
 Documentation/config/core.txt  |  3 +++
 Documentation/config/index.txt |  2 ++
 read-cache.c   | 12 +++-
 repo-settings.c|  6 ++
 repo-settings.h|  1 +
 5 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index 6a9f707815..d16503a9d7 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -620,3 +620,6 @@ not modify the user-facing output of porcelain commands.
 +
 * `gc.writeCommitGraph=true` eneables writing commit-graph files during
 `git gc`.
++
+* `index.version=4` uses prefix-compression to reduce the size of the
+.git/index file.
diff --git a/Documentation/config/index.txt b/Documentation/config/index.txt
index f181503041..98a88c30be 100644
--- a/Documentation/config/index.txt
+++ b/Documentation/config/index.txt
@@ -24,3 +24,5 @@ index.threads::
 index.version::
Specify the version with which new index files should be
initialized.  This does not affect existing repositories.
+   If `core.featureAdoptionRate` is at least three, then the
+   default value is 4.
diff --git a/read-cache.c b/read-cache.c
index 22e7b9944e..7fab8ff748 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -25,6 +25,7 @@
 #include "fsmonitor.h"
 #include "thread-utils.h"
 #include "progress.h"
+#include "repo-settings.h"
 
 /* Mask for the name length in ce_flags in the on-disk index */
 
@@ -1599,16 +1600,17 @@ struct cache_entry *refresh_cache_entry(struct 
index_state *istate,
 
 #define INDEX_FORMAT_DEFAULT 3
 
-static unsigned int get_index_format_default(void)
+static unsigned int get_index_format_default(struct repository *r)
 {
char *envversion = getenv("GIT_INDEX_VERSION");
char *endp;
-   int value;
unsigned int version = INDEX_FORMAT_DEFAULT;
 
if (!envversion) {
-   if (!git_config_get_int("index.version", &value))
-   version = value;
+   prepare_repo_settings(r);
+
+   if (r->settings->index_version >= 0)
+   version = r->settings->index_version;
if (version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < version) {
warning(_("index.version set, but the value is 
invalid.\n"
  "Using version %i"), INDEX_FORMAT_DEFAULT);
@@ -2765,7 +2767,7 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
}
 
if (!istate->version) {
-   istate->version = get_index_format_default();
+   istate->version = get_index_format_default(the_repository);
if (git_env_bool("GIT_TEST_SPLIT_INDEX", 0))
init_split_index(istate);
}
diff --git a/repo-settings.c b/repo-settings.c
index f7fc2a1959..5753153a84 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -14,6 +14,7 @@ static int git_repo_config(const char *key, const char 
*value, void *cb)
if (rate >= 3) {
UPDATE_DEFAULT(rs->core_commit_graph, 1);
UPDATE_DEFAULT(rs->gc_write_commit_graph, 1);
+   UPDATE_DEFAULT(rs->index_version, 4);
}
return 0;
}
@@ -25,6 +26,10 @@ static int git_repo_config(const char *key, const char 
*value, void *cb)
rs->gc_write_commit_graph = git_config_bool(key, value);
return 0;
}
+   if (!strcmp(key, "index.version")) {
+   rs->index_version = git_config_int(key, value);
+   return 0;
+   }
 
return 1;
 }
@@ -39,6 +44,7 @@ void prepare_repo_settings(struct repository *r)
/* Defaults */
r->settings->core_commit_graph = -1;
r->settings->gc_write_commit_graph = -1;
+   r->settings->index_version = -1;
 
repo_config(r, git_repo_config, r->settings);
 }
diff --git a/repo-settings.h b/repo-settings.h
index 11d08648e1..9b8104042e 100644
--- a/repo-settings.h
+++ b/repo-settings.h
@@ -4,6 +4,7 @@
 struct repo_settings {
char core_commit_graph;
char gc_write_commit_graph;
+   int index_version;
 };
 
 struct repository;
-- 
gitgitgadget



[PATCH v2 0/3] [RFC] Create 'core.featureAdoptionRate' setting to update config defaults

2019-06-19 Thread Derrick Stolee via GitGitGadget
Here is a second run at this RFC, which aims to create a "meta" config
setting that automatically turns on other settings according to a user's
willingness to trade new Git behavior or new feature risk for performance
benefits. The new name for the setting is "core.featureAdoptionRate" and is
an integer scale from 0 to 10. There will be multiple "categories" of
settings, and the intention is to allow more granular levels as necessary.

The first category is "3 or higher" which means that the user is willing to
adopt features that have been tested in multiple major releases. The
settings to include here are core.commitGraph=true,
gc.writeCommitGraph=true, and index.version=4.

The second category is "5 or higher" which means the user is willing to
adopt features that have not been out for multiple major releases. The
setting included here is pack.useSparse=true.

In the future, I would add a "7 or higher" setting which means the user is
willing to have a change of behavior in exchange for performance benefits.
The two settings to place here are 'status.aheadBehind=false' and
'fetch.showForcedUpdates=false'. Instead of including these settings in the
current series, I've submitted them independently for full review [1, 2].

Hopefully this direction is amenable to allow "early adopters" gain access
to new performance features even if they are not necessary reading every
line of the release notes.

Thanks, -Stolee

[1] https://public-inbox.org/git/pull.272.git.gitgitgad...@gmail.com/

[2] https://public-inbox.org/git/pull.273.git.gitgitgad...@gmail.com/

Derrick Stolee (3):
  repo-settings: create core.featureAdoptionRate setting
  repo-settings: use index.version=4 by default
  repo-settings: pack.useSparse=true

 Documentation/config/core.txt  | 33 ++-
 Documentation/config/gc.txt|  4 +--
 Documentation/config/index.txt |  2 ++
 Documentation/config/pack.txt  |  3 +-
 Makefile   |  1 +
 builtin/gc.c   |  6 ++--
 builtin/pack-objects.c |  9 +++---
 commit-graph.c |  7 ++--
 read-cache.c   | 12 ---
 repo-settings.c| 58 ++
 repo-settings.h| 15 +
 repository.h   |  3 ++
 12 files changed, 134 insertions(+), 19 deletions(-)
 create mode 100644 repo-settings.c
 create mode 100644 repo-settings.h


base-commit: aa25c82427ae70aebf3b8f970f2afd54e9a2a8c6
Published-As: 
https://github.com/gitgitgadget/git/releases/tag/pr-254%2Fderrickstolee%2Fconfig-large%2Fupstream-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git 
pr-254/derrickstolee/config-large/upstream-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/254

Range-diff vs v1:

  1:  704613f448 !  1:  bdaee3ea9d repo-settings: create repo.size=large setting
 @@ -1,6 +1,6 @@
  Author: Derrick Stolee 
  
 -repo-settings: create repo.size=large setting
 +repo-settings: create core.featureAdoptionRate setting
  
  Several advanced config settings are highly recommended for clients
  using large repositories. Power users learn these one-by-one and
 @@ -8,16 +8,31 @@
  more users to have access to these almost-always beneficial features
  (and more beneficial in larger repos).
  
 -Create a 'repo.size' config setting whose only accepted value is
 -'large'. When a repo.size=large is given, change the default values
 -of some config settings. If the setting is given explicitly, then
 -take the explicit value.
 +Create a 'core.featureAdoptionRate' config setting that allows integer
 +values. This is a rating from 0 to 10 for the user's willingness to
 +adopt new or experimental features that improve Git performance.
 +The default is 0, meaning "don't change anything!" A value of 10
 +would mean "I'm willing for some behavior to change to get the
 +best performance I can get, and can take experimental features
 +in their first release." As we integrate this with more config
 +settings, we will make this scale more clear.
  
 -This change adds these two defaults to the repo.size=large setting:
 +This config setting only changes the default values of other config
 +settings. If the setting is given explicitly, then take the
 +explicit value.
 +
 +This change adds these two defaults when core.featureAdoptionRate
 +is at least three:
  
   * core.commitGraph=true
   * gc.writeCommitGraph=true
  
 +The use of "three or higher" for these settings means that a value
 +of 3 means "I'm willing to add optional features that can augment
 

Re: ds/commit-graph-incremental (was Re: What's cooking in git.git (Jun 2019, #04; Fri, 14))

2019-06-19 Thread Derrick Stolee
On 6/19/2019 10:32 AM, Junio C Hamano wrote:
> Derrick Stolee  writes:
> 
>> On 6/14/2019 4:50 PM, Junio C Hamano wrote:
>>> * ds/commit-graph-incremental (2019-06-12) 16 commits
>>>  - commit-graph: test --split across alternate without --split
>>
>> Please hold on this one. I've found multiple issues while integrating
>> this with VFS for Git and there are enough to merit a full re-roll.
> 
> Thanks.
> 
>> Please ignore the two patches I sent yesterday as I will incorporate them
>> into the next version of this series.
> 
> I think I picked up "normalize c-g filenames" and queued it directly
> on top of this topic, but I do not recall the other one, which
> probably means I did ignore it so hopefully no harm done ;-)

Do you mean you applied the v6 patches? [1]

The range-diff included some changes that are necessary due to textual or
semantic conflicts with ds/close-object-store, but one change was hidden
in PATCH 10 that is important:
.
10:  2093bab5b1 ! 10:  65b1cc6ae9 commit-graph: allow cross-alternate chains
 @@ -97,6 +97,8 @@
  - for (i = 0; i < count && valid; i++) {
  - char *graph_name;
  - struct commit_graph *g;
 ++ prepare_alt_odb(r);
 ++
  + for (i = 0; i < count; i++) {
  + struct object_directory *odb;

If you replayed the entire v6 series on top of ds/close-object-store, then
everything is as I had hoped. Otherwise, I can take a look at what you have
applied after the next cooking email and send new patches.

Thanks,
-Stolee   

[1] https://public-inbox.org/git/pull.184.v6.git.gitgitgad...@gmail.com/T/#u


Re: [PATCH 6/8] read-cache.c: dump "IEOT" extension as json

2019-06-19 Thread Derrick Stolee
On 6/19/2019 9:24 AM, Duy Nguyen wrote:
> On Wed, Jun 19, 2019 at 8:18 PM Derrick Stolee  wrote:
>>
>> On 6/19/2019 5:58 AM, Nguyễn Thái Ngọc Duy wrote:> @@ -2266,7 +2271,7 @@ int 
>> do_read_index(struct index_state *istate, const char *path, int must_exist)
>>>* to multi-thread the reading of the cache entries.
>>>*/
>>>   if (extension_offset && nr_threads > 1)
>>> - ieot = read_ieot_extension(mmap, mmap_size, extension_offset);
>>> + ieot = read_ieot_extension(mmap, mmap_size, extension_offset, 
>>> NULL);
>>
>> I tried applying this series on top of v2.22.0 and ran into an issue
>> on this patch, and the message seemed to imply the problem was at this
>> block. I couldn't figure out what was wrong, but maybe the series is
>> based on a different commit?
> 
> it's on 'master', a6a95cd1b4 (The second batch, 2019-06-17). There are
> a couple patches since v2.22.0 that touch read-cache.c, but they don't
> touch these lines explictly...

Thanks, I should have tried from master myself. Starting there worked.


Re: [PATCH 6/8] read-cache.c: dump "IEOT" extension as json

2019-06-19 Thread Derrick Stolee
On 6/19/2019 5:58 AM, Nguyễn Thái Ngọc Duy wrote:> @@ -2266,7 +2271,7 @@ int 
do_read_index(struct index_state *istate, const char *path, int must_exist)
>* to multi-thread the reading of the cache entries.
>*/
>   if (extension_offset && nr_threads > 1)
> - ieot = read_ieot_extension(mmap, mmap_size, extension_offset);
> + ieot = read_ieot_extension(mmap, mmap_size, extension_offset, 
> NULL);

I tried applying this series on top of v2.22.0 and ran into an issue
on this patch, and the message seemed to imply the problem was at this
block. I couldn't figure out what was wrong, but maybe the series is
based on a different commit?

That said, I applied the previous patches, compiled, and manually
tested those features. Seemed to be working as advertised.

Thanks,
-Stolee


Re: [PATCH 4/8] resolve-undo.c: dump "REUC" extension as json

2019-06-19 Thread Derrick Stolee
On 6/19/2019 5:58 AM, Nguyễn Thái Ngọc Duy wrote:

> + if (jw) {
> + jw_object_inline_begin_object(jw, "resolve-undo");
> + jw_object_intmax(jw, "ext-size", size);
> + jw_object_inline_begin_array(jw, "entries");
> + }

While reading this block, I noticed the use of hyphens in the
member names could cause some problems when translating into
object models in some languages. While this is valid JSON, I
found helpful recommendations in the Google JSON Style Guide [1]
that could apply here and elsewhere. Specifically, this
recommendation:

  "Property names must be camel-cased, ascii strings."

Treating JSON members as camel-cased variable names would
promote consumption by third-party tools.

Thanks,
-Stolee

[1] 
https://google.github.io/styleguide/jsoncstyleguide.xml?showone=Property_Name_Format#Property_Name_Format


Re: [PATCH 1/8] ls-files: add --json to dump the index

2019-06-19 Thread Derrick Stolee
On 6/19/2019 5:58 AM, Nguyễn Thái Ngọc Duy wrote:
> So far we don't have a command to basically dump the index file out,
> with all its glory details. Checking some info, for example, stat
> time, usually involves either writing new code or firing up "xxd" and
> decoding values by yourself.
> 
> This --json is supposed to help that. It dumps the index in a human
> readable format but also easy to be processed with tools. And it will
> print almost enough info to reconstruct the index later.

In an earlier message, I stated that I like the idea of this feature.
I know that you wanted to get that feedback before working too hard on
the patch series, so now that interest is declared, please add some tests
that verify this output looks as you expect.

> In this patch we only dump the main part, not extensions. But at the
> end of the series, the entire index is dumped. The end result could be
> very verbose even on a small repository such as git.git.

I would expect this commit to include a complete "output matches expected"
test, and the later patches can update the index to include these
extensions then verify that their sections appear in the output using
a grep.

> +--json::
> + Dump the entire index content in JSON format. This is for
> + debugging purposes and the JSON structure may change from time
> + to time.
> +

"...purposes and the JSON structure may change from time to time" may be better
as "...purposes. The JSON structure is subject to change."

> + OPT_BOOL(0, "json", &show_json,
> + N_("dump index content in json format")),

We should probably use "JSON" here in the help text.

> - show_files(the_repository, &dir);
> -
> - if (show_resolve_undo)
> - show_ru_info(the_repository->index);
> + if (!show_json) {
> + show_files(the_repository, &dir);
> +
> + if (show_resolve_undo)
> + show_ru_info(the_repository->index);
> + } else {
> + struct json_writer jw = JSON_WRITER_INIT;
> +
> + discard_index(the_repository->index);
> + the_repository->index->jw = &jw;
> + if (repo_read_index(the_repository) < 0)
> + die("index file corrupt");
> + puts(jw.json.buf);
> + the_repository->index->jw = NULL;
> + jw_release(&jw);
> + }
>  
>   if (ps_matched) {
>   int bad;

I see this 'ps_matched' condition at the end, which is related to
the '--error-unmatch' option. I added "--error-unmatch foo" to my
command and got the appropriate error message:

  error: pathspec 'foo' did not match any file(s) known to git
  Did you forget to 'git add'?

This was sent to stderr while the JSON was in stdout, so this should
be appropriate to allow both options. Just pointing it out to make
sure this is intended.

> +void jw_object_stat_data(struct json_writer *jw, const char *name,
> +  const struct stat_data *sd)
> +{
> + jw_object_inline_begin_object(jw, name);
> + jw_object_intmax(jw, "st_ctime.sec", sd->sd_ctime.sec);
> + jw_object_intmax(jw, "st_ctime.nsec", sd->sd_ctime.nsec);
> + jw_object_intmax(jw, "st_mtime.sec", sd->sd_mtime.sec);
> + jw_object_intmax(jw, "st_mtime.nsec", sd->sd_mtime.nsec);
> + jw_object_intmax(jw, "st_dev", sd->sd_dev);
> + jw_object_intmax(jw, "st_ino", sd->sd_ino);
> + jw_object_intmax(jw, "st_uid", sd->sd_uid);
> + jw_object_intmax(jw, "st_gid", sd->sd_gid);
> + jw_object_intmax(jw, "st_size", sd->sd_size);
> + jw_end(jw);
> +}

If these are all part of the same object, are the "st_" prefixes
necessary for every member?

> + /*
> +  * again redundant info, just so you don't have to decode
> +  * flags values manually
> +  */
> + if (ce->ce_flags & CE_VALID)
> + jw_object_true(jw, "assume-unchanged");
> + if (ce->ce_flags & CE_INTENT_TO_ADD)
> + jw_object_true(jw, "intent-to-add");
> + if (ce->ce_flags & CE_SKIP_WORKTREE)
> + jw_object_true(jw, "skip-worktree");
> + if (ce_stage(ce))
> + jw_object_intmax(jw, "stage", ce_stage(ce));

I'm really glad these flags are getting expanded! Much easier to
understand what's going on this way.

> + if (istate->jw) {
> + jw_object_begin(istate->jw, jw_pretty);
> + jw_object_intmax(istate->jw, "version", istate->version);
> + jw_object_string(istate->jw, "oid", oid_to_hex(&istate->oid));
> + jw_object_intmax(istate->jw, "st_mtime.sec", 
> istate->timestamp.sec);
> + jw_object_intmax(istate->jw, "st_mtime.nsec", 
> istate->timestamp.nsec);

Here, the "st_" prefixes are not on every member, but would it
be confusing if they were not there? Also, including a "." in
a member name may be troublesome for JSON, as that typically
means we are accessing a member of an object. Perhaps use _sec
and _nsec here and in the earlier 

Re: [PATCH 0/8] Add 'ls-files --json' to dump the index in json

2019-06-19 Thread Derrick Stolee
On 6/19/2019 8:42 AM, Duy Nguyen wrote:
> On Wed, Jun 19, 2019 at 6:58 PM Derrick Stolee  wrote:
>>
>> On 6/19/2019 5:58 AM, Nguyễn Thái Ngọc Duy wrote:
>>> This is probably just my itch. Every time I have to do something with
>>> the index, I need to add a little bit code here, a little bit there to
>>> get a better "view" of the index.
>>>
>>> This solves it for me. It allows me to see pretty much everything in the
>>> index (except really low detail stuff like pathname compression). It's
>>> readable by human, but also easy to parse if you need to do statistics
>>> and stuff. You could even do a "diff" between two indexes.
>>>
>>> I'm not really sure if anybody else finds this useful. Because if not,
>>> I guess there's not much point trying to merge it to git.git just for a
>>> single user. Maintaining off tree is still a pain for me, but I think
>>> I can manage it.
>>
>> I think we (Microsoft/VFS for Git engineers) would use this tool, as we
>> frequently need to diagnose something that went wrong in a user's index.
>> Kevin Willford built a tool to search the index and figure out what's
>> going on, but I'm not sure it parses all of the new extensions or was
>> updated to parse the v5 index.
> 
> OK I suggest you try it out and see if it really fits your internal
> tools. I wanted to balance between manual inspection and automation so
> the output may not be the best for tools. I also try not to freeze the
> format for more wiggle room, which would be fine for one-time scripts,
> but if you want to have real tools depend on it, we may have to look
> harder at the output format and make sure it's good enough for some
> time, and have some documentation.
> 
> Also, I don't suppose it matters, but just for the record I don't care
> at all about --json performance. I suppose Jeff's json writer does not
> cache the entire json output in memory, so dumping giant index files
> is fine. But some other things, like reading the index with multiple
> threads, are also disabled.

Performance is not critical here, and in fact would become slower for
sure because of the extra parsing details. However, I think using JSON
as a translation layer will make any tools that consume the JSON be
more resilient to future index format updates. That stability is
valuable. Even though the JSON format is not guaranteed to stay the
same, it is easier to update an object model to the JSON format than
a new binary parser.

Thanks,
-Stolee


Re: [PATCH 0/8] Add 'ls-files --json' to dump the index in json

2019-06-19 Thread Derrick Stolee
On 6/19/2019 5:58 AM, Nguyễn Thái Ngọc Duy wrote:
> This is probably just my itch. Every time I have to do something with
> the index, I need to add a little bit code here, a little bit there to
> get a better "view" of the index.
> 
> This solves it for me. It allows me to see pretty much everything in the
> index (except really low detail stuff like pathname compression). It's
> readable by human, but also easy to parse if you need to do statistics
> and stuff. You could even do a "diff" between two indexes.
> 
> I'm not really sure if anybody else finds this useful. Because if not,
> I guess there's not much point trying to merge it to git.git just for a
> single user. Maintaining off tree is still a pain for me, but I think
> I can manage it.

I think we (Microsoft/VFS for Git engineers) would use this tool, as we
frequently need to diagnose something that went wrong in a user's index.
Kevin Willford built a tool to search the index and figure out what's
going on, but I'm not sure it parses all of the new extensions or was
updated to parse the v5 index.

Having a translation from the internal index format to an easier-to-parse
format is valuable.

Thanks,
-Stolee


[PATCH 2/3] fetch: warn about forced updates in branch listing

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

The --[no-]show-forced-updates option in 'git fetch' can be confusing
for some users, especially if it is enabled via config setting and not
by argument. Add advice to warn the user that the (forced update)
messages were not listed.

Additionally, warn users when the forced update check takes longer
than ten seconds, and recommend that they disable the check. These
messages can be disabled by the advice.fetchShowForcedUpdates config
setting.

Signed-off-by: Derrick Stolee 
---
 Documentation/config/advice.txt |  4 
 advice.c|  2 ++
 advice.h|  1 +
 builtin/fetch.c | 25 -
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/Documentation/config/advice.txt b/Documentation/config/advice.txt
index ec4f6ae658..1f1e847fb4 100644
--- a/Documentation/config/advice.txt
+++ b/Documentation/config/advice.txt
@@ -4,6 +4,10 @@ advice.*::
can tell Git that you do not need help by setting these to 'false':
 +
 --
+   fetchShowForcedUpdates::
+   Advice shown when linkgit:git-fetch[1] takes a long time
+   to calculate forced updates after ref updates, or to warn
+   that the check is disabled.
pushUpdateRejected::
Set this variable to 'false' if you want to disable
'pushNonFFCurrent',
diff --git a/advice.c b/advice.c
index ce5f374ecd..4b283be51a 100644
--- a/advice.c
+++ b/advice.c
@@ -3,6 +3,7 @@
 #include "color.h"
 #include "help.h"
 
+int advice_fetch_show_forced_updates = 1;
 int advice_push_update_rejected = 1;
 int advice_push_non_ff_current = 1;
 int advice_push_non_ff_matching = 1;
@@ -59,6 +60,7 @@ static struct {
const char *name;
int *preference;
 } advice_config[] = {
+   { "fetchShowForcedUpdates", &advice_fetch_show_forced_updates },
{ "pushUpdateRejected", &advice_push_update_rejected },
{ "pushNonFFCurrent", &advice_push_non_ff_current },
{ "pushNonFFMatching", &advice_push_non_ff_matching },
diff --git a/advice.h b/advice.h
index e50f02cdfe..495e53255c 100644
--- a/advice.h
+++ b/advice.h
@@ -3,6 +3,7 @@
 
 #include "git-compat-util.h"
 
+extern int advice_fetch_show_forced_updates;
 extern int advice_push_update_rejected;
 extern int advice_push_non_ff_current;
 extern int advice_push_non_ff_matching;
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 571c255218..cf7eb0dd8d 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -24,6 +24,8 @@
 #include "list-objects-filter-options.h"
 #include "commit-reach.h"
 
+#define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
+
 static const char * const builtin_fetch_usage[] = {
N_("git fetch [] [ [...]]"),
N_("git fetch [] "),
@@ -40,6 +42,7 @@ enum {
 
 static int fetch_prune_config = -1; /* unspecified */
 static int fetch_show_forced_updates = 1;
+static uint64_t forced_updates_ms = 0;
 static int prune = -1; /* unspecified */
 #define PRUNE_BY_DEFAULT 0 /* do we prune by default? */
 
@@ -707,6 +710,7 @@ static int update_local_ref(struct ref *ref,
enum object_type type;
struct branch *current_branch = branch_get(NULL);
const char *pretty_ref = prettify_refname(ref->name);
+   int fast_forward = 0;
 
type = oid_object_info(the_repository, &ref->new_oid, NULL);
if (type < 0)
@@ -781,7 +785,15 @@ static int update_local_ref(struct ref *ref,
return r;
}
 
-   if (!fetch_show_forced_updates || in_merge_bases(current, updated)) {
+   if (fetch_show_forced_updates) {
+   uint64_t t_before = getnanotime();
+   fast_forward = in_merge_bases(current, updated);
+   forced_updates_ms += (getnanotime() - t_before) / 100;
+   } else {
+   fast_forward = 1;
+   }
+
+   if (fast_forward) {
struct strbuf quickref = STRBUF_INIT;
int r;
 
@@ -980,6 +992,17 @@ static int store_updated_refs(const char *raw_url, const 
char *remote_name,
  " 'git remote prune %s' to remove any old, conflicting "
  "branches"), remote_name);
 
+   if (advice_fetch_show_forced_updates) {
+   if (!fetch_show_forced_updates) {
+   warning(_("Fetch normally indicates which branches had 
a forced update, but that check has been disabled."));
+   warning(_("To re-enable, use '--show-forced-updates' 
flag or run 'git config fetch.showForcedUpdates true'."));
+   } else if (forced_updates_ms > 
FORCED_UPDATES_DELAY_WARNING_IN_MS) {
+   warning(_("It 

[PATCH 0/3] fetch: add --[no-]show-forced-updates

2019-06-18 Thread Derrick Stolee via GitGitGadget
The git fetch builtin includes a calculation to see if the ref values were
forced updates (i.e. if the old value is not in the history of the new
value). In a repo with many refs and a fast-moving history, this calculation
can be very slow.

This series adds a new --[no-]show-forced-updates option to 'git fetch' to
avoid this calculation when requested. Further:

 1. Add a new fetch.showForcedUpdates config setting that provides a default
value for --[no-]show-forced-updates.


 2. Add a new advice.fetchShowForcedUpdates config setting associated with a
warning that suggests fetch.showForcedUpdates when the calculation takes
a long time, or warns about the lack of "(forced update)" messages.


 3. Add the command-line options to 'git pull'.



We have been running with these commits in microsoft/git for a while now and
no user has complained that the messages were removed (VFS for Git sets 
fetch.showForcedUpdates=false by default). Users have complained about the
warning messages always appearing, so this series includes the advice config
setting. Further, I added a test to demonstrate the behavior change between
the two options.

The fetch.showForcedUpdates config setting is a candidate to be added to the
proposed "large repo" meta-config setting previously discussed [1]. I'm
putting this out for independent review as it is much smaller compared to
the potential of that series.

Thanks, -Stolee

[1] https://public-inbox.org/git/pull.254.git.gitgitgad...@gmail.com/

Derrick Stolee (3):
  fetch: add --[no-]show-forced-updates argument
  fetch: warn about forced updates in branch listing
  pull: add --[no-]show-forced-updates passthrough

 Documentation/config/advice.txt |  4 
 Documentation/config/fetch.txt  |  5 +
 Documentation/fetch-options.txt | 13 +
 advice.c|  2 ++
 advice.h|  1 +
 builtin/fetch.c | 34 -
 builtin/pull.c  |  7 +++
 t/t5510-fetch.sh| 23 ++
 8 files changed, 88 insertions(+), 1 deletion(-)


base-commit: b697d92f56511e804b8ba20ccbe7bdc85dc66810
Published-As: 
https://github.com/gitgitgadget/git/releases/tag/pr-273%2Fderrickstolee%2Fshow-forced-updates-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git 
pr-273/derrickstolee/show-forced-updates-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/273
-- 
gitgitgadget


[PATCH 3/3] pull: add --[no-]show-forced-updates passthrough

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

The 'git fetch' command can avoid calculating forced updates, so
allow users of 'git pull' to provide that option. This is particularly
necessary when the advice to use '--no-show-forced-updates' is given
at the end of the command.

Signed-off-by: Derrick Stolee 
---
 builtin/pull.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/builtin/pull.c b/builtin/pull.c
index 9dd32a115b..f1eaf6e6ed 100644
--- a/builtin/pull.c
+++ b/builtin/pull.c
@@ -128,6 +128,7 @@ static char *opt_update_shallow;
 static char *opt_refmap;
 static char *opt_ipv4;
 static char *opt_ipv6;
+static int opt_show_forced_updates = -1;
 
 static struct option pull_options[] = {
/* Shared options */
@@ -240,6 +241,8 @@ static struct option pull_options[] = {
OPT_PASSTHRU('6',  "ipv6", &opt_ipv6, NULL,
N_("use IPv6 addresses only"),
PARSE_OPT_NOARG),
+   OPT_BOOL(0, "show-forced-updates", &opt_show_forced_updates,
+N_("check for forced-updates on all updated branches")),
 
OPT_END()
 };
@@ -549,6 +552,10 @@ static int run_fetch(const char *repo, const char 
**refspecs)
argv_array_push(&args, opt_ipv4);
if (opt_ipv6)
argv_array_push(&args, opt_ipv6);
+   if (opt_show_forced_updates > 0)
+   argv_array_push(&args, "--show-forced-updates");
+   else if (opt_show_forced_updates == 0)
+   argv_array_push(&args, "--no-show-forced-updates");
 
if (repo) {
argv_array_push(&args, repo);
-- 
gitgitgadget


[PATCH 1/3] fetch: add --[no-]show-forced-updates argument

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

After updating a set of remove refs during a 'git fetch', we walk the
commits in the new ref value and not in the old ref value to discover
if the update was a forced update. This results in two things happening
during the command:

 1. The line including the ref update has an additional "(forced-update)"
marker at the end.

 2. The ref log for that remote branch includes a bit saying that update
is a forced update.

For many situations, this forced-update message happens infrequently, or
is a small bit of information among many ref updates. Many users ignore
these messages, but the calculation required here slows down their fetches
significantly. Keep in mind that they do not have the opportunity to
calculate a commit-graph file containing the newly-fetched commits, so
these comparisons can be very slow.

Add a '--[no-]show-forced-updates' option that allows a user to skip this
calculation. The only permanent result is dropping the forced-update bit
in the reflog.

Include a new fetch.showForcedUpdates config setting that allows this
behavior without including the argument in every command. The config
setting is overridden by the command-line arguments.

Signed-off-by: Derrick Stolee 
---
 Documentation/config/fetch.txt  |  5 +
 Documentation/fetch-options.txt | 13 +
 builtin/fetch.c | 11 ++-
 t/t5510-fetch.sh| 23 +++
 4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/Documentation/config/fetch.txt b/Documentation/config/fetch.txt
index cbfad6cdbb..ba890b5884 100644
--- a/Documentation/config/fetch.txt
+++ b/Documentation/config/fetch.txt
@@ -63,3 +63,8 @@ fetch.negotiationAlgorithm::
Unknown values will cause 'git fetch' to error out.
 +
 See also the `--negotiation-tip` option for linkgit:git-fetch[1].
+
+fetch.showForcedUpdates::
+   Set to false to enable `--no-show-forced-updates` in
+   linkgit:git-fetch[1] and linkgit:git-pull[1] commands.
+   Defaults to true.
diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
index 91c47752ec..5801d23ae4 100644
--- a/Documentation/fetch-options.txt
+++ b/Documentation/fetch-options.txt
@@ -221,6 +221,19 @@ endif::git-pull[]
When multiple `--server-option=` are given, they are all
sent to the other side in the order listed on the command line.
 
+--show-forced-updates::
+   By default, git checks if a branch is force-updated during
+   fetch. This can be disabled through fetch.showForcedUpdates, but
+   the --show-forced-updates option guarantees this check occurs.
+   See linkgit:git-config[1].
+
+--no-show-forced-updates::
+   By default, git checks if a branch is force-updated during
+   fetch. Pass --no-show-forced-updates or set fetch.showForcedUpdates
+   to false to skip this check for performance reasons. If used during
+   'git-pull' the --ff-only option will still check for forced updates
+   before attempting a fast-forward update. See linkgit:git-config[1].
+
 -4::
 --ipv4::
Use IPv4 addresses only, ignoring IPv6 addresses.
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 4ba63d5ac6..571c255218 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -39,6 +39,7 @@ enum {
 };
 
 static int fetch_prune_config = -1; /* unspecified */
+static int fetch_show_forced_updates = 1;
 static int prune = -1; /* unspecified */
 #define PRUNE_BY_DEFAULT 0 /* do we prune by default? */
 
@@ -79,6 +80,11 @@ static int git_fetch_config(const char *k, const char *v, 
void *cb)
return 0;
}
 
+   if (!strcmp(k, "fetch.showforcedupdates")) {
+   fetch_show_forced_updates = git_config_bool(k, v);
+   return 0;
+   }
+
if (!strcmp(k, "submodule.recurse")) {
int r = git_config_bool(k, v) ?
RECURSE_SUBMODULES_ON : RECURSE_SUBMODULES_OFF;
@@ -169,6 +175,8 @@ static struct option builtin_fetch_options[] = {
OPT_STRING_LIST(0, "negotiation-tip", &negotiation_tip, N_("revision"),
N_("report that we have only objects reachable from 
this object")),
OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
+   OPT_BOOL(0, "show-forced-updates", &fetch_show_forced_updates,
+N_("check for forced-updates on all updated branches")),
OPT_END()
 };
 
@@ -773,9 +781,10 @@ static int update_local_ref(struct ref *ref,
return r;
}
 
-   if (in_merge_bases(current, updated)) {
+   if (!fetch_show_forced_updates || in_merge_bases(current, updated)) {
struct strbuf quickref = STRBUF_INIT;
int r;
+
strbuf_add_unique_abbrev(&quickref, ¤t->object.oid, 
DEFAULT_ABBREV);
strbuf_adds

[PATCH 0/3] git-status: create config for ahead/behind calculation

2019-06-18 Thread Derrick Stolee via GitGitGadget
The git status builtin includes a calculation for how far the local ref is
ahead or behind the remote tracking branch. This check can be expensive, so
we already have a --[no-]ahead-behind command-line option.

This series adds two bits of functionality to the feature:

 1. Add a new status.aheadBehind config setting.


 2. Add a new advice.statusAheadBehind config setting associated with a
warning that suggests status.aheadBehind when the calculation takes a
long time.



We have been running with these commits in microsoft/git for a while now.
The only change I made in adapting Jeff's commits was to add the advice
config documentation.

The status.aheadBehind config setting is a candidate to be added to the
proposed "large repo" meta-config setting previously discussed [1]. I'm
putting this out for independent review as it is much smaller compared to
the potential of that series.

Thanks, -Stolee

[1] https://public-inbox.org/git/pull.254.git.gitgitgad...@gmail.com/

Jeff Hostetler (3):
  status: add status.aheadbehind setting
  status: warn when a/b calculation takes too long
  status: ignore status.aheadbehind in porcelain formats

 Documentation/config/advice.txt |  6 ++
 Documentation/config/status.txt |  5 +
 advice.c|  2 ++
 advice.h|  1 +
 builtin/commit.c| 19 ++-
 t/t6040-tracking-info.sh| 31 +++
 t/t7064-wtstatus-pv2.sh |  8 
 wt-status.c | 17 +
 8 files changed, 88 insertions(+), 1 deletion(-)


base-commit: b697d92f56511e804b8ba20ccbe7bdc85dc66810
Published-As: 
https://github.com/gitgitgadget/git/releases/tag/pr-272%2Fderrickstolee%2Fstatus-ahead-behind-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git 
pr-272/derrickstolee/status-ahead-behind-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/272
-- 
gitgitgadget


[PATCH v6 03/18] commit-graph: rename commit_compare to oid_compare

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

The helper function commit_compare() actually compares object_id
structs, not commits. A future change to commit-graph.c will need
to sort commit structs, so rename this function in advance.

Signed-off-by: Derrick Stolee 
---
 commit-graph.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 8f5c09363c..96b07a674e 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -770,7 +770,7 @@ static void write_graph_chunk_extra_edges(struct hashfile 
*f,
}
 }
 
-static int commit_compare(const void *_a, const void *_b)
+static int oid_compare(const void *_a, const void *_b)
 {
const struct object_id *a = (const struct object_id *)_a;
const struct object_id *b = (const struct object_id *)_b;
@@ -1039,7 +1039,7 @@ static uint32_t count_distinct_commits(struct 
write_commit_graph_context *ctx)
_("Counting distinct commits in commit graph"),
ctx->oids.nr);
display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */
-   QSORT(ctx->oids.list, ctx->oids.nr, commit_compare);
+   QSORT(ctx->oids.list, ctx->oids.nr, oid_compare);
 
for (i = 1; i < ctx->oids.nr; i++) {
display_progress(ctx->progress, i + 1);
-- 
gitgitgadget



[PATCH v6 10/18] commit-graph: allow cross-alternate chains

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

In an environment like a fork network, it is helpful to have a
commit-graph chain that spans both the base repo and the fork repo. The
fork is usually a small set of data on top of the large repo, but
sometimes the fork is much larger. For example, git-for-windows/git has
almost double the number of commits as git/git because it rebases its
commits on every major version update.

To allow cross-alternate commit-graph chains, we need a few pieces:

1. When looking for a graph-{hash}.graph file, check all alternates.

2. When merging commit-graph chains, do not merge across alternates.

3. When writing a new commit-graph chain based on a commit-graph file
   in another object directory, do not allow success if the base file
   has of the name "commit-graph" instead of
   "commit-graphs/graph-{hash}.graph".

Signed-off-by: Derrick Stolee 
---
 Documentation/technical/commit-graph.txt | 40 +
 commit-graph.c   | 56 +++-
 commit-graph.h   |  1 +
 t/t5324-split-commit-graph.sh| 37 
 4 files changed, 123 insertions(+), 11 deletions(-)

diff --git a/Documentation/technical/commit-graph.txt 
b/Documentation/technical/commit-graph.txt
index d9c6253b0a..473032e476 100644
--- a/Documentation/technical/commit-graph.txt
+++ b/Documentation/technical/commit-graph.txt
@@ -266,6 +266,42 @@ The merge strategy values (2 for the size multiple, 64,000 
for the maximum
 number of commits) could be extracted into config settings for full
 flexibility.
 
+## Chains across multiple object directories
+
+In a repo with alternates, we look for the `commit-graph-chain` file starting
+in the local object directory and then in each alternate. The first file that
+exists defines our chain. As we look for the `graph-{hash}` files for
+each `{hash}` in the chain file, we follow the same pattern for the host
+directories.
+
+This allows commit-graphs to be split across multiple forks in a fork network.
+The typical case is a large "base" repo with many smaller forks.
+
+As the base repo advances, it will likely update and merge its commit-graph
+chain more frequently than the forks. If a fork updates their commit-graph 
after
+the base repo, then it should "reparent" the commit-graph chain onto the new
+chain in the base repo. When reading each `graph-{hash}` file, we track
+the object directory containing it. During a write of a new commit-graph file,
+we check for any changes in the source object directory and read the
+`commit-graph-chain` file for that source and create a new file based on those
+files. During this "reparent" operation, we necessarily need to collapse all
+levels in the fork, as all of the files are invalid against the new base file.
+
+It is crucial to be careful when cleaning up "unreferenced" 
`graph-{hash}.graph`
+files in this scenario. It falls to the user to define the proper settings for
+their custom environment:
+
+ 1. When merging levels in the base repo, the unreferenced files may still be
+referenced by chains from fork repos.
+
+ 2. The expiry time should be set to a length of time such that every fork has
+time to recompute their commit-graph chain to "reparent" onto the new base
+file(s).
+
+ 3. If the commit-graph chain is updated in the base, the fork will not have
+access to the new chain until its chain is updated to reference those 
files.
+(This may change in the future [5].)
+
 Related Links
 -
 [0] https://bugs.chromium.org/p/git/issues/detail?id=8
@@ -292,3 +328,7 @@ Related Links
 
 [4] 
https://public-inbox.org/git/20180108154822.54829-1-...@jeffhostetler.com/T/#u
 A patch to remove the ahead-behind calculation from 'status'.
+
+[5] 
https://public-inbox.org/git/f27db281-abad-5043-6d71-cbb083b1c...@gmail.com/
+A discussion of a "two-dimensional graph position" that can allow reading
+multiple commit-graph chains at the same time.
diff --git a/commit-graph.c b/commit-graph.c
index fb3100921c..fba705bc51 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -320,6 +320,9 @@ static struct commit_graph *load_commit_graph_v1(struct 
repository *r, const cha
struct commit_graph *g = load_commit_graph_one(graph_name);
free(graph_name);
 
+   if (g)
+   g->obj_dir = obj_dir;
+
return g;
 }
 
@@ -379,9 +382,10 @@ static struct commit_graph *load_commit_graph_chain(struct 
repository *r, const
count = st.st_size / (the_hash_algo->hexsz + 1);
oids = xcalloc(count, sizeof(struct object_id));
 
-   for (i = 0; i < count && valid; i++) {
-   char *graph_name;
-   struct commit_graph *g;
+   prepare_alt_odb(r);
+
+   for (i = 0; i < count; i++) {
+   struct object_directory *odb;
 
if (strbuf

[PATCH v6 14/18] commit-graph: clean up chains after flattened write

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

If we write a commit-graph file without the split option, then
we write to $OBJDIR/info/commit-graph and start to ignore
the chains in $OBJDIR/info/commit-graphs/.

Unlink the commit-graph-chain file and expire the graph-{hash}.graph
files in $OBJDIR/info/commit-graphs/ during every write.

Signed-off-by: Derrick Stolee 
---
 commit-graph.c| 12 +---
 t/t5324-split-commit-graph.sh | 12 
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index f33f4fe009..3599ae664d 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1687,6 +1687,12 @@ static void expire_commit_graphs(struct 
write_commit_graph_context *ctx)
 
if (ctx->split_opts && ctx->split_opts->expire_time)
expire_time -= ctx->split_opts->expire_time;
+   if (!ctx->split) {
+   char *chain_file_name = get_chain_filename(ctx->obj_dir);
+   unlink(chain_file_name);
+   free(chain_file_name);
+   ctx->num_commit_graphs_after = 0;
+   }
 
strbuf_addstr(&path, ctx->obj_dir);
strbuf_addstr(&path, "/info/commit-graphs");
@@ -1841,10 +1847,10 @@ int write_commit_graph(const char *obj_dir,
 
res = write_commit_graph_file(ctx);
 
-   if (ctx->split) {
+   if (ctx->split)
mark_commit_graphs(ctx);
-   expire_commit_graphs(ctx);
-   }
+
+   expire_commit_graphs(ctx);
 
 cleanup:
free(ctx->graph_name);
diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh
index 3df90ae58f..e8df35c30b 100755
--- a/t/t5324-split-commit-graph.sh
+++ b/t/t5324-split-commit-graph.sh
@@ -216,6 +216,18 @@ test_expect_success 'test merge stragety constants' '
)
 '
 
+test_expect_success 'remove commit-graph-chain file after flattening' '
+   git clone . flatten &&
+   (
+   cd flatten &&
+   test_line_count = 2 $graphdir/commit-graph-chain &&
+   git commit-graph write --reachable &&
+   test_path_is_missing $graphdir/commit-graph-chain &&
+   ls $graphdir >graph-files &&
+   test_line_count = 0 graph-files
+   )
+'
+
 corrupt_file() {
file=$1
pos=$2
-- 
gitgitgadget



[PATCH v6 17/18] commit-graph: normalize commit-graph filenames

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

When writing commit-graph files, we append path data to an
object directory, which may be specified by the user via the
'--object-dir' option. If the user supplies a trailing slash,
or some other alternative path format, the resulting path may
be usable for writing to the correct location. However, when
expiring graph files from the /info/commit-graphs
directory during a write, we need to compare paths with exact
string matches.

Normalize the commit-graph filenames to avoid ambiguity. This
creates extra allocations, but this is a constant multiple of
the number of commit-graph files, which should be a number in
the single digits.

Further normalize the object directory in the context. Due to
a comparison between g->obj_dir and ctx->obj_dir in
split_graph_merge_strategy(), a trailing slash would prevent
any merging of layers within the same object directory. The
check is there to ensure we do not merge across alternates.
Update the tests to include a case with this trailing slash
problem.

Signed-off-by: Derrick Stolee 
---
 commit-graph.c| 30 +++---
 t/t5324-split-commit-graph.sh |  7 ++-
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 3599ae664d..c91e6f0fb8 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -43,15 +43,23 @@
 
 char *get_commit_graph_filename(const char *obj_dir)
 {
-   return xstrfmt("%s/info/commit-graph", obj_dir);
+   char *filename = xstrfmt("%s/info/commit-graph", obj_dir);
+   char *normalized = xmalloc(strlen(filename) + 1);
+   normalize_path_copy(normalized, filename);
+   free(filename);
+   return normalized;
 }
 
 static char *get_split_graph_filename(const char *obj_dir,
  const char *oid_hex)
 {
-   return xstrfmt("%s/info/commit-graphs/graph-%s.graph",
-  obj_dir,
-  oid_hex);
+   char *filename = xstrfmt("%s/info/commit-graphs/graph-%s.graph",
+obj_dir,
+oid_hex);
+   char *normalized = xmalloc(strlen(filename) + 1);
+   normalize_path_copy(normalized, filename);
+   free(filename);
+   return normalized;
 }
 
 static char *get_chain_filename(const char *obj_dir)
@@ -746,7 +754,7 @@ struct packed_oid_list {
 
 struct write_commit_graph_context {
struct repository *r;
-   const char *obj_dir;
+   char *obj_dir;
char *graph_name;
struct packed_oid_list oids;
struct packed_commit_list commits;
@@ -1729,7 +1737,6 @@ static void expire_commit_graphs(struct 
write_commit_graph_context *ctx)
 
if (!found)
unlink(path.buf);
-
}
 }
 
@@ -1741,6 +1748,7 @@ int write_commit_graph(const char *obj_dir,
 {
struct write_commit_graph_context *ctx;
uint32_t i, count_distinct = 0;
+   size_t len;
int res = 0;
 
if (!commit_graph_compatible(the_repository))
@@ -1748,7 +1756,14 @@ int write_commit_graph(const char *obj_dir,
 
ctx = xcalloc(1, sizeof(struct write_commit_graph_context));
ctx->r = the_repository;
-   ctx->obj_dir = obj_dir;
+
+   /* normalize object dir with no trailing slash */
+   ctx->obj_dir = xmallocz(strlen(obj_dir) + 1);
+   normalize_path_copy(ctx->obj_dir, obj_dir);
+   len = strlen(ctx->obj_dir);
+   if (len && ctx->obj_dir[len - 1] == '/')
+   ctx->obj_dir[len - 1] = 0;
+
ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0;
ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0;
ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0;
@@ -1856,6 +1871,7 @@ int write_commit_graph(const char *obj_dir,
free(ctx->graph_name);
free(ctx->commits.list);
free(ctx->oids.list);
+   free(ctx->obj_dir);
 
if (ctx->commit_graph_filenames_after) {
for (i = 0; i < ctx->num_commit_graphs_after; i++) {
diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh
index 130f2baf44..fc0d00751c 100755
--- a/t/t5324-split-commit-graph.sh
+++ b/t/t5324-split-commit-graph.sh
@@ -163,7 +163,12 @@ test_expect_success 'create fork and chain across 
alternate' '
test_line_count = 1 graph-files &&
git -c core.commitGraph=true  rev-list HEAD >expect &&
git -c core.commitGraph=false rev-list HEAD >actual &&
-   test_cmp expect actual
+   test_cmp expect actual &&
+   test_commit 14 &&
+   git commit-graph write --reachable --split 
--object-dir=.git/objects/ &&
+   test_line_count = 3 $graphdir/commit-graph-chain &&
+   ls $graphdir/graph-*.graph >graph-files &&
+   test_line_count = 1 graph-files
)
 '
 
-- 
gitgitgadget



[PATCH v6 16/18] commit-graph: test --split across alternate without --split

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

We allow sharing commit-graph files across alternates. When we are
writing a split commit-graph, we allow adding tip graph files that
are not in the alternate, but include commits from our local repo.

However, if our alternate is not using the split commit-graph format,
its file is at .git/objects/info/commit-graph and we are trying to
write files in .git/objects/info/commit-graphs/graph-{hash}.graph.

We already have logic to ensure we do not merge across alternate
boundaries, but we also cannot have a commit-graph chain to our
alternate if uses the old filename structure.

Create a test that verifies we create a new split commit-graph
with only one level and we do not modify the existing commit-graph
in the alternate.

Signed-off-by: Derrick Stolee 
---
 t/t5324-split-commit-graph.sh | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh
index 704def70bb..130f2baf44 100755
--- a/t/t5324-split-commit-graph.sh
+++ b/t/t5324-split-commit-graph.sh
@@ -301,4 +301,19 @@ test_expect_success 'add octopus merge' '
 
 graph_git_behavior 'graph exists' merge/octopus commits/12
 
+test_expect_success 'split across alternate where alternate is not split' '
+   git commit-graph write --reachable &&
+   test_path_is_file .git/objects/info/commit-graph &&
+   cp .git/objects/info/commit-graph . &&
+   git clone --no-hardlinks . alt-split &&
+   (
+   cd alt-split &&
+   echo "$(pwd)"/../.git/objects >.git/objects/info/alternates &&
+   test_commit 18 &&
+   git commit-graph write --reachable --split &&
+   test_line_count = 1 $graphdir/commit-graph-chain
+   ) &&
+   test_cmp commit-graph .git/objects/info/commit-graph
+'
+
 test_done
-- 
gitgitgadget



[PATCH v6 05/18] commit-graph: add base graphs chunk

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

To quickly verify a commit-graph chain is valid on load, we will
read from the new "Base Graphs Chunk" of each file in the chain.
This will prevent accidentally loading incorrect data from manually
editing the commit-graph-chain file or renaming graph-{hash}.graph
files.

The commit_graph struct already had an object_id struct "oid", but
it was never initialized or used. Add a line to read the hash from
the end of the commit-graph file and into the oid member.

Signed-off-by: Derrick Stolee 
---
 .../technical/commit-graph-format.txt | 11 --
 commit-graph.c| 22 +++
 commit-graph.h|  1 +
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/Documentation/technical/commit-graph-format.txt 
b/Documentation/technical/commit-graph-format.txt
index 16452a0504..a4f17441ae 100644
--- a/Documentation/technical/commit-graph-format.txt
+++ b/Documentation/technical/commit-graph-format.txt
@@ -44,8 +44,9 @@ HEADER:
 
   1-byte number (C) of "chunks"
 
-  1-byte (reserved for later use)
- Current clients should ignore this value.
+  1-byte number (B) of base commit-graphs
+  We infer the length (H*B) of the Base Graphs chunk
+  from this value.
 
 CHUNK LOOKUP:
 
@@ -92,6 +93,12 @@ CHUNK DATA:
   positions for the parents until reaching a value with the 
most-significant
   bit on. The other bits correspond to the position of the last parent.
 
+  Base Graphs List (ID: {'B', 'A', 'S', 'E'}) [Optional]
+  This list of H-byte hashes describe a set of B commit-graph files that
+  form a commit-graph chain. The graph position for the ith commit in this
+  file's OID Lookup chunk is equal to i plus the number of commits in all
+  base graphs.  If B is non-zero, this chunk must exist.
+
 TRAILER:
 
H-byte HASH-checksum of all of the above.
diff --git a/commit-graph.c b/commit-graph.c
index f7dfc6aecd..6f7961d071 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -22,6 +22,7 @@
 #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
 #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
 #define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
+#define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */
 
 #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
 
@@ -262,6 +263,12 @@ struct commit_graph *parse_commit_graph(void *graph_map, 
int fd,
else
graph->chunk_extra_edges = data + chunk_offset;
break;
+
+   case GRAPH_CHUNKID_BASE:
+   if (graph->chunk_base_graphs)
+   chunk_repeated = 1;
+   else
+   graph->chunk_base_graphs = data + chunk_offset;
}
 
if (chunk_repeated) {
@@ -280,6 +287,8 @@ struct commit_graph *parse_commit_graph(void *graph_map, 
int fd,
last_chunk_offset = chunk_offset;
}
 
+   hashcpy(graph->oid.hash, graph->data + graph->data_len - 
graph->hash_len);
+
if (verify_commit_graph_lite(graph))
return NULL;
 
@@ -315,8 +324,21 @@ static int add_graph_to_chain(struct commit_graph *g,
 {
struct commit_graph *cur_g = chain;
 
+   if (n && !g->chunk_base_graphs) {
+   warning(_("commit-graph has no base graphs chunk"));
+   return 0;
+   }
+
while (n) {
n--;
+
+   if (!cur_g ||
+   !oideq(&oids[n], &cur_g->oid) ||
+   !hasheq(oids[n].hash, g->chunk_base_graphs + g->hash_len * 
n)) {
+   warning(_("commit-graph chain does not match"));
+   return 0;
+   }
+
cur_g = cur_g->base_graph;
}
 
diff --git a/commit-graph.h b/commit-graph.h
index 5819910a5b..6e7d42cf32 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -55,6 +55,7 @@ struct commit_graph {
const unsigned char *chunk_oid_lookup;
const unsigned char *chunk_commit_data;
const unsigned char *chunk_extra_edges;
+   const unsigned char *chunk_base_graphs;
 };
 
 struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st);
-- 
gitgitgadget



[PATCH v6 12/18] commit-graph: create options for split files

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

The split commit-graph feature is now fully implemented, but needs
some more run-time configurability. Allow direct callers to 'git
commit-graph write --split' to specify the values used in the
merge strategy and the expire time.

Update the documentation to specify these values.

Signed-off-by: Derrick Stolee 
---
 Documentation/git-commit-graph.txt   | 21 ++-
 Documentation/technical/commit-graph.txt |  7 ++--
 builtin/commit-graph.c   | 25 ++---
 builtin/commit.c |  2 +-
 builtin/gc.c |  3 +-
 commit-graph.c   | 35 --
 commit-graph.h   | 12 +-
 t/t5324-split-commit-graph.sh| 47 
 8 files changed, 128 insertions(+), 24 deletions(-)

diff --git a/Documentation/git-commit-graph.txt 
b/Documentation/git-commit-graph.txt
index 624470e198..365e145e82 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -26,7 +26,7 @@ OPTIONS
Use given directory for the location of packfiles and commit-graph
file. This parameter exists to specify the location of an alternate
that only has the objects directory, not a full `.git` directory. The
-   commit-graph file is expected to be at `/info/commit-graph` and
+   commit-graph file is expected to be in the `/info` directory and
the packfiles are expected to be in `/pack`.
 
 
@@ -51,6 +51,25 @@ or `--stdin-packs`.)
 +
 With the `--append` option, include all commits that are present in the
 existing commit-graph file.
++
+With the `--split` option, write the commit-graph as a chain of multiple
+commit-graph files stored in `/info/commit-graphs`. The new commits
+not already in the commit-graph are added in a new "tip" file. This file
+is merged with the existing file if the following merge conditions are
+met:
++
+* If `--size-multiple=` is not specified, let `X` equal 2. If the new
+tip file would have `N` commits and the previous tip has `M` commits and
+`X` times `N` is greater than  `M`, instead merge the two files into a
+single file.
++
+* If `--max-commits=` is specified with `M` a positive integer, and the
+new tip file would have more than `M` commits, then instead merge the new
+tip with the previous tip.
++
+Finally, if `--expire-time=` is not specified, let `datetime`
+be the current time. After writing the split commit-graph, delete all
+unused commit-graph whose modified times are older than `datetime`.
 
 'read'::
 
diff --git a/Documentation/technical/commit-graph.txt 
b/Documentation/technical/commit-graph.txt
index aed4350a59..729fbcb32f 100644
--- a/Documentation/technical/commit-graph.txt
+++ b/Documentation/technical/commit-graph.txt
@@ -248,10 +248,11 @@ When writing a set of commits that do not exist in the 
commit-graph stack of
 height N, we default to creating a new file at level N + 1. We then decide to
 merge with the Nth level if one of two conditions hold:
 
-  1. The expected file size for level N + 1 is at least half the file size for
- level N.
+  1. `--size-multiple=` is specified or X = 2, and the number of commits in
+ level N is less than X times the number of commits in level N + 1.
 
-  2. Level N + 1 contains more than 64, commits.
+  2. `--max-commits=` is specified with non-zero C and the number of commits
+ in level N + 1 is more than C commits.
 
 This decision cascades down the levels: when we merge a level we create a new
 set of commits that then compares to the next level.
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index ff8bc3c8b9..6c0b5b17e0 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -10,7 +10,7 @@ static char const * const builtin_commit_graph_usage[] = {
N_("git commit-graph [--object-dir ]"),
N_("git commit-graph read [--object-dir ]"),
N_("git commit-graph verify [--object-dir ]"),
-   N_("git commit-graph write [--object-dir ] [--append|--split] 
[--reachable|--stdin-packs|--stdin-commits]"),
+   N_("git commit-graph write [--object-dir ] [--append|--split] 
[--reachable|--stdin-packs|--stdin-commits] "),
NULL
 };
 
@@ -25,7 +25,7 @@ static const char * const builtin_commit_graph_read_usage[] = 
{
 };
 
 static const char * const builtin_commit_graph_write_usage[] = {
-   N_("git commit-graph write [--object-dir ] [--append|--split] 
[--reachable|--stdin-packs|--stdin-commits]"),
+   N_("git commit-graph write [--object-dir ] [--append|--split] 
[--reachable|--stdin-packs|--stdin-commits] "),
NULL
 };
 
@@ -135,6 +135,7 @@ static int graph_read(int argc, const char **argv)
 }
 
 extern int read_replace_refs;
+static struct split_commit_graph_opts split_opts;
 
 static int graph_write(int argc, const char **argv)
 {

[PATCH v6 11/18] commit-graph: expire commit-graph files

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

As we merge commit-graph files in a commit-graph chain, we should clean
up the files that are no longer used.

This change introduces an 'expiry_window' value to the context, which is
always zero (for now). We then check the modified time of each
graph-{hash}.graph file in the $OBJDIR/info/commit-graphs folder and
unlink the files that are older than the expiry_window.

Since this is always zero, this immediately clears all unused graph
files. We will update the value to match a config setting in a future
change.

Signed-off-by: Derrick Stolee 
---
 Documentation/technical/commit-graph.txt | 15 ++
 commit-graph.c   | 69 
 t/t5324-split-commit-graph.sh|  2 +-
 3 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/Documentation/technical/commit-graph.txt 
b/Documentation/technical/commit-graph.txt
index 473032e476..aed4350a59 100644
--- a/Documentation/technical/commit-graph.txt
+++ b/Documentation/technical/commit-graph.txt
@@ -266,6 +266,21 @@ The merge strategy values (2 for the size multiple, 64,000 
for the maximum
 number of commits) could be extracted into config settings for full
 flexibility.
 
+## Deleting graph-{hash} files
+
+After a new tip file is written, some `graph-{hash}` files may no longer
+be part of a chain. It is important to remove these files from disk, 
eventually.
+The main reason to delay removal is that another process could read the
+`commit-graph-chain` file before it is rewritten, but then look for the
+`graph-{hash}` files after they are deleted.
+
+To allow holding old split commit-graphs for a while after they are 
unreferenced,
+we update the modified times of the files when they become unreferenced. Then,
+we scan the `$OBJDIR/info/commit-graphs/` directory for `graph-{hash}`
+files whose modified times are older than a given expiry window. This window
+defaults to zero, but can be changed using command-line arguments or a config
+setting.
+
 ## Chains across multiple object directories
 
 In a repo with alternates, we look for the `commit-graph-chain` file starting
diff --git a/commit-graph.c b/commit-graph.c
index fba705bc51..0cc2ceb349 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1652,6 +1652,70 @@ static void merge_commit_graphs(struct 
write_commit_graph_context *ctx)
sort_and_scan_merged_commits(ctx);
 }
 
+static void mark_commit_graphs(struct write_commit_graph_context *ctx)
+{
+   uint32_t i;
+   time_t now = time(NULL);
+
+   for (i = ctx->num_commit_graphs_after - 1; i < 
ctx->num_commit_graphs_before; i++) {
+   struct stat st;
+   struct utimbuf updated_time;
+
+   stat(ctx->commit_graph_filenames_before[i], &st);
+
+   updated_time.actime = st.st_atime;
+   updated_time.modtime = now;
+   utime(ctx->commit_graph_filenames_before[i], &updated_time);
+   }
+}
+
+static void expire_commit_graphs(struct write_commit_graph_context *ctx)
+{
+   struct strbuf path = STRBUF_INIT;
+   DIR *dir;
+   struct dirent *de;
+   size_t dirnamelen;
+   time_t expire_time = time(NULL);
+
+   strbuf_addstr(&path, ctx->obj_dir);
+   strbuf_addstr(&path, "/info/commit-graphs");
+   dir = opendir(path.buf);
+
+   if (!dir) {
+   strbuf_release(&path);
+   return;
+   }
+
+   strbuf_addch(&path, '/');
+   dirnamelen = path.len;
+   while ((de = readdir(dir)) != NULL) {
+   struct stat st;
+   uint32_t i, found = 0;
+
+   strbuf_setlen(&path, dirnamelen);
+   strbuf_addstr(&path, de->d_name);
+
+   stat(path.buf, &st);
+
+   if (st.st_mtime > expire_time)
+   continue;
+   if (path.len < 6 || strcmp(path.buf + path.len - 6, ".graph"))
+   continue;
+
+   for (i = 0; i < ctx->num_commit_graphs_after; i++) {
+   if (!strcmp(ctx->commit_graph_filenames_after[i],
+   path.buf)) {
+   found = 1;
+   break;
+   }
+   }
+
+   if (!found)
+   unlink(path.buf);
+
+   }
+}
+
 int write_commit_graph(const char *obj_dir,
   struct string_list *pack_indexes,
   struct string_list *commit_hex,
@@ -1764,6 +1828,11 @@ int write_commit_graph(const char *obj_dir,
 
res = write_commit_graph_file(ctx);
 
+   if (ctx->split) {
+   mark_commit_graphs(ctx);
+   expire_commit_graphs(ctx);
+   }
+
 cleanup:
free(ctx->graph_name);
free(ctx->commits.list);
diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-co

[PATCH v6 04/18] commit-graph: load commit-graph chains

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

Prepare the logic for reading a chain of commit-graphs.

First, look for a file at $OBJDIR/info/commit-graph. If it exists,
then use that file and stop.

Next, look for the chain file at $OBJDIR/info/commit-graphs/commit-graph-chain.
If this file exists, then load the hash values as line-separated values in that
file and load $OBJDIR/info/commit-graphs/graph-{hash[i]}.graph for each hash[i]
in that file. The file is given in order, so the first hash corresponds to the
"base" file and the final hash corresponds to the "tip" file.

This implementation assumes that all of the graph-{hash}.graph files are in
the same object directory as the commit-graph-chain file. This will be updated
in a future change. This change is purposefully simple so we can isolate the
different concerns.

Signed-off-by: Derrick Stolee 
---
 commit-graph.c | 112 ++---
 1 file changed, 106 insertions(+), 6 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 96b07a674e..f7dfc6aecd 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -45,6 +45,19 @@ char *get_commit_graph_filename(const char *obj_dir)
return xstrfmt("%s/info/commit-graph", obj_dir);
 }
 
+static char *get_split_graph_filename(const char *obj_dir,
+ const char *oid_hex)
+{
+   return xstrfmt("%s/info/commit-graphs/graph-%s.graph",
+  obj_dir,
+  oid_hex);
+}
+
+static char *get_chain_filename(const char *obj_dir)
+{
+   return xstrfmt("%s/info/commit-graphs/commit-graph-chain", obj_dir);
+}
+
 static uint8_t oid_version(void)
 {
return 1;
@@ -286,18 +299,105 @@ static struct commit_graph *load_commit_graph_one(const 
char *graph_file)
return load_commit_graph_one_fd_st(fd, &st);
 }
 
+static struct commit_graph *load_commit_graph_v1(struct repository *r, const 
char *obj_dir)
+{
+   char *graph_name = get_commit_graph_filename(obj_dir);
+   struct commit_graph *g = load_commit_graph_one(graph_name);
+   free(graph_name);
+
+   return g;
+}
+
+static int add_graph_to_chain(struct commit_graph *g,
+ struct commit_graph *chain,
+ struct object_id *oids,
+ int n)
+{
+   struct commit_graph *cur_g = chain;
+
+   while (n) {
+   n--;
+   cur_g = cur_g->base_graph;
+   }
+
+   g->base_graph = chain;
+
+   if (chain)
+   g->num_commits_in_base = chain->num_commits + 
chain->num_commits_in_base;
+
+   return 1;
+}
+
+static struct commit_graph *load_commit_graph_chain(struct repository *r, 
const char *obj_dir)
+{
+   struct commit_graph *graph_chain = NULL;
+   struct strbuf line = STRBUF_INIT;
+   struct stat st;
+   struct object_id *oids;
+   int i = 0, valid = 1, count;
+   char *chain_name = get_chain_filename(obj_dir);
+   FILE *fp;
+   int stat_res;
+
+   fp = fopen(chain_name, "r");
+   stat_res = stat(chain_name, &st);
+   free(chain_name);
+
+   if (!fp ||
+   stat_res ||
+   st.st_size <= the_hash_algo->hexsz)
+   return NULL;
+
+   count = st.st_size / (the_hash_algo->hexsz + 1);
+   oids = xcalloc(count, sizeof(struct object_id));
+
+   for (i = 0; i < count && valid; i++) {
+   char *graph_name;
+   struct commit_graph *g;
+
+   if (strbuf_getline_lf(&line, fp) == EOF)
+   break;
+
+   if (get_oid_hex(line.buf, &oids[i])) {
+   warning(_("invalid commit-graph chain: line '%s' not a 
hash"),
+   line.buf);
+   valid = 0;
+   break;
+   }
+
+   graph_name = get_split_graph_filename(obj_dir, line.buf);
+   g = load_commit_graph_one(graph_name);
+   free(graph_name);
+
+   if (g && add_graph_to_chain(g, graph_chain, oids, i))
+   graph_chain = g;
+   else
+   valid = 0;
+   }
+
+   free(oids);
+   fclose(fp);
+
+   return graph_chain;
+}
+
+static struct commit_graph *read_commit_graph_one(struct repository *r, const 
char *obj_dir)
+{
+   struct commit_graph *g = load_commit_graph_v1(r, obj_dir);
+
+   if (!g)
+   g = load_commit_graph_chain(r, obj_dir);
+
+   return g;
+}
+
 static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
 {
-   char *graph_name;
 
if (r->objects->commit_graph)
return;
 
-   graph_name = get_commit_graph_filename(obj_dir);
-   r->objects->commit_graph =
-   load_commit_graph_one(graph_name);
-
-   FREE_AND_NULL(graph_name);
+   r->objects->commit_graph = read_commit_graph_one(r, obj_dir);
 }
 
 /*
-- 
gitgitgadget



[PATCH v6 15/18] commit-graph: test octopus merges with --split

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

Octopus merges require an extra chunk of data in the commit-graph
file format. Create a test that ensures the new --split option
continues to work with an octopus merge. Specifically, ensure
that the octopus merge has parents across layers to truly check
that our graph position logic holds up correctly.

Signed-off-by: Derrick Stolee 
---
 t/t5324-split-commit-graph.sh | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh
index e8df35c30b..704def70bb 100755
--- a/t/t5324-split-commit-graph.sh
+++ b/t/t5324-split-commit-graph.sh
@@ -290,4 +290,15 @@ test_expect_success 'verify after commit-graph-chain 
corruption' '
)
 '
 
+test_expect_success 'add octopus merge' '
+   git reset --hard commits/10 &&
+   git merge commits/3 commits/4 &&
+   git branch merge/octopus &&
+   git commit-graph write --reachable --split &&
+   git commit-graph verify &&
+   test_line_count = 3 $graphdir/commit-graph-chain
+'
+
+graph_git_behavior 'graph exists' merge/octopus commits/12
+
 test_done
-- 
gitgitgadget



[PATCH v6 13/18] commit-graph: verify chains with --shallow mode

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

If we wrote a commit-graph chain, we only modified the tip file in
the chain. It is valuable to verify what we wrote, but not waste
time checking files we did not write.

Add a '--shallow' option to the 'git commit-graph verify' subcommand
and check that it does not read the base graph in a two-file chain.

Making the verify subcommand read from a chain of commit-graphs takes
some rearranging of the builtin code.

Signed-off-by: Derrick Stolee 
---
 Documentation/git-commit-graph.txt |  5 ++-
 builtin/commit-graph.c | 27 +
 commit-graph.c | 15 ++--
 commit-graph.h |  6 ++-
 t/t5324-split-commit-graph.sh  | 62 ++
 5 files changed, 101 insertions(+), 14 deletions(-)

diff --git a/Documentation/git-commit-graph.txt 
b/Documentation/git-commit-graph.txt
index 365e145e82..eb5e7865f0 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -10,7 +10,7 @@ SYNOPSIS
 
 [verse]
 'git commit-graph read' [--object-dir ]
-'git commit-graph verify' [--object-dir ]
+'git commit-graph verify' [--object-dir ] [--shallow]
 'git commit-graph write'  [--object-dir ]
 
 
@@ -80,6 +80,9 @@ Used for debugging purposes.
 
 Read the commit-graph file and verify its contents against the object
 database. Used to check for corrupted data.
++
+With the `--shallow` option, only check the tip commit-graph file in
+a chain of split commit-graphs.
 
 
 EXAMPLES
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index 6c0b5b17e0..38027b83d9 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -5,17 +5,18 @@
 #include "parse-options.h"
 #include "repository.h"
 #include "commit-graph.h"
+#include "object-store.h"
 
 static char const * const builtin_commit_graph_usage[] = {
N_("git commit-graph [--object-dir ]"),
N_("git commit-graph read [--object-dir ]"),
-   N_("git commit-graph verify [--object-dir ]"),
+   N_("git commit-graph verify [--object-dir ] [--shallow]"),
N_("git commit-graph write [--object-dir ] [--append|--split] 
[--reachable|--stdin-packs|--stdin-commits] "),
NULL
 };
 
 static const char * const builtin_commit_graph_verify_usage[] = {
-   N_("git commit-graph verify [--object-dir ]"),
+   N_("git commit-graph verify [--object-dir ] [--shallow]"),
NULL
 };
 
@@ -36,6 +37,7 @@ static struct opts_commit_graph {
int stdin_commits;
int append;
int split;
+   int shallow;
 } opts;
 
 static int graph_verify(int argc, const char **argv)
@@ -45,11 +47,14 @@ static int graph_verify(int argc, const char **argv)
int open_ok;
int fd;
struct stat st;
+   int flags = 0;
 
static struct option builtin_commit_graph_verify_options[] = {
OPT_STRING(0, "object-dir", &opts.obj_dir,
   N_("dir"),
   N_("The object directory to store the graph")),
+   OPT_BOOL(0, "shallow", &opts.shallow,
+N_("if the commit-graph is split, only verify the tip 
file")),
OPT_END(),
};
 
@@ -59,21 +64,27 @@ static int graph_verify(int argc, const char **argv)
 
if (!opts.obj_dir)
opts.obj_dir = get_object_directory();
+   if (opts.shallow)
+   flags |= COMMIT_GRAPH_VERIFY_SHALLOW;
 
graph_name = get_commit_graph_filename(opts.obj_dir);
open_ok = open_commit_graph(graph_name, &fd, &st);
-   if (!open_ok && errno == ENOENT)
-   return 0;
-   if (!open_ok)
+   if (!open_ok && errno != ENOENT)
die_errno(_("Could not open commit-graph '%s'"), graph_name);
-   graph = load_commit_graph_one_fd_st(fd, &st);
+
FREE_AND_NULL(graph_name);
 
+   if (open_ok)
+   graph = load_commit_graph_one_fd_st(fd, &st);
+else
+   graph = read_commit_graph_one(the_repository, opts.obj_dir);
+
+   /* Return failure if open_ok predicted success */
if (!graph)
-   return 1;
+   return !!open_ok;
 
UNLEAK(graph);
-   return verify_commit_graph(the_repository, graph);
+   return verify_commit_graph(the_repository, graph, flags);
 }
 
 static int graph_read(int argc, const char **argv)
diff --git a/commit-graph.c b/commit-graph.c
index 315088d205..f33f4fe009 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -428,7 +428,7 @@ static struct commit_graph *load_commit_graph_chain(struct 
repository *r, const
return graph_chain;
 }
 
-static struct commit_graph *read_commit_graph_one(s

[PATCH v6 08/18] commit-graph: add --split option to builtin

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

Add a new "--split" option to the 'git commit-graph write' subcommand. This
option allows the optional behavior of writing a commit-graph chain.

The current behavior will add a tip commit-graph containing any commits that
are not in the existing commit-graph or commit-graph chain. Later changes
will allow merging the chain and expiring out-dated files.

Add a new test script (t5324-split-commit-graph.sh) that demonstrates this
behavior.

Helped-by: Johannes Schindelin 
Signed-off-by: Derrick Stolee 
---
 builtin/commit-graph.c|  10 ++-
 commit-graph.c|  14 ++--
 t/t5324-split-commit-graph.sh | 122 ++
 3 files changed, 138 insertions(+), 8 deletions(-)
 create mode 100755 t/t5324-split-commit-graph.sh

diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index d8efa5bab2..ff8bc3c8b9 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -10,7 +10,7 @@ static char const * const builtin_commit_graph_usage[] = {
N_("git commit-graph [--object-dir ]"),
N_("git commit-graph read [--object-dir ]"),
N_("git commit-graph verify [--object-dir ]"),
-   N_("git commit-graph write [--object-dir ] [--append] 
[--reachable|--stdin-packs|--stdin-commits]"),
+   N_("git commit-graph write [--object-dir ] [--append|--split] 
[--reachable|--stdin-packs|--stdin-commits]"),
NULL
 };
 
@@ -25,7 +25,7 @@ static const char * const builtin_commit_graph_read_usage[] = 
{
 };
 
 static const char * const builtin_commit_graph_write_usage[] = {
-   N_("git commit-graph write [--object-dir ] [--append] 
[--reachable|--stdin-packs|--stdin-commits]"),
+   N_("git commit-graph write [--object-dir ] [--append|--split] 
[--reachable|--stdin-packs|--stdin-commits]"),
NULL
 };
 
@@ -35,9 +35,9 @@ static struct opts_commit_graph {
int stdin_packs;
int stdin_commits;
int append;
+   int split;
 } opts;
 
-
 static int graph_verify(int argc, const char **argv)
 {
struct commit_graph *graph = NULL;
@@ -156,6 +156,8 @@ static int graph_write(int argc, const char **argv)
N_("start walk at commits listed by stdin")),
OPT_BOOL(0, "append", &opts.append,
N_("include all commits already in the commit-graph 
file")),
+   OPT_BOOL(0, "split", &opts.split,
+   N_("allow writing an incremental commit-graph file")),
OPT_END(),
};
 
@@ -169,6 +171,8 @@ static int graph_write(int argc, const char **argv)
opts.obj_dir = get_object_directory();
if (opts.append)
flags |= COMMIT_GRAPH_APPEND;
+   if (opts.split)
+   flags |= COMMIT_GRAPH_SPLIT;
 
read_replace_refs = 0;
 
diff --git a/commit-graph.c b/commit-graph.c
index f0698b0599..1224309e5f 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1472,12 +1472,16 @@ static int write_commit_graph_file(struct 
write_commit_graph_context *ctx)
}
 
if (ctx->base_graph_name) {
-   result = rename(ctx->base_graph_name,
-   
ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]);
+   const char *dest = ctx->commit_graph_filenames_after[
+   ctx->num_commit_graphs_after - 
2];
 
-   if (result) {
-   error(_("failed to rename base commit-graph 
file"));
-   return -1;
+   if (strcmp(ctx->base_graph_name, dest)) {
+   result = rename(ctx->base_graph_name, dest);
+
+   if (result) {
+   error(_("failed to rename base 
commit-graph file"));
+   return -1;
+   }
}
} else {
char *graph_name = 
get_commit_graph_filename(ctx->obj_dir);
diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh
new file mode 100755
index 00..ccd24bd22b
--- /dev/null
+++ b/t/t5324-split-commit-graph.sh
@@ -0,0 +1,122 @@
+#!/bin/sh
+
+test_description='split commit graph'
+. ./test-lib.sh
+
+GIT_TEST_COMMIT_GRAPH=0
+
+test_expect_success 'setup repo' '
+   git init &&
+   git config core.commitGraph true &&
+   infodir=".git/objects/info" &&
+   graphdir="$infodir/commit-graphs" &&
+   test_oid_init
+'
+
+graph_read_expect() {
+   NUM_BASE=0
+   if test ! -z $2
+

[PATCH v6 07/18] commit-graph: write commit-graph chains

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

Extend write_commit_graph() to write a commit-graph chain when given the
COMMIT_GRAPH_SPLIT flag.

This implementation is purposefully simplistic in how it creates a new
chain. The commits not already in the chain are added to a new tip
commit-graph file.

Much of the logic around writing a graph-{hash}.graph file and updating
the commit-graph-chain file is the same as the commit-graph file case.
However, there are several places where we need to do some extra logic
in the split case.

Track the list of graph filenames before and after the planned write.
This will be more important when we start merging graph files, but it
also allows us to upgrade our commit-graph file to the appropriate
graph-{hash}.graph file when we upgrade to a chain of commit-graphs.

Note that we use the eighth byte of the commit-graph header to store the
number of base graph files. This determines the length of the base
graphs chunk.

A subtle change of behavior with the new logic is that we do not write a
commit-graph if we our commit list is empty. This extends to the typical
case, which is reflected in t5318-commit-graph.sh.

Signed-off-by: Derrick Stolee 
---
 commit-graph.c  | 286 ++--
 commit-graph.h  |   2 +
 t/t5318-commit-graph.sh |   2 +-
 3 files changed, 278 insertions(+), 12 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index f2163e109f..f0698b0599 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -300,12 +300,18 @@ static struct commit_graph *load_commit_graph_one(const 
char *graph_file)
 
struct stat st;
int fd;
+   struct commit_graph *g;
int open_ok = open_commit_graph(graph_file, &fd, &st);
 
if (!open_ok)
return NULL;
 
-   return load_commit_graph_one_fd_st(fd, &st);
+   g = load_commit_graph_one_fd_st(fd, &st);
+
+   if (g)
+   g->filename = xstrdup(graph_file);
+
+   return g;
 }
 
 static struct commit_graph *load_commit_graph_v1(struct repository *r, const 
char *obj_dir)
@@ -730,8 +736,19 @@ struct write_commit_graph_context {
struct progress *progress;
int progress_done;
uint64_t progress_cnt;
+
+   char *base_graph_name;
+   int num_commit_graphs_before;
+   int num_commit_graphs_after;
+   char **commit_graph_filenames_before;
+   char **commit_graph_filenames_after;
+   char **commit_graph_hash_after;
+   uint32_t new_num_commits_in_base;
+   struct commit_graph *new_base_graph;
+
unsigned append:1,
-report_progress:1;
+report_progress:1,
+split:1;
 };
 
 static void write_graph_chunk_fanout(struct hashfile *f,
@@ -801,6 +818,16 @@ static void write_graph_chunk_data(struct hashfile *f, int 
hash_len,
  ctx->commits.nr,
  commit_to_sha1);
 
+   if (edge_value >= 0)
+   edge_value += ctx->new_num_commits_in_base;
+   else {
+   uint32_t pos;
+   if (find_commit_in_graph(parent->item,
+ctx->new_base_graph,
+&pos))
+   edge_value = pos;
+   }
+
if (edge_value < 0)
BUG("missing parent %s for commit %s",
oid_to_hex(&parent->item->object.oid),
@@ -821,6 +848,17 @@ static void write_graph_chunk_data(struct hashfile *f, int 
hash_len,
  ctx->commits.list,
  ctx->commits.nr,
  commit_to_sha1);
+
+   if (edge_value >= 0)
+   edge_value += ctx->new_num_commits_in_base;
+   else {
+   uint32_t pos;
+   if (find_commit_in_graph(parent->item,
+ctx->new_base_graph,
+&pos))
+   edge_value = pos;
+   }
+
if (edge_value < 0)
BUG("missing parent %s for commit %s",
oid_to_hex(&parent->item->object.oid),
@@ -878,6 +916,16 @@ static void write_graph_chunk_extra_edges(struct hashfile 
*f,
  ctx->commits.nr,
  commit_to_sha1);
 
+   if (edge_value &

[PATCH v6 18/18] commit-graph: test verify across alternates

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

The 'git commit-graph verify' subcommand loads a commit-graph from
a given object directory instead of using the standard method
prepare_commit_graph(). During development of load_commit_graph_chain(),
a version did not include prepare_alt_odb() as it was previously
run by prepare_commit_graph() in most cases.

Add a test that prevents that mistake from happening again.

Signed-off-by: Derrick Stolee 
---
 t/t5324-split-commit-graph.sh | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh
index fc0d00751c..03f45a1ed9 100755
--- a/t/t5324-split-commit-graph.sh
+++ b/t/t5324-split-commit-graph.sh
@@ -237,6 +237,7 @@ corrupt_file() {
file=$1
pos=$2
data="${3:-\0}"
+   chmod a+w "$file" &&
printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc
 }
 
@@ -295,6 +296,24 @@ test_expect_success 'verify after commit-graph-chain 
corruption' '
)
 '
 
+test_expect_success 'verify across alternates' '
+   git clone --no-hardlinks . verify-alt &&
+   (
+   cd verify-alt &&
+   rm -rf $graphdir &&
+   altdir="$(pwd)/../.git/objects" &&
+   echo "$altdir" >.git/objects/info/alternates &&
+   git commit-graph verify --object-dir="$altdir/" &&
+   test_commit extra &&
+   git commit-graph write --reachable --split &&
+   tip_file=$graphdir/graph-$(tail -n 1 
$graphdir/commit-graph-chain).graph &&
+   corrupt_file "$tip_file" 100 "\01" &&
+   test_must_fail git commit-graph verify --shallow 2>test_err &&
+   grep -v "^+" test_err >err &&
+   test_i18ngrep "commit-graph has incorrect fanout value" err
+   )
+'
+
 test_expect_success 'add octopus merge' '
git reset --hard commits/10 &&
git merge commits/3 commits/4 &&
-- 
gitgitgadget


[PATCH v6 06/18] commit-graph: rearrange chunk count logic

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

The number of chunks in a commit-graph file can change depending on
whether we need the Extra Edges Chunk. We are going to add more optional
chunks, and it will be helpful to rearrange this logic around the chunk
count before doing so.

Specifically, we need to finalize the number of chunks before writing
the commit-graph header. Further, we also need to fill out the chunk
lookup table dynamically and using "num_chunks" as we add optional
chunks is useful for adding optional chunks in the future.

Signed-off-by: Derrick Stolee 
---
 commit-graph.c | 35 +--
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 6f7961d071..f2163e109f 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1213,7 +1213,7 @@ static int write_commit_graph_file(struct 
write_commit_graph_context *ctx)
uint64_t chunk_offsets[5];
const unsigned hashsz = the_hash_algo->rawsz;
struct strbuf progress_title = STRBUF_INIT;
-   int num_chunks = ctx->num_extra_edges ? 4 : 3;
+   int num_chunks = 3;
 
ctx->graph_name = get_commit_graph_filename(ctx->obj_dir);
if (safe_create_leading_directories(ctx->graph_name)) {
@@ -1226,27 +1226,34 @@ static int write_commit_graph_file(struct 
write_commit_graph_context *ctx)
hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR);
f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
 
-   hashwrite_be32(f, GRAPH_SIGNATURE);
-
-   hashwrite_u8(f, GRAPH_VERSION);
-   hashwrite_u8(f, oid_version());
-   hashwrite_u8(f, num_chunks);
-   hashwrite_u8(f, 0); /* unused padding byte */
-
chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;
chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP;
chunk_ids[2] = GRAPH_CHUNKID_DATA;
-   if (ctx->num_extra_edges)
-   chunk_ids[3] = GRAPH_CHUNKID_EXTRAEDGES;
-   else
-   chunk_ids[3] = 0;
-   chunk_ids[4] = 0;
+   if (ctx->num_extra_edges) {
+   chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES;
+   num_chunks++;
+   }
+
+   chunk_ids[num_chunks] = 0;
 
chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr;
chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr;
-   chunk_offsets[4] = chunk_offsets[3] + 4 * ctx->num_extra_edges;
+
+   num_chunks = 3;
+   if (ctx->num_extra_edges) {
+   chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
+   4 * ctx->num_extra_edges;
+   num_chunks++;
+   }
+
+   hashwrite_be32(f, GRAPH_SIGNATURE);
+
+   hashwrite_u8(f, GRAPH_VERSION);
+   hashwrite_u8(f, oid_version());
+   hashwrite_u8(f, num_chunks);
+   hashwrite_u8(f, 0);
 
for (i = 0; i <= num_chunks; i++) {
uint32_t chunk_write[3];
-- 
gitgitgadget



[PATCH v6 09/18] commit-graph: merge commit-graph chains

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

When searching for a commit in a commit-graph chain of G graphs with N
commits, the search takes O(G log N) time. If we always add a new tip
graph with every write, the linear G term will start to dominate and
slow the lookup process.

To keep lookups fast, but also keep most incremental writes fast, create
a strategy for merging levels of the commit-graph chain. The strategy is
detailed in the commit-graph design document, but is summarized by these
two conditions:

  1. If the number of commits we are adding is more than half the number
 of commits in the graph below, then merge with that graph.

  2. If we are writing more than 64,000 commits into a single graph,
 then merge with all lower graphs.

The numeric values in the conditions above are currently constant, but
can become config options in a future update.

As we merge levels of the commit-graph chain, check that the commits
still exist in the repository. A garbage-collection operation may have
removed those commits from the object store and we do not want to
persist them in the commit-graph chain. This is a non-issue if the
'git gc' process wrote a new, single-level commit-graph file.

After we merge levels, the old graph-{hash}.graph files are no longer
referenced by the commit-graph-chain file. We will expire these files in
a future change.

Signed-off-by: Derrick Stolee 
---
 Documentation/technical/commit-graph.txt |  80 ++
 commit-graph.c   | 180 ++-
 t/t5324-split-commit-graph.sh|  13 ++
 3 files changed, 240 insertions(+), 33 deletions(-)

diff --git a/Documentation/technical/commit-graph.txt 
b/Documentation/technical/commit-graph.txt
index 1dca3bd8fe..d9c6253b0a 100644
--- a/Documentation/technical/commit-graph.txt
+++ b/Documentation/technical/commit-graph.txt
@@ -186,6 +186,86 @@ positions to refer to their parents, which may be in 
`graph-{hash1}.graph` or
 its containment in the intervals [0, X0), [X0, X0 + X1), [X0 + X1, X0 + X1 +
 X2).
 
+Each commit-graph file (except the base, `graph-{hash0}.graph`) contains data
+specifying the hashes of all files in the lower layers. In the above example,
+`graph-{hash1}.graph` contains `{hash0}` while `graph-{hash2}.graph` contains
+`{hash0}` and `{hash1}`.
+
+## Merging commit-graph files
+
+If we only added a new commit-graph file on every write, we would run into a
+linear search problem through many commit-graph files.  Instead, we use a merge
+strategy to decide when the stack should collapse some number of levels.
+
+The diagram below shows such a collapse. As a set of new commits are added, it
+is determined by the merge strategy that the files should collapse to
+`graph-{hash1}`. Thus, the new commits, the commits in `graph-{hash2}` and
+the commits in `graph-{hash1}` should be combined into a new `graph-{hash3}`
+file.
+
+   +-+
+   | |
+   |(new commits)|
+   | |
+   +-+
+   | |
+ +---+  +-+
+ |  graph-{hash2} |->| |
+ +---+  +-+
+ | | |
+ +---+  +-+
+ |   |  | |
+ |  graph-{hash1} |->| |
+ |   |  | |
+ +---+  +-+
+ |  tmp_graphXXX
+ +---+
+ |   |
+ |   |
+ |   |
+ |  graph-{hash0} |
+ |   |
+ |   |
+ |   |
+ +---+
+
+During this process, the commits to write are combined, sorted and we write the
+contents to a temporary file, all while holding a `commit-graph-chain.lock`
+lock-file.  When the file is flushed, we rename it to `graph-{hash3}`
+according to the computed `{hash3}`. Finally, we write the new chain data to
+`commit-graph-chain.lock`:
+
+```
+   {hash3}
+   {hash0}
+```
+
+We then close the lock-file.
+
+## Merge Strategy
+
+When writing a set of commits that do not exist in the commit-graph stack of
+height N, we default to creating a new file at level N + 1. We then decide to
+merge with the Nth level if one of two conditions hold:
+
+  1. The expected file size for level N + 1 is at least half the file size for
+ level N.
+
+  2. Level N + 1 contains more than 64, commits.
+
+This decision cascades down the levels: when we merge a level we create a new
+set of commits that then compares to the next level.
+
+The first condition bounds the number of levels to be logarithmic in the total
+number 

[PATCH v6 02/18] commit-graph: prepare for commit-graph chains

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

To prepare for a chain of commit-graph files, augment the
commit_graph struct to point to a base commit_graph. As we load
commits from the graph, we may actually want to read from a base
file according to the graph position.

The "graph position" of a commit is given by concatenating the
lexicographic commit orders from each of the commit-graph files in
the chain. This means that we must distinguish two values:

 * lexicographic index : the position within the lexicographic
   order in a single commit-graph file.

 * graph position: the position within the concatenated order
   of multiple commit-graph files

Given the lexicographic index of a commit in a graph, we can
compute the graph position by adding the number of commits in
the lower-level graphs. To find the lexicographic index of
a commit, we subtract the number of commits in lower-level graphs.

While here, change insert_parent_or_die() to take a uint32_t
position, as that is the type used by its only caller and that
makes more sense with the limits in the commit-graph format.

Signed-off-by: Derrick Stolee 
---
 commit-graph.c | 89 +++---
 commit-graph.h |  3 ++
 2 files changed, 81 insertions(+), 11 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 76d189de45..8f5c09363c 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -359,9 +359,18 @@ int generation_numbers_enabled(struct repository *r)
return !!first_generation;
 }
 
+static void close_commit_graph_one(struct commit_graph *g)
+{
+   if (!g)
+   return;
+
+   close_commit_graph_one(g->base_graph);
+   free_commit_graph(g);
+}
+
 void close_commit_graph(struct raw_object_store *o)
 {
-   free_commit_graph(o->commit_graph);
+   close_commit_graph_one(o->commit_graph);
o->commit_graph = NULL;
 }
 
@@ -371,18 +380,38 @@ static int bsearch_graph(struct commit_graph *g, struct 
object_id *oid, uint32_t
g->chunk_oid_lookup, g->hash_len, pos);
 }
 
+static void load_oid_from_graph(struct commit_graph *g,
+   uint32_t pos,
+   struct object_id *oid)
+{
+   uint32_t lex_index;
+
+   while (g && pos < g->num_commits_in_base)
+   g = g->base_graph;
+
+   if (!g)
+   BUG("NULL commit-graph");
+
+   if (pos >= g->num_commits + g->num_commits_in_base)
+   die(_("invalid commit position. commit-graph is likely 
corrupt"));
+
+   lex_index = pos - g->num_commits_in_base;
+
+   hashcpy(oid->hash, g->chunk_oid_lookup + g->hash_len * lex_index);
+}
+
 static struct commit_list **insert_parent_or_die(struct repository *r,
 struct commit_graph *g,
-uint64_t pos,
+uint32_t pos,
 struct commit_list **pptr)
 {
struct commit *c;
struct object_id oid;
 
-   if (pos >= g->num_commits)
-   die("invalid parent position %"PRIu64, pos);
+   if (pos >= g->num_commits + g->num_commits_in_base)
+   die("invalid parent position %"PRIu32, pos);
 
-   hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * pos);
+   load_oid_from_graph(g, pos, &oid);
c = lookup_commit(r, &oid);
if (!c)
die(_("could not find commit %s"), oid_to_hex(&oid));
@@ -392,7 +421,14 @@ static struct commit_list **insert_parent_or_die(struct 
repository *r,
 
 static void fill_commit_graph_info(struct commit *item, struct commit_graph 
*g, uint32_t pos)
 {
-   const unsigned char *commit_data = g->chunk_commit_data + 
GRAPH_DATA_WIDTH * pos;
+   const unsigned char *commit_data;
+   uint32_t lex_index;
+
+   while (pos < g->num_commits_in_base)
+   g = g->base_graph;
+
+   lex_index = pos - g->num_commits_in_base;
+   commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index;
item->graph_pos = pos;
item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
 }
@@ -405,10 +441,25 @@ static int fill_commit_in_graph(struct repository *r,
uint32_t *parent_data_ptr;
uint64_t date_low, date_high;
struct commit_list **pptr;
-   const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len 
+ 16) * pos;
+   const unsigned char *commit_data;
+   uint32_t lex_index;
 
-   item->object.parsed = 1;
+   while (pos < g->num_commits_in_base)
+   g = g->base_graph;
+
+   if (pos >= g->num_commits + g->num_commits_in_base)
+   die(_("invalid commit position. commit-graph is l

[PATCH v6 00/18] Commit-graph: Write incremental files

2019-06-18 Thread Derrick Stolee via GitGitGadget
This version is now ready for review.

The commit-graph is a valuable performance feature for repos with large
commit histories, but suffers from the same problem as git repack: it
rewrites the entire file every time. This can be slow when there are
millions of commits, especially after we stopped reading from the
commit-graph file during a write in 43d3561 (commit-graph write: don't die
if the existing graph is corrupt).

Instead, create a "chain" of commit-graphs in the
.git/objects/info/commit-graphs folder with name graph-{hash}.graph. The
list of hashes is given by the commit-graph-chain file, and also in a "base
graph chunk" in the commit-graph format. As we read a chain, we can verify
that the hashes match the trailing hash of each commit-graph we read along
the way and each hash below a level is expected by that graph file.

When writing, we don't always want to add a new level to the stack. This
would eventually result in performance degradation, especially when
searching for a commit (before we know its graph position). We decide to
merge levels of the stack when the new commits we will write is less than
half of the commits in the level above. This can be tweaked by the
--size-multiple and --max-commits options.

The performance is necessarily amortized across multiple writes, so I tested
by writing commit-graphs from the (non-rc) tags in the Linux repo. My test
included 72 tags, and wrote everything reachable from the tag using 
--stdin-commits. Here are the overall perf numbers:

write --stdin-commits: 8m 12s
write --stdin-commits --split:28s
write --split && verify --shallow: 60s

Updates in V3:

 * git commit-graph verify now works on commit-graph chains. We do a simple
   test to check the behavior of a new --shallow option.
   
   
 * When someone writes a flat commit-graph, we now expire the old chain
   according to the expire time.
   
   
 * The "max commits" limit is no longer enabled by default, but instead is
   enabled by a --max-commits= option. Ignored if n=0.
   
   

Updates in V4:

Johannes pointed out some test failures on the Windows platform. We found
that the tests were not running on Windows in the gitgitgadget PR builds,
which is now resolved.

 * We need to close commit-graphs recursively down the chain. This prevented
   an unlink() from working because of an open handle.
   
   
 * Creating the alternates file used a path-specification that didn't work
   on Windows.
   
   
 * Renaming a file to the same name failed, but is probably related to the
   unlink() error mentioned above.
   
   

Updates in V5:

 * Responding to multiple items of feedback. Thanks Philip, Junio, and
   Ramsay!
   
   
 * Used the test coverage report to find holes in the test coverage. While
   adding tests, I found a bug in octopus merges. The fix is in the rewrite
   of "deduplicate_commits()" as "sort_and_scan_merged_commits()" and
   covered by the new tests.
   
   

Updates in V6:

 * Rebased onto ds/close-object-store and resolved conflicts around
   close_commit_graph().
   
   
 * Updated path normalization to be resilient to double-slashes and trailing
   slashes.
   
   
 * Added a prepare_alt_odb() call in load_commit_graph_one() for
   cross-alternate graph loads during 'verify' subcommands.
   
   

Thanks, -Stolee

[1] 
https://github.com/git/git/commit/43d356180556180b4ef6ac232a14498a5bb2b446
commit-graph write: don't die if the existing graph is corrupt

Derrick Stolee (18):
  commit-graph: document commit-graph chains
  commit-graph: prepare for commit-graph chains
  commit-graph: rename commit_compare to oid_compare
  commit-graph: load commit-graph chains
  commit-graph: add base graphs chunk
  commit-graph: rearrange chunk count logic
  commit-graph: write commit-graph chains
  commit-graph: add --split option to builtin
  commit-graph: merge commit-graph chains
  commit-graph: allow cross-alternate chains
  commit-graph: expire commit-graph files
  commit-graph: create options for split files
  commit-graph: verify chains with --shallow mode
  commit-graph: clean up chains after flattened write
  commit-graph: test octopus merges with --split
  commit-graph: test --split across alternate without --split
  commit-graph: normalize commit-graph filenames
  commit-graph: test verify across alternates

 Documentation/git-commit-graph.txt|  26 +-
 .../technical/commit-graph-format.txt |  11 +-
 Documentation/technical/commit-graph.txt  | 195 +
 builtin/commit-graph.c|  58 +-
 builtin/commit.c  |   2 +-
 builtin/gc.c  |   3 +-
 commit-graph.c| 823 --
 commit-graph.h|  25 +-
 t/t5318-commit-graph.sh   |   2 +-
 t/t5324-split-commit-graph.sh | 343 +++

[PATCH v6 01/18] commit-graph: document commit-graph chains

2019-06-18 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

Add a basic description of commit-graph chains. More details about the
feature will be added as we add functionality. This introduction gives a
high-level overview to the goals of the feature and the basic layout of
commit-graph chains.

Signed-off-by: Derrick Stolee 
---
 Documentation/technical/commit-graph.txt | 59 
 1 file changed, 59 insertions(+)

diff --git a/Documentation/technical/commit-graph.txt 
b/Documentation/technical/commit-graph.txt
index fb53341d5e..1dca3bd8fe 100644
--- a/Documentation/technical/commit-graph.txt
+++ b/Documentation/technical/commit-graph.txt
@@ -127,6 +127,65 @@ Design Details
   helpful for these clones, anyway. The commit-graph will not be read or
   written when shallow commits are present.
 
+Commit Graphs Chains
+
+
+Typically, repos grow with near-constant velocity (commits per day). Over time,
+the number of commits added by a fetch operation is much smaller than the
+number of commits in the full history. By creating a "chain" of commit-graphs,
+we enable fast writes of new commit data without rewriting the entire commit
+history -- at least, most of the time.
+
+## File Layout
+
+A commit-graph chain uses multiple files, and we use a fixed naming convention
+to organize these files. Each commit-graph file has a name
+`$OBJDIR/info/commit-graphs/graph-{hash}.graph` where `{hash}` is the hex-
+valued hash stored in the footer of that file (which is a hash of the file's
+contents before that hash). For a chain of commit-graph files, a plain-text
+file at `$OBJDIR/info/commit-graphs/commit-graph-chain` contains the
+hashes for the files in order from "lowest" to "highest".
+
+For example, if the `commit-graph-chain` file contains the lines
+
+```
+   {hash0}
+   {hash1}
+   {hash2}
+```
+
+then the commit-graph chain looks like the following diagram:
+
+ +---+
+ |  graph-{hash2}.graph  |
+ +---+
+ |
+ +---+
+ |   |
+ |  graph-{hash1}.graph  |
+ |   |
+ +---+
+ |
+ +---+
+ |   |
+ |   |
+ |   |
+ |  graph-{hash0}.graph  |
+ |   |
+ |   |
+ |   |
+ +---+
+
+Let X0 be the number of commits in `graph-{hash0}.graph`, X1 be the number of
+commits in `graph-{hash1}.graph`, and X2 be the number of commits in
+`graph-{hash2}.graph`. If a commit appears in position i in 
`graph-{hash2}.graph`,
+then we interpret this as being the commit in position (X0 + X1 + i), and that
+will be used as its "graph position". The commits in `graph-{hash2}.graph` use 
these
+positions to refer to their parents, which may be in `graph-{hash1}.graph` or
+`graph-{hash0}.graph`. We can navigate to an arbitrary commit in position j by 
checking
+its containment in the intervals [0, X0), [X0, X0 + X1), [X0 + X1, X0 + X1 +
+X2).
+
 Related Links
 -
 [0] https://bugs.chromium.org/p/git/issues/detail?id=8
-- 
gitgitgadget



ds/commit-graph-incremental (was Re: What's cooking in git.git (Jun 2019, #04; Fri, 14))

2019-06-18 Thread Derrick Stolee
On 6/14/2019 4:50 PM, Junio C Hamano wrote:
> * ds/commit-graph-incremental (2019-06-12) 16 commits
>  - commit-graph: test --split across alternate without --split
>  - commit-graph: test octopus merges with --split
>  - commit-graph: clean up chains after flattened write
>  - commit-graph: verify chains with --shallow mode
>  - commit-graph: create options for split files
>  - commit-graph: expire commit-graph files
>  - commit-graph: allow cross-alternate chains
>  - commit-graph: merge commit-graph chains
>  - commit-graph: add --split option to builtin
>  - commit-graph: write commit-graph chains
>  - commit-graph: rearrange chunk count logic
>  - commit-graph: add base graphs chunk
>  - commit-graph: load commit-graph chains
>  - commit-graph: rename commit_compare to oid_compare
>  - commit-graph: prepare for commit-graph chains
>  - commit-graph: document commit-graph chains
>  (this branch uses ds/commit-graph-write-refactor; is tangled with 
> ds/close-object-store.)
> 
>  The commits in a repository can be described by multiple
>  commit-graph files now, which allows the commit-graph files to be
>  updated incrementally.
> 
>  Will merge to 'next'.

Please hold on this one. I've found multiple issues while integrating
this with VFS for Git and there are enough to merit a full re-roll.
Please ignore the two patches I sent yesterday as I will incorporate them
into the next version of this series.

Thanks,
-Stolee


[PATCH v2] commit-graph: normalize commit-graph filenames

2019-06-17 Thread Derrick Stolee
When writing commit-graph files, we append path data to an
object directory, which may be specified by the user via the
'--object-dir' option. If the user supplies a trailing slash,
or some other alternative path format, the resulting path may
be usable for writing to the correct location. However, when
expiring graph files from the /info/commit-graphs
directory during a write, we need to compare paths with exact
string matches.

Normalize the commit-graph filenames to avoid ambiguity. This
creates extra allocations, but this is a constant multiple of
the number of commit-graph files, which should be a number in
the single digits.

Further normalize the object directory in the context. Due to
a comparison between g->obj_dir and ctx->obj_dir in
split_graph_merge_strategy(), a trailing slash would prevent
any merging of layers within the same object directory. The
check is there to ensure we do not merge across alternates.
Update the tests to include a case with this trailing slash
problem.

Signed-off-by: Derrick Stolee 
---

Sorry for the previously-bad patch. This one I believe to
be a full correction, and also includes a test to ensure
we do not make this mistake again.

Thanks,
-Stolee

 commit-graph.c | 36 
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 8842f93910..2836a6fc3d 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -43,15 +43,23 @@
 
 char *get_commit_graph_filename(const char *obj_dir)
 {
-   return xstrfmt("%s/info/commit-graph", obj_dir);
+   char *filename = xstrfmt("%s/info/commit-graph", obj_dir);
+   char *normalized = xmalloc(strlen(filename) + 1);
+   normalize_path_copy(normalized, filename);
+   free(filename);
+   return normalized;
 }
 
 static char *get_split_graph_filename(const char *obj_dir,
  const char *oid_hex)
 {
-   return xstrfmt("%s/info/commit-graphs/graph-%s.graph",
-  obj_dir,
-  oid_hex);
+   char *filename = xstrfmt("%s/info/commit-graphs/graph-%s.graph",
+obj_dir,
+oid_hex);
+   char *normalized = xmalloc(strlen(filename) + 1);
+   normalize_path_copy(normalized, filename);
+   free(filename);
+   return normalized;
 }
 
 static char *get_chain_filename(const char *obj_dir)
@@ -744,7 +752,7 @@ struct packed_oid_list {
 
 struct write_commit_graph_context {
struct repository *r;
-   const char *obj_dir;
+   char *obj_dir;
char *graph_name;
struct packed_oid_list oids;
struct packed_commit_list commits;
@@ -1693,7 +1701,11 @@ static void expire_commit_graphs(struct 
write_commit_graph_context *ctx)
}
 
strbuf_addstr(&path, ctx->obj_dir);
-   strbuf_addstr(&path, "/info/commit-graphs");
+
+   if (path.buf[path.len - 1] != '/')
+   strbuf_addch(&path, '/');
+
+   strbuf_addstr(&path, "info/commit-graphs");
dir = opendir(path.buf);
 
if (!dir) {
@@ -1727,7 +1739,6 @@ static void expire_commit_graphs(struct 
write_commit_graph_context *ctx)
 
if (!found)
unlink(path.buf);
-
}
 }
 
@@ -1739,6 +1750,7 @@ int write_commit_graph(const char *obj_dir,
 {
struct write_commit_graph_context *ctx;
uint32_t i, count_distinct = 0;
+   size_t len;
int res = 0;
 
if (!commit_graph_compatible(the_repository))
@@ -1746,7 +1758,14 @@ int write_commit_graph(const char *obj_dir,
 
ctx = xcalloc(1, sizeof(struct write_commit_graph_context));
ctx->r = the_repository;
-   ctx->obj_dir = obj_dir;
+
+   /* normalize object dir with no trailing slash */
+   ctx->obj_dir = xmallocz(strlen(obj_dir) + 1);
+   normalize_path_copy(ctx->obj_dir, obj_dir);
+   len = strlen(ctx->obj_dir);
+   if (len && ctx->obj_dir[len - 1] == '/')
+   ctx->obj_dir[len - 1] = 0;
+
ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0;
ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0;
ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0;
@@ -1854,6 +1873,7 @@ int write_commit_graph(const char *obj_dir,
free(ctx->graph_name);
free(ctx->commits.list);
free(ctx->oids.list);
+   free(ctx->obj_dir);
 
if (ctx->commit_graph_filenames_after) {
for (i = 0; i < ctx->num_commit_graphs_after; i++) {
-- 
2.22.0.430.g4f3aec613b



Re: [PATCH] commit-graph: normalize commit-graph filenames

2019-06-17 Thread Derrick Stolee
On 6/17/2019 11:02 AM, Derrick Stolee wrote:
> - strbuf_setlen(&path, dirnamelen);
> - strbuf_addstr(&path, de->d_name);
> + char *filename = get_split_graph_filename(path.buf, de->d_name);

Please ignore this patch for now, as this line is incredibly stupid and
breaks the tests that I somehow forgot to run before submitting.

Thanks,
-Stolee




[PATCH] commit-graph: normalize commit-graph filenames

2019-06-17 Thread Derrick Stolee
When writing commit-graph files, we append path data to an
object directory, which may be specified by the user via the
'--object-dir' option. If the user supplies a trailing slash,
or some other alternative path format, the resulting path may
be usable for writing to the correct location. However, when
expiring graph files from the /info/commit-graphs
directory during a write, we need to compare paths with exact
string matches.

Normalize the commit-graph filenames to avoid ambiguity. This
creates extra allocations, but this is a constant multiple of
the number of commit-graph files, which should be a number in
the single digits.

To complete the effectiveness, we need to use the filename
methods when iterating over the info/commit-graphs directory
instead of constructing the paths manually.

Signed-off-by: Derrick Stolee 
---

Junio,

I noticed the need for this patch while integrating the
split commit-graph series into VFS for Git, since our
C# code was storing the alternates directory with a trailing
slash, causing some confusion in the expire logic.

This could be added to ds/commit-graph-incremental (and I
could add it to a future revision, if necessary) or could
be a new branch on top of that series.

Thanks,
-Stolee

 commit-graph.c | 29 +
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 8842f93910..e0f3e8a954 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -43,15 +43,23 @@
 
 char *get_commit_graph_filename(const char *obj_dir)
 {
-   return xstrfmt("%s/info/commit-graph", obj_dir);
+   char *filename = xstrfmt("%s/info/commit-graph", obj_dir);
+   char *normalized = xmalloc(strlen(filename) + 1);
+   normalize_path_copy(normalized, filename);
+   free(filename);
+   return normalized;
 }
 
 static char *get_split_graph_filename(const char *obj_dir,
  const char *oid_hex)
 {
-   return xstrfmt("%s/info/commit-graphs/graph-%s.graph",
-  obj_dir,
-  oid_hex);
+   char *filename = xstrfmt("%s/info/commit-graphs/graph-%s.graph",
+obj_dir,
+oid_hex);
+   char *normalized = xmalloc(strlen(filename) + 1);
+   normalize_path_copy(normalized, filename);
+   free(filename);
+   return normalized;
 }
 
 static char *get_chain_filename(const char *obj_dir)
@@ -1680,7 +1688,6 @@ static void expire_commit_graphs(struct 
write_commit_graph_context *ctx)
struct strbuf path = STRBUF_INIT;
DIR *dir;
struct dirent *de;
-   size_t dirnamelen;
timestamp_t expire_time = time(NULL);
 
if (ctx->split_opts && ctx->split_opts->expire_time)
@@ -1701,16 +1708,13 @@ static void expire_commit_graphs(struct 
write_commit_graph_context *ctx)
return;
}
 
-   strbuf_addch(&path, '/');
-   dirnamelen = path.len;
while ((de = readdir(dir)) != NULL) {
struct stat st;
uint32_t i, found = 0;
 
-   strbuf_setlen(&path, dirnamelen);
-   strbuf_addstr(&path, de->d_name);
+   char *filename = get_split_graph_filename(path.buf, de->d_name);
 
-   stat(path.buf, &st);
+   stat(filename, &st);
 
if (st.st_mtime > expire_time)
continue;
@@ -1719,15 +1723,16 @@ static void expire_commit_graphs(struct 
write_commit_graph_context *ctx)
 
for (i = 0; i < ctx->num_commit_graphs_after; i++) {
if (!strcmp(ctx->commit_graph_filenames_after[i],
-   path.buf)) {
+   filename)) {
found = 1;
break;
}
}
 
if (!found)
-   unlink(path.buf);
+   unlink(filename);
 
+   free(filename);
}
 }
 
-- 
2.22.0.430.g4f3aec613b



Re: [PATCH] fsmonitor: avoid signed integer overflow / infinite loop

2019-06-16 Thread Derrick Stolee
On 6/15/2019 12:11 PM, Carlo Marcelo Arenas Belón wrote:
> 883e248b8a ("fsmonitor: teach git to optionally utilize a file system
> monitor to speed up detecting new or changed files.", 2017-09-22) uses
> an int in a loop that would wrap if index_state->cache_nr (unsigned)
> is bigger than INT_MAX

Thanks for catching this. I wonder if there is a compiler setting or
static analysis that caught this so we can avoid the issue in the future.

Also, INT_MAX is ~2.1 billion, and the largest indexes I know about have
around 4 million entries, so it is unlikely that you hit this as a runtime
error. Still worth fixing!

Thanks,
-Stolee


Re: [PATCH 2/2] use COPY_ARRAY for copying arrays

2019-06-16 Thread Derrick Stolee
On 6/15/2019 2:36 PM, René Scharfe wrote:
> Convert calls of memcpy(3) to use COPY_ARRAY, which shortens and
> simplifies the code a bit.

These changes do look simpler. Thanks!

> Patch generated by Coccinelle and contrib/coccinelle/array.cocci.

And this auto-generation is particularly useful!

> 
> Signed-off-by: Rene Scharfe 
> ---
>  fast-import.c | 2 +-
>  kwset.c   | 2 +-
>  packfile.c| 6 +++---
>  pretty.c  | 4 ++--
>  4 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/kwset.c b/kwset.c
> index 4fb6455aca..090ffcafa2 100644
> --- a/kwset.c
> +++ b/kwset.c
> @@ -475,7 +475,7 @@ kwsprep (kwset_t kws)
>   for (i = 0; i < NCHAR; ++i)
> kwset->next[i] = next[U(trans[i])];
>else
> - memcpy(kwset->next, next, NCHAR * sizeof(struct trie *));
> + COPY_ARRAY(kwset->next, next, NCHAR);

I was unfamiliar with kwset.c, so I took a look and noticed that
it was adapted from GNU grep, so this change takes us even farther
from the original source (and closer to Git source conventions).

I think this is the right thing to do, but thought I'd point it
out in case anyone thought differently.

Both patches LGTM.

Thanks,
-Stolee


Git Test Coverage Report (Friday, June 14)

2019-06-14 Thread Derrick Stolee
"));
5e5c46e6 1047) continue;
ef5b83f2 1169) error(_("error opening index for %s"), packname.buf);
ef5b83f2 1170) return -1;
4c9efe85 1209) continue;
b2c83060 1232) display_progress(ctx->progress, ctx->approx_nr_objects);
5e5c46e6 1321) error(_("failed to write correct number of base graph ids"));
5e5c46e6 1322) return -1;
238def57 1354) error(_("unable to create leading directories of %s"),
238def57 1356) return -1;
5e5c46e6 1366) error(_("unable to create '%s'"), ctx->graph_name);
5e5c46e6 1367) return -1;
5e5c46e6 1441) return -1;
5e5c46e6 1467) error(_("unable to open commit-graph chain file"));
5e5c46e6 1468) return -1;
efb38519 1479) error(_("failed to rename base commit-graph file"));
5e5c46e6 1499) error(_("failed to rename temporary commit-graph file"));
5e5c46e6 1500) return -1;
97e2eb8d 1529) break;
97e2eb8d 1544) ctx->num_commit_graphs_after = 1;
97e2eb8d 1545) ctx->new_base_graph = NULL;
857973d4 1621) die(_("unexpected duplicate commit id %s"),
857973d4 1622) oid_to_hex(&ctx->commits.list[i]->object.oid));
e2cd6bfb 1789) ctx->oids.alloc = split_opts->max_commits;
e103f727 1825) error(_("the commit graph format cannot write %d commits"), 
count_distinct);
e103f727 1826) res = -1;
e103f727 1827) goto cleanup;
e103f727 1836) error(_("too many commits to write graph"));
e103f727 1837) res = -1;
e103f727 1838) goto cleanup;

config.c
07b2c0ea 283) return 0;

kwset.c
08e04506 45) BUG("Cannot allocate a negative amount: %ld", size);

midx.c
d01bf2e6 478) close_pack(packs->info[packs->nr].p);
d01bf2e6 479) FREE_AND_NULL(packs->info[packs->nr].p);
19575c7c 738) BUG("object %s is in an expired pack with int-id %d",
19575c7c 865) error(_("did not see pack-file %s to drop"),
19575c7c 867) drop_index++;
19575c7c 868) missing_drops++;
19575c7c 869) i--;
19575c7c 876) result = 1;
19575c7c 877) goto cleanup;
19575c7c 1194) return 0;
19575c7c 1209) continue;
ce1e4a10 1248) return 0;
ce1e4a10 1275) continue;
ce1e4a10 1295) continue;
ce1e4a10 1297) continue;
ce1e4a10 1329) return 0;
ce1e4a10 1350) error(_("could not start pack-objects"));
ce1e4a10 1351) result = 1;
ce1e4a10 1352) goto cleanup;
ce1e4a10 1369) error(_("could not finish pack-objects"));
ce1e4a10 1370) result = 1;
ce1e4a10 1371) goto cleanup;

name-hash.c
568a05c5 348) assert(begin >= 0);
568a05c5 350) int mid = begin + ((end - begin) >> 1);

oidmap.c
3ff9c400 42) hashmap_entry_init(&entry, sha1hash(key->hash));

packfile.c
8434e85d 372) strbuf_release(&buf);
8434e85d 373) return;

pager.c
0ec9e491 197) fputs("\r\033[K", stderr);

progress.c
70d07115 121) fprintf(stderr, "  %s%s", counters_sb->buf,
70d07115 127) fprintf(stderr, "%s:\n  %s%s",

read-cache.c
ee70c128 1723) if (advice_unknown_index_extension) {
ee70c128 1724) warning(_("ignoring optional %.4s index extension"), ext);
ee70c128 1725) advise(_("This is likely due to the file having been written by 
a newer\n"

ref-filter.c
2582083f 93) keydata_aka_refname ? keydata_aka_refname : k->wt->head_ref);

sequencer.c
37e9ee5c 293) ret = -1;
37e9ee5c 311) ret = error(_("could not remove '%s'"), buf.buf);

sh-i18n--envsubst.c
568a05c5 252)   size_t j = j1 + ((j2 - j1) >> 1);

t/helper/test-oidmap.c
8b7e641f 60) if (get_oid(p1, &oid)) {
8b7e641f 61) printf("Unknown oid: %s\n", p1);
8b7e641f 62) continue;
8b7e641f 66) FLEX_ALLOC_STR(entry, name, p2);
8b7e641f 67) oidcpy(&entry->entry.oid, &oid);
8b7e641f 70) oidmap_put(&map, entry);
8b7e641f 105) if (get_oid(p1, &oid)) {
8b7e641f 106) printf("Unknown oid: %s\n", p1);
8b7e641f 107) continue;
8b7e641f 111) entry = oidmap_remove(&map, &oid);
8b7e641f 114) puts(entry ? entry->name : "NULL");
8b7e641f 115) free(entry);

Commits introducting uncovered code:
Christian Couder    8b7e641f t/helper: add test-oidmap.c
Christian Couder3ff9c400 oidmap: use sha1hash() instead of static 
hash() function
Denton Liu  526c03b5 rebase: refactor can_fast_forward into goto tower
Denton Liu  10572de1 rebase: fast-forward --onto in more cases
Denton Liu  07b2c0ea config: learn the "onbranch:" includeIf condition
Derrick Stolee  19575c7c multi-pack-index: implement 'expire' subcommand
Derrick Stolee  8434e85d repack: refactor pack deletion for future use
Derrick Stolee  ce1e4a10 midx: implement midx_repack()
Derrick Stolee  d01bf2e6 midx: refactor permutation logic and pack sorting
Derrick Stolee  e103f727 commit-graph: return with errors during write
Derrick Stolee  e2cd6bfb commit-graph: create options for split files
Derrick Stolee  857973d4 commit-graph: merge commit-graph chains
Derrick Stolee  97e2eb8d commit-graph: allow cross-alternate chains
Derri

Re: [PATCH] cleanup: fix possible overflow errors in binary search, part 2

2019-06-13 Thread Derrick Stolee
On 6/13/2019 1:51 PM, René Scharfe wrote:
> Calculating the sum of two array indexes to find the midpoint between
> them can overflow, i.e. code like this is unsafe for big arrays:
> 
>   mid = (first + last) >> 1;
> 
> Make sure the intermediate value stays within the boundaries instead,
> like this:
> 
>   mid = first + ((last - first) >> 1);
> 
> The loop condition of the binary search makes sure that 'last' is
> always greater than 'first', so this is safe as long as 'first' is
> not negative.  And that can be verified easily using the pre-context
> of each change, except for name-hash.c, so add an assertion to that
> effect there.
> 
> The unsafe calculations were found with:
> 
>   git grep '(.*+.*) *>> *1'
> 
> This is a continuation of 19716b21a4 (cleanup: fix possible overflow
> errors in binary search, 2017-10-08).

Thank you for finding these additional examples!

LGTM. Pretty easy to see this is correct.

-Stolee



[PATCH v5 09/11] commit-graph: extract count_distinct_commits()

2019-06-12 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

The write_commit_graph() method is too complex, so we are
extracting helper functions one by one.

Extract count_distinct_commits(), which sorts the oids list, then
iterates through to find duplicates.

Signed-off-by: Derrick Stolee 
---
 commit-graph.c | 35 ++-
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 61cb43ddf8..1a0a875a7b 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -963,6 +963,27 @@ static void fill_oids_from_all_packs(struct 
write_commit_graph_context *ctx)
stop_progress(&ctx->progress);
 }
 
+static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx)
+{
+   uint32_t i, count_distinct = 1;
+
+   if (ctx->report_progress)
+   ctx->progress = start_delayed_progress(
+   _("Counting distinct commits in commit graph"),
+   ctx->oids.nr);
+   display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */
+   QSORT(ctx->oids.list, ctx->oids.nr, commit_compare);
+
+   for (i = 1; i < ctx->oids.nr; i++) {
+   display_progress(ctx->progress, i + 1);
+   if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i]))
+   count_distinct++;
+   }
+   stop_progress(&ctx->progress);
+
+   return count_distinct;
+}
+
 int write_commit_graph(const char *obj_dir,
   struct string_list *pack_indexes,
   struct string_list *commit_hex,
@@ -1024,19 +1045,7 @@ int write_commit_graph(const char *obj_dir,
 
close_reachable(ctx);
 
-   if (ctx->report_progress)
-   ctx->progress = start_delayed_progress(
-   _("Counting distinct commits in commit graph"),
-   ctx->oids.nr);
-   display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */
-   QSORT(ctx->oids.list, ctx->oids.nr, commit_compare);
-   count_distinct = 1;
-   for (i = 1; i < ctx->oids.nr; i++) {
-   display_progress(ctx->progress, i + 1);
-   if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i]))
-   count_distinct++;
-   }
-   stop_progress(&ctx->progress);
+   count_distinct = count_distinct_commits(ctx);
 
if (count_distinct >= GRAPH_EDGE_LAST_MASK) {
error(_("the commit graph format cannot write %d commits"), 
count_distinct);
-- 
gitgitgadget



[PATCH v5 11/11] commit-graph: extract write_commit_graph_file()

2019-06-12 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

The write_commit_graph() method is too complex, so we are
extracting helper functions one by one.

Extract write_commit_graph_file() that takes all of the information
in the context struct and writes the data to a commit-graph file.

Signed-off-by: Derrick Stolee 
---
 commit-graph.c | 155 +
 1 file changed, 80 insertions(+), 75 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 72f9c5c7e2..9d2c72f5b4 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1015,21 +1015,91 @@ static void copy_oids_to_commits(struct 
write_commit_graph_context *ctx)
stop_progress(&ctx->progress);
 }
 
-int write_commit_graph(const char *obj_dir,
-  struct string_list *pack_indexes,
-  struct string_list *commit_hex,
-  unsigned int flags)
+static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 {
-   struct write_commit_graph_context *ctx;
+   uint32_t i;
struct hashfile *f;
-   uint32_t i, count_distinct = 0;
-   char *graph_name = NULL;
struct lock_file lk = LOCK_INIT;
uint32_t chunk_ids[5];
uint64_t chunk_offsets[5];
-   int num_chunks;
const unsigned hashsz = the_hash_algo->rawsz;
struct strbuf progress_title = STRBUF_INIT;
+   int num_chunks = ctx->num_extra_edges ? 4 : 3;
+
+   ctx->graph_name = get_commit_graph_filename(ctx->obj_dir);
+   if (safe_create_leading_directories(ctx->graph_name)) {
+   UNLEAK(ctx->graph_name);
+   error(_("unable to create leading directories of %s"),
+   ctx->graph_name);
+   return -1;
+   }
+
+   hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR);
+   f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
+
+   hashwrite_be32(f, GRAPH_SIGNATURE);
+
+   hashwrite_u8(f, GRAPH_VERSION);
+   hashwrite_u8(f, oid_version());
+   hashwrite_u8(f, num_chunks);
+   hashwrite_u8(f, 0); /* unused padding byte */
+
+   chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;
+   chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP;
+   chunk_ids[2] = GRAPH_CHUNKID_DATA;
+   if (ctx->num_extra_edges)
+   chunk_ids[3] = GRAPH_CHUNKID_EXTRAEDGES;
+   else
+   chunk_ids[3] = 0;
+   chunk_ids[4] = 0;
+
+   chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
+   chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
+   chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr;
+   chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr;
+   chunk_offsets[4] = chunk_offsets[3] + 4 * ctx->num_extra_edges;
+
+   for (i = 0; i <= num_chunks; i++) {
+   uint32_t chunk_write[3];
+
+   chunk_write[0] = htonl(chunk_ids[i]);
+   chunk_write[1] = htonl(chunk_offsets[i] >> 32);
+   chunk_write[2] = htonl(chunk_offsets[i] & 0x);
+   hashwrite(f, chunk_write, 12);
+   }
+
+   if (ctx->report_progress) {
+   strbuf_addf(&progress_title,
+   Q_("Writing out commit graph in %d pass",
+  "Writing out commit graph in %d passes",
+  num_chunks),
+   num_chunks);
+   ctx->progress = start_delayed_progress(
+   progress_title.buf,
+   num_chunks * ctx->commits.nr);
+   }
+   write_graph_chunk_fanout(f, ctx);
+   write_graph_chunk_oids(f, hashsz, ctx);
+   write_graph_chunk_data(f, hashsz, ctx);
+   if (ctx->num_extra_edges)
+   write_graph_chunk_extra_edges(f, ctx);
+   stop_progress(&ctx->progress);
+   strbuf_release(&progress_title);
+
+   close_commit_graph(ctx->r);
+   finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
+   commit_lock_file(&lk);
+
+   return 0;
+}
+
+int write_commit_graph(const char *obj_dir,
+  struct string_list *pack_indexes,
+  struct string_list *commit_hex,
+  unsigned int flags)
+{
+   struct write_commit_graph_context *ctx;
+   uint32_t i, count_distinct = 0;
int res = 0;
 
if (!commit_graph_compatible(the_repository))
@@ -1096,75 +1166,10 @@ int write_commit_graph(const char *obj_dir,
 
compute_generation_numbers(ctx);
 
-   num_chunks = ctx->num_extra_edges ? 4 : 3;
-
-   ctx->graph_name = get_commit_graph_filename(ctx->obj_dir);
-   if (safe_create_leading_directories(ctx->graph_name)) {
-   UNLEAK(ctx->graph_name);
-   error(_("unable to create leading directories of %s"),
-

[PATCH v5 07/11] commit-graph: extract fill_oids_from_commit_hex()

2019-06-12 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

The write_commit_graph() method is too complex, so we are
extracting helper functions one by one.

Extract fill_oids_from_commit_hex() that reads the given commit
id list and fille the oid list in the context.

Signed-off-by: Derrick Stolee 
---
 commit-graph.c | 72 --
 1 file changed, 40 insertions(+), 32 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 02e5f8c651..4fae1fcdb2 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -912,6 +912,44 @@ static int fill_oids_from_packs(struct 
write_commit_graph_context *ctx,
return 0;
 }
 
+static void fill_oids_from_commit_hex(struct write_commit_graph_context *ctx,
+ struct string_list *commit_hex)
+{
+   uint32_t i;
+   struct strbuf progress_title = STRBUF_INIT;
+
+   if (ctx->report_progress) {
+   strbuf_addf(&progress_title,
+   Q_("Finding commits for commit graph from %d ref",
+  "Finding commits for commit graph from %d refs",
+  commit_hex->nr),
+   commit_hex->nr);
+   ctx->progress = start_delayed_progress(
+   progress_title.buf,
+   commit_hex->nr);
+   }
+   for (i = 0; i < commit_hex->nr; i++) {
+   const char *end;
+   struct object_id oid;
+   struct commit *result;
+
+   display_progress(ctx->progress, i + 1);
+   if (commit_hex->items[i].string &&
+   parse_oid_hex(commit_hex->items[i].string, &oid, &end))
+   continue;
+
+   result = lookup_commit_reference_gently(ctx->r, &oid, 1);
+
+   if (result) {
+   ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, 
ctx->oids.alloc);
+   oidcpy(&ctx->oids.list[ctx->oids.nr], 
&(result->object.oid));
+   ctx->oids.nr++;
+   }
+   }
+   stop_progress(&ctx->progress);
+   strbuf_release(&progress_title);
+}
+
 int write_commit_graph(const char *obj_dir,
   struct string_list *pack_indexes,
   struct string_list *commit_hex,
@@ -965,38 +1003,8 @@ int write_commit_graph(const char *obj_dir,
goto cleanup;
}
 
-   if (commit_hex) {
-   if (ctx->report_progress) {
-   strbuf_addf(&progress_title,
-   Q_("Finding commits for commit graph from 
%d ref",
-  "Finding commits for commit graph from 
%d refs",
-  commit_hex->nr),
-   commit_hex->nr);
-   ctx->progress = start_delayed_progress(
-   progress_title.buf,
-   commit_hex->nr);
-   }
-   for (i = 0; i < commit_hex->nr; i++) {
-   const char *end;
-   struct object_id oid;
-   struct commit *result;
-
-   display_progress(ctx->progress, i + 1);
-   if (commit_hex->items[i].string &&
-   parse_oid_hex(commit_hex->items[i].string, &oid, 
&end))
-   continue;
-
-   result = lookup_commit_reference_gently(ctx->r, &oid, 
1);
-
-   if (result) {
-   ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, 
ctx->oids.alloc);
-   oidcpy(&ctx->oids.list[ctx->oids.nr], 
&(result->object.oid));
-   ctx->oids.nr++;
-   }
-   }
-   stop_progress(&ctx->progress);
-   strbuf_reset(&progress_title);
-   }
+   if (commit_hex)
+   fill_oids_from_commit_hex(ctx, commit_hex);
 
if (!pack_indexes && !commit_hex) {
if (ctx->report_progress)
-- 
gitgitgadget



[PATCH v5 06/11] commit-graph: extract fill_oids_from_packs()

2019-06-12 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

The write_commit_graph() method is too complex, so we are
extracting helper functions one by one.

This extracts fill_oids_from_packs() that reads the given
pack-file list and fills the oid list in the context.

Signed-off-by: Derrick Stolee 
---
 commit-graph.c | 83 --
 1 file changed, 47 insertions(+), 36 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 6d7e83cfe8..02e5f8c651 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -867,6 +867,51 @@ int write_commit_graph_reachable(const char *obj_dir, 
unsigned int flags)
return result;
 }
 
+static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
+   struct string_list *pack_indexes)
+{
+   uint32_t i;
+   struct strbuf progress_title = STRBUF_INIT;
+   struct strbuf packname = STRBUF_INIT;
+   int dirlen;
+
+   strbuf_addf(&packname, "%s/pack/", ctx->obj_dir);
+   dirlen = packname.len;
+   if (ctx->report_progress) {
+   strbuf_addf(&progress_title,
+   Q_("Finding commits for commit graph in %d pack",
+  "Finding commits for commit graph in %d packs",
+  pack_indexes->nr),
+   pack_indexes->nr);
+   ctx->progress = start_delayed_progress(progress_title.buf, 0);
+   ctx->progress_done = 0;
+   }
+   for (i = 0; i < pack_indexes->nr; i++) {
+   struct packed_git *p;
+   strbuf_setlen(&packname, dirlen);
+   strbuf_addstr(&packname, pack_indexes->items[i].string);
+   p = add_packed_git(packname.buf, packname.len, 1);
+   if (!p) {
+   error(_("error adding pack %s"), packname.buf);
+   return -1;
+   }
+   if (open_pack_index(p)) {
+   error(_("error opening index for %s"), packname.buf);
+   return -1;
+   }
+   for_each_object_in_pack(p, add_packed_commits, ctx,
+   FOR_EACH_OBJECT_PACK_ORDER);
+   close_pack(p);
+   free(p);
+   }
+
+   stop_progress(&ctx->progress);
+   strbuf_reset(&progress_title);
+   strbuf_release(&packname);
+
+   return 0;
+}
+
 int write_commit_graph(const char *obj_dir,
   struct string_list *pack_indexes,
   struct string_list *commit_hex,
@@ -916,42 +961,8 @@ int write_commit_graph(const char *obj_dir,
}
 
if (pack_indexes) {
-   struct strbuf packname = STRBUF_INIT;
-   int dirlen;
-   strbuf_addf(&packname, "%s/pack/", obj_dir);
-   dirlen = packname.len;
-   if (ctx->report_progress) {
-   strbuf_addf(&progress_title,
-   Q_("Finding commits for commit graph in %d 
pack",
-  "Finding commits for commit graph in %d 
packs",
-  pack_indexes->nr),
-   pack_indexes->nr);
-   ctx->progress = 
start_delayed_progress(progress_title.buf, 0);
-   ctx->progress_done = 0;
-   }
-   for (i = 0; i < pack_indexes->nr; i++) {
-   struct packed_git *p;
-   strbuf_setlen(&packname, dirlen);
-   strbuf_addstr(&packname, pack_indexes->items[i].string);
-   p = add_packed_git(packname.buf, packname.len, 1);
-   if (!p) {
-   error(_("error adding pack %s"), packname.buf);
-   res = -1;
-   goto cleanup;
-   }
-   if (open_pack_index(p)) {
-   error(_("error opening index for %s"), 
packname.buf);
-   res = -1;
-   goto cleanup;
-   }
-   for_each_object_in_pack(p, add_packed_commits, ctx,
-   FOR_EACH_OBJECT_PACK_ORDER);
-   close_pack(p);
-   free(p);
-   }
-   stop_progress(&ctx->progress);
-   strbuf_reset(&progress_title);
-   strbuf_release(&packname);
+   if ((res = fill_oids_from_packs(ctx, pack_indexes)))
+   goto cleanup;
}
 
if (commit_hex) {
-- 
gitgitgadget



[PATCH v5 08/11] commit-graph: extract fill_oids_from_all_packs()

2019-06-12 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

The write_commit_graph() method is too complex, so we are
extracting helper functions one by one.

Extract fill_oids_from_all_packs() that reads all pack-files
for commits and fills the oid list in the context.

Signed-off-by: Derrick Stolee 
---
 commit-graph.c | 26 +++---
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 4fae1fcdb2..61cb43ddf8 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -950,6 +950,19 @@ static void fill_oids_from_commit_hex(struct 
write_commit_graph_context *ctx,
strbuf_release(&progress_title);
 }
 
+static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
+{
+   if (ctx->report_progress)
+   ctx->progress = start_delayed_progress(
+   _("Finding commits for commit graph among packed 
objects"),
+   ctx->approx_nr_objects);
+   for_each_packed_object(add_packed_commits, ctx,
+  FOR_EACH_OBJECT_PACK_ORDER);
+   if (ctx->progress_done < ctx->approx_nr_objects)
+   display_progress(ctx->progress, ctx->approx_nr_objects);
+   stop_progress(&ctx->progress);
+}
+
 int write_commit_graph(const char *obj_dir,
   struct string_list *pack_indexes,
   struct string_list *commit_hex,
@@ -1006,17 +1019,8 @@ int write_commit_graph(const char *obj_dir,
if (commit_hex)
fill_oids_from_commit_hex(ctx, commit_hex);
 
-   if (!pack_indexes && !commit_hex) {
-   if (ctx->report_progress)
-   ctx->progress = start_delayed_progress(
-   _("Finding commits for commit graph among 
packed objects"),
-   ctx->approx_nr_objects);
-   for_each_packed_object(add_packed_commits, ctx,
-  FOR_EACH_OBJECT_PACK_ORDER);
-   if (ctx->progress_done < ctx->approx_nr_objects)
-   display_progress(ctx->progress, ctx->approx_nr_objects);
-   stop_progress(&ctx->progress);
-   }
+   if (!pack_indexes && !commit_hex)
+   fill_oids_from_all_packs(ctx);
 
close_reachable(ctx);
 
-- 
gitgitgadget



[PATCH v5 03/11] commit-graph: collapse parameters into flags

2019-06-12 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

The write_commit_graph() and write_commit_graph_reachable() methods
currently take two boolean parameters: 'append' and 'report_progress'.
As we update these methods, adding more parameters this way becomes
cluttered and hard to maintain.

Collapse these parameters into a 'flags' parameter, and adjust the
callers to provide flags as necessary.

Signed-off-by: Derrick Stolee 
---
 builtin/commit-graph.c | 8 +---
 builtin/commit.c   | 2 +-
 builtin/gc.c   | 4 ++--
 commit-graph.c | 9 +
 commit-graph.h | 8 +---
 5 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index 2a1c4d701f..d8efa5bab2 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -142,6 +142,7 @@ static int graph_write(int argc, const char **argv)
struct string_list *commit_hex = NULL;
struct string_list lines;
int result = 0;
+   unsigned int flags = COMMIT_GRAPH_PROGRESS;
 
static struct option builtin_commit_graph_write_options[] = {
OPT_STRING(0, "object-dir", &opts.obj_dir,
@@ -166,11 +167,13 @@ static int graph_write(int argc, const char **argv)
die(_("use at most one of --reachable, --stdin-commits, or 
--stdin-packs"));
if (!opts.obj_dir)
opts.obj_dir = get_object_directory();
+   if (opts.append)
+   flags |= COMMIT_GRAPH_APPEND;
 
read_replace_refs = 0;
 
if (opts.reachable)
-   return write_commit_graph_reachable(opts.obj_dir, opts.append, 
1);
+   return write_commit_graph_reachable(opts.obj_dir, flags);
 
string_list_init(&lines, 0);
if (opts.stdin_packs || opts.stdin_commits) {
@@ -190,8 +193,7 @@ static int graph_write(int argc, const char **argv)
if (write_commit_graph(opts.obj_dir,
   pack_indexes,
   commit_hex,
-  opts.append,
-  1))
+  flags))
result = 1;
 
UNLEAK(lines);
diff --git a/builtin/commit.c b/builtin/commit.c
index b9ea7222fa..b001ef565d 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1670,7 +1670,7 @@ int cmd_commit(int argc, const char **argv, const char 
*prefix)
  "not exceeded, and then \"git reset HEAD\" to recover."));
 
if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
-   write_commit_graph_reachable(get_object_directory(), 0, 0))
+   write_commit_graph_reachable(get_object_directory(), 0))
return 1;
 
repo_rerere(the_repository, 0);
diff --git a/builtin/gc.c b/builtin/gc.c
index 3984addf73..df2573f124 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -665,8 +665,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
}
 
if (gc_write_commit_graph &&
-   write_commit_graph_reachable(get_object_directory(), 0,
-!quiet && !daemonized))
+   write_commit_graph_reachable(get_object_directory(),
+!quiet && !daemonized ? 
COMMIT_GRAPH_PROGRESS : 0))
return 1;
 
if (auto_gc && too_many_loose_objects())
diff --git a/commit-graph.c b/commit-graph.c
index 1b58d1da14..fc40b531af 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -851,15 +851,14 @@ static int add_ref_to_list(const char *refname,
return 0;
 }
 
-int write_commit_graph_reachable(const char *obj_dir, int append,
-int report_progress)
+int write_commit_graph_reachable(const char *obj_dir, unsigned int flags)
 {
struct string_list list = STRING_LIST_INIT_DUP;
int result;
 
for_each_ref(add_ref_to_list, &list);
result = write_commit_graph(obj_dir, NULL, &list,
-   append, report_progress);
+   flags);
 
string_list_clear(&list, 0);
return result;
@@ -868,7 +867,7 @@ int write_commit_graph_reachable(const char *obj_dir, int 
append,
 int write_commit_graph(const char *obj_dir,
   struct string_list *pack_indexes,
   struct string_list *commit_hex,
-  int append, int report_progress)
+  unsigned int flags)
 {
struct packed_oid_list oids;
struct packed_commit_list commits;
@@ -887,6 +886,8 @@ int write_commit_graph(const char *obj_dir,
struct strbuf progress_title = STRBUF_INIT;
unsigned long approx_nr_objects;
int res = 0;
+   int append = flags & COMMIT_GRAPH_APPEND;
+   int report_progress = flags & COMMIT_GRAPH_PROGRESS;
 
if (!commit_graph_compatibl

[PATCH v5 10/11] commit-graph: extract copy_oids_to_commits()

2019-06-12 Thread Derrick Stolee via GitGitGadget
From: Derrick Stolee 

The write_commit_graph() method is too complex, so we are
extracting helper functions one by one.

Extract copy_oids_to_commits(), which fills the commits list
with the distinct commits from the oids list. During this loop,
it also counts the number of "extra" edges from octopus merges.

Signed-off-by: Derrick Stolee 
---
 commit-graph.c | 57 --
 1 file changed, 32 insertions(+), 25 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 1a0a875a7b..72f9c5c7e2 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -984,6 +984,37 @@ static uint32_t count_distinct_commits(struct 
write_commit_graph_context *ctx)
return count_distinct;
 }
 
+static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
+{
+   uint32_t i;
+   struct commit_list *parent;
+
+   ctx->num_extra_edges = 0;
+   if (ctx->report_progress)
+   ctx->progress = start_delayed_progress(
+   _("Finding extra edges in commit graph"),
+   ctx->oids.nr);
+   for (i = 0; i < ctx->oids.nr; i++) {
+   int num_parents = 0;
+   display_progress(ctx->progress, i + 1);
+   if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i]))
+   continue;
+
+   ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, 
&ctx->oids.list[i]);
+   parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]);
+
+   for (parent = ctx->commits.list[ctx->commits.nr]->parents;
+parent; parent = parent->next)
+   num_parents++;
+
+   if (num_parents > 2)
+   ctx->num_extra_edges += num_parents - 1;
+
+   ctx->commits.nr++;
+   }
+   stop_progress(&ctx->progress);
+}
+
 int write_commit_graph(const char *obj_dir,
   struct string_list *pack_indexes,
   struct string_list *commit_hex,
@@ -997,7 +1028,6 @@ int write_commit_graph(const char *obj_dir,
uint32_t chunk_ids[5];
uint64_t chunk_offsets[5];
int num_chunks;
-   struct commit_list *parent;
const unsigned hashsz = the_hash_algo->rawsz;
struct strbuf progress_title = STRBUF_INIT;
int res = 0;
@@ -1056,30 +1086,7 @@ int write_commit_graph(const char *obj_dir,
ctx->commits.alloc = count_distinct;
ALLOC_ARRAY(ctx->commits.list, ctx->commits.alloc);
 
-   ctx->num_extra_edges = 0;
-   if (ctx->report_progress)
-   ctx->progress = start_delayed_progress(
-   _("Finding extra edges in commit graph"),
-   ctx->oids.nr);
-   for (i = 0; i < ctx->oids.nr; i++) {
-   int num_parents = 0;
-   display_progress(ctx->progress, i + 1);
-   if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i]))
-   continue;
-
-   ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, 
&ctx->oids.list[i]);
-   parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]);
-
-   for (parent = ctx->commits.list[ctx->commits.nr]->parents;
-parent; parent = parent->next)
-   num_parents++;
-
-   if (num_parents > 2)
-   ctx->num_extra_edges += num_parents - 1;
-
-   ctx->commits.nr++;
-   }
-   stop_progress(&ctx->progress);
+   copy_oids_to_commits(ctx);
 
if (ctx->commits.nr >= GRAPH_EDGE_LAST_MASK) {
error(_("too many commits to write graph"));
-- 
gitgitgadget



[PATCH v5 00/11] Commit-graph write refactor (was: Create commit-graph file format v2)

2019-06-12 Thread Derrick Stolee via GitGitGadget
This series replaces ds/commit-graph-file-v2, and I'm using the same
gitgitgadget PR to continue the version numbers and hopefully make that
clear. This is a slight modification on patches 1-11 from the incremental
file format RFC [0].

The commit-graph feature is growing, thanks to all of the contributions by
several community members. This also means that the write_commit_graph()
method is a bit unwieldy now. This series refactors that method to use a
write_commit_graph_context struct that is passed between several smaller
methods. The final result should be a write_commit_graph() method that has a
clear set of steps. Future changes should then be easier to understand.

 * Patches 1-4: these are small changes which either fix issues or just
   provide clean-up. These are mostly borrowed from
   ds/commit-graph-format-v2. 
   
   
 * Patches 5-11: these provide a non-functional refactor of
   write_commit_graph() into several methods using a "struct
   write_commit_graph_context" to share across the methods.
   
   

Updates to commits previously in this thread:

 * "commit-graph: remove Future Work section" no longer says that 'verify'
   takes as long as 'write'. [1]
   
   
 * "commit-graph: return with errors during write" now has a test to check
   we don't die(). [2]
   
   

Ævar: Looking at the old thread, I only saw two comments that still apply to
this series [1] [2]. Please point me to any comments I have missed.

Updates in V5:

 * API calls are updated to return 0 on success and a negative value on
   failure.
   
   
 * Stopped passing 'errno' through an API function, instead returns -1.
   
   
 * "extracting methods" -> "extracting helper functions" in commit messages.
   
   
 * flags are now unsigned ints.
   
   

Thanks, -Stolee

[0] https://public-inbox.org/git/pull.184.git.gitgitgad...@gmail.com/

[1] https://public-inbox.org/git/87o94mql0a@evledraar.gmail.com/

[2] https://public-inbox.org/git/87pnp2qlkv@evledraar.gmail.com/

Derrick Stolee (11):
  commit-graph: fix the_repository reference
  commit-graph: return with errors during write
  commit-graph: collapse parameters into flags
  commit-graph: remove Future Work section
  commit-graph: create write_commit_graph_context
  commit-graph: extract fill_oids_from_packs()
  commit-graph: extract fill_oids_from_commit_hex()
  commit-graph: extract fill_oids_from_all_packs()
  commit-graph: extract count_distinct_commits()
  commit-graph: extract copy_oids_to_commits()
  commit-graph: extract write_commit_graph_file()

 Documentation/technical/commit-graph.txt |  17 -
 builtin/commit-graph.c   |  22 +-
 builtin/commit.c |   5 +-
 builtin/gc.c |   7 +-
 commit-graph.c   | 607 +--
 commit-graph.h   |  20 +-
 commit.c |   2 +-
 t/t5318-commit-graph.sh  |   8 +
 8 files changed, 378 insertions(+), 310 deletions(-)


base-commit: 93b4405ffe4ad9308740e7c1c71383bfc369baaa
Published-As: 
https://github.com/gitgitgadget/git/releases/tag/pr-112%2Fderrickstolee%2Fgraph%2Fv2-head-v5
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git 
pr-112/derrickstolee/graph/v2-head-v5
Pull-Request: https://github.com/gitgitgadget/git/pull/112

Range-diff vs v4:

  1:  0be7713a25 =  1:  0be7713a25 commit-graph: fix the_repository reference
  2:  a4082b827e !  2:  95f66e85b2 commit-graph: return with errors during write
 @@ -5,14 +5,17 @@
  The write_commit_graph() method uses die() to report failure and
  exit when confronted with an unexpected condition. This use of
  die() in a library function is incorrect and is now replaced by
 -error() statements and an int return type.
 +error() statements and an int return type. Return zero on success
 +and a negative value on failure.
  
  Now that we use 'goto cleanup' to jump to the terminal condition
  on an error, we have new paths that could lead to uninitialized
  values. New initializers are added to correct for this.
  
  The builtins 'commit-graph', 'gc', and 'commit' call these methods,
 -so update them to check the return value.
 +so update them to check the return value. Test that 'git commit-graph
 +    write' returns a proper error code when hitting a failure condition
 +in write_commit_graph().
  
  Signed-off-by: Derrick Stolee 
  
 @@ -23,7 +26,7 @@
struct string_list *pack_indexes = NULL;
struct string_list *commit_hex = NULL;
struct string_list lines;
 -+ int result;
 ++ int result = 0;
   
static struct option builtin_commit_graph_write_options[] = {
OP

<    1   2   3   4   5   6   7   8   9   10   >