[PATCH v3 8/8] pack-objects: move 'layer' into 'struct packing_data'

2018-08-09 Thread Christian Couder
This reduces the size of 'struct object_entry' from 88 bytes
to 80 and therefore makes packing objects more efficient.

For example on a Linux repo with 12M objects,
`git pack-objects --all` needs extra 96MB memory even if the
layer feature is not used.

Helped-by: Jeff King 
Helped-by: Duy Nguyen 
Signed-off-by: Christian Couder 
---
 builtin/pack-objects.c |  4 ++--
 delta-islands.c|  4 ++--
 pack-objects.c |  6 ++
 pack-objects.h | 20 ++--
 4 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index fd3e514b31..d5d91eeed5 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -611,7 +611,7 @@ static inline void add_to_write_order(struct object_entry 
**wo,
   unsigned int *endp,
   struct object_entry *e)
 {
-   if (e->filled || e->layer != write_layer)
+   if (e->filled || oe_layer(&to_pack, e) != write_layer)
return;
wo[(*endp)++] = e;
e->filled = 1;
@@ -714,7 +714,7 @@ static void compute_layer_order(struct object_entry **wo, 
unsigned int *wo_end)
 * Finally all the rest in really tight order
 */
for (i = last_untagged; i < to_pack.nr_objects; i++) {
-   if (!objects[i].filled && objects[i].layer == write_layer)
+   if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == 
write_layer)
add_family_to_write_order(wo, wo_end, &objects[i]);
}
 }
diff --git a/delta-islands.c b/delta-islands.c
index 3c8fe60801..92137f2eca 100644
--- a/delta-islands.c
+++ b/delta-islands.c
@@ -492,13 +492,13 @@ int compute_pack_layers(struct packing_data *to_pack)
struct object_entry *entry = &to_pack->objects[i];
khiter_t pos = kh_get_sha1(island_marks, entry->idx.oid.hash);
 
-   entry->layer = 1;
+   oe_set_layer(to_pack, entry, 1);
 
if (pos < kh_end(island_marks)) {
struct island_bitmap *bitmap = kh_value(island_marks, 
pos);
 
if (island_bitmap_get(bitmap, island_counter_core))
-   entry->layer = 0;
+   oe_set_layer(to_pack, entry, 0);
}
}
 
diff --git a/pack-objects.c b/pack-objects.c
index 30314572e6..98389460c2 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -163,6 +163,9 @@ struct object_entry *packlist_alloc(struct packing_data 
*pdata,
 
if (pdata->tree_depth)
REALLOC_ARRAY(pdata->tree_depth, pdata->nr_alloc);
+
+   if (pdata->layer)
+   REALLOC_ARRAY(pdata->layer, pdata->nr_alloc);
}
 
new_entry = pdata->objects + pdata->nr_objects++;
@@ -181,5 +184,8 @@ struct object_entry *packlist_alloc(struct packing_data 
*pdata,
if (pdata->tree_depth)
pdata->tree_depth[pdata->nr_objects - 1] = 0;
 
+   if (pdata->layer)
+   pdata->layer[pdata->nr_objects - 1] = 0;
+
return new_entry;
 }
diff --git a/pack-objects.h b/pack-objects.h
index 3cb5527eeb..ad3c208764 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -101,8 +101,6 @@ struct object_entry {
unsigned no_try_delta:1;
unsigned in_pack_type:TYPE_BITS; /* could be delta */
 
-   unsigned char layer;
-
unsigned preferred_base:1; /*
* we do not pack this, but is available
* to be used as the base object to delta
@@ -147,6 +145,7 @@ struct packing_data {
 
/* delta islands */
unsigned int *tree_depth;
+   unsigned char *layer;
 };
 
 void prepare_packing_data(struct packing_data *pdata);
@@ -369,4 +368,21 @@ static inline void oe_set_tree_depth(struct packing_data 
*pack,
pack->tree_depth[e - pack->objects] = tree_depth;
 }
 
+static inline unsigned char oe_layer(struct packing_data *pack,
+struct object_entry *e)
+{
+   if (!pack->layer)
+   return 0;
+   return pack->layer[e - pack->objects];
+}
+
+static inline void oe_set_layer(struct packing_data *pack,
+   struct object_entry *e,
+   unsigned char layer)
+{
+   if (!pack->layer)
+   ALLOC_ARRAY(pack->layer, pack->nr_objects);
+   pack->layer[e - pack->objects] = layer;
+}
+
 #endif
-- 
2.18.0.555.g17f9c4abba



[PATCH v3 4/8] pack-objects: add delta-islands support

2018-08-09 Thread Christian Couder
From: Jeff King 

Implement support for delta islands in git pack-objects
and document how delta islands work in
"Documentation/git-pack-objects.txt" and Documentation/config.txt.

This allows users to setup delta islands in their config and
get the benefit of less disk usage while cloning and fetching
is still quite fast and not much more CPU intensive.

Signed-off-by: Jeff King 
Signed-off-by: Christian Couder 
---
 Documentation/config.txt   | 15 +
 Documentation/git-pack-objects.txt | 97 ++
 builtin/pack-objects.c | 57 +++---
 3 files changed, 161 insertions(+), 8 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 63365dcf3d..27bb77f9e7 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2585,6 +2585,21 @@ Note that changing the compression level will not 
automatically recompress
 all existing objects. You can force recompression by passing the -F option
 to linkgit:git-repack[1].
 
+pack.island::
+   An extended regular expression configuring a set of delta
+   islands. See "DELTA ISLANDS" in linkgit:git-pack-objects[1]
+   for details.
+
+pack.islandCore::
+   Specify an island name which gets to have its objects be
+   packed first. This creates a kind of pseudo-pack at the front
+   of one pack, so that the objects from the specified island are
+   hopefully faster to copy into any pack that should be served
+   to a user requesting these objects. In practice this means
+   that the island specified should likely correspond to what is
+   the most commonly cloned in the repo. See also "DELTA ISLANDS"
+   in linkgit:git-pack-objects[1].
+
 pack.deltaCacheSize::
The maximum memory in bytes used for caching deltas in
linkgit:git-pack-objects[1] before writing them out to a pack.
diff --git a/Documentation/git-pack-objects.txt 
b/Documentation/git-pack-objects.txt
index d95b472d16..40c825c381 100644
--- a/Documentation/git-pack-objects.txt
+++ b/Documentation/git-pack-objects.txt
@@ -289,6 +289,103 @@ Unexpected missing object will raise an error.
 --unpack-unreachable::
Keep unreachable objects in loose form. This implies `--revs`.
 
+--delta-islands::
+   Restrict delta matches based on "islands". See DELTA ISLANDS
+   below.
+
+
+DELTA ISLANDS
+-
+
+When possible, `pack-objects` tries to reuse existing on-disk deltas to
+avoid having to search for new ones on the fly. This is an important
+optimization for serving fetches, because it means the server can avoid
+inflating most objects at all and just send the bytes directly from
+disk. This optimization can't work when an object is stored as a delta
+against a base which the receiver does not have (and which we are not
+already sending). In that case the server "breaks" the delta and has to
+find a new one, which has a high CPU cost. Therefore it's important for
+performance that the set of objects in on-disk delta relationships match
+what a client would fetch.
+
+In a normal repository, this tends to work automatically. The objects
+are mostly reachable from the branches and tags, and that's what clients
+fetch. Any deltas we find on the server are likely to be between objects
+the client has or will have.
+
+But in some repository setups, you may have several related but separate
+groups of ref tips, with clients tending to fetch those groups
+independently. For example, imagine that you are hosting several "forks"
+of a repository in a single shared object store, and letting clients
+view them as separate repositories through `GIT_NAMESPACE` or separate
+repos using the alternates mechanism. A naive repack may find that the
+optimal delta for an object is against a base that is only found in
+another fork. But when a client fetches, they will not have the base
+object, and we'll have to find a new delta on the fly.
+
+A similar situation may exist if you have many refs outside of
+`refs/heads/` and `refs/tags/` that point to related objects (e.g.,
+`refs/pull` or `refs/changes` used by some hosting providers). By
+default, clients fetch only heads and tags, and deltas against objects
+found only in those other groups cannot be sent as-is.
+
+Delta islands solve this problem by allowing you to group your refs into
+distinct "islands". Pack-objects computes which objects are reachable
+from which islands, and refuses to make a delta from an object `A`
+against a base which is not present in all of `A`'s islands. This
+results in slightly larger packs (because we miss some delta
+opportunities), but guarantees that a fetch of one island will not have
+to recompute deltas on the fly due to crossing island boundaries.
+
+When repacking with delta islands the delta window tends to get
+clogged with candidates that are forbidden by the config. Repac

[PATCH v3 3/8] pack-objects: refactor code into compute_layer_order()

2018-08-09 Thread Christian Couder
In a following commit, as we will use delta islands, we will
have to compute the write order for different layers, not just
for one.

Let's prepare for that by refactoring the code that will be
used to compute the write order for a given layer into a new
compute_layer_order() function.

This will make it easier to see and understand what the
following changes are doing.

Helped-by: Duy Nguyen 
Signed-off-by: Christian Couder 
---
 builtin/pack-objects.c | 90 +++---
 1 file changed, 50 insertions(+), 40 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 4391504a91..99b6329399 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -667,48 +667,15 @@ static void add_family_to_write_order(struct object_entry 
**wo,
add_descendants_to_write_order(wo, endp, root);
 }
 
-static struct object_entry **compute_write_order(void)
+static void compute_layer_order(struct object_entry **wo, unsigned int *wo_end)
 {
-   unsigned int i, wo_end, last_untagged;
-
-   struct object_entry **wo;
+   unsigned int i, last_untagged;
struct object_entry *objects = to_pack.objects;
 
for (i = 0; i < to_pack.nr_objects; i++) {
-   objects[i].tagged = 0;
-   objects[i].filled = 0;
-   SET_DELTA_CHILD(&objects[i], NULL);
-   SET_DELTA_SIBLING(&objects[i], NULL);
-   }
-
-   /*
-* Fully connect delta_child/delta_sibling network.
-* Make sure delta_sibling is sorted in the original
-* recency order.
-*/
-   for (i = to_pack.nr_objects; i > 0;) {
-   struct object_entry *e = &objects[--i];
-   if (!DELTA(e))
-   continue;
-   /* Mark me as the first child */
-   e->delta_sibling_idx = DELTA(e)->delta_child_idx;
-   SET_DELTA_CHILD(DELTA(e), e);
-   }
-
-   /*
-* Mark objects that are at the tip of tags.
-*/
-   for_each_tag_ref(mark_tagged, NULL);
-
-   /*
-* Give the objects in the original recency order until
-* we see a tagged tip.
-*/
-   ALLOC_ARRAY(wo, to_pack.nr_objects);
-   for (i = wo_end = 0; i < to_pack.nr_objects; i++) {
if (objects[i].tagged)
break;
-   add_to_write_order(wo, &wo_end, &objects[i]);
+   add_to_write_order(wo, wo_end, &objects[i]);
}
last_untagged = i;
 
@@ -717,7 +684,7 @@ static struct object_entry **compute_write_order(void)
 */
for (; i < to_pack.nr_objects; i++) {
if (objects[i].tagged)
-   add_to_write_order(wo, &wo_end, &objects[i]);
+   add_to_write_order(wo, wo_end, &objects[i]);
}
 
/*
@@ -727,7 +694,7 @@ static struct object_entry **compute_write_order(void)
if (oe_type(&objects[i]) != OBJ_COMMIT &&
oe_type(&objects[i]) != OBJ_TAG)
continue;
-   add_to_write_order(wo, &wo_end, &objects[i]);
+   add_to_write_order(wo, wo_end, &objects[i]);
}
 
/*
@@ -736,7 +703,7 @@ static struct object_entry **compute_write_order(void)
for (i = last_untagged; i < to_pack.nr_objects; i++) {
if (oe_type(&objects[i]) != OBJ_TREE)
continue;
-   add_to_write_order(wo, &wo_end, &objects[i]);
+   add_to_write_order(wo, wo_end, &objects[i]);
}
 
/*
@@ -744,8 +711,51 @@ static struct object_entry **compute_write_order(void)
 */
for (i = last_untagged; i < to_pack.nr_objects; i++) {
if (!objects[i].filled)
-   add_family_to_write_order(wo, &wo_end, &objects[i]);
+   add_family_to_write_order(wo, wo_end, &objects[i]);
}
+}
+
+static struct object_entry **compute_write_order(void)
+{
+   unsigned int i, wo_end;
+
+   struct object_entry **wo;
+   struct object_entry *objects = to_pack.objects;
+
+   for (i = 0; i < to_pack.nr_objects; i++) {
+   objects[i].tagged = 0;
+   objects[i].filled = 0;
+   SET_DELTA_CHILD(&objects[i], NULL);
+   SET_DELTA_SIBLING(&objects[i], NULL);
+   }
+
+   /*
+* Fully connect delta_child/delta_sibling network.
+* Make sure delta_sibling is sorted in the original
+* recency order.
+*/
+   for (i = to_pack.nr_objects; i > 0;) {
+   struct object_entry *e = &objects[--i];
+   if (!DELTA(e))
+   continue;
+   /* Mark me as the first child */
+   e->delta_sibling_idx = DELTA(e)->delta_child_idx;
+

[PATCH v3 1/8] packfile: make get_delta_base() non static

2018-08-09 Thread Christian Couder
From: Jeff King 

As get_delta_base() will be used outside 'packfile.c' in
a following commit, let's make it non static and let's
declare it in 'packfile.h'.

Signed-off-by: Jeff King 
Signed-off-by: Christian Couder 
---
 packfile.c | 10 +-
 packfile.h |  7 +++
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/packfile.c b/packfile.c
index 6974903e58..04d387f312 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1037,11 +1037,11 @@ const struct packed_git *has_packed_and_bad(const 
unsigned char *sha1)
return NULL;
 }
 
-static off_t get_delta_base(struct packed_git *p,
-   struct pack_window **w_curs,
-   off_t *curpos,
-   enum object_type type,
-   off_t delta_obj_offset)
+off_t get_delta_base(struct packed_git *p,
+struct pack_window **w_curs,
+off_t *curpos,
+enum object_type type,
+off_t delta_obj_offset)
 {
unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
off_t base_offset;
diff --git a/packfile.h b/packfile.h
index cc7eaffe1b..1265fd9b06 100644
--- a/packfile.h
+++ b/packfile.h
@@ -126,6 +126,13 @@ extern unsigned long unpack_object_header_buffer(const 
unsigned char *buf, unsig
 extern unsigned long get_size_from_delta(struct packed_git *, struct 
pack_window **, off_t);
 extern int unpack_object_header(struct packed_git *, struct pack_window **, 
off_t *, unsigned long *);
 
+/*
+ * Return the offset of the object that is the delta base of the object at 
curpos.
+ */
+extern off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
+   off_t *curpos, enum object_type type,
+   off_t delta_obj_offset);
+
 extern void release_pack_memory(size_t);
 
 /* global flag to enable extra checks when accessing packed objects */
-- 
2.18.0.555.g17f9c4abba



[PATCH v3 0/8] Add delta islands support

2018-08-09 Thread Christian Couder
This patch series is upstreaming work made by GitHub and available in:

https://github.com/peff/git/commits/jk/delta-islands

The above work has been already described in the following article:

https://githubengineering.com/counting-objects/

The above branch contains only one patch. In this patch series the
patch has been split into 5 patches (1/8, 2/8, 4/8, 5/8 and 6/8) with
their own commit message, and on top of that 3 new patches (3/8, 7/8
and 8/8) have been added. The new patches implement things that were
requested after the previous iterations.

I kept Peff as the author of the original 5 patches and took the
liberty to add his Signed-off-by to them.

As explained in details in the Counting Object article referenced
above, the goal of the delta island mechanism is for a hosting
provider to make it possible to have the "forks" of a repository share
as much storage as possible while preventing object packs to contain
deltas between different forks.

If deltas between different forks are not prevented, when users clone
or fetch a fork, preparing the pack that should be sent to them can be
very costly CPU wise, as objects from a different fork should not be
sent, which means that a lot of deltas might need to be computed
again (instead of reusing existing deltas).


The following changes have been made since the previous iteration:

* suggested by Duy: move the code computing the write order for a
  layer to a new separate function called compute_layer_order() in
  builtin/pack-objects.c in new patch 3/8

  I think that indeed this makes the following patch (4/8) shorter and
  easier to understand as well as the overall result nicer.

* suggested by Duy and Peff: rework the way the 'tree_depth' field is
  moved from 'struct object_entry' to 'struct packing_data' in
  pack-object.h in patch 7/8

* suggested by Duy and Peff: move field 'layer' from
  'struct object_entry' to 'struct packing_data' in pack-object.h in
  new patch 8/8

This patch series is also available on GitHub in:

https://github.com/chriscool/git/commits/delta-islands

The previous versions are available there:

V2: https://github.com/chriscool/git/commits/delta-islands19
V1: https://github.com/chriscool/git/commits/delta-islands6

V2: https://public-inbox.org/git/20180805172525.15278-1-chrisc...@tuxfamily.org/
V1: https://public-inbox.org/git/20180722054836.28935-1-chrisc...@tuxfamily.org/


Christian Couder (3):
  pack-objects: refactor code into compute_layer_order()
  pack-objects: move tree_depth into 'struct packing_data'
  pack-objects: move 'layer' into 'struct packing_data'

Jeff King (5):
  packfile: make get_delta_base() non static
  Add delta-islands.{c,h}
  pack-objects: add delta-islands support
  repack: add delta-islands support
  t: add t5319-delta-islands.sh

 Documentation/config.txt   |  19 ++
 Documentation/git-pack-objects.txt |  97 ++
 Documentation/git-repack.txt   |   5 +
 Makefile   |   1 +
 builtin/pack-objects.c | 137 +---
 builtin/repack.c   |   9 +
 delta-islands.c| 506 +
 delta-islands.h|  11 +
 pack-objects.c |  12 +
 pack-objects.h |  39 +++
 packfile.c |  10 +-
 packfile.h |   7 +
 t/t5319-delta-islands.sh   | 143 
 13 files changed, 948 insertions(+), 48 deletions(-)
 create mode 100644 delta-islands.c
 create mode 100644 delta-islands.h
 create mode 100755 t/t5319-delta-islands.sh

-- 
2.18.0.555.g17f9c4abba



Re: [GSoC][PATCH v5 02/20] rebase -i: rewrite append_todo_help() in C

2018-08-07 Thread Christian Couder
On Tue, Aug 7, 2018 at 6:15 PM, Phillip Wood  wrote:
> On 07/08/18 16:25, Christian Couder wrote:
>>
>> I agree about checking the return value from fputs(), but it seems to
>> me that we don't usually check the value of fclose().
>
> A quick grep shows you're right, there are only a handful of places where
> the return value of fclose() is checked (there aren't many checks for the
> return value of close() either), I'm don't think that is safe though given
> that write errors may only show up when the file gets flushed by closing it.

I vaguely remember we tried to check those return values but it didn't
work well sometimes.


Re: [GSoC][PATCH v5 02/20] rebase -i: rewrite append_todo_help() in C

2018-08-07 Thread Christian Couder
Hi Phillip,

On Tue, Aug 7, 2018 at 3:57 PM, Phillip Wood  wrote:
>
> On 31/07/18 18:59, Alban Gruin wrote:
>>
>> +
>> + ret = fputs(buf.buf, todo);
>
> It is not worth changing the patch just for this but strbuf_write()
> might be clearer (you use it in a later patch)
>
>> + if (ret < 0)
>> + error_errno(_("could not append help text to '%s'"), 
>> rebase_path_todo());
>> +
>> + fclose(todo);
>
> You should definitely check the return value and return an error if
> appropriate as fputs() might not actually write any data until you try
> and close the file.

I agree about checking the return value from fputs(), but it seems to
me that we don't usually check the value of fclose().

Thanks,
Christian.


Re: [RFC PATCH 2/5] Add delta-islands.{c,h}

2018-08-06 Thread Christian Couder
On Mon, Aug 6, 2018 at 5:53 PM, Duy Nguyen  wrote:
> On Sun, Aug 5, 2018 at 8:53 PM Christian Couder
>  wrote:
>>
>> As you can see the patch 6/6 (in the v2 of this patch series that I
>> just sent) moves `unsigned int tree_depth` from 'struct object_entry'
>> to 'struct packing_data'. I am not sure that I did it right and that
>> it is worth it though as it is a bit more complex.
>
> It is a bit more complex than I expected. But I think if you go with
> Jeff's suggestion (i.e. think of the new tree_depth array an extension
> of objects array) it's a bit simpler: you access both arrays using the
> same index, both arrays should have the same size and be realloc'd at
> the same time in packlist_alloc().

Ok, I will take a look at doing that to simplify things. Thanks to
Peff and you for that suggestion!

> Is it worth it? The "pahole" comment in this file is up to date. We
> use 80 bytes per object. This series makes the struct 88 bytes (I've
> just rerun pahole).

Did you run it on V1 or on V2? I guess on V2, but then what do you
think about converting the 'layer' field into a bit field, which might
be simpler and save space?

> On linux repo with 12M objects, "git pack-objects
> --all" needs extra 96MB memory even if this feature is not used. So
> yes I still think it's worth moving these fields out of struct
> object_entry.

And what about the fields already in struct object_entry? While I am
at it, I think I could move some of them too if it is really so worth
it.


Re: [RFC PATCH 3/5] pack-objects: add delta-islands support

2018-08-06 Thread Christian Couder
On Sun, Aug 5, 2018 at 7:40 PM, Christian Couder
 wrote:
> On Tue, Jul 24, 2018 at 7:11 PM, Junio C Hamano  wrote:

> This is the new documentation:
>
> -> Refs are grouped into islands based on their "names", and two regexes
> -> that produce the same name are considered to be in the same
> -> island. The names are computed from the regexes by concatenating any
> -> capture groups from the regex, with a '-' dash in between. (And if
> -> there are no capture groups, then the name is the empty string, as in
> -> the above example.) This allows you to create arbitrary numbers of
> -> islands. Only up to 7 such capture groups are supported though.
>
> I added the last sentence above, but I wonder if it is 7 or 8.

Actually after reading the man page again, it looks like the first
element in the array we pass is used for "the entire regular
expression's match addresses", so we only get 7 capture groups (not
8).

> The code is the following:
>
> -> static int find_island_for_ref(const char *refname, const struct
> object_id *oid,
> ->int flags, void *data)
> -> {
> -> int i, m;
> -> regmatch_t matches[8];
> -> struct strbuf island_name = STRBUF_INIT;
> ->
> -> /* walk backwards to get last-one-wins ordering */
> -> for (i = island_regexes_nr - 1; i >= 0; i--) {
> -> if (!regexec(&island_regexes[i], refname,
> ->  ARRAY_SIZE(matches), matches, 0))
> -> break;
> -> }
>
> I also wonder if the above is enough to diagnose end-user input when
> it requires more captures than we support. A quick look at the man
> page of the regex functions wasn't enough to enlighten me. Any input
> on these issues is very welcome!

Taking a look at how we use regexec() in our code base, it looks like
it might be better to use regexec_buf() defined in
"git-compat-util.h".

I am not completely sure about that because apply.c has:

status = regexec(stamp, timestamp, ARRAY_SIZE(m), m, 0);
if (status) {
if (status != REG_NOMATCH)
warning(_("regexec returned %d for input: %s"),
status, timestamp);
return 0;
}

Though the above uses a regex that is defined in apply.c. The regex
doesn't come from the config file.

Actually except the above there is a mix of regexec() and
regexec_buf() in our code base, which are used with only 0, 1 or 2
capture groups, so it is not very clear what should be used.

And anyway I still don't see how we could diagnose when the end user
input requires more captures than we support.


Re: [RFC PATCH 2/5] Add delta-islands.{c,h}

2018-08-05 Thread Christian Couder
On Sun, Jul 22, 2018 at 10:50 AM, Duy Nguyen  wrote:
> On Sun, Jul 22, 2018 at 7:51 AM Christian Couder
>  wrote:

>> diff --git a/pack-objects.h b/pack-objects.h
>> index edf74dabdd..8eecd67991 100644
>> --- a/pack-objects.h
>> +++ b/pack-objects.h
>> @@ -100,6 +100,10 @@ struct object_entry {
>> unsigned type_:TYPE_BITS;
>> unsigned no_try_delta:1;
>> unsigned in_pack_type:TYPE_BITS; /* could be delta */
>> +
>> +   unsigned int tree_depth; /* should be repositioned for packing? */
>> +   unsigned char layer;
>> +
>
> This looks very much like an optional feature. To avoid increasing
> pack-objects memory usage for common case, please move this to struct
> packing_data.

As you can see the patch 6/6 (in the v2 of this patch series that I
just sent) moves `unsigned int tree_depth` from 'struct object_entry'
to 'struct packing_data'. I am not sure that I did it right and that
it is worth it though as it is a bit more complex.

About `unsigned char layer` I am even less sure that it's worth it for
the following reasons:

- 'struct object_entry' contains bit fields as its last members and
then the following comment:

/*
 * pahole results on 64-bit linux (gcc and clang)
 *
 *   size: 80, bit_padding: 20 bits, holes: 8 bits
 *
 * and on 32-bit (gcc)
 *
 *   size: 76, bit_padding: 20 bits, holes: 8 bits
 */

I am not sure if this comment is still up to date, but if it true
maybe there is enough space in the padding to add 'layer' as a 8 bit
bit field somewhere without increasing the size of 'struct
object_entry'.

- 'layer' is used in add_to_write_order() which is called from many
places in add_descendants_to_write_order() and compute_write_order()
for example like this:

for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
add_to_write_order(wo, endp, s);
}

So it seems to me that moving 'layer' to 'struct packing_data' would
require changing the DELTA_SIBLING macros or adding a hack to easily
find the index in an array corresponding to a 'struct object_entry'.

- I don't see why it is different from other fields in 'struct
object_entry' that are also used in compute_write_order(), for
example:

uint32_t delta_idx;/* delta base object */
uint32_t delta_child_idx; /* deltified objects who bases me */
uint32_t delta_sibling_idx; /* other deltified objects who ... */

Would we also want to move these fields to 'struct packing_data'? Why
or why not? If we want to move them, then it might be a good idea to
do all the moving at the same time to make sure we get something
coherent.

Thanks,
Christian.


Re: [RFC PATCH 3/5] pack-objects: add delta-islands support

2018-08-05 Thread Christian Couder
On Tue, Jul 24, 2018 at 7:11 PM, Junio C Hamano  wrote:
>
> Another thing I noticed from 2/5 is that you can have up to 7 such
> capture groups.  I do not have any opinion if 7 is too few or too
> many, but we would want the number to be documented, and end-user
> input diagnosed when it requires more captures than we support (if
> we are not already checking, that is).

This is the new documentation:

-> Refs are grouped into islands based on their "names", and two regexes
-> that produce the same name are considered to be in the same
-> island. The names are computed from the regexes by concatenating any
-> capture groups from the regex, with a '-' dash in between. (And if
-> there are no capture groups, then the name is the empty string, as in
-> the above example.) This allows you to create arbitrary numbers of
-> islands. Only up to 7 such capture groups are supported though.

I added the last sentence above, but I wonder if it is 7 or 8. The
code is the following:

-> static int find_island_for_ref(const char *refname, const struct
object_id *oid,
->int flags, void *data)
-> {
-> int i, m;
-> regmatch_t matches[8];
-> struct strbuf island_name = STRBUF_INIT;
->
-> /* walk backwards to get last-one-wins ordering */
-> for (i = island_regexes_nr - 1; i >= 0; i--) {
-> if (!regexec(&island_regexes[i], refname,
->  ARRAY_SIZE(matches), matches, 0))
-> break;
-> }

I also wonder if the above is enough to diagnose end-user input when
it requires more captures than we support. A quick look at the man
page of the regex functions wasn't enough to enlighten me. Any input
on these issues is very welcome!


Re: [RFC PATCH 3/5] pack-objects: add delta-islands support

2018-08-05 Thread Christian Couder
On Sun, Jul 22, 2018 at 10:55 AM, Duy Nguyen  wrote:
> On Sun, Jul 22, 2018 at 7:52 AM Christian Couder
>  wrote:
>>
>> -   /*
>> -* And then all remaining commits and tags.
>> -*/
>> -   for (i = last_untagged; i < to_pack.nr_objects; i++) {
>> -   if (oe_type(&objects[i]) != OBJ_COMMIT &&
>> -   oe_type(&objects[i]) != OBJ_TAG)
>> -   continue;
>> -   add_to_write_order(wo, &wo_end, &objects[i]);
>> -   }
>> +   /*
>> +* Then fill all the tagged tips.
>> +*/
>
> If we move the code in this loop to a separate function, in a separate
> patch, first, would it produce a better diff? I think all the
> indentation change here makes it a bit hard to read.

Sorry I just realized that I forgot to try to do that. It will be on
my todo list for the next iteration.

Thanks,
Christian.


[PATCH v2 6/6] pack-objects: move tree_depth into 'struct packing_data'

2018-08-05 Thread Christian Couder
This reduces the size of 'struct object_entry' and therefore
makes packing objects more efficient.

This also renames cmp_tree_depth() into tree_depth_compare(),
as it is more modern to have the name of the compare functions
end with "compare".

Signed-off-by: Christian Couder 
---
 builtin/pack-objects.c | 20 
 delta-islands.c| 27 ++-
 pack-objects.h |  4 +++-
 3 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 3d09742d91..da6dbb22d2 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -2707,14 +2707,26 @@ static void show_object(struct object *obj, const char 
*name, void *data)
if (use_delta_islands) {
const char *p;
unsigned depth = 0;
-   struct object_entry *ent;
+   uint32_t index_pos;
 
for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
depth++;
 
-   ent = packlist_find(&to_pack, obj->oid.hash, NULL);
-   if (ent && depth > ent->tree_depth)
-   ent->tree_depth = depth;
+   if (!to_pack.tree_depth) {
+   to_pack.tree_depth = xcalloc(to_pack.nr_alloc, 
sizeof(*to_pack.tree_depth));
+   to_pack.tree_depth_size = to_pack.nr_alloc;
+   } else if (to_pack.nr_objects > to_pack.tree_depth_size) {
+   REALLOC_ARRAY(to_pack.tree_depth, to_pack.nr_alloc);
+   memset(to_pack.tree_depth + to_pack.tree_depth_size, 0,
+  (to_pack.nr_alloc - to_pack.tree_depth_size) * 
sizeof(*to_pack.tree_depth));
+   to_pack.tree_depth_size = to_pack.nr_alloc;
+   }
+
+   if (packlist_find(&to_pack, obj->oid.hash, &index_pos)) {
+   uint32_t i = to_pack.index[index_pos] - 1;
+   if (depth > to_pack.tree_depth[i])
+   to_pack.tree_depth[i] = depth;
+   }
}
 }
 
diff --git a/delta-islands.c b/delta-islands.c
index f7902a64ad..e8e6ce3dc4 100644
--- a/delta-islands.c
+++ b/delta-islands.c
@@ -224,17 +224,23 @@ static void mark_remote_island_1(struct remote_island 
*rl, int is_core_island)
island_counter++;
 }
 
-static int cmp_tree_depth(const void *va, const void *vb)
+struct tree_islands_todo {
+   struct object_entry *entry;
+   unsigned int depth;
+};
+
+static int tree_depth_compare(const void *a, const void *b)
 {
-   struct object_entry *a = *(struct object_entry **)va;
-   struct object_entry *b = *(struct object_entry **)vb;
-   return a->tree_depth - b->tree_depth;
+   const struct tree_islands_todo *todo_a = a;
+   const struct tree_islands_todo *todo_b = b;
+
+   return todo_a->depth - todo_b->depth;
 }
 
 void resolve_tree_islands(int progress, struct packing_data *to_pack)
 {
struct progress *progress_state = NULL;
-   struct object_entry **todo;
+   struct tree_islands_todo *todo;
int nr = 0;
int i;
 
@@ -250,16 +256,19 @@ void resolve_tree_islands(int progress, struct 
packing_data *to_pack)
 */
ALLOC_ARRAY(todo, to_pack->nr_objects);
for (i = 0; i < to_pack->nr_objects; i++) {
-   if (oe_type(&to_pack->objects[i]) == OBJ_TREE)
-   todo[nr++] = &to_pack->objects[i];
+   if (oe_type(&to_pack->objects[i]) == OBJ_TREE) {
+   todo[nr].entry = &to_pack->objects[i];
+   todo[nr].depth = to_pack->tree_depth[i];
+   nr++;
+   }
}
-   QSORT(todo, nr, cmp_tree_depth);
+   QSORT(todo, nr, tree_depth_compare);
 
if (progress)
progress_state = start_progress(_("Propagating island marks"), 
nr);
 
for (i = 0; i < nr; i++) {
-   struct object_entry *ent = todo[i];
+   struct object_entry *ent = todo[i].entry;
struct island_bitmap *root_marks;
struct tree *tree;
struct tree_desc desc;
diff --git a/pack-objects.h b/pack-objects.h
index 8eecd67991..522b09a31e 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -101,7 +101,6 @@ struct object_entry {
unsigned no_try_delta:1;
unsigned in_pack_type:TYPE_BITS; /* could be delta */
 
-   unsigned int tree_depth; /* should be repositioned for packing? */
unsigned char layer;
 
unsigned preferred_base:1; /*
@@ -145,6 +144,9 @@ struct packing_data {
struct packed_git **in_pack;
 
uintmax_t oe_size_limit;
+
+   unsigned int *tree_depth;
+   uint32_t tree_depth_size;
 };
 
 void prepare_packing_data(struct packing_data *pdata);
-- 
2.18.0.327.ga7d188ab43



[PATCH v2 5/6] t: add t5319-delta-islands.sh

2018-08-05 Thread Christian Couder
From: Jeff King 

Signed-off-by: Jeff King 
Signed-off-by: Christian Couder 
---
 t/t5319-delta-islands.sh | 143 +++
 1 file changed, 143 insertions(+)
 create mode 100755 t/t5319-delta-islands.sh

diff --git a/t/t5319-delta-islands.sh b/t/t5319-delta-islands.sh
new file mode 100755
index 00..fea92a5777
--- /dev/null
+++ b/t/t5319-delta-islands.sh
@@ -0,0 +1,143 @@
+#!/bin/sh
+
+test_description='exercise delta islands'
+. ./test-lib.sh
+
+# returns true iff $1 is a delta based on $2
+is_delta_base () {
+   delta_base=$(echo "$1" | git cat-file --batch-check='%(deltabase)') &&
+   echo >&2 "$1 has base $delta_base" &&
+   test "$delta_base" = "$2"
+}
+
+# generate a commit on branch $1 with a single file, "file", whose
+# content is mostly based on the seed $2, but with a unique bit
+# of content $3 appended. This should allow us to see whether
+# blobs of different refs delta against each other.
+commit() {
+   blob=$({ test-tool genrandom "$2" 10240 && echo "$3"; } |
+  git hash-object -w --stdin) &&
+   tree=$(printf '100644 blob %s\tfile\n' "$blob" | git mktree) &&
+   commit=$(echo "$2-$3" | git commit-tree "$tree" ${4:+-p "$4"}) &&
+   git update-ref "refs/heads/$1" "$commit" &&
+   eval "$1"'=$(git rev-parse $1:file)' &&
+   eval "echo >&2 $1=\$$1"
+}
+
+test_expect_success 'setup commits' '
+   commit one seed 1 &&
+   commit two seed 12
+'
+
+# Note: This is heavily dependent on the "prefer larger objects as base"
+# heuristic.
+test_expect_success 'vanilla repack deltas one against two' '
+   git repack -adf &&
+   is_delta_base $one $two
+'
+
+test_expect_success 'island repack with no island definition is vanilla' '
+   git repack -adfi &&
+   is_delta_base $one $two
+'
+
+test_expect_success 'island repack with no matches is vanilla' '
+   git -c "pack.island=refs/foo" repack -adfi &&
+   is_delta_base $one $two
+'
+
+test_expect_success 'separate islands disallows delta' '
+   git -c "pack.island=refs/heads/(.*)" repack -adfi &&
+   ! is_delta_base $one $two &&
+   ! is_delta_base $two $one
+'
+
+test_expect_success 'same island allows delta' '
+   git -c "pack.island=refs/heads" repack -adfi &&
+   is_delta_base $one $two
+'
+
+test_expect_success 'coalesce same-named islands' '
+   git \
+   -c "pack.island=refs/(.*)/one" \
+   -c "pack.island=refs/(.*)/two" \
+   repack -adfi &&
+   is_delta_base $one $two
+'
+
+test_expect_success 'island restrictions drop reused deltas' '
+   git repack -adfi &&
+   is_delta_base $one $two &&
+   git -c "pack.island=refs/heads/(.*)" repack -adi &&
+   ! is_delta_base $one $two &&
+   ! is_delta_base $two $one
+'
+
+test_expect_success 'island regexes are left-anchored' '
+   git -c "pack.island=heads/(.*)" repack -adfi &&
+   is_delta_base $one $two
+'
+
+test_expect_success 'island regexes follow last-one-wins scheme' '
+   git \
+   -c "pack.island=refs/heads/(.*)" \
+   -c "pack.island=refs/heads/" \
+   repack -adfi &&
+   is_delta_base $one $two
+'
+
+test_expect_success 'setup shared history' '
+   commit root shared root &&
+   commit one shared 1 root &&
+   commit two shared 12-long root
+'
+
+# We know that $two will be preferred as a base from $one,
+# because we can transform it with a pure deletion.
+#
+# We also expect $root as a delta against $two by the "longest is base" rule.
+test_expect_success 'vanilla delta goes between branches' '
+   git repack -adf &&
+   is_delta_base $one $two &&
+   is_delta_base $root $two
+'
+
+# Here we should allow $one to base itself on $root; even though
+# they are in different islands, the objects in $root are in a superset
+# of islands compared to those in $one.
+#
+# Similarly, $two can delta against $root by our rules. And unlike $one,
+# in which we are just allowing it, the island rules actually put $root
+# as a possible base for $two, which it would not otherwise be (due to the size
+# sorting).
+test_expect_success 'deltas allowed against superset islands'

[PATCH v2 0/6] Add delta islands support

2018-08-05 Thread Christian Couder
This patch series is upstreaming work made by GitHub and available in:

https://github.com/peff/git/commits/jk/delta-islands

The above work has been already described in the following article:

https://githubengineering.com/counting-objects/

The above branch contains only one patch. In this patch series the
patch has been split into 5 patches (1/6 to 5/6) with their own commit
message, and on top of that one patch (6/6) has been added. This patch
implements something that was requested following the previous
iteration.

I kept Peff as the author of the first 5 patches and took the liberty
to add his Signed-off-by to them.

As explained in details in the Counting Object article referenced
above, the goal of the delta island mechanism is for a hosting
provider to make it possible to have the "forks" of a repository share
as much storage as possible while preventing object packs to contain
deltas between different forks.

If deltas between different forks are not prevented, when users clone
or fetch a fork, preparing the pack that should be sent to them can be
very costly CPU wise, as objects from a different fork should not be
sent, which means that a lot of deltas might need to be computed
again (instead of reusing existing deltas).


The following changes have been made since the previous iteration:

* suggested Dscho: explain in the cover letter what the patches are
  all about

* suggested by Peff and Junio: improve the commit messages

* suggested by Junio: add comment before get_delta_base() in
  "packfile.h" in patch 1/6

* suggested by Duy: move 'pack.island' documentation (in
  "Documentation/config.txt") from patch 2/6 to patch 3/6

* suggested by Junio: improve pack.island documentation (in
  "Documentation/config.txt") to tell that it is an ERE in patch 3/6

* suggested by Peff: add doc about 'pack.islandCore' in patch 3/6

* suggested by Peff: add info about repacking with a big --window to
  avoid the delta window being clogged
  "Documentation/git-pack-objects.txt" in patch 3/6

* suggested by Duy: remove `#include "builtin.h"` from delta-islands.c
  in patch 2/6

* suggested by Duy: mark strings for translation in patch 2/6

* suggested by Peff: modernize code using ALLOC_ARRAY, QSORT() and
  free_tree_buffer() in patch 2/6

* suggested by Peff: use "respect islands during delta compression" as
  help text for --delta-islands in "builtin/pack-objects.c" in patch
  3/6

* suggested by Junio: improve documentation explaining how capture groups from
  the pack.island regexes are concatenated in
  Documentation/git-pack-objects.txt in patch 3/6

* suggested by Junio: add that only up to 7 capture groups are supported in
  the pack.island regexes in Documentation/git-pack-objects.txt in patch
  3/6

* suggested by Peff: move test script from the t99XX range to the t53XX range
  in commit 5/6

* suggested by Duy: move field 'tree_depth' from 'struct object_entry'
  to 'struct packing_data' in pack-object.h in new patch 6/6


The following changes have been suggested in the previous iteration,
but have not been implemented:

* suggested by Peff: rename get_delta_base() in patch 1/6

I am not sure which name to use, especially as there are a number of
other functions static to "packfile.c" with a name that starts with
"get_delta_base" and they should probably be renamed too.

* suggested by Duy: move field 'layer' from 'struct object_entry' to 'struct
  packing_data' in pack-object.h

I will respond in the original email about this.

* suggested by Peff: using FLEX_ALLOC_MEM() in island_bitmap_new() in
  patch 2/6

In his email Peff says that'd waste 4 bytes per struct, so it's not
worth it in my opinion.


This patch series is also available on GitHub in:

https://github.com/chriscool/git/commits/delta-islands

The previous version is available there:

https://github.com/chriscool/git/commits/delta-islands6
https://public-inbox.org/git/20180722054836.28935-1-chrisc...@tuxfamily.org/

Christian Couder (1):
  pack-objects: move tree_depth into 'struct packing_data'

Jeff King (5):
  packfile: make get_delta_base() non static
  Add delta-islands.{c,h}
  pack-objects: add delta-islands support
  repack: add delta-islands support
  t: add t5319-delta-islands.sh

 Documentation/config.txt   |  19 ++
 Documentation/git-pack-objects.txt |  97 ++
 Documentation/git-repack.txt   |   5 +
 Makefile   |   1 +
 builtin/pack-objects.c | 142 ++---
 builtin/repack.c   |   9 +
 delta-islands.c| 496 +
 delta-islands.h|  11 +
 pack-objects.h |   6 +
 packfile.c |  10 +-
 packfile.h |   7 +
 t/t5319-delta-islands

[PATCH v2 4/6] repack: add delta-islands support

2018-08-05 Thread Christian Couder
From: Jeff King 

Implement simple support for --delta-islands option and
repack.useDeltaIslands config variable in git repack.

This allows users to setup delta islands in their config and
get the benefit of less disk usage while cloning and fetching
is still quite fast and not much more CPU intensive.

Signed-off-by: Jeff King 
Signed-off-by: Christian Couder 
---
 Documentation/config.txt | 4 
 Documentation/git-repack.txt | 5 +
 builtin/repack.c | 9 +
 3 files changed, 18 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index b74a1f60f4..c41febc373 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -3102,6 +3102,10 @@ repack.packKeptObjects::
index is being written (either via `--write-bitmap-index` or
`repack.writeBitmaps`).
 
+repack.useDeltaIslands::
+   If set to true, makes `git repack` act as if `--delta-islands`
+   was passed. Defaults to `false`.
+
 repack.writeBitmaps::
When true, git will write a bitmap index when packing all
objects to disk (e.g., when `git repack -a` is run).  This
diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
index d90e7907f4..a8b2d4722f 100644
--- a/Documentation/git-repack.txt
+++ b/Documentation/git-repack.txt
@@ -155,6 +155,11 @@ depth is 4095.
being removed. In addition, any unreachable loose objects will
be packed (and their loose counterparts removed).
 
+-i::
+--delta-islands::
+   Pass the `--delta-islands` option to `git-pack-objects`, see
+   linkgit:git-pack-objects[1].
+
 Configuration
 -
 
diff --git a/builtin/repack.c b/builtin/repack.c
index 6c636e159e..5ab9ee69e4 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -12,6 +12,7 @@
 static int delta_base_offset = 1;
 static int pack_kept_objects = -1;
 static int write_bitmaps;
+static int use_delta_islands;
 static char *packdir, *packtmp;
 
 static const char *const git_repack_usage[] = {
@@ -40,6 +41,10 @@ static int repack_config(const char *var, const char *value, 
void *cb)
write_bitmaps = git_config_bool(var, value);
return 0;
}
+   if (!strcmp(var, "repack.usedeltaislands")) {
+   use_delta_islands = git_config_bool(var, value);
+   return 0;
+   }
return git_default_config(var, value, cb);
 }
 
@@ -194,6 +199,8 @@ int cmd_repack(int argc, const char **argv, const char 
*prefix)
N_("pass --local to git-pack-objects")),
OPT_BOOL('b', "write-bitmap-index", &write_bitmaps,
N_("write bitmap index")),
+   OPT_BOOL('i', "delta-islands", &use_delta_islands,
+   N_("pass --delta-islands to git-pack-objects")),
OPT_STRING(0, "unpack-unreachable", &unpack_unreachable, 
N_("approxidate"),
N_("with -A, do not loosen objects older than 
this")),
OPT_BOOL('k', "keep-unreachable", &keep_unreachable,
@@ -267,6 +274,8 @@ int cmd_repack(int argc, const char **argv, const char 
*prefix)
argv_array_pushf(&cmd.args, "--no-reuse-object");
if (write_bitmaps)
argv_array_push(&cmd.args, "--write-bitmap-index");
+   if (use_delta_islands)
+   argv_array_push(&cmd.args, "--delta-islands");
 
if (pack_everything & ALL_INTO_ONE) {
get_non_kept_pack_filenames(&existing_packs, &keep_pack_list);
-- 
2.18.0.327.ga7d188ab43



[PATCH v2 3/6] pack-objects: add delta-islands support

2018-08-05 Thread Christian Couder
From: Jeff King 

Implement support for delta islands in git pack-objects
and document how delta islands work in
"Documentation/git-pack-objects.txt" and Documentation/config.txt.

This allows users to setup delta islands in their config and
get the benefit of less disk usage while cloning and fetching
is still quite fast and not much more CPU intensive.

Signed-off-by: Jeff King 
Signed-off-by: Christian Couder 
---
 Documentation/config.txt   |  15 
 Documentation/git-pack-objects.txt |  97 +
 builtin/pack-objects.c | 130 -
 3 files changed, 201 insertions(+), 41 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 43b2de7b5f..b74a1f60f4 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2542,6 +2542,21 @@ Note that changing the compression level will not 
automatically recompress
 all existing objects. You can force recompression by passing the -F option
 to linkgit:git-repack[1].
 
+pack.island::
+   An extended regular expression configuring a set of delta
+   islands. See "DELTA ISLANDS" in linkgit:git-pack-objects[1]
+   for details.
+
+pack.islandCore::
+   Specify an island name which gets to have its objects be
+   packed first. This creates a kind of pseudo-pack at the front
+   of one pack, so that the objects from the specified island are
+   hopefully faster to copy into any pack that should be served
+   to a user requesting these objects. In practice this means
+   that the island specified should likely correspond to what is
+   the most commonly cloned in the repo. See also "DELTA ISLANDS"
+   in linkgit:git-pack-objects[1].
+
 pack.deltaCacheSize::
The maximum memory in bytes used for caching deltas in
linkgit:git-pack-objects[1] before writing them out to a pack.
diff --git a/Documentation/git-pack-objects.txt 
b/Documentation/git-pack-objects.txt
index d95b472d16..05f49c1d17 100644
--- a/Documentation/git-pack-objects.txt
+++ b/Documentation/git-pack-objects.txt
@@ -289,6 +289,103 @@ Unexpected missing object will raise an error.
 --unpack-unreachable::
Keep unreachable objects in loose form. This implies `--revs`.
 
+--delta-islands::
+   Restrict delta matches based on "islands". See DELTA ISLANDS
+   below.
+
+
+DELTA ISLANDS
+-
+
+When possible, `pack-objects` tries to reuse existing on-disk deltas to
+avoid having to search for new ones on the fly. This is an important
+optimization for serving fetches, because it means the server can avoid
+inflating most objects at all and just send the bytes directly from
+disk. This optimization can't work when an object is stored as a delta
+against a base which the receiver does not have (and which we are not
+already sending). In that case the server "breaks" the delta and has to
+find a new one, which has a high CPU cost. Therefore it's important for
+performance that the set of objects in on-disk delta relationships match
+what a client would fetch.
+
+In a normal repository, this tends to work automatically. The objects
+are mostly reachable from the branches and tags, and that's what clients
+fetch. Any deltas we find on the server are likely to be between objects
+the client has or will have.
+
+But in some repository setups, you may have several related but separate
+groups of ref tips, with clients tending to fetch those groups
+independently. For example, imagine that you are hosting several "forks"
+of a repository in a single shared object store, and letting clients
+view them as separate repositories through `GIT_NAMESPACE` or separate
+repos using the alternates mechanism. A naive repack may find that the
+optimal delta for an object is against a base that is only found in
+another fork. But when a client fetches, they will not have the base
+object, and we'll have to find a new delta on the fly.
+
+A similar situation may exist if you have many refs outside of
+`refs/heads/` and `refs/tags/` that point to related objects (e.g.,
+`refs/pull` or `refs/changes` used by some hosting providers). By
+default, clients fetch only heads and tags, and deltas against objects
+found only in those other groups cannot be sent as-is.
+
+Delta islands solve this problem by allowing you to group your refs into
+distinct "islands". Pack-objects computes which objects are reachable
+from which islands, and refuses to make a delta from an object `A`
+against a base which is not present in all of `A`'s islands. This
+results in slightly larger packs (because we miss some delta
+opportunities), but guarantees that a fetch of one island will not have
+to recompute deltas on the fly due to crossing island boundaries.
+
+When repacking with delta islands the delta window tends to get
+clogged with candidates that are forbidden by the config. Repac

[PATCH v2 2/6] Add delta-islands.{c,h}

2018-08-05 Thread Christian Couder
From: Jeff King 

Hosting providers that allow users to "fork" existing
repos want those forks to share as much disk space as
possible.

Alternates are an existing solution to keep all the
objects from all the forks into a unique central repo,
but this can have some drawbacks. Especially when
packing the central repo, deltas will be created
between objects from different forks.

This can make cloning or fetching a fork much slower
and much more CPU intensive as Git might have to
compute new deltas for many objects to avoid sending
objects from a different fork.

Because the inefficiency primarily arises when an
object is delitified against another object that does
not exist in the same fork, we partition objects into
sets that appear in the same fork, and define
"delta islands". When finding delta base, we do not
allow an object outside the same island to be
considered as its base.

So "delta islands" is a way to store objects from
different forks in the same repo and packfile without
having deltas between objects from different forks.

This patch implements the delta islands mechanism in
"delta-islands.{c,h}", but does not yet make use of it.

A few new fields are added in 'struct object_entry'
in "pack-objects.h" though.

The documentation will follow in a patch that actually
uses delta islands in "builtin/pack-objects.c".

Signed-off-by: Jeff King 
Signed-off-by: Christian Couder 
---
 Makefile|   1 +
 delta-islands.c | 487 
 delta-islands.h |  11 ++
 pack-objects.h  |   4 +
 4 files changed, 503 insertions(+)
 create mode 100644 delta-islands.c
 create mode 100644 delta-islands.h

diff --git a/Makefile b/Makefile
index 08e5c54549..2804298977 100644
--- a/Makefile
+++ b/Makefile
@@ -840,6 +840,7 @@ LIB_OBJS += csum-file.o
 LIB_OBJS += ctype.o
 LIB_OBJS += date.o
 LIB_OBJS += decorate.o
+LIB_OBJS += delta-islands.o
 LIB_OBJS += diffcore-break.o
 LIB_OBJS += diffcore-delta.o
 LIB_OBJS += diffcore-order.o
diff --git a/delta-islands.c b/delta-islands.c
new file mode 100644
index 00..f7902a64ad
--- /dev/null
+++ b/delta-islands.c
@@ -0,0 +1,487 @@
+#include "cache.h"
+#include "attr.h"
+#include "object.h"
+#include "blob.h"
+#include "commit.h"
+#include "tag.h"
+#include "tree.h"
+#include "delta.h"
+#include "pack.h"
+#include "tree-walk.h"
+#include "diff.h"
+#include "revision.h"
+#include "list-objects.h"
+#include "progress.h"
+#include "refs.h"
+#include "khash.h"
+#include "pack-bitmap.h"
+#include "pack-objects.h"
+#include "delta-islands.h"
+#include "sha1-array.h"
+#include "config.h"
+
+KHASH_INIT(str, const char *, void *, 1, kh_str_hash_func, kh_str_hash_equal)
+
+static khash_sha1 *island_marks;
+static unsigned island_counter;
+static unsigned island_counter_core;
+
+static kh_str_t *remote_islands;
+
+struct remote_island {
+   uint64_t hash;
+   struct oid_array oids;
+};
+
+struct island_bitmap {
+   uint32_t refcount;
+   uint32_t bits[];
+};
+
+static uint32_t island_bitmap_size;
+
+/*
+ * Allocate a new bitmap; if "old" is not NULL, the new bitmap will be a copy
+ * of "old". Otherwise, the new bitmap is empty.
+ */
+static struct island_bitmap *island_bitmap_new(const struct island_bitmap *old)
+{
+   size_t size = sizeof(struct island_bitmap) + (island_bitmap_size * 4);
+   struct island_bitmap *b = xcalloc(1, size);
+
+   if (old)
+   memcpy(b, old, size);
+
+   b->refcount = 1;
+   return b;
+}
+
+static void island_bitmap_or(struct island_bitmap *a, const struct 
island_bitmap *b)
+{
+   uint32_t i;
+
+   for (i = 0; i < island_bitmap_size; ++i)
+   a->bits[i] |= b->bits[i];
+}
+
+static int island_bitmap_is_subset(struct island_bitmap *self,
+   struct island_bitmap *super)
+{
+   uint32_t i;
+
+   if (self == super)
+   return 1;
+
+   for (i = 0; i < island_bitmap_size; ++i) {
+   if ((self->bits[i] & super->bits[i]) != self->bits[i])
+   return 0;
+   }
+
+   return 1;
+}
+
+#define ISLAND_BITMAP_BLOCK(x) (x / 32)
+#define ISLAND_BITMAP_MASK(x) (1 << (x % 32))
+
+static void island_bitmap_set(struct island_bitmap *self, uint32_t i)
+{
+   self->bits[ISLAND_BITMAP_BLOCK(i)] |= ISLAND_BITMAP_MASK(i);
+}
+
+static int island_bitmap_get(struct island_bitmap *self, uint32_t i)
+{
+   return (self->bits[ISLAND_BITMAP_BLOCK(i)] & ISLAND_BITMAP_MASK(i)) != 
0;
+}
+
+int in_same_island(const struct object_id *trg_oid, const struct object_id 
*src_oid)
+{
+   khiter_t trg_pos, src_pos;
+
+   /* If we aren't using islan

[PATCH v2 1/6] packfile: make get_delta_base() non static

2018-08-05 Thread Christian Couder
From: Jeff King 

As get_delta_base() will be used outside 'packfile.c' in
a following commit, let's make it non static and let's
declare it in 'packfile.h'.

Signed-off-by: Jeff King 
Signed-off-by: Christian Couder 
---
 packfile.c | 10 +-
 packfile.h |  7 +++
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/packfile.c b/packfile.c
index 7cd45aa4b2..4646bff5ff 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1037,11 +1037,11 @@ const struct packed_git *has_packed_and_bad(const 
unsigned char *sha1)
return NULL;
 }
 
-static off_t get_delta_base(struct packed_git *p,
-   struct pack_window **w_curs,
-   off_t *curpos,
-   enum object_type type,
-   off_t delta_obj_offset)
+off_t get_delta_base(struct packed_git *p,
+struct pack_window **w_curs,
+off_t *curpos,
+enum object_type type,
+off_t delta_obj_offset)
 {
unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
off_t base_offset;
diff --git a/packfile.h b/packfile.h
index cc7eaffe1b..1265fd9b06 100644
--- a/packfile.h
+++ b/packfile.h
@@ -126,6 +126,13 @@ extern unsigned long unpack_object_header_buffer(const 
unsigned char *buf, unsig
 extern unsigned long get_size_from_delta(struct packed_git *, struct 
pack_window **, off_t);
 extern int unpack_object_header(struct packed_git *, struct pack_window **, 
off_t *, unsigned long *);
 
+/*
+ * Return the offset of the object that is the delta base of the object at 
curpos.
+ */
+extern off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
+   off_t *curpos, enum object_type type,
+   off_t delta_obj_offset);
+
 extern void release_pack_memory(size_t);
 
 /* global flag to enable extra checks when accessing packed objects */
-- 
2.18.0.327.ga7d188ab43



Re: What's cooking in git.git (Jul 2018, #03; Wed, 25)

2018-08-02 Thread Christian Couder
On Thu, Jul 26, 2018 at 6:57 PM, Junio C Hamano  wrote:
> Оля Тележная   writes:
>
>> 2018-07-26 1:13 GMT+03:00 Junio C Hamano :
>>>
>>> * ot/ref-filter-object-info (2018-07-17) 5 commits
>>>  - ref-filter: use oid_object_info() to get object
>>>  - ref-filter: merge get_obj and get_object
>>>  - ref-filter: initialize eaten variable
>>>  - ref-filter: fill empty fields with empty values
>>>  - ref-filter: add info_source to valid_atom
>>>
>>>  A few atoms like %(objecttype) and %(objectsize) in the format
>>>  specifier of "for-each-ref --format=" can be filled without
>>>  getting the full contents of the object, but just with the object
>>>  header.  These cases have been optimzied by calling
>>>  oid_object_info() API.
>>>
>>>  What's the doneness of this one?
>>
>> It is ready. Thanks.
>
> Thanks, the question was meant more to the reviewers and mentors
> than the original author, though ;-)

I just tested this patch series on git.git on my laptop with the following:

$ time git for-each-ref --format='%(objecttype)' >/dev/null

and the best I can get without it is:

real0m0,038s
user0m0,034s
sys 0m0,004s

while with it I can get:

real0m0,017s
user0m0,016s
sys 0m0,000s

The results are similar with --format='%(objectsize)'.

So I think it is a nice improvement.

Looking at the code of the patches again, I can't see any simple way
to improve on the way it is done.

Thanks,
Christian.


[PATCH v4 9/9] Documentation/config: add odb..promisorRemote

2018-08-01 Thread Christian Couder
From: Christian Couder 

Signed-off-by: Junio C Hamano 
---
 Documentation/config.txt | 5 +
 1 file changed, 5 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 43b2de7b5f..2d048d47f2 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2513,6 +2513,11 @@ This setting can be overridden with the 
`GIT_NOTES_REWRITE_REF`
 environment variable, which must be a colon separated list of refs or
 globs.
 
+odb..promisorRemote::
+   The name of a promisor remote. For now promisor remotes are
+   the only kind of remote object database (odb) that is
+   supported.
+
 pack.window::
The size of the window used by linkgit:git-pack-objects[1] when no
window size is given on the command line. Defaults to 10.
-- 
2.18.0.330.g17eb9fed90



[PATCH v4 1/9] fetch-object: make functions return an error code

2018-08-01 Thread Christian Couder
From: Christian Couder 

The callers of the fetch_object() and fetch_objects() might
be interested in knowing if these functions succeeded or not.

Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 fetch-object.c | 15 +--
 fetch-object.h |  6 +++---
 sha1-file.c|  4 ++--
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/fetch-object.c b/fetch-object.c
index 48fe63dd6c..3c52009266 100644
--- a/fetch-object.c
+++ b/fetch-object.c
@@ -5,11 +5,12 @@
 #include "transport.h"
 #include "fetch-object.h"
 
-static void fetch_refs(const char *remote_name, struct ref *ref)
+static int fetch_refs(const char *remote_name, struct ref *ref)
 {
struct remote *remote;
struct transport *transport;
int original_fetch_if_missing = fetch_if_missing;
+   int res;
 
fetch_if_missing = 0;
remote = remote_get(remote_name);
@@ -19,18 +20,20 @@ static void fetch_refs(const char *remote_name, struct ref 
*ref)
 
transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1");
-   transport_fetch_refs(transport, ref, NULL);
+   res = transport_fetch_refs(transport, ref, NULL);
fetch_if_missing = original_fetch_if_missing;
+
+   return res;
 }
 
-void fetch_object(const char *remote_name, const unsigned char *sha1)
+int fetch_object(const char *remote_name, const unsigned char *sha1)
 {
struct ref *ref = alloc_ref(sha1_to_hex(sha1));
hashcpy(ref->old_oid.hash, sha1);
-   fetch_refs(remote_name, ref);
+   return fetch_refs(remote_name, ref);
 }
 
-void fetch_objects(const char *remote_name, const struct oid_array *to_fetch)
+int fetch_objects(const char *remote_name, const struct oid_array *to_fetch)
 {
struct ref *ref = NULL;
int i;
@@ -41,5 +44,5 @@ void fetch_objects(const char *remote_name, const struct 
oid_array *to_fetch)
new_ref->next = ref;
ref = new_ref;
}
-   fetch_refs(remote_name, ref);
+   return fetch_refs(remote_name, ref);
 }
diff --git a/fetch-object.h b/fetch-object.h
index 4b269d07ed..12e1f9ee70 100644
--- a/fetch-object.h
+++ b/fetch-object.h
@@ -3,9 +3,9 @@
 
 #include "sha1-array.h"
 
-extern void fetch_object(const char *remote_name, const unsigned char *sha1);
+extern int fetch_object(const char *remote_name, const unsigned char *sha1);
 
-extern void fetch_objects(const char *remote_name,
- const struct oid_array *to_fetch);
+extern int fetch_objects(const char *remote_name,
+const struct oid_array *to_fetch);
 
 #endif
diff --git a/sha1-file.c b/sha1-file.c
index de4839e634..c099f5584d 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -1312,8 +1312,8 @@ int oid_object_info_extended(struct repository *r, const 
struct object_id *oid,
if (fetch_if_missing && repository_format_partial_clone &&
!already_retried && r == the_repository) {
/*
-* TODO Investigate having fetch_object() return
-* TODO error/success and stopping the music here.
+* TODO Investigate checking fetch_object() return
+* TODO value and stopping on error here.
 * TODO Pass a repository struct through fetch_object,
 * such that arbitrary repositories work.
 */
-- 
2.18.0.330.g17eb9fed90



[PATCH v4 2/9] Add initial remote odb support

2018-08-01 Thread Christian Couder
From: Christian Couder 

The remote-odb.{c,h} files will contain the functions
that are called by the rest of Git mostly from
"sha1-file.c" to access the objects managed by the
remote odbs.

The odb-helper.{c,h} files will contain the functions to
actually implement communication with either the internal
functions or the external scripts or processes that will
manage and provide remote git objects.

For now only infrastructure to create helpers from the
config and to check if there are remote odbs and odb
helpers is implemented.

We expect that there will not be a lot of helpers, so it
is ok to use a simple linked list to manage them.

Helped-by: Jeff King 
Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 Makefile |  2 ++
 odb-helper.c | 16 +
 odb-helper.h | 19 +++
 remote-odb.c | 91 
 remote-odb.h |  7 
 5 files changed, 135 insertions(+)
 create mode 100644 odb-helper.c
 create mode 100644 odb-helper.h
 create mode 100644 remote-odb.c
 create mode 100644 remote-odb.h

diff --git a/Makefile b/Makefile
index 08e5c54549..2a9bd02208 100644
--- a/Makefile
+++ b/Makefile
@@ -896,6 +896,8 @@ LIB_OBJS += notes-cache.o
 LIB_OBJS += notes-merge.o
 LIB_OBJS += notes-utils.o
 LIB_OBJS += object.o
+LIB_OBJS += odb-helper.o
+LIB_OBJS += remote-odb.o
 LIB_OBJS += oidmap.o
 LIB_OBJS += oidset.o
 LIB_OBJS += packfile.o
diff --git a/odb-helper.c b/odb-helper.c
new file mode 100644
index 00..b4d403ffa9
--- /dev/null
+++ b/odb-helper.c
@@ -0,0 +1,16 @@
+#include "cache.h"
+#include "object.h"
+#include "argv-array.h"
+#include "odb-helper.h"
+#include "run-command.h"
+#include "sha1-lookup.h"
+
+struct odb_helper *odb_helper_new(const char *name, int namelen)
+{
+   struct odb_helper *o;
+
+   o = xcalloc(1, sizeof(*o));
+   o->name = xmemdupz(name, namelen);
+
+   return o;
+}
diff --git a/odb-helper.h b/odb-helper.h
new file mode 100644
index 00..4b792a3896
--- /dev/null
+++ b/odb-helper.h
@@ -0,0 +1,19 @@
+#ifndef ODB_HELPER_H
+#define ODB_HELPER_H
+
+/*
+ * An odb helper is a way to access a remote odb.
+ *
+ * Information in its fields comes from the config file [odb "NAME"]
+ * entries.
+ */
+struct odb_helper {
+   const char *name; /* odb..* */
+   const char *remote;   /* odb..promisorRemote */
+
+   struct odb_helper *next;
+};
+
+extern struct odb_helper *odb_helper_new(const char *name, int namelen);
+
+#endif /* ODB_HELPER_H */
diff --git a/remote-odb.c b/remote-odb.c
new file mode 100644
index 00..03be54ba2e
--- /dev/null
+++ b/remote-odb.c
@@ -0,0 +1,91 @@
+#include "cache.h"
+#include "remote-odb.h"
+#include "odb-helper.h"
+#include "config.h"
+
+static struct odb_helper *helpers;
+static struct odb_helper **helpers_tail = &helpers;
+
+static struct odb_helper *find_or_create_helper(const char *name, int len)
+{
+   struct odb_helper *o;
+
+   for (o = helpers; o; o = o->next)
+   if (!strncmp(o->name, name, len) && !o->name[len])
+   return o;
+
+   o = odb_helper_new(name, len);
+   *helpers_tail = o;
+   helpers_tail = &o->next;
+
+   return o;
+}
+
+static struct odb_helper *do_find_odb_helper(const char *remote)
+{
+   struct odb_helper *o;
+
+   for (o = helpers; o; o = o->next)
+   if (o->remote && !strcmp(o->remote, remote))
+   return o;
+
+   return NULL;
+}
+
+static int remote_odb_config(const char *var, const char *value, void *data)
+{
+   struct odb_helper *o;
+   const char *name;
+   int namelen;
+   const char *subkey;
+
+   if (parse_config_key(var, "odb", &name, &namelen, &subkey) < 0)
+   return 0;
+
+   o = find_or_create_helper(name, namelen);
+
+   if (!strcmp(subkey, "promisorremote")) {
+   const char *remote;
+   int res = git_config_string(&remote, var, value);
+
+   if (res)
+   return res;
+
+   if (do_find_odb_helper(remote))
+   return error(_("when parsing config key '%s' "
+  "helper for remote '%s' already exists"),
+var, remote);
+
+   o->remote = remote;
+
+   return 0;
+   }
+
+   return 0;
+}
+
+static void remote_odb_init(void)
+{
+   static int initialized;
+
+   if (initialized)
+   return;
+   initialized = 1;
+
+   git_config(remote_odb_config, NULL);
+}
+
+struct odb_helper *find_odb_helper(const char *remote)
+{
+   remote_odb_init();
+
+   if (!remote)
+   return help

[PATCH v4 3/9] remote-odb: implement remote_odb_get_direct()

2018-08-01 Thread Christian Couder
From: Christian Couder 

This is implemented only in the promisor remote mode
for now by calling fetch_object().

Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 odb-helper.c | 14 ++
 odb-helper.h |  3 ++-
 remote-odb.c | 17 +
 remote-odb.h |  1 +
 4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/odb-helper.c b/odb-helper.c
index b4d403ffa9..99b5a345e8 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -4,6 +4,7 @@
 #include "odb-helper.h"
 #include "run-command.h"
 #include "sha1-lookup.h"
+#include "fetch-object.h"
 
 struct odb_helper *odb_helper_new(const char *name, int namelen)
 {
@@ -14,3 +15,16 @@ struct odb_helper *odb_helper_new(const char *name, int 
namelen)
 
return o;
 }
+
+int odb_helper_get_direct(struct odb_helper *o,
+ const unsigned char *sha1)
+{
+   int res;
+   uint64_t start = getnanotime();
+
+   res = fetch_object(o->remote, sha1);
+
+   trace_performance_since(start, "odb_helper_get_direct");
+
+   return res;
+}
diff --git a/odb-helper.h b/odb-helper.h
index 4b792a3896..4c52e81ce8 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -15,5 +15,6 @@ struct odb_helper {
 };
 
 extern struct odb_helper *odb_helper_new(const char *name, int namelen);
-
+extern int odb_helper_get_direct(struct odb_helper *o,
+const unsigned char *sha1);
 #endif /* ODB_HELPER_H */
diff --git a/remote-odb.c b/remote-odb.c
index 03be54ba2e..7f815c7ebb 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -89,3 +89,20 @@ int has_remote_odb(void)
 {
return !!find_odb_helper(NULL);
 }
+
+int remote_odb_get_direct(const unsigned char *sha1)
+{
+   struct odb_helper *o;
+
+   trace_printf("trace: remote_odb_get_direct: %s", sha1_to_hex(sha1));
+
+   remote_odb_init();
+
+   for (o = helpers; o; o = o->next) {
+   if (odb_helper_get_direct(o, sha1) < 0)
+   continue;
+   return 0;
+   }
+
+   return -1;
+}
diff --git a/remote-odb.h b/remote-odb.h
index 4c7b13695f..c5384c922d 100644
--- a/remote-odb.h
+++ b/remote-odb.h
@@ -3,5 +3,6 @@
 
 extern struct odb_helper *find_odb_helper(const char *remote);
 extern int has_remote_odb(void);
+extern int remote_odb_get_direct(const unsigned char *sha1);
 
 #endif /* REMOTE_ODB_H */
-- 
2.18.0.330.g17eb9fed90



[PATCH v4 4/9] remote-odb: implement remote_odb_get_many_direct()

2018-08-01 Thread Christian Couder
From: Christian Couder 

This function will be used to get many objects from a promisor
remote.

Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 odb-helper.c | 15 +++
 odb-helper.h |  3 +++
 remote-odb.c | 17 +
 remote-odb.h |  1 +
 4 files changed, 36 insertions(+)

diff --git a/odb-helper.c b/odb-helper.c
index 99b5a345e8..246ebf8f7a 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -28,3 +28,18 @@ int odb_helper_get_direct(struct odb_helper *o,
 
return res;
 }
+
+int odb_helper_get_many_direct(struct odb_helper *o,
+  const struct oid_array *to_get)
+{
+   int res;
+   uint64_t start;
+
+   start = getnanotime();
+
+   res = fetch_objects(o->remote, to_get);
+
+   trace_performance_since(start, "odb_helper_get_many_direct");
+
+   return res;
+}
diff --git a/odb-helper.h b/odb-helper.h
index 4c52e81ce8..154ce4c7e4 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -17,4 +17,7 @@ struct odb_helper {
 extern struct odb_helper *odb_helper_new(const char *name, int namelen);
 extern int odb_helper_get_direct(struct odb_helper *o,
 const unsigned char *sha1);
+extern int odb_helper_get_many_direct(struct odb_helper *o,
+ const struct oid_array *to_get);
+
 #endif /* ODB_HELPER_H */
diff --git a/remote-odb.c b/remote-odb.c
index 7f815c7ebb..09dfc2e16f 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -106,3 +106,20 @@ int remote_odb_get_direct(const unsigned char *sha1)
 
return -1;
 }
+
+int remote_odb_get_many_direct(const struct oid_array *to_get)
+{
+   struct odb_helper *o;
+
+   trace_printf("trace: remote_odb_get_many_direct: nr: %d", to_get->nr);
+
+   remote_odb_init();
+
+   for (o = helpers; o; o = o->next) {
+   if (odb_helper_get_many_direct(o, to_get) < 0)
+   continue;
+   return 0;
+   }
+
+   return -1;
+}
diff --git a/remote-odb.h b/remote-odb.h
index c5384c922d..e10a8bf855 100644
--- a/remote-odb.h
+++ b/remote-odb.h
@@ -4,5 +4,6 @@
 extern struct odb_helper *find_odb_helper(const char *remote);
 extern int has_remote_odb(void);
 extern int remote_odb_get_direct(const unsigned char *sha1);
+extern int remote_odb_get_many_direct(const struct oid_array *to_get);
 
 #endif /* REMOTE_ODB_H */
-- 
2.18.0.330.g17eb9fed90



[PATCH v4 0/9] Introducing remote ODBs

2018-08-01 Thread Christian Couder
This path series is a follow up from the patch series called "odb
remote" that I sent earlier this year, which was itself a follow up
from previous series. See the links section for more information.

As with the previous "odb remote" series, this series is only about
integrating with the promisor/narrow clone work and showing that it
makes it possible to use more than one promisor remote. Everything
that is not necessary for that integration has been removed for now
(though you can still find it in one of my branches on GitHub if you
want).

There is one test in patch 8/9 that shows that more than one promisor
remote can now be used, but I still feel that it could be interesting
to add other such tests, so I am open to ideas in this area.

Changes compared to V3 of this patch series
~~~

  - the remote_odb_reinit() function is not "inline" anymore in patch
5/9 as suggested by Beat Bolli in:

https://public-inbox.org/git/20180724215223.27516-3-dev+...@drbeat.li/

High level overview of this patch series


All the patches except 5/9 are unchanged compared to V3.

  - Patch 1/9:

This makes functions in fetch-object.c return an error code, which is
necessary to later tell that they failed and try another remote odb
when there is more than one. This could also just be seen as a fix to
these functions.

  - Patch 2/9:

This introduces the minimum infrastructure for remote odbs.

  - Patches 3/9 and 4/9:

These patches implement remote_odb_get_direct() and
remote_odb_get_many_direct() using the functions from fetch-object.c.
These new functions will be used in following patches to replace the
functions from fetch-object.c.

  - Patch 5/9:

This implement remote_odb_reinit() which will be needed to reparse the
remote odb configuration.

  - Patches 6/9 and 7/9:

These patches integrate the remote odb mechanism into the
promisor/narrow clone code. The "extensions.partialClone" config
option is replaced by "odb..promisorRemote" and
"core.partialCloneFilter" is replaced by
"odb..partialCloneFilter".

  - Patch 8/9:

This adds a test case that shows that now more than one promisor
remote can be used.

  - Patch 9/9:

This starts documenting the remote odb mechanism.

Discussion
~~

I am not sure that it is ok to completely replace the
"extensions.partialclone" config option. Even if it is fully replaced,
no "extensions.remoteodb" is implemented in these patches, as maybe
the "extensions.partialclone" name could be kept even if the
underlying mechanism is the remote odb mechanism.

Anyway I think that the remote odb mechanism is much more extensible,
so I think using "extensions.partialclone" to specify a promisor
remote should be at least deprecated.

Links
~

This patch series on GitHub:

V4: https://github.com/chriscool/git/commits/remote-odb
V3: https://github.com/chriscool/git/commits/remote-odb3
V2: https://github.com/chriscool/git/commits/remote-odb2
V1: https://github.com/chriscool/git/commits/remote-odb1

Discussions related to previous versions:

V3: https://public-inbox.org/git/20180713174959.16748-1-chrisc...@tuxfamily.org/
V2: https://public-inbox.org/git/20180630083542.20347-1-chrisc...@tuxfamily.org/
V1: https://public-inbox.org/git/20180623121846.19750-1-chrisc...@tuxfamily.org/

Previous "odb remote" series:

https://public-inbox.org/git/20180513103232.17514-1-chrisc...@tuxfamily.org/
https://github.com/chriscool/git/commits/odb-remote

Version 1 and 2 of the "Promisor remotes and external ODB support" series:

https://public-inbox.org/git/20180103163403.11303-1-chrisc...@tuxfamily.org/
https://public-inbox.org/git/20180319133147.15413-1-chrisc...@tuxfamily.org/

Version 1 and 2 of the "Promisor remotes and external ODB support" series on 
GitHub:

https://github.com/chriscool/git/commits/gl-small-promisor-external-odb12
https://github.com/chriscool/git/commits/gl-small-promisor-external-odb71

Christian Couder (9):
  fetch-object: make functions return an error code
  Add initial remote odb support
  remote-odb: implement remote_odb_get_direct()
  remote-odb: implement remote_odb_get_many_direct()
  remote-odb: add remote_odb_reinit()
  Use remote_odb_get_direct() and has_remote_odb()
  Use odb.origin.partialclonefilter instead of core.partialclonefilter
  t0410: test fetching from many promisor remotes
  Documentation/config: add odb..promisorRemote

 Documentation/config.txt  |   5 ++
 Makefile  |   2 +
 builtin/cat-file.c|   5 +-
 builtin/fetch.c   |  13 ++--
 builtin/gc.c  |   3 +-
 builtin/repack.c  |   3 +-
 cache.h   |   2 -
 connected.c   |   3 +-
 environment.c |   1 -
 fetch-object.c|  15 ++--
 fet

[PATCH v4 8/9] t0410: test fetching from many promisor remotes

2018-08-01 Thread Christian Couder
From: Christian Couder 

Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 t/t0410-partial-clone.sh | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
index 71a9b9a3e8..9d513ebf57 100755
--- a/t/t0410-partial-clone.sh
+++ b/t/t0410-partial-clone.sh
@@ -170,8 +170,30 @@ test_expect_success 'fetching of missing objects' '
git verify-pack --verbose "$IDX" | grep "$HASH"
 '
 
+test_expect_success 'fetching of missing objects from another odb remote' '
+   git clone "file://$(pwd)/server" server2 &&
+   test_commit -C server2 bar &&
+   git -C server2 repack -a -d --write-bitmap-index &&
+   HASH2=$(git -C server2 rev-parse bar) &&
+
+   git -C repo remote add server2 "file://$(pwd)/server2" &&
+   git -C repo config odb.magic2.promisorRemote server2 &&
+   git -C repo cat-file -p "$HASH2" &&
+
+   git -C repo fetch server2 &&
+   rm -rf repo/.git/objects/* &&
+   git -C repo cat-file -p "$HASH2" &&
+
+   # Ensure that the .promisor file is written, and check that its
+   # associated packfile contains the object
+   ls repo/.git/objects/pack/pack-*.promisor >promisorlist &&
+   test_line_count = 1 promisorlist &&
+   IDX=$(cat promisorlist | sed "s/promisor$/idx/") &&
+   git verify-pack --verbose "$IDX" | grep "$HASH2"
+'
+
 test_expect_success 'rev-list stops traversal at missing and promised commit' '
-   rm -rf repo &&
+   rm -rf repo server server2 &&
test_create_repo repo &&
test_commit -C repo foo &&
test_commit -C repo bar &&
-- 
2.18.0.330.g17eb9fed90



[PATCH v4 5/9] remote-odb: add remote_odb_reinit()

2018-08-01 Thread Christian Couder
From: Christian Couder 

We will need to reinitialize the remote odb configuration
as we will make some changes to it in a later commit when
we will detect that a remote is also a remote odb.

Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 remote-odb.c | 14 --
 remote-odb.h |  1 +
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/remote-odb.c b/remote-odb.c
index 09dfc2e16f..f063ba0fda 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -64,17 +64,27 @@ static int remote_odb_config(const char *var, const char 
*value, void *data)
return 0;
 }
 
-static void remote_odb_init(void)
+static void remote_odb_do_init(int force)
 {
static int initialized;
 
-   if (initialized)
+   if (!force && initialized)
return;
initialized = 1;
 
git_config(remote_odb_config, NULL);
 }
 
+static inline void remote_odb_init(void)
+{
+   remote_odb_do_init(0);
+}
+
+void remote_odb_reinit(void)
+{
+   remote_odb_do_init(1);
+}
+
 struct odb_helper *find_odb_helper(const char *remote)
 {
remote_odb_init();
diff --git a/remote-odb.h b/remote-odb.h
index e10a8bf855..79803782af 100644
--- a/remote-odb.h
+++ b/remote-odb.h
@@ -1,6 +1,7 @@
 #ifndef REMOTE_ODB_H
 #define REMOTE_ODB_H
 
+extern void remote_odb_reinit(void);
 extern struct odb_helper *find_odb_helper(const char *remote);
 extern int has_remote_odb(void);
 extern int remote_odb_get_direct(const unsigned char *sha1);
-- 
2.18.0.330.g17eb9fed90



[PATCH v4 7/9] Use odb.origin.partialclonefilter instead of core.partialclonefilter

2018-08-01 Thread Christian Couder
From: Christian Couder 

Let's make the partial clone filter specific to one odb
instead of general to all the odbs.

This makes it possible to have different partial clone
filters for different odbs.

Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 builtin/fetch.c   |  2 +-
 list-objects-filter-options.c | 28 
 list-objects-filter-options.h |  3 ++-
 odb-helper.h  |  1 +
 remote-odb.c  |  2 ++
 t/t5616-partial-clone.sh  |  2 +-
 6 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 9c7df8356c..f3dee73179 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1369,7 +1369,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * the config.
 */
if (!filter_options.choice)
-   partial_clone_get_default_filter_spec(&filter_options);
+   partial_clone_get_default_filter_spec(&filter_options, 
remote->name);
return;
 }
 
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 60452c8f36..755a793664 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -7,6 +7,7 @@
 #include "list-objects-filter.h"
 #include "list-objects-filter-options.h"
 #include "remote-odb.h"
+#include "odb-helper.h"
 
 /*
  * Parse value of the argument to the "filter" keyword.
@@ -29,6 +30,9 @@ static int gently_parse_list_objects_filter(
 {
const char *v0;
 
+   if (!arg)
+   return 0;
+
if (filter_options->choice) {
if (errbuf) {
strbuf_init(errbuf, 0);
@@ -116,6 +120,7 @@ void partial_clone_register(
const struct list_objects_filter_options *filter_options)
 {
char *cfg_name;
+   char *filter_name;
 
/* Check if it is already registered */
if (find_odb_helper(remote))
@@ -131,27 +136,26 @@ void partial_clone_register(
/*
 * Record the initial filter-spec in the config as
 * the default for subsequent fetches from this remote.
-*
-* TODO: move core.partialclonefilter into odb.
 */
-   core_partial_clone_filter_default =
-   xstrdup(filter_options->filter_spec);
-   git_config_set("core.partialclonefilter",
-  core_partial_clone_filter_default);
+   filter_name = xstrfmt("odb.%s.partialclonefilter", remote);
+   git_config_set(filter_name, filter_options->filter_spec);
+   free(filter_name);
 
/* Make sure the config info are reset */
remote_odb_reinit();
 }
 
 void partial_clone_get_default_filter_spec(
-   struct list_objects_filter_options *filter_options)
+   struct list_objects_filter_options *filter_options,
+   const char *remote)
 {
+   struct odb_helper *helper = find_odb_helper(remote);
+
/*
 * Parse default value, but silently ignore it if it is invalid.
 */
-   if (!core_partial_clone_filter_default)
-   return;
-   gently_parse_list_objects_filter(filter_options,
-core_partial_clone_filter_default,
-NULL);
+   if (helper)
+   gently_parse_list_objects_filter(filter_options,
+helper->partial_clone_filter,
+NULL);
 }
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index a61f82..12ceef3230 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -74,6 +74,7 @@ void partial_clone_register(
const char *remote,
const struct list_objects_filter_options *filter_options);
 void partial_clone_get_default_filter_spec(
-   struct list_objects_filter_options *filter_options);
+   struct list_objects_filter_options *filter_options,
+   const char *remote);
 
 #endif /* LIST_OBJECTS_FILTER_OPTIONS_H */
diff --git a/odb-helper.h b/odb-helper.h
index 154ce4c7e4..4af75ef55c 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -10,6 +10,7 @@
 struct odb_helper {
const char *name; /* odb..* */
const char *remote;   /* odb..promisorRemote */
+   const char *partial_clone_filter; /* odb..partialCloneFilter */
 
struct odb_helper *next;
 };
diff --git a/remote-odb.c b/remote-odb.c
index f063ba0fda..49cf8e30aa 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -60,6 +60,8 @@ static int remote_odb_config(const char *var, const char 
*value, void *data)
 
return 0;
}
+   if (!strcmp(subkey, "partialclonefilter"))
+   return git_config_string(&o->partial_clone_filter, var, value);
 
return 0;
 }
diff --git a/t/t5616-par

[PATCH v4 6/9] Use remote_odb_get_direct() and has_remote_odb()

2018-08-01 Thread Christian Couder
From: Christian Couder 

Instead of using the repository_format_partial_clone global
and fetch_object() directly, let's use has_remote_odb() and
remote_odb_get_direct().

Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 builtin/cat-file.c|  5 +++--
 builtin/fetch.c   | 11 ++-
 builtin/gc.c  |  3 ++-
 builtin/repack.c  |  3 ++-
 cache.h   |  2 --
 connected.c   |  3 ++-
 environment.c |  1 -
 list-objects-filter-options.c | 27 +++
 packfile.c|  3 ++-
 setup.c   |  7 +--
 sha1-file.c   | 14 --
 t/t0410-partial-clone.sh  | 34 +-
 t/t5500-fetch-pack.sh |  4 ++--
 t/t5601-clone.sh  |  2 +-
 t/t5616-partial-clone.sh  |  2 +-
 t/t5702-protocol-v2.sh|  2 +-
 unpack-trees.c|  6 +++---
 17 files changed, 66 insertions(+), 63 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 4a44b2404f..4d19e9277e 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -14,6 +14,7 @@
 #include "sha1-array.h"
 #include "packfile.h"
 #include "object-store.h"
+#include "remote-odb.h"
 
 struct batch_options {
int enabled;
@@ -478,8 +479,8 @@ static int batch_objects(struct batch_options *opt)
 
for_each_loose_object(batch_loose_object, &sa, 0);
for_each_packed_object(batch_packed_object, &sa, 0);
-   if (repository_format_partial_clone)
-   warning("This repository has extensions.partialClone 
set. Some objects may not be loaded.");
+   if (has_remote_odb())
+   warning("This repository uses an odb. Some objects may 
not be loaded.");
 
cb.opt = opt;
cb.expand = &data;
diff --git a/builtin/fetch.c b/builtin/fetch.c
index ac06f6a576..9c7df8356c 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -22,6 +22,7 @@
 #include "utf8.h"
 #include "packfile.h"
 #include "list-objects-filter-options.h"
+#include "remote-odb.h"
 
 static const char * const builtin_fetch_usage[] = {
N_("git fetch [] [ [...]]"),
@@ -1339,7 +1340,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * If no prior partial clone/fetch and the current fetch DID NOT
 * request a partial-fetch, do a normal fetch.
 */
-   if (!repository_format_partial_clone && !filter_options.choice)
+   if (!has_remote_odb() && !filter_options.choice)
return;
 
/*
@@ -1347,7 +1348,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * on this repo and remember the given filter-spec as the default
 * for subsequent fetches to this remote.
 */
-   if (!repository_format_partial_clone && filter_options.choice) {
+   if (!has_remote_odb() && filter_options.choice) {
partial_clone_register(remote->name, &filter_options);
return;
}
@@ -1356,7 +1357,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * We are currently limited to only ONE promisor remote and only
 * allow partial-fetches from the promisor remote.
 */
-   if (strcmp(remote->name, repository_format_partial_clone)) {
+   if (!find_odb_helper(remote->name)) {
if (filter_options.choice)
die(_("--filter can only be used with the remote 
configured in core.partialClone"));
return;
@@ -1487,7 +1488,7 @@ int cmd_fetch(int argc, const char **argv, const char 
*prefix)
if (depth || deepen_since || deepen_not.nr)
deepen = 1;
 
-   if (filter_options.choice && !repository_format_partial_clone)
+   if (filter_options.choice && !has_remote_odb())
die("--filter can only be used when extensions.partialClone is 
set");
 
if (all) {
@@ -1521,7 +1522,7 @@ int cmd_fetch(int argc, const char **argv, const char 
*prefix)
}
 
if (remote) {
-   if (filter_options.choice || repository_format_partial_clone)
+   if (filter_options.choice || has_remote_odb())
fetch_one_setup_partial(remote);
result = fetch_one(remote, argc, argv, prune_tags_ok);
} else {
diff --git a/builtin/gc.c b/builtin/gc.c
index ccfb1ceaeb..02c783b514 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -26,6 +26,7 @@
 #include "pack-objects.h"
 #include "blob.h"
 #include "tree.h"
+#include "remote-odb.h"
 
 #define FAILED_RUN "failed to run %

Re: [RFC PATCH 3/5] pack-objects: add delta-islands support

2018-07-28 Thread Christian Couder
On Sat, Jul 28, 2018 at 11:00 AM, Jeff King  wrote:
>
> On Fri, Jul 27, 2018 at 10:22:09AM -0700, Stefan Beller wrote:
>
> > > That would solve the problem that fetching torvalds/linux from GitHub
> > > yields a bigger pack than fetching it from kernel.org. But again, it's
> > > making that root fork weirdly magical. People who fetch solely from
> > > other forks won't get any benefit (and may even see worse packs).
> >
> > Thanks for the explanation.
> > I think this discussion just hints at me being dense reading the
> > documentation. After all I like the concept.
>
> I actually think it hints that the documentation in the commits
> themselves is not adequate. :)

Ok, I will try to improve it using information from the discussion threads.

Thanks,
Christian.


Re: [PATCH 2/2] remote-odb: un-inline function remote_odb_reinit

2018-07-25 Thread Christian Couder
Hi,

On Wed, Jul 25, 2018 at 12:03 AM, Beat Bolli  wrote:
>
> On 24.07.18 23:59, Jonathan Nieder wrote:
>>
>> Beat Bolli wrote:

>>> -inline void remote_odb_reinit(void)
>>> +void remote_odb_reinit(void)
>>
>> This looks like an oversight in
>> https://public-inbox.org/git/20180713174959.16748-6-chrisc...@tuxfamily.org/:
>> there isn't any reason for this function to be inline.
>>
>> Christian, can you squash it in on your next reroll?
>
> That would probably make sense. I didn't check how mature the topics
> were that caused errors.

Ok, it is in the next version I will send.

Thanks,
Christian.


Re: [RFC PATCH 2/5] Add delta-islands.{c,h}

2018-07-22 Thread Christian Couder
On Sun, Jul 22, 2018 at 10:50 AM, Duy Nguyen  wrote:
> On Sun, Jul 22, 2018 at 7:51 AM Christian Couder
>  wrote:

>> +pack.island::
>> +   A regular expression configuring a set of delta islands. See
>> +   "DELTA ISLANDS" in linkgit:git-pack-objects[1] for details.
>> +
>
> That section is not added until 3/5 though.

Yeah, so I guess it is better to move this hunk to 3/5 and keep
pack.island undocumented until the delta islands code is actually used
by pack-objects.

>> diff --git a/delta-islands.c b/delta-islands.c
>> new file mode 100644
>> index 00..645fe966c5
>> --- /dev/null
>> +++ b/delta-islands.c
>> @@ -0,0 +1,490 @@
>> +#include "builtin.h"
>
> A bit weird that builtin.h would be needed...

Yeah, I will get rid of that include in the next iteration.

>> +   if (progress)
>> +   progress_state = start_progress("Propagating island marks", 
>> nr);
>
> _() (same comment for other strings too)

Ok, the strings will be marked for translation in the next iteration.

>> diff --git a/pack-objects.h b/pack-objects.h
>> index edf74dabdd..8eecd67991 100644
>> --- a/pack-objects.h
>> +++ b/pack-objects.h
>> @@ -100,6 +100,10 @@ struct object_entry {
>> unsigned type_:TYPE_BITS;
>> unsigned no_try_delta:1;
>> unsigned in_pack_type:TYPE_BITS; /* could be delta */
>> +
>> +   unsigned int tree_depth; /* should be repositioned for packing? */
>> +   unsigned char layer;
>> +
>
> This looks very much like an optional feature. To avoid increasing
> pack-objects memory usage for common case, please move this to struct
> packing_data.

Ok, I will take a look at that.

Thanks for the review,
Christian.


[RFC PATCH 0/5] Add delta islands support

2018-07-21 Thread Christian Couder
This patch series is upstreaming work made by GitHub and available in:

https://github.com/peff/git/commits/jk/delta-islands

The patch in the above branch has been split into 5 patches with their
own new commit message, but no other change has been made.

I kept Peff as the author and took the liberty to add his
Signed-off-by to all the patches.

The patches look good to me except perhaps that "pack.islandCore" is
not documented. Maybe something could be added in
Documentation/technical/ too, or maybe improving commit messages could
be enough.

Anyway I wanted to send this nearly "as is" first to get early
feedback about what should be done.

This patch series is also available on GitHub in:

https://github.com/chriscool/git/commits/delta-islands

Jeff King (5):
  packfile: make get_delta_base() non static
  Add delta-islands.{c,h}
  pack-objects: add delta-islands support
  repack: add delta-islands support
  t: add t9930-delta-islands.sh

 Documentation/config.txt   |   8 +
 Documentation/git-pack-objects.txt |  88 ++
 Documentation/git-repack.txt   |   5 +
 Makefile   |   1 +
 builtin/pack-objects.c | 130 +---
 builtin/repack.c   |   9 +
 delta-islands.c| 490 +
 delta-islands.h|  11 +
 pack-objects.h |   4 +
 packfile.c |  10 +-
 packfile.h |   3 +
 t/t9930-delta-islands.sh   | 143 +
 12 files changed, 856 insertions(+), 46 deletions(-)
 create mode 100644 delta-islands.c
 create mode 100644 delta-islands.h
 create mode 100755 t/t9930-delta-islands.sh

-- 
2.18.0.237.gffdb1dbdaa



[RFC PATCH 3/5] pack-objects: add delta-islands support

2018-07-21 Thread Christian Couder
From: Jeff King 

Implement support for delta islands in git pack-objects
and document how delta islands work in
"Documentation/git-pack-objects.txt".

Signed-off-by: Jeff King 
Signed-off-by: Christian Couder 
---
 Documentation/git-pack-objects.txt |  88 +++
 builtin/pack-objects.c | 130 -
 2 files changed, 177 insertions(+), 41 deletions(-)

diff --git a/Documentation/git-pack-objects.txt 
b/Documentation/git-pack-objects.txt
index d95b472d16..7b7a36056f 100644
--- a/Documentation/git-pack-objects.txt
+++ b/Documentation/git-pack-objects.txt
@@ -289,6 +289,94 @@ Unexpected missing object will raise an error.
 --unpack-unreachable::
Keep unreachable objects in loose form. This implies `--revs`.
 
+--delta-islands::
+   Restrict delta matches based on "islands". See DELTA ISLANDS
+   below.
+
+
+DELTA ISLANDS
+-
+
+When possible, `pack-objects` tries to reuse existing on-disk deltas to
+avoid having to search for new ones on the fly. This is an important
+optimization for serving fetches, because it means the server can avoid
+inflating most objects at all and just send the bytes directly from
+disk. This optimization can't work when an object is stored as a delta
+against a base which the receiver does not have (and which we are not
+already sending). In that case the server "breaks" the delta and has to
+find a new one, which has a high CPU cost. Therefore it's important for
+performance that the set of objects in on-disk delta relationships match
+what a client would fetch.
+
+In a normal repository, this tends to work automatically. The objects
+are mostly reachable from the branches and tags, and that's what clients
+fetch. Any deltas we find on the server are likely to be between objects
+the client has or will have.
+
+But in some repository setups, you may have several related but separate
+groups of ref tips, with clients tending to fetch those groups
+independently. For example, imagine that you are hosting several "forks"
+of a repository in a single shared object store, and letting clients
+view them as separate repositories through `GIT_NAMESPACE` or separate
+repos using the alternates mechanism. A naive repack may find that the
+optimal delta for an object is against a base that is only found in
+another fork. But when a client fetches, they will not have the base
+object, and we'll have to find a new delta on the fly.
+
+A similar situation may exist if you have many refs outside of
+`refs/heads/` and `refs/tags/` that point to related objects (e.g.,
+`refs/pull` or `refs/changes` used by some hosting providers). By
+default, clients fetch only heads and tags, and deltas against objects
+found only in those other groups cannot be sent as-is.
+
+Delta islands solve this problem by allowing you to group your refs into
+distinct "islands". Pack-objects computes which objects are reachable
+from which islands, and refuses to make a delta from an object `A`
+against a base which is not present in all of `A`'s islands. This
+results in slightly larger packs (because we miss some delta
+opportunities), but guarantees that a fetch of one island will not have
+to recompute deltas on the fly due to crossing island boundaries.
+
+Islands are configured via the `pack.island` option, which can be
+specified multiple times. Each value is a left-anchored regular
+expressions matching refnames. For example:
+
+---
+[pack]
+island = refs/heads/
+island = refs/tags/
+---
+
+puts heads and tags into an island (whose name is the empty string; see
+below for more on naming). Any refs which do not match those regular
+expressions (e.g., `refs/pull/123`) is not in any island. Any object
+which is reachable only from `refs/pull/` (but not heads or tags) is
+therefore not a candidate to be used as a base for `refs/heads/`.
+
+Refs are grouped into islands based on their "names", and two regexes
+that produce the same name are considered to be in the same island. The
+names are computed from the regexes by concatenating any capture groups
+from the regex (and if there are none, then the name is the empty
+string, as in the above example). This allows you to create arbitrary
+numbers of islands. For example, imagine you store the refs for each
+fork in `refs/virtual/ID`, where `ID` is a numeric identifier. You might
+then configure:
+
+---
+[pack]
+island = refs/virtual/([0-9]+)/heads/
+island = refs/virtual/([0-9]+)/tags/
+island = refs/virtual/([0-9]+)/(pull)/
+---
+
+That puts the heads and tags for each fork in their own island (named
+"1234" or similar), and the pull refs for each go into their own
+"1234-pull".
+
+Note that we pick a single island for each regex to 

[RFC PATCH 4/5] repack: add delta-islands support

2018-07-21 Thread Christian Couder
From: Jeff King 

Implement simple support for --delta-islands option and
repack.useDeltaIslands config variable in git repack.

Signed-off-by: Jeff King 
Signed-off-by: Christian Couder 
---
 Documentation/config.txt | 4 
 Documentation/git-repack.txt | 5 +
 builtin/repack.c | 9 +
 3 files changed, 18 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index f682e92a1a..1da39da2a6 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -3091,6 +3091,10 @@ repack.packKeptObjects::
index is being written (either via `--write-bitmap-index` or
`repack.writeBitmaps`).
 
+repack.useDeltaIslands::
+   If set to true, makes `git repack` act as if `--delta-islands`
+   was passed. Defaults to `false`.
+
 repack.writeBitmaps::
When true, git will write a bitmap index when packing all
objects to disk (e.g., when `git repack -a` is run).  This
diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
index d90e7907f4..a8b2d4722f 100644
--- a/Documentation/git-repack.txt
+++ b/Documentation/git-repack.txt
@@ -155,6 +155,11 @@ depth is 4095.
being removed. In addition, any unreachable loose objects will
be packed (and their loose counterparts removed).
 
+-i::
+--delta-islands::
+   Pass the `--delta-islands` option to `git-pack-objects`, see
+   linkgit:git-pack-objects[1].
+
 Configuration
 -
 
diff --git a/builtin/repack.c b/builtin/repack.c
index 6c636e159e..5ab9ee69e4 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -12,6 +12,7 @@
 static int delta_base_offset = 1;
 static int pack_kept_objects = -1;
 static int write_bitmaps;
+static int use_delta_islands;
 static char *packdir, *packtmp;
 
 static const char *const git_repack_usage[] = {
@@ -40,6 +41,10 @@ static int repack_config(const char *var, const char *value, 
void *cb)
write_bitmaps = git_config_bool(var, value);
return 0;
}
+   if (!strcmp(var, "repack.usedeltaislands")) {
+   use_delta_islands = git_config_bool(var, value);
+   return 0;
+   }
return git_default_config(var, value, cb);
 }
 
@@ -194,6 +199,8 @@ int cmd_repack(int argc, const char **argv, const char 
*prefix)
N_("pass --local to git-pack-objects")),
OPT_BOOL('b', "write-bitmap-index", &write_bitmaps,
N_("write bitmap index")),
+   OPT_BOOL('i', "delta-islands", &use_delta_islands,
+   N_("pass --delta-islands to git-pack-objects")),
OPT_STRING(0, "unpack-unreachable", &unpack_unreachable, 
N_("approxidate"),
N_("with -A, do not loosen objects older than 
this")),
OPT_BOOL('k', "keep-unreachable", &keep_unreachable,
@@ -267,6 +274,8 @@ int cmd_repack(int argc, const char **argv, const char 
*prefix)
argv_array_pushf(&cmd.args, "--no-reuse-object");
if (write_bitmaps)
argv_array_push(&cmd.args, "--write-bitmap-index");
+   if (use_delta_islands)
+   argv_array_push(&cmd.args, "--delta-islands");
 
if (pack_everything & ALL_INTO_ONE) {
get_non_kept_pack_filenames(&existing_packs, &keep_pack_list);
-- 
2.18.0.237.gffdb1dbdaa



[RFC PATCH 2/5] Add delta-islands.{c,h}

2018-07-21 Thread Christian Couder
From: Jeff King 

Hosting providers that allow users to "fork" existing
repos want as much as possible those forks to use a
small amount of disk space.

Using alternates to keep all the objects from all
the forks into a unique central repo is way to do
that, but it can have some drawbacks. Especially when
packing the central repo, deltas will be created
between objects from different forks.

This can make cloning or fetching a fork much slower
and much more CPU intensive as Git might have to
compute new deltas for many objects to avoid sending
objects from a different fork.

Delta islands is a way to store objects from
different forks in the same repo and packfile without
having deltas between objects from different forks.

This patch implements the delta islands mechanism in
"delta-islands.{c,h}", but does not yet make use of it.

A few new fields are added in 'struct object_entry'
in "pack-objects.h" though.

The new "pack.island" config variable which can be
used to configure the different islands is described
a bit in "Documentation/config.txt", but the real
documentation will follow in a patch that actually
uses delta islands in "builtin/pack-objects.c".

Signed-off-by: Jeff King 
Signed-off-by: Christian Couder 
---
 Documentation/config.txt |   4 +
 Makefile |   1 +
 delta-islands.c  | 490 +++
 delta-islands.h  |  11 +
 pack-objects.h   |   4 +
 5 files changed, 510 insertions(+)
 create mode 100644 delta-islands.c
 create mode 100644 delta-islands.h

diff --git a/Documentation/config.txt b/Documentation/config.txt
index a32172a43c..f682e92a1a 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2542,6 +2542,10 @@ Note that changing the compression level will not 
automatically recompress
 all existing objects. You can force recompression by passing the -F option
 to linkgit:git-repack[1].
 
+pack.island::
+   A regular expression configuring a set of delta islands. See
+   "DELTA ISLANDS" in linkgit:git-pack-objects[1] for details.
+
 pack.deltaCacheSize::
The maximum memory in bytes used for caching deltas in
linkgit:git-pack-objects[1] before writing them out to a pack.
diff --git a/Makefile b/Makefile
index 08e5c54549..2804298977 100644
--- a/Makefile
+++ b/Makefile
@@ -840,6 +840,7 @@ LIB_OBJS += csum-file.o
 LIB_OBJS += ctype.o
 LIB_OBJS += date.o
 LIB_OBJS += decorate.o
+LIB_OBJS += delta-islands.o
 LIB_OBJS += diffcore-break.o
 LIB_OBJS += diffcore-delta.o
 LIB_OBJS += diffcore-order.o
diff --git a/delta-islands.c b/delta-islands.c
new file mode 100644
index 00..645fe966c5
--- /dev/null
+++ b/delta-islands.c
@@ -0,0 +1,490 @@
+#include "builtin.h"
+#include "cache.h"
+#include "attr.h"
+#include "object.h"
+#include "blob.h"
+#include "commit.h"
+#include "tag.h"
+#include "tree.h"
+#include "delta.h"
+#include "pack.h"
+#include "tree-walk.h"
+#include "diff.h"
+#include "revision.h"
+#include "list-objects.h"
+#include "progress.h"
+#include "refs.h"
+#include "khash.h"
+#include "pack-bitmap.h"
+#include "pack-objects.h"
+#include "delta-islands.h"
+#include "sha1-array.h"
+#include "config.h"
+
+KHASH_INIT(str, const char *, void *, 1, kh_str_hash_func, kh_str_hash_equal)
+
+static khash_sha1 *island_marks;
+static unsigned island_counter;
+static unsigned island_counter_core;
+
+static kh_str_t *remote_islands;
+
+struct remote_island {
+   uint64_t hash;
+   struct oid_array oids;
+};
+
+struct island_bitmap {
+   uint32_t refcount;
+   uint32_t bits[];
+};
+
+static uint32_t island_bitmap_size;
+
+/*
+ * Allocate a new bitmap; if "old" is not NULL, the new bitmap will be a copy
+ * of "old". Otherwise, the new bitmap is empty.
+ */
+static struct island_bitmap *island_bitmap_new(const struct island_bitmap *old)
+{
+   size_t size = sizeof(struct island_bitmap) + (island_bitmap_size * 4);
+   struct island_bitmap *b = xcalloc(1, size);
+
+   if (old)
+   memcpy(b, old, size);
+
+   b->refcount = 1;
+   return b;
+}
+
+static void island_bitmap_or(struct island_bitmap *a, const struct 
island_bitmap *b)
+{
+   uint32_t i;
+
+   for (i = 0; i < island_bitmap_size; ++i)
+   a->bits[i] |= b->bits[i];
+}
+
+static int island_bitmap_is_subset(struct island_bitmap *self,
+   struct island_bitmap *super)
+{
+   uint32_t i;
+
+   if (self == super)
+   return 1;
+
+   for (i = 0; i < island_bitmap_size; ++i) {
+   if ((self->bits[i] & super->bits[i]) != self->bits[i])
+   ret

[RFC PATCH 5/5] t: add t9930-delta-islands.sh

2018-07-21 Thread Christian Couder
From: Jeff King 

Signed-off-by: Jeff King 
Signed-off-by: Christian Couder 
---
 t/t9930-delta-islands.sh | 143 +++
 1 file changed, 143 insertions(+)
 create mode 100755 t/t9930-delta-islands.sh

diff --git a/t/t9930-delta-islands.sh b/t/t9930-delta-islands.sh
new file mode 100755
index 00..fea92a5777
--- /dev/null
+++ b/t/t9930-delta-islands.sh
@@ -0,0 +1,143 @@
+#!/bin/sh
+
+test_description='exercise delta islands'
+. ./test-lib.sh
+
+# returns true iff $1 is a delta based on $2
+is_delta_base () {
+   delta_base=$(echo "$1" | git cat-file --batch-check='%(deltabase)') &&
+   echo >&2 "$1 has base $delta_base" &&
+   test "$delta_base" = "$2"
+}
+
+# generate a commit on branch $1 with a single file, "file", whose
+# content is mostly based on the seed $2, but with a unique bit
+# of content $3 appended. This should allow us to see whether
+# blobs of different refs delta against each other.
+commit() {
+   blob=$({ test-tool genrandom "$2" 10240 && echo "$3"; } |
+  git hash-object -w --stdin) &&
+   tree=$(printf '100644 blob %s\tfile\n' "$blob" | git mktree) &&
+   commit=$(echo "$2-$3" | git commit-tree "$tree" ${4:+-p "$4"}) &&
+   git update-ref "refs/heads/$1" "$commit" &&
+   eval "$1"'=$(git rev-parse $1:file)' &&
+   eval "echo >&2 $1=\$$1"
+}
+
+test_expect_success 'setup commits' '
+   commit one seed 1 &&
+   commit two seed 12
+'
+
+# Note: This is heavily dependent on the "prefer larger objects as base"
+# heuristic.
+test_expect_success 'vanilla repack deltas one against two' '
+   git repack -adf &&
+   is_delta_base $one $two
+'
+
+test_expect_success 'island repack with no island definition is vanilla' '
+   git repack -adfi &&
+   is_delta_base $one $two
+'
+
+test_expect_success 'island repack with no matches is vanilla' '
+   git -c "pack.island=refs/foo" repack -adfi &&
+   is_delta_base $one $two
+'
+
+test_expect_success 'separate islands disallows delta' '
+   git -c "pack.island=refs/heads/(.*)" repack -adfi &&
+   ! is_delta_base $one $two &&
+   ! is_delta_base $two $one
+'
+
+test_expect_success 'same island allows delta' '
+   git -c "pack.island=refs/heads" repack -adfi &&
+   is_delta_base $one $two
+'
+
+test_expect_success 'coalesce same-named islands' '
+   git \
+   -c "pack.island=refs/(.*)/one" \
+   -c "pack.island=refs/(.*)/two" \
+   repack -adfi &&
+   is_delta_base $one $two
+'
+
+test_expect_success 'island restrictions drop reused deltas' '
+   git repack -adfi &&
+   is_delta_base $one $two &&
+   git -c "pack.island=refs/heads/(.*)" repack -adi &&
+   ! is_delta_base $one $two &&
+   ! is_delta_base $two $one
+'
+
+test_expect_success 'island regexes are left-anchored' '
+   git -c "pack.island=heads/(.*)" repack -adfi &&
+   is_delta_base $one $two
+'
+
+test_expect_success 'island regexes follow last-one-wins scheme' '
+   git \
+   -c "pack.island=refs/heads/(.*)" \
+   -c "pack.island=refs/heads/" \
+   repack -adfi &&
+   is_delta_base $one $two
+'
+
+test_expect_success 'setup shared history' '
+   commit root shared root &&
+   commit one shared 1 root &&
+   commit two shared 12-long root
+'
+
+# We know that $two will be preferred as a base from $one,
+# because we can transform it with a pure deletion.
+#
+# We also expect $root as a delta against $two by the "longest is base" rule.
+test_expect_success 'vanilla delta goes between branches' '
+   git repack -adf &&
+   is_delta_base $one $two &&
+   is_delta_base $root $two
+'
+
+# Here we should allow $one to base itself on $root; even though
+# they are in different islands, the objects in $root are in a superset
+# of islands compared to those in $one.
+#
+# Similarly, $two can delta against $root by our rules. And unlike $one,
+# in which we are just allowing it, the island rules actually put $root
+# as a possible base for $two, which it would not otherwise be (due to the size
+# sorting).
+test_expect_success 'deltas allowed against superset islands'

[RFC PATCH 1/5] packfile: make get_delta_base() non static

2018-07-21 Thread Christian Couder
From: Jeff King 

As get_delta_base() will be used outside 'packfile.c' in
a following commit, let's make it non static and let's
declare it in 'packfile.h'.

Signed-off-by: Jeff King 
Signed-off-by: Christian Couder 
---
 packfile.c | 10 +-
 packfile.h |  3 +++
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/packfile.c b/packfile.c
index 7cd45aa4b2..4646bff5ff 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1037,11 +1037,11 @@ const struct packed_git *has_packed_and_bad(const 
unsigned char *sha1)
return NULL;
 }
 
-static off_t get_delta_base(struct packed_git *p,
-   struct pack_window **w_curs,
-   off_t *curpos,
-   enum object_type type,
-   off_t delta_obj_offset)
+off_t get_delta_base(struct packed_git *p,
+struct pack_window **w_curs,
+off_t *curpos,
+enum object_type type,
+off_t delta_obj_offset)
 {
unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
off_t base_offset;
diff --git a/packfile.h b/packfile.h
index cc7eaffe1b..30f0811382 100644
--- a/packfile.h
+++ b/packfile.h
@@ -125,6 +125,9 @@ extern void *unpack_entry(struct repository *r, struct 
packed_git *, off_t, enum
 extern unsigned long unpack_object_header_buffer(const unsigned char *buf, 
unsigned long len, enum object_type *type, unsigned long *sizep);
 extern unsigned long get_size_from_delta(struct packed_git *, struct 
pack_window **, off_t);
 extern int unpack_object_header(struct packed_git *, struct pack_window **, 
off_t *, unsigned long *);
+extern off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
+   off_t *curpos, enum object_type type,
+   off_t delta_obj_offset);
 
 extern void release_pack_memory(size_t);
 
-- 
2.18.0.237.gffdb1dbdaa



Re: [PATCH] Documentation/git-interpret-trailers: explain possible values

2018-07-20 Thread Christian Couder
On Sat, Jul 21, 2018 at 12:23 AM, Junio C Hamano  wrote:
> Stefan Beller  writes:
>
>> Signed-off-by: Stefan Beller 
>> ---
>>
>> Maybe we rather want to refer to the options that are described further
>> down in the document?
>
> I have no strong preference either way.
>
> The patch looks reasonable to me; Christian?

Yeah, it looks reasonable to me too.

I wouldn't mind also referring to the options described elsewhere, but
it could be in another patch.


[ANNOUNCE] Git Rev News edition 41

2018-07-18 Thread Christian Couder
Hi everyone,

The 41th edition of Git Rev News is now published:

  https://git.github.io/rev_news/2018/07/18/edition-41/

Thanks a lot to the contributors: Luca Milanesio and Derrick Stolee!

Enjoy,
Christian, Jakub, Markus and Gabriel.


Draft of Git Rev News edition 41

2018-07-16 Thread Christian Couder
Hi,

A draft of a new Git Rev News edition is available here:

  https://github.com/git/git.github.io/blob/master/rev_news/drafts/edition-41.md

Everyone is welcome to contribute in any section either by editing the
above page on GitHub and sending a pull request, or by commenting on
this GitHub issue:

  https://github.com/git/git.github.io/issues/299

You can also reply to this email.

In general all kinds of contribution, for example proofreading,
suggestions for articles or links, help on the issues in GitHub, and
so on, are very much appreciated.

This month a summary of one of the Git standups on IRC (see
https://public-inbox.org/git/20180713170018.ga139...@aiede.svl.corp.google.com)
would be very much appreciated.

I tried to cc everyone who appears in this edition, but maybe I missed
some people, sorry about that.

Jakub, Markus, Gabriel and me plan to publish this edition on
Wednesday July 18th.

Thanks,
Christian.


[PATCH v3 4/9] remote-odb: implement remote_odb_get_many_direct()

2018-07-13 Thread Christian Couder
From: Christian Couder 

This function will be used to get many objects from a promisor
remote.

Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 odb-helper.c | 15 +++
 odb-helper.h |  3 +++
 remote-odb.c | 17 +
 remote-odb.h |  1 +
 4 files changed, 36 insertions(+)

diff --git a/odb-helper.c b/odb-helper.c
index 99b5a345e8..246ebf8f7a 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -28,3 +28,18 @@ int odb_helper_get_direct(struct odb_helper *o,
 
return res;
 }
+
+int odb_helper_get_many_direct(struct odb_helper *o,
+  const struct oid_array *to_get)
+{
+   int res;
+   uint64_t start;
+
+   start = getnanotime();
+
+   res = fetch_objects(o->remote, to_get);
+
+   trace_performance_since(start, "odb_helper_get_many_direct");
+
+   return res;
+}
diff --git a/odb-helper.h b/odb-helper.h
index 4c52e81ce8..154ce4c7e4 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -17,4 +17,7 @@ struct odb_helper {
 extern struct odb_helper *odb_helper_new(const char *name, int namelen);
 extern int odb_helper_get_direct(struct odb_helper *o,
 const unsigned char *sha1);
+extern int odb_helper_get_many_direct(struct odb_helper *o,
+ const struct oid_array *to_get);
+
 #endif /* ODB_HELPER_H */
diff --git a/remote-odb.c b/remote-odb.c
index 7f815c7ebb..09dfc2e16f 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -106,3 +106,20 @@ int remote_odb_get_direct(const unsigned char *sha1)
 
return -1;
 }
+
+int remote_odb_get_many_direct(const struct oid_array *to_get)
+{
+   struct odb_helper *o;
+
+   trace_printf("trace: remote_odb_get_many_direct: nr: %d", to_get->nr);
+
+   remote_odb_init();
+
+   for (o = helpers; o; o = o->next) {
+   if (odb_helper_get_many_direct(o, to_get) < 0)
+   continue;
+   return 0;
+   }
+
+   return -1;
+}
diff --git a/remote-odb.h b/remote-odb.h
index c5384c922d..e10a8bf855 100644
--- a/remote-odb.h
+++ b/remote-odb.h
@@ -4,5 +4,6 @@
 extern struct odb_helper *find_odb_helper(const char *remote);
 extern int has_remote_odb(void);
 extern int remote_odb_get_direct(const unsigned char *sha1);
+extern int remote_odb_get_many_direct(const struct oid_array *to_get);
 
 #endif /* REMOTE_ODB_H */
-- 
2.18.0.171.ge260395bfe



[PATCH v3 9/9] Documentation/config: add odb..promisorRemote

2018-07-13 Thread Christian Couder
From: Christian Couder 

Signed-off-by: Junio C Hamano 
---
 Documentation/config.txt | 5 +
 1 file changed, 5 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 1cc18a828c..066858886b 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2505,6 +2505,11 @@ This setting can be overridden with the 
`GIT_NOTES_REWRITE_REF`
 environment variable, which must be a colon separated list of refs or
 globs.
 
+odb..promisorRemote::
+   The name of a promisor remote. For now promisor remotes are
+   the only kind of remote object database (odb) that is
+   supported.
+
 pack.window::
The size of the window used by linkgit:git-pack-objects[1] when no
window size is given on the command line. Defaults to 10.
-- 
2.18.0.171.ge260395bfe



[PATCH v3 7/9] Use odb.origin.partialclonefilter instead of core.partialclonefilter

2018-07-13 Thread Christian Couder
From: Christian Couder 

Let's make the partial clone filter specific to one odb
instead of general to all the odbs.

This makes it possible to have different partial clone
filters for different odbs.

Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 builtin/fetch.c   |  2 +-
 list-objects-filter-options.c | 28 
 list-objects-filter-options.h |  3 ++-
 odb-helper.h  |  1 +
 remote-odb.c  |  2 ++
 t/t5616-partial-clone.sh  |  2 +-
 6 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 1fd4dfd024..52ec2fd200 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1343,7 +1343,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * the config.
 */
if (!filter_options.choice)
-   partial_clone_get_default_filter_spec(&filter_options);
+   partial_clone_get_default_filter_spec(&filter_options, 
remote->name);
return;
 }
 
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 60452c8f36..755a793664 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -7,6 +7,7 @@
 #include "list-objects-filter.h"
 #include "list-objects-filter-options.h"
 #include "remote-odb.h"
+#include "odb-helper.h"
 
 /*
  * Parse value of the argument to the "filter" keyword.
@@ -29,6 +30,9 @@ static int gently_parse_list_objects_filter(
 {
const char *v0;
 
+   if (!arg)
+   return 0;
+
if (filter_options->choice) {
if (errbuf) {
strbuf_init(errbuf, 0);
@@ -116,6 +120,7 @@ void partial_clone_register(
const struct list_objects_filter_options *filter_options)
 {
char *cfg_name;
+   char *filter_name;
 
/* Check if it is already registered */
if (find_odb_helper(remote))
@@ -131,27 +136,26 @@ void partial_clone_register(
/*
 * Record the initial filter-spec in the config as
 * the default for subsequent fetches from this remote.
-*
-* TODO: move core.partialclonefilter into odb.
 */
-   core_partial_clone_filter_default =
-   xstrdup(filter_options->filter_spec);
-   git_config_set("core.partialclonefilter",
-  core_partial_clone_filter_default);
+   filter_name = xstrfmt("odb.%s.partialclonefilter", remote);
+   git_config_set(filter_name, filter_options->filter_spec);
+   free(filter_name);
 
/* Make sure the config info are reset */
remote_odb_reinit();
 }
 
 void partial_clone_get_default_filter_spec(
-   struct list_objects_filter_options *filter_options)
+   struct list_objects_filter_options *filter_options,
+   const char *remote)
 {
+   struct odb_helper *helper = find_odb_helper(remote);
+
/*
 * Parse default value, but silently ignore it if it is invalid.
 */
-   if (!core_partial_clone_filter_default)
-   return;
-   gently_parse_list_objects_filter(filter_options,
-core_partial_clone_filter_default,
-NULL);
+   if (helper)
+   gently_parse_list_objects_filter(filter_options,
+helper->partial_clone_filter,
+NULL);
 }
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index a61f82..12ceef3230 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -74,6 +74,7 @@ void partial_clone_register(
const char *remote,
const struct list_objects_filter_options *filter_options);
 void partial_clone_get_default_filter_spec(
-   struct list_objects_filter_options *filter_options);
+   struct list_objects_filter_options *filter_options,
+   const char *remote);
 
 #endif /* LIST_OBJECTS_FILTER_OPTIONS_H */
diff --git a/odb-helper.h b/odb-helper.h
index 154ce4c7e4..4af75ef55c 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -10,6 +10,7 @@
 struct odb_helper {
const char *name; /* odb..* */
const char *remote;   /* odb..promisorRemote */
+   const char *partial_clone_filter; /* odb..partialCloneFilter */
 
struct odb_helper *next;
 };
diff --git a/remote-odb.c b/remote-odb.c
index 53a72b4ba5..847a865057 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -60,6 +60,8 @@ static int remote_odb_config(const char *var, const char 
*value, void *data)
 
return 0;
}
+   if (!strcmp(subkey, "partialclonefilter"))
+   return git_config_string(&o->partial_clone_filter, var, value);
 
return 0;
 }
diff --git a/t/t5616-par

[PATCH v3 6/9] Use remote_odb_get_direct() and has_remote_odb()

2018-07-13 Thread Christian Couder
From: Christian Couder 

Instead of using the repository_format_partial_clone global
and fetch_object() directly, let's use has_remote_odb() and
remote_odb_get_direct().

Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 builtin/cat-file.c|  5 +++--
 builtin/fetch.c   | 11 ++-
 builtin/gc.c  |  3 ++-
 builtin/repack.c  |  3 ++-
 cache.h   |  2 --
 connected.c   |  3 ++-
 environment.c |  1 -
 list-objects-filter-options.c | 27 +++
 packfile.c|  3 ++-
 setup.c   |  7 +--
 sha1-file.c   | 14 --
 t/t0410-partial-clone.sh  | 34 +-
 t/t5500-fetch-pack.sh |  4 ++--
 t/t5601-clone.sh  |  2 +-
 t/t5616-partial-clone.sh  |  2 +-
 t/t5702-protocol-v2.sh|  2 +-
 unpack-trees.c|  6 +++---
 17 files changed, 66 insertions(+), 63 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 665b581949..3b24c1b9b7 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -13,6 +13,7 @@
 #include "tree-walk.h"
 #include "sha1-array.h"
 #include "packfile.h"
+#include "remote-odb.h"
 
 struct batch_options {
int enabled;
@@ -477,8 +478,8 @@ static int batch_objects(struct batch_options *opt)
 
for_each_loose_object(batch_loose_object, &sa, 0);
for_each_packed_object(batch_packed_object, &sa, 0);
-   if (repository_format_partial_clone)
-   warning("This repository has extensions.partialClone 
set. Some objects may not be loaded.");
+   if (has_remote_odb())
+   warning("This repository uses an odb. Some objects may 
not be loaded.");
 
cb.opt = opt;
cb.expand = &data;
diff --git a/builtin/fetch.c b/builtin/fetch.c
index ea5b9669ad..1fd4dfd024 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -21,6 +21,7 @@
 #include "utf8.h"
 #include "packfile.h"
 #include "list-objects-filter-options.h"
+#include "remote-odb.h"
 
 static const char * const builtin_fetch_usage[] = {
N_("git fetch [] [ [...]]"),
@@ -1313,7 +1314,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * If no prior partial clone/fetch and the current fetch DID NOT
 * request a partial-fetch, do a normal fetch.
 */
-   if (!repository_format_partial_clone && !filter_options.choice)
+   if (!has_remote_odb() && !filter_options.choice)
return;
 
/*
@@ -1321,7 +1322,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * on this repo and remember the given filter-spec as the default
 * for subsequent fetches to this remote.
 */
-   if (!repository_format_partial_clone && filter_options.choice) {
+   if (!has_remote_odb() && filter_options.choice) {
partial_clone_register(remote->name, &filter_options);
return;
}
@@ -1330,7 +1331,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * We are currently limited to only ONE promisor remote and only
 * allow partial-fetches from the promisor remote.
 */
-   if (strcmp(remote->name, repository_format_partial_clone)) {
+   if (!find_odb_helper(remote->name)) {
if (filter_options.choice)
die(_("--filter can only be used with the remote 
configured in core.partialClone"));
return;
@@ -1461,7 +1462,7 @@ int cmd_fetch(int argc, const char **argv, const char 
*prefix)
if (depth || deepen_since || deepen_not.nr)
deepen = 1;
 
-   if (filter_options.choice && !repository_format_partial_clone)
+   if (filter_options.choice && !has_remote_odb())
die("--filter can only be used when extensions.partialClone is 
set");
 
if (all) {
@@ -1495,7 +1496,7 @@ int cmd_fetch(int argc, const char **argv, const char 
*prefix)
}
 
if (remote) {
-   if (filter_options.choice || repository_format_partial_clone)
+   if (filter_options.choice || has_remote_odb())
fetch_one_setup_partial(remote);
result = fetch_one(remote, argc, argv, prune_tags_ok);
} else {
diff --git a/builtin/gc.c b/builtin/gc.c
index ccfb1ceaeb..02c783b514 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -26,6 +26,7 @@
 #include "pack-objects.h"
 #include "blob.h"
 #include "tree.h"
+#include "remote-odb.h"
 
 #define FAILED_RUN "failed to run %

[PATCH v3 8/9] t0410: test fetching from many promisor remotes

2018-07-13 Thread Christian Couder
From: Christian Couder 

Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 t/t0410-partial-clone.sh | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
index 71a9b9a3e8..9d513ebf57 100755
--- a/t/t0410-partial-clone.sh
+++ b/t/t0410-partial-clone.sh
@@ -170,8 +170,30 @@ test_expect_success 'fetching of missing objects' '
git verify-pack --verbose "$IDX" | grep "$HASH"
 '
 
+test_expect_success 'fetching of missing objects from another odb remote' '
+   git clone "file://$(pwd)/server" server2 &&
+   test_commit -C server2 bar &&
+   git -C server2 repack -a -d --write-bitmap-index &&
+   HASH2=$(git -C server2 rev-parse bar) &&
+
+   git -C repo remote add server2 "file://$(pwd)/server2" &&
+   git -C repo config odb.magic2.promisorRemote server2 &&
+   git -C repo cat-file -p "$HASH2" &&
+
+   git -C repo fetch server2 &&
+   rm -rf repo/.git/objects/* &&
+   git -C repo cat-file -p "$HASH2" &&
+
+   # Ensure that the .promisor file is written, and check that its
+   # associated packfile contains the object
+   ls repo/.git/objects/pack/pack-*.promisor >promisorlist &&
+   test_line_count = 1 promisorlist &&
+   IDX=$(cat promisorlist | sed "s/promisor$/idx/") &&
+   git verify-pack --verbose "$IDX" | grep "$HASH2"
+'
+
 test_expect_success 'rev-list stops traversal at missing and promised commit' '
-   rm -rf repo &&
+   rm -rf repo server server2 &&
test_create_repo repo &&
test_commit -C repo foo &&
test_commit -C repo bar &&
-- 
2.18.0.171.ge260395bfe



[PATCH v3 5/9] remote-odb: add remote_odb_reinit()

2018-07-13 Thread Christian Couder
From: Christian Couder 

We will need to reinitialize the remote odb configuration
as we will make some changes to it in a later commit when
we will detect that a remote is also a remote odb.

Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 remote-odb.c | 14 --
 remote-odb.h |  1 +
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/remote-odb.c b/remote-odb.c
index 09dfc2e16f..53a72b4ba5 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -64,17 +64,27 @@ static int remote_odb_config(const char *var, const char 
*value, void *data)
return 0;
 }
 
-static void remote_odb_init(void)
+static void remote_odb_do_init(int force)
 {
static int initialized;
 
-   if (initialized)
+   if (!force && initialized)
return;
initialized = 1;
 
git_config(remote_odb_config, NULL);
 }
 
+static inline void remote_odb_init(void)
+{
+   remote_odb_do_init(0);
+}
+
+inline void remote_odb_reinit(void)
+{
+   remote_odb_do_init(1);
+}
+
 struct odb_helper *find_odb_helper(const char *remote)
 {
remote_odb_init();
diff --git a/remote-odb.h b/remote-odb.h
index e10a8bf855..79803782af 100644
--- a/remote-odb.h
+++ b/remote-odb.h
@@ -1,6 +1,7 @@
 #ifndef REMOTE_ODB_H
 #define REMOTE_ODB_H
 
+extern void remote_odb_reinit(void);
 extern struct odb_helper *find_odb_helper(const char *remote);
 extern int has_remote_odb(void);
 extern int remote_odb_get_direct(const unsigned char *sha1);
-- 
2.18.0.171.ge260395bfe



[PATCH v3 2/9] Add initial remote odb support

2018-07-13 Thread Christian Couder
From: Christian Couder 

The remote-odb.{c,h} files will contain the functions
that are called by the rest of Git mostly from
"sha1-file.c" to access the objects managed by the
remote odbs.

The odb-helper.{c,h} files will contain the functions to
actually implement communication with either the internal
functions or the external scripts or processes that will
manage and provide remote git objects.

For now only infrastructure to create helpers from the
config and to check if there are remote odbs and odb
helpers is implemented.

We expect that there will not be a lot of helpers, so it
is ok to use a simple linked list to manage them.

Helped-by: Jeff King 
Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 Makefile |  2 ++
 odb-helper.c | 16 +
 odb-helper.h | 19 +++
 remote-odb.c | 91 
 remote-odb.h |  7 
 5 files changed, 135 insertions(+)
 create mode 100644 odb-helper.c
 create mode 100644 odb-helper.h
 create mode 100644 remote-odb.c
 create mode 100644 remote-odb.h

diff --git a/Makefile b/Makefile
index 0cb6590f24..5af048ddd5 100644
--- a/Makefile
+++ b/Makefile
@@ -896,6 +896,8 @@ LIB_OBJS += notes-cache.o
 LIB_OBJS += notes-merge.o
 LIB_OBJS += notes-utils.o
 LIB_OBJS += object.o
+LIB_OBJS += odb-helper.o
+LIB_OBJS += remote-odb.o
 LIB_OBJS += oidmap.o
 LIB_OBJS += oidset.o
 LIB_OBJS += packfile.o
diff --git a/odb-helper.c b/odb-helper.c
new file mode 100644
index 00..b4d403ffa9
--- /dev/null
+++ b/odb-helper.c
@@ -0,0 +1,16 @@
+#include "cache.h"
+#include "object.h"
+#include "argv-array.h"
+#include "odb-helper.h"
+#include "run-command.h"
+#include "sha1-lookup.h"
+
+struct odb_helper *odb_helper_new(const char *name, int namelen)
+{
+   struct odb_helper *o;
+
+   o = xcalloc(1, sizeof(*o));
+   o->name = xmemdupz(name, namelen);
+
+   return o;
+}
diff --git a/odb-helper.h b/odb-helper.h
new file mode 100644
index 00..4b792a3896
--- /dev/null
+++ b/odb-helper.h
@@ -0,0 +1,19 @@
+#ifndef ODB_HELPER_H
+#define ODB_HELPER_H
+
+/*
+ * An odb helper is a way to access a remote odb.
+ *
+ * Information in its fields comes from the config file [odb "NAME"]
+ * entries.
+ */
+struct odb_helper {
+   const char *name; /* odb..* */
+   const char *remote;   /* odb..promisorRemote */
+
+   struct odb_helper *next;
+};
+
+extern struct odb_helper *odb_helper_new(const char *name, int namelen);
+
+#endif /* ODB_HELPER_H */
diff --git a/remote-odb.c b/remote-odb.c
new file mode 100644
index 00..03be54ba2e
--- /dev/null
+++ b/remote-odb.c
@@ -0,0 +1,91 @@
+#include "cache.h"
+#include "remote-odb.h"
+#include "odb-helper.h"
+#include "config.h"
+
+static struct odb_helper *helpers;
+static struct odb_helper **helpers_tail = &helpers;
+
+static struct odb_helper *find_or_create_helper(const char *name, int len)
+{
+   struct odb_helper *o;
+
+   for (o = helpers; o; o = o->next)
+   if (!strncmp(o->name, name, len) && !o->name[len])
+   return o;
+
+   o = odb_helper_new(name, len);
+   *helpers_tail = o;
+   helpers_tail = &o->next;
+
+   return o;
+}
+
+static struct odb_helper *do_find_odb_helper(const char *remote)
+{
+   struct odb_helper *o;
+
+   for (o = helpers; o; o = o->next)
+   if (o->remote && !strcmp(o->remote, remote))
+   return o;
+
+   return NULL;
+}
+
+static int remote_odb_config(const char *var, const char *value, void *data)
+{
+   struct odb_helper *o;
+   const char *name;
+   int namelen;
+   const char *subkey;
+
+   if (parse_config_key(var, "odb", &name, &namelen, &subkey) < 0)
+   return 0;
+
+   o = find_or_create_helper(name, namelen);
+
+   if (!strcmp(subkey, "promisorremote")) {
+   const char *remote;
+   int res = git_config_string(&remote, var, value);
+
+   if (res)
+   return res;
+
+   if (do_find_odb_helper(remote))
+   return error(_("when parsing config key '%s' "
+  "helper for remote '%s' already exists"),
+var, remote);
+
+   o->remote = remote;
+
+   return 0;
+   }
+
+   return 0;
+}
+
+static void remote_odb_init(void)
+{
+   static int initialized;
+
+   if (initialized)
+   return;
+   initialized = 1;
+
+   git_config(remote_odb_config, NULL);
+}
+
+struct odb_helper *find_odb_helper(const char *remote)
+{
+   remote_odb_init();
+
+   if (!remote)
+   return help

[PATCH v3 0/9] Introducing remote ODBs

2018-07-13 Thread Christian Couder
This path series is a follow up from the patch series called "odb
remote" that I sent earlier this year, which was itself a follow up
from previous series. See the links section for more information.

As with the previous "odb remote" series, this series is only about
integrating with the promisor/narrow clone work and showing that it
makes it possible to use more than one promisor remote. Everything
that is not necessary for that integration has been removed for now
(though you can still find it in one of my branches on GitHub if you
want).

There is one test in patch 8/9 that shows that more than one promisor
remote can now be used, but I still feel that it could be interesting
to add other such tests, so I am open to ideas in this area.

Changes compared to V2 of this patch series
~~~

  - fix unused variable in patch 2/9: the "SQUASH???" patch that Junio
had created (thanks!) has been squashed

High level overview of this patch series


All the patches except 2/9 are unchanged compared to V2.

  - Patch 1/9:

This makes functions in fetch-object.c return an error code, which is
necessary to later tell that they failed and try another remote odb
when there is more than one. This could also just be seen as a fix to
these functions.

  - Patch 2/9:

This introduces the minimum infrastructure for remote odbs.

  - Patches 3/9 and 4/9:

These patches implement remote_odb_get_direct() and
remote_odb_get_many_direct() using the functions from fetch-object.c.
These new functions will be used in following patches to replace the
functions from fetch-object.c.

  - Patch 5/9:

This implement remote_odb_reinit() which will be needed to reparse the
remote odb configuration.

  - Patches 6/9 and 7/9:

These patches integrate the remote odb mechanism into the
promisor/narrow clone code. The "extensions.partialClone" config
option is replaced by "odb..promisorRemote" and
"core.partialCloneFilter" is replaced by
"odb..partialCloneFilter".

  - Patch 8/9:

This adds a test case that shows that now more than one promisor
remote can be used.

  - Patch 9/9:

This starts documenting the remote odb mechanism.

Discussion
~~

I am not sure that it is ok to completely replace the
"extensions.partialclone" config option. Even if it is fully replaced,
no "extensions.remoteodb" is implemented in these patches, as maybe
the "extensions.partialclone" name could be kept even if the
underlying mechanism is the remote odb mechanism.

Anyway I think that the remote odb mechanism is much more extensible,
so I think using "extensions.partialclone" to specify a promisor
remote should be at least deprecated.

Links
~

This patch series on GitHub:

V3: https://github.com/chriscool/git/commits/remote-odb
V2: https://github.com/chriscool/git/commits/remote-odb2
V1: https://github.com/chriscool/git/commits/remote-odb1

Discussions related to previous versions:

V2: https://public-inbox.org/git/20180630083542.20347-1-chrisc...@tuxfamily.org/
V1: https://public-inbox.org/git/20180623121846.19750-1-chrisc...@tuxfamily.org/

Previous "odb remote" series:

https://public-inbox.org/git/20180513103232.17514-1-chrisc...@tuxfamily.org/
https://github.com/chriscool/git/commits/odb-remote

Version 1 and 2 of the "Promisor remotes and external ODB support" series:

https://public-inbox.org/git/20180103163403.11303-1-chrisc...@tuxfamily.org/
https://public-inbox.org/git/20180319133147.15413-1-chrisc...@tuxfamily.org/

Version 1 and 2 of the "Promisor remotes and external ODB support" series on 
GitHub:

https://github.com/chriscool/git/commits/gl-small-promisor-external-odb12
https://github.com/chriscool/git/commits/gl-small-promisor-external-odb71

Christian Couder (9):
  fetch-object: make functions return an error code
  Add initial remote odb support
  remote-odb: implement remote_odb_get_direct()
  remote-odb: implement remote_odb_get_many_direct()
  remote-odb: add remote_odb_reinit()
  Use remote_odb_get_direct() and has_remote_odb()
  Use odb.origin.partialclonefilter instead of core.partialclonefilter
  t0410: test fetching from many promisor remotes
  Documentation/config: add odb..promisorRemote

 Documentation/config.txt  |   5 ++
 Makefile  |   2 +
 builtin/cat-file.c|   5 +-
 builtin/fetch.c   |  13 ++--
 builtin/gc.c  |   3 +-
 builtin/repack.c  |   3 +-
 cache.h   |   2 -
 connected.c   |   3 +-
 environment.c |   1 -
 fetch-object.c|  15 ++--
 fetch-object.h|   6 +-
 list-objects-filter-options.c |  51 +++--
 list-objects-filter-options.h |   3 +-
 odb-helper.c  |  45 +++
 odb-helper.h

[PATCH v3 3/9] remote-odb: implement remote_odb_get_direct()

2018-07-13 Thread Christian Couder
From: Christian Couder 

This is implemented only in the promisor remote mode
for now by calling fetch_object().

Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 odb-helper.c | 14 ++
 odb-helper.h |  3 ++-
 remote-odb.c | 17 +
 remote-odb.h |  1 +
 4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/odb-helper.c b/odb-helper.c
index b4d403ffa9..99b5a345e8 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -4,6 +4,7 @@
 #include "odb-helper.h"
 #include "run-command.h"
 #include "sha1-lookup.h"
+#include "fetch-object.h"
 
 struct odb_helper *odb_helper_new(const char *name, int namelen)
 {
@@ -14,3 +15,16 @@ struct odb_helper *odb_helper_new(const char *name, int 
namelen)
 
return o;
 }
+
+int odb_helper_get_direct(struct odb_helper *o,
+ const unsigned char *sha1)
+{
+   int res;
+   uint64_t start = getnanotime();
+
+   res = fetch_object(o->remote, sha1);
+
+   trace_performance_since(start, "odb_helper_get_direct");
+
+   return res;
+}
diff --git a/odb-helper.h b/odb-helper.h
index 4b792a3896..4c52e81ce8 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -15,5 +15,6 @@ struct odb_helper {
 };
 
 extern struct odb_helper *odb_helper_new(const char *name, int namelen);
-
+extern int odb_helper_get_direct(struct odb_helper *o,
+const unsigned char *sha1);
 #endif /* ODB_HELPER_H */
diff --git a/remote-odb.c b/remote-odb.c
index 03be54ba2e..7f815c7ebb 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -89,3 +89,20 @@ int has_remote_odb(void)
 {
return !!find_odb_helper(NULL);
 }
+
+int remote_odb_get_direct(const unsigned char *sha1)
+{
+   struct odb_helper *o;
+
+   trace_printf("trace: remote_odb_get_direct: %s", sha1_to_hex(sha1));
+
+   remote_odb_init();
+
+   for (o = helpers; o; o = o->next) {
+   if (odb_helper_get_direct(o, sha1) < 0)
+   continue;
+   return 0;
+   }
+
+   return -1;
+}
diff --git a/remote-odb.h b/remote-odb.h
index 4c7b13695f..c5384c922d 100644
--- a/remote-odb.h
+++ b/remote-odb.h
@@ -3,5 +3,6 @@
 
 extern struct odb_helper *find_odb_helper(const char *remote);
 extern int has_remote_odb(void);
+extern int remote_odb_get_direct(const unsigned char *sha1);
 
 #endif /* REMOTE_ODB_H */
-- 
2.18.0.171.ge260395bfe



[PATCH v3 1/9] fetch-object: make functions return an error code

2018-07-13 Thread Christian Couder
From: Christian Couder 

The callers of the fetch_object() and fetch_objects() might
be interested in knowing if these functions succeeded or not.

Signed-off-by: Christian Couder 
Signed-off-by: Junio C Hamano 
---
 fetch-object.c | 15 +--
 fetch-object.h |  6 +++---
 sha1-file.c|  4 ++--
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/fetch-object.c b/fetch-object.c
index 853624f811..ccc4ea7f1a 100644
--- a/fetch-object.c
+++ b/fetch-object.c
@@ -5,11 +5,12 @@
 #include "transport.h"
 #include "fetch-object.h"
 
-static void fetch_refs(const char *remote_name, struct ref *ref)
+static int fetch_refs(const char *remote_name, struct ref *ref)
 {
struct remote *remote;
struct transport *transport;
int original_fetch_if_missing = fetch_if_missing;
+   int res;
 
fetch_if_missing = 0;
remote = remote_get(remote_name);
@@ -19,18 +20,20 @@ static void fetch_refs(const char *remote_name, struct ref 
*ref)
 
transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1");
-   transport_fetch_refs(transport, ref);
+   res = transport_fetch_refs(transport, ref);
fetch_if_missing = original_fetch_if_missing;
+
+   return res;
 }
 
-void fetch_object(const char *remote_name, const unsigned char *sha1)
+int fetch_object(const char *remote_name, const unsigned char *sha1)
 {
struct ref *ref = alloc_ref(sha1_to_hex(sha1));
hashcpy(ref->old_oid.hash, sha1);
-   fetch_refs(remote_name, ref);
+   return fetch_refs(remote_name, ref);
 }
 
-void fetch_objects(const char *remote_name, const struct oid_array *to_fetch)
+int fetch_objects(const char *remote_name, const struct oid_array *to_fetch)
 {
struct ref *ref = NULL;
int i;
@@ -41,5 +44,5 @@ void fetch_objects(const char *remote_name, const struct 
oid_array *to_fetch)
new_ref->next = ref;
ref = new_ref;
}
-   fetch_refs(remote_name, ref);
+   return fetch_refs(remote_name, ref);
 }
diff --git a/fetch-object.h b/fetch-object.h
index 4b269d07ed..12e1f9ee70 100644
--- a/fetch-object.h
+++ b/fetch-object.h
@@ -3,9 +3,9 @@
 
 #include "sha1-array.h"
 
-extern void fetch_object(const char *remote_name, const unsigned char *sha1);
+extern int fetch_object(const char *remote_name, const unsigned char *sha1);
 
-extern void fetch_objects(const char *remote_name,
- const struct oid_array *to_fetch);
+extern int fetch_objects(const char *remote_name,
+const struct oid_array *to_fetch);
 
 #endif
diff --git a/sha1-file.c b/sha1-file.c
index de4839e634..c099f5584d 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -1312,8 +1312,8 @@ int oid_object_info_extended(struct repository *r, const 
struct object_id *oid,
if (fetch_if_missing && repository_format_partial_clone &&
!already_retried && r == the_repository) {
/*
-* TODO Investigate having fetch_object() return
-* TODO error/success and stopping the music here.
+* TODO Investigate checking fetch_object() return
+* TODO value and stopping on error here.
 * TODO Pass a repository struct through fetch_object,
 * such that arbitrary repositories work.
 */
-- 
2.18.0.171.ge260395bfe



Subscribing Apple people to git-secur...@googlegroups.com

2018-07-09 Thread Christian Couder
Hi Peff (and Jonathan),

When people complained a month ago about the MacOS package on
https://git-scm.com/ not being up-to-date after the Git security
release, I got in touch with Apple people GitLab has been working with
to see if they could help on this.

As the urgent issue was resolved when Tim Harper eventually generated
a new MacOS package, it took some time, but now the Apple persons in
CC are asking for the following:

Please add these addresses to the git-security mailing list:
jerem...@apple.com
akils...@apple.com
dt-...@group.apple.com

Please add these GitHub accounts to the cabal repo:
jeremyhu
productsecurityOSSapple

I think it could significantly improve things, even if Tim Harper
continues to prepare the MacOS package published on
https://git-scm.com/, as Apple people also release their own Git
versions at least along with XCode.

I am also personally very happy with the Apple developers' willingness
to get involved and help.

Thanks in advance,
Christian.


Re: [PATCH v3 4/4] builtin/rebase: support running "git rebase "

2018-07-06 Thread Christian Couder
On Fri, Jul 6, 2018 at 2:08 PM, Pratik Karki  wrote:

> +   switch (opts->type) {
> +   case REBASE_AM:
> +   backend = "git-rebase--am";
> +   backend_func = "git_rebase__am";
> +   break;
> +   case REBASE_INTERACTIVE:
> +   backend = "git-rebase--interactive";
> +   backend_func = "git_rebase__interactive";
> +   break;
> +   case REBASE_MERGE:
> +   backend = "git-rebase--merge";
> +   backend_func = "git_rebase__merge";
> +   break;
> +   case REBASE_PRESERVE_MERGES:
> +   backend = "git-rebase--preserve-merges";
> +   backend_func = "git_rebase__preserve_merges";
> +   break;
> +   default:
> +   BUG("Unhandled rebase type %d", opts->type);
> +   break;

Nit: I think the "break;" line could be removed as the BUG() should always exit.

A quick grep shows that there are other places where there is a
"break;" line after a BUG() though. Maybe one of the #leftoverbits
could be about removing those "break;" lines.

> +   }


Re: [PATCH v2 0/9] Introducing remote ODBs

2018-06-30 Thread Christian Couder
On Sat, Jun 30, 2018 at 10:35 AM, Christian Couder
 wrote:

> Changes compared to V1 of this patch series
> ~~~
>
>   - fix tests failures
>   - error out when more than one "odb..promisorremote" exist
> with the same 

Here is the diff with V1:

diff --git a/remote-odb.c b/remote-odb.c
index 5b9d1930b5..eb580ca513 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -21,6 +21,17 @@ static struct odb_helper
*find_or_create_helper(const char *name, int len)
 return o;
 }

+static struct odb_helper *do_find_odb_helper(const char *remote)
+{
+struct odb_helper *o;
+
+for (o = helpers; o; o = o->next)
+if (o->remote && !strcmp(o->remote, remote))
+return o;
+
+return NULL;
+}
+
 static int remote_odb_config(const char *var, const char *value, void *data)
 {
 struct odb_helper *o;
@@ -33,8 +44,22 @@ static int remote_odb_config(const char *var, const
char *value, void *data)

 o = find_or_create_helper(name, namelen);

-if (!strcmp(subkey, "promisorremote"))
-return git_config_string(&o->remote, var, value);
+if (!strcmp(subkey, "promisorremote")) {
+const char *remote;
+int res = git_config_string(&remote, var, value);
+
+if (res)
+return res;
+
+if (do_find_odb_helper(remote))
+return error(_("when parsing config key '%s' "
+   "helper for remote '%s' already exists"),
+ var, remote);
+
+o->remote = remote;
+
+return 0;
+}
 if (!strcmp(subkey, "partialclonefilter"))
 return git_config_string(&o->partial_clone_filter, var, value);

@@ -71,11 +96,7 @@ struct odb_helper *find_odb_helper(const char *remote)
 if (!remote)
 return helpers;

-for (o = helpers; o; o = o->next)
-if (!strcmp(o->remote, remote))
-return o;
-
-return NULL;
+return do_find_odb_helper(remote);
 }

 int has_remote_odb(void)
diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
index b29c0d3d39..9d513ebf57 100755
--- a/t/t0410-partial-clone.sh
+++ b/t/t0410-partial-clone.sh
@@ -23,10 +23,10 @@ promise_and_delete () {
 delete_object repo "$HASH"
 }

-test_expect_success 'extensions.partialclone without filter' '
+test_expect_success 'promisor remote without filter' '
 test_create_repo server &&
 git clone --filter="blob:none" "file://$(pwd)/server" client &&
-git -C client config --unset core.partialclonefilter &&
+git -C client config --unset odb.origin.partialclonefilter &&
 git -C client fetch origin
 '

diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index a4fe6508bd..0c47599568 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -285,7 +285,7 @@ test_expect_success 'partial fetch' '
 rm -rf client "$(pwd)/trace" &&
 git init client &&
 SERVER="file://$(pwd)/server" &&
-test_config -C client extensions.partialClone "$SERVER" &&
+test_config -C client odb.magic.promisorRemote "$SERVER" &&

 GIT_TRACE_PACKET="$(pwd)/trace" git -C client -c protocol.version=2 \
 fetch --filter=blob:none "$SERVER" master:refs/heads/other &&


[PATCH v2 8/9] t0410: test fetching from many promisor remotes

2018-06-30 Thread Christian Couder
Signed-off-by: Christian Couder 
---
 t/t0410-partial-clone.sh | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
index 71a9b9a3e8..9d513ebf57 100755
--- a/t/t0410-partial-clone.sh
+++ b/t/t0410-partial-clone.sh
@@ -170,8 +170,30 @@ test_expect_success 'fetching of missing objects' '
git verify-pack --verbose "$IDX" | grep "$HASH"
 '
 
+test_expect_success 'fetching of missing objects from another odb remote' '
+   git clone "file://$(pwd)/server" server2 &&
+   test_commit -C server2 bar &&
+   git -C server2 repack -a -d --write-bitmap-index &&
+   HASH2=$(git -C server2 rev-parse bar) &&
+
+   git -C repo remote add server2 "file://$(pwd)/server2" &&
+   git -C repo config odb.magic2.promisorRemote server2 &&
+   git -C repo cat-file -p "$HASH2" &&
+
+   git -C repo fetch server2 &&
+   rm -rf repo/.git/objects/* &&
+   git -C repo cat-file -p "$HASH2" &&
+
+   # Ensure that the .promisor file is written, and check that its
+   # associated packfile contains the object
+   ls repo/.git/objects/pack/pack-*.promisor >promisorlist &&
+   test_line_count = 1 promisorlist &&
+   IDX=$(cat promisorlist | sed "s/promisor$/idx/") &&
+   git verify-pack --verbose "$IDX" | grep "$HASH2"
+'
+
 test_expect_success 'rev-list stops traversal at missing and promised commit' '
-   rm -rf repo &&
+   rm -rf repo server server2 &&
test_create_repo repo &&
test_commit -C repo foo &&
test_commit -C repo bar &&
-- 
2.18.0.138.gac082779dc



[PATCH v2 7/9] Use odb.origin.partialclonefilter instead of core.partialclonefilter

2018-06-30 Thread Christian Couder
Let's make the partial clone filter specific to one odb
instead of general to all the odbs.

This makes it possible to have different partial clone
filters for different odbs.

Signed-off-by: Christian Couder 
---
 builtin/fetch.c   |  2 +-
 list-objects-filter-options.c | 28 
 list-objects-filter-options.h |  3 ++-
 odb-helper.h  |  1 +
 remote-odb.c  |  2 ++
 t/t5616-partial-clone.sh  |  2 +-
 6 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 1fd4dfd024..52ec2fd200 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1343,7 +1343,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * the config.
 */
if (!filter_options.choice)
-   partial_clone_get_default_filter_spec(&filter_options);
+   partial_clone_get_default_filter_spec(&filter_options, 
remote->name);
return;
 }
 
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 60452c8f36..755a793664 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -7,6 +7,7 @@
 #include "list-objects-filter.h"
 #include "list-objects-filter-options.h"
 #include "remote-odb.h"
+#include "odb-helper.h"
 
 /*
  * Parse value of the argument to the "filter" keyword.
@@ -29,6 +30,9 @@ static int gently_parse_list_objects_filter(
 {
const char *v0;
 
+   if (!arg)
+   return 0;
+
if (filter_options->choice) {
if (errbuf) {
strbuf_init(errbuf, 0);
@@ -116,6 +120,7 @@ void partial_clone_register(
const struct list_objects_filter_options *filter_options)
 {
char *cfg_name;
+   char *filter_name;
 
/* Check if it is already registered */
if (find_odb_helper(remote))
@@ -131,27 +136,26 @@ void partial_clone_register(
/*
 * Record the initial filter-spec in the config as
 * the default for subsequent fetches from this remote.
-*
-* TODO: move core.partialclonefilter into odb.
 */
-   core_partial_clone_filter_default =
-   xstrdup(filter_options->filter_spec);
-   git_config_set("core.partialclonefilter",
-  core_partial_clone_filter_default);
+   filter_name = xstrfmt("odb.%s.partialclonefilter", remote);
+   git_config_set(filter_name, filter_options->filter_spec);
+   free(filter_name);
 
/* Make sure the config info are reset */
remote_odb_reinit();
 }
 
 void partial_clone_get_default_filter_spec(
-   struct list_objects_filter_options *filter_options)
+   struct list_objects_filter_options *filter_options,
+   const char *remote)
 {
+   struct odb_helper *helper = find_odb_helper(remote);
+
/*
 * Parse default value, but silently ignore it if it is invalid.
 */
-   if (!core_partial_clone_filter_default)
-   return;
-   gently_parse_list_objects_filter(filter_options,
-core_partial_clone_filter_default,
-NULL);
+   if (helper)
+   gently_parse_list_objects_filter(filter_options,
+helper->partial_clone_filter,
+NULL);
 }
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index a61f82..12ceef3230 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -74,6 +74,7 @@ void partial_clone_register(
const char *remote,
const struct list_objects_filter_options *filter_options);
 void partial_clone_get_default_filter_spec(
-   struct list_objects_filter_options *filter_options);
+   struct list_objects_filter_options *filter_options,
+   const char *remote);
 
 #endif /* LIST_OBJECTS_FILTER_OPTIONS_H */
diff --git a/odb-helper.h b/odb-helper.h
index 154ce4c7e4..4af75ef55c 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -10,6 +10,7 @@
 struct odb_helper {
const char *name; /* odb..* */
const char *remote;   /* odb..promisorRemote */
+   const char *partial_clone_filter; /* odb..partialCloneFilter */
 
struct odb_helper *next;
 };
diff --git a/remote-odb.c b/remote-odb.c
index d465875a4d..eb580ca513 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -60,6 +60,8 @@ static int remote_odb_config(const char *var, const char 
*value, void *data)
 
return 0;
}
+   if (!strcmp(subkey, "partialclonefilter"))
+   return git_config_string(&o->partial_clone_filter, var, value);
 
return 0;
 }
diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh
index 5ddd1e011c..42

[PATCH v2 6/9] Use remote_odb_get_direct() and has_remote_odb()

2018-06-30 Thread Christian Couder
Instead of using the repository_format_partial_clone global
and fetch_object() directly, let's use has_remote_odb() and
remote_odb_get_direct().

Signed-off-by: Christian Couder 
---
 builtin/cat-file.c|  5 +++--
 builtin/fetch.c   | 11 ++-
 builtin/gc.c  |  3 ++-
 builtin/repack.c  |  3 ++-
 cache.h   |  2 --
 connected.c   |  3 ++-
 environment.c |  1 -
 list-objects-filter-options.c | 27 +++
 packfile.c|  3 ++-
 setup.c   |  7 +--
 sha1-file.c   | 14 --
 t/t0410-partial-clone.sh  | 34 +-
 t/t5500-fetch-pack.sh |  4 ++--
 t/t5601-clone.sh  |  2 +-
 t/t5616-partial-clone.sh  |  2 +-
 t/t5702-protocol-v2.sh|  2 +-
 unpack-trees.c|  6 +++---
 17 files changed, 66 insertions(+), 63 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 665b581949..3b24c1b9b7 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -13,6 +13,7 @@
 #include "tree-walk.h"
 #include "sha1-array.h"
 #include "packfile.h"
+#include "remote-odb.h"
 
 struct batch_options {
int enabled;
@@ -477,8 +478,8 @@ static int batch_objects(struct batch_options *opt)
 
for_each_loose_object(batch_loose_object, &sa, 0);
for_each_packed_object(batch_packed_object, &sa, 0);
-   if (repository_format_partial_clone)
-   warning("This repository has extensions.partialClone 
set. Some objects may not be loaded.");
+   if (has_remote_odb())
+   warning("This repository uses an odb. Some objects may 
not be loaded.");
 
cb.opt = opt;
cb.expand = &data;
diff --git a/builtin/fetch.c b/builtin/fetch.c
index ea5b9669ad..1fd4dfd024 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -21,6 +21,7 @@
 #include "utf8.h"
 #include "packfile.h"
 #include "list-objects-filter-options.h"
+#include "remote-odb.h"
 
 static const char * const builtin_fetch_usage[] = {
N_("git fetch [] [ [...]]"),
@@ -1313,7 +1314,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * If no prior partial clone/fetch and the current fetch DID NOT
 * request a partial-fetch, do a normal fetch.
 */
-   if (!repository_format_partial_clone && !filter_options.choice)
+   if (!has_remote_odb() && !filter_options.choice)
return;
 
/*
@@ -1321,7 +1322,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * on this repo and remember the given filter-spec as the default
 * for subsequent fetches to this remote.
 */
-   if (!repository_format_partial_clone && filter_options.choice) {
+   if (!has_remote_odb() && filter_options.choice) {
partial_clone_register(remote->name, &filter_options);
return;
}
@@ -1330,7 +1331,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * We are currently limited to only ONE promisor remote and only
 * allow partial-fetches from the promisor remote.
 */
-   if (strcmp(remote->name, repository_format_partial_clone)) {
+   if (!find_odb_helper(remote->name)) {
if (filter_options.choice)
die(_("--filter can only be used with the remote 
configured in core.partialClone"));
return;
@@ -1461,7 +1462,7 @@ int cmd_fetch(int argc, const char **argv, const char 
*prefix)
if (depth || deepen_since || deepen_not.nr)
deepen = 1;
 
-   if (filter_options.choice && !repository_format_partial_clone)
+   if (filter_options.choice && !has_remote_odb())
die("--filter can only be used when extensions.partialClone is 
set");
 
if (all) {
@@ -1495,7 +1496,7 @@ int cmd_fetch(int argc, const char **argv, const char 
*prefix)
}
 
if (remote) {
-   if (filter_options.choice || repository_format_partial_clone)
+   if (filter_options.choice || has_remote_odb())
fetch_one_setup_partial(remote);
result = fetch_one(remote, argc, argv, prune_tags_ok);
} else {
diff --git a/builtin/gc.c b/builtin/gc.c
index ccfb1ceaeb..02c783b514 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -26,6 +26,7 @@
 #include "pack-objects.h"
 #include "blob.h"
 #include "tree.h"
+#include "remote-odb.h"
 
 #define FAILED_RUN "failed to run %s"
 
@@ -619,7 +620,7 @@ int cmd_gc(int argc, const char *

[PATCH v2 5/9] remote-odb: add remote_odb_reinit()

2018-06-30 Thread Christian Couder
We will need to reinitialize the remote odb configuration
as we will make some changes to it in a later commit when
we will detect that a remote is also a remote odb.

Signed-off-by: Christian Couder 
---
 remote-odb.c | 14 --
 remote-odb.h |  1 +
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/remote-odb.c b/remote-odb.c
index 5aa36021dd..d465875a4d 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -64,17 +64,27 @@ static int remote_odb_config(const char *var, const char 
*value, void *data)
return 0;
 }
 
-static void remote_odb_init(void)
+static void remote_odb_do_init(int force)
 {
static int initialized;
 
-   if (initialized)
+   if (!force && initialized)
return;
initialized = 1;
 
git_config(remote_odb_config, NULL);
 }
 
+static inline void remote_odb_init(void)
+{
+   remote_odb_do_init(0);
+}
+
+inline void remote_odb_reinit(void)
+{
+   remote_odb_do_init(1);
+}
+
 struct odb_helper *find_odb_helper(const char *remote)
 {
struct odb_helper *o;
diff --git a/remote-odb.h b/remote-odb.h
index e10a8bf855..79803782af 100644
--- a/remote-odb.h
+++ b/remote-odb.h
@@ -1,6 +1,7 @@
 #ifndef REMOTE_ODB_H
 #define REMOTE_ODB_H
 
+extern void remote_odb_reinit(void);
 extern struct odb_helper *find_odb_helper(const char *remote);
 extern int has_remote_odb(void);
 extern int remote_odb_get_direct(const unsigned char *sha1);
-- 
2.18.0.138.gac082779dc



[PATCH v2 9/9] Documentation/config: add odb..promisorRemote

2018-06-30 Thread Christian Couder
---
 Documentation/config.txt | 5 +
 1 file changed, 5 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 1cc18a828c..066858886b 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2505,6 +2505,11 @@ This setting can be overridden with the 
`GIT_NOTES_REWRITE_REF`
 environment variable, which must be a colon separated list of refs or
 globs.
 
+odb..promisorRemote::
+   The name of a promisor remote. For now promisor remotes are
+   the only kind of remote object database (odb) that is
+   supported.
+
 pack.window::
The size of the window used by linkgit:git-pack-objects[1] when no
window size is given on the command line. Defaults to 10.
-- 
2.18.0.138.gac082779dc



[PATCH v2 4/9] remote-odb: implement remote_odb_get_many_direct()

2018-06-30 Thread Christian Couder
This function will be used to get many objects from a promisor
remote.

Signed-off-by: Christian Couder 
---
 odb-helper.c | 15 +++
 odb-helper.h |  3 +++
 remote-odb.c | 17 +
 remote-odb.h |  1 +
 4 files changed, 36 insertions(+)

diff --git a/odb-helper.c b/odb-helper.c
index 99b5a345e8..246ebf8f7a 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -28,3 +28,18 @@ int odb_helper_get_direct(struct odb_helper *o,
 
return res;
 }
+
+int odb_helper_get_many_direct(struct odb_helper *o,
+  const struct oid_array *to_get)
+{
+   int res;
+   uint64_t start;
+
+   start = getnanotime();
+
+   res = fetch_objects(o->remote, to_get);
+
+   trace_performance_since(start, "odb_helper_get_many_direct");
+
+   return res;
+}
diff --git a/odb-helper.h b/odb-helper.h
index 4c52e81ce8..154ce4c7e4 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -17,4 +17,7 @@ struct odb_helper {
 extern struct odb_helper *odb_helper_new(const char *name, int namelen);
 extern int odb_helper_get_direct(struct odb_helper *o,
 const unsigned char *sha1);
+extern int odb_helper_get_many_direct(struct odb_helper *o,
+ const struct oid_array *to_get);
+
 #endif /* ODB_HELPER_H */
diff --git a/remote-odb.c b/remote-odb.c
index a1811c1bfc..5aa36021dd 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -108,3 +108,20 @@ int remote_odb_get_direct(const unsigned char *sha1)
 
return -1;
 }
+
+int remote_odb_get_many_direct(const struct oid_array *to_get)
+{
+   struct odb_helper *o;
+
+   trace_printf("trace: remote_odb_get_many_direct: nr: %d", to_get->nr);
+
+   remote_odb_init();
+
+   for (o = helpers; o; o = o->next) {
+   if (odb_helper_get_many_direct(o, to_get) < 0)
+   continue;
+   return 0;
+   }
+
+   return -1;
+}
diff --git a/remote-odb.h b/remote-odb.h
index c5384c922d..e10a8bf855 100644
--- a/remote-odb.h
+++ b/remote-odb.h
@@ -4,5 +4,6 @@
 extern struct odb_helper *find_odb_helper(const char *remote);
 extern int has_remote_odb(void);
 extern int remote_odb_get_direct(const unsigned char *sha1);
+extern int remote_odb_get_many_direct(const struct oid_array *to_get);
 
 #endif /* REMOTE_ODB_H */
-- 
2.18.0.138.gac082779dc



[PATCH v2 0/9] Introducing remote ODBs

2018-06-30 Thread Christian Couder
This is a follow up from the patch series called "odb remote" that I
sent earlier this year, which was itself a follow up from previous
series. See the links section for more information.

As with the previous "odb remote" series, this series is only about
integrating with the promisor/narrow clone work and showing that it
makes it possible to use more than one promisor remote. Everything
that is not necessary for that integration has been removed for now
(though you can still find it in one of my branches on GitHub if you
want).

There is one test in patch 8/9 that shows that more than one promisor
remote can now be used, but I still feel that it could be interesting
to add other such tests, so I am open to ideas in this area.

Changes compared to V1 of this patch series
~~~

  - fix tests failures  
  - error out when more than one "odb..promisorremote" exist
with the same 

High level overview of this patch series


  - Patch 1/9:

This makes functions in fetch-object.c return an error code, which is
necessary to later tell that they failed and try another remote odb
when there is more than one. This could also just be seen as a fix to
these functions.

  - Patch 2/9:

This introduces the minimum infrastructure for remote odbs.

  - Patches 3/9 and 4/9:

These patches implement remote_odb_get_direct() and
remote_odb_get_many_direct() using the functions from fetch-object.c.
These new functions will be used in following patches to replace the
functions from fetch-object.c.

  - Patch 5/9:

This implement remote_odb_reinit() which will be needed to reparse the
remote odb configuration.

  - Patches 6/9 and 7/9:

These patches integrate the remote odb mechanism into the
promisor/narrow clone code. The "extensions.partialClone" config
option is replaced by "odb..promisorRemote" and
"core.partialCloneFilter" is replaced by
"odb..partialCloneFilter".

  - Patch 8/9:

This adds a test case that shows that now more than one promisor
remote can be used.

  - Patch 9/9:

This starts documenting the remote odb mechanism.

Discussion
~~

I am not sure that it is ok to completely replace the
"extensions.partialclone" config option. Even if it is fully replaced,
no "extensions.remoteodb" is implemented in these patches, as maybe
the "extensions.partialclone" name could be kept even if the
underlying mechanism is the remote odb mechanism.

Anyway I think that the remote odb mechanism is much more extensible,
so I think using "extensions.partialclone" to specify a promisor
remote should be at least deprecated.

Links
~

This patch series on GitHub:

V2: https://github.com/chriscool/git/commits/remote-odb
V1: https://github.com/chriscool/git/commits/remote-odb1

Discussions related to V1:

https://public-inbox.org/git/20180623121846.19750-1-chrisc...@tuxfamily.org/

Previous "odb remote" series:

https://public-inbox.org/git/20180513103232.17514-1-chrisc...@tuxfamily.org/
https://github.com/chriscool/git/commits/odb-remote

Version 1 and 2 of the "Promisor remotes and external ODB support" series:

https://public-inbox.org/git/20180103163403.11303-1-chrisc...@tuxfamily.org/
https://public-inbox.org/git/20180319133147.15413-1-chrisc...@tuxfamily.org/

Version 1 and 2 of the "Promisor remotes and external ODB support" series on 
GitHub:

https://github.com/chriscool/git/commits/gl-small-promisor-external-odb12
https://github.com/chriscool/git/commits/gl-small-promisor-external-odb71

Christian Couder (9):
  fetch-object: make functions return an error code
  Add initial remote odb support
  remote-odb: implement remote_odb_get_direct()
  remote-odb: implement remote_odb_get_many_direct()
  remote-odb: add remote_odb_reinit()
  Use remote_odb_get_direct() and has_remote_odb()
  Use odb.origin.partialclonefilter instead of core.partialclonefilter
  t0410: test fetching from many promisor remotes
  Documentation/config: add odb..promisorRemote

 Documentation/config.txt  |   5 ++
 Makefile  |   2 +
 builtin/cat-file.c|   5 +-
 builtin/fetch.c   |  13 ++--
 builtin/gc.c  |   3 +-
 builtin/repack.c  |   3 +-
 cache.h   |   2 -
 connected.c   |   3 +-
 environment.c |   1 -
 fetch-object.c|  15 ++--
 fetch-object.h|   6 +-
 list-objects-filter-options.c |  51 +++--
 list-objects-filter-options.h |   3 +-
 odb-helper.c  |  45 +++
 odb-helper.h  |  24 ++
 packfile.c|   3 +-
 remote-odb.c  | 139 ++
 remote-odb.h  |  10 +++
 setup.c   |   7 +-
 sha1-file.c   |  14 ++--

[PATCH v2 2/9] Add initial remote odb support

2018-06-30 Thread Christian Couder
The remote-odb.{c,h} files will contain the functions
that are called by the rest of Git mostly from
"sha1-file.c" to access the objects managed by the
remote odbs.

The odb-helper.{c,h} files will contain the functions to
actually implement communication with either the internal
functions or the external scripts or processes that will
manage and provide remote git objects.

For now only infrastructure to create helpers from the
config and to check if there are remote odbs and odb
helpers is implemented.

We expect that there will not be a lot of helpers, so it
is ok to use a simple linked list to manage them.

Helped-by: Jeff King 
Signed-off-by: Christian Couder 
---
 Makefile |  2 ++
 odb-helper.c | 16 +
 odb-helper.h | 19 +++
 remote-odb.c | 93 
 remote-odb.h |  7 
 5 files changed, 137 insertions(+)
 create mode 100644 odb-helper.c
 create mode 100644 odb-helper.h
 create mode 100644 remote-odb.c
 create mode 100644 remote-odb.h

diff --git a/Makefile b/Makefile
index 0cb6590f24..5af048ddd5 100644
--- a/Makefile
+++ b/Makefile
@@ -896,6 +896,8 @@ LIB_OBJS += notes-cache.o
 LIB_OBJS += notes-merge.o
 LIB_OBJS += notes-utils.o
 LIB_OBJS += object.o
+LIB_OBJS += odb-helper.o
+LIB_OBJS += remote-odb.o
 LIB_OBJS += oidmap.o
 LIB_OBJS += oidset.o
 LIB_OBJS += packfile.o
diff --git a/odb-helper.c b/odb-helper.c
new file mode 100644
index 00..b4d403ffa9
--- /dev/null
+++ b/odb-helper.c
@@ -0,0 +1,16 @@
+#include "cache.h"
+#include "object.h"
+#include "argv-array.h"
+#include "odb-helper.h"
+#include "run-command.h"
+#include "sha1-lookup.h"
+
+struct odb_helper *odb_helper_new(const char *name, int namelen)
+{
+   struct odb_helper *o;
+
+   o = xcalloc(1, sizeof(*o));
+   o->name = xmemdupz(name, namelen);
+
+   return o;
+}
diff --git a/odb-helper.h b/odb-helper.h
new file mode 100644
index 00..4b792a3896
--- /dev/null
+++ b/odb-helper.h
@@ -0,0 +1,19 @@
+#ifndef ODB_HELPER_H
+#define ODB_HELPER_H
+
+/*
+ * An odb helper is a way to access a remote odb.
+ *
+ * Information in its fields comes from the config file [odb "NAME"]
+ * entries.
+ */
+struct odb_helper {
+   const char *name; /* odb..* */
+   const char *remote;   /* odb..promisorRemote */
+
+   struct odb_helper *next;
+};
+
+extern struct odb_helper *odb_helper_new(const char *name, int namelen);
+
+#endif /* ODB_HELPER_H */
diff --git a/remote-odb.c b/remote-odb.c
new file mode 100644
index 00..7156855155
--- /dev/null
+++ b/remote-odb.c
@@ -0,0 +1,93 @@
+#include "cache.h"
+#include "remote-odb.h"
+#include "odb-helper.h"
+#include "config.h"
+
+static struct odb_helper *helpers;
+static struct odb_helper **helpers_tail = &helpers;
+
+static struct odb_helper *find_or_create_helper(const char *name, int len)
+{
+   struct odb_helper *o;
+
+   for (o = helpers; o; o = o->next)
+   if (!strncmp(o->name, name, len) && !o->name[len])
+   return o;
+
+   o = odb_helper_new(name, len);
+   *helpers_tail = o;
+   helpers_tail = &o->next;
+
+   return o;
+}
+
+static struct odb_helper *do_find_odb_helper(const char *remote)
+{
+   struct odb_helper *o;
+
+   for (o = helpers; o; o = o->next)
+   if (o->remote && !strcmp(o->remote, remote))
+   return o;
+
+   return NULL;
+}
+
+static int remote_odb_config(const char *var, const char *value, void *data)
+{
+   struct odb_helper *o;
+   const char *name;
+   int namelen;
+   const char *subkey;
+
+   if (parse_config_key(var, "odb", &name, &namelen, &subkey) < 0)
+   return 0;
+
+   o = find_or_create_helper(name, namelen);
+
+   if (!strcmp(subkey, "promisorremote")) {
+   const char *remote;
+   int res = git_config_string(&remote, var, value);
+
+   if (res)
+   return res;
+
+   if (do_find_odb_helper(remote))
+   return error(_("when parsing config key '%s' "
+  "helper for remote '%s' already exists"),
+var, remote);
+
+   o->remote = remote;
+
+   return 0;
+   }
+
+   return 0;
+}
+
+static void remote_odb_init(void)
+{
+   static int initialized;
+
+   if (initialized)
+   return;
+   initialized = 1;
+
+   git_config(remote_odb_config, NULL);
+}
+
+struct odb_helper *find_odb_helper(const char *remote)
+{
+   struct odb_helper *o;
+
+   remote_odb_init();
+
+   if (!remote)
+   return helpers;
+
+   return do_

[PATCH v2 1/9] fetch-object: make functions return an error code

2018-06-30 Thread Christian Couder
The callers of the fetch_object() and fetch_objects() might
be interested in knowing if these functions succeeded or not.

Signed-off-by: Christian Couder 
---
 fetch-object.c | 15 +--
 fetch-object.h |  6 +++---
 sha1-file.c|  4 ++--
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/fetch-object.c b/fetch-object.c
index 853624f811..ccc4ea7f1a 100644
--- a/fetch-object.c
+++ b/fetch-object.c
@@ -5,11 +5,12 @@
 #include "transport.h"
 #include "fetch-object.h"
 
-static void fetch_refs(const char *remote_name, struct ref *ref)
+static int fetch_refs(const char *remote_name, struct ref *ref)
 {
struct remote *remote;
struct transport *transport;
int original_fetch_if_missing = fetch_if_missing;
+   int res;
 
fetch_if_missing = 0;
remote = remote_get(remote_name);
@@ -19,18 +20,20 @@ static void fetch_refs(const char *remote_name, struct ref 
*ref)
 
transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1");
-   transport_fetch_refs(transport, ref);
+   res = transport_fetch_refs(transport, ref);
fetch_if_missing = original_fetch_if_missing;
+
+   return res;
 }
 
-void fetch_object(const char *remote_name, const unsigned char *sha1)
+int fetch_object(const char *remote_name, const unsigned char *sha1)
 {
struct ref *ref = alloc_ref(sha1_to_hex(sha1));
hashcpy(ref->old_oid.hash, sha1);
-   fetch_refs(remote_name, ref);
+   return fetch_refs(remote_name, ref);
 }
 
-void fetch_objects(const char *remote_name, const struct oid_array *to_fetch)
+int fetch_objects(const char *remote_name, const struct oid_array *to_fetch)
 {
struct ref *ref = NULL;
int i;
@@ -41,5 +44,5 @@ void fetch_objects(const char *remote_name, const struct 
oid_array *to_fetch)
new_ref->next = ref;
ref = new_ref;
}
-   fetch_refs(remote_name, ref);
+   return fetch_refs(remote_name, ref);
 }
diff --git a/fetch-object.h b/fetch-object.h
index 4b269d07ed..12e1f9ee70 100644
--- a/fetch-object.h
+++ b/fetch-object.h
@@ -3,9 +3,9 @@
 
 #include "sha1-array.h"
 
-extern void fetch_object(const char *remote_name, const unsigned char *sha1);
+extern int fetch_object(const char *remote_name, const unsigned char *sha1);
 
-extern void fetch_objects(const char *remote_name,
- const struct oid_array *to_fetch);
+extern int fetch_objects(const char *remote_name,
+const struct oid_array *to_fetch);
 
 #endif
diff --git a/sha1-file.c b/sha1-file.c
index de4839e634..c099f5584d 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -1312,8 +1312,8 @@ int oid_object_info_extended(struct repository *r, const 
struct object_id *oid,
if (fetch_if_missing && repository_format_partial_clone &&
!already_retried && r == the_repository) {
/*
-* TODO Investigate having fetch_object() return
-* TODO error/success and stopping the music here.
+* TODO Investigate checking fetch_object() return
+* TODO value and stopping on error here.
 * TODO Pass a repository struct through fetch_object,
 * such that arbitrary repositories work.
 */
-- 
2.18.0.138.gac082779dc



[PATCH v2 3/9] remote-odb: implement remote_odb_get_direct()

2018-06-30 Thread Christian Couder
This is implemented only in the promisor remote mode
for now by calling fetch_object().

Signed-off-by: Christian Couder 
---
 odb-helper.c | 14 ++
 odb-helper.h |  3 ++-
 remote-odb.c | 17 +
 remote-odb.h |  1 +
 4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/odb-helper.c b/odb-helper.c
index b4d403ffa9..99b5a345e8 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -4,6 +4,7 @@
 #include "odb-helper.h"
 #include "run-command.h"
 #include "sha1-lookup.h"
+#include "fetch-object.h"
 
 struct odb_helper *odb_helper_new(const char *name, int namelen)
 {
@@ -14,3 +15,16 @@ struct odb_helper *odb_helper_new(const char *name, int 
namelen)
 
return o;
 }
+
+int odb_helper_get_direct(struct odb_helper *o,
+ const unsigned char *sha1)
+{
+   int res;
+   uint64_t start = getnanotime();
+
+   res = fetch_object(o->remote, sha1);
+
+   trace_performance_since(start, "odb_helper_get_direct");
+
+   return res;
+}
diff --git a/odb-helper.h b/odb-helper.h
index 4b792a3896..4c52e81ce8 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -15,5 +15,6 @@ struct odb_helper {
 };
 
 extern struct odb_helper *odb_helper_new(const char *name, int namelen);
-
+extern int odb_helper_get_direct(struct odb_helper *o,
+const unsigned char *sha1);
 #endif /* ODB_HELPER_H */
diff --git a/remote-odb.c b/remote-odb.c
index 7156855155..a1811c1bfc 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -91,3 +91,20 @@ int has_remote_odb(void)
 {
return !!find_odb_helper(NULL);
 }
+
+int remote_odb_get_direct(const unsigned char *sha1)
+{
+   struct odb_helper *o;
+
+   trace_printf("trace: remote_odb_get_direct: %s", sha1_to_hex(sha1));
+
+   remote_odb_init();
+
+   for (o = helpers; o; o = o->next) {
+   if (odb_helper_get_direct(o, sha1) < 0)
+   continue;
+   return 0;
+   }
+
+   return -1;
+}
diff --git a/remote-odb.h b/remote-odb.h
index 4c7b13695f..c5384c922d 100644
--- a/remote-odb.h
+++ b/remote-odb.h
@@ -3,5 +3,6 @@
 
 extern struct odb_helper *find_odb_helper(const char *remote);
 extern int has_remote_odb(void);
+extern int remote_odb_get_direct(const unsigned char *sha1);
 
 #endif /* REMOTE_ODB_H */
-- 
2.18.0.138.gac082779dc



Re: [PATCH v1 0/9] Introducing remote ODBs

2018-06-28 Thread Christian Couder
On Tue, Jun 26, 2018 at 2:37 AM, Eric Sunshine  wrote:

> In addition to the t5702 failures, I'm also seeing failures of
> t0410.1, t5616.6 and t5616.7 at the tip of 'pu' as of [1], all of
> which seem to be related to these changes.

Yeah but only s/core.partialclonefilter/odb.origin.partialclonefilter/
is needed on top of the fix for for the master branch.

Thanks for the report!


Re: [PATCH v1 0/9] Introducing remote ODBs

2018-06-28 Thread Christian Couder
On Mon, Jun 25, 2018 at 11:49 PM, Junio C Hamano  wrote:
>
> Just an early warning, as I haven't even complained on patch titles
> of these patches in the series ;-)
>
> 5702.20 and 5702.21 seems to fail in standalone test, when these are
> directly queued on top of Git v2.18.0; I haven't looked into the
> failure myself (yet).

Yeah sorry about that.

These tests can be fixed with only
s/extensions.partialClone/odb.magic.promisorRemote/ though.


Re: [PATCH 4/5] sequencer: refactor the code to detach HEAD to checkout.c

2018-06-28 Thread Christian Couder
On Thu, Jun 28, 2018 at 9:46 AM, Pratik Karki  wrote:
> The motivation behind this commit is to extract the core part of
> do_reset() from sequencer.c and move it to a new detach_head_to()
> function in checkout.c.

If this is independent from your other patches and if this can be used
by Alban's work, then it might be a good idea to send this patch
separately (and then to state in this patch series that it depends on
the separate patch) or at least to move this patch to the beginning of
the patch series.


Re: [PATCH 2/5] rebase: start implementing it as a builtin

2018-06-28 Thread Christian Couder
On Thu, Jun 28, 2018 at 9:46 AM, Pratik Karki  wrote:

> diff --git a/builtin/rebase.c b/builtin/rebase.c
> new file mode 100644
> index 0..1152b7229
> --- /dev/null
> +++ b/builtin/rebase.c
> @@ -0,0 +1,55 @@
> +/*
> + * "git rebase" builtin command
> + *
> + * Copyright (c) 2018 Pratik Karki
> + */

[...]

> +   die("TODO");
> +}
> \ No newline at end of file

Please add a newline at the end of the files you create.


Re: [PATCH 3/5] rebase: refactor common shell functions into their own file

2018-06-28 Thread Christian Couder
On Thu, Jun 28, 2018 at 9:46 AM, Pratik Karki  wrote:

> The motivation behind this is to call the backend functions
> *directly* from C, bypassing `git-rebase.sh`. Therefore those functions
> need to live in a separate file: we need to be able to call
> `.git-rebase--common` in that script snippet so that those functions

I think it should be `. git-rebase--common` (space missing between .
and git-rebase--common).

> are defined.


Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect

2018-06-26 Thread Christian Couder
Hi Dscho,

On Tue, Jun 26, 2018 at 4:10 PM, Johannes Schindelin
 wrote:
>
> On Tue, 26 Jun 2018, Christian Couder wrote:
>
>> On Mon, Jun 25, 2018 at 7:33 PM, Junio C Hamano  wrote:
>>
>> > I hate to say this, but the above looks like a typical
>> > unmaintainable mess.
>> >
>> > What happens when you or somebody else later needs to update the
>> > graph to be tested to add one more commit (or even more)?  Would it
>> > be enough to add another "rev-parse" plus "dist=X" line in both
>> > expects?  Or do we see a trap for combinatorial explosion that
>> > requires us to add new expect$N?
>>
>> What about the following then:
>>
>> test_dist_order () {
>> file="$1"
>> n="$2"
>> while read -r hash dist
>> do
>> d=$(echo "$dist" | sed -e "s/(dist=\(.*\))/\1/")
>> case "$d" in
>> ''|*[!0-9]*) return 1 ;;
>> *) ;;
>> esac
>> test "$d" -le "$n" || return 1
>> n="$d"
>> done <"$file"
>> }
>>
>> test_expect_success "--bisect-all --first-parent" '
>> cat >expect <> $(git rev-parse CC) (dist=2)
>> $(git rev-parse EX) (dist=1)
>> $(git rev-parse D) (dist=1)
>> $(git rev-parse FX) (dist=0)
>> EOF
>> sort expect >expect_sorted &&
>> git rev-list --bisect-all --first-parent FX ^A >actual &&
>> sort actual >actual_sorted &&
>> test_cmp expect_sorted actual_sorted &&
>> test_dist_order actual 2
>> '
>>
>> This feels overkill to me, but it should scale if we ever make more
>> complex tests.
>
> I *think* that you misunderstand Junio. At least when I read this:
>
>> $(git rev-parse CC) (dist=2)
>> $(git rev-parse EX) (dist=1)
>> $(git rev-parse D) (dist=1)
>> $(git rev-parse FX) (dist=0)
>
> I go: Huh?!?!?!?!?!
>
> What's CC? Is it Christian Couder? Creative Commons? Crudely Complex?

I agree that the name of the commit could be improved.

> The point, for me, is: if this test fails, at some stage in the future,
> for any reason, it will be a major pain to even dissect what the test is
> supposed to do.

I think the test is quite simple and there is an ascii picture, so it
is not so difficult to understand.

> This is no good. And you can do better. A lot better. You
> can write the test in a way that the head of a reader does not hurt, and
> at the same time it is still obvious what it does, and obvious that it
> does the right thing.

Obviousness is often not the same for everybody.

> One thing that makes the brain stumble for certain is when you deviate
> from the conventions, especially when it is for no good reason at all. In
> this case (and I am not sure why you, as a long-time contributor, did not
> spot this before public review):

Please note that I never committed myself (like most of us who are not
maintainer actually) to reviewing everything bisect related (or
everything that Tiago works on).

> - the titles of the test cases leave a lot of room for improvement,
>
> - the lines are too long,
>
> - the convention to end the `test_expect_success` line with an opening
>   single quote is not heeded,

If you take a look at the beginning of the script you will see that
there are those problems there too.

I know that we should try to do better, but here I am mostly
interested in moving forward a feature that people have requested for
ages, not cleaning up those tests. If someone else like you or Junio
thinks that it would be a good time to clean things up a bit, then I
am ok with it, but that's not my main goal here.

> - the graph is definitely written in an unnecessarily different format
>   than in the same test script!!!

Just above you complain about things that are similar to the previous
tests and now you complain about things that are different...

> - speaking of the graph: there is a perfectly fine commit graph already.
>   Why on earth is it not reused?

Perhaps because it is more complex than needed to test this feature
and/or to understand what happens. And I don't think we require
everywhere only one commit graph per test script.

> In this particular case it even feels as if this test is not even testing
> what it should test at all:
>
> - it should verify that all of the commits in the first parent lineage are
>   part of the list

It does that.

> - it should verify that none of the other commits are in the list

It does that too.

> And that is really all there is to test.

I 

Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect

2018-06-26 Thread Christian Couder
On Mon, Jun 25, 2018 at 7:33 PM, Junio C Hamano  wrote:
> Tiago Botelho  writes:
>
>> +test_expect_success "--bisect-all --first-parent" '
>> +cat >expect1 <> +$(git rev-parse CC) (dist=2)
>> +$(git rev-parse EX) (dist=1)
>> +$(git rev-parse D) (dist=1)
>> +$(git rev-parse FX) (dist=0)
>> +EOF
>> +
>> +cat >expect2 <> +$(git rev-parse CC) (dist=2)
>> +$(git rev-parse D) (dist=1)
>> +$(git rev-parse EX) (dist=1)
>> +$(git rev-parse FX) (dist=0)
>> +EOF
>> +
>> +git rev-list --bisect-all --first-parent FX ^A >actual &&
>> +  ( test_cmp expect1 actual || test_cmp expect2 actual )
>> +'
>
> I hate to say this, but the above looks like a typical
> unmaintainable mess.
>
> What happens when you or somebody else later needs to update the
> graph to be tested to add one more commit (or even more)?  Would it
> be enough to add another "rev-parse" plus "dist=X" line in both
> expects?  Or do we see a trap for combinatorial explosion that
> requires us to add new expect$N?

What about the following then:

test_dist_order () {
file="$1"
n="$2"
while read -r hash dist
do
d=$(echo "$dist" | sed -e "s/(dist=\(.*\))/\1/")
case "$d" in
''|*[!0-9]*) return 1 ;;
*) ;;
esac
test "$d" -le "$n" || return 1
n="$d"
done <"$file"
}

test_expect_success "--bisect-all --first-parent" '
cat >expect actual &&
sort actual >actual_sorted &&
test_cmp expect_sorted actual_sorted &&
test_dist_order actual 2
'

This feels overkill to me, but it should scale if we ever make more
complex tests.


Re: [PATCH v1 0/9] Introducing remote ODBs

2018-06-23 Thread Christian Couder
On Sat, Jun 23, 2018 at 2:18 PM, Christian Couder
 wrote:

> High level overview of this patch series
> 
>
>   - Patch 1/9:

Sorry the patches are numbered X/11 but they should be numbered X/9 as
only the first 9 should be in the series and have been sent.


[PATCH v1 0/9] Introducing remote ODBs

2018-06-23 Thread Christian Couder
This is a follow up from the patch series called "odb remote" that I
sent earlier this year, which was itself a follow up from the patch
series called "Promisor remotes and external ODB support" that I sent
earlier this year, which was itself a follow up from previous series.
See the links section for more information.

Following discussions of the previous patch series, the mechanism is
renamed from "odb remote" to "remote odb". I hope that this will will
be the last name change.

As with the previous "odb remote" series, this series is only about
integrating with the promisor/narrow clone work and showing that it
makes it possible to use more than one promisor remote. Everything
that is not necessary for that integration has been removed for now
(though you can still find it in one of my branches on GitHub if you
want).

There is one test in patch 8/9 that shows that more than one promisor
remote can now be used, but I still feel that it could be interesting
to add other such tests, so I am open to ideas in this area.

Changes compared to previous patch series


  - big rename from "odb remote" to "remote odb"
  - rename of field from "dealer" to "remote" in struct odb_helper
  - add comments around struct odb_helper
  - improve a few commit messages
  - add a documentation patch

High level overview of this patch series


  - Patch 1/9:

This makes functions in fetch-object.c return an error code, which is
necessary to later tell that they failed and try another remote odb
when there is more than one. This could also just be seen as a fix to
these functions.

  - Patch 2/9:

This introduces the minimum infrastructure for remote odbs.

  - Patches 3/9 and 4/9:

These patches implement remote_odb_get_direct() and
remote_odb_get_many_direct() using the functions from fetch-object.c.
These new functions will be used in following patches to replace the
functions from fetch-object.c.

  - Patch 5/9:

This implement remote_odb_reinit() which will be needed to reparse the
remote odb configuration.

  - Patches 6/9 and 7/9:

These patches integrate the remote odb mechanism into the
promisor/narrow clone code. The "extensions.partialclone" config
option is replaced by "odb..promisorRemote" and
"core.partialclonefilter" is replaced by
"odb..partialclonefilter".

  - Patch 8/9:

This adds a test case that shows that now more than one promisor
remote can be used.

  - Patch 9/9:

This starts documenting the remote odb mechanism.

Discussion
~~

I am not sure that it is ok to completely replace the
"extensions.partialclone" config option. Even if it is fully replaced,
no "extensions.remoteodb" is implemented in these patches, as maybe
the "extensions.partialclone" name could be kept even if the
underlying mechanism is the remote odb mechanism.

Anyway I think that the remote odb mechanism is much more extensible,
so I think using "extensions.partialclone" to specify a promisor
remote should be at least deprecated.

Links
~

This patch series on GitHub:

https://github.com/chriscool/git/commits/remote-odb

Previous "odb remote" series:

https://public-inbox.org/git/20180513103232.17514-1-chrisc...@tuxfamily.org/
https://github.com/chriscool/git/commits/odb-remote

Version 1 and 2 of the "Promisor remotes and external ODB support" series:

https://public-inbox.org/git/20180103163403.11303-1-chrisc...@tuxfamily.org/
https://public-inbox.org/git/20180319133147.15413-1-chrisc...@tuxfamily.org/

Version 1 and 2 of the "Promisor remotes and external ODB support" series on 
GitHub:

https://github.com/chriscool/git/commits/gl-small-promisor-external-odb12
https://github.com/chriscool/git/commits/gl-small-promisor-external-odb71


Christian Couder (9):
  fetch-object: make functions return an error code
  Add initial remote odb support
  remote-odb: implement remote_odb_get_direct()
  remote-odb: implement remote_odb_get_many_direct()
  remote-odb: add remote_odb_reinit()
  Use remote_odb_get_direct() and has_remote_odb()
  Use odb.origin.partialclonefilter instead of core.partialclonefilter
  t0410: test fetching from many promisor remotes
  Documentation/config: add odb..promisorRemote

 Documentation/config.txt  |   5 ++
 Makefile  |   2 +
 builtin/cat-file.c|   5 +-
 builtin/fetch.c   |  13 ++--
 builtin/gc.c  |   3 +-
 builtin/repack.c  |   3 +-
 cache.h   |   2 -
 connected.c   |   3 +-
 environment.c |   1 -
 fetch-object.c|  15 +++--
 fetch-object.h|   6 +-
 list-objects-filter-options.c |  49 --
 list-objects-filter-options.h |   3 +-
 odb-helper.c

[PATCH v1 01/11] fetch-object: make functions return an error code

2018-06-23 Thread Christian Couder
The callers of the fetch_object() and fetch_objects() might
be interested in knowing if these functions succeeded or not.

Signed-off-by: Christian Couder 
---
 fetch-object.c | 15 +--
 fetch-object.h |  6 +++---
 sha1-file.c|  4 ++--
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/fetch-object.c b/fetch-object.c
index 853624f811..ccc4ea7f1a 100644
--- a/fetch-object.c
+++ b/fetch-object.c
@@ -5,11 +5,12 @@
 #include "transport.h"
 #include "fetch-object.h"
 
-static void fetch_refs(const char *remote_name, struct ref *ref)
+static int fetch_refs(const char *remote_name, struct ref *ref)
 {
struct remote *remote;
struct transport *transport;
int original_fetch_if_missing = fetch_if_missing;
+   int res;
 
fetch_if_missing = 0;
remote = remote_get(remote_name);
@@ -19,18 +20,20 @@ static void fetch_refs(const char *remote_name, struct ref 
*ref)
 
transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1");
-   transport_fetch_refs(transport, ref);
+   res = transport_fetch_refs(transport, ref);
fetch_if_missing = original_fetch_if_missing;
+
+   return res;
 }
 
-void fetch_object(const char *remote_name, const unsigned char *sha1)
+int fetch_object(const char *remote_name, const unsigned char *sha1)
 {
struct ref *ref = alloc_ref(sha1_to_hex(sha1));
hashcpy(ref->old_oid.hash, sha1);
-   fetch_refs(remote_name, ref);
+   return fetch_refs(remote_name, ref);
 }
 
-void fetch_objects(const char *remote_name, const struct oid_array *to_fetch)
+int fetch_objects(const char *remote_name, const struct oid_array *to_fetch)
 {
struct ref *ref = NULL;
int i;
@@ -41,5 +44,5 @@ void fetch_objects(const char *remote_name, const struct 
oid_array *to_fetch)
new_ref->next = ref;
ref = new_ref;
}
-   fetch_refs(remote_name, ref);
+   return fetch_refs(remote_name, ref);
 }
diff --git a/fetch-object.h b/fetch-object.h
index 4b269d07ed..12e1f9ee70 100644
--- a/fetch-object.h
+++ b/fetch-object.h
@@ -3,9 +3,9 @@
 
 #include "sha1-array.h"
 
-extern void fetch_object(const char *remote_name, const unsigned char *sha1);
+extern int fetch_object(const char *remote_name, const unsigned char *sha1);
 
-extern void fetch_objects(const char *remote_name,
- const struct oid_array *to_fetch);
+extern int fetch_objects(const char *remote_name,
+const struct oid_array *to_fetch);
 
 #endif
diff --git a/sha1-file.c b/sha1-file.c
index 695e5c6276..374a56d0e1 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -1312,8 +1312,8 @@ int oid_object_info_extended(struct repository *r, const 
struct object_id *oid,
if (fetch_if_missing && repository_format_partial_clone &&
!already_retried && r == the_repository) {
/*
-* TODO Investigate having fetch_object() return
-* TODO error/success and stopping the music here.
+* TODO Investigate checking fetch_object() return
+* TODO value and stopping on error here.
 * TODO Pass a repository struct through fetch_object,
 * such that arbitrary repositories work.
 */
-- 
2.17.0.762.g886f54ea46



[PATCH v1 03/11] remote-odb: implement remote_odb_get_direct()

2018-06-23 Thread Christian Couder
This is implemented only in the promisor remote mode
for now by calling fetch_object().

Signed-off-by: Christian Couder 
---
 odb-helper.c | 14 ++
 odb-helper.h |  3 ++-
 remote-odb.c | 17 +
 remote-odb.h |  1 +
 4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/odb-helper.c b/odb-helper.c
index b4d403ffa9..99b5a345e8 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -4,6 +4,7 @@
 #include "odb-helper.h"
 #include "run-command.h"
 #include "sha1-lookup.h"
+#include "fetch-object.h"
 
 struct odb_helper *odb_helper_new(const char *name, int namelen)
 {
@@ -14,3 +15,16 @@ struct odb_helper *odb_helper_new(const char *name, int 
namelen)
 
return o;
 }
+
+int odb_helper_get_direct(struct odb_helper *o,
+ const unsigned char *sha1)
+{
+   int res;
+   uint64_t start = getnanotime();
+
+   res = fetch_object(o->remote, sha1);
+
+   trace_performance_since(start, "odb_helper_get_direct");
+
+   return res;
+}
diff --git a/odb-helper.h b/odb-helper.h
index 4b792a3896..4c52e81ce8 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -15,5 +15,6 @@ struct odb_helper {
 };
 
 extern struct odb_helper *odb_helper_new(const char *name, int namelen);
-
+extern int odb_helper_get_direct(struct odb_helper *o,
+const unsigned char *sha1);
 #endif /* ODB_HELPER_H */
diff --git a/remote-odb.c b/remote-odb.c
index 1dc165eaf6..a34537c05c 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -70,3 +70,20 @@ int has_remote_odb(void)
 {
return !!find_odb_helper(NULL);
 }
+
+int remote_odb_get_direct(const unsigned char *sha1)
+{
+   struct odb_helper *o;
+
+   trace_printf("trace: remote_odb_get_direct: %s", sha1_to_hex(sha1));
+
+   remote_odb_init();
+
+   for (o = helpers; o; o = o->next) {
+   if (odb_helper_get_direct(o, sha1) < 0)
+   continue;
+   return 0;
+   }
+
+   return -1;
+}
diff --git a/remote-odb.h b/remote-odb.h
index 4c7b13695f..c5384c922d 100644
--- a/remote-odb.h
+++ b/remote-odb.h
@@ -3,5 +3,6 @@
 
 extern struct odb_helper *find_odb_helper(const char *remote);
 extern int has_remote_odb(void);
+extern int remote_odb_get_direct(const unsigned char *sha1);
 
 #endif /* REMOTE_ODB_H */
-- 
2.17.0.762.g886f54ea46



[PATCH v1 07/11] Use odb.origin.partialclonefilter instead of core.partialclonefilter

2018-06-23 Thread Christian Couder
Let's make the partial clone filter specific to one odb
instead of general to all the odbs.

This makes it possible to have different partial clone
filters for different odbs.

Signed-off-by: Christian Couder 
---
 builtin/fetch.c   |  2 +-
 list-objects-filter-options.c | 26 --
 list-objects-filter-options.h |  3 ++-
 odb-helper.h  |  1 +
 remote-odb.c  |  2 ++
 t/t5616-partial-clone.sh  |  2 +-
 6 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 1fd4dfd024..52ec2fd200 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1343,7 +1343,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * the config.
 */
if (!filter_options.choice)
-   partial_clone_get_default_filter_spec(&filter_options);
+   partial_clone_get_default_filter_spec(&filter_options, 
remote->name);
return;
 }
 
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 8e7b9ae776..755a793664 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -7,6 +7,7 @@
 #include "list-objects-filter.h"
 #include "list-objects-filter-options.h"
 #include "remote-odb.h"
+#include "odb-helper.h"
 
 /*
  * Parse value of the argument to the "filter" keyword.
@@ -29,6 +30,9 @@ static int gently_parse_list_objects_filter(
 {
const char *v0;
 
+   if (!arg)
+   return 0;
+
if (filter_options->choice) {
if (errbuf) {
strbuf_init(errbuf, 0);
@@ -116,6 +120,7 @@ void partial_clone_register(
const struct list_objects_filter_options *filter_options)
 {
char *cfg_name;
+   char *filter_name;
 
/* Check if it is already registered */
if (find_odb_helper(remote))
@@ -131,25 +136,26 @@ void partial_clone_register(
/*
 * Record the initial filter-spec in the config as
 * the default for subsequent fetches from this remote.
-*
-* TODO: move core.partialclonefilter into odb.
 */
-   core_partial_clone_filter_default =
-   xstrdup(filter_options->filter_spec);
-   git_config_set("core.partialclonefilter",
-  core_partial_clone_filter_default);
+   filter_name = xstrfmt("odb.%s.partialclonefilter", remote);
+   git_config_set(filter_name, filter_options->filter_spec);
+   free(filter_name);
 
/* Make sure the config info are reset */
remote_odb_reinit();
 }
 
 void partial_clone_get_default_filter_spec(
-   struct list_objects_filter_options *filter_options)
+   struct list_objects_filter_options *filter_options,
+   const char *remote)
 {
+   struct odb_helper *helper = find_odb_helper(remote);
+
/*
 * Parse default value, but silently ignore it if it is invalid.
 */
-   gently_parse_list_objects_filter(filter_options,
-core_partial_clone_filter_default,
-NULL);
+   if (helper)
+   gently_parse_list_objects_filter(filter_options,
+helper->partial_clone_filter,
+NULL);
 }
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index a61f82..12ceef3230 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -74,6 +74,7 @@ void partial_clone_register(
const char *remote,
const struct list_objects_filter_options *filter_options);
 void partial_clone_get_default_filter_spec(
-   struct list_objects_filter_options *filter_options);
+   struct list_objects_filter_options *filter_options,
+   const char *remote);
 
 #endif /* LIST_OBJECTS_FILTER_OPTIONS_H */
diff --git a/odb-helper.h b/odb-helper.h
index 154ce4c7e4..4af75ef55c 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -10,6 +10,7 @@
 struct odb_helper {
const char *name; /* odb..* */
const char *remote;   /* odb..promisorRemote */
+   const char *partial_clone_filter; /* odb..partialCloneFilter */
 
struct odb_helper *next;
 };
diff --git a/remote-odb.c b/remote-odb.c
index 2b93d13abd..5b9d1930b5 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -35,6 +35,8 @@ static int remote_odb_config(const char *var, const char 
*value, void *data)
 
if (!strcmp(subkey, "promisorremote"))
return git_config_string(&o->remote, var, value);
+   if (!strcmp(subkey, "partialclonefilter"))
+   return git_config_string(&o->partial_clone_filter, var, value);
 
return 0;
 }
diff --git a/t/t5616-partial-clone.sh b/t/t56

[PATCH v1 08/11] t0410: test fetching from many promisor remotes

2018-06-23 Thread Christian Couder
Signed-off-by: Christian Couder 
---
 t/t0410-partial-clone.sh | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
index 6af4712da8..4a7a662512 100755
--- a/t/t0410-partial-clone.sh
+++ b/t/t0410-partial-clone.sh
@@ -162,8 +162,30 @@ test_expect_success 'fetching of missing objects' '
git verify-pack --verbose "$IDX" | grep "$HASH"
 '
 
+test_expect_success 'fetching of missing objects from another odb remote' '
+   git clone "file://$(pwd)/server" server2 &&
+   test_commit -C server2 bar &&
+   git -C server2 repack -a -d --write-bitmap-index &&
+   HASH2=$(git -C server2 rev-parse bar) &&
+
+   git -C repo remote add server2 "file://$(pwd)/server2" &&
+   git -C repo config odb.magic2.promisorRemote server2 &&
+   git -C repo cat-file -p "$HASH2" &&
+
+   git -C repo fetch server2 &&
+   rm -rf repo/.git/objects/* &&
+   git -C repo cat-file -p "$HASH2" &&
+
+   # Ensure that the .promisor file is written, and check that its
+   # associated packfile contains the object
+   ls repo/.git/objects/pack/pack-*.promisor >promisorlist &&
+   test_line_count = 1 promisorlist &&
+   IDX=$(cat promisorlist | sed "s/promisor$/idx/") &&
+   git verify-pack --verbose "$IDX" | grep "$HASH2"
+'
+
 test_expect_success 'rev-list stops traversal at missing and promised commit' '
-   rm -rf repo &&
+   rm -rf repo server server2 &&
test_create_repo repo &&
test_commit -C repo foo &&
test_commit -C repo bar &&
-- 
2.17.0.762.g886f54ea46



[PATCH v1 09/11] Documentation/config: add odb..promisorRemote

2018-06-23 Thread Christian Couder
---
 Documentation/config.txt | 5 +
 1 file changed, 5 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index ab641bf5a9..8df0c7177e 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2504,6 +2504,11 @@ This setting can be overridden with the 
`GIT_NOTES_REWRITE_REF`
 environment variable, which must be a colon separated list of refs or
 globs.
 
+odb..promisorRemote::
+   The name of a promisor remote. For now promisor remotes are
+   the only kind of remote object database (odb) that is
+   supported.
+
 pack.window::
The size of the window used by linkgit:git-pack-objects[1] when no
window size is given on the command line. Defaults to 10.
-- 
2.17.0.762.g886f54ea46



[PATCH v1 06/11] Use remote_odb_get_direct() and has_remote_odb()

2018-06-23 Thread Christian Couder
Instead of using the repository_format_partial_clone global
and fetch_object() directly, let's use has_remote_odb() and
remote_odb_get_direct().

Signed-off-by: Christian Couder 
---
 builtin/cat-file.c|  5 +++--
 builtin/fetch.c   | 11 ++-
 builtin/gc.c  |  3 ++-
 builtin/repack.c  |  3 ++-
 cache.h   |  2 --
 connected.c   |  3 ++-
 environment.c |  1 -
 list-objects-filter-options.c | 27 +++
 packfile.c|  3 ++-
 setup.c   |  7 +--
 sha1-file.c   | 14 --
 t/t0410-partial-clone.sh  | 30 +++---
 t/t5500-fetch-pack.sh |  4 ++--
 t/t5601-clone.sh  |  2 +-
 t/t5616-partial-clone.sh  |  2 +-
 unpack-trees.c|  6 +++---
 16 files changed, 63 insertions(+), 60 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 665b581949..3b24c1b9b7 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -13,6 +13,7 @@
 #include "tree-walk.h"
 #include "sha1-array.h"
 #include "packfile.h"
+#include "remote-odb.h"
 
 struct batch_options {
int enabled;
@@ -477,8 +478,8 @@ static int batch_objects(struct batch_options *opt)
 
for_each_loose_object(batch_loose_object, &sa, 0);
for_each_packed_object(batch_packed_object, &sa, 0);
-   if (repository_format_partial_clone)
-   warning("This repository has extensions.partialClone 
set. Some objects may not be loaded.");
+   if (has_remote_odb())
+   warning("This repository uses an odb. Some objects may 
not be loaded.");
 
cb.opt = opt;
cb.expand = &data;
diff --git a/builtin/fetch.c b/builtin/fetch.c
index ea5b9669ad..1fd4dfd024 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -21,6 +21,7 @@
 #include "utf8.h"
 #include "packfile.h"
 #include "list-objects-filter-options.h"
+#include "remote-odb.h"
 
 static const char * const builtin_fetch_usage[] = {
N_("git fetch [] [ [...]]"),
@@ -1313,7 +1314,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * If no prior partial clone/fetch and the current fetch DID NOT
 * request a partial-fetch, do a normal fetch.
 */
-   if (!repository_format_partial_clone && !filter_options.choice)
+   if (!has_remote_odb() && !filter_options.choice)
return;
 
/*
@@ -1321,7 +1322,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * on this repo and remember the given filter-spec as the default
 * for subsequent fetches to this remote.
 */
-   if (!repository_format_partial_clone && filter_options.choice) {
+   if (!has_remote_odb() && filter_options.choice) {
partial_clone_register(remote->name, &filter_options);
return;
}
@@ -1330,7 +1331,7 @@ static inline void fetch_one_setup_partial(struct remote 
*remote)
 * We are currently limited to only ONE promisor remote and only
 * allow partial-fetches from the promisor remote.
 */
-   if (strcmp(remote->name, repository_format_partial_clone)) {
+   if (!find_odb_helper(remote->name)) {
if (filter_options.choice)
die(_("--filter can only be used with the remote 
configured in core.partialClone"));
return;
@@ -1461,7 +1462,7 @@ int cmd_fetch(int argc, const char **argv, const char 
*prefix)
if (depth || deepen_since || deepen_not.nr)
deepen = 1;
 
-   if (filter_options.choice && !repository_format_partial_clone)
+   if (filter_options.choice && !has_remote_odb())
die("--filter can only be used when extensions.partialClone is 
set");
 
if (all) {
@@ -1495,7 +1496,7 @@ int cmd_fetch(int argc, const char **argv, const char 
*prefix)
}
 
if (remote) {
-   if (filter_options.choice || repository_format_partial_clone)
+   if (filter_options.choice || has_remote_odb())
fetch_one_setup_partial(remote);
result = fetch_one(remote, argc, argv, prune_tags_ok);
} else {
diff --git a/builtin/gc.c b/builtin/gc.c
index ccfb1ceaeb..02c783b514 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -26,6 +26,7 @@
 #include "pack-objects.h"
 #include "blob.h"
 #include "tree.h"
+#include "remote-odb.h"
 
 #define FAILED_RUN "failed to run %s"
 
@@ -619,7 +620,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
 

[PATCH v1 02/11] Add initial remote odb support

2018-06-23 Thread Christian Couder
The remote-odb.{c,h} files will contain the functions
that are called by the rest of Git mostly from
"sha1-file.c" to access the objects managed by the
remote odbs.

The odb-helper.{c,h} files will contain the functions to
actually implement communication with either the internal
functions or the external scripts or processes that will
manage and provide remote git objects.

For now only infrastructure to create helpers from the
config and to check if there are remote odbs and odb
helpers is implemented.

We expect that there will not be a lot of helpers, so it
is ok to use a simple linked list to manage them.

Helped-by: Jeff King 
Signed-off-by: Christian Couder 
---
 Makefile |  2 ++
 odb-helper.c | 16 
 odb-helper.h | 19 ++
 remote-odb.c | 72 
 remote-odb.h |  7 +
 5 files changed, 116 insertions(+)
 create mode 100644 odb-helper.c
 create mode 100644 odb-helper.h
 create mode 100644 remote-odb.c
 create mode 100644 remote-odb.h

diff --git a/Makefile b/Makefile
index e4b503d259..8465e3ccd4 100644
--- a/Makefile
+++ b/Makefile
@@ -896,6 +896,8 @@ LIB_OBJS += notes-cache.o
 LIB_OBJS += notes-merge.o
 LIB_OBJS += notes-utils.o
 LIB_OBJS += object.o
+LIB_OBJS += odb-helper.o
+LIB_OBJS += remote-odb.o
 LIB_OBJS += oidmap.o
 LIB_OBJS += oidset.o
 LIB_OBJS += packfile.o
diff --git a/odb-helper.c b/odb-helper.c
new file mode 100644
index 00..b4d403ffa9
--- /dev/null
+++ b/odb-helper.c
@@ -0,0 +1,16 @@
+#include "cache.h"
+#include "object.h"
+#include "argv-array.h"
+#include "odb-helper.h"
+#include "run-command.h"
+#include "sha1-lookup.h"
+
+struct odb_helper *odb_helper_new(const char *name, int namelen)
+{
+   struct odb_helper *o;
+
+   o = xcalloc(1, sizeof(*o));
+   o->name = xmemdupz(name, namelen);
+
+   return o;
+}
diff --git a/odb-helper.h b/odb-helper.h
new file mode 100644
index 00..4b792a3896
--- /dev/null
+++ b/odb-helper.h
@@ -0,0 +1,19 @@
+#ifndef ODB_HELPER_H
+#define ODB_HELPER_H
+
+/*
+ * An odb helper is a way to access a remote odb.
+ *
+ * Information in its fields comes from the config file [odb "NAME"]
+ * entries.
+ */
+struct odb_helper {
+   const char *name; /* odb..* */
+   const char *remote;   /* odb..promisorRemote */
+
+   struct odb_helper *next;
+};
+
+extern struct odb_helper *odb_helper_new(const char *name, int namelen);
+
+#endif /* ODB_HELPER_H */
diff --git a/remote-odb.c b/remote-odb.c
new file mode 100644
index 00..1dc165eaf6
--- /dev/null
+++ b/remote-odb.c
@@ -0,0 +1,72 @@
+#include "cache.h"
+#include "remote-odb.h"
+#include "odb-helper.h"
+#include "config.h"
+
+static struct odb_helper *helpers;
+static struct odb_helper **helpers_tail = &helpers;
+
+static struct odb_helper *find_or_create_helper(const char *name, int len)
+{
+   struct odb_helper *o;
+
+   for (o = helpers; o; o = o->next)
+   if (!strncmp(o->name, name, len) && !o->name[len])
+   return o;
+
+   o = odb_helper_new(name, len);
+   *helpers_tail = o;
+   helpers_tail = &o->next;
+
+   return o;
+}
+
+static int remote_odb_config(const char *var, const char *value, void *data)
+{
+   struct odb_helper *o;
+   const char *name;
+   int namelen;
+   const char *subkey;
+
+   if (parse_config_key(var, "odb", &name, &namelen, &subkey) < 0)
+   return 0;
+
+   o = find_or_create_helper(name, namelen);
+
+   if (!strcmp(subkey, "promisorremote"))
+   return git_config_string(&o->remote, var, value);
+
+   return 0;
+}
+
+static void remote_odb_init(void)
+{
+   static int initialized;
+
+   if (initialized)
+   return;
+   initialized = 1;
+
+   git_config(remote_odb_config, NULL);
+}
+
+struct odb_helper *find_odb_helper(const char *remote)
+{
+   struct odb_helper *o;
+
+   remote_odb_init();
+
+   if (!remote)
+   return helpers;
+
+   for (o = helpers; o; o = o->next)
+   if (!strcmp(o->remote, remote))
+   return o;
+
+   return NULL;
+}
+
+int has_remote_odb(void)
+{
+   return !!find_odb_helper(NULL);
+}
diff --git a/remote-odb.h b/remote-odb.h
new file mode 100644
index 00..4c7b13695f
--- /dev/null
+++ b/remote-odb.h
@@ -0,0 +1,7 @@
+#ifndef REMOTE_ODB_H
+#define REMOTE_ODB_H
+
+extern struct odb_helper *find_odb_helper(const char *remote);
+extern int has_remote_odb(void);
+
+#endif /* REMOTE_ODB_H */
-- 
2.17.0.762.g886f54ea46



[PATCH v1 04/11] remote-odb: implement remote_odb_get_many_direct()

2018-06-23 Thread Christian Couder
This function will be used to get many objects from a promisor
remote.

Signed-off-by: Christian Couder 
---
 odb-helper.c | 15 +++
 odb-helper.h |  3 +++
 remote-odb.c | 17 +
 remote-odb.h |  1 +
 4 files changed, 36 insertions(+)

diff --git a/odb-helper.c b/odb-helper.c
index 99b5a345e8..246ebf8f7a 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -28,3 +28,18 @@ int odb_helper_get_direct(struct odb_helper *o,
 
return res;
 }
+
+int odb_helper_get_many_direct(struct odb_helper *o,
+  const struct oid_array *to_get)
+{
+   int res;
+   uint64_t start;
+
+   start = getnanotime();
+
+   res = fetch_objects(o->remote, to_get);
+
+   trace_performance_since(start, "odb_helper_get_many_direct");
+
+   return res;
+}
diff --git a/odb-helper.h b/odb-helper.h
index 4c52e81ce8..154ce4c7e4 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -17,4 +17,7 @@ struct odb_helper {
 extern struct odb_helper *odb_helper_new(const char *name, int namelen);
 extern int odb_helper_get_direct(struct odb_helper *o,
 const unsigned char *sha1);
+extern int odb_helper_get_many_direct(struct odb_helper *o,
+ const struct oid_array *to_get);
+
 #endif /* ODB_HELPER_H */
diff --git a/remote-odb.c b/remote-odb.c
index a34537c05c..cc7059b23a 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -87,3 +87,20 @@ int remote_odb_get_direct(const unsigned char *sha1)
 
return -1;
 }
+
+int remote_odb_get_many_direct(const struct oid_array *to_get)
+{
+   struct odb_helper *o;
+
+   trace_printf("trace: remote_odb_get_many_direct: nr: %d", to_get->nr);
+
+   remote_odb_init();
+
+   for (o = helpers; o; o = o->next) {
+   if (odb_helper_get_many_direct(o, to_get) < 0)
+   continue;
+   return 0;
+   }
+
+   return -1;
+}
diff --git a/remote-odb.h b/remote-odb.h
index c5384c922d..e10a8bf855 100644
--- a/remote-odb.h
+++ b/remote-odb.h
@@ -4,5 +4,6 @@
 extern struct odb_helper *find_odb_helper(const char *remote);
 extern int has_remote_odb(void);
 extern int remote_odb_get_direct(const unsigned char *sha1);
+extern int remote_odb_get_many_direct(const struct oid_array *to_get);
 
 #endif /* REMOTE_ODB_H */
-- 
2.17.0.762.g886f54ea46



[PATCH v1 05/11] remote-odb: add remote_odb_reinit()

2018-06-23 Thread Christian Couder
We will need to reinitialize the remote odb configuration
as we will make some changes to it in a later commit when
we will detect that a remote is also a remote odb.

Signed-off-by: Christian Couder 
---
 remote-odb.c | 14 --
 remote-odb.h |  1 +
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/remote-odb.c b/remote-odb.c
index cc7059b23a..2b93d13abd 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -39,17 +39,27 @@ static int remote_odb_config(const char *var, const char 
*value, void *data)
return 0;
 }
 
-static void remote_odb_init(void)
+static void remote_odb_do_init(int force)
 {
static int initialized;
 
-   if (initialized)
+   if (!force && initialized)
return;
initialized = 1;
 
git_config(remote_odb_config, NULL);
 }
 
+static inline void remote_odb_init(void)
+{
+   remote_odb_do_init(0);
+}
+
+inline void remote_odb_reinit(void)
+{
+   remote_odb_do_init(1);
+}
+
 struct odb_helper *find_odb_helper(const char *remote)
 {
struct odb_helper *o;
diff --git a/remote-odb.h b/remote-odb.h
index e10a8bf855..79803782af 100644
--- a/remote-odb.h
+++ b/remote-odb.h
@@ -1,6 +1,7 @@
 #ifndef REMOTE_ODB_H
 #define REMOTE_ODB_H
 
+extern void remote_odb_reinit(void);
 extern struct odb_helper *find_odb_helper(const char *remote);
 extern int has_remote_odb(void);
 extern int remote_odb_get_direct(const unsigned char *sha1);
-- 
2.17.0.762.g886f54ea46



Re: [PATCH v1 2/8] Add initial odb remote support

2018-06-23 Thread Christian Couder
On Tue, May 15, 2018 at 3:44 AM, Junio C Hamano  wrote:
> Christian Couder  writes:

>> --- /dev/null
>> +++ b/odb-helper.h
>> @@ -0,0 +1,13 @@
>> +#ifndef ODB_HELPER_H
>> +#define ODB_HELPER_H
>
> Here is a good space to write a comment on what this structure and
> its fields are about.  Who are the dealers of helpers anyway?

Ok I added a few comments and renamed "dealer" to "remote".

>> +static struct odb_helper *helpers;
>> +static struct odb_helper **helpers_tail = &helpers;
>> +
>> +static struct odb_helper *find_or_create_helper(const char *name, int len)
>> +{
>> + struct odb_helper *o;
>> +
>> + for (o = helpers; o; o = o->next)
>> + if (!strncmp(o->name, name, len) && !o->name[len])
>> + return o;
>> +
>> + o = odb_helper_new(name, len);
>> + *helpers_tail = o;
>> + helpers_tail = &o->next;
>> +
>> + return o;
>> +}
>
> This is a tangent, but I wonder if we can do better than
> hand-rolling these singly-linked list of custom structure types
> every time we add new code.  I am just guessing (because it is not
> described in the log message) that the expectation is to have only
> just a handful of helpers so looking for a helper by name is OK at
> O(n), as long as we very infrequently look up a helper by name.

Yeah, I think there will be very few helpers. I added a comment in the
version I will send really soon now.

>> + if (!strcmp(subkey, "promisorremote"))
>> + return git_config_string(&o->dealer, var, value);
>
> If the field is meant to record the name of the promisor remote,
> then it is better to name it as such, no?

Yeah, it is renamed "remote" now.

>> + for (o = helpers; o; o = o->next)
>> + if (!strcmp(o->dealer, dealer))
>> + return o;
>
> The code to create a new helper tried to avoid creating a helper
> with duplicated name, but nobody tries to create more than one
> helpers that point at the same promisor remote.  Yet here we try to
> grab the first one that happens to point at the given promisor.
>
> That somehow smells wrong.

For each promisor remote I think it makes no sense to have more than
one remote odb pointing to it. So I am not sure what to do here.


[ANNOUNCE] Git Rev News edition 40

2018-06-20 Thread Christian Couder
Hi everyone,

The 40th edition of Git Rev News is now published:

  https://git.github.io/rev_news/2018/06/20/edition-40/

Thanks a lot to all the contributors: Adam Spiers, Bryan Turner and
Nicolas Pitre!

Enjoy,
Christian, Jakub, Markus and Gabriel.


Re: [GSoC][PATCH 1/3] sequencer: add a new function to silence a command, except if it fails.

2018-06-18 Thread Christian Couder
Hi Alban,

On Mon, Jun 18, 2018 at 3:18 PM, Alban Gruin  wrote:
> This adds a new function, run_command_silent_if_successful(),

He re the function is called run_command_silent_if_successful()...

> to
> redirect the stdout and stderr of a command to a strbuf, and then to run
> that command. This strbuf is printed only if the command fails. It is
> functionnaly similar to output() from git-rebase.sh.
>
> run_git_commit() is then refactored to use of
> run_command_silent_if_successful().

...here also...

[...]

> +static int run_command_silent_on_success(struct child_process *cmd)

...but here it is called run_command_silent_on_success().

Thanks,
Christian.


Draft of Git Rev News edition 40

2018-06-18 Thread Christian Couder
Hi,

A draft of a new Git Rev News edition is available here:

  https://github.com/git/git.github.io/blob/master/rev_news/drafts/edition-40.md

Everyone is welcome to contribute in any section either by editing the
above page on GitHub and sending a pull request, or by commenting on
this GitHub issue:

  https://github.com/git/git.github.io/issues/295

You can also reply to this email.

In general all kinds of contribution, for example proofreading,
suggestions for articles or links, help on the issues in GitHub, and
so on, are very much appreciated.

This month it would be nice to have a small summary, or maybe just a
number of links, related to the security release on May 29th, and the
same related to the GitHub acquisition by Microsoft. Thanks in
advance!

I tried to cc everyone who appears in this edition, but maybe I missed
some people, sorry about that.

Jakub, Markus, Gabriel and me plan to publish this edition on
Wednesday June 20th.

Thanks,
Christian.


Re: Is NO_ICONV misnamed or is it broken?

2018-06-16 Thread Christian Couder
Hi,

On Fri, Jun 15, 2018 at 12:47 AM, Mahmoud Al-Qudsi  wrote:
> Hello list,
>
> With regards to the Makefile define/variable `NO_ICONV` - the Makefile
> comments imply that it should be used if "your libc doesn't properly support
> iconv," which could mean anything from "a patch will be applied" to "iconv
> won't be used."

b6e56eca8a (Allow building Git in systems without iconv, 2006-02-16)
which added NO_ICONV says:

Systems using some uClibc versions do not properly support
iconv stuff. This patch allows Git to be built on those
systems by passing NO_ICONV=YesPlease to make. The only
drawback is mailinfo won't do charset conversion in those
systems.

> Based off the name of the varibale, the assumption is that iconv is an
> optional dependency that can be omitted if compiled with NO_ICONV. However, in
> practice attempting to compile git with `make ... NO_ICONV=1` and libiconv not
> installed results in linker errors as follows:
>
> ```
> ~> make clean
> # omitted
> ~> make NO_ICONV=1
> # ommitted
> LINK git-credential-store
> /usr/bin/ld: cannot find -liconv
> cc: error: linker command failed with exit code 1 (use -v to see invocation)
> gmake: *** [Makefile:2327: git-credential-store] Error 1
> ```

Yeah, this might be an issue with the Makefile options.

> Am I misunderstanding the intended behavior when NO_ICONV is defined (i.e. it
> does not remove the dependency on libiconv) or is this a bug and iconv should
> not, in fact, be required?

It's difficult to tell from reading the comments and commit messages.

I think 597c9cc540 (Flatten tools/ directory to make build procedure
simpler., 2005-09-07) which introduces NEEDS_LIBICONV is even older
than the commit that introduced NO_ICONV (see above), so you might
want to play with NEEDS_LIBICONV too and see if it works better for
you.

(I understand that "you might want to play with such and such other
options" is perhaps not as helpful as what you expected, but I
previously tried to tighten the way we handle dependencies in the
Makefile and it was considered "too heavy handed". So yeah, we
consider it ok if people have to tinker a bit when they want to build
Git.)

I CC'ed the people involved in related commits. Maybe they can give
you a better answer. It might also help if you could tell us on which
OS/Platform and perhaps for which purpose you want to compile Git.

Best,
Christian.


Re: How to delete files and directories from git commit history?

2018-06-12 Thread Christian Couder
Hi,

On Tue, Jun 12, 2018 at 9:44 PM, Steve Litt  wrote:

> My project (call it myproject) had a directory (call it docs/propdir)
> that was unnecessary for the project, and I've decided I don't want to
> offer the files in that directory as free software. So I need to delete
> docs/propdir from all commits in the repository. I did the following,
> while in my working repository's myproject directory:
>
> git filter-branch --tree-filter 'rm -rf docs/propdir' HEAD

[...]

> What command do I do to remove all mention of doc/propdir and its
> files from my git history?

Did you check the "CHECKLIST FOR SHRINKING A REPOSITORY" section of
the filter-branch man/help page?

Best,
Christian.


Re: [RFC PATCH 2/2] sha1-name: add core.validateAbbrev & relative core.abbrev

2018-06-06 Thread Christian Couder
On Wed, Jun 6, 2018 at 12:27 PM, Ævar Arnfjörð Bjarmason
 wrote:

> +This setting changes that to `O(1)`, but with the trade-off that
> +depending on the value of `core.abbrev` way may be printing

s/way may be printing/we may be printing/

> +abbreviated hashes that collide. Too see how likely this is, try

s/Too see/To see/

> +running:

[...]

> +Even without `core.validateAbbrev=false` the results abbreviation
> +already a bit of a probability game.

s/the results abbreviation already a bit of/the resulting abbreviation
is already a bit of/ maybe?

> diff --git a/sha1-name.c b/sha1-name.c
> index 60d9ef3c7e..aa7ccea14d 100644
> --- a/sha1-name.c
> +++ b/sha1-name.c
> @@ -576,6 +576,7 @@ int find_unique_abbrev_r(char *hex, const struct 
> object_id *oid, int len)
> struct disambiguate_state ds;
> struct min_abbrev_data mad;
> struct object_id oid_ret;
> +   int dar = default_abbrev_relative;
> if (len < 0) {
> unsigned long count = approximate_object_count();
> /*
> @@ -602,6 +603,20 @@ int find_unique_abbrev_r(char *hex, const struct 
> object_id *oid, int len)
> if (len == GIT_SHA1_HEXSZ || !len)
> return GIT_SHA1_HEXSZ;
>
> +   if (dar) {
> +   if (len + dar < MINIMUM_ABBREV) {
> +   len = MINIMUM_ABBREV;
> +   dar = 0;
> +   }
> +
> +   if (validate_abbrev) {
> +   len += dar;
> +   } else {
> +   hex[len + dar] = 0;
> +   return len + dar;
> +   }

I wonder what happens if len + dar > GIT_SHA1_HEXSZ

> +   }


[PATCH] t9104: kosherly remove remote refs

2018-05-31 Thread Christian Couder
As there are plans to implement other ref storage systems,
let's use a way to remove remote refs that does not depend
on refs being files.

This makes it clear to readers that this test does not
depend on which ref backend is used.

Suggested-by: Michael Haggerty 
Helped-by: Jeff King 
Signed-off-by: Christian Couder 
---
This was suggested and discussed in:

https://public-inbox.org/git/20180525085906.ga2...@sigill.intra.peff.net/

 t/t9104-git-svn-follow-parent.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/t/t9104-git-svn-follow-parent.sh b/t/t9104-git-svn-follow-parent.sh
index 9c49b6c1fe..5e0ad19177 100755
--- a/t/t9104-git-svn-follow-parent.sh
+++ b/t/t9104-git-svn-follow-parent.sh
@@ -215,7 +215,9 @@ test_expect_success "multi-fetch continues to work" "
"
 
 test_expect_success "multi-fetch works off a 'clean' repository" '
-   rm -rf "$GIT_DIR/svn" "$GIT_DIR/refs/remotes" &&
+   rm -rf "$GIT_DIR/svn" &&
+   git for-each-ref --format="option no-deref%0adelete %(refname)" 
refs/remotes |
+   git update-ref --stdin &&
git reflog expire --all --expire=all &&
mkdir "$GIT_DIR/svn" &&
git svn multi-fetch
-- 
2.17.0.1035.g12039e008f



Re: Bug: Install from .tar.xz fails without write permission on /usr/local/share/man/man3

2018-05-31 Thread Christian Couder
Hi,

On Thu, May 31, 2018 at 6:30 PM,   wrote:
>
> I was trying to build git 2.9.5 as a normal user, as I have no root access
> on a cluster with outdated software.
>
> The build fails, unless I change the PREFIX=/usr/local line in
> per/perl.mak:80 to a folder where I have write permission.
> Apparently, perl.mak does not honour the --prefix= setting of ./configure.
>
> Is it possible to change perl.mak to honor the PREFIX?

I don't think we will support old versions like v2.9.X.

There was a security release and we only released v2.17.1, v2.13.7,
v2.14.4, v2.15.2 and v2.16.4:

https://public-inbox.org/git/xmqqy3g2flb6@gitster-ct.c.googlers.com/

So it looks like v2.13.X is the oldest version we support. Do you
really need v2.9.5?

Best,
Christian.


Re: [GSoC] GSoC with git, week 4

2018-05-27 Thread Christian Couder
Hi Alban,

On Sat, May 26, 2018 at 6:32 PM, Alban Gruin  wrote:
>
> I published my blog post about this week. You can read it here:
>
> https://blog.pa1ch.fr/posts/2018/05/26/en/gsoc2018-week-4.html
>
> All comments are welcome!

Thanks for publishing a nice update!

Christian.


[PATCH] t990X: use '.git/objects' as 'deep inside .git' path

2018-05-25 Thread Christian Couder
Tests t9902-completion.sh and t9903-bash-prompt.sh each have tests
that check what happens when we are "in the '.git' directory" and
when we are "deep inside the '.git' directory".

To test the case when we are "deep inside the '.git' directory" the
test scripts used to perform a `cd .git/refs/heads`.

As there are plans to implement other ref storage systems, let's
use '.git/objects' instead of '.git/refs/heads' as the "deep inside
the '.git' directory" path.

This makes it clear to readers that these tests do not depend on
which ref backend is used.

The internals of the loose refs backend are still tested in
t1400-update-ref.sh.

Helped-by: SZEDER Gábor 
Signed-off-by: David Turner 
Signed-off-by: Christian Couder 
---
 t/t9902-completion.sh  | 2 +-
 t/t9903-bash-prompt.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh
index 36deb0b123..a28640ce1a 100755
--- a/t/t9902-completion.sh
+++ b/t/t9902-completion.sh
@@ -181,7 +181,7 @@ test_expect_success '__git_find_repo_path - cwd is a .git 
directory' '
 test_expect_success '__git_find_repo_path - parent is a .git directory' '
echo "$ROOT/.git" >expected &&
(
-   cd .git/refs/heads &&
+   cd .git/objects &&
__git_find_repo_path &&
echo "$__git_repo_path" >"$actual"
) &&
diff --git a/t/t9903-bash-prompt.sh b/t/t9903-bash-prompt.sh
index 8f5c811dd7..c3b89ae783 100755
--- a/t/t9903-bash-prompt.sh
+++ b/t/t9903-bash-prompt.sh
@@ -148,7 +148,7 @@ test_expect_success 'prompt - inside .git directory' '
 test_expect_success 'prompt - deep inside .git directory' '
printf " (GIT_DIR!)" >expected &&
(
-   cd .git/refs/heads &&
+   cd .git/objects &&
__git_ps1 >"$actual"
) &&
test_cmp expected "$actual"
-- 
2.17.0.1181.g093e983b05.dirty



Re: [PATCH v2 4/5] config doc: mention future aspirations for transfer.fsckObjects

2018-05-25 Thread Christian Couder
On Fri, May 25, 2018 at 9:28 PM, Ævar Arnfjörð Bjarmason
 wrote:
>
> Jeff King has said on at least a couple of occasions (at least one of
> Let's note that in the documentation so we don't seem to be claiming
> that this is by design. A previous version of this change called the
> current behavior a "bug", that's probably too strong a claim, but I
> don't think anyone would dislike a hypothetical local quarantine
> patch, so let's we might change this in the future.

Maybe: s/so let's we might/so let's say we might/


Re: [PATCH v2] t: make many tests depend less on the refs being files

2018-05-25 Thread Christian Couder
On Fri, May 25, 2018 at 11:05 AM, Michael Haggerty  wrote:
> On Fri, May 25, 2018 at 10:59 AM, Jeff King  wrote:
>> On Fri, May 25, 2018 at 10:48:04AM +0200, Michael Haggerty wrote:
>>
>>> >  test_expect_success "multi-fetch works off a 'clean' repository" '
>>> > -   rm -r "$GIT_DIR/svn" "$GIT_DIR/refs/remotes" "$GIT_DIR/logs" &&
>>> > +   rm -rf "$GIT_DIR/svn" "$GIT_DIR/refs/remotes" &&
>>> > +   git reflog expire --all --expire=all &&
>>> > mkdir "$GIT_DIR/svn" &&
>>> > git svn multi-fetch
>>> > '
>>> >
>>>
>>> `rm -rf "$GIT_DIR/refs/remotes"` is not kosher. I think it can be written
>>>
>>> printf 'option no-deref\ndelete %s\n' $(git for-each-ref
>>> --format='%(refname)' refs/remotes) | git update-ref --stdin
>>>
>>> as long as the number of references doesn't exceed command-line limits.
>>> This will also take care of the reflogs. Another alternative would be to
>>> write it as a loop.
>>
>> Perhaps:
>>
>>   git for-each-ref --format="option no-deref%0adelete %(refname)" 
>> refs/remotes |
>>   git update-ref --stdin
>
> Ah yes, that's nicer. I tried with `\n`, but that's not supported
> (wouldn't it be nice if it were?). I didn't think to try `%0a` (let
> alone look in the documentation!)

Thanks both for this suggestion. I plan to use it in another patch.


Re: [PATCH v2] t: make many tests depend less on the refs being files

2018-05-25 Thread Christian Couder
On Fri, May 25, 2018 at 10:48 AM, Michael Haggerty  wrote:
> On 05/23/2018 07:25 AM, Christian Couder wrote:
>>
>> diff --git a/t/t1401-symbolic-ref.sh b/t/t1401-symbolic-ref.sh
>> index 9e782a8122..a4ebb0b65f 100755
>> --- a/t/t1401-symbolic-ref.sh
>> +++ b/t/t1401-symbolic-ref.sh
>> @@ -65,7 +65,7 @@ reset_to_sane
>>  test_expect_success 'symbolic-ref fails to delete real ref' '
>>   echo "fatal: Cannot delete refs/heads/foo, not a symbolic ref" >expect 
>> &&
>>   test_must_fail git symbolic-ref -d refs/heads/foo >actual 2>&1 &&
>> - test_path_is_file .git/refs/heads/foo &&
>> + git rev-parse --verify refs/heads/foo &&
>>   test_cmp expect actual
>>  '
>>  reset_to_sane
>
> Should t1401 be considered a backend-agnostic test, or is it needed to
> ensure that symbolic refs are written correctly in the files backend?

I don't know. And I am ok to go either way. Another possibility would
be to split in two parts.


Re: git difftool with symlink to readonly jar failed

2018-05-24 Thread Christian Couder
Hi,

On Thu, May 24, 2018 at 11:11 PM, Etienne d'Hautefeuille
 wrote:
>
> #try  a diff
> git difftool --dir-diff 4cb98b4a307ce97d9e6b8e4b03211fa5ca8af7e7
> 0244799661b993b1f78fa5afb621de3fe4c4a39c
> fatal: impossible d'ouvrir '/tmp/git-difftool.UQ4mqo/left/jenkins.war' en
> écriture: Permission non accordée

You should use LANG=C so that people can understand the error message.

Also git difftool launches another program that will actually perform
the diff. It looks like it is bcompare on your setup. Did you try with
another program?


Re: [PATCH] t: make many tests depend less on the refs being files

2018-05-22 Thread Christian Couder
On Mon, May 21, 2018 at 9:34 PM, Stefan Beller  wrote:
> On Sun, May 20, 2018 at 10:51 PM, Christian Couder
>  wrote:
>> From: David Turner 
>>
>> So that they work under alternate ref storage backends.
>
> Sometimes I have a disconnect between the subject and the commit
> message, (e.g. in an email reader the subject is not displayed accurately on
> top of the message).
>
> So I would prefer if the first part of the body message is an actual
> sentence, and
> not a continuum from the subject.
>
> Maybe elaborate a bit more:
>
>   The current tests are very focused on the file system representation of
>   the loose and packed refs code.  As there are plans to implement other
>   ref storage systems, migrate most tests to a form that test the intent of 
> the
>   refs storage system instead of it internals. The internals of the loose and
>   packed refs are tested in , whereas the tests in this patch focus
>   on testing other aspects.

Thanks for this suggestion!

>> This will be really needed when such alternate ref storage backends are
>> developed. But this could already help by making clear to readers that
>> some tests do not depend on which ref backend is used.
>
> Ah, this is what I picked up already in the suggested edit above. :/

I actually mixed parts of your suggested message with parts of the
existing message in the V2 I just sent:

https://public-inbox.org/git/20180523052517.4443-1-chrisc...@tuxfamily.org/


Re: [PATCH] t: make many tests depend less on the refs being files

2018-05-22 Thread Christian Couder
On Mon, May 21, 2018 at 1:49 PM, SZEDER Gábor  wrote:
>> > diff --git a/t/t9903-bash-prompt.sh b/t/t9903-bash-prompt.sh
>> > index 8f5c811dd7..c3b89ae783 100755
>> > --- a/t/t9903-bash-prompt.sh
>> > +++ b/t/t9903-bash-prompt.sh
>> > @@ -148,7 +148,7 @@ test_expect_success 'prompt - inside .git directory' '
>> >  test_expect_success 'prompt - deep inside .git directory' '
>> > printf " (GIT_DIR!)" >expected &&
>> > (
>> > -   cd .git/refs/heads &&
>> > +   cd .git/objects &&
>> > __git_ps1 >"$actual"
>> > ) &&
>> > test_cmp expected "$actual"
>>
>> This one looks wrong.
>
> It's right, though.
>
>> Why not `cd .git` instead?
>
> That case is covered in the previous test.
>
> Once upon a time it mattered how deep we were in the .git directory
> when running __git_ps1(), because it executed different branches of a
> long-ish if-elif chain.  For the prompt it doesn't matter anymore,
> because those ifs were eliminated, but for the completion script it
> still does, which brings us to:

Thanks for the explanations!

> Christian!  There is a similar test, '__git_find_repo_path - parent is
> a .git directory' in 't9902-completion.sh', which, too, performs a 'cd
> .git/refs/heads'.

Ok, I will take care of both of these tests in another patch.


<    1   2   3   4   5   6   7   8   9   10   >