[PATCH v2 02/36] sha1_file: add prepare_external_alt_odb()

2018-03-19 Thread Christian Couder
This new function adds the external odb cache directory
to the list of alternate odbs.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 cache.h |  1 +
 sha1_file.c | 17 +
 2 files changed, 18 insertions(+)

diff --git a/cache.h b/cache.h
index d06932ed0b..2ac7d63e5c 100644
--- a/cache.h
+++ b/cache.h
@@ -1583,6 +1583,7 @@ extern void prepare_alt_odb(void);
 extern char *compute_alternate_path(const char *path, struct strbuf *err);
 typedef int alt_odb_fn(struct alternate_object_database *, void *);
 extern int foreach_alt_odb(alt_odb_fn, void*);
+extern void prepare_external_alt_odb(void);
 
 /*
  * Allocate a "struct alternate_object_database" but do _not_ actually
diff --git a/sha1_file.c b/sha1_file.c
index 1b94f39c4c..3f00fc716e 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -29,6 +29,7 @@
 #include "quote.h"
 #include "packfile.h"
 #include "fetch-object.h"
+#include "external-odb.h"
 
 const unsigned char null_sha1[GIT_MAX_RAWSZ];
 const struct object_id null_oid;
@@ -663,6 +664,21 @@ int foreach_alt_odb(alt_odb_fn fn, void *cb)
return r;
 }
 
+void prepare_external_alt_odb(void)
+{
+   static int linked_external;
+   const char *path;
+
+   if (linked_external)
+   return;
+
+   path = external_odb_root();
+   if (!access(path, F_OK)) {
+   link_alt_odb_entry(path, NULL, 0, "");
+   linked_external = 1;
+   }
+}
+
 void prepare_alt_odb(void)
 {
const char *alt;
@@ -676,6 +692,7 @@ void prepare_alt_odb(void)
link_alt_odb_entries(alt, PATH_SEP, NULL, 0);
 
read_info_alternates(get_object_directory(), 0);
+   prepare_external_alt_odb();
 }
 
 /* Returns 1 if we have successfully freshened the file, 0 otherwise. */
-- 
2.17.0.rc0.37.g8f476fabe9



[PATCH v2 04/36] external-odb: add has_external_odb()

2018-03-19 Thread Christian Couder
This function will be used to check if the external odb
mechanism is actually used.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 external-odb.c | 7 +++
 external-odb.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/external-odb.c b/external-odb.c
index 390958dbfe..d26e63d8b1 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -50,6 +50,13 @@ static void external_odb_init(void)
git_config(external_odb_config, NULL);
 }
 
+int has_external_odb(void)
+{
+   external_odb_init();
+
+   return !!helpers;
+}
+
 const char *external_odb_root(void)
 {
static const char *root;
diff --git a/external-odb.h b/external-odb.h
index ae2b228792..9a3c2f01b3 100644
--- a/external-odb.h
+++ b/external-odb.h
@@ -1,6 +1,7 @@
 #ifndef EXTERNAL_ODB_H
 #define EXTERNAL_ODB_H
 
+extern int has_external_odb(void);
 extern const char *external_odb_root(void);
 extern int external_odb_has_object(const unsigned char *sha1);
 
-- 
2.17.0.rc0.37.g8f476fabe9



[PATCH v2 07/36] odb-helper: add 'enum odb_helper_type'

2018-03-19 Thread Christian Couder
As there will be different kinds of helpers, let's add
an "enum odb_helper_type" to distinguish between the
different kinds.

Let's add a field with this type in "struct odb_helper",
and set it when reading the config file.

While at it, let's also make it possible to find a helper
of a specific kind by adding a new find_odb_helper()
function.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 external-odb.c | 13 +
 external-odb.h | 10 ++
 odb-helper.h   |  3 +++
 3 files changed, 26 insertions(+)

diff --git a/external-odb.c b/external-odb.c
index 5d0afb9762..9c77180d6c 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -57,6 +57,19 @@ int has_external_odb(void)
return !!helpers;
 }
 
+struct odb_helper *find_odb_helper(const char *dealer, enum odb_helper_type type)
+{
+   struct odb_helper *o;
+
+   external_odb_init();
+
+   for (o = helpers; o; o = o->next)
+   if (!strcmp(o->dealer, dealer) && o->type == type)
+   return o;
+
+   return NULL;
+}
+
 const char *external_odb_root(void)
 {
static const char *root;
diff --git a/external-odb.h b/external-odb.h
index fd6708163e..27c3d39c1b 100644
--- a/external-odb.h
+++ b/external-odb.h
@@ -1,7 +1,17 @@
 #ifndef EXTERNAL_ODB_H
 #define EXTERNAL_ODB_H
 
+enum odb_helper_type {
+   ODB_HELPER_NONE = 0,
+   ODB_HELPER_GIT_REMOTE,
+   ODB_HELPER_SCRIPT_CMD,
+   ODB_HELPER_SUBPROCESS_CMD,
+   OBJ_HELPER_MAX
+};
+
 extern int has_external_odb(void);
+extern struct odb_helper *find_odb_helper(const char *dealer,
+ enum odb_helper_type type);
 extern const char *external_odb_root(void);
 extern int external_odb_has_object(const unsigned char *sha1);
 extern int external_odb_get_direct(const unsigned char *sha1);
diff --git a/odb-helper.h b/odb-helper.h
index 57b14fe814..d4b968693f 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -1,9 +1,12 @@
 #ifndef ODB_HELPER_H
 #define ODB_HELPER_H
 
+#include "external-odb.h"
+
 struct odb_helper {
const char *name;
const char *dealer;
+   enum odb_helper_type type;
 
struct odb_helper_object {
struct object_id oid;
-- 
2.17.0.rc0.37.g8f476fabe9



[PATCH v2 05/36] external-odb: implement external_odb_get_direct

2018-03-19 Thread Christian Couder
This is implemented only in the promisor remote mode
for now by calling fetch_object().

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 external-odb.c | 15 +++
 external-odb.h |  1 +
 odb-helper.c   | 13 +
 odb-helper.h   |  3 ++-
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/external-odb.c b/external-odb.c
index d26e63d8b1..5d0afb9762 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -76,3 +76,18 @@ int external_odb_has_object(const unsigned char *sha1)
return 1;
return 0;
 }
+
+int external_odb_get_direct(const unsigned char *sha1)
+{
+   struct odb_helper *o;
+
+   external_odb_init();
+
+   for (o = helpers; o; o = o->next) {
+   if (odb_helper_get_direct(o, sha1) < 0)
+   continue;
+   return 0;
+   }
+
+   return -1;
+}
diff --git a/external-odb.h b/external-odb.h
index 9a3c2f01b3..fd6708163e 100644
--- a/external-odb.h
+++ b/external-odb.h
@@ -4,5 +4,6 @@
 extern int has_external_odb(void);
 extern const char *external_odb_root(void);
 extern int external_odb_has_object(const unsigned char *sha1);
+extern int external_odb_get_direct(const unsigned char *sha1);
 
 #endif /* EXTERNAL_ODB_H */
diff --git a/odb-helper.c b/odb-helper.c
index 67196e6317..c6127ce81f 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -4,6 +4,7 @@
 #include "odb-helper.h"
 #include "run-command.h"
 #include "sha1-lookup.h"
+#include "fetch-object.h"
 
 struct odb_helper *odb_helper_new(const char *name, int namelen)
 {
@@ -52,3 +53,15 @@ int odb_helper_has_object(struct odb_helper *o, const unsigned char *sha1)
return !!odb_helper_lookup(o, sha1);
 }
 
+int odb_helper_get_direct(struct odb_helper *o,
+ const unsigned char *sha1)
+{
+   int res = 0;
+   uint64_t start = getnanotime();
+
+   fetch_object(o->dealer, sha1);
+
+   trace_performance_since(start, "odb_helper_get_direct");
+
+   return res;
+}
diff --git a/odb-helper.h b/odb-helper.h
index e3c9c1cfe4..57b14fe814 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -20,5 +20,6 @@ struct odb_helper {
 extern struct odb_helper *odb_helper_new(const char *name, int namelen);
 extern int odb_helper_has_object(struct odb_helper *o,
 const unsigned char *sha1);
-
+extern int odb_helper_get_direct(struct odb_helper *o,
+const unsigned char *sha1);
 #endif /* ODB_HELPER_H */
-- 
2.17.0.rc0.37.g8f476fabe9



[PATCH v2 01/36] Add initial external odb support

2018-03-19 Thread Christian Couder
The external-odb.{c,h} files will contain the functions
that are called by the rest of Git mostly from
"sha1_file.c" to access the objects managed by the
external odbs.

The odb-helper.{c,h} files will contain the functions to
actually implement communication with either the internal
functions or the external scripts or processes that will
manage and provide external git objects.

For now only infrastructure to create helpers from the
config and to manage a cache for the 'have' command is
implemented.

Helped-by: Jeff King <p...@peff.net>
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 Makefile   |  2 ++
 external-odb.c | 72 ++
 external-odb.h |  7 +
 odb-helper.c   | 54 +
 odb-helper.h   | 24 +
 5 files changed, 159 insertions(+)
 create mode 100644 external-odb.c
 create mode 100644 external-odb.h
 create mode 100644 odb-helper.c
 create mode 100644 odb-helper.h

diff --git a/Makefile b/Makefile
index a1d8775adb..11dd620155 100644
--- a/Makefile
+++ b/Makefile
@@ -808,6 +808,7 @@ LIB_OBJS += ewah/ewah_bitmap.o
 LIB_OBJS += ewah/ewah_io.o
 LIB_OBJS += ewah/ewah_rlw.o
 LIB_OBJS += exec_cmd.o
+LIB_OBJS += external-odb.o
 LIB_OBJS += fetch-object.o
 LIB_OBJS += fetch-pack.o
 LIB_OBJS += fsck.o
@@ -843,6 +844,7 @@ LIB_OBJS += notes-cache.o
 LIB_OBJS += notes-merge.o
 LIB_OBJS += notes-utils.o
 LIB_OBJS += object.o
+LIB_OBJS += odb-helper.o
 LIB_OBJS += oidmap.o
 LIB_OBJS += oidset.o
 LIB_OBJS += packfile.o
diff --git a/external-odb.c b/external-odb.c
new file mode 100644
index 00..f3ea491333
--- /dev/null
+++ b/external-odb.c
@@ -0,0 +1,72 @@
+#include "cache.h"
+#include "external-odb.h"
+#include "odb-helper.h"
+#include "config.h"
+
+static struct odb_helper *helpers;
+static struct odb_helper **helpers_tail = &helpers;
+
+static struct odb_helper *find_or_create_helper(const char *name, int len)
+{
+   struct odb_helper *o;
+
+   for (o = helpers; o; o = o->next)
+   if (!strncmp(o->name, name, len) && !o->name[len])
+   return o;
+
+   o = odb_helper_new(name, len);
+   *helpers_tail = o;
+   helpers_tail = &o->next;
+
+   return o;
+}
+
+static int external_odb_config(const char *var, const char *value, void *data)
+{
+   struct odb_helper *o;
+   const char *name;
+   int namelen;
+   const char *subkey;
+
+   if (parse_config_key(var, "odb", &name, &namelen, &subkey) < 0)
+   return 0;
+
+   o = find_or_create_helper(name, namelen);
+
+   if (!strcmp(subkey, "promisorremote"))
+   return git_config_string(&o->dealer, var, value);
+
+   return 0;
+}
+
+static void external_odb_init(void)
+{
+   static int initialized;
+
+   if (initialized)
+   return;
+   initialized = 1;
+
+   git_config(external_odb_config, NULL);
+}
+
+const char *external_odb_root(void)
+{
+   static const char *root;
+   if (!root)
+   root = git_pathdup("objects/external");
+   return root;
+}
+
+int external_odb_has_object(const unsigned char *sha1)
+{
+   struct odb_helper *o;
+
+   external_odb_init();
+
+   for (o = helpers; o; o = o->next)
+   if (odb_helper_has_object(o, sha1))
+   return 1;
+   return 0;
+}
+
diff --git a/external-odb.h b/external-odb.h
new file mode 100644
index 00..ae2b228792
--- /dev/null
+++ b/external-odb.h
@@ -0,0 +1,7 @@
+#ifndef EXTERNAL_ODB_H
+#define EXTERNAL_ODB_H
+
+extern const char *external_odb_root(void);
+extern int external_odb_has_object(const unsigned char *sha1);
+
+#endif /* EXTERNAL_ODB_H */
diff --git a/odb-helper.c b/odb-helper.c
new file mode 100644
index 00..67196e6317
--- /dev/null
+++ b/odb-helper.c
@@ -0,0 +1,54 @@
+#include "cache.h"
+#include "object.h"
+#include "argv-array.h"
+#include "odb-helper.h"
+#include "run-command.h"
+#include "sha1-lookup.h"
+
+struct odb_helper *odb_helper_new(const char *name, int namelen)
+{
+   struct odb_helper *o;
+
+   o = xcalloc(1, sizeof(*o));
+   o->name = xmemdupz(name, namelen);
+
+   return o;
+}
+
+struct odb_helper_cmd {
+   struct argv_array argv;
+   struct child_process child;
+};
+
+static void odb_helper_load_have(struct odb_helper *o)
+{
+   if (o->have_valid)
+   return;
+   o->have_valid = 1;
+
+   /* TODO */
+}
+
+static const unsigned char *have_sha1_access(size_t index, void *table)
+{
+   struct odb_helper_object *have = table;
+   return have[index].oid.hash;
+}
+
+static struct odb_helper_object *odb_helper_lookup(struct odb_helper *o,
+  const unsigned char *sha1)
+{
+   int idx;
+
+   odb_helper_l

[PATCH v2 06/36] sha1_file: prepare for external odbs

2018-03-19 Thread Christian Couder
In the following commits we will need some functions that were
internal to sha1_file.c, so let's first make them non static
and declare them in "cache.h". While at it, let's rename
'create_tmpfile()' to 'create_object_tmpfile()' to make its
name less generic.

Let's also split out 'sha1_file_name_alt()' from
'sha1_file_name()' and 'open_sha1_file_alt()' from
'open_sha1_file()', as we will need both of these new
functions too.

Helped-by: Jeff King <p...@peff.net>
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 cache.h |  8 
 sha1_file.c | 49 ++---
 2 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/cache.h b/cache.h
index 4aa0129cf8..e6fd9b97ec 100644
--- a/cache.h
+++ b/cache.h
@@ -951,6 +951,12 @@ extern void check_repository_format(void);
  */
 extern void sha1_file_name(struct strbuf *buf, const unsigned char *sha1);
 
+/*
+ * Like sha1_file_name, but put in `buf` the filename within a
+ * specific alternate object directory.
+ */
+extern void sha1_file_name_alt(struct strbuf *buf, const char *objdir, const unsigned char *sha1);
+
 /*
  * Return an abbreviated sha1 unique within this repository's object database.
  * The result will be at least `len` characters long, and will be NUL
@@ -1247,6 +1253,8 @@ extern int parse_sha1_header(const char *hdr, unsigned long *sizep);
 
 extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type);
 
+extern int create_object_tmpfile(struct strbuf *tmp, const char *filename);
+extern void close_sha1_file(int fd);
 extern int finalize_object_file(const char *tmpfile, const char *filename);
 
 /*
diff --git a/sha1_file.c b/sha1_file.c
index f2e078e840..c460eb2658 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -320,13 +320,18 @@ static void fill_sha1_path(struct strbuf *buf, const unsigned char *sha1)
}
 }
 
-void sha1_file_name(struct strbuf *buf, const unsigned char *sha1)
+void sha1_file_name_alt(struct strbuf *buf, const char *objdir, const unsigned char *sha1)
 {
-   strbuf_addstr(buf, get_object_directory());
+   strbuf_addstr(buf, objdir);
strbuf_addch(buf, '/');
fill_sha1_path(buf, sha1);
 }
 
+void sha1_file_name(struct strbuf *buf, const unsigned char *sha1)
+{
+   return sha1_file_name_alt(buf, get_object_directory(), sha1);
+}
+
 struct strbuf *alt_scratch_buf(struct alternate_object_database *alt)
 {
strbuf_setlen(&alt->scratch, alt->base_len);
@@ -905,6 +910,25 @@ static int stat_sha1_file(const unsigned char *sha1, struct stat *st,
return -1;
 }
 
+static int open_sha1_file_alt(const unsigned char *sha1, const char **path)
+{
+   struct alternate_object_database *alt;
+   int most_interesting_errno = errno;
+
+   prepare_alt_odb();
+   for (alt = alt_odb_list; alt; alt = alt->next) {
+   int fd;
+   *path = alt_sha1_path(alt, sha1);
+   fd = git_open(*path);
+   if (fd >= 0)
+   return fd;
+   if (most_interesting_errno == ENOENT)
+   most_interesting_errno = errno;
+   }
+   errno = most_interesting_errno;
+   return -1;
+}
+
 /*
  * Like stat_sha1_file(), but actually open the object and return the
  * descriptor. See the caveats on the "path" parameter above.
@@ -912,8 +936,6 @@ static int stat_sha1_file(const unsigned char *sha1, struct stat *st,
 static int open_sha1_file(const unsigned char *sha1, const char **path)
 {
int fd;
-   struct alternate_object_database *alt;
-   int most_interesting_errno;
static struct strbuf buf = STRBUF_INIT;
 
strbuf_reset(&buf);
@@ -923,19 +945,8 @@ static int open_sha1_file(const unsigned char *sha1, const char **path)
fd = git_open(*path);
if (fd >= 0)
return fd;
-   most_interesting_errno = errno;
 
-   prepare_alt_odb();
-   for (alt = alt_odb_list; alt; alt = alt->next) {
-   *path = alt_sha1_path(alt, sha1);
-   fd = git_open(*path);
-   if (fd >= 0)
-   return fd;
-   if (most_interesting_errno == ENOENT)
-   most_interesting_errno = errno;
-   }
-   errno = most_interesting_errno;
-   return -1;
+   return open_sha1_file_alt(sha1, path);
 }
 
 /*
@@ -1537,7 +1548,7 @@ int hash_object_file(const void *buf, unsigned long len, const char *type,
 }
 
 /* Finalize a file on disk, and close it. */
-static void close_sha1_file(int fd)
+void close_sha1_file(int fd)
 {
if (fsync_object_files)
fsync_or_die(fd, "sha1 file");
@@ -1561,7 +1572,7 @@ static inline int directory_size(const char *filename)
  * We want to avoid cross-directory filename renames, because those
  * can have problems on various filesystems (FAT, NFS, Coda).
  */
-static in

Re: [PATCH 28/40] pack-objects: don't pack objects in external odbs

2018-03-19 Thread Christian Couder
On Thu, Jan 4, 2018 at 9:54 PM, Jeff Hostetler <g...@jeffhostetler.com> wrote:
>
> On 1/3/2018 11:33 AM, Christian Couder wrote:
>>
>> Objects managed by an external ODB should not be put into
>> pack files. They should be transferred using other mechanisms
>> that can be specific to the external odb.
>>
>> Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
>> ---
>>   builtin/pack-objects.c | 4 
>>   1 file changed, 4 insertions(+)
>>
>> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
>> index 6c71552cdf..4ed66c7677 100644
>> --- a/builtin/pack-objects.c
>> +++ b/builtin/pack-objects.c
>> @@ -28,6 +28,7 @@
>>   #include "argv-array.h"
>>   #include "mru.h"
>>   #include "packfile.h"
>> +#include "external-odb.h"
>> static const char *pack_usage[] = {
>> N_("git pack-objects --stdout [...] [<  | <
>> ]"),
>> @@ -1026,6 +1027,9 @@ static int want_object_in_pack(const struct object_id *oid,
>> return want;
>> }
>>   + if (external_odb_has_object(oid->hash))
>> +   return 0;
>
> I worry about the performance of this in light of my comments
> earlier in the patch series about the expense of building the
> "have" sets.

Building the have set is done only if the cap_have capability is used,
(see my answer to your comments on patch 15/40).

> Since we've already checked for a loose object and we are about
> to walk thru the local packfiles, so if we don't find it in any
> of them, then we don't have it locally.  Only then do we need
> to worry about external odbs.
>
> If we don't have it locally, does the caller of this function
> have sufficient "promisor" information to infer that the object
> should exist on the promisor remote?   Since you're going to
> omit it from the packfile anyway, you don't really need to know
> if the remote actually has it.

Yeah, I think it works in the same way as how the promisor code works,
but I haven't checked carefully, so I will take a look at that.

Thanks,
Christian.


Re: [PATCH 15/40] external-odb: add script mode support

2018-03-19 Thread Christian Couder
On Thu, Jan 4, 2018 at 8:55 PM, Jeff Hostetler <g...@jeffhostetler.com> wrote:
>
> On 1/3/2018 11:33 AM, Christian Couder wrote:
>>
>> diff --git a/odb-helper.c b/odb-helper.c
>> index 4b70b287af..c1a3443dc7 100644
>> --- a/odb-helper.c
>> +++ b/odb-helper.c
>> @@ -21,13 +21,124 @@ struct odb_helper_cmd {
>> struct child_process child;
>>   };
>>   +/*
>> + * Callers are responsible to ensure that the result of vaddf(fmt, ap)
>> + * is properly shell-quoted.
>> + */
>> +static void prepare_helper_command(struct argv_array *argv, const char
>> *cmd,
>> +  const char *fmt, va_list ap)
>> +{
>> +   struct strbuf buf = STRBUF_INIT;
>> +
>> +   strbuf_addstr(&buf, cmd);
>> +   strbuf_addch(&buf, ' ');
>> +   strbuf_vaddf(&buf, fmt, ap);
>
> I do find this a bit odd that you're putting the cmd, a space,
> and the printf results into a single arg in the argv, rather than
> directly loading up the argv.
>
> Is there an issue with the whitespace between the cmd and the
> printf result being in the same arg -- especially if there are
> quoting issues in the fmt string as you mention in the comment
> above?  (not sure, just asking)

This was discussed with Junio here:

https://public-inbox.org/git/xmqqmvggbl6m@gitster.mtv.corp.google.com/

I agree that I should take another look at it though. I will do that
in the next version after the one I will send really soon now.

>> +static int parse_object_line(struct odb_helper_object *o, const char
>> *line)
>> +{
>
> Is there a reason to order the fields this way?  In the test
> at the bottom of this commit, you take cat-file output and
> re-order the columns with awk.   I'm just wondering if we kept
> cat-file ordering in the format here, we could simplify things.

Yeah, maybe the shell script could be simplified while the C code
would not be more complex. I don't remember if that was in the
original version from Peff or if there was a reason to do it this way.
I will take a look at that in the next version after the one I will
send really soon now.

>> +   char *end;
>> +   if (get_sha1_hex(line, o->sha1) < 0)
>> +   return -1;
>> +
>> +   line += 40;
>> +   if (*line++ != ' ')
>> +   return -1;
>> +
>> +   o->size = strtoul(line, &end, 10);
>> +   if (line == end || *end++ != ' ')
>> +   return -1;
>> +
>> +   o->type = type_from_string(end);
>> +   return 0;
>> +}
>> +
>> +static int add_have_entry(struct odb_helper *o, const char *line)
>> +{
>> +   ALLOC_GROW(o->have, o->have_nr+1, o->have_alloc);
>
> I didn't see where o->have is initially allocated.  The default is
> to start with 64 and then grow by 3/2 as needed.  If we are getting
> lots of objects here, we'll have lots of reallocs slowing things down.
> It would be better to seed it somewhere to a large value.

Yeah but using this is optional. It depends on the cap_have
capability. And I think that if there is a really huge number of
objects in the external odb, it might be better to just not use
cap_have.

Another possibility would be for the helper to send the number of
objects it has first, so that we could alloc the right number before
receiving the haves, but this could require the helper to be more
complex.
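
Something like this completely untested sketch is what I have in mind
(the "size" line and the parse_have_size() name are hypothetical, they
are not part of the current series):

static int parse_have_size(struct odb_helper *o, const char *line)
{
	const char *arg;

	if (!skip_prefix(line, "size ", &arg))
		return -1;
	/*
	 * Reserve room up front, so that the ALLOC_GROW() calls in
	 * add_have_entry() never have to realloc while the 'have'
	 * lines are streamed in.
	 */
	ALLOC_GROW(o->have, strtoul(arg, NULL, 10), o->have_alloc);
	return 0;
}

But as said above this requires the helper to know its object count up
front, so it should probably stay optional anyway.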

>> +   if (parse_object_line(&o->have[o->have_nr], line) < 0) {
>> +   warning("bad 'have' input from odb helper '%s': %s",
>> +   o->name, line);
>> +   return 1;
>> +   }
>> +   o->have_nr++;
>> +   return 0;
>> +}
>> +
>> +static int odb_helper_object_cmp(const void *va, const void *vb)
>> +{
>> +   const struct odb_helper_object *a = va, *b = vb;
>> +   return hashcmp(a->sha1, b->sha1);
>> +}
>> +
>>   static void odb_helper_load_have(struct odb_helper *o)
>>   {
>> +   struct odb_helper_cmd cmd;
>> +   FILE *fh;
>> +   struct strbuf line = STRBUF_INIT;
>> +
>> if (o->have_valid)
>> return;
>> o->have_valid = 1;
>>   - /* TODO */
>> +   if (odb_helper_start(o, &cmd, "have") < 0)
>> +   return;
>> +
>> +   fh = xfdopen(cmd.child.out, "r");
>> +   while (strbuf_getline(&line, fh) != EOF)
>> +   if (add_have_entry(o, line.buf))
>> +   break;
>> +
>> +   strbuf_release(&line);
>> +   fclose(fh);
>> +

Re: Draft of Git Rev News edition 37

2018-03-18 Thread Christian Couder
On Sun, Mar 18, 2018 at 11:41 PM, Jacob Keller  wrote:
>
> I don't have a good summary yet, but I think a section about the
> discussion regarding the new recreate-merges and rebasing merges
> that's been ongoing might be useful?

Yeah sure, we would gladly accept a summary of this discussion.

> a lot of that discussion
> occurred prior to git-merge (tho it's been ongoing since then?).

If you want to take the latest discussions into account, the summary
could be either split into two parts, one for this edition and the
other one for the next edition. Or we could wait and have the whole
summary in the next edition.

Thanks,
Christian.


Draft of Git Rev News edition 37

2018-03-18 Thread Christian Couder
Hi,

A draft of a new Git Rev News edition is available here:

  https://github.com/git/git.github.io/blob/master/rev_news/drafts/edition-37.md

Everyone is welcome to contribute in any section either by editing the
above page on GitHub and sending a pull request, or by commenting on
this GitHub issue:

  https://github.com/git/git.github.io/issues/279

You can also reply to this email.

In general all kinds of contribution, for example proofreading,
suggestions for articles or links, help on the issues in GitHub, and
so on, are very much appreciated.

I tried to cc everyone who appears in this edition, but maybe I missed
some people, sorry about that.

Jakub, Markus, Gabriel and I plan to publish this edition on
Wednesday March 21st.

Thanks,
Christian.


Re: [GSoC] Scripts to be conversted into builtins

2018-03-17 Thread Christian Couder
On Sat, Mar 17, 2018 at 7:26 PM, Ævar Arnfjörð Bjarmason
 wrote:
>
> On Sat, Mar 17 2018, Yash Yadav jotted:
>
>> Hello,
>>
>> I am a student going through the GSoC process.
>>
>> In the project ideas listed there is one idea talking of conversion of
>> scripts to builtins. This interests me but no pointer forward is given
>> and I'd like to dive more into that idea and go through the script(s).
>>
>> So, where should I look further from here?
>
> One good place to start is to start reading at:
>
> git log -p --stat --full-diff --reverse 74703a1e4d.. -- 
> 'builtin/*--helper.c'
>
> And then search for git-.*\.sh in your pager. These are a bunch of
> commits where the bisect, rebase and submodule helpers have had their
> shellscript code incrementally replaced by C.

Yeah, and we have been proposing this kind of GSoC project for a
number of years now, so we have had a number of GSoC students doing
such projects, and therefore a lot of information about their
projects is available in the mailing list archive.


Re: [RFC][GSoC] Project proposal: convert interactive rebase to C

2018-03-17 Thread Christian Couder
Hi,

On Sat, Mar 17, 2018 at 8:14 PM, Alban Gruin  wrote:
>
> Weeks 3 & 4 — May 18, 2018 – June 11, 2018
> Then, I would start to rewrite git-rebase--interactive, and get rid of git-
> rebase--helper.

Usually to rewrite a shell script in C, we first rewrite the shell
functions as command modes of a C builtin helper and make the
shell script call the builtin helper (instead of the original shell
functions). Eventually, when the shell script is mostly only calling
the builtin helper, we add what is still needed into the builtin
helper and rename it so that it fully replaces the shell script.

See for example 0cce4a2756 (rebase -i -x: add exec commands via the
rebase--helper, 2017-12-05) or b903674b35 (bisect--helper:
`is_expected_rev` & `check_expected_revs` shell function in C,
2017-09-29). These examples show that we can do step by step rewrites.
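
To make the pattern concrete, the helper typically ends up looking
roughly like the sketch below. This is heavily simplified and the
names (cmd_foo__helper, foo_helper_usage) are only placeholders, not
the actual rebase--helper code; check_todo_list() and
skip_unnecessary_picks() stand for shell functions that have already
been rewritten in C and declared elsewhere (e.g. in sequencer.h):

#include "builtin.h"
#include "parse-options.h"

static const char * const foo_helper_usage[] = {
	N_("git foo--helper [<options>]"),
	NULL
};

int cmd_foo__helper(int argc, const char **argv, const char *prefix)
{
	/* one command mode per shell function that has been rewritten in C */
	enum { CHECK_TODO_LIST = 1, SKIP_UNNECESSARY_PICKS };
	int command = 0;
	struct option options[] = {
		OPT_CMDMODE(0, "check-todo-list", &command,
			    N_("check the todo list"), CHECK_TODO_LIST),
		OPT_CMDMODE(0, "skip-unnecessary-picks", &command,
			    N_("skip unnecessary picks"), SKIP_UNNECESSARY_PICKS),
		OPT_END()
	};

	argc = parse_options(argc, argv, prefix, options, foo_helper_usage, 0);

	if (command == CHECK_TODO_LIST)
		return !!check_todo_list();
	if (command == SKIP_UNNECESSARY_PICKS)
		return !!skip_unnecessary_picks();
	usage_with_options(foo_helper_usage, options);
}

The remaining shell script then just calls, for example,
"git foo--helper --check-todo-list" where it used to call the
corresponding shell function.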

I would suggest planning to use the same approach, and describing in
your proposal which shell functions you would like to rewrite into the
C builtin helper in which order, before planning to fully replace the
current git-rebase--interactive.

Thanks,
Christian.


Running some tests with -x fails

2018-03-14 Thread Christian Couder
I don't know if this is well known already, but when I run some
tests using -x on master they consistently fail (while they
consistently pass without -x).

For example:

./t5500-fetch-pack.sh -x

gives: # failed 3 among 353 test(s)

./t0008-ignores.sh -x

gives: # failed 208 among 394 test(s)

Are we interested in trying to fix those failures or are we ok with them?


Re: Use of uninitialised value of size 8 in sha1_name.c

2018-02-26 Thread Christian Couder
On Mon, Feb 26, 2018 at 3:06 PM, Derrick Stolee  wrote:
>
> Christian: do you want to submit the patch, or should I put one together?

I'd rather have you put one together.

Thanks,
Christian.


Re: Use of uninitialised value of size 8 in sha1_name.c

2018-02-26 Thread Christian Couder
On Mon, Feb 26, 2018 at 10:53 AM, Jeff King <p...@peff.net> wrote:
> On Mon, Feb 26, 2018 at 10:04:22AM +0100, Christian Couder wrote:
>
>> ==21455== Use of uninitialised value of size 8
>> ==21455==at 0x2D2A73: get_hex_char_from_oid (sha1_name.c:492)
>> ==21455==by 0x2D2AFE: extend_abbrev_len (sha1_name.c:502)
>> ==21455==by 0x2D2C3D: find_abbrev_len_for_pack (sha1_name.c:551)
>> ==21455==by 0x2D2CFF: find_abbrev_len_packed (sha1_name.c:569)
>> ==21455==by 0x2D2E12: find_unique_abbrev_r (sha1_name.c:608)
>> ==21455==by 0x2DCB66: strbuf_add_unique_abbrev (strbuf.c:877)
>> ==21455==by 0x14F7CE: update_local_ref (fetch.c:700)
>> ==21455==by 0x1500CF: store_updated_refs (fetch.c:871)
>> ==21455==by 0x15035B: fetch_refs (fetch.c:932)
>> ==21455==by 0x150CF8: do_fetch (fetch.c:1146)
>> ==21455==by 0x1515AB: fetch_one (fetch.c:1370)
>> ==21455==by 0x151A1D: cmd_fetch (fetch.c:1457)
>> ==21455==  Uninitialised value was created by a stack allocation
>> ==21455==at 0x2D2B2E: find_abbrev_len_for_pack (sha1_name.c:513)
>> ==21455==
>>
>> A quick git blame seems to point to 0e87b85683 (sha1_name: minimize
>> OID comparisons during disambiguation, 2017-10-12).
>>
>> It is difficult to tell for sure though as t5616-partial-clone.sh was
>> added after that commit.
>
> I think that commit is to blame, though the error isn't exactly where
> that stack trace puts it. Try this:
>
> diff --git a/sha1_name.c b/sha1_name.c
> index 611c7d24dd..6f7f36436f 100644
> --- a/sha1_name.c
> +++ b/sha1_name.c
> @@ -547,7 +547,10 @@ static void find_abbrev_len_for_pack(struct packed_git *p,
>  */
> mad->init_len = 0;
> if (!match) {
> -   nth_packed_object_oid(&oid, p, first);
> +   warning("p->num_objects = %u, first = %u",
> +   p->num_objects, first);
> +   if (!nth_packed_object_oid(&oid, p, first))
> +   die("oops!");
> extend_abbrev_len(&oid, mad);
> } else if (first < num - 1) {
> nth_packed_object_oid(&oid, p, first + 1);
>
> I get failures all over the test suite, like this:
>
>   warning: p->num_objects = 4, first = 3
>   warning: p->num_objects = 8, first = 6
>   warning: p->num_objects = 10, first = 0
>   warning: p->num_objects = 4, first = 0
>   warning: p->num_objects = 8, first = 0
>   warning: p->num_objects = 10, first = 10
>   fatal: oops!

Yeah, I tried to run t5601-clone.sh with --valgrind and I also get one
error (the same "Uninitialised value" error actually).

> Any time the abbreviated hex would go after the last object in the pack,
> then first==p->num_objects, and nth_packed_object_oid() will fail. That
> leaves uninitialized data in "oid", which is what valgrind complains
> about when we examine it in extend_abbrev_len().
>
> Most of the time the code behaves correctly anyway, because the
> uninitialized bytes are unlikely to match up with our hex and extend the
> length. Probably we just need to detect that case and skip the call to
> extend_abbrev_len() altogether.

Yeah, something like:

diff --git a/sha1_name.c b/sha1_name.c
index 611c7d24dd..a041d8d24f 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -547,15 +547,15 @@ static void find_abbrev_len_for_pack(struct packed_git *p,
 */
mad->init_len = 0;
if (!match) {
-   nth_packed_object_oid(&oid, p, first);
-   extend_abbrev_len(&oid, mad);
+   if (nth_packed_object_oid(&oid, p, first))
+   extend_abbrev_len(&oid, mad);
} else if (first < num - 1) {
-   nth_packed_object_oid(&oid, p, first + 1);
-   extend_abbrev_len(&oid, mad);
+   if (nth_packed_object_oid(&oid, p, first + 1))
+   extend_abbrev_len(&oid, mad);
}
if (first > 0) {
-   nth_packed_object_oid(&oid, p, first - 1);
-   extend_abbrev_len(&oid, mad);
+   if (nth_packed_object_oid(&oid, p, first - 1))
+   extend_abbrev_len(&oid, mad);
}
mad->init_len = mad->cur_len;
 }

seems to prevent valgrind from complaining when running t5616-partial-clone.sh.


Use of uninitialised value of size 8 in sha1_name.c

2018-02-26 Thread Christian Couder
Hi Derrick,

These days when running:

./t5616-partial-clone.sh --valgrind

on master, I get a bunch of:

==21455== Use of uninitialised value of size 8
==21455==at 0x2D2A73: get_hex_char_from_oid (sha1_name.c:492)
==21455==by 0x2D2AFE: extend_abbrev_len (sha1_name.c:502)
==21455==by 0x2D2C3D: find_abbrev_len_for_pack (sha1_name.c:551)
==21455==by 0x2D2CFF: find_abbrev_len_packed (sha1_name.c:569)
==21455==by 0x2D2E12: find_unique_abbrev_r (sha1_name.c:608)
==21455==by 0x2DCB66: strbuf_add_unique_abbrev (strbuf.c:877)
==21455==by 0x14F7CE: update_local_ref (fetch.c:700)
==21455==by 0x1500CF: store_updated_refs (fetch.c:871)
==21455==by 0x15035B: fetch_refs (fetch.c:932)
==21455==by 0x150CF8: do_fetch (fetch.c:1146)
==21455==by 0x1515AB: fetch_one (fetch.c:1370)
==21455==by 0x151A1D: cmd_fetch (fetch.c:1457)
==21455==  Uninitialised value was created by a stack allocation
==21455==at 0x2D2B2E: find_abbrev_len_for_pack (sha1_name.c:513)
==21455==

A quick git blame seems to point to 0e87b85683 (sha1_name: minimize
OID comparisons during disambiguation, 2017-10-12).

It is difficult to tell for sure though as t5616-partial-clone.sh was
added after that commit.

Best,
Christian.


[ANNOUNCE] Git Rev News edition 36

2018-02-21 Thread Christian Couder
Hi everyone,

The 36th edition of Git Rev News is now published:

  https://git.github.io/rev_news/2018/02/21/edition-36/

Thanks a lot to all the contributors!

Enjoy,
Christian, Jakub, Markus and Gabriel.


Draft of Git Rev News edition 36

2018-02-18 Thread Christian Couder
Hi,

A draft of a new Git Rev News edition is available here:

  https://github.com/git/git.github.io/blob/master/rev_news/drafts/edition-36.md

Everyone is welcome to contribute in any section either by editing the
above page on GitHub and sending a pull request, or by commenting on
this GitHub issue:

  https://github.com/git/git.github.io/issues/274

You can also reply to this email.

In general all kinds of contribution, for example proofreading,
suggestions for articles or links, help on the issues in GitHub, and
so on, are very much appreciated.

I tried to cc everyone who appears in this edition, but maybe I missed
some people, sorry about that.

Jakub, Markus, Gabriel and I plan to publish this edition on
Wednesday February 21st.

Thanks,
Christian.


GSoC 2018: Git has been accepted as a mentor organization!

2018-02-18 Thread Christian Couder
Hi,

Just a quick message to let everyone know that Git has been accepted
as a mentor organization for the Google Summer of Code 2018.

If anyone wants to mentor or co-mentor, that is still possible. Just
let us know so that we can invite you to register on the GSoC web
site.

Dscho, I just sent you an invite as a mentor, as I think you said you
are ok to mentor.

Best,
Christian.


Re: totally confused as to what "git bisect skip" is supposed to do

2018-02-13 Thread Christian Couder
On Tue, Feb 13, 2018 at 1:28 PM, Robert P. J. Day  wrote:
>
> p.s. i suspect i should RTFS to see exactly how git bisect does its
> work.

You might want to read https://git-scm.com/docs/git-bisect-lk2009.html
before reading the source code.


Re: totally confused as to what "git bisect skip" is supposed to do

2018-02-12 Thread Christian Couder
On Mon, Feb 12, 2018 at 11:44 AM, Robert P. J. Day
 wrote:
> On Fri, 9 Feb 2018, Junio C Hamano wrote:
>
>> "Robert P. J. Day"  writes:
>>
>> >   i'm confused ... why, after skipping a good chunk in the interval
>> > [v4.13,v4.14], do i still have exactly 7300 revisions to bisect? what
>> > am i so hopelessly misunderstanding here?
>>
>> Are you really "skipping" a chunk in the interval?
>>
>> I thought that "git bisect skip" is a way for you to respond, when
>> "git bisect" gave you a commit to test, saying "sorry, I cannot test
>> that exact version, please offer me something else to test".  And
>> each time you say that, you are not narrowing the search space in
>> any way, so it is understandable that the numver of candidate bad
>> commits will not decrease.
>
>   this might be an issue of terminology, then, as "man git-bisect"
> clearly suggests you can skip a range:
>
> You can also skip a range of commits, instead of just one
> commit, using range notation. For example:
>
>$ git bisect skip v2.5..v2.6
>
> This tells the bisect process that no commit after v2.5, up to
> and including v2.6, should be tested.

Yeah, I think this is kind of a terminology issue.

First, when git bisect says "Bisecting: XXX revisions left to test
after this", it doesn't mean that all those revisions will actually be
tested, as git bisect's purpose is to avoid testing as many revisions
as possible; a full bisection only needs about log2(XXX) actual tests
(roughly 13 for 7300 revisions).

So the XXX revisions are actually the revisions that possibly contain
the first bad commit.

And, as Junio wrote, when you tell git bisect that you cannot test
some revisions, it doesn't mean that those revisions cannot contain
the first bad commit.

> my issue (if this is indeed an issue) is that if i select to skip a
> sizable range of commits to test, should that not result in git bisect
> telling me it now has far fewer revisions to test? if i, in fact,
> manage to "disqualify" a number of commits from testing, is there no
> visual confirmation that i now have fewer commits to test?

I hope that the above clarification I gave is enough, but maybe the
following will help you.

If you cannot test, let's say, 20 commits because there is a build
problem in those commits, and in the end Git tells you that the first
bad commit could be any of 3 commits, 2 of which were previously
marked with skip, then you could still, if you wanted, fix those
commits so that they can be built, and test them.

So yeah if we only talk about the current bisection, the skipped
commits will not be tested, but if we talk about completely finishing
the bisection and finding the first bad commit, then those commits
could still be tested.


Re: "git bisect run make" adequate to locate first unbuildable commit?

2018-02-09 Thread Christian Couder
On Fri, Feb 9, 2018 at 2:20 PM, Robert P. J. Day  wrote:
>
>   writing a short tutorial on "git bisect" and, all the details of
> special exit code 125 aside, if one wanted to locate the first
> unbuildable commit, would it be sufficient to just run?
>
>   $ git bisect run make
>
>   as i read it, make returns either 0, 1 or 2 so there doesn't appear
> to be any possibility of weirdness with clashing with a 125 exit code.
> am i overlooking some subtle detail here i should be aware of? thanks.

I think you are not overlooking anything.


[PATCH v2 3/3] perf/aggregate: sort JSON fields in output

2018-02-01 Thread Christian Couder
It is much easier to diff the output against a previous
one when the fields are sorted.

Helped-by: Philip Oakley <philipoak...@iee.org>
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/perf/aggregate.perl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

The only change compared to v1 is a typo fix suggested by Philip in
the commit message.

diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index a609292491..749d6689f9 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -253,7 +253,7 @@ sub print_codespeed_results {
}
}
 
-   print to_json(\@data, {utf8 => 1, pretty => 1}), "\n";
+   print to_json(\@data, {utf8 => 1, pretty => 1, canonical => 1}), "\n";
 }
 
 binmode STDOUT, ":utf8" or die "PANIC on binmode: $!";
-- 
2.16.0.rc2.45.g09a1bbd803



[PATCH v2 2/3] perf/aggregate: add --reponame option

2018-02-01 Thread Christian Couder
This makes it easier to use the aggregate script
on the command line when one wants to get the
"environment" fields set in the codespeed output.

Previously setting GIT_REPO_NAME was needed
for this purpose.

Helped-by: Eric Sunshine <sunsh...@sunshineco.com>
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/perf/aggregate.perl | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

The only change compared to v1 is a logical change suggested by Eric
in the 'if ... elsif ... else ...' sequence that sets $environment.

diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index bbf0f30898..a609292491 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -37,7 +37,7 @@ sub format_times {
 }
 
 my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests,
-$codespeed, $subsection);
+$codespeed, $subsection, $reponame);
 while (scalar @ARGV) {
my $arg = $ARGV[0];
my $dir;
@@ -55,6 +55,15 @@ while (scalar @ARGV) {
}
next;
}
+   if ($arg eq "--reponame") {
+   shift @ARGV;
+   $reponame = $ARGV[0];
+   shift @ARGV;
+   if (! $reponame) {
+   die "empty reponame";
+   }
+   next;
+   }
last if -f $arg or $arg eq "--";
if (! -d $arg) {
my $rev = Git::command_oneline(qw(rev-parse --verify), $arg);
@@ -210,7 +219,9 @@ sub print_codespeed_results {
}
 
my $environment;
-   if (exists $ENV{GIT_PERF_REPO_NAME} and $ENV{GIT_PERF_REPO_NAME} ne "") {
+   if ($reponame) {
+   $environment = $reponame;
+   } elsif (exists $ENV{GIT_PERF_REPO_NAME} and $ENV{GIT_PERF_REPO_NAME} ne "") {
$environment = $ENV{GIT_PERF_REPO_NAME};
} elsif (exists $ENV{GIT_TEST_INSTALLED} and $ENV{GIT_TEST_INSTALLED} ne "") {
$environment = $ENV{GIT_TEST_INSTALLED};
-- 
2.16.0.rc2.45.g09a1bbd803



[PATCH v2 1/3] perf/aggregate: add --subsection option

2018-02-01 Thread Christian Couder
This makes it easier to use the aggregate script
on the command line, to get results from
subsections.

Previously setting GIT_PERF_SUBSECTION was needed
for this purpose.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/perf/aggregate.perl | 33 -
 1 file changed, 24 insertions(+), 9 deletions(-)

This small patch series contains a few small Codespeed related
usability improvements of the perf/aggregate.perl script on top the
cc/codespeed patch series that was recently merged into master.

Changes compared to v1 are described in each patch if any.

The discussion thread about v1 is there:  

https://public-inbox.org/git/20180128111843.2690-1-chrisc...@tuxfamily.org/T/#u

diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index 5c439f6bc2..bbf0f30898 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -36,7 +36,8 @@ sub format_times {
return $out;
 }
 
-my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests, $codespeed);
+my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests,
+$codespeed, $subsection);
 while (scalar @ARGV) {
my $arg = $ARGV[0];
my $dir;
@@ -45,6 +46,15 @@ while (scalar @ARGV) {
shift @ARGV;
next;
}
+   if ($arg eq "--subsection") {
+   shift @ARGV;
+   $subsection = $ARGV[0];
+   shift @ARGV;
+   if (! $subsection) {
+   die "empty subsection";
+   }
+   next;
+   }
last if -f $arg or $arg eq "--";
if (! -d $arg) {
my $rev = Git::command_oneline(qw(rev-parse --verify), $arg);
@@ -76,10 +86,15 @@ if (not @tests) {
 }
 
 my $resultsdir = "test-results";
-my $results_section = "";
-if (exists $ENV{GIT_PERF_SUBSECTION} and $ENV{GIT_PERF_SUBSECTION} ne "") {
-   $resultsdir .= "/" . $ENV{GIT_PERF_SUBSECTION};
-   $results_section = $ENV{GIT_PERF_SUBSECTION};
+
+if (! $subsection and
+exists $ENV{GIT_PERF_SUBSECTION} and
+$ENV{GIT_PERF_SUBSECTION} ne "") {
+   $subsection = $ENV{GIT_PERF_SUBSECTION};
+}
+
+if ($subsection) {
+   $resultsdir .= "/" . $subsection;
 }
 
 my @subtests;
@@ -183,15 +198,15 @@ sub print_default_results {
 }
 
 sub print_codespeed_results {
-   my ($results_section) = @_;
+   my ($subsection) = @_;
 
my $project = "Git";
 
my $executable = `uname -s -m`;
chomp $executable;
 
-   if ($results_section ne "") {
-   $executable .= ", " . $results_section;
+   if ($subsection) {
+   $executable .= ", " . $subsection;
}
 
my $environment;
@@ -233,7 +248,7 @@ sub print_codespeed_results {
 binmode STDOUT, ":utf8" or die "PANIC on binmode: $!";
 
 if ($codespeed) {
-   print_codespeed_results($results_section);
+   print_codespeed_results($subsection);
 } else {
print_default_results();
 }
-- 
2.16.0.rc2.45.g09a1bbd803



Re: Shawn Pearce has died

2018-01-29 Thread Christian Couder
On Mon, Jan 29, 2018 at 6:21 PM, Jeff King  wrote:
> On Mon, Jan 29, 2018 at 10:33:08AM +0100, Johannes Schindelin wrote:
>
>> I found these sad news in my timeline today:
>>
>> https://twitter.com/cdibona/status/957822400518696960
>
> Thanks for posting this.

Yeah, thanks.

> I know Shawn has not been all that active on this list in the past few
> years, so many may not know how active he has been behind the scenes:
>
>  - serving on the Git project leadership committee
>
>  - managing many of the contributors whose names you see here daily
>
>  - working on other Git ecosystem projects that aren't on this list
>(like JGit!)

also:

-  organizing most of the first in real life meetings of Git
developers (https://git.wiki.kernel.org/index.php/GitTogether)


Re: [PATCH 2/3] perf/aggregate: add --reponame option

2018-01-28 Thread Christian Couder
On Sun, Jan 28, 2018 at 8:57 PM, Eric Sunshine  wrote:
>
> Not a big deal, but the extra indentation (and noisy diff) could be
> avoided like this:
>
> my $environment;
> if ($reponame) {
> $environment = $reponame;
> } else if (exists ...) {
> ...as before
> }

Ok I will reroll with something like this and also the typo fix
suggested by Philip.

Thanks both.


[PATCH 2/3] perf/aggregate: add --reponame option

2018-01-28 Thread Christian Couder
This makes it easier to use the aggregate script
on the command line when one wants to get the
"environment" fields set in the codespeed output.

Previously setting GIT_REPO_NAME was needed
for this purpose.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/perf/aggregate.perl | 31 +--
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index bbf0f30898..d616d31ca8 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -37,7 +37,7 @@ sub format_times {
 }
 
 my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests,
-$codespeed, $subsection);
+$codespeed, $subsection, $reponame);
 while (scalar @ARGV) {
my $arg = $ARGV[0];
my $dir;
@@ -55,6 +55,15 @@ while (scalar @ARGV) {
}
next;
}
+   if ($arg eq "--reponame") {
+   shift @ARGV;
+   $reponame = $ARGV[0];
+   shift @ARGV;
+   if (! $reponame) {
+   die "empty reponame";
+   }
+   next;
+   }
last if -f $arg or $arg eq "--";
if (! -d $arg) {
my $rev = Git::command_oneline(qw(rev-parse --verify), $arg);
@@ -209,15 +218,17 @@ sub print_codespeed_results {
$executable .= ", " . $subsection;
}
 
-   my $environment;
-   if (exists $ENV{GIT_PERF_REPO_NAME} and $ENV{GIT_PERF_REPO_NAME} ne "") {
-   $environment = $ENV{GIT_PERF_REPO_NAME};
-   } elsif (exists $ENV{GIT_TEST_INSTALLED} and $ENV{GIT_TEST_INSTALLED} ne "") {
-   $environment = $ENV{GIT_TEST_INSTALLED};
-   $environment =~ s|/bin-wrappers$||;
-   } else {
-   $environment = `uname -r`;
-   chomp $environment;
+   my $environment = $reponame;
+   if (! $environment) {
+   if (exists $ENV{GIT_PERF_REPO_NAME} and $ENV{GIT_PERF_REPO_NAME} ne "") {
+   $environment = $ENV{GIT_PERF_REPO_NAME};
+   } elsif (exists $ENV{GIT_TEST_INSTALLED} and $ENV{GIT_TEST_INSTALLED} ne "") {
+   $environment = $ENV{GIT_TEST_INSTALLED};
+   $environment =~ s|/bin-wrappers$||;
+   } else {
+   $environment = `uname -r`;
+   chomp $environment;
+   }
}
 
my @data;
-- 
2.16.0.rc2.45.g09a1bbd803



[PATCH 1/3] perf/aggregate: add --subsection option

2018-01-28 Thread Christian Couder
This makes it easier to use the aggregate script
on the command line, to get results from
subsections.

Previously setting GIT_PERF_SUBSECTION was needed
for this purpose.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/perf/aggregate.perl | 33 -
 1 file changed, 24 insertions(+), 9 deletions(-)

This small patch series contains a few small Codespeed related
usability improvements of the perf/aggregate.perl script on top the
cc/codespeed patch series that was recently merged into master.

diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index 5c439f6bc2..bbf0f30898 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -36,7 +36,8 @@ sub format_times {
return $out;
 }
 
-my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests, $codespeed);
+my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests,
+$codespeed, $subsection);
 while (scalar @ARGV) {
my $arg = $ARGV[0];
my $dir;
@@ -45,6 +46,15 @@ while (scalar @ARGV) {
shift @ARGV;
next;
}
+   if ($arg eq "--subsection") {
+   shift @ARGV;
+   $subsection = $ARGV[0];
+   shift @ARGV;
+   if (! $subsection) {
+   die "empty subsection";
+   }
+   next;
+   }
last if -f $arg or $arg eq "--";
if (! -d $arg) {
my $rev = Git::command_oneline(qw(rev-parse --verify), $arg);
@@ -76,10 +86,15 @@ if (not @tests) {
 }
 
 my $resultsdir = "test-results";
-my $results_section = "";
-if (exists $ENV{GIT_PERF_SUBSECTION} and $ENV{GIT_PERF_SUBSECTION} ne "") {
-   $resultsdir .= "/" . $ENV{GIT_PERF_SUBSECTION};
-   $results_section = $ENV{GIT_PERF_SUBSECTION};
+
+if (! $subsection and
+exists $ENV{GIT_PERF_SUBSECTION} and
+$ENV{GIT_PERF_SUBSECTION} ne "") {
+   $subsection = $ENV{GIT_PERF_SUBSECTION};
+}
+
+if ($subsection) {
+   $resultsdir .= "/" . $subsection;
 }
 
 my @subtests;
@@ -183,15 +198,15 @@ sub print_default_results {
 }
 
 sub print_codespeed_results {
-   my ($results_section) = @_;
+   my ($subsection) = @_;
 
my $project = "Git";
 
my $executable = `uname -s -m`;
chomp $executable;
 
-   if ($results_section ne "") {
-   $executable .= ", " . $results_section;
+   if ($subsection) {
+   $executable .= ", " . $subsection;
}
 
my $environment;
@@ -233,7 +248,7 @@ sub print_codespeed_results {
 binmode STDOUT, ":utf8" or die "PANIC on binmode: $!";
 
 if ($codespeed) {
-   print_codespeed_results($results_section);
+   print_codespeed_results($subsection);
 } else {
print_default_results();
 }
-- 
2.16.0.rc2.45.g09a1bbd803



[PATCH 3/3] perf/aggregate: sort JSON fields in output

2018-01-28 Thread Christian Couder
It is much easier to diff the output against a previous
one when the fields are sorted.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/perf/aggregate.perl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index d616d31ca8..fcc0313e65 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -253,7 +253,7 @@ sub print_codespeed_results {
}
}
 
-   print to_json(\@data, {utf8 => 1, pretty => 1}), "\n";
+   print to_json(\@data, {utf8 => 1, pretty => 1, canonical => 1}), "\n";
 }
 
 binmode STDOUT, ":utf8" or die "PANIC on binmode: $!";
-- 
2.16.0.rc2.45.g09a1bbd803



Re: Please review my code

2018-01-26 Thread Christian Couder
On Fri, Jan 26, 2018 at 11:32 AM, Оля Тележная <olyatelezhn...@gmail.com> wrote:
> 2018-01-25 23:22 GMT+03:00 Christian Couder <christian.cou...@gmail.com>:
>> On Thu, Jan 25, 2018 at 6:20 PM, Оля Тележная <olyatelezhn...@gmail.com> 
>> wrote:
>>> Please look at my code:
>>> https://github.com/telezhnaya/git/commits/catfile
>>> You could send me any ideas here or in Github.
>>
>> I left some comments on GitHub. My main suggestion is to try to get
>> rid of the is_cat global and if possible to remove the "struct
>> expand_data *cat_file_info" global.
>
> Thanks for your comments, I find them very useful. Most of the issues are
> fixed except the main one, that you mentioned here :)

Ok, no problem, we will see what happens on the mailing list.

It looks like the for-each-ref documentation has not been changed though.

Otherwise it looks good to me and perhaps you could send your series
to the mailing list even if it's long. For the first version, you may
want to add "RFC" in the subject of the patch emails you send.

Thanks,


Re: Please review my code

2018-01-25 Thread Christian Couder
Hi Olga,

On Thu, Jan 25, 2018 at 6:20 PM, Оля Тележная  wrote:
> Hi everyone,
> I haven't sent the code to the mailing list because 25 commits (every
> commit in a separate message) look like spam.

Yeah, so now that you added tests, it might be interesting to see if
the patch series can be refactored to be shorter or to be clearer.

> Please look at my code:
> https://github.com/telezhnaya/git/commits/catfile
> You could send me any ideas here or in Github.

I left some comments on GitHub. My main suggestion is to try to get
rid of the is_cat global and if possible to remove the "struct
expand_data *cat_file_info" global.

> The main idea of the patch is to get rid of using custom formatting in
> cat-file and start using the general one from ref-filter.
> An additional bonus is that cat-file starts to support many new
> formatting commands like %(if), %(color), %(committername) etc.

Yeah, that is a really nice result.

> I remember I need to rewrite docs, I will do that in the near future.

Great, thanks,
Christian.


Re: [PATCH] mru: Replace mru.[ch] with list.h implementation

2018-01-19 Thread Christian Couder
On Tue, Jan 16, 2018 at 2:46 AM, Gargi Sharma  wrote:
> Replace the custom calls to mru.[ch] with calls to list.h. This patch is the
> final step in removing the mru API completely and inlining the logic.

You might want to say that this provides a significant code reduction
which shows that the mru API is not a very useful abstraction anymore.

> Another discussion, here
> (https://public-inbox.org/git/caoci2dgyqr4jff5oby2buyhnjeaapqkf8tbojn2w0b18eo+...@mail.gmail.com/)
> was on what has to be done with the next pointer of packed git type

I think using "pointer to a 'struct packed_git'" instead of "pointer
of packed git type" would be clearer here, but anyway see below.

> inside the
> packed_git structure. It can be removed _given_ that no one needs to
> access the list in order and can be sent as another patch.

I don't think it's worth pointing to a discussion about a future
improvement in the commit message. You could perhaps even remove the
whole paragraph above, as this commit is valuable and self-contained
enough by itself.

> ---
> Changes in v2:
> - Add a move to front function to the list API.
> - Remove memory leak.
> - Remove redundant remove operations on the list.
>
> The commit has been built on top of ot/mru-on-list branch.

Nice!

>  Makefile   |  1 -
>  builtin/pack-objects.c | 12 ++--
>  cache.h|  9 +
>  list.h |  7 +++
>  mru.c  | 27 ---
>  mru.h  | 40 
>  packfile.c | 18 +-
>  sha1_file.c|  1 -
>  8 files changed, 27 insertions(+), 88 deletions(-)
>  delete mode 100644 mru.c
>  delete mode 100644 mru.h

Very nice!

[...]

> @@ -1030,8 +1029,9 @@ static int want_object_in_pack(const unsigned char *sha1,
> *found_pack = p;
> }
> want = want_found_object(exclude, p);
> -   if (!exclude && want > 0)
> -   mru_mark(&packed_git_mru, entry);
> +   if (!exclude && want > 0) {
> +   list_move_to_front(&p->mru, &packed_git_mru);
> +   }
> +   }

Style: we usually remove brackets when there is one line after the
if(...) line. (See the 2 lines that you delete.)

Otherwise the patch looks good to me.

Thanks,
Christian.


Re: [PATCH v2 03/18] ref-filter: make valid_atom as function parameter

2018-01-19 Thread Christian Couder
On Fri, Jan 19, 2018 at 6:22 PM, Оля Тележная <olyatelezhn...@gmail.com> wrote:
> 2018-01-19 20:14 GMT+03:00 Christian Couder <christian.cou...@gmail.com>:
>> On Thu, Jan 18, 2018 at 7:20 AM, Оля Тележная <olyatelezhn...@gmail.com> 
>> wrote:

>>> And another thoughts here - we were thinking about creating format.h
>>> but decided not to move forward with it, and now we are suffering
>>> because of it. Can I create it right now or the history of commits
>>> would be too dirty because of it?
>>
>> It would also make it difficult to refactor your patch series if there
>> is a big move or renaming in the middle.
>>
>>> Also, do you mean just renaming of
>>> ref-filter? I was thinking that I need to put formatting-related logic
>>> to another file and leave all other stuff in ref-filter.
>>
>> Yeah, you can do both a move and a renaming.
>
> Thanks for a response! That thought is not clear enough for me. Do you
> want me to split ref-filter into 2 files (one is for formatting only
> called format and other one is for anything else still called
> ref-filter) - here is a second question by the way, do I need to
> create only format.h (and leave all realizations in ref-filter.c), or
> I also need to create format.c. Or, just to rename ref-filter into
> format and that's all.

Just renaming ref-filter into format (including the filenames) will
probably be enough, but it's also possible that it will make more
sense to keep some code only relevant to ref filtering in
ref-filter.{c,h}. We will be in a better position to decide what we
should do when the migration is finished.


Re: [PATCH v2 03/18] ref-filter: make valid_atom as function parameter

2018-01-19 Thread Christian Couder
On Fri, Jan 19, 2018 at 6:23 PM, Jeff King <p...@peff.net> wrote:
> On Fri, Jan 19, 2018 at 06:14:56PM +0100, Christian Couder wrote:
>
>> > Let's discuss, what behavior we are waiting for
>> > when atom seems useless for the command. Die or ignore?
>>
>> We could alternatively just emit a warning, but it looks like there
>> are a lot of die() calls already in ref-filter.c, so I would suggest
>> die().
>
> I actually think it makes sense to just expand nonsense into the empty
> string, for two reasons:
>
>   1. That's what ref-filter already does for the existing cases. For
>  example, try:
>
>git for-each-ref --format='%(objecttype) %(authordate)'
>
>  and you will see that the annotated tags just get a blank author
>  field.
>
>   2. I think we may end up with a case where we feed multiple objects
>  via --batch-check, and the format is only nonsense for _some_ of
>  them. E.g., I envision a world where you can do:
>
>git cat-file --batch-check='%(objecttype) %(refname)' <<-\EOF
>master
>12345abcde12345abcde12345abcde12345abcde
>EOF
>
>  and get output like:
>
>commit refs/heads/master
>commit
>
>  (i.e., if we would remember the refname discovered during the name
>  resolution, we could still report it). It would be annoying if the
>  second line caused us to die().

Yeah, ok, that makes sense.

>> > And, which
>> > atoms are useless (as I understand, "rest" and "deltabase" from
>> > cat-file are useless for all ref-filter users, so the question is
>> > about - am I right in it, and about ref-filter atoms for cat-file).
>>
>> For now and I think until the migration process is finished, you could
>> just die() in case of any atom not already supported by the command.
>
> I'm OK with dying in the interim if it's easier, though I suspect it is
> not much extra work to just expand to the empty string in such cases. If
> that's where we want to end up eventually, it may be worth going
> straight there.
>
> I also think %(deltabase) does make sense for anything that points to an
> object. I suspect it's not all that _useful_ for for-each-ref, but that
> doesn't mean we can't return the sensible thing if somebody asks for it.
>
> I agree that %(rest) probably doesn't make any sense for a caller which
> isn't parsing input.

Yeah, ok.


Re: [PATCH v2 03/18] ref-filter: make valid_atom as function parameter

2018-01-19 Thread Christian Couder
On Thu, Jan 18, 2018 at 7:20 AM, Оля Тележная <olyatelezhn...@gmail.com> wrote:
> 2018-01-18 1:39 GMT+03:00 Christian Couder <christian.cou...@gmail.com>:
>> On Wed, Jan 17, 2018 at 10:43 PM, Jeff King <p...@peff.net> wrote:
>>> On Tue, Jan 16, 2018 at 09:55:22AM +0300, Оля Тележная wrote:
>>>
>>>> > IOW, the progression I'd expect in a series like this is:
>>>> >
>>>> >   1. Teach ref-filter.c to support everything that cat-file can do.
>>>> >
>>>> >   2. Convert cat-file to use ref-filter.c.
>>>>
>>>> I agree, I even made this and it's working fine:
>>>> https://github.com/git/git/pull/450/commits/1b74f1047f07434dccb207534d1ad45a143e3f2b
>>
>> (Nit: it looks like the above link does not work any more, but it
>> seems that you are talking about the last patch on the catfile
>> branch.)
>>
>>>> But I decided not to add that to patch because I expand the
>>>> functionality of several commands (not only cat-file and
>>>> for-each-ref), and I need to support all new functionality in a proper
>>>> way, make these error messages, test everything and - the hardest one
>>>> - support many new commands for cat-file. As I understand, it is not
>>>> possible unless we finally move to ref-filter and print results also
>>>> there. Oh, and I also need to rewrite docs in that case. And I decided
>>>> to apply this in another patch. But, please, say your opinion, maybe
>>>> we could do that here in some way.
>>>
>>> Yeah, I agree that it will cause changes to other users of ref-filter,
>>> and you'd need to deal with documentation and tests there. But I think
>>> we're going to have to do those things eventually (since supporting
>>> those extra fields everywhere is part of the point of the project). And
>>> by doing them now, I think it can make the transition for cat-file a lot
>>> simpler, because we never have to puzzle over this intermediate state
>>> where only some of the atoms are valid for cat-file.
>>
>> I agree that you will have to deal with documentation and tests at one
>> point and that it could be a good idea to do that now.
>>
>> I wonder if it is possible to add atoms one by one into ref-filter and
>> to add tests and documentation at the same time, instead of merging
>> cat-file atoms with ref-filter atoms in one big step.
>>
>> When all the cat-file atoms have been added to ref-filter's
>> valid_atom, maybe you could add ref-filter's atoms to cat-file's
>> valid_cat_file_atom one by one and add tests and documentation at the
>> same time.
>>
>> And then when ref-filter's valid_atom and cat-file's
>> valid_cat_file_atom are identical you can remove cat-file's
>> valid_cat_file_atom and maybe after that rename "ref-filter" to
>> "format".
>
> I think it's important to finish migrating process at first. I mean,
> now we are preparing and collecting everything in ref-filter, but we
> make resulting string and print still in cat-file. And I am not sure,
> but maybe it will not be possible to start using new atoms in cat-file
> while some part of logic still differs.

Ok, you can finish the migration process then.

> And another thoughts here - we were thinking about creating format.h
> but decided not to move forward with it, and now we are suffering
> because of it. Can I create it right now or the history of commits
> would be too dirty because of it?

It would also make it difficult to refactor your patch series if there
is a big move or renaming in the middle.

> Also, do you mean just renaming of
> ref-filter? I was thinking that I need to put formatting-related logic
> to another file and leave all other stuff in ref-filter.

Yeah, you can do both a move and a renaming.

> Anyway, your suggested steps looks good, and I am planning to
> implement them later.

Ok.

> Let's discuss, what behavior we are waiting for
> when atom seems useless for the command. Die or ignore?

We could alternatively just emit a warning, but it looks like there
are a lot of die() calls already in ref-filter.c, so I would suggest
die().

> And, which
> atoms are useless (as I understand, "rest" and "deltabase" from
> cat-file are useless for all ref-filter users, so the question is
> about - am I right in it, and about ref-filter atoms for cat-file).

For now and I think until the migration process is finished, you could
just die() in case of any atom not already supported by the command.
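
For example something like this where the atom is parsed (the function
name and the variable are just placeholders for whatever fits the
current code):

	if (!is_atom_supported(atom_name))
		die(_("unsupported atom: %s"), atom_name);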

> I have never written any tests and docs for Git, I will try to explore
> by myself how to do that, but if you have any special links/materials
> about it - please send them to me :)

I think that looking at the existing documentation and tests is
probably the best way to learn about it.


Re: [PATCH v2 03/18] ref-filter: make valid_atom as function parameter

2018-01-19 Thread Christian Couder
On Fri, Jan 19, 2018 at 1:24 PM, Оля Тележная <olyatelezhn...@gmail.com> wrote:
> 2018-01-18 17:23 GMT+03:00 Christian Couder <christian.cou...@gmail.com>:
>> On Thu, Jan 18, 2018 at 12:49 PM, Оля Тележная <olyatelezhn...@gmail.com> 
>> wrote:
>>> 2018-01-18 9:20 GMT+03:00 Оля Тележная <olyatelezhn...@gmail.com>:
>>>>
>>>> I think it's important to finish migrating process at first. I mean,
>>>> now we are preparing and collecting everything in ref-filter, but we
>>>> make resulting string and print still in cat-file. And I am not sure,
>>>> but maybe it will not be possible to start using new atoms in cat-file
>>>> while some part of logic still differs.
>>>
>>> I tried to make that part here:
>>> https://github.com/telezhnaya/git/commit/19a148614f1d4db1f8e628eb4e6d7c819d2da875
>>> I know that the code is disgusting and there is a memory leak :) I
>>> just try to reuse ref-filter logic, I will cleanup everything later.
>>> At first, I try to make it work.
>>> The problem is that I have segfault, and if I use gdb, I get:
>>>
>>> Program received signal SIGSEGV, Segmentation fault.
>>> 0x in ?? ()
>>
>> Make sure that you compile with debug options like -g3. For example I use:
>>
>> $ make -j 4 DEVELOPER=1 CFLAGS="-g3"
>
> Is it OK that I get different test results with simple make and with
> make with all that flags?
> Have a code: https://github.com/telezhnaya/git/commits/catfile
> I do:
>
> olya@ubuntu17-vm:~/git$ make install
> olya@ubuntu17-vm:~/git$ cd t
> olya@ubuntu17-vm:~/git/t$ ./t1006-cat-file.sh
>
> And I have 17 tests broken.
> Then, without any changes in code, I do:
>
> olya@ubuntu17-vm:~/git$ make -j 4 DEVELOPER=1 CFLAGS="-g3" install
> olya@ubuntu17-vm:~/git$ cd t
> olya@ubuntu17-vm:~/git/t$ ./t1006-cat-file.sh
>
> And there is 42 tests broken.
> And it's really hard to search for errors in such situation.

Some segfaults and other memory issues might happen or not happen
depending on how the code has been compiled. I think that if you fix
all the memory related issues, the behavior should be the same.


Re: [PATCH v2 03/18] ref-filter: make valid_atom as function parameter

2018-01-18 Thread Christian Couder
On Thu, Jan 18, 2018 at 12:49 PM, Оля Тележная  wrote:
> 2018-01-18 9:20 GMT+03:00 Оля Тележная :
>>
>> I think it's important to finish migrating process at first. I mean,
>> now we are preparing and collecting everything in ref-filter, but we
>> make resulting string and print still in cat-file. And I am not sure,
>> but maybe it will not be possible to start using new atoms in cat-file
>> while some part of logic still differs.
>
> I tried to make that part here:
> https://github.com/telezhnaya/git/commit/19a148614f1d4db1f8e628eb4e6d7c819d2da875
> I know that the code is disgusting and there is a memory leak :) I
> just try to reuse ref-filter logic, I will cleanup everything later.
> At first, I try to make it work.
> The problem is that I have segfault, and if I use gdb, I get:
>
> Program received signal SIGSEGV, Segmentation fault.
> 0x in ?? ()

Make sure that you compile with debug options like -g3. For example I use:

$ make -j 4 DEVELOPER=1 CFLAGS="-g3"

> I tried to google it, it's my first time when I get that strange
> message, and unfortunately find nothing. So please explain me the
> reason, why I can't find a place of segfault that way.

I get the following:

(gdb) run cat-file --batch < myarg.txt
Starting program: /home/ubuntu/bin/git cat-file --batch < myarg.txt
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".

Program received signal SIGSEGV, Segmentation fault.
0x556ea7cf in format_ref_array_item (info=0x7fffd460,
format=0x7fffd6e0,
final_buf=0x7fffd410) at ref-filter.c:2234
2234atomv->handler(atomv, );
(gdb) bt
#0  0x556ea7cf in format_ref_array_item (info=0x7fffd460,
format=0x7fffd6e0, final_buf=0x7fffd410) at ref-filter.c:2234
#1  0x556ea91c in show_ref_array_item (info=0x7fffd460,
format=0x7fffd6e0)
at ref-filter.c:2256
#2  0x55577ef7 in batch_object_write (
obj_name=0x55a66770 "5e1c309dae7f45e0f39b1bf3ac3cd9db12e7d689",
opt=0x7fffd6e0, data=0x7fffd5e0) at builtin/cat-file.c:298


[PATCH v3 2/2] sha1_file: improve sha1_file_name() perfs

2018-01-18 Thread Christian Couder
As sha1_file_name() could be performance sensitive, let's
make it faster by using strbuf_addstr() and strbuf_addch()
instead of strbuf_addf().

Helped-by: Derrick Stolee <sto...@gmail.com>
Helped-by: Jeff Hostetler <g...@jeffhostetler.com>
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 sha1_file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

So compared to v2 I just removed the call to strbuf_grow()
and the related computation. 

diff --git a/sha1_file.c b/sha1_file.c
index f66c21b2da..4f7f17a24c 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -323,8 +323,8 @@ static void fill_sha1_path(struct strbuf *buf, const 
unsigned char *sha1)
 
 void sha1_file_name(struct strbuf *buf, const unsigned char *sha1)
 {
-   strbuf_addf(buf, "%s/", get_object_directory());
-
+   strbuf_addstr(buf, get_object_directory());
+   strbuf_addch(buf, '/');
fill_sha1_path(buf, sha1);
 }
 
-- 
2.16.0.rc2.3.g7330290b09



[PATCH v3 1/2] sha1_file: remove static strbuf from sha1_file_name()

2018-01-18 Thread Christian Couder
Using a static buffer in sha1_file_name() is error prone
and the performance improvements it gives are not needed
in many of the callers.

So let's get rid of this static buffer and, if necessary
or helpful, let's use one in the caller.

Suggested-by: Jeff Hostetler <g...@jeffhostetler.com>
Helped-by: Kevin Daudt <m...@ikke.info>
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 cache.h   |  8 +++-
 http-walker.c |  6 --
 http.c| 16 ++--
 sha1_file.c   | 38 +-
 4 files changed, 42 insertions(+), 26 deletions(-)

This patch is the same as v2.

diff --git a/cache.h b/cache.h
index d8b975a571..6db565408e 100644
--- a/cache.h
+++ b/cache.h
@@ -957,12 +957,10 @@ extern void check_repository_format(void);
 #define TYPE_CHANGED0x0040
 
 /*
- * Return the name of the file in the local object database that would
- * be used to store a loose object with the specified sha1.  The
- * return value is a pointer to a statically allocated buffer that is
- * overwritten each time the function is called.
+ * Put in `buf` the name of the file in the local object database that
+ * would be used to store a loose object with the specified sha1.
  */
-extern const char *sha1_file_name(const unsigned char *sha1);
+extern void sha1_file_name(struct strbuf *buf, const unsigned char *sha1);
 
 /*
  * Return an abbreviated sha1 unique within this repository's object database.
diff --git a/http-walker.c b/http-walker.c
index 1ae8363de2..07c2b1af82 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -544,8 +544,10 @@ static int fetch_object(struct walker *walker, unsigned 
char *sha1)
} else if (hashcmp(obj_req->sha1, req->real_sha1)) {
ret = error("File %s has bad hash", hex);
} else if (req->rename < 0) {
-   ret = error("unable to write sha1 filename %s",
-   sha1_file_name(req->sha1));
+   struct strbuf buf = STRBUF_INIT;
+   sha1_file_name(&buf, req->sha1);
+   ret = error("unable to write sha1 filename %s", buf.buf);
+   strbuf_release(&buf);
}
 
release_http_object_request(req);
diff --git a/http.c b/http.c
index 5977712712..5979305bc9 100644
--- a/http.c
+++ b/http.c
@@ -2168,7 +2168,7 @@ struct http_object_request *new_http_object_request(const 
char *base_url,
unsigned char *sha1)
 {
char *hex = sha1_to_hex(sha1);
-   const char *filename;
+   struct strbuf filename = STRBUF_INIT;
char prevfile[PATH_MAX];
int prevlocal;
char prev_buf[PREV_BUF_SIZE];
@@ -2180,14 +2180,15 @@ struct http_object_request 
*new_http_object_request(const char *base_url,
hashcpy(freq->sha1, sha1);
freq->localfile = -1;
 
-   filename = sha1_file_name(sha1);
+   sha1_file_name(&filename, sha1);
snprintf(freq->tmpfile, sizeof(freq->tmpfile),
-"%s.temp", filename);
+"%s.temp", filename.buf);
 
-   snprintf(prevfile, sizeof(prevfile), "%s.prev", filename);
+   snprintf(prevfile, sizeof(prevfile), "%s.prev", filename.buf);
unlink_or_warn(prevfile);
rename(freq->tmpfile, prevfile);
unlink_or_warn(freq->tmpfile);
+   strbuf_release(&filename);
 
if (freq->localfile != -1)
error("fd leakage in start: %d", freq->localfile);
@@ -2302,6 +2303,7 @@ void process_http_object_request(struct 
http_object_request *freq)
 int finish_http_object_request(struct http_object_request *freq)
 {
struct stat st;
+   struct strbuf filename = STRBUF_INIT;
 
close(freq->localfile);
freq->localfile = -1;
@@ -2327,8 +2329,10 @@ int finish_http_object_request(struct 
http_object_request *freq)
unlink_or_warn(freq->tmpfile);
return -1;
}
-   freq->rename =
-   finalize_object_file(freq->tmpfile, sha1_file_name(freq->sha1));
+
+   sha1_file_name(&filename, freq->sha1);
+   freq->rename = finalize_object_file(freq->tmpfile, filename.buf);
+   strbuf_release(&filename);
 
return freq->rename;
 }
diff --git a/sha1_file.c b/sha1_file.c
index 3da70ac650..f66c21b2da 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -321,15 +321,11 @@ static void fill_sha1_path(struct strbuf *buf, const 
unsigned char *sha1)
}
 }
 
-const char *sha1_file_name(const unsigned char *sha1)
+void sha1_file_name(struct strbuf *buf, const unsigned char *sha1)
 {
-   static struct strbuf buf = STRBUF_INIT;
-
-   strbuf_reset(&buf);
-   strbuf_addf(&buf, "%s/", get_object_directory());
+   strbuf_addf(buf, "%s/", get_object_directory());
 
-   fill_sha1_path(&buf, sha1);
-   return buf.buf;
+   fill_sha1_path(buf, sha1);
 }
 
 struct strbuf *alt_scratch_buf(st

Re: What's cooking in git.git (Jan 2018, #02; Tue, 9)

2018-01-18 Thread Christian Couder
On Wed, Jan 10, 2018 at 11:03 PM, Jeff Hostetler  wrote:
>
>
> On 1/10/2018 2:57 PM, Junio C Hamano wrote:
>>
>> Jeff Hostetler  writes:
>>
>>> On 1/9/2018 6:33 PM, Junio C Hamano wrote:

 --
 [Cooking]


 * jh/fsck-promisors (2017-12-08) 10 commits
>>>
>>> [...]
>>>
 * jh/partial-clone (2017-12-08) 13 commits
>>>
>>> [...]
>>>
>>> Parts 2 and 3 of partial clone have been simmering
>>> for a while now.  I was wondering if there were any
>>> more comments or questions on them.  I don't recall
>>> any existing issues.
>>
>> Me neither.

I am still not very happy with fetch_object() not returning anything.
I wonder what happens when that function is used to fetch from a repo
that cannot provide the requested object.

Also I think the "extensions.partialclone = <remote>" config option is
not very future proof. If people start using partial clone, it is
likely that at some point they will need their repo to talk to more
than one remote that is partial clone enabled and I don't see how such
a config option can scale in this case.

>> I do not mind merging them to 'next' during the feature freeze, but
>> we won't be merging them to 'master' soon anyway, and I'd like to
>> see us concentrate more on finding and fixing regressions on the
>> 'master' front for now.
>
> I didn't want to rush this -- just check to see if there was
> any thing that I should focus on while waiting for 2.16 to ship
> and settle down.

Thanks for already having commented on the patch series I sent to
integrate the external odb with the above work,
Christian.


Re: [PATCH v2 01/18] cat-file: split expand_atom into 2 functions

2018-01-18 Thread Christian Couder
On Thu, Jan 18, 2018 at 7:22 AM, Оля Тележная <olyatelezhn...@gmail.com> wrote:
> 2018-01-18 2:04 GMT+03:00 Christian Couder <christian.cou...@gmail.com>:
>> On Wed, Jan 17, 2018 at 10:49 PM, Jeff King <p...@peff.net> wrote:
>>> On Tue, Jan 16, 2018 at 10:22:23AM +0300, Оля Тележная wrote:
>>>
>>>> >> In other words, I think the endgame is that expand_atom() isn't there at
>>>> >> all, and we're calling the equivalent of format_ref_item() for each
>>>> >> object (except that in a unified formatting world, it probably doesn't
>>>> >> have the word "ref" in it, since that's just one of the items a caller
>>>> >> might pass in).
>>>>
>>>> Agree! I want to merge current edits, then create format.h file and
>>>> make some renames, then finish migrating process to new format.h and
>>>> support all new meaningful tags.
>>>
>>> I think we have a little bit of chicken and egg there, though. I'm
>>> having trouble reviewing the current work, because it's hard to evaluate
>>> whether it's doing the right thing without seeing the end state.
>>
>> Yeah, to me it feels like you are at a middle point and there are many
>> ways to go forward.
>
> OK. Maybe I misunderstood you and Jeff in our call, I thought that was
> your idea to make a merge now, sorry. I will continue my work here.

If you think you can now prepare a patch series that looks like it is
going in the right direction, then yes you can do that. But after
looking at the current patch series I agree with Peff that, as it
doesn't look finished, it's difficult to tell if all the steps are
good.

In general it is a good idea to try to merge things as soon as possible,
but for that to be possible the state of the code that we want to be
merged should look quite stable, and it doesn't look very stable to
me.


Re: [PATCH v2 01/18] cat-file: split expand_atom into 2 functions

2018-01-17 Thread Christian Couder
On Wed, Jan 17, 2018 at 10:49 PM, Jeff King  wrote:
> On Tue, Jan 16, 2018 at 10:22:23AM +0300, Оля Тележная wrote:
>
>> >> In other words, I think the endgame is that expand_atom() isn't there at
>> >> all, and we're calling the equivalent of format_ref_item() for each
>> >> object (except that in a unified formatting world, it probably doesn't
>> >> have the word "ref" in it, since that's just one of the items a caller
>> >> might pass in).
>>
>> Agree! I want to merge current edits, then create format.h file and
>> make some renames, then finish migrating process to new format.h and
>> support all new meaningful tags.
>
> I think we have a little bit of chicken and egg there, though. I'm
> having trouble reviewing the current work, because it's hard to evaluate
> whether it's doing the right thing without seeing the end state.

Yeah, to me it feels like you are at a middle point and there are many
ways to go forward.

As I wrote in another email though, I think it might be a good time to
consolidate new functionality by adding tests (and perhaps
documentation at the same time) for each new atom that is added to
ref-filter or cat-file. It will help you refactor the code and your
patch series later without breaking the new functionality.

> So what
> I was suggesting in my earlier mails was that we actually _not_ try to
> merge this series, but use its components and ideas to build a new
> series that does things in a bit different order.

Yeah, I think you will have to do that, but the tests that you can add
now for the new features will help you when you will build the new
series.

And hopefully it will not be too much work to create this new series
as you will perhaps be able to just use the interactive rebase to
build it.

I also don't think it's a big problem if the current patch series gets
quite long before you start creating a new series.


Re: [PATCH v2 03/18] ref-filter: make valid_atom as function parameter

2018-01-17 Thread Christian Couder
On Wed, Jan 17, 2018 at 10:43 PM, Jeff King  wrote:
> On Tue, Jan 16, 2018 at 09:55:22AM +0300, Оля Тележная wrote:
>
>> > IOW, the progression I'd expect in a series like this is:
>> >
>> >   1. Teach ref-filter.c to support everything that cat-file can do.
>> >
>> >   2. Convert cat-file to use ref-filter.c.
>>
>> I agree, I even made this and it's working fine:
>> https://github.com/git/git/pull/450/commits/1b74f1047f07434dccb207534d1ad45a143e3f2b

(Nit: it looks like the above link does not work any more, but it
seems that you are talking about the last patch on the catfile
branch.)

>> But I decided not to add that to patch because I expand the
>> functionality of several commands (not only cat-file and
>> for-each-ref), and I need to support all new functionality in a proper
>> way, make these error messages, test everything and - the hardest one
>> - support many new commands for cat-file. As I understand, it is not
>> possible unless we finally move to ref-filter and print results also
>> there. Oh, and I also need to rewrite docs in that case. And I decided
>> to apply this in another patch. But, please, say your opinion, maybe
>> we could do that here in some way.
>
> Yeah, I agree that it will cause changes to other users of ref-filter,
> and you'd need to deal with documentation and tests there. But I think
> we're going to have to do those things eventually (since supporting
> those extra fields everywhere is part of the point of the project). And
> by doing them now, I think it can make the transition for cat-file a lot
> simpler, because we never have to puzzle over this intermediate state
> where only some of the atoms are valid for cat-file.

I agree that you will have to deal with documentation and tests at one
point and that it could be a good idea to do that now.

I wonder if it is possible to add atoms one by one into ref-filter and
to add tests and documentation at the same time, instead of merging
cat-file atoms with ref-filter atoms in one big step.

When all the cat-file atoms have been added to ref-filter's
valid_atom, maybe you could add ref-filter's atoms to cat-file's
valid_cat_file_atom one by one and add tests and documentation at the
same time.

And then when ref-filter's valid_atom and cat-file's
valid_cat_file_atom are identical you can remove cat-file's
valid_cat_file_atom and maybe after that rename "ref-filter" to
"format".


Re: [PATCH v2 2/2] sha1_file: improve sha1_file_name() perfs

2018-01-17 Thread Christian Couder
On Wed, Jan 17, 2018 at 9:37 PM, Jeff Hostetler <g...@jeffhostetler.com> wrote:
>
>
> On 1/17/2018 12:54 PM, Christian Couder wrote:
>>
>> As sha1_file_name() could be performance sensitive, let's
>> try to make it faster by seeding the initial buffer size
>> to avoid any need to realloc and by using strbuf_addstr()
>> and strbuf_addch() instead of strbuf_addf().
>>
>> Helped-by: Derrick Stolee <sto...@gmail.com>
>> Helped-by: Jeff Hostetler <g...@jeffhostetler.com>
>> Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
>> ---
>>   sha1_file.c | 8 +++-
>>   1 file changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/sha1_file.c b/sha1_file.c
>> index f66c21b2da..1a94716962 100644
>> --- a/sha1_file.c
>> +++ b/sha1_file.c
>> @@ -323,8 +323,14 @@ static void fill_sha1_path(struct strbuf *buf, const
>> unsigned char *sha1)
>> void sha1_file_name(struct strbuf *buf, const unsigned char *sha1)
>>   {
>> -   strbuf_addf(buf, "%s/", get_object_directory());
>> +   const char *obj_dir = get_object_directory();
>> +   size_t extra = strlen(obj_dir) + 1 + GIT_MAX_HEXSZ;
>
> Very minor nit.  Should this be "+3" rather than "+1"?
> One for the slash after obj_dir, one for the slash between
> "xx/y[38]", and one for the trailing NUL.

I think the trailing NUL is handled by strbuf_grow(), but I forgot
about the / between "xx/y[38]", so I think it should be "+2".
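
So the computation I have in mind boils down to something like this
(strbuf_grow() already makes room for the trailing NUL itself):

	/* "<obj_dir>" + '/' + "xx" + '/' + the rest of the hex sha1 */
	size_t extra = strlen(obj_dir) + 2 + GIT_MAX_HEXSZ;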

Anyway I agree with Junio that using strbuf_grow() is not really needed.

>>   + if (extra > strbuf_avail(buf))
>> +   strbuf_grow(buf, extra);
>> +
>> +   strbuf_addstr(buf, obj_dir);
>> +   strbuf_addch(buf, '/');
>> fill_sha1_path(buf, sha1);
>>   }


Re: [PATCH] sha1_file: remove static strbuf from sha1_file_name()

2018-01-17 Thread Christian Couder
On Tue, Jan 16, 2018 at 8:00 PM, Jeff Hostetler <g...@jeffhostetler.com> wrote:
>
>
> On 1/16/2018 9:01 AM, Derrick Stolee wrote:
>>
>> On 1/16/2018 2:18 AM, Christian Couder wrote:
>>>
>>> Using a static buffer in sha1_file_name() is error prone
>>> and the performance improvements it gives are not needed
>>> in most of the callers.
>>>
>>> So let's get rid of this static buffer and, if necessary
>>> or helpful, let's use one in the caller.
>>
>>
>> First: this is a good change for preventing bugs in the future. Do not let
>> my next thought deter you from making this change.
>>
>> Second: I wonder if there is any perf hit now that we are allocating
>> buffers much more often.

When I thought that the caller might be performance sensitive, I used a
"static struct strbuf" in the caller to avoid any performance
regression.

Yeah, that means that further work is needed if we want to get rid of
all the static buffers, but that is not my goal. I am just concerned
with cleaning up sha1_file_name() before changing it, and avoiding
some possible trouble when using it.

Feel free to improve on that or even take over this series, otherwise
it can be part of the #leftoverbits.

>> Also, how often does get_object_directory() change,
>> so in some cases we could cache the buffer and only append the parts for the
>> loose object (and not reallocate because the filenames will have equal
>> length).

Again feel free to work on this kind of optimization on top of this series.

>> I'm concerned about the perf implications when inspecting many loose
>> objects (100k+) but these code paths seem to be involved with more
>> substantial work, such as opening and parsing the objects, so keeping a
>> buffer in-memory is probably unnecessary.

Yeah, I also think it is not necessary to optimize too much.

>>> diff --git a/sha1_file.c b/sha1_file.c
>>> index 3da70ac650..f66c21b2da 100644
>>> --- a/sha1_file.c
>>> +++ b/sha1_file.c
>>> @@ -321,15 +321,11 @@ static void fill_sha1_path(struct strbuf *buf,
>>> const unsigned char *sha1)
>>>   }
>>>   }
>>> -const char *sha1_file_name(const unsigned char *sha1)
>>> +void sha1_file_name(struct strbuf *buf, const unsigned char *sha1)
>>>   {
>>> -static struct strbuf buf = STRBUF_INIT;
>>> -
>>> -strbuf_reset(&buf);
>>> -strbuf_addf(&buf, "%s/", get_object_directory());
>>> +strbuf_addf(buf, "%s/", get_object_directory());
>>> -fill_sha1_path(&buf, sha1);
>>> -return buf.buf;
>>> +fill_sha1_path(buf, sha1);
>>>   }
>>
>>
>> Could you change this to use strbuf_addstr(buf, get_object_directory())
>> followed by a strbuf_addch(buf, '/')? This format string is unnecessary and
>> could become slow if this method is called in a tight loop.
>
> Yes, an _addstr() and _addch() would avoid a sprintf and
> we've seen perf problems with this before.
>
> Could we also add seed the initial buffer size to avoid
> any need to realloc as the buffer is filled in?
>
> Something like:
> size_t len = strlen(get_object_directory()) + GIT_MAX_HEXSZ + 3;
> strbuf_reset(&buf);
> if (len > strbuf_avail(&buf))
> strbuf_grow(&buf, len);
> strbuf_addstr(&buf, ...);

Ok, I did something like that in another patch on top of the first
patch which is just about using a "struct strbuf *" passed as an
argument instead of a static buffer.

>>>   struct strbuf *alt_scratch_buf(struct alternate_object_database *alt)
>>> @@ -710,7 +706,12 @@ int check_and_freshen_file(const char *fn, int
>>> freshen)
>>>   static int check_and_freshen_local(const unsigned char *sha1, int
>>> freshen)
>>>   {
>>> -return check_and_freshen_file(sha1_file_name(sha1), freshen);
>>> +static struct strbuf buf = STRBUF_INIT;
>>> +
>>> +strbuf_reset(&buf);
>>> +sha1_file_name(&buf, sha1);
>>> +
>>> +return check_and_freshen_file(buf.buf, freshen);
>>>   }
>
> Does "buf" really need to be static here?  Doesn't this just move the
> problem from sha1_file_name() to here?

Yes, but maybe check_and_freshen_local() is performance sensitive, so
I think it is safer performance wise to still use a static buf.

If there is a consensus that it is ok to not use one here, I am ok to
change that. On the other hand the change could also be part of
another patch on top of this one...
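
For reference, the non-static version would just be something like
this:

	static int check_and_freshen_local(const unsigned char *sha1, int freshen)
	{
		struct strbuf buf = STRBUF_INIT;
		int ret;

		sha1_file_name(&buf, sha1);
		ret = check_and_freshen_file(buf.buf, freshen);
		strbuf_release(&buf);

		return ret;
	}

so the question is really just whether the extra malloc()/free() pair
on each call is acceptable here.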

>>>   static int check_and_freshen_nonlocal(const unsigned char *sha1, int
>>> freshen)
>>

[PATCH v2 1/2] sha1_file: remove static strbuf from sha1_file_name()

2018-01-17 Thread Christian Couder
Using a static buffer in sha1_file_name() is error prone
and the performance improvements it gives are not needed
in many of the callers.

So let's get rid of this static buffer and, if necessary
or helpful, let's use one in the caller.

Suggested-by: Jeff Hostetler <g...@jeffhostetler.com>
Helped-by: Kevin Daudt <m...@ikke.info>
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 cache.h   |  8 +++-
 http-walker.c |  6 --
 http.c| 16 ++--
 sha1_file.c   | 38 +-
 4 files changed, 42 insertions(+), 26 deletions(-)

diff --git a/cache.h b/cache.h
index d8b975a571..6db565408e 100644
--- a/cache.h
+++ b/cache.h
@@ -957,12 +957,10 @@ extern void check_repository_format(void);
 #define TYPE_CHANGED0x0040
 
 /*
- * Return the name of the file in the local object database that would
- * be used to store a loose object with the specified sha1.  The
- * return value is a pointer to a statically allocated buffer that is
- * overwritten each time the function is called.
+ * Put in `buf` the name of the file in the local object database that
+ * would be used to store a loose object with the specified sha1.
  */
-extern const char *sha1_file_name(const unsigned char *sha1);
+extern void sha1_file_name(struct strbuf *buf, const unsigned char *sha1);
 
 /*
  * Return an abbreviated sha1 unique within this repository's object database.
diff --git a/http-walker.c b/http-walker.c
index 1ae8363de2..07c2b1af82 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -544,8 +544,10 @@ static int fetch_object(struct walker *walker, unsigned 
char *sha1)
} else if (hashcmp(obj_req->sha1, req->real_sha1)) {
ret = error("File %s has bad hash", hex);
} else if (req->rename < 0) {
-   ret = error("unable to write sha1 filename %s",
-   sha1_file_name(req->sha1));
+   struct strbuf buf = STRBUF_INIT;
+   sha1_file_name(&buf, req->sha1);
+   ret = error("unable to write sha1 filename %s", buf.buf);
+   strbuf_release(&buf);
}
 
release_http_object_request(req);
diff --git a/http.c b/http.c
index 5977712712..5979305bc9 100644
--- a/http.c
+++ b/http.c
@@ -2168,7 +2168,7 @@ struct http_object_request *new_http_object_request(const 
char *base_url,
unsigned char *sha1)
 {
char *hex = sha1_to_hex(sha1);
-   const char *filename;
+   struct strbuf filename = STRBUF_INIT;
char prevfile[PATH_MAX];
int prevlocal;
char prev_buf[PREV_BUF_SIZE];
@@ -2180,14 +2180,15 @@ struct http_object_request 
*new_http_object_request(const char *base_url,
hashcpy(freq->sha1, sha1);
freq->localfile = -1;
 
-   filename = sha1_file_name(sha1);
+   sha1_file_name(&filename, sha1);
snprintf(freq->tmpfile, sizeof(freq->tmpfile),
-"%s.temp", filename);
+"%s.temp", filename.buf);
 
-   snprintf(prevfile, sizeof(prevfile), "%s.prev", filename);
+   snprintf(prevfile, sizeof(prevfile), "%s.prev", filename.buf);
unlink_or_warn(prevfile);
rename(freq->tmpfile, prevfile);
unlink_or_warn(freq->tmpfile);
+   strbuf_release(&filename);
 
if (freq->localfile != -1)
error("fd leakage in start: %d", freq->localfile);
@@ -2302,6 +2303,7 @@ void process_http_object_request(struct 
http_object_request *freq)
 int finish_http_object_request(struct http_object_request *freq)
 {
struct stat st;
+   struct strbuf filename = STRBUF_INIT;
 
close(freq->localfile);
freq->localfile = -1;
@@ -2327,8 +2329,10 @@ int finish_http_object_request(struct 
http_object_request *freq)
unlink_or_warn(freq->tmpfile);
return -1;
}
-   freq->rename =
-   finalize_object_file(freq->tmpfile, sha1_file_name(freq->sha1));
+
+   sha1_file_name(&filename, freq->sha1);
+   freq->rename = finalize_object_file(freq->tmpfile, filename.buf);
+   strbuf_release(&filename);
 
return freq->rename;
 }
diff --git a/sha1_file.c b/sha1_file.c
index 3da70ac650..f66c21b2da 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -321,15 +321,11 @@ static void fill_sha1_path(struct strbuf *buf, const 
unsigned char *sha1)
}
 }
 
-const char *sha1_file_name(const unsigned char *sha1)
+void sha1_file_name(struct strbuf *buf, const unsigned char *sha1)
 {
-   static struct strbuf buf = STRBUF_INIT;
-
-   strbuf_reset(&buf);
-   strbuf_addf(&buf, "%s/", get_object_directory());
+   strbuf_addf(buf, "%s/", get_object_directory());
 
-   fill_sha1_path(&buf, sha1);
-   return buf.buf;
+   fill_sha1_path(buf, sha1);
 }
 
 struct strbuf *alt_scratch_buf(struct alternate_object_database *alt)
@@ -7

[PATCH v2 2/2] sha1_file: improve sha1_file_name() perfs

2018-01-17 Thread Christian Couder
As sha1_file_name() could be performance sensitive, let's
try to make it faster by seeding the initial buffer size
to avoid any need to realloc and by using strbuf_addstr()
and strbuf_addch() instead of strbuf_addf().

Helped-by: Derrick Stolee <sto...@gmail.com>
Helped-by: Jeff Hostetler <g...@jeffhostetler.com>
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 sha1_file.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/sha1_file.c b/sha1_file.c
index f66c21b2da..1a94716962 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -323,8 +323,14 @@ static void fill_sha1_path(struct strbuf *buf, const 
unsigned char *sha1)
 
 void sha1_file_name(struct strbuf *buf, const unsigned char *sha1)
 {
-   strbuf_addf(buf, "%s/", get_object_directory());
+   const char *obj_dir = get_object_directory();
+   size_t extra = strlen(obj_dir) + 1 + GIT_MAX_HEXSZ;
 
+   if (extra > strbuf_avail(buf))
+   strbuf_grow(buf, extra);
+
+   strbuf_addstr(buf, obj_dir);
+   strbuf_addch(buf, '/');
fill_sha1_path(buf, sha1);
 }
 
-- 
2.16.0.rc2.3.g254af8b974



[ANNOUNCE] Git Rev News edition 35

2018-01-17 Thread Christian Couder
Hi everyone,

The 35th edition of Git Rev News is now published:

  https://git.github.io/rev_news/2018/01/17/edition-35/

Thanks a lot to all the contributors!

Enjoy,
Christian, Jakub, Markus and Gabriel.


Re: GSoC 2018 Org applications. Deadline = January 23, 2018 at 18:00 (CET)

2018-01-17 Thread Christian Couder
On Tue, Jan 16, 2018 at 9:08 PM, Stefan Beller  wrote:
>
> I'll be fine as a co-mentor this year.

Great!

It looks like you also accepted the invite to be an admin, so we are 3
admins now (Matthieu, you and me), maybe 4 if Dscho joins, so our
application is complete.

We can still improve the Idea and Microproject pages though:

https://git.github.io/SoC-2018-Ideas/
https://git.github.io/SoC-2018-Microprojects/


Re: [PATCH 14/40] sha1_file: prepare for external odbs

2018-01-15 Thread Christian Couder
On Thu, Jan 4, 2018 at 7:00 PM, Jeff Hostetler <g...@jeffhostetler.com> wrote:
>
>
> On 1/3/2018 11:33 AM, Christian Couder wrote:

>> diff --git a/sha1_file.c b/sha1_file.c
>> index 261baf800f..785e8dda03 100644
>> --- a/sha1_file.c
>> +++ b/sha1_file.c
>> @@ -322,17 +322,22 @@ static void fill_sha1_path(struct strbuf *buf, const
>> unsigned char *sha1)
>> }
>>   }
>>   -const char *sha1_file_name(const unsigned char *sha1)
>> +const char *sha1_file_name_alt(const char *objdir, const unsigned char
>> *sha1)
>>   {
>> static struct strbuf buf = STRBUF_INIT;
>
> While we are refactoring sha1_file_name() and adding
> sha1_file_name_alt(), could we also change the API and
> pass in the strbuf so we can get rid of the static buffer?
> Granted, it is a little off topic, but it will help out
> in the long run.

Ok, but I prefer to do that in a separate patch series, so I just sent:

https://public-inbox.org/git/20180116071814.19884-1-chrisc...@tuxfamily.org

>> @@ -1551,7 +1562,7 @@ static inline int directory_size(const char
>> *filename)
>>* We want to avoid cross-directory filename renames, because those
>>* can have problems on various filesystems (FAT, NFS, Coda).
>>*/
>> -static int create_tmpfile(struct strbuf *tmp, const char *filename)
>> +int create_object_tmpfile(struct strbuf *tmp, const char *filename)
>>   {
>> int fd, dirlen = directory_size(filename);
>>   @@ -1591,7 +1602,7 @@ static int write_loose_object(const unsigned char
>> *sha1, char *hdr, int hdrlen,
>> static struct strbuf tmp_file = STRBUF_INIT;
>
> Same thing here, since we are renaming the function anyway, could we
> add a strbuf arg and get rid of the static one?

I will see how it goes with the above patch to remove the static
buffer in sha1_file_name() before preparing a patch to remove the
static buffer in create_tmpfile().

Thanks,
Christian.


[PATCH] sha1_file: remove static strbuf from sha1_file_name()

2018-01-15 Thread Christian Couder
Using a static buffer in sha1_file_name() is error prone
and the performance improvements it gives are not needed
in most of the callers.

So let's get rid of this static buffer and, if necessary
or helpful, let's use one in the caller.
---
 cache.h   |  8 +++-
 http-walker.c |  6 --
 http.c| 16 ++--
 sha1_file.c   | 38 +-
 4 files changed, 42 insertions(+), 26 deletions(-)

diff --git a/cache.h b/cache.h
index d8b975a571..6db565408e 100644
--- a/cache.h
+++ b/cache.h
@@ -957,12 +957,10 @@ extern void check_repository_format(void);
 #define TYPE_CHANGED0x0040
 
 /*
- * Return the name of the file in the local object database that would
- * be used to store a loose object with the specified sha1.  The
- * return value is a pointer to a statically allocated buffer that is
- * overwritten each time the function is called.
+ * Put in `buf` the name of the file in the local object database that
+ * would be used to store a loose object with the specified sha1.
  */
-extern const char *sha1_file_name(const unsigned char *sha1);
+extern void sha1_file_name(struct strbuf *buf, const unsigned char *sha1);
 
 /*
  * Return an abbreviated sha1 unique within this repository's object database.
diff --git a/http-walker.c b/http-walker.c
index 1ae8363de2..07c2b1af82 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -544,8 +544,10 @@ static int fetch_object(struct walker *walker, unsigned 
char *sha1)
} else if (hashcmp(obj_req->sha1, req->real_sha1)) {
ret = error("File %s has bad hash", hex);
} else if (req->rename < 0) {
-   ret = error("unable to write sha1 filename %s",
-   sha1_file_name(req->sha1));
+   struct strbuf buf = STRBUF_INIT;
+   sha1_file_name(&buf, req->sha1);
+   ret = error("unable to write sha1 filename %s", buf.buf);
+   strbuf_release(&buf);
}
 
release_http_object_request(req);
diff --git a/http.c b/http.c
index 5977712712..5979305bc9 100644
--- a/http.c
+++ b/http.c
@@ -2168,7 +2168,7 @@ struct http_object_request *new_http_object_request(const 
char *base_url,
unsigned char *sha1)
 {
char *hex = sha1_to_hex(sha1);
-   const char *filename;
+   struct strbuf filename = STRBUF_INIT;
char prevfile[PATH_MAX];
int prevlocal;
char prev_buf[PREV_BUF_SIZE];
@@ -2180,14 +2180,15 @@ struct http_object_request 
*new_http_object_request(const char *base_url,
hashcpy(freq->sha1, sha1);
freq->localfile = -1;
 
-   filename = sha1_file_name(sha1);
+   sha1_file_name(&filename, sha1);
snprintf(freq->tmpfile, sizeof(freq->tmpfile),
-"%s.temp", filename);
+"%s.temp", filename.buf);
 
-   snprintf(prevfile, sizeof(prevfile), "%s.prev", filename);
+   snprintf(prevfile, sizeof(prevfile), "%s.prev", filename.buf);
unlink_or_warn(prevfile);
rename(freq->tmpfile, prevfile);
unlink_or_warn(freq->tmpfile);
+   strbuf_release(&filename);
 
if (freq->localfile != -1)
error("fd leakage in start: %d", freq->localfile);
@@ -2302,6 +2303,7 @@ void process_http_object_request(struct 
http_object_request *freq)
 int finish_http_object_request(struct http_object_request *freq)
 {
struct stat st;
+   struct strbuf filename = STRBUF_INIT;
 
close(freq->localfile);
freq->localfile = -1;
@@ -2327,8 +2329,10 @@ int finish_http_object_request(struct 
http_object_request *freq)
unlink_or_warn(freq->tmpfile);
return -1;
}
-   freq->rename =
-   finalize_object_file(freq->tmpfile, sha1_file_name(freq->sha1));
+
+   sha1_file_name(&filename, freq->sha1);
+   freq->rename = finalize_object_file(freq->tmpfile, filename.buf);
+   strbuf_release(&filename);
 
return freq->rename;
 }
diff --git a/sha1_file.c b/sha1_file.c
index 3da70ac650..f66c21b2da 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -321,15 +321,11 @@ static void fill_sha1_path(struct strbuf *buf, const 
unsigned char *sha1)
}
 }
 
-const char *sha1_file_name(const unsigned char *sha1)
+void sha1_file_name(struct strbuf *buf, const unsigned char *sha1)
 {
-   static struct strbuf buf = STRBUF_INIT;
-
-   strbuf_reset(&buf);
-   strbuf_addf(&buf, "%s/", get_object_directory());
+   strbuf_addf(buf, "%s/", get_object_directory());
 
-   fill_sha1_path(&buf, sha1);
-   return buf.buf;
+   fill_sha1_path(buf, sha1);
 }
 
 struct strbuf *alt_scratch_buf(struct alternate_object_database *alt)
@@ -710,7 +706,12 @@ int check_and_freshen_file(const char *fn, int freshen)
 
 static int check_and_freshen_local(const unsigned char *sha1, int freshen)
 {
-   return check_and_freshen_file(sha1_file_name(sha1), freshen);
+   static struct strbuf buf = STRBUF_INIT;
+
+   strbuf_reset(&buf);
+   sha1_file_name(&buf, sha1);
+
+   

Re: [PATCH 10/40] external-odb: implement external_odb_get_direct

2018-01-15 Thread Christian Couder
On Thu, Jan 4, 2018 at 6:44 PM, Jeff Hostetler <g...@jeffhostetler.com> wrote:
>
>
> On 1/3/2018 11:33 AM, Christian Couder wrote:
>>
>> This is implemented only in the promisor remote mode
>> for now by calling fetch_object().
>>
>> Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
>> ---
>>   external-odb.c | 15 +++
>>   external-odb.h |  1 +
>>   odb-helper.c   | 13 +
>>   odb-helper.h   |  3 ++-
>>   4 files changed, 31 insertions(+), 1 deletion(-)
>>
>> diff --git a/external-odb.c b/external-odb.c
>> index d26e63d8b1..5d0afb9762 100644
>> --- a/external-odb.c
>> +++ b/external-odb.c
>> @@ -76,3 +76,18 @@ int external_odb_has_object(const unsigned char *sha1)
>> return 1;
>> return 0;
>>   }
>> +
>> +int external_odb_get_direct(const unsigned char *sha1)
>> +{
>> +   struct odb_helper *o;
>> +
>> +   external_odb_init();
>> +
>> +   for (o = helpers; o; o = o->next) {
>> +   if (odb_helper_get_direct(o, sha1) < 0)
>> +   continue;
>> +   return 0;
>
>> + }
>
> Would this be simpler said as:
> for (o = ...)
> if (!odb_helper_get_direct(...))
> return 0;

At the end of the series the content of the loop is:

	if (!(o->supported_capabilities & ODB_HELPER_CAP_GET_DIRECT))
		continue;
	if (odb_helper_get_direct(o, sha1) < 0)
		continue;
	return 0;

And I think it is fine like that, so I don't think changing this
commit is a good idea.

>> +   return -1;
>> +}
>> diff --git a/external-odb.h b/external-odb.h
>> index 9a3c2f01b3..fd6708163e 100644
>> --- a/external-odb.h
>> +++ b/external-odb.h
>> @@ -4,5 +4,6 @@
>>   extern int has_external_odb(void);
>>   extern const char *external_odb_root(void);
>>   extern int external_odb_has_object(const unsigned char *sha1);
>> +extern int external_odb_get_direct(const unsigned char *sha1);
>> #endif /* EXTERNAL_ODB_H */
>> diff --git a/odb-helper.c b/odb-helper.c
>> index 1404393807..4b70b287af 100644
>> --- a/odb-helper.c
>> +++ b/odb-helper.c
>> @@ -4,6 +4,7 @@
>>   #include "odb-helper.h"
>>   #include "run-command.h"
>>   #include "sha1-lookup.h"
>> +#include "fetch-object.h"
>> struct odb_helper *odb_helper_new(const char *name, int namelen)
>>   {
>> @@ -52,3 +53,15 @@ int odb_helper_has_object(struct odb_helper *o, const
>> unsigned char *sha1)
>> return !!odb_helper_lookup(o, sha1);
>>   }
>>   +int odb_helper_get_direct(struct odb_helper *o,
>> + const unsigned char *sha1)
>> +{
>> +   int res = 0;
>> +   uint64_t start = getnanotime();
>> +
>> +   fetch_object(o->dealer, sha1);
>> +
>> +   trace_performance_since(start, "odb_helper_get_direct");
>> +
>> +   return res;
>
>
> 'res' will always be 0, so the external_odb_get_direct() will
> only do the first helper.  i haven't looked at the rest of the
> series yet, so maybe you've already addressed this.

That's why I previously suggested in one of your or Jonathan's patches
that fetch_object() should return an int that tells the caller if the
object has been fetched, instead of void.

If we make it possible at one point to have the objects fetched by
fetch_object() from different remotes (and I think that's a
straightforward goal), this will actually fail but callers will have
no simple way to know that.
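
To make it more concrete, what I would like to end up with is roughly
this (just a sketch, with fetch_object() changed to return an error
code):

	/* fetch-object.h */
	extern int fetch_object(const char *remote_name, const unsigned char *sha1);

	/* odb-helper.c */
	int odb_helper_get_direct(struct odb_helper *o,
				  const unsigned char *sha1)
	{
		int res;
		uint64_t start = getnanotime();

		res = fetch_object(o->dealer, sha1);

		trace_performance_since(start, "odb_helper_get_direct");

		return res;
	}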

> Also, I put a TODO comment in the fetch_object() header to
> consider returning an error/success, so maybe that could help
> here too.

Yeah, indeed.


Re: [PATCH 01/40] Add initial external odb support

2018-01-15 Thread Christian Couder
On Thu, Jan 4, 2018 at 8:59 PM, Jeff Hostetler  wrote:
>
>> diff --git a/odb-helper.h b/odb-helper.h
>> new file mode 100644
>> index 00..9395e606ce
>> --- /dev/null
>> +++ b/odb-helper.h
>> @@ -0,0 +1,24 @@
>> +#ifndef ODB_HELPER_H
>> +#define ODB_HELPER_H
>> +
>> +struct odb_helper {
>> +   const char *name;
>> +   const char *dealer;
>> +
>> +   struct odb_helper_object {
>> +   unsigned char sha1[20];
>
>
> Should this be "struct object_id" ?

Yeah, I changed this in my current version.

I cannot change all the functions to take a struct object_id though as
some are called inside sha1_file.c where only sha1s are available.


Draft of Git Rev News edition 35

2018-01-14 Thread Christian Couder
Hi,

A draft of a new Git Rev News edition is available here:

  https://github.com/git/git.github.io/blob/master/rev_news/drafts/edition-35.md

Everyone is welcome to contribute in any section either by editing the
above page on GitHub and sending a pull request, or by commenting on
this GitHub issue:

  https://github.com/git/git.github.io/issues/271

You can also reply to this email.

In general all kinds of contribution, for example proofreading,
suggestions for articles or links, help on the issues in GitHub, and
so on, are very much appreciated.

I tried to cc everyone who appears in this edition, but maybe I missed
some people, sorry about that.

Jakub, Markus, Gabriel and I plan to publish this edition on
Wednesday January 17th.

Thanks,
Christian.


Re: GSoC 2018 Org applications. Deadline = January 23, 2018 at 18:00 (CET)

2018-01-13 Thread Christian Couder
On Mon, Jan 8, 2018 at 12:03 AM, Christian Couder
<christian.cou...@gmail.com> wrote:
> On Fri, Jan 5, 2018 at 12:18 PM, Johannes Schindelin
> <johannes.schinde...@gmx.de> wrote:
>> On Fri, 5 Jan 2018, Matthieu Moy wrote:
>>
>>> If we go for it, we need:
>>>
>>> * Admins

From the application site I filled in the application forms and
registered as an admin for this year and sent invites to Stefan,
Matthieu and Dscho. If one of you guys accept the invite, we will have
completed our application.

>>> * Potential mentors
>>
>> Count me in as a potential mentor.
>
> I am ok to be admin and mentor.

>>> * List of microproject and project ideas
>>>   (https://git.github.io/SoC-2017-Ideas/ and
>>>   https://git.github.io/SoC-2017-Microprojects/ are good starting
>>>   points)

I copied the above pages and the application page to new 2018 pages so we have:

https://git.github.io/SoC-2018-Ideas/
https://git.github.io/SoC-2018-Microprojects/
https://git.github.io/SoC-2018-Org-Application/

I updated the above pages a bit but there are probably more
improvements to be made. Especially I kept only the projects that both
Dscho and I agreed to mentor last year, as it looks like we are the
only possible mentors this year.

>> I have spent a bit of time recently to document a couple of Git for
>> Windows-specific projects thoroughly, I could easily copy/paste them
>> there, too. (Yaaay, Markdown! Now, if only we could use it in Git's man
>> pages, too...)

Dscho, please copy paste what you have in the above pages.

Thanks,
Christian.


Re: [BUG] v2.16.0-rc0 seg faults when git bisect skip

2018-01-08 Thread Christian Couder
Hi,

On Mon, Jan 8, 2018 at 3:45 PM, Yasushi SHOJI  wrote:
> Hi all,
>
> Thank you guys for insightful help.  I just read the code and now I understand
> what you guys are saying.  Yeah, I can say the fix is "spot on".
>
> But, to be honest, it's hard to see why you need "if (p)" at  the first 
> glance.
> So, how about this fix, instead?

+for (p = list, i = 0; i < cnt; i++, p = p->next) {

Here "i" can reach "cnt - 1" at most, so ...

+if (i == cnt) {
+/* clean-up unused elements if any */
+free_commit_list(p->next);
+p->next = NULL;
+}

... "i == cnt" is always false above. I think it should be "i == cnt - 1".

And with your code one can wonder why the cleanup is part of the loop.

> I also found a bug in show_list().  I'm attaching a patch as well.

Could you send it inline as explained in Documentation/SubmittingPatches?

Thanks,
Christian.


Re: GSoC 2018 Org applications. Deadline = January 23, 2018 at 18:00 (CET)

2018-01-07 Thread Christian Couder
Hi,

On Fri, Jan 5, 2018 at 12:18 PM, Johannes Schindelin
 wrote:
> Hi,
>
> On Fri, 5 Jan 2018, Matthieu Moy wrote:
>
>> If we go for it, we need:
>>
>> * Admins
>>
>> * Potential mentors
>
> Count me in as a potential mentor.

I am ok to be admin and mentor.

>> * List of microproject and project ideas
>>   (https://git.github.io/SoC-2017-Ideas/ and
>>   https://git.github.io/SoC-2017-Microprojects/ are good starting
>>   points)
>
> I have spent a bit of time recently to document a couple of Git for
> Windows-specific projects thoroughly, I could easily copy/paste them
> there, too. (Yaaay, Markdown! Now, if only we could use it in Git's man
> pages, too...)

Nice!

I will fill in the application.

Thanks,
Christian.


Re: [BUG] v2.16.0-rc0 seg faults when git bisect skip

2018-01-06 Thread Christian Couder
Hi Yasushi,

On Sat, Jan 6, 2018 at 3:27 PM, Yasushi SHOJI  wrote:

> best_bisection_sorted() seems to do
>
>  - get the commit list along with the number of elements in the list
>  - walk the list one by one to check whether a element have TREESAME or not
>  - if TREESAME, skip
>  - if not, add it to array
>  - sort the array by distance
>  - put elements back to the list

Yes.

> so, if you find TREESAME, you get less elements than given, right?

Yes.

> Also, if you sort, the last commit, which has NULL in the ->next,
> might get in the middle of the array??

Well, first the array is just necessary to sort the items pointed to
by the list. It is freed at the end of the function. We are mostly
interested in getting a sorted list from this function, not a sorted
array.

Also the array contains only items (commits) and distances, not list
elements, so the elements in the array don't have a ->next pointer.

About items in the list, when we put them back into the list we only
change p->item, so p->next still points to the next one. Only for the
last one we set p->next to NULL (if p is not NULL). So we don't
actually sort the list elements, we sort the items pointed to by the
list.
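
To show what I mean, that part of best_bisection_sorted() looks roughly
like this (leaving out the "dist" decoration that the loop also adds):

	for (p = list, i = 0; i < cnt; i++) {
		p->item = array[i].commit; /* only the item changes, ->next is kept */
		if (i < cnt - 1)
			p = p->next;
	}
	if (p) {
		free_commit_list(p->next); /* free the unused elements, if any */
		p->next = NULL;
	}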

> # BTW, is it really fast to use QSORT even if you have to convert to
> # an array from list?

It is easier and probably faster to just use qsort, which we get from
#include <stdlib.h>, as it is hopefully optimized, rather than
reimplementing our own list sorting.

>>> Since nobody noticed it since 7c117184d7, it must be a rare case, right?
>>
>> Right, you marked a commit both good and bad. That's probably not very
>> common. But it obviously happens. :-)

Yeah, mistakes unfortunately happen :-)

>> Thank you for providing a script for reproducing this. It helped me come
>> up with the attached patch. The patch is based on ma/bisect-leakfix,
>> which includes Ævar's patch.
>>
>> I think this patch could be useful, either as a final "let's test
>> something previously non-tested; this would have caught the segfault",
>> or simply squashed into Ævar's patch as a "let's add a test that would
>> have caught this, and which also tests a previously non-tested code
>> path."
>
> Do we really need that?  What is a practical use of a commit having
> both good and bad?

It is not practical, but it is a user mistake that happens.

Thanks for your reports and test cases,
Christian.


[PATCH v3 3/7] perf/aggregate: implement codespeed JSON output

2018-01-05 Thread Christian Couder
Codespeed (https://github.com/tobami/codespeed/) is an open source
project that can be used to track how some software performs over
time. It stores performance test results in a database and can show
nice graphs and charts on a web interface.

As it can be interesting to use Codespeed to see how Git performance
evolves over time and releases, let's implement a Codespeed output
in "perf/aggregate.perl".

Helped-by: Eric Sunshine <sunsh...@sunshineco.com>
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/perf/aggregate.perl | 64 +--
 1 file changed, 62 insertions(+), 2 deletions(-)

diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index 3609cb5dc3..5c439f6bc2 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -3,6 +3,7 @@
 use lib '../../perl/blib/lib';
 use strict;
 use warnings;
+use JSON;
 use Git;
 
 sub get_times {
@@ -35,10 +36,15 @@ sub format_times {
return $out;
 }
 
-my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests);
+my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests, $codespeed);
 while (scalar @ARGV) {
my $arg = $ARGV[0];
my $dir;
+   if ($arg eq "--codespeed") {
+   $codespeed = 1;
+   shift @ARGV;
+   next;
+   }
last if -f $arg or $arg eq "--";
if (! -d $arg) {
my $rev = Git::command_oneline(qw(rev-parse --verify), $arg);
@@ -70,8 +76,10 @@ if (not @tests) {
 }
 
 my $resultsdir = "test-results";
+my $results_section = "";
 if (exists $ENV{GIT_PERF_SUBSECTION} and $ENV{GIT_PERF_SUBSECTION} ne "") {
$resultsdir .= "/" . $ENV{GIT_PERF_SUBSECTION};
+   $results_section = $ENV{GIT_PERF_SUBSECTION};
 }
 
 my @subtests;
@@ -174,6 +182,58 @@ sub print_default_results {
}
 }
 
+sub print_codespeed_results {
+   my ($results_section) = @_;
+
+   my $project = "Git";
+
+   my $executable = `uname -s -m`;
+   chomp $executable;
+
+   if ($results_section ne "") {
+   $executable .= ", " . $results_section;
+   }
+
+   my $environment;
+   if (exists $ENV{GIT_PERF_REPO_NAME} and $ENV{GIT_PERF_REPO_NAME} ne "") 
{
+   $environment = $ENV{GIT_PERF_REPO_NAME};
+   } elsif (exists $ENV{GIT_TEST_INSTALLED} and $ENV{GIT_TEST_INSTALLED} 
ne "") {
+   $environment = $ENV{GIT_TEST_INSTALLED};
+   $environment =~ s|/bin-wrappers$||;
+   } else {
+   $environment = `uname -r`;
+   chomp $environment;
+   }
+
+   my @data;
+
+   for my $t (@subtests) {
+   for my $d (@dirs) {
+   my $commitid = $prefixes{$d};
+   $commitid =~ s/^build_//;
+   $commitid =~ s/\.$//;
+   my ($result_value, $u, $s) = 
get_times("$resultsdir/$prefixes{$d}$t.times");
+
+   my %vals = (
+   "commitid" => $commitid,
+   "project" => $project,
+   "branch" => $dirnames{$d},
+   "executable" => $executable,
+   "benchmark" => $shorttests{$t} . " " . 
read_descr("$resultsdir/$t.descr"),
+   "environment" => $environment,
+   "result_value" => $result_value,
+   );
+   push @data, \%vals;
+   }
+   }
+
+   print to_json(\@data, {utf8 => 1, pretty => 1}), "\n";
+}
+
 binmode STDOUT, ":utf8" or die "PANIC on binmode: $!";
 
-print_default_results();
+if ($codespeed) {
+   print_codespeed_results($results_section);
+} else {
+   print_default_results();
+}
-- 
2.16.0.rc0.40.gbe5e688583



[PATCH v3 2/7] perf/aggregate: refactor printing results

2018-01-05 Thread Christian Couder
As we want to implement another kind of output than
the current output for the perf test results, let's
refactor the existing code that outputs the results
in its own print_default_results() function.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/perf/aggregate.perl | 96 +++
 1 file changed, 50 insertions(+), 46 deletions(-)

diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index 769d418708..3609cb5dc3 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -100,13 +100,6 @@ sub read_descr {
return $line;
 }
 
-my %descrs;
-my $descrlen = 4; # "Test"
-for my $t (@subtests) {
-   $descrs{$t} = $shorttests{$t}.": ".read_descr("$resultsdir/$t.descr");
-   $descrlen = length $descrs{$t} if length $descrs{$t}>$descrlen;
-}
-
 sub have_duplicate {
my %seen;
for (@_) {
@@ -122,54 +115,65 @@ sub have_slash {
return 0;
 }
 
-my %newdirabbrevs = %dirabbrevs;
-while (!have_duplicate(values %newdirabbrevs)) {
-   %dirabbrevs = %newdirabbrevs;
-   last if !have_slash(values %dirabbrevs);
-   %newdirabbrevs = %dirabbrevs;
-   for (values %newdirabbrevs) {
-   s{^[^/]*/}{};
+sub print_default_results {
+   my %descrs;
+   my $descrlen = 4; # "Test"
+   for my $t (@subtests) {
+   $descrs{$t} = $shorttests{$t}.": 
".read_descr("$resultsdir/$t.descr");
+   $descrlen = length $descrs{$t} if length $descrs{$t}>$descrlen;
}
-}
 
-my %times;
-my @colwidth = ((0)x@dirs);
-for my $i (0..$#dirs) {
-   my $d = $dirs[$i];
-   my $w = length (exists $dirabbrevs{$d} ? $dirabbrevs{$d} : 
$dirnames{$d});
-   $colwidth[$i] = $w if $w > $colwidth[$i];
-}
-for my $t (@subtests) {
-   my $firstr;
+   my %newdirabbrevs = %dirabbrevs;
+   while (!have_duplicate(values %newdirabbrevs)) {
+   %dirabbrevs = %newdirabbrevs;
+   last if !have_slash(values %dirabbrevs);
+   %newdirabbrevs = %dirabbrevs;
+   for (values %newdirabbrevs) {
+   s{^[^/]*/}{};
+   }
+   }
+
+   my %times;
+   my @colwidth = ((0)x@dirs);
for my $i (0..$#dirs) {
my $d = $dirs[$i];
-   $times{$prefixes{$d}.$t} = 
[get_times("$resultsdir/$prefixes{$d}$t.times")];
-   my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}};
-   my $w = length format_times($r,$u,$s,$firstr);
+   my $w = length (exists $dirabbrevs{$d} ? $dirabbrevs{$d} : 
$dirnames{$d});
$colwidth[$i] = $w if $w > $colwidth[$i];
-   $firstr = $r unless defined $firstr;
}
-}
-my $totalwidth = 3*@dirs+$descrlen;
-$totalwidth += $_ for (@colwidth);
-
-binmode STDOUT, ":utf8" or die "PANIC on binmode: $!";
+   for my $t (@subtests) {
+   my $firstr;
+   for my $i (0..$#dirs) {
+   my $d = $dirs[$i];
+   $times{$prefixes{$d}.$t} = 
[get_times("$resultsdir/$prefixes{$d}$t.times")];
+   my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}};
+   my $w = length format_times($r,$u,$s,$firstr);
+   $colwidth[$i] = $w if $w > $colwidth[$i];
+   $firstr = $r unless defined $firstr;
+   }
+   }
+   my $totalwidth = 3*@dirs+$descrlen;
+   $totalwidth += $_ for (@colwidth);
 
-printf "%-${descrlen}s", "Test";
-for my $i (0..$#dirs) {
-   my $d = $dirs[$i];
-   printf "   %-$colwidth[$i]s", (exists $dirabbrevs{$d} ? $dirabbrevs{$d} 
: $dirnames{$d});
-}
-print "\n";
-print "-"x$totalwidth, "\n";
-for my $t (@subtests) {
-   printf "%-${descrlen}s", $descrs{$t};
-   my $firstr;
+   printf "%-${descrlen}s", "Test";
for my $i (0..$#dirs) {
my $d = $dirs[$i];
-   my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}};
-   printf "   %-$colwidth[$i]s", format_times($r,$u,$s,$firstr);
-   $firstr = $r unless defined $firstr;
+   printf "   %-$colwidth[$i]s", (exists $dirabbrevs{$d} ? 
$dirabbrevs{$d} : $dirnames{$d});
}
print "\n";
+   print "-"x$totalwidth, "\n";
+   for my $t (@subtests) {
+   printf "%-${descrlen}s", $descrs{$t};
+   my $firstr;
+   for my $i (0..$#dirs) {
+   my $d = $dirs[$i];
+   my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}};
+   printf "   %-$colwidth[$i]s", 
format_times($r,$u,$s,$firstr);
+   $firstr = $r unless defined $firstr;
+   }
+   print "\n";
+   }
 }
+
+binmode STDOUT, ":utf8" or die "PANIC on binmode: $!";
+
+print_default_results();
-- 
2.16.0.rc0.40.gbe5e688583



[PATCH v3 5/7] perf/run: learn about perf.codespeedOutput

2018-01-05 Thread Christian Couder
Let's make it possible to set the output format (regular or
codespeed) of the perf tests in a config file.
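
A minimal usage sketch (the test script name is only an example):

$ cat perf.conf
[perf]
	codespeedOutput = true
$ ./run --config perf.conf p7810-grep.sh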

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/perf/run | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/t/perf/run b/t/perf/run
index 214d658172..4e62d6bb3f 100755
--- a/t/perf/run
+++ b/t/perf/run
@@ -144,10 +144,15 @@ run_subsection () {
set -- . "$@"
fi
 
+   codespeed_opt=
+   test "$GIT_PERF_CODESPEED_OUTPUT" = "true" && 
codespeed_opt="--codespeed"
+
run_dirs "$@"
-   ./aggregate.perl "$@"
+   ./aggregate.perl $codespeed_opt "$@"
 }
 
+get_var_from_env_or_config "GIT_PERF_CODESPEED_OUTPUT" "perf" 
"codespeedOutput" "--bool"
+
 cd "$(dirname $0)"
 . ../../GIT-BUILD-OPTIONS
 
-- 
2.16.0.rc0.40.gbe5e688583



[PATCH v3 0/7] Codespeed perf results

2018-01-05 Thread Christian Couder
This patch series is built on top of cc/perf-run-config which recently
graduated to master.

It makes it possible to send perf results to a Codespeed server. See
https://github.com/tobami/codespeed/ and web sites like
http://speed.pypy.org/ which are using Codespeed.

The end goal would be to have such a server always available to track
how the different git commands perform over time on different kind of
repos (small, medium, large, ...) with different optimizations on and
off (split-index, libpcre2, BLK_SHA1, ...)

With this series and a config file like:

$ cat perf.conf
[perf]
dirsOrRevs = v2.12.0 v2.13.0
repeatCount = 10
sendToCodespeed = http://localhost:8000
repoName = Git repo
[perf "with libpcre"]
makeOpts = "DEVELOPER=1 USE_LIBPCRE=YesPlease"
[perf "without libpcre"]
makeOpts = "DEVELOPER=1"

One should be able to just launch:

$ ./run --config perf.conf p7810-grep.sh

and then get nice graphs in a Codespeed instance running on
http://localhost:8000.

Caveat
~~~~~~

For now one has to create the "Git repo" environment (in fact all the
values of the "environment" field sent to Codespeed) in the Codespeed
admin interface. (We send the perf.repoName config variable in the
"environment" Codespeed field.) This is because Codespeed requires the
environment fields to be created and does not provide a simple way to
create these fields programmatically.

There are discussions on a Codespeed issue
(https://github.com/tobami/codespeed/issues/232) about creating a
proper API for Codespeed that could address this problem in the
future.

Changes since previous version
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

There are very few changes compared to v2:

  - In patch 1/7 commit message has been improved following a comment
by Junio.

  - In patch 3/7 some debugging comments were removed and 'use JSON;'
was moved to the top of aggregate.perl as suggested by Junio.

The diff is the following:

diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index 34d74fc015..5c439f6bc2 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -3,6 +3,7 @@
 use lib '../../perl/blib/lib';
 use strict;
 use warnings;
+use JSON;
 use Git;
 
 sub get_times {
@@ -226,10 +227,6 @@ sub print_codespeed_results {
}
}
 
-   #use Data::Dumper qw/ Dumper /;
-   #print Dumper(\@data);
-
-   use JSON;
print to_json(\@data, {utf8 => 1, pretty => 1}), "\n";
 }

Links
~~~~~

This patch series:

https://github.com/chriscool/git/commits/codespeed

Previous versions:

v1: https://github.com/chriscool/git/commits/codespeed1
v2: https://github.com/chriscool/git/commits/codespeed10

Discussions:

v1: 
https://public-inbox.org/git/cap8ufd3q4h-aybdabspow948lqyvydwz1hlpad+kr9zpxvz...@mail.gmail.com/
v2: https://public-inbox.org/git/20171226215908.425-1-chrisc...@tuxfamily.org/

Discussions about the cc/perf-run-config patch series:

v1: https://public-inbox.org/git/20170713065050.19215-1-chrisc...@tuxfamily.org/
v2: 
https://public-inbox.org/git/cap8ufd2j-ufh+9awz91gtz-jusq7euoexmguro59vpf29jx...@mail.gmail.com/


Christian Couder (7):
  perf/aggregate: fix checking ENV{GIT_PERF_SUBSECTION}
  perf/aggregate: refactor printing results
  perf/aggregate: implement codespeed JSON output
  perf/run: add conf_opts argument to get_var_from_env_or_config()
  perf/run: learn about perf.codespeedOutput
  perf/run: learn to send output to codespeed server
  perf/run: read GIT_PERF_REPO_NAME from perf.repoName

 t/perf/aggregate.perl | 160 +++---
 t/perf/run|  31 --
 2 files changed, 137 insertions(+), 54 deletions(-)

-- 
2.16.0.rc0.40.gbe5e688583



[PATCH v3 1/7] perf/aggregate: fix checking ENV{GIT_PERF_SUBSECTION}

2018-01-05 Thread Christian Couder
The way we check ENV{GIT_PERF_SUBSECTION} could trigger
comparison between undef and "" that may be flagged by
use of strict & warnings. Let's fix that.
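
For instance, when the variable is not set at all, something like this is
emitted (the one-liner is only an illustration and the exact wording of the
warning depends on the perl version):

$ perl -Mstrict -Mwarnings -e 'print "sub\n" if $ENV{GIT_PERF_SUBSECTION} ne ""'
Use of uninitialized value in string ne at -e line 1.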

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/perf/aggregate.perl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index e401208488..769d418708 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -70,7 +70,7 @@ if (not @tests) {
 }
 
 my $resultsdir = "test-results";
-if ($ENV{GIT_PERF_SUBSECTION} ne "") {
+if (exists $ENV{GIT_PERF_SUBSECTION} and $ENV{GIT_PERF_SUBSECTION} ne "") {
$resultsdir .= "/" . $ENV{GIT_PERF_SUBSECTION};
 }
 
-- 
2.16.0.rc0.40.gbe5e688583



[PATCH v3 7/7] perf/run: read GIT_PERF_REPO_NAME from perf.repoName

2018-01-05 Thread Christian Couder
The GIT_PERF_REPO_NAME env variable is used in
the `aggregate.perl` script to set the 'environment'
field in the JSON Codespeed output.

Let's make it easy to set this variable through the
perf.repoName config option in the perf config file.
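
For example (the name is illustrative), with:

$ cat perf.conf
[perf]
	repoName = Git repo

the aggregate script will see GIT_PERF_REPO_NAME="Git repo".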

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/perf/run | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/t/perf/run b/t/perf/run
index ef56396546..1a100d6134 100755
--- a/t/perf/run
+++ b/t/perf/run
@@ -137,6 +137,9 @@ run_subsection () {
get_var_from_env_or_config "GIT_PERF_MAKE_COMMAND" "perf" "makeCommand"
get_var_from_env_or_config "GIT_PERF_MAKE_OPTS" "perf" "makeOpts"
 
+   get_var_from_env_or_config "GIT_PERF_REPO_NAME" "perf" "repoName"
+   export GIT_PERF_REPO_NAME
+
GIT_PERF_AGGREGATING_LATER=t
export GIT_PERF_AGGREGATING_LATER
 
-- 
2.16.0.rc0.40.gbe5e688583



Re: Rewrite cat-file.c : need help to find a bug

2018-01-04 Thread Christian Couder
On Thu, Jan 4, 2018 at 11:23 PM, Оля Тележная  wrote:
>
> So for now 2 of my last commits fail, and I am tired of searching for the 
> error.
> I was also trying to leave cat_file_info variable and fill in both new
> and old variables and then compare resulting values by printing them
> into a file. Everything is OK, but I find it suspicious that the
> resulting file is too small (fprintf was invoked only 3 times, it was
> here: 
> https://github.com/telezhnaya/git/commit/54a5b5a0167ad634c26e4fd7df234a46286ede0a#diff-2846189963e8aec1bcb559b69b7f20d0R1489)
>
> I have left few comments in github to simplify your understanding what
> I was trying to achieve. Feel free to ask any questions if you find
> the code strange, unclear or suspicious.

Let me try to see how I can debug it.

Running `./t1006-cat-file.sh -v -i` gives:

---
expecting success:
maybe_remove_timestamp "$batch_output" $no_ts >expect &&
maybe_remove_timestamp "$(echo $sha1 | git cat-file --batch)"
$no_ts >actual &&
test_cmp expect actual

Segmentation fault (core dumped)
--- expect  2018-01-04 23:31:20.515114634 +
+++ actual  2018-01-04 23:31:20.635114274 +
@@ -1,2 +0,0 @@
-5e1c309dae7f45e0f39b1bf3ac3cd9db12e7d689 blob 11
-Hello World
\ No newline at end of file
not ok 9 - --batch output of blob is correct
#
#   maybe_remove_timestamp "$batch_output" $no_ts >expect &&
#   maybe_remove_timestamp "$(echo $sha1 | git cat-file
--batch)" $no_ts >actual &&
#   test_cmp expect actual
#
---

So there is probably a segfault when running $(echo $sha1 | git
cat-file --batch). Let's try to run that manually.

$ cd trash\ directory.t1006-cat-file/
$  echo 5e1c309dae7f45e0f39b1bf3ac3cd9db12e7d689 | git cat-file --batch
Segmentation fault (core dumped)

That's it. Now let's use gdb to see where it comes from:

$ echo 5e1c309dae7f45e0f39b1bf3ac3cd9db12e7d689 > myarg.txt
$ gdb git
GNU gdb (Ubuntu 8.0.1-0ubuntu1) 8.0.1
Copyright (C) 2017 Free Software Foundation, Inc.
...
(gdb)

Let's run the cat-file command inside gdb:

(gdb) run cat-file --batch < myarg.txt
Starting program: /home/ubuntu/bin/git cat-file --batch < myarg.txt
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".

Program received signal SIGSEGV, Segmentation fault.
0x556e88e6 in populate_value (ref=0x7fffd430) at ref-filter.c:1496
1496	ref->disk_size = *obj_info.disk_sizep;
(gdb)

Let's get a backtrace:

(gdb)  bt
#0  0x556e88e6 in populate_value (ref=0x7fffd430) at
ref-filter.c:1496
#1  0x555783f1 in batch_object_write (
obj_name=0x55a655f0
"5e1c309dae7f45e0f39b1bf3ac3cd9db12e7d689", opt=0x7fffd6e0,
data=0x7fffd5e0) at builtin/cat-file.c:291
#2  0x55578660 in batch_one_object (
obj_name=0x55a655f0
"5e1c309dae7f45e0f39b1bf3ac3cd9db12e7d689", opt=0x7fffd6e0,
data=0x7fffd5e0) at builtin/cat-file.c:346

Let's look at the code that makes it segfault:

(gdb) l
1491	fflush(stdout);
1492	return -1;
1493	}
1494	ref->type = *obj_info.typep;
1495	ref->size = *obj_info.sizep;
1496	ref->disk_size = *obj_info.disk_sizep;
1497	hashcpy(ref->delta_base_oid.hash,
	obj_info.delta_base_sha1);
1498	}
1499
1500	/* Fill in specials first */

Line 1496 has "ref->disk_size = *obj_info.disk_sizep;" so let's look
at those variables:

(gdb) p *ref
$1 = {objectname = {hash =
"^\034\060\235\256\177E\340\363\233\033\363\254<\331\333\022\347\326\211"},
  flag = 0, kind = 4148386208, symref = 0x7778b9e0
<_IO_2_1_stdin_> "\210 \255\373",
  commit = 0x7fffd510, values = 0x55a66cb0, type = OBJ_BLOB, size = 11,
  disk_size = -7613955248136140544, rest = 0x0, delta_base_oid = {
hash = "-\334qUUU\000\000\360\324\377\377\377\177\000\000\340\325\377\377"},
  start_of_request = 0x55a655f0 "5e1c309dae7f45e0f39b1bf3ac3cd9db12e7d689",
  refname = 0x7fffd4a8 ""}
(gdb)  p obj_info
$2 = {typep = 0x55a53df8 , sizep = 0x55a66c30,
disk_sizep = 0x0, delta_base_sha1 = 0x0,
  typename = 0x0, contentp = 0x0, whence = OI_LOOSE, u = {packed =
{pack = 0x0, offset = 0,
  is_delta = 0}}}

Ok, we can see that "disk_sizep = 0x0", which means that it segfaults
because line 1496 tries to read the value pointed to by disk_sizep,
which is NULL.

I hope this will help you.

Best,
Christian.


[PATCH 04/40] fsck: introduce promisor objects

2018-01-03 Thread Christian Couder
From: Jonathan Tan 

Currently, Git does not support repos with very large numbers of objects
or repos that wish to minimize manipulation of certain blobs (for
example, because they are very large) very well, even if the user
operates mostly on part of the repo, because Git is designed on the
assumption that every referenced object is available somewhere in the
repo storage. In such an arrangement, the full set of objects is usually
available in remote storage, ready to be lazily downloaded.

Teach fsck about the new state of affairs. In this commit, teach fsck
that missing promisor objects referenced from the reflog are not an
error case; in future commits, fsck will be taught about other cases.
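
As a rough illustration of the on-disk state (the hash is made up), a pack
that came from the promisor remote is simply accompanied by a ".promisor"
file, which add_packed_git() below now picks up:

$ ls repo/.git/objects/pack/
pack-1234abcd.idx
pack-1234abcd.pack
pack-1234abcd.promisor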

Signed-off-by: Jonathan Tan 
Signed-off-by: Junio C Hamano 
---
 builtin/fsck.c   |  2 +-
 cache.h  |  3 +-
 packfile.c   | 78 --
 packfile.h   | 13 
 t/t0410-partial-clone.sh | 81 
 5 files changed, 172 insertions(+), 5 deletions(-)
 create mode 100755 t/t0410-partial-clone.sh

diff --git a/builtin/fsck.c b/builtin/fsck.c
index 04846d46f9..793d289367 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -398,7 +398,7 @@ static void fsck_handle_reflog_oid(const char *refname, 
struct object_id *oid,
xstrfmt("%s@{%"PRItime"}", refname, 
timestamp));
obj->flags |= USED;
mark_object_reachable(obj);
-   } else {
+   } else if (!is_promisor_object(oid)) {
error("%s: invalid reflog entry %s", refname, 
oid_to_hex(oid));
errors_found |= ERROR_REACHABLE;
}
diff --git a/cache.h b/cache.h
index 7b27abac35..078607ee91 100644
--- a/cache.h
+++ b/cache.h
@@ -1649,7 +1649,8 @@ extern struct packed_git {
unsigned pack_local:1,
 pack_keep:1,
 freshened:1,
-do_not_close:1;
+do_not_close:1,
+pack_promisor:1;
unsigned char sha1[20];
struct revindex_entry *revindex;
/* something like ".git/objects/pack/x.pack" */
diff --git a/packfile.c b/packfile.c
index 4a5fe7ab18..aee6c3d674 100644
--- a/packfile.c
+++ b/packfile.c
@@ -8,6 +8,12 @@
 #include "list.h"
 #include "streaming.h"
 #include "sha1-lookup.h"
+#include "commit.h"
+#include "object.h"
+#include "tag.h"
+#include "tree-walk.h"
+#include "tree.h"
+#include "external-odb.h"
 
 char *odb_pack_name(struct strbuf *buf,
const unsigned char *sha1,
@@ -643,10 +649,10 @@ struct packed_git *add_packed_git(const char *path, 
size_t path_len, int local)
return NULL;
 
/*
-* ".pack" is long enough to hold any suffix we're adding (and
+* ".promisor" is long enough to hold any suffix we're adding (and
 * the use xsnprintf double-checks that)
 */
-   alloc = st_add3(path_len, strlen(".pack"), 1);
+   alloc = st_add3(path_len, strlen(".promisor"), 1);
p = alloc_packed_git(alloc);
memcpy(p->pack_name, path, path_len);
 
@@ -654,6 +660,10 @@ struct packed_git *add_packed_git(const char *path, size_t 
path_len, int local)
if (!access(p->pack_name, F_OK))
p->pack_keep = 1;
 
+   xsnprintf(p->pack_name + path_len, alloc - path_len, ".promisor");
+   if (!access(p->pack_name, F_OK))
+   p->pack_promisor = 1;
+
xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
free(p);
@@ -781,7 +791,8 @@ static void prepare_packed_git_one(char *objdir, int local)
if (ends_with(de->d_name, ".idx") ||
ends_with(de->d_name, ".pack") ||
ends_with(de->d_name, ".bitmap") ||
-   ends_with(de->d_name, ".keep"))
+   ends_with(de->d_name, ".keep") ||
+   ends_with(de->d_name, ".promisor"))
string_list_append(&garbage, path.buf);
else
report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
@@ -1889,6 +1900,9 @@ int for_each_packed_object(each_packed_object_fn cb, void 
*data, unsigned flags)
for (p = packed_git; p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
+   if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+   !p->pack_promisor)
+   continue;
if (open_pack_index(p)) {
pack_errors = 1;
continue;
@@ -1899,3 +1913,61 @@ int for_each_packed_object(each_packed_object_fn cb, 
void *data, unsigned flags)
}
return r ? r : 

[PATCH 05/40] fsck: support refs pointing to promisor objects

2018-01-03 Thread Christian Couder
From: Jonathan Tan 

Teach fsck to not treat refs referring to missing promisor objects as an
error when extensions.partialclone is set.

For the purposes of warning about no default refs, such refs are still
treated as legitimate refs.

Signed-off-by: Jonathan Tan 
Signed-off-by: Junio C Hamano 
---
 builtin/fsck.c   |  8 
 t/t0410-partial-clone.sh | 24 
 2 files changed, 32 insertions(+)

diff --git a/builtin/fsck.c b/builtin/fsck.c
index 793d289367..c6bb29d242 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -434,6 +434,14 @@ static int fsck_handle_ref(const char *refname, const 
struct object_id *oid,
 
obj = parse_object(oid);
if (!obj) {
+   if (is_promisor_object(oid)) {
+   /*
+* Increment default_refs anyway, because this is a
+* valid ref.
+*/
+default_refs++;
+return 0;
+   }
error("%s: invalid sha1 pointer %s", refname, oid_to_hex(oid));
errors_found |= ERROR_REACHABLE;
/* We'll continue with the rest despite the error.. */
diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
index 9257b8c885..c4639e1134 100755
--- a/t/t0410-partial-clone.sh
+++ b/t/t0410-partial-clone.sh
@@ -13,6 +13,14 @@ pack_as_from_promisor () {
>repo/.git/objects/pack/pack-$HASH.promisor
 }
 
+promise_and_delete () {
+   HASH=$(git -C repo rev-parse "$1") &&
+   git -C repo tag -a -m message my_annotated_tag "$HASH" &&
+   git -C repo rev-parse my_annotated_tag | pack_as_from_promisor &&
+   git -C repo tag -d my_annotated_tag &&
+   delete_object repo "$HASH"
+}
+
 test_expect_success 'missing reflog object, but promised by a commit, passes 
fsck' '
test_create_repo repo &&
test_commit -C repo my_commit &&
@@ -78,4 +86,20 @@ test_expect_success 'missing reflog object alone fails fsck, 
even with extension
test_must_fail git -C repo fsck
 '
 
+test_expect_success 'missing ref object, but promised, passes fsck' '
+   rm -rf repo &&
+   test_create_repo repo &&
+   test_commit -C repo my_commit &&
+
+   A=$(git -C repo commit-tree -m a HEAD^{tree}) &&
+
+   # Reference $A only from ref
+   git -C repo branch my_branch "$A" &&
+   promise_and_delete "$A" &&
+
+   git -C repo config core.repositoryformatversion 1 &&
+   git -C repo config odb.magic.promisorRemote "arbitrary string" &&
+   git -C repo fsck
+'
+
 test_done
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 02/40] Add GIT_NO_EXTERNAL_ODB env variable

2018-01-03 Thread Christian Couder
This new environment variable will be used to perform git
commands without involving any external odb mechanism.

This makes it possible for example to create new blobs that
will not be sent to an external odb even if the external odb
supports "put_*" instructions.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 cache.h| 9 +
 environment.c  | 4 
 external-odb.c | 3 +--
 sha1_file.c| 3 +++
 4 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/cache.h b/cache.h
index 21af6442af..7b27abac35 100644
--- a/cache.h
+++ b/cache.h
@@ -437,6 +437,7 @@ static inline enum object_type object_type(unsigned int 
mode)
 #define CEILING_DIRECTORIES_ENVIRONMENT "GIT_CEILING_DIRECTORIES"
 #define NO_REPLACE_OBJECTS_ENVIRONMENT "GIT_NO_REPLACE_OBJECTS"
 #define GIT_REPLACE_REF_BASE_ENVIRONMENT "GIT_REPLACE_REF_BASE"
+#define NO_EXTERNAL_ODB_ENVIRONMENT "GIT_NO_EXTERNAL_ODB"
 #define GITATTRIBUTES_FILE ".gitattributes"
 #define INFOATTRIBUTES_FILE "info/attributes"
 #define ATTRIBUTE_MACRO_PREFIX "[attr]"
@@ -813,6 +814,14 @@ void reset_shared_repository(void);
 extern int check_replace_refs;
 extern char *git_replace_ref_base;
 
+/*
+ * Do external odbs need to be used this run?  This variable is
+ * initialized to true unless $GIT_NO_EXTERNAL_ODB is set, but it
+ * may be set to false by some commands that do not want external
+ * odbs to be active.
+ */
+extern int use_external_odb;
+
 extern int fsync_object_files;
 extern int core_preload_index;
 extern int core_apply_sparse_checkout;
diff --git a/environment.c b/environment.c
index 63ac38a46f..b3bd0daae2 100644
--- a/environment.c
+++ b/environment.c
@@ -48,6 +48,7 @@ const char *excludes_file;
 enum auto_crlf auto_crlf = AUTO_CRLF_FALSE;
 int check_replace_refs = 1;
 char *git_replace_ref_base;
+int use_external_odb = 1;
 enum eol core_eol = EOL_UNSET;
 enum safe_crlf safe_crlf = SAFE_CRLF_WARN;
 unsigned whitespace_rule_cfg = WS_DEFAULT_RULE;
@@ -117,6 +118,7 @@ const char * const local_repo_env[] = {
INDEX_ENVIRONMENT,
NO_REPLACE_OBJECTS_ENVIRONMENT,
GIT_REPLACE_REF_BASE_ENVIRONMENT,
+   NO_EXTERNAL_ODB_ENVIRONMENT,
GIT_PREFIX_ENVIRONMENT,
GIT_SUPER_PREFIX_ENVIRONMENT,
GIT_SHALLOW_FILE_ENVIRONMENT,
@@ -156,6 +158,8 @@ void setup_git_env(void)
free(git_replace_ref_base);
git_replace_ref_base = xstrdup(replace_ref_base ? replace_ref_base
  : "refs/replace/");
+   if (getenv(NO_EXTERNAL_ODB_ENVIRONMENT))
+   use_external_odb = 0;
free(namespace);
namespace = expand_namespace(getenv(GIT_NAMESPACE_ENVIRONMENT));
shallow_file = getenv(GIT_SHALLOW_FILE_ENVIRONMENT);
diff --git a/external-odb.c b/external-odb.c
index f3ea491333..390958dbfe 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -43,7 +43,7 @@ static void external_odb_init(void)
 {
static int initialized;
 
-   if (initialized)
+   if (initialized || !use_external_odb)
return;
initialized = 1;
 
@@ -69,4 +69,3 @@ int external_odb_has_object(const unsigned char *sha1)
return 1;
return 0;
 }
-
diff --git a/sha1_file.c b/sha1_file.c
index 3f5ff274e2..cba6b2a537 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -675,6 +675,9 @@ void prepare_external_alt_odb(void)
static int linked_external;
const char *path;
 
+   if (!use_external_odb)
+   return;
+
if (linked_external)
return;
 
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 06/40] fsck: support referenced promisor objects

2018-01-03 Thread Christian Couder
From: Jonathan Tan 

Teach fsck to not treat missing promisor objects indirectly pointed to
by refs as an error when extensions.partialclone is set.

Signed-off-by: Jonathan Tan 
Signed-off-by: Junio C Hamano 
---
 builtin/fsck.c   | 11 +++
 t/t0410-partial-clone.sh | 23 +++
 2 files changed, 34 insertions(+)

diff --git a/builtin/fsck.c b/builtin/fsck.c
index c6bb29d242..b8bcb0e40c 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -149,6 +149,15 @@ static int mark_object(struct object *obj, int type, void 
*data, struct fsck_opt
if (obj->flags & REACHABLE)
return 0;
obj->flags |= REACHABLE;
+
+   if (is_promisor_object(&obj->oid))
+   /*
+* Further recursion does not need to be performed on this
+* object since it is a promisor object (so it does not need to
+* be added to "pending").
+*/
+   return 0;
+
if (!(obj->flags & HAS_OBJ)) {
if (parent && !has_object_file(&obj->oid)) {
printf("broken link from %7s %s\n",
@@ -208,6 +217,8 @@ static void check_reachable_object(struct object *obj)
 * do a full fsck
 */
if (!(obj->flags & HAS_OBJ)) {
+   if (is_promisor_object(&obj->oid))
+   return;
if (has_sha1_pack(obj->oid.hash))
return; /* it is in pack - forget about it */
printf("missing %s %s\n", printable_type(obj),
diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
index c4639e1134..46c88e8dfa 100755
--- a/t/t0410-partial-clone.sh
+++ b/t/t0410-partial-clone.sh
@@ -102,4 +102,27 @@ test_expect_success 'missing ref object, but promised, 
passes fsck' '
git -C repo fsck
 '
 
+test_expect_success 'missing object, but promised, passes fsck' '
+   rm -rf repo &&
+   test_create_repo repo &&
+   test_commit -C repo 1 &&
+   test_commit -C repo 2 &&
+   test_commit -C repo 3 &&
+   git -C repo tag -a annotated_tag -m "annotated tag" &&
+
+   C=$(git -C repo rev-parse 1) &&
+   T=$(git -C repo rev-parse 2^{tree}) &&
+   B=$(git hash-object repo/3.t) &&
+   AT=$(git -C repo rev-parse annotated_tag) &&
+
+   promise_and_delete "$C" &&
+   promise_and_delete "$T" &&
+   promise_and_delete "$B" &&
+   promise_and_delete "$AT" &&
+
+   git -C repo config core.repositoryformatversion 1 &&
+   git -C repo config odb.magic.promisorRemote "arbitrary string" &&
+   git -C repo fsck
+'
+
 test_done
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 03/40] external-odb: add has_external_odb()

2018-01-03 Thread Christian Couder
This function will be used to check if the external odb
mechanism is actually used.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 external-odb.c | 7 +++
 external-odb.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/external-odb.c b/external-odb.c
index 390958dbfe..d26e63d8b1 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -50,6 +50,13 @@ static void external_odb_init(void)
git_config(external_odb_config, NULL);
 }
 
+int has_external_odb(void)
+{
+   external_odb_init();
+
+   return !!helpers;
+}
+
 const char *external_odb_root(void)
 {
static const char *root;
diff --git a/external-odb.h b/external-odb.h
index ae2b228792..9a3c2f01b3 100644
--- a/external-odb.h
+++ b/external-odb.h
@@ -1,6 +1,7 @@
 #ifndef EXTERNAL_ODB_H
 #define EXTERNAL_ODB_H
 
+extern int has_external_odb(void);
 extern const char *external_odb_root(void);
 extern int external_odb_has_object(const unsigned char *sha1);
 
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 15/40] external-odb: add script mode support

2018-01-03 Thread Christian Couder
This adds support for the script command mode where
a helper script or command is called to retrieve or
manage objects.

This implements the 'have' and 'get_git_obj'
instructions for the script mode.
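
Roughly, a helper configured with odb.<name>.scriptCommand is run as
"<command> <instruction> [<sha1>]" (see odb_helper_start() below). A toy
sketch of such a helper follows; the storage layout and the exact output
formats here are illustrative, not the ones used by the t0400 helper:

#!/bin/sh
# Toy odb helper: serves objects stored as $ODB_DIR/<sha1>.
ODB_DIR=${ODB_DIR:-/tmp/my-odb}
case "$1" in
have)
	# one "<sha1> <size> <type>" line per object this helper can provide
	for f in "$ODB_DIR"/*
	do
		test -f "$f" || continue
		echo "$(basename "$f") $(($(wc -c <"$f"))) blob"
	done
	;;
get_git_obj)
	# send the content stored for the requested sha1 ($2) on stdout
	cat "$ODB_DIR/$2"
	;;
esac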

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 external-odb.c  |  51 ++-
 external-odb.h  |   1 +
 odb-helper.c| 218 +++-
 odb-helper.h|   4 +
 sha1_file.c |  12 ++-
 t/t0400-external-odb.sh |  44 ++
 6 files changed, 327 insertions(+), 3 deletions(-)
 create mode 100755 t/t0400-external-odb.sh

diff --git a/external-odb.c b/external-odb.c
index 5d0afb9762..81f2aa5fac 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -33,8 +33,14 @@ static int external_odb_config(const char *var, const char 
*value, void *data)
 
o = find_or_create_helper(name, namelen);
 
-   if (!strcmp(subkey, "promisorremote"))
+   if (!strcmp(subkey, "promisorremote")) {
+   o->type = ODB_HELPER_GIT_REMOTE;
return git_config_string(&o->dealer, var, value);
+   }
+   if (!strcmp(subkey, "scriptcommand")) {
+   o->type = ODB_HELPER_SCRIPT_CMD;
+   return git_config_string(&o->dealer, var, value);
+   }
 
return 0;
 }
@@ -77,6 +83,49 @@ int external_odb_has_object(const unsigned char *sha1)
return 0;
 }
 
+int external_odb_get_object(const unsigned char *sha1)
+{
+   struct odb_helper *o;
+   const char *path;
+
+   if (!external_odb_has_object(sha1))
+   return -1;
+
+   path = sha1_file_name_alt(external_odb_root(), sha1);
+   safe_create_leading_directories_const(path);
+   prepare_external_alt_odb();
+
+   for (o = helpers; o; o = o->next) {
+   struct strbuf tmpfile = STRBUF_INIT;
+   int ret;
+   int fd;
+
+   if (!odb_helper_has_object(o, sha1))
+   continue;
+
+   fd = create_object_tmpfile(&tmpfile, path);
+   if (fd < 0) {
+   strbuf_release(&tmpfile);
+   return -1;
+   }
+
+   if (odb_helper_get_object(o, sha1, fd) < 0) {
+   close(fd);
+   unlink(tmpfile.buf);
+   strbuf_release(&tmpfile);
+   continue;
+   }
+
+   close_sha1_file(fd);
+   ret = finalize_object_file(tmpfile.buf, path);
+   strbuf_release(&tmpfile);
+   if (!ret)
+   return 0;
+   }
+
+   return -1;
+}
+
 int external_odb_get_direct(const unsigned char *sha1)
 {
struct odb_helper *o;
diff --git a/external-odb.h b/external-odb.h
index fd6708163e..fb8b94972f 100644
--- a/external-odb.h
+++ b/external-odb.h
@@ -4,6 +4,7 @@
 extern int has_external_odb(void);
 extern const char *external_odb_root(void);
 extern int external_odb_has_object(const unsigned char *sha1);
+extern int external_odb_get_object(const unsigned char *sha1);
 extern int external_odb_get_direct(const unsigned char *sha1);
 
 #endif /* EXTERNAL_ODB_H */
diff --git a/odb-helper.c b/odb-helper.c
index 4b70b287af..c1a3443dc7 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -21,13 +21,124 @@ struct odb_helper_cmd {
struct child_process child;
 };
 
+/*
+ * Callers are responsible to ensure that the result of vaddf(fmt, ap)
+ * is properly shell-quoted.
+ */
+static void prepare_helper_command(struct argv_array *argv, const char *cmd,
+  const char *fmt, va_list ap)
+{
+   struct strbuf buf = STRBUF_INIT;
+
+   strbuf_addstr(&buf, cmd);
+   strbuf_addch(&buf, ' ');
+   strbuf_vaddf(&buf, fmt, ap);
+
+   argv_array_push(argv, buf.buf);
+   strbuf_release(&buf);
+}
+
+__attribute__((format (printf,3,4)))
+static int odb_helper_start(struct odb_helper *o,
+   struct odb_helper_cmd *cmd,
+   const char *fmt, ...)
+{
+   va_list ap;
+
+   memset(cmd, 0, sizeof(*cmd));
+   argv_array_init(&cmd->argv);
+
+   if (!o->dealer)
+   return -1;
+
+   va_start(ap, fmt);
+   prepare_helper_command(&cmd->argv, o->dealer, fmt, ap);
+   va_end(ap);
+
+   cmd->child.argv = cmd->argv.argv;
+   cmd->child.use_shell = 1;
+   cmd->child.no_stdin = 1;
+   cmd->child.out = -1;
+
+   if (start_command(&cmd->child) < 0) {
+   argv_array_clear(&cmd->argv);
+   return -1;
+   }
+
+   return 0;
+}
+
+static int odb_helper_finish(struct odb_helper *o,
+struct odb_helper_cmd *cmd)
+{
+   int ret = finish_command(&cmd->child);
+   argv_array_clear(&cmd->argv);
+   if (ret) {
+   warning("odb helper '%s' reported failure", o->name);
+   return 

[PATCH 01/40] Add initial external odb support

2018-01-03 Thread Christian Couder
The external-odb.{c,h} files will contain the functions
that are called by the rest of Git mostly from
"sha1_file.c" to access the objects managed by the
external odbs.

The odb-helper.{c,h} files will contain the functions to
actually implement communication with either the internal
functions or the external scripts or processes that will
manage and provide external git objects.

For now only infrastructure to create helpers from the
config and to manage a cache for the 'have' command is
implemented.
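
Helpers are declared through "odb.<name>.*" configuration. For example, the
tests later in this series declare one like this (the value is an arbitrary
string for now, it only needs to be present):

$ git config odb.magic.promisorRemote "arbitrary string"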

Helped-by: Jeff King <p...@peff.net>
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 Makefile   |  2 ++
 cache.h|  1 +
 external-odb.c | 72 ++
 external-odb.h |  7 ++
 odb-helper.c   | 54 +++
 odb-helper.h   | 24 
 sha1_file.c| 19 +++-
 7 files changed, 178 insertions(+), 1 deletion(-)
 create mode 100644 external-odb.c
 create mode 100644 external-odb.h
 create mode 100644 odb-helper.c
 create mode 100644 odb-helper.h

diff --git a/Makefile b/Makefile
index 2a81ae22e9..07694185c9 100644
--- a/Makefile
+++ b/Makefile
@@ -799,6 +799,7 @@ LIB_OBJS += ewah/ewah_bitmap.o
 LIB_OBJS += ewah/ewah_io.o
 LIB_OBJS += ewah/ewah_rlw.o
 LIB_OBJS += exec_cmd.o
+LIB_OBJS += external-odb.o
 LIB_OBJS += fetch-pack.o
 LIB_OBJS += fsck.o
 LIB_OBJS += fsmonitor.o
@@ -834,6 +835,7 @@ LIB_OBJS += notes-cache.o
 LIB_OBJS += notes-merge.o
 LIB_OBJS += notes-utils.o
 LIB_OBJS += object.o
+LIB_OBJS += odb-helper.o
 LIB_OBJS += oidmap.o
 LIB_OBJS += oidset.o
 LIB_OBJS += packfile.o
diff --git a/cache.h b/cache.h
index a2ec8c0b55..21af6442af 100644
--- a/cache.h
+++ b/cache.h
@@ -1587,6 +1587,7 @@ extern void prepare_alt_odb(void);
 extern char *compute_alternate_path(const char *path, struct strbuf *err);
 typedef int alt_odb_fn(struct alternate_object_database *, void *);
 extern int foreach_alt_odb(alt_odb_fn, void*);
+extern void prepare_external_alt_odb(void);
 
 /*
  * Allocate a "struct alternate_object_database" but do _not_ actually
diff --git a/external-odb.c b/external-odb.c
new file mode 100644
index 00..f3ea491333
--- /dev/null
+++ b/external-odb.c
@@ -0,0 +1,72 @@
+#include "cache.h"
+#include "external-odb.h"
+#include "odb-helper.h"
+#include "config.h"
+
+static struct odb_helper *helpers;
+static struct odb_helper **helpers_tail = &helpers;
+
+static struct odb_helper *find_or_create_helper(const char *name, int len)
+{
+   struct odb_helper *o;
+
+   for (o = helpers; o; o = o->next)
+   if (!strncmp(o->name, name, len) && !o->name[len])
+   return o;
+
+   o = odb_helper_new(name, len);
+   *helpers_tail = o;
+   helpers_tail = &o->next;
+
+   return o;
+}
+
+static int external_odb_config(const char *var, const char *value, void *data)
+{
+   struct odb_helper *o;
+   const char *name;
+   int namelen;
+   const char *subkey;
+
+   if (parse_config_key(var, "odb", &name, &namelen, &subkey) < 0)
+   return 0;
+
+   o = find_or_create_helper(name, namelen);
+
+   if (!strcmp(subkey, "promisorremote"))
+   return git_config_string(&o->dealer, var, value);
+
+   return 0;
+}
+
+static void external_odb_init(void)
+{
+   static int initialized;
+
+   if (initialized)
+   return;
+   initialized = 1;
+
+   git_config(external_odb_config, NULL);
+}
+
+const char *external_odb_root(void)
+{
+   static const char *root;
+   if (!root)
+   root = git_pathdup("objects/external");
+   return root;
+}
+
+int external_odb_has_object(const unsigned char *sha1)
+{
+   struct odb_helper *o;
+
+   external_odb_init();
+
+   for (o = helpers; o; o = o->next)
+   if (odb_helper_has_object(o, sha1))
+   return 1;
+   return 0;
+}
+
diff --git a/external-odb.h b/external-odb.h
new file mode 100644
index 00..ae2b228792
--- /dev/null
+++ b/external-odb.h
@@ -0,0 +1,7 @@
+#ifndef EXTERNAL_ODB_H
+#define EXTERNAL_ODB_H
+
+extern const char *external_odb_root(void);
+extern int external_odb_has_object(const unsigned char *sha1);
+
+#endif /* EXTERNAL_ODB_H */
diff --git a/odb-helper.c b/odb-helper.c
new file mode 100644
index 00..1404393807
--- /dev/null
+++ b/odb-helper.c
@@ -0,0 +1,54 @@
+#include "cache.h"
+#include "object.h"
+#include "argv-array.h"
+#include "odb-helper.h"
+#include "run-command.h"
+#include "sha1-lookup.h"
+
+struct odb_helper *odb_helper_new(const char *name, int namelen)
+{
+   struct odb_helper *o;
+
+   o = xcalloc(1, sizeof(*o));
+   o->name = xmemdupz(name, namelen);
+
+   return o;
+}
+
+struct odb_helper_cmd {
+   struct argv_array argv;
+ 

[PATCH 16/40] odb-helper: add 'enum odb_helper_type'

2018-01-03 Thread Christian Couder
As there will be different kinds of helpers, let's add
an "enum odb_helper_type" to tell between the different
kinds.

Let's add a field with this type in "struct odb_helper",
and set it when reading the config file.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 odb-helper.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/odb-helper.h b/odb-helper.h
index 90b279c07e..4f2ac5e476 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -1,9 +1,18 @@
 #ifndef ODB_HELPER_H
 #define ODB_HELPER_H
 
+enum odb_helper_type {
+   ODB_HELPER_NONE = 0,
+   ODB_HELPER_GIT_REMOTE,
+   ODB_HELPER_SCRIPT_CMD,
+   ODB_HELPER_PROCESS_CMD,
+   OBJ_HELPER_MAX
+};
+
 struct odb_helper {
const char *name;
const char *dealer;
+   enum odb_helper_type type;
 
struct odb_helper_object {
unsigned char sha1[20];
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 07/40] fsck: support promisor objects as CLI argument

2018-01-03 Thread Christian Couder
From: Jonathan Tan 

Teach fsck to not treat missing promisor objects provided on the CLI as
an error when extensions.partialclone is set.

Signed-off-by: Jonathan Tan 
Signed-off-by: Junio C Hamano 
---
 builtin/fsck.c   |  2 ++
 t/t0410-partial-clone.sh | 13 +
 2 files changed, 15 insertions(+)

diff --git a/builtin/fsck.c b/builtin/fsck.c
index b8bcb0e40c..a6fa6d6482 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -750,6 +750,8 @@ int cmd_fsck(int argc, const char **argv, const char 
*prefix)
struct object *obj = lookup_object(oid.hash);
 
if (!obj || !(obj->flags & HAS_OBJ)) {
+   if (is_promisor_object(&oid))
+   continue;
error("%s: object missing", oid_to_hex(&oid));
errors_found |= ERROR_OBJECT;
continue;
diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
index 46c88e8dfa..a0f901fa1d 100755
--- a/t/t0410-partial-clone.sh
+++ b/t/t0410-partial-clone.sh
@@ -125,4 +125,17 @@ test_expect_success 'missing object, but promised, passes 
fsck' '
git -C repo fsck
 '
 
+test_expect_success 'missing CLI object, but promised, passes fsck' '
+   rm -rf repo &&
+   test_create_repo repo &&
+   test_commit -C repo my_commit &&
+
+   A=$(git -C repo commit-tree -m a HEAD^{tree}) &&
+   promise_and_delete "$A" &&
+
+   git -C repo config core.repositoryformatversion 1 &&
+   git -C repo config odb.magic.promisorRemote "arbitrary string" &&
+   git -C repo fsck "$A"
+'
+
 test_done
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 17/40] odb-helper: add odb_helper_init() to send 'init' instruction

2018-01-03 Thread Christian Couder
Let's add an odb_helper_init() function to send an 'init'
instruction to the helpers. This 'init' instruction is
especially useful to get the capabilities that are supported
by the helpers.

So while at it, let's also add a parse_capabilities()
function to parse them and a supported_capabilities
variable in struct odb_helper to store them.
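
For a script mode helper the exchange could look like this (the helper name
is made up; the capability names come from parse_capabilities() below, and
which ones get advertised depends on what the helper implements):

$ ./my-odb-helper init
capability=have
capability=get_git_obj
capability=put_raw_obj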

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 external-odb.c  | 13 +++-
 odb-helper.c| 54 +
 odb-helper.h| 12 +++
 t/t0400-external-odb.sh |  4 
 4 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/external-odb.c b/external-odb.c
index 81f2aa5fac..2622c12853 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -35,6 +35,8 @@ static int external_odb_config(const char *var, const char 
*value, void *data)
 
if (!strcmp(subkey, "promisorremote")) {
o->type = ODB_HELPER_GIT_REMOTE;
+   o->supported_capabilities |= ODB_HELPER_CAP_HAVE;
+   o->supported_capabilities |= ODB_HELPER_CAP_GET_DIRECT;
return git_config_string(&o->dealer, var, value);
}
if (!strcmp(subkey, "scriptcommand")) {
@@ -48,12 +50,16 @@ static int external_odb_config(const char *var, const char 
*value, void *data)
 static void external_odb_init(void)
 {
static int initialized;
+   struct odb_helper *o;
 
if (initialized || !use_external_odb)
return;
initialized = 1;
 
git_config(external_odb_config, NULL);
+
+   for (o = helpers; o; o = o->next)
+   odb_helper_init(o);
 }
 
 int has_external_odb(void)
@@ -77,9 +83,12 @@ int external_odb_has_object(const unsigned char *sha1)
 
external_odb_init();
 
-   for (o = helpers; o; o = o->next)
+   for (o = helpers; o; o = o->next) {
+   if (!(o->supported_capabilities & ODB_HELPER_CAP_HAVE))
+   return 1;
if (odb_helper_has_object(o, sha1))
return 1;
+   }
return 0;
 }
 
@@ -133,6 +142,8 @@ int external_odb_get_direct(const unsigned char *sha1)
external_odb_init();
 
for (o = helpers; o; o = o->next) {
+   if (!(o->supported_capabilities & ODB_HELPER_CAP_GET_DIRECT))
+   continue;
if (odb_helper_get_direct(o, sha1) < 0)
continue;
return 0;
diff --git a/odb-helper.c b/odb-helper.c
index c1a3443dc7..ea642fd438 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -6,6 +6,40 @@
 #include "sha1-lookup.h"
 #include "fetch-object.h"
 
+static void parse_capabilities(char *cap_buf,
+  unsigned int *supported_capabilities,
+  const char *process_name)
+{
+   struct string_list cap_list = STRING_LIST_INIT_NODUP;
+
+   string_list_split_in_place(&cap_list, cap_buf, '=', 1);
+
+   if (cap_list.nr == 2 && !strcmp(cap_list.items[0].string, 
"capability")) {
+   const char *cap_name = cap_list.items[1].string;
+
+   if (!strcmp(cap_name, "get_git_obj")) {
+   *supported_capabilities |= ODB_HELPER_CAP_GET_GIT_OBJ;
+   } else if (!strcmp(cap_name, "get_raw_obj")) {
+   *supported_capabilities |= ODB_HELPER_CAP_GET_RAW_OBJ;
+   } else if (!strcmp(cap_name, "get_direct")) {
+   *supported_capabilities |= ODB_HELPER_CAP_GET_DIRECT;
+   } else if (!strcmp(cap_name, "put_git_obj")) {
+   *supported_capabilities |= ODB_HELPER_CAP_PUT_GIT_OBJ;
+   } else if (!strcmp(cap_name, "put_raw_obj")) {
+   *supported_capabilities |= ODB_HELPER_CAP_PUT_RAW_OBJ;
+   } else if (!strcmp(cap_name, "put_direct")) {
+   *supported_capabilities |= ODB_HELPER_CAP_PUT_DIRECT;
+   } else if (!strcmp(cap_name, "have")) {
+   *supported_capabilities |= ODB_HELPER_CAP_HAVE;
+   } else {
+   warning("external process '%s' requested unsupported 
read-object capability '%s'",
+   process_name, cap_name);
+   }
+   }
+
+   string_list_clear(&cap_list, 0);
+}
+
 struct odb_helper *odb_helper_new(const char *name, int namelen)
 {
struct odb_helper *o;
@@ -80,6 +114,26 @@ static int odb_helper_finish(struct odb_helper *o,
return 0;
 }
 
+int odb_helper_init(struct odb_helper *o)
+{
+   struct odb_helper_cmd cmd;
+   FILE *fh;
+   struct strbuf line = STRBUF_INIT;
+
+   if (odb_helper_start(o, &cmd, "init") < 0)
+   return -1;
+
+   fh = x

[PATCH 14/40] sha1_file: prepare for external odbs

2018-01-03 Thread Christian Couder
In the following commits we will need some functions that were
internal to sha1_file.c, so let's first make them non static
and declare them in "cache.h". While at it, let's rename
'create_tmpfile()' to 'create_object_tmpfile()' to make its
name less generic.

Let's also split out 'sha1_file_name_alt()' from
'sha1_file_name()' and 'open_sha1_file_alt()' from
'open_sha1_file()', as we will need both of these new
functions too.

Helped-by: Jeff King <p...@peff.net>
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 cache.h |  8 
 sha1_file.c | 47 +--
 2 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/cache.h b/cache.h
index 3fabf998ce..f41c102cb4 100644
--- a/cache.h
+++ b/cache.h
@@ -964,6 +964,12 @@ extern void check_repository_format(void);
  */
 extern const char *sha1_file_name(const unsigned char *sha1);
 
+/*
+ * Like sha1_file_name, but return the filename within a specific alternate
+ * object directory. Shares the same static buffer with sha1_file_name.
+ */
+extern const char *sha1_file_name_alt(const char *objdir, const unsigned char 
*sha1);
+
 /*
  * Return an abbreviated sha1 unique within this repository's object database.
  * The result will be at least `len` characters long, and will be NUL
@@ -1251,6 +1257,8 @@ extern int parse_sha1_header(const char *hdr, unsigned 
long *sizep);
 
 extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned 
long size, const char *type);
 
+extern int create_object_tmpfile(struct strbuf *tmp, const char *filename);
+extern void close_sha1_file(int fd);
 extern int finalize_object_file(const char *tmpfile, const char *filename);
 
 /*
diff --git a/sha1_file.c b/sha1_file.c
index 261baf800f..785e8dda03 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -322,17 +322,22 @@ static void fill_sha1_path(struct strbuf *buf, const 
unsigned char *sha1)
}
 }
 
-const char *sha1_file_name(const unsigned char *sha1)
+const char *sha1_file_name_alt(const char *objdir, const unsigned char *sha1)
 {
static struct strbuf buf = STRBUF_INIT;
 
strbuf_reset(&buf);
-   strbuf_addf(&buf, "%s/", get_object_directory());
+   strbuf_addf(&buf, "%s/", objdir);
 
fill_sha1_path(&buf, sha1);
return buf.buf;
 }
 
+const char *sha1_file_name(const unsigned char *sha1)
+{
+   return sha1_file_name_alt(get_object_directory(), sha1);
+}
+
 struct strbuf *alt_scratch_buf(struct alternate_object_database *alt)
 {
strbuf_setlen(&alt->scratch, alt->base_len);
@@ -902,24 +907,14 @@ static int stat_sha1_file(const unsigned char *sha1, 
struct stat *st,
return -1;
 }
 
-/*
- * Like stat_sha1_file(), but actually open the object and return the
- * descriptor. See the caveats on the "path" parameter above.
- */
-static int open_sha1_file(const unsigned char *sha1, const char **path)
+static int open_sha1_file_alt(const unsigned char *sha1, const char **path)
 {
-   int fd;
struct alternate_object_database *alt;
-   int most_interesting_errno;
-
-   *path = sha1_file_name(sha1);
-   fd = git_open(*path);
-   if (fd >= 0)
-   return fd;
-   most_interesting_errno = errno;
+   int most_interesting_errno = errno;
 
prepare_alt_odb();
for (alt = alt_odb_list; alt; alt = alt->next) {
+   int fd;
*path = alt_sha1_path(alt, sha1);
fd = git_open(*path);
if (fd >= 0)
@@ -931,6 +926,22 @@ static int open_sha1_file(const unsigned char *sha1, const 
char **path)
return -1;
 }
 
+/*
+ * Like stat_sha1_file(), but actually open the object and return the
+ * descriptor. See the caveats on the "path" parameter above.
+ */
+static int open_sha1_file(const unsigned char *sha1, const char **path)
+{
+   int fd;
+
+   *path = sha1_file_name(sha1);
+   fd = git_open(*path);
+   if (fd >= 0)
+   return fd;
+
+   return open_sha1_file_alt(sha1, path);
+}
+
 /*
  * Map the loose object at "path" if it is not NULL, or the path found by
  * searching for a loose object named "sha1".
@@ -1527,7 +1538,7 @@ int hash_sha1_file(const void *buf, unsigned long len, 
const char *type,
 }
 
 /* Finalize a file on disk, and close it. */
-static void close_sha1_file(int fd)
+void close_sha1_file(int fd)
 {
if (fsync_object_files)
fsync_or_die(fd, "sha1 file");
@@ -1551,7 +1562,7 @@ static inline int directory_size(const char *filename)
  * We want to avoid cross-directory filename renames, because those
  * can have problems on various filesystems (FAT, NFS, Coda).
  */
-static int create_tmpfile(struct strbuf *tmp, const char *filename)
+int create_object_tmpfile(struct strbuf *tmp, const char *filename)
 {
int fd, dirlen = directory_size(filename);
 
@@ -1591,7 +1602,7 @@ static i

[PATCH 13/40] gc: do not repack promisor packfiles

2018-01-03 Thread Christian Couder
From: Jonathan Tan 

Teach gc to stop traversal at promisor objects, and to leave promisor
packfiles alone. This has the effect of only repacking non-promisor
packfiles, and preserves the distinction between promisor packfiles and
non-promisor packfiles.
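
Concretely, a gc on such a repository now ends up running something along
the lines of (simplified sketch):

$ git repack -a -d -l    # repack makes pack-objects exclude promisor objects
$ git prune --exclude-promisor-objects --expire 2.weeks.ago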

Signed-off-by: Jonathan Tan 
Signed-off-by: Jeff Hostetler 
Signed-off-by: Junio C Hamano 
---
 Documentation/git-pack-objects.txt | 11 
 builtin/gc.c   |  4 +++
 builtin/pack-objects.c | 37 --
 builtin/prune.c|  7 +
 builtin/repack.c   |  9 +--
 t/t0410-partial-clone.sh   | 54 --
 6 files changed, 116 insertions(+), 6 deletions(-)

diff --git a/Documentation/git-pack-objects.txt 
b/Documentation/git-pack-objects.txt
index aa403d02f3..81bc490ac5 100644
--- a/Documentation/git-pack-objects.txt
+++ b/Documentation/git-pack-objects.txt
@@ -255,6 +255,17 @@ a missing object is encountered.  This is the default 
action.
 The form '--missing=allow-any' will allow object traversal to continue
 if a missing object is encountered.  Missing objects will silently be
 omitted from the results.
++
+The form '--missing=allow-promisor' is like 'allow-any', but will only
+allow object traversal to continue for EXPECTED promisor missing objects.
+Unexpected missing object will raise an error.
+
+--exclude-promisor-objects::
+   Omit objects that are known to be in the promisor remote.  (This
+   option has the purpose of operating only on locally created objects,
+   so that when we repack, we still maintain a distinction between
+   locally created objects [without .promisor] and objects from the
+   promisor remote [with .promisor].)  This is used with partial clone.
 
 SEE ALSO
 
diff --git a/builtin/gc.c b/builtin/gc.c
index 3c5eae0edf..cef1461d1a 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -20,6 +20,7 @@
 #include "argv-array.h"
 #include "commit.h"
 #include "packfile.h"
+#include "external-odb.h"
 
 #define FAILED_RUN "failed to run %s"
 
@@ -458,6 +459,9 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
argv_array_push(&prune, prune_expire);
if (quiet)
argv_array_push(&prune, "--no-progress");
+   if (has_external_odb())
+   argv_array_push(&prune,
+   "--exclude-promisor-objects");
if (run_command_v_opt(prune.argv, RUN_GIT_CMD))
return error(FAILED_RUN, prune.argv[0]);
}
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6b9cfc289d..6c71552cdf 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -75,6 +75,8 @@ static int use_bitmap_index = -1;
 static int write_bitmap_index;
 static uint16_t write_bitmap_options;
 
+static int exclude_promisor_objects;
+
 static unsigned long delta_cache_size = 0;
 static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
 static unsigned long cache_max_small_delta_size = 1000;
@@ -84,8 +86,9 @@ static unsigned long window_memory_limit = 0;
 static struct list_objects_filter_options filter_options;
 
 enum missing_action {
-   MA_ERROR = 0,/* fail if any missing objects are encountered */
-   MA_ALLOW_ANY,/* silently allow ALL missing objects */
+   MA_ERROR = 0,  /* fail if any missing objects are encountered */
+   MA_ALLOW_ANY,  /* silently allow ALL missing objects */
+   MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
 };
 static enum missing_action arg_missing_action;
 static show_object_fn fn_show_object;
@@ -2578,6 +2581,20 @@ static void show_object__ma_allow_any(struct object 
*obj, const char *name, void
show_object(obj, name, data);
 }
 
+static void show_object__ma_allow_promisor(struct object *obj, const char 
*name, void *data)
+{
+   assert(arg_missing_action == MA_ALLOW_PROMISOR);
+
+   /*
+* Quietly ignore EXPECTED missing objects.  This avoids problems with
+* staging them now and getting an odd error later.
+*/
+   if (!has_object_file(&obj->oid) && is_promisor_object(&obj->oid))
+   return;
+
+   show_object(obj, name, data);
+}
+
 static int option_parse_missing_action(const struct option *opt,
   const char *arg, int unset)
 {
@@ -2592,10 +2609,18 @@ static int option_parse_missing_action(const struct 
option *opt,
 
if (!strcmp(arg, "allow-any")) {
arg_missing_action = MA_ALLOW_ANY;
+   fetch_if_missing = 0;
fn_show_object = show_object__ma_allow_any;
return 0;
}
 
+   if (!strcmp(arg, "allow-promisor")) {
+   arg_missing_action = 

[PATCH 00/40] Promisor remotes and external ODB support

2018-01-03 Thread Christian Couder
This is an early patch series that start to merge the
jh/fsck-promisors patch series (which is currently in pu) with the
external odb patch series.

The merge is not complete and there is still work needed, but all the
tests pass and in my opinion this shows that it is a good way forward
to share the same mechanism to handle (many) remote object stores and
the related fsck and gc problems.

The jh/partial-clone (a separate patch series on top of
jh/fsck-promisors) still needs some work before it can be used on top
of this series. I rebased it on top but the tests do not pass yet. The
result of my rebase and current attempt to fix tests is here:

https://github.com/chriscool/git/commits/gl-partial-clone-rebased

This patch series does not include the last part of the previous
external odb series, which was about adding an `--initial-refspec`
option to `git clone`.

A few promisor related links
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

v6 partial clone part 2:
https://public-inbox.org/git/20171205165854.64979-1-...@jeffhostetler.com/

v7 partial clone part 3:
https://public-inbox.org/git/20171208155851.855-1-...@jeffhostetler.com/

External odb related links
~~~~~~~~~~~~~~~~~~~~~~~~~~

Peff started to work on external odbs some years ago:

http://thread.gmane.org/gmane.comp.version-control.git/206886/focus=207040
http://thread.gmane.org/gmane.comp.version-control.git/247171
http://thread.gmane.org/gmane.comp.version-control.git/202902/focus=203020

His work, which is not compile-tested any more, is still there:

https://github.com/peff/git/commits/jk/external-odb-wip

Initial discussions about external odbs are there:

http://thread.gmane.org/gmane.comp.version-control.git/288151/focus=295160

Version 1, 2, 3, 4, 5 and 6 of the external odbs series are here:

https://public-inbox.org/git/20160613085546.11784-1-chrisc...@tuxfamily.org/
https://public-inbox.org/git/20160628181933.24620-1-chrisc...@tuxfamily.org/
https://public-inbox.org/git/20161130210420.15982-1-chrisc...@tuxfamily.org/
https://public-inbox.org/git/20170620075523.26961-1-chrisc...@tuxfamily.org/
https://public-inbox.org/git/20170803091926.1755-1-chrisc...@tuxfamily.org/
https://public-inbox.org/git/20170916080731.13925-1-chrisc...@tuxfamily.org/

Some of the discussions related to Ben Peart's work that is used by
this series are here:

https://public-inbox.org/git/20170113155253.1644-1-benpe...@microsoft.com/
https://public-inbox.org/git/20170322165220.5660-1-benpe...@microsoft.com/
https://public-inbox.org/git/20170714132651.170708-1-benpe...@microsoft.com/

Version 1, 2, 3, 4, 5 and 6 of the external odbs series are here:

https://github.com/chriscool/git/commits/gl-external-odb12
https://github.com/chriscool/git/commits/gl-external-odb22
https://github.com/chriscool/git/commits/gl-external-odb61
https://github.com/chriscool/git/commits/gl-external-odb239
https://github.com/chriscool/git/commits/gl-external-odb373
https://github.com/chriscool/git/commits/gl-external-odb411

A patch series to add Git/Packet.pm that is now in master is also
related:

https://public-inbox.org/git/20171019123030.17338-1-chrisc...@tuxfamily.org/
https://public-inbox.org/git/20171105213836.11717-1-chrisc...@tuxfamily.org/
https://public-inbox.org/git/20171110132200.7871-1-chrisc...@tuxfamily.org/


https://public-inbox.org/git/20171019123030.17338-1-chrisc...@tuxfamily.org/

Ben Peart (1):
  Add t0450 to test 'get_direct' mechanism

Christian Couder (30):
  Add initial external odb support
  Add GIT_NO_EXTERNAL_ODB env variable
  external-odb: add has_external_odb()
  external-odb: implement external_odb_get_direct
  sha1_file: prepare for external odbs
  external-odb: add script mode support
  odb-helper: add 'enum odb_helper_type'
  odb-helper: add odb_helper_init() to send 'init' instruction
  t0400: add 'put_raw_obj' instruction to odb-helper script
  external odb: add 'put_raw_obj' support
  external-odb: accept only blobs for now
  t0400: add test for external odb write support
  Add t0410 to test external ODB transfer
  lib-httpd: pass config file to start_httpd()
  lib-httpd: add upload.sh
  lib-httpd: add list.sh
  lib-httpd: add apache-e-odb.conf
  odb-helper: add odb_helper_get_raw_object()
  pack-objects: don't pack objects in external odbs
  Add t0420 to test transfer to HTTP external odb
  external-odb: add 'get_direct' support
  odb-helper: add 'script_mode' to 'struct odb_helper'
  odb-helper: add init_object_process()
  Add t0460 to test passing git objects
  odb-helper: add put_object_process()
  Add t0470 to test passing raw objects
  odb-helper: add have_object_process()
  Add t0480 to test "have" capability and raw objects
  external-odb: use 'odb=magic' attribute to mark odb blobs
  Add Documentation/technical/external-odb.txt

Jonathan Tan (9):
  fsck: introduce promisor objects
  fsck: support refs pointing to promisor objects
  fsck: support referenced promisor objects
  fsck: support promisor objects as CLI argument
  index-pack: refact

[PATCH 39/40] external-odb: use 'odb=magic' attribute to mark odb blobs

2018-01-03 Thread Christian Couder
To tell which blobs should be sent to the "magic" external odb,
let's require that the blobs be marked using the 'odb=magic'
attribute.
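
For example, assuming an odb helper has already been registered under
the name "magic" (the attribute value must match the helper name),
blobs could be marked like this (the path pattern is only an
illustration):

	# mark all *.bin files as handled by the "magic" odb helper
	echo "*.bin odb=magic" >>.gitattributes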

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 external-odb.c | 25 ++---
 external-odb.h |  3 ++-
 sha1_file.c| 20 +++-
 t/t0400-external-odb.sh|  3 +++
 t/t0410-transfer-e-odb.sh  |  3 +++
 t/t0420-transfer-http-e-odb.sh |  3 +++
 t/t0470-read-object-http-e-odb.sh  |  3 +++
 t/t0480-read-object-have-http-e-odb.sh |  3 +++
 8 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/external-odb.c b/external-odb.c
index 50c1cec50b..e3a05e24e3 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -2,6 +2,7 @@
 #include "external-odb.h"
 #include "odb-helper.h"
 #include "config.h"
+#include "attr.h"
 
 static struct odb_helper *helpers;
 static struct odb_helper **helpers_tail = &helpers;
@@ -165,19 +166,37 @@ int external_odb_get_object(const unsigned char *sha1)
return external_odb_do_get_object(sha1);
 }
 
+static int has_odb_attrs(struct odb_helper *o, const char *path)
+{
+   static struct attr_check *check;
+
+   if (!check)
+   check = attr_check_initl("odb", NULL);
+
+   if (!git_check_attr(path, check)) {
+   const char *value = check->items[0].value;
+   return value ? !strcmp(o->name, value) : 0;
+   }
+   return 0;
+}
+
 int external_odb_put_object(const void *buf, size_t len,
-   const char *type, unsigned char *sha1)
+   const char *type, unsigned char *sha1,
+   const char *path)
 {
struct odb_helper *o;
 
external_odb_init();
 
/* For now accept only blobs */
-   if (strcmp(type, "blob"))
+   if (!path || strcmp(type, "blob"))
return 1;
 
for (o = helpers; o; o = o->next) {
-   int r = odb_helper_put_object(o, buf, len, type, sha1);
+   int r;
+   if (!has_odb_attrs(o, path))
+   continue;
+   r = odb_helper_put_object(o, buf, len, type, sha1);
if (r <= 0)
return r;
}
diff --git a/external-odb.h b/external-odb.h
index 26bb931685..5a8936f417 100644
--- a/external-odb.h
+++ b/external-odb.h
@@ -7,6 +7,7 @@ extern int external_odb_has_object(const unsigned char *sha1);
 extern int external_odb_get_object(const unsigned char *sha1);
 extern int external_odb_get_direct(const unsigned char *sha1);
 extern int external_odb_put_object(const void *buf, size_t len,
-  const char *type, unsigned char *sha1);
+  const char *type, unsigned char *sha1,
+  const char *path);
 
 #endif /* EXTERNAL_ODB_H */
diff --git a/sha1_file.c b/sha1_file.c
index 300029459f..d3f395e967 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1696,7 +1696,9 @@ static int freshen_packed_object(const unsigned char 
*sha1)
return 1;
 }
 
-int write_sha1_file(const void *buf, unsigned long len, const char *type, 
unsigned char *sha1)
+static int write_sha1_file_with_path(const void *buf, unsigned long len,
+const char *type, unsigned char *sha1,
+const char *path)
 {
char hdr[32];
int hdrlen = sizeof(hdr);
@@ -1705,13 +1707,19 @@ int write_sha1_file(const void *buf, unsigned long len, 
const char *type, unsign
 * it out into .git/objects/??/?{38} file.
 */
	write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);
-   if (!external_odb_put_object(buf, len, type, sha1))
+   if (!external_odb_put_object(buf, len, type, sha1, path))
return 0;
if (freshen_packed_object(sha1) || freshen_loose_object(sha1))
return 0;
return write_loose_object(sha1, hdr, hdrlen, buf, len, 0);
 }
 
+int write_sha1_file(const void *buf, unsigned long len,
+   const char *type, unsigned char *sha1)
+{
+   return write_sha1_file_with_path(buf, len, type, sha1, NULL);
+}
+
 int hash_sha1_file_literally(const void *buf, unsigned long len, const char 
*type,
 struct object_id *oid, unsigned flags)
 {
@@ -1832,7 +1840,8 @@ static int index_mem(struct object_id *oid, void *buf, 
size_t size,
}
 
if (write_object)
-   ret = write_sha1_file(buf, size, typename(type), oid->hash);
+   ret = write_sha1_file_with_path(buf, size, typename(type),
+   oid->hash, path);
else
ret = hash_sha1_file(buf, size, typename(type), oid->hash);

[PATCH 20/40] external-odb: accept only blobs for now

2018-01-03 Thread Christian Couder
The mechanism to decide which blobs should be sent to which
external object database will be very simple for now.
If the external odb helper supports any "put_*" instruction,
all new blobs will be sent to it.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 external-odb.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/external-odb.c b/external-odb.c
index 337bdd2540..93971e9ce4 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -159,6 +159,10 @@ int external_odb_put_object(const void *buf, size_t len,
 
external_odb_init();
 
+   /* For now accept only blobs */
+   if (strcmp(type, "blob"))
+   return 1;
+
for (o = helpers; o; o = o->next) {
int r = odb_helper_put_object(o, buf, len, type, sha1);
if (r <= 0)
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 22/40] Add t0410 to test external ODB transfer

2018-01-03 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/t0410-transfer-e-odb.sh | 144 ++
 1 file changed, 144 insertions(+)
 create mode 100755 t/t0410-transfer-e-odb.sh

diff --git a/t/t0410-transfer-e-odb.sh b/t/t0410-transfer-e-odb.sh
new file mode 100755
index 00..065ec7d759
--- /dev/null
+++ b/t/t0410-transfer-e-odb.sh
@@ -0,0 +1,144 @@
+#!/bin/sh
+
+test_description='basic tests for transferring external ODBs'
+
+. ./test-lib.sh
+
+ORIG_SOURCE="$PWD/.git"
+export ORIG_SOURCE
+
+ALT_SOURCE1="$PWD/alt-repo1/.git"
+export ALT_SOURCE1
+write_script odb-helper1 <<\EOF
+die() {
+   printf >&2 "%s\n" "$@"
+   exit 1
+}
+GIT_DIR=$ALT_SOURCE1; export GIT_DIR
+case "$1" in
+init)
+   echo "capability=get_git_obj"
+   echo "capability=have"
+   ;;
+have)
+   git cat-file --batch-check --batch-all-objects |
+   awk '{print $1 " " $3 " " $2}'
+   ;;
+get_git_obj)
+   cat "$GIT_DIR"/objects/$(echo $2 | sed 's#..#&/#')
+   ;;
+put_raw_obj)
+   sha1="$2"
+   size="$3"
+   kind="$4"
+   written=$(git hash-object -w -t "$kind" --stdin)
+   test "$written" = "$sha1" || die "bad sha1 passed '$sha1' vs written '$written'"
+   ref_hash=$(echo "$sha1 $size $kind" | GIT_DIR=$ORIG_SOURCE GIT_NO_EXTERNAL_ODB=1 git hash-object -w -t blob --stdin) || exit
+   GIT_DIR=$ORIG_SOURCE git update-ref refs/odbs/magic/"$sha1" "$ref_hash"
+   ;;
+*)
+   die "unknown command '$1'"
+   ;;
+esac
+EOF
+HELPER1="\"$PWD\"/odb-helper1"
+
+OTHER_SOURCE="$PWD/.git"
+export OTHER_SOURCE
+
+ALT_SOURCE2="$PWD/alt-repo2/.git"
+export ALT_SOURCE2
+write_script odb-helper2 <<\EOF
+die() {
+   printf >&2 "%s\n" "$@"
+   exit 1
+}
+GIT_DIR=$ALT_SOURCE2; export GIT_DIR
+case "$1" in
+init)
+   echo "capability=get_git_obj"
+   echo "capability=have"
+   ;;
+have)
+   GIT_DIR=$OTHER_SOURCE git for-each-ref --format='%(objectname)' refs/odbs/magic/ | GIT_DIR=$OTHER_SOURCE xargs git show
+   ;;
+get_git_obj)
+   OBJ_FILE="$GIT_DIR"/objects/$(echo $2 | sed 's#..#&/#')
+   if ! test -f "$OBJ_FILE"
+   then
+   # "Download" the missing object by copying it from alt-repo1
+   OBJ_DIR=$(echo $2 | sed 's/\(..\).*/\1/')
+   OBJ_BASE=$(basename "$OBJ_FILE")
+   ALT_OBJ_DIR1="$ALT_SOURCE1/objects/$OBJ_DIR"
+   ALT_OBJ_DIR2="$ALT_SOURCE2/objects/$OBJ_DIR"
+   mkdir -p "$ALT_OBJ_DIR2" || die "Could not mkdir 
'$ALT_OBJ_DIR2'"
+   OBJ_SRC="$ALT_OBJ_DIR1/$OBJ_BASE"
+   cp "$OBJ_SRC" "$ALT_OBJ_DIR2" ||
+   die "Could not cp '$OBJ_SRC' into '$ALT_OBJ_DIR2'"
+   fi
+   cat "$OBJ_FILE" || die "Could not cat '$OBJ_FILE'"
+   ;;
+put_raw_obj)
+   sha1="$2"
+   size="$3"
+   kind="$4"
+   written=$(git hash-object -w -t "$kind" --stdin)
+   test "$written" = "$sha1" || die "bad sha1 passed '$sha1' vs written '$written'"
+   ref_hash=$(echo "$sha1 $size $kind" | GIT_DIR=$OTHER_SOURCE GIT_NO_EXTERNAL_ODB=1 git hash-object -w -t blob --stdin) || exit
+   GIT_DIR=$OTHER_SOURCE git update-ref refs/odbs/magic/"$sha1" "$ref_hash"
+   ;;
+*)
+   die "unknown command '$1'"
+   ;;
+esac
+EOF
+HELPER2="\"$PWD\"/odb-helper2"
+
+test_expect_success 'setup first alternate repo' '
+   git init alt-repo1 &&
+   test_commit zero &&
+   git config odb.magic.scriptCommand "$HELPER1"
+'
+
+test_expect_success 'setup other repo and its alternate repo' '
+   git init other-repo &&
+   git init alt-repo2 &&
+   (cd other-repo &&
+git remote add origin .. &&
+git pull origin master &&
+git checkout master &&
+git log)
+'
+
+test_expect_success 'new blobs are put in first object store' '
+   test_commit one &&
+   hash1=$(git ls-tree HEAD | grep one.t | cut -f1 | cut -d\  -f3) &&
+   content=$(cd alt-repo1 && git show "$hash1") &&
+   test "$content" = "one" &&
+   test_commit two &&
+   hash2=$(git ls-tree HEAD | grep two.t | cut -f1 | cut -d\  -f3) &&
+   content=$(cd alt-repo1 && git show "$hash2") &&
+   test "$content" = "two"
+'
+
+test_expect_success 'other repo gets the blobs from object store' '
+   (cd other-repo &&
+git fetch origin "refs/odbs/magic/*:refs/odbs/magic/*" &&
+test_must_fail git cat-file blob "$hash1" &&
+test_must_fail git cat-file blob "$hash2" &&
+git config odb.magic.scriptCommand "$HELPER2" &&
+git cat-file blob "$hash1" &&
+git cat-file blob "$hash2"
+   )
+'
+
+test_expect_success 'other repo gets everything else' '
+   (cd other-repo &&
+git fetch origin &&
+content=$(git show "$hash1") &&
+test "$content" = "one" &&
+content=$(git show "$hash2") &&
+test "$content" = "two")
+'
+
+test_done
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 19/40] external odb: add 'put_raw_obj' support

2018-01-03 Thread Christian Couder
Add support for a 'put_raw_obj' capability/instruction to send new
objects to an external odb. Objects will be sent as they are (in
their 'raw' format). They will not be converted to Git objects.

For now any new Git object (blob, tree, commit, ...) would be sent
if 'put_raw_obj' is supported by an odb helper. This is not a great
default, but let's leave it to following commits to tweak that.
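
In script mode the helper is invoked as:

	<command> put_raw_obj <sha1> <size> <type>

with the raw object content on its standard input. A minimal sketch of
the helper side, using another Git repository as an example backend
(the backing repository path is only an illustration):

	case "$1" in
	put_raw_obj)
		sha1="$2" size="$3" kind="$4"
		# store the raw content read from stdin in the backing repo
		GIT_DIR=/path/to/store.git git hash-object -w -t "$kind" --stdin
		;;
	esac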

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 external-odb.c | 15 +++
 external-odb.h |  2 ++
 odb-helper.c   | 43 ++-
 odb-helper.h   |  3 +++
 sha1_file.c|  2 ++
 5 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/external-odb.c b/external-odb.c
index 2622c12853..337bdd2540 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -151,3 +151,18 @@ int external_odb_get_direct(const unsigned char *sha1)
 
return -1;
 }
+
+int external_odb_put_object(const void *buf, size_t len,
+   const char *type, unsigned char *sha1)
+{
+   struct odb_helper *o;
+
+   external_odb_init();
+
+   for (o = helpers; o; o = o->next) {
+   int r = odb_helper_put_object(o, buf, len, type, sha1);
+   if (r <= 0)
+   return r;
+   }
+   return 1;
+}
diff --git a/external-odb.h b/external-odb.h
index fb8b94972f..26bb931685 100644
--- a/external-odb.h
+++ b/external-odb.h
@@ -6,5 +6,7 @@ extern const char *external_odb_root(void);
 extern int external_odb_has_object(const unsigned char *sha1);
 extern int external_odb_get_object(const unsigned char *sha1);
 extern int external_odb_get_direct(const unsigned char *sha1);
+extern int external_odb_put_object(const void *buf, size_t len,
+  const char *type, unsigned char *sha1);
 
 #endif /* EXTERNAL_ODB_H */
diff --git a/odb-helper.c b/odb-helper.c
index ea642fd438..6f56f07b38 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -72,9 +72,10 @@ static void prepare_helper_command(struct argv_array *argv, 
const char *cmd,
strbuf_release();
 }
 
-__attribute__((format (printf,3,4)))
+__attribute__((format (printf,4,5)))
 static int odb_helper_start(struct odb_helper *o,
struct odb_helper_cmd *cmd,
+   int use_stdin,
const char *fmt, ...)
 {
va_list ap;
@@ -91,7 +92,10 @@ static int odb_helper_start(struct odb_helper *o,
 
cmd->child.argv = cmd->argv.argv;
cmd->child.use_shell = 1;
-   cmd->child.no_stdin = 1;
+   if (use_stdin)
+   cmd->child.in = -1;
+   else
+   cmd->child.no_stdin = 1;
cmd->child.out = -1;
 
	if (start_command(&cmd->child) < 0) {
@@ -120,7 +124,7 @@ int odb_helper_init(struct odb_helper *o)
FILE *fh;
struct strbuf line = STRBUF_INIT;
 
-   if (odb_helper_start(o, &cmd, "init") < 0)
+   if (odb_helper_start(o, &cmd, 0, "init") < 0)
return -1;
 
fh = xfdopen(cmd.child.out, "r");
@@ -180,7 +184,7 @@ static void odb_helper_load_have(struct odb_helper *o)
return;
o->have_valid = 1;
 
-   if (odb_helper_start(o, &cmd, "have") < 0)
+   if (odb_helper_start(o, &cmd, 0, "have") < 0)
return;
 
fh = xfdopen(cmd.child.out, "r");
@@ -235,7 +239,7 @@ int odb_helper_get_object(struct odb_helper *o, const 
unsigned char *sha1,
if (!obj)
return -1;
 
-   if (odb_helper_start(o, &cmd, "get_git_obj %s", sha1_to_hex(sha1)) < 0)
+   if (odb_helper_start(o, &cmd, 0, "get_git_obj %s", sha1_to_hex(sha1)) < 0)
return -1;
 
	memset(&stream, 0, sizeof(stream));
@@ -335,3 +339,32 @@ int odb_helper_get_direct(struct odb_helper *o,
 
return res;
 }
+
+int odb_helper_put_object(struct odb_helper *o,
+ const void *buf, size_t len,
+ const char *type, unsigned char *sha1)
+{
+   struct odb_helper_cmd cmd;
+
+   if (odb_helper_start(o, &cmd, 1, "put_raw_obj %s %"PRIuMAX" %s",
+sha1_to_hex(sha1), (uintmax_t)len, type) < 0)
+   return -1;
+
+   do {
+   int w = xwrite(cmd.child.in, buf, len);
+   if (w < 0) {
+   error("unable to write to odb helper '%s': %s",
+ o->name, strerror(errno));
+   close(cmd.child.in);
+   close(cmd.child.out);
+   odb_helper_finish(o, &cmd);
+   return -1;
+   }
+   len -= w;
+   } while (len > 0);
+
+   close(cmd.child.in);
+   close(cmd.child.out);
+   odb_helper_finish(o, &cmd);
+   return 0;
+}
diff

[PATCH 10/40] external-odb: implement external_odb_get_direct

2018-01-03 Thread Christian Couder
This is implemented only in the promisor remote mode
for now by calling fetch_object().

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 external-odb.c | 15 +++
 external-odb.h |  1 +
 odb-helper.c   | 13 +
 odb-helper.h   |  3 ++-
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/external-odb.c b/external-odb.c
index d26e63d8b1..5d0afb9762 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -76,3 +76,18 @@ int external_odb_has_object(const unsigned char *sha1)
return 1;
return 0;
 }
+
+int external_odb_get_direct(const unsigned char *sha1)
+{
+   struct odb_helper *o;
+
+   external_odb_init();
+
+   for (o = helpers; o; o = o->next) {
+   if (odb_helper_get_direct(o, sha1) < 0)
+   continue;
+   return 0;
+   }
+
+   return -1;
+}
diff --git a/external-odb.h b/external-odb.h
index 9a3c2f01b3..fd6708163e 100644
--- a/external-odb.h
+++ b/external-odb.h
@@ -4,5 +4,6 @@
 extern int has_external_odb(void);
 extern const char *external_odb_root(void);
 extern int external_odb_has_object(const unsigned char *sha1);
+extern int external_odb_get_direct(const unsigned char *sha1);
 
 #endif /* EXTERNAL_ODB_H */
diff --git a/odb-helper.c b/odb-helper.c
index 1404393807..4b70b287af 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -4,6 +4,7 @@
 #include "odb-helper.h"
 #include "run-command.h"
 #include "sha1-lookup.h"
+#include "fetch-object.h"
 
 struct odb_helper *odb_helper_new(const char *name, int namelen)
 {
@@ -52,3 +53,15 @@ int odb_helper_has_object(struct odb_helper *o, const 
unsigned char *sha1)
return !!odb_helper_lookup(o, sha1);
 }
 
+int odb_helper_get_direct(struct odb_helper *o,
+ const unsigned char *sha1)
+{
+   int res = 0;
+   uint64_t start = getnanotime();
+
+   fetch_object(o->dealer, sha1);
+
+   trace_performance_since(start, "odb_helper_get_direct");
+
+   return res;
+}
diff --git a/odb-helper.h b/odb-helper.h
index 9395e606ce..f4bc66b0ef 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -20,5 +20,6 @@ struct odb_helper {
 extern struct odb_helper *odb_helper_new(const char *name, int namelen);
 extern int odb_helper_has_object(struct odb_helper *o,
 const unsigned char *sha1);
-
+extern int odb_helper_get_direct(struct odb_helper *o,
+const unsigned char *sha1);
 #endif /* ODB_HELPER_H */
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 36/40] Add t0470 to test passing raw objects

2018-01-03 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/t0470-read-object-http-e-odb.sh | 109 ++
 t/t0470/read-object-plain |  83 +
 2 files changed, 192 insertions(+)
 create mode 100755 t/t0470-read-object-http-e-odb.sh
 create mode 100755 t/t0470/read-object-plain

diff --git a/t/t0470-read-object-http-e-odb.sh 
b/t/t0470-read-object-http-e-odb.sh
new file mode 100755
index 00..774528c04f
--- /dev/null
+++ b/t/t0470-read-object-http-e-odb.sh
@@ -0,0 +1,109 @@
+#!/bin/sh
+
+test_description='tests for read-object process passing plain objects to an HTTPD server'
+
+. ./test-lib.sh
+
+# If we don't specify a port, the current test number will be used
+# which will not work as it is less than 1024, so it can only be used by root.
+LIB_HTTPD_PORT=$(expr ${this_test#t} + 12000)
+
+. "$TEST_DIRECTORY"/lib-httpd.sh
+
+start_httpd apache-e-odb.conf
+
+PATH="$PATH:$TEST_DIRECTORY/t0470"
+
+# odb helper script must see this
+export HTTPD_URL
+
+HELPER="read-object-plain"
+
+test_expect_success 'setup repo with a root commit' '
+   test_commit zero
+'
+
+test_expect_success 'setup another repo from the first one' '
+   git init other-repo &&
+   (cd other-repo &&
+git remote add origin .. &&
+git pull origin master &&
+git checkout master &&
+git log)
+'
+
+test_expect_success 'setup the helper in the root repo' '
+   git config odb.magic.subprocessCommand "$HELPER"
+'
+
+UPLOADFILENAME="hello_apache_upload.txt"
+
+UPLOAD_URL="$HTTPD_URL/upload/?sha1=$UPLOADFILENAME=123=blob"
+
+test_expect_success 'can upload a file' '
+   echo "Hello Apache World!" >hello_to_send.txt &&
+   echo "How are you?" >>hello_to_send.txt &&
+   curl --data-binary @hello_to_send.txt --include "$UPLOAD_URL" 
>out_upload
+'
+
+LIST_URL="$HTTPD_URL/list/"
+
+test_expect_success 'can list uploaded files' '
+   curl --include "$LIST_URL" >out_list &&
+   grep "$UPLOADFILENAME" out_list
+'
+
+test_expect_success 'can delete uploaded files' '
+   curl --data "delete" --include "$UPLOAD_URL=1" >out_delete &&
+   curl --include "$LIST_URL" >out_list2 &&
+   ! grep "$UPLOADFILENAME" out_list2
+'
+
+FILES_DIR="httpd/www/files"
+
+test_expect_success 'new blobs are transferred to the http server' '
+   test_commit one &&
+   hash1=$(git ls-tree HEAD | grep one.t | cut -f1 | cut -d\  -f3) &&
+   echo "$hash1-4-blob" >expected &&
+   ls "$FILES_DIR" >actual &&
+   test_cmp expected actual
+'
+
+test_expect_success 'blobs can be retrieved from the http server' '
+   git cat-file blob "$hash1" &&
+   git log -p >expected
+'
+
+test_expect_success 'update other repo from the first one' '
+   (cd other-repo &&
+git fetch origin "refs/odbs/magic/*:refs/odbs/magic/*" &&
+test_must_fail git cat-file blob "$hash1" &&
+git config odb.magic.subprocesscommand "$HELPER" &&
+git cat-file blob "$hash1" &&
+git pull origin master)
+'
+
+test_expect_success 'local clone from the first repo' '
+   mkdir my-clone &&
+   (cd my-clone &&
+git clone .. . &&
+git cat-file blob "$hash1")
+'
+
+test_expect_success 'no-local clone from the first repo fails' '
+   mkdir my-other-clone &&
+   (cd my-other-clone &&
+test_must_fail git clone --no-local .. .) &&
+   rm -rf my-other-clone
+'
+
+test_expect_success 'no-local clone from the first repo with helper succeeds' '
+   mkdir my-other-clone &&
+   (cd my-other-clone &&
+git clone -c odb.magic.subprocessCommand="$HELPER" --no-local .. .) &&
+   rm -rf my-other-clone
+'
+
+stop_httpd
+
+test_done
diff --git a/t/t0470/read-object-plain b/t/t0470/read-object-plain
new file mode 100755
index 00..0766e16032
--- /dev/null
+++ b/t/t0470/read-object-plain
@@ -0,0 +1,83 @@
+#!/usr/bin/perl
+#
+
+use 5.008;
+use lib (split(/:/, $ENV{GITPERLLIB}));
+use strict;
+use warnings;
+use Git::Packet;
+use LWP::UserAgent;
+use HTTP::Request::Common;
+
+packet_initialize("git-read-object", 1);
+
+my %remote_caps = packet_read_and_check_capabilities("get_raw_obj", 
"put_raw_obj");
+packet_check_and_write_capabilities(\%remote_caps, "get_raw_obj", 
"put_raw_obj");
+
+my $http_url = $ENV{HTTPD_URL};
+
+while (1) {
+   my ($res, $command) = packet_txt_read();
+
+ 

[PATCH 40/40] Add Documentation/technical/external-odb.txt

2018-01-03 Thread Christian Couder
This describes the external odb mechanism's purpose and
how it works.

Helped-by: Ben Peart <benpe...@microsoft.com>
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 Documentation/technical/external-odb.txt | 342 +++
 1 file changed, 342 insertions(+)
 create mode 100644 Documentation/technical/external-odb.txt

diff --git a/Documentation/technical/external-odb.txt 
b/Documentation/technical/external-odb.txt
new file mode 100644
index 00..58ec8a8145
--- /dev/null
+++ b/Documentation/technical/external-odb.txt
@@ -0,0 +1,342 @@
+External ODBs
+^^^^^^^^^^^^^
+
+The External ODB mechanism makes it possible for Git objects (only
+blobs for now) to be stored in an "external object database"
+(External ODB).
+
+An External ODB can be any object store as long as there is a helper
+program called an "odb helper" that can communicate with Git to
+transfer objects to/from the external odb and to retrieve information
+about available objects in the external odb.
+
+Purpose
+=======
+
+The purpose of this mechanism is to make it possible to handle Git
+objects, especially blobs, in much more flexible ways.
+
+Currently Git can store its objects only in the form of loose objects
+in separate files or packed objects in a pack file. These existing
+object stores cannot be easily optimized for many different kinds of
+content.
+
+So the current stores are not flexible enough for some important use
+cases, like handling really big binary files or a really big number
+of files that are fetched only as needed. And it is not realistic to
+expect that Git could fully natively handle many such use cases. Git
+would need to natively implement different internal stores, which
+would be a huge burden and which could lead to re-implementing things
+like HTTP servers, Docker registries or artifact stores that already
+exist outside Git.
+
+Furthermore many improvements that depend on specific setups could
+be implemented in the way Git objects are managed if it were possible
+to customize how the Git objects are handled. For example a
+restartable clone using the bundle mechanism has often been
+requested, but implementing that would go against the strict rules
+under which the Git objects are currently handled.
+
+What Git needs is a mechanism to make it possible to customize in a
+lot of different ways how the Git objects are handled. Though this
+mechanism should try as much as possible to avoid interfering with the
+usual way in which Git handles its objects.
+
+Helpers
+=======
+
+ODB helpers are commands that have to be registered using either the
+"odb..subprocessCommand" or the "odb..scriptCommand"
+config variables.
+
+Registering such a command tells Git that an external odb called
+<name> exists and that the registered command should be used to
+communicate with it.
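+
+For example a helper for an external odb called "magic" could be
+registered in one of these two ways (the helper paths are only
+illustrations):
+
+	git config odb.magic.scriptCommand "/path/to/odb-helper-script"
+	git config odb.magic.subprocessCommand "/path/to/odb-helper-process"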
+
+The communication happens through instructions that are sent by Git
+and that the commands should answer. If it makes sense, Git can send
+the same instruction to many commands in the order in which they are
+configured.
+
+There are 2 kinds of commands. Commands registered using the
+"odb..subprocessCommand" config variable are called "process
+commands" and the associated mode is called "process mode". Commands
+registered using the "odb..scriptCommand" config variables
+are called "script commands" and the associated mode is called "script
+mode".
+
+Early on, Git commands send an 'init' instruction to the registered
+commands. A capability negotiation will take place during this
+request/response exchange which will let Git and the helpers know how
+they can further collaborate. The attribute system can also be used to
+tell Git which objects should be handled by which helper.
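+
+In script mode, for example, a helper could answer the 'init'
+instruction by advertising its capabilities, one per line (a sketch;
+the exact capability set depends on the helper):
+
+	case "$1" in
+	init)
+		echo "capability=get_git_obj"
+		echo "capability=have"
+		;;
+	esac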
+
+Process Mode
+============
+
+In process mode the command is started as a single process invocation
+that should last for the entire life of the single Git command that
+started it.
+
+A packet format (pkt-line, see technical/protocol-common.txt) based
+protocol over standard input and standard output is used for
+communication between Git and the helper command.
+
+After the process command is started, Git sends a welcome message
+("git-read-object-client"), a list of supported protocol version
+numbers, and a flush packet. Git expects to read a welcome response
+message ("git-read-object-server"), exactly one protocol version
+number from the previously sent list, and a flush packet. All further
+communication will be based on the selected version.
+
+The remaining protocol description below documents "version=1". Please
+note that "version=42" in the example below does not exist and is only
+there to illustrate how the protocol would look with more than one
+version.
+
+After the version negotiation Git sends a list of all capabilities
+that it supports

[PATCH 38/40] Add t0480 to test "have" capability and raw objects

2018-01-03 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/t0480-read-object-have-http-e-odb.sh | 109 +
 t/t0480/read-object-plain-have | 103 +++
 2 files changed, 212 insertions(+)
 create mode 100755 t/t0480-read-object-have-http-e-odb.sh
 create mode 100755 t/t0480/read-object-plain-have

diff --git a/t/t0480-read-object-have-http-e-odb.sh 
b/t/t0480-read-object-have-http-e-odb.sh
new file mode 100755
index 00..056a40f2bb
--- /dev/null
+++ b/t/t0480-read-object-have-http-e-odb.sh
@@ -0,0 +1,109 @@
+#!/bin/sh
+
+test_description='tests for read-object process with "have" cap and plain objects'
+
+. ./test-lib.sh
+
+# If we don't specify a port, the current test number will be used
+# which will not work as it is less than 1024, so it can only be used by root.
+LIB_HTTPD_PORT=$(expr ${this_test#t} + 12000)
+
+. "$TEST_DIRECTORY"/lib-httpd.sh
+
+start_httpd apache-e-odb.conf
+
+PATH="$PATH:$TEST_DIRECTORY/t0480"
+
+# odb helper script must see this
+export HTTPD_URL
+
+HELPER="read-object-plain-have"
+
+test_expect_success 'setup repo with a root commit' '
+   test_commit zero
+'
+
+test_expect_success 'setup another repo from the first one' '
+   git init other-repo &&
+   (cd other-repo &&
+git remote add origin .. &&
+git pull origin master &&
+git checkout master &&
+git log)
+'
+
+test_expect_success 'setup the helper in the root repo' '
+   git config odb.magic.subprocessCommand "$HELPER"
+'
+
+UPLOADFILENAME="hello_apache_upload.txt"
+
+UPLOAD_URL="$HTTPD_URL/upload/?sha1=$UPLOADFILENAME=123=blob"
+
+test_expect_success 'can upload a file' '
+   echo "Hello Apache World!" >hello_to_send.txt &&
+   echo "How are you?" >>hello_to_send.txt &&
+   curl --data-binary @hello_to_send.txt --include "$UPLOAD_URL" 
>out_upload
+'
+
+LIST_URL="$HTTPD_URL/list/"
+
+test_expect_success 'can list uploaded files' '
+   curl --include "$LIST_URL" >out_list &&
+   grep "$UPLOADFILENAME" out_list
+'
+
+test_expect_success 'can delete uploaded files' '
+   curl --data "delete" --include "$UPLOAD_URL=1" >out_delete &&
+   curl --include "$LIST_URL" >out_list2 &&
+   ! grep "$UPLOADFILENAME" out_list2
+'
+
+FILES_DIR="httpd/www/files"
+
+test_expect_success 'new blobs are transferred to the http server' '
+   test_commit one &&
+   hash1=$(git ls-tree HEAD | grep one.t | cut -f1 | cut -d\  -f3) &&
+   echo "$hash1-4-blob" >expected &&
+   ls "$FILES_DIR" >actual &&
+   test_cmp expected actual
+'
+
+test_expect_success 'blobs can be retrieved from the http server' '
+   git cat-file blob "$hash1" &&
+   git log -p >expected
+'
+
+test_expect_success 'update other repo from the first one' '
+   (cd other-repo &&
+git fetch origin "refs/odbs/magic/*:refs/odbs/magic/*" &&
+test_must_fail git cat-file blob "$hash1" &&
+git config odb.magic.subprocessCommand "$HELPER" &&
+git cat-file blob "$hash1" &&
+git pull origin master)
+'
+
+test_expect_success 'local clone from the first repo' '
+   mkdir my-clone &&
+   (cd my-clone &&
+git clone .. . &&
+git cat-file blob "$hash1")
+'
+
+test_expect_success 'no-local clone from the first repo fails' '
+   mkdir my-other-clone &&
+   (cd my-other-clone &&
+test_must_fail git clone --no-local .. .) &&
+   rm -rf my-other-clone
+'
+
+test_expect_success 'no-local clone from the first repo with helper succeeds' '
+   mkdir my-other-clone &&
+   (cd my-other-clone &&
+git clone -c odb.magic.subprocessCommand="$HELPER" --no-local .. .) &&
+   rm -rf my-other-clone
+'
+
+stop_httpd
+
+test_done
diff --git a/t/t0480/read-object-plain-have b/t/t0480/read-object-plain-have
new file mode 100755
index 00..f230cbd5eb
--- /dev/null
+++ b/t/t0480/read-object-plain-have
@@ -0,0 +1,103 @@
+#!/usr/bin/perl
+#
+
+use 5.008;
+use lib (split(/:/, $ENV{GITPERLLIB}));
+use strict;
+use warnings;
+use Git::Packet;
+use LWP::UserAgent;
+use HTTP::Request::Common;
+
+packet_initialize("git-read-object", 1);
+
+my %remote_caps = packet_read_and_check_capabilities("get_raw_obj", 
"put_raw_obj", "have");
+packet_check_and_write_capabilities(\%remote_caps, "get_raw_obj", 
"put_raw_obj", "have");
+
+my $http_url = 

[PATCH 37/40] odb-helper: add have_object_process()

2018-01-03 Thread Christian Couder
This adds the infrastructure to handle 'have' instructions in
process mode.

The answer from the helper sub-process should be like the
output in script mode, that is lines like this:

sha1 SPACE size SPACE type NEWLINE
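
For example, a script mode helper can produce the same format from a
local repository, as the test helpers in this series do:

	git cat-file --batch-check --batch-all-objects |
	awk '{print $1 " " $3 " " $2}'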

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 odb-helper.c | 76 ++--
 1 file changed, 74 insertions(+), 2 deletions(-)

diff --git a/odb-helper.c b/odb-helper.c
index d901f6d0bc..d8902a9541 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -635,6 +635,70 @@ static int odb_helper_object_cmp(const void *va, const 
void *vb)
return hashcmp(a->sha1, b->sha1);
 }
 
+static int send_have_packets(struct odb_helper *o,
+struct object_process *entry,
+struct strbuf *status)
+{
+   int packet_len;
+   int total_got = 0;
+   struct child_process *process = &entry->subprocess.process;
+   int err = packet_write_fmt_gently(process->in, "command=have\n");
+
+   if (err)
+   return err;
+
+   err = packet_flush_gently(process->in);
+   if (err)
+   return err;
+
+   for (;;) {
+   /* packet_read() writes a '\0' extra byte at the end */
+   char buf[LARGE_PACKET_DATA_MAX + 1];
+   char *p = buf;
+   int more;
+
+   packet_len = packet_read(process->out, NULL, NULL,
+   buf, LARGE_PACKET_DATA_MAX + 1,
+   PACKET_READ_GENTLE_ON_EOF);
+
+   if (packet_len <= 0)
+   break;
+
+   total_got += packet_len;
+
+   /* 'have' packets should end with '\n' or '\0' */
+   do {
+   char *eol = strchrnul(p, '\n');
+   more = (*eol == '\n');
+   *eol = '\0';
+   if (add_have_entry(o, p))
+   break;
+   p = eol + 1;
+   } while (more && *p);
+   }
+
+   if (packet_len < 0)
+   return packet_len;
+
+   return check_object_process_status(process->out, status);
+}
+
+static int have_object_process(struct odb_helper *o)
+{
+   int err;
+   struct object_process *entry;
+   struct strbuf status = STRBUF_INIT;
+
+   entry = launch_object_process(o, ODB_HELPER_CAP_HAVE);
+   if (!entry)
+   return -1;
+
+   err = send_have_packets(o, entry, &status);
+
+   return check_object_process_error(err, status.buf, entry, o->dealer,
+ ODB_HELPER_CAP_HAVE);
+}
+
 static void have_object_script(struct odb_helper *o)
 {
struct odb_helper_cmd cmd;
@@ -656,12 +720,20 @@ static void have_object_script(struct odb_helper *o)
 
 static void odb_helper_load_have(struct odb_helper *o)
 {
+   uint64_t start;
+
if (o->have_valid)
return;
o->have_valid = 1;
 
+   start = getnanotime();
+
if (o->type == ODB_HELPER_SCRIPT_CMD)
have_object_script(o);
+   else if (o->type == ODB_HELPER_SUBPROCESS_CMD)
+   have_object_process(o);
+
+   trace_performance_since(start, "odb_helper_load_have");
 
qsort(o->have, o->have_nr, sizeof(*o->have), odb_helper_object_cmp);
 }
@@ -923,7 +995,7 @@ int odb_helper_get_direct(struct odb_helper *o,
fetch_object(o->dealer, sha1);
else if (o->type == ODB_HELPER_SCRIPT_CMD)
res = get_direct_script(o, sha1);
-   else
+   else if (o->type == ODB_HELPER_SUBPROCESS_CMD)
res = get_object_process(o, sha1, -1);
 
trace_performance_since(start, "odb_helper_get_direct");
@@ -993,7 +1065,7 @@ int odb_helper_put_object(struct odb_helper *o,
  const void *buf, size_t len,
  const char *type, unsigned char *sha1)
 {
-   int res;
+   int res = 0;
uint64_t start = getnanotime();
 
if (o->type == ODB_HELPER_SCRIPT_CMD)
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 35/40] odb-helper: add put_object_process()

2018-01-03 Thread Christian Couder
This adds the infrastructure to send objects to a sub-process
handling the communication with an external odb.

For now we only handle sending raw blobs using the 'put_raw_obj'
instruction.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 odb-helper.c | 75 +---
 1 file changed, 72 insertions(+), 3 deletions(-)

diff --git a/odb-helper.c b/odb-helper.c
index a67dfddca0..d901f6d0bc 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -434,6 +434,58 @@ static int get_object_process(struct odb_helper *o, const 
unsigned char *sha1, i
  o->dealer, cur_cap);
 }
 
+static int send_put_packets(struct object_process *entry,
+   const unsigned char *sha1,
+   const void *buf,
+   size_t len,
+   struct strbuf *status)
+{
+   struct child_process *process = &entry->subprocess.process;
+   int err = packet_write_fmt_gently(process->in, "command=put_raw_obj\n");
+   if (err)
+   return err;
+
+   err = packet_write_fmt_gently(process->in, "sha1=%s\n", 
sha1_to_hex(sha1));
+   if (err)
+   return err;
+
+   err = packet_write_fmt_gently(process->in, "size=%"PRIuMAX"\n", len);
+   if (err)
+   return err;
+
+   err = packet_write_fmt_gently(process->in, "kind=blob\n");
+   if (err)
+   return err;
+
+   err = packet_flush_gently(process->in);
+   if (err)
+   return err;
+
+   err = write_packetized_from_buf(buf, len, process->in);
+   if (err)
+   return err;
+
+   return check_object_process_status(process->out, status);
+}
+
+static int put_object_process(struct odb_helper *o,
+ const void *buf, size_t len,
+ const char *type, unsigned char *sha1)
+{
+   int err;
+   struct object_process *entry;
+   struct strbuf status = STRBUF_INIT;
+
+   entry = launch_object_process(o, ODB_HELPER_CAP_PUT_RAW_OBJ);
+   if (!entry)
+   return -1;
+
+   err = send_put_packets(entry, sha1, buf, len, &status);
+
+   return check_object_process_error(err, status.buf, entry, o->dealer,
+ ODB_HELPER_CAP_PUT_RAW_OBJ);
+}
+
 struct odb_helper *odb_helper_new(const char *name, int namelen)
 {
struct odb_helper *o;
@@ -908,9 +960,9 @@ int odb_helper_get_object(struct odb_helper *o,
return res;
 }
 
-int odb_helper_put_object(struct odb_helper *o,
- const void *buf, size_t len,
- const char *type, unsigned char *sha1)
+static int put_raw_object_script(struct odb_helper *o,
+const void *buf, size_t len,
+const char *type, unsigned char *sha1)
 {
struct odb_helper_cmd cmd;
 
@@ -936,3 +988,20 @@ int odb_helper_put_object(struct odb_helper *o,
	odb_helper_finish(o, &cmd);
return 0;
 }
+
+int odb_helper_put_object(struct odb_helper *o,
+ const void *buf, size_t len,
+ const char *type, unsigned char *sha1)
+{
+   int res;
+   uint64_t start = getnanotime();
+
+   if (o->type == ODB_HELPER_SCRIPT_CMD)
+   res = put_raw_object_script(o, buf, len, type, sha1);
+   else if (o->type == ODB_HELPER_SUBPROCESS_CMD)
+   res = put_object_process(o, buf, len, type, sha1);
+
+   trace_performance_since(start, "odb_helper_put_object");
+
+   return res;
+}
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 34/40] Add t0460 to test passing git objects

2018-01-03 Thread Christian Couder
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/t0460-read-object-git.sh | 28 +
 t/t0460/read-object-git| 78 ++
 2 files changed, 106 insertions(+)
 create mode 100755 t/t0460-read-object-git.sh
 create mode 100755 t/t0460/read-object-git

diff --git a/t/t0460-read-object-git.sh b/t/t0460-read-object-git.sh
new file mode 100755
index 00..2873b445f3
--- /dev/null
+++ b/t/t0460-read-object-git.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+test_description='tests for long running read-object process passing git objects'
+
+. ./test-lib.sh
+
+PATH="$PATH:$TEST_DIRECTORY/t0460"
+
+test_expect_success 'setup host repo with a root commit' '
+   test_commit zero &&
+   hash1=$(git ls-tree HEAD | grep zero.t | cut -f1 | cut -d\  -f3)
+'
+
+HELPER="read-object-git"
+
+test_expect_success 'blobs can be retrieved from the host repo' '
+   git init guest-repo &&
+   (cd guest-repo &&
+git config odb.magic.subprocessCommand "$HELPER" &&
+git cat-file blob "$hash1" >/dev/null)
+'
+
+test_expect_success 'invalid blobs generate errors' '
+   cd guest-repo &&
+   test_must_fail git cat-file blob "invalid"
+'
+
+test_done
diff --git a/t/t0460/read-object-git b/t/t0460/read-object-git
new file mode 100755
index 00..4b3ca0948b
--- /dev/null
+++ b/t/t0460/read-object-git
@@ -0,0 +1,78 @@
+#!/usr/bin/perl
+#
+# Example implementation for the Git read-object protocol version 1
+# See Documentation/technical/read-object-protocol.txt
+#
+# Allows you to test the ability for blobs to be pulled from a host git repo
+# "on demand."  Called when git needs a blob it couldn't find locally due to
+# a lazy clone that only cloned the commits and trees.
+#
+# A lazy clone can be simulated via the following commands from the host repo
+# you wish to create a lazy clone of:
+#
+# cd /host_repo
+# git rev-parse HEAD
+# git init /guest_repo
+# git cat-file --batch-check --batch-all-objects | grep -v 'blob' |
+#  cut -d' ' -f1 | git pack-objects /e/guest_repo/.git/objects/pack/noblobs
+# cd /guest_repo
+# git config core.virtualizeobjects true
+# git reset --hard 
+#
+# Please note, this sample is a minimal skeleton. No proper error handling 
+# was implemented.
+#
+
+use 5.008;
+use lib (split(/:/, $ENV{GITPERLLIB}));
+use strict;
+use warnings;
+use Git::Packet;
+
+#
+# Point $DIR to the folder where your host git repo is located so we can pull
+# missing objects from it
+#
+my $DIR = "../.git/";
+
+packet_initialize("git-read-object", 1);
+
+my %remote_caps = packet_read_and_check_capabilities("get_git_obj");
+packet_check_and_write_capabilities(\%remote_caps, "get_git_obj");
+
+while (1) {
+   my ($res, $command) = packet_txt_read();
+
+   if ( $res == -1 ) {
+   exit 0;
+   }
+
+   $command =~ s/^command=//;
+
+   if ( $command eq "init" ) {
+   packet_bin_read();
+
+   packet_txt_write("status=success");
+   packet_flush();
+   } elsif ( $command eq "get_git_obj" ) {
+   my ($sha1) = packet_txt_read() =~ /^sha1=([0-9a-f]{40})$/;
+   packet_bin_read();
+
+   my $path = $sha1;
+   $path =~ s{..}{$&/};
+   $path = $DIR . "/objects/" . $path;
+
+   my $contents = do {
+   local $/;
+   open my $fh, $path or die "Can't open '$path': $!";
+   <$fh>
+   };
+
+   packet_bin_write($contents);
+   packet_flush();
+   packet_txt_write("status=success");
+   packet_flush();
+   } else {
+   die "bad command '$command'";
+   }
+}
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 33/40] Add t0450 to test 'get_direct' mechanism

2018-01-03 Thread Christian Couder
From: Ben Peart <benpe...@microsoft.com>

Signed-off-by: Ben Peart <benpe...@microsoft.com>
Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/t0450-read-object.sh | 28 +
 t/t0450/read-object| 68 ++
 2 files changed, 96 insertions(+)
 create mode 100755 t/t0450-read-object.sh
 create mode 100755 t/t0450/read-object

diff --git a/t/t0450-read-object.sh b/t/t0450-read-object.sh
new file mode 100755
index 00..6b97305452
--- /dev/null
+++ b/t/t0450-read-object.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+test_description='tests for long running read-object process'
+
+. ./test-lib.sh
+
+PATH="$PATH:$TEST_DIRECTORY/t0450"
+
+test_expect_success 'setup host repo with a root commit' '
+   test_commit zero &&
+   hash1=$(git ls-tree HEAD | grep zero.t | cut -f1 | cut -d\  -f3)
+'
+
+HELPER="read-object"
+
+test_expect_success 'blobs can be retrieved from the host repo' '
+   git init guest-repo &&
+   (cd guest-repo &&
+git config odb.magic.subprocessCommand "$HELPER" &&
+git cat-file blob "$hash1" >/dev/null)
+'
+
+test_expect_success 'invalid blobs generate errors' '
+   cd guest-repo &&
+   test_must_fail git cat-file blob "invalid"
+'
+
+test_done
diff --git a/t/t0450/read-object b/t/t0450/read-object
new file mode 100755
index 00..004e9368c9
--- /dev/null
+++ b/t/t0450/read-object
@@ -0,0 +1,68 @@
+#!/usr/bin/perl
+#
+# Example implementation for the Git read-object protocol version 1
+# See Documentation/technical/read-object-protocol.txt
+#
+# Allows you to test the ability for blobs to be pulled from a host git repo
+# "on demand."  Called when git needs a blob it couldn't find locally due to
+# a lazy clone that only cloned the commits and trees.
+#
+# A lazy clone can be simulated via the following commands from the host repo
+# you wish to create a lazy clone of:
+#
+# cd /host_repo
+# git rev-parse HEAD
+# git init /guest_repo
+# git cat-file --batch-check --batch-all-objects | grep -v 'blob' |
+#  cut -d' ' -f1 | git pack-objects /e/guest_repo/.git/objects/pack/noblobs
+# cd /guest_repo
+# git config core.virtualizeobjects true
+# git reset --hard 
+#
+# Please note, this sample is a minimal skeleton. No proper error handling 
+# was implemented.
+#
+
+use 5.008;
+use lib (split(/:/, $ENV{GITPERLLIB}));
+use strict;
+use warnings;
+use Git::Packet;
+
+#
+# Point $DIR to the folder where your host git repo is located so we can pull
+# missing objects from it
+#
+my $DIR = "../.git/";
+
+packet_initialize("git-read-object", 1);
+
+my %remote_caps = packet_read_and_check_capabilities("get_direct");
+packet_check_and_write_capabilities(\%remote_caps, "get_direct");
+
+while (1) {
+   my ($res, $command) = packet_txt_read();
+
+   if ( $res == -1 ) {
+   exit 0;
+   }
+
+   $command =~ s/^command=//;
+
+   if ( $command eq "init" ) {
+   packet_bin_read();
+
+   packet_txt_write("status=success");
+   packet_flush();
+   } elsif ( $command eq "get_direct" ) {
+   my ($sha1) = packet_txt_read() =~ /^sha1=([0-9a-f]{40})$/;
+   packet_bin_read();
+
+   system ('git --git-dir="' . $DIR . '" cat-file blob ' . $sha1 . 
' | GIT_NO_EXTERNAL_ODB=1 git hash-object -w --stdin >/dev/null 2>&1');
+
+   packet_txt_write(($?) ? "status=error" : "status=success");
+   packet_flush();
+   } else {
+   die "bad command '$command'";
+   }
+}
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 26/40] lib-httpd: add apache-e-odb.conf

2018-01-03 Thread Christian Couder
This is an apache config file to test external object databases.
It uses the upload.sh and list.sh CGI scripts that have been added
previously to make apache store external objects.
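
The tests then exercise that server with plain curl calls, for example
(UPLOAD_URL and LIST_URL are defined in the test scripts):

	curl --data-binary @hello_to_send.txt --include "$UPLOAD_URL"
	curl --include "$LIST_URL"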

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/lib-httpd/apache-e-odb.conf | 214 ++
 1 file changed, 214 insertions(+)
 create mode 100644 t/lib-httpd/apache-e-odb.conf

diff --git a/t/lib-httpd/apache-e-odb.conf b/t/lib-httpd/apache-e-odb.conf
new file mode 100644
index 00..19a1540c82
--- /dev/null
+++ b/t/lib-httpd/apache-e-odb.conf
@@ -0,0 +1,214 @@
+ServerName dummy
+PidFile httpd.pid
+DocumentRoot www
+LogFormat "%h %l %u %t \"%r\" %>s %b" common
+CustomLog access.log common
+ErrorLog error.log
+
+   LoadModule log_config_module modules/mod_log_config.so
+
+
+   LoadModule alias_module modules/mod_alias.so
+
+
+   LoadModule cgi_module modules/mod_cgi.so
+
+
+   LoadModule env_module modules/mod_env.so
+
+
+   LoadModule rewrite_module modules/mod_rewrite.so
+
+
+   LoadModule version_module modules/mod_version.so
+
+
+   LoadModule headers_module modules/mod_headers.so
+
+
+
+LockFile accept.lock
+
+
+
+
+   LoadModule auth_module modules/mod_auth.so
+
+
+
+= 2.1>
+
+   LoadModule auth_basic_module modules/mod_auth_basic.so
+
+
+   LoadModule authn_file_module modules/mod_authn_file.so
+
+
+   LoadModule authz_user_module modules/mod_authz_user.so
+
+
+   LoadModule authz_host_module modules/mod_authz_host.so
+
+
+
+= 2.4>
+
+   LoadModule authn_core_module modules/mod_authn_core.so
+
+
+   LoadModule authz_core_module modules/mod_authz_core.so
+
+
+   LoadModule access_compat_module modules/mod_access_compat.so
+
+
+   LoadModule mpm_prefork_module modules/mod_mpm_prefork.so
+
+
+   LoadModule unixd_module modules/mod_unixd.so
+
+
+
+PassEnv GIT_VALGRIND
+PassEnv GIT_VALGRIND_OPTIONS
+PassEnv GNUPGHOME
+PassEnv ASAN_OPTIONS
+PassEnv GIT_TRACE
+PassEnv GIT_CONFIG_NOSYSTEM
+
+Alias /dumb/ www/
+Alias /auth/dumb/ www/auth/dumb/
+
+
+   SetEnv GIT_EXEC_PATH ${GIT_EXEC_PATH}
+   SetEnv GIT_HTTP_EXPORT_ALL
+
+
+   SetEnv GIT_EXEC_PATH ${GIT_EXEC_PATH}
+
+
+   SetEnv GIT_EXEC_PATH ${GIT_EXEC_PATH}
+   SetEnv GIT_HTTP_EXPORT_ALL
+   SetEnv GIT_COMMITTER_NAME "Custom User"
+   SetEnv GIT_COMMITTER_EMAIL cus...@example.com
+
+
+   SetEnv GIT_EXEC_PATH ${GIT_EXEC_PATH}
+   SetEnv GIT_HTTP_EXPORT_ALL
+   SetEnv GIT_NAMESPACE ns
+
+
+   SetEnv GIT_EXEC_PATH ${GIT_EXEC_PATH}
+   SetEnv GIT_HTTP_EXPORT_ALL
+   Header set Set-Cookie name=value
+
+
+   SetEnv GIT_EXEC_PATH ${GIT_EXEC_PATH}
+   SetEnv GIT_HTTP_EXPORT_ALL
+
+ScriptAlias /upload/ upload.sh/
+ScriptAlias /list/ list.sh/
+
+   Options FollowSymlinks
+
+
+  Options ExecCGI
+
+
+  Options ExecCGI
+
+
+   Options ExecCGI
+
+
+RewriteEngine on
+RewriteRule ^/smart-redir-perm/(.*)$ /smart/$1 [R=301]
+RewriteRule ^/smart-redir-temp/(.*)$ /smart/$1 [R=302]
+RewriteRule ^/smart-redir-auth/(.*)$ /auth/smart/$1 [R=301]
+RewriteRule ^/smart-redir-limited/(.*)/info/refs$ /smart/$1/info/refs [R=301]
+RewriteRule ^/ftp-redir/(.*)$ ftp://localhost:1000/$1 [R=302]
+
+RewriteRule ^/loop-redir/x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-(.*) /$1 
[R=302]
+RewriteRule ^/loop-redir/(.*)$ /loop-redir/x-$1 [R=302]
+
+# Apache 2.2 does not understand , so we use RewriteCond.
+# And as RewriteCond does not allow testing for non-matches, we match
+# the desired case first (one has abra, two has cadabra), and let it
+# pass by marking the RewriteRule as [L], "last rule, do not process
+# any other matching RewriteRules after this"), and then have another
+# RewriteRule that matches all other cases and lets them fail via '[F]',
+# "fail the request".
+RewriteCond %{HTTP:x-magic-one} =abra
+RewriteCond %{HTTP:x-magic-two} =cadabra
+RewriteRule ^/smart_headers/.* - [L]
+RewriteRule ^/smart_headers/.* - [F]
+
+
+LoadModule ssl_module modules/mod_ssl.so
+
+SSLCertificateFile httpd.pem
+SSLCertificateKeyFile httpd.pem
+SSLRandomSeed startup file:/dev/urandom 512
+SSLRandomSeed connect file:/dev/urandom 512
+SSLSessionCache none
+SSLMutex file:ssl_mutex
+SSLEngine On
+
+
+
+   AuthType Basic
+   AuthName "git-auth"
+   AuthUserFile passwd
+   Require valid-user
+
+
+
+   AuthType Basic
+   AuthName "git-auth"
+   AuthUserFile passwd
+   Require valid-user
+
+
+
+   AuthType Basic
+   AuthName "git-auth"
+   AuthUserFile passwd
+   Require valid-user
+
+
+RewriteCond %{QUERY_STRING} service=git-receive-pack [OR]
+RewriteCond %{REQUEST_URI} /git-receive-pack$
+RewriteRule ^/half-auth-complete/ - [E=AUTHREQUIRED:yes]
+
+
+  Order Deny,Allow
+  Deny from env=AUTHREQUIRED
+
+  AuthType Basic
+  AuthName "Git Access"
+  AuthUserFile passwd
+  Require valid-user
+  Sa

[PATCH 31/40] odb-helper: add 'script_mode' to 'struct odb_helper'

2018-01-03 Thread Christian Couder
to prepare for having a long running odb helper sub-process
handling the communication between Git and an external odb.

We introduce "odb..subprocesscommand" to make it
possible to define such a sub-process, and we mark such odb
helpers with the new 'script_mode' field set to 0.

Helpers defined using the existing "odb..scriptcommand"
are marked with the 'script_mode' field set to 1.

Implementation of the different capabilities/instructions in
the new (sub-)process mode is left for following commits.
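
For example (the helper path is only an illustration):

	git config odb.magic.subprocessCommand "/path/to/read-object-helper"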

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 external-odb.c |  4 
 odb-helper.c   | 19 ++-
 odb-helper.h   |  2 +-
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/external-odb.c b/external-odb.c
index 3ce3d111f3..f38c2c2fe3 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -43,6 +43,10 @@ static int external_odb_config(const char *var, const char 
*value, void *data)
o->type = ODB_HELPER_SCRIPT_CMD;
return git_config_string(>dealer, var, value);
}
+   if (!strcmp(subkey, "subprocesscommand")) {
+   o->type = ODB_HELPER_SUBPROCESS_CMD;
+   return git_config_string(>dealer, var, value);
+   }
 
return 0;
 }
diff --git a/odb-helper.c b/odb-helper.c
index 0fa7af0348..91b4de1a05 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -124,6 +124,9 @@ int odb_helper_init(struct odb_helper *o)
FILE *fh;
struct strbuf line = STRBUF_INIT;
 
+   if (o->type != ODB_HELPER_SCRIPT_CMD)
+   return 0;
+
if (odb_helper_start(o, , 0, "init") < 0)
return -1;
 
@@ -174,16 +177,12 @@ static int odb_helper_object_cmp(const void *va, const 
void *vb)
return hashcmp(a->sha1, b->sha1);
 }
 
-static void odb_helper_load_have(struct odb_helper *o)
+static void have_object_script(struct odb_helper *o)
 {
struct odb_helper_cmd cmd;
FILE *fh;
struct strbuf line = STRBUF_INIT;
 
-   if (o->have_valid)
-   return;
-   o->have_valid = 1;
-
if (odb_helper_start(o, , 0, "have") < 0)
return;
 
@@ -195,6 +194,16 @@ static void odb_helper_load_have(struct odb_helper *o)
	strbuf_release(&line);
fclose(fh);
	odb_helper_finish(o, &cmd);
+}
+
+static void odb_helper_load_have(struct odb_helper *o)
+{
+   if (o->have_valid)
+   return;
+   o->have_valid = 1;
+
+   if (o->type == ODB_HELPER_SCRIPT_CMD)
+   have_object_script(o);
 
qsort(o->have, o->have_nr, sizeof(*o->have), odb_helper_object_cmp);
 }
diff --git a/odb-helper.h b/odb-helper.h
index 4a9cc7f07b..9a33adbf0b 100644
--- a/odb-helper.h
+++ b/odb-helper.h
@@ -7,7 +7,7 @@ enum odb_helper_type {
ODB_HELPER_NONE = 0,
ODB_HELPER_GIT_REMOTE,
ODB_HELPER_SCRIPT_CMD,
-   ODB_HELPER_PROCESS_CMD,
+   ODB_HELPER_SUBPROCESS_CMD,
OBJ_HELPER_MAX
 };
 
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 23/40] lib-httpd: pass config file to start_httpd()

2018-01-03 Thread Christian Couder
This makes it possible to start an apache web server with different
config files.

This will be used in a later patch to pass a config file that makes
apache store external objects.
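
A test script can then do, for example:

	. "$TEST_DIRECTORY"/lib-httpd.sh
	start_httpd apache-e-odb.conf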

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 t/lib-httpd.sh | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh
index 435a37465a..2e659a8ee2 100644
--- a/t/lib-httpd.sh
+++ b/t/lib-httpd.sh
@@ -171,12 +171,14 @@ prepare_httpd() {
 }
 
 start_httpd() {
+   APACHE_CONF_FILE=${1-apache.conf}
+
prepare_httpd >&3 2>&4
 
trap 'code=$?; stop_httpd; (exit $code); die' EXIT
 
"$LIB_HTTPD_PATH" -d "$HTTPD_ROOT_PATH" \
-   -f "$TEST_PATH/apache.conf" $HTTPD_PARA \
+   -f "$TEST_PATH/$APACHE_CONF_FILE" $HTTPD_PARA \
-c "Listen 127.0.0.1:$LIB_HTTPD_PORT" -k start \
>&3 2>&4
if test $? -ne 0
@@ -191,7 +193,7 @@ stop_httpd() {
trap 'die' EXIT
 
"$LIB_HTTPD_PATH" -d "$HTTPD_ROOT_PATH" \
-   -f "$TEST_PATH/apache.conf" $HTTPD_PARA -k stop
+   -f "$TEST_PATH/$APACHE_CONF_FILE" $HTTPD_PARA -k stop
 }
 
 test_http_push_nonff () {
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 09/40] introduce fetch-object: fetch one promisor object

2018-01-03 Thread Christian Couder
From: Jonathan Tan 

Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.

This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
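
As a rough sketch of what this enables from the command line (assuming
the server allows requests for unadvertised objects, e.g. via
uploadpack.allowAnySHA1InWant, and with placeholder variables):

	git fetch-pack --from-promisor --no-dependents "$remote_url" "$sha1"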

Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.

This will be tested in a subsequent commit.

Signed-off-by: Jonathan Tan 
Signed-off-by: Junio C Hamano 
---
 Documentation/gitremote-helpers.txt |  7 ++
 Makefile|  1 +
 builtin/fetch-pack.c|  8 +++
 builtin/index-pack.c| 14 ---
 fetch-object.c  | 28 ++
 fetch-object.h  |  6 +
 fetch-pack.c| 48 +
 fetch-pack.h|  8 +++
 remote-curl.c   | 14 ++-
 transport.c |  8 +++
 transport.h | 11 +
 11 files changed, 128 insertions(+), 25 deletions(-)
 create mode 100644 fetch-object.c
 create mode 100644 fetch-object.h

diff --git a/Documentation/gitremote-helpers.txt 
b/Documentation/gitremote-helpers.txt
index 4a584f3c5d..4b8c93ec59 100644
--- a/Documentation/gitremote-helpers.txt
+++ b/Documentation/gitremote-helpers.txt
@@ -466,6 +466,13 @@ set by Git if the remote helper has the 'option' 
capability.
Transmit  as a push option. As the push option
must not contain LF or NUL characters, the string is not encoded.
 
+'option from-promisor' {'true'|'false'}::
+   Indicate that these objects are being fetched from a promisor.
+
+'option no-dependents' {'true'|'false'}::
+   Indicate that only the objects wanted need to be fetched, not
+   their dependents.
+
 SEE ALSO
 
 linkgit:git-remote[1]
diff --git a/Makefile b/Makefile
index 07694185c9..25b878279e 100644
--- a/Makefile
+++ b/Makefile
@@ -800,6 +800,7 @@ LIB_OBJS += ewah/ewah_io.o
 LIB_OBJS += ewah/ewah_rlw.o
 LIB_OBJS += exec_cmd.o
 LIB_OBJS += external-odb.o
+LIB_OBJS += fetch-object.o
 LIB_OBJS += fetch-pack.o
 LIB_OBJS += fsck.o
 LIB_OBJS += fsmonitor.o
diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c
index 366b9d13f9..02abe7211e 100644
--- a/builtin/fetch-pack.c
+++ b/builtin/fetch-pack.c
@@ -143,6 +143,14 @@ int cmd_fetch_pack(int argc, const char **argv, const char 
*prefix)
args.update_shallow = 1;
continue;
}
+   if (!strcmp("--from-promisor", arg)) {
+   args.from_promisor = 1;
+   continue;
+   }
+   if (!strcmp("--no-dependents", arg)) {
+   args.no_dependents = 1;
+   continue;
+   }
usage(fetch_pack_usage);
}
if (deepen_not.nr)
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 47515d8977..9dffaf20ae 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1429,14 +1429,16 @@ static void write_special_file(const char *suffix, 
const char *msg,
if (close(fd) != 0)
die_errno(_("cannot close written %s file '%s'"),
  suffix, filename);
-   *report = suffix;
+   if (report)
+   *report = suffix;
}
strbuf_release(_buf);
 }
 
 static void final(const char *final_pack_name, const char *curr_pack_name,
  const char *final_index_name, const char *curr_index_name,
- const char *keep_msg, unsigned char *sha1)
+ const char *keep_msg, const char *promisor_msg,
+ unsigned char *sha1)
 {
const char *report = "pack";
struct strbuf pack_name = STRBUF_INIT;
@@ -1455,6 +1457,9 @@ static void final(const char *final_pack_name, const char 
*curr_pack_name,
if (keep_msg)
write_special_file("keep", keep_msg, final_pack_name, sha1,
				   &report);
+   if (promisor_msg)
+   write_special_file("promisor", promisor_msg, 

[PATCH 11/40] sha1_file: support lazily fetching missing objects

2018-01-03 Thread Christian Couder
From: Jonathan Tan 

Teach sha1_file to fetch objects from the remote configured in
extensions.partialclone whenever an object is requested but missing.

The fetching of objects can be suppressed through a global variable.
This is used by fsck and index-pack.
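
A minimal sketch of the intended setup, assuming "origin" is the
promisor remote:

	# the repository may also need core.repositoryformatversion=1
	# for extensions.* to be honored
	git config extensions.partialclone origin
	# a later access to a missing object, e.g.
	#	git cat-file blob <sha1>
	# can then trigger an automatic fetch from "origin"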

However, by default, such fetching is not suppressed. This is meant as a
temporary measure to ensure that all Git commands work in such a
situation. Future patches will update some commands to either tolerate
missing objects (without fetching them) or be more efficient in fetching
them.

In order to determine the code changes in sha1_file.c necessary, I
investigated the following:
 (1) functions in sha1_file.c that take in a hash, without the user
 regarding how the object is stored (loose or packed)
 (2) functions in packfile.c (because I need to check callers that know
 about the loose/packed distinction and operate on both differently,
 and ensure that they can handle the concept of objects that are
 neither loose nor packed)

(1) is handled by the modification to sha1_object_info_extended().

For (2), I looked at for_each_packed_object and others.  For
for_each_packed_object, the callers either already work or are fixed in
this patch:
 - reachable - only to find recent objects
 - builtin/fsck - already knows about missing objects
 - builtin/cat-file - warning message added in this commit

Callers of the other functions do not need to be changed:
 - parse_pack_index
   - http - indirectly from http_get_info_packs
   - find_pack_entry_one
 - this searches a single pack that is provided as an argument; the
   caller already knows (through other means) that the sought object
   is in a specific pack
 - find_sha1_pack
   - fast-import - appears to be an optimization to not store a file if
 it is already in a pack
   - http-walker - to search through a struct alt_base
   - http-push - to search through remote packs
 - has_sha1_pack
   - builtin/fsck - already knows about promisor objects
   - builtin/count-objects - informational purposes only (check if loose
 object is also packed)
   - builtin/prune-packed - check if object to be pruned is packed (if
 not, don't prune it)
   - revision - used to exclude packed objects if requested by user
   - diff - just for optimization

Signed-off-by: Jonathan Tan 
Signed-off-by: Junio C Hamano 
---
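Illustration only, not part of the patch: in the style of the new
t/t0410-partial-clone.sh, the lazy fetch can be exercised roughly like
this (the promisor remote, the partial clone "repo" and the missing
object id in $MISSING are assumed to have been set up earlier in the
test script):

	test_expect_success 'missing object is fetched on demand' '
		git -C repo cat-file -p "$MISSING" &&
		git -C repo fsck
	'

cat-file triggers the lazy fetch; fsck runs with fetch_if_missing set
to 0 and succeeds because the object is present by then.
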
 builtin/cat-file.c   |  3 +++
 builtin/fetch-pack.c |  2 ++
 builtin/fsck.c   |  3 +++
 builtin/index-pack.c |  6 ++
 cache.h  |  8 
 fetch-object.c   |  3 +++
 sha1_file.c  | 28 ++
 t/t0410-partial-clone.sh | 51 
 8 files changed, 96 insertions(+), 8 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index f5fa4fd75a..1e4edd81a0 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -13,6 +13,7 @@
 #include "tree-walk.h"
 #include "sha1-array.h"
 #include "packfile.h"
+#include "external-odb.h"
 
 struct batch_options {
int enabled;
@@ -475,6 +476,8 @@ static int batch_objects(struct batch_options *opt)
 
for_each_loose_object(batch_loose_object, &sa, 0);
for_each_packed_object(batch_packed_object, &sa, 0);
+   if (has_external_odb())
+	   warning("This repository uses an odb. Some objects may not be loaded.");
 
cb.opt = opt;
cb.expand = &data;
diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c
index 02abe7211e..15eeed7b17 100644
--- a/builtin/fetch-pack.c
+++ b/builtin/fetch-pack.c
@@ -53,6 +53,8 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix)
struct oid_array shallow = OID_ARRAY_INIT;
struct string_list deepen_not = STRING_LIST_INIT_DUP;
 
+   fetch_if_missing = 0;
+
packet_trace_identity("fetch-pack");
 
memset(&args, 0, sizeof(args));
diff --git a/builtin/fsck.c b/builtin/fsck.c
index a6fa6d6482..7a8a679d4f 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -678,6 +678,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
int i;
struct alternate_object_database *alt;
 
+   /* fsck knows how to handle missing promisor objects */
+   fetch_if_missing = 0;
+
errors_found = 0;
check_replace_refs = 0;
 
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 9dffaf20ae..54c921fa71 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1657,6 +1657,12 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
unsigned foreign_nr = 1;/* zero is a "good" value, assume bad */
int report_end_of_input = 0;
 
+   /*
+* index-pack never needs to fetch missing objects, since it only
+* accesses the repo to do hash collision checks
+*/
+   fetch_if_missing = 0;
+

[PATCH 28/40] pack-objects: don't pack objects in external odbs

2018-01-03 Thread Christian Couder
Objects managed by an external ODB should not be put into
pack files. They should be transferred using other mechanisms
that can be specific to the external odb.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
 builtin/pack-objects.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6c71552cdf..4ed66c7677 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -28,6 +28,7 @@
 #include "argv-array.h"
 #include "mru.h"
 #include "packfile.h"
+#include "external-odb.h"
 
 static const char *pack_usage[] = {
	N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
@@ -1026,6 +1027,9 @@ static int want_object_in_pack(const struct object_id *oid,
return want;
}
 
+   if (external_odb_has_object(oid->hash))
+   return 0;
+
for (entry = packed_git_mru.head; entry; entry = entry->next) {
struct packed_git *p = entry->item;
off_t offset;
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 30/40] external-odb: add 'get_direct' support

2018-01-03 Thread Christian Couder
This implements the 'get_direct' capability/instruction that makes
it possible for external odb helper scripts to pass blobs to Git
by directly writing them as loose object files.

It is better to call this a "direct" mode than a "fault-in" mode:
we could have the same kind of mechanism to "put" objects into an
external odb, with the odb helper reading the blobs it wants to
send directly from files, and it would be strange to call that a
fault-in mode too.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
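Illustration only, not part of the patch: assuming the helper command
is invoked as '<command> get_direct <sha1>' and no data is exchanged on
stdin for this instruction, a helper script could serve the request by
writing the blob straight into the local object store instead of
streaming it back to Git; "fetch_from_store" is a made-up stand-in for
whatever transport the external odb really uses:

	case "$1" in
	get_direct)
		fetch_from_store "$2" |
		git hash-object -w -t blob --stdin >/dev/null
		;;
	esac

Git only waits for the helper to exit successfully and afterwards finds
the object as a regular loose object.
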
 external-odb.c |  3 ++-
 odb-helper.c   | 28 +++-
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/external-odb.c b/external-odb.c
index 93971e9ce4..3ce3d111f3 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -109,7 +109,8 @@ int external_odb_get_object(const unsigned char *sha1)
int ret;
int fd;
 
-   if (!odb_helper_has_object(o, sha1))
+   if (!(o->supported_capabilities & ODB_HELPER_CAP_GET_RAW_OBJ) &&
+   !(o->supported_capabilities & ODB_HELPER_CAP_GET_GIT_OBJ))
continue;
 
fd = create_object_tmpfile(, path);
diff --git a/odb-helper.c b/odb-helper.c
index fc30c2fa57..0fa7af0348 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -429,24 +429,42 @@ static int odb_helper_get_git_object(struct odb_helper *o,
 int odb_helper_get_direct(struct odb_helper *o,
  const unsigned char *sha1)
 {
-   int res = 0;
uint64_t start = getnanotime();
 
-   fetch_object(o->dealer, sha1);
+   if (o->type == ODB_HELPER_GIT_REMOTE) {
+   fetch_object(o->dealer, sha1);
+   } else {
+   struct odb_helper_object *obj;
+   struct odb_helper_cmd cmd;
+
+   obj = odb_helper_lookup(o, sha1);
+   if (!obj)
+   return -1;
+
+   if (odb_helper_start(o, &cmd, 0, "get_direct %s", sha1_to_hex(sha1)) < 0)
+   return -1;
+
+   if (odb_helper_finish(o, &cmd))
+   return -1;
+   }
 
trace_performance_since(start, "odb_helper_get_direct");
 
-   return res;
+   return 0;
 }
 
 int odb_helper_get_object(struct odb_helper *o,
  const unsigned char *sha1,
  int fd)
 {
+   if (o->supported_capabilities & ODB_HELPER_CAP_GET_GIT_OBJ)
+   return odb_helper_get_git_object(o, sha1, fd);
if (o->supported_capabilities & ODB_HELPER_CAP_GET_RAW_OBJ)
return odb_helper_get_raw_object(o, sha1, fd);
-   else
-   return odb_helper_get_git_object(o, sha1, fd);
+   if (o->supported_capabilities & ODB_HELPER_CAP_GET_DIRECT)
+   return 0;
+
+   BUG("invalid get capability (capabilities: '%d')", o->supported_capabilities);
 }
 
 int odb_helper_put_object(struct odb_helper *o,
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 24/40] lib-httpd: add upload.sh

2018-01-03 Thread Christian Couder
This CGI script will be used to upload objects to, or to delete
objects from, an Apache web server.

This way the Apache server can work as an external object
database.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
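Illustration only, not part of the patch: assuming the CGI ends up
reachable as $HTTPD_URL/upload.sh (the exact alias comes from the
Apache configuration) and blob.raw holds the payload, an object can be
stored and later deleted with plain curl calls; both are POSTs and so
hit the 204 branch below:

	curl --data-binary @blob.raw \
		"$HTTPD_URL/upload.sh?sha1=$sha1&type=blob&size=$size"
	curl --data '' \
		"$HTTPD_URL/upload.sh?sha1=$sha1&type=blob&size=$size&delete=1"
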
 t/lib-httpd.sh|  1 +
 t/lib-httpd/upload.sh | 45 +
 2 files changed, 46 insertions(+)
 create mode 100644 t/lib-httpd/upload.sh

diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh
index 2e659a8ee2..d80b004549 100644
--- a/t/lib-httpd.sh
+++ b/t/lib-httpd.sh
@@ -132,6 +132,7 @@ prepare_httpd() {
cp "$TEST_PATH"/passwd "$HTTPD_ROOT_PATH"
install_script broken-smart-http.sh
install_script error.sh
+   install_script upload.sh
 
ln -s "$LIB_HTTPD_MODULE_PATH" "$HTTPD_ROOT_PATH/modules"
 
diff --git a/t/lib-httpd/upload.sh b/t/lib-httpd/upload.sh
new file mode 100644
index 00..64d3f31c31
--- /dev/null
+++ b/t/lib-httpd/upload.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+# In part from http://codereview.stackexchange.com/questions/79549/bash-cgi-upload-file
+
+FILES_DIR="www/files"
+
+OLDIFS="$IFS"
+IFS='&'
+set -- $QUERY_STRING
+IFS="$OLDIFS"
+
+while test $# -gt 0
+do
+   key=${1%%=*}
+   val=${1#*=}
+
+   case "$key" in
+   "sha1") sha1="$val" ;;
+   "type") type="$val" ;;
+   "size") size="$val" ;;
+   "delete") delete=1 ;;
+   *) echo >&2 "unknown key '$key'" ;;
+   esac
+
+   shift
+done
+
+case "$REQUEST_METHOD" in
+POST)
+   if test "$delete" = "1"
+   then
+   rm -f "$FILES_DIR/$sha1-$size-$type"
+   else
+   mkdir -p "$FILES_DIR"
+   cat >"$FILES_DIR/$sha1-$size-$type"
+   fi
+
+   echo 'Status: 204 No Content'
+   echo
+   ;;
+
+*)
+   echo 'Status: 405 Method Not Allowed'
+   echo
+esac
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



[PATCH 25/40] lib-httpd: add list.sh

2018-01-03 Thread Christian Couder
This CGI script can list the Git objects that have been uploaded as
files to an Apache web server. It can also retrieve the content of
each of these files.

This will help make Apache work as an external object database.

Signed-off-by: Christian Couder <chrisc...@tuxfamily.org>
---
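Illustration only, not part of the patch: assuming the CGI is reachable
as $HTTPD_URL/list.sh, listing and retrieval look like this:

	curl "$HTTPD_URL/list.sh"             # one "sha1 size type" line per stored file
	curl "$HTTPD_URL/list.sh?sha1=$sha1"  # raw content of that object, or a 404

The first form simply turns the "sha1-size-type" file names back into
space-separated fields.
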
 t/lib-httpd.sh  |  1 +
 t/lib-httpd/list.sh | 41 +
 2 files changed, 42 insertions(+)
 create mode 100644 t/lib-httpd/list.sh

diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh
index d80b004549..f31ea261f5 100644
--- a/t/lib-httpd.sh
+++ b/t/lib-httpd.sh
@@ -133,6 +133,7 @@ prepare_httpd() {
install_script broken-smart-http.sh
install_script error.sh
install_script upload.sh
+   install_script list.sh
 
ln -s "$LIB_HTTPD_MODULE_PATH" "$HTTPD_ROOT_PATH/modules"
 
diff --git a/t/lib-httpd/list.sh b/t/lib-httpd/list.sh
new file mode 100644
index 00..b6d6c29a2f
--- /dev/null
+++ b/t/lib-httpd/list.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+
+FILES_DIR="www/files"
+
+OLDIFS="$IFS"
+IFS='&'
+set -- $QUERY_STRING
+IFS="$OLDIFS"
+
+while test $# -gt 0
+do
+   key=${1%%=*}
+   val=${1#*=}
+
+   case "$key" in
+   "sha1") sha1="$val" ;;
+   *) echo >&2 "unknown key '$key'" ;;
+   esac
+
+   shift
+done
+
+if test -d "$FILES_DIR"
+then
+   if test -z "$sha1"
+   then
+   echo 'Status: 200 OK'
+   echo
+   ls "$FILES_DIR" | tr '-' ' '
+   else
+   if test -f "$FILES_DIR/$sha1"-*
+   then
+   echo 'Status: 200 OK'
+   echo
+   cat "$FILES_DIR/$sha1"-*
+   else
+   echo 'Status: 404 Not Found'
+   echo
+   fi
+   fi
+fi
-- 
2.16.0.rc0.16.g82191dbc6c.dirty



<    1   2   3   4   5   6   7   8   9   10   >