Re: [PATCH 15/40] external-odb: add script mode support

2018-03-19 Thread Christian Couder
On Thu, Jan 4, 2018 at 8:55 PM, Jeff Hostetler  wrote:
>
> On 1/3/2018 11:33 AM, Christian Couder wrote:
>>
>> diff --git a/odb-helper.c b/odb-helper.c
>> index 4b70b287af..c1a3443dc7 100644
>> --- a/odb-helper.c
>> +++ b/odb-helper.c
>> @@ -21,13 +21,124 @@ struct odb_helper_cmd {
>> struct child_process child;
>>   };
>>   +/*
>> + * Callers are responsible to ensure that the result of vaddf(fmt, ap)
>> + * is properly shell-quoted.
>> + */
>> +static void prepare_helper_command(struct argv_array *argv, const char
>> *cmd,
>> +  const char *fmt, va_list ap)
>> +{
>> +   struct strbuf buf = STRBUF_INIT;
>> +
>> +   strbuf_addstr(, cmd);
>> +   strbuf_addch(, ' ');
>> +   strbuf_vaddf(, fmt, ap);
>
> I do find this a bit odd that you're putting the cmd, a space,
> and the printf results into a single arg in the argv, rather than
> directly loading up the argv.
>
> Is there an issue with the whitespace between the cmd and the
> printf result being in the same arg -- especially if there are
> quoting issues in the fmt string as you mention in the comment
> above?  (not sure, just asking)

This was discussed with Junio here:

https://public-inbox.org/git/xmqqmvggbl6m@gitster.mtv.corp.google.com/

I agree that I should take another look at it though. I will do that
in the next version after the one I will send really soon now.

>> +static int parse_object_line(struct odb_helper_object *o, const char
>> *line)
>> +{
>
> Is there a reason to order the fields this way?  In the test
> at the bottom of this commit, you take cat-file output and
> re-order the columns with awk.   I'm just wondering if we kept
> cat-file ordering in the format here, we could simplify things.

Yeah, maybe the shell script could be simplified while the C code
would not be more complex. I don't remember if that was in the
original version from Peff or if there was a reason to do it this way.
I will take a look at that in the next version after the one I will
send really soon now.

>> +   char *end;
>> +   if (get_sha1_hex(line, o->sha1) < 0)
>> +   return -1;
>> +
>> +   line += 40;
>> +   if (*line++ != ' ')
>> +   return -1;
>> +
>> +   o->size = strtoul(line, , 10);
>> +   if (line == end || *end++ != ' ')
>> +   return -1;
>> +
>> +   o->type = type_from_string(end);
>> +   return 0;
>> +}
>> +
>> +static int add_have_entry(struct odb_helper *o, const char *line)
>> +{
>> +   ALLOC_GROW(o->have, o->have_nr+1, o->have_alloc);
>
> I didn't see where o->have is initially allocated.  The default is
> to start with 64 and then grow by 3/2 as needed.  If we are getting
> lots of objects here, we'll have lots of reallocs slowing things down.
> It would be better to seed it somewhere to a large value.

Yeah but using this is optional. It depends on the cap_have
capability. And I think that if there is a really huge number of
objects in the external odb, it might be better to just not use
cap_have.

Another possibility would be for the helper to send the number of
objects it has first, so that we could alloc the right number before
receiving the haves, but this could require the helper to be more
complex.

>> +   if (parse_object_line(>have[o->have_nr], line) < 0) {
>> +   warning("bad 'have' input from odb helper '%s': %s",
>> +   o->name, line);
>> +   return 1;
>> +   }
>> +   o->have_nr++;
>> +   return 0;
>> +}
>> +
>> +static int odb_helper_object_cmp(const void *va, const void *vb)
>> +{
>> +   const struct odb_helper_object *a = va, *b = vb;
>> +   return hashcmp(a->sha1, b->sha1);
>> +}
>> +
>>   static void odb_helper_load_have(struct odb_helper *o)
>>   {
>> +   struct odb_helper_cmd cmd;
>> +   FILE *fh;
>> +   struct strbuf line = STRBUF_INIT;
>> +
>> if (o->have_valid)
>> return;
>> o->have_valid = 1;
>>   - /* TODO */
>> +   if (odb_helper_start(o, , "have") < 0)
>> +   return;
>> +
>> +   fh = xfdopen(cmd.child.out, "r");
>> +   while (strbuf_getline(, fh) != EOF)
>> +   if (add_have_entry(o, line.buf))
>> +   break;
>> +
>> +   strbuf_release();
>> +   fclose(fh);
>> +   odb_helper_finish(o, );
>> +
>> +   qsort(o->have, o->have_nr, sizeof(*o->have),
>> odb_helper_object_cmp);
>>   }
>
> Help me understand this.  I originally thought that the "have"
> command would ask about one or more specific OIDs, but after a
> couple of readings it looks like the "have" command is getting the
> *complete* list of objects known to this external ODB and building
> a sorted array of them.  And then we do this for each external ODB
> configured.
>
> If this is the case, I'm concerned that this will have scale problems.
> "git cat-file..." shows that even my little git.git repo has 360K
> objects.  And 

Re: [PATCH 15/40] external-odb: add script mode support

2018-01-04 Thread Jeff Hostetler



On 1/3/2018 11:33 AM, Christian Couder wrote:

This adds support for the script command mode where
an helper script or command is called to retrieve or
manage objects.

This implements the 'have' and 'get_git_obj'
instructions for the script mode.

Signed-off-by: Christian Couder 
---
  external-odb.c  |  51 ++-
  external-odb.h  |   1 +
  odb-helper.c| 218 +++-
  odb-helper.h|   4 +
  sha1_file.c |  12 ++-
  t/t0400-external-odb.sh |  44 ++
  6 files changed, 327 insertions(+), 3 deletions(-)
  create mode 100755 t/t0400-external-odb.sh

diff --git a/external-odb.c b/external-odb.c
index 5d0afb9762..81f2aa5fac 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -33,8 +33,14 @@ static int external_odb_config(const char *var, const char 
*value, void *data)
  
  	o = find_or_create_helper(name, namelen);
  
-	if (!strcmp(subkey, "promisorremote"))

+   if (!strcmp(subkey, "promisorremote")) {
+   o->type = ODB_HELPER_GIT_REMOTE;
return git_config_string(>dealer, var, value);
+   }
+   if (!strcmp(subkey, "scriptcommand")) {
+   o->type = ODB_HELPER_SCRIPT_CMD;
+   return git_config_string(>dealer, var, value);
+   }
  
  	return 0;

  }
@@ -77,6 +83,49 @@ int external_odb_has_object(const unsigned char *sha1)
return 0;
  }
  
+int external_odb_get_object(const unsigned char *sha1)

+{
+   struct odb_helper *o;
+   const char *path;
+
+   if (!external_odb_has_object(sha1))
+   return -1;
+
+   path = sha1_file_name_alt(external_odb_root(), sha1);
+   safe_create_leading_directories_const(path);
+   prepare_external_alt_odb();
+
+   for (o = helpers; o; o = o->next) {
+   struct strbuf tmpfile = STRBUF_INIT;
+   int ret;
+   int fd;
+
+   if (!odb_helper_has_object(o, sha1))
+   continue;
+
+   fd = create_object_tmpfile(, path);
+   if (fd < 0) {
+   strbuf_release();
+   return -1;
+   }
+
+   if (odb_helper_get_object(o, sha1, fd) < 0) {
+   close(fd);
+   unlink(tmpfile.buf);
+   strbuf_release();
+   continue;
+   }
+
+   close_sha1_file(fd);
+   ret = finalize_object_file(tmpfile.buf, path);
+   strbuf_release();
+   if (!ret)
+   return 0;
+   }
+
+   return -1;
+}
+
  int external_odb_get_direct(const unsigned char *sha1)
  {
struct odb_helper *o;
diff --git a/external-odb.h b/external-odb.h
index fd6708163e..fb8b94972f 100644
--- a/external-odb.h
+++ b/external-odb.h
@@ -4,6 +4,7 @@
  extern int has_external_odb(void);
  extern const char *external_odb_root(void);
  extern int external_odb_has_object(const unsigned char *sha1);
+extern int external_odb_get_object(const unsigned char *sha1);
  extern int external_odb_get_direct(const unsigned char *sha1);
  
  #endif /* EXTERNAL_ODB_H */

diff --git a/odb-helper.c b/odb-helper.c
index 4b70b287af..c1a3443dc7 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -21,13 +21,124 @@ struct odb_helper_cmd {
struct child_process child;
  };
  
+/*

+ * Callers are responsible to ensure that the result of vaddf(fmt, ap)
+ * is properly shell-quoted.
+ */
+static void prepare_helper_command(struct argv_array *argv, const char *cmd,
+  const char *fmt, va_list ap)
+{
+   struct strbuf buf = STRBUF_INIT;
+
+   strbuf_addstr(, cmd);
+   strbuf_addch(, ' ');
+   strbuf_vaddf(, fmt, ap);


I do find this a bit odd that you're putting the cmd, a space,
and the printf results into a single arg in the argv, rather than
directly loading up the argv.

Is there an issue with the whitespace between the cmd and the
printf result being in the same arg -- especially if there are
quoting issues in the fmt string as you mention in the comment
above?  (not sure, just asking)


+
+   argv_array_push(argv, buf.buf);
+   strbuf_release();
+}
+
+__attribute__((format (printf,3,4)))
+static int odb_helper_start(struct odb_helper *o,
+   struct odb_helper_cmd *cmd,
+   const char *fmt, ...)
+{
+   va_list ap;
+
+   memset(cmd, 0, sizeof(*cmd));
+   argv_array_init(>argv);
+
+   if (!o->dealer)
+   return -1;
+
+   va_start(ap, fmt);
+   prepare_helper_command(>argv, o->dealer, fmt, ap);
+   va_end(ap);
+
+   cmd->child.argv = cmd->argv.argv;
+   cmd->child.use_shell = 1;
+   cmd->child.no_stdin = 1;
+   cmd->child.out = -1;
+
+   if (start_command(>child) < 0) {
+   argv_array_clear(>argv);
+   return -1;
+   }
+
+ 

[PATCH 15/40] external-odb: add script mode support

2018-01-03 Thread Christian Couder
This adds support for the script command mode where
an helper script or command is called to retrieve or
manage objects.

This implements the 'have' and 'get_git_obj'
instructions for the script mode.

Signed-off-by: Christian Couder 
---
 external-odb.c  |  51 ++-
 external-odb.h  |   1 +
 odb-helper.c| 218 +++-
 odb-helper.h|   4 +
 sha1_file.c |  12 ++-
 t/t0400-external-odb.sh |  44 ++
 6 files changed, 327 insertions(+), 3 deletions(-)
 create mode 100755 t/t0400-external-odb.sh

diff --git a/external-odb.c b/external-odb.c
index 5d0afb9762..81f2aa5fac 100644
--- a/external-odb.c
+++ b/external-odb.c
@@ -33,8 +33,14 @@ static int external_odb_config(const char *var, const char 
*value, void *data)
 
o = find_or_create_helper(name, namelen);
 
-   if (!strcmp(subkey, "promisorremote"))
+   if (!strcmp(subkey, "promisorremote")) {
+   o->type = ODB_HELPER_GIT_REMOTE;
return git_config_string(>dealer, var, value);
+   }
+   if (!strcmp(subkey, "scriptcommand")) {
+   o->type = ODB_HELPER_SCRIPT_CMD;
+   return git_config_string(>dealer, var, value);
+   }
 
return 0;
 }
@@ -77,6 +83,49 @@ int external_odb_has_object(const unsigned char *sha1)
return 0;
 }
 
+int external_odb_get_object(const unsigned char *sha1)
+{
+   struct odb_helper *o;
+   const char *path;
+
+   if (!external_odb_has_object(sha1))
+   return -1;
+
+   path = sha1_file_name_alt(external_odb_root(), sha1);
+   safe_create_leading_directories_const(path);
+   prepare_external_alt_odb();
+
+   for (o = helpers; o; o = o->next) {
+   struct strbuf tmpfile = STRBUF_INIT;
+   int ret;
+   int fd;
+
+   if (!odb_helper_has_object(o, sha1))
+   continue;
+
+   fd = create_object_tmpfile(, path);
+   if (fd < 0) {
+   strbuf_release();
+   return -1;
+   }
+
+   if (odb_helper_get_object(o, sha1, fd) < 0) {
+   close(fd);
+   unlink(tmpfile.buf);
+   strbuf_release();
+   continue;
+   }
+
+   close_sha1_file(fd);
+   ret = finalize_object_file(tmpfile.buf, path);
+   strbuf_release();
+   if (!ret)
+   return 0;
+   }
+
+   return -1;
+}
+
 int external_odb_get_direct(const unsigned char *sha1)
 {
struct odb_helper *o;
diff --git a/external-odb.h b/external-odb.h
index fd6708163e..fb8b94972f 100644
--- a/external-odb.h
+++ b/external-odb.h
@@ -4,6 +4,7 @@
 extern int has_external_odb(void);
 extern const char *external_odb_root(void);
 extern int external_odb_has_object(const unsigned char *sha1);
+extern int external_odb_get_object(const unsigned char *sha1);
 extern int external_odb_get_direct(const unsigned char *sha1);
 
 #endif /* EXTERNAL_ODB_H */
diff --git a/odb-helper.c b/odb-helper.c
index 4b70b287af..c1a3443dc7 100644
--- a/odb-helper.c
+++ b/odb-helper.c
@@ -21,13 +21,124 @@ struct odb_helper_cmd {
struct child_process child;
 };
 
+/*
+ * Callers are responsible to ensure that the result of vaddf(fmt, ap)
+ * is properly shell-quoted.
+ */
+static void prepare_helper_command(struct argv_array *argv, const char *cmd,
+  const char *fmt, va_list ap)
+{
+   struct strbuf buf = STRBUF_INIT;
+
+   strbuf_addstr(, cmd);
+   strbuf_addch(, ' ');
+   strbuf_vaddf(, fmt, ap);
+
+   argv_array_push(argv, buf.buf);
+   strbuf_release();
+}
+
+__attribute__((format (printf,3,4)))
+static int odb_helper_start(struct odb_helper *o,
+   struct odb_helper_cmd *cmd,
+   const char *fmt, ...)
+{
+   va_list ap;
+
+   memset(cmd, 0, sizeof(*cmd));
+   argv_array_init(>argv);
+
+   if (!o->dealer)
+   return -1;
+
+   va_start(ap, fmt);
+   prepare_helper_command(>argv, o->dealer, fmt, ap);
+   va_end(ap);
+
+   cmd->child.argv = cmd->argv.argv;
+   cmd->child.use_shell = 1;
+   cmd->child.no_stdin = 1;
+   cmd->child.out = -1;
+
+   if (start_command(>child) < 0) {
+   argv_array_clear(>argv);
+   return -1;
+   }
+
+   return 0;
+}
+
+static int odb_helper_finish(struct odb_helper *o,
+struct odb_helper_cmd *cmd)
+{
+   int ret = finish_command(>child);
+   argv_array_clear(>argv);
+   if (ret) {
+   warning("odb helper '%s' reported failure", o->name);
+   return -1;
+   }
+   return 0;
+}
+
+static int parse_object_line(struct odb_helper_object *o, const char *line)
+{
+