Re: [RFC PATCHv2] repack: rewrite the shell script in C.

2013-08-18 Thread Stefan Beller
On 08/17/2013 03:34 PM, René Scharfe wrote:
> 
> Hmm, stepping back a bit, why not just build the paths and call unlink
> for them right away, without readdir?  The shell version only ever
> deletes existing .pack files (those in $existing alias existing_packs)
> as well as their .idx and .keep files, if present.  It doesn't use a
> glob pattern, unlike remove_pack here.

I'll meditate on that. 

Thanks for all the other remarks. Now the code looks much more
git-ish, similar to other commands. 
The lines of code went down from 411 to 385, I guess we can cut off
more inefficiencies there. 

As you suggested, maybe we should juts have one helper function to
read in the pack directory and keeping all the information (complete filename),
so we do not need to find the exact filename later again by looping over
the directory again.

Stefan

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCHv2] repack: rewrite the shell script in C.

2013-08-17 Thread Kyle J. McKay

On Aug 17, 2013, at 06:34, René Scharfe wrote:

On Aug 15, 2013, at 17:12, Stefan Beller wrote:

+   if (sha_begin >= e->d_name && !strncmp(sha_begin, sha1, 40)) {
+   char *fname;
+   fname = xmalloc(strlen(path) + 1 + strlen(e->d_name));


This needs another +1 because


+   sprintf(fname, "%s/%s", path, e->d_name);


len(path) + len("/") + len(e->d_name) + len("\0")



mkpathdup..


+   unlink(fname);
+   /*TODO: free(fname); fails here sometimes, needs 
investigation*/


Strange.  Perhaps valgrind can tell you what's wrong.


which is probably why it fails since the byte beyond the end of fname  
is being overwritten with a nul.--

To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCHv2] repack: rewrite the shell script in C.

2013-08-17 Thread René Scharfe



This is the beginning of the rewrite of the repacking.

 * Removed unneeded system header files
 * corrected remove_pack to really remove any pack files with the given
   sha1
 * fail if pack-objects fails
 * Only test t7701 (2nd) fails now  with this patch.

Signed-off-by: Stefan Beller 
---
 Makefile|   2 +-
 builtin.h   |   1 +
 builtin/repack.c| 411 
 git-repack.sh => contrib/examples/git-repack.sh |   0
 git.c   |   1 +
 5 files changed, 414 insertions(+), 1 deletion(-)
 create mode 100644 builtin/repack.c
 rename git-repack.sh => contrib/examples/git-repack.sh (100%)

diff --git a/Makefile b/Makefile
index ef442eb..4ec5bbe 100644
--- a/Makefile
+++ b/Makefile
@@ -464,7 +464,6 @@ SCRIPT_SH += git-pull.sh
 SCRIPT_SH += git-quiltimport.sh
 SCRIPT_SH += git-rebase.sh
 SCRIPT_SH += git-remote-testgit.sh
-SCRIPT_SH += git-repack.sh
 SCRIPT_SH += git-request-pull.sh
 SCRIPT_SH += git-stash.sh
 SCRIPT_SH += git-submodule.sh
@@ -972,6 +971,7 @@ BUILTIN_OBJS += builtin/reflog.o
 BUILTIN_OBJS += builtin/remote.o
 BUILTIN_OBJS += builtin/remote-ext.o
 BUILTIN_OBJS += builtin/remote-fd.o
+BUILTIN_OBJS += builtin/repack.o
 BUILTIN_OBJS += builtin/replace.o
 BUILTIN_OBJS += builtin/rerere.o
 BUILTIN_OBJS += builtin/reset.o
diff --git a/builtin.h b/builtin.h
index 8afa2de..b56cf07 100644
--- a/builtin.h
+++ b/builtin.h
@@ -102,6 +102,7 @@ extern int cmd_reflog(int argc, const char **argv, const 
char *prefix);
 extern int cmd_remote(int argc, const char **argv, const char *prefix);
 extern int cmd_remote_ext(int argc, const char **argv, const char *prefix);
 extern int cmd_remote_fd(int argc, const char **argv, const char *prefix);
+extern int cmd_repack(int argc, const char **argv, const char *prefix);
 extern int cmd_repo_config(int argc, const char **argv, const char *prefix);
 extern int cmd_rerere(int argc, const char **argv, const char *prefix);
 extern int cmd_reset(int argc, const char **argv, const char *prefix);
diff --git a/builtin/repack.c b/builtin/repack.c
new file mode 100644
index 000..f72911d
--- /dev/null
+++ b/builtin/repack.c
@@ -0,0 +1,411 @@
+/*
+ * The shell version was written by Linus Torvalds (2005) and many others.
+ * This is a translation into C by Stefan Beller (2013)
+ */
+
+#include "builtin.h"
+#include "cache.h"
+#include "dir.h"
+#include "parse-options.h"
+#include "run-command.h"
+#include "sigchain.h"
+#include "strbuf.h"
+#include "string-list.h"
+
+static const char *const git_repack_usage[] = {
+   N_("git repack [options]"),
+   NULL
+};
+
+/* enabled by default since 22c79eab (2008-06-25) */
+static int delta_base_offset = 1;
+
+static int repack_config(const char *var, const char *value, void *cb)
+{
+   if (!strcmp(var, "repack.usedeltabaseoffset")) {
+   delta_base_offset = git_config_bool(var, value);
+   return 0;
+   }
+   return git_default_config(var, value, cb);
+}
+
+static void remove_temporary_files() {
+   DIR *dir;
+   struct dirent *e;
+   char *prefix, *path, *fname;
+
+   prefix = xmalloc(strlen(".tmp-1-pack") + 1);
+   sprintf(prefix, ".tmp-%d-pack", getpid());


This will overflow for PIDs with more than five digits. Better use a 
strbuf and build the string with strbuf_addf.  Or better, use mkpathdup, 
which does that under the hood.



+
+   path = xmalloc(strlen(get_object_directory()) + strlen("/pack") + 1);
+   sprintf(path, "%s/pack", get_object_directory());


mkpathdup?


+
+   fname = xmalloc(strlen(path) + strlen("/")
+   + strlen(prefix) + strlen("/")
+   + 40 + strlen(".pack") + 1);
+
+   dir = opendir(path);
+   while ((e = readdir(dir)) != NULL) {
+   if (!prefixcmp(e->d_name, prefix)) {
+   sprintf(fname, "%s/%s", path, e->d_name);


If someone has a directory entry that begins with 'prefix' but is longer 
than expected this will overflow.  That's unlikely, but lets avoid it 
outright.  You could use a strbuf instead and reset it to the length of 
the prefix at the start of the loop (with strbuf_setlen), before adding 
the entry's name.



+   unlink(fname);
+   }
+   }
+   free(fname);
+   free(prefix);
+   free(path);
+   closedir(dir);
+}
+
+static void remove_pack_on_signal(int signo)
+{
+   remove_temporary_files();
+   sigchain_pop(signo);
+   raise(signo);
+}
+
+void get_pack_sha1_list(char *packdir, struct string_list *sha1_list)
+{
+   DIR *dir;
+   struct dirent *e;
+   char *path, *suffix;
+
+   path = xmalloc(strlen(get_object_directory()) + strlen("/pack") + 1);
+   sprintf(path, "%s/pack", get_object_directory());


mkpathdup again.

Or would it make sense to cd into the pack directory and avoid these 
string manipulations outr

[RFC PATCHv2] repack: rewrite the shell script in C.

2013-08-15 Thread Stefan Beller
This is the beginning of the rewrite of the repacking.

 * Removed unneeded system header files
 * corrected remove_pack to really remove any pack files with the given
   sha1
 * fail if pack-objects fails
 * Only test t7701 (2nd) fails now  with this patch.

Signed-off-by: Stefan Beller 
---
 Makefile|   2 +-
 builtin.h   |   1 +
 builtin/repack.c| 411 
 git-repack.sh => contrib/examples/git-repack.sh |   0
 git.c   |   1 +
 5 files changed, 414 insertions(+), 1 deletion(-)
 create mode 100644 builtin/repack.c
 rename git-repack.sh => contrib/examples/git-repack.sh (100%)

diff --git a/Makefile b/Makefile
index ef442eb..4ec5bbe 100644
--- a/Makefile
+++ b/Makefile
@@ -464,7 +464,6 @@ SCRIPT_SH += git-pull.sh
 SCRIPT_SH += git-quiltimport.sh
 SCRIPT_SH += git-rebase.sh
 SCRIPT_SH += git-remote-testgit.sh
-SCRIPT_SH += git-repack.sh
 SCRIPT_SH += git-request-pull.sh
 SCRIPT_SH += git-stash.sh
 SCRIPT_SH += git-submodule.sh
@@ -972,6 +971,7 @@ BUILTIN_OBJS += builtin/reflog.o
 BUILTIN_OBJS += builtin/remote.o
 BUILTIN_OBJS += builtin/remote-ext.o
 BUILTIN_OBJS += builtin/remote-fd.o
+BUILTIN_OBJS += builtin/repack.o
 BUILTIN_OBJS += builtin/replace.o
 BUILTIN_OBJS += builtin/rerere.o
 BUILTIN_OBJS += builtin/reset.o
diff --git a/builtin.h b/builtin.h
index 8afa2de..b56cf07 100644
--- a/builtin.h
+++ b/builtin.h
@@ -102,6 +102,7 @@ extern int cmd_reflog(int argc, const char **argv, const 
char *prefix);
 extern int cmd_remote(int argc, const char **argv, const char *prefix);
 extern int cmd_remote_ext(int argc, const char **argv, const char *prefix);
 extern int cmd_remote_fd(int argc, const char **argv, const char *prefix);
+extern int cmd_repack(int argc, const char **argv, const char *prefix);
 extern int cmd_repo_config(int argc, const char **argv, const char *prefix);
 extern int cmd_rerere(int argc, const char **argv, const char *prefix);
 extern int cmd_reset(int argc, const char **argv, const char *prefix);
diff --git a/builtin/repack.c b/builtin/repack.c
new file mode 100644
index 000..f72911d
--- /dev/null
+++ b/builtin/repack.c
@@ -0,0 +1,411 @@
+/*
+ * The shell version was written by Linus Torvalds (2005) and many others.
+ * This is a translation into C by Stefan Beller (2013)
+ */
+
+#include "builtin.h"
+#include "cache.h"
+#include "dir.h"
+#include "parse-options.h"
+#include "run-command.h"
+#include "sigchain.h"
+#include "strbuf.h"
+#include "string-list.h"
+
+static const char *const git_repack_usage[] = {
+   N_("git repack [options]"),
+   NULL
+};
+
+/* enabled by default since 22c79eab (2008-06-25) */
+static int delta_base_offset = 1;
+
+static int repack_config(const char *var, const char *value, void *cb)
+{
+   if (!strcmp(var, "repack.usedeltabaseoffset")) {
+   delta_base_offset = git_config_bool(var, value);
+   return 0;
+   }
+   return git_default_config(var, value, cb);
+}
+
+static void remove_temporary_files() {
+   DIR *dir;
+   struct dirent *e;
+   char *prefix, *path, *fname;
+
+   prefix = xmalloc(strlen(".tmp-1-pack") + 1);
+   sprintf(prefix, ".tmp-%d-pack", getpid());
+
+   path = xmalloc(strlen(get_object_directory()) + strlen("/pack") + 1);
+   sprintf(path, "%s/pack", get_object_directory());
+
+   fname = xmalloc(strlen(path) + strlen("/")
+   + strlen(prefix) + strlen("/")
+   + 40 + strlen(".pack") + 1);
+
+   dir = opendir(path);
+   while ((e = readdir(dir)) != NULL) {
+   if (!prefixcmp(e->d_name, prefix)) {
+   sprintf(fname, "%s/%s", path, e->d_name);
+   unlink(fname);
+   }
+   }
+   free(fname);
+   free(prefix);
+   free(path);
+   closedir(dir);
+}
+
+static void remove_pack_on_signal(int signo)
+{
+   remove_temporary_files();
+   sigchain_pop(signo);
+   raise(signo);
+}
+
+void get_pack_sha1_list(char *packdir, struct string_list *sha1_list)
+{
+   DIR *dir;
+   struct dirent *e;
+   char *path, *suffix;
+
+   path = xmalloc(strlen(get_object_directory()) + strlen("/pack") + 1);
+   sprintf(path, "%s/pack", get_object_directory());
+
+   suffix = ".pack";
+
+   dir = opendir(path);
+   while ((e = readdir(dir)) != NULL) {
+   if (!suffixcmp(e->d_name, suffix)) {
+   char buf[255], *sha1;
+   strcpy(buf, e->d_name);
+   buf[strlen(e->d_name) - strlen(suffix)] = '\0';
+   sha1 = &buf[strlen(e->d_name) - strlen(suffix) - 40];
+   string_list_append(sha1_list, sha1);
+   }
+   }
+   free(path);
+   closedir(dir);
+}
+
+/*
+ * remove_pack will remove any files following the pattern *${SHA1}.{EXT}
+ * where E