On Fri, Jul 06, 2012 at 01:31:31PM +0200, Denis Excoffier wrote:
>>
>> To be honest myself, I sometimes have to deliver directories that
>> contain about 50000 files, which I would like sorted. In any case, I
>> would rather rely on a single additional option on my tar command line
>> (in TAR_OPTIONS in this case) than on a complex and error-prone find
>> incantation. If tar does not provide this directly (as an option) or
>> indirectly (by just replacing 100 with 50000 in the code), I feel
>> capable of inserting some dirty malloc/qsort/free somewhere in
>> tar/src/create.c, not far from where --no-recursion actually takes
>> effect.
>>
Please find attached a naive implementation of
--sort-directory-entries (patch against tar-1.26). Any comments?
Regards,
Denis Excoffier.
diff -uNr tar-1.26/src/common.h tar-1.26b/src/common.h
--- tar-1.26/src/common.h 2011-02-11 12:55:49.000000000 +0059
+++ tar-1.26b/src/common.h 2012-07-08 21:39:23.000000000 +0159
@@ -225,6 +225,11 @@
/* Zero if there is no recursion, otherwise FNM_LEADING_DIR. */
GLOBAL int recursion_option;
+#ifdef ORIGINAL
+#else
+GLOBAL int sort_directory_entries_option;
+
+#endif
GLOBAL bool numeric_owner_option;
GLOBAL bool one_file_system_option;
diff -uNr tar-1.26/src/create.c tar-1.26b/src/create.c
--- tar-1.26/src/create.c 2011-03-12 10:09:09.000000000 +0059
+++ tar-1.26b/src/create.c 2012-07-08 21:43:50.000000000 +0159
@@ -1074,6 +1074,16 @@
}
return dump_status_ok;
}
+#ifdef ORIGINAL
+#else
+/* Comparison callback for qsort: PS1 and PS2 each point to a `char const *' array element; the two strings are ordered with strcmp.  */
+static int strcmp_for_qsort (void const *ps1, void const *ps2)
+{
+ char const *const *xs1 = ps1; /* view the generic pointer as pointer-to-string */
+ char const *const *xs2 = ps2;
+ return strcmp(*xs1, *xs2); /* dereference once to reach the strings themselves */
+}
+#endif
/* Copy info from the directory identified by ST into the archive.
@@ -1181,6 +1191,10 @@
name_size = name_len = strlen (name_buf);
/* Now output all the files in the directory. */
+#ifdef ORIGINAL
+#else
+ if (!sort_directory_entries_option) {
+#endif
for (entry = directory; (entry_len = strlen (entry)) != 0;
entry += entry_len + 1)
{
@@ -1193,6 +1207,41 @@
if (!excluded_name (name_buf))
dump_file (st, entry, name_buf);
}
+#ifdef ORIGINAL
+#else
+ } else {
+ unsigned int nb_entry = 0;
+ for (entry = directory; (entry_len = strlen(entry)) != 0;
+ entry += entry_len + 1) {
+ ++nb_entry;
+ };
+ /* improve: don't do this if less than two entries */
+ char const **entry_table = xmalloc((nb_entry + 1) * sizeof(char const *));
+ char const **p = entry_table;
+ for (entry = directory; (entry_len = strlen(entry)) != 0;
+ entry += entry_len + 1) {
+ *p++ = entry;
+ };
+ *p = (char const *)NULL;
+ qsort(entry_table, nb_entry, sizeof(char const *), strcmp_for_qsort);
+ p = entry_table;
+ while ((entry = *p++)) {
+ entry_len = strlen(entry);
+ /* below 10 lines copied verbatim from above */
+ {
+ if (name_size < name_len + entry_len)
+ {
+ name_size = name_len + entry_len;
+ name_buf = xrealloc (name_buf, name_size + 1);
+ }
+ strcpy (name_buf + name_len, entry);
+ if (!excluded_name (name_buf))
+ dump_file (st, entry, name_buf);
+ }
+ };
+ free(entry_table);
+ };
+#endif
free (name_buf);
}
diff -uNr tar-1.26/src/tar.c tar-1.26b/src/tar.c
--- tar-1.26/src/tar.c 2010-10-24 20:07:31.000000000 +0159
+++ tar-1.26b/src/tar.c 2012-07-08 21:45:23.000000000 +0159
@@ -298,6 +298,11 @@
NO_OVERWRITE_DIR_OPTION,
NO_QUOTE_CHARS_OPTION,
NO_RECURSION_OPTION,
+#ifdef ORIGINAL
+#else
+ SORT_DIRECTORY_ENTRIES_OPTION,
+ NO_SORT_DIRECTORY_ENTRIES_OPTION,
+#endif
NO_SAME_OWNER_OPTION,
NO_SAME_PERMISSIONS_OPTION,
NO_SEEK_OPTION,
@@ -675,6 +680,13 @@
N_("exclude backup and lock files"), GRID+1 },
{"no-recursion", NO_RECURSION_OPTION, 0, 0,
N_("avoid descending automatically in directories"), GRID+1 },
+#ifdef ORIGINAL
+#else
+ {"sort-directory-entries", SORT_DIRECTORY_ENTRIES_OPTION, 0, 0,
+ N_("store directory entries as sorted"), GRID+1 },
+ {"no-sort-directory-entries", NO_SORT_DIRECTORY_ENTRIES_OPTION, 0, 0,
+ N_("store directory entries iaw directory order (default)"), GRID+1 },
+#endif
{"one-file-system", ONE_FILE_SYSTEM_OPTION, 0, 0,
N_("stay in local file system when creating archive"), GRID+1 },
{"recursion", RECURSION_OPTION, 0, 0,
@@ -2071,6 +2083,17 @@
recursion_option = 0;
break;
+#ifdef ORIGINAL
+#else
+ case SORT_DIRECTORY_ENTRIES_OPTION:
+ sort_directory_entries_option = 1;
+ break;
+
+ case NO_SORT_DIRECTORY_ENTRIES_OPTION:
+ sort_directory_entries_option = 0;
+ break;
+
+#endif
case NO_SAME_OWNER_OPTION:
same_owner_option = -1;
break;
@@ -2237,6 +2260,10 @@
newer_mtime_option.tv_sec = TYPE_MINIMUM (time_t);
newer_mtime_option.tv_nsec = -1;
recursion_option = FNM_LEADING_DIR;
+#ifdef ORIGINAL
+#else
+ sort_directory_entries_option = 0;
+#endif
unquote_option = true;
tar_sparse_major = 1;
tar_sparse_minor = 0;