On Fri, Jul 06, 2012 at 01:31:31PM +0200, Denis Excoffier wrote:
>> 
>> To be honest myself, I sometimes have to deliver directories that
>> contain about 50000 files, which I would like sorted. In any case, I
>> prefer to rely on a single additional option on my tar command line (in
>> TAR_OPTIONS in this case) rather than on a complex and error-prone find
>> incantation. If this is not provided by tar, either directly (as an
>> option) or indirectly (by just replacing 100 with 50000 in the code),
>> I feel capable of inserting some dirty malloc/qsort/free
>> somewhere in tar/src/create.c, not far from where --no-recursion
>> actually takes effect.
>> 

Please find attached a naive implementation of
--sort-directory-entries (patch against tar-1.26). Any comments?

Regards,

Denis Excoffier.
diff -uNr tar-1.26/src/common.h tar-1.26b/src/common.h
--- tar-1.26/src/common.h       2011-02-11 12:55:49.000000000 +0059
+++ tar-1.26b/src/common.h      2012-07-08 21:39:23.000000000 +0159
@@ -225,6 +225,11 @@
 /* Zero if there is no recursion, otherwise FNM_LEADING_DIR.  */
 GLOBAL int recursion_option;
 
+#ifdef ORIGINAL
+#else
+GLOBAL int sort_directory_entries_option;
+
+#endif
 GLOBAL bool numeric_owner_option;
 
 GLOBAL bool one_file_system_option;
diff -uNr tar-1.26/src/create.c tar-1.26b/src/create.c
--- tar-1.26/src/create.c       2011-03-12 10:09:09.000000000 +0059
+++ tar-1.26b/src/create.c      2012-07-08 21:43:50.000000000 +0159
@@ -1074,6 +1074,16 @@
     }
   return dump_status_ok;
 }
+#ifdef ORIGINAL
+#else
+
+static int strcmp_for_qsort (void const *ps1, void const *ps2)
+{
+  char const *const *xs1 = ps1;
+  char const *const *xs2 = ps2;
+  return strcmp(*xs1, *xs2);
+}
+#endif
 
 
 /* Copy info from the directory identified by ST into the archive.
@@ -1181,6 +1191,10 @@
            name_size = name_len = strlen (name_buf);
 
            /* Now output all the files in the directory.  */
+#ifdef ORIGINAL
+#else
+            if (!sort_directory_entries_option) {
+#endif
            for (entry = directory; (entry_len = strlen (entry)) != 0;
                 entry += entry_len + 1)
              {
@@ -1193,6 +1207,41 @@
                if (!excluded_name (name_buf))
                  dump_file (st, entry, name_buf);
              }
+#ifdef ORIGINAL
+#else
+            } else {
+              unsigned int nb_entry = 0;
+              for (entry = directory; (entry_len = strlen(entry)) != 0;
+                  entry += entry_len + 1) {
+                ++nb_entry;
+              };
+              /* improve: don't do this if less than two entries */
+              char const **entry_table = xmalloc((nb_entry + 1) * sizeof(char const *));
+              char const **p = entry_table;
+              for (entry = directory; (entry_len = strlen(entry)) != 0;
+                  entry += entry_len + 1) {
+                *p++ = entry;
+              };
+              *p = (char const *)NULL;
+              qsort(entry_table, nb_entry, sizeof(char const *), strcmp_for_qsort);
+              p = entry_table;
+              while ((entry = *p++)) {
+                entry_len = strlen(entry);
+                /* below 10 lines copied verbatim from above */
+             {
+               if (name_size < name_len + entry_len)
+                 {
+                   name_size = name_len + entry_len;
+                   name_buf = xrealloc (name_buf, name_size + 1);
+                 }
+               strcpy (name_buf + name_len, entry);
+               if (!excluded_name (name_buf))
+                 dump_file (st, entry, name_buf);
+             }
+              };
+              free(entry_table);
+            };
+#endif
 
            free (name_buf);
          }
diff -uNr tar-1.26/src/tar.c tar-1.26b/src/tar.c
--- tar-1.26/src/tar.c  2010-10-24 20:07:31.000000000 +0159
+++ tar-1.26b/src/tar.c 2012-07-08 21:45:23.000000000 +0159
@@ -298,6 +298,11 @@
   NO_OVERWRITE_DIR_OPTION,
   NO_QUOTE_CHARS_OPTION,
   NO_RECURSION_OPTION,
+#ifdef ORIGINAL
+#else
+  SORT_DIRECTORY_ENTRIES_OPTION,
+  NO_SORT_DIRECTORY_ENTRIES_OPTION,
+#endif
   NO_SAME_OWNER_OPTION,
   NO_SAME_PERMISSIONS_OPTION,
   NO_SEEK_OPTION,
@@ -675,6 +680,13 @@
    N_("exclude backup and lock files"), GRID+1 },
   {"no-recursion", NO_RECURSION_OPTION, 0, 0,
    N_("avoid descending automatically in directories"), GRID+1 },
+#ifdef ORIGINAL
+#else
+  {"sort-directory-entries", SORT_DIRECTORY_ENTRIES_OPTION, 0, 0,
+   N_("store directory entries as sorted"), GRID+1 },
+  {"no-sort-directory-entries", NO_SORT_DIRECTORY_ENTRIES_OPTION, 0, 0,
+   N_("store directory entries iaw directory order (default)"), GRID+1 },
+#endif
   {"one-file-system", ONE_FILE_SYSTEM_OPTION, 0, 0,
    N_("stay in local file system when creating archive"), GRID+1 },
   {"recursion", RECURSION_OPTION, 0, 0,
@@ -2071,6 +2083,17 @@
       recursion_option = 0;
       break;
 
+#ifdef ORIGINAL
+#else
+    case SORT_DIRECTORY_ENTRIES_OPTION:
+      sort_directory_entries_option = 1;
+      break;
+
+    case NO_SORT_DIRECTORY_ENTRIES_OPTION:
+      sort_directory_entries_option = 0;
+      break;
+
+#endif
     case NO_SAME_OWNER_OPTION:
       same_owner_option = -1;
       break;
@@ -2237,6 +2260,10 @@
   newer_mtime_option.tv_sec = TYPE_MINIMUM (time_t);
   newer_mtime_option.tv_nsec = -1;
   recursion_option = FNM_LEADING_DIR;
+#ifdef ORIGINAL
+#else
+  sort_directory_entries_option = 0;
+#endif
   unquote_option = true;
   tar_sparse_major = 1;
   tar_sparse_minor = 0;

Reply via email to