On Mon, Jul 09, 2012 at 12:58:31AM -0700, Paul Eggert wrote:
>> One problem I can see is that the sort does not
>> use the locale's collating rules, which will undoubtedly
>> run afoul of somebody somewhere. Conversely, locale-specific
>> comparison can fail, which is a pain to work around (see how
>> GNU 'ls' does it -- ouch!). We may have to bite the bullet
>> and do things the 'ls' way....
>>
I've read the comments and code in coreutils/src/ls.c and improved
the patch in that direction.
It seems to me that the comment 'strcoll() succeeded' in ls.c should
be replaced by something like 'let's try first with strcoll()'.
Regards,
Denis Excoffier.
diff -uNr tar-1.26/src/common.h tar-1.26c/src/common.h
--- tar-1.26/src/common.h 2011-02-11 12:55:49.000000000 +0059
+++ tar-1.26c/src/common.h 2012-07-08 21:39:23.000000000 +0159
@@ -225,6 +225,11 @@
/* Zero if there is no recursion, otherwise FNM_LEADING_DIR. */
GLOBAL int recursion_option;
+#ifdef ORIGINAL
+#else
+GLOBAL int sort_directory_entries_option;
+
+#endif
GLOBAL bool numeric_owner_option;
GLOBAL bool one_file_system_option;
diff -uNr tar-1.26/src/create.c tar-1.26c/src/create.c
--- tar-1.26/src/create.c 2011-03-12 10:09:09.000000000 +0059
+++ tar-1.26c/src/create.c 2012-07-10 07:16:19.000000000 +0159
@@ -20,6 +20,10 @@
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#include <system.h>
+#ifdef ORIGINAL
+#else
+#include <setjmp.h>
+#endif
#include <quotearg.h>
@@ -1074,6 +1078,43 @@
}
return dump_status_ok;
}
+#ifdef ORIGINAL
+#else
+
+/* setjmp/longjmp code borrowed from coreutils/src/ls.c */
+static jmp_buf failed_strcoll; /* jump target that xstrcoll longjmps to when errno is set after strcoll */
+
+typedef int (*qsortFunc)(void const *a, void const *b); /* type of the comparison callbacks handed to qsort */
+
+static int xstrcoll (char const *a, char const *b) /* strcoll (A, B); if errno is set afterwards, report it and escape via longjmp */
+{
+ int diff;
+ errno = 0; /* clear errno so that a nonzero value after strcoll can be detected below */
+ diff = strcoll (a, b);
+ if (errno)
+ {
+ error (0, errno, _("cannot compare file names %s and %s"),
+ quote_n (0, a), quote_n (1, b));
+ set_exit_status (TAREXIT_FAILURE);
+ longjmp (failed_strcoll, 1); /* control resumes at setjmp (failed_strcoll), which then yields 1 */
+ }
+ return diff; /* reached only when errno stayed 0 */
+}
+
+static int xstrcoll_for_qsort (void const *ps1, void const *ps2) /* qsort callback: order two entries with xstrcoll */
+{
+ char const *const *xs1 = ps1; /* PS1 and PS2 are treated as pointers to char const * elements */
+ char const *const *xs2 = ps2;
+ return xstrcoll (*xs1, *xs2); /* may not return: xstrcoll longjmps when errno is set */
+}
+
+static int strcmp_for_qsort (void const *ps1, void const *ps2) /* qsort callback: order two entries with strcmp */
+{
+ char const *const *xs1 = ps1; /* PS1 and PS2 are treated as pointers to char const * elements */
+ char const *const *xs2 = ps2;
+ return strcmp (*xs1, *xs2); /* selected as comparator when setjmp (failed_strcoll) returns nonzero */
+}
+#endif
/* Copy info from the directory identified by ST into the archive.
@@ -1181,6 +1222,10 @@
name_size = name_len = strlen (name_buf);
/* Now output all the files in the directory. */
+#ifdef ORIGINAL
+#else
+ if (!sort_directory_entries_option) {
+#endif
for (entry = directory; (entry_len = strlen (entry)) != 0;
entry += entry_len + 1)
{
@@ -1193,6 +1238,44 @@
if (!excluded_name (name_buf))
dump_file (st, entry, name_buf);
}
+#ifdef ORIGINAL
+#else
+ } else {
+ unsigned int nb_entry = 0;
+ for (entry = directory; (entry_len = strlen(entry)) != 0;
+ entry += entry_len + 1) {
+ ++nb_entry;
+ };
+ /* improve: don't do this if less than two entries */
+ char const **entry_table = xmalloc ((nb_entry + 1) * sizeof(char
const *));
+ /* setjmp/longjmp code borrowed from coreutils/src/ls.c */
+ qsortFunc compar_for_qsort = setjmp (failed_strcoll) ?
strcmp_for_qsort : xstrcoll_for_qsort;
+ char const **p = entry_table;
+ for (entry = directory; (entry_len = strlen(entry)) != 0;
+ entry += entry_len + 1) {
+ *p++ = entry;
+ };
+ *p = (char const *)NULL;
+ /* improve: replace qsort with mpsort */
+ qsort (entry_table, nb_entry, sizeof (char const *),
compar_for_qsort);
+ p = entry_table;
+ while ((entry = *p++)) {
+ entry_len = strlen (entry);
+ /* below 10 lines copied verbatim from above */
+ {
+ if (name_size < name_len + entry_len)
+ {
+ name_size = name_len + entry_len;
+ name_buf = xrealloc (name_buf, name_size + 1);
+ }
+ strcpy (name_buf + name_len, entry);
+ if (!excluded_name (name_buf))
+ dump_file (st, entry, name_buf);
+ }
+ };
+ free (entry_table);
+ };
+#endif
free (name_buf);
}
diff -uNr tar-1.26/src/tar.c tar-1.26c/src/tar.c
--- tar-1.26/src/tar.c 2010-10-24 20:07:31.000000000 +0159
+++ tar-1.26c/src/tar.c 2012-07-08 21:45:23.000000000 +0159
@@ -298,6 +298,11 @@
NO_OVERWRITE_DIR_OPTION,
NO_QUOTE_CHARS_OPTION,
NO_RECURSION_OPTION,
+#ifdef ORIGINAL
+#else
+ SORT_DIRECTORY_ENTRIES_OPTION,
+ NO_SORT_DIRECTORY_ENTRIES_OPTION,
+#endif
NO_SAME_OWNER_OPTION,
NO_SAME_PERMISSIONS_OPTION,
NO_SEEK_OPTION,
@@ -675,6 +680,13 @@
N_("exclude backup and lock files"), GRID+1 },
{"no-recursion", NO_RECURSION_OPTION, 0, 0,
N_("avoid descending automatically in directories"), GRID+1 },
+#ifdef ORIGINAL
+#else
+ {"sort-directory-entries", SORT_DIRECTORY_ENTRIES_OPTION, 0, 0,
+ N_("store directory entries as sorted"), GRID+1 },
+ {"no-sort-directory-entries", NO_SORT_DIRECTORY_ENTRIES_OPTION, 0, 0,
+ N_("store directory entries iaw directory order (default)"), GRID+1 },
+#endif
{"one-file-system", ONE_FILE_SYSTEM_OPTION, 0, 0,
N_("stay in local file system when creating archive"), GRID+1 },
{"recursion", RECURSION_OPTION, 0, 0,
@@ -2071,6 +2083,17 @@
recursion_option = 0;
break;
+#ifdef ORIGINAL
+#else
+ case SORT_DIRECTORY_ENTRIES_OPTION:
+ sort_directory_entries_option = 1;
+ break;
+
+ case NO_SORT_DIRECTORY_ENTRIES_OPTION:
+ sort_directory_entries_option = 0;
+ break;
+
+#endif
case NO_SAME_OWNER_OPTION:
same_owner_option = -1;
break;
@@ -2237,6 +2260,10 @@
newer_mtime_option.tv_sec = TYPE_MINIMUM (time_t);
newer_mtime_option.tv_nsec = -1;
recursion_option = FNM_LEADING_DIR;
+#ifdef ORIGINAL
+#else
+ sort_directory_entries_option = 0;
+#endif
unquote_option = true;
tar_sparse_major = 1;
tar_sparse_minor = 0;