On Tue, Jul 10, 2012 at 07:31:50AM +0159, Denis Excoffier wrote:
>> I've read the comments and code in coreutils/src/ls.c and improved
>> the patch in that direction.
>> 
Again improved, this (final) patch uses mpsort() instead of qsort().

Regards,

Denis Excoffier.

diff -uNr tar-1.26a/lib/Makefile.in tar-1.26b/lib/Makefile.in
--- tar-1.26a/lib/Makefile.in   2011-03-12 10:49:46.000000000 +0059
+++ tar-1.26b/lib/Makefile.in   2012-07-11 10:40:56.033513700 +0159
@@ -179,7 +179,7 @@
 am__v_at_0 = @
 libtar_a_AR = $(AR) $(ARFLAGS)
 libtar_a_LIBADD =
-am_libtar_a_OBJECTS = paxerror.$(OBJEXT) paxexit-status.$(OBJEXT) \
+am_libtar_a_OBJECTS = mpsort.$(OBJEXT) paxerror.$(OBJEXT) 
paxexit-status.$(OBJEXT) \
        paxnames.$(OBJEXT) prepargs.$(OBJEXT) rtapelib.$(OBJEXT) \
        stdopen.$(OBJEXT)
 libtar_a_OBJECTS = $(am_libtar_a_OBJECTS)
@@ -973,6 +973,7 @@
 INCLUDES = -I$(top_srcdir)/gnu -I../ -I../gnu
 noinst_HEADERS = system.h system-ioctl.h rmt.h paxlib.h stdopen.h
 libtar_a_SOURCES = \
+  mpsort.c mpsort.h \
   paxerror.c paxexit-status.c paxlib.h paxnames.c \
   prepargs.c prepargs.h \
   rtapelib.c \
@@ -1029,6 +1030,7 @@
 distclean-compile:
        -rm -f *.tab.c
 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpsort.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/paxerror.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/paxexit-status.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/paxnames.Po@am__quote@
diff -uNr tar-1.26a/lib/mpsort.c tar-1.26b/lib/mpsort.c
--- tar-1.26a/lib/mpsort.c      1970-01-01 01:00:00.000000000 +0100
+++ tar-1.26b/lib/mpsort.c      2012-01-06 08:20:26.000000000 +0059
@@ -0,0 +1,156 @@
+/* Sort a vector of pointers to data.
+
+   Copyright (C) 2007, 2009-2012 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Paul Eggert.  */
+
+#include <config.h>
+
+#include "mpsort.h"
+
+#include <string.h>
+
+/* The type of qsort-style comparison functions.  */
+
+typedef int (*comparison_function) (void const *, void const *);
+
+static void mpsort_with_tmp (void const **restrict, size_t,
+                             void const **restrict, comparison_function);
+
+/* Sort a vector BASE containing N pointers, placing the sorted array
+   into TMP.  Compare pointers with CMP.  N must be at least 2.  */
+
+static void
+mpsort_into_tmp (void const **restrict base, size_t n,
+                 void const **restrict tmp,
+                 comparison_function cmp)
+{
+  size_t n1 = n / 2;
+  size_t n2 = n - n1;
+  size_t a = 0;
+  size_t alim = n1;
+  size_t b = n1;
+  size_t blim = n;
+  void const *ba;
+  void const *bb;
+
+  mpsort_with_tmp (base + n1, n2, tmp, cmp);
+  mpsort_with_tmp (base, n1, tmp, cmp);
+
+  ba = base[a];
+  bb = base[b];
+
+  for (;;)
+    if (cmp (ba, bb) <= 0)
+      {
+        *tmp++ = ba;
+        a++;
+        if (a == alim)
+          {
+            a = b;
+            alim = blim;
+            break;
+          }
+        ba = base[a];
+      }
+    else
+      {
+        *tmp++ = bb;
+        b++;
+        if (b == blim)
+          break;
+        bb = base[b];
+      }
+
+  memcpy (tmp, base + a, (alim - a) * sizeof *base);
+}
+
+/* Sort a vector BASE containing N pointers, in place.  Use TMP
+   (containing N / 2 pointers) for temporary storage.  Compare
+   pointers with CMP.  */
+
+static void
+mpsort_with_tmp (void const **restrict base, size_t n,
+                 void const **restrict tmp,
+                 comparison_function cmp)
+{
+  if (n <= 2)
+    {
+      if (n == 2)
+        {
+          void const *p0 = base[0];
+          void const *p1 = base[1];
+          if (! (cmp (p0, p1) <= 0))
+            {
+              base[0] = p1;
+              base[1] = p0;
+            }
+        }
+    }
+  else
+    {
+      size_t n1 = n / 2;
+      size_t n2 = n - n1;
+      size_t i;
+      size_t t = 0;
+      size_t tlim = n1;
+      size_t b = n1;
+      size_t blim = n;
+      void const *bb;
+      void const *tt;
+
+      mpsort_with_tmp (base + n1, n2, tmp, cmp);
+
+      if (n1 < 2)
+        tmp[0] = base[0];
+      else
+        mpsort_into_tmp (base, n1, tmp, cmp);
+
+      tt = tmp[t];
+      bb = base[b];
+
+      for (i = 0; ; )
+        if (cmp (tt, bb) <= 0)
+          {
+            base[i++] = tt;
+            t++;
+            if (t == tlim)
+              break;
+            tt = tmp[t];
+          }
+        else
+          {
+            base[i++] = bb;
+            b++;
+            if (b == blim)
+              {
+                memcpy (base + i, tmp + t, (tlim - t) * sizeof *base);
+                break;
+              }
+            bb = base[b];
+          }
+    }
+}
+
+/* Sort a vector BASE containing N pointers, in place.  BASE must
+   contain enough storage to hold N + N / 2 vectors; the trailing
+   vectors are used for temporaries.  Compare pointers with CMP.  */
+
+void
+mpsort (void const **base, size_t n, comparison_function cmp)
+{
+  mpsort_with_tmp (base, n, base + n, cmp);
+}
diff -uNr tar-1.26a/lib/mpsort.h tar-1.26b/lib/mpsort.h
--- tar-1.26a/lib/mpsort.h      1970-01-01 01:00:00.000000000 +0100
+++ tar-1.26b/lib/mpsort.h      2011-04-24 19:21:21.000000000 +0159
@@ -0,0 +1,2 @@
+#include <stddef.h>
+void mpsort (void const **, size_t, int (*) (void const *, void const *));
diff -uNr tar-1.26a/src/common.h tar-1.26b/src/common.h
--- tar-1.26a/src/common.h      2011-02-11 12:55:49.000000000 +0059
+++ tar-1.26b/src/common.h      2012-07-11 10:40:55.533510500 +0159
@@ -225,6 +225,11 @@
 /* Zero if there is no recursion, otherwise FNM_LEADING_DIR.  */
 GLOBAL int recursion_option;
 
+#ifdef ORIGINAL
+#else
+GLOBAL int sort_directory_entries_option;
+
+#endif
 GLOBAL bool numeric_owner_option;
 
 GLOBAL bool one_file_system_option;
diff -uNr tar-1.26a/src/create.c tar-1.26b/src/create.c
--- tar-1.26a/src/create.c      2011-03-12 10:09:09.000000000 +0059
+++ tar-1.26b/src/create.c      2012-07-11 10:40:55.611636000 +0159
@@ -1,3 +1,11 @@
+#ifdef ORIGINAL
+#else
+/* #define exactly one among USE_QSORT and USE_MPSORT */
+#define USE_QSORT
+#undef USE_QSORT
+#undef USE_MPSORT
+#define USE_MPSORT
+#endif
 /* Create a tar archive.
 
    Copyright (C) 1985, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001,
@@ -20,6 +28,13 @@
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
 
 #include <system.h>
+#ifdef ORIGINAL
+#else
+#include <setjmp.h>
+#ifdef USE_MPSORT
+#include <mpsort.h>
+#endif
+#endif
 
 #include <quotearg.h>
 
@@ -1074,6 +1089,48 @@
     }
   return dump_status_ok;
 }
+#ifdef ORIGINAL
+#else
+
+/* setjmp/longjmp/xstrcoll code borrowed from coreutils/src/ls.c */
+static jmp_buf failed_strcoll;
+
+typedef int (*qsortFunc)(void const *a, void const *b);
+
+static int
+xstrcoll (char const *a, char const *b)
+{
+  int diff;
+  errno = 0;
+  diff = strcoll (a, b);
+  if (errno)
+    {
+      error (0, errno, _("cannot compare file names %s and %s"),
+             quote_n (0, a), quote_n (1, b));
+      set_exit_status (TAREXIT_FAILURE);
+      longjmp (failed_strcoll, 1);
+    }
+  return diff;
+}
+#ifdef USE_QSORT
+
+static int
+xstrcoll_for_qsort (void const *ps1, void const *ps2)
+{
+  char const *const *xs1 = ps1;
+  char const *const *xs2 = ps2;
+  return xstrcoll (*xs1, *xs2);
+}
+
+static int
+strcmp_for_qsort (void const *ps1, void const *ps2)
+{
+  char const *const *xs1 = ps1;
+  char const *const *xs2 = ps2;
+  return strcmp (*xs1, *xs2);
+}
+#endif
+#endif
 
 
 /* Copy info from the directory identified by ST into the archive.
@@ -1181,6 +1238,17 @@
            name_size = name_len = strlen (name_buf);
 
            /* Now output all the files in the directory.  */
+#ifdef ORIGINAL
+#else
+            size_t nb_entry = 0;
+            if (sort_directory_entries_option) {
+              for (entry = directory; (entry_len = strlen (entry)) != 0;
+                  entry += entry_len + 1) {
+                ++nb_entry;
+              };
+            };
+            if (!sort_directory_entries_option || nb_entry < 2) {
+#endif
            for (entry = directory; (entry_len = strlen (entry)) != 0;
                 entry += entry_len + 1)
              {
@@ -1193,6 +1261,53 @@
                if (!excluded_name (name_buf))
                  dump_file (st, entry, name_buf);
              }
+#ifdef ORIGINAL
+#else
+            } else {
+#ifdef USE_QSORT
+              char const **entry_table = xmalloc ((nb_entry + 1) * sizeof 
(char const *));
+#endif
+#ifdef USE_MPSORT
+              /* since nb_entry is here >= 2, the extra slot needed is the 
first from nb_entry / 2 */
+              char const **entry_table = xmalloc ((nb_entry + nb_entry / 2) * 
sizeof (char const *));
+#endif
+              /* setjmp/longjmp/xstrcoll code borrowed from coreutils/src/ls.c 
*/
+#ifdef USE_QSORT
+              qsortFunc compar_for_qsort = setjmp (failed_strcoll) ? 
strcmp_for_qsort : xstrcoll_for_qsort;
+#endif
+#ifdef USE_MPSORT
+              qsortFunc compar_for_mpsort = setjmp (failed_strcoll) ? 
(qsortFunc)strcmp : (qsortFunc)xstrcoll;
+#endif
+              char const **p = entry_table;
+              for (entry = directory; (entry_len = strlen (entry)) != 0;
+                  entry += entry_len + 1) {
+                *p++ = entry;
+              };
+#ifdef USE_QSORT
+              qsort (entry_table, nb_entry, sizeof (char const *), 
compar_for_qsort);
+#endif
+#ifdef USE_MPSORT
+              mpsort ((void const **)entry_table, nb_entry, compar_for_mpsort);
+#endif
+              *p = (char const *)NULL;
+              p = entry_table;
+              while ((entry = *p++)) {
+                entry_len = strlen (entry);
+                /* below 10 lines copied verbatim from above */
+             {
+               if (name_size < name_len + entry_len)
+                 {
+                   name_size = name_len + entry_len;
+                   name_buf = xrealloc (name_buf, name_size + 1);
+                 }
+               strcpy (name_buf + name_len, entry);
+               if (!excluded_name (name_buf))
+                 dump_file (st, entry, name_buf);
+             }
+              };
+              free (entry_table);
+            };
+#endif
 
            free (name_buf);
          }
diff -uNr tar-1.26a/src/tar.c tar-1.26b/src/tar.c
--- tar-1.26a/src/tar.c 2010-10-24 20:07:31.000000000 +0159
+++ tar-1.26b/src/tar.c 2012-07-11 10:40:55.799137200 +0159
@@ -298,6 +298,11 @@
   NO_OVERWRITE_DIR_OPTION,
   NO_QUOTE_CHARS_OPTION,
   NO_RECURSION_OPTION,
+#ifdef ORIGINAL
+#else
+  SORT_DIRECTORY_ENTRIES_OPTION,
+  NO_SORT_DIRECTORY_ENTRIES_OPTION,
+#endif
   NO_SAME_OWNER_OPTION,
   NO_SAME_PERMISSIONS_OPTION,
   NO_SEEK_OPTION,
@@ -675,6 +680,13 @@
    N_("exclude backup and lock files"), GRID+1 },
   {"no-recursion", NO_RECURSION_OPTION, 0, 0,
    N_("avoid descending automatically in directories"), GRID+1 },
+#ifdef ORIGINAL
+#else
+  {"sort-directory-entries", SORT_DIRECTORY_ENTRIES_OPTION, 0, 0,
+   N_("store directory entries as sorted"), GRID+1 },
+  {"no-sort-directory-entries", NO_SORT_DIRECTORY_ENTRIES_OPTION, 0, 0,
+   N_("store directory entries iaw directory order (default)"), GRID+1 },
+#endif
   {"one-file-system", ONE_FILE_SYSTEM_OPTION, 0, 0,
    N_("stay in local file system when creating archive"), GRID+1 },
   {"recursion", RECURSION_OPTION, 0, 0,
@@ -2071,6 +2083,17 @@
       recursion_option = 0;
       break;
 
+#ifdef ORIGINAL
+#else
+    case SORT_DIRECTORY_ENTRIES_OPTION:
+      sort_directory_entries_option = 1;
+      break;
+
+    case NO_SORT_DIRECTORY_ENTRIES_OPTION:
+      sort_directory_entries_option = 0;
+      break;
+
+#endif
     case NO_SAME_OWNER_OPTION:
       same_owner_option = -1;
       break;
@@ -2237,6 +2260,10 @@
   newer_mtime_option.tv_sec = TYPE_MINIMUM (time_t);
   newer_mtime_option.tv_nsec = -1;
   recursion_option = FNM_LEADING_DIR;
+#ifdef ORIGINAL
+#else
+  sort_directory_entries_option = 0;
+#endif
   unquote_option = true;
   tar_sparse_major = 1;
   tar_sparse_minor = 0;

Reply via email to