Re: [egg...@cs.ucla.edu: Re: sort -N patch: sort in natural order]

2009-06-05 Thread Martin Sarfy

You are right, "-V" is exactly the same algorithm. The moral of the 
story is that next time I will patch against latest greatest source 
code version :-)

Have a nice day
Martin

On Fri, Jun 05, 2009 at 04:43:05PM +0100, Pádraig Brady wrote:
> 
> Thanks very much for that. However I think this option
> added in release 7.0 covers this functionality?
> 
> -V, --version-sort
>natural sort of (version) numbers within text
> 
> Could you check if your patch handles cases that -V doesn't?
> 
> cheers,
> Pádraig.
> 

-- 
Best regards
Martin Sarfy


___
Bug-coreutils mailing list
Bug-coreutils@gnu.org
http://lists.gnu.org/mailman/listinfo/bug-coreutils


[egg...@cs.ucla.edu: Re: sort -N patch: sort in natural order]

2009-06-05 Thread Martin Sarfy

Hello,

attached there is a patch against coreutils-6.10/sort.c implementing
"Natural order sorting" (e.g. "A1" < "A2" < "A10"), using "-N" parameter.

Please, consider it's inclusion in upstream :-)

Signing papers with copyright is not a problem.

Best regards,
Martin Sarfy

- Forwarded message from Paul Eggert  -

To: Martin Sarfy 
Subject: Re: sort -N patch: sort in natural order
From: Paul Eggert 
Date: Thu, 04 Jun 2009 12:10:04 -0700

Could you please send this directly to bug-coreut...@gnu.org?
Also, assuming it meets favor there, will you and your employer be willing
to sign papers assigning copyright to the FSF?


----- End forwarded message -

-- 
Best regards
Martin Sarfy
--- coreutils-6.10-sort.c	2009-06-04 17:27:35.0 +0200
+++ coreutils-6.10-sort-natural.c	2009-06-04 17:27:44.0 +0200
@@ -170,6 +170,7 @@
   bool random;			/* Sort by random hash of key.  */
   bool general_numeric;		/* Flag for general, numeric comparison.
    Handle numbers in exponential notation. */
+  bool natural;			/* Flag for natural order comparison. */
   bool month;			/* Flag for comparison by month name. */
   bool reverse;			/* Reverse the sense of comparison. */
   struct keyfield *next;	/* Next keyfield to try. */
@@ -327,6 +328,7 @@
   -i, --ignore-nonprintingconsider only printable characters\n\
   -M, --month-sortcompare (unknown) < `JAN' < ... < `DEC'\n\
   -n, --numeric-sort  compare according to string numerical value\n\
+  -N, --natural-order-sortsort strings containing numbers in natural order\n\
   -R, --random-sort   sort by random hash of keys\n\
   --random-source=FILEget random bytes from FILE (default /dev/urandom)\n\
   -r, --reverse   reverse the result of comparisons\n\
@@ -394,7 +396,7 @@
   RANDOM_SOURCE_OPTION
 };
 
-static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
+static char const short_options[] = "-bcCdfgik:mMnNo:rRsS:t:T:uy:z";
 
 static struct option const long_options[] =
 {
@@ -409,6 +411,7 @@
   {"merge", no_argument, NULL, 'm'},
   {"month-sort", no_argument, NULL, 'M'},
   {"numeric-sort", no_argument, NULL, 'n'},
+  {"natural-order-sort", no_argument, NULL, 'N'},
   {"random-sort", no_argument, NULL, 'R'},
   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
   {"output", required_argument, NULL, 'o'},
@@ -1514,6 +1517,139 @@
   return strnumcmp (a, b, decimal_point, thousands_sep);
 }
 
+/* Perform 'natural order' comparisons of strings ("A1" < "A2" < "A10")
+   Adoption for "sort" from strnatcmp.c by Martin Sarfy 
+   Original author (C) 2000, 2004 by Martin Pool   */
+
+static inline int
+nat_isdigit(char a)
+{
+ return isdigit((unsigned char) a);
+}
+
+
+static inline int
+nat_isspace(char a)
+{
+ return isspace((unsigned char) a);
+}
+
+
+static inline char
+nat_toupper(char a)
+{
+ return toupper((unsigned char) a);
+}
+
+static int
+nat_compare_right(char const *a, char const *b)
+{
+ int bias = 0;
+
+ /* The longest run of digits wins.  That aside, the greatest
+value wins, but we can't know that it will until we've scanned
+both numbers to know that they have the same magnitude, so we
+remember it in BIAS. */
+ for (;; a++, b++) {
+  if (!nat_isdigit(*a)  &&  !nat_isdigit(*b))
+   return bias;
+  else if (!nat_isdigit(*a))
+   return -1;
+  else if (!nat_isdigit(*b))
+   return +1;
+  else if (*a < *b) {
+   if (!bias)
+bias = -1;
+  } else if (*a > *b) {
+   if (!bias)
+bias = +1;
+  } else if (!*a  &&  !*b)
+   return bias;
+ }
+
+ return 0;
+}
+
+
+static int
+nat_compare_left(char const *a, char const *b)
+{
+ /* Compare two left-aligned numbers: the first to have a
+different value wins. */
+ for (;; a++, b++) {
+  if (!nat_isdigit(*a)  &&  !nat_isdigit(*b))
+   return 0;
+  else if (!nat_isdigit(*a))
+   return -1;
+  else if (!nat_isdigit(*b))
+   return +1;
+  else if (*a < *b)
+   return -1;
+  else if (*a > *b)
+   return +1;
+ }
+
+ return 0;
+}
+
+
+static int strnatcmp0(char const *a, char const *b, int fold_case)
+{
+ int ai, bi;
+ char ca, cb;
+ int fractional, result;
+
+ /* assert(a && b); */
+ ai = bi = 0;
+ while (1) {
+  ca = a[ai]; cb = b[bi];
+
+  /* skip over leading spaces or zeros */
+  while (nat_isspace(ca))
+   ca = a[++ai];
+
+  while (nat_isspace(cb))
+   cb = b[++bi];
+
+  /* process run of digits *