Hello,
attached there is a patch against coreutils-6.10/sort.c implementing
"Natural order sorting" (e.g. "A1" < "A2" < "A10"), using "-N" parameter.
Please, consider it's inclusion in upstream :-)
Signing papers with copyright is not a problem.
Best regards,
Martin Sarfy
- Forwarded message from Paul Eggert -
To: Martin Sarfy
Subject: Re: sort -N patch: sort in natural order
From: Paul Eggert
Date: Thu, 04 Jun 2009 12:10:04 -0700
Could you please send this directly to bug-coreut...@gnu.org?
Also, assuming it meets favor there, will you and your employer be willing
to sign papers assigning copyright to the FSF?
----- End forwarded message -
--
Best regards
Martin Sarfy
--- coreutils-6.10-sort.c 2009-06-04 17:27:35.0 +0200
+++ coreutils-6.10-sort-natural.c 2009-06-04 17:27:44.0 +0200
@@ -170,6 +170,7 @@
bool random; /* Sort by random hash of key. */
bool general_numeric; /* Flag for general, numeric comparison.
Handle numbers in exponential notation. */
+ bool natural; /* Flag for natural order comparison. */
bool month; /* Flag for comparison by month name. */
bool reverse; /* Reverse the sense of comparison. */
struct keyfield *next; /* Next keyfield to try. */
@@ -327,6 +328,7 @@
-i, --ignore-nonprintingconsider only printable characters\n\
-M, --month-sortcompare (unknown) < `JAN' < ... < `DEC'\n\
-n, --numeric-sort compare according to string numerical value\n\
+ -N, --natural-order-sortsort strings containing numbers in natural order\n\
-R, --random-sort sort by random hash of keys\n\
--random-source=FILEget random bytes from FILE (default /dev/urandom)\n\
-r, --reverse reverse the result of comparisons\n\
@@ -394,7 +396,7 @@
RANDOM_SOURCE_OPTION
};
-static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
+static char const short_options[] = "-bcCdfgik:mMnNo:rRsS:t:T:uy:z";
static struct option const long_options[] =
{
@@ -409,6 +411,7 @@
{"merge", no_argument, NULL, 'm'},
{"month-sort", no_argument, NULL, 'M'},
{"numeric-sort", no_argument, NULL, 'n'},
+ {"natural-order-sort", no_argument, NULL, 'N'},
{"random-sort", no_argument, NULL, 'R'},
{"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
{"output", required_argument, NULL, 'o'},
@@ -1514,6 +1517,139 @@
return strnumcmp (a, b, decimal_point, thousands_sep);
}
+/* Perform 'natural order' comparisons of strings ("A1" < "A2" < "A10")
+ Adoption for "sort" from strnatcmp.c by Martin Sarfy
+ Original author (C) 2000, 2004 by Martin Pool */
+
+static inline int
+nat_isdigit(char a)
+{
+ return isdigit((unsigned char) a);
+}
+
+
+static inline int
+nat_isspace(char a)
+{
+ return isspace((unsigned char) a);
+}
+
+
+static inline char
+nat_toupper(char a)
+{
+ return toupper((unsigned char) a);
+}
+
+static int
+nat_compare_right(char const *a, char const *b)
+{
+ int bias = 0;
+
+ /* The longest run of digits wins. That aside, the greatest
+value wins, but we can't know that it will until we've scanned
+both numbers to know that they have the same magnitude, so we
+remember it in BIAS. */
+ for (;; a++, b++) {
+ if (!nat_isdigit(*a) && !nat_isdigit(*b))
+ return bias;
+ else if (!nat_isdigit(*a))
+ return -1;
+ else if (!nat_isdigit(*b))
+ return +1;
+ else if (*a < *b) {
+ if (!bias)
+bias = -1;
+ } else if (*a > *b) {
+ if (!bias)
+bias = +1;
+ } else if (!*a && !*b)
+ return bias;
+ }
+
+ return 0;
+}
+
+
+static int
+nat_compare_left(char const *a, char const *b)
+{
+ /* Compare two left-aligned numbers: the first to have a
+different value wins. */
+ for (;; a++, b++) {
+ if (!nat_isdigit(*a) && !nat_isdigit(*b))
+ return 0;
+ else if (!nat_isdigit(*a))
+ return -1;
+ else if (!nat_isdigit(*b))
+ return +1;
+ else if (*a < *b)
+ return -1;
+ else if (*a > *b)
+ return +1;
+ }
+
+ return 0;
+}
+
+
+static int strnatcmp0(char const *a, char const *b, int fold_case)
+{
+ int ai, bi;
+ char ca, cb;
+ int fractional, result;
+
+ /* assert(a && b); */
+ ai = bi = 0;
+ while (1) {
+ ca = a[ai]; cb = b[bi];
+
+ /* skip over leading spaces or zeros */
+ while (nat_isspace(ca))
+ ca = a[++ai];
+
+ while (nat_isspace(cb))
+ cb = b[++bi];
+
+ /* process run of digits *