On 20/04/10 09:37, Alan Curry wrote:
> --- coreutils-8.4.orig/src/sort.c 2010-04-20 02:45:35.000000000 -0500
> +++ coreutils-8.4/src/sort.c 2010-04-20 03:12:57.000000000 -0500
> @@ -1460,9 +1460,6 @@
> char *ptr = line->text, *lim = ptr + line->length - 1;
> size_t eword = key->eword, echar = key->echar;
>
> - if (echar == 0)
> - eword++; /* Skip all of end field. */
> -
> /* Move PTR past EWORD fields or to one past the last byte on LINE,
> whichever comes first. If there are more than EWORD fields, leave
> PTR pointing at the beginning of the field having zero-based index,
> @@ -3424,6 +3421,8 @@
> s = parse_field_count (s + 1, &key->echar,
> N_("invalid number after `.'"));
> }
> + if (key->echar == 0)
> + key->eword++; /* Skip all of end field. */
> s = set_ordering (s, key, bl_end);
> }
> if (*s)
I've changed it around in the attached patch so that
we consistently use zero-based limits throughout the code.
I'll push this later on tonight unless there are objections.
cheers,
Pádraig.
>From a76f1ef9c137ab55a5796dcb8dd548e3998101ce Mon Sep 17 00:00:00 2001
From: =?utf-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Tue, 20 Apr 2010 16:25:55 +0100
Subject: [PATCH] sort: fix parsing of end field in obsolescent key formats
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit
This regression was introduced in commit 224a69b5, 2009-02-24,
"sort: Fix two bugs with determining the end of field".
The specific regression is that we include 1 field too many when
an end field is specified using obsolescent key syntax (+POS -POS).
* src/sort.c (main): When processing obsolescent format key specifications,
normalize eword to a zero-based count when no specific end char is given
for an end field. This matches what's done when keys are specified with -k.
* tests/misc/sort: Add a few more tests for the obsolescent key formats,
with test 07i being the particular failure addressed by this change.
* THANKS: Add Alan Curry who precisely identified the issue.
* NEWS: Mention the fix.
Reported by Santiago Rodríguez
---
NEWS | 4 ++++
THANKS | 1 +
src/sort.c | 10 ++++++++++
tests/misc/sort | 6 ++++++
4 files changed, 21 insertions(+), 0 deletions(-)
diff --git a/NEWS b/NEWS
index 867589c..32ea392 100644
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,10 @@ GNU coreutils NEWS -*- outline -*-
handled correctly, including multi byte locales with the caveat
that multi byte characters are matched case sensitively.
+ sort again handles obsolescent key formats (+POS -POS) correctly.
+ Previously if -POS was specified, 1 field too many was used in the sort.
+ [bug introduced in coreutils-7.2]
+
** New features
join now accepts the --header option, to treat the first line of each
diff --git a/THANKS b/THANKS
index fad308a..2ea6801 100644
--- a/THANKS
+++ b/THANKS
@@ -17,6 +17,7 @@ Adrian Bunk [email protected]
AIDA Shinra [email protected]
Akim Demaille [email protected]
Alain Magloire [email protected]
+Alan Curry [email protected]
Alan Iwi [email protected]
Albert Chin-A-Young [email protected]
Albert Hopkins [email protected]
diff --git a/src/sort.c b/src/sort.c
index d619c60..d0e0b66 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -3389,6 +3389,16 @@ main (int argc, char **argv)
if (*s == '.')
s = parse_field_count (s + 1, &key->echar,
N_("invalid number after `.'"));
+ if (!key->echar && key->eword)
+ {
+ /* obsolescent syntax +A.x -B.y is equivalent to:
+ -k A+1.x+1,B.y (when y = 0)
+ -k A+1.x+1,B+1.y (when y > 0)
+ So eword is decremented as in the -k case
+ only when the end field (B) is specified and
+ echar (y) is 0. */
+ key->eword--;
+ }
if (*set_ordering (s, key, bl_end))
badfieldspec (optarg1,
N_("stray character in field spec"));
diff --git a/tests/misc/sort b/tests/misc/sort
index e5d18d0..e871724 100755
--- a/tests/misc/sort
+++ b/tests/misc/sort
@@ -142,6 +142,12 @@ my @Tests =
["07f", '-n -k1.3,1.1', {IN=>"a 2\nb 1\n"}, {OUT=>"a 2\nb 1\n"}],
["07g", '-n -k2.2,1.2', {IN=>"aa 2\nbb 1\n"}, {OUT=>"aa 2\nbb 1\n"}],
["07h", '-k1.3nb,1.3', {IN=>" a 2\n b 1\n"}, {OUT=>" a 2\n b 1\n"}],
+#ensure obsolescent key limits are handled correctly
+["07i", '-s +0 -1', {IN=>"a c\na b\n"}, {OUT=>"a c\na b\n"}],
+["07j", '-s +0 -1.0', {IN=>"a c\na b\n"}, {OUT=>"a c\na b\n"}],
+["07k", '-s +0 -1.1', {IN=>"a c\na b\n"}, {OUT=>"a c\na b\n"}],
+["07l", '-s +0 -1.2', {IN=>"a c\na b\n"}, {OUT=>"a b\na c\n"}],
+["07m", '-s +0 -1.1b', {IN=>"a c\na b\n"}, {OUT=>"a b\na c\n"}],
#
# report an error for `.' without following char spec
["08a", '-k 2.,3', {EXIT=>2},
--
1.6.2.5