The following patch implements --check-fields and --separator.
--
Debian GNU/Linux 2.1 is out! ( http://www.debian.org/ )
Email: Herbert Xu 许志壬 <[EMAIL PROTECTED]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Index: doc/textutils.texi
===================================================================
RCS file: /home/gondor/herbert/src/CVS/debian/textutils/doc/textutils.texi,v
retrieving revision 1.2
diff -u -r1.2 textutils.texi
--- doc/textutils.texi 2000/06/27 10:27:49 1.2
+++ doc/textutils.texi 2000/06/28 03:15:37
@@ -2464,6 +2464,9 @@
If no @var{output} file is specified, @code{uniq} writes to standard
output.
+Fields are sequences of non-space non-tab characters that are separated from
+each other by at least one space or tab, unless a separator is given.
+
The program accepts the following options. Also see @ref{Common options}.
@table @samp
@@ -2474,9 +2477,7 @@
@opindex -@var{n}
@opindex -f
@opindex --skip-fields
-Skip @var{n} fields on each line before checking for uniqueness. Fields
-are sequences of non-space non-tab characters that are separated from
-each other by at least one spaces or tabs.
+Skip @var{n} fields on each line before checking for uniqueness.
@item +@var{n}
@itemx -s @var{n}
@@ -2524,13 +2525,27 @@
@cindex unique lines, outputting
Print only unique lines.
+@item -t @var{sep}
+@itemx --separator=@var{sep}
+@opindex -t
+@opindex --separator
+Use the first character of @var{sep} to delimit fields.
+
@item -w @var{n}
@itemx --check-chars=@var{n}
@opindex -w
@opindex --check-chars
-Compare @var{n} characters on each line (after skipping any specified
-fields and characters). By default the entire rest of the lines are
-compared.
+Compare no more than @var{n} characters on each line (after skipping any
+specified fields and characters). By default the entire rest of the lines
+are compared.
+
+@item -W @var{n}
+@itemx --check-fields=@var{n}
+@opindex -W
+@opindex --check-fields
+Compare no more than @var{n} fields on each line (after skipping any
+specified fields and characters). By default the entire rest of the lines
+are compared.
@end table
Index: man/uniq.1
===================================================================
RCS file: /home/gondor/herbert/src/CVS/debian/textutils/man/uniq.1,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 uniq.1
--- man/uniq.1 1999/08/06 19:24:10 1.1.1.1
+++ man/uniq.1 2000/06/28 03:09:05
@@ -30,12 +30,18 @@
\fB\-s\fR, \fB\-\-skip\-chars\fR=\fIN\fR
avoid comparing the first N characters
.TP
+\fB\-t\fR, \fB\-\-separator\fR=\fISEP\fR
+use SEParator to delimit fields
+.TP
\fB\-u\fR, \fB\-\-unique\fR
only print unique lines
.TP
\fB\-w\fR, \fB\-\-check\-chars\fR=\fIN\fR
compare no more than N characters in lines
.TP
+\fB\-W\fR, \fB\-\-check\-fields\fR=\fIN\fR
+compare no more than N fields in lines
+.TP
\fB\-N\fR
same as \fB\-f\fR N
.TP
@@ -48,8 +54,8 @@
\fB\-\-version\fR
output version information and exit
.PP
-A field is a run of whitespace, then non-whitespace characters.
-Fields are skipped before chars.
+A field is a run of whitespace, then non-whitespace characters, unless a
+SEParator is given. Fields are skipped before chars.
.SH "REPORTING BUGS"
Report bugs to <[EMAIL PROTECTED]>.
.SH "SEE ALSO"
Index: src/uniq.c
===================================================================
RCS file: /home/gondor/herbert/src/CVS/debian/textutils/src/uniq.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 uniq.c
--- src/uniq.c 1999/07/04 10:02:54 1.1.1.1
+++ src/uniq.c 2000/06/28 03:18:16
@@ -47,9 +47,17 @@
/* Number of chars to skip after skipping any fields. */
static int skip_chars;
-/* Number of chars to compare; if 0, compare the whole lines. */
+/* Number of fields to compare; if 0, compare the whole lines. */
+static int check_fields;
+
+/* Number of chars to compare; if 0, compare the whole lines. When used in
+ conjunction with check_fields, the minimum of the two applies. */
static int check_chars;
+/* Separator between fields; if this is NUL, a field is a run of whitespace,
+ then non-whitespace characters. */
+static int tab;
+
enum countmode
{
count_occurrences, /* -c Print count before output lines. */
@@ -84,6 +92,8 @@
{"skip-fields", required_argument, NULL, 'f'},
{"skip-chars", required_argument, NULL, 's'},
{"check-chars", required_argument, NULL, 'w'},
+ {"check-fields", required_argument, NULL, 'W'},
+ {"separator", required_argument, NULL, 't'},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
@@ -111,21 +121,53 @@
-f, --skip-fields=N avoid comparing the first N fields\n\
-i, --ignore-case ignore differences in case when comparing\n\
-s, --skip-chars=N avoid comparing the first N characters\n\
+ -t, --separator=SEP use SEParator to delimit fields\n\
-u, --unique only print unique lines\n\
-w, --check-chars=N compare no more than N characters in lines\n\
+ -W, --check-fields=N compare no more than N fields in lines\n\
-N same as -f N\n\
+N same as -s N\n\
--help display this help and exit\n\
--version output version information and exit\n\
\n\
-A field is a run of whitespace, then non-whitespace characters.\n\
-Fields are skipped before chars.\n\
+A field is a run of whitespace, then non-whitespace characters, unless\n\
+a SEParator is given. Fields are skipped before chars.\n\
"));
puts (_("\nReport bugs to <[EMAIL PROTECTED]>."));
}
exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
+/* Return the byte length of the first N fields of STRING (SIZE bytes long),
+   at most SIZE.  With `tab' set, each field includes its trailing separator. */
+static size_t
+find_field_length (const char *string, size_t size, int n)
+{
+  int count;
+  size_t i = 0;	/* size_t, like SIZE: no signed/unsigned mixing or truncation.  */
+
+  if (tab)	/* Explicit separator: a field ends just past the next `tab'.  */
+    {
+      for (count = 0; count < n && i < size; count++)
+        {
+          while (i < size && string[i++] != tab)
+            ;	/* Bound is tested first, so string[i] is never read past SIZE.  */
+        }
+    }
+  else		/* Default: a field is a run of blanks followed by non-blanks.  */
+    {
+      for (count = 0; count < n && i < size; count++)
+        {
+          while (i < size && ISBLANK (string[i]))
+            i++;
+          while (i < size && !ISBLANK (string[i]))
+            i++;
+        }
+    }
+
+  return i;
+}
+
/* Given a linebuffer LINE,
return a pointer to the beginning of the line's field to be compared. */
@@ -135,15 +177,9 @@
register int count;
register char *lp = line->buffer;
register size_t size = line->length;
- register size_t i = 0;
+ register size_t i;
- for (count = 0; count < skip_fields && i < size; count++)
- {
- while (i < size && ISBLANK (lp[i]))
- i++;
- while (i < size && !ISBLANK (lp[i]))
- i++;
- }
+ i = find_field_length(lp, size, skip_fields);
for (count = 0; count < skip_chars && i < size; count++)
i++;
@@ -161,6 +197,11 @@
{
register int order;
+ if (check_fields)
+ {
+ oldlen = find_field_length(old, oldlen, check_fields);
+ newlen = find_field_length(new, newlen, check_fields);
+ }
if (check_chars)
{
if (oldlen > check_chars)
@@ -292,11 +333,13 @@
skip_chars = 0;
skip_fields = 0;
check_chars = 0;
+ check_fields = 0;
+ tab = 0;
mode = output_all;
countmode = count_none;
- while ((optc = getopt_long (argc, argv, "0123456789cdDf:is:uw:", longopts,
- NULL)) != -1)
+ while ((optc = getopt_long (argc, argv, "0123456789cdDf:is:t:uw:W:",
+ longopts, NULL)) != -1)
{
switch (optc)
{
@@ -356,6 +399,10 @@
}
break;
+ case 't':
+ tab = *optarg;
+ break;
+
case 'u':
mode = output_unique;
break;
@@ -369,6 +416,18 @@
_("invalid number of bytes to compare: `%s'"),
optarg);
check_chars = (int) tmp_long;
+ }
+ break;
+
+ case 'W':
+ {
+ long int tmp_long;
+ if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
+ || tmp_long <= 0 || tmp_long > INT_MAX)
+ error (EXIT_FAILURE, 0,
+ _("invalid number of fields to compare: `%s'"),
+ optarg);
+ check_fields = (int) tmp_long;
}
break;