From c53ab1f18f429923f202125bea27b1276d7b9f87 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgordon@gmail.com>
Date: Fri, 17 Jul 2015 23:30:30 -0400
Subject: [PATCH 1/3] cut: refactor into set-fields module

Extract the functionality of parsing --field=LIST into a separate
module, to be used by other programs.

* src/cut.c: move field parsing code from here ...
* src/set-fields.{c,h}: ... to here.
  set_fields(): generalize by supporting multiple parsing/reporting
  options.
  struct range_pair: rename to field_range_pair.
* src/local.mk: link cut with set-fields.
* po/POTFILES.in: add set-fields.c
---
 po/POTFILES.in   |   1 +
 src/cut.c        | 257 +++------------------------------------------
 src/local.mk     |   3 +
 src/set-fields.c | 309 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/set-fields.h |  49 +++++++++
 5 files changed, 374 insertions(+), 245 deletions(-)
 create mode 100644 src/set-fields.c
 create mode 100644 src/set-fields.h

diff --git a/po/POTFILES.in b/po/POTFILES.in
index abec109..b3fe668 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -109,6 +109,7 @@ src/rmdir.c
 src/runcon.c
 src/selinux.c
 src/seq.c
+src/set-fields.c
 src/shred.c
 src/shuf.c
 src/sleep.c
diff --git a/src/cut.c b/src/cut.c
index 4f80ebd..9ed8481 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -34,9 +34,10 @@
 #include "fadvise.h"
 #include "getndelim2.h"
 #include "hash.h"
-#include "quote.h"
 #include "xstrndup.h"
 
+#include "set-fields.h"
+
 /* The official name of this program (e.g., no 'g' prefix).  */
 #define PROGRAM_NAME "cut"
 
@@ -54,41 +55,11 @@
   while (0)
 
 
-struct range_pair
-  {
-    size_t lo;
-    size_t hi;
-  };
-
-/* Array of `struct range_pair' holding all the finite ranges. */
-static struct range_pair *rp;
-
 /* Pointer inside RP.  When checking if a byte or field is selected
    by a finite range, we check if it is between CURRENT_RP.LO
    and CURRENT_RP.HI.  If the byte or field index is greater than
    CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */
-static struct range_pair *current_rp;
-
-/* Number of finite ranges specified by the user. */
-static size_t n_rp;
-
-/* Number of `struct range_pair's allocated. */
-static size_t n_rp_allocated;
-
-
-/* Append LOW, HIGH to the list RP of range pairs, allocating additional
-   space if necessary.  Update global variable N_RP.  When allocating,
-   update global variable N_RP_ALLOCATED.  */
-
-static void
-add_range_pair (size_t lo, size_t hi)
-{
-  if (n_rp == n_rp_allocated)
-    rp = X2NREALLOC (rp, &n_rp_allocated);
-  rp[n_rp].lo = lo;
-  rp[n_rp].hi = hi;
-  ++n_rp;
-}
+static struct field_range_pair *current_rp;
 
 /* This buffer is used to support the semantics of the -s option
    (or lack of same) when the specified field list includes (does
@@ -221,208 +192,6 @@ Each range is one of:\n\
   exit (status);
 }
 
-/* Comparison function for qsort to order the list of
-   struct range_pairs.  */
-static int
-compare_ranges (const void *a, const void *b)
-{
-  int a_start = ((const struct range_pair *) a)->lo;
-  int b_start = ((const struct range_pair *) b)->lo;
-  return a_start < b_start ? -1 : a_start > b_start;
-}
-
-/* Reallocate Range Pair entries, with corresponding
-   entries outside the range of each specified entry.  */
-
-static void
-complement_rp (void)
-{
-  if (complement)
-    {
-      struct range_pair *c = rp;
-      size_t n = n_rp;
-      size_t i;
-
-      rp = NULL;
-      n_rp = 0;
-      n_rp_allocated = 0;
-
-      if (c[0].lo > 1)
-        add_range_pair (1, c[0].lo - 1);
-
-      for (i = 1; i < n; ++i)
-        {
-          if (c[i-1].hi + 1 == c[i].lo)
-            continue;
-
-          add_range_pair (c[i-1].hi + 1, c[i].lo - 1);
-        }
-
-      if (c[n-1].hi < SIZE_MAX)
-        add_range_pair (c[n-1].hi + 1, SIZE_MAX);
-
-      free (c);
-    }
-}
-
-/* Given the list of field or byte range specifications FIELDSTR,
-   allocate and initialize the RP array. FIELDSTR should
-   be composed of one or more numbers or ranges of numbers, separated
-   by blanks or commas.  Incomplete ranges may be given: '-m' means '1-m';
-   'n-' means 'n' through end of line.
-   Return true if FIELDSTR contains at least one field specification,
-   false otherwise.  */
-
-static bool
-set_fields (const char *fieldstr)
-{
-  size_t initial = 1;		/* Value of first number in a range.  */
-  size_t value = 0;		/* If nonzero, a number being accumulated.  */
-  bool lhs_specified = false;
-  bool rhs_specified = false;
-  bool dash_found = false;	/* True if a '-' is found in this field.  */
-  bool field_found = false;	/* True if at least one field spec
-                                   has been processed.  */
-
-  size_t i;
-  bool in_digits = false;
-
-  /* Collect and store in RP the range end points. */
-
-  while (true)
-    {
-      if (*fieldstr == '-')
-        {
-          in_digits = false;
-          /* Starting a range. */
-          if (dash_found)
-            FATAL_ERROR (_("invalid byte, character or field list"));
-          dash_found = true;
-          fieldstr++;
-
-          if (lhs_specified && !value)
-            FATAL_ERROR (_("fields and positions are numbered from 1"));
-
-          initial = (lhs_specified ? value : 1);
-          value = 0;
-        }
-      else if (*fieldstr == ','
-               || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
-        {
-          in_digits = false;
-          /* Ending the string, or this field/byte sublist. */
-          if (dash_found)
-            {
-              dash_found = false;
-
-              if (!lhs_specified && !rhs_specified)
-                FATAL_ERROR (_("invalid range with no endpoint: -"));
-
-              /* A range.  Possibilities: -n, m-n, n-.
-                 In any case, 'initial' contains the start of the range. */
-              if (!rhs_specified)
-                {
-                  /* 'n-'.  From 'initial' to end of line. */
-                  add_range_pair (initial, SIZE_MAX);
-                  field_found = true;
-                }
-              else
-                {
-                  /* 'm-n' or '-n' (1-n). */
-                  if (value < initial)
-                    FATAL_ERROR (_("invalid decreasing range"));
-
-                  add_range_pair (initial, value);
-                  field_found = true;
-                }
-              value = 0;
-            }
-          else
-            {
-              /* A simple field number, not a range. */
-              if (value == 0)
-                FATAL_ERROR (_("fields and positions are numbered from 1"));
-              add_range_pair (value, value);
-              value = 0;
-              field_found = true;
-            }
-
-          if (*fieldstr == '\0')
-            break;
-
-          fieldstr++;
-          lhs_specified = false;
-          rhs_specified = false;
-        }
-      else if (ISDIGIT (*fieldstr))
-        {
-          /* Record beginning of digit string, in case we have to
-             complain about it.  */
-          static char const *num_start;
-          if (!in_digits || !num_start)
-            num_start = fieldstr;
-          in_digits = true;
-
-          if (dash_found)
-            rhs_specified = 1;
-          else
-            lhs_specified = 1;
-
-          /* Detect overflow.  */
-          if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t)
-              || value == SIZE_MAX)
-            {
-              /* In case the user specified -c$(echo 2^64|bc),22,
-                 complain only about the first number.  */
-              /* Determine the length of the offending number.  */
-              size_t len = strspn (num_start, "0123456789");
-              char *bad_num = xstrndup (num_start, len);
-              if (operating_mode == byte_mode)
-                error (0, 0,
-                       _("byte offset %s is too large"), quote (bad_num));
-              else
-                error (0, 0,
-                       _("field number %s is too large"), quote (bad_num));
-              free (bad_num);
-              exit (EXIT_FAILURE);
-            }
-
-          fieldstr++;
-        }
-      else
-        FATAL_ERROR (_("invalid byte, character or field list"));
-    }
-
-  qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
-
-  /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */
-  for (i = 0; i < n_rp; ++i)
-    {
-      for (size_t j = i + 1; j < n_rp; ++j)
-        {
-          if (rp[j].lo <= rp[i].hi)
-            {
-              rp[i].hi = MAX (rp[j].hi, rp[i].hi);
-              memmove (rp + j, rp + j + 1, (n_rp - j - 1) * sizeof *rp);
-              n_rp--;
-              j--;
-            }
-          else
-            break;
-        }
-    }
-
-  complement_rp ();
-
-  /* After merging, reallocate RP so we release memory to the system.
-     Also add a sentinel at the end of RP, to avoid out of bounds access
-     and for performance reasons.  */
-  ++n_rp;
-  rp = xrealloc (rp, n_rp * sizeof (struct range_pair));
-  rp[n_rp - 1].lo = rp[n_rp - 1].hi = SIZE_MAX;
-
-  return field_found;
-}
 
 /* Increment *ITEM_IDX (i.e., a field or byte index),
    and if required CURRENT_RP.  */
@@ -463,7 +232,7 @@ cut_bytes (FILE *stream)
 
   byte_idx = 0;
   print_delimiter = false;
-  current_rp = rp;
+  current_rp = frp;
   while (true)
     {
       int c;		/* Each character from the file. */
@@ -475,7 +244,7 @@ cut_bytes (FILE *stream)
           putchar ('\n');
           byte_idx = 0;
           print_delimiter = false;
-          current_rp = rp;
+          current_rp = frp;
         }
       else if (c == EOF)
         {
@@ -514,7 +283,7 @@ cut_fields (FILE *stream)
   bool found_any_selected_field = false;
   bool buffer_first_field;
 
-  current_rp = rp;
+  current_rp = frp;
 
   c = getc (stream);
   if (c == EOF)
@@ -642,7 +411,7 @@ cut_fields (FILE *stream)
           if (c == EOF)
             break;
           field_idx = 1;
-          current_rp = rp;
+          current_rp = frp;
           found_any_selected_field = false;
         }
     }
@@ -793,13 +562,9 @@ main (int argc, char **argv)
     FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
 \tonly when operating on fields"));
 
-  if (! set_fields (spec_list_string))
-    {
-      if (operating_mode == field_mode)
-        FATAL_ERROR (_("missing list of fields"));
-      else
-        FATAL_ERROR (_("missing list of positions"));
-    }
+  set_fields (spec_list_string,
+              ((operating_mode == field_mode)?0:SETFLD_ERRMSG_USE_POS)
+              | (complement?SETFLD_COMPLEMENT:0) );
 
   if (!delim_specified)
     delim = '\t';
@@ -826,5 +591,7 @@ main (int argc, char **argv)
       ok = false;
     }
 
+  IF_LINT (reset_fields ());
+
   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
 }
diff --git a/src/local.mk b/src/local.mk
index 3fb1676..23ff951 100644
--- a/src/local.mk
+++ b/src/local.mk
@@ -55,6 +55,7 @@ noinst_HEADERS =		\
   src/operand2sig.h		\
   src/prog-fprintf.h		\
   src/remove.h			\
+  src/set-fields.h		\
   src/system.h			\
   src/uname.h
 
@@ -386,6 +387,8 @@ src_stat_SOURCES = src/stat.c src/find-mount-point.c
 src_uname_SOURCES = src/uname.c src/uname-uname.c
 src_arch_SOURCES = src/uname.c src/uname-arch.c
 
+src_cut_SOURCES = src/cut.c src/set-fields.c
+
 src_md5sum_CPPFLAGS = -DHASH_ALGO_MD5=1 $(AM_CPPFLAGS)
 src_sha1sum_SOURCES = src/md5sum.c
 src_sha1sum_CPPFLAGS = -DHASH_ALGO_SHA1=1 $(AM_CPPFLAGS)
diff --git a/src/set-fields.c b/src/set-fields.c
new file mode 100644
index 0000000..432c58b
--- /dev/null
+++ b/src/set-fields.c
@@ -0,0 +1,309 @@
+/* set-fields.c -- common functions for parsing field list
+   Copyright (C) 2008-2015 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Extracted from cut.c by Assaf Gordon */
+
+#include <config.h>
+
+#include "system.h"
+#include "error.h"
+#include "quote.h"
+#include "xstrndup.h"
+#include "set-fields.h"
+
+/* Array of `struct field_range_pair' holding all the finite ranges. */
+struct field_range_pair *frp;
+
+/* Number of finite ranges specified by the user. */
+size_t n_frp;
+
+/* Number of `struct field_range_pair's allocated. */
+static size_t n_frp_allocated;
+
+#define FATAL_ERROR(Message)                                            \
+  do                                                                    \
+    {                                                                   \
+      error (0, 0, (Message));                                          \
+      usage (EXIT_FAILURE);                                             \
+    }                                                                   \
+  while (0)
+
+/* Append the range LO-HI to the list FRP of range pairs, growing the
+   array via X2NREALLOC when it is full.  Update global variable N_FRP;
+   when reallocating, N_FRP_ALLOCATED is updated as well.  */
+static void
+add_range_pair (size_t lo, size_t hi)
+{
+  if (n_frp == n_frp_allocated)
+    frp = X2NREALLOC (frp, &n_frp_allocated);
+  frp[n_frp].lo = lo;
+  frp[n_frp].hi = hi;
+  ++n_frp;
+}
+
+
+/* Comparison function for qsort: order struct field_range_pairs by LO.
+   LO is compared as size_t so values above INT_MAX are not truncated.  */
+static int
+compare_ranges (const void *a, const void *b)
+{
+  size_t a_start = ((const struct field_range_pair *) a)->lo;
+  size_t b_start = ((const struct field_range_pair *) b)->lo;
+  return a_start < b_start ? -1 : a_start > b_start;
+}
+
+/* Replace FRP with its complement: the ranges not covered by any entry.
+   Reads c[0] unconditionally, so FRP must hold at least one range.  */
+
+static void
+complement_rp (void)
+{
+  struct field_range_pair *c = frp;
+  size_t n = n_frp;
+  size_t i;
+
+  frp = NULL;
+  n_frp = 0;
+  n_frp_allocated = 0;
+
+  if (c[0].lo > 1)
+    add_range_pair (1, c[0].lo - 1);
+
+  for (i = 1; i < n; ++i)
+    {
+      if (c[i-1].hi + 1 == c[i].lo)
+        continue;
+
+      add_range_pair (c[i-1].hi + 1, c[i].lo - 1);
+    }
+
+  if (c[n-1].hi < SIZE_MAX)
+    add_range_pair (c[n-1].hi + 1, SIZE_MAX);
+
+  free (c);
+}
+
+/* Given the list of field or byte range specifications FIELDSTR,
+   allocate and initialize the FRP array. FIELDSTR should
+   be composed of one or more numbers or ranges of numbers, separated
+   by blanks or commas.  Incomplete ranges may be given: '-m' means '1-m';
+   'n-' means 'n' through end of line.
+
+   if SETFLD_ALLOW_DASH option is used, a single '-' means all fields
+   (otherwise a single dash triggers an error).
+
+   if SETFLD_COMPLEMENT option is used, the specified field list
+   is complemented (e.g. '1-3' will result in fields '4-').
+
+   if SETFLD_ERRMSG_USE_POS option is used, error messages
+   will say 'position' (or 'byte/character positions')
+   instead of fields (used with cut -b/-c).
+
+   The function terminates on failure.
+
+   Upon return, the FRP array is initialized to contain
+   a non-overlapping, increasing list of field ranges.
+
+   N_FRP holds the number of field ranges in the FRP array.
+
+   A sentinel of SIZE_MAX/SIZE_MAX is always added as the last
+   field range pair.
+*/
+void
+set_fields (const char *fieldstr, unsigned int options)
+{
+  size_t initial = 1;		/* Value of first number in a range.  */
+  size_t value = 0;		/* If nonzero, a number being accumulated.  */
+  bool lhs_specified = false;
+  bool rhs_specified = false;
+  bool dash_found = false;	/* True if a '-' is found in this field.  */
+
+  size_t i;
+  bool in_digits = false;
+
+  /* Collect and store in FRP the range end points. */
+
+  /* Special case: '--field=-' means all fields, emulate '--field=1-' . */
+  if ((options & SETFLD_ALLOW_DASH) && STREQ (fieldstr,"-"))
+    {
+      value = 1;
+      lhs_specified = true;
+      dash_found = true;
+      fieldstr++;
+    }
+
+  while (true)
+    {
+      if (*fieldstr == '-')
+        {
+          in_digits = false;
+          /* Starting a range. */
+          if (dash_found)
+            FATAL_ERROR ( (options & SETFLD_ERRMSG_USE_POS)
+                          ?_("invalid byte or character range")
+                          :_("invalid field range"));
+
+          dash_found = true;
+          fieldstr++;
+
+          if (lhs_specified && !value)
+            FATAL_ERROR ( (options & SETFLD_ERRMSG_USE_POS)
+                          ?_("byte/character positions are numbered from 1")
+                          :_("fields are numbered from 1"));
+
+          initial = (lhs_specified ? value : 1);
+          value = 0;
+        }
+      else if (*fieldstr == ','
+               || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
+        {
+          in_digits = false;
+          /* Ending the string, or this field/byte sublist. */
+          if (dash_found)
+            {
+              dash_found = false;
+
+              if (!lhs_specified && !rhs_specified)
+                {
+                  /* if a lone dash is allowed, emulate '1-' for all fields */
+                  if (options & SETFLD_ALLOW_DASH)
+                    initial = 1;
+                  else
+                    FATAL_ERROR (_("invalid range with no endpoint: -"));
+                }
+
+              /* A range.  Possibilities: -n, m-n, n-.
+                 In any case, 'initial' contains the start of the range. */
+              if (!rhs_specified)
+                {
+                  /* 'n-'.  From 'initial' to end of line. */
+                  add_range_pair (initial, SIZE_MAX);
+                }
+              else
+                {
+                  /* 'm-n' or '-n' (1-n). */
+                  if (value < initial)
+                    FATAL_ERROR (_("invalid decreasing range"));
+
+                  add_range_pair (initial, value);
+                }
+              value = 0;
+            }
+          else
+            {
+              /* A simple field number, not a range. */
+              if (value == 0)
+                FATAL_ERROR ( (options & SETFLD_ERRMSG_USE_POS)
+                              ?_("byte/character positions are numbered from 1")
+                              :_("fields are numbered from 1"));
+
+              add_range_pair (value, value);
+              value = 0;
+            }
+
+          if (*fieldstr == '\0')
+            break;
+
+          fieldstr++;
+          lhs_specified = false;
+          rhs_specified = false;
+        }
+      else if (ISDIGIT (*fieldstr))
+        {
+          /* Record beginning of digit string, in case we have to
+             complain about it.  */
+          static char const *num_start;
+          if (!in_digits || !num_start)
+            num_start = fieldstr;
+          in_digits = true;
+
+          if (dash_found)
+            rhs_specified = 1;
+          else
+            lhs_specified = 1;
+
+          /* Detect overflow.  */
+          if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t)
+              || value == SIZE_MAX)
+            {
+              /* In case the user specified -c$(echo 2^64|bc),22,
+                 complain only about the first number.  */
+              /* Determine the length of the offending number.  */
+              size_t len = strspn (num_start, "0123456789");
+              char *bad_num = xstrndup (num_start, len);
+              error (0, 0, (options & SETFLD_ERRMSG_USE_POS)
+                           ?_("byte/character offset %s is too large")
+                           :_("field number %s is too large"),
+                           quote (bad_num));
+              free (bad_num);
+              usage (EXIT_FAILURE);
+            }
+
+          fieldstr++;
+        }
+      else
+        {
+          error (0, 0, (options & SETFLD_ERRMSG_USE_POS)
+                       ?_("invalid byte/character position %s")
+                       :_("invalid field value %s"),
+                       quote (fieldstr));
+          usage (EXIT_FAILURE);
+        }
+    }
+
+  if (!n_frp)
+    FATAL_ERROR ( (options&SETFLD_ERRMSG_USE_POS)
+                  ?_("missing list of byte/character positions")
+                  :_("missing list of fields"));
+
+  qsort (frp, n_frp, sizeof (frp[0]), compare_ranges);
+
+  /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */
+  for (i = 0; i < n_frp; ++i)
+    {
+      for (size_t j = i + 1; j < n_frp; ++j)
+        {
+          if (frp[j].lo <= frp[i].hi)
+            {
+              frp[i].hi = MAX (frp[j].hi, frp[i].hi);
+              memmove (frp + j, frp + j + 1, (n_frp - j - 1) * sizeof *frp);
+              n_frp--;
+              j--;
+            }
+          else
+            break;
+        }
+    }
+
+  if (options & SETFLD_COMPLEMENT)
+    complement_rp ();
+
+  /* After merging, reallocate FRP so we release memory to the system.
+     Also add a sentinel at the end of FRP, to avoid out of bounds access
+     and for performance reasons.  */
+  ++n_frp;
+  frp = xrealloc (frp, n_frp * sizeof (struct field_range_pair));
+  frp[n_frp - 1].lo = frp[n_frp - 1].hi = SIZE_MAX;
+}
+
+void
+reset_fields (void)
+{
+  /* Free the array, then put the module back in its empty state.  */
+  free (frp);
+  frp = NULL;
+  n_frp = n_frp_allocated = 0;
+}
diff --git a/src/set-fields.h b/src/set-fields.h
new file mode 100644
index 0000000..c04a999
--- /dev/null
+++ b/src/set-fields.h
@@ -0,0 +1,49 @@
+/* set-fields.h -- parse field list argument
+
+   Copyright (C) 2008-2015 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+#ifndef SET_FIELDS_H
+# define SET_FIELDS_H
+
+struct field_range_pair
+  {
+    size_t lo;
+    size_t hi;
+  };
+
+/* Array of `struct field_range_pair' holding all the finite ranges. */
+extern struct field_range_pair *frp;
+
+/* Number of finite ranges specified by the user. */
+extern size_t n_frp;
+
+/* field list parsing options */
+enum
+{
+  SETFLD_ALLOW_DASH = 0x01,     /* allow single dash meaning 'all fields' */
+  SETFLD_COMPLEMENT = 0x02,     /* complement the field list */
+  SETFLD_ERRMSG_USE_POS = 0x04  /* when reporting errors, say 'position' instead
+                                   of 'field' (used with cut -b/-c) */
+};
+
+/* allocates and initializes the FRP array and N_FRP count */
+void
+set_fields (const char *fieldstr, unsigned int options);
+
+/* frees memory allocated by set_fields() */
+void
+reset_fields (void);
+
+#endif
-- 
2.4.3


From c4cea05659d1d4361fd458a409cd1d4d09b1d9e6 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgordon@gmail.com>
Date: Fri, 17 Jul 2015 23:53:38 -0400
Subject: [PATCH 2/3] tests: update cut's tests to new set-fields errors

* tests/misc/cut.pl: update wording of error messages
---
 tests/misc/cut.pl | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/tests/misc/cut.pl b/tests/misc/cut.pl
index 23e9ce3..a6239d6 100755
--- a/tests/misc/cut.pl
+++ b/tests/misc/cut.pl
@@ -29,8 +29,10 @@ my $mb_locale = $ENV{LOCALE_FR_UTF8};
 
 my $prog = 'cut';
 my $try = "Try '$prog --help' for more information.\n";
-my $from_1 = "$prog: fields and positions are numbered from 1\n$try";
-my $inval = "$prog: invalid byte, character or field list\n$try";
+my $from_field1 = "$prog: fields are numbered from 1\n$try";
+my $from_pos1 =   "$prog: byte/character positions are numbered from 1\n$try";
+my $inval_fld = "$prog: invalid field range\n$try";
+my $inval_pos = "$prog: invalid byte or character range\n$try";
 my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try";
 my $nofield = "$prog: an input delimiter may be specified only when " .
               "operating on fields\n$try";
@@ -42,16 +44,16 @@ my @Tests =
 
   # This failed (as it should) even before coreutils-6.9.90,
   # but cut from 6.9.90 produces a more useful diagnostic.
-  ['zero-1', '-b0',   {ERR=>$from_1}, {EXIT => 1} ],
+  ['zero-1', '-b0',   {ERR=>$from_pos1}, {EXIT => 1} ],
 
   # Up to coreutils-6.9, specifying a range of 0-2 was not an error.
   # It was treated just like "-2".
-  ['zero-2', '-f0-2', {ERR=>$from_1}, {EXIT => 1} ],
+  ['zero-2', '-f0-2', {ERR=>$from_field1}, {EXIT => 1} ],
 
   # Up to coreutils-8.20, specifying a range of 0- was not an error.
-  ['zero-3b', '-b0-', {ERR=>$from_1}, {EXIT => 1} ],
-  ['zero-3c', '-c0-', {ERR=>$from_1}, {EXIT => 1} ],
-  ['zero-3f', '-f0-', {ERR=>$from_1}, {EXIT => 1} ],
+  ['zero-3b', '-b0-', {ERR=>$from_pos1}, {EXIT => 1} ],
+  ['zero-3c', '-c0-', {ERR=>$from_pos1}, {EXIT => 1} ],
+  ['zero-3f', '-f0-', {ERR=>$from_field1}, {EXIT => 1} ],
 
   ['1', '-d:', '-f1,3-', {IN=>"a:b:c\n"}, {OUT=>"a:c\n"}],
   ['2', '-d:', '-f1,3-', {IN=>"a:b:c\n"}, {OUT=>"a:c\n"}],
@@ -101,13 +103,16 @@ my @Tests =
    {ERR=>"$prog: you must specify a list of bytes, characters, or fields\n$try"}
   ],
   # Empty field list
-  ['empty-fl', qw(-f ''), {IN=>":\n"}, {OUT=>""}, {EXIT=>1}, {ERR=>$from_1}],
+  ['empty-fl', qw(-f ''), {IN=>":\n"}, {OUT=>""}, {EXIT=>1},
+   {ERR=>$from_field1}],
   # Missing field list
-  ['missing-fl', qw(-f --), {IN=>":\n"}, {OUT=>""}, {EXIT=>1}, {ERR=>$inval}],
+  ['missing-fl', qw(-f --), {IN=>":\n"}, {OUT=>""}, {EXIT=>1},
+   {ERR=>$inval_fld}],
   # Empty byte list
-  ['empty-bl', qw(-b ''), {IN=>":\n"}, {OUT=>""}, {EXIT=>1}, {ERR=>$from_1}],
+  ['empty-bl', qw(-b ''), {IN=>":\n"}, {OUT=>""}, {EXIT=>1}, {ERR=>$from_pos1}],
   # Missing byte list
-  ['missing-bl', qw(-b --), {IN=>":\n"}, {OUT=>""}, {EXIT=>1}, {ERR=>$inval}],
+  ['missing-bl', qw(-b --), {IN=>":\n"}, {OUT=>""}, {EXIT=>1},
+   {ERR=>$inval_pos}],
 
   # This test fails with cut from textutils-1.22.
   ['empty-f1', '-f1', {IN=>""}, {OUT=>""}],
-- 
2.4.3


From 2a936176f7b32981e68f71ab3eacb7b41e48a2c4 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgordon@gmail.com>
Date: Fri, 17 Jul 2015 23:58:31 -0400
Subject: [PATCH 3/3] numfmt: use new set-fields module to parse --field

numfmt --field=LIST can accept the same options as cut.

* bootstrap.conf: remove xlist, linked-list
* src/local.mk: link numfmt with set-fields
* src/numfmt.c: use set-fields.c instead of custom field parsing code.
  include_field(): adapt to new code.
* tests/misc/numfmt.pl: add new tests, adapt current tests to new
  error message wording from set-fields.c
---
 bootstrap.conf       |   2 -
 src/local.mk         |   1 +
 src/numfmt.c         | 179 +++++----------------------------------------------
 tests/misc/numfmt.pl |  71 +++++++++++++++++++-
 4 files changed, 85 insertions(+), 168 deletions(-)

diff --git a/bootstrap.conf b/bootstrap.conf
index 67837b3..d9c2497 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -34,7 +34,6 @@ gnulib_modules="
   argv-iter
   assert
   autobuild
-  linked-list
   backupfile
   base64
   buffer-lcm
@@ -272,7 +271,6 @@ gnulib_modules="
   xgetcwd
   xgetgroups
   xgethostname
-  xlist
   xmemcoll
   xnanosleep
   xprintf
diff --git a/src/local.mk b/src/local.mk
index 23ff951..c130ae4 100644
--- a/src/local.mk
+++ b/src/local.mk
@@ -388,6 +388,7 @@ src_uname_SOURCES = src/uname.c src/uname-uname.c
 src_arch_SOURCES = src/uname.c src/uname-arch.c
 
 src_cut_SOURCES = src/cut.c src/set-fields.c
+src_numfmt_SOURCES = src/numfmt.c src/set-fields.c
 
 src_md5sum_CPPFLAGS = -DHASH_ALGO_MD5=1 $(AM_CPPFLAGS)
 src_sha1sum_SOURCES = src/md5sum.c
diff --git a/src/numfmt.c b/src/numfmt.c
index 35c5c5b..24bf45b 100644
--- a/src/numfmt.c
+++ b/src/numfmt.c
@@ -29,8 +29,8 @@
 #include "system.h"
 #include "xstrtol.h"
 #include "xstrndup.h"
-#include "gl_linked_list.h"
-#include "gl_xlist.h"
+
+#include "set-fields.h"
 
 #if HAVE_FPSETPREC
 # include <ieeefp.h>
@@ -189,10 +189,6 @@ static int conv_exit_code = EXIT_CONVERSION_WARNINGS;
 /* auto-pad each line based on skipped whitespace.  */
 static int auto_padding = 0;
 static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT;
-static bool all_fields = false;
-static size_t all_fields_after = 0;
-static size_t all_fields_before = 0;
-static gl_list_t field_list;
 static int delimiter = DELIMITER_DEFAULT;
 
 /* if non-zero, the first 'header' lines from STDIN are skipped.  */
@@ -1313,141 +1309,6 @@ process_suffixed_number (char *text, long double *result,
   return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS);
 }
 
-typedef struct range_pair
-{
-  size_t lo;
-  size_t hi;
-} range_pair_t;
-
-static int
-sort_field (const void *elt1, const void *elt2)
-{
-  range_pair_t* rp1 = (range_pair_t*) elt1;
-  range_pair_t* rp2 = (range_pair_t*) elt2;
-
-  if (rp1->lo < rp2->lo)
-    return -1;
-
-  return rp1->lo > rp2->lo;
-}
-
-static int
-match_field (const void *elt1, const void *elt2)
-{
-  range_pair_t* rp = (range_pair_t*) elt1;
-  size_t field = *(size_t*) elt2;
-
-  if (rp->lo <= field && field <= rp->hi)
-    return 0;
-
-  if (rp->lo < field)
-    return -1;
-
-  return 1;
-}
-
-static void
-free_field (const void *elt)
-{
-  void *p = (void *)elt;
-  free (p);
-}
-
-/* Add the specified fields to field_list.
-   The format recognized is similar to cut.
-   TODO: Refactor the more performant cut implementation
-   for use by both utilities.  */
-static void
-parse_field_arg (char *arg)
-{
-
-  char *start, *end;
-  range_pair_t *rp;
-  size_t field_val;
-  size_t range_val = 0;
-
-  start = end = arg;
-
-  if (STREQ (arg, "-"))
-    {
-      all_fields = true;
-
-      return;
-    }
-
-  if (*start == '-')
-    {
-      /* range -M */
-      ++start;
-
-      all_fields_before = strtol (start, &end, 10);
-
-      if (start == end || all_fields_before <=0)
-        error (EXIT_FAILURE, 0, _("invalid field value %s"),
-               quote (start));
-
-      return;
-    }
-
-  field_list = gl_list_create_empty (GL_LINKED_LIST,
-                                     NULL, NULL, free_field, false);
-
-  while (*end != '\0') {
-    field_val = strtol (start, &end, 10);
-
-    if (start == end || field_val <=0)
-      error (EXIT_FAILURE, 0, _("invalid field value %s"),
-             quote (start));
-
-    if (! range_val)
-      {
-        /* field N */
-        rp = xmalloc (sizeof (*rp));
-        rp->lo = rp->hi = field_val;
-        gl_sortedlist_add (field_list, sort_field, rp);
-      }
-    else
-      {
-        /* range N-M
-           The last field was the start of the field range. The current
-           field is the end of the field range.  We already added the
-           start field, so increment and add all the fields through
-           range end. */
-        if (field_val < range_val)
-          error (EXIT_FAILURE, 0, _("invalid decreasing range"));
-        rp = xmalloc (sizeof (*rp));
-        rp->lo = range_val + 1;
-        rp->hi = field_val;
-        gl_sortedlist_add (field_list, sort_field, rp);
-
-        range_val = 0;
-      }
-
-    switch (*end) {
-      case ',':
-        /* discrete field separator */
-        ++end;
-        start = end;
-        break;
-
-      case '-':
-        /* field range separator */
-        ++end;
-        start = end;
-        range_val = field_val;
-        break;
-    }
-  }
-
-  if (range_val)
-    {
-      /* range N-
-         range_val was not reset indicating ARG
-         ended with a trailing '-' */
-      all_fields_after = range_val;
-    }
-}
-
 /* Return a pointer to the beginning of the next field in line.
    The line pointer is moved to the end of the next field. */
 static char*
@@ -1479,23 +1340,20 @@ next_field (char **line)
   return field_start;
 }
 
-static bool
+static bool _GL_ATTRIBUTE_PURE
 include_field (size_t field)
 {
-  if (all_fields)
-    return true;
-
-  if (all_fields_after && all_fields_after <= field)
-    return true;
-
-  if (all_fields_before && field <= all_fields_before)
-    return true;
-
-  /* default to field 1 */
-  if (! field_list)
+  struct field_range_pair *p = frp;
+  if (!p)
     return field == 1;
 
-  return gl_sortedlist_search (field_list, match_field, &field);
+  while (p->lo != SIZE_MAX)
+    {
+      if (p->lo <= field && p->hi >= field)
+        return true;
+      ++p;
+    }
+  return false;
 }
 
 /* Convert and output the given field. If it is not included in the set
@@ -1640,12 +1498,9 @@ main (int argc, char **argv)
           break;
 
         case FIELD_OPTION:
-          if (all_fields || all_fields_before || all_fields_after || field_list)
-            {
-              error (EXIT_FAILURE, 0,
-                     _("multiple field specifications"));
-            }
-          parse_field_arg (optarg);
+          if (n_frp)
+            error (EXIT_FAILURE, 0, _("multiple field specifications"));
+          set_fields (optarg, SETFLD_ALLOW_DASH);
           break;
 
         case 'd':
@@ -1761,9 +1616,7 @@ main (int argc, char **argv)
   free (padding_buffer);
   free (format_str_prefix);
   free (format_str_suffix);
-
-  if (field_list)
-    gl_list_free (field_list);
+  reset_fields ();
 #endif
 
   if (debug && !valid_numbers)
diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl
index 0e4dc79..eb4fcdd 100755
--- a/tests/misc/numfmt.pl
+++ b/tests/misc/numfmt.pl
@@ -31,6 +31,8 @@ my $locale = $ENV{LOCALE_FR_UTF8};
 ! defined $locale || $locale eq 'none'
   and $locale = 'C';
 
+my $try = "Try '$prog --help' for more information.\n";
+
 my @Tests =
     (
      ['1', '1234',             {OUT => "1234"}],
@@ -197,10 +199,12 @@ my @Tests =
      ['delim-4', '--delimiter=: --from=auto 40M:60M',  {OUT=>'40000000:60M'}],
      ['delim-5', '-d: --field=2 --from=auto :40M:60M',  {OUT=>':40000000:60M'}],
      ['delim-6', '-d: --field 3 --from=auto 40M:60M', {OUT=>"40M:60M"}],
+     ['delim-err-1', '-d,, --to=si 1', {EXIT=>1},
+             {ERR => "$prog: the delimiter must be a single character\n"}],
 
      #Fields
      ['field-1', '--field A',
-             {ERR => "$prog: invalid field value 'A'\n"},
+             {ERR => "$prog: invalid field value 'A'\n$try"},
              {EXIT => '1'}],
      ['field-2', '--field 2 --from=auto "Hello 40M World 90G"',
              {OUT=>'Hello 40000000 World 90G'}],
@@ -244,9 +248,71 @@ my @Tests =
      ['field-range-7', '--field -3 --to=si "1000 2000 3000 4000 5000"',
              {OUT=>"1.0K 2.0K 3.0K 4000 5000"}],
 
+     ['field-range-8', '--field 1-2,4-5 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3000 4.0K 5.0K"}],
+     ['field-range-9', '--field 4-5,1-2 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3000 4.0K 5.0K"}],
+
+     ['field-range-10','--field 1-3,2-4 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3.0K 4.0K 5000"}],
+     ['field-range-11','--field 2-4,1-3 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3.0K 4.0K 5000"}],
+
+     ['field-range-12','--field 1-1,3-3 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2000 3.0K 4000 5000"}],
+
+     ['field-range-13', '--field 1,-2 --to=si "1000 2000 3000"',
+             {OUT=>"1.0K 2.0K 3000"}],
+
+     ['field-range-14', '--field -2,4- --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3000 4.0K 5.0K"}],
+     ['field-range-15', '--field -2,-4 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3.0K 4.0K 5000"}],
+     ['field-range-16', '--field 2-,4- --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1000 2.0K 3.0K 4.0K 5.0K"}],
+     ['field-range-17', '--field 4-,2- --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1000 2.0K 3.0K 4.0K 5.0K"}],
+
+     # Whitespace is also a valid separator within the field list
+     # (undocumented? but works in cut as well).
+     ['field-range-18', '--field "1,2 4" --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3000 4.0K 5000"}],
+
+     # Unlike 'cut', a lone '-' means 'all fields', even as part of a list
+     # of fields.
+     ['field-range-19','--field 3,- --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}],
+
      ['all-fields-1', '--field=- --to=si "1000 2000 3000 4000 5000"',
              {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}],
 
+     ['field-range-err-1', '--field -foo --to=si 10',
+             {EXIT=>1}, {ERR=>"$prog: invalid field value 'foo'\n$try"}],
+     ['field-range-err-2', '--field --3 --to=si 10',
+             {EXIT=>1}, {ERR=>"$prog: invalid field range\n$try"}],
+     ['field-range-err-3', '--field 0 --to=si 10',
+             {EXIT=>1}, {ERR=>"$prog: fields are numbered from 1\n$try"}],
+     ['field-range-err-4', '--field 3-2 --to=si 10',
+             {EXIT=>1}, {ERR=>"$prog: invalid decreasing range\n$try"}],
+     ['field-range-err-6', '--field - --field 1- --to=si 10',
+             {EXIT=>1}, {ERR=>"$prog: multiple field specifications\n"}],
+     ['field-range-err-7', '--field -1 --field 1- --to=si 10',
+             {EXIT=>1}, {ERR=>"$prog: multiple field specifications\n"}],
+     ['field-range-err-8', '--field -1 --field 1,2,3 --to=si 10',
+             {EXIT=>1}, {ERR=>"$prog: multiple field specifications\n"}],
+     ['field-range-err-9', '--field 1- --field 1,2,3 --to=si 10',
+             {EXIT=>1}, {ERR=>"$prog: multiple field specifications\n"}],
+     ['field-range-err-10','--field 1,2,3 --field 1- --to=si 10',
+             {EXIT=>1}, {ERR=>"$prog: multiple field specifications\n"}],
+     ['field-range-err-11','--field 1-2-3 --to=si 10',
+             {EXIT=>1}, {ERR=>"$prog: invalid field range\n$try"}],
+     ['field-range-err-12','--field 0-1 --to=si 10',
+             {EXIT=>1}, {ERR=>"$prog: fields are numbered from 1\n$try"}],
+     ['field-range-err-13','--field 9918446744073709551616,22 --to=si 10',
+             {EXIT=>1}, {ERR=>"$prog: field number " .
+                              "'9918446744073709551616' is too large\n$try"}],
+
+
      # Auto-consume white-space, setup auto-padding
      ['whitespace-1', '--to=si --field 2 "A    500 B"', {OUT=>"A    500 B"}],
      ['whitespace-2', '--to=si --field 2 "A   5000 B"', {OUT=>"A   5.0K B"}],
@@ -582,8 +648,7 @@ my @Tests =
 
      # Invalid parameters
      ['help-1', '--foobar',
-             {ERR=>"$prog: unrecognized option\n" .
-                   "Try '$prog --help' for more information.\n"},
+             {ERR=>"$prog: unrecognized option\n$try"},
              {ERR_SUBST=>"s/option.*/option/; s/unknown/unrecognized/"},
              {EXIT=>1}],
 
-- 
2.4.3

