On 07/05/2013 12:12 PM, Pádraig Brady wrote:
On 07/05/2013 07:04 PM, Assaf Gordon wrote:
Hello,

Regarding old discussion here:
http://lists.gnu.org/archive/html/coreutils/2011-02/msg00030.html

Attached is a patch with adds "--repetition" option to shuf, enabling random 
number generation with repetitions.


I like this.
--repetition seems to be a very good interface too,
since it aligns with standard math nomenclature in regard to permutations.

I'd prefer to generalize it though, to supporting stdin as well as -i.

Attached is an updated patch, supporting "--repetitions" with STDIN/FILE/-e 
(using the naive implementation ATM).
e.g.
   $ shuf --repetitions --head-count=100 --echo Head Tail
or
   $ shuf -r -n100 -e Head Tail

Excellent thanks.

But the code is getting a bit messy, I guess from evolving features over time.
I'd like to re-organize it a bit, re-factor some functions and make the code 
clearer - what do you think?
it will make the code slightly more verbose (and slightly bigger), but 
shouldn't change the running performance.

If you're getting your head around the code enough to refactor,
then it would be great if you could handle the TODO: item in shuf.c

Attached is an updated patch, with some code cleanups (not including said TODO 
item yet).

-gordon


>From 5ba2828e72f6d276fc349f69824cd6cb626053a4 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Fri, 5 Jul 2013 15:41:17 -0600
Subject: [PATCH 00/14] *** SUBJECT HERE ***

*** BLURB HERE ***

Assaf Gordon (14):
  shuf: add --repetition to generate random numbers
  shuf: add tests for --repetition option
  shuf: mention new --repetition option in NEWS
  shuf: document new --repetition option
  shuf: enable --repetition on stdin/FILE/-e input
  shuf: add tests for --repetition with STDIN
  shuf: document new --repetitions option
  shuf: code-cleanup
  shuf: add more tests
  shuf: refactor --repetition with stdin
  shuf: refactor write_permuted_output()
  shuf: code cleanup
  shuf: code clean-up
  shuf: add tests for more erroneous usage

 NEWS               |   3 +
 doc/coreutils.texi |  37 +++++++++++
 src/shuf.c         | 192 +++++++++++++++++++++++++++++++++++++----------------
 tests/misc/shuf.sh |  92 +++++++++++++++++++++++++
 4 files changed, 268 insertions(+), 56 deletions(-)

-- 
1.8.3.2

>From c41160016ed36fe5b4e2b3d03cde34e0dcec84b6 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Thu, 4 Jul 2013 13:26:45 -0600
Subject: [PATCH 01/14] shuf: add --repetition to generate random numbers

* src/shuf.c: new option (-r,--repetition), generate random numbers.
main(): process new option.
usage(): mention new option.
write_random_numbers(): generate random numbers.
---
 src/shuf.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/src/shuf.c b/src/shuf.c
index 0fabb0b..cdc3151 100644
--- a/src/shuf.c
+++ b/src/shuf.c
@@ -76,6 +76,9 @@ Write a random permutation of the input lines to standard output.\n\
   -n, --head-count=COUNT    output at most COUNT lines\n\
   -o, --output=FILE         write result to FILE instead of standard output\n\
       --random-source=FILE  get random bytes from FILE\n\
+  -r, --repetition          used with -iLO-HI, output COUNT random numbers\n\
+                            between LO and HI, with repetitions.\n\
+                            count defaults to 1 if -n COUNT is not used.\n\
   -z, --zero-terminated     end lines with 0 byte, not newline\n\
 "), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
@@ -104,6 +107,7 @@ static struct option const long_opts[] =
   {"head-count", required_argument, NULL, 'n'},
   {"output", required_argument, NULL, 'o'},
   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
+  {"repetition", no_argument, NULL, 'r'},
   {"zero-terminated", no_argument, NULL, 'z'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
@@ -328,6 +332,23 @@ write_permuted_output (size_t n_lines, char *const *line, size_t lo_input,
   return 0;
 }
 
+static int
+write_random_numbers (struct randint_source *s, size_t count,
+                      size_t lo_input, size_t hi_input, char eolbyte)
+{
+  size_t i;
+  const randint range = hi_input - lo_input + 1;
+
+  for (i = 0; i < count; i++)
+    {
+      randint j = lo_input + randint_choose (s, range);
+      if (printf ("%lu%c", j, eolbyte) < 0)
+        return -1;
+    }
+
+  return 0;
+}
+
 int
 main (int argc, char **argv)
 {
@@ -340,6 +361,7 @@ main (int argc, char **argv)
   char eolbyte = '\n';
   char **input_lines = NULL;
   bool use_reservoir_sampling = false;
+  bool repetition = false;
 
   int optc;
   int n_operands;
@@ -348,7 +370,7 @@ main (int argc, char **argv)
   char **line = NULL;
   struct linebuffer *reservoir = NULL;
   struct randint_source *randint_source;
-  size_t *permutation;
+  size_t *permutation = NULL;
   int i;
 
   initialize_main (&argc, &argv);
@@ -424,6 +446,10 @@ main (int argc, char **argv)
         random_source = optarg;
         break;
 
+      case 'r':
+        repetition = true;
+        break;
+
       case 'z':
         eolbyte = '\0';
         break;
@@ -454,9 +480,19 @@ main (int argc, char **argv)
         }
       n_lines = hi_input - lo_input + 1;
       line = NULL;
+
+      /* When generating random numbers with repetitions,
+         the default count is one, unless specified by the user */
+      if (repetition && head_lines == SIZE_MAX)
+        head_lines = 1 ;
     }
   else
     {
+      if (repetition)
+        {
+          error (0, 0, _("--repetition requires --input-range"));
+          usage (EXIT_FAILURE);
+        }
       switch (n_operands)
         {
         case 0:
@@ -488,10 +524,12 @@ main (int argc, char **argv)
         }
     }
 
-  head_lines = MIN (head_lines, n_lines);
+  if (!repetition)
+    head_lines = MIN (head_lines, n_lines);
 
   randint_source = randint_all_new (random_source,
-                                    use_reservoir_sampling ? SIZE_MAX :
+                                    (use_reservoir_sampling||repetition)?
+                                    SIZE_MAX:
                                     randperm_bound (head_lines, n_lines));
   if (! randint_source)
     error (EXIT_FAILURE, errno, "%s", quotearg_colon (random_source));
@@ -512,13 +550,17 @@ main (int argc, char **argv)
       && (fclose (stdin) != 0))
     error (EXIT_FAILURE, errno, _("read error"));
 
-  permutation = randperm_new (randint_source, head_lines, n_lines);
+  if (!repetition)
+    permutation = randperm_new (randint_source, head_lines, n_lines);
 
   if (outfile && ! freopen (outfile, "w", stdout))
     error (EXIT_FAILURE, errno, "%s", quotearg_colon (outfile));
 
   if (use_reservoir_sampling)
     i = write_permuted_output_reservoir (n_lines, reservoir, permutation);
+  else if (repetition)
+    i = write_random_numbers (randint_source, head_lines,
+                              lo_input, hi_input, eolbyte);
   else
     i = write_permuted_output (head_lines, line, lo_input,
                                permutation, eolbyte);
-- 
1.8.3.2


>From 545e164dd89a785ce28b867e3c25c55142dea204 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Thu, 4 Jul 2013 13:54:04 -0600
Subject: [PATCH 02/14] shuf: add tests for --repetition option

* tests/misc/shuf.sh: add tests for --repetition option.
---
 tests/misc/shuf.sh | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/tests/misc/shuf.sh b/tests/misc/shuf.sh
index 3e33b61..3cf39c3 100755
--- a/tests/misc/shuf.sh
+++ b/tests/misc/shuf.sh
@@ -65,4 +65,33 @@ if ! test -r unreadable; then
   shuf -n1 unreadable && fail=1
 fi
 
+# Test --repetition option
+
+# --repetition should fail without --input-range
+shuf --repetition &&
+  { fail=1; echo "--repetition should fail without range">&2 ; }
+
+# --repetition without count should return one line
+shuf --rep -i0-10 > exp || framework_failure_
+c=$(cat exp | wc -l) || framework_failure_
+test "$c" -eq 1 || { fail=1; echo "--repetition default count is not 1">&2 ; }
+
+# --repetition can output more values than the input range
+shuf --rep -i0-9 -n1000 > exp || framework_failure_
+c=$(cat exp | wc -l) || framework_failure_
+test "$c" -eq 1000 || { fail=1; echo "--repetition with --count failed">&2 ; }
+
+# Check output values (this is not bullet-proof, but drawing 1000 values
+# between 0 and 9 should produce all values, unless there's a bug in shuf
+# or a very poor random source
+c=$(cat exp | sort -nu | fmt ) || framework_failure_
+test "$c" = "0 1 2 3 4 5 6 7 8 9" ||
+  { fail=1; echo "--repetition produced bad output">&2 ; }
+
+# check --repetition with non-zero low value
+shuf --rep -i222-233 -n2000 > exp || framework_failure_
+c=$(cat exp | sort -nu | fmt ) || framework_failure_
+test "$c" = "222 223 224 225 226 227 228 229 230 231 232 233" ||
+ { fail=1; echo "--repetition produced bad output with non-zero low">&2 ; }
+
 Exit $fail
-- 
1.8.3.2


>From f737c5a4bfc1d4a8d1b2c39c09c332a9b0c3aaf4 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Thu, 4 Jul 2013 13:55:59 -0600
Subject: [PATCH 03/14] shuf: mention new --repetition option in NEWS

* NEWS: mention new shuf option.
---
 NEWS | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/NEWS b/NEWS
index 75ec253..9736690 100644
--- a/NEWS
+++ b/NEWS
@@ -42,6 +42,9 @@ GNU coreutils NEWS                                    -*- outline -*-
   csplit accepts a new option: --suppressed-matched, to elide the lines
   used to identify the split points.
 
+  shuf accepts a new option: --repetition (-r). Output random numbers in
+  a given range, with repetitions.
+
 ** Changes in behavior
 
   stdbuf now requires at least one buffering mode option to be specified,
-- 
1.8.3.2


>From bd97294c36dbbc4e0775a92c69bd53cbd0cdcef8 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Thu, 4 Jul 2013 14:31:06 -0600
Subject: [PATCH 04/14] shuf: document new --repetition option

* doc/coreutils.texi: mention --repetition, add examples.
---
 doc/coreutils.texi | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index b3233f6..817cd7e 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -4945,6 +4945,15 @@ commands like @code{shuf -o F <F} and @code{cat F | shuf -o F}.
 Use @var{file} as a source of random data used to determine which
 permutation to generate.  @xref{Random sources}.
 
+@item -r
+@itemx --repetition
+@opindex -r
+@opindex --repetition
+@cindex writing random numbers with repetitions
+output random numbers with repetitions. Use @option{--input-range} to specify
+the range of output values. Use @option{--had-count} to specify count of output
+numbers (defaults to 1 if not specified).
+
 @zeroTerminatedOption
 
 @end table
@@ -5004,6 +5013,20 @@ general, if there are @var{n} input lines, there are @var{n}! (i.e.,
 @var{n} factorial, or @var{n} * (@var{n} - 1) * @dots{} * 1) possible
 output permutations.
 
+@noindent
+To output 50 random numbers between 0 and 9, use:
+
+@example
+shuf --repetition --input-range 0-9 --head-count 50
+@end example
+
+@noindent
+or (using short options):
+
+@example
+shuf --rep -i0-9 -n50
+@end example
+
 @exitstatus
 
 
-- 
1.8.3.2


>From 2445fe3a9db16f87a56f5f04f2afc62d27feb37e Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Fri, 5 Jul 2013 11:24:54 -0600
Subject: [PATCH 05/14] shuf: enable --repetition on stdin/FILE/-e input

* src/shuf.c:
   usage(): update usage information.
   main(): enable repetition on input data from stdin.
   randint_choose_new(): allocate N random values.
     TODO: move this function to gnulib's randint.c.
* tests/misc/shuf.sh: remove obsolete test.
---
 src/shuf.c         | 44 +++++++++++++++++++++++++++-----------------
 tests/misc/shuf.sh |  4 ----
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/src/shuf.c b/src/shuf.c
index cdc3151..26d7da1 100644
--- a/src/shuf.c
+++ b/src/shuf.c
@@ -76,8 +76,9 @@ Write a random permutation of the input lines to standard output.\n\
   -n, --head-count=COUNT    output at most COUNT lines\n\
   -o, --output=FILE         write result to FILE instead of standard output\n\
       --random-source=FILE  get random bytes from FILE\n\
-  -r, --repetition          used with -iLO-HI, output COUNT random numbers\n\
-                            between LO and HI, with repetitions.\n\
+  -r, --repetitions         output COUNT values, with repetitions.\n\
+                            with -iLO-HI, output random numbers.\n\
+                            with -e, stdin or FILE, output random lines.\n\
                             count defaults to 1 if -n COUNT is not used.\n\
   -z, --zero-terminated     end lines with 0 byte, not newline\n\
 "), stdout);
@@ -107,13 +108,25 @@ static struct option const long_opts[] =
   {"head-count", required_argument, NULL, 'n'},
   {"output", required_argument, NULL, 'o'},
   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
-  {"repetition", no_argument, NULL, 'r'},
+  {"repetitions", no_argument, NULL, 'r'},
   {"zero-terminated", no_argument, NULL, 'z'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {0, 0, 0, 0},
 };
 
+/* Returns a newly allocated array of COUNT random elements,
+   between 0-(CHOICES-1) */
+static size_t *
+randint_choose_new (struct randint_source *src, size_t choices, size_t count)
+{
+  size_t i;
+  size_t *v = xnmalloc (count, sizeof *v);
+  for (i=0; i<count; ++i)
+    v[i] = randint_choose (src, choices);
+  return v;
+}
+
 static bool
 input_numbers_option_used (size_t lo_input, size_t hi_input)
 {
@@ -381,7 +394,7 @@ main (int argc, char **argv)
 
   atexit (close_stdout);
 
-  while ((optc = getopt_long (argc, argv, "ei:n:o:z", long_opts, NULL)) != -1)
+  while ((optc = getopt_long (argc, argv, "ei:n:o:rz", long_opts, NULL)) != -1)
     switch (optc)
       {
       case 'e':
@@ -480,19 +493,9 @@ main (int argc, char **argv)
         }
       n_lines = hi_input - lo_input + 1;
       line = NULL;
-
-      /* When generating random numbers with repetitions,
-         the default count is one, unless specified by the user */
-      if (repetition && head_lines == SIZE_MAX)
-        head_lines = 1 ;
     }
   else
     {
-      if (repetition)
-        {
-          error (0, 0, _("--repetition requires --input-range"));
-          usage (EXIT_FAILURE);
-        }
       switch (n_operands)
         {
         case 0:
@@ -511,8 +514,8 @@ main (int argc, char **argv)
 
       fadvise (stdin, FADVISE_SEQUENTIAL);
 
-      if (head_lines != SIZE_MAX && (! head_lines
-                                     || input_size () > RESERVOIR_MIN_INPUT))
+      if (!repetition && head_lines != SIZE_MAX
+          && (! head_lines || input_size () > RESERVOIR_MIN_INPUT))
         {
           use_reservoir_sampling = true;
           n_lines = SIZE_MAX;   /* unknown number of input lines, for now.  */
@@ -524,6 +527,11 @@ main (int argc, char **argv)
         }
     }
 
+  /* When generating random numbers with repetitions,
+     the default count is one, unless specified by the user */
+  if (repetition && head_lines == SIZE_MAX)
+    head_lines = 1 ;
+
   if (!repetition)
     head_lines = MIN (head_lines, n_lines);
 
@@ -552,13 +560,15 @@ main (int argc, char **argv)
 
   if (!repetition)
     permutation = randperm_new (randint_source, head_lines, n_lines);
+  if (repetition && !input_numbers_option_used (lo_input, hi_input))
+    permutation = randint_choose_new (randint_source, n_lines, head_lines);
 
   if (outfile && ! freopen (outfile, "w", stdout))
     error (EXIT_FAILURE, errno, "%s", quotearg_colon (outfile));
 
   if (use_reservoir_sampling)
     i = write_permuted_output_reservoir (n_lines, reservoir, permutation);
-  else if (repetition)
+  else if (repetition && input_numbers_option_used (lo_input, hi_input))
     i = write_random_numbers (randint_source, head_lines,
                               lo_input, hi_input, eolbyte);
   else
diff --git a/tests/misc/shuf.sh b/tests/misc/shuf.sh
index 3cf39c3..bdf6645 100755
--- a/tests/misc/shuf.sh
+++ b/tests/misc/shuf.sh
@@ -67,10 +67,6 @@ fi
 
 # Test --repetition option
 
-# --repetition should fail without --input-range
-shuf --repetition &&
-  { fail=1; echo "--repetition should fail without range">&2 ; }
-
 # --repetition without count should return one line
 shuf --rep -i0-10 > exp || framework_failure_
 c=$(cat exp | wc -l) || framework_failure_
-- 
1.8.3.2


>From 77ad196b0ba203bdec9a2ef33f5f8a10b5643f1b Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Fri, 5 Jul 2013 11:43:16 -0600
Subject: [PATCH 06/14] shuf: add tests for --repetition with STDIN

* tests/misc/shuf.sh: add more tests.
---
 tests/misc/shuf.sh | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/tests/misc/shuf.sh b/tests/misc/shuf.sh
index bdf6645..63dc764 100755
--- a/tests/misc/shuf.sh
+++ b/tests/misc/shuf.sh
@@ -79,7 +79,7 @@ test "$c" -eq 1000 || { fail=1; echo "--repetition with --count failed">&2 ; }
 
 # Check output values (this is not bullet-proof, but drawing 1000 values
 # between 0 and 9 should produce all values, unless there's a bug in shuf
-# or a very poor random source
+# or a very poor random source, or extremely bad luck)
 c=$(cat exp | sort -nu | fmt ) || framework_failure_
 test "$c" = "0 1 2 3 4 5 6 7 8 9" ||
   { fail=1; echo "--repetition produced bad output">&2 ; }
@@ -90,4 +90,42 @@ c=$(cat exp | sort -nu | fmt ) || framework_failure_
 test "$c" = "222 223 224 225 226 227 228 229 230 231 232 233" ||
  { fail=1; echo "--repetition produced bad output with non-zero low">&2 ; }
 
+# --repetition,-i,count=0 should not fail and produce no output
+shuf --rep -i0-9 -n0 > exp || framework_failure_
+# file size should be zero (no output from shuf)
+test \! -s exp ||
+  { fail=1; echo "--repetition,-i0-9,-n0 produced bad output">&2 ; }
+
+# --repetition with -e, without count, should return one line
+shuf --rep -e A B C D > exp || framework_failure_
+c=$(cat exp | wc -l) || framework_failure_
+test "$c" -eq 1 ||
+  { fail=1; echo "--repetition,-e default count is not 1">&2 ; }
+
+# --repetition with STDIN, without count, should return one line
+printf "A\nB\nC\nD\nE\n" | shuf --rep > exp || framework_failure_
+c=$(cat exp | wc -l) || framework_failure_
+test "$c" -eq 1 ||
+  { fail=1; echo "--repetition,STDIN default count is not 1">&2 ; }
+
+# --repetition with STDIN,count - can return move values than input lines
+printf "A\nB\nC\nD\nE\n" | shuf --rep -n2000 > exp || framework_failure_
+c=$(cat exp | wc -l) || framework_failure_
+test "$c" -eq 2000 ||
+  { fail=1; echo "--repetition,STDIN,count failed">&2 ; }
+
+# Check output values (this is not bullet-proof, but drawing 2000 values
+# between A and E should produce all values, unless there's a bug in shuf
+# or a very poor random source, or extremely bad luck)
+c=$(cat exp | sort -u | fmt ) || framework_failure_
+test "$c" = "A B C D E" ||
+  { fail=1; echo "--repetition,STDIN,count produced bad output">&2 ; }
+
+# --repetition,stdin,count=0 should not fail and produce no output
+printf "A\nB\nC\nD\nE\n" | shuf --rep -n0 > exp || framework_failure_
+# file size should be zero (no output from shuf)
+test \! -s exp ||
+  { fail=1; echo "--repetition,STDIN,-n0 produced bad output">&2 ; }
+
+
 Exit $fail
-- 
1.8.3.2


>From 9e14bf963eb27faed847a979677fb5f344c27362 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Fri, 5 Jul 2013 11:55:57 -0600
Subject: [PATCH 07/14] shuf: document new --repetitions option

* NEWS: update wording.
* doc/coreutils.texi: update wording and add examples.
---
 NEWS               |  4 ++--
 doc/coreutils.texi | 30 ++++++++++++++++++++++--------
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/NEWS b/NEWS
index 9736690..542f5c5 100644
--- a/NEWS
+++ b/NEWS
@@ -42,8 +42,8 @@ GNU coreutils NEWS                                    -*- outline -*-
   csplit accepts a new option: --suppressed-matched, to elide the lines
   used to identify the split points.
 
-  shuf accepts a new option: --repetition (-r). Output random numbers in
-  a given range, with repetitions.
+  shuf accepts a new option: --repetitions (-r). Output random values,
+  with repetitions.
 
 ** Changes in behavior
 
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 817cd7e..1ca4b95 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -4946,13 +4946,14 @@ Use @var{file} as a source of random data used to determine which
 permutation to generate.  @xref{Random sources}.
 
 @item -r
-@itemx --repetition
+@itemx --repetitions
 @opindex -r
-@opindex --repetition
-@cindex writing random numbers with repetitions
-output random numbers with repetitions. Use @option{--input-range} to specify
-the range of output values. Use @option{--had-count} to specify count of output
-numbers (defaults to 1 if not specified).
+@opindex --repetitions
+@cindex allowing repetitions in output values
+Changes the default behaviour of @command{shuf}, allowing repetition in
+output values (in which case, @option{--head-count} can be larger
+than the number of input values). If @option{--head-count} is not
+specified, outputs a single random value.
 
 @zeroTerminatedOption
 
@@ -5017,14 +5018,27 @@ output permutations.
 To output 50 random numbers between 0 and 9, use:
 
 @example
-shuf --repetition --input-range 0-9 --head-count 50
+shuf --repetitions --input-range 0-9 --head-count 50
 @end example
 
 @noindent
 or (using short options):
 
 @example
-shuf --rep -i0-9 -n50
+shuf -r -i0-9 -n50
+@end example
+
+@noindent
+To simulate 100 coin flips, use:
+
+@example
+shuf -r -n100 -e Head Tail
+@end example
+
+@noindent
+or
+@example
+printf "Head\nTail\n" | shuf -r -n100
 @end example
 
 @exitstatus
-- 
1.8.3.2


>From 99db4abc3cf14c0bba89f32a4e14887b7ff3bb25 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Fri, 5 Jul 2013 12:55:31 -0600
Subject: [PATCH 08/14] shuf: code-cleanup

* src/shuf.c: replace input_numbers_option_used() with a local variable.
---
 src/shuf.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/src/shuf.c b/src/shuf.c
index 26d7da1..fc1a483 100644
--- a/src/shuf.c
+++ b/src/shuf.c
@@ -127,12 +127,6 @@ randint_choose_new (struct randint_source *src, size_t choices, size_t count)
   return v;
 }
 
-static bool
-input_numbers_option_used (size_t lo_input, size_t hi_input)
-{
-  return ! (lo_input == SIZE_MAX && hi_input == 0);
-}
-
 static void
 input_from_argv (char **operand, int n_operands, char eolbyte)
 {
@@ -366,6 +360,7 @@ int
 main (int argc, char **argv)
 {
   bool echo = false;
+  bool input_range = false;
   size_t lo_input = SIZE_MAX;
   size_t hi_input = 0;
   size_t head_lines = SIZE_MAX;
@@ -408,8 +403,9 @@ main (int argc, char **argv)
           char const *hi_optarg = optarg;
           bool invalid = !p;
 
-          if (input_numbers_option_used (lo_input, hi_input))
+          if (input_range)
             error (EXIT_FAILURE, 0, _("multiple -i options specified"));
+          input_range = true;
 
           if (p)
             {
@@ -478,13 +474,13 @@ main (int argc, char **argv)
 
   if (echo)
     {
-      if (input_numbers_option_used (lo_input, hi_input))
+      if (input_range)
         error (EXIT_FAILURE, 0, _("cannot combine -e and -i options"));
       input_from_argv (operand, n_operands, eolbyte);
       n_lines = n_operands;
       line = operand;
     }
-  else if (input_numbers_option_used (lo_input, hi_input))
+  else if (input_range)
     {
       if (n_operands)
         {
@@ -554,13 +550,13 @@ main (int argc, char **argv)
   /* Close stdin now, rather than earlier, so that randint_all_new
      doesn't have to worry about opening something other than
      stdin.  */
-  if (! (echo || input_numbers_option_used (lo_input, hi_input))
+  if (! (echo || input_range)
       && (fclose (stdin) != 0))
     error (EXIT_FAILURE, errno, _("read error"));
 
   if (!repetition)
     permutation = randperm_new (randint_source, head_lines, n_lines);
-  if (repetition && !input_numbers_option_used (lo_input, hi_input))
+  if (repetition && !input_range)
     permutation = randint_choose_new (randint_source, n_lines, head_lines);
 
   if (outfile && ! freopen (outfile, "w", stdout))
@@ -568,7 +564,7 @@ main (int argc, char **argv)
 
   if (use_reservoir_sampling)
     i = write_permuted_output_reservoir (n_lines, reservoir, permutation);
-  else if (repetition && input_numbers_option_used (lo_input, hi_input))
+  else if (repetition && input_range)
     i = write_random_numbers (randint_source, head_lines,
                               lo_input, hi_input, eolbyte);
   else
-- 
1.8.3.2


>From 784fbd09cda531f6b820216f24a612b6db379500 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Fri, 5 Jul 2013 13:06:50 -0600
Subject: [PATCH 09/14] shuf: add more tests

* tests/misc/shuf.sh: add tests for erroneous conditions.
---
 tests/misc/shuf.sh | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tests/misc/shuf.sh b/tests/misc/shuf.sh
index 63dc764..5c70a73 100755
--- a/tests/misc/shuf.sh
+++ b/tests/misc/shuf.sh
@@ -65,6 +65,29 @@ if ! test -r unreadable; then
   shuf -n1 unreadable && fail=1
 fi
 
+# Multiple -n is accepted, should use the smallest value
+shuf -n10 -i0-9 -n3 -n20 > exp || framework_failure_
+c=$(cat exp | wc -l) || framework_failure_
+test "$c" -eq 3 || { fail=1; echo "Multiple -n failed">&2 ; }
+
+# Test error conditions
+
+# -i and -e must not be used together
+printf "" | shuf -i -e A B &&
+  { fail=1; echo "shuf did not detect erroneous -e and -i usage.">&2 ; }
+# Test invalid value for -n
+printf "" | shuf -nA &&
+  { fail=1; echo "shuf did not detect erroneous -n usage.">&2 ; }
+# Test multiple -i
+shuf -i0-9 -n10 -i8-90 &&
+  { fail=1; echo "shuf did not detect multiple -i usage.">&2 ; }
+# Test invalid range
+for ARG in "1" "A" "1-" "1-A" ;
+do
+  shuf -i$ARG &&
+    { fail=1; echo "shuf did not detect erroneous -i$ARG usage.">&2 ; }
+done
+
 # Test --repetition option
 
 # --repetition without count should return one line
-- 
1.8.3.2


>From 65ffa3b599210d7875f6375b55caea94f59b3cf4 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Fri, 5 Jul 2013 13:29:17 -0600
Subject: [PATCH 10/14] shuf: refactor --repetition with stdin

* src/shuf.c: instead of pre-allocating random numbers, print lines
on-the-go.
randint_choose_new(): removed.
write_random_lines(): draw a random number, print the line.
---
 src/shuf.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/src/shuf.c b/src/shuf.c
index fc1a483..59012df 100644
--- a/src/shuf.c
+++ b/src/shuf.c
@@ -115,18 +115,6 @@ static struct option const long_opts[] =
   {0, 0, 0, 0},
 };
 
-/* Returns a newly allocated array of COUNT random elements,
-   between 0-(CHOICES-1) */
-static size_t *
-randint_choose_new (struct randint_source *src, size_t choices, size_t count)
-{
-  size_t i;
-  size_t *v = xnmalloc (count, sizeof *v);
-  for (i=0; i<count; ++i)
-    v[i] = randint_choose (src, choices);
-  return v;
-}
-
 static void
 input_from_argv (char **operand, int n_operands, char eolbyte)
 {
@@ -356,6 +344,24 @@ write_random_numbers (struct randint_source *s, size_t count,
   return 0;
 }
 
+static int
+write_random_lines (struct randint_source *s, size_t count,
+                    char *const *lines, size_t n_lines)
+{
+  size_t i;
+
+  for (i = 0; i < count; i++)
+    {
+      const randint j = randint_choose (s, n_lines);
+      char *const *p = lines + j;
+      size_t len = p[1] - p[0];
+      if (fwrite (p[0], sizeof *p[0], len, stdout) != len)
+        return -1;
+    }
+
+  return 0;
+}
+
 int
 main (int argc, char **argv)
 {
@@ -556,8 +562,6 @@ main (int argc, char **argv)
 
   if (!repetition)
     permutation = randperm_new (randint_source, head_lines, n_lines);
-  if (repetition && !input_range)
-    permutation = randint_choose_new (randint_source, n_lines, head_lines);
 
   if (outfile && ! freopen (outfile, "w", stdout))
     error (EXIT_FAILURE, errno, "%s", quotearg_colon (outfile));
@@ -567,6 +571,8 @@ main (int argc, char **argv)
   else if (repetition && input_range)
     i = write_random_numbers (randint_source, head_lines,
                               lo_input, hi_input, eolbyte);
+  else if (repetition && !input_range)
+    i = write_random_lines (randint_source, head_lines, line, n_lines);
   else
     i = write_permuted_output (head_lines, line, lo_input,
                                permutation, eolbyte);
-- 
1.8.3.2


>From ec42051663a8f3064d50f7c8dc90dd7c90c91dc4 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Fri, 5 Jul 2013 13:34:06 -0600
Subject: [PATCH 11/14] shuf: refactor write_permuted_output()

* src/shuf.c: split write_permuted_output() into two smaller functions:
write_permuted_numbers() and write_permuted_lines().
Update main() accordingly.
---
 src/shuf.c | 48 +++++++++++++++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/src/shuf.c b/src/shuf.c
index 59012df..e357dce 100644
--- a/src/shuf.c
+++ b/src/shuf.c
@@ -303,26 +303,34 @@ read_input (FILE *in, char eolbyte, char ***pline)
 }
 
 static int
-write_permuted_output (size_t n_lines, char *const *line, size_t lo_input,
-                       size_t const *permutation, char eolbyte)
+write_permuted_lines (size_t n_lines, char *const *line,
+                      size_t const *permutation)
 {
   size_t i;
 
-  if (line)
-    for (i = 0; i < n_lines; i++)
-      {
-        char *const *p = line + permutation[i];
-        size_t len = p[1] - p[0];
-        if (fwrite (p[0], sizeof *p[0], len, stdout) != len)
-          return -1;
-      }
-  else
-    for (i = 0; i < n_lines; i++)
-      {
-        unsigned long int n = lo_input + permutation[i];
-        if (printf ("%lu%c", n, eolbyte) < 0)
-          return -1;
-      }
+  for (i = 0; i < n_lines; i++)
+    {
+      char *const *p = line + permutation[i];
+      size_t len = p[1] - p[0];
+      if (fwrite (p[0], sizeof *p[0], len, stdout) != len)
+        return -1;
+    }
+
+  return 0;
+}
+
+static int
+write_permuted_numbers (size_t n_lines, size_t lo_input,
+                        size_t const *permutation, char eolbyte)
+{
+  size_t i;
+
+  for (i = 0; i < n_lines; i++)
+    {
+      unsigned long int n = lo_input + permutation[i];
+      if (printf ("%lu%c", n, eolbyte) < 0)
+        return -1;
+    }
 
   return 0;
 }
@@ -573,9 +581,11 @@ main (int argc, char **argv)
                               lo_input, hi_input, eolbyte);
   else if (repetition && !input_range)
     i = write_random_lines (randint_source, head_lines, line, n_lines);
+  else if (input_range)
+    i = write_permuted_numbers (head_lines, lo_input, permutation, eolbyte);
   else
-    i = write_permuted_output (head_lines, line, lo_input,
-                               permutation, eolbyte);
+    i = write_permuted_lines (head_lines, line, permutation);
+
   if (i != 0)
     error (EXIT_FAILURE, errno, _("write error"));
 
-- 
1.8.3.2


>From 64febd3eafb497b1dfc1ba61834c256a69741893 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Fri, 5 Jul 2013 14:20:08 -0600
Subject: [PATCH 12/14] shuf: code cleanup

* src/shuf.c: add comments,
restructure main() output generation section.
---
 src/shuf.c | 39 +++++++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/src/shuf.c b/src/shuf.c
index e357dce..15748a8 100644
--- a/src/shuf.c
+++ b/src/shuf.c
@@ -302,6 +302,10 @@ read_input (FILE *in, char eolbyte, char ***pline)
   return n_lines;
 }
 
+/* output 'n_lines' to stdout from 'line' array,
+   chosen by the indices in 'permutation'.
+   'permutation' and 'line' must have at least 'n_lines' elements.
+   strings in 'line' must include the line-terminator character. */
 static int
 write_permuted_lines (size_t n_lines, char *const *line,
                       size_t const *permutation)
@@ -319,6 +323,8 @@ write_permuted_lines (size_t n_lines, char *const *line,
   return 0;
 }
 
+/* output 'n_lines' of numbers to stdout, from 'permutation' array.
+   'permutation' must have at least 'n_lines' elements. */
 static int
 write_permuted_numbers (size_t n_lines, size_t lo_input,
                         size_t const *permutation, char eolbyte)
@@ -335,6 +341,8 @@ write_permuted_numbers (size_t n_lines, size_t lo_input,
   return 0;
 }
 
+/* output 'count' numbers to stdout, chosen randomly from range
+   lo_input to hi_input. */
 static int
 write_random_numbers (struct randint_source *s, size_t count,
                       size_t lo_input, size_t hi_input, char eolbyte)
@@ -352,6 +360,9 @@ write_random_numbers (struct randint_source *s, size_t count,
   return 0;
 }
 
+/* output 'count' lines to stdout from 'lines' array.
+   'lines' must have at least 'n_lines' element in it.
+   strings in 'line' must include the line-terminator character. */
 static int
 write_random_lines (struct randint_source *s, size_t count,
                     char *const *lines, size_t n_lines)
@@ -574,17 +585,25 @@ main (int argc, char **argv)
   if (outfile && ! freopen (outfile, "w", stdout))
     error (EXIT_FAILURE, errno, "%s", quotearg_colon (outfile));
 
-  if (use_reservoir_sampling)
-    i = write_permuted_output_reservoir (n_lines, reservoir, permutation);
-  else if (repetition && input_range)
-    i = write_random_numbers (randint_source, head_lines,
-                              lo_input, hi_input, eolbyte);
-  else if (repetition && !input_range)
-    i = write_random_lines (randint_source, head_lines, line, n_lines);
-  else if (input_range)
-    i = write_permuted_numbers (head_lines, lo_input, permutation, eolbyte);
+  /* Generate output according to requested method */
+  if (repetition)
+    {
+      if (input_range)
+        i = write_random_numbers (randint_source, head_lines,
+                                  lo_input, hi_input, eolbyte);
+      else
+        i = write_random_lines (randint_source, head_lines, line, n_lines);
+    }
   else
-    i = write_permuted_lines (head_lines, line, permutation);
+    {
+      if (use_reservoir_sampling)
+        i = write_permuted_output_reservoir (n_lines, reservoir, permutation);
+      else if (input_range)
+        i = write_permuted_numbers (head_lines, lo_input,
+                                    permutation, eolbyte);
+      else
+        i = write_permuted_lines (head_lines, line, permutation);
+    }
 
   if (i != 0)
     error (EXIT_FAILURE, errno, _("write error"));
-- 
1.8.3.2


>From 0f09af3214eeb2551420c4307c2e6f3a704e61b8 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Fri, 5 Jul 2013 14:55:17 -0600
Subject: [PATCH 13/14] shuf: code clean-up

* src/shuf.c: main() re-organize argument processing.
---
 src/shuf.c | 35 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/src/shuf.c b/src/shuf.c
index 15748a8..bfcb06d 100644
--- a/src/shuf.c
+++ b/src/shuf.c
@@ -497,41 +497,38 @@ main (int argc, char **argv)
   n_operands = argc - optind;
   operand = argv + optind;
 
+  /* Check invalid usage */
+  if (echo && input_range)
+    {
+      error (0, 0, _("cannot combine -e and -i options"));
+      usage (EXIT_FAILURE);
+    }
+  if ( (n_operands>0 && input_range)
+       || (!echo && !input_range && n_operands>=2))
+    {
+      error (0, 0, _("extra operand %s"), quote (operand[1]));
+      usage (EXIT_FAILURE);
+    }
+
+  /* Prepare input */
   if (echo)
     {
-      if (input_range)
-        error (EXIT_FAILURE, 0, _("cannot combine -e and -i options"));
       input_from_argv (operand, n_operands, eolbyte);
       n_lines = n_operands;
       line = operand;
     }
   else if (input_range)
     {
-      if (n_operands)
-        {
-          error (0, 0, _("extra operand %s"), quote (operand[0]));
-          usage (EXIT_FAILURE);
-        }
       n_lines = hi_input - lo_input + 1;
       line = NULL;
     }
   else
     {
-      switch (n_operands)
-        {
-        case 0:
-          break;
-
-        case 1:
+      /* Input file specified, re-open it as STDIN */
+      if (n_operands==1)
           if (! (STREQ (operand[0], "-") || ! head_lines
                  || freopen (operand[0], "r", stdin)))
             error (EXIT_FAILURE, errno, "%s", operand[0]);
-          break;
-
-        default:
-          error (0, 0, _("extra operand %s"), quote (operand[1]));
-          usage (EXIT_FAILURE);
-        }
 
       fadvise (stdin, FADVISE_SEQUENTIAL);
 
-- 
1.8.3.2


>From 5ba2828e72f6d276fc349f69824cd6cb626053a4 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <assafgor...@gmail.com>
Date: Fri, 5 Jul 2013 14:59:44 -0600
Subject: [PATCH 14/14] shuf: add tests for more erroneous usage

* test/misc/shuf.sh: test multiple '-o' and '--random-source'.
---
 tests/misc/shuf.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/misc/shuf.sh b/tests/misc/shuf.sh
index 5c70a73..ecdfba7 100755
--- a/tests/misc/shuf.sh
+++ b/tests/misc/shuf.sh
@@ -87,6 +87,12 @@ do
   shuf -i$ARG &&
     { fail=1; echo "shuf did not detect erroneous -i$ARG usage.">&2 ; }
 done
+# multiple -o are forbidden
+shuf -i0-9 -o A -o B &&
+  { fail=1; echo "shuf did not detect erroneous multiple -o usage.">&2 ; }
+# multiple random-sources are forbidden
+shuf -i0-9 --random-source A --random-source B &&
+  { fail=1; echo "shuf did not detect multiple --random-source usage.">&2 ; }
 
 # Test --repetition option
 
-- 
1.8.3.2

Reply via email to