Hello Pádraig and all,

On 01/08/2016 11:56 AM, Pádraig Brady wrote:
[...]
Possible additions to this class:

   nl (N/A as primarily text rather than record oriented)
   numfmt (ditto)
   expand (ditto)
   unexpand (ditto)


Attached is a similarly structured patch adding -z to numfmt (it does not yet
include a NEWS entry).

An open question:
With -z, do embedded newlines count as whitespace/field delimiters?
(I'm not sure whether this applies to other programs.)

For example:

   $ printf "A B\tC\nD 1000\x00"

Should the newline count as a whitespace/field delimiter (since numfmt defaults
to whitespace delimiters)?
If so, the "1000" should be the fifth field.
If not, the "1000" should be the fourth field (and "C\nD" counts as one
field).

Currently, because the numfmt code uses "isblank()", newlines DO NOT count as 
whitespace:

    $ printf "A B\tC\nD 1000\x00" | ./src/numfmt -z --to=si --field=4 | od -a
    0000000   A  sp   B  sp   C  nl   D  sp   1   .   0   K nul
    0000015

====

Also,
Two minor questions:

1. If a NUL-terminated test fails due to incorrect output, the log will contain:
    numfmt.pl: test z4: stdout mismatch, comparing z4.2 (expected) and z4.O (actual)
    Binary files z4.2 and z4.O differ

This will make it hard for users to send us bug reports.
Perhaps it's worth thinking about how to display a diff even for 
null-terminated lines (not sure how best to approach this).


2. In the patch for "wc", the long form of the option (for getopt_long) is "zero"
instead of "zero-terminated" - is that intentional?

===

Comments welcome.

 - assaf




From 09b653bf2162d5b18e5e60bf4931b4cf89629b25 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <[email protected]>
Date: Fri, 8 Jan 2016 13:55:12 -0500
Subject: [PATCH] numfmt: add the -z,--zero-terminated option

TODO: do embedded NLs count as whitespace with -z ?

* doc/coreutils.texi (numfmt invocation): Reference the description.
* src/numfmt.c: Parameterize '\n' references.
* tests/misc/numfmt.pl: Add tests for character and field processing.
---
 doc/coreutils.texi   |  2 ++
 src/numfmt.c         | 27 +++++++++++++++++++++------
 tests/misc/numfmt.pl | 31 +++++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 771418e..e6d501b 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -17168,6 +17168,8 @@ the output numbers represent other units (e.g. to represent @samp{4,000,000}
 bytes in blocks of 1KB, use @samp{--to=si --to-unit=1000}).
 Suffixes are handled as with @samp{--from=auto}.
 
+@optZeroTerminated
+
 @end table
 
 @subsection Possible @var{unit}s:
diff --git a/src/numfmt.c b/src/numfmt.c
index 5d38cbd..3d5dde8 100644
--- a/src/numfmt.c
+++ b/src/numfmt.c
@@ -147,6 +147,7 @@ static struct option const longopts[] =
   {"header", optional_argument, NULL, HEADER_OPTION},
   {"format", required_argument, NULL, FORMAT_OPTION},
   {"invalid", required_argument, NULL, INVALID_OPTION},
+  {"zero-terminated", no_argument, NULL, 'z'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -189,8 +190,13 @@ static int conv_exit_code = EXIT_CONVERSION_WARNINGS;
 /* auto-pad each line based on skipped whitespace.  */
 static int auto_padding = 0;
 static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT;
+
+/* field delimiter.  */
 static int delimiter = DELIMITER_DEFAULT;
 
+/* line delimiter.  */
+static unsigned char line_delim = '\n';
+
 /* if non-zero, the first 'header' lines from STDIN are skipped.  */
 static uintmax_t header = 0;
 
@@ -205,6 +211,7 @@ static int decimal_point_length;
 /* debugging for developers.  Enables devmsg().  */
 static bool dev_debug = false;
 
+
 static inline int
 default_scale_base (enum scale_type scale)
 {
@@ -934,7 +941,9 @@ Reformat NUMBER(s), or the numbers from standard input if none are specified.\n\
       fputs (_("\
       --to-unit=N      the output unit size (instead of the default 1)\n\
 "), stdout);
-
+      fputs (_("\
+  -z, --zero-terminated    line delimiter is NUL, not newline\n\
+"), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 
@@ -1420,7 +1429,7 @@ process_line (char *line, bool newline)
   }
 
   if (newline)
-    putchar ('\n');
+    putchar (line_delim);
 
   return valid_number;
 }
@@ -1451,7 +1460,7 @@ main (int argc, char **argv)
 
   while (true)
     {
-      int c = getopt_long (argc, argv, "d:", longopts, NULL);
+      int c = getopt_long (argc, argv, "zd:", longopts, NULL);
 
       if (c == -1)
         break;
@@ -1512,6 +1521,10 @@ main (int argc, char **argv)
           delimiter = optarg[0];
           break;
 
+       case 'z':
+         line_delim = '\0' ;
+         break;
+
         case SUFFIX_OPTION:
           suffix = optarg;
           break;
@@ -1599,12 +1612,14 @@ main (int argc, char **argv)
       size_t line_allocated = 0;
       ssize_t len;
 
-      while (header-- && getline (&line, &line_allocated, stdin) > 0)
+      while (header-- && getdelim (&line, &line_allocated,
+                                  line_delim, stdin) > 0)
         fputs (line, stdout);
 
-      while ((len = getline (&line, &line_allocated, stdin)) > 0)
+      while ((len = getdelim (&line, &line_allocated,
+                             line_delim, stdin)) > 0)
         {
-          bool newline = line[len - 1] == '\n';
+          bool newline = line[len - 1] == line_delim;
           if (newline)
             line[len - 1] = '\0';
           valid_numbers &= process_line (line, newline);
diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl
index 451bb34..1b4edc3 100755
--- a/tests/misc/numfmt.pl
+++ b/tests/misc/numfmt.pl
@@ -804,6 +804,34 @@ my @Tests =
              {EXIT => 2}],
     );
 
+# test null-terminated lines
+my @NullDelim_Tests =
+  (
+     # Input from STDIN
+     ['z1', '-z --to=iec',
+             {IN_PIPE => "1025\x002048\x00"}, {OUT=>"1.1K\x002.0K\x00"}],
+
+     # Input from the command line - terminated by NUL vs NL
+     ['z3', '   --to=iec 1024',  {OUT=>"1.0K\n"}],
+     ['z2', '-z --to=iec 1024',  {OUT=>"1.0K\x00"}],
+
+     # Input from STDIN, with fields
+     ['z4', '-z --field=3 --to=si',
+             {IN_PIPE => "A B 1001 C\x00" .
+                         "D E 2002 F\x00"},
+             {OUT => "A B 1.1K C\x00" .
+                     "D E 2.1K F\x00"}],
+
+     # Input from STDIN, with fields and embedded NL
+     # TODO: FIX THIS: does NL count as whitespace with '-z' ?
+     #       currently, due to 'isblank()' it does not, and this test fails.
+     ['z5', '-z --field=3 --to=si',
+             {IN_PIPE => "A\nB 1001 C\x00" .
+                         "D E\n2002 F\x00"},
+             {OUT => "A B 1.1K C\x00" .
+                     "D E 2.1K F\x00"}],
+  );
+
 my @Limit_Tests =
   (
      # Large Values
@@ -1080,6 +1108,9 @@ foreach $t (@Tests)
       }
   }
 
+# Add test for null-terminated lines (after adjusting the OUT string, above).
+push @Tests, @NullDelim_Tests;
+
 my $save_temps = $ENV{SAVE_TEMPS};
 my $verbose = $ENV{VERBOSE};
 
-- 
1.9.1

Reply via email to