bug#17505: bug#22277: 'dd' - stats are not what expected

Pádraig Brady Fri, 01 Jan 2016 16:12:47 -0800

On 31/12/15 10:18, Pádraig Brady wrote:
> unarchive 17505
> forcemerge 17505 22277
> stop
> 
> On 31/12/15 01:11, Mike Fiedler wrote:
>>  
>> Hi,
>>  
>> I ran one of my favorite utilities 'dd' again this evening, this time with 
>> bs=1G ( IEC ) - I usually do 1M but this time I dealt with more data to be 
>> copied...
>> I had to copy about 215 GiB of data from one to another drive ( offset 215 
>> GiB was about the end of the last partition ).
>> So I did:
>>  
>> $  dd if=/dev/sdb of=/dev/sda bs=*1G* count=*222*
>> 222+0 records in
>> 222+0 records out
>> 238370684928 bytes (*238 GB*) copied, 1275.03 s, 187 MB/s
>>
>> When it finished, I got a bit confused, and I asked myself a question if the 
>> data I requested did really get copied..  of course it did, but I was not 
>> expecting 238 GB to be shown.
>> To make sure I calculated the 512 byte sector end number out of the 
>> 238370684928 bytes 'dd' result and compared it with the output of fdisk 
>> showing the last sector of the last partition... I was fine.
>>  
>> I think, and many others have a same opinion, 1kB = 1000B, etc, should be 
>> banned from use in the IT world, and banned from use by the sales people.
>>  
>> The point is, as you probably noticed, if dd is told to use IEC, let's stick 
>> to IEC and not get the results in whatever artificial decimal crap....
>> It can not only confuse, but utility like 'dd' should be 100% specific about 
>> handling the units, and there should be not a bit of doubt when it spits out 
>> the results.
>> If I would use 1K in this case, I would not notice the difference - my brain 
>> is simply too simple, and small, but 1G should at least result in displaying 
>> 222 GiB and for sure not GB.
> 
> I have to agree, and this has come up a few times now.
> 
> The number in brackets is not exact and informational for human consumption,
> so we should make an effort to be less confusing.
> There was a proposed patch at:
> http://debbugs.gnu.org/cgi/bugreport.cgi?bug=17505#37
> which auto determines the appropriate base from the amount output,
> to output the number with the least amount of info loss.
> 
> There were some issues noted with that,
> but IMHO they were lesser than the current issue.
> 
> We will have to be careful to not corrupt output
> when switching with status=progress (due to possibly shorter status line).
> 
> I'll have another look.


The attached auto sets the units.
For status=progress this is done based on output block size,
for the final transfer stats, it's done based on the transferred byte count.

cheers,
Pádraig.

>From 5885398ed2252af1ce433232e00933f15e0a318f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Fri, 16 May 2014 10:32:43 +0100
Subject: [PATCH] dd: output base 1024 transfer counts in IEC units

* src/dd.c (human_size): Add "base" and "from/to block size" params
so we can reuse this function for all output size conversions.
(print_xfer_stats): Use IEC units for status=progress output
when the obs is a multiple of 2 and not a multiple of 10.
Also use IEC units for the final status summary when the
number of output bytes is a power of 1024, and not a power of 1000.
* tests/dd/stats.sh: Update to test status units.
* NEWS: Mention the change in behavior.
Fixes http://bugs.gnu.org/17505
---
 NEWS              |  2 ++
 src/dd.c          | 44 +++++++++++++++++++++++++++++---------------
 tests/dd/stats.sh | 10 +++++++---
 3 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/NEWS b/NEWS
index 5080f0f..f7d219e 100644
--- a/NEWS
+++ b/NEWS
@@ -45,6 +45,8 @@ GNU coreutils NEWS                                    -*- outline -*-
   date --iso-8601 now uses +00:00 timezone format rather than +0000.
   The standard states to use this "extended" format throughout a timestamp.
 
+  dd now uses IEC units if appropriate for the human readable byte count status.
+
   df now prefers sources towards the root of a device when
   eliding duplicate bind mounted entries.
 
diff --git a/src/dd.c b/src/dd.c
index a5557a8..870e938 100644
--- a/src/dd.c
+++ b/src/dd.c
@@ -674,13 +674,23 @@ Options are:\n\
 }
 
 static char *
-human_size (size_t n)
+human_size (size_t n, size_t base, uintmax_t from_bs, uintmax_t to_bs)
 {
   static char hbuf[LONGEST_HUMAN_READABLE + 1];
+
   int human_opts =
-    (human_autoscale | human_round_to_nearest | human_base_1024
+    (human_autoscale | human_round_to_nearest
      | human_space_before_unit | human_SI | human_B);
-  return human_readable (n, hbuf, human_opts, 1, 1);
+
+  if (! base)
+    {
+      if ((n % 1000) && ! (n % 1024))
+        human_opts |= human_base_1024;
+    }
+  else if (base == 1024)
+    human_opts |= human_base_1024;
+
+  return human_readable (n, hbuf, human_opts, from_bs, to_bs);
 }
 
 /* Ensure input buffer IBUF is allocated.  */
@@ -695,7 +705,8 @@ alloc_ibuf (void)
   if (!real_buf)
     error (EXIT_FAILURE, 0,
            _("memory exhausted by input buffer of size %"PRIuMAX" bytes (%s)"),
-           (uintmax_t) input_blocksize, human_size (input_blocksize));
+           (uintmax_t) input_blocksize,
+           human_size (input_blocksize, 1024, 1, 1));
 
   real_buf += SWAB_ALIGN_OFFSET;	/* allow space for swab */
 
@@ -718,7 +729,8 @@ alloc_obuf (void)
         error (EXIT_FAILURE, 0,
                _("memory exhausted by output buffer of size %"PRIuMAX
                  " bytes (%s)"),
-               (uintmax_t) output_blocksize, human_size (output_blocksize));
+               (uintmax_t) output_blocksize,
+               human_size (output_blocksize, 1024, 1, 1));
       obuf = ptr_align (real_obuf, page_size);
     }
   else
@@ -749,16 +761,19 @@ multiple_bits_set (int i)
 /* Print transfer statistics.  */
 
 static void
-print_xfer_stats (xtime_t progress_time) {
-  char hbuf[LONGEST_HUMAN_READABLE + 1];
-  int human_opts =
-    (human_autoscale | human_round_to_nearest
-     | human_space_before_unit | human_SI | human_B);
+print_xfer_stats (xtime_t progress_time)
+{
   double delta_s;
   char const *bytes_per_second;
+  size_t w_bytes_base = 0; /* auto for final stats.  */
 
   if (progress_time)
-    fputc ('\r', stderr);
+    {
+      fputc ('\r', stderr);
+      w_bytes_base = 1000;
+      if ((output_blocksize % 10) && ! (output_blocksize % 2))
+        w_bytes_base = 1024;
+    }
 
   /* Use integer arithmetic to compute the transfer rate,
      since that makes it easy to use SI abbreviations.  */
@@ -767,8 +782,7 @@ print_xfer_stats (xtime_t progress_time) {
            ngettext ("%"PRIuMAX" byte (%s) copied",
                      "%"PRIuMAX" bytes (%s) copied",
                      select_plural (w_bytes)),
-           w_bytes,
-           human_readable (w_bytes, hbuf, human_opts, 1, 1));
+           w_bytes, human_size (w_bytes, w_bytes_base, 1, 1));
 
   xtime_t now = progress_time ? progress_time : gethrxtime ();
 
@@ -778,8 +792,8 @@ print_xfer_stats (xtime_t progress_time) {
       uintmax_t delta_xtime = now;
       delta_xtime -= start_time;
       delta_s = delta_xtime / XTIME_PRECISIONe0;
-      bytes_per_second = human_readable (w_bytes, hbuf, human_opts,
-                                         XTIME_PRECISION, delta_xtime);
+      bytes_per_second = human_size (w_bytes, 1000, XTIME_PRECISION,
+                                     delta_xtime);
     }
   else
     {
diff --git a/tests/dd/stats.sh b/tests/dd/stats.sh
index da2c2d2..7b7c856 100755
--- a/tests/dd/stats.sh
+++ b/tests/dd/stats.sh
@@ -63,11 +63,15 @@ for open in '' '1'; do
   grep '250000000 bytes .* copied' err || { cat err; fail=1; }
 done
 
+# Check progress is output, and check that the correct
+# IEC/SI units are used both for the progress and final status output.
 progress_output()
 {
-  { sleep "$1"; echo 1; } | dd bs=1 status=progress of=/dev/null 2>err
-  # Progress output should be for "byte ... copied", while final is "bytes ..."
-  grep 'byte .* copied' err
+  { sleep "$1"; dd status=none bs=1024 count=1 if=/dev/zero; } |
+  dd bs=1000 of=/dev/null status=progress 2>err
+
+  grep -F '1000 bytes (1.0 kB) copied' err &&  # Progress line
+  grep -F '1024 bytes (1.0 KiB) copied' err    # Final status line
 }
 retry_delay_ progress_output 1 4 || { cat err; fail=1; }
 
-- 
2.5.0

bug#17505: bug#22277: 'dd' - stats are not what expected

Reply via email to