On 17/03/11 07:24, Mike Frysinger wrote:
> On Wednesday, March 16, 2011 19:55:56 Pádraig Brady wrote:
>> On 16/03/11 19:18, Mike Frysinger wrote:
>>> well, in the bug report i was working with, we were seeing data loss.  i
>>> wonder if it'd be possible to detect the fs/kernel version and error out
>>> when versions are found that are known to be broken ?
>>
>> That was a different issue.
>>
>> It would be nice, but I don't think it would be practical to try and detect
>> and work around such file system issues in cp.
>>
>> There are always going to be teething problems with a large
>> body of new logic, so I think the best approach with file systems
>> is to increase trust in the gradually over time.
> 
> while this is true, i'd understand if the issue were bugs in coreutils' cp.
> they'd get fixed and a new release made, no problem.  but we're talking about
> silent errors in the kernel, so anyone running a newer coreutils with a kernel
> from two days ago is going hit issues.  if we were looking at basic read/write
> functionality, i'd understand not bothering, but we're talking purely about
> optimization paths in coreutils' cp.
> 
> on the Gentoo side, i guess i'll run with a hack like so:
> 
> --- a/src/copy.c
> +++ b/src/copy.c
> @@ -22,6 +22,7 @@
>  #include <sys/ioctl.h>
>  #include <sys/types.h>
>  #include <selinux/selinux.h>
> +#include <sys/utsname.h>
>  
>  #if HAVE_HURD_H
>  # include <hurd.h>
> @@ -930,7 +931,32 @@ copy_reg (char const *src_name, char const *dst_name,
>               the file is a hole.  */
>            if (x->sparse_mode == SPARSE_AUTO && S_ISREG (src_open_sb.st_mode)
>                && ST_NBLOCKS (src_open_sb) < src_open_sb.st_size / 
> ST_NBLOCKSIZE)
> -            make_holes = true;
> +            {
> +              make_holes = true;
> +
> +# ifdef __linux__
> +              static int safe_kernel = -1;
> +
> +              if (safe_kernel == -1)
> +                {
> +                  struct utsname name;
> +
> +                  safe_kernel = 1;
> +
> +                  if (uname (&name) != -1 && strncmp (name.release, "2.6.", 
> 4) == 0)
> +                    {
> +                      int kver = atoi (name.release + 4);
> +
> +                      /* ext4 & btrfs are known to be broken */
> +                      if (kver < 38)
> +                        safe_kernel = 0;
> +                    }
> +                }
> +
> +              if (safe_kernel == 0)
> +                make_holes = false;
> +# endif
> +            }
>  #endif
>          }
>  
> -mike

I'm going to use this "2.6.38" check to only enable FIEMAP_FLAG_SYNC
before Linux kernel 2.6.38.  It's always worth avoiding sync if possible.
Proposed patch attached.
I'll submit my 3 outstanding fiemap patches tomorrow.

cheers,
Pádraig.
>From d2e62bb40017571baf12727c942fb9f131f8c951 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Wed, 30 Mar 2011 22:50:05 +0100
Subject: [PATCH] copy: with fiemap copy, only sync when needed

* src/extent-scan.h (struct extent_scan): Add the fm_flags member to
pass to the fiemap scan.
* src/extent-scan.c (extent_need_sync): A new function used to
detect Linux kernels before 2.6.38.
(extent_scan_init): Add FIEMAP_FLAG_SYNC when needed.
* tests/cp/sparse-fiemap: Adjust comment.
* NEWS: Mention the change in behavior.
Indirectly suggested by Mike Frysinger
---
 NEWS                   |    4 ++++
 src/extent-scan.c      |   32 +++++++++++++++++++++++++++++++-
 src/extent-scan.h      |    3 +++
 tests/cp/sparse-fiemap |    4 ++--
 4 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/NEWS b/NEWS
index 9c4a16f..d1020cb 100644
--- a/NEWS
+++ b/NEWS
@@ -31,6 +31,10 @@ GNU coreutils NEWS                                    -*- outline -*-
   bytes from the input, and also can efficiently create a hole in the
   output file when --sparse=always is specified.
 
+  cp now avoids syncing files when possible, when doing a FIEMAP copy.
+  The sync in only needed on Linux kernels before 2.6.38.
+  [The sync was introduced in coreutils-8.10]
+
 ** New features
 
   dd now accepts the 'nocache' flag to the iflag and oflag options,
diff --git a/src/extent-scan.c b/src/extent-scan.c
index 24b56b8..b563363 100644
--- a/src/extent-scan.c
+++ b/src/extent-scan.c
@@ -20,15 +20,44 @@
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/ioctl.h>
+#include <sys/utsname.h>
 #include <assert.h>
 
 #include "system.h"
 #include "extent-scan.h"
+#include "xstrtol.h"
 
 #ifndef HAVE_FIEMAP
 # include "fiemap.h"
 #endif
 
+/* Work around Linux kernel issues on BTRFS and EXT4 before 2.6.38.  */
+static bool
+extent_need_sync (void)
+{
+  static int need_sync = -1;
+
+  if (need_sync == -1)
+    {
+      struct utsname name;
+      need_sync = 0; /* No workaround by default.  */
+
+#ifdef __linux__
+      if (uname (&name) != -1 && strncmp (name.release, "2.6.", 4) == 0)
+        {
+           unsigned long val;
+           if (xstrtoul (name.release + 4, NULL, 0, &val, NULL) == LONGINT_OK)
+             {
+               if (val < 38)
+                 need_sync = 1;
+             }
+        }
+#endif
+    }
+
+  return need_sync;
+}
+
 /* Allocate space for struct extent_scan, initialize the entries if
    necessary and return it as the input argument of extent_scan_read().  */
 extern void
@@ -39,6 +68,7 @@ extent_scan_init (int src_fd, struct extent_scan *scan)
   scan->scan_start = 0;
   scan->initial_scan_failed = false;
   scan->hit_final_extent = false;
+  scan->fm_flags = extent_need_sync () ? FIEMAP_FLAG_SYNC : 0;
 }
 
 #ifdef __linux__
@@ -62,7 +92,7 @@ extent_scan_read (struct extent_scan *scan)
   memset (&fiemap_buf, 0, sizeof fiemap_buf);
 
   fiemap->fm_start = scan->scan_start;
-  fiemap->fm_flags = FIEMAP_FLAG_SYNC;
+  fiemap->fm_flags = scan->fm_flags;
   fiemap->fm_extent_count = count;
   fiemap->fm_length = FIEMAP_MAX_OFFSET - scan->scan_start;
 
diff --git a/src/extent-scan.h b/src/extent-scan.h
index 4724b25..8728515 100644
--- a/src/extent-scan.h
+++ b/src/extent-scan.h
@@ -41,6 +41,9 @@ struct extent_scan
   /* Next scan start offset.  */
   off_t scan_start;
 
+  /* Flags to use for scan.  */
+  uint32_t fm_flags;
+
   /* How many extent info returned for a scan.  */
   uint32_t ei_count;
 
diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap
index 73448ec..bac2890 100755
--- a/tests/cp/sparse-fiemap
+++ b/tests/cp/sparse-fiemap
@@ -69,8 +69,8 @@ for i in $(seq 1 2 21); do
           -e 'for (1..'$j') { sysseek (*F, $n, 1)' \
           -e '&& syswrite (*F, chr($_)x$n) or die "$!"}' > j1 || fail=1
 
-    # Note there is an implicit sync performed by cp to
-    # work around bugs in EXT4 and BTRFS before Linux 2.6.38
+    # Note there is an implicit sync performed by cp on Linux kernels
+    # before 2.6.38 to work around bugs in EXT4 and BTRFS.
     # Note also the -s parameter to the filefrag commands below
     # for the same reasons.
     cp --sparse=always j1 j2 || fail=1
-- 
1.7.4

Reply via email to