On 17/03/11 07:24, Mike Frysinger wrote: > On Wednesday, March 16, 2011 19:55:56 Pádraig Brady wrote: >> On 16/03/11 19:18, Mike Frysinger wrote: >>> well, in the bug report i was working with, we were seeing data loss. i >>> wonder if it'd be possible to detect the fs/kernel version and error out >>> when versions are found that are known to be broken ? >> >> That was a different issue. >> >> It would be nice, but I don't think it would be practical to try and detect >> and work around such file system issues in cp. >> >> There are always going to be teething problems with a large >> body of new logic, so I think the best approach with file systems >> is to increase trust in the gradually over time. > > while this is true, i'd understand if the issue were bugs in coreutils' cp. > they'd get fixed and a new release made, no problem. but we're talking about > silent errors in the kernel, so anyone running a newer coreutils with a kernel > from two days ago is going hit issues. if we were looking at basic read/write > functionality, i'd understand not bothering, but we're talking purely about > optimization paths in coreutils' cp. > > on the Gentoo side, i guess i'll run with a hack like so: > > --- a/src/copy.c > +++ b/src/copy.c > @@ -22,6 +22,7 @@ > #include <sys/ioctl.h> > #include <sys/types.h> > #include <selinux/selinux.h> > +#include <sys/utsname.h> > > #if HAVE_HURD_H > # include <hurd.h> > @@ -930,7 +931,32 @@ copy_reg (char const *src_name, char const *dst_name, > the file is a hole. */ > if (x->sparse_mode == SPARSE_AUTO && S_ISREG (src_open_sb.st_mode) > && ST_NBLOCKS (src_open_sb) < src_open_sb.st_size / > ST_NBLOCKSIZE) > - make_holes = true; > + { > + make_holes = true; > + > +# ifdef __linux__ > + static int safe_kernel = -1; > + > + if (safe_kernel == -1) > + { > + struct utsname name; > + > + safe_kernel = 1; > + > + if (uname (&name) != -1 && strncmp (name.release, "2.6.", > 4) == 0) > + { > + int kver = atoi (name.release + 4); > + > + /* ext4 & btrfs are known to be broken */ > + if (kver < 38) > + safe_kernel = 0; > + } > + } > + > + if (safe_kernel == 0) > + make_holes = false; > +# endif > + } > #endif > } > > -mike
I'm going to use this "2.6.38" check to only enable FIEMAP_FLAG_SYNC before Linux kernel 2.6.38. It's always worth avoiding sync if possible. Proposed patch attached. I'll submit my 3 outstanding fiemap patches tomorrow. cheers, Pádraig.
>From d2e62bb40017571baf12727c942fb9f131f8c951 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com> Date: Wed, 30 Mar 2011 22:50:05 +0100 Subject: [PATCH] copy: with fiemap copy, only sync when needed * src/extent-scan.h (struct extent_scan): Add the fm_flags member to pass to the fiemap scan. * src/extent-scan.c (extent_need_sync): A new function used to detect Linux kernels before 2.6.38. (extent_scan_init): Add FIEMAP_FLAG_SYNC when needed. * tests/cp/sparse-fiemap: Adjust comment. * NEWS: Mention the change in behavior. Indirectly suggested by Mike Frysinger --- NEWS | 4 ++++ src/extent-scan.c | 32 +++++++++++++++++++++++++++++++- src/extent-scan.h | 3 +++ tests/cp/sparse-fiemap | 4 ++-- 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 9c4a16f..d1020cb 100644 --- a/NEWS +++ b/NEWS @@ -31,6 +31,10 @@ GNU coreutils NEWS -*- outline -*- bytes from the input, and also can efficiently create a hole in the output file when --sparse=always is specified. + cp now avoids syncing files when possible, when doing a FIEMAP copy. + The sync in only needed on Linux kernels before 2.6.38. + [The sync was introduced in coreutils-8.10] + ** New features dd now accepts the 'nocache' flag to the iflag and oflag options, diff --git a/src/extent-scan.c b/src/extent-scan.c index 24b56b8..b563363 100644 --- a/src/extent-scan.c +++ b/src/extent-scan.c @@ -20,15 +20,44 @@ #include <stdio.h> #include <sys/types.h> #include <sys/ioctl.h> +#include <sys/utsname.h> #include <assert.h> #include "system.h" #include "extent-scan.h" +#include "xstrtol.h" #ifndef HAVE_FIEMAP # include "fiemap.h" #endif +/* Work around Linux kernel issues on BTRFS and EXT4 before 2.6.38. */ +static bool +extent_need_sync (void) +{ + static int need_sync = -1; + + if (need_sync == -1) + { + struct utsname name; + need_sync = 0; /* No workaround by default. */ + +#ifdef __linux__ + if (uname (&name) != -1 && strncmp (name.release, "2.6.", 4) == 0) + { + unsigned long val; + if (xstrtoul (name.release + 4, NULL, 0, &val, NULL) == LONGINT_OK) + { + if (val < 38) + need_sync = 1; + } + } +#endif + } + + return need_sync; +} + /* Allocate space for struct extent_scan, initialize the entries if necessary and return it as the input argument of extent_scan_read(). */ extern void @@ -39,6 +68,7 @@ extent_scan_init (int src_fd, struct extent_scan *scan) scan->scan_start = 0; scan->initial_scan_failed = false; scan->hit_final_extent = false; + scan->fm_flags = extent_need_sync () ? FIEMAP_FLAG_SYNC : 0; } #ifdef __linux__ @@ -62,7 +92,7 @@ extent_scan_read (struct extent_scan *scan) memset (&fiemap_buf, 0, sizeof fiemap_buf); fiemap->fm_start = scan->scan_start; - fiemap->fm_flags = FIEMAP_FLAG_SYNC; + fiemap->fm_flags = scan->fm_flags; fiemap->fm_extent_count = count; fiemap->fm_length = FIEMAP_MAX_OFFSET - scan->scan_start; diff --git a/src/extent-scan.h b/src/extent-scan.h index 4724b25..8728515 100644 --- a/src/extent-scan.h +++ b/src/extent-scan.h @@ -41,6 +41,9 @@ struct extent_scan /* Next scan start offset. */ off_t scan_start; + /* Flags to use for scan. */ + uint32_t fm_flags; + /* How many extent info returned for a scan. */ uint32_t ei_count; diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap index 73448ec..bac2890 100755 --- a/tests/cp/sparse-fiemap +++ b/tests/cp/sparse-fiemap @@ -69,8 +69,8 @@ for i in $(seq 1 2 21); do -e 'for (1..'$j') { sysseek (*F, $n, 1)' \ -e '&& syswrite (*F, chr($_)x$n) or die "$!"}' > j1 || fail=1 - # Note there is an implicit sync performed by cp to - # work around bugs in EXT4 and BTRFS before Linux 2.6.38 + # Note there is an implicit sync performed by cp on Linux kernels + # before 2.6.38 to work around bugs in EXT4 and BTRFS. # Note also the -s parameter to the filefrag commands below # for the same reasons. cp --sparse=always j1 j2 || fail=1 -- 1.7.4