On 17/03/11 07:24, Mike Frysinger wrote:
> On Wednesday, March 16, 2011 19:55:56 Pádraig Brady wrote:
>> On 16/03/11 19:18, Mike Frysinger wrote:
>>> well, in the bug report i was working with, we were seeing data loss. i
>>> wonder if it'd be possible to detect the fs/kernel version and error out
>>> when versions are found that are known to be broken ?
>>
>> That was a different issue.
>>
>> It would be nice, but I don't think it would be practical to try and detect
>> and work around such file system issues in cp.
>>
>> There are always going to be teething problems with a large
>> body of new logic, so I think the best approach with file systems
>> is to increase trust in the gradually over time.
>
> while this is true, i'd understand if the issue were bugs in coreutils' cp.
> they'd get fixed and a new release made, no problem. but we're talking about
> silent errors in the kernel, so anyone running a newer coreutils with a kernel
> from two days ago is going hit issues. if we were looking at basic read/write
> functionality, i'd understand not bothering, but we're talking purely about
> optimization paths in coreutils' cp.
>
> on the Gentoo side, i guess i'll run with a hack like so:
>
> --- a/src/copy.c
> +++ b/src/copy.c
> @@ -22,6 +22,7 @@
> #include <sys/ioctl.h>
> #include <sys/types.h>
> #include <selinux/selinux.h>
> +#include <sys/utsname.h>
>
> #if HAVE_HURD_H
> # include <hurd.h>
> @@ -930,7 +931,32 @@ copy_reg (char const *src_name, char const *dst_name,
> the file is a hole. */
> if (x->sparse_mode == SPARSE_AUTO && S_ISREG (src_open_sb.st_mode)
> && ST_NBLOCKS (src_open_sb) < src_open_sb.st_size /
> ST_NBLOCKSIZE)
> - make_holes = true;
> + {
> + make_holes = true;
> +
> +# ifdef __linux__
> + static int safe_kernel = -1;
> +
> + if (safe_kernel == -1)
> + {
> + struct utsname name;
> +
> + safe_kernel = 1;
> +
> + if (uname (&name) != -1 && strncmp (name.release, "2.6.",
> 4) == 0)
> + {
> + int kver = atoi (name.release + 4);
> +
> + /* ext4 & btrfs are known to be broken */
> + if (kver < 38)
> + safe_kernel = 0;
> + }
> + }
> +
> + if (safe_kernel == 0)
> + make_holes = false;
> +# endif
> + }
> #endif
> }
>
> -mike
I'm going to use this "2.6.38" check to only enable FIEMAP_FLAG_SYNC
before Linux kernel 2.6.38. It's always worth avoiding sync if possible.
Proposed patch attached.
I'll submit my 3 outstanding fiemap patches tomorrow.
cheers,
Pádraig.
>From d2e62bb40017571baf12727c942fb9f131f8c951 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Wed, 30 Mar 2011 22:50:05 +0100
Subject: [PATCH] copy: with fiemap copy, only sync when needed
* src/extent-scan.h (struct extent_scan): Add the fm_flags member to
pass to the fiemap scan.
* src/extent-scan.c (extent_need_sync): A new function used to
detect Linux kernels before 2.6.38.
(extent_scan_init): Add FIEMAP_FLAG_SYNC when needed.
* tests/cp/sparse-fiemap: Adjust comment.
* NEWS: Mention the change in behavior.
Indirectly suggested by Mike Frysinger
---
NEWS | 4 ++++
src/extent-scan.c | 32 +++++++++++++++++++++++++++++++-
src/extent-scan.h | 3 +++
tests/cp/sparse-fiemap | 4 ++--
4 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/NEWS b/NEWS
index 9c4a16f..d1020cb 100644
--- a/NEWS
+++ b/NEWS
@@ -31,6 +31,10 @@ GNU coreutils NEWS -*- outline -*-
bytes from the input, and also can efficiently create a hole in the
output file when --sparse=always is specified.
+ cp now avoids syncing files when possible, when doing a FIEMAP copy.
+ The sync in only needed on Linux kernels before 2.6.38.
+ [The sync was introduced in coreutils-8.10]
+
** New features
dd now accepts the 'nocache' flag to the iflag and oflag options,
diff --git a/src/extent-scan.c b/src/extent-scan.c
index 24b56b8..b563363 100644
--- a/src/extent-scan.c
+++ b/src/extent-scan.c
@@ -20,15 +20,44 @@
#include <stdio.h>
#include <sys/types.h>
#include <sys/ioctl.h>
+#include <sys/utsname.h>
#include <assert.h>
#include "system.h"
#include "extent-scan.h"
+#include "xstrtol.h"
#ifndef HAVE_FIEMAP
# include "fiemap.h"
#endif
+/* Work around Linux kernel issues on BTRFS and EXT4 before 2.6.38. */
+static bool
+extent_need_sync (void)
+{
+ static int need_sync = -1;
+
+ if (need_sync == -1)
+ {
+ struct utsname name;
+ need_sync = 0; /* No workaround by default. */
+
+#ifdef __linux__
+ if (uname (&name) != -1 && strncmp (name.release, "2.6.", 4) == 0)
+ {
+ unsigned long val;
+ if (xstrtoul (name.release + 4, NULL, 0, &val, NULL) == LONGINT_OK)
+ {
+ if (val < 38)
+ need_sync = 1;
+ }
+ }
+#endif
+ }
+
+ return need_sync;
+}
+
/* Allocate space for struct extent_scan, initialize the entries if
necessary and return it as the input argument of extent_scan_read(). */
extern void
@@ -39,6 +68,7 @@ extent_scan_init (int src_fd, struct extent_scan *scan)
scan->scan_start = 0;
scan->initial_scan_failed = false;
scan->hit_final_extent = false;
+ scan->fm_flags = extent_need_sync () ? FIEMAP_FLAG_SYNC : 0;
}
#ifdef __linux__
@@ -62,7 +92,7 @@ extent_scan_read (struct extent_scan *scan)
memset (&fiemap_buf, 0, sizeof fiemap_buf);
fiemap->fm_start = scan->scan_start;
- fiemap->fm_flags = FIEMAP_FLAG_SYNC;
+ fiemap->fm_flags = scan->fm_flags;
fiemap->fm_extent_count = count;
fiemap->fm_length = FIEMAP_MAX_OFFSET - scan->scan_start;
diff --git a/src/extent-scan.h b/src/extent-scan.h
index 4724b25..8728515 100644
--- a/src/extent-scan.h
+++ b/src/extent-scan.h
@@ -41,6 +41,9 @@ struct extent_scan
/* Next scan start offset. */
off_t scan_start;
+ /* Flags to use for scan. */
+ uint32_t fm_flags;
+
/* How many extent info returned for a scan. */
uint32_t ei_count;
diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap
index 73448ec..bac2890 100755
--- a/tests/cp/sparse-fiemap
+++ b/tests/cp/sparse-fiemap
@@ -69,8 +69,8 @@ for i in $(seq 1 2 21); do
-e 'for (1..'$j') { sysseek (*F, $n, 1)' \
-e '&& syswrite (*F, chr($_)x$n) or die "$!"}' > j1 || fail=1
- # Note there is an implicit sync performed by cp to
- # work around bugs in EXT4 and BTRFS before Linux 2.6.38
+ # Note there is an implicit sync performed by cp on Linux kernels
+ # before 2.6.38 to work around bugs in EXT4 and BTRFS.
# Note also the -s parameter to the filefrag commands below
# for the same reasons.
cp --sparse=always j1 j2 || fail=1
--
1.7.4