On 10/02/11 10:01, Pádraig Brady wrote:
> $ fallocate -l $((1<<29)) /tmp/t.f
> $ strace -e _llseek cp-before /tmp/t.f /dev/null 2>&1 | wc -l
> 180
> $ strace -e _llseek cp /tmp/t.f /dev/null 2>&1 | wc -l
> 0
> 
> Hmm, the above suggests to me that we could use the
> FIEMAP_EXTENT_UNWRITTEN flag, so as to skip the read()
> for these allocated but unwritten (zero) extents.
> We could treat such extents like holes, except we
> would only generate holes in the dest with SPARSE_ALWAYS.

A first pass at the above is attached.
I still need to check on BTRFS etc.,
and I can probably clean up the code a bit more.

cheers,
Pádraig.
From 9109561d08d966d02799491a105d7571b97c89c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Fri, 11 Feb 2011 08:55:22 +0000
Subject: [PATCH] copy: process empty extents more efficiently

* src/copy.c (extent_copy): Treat an allocated but empty extent
much like a hole, i.e., don't read data we know is going to be NUL.
Also we only convert the empty extent to a hole when SPARSE_ALWAYS
so that the source and dest have the same allocation.  This will
be improved soon, when we use fallocate() to do the allocation.
* tests/cp/fiemap-empty: A new test for efficiency and correctness
of copying empty extents.
* tests/Makefile.am: Reference the new test.
* NEWS: Mention the change in behavior.
---
 NEWS                  |    7 ++++
 src/copy.c            |   80 ++++++++++++++++++++++++++++++++++++++++--------
 tests/Makefile.am     |    1 +
 tests/cp/fiemap-empty |   73 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 147 insertions(+), 14 deletions(-)
 create mode 100755 tests/cp/fiemap-empty

diff --git a/NEWS b/NEWS
index a367d8d..02f5dd6 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,13 @@ GNU coreutils NEWS                                    -*- outline -*-
   delimiter and an unbounded range like "-f1234567890-".
   [bug introduced in coreutils-5.3.0]
 
+** Changes in behavior
+
+  cp now copies empty extents efficiently on file systems with FIEMAP
+  support (ext4, btrfs, xfs, ocfs2).  It no longer reads the zero
+  bytes from the input, and also can easily create a hole in the output
+  file when --sparse=always is specified.
+
 
 * Noteworthy changes in release 8.10 (2011-02-04) [stable]
 
diff --git a/src/copy.c b/src/copy.c
index 104652d..96f6eb7 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -39,6 +39,7 @@
 #include "extent-scan.h"
 #include "error.h"
 #include "fcntl--.h"
+#include "fiemap.h"
 #include "file-set.h"
 #include "filemode.h"
 #include "filenamecat.h"
@@ -330,11 +331,28 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
         }
 
       unsigned int i;
-      for (i = 0; i < scan.ei_count; i++)
+      bool empty_extent = false;
+      for (i = 0; i < scan.ei_count || empty_extent; i++)
         {
-          off_t ext_start = scan.ext_info[i].ext_logical;
-          uint64_t ext_len = scan.ext_info[i].ext_length;
-          uint64_t hole_size = ext_start - last_ext_start - last_ext_len;
+          off_t ext_start;
+          uint64_t ext_len;
+          uint64_t hole_size;
+
+          if (i == scan.ei_count) /* empty extent at EOF.  */
+            {
+              i--;
+              ext_start = last_ext_start + scan.ext_info[i].ext_length;
+              ext_len = 0;
+            }
+          else
+            {
+              ext_start = scan.ext_info[i].ext_logical;
+              ext_len = scan.ext_info[i].ext_length;
+            }
+
+          hole_size = ext_start - last_ext_start - last_ext_len;
+
+          wrote_hole_at_eof = false;
 
           if (hole_size)
             {
@@ -346,38 +364,72 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
                   return false;
                 }
 
-              if (sparse_mode != SPARSE_NEVER)
+              if ((empty_extent && sparse_mode == SPARSE_ALWAYS)
+                  || (!empty_extent && sparse_mode != SPARSE_NEVER))
                 {
                   if (lseek (dest_fd, ext_start, SEEK_SET) < 0)
                     {
                       error (0, errno, _("cannot lseek %s"), quote (dst_name));
                       goto fail;
                     }
+                  wrote_hole_at_eof = true;
                 }
               else
                 {
                   /* When not inducing holes and when there is a hole between
                      the end of the previous extent and the beginning of the
                      current one, write zeros to the destination file.  */
-                  if (! write_zeros (dest_fd, hole_size))
+                  off_t nzeros = hole_size;
+                  if (empty_extent)
+                    nzeros = MIN (src_total_size - dest_pos, hole_size);
+
+                  if (! write_zeros (dest_fd, nzeros))
                     {
                       error (0, errno, _("%s: write failed"), quote (dst_name));
                       goto fail;
                     }
+
+                  dest_pos = MIN (src_total_size, ext_start);
                 }
             }
 
           last_ext_start = ext_start;
-          last_ext_len = ext_len;
 
-          off_t n_read;
-          if ( ! sparse_copy (src_fd, dest_fd, buf, buf_size,
-                              sparse_mode == SPARSE_ALWAYS, src_name, dst_name,
-                              ext_len, &n_read,
-                              &wrote_hole_at_eof))
-            return false;
+          /* Treat an unwritten but allocated extent much like a hole.
+             I.E. don't read, but don't convert to a hole in the destination,
+             unless SPARSE_ALWAYS.  */
+          if (scan.ext_info[i].ext_flags & FIEMAP_EXTENT_UNWRITTEN)
+            {
+              empty_extent = true;
+              last_ext_len = 0;
+              if (ext_len == 0) /* The last extent is empty and processed.  */
+                empty_extent = false;
+            }
+          else
+            {
+              off_t n_read;
+              empty_extent = false;
+              last_ext_len = ext_len;
+
+              if ( ! sparse_copy (src_fd, dest_fd, buf, buf_size,
+                                  sparse_mode == SPARSE_ALWAYS,
+                                  src_name, dst_name, ext_len, &n_read,
+                                  &wrote_hole_at_eof))
+                return false;
+
+              dest_pos = ext_start + n_read;
+            }
 
-          dest_pos = ext_start + n_read;
+          /* If the file ends with unwritten extents not accounted for in the
+             size, then skip processing them, and the associated redundant
+             read() calls which will always return 0.  We will need to
+             remove this when we add fallocate() so that we can maintain
+             extents beyond the apparent size.  */
+          if (dest_pos == src_total_size)
+            {
+              scan.hit_final_extent = true;
+              break;
+            }
         }
 
       /* Release the space allocated to scan->ext_info.  */
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 8aa56cd..b2885e7 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -321,6 +321,7 @@ TESTS =						\
   cp/dir-vs-file				\
   cp/existing-perm-race				\
   cp/fail-perm					\
+  cp/fiemap-empty                               \
   cp/fiemap-perf                                \
   cp/fiemap-2                                   \
   cp/file-perm-race				\
diff --git a/tests/cp/fiemap-empty b/tests/cp/fiemap-empty
new file mode 100755
index 0000000..01ef14d
--- /dev/null
+++ b/tests/cp/fiemap-empty
@@ -0,0 +1,73 @@
+#!/bin/sh
+# Test cp reads unwritten extents efficiently
+
+# Copyright (C) 2011 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+print_ver_ cp
+
+fiemap_capable_ . ||
+  skip_ 'this file system lacks FIEMAP support'
+
+fallocate --help >/dev/null || skip_test_ 'The fallocate utility is required'
+fallocate -l 1 -n falloc.test ||
+  skip_ 'this file system lacks FALLOCATE support'
+
+# Ensure we read a large empty file quickly
+fallocate -l 300000000 empty.big || framework_failure
+timeout 3 cp --sparse=always empty.big cp.test || fail=1
+test $(stat -c %s empty.big) = $(stat -c %s cp.test) || fail=1
+rm empty.big cp.test
+
+# Ensure we handle extents beyond file size correctly.
+# Note that until we support fallocate, we will not maintain
+# the file allocation.  Amend this test when fallocate is supported.
+fallocate -l 10000000 -n unwritten.withdata || framework_failure
+dd count=10 if=/dev/urandom conv=notrunc iflag=fullblock of=unwritten.withdata
+cp unwritten.withdata cp.test || fail=1
+test $(stat -c %s unwritten.withdata) = $(stat -c %s cp.test) || fail=1
+cmp unwritten.withdata cp.test || fail=1
+rm unwritten.withdata cp.test
+
+# The following sequence, which would generate unaccounted extents followed
+# by a hole, is not supported by ext4 at least: the ftruncate discards all
+# extents not accounted for in the size.
+#  fallocate -l 10000000 -n unacc.withholes
+#  dd count=10 if=/dev/urandom conv=notrunc iflag=fullblock of=unacc.withholes
+#  truncate -s20000000 unacc.withholes
+
+# Ensure we handle a hole after empty extents correctly.
+# Since all extents are accounted for in the size,
+# we can maintain the allocation independently from
+# fallocate() support.
+fallocate -l 10000000 empty.withholes
+truncate -s 20000000 empty.withholes
+sectors_per_block=$(expr $(stat -c %o .) / 512)
+cp empty.withholes cp.test || fail=1
+test $(stat -c %s empty.withholes) = $(stat -c %s cp.test) || fail=1
+# These are usually equal but can vary by an IO block due to alignment
+alloc_diff=$(expr $(stat -c %b empty.withholes) - $(stat -c %b cp.test))
+alloc_diff=$(echo $alloc_diff | tr -d -- -)
+test $alloc_diff -le $sectors_per_block || fail=1
+# Again with SPARSE_ALWAYS
+cp --sparse=always empty.withholes cp.test || fail=1
+test $(stat -c %s empty.withholes) = $(stat -c %s cp.test) || fail=1
+# cp.test should take 0 space, but allowing for some systems
+# that store default extended attributes in data blocks
+test $(stat -c %b cp.test) -le $sectors_per_block || fail=1
+rm empty.withholes cp.test
+
+Exit $fail
-- 
1.7.4

Reply via email to