Paul Eggert wrote:
>>>> This doesn't sound right.  A FIEMAP_EXTENT_UNWRITTEN extent is all zeros, 
>>>> and
>>>> so it should act as if it were a hole.  The goal is not to copy the exact
>>>> fiemap structure of the source (that's impossible): the goal is to use as
>>>> little time and space as possible.
> 
>> A FIEMAP_EXTENT_UNWRITTEN extent is marked to allocated although
>> read it will return ZEROs through the filesystem.  So why not using
>> fallocate(2) to deal with it?  IMHO, it meet the goal to use little
>> time and space as possible, Am I miss something?
> 
> It's faster to simply skip around that extent while reading it, and to
> skip around it when writing it, than to allocate it with fallocate
> when writing it.  Logically, a FIEMAP_EXTENT_UNWRITTEN extent is a
> hole, and should be optimized when reading and writing, just like any
> hole.
> 
>>> I see fiemap as optimizing reads,
>>> posix_fallocate() as optimizing writing zeros
>>> and fallocate() as optimizing allocation.
> 
> It may not be quite that simple.  Some platforms won't have fallocate
> and so posix_fallocate will have to do double duty as optimizing
> allocation too.  Also, lseek is part of the process of optimizing
> reads, and of optimizing writing zeros.  Most important, the
> heuristics for optimizing the writes should use info derived from
> optimizing the reads.
> 
> I'm not objecting to breaking these improvements into two or three
> pieces, if someone wants to do that.  However, it shouldn't be
> required to break them up; it's OK if someone wants to do it all at
> once.  (This stuff is not that hard, after all.)  I was planning to
> give it a shot at some point but obviously have not done so yet.

Hi paul, Pádraig and All,

Thanks for all your comments!

For now, I am inclined to separate efficient read through fiemap
and improve the write and allocation stuff via fallocate() or other ways later.

In this version, I created a new file src/sparse-core.c(not find out a better 
name yet :( ) to place
the original fiemap_copy().

Below is the revised fiemap_copy(), it honor make_holes to write all data(a new 
function
fill_with_holes_ok() implemented at sparse-core.c for this purpose) if 
--sparse=never option is
specified, it still does same thing as original for --sparse=always and 
--sparse=auto excecpt make
use of external buf to avoid allocated another one while reading and writing.

>From 53e1975cb23a4d3c9d9f982d1e48437e95b071d8 Mon Sep 17 00:00:00 2001
From: Jie Liu <jeff....@oracle.com>
Date: Fri, 16 Jul 2010 22:25:21 +0800
Subject: [PATCH 1/1] cp: Isolate fiemap_copy() to sparse-core.c

* src/sparse-core.c: new file include fiemap_copy()
* src/sparse-core.h: header file of sparse-core.c
* src/copy.c (copy_reg): Now, `cp' attempt to get an fiemap of the source file 
by default, fall back
  to
  a normal copy if the underlaying file system does not support it.
  We honor --sparse=never by writing all data but use fiemap if available to 
efficiently read.
* po/POTFILES.in: add sparse-core.c to it to pass po_check for translatable 
diagnostics.

Signed-off-by: Jie Liu <jeff....@oracle.com>
---
 po/POTFILES.in    |    1 +
 src/Makefile.am   |    3 +-
 src/copy.c        |  187 ++++-------------------------------------
 src/sparse-core.c |  240 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/sparse-core.h |   24 ++++++
 5 files changed, 284 insertions(+), 171 deletions(-)
 create mode 100644 src/sparse-core.c
 create mode 100644 src/sparse-core.h

diff --git a/po/POTFILES.in b/po/POTFILES.in
index c862877..025c9b0 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -110,6 +110,7 @@ src/shred.c
 src/shuf.c
 src/sleep.c
 src/sort.c
+src/sparse-core.c
 src/split.c
 src/stat.c
 src/stdbuf.c
diff --git a/src/Makefile.am b/src/Makefile.am
index 7d56312..a838920 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -144,6 +144,7 @@ noinst_HEADERS =    \
   chown-core.h         \
   copy.h               \
   cp-hash.h            \
+  sparse-core.h         \
   dircolors.h          \
   fiemap.h             \
   fs.h                 \
@@ -459,7 +460,7 @@ uninstall-local:
          fi; \
        fi

-copy_sources = copy.c cp-hash.c
+copy_sources = copy.c cp-hash.c sparse-core.c

 # Use `ginstall' in the definition of PROGRAMS and in dependencies to avoid
 # confusion with the `install' target.  The install rule transforms `ginstall'
diff --git a/src/copy.c b/src/copy.c
index f48c74d..f6d9b5e 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -30,6 +30,7 @@
 #endif

 #include "system.h"
+#include "sparse-core.h"
 #include "acl.h"
 #include "backupfile.h"
 #include "buffer-lcm.h"
@@ -63,10 +64,6 @@

 #include <sys/ioctl.h>

-#ifndef HAVE_FIEMAP
-# include "fiemap.h"
-#endif
-
 #ifndef HAVE_FCHOWN
 # define HAVE_FCHOWN false
 # define fchown(fd, uid, gid) (-1)
@@ -153,153 +150,6 @@ clone_file (int dest_fd, int src_fd)
 #endif
 }

-#ifdef __linux__
-# ifndef FS_IOC_FIEMAP
-#  define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap)
-# endif
-/* Perform a FIEMAP copy, if possible.
-   Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to
-   obtain a map of file extents excluding holes.  This avoids the
-   overhead of detecting holes in a hole-introducing/preserving copy,
-   and thus makes copying sparse files much more efficient.  Upon a
-   successful copy, return true.  If the initial ioctl fails, set
-   *NORMAL_COPY_REQUIRED to true and return false.  Upon any other
-   failure, set *NORMAL_COPY_REQUIRED to false and return false.  */
-static bool
-fiemap_copy (int src_fd, int dest_fd, size_t buf_size,
-             off_t src_total_size, char const *src_name,
-             char const *dst_name, bool *normal_copy_required)
-{
-  bool last = false;
-  union { struct fiemap f; char c[4096]; } fiemap_buf;
-  struct fiemap *fiemap = &fiemap_buf.f;
-  struct fiemap_extent *fm_ext = &fiemap->fm_extents[0];
-  enum { count = (sizeof fiemap_buf - sizeof *fiemap) / sizeof *fm_ext };
-  verify (count != 0);
-
-  off_t last_ext_logical = 0;
-  uint64_t last_ext_len = 0;
-  uint64_t last_read_size = 0;
-  unsigned int i = 0;
-  *normal_copy_required = false;
-
-  /* This is required at least to initialize fiemap->fm_start,
-     but also serves (in mid 2010) to appease valgrind, which
-     appears not to know the semantics of the FIEMAP ioctl. */
-  memset (&fiemap_buf, 0, sizeof fiemap_buf);
-
-  do
-    {
-      fiemap->fm_length = FIEMAP_MAX_OFFSET;
-      fiemap->fm_flags = FIEMAP_FLAG_SYNC;
-      fiemap->fm_extent_count = count;
-
-      /* When ioctl(2) fails, fall back to the normal copy only if it
-         is the first time we met.  */
-      if (ioctl (src_fd, FS_IOC_FIEMAP, fiemap) < 0)
-        {
-          /* If the first ioctl fails, tell the caller that it is
-             ok to proceed with a normal copy.  */
-          if (i == 0)
-            *normal_copy_required = true;
-          else
-            {
-              /* If the second or subsequent ioctl fails, diagnose it,
-                 since it ends up causing the entire copy/cp to fail.  */
-              error (0, errno, _("%s: FIEMAP ioctl failed"), quote (src_name));
-            }
-          return false;
-        }
-
-      /* If 0 extents are returned, then more ioctls are not needed.  */
-      if (fiemap->fm_mapped_extents == 0)
-        break;
-
-      for (i = 0; i < fiemap->fm_mapped_extents; i++)
-        {
-          assert (fm_ext[i].fe_logical <= OFF_T_MAX);
-
-          off_t ext_logical = fm_ext[i].fe_logical;
-          uint64_t ext_len = fm_ext[i].fe_length;
-
-          if (lseek (src_fd, ext_logical, SEEK_SET) < 0)
-            {
-              error (0, errno, _("cannot lseek %s"), quote (src_name));
-              return false;
-            }
-
-          if (lseek (dest_fd, ext_logical, SEEK_SET) < 0)
-            {
-              error (0, errno, _("cannot lseek %s"), quote (dst_name));
-              return false;
-            }
-
-          if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST)
-            {
-              last_ext_logical = ext_logical;
-              last_ext_len = ext_len;
-              last = true;
-            }
-
-          while (ext_len)
-            {
-              char buf[buf_size];
-
-              /* Avoid reading into the holes if the left extent
-                 length is shorter than the buffer size.  */
-              if (ext_len < buf_size)
-                buf_size = ext_len;
-
-              ssize_t n_read = read (src_fd, buf, buf_size);
-              if (n_read < 0)
-                {
-#ifdef EINTR
-                  if (errno == EINTR)
-                    continue;
-#endif
-                  error (0, errno, _("reading %s"), quote (src_name));
-                  return false;
-                }
-
-              if (n_read == 0)
-                {
-                  /* Figure out how many bytes read from the last extent.  */
-                  last_read_size = last_ext_len - ext_len;
-                  break;
-                }
-
-              if (full_write (dest_fd, buf, n_read) != n_read)
-                {
-                  error (0, errno, _("writing %s"), quote (dst_name));
-                  return false;
-                }
-
-              ext_len -= n_read;
-            }
-        }
-
-      fiemap->fm_start = fm_ext[i - 1].fe_logical + fm_ext[i - 1].fe_length;
-
-    } while (! last);
-
-  /* If a file ends up with holes, the sum of the last extent logical offset
-     and the read-returned size will be shorter than the actual size of the
-     file.  Use ftruncate to extend the length of the destination file.  */
-  if (last_ext_logical + last_read_size < src_total_size)
-    {
-      if (ftruncate (dest_fd, src_total_size) < 0)
-        {
-          error (0, errno, _("failed to extend %s"), quote (dst_name));
-          return false;
-        }
-    }
-
-  return true;
-}
-#else
-static bool fiemap_copy (ignored) { errno == ENOTSUP; return false; }
-#endif
-
 /* FIXME: describe */
 /* FIXME: rewrite this to use a hash table so we avoid the quadratic
    performance hit that's probably noticeable only on trees deeper
@@ -830,25 +680,6 @@ copy_reg (char const *src_name, char const *dst_name,
 #endif
         }

-      if (make_holes)
-        {
-          bool require_normal_copy;
-          /* Perform efficient FIEMAP copy for sparse files, fall back to the
-             standard copy only if the ioctl(2) fails.  */
-          if (fiemap_copy (source_desc, dest_desc, buf_size,
-                           src_open_sb.st_size, src_name,
-                           dst_name, &require_normal_copy))
-            goto preserve_metadata;
-          else
-            {
-              if (! require_normal_copy)
-                {
-                  return_val = false;
-                  goto close_src_and_dst_desc;
-                }
-            }
-        }
-
       /* If not making a sparse file, try to use a more-efficient
          buffer size.  */
       if (! make_holes)
@@ -877,6 +708,22 @@ copy_reg (char const *src_name, char const *dst_name,
       buf_alloc = xmalloc (buf_size + buf_alignment_slop);
       buf = ptr_align (buf_alloc, buf_alignment);

+      bool require_normal_copy;
+      /* Perform efficient FIEMAP copy for sparse files, fall back to the
+         standard copy only if the ioctl(2) fails.  */
+      if (fiemap_copy (source_desc, dest_desc, buf, buf_size,
+                       make_holes, src_name, dst_name,
+                       src_open_sb.st_size, &require_normal_copy))
+        goto preserve_metadata;
+      else
+        {
+          if (! require_normal_copy)
+            {
+              return_val = false;
+              goto close_src_and_dst_desc;
+            }
+        }
+
       while (true)
         {
           word *wp = NULL;
diff --git a/src/sparse-core.c b/src/sparse-core.c
new file mode 100644
index 0000000..e122dc8
--- /dev/null
+++ b/src/sparse-core.c
@@ -0,0 +1,240 @@
+/* sparse-core.c -- core functions for efficient reading sparse files
+   Copyright (C) 2010 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+
+#include "system.h"
+#include "sparse-core.h"
+#include "error.h"
+#include "quote.h"
+#include "full-write.h"
+
+#ifndef HAVE_FIEMAP
+# include "fiemap.h"
+#endif
+
+static bool
+fill_with_holes_ok(int dest_fd, const char *dst_name,
+                   char *buf, size_t buf_size,
+                   uint64_t holes_len)
+{
+  while (buf_size < holes_len)
+    {
+      if (full_write (dest_fd, buf, buf_size) != buf_size)
+        {
+          error (0, errno, _("writing %s"), quote (dst_name));
+          return false;
+        }
+        holes_len -= buf_size;
+    }
+
+  if (0 < holes_len)
+    {
+      if (full_write (dest_fd, buf, holes_len) != holes_len)
+        {
+          error (0, errno, _("writing %s"), quote (dst_name));
+          return false;
+        }
+    }
+
+  return true;
+}
+
+
+#ifdef __linux__
+# ifndef FS_IOC_FIEMAP
+#  define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap)
+# endif
+
+/* Perform a FIEMAP copy, if possible.
+   Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to
+   obtain a map of file extents excluding holes.  This avoids the
+   overhead of detecting holes in a hole-introducing/preserving copy,
+   and thus makes copying sparse files much more efficient.  Upon a
+   successful copy, return true.  If the initial ioctl fails, set
+   *NORMAL_COPY_REQUIRED to true and return false.  Upon any other
+   failure, set *NORMAL_COPY_REQUIRED to false and return false.  */
+
+extern bool
+fiemap_copy (int src_fd, int dest_fd, char *buf,
+             size_t buf_size, bool make_holes,
+             char const *src_name, char const *dst_name,
+             off_t src_total_size, bool *normal_copy_required)
+{
+  bool last = false;
+  union { struct fiemap f; char c[4096]; } fiemap_buf;
+  struct fiemap *fiemap = &fiemap_buf.f;
+  struct fiemap_extent *fm_ext = &fiemap->fm_extents[0];
+  enum { count = (sizeof fiemap_buf - sizeof *fiemap) / sizeof *fm_ext };
+  verify (count != 0);
+
+  off_t prev_ext_logical = 0;
+  off_t last_ext_logical = 0;
+  uint64_t prev_ext_len = 0;
+  uint64_t last_ext_len = 0;
+  uint64_t last_read_size = 0;
+  unsigned int i = 0;
+  *normal_copy_required = false;
+
+  if (! make_holes)
+    memset (buf, 0, buf_size);
+
+  /* This is required at least to initialize fiemap->fm_start,
+     but also serves (in mid 2010) to appease valgrind, which
+     appears not to know the semantics of the FIEMAP ioctl. */
+  memset (&fiemap_buf, 0, sizeof fiemap_buf);
+
+  do
+    {
+      fiemap->fm_length = FIEMAP_MAX_OFFSET;
+      fiemap->fm_flags = FIEMAP_FLAG_SYNC;
+      fiemap->fm_extent_count = count;
+
+      /* When ioctl(2) fails, fall back to the normal copy only if it
+         is the first time we met.  */
+      if (ioctl (src_fd, FS_IOC_FIEMAP, fiemap) < 0)
+        {
+          /* If the first ioctl fails, tell the caller that it is
+             ok to proceed with a normal copy.  */
+          if (i == 0)
+            *normal_copy_required = true;
+          else
+            {
+              /* If the second or subsequent ioctl fails, diagnose it,
+                 since it ends up causing the entire copy/cp to fail.  */
+              error (0, errno, _("%s: FIEMAP ioctl failed"), quote (src_name));
+            }
+          return false;
+        }
+
+      /* If 0 extents are returned, then more ioctls are not needed.  */
+      if (fiemap->fm_mapped_extents == 0)
+        break;
+
+      for (i = 0; i < fiemap->fm_mapped_extents; i++)
+        {
+          assert (fm_ext[i].fe_logical <= OFF_T_MAX);
+
+          off_t ext_logical = fm_ext[i].fe_logical;
+          uint64_t ext_len = fm_ext[i].fe_length;
+
+          if (lseek (src_fd, ext_logical, SEEK_SET) < 0)
+            {
+              error (0, errno, _("cannot lseek %s"), quote (src_name));
+              return false;
+            }
+
+          if (0 < i)
+            {
+              prev_ext_logical = fm_ext[i - 1].fe_logical,
+              prev_ext_len = fm_ext[i - 1].fe_length;
+            }
+
+          if (! make_holes)
+            {
+              if (prev_ext_logical + prev_ext_len < ext_logical)
+                {
+                  uint64_t holes_len = ext_logical - prev_ext_logical - 
prev_ext_len;
+                  if (! fill_with_holes_ok (dest_fd, dst_name, buf, buf_size, 
holes_len))
+                    return false;
+                }
+            }
+          else
+            {
+              if (lseek (dest_fd, ext_logical, SEEK_SET) < 0)
+                {
+                  error (0, errno, _("cannot lseek %s"), quote (dst_name));
+                  return false;
+                }
+            }
+
+          if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST)
+            {
+              last_ext_logical = ext_logical;
+              last_ext_len = ext_len;
+              last = true;
+            }
+
+          while (ext_len)
+            {
+              /* Avoid reading into the holes if the left extent
+                 length is shorter than the buffer size.  */
+              if (ext_len < buf_size)
+                buf_size = ext_len;
+
+              ssize_t n_read = read (src_fd, buf, buf_size);
+              if (n_read < 0)
+                {
+#ifdef EINTR
+                  if (errno == EINTR)
+                    continue;
+#endif
+                  error (0, errno, _("reading %s"), quote (src_name));
+                  return false;
+                }
+
+              if (n_read == 0)
+                {
+                  /* Figure out how many bytes read from the last extent.  */
+                  last_read_size = last_ext_len - ext_len;
+                  break;
+                }
+
+              if (full_write (dest_fd, buf, n_read) != n_read)
+                {
+                  error (0, errno, _("writing %s"), quote (dst_name));
+                  return false;
+                }
+
+              ext_len -= n_read;
+            }
+        }
+
+      fiemap->fm_start = fm_ext[i - 1].fe_logical + fm_ext[i - 1].fe_length;
+
+    } while (! last);
+
+  /* If a file ends up with holes, the sum of the last extent logical offset
+     and the read-returned size will be shorter than the actual size of the
+     file.  Use ftruncate to extend the length of the destination file.  */
+  if (last_ext_logical + last_read_size < src_total_size)
+    {
+      if (! make_holes)
+        {
+          uint64_t holes_len = src_total_size - last_ext_logical - 
last_ext_len;
+          if (! fill_with_holes_ok (dest_fd, dst_name, buf, buf_size, 
holes_len))
+            return false;
+        }
+      else
+        {
+          if (ftruncate (dest_fd, src_total_size) < 0)
+            {
+              error (0, errno, _("failed to extend %s"), quote (dst_name));
+              return false;
+            }
+        }
+    }
+
+  return true;
+}
+#else
+extern bool fiemap_copy (ignored) { errno == ENOTSUP; return false; }
+#endif
diff --git a/src/sparse-core.h b/src/sparse-core.h
new file mode 100644
index 0000000..89c9ebf
--- /dev/null
+++ b/src/sparse-core.h
@@ -0,0 +1,24 @@
+/* core functions for efficient reading sparse files
+   Copyright (C) 2010 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef SPARSE_CORE_H
+# define SPARSE_CORE_H
+
+bool fiemap_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
+                  bool make_holes, char const *src_name, char const *dst_name,
+                  off_t src_total_size, bool *normal_copy_required);
+
+#endif /* SPARSE_CORE_H */
-- 
1.5.4.3


Thanks,
-Jeff

-- 
The knowledge you get, no matter how much it is, must be possessed yourself and 
nourished with your
own painstaking efforts and be your achievement through hard work.



Reply via email to