I noticed fadvise(DONTNEED) getting some love in the kernel recently
   http://lkml.org/lkml/2011/2/17/169
Which prompted me to implement this. With the attached patch one can now:

  # Advise to drop cache for whole file
  dd if=ifile iflag=nocache count=0

  # Ensure drop cache for whole file
  dd of=ofile oflag=nocache conv=notrunc,fdatasync count=0

  # Drop cache for part of file
  dd if=ifile iflag=nocache skip=10 count=10 of=/dev/null

  # Stream data just using readahead cache
  dd if=ifile of=ofile iflag=nocache oflag=nocache

When count=0, i.e. when only manipulating the cache,
we propagate the posix_fadvise() return to the exit status.

Note this will invalidate the cache even if another
process has the file open. That could be avoided with mincore(2):
   http://insights.oetiker.ch/linux/fadvise.html
However, I don't think that's needed for a tool like dd.

cheers,
Pádraig.
>From 5ebe7618f8d62a81e053a57390132e4013218416 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Tue, 22 Feb 2011 21:14:00 +0000
Subject: [PATCH] dd: add a flag to discard cached data

* src/dd.c (usage): Add the 'nocache' flag.
(cache_round): A new function to help ignore cache
drop requests less than page_size.
(invalidate_cache): A new function to call posix_fadvise()
with the appropriate offset and length.  Note we don't
use fdadvise() so we can detect errors when count=0.
(dd_copy): Call invalidate_cache() for the processed portions.
(main): Call invalidate_cache for page_size slop or
for full file when count=0.
* cfg.mk (sc_dd_O_FLAGS): Adjust to pass.
* doc/coreutils.texi (dd invocation): Describe the 'nocache' flag,
and give some examples of how it can be used.
* tests/dd/nocache: A new test.
* tests/Makefile.am: Reference the new test.
* NEWS: Mention the new feature.
---
 NEWS               |    6 ++
 cfg.mk             |    4 +-
 doc/coreutils.texi |   25 ++++++++
 src/dd.c           |  160 +++++++++++++++++++++++++++++++++++++++++++++++++---
 tests/Makefile.am  |    1 +
 tests/dd/nocache   |   48 ++++++++++++++++
 6 files changed, 234 insertions(+), 10 deletions(-)
 create mode 100755 tests/dd/nocache

diff --git a/NEWS b/NEWS
index a367d8d..bbb8bd3 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,12 @@ GNU coreutils NEWS                                    -*- outline -*-
   delimiter and an unbounded range like "-f1234567890-".
   [bug introduced in coreutils-5.3.0]
 
+** New features
+
+  dd now accepts the nocache option to iflag and oflag,
+  which will discard any cache associated with the files,
+  or processed portion thereof.
+
 
 * Noteworthy changes in release 8.10 (2011-02-04) [stable]
 
diff --git a/cfg.mk b/cfg.mk
index e4f3faa..c897dc4 100644
--- a/cfg.mk
+++ b/cfg.mk
@@ -39,8 +39,8 @@ _hv_file ?= $(srcdir)/tests/misc/help-version
 dd = $(srcdir)/src/dd.c
 sc_dd_O_FLAGS:
 	@rm -f $@.1 $@.2
-	@{ echo O_FULLBLOCK; perl -nle '/^ +\| (O_\w*)$$/ and print $$1' \
-	  $(dd); } | sort > $@.1
+	@{ echo O_FULLBLOCK; echo O_NOCACHE;				\
+	  perl -nle '/^ +\| (O_\w*)$$/ and print $$1' $(dd); } | sort > $@.1
 	@{ echo O_NOFOLLOW; perl -nle '/{"[a-z]+",\s*(O_\w+)},/ and print $$1' \
 	  $(dd); } | sort > $@.2
 	@diff -u $@.1 $@.2 || diff=1 || diff=;				\
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index ea35afe..529adab 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -8168,6 +8168,31 @@ last-access and last-modified time) is not necessarily synchronized.
 @cindex synchronized data and metadata I/O
 Use synchronized I/O for both data and metadata.
 
+@item nocache
+@opindex nocache
+@cindex discarding file cache
+Discard the data cache for a file.
+When count=0 all cache is discarded,
+otherwise the cache is dropped for the processed
+portion of the file.  Also when count=0
+failure to discard the cache is diagnosed
+and reflected in the exit status.
+Here are some usage examples:
+
+@example
+# Advise to drop cache for whole file
+dd if=ifile iflag=nocache count=0
+
+# Ensure drop cache for the whole file
+dd of=ofile oflag=nocache conv=notrunc,fdatasync count=0
+
+# Drop cache for part of file
+dd if=ifile iflag=nocache skip=10 count=10 of=/dev/null
+
+# Stream data just using read-ahead cache
+dd if=ifile of=ofile iflag=nocache oflag=nocache
+@end example
+
 @item nonblock
 @opindex nonblock
 @cindex nonblocking I/O
diff --git a/src/dd.c b/src/dd.c
index daddc1e..f62aaed 100644
--- a/src/dd.c
+++ b/src/dd.c
@@ -225,6 +225,9 @@ static sig_atomic_t volatile interrupt_signal;
 /* A count of the number of pending info signals that have been received.  */
 static sig_atomic_t volatile info_signal_count;
 
+/* Whether to discard cache for input or output.  */
+static bool i_nocache, o_nocache;
+
 /* Function used for read (to handle iflag=fullblock parameter).  */
 static ssize_t (*iread_fnc) (int fd, char *buf, size_t size);
 
@@ -259,6 +262,7 @@ static struct symbol_value const conversions[] =
   {"", 0}
 };
 
+#define FFS_MASK(x) ((x) ^ ((x) & ((x) - 1)))
 enum
   {
     /* Compute a value that's bitwise disjoint from the union
@@ -278,17 +282,23 @@ enum
           | O_SYNC
           | O_TEXT
           ),
-    /* Use its lowest bit.  */
-    O_FULLBLOCK = v ^ (v & (v - 1))
+
+    /* Use its lowest bits for private flags.  */
+    O_FULLBLOCK = FFS_MASK (v),
+    v2 = v ^ O_FULLBLOCK,
+
+    O_NOCACHE = FFS_MASK (v2)
   };
 
 /* Ensure that we got something.  */
 verify (O_FULLBLOCK != 0);
+verify (O_NOCACHE != 0);
 
 #define MULTIPLE_BITS_SET(i) (((i) & ((i) - 1)) != 0)
 
 /* Ensure that this is a single-bit value.  */
 verify ( ! MULTIPLE_BITS_SET (O_FULLBLOCK));
+verify ( ! MULTIPLE_BITS_SET (O_NOCACHE));
 
 /* Flags, for iflag="..." and oflag="...".  */
 static struct symbol_value const flags[] =
@@ -300,6 +310,7 @@ static struct symbol_value const flags[] =
   {"directory",	O_DIRECTORY},
   {"dsync",	O_DSYNC},
   {"noatime",	O_NOATIME},
+  {"nocache",	O_NOCACHE},   /* Discard cache.  */
   {"noctty",	O_NOCTTY},
   {"nofollow",	HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0},
   {"nolinks",	O_NOLINKS},
@@ -536,6 +547,10 @@ Each FLAG symbol may be:\n\
         fputs (_("  nonblock  use non-blocking I/O\n"), stdout);
       if (O_NOATIME)
         fputs (_("  noatime   do not update access time\n"), stdout);
+#if HAVE_POSIX_FADVISE
+      if (O_NOCACHE)
+        fputs (_("  nocache   discard cached data\n"), stdout);
+#endif
       if (O_NOCTTY)
         fputs (_("  noctty    do not assign controlling terminal from file\n"),
                stdout);
@@ -789,6 +804,91 @@ process_signals (void)
     }
 }
 
+/* Return LEN rounded down to a multiple of PAGE_SIZE
+   while storing the remainder internally per FD.
+   Pass LEN == 0 to get the current remainder.  */
+
+static off_t
+cache_round (int fd, off_t len)
+{
+  static off_t i_pending, o_pending;
+  off_t *pending = (fd == STDIN_FILENO ? &i_pending : &o_pending);
+
+  if (len)
+    {
+      off_t c_pending = *pending + len;
+      *pending = c_pending % page_size;
+      if (c_pending > *pending)
+        len = c_pending - *pending;
+      else
+        len = 0;
+    }
+  else
+    len = *pending;
+
+  return len;
+}
+
+/* Discard the cache from the current offset of either
+   STDIN_FILENO or STDOUT_FILENO.
+   Return true on success.  */
+
+static bool
+invalidate_cache (int fd, off_t len)
+{
+  int adv_ret = -1;
+
+  /* Minimize syscalls.  */
+  off_t clen = cache_round (fd, len);
+  if (len && !clen)
+    return true; /* Don't advise this time.  */
+  if (!len && !clen && max_records)
+    return true; /* Nothing pending.  */
+  off_t pending = len ? cache_round (fd, 0) : 0;
+
+  if (fd == STDIN_FILENO)
+    {
+      if (input_seekable)
+        {
+          /* Note we're being careful here to only invalidate what
+             we've read, so as not to dump any read ahead cache.  */
+#if HAVE_POSIX_FADVISE
+            adv_ret = posix_fadvise (fd, input_offset - clen - pending, clen,
+                                     POSIX_FADV_DONTNEED);
+#else
+            errno = ENOTSUP;
+#endif
+        }
+      else
+        errno = ESPIPE;
+    }
+  else if (fd == STDOUT_FILENO)
+    {
+      static off_t output_offset = -2;
+
+      if (output_offset != -1)
+        {
+          if (0 > output_offset)
+            {
+              output_offset = lseek (fd, 0, SEEK_CUR);
+              output_offset -= clen + pending;
+            }
+          if (0 <= output_offset)
+            {
+#if HAVE_POSIX_FADVISE
+              adv_ret = posix_fadvise (fd, output_offset, clen,
+                                       POSIX_FADV_DONTNEED);
+#else
+              errno = ENOTSUP;
+#endif
+              output_offset += clen + pending;
+            }
+        }
+    }
+
+  return adv_ret != -1 ? true : false;
+}
+
 /* Read from FD into the buffer BUF of size SIZE, processing any
    signals that arrive before bytes are read.  Return the number of
    bytes read if successful, -1 (setting errno) on failure.  */
@@ -849,9 +949,7 @@ iwrite (int fd, char const *buf, size_t size)
          posix_fadvise to tell the system not to pollute the buffer
          cache with this data.  Don't bother to diagnose lseek or
          posix_fadvise failure. */
-      off_t off = lseek (STDOUT_FILENO, 0, SEEK_CUR);
-      if (0 <= off)
-        fdadvise (STDOUT_FILENO, off, 0, FADVISE_DONTNEED);
+      invalidate_cache (STDOUT_FILENO, 0);
 
       /* Attempt to ensure that that final block is committed
          to disk as quickly as possible.  */
@@ -880,6 +978,9 @@ iwrite (int fd, char const *buf, size_t size)
         total_written += nwritten;
     }
 
+  if (o_nocache && total_written)
+    invalidate_cache (fd, total_written);
+
   return total_written;
 }
 
@@ -1103,6 +1204,20 @@ scanargs (int argc, char *const *argv)
     error (EXIT_FAILURE, 0, _("cannot combine lcase and ucase"));
   if (multiple_bits_set (conversions_mask & (C_EXCL | C_NOCREAT)))
     error (EXIT_FAILURE, 0, _("cannot combine excl and nocreat"));
+  if (multiple_bits_set (input_flags & (O_DIRECT | O_NOCACHE))
+      || multiple_bits_set (output_flags & (O_DIRECT | O_NOCACHE)))
+    error (EXIT_FAILURE, 0, _("cannot combine direct and nocache"));
+
+  if (input_flags & O_NOCACHE)
+    {
+      i_nocache = true;
+      input_flags &= ~O_NOCACHE;
+    }
+  if (output_flags & O_NOCACHE)
+    {
+      o_nocache = true;
+      output_flags &= ~O_NOCACHE;
+    }
 }
 
 /* Fix up translation table. */
@@ -1685,9 +1800,6 @@ dd_copy (void)
 
       nread = iread_fnc (STDIN_FILENO, ibuf, input_blocksize);
 
-      if (nread == 0)
-        break;			/* EOF.  */
-
       if (nread < 0)
         {
           error (0, errno, _("reading %s"), quote (input_file));
@@ -1721,6 +1833,12 @@ dd_copy (void)
       n_bytes_read = nread;
       advance_input_offset (nread);
 
+      if (i_nocache)
+        invalidate_cache (STDIN_FILENO, nread);
+
+      if (nread == 0)
+        break;			/* EOF.  */
+
       if (n_bytes_read < input_blocksize)
         {
           r_partial++;
@@ -1950,5 +2068,31 @@ main (int argc, char **argv)
 
   exit_status = dd_copy ();
 
+  if (max_records == 0)
+    {
+      /* Special case to invalidate cache to end of file.  */
+      if (i_nocache && !invalidate_cache (STDIN_FILENO, 0))
+        {
+          error (0, errno, _("failed to discard cache for: %s"),
+                 quote (input_file));
+          exit_status = EXIT_FAILURE;
+        }
+      if (o_nocache && !invalidate_cache (STDOUT_FILENO, 0))
+        {
+          error (0, errno, _("failed to discard cache for: %s"),
+                 quote (output_file));
+          exit_status = EXIT_FAILURE;
+        }
+    }
+  else if (max_records != (uintmax_t) -1)
+    {
+      /* Invalidate any pending region less than page size,
+         in case the kernel might round up.  */
+      if (i_nocache)
+        invalidate_cache (STDIN_FILENO, 0);
+      if (o_nocache)
+        invalidate_cache (STDOUT_FILENO, 0);
+    }
+
   quit (exit_status);
 }
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 8aa56cd..08bd3d4 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -352,6 +352,7 @@ TESTS =						\
   df/unreadable					\
   dd/direct					\
   dd/misc					\
+  dd/nocache					\
   dd/not-rewound				\
   dd/reblock					\
   dd/skip-seek					\
diff --git a/tests/dd/nocache b/tests/dd/nocache
new file mode 100755
index 0000000..4cddea2
--- /dev/null
+++ b/tests/dd/nocache
@@ -0,0 +1,48 @@
+#!/bin/sh
+# Ensure dd handles the 'nocache' flag
+
+# Copyright (C) 2011 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+print_ver_ dd
+
+# This should not call posix_fadvise
+dd iflag=nocache oflag=nocache if=/dev/null of=/dev/null || fail=1
+
+# We should get an error for trying to process a pipe
+dd iflag=nocache count=0 && fail=1
+
+# O_DIRECT is orthogonal to drop cache so mutually exclusive
+dd iflag=nocache,direct if=/dev/null && fail=1
+
+# The rest ensure that the documented use cases
+# proceed without error
+dd if=/dev/zero of=ifile count=100 || framework_failure
+dd if=/dev/zero of=ofile count=100 || framework_failure
+
+# Advise to drop cache for whole file
+dd if=ifile iflag=nocache count=0 || fail=1
+
+# Ensure drop cache for whole file
+dd of=ofile oflag=nocache conv=notrunc,fdatasync count=0 || fail=1
+
+# Drop cache for part of file
+dd if=ifile iflag=nocache skip=10 count=10 of=/dev/null || fail=1
+
+# Stream data just using readahead cache
+dd if=ifile of=ofile iflag=nocache oflag=nocache || fail=1
+
+Exit $fail
-- 
1.7.4

Reply via email to