Hi Paolo, In my experience, dev/ino is sufficient, as long as you're not using one of a few POSIX-violating fringe file systems (clearcase's MVS comes to mind). I remember problems years ago that were fixed when Paul added ctime, mtime, etc. checks to diffutils.
So for a case like this, I think we should use something like this (see the long comment just preceding it in http://git.savannah.gnu.org/cgit/diffutils.git/tree/src/system.h#n171): #ifndef same_file_attributes # define same_file_attributes(s, t) \ ((s)->st_mode == (t)->st_mode \ && (s)->st_nlink == (t)->st_nlink \ && (s)->st_uid == (t)->st_uid \ && (s)->st_gid == (t)->st_gid \ && (s)->st_size == (t)->st_size \ && (s)->st_mtime == (t)->st_mtime \ && (s)->st_ctime == (t)->st_ctime) #endif I'd be surprised if there's any system on which adding an st_birthtime comparison would change the outcome. Here's a preliminary patch. I will add a test case and update NEWS. In the mean time, if anyone knows who submitted the proposed patch, I'll mention that it inspired mine. With that change, now I see this behavior: $ mkdir d && touch d/k && ./grep -r pat d > d/k ./grep: input file `d/k' is also the output [Exit 2] Before, depending on pattern/contents, it would either exit 0, 1 or fill your partition. >From ed7715eaf0deea8d5b4c925a7358d05ab1688d34 Mon Sep 17 00:00:00 2001 From: Jim Meyering <[email protected]> Date: Sat, 23 Jul 2011 22:52:06 +0200 Subject: [PATCH] fail with exit status 2 when an input file is the same as the output This avoids a potential "infinite" disk-filling loop. Reported in http://savannah.gnu.org/patch/?5316 and http://savannah.gnu.org/bugs/?17457. * src/main.c: Include "quote.h". (out_stat): New global. (grepfile): Compare each regular file's dev/ino/etc. with those from the file on stdout (if it too is regular). (main): Set out_stat, if stdout is a regular file. * src/system.h: Include "same-inode.h". (same_file_attributes): Define. From diffutils. (SAME_REGULAR_FILE): Define. * bootstrap.conf (gnulib_modules): Use quote, not quotearg. Use same-inode. --- bootstrap.conf | 3 ++- src/main.c | 28 ++++++++++++++++++++++++++++ src/system.h | 42 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 71 insertions(+), 2 deletions(-) diff --git a/bootstrap.conf b/bootstrap.conf index 143d7b5..66c239e 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -57,10 +57,11 @@ obstack open progname propername -quotearg +quote readme-release realloc-gnu regex +same-inode ssize_t stddef stdlib diff --git a/src/main.c b/src/main.c index 4e01a17..da74916 100644 --- a/src/main.c +++ b/src/main.c @@ -44,6 +44,7 @@ #include "isdir.h" #include "progname.h" #include "propername.h" +#include "quote.h" #include "savedir.h" #include "version-etc.h" #include "xalloc.h" @@ -59,6 +60,11 @@ proper_name ("Mike Haertel"), \ _("others, see <http://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>") +/* When stdout is connected to a regular file, save its stat + information here, so that we can automatically skip it, thus + avoiding a potential (racy) infinite loop. */ +static struct stat out_stat; + struct stats { struct stats const *parent; @@ -1212,6 +1218,24 @@ grepfile (char const *file, struct stats *stats) || S_ISSOCK (stats->stat.st_mode) || S_ISFIFO (stats->stat.st_mode))) return 1; + + /* If there's a regular file on stdout and the current file refers + to the same i-node, we have to report the problem and skip it. + Otherwise when matching lines from some other input reach the + disk before we open this file, we can end up reading and matching + those lines and appending them to the file from which we're reading. + Then we'd have what appears to be an infinite loop that'd terminate + only upon filling the output file system or reaching a quota. */ + if (S_ISREG (stats->stat.st_mode) && out_stat.st_ino + && SAME_REGULAR_FILE (stats->stat, out_stat)) + { + if (! suppress_errors) + error (0, 0, _("input file %s is also the output"), + quote (file)); + errseen = 1; + return 1; + } + while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR) continue; @@ -2121,6 +2145,10 @@ main (int argc, char **argv) if (show_help) usage (EXIT_SUCCESS); + struct stat tmp_stat; + if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode)) + out_stat = tmp_stat; + if (keys) { if (keycc == 0) diff --git a/src/system.h b/src/system.h index 199b485..e7afc46 100644 --- a/src/system.h +++ b/src/system.h @@ -27,6 +27,7 @@ #include "configmake.h" #include "dirname.h" #include "minmax.h" +#include "same-inode.h" #if O_BINARY # define HAVE_DOS_FILE_CONTENTS 1 @@ -53,8 +54,47 @@ enum { EXIT_TROUBLE = 2 }; #include <locale.h> #ifndef initialize_main -#define initialize_main(argcp, argvp) +# define initialize_main(argcp, argvp) #endif +/* Do struct stat *S, *T have the same file attributes? + + POSIX says that two files are identical if st_ino and st_dev are + the same, but many file systems incorrectly assign the same (device, + inode) pair to two distinct files, including: + + - GNU/Linux NFS servers that export all local file systems as a + single NFS file system, if a local device number (st_dev) exceeds + 255, or if a local inode number (st_ino) exceeds 16777215. + + - Network Appliance NFS servers in snapshot directories; see + Network Appliance bug #195. + + - ClearCase MVFS; see bug id ATRia04618. + + Check whether two files that purport to be the same have the same + attributes, to work around instances of this common bug. Do not + inspect all attributes, only attributes useful in checking for this + bug. + + It's possible for two distinct files on a buggy file system to have + the same attributes, but it's not worth slowing down all + implementations (or complicating the configuration) to cater to + these rare cases in buggy implementations. */ + +#ifndef same_file_attributes +# define same_file_attributes(s, t) \ + ((s)->st_mode == (t)->st_mode \ + && (s)->st_nlink == (t)->st_nlink \ + && (s)->st_uid == (t)->st_uid \ + && (s)->st_gid == (t)->st_gid \ + && (s)->st_size == (t)->st_size \ + && (s)->st_mtime == (t)->st_mtime \ + && (s)->st_ctime == (t)->st_ctime) +#endif + +#define SAME_REGULAR_FILE(s, t) \ + (SAME_INODE (s, t) && same_file_attributes (&s, &t)) + #include "unlocked-io.h" #endif -- 1.7.6.609.gbf6a9
