Hello

This topic comes up every couple of years and I guess it's that time
again.

I was prompted to look into this due to a perplexing interaction between
a database that is supposed to only create immutable database files and
another process that tries to create a backup from hardlinks pointing to
those files. The surprise here are tar reports that the file changed.

After reading https://issues.apache.org/jira/browse/CASSANDRA-17473 I'm
almost sure that it's ctime changing in the meantime due to the original
hardlink getting removed (due to a process this database system calls
compaction) while tar is processing the blocks.

Therefore I would like to propose the attached patch for inclusion in a
future version of tar.

It improves the granularity of what the user can define as a file
change; for now I added size+, size-, mtime, and ctime. The original
behavior is kept as the default (and equivalent to --file-changed=none
--file-changed=ctime --file-changed=size+).

I made --file-changed work like --warning, and although I'm not the
biggest fan of the UX it offers to the user, it's at least consistent
with an existing option.

I documented this change in tar.1, but didn't add any tests.

Piotr
diff --git a/doc/tar.1 b/doc/tar.1
index 180f4cbb..a3866818 100644
--- a/doc/tar.1
+++ b/doc/tar.1
@@ -249,6 +249,40 @@ Print program version and copyright information and exit.
 \fB\-\-check\-device\fR
 Check device numbers when creating incremental archives (default).
 .TP
+\fB\-\-file\-changed\fR=\fIATTRIBUTE\fR
+Define which file attributes are taken into account when deciding if
+the file changed as we read it.  This can affect the exit status and
+the \fBfile-changed\fR warning.
+
+Multiple \fB\-\-file\-changed\fR attributes accumulate.
+
+Available \fB\-\-file\-changed\fR attributes are:
+.RS
+.TP
+.B ctime
+Report if ctime changed.
+.TP
+.B mtime
+Report if mtime changed.
+.TP
+.B size+
+Report if the size of the file increased.
+.TP
+.B size-
+Report if the size of the file decreased.
+.TP
+.B all
+Report all of the above.
+.TP
+.B none, nothing
+Ignore file changes.
+.TP
+The default behavior is equivalent to
+.EX
+--file-changed=none --file-changed=ctime --file-changed=size+
+.EE
+.RE
+.TP
 \fB\-g\fR, \fB\-\-listed\-incremental\fR=\fIFILE\fR
 Handle new GNU-format incremental backups.  \fIFILE\fR is the name of
 a \fBsnapshot file\fR, where tar stores additional information which
diff --git a/src/common.h b/src/common.h
index 62149113..a6b55b20 100644
--- a/src/common.h
+++ b/src/common.h
@@ -164,6 +164,17 @@ enum exclusion_tag_type
 GLOBAL char const *group_name_option;
 GLOBAL gid_t group_option;
 
+enum file_changed
+{
+  NOTHING_CHANGED = 1 << 0,
+  NONE_CHANGED = 1 << 0,
+  MTIME_CHANGED = 1 << 1,
+  CTIME_CHANGED = 1 << 2,
+  SIZE_INCREASED = 1 << 3,
+  SIZE_DECREASED = 1 << 4
+};
+extern unsigned file_changed_option;
+
 GLOBAL bool ignore_failed_read_option;
 
 GLOBAL bool ignore_zeros_option;
diff --git a/src/create.c b/src/create.c
index 30db2b5c..91581374 100644
--- a/src/create.c
+++ b/src/create.c
@@ -1641,6 +1641,7 @@ dump_file0 (struct tar_stat_info *st, char const *name, char const *p)
   char type;
   off_t original_size;
   struct timespec original_ctime;
+  struct timespec original_mtime;
   off_t block_ordinal = -1;
   int fd = 0;
   bool is_dir;
@@ -1685,7 +1686,7 @@ dump_file0 (struct tar_stat_info *st, char const *name, char const *p)
 
   st->archive_file_size = original_size = st->stat.st_size;
   st->atime = get_stat_atime (&st->stat);
-  st->mtime = get_stat_mtime (&st->stat);
+  st->mtime = original_mtime = get_stat_mtime (&st->stat);
   st->ctime = original_ctime = get_stat_ctime (&st->stat);
 
 #ifdef S_ISHIDDEN
@@ -1816,11 +1817,20 @@ dump_file0 (struct tar_stat_info *st, char const *name, char const *p)
 
       if (ok)
 	{
-	  if ((timespec_cmp (get_stat_ctime (&final_stat), original_ctime) != 0
+	  unsigned differs = 0;
+
+	  differs |= file_changed_option & CTIME_CHANGED &&
+	      (timespec_cmp (get_stat_ctime (&final_stat), original_ctime) != 0
 	       /* Original ctime will change if the file is a directory and
 		  --remove-files is given */
-	       && !(remove_files_option && is_dir))
-	      || original_size < final_stat.st_size)
+	       && !(remove_files_option && is_dir));
+	  differs |= file_changed_option & MTIME_CHANGED &&
+	      timespec_cmp (get_stat_mtime (&final_stat), original_mtime) != 0;
+	  differs |= file_changed_option & SIZE_INCREASED &&
+	      original_size < final_stat.st_size;
+	  differs |= file_changed_option & SIZE_DECREASED &&
+	      original_size > final_stat.st_size;
+	  if (differs)
 	    {
 	      WARNOPT (WARN_FILE_CHANGED,
 		       (0, 0, _("%s: file changed as we read it"),
diff --git a/src/tar.c b/src/tar.c
index 47144812..894bf8cb 100644
--- a/src/tar.c
+++ b/src/tar.c
@@ -279,6 +279,7 @@ enum
   DELAY_DIRECTORY_RESTORE_OPTION,
   HARD_DEREFERENCE_OPTION,
   DELETE_OPTION,
+  FILE_CHANGED_OPTION,
   FORCE_LOCAL_OPTION,
   FULL_TIME_OPTION,
   GROUP_OPTION,
@@ -483,6 +484,8 @@ static struct argp_option options[] = {
    N_("handle new GNU-format incremental backup"), GRID_MODIFIER },
   {"level", LEVEL_OPTION, N_("NUMBER"), 0,
    N_("dump level for created listed-incremental archive"), GRID_MODIFIER },
+  {"file-changed", FILE_CHANGED_OPTION, N_("ATTRIBUTE"), 0,
+   N_("define what it means for a file to change"), GRID_MODIFIER },
   {"ignore-failed-read", IGNORE_FAILED_READ_OPTION, 0, 0,
    N_("do not exit with nonzero on unreadable files"), GRID_MODIFIER },
   {"occurrence", OCCURRENCE_OPTION, N_("NUMBER"), OPTION_ARG_OPTIONAL,
@@ -1321,6 +1324,51 @@ static int const hole_detection_types[] =
 
 ARGMATCH_VERIFY (hole_detection_args, hole_detection_types);
 
+unsigned file_changed_option = CTIME_CHANGED | SIZE_INCREASED;
+
+static char const *const file_changed_args[] =
+{
+  "nothing", "none", "mtime", "ctime", "size+", "size-", NULL
+};
+
+static int const file_changed_types[] =
+{
+  NOTHING_CHANGED, NONE_CHANGED,
+  MTIME_CHANGED, CTIME_CHANGED,
+  SIZE_INCREASED, SIZE_DECREASED
+};
+
+ARGMATCH_VERIFY (file_changed_args, file_changed_types);
+
+static void
+set_file_changed_option (const char *arg)
+{
+  int negate = 0;
+  int option;
+
+  if (strcmp (arg, "nothing") == 0 || strcmp (arg, "none") == 0)
+    {
+      file_changed_option = 0;
+      return;
+    }
+  if (strcmp (arg, "all") == 0)
+    {
+      file_changed_option = ~0U;
+      return;
+    }
+  if (strlen (arg) > 2 && memcmp (arg, "no-", 3) == 0)
+    {
+      negate = 1;
+      arg += 3;
+    }
+  option = XARGMATCH ("--file-changed", arg,
+    file_changed_args, file_changed_types);
+  if (negate)
+    file_changed_option &= ~option;
+  else
+    file_changed_option |= option;
+}
+
 
 static void
 set_old_files_option (int code, struct option_locus *loc)
@@ -1845,6 +1893,10 @@ parse_opt (int key, char *arg, struct argp_state *state)
       ignore_command_error_option = true;
       break;
 
+    case FILE_CHANGED_OPTION:
+      set_file_changed_option (arg);
+      break;
+
     case IGNORE_FAILED_READ_OPTION:
       ignore_failed_read_option = true;
       break;

Reply via email to