Hello
This topic comes up every couple of years and I guess it's that time
again.
I was prompted to look into this due to a perplexing interaction between
a database that is supposed to only create immutable database files and
another process that tries to create a backup from hardlinks pointing to
those files. The surprise here are tar reports that the file changed.
After reading https://issues.apache.org/jira/browse/CASSANDRA-17473 I'm
almost sure that it's ctime changing in the meantime due to the original
hardlink getting removed (due to a process this database system calls
compaction) while tar is processing the blocks.
Therefore I would like to propose the attached patch for inclusion in a
future version of tar.
It improves the granularity of what the user can define as a file
change; for now I added size+, size-, mtime, and ctime. The original
behavior is kept as the default (and equivalent to --file-changed=none
--file-changed=ctime --file-changed=size+).
I made --file-changed work like --warning, and although I'm not the
biggest fan of the UX it offers to the user, it's at least consistent
with an existing option.
I documented this change in tar.1, but didn't add any tests.
Piotr
diff --git a/doc/tar.1 b/doc/tar.1
index 180f4cbb..a3866818 100644
--- a/doc/tar.1
+++ b/doc/tar.1
@@ -249,6 +249,40 @@ Print program version and copyright information and exit.
\fB\-\-check\-device\fR
Check device numbers when creating incremental archives (default).
.TP
+\fB\-\-file\-changed\fR=\fIATTRIBUTE\fR
+Define which file attributes are taken into account when deciding if
+the file changed as we read it. This can affect the exit status and
+the \fBfile-changed\fR warning.
+
+Multiple \fB\-\-file\-changed\fR attributes accumulate.
+
+Available \fB\-\-file\-changed\fR attributes are:
+.RS
+.TP
+.B ctime
+Report if ctime changed.
+.TP
+.B mtime
+Report if mtime changed.
+.TP
+.B size+
+Report if the size of the file increased.
+.TP
+.B size-
+Report if the size of the file decreased.
+.TP
+.B all
+Report all of the above.
+.TP
+.B none, nothing
+Ignore file changes.
+.TP
+The default behavior is equivalent to
+.EX
+--file-changed=none --file-changed=ctime --file-changed=size+
+.EE
+.RE
+.TP
\fB\-g\fR, \fB\-\-listed\-incremental\fR=\fIFILE\fR
Handle new GNU-format incremental backups. \fIFILE\fR is the name of
a \fBsnapshot file\fR, where tar stores additional information which
diff --git a/src/common.h b/src/common.h
index 62149113..a6b55b20 100644
--- a/src/common.h
+++ b/src/common.h
@@ -164,6 +164,17 @@ enum exclusion_tag_type
GLOBAL char const *group_name_option;
GLOBAL gid_t group_option;
+enum file_changed
+{
+ NOTHING_CHANGED = 1 << 0,
+ NONE_CHANGED = 1 << 0,
+ MTIME_CHANGED = 1 << 1,
+ CTIME_CHANGED = 1 << 2,
+ SIZE_INCREASED = 1 << 3,
+ SIZE_DECREASED = 1 << 4
+};
+extern unsigned file_changed_option;
+
GLOBAL bool ignore_failed_read_option;
GLOBAL bool ignore_zeros_option;
diff --git a/src/create.c b/src/create.c
index 30db2b5c..91581374 100644
--- a/src/create.c
+++ b/src/create.c
@@ -1641,6 +1641,7 @@ dump_file0 (struct tar_stat_info *st, char const *name, char const *p)
char type;
off_t original_size;
struct timespec original_ctime;
+ struct timespec original_mtime;
off_t block_ordinal = -1;
int fd = 0;
bool is_dir;
@@ -1685,7 +1686,7 @@ dump_file0 (struct tar_stat_info *st, char const *name, char const *p)
st->archive_file_size = original_size = st->stat.st_size;
st->atime = get_stat_atime (&st->stat);
- st->mtime = get_stat_mtime (&st->stat);
+ st->mtime = original_mtime = get_stat_mtime (&st->stat);
st->ctime = original_ctime = get_stat_ctime (&st->stat);
#ifdef S_ISHIDDEN
@@ -1816,11 +1817,20 @@ dump_file0 (struct tar_stat_info *st, char const *name, char const *p)
if (ok)
{
- if ((timespec_cmp (get_stat_ctime (&final_stat), original_ctime) != 0
+ unsigned differs = 0;
+
+ differs |= file_changed_option & CTIME_CHANGED &&
+ (timespec_cmp (get_stat_ctime (&final_stat), original_ctime) != 0
/* Original ctime will change if the file is a directory and
--remove-files is given */
- && !(remove_files_option && is_dir))
- || original_size < final_stat.st_size)
+ && !(remove_files_option && is_dir));
+ differs |= file_changed_option & MTIME_CHANGED &&
+ timespec_cmp (get_stat_mtime (&final_stat), original_mtime) != 0;
+ differs |= file_changed_option & SIZE_INCREASED &&
+ original_size < final_stat.st_size;
+ differs |= file_changed_option & SIZE_DECREASED &&
+ original_size > final_stat.st_size;
+ if (differs)
{
WARNOPT (WARN_FILE_CHANGED,
(0, 0, _("%s: file changed as we read it"),
diff --git a/src/tar.c b/src/tar.c
index 47144812..894bf8cb 100644
--- a/src/tar.c
+++ b/src/tar.c
@@ -279,6 +279,7 @@ enum
DELAY_DIRECTORY_RESTORE_OPTION,
HARD_DEREFERENCE_OPTION,
DELETE_OPTION,
+ FILE_CHANGED_OPTION,
FORCE_LOCAL_OPTION,
FULL_TIME_OPTION,
GROUP_OPTION,
@@ -483,6 +484,8 @@ static struct argp_option options[] = {
N_("handle new GNU-format incremental backup"), GRID_MODIFIER },
{"level", LEVEL_OPTION, N_("NUMBER"), 0,
N_("dump level for created listed-incremental archive"), GRID_MODIFIER },
+ {"file-changed", FILE_CHANGED_OPTION, N_("ATTRIBUTE"), 0,
+ N_("define what it means for a file to change"), GRID_MODIFIER },
{"ignore-failed-read", IGNORE_FAILED_READ_OPTION, 0, 0,
N_("do not exit with nonzero on unreadable files"), GRID_MODIFIER },
{"occurrence", OCCURRENCE_OPTION, N_("NUMBER"), OPTION_ARG_OPTIONAL,
@@ -1321,6 +1324,51 @@ static int const hole_detection_types[] =
ARGMATCH_VERIFY (hole_detection_args, hole_detection_types);
+unsigned file_changed_option = CTIME_CHANGED | SIZE_INCREASED;
+
+static char const *const file_changed_args[] =
+{
+ "nothing", "none", "mtime", "ctime", "size+", "size-", NULL
+};
+
+static int const file_changed_types[] =
+{
+ NOTHING_CHANGED, NONE_CHANGED,
+ MTIME_CHANGED, CTIME_CHANGED,
+ SIZE_INCREASED, SIZE_DECREASED
+};
+
+ARGMATCH_VERIFY (file_changed_args, file_changed_types);
+
+static void
+set_file_changed_option (const char *arg)
+{
+ int negate = 0;
+ int option;
+
+ if (strcmp (arg, "nothing") == 0 || strcmp (arg, "none") == 0)
+ {
+ file_changed_option = 0;
+ return;
+ }
+ if (strcmp (arg, "all") == 0)
+ {
+ file_changed_option = ~0U;
+ return;
+ }
+ if (strlen (arg) > 2 && memcmp (arg, "no-", 3) == 0)
+ {
+ negate = 1;
+ arg += 3;
+ }
+ option = XARGMATCH ("--file-changed", arg,
+ file_changed_args, file_changed_types);
+ if (negate)
+ file_changed_option &= ~option;
+ else
+ file_changed_option |= option;
+}
+
static void
set_old_files_option (int code, struct option_locus *loc)
@@ -1845,6 +1893,10 @@ parse_opt (int key, char *arg, struct argp_state *state)
ignore_command_error_option = true;
break;
+ case FILE_CHANGED_OPTION:
+ set_file_changed_option (arg);
+ break;
+
case IGNORE_FAILED_READ_OPTION:
ignore_failed_read_option = true;
break;