Hello rsyncers,

I have long wished for a feature in rsync to detect files that have been
renamed on the sender side since the last time a sync was performed,
and avoid transferring those files to the destination the same way it
avoids transferring files that haven't changed.

Example 1: a log directory (like /var/log) is backed up every day. Most
of the time, rsync transfers very little data, but once a week it
basically performs a full copy without finding any basis files on the
destination copy. This is because the logs have been rotated and what
was /var/log/example.1.gz has been renamed to /var/log/example.2.gz,
/var/log/example.2.gz has become /var/log/example.3.gz, and so on.
rsync has no way to know this.

Example 2: a home directory is similarly backed up every day. One day,
the user decides to clean house and move lots of files around, creating
new directories, and moving hundreds of files around among different
directories. Once again, rsync is going to have to retransfer each of
those files.

My solution is to add a stable, unchanging, name for each file in the
transfer. As long as the --hard-links (-H) option is used, this stable
name will provide a name that already exists on the receiver side, and
the receiver can create a hard link to this name even when the file
appears under a completely new name. These stable names do not actually
exist on the sender side; they are synthesized by rsync from the
unchanging attributes that give the file its identity: its device and
inode number.

I have attached a patch for a proof of concept of this feature. With it,
I can start with this directory structure on the sender side:

testfiles/
testfiles/one
testfiles/two

...and rsync it to the destination. On the destination there is an extra
directory called "byinode" which contains hard links to both regular files.
Then I rename testfiles/one to testfiles/oneone. When I rsync again,
instead of deleting the file called "one" and transferring the full contents
of an apparently new file called "oneone", the file "one" is deleted and
"oneone" is created by hardlinking to the stable filename.

I would like to know the following:

- Are people interested in a feature like this?

- Is there a better way to do it?

- It only works with protocol version 30 at the moment. Would there be
any interest in making it work with older protocol versions? (it has to
be done very differently in older versions)

- Could this patch be committed once I take it beyond proof of concept
state?

Notes, if you want to test it:

- It is hardcoded to always enable the feature: it synthesizes a directory
called "byinode" and adds it to the file list. The final version will
make this a command line option, of course.
- It only works with protocol version 30.
- Use with at least --delete --no-i-r -r
- Only the sender side requires the patch
- The patch is against rsync-3.0.3pre2

Thank you for your feedback

-Phil
--- flist.c	2008/06/20 17:29:19	1.1
+++ flist.c	2008/06/22 23:16:06
@@ -70,6 +70,8 @@
 extern int unsort_ndx;
 extern struct stats stats;
 extern char *filesfrom_host;
+const char *dev_inode_map = "byinode";
+/* const char *dev_inode_map = NULL; */
 
 extern char curr_dir[MAXPATHLEN];
 
@@ -117,6 +119,7 @@
 static dev_t tmp_rdev;
 #ifdef SUPPORT_HARD_LINKS
 static int64 tmp_dev, tmp_ino;
+static int please_synth_dev_inodemap;
 #endif
 static char tmp_sum[MAX_DIGEST_LEN];
 
@@ -1202,8 +1205,10 @@
 #ifdef SUPPORT_HARD_LINKS
 	if (preserve_hard_links && flist && flist->prev) {
 		if (protocol_version >= 28
-		 ? (!S_ISDIR(st.st_mode) && st.st_nlink > 1)
-		 : S_ISREG(st.st_mode)) {
+		 ? ((!S_ISDIR(st.st_mode)) && (
+			(dev_inode_map && (protocol_version >= 30)) ||
+			(st.st_nlink > 1)
+		 )) : S_ISREG(st.st_mode)) {
 			tmp_dev = st.st_dev;
 			tmp_ino = st.st_ino;
 		} else
@@ -1284,6 +1289,100 @@
 	free(REQ_EXTRA(file, F_DEPTH(file)));
 }
 
+/* Add a fake directory entry for name[0..namelen) to flist (also sending it
+ * on f when f >= 0), after first synthesizing every parent directory in the
+ * path.  Entries get FLAG_FAKE_FILE so the sender never tries to read them. */
+static void synthesize_directory(int f, struct file_list *flist, const char *name, int namelen, int flags)
+{
+	int basei, alloc_len;
+	int extra_len = file_extra_cnt * EXTRA_LEN;
+	struct file_struct *file;
+	alloc_pool_t *pool;
+	char *bp;
+
+	/* Find the last '/'; afterwards basei indexes the first basename char. */
+	for (basei = namelen; basei > 0;) {
+		if (name[--basei] == '/') {
+			synthesize_directory(f, flist, name, basei++, flags);
+			break;
+		}
+	}
+
+	/* Only divert a directory in the main transfer. */
+	if (flist->prev && (flags & FLAG_DIVERT_DIRS)) {
+		/* Room for parent/sibling/next-child info. */
+		extra_len += DIRNODE_EXTRA_CNT * EXTRA_LEN;
+		if (relative_paths)
+			extra_len += PTR_EXTRA_CNT * EXTRA_LEN;
+		pool = dir_flist->file_pool;
+	} else
+		pool = flist->file_pool;
+
+#if EXTRA_ROUNDING > 0
+	if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
+		extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
+#endif
+
+	alloc_len = FILE_STRUCT_LEN + extra_len + namelen - basei + 1;
+	if (pool)
+		bp = pool_alloc(pool, alloc_len, "synthesize_directory");
+	else if (!(bp = new_array(char, alloc_len)))
+		out_of_memory("synthesize_directory");
+
+	memset(bp, 0, extra_len + FILE_STRUCT_LEN);
+	bp += extra_len;
+	file = (struct file_struct *)bp;
+	bp += FILE_STRUCT_LEN;
+
+	memcpy(bp, name+basei, namelen-basei);
+	bp[namelen-basei] = 0;
+
+	file->flags = flags | FLAG_FAKE_FILE;
+	file->modtime = time(NULL);
+	file->len32 = 0;
+	file->mode = S_IFDIR|0700;
+	if (uid_ndx) F_OWNER(file) = 0;
+	if (gid_ndx) F_GROUP(file) = 0;
+
+	if (basei > 0) {
+		char *p = new_array(char, basei); /* dirname without its '/', plus NUL */
+		if (!p)
+			out_of_memory("synthesize_directory");
+		memcpy(p, name, basei - 1);
+		p[basei - 1] = 0;
+		file->dirname = p;
+	}
+
+	if (verbose > 2)
+		rprintf(FINFO, "[%s] synthesize_directory(%s%s%s)\n",
+			who_am_i(), file->dirname ? file->dirname : "",
+			file->dirname ? "/" : "", file->basename);
+
+	if (am_sender)
+		F_PATHNAME(file) = pathname;
+	else if (!pool)
+		F_DEPTH(file) = extra_len / EXTRA_LEN;
+
+	if (unsort_ndx)
+		F_NDX(file) = dir_count;
+
+	if (f >= 0) {
+		int64 tmp_tmp_dev;
+		char fbuf[MAXPATHLEN];
+
+		/* XXX do I need to do the iconv thing? */
+		f_name(file, fbuf);
+		clean_fname(fbuf, 0);
+		tmp_tmp_dev = tmp_dev;
+		tmp_dev = 0;
+		send_file_entry(f, fbuf, file, flist->used, flist->ndx_start);
+		tmp_dev = tmp_tmp_dev;
+	}
+
+	flist_expand(flist, 1);
+	flist->files[flist->used++] = file;
+}
+
 static struct file_struct *send_file_name(int f, struct file_list *flist,
 					  const char *fname, STRUCT_STAT *stp,
 					  int flags, int filter_level)
@@ -1369,6 +1468,65 @@
 	flist_expand(flist, 1);
 	flist->files[flist->used++] = file;
 
+#ifdef SUPPORT_HARD_LINKS
+	if (dev_inode_map && (tmp_dev != 0) && (protocol_version >= 30)) {
+		struct file_struct *synth_file;
+		int extra_len = file_extra_cnt * EXTRA_LEN;
+		int base_alloc_len;
+		int alloc_len;
+		char *bp;
+		char basename[64];
+
+		if (please_synth_dev_inodemap) {
+			synthesize_directory(f, flist, dev_inode_map, strlen(dev_inode_map), flags);
+			please_synth_dev_inodemap = 0;
+		}
+
+#if EXTRA_ROUNDING > 0
+		if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
+			extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
+#endif
+		base_alloc_len = FILE_STRUCT_LEN + extra_len;
+
+		sprintf(basename, "%llx.%lld", tmp_dev, tmp_ino);
+		alloc_len = strlen(basename);
+
+		if ((alloc_len + strlen(dev_inode_map) + 1) > MAXPATHLEN) {
+			rprintf(FINFO, "skipping overly long name: %s/%s\n", dev_inode_map, basename);
+		} else {
+			alloc_len += base_alloc_len + 1;
+
+			if (flist->file_pool) {
+				bp = pool_alloc(flist->file_pool, alloc_len, "send_file_name");
+			} else {
+				if (!(bp = new_array(char, alloc_len)))
+					out_of_memory("send_file_name");
+			}
+			memcpy(bp, ((char *)(file)) - extra_len, base_alloc_len);
+			bp += extra_len;
+			synth_file = (struct file_struct *)bp;
+			bp += FILE_STRUCT_LEN;
+
+			memcpy(bp, basename, alloc_len-base_alloc_len);
+
+			synth_file->dirname = dev_inode_map;
+
+			if (f >= 0) {
+				char fbuf[MAXPATHLEN];
+
+				/* XXX do I need to do the iconv thing? */
+				f_name(synth_file, fbuf);
+				clean_fname(fbuf, 0);
+				
+				send_file_entry(f, fbuf, synth_file, flist->used, flist->ndx_start);
+			}
+
+			flist_expand(flist, 1);
+			flist->files[flist->used++] = synth_file;
+		}
+	}
+#endif
+
 	return file;
 }
 
@@ -1380,6 +1538,7 @@
 	char is_dot_dir = fbuf[ol-1] == '.' && (ol == 1 || fbuf[ol-2] == '/');
 
 	if (S_ISDIR(file->mode)
+	    && !(file->flags & FLAG_FAKE_FILE)
 	    && !(file->flags & FLAG_MOUNT_DIR) && f_name(file, fbuf)) {
 		void *save_filters;
 		unsigned int len = strlen(fbuf);
@@ -1899,6 +2058,8 @@
 		use_ff_fd = 1;
 	}
 
+	please_synth_dev_inodemap = (dev_inode_map && (protocol_version >= 30));
+
 	while (1) {
 		char fbuf[MAXPATHLEN], *fn, name_type;
 
--- rsync.h	2008/06/22 23:14:46	1.1
+++ rsync.h	2008/06/22 23:15:19
@@ -79,6 +79,7 @@
 #define FLAG_LENGTH64 (1<<9)	/* sender/receiver/generator */
 #define FLAG_SKIP_GROUP (1<<10)	/* receiver/generator */
 #define FLAG_TIME_FAILED (1<<11)/* generator */
+#define FLAG_FAKE_FILE (1<<12)	/* synthesized directory, don't try to read it */
 
 /* These flags are passed to functions but not stored. */
 
-- 
Please use reply-all for most replies to avoid omitting the mailing list.
To unsubscribe or change options: https://lists.samba.org/mailman/listinfo/rsync
Before posting, read: http://www.catb.org/~esr/faqs/smart-questions.html

Reply via email to