On 5 Jan 2005, Rusty Russell <[EMAIL PROTECTED]> wrote: > On Tue, 2005-01-04 at 18:24 +0100, Robert Lemmen wrote: > > hi rusty, > > > > i read on some webpage about rsync and debian that you wrote a patch to > > rsync that lets it use heuristics when deciding which local file to > > use. could you tell me whether this is planned to be included in an rsync > > release? could i have that patch? > > Hmm, good question. This is from 2.5.4, and I can't remember how well it > worked. Good luck!
I'm not the rsync maintainer anymore, but I think it would be cool if this were merged, if the current team feels OK about it. > > Rusty. > > diff -urN rsync-2.5.4/Makefile.in rsync-2.5.4-fuzzy/Makefile.in > --- rsync-2.5.4/Makefile.in 2002-02-26 05:48:25.000000000 +1100 > +++ rsync-2.5.4-fuzzy/Makefile.in 2002-04-03 16:35:55.000000000 +1000 > @@ -28,7 +28,7 @@ > ZLIBOBJ=zlib/deflate.o zlib/infblock.o zlib/infcodes.o zlib/inffast.o \ > zlib/inflate.o zlib/inftrees.o zlib/infutil.o zlib/trees.o \ > zlib/zutil.o zlib/adler32.o > -OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o > main.o checksum.o match.o syscall.o log.o backup.o > +OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o > main.o checksum.o match.o syscall.o log.o backup.o alternate.o > OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o > fileio.o batch.o \ > clientname.o > DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o > authenticate.o > diff -urN rsync-2.5.4/alternate.c rsync-2.5.4-fuzzy/alternate.c > --- rsync-2.5.4/alternate.c 1970-01-01 10:00:00.000000000 +1000 > +++ rsync-2.5.4-fuzzy/alternate.c 2002-04-03 17:04:15.000000000 +1000 > @@ -0,0 +1,117 @@ > +#include "rsync.h" > + > +extern char *compare_dest; > +extern int verbose; > + > +/* Alternate methods for opening files, if local doesn't exist */ > +/* Sanity check that we are about to open regular file */ > +int do_open_regular(char *fname) > +{ > + STRUCT_STAT st; > + > + if (do_stat(fname, &st) == 0 && S_ISREG(st.st_mode)) > + return do_open(fname, O_RDONLY, 0); > + > + return -1; > +} > + > +static void split_names(char *fname, char **dirname, char **basename) > +{ > + char *slash; > + > + slash = strrchr(fname, '/'); > + if (slash) { > + *dirname = fname; > + *slash = '\0'; > + *basename = slash+1; > + } else { > + *basename = fname; > + *dirname = "."; > + } > +} > + > +static unsigned int measure_name(const char *name, > + const char *basename, > + 
const char *ext) > +{ > + int namelen = strlen(name); > + int extlen = strlen(ext); > + unsigned int score = 0; > + > + /* Extensions must match */ > + if (namelen <= extlen || strcmp(name+namelen-extlen, ext) != 0) > + return 0; > + > + /* Now score depends on similarity of prefix */ > + for (; *name==*basename && *name; name++, basename++) > + score++; > + return score; > +} > + > +int open_alternate_base_fuzzy(const char *fname) > +{ > + DIR *d; > + struct dirent *di; > + char *basename, *dirname; > + char mangled_name[MAXPATHLEN]; > + char bestname[MAXPATHLEN]; > + unsigned int bestscore = 0; > + const char *ext; > + > + /* FIXME: can we assume fname fits here? */ > + strcpy(mangled_name, fname); > + > + split_names(mangled_name, &dirname, &basename); > + d = opendir(dirname); > + if (!d) { > + rprintf(FERROR,"recv_generator opendir(%s): %s\n", > + dirname,strerror(errno)); > + return -1; > + } > + > + /* Get final extension, eg. .gz; never full basename though. */ > + ext = strrchr(basename + 1, '.'); > + if (!ext) > + ext = basename + strlen(basename); /* ext = "" */ > + > + while ((di = readdir(d)) != NULL) { > + const char *dname = d_name(di); > + unsigned int score; > + > + if (strcmp(dname,".")==0 || > + strcmp(dname,"..")==0) > + continue; > + > + score = measure_name(dname, basename, ext); > + if (verbose > 4) > + rprintf(FINFO,"fuzzy score for %s = %u\n", > + dname, score); > + if (score > bestscore) { > + strcpy(bestname, dname); > + bestscore = score; > + } > + } > + closedir(d); > + > + /* Found a candidate. 
*/ > + if (bestscore != 0) { > + char fuzzyname[MAXPATHLEN]; > + > + snprintf(fuzzyname,MAXPATHLEN,"%s/%s", dirname, bestname); > + if (verbose > 2) > + rprintf(FINFO,"fuzzy match %s->%s\n", > + fname, fuzzyname); > + return do_open_regular(fuzzyname); > + } > + return -1; > +} > + > +int open_alternate_base_comparedir(const char *fname) > +{ > + char fnamebuf[MAXPATHLEN]; > + /* try the file at compare_dest instead */ > + snprintf(fnamebuf,MAXPATHLEN,"%s/%s",compare_dest,fname); > + > + /* FIXME: now follows symlinks... */ > + return do_open_regular(fnamebuf); > +} > diff -urN rsync-2.5.4/generator.c rsync-2.5.4-fuzzy/generator.c > --- rsync-2.5.4/generator.c 2002-02-08 03:36:12.000000000 +1100 > +++ rsync-2.5.4-fuzzy/generator.c 2002-04-03 17:00:06.000000000 +1000 > @@ -42,11 +42,12 @@ > extern int always_checksum; > extern int modify_window; > extern char *compare_dest; > +extern int fuzzy; > > > /* choose whether to skip a particular file */ > static int skip_file(char *fname, > - struct file_struct *file, STRUCT_STAT *st) > + struct file_struct *file, const STRUCT_STAT *st) > { > if (st->st_size != file->length) { > return 0; > @@ -185,7 +186,61 @@ > return s; > } > > +/* Returns -1 for can't open (null file), -2 for skip */ > +static int open_base_file(struct file_struct *file, > + char *fname, > + int statret, > + STRUCT_STAT *st) > +{ > + int fd = -1; > + > + if (statret == 0) { > + if (S_ISREG(st->st_mode)) { > + if (update_only > + && cmp_modtime(st->st_mtime, file->modtime) > 0) { > + if (verbose > 1) > + rprintf(FINFO,"%s is newer\n",fname); > + return -2; > + } > + if (skip_file(fname, file, st)) { > + set_perms(fname, file, st, 1); > + return -2; > + } > + fd = do_open(fname, O_RDONLY, 0); > + if (fd == -1) { > + rprintf(FERROR,"failed to open %s, continuing : > %s\n",fname,strerror(errno)); > + return -1; > + } else > + return fd; > + } else { > + /* Try to use symlink contents */ > + if (S_ISLNK(st->st_mode)) { > + fd = do_open_regular(fname); > + 
/* Don't delete yet; receiver will need it */ > + } else { > + if (delete_file(fname) != 0) { > + if (fd != -1) > + close(fd); > + return -2; > + } > + } > + } > + } > + > + if (fd == -1 && compare_dest != NULL) > + fd = open_alternate_base_comparedir(fname); > > + if (fd == -1 && fuzzy) > + fd = open_alternate_base_fuzzy(fname); > + > + /* Update stat to understand size */ > + if (fd != -1) { > + if (do_fstat(fd, st) != 0) > + rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno)); > + } > + > + return fd; > +} > > /* > * Acts on file number I from FLIST, whose name is fname. > @@ -203,9 +258,6 @@ > struct sum_struct *s; > int statret; > struct file_struct *file = flist->files[i]; > - char *fnamecmp; > - char fnamecmpbuf[MAXPATHLEN]; > - extern char *compare_dest; > extern int list_only; > extern int preserve_perms; > extern int only_existing; > @@ -341,82 +393,29 @@ > return; > } > > - fnamecmp = fname; > - > - if ((statret == -1) && (compare_dest != NULL)) { > - /* try the file at compare_dest instead */ > - int saveerrno = errno; > - snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",compare_dest,fname); > - statret = link_stat(fnamecmpbuf,&st); > - if (!S_ISREG(st.st_mode)) > - statret = -1; > - if (statret == -1) > - errno = saveerrno; > - else > - fnamecmp = fnamecmpbuf; > - } > - > - if (statret == -1) { > - if (errno == ENOENT) { > - write_int(f_out,i); > - if (!dry_run) send_sums(NULL,f_out); > - } else { > - if (verbose > 1) > - rprintf(FERROR, RSYNC_NAME > - ": recv_generator failed to open > \"%s\": %s\n", > - fname, strerror(errno)); > - } > - return; > - } > - > - if (!S_ISREG(st.st_mode)) { > - if (delete_file(fname) != 0) { > - return; > - } > - > - /* now pretend the file didn't exist */ > - write_int(f_out,i); > - if (!dry_run) send_sums(NULL,f_out); > - return; > - } > - > - if (opt_ignore_existing && fnamecmp == fname) { > - if (verbose > 1) > - rprintf(FINFO,"%s exists\n",fname); > - return; > - } > - > - if (update_only && 
cmp_modtime(st.st_mtime,file->modtime)>0 && fnamecmp > == fname) { > + /* Failed to stat for some other reason. */ > + if (statret == -1 && errno != ENOENT) { > if (verbose > 1) > - rprintf(FINFO,"%s is newer\n",fname); > + rprintf(FERROR, RSYNC_NAME > + ": recv_generator failed to open \"%s\": %s\n", > + fname, strerror(errno)); > return; > } > > - if (skip_file(fname, file, &st)) { > - if (fnamecmp == fname) > - set_perms(fname,file,&st,1); > - return; > - } > - > - if (dry_run) { > - write_int(f_out,i); > + fd = open_base_file(file, fname, statret, &st); > + if (fd == -2) > return; > - } > - > - if (whole_file) { > - write_int(f_out,i); > - send_sums(NULL,f_out); > - return; > - } > - > - /* open the file */ > - fd = do_open(fnamecmp, O_RDONLY, 0); > > - if (fd == -1) { > - rprintf(FERROR,RSYNC_NAME": failed to open \"%s\", continuing : > %s\n",fnamecmp,strerror(errno)); > - /* pretend the file didn't exist */ > + if ((whole_file || dry_run) && fd != -1) { > + close(fd); > + fd = -1; > + } > + > + if (fd == -1) { > + /* the file didn't exist, or we can pretend it doesn't */ > write_int(f_out,i); > - send_sums(NULL,f_out); > + if (!dry_run) > + send_sums(NULL,f_out); > return; > } > > @@ -427,7 +426,7 @@ > } > > if (verbose > 3) > - rprintf(FINFO,"gen mapped %s of size > %.0f\n",fnamecmp,(double)st.st_size); > + rprintf(FINFO,"gen mapped %s of size > %.0f\n",fname,(double)st.st_size); > > s = generate_sums(buf,st.st_size,adapt_block_size(file, block_size)); > > diff -urN rsync-2.5.4/options.c rsync-2.5.4-fuzzy/options.c > --- rsync-2.5.4/options.c 2002-02-28 09:49:57.000000000 +1100 > +++ rsync-2.5.4-fuzzy/options.c 2002-04-03 16:43:54.000000000 +1000 > @@ -73,6 +73,7 @@ > #else > int modify_window=0; > #endif > +int fuzzy=0; > int blocking_io=-1; > > /** Network address family. 
**/ > @@ -245,6 +246,7 @@ > rprintf(F," --bwlimit=KBPS limit I/O bandwidth, KBytes per > second\n"); > rprintf(F," --write-batch=PREFIX write batch fileset starting with > PREFIX\n"); > rprintf(F," --read-batch=PREFIX read batch fileset starting with > PREFIX\n"); > + rprintf(F," --fuzzy use similar file as basis if it > does't exist\n"); > rprintf(F," -h, --help show this help screen\n"); > #ifdef INET6 > rprintf(F," -4 prefer IPv4\n"); > @@ -340,6 +342,7 @@ > {"hard-links", 'H', POPT_ARG_NONE, &preserve_hard_links}, > {"read-batch", 0, POPT_ARG_STRING, &batch_prefix, OPT_READ_BATCH}, > {"write-batch", 0, POPT_ARG_STRING, &batch_prefix, OPT_WRITE_BATCH}, > + {"fuzzy", 0, POPT_ARG_NONE, &fuzzy}, > #ifdef INET6 > {0, '4', POPT_ARG_VAL, &default_af_hint, AF_INET > }, > {0, '6', POPT_ARG_VAL, &default_af_hint, > AF_INET6 }, > @@ -757,7 +760,9 @@ > args[ac++] = "--compare-dest"; > args[ac++] = compare_dest; > } > - > + > + if (fuzzy && am_sender) > + args[ac++] = "--fuzzy"; > > *argc = ac; > } > diff -urN rsync-2.5.4/proto.h rsync-2.5.4-fuzzy/proto.h > --- rsync-2.5.4/proto.h 2002-02-23 11:05:06.000000000 +1100 > +++ rsync-2.5.4-fuzzy/proto.h 2002-04-03 16:35:25.000000000 +1000 > @@ -256,3 +256,6 @@ > int cmp_modtime(time_t file1, time_t file2); > int _Insure_trap_error(int a1, int a2, int a3, int a4, int a5, int a6); > int sys_gettimeofday(struct timeval *tv); > +int do_open_regular(char *fname); > +int open_alternate_base_fuzzy(const char *fname); > +int open_alternate_base_comparedir(const char *fname); > diff -urN rsync-2.5.4/receiver.c rsync-2.5.4-fuzzy/receiver.c > --- rsync-2.5.4/receiver.c 2002-02-14 05:42:20.000000000 +1100 > +++ rsync-2.5.4-fuzzy/receiver.c 2002-04-03 16:46:46.000000000 +1000 > @@ -36,6 +36,7 @@ > extern char *compare_dest; > extern int make_backups; > extern char *backup_suffix; > +extern int fuzzy; > > static struct delete_list { > DEV64_T dev; > @@ -307,8 +308,6 @@ > char *fname; > char template[MAXPATHLEN]; > char fnametmp[MAXPATHLEN]; > 
- char *fnamecmp; > - char fnamecmpbuf[MAXPATHLEN]; > struct map_struct *buf; > int i; > struct file_struct *file; > @@ -366,28 +365,24 @@ > if (verbose > 2) > rprintf(FINFO,"recv_files(%s)\n",fname); > > - fnamecmp = fname; > - > /* open the file */ > - fd1 = do_open(fnamecmp, O_RDONLY, 0); > + fd1 = do_open(fname, O_RDONLY, 0); > > - if ((fd1 == -1) && (compare_dest != NULL)) { > - /* try the file at compare_dest instead */ > - snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s", > - compare_dest,fname); > - fnamecmp = fnamecmpbuf; > - fd1 = do_open(fnamecmp, O_RDONLY, 0); > - } > + if (fd1 == -1 && compare_dest != NULL) > + fd1 = open_alternate_base_comparedir(fname); > + > + if (fd1 == -1 && fuzzy) > + fd1 = open_alternate_base_fuzzy(fname); > > if (fd1 != -1 && do_fstat(fd1,&st) != 0) { > - rprintf(FERROR,"fstat %s : > %s\n",fnamecmp,strerror(errno)); > + rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno)); > receive_data(f_in,NULL,-1,NULL,file->length); > close(fd1); > continue; > } > > if (fd1 != -1 && !S_ISREG(st.st_mode)) { > - rprintf(FERROR,"%s : not a regular file > (recv_files)\n",fnamecmp); > + rprintf(FERROR,"%s : not a regular file > (recv_files)\n",fname); > receive_data(f_in,NULL,-1,NULL,file->length); > close(fd1); > continue; > @@ -403,7 +398,7 @@ > if (fd1 != -1 && st.st_size > 0) { > buf = map_file(fd1,st.st_size); > if (verbose > 2) > - rprintf(FINFO,"recv mapped %s of size > %.0f\n",fnamecmp,(double)st.st_size); > + rprintf(FINFO,"recv mapped %s of size > %.0f\n",fname,(double)st.st_size); > } else { > buf = NULL; > } > > -- > A bad analogy is like a leaky screwdriver -- Richard Braakman -- Martin -- To unsubscribe or change options: https://lists.samba.org/mailman/listinfo/rsync Before posting, read: http://www.catb.org/~esr/faqs/smart-questions.html